From 60549675b6910571481870e8a5c78a5ac9ef6b9b Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Fri, 6 Oct 2023 00:01:13 -0700 Subject: [PATCH] Repeat Python bindings properly pass host task dependencies 1d variant of repeat was not passed host task event dependency for allocating shapes and strides on the device. This caused sporadic segfaults, where the kernel would attempt to access unallocated device data. --- dpctl/tensor/libtensor/source/repeat.cpp | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/dpctl/tensor/libtensor/source/repeat.cpp b/dpctl/tensor/libtensor/source/repeat.cpp index 3b1c956dd4..391f995feb 100644 --- a/dpctl/tensor/libtensor/source/repeat.cpp +++ b/dpctl/tensor/libtensor/source/repeat.cpp @@ -253,11 +253,18 @@ py_repeat_by_sequence(const dpctl::tensor::usm_ndarray &src, } sycl::event copy_shapes_strides_ev = std::get<2>(ptr_size_event_tuple1); + std::vector<sycl::event> all_deps; + all_deps.reserve(depends.size() + 1); + all_deps.insert(all_deps.end(), depends.begin(), depends.end()); + all_deps.push_back(copy_shapes_strides_ev); + + assert(all_deps.size() == depends.size() + 1); + repeat_ev = fn(exec_q, src_axis_nelems, src_data_p, dst_data_p, reps_data_p, cumsum_data_p, src_nd, packed_src_shape_strides, dst_shape_vec[0], dst_strides_vec[0], reps_shape_vec[0], - reps_strides_vec[0], depends); + reps_strides_vec[0], all_deps); sycl::event cleanup_tmp_allocations_ev = exec_q.submit([&](sycl::handler &cgh) { @@ -496,10 +503,10 @@ py_repeat_by_sequence(const dpctl::tensor::usm_ndarray &src, assert(all_deps.size() == depends.size() + 1); - sycl::event repeat_ev = - fn(exec_q, src_sz, src_data_p, dst_data_p, reps_data_p, cumsum_data_p, - src_nd, packed_src_shapes_strides, dst_shape_vec[0], - dst_strides_vec[0], reps_shape_vec[0], reps_strides_vec[0], depends); + sycl::event repeat_ev = fn( + exec_q, src_sz, src_data_p, dst_data_p, reps_data_p, cumsum_data_p, + src_nd, packed_src_shapes_strides, dst_shape_vec[0], 
dst_strides_vec[0], + reps_shape_vec[0], reps_strides_vec[0], all_deps); sycl::event cleanup_tmp_allocations_ev = exec_q.submit([&](sycl::handler &cgh) { @@ -652,7 +659,7 @@ py_repeat_by_scalar(const dpctl::tensor::usm_ndarray &src, repeat_ev = fn(exec_q, dst_axis_nelems, src_data_p, dst_data_p, reps, src_nd, packed_src_shape_strides, dst_shape_vec[0], - dst_strides_vec[0], depends); + dst_strides_vec[0], all_deps); sycl::event cleanup_tmp_allocations_ev = exec_q.submit([&](sycl::handler &cgh) { @@ -856,7 +863,7 @@ py_repeat_by_scalar(const dpctl::tensor::usm_ndarray &src, sycl::event repeat_ev = fn(exec_q, dst_sz, src_data_p, dst_data_p, reps, src_nd, packed_src_shape_strides, - dst_shape_vec[0], dst_strides_vec[0], depends); + dst_shape_vec[0], dst_strides_vec[0], all_deps); sycl::event cleanup_tmp_allocations_ev = exec_q.submit([&](sycl::handler &cgh) {