Skip to content

Commit 0cbc7af

Browse files
committed
[SYCL][CUDA] Fix context scope in kernel launch
The `guessLocalWorkSize` function uses the CUDA API, so it needs an active context. However, no `ScopedContext` was active when it was called, which could cause issues. This fixes #2777.
1 parent f37b22d commit 0cbc7af

File tree

1 file changed

+2
-1
lines changed

1 file changed

+2
-1
lines changed

sycl/plugins/cuda/pi_cuda.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2578,6 +2578,8 @@ pi_result cuda_piEnqueueKernelLaunch(
25782578
bool providedLocalWorkGroupSize = (local_work_size != nullptr);
25792579
pi_uint32 local_size = kernel->get_local_size();
25802580

2581+
// Set the active context here as guessLocalWorkSize needs an active context
2582+
ScopedContext active(command_queue->get_context());
25812583
{
25822584
size_t *reqdThreadsPerBlock = kernel->reqdThreadsPerBlock_;
25832585
maxWorkGroupSize = command_queue->device_->get_max_work_group_size();
@@ -2631,7 +2633,6 @@ pi_result cuda_piEnqueueKernelLaunch(
26312633
std::unique_ptr<_pi_event> retImplEv{nullptr};
26322634

26332635
try {
2634-
ScopedContext active(command_queue->get_context());
26352636
CUstream cuStream = command_queue->get();
26362637
CUfunction cuFunc = kernel->get();
26372638

0 commit comments

Comments
 (0)