From b59e6c2a73c85958d7a93b94505bddc5fc7009c2 Mon Sep 17 00:00:00 2001 From: "Mokhov, Dmitri N" Date: Thu, 13 Feb 2020 15:59:51 -0600 Subject: [PATCH 1/2] [SYCL] Fix undefined symbols in async_work_group_copy Ensure proper name mangling by casting from SYCL types (namely cl::sycl::vec) to cl_* when calling OpenCL. Signed-off-by: Mokhov, Dmitri N --- sycl/include/CL/sycl/group.hpp | 45 +++++++++++++++-------- sycl/test/regression/group.cpp | 67 ++++++++++++++++++++++++++++++++++ 2 files changed, 96 insertions(+), 16 deletions(-) diff --git a/sycl/include/CL/sycl/group.hpp b/sycl/include/CL/sycl/group.hpp index 7fc777a53e64..21dc32e9b791 100644 --- a/sycl/include/CL/sycl/group.hpp +++ b/sycl/include/CL/sycl/group.hpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -264,10 +265,13 @@ template class group { device_event async_work_group_copy(local_ptr dest, global_ptr src, size_t numElements) const { - __ocl_event_t e = - OpGroupAsyncCopyGlobalToLocal( - __spv::Scope::Workgroup, - dest.get(), src.get(), numElements, 1, 0); + using T = detail::ConvertToOpenCLType_t; + using Local = detail::ConvertToOpenCLType_t>; + using Global = detail::ConvertToOpenCLType_t>; + + __ocl_event_t e = OpGroupAsyncCopyGlobalToLocal( + __spv::Scope::Workgroup, Local(dest.get()), Global(src.get()), + numElements, 1, 0); return device_event(&e); } @@ -275,10 +279,13 @@ template class group { device_event async_work_group_copy(global_ptr dest, local_ptr src, size_t numElements) const { - __ocl_event_t e = - OpGroupAsyncCopyLocalToGlobal( - __spv::Scope::Workgroup, - dest.get(), src.get(), numElements, 1, 0); + using T = detail::ConvertToOpenCLType_t; + using Local = detail::ConvertToOpenCLType_t>; + using Global = detail::ConvertToOpenCLType_t>; + + __ocl_event_t e = OpGroupAsyncCopyLocalToGlobal( + __spv::Scope::Workgroup, Global(dest.get()), Local(src.get()), + numElements, 1, 0); return device_event(&e); } @@ -287,10 +294,13 @@ template class group { global_ptr src, size_t numElements, size_t srcStride) const { - __ocl_event_t e = - OpGroupAsyncCopyGlobalToLocal( - __spv::Scope::Workgroup, - dest.get(), src.get(), numElements, srcStride, 0); + using T = detail::ConvertToOpenCLType_t; + using Local = detail::ConvertToOpenCLType_t>; + using Global = detail::ConvertToOpenCLType_t>; + + __ocl_event_t e = OpGroupAsyncCopyGlobalToLocal( + __spv::Scope::Workgroup, Local(dest.get()), Global(src.get()), + numElements, srcStride, 0); return device_event(&e); } @@ -299,10 +309,13 @@ template class group { local_ptr src, size_t numElements, size_t destStride) const { - __ocl_event_t e = - OpGroupAsyncCopyLocalToGlobal( - __spv::Scope::Workgroup, - dest.get(), src.get(), numElements, destStride, 0); + using T = detail::ConvertToOpenCLType_t; + using Local = detail::ConvertToOpenCLType_t>; + using Global = detail::ConvertToOpenCLType_t>; + + __ocl_event_t e = OpGroupAsyncCopyLocalToGlobal( + __spv::Scope::Workgroup, Global(dest.get()), Local(src.get()), + numElements, destStride, 0); return device_event(&e); } diff --git a/sycl/test/regression/group.cpp b/sycl/test/regression/group.cpp index 264283181b79..7ac6434a23ad 100644 --- a/sycl/test/regression/group.cpp +++ b/sycl/test/regression/group.cpp @@ -162,10 +162,77 @@ bool group__get_linear_id() { return Pass; } +// Tests group::async_work_group_copy() +bool group__async_work_group_copy() { + std::cout << "+++ Running group::async_work_group_copy() test...\n"; + constexpr int DIMS = 2; + const range LocalRange{3, 1}; + const range GroupRange{2, 3}; + const range GlobalRange = LocalRange * GroupRange; + using DataType = vec; + const int DataLen = GlobalRange.size(); + std::unique_ptr Data(new DataType[DataLen]); + std::memset(Data.get(), 0, DataLen * sizeof(DataType)); + + try { + buffer Buf(Data.get(), DataLen); + queue Q(AsyncHandler{}); + + Q.submit([&](handler &cgh) { + auto AccGlobal = Buf.get_access(cgh); + accessor + AccLocal(LocalRange, cgh); + + cgh.parallel_for( + nd_range<2>{GlobalRange, LocalRange}, [=](nd_item I) { + const auto Group = I.get_group(); + const auto NumElem = AccLocal.get_count(); + const auto Off = Group[0] * I.get_group_range(1) * NumElem + Group[1]; + const auto Stride = I.get_global_range(1); + auto PtrGlobal = AccGlobal.get_pointer() + Off; + auto PtrLocal = AccLocal.get_pointer(); + Group.async_work_group_copy(PtrLocal, PtrGlobal, NumElem, Stride); + AccLocal[I.get_local_id()][0] += I.get_global_id(0); + AccLocal[I.get_local_id()][1] += I.get_global_id(1); + Group.async_work_group_copy(PtrGlobal, PtrLocal, NumElem, Stride); + }); + }); + } catch (cl::sycl::exception const &E) { + std::cout << "SYCL exception caught: " << E.what() << '\n'; + return 2; + } + const size_t SIZE_Y = GlobalRange.get(0); + const size_t SIZE_X = GlobalRange.get(1); + bool Pass = true; + int ErrCnt = 0; + + for (size_t Y = 0; Y < SIZE_Y; Y++) { + for (size_t X = 0; X < SIZE_X; X++) { + const size_t Ind = Y * SIZE_X + X; + const auto Test0 = Data[Ind][0]; + const auto Test1 = Data[Ind][1]; + const auto Gold0 = Y; + const auto Gold1 = X; + const bool Ok = (Test0 == Gold0 && Test1 == Gold1); + Pass &= Ok; + + if (!Ok && ErrCnt++ < 10) { + std::cout << "*** ERROR at [" << Y << "][" << X << "]: "; + std::cout << Test0 << " " << Test1 << " != "; + std::cout << Gold0 << " " << Gold1 << "\n"; + } + } + } + if (Pass) + std::cout << " pass\n"; + return Pass; +} + int main() { bool Pass = 1; Pass &= group__get_group_range(); Pass &= group__get_linear_id(); + Pass &= group__async_work_group_copy(); if (!Pass) { std::cout << "FAILED\n"; From e6e02a302611852182cf3a40d0d58ec3a183a41a Mon Sep 17 00:00:00 2001 From: "Mokhov, Dmitri N" Date: Thu, 5 Mar 2020 22:17:59 -0600 Subject: [PATCH 2/2] Added tests of async_work_group_copy without stride. Signed-off-by: Mokhov, Dmitri N --- sycl/include/CL/sycl/group.hpp | 24 +++---- sycl/test/regression/group.cpp | 121 +++++++++++++++++++-------------- 2 files changed, 82 insertions(+), 63 deletions(-) diff --git a/sycl/include/CL/sycl/group.hpp b/sycl/include/CL/sycl/group.hpp index 21dc32e9b791..addb8b954897 100644 --- a/sycl/include/CL/sycl/group.hpp +++ b/sycl/include/CL/sycl/group.hpp @@ -266,11 +266,11 @@ template class group { global_ptr src, size_t numElements) const { using T = detail::ConvertToOpenCLType_t; - using Local = detail::ConvertToOpenCLType_t>; - using Global = detail::ConvertToOpenCLType_t>; + using DestT = detail::ConvertToOpenCLType_t; + using SrcT = detail::ConvertToOpenCLType_t; __ocl_event_t e = OpGroupAsyncCopyGlobalToLocal( - __spv::Scope::Workgroup, Local(dest.get()), Global(src.get()), + __spv::Scope::Workgroup, DestT(dest.get()), SrcT(src.get()), numElements, 1, 0); return device_event(&e); } @@ -280,11 +280,11 @@ template class group { local_ptr src, size_t numElements) const { using T = detail::ConvertToOpenCLType_t; - using Local = detail::ConvertToOpenCLType_t>; - using Global = detail::ConvertToOpenCLType_t>; + using DestT = detail::ConvertToOpenCLType_t; + using SrcT = detail::ConvertToOpenCLType_t; __ocl_event_t e = OpGroupAsyncCopyLocalToGlobal( - __spv::Scope::Workgroup, Global(dest.get()), Local(src.get()), + __spv::Scope::Workgroup, DestT(dest.get()), SrcT(src.get()), numElements, 1, 0); return device_event(&e); } @@ -295,11 +295,11 @@ template class group { size_t numElements, size_t srcStride) const { using T = detail::ConvertToOpenCLType_t; - using Local = detail::ConvertToOpenCLType_t>; - using Global = detail::ConvertToOpenCLType_t>; + using DestT = detail::ConvertToOpenCLType_t; + using SrcT = detail::ConvertToOpenCLType_t; __ocl_event_t e = OpGroupAsyncCopyGlobalToLocal( - __spv::Scope::Workgroup, Local(dest.get()), Global(src.get()), + __spv::Scope::Workgroup, DestT(dest.get()), SrcT(src.get()), numElements, srcStride, 0); return device_event(&e); } @@ -310,11 +310,11 @@ template class group { size_t numElements, size_t destStride) const { using T = detail::ConvertToOpenCLType_t; - using Local = detail::ConvertToOpenCLType_t>; - using Global = detail::ConvertToOpenCLType_t>; + using DestT = detail::ConvertToOpenCLType_t; + using SrcT = detail::ConvertToOpenCLType_t; __ocl_event_t e = OpGroupAsyncCopyLocalToGlobal( - __spv::Scope::Workgroup, Global(dest.get()), Local(src.get()), + __spv::Scope::Workgroup, DestT(dest.get()), SrcT(src.get()), numElements, destStride, 0); return device_event(&e); } diff --git a/sycl/test/regression/group.cpp b/sycl/test/regression/group.cpp index 7ac6434a23ad..c53dc3cc6436 100644 --- a/sycl/test/regression/group.cpp +++ b/sycl/test/regression/group.cpp @@ -166,60 +166,79 @@ bool group__get_linear_id() { bool group__async_work_group_copy() { std::cout << "+++ Running group::async_work_group_copy() test...\n"; constexpr int DIMS = 2; - const range LocalRange{3, 1}; - const range GroupRange{2, 3}; - const range GlobalRange = LocalRange * GroupRange; - using DataType = vec; - const int DataLen = GlobalRange.size(); - std::unique_ptr Data(new DataType[DataLen]); - std::memset(Data.get(), 0, DataLen * sizeof(DataType)); + bool Pass = true; - try { - buffer Buf(Data.get(), DataLen); - queue Q(AsyncHandler{}); + std::vector, range>> ranges; + ranges.push_back({{3, 1}, {2, 3}}); + ranges.push_back({{1, 3}, {3, 2}}); - Q.submit([&](handler &cgh) { - auto AccGlobal = Buf.get_access(cgh); - accessor - AccLocal(LocalRange, cgh); - - cgh.parallel_for( - nd_range<2>{GlobalRange, LocalRange}, [=](nd_item I) { - const auto Group = I.get_group(); - const auto NumElem = AccLocal.get_count(); - const auto Off = Group[0] * I.get_group_range(1) * NumElem + Group[1]; - const auto Stride = I.get_global_range(1); - auto PtrGlobal = AccGlobal.get_pointer() + Off; - auto PtrLocal = AccLocal.get_pointer(); - Group.async_work_group_copy(PtrLocal, PtrGlobal, NumElem, Stride); - AccLocal[I.get_local_id()][0] += I.get_global_id(0); - AccLocal[I.get_local_id()][1] += I.get_global_id(1); - Group.async_work_group_copy(PtrGlobal, PtrLocal, NumElem, Stride); - }); - }); - } catch (cl::sycl::exception const &E) { - std::cout << "SYCL exception caught: " << E.what() << '\n'; - return 2; - } - const size_t SIZE_Y = GlobalRange.get(0); - const size_t SIZE_X = GlobalRange.get(1); - bool Pass = true; - int ErrCnt = 0; + for (const auto &i : ranges) { + const auto LocalRange = i.first; + const auto GroupRange = i.second; + const range GlobalRange = LocalRange * GroupRange; + using DataType = vec; + const int DataLen = GlobalRange.size(); + std::unique_ptr Data(new DataType[DataLen]); + std::memset(Data.get(), 0, DataLen * sizeof(DataType)); + + try { + buffer Buf(Data.get(), DataLen); + queue Q(AsyncHandler{}); + + Q.submit([&](handler &cgh) { + auto AccGlobal = Buf.get_access(cgh); + accessor + AccLocal(LocalRange, cgh); + + cgh.parallel_for( + nd_range<2>{GlobalRange, LocalRange}, + [=](nd_item I) { + const auto Group = I.get_group(); + const auto NumElem = AccLocal.get_count(); + const auto Off = Group[0] * I.get_group_range(1) * NumElem + + Group[1] * I.get_local_range(1); + auto PtrGlobal = AccGlobal.get_pointer() + Off; + auto PtrLocal = AccLocal.get_pointer(); + if (I.get_local_range(0) == 1) { + Group.async_work_group_copy(PtrLocal, PtrGlobal, NumElem); + } else { + Group.async_work_group_copy(PtrLocal, PtrGlobal, NumElem, + I.get_global_range(1)); + } + AccLocal[I.get_local_id()][0] += I.get_global_id(0); + AccLocal[I.get_local_id()][1] += I.get_global_id(1); + if (I.get_local_range(0) == 1) { + Group.async_work_group_copy(PtrGlobal, PtrLocal, NumElem); + } else { + Group.async_work_group_copy(PtrGlobal, PtrLocal, NumElem, + I.get_global_range(1)); + } + }); + }); + } catch (cl::sycl::exception const &E) { + std::cout << "SYCL exception caught: " << E.what() << '\n'; + return 2; + } + const size_t SIZE_Y = GlobalRange.get(0); + const size_t SIZE_X = GlobalRange.get(1); + int ErrCnt = 0; - for (size_t Y = 0; Y < SIZE_Y; Y++) { - for (size_t X = 0; X < SIZE_X; X++) { - const size_t Ind = Y * SIZE_X + X; - const auto Test0 = Data[Ind][0]; - const auto Test1 = Data[Ind][1]; - const auto Gold0 = Y; - const auto Gold1 = X; - const bool Ok = (Test0 == Gold0 && Test1 == Gold1); - Pass &= Ok; - - if (!Ok && ErrCnt++ < 10) { - std::cout << "*** ERROR at [" << Y << "][" << X << "]: "; - std::cout << Test0 << " " << Test1 << " != "; - std::cout << Gold0 << " " << Gold1 << "\n"; + for (size_t Y = 0; Y < SIZE_Y; Y++) { + for (size_t X = 0; X < SIZE_X; X++) { + const size_t Ind = Y * SIZE_X + X; + const auto Test0 = Data[Ind][0]; + const auto Test1 = Data[Ind][1]; + const auto Gold0 = Y; + const auto Gold1 = X; + const bool Ok = (Test0 == Gold0 && Test1 == Gold1); + Pass &= Ok; + + if (!Ok && ErrCnt++ < 10) { + std::cout << "*** ERROR at [" << Y << "][" << X << "]: "; + std::cout << Test0 << " " << Test1 << " != "; + std::cout << Gold0 << " " << Gold1 << "\n"; + } } } }