diff --git a/SYCL/DeviceLib/built-ins/ext_native_math.cpp b/SYCL/DeviceLib/built-ins/ext_native_math.cpp index 9cc5096b38..4ab4dcf4d1 100644 --- a/SYCL/DeviceLib/built-ins/ext_native_math.cpp +++ b/SYCL/DeviceLib/built-ins/ext_native_math.cpp @@ -3,88 +3,36 @@ // RUN: %GPU_RUN_PLACEHOLDER %t.out // RUN: %ACC_RUN_PLACEHOLDER %t.out -// OpenCL CPU driver does not support cl_khr_fp16 extension for this reason this -// test is compiled with the -fsycl-device-code-split flag +// Tests oneapi extension native tanh math function for sycl::vec and +// sycl::marray float cases. -#include -#include - -template void assert_out_of_bound(T val, T lower, T upper) { - assert(sycl::all(lower < val && val < upper)); -} - -template <> -void assert_out_of_bound(float val, float lower, float upper) { - assert(lower < val && val < upper); -} - -template <> -void assert_out_of_bound(sycl::half val, sycl::half lower, - sycl::half upper) { - assert(lower < val && val < upper); -} - -template -void native_tanh_tester(sycl::queue q, T val, T up, T lo) { - T r = val; - -#ifdef SYCL_EXT_ONEAPI_NATIVE_MATH - { - sycl::buffer BufR(&r, sycl::range<1>(1)); - q.submit([&](sycl::handler &cgh) { - auto AccR = BufR.template get_access(cgh); - cgh.single_task([=]() { - AccR[0] = sycl::ext::oneapi::experimental::native::tanh(AccR[0]); - }); - }); - } - - assert_out_of_bound(r, up, lo); -#else - assert(!"SYCL_EXT_ONEAPI_NATIVE_MATH not supported"); -#endif -} - -template -void native_exp2_tester(sycl::queue q, T val, T up, T lo) { - T r = val; - -#ifdef SYCL_EXT_ONEAPI_NATIVE_MATH - { - sycl::buffer BufR(&r, sycl::range<1>(1)); - q.submit([&](sycl::handler &cgh) { - auto AccR = BufR.template get_access(cgh); - cgh.single_task([=]() { - AccR[0] = sycl::ext::oneapi::experimental::native::exp2(AccR[0]); - }); - }); - } - - assert_out_of_bound(r, up, lo); -#else - assert(!"SYCL_EXT_ONEAPI_NATIVE_MATH not supported"); -#endif -} +#include "ext_native_math_common.hpp" int main() { sycl::queue q; - const double tv[16] = {-2.0, -1.5, -1.0, 0.0, 2.0, 1.5, 1.0, 0.0, - -1.7, 1.7, -1.2, 1.2, -3.0, 3.0, -10.0, 10.0}; - const double tl[16] = {-0.97, -0.91, -0.77, -0.1, 0.95, 0.89, 0.75, -0.1, - -0.94, 0.92, -0.84, 0.82, -1.0, 0.98, -1.10, 0.98}; - const double tu[16] = {-0.95, -0.89, -0.75, 0.1, 0.97, 0.91, 0.77, 0.1, - -0.92, 0.94, -0.82, 0.84, -0.98, 1.00, -0.98, 1.10}; + const float tv[16] = {-2.0, -1.5, -1.0, 0.0, 2.0, 1.5, 1.0, 0.0, + -1.7, 1.7, -1.2, 1.2, -3.0, 3.0, -10.0, 10.0}; + const float tl[16] = {-0.97, -0.91, -0.77, -0.1, 0.95, 0.89, 0.75, -0.1, + -0.94, 0.92, -0.84, 0.82, -1.0, 0.98, -1.10, 0.98}; + const float tu[16] = {-0.95, -0.89, -0.75, 0.1, 0.97, 0.91, 0.77, 0.1, + -0.92, 0.94, -0.82, 0.84, -0.98, 1.00, -0.98, 1.10}; native_tanh_tester(q, tv[0], tl[0], tu[0]); native_tanh_tester(q, {tv[0], tv[1]}, {tl[0], tl[1]}, {tu[0], tu[1]}); native_tanh_tester( q, {tv[0], tv[1], tv[2]}, {tl[0], tl[1], tl[2]}, {tu[0], tu[1], tu[2]}); + native_tanh_tester(q, {tv[0], tv[1], tv[2], tv[3]}, {tl[0], tl[1], tl[2], tl[3]}, {tu[0], tu[1], tu[2], tu[3]}); + native_tanh_tester>( + q, {tv[0], tv[1], tv[2]}, {tl[0], tl[1], tl[2]}, {tu[0], tu[1], tu[2]}); + native_tanh_tester>(q, {tv[0], tv[1], tv[2], tv[3]}, + {tl[0], tl[1], tl[2], tl[3]}, + {tu[0], tu[1], tu[2], tu[3]}); native_tanh_tester( q, {tv[0], tv[1], tv[2], tv[3], tv[4], tv[5], tv[6], tv[7]}, {tl[0], tl[1], tl[2], tl[3], tl[4], tl[5], tl[6], tl[7]}, @@ -98,57 +46,5 @@ int main() { {tu[0], tu[1], tu[2], tu[3], tu[4], tu[5], tu[6], tu[7], tu[8], tu[9], tu[10], tu[11], tu[12], tu[13], tu[14], tu[15]}); - if (q.get_device().has(sycl::aspect::fp16)) { - - native_tanh_tester(q, tv[0], tl[0], tu[0]); - native_tanh_tester(q, {tv[0], tv[1]}, {tl[0], tl[1]}, - {tu[0], tu[1]}); - native_tanh_tester( - q, {tv[0], tv[1], tv[2]}, {tl[0], tl[1], tl[2]}, {tu[0], tu[1], tu[2]}); - native_tanh_tester(q, {tv[0], tv[1], tv[2], tv[3]}, - {tl[0], tl[1], tl[2], tl[3]}, - {tu[0], tu[1], tu[2], tu[3]}); - native_tanh_tester( - q, {tv[0], tv[1], tv[2], tv[3], tv[4], tv[5], tv[6], tv[7]}, - {tl[0], tl[1], tl[2], tl[3], tl[4], tl[5], tl[6], tl[7]}, - {tu[0], tu[1], tu[2], tu[3], tu[4], tu[5], tu[6], tu[7]}); - native_tanh_tester( - q, - {tv[0], tv[1], tv[2], tv[3], tv[4], tv[5], tv[6], tv[7], tv[8], tv[9], - tv[10], tv[11], tv[12], tv[13], tv[14], tv[15]}, - {tl[0], tl[1], tl[2], tl[3], tl[4], tl[5], tl[6], tl[7], tl[8], tl[9], - tl[10], tl[11], tl[12], tl[13], tl[14], tl[15]}, - {tu[0], tu[1], tu[2], tu[3], tu[4], tu[5], tu[6], tu[7], tu[8], tu[9], - tu[10], tu[11], tu[12], tu[13], tu[14], tu[15]}); - - const double ev[16] = {-2.0, -1.5, -1.0, 0.0, 2.0, 1.5, 1.0, 0.0, - -2.0, -1.5, -1.0, 0.0, 2.0, 1.5, 1.0, 0.0}; - const double el[16] = {0.1, 0.34, 0.4, -0.9, 3.9, 2.7, 1.9, -0.9, - 0.1, 0.34, 0.4, -0.9, 3.9, 2.7, 1.9, -0.9}; - const double eu[16] = {0.3, 0.36, 0.6, 1.1, 4.1, 2.9, 2.1, 1.1, - 0.3, 0.36, 0.6, 1.1, 4.1, 2.9, 2.1, 1.1}; - - native_exp2_tester(q, ev[0], el[0], eu[0]); - native_exp2_tester(q, {ev[0], ev[1]}, {el[0], el[1]}, - {eu[0], eu[1]}); - native_exp2_tester( - q, {ev[0], ev[1], ev[2]}, {el[0], el[1], el[2]}, {eu[0], eu[1], eu[2]}); - native_exp2_tester(q, {ev[0], ev[1], ev[2], ev[3]}, - {el[0], el[1], el[2], el[3]}, - {eu[0], eu[1], eu[2], eu[3]}); - native_exp2_tester( - q, {ev[0], ev[1], ev[2], ev[3], ev[4], ev[5], ev[6], ev[7]}, - {el[0], el[1], el[2], el[3], el[4], el[5], el[6], el[7]}, - {eu[0], eu[1], eu[2], eu[3], eu[4], eu[5], eu[6], eu[7]}); - native_exp2_tester( - q, - {ev[0], ev[1], ev[2], ev[3], ev[4], ev[5], ev[6], ev[7], ev[8], ev[9], - ev[10], ev[11], ev[12], ev[13], ev[14], ev[15]}, - {el[0], el[1], el[2], el[3], el[4], el[5], el[6], el[7], el[8], el[9], - el[10], el[11], el[12], el[13], el[14], el[15]}, - {eu[0], eu[1], eu[2], eu[3], eu[4], eu[5], eu[6], eu[7], eu[8], eu[9], - eu[10], eu[11], eu[12], eu[13], eu[14], eu[15]}); - } - return 0; } diff --git a/SYCL/DeviceLib/built-ins/ext_native_math_common.hpp b/SYCL/DeviceLib/built-ins/ext_native_math_common.hpp new file mode 100644 index 0000000000..7511f03206 --- /dev/null +++ b/SYCL/DeviceLib/built-ins/ext_native_math_common.hpp @@ -0,0 +1,67 @@ +#include +#include + +template +void assert_out_of_bound(sycl::marray val, sycl::marray lower, + sycl::marray upper) { + for (int i = 0; i < N; i++) { + assert(lower[i] < val[i] && val[i] < upper[i]); + } +} + +template void assert_out_of_bound(T val, T lower, T upper) { + assert(sycl::all(lower < val && val < upper)); +} + +template <> +void assert_out_of_bound(float val, float lower, float upper) { + assert(lower < val && val < upper); +} + +template <> +void assert_out_of_bound(sycl::half val, sycl::half lower, + sycl::half upper) { + assert(lower < val && val < upper); +} + +template +void native_tanh_tester(sycl::queue q, T val, T up, T lo) { + T r = val; + +#ifdef SYCL_EXT_ONEAPI_NATIVE_MATH + { + sycl::buffer BufR(&r, sycl::range<1>(1)); + q.submit([&](sycl::handler &cgh) { + auto AccR = BufR.template get_access(cgh); + cgh.single_task([=]() { + AccR[0] = sycl::ext::oneapi::experimental::native::tanh(AccR[0]); + }); + }); + } + + assert_out_of_bound(r, up, lo); +#else + assert(!"SYCL_EXT_ONEAPI_NATIVE_MATH not supported"); +#endif +} + +template +void native_exp2_tester(sycl::queue q, T val, T up, T lo) { + T r = val; + +#ifdef SYCL_EXT_ONEAPI_NATIVE_MATH + { + sycl::buffer BufR(&r, sycl::range<1>(1)); + q.submit([&](sycl::handler &cgh) { + auto AccR = BufR.template get_access(cgh); + cgh.single_task([=]() { + AccR[0] = sycl::ext::oneapi::experimental::native::exp2(AccR[0]); + }); + }); + } + + assert_out_of_bound(r, up, lo); +#else + assert(!"SYCL_EXT_ONEAPI_NATIVE_MATH not supported"); +#endif +} diff --git a/SYCL/DeviceLib/built-ins/ext_native_math_fp16.cpp b/SYCL/DeviceLib/built-ins/ext_native_math_fp16.cpp new file mode 100644 index 0000000000..74bd0f8213 --- /dev/null +++ b/SYCL/DeviceLib/built-ins/ext_native_math_fp16.cpp @@ -0,0 +1,93 @@ +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-device-code-split=per_kernel %s -o %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out +// RUN: %ACC_RUN_PLACEHOLDER %t.out + +// OpenCL CPU driver does not support cl_khr_fp16 extension for this reason this +// test is compiled with the -fsycl-device-code-split flag + +// Tests oneapi extension native math functions for sycl::vec and sycl::marray +// fp16 cases. + +#include "ext_native_math_common.hpp" + +int main() { + + sycl::queue q; + + if (!q.get_device().has(sycl::aspect::fp16)) { + std::cout << "skipping fp16 tests: requires fp16 device aspect." + << std::endl; + return 0; + } + + const sycl::half tv[16] = {-2.0, -1.5, -1.0, 0.0, 2.0, 1.5, 1.0, 0.0, + -1.7, 1.7, -1.2, 1.2, -3.0, 3.0, -10.0, 10.0}; + const sycl::half tl[16] = {-0.97, -0.91, -0.77, -0.1, 0.95, 0.89, + 0.75, -0.1, -0.94, 0.92, -0.84, 0.82, + -1.0, 0.98, -1.10, 0.98}; + const sycl::half tu[16] = {-0.95, -0.89, -0.75, 0.1, 0.97, 0.91, + 0.77, 0.1, -0.92, 0.94, -0.82, 0.84, + -0.98, 1.00, -0.98, 1.10}; + + native_tanh_tester(q, tv[0], tl[0], tu[0]); + native_tanh_tester(q, {tv[0], tv[1]}, {tl[0], tl[1]}, + {tu[0], tu[1]}); + native_tanh_tester(q, {tv[0], tv[1], tv[2]}, + {tl[0], tl[1], tl[2]}, {tu[0], tu[1], tu[2]}); + native_tanh_tester>( + q, {tv[0], tv[1], tv[2]}, {tl[0], tl[1], tl[2]}, {tu[0], tu[1], tu[2]}); + native_tanh_tester(q, {tv[0], tv[1], tv[2], tv[3]}, + {tl[0], tl[1], tl[2], tl[3]}, + {tu[0], tu[1], tu[2], tu[3]}); + native_tanh_tester>( + q, {tv[0], tv[1], tv[2], tv[3]}, {tl[0], tl[1], tl[2], tl[3]}, + {tu[0], tu[1], tu[2], tu[3]}); + native_tanh_tester( + q, {tv[0], tv[1], tv[2], tv[3], tv[4], tv[5], tv[6], tv[7]}, + {tl[0], tl[1], tl[2], tl[3], tl[4], tl[5], tl[6], tl[7]}, + {tu[0], tu[1], tu[2], tu[3], tu[4], tu[5], tu[6], tu[7]}); + native_tanh_tester( + q, + {tv[0], tv[1], tv[2], tv[3], tv[4], tv[5], tv[6], tv[7], tv[8], tv[9], + tv[10], tv[11], tv[12], tv[13], tv[14], tv[15]}, + {tl[0], tl[1], tl[2], tl[3], tl[4], tl[5], tl[6], tl[7], tl[8], tl[9], + tl[10], tl[11], tl[12], tl[13], tl[14], tl[15]}, + {tu[0], tu[1], tu[2], tu[3], tu[4], tu[5], tu[6], tu[7], tu[8], tu[9], + tu[10], tu[11], tu[12], tu[13], tu[14], tu[15]}); + + const sycl::half ev[16] = {-2.0, -1.5, -1.0, 0.0, 2.0, 1.5, 1.0, 0.0, + -2.0, -1.5, -1.0, 0.0, 2.0, 1.5, 1.0, 0.0}; + const sycl::half el[16] = {0.1, 0.34, 0.4, -0.9, 3.9, 2.7, 1.9, -0.9, + 0.1, 0.34, 0.4, -0.9, 3.9, 2.7, 1.9, -0.9}; + const sycl::half eu[16] = {0.3, 0.36, 0.6, 1.1, 4.1, 2.9, 2.1, 1.1, + 0.3, 0.36, 0.6, 1.1, 4.1, 2.9, 2.1, 1.1}; + + native_exp2_tester(q, ev[0], el[0], eu[0]); + native_exp2_tester(q, {ev[0], ev[1]}, {el[0], el[1]}, + {eu[0], eu[1]}); + native_exp2_tester(q, {ev[0], ev[1], ev[2]}, + {el[0], el[1], el[2]}, {eu[0], eu[1], eu[2]}); + native_exp2_tester(q, {ev[0], ev[1], ev[2], ev[3]}, + {el[0], el[1], el[2], el[3]}, + {eu[0], eu[1], eu[2], eu[3]}); + native_exp2_tester>( + q, {ev[0], ev[1], ev[2]}, {el[0], el[1], el[2]}, {eu[0], eu[1], eu[2]}); + native_exp2_tester>( + q, {ev[0], ev[1], ev[2], ev[3]}, {el[0], el[1], el[2], el[3]}, + {eu[0], eu[1], eu[2], eu[3]}); + native_exp2_tester( + q, {ev[0], ev[1], ev[2], ev[3], ev[4], ev[5], ev[6], ev[7]}, + {el[0], el[1], el[2], el[3], el[4], el[5], el[6], el[7]}, + {eu[0], eu[1], eu[2], eu[3], eu[4], eu[5], eu[6], eu[7]}); + native_exp2_tester( + q, + {ev[0], ev[1], ev[2], ev[3], ev[4], ev[5], ev[6], ev[7], ev[8], ev[9], + ev[10], ev[11], ev[12], ev[13], ev[14], ev[15]}, + {el[0], el[1], el[2], el[3], el[4], el[5], el[6], el[7], el[8], el[9], + el[10], el[11], el[12], el[13], el[14], el[15]}, + {eu[0], eu[1], eu[2], eu[3], eu[4], eu[5], eu[6], eu[7], eu[8], eu[9], + eu[10], eu[11], eu[12], eu[13], eu[14], eu[15]}); + + return 0; +} diff --git a/SYCL/DeviceLib/built-ins/fast-math-flag.cpp b/SYCL/DeviceLib/built-ins/fast-math-flag.cpp index e6b7d08238..d90845e58d 100644 --- a/SYCL/DeviceLib/built-ins/fast-math-flag.cpp +++ b/SYCL/DeviceLib/built-ins/fast-math-flag.cpp @@ -6,47 +6,64 @@ #include #include -#define __TEST_FFMATH_BINARY(func) \ - int test_ffmath_##func() { \ - sycl::float4 r[2]; \ - sycl::float4 val[2] = {{1.0004f, 1e-4f, 1.4f, 14.0f}, \ - {1.0004f, 1e-4f, 1.4f, 14.0f}}; \ +using namespace sycl; + +template bool checkEqual(vec A, vec B) { + + return sycl::all(A == B); +} + +template +bool checkEqual(marray A, marray B) { + for (int i = 0; i < N; i++) { + if (A[i] != B[i]) { + return false; + } + } + return true; +} + +#define __TEST_FFMATH_UNARY(func) \ + template void test_ffmath_##func(queue &deviceQueue) { \ + T input{1.0004f, 1e-4f, 1.4f, 14.0f}; \ + T res[2] = {{-1, -1, -1, -1}, {-2, -2, -2, -2}}; \ { \ - sycl::buffer output(&r[0], sycl::range<1>(2)); \ - sycl::buffer input(&val[0], sycl::range<1>(2)); \ - sycl::queue q; \ - q.submit([&](sycl::handler &cgh) { \ - auto AccO = \ - output.template get_access(cgh); \ - auto AccI = input.template get_access(cgh); \ + buffer input_buff(&input, 1); \ + buffer res_buff(&res[0], sycl::range<1>(2)); \ + deviceQueue.submit([&](handler &cgh) { \ + accessor res_acc(res_buff, \ + cgh); \ + accessor input_acc( \ + input_buff, cgh); \ cgh.single_task([=]() { \ - AccO[0] = sycl::func(AccI[0], AccI[1]); \ - AccO[1] = sycl::native::func(AccI[0], AccI[1]); \ + res_acc[0] = sycl::native::func(input_acc[0]); \ + res_acc[1] = sycl::func(input_acc[0]); \ }); \ }); \ } \ - return sycl::all(r[0] == r[1]); \ + assert(checkEqual(res[0], res[1])); \ } -#define __TEST_FFMATH_UNARY(func) \ - int test_ffmath_##func() { \ - sycl::float4 val = {1.0004f, 1e-4f, 1.4f, 14.0f}; \ - sycl::float4 r[2]; \ +#define __TEST_FFMATH_BINARY(func) \ + template void test_ffmath_##func(queue &deviceQueue) { \ + T input[2] = {{1.0004f, 1e-4f, 1.4f, 14.0f}, \ + {1.0004f, 1e-4f, 1.4f, 14.0f}}; \ + T res[2] = {{-1, -1, -1, -1}, {-2, -2, -2, -2}}; \ { \ - sycl::buffer output(&r[0], sycl::range<1>(2)); \ - sycl::buffer input(&val, sycl::range<1>(1)); \ - sycl::queue q; \ - q.submit([&](sycl::handler &cgh) { \ - auto AccO = \ - output.template get_access(cgh); \ - auto AccI = input.template get_access(cgh); \ + buffer input_buff(&input[0], range<1>(2)); \ + buffer res_buff(&res[0], range<1>(2)); \ + deviceQueue.submit([&](handler &cgh) { \ + accessor res_acc(res_buff, \ + cgh); \ + accessor input_acc( \ + input_buff, cgh); \ cgh.single_task([=]() { \ - AccO[0] = sycl::func(AccI[0]); \ - AccO[1] = sycl::native::func(AccI[0]); \ + res_acc[0] = sycl::native::func(input_acc[0], input_acc[1]); \ + res_acc[1] = sycl::func(input_acc[0], input_acc[1]); \ }); \ }); \ } \ - return sycl::all(r[0] == r[1]); \ + assert(checkEqual(res[0], res[1])); \ } __TEST_FFMATH_UNARY(cos) @@ -56,26 +73,43 @@ __TEST_FFMATH_UNARY(exp10) __TEST_FFMATH_UNARY(log) __TEST_FFMATH_UNARY(log2) __TEST_FFMATH_UNARY(log10) -__TEST_FFMATH_BINARY(powr) __TEST_FFMATH_UNARY(rsqrt) __TEST_FFMATH_UNARY(sin) __TEST_FFMATH_UNARY(sqrt) __TEST_FFMATH_UNARY(tan) +__TEST_FFMATH_BINARY(powr) + int main() { - assert(test_ffmath_cos()); - assert(test_ffmath_exp()); - assert(test_ffmath_exp2()); - assert(test_ffmath_exp10()); - assert(test_ffmath_log()); - assert(test_ffmath_log2()); - assert(test_ffmath_log10()); - assert(test_ffmath_powr()); - assert(test_ffmath_rsqrt()); - assert(test_ffmath_sin()); - assert(test_ffmath_sqrt()); - assert(test_ffmath_tan()); + queue q; + test_ffmath_cos>(q); + test_ffmath_exp>(q); + test_ffmath_exp2>(q); + test_ffmath_exp10>(q); + test_ffmath_log>(q); + test_ffmath_log2>(q); + test_ffmath_log10>(q); + test_ffmath_powr>(q); + test_ffmath_rsqrt>(q); + test_ffmath_sin>(q); + test_ffmath_sqrt>(q); + test_ffmath_tan>(q); + test_ffmath_powr>(q); + + test_ffmath_cos(q); + test_ffmath_exp(q); + test_ffmath_exp2(q); + test_ffmath_exp10(q); + test_ffmath_log(q); + test_ffmath_log2(q); + test_ffmath_log10(q); + test_ffmath_powr(q); + test_ffmath_rsqrt(q); + test_ffmath_sin(q); + test_ffmath_sqrt(q); + test_ffmath_tan(q); + test_ffmath_powr(q); return 0; } diff --git a/SYCL/DeviceLib/math_test_marray_vec.cpp b/SYCL/DeviceLib/math_test_marray_vec.cpp new file mode 100644 index 0000000000..18dfc1ae00 --- /dev/null +++ b/SYCL/DeviceLib/math_test_marray_vec.cpp @@ -0,0 +1,31 @@ +// TODO fix windows failures +// UNSUPPORTED: windows && (level_zero || opencl) +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %ACC_RUN_PLACEHOLDER %t.out + +// tests sycl floating point math functions for sycl::vec and sycl::marray float +// and double cases. + +#include "math_test_marray_vec_common.hpp" + +int main() { + queue deviceQueue; + math_tests_4(deviceQueue); + math_tests_4>(deviceQueue); + + math_tests_3(deviceQueue); + math_tests_3>(deviceQueue); + + if (deviceQueue.get_device().has(sycl::aspect::fp64)) { + math_tests_4(deviceQueue); + math_tests_4>(deviceQueue); + + math_tests_3(deviceQueue); + math_tests_3>(deviceQueue); + } + + std::cout << "Pass" << std::endl; + return 0; +} diff --git a/SYCL/DeviceLib/math_test_marray_vec_common.hpp b/SYCL/DeviceLib/math_test_marray_vec_common.hpp new file mode 100644 index 0000000000..1c10543f68 --- /dev/null +++ b/SYCL/DeviceLib/math_test_marray_vec_common.hpp @@ -0,0 +1,250 @@ +#include + +using namespace sycl; + +template class TypeHelper; + +template bool checkEqual(vec A, size_t B) { + T TB = B; + return A.x() == TB && A.y() == TB && A.z() == TB; +} + +template bool checkEqual(vec A, size_t B) { + T TB = B; + return A.x() == TB && A.y() == TB && A.z() == TB && A.w() == TB; +} + +template bool checkEqual(marray A, size_t B) { + for (int i = 0; i < N; i++) { + if (A[i] != B) { + return false; + } + } + return true; +} + +#define OPERATOR(NAME) \ + template \ + void math_test_##NAME(queue &deviceQueue, T result, T input, size_t ref) { \ + { \ + buffer buffer1(&result, 1); \ + buffer buffer2(&input, 1); \ + deviceQueue.submit([&](handler &cgh) { \ + accessor res_access( \ + buffer1, cgh); \ + accessor input_access( \ + buffer2, cgh); \ + cgh.single_task>( \ + [=]() { res_access[0] = NAME(input_access[0]); }); \ + }); \ + } \ + assert(checkEqual(result, ref)); \ + } + +OPERATOR(cos) +OPERATOR(cospi) +OPERATOR(sin) +OPERATOR(sinpi) +OPERATOR(cosh) +OPERATOR(sinh) +OPERATOR(tan) +OPERATOR(tanpi) +OPERATOR(atan) +OPERATOR(atanpi) +OPERATOR(tanh) +OPERATOR(acos) +OPERATOR(acospi) +OPERATOR(asin) +OPERATOR(asinpi) +OPERATOR(acosh) +OPERATOR(asinh) +OPERATOR(atanh) +OPERATOR(cbrt) +OPERATOR(ceil) +OPERATOR(exp) +OPERATOR(exp2) +OPERATOR(exp10) +OPERATOR(expm1) +OPERATOR(tgamma) +OPERATOR(lgamma) +OPERATOR(erf) +OPERATOR(erfc) +OPERATOR(log) +OPERATOR(log2) +OPERATOR(log10) +OPERATOR(log1p) +OPERATOR(logb) +OPERATOR(sqrt) +OPERATOR(rsqrt) +OPERATOR(rint) +OPERATOR(round) +OPERATOR(trunc) + +#undef OPERATOR + +#define OPERATOR_2(NAME) \ + template \ + void math_test_2_##NAME(queue &deviceQueue, T result, T input1, T input2, \ + size_t ref) { \ + { \ + buffer buffer1(&result, 1); \ + buffer buffer2(&input1, 1); \ + buffer buffer3(&input2, 1); \ + deviceQueue.submit([&](handler &cgh) { \ + accessor res_access( \ + buffer1, cgh); \ + accessor input1_access( \ + buffer2, cgh); \ + accessor input2_access( \ + buffer3, cgh); \ + cgh.single_task>([=]() { \ + res_access[0] = NAME(input1_access[0], input2_access[0]); \ + }); \ + }); \ + } \ + assert(checkEqual(result, ref)); \ + } + +OPERATOR_2(pow) +OPERATOR_2(powr) +OPERATOR_2(atan2) +OPERATOR_2(atan2pi) +OPERATOR_2(copysign) +OPERATOR_2(fdim) +OPERATOR_2(fmin) +OPERATOR_2(fmax) +OPERATOR_2(fmod) +OPERATOR_2(hypot) +OPERATOR_2(maxmag) +OPERATOR_2(minmag) +OPERATOR_2(nextafter) +OPERATOR_2(remainder) + +#undef OPERATOR_2 + +#define OPERATOR_3(NAME) \ + template \ + void math_test_3_##NAME(queue &deviceQueue, T result, T input1, T input2, \ + T input3, size_t ref) { \ + { \ + buffer buffer1(&result, 1); \ + buffer buffer2(&input1, 1); \ + buffer buffer3(&input2, 1); \ + buffer buffer4(&input3, 1); \ + deviceQueue.submit([&](handler &cgh) { \ + accessor res_access( \ + buffer1, cgh); \ + accessor input1_access( \ + buffer2, cgh); \ + accessor input2_access( \ + buffer3, cgh); \ + accessor input3_access( \ + buffer4, cgh); \ + cgh.single_task>([=]() { \ + res_access[0] = \ + NAME(input1_access[0], input2_access[0], input3_access[0]); \ + }); \ + }); \ + } \ + assert(checkEqual(result, ref)); \ + } + +OPERATOR_3(mad) +OPERATOR_3(mix) +OPERATOR_3(fma) + +#undef OPERATOR_3 + +template void math_tests_4(queue &deviceQueue) { + math_test_tanh(deviceQueue, T{-1, -1, -1, -1}, T{0, 0, 0, 0}, 0); + math_test_cosh(deviceQueue, T{-1, -1, -1, -1}, T{0, 0, 0, 0}, 1); + math_test_sinh(deviceQueue, T{-1, -1, -1, -1}, T{0, 0, 0, 0}, 0); + math_test_acos(deviceQueue, T{-1, -1, -1, -1}, T{1, 1, 1, 1}, 0); + math_test_acospi(deviceQueue, T{-1, -1, -1, -1}, T{1, 1, 1, 1}, 0); + math_test_acosh(deviceQueue, T{-1, -1, -1, -1}, T{1, 1, 1, 1}, 0); + math_test_asin(deviceQueue, T{-1, -1, -1, -1}, T{0, 0, 0, 0}, 0); + math_test_asinpi(deviceQueue, T{-1, -1, -1, -1}, T{0, 0, 0, 0}, 0); + math_test_asinh(deviceQueue, T{-1, -1, -1, -1}, T{0, 0, 0, 0}, 0); + math_test_cbrt(deviceQueue, T{-1, -1, -1, -1}, T{1, 1, 1, 1}, 1); + math_test_atan(deviceQueue, T{-1, -1, -1, -1}, T{0, 0, 0, 0}, 0); + math_test_atanpi(deviceQueue, T{-1, -1, -1, -1}, T{0, 0, 0, 0}, 0); + math_test_atanh(deviceQueue, T{-1, -1, -1, -1}, T{0, 0, 0, 0}, 0); + math_test_exp(deviceQueue, T{-1, -1, -1, -1}, T{0, 0, 0, 0}, 1); + math_test_exp2(deviceQueue, T{-1, -1, -1, -1}, T{2, 2, 2, 2}, 4); + math_test_exp10(deviceQueue, T{-1, -1, -1, -1}, T{2, 2, 2, 2}, 100); + math_test_expm1(deviceQueue, T{-1, -1, -1, -1}, T{0, 0, 0, 0}, 0); + math_test_ceil(deviceQueue, T{-1, -1, -1, -1}, T{0.6, 0.6, 0.6, 0.6}, 1); + math_test_tgamma(deviceQueue, T{-1, -1, -1, -1}, T{1, 1, 1, 1}, 1); + math_test_lgamma(deviceQueue, T{-1, -1, -1, -1}, T{1, 1, 1, 1}, 0); + math_test_erf(deviceQueue, T{-1, -1, -1, -1}, T{0, 0, 0, 0}, 0); + math_test_erfc(deviceQueue, T{-1, -1, -1, -1}, T{0, 0, 0, 0}, 1); + math_test_2_pow(deviceQueue, T{-1, -1, -1, -1}, T{2, 2, 2, 2}, T{2, 2, 2, 2}, + 4); + math_test_2_powr(deviceQueue, T{-1, -1, -1, -1}, T{2, 2, 2, 2}, T{2, 2, 2, 2}, + 4); + math_test_2_atan2(deviceQueue, T{-1, -1, -1, -1}, T{0, 0, 0, 0}, + T{2, 2, 2, 2}, 0); + math_test_2_atan2pi(deviceQueue, T{-1, -1, -1, -1}, T{0, 0, 0, 0}, + T{2, 2, 2, 2}, 0); + math_test_2_copysign(deviceQueue, T{-1, -1, -1, -1}, T{-3, -3, -3, -3}, + T{2, 2, 2, 2}, 3); + math_test_2_fmin(deviceQueue, T{-1, -1, -1, -1}, T{2, 2, 2, 2}, T{3, 3, 3, 3}, + 2); + math_test_2_fmax(deviceQueue, T{-1, -1, -1, -1}, T{2, 2, 2, 2}, T{3, 3, 3, 3}, + 3); + math_test_2_hypot(deviceQueue, T{-1, -1, -1, -1}, T{4, 4, 4, 4}, + T{3, 3, 3, 3}, 5); + math_test_2_maxmag(deviceQueue, T{-1, -1, -1, -1}, T{-2, -2, -2, -2}, + T{3, 3, 3, 3}, 3); + math_test_2_minmag(deviceQueue, T{-1, -1, -1, -1}, T{2, 2, 2, 2}, + T{-3, -3, -3, -3}, 2); + math_test_2_remainder(deviceQueue, T{-1, -1, -1, -1}, T{5, 5, 5, 5}, + T{2, 2, 2, 2}, 1); + math_test_2_fdim(deviceQueue, T{-1, -1, -1, -1}, T{3, 3, 3, 3}, T{3, 3, 3, 3}, + 0); + math_test_2_fmod(deviceQueue, T{-1, -1, -1, -1}, T{5, 5, 5, 5}, T{3, 3, 3, 3}, + 2); + math_test_2_nextafter(deviceQueue, T{-1, -1, -1, -1}, T{-0, -0, -0, -0}, + T{+0, +0, +0, +0}, 0); + math_test_3_fma(deviceQueue, T{-1, -1, -1, -1}, T{2, 2, 2, 2}, T{2, 2, 2, 2}, + T{1, 1, 1, 1}, 5); + math_test_3_mad(deviceQueue, T{-1, -1, -1, -1}, T{2, 2, 2, 2}, T{2, 2, 2, 2}, + T{1, 1, 1, 1}, 5); + math_test_3_mix(deviceQueue, T{-1, -1, -1, -1}, T{3, 3, 3, 3}, T{5, 5, 5, 5}, + T{0.5, 0.5, 0.5, 0.5}, 4); +} + +template void math_tests_3(queue &deviceQueue) { + math_test_tan(deviceQueue, T{-1, -1, -1}, T{0, 0, 0}, 0); + math_test_tanh(deviceQueue, T{-1, -1, -1}, T{0, 0, 0}, 0); + math_test_cos(deviceQueue, T{-1, -1, -1}, T{0, 0, 0}, 1); + math_test_sin(deviceQueue, T{-1, -1, -1}, T{0, 0, 0}, 0); + math_test_cosh(deviceQueue, T{-1, -1, -1}, T{0, 0, 0}, 1); + math_test_sinh(deviceQueue, T{-1, -1, -1}, T{0, 0, 0}, 0); + math_test_acos(deviceQueue, T{-1, -1, -1}, T{1, 1, 1}, 0); + math_test_acosh(deviceQueue, T{-1, -1, -1}, T{1, 1, 1}, 0); + math_test_asin(deviceQueue, T{-1, -1, -1}, T{0, 0, 0}, 0); + math_test_asinh(deviceQueue, T{-1, -1, -1}, T{0, 0, 0}, 0); + math_test_cbrt(deviceQueue, T{-1, -1, -1}, T{1, 1, 1}, 1); + math_test_atan(deviceQueue, T{-1, -1, -1}, T{0, 0, 0}, 0); + math_test_atanh(deviceQueue, T{-1, -1, -1}, T{0, 0, 0}, 0); + math_test_exp(deviceQueue, T{-1, -1, -1}, T{0, 0, 0}, 1); + math_test_exp2(deviceQueue, T{-1, -1, -1}, T{2, 2, 2}, 4); + math_test_exp10(deviceQueue, T{-1, -1, -1}, T{2, 2, 2}, 100); + math_test_expm1(deviceQueue, T{-1, -1, -1}, T{0, 0, 0}, 0); + math_test_ceil(deviceQueue, T{-1, -1, -1}, T{0.6, 0.6, 0.6}, 1); + math_test_tgamma(deviceQueue, T{-1, -1, -1}, T{1, 1, 1}, 1); + math_test_lgamma(deviceQueue, T{-1, -1, -1}, T{1, 1, 1}, 0); + math_test_erf(deviceQueue, T{-1, -1, -1}, T{0, 0, 0}, 0); + math_test_erfc(deviceQueue, T{-1, -1, -1}, T{0, 0, 0}, 1); + math_test_log(deviceQueue, T{-1, -1, -1}, T{1, 1, 1}, 0); + math_test_log2(deviceQueue, T{-1, -1, -1}, T{4, 4, 4}, 2); + math_test_log10(deviceQueue, T{-1, -1, -1}, T{100, 100, 100}, 2); + math_test_log1p(deviceQueue, T{-1, -1, -1}, T{0, 0, 0}, 0); + math_test_logb(deviceQueue, T{-1, -1, -1}, T{1.1, 1.1, 1.1}, 0); + math_test_sqrt(deviceQueue, T{-1, -1, -1}, T{4, 4, 4}, 2); + math_test_rsqrt(deviceQueue, T{-1, -1, -1}, T{0.25, 0.25, 0.25}, 2); + math_test_rint(deviceQueue, T{-1, -1, -1}, T{2.9, 2.9, 2.9}, 3); + math_test_round(deviceQueue, T{-1, -1, -1}, T{0.5, 0.5, 0.5}, 1); + math_test_trunc(deviceQueue, T{-1, -1, -1}, T{1.9, 1.9, 1.9}, 1); +} diff --git a/SYCL/DeviceLib/math_test_marray_vec_fp16.cpp b/SYCL/DeviceLib/math_test_marray_vec_fp16.cpp new file mode 100644 index 0000000000..7db7350ece --- /dev/null +++ b/SYCL/DeviceLib/math_test_marray_vec_fp16.cpp @@ -0,0 +1,26 @@ +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %ACC_RUN_PLACEHOLDER %t.out + +// tests sycl floating point math functions for sycl::vec and sycl::marray fp16 +// cases. + +#include "math_test_marray_vec_common.hpp" + +int main() { + queue deviceQueue; + + if (!deviceQueue.get_device().has(sycl::aspect::fp16)) { + std::cout << "skipping fp16 tests: requires fp16 device aspect." + << std::endl; + return 0; + } + math_tests_4(deviceQueue); + math_tests_4>(deviceQueue); + math_tests_3(deviceQueue); + math_tests_3>(deviceQueue); + + std::cout << "Pass" << std::endl; + return 0; +}