From f95793b9f67f50682d611061b3da853747a568e3 Mon Sep 17 00:00:00 2001 From: "jack.kirk" Date: Thu, 21 Apr 2022 18:26:54 +0100 Subject: [PATCH 01/20] tests for vec/marray math sycl::math, native and half_precision cases covered. Signed-off-by: jack.kirk --- SYCL/Basic/half_builtins.cpp | 3 +- SYCL/DeviceLib/built-ins/ext_native_math.cpp | 184 ++++++++++++ .../half_precision_math_test_marray_vec.cpp | 169 +++++++++++ SYCL/DeviceLib/math_test_marray_vec.cpp | 281 ++++++++++++++++++ .../DeviceLib/native_math_test_marray_vec.cpp | 156 ++++++++++ 5 files changed, 792 insertions(+), 1 deletion(-) create mode 100644 SYCL/DeviceLib/built-ins/ext_native_math.cpp create mode 100644 SYCL/DeviceLib/half_precision_math_test_marray_vec.cpp create mode 100644 SYCL/DeviceLib/math_test_marray_vec.cpp create mode 100644 SYCL/DeviceLib/native_math_test_marray_vec.cpp diff --git a/SYCL/Basic/half_builtins.cpp b/SYCL/Basic/half_builtins.cpp index 604ffb3c3a..e4f47695e1 100644 --- a/SYCL/Basic/half_builtins.cpp +++ b/SYCL/Basic/half_builtins.cpp @@ -166,6 +166,7 @@ template bool check(vec a, vec b) { int main() { queue q; + if (q.get_device().has(sycl::aspect::fp16)) { float16 a, b, c, d; for (int i = 0; i < SZ_max; i++) { a[i] = i / (float)SZ_max; @@ -193,6 +194,6 @@ int main() { }); } assert(err == 0); - +} return 0; } diff --git a/SYCL/DeviceLib/built-ins/ext_native_math.cpp b/SYCL/DeviceLib/built-ins/ext_native_math.cpp new file mode 100644 index 0000000000..9c64ec817f --- /dev/null +++ b/SYCL/DeviceLib/built-ins/ext_native_math.cpp @@ -0,0 +1,184 @@ +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-device-code-split=per_kernel %s -o %t.out +// RUN: %HOST_RUN_PLACEHOLDER %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out +// RUN: %ACC_RUN_PLACEHOLDER %t.out + +// OpenCL CPU driver does not support cl_khr_fp16 extension for this reason this +// test is compiled with the -fsycl-device-code-split flag + +#include +#include + +template +void assert_out_of_bound(sycl::marray val, sycl::marray lower, + sycl::marray upper) { + for (int i = 0; i < N; i++) { + assert(lower[i] < val[i] && val[i] < upper[i]); + } +} + +template void assert_out_of_bound(T val, T lower, T upper) { + assert(sycl::all(lower < val && val < upper)); +} + +template <> +void assert_out_of_bound(float val, float lower, float upper) { + assert(lower < val && val < upper); +} + +template <> +void assert_out_of_bound(sycl::half val, sycl::half lower, + sycl::half upper) { + assert(lower < val && val < upper); +} + +template +void native_tanh_tester(sycl::queue q, T val, T up, T lo) { + T r = val; + +#ifdef SYCL_EXT_ONEAPI_NATIVE_MATH + { + sycl::buffer BufR(&r, sycl::range<1>(1)); + q.submit([&](sycl::handler &cgh) { + auto AccR = BufR.template get_access(cgh); + cgh.single_task([=]() { + AccR[0] = sycl::ext::oneapi::experimental::native::tanh(AccR[0]); + }); + }); + } + + assert_out_of_bound(r, up, lo); +#else + assert(!"SYCL_EXT_ONEAPI_NATIVE_MATH not supported"); +#endif +} + +template +void native_exp2_tester(sycl::queue q, T val, T up, T lo) { + T r = val; + +#ifdef SYCL_EXT_ONEAPI_NATIVE_MATH + { + sycl::buffer BufR(&r, sycl::range<1>(1)); + q.submit([&](sycl::handler &cgh) { + auto AccR = BufR.template get_access(cgh); + cgh.single_task([=]() { + AccR[0] = sycl::ext::oneapi::experimental::native::exp2(AccR[0]); + }); + }); + } + + assert_out_of_bound(r, up, lo); +#else + assert(!"SYCL_EXT_ONEAPI_NATIVE_MATH not supported"); +#endif +} + +int main() { + + sycl::queue q; + + const double tv[16] = {-2.0, -1.5, -1.0, 0.0, 2.0, 1.5, 1.0, 0.0, + -1.7, 1.7, -1.2, 1.2, -3.0, 3.0, -10.0, 10.0}; + const double tl[16] = {-0.97, -0.91, -0.77, -0.1, 0.95, 0.89, 0.75, -0.1, + -0.94, 0.92, -0.84, 0.82, -1.0, 0.98, -1.10, 0.98}; + const double tu[16] = {-0.95, -0.89, -0.75, 0.1, 0.97, 0.91, 0.77, 0.1, + -0.92, 0.94, -0.82, 0.84, -0.98, 1.00, -0.98, 1.10}; + + native_tanh_tester(q, tv[0], tl[0], tu[0]); + native_tanh_tester(q, {tv[0], tv[1]}, {tl[0], tl[1]}, + {tu[0], tu[1]}); + native_tanh_tester( + q, {tv[0], tv[1], tv[2]}, {tl[0], tl[1], tl[2]}, {tu[0], tu[1], tu[2]}); + + native_tanh_tester(q, {tv[0], tv[1], tv[2], tv[3]}, + {tl[0], tl[1], tl[2], tl[3]}, + {tu[0], tu[1], tu[2], tu[3]}); + native_tanh_tester>(q, {tv[0], tv[1], tv[2]}, + {tl[0], tl[1], tl[2]}, + {tu[0], tu[1], tu[2]}); + native_tanh_tester>(q, {tv[0], tv[1], tv[2], tv[3]}, + {tl[0], tl[1], tl[2], tl[3]}, + {tu[0], tu[1], tu[2], tu[3]}); + native_tanh_tester>(q, {tv[0], tv[1], tv[2], tv[3]}, + {tl[0], tl[1], tl[2], tl[3]}, + {tu[0], tu[1], tu[2], tu[3]}); + native_tanh_tester( + q, {tv[0], tv[1], tv[2], tv[3], tv[4], tv[5], tv[6], tv[7]}, + {tl[0], tl[1], tl[2], tl[3], tl[4], tl[5], tl[6], tl[7]}, + {tu[0], tu[1], tu[2], tu[3], tu[4], tu[5], tu[6], tu[7]}); + native_tanh_tester( + q, + {tv[0], tv[1], tv[2], tv[3], tv[4], tv[5], tv[6], tv[7], tv[8], tv[9], + tv[10], tv[11], tv[12], tv[13], tv[14], tv[15]}, + {tl[0], tl[1], tl[2], tl[3], tl[4], tl[5], tl[6], tl[7], tl[8], tl[9], + tl[10], tl[11], tl[12], tl[13], tl[14], tl[15]}, + {tu[0], tu[1], tu[2], tu[3], tu[4], tu[5], tu[6], tu[7], tu[8], tu[9], + tu[10], tu[11], tu[12], tu[13], tu[14], tu[15]}); + + if (q.get_device().has(sycl::aspect::fp16)) { + + native_tanh_tester(q, tv[0], tl[0], tu[0]); + native_tanh_tester(q, {tv[0], tv[1]}, {tl[0], tl[1]}, + {tu[0], tu[1]}); + native_tanh_tester( + q, {tv[0], tv[1], tv[2]}, {tl[0], tl[1], tl[2]}, {tu[0], tu[1], tu[2]}); + native_tanh_tester>( + q, {tv[0], tv[1], tv[2]}, {tl[0], tl[1], tl[2]}, {tu[0], tu[1], tu[2]}); + native_tanh_tester(q, {tv[0], tv[1], tv[2], tv[3]}, + {tl[0], tl[1], tl[2], tl[3]}, + {tu[0], tu[1], tu[2], tu[3]}); + native_tanh_tester>(q, {tv[0], tv[1], tv[2], tv[3]}, + {tl[0], tl[1], tl[2], tl[3]}, + {tu[0], tu[1], tu[2], tu[3]}); + native_tanh_tester( + q, {tv[0], tv[1], tv[2], tv[3], tv[4], tv[5], tv[6], tv[7]}, + {tl[0], tl[1], tl[2], tl[3], tl[4], tl[5], tl[6], tl[7]}, + {tu[0], tu[1], tu[2], tu[3], tu[4], tu[5], tu[6], tu[7]}); + native_tanh_tester( + q, + {tv[0], tv[1], tv[2], tv[3], tv[4], tv[5], tv[6], tv[7], tv[8], tv[9], + tv[10], tv[11], tv[12], tv[13], tv[14], tv[15]}, + {tl[0], tl[1], tl[2], tl[3], tl[4], tl[5], tl[6], tl[7], tl[8], tl[9], + tl[10], tl[11], tl[12], tl[13], tl[14], tl[15]}, + {tu[0], tu[1], tu[2], tu[3], tu[4], tu[5], tu[6], tu[7], tu[8], tu[9], + tu[10], tu[11], tu[12], tu[13], tu[14], tu[15]}); + + const double ev[16] = {-2.0, -1.5, -1.0, 0.0, 2.0, 1.5, 1.0, 0.0, + -2.0, -1.5, -1.0, 0.0, 2.0, 1.5, 1.0, 0.0}; + const double el[16] = {0.1, 0.34, 0.4, -0.9, 3.9, 2.7, 1.9, -0.9, + 0.1, 0.34, 0.4, -0.9, 3.9, 2.7, 1.9, -0.9}; + const double eu[16] = {0.3, 0.36, 0.6, 1.1, 4.1, 2.9, 2.1, 1.1, + 0.3, 0.36, 0.6, 1.1, 4.1, 2.9, 2.1, 1.1}; + + native_exp2_tester(q, ev[0], el[0], eu[0]); + native_exp2_tester(q, {ev[0], ev[1]}, {el[0], el[1]}, + {eu[0], eu[1]}); + native_exp2_tester( + q, {ev[0], ev[1], ev[2]}, {el[0], el[1], el[2]}, {eu[0], eu[1], eu[2]}); + native_exp2_tester(q, {ev[0], ev[1], ev[2], ev[3]}, + {el[0], el[1], el[2], el[3]}, + {eu[0], eu[1], eu[2], eu[3]}); + native_exp2_tester>(q, {ev[0], ev[1], ev[2]}, + {el[0], el[1], el[2]}, + {eu[0], eu[1], eu[2]}); + native_exp2_tester>(q, {ev[0], ev[1], ev[2], ev[3]}, + {el[0], el[1], el[2], el[3]}, + {eu[0], eu[1], eu[2], eu[3]}); + native_exp2_tester( + q, {ev[0], ev[1], ev[2], ev[3], ev[4], ev[5], ev[6], ev[7]}, + {el[0], el[1], el[2], el[3], el[4], el[5], el[6], el[7]}, + {eu[0], eu[1], eu[2], eu[3], eu[4], eu[5], eu[6], eu[7]}); + native_exp2_tester( + q, + {ev[0], ev[1], ev[2], ev[3], ev[4], ev[5], ev[6], ev[7], ev[8], ev[9], + ev[10], ev[11], ev[12], ev[13], ev[14], ev[15]}, + {el[0], el[1], el[2], el[3], el[4], el[5], el[6], el[7], el[8], el[9], + el[10], el[11], el[12], el[13], el[14], el[15]}, + {eu[0], eu[1], eu[2], eu[3], eu[4], eu[5], eu[6], eu[7], eu[8], eu[9], + eu[10], eu[11], eu[12], eu[13], eu[14], eu[15]}); + } + + return 0; +} \ No newline at end of file diff --git a/SYCL/DeviceLib/half_precision_math_test_marray_vec.cpp b/SYCL/DeviceLib/half_precision_math_test_marray_vec.cpp new file mode 100644 index 0000000000..0cc306a075 --- /dev/null +++ b/SYCL/DeviceLib/half_precision_math_test_marray_vec.cpp @@ -0,0 +1,169 @@ +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out +// RUN: %HOST_RUN_PLACEHOLDER %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %ACC_RUN_PLACEHOLDER %t.out + +#include + +using namespace sycl; + +template class TypeHelper; + +template bool checkEqual(vec A, size_t B) { + T TB = B; + return A.x() == TB && A.y() == TB && A.z() == TB; +} + +template bool checkEqual(vec A, size_t B) { + T TB = B; + return A.x() == TB && A.y() == TB && A.z() == TB && A.w() == TB; +} + +template bool checkEqual(marray A, size_t B) { + for (int i = 0; i < N; i++) { + if (A[i] != B) { + return false; + } + } + return true; +} + +#define COMMA , + +#define HALF_PRECISION_OPERATOR(NAME) \ + template \ + void half_precision_math_test_##NAME(queue &deviceQueue, T result, T input, \ + size_t ref) { \ + { \ + buffer buffer1(&result, 1); \ + buffer buffer2(&input, 1); \ + deviceQueue.submit([&](handler &cgh) { \ + accessor res_access( \ + buffer1, cgh); \ + accessor input_access( \ + buffer2, cgh); \ + cgh.single_task>([=]() { \ + res_access[0] = sycl::half_precision::NAME(input_access[0]); \ + }); \ + }); \ + } \ + assert(checkEqual(result, ref)); \ + } + +HALF_PRECISION_OPERATOR(sin) +HALF_PRECISION_OPERATOR(tan) +HALF_PRECISION_OPERATOR(cos) +HALF_PRECISION_OPERATOR(exp) +HALF_PRECISION_OPERATOR(exp2) +HALF_PRECISION_OPERATOR(exp10) +HALF_PRECISION_OPERATOR(log) +HALF_PRECISION_OPERATOR(log2) +HALF_PRECISION_OPERATOR(log10) +HALF_PRECISION_OPERATOR(sqrt) +HALF_PRECISION_OPERATOR(rsqrt) +HALF_PRECISION_OPERATOR(recip) + +#undef HALF_PRECISION_OPERATOR + +#define HALF_PRECISION_OPERATOR_2(NAME) \ + template \ + void half_precision_math_test_2_##NAME(queue &deviceQueue, T result, \ + T input1, T input2, size_t ref) { \ + { \ + buffer buffer1(&result, 1); \ + buffer buffer2(&input1, 1); \ + buffer buffer3(&input2, 1); \ + deviceQueue.submit([&](handler &cgh) { \ + accessor res_access( \ + buffer1, cgh); \ + accessor input1_access( \ + buffer2, cgh); \ + accessor input2_access( \ + buffer3, cgh); \ + cgh.single_task>([=]() { \ + res_access[0] = \ + sycl::half_precision::NAME(input1_access[0], input2_access[0]); \ + }); \ + }); \ + } \ + assert(checkEqual(result, ref)); \ + } + +HALF_PRECISION_OPERATOR_2(divide) +HALF_PRECISION_OPERATOR_2(powr) + +#undef HALF_PRECISION_OPERATOR_2 + +#define HALF_PRECISION_TESTS_3(TYPE) \ + half_precision_math_test_sin(deviceQueue, TYPE{-1, -1, -1}, TYPE{0, 0, 0}, \ + 0); \ + half_precision_math_test_tan(deviceQueue, TYPE{-1, -1, -1}, TYPE{0, 0, 0}, \ + 0); \ + half_precision_math_test_cos(deviceQueue, TYPE{-1, -1, -1}, TYPE{0, 0, 0}, \ + 1); \ + half_precision_math_test_exp(deviceQueue, TYPE{-1, -1, -1}, TYPE{0, 0, 0}, \ + 1); \ + half_precision_math_test_exp2(deviceQueue, TYPE{-1, -1, -1}, TYPE{2, 2, 2}, \ + 4); \ + half_precision_math_test_exp10(deviceQueue, TYPE{-1, -1, -1}, TYPE{2, 2, 2}, \ + 100); \ + half_precision_math_test_log(deviceQueue, TYPE{-1, -1, -1}, TYPE{1, 1, 1}, \ + 0); \ + half_precision_math_test_log2(deviceQueue, TYPE{-1, -1, -1}, TYPE{4, 4, 4}, \ + 2); \ + half_precision_math_test_log10(deviceQueue, TYPE{-1, -1, -1}, \ + TYPE{100, 100, 100}, 2); \ + half_precision_math_test_sqrt(deviceQueue, TYPE{-1, -1, -1}, TYPE{4, 4, 4}, \ + 2); \ + half_precision_math_test_rsqrt(deviceQueue, TYPE{-1, -1, -1}, \ + TYPE{0.25, 0.25, 0.25}, 2); \ + half_precision_math_test_recip(deviceQueue, TYPE{-1, -1, -1}, \ + TYPE{0.25, 0.25, 0.25}, 4); \ + half_precision_math_test_2_powr(deviceQueue, TYPE{-1, -1, -1}, \ + TYPE{2, 2, 2}, TYPE{2, 2, 2}, 4); \ + half_precision_math_test_2_divide(deviceQueue, TYPE{-1, -1, -1}, \ + TYPE{4, 4, 4}, TYPE{2, 2, 2}, 2); + +#define HALF_PRECISION_TESTS_4(TYPE) \ + half_precision_math_test_sin(deviceQueue, TYPE{-1, -1, -1, -1}, \ + TYPE{0, 0, 0, 0}, 0); \ + half_precision_math_test_tan(deviceQueue, TYPE{-1, -1, -1, -1}, \ + TYPE{0, 0, 0, 0}, 0); \ + half_precision_math_test_cos(deviceQueue, TYPE{-1, -1, -1, -1}, \ + TYPE{0, 0, 0, 0}, 1); \ + half_precision_math_test_exp(deviceQueue, TYPE{-1, -1, -1, -1}, \ + TYPE{0, 0, 0, 0}, 1); \ + half_precision_math_test_exp2(deviceQueue, TYPE{-1, -1, -1, -1}, \ + TYPE{2, 2, 2, 2}, 4); \ + half_precision_math_test_exp10(deviceQueue, TYPE{-1, -1, -1, -1}, \ + TYPE{2, 2, 2, 2}, 100); \ + half_precision_math_test_log(deviceQueue, TYPE{-1, -1, -1, -1}, \ + TYPE{1, 1, 1, 1}, 0); \ + half_precision_math_test_log2(deviceQueue, TYPE{-1, -1, -1, -1}, \ + TYPE{4, 4, 4, 4}, 2); \ + half_precision_math_test_log10(deviceQueue, TYPE{-1, -1, -1, -1}, \ + TYPE{100, 100, 100, 100}, 2); \ + half_precision_math_test_sqrt(deviceQueue, TYPE{-1, -1, -1, -1}, \ + TYPE{4, 4, 4, 4}, 2); \ + half_precision_math_test_rsqrt(deviceQueue, TYPE{-1, -1, -1, -1}, \ + TYPE{0.25, 0.25, 0.25, 0.25}, 2); \ + half_precision_math_test_recip(deviceQueue, TYPE{-1, -1, -1, -1}, \ + TYPE{0.25, 0.25, 0.25, 0.25}, 4); \ + half_precision_math_test_2_powr(deviceQueue, TYPE{-1, -1, -1, -1}, \ + TYPE{2, 2, 2, 2}, TYPE{2, 2, 2, 2}, 4); \ + half_precision_math_test_2_divide(deviceQueue, TYPE{-1, -1, -1, -1}, \ + TYPE{4, 4, 4, 4}, TYPE{2, 2, 2, 2}, 2); + +int main() { + queue deviceQueue; + + HALF_PRECISION_TESTS_3(float3) + HALF_PRECISION_TESTS_3(marray) + + HALF_PRECISION_TESTS_4(float4) + HALF_PRECISION_TESTS_4(marray) + + std::cout << "Pass" << std::endl; + return 0; +} diff --git a/SYCL/DeviceLib/math_test_marray_vec.cpp b/SYCL/DeviceLib/math_test_marray_vec.cpp new file mode 100644 index 0000000000..53ca722725 --- /dev/null +++ b/SYCL/DeviceLib/math_test_marray_vec.cpp @@ -0,0 +1,281 @@ +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out +// RUN: %HOST_RUN_PLACEHOLDER %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %ACC_RUN_PLACEHOLDER %t.out + +#include + +using namespace sycl; + +template class TypeHelper; + +template bool checkEqual(vec A, size_t B) { + T TB = B; + return A.x() == TB && A.y() == TB && A.z() == TB; +} + +template bool checkEqual(vec A, size_t B) { + T TB = B; + return A.x() == TB && A.y() == TB && A.z() == TB && A.w() == TB; +} + +template bool checkEqual(marray A, size_t B) { + for (int i = 0; i < N; i++) { + if (A[i] != B) { + return false; + } + } + return true; +} + +#define COMMA , + +#define OPERATOR(NAME) \ + template \ + void math_test_##NAME(queue &deviceQueue, T result, T input, size_t ref) { \ + { \ + buffer buffer1(&result, 1); \ + buffer buffer2(&input, 1); \ + deviceQueue.submit([&](handler &cgh) { \ + accessor res_access( \ + buffer1, cgh); \ + accessor input_access( \ + buffer2, cgh); \ + cgh.single_task>( \ + [=]() { res_access[0] = NAME(input_access[0]); }); \ + }); \ + } \ + assert(checkEqual(result, ref)); \ + } + +OPERATOR(cos) +OPERATOR(cospi) +OPERATOR(sin) +OPERATOR(sinpi) +OPERATOR(cosh) +OPERATOR(sinh) +OPERATOR(tan) +OPERATOR(tanpi) +OPERATOR(atan) +OPERATOR(atanpi) +OPERATOR(tanh) +OPERATOR(acos) +OPERATOR(acospi) +OPERATOR(asin) +OPERATOR(asinpi) +OPERATOR(acosh) +OPERATOR(asinh) +OPERATOR(atanh) +OPERATOR(cbrt) +OPERATOR(ceil) +OPERATOR(exp) +OPERATOR(exp2) +OPERATOR(exp10) +OPERATOR(expm1) +OPERATOR(tgamma) +OPERATOR(lgamma) +OPERATOR(erf) +OPERATOR(erfc) +OPERATOR(log) +OPERATOR(log2) +OPERATOR(log10) +OPERATOR(log1p) +OPERATOR(logb) +OPERATOR(sqrt) +OPERATOR(rsqrt) +OPERATOR(rint) +OPERATOR(round) +OPERATOR(trunc) + +#undef OPERATOR + +#define OPERATOR_2(NAME) \ + template \ + void math_test_2_##NAME(queue &deviceQueue, T result, T input1, T input2, \ + size_t ref) { \ + { \ + buffer buffer1(&result, 1); \ + buffer buffer2(&input1, 1); \ + buffer buffer3(&input2, 1); \ + deviceQueue.submit([&](handler &cgh) { \ + accessor res_access( \ + buffer1, cgh); \ + accessor input1_access( \ + buffer2, cgh); \ + accessor input2_access( \ + buffer3, cgh); \ + cgh.single_task>([=]() { \ + res_access[0] = NAME(input1_access[0], input2_access[0]); \ + }); \ + }); \ + } \ + assert(checkEqual(result, ref)); \ + } + +OPERATOR_2(pow) +OPERATOR_2(powr) +OPERATOR_2(atan2) +OPERATOR_2(atan2pi) +OPERATOR_2(copysign) +OPERATOR_2(fdim) +OPERATOR_2(fmin) +OPERATOR_2(fmax) +OPERATOR_2(fmod) +OPERATOR_2(hypot) +OPERATOR_2(maxmag) +OPERATOR_2(minmag) +OPERATOR_2(nextafter) +OPERATOR_2(remainder) + +#undef OPERATOR_2 + +#define OPERATOR_3(NAME) \ + template \ + void math_test_3_##NAME(queue &deviceQueue, T result, T input1, T input2, \ + T input3, size_t ref) { \ + { \ + buffer buffer1(&result, 1); \ + buffer buffer2(&input1, 1); \ + buffer buffer3(&input2, 1); \ + buffer buffer4(&input3, 1); \ + deviceQueue.submit([&](handler &cgh) { \ + accessor res_access( \ + buffer1, cgh); \ + accessor input1_access( \ + buffer2, cgh); \ + accessor input2_access( \ + buffer3, cgh); \ + accessor input3_access( \ + buffer4, cgh); \ + cgh.single_task>([=]() { \ + res_access[0] = \ + NAME(input1_access[0], input2_access[0], input3_access[0]); \ + }); \ + }); \ + } \ + assert(checkEqual(result, ref)); \ + } + +OPERATOR_3(mad) +OPERATOR_3(mix) +OPERATOR_3(fma) + +#undef OPERATOR_3 + +#define TESTS_4(TYPE) \ + math_test_tanh(deviceQueue, TYPE{-1, -1, -1, -1}, TYPE{0, 0, 0, 0}, 0); \ + math_test_cosh(deviceQueue, TYPE{-1, -1, -1, -1}, TYPE{0, 0, 0, 0}, 1); \ + math_test_sinh(deviceQueue, TYPE{-1, -1, -1, -1}, TYPE{0, 0, 0, 0}, 0); \ + math_test_acos(deviceQueue, TYPE{-1, -1, -1, -1}, TYPE{1, 1, 1, 1}, 0); \ + math_test_acospi(deviceQueue, TYPE{-1, -1, -1, -1}, TYPE{1, 1, 1, 1}, 0); \ + math_test_acosh(deviceQueue, TYPE{-1, -1, -1, -1}, TYPE{1, 1, 1, 1}, 0); \ + math_test_asin(deviceQueue, TYPE{-1, -1, -1, -1}, TYPE{0, 0, 0, 0}, 0); \ + math_test_asinpi(deviceQueue, TYPE{-1, -1, -1, -1}, TYPE{0, 0, 0, 0}, 0); \ + math_test_asinh(deviceQueue, TYPE{-1, -1, -1, -1}, TYPE{0, 0, 0, 0}, 0); \ + math_test_cbrt(deviceQueue, TYPE{-1, -1, -1, -1}, TYPE{1, 1, 1, 1}, 1); \ + math_test_atan(deviceQueue, TYPE{-1, -1, -1, -1}, TYPE{0, 0, 0, 0}, 0); \ + math_test_atanpi(deviceQueue, TYPE{-1, -1, -1, -1}, TYPE{0, 0, 0, 0}, 0); \ + math_test_atanh(deviceQueue, TYPE{-1, -1, -1, -1}, TYPE{0, 0, 0, 0}, 0); \ + math_test_exp(deviceQueue, TYPE{-1, -1, -1, -1}, TYPE{0, 0, 0, 0}, 1); \ + math_test_exp2(deviceQueue, TYPE{-1, -1, -1, -1}, TYPE{2, 2, 2, 2}, 4); \ + math_test_exp10(deviceQueue, TYPE{-1, -1, -1, -1}, TYPE{2, 2, 2, 2}, 100); \ + math_test_expm1(deviceQueue, TYPE{-1, -1, -1, -1}, TYPE{0, 0, 0, 0}, 0); \ + math_test_ceil(deviceQueue, TYPE{-1, -1, -1, -1}, TYPE{0.6, 0.6, 0.6, 0.6}, \ + 1); \ + math_test_tgamma(deviceQueue, TYPE{-1, -1, -1, -1}, TYPE{1, 1, 1, 1}, 1); \ + math_test_lgamma(deviceQueue, TYPE{-1, -1, -1, -1}, TYPE{1, 1, 1, 1}, 0); \ + math_test_erf(deviceQueue, TYPE{-1, -1, -1, -1}, TYPE{0, 0, 0, 0}, 0); \ + math_test_erfc(deviceQueue, TYPE{-1, -1, -1, -1}, TYPE{0, 0, 0, 0}, 1); \ + math_test_2_pow(deviceQueue, TYPE{-1, -1, -1, -1}, TYPE{2, 2, 2, 2}, \ + TYPE{2, 2, 2, 2}, 4); \ + math_test_2_powr(deviceQueue, TYPE{-1, -1, -1, -1}, TYPE{2, 2, 2, 2}, \ + TYPE{2, 2, 2, 2}, 4); \ + math_test_2_atan2(deviceQueue, TYPE{-1, -1, -1, -1}, TYPE{0, 0, 0, 0}, \ + TYPE{2, 2, 2, 2}, 0); \ + math_test_2_atan2pi(deviceQueue, TYPE{-1, -1, -1, -1}, TYPE{0, 0, 0, 0}, \ + TYPE{2, 2, 2, 2}, 0); \ + math_test_2_copysign(deviceQueue, TYPE{-1, -1, -1, -1}, \ + TYPE{-3, -3, -3, -3}, TYPE{2, 2, 2, 2}, 3); \ + math_test_2_fmin(deviceQueue, TYPE{-1, -1, -1, -1}, TYPE{2, 2, 2, 2}, \ + TYPE{3, 3, 3, 3}, 2); \ + math_test_2_fmax(deviceQueue, TYPE{-1, -1, -1, -1}, TYPE{2, 2, 2, 2}, \ + TYPE{3, 3, 3, 3}, 3); \ + math_test_2_hypot(deviceQueue, TYPE{-1, -1, -1, -1}, TYPE{4, 4, 4, 4}, \ + TYPE{3, 3, 3, 3}, 5); \ + math_test_2_maxmag(deviceQueue, TYPE{-1, -1, -1, -1}, TYPE{-2, -2, -2, -2}, \ + TYPE{3, 3, 3, 3}, 3); \ + math_test_2_minmag(deviceQueue, TYPE{-1, -1, -1, -1}, TYPE{2, 2, 2, 2}, \ + TYPE{-3, -3, -3, -3}, 2); \ + math_test_2_remainder(deviceQueue, TYPE{-1, -1, -1, -1}, TYPE{5, 5, 5, 5}, \ + TYPE{2, 2, 2, 2}, 1); \ + math_test_2_fdim(deviceQueue, TYPE{-1, -1, -1, -1}, TYPE{3, 3, 3, 3}, \ + TYPE{3, 3, 3, 3}, 0); \ + math_test_2_fmod(deviceQueue, TYPE{-1, -1, -1, -1}, TYPE{5, 5, 5, 5}, \ + TYPE{3, 3, 3, 3}, 2); \ + math_test_2_nextafter(deviceQueue, TYPE{-1, -1, -1, -1}, \ + TYPE{-0, -0, -0, -0}, TYPE{+0, +0, +0, +0}, 0); \ + math_test_3_fma(deviceQueue, TYPE{-1, -1, -1, -1}, TYPE{2, 2, 2, 2}, \ + TYPE{2, 2, 2, 2}, TYPE{1, 1, 1, 1}, 5); \ + math_test_3_mad(deviceQueue, TYPE{-1, -1, -1, -1}, TYPE{2, 2, 2, 2}, \ + TYPE{2, 2, 2, 2}, TYPE{1, 1, 1, 1}, 5); \ + math_test_3_mix(deviceQueue, TYPE{-1, -1, -1, -1}, TYPE{3, 3, 3, 3}, \ + TYPE{5, 5, 5, 5}, TYPE{0.5, 0.5, 0.5, 0.5}, 4); + +#define TESTS_3(TYPE) \ + math_test_tan(deviceQueue, TYPE{-1, -1, -1}, TYPE{0, 0, 0}, 0); \ + math_test_tanh(deviceQueue, TYPE{-1, -1, -1}, TYPE{0, 0, 0}, 0); \ + math_test_cos(deviceQueue, TYPE{-1, -1, -1}, TYPE{0, 0, 0}, 1); \ + math_test_sin(deviceQueue, TYPE{-1, -1, -1}, TYPE{0, 0, 0}, 0); \ + math_test_cosh(deviceQueue, TYPE{-1, -1, -1}, TYPE{0, 0, 0}, 1); \ + math_test_sinh(deviceQueue, TYPE{-1, -1, -1}, TYPE{0, 0, 0}, 0); \ + math_test_acos(deviceQueue, TYPE{-1, -1, -1}, TYPE{1, 1, 1}, 0); \ + math_test_acosh(deviceQueue, TYPE{-1, -1, -1}, TYPE{1, 1, 1}, 0); \ + math_test_asin(deviceQueue, TYPE{-1, -1, -1}, TYPE{0, 0, 0}, 0); \ + math_test_asinh(deviceQueue, TYPE{-1, -1, -1}, TYPE{0, 0, 0}, 0); \ + math_test_cbrt(deviceQueue, TYPE{-1, -1, -1}, TYPE{1, 1, 1}, 1); \ + math_test_atan(deviceQueue, TYPE{-1, -1, -1}, TYPE{0, 0, 0}, 0); \ + math_test_atanh(deviceQueue, TYPE{-1, -1, -1}, TYPE{0, 0, 0}, 0); \ + math_test_exp(deviceQueue, TYPE{-1, -1, -1}, TYPE{0, 0, 0}, 1); \ + math_test_exp2(deviceQueue, TYPE{-1, -1, -1}, TYPE{2, 2, 2}, 4); \ + math_test_exp10(deviceQueue, TYPE{-1, -1, -1}, TYPE{2, 2, 2}, 100); \ + math_test_expm1(deviceQueue, TYPE{-1, -1, -1}, TYPE{0, 0, 0}, 0); \ + math_test_ceil(deviceQueue, TYPE{-1, -1, -1}, TYPE{0.6, 0.6, 0.6}, 1); \ + math_test_tgamma(deviceQueue, TYPE{-1, -1, -1}, TYPE{1, 1, 1}, 1); \ + math_test_lgamma(deviceQueue, TYPE{-1, -1, -1}, TYPE{1, 1, 1}, 0); \ + math_test_erf(deviceQueue, TYPE{-1, -1, -1}, TYPE{0, 0, 0}, 0); \ + math_test_erfc(deviceQueue, TYPE{-1, -1, -1}, TYPE{0, 0, 0}, 1); \ + math_test_log(deviceQueue, TYPE{-1, -1, -1}, TYPE{1, 1, 1}, 0); \ + math_test_log2(deviceQueue, TYPE{-1, -1, -1}, TYPE{4, 4, 4}, 2); \ + math_test_log10(deviceQueue, TYPE{-1, -1, -1}, TYPE{100, 100, 100}, 2); \ + math_test_log1p(deviceQueue, TYPE{-1, -1, -1}, TYPE{0, 0, 0}, 0); \ + math_test_logb(deviceQueue, TYPE{-1, -1, -1}, TYPE{1.1, 1.1, 1.1}, 0); \ + math_test_sqrt(deviceQueue, TYPE{-1, -1, -1}, TYPE{4, 4, 4}, 2); \ + math_test_rsqrt(deviceQueue, TYPE{-1, -1, -1}, TYPE{0.25, 0.25, 0.25}, 2); \ + math_test_rint(deviceQueue, TYPE{-1, -1, -1}, TYPE{2.9, 2.9, 2.9}, 3); \ + math_test_round(deviceQueue, TYPE{-1, -1, -1}, TYPE{0.5, 0.5, 0.5}, 1); \ + math_test_trunc(deviceQueue, TYPE{-1, -1, -1}, TYPE{1.9, 1.9, 1.9}, 1); + +int main() { + queue deviceQueue; + + TESTS_4(float4) + TESTS_4(double4) + TESTS_4(marray) + TESTS_4(marray) + + TESTS_3(float3) + TESTS_3(double3) + TESTS_3(marray) + TESTS_3(marray) + + if (deviceQueue.get_device().has(sycl::aspect::fp16)) { + TESTS_4(half4) + TESTS_4(marray) + TESTS_3(half3) + TESTS_3(marray) + } + + std::cout << "Pass" << std::endl; + return 0; +} diff --git a/SYCL/DeviceLib/native_math_test_marray_vec.cpp b/SYCL/DeviceLib/native_math_test_marray_vec.cpp new file mode 100644 index 0000000000..e6708f3779 --- /dev/null +++ b/SYCL/DeviceLib/native_math_test_marray_vec.cpp @@ -0,0 +1,156 @@ +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out +// RUN: %HOST_RUN_PLACEHOLDER %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %ACC_RUN_PLACEHOLDER %t.out + +#include + +using namespace sycl; + +template class TypeHelper; + +template bool checkEqual(vec A, size_t B) { + T TB = B; + return A.x() == TB && A.y() == TB && A.z() == TB; +} + +template bool checkEqual(vec A, size_t B) { + T TB = B; + return A.x() == TB && A.y() == TB && A.z() == TB && A.w() == TB; +} + +template bool checkEqual(marray A, size_t B) { + for (int i = 0; i < N; i++) { + if (A[i] != B) { + return false; + } + } + return true; +} + +#define COMMA , + +#define NATIVE_OPERATOR(NAME) \ + template \ + void native_math_test_##NAME(queue &deviceQueue, T result, T input, \ + size_t ref) { \ + { \ + buffer buffer1(&result, 1); \ + buffer buffer2(&input, 1); \ + deviceQueue.submit([&](handler &cgh) { \ + accessor res_access( \ + buffer1, cgh); \ + accessor input_access( \ + buffer2, cgh); \ + cgh.single_task>( \ + [=]() { res_access[0] = sycl::native::NAME(input_access[0]); }); \ + }); \ + } \ + assert(checkEqual(result, ref)); \ + } + +NATIVE_OPERATOR(sin) +NATIVE_OPERATOR(tan) +NATIVE_OPERATOR(cos) +NATIVE_OPERATOR(exp) +NATIVE_OPERATOR(exp2) +NATIVE_OPERATOR(exp10) +NATIVE_OPERATOR(log) +NATIVE_OPERATOR(log2) +NATIVE_OPERATOR(log10) +NATIVE_OPERATOR(sqrt) +NATIVE_OPERATOR(rsqrt) +NATIVE_OPERATOR(recip) + +#undef NATIVE_OPERATOR + +#define NATIVE_OPERATOR_2(NAME) \ + template \ + void native_math_test_2_##NAME(queue &deviceQueue, T result, T input1, \ + T input2, size_t ref) { \ + { \ + buffer buffer1(&result, 1); \ + buffer buffer2(&input1, 1); \ + buffer buffer3(&input2, 1); \ + deviceQueue.submit([&](handler &cgh) { \ + accessor res_access( \ + buffer1, cgh); \ + accessor input1_access( \ + buffer2, cgh); \ + accessor input2_access( \ + buffer3, cgh); \ + cgh.single_task>([=]() { \ + res_access[0] = \ + sycl::native::NAME(input1_access[0], input2_access[0]); \ + }); \ + }); \ + } \ + assert(checkEqual(result, ref)); \ + } + +NATIVE_OPERATOR_2(divide) +NATIVE_OPERATOR_2(powr) + +#undef NATIVE_OPERATOR_2 + +#define NATIVE_TESTS_3(TYPE) \ + native_math_test_sin(deviceQueue, TYPE{-1, -1, -1}, TYPE{0, 0, 0}, 0); \ + native_math_test_tan(deviceQueue, TYPE{-1, -1, -1}, TYPE{0, 0, 0}, 0); \ + native_math_test_cos(deviceQueue, TYPE{-1, -1, -1}, TYPE{0, 0, 0}, 1); \ + native_math_test_exp(deviceQueue, TYPE{-1, -1, -1}, TYPE{0, 0, 0}, 1); \ + native_math_test_exp2(deviceQueue, TYPE{-1, -1, -1}, TYPE{2, 2, 2}, 4); \ + native_math_test_log(deviceQueue, TYPE{-1, -1, -1}, TYPE{1, 1, 1}, 0); \ + native_math_test_log2(deviceQueue, TYPE{-1, -1, -1}, TYPE{4, 4, 4}, 2); \ + native_math_test_log10(deviceQueue, TYPE{-1, -1, -1}, TYPE{100, 100, 100}, \ + 2); \ + native_math_test_sqrt(deviceQueue, TYPE{-1, -1, -1}, TYPE{4, 4, 4}, 2); \ + native_math_test_rsqrt(deviceQueue, TYPE{-1, -1, -1}, \ + TYPE{0.25, 0.25, 0.25}, 2); \ + native_math_test_recip(deviceQueue, TYPE{-1, -1, -1}, \ + TYPE{0.25, 0.25, 0.25}, 4); \ + native_math_test_2_powr(deviceQueue, TYPE{-1, -1, -1}, TYPE{2, 2, 2}, \ + TYPE{2, 2, 2}, 4); \ + native_math_test_2_divide(deviceQueue, TYPE{-1, -1, -1}, TYPE{4, 4, 4}, \ + TYPE{2, 2, 2}, 2); + +#define NATIVE_TESTS_4(TYPE) \ + native_math_test_sin(deviceQueue, TYPE{-1, -1, -1, -1}, TYPE{0, 0, 0, 0}, \ + 0); \ + native_math_test_tan(deviceQueue, TYPE{-1, -1, -1, -1}, TYPE{0, 0, 0, 0}, \ + 0); \ + native_math_test_cos(deviceQueue, TYPE{-1, -1, -1, -1}, TYPE{0, 0, 0, 0}, \ + 1); \ + native_math_test_exp(deviceQueue, TYPE{-1, -1, -1, -1}, TYPE{0, 0, 0, 0}, \ + 1); \ + native_math_test_exp2(deviceQueue, TYPE{-1, -1, -1, -1}, TYPE{2, 2, 2, 2}, \ + 4); \ + native_math_test_log(deviceQueue, TYPE{-1, -1, -1, -1}, TYPE{1, 1, 1, 1}, \ + 0); \ + native_math_test_log2(deviceQueue, TYPE{-1, -1, -1, -1}, TYPE{4, 4, 4, 4}, \ + 2); \ + native_math_test_log10(deviceQueue, TYPE{-1, -1, -1, -1}, \ + TYPE{100, 100, 100, 100}, 2); \ + native_math_test_sqrt(deviceQueue, TYPE{-1, -1, -1, -1}, TYPE{4, 4, 4, 4}, \ + 2); \ + native_math_test_rsqrt(deviceQueue, TYPE{-1, -1, -1, -1}, \ + TYPE{0.25, 0.25, 0.25, 0.25}, 2); \ + native_math_test_recip(deviceQueue, TYPE{-1, -1, -1, -1}, \ + TYPE{0.25, 0.25, 0.25, 0.25}, 4); \ + native_math_test_2_powr(deviceQueue, TYPE{-1, -1, -1, -1}, TYPE{2, 2, 2, 2}, \ + TYPE{2, 2, 2, 2}, 4); \ + native_math_test_2_divide(deviceQueue, TYPE{-1, -1, -1, -1}, \ + TYPE{4, 4, 4, 4}, TYPE{2, 2, 2, 2}, 2); + +int main() { + queue deviceQueue; + + NATIVE_TESTS_3(float3) + NATIVE_TESTS_3(marray) + + NATIVE_TESTS_4(float4) + NATIVE_TESTS_4(marray) + + std::cout << "Pass" << std::endl; + return 0; +} From c7e2ff884d243f7c3ac2cb1d1524f38abee7522a Mon Sep 17 00:00:00 2001 From: "jack.kirk" Date: Wed, 11 May 2022 14:11:03 +0100 Subject: [PATCH 02/20] Made template function improvement. Signed-off-by: jack.kirk --- SYCL/DeviceLib/built-ins/ext_native_math.cpp | 2 +- .../half_precision_math_test_marray_vec.cpp | 122 +++++----- SYCL/DeviceLib/math_test_marray_vec.cpp | 210 +++++++++--------- .../DeviceLib/native_math_test_marray_vec.cpp | 95 ++++---- 4 files changed, 203 insertions(+), 226 deletions(-) diff --git a/SYCL/DeviceLib/built-ins/ext_native_math.cpp b/SYCL/DeviceLib/built-ins/ext_native_math.cpp index 9c64ec817f..913f51070e 100644 --- a/SYCL/DeviceLib/built-ins/ext_native_math.cpp +++ b/SYCL/DeviceLib/built-ins/ext_native_math.cpp @@ -181,4 +181,4 @@ int main() { } return 0; -} \ No newline at end of file +} diff --git a/SYCL/DeviceLib/half_precision_math_test_marray_vec.cpp b/SYCL/DeviceLib/half_precision_math_test_marray_vec.cpp index 0cc306a075..b2f3d152fe 100644 --- a/SYCL/DeviceLib/half_precision_math_test_marray_vec.cpp +++ b/SYCL/DeviceLib/half_precision_math_test_marray_vec.cpp @@ -29,8 +29,6 @@ template bool checkEqual(marray A, size_t B) { return true; } -#define COMMA , - #define HALF_PRECISION_OPERATOR(NAME) \ template \ void half_precision_math_test_##NAME(queue &deviceQueue, T result, T input, \ @@ -95,75 +93,67 @@ HALF_PRECISION_OPERATOR_2(powr) #undef HALF_PRECISION_OPERATOR_2 -#define HALF_PRECISION_TESTS_3(TYPE) \ - half_precision_math_test_sin(deviceQueue, TYPE{-1, -1, -1}, TYPE{0, 0, 0}, \ - 0); \ - half_precision_math_test_tan(deviceQueue, TYPE{-1, -1, -1}, TYPE{0, 0, 0}, \ - 0); \ - half_precision_math_test_cos(deviceQueue, TYPE{-1, -1, -1}, TYPE{0, 0, 0}, \ - 1); \ - half_precision_math_test_exp(deviceQueue, TYPE{-1, -1, -1}, TYPE{0, 0, 0}, \ - 1); \ - half_precision_math_test_exp2(deviceQueue, TYPE{-1, -1, -1}, TYPE{2, 2, 2}, \ - 4); \ - half_precision_math_test_exp10(deviceQueue, TYPE{-1, -1, -1}, TYPE{2, 2, 2}, \ - 100); \ - half_precision_math_test_log(deviceQueue, TYPE{-1, -1, -1}, TYPE{1, 1, 1}, \ - 0); \ - half_precision_math_test_log2(deviceQueue, TYPE{-1, -1, -1}, TYPE{4, 4, 4}, \ - 2); \ - half_precision_math_test_log10(deviceQueue, TYPE{-1, -1, -1}, \ - TYPE{100, 100, 100}, 2); \ - half_precision_math_test_sqrt(deviceQueue, TYPE{-1, -1, -1}, TYPE{4, 4, 4}, \ - 2); \ - half_precision_math_test_rsqrt(deviceQueue, TYPE{-1, -1, -1}, \ - TYPE{0.25, 0.25, 0.25}, 2); \ - half_precision_math_test_recip(deviceQueue, TYPE{-1, -1, -1}, \ - TYPE{0.25, 0.25, 0.25}, 4); \ - half_precision_math_test_2_powr(deviceQueue, TYPE{-1, -1, -1}, \ - TYPE{2, 2, 2}, TYPE{2, 2, 2}, 4); \ - half_precision_math_test_2_divide(deviceQueue, TYPE{-1, -1, -1}, \ - TYPE{4, 4, 4}, TYPE{2, 2, 2}, 2); - -#define HALF_PRECISION_TESTS_4(TYPE) \ - half_precision_math_test_sin(deviceQueue, TYPE{-1, -1, -1, -1}, \ - TYPE{0, 0, 0, 0}, 0); \ - half_precision_math_test_tan(deviceQueue, TYPE{-1, -1, -1, -1}, \ - TYPE{0, 0, 0, 0}, 0); \ - half_precision_math_test_cos(deviceQueue, TYPE{-1, -1, -1, -1}, \ - TYPE{0, 0, 0, 0}, 1); \ - half_precision_math_test_exp(deviceQueue, TYPE{-1, -1, -1, -1}, \ - TYPE{0, 0, 0, 0}, 1); \ - half_precision_math_test_exp2(deviceQueue, TYPE{-1, -1, -1, -1}, \ - TYPE{2, 2, 2, 2}, 4); \ - half_precision_math_test_exp10(deviceQueue, TYPE{-1, -1, -1, -1}, \ - TYPE{2, 2, 2, 2}, 100); \ - half_precision_math_test_log(deviceQueue, TYPE{-1, -1, -1, -1}, \ - TYPE{1, 1, 1, 1}, 0); \ - half_precision_math_test_log2(deviceQueue, TYPE{-1, -1, -1, -1}, \ - TYPE{4, 4, 4, 4}, 2); \ - half_precision_math_test_log10(deviceQueue, TYPE{-1, -1, -1, -1}, \ - TYPE{100, 100, 100, 100}, 2); \ - half_precision_math_test_sqrt(deviceQueue, TYPE{-1, -1, -1, -1}, \ - TYPE{4, 4, 4, 4}, 2); \ - half_precision_math_test_rsqrt(deviceQueue, TYPE{-1, -1, -1, -1}, \ - TYPE{0.25, 0.25, 0.25, 0.25}, 2); \ - half_precision_math_test_recip(deviceQueue, TYPE{-1, -1, -1, -1}, \ - TYPE{0.25, 0.25, 0.25, 0.25}, 4); \ - half_precision_math_test_2_powr(deviceQueue, TYPE{-1, -1, -1, -1}, \ - TYPE{2, 2, 2, 2}, TYPE{2, 2, 2, 2}, 4); \ - half_precision_math_test_2_divide(deviceQueue, TYPE{-1, -1, -1, -1}, \ - TYPE{4, 4, 4, 4}, TYPE{2, 2, 2, 2}, 2); +template void half_precision_math_tests_3(queue &deviceQueue) { + half_precision_math_test_sin(deviceQueue, T{-1, -1, -1}, T{0, 0, 0}, 0); + half_precision_math_test_tan(deviceQueue, T{-1, -1, -1}, T{0, 0, 0}, 0); + half_precision_math_test_cos(deviceQueue, T{-1, -1, -1}, T{0, 0, 0}, 1); + half_precision_math_test_exp(deviceQueue, T{-1, -1, -1}, T{0, 0, 0}, 1); + half_precision_math_test_exp2(deviceQueue, T{-1, -1, -1}, T{2, 2, 2}, 4); + half_precision_math_test_exp10(deviceQueue, T{-1, -1, -1}, T{2, 2, 2}, 100); + half_precision_math_test_log(deviceQueue, T{-1, -1, -1}, T{1, 1, 1}, 0); + half_precision_math_test_log2(deviceQueue, T{-1, -1, -1}, T{4, 4, 4}, 2); + half_precision_math_test_log10(deviceQueue, T{-1, -1, -1}, T{100, 100, 100}, + 2); + half_precision_math_test_sqrt(deviceQueue, T{-1, -1, -1}, T{4, 4, 4}, 2); + half_precision_math_test_rsqrt(deviceQueue, T{-1, -1, -1}, + T{0.25, 0.25, 0.25}, 2); + half_precision_math_test_recip(deviceQueue, T{-1, -1, -1}, + T{0.25, 0.25, 0.25}, 4); + half_precision_math_test_2_powr(deviceQueue, T{-1, -1, -1}, T{2, 2, 2}, + T{2, 2, 2}, 4); + half_precision_math_test_2_divide(deviceQueue, T{-1, -1, -1}, T{4, 4, 4}, + T{2, 2, 2}, 2); +} + +template void half_precision_math_tests_4(queue &deviceQueue) { + half_precision_math_test_sin(deviceQueue, T{-1, -1, -1, -1}, T{0, 0, 0, 0}, + 0); + half_precision_math_test_tan(deviceQueue, T{-1, -1, -1, -1}, T{0, 0, 0, 0}, + 0); + half_precision_math_test_cos(deviceQueue, T{-1, -1, -1, -1}, T{0, 0, 0, 0}, + 1); + half_precision_math_test_exp(deviceQueue, T{-1, -1, -1, -1}, T{0, 0, 0, 0}, + 1); + half_precision_math_test_exp2(deviceQueue, T{-1, -1, -1, -1}, T{2, 2, 2, 2}, + 4); + half_precision_math_test_exp10(deviceQueue, T{-1, -1, -1, -1}, T{2, 2, 2, 2}, + 100); + half_precision_math_test_log(deviceQueue, T{-1, -1, -1, -1}, T{1, 1, 1, 1}, + 0); + half_precision_math_test_log2(deviceQueue, T{-1, -1, -1, -1}, T{4, 4, 4, 4}, + 2); + half_precision_math_test_log10(deviceQueue, T{-1, -1, -1, -1}, + T{100, 100, 100, 100}, 2); + half_precision_math_test_sqrt(deviceQueue, T{-1, -1, -1, -1}, T{4, 4, 4, 4}, + 2); + half_precision_math_test_rsqrt(deviceQueue, T{-1, -1, -1, -1}, + T{0.25, 0.25, 0.25, 0.25}, 2); + half_precision_math_test_recip(deviceQueue, T{-1, -1, -1, -1}, + T{0.25, 0.25, 0.25, 0.25}, 4); + half_precision_math_test_2_powr(deviceQueue, T{-1, -1, -1, -1}, T{2, 2, 2, 2}, + T{2, 2, 2, 2}, 4); + half_precision_math_test_2_divide(deviceQueue, T{-1, -1, -1, -1}, + T{4, 4, 4, 4}, T{2, 2, 2, 2}, 2); +} int main() { queue deviceQueue; - HALF_PRECISION_TESTS_3(float3) - HALF_PRECISION_TESTS_3(marray) - - HALF_PRECISION_TESTS_4(float4) - HALF_PRECISION_TESTS_4(marray) + half_precision_math_tests_3(deviceQueue); + half_precision_math_tests_3>(deviceQueue); + half_precision_math_tests_4(deviceQueue); + half_precision_math_tests_4>(deviceQueue); std::cout << "Pass" << std::endl; return 0; } diff --git a/SYCL/DeviceLib/math_test_marray_vec.cpp b/SYCL/DeviceLib/math_test_marray_vec.cpp index 53ca722725..17aec48570 100644 --- a/SYCL/DeviceLib/math_test_marray_vec.cpp +++ b/SYCL/DeviceLib/math_test_marray_vec.cpp @@ -29,8 +29,6 @@ template bool checkEqual(marray A, size_t B) { return true; } -#define COMMA , - #define OPERATOR(NAME) \ template \ void math_test_##NAME(queue &deviceQueue, T result, T input, size_t ref) { \ @@ -163,117 +161,117 @@ OPERATOR_3(fma) #undef OPERATOR_3 -#define TESTS_4(TYPE) \ - math_test_tanh(deviceQueue, TYPE{-1, -1, -1, -1}, TYPE{0, 0, 0, 0}, 0); \ - math_test_cosh(deviceQueue, TYPE{-1, -1, -1, -1}, TYPE{0, 0, 0, 0}, 1); \ - math_test_sinh(deviceQueue, TYPE{-1, -1, -1, -1}, TYPE{0, 0, 0, 0}, 0); \ - math_test_acos(deviceQueue, TYPE{-1, -1, -1, -1}, TYPE{1, 1, 1, 1}, 0); \ - math_test_acospi(deviceQueue, TYPE{-1, -1, -1, -1}, TYPE{1, 1, 1, 1}, 0); \ - math_test_acosh(deviceQueue, TYPE{-1, -1, -1, -1}, TYPE{1, 1, 1, 1}, 0); \ - math_test_asin(deviceQueue, TYPE{-1, -1, -1, -1}, TYPE{0, 0, 0, 0}, 0); \ - math_test_asinpi(deviceQueue, TYPE{-1, -1, -1, -1}, TYPE{0, 0, 0, 0}, 0); \ - math_test_asinh(deviceQueue, TYPE{-1, -1, -1, -1}, TYPE{0, 0, 0, 0}, 0); \ - math_test_cbrt(deviceQueue, TYPE{-1, -1, -1, -1}, TYPE{1, 1, 1, 1}, 1); \ - math_test_atan(deviceQueue, TYPE{-1, -1, -1, -1}, TYPE{0, 0, 0, 0}, 0); \ - math_test_atanpi(deviceQueue, TYPE{-1, -1, -1, -1}, TYPE{0, 0, 0, 0}, 0); \ - math_test_atanh(deviceQueue, TYPE{-1, -1, -1, -1}, TYPE{0, 0, 0, 0}, 0); \ - math_test_exp(deviceQueue, TYPE{-1, -1, -1, -1}, TYPE{0, 0, 0, 0}, 1); \ - math_test_exp2(deviceQueue, TYPE{-1, -1, -1, -1}, TYPE{2, 2, 2, 2}, 4); \ - math_test_exp10(deviceQueue, TYPE{-1, -1, -1, -1}, TYPE{2, 2, 2, 2}, 100); \ - math_test_expm1(deviceQueue, TYPE{-1, -1, -1, -1}, TYPE{0, 0, 0, 0}, 0); \ - math_test_ceil(deviceQueue, TYPE{-1, -1, -1, -1}, TYPE{0.6, 0.6, 0.6, 0.6}, \ - 1); \ - math_test_tgamma(deviceQueue, TYPE{-1, -1, -1, -1}, TYPE{1, 1, 1, 1}, 1); \ - math_test_lgamma(deviceQueue, TYPE{-1, -1, -1, -1}, TYPE{1, 1, 1, 1}, 0); \ - math_test_erf(deviceQueue, TYPE{-1, -1, -1, -1}, TYPE{0, 0, 0, 0}, 0); \ - math_test_erfc(deviceQueue, TYPE{-1, -1, -1, -1}, TYPE{0, 0, 0, 0}, 1); \ - math_test_2_pow(deviceQueue, TYPE{-1, -1, -1, -1}, TYPE{2, 2, 2, 2}, \ - TYPE{2, 2, 2, 2}, 4); \ - math_test_2_powr(deviceQueue, TYPE{-1, -1, -1, -1}, TYPE{2, 2, 2, 2}, \ - TYPE{2, 2, 2, 2}, 4); \ - math_test_2_atan2(deviceQueue, TYPE{-1, -1, -1, -1}, TYPE{0, 0, 0, 0}, \ - TYPE{2, 2, 2, 2}, 0); \ - math_test_2_atan2pi(deviceQueue, TYPE{-1, -1, -1, -1}, TYPE{0, 0, 0, 0}, \ - TYPE{2, 2, 2, 2}, 0); \ - math_test_2_copysign(deviceQueue, TYPE{-1, -1, -1, -1}, \ - TYPE{-3, -3, -3, -3}, TYPE{2, 2, 2, 2}, 3); \ - math_test_2_fmin(deviceQueue, TYPE{-1, -1, -1, -1}, TYPE{2, 2, 2, 2}, \ - TYPE{3, 3, 3, 3}, 2); \ - math_test_2_fmax(deviceQueue, TYPE{-1, -1, -1, -1}, TYPE{2, 2, 2, 2}, \ - TYPE{3, 3, 3, 3}, 3); \ - math_test_2_hypot(deviceQueue, TYPE{-1, -1, -1, -1}, TYPE{4, 4, 4, 4}, \ - TYPE{3, 3, 3, 3}, 5); \ - math_test_2_maxmag(deviceQueue, TYPE{-1, -1, -1, -1}, TYPE{-2, -2, -2, -2}, \ - TYPE{3, 3, 3, 3}, 3); \ - math_test_2_minmag(deviceQueue, TYPE{-1, -1, -1, -1}, TYPE{2, 2, 2, 2}, \ - TYPE{-3, -3, -3, -3}, 2); \ - math_test_2_remainder(deviceQueue, TYPE{-1, -1, -1, -1}, TYPE{5, 5, 5, 5}, \ - TYPE{2, 2, 2, 2}, 1); \ - math_test_2_fdim(deviceQueue, TYPE{-1, -1, -1, -1}, TYPE{3, 3, 3, 3}, \ - TYPE{3, 3, 3, 3}, 0); \ - math_test_2_fmod(deviceQueue, TYPE{-1, -1, -1, -1}, TYPE{5, 5, 5, 5}, \ - TYPE{3, 3, 3, 3}, 2); \ - math_test_2_nextafter(deviceQueue, TYPE{-1, -1, -1, -1}, \ - TYPE{-0, -0, -0, -0}, TYPE{+0, +0, +0, +0}, 0); \ - math_test_3_fma(deviceQueue, TYPE{-1, -1, -1, -1}, TYPE{2, 2, 2, 2}, \ - TYPE{2, 2, 2, 2}, TYPE{1, 1, 1, 1}, 5); \ - math_test_3_mad(deviceQueue, TYPE{-1, -1, -1, -1}, TYPE{2, 2, 2, 2}, \ - TYPE{2, 2, 2, 2}, TYPE{1, 1, 1, 1}, 5); \ - math_test_3_mix(deviceQueue, TYPE{-1, -1, -1, -1}, TYPE{3, 3, 3, 3}, \ - TYPE{5, 5, 5, 5}, TYPE{0.5, 0.5, 0.5, 0.5}, 4); +template void math_tests_4(queue &deviceQueue) { + math_test_tanh(deviceQueue, T{-1, -1, -1, -1}, T{0, 0, 0, 0}, 0); + math_test_cosh(deviceQueue, T{-1, -1, -1, -1}, T{0, 0, 0, 0}, 1); + math_test_sinh(deviceQueue, T{-1, -1, -1, -1}, T{0, 0, 0, 0}, 0); + math_test_acos(deviceQueue, T{-1, -1, -1, -1}, T{1, 1, 1, 1}, 0); + math_test_acospi(deviceQueue, T{-1, -1, -1, -1}, T{1, 1, 1, 1}, 0); + math_test_acosh(deviceQueue, T{-1, -1, -1, -1}, T{1, 1, 1, 1}, 0); + math_test_asin(deviceQueue, T{-1, -1, -1, -1}, T{0, 0, 0, 0}, 0); + math_test_asinpi(deviceQueue, T{-1, -1, -1, -1}, T{0, 0, 0, 0}, 0); + math_test_asinh(deviceQueue, T{-1, -1, -1, -1}, T{0, 0, 0, 0}, 0); + math_test_cbrt(deviceQueue, T{-1, -1, -1, -1}, T{1, 1, 1, 1}, 1); + math_test_atan(deviceQueue, T{-1, -1, -1, -1}, T{0, 0, 0, 0}, 0); + math_test_atanpi(deviceQueue, T{-1, -1, -1, -1}, T{0, 0, 0, 0}, 0); + math_test_atanh(deviceQueue, T{-1, -1, -1, -1}, T{0, 0, 0, 0}, 0); + math_test_exp(deviceQueue, T{-1, -1, -1, -1}, T{0, 0, 0, 0}, 1); + math_test_exp2(deviceQueue, T{-1, -1, -1, -1}, T{2, 2, 2, 2}, 4); + math_test_exp10(deviceQueue, T{-1, -1, -1, -1}, T{2, 2, 2, 2}, 100); + math_test_expm1(deviceQueue, T{-1, -1, -1, -1}, T{0, 0, 0, 0}, 0); + math_test_ceil(deviceQueue, T{-1, -1, -1, -1}, T{0.6, 0.6, 0.6, 0.6}, 1); + math_test_tgamma(deviceQueue, T{-1, -1, -1, -1}, T{1, 1, 1, 1}, 1); + math_test_lgamma(deviceQueue, T{-1, -1, -1, -1}, T{1, 1, 1, 1}, 0); + math_test_erf(deviceQueue, T{-1, -1, -1, -1}, T{0, 0, 0, 0}, 0); + math_test_erfc(deviceQueue, T{-1, -1, -1, -1}, T{0, 0, 0, 0}, 1); + math_test_2_pow(deviceQueue, T{-1, -1, -1, -1}, T{2, 2, 2, 2}, T{2, 2, 2, 2}, + 4); + math_test_2_powr(deviceQueue, T{-1, -1, -1, -1}, T{2, 2, 2, 2}, T{2, 2, 2, 2}, + 4); + math_test_2_atan2(deviceQueue, T{-1, -1, -1, -1}, T{0, 0, 0, 0}, + T{2, 2, 2, 2}, 0); + math_test_2_atan2pi(deviceQueue, T{-1, -1, -1, -1}, T{0, 0, 0, 0}, + T{2, 2, 2, 2}, 0); + math_test_2_copysign(deviceQueue, T{-1, -1, -1, -1}, T{-3, -3, -3, -3}, + T{2, 2, 2, 2}, 3); + math_test_2_fmin(deviceQueue, T{-1, -1, -1, -1}, T{2, 2, 2, 2}, T{3, 3, 3, 3}, + 2); + math_test_2_fmax(deviceQueue, T{-1, -1, -1, -1}, T{2, 2, 2, 2}, T{3, 3, 3, 3}, + 3); + math_test_2_hypot(deviceQueue, T{-1, -1, -1, -1}, T{4, 4, 4, 4}, + T{3, 3, 3, 3}, 5); + math_test_2_maxmag(deviceQueue, T{-1, -1, -1, -1}, T{-2, -2, -2, -2}, + T{3, 3, 3, 3}, 3); + math_test_2_minmag(deviceQueue, T{-1, -1, -1, -1}, T{2, 2, 2, 2}, + T{-3, -3, -3, -3}, 2); + math_test_2_remainder(deviceQueue, T{-1, -1, -1, -1}, T{5, 5, 5, 5}, + T{2, 2, 2, 2}, 1); + math_test_2_fdim(deviceQueue, T{-1, -1, -1, -1}, T{3, 3, 3, 3}, T{3, 3, 3, 3}, + 0); + math_test_2_fmod(deviceQueue, T{-1, -1, -1, -1}, T{5, 5, 5, 5}, T{3, 3, 3, 3}, + 2); + math_test_2_nextafter(deviceQueue, T{-1, -1, -1, -1}, T{-0, -0, -0, -0}, + T{+0, +0, +0, +0}, 0); + math_test_3_fma(deviceQueue, T{-1, -1, -1, -1}, T{2, 2, 2, 2}, T{2, 2, 2, 2}, + T{1, 1, 1, 1}, 5); + math_test_3_mad(deviceQueue, T{-1, -1, -1, -1}, T{2, 2, 2, 2}, T{2, 2, 2, 2}, + T{1, 1, 1, 1}, 5); + math_test_3_mix(deviceQueue, T{-1, -1, -1, -1}, T{3, 3, 3, 3}, T{5, 5, 5, 5}, + T{0.5, 0.5, 0.5, 0.5}, 4); +} -#define TESTS_3(TYPE) \ - math_test_tan(deviceQueue, TYPE{-1, -1, -1}, TYPE{0, 0, 0}, 0); \ - math_test_tanh(deviceQueue, TYPE{-1, -1, -1}, TYPE{0, 0, 0}, 0); \ - math_test_cos(deviceQueue, TYPE{-1, -1, -1}, TYPE{0, 0, 0}, 1); \ - math_test_sin(deviceQueue, TYPE{-1, -1, -1}, TYPE{0, 0, 0}, 0); \ - math_test_cosh(deviceQueue, TYPE{-1, -1, -1}, TYPE{0, 0, 0}, 1); \ - math_test_sinh(deviceQueue, TYPE{-1, -1, -1}, TYPE{0, 0, 0}, 0); \ - math_test_acos(deviceQueue, TYPE{-1, -1, -1}, TYPE{1, 1, 1}, 0); \ - math_test_acosh(deviceQueue, TYPE{-1, -1, -1}, TYPE{1, 1, 1}, 0); \ - math_test_asin(deviceQueue, TYPE{-1, -1, -1}, TYPE{0, 0, 0}, 0); \ - math_test_asinh(deviceQueue, TYPE{-1, -1, -1}, TYPE{0, 0, 0}, 0); \ - math_test_cbrt(deviceQueue, TYPE{-1, -1, -1}, TYPE{1, 1, 1}, 1); \ - math_test_atan(deviceQueue, TYPE{-1, -1, -1}, TYPE{0, 0, 0}, 0); \ - math_test_atanh(deviceQueue, TYPE{-1, -1, -1}, TYPE{0, 0, 0}, 0); \ - math_test_exp(deviceQueue, TYPE{-1, -1, -1}, TYPE{0, 0, 0}, 1); \ - math_test_exp2(deviceQueue, TYPE{-1, -1, -1}, TYPE{2, 2, 2}, 4); \ - math_test_exp10(deviceQueue, TYPE{-1, -1, -1}, TYPE{2, 2, 2}, 100); \ - math_test_expm1(deviceQueue, TYPE{-1, -1, -1}, TYPE{0, 0, 0}, 0); \ - math_test_ceil(deviceQueue, TYPE{-1, -1, -1}, TYPE{0.6, 0.6, 0.6}, 1); \ - math_test_tgamma(deviceQueue, TYPE{-1, -1, -1}, TYPE{1, 1, 1}, 1); \ - math_test_lgamma(deviceQueue, TYPE{-1, -1, -1}, TYPE{1, 1, 1}, 0); \ - math_test_erf(deviceQueue, TYPE{-1, -1, -1}, TYPE{0, 0, 0}, 0); \ - math_test_erfc(deviceQueue, TYPE{-1, -1, -1}, TYPE{0, 0, 0}, 1); \ - math_test_log(deviceQueue, TYPE{-1, -1, -1}, TYPE{1, 1, 1}, 0); \ - math_test_log2(deviceQueue, TYPE{-1, -1, -1}, TYPE{4, 4, 4}, 2); \ - math_test_log10(deviceQueue, TYPE{-1, -1, -1}, TYPE{100, 100, 100}, 2); \ - math_test_log1p(deviceQueue, TYPE{-1, -1, -1}, TYPE{0, 0, 0}, 0); \ - math_test_logb(deviceQueue, TYPE{-1, -1, -1}, TYPE{1.1, 1.1, 1.1}, 0); \ - math_test_sqrt(deviceQueue, TYPE{-1, -1, -1}, TYPE{4, 4, 4}, 2); \ - math_test_rsqrt(deviceQueue, TYPE{-1, -1, -1}, TYPE{0.25, 0.25, 0.25}, 2); \ - math_test_rint(deviceQueue, TYPE{-1, -1, -1}, TYPE{2.9, 2.9, 2.9}, 3); \ - math_test_round(deviceQueue, TYPE{-1, -1, -1}, TYPE{0.5, 0.5, 0.5}, 1); \ - math_test_trunc(deviceQueue, TYPE{-1, -1, -1}, TYPE{1.9, 1.9, 1.9}, 1); +template void math_tests_3(queue &deviceQueue) { + math_test_tan(deviceQueue, T{-1, -1, -1}, T{0, 0, 0}, 0); + math_test_tanh(deviceQueue, T{-1, -1, -1}, T{0, 0, 0}, 0); + math_test_cos(deviceQueue, T{-1, -1, -1}, T{0, 0, 0}, 1); + math_test_sin(deviceQueue, T{-1, -1, -1}, T{0, 0, 0}, 0); + math_test_cosh(deviceQueue, T{-1, -1, -1}, T{0, 0, 0}, 1); + math_test_sinh(deviceQueue, T{-1, -1, -1}, T{0, 0, 0}, 0); + math_test_acos(deviceQueue, T{-1, -1, -1}, T{1, 1, 1}, 0); + math_test_acosh(deviceQueue, T{-1, -1, -1}, T{1, 1, 1}, 0); + math_test_asin(deviceQueue, T{-1, -1, -1}, T{0, 0, 0}, 0); + math_test_asinh(deviceQueue, T{-1, -1, -1}, T{0, 0, 0}, 0); + math_test_cbrt(deviceQueue, T{-1, -1, -1}, T{1, 1, 1}, 1); + math_test_atan(deviceQueue, T{-1, -1, -1}, T{0, 0, 0}, 0); + math_test_atanh(deviceQueue, T{-1, -1, -1}, T{0, 0, 0}, 0); + math_test_exp(deviceQueue, T{-1, -1, -1}, T{0, 0, 0}, 1); + math_test_exp2(deviceQueue, T{-1, -1, -1}, T{2, 2, 2}, 4); + math_test_exp10(deviceQueue, T{-1, -1, -1}, T{2, 2, 2}, 100); + math_test_expm1(deviceQueue, T{-1, -1, -1}, T{0, 0, 0}, 0); + math_test_ceil(deviceQueue, T{-1, -1, -1}, T{0.6, 0.6, 0.6}, 1); + math_test_tgamma(deviceQueue, T{-1, -1, -1}, T{1, 1, 1}, 1); + math_test_lgamma(deviceQueue, T{-1, -1, -1}, T{1, 1, 1}, 0); + math_test_erf(deviceQueue, T{-1, -1, -1}, T{0, 0, 0}, 0); + math_test_erfc(deviceQueue, T{-1, -1, -1}, T{0, 0, 0}, 1); + math_test_log(deviceQueue, T{-1, -1, -1}, T{1, 1, 1}, 0); + math_test_log2(deviceQueue, T{-1, -1, -1}, T{4, 4, 4}, 2); + math_test_log10(deviceQueue, T{-1, -1, -1}, T{100, 100, 100}, 2); + math_test_log1p(deviceQueue, T{-1, -1, -1}, T{0, 0, 0}, 0); + math_test_logb(deviceQueue, T{-1, -1, -1}, T{1.1, 1.1, 1.1}, 0); + math_test_sqrt(deviceQueue, T{-1, -1, -1}, T{4, 4, 4}, 2); + math_test_rsqrt(deviceQueue, T{-1, -1, -1}, T{0.25, 0.25, 0.25}, 2); + math_test_rint(deviceQueue, T{-1, -1, -1}, T{2.9, 2.9, 2.9}, 3); + math_test_round(deviceQueue, T{-1, -1, -1}, T{0.5, 0.5, 0.5}, 1); + math_test_trunc(deviceQueue, T{-1, -1, -1}, T{1.9, 1.9, 1.9}, 1); +} int main() { queue deviceQueue; + math_tests_4(deviceQueue); + math_tests_4(deviceQueue); + math_tests_4>(deviceQueue); + math_tests_4>(deviceQueue); - TESTS_4(float4) - TESTS_4(double4) - TESTS_4(marray) - TESTS_4(marray) - - TESTS_3(float3) - TESTS_3(double3) - TESTS_3(marray) - TESTS_3(marray) + math_tests_3(deviceQueue); + math_tests_3(deviceQueue); + math_tests_3>(deviceQueue); + math_tests_3>(deviceQueue); if (deviceQueue.get_device().has(sycl::aspect::fp16)) { - TESTS_4(half4) - TESTS_4(marray) - TESTS_3(half3) - TESTS_3(marray) + math_tests_4(deviceQueue); + math_tests_4>(deviceQueue); + math_tests_3(deviceQueue); + math_tests_3>(deviceQueue); } std::cout << "Pass" << std::endl; diff --git a/SYCL/DeviceLib/native_math_test_marray_vec.cpp b/SYCL/DeviceLib/native_math_test_marray_vec.cpp index e6708f3779..f138ced366 100644 --- a/SYCL/DeviceLib/native_math_test_marray_vec.cpp +++ b/SYCL/DeviceLib/native_math_test_marray_vec.cpp @@ -29,8 +29,6 @@ template bool checkEqual(marray A, size_t B) { return true; } -#define COMMA , - #define NATIVE_OPERATOR(NAME) \ template \ void native_math_test_##NAME(queue &deviceQueue, T result, T input, \ @@ -94,62 +92,53 @@ NATIVE_OPERATOR_2(powr) #undef NATIVE_OPERATOR_2 -#define NATIVE_TESTS_3(TYPE) \ - native_math_test_sin(deviceQueue, TYPE{-1, -1, -1}, TYPE{0, 0, 0}, 0); \ - native_math_test_tan(deviceQueue, TYPE{-1, -1, -1}, TYPE{0, 0, 0}, 0); \ - native_math_test_cos(deviceQueue, TYPE{-1, -1, -1}, TYPE{0, 0, 0}, 1); \ - native_math_test_exp(deviceQueue, TYPE{-1, -1, -1}, TYPE{0, 0, 0}, 1); \ - native_math_test_exp2(deviceQueue, TYPE{-1, -1, -1}, TYPE{2, 2, 2}, 4); \ - native_math_test_log(deviceQueue, TYPE{-1, -1, -1}, TYPE{1, 1, 1}, 0); \ - native_math_test_log2(deviceQueue, TYPE{-1, -1, -1}, TYPE{4, 4, 4}, 2); \ - native_math_test_log10(deviceQueue, TYPE{-1, -1, -1}, TYPE{100, 100, 100}, \ - 2); \ - native_math_test_sqrt(deviceQueue, TYPE{-1, -1, -1}, TYPE{4, 4, 4}, 2); \ - native_math_test_rsqrt(deviceQueue, TYPE{-1, -1, -1}, \ - TYPE{0.25, 0.25, 0.25}, 2); \ - native_math_test_recip(deviceQueue, TYPE{-1, -1, -1}, \ - TYPE{0.25, 0.25, 0.25}, 4); \ - native_math_test_2_powr(deviceQueue, TYPE{-1, -1, -1}, TYPE{2, 2, 2}, \ - TYPE{2, 2, 2}, 4); \ - native_math_test_2_divide(deviceQueue, TYPE{-1, -1, -1}, TYPE{4, 4, 4}, \ - TYPE{2, 2, 2}, 2); - -#define NATIVE_TESTS_4(TYPE) \ - native_math_test_sin(deviceQueue, TYPE{-1, -1, -1, -1}, TYPE{0, 0, 0, 0}, \ - 0); \ - native_math_test_tan(deviceQueue, TYPE{-1, -1, -1, -1}, TYPE{0, 0, 0, 0}, \ - 0); \ - native_math_test_cos(deviceQueue, TYPE{-1, -1, -1, -1}, TYPE{0, 0, 0, 0}, \ - 1); \ - native_math_test_exp(deviceQueue, TYPE{-1, -1, -1, -1}, TYPE{0, 0, 0, 0}, \ - 1); \ - native_math_test_exp2(deviceQueue, TYPE{-1, -1, -1, -1}, TYPE{2, 2, 2, 2}, \ - 4); \ - native_math_test_log(deviceQueue, TYPE{-1, -1, -1, -1}, TYPE{1, 1, 1, 1}, \ - 0); \ - native_math_test_log2(deviceQueue, TYPE{-1, -1, -1, -1}, TYPE{4, 4, 4, 4}, \ - 2); \ - native_math_test_log10(deviceQueue, TYPE{-1, -1, -1, -1}, \ - TYPE{100, 100, 100, 100}, 2); \ - native_math_test_sqrt(deviceQueue, TYPE{-1, -1, -1, -1}, TYPE{4, 4, 4, 4}, \ - 2); \ - native_math_test_rsqrt(deviceQueue, TYPE{-1, -1, -1, -1}, \ - TYPE{0.25, 0.25, 0.25, 0.25}, 2); \ - native_math_test_recip(deviceQueue, TYPE{-1, -1, -1, -1}, \ - TYPE{0.25, 0.25, 0.25, 0.25}, 4); \ - native_math_test_2_powr(deviceQueue, TYPE{-1, -1, -1, -1}, TYPE{2, 2, 2, 2}, \ - TYPE{2, 2, 2, 2}, 4); \ - native_math_test_2_divide(deviceQueue, TYPE{-1, -1, -1, -1}, \ - TYPE{4, 4, 4, 4}, TYPE{2, 2, 2, 2}, 2); +template void native_tests_3(queue &deviceQueue) { + native_math_test_sin(deviceQueue, T{-1, -1, -1}, T{0, 0, 0}, 0); + native_math_test_tan(deviceQueue, T{-1, -1, -1}, T{0, 0, 0}, 0); + native_math_test_cos(deviceQueue, T{-1, -1, -1}, T{0, 0, 0}, 1); + native_math_test_exp(deviceQueue, T{-1, -1, -1}, T{0, 0, 0}, 1); + native_math_test_exp2(deviceQueue, T{-1, -1, -1}, T{2, 2, 2}, 4); + native_math_test_log(deviceQueue, T{-1, -1, -1}, T{1, 1, 1}, 0); + native_math_test_log2(deviceQueue, T{-1, -1, -1}, T{4, 4, 4}, 2); + native_math_test_log10(deviceQueue, T{-1, -1, -1}, T{100, 100, 100}, 2); + native_math_test_sqrt(deviceQueue, T{-1, -1, -1}, T{4, 4, 4}, 2); + native_math_test_rsqrt(deviceQueue, T{-1, -1, -1}, T{0.25, 0.25, 0.25}, 2); + native_math_test_recip(deviceQueue, T{-1, -1, -1}, T{0.25, 0.25, 0.25}, 4); + native_math_test_2_powr(deviceQueue, T{-1, -1, -1}, T{2, 2, 2}, T{2, 2, 2}, + 4); + native_math_test_2_divide(deviceQueue, T{-1, -1, -1}, T{4, 4, 4}, T{2, 2, 2}, + 2); +} + +template void native_tests_4(queue &deviceQueue) { + native_math_test_sin(deviceQueue, T{-1, -1, -1, -1}, T{0, 0, 0, 0}, 0); + native_math_test_tan(deviceQueue, T{-1, -1, -1, -1}, T{0, 0, 0, 0}, 0); + native_math_test_cos(deviceQueue, T{-1, -1, -1, -1}, T{0, 0, 0, 0}, 1); + native_math_test_exp(deviceQueue, T{-1, -1, -1, -1}, T{0, 0, 0, 0}, 1); + native_math_test_exp2(deviceQueue, T{-1, -1, -1, -1}, T{2, 2, 2, 2}, 4); + native_math_test_log(deviceQueue, T{-1, -1, -1, -1}, T{1, 1, 1, 1}, 0); + native_math_test_log2(deviceQueue, T{-1, -1, -1, -1}, T{4, 4, 4, 4}, 2); + native_math_test_log10(deviceQueue, T{-1, -1, -1, -1}, T{100, 100, 100, 100}, + 2); + native_math_test_sqrt(deviceQueue, T{-1, -1, -1, -1}, T{4, 4, 4, 4}, 2); + native_math_test_rsqrt(deviceQueue, T{-1, -1, -1, -1}, + T{0.25, 0.25, 0.25, 0.25}, 2); + native_math_test_recip(deviceQueue, T{-1, -1, -1, -1}, + T{0.25, 0.25, 0.25, 0.25}, 4); + native_math_test_2_powr(deviceQueue, T{-1, -1, -1, -1}, T{2, 2, 2, 2}, + T{2, 2, 2, 2}, 4); + native_math_test_2_divide(deviceQueue, T{-1, -1, -1, -1}, T{4, 4, 4, 4}, + T{2, 2, 2, 2}, 2); +} int main() { queue deviceQueue; - NATIVE_TESTS_3(float3) - NATIVE_TESTS_3(marray) + native_tests_3(deviceQueue); + native_tests_3>(deviceQueue); - NATIVE_TESTS_4(float4) - NATIVE_TESTS_4(marray) + native_tests_4(deviceQueue); + native_tests_4>(deviceQueue); std::cout << "Pass" << std::endl; return 0; From b589c329071b05c4f393f8767092d16573a9dacf Mon Sep 17 00:00:00 2001 From: "jack.kirk" Date: Wed, 11 May 2022 14:33:38 +0100 Subject: [PATCH 03/20] format Signed-off-by: jack.kirk --- SYCL/Basic/half_builtins.cpp | 52 ++++++++++---------- SYCL/DeviceLib/built-ins/ext_native_math.cpp | 32 ++++++------ 2 files changed, 41 insertions(+), 43 deletions(-) diff --git a/SYCL/Basic/half_builtins.cpp b/SYCL/Basic/half_builtins.cpp index e4f47695e1..f26e58494d 100644 --- a/SYCL/Basic/half_builtins.cpp +++ b/SYCL/Basic/half_builtins.cpp @@ -167,33 +167,33 @@ template bool check(vec a, vec b) { int main() { queue q; if (q.get_device().has(sycl::aspect::fp16)) { - float16 a, b, c, d; - for (int i = 0; i < SZ_max; i++) { - a[i] = i / (float)SZ_max; - b[i] = (SZ_max - i) / (float)SZ_max; - c[i] = (float)(3 * i); - } - int err = 0; - { - buffer a_buf(&a, 1); - buffer b_buf(&b, 1); - buffer c_buf(&c, 1); - buffer err_buf(&err, 1); - q.submit([&](handler &cgh) { - auto A = a_buf.get_access(cgh); - auto B = b_buf.get_access(cgh); - auto C = c_buf.get_access(cgh); - auto err = err_buf.get_access(cgh); - cgh.parallel_for(SZ_max, [=](item<1> index) { - size_t i = index.get_id(0); - TEST_BUILTIN_1(fabs); - TEST_BUILTIN_2(fmin); - TEST_BUILTIN_2(fmax); - TEST_BUILTIN_3(fma); + float16 a, b, c, d; + for (int i = 0; i < SZ_max; i++) { + a[i] = i / (float)SZ_max; + b[i] = (SZ_max - i) / (float)SZ_max; + c[i] = (float)(3 * i); + } + int err = 0; + { + buffer a_buf(&a, 1); + buffer b_buf(&b, 1); + buffer c_buf(&c, 1); + buffer err_buf(&err, 1); + q.submit([&](handler &cgh) { + auto A = a_buf.get_access(cgh); + auto B = b_buf.get_access(cgh); + auto C = c_buf.get_access(cgh); + auto err = err_buf.get_access(cgh); + cgh.parallel_for(SZ_max, [=](item<1> index) { + size_t i = index.get_id(0); + TEST_BUILTIN_1(fabs); + TEST_BUILTIN_2(fmin); + TEST_BUILTIN_2(fmax); + TEST_BUILTIN_3(fma); + }); }); - }); + } + assert(err == 0); } - assert(err == 0); -} return 0; } diff --git a/SYCL/DeviceLib/built-ins/ext_native_math.cpp b/SYCL/DeviceLib/built-ins/ext_native_math.cpp index 913f51070e..5f2d7e36d1 100644 --- a/SYCL/DeviceLib/built-ins/ext_native_math.cpp +++ b/SYCL/DeviceLib/built-ins/ext_native_math.cpp @@ -91,19 +91,18 @@ int main() { {tu[0], tu[1]}); native_tanh_tester( q, {tv[0], tv[1], tv[2]}, {tl[0], tl[1], tl[2]}, {tu[0], tu[1], tu[2]}); - + native_tanh_tester(q, {tv[0], tv[1], tv[2], tv[3]}, {tl[0], tl[1], tl[2], tl[3]}, {tu[0], tu[1], tu[2], tu[3]}); - native_tanh_tester>(q, {tv[0], tv[1], tv[2]}, - {tl[0], tl[1], tl[2]}, - {tu[0], tu[1], tu[2]}); + native_tanh_tester>( + q, {tv[0], tv[1], tv[2]}, {tl[0], tl[1], tl[2]}, {tu[0], tu[1], tu[2]}); native_tanh_tester>(q, {tv[0], tv[1], tv[2], tv[3]}, - {tl[0], tl[1], tl[2], tl[3]}, - {tu[0], tu[1], tu[2], tu[3]}); + {tl[0], tl[1], tl[2], tl[3]}, + {tu[0], tu[1], tu[2], tu[3]}); native_tanh_tester>(q, {tv[0], tv[1], tv[2], tv[3]}, - {tl[0], tl[1], tl[2], tl[3]}, - {tu[0], tu[1], tu[2], tu[3]}); + {tl[0], tl[1], tl[2], tl[3]}, + {tu[0], tu[1], tu[2], tu[3]}); native_tanh_tester( q, {tv[0], tv[1], tv[2], tv[3], tv[4], tv[5], tv[6], tv[7]}, {tl[0], tl[1], tl[2], tl[3], tl[4], tl[5], tl[6], tl[7]}, @@ -129,9 +128,9 @@ int main() { native_tanh_tester(q, {tv[0], tv[1], tv[2], tv[3]}, {tl[0], tl[1], tl[2], tl[3]}, {tu[0], tu[1], tu[2], tu[3]}); - native_tanh_tester>(q, {tv[0], tv[1], tv[2], tv[3]}, - {tl[0], tl[1], tl[2], tl[3]}, - {tu[0], tu[1], tu[2], tu[3]}); + native_tanh_tester>( + q, {tv[0], tv[1], tv[2], tv[3]}, {tl[0], tl[1], tl[2], tl[3]}, + {tu[0], tu[1], tu[2], tu[3]}); native_tanh_tester( q, {tv[0], tv[1], tv[2], tv[3], tv[4], tv[5], tv[6], tv[7]}, {tl[0], tl[1], tl[2], tl[3], tl[4], tl[5], tl[6], tl[7]}, @@ -160,12 +159,11 @@ int main() { native_exp2_tester(q, {ev[0], ev[1], ev[2], ev[3]}, {el[0], el[1], el[2], el[3]}, {eu[0], eu[1], eu[2], eu[3]}); - native_exp2_tester>(q, {ev[0], ev[1], ev[2]}, - {el[0], el[1], el[2]}, - {eu[0], eu[1], eu[2]}); - native_exp2_tester>(q, {ev[0], ev[1], ev[2], ev[3]}, - {el[0], el[1], el[2], el[3]}, - {eu[0], eu[1], eu[2], eu[3]}); + native_exp2_tester>( + q, {ev[0], ev[1], ev[2]}, {el[0], el[1], el[2]}, {eu[0], eu[1], eu[2]}); + native_exp2_tester>( + q, {ev[0], ev[1], ev[2], ev[3]}, {el[0], el[1], el[2], el[3]}, + {eu[0], eu[1], eu[2], eu[3]}); native_exp2_tester( q, {ev[0], ev[1], ev[2], ev[3], ev[4], ev[5], ev[6], ev[7]}, {el[0], el[1], el[2], el[3], el[4], el[5], el[6], el[7]}, From ab479c843b77016acf0863ecc70eb0c0f367f658 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Thu, 16 Jun 2022 09:04:32 +0100 Subject: [PATCH 04/20] split fp16 cases into new files. Signed-off-by: JackAKirk --- SYCL/DeviceLib/built-ins/ext_native_math.cpp | 210 +++----------- .../built-ins/ext_native_math_common.hpp | 67 +++++ .../built-ins/ext_native_math_fp16.cpp | 94 +++++++ SYCL/DeviceLib/math_test_marray_vec.cpp | 259 +----------------- .../DeviceLib/math_test_marray_vec_common.hpp | 250 +++++++++++++++++ SYCL/DeviceLib/math_test_marray_vec_fp16.cpp | 27 ++ 6 files changed, 481 insertions(+), 426 deletions(-) create mode 100644 SYCL/DeviceLib/built-ins/ext_native_math_common.hpp create mode 100644 SYCL/DeviceLib/built-ins/ext_native_math_fp16.cpp create mode 100644 SYCL/DeviceLib/math_test_marray_vec_common.hpp create mode 100644 SYCL/DeviceLib/math_test_marray_vec_fp16.cpp diff --git a/SYCL/DeviceLib/built-ins/ext_native_math.cpp b/SYCL/DeviceLib/built-ins/ext_native_math.cpp index 5f2d7e36d1..fa24addd0e 100644 --- a/SYCL/DeviceLib/built-ins/ext_native_math.cpp +++ b/SYCL/DeviceLib/built-ins/ext_native_math.cpp @@ -1,182 +1,52 @@ -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-device-code-split=per_kernel %s -o %t.out +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out // RUN: %HOST_RUN_PLACEHOLDER %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out // RUN: %ACC_RUN_PLACEHOLDER %t.out -// OpenCL CPU driver does not support cl_khr_fp16 extension for this reason this -// test is compiled with the -fsycl-device-code-split flag +// tests oneapi extension native tanh math function for sycl::vec and +// sycl::marray float cases. -#include -#include - -template -void assert_out_of_bound(sycl::marray val, sycl::marray lower, - sycl::marray upper) { - for (int i = 0; i < N; i++) { - assert(lower[i] < val[i] && val[i] < upper[i]); - } -} - -template void assert_out_of_bound(T val, T lower, T upper) { - assert(sycl::all(lower < val && val < upper)); -} - -template <> -void assert_out_of_bound(float val, float lower, float upper) { - assert(lower < val && val < upper); -} - -template <> -void assert_out_of_bound(sycl::half val, sycl::half lower, - sycl::half upper) { - assert(lower < val && val < upper); -} - -template -void native_tanh_tester(sycl::queue q, T val, T up, T lo) { - T r = val; - -#ifdef SYCL_EXT_ONEAPI_NATIVE_MATH - { - sycl::buffer BufR(&r, sycl::range<1>(1)); - q.submit([&](sycl::handler &cgh) { - auto AccR = BufR.template get_access(cgh); - cgh.single_task([=]() { - AccR[0] = sycl::ext::oneapi::experimental::native::tanh(AccR[0]); - }); - }); - } - - assert_out_of_bound(r, up, lo); -#else - assert(!"SYCL_EXT_ONEAPI_NATIVE_MATH not supported"); -#endif -} - -template -void native_exp2_tester(sycl::queue q, T val, T up, T lo) { - T r = val; - -#ifdef SYCL_EXT_ONEAPI_NATIVE_MATH - { - sycl::buffer BufR(&r, sycl::range<1>(1)); - q.submit([&](sycl::handler &cgh) { - auto AccR = BufR.template get_access(cgh); - cgh.single_task([=]() { - AccR[0] = sycl::ext::oneapi::experimental::native::exp2(AccR[0]); - }); - }); - } - - assert_out_of_bound(r, up, lo); -#else - assert(!"SYCL_EXT_ONEAPI_NATIVE_MATH not supported"); -#endif -} +#include "ext_native_math_common.hpp" int main() { - sycl::queue q; - - const double tv[16] = {-2.0, -1.5, -1.0, 0.0, 2.0, 1.5, 1.0, 0.0, - -1.7, 1.7, -1.2, 1.2, -3.0, 3.0, -10.0, 10.0}; - const double tl[16] = {-0.97, -0.91, -0.77, -0.1, 0.95, 0.89, 0.75, -0.1, - -0.94, 0.92, -0.84, 0.82, -1.0, 0.98, -1.10, 0.98}; - const double tu[16] = {-0.95, -0.89, -0.75, 0.1, 0.97, 0.91, 0.77, 0.1, - -0.92, 0.94, -0.82, 0.84, -0.98, 1.00, -0.98, 1.10}; - - native_tanh_tester(q, tv[0], tl[0], tu[0]); - native_tanh_tester(q, {tv[0], tv[1]}, {tl[0], tl[1]}, - {tu[0], tu[1]}); - native_tanh_tester( - q, {tv[0], tv[1], tv[2]}, {tl[0], tl[1], tl[2]}, {tu[0], tu[1], tu[2]}); - - native_tanh_tester(q, {tv[0], tv[1], tv[2], tv[3]}, - {tl[0], tl[1], tl[2], tl[3]}, - {tu[0], tu[1], tu[2], tu[3]}); - native_tanh_tester>( - q, {tv[0], tv[1], tv[2]}, {tl[0], tl[1], tl[2]}, {tu[0], tu[1], tu[2]}); - native_tanh_tester>(q, {tv[0], tv[1], tv[2], tv[3]}, - {tl[0], tl[1], tl[2], tl[3]}, - {tu[0], tu[1], tu[2], tu[3]}); - native_tanh_tester>(q, {tv[0], tv[1], tv[2], tv[3]}, - {tl[0], tl[1], tl[2], tl[3]}, - {tu[0], tu[1], tu[2], tu[3]}); - native_tanh_tester( - q, {tv[0], tv[1], tv[2], tv[3], tv[4], tv[5], tv[6], tv[7]}, - {tl[0], tl[1], tl[2], tl[3], tl[4], tl[5], tl[6], tl[7]}, - {tu[0], tu[1], tu[2], tu[3], tu[4], tu[5], tu[6], tu[7]}); - native_tanh_tester( - q, - {tv[0], tv[1], tv[2], tv[3], tv[4], tv[5], tv[6], tv[7], tv[8], tv[9], - tv[10], tv[11], tv[12], tv[13], tv[14], tv[15]}, - {tl[0], tl[1], tl[2], tl[3], tl[4], tl[5], tl[6], tl[7], tl[8], tl[9], - tl[10], tl[11], tl[12], tl[13], tl[14], tl[15]}, - {tu[0], tu[1], tu[2], tu[3], tu[4], tu[5], tu[6], tu[7], tu[8], tu[9], - tu[10], tu[11], tu[12], tu[13], tu[14], tu[15]}); - - if (q.get_device().has(sycl::aspect::fp16)) { - - native_tanh_tester(q, tv[0], tl[0], tu[0]); - native_tanh_tester(q, {tv[0], tv[1]}, {tl[0], tl[1]}, - {tu[0], tu[1]}); - native_tanh_tester( - q, {tv[0], tv[1], tv[2]}, {tl[0], tl[1], tl[2]}, {tu[0], tu[1], tu[2]}); - native_tanh_tester>( - q, {tv[0], tv[1], tv[2]}, {tl[0], tl[1], tl[2]}, {tu[0], tu[1], tu[2]}); - native_tanh_tester(q, {tv[0], tv[1], tv[2], tv[3]}, - {tl[0], tl[1], tl[2], tl[3]}, - {tu[0], tu[1], tu[2], tu[3]}); - native_tanh_tester>( - q, {tv[0], tv[1], tv[2], tv[3]}, {tl[0], tl[1], tl[2], tl[3]}, - {tu[0], tu[1], tu[2], tu[3]}); - native_tanh_tester( - q, {tv[0], tv[1], tv[2], tv[3], tv[4], tv[5], tv[6], tv[7]}, - {tl[0], tl[1], tl[2], tl[3], tl[4], tl[5], tl[6], tl[7]}, - {tu[0], tu[1], tu[2], tu[3], tu[4], tu[5], tu[6], tu[7]}); - native_tanh_tester( - q, - {tv[0], tv[1], tv[2], tv[3], tv[4], tv[5], tv[6], tv[7], tv[8], tv[9], - tv[10], tv[11], tv[12], tv[13], tv[14], tv[15]}, - {tl[0], tl[1], tl[2], tl[3], tl[4], tl[5], tl[6], tl[7], tl[8], tl[9], - tl[10], tl[11], tl[12], tl[13], tl[14], tl[15]}, - {tu[0], tu[1], tu[2], tu[3], tu[4], tu[5], tu[6], tu[7], tu[8], tu[9], - tu[10], tu[11], tu[12], tu[13], tu[14], tu[15]}); - - const double ev[16] = {-2.0, -1.5, -1.0, 0.0, 2.0, 1.5, 1.0, 0.0, - -2.0, -1.5, -1.0, 0.0, 2.0, 1.5, 1.0, 0.0}; - const double el[16] = {0.1, 0.34, 0.4, -0.9, 3.9, 2.7, 1.9, -0.9, - 0.1, 0.34, 0.4, -0.9, 3.9, 2.7, 1.9, -0.9}; - const double eu[16] = {0.3, 0.36, 0.6, 1.1, 4.1, 2.9, 2.1, 1.1, - 0.3, 0.36, 0.6, 1.1, 4.1, 2.9, 2.1, 1.1}; - - native_exp2_tester(q, ev[0], el[0], eu[0]); - native_exp2_tester(q, {ev[0], ev[1]}, {el[0], el[1]}, - {eu[0], eu[1]}); - native_exp2_tester( - q, {ev[0], ev[1], ev[2]}, {el[0], el[1], el[2]}, {eu[0], eu[1], eu[2]}); - native_exp2_tester(q, {ev[0], ev[1], ev[2], ev[3]}, - {el[0], el[1], el[2], el[3]}, - {eu[0], eu[1], eu[2], eu[3]}); - native_exp2_tester>( - q, {ev[0], ev[1], ev[2]}, {el[0], el[1], el[2]}, {eu[0], eu[1], eu[2]}); - native_exp2_tester>( - q, {ev[0], ev[1], ev[2], ev[3]}, {el[0], el[1], el[2], el[3]}, - {eu[0], eu[1], eu[2], eu[3]}); - native_exp2_tester( - q, {ev[0], ev[1], ev[2], ev[3], ev[4], ev[5], ev[6], ev[7]}, - {el[0], el[1], el[2], el[3], el[4], el[5], el[6], el[7]}, - {eu[0], eu[1], eu[2], eu[3], eu[4], eu[5], eu[6], eu[7]}); - native_exp2_tester( - q, - {ev[0], ev[1], ev[2], ev[3], ev[4], ev[5], ev[6], ev[7], ev[8], ev[9], - ev[10], ev[11], ev[12], ev[13], ev[14], ev[15]}, - {el[0], el[1], el[2], el[3], el[4], el[5], el[6], el[7], el[8], el[9], - el[10], el[11], el[12], el[13], el[14], el[15]}, - {eu[0], eu[1], eu[2], eu[3], eu[4], eu[5], eu[6], eu[7], eu[8], eu[9], - eu[10], eu[11], eu[12], eu[13], eu[14], eu[15]}); - } +sycl::queue q; + +const float tv[16] = {-2.0, -1.5, -1.0, 0.0, 2.0, 1.5, 1.0, 0.0, + -1.7, 1.7, -1.2, 1.2, -3.0, 3.0, -10.0, 10.0}; +const float tl[16] = {-0.97, -0.91, -0.77, -0.1, 0.95, 0.89, 0.75, -0.1, + -0.94, 0.92, -0.84, 0.82, -1.0, 0.98, -1.10, 0.98}; +const float tu[16] = {-0.95, -0.89, -0.75, 0.1, 0.97, 0.91, 0.77, 0.1, + -0.92, 0.94, -0.82, 0.84, -0.98, 1.00, -0.98, 1.10}; + +native_tanh_tester(q, tv[0], tl[0], tu[0]); +native_tanh_tester(q, {tv[0], tv[1]}, {tl[0], tl[1]}, + {tu[0], tu[1]}); +native_tanh_tester(q, {tv[0], tv[1], tv[2]}, + {tl[0], tl[1], tl[2]}, {tu[0], tu[1], tu[2]}); + +native_tanh_tester(q, {tv[0], tv[1], tv[2], tv[3]}, + {tl[0], tl[1], tl[2], tl[3]}, + {tu[0], tu[1], tu[2], tu[3]}); +native_tanh_tester>(q, {tv[0], tv[1], tv[2]}, + {tl[0], tl[1], tl[2]}, + {tu[0], tu[1], tu[2]}); +native_tanh_tester>(q, {tv[0], tv[1], tv[2], tv[3]}, + {tl[0], tl[1], tl[2], tl[3]}, + {tu[0], tu[1], tu[2], tu[3]}); +native_tanh_tester( + q, {tv[0], tv[1], tv[2], tv[3], tv[4], tv[5], tv[6], tv[7]}, + {tl[0], tl[1], tl[2], tl[3], tl[4], tl[5], tl[6], tl[7]}, + {tu[0], tu[1], tu[2], tu[3], tu[4], tu[5], tu[6], tu[7]}); +native_tanh_tester( + q, + {tv[0], tv[1], tv[2], tv[3], tv[4], tv[5], tv[6], tv[7], tv[8], tv[9], + tv[10], tv[11], tv[12], tv[13], tv[14], tv[15]}, + {tl[0], tl[1], tl[2], tl[3], tl[4], tl[5], tl[6], tl[7], tl[8], tl[9], + tl[10], tl[11], tl[12], tl[13], tl[14], tl[15]}, + {tu[0], tu[1], tu[2], tu[3], tu[4], tu[5], tu[6], tu[7], tu[8], tu[9], + tu[10], tu[11], tu[12], tu[13], tu[14], tu[15]}); return 0; } diff --git a/SYCL/DeviceLib/built-ins/ext_native_math_common.hpp b/SYCL/DeviceLib/built-ins/ext_native_math_common.hpp new file mode 100644 index 0000000000..1ac9203537 --- /dev/null +++ b/SYCL/DeviceLib/built-ins/ext_native_math_common.hpp @@ -0,0 +1,67 @@ +#include +#include + +template +void assert_out_of_bound(sycl::marray val, sycl::marray lower, + sycl::marray upper) { + for (int i = 0; i < N; i++) { + assert(lower[i] < val[i] && val[i] < upper[i]); + } +} + +template void assert_out_of_bound(T val, T lower, T upper) { + assert(sycl::all(lower < val && val < upper)); +} + +template <> +void assert_out_of_bound(float val, float lower, float upper) { + assert(lower < val && val < upper); +} + +template <> +void assert_out_of_bound(sycl::half val, sycl::half lower, + sycl::half upper) { + assert(lower < val && val < upper); +} + +template +void native_tanh_tester(sycl::queue q, T val, T up, T lo) { + T r = val; + +#ifdef SYCL_EXT_ONEAPI_NATIVE_MATH + { + sycl::buffer BufR(&r, sycl::range<1>(1)); + q.submit([&](sycl::handler &cgh) { + auto AccR = BufR.template get_access(cgh); + cgh.single_task([=]() { + AccR[0] = sycl::ext::oneapi::experimental::native::tanh(AccR[0]); + }); + }); + } + + assert_out_of_bound(r, up, lo); +#else + assert(!"SYCL_EXT_ONEAPI_NATIVE_MATH not supported"); +#endif +} + +template +void native_exp2_tester(sycl::queue q, T val, T up, T lo) { + T r = val; + +#ifdef SYCL_EXT_ONEAPI_NATIVE_MATH + { + sycl::buffer BufR(&r, sycl::range<1>(1)); + q.submit([&](sycl::handler &cgh) { + auto AccR = BufR.template get_access(cgh); + cgh.single_task([=]() { + AccR[0] = sycl::ext::oneapi::experimental::native::exp2(AccR[0]); + }); + }); + } + + assert_out_of_bound(r, up, lo); +#else + assert(!"SYCL_EXT_ONEAPI_NATIVE_MATH not supported"); +#endif +} diff --git a/SYCL/DeviceLib/built-ins/ext_native_math_fp16.cpp b/SYCL/DeviceLib/built-ins/ext_native_math_fp16.cpp new file mode 100644 index 0000000000..de52fadd5a --- /dev/null +++ b/SYCL/DeviceLib/built-ins/ext_native_math_fp16.cpp @@ -0,0 +1,94 @@ +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-device-code-split=per_kernel %s -o %t.out +// RUN: %HOST_RUN_PLACEHOLDER %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out +// RUN: %ACC_RUN_PLACEHOLDER %t.out + +// OpenCL CPU driver does not support cl_khr_fp16 extension for this reason this +// test is compiled with the -fsycl-device-code-split flag + +// tests oneapi extension native math functions for sycl::vec and sycl::marray +// fp16 cases. + +#include "ext_native_math_common.hpp" + +int main() { + + if (!q.get_device().has(sycl::aspect::fp16)) { + std::cout << "skipping fp16 tests: requires fp16 device aspect." + << std::endl; + return 0; + } + +sycl::queue q; + +const half tv[16] = {-2.0, -1.5, -1.0, 0.0, 2.0, 1.5, 1.0, 0.0, + -1.7, 1.7, -1.2, 1.2, -3.0, 3.0, -10.0, 10.0}; +const half tl[16] = {-0.97, -0.91, -0.77, -0.1, 0.95, 0.89, 0.75, -0.1, + -0.94, 0.92, -0.84, 0.82, -1.0, 0.98, -1.10, 0.98}; +const half tu[16] = {-0.95, -0.89, -0.75, 0.1, 0.97, 0.91, 0.77, 0.1, + -0.92, 0.94, -0.82, 0.84, -0.98, 1.00, -0.98, 1.10}; + +native_tanh_tester(q, tv[0], tl[0], tu[0]); +native_tanh_tester(q, {tv[0], tv[1]}, {tl[0], tl[1]}, + {tu[0], tu[1]}); +native_tanh_tester(q, {tv[0], tv[1], tv[2]}, {tl[0], tl[1], tl[2]}, + {tu[0], tu[1], tu[2]}); +native_tanh_tester>(q, {tv[0], tv[1], tv[2]}, + {tl[0], tl[1], tl[2]}, + {tu[0], tu[1], tu[2]}); +native_tanh_tester(q, {tv[0], tv[1], tv[2], tv[3]}, + {tl[0], tl[1], tl[2], tl[3]}, + {tu[0], tu[1], tu[2], tu[3]}); +native_tanh_tester>(q, {tv[0], tv[1], tv[2], tv[3]}, + {tl[0], tl[1], tl[2], tl[3]}, + {tu[0], tu[1], tu[2], tu[3]}); +native_tanh_tester( + q, {tv[0], tv[1], tv[2], tv[3], tv[4], tv[5], tv[6], tv[7]}, + {tl[0], tl[1], tl[2], tl[3], tl[4], tl[5], tl[6], tl[7]}, + {tu[0], tu[1], tu[2], tu[3], tu[4], tu[5], tu[6], tu[7]}); +native_tanh_tester( + q, + {tv[0], tv[1], tv[2], tv[3], tv[4], tv[5], tv[6], tv[7], tv[8], tv[9], + tv[10], tv[11], tv[12], tv[13], tv[14], tv[15]}, + {tl[0], tl[1], tl[2], tl[3], tl[4], tl[5], tl[6], tl[7], tl[8], tl[9], + tl[10], tl[11], tl[12], tl[13], tl[14], tl[15]}, + {tu[0], tu[1], tu[2], tu[3], tu[4], tu[5], tu[6], tu[7], tu[8], tu[9], + tu[10], tu[11], tu[12], tu[13], tu[14], tu[15]}); + +const half ev[16] = {-2.0, -1.5, -1.0, 0.0, 2.0, 1.5, 1.0, 0.0, + -2.0, -1.5, -1.0, 0.0, 2.0, 1.5, 1.0, 0.0}; +const half el[16] = {0.1, 0.34, 0.4, -0.9, 3.9, 2.7, 1.9, -0.9, + 0.1, 0.34, 0.4, -0.9, 3.9, 2.7, 1.9, -0.9}; +const half eu[16] = {0.3, 0.36, 0.6, 1.1, 4.1, 2.9, 2.1, 1.1, + 0.3, 0.36, 0.6, 1.1, 4.1, 2.9, 2.1, 1.1}; + +native_exp2_tester(q, ev[0], el[0], eu[0]); +native_exp2_tester(q, {ev[0], ev[1]}, {el[0], el[1]}, + {eu[0], eu[1]}); +native_exp2_tester(q, {ev[0], ev[1], ev[2]}, {el[0], el[1], el[2]}, + {eu[0], eu[1], eu[2]}); +native_exp2_tester(q, {ev[0], ev[1], ev[2], ev[3]}, + {el[0], el[1], el[2], el[3]}, + {eu[0], eu[1], eu[2], eu[3]}); +native_exp2_tester>(q, {ev[0], ev[1], ev[2]}, + {el[0], el[1], el[2]}, + {eu[0], eu[1], eu[2]}); +native_exp2_tester>(q, {ev[0], ev[1], ev[2], ev[3]}, + {el[0], el[1], el[2], el[3]}, + {eu[0], eu[1], eu[2], eu[3]}); +native_exp2_tester( + q, {ev[0], ev[1], ev[2], ev[3], ev[4], ev[5], ev[6], ev[7]}, + {el[0], el[1], el[2], el[3], el[4], el[5], el[6], el[7]}, + {eu[0], eu[1], eu[2], eu[3], eu[4], eu[5], eu[6], eu[7]}); +native_exp2_tester( + q, + {ev[0], ev[1], ev[2], ev[3], ev[4], ev[5], ev[6], ev[7], ev[8], ev[9], + ev[10], ev[11], ev[12], ev[13], ev[14], ev[15]}, + {el[0], el[1], el[2], el[3], el[4], el[5], el[6], el[7], el[8], el[9], + el[10], el[11], el[12], el[13], el[14], el[15]}, + {eu[0], eu[1], eu[2], eu[3], eu[4], eu[5], eu[6], eu[7], eu[8], eu[9], + eu[10], eu[11], eu[12], eu[13], eu[14], eu[15]}); + + return 0; +} diff --git a/SYCL/DeviceLib/math_test_marray_vec.cpp b/SYCL/DeviceLib/math_test_marray_vec.cpp index 17aec48570..de054a2c1f 100644 --- a/SYCL/DeviceLib/math_test_marray_vec.cpp +++ b/SYCL/DeviceLib/math_test_marray_vec.cpp @@ -4,256 +4,10 @@ // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %ACC_RUN_PLACEHOLDER %t.out -#include +// tests sycl floating point math functions for sycl::vec and sycl::marray float +// and double cases. -using namespace sycl; - -template class TypeHelper; - -template bool checkEqual(vec A, size_t B) { - T TB = B; - return A.x() == TB && A.y() == TB && A.z() == TB; -} - -template bool checkEqual(vec A, size_t B) { - T TB = B; - return A.x() == TB && A.y() == TB && A.z() == TB && A.w() == TB; -} - -template bool checkEqual(marray A, size_t B) { - for (int i = 0; i < N; i++) { - if (A[i] != B) { - return false; - } - } - return true; -} - -#define OPERATOR(NAME) \ - template \ - void math_test_##NAME(queue &deviceQueue, T result, T input, size_t ref) { \ - { \ - buffer buffer1(&result, 1); \ - buffer buffer2(&input, 1); \ - deviceQueue.submit([&](handler &cgh) { \ - accessor res_access( \ - buffer1, cgh); \ - accessor input_access( \ - buffer2, cgh); \ - cgh.single_task>( \ - [=]() { res_access[0] = NAME(input_access[0]); }); \ - }); \ - } \ - assert(checkEqual(result, ref)); \ - } - -OPERATOR(cos) -OPERATOR(cospi) -OPERATOR(sin) -OPERATOR(sinpi) -OPERATOR(cosh) -OPERATOR(sinh) -OPERATOR(tan) -OPERATOR(tanpi) -OPERATOR(atan) -OPERATOR(atanpi) -OPERATOR(tanh) -OPERATOR(acos) -OPERATOR(acospi) -OPERATOR(asin) -OPERATOR(asinpi) -OPERATOR(acosh) -OPERATOR(asinh) -OPERATOR(atanh) -OPERATOR(cbrt) -OPERATOR(ceil) -OPERATOR(exp) -OPERATOR(exp2) -OPERATOR(exp10) -OPERATOR(expm1) -OPERATOR(tgamma) -OPERATOR(lgamma) -OPERATOR(erf) -OPERATOR(erfc) -OPERATOR(log) -OPERATOR(log2) -OPERATOR(log10) -OPERATOR(log1p) -OPERATOR(logb) -OPERATOR(sqrt) -OPERATOR(rsqrt) -OPERATOR(rint) -OPERATOR(round) -OPERATOR(trunc) - -#undef OPERATOR - -#define OPERATOR_2(NAME) \ - template \ - void math_test_2_##NAME(queue &deviceQueue, T result, T input1, T input2, \ - size_t ref) { \ - { \ - buffer buffer1(&result, 1); \ - buffer buffer2(&input1, 1); \ - buffer buffer3(&input2, 1); \ - deviceQueue.submit([&](handler &cgh) { \ - accessor res_access( \ - buffer1, cgh); \ - accessor input1_access( \ - buffer2, cgh); \ - accessor input2_access( \ - buffer3, cgh); \ - cgh.single_task>([=]() { \ - res_access[0] = NAME(input1_access[0], input2_access[0]); \ - }); \ - }); \ - } \ - assert(checkEqual(result, ref)); \ - } - -OPERATOR_2(pow) -OPERATOR_2(powr) -OPERATOR_2(atan2) -OPERATOR_2(atan2pi) -OPERATOR_2(copysign) -OPERATOR_2(fdim) -OPERATOR_2(fmin) -OPERATOR_2(fmax) -OPERATOR_2(fmod) -OPERATOR_2(hypot) -OPERATOR_2(maxmag) -OPERATOR_2(minmag) -OPERATOR_2(nextafter) -OPERATOR_2(remainder) - -#undef OPERATOR_2 - -#define OPERATOR_3(NAME) \ - template \ - void math_test_3_##NAME(queue &deviceQueue, T result, T input1, T input2, \ - T input3, size_t ref) { \ - { \ - buffer buffer1(&result, 1); \ - buffer buffer2(&input1, 1); \ - buffer buffer3(&input2, 1); \ - buffer buffer4(&input3, 1); \ - deviceQueue.submit([&](handler &cgh) { \ - accessor res_access( \ - buffer1, cgh); \ - accessor input1_access( \ - buffer2, cgh); \ - accessor input2_access( \ - buffer3, cgh); \ - accessor input3_access( \ - buffer4, cgh); \ - cgh.single_task>([=]() { \ - res_access[0] = \ - NAME(input1_access[0], input2_access[0], input3_access[0]); \ - }); \ - }); \ - } \ - assert(checkEqual(result, ref)); \ - } - -OPERATOR_3(mad) -OPERATOR_3(mix) -OPERATOR_3(fma) - -#undef OPERATOR_3 - -template void math_tests_4(queue &deviceQueue) { - math_test_tanh(deviceQueue, T{-1, -1, -1, -1}, T{0, 0, 0, 0}, 0); - math_test_cosh(deviceQueue, T{-1, -1, -1, -1}, T{0, 0, 0, 0}, 1); - math_test_sinh(deviceQueue, T{-1, -1, -1, -1}, T{0, 0, 0, 0}, 0); - math_test_acos(deviceQueue, T{-1, -1, -1, -1}, T{1, 1, 1, 1}, 0); - math_test_acospi(deviceQueue, T{-1, -1, -1, -1}, T{1, 1, 1, 1}, 0); - math_test_acosh(deviceQueue, T{-1, -1, -1, -1}, T{1, 1, 1, 1}, 0); - math_test_asin(deviceQueue, T{-1, -1, -1, -1}, T{0, 0, 0, 0}, 0); - math_test_asinpi(deviceQueue, T{-1, -1, -1, -1}, T{0, 0, 0, 0}, 0); - math_test_asinh(deviceQueue, T{-1, -1, -1, -1}, T{0, 0, 0, 0}, 0); - math_test_cbrt(deviceQueue, T{-1, -1, -1, -1}, T{1, 1, 1, 1}, 1); - math_test_atan(deviceQueue, T{-1, -1, -1, -1}, T{0, 0, 0, 0}, 0); - math_test_atanpi(deviceQueue, T{-1, -1, -1, -1}, T{0, 0, 0, 0}, 0); - math_test_atanh(deviceQueue, T{-1, -1, -1, -1}, T{0, 0, 0, 0}, 0); - math_test_exp(deviceQueue, T{-1, -1, -1, -1}, T{0, 0, 0, 0}, 1); - math_test_exp2(deviceQueue, T{-1, -1, -1, -1}, T{2, 2, 2, 2}, 4); - math_test_exp10(deviceQueue, T{-1, -1, -1, -1}, T{2, 2, 2, 2}, 100); - math_test_expm1(deviceQueue, T{-1, -1, -1, -1}, T{0, 0, 0, 0}, 0); - math_test_ceil(deviceQueue, T{-1, -1, -1, -1}, T{0.6, 0.6, 0.6, 0.6}, 1); - math_test_tgamma(deviceQueue, T{-1, -1, -1, -1}, T{1, 1, 1, 1}, 1); - math_test_lgamma(deviceQueue, T{-1, -1, -1, -1}, T{1, 1, 1, 1}, 0); - math_test_erf(deviceQueue, T{-1, -1, -1, -1}, T{0, 0, 0, 0}, 0); - math_test_erfc(deviceQueue, T{-1, -1, -1, -1}, T{0, 0, 0, 0}, 1); - math_test_2_pow(deviceQueue, T{-1, -1, -1, -1}, T{2, 2, 2, 2}, T{2, 2, 2, 2}, - 4); - math_test_2_powr(deviceQueue, T{-1, -1, -1, -1}, T{2, 2, 2, 2}, T{2, 2, 2, 2}, - 4); - math_test_2_atan2(deviceQueue, T{-1, -1, -1, -1}, T{0, 0, 0, 0}, - T{2, 2, 2, 2}, 0); - math_test_2_atan2pi(deviceQueue, T{-1, -1, -1, -1}, T{0, 0, 0, 0}, - T{2, 2, 2, 2}, 0); - math_test_2_copysign(deviceQueue, T{-1, -1, -1, -1}, T{-3, -3, -3, -3}, - T{2, 2, 2, 2}, 3); - math_test_2_fmin(deviceQueue, T{-1, -1, -1, -1}, T{2, 2, 2, 2}, T{3, 3, 3, 3}, - 2); - math_test_2_fmax(deviceQueue, T{-1, -1, -1, -1}, T{2, 2, 2, 2}, T{3, 3, 3, 3}, - 3); - math_test_2_hypot(deviceQueue, T{-1, -1, -1, -1}, T{4, 4, 4, 4}, - T{3, 3, 3, 3}, 5); - math_test_2_maxmag(deviceQueue, T{-1, -1, -1, -1}, T{-2, -2, -2, -2}, - T{3, 3, 3, 3}, 3); - math_test_2_minmag(deviceQueue, T{-1, -1, -1, -1}, T{2, 2, 2, 2}, - T{-3, -3, -3, -3}, 2); - math_test_2_remainder(deviceQueue, T{-1, -1, -1, -1}, T{5, 5, 5, 5}, - T{2, 2, 2, 2}, 1); - math_test_2_fdim(deviceQueue, T{-1, -1, -1, -1}, T{3, 3, 3, 3}, T{3, 3, 3, 3}, - 0); - math_test_2_fmod(deviceQueue, T{-1, -1, -1, -1}, T{5, 5, 5, 5}, T{3, 3, 3, 3}, - 2); - math_test_2_nextafter(deviceQueue, T{-1, -1, -1, -1}, T{-0, -0, -0, -0}, - T{+0, +0, +0, +0}, 0); - math_test_3_fma(deviceQueue, T{-1, -1, -1, -1}, T{2, 2, 2, 2}, T{2, 2, 2, 2}, - T{1, 1, 1, 1}, 5); - math_test_3_mad(deviceQueue, T{-1, -1, -1, -1}, T{2, 2, 2, 2}, T{2, 2, 2, 2}, - T{1, 1, 1, 1}, 5); - math_test_3_mix(deviceQueue, T{-1, -1, -1, -1}, T{3, 3, 3, 3}, T{5, 5, 5, 5}, - T{0.5, 0.5, 0.5, 0.5}, 4); -} - -template void math_tests_3(queue &deviceQueue) { - math_test_tan(deviceQueue, T{-1, -1, -1}, T{0, 0, 0}, 0); - math_test_tanh(deviceQueue, T{-1, -1, -1}, T{0, 0, 0}, 0); - math_test_cos(deviceQueue, T{-1, -1, -1}, T{0, 0, 0}, 1); - math_test_sin(deviceQueue, T{-1, -1, -1}, T{0, 0, 0}, 0); - math_test_cosh(deviceQueue, T{-1, -1, -1}, T{0, 0, 0}, 1); - math_test_sinh(deviceQueue, T{-1, -1, -1}, T{0, 0, 0}, 0); - math_test_acos(deviceQueue, T{-1, -1, -1}, T{1, 1, 1}, 0); - math_test_acosh(deviceQueue, T{-1, -1, -1}, T{1, 1, 1}, 0); - math_test_asin(deviceQueue, T{-1, -1, -1}, T{0, 0, 0}, 0); - math_test_asinh(deviceQueue, T{-1, -1, -1}, T{0, 0, 0}, 0); - math_test_cbrt(deviceQueue, T{-1, -1, -1}, T{1, 1, 1}, 1); - math_test_atan(deviceQueue, T{-1, -1, -1}, T{0, 0, 0}, 0); - math_test_atanh(deviceQueue, T{-1, -1, -1}, T{0, 0, 0}, 0); - math_test_exp(deviceQueue, T{-1, -1, -1}, T{0, 0, 0}, 1); - math_test_exp2(deviceQueue, T{-1, -1, -1}, T{2, 2, 2}, 4); - math_test_exp10(deviceQueue, T{-1, -1, -1}, T{2, 2, 2}, 100); - math_test_expm1(deviceQueue, T{-1, -1, -1}, T{0, 0, 0}, 0); - math_test_ceil(deviceQueue, T{-1, -1, -1}, T{0.6, 0.6, 0.6}, 1); - math_test_tgamma(deviceQueue, T{-1, -1, -1}, T{1, 1, 1}, 1); - math_test_lgamma(deviceQueue, T{-1, -1, -1}, T{1, 1, 1}, 0); - math_test_erf(deviceQueue, T{-1, -1, -1}, T{0, 0, 0}, 0); - math_test_erfc(deviceQueue, T{-1, -1, -1}, T{0, 0, 0}, 1); - math_test_log(deviceQueue, T{-1, -1, -1}, T{1, 1, 1}, 0); - math_test_log2(deviceQueue, T{-1, -1, -1}, T{4, 4, 4}, 2); - math_test_log10(deviceQueue, T{-1, -1, -1}, T{100, 100, 100}, 2); - math_test_log1p(deviceQueue, T{-1, -1, -1}, T{0, 0, 0}, 0); - math_test_logb(deviceQueue, T{-1, -1, -1}, T{1.1, 1.1, 1.1}, 0); - math_test_sqrt(deviceQueue, T{-1, -1, -1}, T{4, 4, 4}, 2); - math_test_rsqrt(deviceQueue, T{-1, -1, -1}, T{0.25, 0.25, 0.25}, 2); - math_test_rint(deviceQueue, T{-1, -1, -1}, T{2.9, 2.9, 2.9}, 3); - math_test_round(deviceQueue, T{-1, -1, -1}, T{0.5, 0.5, 0.5}, 1); - math_test_trunc(deviceQueue, T{-1, -1, -1}, T{1.9, 1.9, 1.9}, 1); -} +#include "math_test_marray_vec_common.hpp" int main() { queue deviceQueue; @@ -267,13 +21,6 @@ int main() { math_tests_3>(deviceQueue); math_tests_3>(deviceQueue); - if (deviceQueue.get_device().has(sycl::aspect::fp16)) { - math_tests_4(deviceQueue); - math_tests_4>(deviceQueue); - math_tests_3(deviceQueue); - math_tests_3>(deviceQueue); - } - std::cout << "Pass" << std::endl; return 0; } diff --git a/SYCL/DeviceLib/math_test_marray_vec_common.hpp b/SYCL/DeviceLib/math_test_marray_vec_common.hpp new file mode 100644 index 0000000000..cc4b440448 --- /dev/null +++ b/SYCL/DeviceLib/math_test_marray_vec_common.hpp @@ -0,0 +1,250 @@ +#include + +using namespace sycl; + +template class TypeHelper; + +template bool checkEqual(vec A, size_t B) { + T TB = B; + return A.x() == TB && A.y() == TB && A.z() == TB; +} + +template bool checkEqual(vec A, size_t B) { + T TB = B; + return A.x() == TB && A.y() == TB && A.z() == TB && A.w() == TB; +} + +template bool checkEqual(marray A, size_t B) { + for (int i = 0; i < N; i++) { + if (A[i] != B) { + return false; + } + } + return true; +} + +#define OPERATOR(NAME) \ + template \ + void math_test_##NAME(queue &deviceQueue, T result, T input, size_t ref) { \ + { \ + buffer buffer1(&result, 1); \ + buffer buffer2(&input, 1); \ + deviceQueue.submit([&](handler &cgh) { \ + accessor res_access( \ + buffer1, cgh); \ + accessor input_access( \ + buffer2, cgh); \ + cgh.single_task>( \ + [=]() { res_access[0] = NAME(input_access[0]); }); \ + }); \ + } \ + assert(checkEqual(result, ref)); \ + } + +OPERATOR(cos) +OPERATOR(cospi) +OPERATOR(sin) +OPERATOR(sinpi) +OPERATOR(cosh) +OPERATOR(sinh) +OPERATOR(tan) +OPERATOR(tanpi) +OPERATOR(atan) +OPERATOR(atanpi) +OPERATOR(tanh) +OPERATOR(acos) +OPERATOR(acospi) +OPERATOR(asin) +OPERATOR(asinpi) +OPERATOR(acosh) +OPERATOR(asinh) +OPERATOR(atanh) +OPERATOR(cbrt) +OPERATOR(ceil) +OPERATOR(exp) +OPERATOR(exp2) +OPERATOR(exp10) +OPERATOR(expm1) +OPERATOR(tgamma) +OPERATOR(lgamma) +OPERATOR(erf) +OPERATOR(erfc) +OPERATOR(log) +OPERATOR(log2) +OPERATOR(log10) +OPERATOR(log1p) +OPERATOR(logb) +OPERATOR(sqrt) +OPERATOR(rsqrt) +OPERATOR(rint) +OPERATOR(round) +OPERATOR(trunc) + +#undef OPERATOR + +#define OPERATOR_2(NAME) \ + template \ + void math_test_2_##NAME(queue &deviceQueue, T result, T input1, T input2, \ + size_t ref) { \ + { \ + buffer buffer1(&result, 1); \ + buffer buffer2(&input1, 1); \ + buffer buffer3(&input2, 1); \ + deviceQueue.submit([&](handler &cgh) { \ + accessor res_access( \ + buffer1, cgh); \ + accessor input1_access( \ + buffer2, cgh); \ + accessor input2_access( \ + buffer3, cgh); \ + cgh.single_task>([=]() { \ + res_access[0] = NAME(input1_access[0], input2_access[0]); \ + }); \ + }); \ + } \ + assert(checkEqual(result, ref)); \ + } + +OPERATOR_2(pow) +OPERATOR_2(powr) +OPERATOR_2(atan2) +OPERATOR_2(atan2pi) +OPERATOR_2(copysign) +OPERATOR_2(fdim) +OPERATOR_2(fmin) +OPERATOR_2(fmax) +OPERATOR_2(fmod) +OPERATOR_2(hypot) +OPERATOR_2(maxmag) +OPERATOR_2(minmag) +OPERATOR_2(nextafter) +OPERATOR_2(remainder) + +#undef OPERATOR_2 + +#define OPERATOR_3(NAME) \ + template \ + void math_test_3_##NAME(queue &deviceQueue, T result, T input1, T input2, \ + T input3, size_t ref) { \ + { \ + buffer buffer1(&result, 1); \ + buffer buffer2(&input1, 1); \ + buffer buffer3(&input2, 1); \ + buffer buffer4(&input3, 1); \ + deviceQueue.submit([&](handler &cgh) { \ + accessor res_access( \ + buffer1, cgh); \ + accessor input1_access( \ + buffer2, cgh); \ + accessor input2_access( \ + buffer3, cgh); \ + accessor input3_access( \ + buffer4, cgh); \ + cgh.single_task>([=]() { \ + res_access[0] = \ + NAME(input1_access[0], input2_access[0], input3_access[0]); \ + }); \ + }); \ + } \ + assert(checkEqual(result, ref)); \ + } + +OPERATOR_3(mad) +OPERATOR_3(mix) +OPERATOR_3(fma) + +#undef OPERATOR_3 + +template void math_tests_4(queue &deviceQueue) { + math_test_tanh(deviceQueue, T{-1, -1, -1, -1}, T{0, 0, 0, 0}, 0); + math_test_cosh(deviceQueue, T{-1, -1, -1, -1}, T{0, 0, 0, 0}, 1); + math_test_sinh(deviceQueue, T{-1, -1, -1, -1}, T{0, 0, 0, 0}, 0); + math_test_acos(deviceQueue, T{-1, -1, -1, -1}, T{1, 1, 1, 1}, 0); + math_test_acospi(deviceQueue, T{-1, -1, -1, -1}, T{1, 1, 1, 1}, 0); + math_test_acosh(deviceQueue, T{-1, -1, -1, -1}, T{1, 1, 1, 1}, 0); + math_test_asin(deviceQueue, T{-1, -1, -1, -1}, T{0, 0, 0, 0}, 0); + math_test_asinpi(deviceQueue, T{-1, -1, -1, -1}, T{0, 0, 0, 0}, 0); + math_test_asinh(deviceQueue, T{-1, -1, -1, -1}, T{0, 0, 0, 0}, 0); + math_test_cbrt(deviceQueue, T{-1, -1, -1, -1}, T{1, 1, 1, 1}, 1); + math_test_atan(deviceQueue, T{-1, -1, -1, -1}, T{0, 0, 0, 0}, 0); + math_test_atanpi(deviceQueue, T{-1, -1, -1, -1}, T{0, 0, 0, 0}, 0); + math_test_atanh(deviceQueue, T{-1, -1, -1, -1}, T{0, 0, 0, 0}, 0); + math_test_exp(deviceQueue, T{-1, -1, -1, -1}, T{0, 0, 0, 0}, 1); + math_test_exp2(deviceQueue, T{-1, -1, -1, -1}, T{2, 2, 2, 2}, 4); + math_test_exp10(deviceQueue, T{-1, -1, -1, -1}, T{2, 2, 2, 2}, 100); + math_test_expm1(deviceQueue, T{-1, -1, -1, -1}, T{0, 0, 0, 0}, 0); + math_test_ceil(deviceQueue, T{-1, -1, -1, -1}, T{0.6, 0.6, 0.6, 0.6}, 1); + math_test_tgamma(deviceQueue, T{-1, -1, -1, -1}, T{1, 1, 1, 1}, 1); + math_test_lgamma(deviceQueue, T{-1, -1, -1, -1}, T{1, 1, 1, 1}, 0); + math_test_erf(deviceQueue, T{-1, -1, -1, -1}, T{0, 0, 0, 0}, 0); + math_test_erfc(deviceQueue, T{-1, -1, -1, -1}, T{0, 0, 0, 0}, 1); + math_test_2_pow(deviceQueue, T{-1, -1, -1, -1}, T{2, 2, 2, 2}, T{2, 2, 2, 2}, + 4); + math_test_2_powr(deviceQueue, T{-1, -1, -1, -1}, T{2, 2, 2, 2}, T{2, 2, 2, 2}, + 4); + math_test_2_atan2(deviceQueue, T{-1, -1, -1, -1}, T{0, 0, 0, 0}, + T{2, 2, 2, 2}, 0); + math_test_2_atan2pi(deviceQueue, T{-1, -1, -1, -1}, T{0, 0, 0, 0}, + T{2, 2, 2, 2}, 0); + math_test_2_copysign(deviceQueue, T{-1, -1, -1, -1}, T{-3, -3, -3, -3}, + T{2, 2, 2, 2}, 3); + math_test_2_fmin(deviceQueue, T{-1, -1, -1, -1}, T{2, 2, 2, 2}, T{3, 3, 3, 3}, + 2); + math_test_2_fmax(deviceQueue, T{-1, -1, -1, -1}, T{2, 2, 2, 2}, T{3, 3, 3, 3}, + 3); + math_test_2_hypot(deviceQueue, T{-1, -1, -1, -1}, T{4, 4, 4, 4}, + T{3, 3, 3, 3}, 5); + math_test_2_maxmag(deviceQueue, T{-1, -1, -1, -1}, T{-2, -2, -2, -2}, + T{3, 3, 3, 3}, 3); + math_test_2_minmag(deviceQueue, T{-1, -1, -1, -1}, T{2, 2, 2, 2}, + T{-3, -3, -3, -3}, 2); + math_test_2_remainder(deviceQueue, T{-1, -1, -1, -1}, T{5, 5, 5, 5}, + T{2, 2, 2, 2}, 1); + math_test_2_fdim(deviceQueue, T{-1, -1, -1, -1}, T{3, 3, 3, 3}, T{3, 3, 3, 3}, + 0); + math_test_2_fmod(deviceQueue, T{-1, -1, -1, -1}, T{5, 5, 5, 5}, T{3, 3, 3, 3}, + 2); + math_test_2_nextafter(deviceQueue, T{-1, -1, -1, -1}, T{-0, -0, -0, -0}, + T{+0, +0, +0, +0}, 0); + math_test_3_fma(deviceQueue, T{-1, -1, -1, -1}, T{2, 2, 2, 2}, T{2, 2, 2, 2}, + T{1, 1, 1, 1}, 5); + math_test_3_mad(deviceQueue, T{-1, -1, -1, -1}, T{2, 2, 2, 2}, T{2, 2, 2, 2}, + T{1, 1, 1, 1}, 5); + math_test_3_mix(deviceQueue, T{-1, -1, -1, -1}, T{3, 3, 3, 3}, T{5, 5, 5, 5}, + T{0.5, 0.5, 0.5, 0.5}, 4); +} + +template void math_tests_3(queue &deviceQueue) { + math_test_tan(deviceQueue, T{-1, -1, -1}, T{0, 0, 0}, 0); + math_test_tanh(deviceQueue, T{-1, -1, -1}, T{0, 0, 0}, 0); + math_test_cos(deviceQueue, T{-1, -1, -1}, T{0, 0, 0}, 1); + math_test_sin(deviceQueue, T{-1, -1, -1}, T{0, 0, 0}, 0); + math_test_cosh(deviceQueue, T{-1, -1, -1}, T{0, 0, 0}, 1); + math_test_sinh(deviceQueue, T{-1, -1, -1}, T{0, 0, 0}, 0); + math_test_acos(deviceQueue, T{-1, -1, -1}, T{1, 1, 1}, 0); + math_test_acosh(deviceQueue, T{-1, -1, -1}, T{1, 1, 1}, 0); + math_test_asin(deviceQueue, T{-1, -1, -1}, T{0, 0, 0}, 0); + math_test_asinh(deviceQueue, T{-1, -1, -1}, T{0, 0, 0}, 0); + math_test_cbrt(deviceQueue, T{-1, -1, -1}, T{1, 1, 1}, 1); + math_test_atan(deviceQueue, T{-1, -1, -1}, T{0, 0, 0}, 0); + math_test_atanh(deviceQueue, T{-1, -1, -1}, T{0, 0, 0}, 0); + math_test_exp(deviceQueue, T{-1, -1, -1}, T{0, 0, 0}, 1); + math_test_exp2(deviceQueue, T{-1, -1, -1}, T{2, 2, 2}, 4); + math_test_exp10(deviceQueue, T{-1, -1, -1}, T{2, 2, 2}, 100); + math_test_expm1(deviceQueue, T{-1, -1, -1}, T{0, 0, 0}, 0); + math_test_ceil(deviceQueue, T{-1, -1, -1}, T{0.6, 0.6, 0.6}, 1); + math_test_tgamma(deviceQueue, T{-1, -1, -1}, T{1, 1, 1}, 1); + math_test_lgamma(deviceQueue, T{-1, -1, -1}, T{1, 1, 1}, 0); + math_test_erf(deviceQueue, T{-1, -1, -1}, T{0, 0, 0}, 0); + math_test_erfc(deviceQueue, T{-1, -1, -1}, T{0, 0, 0}, 1); + math_test_log(deviceQueue, T{-1, -1, -1}, T{1, 1, 1}, 0); + math_test_log2(deviceQueue, T{-1, -1, -1}, T{4, 4, 4}, 2); + math_test_log10(deviceQueue, T{-1, -1, -1}, T{100, 100, 100}, 2); + math_test_log1p(deviceQueue, T{-1, -1, -1}, T{0, 0, 0}, 0); + math_test_logb(deviceQueue, T{-1, -1, -1}, T{1.1, 1.1, 1.1}, 0); + math_test_sqrt(deviceQueue, T{-1, -1, -1}, T{4, 4, 4}, 2); + math_test_rsqrt(deviceQueue, T{-1, -1, -1}, T{0.25, 0.25, 0.25}, 2); + math_test_rint(deviceQueue, T{-1, -1, -1}, T{2.9, 2.9, 2.9}, 3); + math_test_round(deviceQueue, T{-1, -1, -1}, T{0.5, 0.5, 0.5}, 1); + math_test_trunc(deviceQueue, T{-1, -1, -1}, T{1.9, 1.9, 1.9}, 1); +} diff --git a/SYCL/DeviceLib/math_test_marray_vec_fp16.cpp b/SYCL/DeviceLib/math_test_marray_vec_fp16.cpp new file mode 100644 index 0000000000..71740ad335 --- /dev/null +++ b/SYCL/DeviceLib/math_test_marray_vec_fp16.cpp @@ -0,0 +1,27 @@ +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-device-code-split=per_kernel %s -o %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out +// RUN: %HOST_RUN_PLACEHOLDER %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %ACC_RUN_PLACEHOLDER %t.out + +// tests sycl floating point math functions for sycl::vec and sycl::marray fp16 +// cases. + +#include "math_test_marray_vec_common.hpp" + +int main() { + queue deviceQueue; + + if (!deviceQueue.get_device().has(sycl::aspect::fp16)) { + std::cout << "skipping fp16 tests: requires fp16 device aspect." + << std::endl; + return 0; + } + math_tests_4(deviceQueue); + math_tests_4>(deviceQueue); + math_tests_3(deviceQueue); + math_tests_3>(deviceQueue); + + std::cout << "Pass" << std::endl; + return 0; +} From 9e2394e99912cf102978cc77b039efd3324e8127 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Thu, 16 Jun 2022 09:52:43 +0100 Subject: [PATCH 05/20] fixed queue constructor mistake. Signed-off-by: JackAKirk --- .../DeviceLib/built-ins/ext_native_math_fp16.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/SYCL/DeviceLib/built-ins/ext_native_math_fp16.cpp b/SYCL/DeviceLib/built-ins/ext_native_math_fp16.cpp index de52fadd5a..f3b0b7623b 100644 --- a/SYCL/DeviceLib/built-ins/ext_native_math_fp16.cpp +++ b/SYCL/DeviceLib/built-ins/ext_native_math_fp16.cpp @@ -14,19 +14,19 @@ int main() { +sycl::queue q; + if (!q.get_device().has(sycl::aspect::fp16)) { std::cout << "skipping fp16 tests: requires fp16 device aspect." << std::endl; return 0; } -sycl::queue q; - -const half tv[16] = {-2.0, -1.5, -1.0, 0.0, 2.0, 1.5, 1.0, 0.0, +const sycl::half tv[16] = {-2.0, -1.5, -1.0, 0.0, 2.0, 1.5, 1.0, 0.0, -1.7, 1.7, -1.2, 1.2, -3.0, 3.0, -10.0, 10.0}; -const half tl[16] = {-0.97, -0.91, -0.77, -0.1, 0.95, 0.89, 0.75, -0.1, +const sycl::half tl[16] = {-0.97, -0.91, -0.77, -0.1, 0.95, 0.89, 0.75, -0.1, -0.94, 0.92, -0.84, 0.82, -1.0, 0.98, -1.10, 0.98}; -const half tu[16] = {-0.95, -0.89, -0.75, 0.1, 0.97, 0.91, 0.77, 0.1, +const sycl::half tu[16] = {-0.95, -0.89, -0.75, 0.1, 0.97, 0.91, 0.77, 0.1, -0.92, 0.94, -0.82, 0.84, -0.98, 1.00, -0.98, 1.10}; native_tanh_tester(q, tv[0], tl[0], tu[0]); @@ -56,11 +56,11 @@ native_tanh_tester( {tu[0], tu[1], tu[2], tu[3], tu[4], tu[5], tu[6], tu[7], tu[8], tu[9], tu[10], tu[11], tu[12], tu[13], tu[14], tu[15]}); -const half ev[16] = {-2.0, -1.5, -1.0, 0.0, 2.0, 1.5, 1.0, 0.0, +const sycl::half ev[16] = {-2.0, -1.5, -1.0, 0.0, 2.0, 1.5, 1.0, 0.0, -2.0, -1.5, -1.0, 0.0, 2.0, 1.5, 1.0, 0.0}; -const half el[16] = {0.1, 0.34, 0.4, -0.9, 3.9, 2.7, 1.9, -0.9, +const sycl::half el[16] = {0.1, 0.34, 0.4, -0.9, 3.9, 2.7, 1.9, -0.9, 0.1, 0.34, 0.4, -0.9, 3.9, 2.7, 1.9, -0.9}; -const half eu[16] = {0.3, 0.36, 0.6, 1.1, 4.1, 2.9, 2.1, 1.1, +const sycl::half eu[16] = {0.3, 0.36, 0.6, 1.1, 4.1, 2.9, 2.1, 1.1, 0.3, 0.36, 0.6, 1.1, 4.1, 2.9, 2.1, 1.1}; native_exp2_tester(q, ev[0], el[0], eu[0]); From 80e69940245e252f810d2cedd14c4d788ee86b81 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Thu, 16 Jun 2022 10:16:10 +0100 Subject: [PATCH 06/20] format Signed-off-by: JackAKirk --- SYCL/DeviceLib/built-ins/ext_native_math.cpp | 71 +++++----- .../built-ins/ext_native_math_common.hpp | 2 +- .../built-ins/ext_native_math_fp16.cpp | 130 +++++++++--------- 3 files changed, 101 insertions(+), 102 deletions(-) diff --git a/SYCL/DeviceLib/built-ins/ext_native_math.cpp b/SYCL/DeviceLib/built-ins/ext_native_math.cpp index fa24addd0e..d5952fd8cf 100644 --- a/SYCL/DeviceLib/built-ins/ext_native_math.cpp +++ b/SYCL/DeviceLib/built-ins/ext_native_math.cpp @@ -11,42 +11,41 @@ int main() { -sycl::queue q; - -const float tv[16] = {-2.0, -1.5, -1.0, 0.0, 2.0, 1.5, 1.0, 0.0, - -1.7, 1.7, -1.2, 1.2, -3.0, 3.0, -10.0, 10.0}; -const float tl[16] = {-0.97, -0.91, -0.77, -0.1, 0.95, 0.89, 0.75, -0.1, - -0.94, 0.92, -0.84, 0.82, -1.0, 0.98, -1.10, 0.98}; -const float tu[16] = {-0.95, -0.89, -0.75, 0.1, 0.97, 0.91, 0.77, 0.1, - -0.92, 0.94, -0.82, 0.84, -0.98, 1.00, -0.98, 1.10}; - -native_tanh_tester(q, tv[0], tl[0], tu[0]); -native_tanh_tester(q, {tv[0], tv[1]}, {tl[0], tl[1]}, - {tu[0], tu[1]}); -native_tanh_tester(q, {tv[0], tv[1], tv[2]}, - {tl[0], tl[1], tl[2]}, {tu[0], tu[1], tu[2]}); - -native_tanh_tester(q, {tv[0], tv[1], tv[2], tv[3]}, - {tl[0], tl[1], tl[2], tl[3]}, - {tu[0], tu[1], tu[2], tu[3]}); -native_tanh_tester>(q, {tv[0], tv[1], tv[2]}, - {tl[0], tl[1], tl[2]}, - {tu[0], tu[1], tu[2]}); -native_tanh_tester>(q, {tv[0], tv[1], tv[2], tv[3]}, - {tl[0], tl[1], tl[2], tl[3]}, - {tu[0], tu[1], tu[2], tu[3]}); -native_tanh_tester( - q, {tv[0], tv[1], tv[2], tv[3], tv[4], tv[5], tv[6], tv[7]}, - {tl[0], tl[1], tl[2], tl[3], tl[4], tl[5], tl[6], tl[7]}, - {tu[0], tu[1], tu[2], tu[3], tu[4], tu[5], tu[6], tu[7]}); -native_tanh_tester( - q, - {tv[0], tv[1], tv[2], tv[3], tv[4], tv[5], tv[6], tv[7], tv[8], tv[9], - tv[10], tv[11], tv[12], tv[13], tv[14], tv[15]}, - {tl[0], tl[1], tl[2], tl[3], tl[4], tl[5], tl[6], tl[7], tl[8], tl[9], - tl[10], tl[11], tl[12], tl[13], tl[14], tl[15]}, - {tu[0], tu[1], tu[2], tu[3], tu[4], tu[5], tu[6], tu[7], tu[8], tu[9], - tu[10], tu[11], tu[12], tu[13], tu[14], tu[15]}); + sycl::queue q; + + const float tv[16] = {-2.0, -1.5, -1.0, 0.0, 2.0, 1.5, 1.0, 0.0, + -1.7, 1.7, -1.2, 1.2, -3.0, 3.0, -10.0, 10.0}; + const float tl[16] = {-0.97, -0.91, -0.77, -0.1, 0.95, 0.89, 0.75, -0.1, + -0.94, 0.92, -0.84, 0.82, -1.0, 0.98, -1.10, 0.98}; + const float tu[16] = {-0.95, -0.89, -0.75, 0.1, 0.97, 0.91, 0.77, 0.1, + -0.92, 0.94, -0.82, 0.84, -0.98, 1.00, -0.98, 1.10}; + + native_tanh_tester(q, tv[0], tl[0], tu[0]); + native_tanh_tester(q, {tv[0], tv[1]}, {tl[0], tl[1]}, + {tu[0], tu[1]}); + native_tanh_tester( + q, {tv[0], tv[1], tv[2]}, {tl[0], tl[1], tl[2]}, {tu[0], tu[1], tu[2]}); + + native_tanh_tester(q, {tv[0], tv[1], tv[2], tv[3]}, + {tl[0], tl[1], tl[2], tl[3]}, + {tu[0], tu[1], tu[2], tu[3]}); + native_tanh_tester>( + q, {tv[0], tv[1], tv[2]}, {tl[0], tl[1], tl[2]}, {tu[0], tu[1], tu[2]}); + native_tanh_tester>(q, {tv[0], tv[1], tv[2], tv[3]}, + {tl[0], tl[1], tl[2], tl[3]}, + {tu[0], tu[1], tu[2], tu[3]}); + native_tanh_tester( + q, {tv[0], tv[1], tv[2], tv[3], tv[4], tv[5], tv[6], tv[7]}, + {tl[0], tl[1], tl[2], tl[3], tl[4], tl[5], tl[6], tl[7]}, + {tu[0], tu[1], tu[2], tu[3], tu[4], tu[5], tu[6], tu[7]}); + native_tanh_tester( + q, + {tv[0], tv[1], tv[2], tv[3], tv[4], tv[5], tv[6], tv[7], tv[8], tv[9], + tv[10], tv[11], tv[12], tv[13], tv[14], tv[15]}, + {tl[0], tl[1], tl[2], tl[3], tl[4], tl[5], tl[6], tl[7], tl[8], tl[9], + tl[10], tl[11], tl[12], tl[13], tl[14], tl[15]}, + {tu[0], tu[1], tu[2], tu[3], tu[4], tu[5], tu[6], tu[7], tu[8], tu[9], + tu[10], tu[11], tu[12], tu[13], tu[14], tu[15]}); return 0; } diff --git a/SYCL/DeviceLib/built-ins/ext_native_math_common.hpp b/SYCL/DeviceLib/built-ins/ext_native_math_common.hpp index 1ac9203537..7511f03206 100644 --- a/SYCL/DeviceLib/built-ins/ext_native_math_common.hpp +++ b/SYCL/DeviceLib/built-ins/ext_native_math_common.hpp @@ -1,5 +1,5 @@ -#include #include +#include template void assert_out_of_bound(sycl::marray val, sycl::marray lower, diff --git a/SYCL/DeviceLib/built-ins/ext_native_math_fp16.cpp b/SYCL/DeviceLib/built-ins/ext_native_math_fp16.cpp index f3b0b7623b..a590b900d8 100644 --- a/SYCL/DeviceLib/built-ins/ext_native_math_fp16.cpp +++ b/SYCL/DeviceLib/built-ins/ext_native_math_fp16.cpp @@ -14,7 +14,7 @@ int main() { -sycl::queue q; + sycl::queue q; if (!q.get_device().has(sycl::aspect::fp16)) { std::cout << "skipping fp16 tests: requires fp16 device aspect." @@ -22,73 +22,73 @@ sycl::queue q; return 0; } -const sycl::half tv[16] = {-2.0, -1.5, -1.0, 0.0, 2.0, 1.5, 1.0, 0.0, - -1.7, 1.7, -1.2, 1.2, -3.0, 3.0, -10.0, 10.0}; -const sycl::half tl[16] = {-0.97, -0.91, -0.77, -0.1, 0.95, 0.89, 0.75, -0.1, - -0.94, 0.92, -0.84, 0.82, -1.0, 0.98, -1.10, 0.98}; -const sycl::half tu[16] = {-0.95, -0.89, -0.75, 0.1, 0.97, 0.91, 0.77, 0.1, - -0.92, 0.94, -0.82, 0.84, -0.98, 1.00, -0.98, 1.10}; + const sycl::half tv[16] = {-2.0, -1.5, -1.0, 0.0, 2.0, 1.5, 1.0, 0.0, + -1.7, 1.7, -1.2, 1.2, -3.0, 3.0, -10.0, 10.0}; + const sycl::half tl[16] = {-0.97, -0.91, -0.77, -0.1, 0.95, 0.89, + 0.75, -0.1, -0.94, 0.92, -0.84, 0.82, + -1.0, 0.98, -1.10, 0.98}; + const sycl::half tu[16] = {-0.95, -0.89, -0.75, 0.1, 0.97, 0.91, + 0.77, 0.1, -0.92, 0.94, -0.82, 0.84, + -0.98, 1.00, -0.98, 1.10}; -native_tanh_tester(q, tv[0], tl[0], tu[0]); -native_tanh_tester(q, {tv[0], tv[1]}, {tl[0], tl[1]}, - {tu[0], tu[1]}); -native_tanh_tester(q, {tv[0], tv[1], tv[2]}, {tl[0], tl[1], tl[2]}, - {tu[0], tu[1], tu[2]}); -native_tanh_tester>(q, {tv[0], tv[1], tv[2]}, - {tl[0], tl[1], tl[2]}, - {tu[0], tu[1], tu[2]}); -native_tanh_tester(q, {tv[0], tv[1], tv[2], tv[3]}, - {tl[0], tl[1], tl[2], tl[3]}, - {tu[0], tu[1], tu[2], tu[3]}); -native_tanh_tester>(q, {tv[0], tv[1], tv[2], tv[3]}, - {tl[0], tl[1], tl[2], tl[3]}, - {tu[0], tu[1], tu[2], tu[3]}); -native_tanh_tester( - q, {tv[0], tv[1], tv[2], tv[3], tv[4], tv[5], tv[6], tv[7]}, - {tl[0], tl[1], tl[2], tl[3], tl[4], tl[5], tl[6], tl[7]}, - {tu[0], tu[1], tu[2], tu[3], tu[4], tu[5], tu[6], tu[7]}); -native_tanh_tester( - q, - {tv[0], tv[1], tv[2], tv[3], tv[4], tv[5], tv[6], tv[7], tv[8], tv[9], - tv[10], tv[11], tv[12], tv[13], tv[14], tv[15]}, - {tl[0], tl[1], tl[2], tl[3], tl[4], tl[5], tl[6], tl[7], tl[8], tl[9], - tl[10], tl[11], tl[12], tl[13], tl[14], tl[15]}, - {tu[0], tu[1], tu[2], tu[3], tu[4], tu[5], tu[6], tu[7], tu[8], tu[9], - tu[10], tu[11], tu[12], tu[13], tu[14], tu[15]}); + native_tanh_tester(q, tv[0], tl[0], tu[0]); + native_tanh_tester(q, {tv[0], tv[1]}, {tl[0], tl[1]}, + {tu[0], tu[1]}); + native_tanh_tester(q, {tv[0], tv[1], tv[2]}, + {tl[0], tl[1], tl[2]}, {tu[0], tu[1], tu[2]}); + native_tanh_tester>( + q, {tv[0], tv[1], tv[2]}, {tl[0], tl[1], tl[2]}, {tu[0], tu[1], tu[2]}); + native_tanh_tester(q, {tv[0], tv[1], tv[2], tv[3]}, + {tl[0], tl[1], tl[2], tl[3]}, + {tu[0], tu[1], tu[2], tu[3]}); + native_tanh_tester>( + q, {tv[0], tv[1], tv[2], tv[3]}, {tl[0], tl[1], tl[2], tl[3]}, + {tu[0], tu[1], tu[2], tu[3]}); + native_tanh_tester( + q, {tv[0], tv[1], tv[2], tv[3], tv[4], tv[5], tv[6], tv[7]}, + {tl[0], tl[1], tl[2], tl[3], tl[4], tl[5], tl[6], tl[7]}, + {tu[0], tu[1], tu[2], tu[3], tu[4], tu[5], tu[6], tu[7]}); + native_tanh_tester( + q, + {tv[0], tv[1], tv[2], tv[3], tv[4], tv[5], tv[6], tv[7], tv[8], tv[9], + tv[10], tv[11], tv[12], tv[13], tv[14], tv[15]}, + {tl[0], tl[1], tl[2], tl[3], tl[4], tl[5], tl[6], tl[7], tl[8], tl[9], + tl[10], tl[11], tl[12], tl[13], tl[14], tl[15]}, + {tu[0], tu[1], tu[2], tu[3], tu[4], tu[5], tu[6], tu[7], tu[8], tu[9], + tu[10], tu[11], tu[12], tu[13], tu[14], tu[15]}); -const sycl::half ev[16] = {-2.0, -1.5, -1.0, 0.0, 2.0, 1.5, 1.0, 0.0, - -2.0, -1.5, -1.0, 0.0, 2.0, 1.5, 1.0, 0.0}; -const sycl::half el[16] = {0.1, 0.34, 0.4, -0.9, 3.9, 2.7, 1.9, -0.9, - 0.1, 0.34, 0.4, -0.9, 3.9, 2.7, 1.9, -0.9}; -const sycl::half eu[16] = {0.3, 0.36, 0.6, 1.1, 4.1, 2.9, 2.1, 1.1, - 0.3, 0.36, 0.6, 1.1, 4.1, 2.9, 2.1, 1.1}; + const sycl::half ev[16] = {-2.0, -1.5, -1.0, 0.0, 2.0, 1.5, 1.0, 0.0, + -2.0, -1.5, -1.0, 0.0, 2.0, 1.5, 1.0, 0.0}; + const sycl::half el[16] = {0.1, 0.34, 0.4, -0.9, 3.9, 2.7, 1.9, -0.9, + 0.1, 0.34, 0.4, -0.9, 3.9, 2.7, 1.9, -0.9}; + const sycl::half eu[16] = {0.3, 0.36, 0.6, 1.1, 4.1, 2.9, 2.1, 1.1, + 0.3, 0.36, 0.6, 1.1, 4.1, 2.9, 2.1, 1.1}; -native_exp2_tester(q, ev[0], el[0], eu[0]); -native_exp2_tester(q, {ev[0], ev[1]}, {el[0], el[1]}, - {eu[0], eu[1]}); -native_exp2_tester(q, {ev[0], ev[1], ev[2]}, {el[0], el[1], el[2]}, - {eu[0], eu[1], eu[2]}); -native_exp2_tester(q, {ev[0], ev[1], ev[2], ev[3]}, - {el[0], el[1], el[2], el[3]}, - {eu[0], eu[1], eu[2], eu[3]}); -native_exp2_tester>(q, {ev[0], ev[1], ev[2]}, - {el[0], el[1], el[2]}, - {eu[0], eu[1], eu[2]}); -native_exp2_tester>(q, {ev[0], ev[1], ev[2], ev[3]}, - {el[0], el[1], el[2], el[3]}, - {eu[0], eu[1], eu[2], eu[3]}); -native_exp2_tester( - q, {ev[0], ev[1], ev[2], ev[3], ev[4], ev[5], ev[6], ev[7]}, - {el[0], el[1], el[2], el[3], el[4], el[5], el[6], el[7]}, - {eu[0], eu[1], eu[2], eu[3], eu[4], eu[5], eu[6], eu[7]}); -native_exp2_tester( - q, - {ev[0], ev[1], ev[2], ev[3], ev[4], ev[5], ev[6], ev[7], ev[8], ev[9], - ev[10], ev[11], ev[12], ev[13], ev[14], ev[15]}, - {el[0], el[1], el[2], el[3], el[4], el[5], el[6], el[7], el[8], el[9], - el[10], el[11], el[12], el[13], el[14], el[15]}, - {eu[0], eu[1], eu[2], eu[3], eu[4], eu[5], eu[6], eu[7], eu[8], eu[9], - eu[10], eu[11], eu[12], eu[13], eu[14], eu[15]}); + native_exp2_tester(q, ev[0], el[0], eu[0]); + native_exp2_tester(q, {ev[0], ev[1]}, {el[0], el[1]}, + {eu[0], eu[1]}); + native_exp2_tester(q, {ev[0], ev[1], ev[2]}, + {el[0], el[1], el[2]}, {eu[0], eu[1], eu[2]}); + native_exp2_tester(q, {ev[0], ev[1], ev[2], ev[3]}, + {el[0], el[1], el[2], el[3]}, + {eu[0], eu[1], eu[2], eu[3]}); + native_exp2_tester>( + q, {ev[0], ev[1], ev[2]}, {el[0], el[1], el[2]}, {eu[0], eu[1], eu[2]}); + native_exp2_tester>( + q, {ev[0], ev[1], ev[2], ev[3]}, {el[0], el[1], el[2], el[3]}, + {eu[0], eu[1], eu[2], eu[3]}); + native_exp2_tester( + q, {ev[0], ev[1], ev[2], ev[3], ev[4], ev[5], ev[6], ev[7]}, + {el[0], el[1], el[2], el[3], el[4], el[5], el[6], el[7]}, + {eu[0], eu[1], eu[2], eu[3], eu[4], eu[5], eu[6], eu[7]}); + native_exp2_tester( + q, + {ev[0], ev[1], ev[2], ev[3], ev[4], ev[5], ev[6], ev[7], ev[8], ev[9], + ev[10], ev[11], ev[12], ev[13], ev[14], ev[15]}, + {el[0], el[1], el[2], el[3], el[4], el[5], el[6], el[7], el[8], el[9], + el[10], el[11], el[12], el[13], el[14], el[15]}, + {eu[0], eu[1], eu[2], eu[3], eu[4], eu[5], eu[6], eu[7], eu[8], eu[9], + eu[10], eu[11], eu[12], eu[13], eu[14], eu[15]}); return 0; } From 1f0edf8cb176f671138c1ece9c5b2b8956737fd8 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Thu, 16 Jun 2022 11:01:14 +0100 Subject: [PATCH 07/20] use fp16 aspect in half_builtins.cpp Signed-off-by: JackAKirk --- SYCL/Basic/half_builtins.cpp | 62 ++++++++++++++++++++---------------- 1 file changed, 34 insertions(+), 28 deletions(-) diff --git a/SYCL/Basic/half_builtins.cpp b/SYCL/Basic/half_builtins.cpp index f26e58494d..e2e06f8e40 100644 --- a/SYCL/Basic/half_builtins.cpp +++ b/SYCL/Basic/half_builtins.cpp @@ -1,4 +1,4 @@ -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-device-code-split=per_kernel %s -o %t.out // RUN: %HOST_RUN_PLACEHOLDER %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out @@ -166,34 +166,40 @@ template bool check(vec a, vec b) { int main() { queue q; - if (q.get_device().has(sycl::aspect::fp16)) { - float16 a, b, c, d; - for (int i = 0; i < SZ_max; i++) { - a[i] = i / (float)SZ_max; - b[i] = (SZ_max - i) / (float)SZ_max; - c[i] = (float)(3 * i); - } - int err = 0; - { - buffer a_buf(&a, 1); - buffer b_buf(&b, 1); - buffer c_buf(&c, 1); - buffer err_buf(&err, 1); - q.submit([&](handler &cgh) { - auto A = a_buf.get_access(cgh); - auto B = b_buf.get_access(cgh); - auto C = c_buf.get_access(cgh); - auto err = err_buf.get_access(cgh); - cgh.parallel_for(SZ_max, [=](item<1> index) { - size_t i = index.get_id(0); - TEST_BUILTIN_1(fabs); - TEST_BUILTIN_2(fmin); - TEST_BUILTIN_2(fmax); - TEST_BUILTIN_3(fma); - }); + + if (!q.get_device().has(sycl::aspect::fp16)) { + std::cout << "skipping fp16 tests: requires fp16 device aspect." + << std::endl; + return 0; + } + + float16 a, b, c, d; + for (int i = 0; i < SZ_max; i++) { + a[i] = i / (float)SZ_max; + b[i] = (SZ_max - i) / (float)SZ_max; + c[i] = (float)(3 * i); + } + int err = 0; + { + buffer a_buf(&a, 1); + buffer b_buf(&b, 1); + buffer c_buf(&c, 1); + buffer err_buf(&err, 1); + q.submit([&](handler &cgh) { + auto A = a_buf.get_access(cgh); + auto B = b_buf.get_access(cgh); + auto C = c_buf.get_access(cgh); + auto err = err_buf.get_access(cgh); + cgh.parallel_for(SZ_max, [=](item<1> index) { + size_t i = index.get_id(0); + TEST_BUILTIN_1(fabs); + TEST_BUILTIN_2(fmin); + TEST_BUILTIN_2(fmax); + TEST_BUILTIN_3(fma); }); - } - assert(err == 0); + }); } + assert(err == 0); + return 0; } From 499c642b9a85340749df98c34bf54dc7d7e26ef8 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Thu, 16 Jun 2022 11:09:30 +0100 Subject: [PATCH 08/20] removed unnecessary -fsycl-device-code-split=per_kernel. Signed-off-by: JackAKirk --- SYCL/Basic/half_builtins.cpp | 2 +- SYCL/DeviceLib/built-ins/ext_native_math_fp16.cpp | 2 +- SYCL/DeviceLib/math_test_marray_vec_fp16.cpp | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/SYCL/Basic/half_builtins.cpp b/SYCL/Basic/half_builtins.cpp index e2e06f8e40..6b015121d3 100644 --- a/SYCL/Basic/half_builtins.cpp +++ b/SYCL/Basic/half_builtins.cpp @@ -1,4 +1,4 @@ -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-device-code-split=per_kernel %s -o %t.out +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out // RUN: %HOST_RUN_PLACEHOLDER %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out diff --git a/SYCL/DeviceLib/built-ins/ext_native_math_fp16.cpp b/SYCL/DeviceLib/built-ins/ext_native_math_fp16.cpp index a590b900d8..6a27bff4ad 100644 --- a/SYCL/DeviceLib/built-ins/ext_native_math_fp16.cpp +++ b/SYCL/DeviceLib/built-ins/ext_native_math_fp16.cpp @@ -1,4 +1,4 @@ -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-device-code-split=per_kernel %s -o %t.out +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out // RUN: %HOST_RUN_PLACEHOLDER %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out diff --git a/SYCL/DeviceLib/math_test_marray_vec_fp16.cpp b/SYCL/DeviceLib/math_test_marray_vec_fp16.cpp index 71740ad335..da9df54d80 100644 --- a/SYCL/DeviceLib/math_test_marray_vec_fp16.cpp +++ b/SYCL/DeviceLib/math_test_marray_vec_fp16.cpp @@ -1,4 +1,4 @@ -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-device-code-split=per_kernel %s -o %t.out +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out // RUN: %HOST_RUN_PLACEHOLDER %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out From 3d039698c9ea2ed9417408984674f21a779deb7c Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Thu, 30 Jun 2022 10:27:32 +0100 Subject: [PATCH 09/20] write -> read (superficial change in context of test). Signed-off-by: JackAKirk --- .../half_precision_math_test_marray_vec.cpp | 6 +++--- SYCL/DeviceLib/math_test_marray_vec_common.hpp | 12 ++++++------ SYCL/DeviceLib/native_math_test_marray_vec.cpp | 6 +++--- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/SYCL/DeviceLib/half_precision_math_test_marray_vec.cpp b/SYCL/DeviceLib/half_precision_math_test_marray_vec.cpp index b2f3d152fe..28936bc7d2 100644 --- a/SYCL/DeviceLib/half_precision_math_test_marray_vec.cpp +++ b/SYCL/DeviceLib/half_precision_math_test_marray_vec.cpp @@ -39,7 +39,7 @@ template bool checkEqual(marray A, size_t B) { deviceQueue.submit([&](handler &cgh) { \ accessor res_access( \ buffer1, cgh); \ - accessor input_access( \ + accessor input_access( \ buffer2, cgh); \ cgh.single_task>([=]() { \ res_access[0] = sycl::half_precision::NAME(input_access[0]); \ @@ -75,9 +75,9 @@ HALF_PRECISION_OPERATOR(recip) deviceQueue.submit([&](handler &cgh) { \ accessor res_access( \ buffer1, cgh); \ - accessor input1_access( \ + accessor input1_access( \ buffer2, cgh); \ - accessor input2_access( \ + accessor input2_access( \ buffer3, cgh); \ cgh.single_task>([=]() { \ res_access[0] = \ diff --git a/SYCL/DeviceLib/math_test_marray_vec_common.hpp b/SYCL/DeviceLib/math_test_marray_vec_common.hpp index cc4b440448..1c10543f68 100644 --- a/SYCL/DeviceLib/math_test_marray_vec_common.hpp +++ b/SYCL/DeviceLib/math_test_marray_vec_common.hpp @@ -32,7 +32,7 @@ template bool checkEqual(marray A, size_t B) { deviceQueue.submit([&](handler &cgh) { \ accessor res_access( \ buffer1, cgh); \ - accessor input_access( \ + accessor input_access( \ buffer2, cgh); \ cgh.single_task>( \ [=]() { res_access[0] = NAME(input_access[0]); }); \ @@ -93,9 +93,9 @@ OPERATOR(trunc) deviceQueue.submit([&](handler &cgh) { \ accessor res_access( \ buffer1, cgh); \ - accessor input1_access( \ + accessor input1_access( \ buffer2, cgh); \ - accessor input2_access( \ + accessor input2_access( \ buffer3, cgh); \ cgh.single_task>([=]() { \ res_access[0] = NAME(input1_access[0], input2_access[0]); \ @@ -134,11 +134,11 @@ OPERATOR_2(remainder) deviceQueue.submit([&](handler &cgh) { \ accessor res_access( \ buffer1, cgh); \ - accessor input1_access( \ + accessor input1_access( \ buffer2, cgh); \ - accessor input2_access( \ + accessor input2_access( \ buffer3, cgh); \ - accessor input3_access( \ + accessor input3_access( \ buffer4, cgh); \ cgh.single_task>([=]() { \ res_access[0] = \ diff --git a/SYCL/DeviceLib/native_math_test_marray_vec.cpp b/SYCL/DeviceLib/native_math_test_marray_vec.cpp index f138ced366..22f9515eca 100644 --- a/SYCL/DeviceLib/native_math_test_marray_vec.cpp +++ b/SYCL/DeviceLib/native_math_test_marray_vec.cpp @@ -39,7 +39,7 @@ template bool checkEqual(marray A, size_t B) { deviceQueue.submit([&](handler &cgh) { \ accessor res_access( \ buffer1, cgh); \ - accessor input_access( \ + accessor input_access( \ buffer2, cgh); \ cgh.single_task>( \ [=]() { res_access[0] = sycl::native::NAME(input_access[0]); }); \ @@ -74,9 +74,9 @@ NATIVE_OPERATOR(recip) deviceQueue.submit([&](handler &cgh) { \ accessor res_access( \ buffer1, cgh); \ - accessor input1_access( \ + accessor input1_access( \ buffer2, cgh); \ - accessor input2_access( \ + accessor input2_access( \ buffer3, cgh); \ cgh.single_task>([=]() { \ res_access[0] = \ From 538c64b75ac30cf2f902011619098f60fbcb8e46 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Thu, 30 Jun 2022 10:30:02 +0100 Subject: [PATCH 10/20] Removed float3 powr tests. Signed-off-by: JackAKirk --- SYCL/DeviceLib/half_precision_math_test_marray_vec.cpp | 2 -- SYCL/DeviceLib/native_math_test_marray_vec.cpp | 2 -- 2 files changed, 4 deletions(-) diff --git a/SYCL/DeviceLib/half_precision_math_test_marray_vec.cpp b/SYCL/DeviceLib/half_precision_math_test_marray_vec.cpp index 28936bc7d2..ad72883320 100644 --- a/SYCL/DeviceLib/half_precision_math_test_marray_vec.cpp +++ b/SYCL/DeviceLib/half_precision_math_test_marray_vec.cpp @@ -109,8 +109,6 @@ template void half_precision_math_tests_3(queue &deviceQueue) { T{0.25, 0.25, 0.25}, 2); half_precision_math_test_recip(deviceQueue, T{-1, -1, -1}, T{0.25, 0.25, 0.25}, 4); - half_precision_math_test_2_powr(deviceQueue, T{-1, -1, -1}, T{2, 2, 2}, - T{2, 2, 2}, 4); half_precision_math_test_2_divide(deviceQueue, T{-1, -1, -1}, T{4, 4, 4}, T{2, 2, 2}, 2); } diff --git a/SYCL/DeviceLib/native_math_test_marray_vec.cpp b/SYCL/DeviceLib/native_math_test_marray_vec.cpp index 22f9515eca..8943c99c1d 100644 --- a/SYCL/DeviceLib/native_math_test_marray_vec.cpp +++ b/SYCL/DeviceLib/native_math_test_marray_vec.cpp @@ -104,8 +104,6 @@ template void native_tests_3(queue &deviceQueue) { native_math_test_sqrt(deviceQueue, T{-1, -1, -1}, T{4, 4, 4}, 2); native_math_test_rsqrt(deviceQueue, T{-1, -1, -1}, T{0.25, 0.25, 0.25}, 2); native_math_test_recip(deviceQueue, T{-1, -1, -1}, T{0.25, 0.25, 0.25}, 4); - native_math_test_2_powr(deviceQueue, T{-1, -1, -1}, T{2, 2, 2}, T{2, 2, 2}, - 4); native_math_test_2_divide(deviceQueue, T{-1, -1, -1}, T{4, 4, 4}, T{2, 2, 2}, 2); } From e77ba6971d6247e7338a53cd2cc13bb1d1fb204d Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Fri, 1 Jul 2022 18:24:20 +0100 Subject: [PATCH 11/20] Removed failing test coverage for existing float3 functions. Signed-off-by: JackAKirk --- SYCL/DeviceLib/half_precision_math_test_marray_vec.cpp | 1 - SYCL/DeviceLib/native_math_test_marray_vec.cpp | 1 - 2 files changed, 2 deletions(-) diff --git a/SYCL/DeviceLib/half_precision_math_test_marray_vec.cpp b/SYCL/DeviceLib/half_precision_math_test_marray_vec.cpp index ad72883320..6ec4ad5b1b 100644 --- a/SYCL/DeviceLib/half_precision_math_test_marray_vec.cpp +++ b/SYCL/DeviceLib/half_precision_math_test_marray_vec.cpp @@ -99,7 +99,6 @@ template void half_precision_math_tests_3(queue &deviceQueue) { half_precision_math_test_cos(deviceQueue, T{-1, -1, -1}, T{0, 0, 0}, 1); half_precision_math_test_exp(deviceQueue, T{-1, -1, -1}, T{0, 0, 0}, 1); half_precision_math_test_exp2(deviceQueue, T{-1, -1, -1}, T{2, 2, 2}, 4); - half_precision_math_test_exp10(deviceQueue, T{-1, -1, -1}, T{2, 2, 2}, 100); half_precision_math_test_log(deviceQueue, T{-1, -1, -1}, T{1, 1, 1}, 0); half_precision_math_test_log2(deviceQueue, T{-1, -1, -1}, T{4, 4, 4}, 2); half_precision_math_test_log10(deviceQueue, T{-1, -1, -1}, T{100, 100, 100}, diff --git a/SYCL/DeviceLib/native_math_test_marray_vec.cpp b/SYCL/DeviceLib/native_math_test_marray_vec.cpp index 8943c99c1d..984126f909 100644 --- a/SYCL/DeviceLib/native_math_test_marray_vec.cpp +++ b/SYCL/DeviceLib/native_math_test_marray_vec.cpp @@ -97,7 +97,6 @@ template void native_tests_3(queue &deviceQueue) { native_math_test_tan(deviceQueue, T{-1, -1, -1}, T{0, 0, 0}, 0); native_math_test_cos(deviceQueue, T{-1, -1, -1}, T{0, 0, 0}, 1); native_math_test_exp(deviceQueue, T{-1, -1, -1}, T{0, 0, 0}, 1); - native_math_test_exp2(deviceQueue, T{-1, -1, -1}, T{2, 2, 2}, 4); native_math_test_log(deviceQueue, T{-1, -1, -1}, T{1, 1, 1}, 0); native_math_test_log2(deviceQueue, T{-1, -1, -1}, T{4, 4, 4}, 2); native_math_test_log10(deviceQueue, T{-1, -1, -1}, T{100, 100, 100}, 2); From 4f2dfe53ca1e965399e744e8c2df2dfd30b75912 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Tue, 6 Sep 2022 10:54:19 +0100 Subject: [PATCH 12/20] Remove broken half/native cases. Signed-off-by: JackAKirk --- SYCL/DeviceLib/half_precision_math_test_marray_vec.cpp | 1 - SYCL/DeviceLib/native_math_test_marray_vec.cpp | 1 - 2 files changed, 2 deletions(-) diff --git a/SYCL/DeviceLib/half_precision_math_test_marray_vec.cpp b/SYCL/DeviceLib/half_precision_math_test_marray_vec.cpp index 6ec4ad5b1b..65358b8a8e 100644 --- a/SYCL/DeviceLib/half_precision_math_test_marray_vec.cpp +++ b/SYCL/DeviceLib/half_precision_math_test_marray_vec.cpp @@ -54,7 +54,6 @@ HALF_PRECISION_OPERATOR(tan) HALF_PRECISION_OPERATOR(cos) HALF_PRECISION_OPERATOR(exp) HALF_PRECISION_OPERATOR(exp2) -HALF_PRECISION_OPERATOR(exp10) HALF_PRECISION_OPERATOR(log) HALF_PRECISION_OPERATOR(log2) HALF_PRECISION_OPERATOR(log10) diff --git a/SYCL/DeviceLib/native_math_test_marray_vec.cpp b/SYCL/DeviceLib/native_math_test_marray_vec.cpp index 984126f909..72a6cf4fff 100644 --- a/SYCL/DeviceLib/native_math_test_marray_vec.cpp +++ b/SYCL/DeviceLib/native_math_test_marray_vec.cpp @@ -56,7 +56,6 @@ NATIVE_OPERATOR(exp2) NATIVE_OPERATOR(exp10) NATIVE_OPERATOR(log) NATIVE_OPERATOR(log2) -NATIVE_OPERATOR(log10) NATIVE_OPERATOR(sqrt) NATIVE_OPERATOR(rsqrt) NATIVE_OPERATOR(recip) From c1972f09ab7e80fce4f19cb5c31231543393d428 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Wed, 7 Sep 2022 09:07:08 +0100 Subject: [PATCH 13/20] Removed unused cases. Signed-off-by: JackAKirk --- SYCL/DeviceLib/half_precision_math_test_marray_vec.cpp | 2 -- SYCL/DeviceLib/native_math_test_marray_vec.cpp | 3 --- 2 files changed, 5 deletions(-) diff --git a/SYCL/DeviceLib/half_precision_math_test_marray_vec.cpp b/SYCL/DeviceLib/half_precision_math_test_marray_vec.cpp index 65358b8a8e..c53b70e95f 100644 --- a/SYCL/DeviceLib/half_precision_math_test_marray_vec.cpp +++ b/SYCL/DeviceLib/half_precision_math_test_marray_vec.cpp @@ -122,8 +122,6 @@ template void half_precision_math_tests_4(queue &deviceQueue) { 1); half_precision_math_test_exp2(deviceQueue, T{-1, -1, -1, -1}, T{2, 2, 2, 2}, 4); - half_precision_math_test_exp10(deviceQueue, T{-1, -1, -1, -1}, T{2, 2, 2, 2}, - 100); half_precision_math_test_log(deviceQueue, T{-1, -1, -1, -1}, T{1, 1, 1, 1}, 0); half_precision_math_test_log2(deviceQueue, T{-1, -1, -1, -1}, T{4, 4, 4, 4}, diff --git a/SYCL/DeviceLib/native_math_test_marray_vec.cpp b/SYCL/DeviceLib/native_math_test_marray_vec.cpp index 72a6cf4fff..75b24197f0 100644 --- a/SYCL/DeviceLib/native_math_test_marray_vec.cpp +++ b/SYCL/DeviceLib/native_math_test_marray_vec.cpp @@ -98,7 +98,6 @@ template void native_tests_3(queue &deviceQueue) { native_math_test_exp(deviceQueue, T{-1, -1, -1}, T{0, 0, 0}, 1); native_math_test_log(deviceQueue, T{-1, -1, -1}, T{1, 1, 1}, 0); native_math_test_log2(deviceQueue, T{-1, -1, -1}, T{4, 4, 4}, 2); - native_math_test_log10(deviceQueue, T{-1, -1, -1}, T{100, 100, 100}, 2); native_math_test_sqrt(deviceQueue, T{-1, -1, -1}, T{4, 4, 4}, 2); native_math_test_rsqrt(deviceQueue, T{-1, -1, -1}, T{0.25, 0.25, 0.25}, 2); native_math_test_recip(deviceQueue, T{-1, -1, -1}, T{0.25, 0.25, 0.25}, 4); @@ -114,8 +113,6 @@ template void native_tests_4(queue &deviceQueue) { native_math_test_exp2(deviceQueue, T{-1, -1, -1, -1}, T{2, 2, 2, 2}, 4); native_math_test_log(deviceQueue, T{-1, -1, -1, -1}, T{1, 1, 1, 1}, 0); native_math_test_log2(deviceQueue, T{-1, -1, -1, -1}, T{4, 4, 4, 4}, 2); - native_math_test_log10(deviceQueue, T{-1, -1, -1, -1}, T{100, 100, 100, 100}, - 2); native_math_test_sqrt(deviceQueue, T{-1, -1, -1, -1}, T{4, 4, 4, 4}, 2); native_math_test_rsqrt(deviceQueue, T{-1, -1, -1, -1}, T{0.25, 0.25, 0.25, 0.25}, 2); From 57f33290af9c35f81f3e029400be71b92b26e3ec Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Mon, 12 Sep 2022 15:28:04 +0100 Subject: [PATCH 14/20] Add marray -fast-math test cases. Signed-off-by: JackAKirk --- SYCL/DeviceLib/built-ins/fast-math-flag.cpp | 118 +++++++++++++------- 1 file changed, 76 insertions(+), 42 deletions(-) diff --git a/SYCL/DeviceLib/built-ins/fast-math-flag.cpp b/SYCL/DeviceLib/built-ins/fast-math-flag.cpp index e6b7d08238..d90845e58d 100644 --- a/SYCL/DeviceLib/built-ins/fast-math-flag.cpp +++ b/SYCL/DeviceLib/built-ins/fast-math-flag.cpp @@ -6,47 +6,64 @@ #include #include -#define __TEST_FFMATH_BINARY(func) \ - int test_ffmath_##func() { \ - sycl::float4 r[2]; \ - sycl::float4 val[2] = {{1.0004f, 1e-4f, 1.4f, 14.0f}, \ - {1.0004f, 1e-4f, 1.4f, 14.0f}}; \ +using namespace sycl; + +template bool checkEqual(vec A, vec B) { + + return sycl::all(A == B); +} + +template +bool checkEqual(marray A, marray B) { + for (int i = 0; i < N; i++) { + if (A[i] != B[i]) { + return false; + } + } + return true; +} + +#define __TEST_FFMATH_UNARY(func) \ + template void test_ffmath_##func(queue &deviceQueue) { \ + T input{1.0004f, 1e-4f, 1.4f, 14.0f}; \ + T res[2] = {{-1, -1, -1, -1}, {-2, -2, -2, -2}}; \ { \ - sycl::buffer output(&r[0], sycl::range<1>(2)); \ - sycl::buffer input(&val[0], sycl::range<1>(2)); \ - sycl::queue q; \ - q.submit([&](sycl::handler &cgh) { \ - auto AccO = \ - output.template get_access(cgh); \ - auto AccI = input.template get_access(cgh); \ + buffer input_buff(&input, 1); \ + buffer res_buff(&res[0], sycl::range<1>(2)); \ + deviceQueue.submit([&](handler &cgh) { \ + accessor res_acc(res_buff, \ + cgh); \ + accessor input_acc( \ + input_buff, cgh); \ cgh.single_task([=]() { \ - AccO[0] = sycl::func(AccI[0], AccI[1]); \ - AccO[1] = sycl::native::func(AccI[0], AccI[1]); \ + res_acc[0] = sycl::native::func(input_acc[0]); \ + res_acc[1] = sycl::func(input_acc[0]); \ }); \ }); \ } \ - return sycl::all(r[0] == r[1]); \ + assert(checkEqual(res[0], res[1])); \ } -#define __TEST_FFMATH_UNARY(func) \ - int test_ffmath_##func() { \ - sycl::float4 val = {1.0004f, 1e-4f, 1.4f, 14.0f}; \ - sycl::float4 r[2]; \ +#define __TEST_FFMATH_BINARY(func) \ + template void test_ffmath_##func(queue &deviceQueue) { \ + T input[2] = {{1.0004f, 1e-4f, 1.4f, 14.0f}, \ + {1.0004f, 1e-4f, 1.4f, 14.0f}}; \ + T res[2] = {{-1, -1, -1, -1}, {-2, -2, -2, -2}}; \ { \ - sycl::buffer output(&r[0], sycl::range<1>(2)); \ - sycl::buffer input(&val, sycl::range<1>(1)); \ - sycl::queue q; \ - q.submit([&](sycl::handler &cgh) { \ - auto AccO = \ - output.template get_access(cgh); \ - auto AccI = input.template get_access(cgh); \ + buffer input_buff(&input[0], range<1>(2)); \ + buffer res_buff(&res[0], range<1>(2)); \ + deviceQueue.submit([&](handler &cgh) { \ + accessor res_acc(res_buff, \ + cgh); \ + accessor input_acc( \ + input_buff, cgh); \ cgh.single_task([=]() { \ - AccO[0] = sycl::func(AccI[0]); \ - AccO[1] = sycl::native::func(AccI[0]); \ + res_acc[0] = sycl::native::func(input_acc[0], input_acc[1]); \ + res_acc[1] = sycl::func(input_acc[0], input_acc[1]); \ }); \ }); \ } \ - return sycl::all(r[0] == r[1]); \ + assert(checkEqual(res[0], res[1])); \ } __TEST_FFMATH_UNARY(cos) @@ -56,26 +73,43 @@ __TEST_FFMATH_UNARY(exp10) __TEST_FFMATH_UNARY(log) __TEST_FFMATH_UNARY(log2) __TEST_FFMATH_UNARY(log10) -__TEST_FFMATH_BINARY(powr) __TEST_FFMATH_UNARY(rsqrt) __TEST_FFMATH_UNARY(sin) __TEST_FFMATH_UNARY(sqrt) __TEST_FFMATH_UNARY(tan) +__TEST_FFMATH_BINARY(powr) + int main() { - assert(test_ffmath_cos()); - assert(test_ffmath_exp()); - assert(test_ffmath_exp2()); - assert(test_ffmath_exp10()); - assert(test_ffmath_log()); - assert(test_ffmath_log2()); - assert(test_ffmath_log10()); - assert(test_ffmath_powr()); - assert(test_ffmath_rsqrt()); - assert(test_ffmath_sin()); - assert(test_ffmath_sqrt()); - assert(test_ffmath_tan()); + queue q; + test_ffmath_cos>(q); + test_ffmath_exp>(q); + test_ffmath_exp2>(q); + test_ffmath_exp10>(q); + test_ffmath_log>(q); + test_ffmath_log2>(q); + test_ffmath_log10>(q); + test_ffmath_powr>(q); + test_ffmath_rsqrt>(q); + test_ffmath_sin>(q); + test_ffmath_sqrt>(q); + test_ffmath_tan>(q); + test_ffmath_powr>(q); + + test_ffmath_cos(q); + test_ffmath_exp(q); + test_ffmath_exp2(q); + test_ffmath_exp10(q); + test_ffmath_log(q); + test_ffmath_log2(q); + test_ffmath_log10(q); + test_ffmath_powr(q); + test_ffmath_rsqrt(q); + test_ffmath_sin(q); + test_ffmath_sqrt(q); + test_ffmath_tan(q); + test_ffmath_powr(q); return 0; } From a9efc862b6cdac11a52f09f1835fd01a8938832f Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Tue, 13 Sep 2022 14:45:53 +0100 Subject: [PATCH 15/20] Removed initially proposed native and half_prec tests. Signed-off-by: JackAKirk --- .../half_precision_math_test_marray_vec.cpp | 153 ------------------ .../DeviceLib/native_math_test_marray_vec.cpp | 138 ---------------- 2 files changed, 291 deletions(-) delete mode 100644 SYCL/DeviceLib/half_precision_math_test_marray_vec.cpp delete mode 100644 SYCL/DeviceLib/native_math_test_marray_vec.cpp diff --git a/SYCL/DeviceLib/half_precision_math_test_marray_vec.cpp b/SYCL/DeviceLib/half_precision_math_test_marray_vec.cpp deleted file mode 100644 index c53b70e95f..0000000000 --- a/SYCL/DeviceLib/half_precision_math_test_marray_vec.cpp +++ /dev/null @@ -1,153 +0,0 @@ -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out -// RUN: %GPU_RUN_PLACEHOLDER %t.out -// RUN: %HOST_RUN_PLACEHOLDER %t.out -// RUN: %CPU_RUN_PLACEHOLDER %t.out -// RUN: %ACC_RUN_PLACEHOLDER %t.out - -#include - -using namespace sycl; - -template class TypeHelper; - -template bool checkEqual(vec A, size_t B) { - T TB = B; - return A.x() == TB && A.y() == TB && A.z() == TB; -} - -template bool checkEqual(vec A, size_t B) { - T TB = B; - return A.x() == TB && A.y() == TB && A.z() == TB && A.w() == TB; -} - -template bool checkEqual(marray A, size_t B) { - for (int i = 0; i < N; i++) { - if (A[i] != B) { - return false; - } - } - return true; -} - -#define HALF_PRECISION_OPERATOR(NAME) \ - template \ - void half_precision_math_test_##NAME(queue &deviceQueue, T result, T input, \ - size_t ref) { \ - { \ - buffer buffer1(&result, 1); \ - buffer buffer2(&input, 1); \ - deviceQueue.submit([&](handler &cgh) { \ - accessor res_access( \ - buffer1, cgh); \ - accessor input_access( \ - buffer2, cgh); \ - cgh.single_task>([=]() { \ - res_access[0] = sycl::half_precision::NAME(input_access[0]); \ - }); \ - }); \ - } \ - assert(checkEqual(result, ref)); \ - } - -HALF_PRECISION_OPERATOR(sin) -HALF_PRECISION_OPERATOR(tan) -HALF_PRECISION_OPERATOR(cos) -HALF_PRECISION_OPERATOR(exp) -HALF_PRECISION_OPERATOR(exp2) -HALF_PRECISION_OPERATOR(log) -HALF_PRECISION_OPERATOR(log2) -HALF_PRECISION_OPERATOR(log10) -HALF_PRECISION_OPERATOR(sqrt) -HALF_PRECISION_OPERATOR(rsqrt) -HALF_PRECISION_OPERATOR(recip) - -#undef HALF_PRECISION_OPERATOR - -#define HALF_PRECISION_OPERATOR_2(NAME) \ - template \ - void half_precision_math_test_2_##NAME(queue &deviceQueue, T result, \ - T input1, T input2, size_t ref) { \ - { \ - buffer buffer1(&result, 1); \ - buffer buffer2(&input1, 1); \ - buffer buffer3(&input2, 1); \ - deviceQueue.submit([&](handler &cgh) { \ - accessor res_access( \ - buffer1, cgh); \ - accessor input1_access( \ - buffer2, cgh); \ - accessor input2_access( \ - buffer3, cgh); \ - cgh.single_task>([=]() { \ - res_access[0] = \ - sycl::half_precision::NAME(input1_access[0], input2_access[0]); \ - }); \ - }); \ - } \ - assert(checkEqual(result, ref)); \ - } - -HALF_PRECISION_OPERATOR_2(divide) -HALF_PRECISION_OPERATOR_2(powr) - -#undef HALF_PRECISION_OPERATOR_2 - -template void half_precision_math_tests_3(queue &deviceQueue) { - half_precision_math_test_sin(deviceQueue, T{-1, -1, -1}, T{0, 0, 0}, 0); - half_precision_math_test_tan(deviceQueue, T{-1, -1, -1}, T{0, 0, 0}, 0); - half_precision_math_test_cos(deviceQueue, T{-1, -1, -1}, T{0, 0, 0}, 1); - half_precision_math_test_exp(deviceQueue, T{-1, -1, -1}, T{0, 0, 0}, 1); - half_precision_math_test_exp2(deviceQueue, T{-1, -1, -1}, T{2, 2, 2}, 4); - half_precision_math_test_log(deviceQueue, T{-1, -1, -1}, T{1, 1, 1}, 0); - half_precision_math_test_log2(deviceQueue, T{-1, -1, -1}, T{4, 4, 4}, 2); - half_precision_math_test_log10(deviceQueue, T{-1, -1, -1}, T{100, 100, 100}, - 2); - half_precision_math_test_sqrt(deviceQueue, T{-1, -1, -1}, T{4, 4, 4}, 2); - half_precision_math_test_rsqrt(deviceQueue, T{-1, -1, -1}, - T{0.25, 0.25, 0.25}, 2); - half_precision_math_test_recip(deviceQueue, T{-1, -1, -1}, - T{0.25, 0.25, 0.25}, 4); - half_precision_math_test_2_divide(deviceQueue, T{-1, -1, -1}, T{4, 4, 4}, - T{2, 2, 2}, 2); -} - -template void half_precision_math_tests_4(queue &deviceQueue) { - half_precision_math_test_sin(deviceQueue, T{-1, -1, -1, -1}, T{0, 0, 0, 0}, - 0); - half_precision_math_test_tan(deviceQueue, T{-1, -1, -1, -1}, T{0, 0, 0, 0}, - 0); - half_precision_math_test_cos(deviceQueue, T{-1, -1, -1, -1}, T{0, 0, 0, 0}, - 1); - half_precision_math_test_exp(deviceQueue, T{-1, -1, -1, -1}, T{0, 0, 0, 0}, - 1); - half_precision_math_test_exp2(deviceQueue, T{-1, -1, -1, -1}, T{2, 2, 2, 2}, - 4); - half_precision_math_test_log(deviceQueue, T{-1, -1, -1, -1}, T{1, 1, 1, 1}, - 0); - half_precision_math_test_log2(deviceQueue, T{-1, -1, -1, -1}, T{4, 4, 4, 4}, - 2); - half_precision_math_test_log10(deviceQueue, T{-1, -1, -1, -1}, - T{100, 100, 100, 100}, 2); - half_precision_math_test_sqrt(deviceQueue, T{-1, -1, -1, -1}, T{4, 4, 4, 4}, - 2); - half_precision_math_test_rsqrt(deviceQueue, T{-1, -1, -1, -1}, - T{0.25, 0.25, 0.25, 0.25}, 2); - half_precision_math_test_recip(deviceQueue, T{-1, -1, -1, -1}, - T{0.25, 0.25, 0.25, 0.25}, 4); - half_precision_math_test_2_powr(deviceQueue, T{-1, -1, -1, -1}, T{2, 2, 2, 2}, - T{2, 2, 2, 2}, 4); - half_precision_math_test_2_divide(deviceQueue, T{-1, -1, -1, -1}, - T{4, 4, 4, 4}, T{2, 2, 2, 2}, 2); -} - -int main() { - queue deviceQueue; - - half_precision_math_tests_3(deviceQueue); - half_precision_math_tests_3>(deviceQueue); - - half_precision_math_tests_4(deviceQueue); - half_precision_math_tests_4>(deviceQueue); - std::cout << "Pass" << std::endl; - return 0; -} diff --git a/SYCL/DeviceLib/native_math_test_marray_vec.cpp b/SYCL/DeviceLib/native_math_test_marray_vec.cpp deleted file mode 100644 index 75b24197f0..0000000000 --- a/SYCL/DeviceLib/native_math_test_marray_vec.cpp +++ /dev/null @@ -1,138 +0,0 @@ -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out -// RUN: %GPU_RUN_PLACEHOLDER %t.out -// RUN: %HOST_RUN_PLACEHOLDER %t.out -// RUN: %CPU_RUN_PLACEHOLDER %t.out -// RUN: %ACC_RUN_PLACEHOLDER %t.out - -#include - -using namespace sycl; - -template class TypeHelper; - -template bool checkEqual(vec A, size_t B) { - T TB = B; - return A.x() == TB && A.y() == TB && A.z() == TB; -} - -template bool checkEqual(vec A, size_t B) { - T TB = B; - return A.x() == TB && A.y() == TB && A.z() == TB && A.w() == TB; -} - -template bool checkEqual(marray A, size_t B) { - for (int i = 0; i < N; i++) { - if (A[i] != B) { - return false; - } - } - return true; -} - -#define NATIVE_OPERATOR(NAME) \ - template \ - void native_math_test_##NAME(queue &deviceQueue, T result, T input, \ - size_t ref) { \ - { \ - buffer buffer1(&result, 1); \ - buffer buffer2(&input, 1); \ - deviceQueue.submit([&](handler &cgh) { \ - accessor res_access( \ - buffer1, cgh); \ - accessor input_access( \ - buffer2, cgh); \ - cgh.single_task>( \ - [=]() { res_access[0] = sycl::native::NAME(input_access[0]); }); \ - }); \ - } \ - assert(checkEqual(result, ref)); \ - } - -NATIVE_OPERATOR(sin) -NATIVE_OPERATOR(tan) -NATIVE_OPERATOR(cos) -NATIVE_OPERATOR(exp) -NATIVE_OPERATOR(exp2) -NATIVE_OPERATOR(exp10) -NATIVE_OPERATOR(log) -NATIVE_OPERATOR(log2) -NATIVE_OPERATOR(sqrt) -NATIVE_OPERATOR(rsqrt) -NATIVE_OPERATOR(recip) - -#undef NATIVE_OPERATOR - -#define NATIVE_OPERATOR_2(NAME) \ - template \ - void native_math_test_2_##NAME(queue &deviceQueue, T result, T input1, \ - T input2, size_t ref) { \ - { \ - buffer buffer1(&result, 1); \ - buffer buffer2(&input1, 1); \ - buffer buffer3(&input2, 1); \ - deviceQueue.submit([&](handler &cgh) { \ - accessor res_access( \ - buffer1, cgh); \ - accessor input1_access( \ - buffer2, cgh); \ - accessor input2_access( \ - buffer3, cgh); \ - cgh.single_task>([=]() { \ - res_access[0] = \ - sycl::native::NAME(input1_access[0], input2_access[0]); \ - }); \ - }); \ - } \ - assert(checkEqual(result, ref)); \ - } - -NATIVE_OPERATOR_2(divide) -NATIVE_OPERATOR_2(powr) - -#undef NATIVE_OPERATOR_2 - -template void native_tests_3(queue &deviceQueue) { - native_math_test_sin(deviceQueue, T{-1, -1, -1}, T{0, 0, 0}, 0); - native_math_test_tan(deviceQueue, T{-1, -1, -1}, T{0, 0, 0}, 0); - native_math_test_cos(deviceQueue, T{-1, -1, -1}, T{0, 0, 0}, 1); - native_math_test_exp(deviceQueue, T{-1, -1, -1}, T{0, 0, 0}, 1); - native_math_test_log(deviceQueue, T{-1, -1, -1}, T{1, 1, 1}, 0); - native_math_test_log2(deviceQueue, T{-1, -1, -1}, T{4, 4, 4}, 2); - native_math_test_sqrt(deviceQueue, T{-1, -1, -1}, T{4, 4, 4}, 2); - native_math_test_rsqrt(deviceQueue, T{-1, -1, -1}, T{0.25, 0.25, 0.25}, 2); - native_math_test_recip(deviceQueue, T{-1, -1, -1}, T{0.25, 0.25, 0.25}, 4); - native_math_test_2_divide(deviceQueue, T{-1, -1, -1}, T{4, 4, 4}, T{2, 2, 2}, - 2); -} - -template void native_tests_4(queue &deviceQueue) { - native_math_test_sin(deviceQueue, T{-1, -1, -1, -1}, T{0, 0, 0, 0}, 0); - native_math_test_tan(deviceQueue, T{-1, -1, -1, -1}, T{0, 0, 0, 0}, 0); - native_math_test_cos(deviceQueue, T{-1, -1, -1, -1}, T{0, 0, 0, 0}, 1); - native_math_test_exp(deviceQueue, T{-1, -1, -1, -1}, T{0, 0, 0, 0}, 1); - native_math_test_exp2(deviceQueue, T{-1, -1, -1, -1}, T{2, 2, 2, 2}, 4); - native_math_test_log(deviceQueue, T{-1, -1, -1, -1}, T{1, 1, 1, 1}, 0); - native_math_test_log2(deviceQueue, T{-1, -1, -1, -1}, T{4, 4, 4, 4}, 2); - native_math_test_sqrt(deviceQueue, T{-1, -1, -1, -1}, T{4, 4, 4, 4}, 2); - native_math_test_rsqrt(deviceQueue, T{-1, -1, -1, -1}, - T{0.25, 0.25, 0.25, 0.25}, 2); - native_math_test_recip(deviceQueue, T{-1, -1, -1, -1}, - T{0.25, 0.25, 0.25, 0.25}, 4); - native_math_test_2_powr(deviceQueue, T{-1, -1, -1, -1}, T{2, 2, 2, 2}, - T{2, 2, 2, 2}, 4); - native_math_test_2_divide(deviceQueue, T{-1, -1, -1, -1}, T{4, 4, 4, 4}, - T{2, 2, 2, 2}, 2); -} - -int main() { - queue deviceQueue; - - native_tests_3(deviceQueue); - native_tests_3>(deviceQueue); - - native_tests_4(deviceQueue); - native_tests_4>(deviceQueue); - - std::cout << "Pass" << std::endl; - return 0; -} From f731c935345790082917b5bf76c913d2970cf559 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Tue, 13 Sep 2022 15:00:55 +0100 Subject: [PATCH 16/20] Added back device-code-split. Signed-off-by: JackAKirk --- SYCL/DeviceLib/built-ins/ext_native_math.cpp | 4 ++-- SYCL/DeviceLib/built-ins/ext_native_math_fp16.cpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/SYCL/DeviceLib/built-ins/ext_native_math.cpp b/SYCL/DeviceLib/built-ins/ext_native_math.cpp index 889c1baed8..4ab4dcf4d1 100644 --- a/SYCL/DeviceLib/built-ins/ext_native_math.cpp +++ b/SYCL/DeviceLib/built-ins/ext_native_math.cpp @@ -3,7 +3,7 @@ // RUN: %GPU_RUN_PLACEHOLDER %t.out // RUN: %ACC_RUN_PLACEHOLDER %t.out -// tests oneapi extension native tanh math function for sycl::vec and +// Tests oneapi extension native tanh math function for sycl::vec and // sycl::marray float cases. #include "ext_native_math_common.hpp" @@ -47,4 +47,4 @@ int main() { tu[10], tu[11], tu[12], tu[13], tu[14], tu[15]}); return 0; -} \ No newline at end of file +} diff --git a/SYCL/DeviceLib/built-ins/ext_native_math_fp16.cpp b/SYCL/DeviceLib/built-ins/ext_native_math_fp16.cpp index 6a27bff4ad..9b89fb347b 100644 --- a/SYCL/DeviceLib/built-ins/ext_native_math_fp16.cpp +++ b/SYCL/DeviceLib/built-ins/ext_native_math_fp16.cpp @@ -1,4 +1,4 @@ -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-device-code-split=per_kernel %s -o %t.out // RUN: %HOST_RUN_PLACEHOLDER %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out @@ -7,7 +7,7 @@ // OpenCL CPU driver does not support cl_khr_fp16 extension for this reason this // test is compiled with the -fsycl-device-code-split flag -// tests oneapi extension native math functions for sycl::vec and sycl::marray +// Tests oneapi extension native math functions for sycl::vec and sycl::marray // fp16 cases. #include "ext_native_math_common.hpp" From 505064cd1aecd0771fcc58daffedb961255a7997 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Wed, 14 Sep 2022 09:19:55 +0100 Subject: [PATCH 17/20] Remove host_runs. Signed-off-by: JackAKirk --- SYCL/DeviceLib/built-ins/ext_native_math_fp16.cpp | 1 - SYCL/DeviceLib/math_test_marray_vec.cpp | 1 - SYCL/DeviceLib/math_test_marray_vec_fp16.cpp | 1 - 3 files changed, 3 deletions(-) diff --git a/SYCL/DeviceLib/built-ins/ext_native_math_fp16.cpp b/SYCL/DeviceLib/built-ins/ext_native_math_fp16.cpp index 9b89fb347b..74bd0f8213 100644 --- a/SYCL/DeviceLib/built-ins/ext_native_math_fp16.cpp +++ b/SYCL/DeviceLib/built-ins/ext_native_math_fp16.cpp @@ -1,5 +1,4 @@ // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-device-code-split=per_kernel %s -o %t.out -// RUN: %HOST_RUN_PLACEHOLDER %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out // RUN: %ACC_RUN_PLACEHOLDER %t.out diff --git a/SYCL/DeviceLib/math_test_marray_vec.cpp b/SYCL/DeviceLib/math_test_marray_vec.cpp index de054a2c1f..721db1a559 100644 --- a/SYCL/DeviceLib/math_test_marray_vec.cpp +++ b/SYCL/DeviceLib/math_test_marray_vec.cpp @@ -1,6 +1,5 @@ // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out -// RUN: %HOST_RUN_PLACEHOLDER %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %ACC_RUN_PLACEHOLDER %t.out diff --git a/SYCL/DeviceLib/math_test_marray_vec_fp16.cpp b/SYCL/DeviceLib/math_test_marray_vec_fp16.cpp index da9df54d80..7db7350ece 100644 --- a/SYCL/DeviceLib/math_test_marray_vec_fp16.cpp +++ b/SYCL/DeviceLib/math_test_marray_vec_fp16.cpp @@ -1,6 +1,5 @@ // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out -// RUN: %HOST_RUN_PLACEHOLDER %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %ACC_RUN_PLACEHOLDER %t.out From cfc1e91c3c93ed3330fa1543de8ab2cac8eb55de Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Thu, 22 Sep 2022 11:44:47 +0100 Subject: [PATCH 18/20] windows && level_zero marked unsupported. Signed-off-by: JackAKirk --- SYCL/DeviceLib/math_test_marray_vec.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/SYCL/DeviceLib/math_test_marray_vec.cpp b/SYCL/DeviceLib/math_test_marray_vec.cpp index 721db1a559..83c69cd947 100644 --- a/SYCL/DeviceLib/math_test_marray_vec.cpp +++ b/SYCL/DeviceLib/math_test_marray_vec.cpp @@ -1,3 +1,5 @@ +// TODO fix level_zero on windows failure +// UNSUPPORTED: (windows && level_zero) // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out From ea6693d2bf74e2aa450603ffe4df89c6d9541b84 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Thu, 22 Sep 2022 15:00:54 +0100 Subject: [PATCH 19/20] Mark opencl && windows unsupported. Signed-off-by: JackAKirk --- SYCL/DeviceLib/math_test_marray_vec.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/SYCL/DeviceLib/math_test_marray_vec.cpp b/SYCL/DeviceLib/math_test_marray_vec.cpp index 83c69cd947..cae8852791 100644 --- a/SYCL/DeviceLib/math_test_marray_vec.cpp +++ b/SYCL/DeviceLib/math_test_marray_vec.cpp @@ -1,5 +1,5 @@ -// TODO fix level_zero on windows failure -// UNSUPPORTED: (windows && level_zero) +// TODO fix windows failures +// UNSUPPORTED: windows && (level_zero || opencl) // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out From 512a37ab748843fb0673133ee150f08e3c6fb4c0 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Fri, 2 Dec 2022 14:43:07 +0000 Subject: [PATCH 20/20] Added fp64 aspect check. Signed-off-by: JackAKirk --- SYCL/DeviceLib/math_test_marray_vec.cpp | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/SYCL/DeviceLib/math_test_marray_vec.cpp b/SYCL/DeviceLib/math_test_marray_vec.cpp index cae8852791..18dfc1ae00 100644 --- a/SYCL/DeviceLib/math_test_marray_vec.cpp +++ b/SYCL/DeviceLib/math_test_marray_vec.cpp @@ -13,14 +13,18 @@ int main() { queue deviceQueue; math_tests_4(deviceQueue); - math_tests_4(deviceQueue); math_tests_4>(deviceQueue); - math_tests_4>(deviceQueue); math_tests_3(deviceQueue); - math_tests_3(deviceQueue); math_tests_3>(deviceQueue); - math_tests_3>(deviceQueue); + + if (deviceQueue.get_device().has(sycl::aspect::fp64)) { + math_tests_4(deviceQueue); + math_tests_4>(deviceQueue); + + math_tests_3(deviceQueue); + math_tests_3>(deviceQueue); + } std::cout << "Pass" << std::endl; return 0;