From 1c064d1a72914ad714a20bc4b0b0f8ca27acdac5 Mon Sep 17 00:00:00 2001 From: Alex Sepkowski Date: Thu, 22 Jan 2026 22:46:13 -0800 Subject: [PATCH 1/6] Set DXIL version to 1.9 for SM6.9 release (#8063) Set DXIL version to 1.9 for SM6.9. Was previously 1.10. #8091 --- include/dxc/DXIL/DxilConstants.h | 2 +- include/dxc/DXIL/DxilShaderModel.h | 5 +- include/dxc/Support/HLSLOptions.td | 2 +- lib/DXIL/DxilShaderModel.cpp | 237 ++++++++---------- tools/clang/unittests/HLSL/ValidationTest.cpp | 65 ++--- utils/hct/hctdb_instrhelp.py | 2 +- 6 files changed, 137 insertions(+), 176 deletions(-) diff --git a/include/dxc/DXIL/DxilConstants.h b/include/dxc/DXIL/DxilConstants.h index 2c00080ada..949795c110 100644 --- a/include/dxc/DXIL/DxilConstants.h +++ b/include/dxc/DXIL/DxilConstants.h @@ -29,7 +29,7 @@ namespace DXIL { const unsigned kDxilMajor = 1; /* hctdb_instrhelp.get_dxil_version_minor()*/ // VALRULE-TEXT:BEGIN -const unsigned kDxilMinor = 10; +const unsigned kDxilMinor = 9; // VALRULE-TEXT:END inline unsigned MakeDxilVersion(unsigned DxilMajor, unsigned DxilMinor) { diff --git a/include/dxc/DXIL/DxilShaderModel.h b/include/dxc/DXIL/DxilShaderModel.h index ff9ef39b99..380fb1aa32 100644 --- a/include/dxc/DXIL/DxilShaderModel.h +++ b/include/dxc/DXIL/DxilShaderModel.h @@ -33,7 +33,7 @@ class ShaderModel { // clang-format on // VALRULE-TEXT:BEGIN static const unsigned kHighestMajor = 6; - static const unsigned kHighestMinor = 10; + static const unsigned kHighestMinor = 9; // VALRULE-TEXT:END // Major/Minor version of highest released shader model @@ -87,7 +87,6 @@ class ShaderModel { bool IsSM67Plus() const { return IsSMAtLeast(6, 7); } bool IsSM68Plus() const { return IsSMAtLeast(6, 8); } bool IsSM69Plus() const { return IsSMAtLeast(6, 9); } - bool IsSM610Plus() const { return IsSMAtLeast(6, 10); } // VALRULE-TEXT:END const char *GetName() const { return m_pszName; } const char *GetKindName() const; @@ -139,7 +138,7 @@ class ShaderModel { bool m_bTypedUavs, unsigned m_UAVRegsLim); /* hctdb_instrhelp.get_num_shader_models()*/ // VALRULE-TEXT:BEGIN - static const unsigned kNumShaderModels = 116; + static const unsigned kNumShaderModels = 107; // VALRULE-TEXT:END static const ShaderModel ms_ShaderModels[kNumShaderModels]; diff --git a/include/dxc/Support/HLSLOptions.td b/include/dxc/Support/HLSLOptions.td index cd7dfb2f0c..40182f85b9 100644 --- a/include/dxc/Support/HLSLOptions.td +++ b/include/dxc/Support/HLSLOptions.td @@ -451,7 +451,7 @@ def fvk_bind_counter_heap : MultiArg<["-"], "fvk-bind-counter-heap", 2>, MetaVar def target_profile : JoinedOrSeparate<["-", "/"], "T">, Flags<[CoreOption]>, Group, MetaVarName<"">, /* hctdb_instrhelp.get_target_profiles()*/ // VALRULE-TEXT:BEGIN - HelpText<"Set target profile. 
\n\t: ps_6_0, ps_6_1, ps_6_2, ps_6_3, ps_6_4, ps_6_5, ps_6_6, ps_6_7, ps_6_8, ps_6_9, ps_6_10, \n\t\t vs_6_0, vs_6_1, vs_6_2, vs_6_3, vs_6_4, vs_6_5, vs_6_6, vs_6_7, vs_6_8, vs_6_9, vs_6_10, \n\t\t gs_6_0, gs_6_1, gs_6_2, gs_6_3, gs_6_4, gs_6_5, gs_6_6, gs_6_7, gs_6_8, gs_6_9, gs_6_10, \n\t\t hs_6_0, hs_6_1, hs_6_2, hs_6_3, hs_6_4, hs_6_5, hs_6_6, hs_6_7, hs_6_8, hs_6_9, hs_6_10, \n\t\t ds_6_0, ds_6_1, ds_6_2, ds_6_3, ds_6_4, ds_6_5, ds_6_6, ds_6_7, ds_6_8, ds_6_9, ds_6_10, \n\t\t cs_6_0, cs_6_1, cs_6_2, cs_6_3, cs_6_4, cs_6_5, cs_6_6, cs_6_7, cs_6_8, cs_6_9, cs_6_10, \n\t\t lib_6_1, lib_6_2, lib_6_3, lib_6_4, lib_6_5, lib_6_6, lib_6_7, lib_6_8, lib_6_9, lib_6_10, \n\t\t ms_6_5, ms_6_6, ms_6_7, ms_6_8, ms_6_9, ms_6_10, \n\t\t as_6_5, as_6_6, as_6_7, as_6_8, as_6_9, as_6_10, \n\t\t ">; + HelpText<"Set target profile. \n\t: ps_6_0, ps_6_1, ps_6_2, ps_6_3, ps_6_4, ps_6_5, ps_6_6, ps_6_7, ps_6_8, ps_6_9, \n\t\t vs_6_0, vs_6_1, vs_6_2, vs_6_3, vs_6_4, vs_6_5, vs_6_6, vs_6_7, vs_6_8, vs_6_9, \n\t\t gs_6_0, gs_6_1, gs_6_2, gs_6_3, gs_6_4, gs_6_5, gs_6_6, gs_6_7, gs_6_8, gs_6_9, \n\t\t hs_6_0, hs_6_1, hs_6_2, hs_6_3, hs_6_4, hs_6_5, hs_6_6, hs_6_7, hs_6_8, hs_6_9, \n\t\t ds_6_0, ds_6_1, ds_6_2, ds_6_3, ds_6_4, ds_6_5, ds_6_6, ds_6_7, ds_6_8, ds_6_9, \n\t\t cs_6_0, cs_6_1, cs_6_2, cs_6_3, cs_6_4, cs_6_5, cs_6_6, cs_6_7, cs_6_8, cs_6_9, \n\t\t lib_6_1, lib_6_2, lib_6_3, lib_6_4, lib_6_5, lib_6_6, lib_6_7, lib_6_8, lib_6_9, \n\t\t ms_6_5, ms_6_6, ms_6_7, ms_6_8, ms_6_9, \n\t\t as_6_5, as_6_6, as_6_7, as_6_8, as_6_9, \n\t\t ">; // VALRULE-TEXT:END def entrypoint : JoinedOrSeparate<["-", "/"], "E">, Flags<[CoreOption, RewriteOption]>, Group, HelpText<"Entry point name">; diff --git a/lib/DXIL/DxilShaderModel.cpp b/lib/DXIL/DxilShaderModel.cpp index e70a3b44c9..06cc6e633c 100644 --- a/lib/DXIL/DxilShaderModel.cpp +++ b/lib/DXIL/DxilShaderModel.cpp @@ -65,7 +65,6 @@ bool ShaderModel::IsValidForDxil() const { case 7: case 8: case 9: - case 10: // VALRULE-TEXT:END return true; case kOfflineMinor: @@ -80,122 +79,113 @@ const ShaderModel *ShaderModel::Get(Kind Kind, unsigned Major, unsigned Minor) { /* hctdb_instrhelp.get_shader_model_get()*/ // VALRULE-TEXT:BEGIN const static std::pair hashToIdxMap[] = { - {1024, 0}, // ps_4_0 - {1025, 1}, // ps_4_1 - {1280, 2}, // ps_5_0 - {1281, 3}, // ps_5_1 - {1536, 4}, // ps_6_0 - {1537, 5}, // ps_6_1 - {1538, 6}, // ps_6_2 - {1539, 7}, // ps_6_3 - {1540, 8}, // ps_6_4 - {1541, 9}, // ps_6_5 - {1542, 10}, // ps_6_6 - {1543, 11}, // ps_6_7 - {1544, 12}, // ps_6_8 - {1545, 13}, // ps_6_9 - {1546, 14}, // ps_6_10 - {66560, 15}, // vs_4_0 - {66561, 16}, // vs_4_1 - {66816, 17}, // vs_5_0 - {66817, 18}, // vs_5_1 - {67072, 19}, // vs_6_0 - {67073, 20}, // vs_6_1 - {67074, 21}, // vs_6_2 - {67075, 22}, // vs_6_3 - {67076, 23}, // vs_6_4 - {67077, 24}, // vs_6_5 - {67078, 25}, // vs_6_6 - {67079, 26}, // vs_6_7 - {67080, 27}, // vs_6_8 - {67081, 28}, // vs_6_9 - {67082, 29}, // vs_6_10 - {132096, 30}, // gs_4_0 - {132097, 31}, // gs_4_1 - {132352, 32}, // gs_5_0 - {132353, 33}, // gs_5_1 - {132608, 34}, // gs_6_0 - {132609, 35}, // gs_6_1 - {132610, 36}, // gs_6_2 - {132611, 37}, // gs_6_3 - {132612, 38}, // gs_6_4 - {132613, 39}, // gs_6_5 - {132614, 40}, // gs_6_6 - {132615, 41}, // gs_6_7 - {132616, 42}, // gs_6_8 - {132617, 43}, // gs_6_9 - {132618, 44}, // gs_6_10 - {197632, 45}, // hs_4_0 - {197633, 46}, // hs_4_1 - {197888, 47}, // hs_5_0 - {197889, 48}, // hs_5_1 - {198144, 49}, // hs_6_0 - {198145, 50}, // hs_6_1 - {198146, 51}, // hs_6_2 - {198147, 52}, // hs_6_3 - 
{198148, 53}, // hs_6_4 - {198149, 54}, // hs_6_5 - {198150, 55}, // hs_6_6 - {198151, 56}, // hs_6_7 - {198152, 57}, // hs_6_8 - {198153, 58}, // hs_6_9 - {198154, 59}, // hs_6_10 - {263168, 60}, // ds_4_0 - {263169, 61}, // ds_4_1 - {263424, 62}, // ds_5_0 - {263425, 63}, // ds_5_1 - {263680, 64}, // ds_6_0 - {263681, 65}, // ds_6_1 - {263682, 66}, // ds_6_2 - {263683, 67}, // ds_6_3 - {263684, 68}, // ds_6_4 - {263685, 69}, // ds_6_5 - {263686, 70}, // ds_6_6 - {263687, 71}, // ds_6_7 - {263688, 72}, // ds_6_8 - {263689, 73}, // ds_6_9 - {263690, 74}, // ds_6_10 - {328704, 75}, // cs_4_0 - {328705, 76}, // cs_4_1 - {328960, 77}, // cs_5_0 - {328961, 78}, // cs_5_1 - {329216, 79}, // cs_6_0 - {329217, 80}, // cs_6_1 - {329218, 81}, // cs_6_2 - {329219, 82}, // cs_6_3 - {329220, 83}, // cs_6_4 - {329221, 84}, // cs_6_5 - {329222, 85}, // cs_6_6 - {329223, 86}, // cs_6_7 - {329224, 87}, // cs_6_8 - {329225, 88}, // cs_6_9 - {329226, 89}, // cs_6_10 - {394241, 90}, // lib_4_1 - {394497, 91}, // lib_5_1 - {394753, 92}, // lib_6_1 - {394754, 93}, // lib_6_2 - {394755, 94}, // lib_6_3 - {394756, 95}, // lib_6_4 - {394757, 96}, // lib_6_5 - {394758, 97}, // lib_6_6 - {394759, 98}, // lib_6_7 - {394760, 99}, // lib_6_8 - {394761, 100}, // lib_6_9 - {394762, 101}, // lib_6_10 + {1024, 0}, // ps_4_0 + {1025, 1}, // ps_4_1 + {1280, 2}, // ps_5_0 + {1281, 3}, // ps_5_1 + {1536, 4}, // ps_6_0 + {1537, 5}, // ps_6_1 + {1538, 6}, // ps_6_2 + {1539, 7}, // ps_6_3 + {1540, 8}, // ps_6_4 + {1541, 9}, // ps_6_5 + {1542, 10}, // ps_6_6 + {1543, 11}, // ps_6_7 + {1544, 12}, // ps_6_8 + {1545, 13}, // ps_6_9 + {66560, 14}, // vs_4_0 + {66561, 15}, // vs_4_1 + {66816, 16}, // vs_5_0 + {66817, 17}, // vs_5_1 + {67072, 18}, // vs_6_0 + {67073, 19}, // vs_6_1 + {67074, 20}, // vs_6_2 + {67075, 21}, // vs_6_3 + {67076, 22}, // vs_6_4 + {67077, 23}, // vs_6_5 + {67078, 24}, // vs_6_6 + {67079, 25}, // vs_6_7 + {67080, 26}, // vs_6_8 + {67081, 27}, // vs_6_9 + {132096, 28}, // gs_4_0 + {132097, 29}, // gs_4_1 + {132352, 30}, // gs_5_0 + {132353, 31}, // gs_5_1 + {132608, 32}, // gs_6_0 + {132609, 33}, // gs_6_1 + {132610, 34}, // gs_6_2 + {132611, 35}, // gs_6_3 + {132612, 36}, // gs_6_4 + {132613, 37}, // gs_6_5 + {132614, 38}, // gs_6_6 + {132615, 39}, // gs_6_7 + {132616, 40}, // gs_6_8 + {132617, 41}, // gs_6_9 + {197632, 42}, // hs_4_0 + {197633, 43}, // hs_4_1 + {197888, 44}, // hs_5_0 + {197889, 45}, // hs_5_1 + {198144, 46}, // hs_6_0 + {198145, 47}, // hs_6_1 + {198146, 48}, // hs_6_2 + {198147, 49}, // hs_6_3 + {198148, 50}, // hs_6_4 + {198149, 51}, // hs_6_5 + {198150, 52}, // hs_6_6 + {198151, 53}, // hs_6_7 + {198152, 54}, // hs_6_8 + {198153, 55}, // hs_6_9 + {263168, 56}, // ds_4_0 + {263169, 57}, // ds_4_1 + {263424, 58}, // ds_5_0 + {263425, 59}, // ds_5_1 + {263680, 60}, // ds_6_0 + {263681, 61}, // ds_6_1 + {263682, 62}, // ds_6_2 + {263683, 63}, // ds_6_3 + {263684, 64}, // ds_6_4 + {263685, 65}, // ds_6_5 + {263686, 66}, // ds_6_6 + {263687, 67}, // ds_6_7 + {263688, 68}, // ds_6_8 + {263689, 69}, // ds_6_9 + {328704, 70}, // cs_4_0 + {328705, 71}, // cs_4_1 + {328960, 72}, // cs_5_0 + {328961, 73}, // cs_5_1 + {329216, 74}, // cs_6_0 + {329217, 75}, // cs_6_1 + {329218, 76}, // cs_6_2 + {329219, 77}, // cs_6_3 + {329220, 78}, // cs_6_4 + {329221, 79}, // cs_6_5 + {329222, 80}, // cs_6_6 + {329223, 81}, // cs_6_7 + {329224, 82}, // cs_6_8 + {329225, 83}, // cs_6_9 + {394241, 84}, // lib_4_1 + {394497, 85}, // lib_5_1 + {394753, 86}, // lib_6_1 + {394754, 87}, // lib_6_2 + {394755, 88}, // 
lib_6_3 + {394756, 89}, // lib_6_4 + {394757, 90}, // lib_6_5 + {394758, 91}, // lib_6_6 + {394759, 92}, // lib_6_7 + {394760, 93}, // lib_6_8 + {394761, 94}, // lib_6_9 // lib_6_x is for offline linking only, and relaxes restrictions - {394767, 102}, // lib_6_x - {853509, 103}, // ms_6_5 - {853510, 104}, // ms_6_6 - {853511, 105}, // ms_6_7 - {853512, 106}, // ms_6_8 - {853513, 107}, // ms_6_9 - {853514, 108}, // ms_6_10 - {919045, 109}, // as_6_5 - {919046, 110}, // as_6_6 - {919047, 111}, // as_6_7 - {919048, 112}, // as_6_8 - {919049, 113}, // as_6_9 - {919050, 114}, // as_6_10 + {394767, 95}, // lib_6_x + {853509, 96}, // ms_6_5 + {853510, 97}, // ms_6_6 + {853511, 98}, // ms_6_7 + {853512, 99}, // ms_6_8 + {853513, 100}, // ms_6_9 + {919045, 101}, // as_6_5 + {919046, 102}, // as_6_6 + {919047, 103}, // as_6_7 + {919048, 104}, // as_6_8 + {919049, 105}, // as_6_9 }; unsigned hash = (unsigned)Kind << 16 | Major << 8 | Minor; auto pred = [](const std::pair &elem, unsigned val) { @@ -338,9 +328,6 @@ void ShaderModel::GetDxilVersion(unsigned &DxilMajor, case 9: DxilMinor = 9; break; - case 10: - DxilMinor = 10; - break; case kOfflineMinor: // Always update this to highest dxil version DxilMinor = DXIL::kDxilMinor; break; @@ -391,9 +378,6 @@ void ShaderModel::GetMinValidatorVersion(unsigned &ValMajor, case 9: ValMinor = 9; break; - case 10: - ValMinor = 10; - break; // VALRULE-TEXT:END case kOfflineMinor: ValMajor = 0; @@ -535,7 +519,6 @@ const ShaderModel ShaderModel::ms_ShaderModels[kNumShaderModels] = { SM(Kind::Pixel, 6, 7, "ps_6_7", 32, 8, true, true, UINT_MAX), SM(Kind::Pixel, 6, 8, "ps_6_8", 32, 8, true, true, UINT_MAX), SM(Kind::Pixel, 6, 9, "ps_6_9", 32, 8, true, true, UINT_MAX), - SM(Kind::Pixel, 6, 10, "ps_6_10", 32, 8, true, true, UINT_MAX), SM(Kind::Vertex, 4, 0, "vs_4_0", 16, 16, false, false, 0), SM(Kind::Vertex, 4, 1, "vs_4_1", 32, 32, false, false, 0), SM(Kind::Vertex, 5, 0, "vs_5_0", 32, 32, true, true, 64), @@ -550,7 +533,6 @@ const ShaderModel ShaderModel::ms_ShaderModels[kNumShaderModels] = { SM(Kind::Vertex, 6, 7, "vs_6_7", 32, 32, true, true, UINT_MAX), SM(Kind::Vertex, 6, 8, "vs_6_8", 32, 32, true, true, UINT_MAX), SM(Kind::Vertex, 6, 9, "vs_6_9", 32, 32, true, true, UINT_MAX), - SM(Kind::Vertex, 6, 10, "vs_6_10", 32, 32, true, true, UINT_MAX), SM(Kind::Geometry, 4, 0, "gs_4_0", 16, 32, false, false, 0), SM(Kind::Geometry, 4, 1, "gs_4_1", 32, 32, false, false, 0), SM(Kind::Geometry, 5, 0, "gs_5_0", 32, 32, true, true, 64), @@ -565,7 +547,6 @@ const ShaderModel ShaderModel::ms_ShaderModels[kNumShaderModels] = { SM(Kind::Geometry, 6, 7, "gs_6_7", 32, 32, true, true, UINT_MAX), SM(Kind::Geometry, 6, 8, "gs_6_8", 32, 32, true, true, UINT_MAX), SM(Kind::Geometry, 6, 9, "gs_6_9", 32, 32, true, true, UINT_MAX), - SM(Kind::Geometry, 6, 10, "gs_6_10", 32, 32, true, true, UINT_MAX), SM(Kind::Hull, 4, 0, "hs_4_0", 32, 32, false, false, 0), SM(Kind::Hull, 4, 1, "hs_4_1", 32, 32, false, false, 0), SM(Kind::Hull, 5, 0, "hs_5_0", 32, 32, true, true, 64), @@ -580,7 +561,6 @@ const ShaderModel ShaderModel::ms_ShaderModels[kNumShaderModels] = { SM(Kind::Hull, 6, 7, "hs_6_7", 32, 32, true, true, UINT_MAX), SM(Kind::Hull, 6, 8, "hs_6_8", 32, 32, true, true, UINT_MAX), SM(Kind::Hull, 6, 9, "hs_6_9", 32, 32, true, true, UINT_MAX), - SM(Kind::Hull, 6, 10, "hs_6_10", 32, 32, true, true, UINT_MAX), SM(Kind::Domain, 4, 0, "ds_4_0", 32, 32, false, false, 0), SM(Kind::Domain, 4, 1, "ds_4_1", 32, 32, false, false, 0), SM(Kind::Domain, 5, 0, "ds_5_0", 32, 32, true, true, 64), @@ -595,7 +575,6 
@@ const ShaderModel ShaderModel::ms_ShaderModels[kNumShaderModels] = { SM(Kind::Domain, 6, 7, "ds_6_7", 32, 32, true, true, UINT_MAX), SM(Kind::Domain, 6, 8, "ds_6_8", 32, 32, true, true, UINT_MAX), SM(Kind::Domain, 6, 9, "ds_6_9", 32, 32, true, true, UINT_MAX), - SM(Kind::Domain, 6, 10, "ds_6_10", 32, 32, true, true, UINT_MAX), SM(Kind::Compute, 4, 0, "cs_4_0", 0, 0, false, false, 0), SM(Kind::Compute, 4, 1, "cs_4_1", 0, 0, false, false, 0), SM(Kind::Compute, 5, 0, "cs_5_0", 0, 0, true, true, 64), @@ -610,7 +589,6 @@ const ShaderModel ShaderModel::ms_ShaderModels[kNumShaderModels] = { SM(Kind::Compute, 6, 7, "cs_6_7", 0, 0, true, true, UINT_MAX), SM(Kind::Compute, 6, 8, "cs_6_8", 0, 0, true, true, UINT_MAX), SM(Kind::Compute, 6, 9, "cs_6_9", 0, 0, true, true, UINT_MAX), - SM(Kind::Compute, 6, 10, "cs_6_10", 0, 0, true, true, UINT_MAX), SM(Kind::Library, 4, 1, "lib_4_1", 0, 0, false, false, 0), SM(Kind::Library, 5, 1, "lib_5_1", 0, 0, true, true, 64), SM(Kind::Library, 6, 1, "lib_6_1", 0, 0, true, true, UINT_MAX), @@ -622,7 +600,6 @@ const ShaderModel ShaderModel::ms_ShaderModels[kNumShaderModels] = { SM(Kind::Library, 6, 7, "lib_6_7", 0, 0, true, true, UINT_MAX), SM(Kind::Library, 6, 8, "lib_6_8", 0, 0, true, true, UINT_MAX), SM(Kind::Library, 6, 9, "lib_6_9", 0, 0, true, true, UINT_MAX), - SM(Kind::Library, 6, 10, "lib_6_10", 0, 0, true, true, UINT_MAX), // lib_6_x is for offline linking only, and relaxes restrictions SM(Kind::Library, 6, kOfflineMinor, "lib_6_x", 32, 32, true, true, UINT_MAX), @@ -631,13 +608,11 @@ const ShaderModel ShaderModel::ms_ShaderModels[kNumShaderModels] = { SM(Kind::Mesh, 6, 7, "ms_6_7", 0, 0, true, true, UINT_MAX), SM(Kind::Mesh, 6, 8, "ms_6_8", 0, 0, true, true, UINT_MAX), SM(Kind::Mesh, 6, 9, "ms_6_9", 0, 0, true, true, UINT_MAX), - SM(Kind::Mesh, 6, 10, "ms_6_10", 0, 0, true, true, UINT_MAX), SM(Kind::Amplification, 6, 5, "as_6_5", 0, 0, true, true, UINT_MAX), SM(Kind::Amplification, 6, 6, "as_6_6", 0, 0, true, true, UINT_MAX), SM(Kind::Amplification, 6, 7, "as_6_7", 0, 0, true, true, UINT_MAX), SM(Kind::Amplification, 6, 8, "as_6_8", 0, 0, true, true, UINT_MAX), SM(Kind::Amplification, 6, 9, "as_6_9", 0, 0, true, true, UINT_MAX), - SM(Kind::Amplification, 6, 10, "as_6_10", 0, 0, true, true, UINT_MAX), // Values before Invalid must remain sorted by Kind, then Major, then Minor. 
SM(Kind::Invalid, 0, 0, "invalid", 0, 0, false, false, 0), // VALRULE-TEXT:END diff --git a/tools/clang/unittests/HLSL/ValidationTest.cpp b/tools/clang/unittests/HLSL/ValidationTest.cpp index b1b79bbae9..796f017f09 100644 --- a/tools/clang/unittests/HLSL/ValidationTest.cpp +++ b/tools/clang/unittests/HLSL/ValidationTest.cpp @@ -4883,9 +4883,8 @@ TEST_F(ValidationTest, CacheInitWithLowPrec) { } TEST_F(ValidationTest, PSVStringTableReorder) { - if (!m_ver.m_InternalValidator) - if (m_ver.SkipDxilVersion(1, 8)) - return; + if (m_ver.SkipDxilVersion(1, 10)) + return; CComPtr pProgram; CompileSource("float4 main(float a:A, float b:B) : SV_Target { return 1; }", @@ -5076,9 +5075,8 @@ class SemanticIndexRotator { }; TEST_F(ValidationTest, PSVSemanticIndexTableReorder) { - if (!m_ver.m_InternalValidator) - if (m_ver.SkipDxilVersion(1, 8)) - return; + if (m_ver.SkipDxilVersion(1, 10)) + return; CComPtr pProgram; CompileFile(L"..\\DXILValidation\\hs_signatures.hlsl", "hs_6_0", &pProgram); @@ -5552,9 +5550,8 @@ SimplePSV::SimplePSV(const DxilPartHeader *pPSVPart) { } TEST_F(ValidationTest, PSVContentValidationVS) { - if (!m_ver.m_InternalValidator) - if (m_ver.SkipDxilVersion(1, 8)) - return; + if (m_ver.SkipDxilVersion(1, 10)) + return; CComPtr pProgram; CompileFile(L"..\\DXC\\dumpPSV_VS.hlsl", "vs_6_8", &pProgram); @@ -5708,9 +5705,8 @@ TEST_F(ValidationTest, PSVContentValidationVS) { } TEST_F(ValidationTest, PSVContentValidationHS) { - if (!m_ver.m_InternalValidator) - if (m_ver.SkipDxilVersion(1, 8)) - return; + if (m_ver.SkipDxilVersion(1, 10)) + return; CComPtr pProgram; CompileFile(L"..\\DXC\\dumpPSV_HS.hlsl", "hs_6_8", &pProgram); @@ -5858,9 +5854,8 @@ TEST_F(ValidationTest, PSVContentValidationHS) { } TEST_F(ValidationTest, PSVContentValidationDS) { - if (!m_ver.m_InternalValidator) - if (m_ver.SkipDxilVersion(1, 8)) - return; + if (m_ver.SkipDxilVersion(1, 10)) + return; CComPtr pProgram; CompileFile(L"..\\DXC\\dumpPSV_DS.hlsl", "ds_6_8", &pProgram); @@ -6015,9 +6010,8 @@ TEST_F(ValidationTest, PSVContentValidationDS) { } TEST_F(ValidationTest, PSVContentValidationGS) { - if (!m_ver.m_InternalValidator) - if (m_ver.SkipDxilVersion(1, 8)) - return; + if (m_ver.SkipDxilVersion(1, 10)) + return; CComPtr pProgram; CompileFile(L"..\\DXC\\dumpPSV_GS.hlsl", "gs_6_8", &pProgram); @@ -6103,9 +6097,8 @@ TEST_F(ValidationTest, PSVContentValidationGS) { } TEST_F(ValidationTest, PSVContentValidationPS) { - if (!m_ver.m_InternalValidator) - if (m_ver.SkipDxilVersion(1, 8)) - return; + if (m_ver.SkipDxilVersion(1, 10)) + return; CComPtr pProgram; CompileFile(L"..\\DXC\\dumpPSV_PS.hlsl", "ps_6_8", &pProgram); @@ -6188,9 +6181,8 @@ TEST_F(ValidationTest, PSVContentValidationPS) { } TEST_F(ValidationTest, PSVContentValidationCS) { - if (!m_ver.m_InternalValidator) - if (m_ver.SkipDxilVersion(1, 8)) - return; + if (m_ver.SkipDxilVersion(1, 10)) + return; CComPtr pProgram; CompileFile(L"..\\DXC\\dumpPSV_CS.hlsl", "cs_6_8", &pProgram); @@ -6270,9 +6262,8 @@ TEST_F(ValidationTest, PSVContentValidationCS) { } TEST_F(ValidationTest, PSVContentValidationMS) { - if (!m_ver.m_InternalValidator) - if (m_ver.SkipDxilVersion(1, 8)) - return; + if (m_ver.SkipDxilVersion(1, 10)) + return; CComPtr pProgram; CompileFile(L"..\\DXC\\dumpPSV_MS.hlsl", "ms_6_8", &pProgram); @@ -6337,9 +6328,8 @@ TEST_F(ValidationTest, PSVContentValidationMS) { } TEST_F(ValidationTest, PSVContentValidationAS) { - if (!m_ver.m_InternalValidator) - if (m_ver.SkipDxilVersion(1, 8)) - return; + if (m_ver.SkipDxilVersion(1, 10)) + return; CComPtr 
pProgram;
   CompileFile(L"..\\DXC\\dumpPSV_AS.hlsl", "as_6_8", &pProgram);
 
@@ -6439,9 +6429,8 @@ struct SimpleContainer {
 };
 
 TEST_F(ValidationTest, WrongPSVSize) {
-  if (!m_ver.m_InternalValidator)
-    if (m_ver.SkipDxilVersion(1, 8))
-      return;
+  if (m_ver.SkipDxilVersion(1, 10))
+    return;
 
   CComPtr<IDxcBlob> pProgram;
   CompileFile(L"..\\DXC\\dumpPSV_AS.hlsl", "as_6_8", &pProgram);
 
@@ -6528,9 +6517,8 @@ TEST_F(ValidationTest, WrongPSVSize) {
 }
 
 TEST_F(ValidationTest, WrongPSVSizeOnZeros) {
-  if (!m_ver.m_InternalValidator)
-    if (m_ver.SkipDxilVersion(1, 8))
-      return;
+  if (m_ver.SkipDxilVersion(1, 10))
+    return;
 
   CComPtr<IDxcBlob> pProgram;
   CompileFile(L"..\\DXC\\dumpPSV_PS.hlsl", "ps_6_8", &pProgram);
 
@@ -6624,9 +6612,8 @@ TEST_F(ValidationTest, WrongPSVSizeOnZeros) {
 }
 
 TEST_F(ValidationTest, WrongPSVVersion) {
-  if (!m_ver.m_InternalValidator)
-    if (m_ver.SkipDxilVersion(1, 8))
-      return;
+  if (m_ver.SkipDxilVersion(1, 10))
+    return;
 
   CComPtr<IDxcBlob> pProgram60;
   std::vector<LPCWSTR> args;
diff --git a/utils/hct/hctdb_instrhelp.py b/utils/hct/hctdb_instrhelp.py
index 8321768bef..87ac8886be 100644
--- a/utils/hct/hctdb_instrhelp.py
+++ b/utils/hct/hctdb_instrhelp.py
@@ -1693,7 +1693,7 @@ def get_extended_table_opcode_enum_decls():
 # since there can be pre-release versions that are higher
 # than the last released version
 highest_major = 6
-highest_minor = 10
+highest_minor = 9
 highest_shader_models = {4: 1, 5: 1, 6: highest_minor}
 
 # fetch the last released version from latest-released.json

From 15943a81b5423c7f6089055a5ae4bda8c695c27d Mon Sep 17 00:00:00 2001
From: Alex Sepkowski <5620315+alsepkow@users.noreply.github.com>
Date: Tue, 27 Jan 2026 16:09:15 -0800
Subject: [PATCH 2/6] Add upcoming release section

---
 docs/ReleaseNotes.md | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/docs/ReleaseNotes.md b/docs/ReleaseNotes.md
index c14b4ce258..8a83524274 100644
--- a/docs/ReleaseNotes.md
+++ b/docs/ReleaseNotes.md
@@ -17,6 +17,11 @@ The included licenses apply to the following files:
 
 ## Changelog
 
+### Upcoming Release
+
+Place release notes for the upcoming release below this line and remove this
+line upon naming the release. Refer to previous releases for appropriate section names.
+
 ### Version 1.9.2602
 
 #### Shader Model 6.9 Release

From a2f4fe9535cd28660795da16e619f0b203cc4a20 Mon Sep 17 00:00:00 2001
From: Alex Sepkowski <5620315+alsepkow@users.noreply.github.com>
Date: Tue, 27 Jan 2026 16:10:28 -0800
Subject: [PATCH 3/6] Extra space

---
 docs/ReleaseNotes.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/ReleaseNotes.md b/docs/ReleaseNotes.md
index 8a83524274..79b0362943 100644
--- a/docs/ReleaseNotes.md
+++ b/docs/ReleaseNotes.md
@@ -17,7 +17,7 @@ The included licenses apply to the following files:
 
 ## Changelog
 
-### Upcoming Release 
+### Upcoming Release
 
 Place release notes for the upcoming release below this line and remove this
 line upon naming the release. Refer to previous releases for appropriate section names.

From 5765af05d6e64b2a4b4550cec7094775566c802b Mon Sep 17 00:00:00 2001
From: Steven Perron
Date: Tue, 27 Jan 2026 21:00:54 -0500
Subject: [PATCH 4/6] Update SPIR-V submodules (#8092) (#8100)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Update spirv-tools to the v2026.1 release in preparation for the Vulkan
SDK release. This picks up fixes for recent regressions related to
debug info.
Co-authored-by: Nathan Gauër --- external/SPIRV-Headers | 2 +- external/SPIRV-Tools | 2 +- .../intrinsics.sm6_5.multiprefix.hlsl | 16 ++++++++-------- .../test/CodeGenSPIRV/sm6_5.wave-match.hlsl | 4 ++-- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/external/SPIRV-Headers b/external/SPIRV-Headers index b824a462d4..04f10f650d 160000 --- a/external/SPIRV-Headers +++ b/external/SPIRV-Headers @@ -1 +1 @@ -Subproject commit b824a462d4256d720bebb40e78b9eb8f78bbb305 +Subproject commit 04f10f650d514df88b76d25e83db360142c7b174 diff --git a/external/SPIRV-Tools b/external/SPIRV-Tools index 262bdab481..fbe4f3ad91 160000 --- a/external/SPIRV-Tools +++ b/external/SPIRV-Tools @@ -1 +1 @@ -Subproject commit 262bdab48146c937467f826699a40da0fdfc0f1a +Subproject commit fbe4f3ad913c44fe8700545f8ffe35d1382b7093 diff --git a/tools/clang/test/CodeGenSPIRV/intrinsics.sm6_5.multiprefix.hlsl b/tools/clang/test/CodeGenSPIRV/intrinsics.sm6_5.multiprefix.hlsl index 7b15e17558..a71664b063 100644 --- a/tools/clang/test/CodeGenSPIRV/intrinsics.sm6_5.multiprefix.hlsl +++ b/tools/clang/test/CodeGenSPIRV/intrinsics.sm6_5.multiprefix.hlsl @@ -3,7 +3,7 @@ // CHECK-ERROR: error: Vulkan 1.1 is required for Wave Operation but not permitted to use -// CHECK: OpCapability GroupNonUniformPartitionedNV +// CHECK: OpCapability GroupNonUniformPartitionedEXT // CHECK: OpExtension "SPV_NV_shader_subgroup_partitioned" StructuredBuffer g_mask; @@ -13,37 +13,37 @@ uint4 main(int4 input0 : ATTR0, uint4 input1 : ATTR1) : SV_Target { // CHECK: [[input0:%[0-9]+]] = OpLoad %v4int %input0 // CHECK: [[mask:%[0-9]+]] = OpLoad %v4uint %mask -// CHECK: {{%[0-9]+}} = OpGroupNonUniformIMul %v4int %uint_3 PartitionedExclusiveScanNV [[input0]] [[mask]] +// CHECK: {{%[0-9]+}} = OpGroupNonUniformIMul %v4int %uint_3 PartitionedExclusiveScanEXT [[input0]] [[mask]] int4 res = WaveMultiPrefixProduct(input0, mask); // CHECK: [[input1:%[0-9]+]] = OpLoad %v4uint %input1 // CHECK: [[mask:%[0-9]+]] = OpLoad %v4uint %mask -// CHECK: {{%[0-9]+}} = OpGroupNonUniformIMul %v4uint %uint_3 PartitionedExclusiveScanNV [[input1]] [[mask]] +// CHECK: {{%[0-9]+}} = OpGroupNonUniformIMul %v4uint %uint_3 PartitionedExclusiveScanEXT [[input1]] [[mask]] res += WaveMultiPrefixProduct(input1, mask); // CHECK: [[input0:%[0-9]+]] = OpLoad %v4int %input0 // CHECK: [[mask:%[0-9]+]] = OpLoad %v4uint %mask -// CHECK: {{%[0-9]+}} = OpGroupNonUniformIAdd %v4int %uint_3 PartitionedExclusiveScanNV [[input0]] [[mask]] +// CHECK: {{%[0-9]+}} = OpGroupNonUniformIAdd %v4int %uint_3 PartitionedExclusiveScanEXT [[input0]] [[mask]] res += WaveMultiPrefixSum(input0, mask); // CHECK: [[input1:%[0-9]+]] = OpLoad %v4uint %input1 // CHECK: [[mask:%[0-9]+]] = OpLoad %v4uint %mask -// CHECK: {{%[0-9]+}} = OpGroupNonUniformIAdd %v4uint %uint_3 PartitionedExclusiveScanNV [[input1]] [[mask]] +// CHECK: {{%[0-9]+}} = OpGroupNonUniformIAdd %v4uint %uint_3 PartitionedExclusiveScanEXT [[input1]] [[mask]] res += WaveMultiPrefixSum(input1, mask); // CHECK: [[input1:%[0-9]+]] = OpLoad %v4uint %input1 // CHECK: [[mask:%[0-9]+]] = OpLoad %v4uint %mask -// CHECK: {{%[0-9]+}} = OpGroupNonUniformBitwiseAnd %v4uint %uint_3 PartitionedExclusiveScanNV [[input1]] [[mask]] +// CHECK: {{%[0-9]+}} = OpGroupNonUniformBitwiseAnd %v4uint %uint_3 PartitionedExclusiveScanEXT [[input1]] [[mask]] res += WaveMultiPrefixBitAnd(input1, mask); // CHECK: [[input1:%[0-9]+]] = OpLoad %v4uint %input1 // CHECK: [[mask:%[0-9]+]] = OpLoad %v4uint %mask -// CHECK: {{%[0-9]+}} = OpGroupNonUniformBitwiseOr %v4uint %uint_3 
PartitionedExclusiveScanNV [[input1]] [[mask]]
+// CHECK: {{%[0-9]+}} = OpGroupNonUniformBitwiseOr %v4uint %uint_3 PartitionedExclusiveScanEXT [[input1]] [[mask]]
   res += WaveMultiPrefixBitOr(input1, mask);
 
 // CHECK: [[input1:%[0-9]+]] = OpLoad %v4uint %input1
 // CHECK:   [[mask:%[0-9]+]] = OpLoad %v4uint %mask
-// CHECK: {{%[0-9]+}} = OpGroupNonUniformBitwiseXor %v4uint %uint_3 PartitionedExclusiveScanNV [[input1]] [[mask]]
+// CHECK: {{%[0-9]+}} = OpGroupNonUniformBitwiseXor %v4uint %uint_3 PartitionedExclusiveScanEXT [[input1]] [[mask]]
   res += WaveMultiPrefixBitXor(input1, mask);
 
   return res;
 }
diff --git a/tools/clang/test/CodeGenSPIRV/sm6_5.wave-match.hlsl b/tools/clang/test/CodeGenSPIRV/sm6_5.wave-match.hlsl
index 582be080d0..2040139dfa 100644
--- a/tools/clang/test/CodeGenSPIRV/sm6_5.wave-match.hlsl
+++ b/tools/clang/test/CodeGenSPIRV/sm6_5.wave-match.hlsl
@@ -3,12 +3,12 @@
 
 // CHECK-ERROR: error: Vulkan 1.1 is required for Wave Operation but not permitted to use
 
-// CHECK: OpCapability GroupNonUniformPartitionedNV
+// CHECK: OpCapability GroupNonUniformPartitionedEXT
 // CHECK: OpExtension "SPV_NV_shader_subgroup_partitioned"
 
 uint4 main(uint4 input : ATTR0) : SV_Target {
 // CHECK: [[input:%[0-9]+]] = OpLoad %v4uint %input
-// CHECK: {{%[0-9]+}} = OpGroupNonUniformPartitionNV %v4uint [[input]]
+// CHECK: {{%[0-9]+}} = OpGroupNonUniformPartitionEXT %v4uint [[input]]
   uint4 res = WaveMatch(input);
   return res;
 }

From d76a1143558c8f0b5a54d099333b36b53979d4af Mon Sep 17 00:00:00 2001
From: Alex Sepkowski <5620315+alsepkow@users.noreply.github.com>
Date: Fri, 30 Jan 2026 11:54:44 -0800
Subject: [PATCH 5/6] Move experimental SM 6.10

---
 docs/ReleaseNotes.md                          |   18 +-
 tools/clang/unittests/HLSLExec/CMakeLists.txt |    1 +
 tools/clang/unittests/HLSLExec/TaefTest.cpp   | 2804 +++++++++++++++++
 3 files changed, 2814 insertions(+), 9 deletions(-)
 create mode 100644 tools/clang/unittests/HLSLExec/TaefTest.cpp

diff --git a/docs/ReleaseNotes.md b/docs/ReleaseNotes.md
index 79b0362943..a990068d9a 100644
--- a/docs/ReleaseNotes.md
+++ b/docs/ReleaseNotes.md
@@ -22,15 +22,6 @@ The included licenses apply to the following files:
 Place release notes for the upcoming release below this line and remove this
 line upon naming the release. Refer to previous releases for appropriate section names.
 
-### Version 1.9.2602
-
-#### Shader Model 6.9 Release
-
-- Shader Model 6.9 is fully supported.
-  - See [the official blog
-    post](https://devblogs.microsoft.com/directx/shader-model-6-9-dxr-1-2-and-agilitysdk-1-619-release)
-    for more details.
-
 #### Experimental Shader Model 6.10
 
 - Moved Linear Algebra (Cooperative Vector) DXIL Opcodes to experimental
   Shader Model 6.10
@@ -41,6 +32,15 @@ line upon naming the release. Refer to previous releases for appropriate section names.
+### Version 1.9.2602
+
+#### Shader Model 6.9 Release
+
+- Shader Model 6.9 is fully supported.
+  - See [the official blog
+    post](https://devblogs.microsoft.com/directx/shader-model-6-9-dxr-1-2-and-agilitysdk-1-619-release)
+    for more details.
+
 #### Notable SPIR-V updates
 
 - Handle vector element assignment for asuint.
[#8011](https://github.com/microsoft/DirectXShaderCompiler/issues/8011) diff --git a/tools/clang/unittests/HLSLExec/CMakeLists.txt b/tools/clang/unittests/HLSLExec/CMakeLists.txt index 8282fd5282..8a84c2db56 100644 --- a/tools/clang/unittests/HLSLExec/CMakeLists.txt +++ b/tools/clang/unittests/HLSLExec/CMakeLists.txt @@ -12,6 +12,7 @@ add_clang_library(ExecHLSLTests SHARED LongVectors.cpp HlslExecTestUtils.cpp ExecHLSLTests.rc + TaefTest.cpp ) add_dependencies(ClangUnitTests ExecHLSLTests) diff --git a/tools/clang/unittests/HLSLExec/TaefTest.cpp b/tools/clang/unittests/HLSLExec/TaefTest.cpp new file mode 100644 index 0000000000..56d2e1dc60 --- /dev/null +++ b/tools/clang/unittests/HLSLExec/TaefTest.cpp @@ -0,0 +1,2804 @@ +#ifndef NOMINMAX +#define NOMINMAX 1 +#endif + +#define INLINE_TEST_METHOD_MARKUP +#include + +#include "LongVectorTestData.h" + +#include "ShaderOpTest.h" +#include "dxc/Support/Global.h" + +#include "HlslTestUtils.h" + +#include "HlslExecTestUtils.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace LongVector { + +// +// Data Types +// + +template constexpr bool is16BitType() { + return std::is_same_v || std::is_same_v || + std::is_same_v; +} + +struct DataType { + const char *HLSLTypeString; + bool Is16Bit; + size_t HLSLSizeInBytes; +}; + +template const DataType &getDataType() { + static_assert(false && "Unknown data type"); +} + +#define DATA_TYPE(TYPE, HLSL_STRING, HLSL_SIZE) \ + template <> const DataType &getDataType() { \ + static DataType DataType{HLSL_STRING, is16BitType(), HLSL_SIZE}; \ + return DataType; \ + } + +DATA_TYPE(HLSLBool_t, "bool", 4) +DATA_TYPE(int16_t, "int16_t", 2) +DATA_TYPE(int32_t, "int", 4) +DATA_TYPE(int64_t, "int64_t", 8) +DATA_TYPE(uint16_t, "uint16_t", 2) +DATA_TYPE(uint32_t, "uint32_t", 4) +DATA_TYPE(uint64_t, "uint64_t", 8) +DATA_TYPE(HLSLHalf_t, "half", 2) +DATA_TYPE(float, "float", 4) +DATA_TYPE(double, "double", 8) + +#undef DATA_TYPE + +template constexpr bool isFloatingPointType() { + return std::is_same_v || std::is_same_v || + std::is_same_v; +} + +// +// Operation Types +// + +enum class OpType : unsigned { +#define OP(GROUP, SYMBOL, ARITY, INTRINSIC, OPERATOR, DEFINES, SHADER_NAME, \ + INPUT_SET_1, INPUT_SET_2, INPUT_SET_3) \ + SYMBOL, +#include "LongVectorOps.def" + NumOpTypes +}; + +struct Operation { + size_t Arity; + const char *Intrinsic; + const char *Operator; + const char *ExtraDefines; + const char *ShaderName; + InputSet InputSets[3]; + OpType Type; +}; + +static constexpr Operation Operations[] = { + +#define OP(GROUP, SYMBOL, ARITY, INTRINSIC, OPERATOR, DEFINES, SHADER_NAME, \ + INPUT_SET_1, INPUT_SET_2, INPUT_SET_3) \ + {ARITY, \ + INTRINSIC, \ + OPERATOR, \ + DEFINES, \ + SHADER_NAME, \ + {InputSet::INPUT_SET_1, InputSet::INPUT_SET_2, InputSet::INPUT_SET_3}, \ + OpType::SYMBOL}, +#include "LongVectorOps.def" +}; + +constexpr const Operation &getOperation(OpType Op) { + if (Op < OpType::NumOpTypes) + return Operations[unsigned(Op)]; + std::abort(); +} + +static const std::unordered_set LoadAndStoreOpTypes = { + OpType::LoadAndStore_RDH_BAB_UAV, OpType::LoadAndStore_RDH_BAB_SRV, + OpType::LoadAndStore_DT_BAB_UAV, OpType::LoadAndStore_DT_BAB_SRV, + OpType::LoadAndStore_RD_BAB_UAV, OpType::LoadAndStore_RD_BAB_SRV, + OpType::LoadAndStore_RDH_SB_UAV, OpType::LoadAndStore_RDH_SB_SRV, + OpType::LoadAndStore_DT_SB_UAV, OpType::LoadAndStore_DT_SB_SRV, + OpType::LoadAndStore_RD_SB_UAV, OpType::LoadAndStore_RD_SB_SRV, +}; + +static bool 
IsStructuredBufferLoadAndStoreOp(OpType Op) { + switch (Op) { + case OpType::LoadAndStore_RDH_SB_UAV: + case OpType::LoadAndStore_RDH_SB_SRV: + case OpType::LoadAndStore_DT_SB_UAV: + case OpType::LoadAndStore_DT_SB_SRV: + case OpType::LoadAndStore_RD_SB_UAV: + case OpType::LoadAndStore_RD_SB_SRV: + return true; + default: + return false; + } +} + +// Helper to fill the test data from the shader buffer based on type. +// Convenient to be used when copying HLSL*_t types so we can use the +// underlying type. +template +void fillLongVectorDataFromShaderBuffer(const MappedData &ShaderBuffer, + std::vector &TestData, + size_t NumElements) { + + if constexpr (std::is_same_v) { + auto *ShaderBufferPtr = + static_cast(ShaderBuffer.data()); + for (size_t I = 0; I < NumElements; I++) + TestData.push_back(HLSLHalf_t::FromHALF(ShaderBufferPtr[I])); + return; + } + + if constexpr (std::is_same_v) { + auto *ShaderBufferPtr = static_cast(ShaderBuffer.data()); + for (size_t I = 0; I < NumElements; I++) + // HLSLBool_t has a int32_t based constructor. + TestData.push_back(ShaderBufferPtr[I]); + return; + } + + auto *ShaderBufferPtr = static_cast(ShaderBuffer.data()); + for (size_t I = 0; I < NumElements; I++) + TestData.push_back(ShaderBufferPtr[I]); + return; +} + +template +void logLongVector(const std::vector &Values, const std::wstring &Name) { + hlsl_test::LogCommentFmt(L"LongVector Name: %s", Name.c_str()); + + const size_t LoggingWidth = 40; + + std::wstringstream Wss(L""); + Wss << L"LongVector Values: "; + Wss << L"["; + const size_t NumElements = Values.size(); + for (size_t I = 0; I < NumElements; I++) { + if (I % LoggingWidth == 0 && I != 0) + Wss << L"\n "; + Wss << Values[I]; + if (I != NumElements - 1) + Wss << L", "; + } + Wss << L" ]"; + + hlsl_test::LogCommentFmt(Wss.str().c_str()); +} + +enum class ValidationType { + Epsilon, + Ulp, +}; + +template +bool doValuesMatch(T A, T B, double Tolerance, ValidationType) { + if (Tolerance == 0.0) + return A == B; + + T Diff = A > B ? A - B : B - A; + return Diff <= Tolerance; +} + +bool doValuesMatch(HLSLBool_t A, HLSLBool_t B, double, ValidationType) { + return A == B; +} + +bool doValuesMatch(HLSLHalf_t A, HLSLHalf_t B, double Tolerance, + ValidationType ValidationType) { + switch (ValidationType) { + case ValidationType::Epsilon: + return CompareHalfEpsilon(A.Val, B.Val, static_cast(Tolerance)); + case ValidationType::Ulp: + return CompareHalfULP(A.Val, B.Val, static_cast(Tolerance)); + default: + hlsl_test::LogErrorFmt( + L"Invalid ValidationType. Expecting Epsilon or ULP."); + return false; + } +} + +bool doValuesMatch(float A, float B, double Tolerance, + ValidationType ValidationType) { + switch (ValidationType) { + case ValidationType::Epsilon: + return CompareFloatEpsilon(A, B, static_cast(Tolerance)); + case ValidationType::Ulp: { + // Tolerance is in ULPs. Convert to int for the comparison. + const int IntTolerance = static_cast(Tolerance); + return CompareFloatULP(A, B, IntTolerance); + }; + default: + hlsl_test::LogErrorFmt( + L"Invalid ValidationType. Expecting Epsilon or ULP."); + return false; + } +} + +bool doValuesMatch(double A, double B, double Tolerance, + ValidationType ValidationType) { + switch (ValidationType) { + case ValidationType::Epsilon: + return CompareDoubleEpsilon(A, B, Tolerance); + case ValidationType::Ulp: { + // Tolerance is in ULPs. Convert to int64_t for the comparison. 
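+    // (An N-ULP tolerance allows the two doubles to be at most N
+    // representable floating-point values apart.)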
+    const int64_t IntTolerance = static_cast<int64_t>(Tolerance);
+    return CompareDoubleULP(A, B, IntTolerance);
+  };
+  default:
+    hlsl_test::LogErrorFmt(
+        L"Invalid ValidationType. Expecting Epsilon or ULP.");
+    return false;
+  }
+}
+
+template <typename T>
+bool doVectorsMatch(const std::vector<T> &ActualValues,
+                    const std::vector<T> &ExpectedValues, double Tolerance,
+                    ValidationType ValidationType, bool VerboseLogging) {
+
+  DXASSERT(
+      ActualValues.size() == ExpectedValues.size(),
+      "Programmer error: Actual and Expected vectors must be the same size.");
+
+  if (VerboseLogging) {
+    logLongVector(ActualValues, L"ActualValues");
+    logLongVector(ExpectedValues, L"ExpectedValues");
+
+    hlsl_test::LogCommentFmt(
+        L"ValidationType: %s, Tolerance: %17g",
+        ValidationType == ValidationType::Epsilon ? L"Epsilon" : L"ULP",
+        Tolerance);
+  }
+
+  // Stash mismatched indexes for easy failure logging later
+  std::vector<size_t> MismatchedIndexes;
+  for (size_t I = 0; I < ActualValues.size(); I++) {
+    if (!doValuesMatch(ActualValues[I], ExpectedValues[I], Tolerance,
+                       ValidationType))
+      MismatchedIndexes.push_back(I);
+  }
+
+  if (MismatchedIndexes.empty())
+    return true;
+
+  for (size_t Index : MismatchedIndexes) {
+    std::wstringstream Wss(L"");
+    Wss << std::setprecision(15); // Set precision for floating point types
+    Wss << L"Mismatch at Index: " << Index;
+    Wss << L" Actual Value:" << ActualValues[Index] << ",";
+    Wss << L" Expected Value:" << ExpectedValues[Index];
+    hlsl_test::LogErrorFmt(Wss.str().c_str());
+  }
+
+  return false;
+}
+
+static WEX::Common::String getInputValueSetName(size_t Index) {
+  using WEX::Common::String;
+  using WEX::TestExecution::TestData;
+
+  DXASSERT(Index <= 9, "Only single digit indices supported");
+
+  String ParameterName = L"InputValueSetName";
+  ParameterName.Append((wchar_t)(L'1' + Index));
+
+  String ValueSetName;
+  if (FAILED(TestData::TryGetValue(ParameterName, ValueSetName))) {
+    String Name = L"DefaultInputValueSet";
+    Name.Append((wchar_t)(L'1' + Index));
+    return Name;
+  }
+
+  return ValueSetName;
+}
+
+std::string getCompilerOptionsString(
+    const Operation &Operation, const DataType &OpDataType,
+    const DataType &OutDataType, size_t VectorSize,
+    std::optional<std::string> AdditionalOptions = std::nullopt) {
+  std::stringstream CompilerOptions;
+
+  if (OpDataType.Is16Bit || OutDataType.Is16Bit)
+    CompilerOptions << " -enable-16bit-types";
+
+  CompilerOptions << " -DTYPE=" << OpDataType.HLSLTypeString;
+  CompilerOptions << " -DNUM=" << VectorSize;
+
+  CompilerOptions << " -DOPERATOR=";
+  CompilerOptions << Operation.Operator;
+
+  CompilerOptions << " -DFUNC=";
+  CompilerOptions << Operation.Intrinsic;
+
+  CompilerOptions << " " << Operation.ExtraDefines;
+
+  CompilerOptions << " -DOUT_TYPE=" << OutDataType.HLSLTypeString;
+
+  CompilerOptions << " -DBASIC_OP_TYPE=0x" << std::hex << Operation.Arity;
+
+  if (AdditionalOptions)
+    CompilerOptions << " " << AdditionalOptions.value();
+
+  return CompilerOptions.str();
+}
+
+// Helper to fill the shader buffer based on type. Convenient to be used when
+// copying HLSL*_t types so we can copy the underlying type directly instead
+// of the struct.
+template <typename T>
+void fillShaderBufferFromLongVectorData(std::vector<BYTE> &ShaderBuffer,
+                                        const std::vector<T> &TestData) {
+
+  // Note: DataSize for HLSLHalf_t and HLSLBool_t may be larger than the
+  // underlying type in some cases. That's fine. Resize just makes sure we have
+  // enough space.
+ const size_t NumElements = TestData.size(); + [[maybe_unused]] const size_t DataSize = sizeof(T) * NumElements; + + // Ensure the shader buffer is large enough. It should be pre-sized based on + // the D3D12_RESOURCE_DESC for the associated D3D12_RESOURCE. + DXASSERT_NOMSG(ShaderBuffer.size() >= DataSize); + + if constexpr (std::is_same_v) { + auto *ShaderBufferPtr = + reinterpret_cast(ShaderBuffer.data()); + for (size_t I = 0; I < NumElements; I++) + ShaderBufferPtr[I] = TestData[I].Val; + return; + } + + if constexpr (std::is_same_v) { + auto *ShaderBufferPtr = reinterpret_cast(ShaderBuffer.data()); + for (size_t I = 0; I < NumElements; I++) + ShaderBufferPtr[I] = TestData[I].Val; + return; + } + + auto *ShaderBufferPtr = reinterpret_cast(ShaderBuffer.data()); + for (size_t I = 0; I < NumElements; I++) + ShaderBufferPtr[I] = TestData[I]; +} + +// +// Run the test. Return std::nullopt if the test was skipped, otherwise returns +// the output buffer that was populated by the shader. +// +template using InputSets = std::vector>; + +template +std::optional> +runTest(ID3D12Device *D3DDevice, bool VerboseLogging, + const Operation &Operation, const InputSets &Inputs, + size_t ExpectedOutputSize, + std::optional AdditionalCompilerOptions) { + DXASSERT_NOMSG(Inputs.size() == Operation.Arity); + + if (VerboseLogging) { + for (size_t I = 0; I < Operation.Arity; ++I) { + std::wstring Name = L"InputVector"; + Name += (wchar_t)(L'1' + I); + logLongVector(Inputs[I], Name); + } + } + + const DataType &OpDataType = getDataType(); + const DataType &OutDataType = getDataType(); + + // We have to construct the string outside of the lambda. Otherwise it's + // cleaned up when the lambda finishes executing but before the shader runs. + std::string CompilerOptionsString = + getCompilerOptionsString(Operation, OpDataType, OutDataType, + Inputs[0].size(), AdditionalCompilerOptions); + + dxc::SpecificDllLoader DxilDllLoader; + CComPtr TestXML; + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &TestXML, DxilDllLoader); + auto ShaderOpSet = std::make_shared(); + st::ParseShaderOpSetFromStream(TestXML, ShaderOpSet.get()); + + if (LoadAndStoreOpTypes.count(Operation.Type) > 0) + configureLoadAndStoreShaderOp(Operation, OpDataType, Inputs[0].size(), + sizeof(T), ShaderOpSet.get()); + + // RunShaderOpTest is a helper function that handles resource creation + // and setup. It also handles the shader compilation and execution. It takes + // a callback that is called when the shader is compiled, but before it is + // executed. + std::shared_ptr TestResult = + st::RunShaderOpTestAfterParse( + D3DDevice, DxilDllLoader, Operation.ShaderName, + [&](LPCSTR Name, std::vector &ShaderData, + st::ShaderOp *ShaderOp) { + if (VerboseLogging) + hlsl_test::LogCommentFmt( + L"RunShaderOpTest CallBack. Resource Name: %S", Name); + + // This callback is called once for each resource defined for + // "LongVectorOp" in ShaderOpArith.xml. All callbacks are fired for + // each resource. We determine whether they are applicable to the + // test case when they run. + + // Process the callback for the OutputVector resource. + if (_stricmp(Name, "OutputVector") == 0) { + // We only need to set the compiler options string once. So this + // is a convenient place to do it. 
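+            // As an illustration (assuming, e.g., a binary op whose .def
+            // entry sets OPERATOR to "+" with an empty INTRINSIC), the string
+            // built above for a 128-element float vector would be:
+            //   " -DTYPE=float -DNUM=128 -DOPERATOR=+ -DFUNC=  -DOUT_TYPE=float -DBASIC_OP_TYPE=0x2"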
+            ShaderOp->Shaders.at(0).Arguments = CompilerOptionsString.c_str();
+
+            return;
+          }
+
+          // Process the callback for the InputVector[1-3] resources
+          for (size_t I = 0; I < 3; ++I) {
+            std::string BufferName = "InputVector";
+            BufferName += (char)('1' + I);
+            if (_stricmp(Name, BufferName.c_str()) == 0) {
+              if (I < Operation.Arity)
+                fillShaderBufferFromLongVectorData(ShaderData, Inputs[I]);
+              return;
+            }
+          }
+
+          LOG_ERROR_FMT_THROW(
+              L"RunShaderOpTest CallBack. Unexpected Resource Name: %S",
+              Name);
+        },
+        std::move(ShaderOpSet));
+
+  // Extract the data from the shader result
+  MappedData ShaderOutData;
+
+  const char *ReadBackName = "OutputVector";
+  TestResult->Test->GetReadBackData(ReadBackName, &ShaderOutData);
+
+  std::vector OutData;
+  fillLongVectorDataFromShaderBuffer(ShaderOutData, OutData,
+                                     ExpectedOutputSize);
+
+  return OutData;
+}
+
+// LoadAndStore operations dynamically configure the UAV/SRV formats and sizes
+// based on the vector size and data type. We also adjust the format and flags
+// based on whether we're using raw buffers or structured buffers.
+void configureLoadAndStoreShaderOp(const Operation &Operation,
+                                   const DataType &OpDataType,
+                                   size_t VectorSize, size_t ElementSize,
+                                   st::ShaderOpSet *ShaderOpSet) {
+
+  DXASSERT_NOMSG(LoadAndStoreOpTypes.count(Operation.Type) > 0);
+
+  st::ShaderOp *ShaderOp = ShaderOpSet->GetShaderOp(Operation.ShaderName);
+  DXASSERT(ShaderOp, "Invalid ShaderOp name");
+
+  // When using DXGI_FORMAT_R32_TYPELESS (raw buffer cases) we need to compute
+  // the number of 32-bit elements required to hold the vector.
+  const UINT Num32BitElements =
+      static_cast<UINT>((VectorSize * OpDataType.HLSLSizeInBytes + 3) / 4);
+
+  const UINT StructureByteStride = static_cast<UINT>(ElementSize * VectorSize);
+
+  const bool IsSB = IsStructuredBufferLoadAndStoreOp(Operation.Type);
+  if (!ShaderOp->DescriptorHeaps.empty()) {
+    DXASSERT(ShaderOp->DescriptorHeaps.size() == 1,
+             "Programmer error: Expecting a single descriptor heap for "
+             "LoadAndStore tests");
+
+    for (auto &D : ShaderOp->DescriptorHeaps[0].Descriptors) {
+      const bool IsUAV = (_stricmp(D.Kind, "UAV") == 0);
+      DXASSERT(IsUAV || (_stricmp(D.Kind, "SRV") == 0),
+               "Programmer error: Expecting UAV or SRV descriptors only");
+
+      if (IsSB) {
+        if (IsUAV) {
+          D.UavDesc.Format = DXGI_FORMAT_UNKNOWN;
+          D.UavDesc.Buffer.NumElements = 1; // One StructuredBuffer
+          D.UavDesc.Buffer.StructureByteStride = StructureByteStride;
+        } else {
+          D.SrvDesc.Format = DXGI_FORMAT_UNKNOWN;
+          D.SrvDesc.Buffer.NumElements = 1; // One StructuredBuffer
+          D.SrvDesc.Buffer.StructureByteStride = StructureByteStride;
+        }
+      } else { // Raw buffer
+        if (IsUAV) {
+          D.UavDesc.Format = DXGI_FORMAT_R32_TYPELESS;
+          D.UavDesc.Buffer.NumElements = Num32BitElements;
+          D.UavDesc.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_RAW;
+        } else {
+          D.SrvDesc.Format = DXGI_FORMAT_R32_TYPELESS;
+          D.SrvDesc.Buffer.NumElements = Num32BitElements;
+          D.SrvDesc.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_RAW;
+        }
+      }
+    }
+  }
+
+  const UINT BufferWidth = IsSB ?
StructureByteStride : (Num32BitElements * 4); + for (auto &R : ShaderOp->Resources) + R.Desc.Width = BufferWidth; +} + +template +std::vector buildTestInput(InputSet InputSet, size_t SizeToTest) { + const std::vector &RawValueSet = getInputSet(InputSet); + + std::vector ValueSet; + ValueSet.reserve(SizeToTest); + for (size_t I = 0; I < SizeToTest; ++I) + ValueSet.push_back(RawValueSet[I % RawValueSet.size()]); + + return ValueSet; +} + +template +InputSets buildTestInputs(size_t VectorSize, const InputSet OpInputSets[3], + size_t Arity) { + InputSets Inputs; + + for (size_t I = 0; I < Arity; ++I) + Inputs.push_back(buildTestInput(OpInputSets[I], VectorSize)); + + return Inputs; +} + +struct ValidationConfig { + double Tolerance = 0.0; + ValidationType Type = ValidationType::Epsilon; + + static ValidationConfig Epsilon(double Tolerance) { + return ValidationConfig{Tolerance, ValidationType::Epsilon}; + } + + static ValidationConfig Ulp(double Tolerance) { + return ValidationConfig{Tolerance, ValidationType::Ulp}; + } +}; + +template +void runAndVerify( + ID3D12Device *D3DDevice, bool VerboseLogging, const Operation &Operation, + const InputSets &Inputs, const std::vector &Expected, + const ValidationConfig &ValidationConfig, + std::optional AdditionalCompilerOptions = std::nullopt) { + + std::optional> Actual = + runTest(D3DDevice, VerboseLogging, Operation, Inputs, + Expected.size(), AdditionalCompilerOptions); + + // If the test didn't run, don't verify anything. + if (!Actual) + return; + + VERIFY_IS_TRUE(doVectorsMatch(*Actual, Expected, ValidationConfig.Tolerance, + ValidationConfig.Type, VerboseLogging)); +} + +// +// Op definitions. The main goal of this is to specify the validation +// configuration and how to build the Expected results for a given Op. +// +// Most Ops have a 1:1 mapping of input to output, and so can use the generic +// ExpectedBuilder. +// +// Ops that differ from this pattern can specialize ExpectedBuilder as +// necessary. +// + +// Op - specializations are expected to have a ValidationConfig member and an +// appropriate overloaded function call operator. +template struct Op; + +// ExpectedBuilder - specializations are expected to have buildExpectedData +// member functions. +template struct ExpectedBuilder; + +// Default Validation configuration - ULP for floating point types, exact +// matches for everything else. +template struct DefaultValidation { + ValidationConfig ValidationConfig; + + DefaultValidation() { + if constexpr (isFloatingPointType()) + ValidationConfig = ValidationConfig::Ulp(1.0f); + } +}; + +// Strict Validation - Defaults to exact matches. +// Tolerance can be set to a non-zero value to allow for a wider range. 
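+// Ops opt into a validation policy by inheriting one of these structs; the
+// OP_* macros below stamp out the matching Op specializations.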
+struct StrictValidation { + ValidationConfig ValidationConfig; +}; + +// Macros to build up common patterns of Op definitions + +#define OP_1(OP, VALIDATION, IMPL) \ + template struct Op : VALIDATION { \ + T operator()(T A) { return IMPL; } \ + } + +#define OP_2(OP, VALIDATION, IMPL) \ + template struct Op : VALIDATION { \ + T operator()(T A, T B) { return IMPL; } \ + } + +#define OP_3(OP, VALIDATION, IMPL) \ + template struct Op : VALIDATION { \ + T operator()(T A, T B, T C) { return IMPL; } \ + } + +#define STRICT_OP_1(OP, IMPL) OP_1(OP, StrictValidation, IMPL) + +#define DEFAULT_OP_1(OP, IMPL) OP_1(OP, DefaultValidation, IMPL) +#define DEFAULT_OP_2(OP, IMPL) OP_2(OP, DefaultValidation, IMPL) +#define DEFAULT_OP_3(OP, IMPL) OP_3(OP, DefaultValidation, IMPL) + +// +// TernaryMath +// + +DEFAULT_OP_3(OpType::Mad, (A * B + C)); +DEFAULT_OP_3(OpType::Fma, (A * B + C)); + +// +// BinaryMath +// + +DEFAULT_OP_2(OpType::Add, (A + B)); +DEFAULT_OP_2(OpType::Subtract, (A - B)); +DEFAULT_OP_2(OpType::Multiply, (A * B)); +DEFAULT_OP_2(OpType::Divide, (A / B)); + +template struct Op : DefaultValidation { + T operator()(T A, T B) { + if constexpr (std::is_same_v) + return std::fmod(A, B); + else + return A % B; + } +}; + +DEFAULT_OP_2(OpType::Min, (std::min(A, B))); +DEFAULT_OP_2(OpType::Max, (std::max(A, B))); +DEFAULT_OP_2(OpType::Ldexp, (A * static_cast(std::pow(2.0f, B)))); + +// +// Bitwise +// + +template T Saturate(T A) { + if (A < static_cast(0.0f)) + return static_cast(0.0f); + if (A > static_cast(1.0f)) + return static_cast(1.0f); + return A; +} + +template T ReverseBits(T A) { + T Result = 0; + const size_t NumBits = sizeof(T) * 8; + for (size_t I = 0; I < NumBits; I++) { + Result <<= 1; + Result |= (A & 1); + A >>= 1; + } + return Result; +} + +template uint32_t CountBits(T A) { + return static_cast(std::bitset(A).count()); +} + +// General purpose bit scan from the MSB. Based on the value of LookingForZero +// returns the index of the first high/low bit found. 
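+// For example, ScanFromMSB(0x28u, /*LookingForZero=*/false) returns 5, the
+// LSB-based index of the highest set bit of 0b101000, which is what HLSL's
+// firstbithigh reports for an unsigned value.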
+template uint32_t ScanFromMSB(T A, bool LookingForZero) { + if (A == 0) + return std::numeric_limits::max(); + + constexpr uint32_t NumBits = sizeof(T) * 8; + for (int32_t I = NumBits - 1; I >= 0; --I) { + bool BitSet = (A & (static_cast(1) << I)) != 0; + if (BitSet != LookingForZero) + return static_cast(I); + } + return std::numeric_limits::max(); +} + +template +typename std::enable_if::value, uint32_t>::type +FirstBitHigh(T A) { + const bool IsNegative = A < 0; + return ScanFromMSB(A, IsNegative); +} + +template +typename std::enable_if::value, uint32_t>::type +FirstBitHigh(T A) { + return ScanFromMSB(A, false); +} + +template uint32_t FirstBitLow(T A) { + const uint32_t NumBits = sizeof(T) * 8; + + if (A == 0) + return std::numeric_limits::max(); + + for (uint32_t I = 0; I < NumBits; ++I) { + if (A & (static_cast(1) << I)) + return static_cast(I); + } + + return std::numeric_limits::max(); +} + +DEFAULT_OP_2(OpType::And, (A & B)); +DEFAULT_OP_2(OpType::Or, (A | B)); +DEFAULT_OP_2(OpType::Xor, (A ^ B)); +DEFAULT_OP_2(OpType::LeftShift, (A << B)); +DEFAULT_OP_2(OpType::RightShift, (A >> B)); +DEFAULT_OP_1(OpType::Saturate, (Saturate(A))); +DEFAULT_OP_1(OpType::ReverseBits, (ReverseBits(A))); + +#define BITWISE_OP(OP, IMPL) \ + template struct Op : StrictValidation { \ + uint32_t operator()(T A) { return IMPL; } \ + } + +BITWISE_OP(OpType::CountBits, (CountBits(A))); +BITWISE_OP(OpType::FirstBitHigh, (FirstBitHigh(A))); +BITWISE_OP(OpType::FirstBitLow, (FirstBitLow(A))); + +#undef BITWISE_OP + +// +// Unary +// + +DEFAULT_OP_1(OpType::Initialize, (A)); + +template +struct Op : DefaultValidation {}; + +template +static std::vector buildExpectedArrayAccess(const InputSets &Inputs) { + const size_t VectorSize = Inputs[0].size(); + std::vector Expected; + const size_t IndexCount = 6; + Expected.resize(VectorSize); + + size_t IndexList[IndexCount] = { + 0, VectorSize - 1, 1, VectorSize - 2, VectorSize / 2, VectorSize / 2 + 1}; + size_t End = std::min(VectorSize, IndexCount); + for (size_t I = 0; I < End; ++I) + Expected[IndexList[I]] = Inputs[0][IndexList[I]]; + + return Expected; +} + +template +struct ExpectedBuilder { + static std::vector + buildExpected(Op, + const InputSets &Inputs) { + DXASSERT_NOMSG(Inputs.size() == 1); + return buildExpectedArrayAccess(Inputs); + } +}; + +template +struct Op : DefaultValidation {}; + +template +struct ExpectedBuilder { + static std::vector + buildExpected(Op, + const InputSets &Inputs) { + DXASSERT_NOMSG(Inputs.size() == 2); + return buildExpectedArrayAccess(Inputs); + } +}; + +// +// Cast +// + +#define CAST_OP(OP, TYPE, IMPL) \ + template struct Op : StrictValidation { \ + TYPE operator()(T A) { return IMPL; } \ + }; + +template HLSLBool_t CastToBool(T A) { return (bool)A; } +template <> HLSLBool_t CastToBool(HLSLHalf_t A) { return (bool)((float)A); } + +template HLSLHalf_t CastToFloat16(T A) { + return HLSLHalf_t(float(A)); +} + +template float CastToFloat32(T A) { return (float)A; } + +template double CastToFloat64(T A) { return (double)A; } +template <> double CastToFloat64(HLSLHalf_t A) { return (double)((float)A); } + +template int16_t CastToInt16(T A) { return (int16_t)A; } +template <> int16_t CastToInt16(HLSLHalf_t A) { return (int16_t)((float)A); } + +template int32_t CastToInt32(T A) { return (int32_t)A; } +template <> int32_t CastToInt32(HLSLHalf_t A) { return (int32_t)((float)A); } + +template int64_t CastToInt64(T A) { return (int64_t)A; } +template <> int64_t CastToInt64(HLSLHalf_t A) { return (int64_t)((float)A); } + +template 
uint16_t CastToUint16(T A) { return (uint16_t)A; } +template <> uint16_t CastToUint16(HLSLHalf_t A) { return (uint16_t)((float)A); } + +template uint32_t CastToUint32(T A) { return (uint32_t)A; } +template <> uint32_t CastToUint32(HLSLHalf_t A) { return (uint32_t)((float)A); } + +template uint64_t CastToUint64(T A) { return (uint64_t)A; } +template <> uint64_t CastToUint64(HLSLHalf_t A) { return (uint64_t)((float)A); } + +CAST_OP(OpType::CastToBool, HLSLBool_t, (CastToBool(A))); +CAST_OP(OpType::CastToInt16, int16_t, (CastToInt16(A))); +CAST_OP(OpType::CastToInt32, int32_t, (CastToInt32(A))); +CAST_OP(OpType::CastToInt64, int64_t, (CastToInt64(A))); +CAST_OP(OpType::CastToUint16, uint16_t, (CastToUint16(A))); +CAST_OP(OpType::CastToUint32, uint32_t, (CastToUint32(A))); +CAST_OP(OpType::CastToUint64, uint64_t, (CastToUint64(A))); +CAST_OP(OpType::CastToUint16_FromFP, uint16_t, (CastToUint16(A))); +CAST_OP(OpType::CastToUint32_FromFP, uint32_t, (CastToUint32(A))); +CAST_OP(OpType::CastToUint64_FromFP, uint64_t, (CastToUint64(A))); +CAST_OP(OpType::CastToFloat16, HLSLHalf_t, (CastToFloat16(A))); +CAST_OP(OpType::CastToFloat32, float, (CastToFloat32(A))); +CAST_OP(OpType::CastToFloat64, double, (CastToFloat64(A))); + +#undef CAST_OP + +// +// Trigonometric +// + +// All trigonometric ops are floating point types. These trig functions are +// defined to have a max absolute error of 0.0008 as per the D3D functional +// specs. An example with this spec for sin and cos is available here: +// https://microsoft.github.io/DirectX-Specs/d3d/archive/D3D11_3_FunctionalSpec.htm#22.10.20 + +template struct TrigonometricValidation { + ValidationConfig ValidationConfig = ValidationConfig::Epsilon(0.0008f); +}; + +// Half precision trig functions have a larger tolerance due to their lower +// precision. Note that the D3D spec +// does not mention half precision trig functions. +template struct TrigonometricValidation { + ValidationConfig ValidationConfig = ValidationConfig::Epsilon(0.003f); +}; + +// For the half precision trig functions with an infinite range in either +// direction we use 2 ULPs of tolerance instead. +template <> struct TrigonometricValidation { + ValidationConfig ValidationConfig = ValidationConfig::Ulp(2.0f); +}; + +template <> struct TrigonometricValidation { + ValidationConfig ValidationConfig = ValidationConfig::Ulp(2.0f); +}; + +template <> struct TrigonometricValidation { + ValidationConfig ValidationConfig = ValidationConfig::Ulp(2.0f); +}; + +#define TRIG_OP(OP, IMPL) \ + template struct Op : TrigonometricValidation { \ + T operator()(T A) { return IMPL; } \ + } + +TRIG_OP(OpType::Acos, (std::acos(A))); +TRIG_OP(OpType::Asin, (std::asin(A))); +TRIG_OP(OpType::Atan, (std::atan(A))); +TRIG_OP(OpType::Cos, (std::cos(A))); +TRIG_OP(OpType::Cosh, (std::cosh(A))); +TRIG_OP(OpType::Sin, (std::sin(A))); +TRIG_OP(OpType::Sinh, (std::sinh(A))); +TRIG_OP(OpType::Tan, (std::tan(A))); +TRIG_OP(OpType::Tanh, (std::tanh(A))); + +#undef TRIG_OP + +// +// AsType +// + +// We don't have std::bit_cast in C++17, so we define our own version. 
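+// (A stand-in for C++20 std::bit_cast: it reinterprets the object
+// representation of Src as a ToT via memcpy, avoiding the undefined behavior
+// of type punning through pointers or unions.)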
+//
+// AsType
+//
+
+// We don't have std::bit_cast in C++17, so we define our own version.
+template <typename ToT, typename FromT>
+typename std::enable_if<std::is_trivially_copyable<ToT>::value &&
+                            std::is_trivially_copyable<FromT>::value,
+                        ToT>::type
+bit_cast(const FromT &Src) {
+  ToT Dst;
+  std::memcpy(&Dst, &Src, sizeof(ToT));
+  return Dst;
+}
+
+#define AS_TYPE_OP(OP, TYPE, IMPL) \
+  template <typename T> struct Op<OP, T> : StrictValidation<T> { \
+    TYPE operator()(T A) { return IMPL; } \
+  };
+
+// asFloat16
+
+template <typename T> HLSLHalf_t asFloat16(T);
+template <> HLSLHalf_t asFloat16(HLSLHalf_t A) { return A; }
+template <> HLSLHalf_t asFloat16(int16_t A) {
+  return HLSLHalf_t::FromHALF(bit_cast<uint16_t>(A));
+}
+template <> HLSLHalf_t asFloat16(uint16_t A) {
+  return HLSLHalf_t::FromHALF(bit_cast<uint16_t>(A));
+}
+
+AS_TYPE_OP(OpType::AsFloat16, HLSLHalf_t, (asFloat16(A)));
+
+// asInt16
+
+template <typename T> int16_t asInt16(T);
+template <> int16_t asInt16(HLSLHalf_t A) { return bit_cast<int16_t>(A.Val); }
+template <> int16_t asInt16(int16_t A) { return A; }
+template <> int16_t asInt16(uint16_t A) { return bit_cast<int16_t>(A); }
+
+AS_TYPE_OP(OpType::AsInt16, int16_t, (asInt16(A)));
+
+// asUint16
+
+template <typename T> uint16_t asUint16(T);
+template <> uint16_t asUint16(HLSLHalf_t A) {
+  return bit_cast<uint16_t>(A.Val);
+}
+template <> uint16_t asUint16(uint16_t A) { return A; }
+template <> uint16_t asUint16(int16_t A) { return bit_cast<uint16_t>(A); }
+
+AS_TYPE_OP(OpType::AsUint16, uint16_t, (asUint16(A)));
+
+// asFloat
+
+template <typename T> float asFloat(T);
+template <> float asFloat(float A) { return float(A); }
+template <> float asFloat(int32_t A) { return bit_cast<float>(A); }
+template <> float asFloat(uint32_t A) { return bit_cast<float>(A); }
+
+AS_TYPE_OP(OpType::AsFloat, float, (asFloat(A)));
+
+// asInt
+
+template <typename T> int32_t asInt(T);
+template <> int32_t asInt(float A) { return bit_cast<int32_t>(A); }
+template <> int32_t asInt(int32_t A) { return A; }
+template <> int32_t asInt(uint32_t A) { return bit_cast<int32_t>(A); }
+
+AS_TYPE_OP(OpType::AsInt, int32_t, (asInt(A)));
+
+// asUint
+
+template <typename T> unsigned int asUint(T);
+template <> unsigned int asUint(unsigned int A) { return A; }
+template <> unsigned int asUint(float A) { return bit_cast<unsigned int>(A); }
+template <> unsigned int asUint(int A) { return bit_cast<unsigned int>(A); }
+
+AS_TYPE_OP(OpType::AsUint, uint32_t, (asUint(A)));
+
+// asDouble
+
+template <>
+struct Op<OpType::AsDouble, uint32_t> : StrictValidation<uint32_t> {
+  double operator()(uint32_t LowBits, uint32_t HighBits) {
+    uint64_t Bits = (static_cast<uint64_t>(HighBits) << 32) | LowBits;
+    double Result;
+    std::memcpy(&Result, &Bits, sizeof(Result));
+    return Result;
+  }
+};
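+
+// Illustrative round trip for the reinterpret helpers above (a sketch; the
+// helper name `asTypeRoundTripExample` is ours): asUint/asFloat preserve the
+// exact bit pattern, unlike the value-converting Cast* helpers.
+inline void asTypeRoundTripExample() {
+  const float F = 1.5f;
+  const uint32_t Bits = asUint(F); // 1.5f is 0x3FC00000
+  DXASSERT_NOMSG(Bits == 0x3FC00000u);
+  DXASSERT_NOMSG(asFloat(Bits) == F);
+}
+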
+// splitDouble
+//
+// splitdouble is special because it's a function that takes a double and
+// outputs two values. To handle this special case we override various bits of
+// the testing machinery.
+
+template <>
+struct Op<OpType::AsUint_SplitDouble, double> : StrictValidation<double> {};
+
+// Specialized version of ExpectedBuilder for the splitdouble case. The
+// expected output for this has all the Low values followed by all the High
+// values.
+template <> struct ExpectedBuilder<OpType::AsUint_SplitDouble, double> {
+  static std::vector<uint32_t>
+  buildExpected(Op<OpType::AsUint_SplitDouble, double> &,
+                const InputSets<double> &Inputs) {
+    DXASSERT_NOMSG(Inputs.size() == 1);
+
+    size_t VectorSize = Inputs[0].size();
+
+    std::vector<uint32_t> Expected;
+    Expected.resize(VectorSize * 2);
+
+    for (size_t I = 0; I < VectorSize; ++I) {
+      uint32_t Low, High;
+      splitDouble(Inputs[0][I], Low, High);
+      Expected[I] = Low;
+      Expected[I + VectorSize] = High;
+    }
+
+    return Expected;
+  }
+
+  static void splitDouble(const double A, uint32_t &LowBits,
+                          uint32_t &HighBits) {
+    uint64_t Bits = 0;
+    std::memcpy(&Bits, &A, sizeof(Bits));
+    LowBits = static_cast<uint32_t>(Bits & 0xFFFFFFFF);
+    HighBits = static_cast<uint32_t>(Bits >> 32);
+  }
+};
+
+//
+// Unary Math
+//
+
+template <typename T> T UnaryMathAbs(T A) {
+  if constexpr (std::is_unsigned_v<T>)
+    return A;
+  else
+    return static_cast<T>(std::abs(A));
+}
+
+DEFAULT_OP_1(OpType::Abs, (UnaryMathAbs(A)));
+
+// Sign is special because the return type doesn't match the input type.
+template <typename T> struct Op<OpType::Sign, T> : DefaultValidation<T> {
+  int32_t operator()(T A) {
+    const T Zero = T();
+
+    if (A > Zero)
+      return 1;
+    if (A < Zero)
+      return -1;
+    return 0;
+  }
+};
+
+DEFAULT_OP_1(OpType::Ceil, (std::ceil(A)));
+DEFAULT_OP_1(OpType::Exp, (std::exp(A)));
+DEFAULT_OP_1(OpType::Floor, (std::floor(A)));
+DEFAULT_OP_1(OpType::Frac, (A - static_cast<T>(std::floor(A))));
+DEFAULT_OP_1(OpType::Log, (std::log(A)));
+DEFAULT_OP_1(OpType::Rcp, (static_cast<T>(1.0) / A));
+DEFAULT_OP_1(OpType::Round, (std::round(A)));
+DEFAULT_OP_1(OpType::Rsqrt,
+             (static_cast<T>(1.0) / static_cast<T>(std::sqrt(A))));
+DEFAULT_OP_1(OpType::Sqrt, (std::sqrt(A)));
+DEFAULT_OP_1(OpType::Trunc, (std::trunc(A)));
+DEFAULT_OP_1(OpType::Exp2, (std::exp2(A)));
+DEFAULT_OP_1(OpType::Log10, (std::log10(A)));
+DEFAULT_OP_1(OpType::Log2, (std::log2(A)));
+
+// Frexp has a return value as well as an output parameter, so we handle it
+// with special logic. Frexp is only supported for fp32 values.
+template <> struct Op<OpType::Frexp, float> : DefaultValidation<float> {};
+
+template <> struct ExpectedBuilder<OpType::Frexp, float> {
+  static std::vector<float> buildExpected(Op<OpType::Frexp, float> &,
+                                          const InputSets<float> &Inputs) {
+    DXASSERT_NOMSG(Inputs.size() == 1);
+
+    // Expected values size is doubled. In the first half we store the
+    // Mantissas and in the second half we store the Exponents. This way we
+    // can leverage the existing logic which verifies expected values in a
+    // single vector. We just need to make sure that we organize the output in
+    // the same way in the shader and when we read it back.
+
+    size_t VectorSize = Inputs[0].size();
+
+    std::vector<float> Expected;
+    Expected.resize(VectorSize * 2);
+
+    for (size_t I = 0; I < VectorSize; ++I) {
+      int Exp = 0;
+      float Man = std::frexp(Inputs[0][I], &Exp);
+
+      // std::frexp returns a signed mantissa. But the HLSL implementation
+      // returns an unsigned mantissa.
+      Man = std::abs(Man);
+
+      Expected[I] = Man;
+
+      // std::frexp returns the exponent as an int, but HLSL stores it as a
+      // float. However, the HLSL exponent's fractional component is always 0,
+      // so the conversion between float and int is safe.
+      Expected[I + VectorSize] = static_cast<float>(Exp);
+    }
+
+    return Expected;
+  }
+};
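+
+// Worked frexp example (illustrative; `frexpPackingExample` is our name):
+// for a lane value of 8.0f, std::frexp yields mantissa 0.5 and exponent 4,
+// so with VectorSize == N the builder stores 0.5f at index I and 4.0f at
+// index I + N.
+inline void frexpPackingExample() {
+  int Exp = 0;
+  const float Man = std::abs(std::frexp(8.0f, &Exp));
+  DXASSERT_NOMSG(Man == 0.5f && Exp == 4);
+}
+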
+//
+// Binary Comparison
+//
+
+#define BINARY_COMPARISON_OP(OP, IMPL) \
+  template <typename T> struct Op<OP, T> : StrictValidation<T> { \
+    HLSLBool_t operator()(T A, T B) { return IMPL; } \
+  };
+
+BINARY_COMPARISON_OP(OpType::LessThan, (A < B));
+BINARY_COMPARISON_OP(OpType::LessEqual, (A <= B));
+BINARY_COMPARISON_OP(OpType::GreaterThan, (A > B));
+BINARY_COMPARISON_OP(OpType::GreaterEqual, (A >= B));
+BINARY_COMPARISON_OP(OpType::Equal, (A == B));
+BINARY_COMPARISON_OP(OpType::NotEqual, (A != B));
+
+//
+// Binary Logical
+//
+
+DEFAULT_OP_2(OpType::Logical_And, (A && B));
+DEFAULT_OP_2(OpType::Logical_Or, (A || B));
+
+//
+// Ternary Logical
+//
+
+OP_3(OpType::Select, StrictValidation, (static_cast<bool>(A) ? B : C));
+
+//
+// Reduction
+//
+
+#define REDUCTION_OP(OP, STDFUNC) \
+  template <typename T> struct Op<OP, T> : StrictValidation<T> {}; \
+  template <typename T> struct ExpectedBuilder<OP, T> { \
+    static std::vector<HLSLBool_t> buildExpected(Op<OP, T> &, \
+                                                 const InputSets<T> &Inputs) { \
+      const bool Res = STDFUNC(Inputs[0].begin(), Inputs[0].end(), \
+                               [](T A) { return A != static_cast<T>(0); }); \
+      return std::vector<HLSLBool_t>{Res}; \
+    } \
+  };
+
+REDUCTION_OP(OpType::Any_Mixed, (std::any_of));
+REDUCTION_OP(OpType::Any_NoZero, (std::any_of));
+REDUCTION_OP(OpType::Any_Zero, (std::any_of));
+
+REDUCTION_OP(OpType::All_Mixed, (std::all_of));
+REDUCTION_OP(OpType::All_NoZero, (std::all_of));
+REDUCTION_OP(OpType::All_Zero, (std::all_of));
+
+#undef REDUCTION_OP
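+
+// Reduction sketch (illustrative; `reductionExample` is our name): Any_*
+// reduces the whole vector to one bool via std::any_of, so {0, 0, 3} is true
+// for any() and false for all().
+inline void reductionExample() {
+  const std::vector<int32_t> V = {0, 0, 3};
+  DXASSERT_NOMSG(
+      std::any_of(V.begin(), V.end(), [](int32_t A) { return A != 0; }));
+  DXASSERT_NOMSG(
+      !std::all_of(V.begin(), V.end(), [](int32_t A) { return A != 0; }));
+}
+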
+template <typename T>
+static double computeAbsoluteEpsilon(double A, double ULPTolerance) {
+  DXASSERT((!std::isinf(A) && !std::isnan(A)),
+           "Input values should not produce inf or nan results");
+
+  // ULP is a positive value by definition. So, working with abs(A) simplifies
+  // our logic for computing ULP in the first place.
+  A = std::abs(A);
+
+  double ULP = 0.0;
+
+  if constexpr (std::is_same_v<T, HLSLHalf_t>)
+    ULP = HLSLHalf_t::GetULP(A);
+  else
+    ULP = std::nextafter(static_cast<float>(A),
+                         std::numeric_limits<float>::infinity()) -
+          static_cast<float>(A);
+
+  return ULP * ULPTolerance;
+}
+
+template <typename T> struct Op<OpType::Dot, T> : StrictValidation<T> {};
+template <typename T> struct ExpectedBuilder<OpType::Dot, T> {
+  // For Dot, buildExpected is a special case: it also computes an absolute
+  // epsilon for validation because Dot is a compound operation. Expected value
+  // is computed by multiplying and accumulating in fp64 for higher precision.
+  // Absolute epsilon is computed by reordering the accumulation into a
+  // worst-case sequence, then summing the per-step epsilons to produce a
+  // conservative error tolerance for the entire Dot operation.
+  static std::vector<T> buildExpected(Op<OpType::Dot, T> &Op,
+                                      const InputSets<T> &Inputs) {
+
+    std::vector<double> PositiveProducts;
+    std::vector<double> NegativeProducts;
+
+    const size_t VectorSize = Inputs[0].size();
+
+    // Floating point ops have a tolerance of 0.5 ULPs per operation as per the
+    // DX spec.
+    const double ULPTolerance = 0.5;
+
+    // Accumulate in fp64 to improve precision.
+    double DotProduct = 0.0;      // computed reference result
+    double AbsoluteEpsilon = 0.0; // computed tolerance
+    for (size_t I = 0; I < VectorSize; ++I) {
+      double Product = Inputs[0][I] * Inputs[1][I];
+      AbsoluteEpsilon += computeAbsoluteEpsilon<T>(Product, ULPTolerance);
+
+      DotProduct += Product;
+
+      if (Product >= 0.0)
+        PositiveProducts.push_back(Product);
+      else
+        NegativeProducts.push_back(Product);
+    }
+
+    // Sort each by magnitude so that we can accumulate them in worst case
+    // order.
+    std::sort(PositiveProducts.begin(), PositiveProducts.end(),
+              std::greater<double>());
+    std::sort(NegativeProducts.begin(), NegativeProducts.end());
+
+    // Helper to sum the products and compute/add to the running absolute
+    // epsilon total.
+    auto SumProducts = [&AbsoluteEpsilon,
+                        ULPTolerance](const std::vector<double> &Values) {
+      double Sum = Values.empty() ? 0.0 : Values[0];
+      for (size_t I = 1; I < Values.size(); ++I) {
+        Sum += Values[I];
+        AbsoluteEpsilon += computeAbsoluteEpsilon<T>(Sum, ULPTolerance);
+      }
+      return Sum;
+    };
+
+    // Accumulate products in the worst case order while computing the absolute
+    // epsilon error for each intermediate step, and accumulate that error.
+    const double SumPos = SumProducts(PositiveProducts);
+    const double SumNeg = SumProducts(NegativeProducts);
+
+    if (!PositiveProducts.empty() && !NegativeProducts.empty())
+      AbsoluteEpsilon +=
+          computeAbsoluteEpsilon<T>((SumPos + SumNeg), ULPTolerance);
+
+    Op.ValidationConfig = ValidationConfig::Epsilon(AbsoluteEpsilon);
+
+    std::vector<T> Expected;
+    Expected.push_back(static_cast<T>(DotProduct));
+    return Expected;
+  }
+};
+
+template <typename T>
+struct Op<OpType::ShuffleVector, T> : DefaultValidation<T> {};
+template <typename T> struct ExpectedBuilder<OpType::ShuffleVector, T> {
+  static std::vector<T> buildExpected(Op<OpType::ShuffleVector, T>,
+                                      const InputSets<T> &Inputs) {
+    std::vector<T> Expected(Inputs[0].size(), Inputs[0][0]);
+    return Expected;
+  }
+};
+
+//
+// Loading and Storing of Buffers
+//
+
+STRICT_OP_1(OpType::LoadAndStore_RDH_BAB_UAV, (A));
+STRICT_OP_1(OpType::LoadAndStore_RDH_BAB_SRV, (A));
+STRICT_OP_1(OpType::LoadAndStore_DT_BAB_UAV, (A));
+STRICT_OP_1(OpType::LoadAndStore_DT_BAB_SRV, (A));
+STRICT_OP_1(OpType::LoadAndStore_RD_BAB_UAV, (A));
+STRICT_OP_1(OpType::LoadAndStore_RD_BAB_SRV, (A));
+STRICT_OP_1(OpType::LoadAndStore_RDH_SB_UAV, (A));
+STRICT_OP_1(OpType::LoadAndStore_RDH_SB_SRV, (A));
+STRICT_OP_1(OpType::LoadAndStore_DT_SB_UAV, (A));
+STRICT_OP_1(OpType::LoadAndStore_DT_SB_SRV, (A));
+STRICT_OP_1(OpType::LoadAndStore_RD_SB_UAV, (A));
+STRICT_OP_1(OpType::LoadAndStore_RD_SB_SRV, (A));
+
+//
+// Float Ops
+//
+
+#define FLOAT_SPECIAL_OP(OP, IMPL) \
+  template <typename T> struct Op<OP, T> : StrictValidation<T> { \
+    HLSLBool_t operator()(T A) { return IMPL; } \
+  };
+
+FLOAT_SPECIAL_OP(OpType::IsFinite, (std::isfinite(A)));
+FLOAT_SPECIAL_OP(OpType::IsInf, (std::isinf(A)));
+FLOAT_SPECIAL_OP(OpType::IsNan, (std::isnan(A)));
+#undef FLOAT_SPECIAL_OP
+
+template <typename T> struct Op<OpType::ModF, T> : DefaultValidation<T> {};
+
+template <typename T> static T modF(T Input, T &OutParam);
+
+template <> float modF(float Input, float &OutParam) {
+  return std::modf(Input, &OutParam);
+}
+
+template <> HLSLHalf_t modF(HLSLHalf_t Input, HLSLHalf_t &OutParam) {
+  float IntegralPart = 0.0f;
+  float Frac = std::modf(float(Input), &IntegralPart);
+  OutParam = HLSLHalf_t(IntegralPart);
+  return Frac;
+}
+
+template <typename T> struct ExpectedBuilder<OpType::ModF, T> {
+  static std::vector<T> buildExpected(Op<OpType::ModF, T> &,
+                                      const InputSets<T> &Inputs) {
+    DXASSERT_NOMSG(Inputs.size() == 1);
+    size_t VectorSize = Inputs[0].size();
+
+    std::vector<T> Expected;
+    Expected.resize(VectorSize * 2);
+
+    for (size_t I = 0; I < VectorSize; ++I) {
+      T IntegralPart;
+      T Frac = modF(Inputs[0][I], IntegralPart);
+      Expected[I] = Frac;
+      Expected[I + VectorSize] = IntegralPart;
+    }
+
+    return Expected;
+  }
+};
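+
+// Worked modf example (illustrative; `modfExample` is our name): 3.75f splits
+// into fraction 0.75f and integral part 3.0f, matching the Expected layout
+// above (fractions in the first half, integral parts in the second half).
+inline void modfExample() {
+  float IntPart = 0.0f;
+  const float Frac = std::modf(3.75f, &IntPart);
+  DXASSERT_NOMSG(Frac == 0.75f && IntPart == 3.0f);
+}
+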
+//
+// Derivative Ops
+//
+
+// Coarse derivatives (ddx/ddy): all lanes in a quad get the same result.
+// Fine derivatives (ddx_fine/ddy_fine): each lane gets a unique result.
+// For testing, we validate results on lane 3 to keep validation generic.
+//
+// The value of A in each lane is computed by: A = A + LaneID * 2
+//
+// Top right (lane 1) - Top left (lane 0)
+DEFAULT_OP_1(OpType::DerivativeDdx, ((A + 2) - (A + 0)));
+// Bottom left (lane 2) - Top left (lane 0)
+DEFAULT_OP_1(OpType::DerivativeDdy, ((A + 4) - (A + 0)));
+// Bottom right (lane 3) - Bottom left (lane 2)
+DEFAULT_OP_1(OpType::DerivativeDdxFine, ((A + 6) - (A + 4)));
+// Bottom right (lane 3) - Top right (lane 1)
+DEFAULT_OP_1(OpType::DerivativeDdyFine, ((A + 6) - (A + 2)));
+
+//
+// Quad Read Ops
+//
+
+// We keep things generic so we can re-use this macro for all quad ops.
+// The lane we write to is determined via defines in the shader code.
+// See TestQuadRead in ShaderOpArith.xml.
+// For all cases we simply fill the vector on that lane with the value of the
+// third element.
+#define QUAD_READ_OP(OP, ARITY) \
+  template <typename T> struct Op<OP, T> : DefaultValidation<T> {}; \
+  template <typename T> struct ExpectedBuilder<OP, T> { \
+    static std::vector<T> buildExpected(Op<OP, T> &, \
+                                        const InputSets<T> &Inputs) { \
+      DXASSERT_NOMSG(Inputs.size() == ARITY); \
+      std::vector<T> Expected; \
+      const size_t VectorSize = Inputs[0].size(); \
+      Expected.assign(VectorSize, Inputs[0][2]); \
+      return Expected; \
+    } \
+  };
+
+QUAD_READ_OP(OpType::QuadReadLaneAt, 2);
+QUAD_READ_OP(OpType::QuadReadAcrossX, 1);
+QUAD_READ_OP(OpType::QuadReadAcrossY, 1);
+QUAD_READ_OP(OpType::QuadReadAcrossDiagonal, 1);
+
+#undef QUAD_READ_OP
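+
+// Quad layout sketch (illustrative; `quadLaneExample` is our name). Lanes are
+// arranged:
+//   lane 0 | lane 1
+//   lane 2 | lane 3
+// With each lane holding A + 2 * LaneID, the derivative expectations above
+// are constant regardless of A.
+inline void quadLaneExample() {
+  const float A = 10.0f;
+  const float Lane[4] = {A + 0, A + 2, A + 4, A + 6};
+  DXASSERT_NOMSG(Lane[1] - Lane[0] == 2.0f); // ddx (coarse)
+  DXASSERT_NOMSG(Lane[2] - Lane[0] == 4.0f); // ddy (coarse)
+  DXASSERT_NOMSG(Lane[3] - Lane[2] == 2.0f); // ddx_fine, bottom row
+}
+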
+//
+// Wave Ops
+//
+
+#define WAVE_OP(OP, IMPL) \
+  template <typename T> struct Op<OP, T> : DefaultValidation<T> { \
+    T operator()(T A, UINT WaveSize) { return IMPL; } \
+  };
+
+template <typename T> T waveActiveSum(T A, UINT WaveSize) {
+  T WaveSizeT = static_cast<T>(WaveSize);
+  return A * WaveSizeT;
+}
+
+WAVE_OP(OpType::WaveActiveSum, (waveActiveSum(A, WaveSize)));
+
+template <typename T> T waveActiveMin(T A, UINT WaveSize) {
+  std::vector<T> Values;
+  // Add the 'WaveLaneID' to A.
+  for (UINT I = 0; I < WaveSize; ++I)
+    Values.push_back(A + static_cast<T>(I));
+  return *std::min_element(Values.begin(), Values.end());
+}
+
+WAVE_OP(OpType::WaveActiveMin, (waveActiveMin(A, WaveSize)));
+
+template <typename T> T waveActiveMax(T A, UINT WaveSize) {
+  std::vector<T> Values;
+  // Add the 'WaveLaneID' to A.
+  for (UINT I = 0; I < WaveSize; ++I)
+    Values.push_back(A + static_cast<T>(I));
+  return *std::max_element(Values.begin(), Values.end());
+}
+
+WAVE_OP(OpType::WaveActiveMax, (waveActiveMax(A, WaveSize)));
+
+template <typename T> T waveActiveProduct(T A, UINT WaveSize) {
+  // We want to avoid overflow of a large product. So, the WaveActiveProdFn has
+  // an input set of all 1's and we modify the value of the largest lane to be
+  // equal to the lane index in the shader.
+  return A * static_cast<T>(WaveSize - 1);
+}
+
+WAVE_OP(OpType::WaveActiveProduct, (waveActiveProduct(A, WaveSize)));
+
+template <typename T> T waveActiveBitAnd(T A, UINT) {
+  // We set the LSB to 0 in one of the lanes.
+  return static_cast<T>(A & ~static_cast<T>(1));
+}
+
+WAVE_OP(OpType::WaveActiveBitAnd, (waveActiveBitAnd(A, WaveSize)));
+
+template <typename T> T waveActiveBitOr(T A, UINT) {
+  // We set the LSB to 1 in one of the lanes.
+  return static_cast<T>(A | static_cast<T>(1));
+}
+
+WAVE_OP(OpType::WaveActiveBitOr, (waveActiveBitOr(A, WaveSize)));
+
+template <typename T> T waveActiveBitXor(T A, UINT) {
+  // We clear the LSB in every lane except the last lane, which sets it to 1.
+  return static_cast<T>(A | static_cast<T>(1));
+}
+
+WAVE_OP(OpType::WaveActiveBitXor, (waveActiveBitXor(A, WaveSize)));
+
+template <typename T> T waveMultiPrefixBitAnd(T A, UINT) {
+  // All lanes in the group mask use a mask to filter for only the second and
+  // third LSBs.
+  return static_cast<T>(A & static_cast<T>(0x6));
+}
+
+WAVE_OP(OpType::WaveMultiPrefixBitAnd, (waveMultiPrefixBitAnd(A, WaveSize)));
+
+template <typename T> T waveMultiPrefixBitOr(T A, UINT) {
+  // All lanes in the group mask clear the second LSB.
+  return static_cast<T>(A & ~static_cast<T>(0x2));
+}
+
+WAVE_OP(OpType::WaveMultiPrefixBitOr, (waveMultiPrefixBitOr(A, WaveSize)));
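+
+// Arithmetic sketch for the WaveActiveSum expectation above (illustrative;
+// `waveSumExample` is our name): every lane holds the same value A, so the
+// wave-wide sum is A * WaveSize.
+inline void waveSumExample() {
+  DXASSERT_NOMSG(waveActiveSum(3, 32u) == 96);
+}
+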
+template <typename T>
+struct Op<OpType::WaveMultiPrefixBitXor, T> : StrictValidation<T> {};
+
+template <typename T>
+struct ExpectedBuilder<OpType::WaveMultiPrefixBitXor, T> {
+  static std::vector<T>
+  buildExpected(Op<OpType::WaveMultiPrefixBitXor, T> &,
+                const InputSets<T> &Inputs, UINT) {
+    DXASSERT_NOMSG(Inputs.size() == 1);
+
+    std::vector<T> Expected;
+    const size_t VectorSize = Inputs[0].size();
+
+    // We get a little creative for MultiPrefixBitXor. The mask we use for the
+    // group in the shader is 0xE (0b1110), which includes lanes 1, 2, and 3.
+    // Prefix ops don't include the value of the current lane in their result.
+    // So, for this test we store the result of WaveMultiPrefixBitXor from lane
+    // 3. This means only the values from lanes 1 and 2 contribute to the
+    // result at lane 3.
+    //
+    // In the shader:
+    // - Lane 0: Set to 0 (not in mask, shouldn't affect result)
+    // - Lane 1: Keeps original input values
+    // - Lane 2: Lower half + last element set to 0, upper half keeps input
+    // - Lane 3: Stores the prefix XOR result (lanes 1 XOR lanes 2)
+    //
+    // Expected result: Lower half matches input (lane 1 XOR 0), upper half is
+    // 0s, except the last element, which matches the input.
+    for (size_t I = 0; I < VectorSize / 2; ++I)
+      Expected.push_back(Inputs[0][I]);
+    for (size_t I = VectorSize / 2; I < VectorSize - 1; ++I)
+      Expected.push_back(0);
+
+    // We also set the last element to 0 on lane 2 so the last element in the
+    // output vector matches the last element in the input vector.
+    Expected.push_back(Inputs[0][VectorSize - 1]);
+
+    return Expected;
+  }
+};
+
+template <typename T>
+struct Op<OpType::WaveActiveAllEqual, T> : StrictValidation<T> {};
+
+template <typename T> struct ExpectedBuilder<OpType::WaveActiveAllEqual, T> {
+  static std::vector<HLSLBool_t>
+  buildExpected(Op<OpType::WaveActiveAllEqual, T> &,
+                const InputSets<T> &Inputs, UINT) {
+    DXASSERT_NOMSG(Inputs.size() == 1);
+
+    std::vector<HLSLBool_t> Expected;
+    const size_t VectorSize = Inputs[0].size();
+    Expected.assign(VectorSize, static_cast<HLSLBool_t>(true));
+    // We set the last element to a different value on a single lane.
+    Expected[VectorSize - 1] = static_cast<HLSLBool_t>(false);
+
+    return Expected;
+  }
+};
+
+template <typename T>
+struct Op<OpType::WaveReadLaneAt, T> : StrictValidation<T> {};
+
+template <typename T> struct ExpectedBuilder<OpType::WaveReadLaneAt, T> {
+  static std::vector<T> buildExpected(Op<OpType::WaveReadLaneAt, T> &,
+                                      const InputSets<T> &Inputs, UINT) {
+    DXASSERT_NOMSG(Inputs.size() == 1);
+
+    std::vector<T> Expected;
+    const size_t VectorSize = Inputs[0].size();
+    // Simple test, on the lane that we read we also fill the vector with the
+    // value of the first element.
+    Expected.assign(VectorSize, Inputs[0][0]);
+
+    return Expected;
+  }
+};
+
+template <typename T>
+struct Op<OpType::WaveReadLaneFirst, T> : StrictValidation<T> {};
+
+template <typename T> struct ExpectedBuilder<OpType::WaveReadLaneFirst, T> {
+  static std::vector<T> buildExpected(Op<OpType::WaveReadLaneFirst, T> &,
+                                      const InputSets<T> &Inputs, UINT) {
+    DXASSERT_NOMSG(Inputs.size() == 1);
+
+    std::vector<T> Expected;
+    const size_t VectorSize = Inputs[0].size();
+    // Simple test, on the lane that we read we also fill the vector with the
+    // value of the first element.
+    Expected.assign(VectorSize, Inputs[0][0]);
+
+    return Expected;
+  }
+};
+
+template <typename T> T wavePrefixSum(T A, UINT WaveSize) {
+  // We test the prefix sum in the 'middle' lane. This choice is arbitrary.
+  return A * static_cast<T>(WaveSize / 2);
+}
+
+WAVE_OP(OpType::WavePrefixSum, (wavePrefixSum(A, WaveSize)));
+
+template <typename T> T waveMultiPrefixSum(T A, UINT) {
+  return A * static_cast<T>(2u);
+}
+
+WAVE_OP(OpType::WaveMultiPrefixSum, (waveMultiPrefixSum(A, WaveSize)));
+
+template <typename T> T wavePrefixProduct(T A, UINT) {
+  // We test the prefix product in the 3rd lane to avoid overflow issues.
+  // So the result is A * A.
+  return A * A;
+}
+
+WAVE_OP(OpType::WavePrefixProduct, (wavePrefixProduct(A, WaveSize)));
+
+template <typename T> T waveMultiPrefixProduct(T A, UINT) {
+  // The group mask has 3 lanes.
+  return A * A;
+}
+
+WAVE_OP(OpType::WaveMultiPrefixProduct, (waveMultiPrefixProduct(A, WaveSize)));
+
+template <typename T> struct Op<OpType::WaveMatch, T> : StrictValidation<T> {};
+
+static void WriteExpectedValueForLane(UINT *Dest, const UINT LaneID,
+                                      const std::bitset<128> &ExpectedValue) {
+  std::bitset<128> Lo32Mask;
+  Lo32Mask.set();
+  Lo32Mask >>= 128 - 32;
+
+  UINT Offset = 4 * LaneID;
+  for (uint32_t I = 0; I < 4; I++) {
+    uint32_t V = ((ExpectedValue >> (I * 32)) & Lo32Mask).to_ulong();
+    Dest[Offset++] = V;
+  }
+}
+
+template <typename T> struct ExpectedBuilder<OpType::WaveMatch, T> {
+  static std::vector<UINT> buildExpected(Op<OpType::WaveMatch, T> &,
+                                         const InputSets<T> &Inputs,
+                                         const UINT WaveSize) {
+    // This test sets lanes (0, min(VectorSize/2, WaveSize/2), and
+    // min(VectorSize-1, WaveSize-1)) to unique values and has them modify the
+    // vector at their respective indices. Remaining lanes remain unchanged.
+    DXASSERT_NOMSG(Inputs.size() == 1);
+
+    const UINT VectorSize = static_cast<UINT>(Inputs[0].size());
+    std::vector<UINT> Expected;
+    Expected.assign(WaveSize * 4, 0);
+
+    const UINT MidLaneID = std::min(VectorSize / 2, WaveSize / 2);
+    const UINT LastLaneID = std::min(VectorSize - 1, WaveSize - 1);
+
+    // Use a std::bitset<128> to represent the uint4 returned by WaveMatch, as
+    // it's convenient this way in C++.
+    std::bitset<128> DefaultExpectedValue;
+
+    for (UINT I = 0; I < WaveSize; ++I)
+      DefaultExpectedValue.set(I);
+
+    DefaultExpectedValue.reset(0);
+    DefaultExpectedValue.reset(MidLaneID);
+    DefaultExpectedValue.reset(LastLaneID);
+
+    for (UINT LaneID = 0; LaneID < WaveSize; ++LaneID) {
+      if (LaneID == 0 || LaneID == MidLaneID || LaneID == LastLaneID) {
+        std::bitset<128> ExpectedValue(0);
+        ExpectedValue.set(LaneID);
+        WriteExpectedValueForLane(Expected.data(), LaneID, ExpectedValue);
+        continue;
+      }
+      WriteExpectedValueForLane(Expected.data(), LaneID, DefaultExpectedValue);
+    }
+
+    return Expected;
+  }
+};
+
+#undef WAVE_OP
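+
+// WaveMatch mask sketch (illustrative; `waveMatchMaskExample` is our name):
+// a lane that matched only itself produces a uint4 whose low 32 bits contain
+// just its own lane bit. For lane 5 that is 0x20, written at Dest[4 * 5].
+inline void waveMatchMaskExample() {
+  std::bitset<128> ExpectedValue;
+  ExpectedValue.set(5);
+  UINT Dest[4 * 8] = {};
+  WriteExpectedValueForLane(Dest, 5, ExpectedValue);
+  DXASSERT_NOMSG(Dest[20] == 0x20u && Dest[21] == 0 && Dest[22] == 0 &&
+                 Dest[23] == 0);
+}
+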
+//
+// dispatchTest
+//
+
+template <OpType OP, typename T> struct ExpectedBuilder {
+
+  // The unary/binary/ternary overloads are disambiguated by the operation's
+  // arity.
+  template <OpType O = OP,
+            typename std::enable_if<getOperation(O).Arity == 1, int>::type = 0>
+  static auto buildExpected(Op<OP, T> Op, const InputSets<T> &Inputs) {
+    DXASSERT_NOMSG(Inputs.size() == 1);
+
+    std::vector<decltype(Op(Inputs[0][0]))> Expected;
+    Expected.reserve(Inputs[0].size());
+
+    for (size_t I = 0; I < Inputs[0].size(); ++I)
+      Expected.push_back(Op(Inputs[0][I]));
+
+    return Expected;
+  }
+
+  template <OpType O = OP,
+            typename std::enable_if<getOperation(O).Arity == 2, int>::type = 0>
+  static auto buildExpected(Op<OP, T> Op, const InputSets<T> &Inputs) {
+    DXASSERT_NOMSG(Inputs.size() == 2);
+
+    std::vector<decltype(Op(Inputs[0][0], Inputs[1][0]))> Expected;
+    Expected.reserve(Inputs[0].size());
+
+    for (size_t I = 0; I < Inputs[0].size(); ++I)
+      Expected.push_back(Op(Inputs[0][I], Inputs[1][I]));
+
+    return Expected;
+  }
+
+  template <OpType O = OP,
+            typename std::enable_if<getOperation(O).Arity == 3, int>::type = 0>
+  static auto buildExpected(Op<OP, T> Op, const InputSets<T> &Inputs) {
+    DXASSERT_NOMSG(Inputs.size() == 3);
+
+    std::vector<decltype(Op(Inputs[0][0], Inputs[1][0], Inputs[2][0]))>
+        Expected;
+    Expected.reserve(Inputs[0].size());
+
+    for (size_t I = 0; I < Inputs[0].size(); ++I)
+      Expected.push_back(Op(Inputs[0][I], Inputs[1][I], Inputs[2][I]));
+
+    return Expected;
+  }
+
+  static auto buildExpected(Op<OP, T> Op, const InputSets<T> &Inputs,
+                            UINT WaveSize) {
+    DXASSERT_NOMSG(Inputs.size() == 1);
+
+    std::vector<decltype(Op(Inputs[0][0], WaveSize))> Expected;
+    Expected.reserve(Inputs[0].size());
+
+    for (size_t I = 0; I < Inputs[0].size(); ++I)
+      Expected.push_back(Op(Inputs[0][I], WaveSize));
+
+    return Expected;
+  }
+};
+
+template <OpType OP, typename T>
+std::vector<size_t> getInputSizesToTest(size_t OverrideInputSize) {
+  std::vector<size_t> InputVectorSizes;
+  const std::array<size_t, 8> DefaultInputSizes = {3,  5,   16,  17,
+                                                   35, 100, 256, 1024};
+
+  if (OverrideInputSize)
+    InputVectorSizes.push_back(OverrideInputSize);
+  else {
+    // StructuredBuffers have a max size of 2048 bytes.
+    const size_t MaxInputSize =
+        IsStructuredBufferLoadAndStoreOp(OP) ? 2048 / sizeof(T) : 1024;
+
+    for (size_t Size : DefaultInputSizes) {
+      if (Size <= MaxInputSize)
+        InputVectorSizes.push_back(Size);
+    }
+
+    if (InputVectorSizes.empty() || MaxInputSize != InputVectorSizes.back())
+      InputVectorSizes.push_back(MaxInputSize);
+  }
+
+  return InputVectorSizes;
+}
+
+template <OpType OP, typename T>
+void dispatchTest(ID3D12Device *D3DDevice, bool VerboseLogging,
+                  size_t OverrideInputSize) {
+
+  const std::vector<size_t> InputVectorSizes =
+      getInputSizesToTest<OP, T>(OverrideInputSize);
+
+  constexpr const Operation &Operation = getOperation(OP);
+  Op<OP, T> Op;
+
+  for (size_t VectorSize : InputVectorSizes) {
+    std::vector<std::vector<T>> Inputs =
+        buildTestInputs<T>(VectorSize, Operation.InputSets, Operation.Arity);
+
+    auto Expected = ExpectedBuilder<OP, T>::buildExpected(Op, Inputs);
+
+    runAndVerify(D3DDevice, VerboseLogging, Operation, Inputs, Expected,
+                 Op.ValidationConfig);
+  }
+}
+
+template <OpType OP, typename T>
+void dispatchWaveOpTest(ID3D12Device *D3DDevice, bool VerboseLogging,
+                        size_t OverrideInputSize, UINT WaveSize) {
+
+  const std::vector<size_t> InputVectorSizes =
+      getInputSizesToTest<OP, T>(OverrideInputSize);
+
+  constexpr const Operation &Operation = getOperation(OP);
+  Op<OP, T> Op;
+
+  const std::string AdditionalCompilerOptions =
+      "-DWAVE_SIZE=" + std::to_string(WaveSize) +
+      " -DNUMTHREADS_XYZ=" + std::to_string(WaveSize) + ",1,1 ";
+
+  for (size_t VectorSize : InputVectorSizes) {
+    std::vector<std::vector<T>> Inputs =
+        buildTestInputs<T>(VectorSize, Operation.InputSets, Operation.Arity);
+
+    auto Expected = ExpectedBuilder<OP, T>::buildExpected(Op, Inputs, WaveSize);
+
+    runAndVerify(D3DDevice, VerboseLogging, Operation, Inputs, Expected,
+                 Op.ValidationConfig, AdditionalCompilerOptions);
+  }
+}
+
+} // namespace LongVector
+
+using namespace LongVector;
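+
+// Input-size sketch (illustrative; `inputSizeExample` is our name): for a
+// structured-buffer load/store op on double, getInputSizesToTest caps vector
+// sizes at 2048 / sizeof(double) == 256 elements, filtering the defaults
+// {3, 5, 16, 17, 35, 100, 256, 1024} down to {3, 5, 16, 17, 35, 100, 256}.
+inline void inputSizeExample() {
+  const std::vector<size_t> Sizes =
+      getInputSizesToTest<OpType::LoadAndStore_RDH_SB_UAV, double>(0);
+  DXASSERT_NOMSG(Sizes.back() == 2048 / sizeof(double));
+}
+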
operations") + TEST_CLASS_PROPERTY( + "Kits.Specification", + "Device.Graphics.D3D12.DXILCore.ShaderModel69.CoreRequirement") + TEST_METHOD_PROPERTY(L"Priority", L"0") + END_TEST_CLASS() + + TEST_CLASS_SETUP(classSetup) { + WEX::TestExecution::SetVerifyOutput verifySettings( + WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); + + // Run this only once. + if (!Initialized) { + Initialized = true; + + D3D12SDK = D3D12SDKSelector(); + + WEX::TestExecution::RuntimeParameters::TryGetValue(L"VerboseLogging", + VerboseLogging); + if (VerboseLogging) + hlsl_test::LogCommentFmt(L"Verbose logging is enabled for this test."); + else + hlsl_test::LogCommentFmt(L"Verbose logging is disabled for this test."); + + WEX::TestExecution::RuntimeParameters::TryGetValue(L"InputSize", + OverrideInputSize); + + WEX::TestExecution::RuntimeParameters::TryGetValue(L"WaveLaneCount", + OverrideWaveLaneCount); + + bool IsRITP = false; + WEX::TestExecution::RuntimeParameters::TryGetValue(L"RITP", IsRITP); + + if (IsRITP) { + if (!OverrideInputSize) + // Help keep test runtime down for RITP runs + OverrideInputSize = 10; + else + hlsl_test::LogWarningFmt( + L"RITP is enabled but InputSize is also set. Will use the" + L"InputSize value: %d.", + OverrideInputSize); + } + + bool FailIfRequirementsNotMet = false; +#ifdef _HLK_CONF + FailIfRequirementsNotMet = true; +#endif + WEX::TestExecution::RuntimeParameters::TryGetValue( + L"FailIfRequirementsNotMet", FailIfRequirementsNotMet); + + const bool SkipUnsupported = !FailIfRequirementsNotMet; + if (!D3D12SDK->createDevice(&D3DDevice, D3D_SHADER_MODEL_6_9, + SkipUnsupported)) { + if (FailIfRequirementsNotMet) + hlsl_test::LogErrorFmt( + L"Device Creation failed, resulting in test failure, since " + L"FailIfRequirementsNotMet is set. The expectation is that this " + L"test will only be executed if something has previously " + L"determined that the system meets the requirements of this " + L"test."); + + return false; + } + } + + return true; + } + + TEST_METHOD_SETUP(methodSetup) { + // It's possible a previous test case caused a device removal. If it did we + // need to try and create a new device. + if (D3DDevice && D3DDevice->GetDeviceRemovedReason() != S_OK) { + hlsl_test::LogCommentFmt(L"Device was lost!"); + D3DDevice.Release(); + } + + if (!D3DDevice) { + hlsl_test::LogCommentFmt(L"Creating device"); + + // We expect this to succeed, and fail if it doesn't, because classSetup() + // has already ensured that the system configuration meets the + // requirements of all the tests in this class. 
+
+  TEST_METHOD_SETUP(methodSetup) {
+    // It's possible a previous test case caused a device removal. If it did,
+    // we need to try and create a new device.
+    if (D3DDevice && D3DDevice->GetDeviceRemovedReason() != S_OK) {
+      hlsl_test::LogCommentFmt(L"Device was lost!");
+      D3DDevice.Release();
+    }
+
+    if (!D3DDevice) {
+      hlsl_test::LogCommentFmt(L"Creating device");
+
+      // We expect this to succeed, and fail if it doesn't, because
+      // classSetup() has already ensured that the system configuration meets
+      // the requirements of all the tests in this class.
+      const bool SkipUnsupported = false;
+
+      VERIFY_IS_TRUE(D3D12SDK->createDevice(&D3DDevice, D3D_SHADER_MODEL_6_9,
+                                            SkipUnsupported));
+    }
+
+    return true;
+  }
+
+  template <OpType OP, typename T> void runWaveOpTest() {
+    WEX::TestExecution::SetVerifyOutput VerifySettings(
+        WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures);
+
+    UINT WaveSize = 0;
+
+    if (OverrideWaveLaneCount > 0) {
+      WaveSize = OverrideWaveLaneCount;
+      hlsl_test::LogCommentFmt(
+          L"Using overridden WaveLaneCount of %d for this test.", WaveSize);
+    } else {
+      D3D12_FEATURE_DATA_D3D12_OPTIONS1 WaveOpts;
+      VERIFY_SUCCEEDED(D3DDevice->CheckFeatureSupport(
+          D3D12_FEATURE_D3D12_OPTIONS1, &WaveOpts, sizeof(WaveOpts)));
+
+      WaveSize = WaveOpts.WaveLaneCountMin;
+    }
+
+    DXASSERT_NOMSG(WaveSize > 0);
+    DXASSERT((WaveSize & (WaveSize - 1)) == 0, "must be a power of 2");
+
+    dispatchWaveOpTest<OP, T>(D3DDevice, VerboseLogging, OverrideInputSize,
+                              WaveSize);
+  }
+
+  template <OpType OP, typename T> void runTest() {
+    WEX::TestExecution::SetVerifyOutput verifySettings(
+        WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures);
+    dispatchTest<OP, T>(D3DDevice, VerboseLogging, OverrideInputSize);
+  }
+
+  // TernaryMath
+
+  HLK_TEST(Mad, uint16_t);
+  HLK_TEST(Mad, uint32_t);
+  HLK_TEST(Mad, uint64_t);
+  HLK_TEST(Mad, int16_t);
+  HLK_TEST(Mad, int32_t);
+  HLK_TEST(Mad, int64_t);
+  HLK_TEST(Mad, HLSLHalf_t);
+  HLK_TEST(Mad, float);
+  HLK_TEST_DOUBLE(Fma, double);
+  HLK_TEST_DOUBLE(Mad, double);
+
+  // BinaryMath
+
+  HLK_TEST(Add, HLSLBool_t);
+  HLK_TEST(Subtract, HLSLBool_t);
+  HLK_TEST(Add, int16_t);
+  HLK_TEST(Subtract, int16_t);
+  HLK_TEST(Multiply, int16_t);
+  HLK_TEST(Divide, int16_t);
+  HLK_TEST(Modulus, int16_t);
+  HLK_TEST(Min, int16_t);
+  HLK_TEST(Max, int16_t);
+  HLK_TEST(Add, int32_t);
+  HLK_TEST(Subtract, int32_t);
+  HLK_TEST(Multiply, int32_t);
+  HLK_TEST(Divide, int32_t);
+  HLK_TEST(Modulus, int32_t);
+  HLK_TEST(Min, int32_t);
+  HLK_TEST(Max, int32_t);
+  HLK_TEST(Add, int64_t);
+  HLK_TEST(Subtract, int64_t);
+  HLK_TEST(Multiply, int64_t);
+  HLK_TEST(Divide, int64_t);
+  HLK_TEST(Modulus, int64_t);
+  HLK_TEST(Min, int64_t);
+  HLK_TEST(Max, int64_t);
+  HLK_TEST(Add, uint16_t);
+  HLK_TEST(Subtract, uint16_t);
+  HLK_TEST(Multiply, uint16_t);
+  HLK_TEST(Divide, uint16_t);
+  HLK_TEST(Modulus, uint16_t);
+  HLK_TEST(Min, uint16_t);
+  HLK_TEST(Max, uint16_t);
+  HLK_TEST(Add, uint32_t);
+  HLK_TEST(Subtract, uint32_t);
+  HLK_TEST(Multiply, uint32_t);
+  HLK_TEST(Divide, uint32_t);
+  HLK_TEST(Modulus, uint32_t);
+  HLK_TEST(Min, uint32_t);
+  HLK_TEST(Max, uint32_t);
+  HLK_TEST(Add, uint64_t);
+  HLK_TEST(Subtract, uint64_t);
+  HLK_TEST(Multiply, uint64_t);
+  HLK_TEST(Divide, uint64_t);
+  HLK_TEST(Modulus, uint64_t);
+  HLK_TEST(Min, uint64_t);
+  HLK_TEST(Max, uint64_t);
+  HLK_TEST(Add, HLSLHalf_t);
+  HLK_TEST(Subtract, HLSLHalf_t);
+  HLK_TEST(Multiply, HLSLHalf_t);
+  HLK_TEST(Divide, HLSLHalf_t);
+  HLK_TEST(Modulus, HLSLHalf_t);
+  HLK_TEST(Min, HLSLHalf_t);
+  HLK_TEST(Max, HLSLHalf_t);
+  HLK_TEST(Ldexp, HLSLHalf_t);
+  HLK_TEST(Add, float);
+  HLK_TEST(Subtract, float);
+  HLK_TEST(Multiply, float);
+  HLK_TEST(Divide, float);
+  HLK_TEST(Modulus, float);
+  HLK_TEST(Min, float);
+  HLK_TEST(Max, float);
+  HLK_TEST(Ldexp, float);
+  HLK_TEST_DOUBLE(Add, double);
+  HLK_TEST_DOUBLE(Subtract, double);
+  HLK_TEST_DOUBLE(Multiply, double);
+  HLK_TEST_DOUBLE(Divide, double);
+  HLK_TEST_DOUBLE(Min, double);
+  HLK_TEST_DOUBLE(Max, double);
+
+  // Bitwise
+
+  HLK_TEST(And, uint16_t);
+  HLK_TEST(Or, uint16_t);
+  HLK_TEST(Xor, uint16_t);
+  HLK_TEST(ReverseBits, uint16_t);
+  HLK_TEST(CountBits, uint16_t);
+ 
HLK_TEST(FirstBitHigh, uint16_t); + HLK_TEST(FirstBitLow, uint16_t); + HLK_TEST(LeftShift, uint16_t); + HLK_TEST(RightShift, uint16_t); + HLK_TEST(And, uint32_t); + HLK_TEST(Or, uint32_t); + HLK_TEST(Xor, uint32_t); + HLK_TEST(LeftShift, uint32_t); + HLK_TEST(RightShift, uint32_t); + HLK_TEST(ReverseBits, uint32_t); + HLK_TEST(CountBits, uint32_t); + HLK_TEST(FirstBitHigh, uint32_t); + HLK_TEST(FirstBitLow, uint32_t); + HLK_TEST(And, uint64_t); + HLK_TEST(Or, uint64_t); + HLK_TEST(Xor, uint64_t); + HLK_TEST(LeftShift, uint64_t); + HLK_TEST(RightShift, uint64_t); + HLK_TEST(ReverseBits, uint64_t); + HLK_TEST(CountBits, uint64_t); + HLK_TEST(FirstBitHigh, uint64_t); + HLK_TEST(FirstBitLow, uint64_t); + HLK_TEST(And, int16_t); + HLK_TEST(Or, int16_t); + HLK_TEST(Xor, int16_t); + HLK_TEST(LeftShift, int16_t); + HLK_TEST(RightShift, int16_t); + HLK_TEST(ReverseBits, int16_t); + HLK_TEST(CountBits, int16_t); + HLK_TEST(FirstBitHigh, int16_t); + HLK_TEST(FirstBitLow, int16_t); + HLK_TEST(And, int32_t); + HLK_TEST(Or, int32_t); + HLK_TEST(Xor, int32_t); + HLK_TEST(LeftShift, int32_t); + HLK_TEST(RightShift, int32_t); + HLK_TEST(ReverseBits, int32_t); + HLK_TEST(CountBits, int32_t); + HLK_TEST(FirstBitHigh, int32_t); + HLK_TEST(FirstBitLow, int32_t); + HLK_TEST(And, int64_t); + HLK_TEST(Or, int64_t); + HLK_TEST(Xor, int64_t); + HLK_TEST(LeftShift, int64_t); + HLK_TEST(RightShift, int64_t); + HLK_TEST(ReverseBits, int64_t); + HLK_TEST(CountBits, int64_t); + HLK_TEST(FirstBitHigh, int64_t); + HLK_TEST(FirstBitLow, int64_t); + HLK_TEST(Saturate, HLSLHalf_t); + HLK_TEST(Saturate, float); + HLK_TEST_DOUBLE(Saturate, double); + + // Unary + + HLK_TEST(Initialize, HLSLBool_t); + HLK_TEST(ArrayOperator_StaticAccess, HLSLBool_t); + HLK_TEST(ArrayOperator_DynamicAccess, HLSLBool_t); + HLK_TEST(Initialize, int16_t); + HLK_TEST(ArrayOperator_StaticAccess, int16_t); + HLK_TEST(ArrayOperator_DynamicAccess, int16_t); + HLK_TEST(Initialize, int32_t); + HLK_TEST(ArrayOperator_StaticAccess, int32_t); + HLK_TEST(ArrayOperator_DynamicAccess, int32_t); + HLK_TEST(Initialize, int64_t); + HLK_TEST(ArrayOperator_StaticAccess, int64_t); + HLK_TEST(ArrayOperator_DynamicAccess, int64_t); + HLK_TEST(Initialize, uint16_t); + HLK_TEST(ArrayOperator_StaticAccess, uint16_t); + HLK_TEST(ArrayOperator_DynamicAccess, uint16_t); + HLK_TEST(Initialize, uint32_t); + HLK_TEST(ArrayOperator_StaticAccess, uint32_t); + HLK_TEST(ArrayOperator_DynamicAccess, uint32_t); + HLK_TEST(Initialize, uint64_t); + HLK_TEST(ArrayOperator_StaticAccess, uint64_t); + HLK_TEST(ArrayOperator_DynamicAccess, uint64_t); + HLK_TEST(Initialize, HLSLHalf_t); + HLK_TEST(ArrayOperator_StaticAccess, HLSLHalf_t); + HLK_TEST(ArrayOperator_DynamicAccess, HLSLHalf_t); + HLK_TEST(Initialize, float); + HLK_TEST(ArrayOperator_StaticAccess, float); + HLK_TEST(ArrayOperator_DynamicAccess, float); + HLK_TEST_DOUBLE(Initialize, double); + HLK_TEST_DOUBLE(ArrayOperator_StaticAccess, double); + HLK_TEST_DOUBLE(ArrayOperator_DynamicAccess, double); + + HLK_TEST(ShuffleVector, HLSLBool_t); + HLK_TEST(ShuffleVector, int16_t); + HLK_TEST(ShuffleVector, int32_t); + HLK_TEST(ShuffleVector, int64_t); + HLK_TEST(ShuffleVector, uint16_t); + HLK_TEST(ShuffleVector, uint32_t); + HLK_TEST(ShuffleVector, uint64_t); + HLK_TEST(ShuffleVector, HLSLHalf_t); + HLK_TEST(ShuffleVector, float); + HLK_TEST_DOUBLE(ShuffleVector, double); + + // Explicit Cast + + HLK_TEST(CastToInt16, HLSLBool_t); + HLK_TEST(CastToInt32, HLSLBool_t); + HLK_TEST(CastToInt64, HLSLBool_t); + HLK_TEST(CastToUint16, 
HLSLBool_t); + HLK_TEST(CastToUint32, HLSLBool_t); + HLK_TEST(CastToUint64, HLSLBool_t); + HLK_TEST(CastToFloat16, HLSLBool_t); + HLK_TEST(CastToFloat32, HLSLBool_t); + HLK_TEST(CastToFloat64, HLSLBool_t); + + HLK_TEST(CastToBool, HLSLHalf_t); + HLK_TEST(CastToInt16, HLSLHalf_t); + HLK_TEST(CastToInt32, HLSLHalf_t); + HLK_TEST(CastToInt64, HLSLHalf_t); + HLK_TEST(CastToUint16_FromFP, HLSLHalf_t); + HLK_TEST(CastToUint32_FromFP, HLSLHalf_t); + HLK_TEST(CastToUint64_FromFP, HLSLHalf_t); + HLK_TEST(CastToFloat32, HLSLHalf_t); + HLK_TEST(CastToFloat64, HLSLHalf_t); + + HLK_TEST(CastToBool, float); + HLK_TEST(CastToInt16, float); + HLK_TEST(CastToInt32, float); + HLK_TEST(CastToInt64, float); + HLK_TEST(CastToUint16_FromFP, float); + HLK_TEST(CastToUint32_FromFP, float); + HLK_TEST(CastToUint64_FromFP, float); + HLK_TEST(CastToFloat16, float); + HLK_TEST(CastToFloat64, float); + + HLK_TEST_DOUBLE(CastToBool, double); + HLK_TEST_DOUBLE(CastToInt16, double); + HLK_TEST_DOUBLE(CastToInt32, double); + HLK_TEST_DOUBLE(CastToInt64, double); + HLK_TEST_DOUBLE(CastToUint16_FromFP, double); + HLK_TEST_DOUBLE(CastToUint32_FromFP, double); + HLK_TEST_DOUBLE(CastToUint64_FromFP, double); + HLK_TEST_DOUBLE(CastToFloat16, double); + HLK_TEST_DOUBLE(CastToFloat32, double); + + HLK_TEST(CastToBool, uint16_t); + HLK_TEST(CastToInt16, uint16_t); + HLK_TEST(CastToInt32, uint16_t); + HLK_TEST(CastToInt64, uint16_t); + HLK_TEST(CastToUint32, uint16_t); + HLK_TEST(CastToUint64, uint16_t); + HLK_TEST(CastToFloat16, uint16_t); + HLK_TEST(CastToFloat32, uint16_t); + HLK_TEST(CastToFloat64, uint16_t); + + HLK_TEST(CastToBool, uint32_t); + HLK_TEST(CastToInt16, uint32_t); + HLK_TEST(CastToInt32, uint32_t); + HLK_TEST(CastToInt64, uint32_t); + HLK_TEST(CastToUint16, uint32_t); + HLK_TEST(CastToUint64, uint32_t); + HLK_TEST(CastToFloat16, uint32_t); + HLK_TEST(CastToFloat32, uint32_t); + HLK_TEST(CastToFloat64, uint32_t); + + HLK_TEST(CastToBool, uint64_t); + HLK_TEST(CastToInt16, uint64_t); + HLK_TEST(CastToInt32, uint64_t); + HLK_TEST(CastToInt64, uint64_t); + HLK_TEST(CastToUint16, uint64_t); + HLK_TEST(CastToUint32, uint64_t); + HLK_TEST(CastToFloat16, uint64_t); + HLK_TEST(CastToFloat32, uint64_t); + HLK_TEST(CastToFloat64, uint64_t); + + HLK_TEST(CastToBool, int16_t); + HLK_TEST(CastToInt32, int16_t); + HLK_TEST(CastToInt64, int16_t); + HLK_TEST(CastToUint16, int16_t); + HLK_TEST(CastToUint32, int16_t); + HLK_TEST(CastToUint64, int16_t); + HLK_TEST(CastToFloat16, int16_t); + HLK_TEST(CastToFloat32, int16_t); + HLK_TEST(CastToFloat64, int16_t); + + HLK_TEST(CastToBool, int32_t); + HLK_TEST(CastToInt16, int32_t); + HLK_TEST(CastToInt64, int32_t); + HLK_TEST(CastToUint16, int32_t); + HLK_TEST(CastToUint32, int32_t); + HLK_TEST(CastToUint64, int32_t); + HLK_TEST(CastToFloat16, int32_t); + HLK_TEST(CastToFloat32, int32_t); + HLK_TEST(CastToFloat64, int32_t); + + HLK_TEST(CastToBool, int64_t); + HLK_TEST(CastToInt16, int64_t); + HLK_TEST(CastToInt32, int64_t); + HLK_TEST(CastToUint16, int64_t); + HLK_TEST(CastToUint32, int64_t); + HLK_TEST(CastToUint64, int64_t); + HLK_TEST(CastToFloat16, int64_t); + HLK_TEST(CastToFloat32, int64_t); + HLK_TEST(CastToFloat64, int64_t); + + // Trigonometric + + HLK_TEST(Acos, HLSLHalf_t); + HLK_TEST(Asin, HLSLHalf_t); + HLK_TEST(Atan, HLSLHalf_t); + HLK_TEST(Cos, HLSLHalf_t); + HLK_TEST(Cosh, HLSLHalf_t); + HLK_TEST(Sin, HLSLHalf_t); + HLK_TEST(Sinh, HLSLHalf_t); + HLK_TEST(Tan, HLSLHalf_t); + HLK_TEST(Tanh, HLSLHalf_t); + HLK_TEST(Acos, float); + HLK_TEST(Asin, float); + HLK_TEST(Atan, 
float); + HLK_TEST(Cos, float); + HLK_TEST(Cosh, float); + HLK_TEST(Sin, float); + HLK_TEST(Sinh, float); + HLK_TEST(Tan, float); + HLK_TEST(Tanh, float); + + // AsType + + HLK_TEST(AsFloat16, int16_t); + HLK_TEST(AsInt16, int16_t); + HLK_TEST(AsUint16, int16_t); + HLK_TEST(AsFloat, int32_t); + HLK_TEST(AsInt, int32_t); + HLK_TEST(AsUint, int32_t); + HLK_TEST(AsFloat16, uint16_t); + HLK_TEST(AsInt16, uint16_t); + HLK_TEST(AsUint16, uint16_t); + HLK_TEST(AsFloat, uint32_t); + HLK_TEST(AsInt, uint32_t); + HLK_TEST(AsUint, uint32_t); + HLK_TEST(AsDouble, uint32_t); + HLK_TEST(AsFloat16, HLSLHalf_t); + HLK_TEST(AsInt16, HLSLHalf_t); + HLK_TEST(AsUint16, HLSLHalf_t); + HLK_TEST(AsUint_SplitDouble, double); + + // Unary Math + + HLK_TEST(Abs, int16_t); + HLK_TEST(Sign, int16_t); + HLK_TEST(Abs, int32_t); + HLK_TEST(Sign, int32_t); + HLK_TEST(Abs, int64_t); + HLK_TEST(Sign, int64_t); + HLK_TEST(Abs, uint16_t); + HLK_TEST(Sign, uint16_t); + HLK_TEST(Abs, uint32_t); + HLK_TEST(Sign, uint32_t); + HLK_TEST(Abs, uint64_t); + HLK_TEST(Sign, uint64_t); + HLK_TEST(Abs, HLSLHalf_t); + HLK_TEST(Ceil, HLSLHalf_t); + HLK_TEST(Exp, HLSLHalf_t); + HLK_TEST(Floor, HLSLHalf_t); + HLK_TEST(Frac, HLSLHalf_t); + HLK_TEST(Log, HLSLHalf_t); + HLK_TEST(Rcp, HLSLHalf_t); + HLK_TEST(Round, HLSLHalf_t); + HLK_TEST(Rsqrt, HLSLHalf_t); + HLK_TEST(Sign, HLSLHalf_t); + HLK_TEST(Sqrt, HLSLHalf_t); + HLK_TEST(Trunc, HLSLHalf_t); + HLK_TEST(Exp2, HLSLHalf_t); + HLK_TEST(Log10, HLSLHalf_t); + HLK_TEST(Log2, HLSLHalf_t); + HLK_TEST(Abs, float); + HLK_TEST(Ceil, float); + HLK_TEST(Exp, float); + HLK_TEST(Floor, float); + HLK_TEST(Frac, float); + HLK_TEST(Log, float); + HLK_TEST(Rcp, float); + HLK_TEST(Round, float); + HLK_TEST(Rsqrt, float); + HLK_TEST(Sign, float); + HLK_TEST(Sqrt, float); + HLK_TEST(Trunc, float); + HLK_TEST(Exp2, float); + HLK_TEST(Log10, float); + HLK_TEST(Log2, float); + HLK_TEST(Frexp, float); + HLK_TEST_DOUBLE(Abs, double); + HLK_TEST_DOUBLE(Sign, double); + + // Float Special + + HLK_TEST(IsFinite, HLSLHalf_t); + HLK_TEST(IsInf, HLSLHalf_t); + HLK_TEST(IsNan, HLSLHalf_t); + HLK_TEST(ModF, HLSLHalf_t); + + HLK_TEST(IsFinite, float); + HLK_TEST(IsInf, float); + HLK_TEST(IsNan, float); + HLK_TEST(ModF, float); + + // Binary Comparison + + HLK_TEST(LessThan, int16_t); + HLK_TEST(LessEqual, int16_t); + HLK_TEST(GreaterThan, int16_t); + HLK_TEST(GreaterEqual, int16_t); + HLK_TEST(Equal, int16_t); + HLK_TEST(NotEqual, int16_t); + HLK_TEST(LessThan, int32_t); + HLK_TEST(LessEqual, int32_t); + HLK_TEST(GreaterThan, int32_t); + HLK_TEST(GreaterEqual, int32_t); + HLK_TEST(Equal, int32_t); + HLK_TEST(NotEqual, int32_t); + HLK_TEST(LessThan, int64_t); + HLK_TEST(LessEqual, int64_t); + HLK_TEST(GreaterThan, int64_t); + HLK_TEST(GreaterEqual, int64_t); + HLK_TEST(Equal, int64_t); + HLK_TEST(NotEqual, int64_t); + HLK_TEST(LessThan, uint16_t); + HLK_TEST(LessEqual, uint16_t); + HLK_TEST(GreaterThan, uint16_t); + HLK_TEST(GreaterEqual, uint16_t); + HLK_TEST(Equal, uint16_t); + HLK_TEST(NotEqual, uint16_t); + HLK_TEST(LessThan, uint32_t); + HLK_TEST(LessEqual, uint32_t); + HLK_TEST(GreaterThan, uint32_t); + HLK_TEST(GreaterEqual, uint32_t); + HLK_TEST(Equal, uint32_t); + HLK_TEST(NotEqual, uint32_t); + HLK_TEST(LessThan, uint64_t); + HLK_TEST(LessEqual, uint64_t); + HLK_TEST(GreaterThan, uint64_t); + HLK_TEST(GreaterEqual, uint64_t); + HLK_TEST(Equal, uint64_t); + HLK_TEST(NotEqual, uint64_t); + HLK_TEST(LessThan, HLSLHalf_t); + HLK_TEST(LessEqual, HLSLHalf_t); + HLK_TEST(GreaterThan, HLSLHalf_t); + HLK_TEST(GreaterEqual, 
HLSLHalf_t); + HLK_TEST(Equal, HLSLHalf_t); + HLK_TEST(NotEqual, HLSLHalf_t); + HLK_TEST(LessThan, float); + HLK_TEST(LessEqual, float); + HLK_TEST(GreaterThan, float); + HLK_TEST(GreaterEqual, float); + HLK_TEST(Equal, float); + HLK_TEST(NotEqual, float); + HLK_TEST_DOUBLE(LessThan, double); + HLK_TEST_DOUBLE(LessEqual, double); + HLK_TEST_DOUBLE(GreaterThan, double); + HLK_TEST_DOUBLE(GreaterEqual, double); + HLK_TEST_DOUBLE(Equal, double); + HLK_TEST_DOUBLE(NotEqual, double); + + // Binary Logical + + HLK_TEST(Logical_And, HLSLBool_t); + HLK_TEST(Logical_Or, HLSLBool_t); + + // Ternary Logical + HLK_TEST(Select, HLSLBool_t); + HLK_TEST(Select, int16_t); + HLK_TEST(Select, int32_t); + HLK_TEST(Select, int64_t); + HLK_TEST(Select, uint16_t); + HLK_TEST(Select, uint32_t); + HLK_TEST(Select, uint64_t); + HLK_TEST(Select, HLSLHalf_t); + HLK_TEST(Select, float); + HLK_TEST_DOUBLE(Select, double); + + // Reduction + HLK_TEST(Any_Mixed, HLSLBool_t); + HLK_TEST(Any_Zero, HLSLBool_t); + HLK_TEST(Any_NoZero, HLSLBool_t); + HLK_TEST(All_Mixed, HLSLBool_t); + HLK_TEST(All_Zero, HLSLBool_t); + HLK_TEST(All_NoZero, HLSLBool_t); + + HLK_TEST(Any_Mixed, int16_t); + HLK_TEST(Any_Zero, int16_t); + HLK_TEST(Any_NoZero, int16_t); + HLK_TEST(All_Mixed, int16_t); + HLK_TEST(All_Zero, int16_t); + HLK_TEST(All_NoZero, int16_t); + + HLK_TEST(Any_Mixed, int32_t); + HLK_TEST(Any_Zero, int32_t); + HLK_TEST(Any_NoZero, int32_t); + HLK_TEST(All_Mixed, int32_t); + HLK_TEST(All_Zero, int32_t); + HLK_TEST(All_NoZero, int32_t); + + HLK_TEST(Any_Mixed, int64_t); + HLK_TEST(Any_Zero, int64_t); + HLK_TEST(Any_NoZero, int64_t); + HLK_TEST(All_Mixed, int64_t); + HLK_TEST(All_Zero, int64_t); + HLK_TEST(All_NoZero, int64_t); + + HLK_TEST(Dot, HLSLHalf_t); + + HLK_TEST(Dot, float); + + // LoadAndStore + // BAB == Byte Address Buffer + // RDH == Resource Descriptor Heap + // RD == Root Descriptor + // DT == Descriptor Table + // SB == Structured Buffer + + HLK_TEST(LoadAndStore_RDH_BAB_SRV, HLSLHalf_t); + HLK_TEST(LoadAndStore_RDH_BAB_UAV, HLSLHalf_t); + HLK_TEST(LoadAndStore_DT_BAB_SRV, HLSLHalf_t); + HLK_TEST(LoadAndStore_DT_BAB_UAV, HLSLHalf_t); + HLK_TEST(LoadAndStore_RD_BAB_SRV, HLSLHalf_t); + HLK_TEST(LoadAndStore_RD_BAB_UAV, HLSLHalf_t); + HLK_TEST(LoadAndStore_RDH_SB_SRV, HLSLHalf_t); + HLK_TEST(LoadAndStore_RDH_SB_UAV, HLSLHalf_t); + HLK_TEST(LoadAndStore_DT_SB_SRV, HLSLHalf_t); + HLK_TEST(LoadAndStore_DT_SB_UAV, HLSLHalf_t); + HLK_TEST(LoadAndStore_RD_SB_SRV, HLSLHalf_t); + HLK_TEST(LoadAndStore_RD_SB_UAV, HLSLHalf_t); + + HLK_TEST(LoadAndStore_RDH_BAB_SRV, HLSLBool_t); + HLK_TEST(LoadAndStore_RDH_BAB_UAV, HLSLBool_t); + HLK_TEST(LoadAndStore_DT_BAB_SRV, HLSLBool_t); + HLK_TEST(LoadAndStore_DT_BAB_UAV, HLSLBool_t); + HLK_TEST(LoadAndStore_RD_BAB_SRV, HLSLBool_t); + HLK_TEST(LoadAndStore_RD_BAB_UAV, HLSLBool_t); + HLK_TEST(LoadAndStore_RDH_SB_SRV, HLSLBool_t); + HLK_TEST(LoadAndStore_RDH_SB_UAV, HLSLBool_t); + HLK_TEST(LoadAndStore_DT_SB_SRV, HLSLBool_t); + HLK_TEST(LoadAndStore_DT_SB_UAV, HLSLBool_t); + HLK_TEST(LoadAndStore_RD_SB_SRV, HLSLBool_t); + HLK_TEST(LoadAndStore_RD_SB_UAV, HLSLBool_t); + + HLK_TEST(LoadAndStore_RDH_BAB_SRV, int16_t); + HLK_TEST(LoadAndStore_RDH_BAB_UAV, int16_t); + HLK_TEST(LoadAndStore_DT_BAB_SRV, int16_t); + HLK_TEST(LoadAndStore_DT_BAB_UAV, int16_t); + HLK_TEST(LoadAndStore_RD_BAB_SRV, int16_t); + HLK_TEST(LoadAndStore_RD_BAB_UAV, int16_t); + HLK_TEST(LoadAndStore_RDH_SB_SRV, int16_t); + HLK_TEST(LoadAndStore_RDH_SB_UAV, int16_t); + HLK_TEST(LoadAndStore_DT_SB_SRV, int16_t); + 
HLK_TEST(LoadAndStore_DT_SB_UAV, int16_t); + HLK_TEST(LoadAndStore_RD_SB_SRV, int16_t); + HLK_TEST(LoadAndStore_RD_SB_UAV, int16_t); + + HLK_TEST(LoadAndStore_RDH_BAB_SRV, int32_t); + HLK_TEST(LoadAndStore_RDH_BAB_UAV, int32_t); + HLK_TEST(LoadAndStore_DT_BAB_SRV, int32_t); + HLK_TEST(LoadAndStore_DT_BAB_UAV, int32_t); + HLK_TEST(LoadAndStore_RD_BAB_SRV, int32_t); + HLK_TEST(LoadAndStore_RD_BAB_UAV, int32_t); + HLK_TEST(LoadAndStore_RDH_SB_SRV, int32_t); + HLK_TEST(LoadAndStore_RDH_SB_UAV, int32_t); + HLK_TEST(LoadAndStore_DT_SB_SRV, int32_t); + HLK_TEST(LoadAndStore_DT_SB_UAV, int32_t); + HLK_TEST(LoadAndStore_RD_SB_SRV, int32_t); + HLK_TEST(LoadAndStore_RD_SB_UAV, int32_t); + + HLK_TEST(LoadAndStore_RDH_BAB_SRV, int64_t); + HLK_TEST(LoadAndStore_RDH_BAB_UAV, int64_t); + HLK_TEST(LoadAndStore_DT_BAB_SRV, int64_t); + HLK_TEST(LoadAndStore_DT_BAB_UAV, int64_t); + HLK_TEST(LoadAndStore_RD_BAB_SRV, int64_t); + HLK_TEST(LoadAndStore_RD_BAB_UAV, int64_t); + HLK_TEST(LoadAndStore_RDH_SB_SRV, int64_t); + HLK_TEST(LoadAndStore_RDH_SB_UAV, int64_t); + HLK_TEST(LoadAndStore_DT_SB_SRV, int64_t); + HLK_TEST(LoadAndStore_DT_SB_UAV, int64_t); + HLK_TEST(LoadAndStore_RD_SB_SRV, int64_t); + HLK_TEST(LoadAndStore_RD_SB_UAV, int64_t); + + HLK_TEST(LoadAndStore_RDH_BAB_SRV, uint16_t); + HLK_TEST(LoadAndStore_RDH_BAB_UAV, uint16_t); + HLK_TEST(LoadAndStore_DT_BAB_SRV, uint16_t); + HLK_TEST(LoadAndStore_DT_BAB_UAV, uint16_t); + HLK_TEST(LoadAndStore_RD_BAB_SRV, uint16_t); + HLK_TEST(LoadAndStore_RD_BAB_UAV, uint16_t); + HLK_TEST(LoadAndStore_RDH_SB_SRV, uint16_t); + HLK_TEST(LoadAndStore_RDH_SB_UAV, uint16_t); + HLK_TEST(LoadAndStore_DT_SB_SRV, uint16_t); + HLK_TEST(LoadAndStore_DT_SB_UAV, uint16_t); + HLK_TEST(LoadAndStore_RD_SB_SRV, uint16_t); + HLK_TEST(LoadAndStore_RD_SB_UAV, uint16_t); + + HLK_TEST(LoadAndStore_RDH_BAB_UAV, uint32_t); + HLK_TEST(LoadAndStore_RDH_BAB_SRV, uint32_t); + HLK_TEST(LoadAndStore_DT_BAB_UAV, uint32_t); + HLK_TEST(LoadAndStore_DT_BAB_SRV, uint32_t); + HLK_TEST(LoadAndStore_RD_BAB_UAV, uint32_t); + HLK_TEST(LoadAndStore_RD_BAB_SRV, uint32_t); + HLK_TEST(LoadAndStore_RDH_SB_UAV, uint32_t); + HLK_TEST(LoadAndStore_RDH_SB_SRV, uint32_t); + HLK_TEST(LoadAndStore_DT_SB_UAV, uint32_t); + HLK_TEST(LoadAndStore_DT_SB_SRV, uint32_t); + HLK_TEST(LoadAndStore_RD_SB_UAV, uint32_t); + HLK_TEST(LoadAndStore_RD_SB_SRV, uint32_t); + + HLK_TEST(LoadAndStore_RDH_BAB_UAV, uint64_t); + HLK_TEST(LoadAndStore_RDH_BAB_SRV, uint64_t); + HLK_TEST(LoadAndStore_DT_BAB_UAV, uint64_t); + HLK_TEST(LoadAndStore_DT_BAB_SRV, uint64_t); + HLK_TEST(LoadAndStore_RD_BAB_UAV, uint64_t); + HLK_TEST(LoadAndStore_RD_BAB_SRV, uint64_t); + HLK_TEST(LoadAndStore_RDH_SB_UAV, uint64_t); + HLK_TEST(LoadAndStore_RDH_SB_SRV, uint64_t); + HLK_TEST(LoadAndStore_DT_SB_UAV, uint64_t); + HLK_TEST(LoadAndStore_DT_SB_SRV, uint64_t); + HLK_TEST(LoadAndStore_RD_SB_UAV, uint64_t); + HLK_TEST(LoadAndStore_RD_SB_SRV, uint64_t); + + HLK_TEST(LoadAndStore_RDH_BAB_UAV, float); + HLK_TEST(LoadAndStore_RDH_BAB_SRV, float); + HLK_TEST(LoadAndStore_DT_BAB_UAV, float); + HLK_TEST(LoadAndStore_DT_BAB_SRV, float); + HLK_TEST(LoadAndStore_RD_BAB_UAV, float); + HLK_TEST(LoadAndStore_RD_BAB_SRV, float); + HLK_TEST(LoadAndStore_RDH_SB_UAV, float); + HLK_TEST(LoadAndStore_RDH_SB_SRV, float); + HLK_TEST(LoadAndStore_DT_SB_UAV, float); + HLK_TEST(LoadAndStore_DT_SB_SRV, float); + HLK_TEST(LoadAndStore_RD_SB_UAV, float); + HLK_TEST(LoadAndStore_RD_SB_SRV, float); + + HLK_TEST_DOUBLE(LoadAndStore_RDH_BAB_SRV, double); + 
HLK_TEST_DOUBLE(LoadAndStore_RDH_BAB_UAV, double); + HLK_TEST_DOUBLE(LoadAndStore_DT_BAB_SRV, double); + HLK_TEST_DOUBLE(LoadAndStore_DT_BAB_UAV, double); + HLK_TEST_DOUBLE(LoadAndStore_RD_BAB_SRV, double); + HLK_TEST_DOUBLE(LoadAndStore_RD_BAB_UAV, double); + HLK_TEST_DOUBLE(LoadAndStore_RDH_SB_SRV, double); + HLK_TEST_DOUBLE(LoadAndStore_RDH_SB_UAV, double); + HLK_TEST_DOUBLE(LoadAndStore_DT_SB_SRV, double); + HLK_TEST_DOUBLE(LoadAndStore_DT_SB_UAV, double); + HLK_TEST_DOUBLE(LoadAndStore_RD_SB_SRV, double); + HLK_TEST_DOUBLE(LoadAndStore_RD_SB_UAV, double); + + // Derivative + HLK_TEST(DerivativeDdx, HLSLHalf_t); + HLK_TEST(DerivativeDdy, HLSLHalf_t); + HLK_TEST(DerivativeDdxFine, HLSLHalf_t); + HLK_TEST(DerivativeDdyFine, HLSLHalf_t); + HLK_TEST(DerivativeDdx, float); + HLK_TEST(DerivativeDdy, float); + HLK_TEST(DerivativeDdxFine, float); + HLK_TEST(DerivativeDdyFine, float); + + // Quad + HLK_TEST(QuadReadLaneAt, HLSLBool_t); + HLK_TEST(QuadReadAcrossX, HLSLBool_t); + HLK_TEST(QuadReadAcrossY, HLSLBool_t); + HLK_TEST(QuadReadAcrossDiagonal, HLSLBool_t); + HLK_TEST(QuadReadLaneAt, int16_t); + HLK_TEST(QuadReadAcrossX, int16_t); + HLK_TEST(QuadReadAcrossY, int16_t); + HLK_TEST(QuadReadAcrossDiagonal, int16_t); + HLK_TEST(QuadReadLaneAt, int32_t); + HLK_TEST(QuadReadAcrossX, int32_t); + HLK_TEST(QuadReadAcrossY, int32_t); + HLK_TEST(QuadReadAcrossDiagonal, int32_t); + HLK_TEST(QuadReadLaneAt, int64_t); + HLK_TEST(QuadReadAcrossX, int64_t); + HLK_TEST(QuadReadAcrossY, int64_t); + HLK_TEST(QuadReadAcrossDiagonal, int64_t); + HLK_TEST(QuadReadLaneAt, uint16_t); + HLK_TEST(QuadReadAcrossX, uint16_t); + HLK_TEST(QuadReadAcrossY, uint16_t); + HLK_TEST(QuadReadAcrossDiagonal, uint16_t); + HLK_TEST(QuadReadLaneAt, uint32_t); + HLK_TEST(QuadReadAcrossX, uint32_t); + HLK_TEST(QuadReadAcrossY, uint32_t); + HLK_TEST(QuadReadAcrossDiagonal, uint32_t); + HLK_TEST(QuadReadLaneAt, uint64_t); + HLK_TEST(QuadReadAcrossX, uint64_t); + HLK_TEST(QuadReadAcrossY, uint64_t); + HLK_TEST(QuadReadAcrossDiagonal, uint64_t); + HLK_TEST(QuadReadLaneAt, HLSLHalf_t); + HLK_TEST(QuadReadAcrossX, HLSLHalf_t); + HLK_TEST(QuadReadAcrossY, HLSLHalf_t); + HLK_TEST(QuadReadAcrossDiagonal, HLSLHalf_t); + HLK_TEST(QuadReadLaneAt, float); + HLK_TEST(QuadReadAcrossX, float); + HLK_TEST(QuadReadAcrossY, float); + HLK_TEST(QuadReadAcrossDiagonal, float); + HLK_TEST_DOUBLE(QuadReadLaneAt, double); + HLK_TEST_DOUBLE(QuadReadAcrossX, double); + HLK_TEST_DOUBLE(QuadReadAcrossY, double); + HLK_TEST_DOUBLE(QuadReadAcrossDiagonal, double); + + // Wave + + HLK_WAVEOP_TEST(WaveActiveAllEqual, HLSLBool_t); + HLK_WAVEOP_TEST(WaveReadLaneAt, HLSLBool_t); + HLK_WAVEOP_TEST(WaveReadLaneFirst, HLSLBool_t); + HLK_WAVEOP_TEST(WaveMatch, HLSLBool_t); + + HLK_WAVEOP_TEST(WaveActiveSum, int16_t); + HLK_WAVEOP_TEST(WaveActiveMin, int16_t); + HLK_WAVEOP_TEST(WaveActiveMax, int16_t); + HLK_WAVEOP_TEST(WaveActiveProduct, int16_t); + HLK_WAVEOP_TEST(WaveActiveAllEqual, int16_t); + HLK_WAVEOP_TEST(WaveReadLaneAt, int16_t); + HLK_WAVEOP_TEST(WaveReadLaneFirst, int16_t); + HLK_WAVEOP_TEST(WavePrefixSum, int16_t); + HLK_WAVEOP_TEST(WavePrefixProduct, int16_t); + HLK_WAVEOP_TEST(WaveMultiPrefixSum, int16_t); + HLK_WAVEOP_TEST(WaveMultiPrefixProduct, int16_t); + HLK_WAVEOP_TEST(WaveMultiPrefixBitAnd, int16_t); + HLK_WAVEOP_TEST(WaveMultiPrefixBitOr, int16_t); + HLK_WAVEOP_TEST(WaveMultiPrefixBitXor, int16_t); + HLK_WAVEOP_TEST(WaveMatch, int16_t); + HLK_WAVEOP_TEST(WaveActiveSum, int32_t); + HLK_WAVEOP_TEST(WaveActiveMin, int32_t); + 
HLK_WAVEOP_TEST(WaveActiveMax, int32_t); + HLK_WAVEOP_TEST(WaveActiveProduct, int32_t); + HLK_WAVEOP_TEST(WaveActiveAllEqual, int32_t); + HLK_WAVEOP_TEST(WaveReadLaneAt, int32_t); + HLK_WAVEOP_TEST(WaveReadLaneFirst, int32_t); + HLK_WAVEOP_TEST(WavePrefixSum, int32_t); + HLK_WAVEOP_TEST(WaveMultiPrefixSum, int32_t); + HLK_WAVEOP_TEST(WaveMultiPrefixProduct, int32_t); + HLK_WAVEOP_TEST(WavePrefixProduct, int32_t); + HLK_WAVEOP_TEST(WaveMultiPrefixBitAnd, int32_t); + HLK_WAVEOP_TEST(WaveMultiPrefixBitOr, int32_t); + HLK_WAVEOP_TEST(WaveMultiPrefixBitXor, int32_t); + HLK_WAVEOP_TEST(WaveMatch, int32_t); + HLK_WAVEOP_TEST(WaveActiveSum, int64_t); + HLK_WAVEOP_TEST(WaveActiveMin, int64_t); + HLK_WAVEOP_TEST(WaveActiveMax, int64_t); + HLK_WAVEOP_TEST(WaveActiveProduct, int64_t); + HLK_WAVEOP_TEST(WaveActiveAllEqual, int64_t); + HLK_WAVEOP_TEST(WaveReadLaneAt, int64_t); + HLK_WAVEOP_TEST(WaveReadLaneFirst, int64_t); + HLK_WAVEOP_TEST(WavePrefixSum, int64_t); + HLK_WAVEOP_TEST(WavePrefixProduct, int64_t); + HLK_WAVEOP_TEST(WaveMultiPrefixSum, int64_t); + HLK_WAVEOP_TEST(WaveMultiPrefixProduct, int64_t); + HLK_WAVEOP_TEST(WaveMultiPrefixBitAnd, int64_t); + HLK_WAVEOP_TEST(WaveMultiPrefixBitOr, int64_t); + HLK_WAVEOP_TEST(WaveMultiPrefixBitXor, int64_t); + HLK_WAVEOP_TEST(WaveMatch, int64_t); + + // Note: WaveActiveBit* ops don't support uint16_t in HLSL + // But the WaveMultiPrefixBit ops support all int and uint types + HLK_WAVEOP_TEST(WaveActiveSum, uint16_t); + HLK_WAVEOP_TEST(WaveActiveMin, uint16_t); + HLK_WAVEOP_TEST(WaveActiveMax, uint16_t); + HLK_WAVEOP_TEST(WaveActiveProduct, uint16_t); + HLK_WAVEOP_TEST(WaveActiveAllEqual, uint16_t); + HLK_WAVEOP_TEST(WaveReadLaneAt, uint16_t); + HLK_WAVEOP_TEST(WaveReadLaneFirst, uint16_t); + HLK_WAVEOP_TEST(WavePrefixSum, uint16_t); + HLK_WAVEOP_TEST(WavePrefixProduct, uint16_t); + HLK_WAVEOP_TEST(WaveMultiPrefixSum, uint16_t); + HLK_WAVEOP_TEST(WaveMultiPrefixProduct, uint16_t); + HLK_WAVEOP_TEST(WaveMultiPrefixBitAnd, uint16_t); + HLK_WAVEOP_TEST(WaveMultiPrefixBitOr, uint16_t); + HLK_WAVEOP_TEST(WaveMultiPrefixBitXor, uint16_t); + HLK_WAVEOP_TEST(WaveMatch, uint16_t); + HLK_WAVEOP_TEST(WaveActiveSum, uint32_t); + HLK_WAVEOP_TEST(WaveActiveMin, uint32_t); + HLK_WAVEOP_TEST(WaveActiveMax, uint32_t); + HLK_WAVEOP_TEST(WaveActiveProduct, uint32_t); + HLK_WAVEOP_TEST(WaveActiveBitAnd, uint32_t); + HLK_WAVEOP_TEST(WaveActiveBitOr, uint32_t); + HLK_WAVEOP_TEST(WaveActiveBitXor, uint32_t); + HLK_WAVEOP_TEST(WaveActiveAllEqual, uint32_t); + HLK_WAVEOP_TEST(WaveReadLaneAt, uint32_t); + HLK_WAVEOP_TEST(WaveReadLaneFirst, uint32_t); + HLK_WAVEOP_TEST(WavePrefixSum, uint32_t); + HLK_WAVEOP_TEST(WavePrefixProduct, uint32_t); + HLK_WAVEOP_TEST(WaveMultiPrefixSum, uint32_t); + HLK_WAVEOP_TEST(WaveMultiPrefixProduct, uint32_t); + HLK_WAVEOP_TEST(WaveMultiPrefixBitAnd, uint32_t); + HLK_WAVEOP_TEST(WaveMultiPrefixBitOr, uint32_t); + HLK_WAVEOP_TEST(WaveMultiPrefixBitXor, uint32_t); + HLK_WAVEOP_TEST(WaveMatch, uint32_t); + HLK_WAVEOP_TEST(WaveActiveSum, uint64_t); + HLK_WAVEOP_TEST(WaveActiveMin, uint64_t); + HLK_WAVEOP_TEST(WaveActiveMax, uint64_t); + HLK_WAVEOP_TEST(WaveActiveProduct, uint64_t); + HLK_WAVEOP_TEST(WaveActiveBitAnd, uint64_t); + HLK_WAVEOP_TEST(WaveActiveBitOr, uint64_t); + HLK_WAVEOP_TEST(WaveActiveBitXor, uint64_t); + HLK_WAVEOP_TEST(WaveActiveAllEqual, uint64_t); + HLK_WAVEOP_TEST(WaveReadLaneAt, uint64_t); + HLK_WAVEOP_TEST(WaveReadLaneFirst, uint64_t); + HLK_WAVEOP_TEST(WavePrefixSum, uint64_t); + HLK_WAVEOP_TEST(WavePrefixProduct, uint64_t); + 
HLK_WAVEOP_TEST(WaveMultiPrefixSum, uint64_t); + HLK_WAVEOP_TEST(WaveMultiPrefixProduct, uint64_t); + HLK_WAVEOP_TEST(WaveMultiPrefixBitAnd, uint64_t); + HLK_WAVEOP_TEST(WaveMultiPrefixBitOr, uint64_t); + HLK_WAVEOP_TEST(WaveMultiPrefixBitXor, uint64_t); + HLK_WAVEOP_TEST(WaveMatch, uint64_t); + + HLK_WAVEOP_TEST(WaveActiveSum, HLSLHalf_t); + HLK_WAVEOP_TEST(WaveActiveMin, HLSLHalf_t); + HLK_WAVEOP_TEST(WaveActiveMax, HLSLHalf_t); + HLK_WAVEOP_TEST(WaveActiveProduct, HLSLHalf_t); + HLK_WAVEOP_TEST(WaveActiveAllEqual, HLSLHalf_t); + HLK_WAVEOP_TEST(WaveReadLaneAt, HLSLHalf_t); + HLK_WAVEOP_TEST(WaveReadLaneFirst, HLSLHalf_t); + HLK_WAVEOP_TEST(WavePrefixSum, HLSLHalf_t); + HLK_WAVEOP_TEST(WavePrefixProduct, HLSLHalf_t); + HLK_WAVEOP_TEST(WaveMultiPrefixSum, HLSLHalf_t); + HLK_WAVEOP_TEST(WaveMultiPrefixProduct, HLSLHalf_t); + HLK_WAVEOP_TEST(WaveMatch, HLSLHalf_t); + HLK_WAVEOP_TEST(WaveActiveSum, float); + HLK_WAVEOP_TEST(WaveActiveMin, float); + HLK_WAVEOP_TEST(WaveActiveMax, float); + HLK_WAVEOP_TEST(WaveActiveProduct, float); + HLK_WAVEOP_TEST(WaveActiveAllEqual, float); + HLK_WAVEOP_TEST(WaveReadLaneAt, float); + HLK_WAVEOP_TEST(WaveReadLaneFirst, float); + HLK_WAVEOP_TEST(WavePrefixSum, float); + HLK_WAVEOP_TEST(WavePrefixProduct, float); + HLK_WAVEOP_TEST(WaveMultiPrefixSum, float); + HLK_WAVEOP_TEST(WaveMultiPrefixProduct, float); + HLK_WAVEOP_TEST(WaveMatch, float); + HLK_WAVEOP_TEST_DOUBLE(WaveActiveSum, double); + HLK_WAVEOP_TEST_DOUBLE(WaveActiveMin, double); + HLK_WAVEOP_TEST_DOUBLE(WaveActiveMax, double); + HLK_WAVEOP_TEST_DOUBLE(WaveActiveProduct, double); + HLK_WAVEOP_TEST_DOUBLE(WaveActiveAllEqual, double); + HLK_WAVEOP_TEST_DOUBLE(WaveReadLaneAt, double); + HLK_WAVEOP_TEST_DOUBLE(WaveReadLaneFirst, double); + HLK_WAVEOP_TEST_DOUBLE(WavePrefixSum, double); + HLK_WAVEOP_TEST_DOUBLE(WavePrefixProduct, double); + HLK_WAVEOP_TEST_DOUBLE(WaveMultiPrefixSum, double); + HLK_WAVEOP_TEST_DOUBLE(WaveMultiPrefixProduct, double); + HLK_WAVEOP_TEST_DOUBLE(WaveMatch, double); + +private: + bool Initialized = false; + std::optional D3D12SDK; + bool VerboseLogging = false; + size_t OverrideInputSize = 0; + UINT OverrideWaveLaneCount = 0; + CComPtr D3DDevice; +}; From 43d4896710cdf87d2785b3288f2ed710397ba601 Mon Sep 17 00:00:00 2001 From: Alex Sepkowski <5620315+alsepkow@users.noreply.github.com> Date: Mon, 2 Feb 2026 09:49:30 -0800 Subject: [PATCH 6/6] Whoops. 
Remove files that aren't supposed to be here --- include/dxc/DXIL/DxilConstants.h | 2 +- include/dxc/DXIL/DxilShaderModel.h | 5 +- include/dxc/Support/HLSLOptions.td | 2 +- lib/DXIL/DxilShaderModel.cpp | 237 +- tools/clang/unittests/HLSL/ValidationTest.cpp | 65 +- tools/clang/unittests/HLSLExec/CMakeLists.txt | 1 - tools/clang/unittests/HLSLExec/TaefTest.cpp | 2804 ----------------- utils/hct/hctdb_instrhelp.py | 2 +- 8 files changed, 176 insertions(+), 2942 deletions(-) delete mode 100644 tools/clang/unittests/HLSLExec/TaefTest.cpp diff --git a/include/dxc/DXIL/DxilConstants.h b/include/dxc/DXIL/DxilConstants.h index 70e07b5173..030ff009cd 100644 --- a/include/dxc/DXIL/DxilConstants.h +++ b/include/dxc/DXIL/DxilConstants.h @@ -29,7 +29,7 @@ namespace DXIL { const unsigned kDxilMajor = 1; /* hctdb_instrhelp.get_dxil_version_minor()*/ // VALRULE-TEXT:BEGIN -const unsigned kDxilMinor = 9; +const unsigned kDxilMinor = 10; // VALRULE-TEXT:END inline unsigned MakeDxilVersion(unsigned DxilMajor, unsigned DxilMinor) { diff --git a/include/dxc/DXIL/DxilShaderModel.h b/include/dxc/DXIL/DxilShaderModel.h index 380fb1aa32..ff9ef39b99 100644 --- a/include/dxc/DXIL/DxilShaderModel.h +++ b/include/dxc/DXIL/DxilShaderModel.h @@ -33,7 +33,7 @@ class ShaderModel { // clang-format on // VALRULE-TEXT:BEGIN static const unsigned kHighestMajor = 6; - static const unsigned kHighestMinor = 9; + static const unsigned kHighestMinor = 10; // VALRULE-TEXT:END // Major/Minor version of highest released shader model @@ -87,6 +87,7 @@ class ShaderModel { bool IsSM67Plus() const { return IsSMAtLeast(6, 7); } bool IsSM68Plus() const { return IsSMAtLeast(6, 8); } bool IsSM69Plus() const { return IsSMAtLeast(6, 9); } + bool IsSM610Plus() const { return IsSMAtLeast(6, 10); } // VALRULE-TEXT:END const char *GetName() const { return m_pszName; } const char *GetKindName() const; @@ -138,7 +139,7 @@ class ShaderModel { bool m_bTypedUavs, unsigned m_UAVRegsLim); /* hctdb_instrhelp.get_num_shader_models()*/ // VALRULE-TEXT:BEGIN - static const unsigned kNumShaderModels = 107; + static const unsigned kNumShaderModels = 116; // VALRULE-TEXT:END static const ShaderModel ms_ShaderModels[kNumShaderModels]; diff --git a/include/dxc/Support/HLSLOptions.td b/include/dxc/Support/HLSLOptions.td index 40182f85b9..cd7dfb2f0c 100644 --- a/include/dxc/Support/HLSLOptions.td +++ b/include/dxc/Support/HLSLOptions.td @@ -451,7 +451,7 @@ def fvk_bind_counter_heap : MultiArg<["-"], "fvk-bind-counter-heap", 2>, MetaVar def target_profile : JoinedOrSeparate<["-", "/"], "T">, Flags<[CoreOption]>, Group, MetaVarName<"">, /* hctdb_instrhelp.get_target_profiles()*/ // VALRULE-TEXT:BEGIN - HelpText<"Set target profile. \n\t: ps_6_0, ps_6_1, ps_6_2, ps_6_3, ps_6_4, ps_6_5, ps_6_6, ps_6_7, ps_6_8, ps_6_9, \n\t\t vs_6_0, vs_6_1, vs_6_2, vs_6_3, vs_6_4, vs_6_5, vs_6_6, vs_6_7, vs_6_8, vs_6_9, \n\t\t gs_6_0, gs_6_1, gs_6_2, gs_6_3, gs_6_4, gs_6_5, gs_6_6, gs_6_7, gs_6_8, gs_6_9, \n\t\t hs_6_0, hs_6_1, hs_6_2, hs_6_3, hs_6_4, hs_6_5, hs_6_6, hs_6_7, hs_6_8, hs_6_9, \n\t\t ds_6_0, ds_6_1, ds_6_2, ds_6_3, ds_6_4, ds_6_5, ds_6_6, ds_6_7, ds_6_8, ds_6_9, \n\t\t cs_6_0, cs_6_1, cs_6_2, cs_6_3, cs_6_4, cs_6_5, cs_6_6, cs_6_7, cs_6_8, cs_6_9, \n\t\t lib_6_1, lib_6_2, lib_6_3, lib_6_4, lib_6_5, lib_6_6, lib_6_7, lib_6_8, lib_6_9, \n\t\t ms_6_5, ms_6_6, ms_6_7, ms_6_8, ms_6_9, \n\t\t as_6_5, as_6_6, as_6_7, as_6_8, as_6_9, \n\t\t ">; + HelpText<"Set target profile. 
\n\t: ps_6_0, ps_6_1, ps_6_2, ps_6_3, ps_6_4, ps_6_5, ps_6_6, ps_6_7, ps_6_8, ps_6_9, ps_6_10, \n\t\t vs_6_0, vs_6_1, vs_6_2, vs_6_3, vs_6_4, vs_6_5, vs_6_6, vs_6_7, vs_6_8, vs_6_9, vs_6_10, \n\t\t gs_6_0, gs_6_1, gs_6_2, gs_6_3, gs_6_4, gs_6_5, gs_6_6, gs_6_7, gs_6_8, gs_6_9, gs_6_10, \n\t\t hs_6_0, hs_6_1, hs_6_2, hs_6_3, hs_6_4, hs_6_5, hs_6_6, hs_6_7, hs_6_8, hs_6_9, hs_6_10, \n\t\t ds_6_0, ds_6_1, ds_6_2, ds_6_3, ds_6_4, ds_6_5, ds_6_6, ds_6_7, ds_6_8, ds_6_9, ds_6_10, \n\t\t cs_6_0, cs_6_1, cs_6_2, cs_6_3, cs_6_4, cs_6_5, cs_6_6, cs_6_7, cs_6_8, cs_6_9, cs_6_10, \n\t\t lib_6_1, lib_6_2, lib_6_3, lib_6_4, lib_6_5, lib_6_6, lib_6_7, lib_6_8, lib_6_9, lib_6_10, \n\t\t ms_6_5, ms_6_6, ms_6_7, ms_6_8, ms_6_9, ms_6_10, \n\t\t as_6_5, as_6_6, as_6_7, as_6_8, as_6_9, as_6_10, \n\t\t ">; // VALRULE-TEXT:END def entrypoint : JoinedOrSeparate<["-", "/"], "E">, Flags<[CoreOption, RewriteOption]>, Group, HelpText<"Entry point name">; diff --git a/lib/DXIL/DxilShaderModel.cpp b/lib/DXIL/DxilShaderModel.cpp index 06cc6e633c..e70a3b44c9 100644 --- a/lib/DXIL/DxilShaderModel.cpp +++ b/lib/DXIL/DxilShaderModel.cpp @@ -65,6 +65,7 @@ bool ShaderModel::IsValidForDxil() const { case 7: case 8: case 9: + case 10: // VALRULE-TEXT:END return true; case kOfflineMinor: @@ -79,113 +80,122 @@ const ShaderModel *ShaderModel::Get(Kind Kind, unsigned Major, unsigned Minor) { /* hctdb_instrhelp.get_shader_model_get()*/ // VALRULE-TEXT:BEGIN const static std::pair hashToIdxMap[] = { - {1024, 0}, // ps_4_0 - {1025, 1}, // ps_4_1 - {1280, 2}, // ps_5_0 - {1281, 3}, // ps_5_1 - {1536, 4}, // ps_6_0 - {1537, 5}, // ps_6_1 - {1538, 6}, // ps_6_2 - {1539, 7}, // ps_6_3 - {1540, 8}, // ps_6_4 - {1541, 9}, // ps_6_5 - {1542, 10}, // ps_6_6 - {1543, 11}, // ps_6_7 - {1544, 12}, // ps_6_8 - {1545, 13}, // ps_6_9 - {66560, 14}, // vs_4_0 - {66561, 15}, // vs_4_1 - {66816, 16}, // vs_5_0 - {66817, 17}, // vs_5_1 - {67072, 18}, // vs_6_0 - {67073, 19}, // vs_6_1 - {67074, 20}, // vs_6_2 - {67075, 21}, // vs_6_3 - {67076, 22}, // vs_6_4 - {67077, 23}, // vs_6_5 - {67078, 24}, // vs_6_6 - {67079, 25}, // vs_6_7 - {67080, 26}, // vs_6_8 - {67081, 27}, // vs_6_9 - {132096, 28}, // gs_4_0 - {132097, 29}, // gs_4_1 - {132352, 30}, // gs_5_0 - {132353, 31}, // gs_5_1 - {132608, 32}, // gs_6_0 - {132609, 33}, // gs_6_1 - {132610, 34}, // gs_6_2 - {132611, 35}, // gs_6_3 - {132612, 36}, // gs_6_4 - {132613, 37}, // gs_6_5 - {132614, 38}, // gs_6_6 - {132615, 39}, // gs_6_7 - {132616, 40}, // gs_6_8 - {132617, 41}, // gs_6_9 - {197632, 42}, // hs_4_0 - {197633, 43}, // hs_4_1 - {197888, 44}, // hs_5_0 - {197889, 45}, // hs_5_1 - {198144, 46}, // hs_6_0 - {198145, 47}, // hs_6_1 - {198146, 48}, // hs_6_2 - {198147, 49}, // hs_6_3 - {198148, 50}, // hs_6_4 - {198149, 51}, // hs_6_5 - {198150, 52}, // hs_6_6 - {198151, 53}, // hs_6_7 - {198152, 54}, // hs_6_8 - {198153, 55}, // hs_6_9 - {263168, 56}, // ds_4_0 - {263169, 57}, // ds_4_1 - {263424, 58}, // ds_5_0 - {263425, 59}, // ds_5_1 - {263680, 60}, // ds_6_0 - {263681, 61}, // ds_6_1 - {263682, 62}, // ds_6_2 - {263683, 63}, // ds_6_3 - {263684, 64}, // ds_6_4 - {263685, 65}, // ds_6_5 - {263686, 66}, // ds_6_6 - {263687, 67}, // ds_6_7 - {263688, 68}, // ds_6_8 - {263689, 69}, // ds_6_9 - {328704, 70}, // cs_4_0 - {328705, 71}, // cs_4_1 - {328960, 72}, // cs_5_0 - {328961, 73}, // cs_5_1 - {329216, 74}, // cs_6_0 - {329217, 75}, // cs_6_1 - {329218, 76}, // cs_6_2 - {329219, 77}, // cs_6_3 - {329220, 78}, // cs_6_4 - {329221, 79}, // cs_6_5 - {329222, 80}, // cs_6_6 - {329223, 81}, // 
cs_6_7 - {329224, 82}, // cs_6_8 - {329225, 83}, // cs_6_9 - {394241, 84}, // lib_4_1 - {394497, 85}, // lib_5_1 - {394753, 86}, // lib_6_1 - {394754, 87}, // lib_6_2 - {394755, 88}, // lib_6_3 - {394756, 89}, // lib_6_4 - {394757, 90}, // lib_6_5 - {394758, 91}, // lib_6_6 - {394759, 92}, // lib_6_7 - {394760, 93}, // lib_6_8 - {394761, 94}, // lib_6_9 + {1024, 0}, // ps_4_0 + {1025, 1}, // ps_4_1 + {1280, 2}, // ps_5_0 + {1281, 3}, // ps_5_1 + {1536, 4}, // ps_6_0 + {1537, 5}, // ps_6_1 + {1538, 6}, // ps_6_2 + {1539, 7}, // ps_6_3 + {1540, 8}, // ps_6_4 + {1541, 9}, // ps_6_5 + {1542, 10}, // ps_6_6 + {1543, 11}, // ps_6_7 + {1544, 12}, // ps_6_8 + {1545, 13}, // ps_6_9 + {1546, 14}, // ps_6_10 + {66560, 15}, // vs_4_0 + {66561, 16}, // vs_4_1 + {66816, 17}, // vs_5_0 + {66817, 18}, // vs_5_1 + {67072, 19}, // vs_6_0 + {67073, 20}, // vs_6_1 + {67074, 21}, // vs_6_2 + {67075, 22}, // vs_6_3 + {67076, 23}, // vs_6_4 + {67077, 24}, // vs_6_5 + {67078, 25}, // vs_6_6 + {67079, 26}, // vs_6_7 + {67080, 27}, // vs_6_8 + {67081, 28}, // vs_6_9 + {67082, 29}, // vs_6_10 + {132096, 30}, // gs_4_0 + {132097, 31}, // gs_4_1 + {132352, 32}, // gs_5_0 + {132353, 33}, // gs_5_1 + {132608, 34}, // gs_6_0 + {132609, 35}, // gs_6_1 + {132610, 36}, // gs_6_2 + {132611, 37}, // gs_6_3 + {132612, 38}, // gs_6_4 + {132613, 39}, // gs_6_5 + {132614, 40}, // gs_6_6 + {132615, 41}, // gs_6_7 + {132616, 42}, // gs_6_8 + {132617, 43}, // gs_6_9 + {132618, 44}, // gs_6_10 + {197632, 45}, // hs_4_0 + {197633, 46}, // hs_4_1 + {197888, 47}, // hs_5_0 + {197889, 48}, // hs_5_1 + {198144, 49}, // hs_6_0 + {198145, 50}, // hs_6_1 + {198146, 51}, // hs_6_2 + {198147, 52}, // hs_6_3 + {198148, 53}, // hs_6_4 + {198149, 54}, // hs_6_5 + {198150, 55}, // hs_6_6 + {198151, 56}, // hs_6_7 + {198152, 57}, // hs_6_8 + {198153, 58}, // hs_6_9 + {198154, 59}, // hs_6_10 + {263168, 60}, // ds_4_0 + {263169, 61}, // ds_4_1 + {263424, 62}, // ds_5_0 + {263425, 63}, // ds_5_1 + {263680, 64}, // ds_6_0 + {263681, 65}, // ds_6_1 + {263682, 66}, // ds_6_2 + {263683, 67}, // ds_6_3 + {263684, 68}, // ds_6_4 + {263685, 69}, // ds_6_5 + {263686, 70}, // ds_6_6 + {263687, 71}, // ds_6_7 + {263688, 72}, // ds_6_8 + {263689, 73}, // ds_6_9 + {263690, 74}, // ds_6_10 + {328704, 75}, // cs_4_0 + {328705, 76}, // cs_4_1 + {328960, 77}, // cs_5_0 + {328961, 78}, // cs_5_1 + {329216, 79}, // cs_6_0 + {329217, 80}, // cs_6_1 + {329218, 81}, // cs_6_2 + {329219, 82}, // cs_6_3 + {329220, 83}, // cs_6_4 + {329221, 84}, // cs_6_5 + {329222, 85}, // cs_6_6 + {329223, 86}, // cs_6_7 + {329224, 87}, // cs_6_8 + {329225, 88}, // cs_6_9 + {329226, 89}, // cs_6_10 + {394241, 90}, // lib_4_1 + {394497, 91}, // lib_5_1 + {394753, 92}, // lib_6_1 + {394754, 93}, // lib_6_2 + {394755, 94}, // lib_6_3 + {394756, 95}, // lib_6_4 + {394757, 96}, // lib_6_5 + {394758, 97}, // lib_6_6 + {394759, 98}, // lib_6_7 + {394760, 99}, // lib_6_8 + {394761, 100}, // lib_6_9 + {394762, 101}, // lib_6_10 // lib_6_x is for offline linking only, and relaxes restrictions - {394767, 95}, // lib_6_x - {853509, 96}, // ms_6_5 - {853510, 97}, // ms_6_6 - {853511, 98}, // ms_6_7 - {853512, 99}, // ms_6_8 - {853513, 100}, // ms_6_9 - {919045, 101}, // as_6_5 - {919046, 102}, // as_6_6 - {919047, 103}, // as_6_7 - {919048, 104}, // as_6_8 - {919049, 105}, // as_6_9 + {394767, 102}, // lib_6_x + {853509, 103}, // ms_6_5 + {853510, 104}, // ms_6_6 + {853511, 105}, // ms_6_7 + {853512, 106}, // ms_6_8 + {853513, 107}, // ms_6_9 + {853514, 108}, // ms_6_10 + {919045, 109}, // as_6_5 + 
{919046, 110}, // as_6_6 + {919047, 111}, // as_6_7 + {919048, 112}, // as_6_8 + {919049, 113}, // as_6_9 + {919050, 114}, // as_6_10 }; unsigned hash = (unsigned)Kind << 16 | Major << 8 | Minor; auto pred = [](const std::pair &elem, unsigned val) { @@ -328,6 +338,9 @@ void ShaderModel::GetDxilVersion(unsigned &DxilMajor, case 9: DxilMinor = 9; break; + case 10: + DxilMinor = 10; + break; case kOfflineMinor: // Always update this to highest dxil version DxilMinor = DXIL::kDxilMinor; break; @@ -378,6 +391,9 @@ void ShaderModel::GetMinValidatorVersion(unsigned &ValMajor, case 9: ValMinor = 9; break; + case 10: + ValMinor = 10; + break; // VALRULE-TEXT:END case kOfflineMinor: ValMajor = 0; @@ -519,6 +535,7 @@ const ShaderModel ShaderModel::ms_ShaderModels[kNumShaderModels] = { SM(Kind::Pixel, 6, 7, "ps_6_7", 32, 8, true, true, UINT_MAX), SM(Kind::Pixel, 6, 8, "ps_6_8", 32, 8, true, true, UINT_MAX), SM(Kind::Pixel, 6, 9, "ps_6_9", 32, 8, true, true, UINT_MAX), + SM(Kind::Pixel, 6, 10, "ps_6_10", 32, 8, true, true, UINT_MAX), SM(Kind::Vertex, 4, 0, "vs_4_0", 16, 16, false, false, 0), SM(Kind::Vertex, 4, 1, "vs_4_1", 32, 32, false, false, 0), SM(Kind::Vertex, 5, 0, "vs_5_0", 32, 32, true, true, 64), @@ -533,6 +550,7 @@ const ShaderModel ShaderModel::ms_ShaderModels[kNumShaderModels] = { SM(Kind::Vertex, 6, 7, "vs_6_7", 32, 32, true, true, UINT_MAX), SM(Kind::Vertex, 6, 8, "vs_6_8", 32, 32, true, true, UINT_MAX), SM(Kind::Vertex, 6, 9, "vs_6_9", 32, 32, true, true, UINT_MAX), + SM(Kind::Vertex, 6, 10, "vs_6_10", 32, 32, true, true, UINT_MAX), SM(Kind::Geometry, 4, 0, "gs_4_0", 16, 32, false, false, 0), SM(Kind::Geometry, 4, 1, "gs_4_1", 32, 32, false, false, 0), SM(Kind::Geometry, 5, 0, "gs_5_0", 32, 32, true, true, 64), @@ -547,6 +565,7 @@ const ShaderModel ShaderModel::ms_ShaderModels[kNumShaderModels] = { SM(Kind::Geometry, 6, 7, "gs_6_7", 32, 32, true, true, UINT_MAX), SM(Kind::Geometry, 6, 8, "gs_6_8", 32, 32, true, true, UINT_MAX), SM(Kind::Geometry, 6, 9, "gs_6_9", 32, 32, true, true, UINT_MAX), + SM(Kind::Geometry, 6, 10, "gs_6_10", 32, 32, true, true, UINT_MAX), SM(Kind::Hull, 4, 0, "hs_4_0", 32, 32, false, false, 0), SM(Kind::Hull, 4, 1, "hs_4_1", 32, 32, false, false, 0), SM(Kind::Hull, 5, 0, "hs_5_0", 32, 32, true, true, 64), @@ -561,6 +580,7 @@ const ShaderModel ShaderModel::ms_ShaderModels[kNumShaderModels] = { SM(Kind::Hull, 6, 7, "hs_6_7", 32, 32, true, true, UINT_MAX), SM(Kind::Hull, 6, 8, "hs_6_8", 32, 32, true, true, UINT_MAX), SM(Kind::Hull, 6, 9, "hs_6_9", 32, 32, true, true, UINT_MAX), + SM(Kind::Hull, 6, 10, "hs_6_10", 32, 32, true, true, UINT_MAX), SM(Kind::Domain, 4, 0, "ds_4_0", 32, 32, false, false, 0), SM(Kind::Domain, 4, 1, "ds_4_1", 32, 32, false, false, 0), SM(Kind::Domain, 5, 0, "ds_5_0", 32, 32, true, true, 64), @@ -575,6 +595,7 @@ const ShaderModel ShaderModel::ms_ShaderModels[kNumShaderModels] = { SM(Kind::Domain, 6, 7, "ds_6_7", 32, 32, true, true, UINT_MAX), SM(Kind::Domain, 6, 8, "ds_6_8", 32, 32, true, true, UINT_MAX), SM(Kind::Domain, 6, 9, "ds_6_9", 32, 32, true, true, UINT_MAX), + SM(Kind::Domain, 6, 10, "ds_6_10", 32, 32, true, true, UINT_MAX), SM(Kind::Compute, 4, 0, "cs_4_0", 0, 0, false, false, 0), SM(Kind::Compute, 4, 1, "cs_4_1", 0, 0, false, false, 0), SM(Kind::Compute, 5, 0, "cs_5_0", 0, 0, true, true, 64), @@ -589,6 +610,7 @@ const ShaderModel ShaderModel::ms_ShaderModels[kNumShaderModels] = { SM(Kind::Compute, 6, 7, "cs_6_7", 0, 0, true, true, UINT_MAX), SM(Kind::Compute, 6, 8, "cs_6_8", 0, 0, true, true, UINT_MAX), SM(Kind::Compute, 6, 9, 
"cs_6_9", 0, 0, true, true, UINT_MAX), + SM(Kind::Compute, 6, 10, "cs_6_10", 0, 0, true, true, UINT_MAX), SM(Kind::Library, 4, 1, "lib_4_1", 0, 0, false, false, 0), SM(Kind::Library, 5, 1, "lib_5_1", 0, 0, true, true, 64), SM(Kind::Library, 6, 1, "lib_6_1", 0, 0, true, true, UINT_MAX), @@ -600,6 +622,7 @@ const ShaderModel ShaderModel::ms_ShaderModels[kNumShaderModels] = { SM(Kind::Library, 6, 7, "lib_6_7", 0, 0, true, true, UINT_MAX), SM(Kind::Library, 6, 8, "lib_6_8", 0, 0, true, true, UINT_MAX), SM(Kind::Library, 6, 9, "lib_6_9", 0, 0, true, true, UINT_MAX), + SM(Kind::Library, 6, 10, "lib_6_10", 0, 0, true, true, UINT_MAX), // lib_6_x is for offline linking only, and relaxes restrictions SM(Kind::Library, 6, kOfflineMinor, "lib_6_x", 32, 32, true, true, UINT_MAX), @@ -608,11 +631,13 @@ const ShaderModel ShaderModel::ms_ShaderModels[kNumShaderModels] = { SM(Kind::Mesh, 6, 7, "ms_6_7", 0, 0, true, true, UINT_MAX), SM(Kind::Mesh, 6, 8, "ms_6_8", 0, 0, true, true, UINT_MAX), SM(Kind::Mesh, 6, 9, "ms_6_9", 0, 0, true, true, UINT_MAX), + SM(Kind::Mesh, 6, 10, "ms_6_10", 0, 0, true, true, UINT_MAX), SM(Kind::Amplification, 6, 5, "as_6_5", 0, 0, true, true, UINT_MAX), SM(Kind::Amplification, 6, 6, "as_6_6", 0, 0, true, true, UINT_MAX), SM(Kind::Amplification, 6, 7, "as_6_7", 0, 0, true, true, UINT_MAX), SM(Kind::Amplification, 6, 8, "as_6_8", 0, 0, true, true, UINT_MAX), SM(Kind::Amplification, 6, 9, "as_6_9", 0, 0, true, true, UINT_MAX), + SM(Kind::Amplification, 6, 10, "as_6_10", 0, 0, true, true, UINT_MAX), // Values before Invalid must remain sorted by Kind, then Major, then Minor. SM(Kind::Invalid, 0, 0, "invalid", 0, 0, false, false, 0), // VALRULE-TEXT:END diff --git a/tools/clang/unittests/HLSL/ValidationTest.cpp b/tools/clang/unittests/HLSL/ValidationTest.cpp index 796f017f09..b1b79bbae9 100644 --- a/tools/clang/unittests/HLSL/ValidationTest.cpp +++ b/tools/clang/unittests/HLSL/ValidationTest.cpp @@ -4883,8 +4883,9 @@ TEST_F(ValidationTest, CacheInitWithLowPrec) { } TEST_F(ValidationTest, PSVStringTableReorder) { - if (m_ver.SkipDxilVersion(1, 10)) - return; + if (!m_ver.m_InternalValidator) + if (m_ver.SkipDxilVersion(1, 8)) + return; CComPtr pProgram; CompileSource("float4 main(float a:A, float b:B) : SV_Target { return 1; }", @@ -5075,8 +5076,9 @@ class SemanticIndexRotator { }; TEST_F(ValidationTest, PSVSemanticIndexTableReorder) { - if (m_ver.SkipDxilVersion(1, 10)) - return; + if (!m_ver.m_InternalValidator) + if (m_ver.SkipDxilVersion(1, 8)) + return; CComPtr pProgram; CompileFile(L"..\\DXILValidation\\hs_signatures.hlsl", "hs_6_0", &pProgram); @@ -5550,8 +5552,9 @@ SimplePSV::SimplePSV(const DxilPartHeader *pPSVPart) { } TEST_F(ValidationTest, PSVContentValidationVS) { - if (m_ver.SkipDxilVersion(1, 10)) - return; + if (!m_ver.m_InternalValidator) + if (m_ver.SkipDxilVersion(1, 8)) + return; CComPtr pProgram; CompileFile(L"..\\DXC\\dumpPSV_VS.hlsl", "vs_6_8", &pProgram); @@ -5705,8 +5708,9 @@ TEST_F(ValidationTest, PSVContentValidationVS) { } TEST_F(ValidationTest, PSVContentValidationHS) { - if (m_ver.SkipDxilVersion(1, 10)) - return; + if (!m_ver.m_InternalValidator) + if (m_ver.SkipDxilVersion(1, 8)) + return; CComPtr pProgram; CompileFile(L"..\\DXC\\dumpPSV_HS.hlsl", "hs_6_8", &pProgram); @@ -5854,8 +5858,9 @@ TEST_F(ValidationTest, PSVContentValidationHS) { } TEST_F(ValidationTest, PSVContentValidationDS) { - if (m_ver.SkipDxilVersion(1, 10)) - return; + if (!m_ver.m_InternalValidator) + if (m_ver.SkipDxilVersion(1, 8)) + return; CComPtr pProgram; 
CompileFile(L"..\\DXC\\dumpPSV_DS.hlsl", "ds_6_8", &pProgram); @@ -6010,8 +6015,9 @@ TEST_F(ValidationTest, PSVContentValidationDS) { } TEST_F(ValidationTest, PSVContentValidationGS) { - if (m_ver.SkipDxilVersion(1, 10)) - return; + if (!m_ver.m_InternalValidator) + if (m_ver.SkipDxilVersion(1, 8)) + return; CComPtr pProgram; CompileFile(L"..\\DXC\\dumpPSV_GS.hlsl", "gs_6_8", &pProgram); @@ -6097,8 +6103,9 @@ TEST_F(ValidationTest, PSVContentValidationGS) { } TEST_F(ValidationTest, PSVContentValidationPS) { - if (m_ver.SkipDxilVersion(1, 10)) - return; + if (!m_ver.m_InternalValidator) + if (m_ver.SkipDxilVersion(1, 8)) + return; CComPtr pProgram; CompileFile(L"..\\DXC\\dumpPSV_PS.hlsl", "ps_6_8", &pProgram); @@ -6181,8 +6188,9 @@ TEST_F(ValidationTest, PSVContentValidationPS) { } TEST_F(ValidationTest, PSVContentValidationCS) { - if (m_ver.SkipDxilVersion(1, 10)) - return; + if (!m_ver.m_InternalValidator) + if (m_ver.SkipDxilVersion(1, 8)) + return; CComPtr pProgram; CompileFile(L"..\\DXC\\dumpPSV_CS.hlsl", "cs_6_8", &pProgram); @@ -6262,8 +6270,9 @@ TEST_F(ValidationTest, PSVContentValidationCS) { } TEST_F(ValidationTest, PSVContentValidationMS) { - if (m_ver.SkipDxilVersion(1, 10)) - return; + if (!m_ver.m_InternalValidator) + if (m_ver.SkipDxilVersion(1, 8)) + return; CComPtr pProgram; CompileFile(L"..\\DXC\\dumpPSV_MS.hlsl", "ms_6_8", &pProgram); @@ -6328,8 +6337,9 @@ TEST_F(ValidationTest, PSVContentValidationMS) { } TEST_F(ValidationTest, PSVContentValidationAS) { - if (m_ver.SkipDxilVersion(1, 10)) - return; + if (!m_ver.m_InternalValidator) + if (m_ver.SkipDxilVersion(1, 8)) + return; CComPtr pProgram; CompileFile(L"..\\DXC\\dumpPSV_AS.hlsl", "as_6_8", &pProgram); @@ -6429,8 +6439,9 @@ struct SimpleContainer { }; TEST_F(ValidationTest, WrongPSVSize) { - if (m_ver.SkipDxilVersion(1, 10)) - return; + if (!m_ver.m_InternalValidator) + if (m_ver.SkipDxilVersion(1, 8)) + return; CComPtr pProgram; CompileFile(L"..\\DXC\\dumpPSV_AS.hlsl", "as_6_8", &pProgram); @@ -6517,8 +6528,9 @@ TEST_F(ValidationTest, WrongPSVSize) { } TEST_F(ValidationTest, WrongPSVSizeOnZeros) { - if (m_ver.SkipDxilVersion(1, 10)) - return; + if (!m_ver.m_InternalValidator) + if (m_ver.SkipDxilVersion(1, 8)) + return; CComPtr pProgram; CompileFile(L"..\\DXC\\dumpPSV_PS.hlsl", "ps_6_8", &pProgram); @@ -6612,8 +6624,9 @@ TEST_F(ValidationTest, WrongPSVSizeOnZeros) { } TEST_F(ValidationTest, WrongPSVVersion) { - if (m_ver.SkipDxilVersion(1, 10)) - return; + if (!m_ver.m_InternalValidator) + if (m_ver.SkipDxilVersion(1, 8)) + return; CComPtr pProgram60; std::vector args; diff --git a/tools/clang/unittests/HLSLExec/CMakeLists.txt b/tools/clang/unittests/HLSLExec/CMakeLists.txt index 8a84c2db56..8282fd5282 100644 --- a/tools/clang/unittests/HLSLExec/CMakeLists.txt +++ b/tools/clang/unittests/HLSLExec/CMakeLists.txt @@ -12,7 +12,6 @@ add_clang_library(ExecHLSLTests SHARED LongVectors.cpp HlslExecTestUtils.cpp ExecHLSLTests.rc - TaefTest.cpp ) add_dependencies(ClangUnitTests ExecHLSLTests) diff --git a/tools/clang/unittests/HLSLExec/TaefTest.cpp b/tools/clang/unittests/HLSLExec/TaefTest.cpp deleted file mode 100644 index 56d2e1dc60..0000000000 --- a/tools/clang/unittests/HLSLExec/TaefTest.cpp +++ /dev/null @@ -1,2804 +0,0 @@ -#ifndef NOMINMAX -#define NOMINMAX 1 -#endif - -#define INLINE_TEST_METHOD_MARKUP -#include - -#include "LongVectorTestData.h" - -#include "ShaderOpTest.h" -#include "dxc/Support/Global.h" - -#include "HlslTestUtils.h" - -#include "HlslExecTestUtils.h" - -#include -#include -#include -#include 
-#include -#include -#include -#include -#include - -namespace LongVector { - -// -// Data Types -// - -template constexpr bool is16BitType() { - return std::is_same_v || std::is_same_v || - std::is_same_v; -} - -struct DataType { - const char *HLSLTypeString; - bool Is16Bit; - size_t HLSLSizeInBytes; -}; - -template const DataType &getDataType() { - static_assert(false && "Unknown data type"); -} - -#define DATA_TYPE(TYPE, HLSL_STRING, HLSL_SIZE) \ - template <> const DataType &getDataType() { \ - static DataType DataType{HLSL_STRING, is16BitType(), HLSL_SIZE}; \ - return DataType; \ - } - -DATA_TYPE(HLSLBool_t, "bool", 4) -DATA_TYPE(int16_t, "int16_t", 2) -DATA_TYPE(int32_t, "int", 4) -DATA_TYPE(int64_t, "int64_t", 8) -DATA_TYPE(uint16_t, "uint16_t", 2) -DATA_TYPE(uint32_t, "uint32_t", 4) -DATA_TYPE(uint64_t, "uint64_t", 8) -DATA_TYPE(HLSLHalf_t, "half", 2) -DATA_TYPE(float, "float", 4) -DATA_TYPE(double, "double", 8) - -#undef DATA_TYPE - -template constexpr bool isFloatingPointType() { - return std::is_same_v || std::is_same_v || - std::is_same_v; -} - -// -// Operation Types -// - -enum class OpType : unsigned { -#define OP(GROUP, SYMBOL, ARITY, INTRINSIC, OPERATOR, DEFINES, SHADER_NAME, \ - INPUT_SET_1, INPUT_SET_2, INPUT_SET_3) \ - SYMBOL, -#include "LongVectorOps.def" - NumOpTypes -}; - -struct Operation { - size_t Arity; - const char *Intrinsic; - const char *Operator; - const char *ExtraDefines; - const char *ShaderName; - InputSet InputSets[3]; - OpType Type; -}; - -static constexpr Operation Operations[] = { - -#define OP(GROUP, SYMBOL, ARITY, INTRINSIC, OPERATOR, DEFINES, SHADER_NAME, \ - INPUT_SET_1, INPUT_SET_2, INPUT_SET_3) \ - {ARITY, \ - INTRINSIC, \ - OPERATOR, \ - DEFINES, \ - SHADER_NAME, \ - {InputSet::INPUT_SET_1, InputSet::INPUT_SET_2, InputSet::INPUT_SET_3}, \ - OpType::SYMBOL}, -#include "LongVectorOps.def" -}; - -constexpr const Operation &getOperation(OpType Op) { - if (Op < OpType::NumOpTypes) - return Operations[unsigned(Op)]; - std::abort(); -} - -static const std::unordered_set LoadAndStoreOpTypes = { - OpType::LoadAndStore_RDH_BAB_UAV, OpType::LoadAndStore_RDH_BAB_SRV, - OpType::LoadAndStore_DT_BAB_UAV, OpType::LoadAndStore_DT_BAB_SRV, - OpType::LoadAndStore_RD_BAB_UAV, OpType::LoadAndStore_RD_BAB_SRV, - OpType::LoadAndStore_RDH_SB_UAV, OpType::LoadAndStore_RDH_SB_SRV, - OpType::LoadAndStore_DT_SB_UAV, OpType::LoadAndStore_DT_SB_SRV, - OpType::LoadAndStore_RD_SB_UAV, OpType::LoadAndStore_RD_SB_SRV, -}; - -static bool IsStructuredBufferLoadAndStoreOp(OpType Op) { - switch (Op) { - case OpType::LoadAndStore_RDH_SB_UAV: - case OpType::LoadAndStore_RDH_SB_SRV: - case OpType::LoadAndStore_DT_SB_UAV: - case OpType::LoadAndStore_DT_SB_SRV: - case OpType::LoadAndStore_RD_SB_UAV: - case OpType::LoadAndStore_RD_SB_SRV: - return true; - default: - return false; - } -} - -// Helper to fill the test data from the shader buffer based on type. -// Convenient when copying HLSL*_t types so we can use the -// underlying type. -template -void fillLongVectorDataFromShaderBuffer(const MappedData &ShaderBuffer, - std::vector &TestData, - size_t NumElements) { - - if constexpr (std::is_same_v) { - auto *ShaderBufferPtr = - static_cast(ShaderBuffer.data()); - for (size_t I = 0; I < NumElements; I++) - TestData.push_back(HLSLHalf_t::FromHALF(ShaderBufferPtr[I])); - return; - } - - if constexpr (std::is_same_v) { - auto *ShaderBufferPtr = static_cast(ShaderBuffer.data()); - for (size_t I = 0; I < NumElements; I++) - // HLSLBool_t has an int32_t-based constructor.
- TestData.push_back(ShaderBufferPtr[I]); - return; - } - - auto *ShaderBufferPtr = static_cast(ShaderBuffer.data()); - for (size_t I = 0; I < NumElements; I++) - TestData.push_back(ShaderBufferPtr[I]); - return; -} - -template -void logLongVector(const std::vector &Values, const std::wstring &Name) { - hlsl_test::LogCommentFmt(L"LongVector Name: %s", Name.c_str()); - - const size_t LoggingWidth = 40; - - std::wstringstream Wss(L""); - Wss << L"LongVector Values: "; - Wss << L"["; - const size_t NumElements = Values.size(); - for (size_t I = 0; I < NumElements; I++) { - if (I % LoggingWidth == 0 && I != 0) - Wss << L"\n "; - Wss << Values[I]; - if (I != NumElements - 1) - Wss << L", "; - } - Wss << L" ]"; - - hlsl_test::LogCommentFmt(Wss.str().c_str()); -} - -enum class ValidationType { - Epsilon, - Ulp, -}; - -template -bool doValuesMatch(T A, T B, double Tolerance, ValidationType) { - if (Tolerance == 0.0) - return A == B; - - T Diff = A > B ? A - B : B - A; - return Diff <= Tolerance; -} - -bool doValuesMatch(HLSLBool_t A, HLSLBool_t B, double, ValidationType) { - return A == B; -} - -bool doValuesMatch(HLSLHalf_t A, HLSLHalf_t B, double Tolerance, - ValidationType ValidationType) { - switch (ValidationType) { - case ValidationType::Epsilon: - return CompareHalfEpsilon(A.Val, B.Val, static_cast(Tolerance)); - case ValidationType::Ulp: - return CompareHalfULP(A.Val, B.Val, static_cast(Tolerance)); - default: - hlsl_test::LogErrorFmt( - L"Invalid ValidationType. Expecting Epsilon or ULP."); - return false; - } -} - -bool doValuesMatch(float A, float B, double Tolerance, - ValidationType ValidationType) { - switch (ValidationType) { - case ValidationType::Epsilon: - return CompareFloatEpsilon(A, B, static_cast(Tolerance)); - case ValidationType::Ulp: { - // Tolerance is in ULPs. Convert to int for the comparison. - const int IntTolerance = static_cast(Tolerance); - return CompareFloatULP(A, B, IntTolerance); - }; - default: - hlsl_test::LogErrorFmt( - L"Invalid ValidationType. Expecting Epsilon or ULP."); - return false; - } -} - -bool doValuesMatch(double A, double B, double Tolerance, - ValidationType ValidationType) { - switch (ValidationType) { - case ValidationType::Epsilon: - return CompareDoubleEpsilon(A, B, Tolerance); - case ValidationType::Ulp: { - // Tolerance is in ULPs. Convert to int64_t for the comparison. - const int64_t IntTolerance = static_cast(Tolerance); - return CompareDoubleULP(A, B, IntTolerance); - }; - default: - hlsl_test::LogErrorFmt( - L"Invalid ValidationType. Expecting Epsilon or ULP."); - return false; - } -} - -template -bool doVectorsMatch(const std::vector &ActualValues, - const std::vector &ExpectedValues, double Tolerance, - ValidationType ValidationType, bool VerboseLogging) { - - DXASSERT( - ActualValues.size() == ExpectedValues.size(), - "Programmer error: Actual and Expected vectors must be the same size."); - - if (VerboseLogging) { - logLongVector(ActualValues, L"ActualValues"); - logLongVector(ExpectedValues, L"ExpectedValues"); - - hlsl_test::LogCommentFmt( - L"ValidationType: %s, Tolerance: %17g", - ValidationType == ValidationType::Epsilon ? 
L"Epsilon" : L"ULP", - Tolerance); - } - - // Stash mismatched indexes for easy failure logging later - std::vector MismatchedIndexes; - for (size_t I = 0; I < ActualValues.size(); I++) { - if (!doValuesMatch(ActualValues[I], ExpectedValues[I], Tolerance, - ValidationType)) - MismatchedIndexes.push_back(I); - } - - if (MismatchedIndexes.empty()) - return true; - - if (!MismatchedIndexes.empty()) { - for (size_t Index : MismatchedIndexes) { - std::wstringstream Wss(L""); - Wss << std::setprecision(15); // Set precision for floating point types - Wss << L"Mismatch at Index: " << Index; - Wss << L" Actual Value:" << ActualValues[Index] << ","; - Wss << L" Expected Value:" << ExpectedValues[Index]; - hlsl_test::LogErrorFmt(Wss.str().c_str()); - } - } - - return false; -} - -static WEX::Common::String getInputValueSetName(size_t Index) { - using WEX::Common::String; - using WEX::TestExecution::TestData; - - DXASSERT(Index >= 0 && Index <= 9, "Only single digit indices supported"); - - String ParameterName = L"InputValueSetName"; - ParameterName.Append((wchar_t)(L'1' + Index)); - - String ValueSetName; - if (FAILED(TestData::TryGetValue(ParameterName, ValueSetName))) { - String Name = L"DefaultInputValueSet"; - Name.Append((wchar_t)(L'1' + Index)); - return Name; - } - - return ValueSetName; -} - -std::string getCompilerOptionsString( - const Operation &Operation, const DataType &OpDataType, - const DataType &OutDataType, size_t VectorSize, - std::optional AdditionalOptions = std::nullopt) { - std::stringstream CompilerOptions; - - if (OpDataType.Is16Bit || OutDataType.Is16Bit) - CompilerOptions << " -enable-16bit-types"; - - CompilerOptions << " -DTYPE=" << OpDataType.HLSLTypeString; - CompilerOptions << " -DNUM=" << VectorSize; - - CompilerOptions << " -DOPERATOR="; - CompilerOptions << Operation.Operator; - - CompilerOptions << " -DFUNC="; - CompilerOptions << Operation.Intrinsic; - - CompilerOptions << " " << Operation.ExtraDefines; - - CompilerOptions << " -DOUT_TYPE=" << OutDataType.HLSLTypeString; - - CompilerOptions << " -DBASIC_OP_TYPE=0x" << std::hex << Operation.Arity; - - if (AdditionalOptions) - CompilerOptions << " " << AdditionalOptions.value(); - - return CompilerOptions.str(); -} - -// Helper to fill the shader buffer based on type. Convenient to be used when -// copying HLSL*_t types so we can copy the underlying type directly instead -// of the struct. -template -void fillShaderBufferFromLongVectorData(std::vector &ShaderBuffer, - const std::vector &TestData) { - - // Note: DataSize for HLSLHalf_t and HLSLBool_t may be larger than the - // underlying type in some cases. Thats fine. Resize just makes sure we have - // enough space. - const size_t NumElements = TestData.size(); - [[maybe_unused]] const size_t DataSize = sizeof(T) * NumElements; - - // Ensure the shader buffer is large enough. It should be pre-sized based on - // the D3D12_RESOURCE_DESC for the associated D3D12_RESOURCE. 
- DXASSERT_NOMSG(ShaderBuffer.size() >= DataSize); - - if constexpr (std::is_same_v) { - auto *ShaderBufferPtr = - reinterpret_cast(ShaderBuffer.data()); - for (size_t I = 0; I < NumElements; I++) - ShaderBufferPtr[I] = TestData[I].Val; - return; - } - - if constexpr (std::is_same_v) { - auto *ShaderBufferPtr = reinterpret_cast(ShaderBuffer.data()); - for (size_t I = 0; I < NumElements; I++) - ShaderBufferPtr[I] = TestData[I].Val; - return; - } - - auto *ShaderBufferPtr = reinterpret_cast(ShaderBuffer.data()); - for (size_t I = 0; I < NumElements; I++) - ShaderBufferPtr[I] = TestData[I]; -} - -// -// Run the test. Return std::nullopt if the test was skipped, otherwise returns -// the output buffer that was populated by the shader. -// -template using InputSets = std::vector>; - -template -std::optional> -runTest(ID3D12Device *D3DDevice, bool VerboseLogging, - const Operation &Operation, const InputSets &Inputs, - size_t ExpectedOutputSize, - std::optional AdditionalCompilerOptions) { - DXASSERT_NOMSG(Inputs.size() == Operation.Arity); - - if (VerboseLogging) { - for (size_t I = 0; I < Operation.Arity; ++I) { - std::wstring Name = L"InputVector"; - Name += (wchar_t)(L'1' + I); - logLongVector(Inputs[I], Name); - } - } - - const DataType &OpDataType = getDataType(); - const DataType &OutDataType = getDataType(); - - // We have to construct the string outside of the lambda. Otherwise it's - // cleaned up when the lambda finishes executing but before the shader runs. - std::string CompilerOptionsString = - getCompilerOptionsString(Operation, OpDataType, OutDataType, - Inputs[0].size(), AdditionalCompilerOptions); - - dxc::SpecificDllLoader DxilDllLoader; - CComPtr TestXML; - readHlslDataIntoNewStream(L"ShaderOpArith.xml", &TestXML, DxilDllLoader); - auto ShaderOpSet = std::make_shared(); - st::ParseShaderOpSetFromStream(TestXML, ShaderOpSet.get()); - - if (LoadAndStoreOpTypes.count(Operation.Type) > 0) - configureLoadAndStoreShaderOp(Operation, OpDataType, Inputs[0].size(), - sizeof(T), ShaderOpSet.get()); - - // RunShaderOpTest is a helper function that handles resource creation - // and setup. It also handles the shader compilation and execution. It takes - // a callback that is called when the shader is compiled, but before it is - // executed. - std::shared_ptr TestResult = - st::RunShaderOpTestAfterParse( - D3DDevice, DxilDllLoader, Operation.ShaderName, - [&](LPCSTR Name, std::vector &ShaderData, - st::ShaderOp *ShaderOp) { - if (VerboseLogging) - hlsl_test::LogCommentFmt( - L"RunShaderOpTest CallBack. Resource Name: %S", Name); - - // This callback is called once for each resource defined for - // "LongVectorOp" in ShaderOpArith.xml. All callbacks are fired for - // each resource. We determine whether they are applicable to the - // test case when they run. - - // Process the callback for the OutputVector resource. - if (_stricmp(Name, "OutputVector") == 0) { - // We only need to set the compiler options string once. So this - // is a convenient place to do it. - ShaderOp->Shaders.at(0).Arguments = CompilerOptionsString.c_str(); - - return; - } - - // Process the callback for the InputVector[1-3] resources - for (size_t I = 0; I < 3; ++I) { - std::string BufferName = "InputVector"; - BufferName += (char)('1' + I); - if (_stricmp(Name, BufferName.c_str()) == 0) { - if (I < Operation.Arity) - fillShaderBufferFromLongVectorData(ShaderData, Inputs[I]); - return; - } - } - - LOG_ERROR_FMT_THROW( - L"RunShaderOpTest CallBack. 
Unexpected Resource Name: %S", - Name); - }, - std::move(ShaderOpSet)); - - // Extract the data from the shader result - MappedData ShaderOutData; - - char *ReadBackName = "OutputVector"; - TestResult->Test->GetReadBackData(ReadBackName, &ShaderOutData); - - std::vector OutData; - fillLongVectorDataFromShaderBuffer(ShaderOutData, OutData, - ExpectedOutputSize); - - return OutData; -} - -// LoadAndStore operations dynamically configure the UAV/SRV formats and sizes -// based on the vector size and data type. We also adjust the format and flags -// based on whether we're using raw buffers or structured buffers. -void configureLoadAndStoreShaderOp(const Operation &Operation, - const DataType &OpDataType, - size_t VectorSize, size_t ElementSize, - st::ShaderOpSet *ShaderOpSet) { - - DXASSERT_NOMSG(LoadAndStoreOpTypes.count(Operation.Type) > 0); - - st::ShaderOp *ShaderOp = ShaderOpSet->GetShaderOp(Operation.ShaderName); - DXASSERT(ShaderOp, "Invalid ShaderOp name"); - - // When using DXGI_FORMAT_R32_TYPELESS (raw buffer cases) we need to compute - // the number of 32-bit elements required to hold the vector. - const UINT Num32BitElements = - static_cast((VectorSize * OpDataType.HLSLSizeInBytes + 3) / 4); - - const UINT StructureByteStride = static_cast(ElementSize * VectorSize); - - const bool IsSB = IsStructuredBufferLoadAndStoreOp(Operation.Type); - if (!ShaderOp->DescriptorHeaps.empty()) { - DXASSERT(ShaderOp->DescriptorHeaps.size() == 1, - "Programmer error: Expecting a single descriptor heap for " - "LoadAndStore tests"); - - for (auto &D : ShaderOp->DescriptorHeaps[0].Descriptors) { - const bool IsUAV = (_stricmp(D.Kind, "UAV") == 0); - DXASSERT(IsUAV || (_stricmp(D.Kind, "SRV") == 0), - "Programmer error: Expecting UAV or SRV descriptors only"); - - if (IsSB) { - if (IsUAV) { - D.UavDesc.Format = DXGI_FORMAT_UNKNOWN; - D.UavDesc.Buffer.NumElements = 1; // One StructuredBuffer - D.UavDesc.Buffer.StructureByteStride = StructureByteStride; - } else { - D.SrvDesc.Format = DXGI_FORMAT_UNKNOWN; - D.SrvDesc.Buffer.NumElements = 1; // One StructuredBuffer - D.SrvDesc.Buffer.StructureByteStride = StructureByteStride; - } - } else { // Raw buffer - if (IsUAV) { - D.UavDesc.Format = DXGI_FORMAT_R32_TYPELESS; - D.UavDesc.Buffer.NumElements = Num32BitElements; - D.UavDesc.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_RAW; - } else { - D.SrvDesc.Format = DXGI_FORMAT_R32_TYPELESS; - D.SrvDesc.Buffer.NumElements = Num32BitElements; - D.SrvDesc.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_RAW; - } - } - } - } - - const UINT BufferWidth = IsSB ? 
StructureByteStride : (Num32BitElements * 4); - for (auto &R : ShaderOp->Resources) - R.Desc.Width = BufferWidth; -} - -template -std::vector buildTestInput(InputSet InputSet, size_t SizeToTest) { - const std::vector &RawValueSet = getInputSet(InputSet); - - std::vector ValueSet; - ValueSet.reserve(SizeToTest); - for (size_t I = 0; I < SizeToTest; ++I) - ValueSet.push_back(RawValueSet[I % RawValueSet.size()]); - - return ValueSet; -} - -template -InputSets buildTestInputs(size_t VectorSize, const InputSet OpInputSets[3], - size_t Arity) { - InputSets Inputs; - - for (size_t I = 0; I < Arity; ++I) - Inputs.push_back(buildTestInput(OpInputSets[I], VectorSize)); - - return Inputs; -} - -struct ValidationConfig { - double Tolerance = 0.0; - ValidationType Type = ValidationType::Epsilon; - - static ValidationConfig Epsilon(double Tolerance) { - return ValidationConfig{Tolerance, ValidationType::Epsilon}; - } - - static ValidationConfig Ulp(double Tolerance) { - return ValidationConfig{Tolerance, ValidationType::Ulp}; - } -}; - -template -void runAndVerify( - ID3D12Device *D3DDevice, bool VerboseLogging, const Operation &Operation, - const InputSets &Inputs, const std::vector &Expected, - const ValidationConfig &ValidationConfig, - std::optional AdditionalCompilerOptions = std::nullopt) { - - std::optional> Actual = - runTest(D3DDevice, VerboseLogging, Operation, Inputs, - Expected.size(), AdditionalCompilerOptions); - - // If the test didn't run, don't verify anything. - if (!Actual) - return; - - VERIFY_IS_TRUE(doVectorsMatch(*Actual, Expected, ValidationConfig.Tolerance, - ValidationConfig.Type, VerboseLogging)); -} - -// -// Op definitions. The main goal of this is to specify the validation -// configuration and how to build the Expected results for a given Op. -// -// Most Ops have a 1:1 mapping of input to output, and so can use the generic -// ExpectedBuilder. -// -// Ops that differ from this pattern can specialize ExpectedBuilder as -// necessary. -// - -// Op - specializations are expected to have a ValidationConfig member and an -// appropriate overloaded function call operator. -template struct Op; - -// ExpectedBuilder - specializations are expected to have buildExpected -// member functions. -template struct ExpectedBuilder; - -// Default Validation configuration - ULP for floating point types, exact -// matches for everything else. -template struct DefaultValidation { - ValidationConfig ValidationConfig; - - DefaultValidation() { - if constexpr (isFloatingPointType()) - ValidationConfig = ValidationConfig::Ulp(1.0f); - } -}; - -// Strict Validation - Defaults to exact matches. -// Tolerance can be set to a non-zero value to allow for a wider range.
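// For example, a default-constructed ValidationConfig (Tolerance 0.0 with
// ValidationType::Epsilon) makes doValuesMatch require exact equality, while
// ValidationConfig::Ulp(2.0) would accept results within two ULPs.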
-struct StrictValidation { - ValidationConfig ValidationConfig; -}; - -// Macros to build up common patterns of Op definitions - -#define OP_1(OP, VALIDATION, IMPL) \ - template struct Op : VALIDATION { \ - T operator()(T A) { return IMPL; } \ - } - -#define OP_2(OP, VALIDATION, IMPL) \ - template struct Op : VALIDATION { \ - T operator()(T A, T B) { return IMPL; } \ - } - -#define OP_3(OP, VALIDATION, IMPL) \ - template struct Op : VALIDATION { \ - T operator()(T A, T B, T C) { return IMPL; } \ - } - -#define STRICT_OP_1(OP, IMPL) OP_1(OP, StrictValidation, IMPL) - -#define DEFAULT_OP_1(OP, IMPL) OP_1(OP, DefaultValidation, IMPL) -#define DEFAULT_OP_2(OP, IMPL) OP_2(OP, DefaultValidation, IMPL) -#define DEFAULT_OP_3(OP, IMPL) OP_3(OP, DefaultValidation, IMPL) - -// -// TernaryMath -// - -DEFAULT_OP_3(OpType::Mad, (A * B + C)); -DEFAULT_OP_3(OpType::Fma, (A * B + C)); - -// -// BinaryMath -// - -DEFAULT_OP_2(OpType::Add, (A + B)); -DEFAULT_OP_2(OpType::Subtract, (A - B)); -DEFAULT_OP_2(OpType::Multiply, (A * B)); -DEFAULT_OP_2(OpType::Divide, (A / B)); - -template struct Op : DefaultValidation { - T operator()(T A, T B) { - if constexpr (std::is_same_v) - return std::fmod(A, B); - else - return A % B; - } -}; - -DEFAULT_OP_2(OpType::Min, (std::min(A, B))); -DEFAULT_OP_2(OpType::Max, (std::max(A, B))); -DEFAULT_OP_2(OpType::Ldexp, (A * static_cast(std::pow(2.0f, B)))); - -// -// Bitwise -// - -template T Saturate(T A) { - if (A < static_cast(0.0f)) - return static_cast(0.0f); - if (A > static_cast(1.0f)) - return static_cast(1.0f); - return A; -} - -template T ReverseBits(T A) { - T Result = 0; - const size_t NumBits = sizeof(T) * 8; - for (size_t I = 0; I < NumBits; I++) { - Result <<= 1; - Result |= (A & 1); - A >>= 1; - } - return Result; -} - -template uint32_t CountBits(T A) { - return static_cast(std::bitset(A).count()); -} - -// General purpose bit scan from the MSB. Based on the value of LookingForZero -// returns the index of the first high/low bit found. 
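// For example, ScanFromMSB(0b00010100, /*LookingForZero*/ false) returns 4,
// the index of the highest set bit. FirstBitHigh below passes
// LookingForZero == true for negative signed inputs, matching HLSL's
// firstbithigh, which scans for the first zero (sign-boundary) bit there.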
-template uint32_t ScanFromMSB(T A, bool LookingForZero) { - if (A == 0) - return std::numeric_limits::max(); - - constexpr uint32_t NumBits = sizeof(T) * 8; - for (int32_t I = NumBits - 1; I >= 0; --I) { - bool BitSet = (A & (static_cast(1) << I)) != 0; - if (BitSet != LookingForZero) - return static_cast(I); - } - return std::numeric_limits::max(); -} - -template -typename std::enable_if::value, uint32_t>::type -FirstBitHigh(T A) { - const bool IsNegative = A < 0; - return ScanFromMSB(A, IsNegative); -} - -template -typename std::enable_if::value, uint32_t>::type -FirstBitHigh(T A) { - return ScanFromMSB(A, false); -} - -template uint32_t FirstBitLow(T A) { - const uint32_t NumBits = sizeof(T) * 8; - - if (A == 0) - return std::numeric_limits::max(); - - for (uint32_t I = 0; I < NumBits; ++I) { - if (A & (static_cast(1) << I)) - return static_cast(I); - } - - return std::numeric_limits::max(); -} - -DEFAULT_OP_2(OpType::And, (A & B)); -DEFAULT_OP_2(OpType::Or, (A | B)); -DEFAULT_OP_2(OpType::Xor, (A ^ B)); -DEFAULT_OP_2(OpType::LeftShift, (A << B)); -DEFAULT_OP_2(OpType::RightShift, (A >> B)); -DEFAULT_OP_1(OpType::Saturate, (Saturate(A))); -DEFAULT_OP_1(OpType::ReverseBits, (ReverseBits(A))); - -#define BITWISE_OP(OP, IMPL) \ - template struct Op : StrictValidation { \ - uint32_t operator()(T A) { return IMPL; } \ - } - -BITWISE_OP(OpType::CountBits, (CountBits(A))); -BITWISE_OP(OpType::FirstBitHigh, (FirstBitHigh(A))); -BITWISE_OP(OpType::FirstBitLow, (FirstBitLow(A))); - -#undef BITWISE_OP - -// -// Unary -// - -DEFAULT_OP_1(OpType::Initialize, (A)); - -template -struct Op : DefaultValidation {}; - -template -static std::vector buildExpectedArrayAccess(const InputSets &Inputs) { - const size_t VectorSize = Inputs[0].size(); - std::vector Expected; - const size_t IndexCount = 6; - Expected.resize(VectorSize); - - size_t IndexList[IndexCount] = { - 0, VectorSize - 1, 1, VectorSize - 2, VectorSize / 2, VectorSize / 2 + 1}; - size_t End = std::min(VectorSize, IndexCount); - for (size_t I = 0; I < End; ++I) - Expected[IndexList[I]] = Inputs[0][IndexList[I]]; - - return Expected; -} - -template -struct ExpectedBuilder { - static std::vector - buildExpected(Op, - const InputSets &Inputs) { - DXASSERT_NOMSG(Inputs.size() == 1); - return buildExpectedArrayAccess(Inputs); - } -}; - -template -struct Op : DefaultValidation {}; - -template -struct ExpectedBuilder { - static std::vector - buildExpected(Op, - const InputSets &Inputs) { - DXASSERT_NOMSG(Inputs.size() == 2); - return buildExpectedArrayAccess(Inputs); - } -}; - -// -// Cast -// - -#define CAST_OP(OP, TYPE, IMPL) \ - template struct Op : StrictValidation { \ - TYPE operator()(T A) { return IMPL; } \ - }; - -template HLSLBool_t CastToBool(T A) { return (bool)A; } -template <> HLSLBool_t CastToBool(HLSLHalf_t A) { return (bool)((float)A); } - -template HLSLHalf_t CastToFloat16(T A) { - return HLSLHalf_t(float(A)); -} - -template float CastToFloat32(T A) { return (float)A; } - -template double CastToFloat64(T A) { return (double)A; } -template <> double CastToFloat64(HLSLHalf_t A) { return (double)((float)A); } - -template int16_t CastToInt16(T A) { return (int16_t)A; } -template <> int16_t CastToInt16(HLSLHalf_t A) { return (int16_t)((float)A); } - -template int32_t CastToInt32(T A) { return (int32_t)A; } -template <> int32_t CastToInt32(HLSLHalf_t A) { return (int32_t)((float)A); } - -template int64_t CastToInt64(T A) { return (int64_t)A; } -template <> int64_t CastToInt64(HLSLHalf_t A) { return (int64_t)((float)A); } - -template 
uint16_t CastToUint16(T A) { return (uint16_t)A; } -template <> uint16_t CastToUint16(HLSLHalf_t A) { return (uint16_t)((float)A); } - -template uint32_t CastToUint32(T A) { return (uint32_t)A; } -template <> uint32_t CastToUint32(HLSLHalf_t A) { return (uint32_t)((float)A); } - -template uint64_t CastToUint64(T A) { return (uint64_t)A; } -template <> uint64_t CastToUint64(HLSLHalf_t A) { return (uint64_t)((float)A); } - -CAST_OP(OpType::CastToBool, HLSLBool_t, (CastToBool(A))); -CAST_OP(OpType::CastToInt16, int16_t, (CastToInt16(A))); -CAST_OP(OpType::CastToInt32, int32_t, (CastToInt32(A))); -CAST_OP(OpType::CastToInt64, int64_t, (CastToInt64(A))); -CAST_OP(OpType::CastToUint16, uint16_t, (CastToUint16(A))); -CAST_OP(OpType::CastToUint32, uint32_t, (CastToUint32(A))); -CAST_OP(OpType::CastToUint64, uint64_t, (CastToUint64(A))); -CAST_OP(OpType::CastToUint16_FromFP, uint16_t, (CastToUint16(A))); -CAST_OP(OpType::CastToUint32_FromFP, uint32_t, (CastToUint32(A))); -CAST_OP(OpType::CastToUint64_FromFP, uint64_t, (CastToUint64(A))); -CAST_OP(OpType::CastToFloat16, HLSLHalf_t, (CastToFloat16(A))); -CAST_OP(OpType::CastToFloat32, float, (CastToFloat32(A))); -CAST_OP(OpType::CastToFloat64, double, (CastToFloat64(A))); - -#undef CAST_OP - -// -// Trigonometric -// - -// All trigonometric ops are floating point types. These trig functions are -// defined to have a max absolute error of 0.0008 as per the D3D functional -// specs. An example with this spec for sin and cos is available here: -// https://microsoft.github.io/DirectX-Specs/d3d/archive/D3D11_3_FunctionalSpec.htm#22.10.20 - -template struct TrigonometricValidation { - ValidationConfig ValidationConfig = ValidationConfig::Epsilon(0.0008f); -}; - -// Half precision trig functions have a larger tolerance due to their lower -// precision. Note that the D3D spec -// does not mention half precision trig functions. -template struct TrigonometricValidation { - ValidationConfig ValidationConfig = ValidationConfig::Epsilon(0.003f); -}; - -// For the half precision trig functions with an infinite range in either -// direction we use 2 ULPs of tolerance instead. -template <> struct TrigonometricValidation { - ValidationConfig ValidationConfig = ValidationConfig::Ulp(2.0f); -}; - -template <> struct TrigonometricValidation { - ValidationConfig ValidationConfig = ValidationConfig::Ulp(2.0f); -}; - -template <> struct TrigonometricValidation { - ValidationConfig ValidationConfig = ValidationConfig::Ulp(2.0f); -}; - -#define TRIG_OP(OP, IMPL) \ - template struct Op : TrigonometricValidation { \ - T operator()(T A) { return IMPL; } \ - } - -TRIG_OP(OpType::Acos, (std::acos(A))); -TRIG_OP(OpType::Asin, (std::asin(A))); -TRIG_OP(OpType::Atan, (std::atan(A))); -TRIG_OP(OpType::Cos, (std::cos(A))); -TRIG_OP(OpType::Cosh, (std::cosh(A))); -TRIG_OP(OpType::Sin, (std::sin(A))); -TRIG_OP(OpType::Sinh, (std::sinh(A))); -TRIG_OP(OpType::Tan, (std::tan(A))); -TRIG_OP(OpType::Tanh, (std::tanh(A))); - -#undef TRIG_OP - -// -// AsType -// - -// We don't have std::bit_cast in C++17, so we define our own version. 
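// (With C++20 this helper could simply be std::bit_cast, which enforces the
// same size and trivially-copyable constraints; e.g.
// std::bit_cast<float>(0x3F800000u) == 1.0f.)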
-template -typename std::enable_if::value && - std::is_trivially_copyable::value, - ToT>::type -bit_cast(const FromT &Src) { - ToT Dst; - std::memcpy(&Dst, &Src, sizeof(ToT)); - return Dst; -} - -#define AS_TYPE_OP(OP, TYPE, IMPL) \ - template struct Op : StrictValidation { \ - TYPE operator()(T A) { return IMPL; } \ - }; - -// asFloat16 - -template HLSLHalf_t asFloat16(T); -template <> HLSLHalf_t asFloat16(HLSLHalf_t A) { return A; } -template <> HLSLHalf_t asFloat16(int16_t A) { - return HLSLHalf_t::FromHALF(bit_cast(A)); -} -template <> HLSLHalf_t asFloat16(uint16_t A) { - return HLSLHalf_t::FromHALF(bit_cast(A)); -} - -AS_TYPE_OP(OpType::AsFloat16, HLSLHalf_t, (asFloat16(A))); - -// asInt16 - -template int16_t asInt16(T); -template <> int16_t asInt16(HLSLHalf_t A) { return bit_cast(A.Val); } -template <> int16_t asInt16(int16_t A) { return A; } -template <> int16_t asInt16(uint16_t A) { return bit_cast(A); } - -AS_TYPE_OP(OpType::AsInt16, int16_t, (asInt16(A))); - -// asUint16 - -template uint16_t asUint16(T); -template <> uint16_t asUint16(HLSLHalf_t A) { - return bit_cast(A.Val); -} -template <> uint16_t asUint16(uint16_t A) { return A; } -template <> uint16_t asUint16(int16_t A) { return bit_cast(A); } - -AS_TYPE_OP(OpType::AsUint16, uint16_t, (asUint16(A))); - -// asFloat - -template float asFloat(T); -template <> float asFloat(float A) { return float(A); } -template <> float asFloat(int32_t A) { return bit_cast(A); } -template <> float asFloat(uint32_t A) { return bit_cast(A); } - -AS_TYPE_OP(OpType::AsFloat, float, (asFloat(A))); - -// asInt - -template int32_t asInt(T); -template <> int32_t asInt(float A) { return bit_cast(A); } -template <> int32_t asInt(int32_t A) { return A; } -template <> int32_t asInt(uint32_t A) { return bit_cast(A); } - -AS_TYPE_OP(OpType::AsInt, int32_t, (asInt(A))); - -// asUint - -template unsigned int asUint(T); -template <> unsigned int asUint(unsigned int A) { return A; } -template <> unsigned int asUint(float A) { return bit_cast(A); } -template <> unsigned int asUint(int A) { return bit_cast(A); } - -AS_TYPE_OP(OpType::AsUint, uint32_t, (asUint(A))); - -// asDouble - -template <> struct Op : StrictValidation { - double operator()(uint32_t LowBits, uint32_t HighBits) { - uint64_t Bits = (static_cast(HighBits) << 32) | LowBits; - double Result; - std::memcpy(&Result, &Bits, sizeof(Result)); - return Result; - } -}; - -// splitDouble -// -// splitdouble is special because it's a function that takes a double and -// outputs two values. To handle this special case, we override various bits of -// the testing machinery.
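// For example, splitting A = 1.0 (bit pattern 0x3FF0000000000000) yields
// LowBits == 0x00000000 and HighBits == 0x3FF00000, the same decomposition
// HLSL exposes via asuint(double, out lowbits, out highbits).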
-template <> struct ExpectedBuilder { - static std::vector - buildExpected(Op &, - const InputSets &Inputs) { - DXASSERT_NOMSG(Inputs.size() == 1); - - size_t VectorSize = Inputs[0].size(); - - std::vector Expected; - Expected.resize(VectorSize * 2); - - for (size_t I = 0; I < VectorSize; ++I) { - uint32_t Low, High; - splitDouble(Inputs[0][I], Low, High); - Expected[I] = Low; - Expected[I + VectorSize] = High; - } - - return Expected; - } - - static void splitDouble(const double A, uint32_t &LowBits, - uint32_t &HighBits) { - uint64_t Bits = 0; - std::memcpy(&Bits, &A, sizeof(Bits)); - LowBits = static_cast(Bits & 0xFFFFFFFF); - HighBits = static_cast(Bits >> 32); - } -}; - -// -// Unary Math -// - -template T UnaryMathAbs(T A) { - if constexpr (std::is_unsigned_v) - return A; - else - return static_cast(std::abs(A)); -} - -DEFAULT_OP_1(OpType::Abs, (UnaryMathAbs(A))); - -// Sign is special because the return type doesn't match the input type. -template struct Op : DefaultValidation { - int32_t operator()(T A) { - const T Zero = T(); - - if (A > Zero) - return 1; - if (A < Zero) - return -1; - return 0; - } -}; - -DEFAULT_OP_1(OpType::Ceil, (std::ceil(A))); -DEFAULT_OP_1(OpType::Exp, (std::exp(A))); -DEFAULT_OP_1(OpType::Floor, (std::floor(A))); -DEFAULT_OP_1(OpType::Frac, (A - static_cast(std::floor(A)))); -DEFAULT_OP_1(OpType::Log, (std::log(A))); -DEFAULT_OP_1(OpType::Rcp, (static_cast(1.0) / A)); -DEFAULT_OP_1(OpType::Round, (std::round(A))); -DEFAULT_OP_1(OpType::Rsqrt, - (static_cast(1.0) / static_cast(std::sqrt(A)))); -DEFAULT_OP_1(OpType::Sqrt, (std::sqrt(A))); -DEFAULT_OP_1(OpType::Trunc, (std::trunc(A))); -DEFAULT_OP_1(OpType::Exp2, (std::exp2(A))); -DEFAULT_OP_1(OpType::Log10, (std::log10(A))); -DEFAULT_OP_1(OpType::Log2, (std::log2(A))); - -// Frexp has a return value as well as an output parameter. So we handle it -// with special logic. Frexp is only supported for fp32 values. -template <> struct Op : DefaultValidation {}; - -template <> struct ExpectedBuilder { - static std::vector buildExpected(Op &, - const InputSets &Inputs) { - DXASSERT_NOMSG(Inputs.size() == 1); - - // The Expected vector's size is doubled. In the first half we store the - // Mantissas and in the second half we store the Exponents. This way we - // can leverage the existing logic which verifies expected values in a - // single vector. We just need to make sure that we organize the output in - // the same way in the shader and when we read it back. - - size_t VectorSize = Inputs[0].size(); - - std::vector Expected; - Expected.resize(VectorSize * 2); - - for (size_t I = 0; I < VectorSize; ++I) { - int Exp = 0; - float Man = std::frexp(Inputs[0][I], &Exp); - - // std::frexp returns a signed mantissa. But the HLSL implementation - // returns an unsigned mantissa. - Man = std::abs(Man); - - Expected[I] = Man; - - // std::frexp returns the exponent as an int, but HLSL stores it as a - // float. However, the HLSL exponent's fractional component is always 0. - // So the conversion between float and int is safe.
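// For example, std::frexp(8.0f) yields mantissa 0.5 and exponent 4
// (8.0 == 0.5 * 2^4), stored here as 0.5f at index I and 4.0f at
// index I + VectorSize.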
- Expected[I + VectorSize] = static_cast(Exp); - } - - return Expected; - } -}; - -// -// Binary Comparison -// - -#define BINARY_COMPARISON_OP(OP, IMPL) \ - template struct Op : StrictValidation { \ - HLSLBool_t operator()(T A, T B) { return IMPL; } \ - }; - -BINARY_COMPARISON_OP(OpType::LessThan, (A < B)); -BINARY_COMPARISON_OP(OpType::LessEqual, (A <= B)); -BINARY_COMPARISON_OP(OpType::GreaterThan, (A > B)); -BINARY_COMPARISON_OP(OpType::GreaterEqual, (A >= B)); -BINARY_COMPARISON_OP(OpType::Equal, (A == B)); -BINARY_COMPARISON_OP(OpType::NotEqual, (A != B)); - -// -// Binary Logical -// - -DEFAULT_OP_2(OpType::Logical_And, (A && B)); -DEFAULT_OP_2(OpType::Logical_Or, (A || B)); - -// -// Ternary Logical -// - -OP_3(OpType::Select, StrictValidation, (static_cast(A) ? B : C)); - -// -// Reduction -// - -#define REDUCTION_OP(OP, STDFUNC) \ - template struct Op : StrictValidation {}; \ - template struct ExpectedBuilder { \ - static std::vector buildExpected(Op &, \ - const InputSets &Inputs) { \ - const bool Res = STDFUNC(Inputs[0].begin(), Inputs[0].end(), \ - [](T A) { return A != static_cast(0); }); \ - return std::vector{Res}; \ - } \ - }; - -REDUCTION_OP(OpType::Any_Mixed, (std::any_of)); -REDUCTION_OP(OpType::Any_NoZero, (std::any_of)); -REDUCTION_OP(OpType::Any_Zero, (std::any_of)); - -REDUCTION_OP(OpType::All_Mixed, (std::all_of)); -REDUCTION_OP(OpType::All_NoZero, (std::all_of)); -REDUCTION_OP(OpType::All_Zero, (std::all_of)); - -#undef REDUCTION_OP - -template struct Op : StrictValidation {}; -template struct ExpectedBuilder { - // For Dot, buildExpected is a special case: it also computes an absolute - // epsilon for validation because Dot is a compound operation. Expected value - // is computed by multiplying and accumulating in fp64 for higher precision. - // Absolute epsilon is computed by reordering the accumulation into a - // worst-case sequence, then summing the per-step epsilons to produce a - // conservative error tolerance for the entire Dot operation. - static std::vector buildExpected(Op &Op, - const InputSets &Inputs) { - - std::vector PositiveProducts; - std::vector NegativeProducts; - - const size_t VectorSize = Inputs[0].size(); - - // Floating point ops have a tolerance of 0.5 ULPs per operation as per the - // DX spec. - const double ULPTolerance = 0.5; - - // Accumulate in fp64 to improve precision. - double DotProduct = 0.0; // computed reference result - double AbsoluteEpsilon = 0.0; // computed tolerance - for (size_t I = 0; I < VectorSize; ++I) { - double Product = Inputs[0][I] * Inputs[1][I]; - AbsoluteEpsilon += computeAbsoluteEpsilon(Product, ULPTolerance); - - DotProduct += Product; - - if (Product >= 0.0) - PositiveProducts.push_back(Product); - else - NegativeProducts.push_back(Product); - } - - // Sort each by magnitude so that we can accumulate them in worst-case - // order. - std::sort(PositiveProducts.begin(), PositiveProducts.end(), - std::greater()); - std::sort(NegativeProducts.begin(), NegativeProducts.end()); - - // Helper to sum the products and add each step's epsilon to the running - // absolute epsilon total. - auto SumProducts = [&AbsoluteEpsilon, - ULPTolerance](const std::vector &Values) { - double Sum = Values.empty() ? 0.0 : Values[0]; - for (size_t I = 1; I < Values.size(); ++I) { - Sum += Values[I]; - AbsoluteEpsilon += computeAbsoluteEpsilon(Sum, ULPTolerance); - } - return Sum; - }; - - // Accumulate products in the worst-case order, computing and accumulating - // the absolute epsilon error for each intermediate step.
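// For example, with products {1e8, 1.0, -1e8} the positives are summed
// largest-first (1e8 + 1.0), the negatives separately (-1e8), and the final
// cross sum contributes one more epsilon term; this is intended to bound the
// rounding error of whatever evaluation order the shader actually used.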
- const double SumPos = SumProducts(PositiveProducts); - const double SumNeg = SumProducts(NegativeProducts); - - if (!PositiveProducts.empty() && !NegativeProducts.empty()) - AbsoluteEpsilon += - computeAbsoluteEpsilon((SumPos + SumNeg), ULPTolerance); - - Op.ValidationConfig = ValidationConfig::Epsilon(AbsoluteEpsilon); - - std::vector Expected; - Expected.push_back(static_cast(DotProduct)); - return Expected; - } -}; - -template -static double computeAbsoluteEpsilon(double A, double ULPTolerance) { - DXASSERT((!isinf(A) && !isnan(A)), - "Input values should not produce inf or nan results"); - - // ULP is a positive value by definition. So, working with abs(A) simplifies - // our logic for computing ULP in the first place. - A = std::abs(A); - - double ULP = 0.0; - - if constexpr (std::is_same_v) - ULP = HLSLHalf_t::GetULP(A); - else - ULP = - std::nextafter(static_cast(A), std::numeric_limits::infinity()) - - static_cast(A); - - return ULP * ULPTolerance; -} - -template -struct Op : DefaultValidation {}; -template struct ExpectedBuilder { - static std::vector buildExpected(Op, - const InputSets &Inputs) { - std::vector Expected(Inputs[0].size(), Inputs[0][0]); - return Expected; - } -}; - -// -// Loading and Storing of Buffers -// - -STRICT_OP_1(OpType::LoadAndStore_RDH_BAB_UAV, (A)); -STRICT_OP_1(OpType::LoadAndStore_RDH_BAB_SRV, (A)); -STRICT_OP_1(OpType::LoadAndStore_DT_BAB_UAV, (A)); -STRICT_OP_1(OpType::LoadAndStore_DT_BAB_SRV, (A)); -STRICT_OP_1(OpType::LoadAndStore_RD_BAB_UAV, (A)); -STRICT_OP_1(OpType::LoadAndStore_RD_BAB_SRV, (A)); -STRICT_OP_1(OpType::LoadAndStore_RDH_SB_UAV, (A)); -STRICT_OP_1(OpType::LoadAndStore_RDH_SB_SRV, (A)); -STRICT_OP_1(OpType::LoadAndStore_DT_SB_UAV, (A)); -STRICT_OP_1(OpType::LoadAndStore_DT_SB_SRV, (A)); -STRICT_OP_1(OpType::LoadAndStore_RD_SB_UAV, (A)); -STRICT_OP_1(OpType::LoadAndStore_RD_SB_SRV, (A)); - -// -// Float Ops -// - -#define FLOAT_SPECIAL_OP(OP, IMPL) \ - template struct Op : StrictValidation { \ - HLSLBool_t operator()(T A) { return IMPL; } \ - }; - -FLOAT_SPECIAL_OP(OpType::IsFinite, (std::isfinite(A))); -FLOAT_SPECIAL_OP(OpType::IsInf, (std::isinf(A))); -FLOAT_SPECIAL_OP(OpType::IsNan, (std::isnan(A))); -#undef FLOAT_SPECIAL_OP - -template struct Op : DefaultValidation {}; - -template static T modF(T Input, T &OutParam); - -template <> float modF(float Input, float &OutParam) { - return std::modf(Input, &OutParam); -} - -template <> HLSLHalf_t modF(HLSLHalf_t Input, HLSLHalf_t &OutParam) { - float Exp = 0.0f; - float Man = std::modf(float(Input), &Exp); - OutParam = HLSLHalf_t(Exp); - return Man; -} - -template struct ExpectedBuilder { - static std::vector buildExpected(Op &, - const InputSets &Inputs) { - DXASSERT_NOMSG(Inputs.size() == 1); - size_t VectorSize = Inputs[0].size(); - - std::vector Expected; - Expected.resize(VectorSize * 2); - - for (size_t I = 0; I < VectorSize; ++I) { - T Exp; - T Man = modF(Inputs[0][I], Exp); - Expected[I] = Man; - Expected[I + VectorSize] = Exp; - } - - return Expected; - } -}; - -// -// Derivative Ops -// - -// Coarse derivatives (ddx/ddy): All lanes in quad get same result -// Fine derivatives (ddx_fine/ddy_fine): Each lane gets unique result -// For testing, we validate results on lane 3 to keep validation generic -// -// The value of A in each lane is computed by : A = A + LaneID*2 -// -// Top right (lane 1) - Top Left (lane 0) -DEFAULT_OP_1(OpType::DerivativeDdx, ((A + 2) - (A + 0))); -// Lower left (lane 2) - Top Left (lane 0) -DEFAULT_OP_1(OpType::DerivativeDdy, ((A + 4) - (A + 0))); - 
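-// Worked example (added, not in the original source): with A = 1 the quad
-// holds {1, 3, 5, 7} on lanes {0, 1, 2, 3}, so ddx_coarse = 3 - 1 = 2 and
-// ddy_coarse = 5 - 1 = 4, matching the (A + 2) - (A + 0) and
-// (A + 4) - (A + 0) expected values above.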
-// Bottom right (lane 3) - Bottom left (lane 2)
-DEFAULT_OP_1(OpType::DerivativeDdxFine, ((A + 6) - (A + 4)));
-// Bottom right (lane 3) - Top right (lane 1)
-DEFAULT_OP_1(OpType::DerivativeDdyFine, ((A + 6) - (A + 2)));
-
-//
-// Quad Read Ops
-//
-
-// We keep things generic so we can re-use this macro for all quad ops.
-// The lane we write to is determined via defines in the shader code.
-// See TestQuadRead in ShaderOpArith.xml.
-// For all cases we simply fill the vector on that lane with the value of the
-// third element.
-#define QUAD_READ_OP(OP, ARITY)                                               \
-  template <typename T> struct Op<OP, T> : DefaultValidation<T> {};           \
-  template <typename T> struct ExpectedBuilder<OP, T> {                       \
-    static std::vector<T> buildExpected(Op<OP, T> &,                          \
-                                        const InputSets<T> &Inputs) {         \
-      DXASSERT_NOMSG(Inputs.size() == ARITY);                                 \
-      std::vector<T> Expected;                                                \
-      const size_t VectorSize = Inputs[0].size();                             \
-      Expected.assign(VectorSize, Inputs[0][2]);                              \
-      return Expected;                                                        \
-    }                                                                         \
-  };
-
-QUAD_READ_OP(OpType::QuadReadLaneAt, 2);
-QUAD_READ_OP(OpType::QuadReadAcrossX, 1);
-QUAD_READ_OP(OpType::QuadReadAcrossY, 1);
-QUAD_READ_OP(OpType::QuadReadAcrossDiagonal, 1);
-
-#undef QUAD_READ_OP
-
-//
-// Wave Ops
-//
-
-#define WAVE_OP(OP, IMPL)                                                     \
-  template <typename T> struct Op<OP, T> : DefaultValidation<T> {             \
-    T operator()(T A, UINT WaveSize) { return IMPL; }                         \
-  };
-
-template <typename T> T waveActiveSum(T A, UINT WaveSize) {
-  T WaveSizeT = static_cast<T>(WaveSize);
-  return A * WaveSizeT;
-}
-
-WAVE_OP(OpType::WaveActiveSum, (waveActiveSum(A, WaveSize)));
-
-template <typename T> T waveActiveMin(T A, UINT WaveSize) {
-  std::vector<T> Values;
-  // Add the 'WaveLaneID' to A.
-  for (UINT I = 0; I < WaveSize; ++I)
-    Values.push_back(A + static_cast<T>(I));
-  return *std::min_element(Values.begin(), Values.end());
-}
-
-WAVE_OP(OpType::WaveActiveMin, (waveActiveMin(A, WaveSize)));
-
-template <typename T> T waveActiveMax(T A, UINT WaveSize) {
-  std::vector<T> Values;
-  // Add the 'WaveLaneID' to A.
-  for (UINT I = 0; I < WaveSize; ++I)
-    Values.push_back(A + static_cast<T>(I));
-  return *std::max_element(Values.begin(), Values.end());
-}
-
-WAVE_OP(OpType::WaveActiveMax, (waveActiveMax(A, WaveSize)));
-
-template <typename T> T waveActiveProduct(T A, UINT WaveSize) {
-  // We want to avoid overflow of a large product. So, the WaveActiveProdFn
-  // has an input set of all 1's, and we modify the value of the largest lane
-  // to be equal to the lane index in the shader.
-  return A * static_cast<T>(WaveSize - 1);
-}
-
-WAVE_OP(OpType::WaveActiveProduct, (waveActiveProduct(A, WaveSize)));
-
-template <typename T> T waveActiveBitAnd(T A, UINT) {
-  // We set the LSB to 0 in one of the lanes.
-  return static_cast<T>(A & ~static_cast<T>(1));
-}
-
-WAVE_OP(OpType::WaveActiveBitAnd, (waveActiveBitAnd(A, WaveSize)));
-
-template <typename T> T waveActiveBitOr(T A, UINT) {
-  // We set the LSB to 1 in one of the lanes.
-  return static_cast<T>(A | static_cast<T>(1));
-}
-
-WAVE_OP(OpType::WaveActiveBitOr, (waveActiveBitOr(A, WaveSize)));
-
-template <typename T> T waveActiveBitXor(T A, UINT) {
-  // We clear the LSB in every lane except the last lane, which sets it to 1.
-  return static_cast<T>(A | static_cast<T>(1));
-}
-
-WAVE_OP(OpType::WaveActiveBitXor, (waveActiveBitXor(A, WaveSize)));
-
-WAVE_OP(OpType::WaveMultiPrefixBitAnd, waveMultiPrefixBitAnd(A, WaveSize));
-
-template <typename T> T waveMultiPrefixBitAnd(T A, UINT) {
-  // All lanes in the group mask use a mask to filter for only the second and
-  // third LSBs.
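-  // Illustrative example (added, not in the original source): with
-  // A = 0b1011, masking with 0x6 (0b0110) keeps only bits 1 and 2, giving
-  // 0b0010.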
- return static_cast(A & static_cast(0x6)); -} - -WAVE_OP(OpType::WaveMultiPrefixBitOr, waveMultiPrefixBitOr(A, WaveSize)); - -template T waveMultiPrefixBitOr(T A, UINT) { - // All lanes in the group mask clear the second LSB. - return static_cast(A & ~static_cast(0x2)); -} - -template -struct Op : StrictValidation {}; - -template struct ExpectedBuilder { - static std::vector buildExpected(Op &, - const InputSets &Inputs, UINT) { - DXASSERT_NOMSG(Inputs.size() == 1); - - std::vector Expected; - const size_t VectorSize = Inputs[0].size(); - - // We get a little creative for MultiPrefixBitXor. The mask we use for the - // group in the shader is 0xE (0b1110), which includes lanes 1, 2, and 3. - // Prefix ops don't include the value of the current lane in their result. - // So, for this test we store the result of WaveMultiPrefixBitXor from lane - // 3. This means only the values from lanes 1 and 2 contribute to the result - // at lane 3. - // - // In the shader: - // - Lane 0: Set to 0 (not in mask, shouldn't affect result) - // - Lane 1: Keeps original input values - // - Lane 2: Lower half + last element set to 0, upper half keeps input - // - Lane 3: Stores the prefix XOR result (lanes 1 XOR lanes 2) - // - // Expected result: Lower half matches input (lane 1 XOR 0), upper half is - // 0s, except last element matches input. - for (size_t I = 0; I < VectorSize / 2; ++I) - Expected.push_back(Inputs[0][I]); - for (size_t I = VectorSize / 2; I < VectorSize - 1; ++I) - Expected.push_back(0); - - // We also set the last element to 0 on lane 2 so the last element in the - // output vector matches the last element in the input vector. - Expected.push_back(Inputs[0][VectorSize - 1]); - - return Expected; - } -}; - -template -struct Op : StrictValidation {}; - -template struct ExpectedBuilder { - static std::vector - buildExpected(Op &, - const InputSets &Inputs, UINT) { - DXASSERT_NOMSG(Inputs.size() == 1); - - std::vector Expected; - const size_t VectorSize = Inputs[0].size(); - Expected.assign(VectorSize, static_cast(true)); - // We set the last element to a different value on a single lane. - Expected[VectorSize - 1] = static_cast(false); - - return Expected; - } -}; - -template -struct Op : StrictValidation {}; - -template struct ExpectedBuilder { - static std::vector buildExpected(Op &, - const InputSets &Inputs, UINT) { - DXASSERT_NOMSG(Inputs.size() == 1); - - std::vector Expected; - const size_t VectorSize = Inputs[0].size(); - // Simple test, on the lane that we read we also fill the vector with the - // value of the first element. - Expected.assign(VectorSize, Inputs[0][0]); - - return Expected; - } -}; - -template -struct Op : StrictValidation {}; - -template struct ExpectedBuilder { - static std::vector buildExpected(Op &, - const InputSets &Inputs, UINT) { - DXASSERT_NOMSG(Inputs.size() == 1); - - std::vector Expected; - const size_t VectorSize = Inputs[0].size(); - // Simple test, on the lane that we read we also fill the vector with the - // value of the first element. - Expected.assign(VectorSize, Inputs[0][0]); - - return Expected; - } -}; - -WAVE_OP(OpType::WavePrefixSum, (wavePrefixSum(A, WaveSize))); - -template T wavePrefixSum(T A, UINT WaveSize) { - // We test the prefix sum in the 'middle' lane. This choice is arbitrary. 
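-  // Reasoning (comment added for clarity): WavePrefixSum at lane N sums the
-  // values of the N lanes before it, and every lane holds A, so the middle
-  // lane expects A * (WaveSize / 2).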
-  return A * static_cast<T>(WaveSize / 2);
-}
-
-WAVE_OP(OpType::WaveMultiPrefixSum, (waveMultiPrefixSum(A, WaveSize)));
-
-template <typename T> T waveMultiPrefixSum(T A, UINT) {
-  return A * static_cast<T>(2u);
-}
-
-WAVE_OP(OpType::WavePrefixProduct, (wavePrefixProduct(A, WaveSize)));
-
-template <typename T> T wavePrefixProduct(T A, UINT) {
-  // We test the prefix product in the 3rd lane to avoid overflow issues.
-  // So the result is A * A.
-  return A * A;
-}
-
-WAVE_OP(OpType::WaveMultiPrefixProduct, (waveMultiPrefixProduct(A, WaveSize)));
-
-template <typename T> T waveMultiPrefixProduct(T A, UINT) {
-  // The group mask has 3 lanes.
-  return A * A;
-}
-
-template <typename T> struct Op<OpType::WaveMatch, T> : StrictValidation<T> {};
-
-static void WriteExpectedValueForLane(UINT *Dest, const UINT LaneID,
-                                      const std::bitset<128> &ExpectedValue) {
-  std::bitset<128> Lo32Mask;
-  Lo32Mask.set();
-  Lo32Mask >>= 128 - 32;
-
-  UINT Offset = 4 * LaneID;
-  for (uint32_t I = 0; I < 4; I++) {
-    uint32_t V = ((ExpectedValue >> (I * 32)) & Lo32Mask).to_ulong();
-    Dest[Offset++] = V;
-  }
-}
-
-template <typename T> struct ExpectedBuilder<OpType::WaveMatch, T> {
-  static std::vector<UINT> buildExpected(Op<OpType::WaveMatch, T> &,
-                                         const InputSets<T> &Inputs,
-                                         const UINT WaveSize) {
-    // This test sets lanes (0, min(VectorSize/2, WaveSize/2), and
-    // min(VectorSize-1, WaveSize-1)) to unique values and has them modify the
-    // vector at their respective indices. The remaining lanes are left
-    // unchanged.
-    DXASSERT_NOMSG(Inputs.size() == 1);
-
-    const UINT VectorSize = static_cast<UINT>(Inputs[0].size());
-    std::vector<UINT> Expected;
-    Expected.assign(WaveSize * 4, 0);
-
-    const UINT MidLaneID = std::min(VectorSize / 2, WaveSize / 2);
-    const UINT LastLaneID = std::min(VectorSize - 1, WaveSize - 1);
-
-    // Use a std::bitset<128> to represent the uint4 returned by WaveMatch, as
-    // it's convenient this way in C++.
-    std::bitset<128> DefaultExpectedValue;
-
-    for (UINT I = 0; I < WaveSize; ++I)
-      DefaultExpectedValue.set(I);
-
-    DefaultExpectedValue.reset(0);
-    DefaultExpectedValue.reset(MidLaneID);
-    DefaultExpectedValue.reset(LastLaneID);
-
-    for (UINT LaneID = 0; LaneID < WaveSize; ++LaneID) {
-      if (LaneID == 0 || LaneID == MidLaneID || LaneID == LastLaneID) {
-        std::bitset<128> ExpectedValue(0);
-        ExpectedValue.set(LaneID);
-        WriteExpectedValueForLane(Expected.data(), LaneID, ExpectedValue);
-        continue;
-      }
-      WriteExpectedValueForLane(Expected.data(), LaneID, DefaultExpectedValue);
-    }
-
-    return Expected;
-  }
-};
-
-#undef WAVE_OP
-
-//
-// dispatchTest
-//
-
-template struct ExpectedBuilder {
-
-  static auto buildExpected(Op Op, const InputSets &Inputs) {
-    DXASSERT_NOMSG(Inputs.size() == 1);
-
-    std::vector Expected;
-    Expected.reserve(Inputs[0].size());
-
-    for (size_t I = 0; I < Inputs[0].size(); ++I)
-      Expected.push_back(Op(Inputs[0][I]));
-
-    return Expected;
-  }
-
-  static auto buildExpected(Op Op, const InputSets &Inputs) {
-    DXASSERT_NOMSG(Inputs.size() == 2);
-
-    std::vector Expected;
-    Expected.reserve(Inputs[0].size());
-
-    for (size_t I = 0; I < Inputs[0].size(); ++I)
-      Expected.push_back(Op(Inputs[0][I], Inputs[1][I]));
-
-    return Expected;
-  }
-
-  static auto buildExpected(Op Op, const InputSets &Inputs) {
-    DXASSERT_NOMSG(Inputs.size() == 3);
-
-    std::vector Expected;
-    Expected.reserve(Inputs[0].size());
-
-    for (size_t I = 0; I < Inputs[0].size(); ++I)
-      Expected.push_back(Op(Inputs[0][I], Inputs[1][I], Inputs[2][I]));
-
-    return Expected;
-  }
-
-  static auto buildExpected(Op Op, const InputSets &Inputs,
-                            UINT WaveSize) {
-    DXASSERT_NOMSG(Inputs.size() == 1);
-
-    std::vector Expected;
-    Expected.reserve(Inputs[0].size());
-
-    for (size_t
I = 0; I < Inputs[0].size(); ++I) - Expected.push_back(Op(Inputs[0][I], WaveSize)); - - return Expected; - } -}; - -template -std::vector getInputSizesToTest(size_t OverrideInputSize) { - std::vector InputVectorSizes; - const std::array DefaultInputSizes = {3, 5, 16, 17, - 35, 100, 256, 1024}; - - if (OverrideInputSize) - InputVectorSizes.push_back(OverrideInputSize); - else { - // StructuredBuffers have a max size of 2048 bytes. - const size_t MaxInputSize = - IsStructuredBufferLoadAndStoreOp(OP) ? 2048 / sizeof(T) : 1024; - - for (size_t Size : DefaultInputSizes) { - if (Size <= MaxInputSize) - InputVectorSizes.push_back(Size); - } - - if (InputVectorSizes.empty() || MaxInputSize != InputVectorSizes.back()) - InputVectorSizes.push_back(MaxInputSize); - } - - return InputVectorSizes; -} - -template -void dispatchTest(ID3D12Device *D3DDevice, bool VerboseLogging, - size_t OverrideInputSize) { - - const std::vector InputVectorSizes = - getInputSizesToTest(OverrideInputSize); - - constexpr const Operation &Operation = getOperation(OP); - Op Op; - - for (size_t VectorSize : InputVectorSizes) { - std::vector> Inputs = - buildTestInputs(VectorSize, Operation.InputSets, Operation.Arity); - - auto Expected = ExpectedBuilder::buildExpected(Op, Inputs); - - runAndVerify(D3DDevice, VerboseLogging, Operation, Inputs, Expected, - Op.ValidationConfig); - } -} - -template -void dispatchWaveOpTest(ID3D12Device *D3DDevice, bool VerboseLogging, - size_t OverrideInputSize, UINT WaveSize) { - - const std::vector InputVectorSizes = - getInputSizesToTest(OverrideInputSize); - - constexpr const Operation &Operation = getOperation(OP); - Op Op; - - const std::string AdditionalCompilerOptions = - "-DWAVE_SIZE=" + std::to_string(WaveSize) + - " -DNUMTHREADS_XYZ=" + std::to_string(WaveSize) + ",1,1 "; - - for (size_t VectorSize : InputVectorSizes) { - std::vector> Inputs = - buildTestInputs(VectorSize, Operation.InputSets, Operation.Arity); - - auto Expected = ExpectedBuilder::buildExpected(Op, Inputs, WaveSize); - - runAndVerify(D3DDevice, VerboseLogging, Operation, Inputs, Expected, - Op.ValidationConfig, AdditionalCompilerOptions); - } -} - -} // namespace LongVector - -using namespace LongVector; - -// TAEF test entry points -#define HLK_TEST(Op, DataType) \ - TEST_METHOD(Op##_##DataType) { runTest(); } - -#define HLK_TEST_DOUBLE(Op, DataType) \ - TEST_METHOD(Op##_##DataType) { \ - BEGIN_TEST_METHOD_PROPERTIES() \ - TEST_METHOD_PROPERTY( \ - "Kits.Specification", \ - "Device.Graphics.D3D12.DXILCore.ShaderModel69.DoublePrecision") \ - END_TEST_METHOD_PROPERTIES() \ - runTest(); \ - } - -#define HLK_WAVEOP_TEST(Op, DataType) \ - TEST_METHOD(Op##_##DataType) { \ - BEGIN_TEST_METHOD_PROPERTIES() \ - TEST_METHOD_PROPERTY( \ - "Kits.Specification", \ - "Device.Graphics.D3D12.DXILCore.ShaderModel69.CoreRequirement") \ - END_TEST_METHOD_PROPERTIES() \ - runWaveOpTest(); \ - } - -#define HLK_WAVEOP_TEST_DOUBLE(Op, DataType) \ - TEST_METHOD(Op##_##DataType) { \ - BEGIN_TEST_METHOD_PROPERTIES() \ - TEST_METHOD_PROPERTY( \ - "Kits.Specification", \ - "Device.Graphics.D3D12.DXILCore.ShaderModel69.DoublePrecision") \ - END_TEST_METHOD_PROPERTIES() \ - runWaveOpTest(); \ - } - -class DxilConf_SM69_Vectorized { -public: - BEGIN_TEST_CLASS(DxilConf_SM69_Vectorized) - TEST_CLASS_PROPERTY("Kits.TestName", - "D3D12 - Shader Model 6.9 - Vectorized DXIL - Core Tests") - TEST_CLASS_PROPERTY("Kits.TestId", "81db1ff8-5bc5-48a1-8d7b-600fc600a677") - TEST_CLASS_PROPERTY("Kits.Description", - "Validates required SM 6.9 vectorized DXIL 
operations") - TEST_CLASS_PROPERTY( - "Kits.Specification", - "Device.Graphics.D3D12.DXILCore.ShaderModel69.CoreRequirement") - TEST_METHOD_PROPERTY(L"Priority", L"0") - END_TEST_CLASS() - - TEST_CLASS_SETUP(classSetup) { - WEX::TestExecution::SetVerifyOutput verifySettings( - WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); - - // Run this only once. - if (!Initialized) { - Initialized = true; - - D3D12SDK = D3D12SDKSelector(); - - WEX::TestExecution::RuntimeParameters::TryGetValue(L"VerboseLogging", - VerboseLogging); - if (VerboseLogging) - hlsl_test::LogCommentFmt(L"Verbose logging is enabled for this test."); - else - hlsl_test::LogCommentFmt(L"Verbose logging is disabled for this test."); - - WEX::TestExecution::RuntimeParameters::TryGetValue(L"InputSize", - OverrideInputSize); - - WEX::TestExecution::RuntimeParameters::TryGetValue(L"WaveLaneCount", - OverrideWaveLaneCount); - - bool IsRITP = false; - WEX::TestExecution::RuntimeParameters::TryGetValue(L"RITP", IsRITP); - - if (IsRITP) { - if (!OverrideInputSize) - // Help keep test runtime down for RITP runs - OverrideInputSize = 10; - else - hlsl_test::LogWarningFmt( - L"RITP is enabled but InputSize is also set. Will use the" - L"InputSize value: %d.", - OverrideInputSize); - } - - bool FailIfRequirementsNotMet = false; -#ifdef _HLK_CONF - FailIfRequirementsNotMet = true; -#endif - WEX::TestExecution::RuntimeParameters::TryGetValue( - L"FailIfRequirementsNotMet", FailIfRequirementsNotMet); - - const bool SkipUnsupported = !FailIfRequirementsNotMet; - if (!D3D12SDK->createDevice(&D3DDevice, D3D_SHADER_MODEL_6_9, - SkipUnsupported)) { - if (FailIfRequirementsNotMet) - hlsl_test::LogErrorFmt( - L"Device Creation failed, resulting in test failure, since " - L"FailIfRequirementsNotMet is set. The expectation is that this " - L"test will only be executed if something has previously " - L"determined that the system meets the requirements of this " - L"test."); - - return false; - } - } - - return true; - } - - TEST_METHOD_SETUP(methodSetup) { - // It's possible a previous test case caused a device removal. If it did we - // need to try and create a new device. - if (D3DDevice && D3DDevice->GetDeviceRemovedReason() != S_OK) { - hlsl_test::LogCommentFmt(L"Device was lost!"); - D3DDevice.Release(); - } - - if (!D3DDevice) { - hlsl_test::LogCommentFmt(L"Creating device"); - - // We expect this to succeed, and fail if it doesn't, because classSetup() - // has already ensured that the system configuration meets the - // requirements of all the tests in this class. 
- const bool SkipUnsupported = false; - - VERIFY_IS_TRUE(D3D12SDK->createDevice(&D3DDevice, D3D_SHADER_MODEL_6_9, - SkipUnsupported)); - } - - return true; - } - - template void runWaveOpTest() { - WEX::TestExecution::SetVerifyOutput VerifySettings( - WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); - - UINT WaveSize = 0; - - if (OverrideWaveLaneCount > 0) { - WaveSize = OverrideWaveLaneCount; - hlsl_test::LogCommentFmt( - L"Using overridden WaveLaneCount of %d for this test.", WaveSize); - } else { - D3D12_FEATURE_DATA_D3D12_OPTIONS1 WaveOpts; - VERIFY_SUCCEEDED(D3DDevice->CheckFeatureSupport( - D3D12_FEATURE_D3D12_OPTIONS1, &WaveOpts, sizeof(WaveOpts))); - - WaveSize = WaveOpts.WaveLaneCountMin; - } - - DXASSERT_NOMSG(WaveSize > 0); - DXASSERT((WaveSize & (WaveSize - 1)) == 0, "must be a power of 2"); - - dispatchWaveOpTest(D3DDevice, VerboseLogging, OverrideInputSize, - WaveSize); - } - - template void runTest() { - WEX::TestExecution::SetVerifyOutput verifySettings( - WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); - dispatchTest(D3DDevice, VerboseLogging, OverrideInputSize); - } - - // TernaryMath - - HLK_TEST(Mad, uint16_t); - HLK_TEST(Mad, uint32_t); - HLK_TEST(Mad, uint64_t); - HLK_TEST(Mad, int16_t); - HLK_TEST(Mad, int32_t); - HLK_TEST(Mad, int64_t); - HLK_TEST(Mad, HLSLHalf_t); - HLK_TEST(Mad, float); - HLK_TEST_DOUBLE(Fma, double); - HLK_TEST_DOUBLE(Mad, double); - - // BinaryMath - - HLK_TEST(Add, HLSLBool_t); - HLK_TEST(Subtract, HLSLBool_t); - HLK_TEST(Add, int16_t); - HLK_TEST(Subtract, int16_t); - HLK_TEST(Multiply, int16_t); - HLK_TEST(Divide, int16_t); - HLK_TEST(Modulus, int16_t); - HLK_TEST(Min, int16_t); - HLK_TEST(Max, int16_t); - HLK_TEST(Add, int32_t); - HLK_TEST(Subtract, int32_t); - HLK_TEST(Multiply, int32_t); - HLK_TEST(Divide, int32_t); - HLK_TEST(Modulus, int32_t); - HLK_TEST(Min, int32_t); - HLK_TEST(Max, int32_t); - HLK_TEST(Add, int64_t); - HLK_TEST(Subtract, int64_t); - HLK_TEST(Multiply, int64_t); - HLK_TEST(Divide, int64_t); - HLK_TEST(Modulus, int64_t); - HLK_TEST(Min, int64_t); - HLK_TEST(Max, int64_t); - HLK_TEST(Add, uint16_t); - HLK_TEST(Subtract, uint16_t); - HLK_TEST(Multiply, uint16_t); - HLK_TEST(Divide, uint16_t); - HLK_TEST(Modulus, uint16_t); - HLK_TEST(Min, uint16_t); - HLK_TEST(Max, uint16_t); - HLK_TEST(Add, uint32_t); - HLK_TEST(Subtract, uint32_t); - HLK_TEST(Multiply, uint32_t); - HLK_TEST(Divide, uint32_t); - HLK_TEST(Modulus, uint32_t); - HLK_TEST(Min, uint32_t); - HLK_TEST(Max, uint32_t); - HLK_TEST(Add, uint64_t); - HLK_TEST(Subtract, uint64_t); - HLK_TEST(Multiply, uint64_t); - HLK_TEST(Divide, uint64_t); - HLK_TEST(Modulus, uint64_t); - HLK_TEST(Min, uint64_t); - HLK_TEST(Max, uint64_t); - HLK_TEST(Add, HLSLHalf_t); - HLK_TEST(Subtract, HLSLHalf_t); - HLK_TEST(Multiply, HLSLHalf_t); - HLK_TEST(Divide, HLSLHalf_t); - HLK_TEST(Modulus, HLSLHalf_t); - HLK_TEST(Min, HLSLHalf_t); - HLK_TEST(Max, HLSLHalf_t); - HLK_TEST(Ldexp, HLSLHalf_t); - HLK_TEST(Add, float); - HLK_TEST(Subtract, float); - HLK_TEST(Multiply, float); - HLK_TEST(Divide, float); - HLK_TEST(Modulus, float); - HLK_TEST(Min, float); - HLK_TEST(Max, float); - HLK_TEST(Ldexp, float); - HLK_TEST_DOUBLE(Add, double); - HLK_TEST_DOUBLE(Subtract, double); - HLK_TEST_DOUBLE(Multiply, double); - HLK_TEST_DOUBLE(Divide, double); - HLK_TEST_DOUBLE(Min, double); - HLK_TEST_DOUBLE(Max, double); - - // Bitwise - - HLK_TEST(And, uint16_t); - HLK_TEST(Or, uint16_t); - HLK_TEST(Xor, uint16_t); - HLK_TEST(ReverseBits, uint16_t); - HLK_TEST(CountBits, uint16_t); - 
HLK_TEST(FirstBitHigh, uint16_t); - HLK_TEST(FirstBitLow, uint16_t); - HLK_TEST(LeftShift, uint16_t); - HLK_TEST(RightShift, uint16_t); - HLK_TEST(And, uint32_t); - HLK_TEST(Or, uint32_t); - HLK_TEST(Xor, uint32_t); - HLK_TEST(LeftShift, uint32_t); - HLK_TEST(RightShift, uint32_t); - HLK_TEST(ReverseBits, uint32_t); - HLK_TEST(CountBits, uint32_t); - HLK_TEST(FirstBitHigh, uint32_t); - HLK_TEST(FirstBitLow, uint32_t); - HLK_TEST(And, uint64_t); - HLK_TEST(Or, uint64_t); - HLK_TEST(Xor, uint64_t); - HLK_TEST(LeftShift, uint64_t); - HLK_TEST(RightShift, uint64_t); - HLK_TEST(ReverseBits, uint64_t); - HLK_TEST(CountBits, uint64_t); - HLK_TEST(FirstBitHigh, uint64_t); - HLK_TEST(FirstBitLow, uint64_t); - HLK_TEST(And, int16_t); - HLK_TEST(Or, int16_t); - HLK_TEST(Xor, int16_t); - HLK_TEST(LeftShift, int16_t); - HLK_TEST(RightShift, int16_t); - HLK_TEST(ReverseBits, int16_t); - HLK_TEST(CountBits, int16_t); - HLK_TEST(FirstBitHigh, int16_t); - HLK_TEST(FirstBitLow, int16_t); - HLK_TEST(And, int32_t); - HLK_TEST(Or, int32_t); - HLK_TEST(Xor, int32_t); - HLK_TEST(LeftShift, int32_t); - HLK_TEST(RightShift, int32_t); - HLK_TEST(ReverseBits, int32_t); - HLK_TEST(CountBits, int32_t); - HLK_TEST(FirstBitHigh, int32_t); - HLK_TEST(FirstBitLow, int32_t); - HLK_TEST(And, int64_t); - HLK_TEST(Or, int64_t); - HLK_TEST(Xor, int64_t); - HLK_TEST(LeftShift, int64_t); - HLK_TEST(RightShift, int64_t); - HLK_TEST(ReverseBits, int64_t); - HLK_TEST(CountBits, int64_t); - HLK_TEST(FirstBitHigh, int64_t); - HLK_TEST(FirstBitLow, int64_t); - HLK_TEST(Saturate, HLSLHalf_t); - HLK_TEST(Saturate, float); - HLK_TEST_DOUBLE(Saturate, double); - - // Unary - - HLK_TEST(Initialize, HLSLBool_t); - HLK_TEST(ArrayOperator_StaticAccess, HLSLBool_t); - HLK_TEST(ArrayOperator_DynamicAccess, HLSLBool_t); - HLK_TEST(Initialize, int16_t); - HLK_TEST(ArrayOperator_StaticAccess, int16_t); - HLK_TEST(ArrayOperator_DynamicAccess, int16_t); - HLK_TEST(Initialize, int32_t); - HLK_TEST(ArrayOperator_StaticAccess, int32_t); - HLK_TEST(ArrayOperator_DynamicAccess, int32_t); - HLK_TEST(Initialize, int64_t); - HLK_TEST(ArrayOperator_StaticAccess, int64_t); - HLK_TEST(ArrayOperator_DynamicAccess, int64_t); - HLK_TEST(Initialize, uint16_t); - HLK_TEST(ArrayOperator_StaticAccess, uint16_t); - HLK_TEST(ArrayOperator_DynamicAccess, uint16_t); - HLK_TEST(Initialize, uint32_t); - HLK_TEST(ArrayOperator_StaticAccess, uint32_t); - HLK_TEST(ArrayOperator_DynamicAccess, uint32_t); - HLK_TEST(Initialize, uint64_t); - HLK_TEST(ArrayOperator_StaticAccess, uint64_t); - HLK_TEST(ArrayOperator_DynamicAccess, uint64_t); - HLK_TEST(Initialize, HLSLHalf_t); - HLK_TEST(ArrayOperator_StaticAccess, HLSLHalf_t); - HLK_TEST(ArrayOperator_DynamicAccess, HLSLHalf_t); - HLK_TEST(Initialize, float); - HLK_TEST(ArrayOperator_StaticAccess, float); - HLK_TEST(ArrayOperator_DynamicAccess, float); - HLK_TEST_DOUBLE(Initialize, double); - HLK_TEST_DOUBLE(ArrayOperator_StaticAccess, double); - HLK_TEST_DOUBLE(ArrayOperator_DynamicAccess, double); - - HLK_TEST(ShuffleVector, HLSLBool_t); - HLK_TEST(ShuffleVector, int16_t); - HLK_TEST(ShuffleVector, int32_t); - HLK_TEST(ShuffleVector, int64_t); - HLK_TEST(ShuffleVector, uint16_t); - HLK_TEST(ShuffleVector, uint32_t); - HLK_TEST(ShuffleVector, uint64_t); - HLK_TEST(ShuffleVector, HLSLHalf_t); - HLK_TEST(ShuffleVector, float); - HLK_TEST_DOUBLE(ShuffleVector, double); - - // Explicit Cast - - HLK_TEST(CastToInt16, HLSLBool_t); - HLK_TEST(CastToInt32, HLSLBool_t); - HLK_TEST(CastToInt64, HLSLBool_t); - HLK_TEST(CastToUint16, 
HLSLBool_t); - HLK_TEST(CastToUint32, HLSLBool_t); - HLK_TEST(CastToUint64, HLSLBool_t); - HLK_TEST(CastToFloat16, HLSLBool_t); - HLK_TEST(CastToFloat32, HLSLBool_t); - HLK_TEST(CastToFloat64, HLSLBool_t); - - HLK_TEST(CastToBool, HLSLHalf_t); - HLK_TEST(CastToInt16, HLSLHalf_t); - HLK_TEST(CastToInt32, HLSLHalf_t); - HLK_TEST(CastToInt64, HLSLHalf_t); - HLK_TEST(CastToUint16_FromFP, HLSLHalf_t); - HLK_TEST(CastToUint32_FromFP, HLSLHalf_t); - HLK_TEST(CastToUint64_FromFP, HLSLHalf_t); - HLK_TEST(CastToFloat32, HLSLHalf_t); - HLK_TEST(CastToFloat64, HLSLHalf_t); - - HLK_TEST(CastToBool, float); - HLK_TEST(CastToInt16, float); - HLK_TEST(CastToInt32, float); - HLK_TEST(CastToInt64, float); - HLK_TEST(CastToUint16_FromFP, float); - HLK_TEST(CastToUint32_FromFP, float); - HLK_TEST(CastToUint64_FromFP, float); - HLK_TEST(CastToFloat16, float); - HLK_TEST(CastToFloat64, float); - - HLK_TEST_DOUBLE(CastToBool, double); - HLK_TEST_DOUBLE(CastToInt16, double); - HLK_TEST_DOUBLE(CastToInt32, double); - HLK_TEST_DOUBLE(CastToInt64, double); - HLK_TEST_DOUBLE(CastToUint16_FromFP, double); - HLK_TEST_DOUBLE(CastToUint32_FromFP, double); - HLK_TEST_DOUBLE(CastToUint64_FromFP, double); - HLK_TEST_DOUBLE(CastToFloat16, double); - HLK_TEST_DOUBLE(CastToFloat32, double); - - HLK_TEST(CastToBool, uint16_t); - HLK_TEST(CastToInt16, uint16_t); - HLK_TEST(CastToInt32, uint16_t); - HLK_TEST(CastToInt64, uint16_t); - HLK_TEST(CastToUint32, uint16_t); - HLK_TEST(CastToUint64, uint16_t); - HLK_TEST(CastToFloat16, uint16_t); - HLK_TEST(CastToFloat32, uint16_t); - HLK_TEST(CastToFloat64, uint16_t); - - HLK_TEST(CastToBool, uint32_t); - HLK_TEST(CastToInt16, uint32_t); - HLK_TEST(CastToInt32, uint32_t); - HLK_TEST(CastToInt64, uint32_t); - HLK_TEST(CastToUint16, uint32_t); - HLK_TEST(CastToUint64, uint32_t); - HLK_TEST(CastToFloat16, uint32_t); - HLK_TEST(CastToFloat32, uint32_t); - HLK_TEST(CastToFloat64, uint32_t); - - HLK_TEST(CastToBool, uint64_t); - HLK_TEST(CastToInt16, uint64_t); - HLK_TEST(CastToInt32, uint64_t); - HLK_TEST(CastToInt64, uint64_t); - HLK_TEST(CastToUint16, uint64_t); - HLK_TEST(CastToUint32, uint64_t); - HLK_TEST(CastToFloat16, uint64_t); - HLK_TEST(CastToFloat32, uint64_t); - HLK_TEST(CastToFloat64, uint64_t); - - HLK_TEST(CastToBool, int16_t); - HLK_TEST(CastToInt32, int16_t); - HLK_TEST(CastToInt64, int16_t); - HLK_TEST(CastToUint16, int16_t); - HLK_TEST(CastToUint32, int16_t); - HLK_TEST(CastToUint64, int16_t); - HLK_TEST(CastToFloat16, int16_t); - HLK_TEST(CastToFloat32, int16_t); - HLK_TEST(CastToFloat64, int16_t); - - HLK_TEST(CastToBool, int32_t); - HLK_TEST(CastToInt16, int32_t); - HLK_TEST(CastToInt64, int32_t); - HLK_TEST(CastToUint16, int32_t); - HLK_TEST(CastToUint32, int32_t); - HLK_TEST(CastToUint64, int32_t); - HLK_TEST(CastToFloat16, int32_t); - HLK_TEST(CastToFloat32, int32_t); - HLK_TEST(CastToFloat64, int32_t); - - HLK_TEST(CastToBool, int64_t); - HLK_TEST(CastToInt16, int64_t); - HLK_TEST(CastToInt32, int64_t); - HLK_TEST(CastToUint16, int64_t); - HLK_TEST(CastToUint32, int64_t); - HLK_TEST(CastToUint64, int64_t); - HLK_TEST(CastToFloat16, int64_t); - HLK_TEST(CastToFloat32, int64_t); - HLK_TEST(CastToFloat64, int64_t); - - // Trigonometric - - HLK_TEST(Acos, HLSLHalf_t); - HLK_TEST(Asin, HLSLHalf_t); - HLK_TEST(Atan, HLSLHalf_t); - HLK_TEST(Cos, HLSLHalf_t); - HLK_TEST(Cosh, HLSLHalf_t); - HLK_TEST(Sin, HLSLHalf_t); - HLK_TEST(Sinh, HLSLHalf_t); - HLK_TEST(Tan, HLSLHalf_t); - HLK_TEST(Tanh, HLSLHalf_t); - HLK_TEST(Acos, float); - HLK_TEST(Asin, float); - HLK_TEST(Atan, 
float); - HLK_TEST(Cos, float); - HLK_TEST(Cosh, float); - HLK_TEST(Sin, float); - HLK_TEST(Sinh, float); - HLK_TEST(Tan, float); - HLK_TEST(Tanh, float); - - // AsType - - HLK_TEST(AsFloat16, int16_t); - HLK_TEST(AsInt16, int16_t); - HLK_TEST(AsUint16, int16_t); - HLK_TEST(AsFloat, int32_t); - HLK_TEST(AsInt, int32_t); - HLK_TEST(AsUint, int32_t); - HLK_TEST(AsFloat16, uint16_t); - HLK_TEST(AsInt16, uint16_t); - HLK_TEST(AsUint16, uint16_t); - HLK_TEST(AsFloat, uint32_t); - HLK_TEST(AsInt, uint32_t); - HLK_TEST(AsUint, uint32_t); - HLK_TEST(AsDouble, uint32_t); - HLK_TEST(AsFloat16, HLSLHalf_t); - HLK_TEST(AsInt16, HLSLHalf_t); - HLK_TEST(AsUint16, HLSLHalf_t); - HLK_TEST(AsUint_SplitDouble, double); - - // Unary Math - - HLK_TEST(Abs, int16_t); - HLK_TEST(Sign, int16_t); - HLK_TEST(Abs, int32_t); - HLK_TEST(Sign, int32_t); - HLK_TEST(Abs, int64_t); - HLK_TEST(Sign, int64_t); - HLK_TEST(Abs, uint16_t); - HLK_TEST(Sign, uint16_t); - HLK_TEST(Abs, uint32_t); - HLK_TEST(Sign, uint32_t); - HLK_TEST(Abs, uint64_t); - HLK_TEST(Sign, uint64_t); - HLK_TEST(Abs, HLSLHalf_t); - HLK_TEST(Ceil, HLSLHalf_t); - HLK_TEST(Exp, HLSLHalf_t); - HLK_TEST(Floor, HLSLHalf_t); - HLK_TEST(Frac, HLSLHalf_t); - HLK_TEST(Log, HLSLHalf_t); - HLK_TEST(Rcp, HLSLHalf_t); - HLK_TEST(Round, HLSLHalf_t); - HLK_TEST(Rsqrt, HLSLHalf_t); - HLK_TEST(Sign, HLSLHalf_t); - HLK_TEST(Sqrt, HLSLHalf_t); - HLK_TEST(Trunc, HLSLHalf_t); - HLK_TEST(Exp2, HLSLHalf_t); - HLK_TEST(Log10, HLSLHalf_t); - HLK_TEST(Log2, HLSLHalf_t); - HLK_TEST(Abs, float); - HLK_TEST(Ceil, float); - HLK_TEST(Exp, float); - HLK_TEST(Floor, float); - HLK_TEST(Frac, float); - HLK_TEST(Log, float); - HLK_TEST(Rcp, float); - HLK_TEST(Round, float); - HLK_TEST(Rsqrt, float); - HLK_TEST(Sign, float); - HLK_TEST(Sqrt, float); - HLK_TEST(Trunc, float); - HLK_TEST(Exp2, float); - HLK_TEST(Log10, float); - HLK_TEST(Log2, float); - HLK_TEST(Frexp, float); - HLK_TEST_DOUBLE(Abs, double); - HLK_TEST_DOUBLE(Sign, double); - - // Float Special - - HLK_TEST(IsFinite, HLSLHalf_t); - HLK_TEST(IsInf, HLSLHalf_t); - HLK_TEST(IsNan, HLSLHalf_t); - HLK_TEST(ModF, HLSLHalf_t); - - HLK_TEST(IsFinite, float); - HLK_TEST(IsInf, float); - HLK_TEST(IsNan, float); - HLK_TEST(ModF, float); - - // Binary Comparison - - HLK_TEST(LessThan, int16_t); - HLK_TEST(LessEqual, int16_t); - HLK_TEST(GreaterThan, int16_t); - HLK_TEST(GreaterEqual, int16_t); - HLK_TEST(Equal, int16_t); - HLK_TEST(NotEqual, int16_t); - HLK_TEST(LessThan, int32_t); - HLK_TEST(LessEqual, int32_t); - HLK_TEST(GreaterThan, int32_t); - HLK_TEST(GreaterEqual, int32_t); - HLK_TEST(Equal, int32_t); - HLK_TEST(NotEqual, int32_t); - HLK_TEST(LessThan, int64_t); - HLK_TEST(LessEqual, int64_t); - HLK_TEST(GreaterThan, int64_t); - HLK_TEST(GreaterEqual, int64_t); - HLK_TEST(Equal, int64_t); - HLK_TEST(NotEqual, int64_t); - HLK_TEST(LessThan, uint16_t); - HLK_TEST(LessEqual, uint16_t); - HLK_TEST(GreaterThan, uint16_t); - HLK_TEST(GreaterEqual, uint16_t); - HLK_TEST(Equal, uint16_t); - HLK_TEST(NotEqual, uint16_t); - HLK_TEST(LessThan, uint32_t); - HLK_TEST(LessEqual, uint32_t); - HLK_TEST(GreaterThan, uint32_t); - HLK_TEST(GreaterEqual, uint32_t); - HLK_TEST(Equal, uint32_t); - HLK_TEST(NotEqual, uint32_t); - HLK_TEST(LessThan, uint64_t); - HLK_TEST(LessEqual, uint64_t); - HLK_TEST(GreaterThan, uint64_t); - HLK_TEST(GreaterEqual, uint64_t); - HLK_TEST(Equal, uint64_t); - HLK_TEST(NotEqual, uint64_t); - HLK_TEST(LessThan, HLSLHalf_t); - HLK_TEST(LessEqual, HLSLHalf_t); - HLK_TEST(GreaterThan, HLSLHalf_t); - HLK_TEST(GreaterEqual, 
HLSLHalf_t); - HLK_TEST(Equal, HLSLHalf_t); - HLK_TEST(NotEqual, HLSLHalf_t); - HLK_TEST(LessThan, float); - HLK_TEST(LessEqual, float); - HLK_TEST(GreaterThan, float); - HLK_TEST(GreaterEqual, float); - HLK_TEST(Equal, float); - HLK_TEST(NotEqual, float); - HLK_TEST_DOUBLE(LessThan, double); - HLK_TEST_DOUBLE(LessEqual, double); - HLK_TEST_DOUBLE(GreaterThan, double); - HLK_TEST_DOUBLE(GreaterEqual, double); - HLK_TEST_DOUBLE(Equal, double); - HLK_TEST_DOUBLE(NotEqual, double); - - // Binary Logical - - HLK_TEST(Logical_And, HLSLBool_t); - HLK_TEST(Logical_Or, HLSLBool_t); - - // Ternary Logical - HLK_TEST(Select, HLSLBool_t); - HLK_TEST(Select, int16_t); - HLK_TEST(Select, int32_t); - HLK_TEST(Select, int64_t); - HLK_TEST(Select, uint16_t); - HLK_TEST(Select, uint32_t); - HLK_TEST(Select, uint64_t); - HLK_TEST(Select, HLSLHalf_t); - HLK_TEST(Select, float); - HLK_TEST_DOUBLE(Select, double); - - // Reduction - HLK_TEST(Any_Mixed, HLSLBool_t); - HLK_TEST(Any_Zero, HLSLBool_t); - HLK_TEST(Any_NoZero, HLSLBool_t); - HLK_TEST(All_Mixed, HLSLBool_t); - HLK_TEST(All_Zero, HLSLBool_t); - HLK_TEST(All_NoZero, HLSLBool_t); - - HLK_TEST(Any_Mixed, int16_t); - HLK_TEST(Any_Zero, int16_t); - HLK_TEST(Any_NoZero, int16_t); - HLK_TEST(All_Mixed, int16_t); - HLK_TEST(All_Zero, int16_t); - HLK_TEST(All_NoZero, int16_t); - - HLK_TEST(Any_Mixed, int32_t); - HLK_TEST(Any_Zero, int32_t); - HLK_TEST(Any_NoZero, int32_t); - HLK_TEST(All_Mixed, int32_t); - HLK_TEST(All_Zero, int32_t); - HLK_TEST(All_NoZero, int32_t); - - HLK_TEST(Any_Mixed, int64_t); - HLK_TEST(Any_Zero, int64_t); - HLK_TEST(Any_NoZero, int64_t); - HLK_TEST(All_Mixed, int64_t); - HLK_TEST(All_Zero, int64_t); - HLK_TEST(All_NoZero, int64_t); - - HLK_TEST(Dot, HLSLHalf_t); - - HLK_TEST(Dot, float); - - // LoadAndStore - // BAB == Byte Address Buffer - // RDH == Resource Descriptor Heap - // RD == Root Descriptor - // DT == Descriptor Table - // SB == Structured Buffer - - HLK_TEST(LoadAndStore_RDH_BAB_SRV, HLSLHalf_t); - HLK_TEST(LoadAndStore_RDH_BAB_UAV, HLSLHalf_t); - HLK_TEST(LoadAndStore_DT_BAB_SRV, HLSLHalf_t); - HLK_TEST(LoadAndStore_DT_BAB_UAV, HLSLHalf_t); - HLK_TEST(LoadAndStore_RD_BAB_SRV, HLSLHalf_t); - HLK_TEST(LoadAndStore_RD_BAB_UAV, HLSLHalf_t); - HLK_TEST(LoadAndStore_RDH_SB_SRV, HLSLHalf_t); - HLK_TEST(LoadAndStore_RDH_SB_UAV, HLSLHalf_t); - HLK_TEST(LoadAndStore_DT_SB_SRV, HLSLHalf_t); - HLK_TEST(LoadAndStore_DT_SB_UAV, HLSLHalf_t); - HLK_TEST(LoadAndStore_RD_SB_SRV, HLSLHalf_t); - HLK_TEST(LoadAndStore_RD_SB_UAV, HLSLHalf_t); - - HLK_TEST(LoadAndStore_RDH_BAB_SRV, HLSLBool_t); - HLK_TEST(LoadAndStore_RDH_BAB_UAV, HLSLBool_t); - HLK_TEST(LoadAndStore_DT_BAB_SRV, HLSLBool_t); - HLK_TEST(LoadAndStore_DT_BAB_UAV, HLSLBool_t); - HLK_TEST(LoadAndStore_RD_BAB_SRV, HLSLBool_t); - HLK_TEST(LoadAndStore_RD_BAB_UAV, HLSLBool_t); - HLK_TEST(LoadAndStore_RDH_SB_SRV, HLSLBool_t); - HLK_TEST(LoadAndStore_RDH_SB_UAV, HLSLBool_t); - HLK_TEST(LoadAndStore_DT_SB_SRV, HLSLBool_t); - HLK_TEST(LoadAndStore_DT_SB_UAV, HLSLBool_t); - HLK_TEST(LoadAndStore_RD_SB_SRV, HLSLBool_t); - HLK_TEST(LoadAndStore_RD_SB_UAV, HLSLBool_t); - - HLK_TEST(LoadAndStore_RDH_BAB_SRV, int16_t); - HLK_TEST(LoadAndStore_RDH_BAB_UAV, int16_t); - HLK_TEST(LoadAndStore_DT_BAB_SRV, int16_t); - HLK_TEST(LoadAndStore_DT_BAB_UAV, int16_t); - HLK_TEST(LoadAndStore_RD_BAB_SRV, int16_t); - HLK_TEST(LoadAndStore_RD_BAB_UAV, int16_t); - HLK_TEST(LoadAndStore_RDH_SB_SRV, int16_t); - HLK_TEST(LoadAndStore_RDH_SB_UAV, int16_t); - HLK_TEST(LoadAndStore_DT_SB_SRV, int16_t); - 
HLK_TEST(LoadAndStore_DT_SB_UAV, int16_t); - HLK_TEST(LoadAndStore_RD_SB_SRV, int16_t); - HLK_TEST(LoadAndStore_RD_SB_UAV, int16_t); - - HLK_TEST(LoadAndStore_RDH_BAB_SRV, int32_t); - HLK_TEST(LoadAndStore_RDH_BAB_UAV, int32_t); - HLK_TEST(LoadAndStore_DT_BAB_SRV, int32_t); - HLK_TEST(LoadAndStore_DT_BAB_UAV, int32_t); - HLK_TEST(LoadAndStore_RD_BAB_SRV, int32_t); - HLK_TEST(LoadAndStore_RD_BAB_UAV, int32_t); - HLK_TEST(LoadAndStore_RDH_SB_SRV, int32_t); - HLK_TEST(LoadAndStore_RDH_SB_UAV, int32_t); - HLK_TEST(LoadAndStore_DT_SB_SRV, int32_t); - HLK_TEST(LoadAndStore_DT_SB_UAV, int32_t); - HLK_TEST(LoadAndStore_RD_SB_SRV, int32_t); - HLK_TEST(LoadAndStore_RD_SB_UAV, int32_t); - - HLK_TEST(LoadAndStore_RDH_BAB_SRV, int64_t); - HLK_TEST(LoadAndStore_RDH_BAB_UAV, int64_t); - HLK_TEST(LoadAndStore_DT_BAB_SRV, int64_t); - HLK_TEST(LoadAndStore_DT_BAB_UAV, int64_t); - HLK_TEST(LoadAndStore_RD_BAB_SRV, int64_t); - HLK_TEST(LoadAndStore_RD_BAB_UAV, int64_t); - HLK_TEST(LoadAndStore_RDH_SB_SRV, int64_t); - HLK_TEST(LoadAndStore_RDH_SB_UAV, int64_t); - HLK_TEST(LoadAndStore_DT_SB_SRV, int64_t); - HLK_TEST(LoadAndStore_DT_SB_UAV, int64_t); - HLK_TEST(LoadAndStore_RD_SB_SRV, int64_t); - HLK_TEST(LoadAndStore_RD_SB_UAV, int64_t); - - HLK_TEST(LoadAndStore_RDH_BAB_SRV, uint16_t); - HLK_TEST(LoadAndStore_RDH_BAB_UAV, uint16_t); - HLK_TEST(LoadAndStore_DT_BAB_SRV, uint16_t); - HLK_TEST(LoadAndStore_DT_BAB_UAV, uint16_t); - HLK_TEST(LoadAndStore_RD_BAB_SRV, uint16_t); - HLK_TEST(LoadAndStore_RD_BAB_UAV, uint16_t); - HLK_TEST(LoadAndStore_RDH_SB_SRV, uint16_t); - HLK_TEST(LoadAndStore_RDH_SB_UAV, uint16_t); - HLK_TEST(LoadAndStore_DT_SB_SRV, uint16_t); - HLK_TEST(LoadAndStore_DT_SB_UAV, uint16_t); - HLK_TEST(LoadAndStore_RD_SB_SRV, uint16_t); - HLK_TEST(LoadAndStore_RD_SB_UAV, uint16_t); - - HLK_TEST(LoadAndStore_RDH_BAB_UAV, uint32_t); - HLK_TEST(LoadAndStore_RDH_BAB_SRV, uint32_t); - HLK_TEST(LoadAndStore_DT_BAB_UAV, uint32_t); - HLK_TEST(LoadAndStore_DT_BAB_SRV, uint32_t); - HLK_TEST(LoadAndStore_RD_BAB_UAV, uint32_t); - HLK_TEST(LoadAndStore_RD_BAB_SRV, uint32_t); - HLK_TEST(LoadAndStore_RDH_SB_UAV, uint32_t); - HLK_TEST(LoadAndStore_RDH_SB_SRV, uint32_t); - HLK_TEST(LoadAndStore_DT_SB_UAV, uint32_t); - HLK_TEST(LoadAndStore_DT_SB_SRV, uint32_t); - HLK_TEST(LoadAndStore_RD_SB_UAV, uint32_t); - HLK_TEST(LoadAndStore_RD_SB_SRV, uint32_t); - - HLK_TEST(LoadAndStore_RDH_BAB_UAV, uint64_t); - HLK_TEST(LoadAndStore_RDH_BAB_SRV, uint64_t); - HLK_TEST(LoadAndStore_DT_BAB_UAV, uint64_t); - HLK_TEST(LoadAndStore_DT_BAB_SRV, uint64_t); - HLK_TEST(LoadAndStore_RD_BAB_UAV, uint64_t); - HLK_TEST(LoadAndStore_RD_BAB_SRV, uint64_t); - HLK_TEST(LoadAndStore_RDH_SB_UAV, uint64_t); - HLK_TEST(LoadAndStore_RDH_SB_SRV, uint64_t); - HLK_TEST(LoadAndStore_DT_SB_UAV, uint64_t); - HLK_TEST(LoadAndStore_DT_SB_SRV, uint64_t); - HLK_TEST(LoadAndStore_RD_SB_UAV, uint64_t); - HLK_TEST(LoadAndStore_RD_SB_SRV, uint64_t); - - HLK_TEST(LoadAndStore_RDH_BAB_UAV, float); - HLK_TEST(LoadAndStore_RDH_BAB_SRV, float); - HLK_TEST(LoadAndStore_DT_BAB_UAV, float); - HLK_TEST(LoadAndStore_DT_BAB_SRV, float); - HLK_TEST(LoadAndStore_RD_BAB_UAV, float); - HLK_TEST(LoadAndStore_RD_BAB_SRV, float); - HLK_TEST(LoadAndStore_RDH_SB_UAV, float); - HLK_TEST(LoadAndStore_RDH_SB_SRV, float); - HLK_TEST(LoadAndStore_DT_SB_UAV, float); - HLK_TEST(LoadAndStore_DT_SB_SRV, float); - HLK_TEST(LoadAndStore_RD_SB_UAV, float); - HLK_TEST(LoadAndStore_RD_SB_SRV, float); - - HLK_TEST_DOUBLE(LoadAndStore_RDH_BAB_SRV, double); - 
HLK_TEST_DOUBLE(LoadAndStore_RDH_BAB_UAV, double); - HLK_TEST_DOUBLE(LoadAndStore_DT_BAB_SRV, double); - HLK_TEST_DOUBLE(LoadAndStore_DT_BAB_UAV, double); - HLK_TEST_DOUBLE(LoadAndStore_RD_BAB_SRV, double); - HLK_TEST_DOUBLE(LoadAndStore_RD_BAB_UAV, double); - HLK_TEST_DOUBLE(LoadAndStore_RDH_SB_SRV, double); - HLK_TEST_DOUBLE(LoadAndStore_RDH_SB_UAV, double); - HLK_TEST_DOUBLE(LoadAndStore_DT_SB_SRV, double); - HLK_TEST_DOUBLE(LoadAndStore_DT_SB_UAV, double); - HLK_TEST_DOUBLE(LoadAndStore_RD_SB_SRV, double); - HLK_TEST_DOUBLE(LoadAndStore_RD_SB_UAV, double); - - // Derivative - HLK_TEST(DerivativeDdx, HLSLHalf_t); - HLK_TEST(DerivativeDdy, HLSLHalf_t); - HLK_TEST(DerivativeDdxFine, HLSLHalf_t); - HLK_TEST(DerivativeDdyFine, HLSLHalf_t); - HLK_TEST(DerivativeDdx, float); - HLK_TEST(DerivativeDdy, float); - HLK_TEST(DerivativeDdxFine, float); - HLK_TEST(DerivativeDdyFine, float); - - // Quad - HLK_TEST(QuadReadLaneAt, HLSLBool_t); - HLK_TEST(QuadReadAcrossX, HLSLBool_t); - HLK_TEST(QuadReadAcrossY, HLSLBool_t); - HLK_TEST(QuadReadAcrossDiagonal, HLSLBool_t); - HLK_TEST(QuadReadLaneAt, int16_t); - HLK_TEST(QuadReadAcrossX, int16_t); - HLK_TEST(QuadReadAcrossY, int16_t); - HLK_TEST(QuadReadAcrossDiagonal, int16_t); - HLK_TEST(QuadReadLaneAt, int32_t); - HLK_TEST(QuadReadAcrossX, int32_t); - HLK_TEST(QuadReadAcrossY, int32_t); - HLK_TEST(QuadReadAcrossDiagonal, int32_t); - HLK_TEST(QuadReadLaneAt, int64_t); - HLK_TEST(QuadReadAcrossX, int64_t); - HLK_TEST(QuadReadAcrossY, int64_t); - HLK_TEST(QuadReadAcrossDiagonal, int64_t); - HLK_TEST(QuadReadLaneAt, uint16_t); - HLK_TEST(QuadReadAcrossX, uint16_t); - HLK_TEST(QuadReadAcrossY, uint16_t); - HLK_TEST(QuadReadAcrossDiagonal, uint16_t); - HLK_TEST(QuadReadLaneAt, uint32_t); - HLK_TEST(QuadReadAcrossX, uint32_t); - HLK_TEST(QuadReadAcrossY, uint32_t); - HLK_TEST(QuadReadAcrossDiagonal, uint32_t); - HLK_TEST(QuadReadLaneAt, uint64_t); - HLK_TEST(QuadReadAcrossX, uint64_t); - HLK_TEST(QuadReadAcrossY, uint64_t); - HLK_TEST(QuadReadAcrossDiagonal, uint64_t); - HLK_TEST(QuadReadLaneAt, HLSLHalf_t); - HLK_TEST(QuadReadAcrossX, HLSLHalf_t); - HLK_TEST(QuadReadAcrossY, HLSLHalf_t); - HLK_TEST(QuadReadAcrossDiagonal, HLSLHalf_t); - HLK_TEST(QuadReadLaneAt, float); - HLK_TEST(QuadReadAcrossX, float); - HLK_TEST(QuadReadAcrossY, float); - HLK_TEST(QuadReadAcrossDiagonal, float); - HLK_TEST_DOUBLE(QuadReadLaneAt, double); - HLK_TEST_DOUBLE(QuadReadAcrossX, double); - HLK_TEST_DOUBLE(QuadReadAcrossY, double); - HLK_TEST_DOUBLE(QuadReadAcrossDiagonal, double); - - // Wave - - HLK_WAVEOP_TEST(WaveActiveAllEqual, HLSLBool_t); - HLK_WAVEOP_TEST(WaveReadLaneAt, HLSLBool_t); - HLK_WAVEOP_TEST(WaveReadLaneFirst, HLSLBool_t); - HLK_WAVEOP_TEST(WaveMatch, HLSLBool_t); - - HLK_WAVEOP_TEST(WaveActiveSum, int16_t); - HLK_WAVEOP_TEST(WaveActiveMin, int16_t); - HLK_WAVEOP_TEST(WaveActiveMax, int16_t); - HLK_WAVEOP_TEST(WaveActiveProduct, int16_t); - HLK_WAVEOP_TEST(WaveActiveAllEqual, int16_t); - HLK_WAVEOP_TEST(WaveReadLaneAt, int16_t); - HLK_WAVEOP_TEST(WaveReadLaneFirst, int16_t); - HLK_WAVEOP_TEST(WavePrefixSum, int16_t); - HLK_WAVEOP_TEST(WavePrefixProduct, int16_t); - HLK_WAVEOP_TEST(WaveMultiPrefixSum, int16_t); - HLK_WAVEOP_TEST(WaveMultiPrefixProduct, int16_t); - HLK_WAVEOP_TEST(WaveMultiPrefixBitAnd, int16_t); - HLK_WAVEOP_TEST(WaveMultiPrefixBitOr, int16_t); - HLK_WAVEOP_TEST(WaveMultiPrefixBitXor, int16_t); - HLK_WAVEOP_TEST(WaveMatch, int16_t); - HLK_WAVEOP_TEST(WaveActiveSum, int32_t); - HLK_WAVEOP_TEST(WaveActiveMin, int32_t); - 
HLK_WAVEOP_TEST(WaveActiveMax, int32_t); - HLK_WAVEOP_TEST(WaveActiveProduct, int32_t); - HLK_WAVEOP_TEST(WaveActiveAllEqual, int32_t); - HLK_WAVEOP_TEST(WaveReadLaneAt, int32_t); - HLK_WAVEOP_TEST(WaveReadLaneFirst, int32_t); - HLK_WAVEOP_TEST(WavePrefixSum, int32_t); - HLK_WAVEOP_TEST(WaveMultiPrefixSum, int32_t); - HLK_WAVEOP_TEST(WaveMultiPrefixProduct, int32_t); - HLK_WAVEOP_TEST(WavePrefixProduct, int32_t); - HLK_WAVEOP_TEST(WaveMultiPrefixBitAnd, int32_t); - HLK_WAVEOP_TEST(WaveMultiPrefixBitOr, int32_t); - HLK_WAVEOP_TEST(WaveMultiPrefixBitXor, int32_t); - HLK_WAVEOP_TEST(WaveMatch, int32_t); - HLK_WAVEOP_TEST(WaveActiveSum, int64_t); - HLK_WAVEOP_TEST(WaveActiveMin, int64_t); - HLK_WAVEOP_TEST(WaveActiveMax, int64_t); - HLK_WAVEOP_TEST(WaveActiveProduct, int64_t); - HLK_WAVEOP_TEST(WaveActiveAllEqual, int64_t); - HLK_WAVEOP_TEST(WaveReadLaneAt, int64_t); - HLK_WAVEOP_TEST(WaveReadLaneFirst, int64_t); - HLK_WAVEOP_TEST(WavePrefixSum, int64_t); - HLK_WAVEOP_TEST(WavePrefixProduct, int64_t); - HLK_WAVEOP_TEST(WaveMultiPrefixSum, int64_t); - HLK_WAVEOP_TEST(WaveMultiPrefixProduct, int64_t); - HLK_WAVEOP_TEST(WaveMultiPrefixBitAnd, int64_t); - HLK_WAVEOP_TEST(WaveMultiPrefixBitOr, int64_t); - HLK_WAVEOP_TEST(WaveMultiPrefixBitXor, int64_t); - HLK_WAVEOP_TEST(WaveMatch, int64_t); - - // Note: WaveActiveBit* ops don't support uint16_t in HLSL - // But the WaveMultiPrefixBit ops support all int and uint types - HLK_WAVEOP_TEST(WaveActiveSum, uint16_t); - HLK_WAVEOP_TEST(WaveActiveMin, uint16_t); - HLK_WAVEOP_TEST(WaveActiveMax, uint16_t); - HLK_WAVEOP_TEST(WaveActiveProduct, uint16_t); - HLK_WAVEOP_TEST(WaveActiveAllEqual, uint16_t); - HLK_WAVEOP_TEST(WaveReadLaneAt, uint16_t); - HLK_WAVEOP_TEST(WaveReadLaneFirst, uint16_t); - HLK_WAVEOP_TEST(WavePrefixSum, uint16_t); - HLK_WAVEOP_TEST(WavePrefixProduct, uint16_t); - HLK_WAVEOP_TEST(WaveMultiPrefixSum, uint16_t); - HLK_WAVEOP_TEST(WaveMultiPrefixProduct, uint16_t); - HLK_WAVEOP_TEST(WaveMultiPrefixBitAnd, uint16_t); - HLK_WAVEOP_TEST(WaveMultiPrefixBitOr, uint16_t); - HLK_WAVEOP_TEST(WaveMultiPrefixBitXor, uint16_t); - HLK_WAVEOP_TEST(WaveMatch, uint16_t); - HLK_WAVEOP_TEST(WaveActiveSum, uint32_t); - HLK_WAVEOP_TEST(WaveActiveMin, uint32_t); - HLK_WAVEOP_TEST(WaveActiveMax, uint32_t); - HLK_WAVEOP_TEST(WaveActiveProduct, uint32_t); - HLK_WAVEOP_TEST(WaveActiveBitAnd, uint32_t); - HLK_WAVEOP_TEST(WaveActiveBitOr, uint32_t); - HLK_WAVEOP_TEST(WaveActiveBitXor, uint32_t); - HLK_WAVEOP_TEST(WaveActiveAllEqual, uint32_t); - HLK_WAVEOP_TEST(WaveReadLaneAt, uint32_t); - HLK_WAVEOP_TEST(WaveReadLaneFirst, uint32_t); - HLK_WAVEOP_TEST(WavePrefixSum, uint32_t); - HLK_WAVEOP_TEST(WavePrefixProduct, uint32_t); - HLK_WAVEOP_TEST(WaveMultiPrefixSum, uint32_t); - HLK_WAVEOP_TEST(WaveMultiPrefixProduct, uint32_t); - HLK_WAVEOP_TEST(WaveMultiPrefixBitAnd, uint32_t); - HLK_WAVEOP_TEST(WaveMultiPrefixBitOr, uint32_t); - HLK_WAVEOP_TEST(WaveMultiPrefixBitXor, uint32_t); - HLK_WAVEOP_TEST(WaveMatch, uint32_t); - HLK_WAVEOP_TEST(WaveActiveSum, uint64_t); - HLK_WAVEOP_TEST(WaveActiveMin, uint64_t); - HLK_WAVEOP_TEST(WaveActiveMax, uint64_t); - HLK_WAVEOP_TEST(WaveActiveProduct, uint64_t); - HLK_WAVEOP_TEST(WaveActiveBitAnd, uint64_t); - HLK_WAVEOP_TEST(WaveActiveBitOr, uint64_t); - HLK_WAVEOP_TEST(WaveActiveBitXor, uint64_t); - HLK_WAVEOP_TEST(WaveActiveAllEqual, uint64_t); - HLK_WAVEOP_TEST(WaveReadLaneAt, uint64_t); - HLK_WAVEOP_TEST(WaveReadLaneFirst, uint64_t); - HLK_WAVEOP_TEST(WavePrefixSum, uint64_t); - HLK_WAVEOP_TEST(WavePrefixProduct, uint64_t); - 
HLK_WAVEOP_TEST(WaveMultiPrefixSum, uint64_t); - HLK_WAVEOP_TEST(WaveMultiPrefixProduct, uint64_t); - HLK_WAVEOP_TEST(WaveMultiPrefixBitAnd, uint64_t); - HLK_WAVEOP_TEST(WaveMultiPrefixBitOr, uint64_t); - HLK_WAVEOP_TEST(WaveMultiPrefixBitXor, uint64_t); - HLK_WAVEOP_TEST(WaveMatch, uint64_t); - - HLK_WAVEOP_TEST(WaveActiveSum, HLSLHalf_t); - HLK_WAVEOP_TEST(WaveActiveMin, HLSLHalf_t); - HLK_WAVEOP_TEST(WaveActiveMax, HLSLHalf_t); - HLK_WAVEOP_TEST(WaveActiveProduct, HLSLHalf_t); - HLK_WAVEOP_TEST(WaveActiveAllEqual, HLSLHalf_t); - HLK_WAVEOP_TEST(WaveReadLaneAt, HLSLHalf_t); - HLK_WAVEOP_TEST(WaveReadLaneFirst, HLSLHalf_t); - HLK_WAVEOP_TEST(WavePrefixSum, HLSLHalf_t); - HLK_WAVEOP_TEST(WavePrefixProduct, HLSLHalf_t); - HLK_WAVEOP_TEST(WaveMultiPrefixSum, HLSLHalf_t); - HLK_WAVEOP_TEST(WaveMultiPrefixProduct, HLSLHalf_t); - HLK_WAVEOP_TEST(WaveMatch, HLSLHalf_t); - HLK_WAVEOP_TEST(WaveActiveSum, float); - HLK_WAVEOP_TEST(WaveActiveMin, float); - HLK_WAVEOP_TEST(WaveActiveMax, float); - HLK_WAVEOP_TEST(WaveActiveProduct, float); - HLK_WAVEOP_TEST(WaveActiveAllEqual, float); - HLK_WAVEOP_TEST(WaveReadLaneAt, float); - HLK_WAVEOP_TEST(WaveReadLaneFirst, float); - HLK_WAVEOP_TEST(WavePrefixSum, float); - HLK_WAVEOP_TEST(WavePrefixProduct, float); - HLK_WAVEOP_TEST(WaveMultiPrefixSum, float); - HLK_WAVEOP_TEST(WaveMultiPrefixProduct, float); - HLK_WAVEOP_TEST(WaveMatch, float); - HLK_WAVEOP_TEST_DOUBLE(WaveActiveSum, double); - HLK_WAVEOP_TEST_DOUBLE(WaveActiveMin, double); - HLK_WAVEOP_TEST_DOUBLE(WaveActiveMax, double); - HLK_WAVEOP_TEST_DOUBLE(WaveActiveProduct, double); - HLK_WAVEOP_TEST_DOUBLE(WaveActiveAllEqual, double); - HLK_WAVEOP_TEST_DOUBLE(WaveReadLaneAt, double); - HLK_WAVEOP_TEST_DOUBLE(WaveReadLaneFirst, double); - HLK_WAVEOP_TEST_DOUBLE(WavePrefixSum, double); - HLK_WAVEOP_TEST_DOUBLE(WavePrefixProduct, double); - HLK_WAVEOP_TEST_DOUBLE(WaveMultiPrefixSum, double); - HLK_WAVEOP_TEST_DOUBLE(WaveMultiPrefixProduct, double); - HLK_WAVEOP_TEST_DOUBLE(WaveMatch, double); - -private: - bool Initialized = false; - std::optional D3D12SDK; - bool VerboseLogging = false; - size_t OverrideInputSize = 0; - UINT OverrideWaveLaneCount = 0; - CComPtr D3DDevice; -}; diff --git a/utils/hct/hctdb_instrhelp.py b/utils/hct/hctdb_instrhelp.py index 940dde39b1..6208032bf1 100644 --- a/utils/hct/hctdb_instrhelp.py +++ b/utils/hct/hctdb_instrhelp.py @@ -1692,7 +1692,7 @@ def get_extended_table_opcode_enum_decls(): # since there can be pre-release versions that are higher # than the last released version highest_major = 6 -highest_minor = 9 +highest_minor = 10 highest_shader_models = {4: 1, 5: 1, 6: highest_minor} # fetch the last released version from latest-released.json