From 594d5e8e048f009a2395195a6a7ad3fa07918251 Mon Sep 17 00:00:00 2001 From: Ashley Coleman Date: Tue, 27 Jan 2026 10:47:09 -0700 Subject: [PATCH] [SM6.10] Update DXIL ops for hlsl-specs/769 Updates the DXIL ops to reflect the shape required by https://github.com/microsoft/hlsl-specs/pull/769 HL ops are not updated as they are blocked until the HL LinAlgMatrix type is available --- docs/DXIL.rst | 10 +- include/dxc/DXIL/DxilConstants.h | 31 ++- include/dxc/DXIL/DxilInstructions.h | 210 +++++++++----------- include/dxc/HlslIntrinsicOp.h | 39 ++-- lib/DXIL/DxilOperations.cpp | 267 +++++++++++++------------- lib/HLSL/HLOperationLower.cpp | 2 - utils/hct/gen_intrin_main.txt | 2 - utils/hct/hctdb.py | 188 +++++++----------- utils/hct/hlsl_intrinsic_opcodes.json | 39 ++-- 9 files changed, 344 insertions(+), 444 deletions(-) diff --git a/docs/DXIL.rst b/docs/DXIL.rst index a5f0536c4e..a67bb715d0 100644 --- a/docs/DXIL.rst +++ b/docs/DXIL.rst @@ -3076,7 +3076,7 @@ ID Name Description 2147483656 RayQuery_CandidateTriangleObjectPosition returns candidate triangle vertices in object space as <9 x float> 2147483657 RayQuery_CommittedTriangleObjectPosition returns committed triangle vertices in object space as <9 x float> 2147483658 HitObject_TriangleObjectPosition returns triangle vertices in object space as <9 x float> -2147483659 CreateMatrix creates a handle to a Matrix +2147483659 LinAlgMatrixReserved0 reserved 2147483660 FillMatrix fills a matrix with a scalar value 2147483661 CopyConvertMatrix Converts and copies the element and use type of the source matrix to the destination matrix with optional transpose 2147483662 MatrixLoadFromDescriptor fills a matrix with data from a [RW]ByteAddressBuffer @@ -3094,10 +3094,10 @@ ID Name Description 2147483674 MatrixVecMulAdd Multiplies a MxK dimension matrix and a K sized input vector then adds a M sized bias vector 2147483675 MatrixAccumulateToDescriptor accumulates a matrix to a RWByteAddressBuffer 2147483676 
MatrixAccumulateToMemory accumulates a matrix to groupshared memory -2147483677 MatrixOuterProduct Outer products an M sized vector and a K sized vector producing an MxK matrix -2147483678 LinAlgMatrixReserved0 reserved -2147483679 LinAlgMatrixReserved1 reserved -2147483680 LinAlgMatrixReserved2 reserved +2147483677 MatrixOuterProduct Outer products an M sized vector and a N sized vector producing an MxN matrix +2147483678 LinAlgMatrixReserved1 reserved +2147483679 LinAlgMatrixReserved2 reserved +2147483680 LinAlgMatrixReserved3 reserved ========== ======================================== =================================================================================================================== diff --git a/include/dxc/DXIL/DxilConstants.h b/include/dxc/DXIL/DxilConstants.h index 8d44e58487..ffbaad4f6a 100644 --- a/include/dxc/DXIL/DxilConstants.h +++ b/include/dxc/DXIL/DxilConstants.h @@ -512,9 +512,10 @@ static const OpCodeTableID TableID = OpCodeTableID::ExperimentalOps; // Enumeration for ExperimentalOps DXIL operations enum class OpCode : unsigned { // - LinAlgMatrixReserved0 = 30, // reserved - LinAlgMatrixReserved1 = 31, // reserved - LinAlgMatrixReserved2 = 32, // reserved + LinAlgMatrixReserved0 = 11, // reserved + LinAlgMatrixReserved1 = 30, // reserved + LinAlgMatrixReserved2 = 31, // reserved + LinAlgMatrixReserved3 = 32, // reserved // Group Wave Ops GetGroupWaveCount = 2, // returns the number of waves in the thread group @@ -532,7 +533,6 @@ enum class OpCode : unsigned { CopyConvertMatrix = 13, // Converts and copies the element and use type of the source matrix // to the destination matrix with optional transpose - CreateMatrix = 11, // creates a handle to a Matrix FillMatrix = 12, // fills a matrix with a scalar value MatrixAccumulate = 24, // accumulate A or B matrix into Accumulator matrix // following LHS += RHS @@ -552,8 +552,8 @@ enum class OpCode : unsigned { 15, // fills a matrix with data from a groupshared array MatrixMulOp = 23, 
// applies a multiplication op to matrix C using A and B as parameters - MatrixOuterProduct = 29, // Outer products an M sized vector and a K sized - // vector producing an MxK matrix + MatrixOuterProduct = 29, // Outer products an M sized vector and a N sized + // vector producing an MxN matrix MatrixQueryAccumulatorLayout = 22, // returns comptime 0 when accumulator // matrix are A layout, 1 when B layout MatrixSetElement = 19, // sets the element of the matrix corresponding to the @@ -1242,8 +1242,8 @@ enum class OpCode : unsigned { EXP_OPCODE(ExperimentalOps, HitObject_TriangleObjectPosition), // returns triangle vertices in // object space as <9 x float> - // CreateMatrix = 0x8000000B, 2147483659U, -2147483637 - EXP_OPCODE(ExperimentalOps, CreateMatrix), // creates a handle to a Matrix + // LinAlgMatrixReserved0 = 0x8000000B, 2147483659U, -2147483637 + EXP_OPCODE(ExperimentalOps, LinAlgMatrixReserved0), // reserved // FillMatrix = 0x8000000C, 2147483660U, -2147483636 EXP_OPCODE(ExperimentalOps, FillMatrix), // fills a matrix with a scalar value // CopyConvertMatrix = 0x8000000D, 2147483661U, -2147483635 @@ -1316,14 +1316,14 @@ enum class OpCode : unsigned { MatrixAccumulateToMemory), // accumulates a matrix to groupshared memory // MatrixOuterProduct = 0x8000001D, 2147483677U, -2147483619 EXP_OPCODE(ExperimentalOps, - MatrixOuterProduct), // Outer products an M sized vector and a K - // sized vector producing an MxK matrix - // LinAlgMatrixReserved0 = 0x8000001E, 2147483678U, -2147483618 - EXP_OPCODE(ExperimentalOps, LinAlgMatrixReserved0), // reserved - // LinAlgMatrixReserved1 = 0x8000001F, 2147483679U, -2147483617 + MatrixOuterProduct), // Outer products an M sized vector and a N + // sized vector producing an MxN matrix + // LinAlgMatrixReserved1 = 0x8000001E, 2147483678U, -2147483618 EXP_OPCODE(ExperimentalOps, LinAlgMatrixReserved1), // reserved - // LinAlgMatrixReserved2 = 0x80000020, 2147483680U, -2147483616 + // LinAlgMatrixReserved2 = 0x8000001F, 
2147483679U, -2147483617 EXP_OPCODE(ExperimentalOps, LinAlgMatrixReserved2), // reserved + // LinAlgMatrixReserved3 = 0x80000020, 2147483680U, -2147483616 + EXP_OPCODE(ExperimentalOps, LinAlgMatrixReserved3), // reserved }; // OPCODE-ENUM:END #undef EXP_OPCODE @@ -1480,7 +1480,6 @@ enum class OpCodeClass : unsigned { // Linear Algebra Operations CopyConvertMatrix, - CreateMatrix, FillMatrix, MatVecMul, MatVecMulAdd, @@ -1688,7 +1687,7 @@ enum class OpCodeClass : unsigned { NodeOutputIsValid, OutputComplete, - NumOpClasses = 223, // exclusive last value of enumeration + NumOpClasses = 222, // exclusive last value of enumeration }; // OPCODECLASS-ENUM:END diff --git a/include/dxc/DXIL/DxilInstructions.h b/include/dxc/DXIL/DxilInstructions.h index 4646a0b872..b7737ff292 100644 --- a/include/dxc/DXIL/DxilInstructions.h +++ b/include/dxc/DXIL/DxilInstructions.h @@ -10500,26 +10500,6 @@ struct DxilInst_HitObject_TriangleObjectPosition { void set_hitObject(llvm::Value *val) { Instr->setOperand(1, val); } }; -/// This instruction creates a handle to a Matrix -struct DxilInst_CreateMatrix { - llvm::Instruction *Instr; - // Construction and identification - DxilInst_CreateMatrix(llvm::Instruction *pInstr) : Instr(pInstr) {} - operator bool() const { - return hlsl::OP::IsDxilOpFuncCallInst(Instr, - hlsl::OP::OpCode::CreateMatrix); - } - // Validation support - bool isAllowed() const { return true; } - bool isArgumentListValid() const { - if (1 != llvm::dyn_cast(Instr)->getNumArgOperands()) - return false; - return true; - } - // Metadata - bool requiresUniformInputs() const { return false; } -}; - /// This instruction fills a matrix with a scalar value struct DxilInst_FillMatrix { llvm::Instruction *Instr; @@ -10531,7 +10511,7 @@ struct DxilInst_FillMatrix { // Validation support bool isAllowed() const { return true; } bool isArgumentListValid() const { - if (3 != llvm::dyn_cast(Instr)->getNumArgOperands()) + if (2 != llvm::dyn_cast(Instr)->getNumArgOperands()) return false; 
return true; } @@ -10539,14 +10519,11 @@ struct DxilInst_FillMatrix { bool requiresUniformInputs() const { return false; } // Operand indexes enum OperandIdx { - arg_matrixRef = 1, - arg_value = 2, + arg_value = 1, }; // Accessors - llvm::Value *get_matrixRef() const { return Instr->getOperand(1); } - void set_matrixRef(llvm::Value *val) { Instr->setOperand(1, val); } - llvm::Value *get_value() const { return Instr->getOperand(2); } - void set_value(llvm::Value *val) { Instr->setOperand(2, val); } + llvm::Value *get_value() const { return Instr->getOperand(1); } + void set_value(llvm::Value *val) { Instr->setOperand(1, val); } }; /// This instruction Converts and copies the element and use type of the source @@ -10562,7 +10539,7 @@ struct DxilInst_CopyConvertMatrix { // Validation support bool isAllowed() const { return true; } bool isArgumentListValid() const { - if (4 != llvm::dyn_cast(Instr)->getNumArgOperands()) + if (3 != llvm::dyn_cast(Instr)->getNumArgOperands()) return false; return true; } @@ -10570,17 +10547,14 @@ struct DxilInst_CopyConvertMatrix { bool requiresUniformInputs() const { return false; } // Operand indexes enum OperandIdx { - arg_destMatrixRef = 1, - arg_srcMatrixRef = 2, - arg_transpose = 3, + arg_srcMatrix = 1, + arg_transpose = 2, }; // Accessors - llvm::Value *get_destMatrixRef() const { return Instr->getOperand(1); } - void set_destMatrixRef(llvm::Value *val) { Instr->setOperand(1, val); } - llvm::Value *get_srcMatrixRef() const { return Instr->getOperand(2); } - void set_srcMatrixRef(llvm::Value *val) { Instr->setOperand(2, val); } - llvm::Value *get_transpose() const { return Instr->getOperand(3); } - void set_transpose(llvm::Value *val) { Instr->setOperand(3, val); } + llvm::Value *get_srcMatrix() const { return Instr->getOperand(1); } + void set_srcMatrix(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_transpose() const { return Instr->getOperand(2); } + void set_transpose(llvm::Value *val) { Instr->setOperand(2, 
val); } }; /// This instruction fills a matrix with data from a [RW]ByteAddressBuffer @@ -10596,7 +10570,7 @@ struct DxilInst_MatrixLoadFromDescriptor { // Validation support bool isAllowed() const { return true; } bool isArgumentListValid() const { - if (6 != llvm::dyn_cast(Instr)->getNumArgOperands()) + if (5 != llvm::dyn_cast(Instr)->getNumArgOperands()) return false; return true; } @@ -10604,23 +10578,20 @@ struct DxilInst_MatrixLoadFromDescriptor { bool requiresUniformInputs() const { return false; } // Operand indexes enum OperandIdx { - arg_matrixRef = 1, - arg_handle = 2, - arg_offset = 3, - arg_stride = 4, - arg_layout = 5, + arg_handle = 1, + arg_offset = 2, + arg_stride = 3, + arg_layout = 4, }; // Accessors - llvm::Value *get_matrixRef() const { return Instr->getOperand(1); } - void set_matrixRef(llvm::Value *val) { Instr->setOperand(1, val); } - llvm::Value *get_handle() const { return Instr->getOperand(2); } - void set_handle(llvm::Value *val) { Instr->setOperand(2, val); } - llvm::Value *get_offset() const { return Instr->getOperand(3); } - void set_offset(llvm::Value *val) { Instr->setOperand(3, val); } - llvm::Value *get_stride() const { return Instr->getOperand(4); } - void set_stride(llvm::Value *val) { Instr->setOperand(4, val); } - llvm::Value *get_layout() const { return Instr->getOperand(5); } - void set_layout(llvm::Value *val) { Instr->setOperand(5, val); } + llvm::Value *get_handle() const { return Instr->getOperand(1); } + void set_handle(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_offset() const { return Instr->getOperand(2); } + void set_offset(llvm::Value *val) { Instr->setOperand(2, val); } + llvm::Value *get_stride() const { return Instr->getOperand(3); } + void set_stride(llvm::Value *val) { Instr->setOperand(3, val); } + llvm::Value *get_layout() const { return Instr->getOperand(4); } + void set_layout(llvm::Value *val) { Instr->setOperand(4, val); } }; /// This instruction fills a matrix with data from a 
groupshared array @@ -10635,7 +10606,7 @@ struct DxilInst_MatrixLoadFromMemory { // Validation support bool isAllowed() const { return true; } bool isArgumentListValid() const { - if (6 != llvm::dyn_cast(Instr)->getNumArgOperands()) + if (5 != llvm::dyn_cast(Instr)->getNumArgOperands()) return false; return true; } @@ -10643,23 +10614,20 @@ struct DxilInst_MatrixLoadFromMemory { bool requiresUniformInputs() const { return false; } // Operand indexes enum OperandIdx { - arg_matrixRef = 1, - arg_groupsharedArr = 2, - arg_offset = 3, - arg_stride = 4, - arg_layout = 5, + arg_groupsharedArr = 1, + arg_offset = 2, + arg_stride = 3, + arg_layout = 4, }; // Accessors - llvm::Value *get_matrixRef() const { return Instr->getOperand(1); } - void set_matrixRef(llvm::Value *val) { Instr->setOperand(1, val); } - llvm::Value *get_groupsharedArr() const { return Instr->getOperand(2); } - void set_groupsharedArr(llvm::Value *val) { Instr->setOperand(2, val); } - llvm::Value *get_offset() const { return Instr->getOperand(3); } - void set_offset(llvm::Value *val) { Instr->setOperand(3, val); } - llvm::Value *get_stride() const { return Instr->getOperand(4); } - void set_stride(llvm::Value *val) { Instr->setOperand(4, val); } - llvm::Value *get_layout() const { return Instr->getOperand(5); } - void set_layout(llvm::Value *val) { Instr->setOperand(5, val); } + llvm::Value *get_groupsharedArr() const { return Instr->getOperand(1); } + void set_groupsharedArr(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_offset() const { return Instr->getOperand(2); } + void set_offset(llvm::Value *val) { Instr->setOperand(2, val); } + llvm::Value *get_stride() const { return Instr->getOperand(3); } + void set_stride(llvm::Value *val) { Instr->setOperand(3, val); } + llvm::Value *get_layout() const { return Instr->getOperand(4); } + void set_layout(llvm::Value *val) { Instr->setOperand(4, val); } }; /// This instruction returns the number of elements stored in thread-local @@ 
-10683,11 +10651,11 @@ struct DxilInst_MatrixLength { bool requiresUniformInputs() const { return false; } // Operand indexes enum OperandIdx { - arg_matrixRef = 1, + arg_matrix = 1, }; // Accessors - llvm::Value *get_matrixRef() const { return Instr->getOperand(1); } - void set_matrixRef(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_matrix() const { return Instr->getOperand(1); } + void set_matrix(llvm::Value *val) { Instr->setOperand(1, val); } }; /// This instruction returns a two element vector containing the column and row @@ -10711,12 +10679,12 @@ struct DxilInst_MatrixGetCoordinate { bool requiresUniformInputs() const { return false; } // Operand indexes enum OperandIdx { - arg_matrixRef = 1, + arg_matrix = 1, arg_threadLocalIndex = 2, }; // Accessors - llvm::Value *get_matrixRef() const { return Instr->getOperand(1); } - void set_matrixRef(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_matrix() const { return Instr->getOperand(1); } + void set_matrix(llvm::Value *val) { Instr->setOperand(1, val); } llvm::Value *get_threadLocalIndex() const { return Instr->getOperand(2); } void set_threadLocalIndex(llvm::Value *val) { Instr->setOperand(2, val); } }; @@ -10742,12 +10710,12 @@ struct DxilInst_MatrixGetElement { bool requiresUniformInputs() const { return false; } // Operand indexes enum OperandIdx { - arg_matrixRef = 1, + arg_matrix = 1, arg_threadLocalIndex = 2, }; // Accessors - llvm::Value *get_matrixRef() const { return Instr->getOperand(1); } - void set_matrixRef(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_matrix() const { return Instr->getOperand(1); } + void set_matrix(llvm::Value *val) { Instr->setOperand(1, val); } llvm::Value *get_threadLocalIndex() const { return Instr->getOperand(2); } void set_threadLocalIndex(llvm::Value *val) { Instr->setOperand(2, val); } }; @@ -10773,13 +10741,13 @@ struct DxilInst_MatrixSetElement { bool requiresUniformInputs() const { return false; } // 
Operand indexes enum OperandIdx { - arg_matrixRef = 1, + arg_matrix = 1, arg_threadLocalIndex = 2, arg_value = 3, }; // Accessors - llvm::Value *get_matrixRef() const { return Instr->getOperand(1); } - void set_matrixRef(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_matrix() const { return Instr->getOperand(1); } + void set_matrix(llvm::Value *val) { Instr->setOperand(1, val); } llvm::Value *get_threadLocalIndex() const { return Instr->getOperand(2); } void set_threadLocalIndex(llvm::Value *val) { Instr->setOperand(2, val); } llvm::Value *get_value() const { return Instr->getOperand(3); } @@ -10806,15 +10774,15 @@ struct DxilInst_MatrixStoreToDescriptor { bool requiresUniformInputs() const { return false; } // Operand indexes enum OperandIdx { - arg_matrixRef = 1, + arg_matrix = 1, arg_handle = 2, arg_offset = 3, arg_stride = 4, arg_layout = 5, }; // Accessors - llvm::Value *get_matrixRef() const { return Instr->getOperand(1); } - void set_matrixRef(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_matrix() const { return Instr->getOperand(1); } + void set_matrix(llvm::Value *val) { Instr->setOperand(1, val); } llvm::Value *get_handle() const { return Instr->getOperand(2); } void set_handle(llvm::Value *val) { Instr->setOperand(2, val); } llvm::Value *get_offset() const { return Instr->getOperand(3); } @@ -10845,15 +10813,15 @@ struct DxilInst_MatrixStoreToMemory { bool requiresUniformInputs() const { return false; } // Operand indexes enum OperandIdx { - arg_matrixRef = 1, + arg_matrix = 1, arg_groupsharedArr = 2, arg_offset = 3, arg_stride = 4, arg_layout = 5, }; // Accessors - llvm::Value *get_matrixRef() const { return Instr->getOperand(1); } - void set_matrixRef(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_matrix() const { return Instr->getOperand(1); } + void set_matrix(llvm::Value *val) { Instr->setOperand(1, val); } llvm::Value *get_groupsharedArr() const { return Instr->getOperand(2); } void 
set_groupsharedArr(llvm::Value *val) { Instr->setOperand(2, val); } llvm::Value *get_offset() const { return Instr->getOperand(3); } @@ -10898,7 +10866,7 @@ struct DxilInst_MatrixMulOp { // Validation support bool isAllowed() const { return true; } bool isArgumentListValid() const { - if (4 != llvm::dyn_cast(Instr)->getNumArgOperands()) + if (3 != llvm::dyn_cast(Instr)->getNumArgOperands()) return false; return true; } @@ -10906,17 +10874,14 @@ struct DxilInst_MatrixMulOp { bool requiresUniformInputs() const { return false; } // Operand indexes enum OperandIdx { - arg_matrixRefA = 1, - arg_matrixRefB = 2, - arg_matrixRefC = 3, + arg_matrixA = 1, + arg_matrixB = 2, }; // Accessors - llvm::Value *get_matrixRefA() const { return Instr->getOperand(1); } - void set_matrixRefA(llvm::Value *val) { Instr->setOperand(1, val); } - llvm::Value *get_matrixRefB() const { return Instr->getOperand(2); } - void set_matrixRefB(llvm::Value *val) { Instr->setOperand(2, val); } - llvm::Value *get_matrixRefC() const { return Instr->getOperand(3); } - void set_matrixRefC(llvm::Value *val) { Instr->setOperand(3, val); } + llvm::Value *get_matrixA() const { return Instr->getOperand(1); } + void set_matrixA(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_matrixB() const { return Instr->getOperand(2); } + void set_matrixB(llvm::Value *val) { Instr->setOperand(2, val); } }; /// This instruction accumulate A or B matrix into Accumulator matrix following @@ -10940,14 +10905,14 @@ struct DxilInst_MatrixAccumulate { bool requiresUniformInputs() const { return false; } // Operand indexes enum OperandIdx { - arg_matrixRefRHS = 1, - arg_matrixRefLHS = 2, + arg_matrixLHS = 1, + arg_matrixRHS = 2, }; // Accessors - llvm::Value *get_matrixRefRHS() const { return Instr->getOperand(1); } - void set_matrixRefRHS(llvm::Value *val) { Instr->setOperand(1, val); } - llvm::Value *get_matrixRefLHS() const { return Instr->getOperand(2); } - void set_matrixRefLHS(llvm::Value *val) { 
Instr->setOperand(2, val); } + llvm::Value *get_matrixLHS() const { return Instr->getOperand(1); } + void set_matrixLHS(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_matrixRHS() const { return Instr->getOperand(2); } + void set_matrixRHS(llvm::Value *val) { Instr->setOperand(2, val); } }; /// This instruction Multiplies a MxK dimension matrix and a K sized input @@ -10971,13 +10936,13 @@ struct DxilInst_MatrixVecMul { bool requiresUniformInputs() const { return false; } // Operand indexes enum OperandIdx { - arg_matrixRef = 1, + arg_matrix = 1, arg_inputVector = 2, arg_interpretation = 3, }; // Accessors - llvm::Value *get_matrixRef() const { return Instr->getOperand(1); } - void set_matrixRef(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_matrix() const { return Instr->getOperand(1); } + void set_matrix(llvm::Value *val) { Instr->setOperand(1, val); } llvm::Value *get_inputVector() const { return Instr->getOperand(2); } void set_inputVector(llvm::Value *val) { Instr->setOperand(2, val); } llvm::Value *get_interpretation() const { return Instr->getOperand(3); } @@ -11005,15 +10970,15 @@ struct DxilInst_MatrixVecMulAdd { bool requiresUniformInputs() const { return false; } // Operand indexes enum OperandIdx { - arg_matrixRef = 1, + arg_matrix = 1, arg_inputVector = 2, arg_inputInterpretation = 3, arg_biasVector = 4, arg_biasInterpretation = 5, }; // Accessors - llvm::Value *get_matrixRef() const { return Instr->getOperand(1); } - void set_matrixRef(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_matrix() const { return Instr->getOperand(1); } + void set_matrix(llvm::Value *val) { Instr->setOperand(1, val); } llvm::Value *get_inputVector() const { return Instr->getOperand(2); } void set_inputVector(llvm::Value *val) { Instr->setOperand(2, val); } llvm::Value *get_inputInterpretation() const { return Instr->getOperand(3); } @@ -11045,15 +11010,15 @@ struct DxilInst_MatrixAccumulateToDescriptor { bool 
requiresUniformInputs() const { return false; } // Operand indexes enum OperandIdx { - arg_matrixRef = 1, + arg_matrix = 1, arg_handle = 2, arg_offset = 3, arg_stride = 4, arg_layout = 5, }; // Accessors - llvm::Value *get_matrixRef() const { return Instr->getOperand(1); } - void set_matrixRef(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_matrix() const { return Instr->getOperand(1); } + void set_matrix(llvm::Value *val) { Instr->setOperand(1, val); } llvm::Value *get_handle() const { return Instr->getOperand(2); } void set_handle(llvm::Value *val) { Instr->setOperand(2, val); } llvm::Value *get_offset() const { return Instr->getOperand(3); } @@ -11104,8 +11069,8 @@ struct DxilInst_MatrixAccumulateToMemory { void set_layout(llvm::Value *val) { Instr->setOperand(5, val); } }; -/// This instruction Outer products an M sized vector and a K sized vector -/// producing an MxK matrix +/// This instruction Outer products an M sized vector and a N sized vector +/// producing an MxN matrix struct DxilInst_MatrixOuterProduct { llvm::Instruction *Instr; // Construction and identification @@ -11117,7 +11082,7 @@ struct DxilInst_MatrixOuterProduct { // Validation support bool isAllowed() const { return true; } bool isArgumentListValid() const { - if (4 != llvm::dyn_cast(Instr)->getNumArgOperands()) + if (3 != llvm::dyn_cast(Instr)->getNumArgOperands()) return false; return true; } @@ -11125,17 +11090,14 @@ struct DxilInst_MatrixOuterProduct { bool requiresUniformInputs() const { return false; } // Operand indexes enum OperandIdx { - arg_matrixRef = 1, - arg_vectorA = 2, - arg_vectorB = 3, + arg_vectorA = 1, + arg_vectorB = 2, }; // Accessors - llvm::Value *get_matrixRef() const { return Instr->getOperand(1); } - void set_matrixRef(llvm::Value *val) { Instr->setOperand(1, val); } - llvm::Value *get_vectorA() const { return Instr->getOperand(2); } - void set_vectorA(llvm::Value *val) { Instr->setOperand(2, val); } - llvm::Value *get_vectorB() const { return 
Instr->getOperand(3); } - void set_vectorB(llvm::Value *val) { Instr->setOperand(3, val); } + llvm::Value *get_vectorA() const { return Instr->getOperand(1); } + void set_vectorA(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_vectorB() const { return Instr->getOperand(2); } + void set_vectorB(llvm::Value *val) { Instr->setOperand(2, val); } }; // INSTR-HELPER:END } // namespace hlsl diff --git a/include/dxc/HlslIntrinsicOp.h b/include/dxc/HlslIntrinsicOp.h index 3eb605fe84..66d04eb109 100644 --- a/include/dxc/HlslIntrinsicOp.h +++ b/include/dxc/HlslIntrinsicOp.h @@ -112,25 +112,24 @@ enum class IntrinsicOp { IOP_WorldToObject3x4 = 100, IOP_WorldToObject4x3 = 101, IOP___builtin_LinAlg_CopyConvertMatrix = 405, - IOP___builtin_LinAlg_CreateMatrix = 406, - IOP___builtin_LinAlg_FillMatrix = 407, - IOP___builtin_LinAlg_MatrixAccumulate = 416, - IOP___builtin_LinAlg_MatrixAccumulateToDescriptor = 420, - IOP___builtin_LinAlg_MatrixAccumulateToMemory = 421, - IOP___builtin_LinAlg_MatrixGetCoordinate = 408, - IOP___builtin_LinAlg_MatrixGetElement = 409, - IOP___builtin_LinAlg_MatrixLength = 410, - IOP___builtin_LinAlg_MatrixLoadFromDescriptor = 411, - IOP___builtin_LinAlg_MatrixLoadFromMemory = 412, - IOP___builtin_LinAlg_MatrixMatrixMultiply = 417, - IOP___builtin_LinAlg_MatrixMatrixMultiplyAccumulate = 418, - IOP___builtin_LinAlg_MatrixOuterProduct = 422, - IOP___builtin_LinAlg_MatrixQueryAccumulatorLayout = 419, - IOP___builtin_LinAlg_MatrixSetElement = 413, - IOP___builtin_LinAlg_MatrixStoreToDescriptor = 414, - IOP___builtin_LinAlg_MatrixStoreToMemory = 415, - IOP___builtin_LinAlg_MatrixVectorMultiply = 423, - IOP___builtin_LinAlg_MatrixVectorMultiplyAdd = 424, + IOP___builtin_LinAlg_FillMatrix = 406, + IOP___builtin_LinAlg_MatrixAccumulate = 415, + IOP___builtin_LinAlg_MatrixAccumulateToDescriptor = 419, + IOP___builtin_LinAlg_MatrixAccumulateToMemory = 420, + IOP___builtin_LinAlg_MatrixGetCoordinate = 407, + IOP___builtin_LinAlg_MatrixGetElement = 
408, + IOP___builtin_LinAlg_MatrixLength = 409, + IOP___builtin_LinAlg_MatrixLoadFromDescriptor = 410, + IOP___builtin_LinAlg_MatrixLoadFromMemory = 411, + IOP___builtin_LinAlg_MatrixMatrixMultiply = 416, + IOP___builtin_LinAlg_MatrixMatrixMultiplyAccumulate = 417, + IOP___builtin_LinAlg_MatrixOuterProduct = 421, + IOP___builtin_LinAlg_MatrixQueryAccumulatorLayout = 418, + IOP___builtin_LinAlg_MatrixSetElement = 412, + IOP___builtin_LinAlg_MatrixStoreToDescriptor = 413, + IOP___builtin_LinAlg_MatrixStoreToMemory = 414, + IOP___builtin_LinAlg_MatrixVectorMultiply = 422, + IOP___builtin_LinAlg_MatrixVectorMultiplyAdd = 423, IOP___builtin_MatVecMul = 390, IOP___builtin_MatVecMulAdd = 391, IOP___builtin_OuterProductAccumulate = 392, @@ -431,7 +430,7 @@ enum class IntrinsicOp { IOP_usign = 355, MOP_InterlockedUMax = 356, MOP_InterlockedUMin = 357, - Num_Intrinsics = 425, + Num_Intrinsics = 424, }; inline bool HasUnsignedIntrinsicOpcode(IntrinsicOp opcode) { switch (opcode) { diff --git a/lib/DXIL/DxilOperations.cpp b/lib/DXIL/DxilOperations.cpp index deb000e703..909f64b860 100644 --- a/lib/DXIL/DxilOperations.cpp +++ b/lib/DXIL/DxilOperations.cpp @@ -2823,95 +2823,96 @@ static const OP::OpCodeProperty ExperimentalOps_OpCodeProps[] = { {{0x2}}, {{0x0}}}, // Overloads: f - // Linear Algebra Operations - {OC::CreateMatrix, - "CreateMatrix", - OCC::CreateMatrix, - "createMatrix", + {OC::LinAlgMatrixReserved0, + "LinAlgMatrixReserved0", + OCC::Reserved, + "reserved", Attribute::None, 0, {}, {}}, // Overloads: v + + // Linear Algebra Operations {OC::FillMatrix, "FillMatrix", OCC::FillMatrix, "fillMatrix", Attribute::None, - 1, - {{0x63}}, - {{0x0}}}, // Overloads: hfwi + 2, + {{0x200}, {0x63}}, + {{0x0}, {0x0}}}, // Overloads: o,hfwi {OC::CopyConvertMatrix, "CopyConvertMatrix", OCC::CopyConvertMatrix, "copyConvertMatrix", Attribute::None, - 0, - {}, - {}}, // Overloads: v + 2, + {{0x200}, {0x200}}, + {{0x0}, {0x0}}}, // Overloads: o,o {OC::MatrixLoadFromDescriptor, 
"MatrixLoadFromDescriptor", OCC::MatrixLoadFromDescriptor, "matrixLoadFromDescriptor", Attribute::None, - 0, - {}, - {}}, // Overloads: v + 1, + {{0x200}}, + {{0x0}}}, // Overloads: o {OC::MatrixLoadFromMemory, "MatrixLoadFromMemory", OCC::MatrixLoadFromMemory, "matrixLoadFromMemory", Attribute::None, - 0, - {}, - {}}, // Overloads: v + 2, + {{0x200}, {0x63}}, + {{0x0}, {0x0}}}, // Overloads: o,hfwi {OC::MatrixLength, "MatrixLength", OCC::MatrixLength, "matrixLength", Attribute::None, - 0, - {}, - {}}, // Overloads: v + 1, + {{0x200}}, + {{0x0}}}, // Overloads: o {OC::MatrixGetCoordinate, "MatrixGetCoordinate", OCC::MatrixGetCoordinate, "matrixGetCoordinate", Attribute::None, - 0, - {}, - {}}, // Overloads: v + 1, + {{0x200}}, + {{0x0}}}, // Overloads: o {OC::MatrixGetElement, "MatrixGetElement", OCC::MatrixGetElement, "matrixGetElement", Attribute::None, - 1, - {{0x63}}, - {{0x0}}}, // Overloads: hfwi + 2, + {{0x63}, {0x200}}, + {{0x0}, {0x0}}}, // Overloads: hfwi,o {OC::MatrixSetElement, "MatrixSetElement", OCC::MatrixSetElement, "matrixSetElement", Attribute::None, - 1, - {{0x63}}, - {{0x0}}}, // Overloads: hfwi + 3, + {{0x200}, {0x200}, {0x63}}, + {{0x0}, {0x0}, {0x0}}}, // Overloads: o,o,hfwi {OC::MatrixStoreToDescriptor, "MatrixStoreToDescriptor", OCC::MatrixStoreToDescriptor, "matrixStoreToDescriptor", Attribute::None, - 0, - {}, - {}}, // Overloads: v + 1, + {{0x200}}, + {{0x0}}}, // Overloads: o {OC::MatrixStoreToMemory, "MatrixStoreToMemory", OCC::MatrixStoreToMemory, "matrixStoreToMemory", Attribute::None, - 0, - {}, - {}}, // Overloads: v + 2, + {{0x200}, {0x63}}, + {{0x0}, {0x0}}}, // Overloads: o,hfwi {OC::MatrixQueryAccumulatorLayout, "MatrixQueryAccumulatorLayout", OCC::MatrixQueryAccumulatorLayout, @@ -2925,76 +2926,76 @@ static const OP::OpCodeProperty ExperimentalOps_OpCodeProps[] = { OCC::MatrixMulOp, "matrixMulOp", Attribute::None, - 0, - {}, - {}}, // Overloads: v + 3, + {{0x200}, {0x200}, {0x200}}, + {{0x0}, {0x0}, {0x0}}}, // Overloads: 
o,o,o {OC::MatrixAccumulate, "MatrixAccumulate", OCC::MatrixAccumulate, "matrixAccumulate", Attribute::None, - 0, - {}, - {}}, // Overloads: v + 3, + {{0x200}, {0x200}, {0x200}}, + {{0x0}, {0x0}, {0x0}}}, // Overloads: o,o,o {OC::MatrixVecMul, "MatrixVecMul", OCC::MatrixVecMul, "matrixVecMul", Attribute::None, - 2, - {{0x400}, {0x400}}, - {{0x63}, {0x63}}}, // Overloads: getNumParams() <= 2) return nullptr; return FT->getParamType(2); @@ -6860,6 +6856,9 @@ llvm::Type *OP::GetOverloadType(OpCode opCode, llvm::Function *F) { case OpCode::VectorReduceAnd: case OpCode::VectorReduceOr: case OpCode::FDot: + case OpCode::MatrixLength: + case OpCode::MatrixStoreToDescriptor: + case OpCode::MatrixAccumulateToDescriptor: if (FT->getNumParams() <= 1) return nullptr; return FT->getParamType(1); @@ -6871,7 +6870,6 @@ llvm::Type *OP::GetOverloadType(OpCode opCode, llvm::Function *F) { case OpCode::CalculateLOD: case OpCode::ReportHit: case OpCode::HitObject_FromRayQueryWithAttrs: - case OpCode::MatrixSetElement: if (FT->getNumParams() <= 3) return nullptr; return FT->getParamType(3); @@ -6982,22 +6980,11 @@ llvm::Type *OP::GetOverloadType(OpCode opCode, llvm::Function *F) { case OpCode::GetGroupWaveIndex: case OpCode::GetGroupWaveCount: case OpCode::ClusterID: - case OpCode::CreateMatrix: - case OpCode::CopyConvertMatrix: - case OpCode::MatrixLoadFromDescriptor: - case OpCode::MatrixLoadFromMemory: - case OpCode::MatrixLength: - case OpCode::MatrixGetCoordinate: - case OpCode::MatrixStoreToDescriptor: - case OpCode::MatrixStoreToMemory: - case OpCode::MatrixQueryAccumulatorLayout: - case OpCode::MatrixMulOp: - case OpCode::MatrixAccumulate: - case OpCode::MatrixAccumulateToDescriptor: - case OpCode::MatrixAccumulateToMemory: case OpCode::LinAlgMatrixReserved0: + case OpCode::MatrixQueryAccumulatorLayout: case OpCode::LinAlgMatrixReserved1: case OpCode::LinAlgMatrixReserved2: + case OpCode::LinAlgMatrixReserved3: return Type::getVoidTy(Ctx); case OpCode::QuadVote: return 
IntegerType::get(Ctx, 1); @@ -7018,7 +7005,8 @@ llvm::Type *OP::GetOverloadType(OpCode opCode, llvm::Function *F) { case OpCode::SampleCmpLevel: case OpCode::SampleCmpGrad: case OpCode::SampleCmpBias: - case OpCode::RawBufferVectorLoad: { + case OpCode::RawBufferVectorLoad: + case OpCode::MatrixGetCoordinate: { StructType *ST = cast(Ty); return ST->getElementType(0); } @@ -7030,29 +7018,44 @@ llvm::Type *OP::GetOverloadType(OpCode opCode, llvm::Function *F) { return cast(Ty)->getElementType(); case OpCode::MatVecMul: case OpCode::MatVecMulAdd: + case OpCode::FillMatrix: + case OpCode::CopyConvertMatrix: + case OpCode::MatrixLoadFromMemory: + case OpCode::MatrixGetElement: if (FT->getNumParams() < 2) return nullptr; return llvm::StructType::get(Ctx, {FT->getReturnType(), FT->getParamType(1)}); case OpCode::OuterProductAccumulate: + case OpCode::MatrixStoreToMemory: + case OpCode::MatrixAccumulateToMemory: if (FT->getNumParams() < 3) return nullptr; return llvm::StructType::get(Ctx, {FT->getParamType(1), FT->getParamType(2)}); + case OpCode::MatrixSetElement: + if (FT->getNumParams() < 4) + return nullptr; + return llvm::StructType::get( + Ctx, {FT->getReturnType(), FT->getParamType(1), FT->getParamType(3)}); + + case OpCode::MatrixMulOp: + case OpCode::MatrixAccumulate: case OpCode::MatrixVecMul: - case OpCode::MatrixVecMulAdd: + case OpCode::MatrixOuterProduct: if (FT->getNumParams() < 3) return nullptr; - return llvm::StructType::get(Ctx, - {FT->getReturnType(), FT->getParamType(2)}); + return llvm::StructType::get( + Ctx, {FT->getReturnType(), FT->getParamType(1), FT->getParamType(2)}); - case OpCode::MatrixOuterProduct: - if (FT->getNumParams() < 4) + case OpCode::MatrixVecMulAdd: + if (FT->getNumParams() < 5) return nullptr; return llvm::StructType::get(Ctx, - {FT->getParamType(2), FT->getParamType(3)}); + {FT->getReturnType(), FT->getParamType(1), + FT->getParamType(2), FT->getParamType(4)}); // OPCODE-OLOAD-TYPES:END default: diff --git 
a/lib/HLSL/HLOperationLower.cpp b/lib/HLSL/HLOperationLower.cpp index 2d6df1fc32..124a576206 100644 --- a/lib/HLSL/HLOperationLower.cpp +++ b/lib/HLSL/HLOperationLower.cpp @@ -7659,8 +7659,6 @@ constexpr IntrinsicLower gLowerTable[] = { {IntrinsicOp::IOP___builtin_LinAlg_CopyConvertMatrix, EmptyLower, DXIL::OpCode::CopyConvertMatrix}, - {IntrinsicOp::IOP___builtin_LinAlg_CreateMatrix, EmptyLower, - DXIL::OpCode::CreateMatrix}, {IntrinsicOp::IOP___builtin_LinAlg_FillMatrix, EmptyLower, DXIL::OpCode::FillMatrix}, {IntrinsicOp::IOP___builtin_LinAlg_MatrixGetCoordinate, EmptyLower, diff --git a/utils/hct/gen_intrin_main.txt b/utils/hct/gen_intrin_main.txt index 55085f7216..98b0a29318 100644 --- a/utils/hct/gen_intrin_main.txt +++ b/utils/hct/gen_intrin_main.txt @@ -396,10 +396,8 @@ void [[min_sm=6.10]] __builtin_VectorAccumulate(in LinAlg InputVector, in RWB // LinAlg intrinsics -// TODO: Update return type for CreateMatrix to MatrixRef // TODO: Replace all int MatrixRef with MatrixRef type // TODO: Replace all int GroupSharedMem with groupshared memory -void [[min_sm=6.10]] __builtin_LinAlg_CreateMatrix(); void [[min_sm=6.10]] __builtin_LinAlg_FillMatrix(int MatrixRef, numeric value); void [[min_sm=6.10]] __builtin_LinAlg_CopyConvertMatrix(int MatrixRefDest, int MatrixRefSrc, bool transpose); void [[min_sm=6.10]] __builtin_LinAlg_MatrixLoadFromDescriptor(int MatrixRef, resource buf, int32_only offset, int32_only stride, int32_only layout); diff --git a/utils/hct/hctdb.py b/utils/hct/hctdb.py index 61b13c72bb..8e0fe9d75a 100644 --- a/utils/hct/hctdb.py +++ b/utils/hct/hctdb.py @@ -1161,7 +1161,7 @@ def populate_categories_and_models_ExperimentalOps(self): # Thread/Wave/ThreadGroup scope operations for i in insts( - "CreateMatrix,MatrixQueryAccumulatorLayout," + "MatrixQueryAccumulatorLayout," + "MatrixLoadFromDescriptor,MatrixAccumulateToDescriptor," + "MatrixVecMul,MatrixVecMulAdd,MatrixOuterProduct" ): @@ -6334,31 +6334,17 @@ def populate_ExperimentalOps(self): ) # 
Linear Algebra Ops - add_dxil_op( - "CreateMatrix", - "CreateMatrix", - "creates a handle to a Matrix", - "v", - "", - [ - db_dxil_param( - 0, "i32", "", "operation result" - ), # TODO: %dx.types.MatrixRef - ], - ) + op_table.reserve_dxil_op_range("LinAlgMatrixReserved", 1) add_dxil_op( "FillMatrix", "FillMatrix", "fills a matrix with a scalar value", - "hfwi", + "o,hfwi", "", [ - db_dxil_param(0, "v", "", ""), - db_dxil_param( - 2, "i32", "matrixRef", "matrix to be filled" - ), # TODO: %dx.types.MatrixRef - db_dxil_param(3, "$o", "value", "value to fill matrix with"), + db_dxil_param(0, "$x0", "", "resulting matrix"), + db_dxil_param(2, "$x1", "value", "value to fill matrix with"), ], ) @@ -6366,17 +6352,12 @@ def populate_ExperimentalOps(self): "CopyConvertMatrix", "CopyConvertMatrix", "Converts and copies the element and use type of the source matrix to the destination matrix with optional transpose", - "v", + "o,o", "", [ - db_dxil_param(0, "v", "", ""), - db_dxil_param( - 2, "i32", "destMatrixRef", "matrix to be filled" - ), # TODO: %dx.types.MatrixRef - db_dxil_param( - 3, "i32", "srcMatrixRef", "matrix to fill matrix with" - ), # TODO: %dx.types.MatrixRef - db_dxil_param(4, "i1", "transpose", "should the matrix be transposed"), + db_dxil_param(0, "$x0", "", "resulting matrix"), + db_dxil_param(2, "$x1", "srcMatrix", "matrix to copy from"), + db_dxil_param(3, "i1", "transpose", "should the matrix be transposed"), ], ) @@ -6384,24 +6365,21 @@ def populate_ExperimentalOps(self): "MatrixLoadFromDescriptor", "MatrixLoadFromDescriptor", "fills a matrix with data from a [RW]ByteAddressBuffer", - "v", + "o", "", [ - db_dxil_param(0, "v", "", ""), - db_dxil_param( - 2, "i32", "matrixRef", "matrix to be filled" - ), # TODO: %dx.types.MatrixRef + db_dxil_param(0, "$o", "", "resulting matrix"), db_dxil_param( - 3, "res", "handle", "byte address buffer to fill matrix with" + 2, "res", "handle", "byte address buffer to fill matrix with" ), - db_dxil_param(4, "i32",
"offset", "starting offset in the buffer"), + db_dxil_param(3, "i32", "offset", "starting offset in the buffer"), db_dxil_param( - 5, + 4, "i32", "stride", "number of bytes between the start of each row or column", ), - db_dxil_param(6, "i32", "layout", "memory layout of matrix elements"), + db_dxil_param(5, "i32", "layout", "memory layout of matrix elements"), ], ) @@ -6409,25 +6387,22 @@ def populate_ExperimentalOps(self): "MatrixLoadFromMemory", "MatrixLoadFromMemory", "fills a matrix with data from a groupshared array", - "v", # TODO: overload needs to be updated + "o,hfwi", # TODO: needs to be updated for groupshared "", [ - db_dxil_param(0, "v", "", ""), - db_dxil_param( - 2, "i32", "matrixRef", "matrix to be filled" - ), # TODO: %dx.types.MatrixRef + db_dxil_param(0, "$x0", "", "resulting matrix"), # TODO: [Ty] * addrspace(4), ; groupshared T[M * N] db_dxil_param( - 3, "i32", "groupsharedArr", "groupshared array to fill matrix with" + 2, "$x1", "groupsharedArr", "groupshared array to fill matrix with" ), - db_dxil_param(4, "i32", "offset", "starting offset in the array"), + db_dxil_param(3, "i32", "offset", "starting offset in the array"), db_dxil_param( - 5, + 4, "i32", "stride", "number of bytes between the start of each row or column", ), - db_dxil_param(6, "i32", "layout", "memory layout of matrix elements"), + db_dxil_param(5, "i32", "layout", "memory layout of matrix elements"), ], ) @@ -6435,13 +6410,11 @@ def populate_ExperimentalOps(self): "MatrixLength", "MatrixLength", "returns the number of elements stored in thread-local storage on the active thread for the provided matrix", - "v", + "o", "", [ db_dxil_param(0, "i32", "", "operation result"), - db_dxil_param( - 2, "i32", "matrixRef", "matrix to be examined" - ), # TODO: %dx.types.MatrixRef + db_dxil_param(2, "$o", "matrix", "matrix to be examined"), ], ) @@ -6449,13 +6422,13 @@ def populate_ExperimentalOps(self): "MatrixGetCoordinate", "MatrixGetCoordinate", "returns a two element vector 
containing the column and row of the matrix that the thread-local index corresponds to", - "v", + "o", "", [ - db_dxil_param(0, "i32", "", "operation result"), # TODO: <2 x i32> db_dxil_param( - 2, "i32", "matrixRef", "matrix to be examined" - ), # TODO: %dx.types.MatrixRef + 0, "$vec4", "", "operation result" + ), # TODO: this should be <2 x i32> + db_dxil_param(2, "$o", "matrix", "matrix to be examined"), db_dxil_param( 3, "i32", "threadLocalIndex", "thread-local index to be examined" ), @@ -6466,13 +6439,11 @@ def populate_ExperimentalOps(self): "MatrixGetElement", "MatrixGetElement", "returns the element of the matrix corresponding to the provided thread-local index", - "hfwi", + "hfwi,o", "", [ - db_dxil_param(0, "$o", "", "operation result"), - db_dxil_param( - 2, "i32", "matrixRef", "matrix to be examined" - ), # TODO: %dx.types.MatrixRef + db_dxil_param(0, "$x0", "", "operation result"), + db_dxil_param(2, "$x1", "matrix", "matrix to be examined"), db_dxil_param( 3, "i32", "threadLocalIndex", "thread-local index to be examined" ), @@ -6483,17 +6454,15 @@ def populate_ExperimentalOps(self): "MatrixSetElement", "MatrixSetElement", "sets the element of the matrix corresponding to the provided thread-local index", - "hfwi", + "o,o,hfwi", "", [ - db_dxil_param(0, "v", "", ""), - db_dxil_param( - 2, "i32", "matrixRef", "matrix to be examined" - ), # TODO: %dx.types.MatrixRef + db_dxil_param(0, "$x0", "", "resulting matrix"), + db_dxil_param(2, "$x1", "matrix", "matrix to be examined"), db_dxil_param( 3, "i32", "threadLocalIndex", "thread-local index to be examined" ), - db_dxil_param(4, "$o", "value", "value to set"), + db_dxil_param(4, "$x2", "value", "value to set"), ], ) @@ -6501,13 +6470,11 @@ def populate_ExperimentalOps(self): "MatrixStoreToDescriptor", "MatrixStoreToDescriptor", "stores a matrix to a RWByteAddressBuffer", - "v", + "o", "", [ db_dxil_param(0, "v", "", ""), - db_dxil_param( - 2, "i32", "matrixRef", "matrix to be stored" - ), # TODO: 
%dx.types.MatrixRef + db_dxil_param(2, "$o", "matrix", "matrix to be stored"), db_dxil_param(3, "res", "handle", "byte address buffer to store into"), db_dxil_param(4, "i32", "offset", "starting offset in the buffer"), db_dxil_param( @@ -6524,16 +6491,14 @@ def populate_ExperimentalOps(self): "MatrixStoreToMemory", "MatrixStoreToMemory", "stores a matrix to groupshared memory", - "v", # TODO: overload needs to be updated + "o,hfwi", # TODO: needs to be updated for groupshared "", [ db_dxil_param(0, "v", "", ""), - db_dxil_param( - 2, "i32", "matrixRef", "matrix to be stored" - ), # TODO: %dx.types.MatrixRef + db_dxil_param(2, "$x0", "matrix", "matrix to be stored"), # TODO: [Ty] * addrspace(4), ; groupshared T[M * N] db_dxil_param( - 3, "i32", "groupsharedArr", "groupshared array to store into" + 3, "$x1", "groupsharedArr", "groupshared array to store into" ), db_dxil_param(4, "i32", "offset", "starting offset in the array"), db_dxil_param( @@ -6561,19 +6526,12 @@ def populate_ExperimentalOps(self): "MatrixMulOp", "MatrixMulOp", "applies a multiplication op to matrix C using A and B as parameters", - "v", + "o,o,o", "", [ - db_dxil_param(0, "v", "", ""), - db_dxil_param( - 2, "i32", "matrixRefA", "matrix A" - ), # TODO: %dx.types.MatrixRef - db_dxil_param( - 3, "i32", "matrixRefB", "matrix B" - ), # TODO: %dx.types.MatrixRef - db_dxil_param( - 4, "i32", "matrixRefC", "matrix C" - ), # TODO: %dx.types.MatrixRef + db_dxil_param(0, "$x0", "", "resulting matrix"), + db_dxil_param(2, "$x1", "matrixA", "A matrix"), + db_dxil_param(3, "$x2", "matrixB", "B matrix"), ], ) @@ -6581,16 +6539,12 @@ def populate_ExperimentalOps(self): "MatrixAccumulate", "MatrixAccumulate", "accumulate A or B matrix into Accumulator matrix following LHS += RHS", - "v", + "o,o,o", "", [ - db_dxil_param(0, "v", "", ""), - db_dxil_param( - 2, "i32", "matrixRefRHS", "A or B matrix" - ), # TODO: %dx.types.MatrixRef - db_dxil_param( - 3, "i32", "matrixRefLHS", "Accumulator matrix" - ), # TODO: 
%dx.types.MatrixRef + db_dxil_param(0, "$x0", "", "resulting matrix"), + db_dxil_param(2, "$x1", "matrixLHS", "Accumulator matrix"), + db_dxil_param(3, "$x2", "matrixRHS", "A or B matrix"), ], ) @@ -6598,14 +6552,12 @@ def populate_ExperimentalOps(self): "MatrixVecMul", "MatrixVecMul", "Multiplies a MxK dimension matrix and a K sized input vector", - "