From 2870d8947fcbc9c0b4005ec22ed36eabb135b0be Mon Sep 17 00:00:00 2001 From: Ivan Butygin Date: Mon, 15 Dec 2025 15:59:09 +0100 Subject: [PATCH 1/3] [mlir][amdgpu] Expose waitcnt bitpacking infra --- llvm/include/llvm/TargetParser/TargetParser.h | 186 ++++++++++++++++++ llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 180 ----------------- .../AMDGPUToROCDL/AMDGPUToROCDL.cpp | 53 ++--- 3 files changed, 197 insertions(+), 222 deletions(-) diff --git a/llvm/include/llvm/TargetParser/TargetParser.h b/llvm/include/llvm/TargetParser/TargetParser.h index 9dfa50c1ad1ba..263f4036d9b2f 100644 --- a/llvm/include/llvm/TargetParser/TargetParser.h +++ b/llvm/include/llvm/TargetParser/TargetParser.h @@ -192,6 +192,192 @@ LLVM_ABI IsaVersion getIsaVersion(StringRef GPU); /// default target features with entries overridden by \p Features. LLVM_ABI std::pair fillAMDGPUFeatureMap(StringRef GPU, const Triple &T, StringMap &Features); + +/// Represents the counter values to wait for in an s_waitcnt instruction. +/// +/// Large values (including the maximum possible integer) can be used to +/// represent "don't care" waits. +struct Waitcnt { + unsigned LoadCnt = ~0u; // Corresponds to Vmcnt prior to gfx12. + unsigned ExpCnt = ~0u; + unsigned DsCnt = ~0u; // Corresponds to LGKMcnt prior to gfx12. + unsigned StoreCnt = ~0u; // Corresponds to VScnt on gfx10/gfx11. + unsigned SampleCnt = ~0u; // gfx12+ only. + unsigned BvhCnt = ~0u; // gfx12+ only. + unsigned KmCnt = ~0u; // gfx12+ only. + unsigned XCnt = ~0u; // gfx1250. + + Waitcnt() = default; + // Pre-gfx12 constructor. + Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt, unsigned VsCnt) + : LoadCnt(VmCnt), ExpCnt(ExpCnt), DsCnt(LgkmCnt), StoreCnt(VsCnt) {} + + // gfx12+ constructor. 
+ Waitcnt(unsigned LoadCnt, unsigned ExpCnt, unsigned DsCnt, unsigned StoreCnt, + unsigned SampleCnt, unsigned BvhCnt, unsigned KmCnt, unsigned XCnt) + : LoadCnt(LoadCnt), ExpCnt(ExpCnt), DsCnt(DsCnt), StoreCnt(StoreCnt), + SampleCnt(SampleCnt), BvhCnt(BvhCnt), KmCnt(KmCnt), XCnt(XCnt) {} + + bool hasWait() const { return StoreCnt != ~0u || hasWaitExceptStoreCnt(); } + + bool hasWaitExceptStoreCnt() const { + return LoadCnt != ~0u || ExpCnt != ~0u || DsCnt != ~0u || + SampleCnt != ~0u || BvhCnt != ~0u || KmCnt != ~0u || XCnt != ~0u; + } + + bool hasWaitStoreCnt() const { return StoreCnt != ~0u; } + + Waitcnt combined(const Waitcnt &Other) const { + // Does the right thing provided self and Other are either both pre-gfx12 + // or both gfx12+. + return Waitcnt( + std::min(LoadCnt, Other.LoadCnt), std::min(ExpCnt, Other.ExpCnt), + std::min(DsCnt, Other.DsCnt), std::min(StoreCnt, Other.StoreCnt), + std::min(SampleCnt, Other.SampleCnt), std::min(BvhCnt, Other.BvhCnt), + std::min(KmCnt, Other.KmCnt), std::min(XCnt, Other.XCnt)); + } + + friend raw_ostream &operator<<(raw_ostream &OS, const AMDGPU::Waitcnt &Wait); +}; + +// The following methods are only meaningful on targets that support +// S_WAITCNT. + +/// \returns Vmcnt bit mask for given isa \p Version. +LLVM_ABI unsigned getVmcntBitMask(const IsaVersion &Version); + +/// \returns Expcnt bit mask for given isa \p Version. +LLVM_ABI unsigned getExpcntBitMask(const IsaVersion &Version); + +/// \returns Lgkmcnt bit mask for given isa \p Version. +LLVM_ABI unsigned getLgkmcntBitMask(const IsaVersion &Version); + +/// \returns Waitcnt bit mask for given isa \p Version. +LLVM_ABI unsigned getWaitcntBitMask(const IsaVersion &Version); + +/// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version. +LLVM_ABI unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt); + +/// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version. 
+LLVM_ABI unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt); + +/// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version. +LLVM_ABI unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt); + +/// Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa +/// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and +/// \p Lgkmcnt respectively. Should not be used on gfx12+, the instruction +/// which needs it is deprecated +/// +/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows: +/// \p Vmcnt = \p Waitcnt[3:0] (pre-gfx9) +/// \p Vmcnt = \p Waitcnt[15:14,3:0] (gfx9,10) +/// \p Vmcnt = \p Waitcnt[15:10] (gfx11) +/// \p Expcnt = \p Waitcnt[6:4] (pre-gfx11) +/// \p Expcnt = \p Waitcnt[2:0] (gfx11) +/// \p Lgkmcnt = \p Waitcnt[11:8] (pre-gfx10) +/// \p Lgkmcnt = \p Waitcnt[13:8] (gfx10) +/// \p Lgkmcnt = \p Waitcnt[9:4] (gfx11) +/// +LLVM_ABI void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt, + unsigned &Vmcnt, unsigned &Expcnt, + unsigned &Lgkmcnt); + +LLVM_ABI Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded); + +/// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version. +LLVM_ABI unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt, + unsigned Vmcnt); + +/// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version. +LLVM_ABI unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt, + unsigned Expcnt); + +/// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version. +LLVM_ABI unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt, + unsigned Lgkmcnt); + +/// Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa +/// \p Version. 
Should not be used on gfx12+, the instruction which needs +/// it is deprecated +/// +/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded as follows: +/// Waitcnt[2:0] = \p Expcnt (gfx11+) +/// Waitcnt[3:0] = \p Vmcnt (pre-gfx9) +/// Waitcnt[3:0] = \p Vmcnt[3:0] (gfx9,10) +/// Waitcnt[6:4] = \p Expcnt (pre-gfx11) +/// Waitcnt[9:4] = \p Lgkmcnt (gfx11) +/// Waitcnt[11:8] = \p Lgkmcnt (pre-gfx10) +/// Waitcnt[13:8] = \p Lgkmcnt (gfx10) +/// Waitcnt[15:10] = \p Vmcnt (gfx11) +/// Waitcnt[15:14] = \p Vmcnt[5:4] (gfx9,10) +/// +/// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given +/// isa \p Version. +/// +LLVM_ABI unsigned encodeWaitcnt(const IsaVersion &Version, unsigned Vmcnt, + unsigned Expcnt, unsigned Lgkmcnt); + +LLVM_ABI unsigned encodeWaitcnt(const IsaVersion &Version, + const Waitcnt &Decoded); + +// The following methods are only meaningful on targets that support +// S_WAIT_*CNT, introduced with gfx12. + +/// \returns Loadcnt bit mask for given isa \p Version. +/// Returns 0 for versions that do not support LOADcnt +LLVM_ABI unsigned getLoadcntBitMask(const IsaVersion &Version); + +/// \returns Samplecnt bit mask for given isa \p Version. +/// Returns 0 for versions that do not support SAMPLEcnt +LLVM_ABI unsigned getSamplecntBitMask(const IsaVersion &Version); + +/// \returns Bvhcnt bit mask for given isa \p Version. +/// Returns 0 for versions that do not support BVHcnt +LLVM_ABI unsigned getBvhcntBitMask(const IsaVersion &Version); + +/// \returns Dscnt bit mask for given isa \p Version. +/// Returns 0 for versions that do not support DScnt +LLVM_ABI unsigned getDscntBitMask(const IsaVersion &Version); + +/// \returns Dscnt bit mask for given isa \p Version. +/// Returns 0 for versions that do not support KMcnt +LLVM_ABI unsigned getKmcntBitMask(const IsaVersion &Version); + +/// \returns Xcnt bit mask for given isa \p Version. +/// Returns 0 for versions that do not support Xcnt. 
+LLVM_ABI unsigned getXcntBitMask(const IsaVersion &Version); + +/// \return STOREcnt or VScnt bit mask for given isa \p Version. +/// returns 0 for versions that do not support STOREcnt or VScnt. +/// STOREcnt and VScnt are the same counter, the name used +/// depends on the ISA version. +LLVM_ABI unsigned getStorecntBitMask(const IsaVersion &Version); + +// The following are only meaningful on targets that support +// S_WAIT_LOADCNT_DSCNT and S_WAIT_STORECNT_DSCNT. + +/// \returns Decoded Waitcnt structure from given \p LoadcntDscnt for given +/// isa \p Version. +LLVM_ABI Waitcnt decodeLoadcntDscnt(const IsaVersion &Version, + unsigned LoadcntDscnt); + +/// \returns Decoded Waitcnt structure from given \p StorecntDscnt for given +/// isa \p Version. +LLVM_ABI Waitcnt decodeStorecntDscnt(const IsaVersion &Version, + unsigned StorecntDscnt); + +/// \returns \p Loadcnt and \p Dscnt components of \p Decoded encoded as an +/// immediate that can be used with S_WAIT_LOADCNT_DSCNT for given isa +/// \p Version. +LLVM_ABI unsigned encodeLoadcntDscnt(const IsaVersion &Version, + const Waitcnt &Decoded); + +/// \returns \p Storecnt and \p Dscnt components of \p Decoded encoded as an +/// immediate that can be used with S_WAIT_STORECNT_DSCNT for given isa +/// \p Version. +LLVM_ABI unsigned encodeStorecntDscnt(const IsaVersion &Version, + const Waitcnt &Decoded); } // namespace AMDGPU struct BasicSubtargetFeatureKV { diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index 75db58a292c13..2022a3d32b5da 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -1076,186 +1076,6 @@ getIntegerVecAttribute(const Function &F, StringRef Name, unsigned Size); /// Checks if \p Val is inside \p MD, a !range-like metadata. bool hasValueInRangeLikeMetadata(const MDNode &MD, int64_t Val); -/// Represents the counter values to wait for in an s_waitcnt instruction. 
-/// -/// Large values (including the maximum possible integer) can be used to -/// represent "don't care" waits. -struct Waitcnt { - unsigned LoadCnt = ~0u; // Corresponds to Vmcnt prior to gfx12. - unsigned ExpCnt = ~0u; - unsigned DsCnt = ~0u; // Corresponds to LGKMcnt prior to gfx12. - unsigned StoreCnt = ~0u; // Corresponds to VScnt on gfx10/gfx11. - unsigned SampleCnt = ~0u; // gfx12+ only. - unsigned BvhCnt = ~0u; // gfx12+ only. - unsigned KmCnt = ~0u; // gfx12+ only. - unsigned XCnt = ~0u; // gfx1250. - - Waitcnt() = default; - // Pre-gfx12 constructor. - Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt, unsigned VsCnt) - : LoadCnt(VmCnt), ExpCnt(ExpCnt), DsCnt(LgkmCnt), StoreCnt(VsCnt) {} - - // gfx12+ constructor. - Waitcnt(unsigned LoadCnt, unsigned ExpCnt, unsigned DsCnt, unsigned StoreCnt, - unsigned SampleCnt, unsigned BvhCnt, unsigned KmCnt, unsigned XCnt) - : LoadCnt(LoadCnt), ExpCnt(ExpCnt), DsCnt(DsCnt), StoreCnt(StoreCnt), - SampleCnt(SampleCnt), BvhCnt(BvhCnt), KmCnt(KmCnt), XCnt(XCnt) {} - - bool hasWait() const { return StoreCnt != ~0u || hasWaitExceptStoreCnt(); } - - bool hasWaitExceptStoreCnt() const { - return LoadCnt != ~0u || ExpCnt != ~0u || DsCnt != ~0u || - SampleCnt != ~0u || BvhCnt != ~0u || KmCnt != ~0u || XCnt != ~0u; - } - - bool hasWaitStoreCnt() const { return StoreCnt != ~0u; } - - Waitcnt combined(const Waitcnt &Other) const { - // Does the right thing provided self and Other are either both pre-gfx12 - // or both gfx12+. - return Waitcnt( - std::min(LoadCnt, Other.LoadCnt), std::min(ExpCnt, Other.ExpCnt), - std::min(DsCnt, Other.DsCnt), std::min(StoreCnt, Other.StoreCnt), - std::min(SampleCnt, Other.SampleCnt), std::min(BvhCnt, Other.BvhCnt), - std::min(KmCnt, Other.KmCnt), std::min(XCnt, Other.XCnt)); - } - - friend raw_ostream &operator<<(raw_ostream &OS, const AMDGPU::Waitcnt &Wait); -}; - -// The following methods are only meaningful on targets that support -// S_WAITCNT. 
- -/// \returns Vmcnt bit mask for given isa \p Version. -unsigned getVmcntBitMask(const IsaVersion &Version); - -/// \returns Expcnt bit mask for given isa \p Version. -unsigned getExpcntBitMask(const IsaVersion &Version); - -/// \returns Lgkmcnt bit mask for given isa \p Version. -unsigned getLgkmcntBitMask(const IsaVersion &Version); - -/// \returns Waitcnt bit mask for given isa \p Version. -unsigned getWaitcntBitMask(const IsaVersion &Version); - -/// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version. -unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt); - -/// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version. -unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt); - -/// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version. -unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt); - -/// Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa -/// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and -/// \p Lgkmcnt respectively. Should not be used on gfx12+, the instruction -/// which needs it is deprecated -/// -/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows: -/// \p Vmcnt = \p Waitcnt[3:0] (pre-gfx9) -/// \p Vmcnt = \p Waitcnt[15:14,3:0] (gfx9,10) -/// \p Vmcnt = \p Waitcnt[15:10] (gfx11) -/// \p Expcnt = \p Waitcnt[6:4] (pre-gfx11) -/// \p Expcnt = \p Waitcnt[2:0] (gfx11) -/// \p Lgkmcnt = \p Waitcnt[11:8] (pre-gfx10) -/// \p Lgkmcnt = \p Waitcnt[13:8] (gfx10) -/// \p Lgkmcnt = \p Waitcnt[9:4] (gfx11) -/// -void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned &Vmcnt, - unsigned &Expcnt, unsigned &Lgkmcnt); - -Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded); - -/// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version. 
-unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt, - unsigned Vmcnt); - -/// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version. -unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt, - unsigned Expcnt); - -/// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version. -unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt, - unsigned Lgkmcnt); - -/// Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa -/// \p Version. Should not be used on gfx12+, the instruction which needs -/// it is deprecated -/// -/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded as follows: -/// Waitcnt[2:0] = \p Expcnt (gfx11+) -/// Waitcnt[3:0] = \p Vmcnt (pre-gfx9) -/// Waitcnt[3:0] = \p Vmcnt[3:0] (gfx9,10) -/// Waitcnt[6:4] = \p Expcnt (pre-gfx11) -/// Waitcnt[9:4] = \p Lgkmcnt (gfx11) -/// Waitcnt[11:8] = \p Lgkmcnt (pre-gfx10) -/// Waitcnt[13:8] = \p Lgkmcnt (gfx10) -/// Waitcnt[15:10] = \p Vmcnt (gfx11) -/// Waitcnt[15:14] = \p Vmcnt[5:4] (gfx9,10) -/// -/// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given -/// isa \p Version. -/// -unsigned encodeWaitcnt(const IsaVersion &Version, unsigned Vmcnt, - unsigned Expcnt, unsigned Lgkmcnt); - -unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded); - -// The following methods are only meaningful on targets that support -// S_WAIT_*CNT, introduced with gfx12. - -/// \returns Loadcnt bit mask for given isa \p Version. -/// Returns 0 for versions that do not support LOADcnt -unsigned getLoadcntBitMask(const IsaVersion &Version); - -/// \returns Samplecnt bit mask for given isa \p Version. -/// Returns 0 for versions that do not support SAMPLEcnt -unsigned getSamplecntBitMask(const IsaVersion &Version); - -/// \returns Bvhcnt bit mask for given isa \p Version. 
-/// Returns 0 for versions that do not support BVHcnt -unsigned getBvhcntBitMask(const IsaVersion &Version); - -/// \returns Dscnt bit mask for given isa \p Version. -/// Returns 0 for versions that do not support DScnt -unsigned getDscntBitMask(const IsaVersion &Version); - -/// \returns Dscnt bit mask for given isa \p Version. -/// Returns 0 for versions that do not support KMcnt -unsigned getKmcntBitMask(const IsaVersion &Version); - -/// \returns Xcnt bit mask for given isa \p Version. -/// Returns 0 for versions that do not support Xcnt. -unsigned getXcntBitMask(const IsaVersion &Version); - -/// \return STOREcnt or VScnt bit mask for given isa \p Version. -/// returns 0 for versions that do not support STOREcnt or VScnt. -/// STOREcnt and VScnt are the same counter, the name used -/// depends on the ISA version. -unsigned getStorecntBitMask(const IsaVersion &Version); - -// The following are only meaningful on targets that support -// S_WAIT_LOADCNT_DSCNT and S_WAIT_STORECNT_DSCNT. - -/// \returns Decoded Waitcnt structure from given \p LoadcntDscnt for given -/// isa \p Version. -Waitcnt decodeLoadcntDscnt(const IsaVersion &Version, unsigned LoadcntDscnt); - -/// \returns Decoded Waitcnt structure from given \p StorecntDscnt for given -/// isa \p Version. -Waitcnt decodeStorecntDscnt(const IsaVersion &Version, unsigned StorecntDscnt); - -/// \returns \p Loadcnt and \p Dscnt components of \p Decoded encoded as an -/// immediate that can be used with S_WAIT_LOADCNT_DSCNT for given isa -/// \p Version. -unsigned encodeLoadcntDscnt(const IsaVersion &Version, const Waitcnt &Decoded); - -/// \returns \p Storecnt and \p Dscnt components of \p Decoded encoded as an -/// immediate that can be used with S_WAIT_STORECNT_DSCNT for given isa -/// \p Version. 
-unsigned encodeStorecntDscnt(const IsaVersion &Version, const Waitcnt &Decoded); - namespace Hwreg { using HwregId = EncodingField<5, 0>; diff --git a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp index a5e1cf18e415e..939f7776e970a 100644 --- a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp +++ b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp @@ -30,6 +30,7 @@ #include "llvm/ADT/TypeSwitch.h" #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/TargetParser/TargetParser.h" #include <optional> namespace mlir { @@ -435,50 +436,18 @@ struct RawBufferOpLowering : public ConvertOpToLLVMPattern<GpuOp> { } }; -// TODO: AMDGPU backend already have all this bitpacking logic, we should move -// it to some common place. -/// Vmcnt, Expcnt and Lgkmcnt are decoded as follows: -/// Vmcnt = Waitcnt[3:0] (pre-gfx9) -/// Vmcnt = Waitcnt[15:14,3:0] (gfx9,10) -/// Vmcnt = Waitcnt[15:10] (gfx11) -/// Expcnt = Waitcnt[6:4] (pre-gfx11) -/// Expcnt = Waitcnt[2:0] (gfx11) -/// Lgkmcnt = Waitcnt[11:8] (pre-gfx10) -/// Lgkmcnt = Waitcnt[13:8] (gfx10) -/// Lgkmcnt = Waitcnt[9:4] (gfx11) static FailureOr<unsigned> encodeWaitcnt(Chipset chipset, unsigned vmcnt, unsigned expcnt, unsigned lgkmcnt) { - if (chipset.majorVersion < 9) { - vmcnt = std::min(15u, vmcnt); - expcnt = std::min(7u, expcnt); - lgkmcnt = std::min(15u, lgkmcnt); - return vmcnt | (expcnt << 4) | (lgkmcnt << 8); - } - if (chipset.majorVersion == 9) { - vmcnt = std::min(63u, vmcnt); - expcnt = std::min(7u, expcnt); - lgkmcnt = std::min(15u, lgkmcnt); - unsigned lowBits = vmcnt & 0xF; - unsigned highBits = (vmcnt >> 4) << 14; - unsigned otherCnts = (expcnt << 4) | (lgkmcnt << 8); - return lowBits | highBits | otherCnts; - } - if (chipset.majorVersion == 10) { - vmcnt = std::min(63u, vmcnt); - expcnt = std::min(7u, expcnt); - lgkmcnt = std::min(63u, lgkmcnt); - unsigned lowBits = vmcnt & 0xF; - unsigned highBits = (vmcnt >> 4) << 14; - unsigned
otherCnts = (expcnt << 4) | (lgkmcnt << 8); - return lowBits | highBits | otherCnts; - } - if (chipset.majorVersion == 11) { - vmcnt = std::min(63u, vmcnt); - expcnt = std::min(7u, expcnt); - lgkmcnt = std::min(63u, lgkmcnt); - return (vmcnt << 10) | expcnt | (lgkmcnt << 4); - } - return failure(); + if (chipset.majorVersion >= 12) + return failure(); + + llvm::AMDGPU::IsaVersion isaVersion{ + chipset.majorVersion, chipset.minorVersion, chipset.steppingVersion}; + vmcnt = std::min(vmcnt, llvm::AMDGPU::getVmcntBitMask(isaVersion)); + expcnt = std::min(expcnt, llvm::AMDGPU::getExpcntBitMask(isaVersion)); + lgkmcnt = std::min(lgkmcnt, llvm::AMDGPU::getLgkmcntBitMask(isaVersion)); + + return llvm::AMDGPU::encodeWaitcnt(isaVersion, vmcnt, expcnt, lgkmcnt); } struct MemoryCounterWaitOpLowering From 007a852d39ed0afdd158867414c3e4c2d1006b33 Mon Sep 17 00:00:00 2001 From: Ivan Butygin Date: Tue, 16 Dec 2025 12:21:05 +0100 Subject: [PATCH 2/3] dedicated header --- llvm/include/llvm/Support/AMDGPUWaitcnt.h | 209 ++++++++++++++++++ llvm/include/llvm/TargetParser/TargetParser.h | 186 ---------------- llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 1 + .../AMDGPUToROCDL/AMDGPUToROCDL.cpp | 2 +- 4 files changed, 211 insertions(+), 187 deletions(-) create mode 100644 llvm/include/llvm/Support/AMDGPUWaitcnt.h diff --git a/llvm/include/llvm/Support/AMDGPUWaitcnt.h b/llvm/include/llvm/Support/AMDGPUWaitcnt.h new file mode 100644 index 0000000000000..a2243e5723813 --- /dev/null +++ b/llvm/include/llvm/Support/AMDGPUWaitcnt.h @@ -0,0 +1,209 @@ +//===---------------- AMDGPUWaitcnt.h ---------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// AMDGPU waitcnt support infrastructure +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_AMDGPUWAITCNT_H +#define LLVM_SUPPORT_AMDGPUWAITCNT_H + +#include "llvm/TargetParser/TargetParser.h" // IsaVersion + +namespace llvm { +namespace AMDGPU { +/// Represents the counter values to wait for in an s_waitcnt instruction. +/// +/// Large values (including the maximum possible integer) can be used to +/// represent "don't care" waits. +struct LLVM_ABI Waitcnt { + unsigned LoadCnt = ~0u; // Corresponds to Vmcnt prior to gfx12. + unsigned ExpCnt = ~0u; + unsigned DsCnt = ~0u; // Corresponds to LGKMcnt prior to gfx12. + unsigned StoreCnt = ~0u; // Corresponds to VScnt on gfx10/gfx11. + unsigned SampleCnt = ~0u; // gfx12+ only. + unsigned BvhCnt = ~0u; // gfx12+ only. + unsigned KmCnt = ~0u; // gfx12+ only. + unsigned XCnt = ~0u; // gfx1250. + + Waitcnt() = default; + // Pre-gfx12 constructor. + Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt, unsigned VsCnt) + : LoadCnt(VmCnt), ExpCnt(ExpCnt), DsCnt(LgkmCnt), StoreCnt(VsCnt) {} + + // gfx12+ constructor. 
+ Waitcnt(unsigned LoadCnt, unsigned ExpCnt, unsigned DsCnt, unsigned StoreCnt, + unsigned SampleCnt, unsigned BvhCnt, unsigned KmCnt, unsigned XCnt) + : LoadCnt(LoadCnt), ExpCnt(ExpCnt), DsCnt(DsCnt), StoreCnt(StoreCnt), + SampleCnt(SampleCnt), BvhCnt(BvhCnt), KmCnt(KmCnt), XCnt(XCnt) {} + + bool hasWait() const { return StoreCnt != ~0u || hasWaitExceptStoreCnt(); } + + bool hasWaitExceptStoreCnt() const { + return LoadCnt != ~0u || ExpCnt != ~0u || DsCnt != ~0u || + SampleCnt != ~0u || BvhCnt != ~0u || KmCnt != ~0u || XCnt != ~0u; + } + + bool hasWaitStoreCnt() const { return StoreCnt != ~0u; } + + Waitcnt combined(const Waitcnt &Other) const { + // Does the right thing provided self and Other are either both pre-gfx12 + // or both gfx12+. + return Waitcnt( + std::min(LoadCnt, Other.LoadCnt), std::min(ExpCnt, Other.ExpCnt), + std::min(DsCnt, Other.DsCnt), std::min(StoreCnt, Other.StoreCnt), + std::min(SampleCnt, Other.SampleCnt), std::min(BvhCnt, Other.BvhCnt), + std::min(KmCnt, Other.KmCnt), std::min(XCnt, Other.XCnt)); + } + + friend raw_ostream &operator<<(raw_ostream &OS, const AMDGPU::Waitcnt &Wait); +}; + +// The following methods are only meaningful on targets that support +// S_WAITCNT. + +/// \returns Vmcnt bit mask for given isa \p Version. +LLVM_ABI unsigned getVmcntBitMask(const IsaVersion &Version); + +/// \returns Expcnt bit mask for given isa \p Version. +LLVM_ABI unsigned getExpcntBitMask(const IsaVersion &Version); + +/// \returns Lgkmcnt bit mask for given isa \p Version. +LLVM_ABI unsigned getLgkmcntBitMask(const IsaVersion &Version); + +/// \returns Waitcnt bit mask for given isa \p Version. +LLVM_ABI unsigned getWaitcntBitMask(const IsaVersion &Version); + +/// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version. +LLVM_ABI unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt); + +/// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version. 
+LLVM_ABI unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt); + +/// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version. +LLVM_ABI unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt); + +/// Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa +/// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and +/// \p Lgkmcnt respectively. Should not be used on gfx12+, the instruction +/// which needs it is deprecated +/// +/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows: +/// \p Vmcnt = \p Waitcnt[3:0] (pre-gfx9) +/// \p Vmcnt = \p Waitcnt[15:14,3:0] (gfx9,10) +/// \p Vmcnt = \p Waitcnt[15:10] (gfx11) +/// \p Expcnt = \p Waitcnt[6:4] (pre-gfx11) +/// \p Expcnt = \p Waitcnt[2:0] (gfx11) +/// \p Lgkmcnt = \p Waitcnt[11:8] (pre-gfx10) +/// \p Lgkmcnt = \p Waitcnt[13:8] (gfx10) +/// \p Lgkmcnt = \p Waitcnt[9:4] (gfx11) +/// +LLVM_ABI void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt, + unsigned &Vmcnt, unsigned &Expcnt, + unsigned &Lgkmcnt); + +LLVM_ABI Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded); + +/// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version. +LLVM_ABI unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt, + unsigned Vmcnt); + +/// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version. +LLVM_ABI unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt, + unsigned Expcnt); + +/// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version. +LLVM_ABI unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt, + unsigned Lgkmcnt); + +/// Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa +/// \p Version. 
Should not be used on gfx12+, the instruction which needs +/// it is deprecated +/// +/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded as follows: +/// Waitcnt[2:0] = \p Expcnt (gfx11+) +/// Waitcnt[3:0] = \p Vmcnt (pre-gfx9) +/// Waitcnt[3:0] = \p Vmcnt[3:0] (gfx9,10) +/// Waitcnt[6:4] = \p Expcnt (pre-gfx11) +/// Waitcnt[9:4] = \p Lgkmcnt (gfx11) +/// Waitcnt[11:8] = \p Lgkmcnt (pre-gfx10) +/// Waitcnt[13:8] = \p Lgkmcnt (gfx10) +/// Waitcnt[15:10] = \p Vmcnt (gfx11) +/// Waitcnt[15:14] = \p Vmcnt[5:4] (gfx9,10) +/// +/// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given +/// isa \p Version. +/// +LLVM_ABI unsigned encodeWaitcnt(const IsaVersion &Version, unsigned Vmcnt, + unsigned Expcnt, unsigned Lgkmcnt); + +LLVM_ABI unsigned encodeWaitcnt(const IsaVersion &Version, + const Waitcnt &Decoded); + +// The following methods are only meaningful on targets that support +// S_WAIT_*CNT, introduced with gfx12. + +/// \returns Loadcnt bit mask for given isa \p Version. +/// Returns 0 for versions that do not support LOADcnt +LLVM_ABI unsigned getLoadcntBitMask(const IsaVersion &Version); + +/// \returns Samplecnt bit mask for given isa \p Version. +/// Returns 0 for versions that do not support SAMPLEcnt +LLVM_ABI unsigned getSamplecntBitMask(const IsaVersion &Version); + +/// \returns Bvhcnt bit mask for given isa \p Version. +/// Returns 0 for versions that do not support BVHcnt +LLVM_ABI unsigned getBvhcntBitMask(const IsaVersion &Version); + +/// \returns Dscnt bit mask for given isa \p Version. +/// Returns 0 for versions that do not support DScnt +LLVM_ABI unsigned getDscntBitMask(const IsaVersion &Version); + +/// \returns Kmcnt bit mask for given isa \p Version. +/// Returns 0 for versions that do not support KMcnt +LLVM_ABI unsigned getKmcntBitMask(const IsaVersion &Version); + +/// \returns Xcnt bit mask for given isa \p Version. +/// Returns 0 for versions that do not support Xcnt.
+LLVM_ABI unsigned getXcntBitMask(const IsaVersion &Version); + +/// \return STOREcnt or VScnt bit mask for given isa \p Version. +/// returns 0 for versions that do not support STOREcnt or VScnt. +/// STOREcnt and VScnt are the same counter, the name used +/// depends on the ISA version. +LLVM_ABI unsigned getStorecntBitMask(const IsaVersion &Version); + +// The following are only meaningful on targets that support +// S_WAIT_LOADCNT_DSCNT and S_WAIT_STORECNT_DSCNT. + +/// \returns Decoded Waitcnt structure from given \p LoadcntDscnt for given +/// isa \p Version. +LLVM_ABI Waitcnt decodeLoadcntDscnt(const IsaVersion &Version, + unsigned LoadcntDscnt); + +/// \returns Decoded Waitcnt structure from given \p StorecntDscnt for given +/// isa \p Version. +LLVM_ABI Waitcnt decodeStorecntDscnt(const IsaVersion &Version, + unsigned StorecntDscnt); + +/// \returns \p Loadcnt and \p Dscnt components of \p Decoded encoded as an +/// immediate that can be used with S_WAIT_LOADCNT_DSCNT for given isa +/// \p Version. +LLVM_ABI unsigned encodeLoadcntDscnt(const IsaVersion &Version, + const Waitcnt &Decoded); + +/// \returns \p Storecnt and \p Dscnt components of \p Decoded encoded as an +/// immediate that can be used with S_WAIT_STORECNT_DSCNT for given isa +/// \p Version. +LLVM_ABI unsigned encodeStorecntDscnt(const IsaVersion &Version, + const Waitcnt &Decoded); +} // end namespace AMDGPU +} // end namespace llvm + +#endif // LLVM_SUPPORT_AMDGPUWAITCNT_H diff --git a/llvm/include/llvm/TargetParser/TargetParser.h b/llvm/include/llvm/TargetParser/TargetParser.h index 263f4036d9b2f..9dfa50c1ad1ba 100644 --- a/llvm/include/llvm/TargetParser/TargetParser.h +++ b/llvm/include/llvm/TargetParser/TargetParser.h @@ -192,192 +192,6 @@ LLVM_ABI IsaVersion getIsaVersion(StringRef GPU); /// default target features with entries overridden by \p Features. 
LLVM_ABI std::pair fillAMDGPUFeatureMap(StringRef GPU, const Triple &T, StringMap &Features); - -/// Represents the counter values to wait for in an s_waitcnt instruction. -/// -/// Large values (including the maximum possible integer) can be used to -/// represent "don't care" waits. -struct Waitcnt { - unsigned LoadCnt = ~0u; // Corresponds to Vmcnt prior to gfx12. - unsigned ExpCnt = ~0u; - unsigned DsCnt = ~0u; // Corresponds to LGKMcnt prior to gfx12. - unsigned StoreCnt = ~0u; // Corresponds to VScnt on gfx10/gfx11. - unsigned SampleCnt = ~0u; // gfx12+ only. - unsigned BvhCnt = ~0u; // gfx12+ only. - unsigned KmCnt = ~0u; // gfx12+ only. - unsigned XCnt = ~0u; // gfx1250. - - Waitcnt() = default; - // Pre-gfx12 constructor. - Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt, unsigned VsCnt) - : LoadCnt(VmCnt), ExpCnt(ExpCnt), DsCnt(LgkmCnt), StoreCnt(VsCnt) {} - - // gfx12+ constructor. - Waitcnt(unsigned LoadCnt, unsigned ExpCnt, unsigned DsCnt, unsigned StoreCnt, - unsigned SampleCnt, unsigned BvhCnt, unsigned KmCnt, unsigned XCnt) - : LoadCnt(LoadCnt), ExpCnt(ExpCnt), DsCnt(DsCnt), StoreCnt(StoreCnt), - SampleCnt(SampleCnt), BvhCnt(BvhCnt), KmCnt(KmCnt), XCnt(XCnt) {} - - bool hasWait() const { return StoreCnt != ~0u || hasWaitExceptStoreCnt(); } - - bool hasWaitExceptStoreCnt() const { - return LoadCnt != ~0u || ExpCnt != ~0u || DsCnt != ~0u || - SampleCnt != ~0u || BvhCnt != ~0u || KmCnt != ~0u || XCnt != ~0u; - } - - bool hasWaitStoreCnt() const { return StoreCnt != ~0u; } - - Waitcnt combined(const Waitcnt &Other) const { - // Does the right thing provided self and Other are either both pre-gfx12 - // or both gfx12+. 
- return Waitcnt( - std::min(LoadCnt, Other.LoadCnt), std::min(ExpCnt, Other.ExpCnt), - std::min(DsCnt, Other.DsCnt), std::min(StoreCnt, Other.StoreCnt), - std::min(SampleCnt, Other.SampleCnt), std::min(BvhCnt, Other.BvhCnt), - std::min(KmCnt, Other.KmCnt), std::min(XCnt, Other.XCnt)); - } - - friend raw_ostream &operator<<(raw_ostream &OS, const AMDGPU::Waitcnt &Wait); -}; - -// The following methods are only meaningful on targets that support -// S_WAITCNT. - -/// \returns Vmcnt bit mask for given isa \p Version. -LLVM_ABI unsigned getVmcntBitMask(const IsaVersion &Version); - -/// \returns Expcnt bit mask for given isa \p Version. -LLVM_ABI unsigned getExpcntBitMask(const IsaVersion &Version); - -/// \returns Lgkmcnt bit mask for given isa \p Version. -LLVM_ABI unsigned getLgkmcntBitMask(const IsaVersion &Version); - -/// \returns Waitcnt bit mask for given isa \p Version. -LLVM_ABI unsigned getWaitcntBitMask(const IsaVersion &Version); - -/// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version. -LLVM_ABI unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt); - -/// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version. -LLVM_ABI unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt); - -/// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version. -LLVM_ABI unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt); - -/// Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa -/// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and -/// \p Lgkmcnt respectively. 
Should not be used on gfx12+, the instruction -/// which needs it is deprecated -/// -/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows: -/// \p Vmcnt = \p Waitcnt[3:0] (pre-gfx9) -/// \p Vmcnt = \p Waitcnt[15:14,3:0] (gfx9,10) -/// \p Vmcnt = \p Waitcnt[15:10] (gfx11) -/// \p Expcnt = \p Waitcnt[6:4] (pre-gfx11) -/// \p Expcnt = \p Waitcnt[2:0] (gfx11) -/// \p Lgkmcnt = \p Waitcnt[11:8] (pre-gfx10) -/// \p Lgkmcnt = \p Waitcnt[13:8] (gfx10) -/// \p Lgkmcnt = \p Waitcnt[9:4] (gfx11) -/// -LLVM_ABI void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt, - unsigned &Vmcnt, unsigned &Expcnt, - unsigned &Lgkmcnt); - -LLVM_ABI Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded); - -/// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version. -LLVM_ABI unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt, - unsigned Vmcnt); - -/// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version. -LLVM_ABI unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt, - unsigned Expcnt); - -/// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version. -LLVM_ABI unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt, - unsigned Lgkmcnt); - -/// Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa -/// \p Version. Should not be used on gfx12+, the instruction which needs -/// it is deprecated -/// -/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded as follows: -/// Waitcnt[2:0] = \p Expcnt (gfx11+) -/// Waitcnt[3:0] = \p Vmcnt (pre-gfx9) -/// Waitcnt[3:0] = \p Vmcnt[3:0] (gfx9,10) -/// Waitcnt[6:4] = \p Expcnt (pre-gfx11) -/// Waitcnt[9:4] = \p Lgkmcnt (gfx11) -/// Waitcnt[11:8] = \p Lgkmcnt (pre-gfx10) -/// Waitcnt[13:8] = \p Lgkmcnt (gfx10) -/// Waitcnt[15:10] = \p Vmcnt (gfx11) -/// Waitcnt[15:14] = \p Vmcnt[5:4] (gfx9,10) -/// -/// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given -/// isa \p Version. 
-/// -LLVM_ABI unsigned encodeWaitcnt(const IsaVersion &Version, unsigned Vmcnt, - unsigned Expcnt, unsigned Lgkmcnt); - -LLVM_ABI unsigned encodeWaitcnt(const IsaVersion &Version, - const Waitcnt &Decoded); - -// The following methods are only meaningful on targets that support -// S_WAIT_*CNT, introduced with gfx12. - -/// \returns Loadcnt bit mask for given isa \p Version. -/// Returns 0 for versions that do not support LOADcnt -LLVM_ABI unsigned getLoadcntBitMask(const IsaVersion &Version); - -/// \returns Samplecnt bit mask for given isa \p Version. -/// Returns 0 for versions that do not support SAMPLEcnt -LLVM_ABI unsigned getSamplecntBitMask(const IsaVersion &Version); - -/// \returns Bvhcnt bit mask for given isa \p Version. -/// Returns 0 for versions that do not support BVHcnt -LLVM_ABI unsigned getBvhcntBitMask(const IsaVersion &Version); - -/// \returns Dscnt bit mask for given isa \p Version. -/// Returns 0 for versions that do not support DScnt -LLVM_ABI unsigned getDscntBitMask(const IsaVersion &Version); - -/// \returns Dscnt bit mask for given isa \p Version. -/// Returns 0 for versions that do not support KMcnt -LLVM_ABI unsigned getKmcntBitMask(const IsaVersion &Version); - -/// \returns Xcnt bit mask for given isa \p Version. -/// Returns 0 for versions that do not support Xcnt. -LLVM_ABI unsigned getXcntBitMask(const IsaVersion &Version); - -/// \return STOREcnt or VScnt bit mask for given isa \p Version. -/// returns 0 for versions that do not support STOREcnt or VScnt. -/// STOREcnt and VScnt are the same counter, the name used -/// depends on the ISA version. -LLVM_ABI unsigned getStorecntBitMask(const IsaVersion &Version); - -// The following are only meaningful on targets that support -// S_WAIT_LOADCNT_DSCNT and S_WAIT_STORECNT_DSCNT. - -/// \returns Decoded Waitcnt structure from given \p LoadcntDscnt for given -/// isa \p Version. 
-LLVM_ABI Waitcnt decodeLoadcntDscnt(const IsaVersion &Version, - unsigned LoadcntDscnt); - -/// \returns Decoded Waitcnt structure from given \p StorecntDscnt for given -/// isa \p Version. -LLVM_ABI Waitcnt decodeStorecntDscnt(const IsaVersion &Version, - unsigned StorecntDscnt); - -/// \returns \p Loadcnt and \p Dscnt components of \p Decoded encoded as an -/// immediate that can be used with S_WAIT_LOADCNT_DSCNT for given isa -/// \p Version. -LLVM_ABI unsigned encodeLoadcntDscnt(const IsaVersion &Version, - const Waitcnt &Decoded); - -/// \returns \p Storecnt and \p Dscnt components of \p Decoded encoded as an -/// immediate that can be used with S_WAIT_STORECNT_DSCNT for given isa -/// \p Version. -LLVM_ABI unsigned encodeStorecntDscnt(const IsaVersion &Version, - const Waitcnt &Decoded); } // namespace AMDGPU struct BasicSubtargetFeatureKV { diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index 2022a3d32b5da..95ee1626a8623 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -14,6 +14,7 @@ #include "llvm/IR/CallingConv.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Module.h" +#include "llvm/Support/AMDGPUWaitcnt.h" #include "llvm/Support/Alignment.h" #include #include diff --git a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp index 939f7776e970a..3d406d8d01a79 100644 --- a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp +++ b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp @@ -28,9 +28,9 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/TypeSwitch.h" +#include "llvm/Support/AMDGPUWaitcnt.h" #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/TargetParser/TargetParser.h" #include namespace mlir { From 4ca606ec531c1592df228d38b689b6d2ae7f4cec Mon Sep 17 00:00:00 2001 From: Ivan Butygin Date: Tue, 16 Dec 2025 15:36:11 +0100 
Subject: [PATCH 3/3] namespace --- llvm/include/llvm/Support/AMDGPUWaitcnt.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/llvm/include/llvm/Support/AMDGPUWaitcnt.h b/llvm/include/llvm/Support/AMDGPUWaitcnt.h index a2243e5723813..4d04daa2b24cb 100644 --- a/llvm/include/llvm/Support/AMDGPUWaitcnt.h +++ b/llvm/include/llvm/Support/AMDGPUWaitcnt.h @@ -16,8 +16,7 @@ #include "llvm/TargetParser/TargetParser.h" // IsaVersion -namespace llvm { -namespace AMDGPU { +namespace llvm::AMDGPU { /// Represents the counter values to wait for in an s_waitcnt instruction. /// /// Large values (including the maximum possible integer) can be used to @@ -203,7 +202,6 @@ LLVM_ABI unsigned encodeLoadcntDscnt(const IsaVersion &Version, /// \p Version. LLVM_ABI unsigned encodeStorecntDscnt(const IsaVersion &Version, const Waitcnt &Decoded); -} // end namespace AMDGPU -} // end namespace llvm +} // end namespace llvm::AMDGPU #endif // LLVM_SUPPORT_AMDGPUWAITCNT_H