diff --git a/llvm/lib/Target/AMDGPU/AMDGPUWaveTransform.cpp b/llvm/lib/Target/AMDGPU/AMDGPUWaveTransform.cpp index ceda928f202f5..27759c4a3ca3d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUWaveTransform.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUWaveTransform.cpp @@ -1839,7 +1839,7 @@ void ControlFlowRewriter::rewrite() { Opcode = AMDGPU::S_CBRANCH_SCC1; } else { Register CondReg = Info.OrigCondition; - if (!LMA.isSubsetOfExec(CondReg, *Node->Block)) { + if (!LMA.isSubsetOfExec(CondReg, *Node->Block, Node->Block->end())) { CondReg = LMU.createLaneMaskReg(); BuildMI(*Node->Block, Node->Block->end(), {}, TII.get(LMC.AndOpc), CondReg) @@ -1876,7 +1876,6 @@ void ControlFlowRewriter::rewrite() { RegMap; GCNLaneMaskUpdater Updater(Function); Updater.setLaneMaskAnalysis(&LMA); - Updater.setAccumulating(true); for (WaveNode *LaneTarget : NodeOrder) { CFGNodeInfo &LaneTargetInfo = NodeInfo.find(LaneTarget)->second; @@ -1937,7 +1936,8 @@ void ControlFlowRewriter::rewrite() { } } else { CondReg = LaneOrigin.CondReg; - if (!LMA.isSubsetOfExec(LaneOrigin.CondReg, *LaneOrigin.Node->Block)) { + if (!LMA.isSubsetOfExec(LaneOrigin.CondReg, *LaneOrigin.Node->Block, + LaneOrigin.Node->Block->getFirstTerminator())) { Register Prev = CondReg; CondReg = LMU.createLaneMaskReg(); BuildMI(*LaneOrigin.Node->Block, @@ -2033,28 +2033,34 @@ void ControlFlowRewriter::rewrite() { CFGNodeInfo &PredInfo = NodeInfo.find(Pred)->second; Register PrimaryExec = PredInfo.PrimarySuccessorExec; - MachineInstr *PrimaryExecDef; - for (;;) { - PrimaryExecDef = MRI.getVRegDef(PrimaryExec); - if (PrimaryExecDef->getOpcode() != AMDGPU::COPY) - break; - PrimaryExec = PrimaryExecDef->getOperand(1).getReg(); - } + // Turning off this copy-chain optimization to retain the Accumulator as + // the PrimaryExec + + // MachineInstr *PrimaryExecDef; + // for (;;) { + // PrimaryExecDef = MRI.getVRegDef(PrimaryExec); + // if (PrimaryExecDef->getOpcode() != AMDGPU::COPY) + // break; + // PrimaryExec = 
PrimaryExecDef->getOperand(1).getReg(); + // } // Rejoin = EXEC ^ PrimaryExec // // Fold immediately if PrimaryExec was obtained via XOR as well. Register Rejoin; - if (PrimaryExecDef->getParent() == Pred->Block && - PrimaryExecDef->getOpcode() == LMC.XorOpc && - PrimaryExecDef->getOperand(1).isReg() && - PrimaryExecDef->getOperand(2).isReg()) { - if (PrimaryExecDef->getOperand(1).getReg() == LMC.ExecReg) - Rejoin = PrimaryExecDef->getOperand(2).getReg(); - else if (PrimaryExecDef->getOperand(2).getReg() == LMC.ExecReg) - Rejoin = PrimaryExecDef->getOperand(1).getReg(); - } + // Turning off this XOR optimization since buildMergeLaneMasks() will not + // introduce XOR instruction for creating the PrimaryExec + + // if (PrimaryExecDef->getParent() == Pred->Block && + // PrimaryExecDef->getOpcode() == LMC.XorOpc && + // PrimaryExecDef->getOperand(1).isReg() && + // PrimaryExecDef->getOperand(2).isReg()) { + // if (PrimaryExecDef->getOperand(1).getReg() == LMC.ExecReg) + // Rejoin = PrimaryExecDef->getOperand(2).getReg(); + // else if (PrimaryExecDef->getOperand(2).getReg() == LMC.ExecReg) + // Rejoin = PrimaryExecDef->getOperand(1).getReg(); + // } if (!Rejoin) { // Try to find a previously generated XOR (or merely masked) value @@ -2091,7 +2097,15 @@ void ControlFlowRewriter::rewrite() { LLVM_DEBUG(Function.dump()); } - + Updater.insertAccumulatorResets(); + // Replace MovTermOpc with MovOpc + for (MachineBasicBlock &MBB : Function) { + for (MachineInstr &MI : MBB) { + if (MI.getOpcode() == LMC.MovTermOpc) { + MI.setDesc(TII.get(LMC.MovOpc)); + } + } + } Updater.cleanup(); } diff --git a/llvm/lib/Target/AMDGPU/GCNLaneMaskUtils.cpp b/llvm/lib/Target/AMDGPU/GCNLaneMaskUtils.cpp index d7b19cbe745a8..0b2cda411e91e 100644 --- a/llvm/lib/Target/AMDGPU/GCNLaneMaskUtils.cpp +++ b/llvm/lib/Target/AMDGPU/GCNLaneMaskUtils.cpp @@ -10,6 +10,7 @@ #include "GCNSubtarget.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "SIRegisterInfo.h" #include 
"llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -31,13 +32,15 @@ bool GCNLaneMaskUtils::maybeLaneMask(Register Reg) const { /// Determine whether the lane-mask register \p Reg is a wave-wide constant. /// If so, the value is stored in \p Val. -bool GCNLaneMaskUtils::isConstantLaneMask(Register Reg, bool &Val) const { +bool GCNLaneMaskUtils::isConstantLaneMask( + Register Reg, bool &Val, MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI) const { MachineRegisterInfo &MRI = MF.getRegInfo(); - const MachineInstr *MI; for (;;) { - MI = MRI.getVRegDef(Reg); - if (!MI) { + MI = SIRegisterInfo::getDomVRegDefInBasicBlock(Reg, MBB, MI, + MRI.getTargetRegisterInfo()); + if (MI == MBB.end()) { // This can happen when called from GCNLaneMaskUpdater, where Reg can // be a placeholder that has not yet been filled in. return false; @@ -83,37 +86,33 @@ Register GCNLaneMaskUtils::createLaneMaskReg() const { /// Insert the moral equivalent of /// -/// DstReg = (PrevReg & ~EXEC) | (CurReg & EXEC) +/// DstReg = PrevReg | (CurReg & EXEC) /// /// before \p I in basic block \p MBB. Some simplifications are applied on the -/// fly based on constant inputs and analysis via \p LMA, and further -/// simplifications can be requested in "accumulating" mode. +/// fly based on constant inputs and analysis via \p LMA /// /// \param DstReg The virtual register into which the merged mask is written. /// \param PrevReg The virtual register with the "previous" lane mask value; -/// may be null to indicate an undef value. +/// may be ZeroReg or Accumulator. /// \param CurReg The virtual register with the "current" lane mask value to /// be merged into "previous". /// \param LMA If non-null, used to test whether CurReg may already be a subset /// of EXEC. -/// \param accumulating Indicates that we should assume PrevReg is already -/// properly masked, i.e. 
use PrevReg directly instead of -/// (PrevReg & ~EXEC), and don't add extra 1-bits to DstReg -/// beyond (CurReg & EXEC). +/// \param isPrevZeroReg Indicates that PrevReg is a zero register. void GCNLaneMaskUtils::buildMergeLaneMasks(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DstReg, Register PrevReg, Register CurReg, GCNLaneMaskAnalysis *LMA, - bool accumulating) const { + bool isPrevZeroReg) const { const GCNSubtarget &ST = MF.getSubtarget(); const SIInstrInfo *TII = ST.getInstrInfo(); bool PrevVal = false; - bool PrevConstant = !PrevReg || isConstantLaneMask(PrevReg, PrevVal); + bool PrevConstant = !PrevReg || isPrevZeroReg; bool CurVal = false; - bool CurConstant = isConstantLaneMask(CurReg, CurVal); + bool CurConstant = isConstantLaneMask(CurReg, CurVal, MBB, I); - assert(PrevReg || !accumulating); + assert(PrevReg); if (PrevConstant && CurConstant) { if (PrevVal == CurVal) { @@ -135,19 +134,11 @@ void GCNLaneMaskUtils::buildMergeLaneMasks(MachineBasicBlock &MBB, Register PrevMaskedReg; Register CurMaskedReg; if (!PrevConstant) { - if (accumulating || (CurConstant && CurVal)) { - PrevMaskedReg = PrevReg; - } else { - PrevMaskedReg = createLaneMaskReg(); - PrevMaskedBuilt = - BuildMI(MBB, I, DL, TII->get(LMC.AndN2Opc), PrevMaskedReg) - .addReg(PrevReg) - .addReg(LMC.ExecReg); - } + PrevMaskedReg = PrevReg; } if (!CurConstant) { if ((PrevConstant && PrevVal) || - (LMA && LMA->isSubsetOfExec(CurReg, MBB))) { + (LMA && LMA->isSubsetOfExec(CurReg, MBB, I))) { CurMaskedReg = CurReg; } else { CurMaskedReg = createLaneMaskReg(); @@ -157,8 +148,7 @@ void GCNLaneMaskUtils::buildMergeLaneMasks(MachineBasicBlock &MBB, } } - // TODO-NOW: reevaluate the masking logic in case of CurConstant && CurVal && - // accumulating + // TODO-NOW: reevaluate the masking logic in case of CurConstant && CurVal if (PrevConstant && !PrevVal) { if (CurMaskedBuilt) { @@ -188,22 +178,27 @@ void GCNLaneMaskUtils::buildMergeLaneMasks(MachineBasicBlock 
&MBB, /// (Reg & EXEC) == Reg when used in \p UseBlock. bool GCNLaneMaskAnalysis::isSubsetOfExec(Register Reg, MachineBasicBlock &UseBlock, + MachineBasicBlock::iterator I, unsigned RemainingDepth) { MachineRegisterInfo &MRI = LMU.function()->getRegInfo(); - MachineInstr *DefInstr = nullptr; + MachineBasicBlock::iterator DefInstr = UseBlock.end(); const AMDGPU::LaneMaskConstants &LMC = LMU.getLaneMaskConsts(); for (;;) { if (!Register::isVirtualRegister(Reg)) { if (Reg == LMC.ExecReg && - (!DefInstr || DefInstr->getParent() == &UseBlock)) + (DefInstr == UseBlock.end() || DefInstr->getParent() == &UseBlock)) return true; return false; } - DefInstr = MRI.getVRegDef(Reg); + DefInstr = SIRegisterInfo::getDomVRegDefInBasicBlock( + Reg, UseBlock, I, MRI.getTargetRegisterInfo()); + if (DefInstr == UseBlock.end()) + return false; if (DefInstr->getOpcode() == AMDGPU::COPY) { Reg = DefInstr->getOperand(1).getReg(); + I = DefInstr; continue; } @@ -242,7 +237,7 @@ bool GCNLaneMaskAnalysis::isSubsetOfExec(Register Reg, if ((LikeOr || IsAnd || IsAndN2) && (DefInstr->getOperand(1).isReg() && DefInstr->getOperand(2).isReg())) { bool FirstIsSubset = isSubsetOfExec(DefInstr->getOperand(1).getReg(), - UseBlock, RemainingDepth); + UseBlock, DefInstr, RemainingDepth); if (!FirstIsSubset && (LikeOr || IsAndN2)) return SubsetOfExec.try_emplace(Reg, false).first->second; @@ -252,7 +247,7 @@ bool GCNLaneMaskAnalysis::isSubsetOfExec(Register Reg, } bool SecondIsSubset = isSubsetOfExec(DefInstr->getOperand(2).getReg(), - UseBlock, RemainingDepth); + UseBlock, DefInstr, RemainingDepth); if (!SecondIsSubset) return SubsetOfExec.try_emplace(Reg, false).first->second; @@ -268,19 +263,19 @@ void GCNLaneMaskUpdater::init(Register Reg) { Processed = false; Blocks.clear(); // SSAUpdater.Initialize(LMU.getLaneMaskConsts().LaneMaskRC); - SSAUpdater.Initialize(Reg); + Accumulator = AMDGPU::NoRegister; } /// Optional cleanup, may remove stray instructions. 
void GCNLaneMaskUpdater::cleanup() { Processed = false; Blocks.clear(); - + Accumulator = AMDGPU::NoRegister; MachineRegisterInfo &MRI = LMU.function()->getRegInfo(); if (ZeroReg && MRI.use_empty(ZeroReg)) { MRI.getVRegDef(ZeroReg)->eraseFromParent(); - ZeroReg = {}; + ZeroReg = AMDGPU::NoRegister; } for (MachineInstr *MI : PotentiallyDead) { @@ -330,19 +325,19 @@ void GCNLaneMaskUpdater::addAvailable(MachineBasicBlock &Block, Register GCNLaneMaskUpdater::getValueInMiddleOfBlock(MachineBasicBlock &Block) { if (!Processed) process(); - return SSAUpdater.GetValueInMiddleOfBlock(&Block); + return Accumulator; } /// Return the value at the end of the given block, i.e. after any change that /// was registered via \ref addAvailable. /// -/// Note: If \p Block is the reset block in accumulating mode with ResetAtEnd +/// Note: If \p Block is the reset block with ResetAtEnd /// reset mode, then this value will be 0. You likely want /// \ref getPreReset instead. Register GCNLaneMaskUpdater::getValueAtEndOfBlock(MachineBasicBlock &Block) { if (!Processed) process(); - return SSAUpdater.GetValueAtEndOfBlock(&Block); + return Accumulator; } /// Return the value in \p Block after the value merge (if any). @@ -352,15 +347,15 @@ Register GCNLaneMaskUpdater::getValueAfterMerge(MachineBasicBlock &Block) { auto BlockIt = findBlockInfo(Block); if (BlockIt != Blocks.end()) { - if (BlockIt->Merged) - return BlockIt->Merged; + if (BlockIt->Value) + return Accumulator; if (BlockIt->Flags & ResetInMiddle) return ZeroReg; } // We didn't merge anything in the block, but the block may still be // ResetAtEnd, in which case we need the pre-reset value. - return SSAUpdater.GetValueInMiddleOfBlock(&Block); + return Accumulator; } /// Determine whether \p MI defines and/or uses SCC. 
@@ -414,70 +409,56 @@ void GCNLaneMaskUpdater::process() { LMU.function()->getSubtarget().getInstrInfo(); MachineBasicBlock &Entry = LMU.function()->front(); - // Prepare an all-zero value for the default and reset in accumulating mode. - if (Accumulating && !ZeroReg) { + if (!ZeroReg) { ZeroReg = LMU.createLaneMaskReg(); BuildMI(Entry, Entry.getFirstTerminator(), {}, TII->get(LMU.getLaneMaskConsts().MovOpc), ZeroReg) .addImm(0); } - // Add available values. + if (!Accumulator) { + Accumulator = LMU.createLaneMaskReg(); + BuildMI(Entry, Entry.getFirstTerminator(), {}, + TII->get(LMU.getLaneMaskConsts().MovOpc), Accumulator) + .addImm(0); + } + + // Record the blocks in which the accumulator must be reset. for (BlockInfo &Info : Blocks) { - assert(Accumulating || !Info.Flags); assert(Info.Flags || Info.Value); - if (Info.Value) - Info.Merged = LMU.createLaneMaskReg(); - - SSAUpdater.AddAvailableValue( - Info.Block, - (Info.Value && !(Info.Flags & ResetAtEnd)) ? Info.Merged : ZeroReg); + if (!Info.Value || (Info.Flags & ResetAtEnd)) + AccumulatorResetBlocks[Info.Block].insert(Accumulator); } - if (Accumulating && !SSAUpdater.HasValueForBlock(&Entry)) - SSAUpdater.AddAvailableValue(&Entry, ZeroReg); - // Once the SSA updater is ready, we can fill in all merge code, relying // on the SSA updater to insert required PHIs. for (BlockInfo &Info : Blocks) { if (!Info.Value) continue; - // Determine the "previous" value, if any. + // Determine the "previous" value. 
Register Previous; - if (Info.Block != &LMU.function()->front() && - !(Info.Flags & ResetInMiddle)) { - Previous = SSAUpdater.GetValueInMiddleOfBlock(Info.Block); - if (Accumulating) { - assert(!MRI.getVRegDef(Previous) || - MRI.getVRegDef(Previous)->getOpcode() != AMDGPU::IMPLICIT_DEF); - } else { - MachineInstr *PrevInstr = MRI.getVRegDef(Previous); - if (PrevInstr && PrevInstr->getOpcode() == AMDGPU::IMPLICIT_DEF) { - PotentiallyDead.insert(PrevInstr); - Previous = {}; - } - } - } else { - if (Accumulating) - Previous = ZeroReg; - } + if (Info.Block != &LMU.function()->front() && !(Info.Flags & ResetInMiddle)) + Previous = Accumulator; + else + Previous = ZeroReg; // Insert merge logic. MachineBasicBlock::iterator insertPt = getSaluInsertionAtEnd(*Info.Block); - LMU.buildMergeLaneMasks(*Info.Block, insertPt, {}, Info.Merged, Previous, - Info.Value, LMA, Accumulating); - - if (Info.Flags & ResetAtEnd) { - MachineInstr *mergeInstr = MRI.getVRegDef(Info.Merged); - if (mergeInstr->getOpcode() == AMDGPU::COPY && - mergeInstr->getOperand(1).getReg().isVirtual()) { - assert(MRI.use_empty(Info.Merged)); - Info.Merged = mergeInstr->getOperand(1).getReg(); - mergeInstr->eraseFromParent(); - } - } + LMU.buildMergeLaneMasks(*Info.Block, insertPt, {}, Accumulator, Previous, + Info.Value, LMA, Previous == ZeroReg); + + // Switching off this optimization, since Accumulator will always have a use + // if (Info.Flags & ResetAtEnd) { + // MachineInstr *mergeInstr = MRI.getVRegDef(Info.Merged); + // if (mergeInstr->getOpcode() == AMDGPU::COPY && + // mergeInstr->getOperand(1).getReg().isVirtual()) { + // assert(MRI.use_empty(Info.Merged)); + // Info.Merged = mergeInstr->getOperand(1).getReg(); + // mergeInstr->eraseFromParent(); + // } + // } } Processed = true; @@ -489,3 +470,22 @@ GCNLaneMaskUpdater::findBlockInfo(MachineBasicBlock &Block) { return llvm::find_if( Blocks, [&](const auto &Entry) { return Entry.Block == &Block; }); } + +void 
GCNLaneMaskUpdater::insertAccumulatorResets() { + const SIInstrInfo *TII = + LMU.function()->getSubtarget().getInstrInfo(); + for (auto &Entry : AccumulatorResetBlocks) { + MachineBasicBlock *B = Entry.first; + DenseSet &Accumulators = Entry.second; + for (Register ACC : Accumulators) { + // Get first branch instruction. + MachineBasicBlock::iterator I = B->getFirstTerminator(); + while (I != B->end() && !I->isBranch()) + I++; + if (I == B->end()) + I--; + BuildMI(*B, I, {}, TII->get(LMU.getLaneMaskConsts().MovOpc), ACC) + .addImm(0); + } + } +} diff --git a/llvm/lib/Target/AMDGPU/GCNLaneMaskUtils.h b/llvm/lib/Target/AMDGPU/GCNLaneMaskUtils.h index f4419f139d92c..1998801156ed6 100644 --- a/llvm/lib/Target/AMDGPU/GCNLaneMaskUtils.h +++ b/llvm/lib/Target/AMDGPU/GCNLaneMaskUtils.h @@ -43,14 +43,15 @@ class GCNLaneMaskUtils { const AMDGPU::LaneMaskConstants &getLaneMaskConsts() const { return LMC; } bool maybeLaneMask(Register Reg) const; - bool isConstantLaneMask(Register Reg, bool &Val) const; + bool isConstantLaneMask(Register Reg, bool &Val, MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const; Register createLaneMaskReg() const; void buildMergeLaneMasks(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DstReg, Register PrevReg, Register CurReg, GCNLaneMaskAnalysis *LMA = nullptr, - bool Accumulating = false) const; + bool isPrevZeroReg = false) const; }; /// Lazy analyses of lane masks. @@ -64,28 +65,19 @@ class GCNLaneMaskAnalysis { GCNLaneMaskAnalysis(MachineFunction &MF) : LMU(MF) {} bool isSubsetOfExec(Register Reg, MachineBasicBlock &UseBlock, + MachineBasicBlock::iterator I, unsigned RemainingDepth = 5); }; /// \brief SSA-updater for lane masks. /// -/// The updater operates in one of two modes: "default" and "accumulating". 
-/// -/// Default mode is the analog to regular SSA construction and suitable for the -/// lowering of normal per-lane boolean values to lane masks: the mask can be -/// (re-)written multiple times for each lane. In each basic block, only the -/// lanes enabled by that block's EXEC mask are updated. Bits for lanes that -/// never contributed with an available value are undefined. -/// -/// Accumulating mode is used for some aspects of control flow lowering. In -/// this mode, each lane is assumed to provide a "true" available value only +/// Each lane is assumed to provide a "true" available value only /// once, and to never attempt to change the value back to "false" -- except /// that all lanes are reset to false in "reset blocks" as explained below. -/// In accumulating mode, the bits for lanes that never contributed with an -/// available value are 0. +/// The bits for lanes that never contributed with an available value are 0. /// -/// In accumulating mode, all lanes are reset to 0 at certain points in "reset -/// blocks" which are added via \ref addReset. The reset happens in one or both +/// All lanes are reset to 0 at certain points in "reset blocks" +/// which are added via \ref addReset. 
The reset happens in one or both /// of two modes: /// - ResetInMiddle: Reset logically happens after the point queried by /// \ref getValueInMiddleOfBlock and before the contribution of the block's @@ -105,9 +97,6 @@ class GCNLaneMaskUpdater { private: GCNLaneMaskUtils LMU; GCNLaneMaskAnalysis *LMA = nullptr; - MachineSSAUpdater SSAUpdater; - - bool Accumulating = false; bool Processed = false; @@ -115,7 +104,6 @@ class GCNLaneMaskUpdater { MachineBasicBlock *Block; unsigned Flags = 0; // ResetFlags Register Value; - Register Merged; explicit BlockInfo(MachineBasicBlock *Block) : Block(Block) {} }; @@ -124,23 +112,25 @@ class GCNLaneMaskUpdater { Register ZeroReg; DenseSet PotentiallyDead; + DenseMap> AccumulatorResetBlocks; public: - GCNLaneMaskUpdater(MachineFunction &MF) : LMU(MF), SSAUpdater(MF) {} + Register Accumulator; + + GCNLaneMaskUpdater(MachineFunction &MF) : LMU(MF) {} void setLaneMaskAnalysis(GCNLaneMaskAnalysis *Analysis) { LMA = Analysis; } void init(Register Reg); void cleanup(); - void setAccumulating(bool Val) { Accumulating = Val; } - void addReset(MachineBasicBlock &Block, ResetFlags Flags); void addAvailable(MachineBasicBlock &Block, Register Value); Register getValueInMiddleOfBlock(MachineBasicBlock &Block); Register getValueAtEndOfBlock(MachineBasicBlock &Block); Register getValueAfterMerge(MachineBasicBlock &Block); + void insertAccumulatorResets(); private: void process(); diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp index 10af38c637a39..7c9a82291f4ae 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -4182,6 +4182,21 @@ const TargetRegisterClass *SIRegisterInfo::getVGPR64Class() const { : &AMDGPU::VReg_64RegClass; } +MachineBasicBlock::iterator +SIRegisterInfo::getDomVRegDefInBasicBlock(Register Reg, MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + const TargetRegisterInfo *TRI) { + if (I == MBB.begin()) + return 
MBB.end(); + // Iterate backwards from I (exclusive) to the beginning of the basic block + do { + --I; + if (I->definesRegister(Reg, TRI)) + return I; + } while (I != MBB.begin()); + return MBB.end(); +} + // Find reaching register definition MachineInstr *SIRegisterInfo::findReachingDef(Register Reg, unsigned SubReg, MachineInstr &Use, diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h index bbb32397bc5a5..cf4a2945393ed 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h @@ -417,6 +417,15 @@ class SIRegisterInfo final : public AMDGPUGenRegisterInfo { MachineRegisterInfo &MRI, LiveIntervals *LIS) const; + /// getDomVRegDefInBasicBlock - Return the last machine instr that defines + /// the specified virtual register in the basic block, searching backwards + /// from instruction I (exclusive). Returns MBB.end() if no definition is + /// found. + static MachineBasicBlock::iterator + getDomVRegDefInBasicBlock(Register Reg, MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + const TargetRegisterInfo *TRI); + const uint32_t *getAllVGPRRegMask() const; const uint32_t *getAllAGPRRegMask() const; const uint32_t *getAllVectorRegMask() const;