Skip to content

Commit 8d60256

Browse files
committed
FC for nonSSA Exec Mask Man instrs
1 parent 2f11810 commit 8d60256

File tree

5 files changed

+140
-91
lines changed

5 files changed

+140
-91
lines changed

llvm/include/llvm/CodeGen/MachineRegisterInfo.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -622,6 +622,13 @@ class MachineRegisterInfo {
622622
/// multiple definitions or no definition, return null.
623623
LLVM_ABI MachineInstr *getUniqueVRegDef(Register Reg) const;
624624

625+
/// getDomVRegDefInBasicBlock - Return the last machine instr that defines
626+
/// the specified virtual register in the basic block, searching backwards
627+
/// from instruction I (exclusive). Returns MBB.end() if no definition is found.
628+
/// Accepts the end() sentinel iterator as a valid value of I; the search then
629+
/// starts at the last instruction of the block.
630+
LLVM_ABI MachineBasicBlock::iterator getDomVRegDefInBasicBlock(Register Reg, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const;
631+
625632
/// clearKillFlags - Iterate over all the uses of the given register and
626633
/// clear the kill flag from the MachineOperand. This function is used by
627634
/// optimization passes which extend register lifetimes and need only

llvm/lib/CodeGen/MachineRegisterInfo.cpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -422,6 +422,20 @@ MachineInstr *MachineRegisterInfo::getUniqueVRegDef(Register Reg) const {
422422
return &*I;
423423
}
424424

425+
/// getDomVRegDefInBasicBlock - Return the last machine instr that defines
426+
/// the specified virtual register in the basic block, searching backwards
427+
/// from instruction I (exclusive). Returns MBB.end() if no definition is found.
428+
MachineBasicBlock::iterator MachineRegisterInfo::getDomVRegDefInBasicBlock(
429+
Register Reg, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const {
430+
// Nothing precedes the first instruction of the block (this also covers an
// empty block, where begin() == end()), so report "not found".
if(I == MBB.begin()) return MBB.end();
431+
// Iterate backwards from I (exclusive) to the beginning of the basic block.
// The decrement happens before the first dereference, so I itself is never
// inspected and I may be MBB.end().
432+
do {
433+
--I;
434+
// The closest preceding instruction for which modifiesRegister() reports
// Reg is the result.
if (I->modifiesRegister(Reg, getTargetRegisterInfo())) return I;
435+
} while (I != MBB.begin());
436+
// Reached the start of the block without a match; MBB.end() is the
// "not found" sentinel.
return MBB.end();
437+
}
438+
425439
bool MachineRegisterInfo::hasOneNonDBGUse(Register RegNo) const {
426440
return hasSingleElement(use_nodbg_operands(RegNo));
427441
}

llvm/lib/Target/AMDGPU/AMDGPUWaveTransform.cpp

Lines changed: 24 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1909,7 +1909,7 @@ void ControlFlowRewriter::rewrite() {
19091909
Opcode = AMDGPU::S_CBRANCH_SCC1;
19101910
} else {
19111911
Register CondReg = Info.OrigCondition;
1912-
bool isCondRegSubsetOfExec = LMA.isSubsetOfExec(CondReg, *Node->Block);
1912+
bool isCondRegSubsetOfExec = LMA.isSubsetOfExec(CondReg, *Node->Block, Node->Block->end());
19131913
LLVM_DEBUG(dbgs() << "isSubsetOfExec(" << printReg(CondReg, MRI.getTargetRegisterInfo(), 0, &MRI) << "," << Node->Block->name() << ") : " << isCondRegSubsetOfExec << "\n");
19141914

19151915
if (!isCondRegSubsetOfExec) {
@@ -1973,7 +1973,7 @@ void ControlFlowRewriter::rewrite() {
19731973
// Step 2.1: Add conditions branching to LaneTarget to the Lane mask
19741974
// Updater.
19751975
// FIXME: we are creating a register here only to initialize the updater
1976-
Updater.init(LMU.createLaneMaskReg());
1976+
Updater.init();
19771977
Updater.addReset(*LaneTarget->Block, GCNLaneMaskUpdater::ResetInMiddle);
19781978
LLVM_DEBUG(dbgs() << "\nMark ResetInMiddle(X): " << LaneTarget->printableName() << '\n');
19791979
for (const auto &NodeDivergentPair : LaneTargetInfo.OriginBranch) {
@@ -2023,7 +2023,7 @@ void ControlFlowRewriter::rewrite() {
20232023
}
20242024
} else {
20252025
CondReg = LaneOrigin.CondReg;
2026-
bool isCondRegSubsetOfExec = LMA.isSubsetOfExec(LaneOrigin.CondReg, *LaneOrigin.Node->Block);
2026+
bool isCondRegSubsetOfExec = LMA.isSubsetOfExec(LaneOrigin.CondReg, *LaneOrigin.Node->Block, LaneOrigin.Node->Block->getFirstTerminator());
20272027
LLVM_DEBUG(dbgs() << "isSubsetOfExec(" << printReg(LaneOrigin.CondReg, MRI.getTargetRegisterInfo(), 0, &MRI) << "," << LaneOrigin.Node->Block->name() << ") : " << isCondRegSubsetOfExec << "\n");
20282028
if (!isCondRegSubsetOfExec) {
20292029
Register Prev = CondReg;
@@ -2120,7 +2120,7 @@ void ControlFlowRewriter::rewrite() {
21202120
LLVM_DEBUG(dbgs() << "\nRejoin @ " << Secondary->printableName() << '\n');
21212121
Secondary->dump();
21222122
// FIXME: we are creating a register here only to initialize the updater
2123-
Updater.init(LMU.createLaneMaskReg());
2123+
Updater.init();
21242124
Updater.addReset(*Secondary->Block, GCNLaneMaskUpdater::ResetInMiddle);
21252125
LLVM_DEBUG(dbgs() << "\nMark ResetInMiddle(X): " << Secondary->printableName() << '\n');
21262126

@@ -2132,32 +2132,32 @@ void ControlFlowRewriter::rewrite() {
21322132
Register PrimaryExec = PredInfo.PrimarySuccessorExec;
21332133
LLVM_DEBUG(dbgs() << "Pred:" << Pred->Block->name() << "\nPrimaryExec:" << printReg(PrimaryExec,MRI.getTargetRegisterInfo(), 0, &MRI) << "\n");
21342134

2135-
MachineInstr *PrimaryExecDef;
2136-
for (;;) {
2137-
PrimaryExecDef = MRI.getVRegDef(PrimaryExec);
2138-
if (PrimaryExecDef->getOpcode() != AMDGPU::COPY)
2139-
break;
2140-
PrimaryExec = PrimaryExecDef->getOperand(1).getReg();
2141-
}
2135+
// MachineInstr *PrimaryExecDef;
2136+
// for (;;) {
2137+
// PrimaryExecDef = MRI.getVRegDef(PrimaryExec);
2138+
// if (PrimaryExecDef->getOpcode() != AMDGPU::COPY)
2139+
// break;
2140+
// PrimaryExec = PrimaryExecDef->getOperand(1).getReg();
2141+
// }
21422142

2143-
LLVM_DEBUG(dbgs() << "PrimaryExecDef:");
2144-
LLVM_DEBUG(PrimaryExecDef->dump());
2145-
LLVM_DEBUG(dbgs() << "\n");
2143+
// LLVM_DEBUG(dbgs() << "PrimaryExecDef:");
2144+
// LLVM_DEBUG(PrimaryExecDef->dump());
2145+
// LLVM_DEBUG(dbgs() << "\n");
21462146

21472147
// Rejoin = EXEC ^ PrimaryExec
21482148
//
21492149
// Fold immediately if PrimaryExec was obtained via XOR as well.
21502150
Register Rejoin;
21512151

2152-
if (PrimaryExecDef->getParent() == Pred->Block &&
2153-
PrimaryExecDef->getOpcode() == LMC.XorOpc &&
2154-
PrimaryExecDef->getOperand(1).isReg() &&
2155-
PrimaryExecDef->getOperand(2).isReg()) {
2156-
if (PrimaryExecDef->getOperand(1).getReg() == LMC.ExecReg)
2157-
Rejoin = PrimaryExecDef->getOperand(2).getReg();
2158-
else if (PrimaryExecDef->getOperand(2).getReg() == LMC.ExecReg)
2159-
Rejoin = PrimaryExecDef->getOperand(1).getReg();
2160-
}
2152+
// if (PrimaryExecDef->getParent() == Pred->Block &&
2153+
// PrimaryExecDef->getOpcode() == LMC.XorOpc &&
2154+
// PrimaryExecDef->getOperand(1).isReg() &&
2155+
// PrimaryExecDef->getOperand(2).isReg()) {
2156+
// if (PrimaryExecDef->getOperand(1).getReg() == LMC.ExecReg)
2157+
// Rejoin = PrimaryExecDef->getOperand(2).getReg();
2158+
// else if (PrimaryExecDef->getOperand(2).getReg() == LMC.ExecReg)
2159+
// Rejoin = PrimaryExecDef->getOperand(1).getReg();
2160+
// }
21612161

21622162
if (!Rejoin) {
21632163
// Try to find a previously generated XOR (or merely masked) value
@@ -2199,6 +2199,7 @@ void ControlFlowRewriter::rewrite() {
21992199

22002200
}
22012201

2202+
Updater.insertAccumulatorResets();
22022203
Updater.cleanup();
22032204

22042205
LLVM_DEBUG(dbgs() << "CFG_BEGIN:" << Function.getName().str() << "_clean\n");

0 commit comments

Comments
 (0)