Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 34 additions & 20 deletions llvm/lib/Target/AMDGPU/AMDGPUWaveTransform.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1839,7 +1839,7 @@ void ControlFlowRewriter::rewrite() {
Opcode = AMDGPU::S_CBRANCH_SCC1;
} else {
Register CondReg = Info.OrigCondition;
if (!LMA.isSubsetOfExec(CondReg, *Node->Block)) {
if (!LMA.isSubsetOfExec(CondReg, *Node->Block, Node->Block->end())) {
CondReg = LMU.createLaneMaskReg();
BuildMI(*Node->Block, Node->Block->end(), {}, TII.get(LMC.AndOpc),
CondReg)
Expand Down Expand Up @@ -1876,7 +1876,6 @@ void ControlFlowRewriter::rewrite() {
RegMap;
GCNLaneMaskUpdater Updater(Function);
Updater.setLaneMaskAnalysis(&LMA);
Updater.setAccumulating(true);

for (WaveNode *LaneTarget : NodeOrder) {
CFGNodeInfo &LaneTargetInfo = NodeInfo.find(LaneTarget)->second;
Expand Down Expand Up @@ -1937,7 +1936,8 @@ void ControlFlowRewriter::rewrite() {
}
} else {
CondReg = LaneOrigin.CondReg;
if (!LMA.isSubsetOfExec(LaneOrigin.CondReg, *LaneOrigin.Node->Block)) {
if (!LMA.isSubsetOfExec(LaneOrigin.CondReg, *LaneOrigin.Node->Block,
LaneOrigin.Node->Block->getFirstTerminator())) {
Register Prev = CondReg;
CondReg = LMU.createLaneMaskReg();
BuildMI(*LaneOrigin.Node->Block,
Expand Down Expand Up @@ -2033,28 +2033,34 @@ void ControlFlowRewriter::rewrite() {
CFGNodeInfo &PredInfo = NodeInfo.find(Pred)->second;
Register PrimaryExec = PredInfo.PrimarySuccessorExec;

MachineInstr *PrimaryExecDef;
for (;;) {
PrimaryExecDef = MRI.getVRegDef(PrimaryExec);
if (PrimaryExecDef->getOpcode() != AMDGPU::COPY)
break;
PrimaryExec = PrimaryExecDef->getOperand(1).getReg();
}
// Turning off this copy-chain optimization to retain the Accumulator as
// the PrimaryExec

// MachineInstr *PrimaryExecDef;

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Commented-out code should not be left in the file; please remove all of it. What is the purpose of the comment above? Are you planning to implement a similar optimization for the ACC-based non-SSA form? If so, leave a clear note saying that (the commented-out code still needs to be removed). Otherwise, remove the comment as well.

// for (;;) {
// PrimaryExecDef = MRI.getVRegDef(PrimaryExec);
// if (PrimaryExecDef->getOpcode() != AMDGPU::COPY)
// break;
// PrimaryExec = PrimaryExecDef->getOperand(1).getReg();
// }

// Rejoin = EXEC ^ PrimaryExec
//
// Fold immediately if PrimaryExec was obtained via XOR as well.
Register Rejoin;

if (PrimaryExecDef->getParent() == Pred->Block &&
PrimaryExecDef->getOpcode() == LMC.XorOpc &&
PrimaryExecDef->getOperand(1).isReg() &&
PrimaryExecDef->getOperand(2).isReg()) {
if (PrimaryExecDef->getOperand(1).getReg() == LMC.ExecReg)
Rejoin = PrimaryExecDef->getOperand(2).getReg();
else if (PrimaryExecDef->getOperand(2).getReg() == LMC.ExecReg)
Rejoin = PrimaryExecDef->getOperand(1).getReg();
}
// Turning off this XOR optimization since buildMergeLaneMasks() will not

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ditto.

// introduce XOR instruction for creating the PrimaryExec

// if (PrimaryExecDef->getParent() == Pred->Block &&
// PrimaryExecDef->getOpcode() == LMC.XorOpc &&
// PrimaryExecDef->getOperand(1).isReg() &&
// PrimaryExecDef->getOperand(2).isReg()) {
// if (PrimaryExecDef->getOperand(1).getReg() == LMC.ExecReg)
// Rejoin = PrimaryExecDef->getOperand(2).getReg();
// else if (PrimaryExecDef->getOperand(2).getReg() == LMC.ExecReg)
// Rejoin = PrimaryExecDef->getOperand(1).getReg();
// }

if (!Rejoin) {
// Try to find a previously generated XOR (or merely masked) value
Expand Down Expand Up @@ -2091,7 +2097,15 @@ void ControlFlowRewriter::rewrite() {

LLVM_DEBUG(Function.dump());
}

Updater.insertAccumulatorResets();
// Replace MovTermOpc with MovOpc
for (MachineBasicBlock &MBB : Function) {
for (MachineInstr &MI : MBB) {
if (MI.getOpcode() == LMC.MovTermOpc) {
MI.setDesc(TII.get(LMC.MovOpc));
}
}
}
Updater.cleanup();
}

Expand Down
Loading