@@ -1909,7 +1909,7 @@ void ControlFlowRewriter::rewrite() {
19091909 Opcode = AMDGPU::S_CBRANCH_SCC1;
19101910 } else {
19111911 Register CondReg = Info.OrigCondition ;
1912- bool isCondRegSubsetOfExec = LMA.isSubsetOfExec (CondReg, *Node->Block );
1912+ bool isCondRegSubsetOfExec = LMA.isSubsetOfExec (CondReg, *Node->Block , Node-> Block -> end () );
19131913 LLVM_DEBUG (dbgs () << " isSubsetOfExec(" << printReg (CondReg, MRI.getTargetRegisterInfo (), 0 , &MRI) << " ," << Node->Block ->name () << " ) : " << isCondRegSubsetOfExec << " \n " );
19141914
19151915 if (!isCondRegSubsetOfExec) {
@@ -1973,7 +1973,7 @@ void ControlFlowRewriter::rewrite() {
19731973 // Step 2.1: Add conditions branching to LaneTarget to the Lane mask
19741974 // Updater.
19751975 // FIXME: we are creating a register here only to initialize the updater
1976- Updater.init (LMU. createLaneMaskReg () );
1976+ Updater.init ();
19771977 Updater.addReset (*LaneTarget->Block , GCNLaneMaskUpdater::ResetInMiddle);
19781978 LLVM_DEBUG (dbgs () << " \n Mark ResetInMiddle(X): " << LaneTarget->printableName () << ' \n ' );
19791979 for (const auto &NodeDivergentPair : LaneTargetInfo.OriginBranch ) {
@@ -2023,7 +2023,7 @@ void ControlFlowRewriter::rewrite() {
20232023 }
20242024 } else {
20252025 CondReg = LaneOrigin.CondReg ;
2026- bool isCondRegSubsetOfExec = LMA.isSubsetOfExec (LaneOrigin.CondReg , *LaneOrigin.Node ->Block );
2026+ bool isCondRegSubsetOfExec = LMA.isSubsetOfExec (LaneOrigin.CondReg , *LaneOrigin.Node ->Block , LaneOrigin. Node -> Block -> getFirstTerminator () );
20272027 LLVM_DEBUG (dbgs () << " isSubsetOfExec(" << printReg (LaneOrigin.CondReg , MRI.getTargetRegisterInfo (), 0 , &MRI) << " ," << LaneOrigin.Node ->Block ->name () << " ) : " << isCondRegSubsetOfExec << " \n " );
20282028 if (!isCondRegSubsetOfExec) {
20292029 Register Prev = CondReg;
@@ -2120,7 +2120,7 @@ void ControlFlowRewriter::rewrite() {
21202120 LLVM_DEBUG (dbgs () << " \n Rejoin @ " << Secondary->printableName () << ' \n ' );
21212121 Secondary->dump ();
21222122 // FIXME: we are creating a register here only to initialize the updater
2123- Updater.init (LMU. createLaneMaskReg () );
2123+ Updater.init ();
21242124 Updater.addReset (*Secondary->Block , GCNLaneMaskUpdater::ResetInMiddle);
21252125 LLVM_DEBUG (dbgs () << " \n Mark ResetInMiddle(X): " << Secondary->printableName () << ' \n ' );
21262126
@@ -2132,32 +2132,32 @@ void ControlFlowRewriter::rewrite() {
21322132 Register PrimaryExec = PredInfo.PrimarySuccessorExec ;
21332133 LLVM_DEBUG (dbgs () << " Pred:" << Pred->Block ->name () << " \n PrimaryExec:" << printReg (PrimaryExec,MRI.getTargetRegisterInfo (), 0 , &MRI) << " \n " );
21342134
2135- MachineInstr *PrimaryExecDef;
2136- for (;;) {
2137- PrimaryExecDef = MRI.getVRegDef (PrimaryExec);
2138- if (PrimaryExecDef->getOpcode () != AMDGPU::COPY)
2139- break ;
2140- PrimaryExec = PrimaryExecDef->getOperand (1 ).getReg ();
2141- }
2135+ // MachineInstr *PrimaryExecDef;
2136+ // for (;;) {
2137+ // PrimaryExecDef = MRI.getVRegDef(PrimaryExec);
2138+ // if (PrimaryExecDef->getOpcode() != AMDGPU::COPY)
2139+ // break;
2140+ // PrimaryExec = PrimaryExecDef->getOperand(1).getReg();
2141+ // }
21422142
2143- LLVM_DEBUG (dbgs () << " PrimaryExecDef:" );
2144- LLVM_DEBUG (PrimaryExecDef->dump ());
2145- LLVM_DEBUG (dbgs () << " \n " );
2143+ // LLVM_DEBUG(dbgs() << "PrimaryExecDef:");
2144+ // LLVM_DEBUG(PrimaryExecDef->dump());
2145+ // LLVM_DEBUG(dbgs() << "\n");
21462146
21472147 // Rejoin = EXEC ^ PrimaryExec
21482148 //
21492149 // Fold immediately if PrimaryExec was obtained via XOR as well.
21502150 Register Rejoin;
21512151
2152- if (PrimaryExecDef->getParent () == Pred->Block &&
2153- PrimaryExecDef->getOpcode () == LMC.XorOpc &&
2154- PrimaryExecDef->getOperand (1 ).isReg () &&
2155- PrimaryExecDef->getOperand (2 ).isReg ()) {
2156- if (PrimaryExecDef->getOperand (1 ).getReg () == LMC.ExecReg )
2157- Rejoin = PrimaryExecDef->getOperand (2 ).getReg ();
2158- else if (PrimaryExecDef->getOperand (2 ).getReg () == LMC.ExecReg )
2159- Rejoin = PrimaryExecDef->getOperand (1 ).getReg ();
2160- }
2152+ // if (PrimaryExecDef->getParent() == Pred->Block &&
2153+ // PrimaryExecDef->getOpcode() == LMC.XorOpc &&
2154+ // PrimaryExecDef->getOperand(1).isReg() &&
2155+ // PrimaryExecDef->getOperand(2).isReg()) {
2156+ // if (PrimaryExecDef->getOperand(1).getReg() == LMC.ExecReg)
2157+ // Rejoin = PrimaryExecDef->getOperand(2).getReg();
2158+ // else if (PrimaryExecDef->getOperand(2).getReg() == LMC.ExecReg)
2159+ // Rejoin = PrimaryExecDef->getOperand(1).getReg();
2160+ // }
21612161
21622162 if (!Rejoin) {
21632163 // Try to find a previously generated XOR (or merely masked) value
@@ -2199,6 +2199,7 @@ void ControlFlowRewriter::rewrite() {
21992199
22002200 }
22012201
2202+ Updater.insertAccumulatorResets ();
22022203 Updater.cleanup ();
22032204
22042205 LLVM_DEBUG (dbgs () << " CFG_BEGIN:" << Function.getName ().str () << " _clean\n " );
0 commit comments