Commit f196b1d

[VPlan] Extract reverse operation for reverse accesses (#146525)
This patch introduces VPInstruction::Reverse and extracts the reverse operations of loaded/stored values from reverse memory accesses. This extraction facilitates future support for permutation elimination within VPlan.
1 parent: e741cd8

23 files changed, +247 -196 lines
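
Net effect on the VPlan representation: the reverse shuffle becomes an explicit recipe instead of being folded into the widened memory recipe. A hand-written, illustrative sketch of the printed plan for a reverse load feeding a reverse store (names invented; not tool output):

  WIDEN ir<%l> = load vp<%vec.end.ptr>
  EMIT vp<%rev.l> = reverse ir<%l>
  ...
  EMIT vp<%rev.s> = reverse ir<%val>
  WIDEN store vp<%vec.end.ptr.2>, vp<%rev.s>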

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 31 additions & 6 deletions
@@ -7096,6 +7096,20 @@ static bool planContainsAdditionalSimplifications(VPlan &Plan,
       if (AddrI && vputils::isSingleScalar(WidenMemR->getAddr()) !=
                        CostCtx.isLegacyUniformAfterVectorization(AddrI, VF))
         return true;
+
+      if (WidenMemR->isReverse()) {
+        // If the stored value of a reverse store is invariant, LICM will
+        // hoist the reverse operation to the preheader. In this case, the
+        // result of the VPlan-based cost model will diverge from that of
+        // the legacy model.
+        if (auto *StoreR = dyn_cast<VPWidenStoreRecipe>(WidenMemR))
+          if (StoreR->getStoredValue()->isDefinedOutsideLoopRegions())
+            return true;
+
+        if (auto *StoreR = dyn_cast<VPWidenStoreEVLRecipe>(WidenMemR))
+          if (StoreR->getStoredValue()->isDefinedOutsideLoopRegions())
+            return true;
+      }
     }
 
     // The legacy cost model costs non-header phis with a scalar VF as a phi,
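
A hypothetical input illustrating the comment above (not part of the patch): the stored value %c is loop-invariant, so once the reverse of its splat is emitted as a separate in-loop operation, LICM can hoist it to the preheader, and the in-loop cost the VPlan model charges for it no longer matches the legacy model.

  ; LLVM IR sketch: a reverse store of a loop-invariant value.
  define void @invariant_reverse_store(ptr %a, i32 %c, i64 %n) {
  entry:
    br label %loop

  loop:
    %i = phi i64 [ %n, %entry ], [ %i.next, %loop ]
    %i.next = add nsw i64 %i, -1
    %gep = getelementptr inbounds i32, ptr %a, i64 %i.next
    store i32 %c, ptr %gep, align 4
    %cmp = icmp sgt i64 %i.next, 0
    br i1 %cmp, label %loop, label %exit

  exit:
    ret void
  }
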
@@ -7633,8 +7647,8 @@ void EpilogueVectorizerEpilogueLoop::printDebugTracesAtEnd() {
   });
 }
 
-VPWidenMemoryRecipe *VPRecipeBuilder::tryToWidenMemory(VPInstruction *VPI,
-                                                       VFRange &Range) {
+VPRecipeBase *VPRecipeBuilder::tryToWidenMemory(VPInstruction *VPI,
+                                                VFRange &Range) {
   assert((VPI->getOpcode() == Instruction::Load ||
           VPI->getOpcode() == Instruction::Store) &&
          "Must be called with either a load or store");
@@ -7695,15 +7709,26 @@ VPWidenMemoryRecipe *VPRecipeBuilder::tryToWidenMemory(VPInstruction *VPI,
     Builder.insert(VectorPtr);
     Ptr = VectorPtr;
   }
+
   if (VPI->getOpcode() == Instruction::Load) {
     auto *Load = cast<LoadInst>(I);
-    return new VPWidenLoadRecipe(*Load, Ptr, Mask, Consecutive, Reverse, *VPI,
-                                 VPI->getDebugLoc());
+    auto *LoadR = new VPWidenLoadRecipe(*Load, Ptr, Mask, Consecutive, Reverse,
+                                        *VPI, Load->getDebugLoc());
+    if (Reverse) {
+      Builder.insert(LoadR);
+      return new VPInstruction(VPInstruction::Reverse, LoadR, {}, {},
+                               LoadR->getDebugLoc());
+    }
+    return LoadR;
   }
 
   StoreInst *Store = cast<StoreInst>(I);
-  return new VPWidenStoreRecipe(*Store, Ptr, VPI->getOperand(0), Mask,
-                                Consecutive, Reverse, *VPI, VPI->getDebugLoc());
+  VPValue *StoredVal = VPI->getOperand(0);
+  if (Reverse)
+    StoredVal = Builder.createNaryOp(VPInstruction::Reverse, StoredVal,
+                                     Store->getDebugLoc());
+  return new VPWidenStoreRecipe(*Store, Ptr, StoredVal, Mask, Consecutive,
+                                Reverse, *VPI, Store->getDebugLoc());
 }
 
 VPWidenIntOrFpInductionRecipe *

llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h

Lines changed: 1 addition & 1 deletion
@@ -90,7 +90,7 @@ class VPRecipeBuilder {
   /// Check if the load or store instruction \p VPI should widened for \p
   /// Range.Start and potentially masked. Such instructions are handled by a
   /// recipe that takes an additional VPInstruction for the mask.
-  VPWidenMemoryRecipe *tryToWidenMemory(VPInstruction *VPI, VFRange &Range);
+  VPRecipeBase *tryToWidenMemory(VPInstruction *VPI, VFRange &Range);
 
   /// Optimize the special case where the operand of \p VPI is a constant
   /// integer induction variable.

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 5 additions & 3 deletions
@@ -1111,6 +1111,8 @@ class LLVM_ABI_FOR_TEST VPInstruction : public VPRecipeWithIRFlags,
     // Unrolling will add all copies of its original operand as additional
     // operands.
     LastActiveLane,
+    // Returns a reversed vector for the operand.
+    Reverse,
 
     // The opcodes below are used for VPInstructionWithType.
     //
@@ -3476,10 +3478,10 @@ struct LLVM_ABI_FOR_TEST VPWidenStoreRecipe final : public VPWidenMemoryRecipe {
 /// using the value to store, the address to store to, the explicit vector
 /// length and an optional mask.
 struct VPWidenStoreEVLRecipe final : public VPWidenMemoryRecipe {
-  VPWidenStoreEVLRecipe(VPWidenStoreRecipe &S, VPValue *Addr, VPValue &EVL,
-                        VPValue *Mask)
+  VPWidenStoreEVLRecipe(VPWidenStoreRecipe &S, VPValue *Addr,
+                        VPValue *StoredVal, VPValue &EVL, VPValue *Mask)
       : VPWidenMemoryRecipe(VPDef::VPWidenStoreEVLSC, S.getIngredient(),
-                            {Addr, S.getStoredValue(), &EVL}, S.isConsecutive(),
+                            {Addr, StoredVal, &EVL}, S.isConsecutive(),
                             S.isReverse(), S, S.getDebugLoc()) {
     setMask(Mask);
   }

llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp

Lines changed: 1 addition & 0 deletions
@@ -132,6 +132,7 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
   case VPInstruction::Broadcast:
   case VPInstruction::PtrAdd:
   case VPInstruction::WidePtrAdd:
+  case VPInstruction::Reverse:
     // Return the type based on first operand.
     return inferScalarType(R->getOperand(0));
   case VPInstruction::BranchOnCond:

llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h

Lines changed: 6 additions & 0 deletions
@@ -462,6 +462,12 @@ m_LastActiveLane(const Op0_t &Op0) {
   return m_VPInstruction<VPInstruction::LastActiveLane>(Op0);
 }
 
+template <typename Op0_t>
+inline VPInstruction_match<VPInstruction::Reverse, Op0_t>
+m_Reverse(const Op0_t &Op0) {
+  return m_VPInstruction<VPInstruction::Reverse>(Op0);
+}
+
 inline VPInstruction_match<VPInstruction::StepVector> m_StepVector() {
   return m_VPInstruction<VPInstruction::StepVector>();
 }
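
The new matcher composes with the existing binders; a minimal usage sketch in the style of the call sites added in VPlanRecipes.cpp and VPlanTransforms.cpp below (R is assumed to be some recipe under inspection):

  // Bind the operand of a VPInstruction::Reverse, if R is one.
  VPValue *ReversedVal;
  if (match(&R, m_Reverse(m_VPValue(ReversedVal)))) {
    // R reverses ReversedVal; look through it as needed.
  }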

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 38 additions & 39 deletions
@@ -444,6 +444,7 @@ unsigned VPInstruction::getNumOperandsForOpcode(unsigned Opcode) {
   case VPInstruction::ExtractPenultimateElement:
   case VPInstruction::Not:
   case VPInstruction::ResumeForEpilogue:
+  case VPInstruction::Reverse:
   case VPInstruction::Unpack:
     return 1;
   case Instruction::ICmp:
@@ -901,6 +902,8 @@ Value *VPInstruction::generate(VPTransformState &State) {
   }
   case VPInstruction::ResumeForEpilogue:
     return State.get(getOperand(0), true);
+  case VPInstruction::Reverse:
+    return Builder.CreateVectorReverse(State.get(getOperand(0)), "reverse");
   default:
     llvm_unreachable("Unsupported opcode for instruction");
   }
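
For reference, IRBuilder::CreateVectorReverse lowers to a reversed shufflevector for fixed-width vectors and to the llvm.vector.reverse intrinsic for scalable ones; both forms show up in the updated tests below. A small sketch with an assumed i32 element type:

  ; fixed VF of 4:
  %reverse = shufflevector <4 x i32> %v, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ; scalable vectors:
  %reverse.sv = call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> %vs)
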
@@ -1087,6 +1090,14 @@ InstructionCost VPInstruction::computeCost(ElementCount VF,
                                     I32Ty, {Arg0Ty, I32Ty, I1Ty});
     return Ctx.TTI.getIntrinsicInstrCost(Attrs, Ctx.CostKind);
   }
+  case VPInstruction::Reverse: {
+    assert(VF.isVector() && "Reverse operation must be vector type");
+    auto *VectorTy = cast<VectorType>(
+        toVectorTy(Ctx.Types.inferScalarType(getOperand(0)), VF));
+    return Ctx.TTI.getShuffleCost(TargetTransformInfo::SK_Reverse, VectorTy,
+                                  VectorTy, /*Mask=*/{}, Ctx.CostKind,
+                                  /*Index=*/0);
+  }
   case VPInstruction::ExtractLastLane: {
     // Add on the cost of extracting the element.
     auto *VecTy = toVectorTy(Ctx.Types.inferScalarType(getOperand(0)), VF);
@@ -1189,6 +1200,7 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const {
   case VPInstruction::WidePtrAdd:
   case VPInstruction::StepVector:
   case VPInstruction::ReductionStartVector:
+  case VPInstruction::Reverse:
   case VPInstruction::VScale:
   case VPInstruction::Unpack:
     return false;
@@ -1366,6 +1378,9 @@ void VPInstruction::printRecipe(raw_ostream &O, const Twine &Indent,
   case VPInstruction::ResumeForEpilogue:
     O << "resume-for-epilogue";
     break;
+  case VPInstruction::Reverse:
+    O << "reverse";
+    break;
   case VPInstruction::Unpack:
     O << "unpack";
     break;
@@ -2244,18 +2259,32 @@ InstructionCost VPWidenCastRecipe::computeCost(ElementCount VF,
   VPValue *Operand = getOperand(0);
   TTI::CastContextHint CCH = TTI::CastContextHint::None;
   // For Trunc/FPTrunc, get the context from the only user.
-  if ((Opcode == Instruction::Trunc || Opcode == Instruction::FPTrunc) &&
-      !hasMoreThanOneUniqueUser() && getNumUsers() > 0) {
-    if (auto *StoreRecipe = dyn_cast<VPRecipeBase>(*user_begin()))
-      CCH = ComputeCCH(StoreRecipe);
+  if (Opcode == Instruction::Trunc || Opcode == Instruction::FPTrunc) {
+    auto GetOnlyUser = [](const VPSingleDefRecipe *R) -> VPRecipeBase * {
+      if (R->getNumUsers() == 0 || R->hasMoreThanOneUniqueUser())
+        return nullptr;
+      return dyn_cast<VPRecipeBase>(*R->user_begin());
+    };
+
+    if (VPRecipeBase *Recipe = GetOnlyUser(this)) {
+      if (match(Recipe, m_Reverse(m_VPValue())))
+        Recipe = GetOnlyUser(cast<VPInstruction>(Recipe));
+      if (Recipe)
+        CCH = ComputeCCH(Recipe);
+    }
   }
   // For Z/Sext, get the context from the operand.
   else if (Opcode == Instruction::ZExt || Opcode == Instruction::SExt ||
            Opcode == Instruction::FPExt) {
     if (Operand->isLiveIn())
       CCH = TTI::CastContextHint::Normal;
-    else if (Operand->getDefiningRecipe())
-      CCH = ComputeCCH(Operand->getDefiningRecipe());
+    else if (auto *Recipe = Operand->getDefiningRecipe()) {
+      VPValue *ReverseOp;
+      if (match(Recipe, m_Reverse(m_VPValue(ReverseOp))))
+        Recipe = ReverseOp->getDefiningRecipe();
+      if (Recipe)
+        CCH = ComputeCCH(Recipe);
+    }
   }
 
   auto *SrcTy =
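
Why the look-through matters: with the reverse extracted, a truncated value that previously fed the store directly now feeds the Reverse VPInstruction, so without the look-through the cast would lose its store context hint. A hypothetical input loop that exercises this path (a truncating store through a reverse access; not from the patch):

  define void @trunc_reverse_store(ptr %src, ptr %dst, i64 %n) {
  entry:
    br label %loop

  loop:
    %i = phi i64 [ %n, %entry ], [ %i.next, %loop ]
    %i.next = add nsw i64 %i, -1
    %s = getelementptr inbounds i32, ptr %src, i64 %i.next
    %v = load i32, ptr %s, align 4
    %t = trunc i32 %v to i16
    %d = getelementptr inbounds i16, ptr %dst, i64 %i.next
    store i16 %t, ptr %d, align 2
    %cmp = icmp sgt i64 %i.next, 0
    br i1 %cmp, label %loop, label %exit

  exit:
    ret void
  }
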
@@ -3500,12 +3529,7 @@ InstructionCost VPWidenMemoryRecipe::computeCost(ElementCount VF,
     Cost += Ctx.TTI.getMemoryOpCost(Opcode, Ty, Alignment, AS, Ctx.CostKind,
                                     OpInfo, &Ingredient);
   }
-  if (!Reverse)
-    return Cost;
-
-  return Cost += Ctx.TTI.getShuffleCost(
-             TargetTransformInfo::SK_Reverse, cast<VectorType>(Ty),
-             cast<VectorType>(Ty), {}, Ctx.CostKind, 0);
+  return Cost;
 }
 
 void VPWidenLoadRecipe::execute(VPTransformState &State) {
@@ -3536,8 +3560,6 @@ void VPWidenLoadRecipe::execute(VPTransformState &State) {
     NewLI = Builder.CreateAlignedLoad(DataTy, Addr, Alignment, "wide.load");
   }
   applyMetadata(*cast<Instruction>(NewLI));
-  if (Reverse)
-    NewLI = Builder.CreateVectorReverse(NewLI, "reverse");
   State.set(this, NewLI);
 }
 
@@ -3592,8 +3614,6 @@ void VPWidenLoadEVLRecipe::execute(VPTransformState &State) {
       0, Attribute::getWithAlignment(NewLI->getContext(), Alignment));
   applyMetadata(*NewLI);
   Instruction *Res = NewLI;
-  if (isReverse())
-    Res = createReverseEVL(Builder, Res, EVL, "vp.reverse");
   State.set(this, Res);
 }
 
@@ -3610,15 +3630,9 @@ InstructionCost VPWidenLoadEVLRecipe::computeCost(ElementCount VF,
   Type *Ty = toVectorTy(getLoadStoreType(&Ingredient), VF);
   unsigned AS = cast<PointerType>(Ctx.Types.inferScalarType(getAddr()))
                     ->getAddressSpace();
-  InstructionCost Cost = Ctx.TTI.getMemIntrinsicInstrCost(
+  return Ctx.TTI.getMemIntrinsicInstrCost(
       MemIntrinsicCostAttributes(Intrinsic::vp_load, Ty, Alignment, AS),
       Ctx.CostKind);
-  if (!Reverse)
-    return Cost;
-
-  return Cost + Ctx.TTI.getShuffleCost(
-                    TargetTransformInfo::SK_Reverse, cast<VectorType>(Ty),
-                    cast<VectorType>(Ty), {}, Ctx.CostKind, 0);
 }
 
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -3647,13 +3661,6 @@ void VPWidenStoreRecipe::execute(VPTransformState &State) {
   }
 
   Value *StoredVal = State.get(StoredVPValue);
-  if (isReverse()) {
-    // If we store to reverse consecutive memory locations, then we need
-    // to reverse the order of elements in the stored value.
-    StoredVal = Builder.CreateVectorReverse(StoredVal, "reverse");
-    // We don't want to update the value in the map as it might be used in
-    // another expression. So don't call resetVectorValue(StoredVal).
-  }
   Value *Addr = State.get(getAddr(), /*IsScalar*/ !CreateScatter);
   Instruction *NewSI = nullptr;
   if (CreateScatter)
@@ -3682,8 +3689,6 @@ void VPWidenStoreEVLRecipe::execute(VPTransformState &State) {
   CallInst *NewSI = nullptr;
   Value *StoredVal = State.get(StoredValue);
   Value *EVL = State.get(getEVL(), VPLane(0));
-  if (isReverse())
-    StoredVal = createReverseEVL(Builder, StoredVal, EVL, "vp.reverse");
   Value *Mask = nullptr;
   if (VPValue *VPMask = getMask()) {
     Mask = State.get(VPMask);
@@ -3720,15 +3725,9 @@ InstructionCost VPWidenStoreEVLRecipe::computeCost(ElementCount VF,
   Type *Ty = toVectorTy(getLoadStoreType(&Ingredient), VF);
   unsigned AS = cast<PointerType>(Ctx.Types.inferScalarType(getAddr()))
                     ->getAddressSpace();
-  InstructionCost Cost = Ctx.TTI.getMemIntrinsicInstrCost(
+  return Ctx.TTI.getMemIntrinsicInstrCost(
      MemIntrinsicCostAttributes(Intrinsic::vp_store, Ty, Alignment, AS),
      Ctx.CostKind);
-  if (!Reverse)
-    return Cost;
-
-  return Cost + Ctx.TTI.getShuffleCost(
-                    TargetTransformInfo::SK_Reverse, cast<VectorType>(Ty),
-                    cast<VectorType>(Ty), {}, Ctx.CostKind, 0);
 }
 
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 26 additions & 10 deletions
@@ -2887,25 +2887,41 @@ static VPRecipeBase *optimizeMaskToEVL(VPValue *HeaderMask,
     return new VPWidenLoadEVLRecipe(cast<VPWidenLoadRecipe>(CurRecipe), Addr,
                                     EVL, Mask);
 
-  if (match(&CurRecipe,
+  VPValue *ReversedVal;
+  if (match(&CurRecipe, m_Reverse(m_VPValue(ReversedVal))) &&
+      match(ReversedVal,
             m_MaskedLoad(m_VPValue(EndPtr), m_RemoveMask(HeaderMask, Mask))) &&
       match(EndPtr, m_VecEndPtr(m_VPValue(Addr), m_Specific(&Plan->getVF()))) &&
-      cast<VPWidenLoadRecipe>(CurRecipe).isReverse())
-    return new VPWidenLoadEVLRecipe(cast<VPWidenLoadRecipe>(CurRecipe),
-                                    AdjustEndPtr(EndPtr), EVL, Mask);
+      cast<VPWidenLoadRecipe>(ReversedVal)->isReverse()) {
+    auto *LoadR = new VPWidenLoadEVLRecipe(
+        *cast<VPWidenLoadRecipe>(ReversedVal), AdjustEndPtr(EndPtr), EVL, Mask);
+    LoadR->insertBefore(&CurRecipe);
+    return new VPWidenIntrinsicRecipe(
+        Intrinsic::experimental_vp_reverse, {LoadR, Plan->getTrue(), &EVL},
+        TypeInfo.inferScalarType(LoadR), {}, {}, DL);
+  }
 
-  if (match(&CurRecipe, m_MaskedStore(m_VPValue(Addr), m_VPValue(),
+  VPValue *StoredVal;
+  if (match(&CurRecipe, m_MaskedStore(m_VPValue(Addr), m_VPValue(StoredVal),
                                       m_RemoveMask(HeaderMask, Mask))) &&
      !cast<VPWidenStoreRecipe>(CurRecipe).isReverse())
     return new VPWidenStoreEVLRecipe(cast<VPWidenStoreRecipe>(CurRecipe), Addr,
-                                     EVL, Mask);
+                                     StoredVal, EVL, Mask);
 
-  if (match(&CurRecipe, m_MaskedStore(m_VPValue(EndPtr), m_VPValue(),
-                                      m_RemoveMask(HeaderMask, Mask))) &&
+  if (match(&CurRecipe,
+            m_MaskedStore(m_VPValue(EndPtr), m_Reverse(m_VPValue(ReversedVal)),
+                          m_RemoveMask(HeaderMask, Mask))) &&
       match(EndPtr, m_VecEndPtr(m_VPValue(Addr), m_Specific(&Plan->getVF()))) &&
-      cast<VPWidenStoreRecipe>(CurRecipe).isReverse())
+      cast<VPWidenStoreRecipe>(CurRecipe).isReverse()) {
+    auto *NewReverse = new VPWidenIntrinsicRecipe(
+        Intrinsic::experimental_vp_reverse,
+        {ReversedVal, Plan->getTrue(), &EVL},
+        TypeInfo.inferScalarType(ReversedVal), {}, {}, DL);
+    NewReverse->insertBefore(&CurRecipe);
     return new VPWidenStoreEVLRecipe(cast<VPWidenStoreRecipe>(CurRecipe),
-                                     AdjustEndPtr(EndPtr), EVL, Mask);
+                                     AdjustEndPtr(EndPtr), NewReverse, EVL,
+                                     Mask);
+  }
 
   if (auto *Rdx = dyn_cast<VPReductionRecipe>(&CurRecipe))
     if (Rdx->isConditional() &&
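
In the EVL path, the extracted reverse is rewritten to the length-aware llvm.experimental.vp.reverse intrinsic with an all-true mask, so only the first EVL lanes participate. A sketch of the emitted call with an assumed element type:

  %vp.reverse = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(
                    <vscale x 4 x i32> %v, <vscale x 4 x i1> splat (i1 true), i32 %evl)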

llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse-mask4.ll

Lines changed: 1 addition & 1 deletion
@@ -22,8 +22,8 @@ define void @vector_reverse_mask_nxv4i1(ptr %a, ptr %cond, i64 %N) #0 {
 ; CHECK: %[[WIDEMSKLOAD:.*]] = call <vscale x 4 x double> @llvm.masked.load.nxv4f64.p0(ptr align 8 %{{.*}}, <vscale x 4 x i1> %[[REVERSE6]], <vscale x 4 x double> poison)
 ; CHECK: %[[REVERSE7:.*]] = call <vscale x 4 x double> @llvm.vector.reverse.nxv4f64(<vscale x 4 x double> %[[WIDEMSKLOAD]])
 ; CHECK: %[[FADD:.*]] = fadd <vscale x 4 x double> %[[REVERSE7]]
-; CHECK: %[[REVERSE9:.*]] = call <vscale x 4 x i1> @llvm.vector.reverse.nxv4i1(<vscale x 4 x i1> %{{.*}})
 ; CHECK: %[[REVERSE8:.*]] = call <vscale x 4 x double> @llvm.vector.reverse.nxv4f64(<vscale x 4 x double> %[[FADD]])
+; CHECK: %[[REVERSE9:.*]] = call <vscale x 4 x i1> @llvm.vector.reverse.nxv4i1(<vscale x 4 x i1> %{{.*}})
 ; CHECK: call void @llvm.masked.store.nxv4f64.p0(<vscale x 4 x double> %[[REVERSE8]], ptr align 8 %{{.*}}, <vscale x 4 x i1> %[[REVERSE9]]
 
 entry:

llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse-mask4.ll

Lines changed: 1 addition & 1 deletion
@@ -37,8 +37,8 @@ define void @vector_reverse_mask_v4i1(ptr noalias %a, ptr noalias %cond, i64 %N)
 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 -24
 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 -56
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x double>, ptr [[TMP3]], align 8
-; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x double> [[WIDE_LOAD]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x double>, ptr [[TMP4]], align 8
+; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x double> [[WIDE_LOAD]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT: [[REVERSE2:%.*]] = shufflevector <4 x double> [[WIDE_LOAD1]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT: [[TMP5:%.*]] = fcmp une <4 x double> [[REVERSE]], zeroinitializer
 ; CHECK-NEXT: [[TMP6:%.*]] = fcmp une <4 x double> [[REVERSE2]], zeroinitializer
