@@ -444,6 +444,7 @@ unsigned VPInstruction::getNumOperandsForOpcode(unsigned Opcode) {
444444 case VPInstruction::ExtractPenultimateElement:
445445 case VPInstruction::Not:
446446 case VPInstruction::ResumeForEpilogue:
447+ case VPInstruction::Reverse:
447448 case VPInstruction::Unpack:
448449 return 1 ;
449450 case Instruction::ICmp:
@@ -901,6 +902,8 @@ Value *VPInstruction::generate(VPTransformState &State) {
901902 }
902903 case VPInstruction::ResumeForEpilogue:
903904 return State.get (getOperand (0 ), true );
905+ case VPInstruction::Reverse:
906+ return Builder.CreateVectorReverse (State.get (getOperand (0 )), " reverse" );
904907 default :
905908 llvm_unreachable (" Unsupported opcode for instruction" );
906909 }
@@ -1087,6 +1090,14 @@ InstructionCost VPInstruction::computeCost(ElementCount VF,
10871090 I32Ty, {Arg0Ty, I32Ty, I1Ty});
10881091 return Ctx.TTI .getIntrinsicInstrCost (Attrs, Ctx.CostKind );
10891092 }
1093+ case VPInstruction::Reverse: {
1094+ assert (VF.isVector () && " Reverse operation must be vector type" );
1095+ auto *VectorTy = cast<VectorType>(
1096+ toVectorTy (Ctx.Types .inferScalarType (getOperand (0 )), VF));
1097+ return Ctx.TTI .getShuffleCost (TargetTransformInfo::SK_Reverse, VectorTy,
1098+ VectorTy, /* Mask=*/ {}, Ctx.CostKind ,
1099+ /* Index=*/ 0 );
1100+ }
10901101 case VPInstruction::ExtractLastLane: {
10911102 // Add on the cost of extracting the element.
10921103 auto *VecTy = toVectorTy (Ctx.Types .inferScalarType (getOperand (0 )), VF);
@@ -1189,6 +1200,7 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const {
11891200 case VPInstruction::WidePtrAdd:
11901201 case VPInstruction::StepVector:
11911202 case VPInstruction::ReductionStartVector:
1203+ case VPInstruction::Reverse:
11921204 case VPInstruction::VScale:
11931205 case VPInstruction::Unpack:
11941206 return false ;
@@ -1366,6 +1378,9 @@ void VPInstruction::printRecipe(raw_ostream &O, const Twine &Indent,
13661378 case VPInstruction::ResumeForEpilogue:
13671379 O << " resume-for-epilogue" ;
13681380 break ;
1381+ case VPInstruction::Reverse:
1382+ O << " reverse" ;
1383+ break ;
13691384 case VPInstruction::Unpack:
13701385 O << " unpack" ;
13711386 break ;
@@ -2244,18 +2259,32 @@ InstructionCost VPWidenCastRecipe::computeCost(ElementCount VF,
22442259 VPValue *Operand = getOperand (0 );
22452260 TTI::CastContextHint CCH = TTI::CastContextHint::None;
22462261 // For Trunc/FPTrunc, get the context from the only user.
2247- if ((Opcode == Instruction::Trunc || Opcode == Instruction::FPTrunc) &&
2248- !hasMoreThanOneUniqueUser () && getNumUsers () > 0 ) {
2249- if (auto *StoreRecipe = dyn_cast<VPRecipeBase>(*user_begin ()))
2250- CCH = ComputeCCH (StoreRecipe);
2262+ if (Opcode == Instruction::Trunc || Opcode == Instruction::FPTrunc) {
2263+ auto GetOnlyUser = [](const VPSingleDefRecipe *R) -> VPRecipeBase * {
2264+ if (R->getNumUsers () == 0 || R->hasMoreThanOneUniqueUser ())
2265+ return nullptr ;
2266+ return dyn_cast<VPRecipeBase>(*R->user_begin ());
2267+ };
2268+
2269+ if (VPRecipeBase *Recipe = GetOnlyUser (this )) {
2270+ if (match (Recipe, m_Reverse (m_VPValue ())))
2271+ Recipe = GetOnlyUser (cast<VPInstruction>(Recipe));
2272+ if (Recipe)
2273+ CCH = ComputeCCH (Recipe);
2274+ }
22512275 }
22522276 // For Z/Sext, get the context from the operand.
22532277 else if (Opcode == Instruction::ZExt || Opcode == Instruction::SExt ||
22542278 Opcode == Instruction::FPExt) {
22552279 if (Operand->isLiveIn ())
22562280 CCH = TTI::CastContextHint::Normal;
2257- else if (Operand->getDefiningRecipe ())
2258- CCH = ComputeCCH (Operand->getDefiningRecipe ());
2281+ else if (auto *Recipe = Operand->getDefiningRecipe ()) {
2282+ VPValue *ReverseOp;
2283+ if (match (Recipe, m_Reverse (m_VPValue (ReverseOp))))
2284+ Recipe = ReverseOp->getDefiningRecipe ();
2285+ if (Recipe)
2286+ CCH = ComputeCCH (Recipe);
2287+ }
22592288 }
22602289
22612290 auto *SrcTy =
@@ -3500,12 +3529,7 @@ InstructionCost VPWidenMemoryRecipe::computeCost(ElementCount VF,
35003529 Cost += Ctx.TTI .getMemoryOpCost (Opcode, Ty, Alignment, AS, Ctx.CostKind ,
35013530 OpInfo, &Ingredient);
35023531 }
3503- if (!Reverse)
3504- return Cost;
3505-
3506- return Cost += Ctx.TTI .getShuffleCost (
3507- TargetTransformInfo::SK_Reverse, cast<VectorType>(Ty),
3508- cast<VectorType>(Ty), {}, Ctx.CostKind , 0 );
3532+ return Cost;
35093533}
35103534
35113535void VPWidenLoadRecipe::execute (VPTransformState &State) {
@@ -3536,8 +3560,6 @@ void VPWidenLoadRecipe::execute(VPTransformState &State) {
35363560 NewLI = Builder.CreateAlignedLoad (DataTy, Addr, Alignment, " wide.load" );
35373561 }
35383562 applyMetadata (*cast<Instruction>(NewLI));
3539- if (Reverse)
3540- NewLI = Builder.CreateVectorReverse (NewLI, " reverse" );
35413563 State.set (this , NewLI);
35423564}
35433565
@@ -3592,8 +3614,6 @@ void VPWidenLoadEVLRecipe::execute(VPTransformState &State) {
35923614 0 , Attribute::getWithAlignment (NewLI->getContext (), Alignment));
35933615 applyMetadata (*NewLI);
35943616 Instruction *Res = NewLI;
3595- if (isReverse ())
3596- Res = createReverseEVL (Builder, Res, EVL, " vp.reverse" );
35973617 State.set (this , Res);
35983618}
35993619
@@ -3610,15 +3630,9 @@ InstructionCost VPWidenLoadEVLRecipe::computeCost(ElementCount VF,
36103630 Type *Ty = toVectorTy (getLoadStoreType (&Ingredient), VF);
36113631 unsigned AS = cast<PointerType>(Ctx.Types .inferScalarType (getAddr ()))
36123632 ->getAddressSpace ();
3613- InstructionCost Cost = Ctx.TTI .getMemIntrinsicInstrCost (
3633+ return Ctx.TTI .getMemIntrinsicInstrCost (
36143634 MemIntrinsicCostAttributes (Intrinsic::vp_load, Ty, Alignment, AS),
36153635 Ctx.CostKind );
3616- if (!Reverse)
3617- return Cost;
3618-
3619- return Cost + Ctx.TTI .getShuffleCost (
3620- TargetTransformInfo::SK_Reverse, cast<VectorType>(Ty),
3621- cast<VectorType>(Ty), {}, Ctx.CostKind , 0 );
36223636}
36233637
36243638#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -3647,13 +3661,6 @@ void VPWidenStoreRecipe::execute(VPTransformState &State) {
36473661 }
36483662
36493663 Value *StoredVal = State.get (StoredVPValue);
3650- if (isReverse ()) {
3651- // If we store to reverse consecutive memory locations, then we need
3652- // to reverse the order of elements in the stored value.
3653- StoredVal = Builder.CreateVectorReverse (StoredVal, " reverse" );
3654- // We don't want to update the value in the map as it might be used in
3655- // another expression. So don't call resetVectorValue(StoredVal).
3656- }
36573664 Value *Addr = State.get (getAddr (), /* IsScalar*/ !CreateScatter);
36583665 Instruction *NewSI = nullptr ;
36593666 if (CreateScatter)
@@ -3682,8 +3689,6 @@ void VPWidenStoreEVLRecipe::execute(VPTransformState &State) {
36823689 CallInst *NewSI = nullptr ;
36833690 Value *StoredVal = State.get (StoredValue);
36843691 Value *EVL = State.get (getEVL (), VPLane (0 ));
3685- if (isReverse ())
3686- StoredVal = createReverseEVL (Builder, StoredVal, EVL, " vp.reverse" );
36873692 Value *Mask = nullptr ;
36883693 if (VPValue *VPMask = getMask ()) {
36893694 Mask = State.get (VPMask);
@@ -3720,15 +3725,9 @@ InstructionCost VPWidenStoreEVLRecipe::computeCost(ElementCount VF,
37203725 Type *Ty = toVectorTy (getLoadStoreType (&Ingredient), VF);
37213726 unsigned AS = cast<PointerType>(Ctx.Types .inferScalarType (getAddr ()))
37223727 ->getAddressSpace ();
3723- InstructionCost Cost = Ctx.TTI .getMemIntrinsicInstrCost (
3728+ return Ctx.TTI .getMemIntrinsicInstrCost (
37243729 MemIntrinsicCostAttributes (Intrinsic::vp_store, Ty, Alignment, AS),
37253730 Ctx.CostKind );
3726- if (!Reverse)
3727- return Cost;
3728-
3729- return Cost + Ctx.TTI .getShuffleCost (
3730- TargetTransformInfo::SK_Reverse, cast<VectorType>(Ty),
3731- cast<VectorType>(Ty), {}, Ctx.CostKind , 0 );
37323731}
37333732
37343733#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
0 commit comments