diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index bf26fec287636..e61f62186714a 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -6736,6 +6736,19 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG, // avoid expensive fixups. unsigned KnownLeadingZeros = DAG.computeKnownBits(N0).countMinLeadingZeros(); + // After type legalization, vector divides must build their scalar + // constants in the types SVT/ShSVT are transformed to, so no illegally-typed + // nodes appear; bail out if such a type is narrower than the element type. + if (IsAfterLegalTypes && VT.isVector()) { + SVT = getTypeToTransformTo(*DAG.getContext(), SVT); + if (SVT.bitsLT(VT.getScalarType())) + return SDValue(); + ShSVT = getTypeToTransformTo(*DAG.getContext(), ShSVT); + if (ShSVT.bitsLT(ShVT.getScalarType())) + return SDValue(); + } + const unsigned SVTBits = SVT.getSizeInBits(); + bool UseNPQ = false, UsePreShift = false, UsePostShift = false; SmallVector PreShifts, PostShifts, MagicFactors, NPQFactors; @@ -6758,7 +6771,7 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG, UnsignedDivisionByConstantInfo::get( Divisor, std::min(KnownLeadingZeros, Divisor.countl_zero())); - MagicFactor = DAG.getConstant(magics.Magic, dl, SVT); + MagicFactor = DAG.getConstant(magics.Magic.zext(SVTBits), dl, SVT); assert(magics.PreShift < Divisor.getBitWidth() && "We shouldn't generate an undefined shift!"); @@ -6769,8 +6782,8 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG, PreShift = DAG.getConstant(magics.PreShift, dl, ShSVT); PostShift = DAG.getConstant(magics.PostShift, dl, ShSVT); NPQFactor = DAG.getConstant( - magics.IsAdd ? APInt::getOneBitSet(EltBits, EltBits - 1) - : APInt::getZero(EltBits), + magics.IsAdd ? 
APInt::getOneBitSet(SVTBits, EltBits - 1) + : APInt::getZero(SVTBits), dl, SVT); UseNPQ |= magics.IsAdd; UsePreShift |= magics.PreShift != 0; diff --git a/llvm/test/CodeGen/AArch64/vector-div-by-promoted-const-no-assertion.ll b/llvm/test/CodeGen/AArch64/vector-div-by-promoted-const-no-assertion.ll new file mode 100644 index 0000000000000..edb5d64b0e31b --- /dev/null +++ b/llvm/test/CodeGen/AArch64/vector-div-by-promoted-const-no-assertion.ll @@ -0,0 +1,79 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s | FileCheck %s + +target datalayout = "e-m:w-p270:32:32-p271:32:32-p272:64:64-p:64:64-i32:32-i64:64-i128:128-n32:64-S128-Fn32" +target triple = "aarch64-pc-windows-msvc" + +define <16 x i16> @udiv_v16i16_from_zext(<16 x i8> %x) { +; CHECK-LABEL: udiv_v16i16_from_zext: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov w8, #21846 // =0x5556 +; CHECK-NEXT: ushll2 v1.8h, v0.16b, #0 +; CHECK-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-NEXT: dup v2.8h, w8 +; CHECK-NEXT: umull2 v3.4s, v1.8h, v2.8h +; CHECK-NEXT: umull v1.4s, v1.4h, v2.4h +; CHECK-NEXT: umull2 v4.4s, v0.8h, v2.8h +; CHECK-NEXT: umull v0.4s, v0.4h, v2.4h +; CHECK-NEXT: uzp2 v1.8h, v1.8h, v3.8h +; CHECK-NEXT: uzp2 v0.8h, v0.8h, v4.8h +; CHECK-NEXT: ret +entry: + %zext = zext <16 x i8> %x to <16 x i16> + %div = udiv <16 x i16> %zext, splat (i16 3) + ret <16 x i16> %div +} + +define <16 x i16> @sdiv_v16i16_from_sext(<16 x i8> %x) { +; CHECK-LABEL: sdiv_v16i16_from_sext: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov w8, #21846 // =0x5556 +; CHECK-NEXT: sshll2 v1.8h, v0.16b, #0 +; CHECK-NEXT: sshll v0.8h, v0.8b, #0 +; CHECK-NEXT: dup v2.8h, w8 +; CHECK-NEXT: smull2 v3.4s, v1.8h, v2.8h +; CHECK-NEXT: smull v1.4s, v1.4h, v2.4h +; CHECK-NEXT: smull2 v4.4s, v0.8h, v2.8h +; CHECK-NEXT: smull v0.4s, v0.4h, v2.4h +; CHECK-NEXT: uzp2 v1.8h, v1.8h, v3.8h +; CHECK-NEXT: uzp2 v0.8h, v0.8h, v4.8h +; CHECK-NEXT: usra v1.8h, v1.8h, #15 +; CHECK-NEXT: usra 
v0.8h, v0.8h, #15 +; CHECK-NEXT: ret +entry: + %sext = sext <16 x i8> %x to <16 x i16> + %div = sdiv <16 x i16> %sext, splat (i16 3) + ret <16 x i16> %div +} + +define <16 x i16> @udiv_exact_v16i16_from_zext(<16 x i8> %x) { +; CHECK-LABEL: udiv_exact_v16i16_from_zext: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov w8, #43691 // =0xaaab +; CHECK-NEXT: ushll v2.8h, v0.8b, #0 +; CHECK-NEXT: ushll2 v0.8h, v0.16b, #0 +; CHECK-NEXT: dup v3.8h, w8 +; CHECK-NEXT: mul v1.8h, v0.8h, v3.8h +; CHECK-NEXT: mul v0.8h, v2.8h, v3.8h +; CHECK-NEXT: ret +entry: + %zext = zext <16 x i8> %x to <16 x i16> + %div = udiv exact <16 x i16> %zext, splat (i16 3) + ret <16 x i16> %div +} + +define <16 x i16> @sdiv_exact_v16i16_from_sext(<16 x i8> %x) { +; CHECK-LABEL: sdiv_exact_v16i16_from_sext: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov w8, #43691 // =0xaaab +; CHECK-NEXT: sshll v2.8h, v0.8b, #0 +; CHECK-NEXT: sshll2 v0.8h, v0.16b, #0 +; CHECK-NEXT: dup v3.8h, w8 +; CHECK-NEXT: mul v1.8h, v0.8h, v3.8h +; CHECK-NEXT: mul v0.8h, v2.8h, v3.8h +; CHECK-NEXT: ret +entry: + %sext = sext <16 x i8> %x to <16 x i16> + %div = sdiv exact <16 x i16> %sext, splat (i16 3) + ret <16 x i16> %div +}