Skip to content

Commit ff781eb

Browse files
[DAGCombiner] Fix assertion failure in vector division lowering
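Recent fixes in division legalization can create constants with illegal scalar types after type legalization, which trips the legal-type assertion when NewNodesMustHaveLegalTypes is set. For vector divisions after type legalization, BuildSDIV and BuildUDIV now create their constants in the promoted scalar type (extending the magic values to its width) and bail out if the transformed type is narrower than the original scalar type.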
1 parent 07b8aa8 commit ff781eb

File tree

2 files changed

+117
-4
lines changed

2 files changed

+117
-4
lines changed

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 31 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6559,6 +6559,19 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
65596559
if (N->getFlags().hasExact())
65606560
return BuildExactSDIV(*this, N, dl, DAG, Created);
65616561

6562+
// If we're after type legalization and SVT is not legal, use the
6563+
// promoted type for creating constants to avoid creating nodes with
6564+
// illegal types.
6565+
if (IsAfterLegalTypes && VT.isVector()) {
6566+
SVT = getTypeToTransformTo(*DAG.getContext(), SVT);
6567+
if (SVT.bitsLT(VT.getScalarType()))
6568+
return SDValue();
6569+
ShSVT = getTypeToTransformTo(*DAG.getContext(), ShSVT);
6570+
if (ShSVT.bitsLT(ShVT.getScalarType()))
6571+
return SDValue();
6572+
}
6573+
const unsigned SVTBits = SVT.getSizeInBits();
6574+
65626575
SmallVector<SDValue, 16> MagicFactors, Factors, Shifts, ShiftMasks;
65636576

65646577
auto BuildSDIVPattern = [&](ConstantSDNode *C) {
@@ -6585,7 +6598,8 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
65856598
NumeratorFactor = -1;
65866599
}
65876600

6588-
MagicFactors.push_back(DAG.getConstant(magics.Magic, dl, SVT));
6601+
MagicFactors.push_back(
6602+
DAG.getConstant(magics.Magic.sext(SVTBits), dl, SVT));
65896603
Factors.push_back(DAG.getSignedConstant(NumeratorFactor, dl, SVT));
65906604
Shifts.push_back(DAG.getConstant(magics.ShiftAmount, dl, ShSVT));
65916605
ShiftMasks.push_back(DAG.getSignedConstant(ShiftMask, dl, SVT));
@@ -6736,6 +6750,19 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
67366750
// avoid expensive fixups.
67376751
unsigned KnownLeadingZeros = DAG.computeKnownBits(N0).countMinLeadingZeros();
67386752

6753+
// If we're after type legalization and SVT is not legal, use the
6754+
// promoted type for creating constants to avoid creating nodes with
6755+
// illegal types.
6756+
if (IsAfterLegalTypes && VT.isVector()) {
6757+
SVT = getTypeToTransformTo(*DAG.getContext(), SVT);
6758+
if (SVT.bitsLT(VT.getScalarType()))
6759+
return SDValue();
6760+
ShSVT = getTypeToTransformTo(*DAG.getContext(), ShSVT);
6761+
if (ShSVT.bitsLT(ShVT.getScalarType()))
6762+
return SDValue();
6763+
}
6764+
const unsigned SVTBits = SVT.getSizeInBits();
6765+
67396766
bool UseNPQ = false, UsePreShift = false, UsePostShift = false;
67406767
SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
67416768

@@ -6758,7 +6785,7 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
67586785
UnsignedDivisionByConstantInfo::get(
67596786
Divisor, std::min(KnownLeadingZeros, Divisor.countl_zero()));
67606787

6761-
MagicFactor = DAG.getConstant(magics.Magic, dl, SVT);
6788+
MagicFactor = DAG.getConstant(magics.Magic.zext(SVTBits), dl, SVT);
67626789

67636790
assert(magics.PreShift < Divisor.getBitWidth() &&
67646791
"We shouldn't generate an undefined shift!");
@@ -6769,8 +6796,8 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
67696796
PreShift = DAG.getConstant(magics.PreShift, dl, ShSVT);
67706797
PostShift = DAG.getConstant(magics.PostShift, dl, ShSVT);
67716798
NPQFactor = DAG.getConstant(
6772-
magics.IsAdd ? APInt::getOneBitSet(EltBits, EltBits - 1)
6773-
: APInt::getZero(EltBits),
6799+
magics.IsAdd ? APInt::getOneBitSet(SVTBits, EltBits - 1)
6800+
: APInt::getZero(SVTBits),
67746801
dl, SVT);
67756802
UseNPQ |= magics.IsAdd;
67766803
UsePreShift |= magics.PreShift != 0;
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -mtriple=aarch64-pc-windows-msvc < %s | FileCheck %s
3+
4+
target datalayout = "e-m:w-p270:32:32-p271:32:32-p272:64:64-p:64:64-i32:32-i64:64-i128:128-n32:64-S128-Fn32"
5+
target triple = "aarch64-pc-windows-msvc"
6+
7+
; udiv with zext input - the zext from i8 to i16 triggers vector splitting
8+
; which exposes the promoted constant issue
9+
define <16 x i16> @udiv_v16i16_from_zext(<16 x i8> %x) {
10+
; CHECK-LABEL: udiv_v16i16_from_zext:
11+
; CHECK: // %bb.0: // %entry
12+
; CHECK-NEXT: mov w8, #21846 // =0x5556
13+
; CHECK-NEXT: ushll2 v1.8h, v0.16b, #0
14+
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
15+
; CHECK-NEXT: dup v2.8h, w8
16+
; CHECK-NEXT: umull2 v3.4s, v1.8h, v2.8h
17+
; CHECK-NEXT: umull v1.4s, v1.4h, v2.4h
18+
; CHECK-NEXT: umull2 v4.4s, v0.8h, v2.8h
19+
; CHECK-NEXT: umull v0.4s, v0.4h, v2.4h
20+
; CHECK-NEXT: uzp2 v1.8h, v1.8h, v3.8h
21+
; CHECK-NEXT: uzp2 v0.8h, v0.8h, v4.8h
22+
; CHECK-NEXT: ret
23+
entry:
24+
%zext = zext <16 x i8> %x to <16 x i16>
25+
%div = udiv <16 x i16> %zext, splat (i16 3)
26+
ret <16 x i16> %div
27+
}
28+
29+
; sdiv with zext input
30+
define <16 x i16> @sdiv_v16i16_from_zext(<16 x i8> %x) {
31+
; CHECK-LABEL: sdiv_v16i16_from_zext:
32+
; CHECK: // %bb.0: // %entry
33+
; CHECK-NEXT: mov w8, #21846 // =0x5556
34+
; CHECK-NEXT: ushll2 v1.8h, v0.16b, #0
35+
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
36+
; CHECK-NEXT: dup v2.8h, w8
37+
; CHECK-NEXT: umull2 v3.4s, v1.8h, v2.8h
38+
; CHECK-NEXT: umull v1.4s, v1.4h, v2.4h
39+
; CHECK-NEXT: umull2 v4.4s, v0.8h, v2.8h
40+
; CHECK-NEXT: umull v0.4s, v0.4h, v2.4h
41+
; CHECK-NEXT: uzp2 v1.8h, v1.8h, v3.8h
42+
; CHECK-NEXT: uzp2 v0.8h, v0.8h, v4.8h
43+
; CHECK-NEXT: ret
44+
entry:
45+
%zext = zext <16 x i8> %x to <16 x i16>
46+
%div = sdiv <16 x i16> %zext, splat (i16 3)
47+
ret <16 x i16> %div
48+
}
49+
50+
; udiv exact with zext input
51+
define <16 x i16> @udiv_exact_v16i16_from_zext(<16 x i8> %x) {
52+
; CHECK-LABEL: udiv_exact_v16i16_from_zext:
53+
; CHECK: // %bb.0: // %entry
54+
; CHECK-NEXT: mov w8, #43691 // =0xaaab
55+
; CHECK-NEXT: ushll v2.8h, v0.8b, #0
56+
; CHECK-NEXT: ushll2 v0.8h, v0.16b, #0
57+
; CHECK-NEXT: dup v3.8h, w8
58+
; CHECK-NEXT: mul v1.8h, v0.8h, v3.8h
59+
; CHECK-NEXT: mul v0.8h, v2.8h, v3.8h
60+
; CHECK-NEXT: ret
61+
entry:
62+
%zext = zext <16 x i8> %x to <16 x i16>
63+
%div = udiv exact <16 x i16> %zext, splat (i16 3)
64+
ret <16 x i16> %div
65+
}
66+
67+
; sdiv exact with zext input
68+
define <16 x i16> @sdiv_exact_v16i16_from_zext(<16 x i8> %x) {
69+
; CHECK-LABEL: sdiv_exact_v16i16_from_zext:
70+
; CHECK: // %bb.0: // %entry
71+
; CHECK-NEXT: mov w8, #21846 // =0x5556
72+
; CHECK-NEXT: ushll2 v1.8h, v0.16b, #0
73+
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
74+
; CHECK-NEXT: dup v2.8h, w8
75+
; CHECK-NEXT: umull2 v3.4s, v1.8h, v2.8h
76+
; CHECK-NEXT: umull v1.4s, v1.4h, v2.4h
77+
; CHECK-NEXT: umull2 v4.4s, v0.8h, v2.8h
78+
; CHECK-NEXT: umull v0.4s, v0.4h, v2.4h
79+
; CHECK-NEXT: uzp2 v1.8h, v1.8h, v3.8h
80+
; CHECK-NEXT: uzp2 v0.8h, v0.8h, v4.8h
81+
; CHECK-NEXT: ret
82+
entry:
83+
%zext = zext <16 x i8> %x to <16 x i16>
84+
%div = sdiv exact <16 x i16> %zext, splat (i16 3)
85+
ret <16 x i16> %div
86+
}

0 commit comments

Comments
 (0)