Skip to content

Commit 2ee1721

Browse files
[DAGCombiner] Fix assertion failure in vector division lowering
A recent fix in division legalization trips the legality assertion when NewNodesMustHaveLegalTypes is set.
1 parent 07b8aa8 commit 2ee1721

File tree

3 files changed

+99
-7
lines changed

3 files changed

+99
-7
lines changed

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6736,6 +6736,19 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
67366736
// avoid expensive fixups.
67376737
unsigned KnownLeadingZeros = DAG.computeKnownBits(N0).countMinLeadingZeros();
67386738

6739+
// If we're after type legalization and SVT is not legal, use the
6740+
// promoted type for creating constants to avoid creating nodes with
6741+
// illegal types.
6742+
if (IsAfterLegalTypes && VT.isVector()) {
6743+
SVT = getTypeToTransformTo(*DAG.getContext(), SVT);
6744+
if (SVT.bitsLT(VT.getScalarType()))
6745+
return SDValue();
6746+
ShSVT = getTypeToTransformTo(*DAG.getContext(), ShSVT);
6747+
if (ShSVT.bitsLT(ShVT.getScalarType()))
6748+
return SDValue();
6749+
}
6750+
const unsigned SVTBits = SVT.getSizeInBits();
6751+
67396752
bool UseNPQ = false, UsePreShift = false, UsePostShift = false;
67406753
SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
67416754

@@ -6758,7 +6771,7 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
67586771
UnsignedDivisionByConstantInfo::get(
67596772
Divisor, std::min(KnownLeadingZeros, Divisor.countl_zero()));
67606773

6761-
MagicFactor = DAG.getConstant(magics.Magic, dl, SVT);
6774+
MagicFactor = DAG.getConstant(magics.Magic.zext(SVTBits), dl, SVT);
67626775

67636776
assert(magics.PreShift < Divisor.getBitWidth() &&
67646777
"We shouldn't generate an undefined shift!");
@@ -6769,8 +6782,8 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
67696782
PreShift = DAG.getConstant(magics.PreShift, dl, ShSVT);
67706783
PostShift = DAG.getConstant(magics.PostShift, dl, ShSVT);
67716784
NPQFactor = DAG.getConstant(
6772-
magics.IsAdd ? APInt::getOneBitSet(EltBits, EltBits - 1)
6773-
: APInt::getZero(EltBits),
6785+
magics.IsAdd ? APInt::getOneBitSet(SVTBits, EltBits - 1)
6786+
: APInt::getZero(SVTBits),
67746787
dl, SVT);
67756788
UseNPQ |= magics.IsAdd;
67766789
UsePreShift |= magics.PreShift != 0;

llvm/test/CodeGen/AArch64/rem-by-const.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -898,8 +898,8 @@ define <4 x i8> @sv4i8_7(<4 x i8> %d, <4 x i8> %e) {
898898
; CHECK-SD-NEXT: dup v1.4h, w8
899899
; CHECK-SD-NEXT: sshr v0.4h, v0.4h, #8
900900
; CHECK-SD-NEXT: smull v1.4s, v0.4h, v1.4h
901-
; CHECK-SD-NEXT: sshr v1.4s, v1.4s, #17
902-
; CHECK-SD-NEXT: xtn v1.4h, v1.4s
901+
; CHECK-SD-NEXT: shrn v1.4h, v1.4s, #16
902+
; CHECK-SD-NEXT: sshr v1.4h, v1.4h, #1
903903
; CHECK-SD-NEXT: usra v1.4h, v1.4h, #15
904904
; CHECK-SD-NEXT: mls v0.4h, v1.4h, v2.4h
905905
; CHECK-SD-NEXT: ret
@@ -952,8 +952,8 @@ define <4 x i8> @sv4i8_100(<4 x i8> %d, <4 x i8> %e) {
952952
; CHECK-SD-NEXT: dup v1.4h, w8
953953
; CHECK-SD-NEXT: sshr v0.4h, v0.4h, #8
954954
; CHECK-SD-NEXT: smull v1.4s, v0.4h, v1.4h
955-
; CHECK-SD-NEXT: sshr v1.4s, v1.4s, #19
956-
; CHECK-SD-NEXT: xtn v1.4h, v1.4s
955+
; CHECK-SD-NEXT: shrn v1.4h, v1.4s, #16
956+
; CHECK-SD-NEXT: sshr v1.4h, v1.4h, #3
957957
; CHECK-SD-NEXT: usra v1.4h, v1.4h, #15
958958
; CHECK-SD-NEXT: mls v0.4h, v1.4h, v2.4h
959959
; CHECK-SD-NEXT: ret
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s | FileCheck %s
3+
4+
target datalayout = "e-m:w-p270:32:32-p271:32:32-p272:64:64-p:64:64-i32:32-i64:64-i128:128-n32:64-S128-Fn32"
5+
target triple = "aarch64-pc-windows-msvc"
6+
7+
define <16 x i16> @udiv_v16i16_from_zext(<16 x i8> %x) {
8+
; CHECK-LABEL: udiv_v16i16_from_zext:
9+
; CHECK: // %bb.0: // %entry
10+
; CHECK-NEXT: mov w8, #21846 // =0x5556
11+
; CHECK-NEXT: ushll2 v1.8h, v0.16b, #0
12+
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
13+
; CHECK-NEXT: dup v2.8h, w8
14+
; CHECK-NEXT: umull2 v3.4s, v1.8h, v2.8h
15+
; CHECK-NEXT: umull v1.4s, v1.4h, v2.4h
16+
; CHECK-NEXT: umull2 v4.4s, v0.8h, v2.8h
17+
; CHECK-NEXT: umull v0.4s, v0.4h, v2.4h
18+
; CHECK-NEXT: uzp2 v1.8h, v1.8h, v3.8h
19+
; CHECK-NEXT: uzp2 v0.8h, v0.8h, v4.8h
20+
; CHECK-NEXT: ret
21+
entry:
22+
%zext = zext <16 x i8> %x to <16 x i16>
23+
%div = udiv <16 x i16> %zext, splat (i16 3)
24+
ret <16 x i16> %div
25+
}
26+
27+
define <16 x i16> @sdiv_v16i16_from_sext(<16 x i8> %x) {
28+
; CHECK-LABEL: sdiv_v16i16_from_sext:
29+
; CHECK: // %bb.0: // %entry
30+
; CHECK-NEXT: mov w8, #21846 // =0x5556
31+
; CHECK-NEXT: sshll2 v1.8h, v0.16b, #0
32+
; CHECK-NEXT: sshll v0.8h, v0.8b, #0
33+
; CHECK-NEXT: dup v2.8h, w8
34+
; CHECK-NEXT: smull2 v3.4s, v1.8h, v2.8h
35+
; CHECK-NEXT: smull v1.4s, v1.4h, v2.4h
36+
; CHECK-NEXT: smull2 v4.4s, v0.8h, v2.8h
37+
; CHECK-NEXT: smull v0.4s, v0.4h, v2.4h
38+
; CHECK-NEXT: uzp2 v1.8h, v1.8h, v3.8h
39+
; CHECK-NEXT: uzp2 v0.8h, v0.8h, v4.8h
40+
; CHECK-NEXT: usra v1.8h, v1.8h, #15
41+
; CHECK-NEXT: usra v0.8h, v0.8h, #15
42+
; CHECK-NEXT: ret
43+
entry:
44+
%sext = sext <16 x i8> %x to <16 x i16>
45+
%div = sdiv <16 x i16> %sext, splat (i16 3)
46+
ret <16 x i16> %div
47+
}
48+
49+
define <16 x i16> @udiv_exact_v16i16_from_zext(<16 x i8> %x) {
50+
; CHECK-LABEL: udiv_exact_v16i16_from_zext:
51+
; CHECK: // %bb.0: // %entry
52+
; CHECK-NEXT: mov w8, #43691 // =0xaaab
53+
; CHECK-NEXT: ushll v2.8h, v0.8b, #0
54+
; CHECK-NEXT: ushll2 v0.8h, v0.16b, #0
55+
; CHECK-NEXT: dup v3.8h, w8
56+
; CHECK-NEXT: mul v1.8h, v0.8h, v3.8h
57+
; CHECK-NEXT: mul v0.8h, v2.8h, v3.8h
58+
; CHECK-NEXT: ret
59+
entry:
60+
%zext = zext <16 x i8> %x to <16 x i16>
61+
%div = udiv exact <16 x i16> %zext, splat (i16 3)
62+
ret <16 x i16> %div
63+
}
64+
65+
define <16 x i16> @sdiv_exact_v16i16_from_sext(<16 x i8> %x) {
66+
; CHECK-LABEL: sdiv_exact_v16i16_from_sext:
67+
; CHECK: // %bb.0: // %entry
68+
; CHECK-NEXT: mov w8, #43691 // =0xaaab
69+
; CHECK-NEXT: sshll v2.8h, v0.8b, #0
70+
; CHECK-NEXT: sshll2 v0.8h, v0.16b, #0
71+
; CHECK-NEXT: dup v3.8h, w8
72+
; CHECK-NEXT: mul v1.8h, v0.8h, v3.8h
73+
; CHECK-NEXT: mul v0.8h, v2.8h, v3.8h
74+
; CHECK-NEXT: ret
75+
entry:
76+
%sext = sext <16 x i8> %x to <16 x i16>
77+
%div = sdiv exact <16 x i16> %sext, splat (i16 3)
78+
ret <16 x i16> %div
79+
}

0 commit comments

Comments
 (0)