Skip to content

Commit 1ba23ff

Browse files
[AArch64] Add intrinsics support for SVE2p2 instructions (#163575)
This patch add intrinsics for SVE2p2 instructions defined in [this](ARM-software/acle#412) ACLE proposal. Intrinsics added: ``` // Variants are available for: // _s8, _s16, _u16, _mf8, _bf16, _f16 svuint8_t svcompact[_u8](svbool_t pg, svuint8_t zn); // Variants are available for: // _s8, _s16, _u16, _s32, _u32, _s64, _u64 // _mf8, _bf16, _f16, _f32, _f64 svuint8_t svexpand[_u8](svbool_t pg, svuint8_t zn); // Variants are available for: // _b16, _b32, _b64 int64_t svfirstp_b8(svbool_t pg, svbool_t pn); // Variants are available for: // _b16, _b32, _b64 int64_t svlastp_b8(svbool_t pg, svbool_t pn); ``` It also generates Sema tests using aarch64_builtins_test_generator script for some previously merged intrinsics patches, which were merged without regenerating. --------- Co-authored-by: Kerry McLaughlin <kerry.mclaughlin@arm.com>
1 parent 44735e1 commit 1ba23ff

16 files changed

+1489
-11
lines changed

clang/include/clang/Basic/arm_sve.td

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -984,6 +984,11 @@ let SMETargetGuard = "sme2p2" in {
984984
def SVCOMPACT : SInst<"svcompact[_{d}]", "dPd", "ilUiUlfd", MergeNone, "aarch64_sve_compact", [VerifyRuntimeMode]>;
985985
}
986986

987+
let SVETargetGuard = "sve2p2|sme2p2", SMETargetGuard = "sme2p2" in {
988+
def SVCOMPACT_BH : SInst<"svcompact[_{d}]", "dPd", "cUcsUsmbh", MergeNone, "aarch64_sve_compact", [VerifyRuntimeMode]>;
989+
def SVEXPAND : SInst<"svexpand[_{d}]", "dPd", "cUcsUsiUilUlmbhfd", MergeNone, "aarch64_sve_expand", [VerifyRuntimeMode]>;
990+
}
991+
987992
// Note: svdup_lane is implemented using the intrinsic for TBL to represent a
988993
// splat of any possible lane. It is upto LLVM to pick a more efficient
989994
// instruction such as DUP (indexed) if the lane index fits the range of the
@@ -1111,6 +1116,11 @@ def SVCNTD : SInst<"svcntd", "nv", "", MergeNone, "aarch64_sve_cntd", [IsAppendS
11111116
def SVCNTP : SInst<"svcntp_{d}", "nPP", "PcPsPiPl", MergeNone, "aarch64_sve_cntp", [VerifyRuntimeMode]>;
11121117
def SVLEN : SInst<"svlen[_{d}]", "nd", "csilUcUsUiUlhfdb", MergeNone, "", [VerifyRuntimeMode]>;
11131118

1119+
let SVETargetGuard = "sve2p2|sme2p2", SMETargetGuard = "sve2p2|sme2p2" in {
1120+
def SVFIRSTP : SInst<"svfirstp_{d}", "lPP", "PcPsPiPl", MergeNone, "aarch64_sve_firstp", [VerifyRuntimeMode], []>;
1121+
def SVLASTP : SInst<"svlastp_{d}", "lPP", "PcPsPiPl", MergeNone, "aarch64_sve_lastp", [VerifyRuntimeMode], []>;
1122+
}
1123+
11141124
////////////////////////////////////////////////////////////////////////////////
11151125
// Saturating scalar arithmetic
11161126

Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
2+
// REQUIRES: aarch64-registered-target
3+
// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2p2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
4+
// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2p2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
5+
// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sme2p2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
6+
7+
// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2p2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
8+
// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2p2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
9+
// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2p2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
10+
// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2p2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
11+
// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2p2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
12+
13+
#ifdef __ARM_FEATURE_SME
14+
#include "arm_sme.h"
15+
#define STREAMING __arm_streaming
16+
#else
17+
#include "arm_sve.h"
18+
#define STREAMING
19+
#endif
20+
21+
#ifdef SVE_OVERLOADED_FORMS
22+
// A simple used,unused... macro, long enough to represent any SVE builtin.
23+
#define SVE_ACLE_FUNC(A1,A2_UNUSED) A1
24+
#else
25+
#define SVE_ACLE_FUNC(A1,A2) A1##A2
26+
#endif
27+
28+
// CHECK-LABEL: @test_svcompact_s8(
29+
// CHECK-NEXT: entry:
30+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.compact.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP:%.*]])
31+
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
32+
//
33+
// CPP-CHECK-LABEL: @_Z17test_svcompact_s8u10__SVBool_tu10__SVInt8_t(
34+
// CPP-CHECK-NEXT: entry:
35+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.compact.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP:%.*]])
36+
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
37+
//
38+
svint8_t test_svcompact_s8(svbool_t pg, svint8_t op) STREAMING
39+
{
40+
return SVE_ACLE_FUNC(svcompact,_s8)(pg, op);
41+
}
42+
43+
// CHECK-LABEL: @test_svcompact_s16(
44+
// CHECK-NEXT: entry:
45+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
46+
// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.compact.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP:%.*]])
47+
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
48+
//
49+
// CPP-CHECK-LABEL: @_Z18test_svcompact_s16u10__SVBool_tu11__SVInt16_t(
50+
// CPP-CHECK-NEXT: entry:
51+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
52+
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.compact.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP:%.*]])
53+
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
54+
//
55+
svint16_t test_svcompact_s16(svbool_t pg, svint16_t op) STREAMING
56+
{
57+
return SVE_ACLE_FUNC(svcompact,_s16)(pg, op);
58+
}
59+
60+
// CHECK-LABEL: @test_svcompact_u8(
61+
// CHECK-NEXT: entry:
62+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.compact.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP:%.*]])
63+
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
64+
//
65+
// CPP-CHECK-LABEL: @_Z17test_svcompact_u8u10__SVBool_tu11__SVUint8_t(
66+
// CPP-CHECK-NEXT: entry:
67+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.compact.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP:%.*]])
68+
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
69+
//
70+
svuint8_t test_svcompact_u8(svbool_t pg, svuint8_t op) STREAMING
71+
{
72+
return SVE_ACLE_FUNC(svcompact,_u8)(pg, op);
73+
}
74+
75+
// CHECK-LABEL: @test_svcompact_u16(
76+
// CHECK-NEXT: entry:
77+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
78+
// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.compact.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP:%.*]])
79+
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
80+
//
81+
// CPP-CHECK-LABEL: @_Z18test_svcompact_u16u10__SVBool_tu12__SVUint16_t(
82+
// CPP-CHECK-NEXT: entry:
83+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
84+
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.compact.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP:%.*]])
85+
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
86+
//
87+
svuint16_t test_svcompact_u16(svbool_t pg, svuint16_t op) STREAMING
88+
{
89+
return SVE_ACLE_FUNC(svcompact,_u16)(pg, op);
90+
}
91+
92+
// CHECK-LABEL: @test_svcompact_mf8(
93+
// CHECK-NEXT: entry:
94+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.compact.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP:%.*]])
95+
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
96+
//
97+
// CPP-CHECK-LABEL: @_Z18test_svcompact_mf8u10__SVBool_tu13__SVMfloat8_t(
98+
// CPP-CHECK-NEXT: entry:
99+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.compact.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP:%.*]])
100+
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
101+
//
102+
svmfloat8_t test_svcompact_mf8(svbool_t pg, svmfloat8_t op) STREAMING
103+
{
104+
return SVE_ACLE_FUNC(svcompact,_mf8)(pg, op);
105+
}
106+
107+
// CHECK-LABEL: @test_svcompact_f16(
108+
// CHECK-NEXT: entry:
109+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
110+
// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.compact.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]])
111+
// CHECK-NEXT: ret <vscale x 8 x half> [[TMP1]]
112+
//
113+
// CPP-CHECK-LABEL: @_Z18test_svcompact_f16u10__SVBool_tu13__SVFloat16_t(
114+
// CPP-CHECK-NEXT: entry:
115+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
116+
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.compact.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]])
117+
// CPP-CHECK-NEXT: ret <vscale x 8 x half> [[TMP1]]
118+
//
119+
svfloat16_t test_svcompact_f16(svbool_t pg, svfloat16_t op) STREAMING
120+
{
121+
return SVE_ACLE_FUNC(svcompact,_f16)(pg, op);
122+
}
123+
124+
// CHECK-LABEL: @test_svcompact_bf16(
125+
// CHECK-NEXT: entry:
126+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
127+
// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.compact.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP:%.*]])
128+
// CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
129+
//
130+
// CPP-CHECK-LABEL: @_Z19test_svcompact_bf16u10__SVBool_tu14__SVBfloat16_t(
131+
// CPP-CHECK-NEXT: entry:
132+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
133+
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.compact.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP:%.*]])
134+
// CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
135+
//
136+
svbfloat16_t test_svcompact_bf16(svbool_t pg, svbfloat16_t op) STREAMING
137+
{
138+
return SVE_ACLE_FUNC(svcompact,_bf16)(pg, op);
139+
}

0 commit comments

Comments
 (0)