Skip to content

Commit 84e2738

Browse files
committed
[Clang][AArch64][SVE2p3][SME2p3] Add intrinsics for v9.7a add/add-and-subtract/subtract pairwise operations
Add the following new clang intrinsics based on the ACLE specification ARM-software/acle#428 (Add alpha support for 9.7 data processing intrinsics) - ADDQP (Add pairwise within quadword vector segments) - svint8_t svaddqp_s8(svint8_t, svint8_t) / svint8_t svaddqp(svint8_t, svint8_t) - svuint8_t svaddqp_u8(svuint8_t, svuint8_t) / svuint8_t svaddqp(svuint8_t, svuint8_t) - svint16_t svaddqp_s16(svint16_t, svint16_t) / svint16_t svaddqp(svint16_t, svint16_t) - svuint16_t svaddqp_u16(svuint16_t, svuint16_t) / svuint16_t svaddqp(svuint16_t, svuint16_t) - svint32_t svaddqp_s32(svint32_t, svint32_t) / svint32_t svaddqp(svint32_t, svint32_t) - svuint32_t svaddqp_u32(svuint32_t, svuint32_t) / svuint32_t svaddqp(svuint32_t, svuint32_t) - svint64_t svaddqp_s64(svint64_t, svint64_t) / svint64_t svaddqp(svint64_t, svint64_t) - svuint64_t svaddqp_u64(svuint64_t, svuint64_t) / svuint64_t svaddqp(svuint64_t, svuint64_t) - ADDSUBP (Add and subtract pairwise) - svint8_t svaddsubp_s8(svint8_t, svint8_t) / svint8_t svaddsubp(svint8_t, svint8_t) - svuint8_t svaddsubp_u8(svuint8_t, svuint8_t) / svuint8_t svaddsubp(svuint8_t, svuint8_t) - svint16_t svaddsubp_s16(svint16_t, svint16_t) / svint16_t svaddsubp(svint16_t, svint16_t) - svuint16_t svaddsubp_u16(svuint16_t, svuint16_t) / svuint16_t svaddsubp(svuint16_t, svuint16_t) - svint32_t svaddsubp_s32(svint32_t, svint32_t) / svint32_t svaddsubp(svint32_t, svint32_t) - svuint32_t svaddsubp_u32(svuint32_t, svuint32_t) / svuint32_t svaddsubp(svuint32_t, svuint32_t) - svint64_t svaddsubp_s64(svint64_t, svint64_t) / svint64_t svaddsubp(svint64_t, svint64_t) - svuint64_t svaddsubp_u64(svuint64_t, svuint64_t) / svuint64_t svaddsubp(svuint64_t, svuint64_t) - SUBP (Subtract pairwise) - svint8_t svsubp_s8(svbool_t, svint8_t, svint8_t) / svint8_t svsubp(svbool_t, svint8_t, svint8_t) - svuint8_t svsubp_u8(svbool_t, svuint8_t, svuint8_t) / svuint8_t svsubp(svbool_t, svuint8_t, svuint8_t) - svint16_t svsubp_s16(svbool_t, svint16_t, svint16_t) / svint16_t 
svsubp(svbool_t, svint16_t, svint16_t) - svuint16_t svsubp_u16(svbool_t, svuint16_t, svuint16_t) / svuint16_t svsubp(svbool_t, svuint16_t, svuint16_t) - svint32_t svsubp_s32(svbool_t, svint32_t, svint32_t) / svint32_t svsubp(svbool_t, svint32_t, svint32_t) - svuint32_t svsubp_u32(svbool_t, svuint32_t, svuint32_t) / svuint32_t svsubp(svbool_t, svuint32_t, svuint32_t) - svint64_t svsubp_s64(svbool_t, svint64_t, svint64_t) / svint64_t svsubp(svbool_t, svint64_t, svint64_t) - svuint64_t svsubp_u64(svbool_t, svuint64_t, svuint64_t) / svuint64_t svsubp(svbool_t, svuint64_t, svuint64_t)
1 parent 3200d64 commit 84e2738

7 files changed

Lines changed: 1057 additions & 3 deletions

File tree

clang/include/clang/Basic/arm_sve.td

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1421,6 +1421,17 @@ defm SVMINP_S : SInstPairwise<"svminp", "csli", "aarch64_sve_sminp", [
14211421
defm SVMINP_U : SInstPairwise<"svminp", "UcUsUiUl", "aarch64_sve_uminp", [VerifyRuntimeMode]>;
14221422
}
14231423

1424+
////////////////////////////////////////////////////////////////////////////////
1425+
// SVE2.3 - Add pairwise within quadword vector segments, add-and-subtract pairwise, subtract pairwise
1426+
1427+
let SVETargetGuard = "sve2p3|sme2p3", SMETargetGuard = "sve2p3|sme2p3" in {
1428+
def SVADDQP : SInst<"svaddqp[_{d}]", "ddd", "csilUcUsUiUl", MergeNone, "aarch64_sve_addqp",
1429+
[VerifyRuntimeMode]>;
1430+
def SVADDSUBP : SInst<"svaddsubp[_{d}]", "ddd", "csilUcUsUiUl", MergeNone, "aarch64_sve_addsubp",
1431+
[VerifyRuntimeMode]>;
1432+
def SVSUBP : SInst<"svsubp[_{d}]", "dPdd", "csilUcUsUiUl", MergeNone, "aarch64_sve_subp", [VerifyRuntimeMode]>;
1433+
}
1434+
14241435
////////////////////////////////////////////////////////////////////////////////
14251436
// SVE2 - Widening pairwise arithmetic
14261437

clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_addqp.c

Lines changed: 262 additions & 0 deletions
Large diffs are not rendered by default.

clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_addsubp.c

Lines changed: 262 additions & 0 deletions
Large diffs are not rendered by default.

clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_subp.c

Lines changed: 322 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 193 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,193 @@
1+
// NOTE: File has been autogenerated by utils/aarch64_builtins_test_generator.py
2+
// RUN: %clang_cc1 %s -fsyntax-only -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -target-feature +sve2p3 -verify
3+
// RUN: %clang_cc1 %s -fsyntax-only -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2p3 -target-feature +sve -verify
4+
// expected-no-diagnostics
5+
6+
// REQUIRES: aarch64-registered-target
7+
8+
#include <arm_sve.h>
9+
10+
// Properties: guard="sve,(sve2p3|sme2p3)" streaming_guard="sme,(sve2p3|sme2p3)" flags="feature-dependent"
11+
12+
void test(void) {
13+
svbool_t svbool_t_val;
14+
svint8_t svint8_t_val;
15+
svint16_t svint16_t_val;
16+
svint32_t svint32_t_val;
17+
svint64_t svint64_t_val;
18+
svuint8_t svuint8_t_val;
19+
svuint16_t svuint16_t_val;
20+
svuint32_t svuint32_t_val;
21+
svuint64_t svuint64_t_val;
22+
23+
svaddqp(svint8_t_val, svint8_t_val);
24+
svaddqp(svint16_t_val, svint16_t_val);
25+
svaddqp(svint32_t_val, svint32_t_val);
26+
svaddqp(svint64_t_val, svint64_t_val);
27+
svaddqp(svuint8_t_val, svuint8_t_val);
28+
svaddqp(svuint16_t_val, svuint16_t_val);
29+
svaddqp(svuint32_t_val, svuint32_t_val);
30+
svaddqp(svuint64_t_val, svuint64_t_val);
31+
svaddqp_s8(svint8_t_val, svint8_t_val);
32+
svaddqp_s16(svint16_t_val, svint16_t_val);
33+
svaddqp_s32(svint32_t_val, svint32_t_val);
34+
svaddqp_s64(svint64_t_val, svint64_t_val);
35+
svaddqp_u8(svuint8_t_val, svuint8_t_val);
36+
svaddqp_u16(svuint16_t_val, svuint16_t_val);
37+
svaddqp_u32(svuint32_t_val, svuint32_t_val);
38+
svaddqp_u64(svuint64_t_val, svuint64_t_val);
39+
svaddsubp(svint8_t_val, svint8_t_val);
40+
svaddsubp(svint16_t_val, svint16_t_val);
41+
svaddsubp(svint32_t_val, svint32_t_val);
42+
svaddsubp(svint64_t_val, svint64_t_val);
43+
svaddsubp(svuint8_t_val, svuint8_t_val);
44+
svaddsubp(svuint16_t_val, svuint16_t_val);
45+
svaddsubp(svuint32_t_val, svuint32_t_val);
46+
svaddsubp(svuint64_t_val, svuint64_t_val);
47+
svaddsubp_s8(svint8_t_val, svint8_t_val);
48+
svaddsubp_s16(svint16_t_val, svint16_t_val);
49+
svaddsubp_s32(svint32_t_val, svint32_t_val);
50+
svaddsubp_s64(svint64_t_val, svint64_t_val);
51+
svaddsubp_u8(svuint8_t_val, svuint8_t_val);
52+
svaddsubp_u16(svuint16_t_val, svuint16_t_val);
53+
svaddsubp_u32(svuint32_t_val, svuint32_t_val);
54+
svaddsubp_u64(svuint64_t_val, svuint64_t_val);
55+
svsubp(svbool_t_val, svint8_t_val, svint8_t_val);
56+
svsubp(svbool_t_val, svint16_t_val, svint16_t_val);
57+
svsubp(svbool_t_val, svint32_t_val, svint32_t_val);
58+
svsubp(svbool_t_val, svint64_t_val, svint64_t_val);
59+
svsubp(svbool_t_val, svuint8_t_val, svuint8_t_val);
60+
svsubp(svbool_t_val, svuint16_t_val, svuint16_t_val);
61+
svsubp(svbool_t_val, svuint32_t_val, svuint32_t_val);
62+
svsubp(svbool_t_val, svuint64_t_val, svuint64_t_val);
63+
svsubp_s8(svbool_t_val, svint8_t_val, svint8_t_val);
64+
svsubp_s16(svbool_t_val, svint16_t_val, svint16_t_val);
65+
svsubp_s32(svbool_t_val, svint32_t_val, svint32_t_val);
66+
svsubp_s64(svbool_t_val, svint64_t_val, svint64_t_val);
67+
svsubp_u8(svbool_t_val, svuint8_t_val, svuint8_t_val);
68+
svsubp_u16(svbool_t_val, svuint16_t_val, svuint16_t_val);
69+
svsubp_u32(svbool_t_val, svuint32_t_val, svuint32_t_val);
70+
svsubp_u64(svbool_t_val, svuint64_t_val, svuint64_t_val);
71+
}
72+
73+
void test_streaming(void) __arm_streaming{
74+
svbool_t svbool_t_val;
75+
svint8_t svint8_t_val;
76+
svint16_t svint16_t_val;
77+
svint32_t svint32_t_val;
78+
svint64_t svint64_t_val;
79+
svuint8_t svuint8_t_val;
80+
svuint16_t svuint16_t_val;
81+
svuint32_t svuint32_t_val;
82+
svuint64_t svuint64_t_val;
83+
84+
svaddqp(svint8_t_val, svint8_t_val);
85+
svaddqp(svint16_t_val, svint16_t_val);
86+
svaddqp(svint32_t_val, svint32_t_val);
87+
svaddqp(svint64_t_val, svint64_t_val);
88+
svaddqp(svuint8_t_val, svuint8_t_val);
89+
svaddqp(svuint16_t_val, svuint16_t_val);
90+
svaddqp(svuint32_t_val, svuint32_t_val);
91+
svaddqp(svuint64_t_val, svuint64_t_val);
92+
svaddqp_s8(svint8_t_val, svint8_t_val);
93+
svaddqp_s16(svint16_t_val, svint16_t_val);
94+
svaddqp_s32(svint32_t_val, svint32_t_val);
95+
svaddqp_s64(svint64_t_val, svint64_t_val);
96+
svaddqp_u8(svuint8_t_val, svuint8_t_val);
97+
svaddqp_u16(svuint16_t_val, svuint16_t_val);
98+
svaddqp_u32(svuint32_t_val, svuint32_t_val);
99+
svaddqp_u64(svuint64_t_val, svuint64_t_val);
100+
svaddsubp(svint8_t_val, svint8_t_val);
101+
svaddsubp(svint16_t_val, svint16_t_val);
102+
svaddsubp(svint32_t_val, svint32_t_val);
103+
svaddsubp(svint64_t_val, svint64_t_val);
104+
svaddsubp(svuint8_t_val, svuint8_t_val);
105+
svaddsubp(svuint16_t_val, svuint16_t_val);
106+
svaddsubp(svuint32_t_val, svuint32_t_val);
107+
svaddsubp(svuint64_t_val, svuint64_t_val);
108+
svaddsubp_s8(svint8_t_val, svint8_t_val);
109+
svaddsubp_s16(svint16_t_val, svint16_t_val);
110+
svaddsubp_s32(svint32_t_val, svint32_t_val);
111+
svaddsubp_s64(svint64_t_val, svint64_t_val);
112+
svaddsubp_u8(svuint8_t_val, svuint8_t_val);
113+
svaddsubp_u16(svuint16_t_val, svuint16_t_val);
114+
svaddsubp_u32(svuint32_t_val, svuint32_t_val);
115+
svaddsubp_u64(svuint64_t_val, svuint64_t_val);
116+
svsubp(svbool_t_val, svint8_t_val, svint8_t_val);
117+
svsubp(svbool_t_val, svint16_t_val, svint16_t_val);
118+
svsubp(svbool_t_val, svint32_t_val, svint32_t_val);
119+
svsubp(svbool_t_val, svint64_t_val, svint64_t_val);
120+
svsubp(svbool_t_val, svuint8_t_val, svuint8_t_val);
121+
svsubp(svbool_t_val, svuint16_t_val, svuint16_t_val);
122+
svsubp(svbool_t_val, svuint32_t_val, svuint32_t_val);
123+
svsubp(svbool_t_val, svuint64_t_val, svuint64_t_val);
124+
svsubp_s8(svbool_t_val, svint8_t_val, svint8_t_val);
125+
svsubp_s16(svbool_t_val, svint16_t_val, svint16_t_val);
126+
svsubp_s32(svbool_t_val, svint32_t_val, svint32_t_val);
127+
svsubp_s64(svbool_t_val, svint64_t_val, svint64_t_val);
128+
svsubp_u8(svbool_t_val, svuint8_t_val, svuint8_t_val);
129+
svsubp_u16(svbool_t_val, svuint16_t_val, svuint16_t_val);
130+
svsubp_u32(svbool_t_val, svuint32_t_val, svuint32_t_val);
131+
svsubp_u64(svbool_t_val, svuint64_t_val, svuint64_t_val);
132+
}
133+
134+
void test_streaming_compatible(void) __arm_streaming_compatible{
135+
svbool_t svbool_t_val;
136+
svint8_t svint8_t_val;
137+
svint16_t svint16_t_val;
138+
svint32_t svint32_t_val;
139+
svint64_t svint64_t_val;
140+
svuint8_t svuint8_t_val;
141+
svuint16_t svuint16_t_val;
142+
svuint32_t svuint32_t_val;
143+
svuint64_t svuint64_t_val;
144+
145+
svaddqp(svint8_t_val, svint8_t_val);
146+
svaddqp(svint16_t_val, svint16_t_val);
147+
svaddqp(svint32_t_val, svint32_t_val);
148+
svaddqp(svint64_t_val, svint64_t_val);
149+
svaddqp(svuint8_t_val, svuint8_t_val);
150+
svaddqp(svuint16_t_val, svuint16_t_val);
151+
svaddqp(svuint32_t_val, svuint32_t_val);
152+
svaddqp(svuint64_t_val, svuint64_t_val);
153+
svaddqp_s8(svint8_t_val, svint8_t_val);
154+
svaddqp_s16(svint16_t_val, svint16_t_val);
155+
svaddqp_s32(svint32_t_val, svint32_t_val);
156+
svaddqp_s64(svint64_t_val, svint64_t_val);
157+
svaddqp_u8(svuint8_t_val, svuint8_t_val);
158+
svaddqp_u16(svuint16_t_val, svuint16_t_val);
159+
svaddqp_u32(svuint32_t_val, svuint32_t_val);
160+
svaddqp_u64(svuint64_t_val, svuint64_t_val);
161+
svaddsubp(svint8_t_val, svint8_t_val);
162+
svaddsubp(svint16_t_val, svint16_t_val);
163+
svaddsubp(svint32_t_val, svint32_t_val);
164+
svaddsubp(svint64_t_val, svint64_t_val);
165+
svaddsubp(svuint8_t_val, svuint8_t_val);
166+
svaddsubp(svuint16_t_val, svuint16_t_val);
167+
svaddsubp(svuint32_t_val, svuint32_t_val);
168+
svaddsubp(svuint64_t_val, svuint64_t_val);
169+
svaddsubp_s8(svint8_t_val, svint8_t_val);
170+
svaddsubp_s16(svint16_t_val, svint16_t_val);
171+
svaddsubp_s32(svint32_t_val, svint32_t_val);
172+
svaddsubp_s64(svint64_t_val, svint64_t_val);
173+
svaddsubp_u8(svuint8_t_val, svuint8_t_val);
174+
svaddsubp_u16(svuint16_t_val, svuint16_t_val);
175+
svaddsubp_u32(svuint32_t_val, svuint32_t_val);
176+
svaddsubp_u64(svuint64_t_val, svuint64_t_val);
177+
svsubp(svbool_t_val, svint8_t_val, svint8_t_val);
178+
svsubp(svbool_t_val, svint16_t_val, svint16_t_val);
179+
svsubp(svbool_t_val, svint32_t_val, svint32_t_val);
180+
svsubp(svbool_t_val, svint64_t_val, svint64_t_val);
181+
svsubp(svbool_t_val, svuint8_t_val, svuint8_t_val);
182+
svsubp(svbool_t_val, svuint16_t_val, svuint16_t_val);
183+
svsubp(svbool_t_val, svuint32_t_val, svuint32_t_val);
184+
svsubp(svbool_t_val, svuint64_t_val, svuint64_t_val);
185+
svsubp_s8(svbool_t_val, svint8_t_val, svint8_t_val);
186+
svsubp_s16(svbool_t_val, svint16_t_val, svint16_t_val);
187+
svsubp_s32(svbool_t_val, svint32_t_val, svint32_t_val);
188+
svsubp_s64(svbool_t_val, svint64_t_val, svint64_t_val);
189+
svsubp_u8(svbool_t_val, svuint8_t_val, svuint8_t_val);
190+
svsubp_u16(svbool_t_val, svuint16_t_val, svuint16_t_val);
191+
svsubp_u32(svbool_t_val, svuint32_t_val, svuint32_t_val);
192+
svsubp_u64(svbool_t_val, svuint64_t_val, svuint64_t_val);
193+
}

llvm/include/llvm/IR/IntrinsicsAArch64.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2567,6 +2567,10 @@ def int_aarch64_sve_sminp : AdvSIMD_Pred2VectorArg_Intrinsic<[IntrSpeculatable
25672567
def int_aarch64_sve_umaxp : AdvSIMD_Pred2VectorArg_Intrinsic<[IntrSpeculatable]>;
25682568
def int_aarch64_sve_uminp : AdvSIMD_Pred2VectorArg_Intrinsic<[IntrSpeculatable]>;
25692569

2570+
def int_aarch64_sve_addqp : AdvSIMD_2VectorArg_Intrinsic<[IntrSpeculatable]>;
2571+
def int_aarch64_sve_addsubp : AdvSIMD_2VectorArg_Intrinsic<[IntrSpeculatable]>;
2572+
def int_aarch64_sve_subp : AdvSIMD_Pred2VectorArg_Intrinsic;
2573+
25702574
//
25712575
// SVE2 - Widening pairwise arithmetic
25722576
//

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4793,11 +4793,11 @@ let Predicates = [HasSVE2p2_or_SME2p2] in {
47934793
//===----------------------------------------------------------------------===//
47944794
let Predicates = [HasSVE2p3_or_SME2p3] in {
47954795
// SVE2 Add pairwise within quadword vector segments (unpredicated)
4796-
defm ADDQP_ZZZ : sve2_int_mul<0b110, "addqp", null_frag>;
4796+
defm ADDQP_ZZZ : sve2_int_mul<0b110, "addqp", int_aarch64_sve_addqp>;
47974797

47984798
// SVE2 Add-and-subtract pairwise / subtract pairwise
4799-
defm ADDSUBP_ZZZ : sve2_int_mul<0b111, "addsubp", null_frag>;
4800-
defm SUBP_ZPmZZ : sve2_int_arith_pred<0b100001, "subp", null_frag>;
4799+
defm ADDSUBP_ZZZ : sve2_int_mul<0b111, "addsubp", int_aarch64_sve_addsubp>;
4800+
defm SUBP_ZPmZZ : sve2_int_arith_pred<0b100001, "subp", int_aarch64_sve_subp>;
48014801

48024802
// SVE2 integer absolute difference and accumulate long
48034803
defm SABAL_ZZZ : sve2_int_two_way_absdiff_accum_long<0b0, "sabal">;

0 commit comments

Comments
 (0)