Skip to content

Commit b2a59f9

Browse files
[AArch64] Add support for range prefetch intrinsic
This patch adds support in Clang for the RPRFM instruction, which is available when FEAT_RPRFM is defined: void __rpld(int64_t access_kind, uint64_t retention_policy, uint64_t reuse_distance, int64_t stride, uint64_t count, int64_t length, void const *addr); If FEAT_RPRFM is not available, this instruction is a NOP. This implements the following ACLE proposal: ARM-software/acle#423
1 parent 96c69b7 commit b2a59f9

19 files changed

Lines changed: 196 additions & 2 deletions

File tree

clang/include/clang/Basic/BuiltinsAArch64.def

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,9 @@ TARGET_BUILTIN(__builtin_arm_jcvt, "Zid", "nc", "v8.3a")
9696
// Prefetch
9797
BUILTIN(__builtin_arm_prefetch, "vvC*UiUiUiUi", "nc")
9898

99+
// Range Prefetch
100+
BUILTIN(__builtin_arm_range_prefetch, "vvC*UiUiUiiUii", "nc")
101+
99102
// System Registers
100103
BUILTIN(__builtin_arm_rsr, "UicC*", "nc")
101104
BUILTIN(__builtin_arm_rsr64, "WUicC*", "nc")

clang/lib/Basic/Targets/AArch64.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -612,6 +612,9 @@ void AArch64TargetInfo::getTargetDefines(const LangOptions &Opts,
612612
if (HasLSE)
613613
Builder.defineMacro("__ARM_FEATURE_ATOMICS", "1");
614614

615+
if (HasRPRFM)
616+
Builder.defineMacro("__ARM_FEATURE_RPRFM", "1");
617+
615618
if (HasBFloat16) {
616619
Builder.defineMacro("__ARM_FEATURE_BF16", "1");
617620
Builder.defineMacro("__ARM_FEATURE_BF16_VECTOR_ARITHMETIC", "1");
@@ -870,6 +873,7 @@ bool AArch64TargetInfo::hasFeature(StringRef Feature) const {
870873
.Case("ssve-fp8fma", HasSSVE_FP8FMA)
871874
.Case("sme-f8f32", HasSME_F8F32)
872875
.Case("sme-f8f16", HasSME_F8F16)
876+
.Case("rprfm", HasRPRFM)
873877
.Default(false);
874878
}
875879

@@ -1100,6 +1104,9 @@ bool AArch64TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
11001104
if (Feature == "+strict-align")
11011105
HasUnalignedAccess = false;
11021106

1107+
if (Feature == "+rprfm")
1108+
HasRPRFM = true;
1109+
11031110
// All predecessor archs are added but select the latest one for ArchKind.
11041111
if (Feature == "+v8a" && ArchInfo->Version < llvm::AArch64::ARMV8A.Version)
11051112
ArchInfo = &llvm::AArch64::ARMV8A;

clang/lib/Basic/Targets/AArch64.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,7 @@ class LLVM_LIBRARY_VISIBILITY AArch64TargetInfo : public TargetInfo {
131131
bool HasRCPC3 = false;
132132
bool HasSMEFA64 = false;
133133
bool HasPAuthLR = false;
134+
bool HasRPRFM = false;
134135

135136
const llvm::AArch64::ArchInfo *ArchInfo = &llvm::AArch64::ARMV8A;
136137

clang/lib/Headers/arm_acle.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,10 @@ __swp(uint32_t __x, volatile uint32_t *__p) {
9898
#else
9999
#define __pldx(access_kind, cache_level, retention_policy, addr) \
100100
__builtin_arm_prefetch(addr, access_kind, cache_level, retention_policy, 1)
101+
#define __rpld(access_kind, retention_policy, reuse_distance, stride, count, \
102+
length, addr) \
103+
__builtin_arm_range_prefetch(addr, access_kind, retention_policy, \
104+
reuse_distance, stride, count, length)
101105
#endif
102106

103107
/* 7.6.2 Instruction prefetch */

clang/lib/Sema/SemaARM.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1122,6 +1122,15 @@ bool SemaARM::CheckAArch64BuiltinFunctionCall(const TargetInfo &TI,
11221122
SemaRef.BuiltinConstantArgRange(TheCall, 4, 0, 1);
11231123
}
11241124

1125+
if (BuiltinID == AArch64::BI__builtin_arm_range_prefetch) {
1126+
return SemaRef.BuiltinConstantArgRange(TheCall, 1, 0, 1) ||
1127+
SemaRef.BuiltinConstantArgRange(TheCall, 2, 0, 1) ||
1128+
SemaRef.BuiltinConstantArgRange(TheCall, 3, 0, 15) ||
1129+
SemaRef.BuiltinConstantArgRange(TheCall, 4, -2048, 2040) ||
1130+
SemaRef.BuiltinConstantArgRange(TheCall, 5, 0, 65535) ||
1131+
SemaRef.BuiltinConstantArgRange(TheCall, 6, -2048, 2040);
1132+
}
1133+
11251134
if (BuiltinID == AArch64::BI__builtin_arm_rsr64 ||
11261135
BuiltinID == AArch64::BI__builtin_arm_wsr64 ||
11271136
BuiltinID == AArch64::BI__builtin_arm_rsr128 ||

clang/test/CodeGen/arm_acle.c

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,19 @@ void test_pld() {
164164
__pld(0);
165165
}
166166

167+
#if defined(__ARM_64BIT_STATE)
168+
169+
// AArch64-LABEL: @test_rpld(
170+
// AArch64-NEXT: entry:
171+
// AArch64-NEXT: call void @llvm.aarch64.range.prefetch(ptr null, i32 1, i32 1, i32 15, i32 -2048, i32 65535, i32 2040)
172+
// AArch64-NEXT: ret void
173+
//
174+
void test_rpld() {
175+
__rpld(1, 1, 15, -2048, 65535, 2040, 0);
176+
}
177+
178+
#endif
179+
167180
// AArch32-LABEL: @test_pldx(
168181
// AArch32-NEXT: entry:
169182
// AArch32-NEXT: call void @llvm.prefetch.p0(ptr null, i32 1, i32 3, i32 1)

clang/test/CodeGen/builtins-arm64.c

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,20 @@ void prefetch(void) {
6262
// CHECK: call {{.*}} @llvm.aarch64.prefetch(ptr null, i32 0, i32 3, i32 0, i32 1)
6363
}
6464

65+
void range_prefetch(void) {
66+
__builtin_arm_range_prefetch(0, 0, 0, 15, 1024, 24, 2); // pldkeep
67+
// CHECK: call {{.*}} @llvm.aarch64.range.prefetch(ptr null, i32 0, i32 0, i32 15, i32 1024, i32 24, i32 2)
68+
69+
__builtin_arm_range_prefetch(0, 0, 1, 15, 1024, 24, 2); // pldstrm
70+
// CHECK: call {{.*}} @llvm.aarch64.range.prefetch(ptr null, i32 0, i32 1, i32 15, i32 1024, i32 24, i32 2)
71+
72+
__builtin_arm_range_prefetch(0, 1, 0, 15, 1024, 24, 2); // pstkeep
73+
// CHECK: call {{.*}} @llvm.aarch64.range.prefetch(ptr null, i32 1, i32 0, i32 15, i32 1024, i32 24, i32 2)
74+
75+
__builtin_arm_range_prefetch(0, 1, 1, 15, 1024, 24, 2); // pststrm
76+
// CHECK: call {{.*}} @llvm.aarch64.range.prefetch(ptr null, i32 1, i32 1, i32 15, i32 1024, i32 24, i32 2)
77+
}
78+
6579
__attribute__((target("v8.5a")))
6680
int32_t jcvt(double v) {
6781
//CHECK-LABEL: @jcvt(

clang/test/Driver/print-supported-extensions-aarch64.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@
6969
// CHECK-NEXT: rcpc FEAT_LRCPC Enable support for RCPC extension
7070
// CHECK-NEXT: rcpc3 FEAT_LRCPC3 Enable Armv8.9-A RCPC instructions for A64 and Advanced SIMD and floating-point instruction set
7171
// CHECK-NEXT: rdm FEAT_RDM Enable Armv8.1-A Rounding Double Multiply Add/Subtract instructions
72+
// CHECK-NEXT: rprfm FEAT_RPRFM Enable Armv8.0-A Range Prefetch Memory instruction
7273
// CHECK-NEXT: sb FEAT_SB Enable Armv8.5-A Speculation Barrier
7374
// CHECK-NEXT: sha2 FEAT_SHA1, FEAT_SHA256 Enable SHA1 and SHA256 support
7475
// CHECK-NEXT: sha3 FEAT_SHA3, FEAT_SHA512 Enable SHA512 and SHA3 support

clang/test/Preprocessor/aarch64-target-features.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -789,3 +789,6 @@
789789
// CHECK-SMEF8F16: __ARM_FEATURE_FP8 1
790790
// CHECK-SMEF8F16: __ARM_FEATURE_SME2 1
791791
// CHECK-SMEF8F16: __ARM_FEATURE_SME_F8F16 1
792+
793+
// RUN: %clang --target=aarch64 -march=armv8-a+rprfm -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-RPRFM %s
794+
// CHECK-RPRFM: __ARM_FEATURE_RPRFM 1

clang/test/Sema/builtins-arm64.c

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,11 +30,22 @@ void test_prefetch(void) {
3030
__builtin_arm_prefetch(0, 0, 0, 0, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
3131
}
3232

33+
void test_range_prefetch(void) {
34+
__builtin_arm_range_prefetch(0, 2, 0, 0, 0, 0, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
35+
__builtin_arm_range_prefetch(0, 0, 2, 0, 0, 0, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
36+
__builtin_arm_range_prefetch(0, 0, 0, 16, 0, 0, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
37+
__builtin_arm_range_prefetch(0, 0, 0, 0, -2049, 0, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
38+
__builtin_arm_range_prefetch(0, 0, 0, 0, 2041, 0, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
39+
__builtin_arm_range_prefetch(0, 0, 0, 0, 0, 65536, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
40+
__builtin_arm_range_prefetch(0, 0, 0, 0, 0, 0, -2049); // expected-error-re {{argument value {{.*}} is outside the valid range}}
41+
__builtin_arm_range_prefetch(0, 0, 0, 0, 0, 0, 2041); // expected-error-re {{argument value {{.*}} is outside the valid range}}
42+
}
43+
3344
void test_trap(short s, unsigned short us) {
3445
__builtin_arm_trap(42);
3546
__builtin_arm_trap(65535);
3647
__builtin_arm_trap(-1);
3748
__builtin_arm_trap(65536); // expected-warning {{implicit conversion from 'int' to 'unsigned short' changes value from 65536 to 0}}
3849
__builtin_arm_trap(s); // expected-error {{argument to '__builtin_arm_trap' must be a constant integer}}
3950
__builtin_arm_trap(us); // expected-error {{argument to '__builtin_arm_trap' must be a constant integer}}
40-
}
51+
}

0 commit comments

Comments
 (0)