[clang] [llvm] [AArch64] Mark aarch64_set_fpmr as IntrWriteMem (PR #146353)
https://github.com/CarolineConcatto closed
https://github.com/llvm/llvm-project/pull/146353
___
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AArch64] Mark aarch64_set_fpmr as IntrWriteMem (PR #146353)
https://github.com/paulwalker-arm approved this pull request.
https://github.com/llvm/llvm-project/pull/146353
___
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AArch64] Mark aarch64_set_fpmr as IntrWriteMem (PR #146353)
https://github.com/CarolineConcatto updated
https://github.com/llvm/llvm-project/pull/146353
>From 27917c4896f31bc3ffd7e466ff9bf1cea15ba476 Mon Sep 17 00:00:00 2001
From: CarolineConcatto
Date: Mon, 30 Jun 2025 12:15:25 +
Subject: [PATCH 1/3] [AArch64] Mark aarch64_set_fpmr as IntrWriteMem
llvm.aarch64.set.fpmr only writes to inaccessible memory.
Tag it with the IntrWriteMem and IntrInaccessibleMemOnly properties so the
optimiser can treat it as a pure write.
The original patch did not add this property, causing the intrinsic
to be conservatively treated as readwrite. This commit fixes that.
---
.../test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_cvt.c | 3 +++
.../test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fdot.c | 4
.../test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fmla.c | 4
.../test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_cvt.c | 4
.../CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_fmopa.c | 5 +
.../test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_mla.c | 5 +
.../test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvt.c | 5 +
.../test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvtn.c | 5 +
.../test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_fdot.c | 5 +
.../test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_fmla.c | 5 +
llvm/include/llvm/IR/IntrinsicsAArch64.td| 2 +-
11 files changed, 46 insertions(+), 1 deletion(-)
diff --git a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_cvt.c
b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_cvt.c
index 9913c54567719..f7030b62c0f8e 100644
--- a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_cvt.c
+++ b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_cvt.c
@@ -330,3 +330,6 @@ mfloat8x8_t test_vcvt_mf8_f16_fpm(float16x4_t vn,
float16x4_t vm, fpm_t fpm) {
mfloat8x16_t test_vcvtq_mf8_f16_fpm(float16x8_t vn, float16x8_t vm, fpm_t fpm)
{
return vcvtq_mf8_f16_fpm(vn, vm, fpm);
}
+
+// CHECK: declare void @llvm.aarch64.set.fpmr(i64) [[ATTR1:#.*]]
+// CHECK: attributes [[ATTR1]] = { nocallback nofree nosync nounwind
willreturn memory(inaccessiblemem: write) }
diff --git a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fdot.c
b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fdot.c
index 44db59df6c1c4..c9c4e277440ae 100644
--- a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fdot.c
+++ b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fdot.c
@@ -83,6 +83,8 @@ float16x4_t test_vdot_lane_f16(float16x4_t vd, mfloat8x8_t
vn, mfloat8x8_t vm, f
return vdot_lane_f16_mf8_fpm(vd, vn, vm, 3, fpmr);
}
+// CHECK: declare void @llvm.aarch64.set.fpmr(i64) [[ATTR1:#.*]]
+
// CHECK-LABEL: define dso_local <4 x half> @test_vdot_laneq_f16(
// CHECK-SAME: <4 x half> noundef [[VD:%.*]], <8 x i8> [[VN:%.*]], <16 x i8>
[[VM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -268,3 +270,5 @@ float32x4_t test_vdotq_lane_f32(float32x4_t vd,
mfloat8x16_t vn, mfloat8x8_t vm,
float32x4_t test_vdotq_laneq_f32(float32x4_t vd, mfloat8x16_t vn, mfloat8x16_t
vm, fpm_t fpmr) {
return vdotq_laneq_f32_mf8_fpm(vd, vn, vm, 3, fpmr);
}
+
+// CHECK: attributes [[ATTR1]] = { nocallback nofree nosync nounwind
willreturn memory(inaccessiblemem: write) }
diff --git a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fmla.c
b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fmla.c
index d4f074a92b05b..222a8e7272745 100644
--- a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fmla.c
+++ b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fmla.c
@@ -154,6 +154,8 @@ float16x8_t test_vmlalb_lane(float16x8_t vd, mfloat8x16_t
vn, mfloat8x8_t vm, fp
return vmlalbq_lane_f16_mf8_fpm(vd, vn, vm, 0, fpm);
}
+// CHECK: declare void @llvm.aarch64.set.fpmr(i64) [[ATTR1:#.*]]
+
// CHECK-LABEL: define dso_local <8 x half> @test_vmlalb_laneq(
// CHECK-SAME: <8 x half> noundef [[VD:%.*]], <16 x i8> [[VN:%.*]], <16 x i8>
[[VM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -379,3 +381,5 @@ float32x4_t test_vmlalltt_lane(float32x4_t vd, mfloat8x16_t
vn, mfloat8x8_t vm,
float32x4_t test_vmlalltt_laneq(float32x4_t vd, mfloat8x16_t vn, mfloat8x16_t
vm, fpm_t fpm) {
return vmlallttq_laneq_f32_mf8_fpm(vd, vn, vm, 15, fpm);
}
+
+// CHECK: attributes [[ATTR1]] = { nocallback nofree nosync nounwind
willreturn memory(inaccessiblemem: write) }
diff --git a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_cvt.c
b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_cvt.c
index ae2e780f84cfe..3dbeb0aab6e25 100644
--- a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_cvt.c
+++ b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_cvt.c
@@ -32,6 +32,8 @@ svmfloat8_t test_cvt_f16_x2(svfloat16x2_t zn, fpm_t fpmr)
__arm_streaming {
return SVE_ACLE_FUNC(svcvt_mf8,_f16_x2,_fpm)(zn, fpmr);
}
+// CHECK: declare void @llvm
[clang] [llvm] [AArch64] Mark aarch64_set_fpmr as IntrWriteMem (PR #146353)
https://github.com/CarolineConcatto updated
https://github.com/llvm/llvm-project/pull/146353
>From 27917c4896f31bc3ffd7e466ff9bf1cea15ba476 Mon Sep 17 00:00:00 2001
From: CarolineConcatto
Date: Mon, 30 Jun 2025 12:15:25 +
Subject: [PATCH 1/3] [AArch64] Mark aarch64_set_fpmr as IntrWriteMem
llvm.aarch64.set.fpmr only writes to inaccessible memory.
Tag it with the IntrWriteMem and IntrInaccessibleMemOnly properties so the
optimiser can treat it as a pure write.
The original patch did not add this property, causing the intrinsic
to be conservatively treated as readwrite. This commit fixes that.
---
.../test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_cvt.c | 3 +++
.../test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fdot.c | 4
.../test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fmla.c | 4
.../test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_cvt.c | 4
.../CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_fmopa.c | 5 +
.../test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_mla.c | 5 +
.../test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvt.c | 5 +
.../test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvtn.c | 5 +
.../test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_fdot.c | 5 +
.../test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_fmla.c | 5 +
llvm/include/llvm/IR/IntrinsicsAArch64.td| 2 +-
11 files changed, 46 insertions(+), 1 deletion(-)
diff --git a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_cvt.c
b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_cvt.c
index 9913c54567719..f7030b62c0f8e 100644
--- a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_cvt.c
+++ b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_cvt.c
@@ -330,3 +330,6 @@ mfloat8x8_t test_vcvt_mf8_f16_fpm(float16x4_t vn,
float16x4_t vm, fpm_t fpm) {
mfloat8x16_t test_vcvtq_mf8_f16_fpm(float16x8_t vn, float16x8_t vm, fpm_t fpm)
{
return vcvtq_mf8_f16_fpm(vn, vm, fpm);
}
+
+// CHECK: declare void @llvm.aarch64.set.fpmr(i64) [[ATTR1:#.*]]
+// CHECK: attributes [[ATTR1]] = { nocallback nofree nosync nounwind
willreturn memory(inaccessiblemem: write) }
diff --git a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fdot.c
b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fdot.c
index 44db59df6c1c4..c9c4e277440ae 100644
--- a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fdot.c
+++ b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fdot.c
@@ -83,6 +83,8 @@ float16x4_t test_vdot_lane_f16(float16x4_t vd, mfloat8x8_t
vn, mfloat8x8_t vm, f
return vdot_lane_f16_mf8_fpm(vd, vn, vm, 3, fpmr);
}
+// CHECK: declare void @llvm.aarch64.set.fpmr(i64) [[ATTR1:#.*]]
+
// CHECK-LABEL: define dso_local <4 x half> @test_vdot_laneq_f16(
// CHECK-SAME: <4 x half> noundef [[VD:%.*]], <8 x i8> [[VN:%.*]], <16 x i8>
[[VM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -268,3 +270,5 @@ float32x4_t test_vdotq_lane_f32(float32x4_t vd,
mfloat8x16_t vn, mfloat8x8_t vm,
float32x4_t test_vdotq_laneq_f32(float32x4_t vd, mfloat8x16_t vn, mfloat8x16_t
vm, fpm_t fpmr) {
return vdotq_laneq_f32_mf8_fpm(vd, vn, vm, 3, fpmr);
}
+
+// CHECK: attributes [[ATTR1]] = { nocallback nofree nosync nounwind
willreturn memory(inaccessiblemem: write) }
diff --git a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fmla.c
b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fmla.c
index d4f074a92b05b..222a8e7272745 100644
--- a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fmla.c
+++ b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fmla.c
@@ -154,6 +154,8 @@ float16x8_t test_vmlalb_lane(float16x8_t vd, mfloat8x16_t
vn, mfloat8x8_t vm, fp
return vmlalbq_lane_f16_mf8_fpm(vd, vn, vm, 0, fpm);
}
+// CHECK: declare void @llvm.aarch64.set.fpmr(i64) [[ATTR1:#.*]]
+
// CHECK-LABEL: define dso_local <8 x half> @test_vmlalb_laneq(
// CHECK-SAME: <8 x half> noundef [[VD:%.*]], <16 x i8> [[VN:%.*]], <16 x i8>
[[VM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -379,3 +381,5 @@ float32x4_t test_vmlalltt_lane(float32x4_t vd, mfloat8x16_t
vn, mfloat8x8_t vm,
float32x4_t test_vmlalltt_laneq(float32x4_t vd, mfloat8x16_t vn, mfloat8x16_t
vm, fpm_t fpm) {
return vmlallttq_laneq_f32_mf8_fpm(vd, vn, vm, 15, fpm);
}
+
+// CHECK: attributes [[ATTR1]] = { nocallback nofree nosync nounwind
willreturn memory(inaccessiblemem: write) }
diff --git a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_cvt.c
b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_cvt.c
index ae2e780f84cfe..3dbeb0aab6e25 100644
--- a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_cvt.c
+++ b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_cvt.c
@@ -32,6 +32,8 @@ svmfloat8_t test_cvt_f16_x2(svfloat16x2_t zn, fpm_t fpmr)
__arm_streaming {
return SVE_ACLE_FUNC(svcvt_mf8,_f16_x2,_fpm)(zn, fpmr);
}
+// CHECK: declare void @llvm
[clang] [llvm] [AArch64] Mark aarch64_set_fpmr as IntrWriteMem (PR #146353)
@@ -330,3 +330,6 @@ mfloat8x8_t test_vcvt_mf8_f16_fpm(float16x4_t vn,
float16x4_t vm, fpm_t fpm) {
mfloat8x16_t test_vcvtq_mf8_f16_fpm(float16x8_t vn, float16x8_t vm, fpm_t fpm)
{
return vcvtq_mf8_f16_fpm(vn, vm, fpm);
}
+
+// CHECK: declare void @llvm.aarch64.set.fpmr(i64) [[ATTR1:#.*]]
+// CHECK: attributes [[ATTR1]] = { nocallback nofree nosync nounwind
willreturn memory(inaccessiblemem: write) }
CarolineConcatto wrote:
I tried to find a similar test in Sema, but I think what I want can only be
checked once we have the LLVM IR, i.e. in the codegen tests.
Let me know if the test I added is fine.
It now also checks the memory attributes of other fp8 functions, besides
set.fpmr.
https://github.com/llvm/llvm-project/pull/146353
___
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AArch64] Mark aarch64_set_fpmr as IntrWriteMem (PR #146353)
https://github.com/CarolineConcatto updated
https://github.com/llvm/llvm-project/pull/146353
>From 27917c4896f31bc3ffd7e466ff9bf1cea15ba476 Mon Sep 17 00:00:00 2001
From: CarolineConcatto
Date: Mon, 30 Jun 2025 12:15:25 +
Subject: [PATCH 1/2] [AArch64] Mark aarch64_set_fpmr as IntrWriteMem
llvm.aarch64.set.fpmr only writes to inaccessible memory.
Tag it with the IntrWriteMem and IntrInaccessibleMemOnly properties so the
optimiser can treat it as a pure write.
The original patch did not add this property, causing the intrinsic
to be conservatively treated as readwrite. This commit fixes that.
---
.../test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_cvt.c | 3 +++
.../test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fdot.c | 4
.../test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fmla.c | 4
.../test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_cvt.c | 4
.../CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_fmopa.c | 5 +
.../test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_mla.c | 5 +
.../test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvt.c | 5 +
.../test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvtn.c | 5 +
.../test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_fdot.c | 5 +
.../test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_fmla.c | 5 +
llvm/include/llvm/IR/IntrinsicsAArch64.td| 2 +-
11 files changed, 46 insertions(+), 1 deletion(-)
diff --git a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_cvt.c
b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_cvt.c
index 9913c54567719..f7030b62c0f8e 100644
--- a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_cvt.c
+++ b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_cvt.c
@@ -330,3 +330,6 @@ mfloat8x8_t test_vcvt_mf8_f16_fpm(float16x4_t vn,
float16x4_t vm, fpm_t fpm) {
mfloat8x16_t test_vcvtq_mf8_f16_fpm(float16x8_t vn, float16x8_t vm, fpm_t fpm)
{
return vcvtq_mf8_f16_fpm(vn, vm, fpm);
}
+
+// CHECK: declare void @llvm.aarch64.set.fpmr(i64) [[ATTR1:#.*]]
+// CHECK: attributes [[ATTR1]] = { nocallback nofree nosync nounwind
willreturn memory(inaccessiblemem: write) }
diff --git a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fdot.c
b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fdot.c
index 44db59df6c1c4..c9c4e277440ae 100644
--- a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fdot.c
+++ b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fdot.c
@@ -83,6 +83,8 @@ float16x4_t test_vdot_lane_f16(float16x4_t vd, mfloat8x8_t
vn, mfloat8x8_t vm, f
return vdot_lane_f16_mf8_fpm(vd, vn, vm, 3, fpmr);
}
+// CHECK: declare void @llvm.aarch64.set.fpmr(i64) [[ATTR1:#.*]]
+
// CHECK-LABEL: define dso_local <4 x half> @test_vdot_laneq_f16(
// CHECK-SAME: <4 x half> noundef [[VD:%.*]], <8 x i8> [[VN:%.*]], <16 x i8>
[[VM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -268,3 +270,5 @@ float32x4_t test_vdotq_lane_f32(float32x4_t vd,
mfloat8x16_t vn, mfloat8x8_t vm,
float32x4_t test_vdotq_laneq_f32(float32x4_t vd, mfloat8x16_t vn, mfloat8x16_t
vm, fpm_t fpmr) {
return vdotq_laneq_f32_mf8_fpm(vd, vn, vm, 3, fpmr);
}
+
+// CHECK: attributes [[ATTR1]] = { nocallback nofree nosync nounwind
willreturn memory(inaccessiblemem: write) }
diff --git a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fmla.c
b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fmla.c
index d4f074a92b05b..222a8e7272745 100644
--- a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fmla.c
+++ b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fmla.c
@@ -154,6 +154,8 @@ float16x8_t test_vmlalb_lane(float16x8_t vd, mfloat8x16_t
vn, mfloat8x8_t vm, fp
return vmlalbq_lane_f16_mf8_fpm(vd, vn, vm, 0, fpm);
}
+// CHECK: declare void @llvm.aarch64.set.fpmr(i64) [[ATTR1:#.*]]
+
// CHECK-LABEL: define dso_local <8 x half> @test_vmlalb_laneq(
// CHECK-SAME: <8 x half> noundef [[VD:%.*]], <16 x i8> [[VN:%.*]], <16 x i8>
[[VM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -379,3 +381,5 @@ float32x4_t test_vmlalltt_lane(float32x4_t vd, mfloat8x16_t
vn, mfloat8x8_t vm,
float32x4_t test_vmlalltt_laneq(float32x4_t vd, mfloat8x16_t vn, mfloat8x16_t
vm, fpm_t fpm) {
return vmlallttq_laneq_f32_mf8_fpm(vd, vn, vm, 15, fpm);
}
+
+// CHECK: attributes [[ATTR1]] = { nocallback nofree nosync nounwind
willreturn memory(inaccessiblemem: write) }
diff --git a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_cvt.c
b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_cvt.c
index ae2e780f84cfe..3dbeb0aab6e25 100644
--- a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_cvt.c
+++ b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_cvt.c
@@ -32,6 +32,8 @@ svmfloat8_t test_cvt_f16_x2(svfloat16x2_t zn, fpm_t fpmr)
__arm_streaming {
return SVE_ACLE_FUNC(svcvt_mf8,_f16_x2,_fpm)(zn, fpmr);
}
+// CHECK: declare void @llvm
[clang] [llvm] [AArch64] Mark aarch64_set_fpmr as IntrWriteMem (PR #146353)
https://github.com/kmclaughlin-arm approved this pull request.
https://github.com/llvm/llvm-project/pull/146353
___
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AArch64] Mark aarch64_set_fpmr as IntrWriteMem (PR #146353)
@@ -330,3 +330,6 @@ mfloat8x8_t test_vcvt_mf8_f16_fpm(float16x4_t vn,
float16x4_t vm, fpm_t fpm) {
mfloat8x16_t test_vcvtq_mf8_f16_fpm(float16x8_t vn, float16x8_t vm, fpm_t fpm)
{
return vcvtq_mf8_f16_fpm(vn, vm, fpm);
}
+
+// CHECK: declare void @llvm.aarch64.set.fpmr(i64) [[ATTR1:#.*]]
+// CHECK: attributes [[ATTR1]] = { nocallback nofree nosync nounwind
willreturn memory(inaccessiblemem: write) }
paulwalker-arm wrote:
The test changes look manual, which is going to make these tests awkward to
update in the future. Perhaps add a dedicated test file for this specific
purpose?
https://github.com/llvm/llvm-project/pull/146353
___
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AArch64] Mark aarch64_set_fpmr as IntrWriteMem (PR #146353)
llvmbot wrote:
@llvm/pr-subscribers-clang
Author: None (CarolineConcatto)
Changes
llvm.aarch64.set.fpmr only writes to inaccessible memory. Tag it with the
IntrWriteMem and IntrInaccessibleMemOnly properties so the optimiser can treat
it as a pure write.
The original patch did not add this property, causing the intrinsic to be
conservatively treated as readwrite. This commit fixes that.
---
Full diff: https://github.com/llvm/llvm-project/pull/146353.diff
11 Files Affected:
- (modified) clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_cvt.c (+3)
- (modified) clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fdot.c
(+4)
- (modified) clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fmla.c
(+4)
- (modified) clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_cvt.c (+4)
- (modified) clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_fmopa.c
(+5)
- (modified) clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_mla.c (+5)
- (modified) clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvt.c (+5)
- (modified) clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvtn.c
(+5)
- (modified) clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_fdot.c
(+5)
- (modified) clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_fmla.c
(+5)
- (modified) llvm/include/llvm/IR/IntrinsicsAArch64.td (+1-1)
```diff
diff --git a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_cvt.c
b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_cvt.c
index 9913c54567719..f7030b62c0f8e 100644
--- a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_cvt.c
+++ b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_cvt.c
@@ -330,3 +330,6 @@ mfloat8x8_t test_vcvt_mf8_f16_fpm(float16x4_t vn,
float16x4_t vm, fpm_t fpm) {
mfloat8x16_t test_vcvtq_mf8_f16_fpm(float16x8_t vn, float16x8_t vm, fpm_t fpm)
{
return vcvtq_mf8_f16_fpm(vn, vm, fpm);
}
+
+// CHECK: declare void @llvm.aarch64.set.fpmr(i64) [[ATTR1:#.*]]
+// CHECK: attributes [[ATTR1]] = { nocallback nofree nosync nounwind
willreturn memory(inaccessiblemem: write) }
diff --git a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fdot.c
b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fdot.c
index 44db59df6c1c4..c9c4e277440ae 100644
--- a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fdot.c
+++ b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fdot.c
@@ -83,6 +83,8 @@ float16x4_t test_vdot_lane_f16(float16x4_t vd, mfloat8x8_t
vn, mfloat8x8_t vm, f
return vdot_lane_f16_mf8_fpm(vd, vn, vm, 3, fpmr);
}
+// CHECK: declare void @llvm.aarch64.set.fpmr(i64) [[ATTR1:#.*]]
+
// CHECK-LABEL: define dso_local <4 x half> @test_vdot_laneq_f16(
// CHECK-SAME: <4 x half> noundef [[VD:%.*]], <8 x i8> [[VN:%.*]], <16 x i8>
[[VM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -268,3 +270,5 @@ float32x4_t test_vdotq_lane_f32(float32x4_t vd,
mfloat8x16_t vn, mfloat8x8_t vm,
float32x4_t test_vdotq_laneq_f32(float32x4_t vd, mfloat8x16_t vn, mfloat8x16_t
vm, fpm_t fpmr) {
return vdotq_laneq_f32_mf8_fpm(vd, vn, vm, 3, fpmr);
}
+
+// CHECK: attributes [[ATTR1]] = { nocallback nofree nosync nounwind
willreturn memory(inaccessiblemem: write) }
diff --git a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fmla.c
b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fmla.c
index d4f074a92b05b..222a8e7272745 100644
--- a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fmla.c
+++ b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fmla.c
@@ -154,6 +154,8 @@ float16x8_t test_vmlalb_lane(float16x8_t vd, mfloat8x16_t
vn, mfloat8x8_t vm, fp
return vmlalbq_lane_f16_mf8_fpm(vd, vn, vm, 0, fpm);
}
+// CHECK: declare void @llvm.aarch64.set.fpmr(i64) [[ATTR1:#.*]]
+
// CHECK-LABEL: define dso_local <8 x half> @test_vmlalb_laneq(
// CHECK-SAME: <8 x half> noundef [[VD:%.*]], <16 x i8> [[VN:%.*]], <16 x i8>
[[VM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -379,3 +381,5 @@ float32x4_t test_vmlalltt_lane(float32x4_t vd, mfloat8x16_t
vn, mfloat8x8_t vm,
float32x4_t test_vmlalltt_laneq(float32x4_t vd, mfloat8x16_t vn, mfloat8x16_t
vm, fpm_t fpm) {
return vmlallttq_laneq_f32_mf8_fpm(vd, vn, vm, 15, fpm);
}
+
+// CHECK: attributes [[ATTR1]] = { nocallback nofree nosync nounwind
willreturn memory(inaccessiblemem: write) }
diff --git a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_cvt.c
b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_cvt.c
index ae2e780f84cfe..3dbeb0aab6e25 100644
--- a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_cvt.c
+++ b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_cvt.c
@@ -32,6 +32,8 @@ svmfloat8_t test_cvt_f16_x2(svfloat16x2_t zn, fpm_t fpmr)
__arm_streaming {
return SVE_ACLE_FUNC(svcvt_mf8,_f16_x2,_fpm)(zn, fpmr);
}
+// CHECK: declare void @llvm.aarch64.set.fpmr(i64) [[ATTR1:#.*]]
+
// CHECK-L
[clang] [llvm] [AArch64] Mark aarch64_set_fpmr as IntrWriteMem (PR #146353)
https://github.com/CarolineConcatto created
https://github.com/llvm/llvm-project/pull/146353
llvm.aarch64.set.fpmr only writes to inaccessible memory. Tag it with the
IntrWriteMem and IntrInaccessibleMemOnly properties so the optimiser can treat
it as a pure write.
The original patch did not add this property, causing the intrinsic to be
conservatively treated as readwrite. This commit fixes that.
>From 27917c4896f31bc3ffd7e466ff9bf1cea15ba476 Mon Sep 17 00:00:00 2001
From: CarolineConcatto
Date: Mon, 30 Jun 2025 12:15:25 +
Subject: [PATCH] [AArch64] Mark aarch64_set_fpmr as IntrWriteMem
llvm.aarch64.set.fpmr only writes to inaccessible memory.
Tag it with the IntrWriteMem and IntrInaccessibleMemOnly properties so the
optimiser can treat it as a pure write.
The original patch did not add this property, causing the intrinsic
to be conservatively treated as readwrite. This commit fixes that.
---
.../test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_cvt.c | 3 +++
.../test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fdot.c | 4
.../test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fmla.c | 4
.../test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_cvt.c | 4
.../CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_fmopa.c | 5 +
.../test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_mla.c | 5 +
.../test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvt.c | 5 +
.../test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvtn.c | 5 +
.../test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_fdot.c | 5 +
.../test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_fmla.c | 5 +
llvm/include/llvm/IR/IntrinsicsAArch64.td| 2 +-
11 files changed, 46 insertions(+), 1 deletion(-)
diff --git a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_cvt.c
b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_cvt.c
index 9913c54567719..f7030b62c0f8e 100644
--- a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_cvt.c
+++ b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_cvt.c
@@ -330,3 +330,6 @@ mfloat8x8_t test_vcvt_mf8_f16_fpm(float16x4_t vn,
float16x4_t vm, fpm_t fpm) {
mfloat8x16_t test_vcvtq_mf8_f16_fpm(float16x8_t vn, float16x8_t vm, fpm_t fpm)
{
return vcvtq_mf8_f16_fpm(vn, vm, fpm);
}
+
+// CHECK: declare void @llvm.aarch64.set.fpmr(i64) [[ATTR1:#.*]]
+// CHECK: attributes [[ATTR1]] = { nocallback nofree nosync nounwind
willreturn memory(inaccessiblemem: write) }
diff --git a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fdot.c
b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fdot.c
index 44db59df6c1c4..c9c4e277440ae 100644
--- a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fdot.c
+++ b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fdot.c
@@ -83,6 +83,8 @@ float16x4_t test_vdot_lane_f16(float16x4_t vd, mfloat8x8_t
vn, mfloat8x8_t vm, f
return vdot_lane_f16_mf8_fpm(vd, vn, vm, 3, fpmr);
}
+// CHECK: declare void @llvm.aarch64.set.fpmr(i64) [[ATTR1:#.*]]
+
// CHECK-LABEL: define dso_local <4 x half> @test_vdot_laneq_f16(
// CHECK-SAME: <4 x half> noundef [[VD:%.*]], <8 x i8> [[VN:%.*]], <16 x i8>
[[VM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -268,3 +270,5 @@ float32x4_t test_vdotq_lane_f32(float32x4_t vd,
mfloat8x16_t vn, mfloat8x8_t vm,
float32x4_t test_vdotq_laneq_f32(float32x4_t vd, mfloat8x16_t vn, mfloat8x16_t
vm, fpm_t fpmr) {
return vdotq_laneq_f32_mf8_fpm(vd, vn, vm, 3, fpmr);
}
+
+// CHECK: attributes [[ATTR1]] = { nocallback nofree nosync nounwind
willreturn memory(inaccessiblemem: write) }
diff --git a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fmla.c
b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fmla.c
index d4f074a92b05b..222a8e7272745 100644
--- a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fmla.c
+++ b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fmla.c
@@ -154,6 +154,8 @@ float16x8_t test_vmlalb_lane(float16x8_t vd, mfloat8x16_t
vn, mfloat8x8_t vm, fp
return vmlalbq_lane_f16_mf8_fpm(vd, vn, vm, 0, fpm);
}
+// CHECK: declare void @llvm.aarch64.set.fpmr(i64) [[ATTR1:#.*]]
+
// CHECK-LABEL: define dso_local <8 x half> @test_vmlalb_laneq(
// CHECK-SAME: <8 x half> noundef [[VD:%.*]], <16 x i8> [[VN:%.*]], <16 x i8>
[[VM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -379,3 +381,5 @@ float32x4_t test_vmlalltt_lane(float32x4_t vd, mfloat8x16_t
vn, mfloat8x8_t vm,
float32x4_t test_vmlalltt_laneq(float32x4_t vd, mfloat8x16_t vn, mfloat8x16_t
vm, fpm_t fpm) {
return vmlallttq_laneq_f32_mf8_fpm(vd, vn, vm, 15, fpm);
}
+
+// CHECK: attributes [[ATTR1]] = { nocallback nofree nosync nounwind
willreturn memory(inaccessiblemem: write) }
diff --git a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_cvt.c
b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_cvt.c
index ae2e780f84cfe..3dbeb0aab6e25 100644
--- a/clang/test
