[clang] [llvm] [AArch64] Mark aarch64_set_fpmr as IntrWriteMem (PR #146353)

2025-07-04 Thread via cfe-commits

https://github.com/CarolineConcatto closed 
https://github.com/llvm/llvm-project/pull/146353
___
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AArch64] Mark aarch64_set_fpmr as IntrWriteMem (PR #146353)

2025-07-03 Thread Paul Walker via cfe-commits

https://github.com/paulwalker-arm approved this pull request.


https://github.com/llvm/llvm-project/pull/146353
___
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AArch64] Mark aarch64_set_fpmr as IntrWriteMem (PR #146353)

2025-07-03 Thread via cfe-commits

https://github.com/CarolineConcatto updated 
https://github.com/llvm/llvm-project/pull/146353

>From 27917c4896f31bc3ffd7e466ff9bf1cea15ba476 Mon Sep 17 00:00:00 2001
From: CarolineConcatto 
Date: Mon, 30 Jun 2025 12:15:25 +
Subject: [PATCH 1/3] [AArch64] Mark aarch64_set_fpmr as IntrWriteMem

llvm.aarch64.set.fpmr only writes to inaccessible memory.
Tag it with the IntrWriteMem and IntrInaccessibleMemOnly properties so the
optimiser can treat it as a pure write.

The original patch did not add this property, causing the intrinsic
to be conservatively treated as readwrite. This commit fixes that.
---
 .../test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_cvt.c  | 3 +++
 .../test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fdot.c | 4 ++++
 .../test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fmla.c | 4 ++++
 .../test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_cvt.c  | 4 ++++
 .../CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_fmopa.c     | 5 +++++
 .../test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_mla.c  | 5 +++++
 .../test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvt.c  | 5 +++++
 .../test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvtn.c | 5 +++++
 .../test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_fdot.c | 5 +++++
 .../test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_fmla.c | 5 +++++
 llvm/include/llvm/IR/IntrinsicsAArch64.td                    | 2 +-
 11 files changed, 46 insertions(+), 1 deletion(-)

diff --git a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_cvt.c 
b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_cvt.c
index 9913c54567719..f7030b62c0f8e 100644
--- a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_cvt.c
+++ b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_cvt.c
@@ -330,3 +330,6 @@ mfloat8x8_t test_vcvt_mf8_f16_fpm(float16x4_t vn, 
float16x4_t vm, fpm_t fpm) {
 mfloat8x16_t test_vcvtq_mf8_f16_fpm(float16x8_t vn, float16x8_t vm, fpm_t fpm) 
{
   return vcvtq_mf8_f16_fpm(vn, vm, fpm);
 }
+
+// CHECK: declare void @llvm.aarch64.set.fpmr(i64)  [[ATTR1:#.*]]
+// CHECK: attributes [[ATTR1]] = { nocallback nofree nosync nounwind 
willreturn memory(inaccessiblemem: write) }
diff --git a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fdot.c 
b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fdot.c
index 44db59df6c1c4..c9c4e277440ae 100644
--- a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fdot.c
+++ b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fdot.c
@@ -83,6 +83,8 @@ float16x4_t test_vdot_lane_f16(float16x4_t vd, mfloat8x8_t 
vn, mfloat8x8_t vm, f
   return vdot_lane_f16_mf8_fpm(vd, vn, vm, 3, fpmr);
 }
 
+// CHECK: declare void @llvm.aarch64.set.fpmr(i64)  [[ATTR1:#.*]]
+
 // CHECK-LABEL: define dso_local <4 x half> @test_vdot_laneq_f16(
 // CHECK-SAME: <4 x half> noundef [[VD:%.*]], <8 x i8> [[VN:%.*]], <16 x i8> 
[[VM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
@@ -268,3 +270,5 @@ float32x4_t test_vdotq_lane_f32(float32x4_t vd, 
mfloat8x16_t vn, mfloat8x8_t vm,
 float32x4_t test_vdotq_laneq_f32(float32x4_t vd, mfloat8x16_t vn, mfloat8x16_t 
vm, fpm_t fpmr) {
   return vdotq_laneq_f32_mf8_fpm(vd, vn, vm, 3, fpmr);
 }
+
+// CHECK: attributes [[ATTR1]] = { nocallback nofree nosync nounwind 
willreturn memory(inaccessiblemem: write) }
diff --git a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fmla.c 
b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fmla.c
index d4f074a92b05b..222a8e7272745 100644
--- a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fmla.c
+++ b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fmla.c
@@ -154,6 +154,8 @@ float16x8_t test_vmlalb_lane(float16x8_t vd, mfloat8x16_t 
vn, mfloat8x8_t vm, fp
   return vmlalbq_lane_f16_mf8_fpm(vd, vn, vm, 0, fpm);
 }
 
+// CHECK: declare void @llvm.aarch64.set.fpmr(i64)  [[ATTR1:#.*]]
+
 // CHECK-LABEL: define dso_local <8 x half> @test_vmlalb_laneq(
 // CHECK-SAME: <8 x half> noundef [[VD:%.*]], <16 x i8> [[VN:%.*]], <16 x i8> 
[[VM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
@@ -379,3 +381,5 @@ float32x4_t test_vmlalltt_lane(float32x4_t vd, mfloat8x16_t 
vn, mfloat8x8_t vm,
 float32x4_t test_vmlalltt_laneq(float32x4_t vd, mfloat8x16_t vn, mfloat8x16_t 
vm, fpm_t fpm) {
   return vmlallttq_laneq_f32_mf8_fpm(vd, vn, vm, 15, fpm);
 }
+
+// CHECK: attributes [[ATTR1]] = { nocallback nofree nosync nounwind 
willreturn memory(inaccessiblemem: write) }
diff --git a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_cvt.c 
b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_cvt.c
index ae2e780f84cfe..3dbeb0aab6e25 100644
--- a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_cvt.c
+++ b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_cvt.c
@@ -32,6 +32,8 @@ svmfloat8_t test_cvt_f16_x2(svfloat16x2_t zn, fpm_t fpmr)  
__arm_streaming {
   return SVE_ACLE_FUNC(svcvt_mf8,_f16_x2,_fpm)(zn, fpmr);
 }
 
+// CHECK: declare void @llvm

[clang] [llvm] [AArch64] Mark aarch64_set_fpmr as IntrWriteMem (PR #146353)

2025-07-03 Thread via cfe-commits

https://github.com/CarolineConcatto updated 
https://github.com/llvm/llvm-project/pull/146353

>From 27917c4896f31bc3ffd7e466ff9bf1cea15ba476 Mon Sep 17 00:00:00 2001
From: CarolineConcatto 
Date: Mon, 30 Jun 2025 12:15:25 +
Subject: [PATCH 1/3] [AArch64] Mark aarch64_set_fpmr as IntrWriteMem

llvm.aarch64.set.fpmr only writes to inaccessible memory.
Tag it with the IntrWriteMem and IntrInaccessibleMemOnly properties so the
optimiser can treat it as a pure write.

The original patch did not add this property, causing the intrinsic
to be conservatively treated as readwrite. This commit fixes that.
---
 .../test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_cvt.c  | 3 +++
 .../test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fdot.c | 4 ++++
 .../test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fmla.c | 4 ++++
 .../test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_cvt.c  | 4 ++++
 .../CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_fmopa.c     | 5 +++++
 .../test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_mla.c  | 5 +++++
 .../test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvt.c  | 5 +++++
 .../test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvtn.c | 5 +++++
 .../test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_fdot.c | 5 +++++
 .../test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_fmla.c | 5 +++++
 llvm/include/llvm/IR/IntrinsicsAArch64.td                    | 2 +-
 11 files changed, 46 insertions(+), 1 deletion(-)

diff --git a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_cvt.c 
b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_cvt.c
index 9913c54567719..f7030b62c0f8e 100644
--- a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_cvt.c
+++ b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_cvt.c
@@ -330,3 +330,6 @@ mfloat8x8_t test_vcvt_mf8_f16_fpm(float16x4_t vn, 
float16x4_t vm, fpm_t fpm) {
 mfloat8x16_t test_vcvtq_mf8_f16_fpm(float16x8_t vn, float16x8_t vm, fpm_t fpm) 
{
   return vcvtq_mf8_f16_fpm(vn, vm, fpm);
 }
+
+// CHECK: declare void @llvm.aarch64.set.fpmr(i64)  [[ATTR1:#.*]]
+// CHECK: attributes [[ATTR1]] = { nocallback nofree nosync nounwind 
willreturn memory(inaccessiblemem: write) }
diff --git a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fdot.c 
b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fdot.c
index 44db59df6c1c4..c9c4e277440ae 100644
--- a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fdot.c
+++ b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fdot.c
@@ -83,6 +83,8 @@ float16x4_t test_vdot_lane_f16(float16x4_t vd, mfloat8x8_t 
vn, mfloat8x8_t vm, f
   return vdot_lane_f16_mf8_fpm(vd, vn, vm, 3, fpmr);
 }
 
+// CHECK: declare void @llvm.aarch64.set.fpmr(i64)  [[ATTR1:#.*]]
+
 // CHECK-LABEL: define dso_local <4 x half> @test_vdot_laneq_f16(
 // CHECK-SAME: <4 x half> noundef [[VD:%.*]], <8 x i8> [[VN:%.*]], <16 x i8> 
[[VM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
@@ -268,3 +270,5 @@ float32x4_t test_vdotq_lane_f32(float32x4_t vd, 
mfloat8x16_t vn, mfloat8x8_t vm,
 float32x4_t test_vdotq_laneq_f32(float32x4_t vd, mfloat8x16_t vn, mfloat8x16_t 
vm, fpm_t fpmr) {
   return vdotq_laneq_f32_mf8_fpm(vd, vn, vm, 3, fpmr);
 }
+
+// CHECK: attributes [[ATTR1]] = { nocallback nofree nosync nounwind 
willreturn memory(inaccessiblemem: write) }
diff --git a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fmla.c 
b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fmla.c
index d4f074a92b05b..222a8e7272745 100644
--- a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fmla.c
+++ b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fmla.c
@@ -154,6 +154,8 @@ float16x8_t test_vmlalb_lane(float16x8_t vd, mfloat8x16_t 
vn, mfloat8x8_t vm, fp
   return vmlalbq_lane_f16_mf8_fpm(vd, vn, vm, 0, fpm);
 }
 
+// CHECK: declare void @llvm.aarch64.set.fpmr(i64)  [[ATTR1:#.*]]
+
 // CHECK-LABEL: define dso_local <8 x half> @test_vmlalb_laneq(
 // CHECK-SAME: <8 x half> noundef [[VD:%.*]], <16 x i8> [[VN:%.*]], <16 x i8> 
[[VM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
@@ -379,3 +381,5 @@ float32x4_t test_vmlalltt_lane(float32x4_t vd, mfloat8x16_t 
vn, mfloat8x8_t vm,
 float32x4_t test_vmlalltt_laneq(float32x4_t vd, mfloat8x16_t vn, mfloat8x16_t 
vm, fpm_t fpm) {
   return vmlallttq_laneq_f32_mf8_fpm(vd, vn, vm, 15, fpm);
 }
+
+// CHECK: attributes [[ATTR1]] = { nocallback nofree nosync nounwind 
willreturn memory(inaccessiblemem: write) }
diff --git a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_cvt.c 
b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_cvt.c
index ae2e780f84cfe..3dbeb0aab6e25 100644
--- a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_cvt.c
+++ b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_cvt.c
@@ -32,6 +32,8 @@ svmfloat8_t test_cvt_f16_x2(svfloat16x2_t zn, fpm_t fpmr)  
__arm_streaming {
   return SVE_ACLE_FUNC(svcvt_mf8,_f16_x2,_fpm)(zn, fpmr);
 }
 
+// CHECK: declare void @llvm

[clang] [llvm] [AArch64] Mark aarch64_set_fpmr as IntrWriteMem (PR #146353)

2025-07-03 Thread via cfe-commits


@@ -330,3 +330,6 @@ mfloat8x8_t test_vcvt_mf8_f16_fpm(float16x4_t vn, 
float16x4_t vm, fpm_t fpm) {
 mfloat8x16_t test_vcvtq_mf8_f16_fpm(float16x8_t vn, float16x8_t vm, fpm_t fpm) 
{
   return vcvtq_mf8_f16_fpm(vn, vm, fpm);
 }
+
+// CHECK: declare void @llvm.aarch64.set.fpmr(i64)  [[ATTR1:#.*]]
+// CHECK: attributes [[ATTR1]] = { nocallback nofree nosync nounwind 
willreturn memory(inaccessiblemem: write) }

CarolineConcatto wrote:

I've tried to find a similar test in Sema, but I think what I want to check is
only available once we have the LLVM IR/codegen.
Let me know if the test I added is fine.
It now also checks the memory attributes of other fp8 functions, besides
set.fpmr.

https://github.com/llvm/llvm-project/pull/146353
___
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AArch64] Mark aarch64_set_fpmr as IntrWriteMem (PR #146353)

2025-07-03 Thread via cfe-commits

https://github.com/CarolineConcatto updated 
https://github.com/llvm/llvm-project/pull/146353

>From 27917c4896f31bc3ffd7e466ff9bf1cea15ba476 Mon Sep 17 00:00:00 2001
From: CarolineConcatto 
Date: Mon, 30 Jun 2025 12:15:25 +
Subject: [PATCH 1/2] [AArch64] Mark aarch64_set_fpmr as IntrWriteMem

llvm.aarch64.set.fpmr only writes to inaccessible memory.
Tag it with the IntrWriteMem and IntrInaccessibleMemOnly properties so the
optimiser can treat it as a pure write.

The original patch did not add this property, causing the intrinsic
to be conservatively treated as readwrite. This commit fixes that.
---
 .../test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_cvt.c  | 3 +++
 .../test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fdot.c | 4 ++++
 .../test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fmla.c | 4 ++++
 .../test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_cvt.c  | 4 ++++
 .../CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_fmopa.c     | 5 +++++
 .../test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_mla.c  | 5 +++++
 .../test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvt.c  | 5 +++++
 .../test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvtn.c | 5 +++++
 .../test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_fdot.c | 5 +++++
 .../test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_fmla.c | 5 +++++
 llvm/include/llvm/IR/IntrinsicsAArch64.td                    | 2 +-
 11 files changed, 46 insertions(+), 1 deletion(-)

diff --git a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_cvt.c 
b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_cvt.c
index 9913c54567719..f7030b62c0f8e 100644
--- a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_cvt.c
+++ b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_cvt.c
@@ -330,3 +330,6 @@ mfloat8x8_t test_vcvt_mf8_f16_fpm(float16x4_t vn, 
float16x4_t vm, fpm_t fpm) {
 mfloat8x16_t test_vcvtq_mf8_f16_fpm(float16x8_t vn, float16x8_t vm, fpm_t fpm) 
{
   return vcvtq_mf8_f16_fpm(vn, vm, fpm);
 }
+
+// CHECK: declare void @llvm.aarch64.set.fpmr(i64)  [[ATTR1:#.*]]
+// CHECK: attributes [[ATTR1]] = { nocallback nofree nosync nounwind 
willreturn memory(inaccessiblemem: write) }
diff --git a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fdot.c 
b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fdot.c
index 44db59df6c1c4..c9c4e277440ae 100644
--- a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fdot.c
+++ b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fdot.c
@@ -83,6 +83,8 @@ float16x4_t test_vdot_lane_f16(float16x4_t vd, mfloat8x8_t 
vn, mfloat8x8_t vm, f
   return vdot_lane_f16_mf8_fpm(vd, vn, vm, 3, fpmr);
 }
 
+// CHECK: declare void @llvm.aarch64.set.fpmr(i64)  [[ATTR1:#.*]]
+
 // CHECK-LABEL: define dso_local <4 x half> @test_vdot_laneq_f16(
 // CHECK-SAME: <4 x half> noundef [[VD:%.*]], <8 x i8> [[VN:%.*]], <16 x i8> 
[[VM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
@@ -268,3 +270,5 @@ float32x4_t test_vdotq_lane_f32(float32x4_t vd, 
mfloat8x16_t vn, mfloat8x8_t vm,
 float32x4_t test_vdotq_laneq_f32(float32x4_t vd, mfloat8x16_t vn, mfloat8x16_t 
vm, fpm_t fpmr) {
   return vdotq_laneq_f32_mf8_fpm(vd, vn, vm, 3, fpmr);
 }
+
+// CHECK: attributes [[ATTR1]] = { nocallback nofree nosync nounwind 
willreturn memory(inaccessiblemem: write) }
diff --git a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fmla.c 
b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fmla.c
index d4f074a92b05b..222a8e7272745 100644
--- a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fmla.c
+++ b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fmla.c
@@ -154,6 +154,8 @@ float16x8_t test_vmlalb_lane(float16x8_t vd, mfloat8x16_t 
vn, mfloat8x8_t vm, fp
   return vmlalbq_lane_f16_mf8_fpm(vd, vn, vm, 0, fpm);
 }
 
+// CHECK: declare void @llvm.aarch64.set.fpmr(i64)  [[ATTR1:#.*]]
+
 // CHECK-LABEL: define dso_local <8 x half> @test_vmlalb_laneq(
 // CHECK-SAME: <8 x half> noundef [[VD:%.*]], <16 x i8> [[VN:%.*]], <16 x i8> 
[[VM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
@@ -379,3 +381,5 @@ float32x4_t test_vmlalltt_lane(float32x4_t vd, mfloat8x16_t 
vn, mfloat8x8_t vm,
 float32x4_t test_vmlalltt_laneq(float32x4_t vd, mfloat8x16_t vn, mfloat8x16_t 
vm, fpm_t fpm) {
   return vmlallttq_laneq_f32_mf8_fpm(vd, vn, vm, 15, fpm);
 }
+
+// CHECK: attributes [[ATTR1]] = { nocallback nofree nosync nounwind 
willreturn memory(inaccessiblemem: write) }
diff --git a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_cvt.c 
b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_cvt.c
index ae2e780f84cfe..3dbeb0aab6e25 100644
--- a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_cvt.c
+++ b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_cvt.c
@@ -32,6 +32,8 @@ svmfloat8_t test_cvt_f16_x2(svfloat16x2_t zn, fpm_t fpmr)  
__arm_streaming {
   return SVE_ACLE_FUNC(svcvt_mf8,_f16_x2,_fpm)(zn, fpmr);
 }
 
+// CHECK: declare void @llvm

[clang] [llvm] [AArch64] Mark aarch64_set_fpmr as IntrWriteMem (PR #146353)

2025-07-01 Thread Kerry McLaughlin via cfe-commits

https://github.com/kmclaughlin-arm approved this pull request.


https://github.com/llvm/llvm-project/pull/146353
___
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AArch64] Mark aarch64_set_fpmr as IntrWriteMem (PR #146353)

2025-07-01 Thread Paul Walker via cfe-commits


@@ -330,3 +330,6 @@ mfloat8x8_t test_vcvt_mf8_f16_fpm(float16x4_t vn, 
float16x4_t vm, fpm_t fpm) {
 mfloat8x16_t test_vcvtq_mf8_f16_fpm(float16x8_t vn, float16x8_t vm, fpm_t fpm) 
{
   return vcvtq_mf8_f16_fpm(vn, vm, fpm);
 }
+
+// CHECK: declare void @llvm.aarch64.set.fpmr(i64)  [[ATTR1:#.*]]
+// CHECK: attributes [[ATTR1]] = { nocallback nofree nosync nounwind 
willreturn memory(inaccessiblemem: write) }

paulwalker-arm wrote:

The test changes look manual, which is going to make them awkward to maintain
in the future. Perhaps add a dedicated test file for this specific purpose?

https://github.com/llvm/llvm-project/pull/146353
___
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AArch64] Mark aarch64_set_fpmr as IntrWriteMem (PR #146353)

2025-06-30 Thread via cfe-commits

llvmbot wrote:




@llvm/pr-subscribers-clang

Author: None (CarolineConcatto)


Changes

llvm.aarch64.set.fpmr only writes to inaccessible memory. Tag it with the 
IntrWriteMem and IntrInaccessibleMemOnly properties so the optimiser can treat 
it as a pure write.

The original patch did not add this property, causing the intrinsic to be 
conservatively treated as readwrite. This commit fixes that.

---
Full diff: https://github.com/llvm/llvm-project/pull/146353.diff


11 Files Affected:

- (modified) clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_cvt.c (+3) 
- (modified) clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fdot.c 
(+4) 
- (modified) clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fmla.c 
(+4) 
- (modified) clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_cvt.c (+4) 
- (modified) clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_fmopa.c 
(+5) 
- (modified) clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_mla.c (+5) 
- (modified) clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvt.c (+5) 
- (modified) clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvtn.c 
(+5) 
- (modified) clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_fdot.c 
(+5) 
- (modified) clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_fmla.c 
(+5) 
- (modified) llvm/include/llvm/IR/IntrinsicsAArch64.td (+1-1) 


```diff
diff --git a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_cvt.c 
b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_cvt.c
index 9913c54567719..f7030b62c0f8e 100644
--- a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_cvt.c
+++ b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_cvt.c
@@ -330,3 +330,6 @@ mfloat8x8_t test_vcvt_mf8_f16_fpm(float16x4_t vn, 
float16x4_t vm, fpm_t fpm) {
 mfloat8x16_t test_vcvtq_mf8_f16_fpm(float16x8_t vn, float16x8_t vm, fpm_t fpm) 
{
   return vcvtq_mf8_f16_fpm(vn, vm, fpm);
 }
+
+// CHECK: declare void @llvm.aarch64.set.fpmr(i64)  [[ATTR1:#.*]]
+// CHECK: attributes [[ATTR1]] = { nocallback nofree nosync nounwind 
willreturn memory(inaccessiblemem: write) }
diff --git a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fdot.c 
b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fdot.c
index 44db59df6c1c4..c9c4e277440ae 100644
--- a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fdot.c
+++ b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fdot.c
@@ -83,6 +83,8 @@ float16x4_t test_vdot_lane_f16(float16x4_t vd, mfloat8x8_t 
vn, mfloat8x8_t vm, f
   return vdot_lane_f16_mf8_fpm(vd, vn, vm, 3, fpmr);
 }
 
+// CHECK: declare void @llvm.aarch64.set.fpmr(i64)  [[ATTR1:#.*]]
+
 // CHECK-LABEL: define dso_local <4 x half> @test_vdot_laneq_f16(
 // CHECK-SAME: <4 x half> noundef [[VD:%.*]], <8 x i8> [[VN:%.*]], <16 x i8> 
[[VM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
@@ -268,3 +270,5 @@ float32x4_t test_vdotq_lane_f32(float32x4_t vd, 
mfloat8x16_t vn, mfloat8x8_t vm,
 float32x4_t test_vdotq_laneq_f32(float32x4_t vd, mfloat8x16_t vn, mfloat8x16_t 
vm, fpm_t fpmr) {
   return vdotq_laneq_f32_mf8_fpm(vd, vn, vm, 3, fpmr);
 }
+
+// CHECK: attributes [[ATTR1]] = { nocallback nofree nosync nounwind 
willreturn memory(inaccessiblemem: write) }
diff --git a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fmla.c 
b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fmla.c
index d4f074a92b05b..222a8e7272745 100644
--- a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fmla.c
+++ b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fmla.c
@@ -154,6 +154,8 @@ float16x8_t test_vmlalb_lane(float16x8_t vd, mfloat8x16_t 
vn, mfloat8x8_t vm, fp
   return vmlalbq_lane_f16_mf8_fpm(vd, vn, vm, 0, fpm);
 }
 
+// CHECK: declare void @llvm.aarch64.set.fpmr(i64)  [[ATTR1:#.*]]
+
 // CHECK-LABEL: define dso_local <8 x half> @test_vmlalb_laneq(
 // CHECK-SAME: <8 x half> noundef [[VD:%.*]], <16 x i8> [[VN:%.*]], <16 x i8> 
[[VM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
@@ -379,3 +381,5 @@ float32x4_t test_vmlalltt_lane(float32x4_t vd, mfloat8x16_t 
vn, mfloat8x8_t vm,
 float32x4_t test_vmlalltt_laneq(float32x4_t vd, mfloat8x16_t vn, mfloat8x16_t 
vm, fpm_t fpm) {
   return vmlallttq_laneq_f32_mf8_fpm(vd, vn, vm, 15, fpm);
 }
+
+// CHECK: attributes [[ATTR1]] = { nocallback nofree nosync nounwind 
willreturn memory(inaccessiblemem: write) }
diff --git a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_cvt.c 
b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_cvt.c
index ae2e780f84cfe..3dbeb0aab6e25 100644
--- a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_cvt.c
+++ b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_cvt.c
@@ -32,6 +32,8 @@ svmfloat8_t test_cvt_f16_x2(svfloat16x2_t zn, fpm_t fpmr)  
__arm_streaming {
   return SVE_ACLE_FUNC(svcvt_mf8,_f16_x2,_fpm)(zn, fpmr);
 }
 
+// CHECK: declare void @llvm.aarch64.set.fpmr(i64)  [[ATTR1:#.*]]
+
 // CHECK-L

[clang] [llvm] [AArch64] Mark aarch64_set_fpmr as IntrWriteMem (PR #146353)

2025-06-30 Thread via cfe-commits

https://github.com/CarolineConcatto created 
https://github.com/llvm/llvm-project/pull/146353

llvm.aarch64.set.fpmr only writes to inaccessible memory. Tag it with the 
IntrWriteMem and IntrInaccessibleMemOnly properties so the optimiser can treat 
it as a pure write.

The original patch did not add this property, causing the intrinsic to be 
conservatively treated as readwrite. This commit fixes that.

>From 27917c4896f31bc3ffd7e466ff9bf1cea15ba476 Mon Sep 17 00:00:00 2001
From: CarolineConcatto 
Date: Mon, 30 Jun 2025 12:15:25 +
Subject: [PATCH] [AArch64] Mark aarch64_set_fpmr as IntrWriteMem

llvm.aarch64.set.fpmr only writes to inaccessible memory.
Tag it with the IntrWriteMem and IntrInaccessibleMemOnly properties so the
optimiser can treat it as a pure write.

The original patch did not add this property, causing the intrinsic
to be conservatively treated as readwrite. This commit fixes that.
---
 .../test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_cvt.c  | 3 +++
 .../test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fdot.c | 4 ++++
 .../test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fmla.c | 4 ++++
 .../test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_cvt.c  | 4 ++++
 .../CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_fmopa.c     | 5 +++++
 .../test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_mla.c  | 5 +++++
 .../test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvt.c  | 5 +++++
 .../test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvtn.c | 5 +++++
 .../test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_fdot.c | 5 +++++
 .../test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_fmla.c | 5 +++++
 llvm/include/llvm/IR/IntrinsicsAArch64.td                    | 2 +-
 11 files changed, 46 insertions(+), 1 deletion(-)

diff --git a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_cvt.c 
b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_cvt.c
index 9913c54567719..f7030b62c0f8e 100644
--- a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_cvt.c
+++ b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_cvt.c
@@ -330,3 +330,6 @@ mfloat8x8_t test_vcvt_mf8_f16_fpm(float16x4_t vn, 
float16x4_t vm, fpm_t fpm) {
 mfloat8x16_t test_vcvtq_mf8_f16_fpm(float16x8_t vn, float16x8_t vm, fpm_t fpm) 
{
   return vcvtq_mf8_f16_fpm(vn, vm, fpm);
 }
+
+// CHECK: declare void @llvm.aarch64.set.fpmr(i64)  [[ATTR1:#.*]]
+// CHECK: attributes [[ATTR1]] = { nocallback nofree nosync nounwind 
willreturn memory(inaccessiblemem: write) }
diff --git a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fdot.c 
b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fdot.c
index 44db59df6c1c4..c9c4e277440ae 100644
--- a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fdot.c
+++ b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fdot.c
@@ -83,6 +83,8 @@ float16x4_t test_vdot_lane_f16(float16x4_t vd, mfloat8x8_t 
vn, mfloat8x8_t vm, f
   return vdot_lane_f16_mf8_fpm(vd, vn, vm, 3, fpmr);
 }
 
+// CHECK: declare void @llvm.aarch64.set.fpmr(i64)  [[ATTR1:#.*]]
+
 // CHECK-LABEL: define dso_local <4 x half> @test_vdot_laneq_f16(
 // CHECK-SAME: <4 x half> noundef [[VD:%.*]], <8 x i8> [[VN:%.*]], <16 x i8> 
[[VM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
@@ -268,3 +270,5 @@ float32x4_t test_vdotq_lane_f32(float32x4_t vd, 
mfloat8x16_t vn, mfloat8x8_t vm,
 float32x4_t test_vdotq_laneq_f32(float32x4_t vd, mfloat8x16_t vn, mfloat8x16_t 
vm, fpm_t fpmr) {
   return vdotq_laneq_f32_mf8_fpm(vd, vn, vm, 3, fpmr);
 }
+
+// CHECK: attributes [[ATTR1]] = { nocallback nofree nosync nounwind 
willreturn memory(inaccessiblemem: write) }
diff --git a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fmla.c 
b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fmla.c
index d4f074a92b05b..222a8e7272745 100644
--- a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fmla.c
+++ b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fmla.c
@@ -154,6 +154,8 @@ float16x8_t test_vmlalb_lane(float16x8_t vd, mfloat8x16_t 
vn, mfloat8x8_t vm, fp
   return vmlalbq_lane_f16_mf8_fpm(vd, vn, vm, 0, fpm);
 }
 
+// CHECK: declare void @llvm.aarch64.set.fpmr(i64)  [[ATTR1:#.*]]
+
 // CHECK-LABEL: define dso_local <8 x half> @test_vmlalb_laneq(
 // CHECK-SAME: <8 x half> noundef [[VD:%.*]], <16 x i8> [[VN:%.*]], <16 x i8> 
[[VM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
@@ -379,3 +381,5 @@ float32x4_t test_vmlalltt_lane(float32x4_t vd, mfloat8x16_t 
vn, mfloat8x8_t vm,
 float32x4_t test_vmlalltt_laneq(float32x4_t vd, mfloat8x16_t vn, mfloat8x16_t 
vm, fpm_t fpm) {
   return vmlallttq_laneq_f32_mf8_fpm(vd, vn, vm, 15, fpm);
 }
+
+// CHECK: attributes [[ATTR1]] = { nocallback nofree nosync nounwind 
willreturn memory(inaccessiblemem: write) }
diff --git a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_cvt.c 
b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_cvt.c
index ae2e780f84cfe..3dbeb0aab6e25 100644
--- a/clang/test