https://github.com/fhahn created 
https://github.com/llvm/llvm-project/pull/164821

Currently Clang usually leaves padding bits uninitialized, which means
they are undef at the moment.

When expanding stores of vector types to include padding, the padding
lanes will be poison, hence the padding bits will be poison.

This interacts badly with coercion of arguments and return values, where
3 x float vectors will be loaded as an i128 integer; poisoning the padding
bits will make the whole value poison.

I'm not sure if there's a better way, but I think we have a number of places
that currently rely on the padding being undef, not poison.

>From 751a31d39b8ca061c44e58a3fe365484e192a3c1 Mon Sep 17 00:00:00 2001
From: Florian Hahn <[email protected]>
Date: Thu, 23 Oct 2025 14:01:19 +0100
Subject: [PATCH 1/2] [Clang] Add test

---
 .../CodeGen/AArch64/ext-vector-coercion.c     | 42 +++++++++++++++++++
 1 file changed, 42 insertions(+)
 create mode 100644 clang/test/CodeGen/AArch64/ext-vector-coercion.c

diff --git a/clang/test/CodeGen/AArch64/ext-vector-coercion.c 
b/clang/test/CodeGen/AArch64/ext-vector-coercion.c
new file mode 100644
index 0000000000000..3680225e1e84a
--- /dev/null
+++ b/clang/test/CodeGen/AArch64/ext-vector-coercion.c
@@ -0,0 +1,42 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --version 6
+// RUN: %clang_cc1 -fenable-matrix -triple arm64-apple-macosx %s -emit-llvm 
-disable-llvm-passes -o - | FileCheck %s
+
+typedef float float3 __attribute__((ext_vector_type(3)));
+struct Vec3 {
+  union {
+    struct {
+      float x;
+      float y;
+      float z;
+    };
+  float vec __attribute__((ext_vector_type(3)));
+  };
+};
+
+// CHECK-LABEL: define i128 @add(
+// CHECK-SAME: i128 [[A_COERCE:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[RETVAL:%.*]] = alloca [[STRUCT_VEC3:%.*]], align 16
+// CHECK-NEXT:    [[A:%.*]] = alloca [[STRUCT_VEC3]], align 16
+// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw 
[[STRUCT_VEC3]], ptr [[A]], i32 0, i32 0
+// CHECK-NEXT:    store i128 [[A_COERCE]], ptr [[COERCE_DIVE]], align 16
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_VEC3]], 
ptr [[A]], i32 0, i32 0
+// CHECK-NEXT:    [[LOADVECN:%.*]] = load <4 x float>, ptr [[TMP0]], align 16
+// CHECK-NEXT:    [[EXTRACTVEC:%.*]] = shufflevector <4 x float> [[LOADVECN]], 
<4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_VEC3]], 
ptr [[A]], i32 0, i32 0
+// CHECK-NEXT:    [[LOADVECN1:%.*]] = load <4 x float>, ptr [[TMP1]], align 16
+// CHECK-NEXT:    [[EXTRACTVEC2:%.*]] = shufflevector <4 x float> 
[[LOADVECN1]], <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
+// CHECK-NEXT:    [[ADD:%.*]] = fadd <3 x float> [[EXTRACTVEC]], 
[[EXTRACTVEC2]]
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_VEC3]], 
ptr [[RETVAL]], i32 0, i32 0
+// CHECK-NEXT:    [[EXTRACTVEC3:%.*]] = shufflevector <3 x float> [[ADD]], <3 
x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
+// CHECK-NEXT:    store <4 x float> [[EXTRACTVEC3]], ptr [[TMP2]], align 16
+// CHECK-NEXT:    [[COERCE_DIVE4:%.*]] = getelementptr inbounds nuw 
[[STRUCT_VEC3]], ptr [[RETVAL]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP3:%.*]] = load i128, ptr [[COERCE_DIVE4]], align 16
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+struct Vec3 add(struct Vec3 a) {
+  struct Vec3 res;
+  res.vec = a.vec + a.vec;
+  return res;
+}
+

>From fd3df1e02956799793ed9dfabb0bb63a2090110d Mon Sep 17 00:00:00 2001
From: Florian Hahn <[email protected]>
Date: Thu, 23 Oct 2025 14:03:30 +0100
Subject: [PATCH 2/2] [Clang] Freeze padded vectors before storing.

Currently Clang usually leaves padding bits uninitialized, which means
they are undef at the moment.

When expanding stores of vector types to include padding, the padding
lanes will be poison, hence the padding bits will be poison.

This interacts badly with coercion of arguments and return values, where
3 x float vectors will be loaded as an i128 integer; poisoning the padding
bits will make the whole value poison.
---
 clang/lib/CodeGen/CGExpr.cpp                     | 3 +++
 clang/test/CodeGen/AArch64/ext-vector-coercion.c | 7 ++++---
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 301d5770cf78f..a581cf821092f 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -2300,6 +2300,9 @@ void CodeGenFunction::EmitStoreOfScalar(llvm::Value 
*Value, Address Addr,
         SmallVector<int, 16> Mask(NewVecTy->getNumElements(), -1);
         std::iota(Mask.begin(), Mask.begin() + VecTy->getNumElements(), 0);
         Value = Builder.CreateShuffleVector(Value, Mask, "extractVec");
+        // The extra lanes will be poison. Freeze the whole vector to make sure
+        // the padding memory is not poisoned, which may break coercion.
+        Value = Builder.CreateFreeze(Value);
         SrcTy = NewVecTy;
       }
       if (Addr.getElementType() != SrcTy)
diff --git a/clang/test/CodeGen/AArch64/ext-vector-coercion.c 
b/clang/test/CodeGen/AArch64/ext-vector-coercion.c
index 3680225e1e84a..44638fac1560a 100644
--- a/clang/test/CodeGen/AArch64/ext-vector-coercion.c
+++ b/clang/test/CodeGen/AArch64/ext-vector-coercion.c
@@ -29,10 +29,11 @@ struct Vec3 {
 // CHECK-NEXT:    [[ADD:%.*]] = fadd <3 x float> [[EXTRACTVEC]], 
[[EXTRACTVEC2]]
 // CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_VEC3]], 
ptr [[RETVAL]], i32 0, i32 0
 // CHECK-NEXT:    [[EXTRACTVEC3:%.*]] = shufflevector <3 x float> [[ADD]], <3 
x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
-// CHECK-NEXT:    store <4 x float> [[EXTRACTVEC3]], ptr [[TMP2]], align 16
+// CHECK-NEXT:    [[TMP3:%.*]] = freeze <4 x float> [[EXTRACTVEC3]]
+// CHECK-NEXT:    store <4 x float> [[TMP3]], ptr [[TMP2]], align 16
 // CHECK-NEXT:    [[COERCE_DIVE4:%.*]] = getelementptr inbounds nuw 
[[STRUCT_VEC3]], ptr [[RETVAL]], i32 0, i32 0
-// CHECK-NEXT:    [[TMP3:%.*]] = load i128, ptr [[COERCE_DIVE4]], align 16
-// CHECK-NEXT:    ret i128 [[TMP3]]
+// CHECK-NEXT:    [[TMP4:%.*]] = load i128, ptr [[COERCE_DIVE4]], align 16
+// CHECK-NEXT:    ret i128 [[TMP4]]
 //
 struct Vec3 add(struct Vec3 a) {
   struct Vec3 res;

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to