https://github.com/momchil-velikov updated 
https://github.com/llvm/llvm-project/pull/192977

>From b0c58f23b2efe774a79e4b2e76766b357d9d63f7 Mon Sep 17 00:00:00 2001
From: Momchil Velikov <[email protected]>
Date: Mon, 20 Apr 2026 13:22:10 +0000
Subject: [PATCH 1/2] [Clang] Fix incorrect type for `__mfp8` in
 `extractelement` codegen

The codegen for extracting an element from an FP8 vector
was emitting a simple `extractelement` with `i8` type for
the extracted element. The `__mfp8` type is represented
as `<1 x i8>` in LLVM IR. This made Clang inconsistent: some
`__mfp8` expressions corresponded to LLVM IR values of type
`<1 x i8>` and others to values of type `i8`.

It also caused an assertion failure when the extracted element
was passed as a function argument.

This patch fixes the issue by bitcasting the extracted
element to `<1 x i8>`.
---
 clang/lib/CodeGen/CGExprScalar.cpp       |  9 ++++++-
 clang/test/CodeGen/AArch64/fp8-extract.c | 33 ++++++++++++++++++++++++
 clang/test/CodeGen/arm-mfp8.c            | 12 +++------
 3 files changed, 45 insertions(+), 9 deletions(-)
 create mode 100644 clang/test/CodeGen/AArch64/fp8-extract.c

diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp
index a8dcf22992983..f8997f0503491 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -2213,7 +2213,14 @@ Value *ScalarExprEmitter::VisitArraySubscriptExpr(ArraySubscriptExpr *E) {
   if (CGF.SanOpts.has(SanitizerKind::ArrayBounds))
     CGF.EmitBoundsCheck(E, E->getBase(), Idx, IdxTy, /*Accessed*/true);
 
-  return Builder.CreateExtractElement(Base, Idx, "vecext");
+  Value *Ret = Builder.CreateExtractElement(Base, Idx, "vecext");
+
+  // Even being a scalar the `__mfp8` type corresponds to `<1 x i8>` in LLVM IR.
+  // Cast the extracted element to the vector type to keep it consistent in
+  // Clang.
+  if (E->getType()->isMFloat8Type())
+    Ret = Builder.CreateBitCast(Ret, ConvertType(E->getType()), "mfp8ext");
+  return Ret;
 }
 
 Value *ScalarExprEmitter::VisitMatrixSingleSubscriptExpr(
diff --git a/clang/test/CodeGen/AArch64/fp8-extract.c 
b/clang/test/CodeGen/AArch64/fp8-extract.c
new file mode 100644
index 0000000000000..c0c113600de63
--- /dev/null
+++ b/clang/test/CodeGen/AArch64/fp8-extract.c
@@ -0,0 +1,33 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --version 6
+// RUN: %clang_cc1        -triple aarch64-none-linux-gnu -target-feature +neon -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg | FileCheck %s
+// RUN: %clang_cc1 -x c++ -triple aarch64-none-linux-gnu -target-feature +neon -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg | FileCheck %s -check-prefix CHECK-CXX
+
+// RUN: %clang_cc1        -triple aarch64-none-linux-gnu -target-feature +neon -disable-O0-optnone -Werror -Wall -S -o /dev/null %s
+
+// REQUIRES: aarch64-registered-target
+
+#include <arm_neon.h>
+
+// Test for an internal compiler error when extracting an element from an FP8
+// vector and passing it to a function.
+
+// CHECK-LABEL: define dso_local void @f(
+// CHECK-SAME: <16 x i8> [[V:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <16 x i8> [[V]], i32 0
+// CHECK-NEXT:    [[MFP8EXT:%.*]] = bitcast i8 [[VECEXT]] to <1 x i8>
+// CHECK-NEXT:    call void @g(<1 x i8> [[MFP8EXT]])
+// CHECK-NEXT:    ret void
+//
+// CHECK-CXX-LABEL: define dso_local void @_Z1f14__Mfloat8x16_t(
+// CHECK-CXX-SAME: <16 x i8> [[V:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-CXX-NEXT:  [[ENTRY:.*:]]
+// CHECK-CXX-NEXT:    [[VECEXT:%.*]] = extractelement <16 x i8> [[V]], i32 0
+// CHECK-CXX-NEXT:    [[MFP8EXT:%.*]] = bitcast i8 [[VECEXT]] to <1 x i8>
+// CHECK-CXX-NEXT:    call void @_Z1gu6__mfp8(<1 x i8> [[MFP8EXT]])
+// CHECK-CXX-NEXT:    ret void
+//
+void f(__Mfloat8x16_t v) {
+  void g(__mfp8);
+  g(v[0]);
+}
diff --git a/clang/test/CodeGen/arm-mfp8.c b/clang/test/CodeGen/arm-mfp8.c
index 9385b537f18b3..f99c865e99a95 100644
--- a/clang/test/CodeGen/arm-mfp8.c
+++ b/clang/test/CodeGen/arm-mfp8.c
@@ -64,20 +64,16 @@ __mfp8 func1n(__mfp8 mfp8) {
 // CHECK-C-LABEL: define dso_local <1 x i8> @test_extract_element(
 // CHECK-C-SAME: <16 x i8> [[X:%.*]], i32 noundef [[I:%.*]]) #[[ATTR0]] {
 // CHECK-C-NEXT:  [[ENTRY:.*:]]
-// CHECK-C-NEXT:    [[RETVAL:%.*]] = alloca <1 x i8>, align 1
 // CHECK-C-NEXT:    [[VECEXT:%.*]] = extractelement <16 x i8> [[X]], i32 [[I]]
-// CHECK-C-NEXT:    store i8 [[VECEXT]], ptr [[RETVAL]], align 1
-// CHECK-C-NEXT:    [[TMP0:%.*]] = load <1 x i8>, ptr [[RETVAL]], align 1
-// CHECK-C-NEXT:    ret <1 x i8> [[TMP0]]
+// CHECK-C-NEXT:    [[MFP8EXT:%.*]] = bitcast i8 [[VECEXT]] to <1 x i8>
+// CHECK-C-NEXT:    ret <1 x i8> [[MFP8EXT]]
 //
 // CHECK-CXX-LABEL: define dso_local <1 x i8> 
@_Z20test_extract_element14__Mfloat8x16_ti(
 // CHECK-CXX-SAME: <16 x i8> [[X:%.*]], i32 noundef [[I:%.*]]) #[[ATTR0]] {
 // CHECK-CXX-NEXT:  [[ENTRY:.*:]]
-// CHECK-CXX-NEXT:    [[RETVAL:%.*]] = alloca <1 x i8>, align 1
 // CHECK-CXX-NEXT:    [[VECEXT:%.*]] = extractelement <16 x i8> [[X]], i32 
[[I]]
-// CHECK-CXX-NEXT:    store i8 [[VECEXT]], ptr [[RETVAL]], align 1
-// CHECK-CXX-NEXT:    [[TMP0:%.*]] = load <1 x i8>, ptr [[RETVAL]], align 1
-// CHECK-CXX-NEXT:    ret <1 x i8> [[TMP0]]
+// CHECK-CXX-NEXT:    [[MFP8EXT:%.*]] = bitcast i8 [[VECEXT]] to <1 x i8>
+// CHECK-CXX-NEXT:    ret <1 x i8> [[MFP8EXT]]
 //
 mfloat8_t test_extract_element(mfloat8x16_t x, int i) {
   return x[i];

>From 5fe924e355eeae522efb8490f4ccd3eca7b2378c Mon Sep 17 00:00:00 2001
From: Momchil Velikov <[email protected]>
Date: Fri, 8 May 2026 13:12:20 +0100
Subject: [PATCH 2/2] [fixup] Use `insertelement` instead of `bitcast`

---
 clang/lib/CodeGen/CGExprScalar.cpp       |  7 ++--
 clang/test/CodeGen/AArch64/fp8-extract.c | 47 +++++++++++++++++-------
 clang/test/CodeGen/arm-mfp8.c            |  8 ++--
 3 files changed, 41 insertions(+), 21 deletions(-)

diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp
index f8997f0503491..38cec47f676dc 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -2216,10 +2216,11 @@ Value *ScalarExprEmitter::VisitArraySubscriptExpr(ArraySubscriptExpr *E) {
   Value *Ret = Builder.CreateExtractElement(Base, Idx, "vecext");
 
   // Even being a scalar the `__mfp8` type corresponds to `<1 x i8>` in LLVM IR.
-  // Cast the extracted element to the vector type to keep it consistent in
-  // Clang.
   if (E->getType()->isMFloat8Type())
-    Ret = Builder.CreateBitCast(Ret, ConvertType(E->getType()), "mfp8ext");
+    Ret = Builder.CreateInsertElement(
+        llvm::PoisonValue::get(llvm::FixedVectorType::get(CGF.Int8Ty, 1)), Ret,
+        uint64_t(0), "mfp8ext");
+
   return Ret;
 }
 
diff --git a/clang/test/CodeGen/AArch64/fp8-extract.c 
b/clang/test/CodeGen/AArch64/fp8-extract.c
index c0c113600de63..7fa93c6f15305 100644
--- a/clang/test/CodeGen/AArch64/fp8-extract.c
+++ b/clang/test/CodeGen/AArch64/fp8-extract.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --version 6
-// RUN: %clang_cc1        -triple aarch64-none-linux-gnu -target-feature +neon -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg | FileCheck %s
-// RUN: %clang_cc1 -x c++ -triple aarch64-none-linux-gnu -target-feature +neon -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg | FileCheck %s -check-prefix CHECK-CXX
+// RUN: %clang_cc1        -triple aarch64-none-linux-gnu -target-feature +neon -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine | FileCheck %s
+// RUN: %clang_cc1 -x c++ -triple aarch64-none-linux-gnu -target-feature +neon -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine | FileCheck %s -check-prefix CHECK-CXX
 
 // RUN: %clang_cc1        -triple aarch64-none-linux-gnu -target-feature +neon -disable-O0-optnone -Werror -Wall -S -o /dev/null %s
 
@@ -11,23 +11,42 @@
 // Test for an internal compiler error when extracting an element from an FP8
 // vector and passing it to a function.
 
-// CHECK-LABEL: define dso_local void @f(
-// CHECK-SAME: <16 x i8> [[V:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-LABEL: define dso_local void @test_var(
+// CHECK-SAME: <16 x i8> [[V:%.*]], i32 noundef [[I:%.*]]) #[[ATTR0:[0-9]+]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <16 x i8> [[V]], i32 0
-// CHECK-NEXT:    [[MFP8EXT:%.*]] = bitcast i8 [[VECEXT]] to <1 x i8>
-// CHECK-NEXT:    call void @g(<1 x i8> [[MFP8EXT]])
+// CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <16 x i8> [[V]], i32 [[I]]
+// CHECK-NEXT:    [[MFP8CAST:%.*]] = insertelement <1 x i8> poison, i8 
[[VECEXT]], i64 0
+// CHECK-NEXT:    call void @g(<1 x i8> [[MFP8CAST]]) #[[ATTR2:[0-9]+]]
 // CHECK-NEXT:    ret void
 //
-// CHECK-CXX-LABEL: define dso_local void @_Z1f14__Mfloat8x16_t(
-// CHECK-CXX-SAME: <16 x i8> [[V:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-CXX-LABEL: define dso_local void @_Z8test_var14__Mfloat8x16_ti(
+// CHECK-CXX-SAME: <16 x i8> [[V:%.*]], i32 noundef [[I:%.*]]) 
#[[ATTR0:[0-9]+]] {
 // CHECK-CXX-NEXT:  [[ENTRY:.*:]]
-// CHECK-CXX-NEXT:    [[VECEXT:%.*]] = extractelement <16 x i8> [[V]], i32 0
-// CHECK-CXX-NEXT:    [[MFP8EXT:%.*]] = bitcast i8 [[VECEXT]] to <1 x i8>
-// CHECK-CXX-NEXT:    call void @_Z1gu6__mfp8(<1 x i8> [[MFP8EXT]])
+// CHECK-CXX-NEXT:    [[VECEXT:%.*]] = extractelement <16 x i8> [[V]], i32 
[[I]]
+// CHECK-CXX-NEXT:    [[MFP8CAST:%.*]] = insertelement <1 x i8> poison, i8 
[[VECEXT]], i64 0
+// CHECK-CXX-NEXT:    call void @_Z1gu6__mfp8(<1 x i8> [[MFP8CAST]]) 
#[[ATTR2:[0-9]+]]
 // CHECK-CXX-NEXT:    ret void
 //
-void f(__Mfloat8x16_t v) {
+void test_var(__Mfloat8x16_t v, int i) {
   void g(__mfp8);
-  g(v[0]);
+  g(v[i]);
+}
+
+// CHECK-LABEL: define dso_local void @test_cst(
+// CHECK-SAME: <16 x i8> [[V:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[MFP8CAST:%.*]] = shufflevector <16 x i8> [[V]], <16 x i8> 
poison, <1 x i32> <i32 3>
+// CHECK-NEXT:    call void @g(<1 x i8> [[MFP8CAST]]) #[[ATTR2]]
+// CHECK-NEXT:    ret void
+//
+// CHECK-CXX-LABEL: define dso_local void @_Z8test_cst14__Mfloat8x16_t(
+// CHECK-CXX-SAME: <16 x i8> [[V:%.*]]) #[[ATTR0]] {
+// CHECK-CXX-NEXT:  [[ENTRY:.*:]]
+// CHECK-CXX-NEXT:    [[MFP8CAST:%.*]] = shufflevector <16 x i8> [[V]], <16 x 
i8> poison, <1 x i32> <i32 3>
+// CHECK-CXX-NEXT:    call void @_Z1gu6__mfp8(<1 x i8> [[MFP8CAST]]) #[[ATTR2]]
+// CHECK-CXX-NEXT:    ret void
+//
+void test_cst(__Mfloat8x16_t v) {
+  void g(__mfp8);
+  g(v[3]);
 }
diff --git a/clang/test/CodeGen/arm-mfp8.c b/clang/test/CodeGen/arm-mfp8.c
index f99c865e99a95..82df375a45686 100644
--- a/clang/test/CodeGen/arm-mfp8.c
+++ b/clang/test/CodeGen/arm-mfp8.c
@@ -65,15 +65,15 @@ __mfp8 func1n(__mfp8 mfp8) {
 // CHECK-C-SAME: <16 x i8> [[X:%.*]], i32 noundef [[I:%.*]]) #[[ATTR0]] {
 // CHECK-C-NEXT:  [[ENTRY:.*:]]
 // CHECK-C-NEXT:    [[VECEXT:%.*]] = extractelement <16 x i8> [[X]], i32 [[I]]
-// CHECK-C-NEXT:    [[MFP8EXT:%.*]] = bitcast i8 [[VECEXT]] to <1 x i8>
-// CHECK-C-NEXT:    ret <1 x i8> [[MFP8EXT]]
+// CHECK-C-NEXT:    [[MFP8CAST:%.*]] = insertelement <1 x i8> poison, i8 
[[VECEXT]], i64 0
+// CHECK-C-NEXT:    ret <1 x i8> [[MFP8CAST]]
 //
 // CHECK-CXX-LABEL: define dso_local <1 x i8> 
@_Z20test_extract_element14__Mfloat8x16_ti(
 // CHECK-CXX-SAME: <16 x i8> [[X:%.*]], i32 noundef [[I:%.*]]) #[[ATTR0]] {
 // CHECK-CXX-NEXT:  [[ENTRY:.*:]]
 // CHECK-CXX-NEXT:    [[VECEXT:%.*]] = extractelement <16 x i8> [[X]], i32 
[[I]]
-// CHECK-CXX-NEXT:    [[MFP8EXT:%.*]] = bitcast i8 [[VECEXT]] to <1 x i8>
-// CHECK-CXX-NEXT:    ret <1 x i8> [[MFP8EXT]]
+// CHECK-CXX-NEXT:    [[MFP8CAST:%.*]] = insertelement <1 x i8> poison, i8 
[[VECEXT]], i64 0
+// CHECK-CXX-NEXT:    ret <1 x i8> [[MFP8CAST]]
 //
 mfloat8_t test_extract_element(mfloat8x16_t x, int i) {
   return x[i];

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to