https://github.com/Icohedron created 
https://github.com/llvm/llvm-project/pull/178315

Fixes #177712
This PR requires #177708 to be merged first.

The MatrixElt and VectorElt cases of `EmitLoadOfLValue` did not convert the
extracted scalar from its load/store type to its primary IR type, as the other
cases do. This caused issues for HLSL in particular, where a bool uses i32 as
its load/store type and i1 as its primary IR type, so the value must be
converted between the two forms on every load and store.

From 3c3844dbe7d6b79de4f7a86eca7d1ad9480ca21c Mon Sep 17 00:00:00 2001
From: Deric Cheung <[email protected]>
Date: Tue, 27 Jan 2026 14:51:39 -0800
Subject: [PATCH] EmitFromMemory when emitting load vector and matrix element
 LValue

---
 clang/lib/CodeGen/CGExpr.cpp                  | 19 ++++---
 .../BasicFeatures/VectorElementwiseCast.hlsl  | 53 +++++++++++++++++++
 2 files changed, 65 insertions(+), 7 deletions(-)

diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 490377c04b034..7f817000acb68 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -2445,8 +2445,9 @@ RValue CodeGenFunction::EmitLoadOfLValue(LValue LV, SourceLocation Loc) {
   if (LV.isVectorElt()) {
     llvm::LoadInst *Load = Builder.CreateLoad(LV.getVectorAddress(),
                                               LV.isVolatileQualified());
-    return RValue::get(Builder.CreateExtractElement(Load, LV.getVectorIdx(),
-                                                    "vecext"));
+    llvm::Value *Elt =
+        Builder.CreateExtractElement(Load, LV.getVectorIdx(), "vecext");
+    return RValue::get(EmitFromMemory(Elt, LV.getType()));
   }
 
   // If this is a reference to a subset of the elements of a vector, either
@@ -2461,14 +2462,18 @@ RValue CodeGenFunction::EmitLoadOfLValue(LValue LV, SourceLocation Loc) {
 
   if (LV.isMatrixElt()) {
     llvm::Value *Idx = LV.getMatrixIdx();
-    if (CGM.getCodeGenOpts().OptimizationLevel > 0) {
-      const auto *const MatTy = LV.getType()->castAs<ConstantMatrixType>();
-      llvm::MatrixBuilder MB(Builder);
-      MB.CreateIndexAssumption(Idx, MatTy->getNumElementsFlattened());
+    QualType EltTy = LV.getType();
+    if (const auto *MatTy = EltTy->getAs<ConstantMatrixType>()) {
+      EltTy = MatTy->getElementType();
+      if (CGM.getCodeGenOpts().OptimizationLevel > 0) {
+        llvm::MatrixBuilder MB(Builder);
+        MB.CreateIndexAssumption(Idx, MatTy->getNumElementsFlattened());
+      }
     }
     llvm::LoadInst *Load =
         Builder.CreateLoad(LV.getMatrixAddress(), LV.isVolatileQualified());
-    return RValue::get(Builder.CreateExtractElement(Load, Idx, "matrixext"));
+    llvm::Value *Elt = Builder.CreateExtractElement(Load, Idx, "matrixext");
+    return RValue::get(EmitFromMemory(Elt, EltTy));
   }
   if (LV.isMatrixRow()) {
     QualType MatTy = LV.getType();
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl
index c11c8498ada45..881e6b5dd525a 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl
@@ -180,3 +180,56 @@ export void call8(int3x1 M) {
     int3 V = (int3)M;
 }
 
+// vector flat cast from matrix of same size (bool)
+// CHECK-LABEL: call9
+// CHECK:    [[M_ADDR:%.*]] = alloca [2 x i32], align 4
+// CHECK-NEXT:    [[V:%.*]] = alloca <2 x i32>, align 8
+// CHECK-NEXT:    [[HLSL_EWCAST_SRC:%.*]] = alloca [2 x i32], align 4
+// CHECK-NEXT:    [[FLATCAST_TMP:%.*]] = alloca <2 x i1>, align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = zext <2 x i1> %M to <2 x i32>
+// CHECK-NEXT:    store <2 x i32> [[TMP0]], ptr [[M_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[M_ADDR]], align 4
+// CHECK-NEXT:    store <2 x i32> [[TMP1]], ptr [[HLSL_EWCAST_SRC]], align 4
+// CHECK-NEXT:    [[MATRIX_GEP:%.*]] = getelementptr inbounds <2 x i32>, ptr [[HLSL_EWCAST_SRC]], i32 0
+// CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i1>, ptr [[FLATCAST_TMP]], align 8
+// CHECK-NEXT:    [[TMP3:%.*]] = load <2 x i32>, ptr [[MATRIX_GEP]], align 4
+// CHECK-NEXT:    [[MATRIXEXT:%.*]] = extractelement <2 x i32> [[TMP3]], i32 0
+// CHECK-NEXT:    [[LOADEDV:%.*]] = trunc i32 [[MATRIXEXT]] to i1
+// CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x i1> [[TMP2]], i1 [[LOADEDV]], i64 0
+// CHECK-NEXT:    [[TMP5:%.*]] = load <2 x i32>, ptr [[MATRIX_GEP]], align 4
+// CHECK-NEXT:    [[MATRIXEXT1:%.*]] = extractelement <2 x i32> [[TMP5]], i32 1
+// CHECK-NEXT:    [[LOADEDV2:%.*]] = trunc i32 [[MATRIXEXT1]] to i1
+// CHECK-NEXT:    [[TMP6:%.*]] = insertelement <2 x i1> [[TMP4]], i1 [[LOADEDV2]], i64 1
+// CHECK-NEXT:    [[TMP7:%.*]] = zext <2 x i1> [[TMP6]] to <2 x i32>
+// CHECK-NEXT:    store <2 x i32> [[TMP7]], ptr [[V]], align 8
+// CHECK-NEXT:    ret void
+export void call9(bool1x2 M) {
+    bool2 V = (bool2)M;
+}
+
+struct BoolVecStruct {
+    bool2 V;
+};
+
+// vector flat cast from struct containing bool vector
+// CHECK-LABEL: call10
+// CHECK:    [[V:%.*]] = alloca <2 x i32>, align 8
+// CHECK-NEXT:    [[AGG_TEMP:%.*]] = alloca %struct.BoolVecStruct, align 1
+// CHECK-NEXT:    [[FLATCAST_TMP:%.*]] = alloca <2 x i1>, align 8
+// CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[AGG_TEMP]], ptr align 1 %s, i32 8, i1 false)
+// CHECK-NEXT:    [[VECTOR_GEP:%.*]] = getelementptr inbounds %struct.BoolVecStruct, ptr [[AGG_TEMP]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i1>, ptr [[FLATCAST_TMP]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[VECTOR_GEP]], align 8
+// CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <2 x i32> [[TMP1]], i32 0
+// CHECK-NEXT:    [[LOADEDV:%.*]] = trunc i32 [[VECEXT]] to i1
+// CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x i1> [[TMP0]], i1 [[LOADEDV]], i64 0
+// CHECK-NEXT:    [[TMP3:%.*]] = load <2 x i32>, ptr [[VECTOR_GEP]], align 8
+// CHECK-NEXT:    [[VECEXT1:%.*]] = extractelement <2 x i32> [[TMP3]], i32 1
+// CHECK-NEXT:    [[LOADEDV2:%.*]] = trunc i32 [[VECEXT1]] to i1
+// CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x i1> [[TMP2]], i1 [[LOADEDV2]], i64 1
+// CHECK-NEXT:    [[TMP5:%.*]] = zext <2 x i1> [[TMP4]] to <2 x i32>
+// CHECK-NEXT:    store <2 x i32> [[TMP5]], ptr [[V]], align 8
+// CHECK-NEXT:    ret void
+export void call10(BoolVecStruct s) {
+    bool2 V = (bool2)s;
+}

_______________________________________________
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

Reply via email to