[llvm-branch-commits] [clang] [HLSL][Matrix] EmitFromMemory when emitting load vector and matrix element LValues (PR #178315)

via llvm-branch-commits Tue, 27 Jan 2026 14:58:46 -0800

llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-clang-codegen

Author: Deric C. (Icohedron)

<details>
<summary>Changes</summary>

Fixes #<!-- -->177712
This PR requires #<!-- -->177708 to be merged first.

The MatrixElt and VectorElt cases of `EmitLoadOfLValue` did not convert the
extracted scalar from its load/store type to its primary IR type, as the other
cases do. This caused issues for HLSL in particular, which represents bools as
i32 in their load/store type and as i1 in their primary IR type, and therefore
requires a conversion between the two forms.

---
Full diff: https://github.com/llvm/llvm-project/pull/178315.diff


2 Files Affected:

- (modified) clang/lib/CodeGen/CGExpr.cpp (+12-7) 
- (modified) clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl (+53)


``````````diff
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 490377c04b034..7f817000acb68 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -2445,8 +2445,9 @@ RValue CodeGenFunction::EmitLoadOfLValue(LValue LV, SourceLocation Loc) {
   if (LV.isVectorElt()) {
     llvm::LoadInst *Load = Builder.CreateLoad(LV.getVectorAddress(),
                                               LV.isVolatileQualified());
-    return RValue::get(Builder.CreateExtractElement(Load, LV.getVectorIdx(),
-                                                    "vecext"));
+    llvm::Value *Elt =
+        Builder.CreateExtractElement(Load, LV.getVectorIdx(), "vecext");
+    return RValue::get(EmitFromMemory(Elt, LV.getType()));
   }
 
   // If this is a reference to a subset of the elements of a vector, either
@@ -2461,14 +2462,18 @@ RValue CodeGenFunction::EmitLoadOfLValue(LValue LV, SourceLocation Loc) {
 
   if (LV.isMatrixElt()) {
     llvm::Value *Idx = LV.getMatrixIdx();
-    if (CGM.getCodeGenOpts().OptimizationLevel > 0) {
-      const auto *const MatTy = LV.getType()->castAs<ConstantMatrixType>();
-      llvm::MatrixBuilder MB(Builder);
-      MB.CreateIndexAssumption(Idx, MatTy->getNumElementsFlattened());
+    QualType EltTy = LV.getType();
+    if (const auto *MatTy = EltTy->getAs<ConstantMatrixType>()) {
+      EltTy = MatTy->getElementType();
+      if (CGM.getCodeGenOpts().OptimizationLevel > 0) {
+        llvm::MatrixBuilder MB(Builder);
+        MB.CreateIndexAssumption(Idx, MatTy->getNumElementsFlattened());
+      }
     }
     llvm::LoadInst *Load =
         Builder.CreateLoad(LV.getMatrixAddress(), LV.isVolatileQualified());
-    return RValue::get(Builder.CreateExtractElement(Load, Idx, "matrixext"));
+    llvm::Value *Elt = Builder.CreateExtractElement(Load, Idx, "matrixext");
+    return RValue::get(EmitFromMemory(Elt, EltTy));
   }
   if (LV.isMatrixRow()) {
     QualType MatTy = LV.getType();
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl
index c11c8498ada45..881e6b5dd525a 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl
@@ -180,3 +180,56 @@ export void call8(int3x1 M) {
     int3 V = (int3)M;
 }
 
+// vector flat cast from matrix of same size (bool)
+// CHECK-LABEL: call9
+// CHECK:    [[M_ADDR:%.*]] = alloca [2 x i32], align 4
+// CHECK-NEXT:    [[V:%.*]] = alloca <2 x i32>, align 8
+// CHECK-NEXT:    [[HLSL_EWCAST_SRC:%.*]] = alloca [2 x i32], align 4
+// CHECK-NEXT:    [[FLATCAST_TMP:%.*]] = alloca <2 x i1>, align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = zext <2 x i1> %M to <2 x i32>
+// CHECK-NEXT:    store <2 x i32> [[TMP0]], ptr [[M_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[M_ADDR]], align 4
+// CHECK-NEXT:    store <2 x i32> [[TMP1]], ptr [[HLSL_EWCAST_SRC]], align 4
+// CHECK-NEXT:    [[MATRIX_GEP:%.*]] = getelementptr inbounds <2 x i32>, ptr [[HLSL_EWCAST_SRC]], i32 0
+// CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i1>, ptr [[FLATCAST_TMP]], align 8
+// CHECK-NEXT:    [[TMP3:%.*]] = load <2 x i32>, ptr [[MATRIX_GEP]], align 4
+// CHECK-NEXT:    [[MATRIXEXT:%.*]] = extractelement <2 x i32> [[TMP3]], i32 0
+// CHECK-NEXT:    [[LOADEDV:%.*]] = trunc i32 [[MATRIXEXT]] to i1
+// CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x i1> [[TMP2]], i1 [[LOADEDV]], i64 0
+// CHECK-NEXT:    [[TMP5:%.*]] = load <2 x i32>, ptr [[MATRIX_GEP]], align 4
+// CHECK-NEXT:    [[MATRIXEXT1:%.*]] = extractelement <2 x i32> [[TMP5]], i32 1
+// CHECK-NEXT:    [[LOADEDV2:%.*]] = trunc i32 [[MATRIXEXT1]] to i1
+// CHECK-NEXT:    [[TMP6:%.*]] = insertelement <2 x i1> [[TMP4]], i1 [[LOADEDV2]], i64 1
+// CHECK-NEXT:    [[TMP7:%.*]] = zext <2 x i1> [[TMP6]] to <2 x i32>
+// CHECK-NEXT:    store <2 x i32> [[TMP7]], ptr [[V]], align 8
+// CHECK-NEXT:    ret void
+export void call9(bool1x2 M) {
+    bool2 V = (bool2)M;
+}
+
+struct BoolVecStruct {
+    bool2 V;
+};
+
+// vector flat cast from struct containing bool vector
+// CHECK-LABEL: call10
+// CHECK:    [[V:%.*]] = alloca <2 x i32>, align 8
+// CHECK-NEXT:    [[AGG_TEMP:%.*]] = alloca %struct.BoolVecStruct, align 1
+// CHECK-NEXT:    [[FLATCAST_TMP:%.*]] = alloca <2 x i1>, align 8
+// CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[AGG_TEMP]], ptr align 1 %s, i32 8, i1 false)
+// CHECK-NEXT:    [[VECTOR_GEP:%.*]] = getelementptr inbounds %struct.BoolVecStruct, ptr [[AGG_TEMP]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i1>, ptr [[FLATCAST_TMP]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[VECTOR_GEP]], align 8
+// CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <2 x i32> [[TMP1]], i32 0
+// CHECK-NEXT:    [[LOADEDV:%.*]] = trunc i32 [[VECEXT]] to i1
+// CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x i1> [[TMP0]], i1 [[LOADEDV]], i64 0
+// CHECK-NEXT:    [[TMP3:%.*]] = load <2 x i32>, ptr [[VECTOR_GEP]], align 8
+// CHECK-NEXT:    [[VECEXT1:%.*]] = extractelement <2 x i32> [[TMP3]], i32 1
+// CHECK-NEXT:    [[LOADEDV2:%.*]] = trunc i32 [[VECEXT1]] to i1
+// CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x i1> [[TMP2]], i1 [[LOADEDV2]], i64 1
+// CHECK-NEXT:    [[TMP5:%.*]] = zext <2 x i1> [[TMP4]] to <2 x i32>
+// CHECK-NEXT:    store <2 x i32> [[TMP5]], ptr [[V]], align 8
+// CHECK-NEXT:    ret void
+export void call10(BoolVecStruct s) {
+    bool2 V = (bool2)s;
+}

``````````

</details>


https://github.com/llvm/llvm-project/pull/178315
_______________________________________________
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [clang] [HLSL][Matrix] EmitFromMemory when emitting load vector and matrix element LValues (PR #178315)

Reply via email to