Author: Deric C.
Date: 2026-02-20T16:01:31-08:00
New Revision: 6b44a2f9119012451b3cb9f37ba4cb5c6b2de53e

URL: 
https://github.com/llvm/llvm-project/commit/6b44a2f9119012451b3cb9f37ba4cb5c6b2de53e
DIFF: 
https://github.com/llvm/llvm-project/commit/6b44a2f9119012451b3cb9f37ba4cb5c6b2de53e.diff

LOG: [HLSL][Matrix] Make matrix single element accessor return a scalar instead 
of vector (#182609)

Fixes #182599 by making `SemaHLSL::checkMatrixComponent` return the
matrix's element type (a scalar) instead of a one-element vector when the
accessor selects exactly one component.

Added: 
    

Modified: 
    clang/lib/Sema/SemaHLSL.cpp
    clang/test/AST/HLSL/matrix-member-access-scalar.hlsl
    clang/test/CodeGenHLSL/matrix-member-one-based-accessor-scalar-load.hlsl
    clang/test/CodeGenHLSL/matrix-member-one-based-accessor-scalar-store.hlsl
    clang/test/CodeGenHLSL/matrix-member-zero-based-accessor-scalar-load.hlsl
    clang/test/CodeGenHLSL/matrix-member-zero-based-accessor-scalar-store.hlsl

Removed: 
    


################################################################################
diff  --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index 802a1bdbccfdd..911dba40d3bde 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -5296,6 +5296,8 @@ QualType SemaHLSL::checkMatrixComponent(Sema &S, QualType 
baseType,
   }
 
   QualType ElemTy = MT->getElementType();
+  if (NumComponents == 1)
+    return ElemTy;
   QualType VT = S.Context.getExtVectorType(ElemTy, NumComponents);
   if (HasRepeated)
     VK = VK_PRValue;

diff  --git a/clang/test/AST/HLSL/matrix-member-access-scalar.hlsl 
b/clang/test/AST/HLSL/matrix-member-access-scalar.hlsl
index b403d27b29760..14c18aaffb77e 100644
--- a/clang/test/AST/HLSL/matrix-member-access-scalar.hlsl
+++ b/clang/test/AST/HLSL/matrix-member-access-scalar.hlsl
@@ -6,31 +6,27 @@ typedef float float3x3 __attribute__((matrix_type(3,3)));
 void ok() {
     float3x3 A;
 
-   // CHECK:      BinaryOperator {{.*}} 'vector<float, 1>' lvalue '='
-   // CHECK-NEXT: MatrixElementExpr {{.*}} 'vector<float, 1>' lvalue _m12
+   // CHECK:      BinaryOperator {{.*}} 'float' lvalue '='
+   // CHECK-NEXT: MatrixElementExpr {{.*}} 'float' lvalue _m12
    // CHECK-NEXT: DeclRefExpr {{.*}} 'float3x3':'matrix<float, 3, 3>' lvalue 
Var {{.*}} 'A' 'float3x3':'matrix<float, 3, 3>'
-   // CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector<float, 1>' <VectorSplat>
    // CHECK-NEXT: FloatingLiteral {{.*}} 'float' 3.140000e+00
     A._m12 = 3.14;
 
    // CHECK: VarDecl {{.*}} r 'float' cinit
-   // CHECK-NEXT: ImplicitCastExpr {{.*}} 'float' <HLSLVectorTruncation>
-   // CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector<float, 1>' <LValueToRValue>
-   // CHECK-NEXT: MatrixElementExpr {{.*}}  'vector<float, 1>' lvalue _m00
+   // CHECK-NEXT: ImplicitCastExpr {{.*}} 'float' <LValueToRValue>
+   // CHECK-NEXT: MatrixElementExpr {{.*}}  'float' lvalue _m00
    // CHECK-NEXT: DeclRefExpr {{.*}} 'float3x3':'matrix<float, 3, 3>' lvalue 
Var {{.*}} 'A' 'float3x3':'matrix<float, 3, 3>'
     float r = A._m00;
 
    // CHECK: VarDecl {{.*}} good1 'float' cinit
-   // CHECK-NEXT: ImplicitCastExpr {{.*}} 'float' <HLSLVectorTruncation>
-   // CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector<float, 1>' <LValueToRValue>
-   // CHECK-NEXT: MatrixElementExpr {{.*}}  'vector<float, 1>' lvalue _11
+   // CHECK-NEXT: ImplicitCastExpr {{.*}} 'float' <LValueToRValue>
+   // CHECK-NEXT: MatrixElementExpr {{.*}}  'float' lvalue _11
    // CHECK-NEXT: DeclRefExpr {{.*}} 'float3x3':'matrix<float, 3, 3>' lvalue 
Var {{.*}} 'A' 'float3x3':'matrix<float, 3, 3>'
     float good1 = A._11;
 
-   // CHECK:      BinaryOperator {{.*}} 'vector<float, 1>' lvalue '='
-   // CHECK-NEXT: MatrixElementExpr {{.*}} 'vector<float, 1>' lvalue _33
+   // CHECK:      BinaryOperator {{.*}} 'float' lvalue '='
+   // CHECK-NEXT: MatrixElementExpr {{.*}} 'float' lvalue _33
    // CHECK-NEXT: DeclRefExpr {{.*}} 'float3x3':'matrix<float, 3, 3>' lvalue 
Var {{.*}} 'A' 'float3x3':'matrix<float, 3, 3>'
-   // CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector<float, 1>' <VectorSplat>
    // CHECK-NEXT: ImplicitCastExpr {{.*}} 'float' <LValueToRValue>
    // CHECK-NEXT: DeclRefExpr {{.*}} 'float' lvalue Var {{.*}} 'R' 'float'
     float R;

diff  --git 
a/clang/test/CodeGenHLSL/matrix-member-one-based-accessor-scalar-load.hlsl 
b/clang/test/CodeGenHLSL/matrix-member-one-based-accessor-scalar-load.hlsl
index bedb9fdbe11c8..def8aa5440568 100644
--- a/clang/test/CodeGenHLSL/matrix-member-one-based-accessor-scalar-load.hlsl
+++ b/clang/test/CodeGenHLSL/matrix-member-one-based-accessor-scalar-load.hlsl
@@ -10,9 +10,8 @@
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [4 x <4 x i32>], align 4
 // CHECK-NEXT:    store <16 x i32> [[A]], ptr [[A_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i32>, ptr [[A_ADDR]], align 4
-// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> 
poison, <1 x i32> zeroinitializer
-// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x i32> [[TMP1]], i32 0
-// CHECK-NEXT:    ret i32 [[CAST_VTRUNC]]
+// CHECK-NEXT:    [[TMP1:%.*]] = extractelement <16 x i32> [[TMP0]], i32 0
+// CHECK-NEXT:    ret i32 [[TMP1]]
 //
 int Return11(int4x4 A) {
     return A._11;
@@ -24,9 +23,8 @@ int Return11(int4x4 A) {
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [4 x <4 x i32>], align 4
 // CHECK-NEXT:    store <16 x i32> [[A]], ptr [[A_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i32>, ptr [[A_ADDR]], align 4
-// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> 
poison, <1 x i32> <i32 1>
-// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x i32> [[TMP1]], i32 0
-// CHECK-NEXT:    ret i32 [[CAST_VTRUNC]]
+// CHECK-NEXT:    [[TMP1:%.*]] = extractelement <16 x i32> [[TMP0]], i32 1
+// CHECK-NEXT:    ret i32 [[TMP1]]
 //
 int Return12(int4x4 A) {
     return A._12;
@@ -38,9 +36,8 @@ int Return12(int4x4 A) {
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [4 x <4 x i32>], align 4
 // CHECK-NEXT:    store <16 x i32> [[A]], ptr [[A_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i32>, ptr [[A_ADDR]], align 4
-// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> 
poison, <1 x i32> <i32 2>
-// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x i32> [[TMP1]], i32 0
-// CHECK-NEXT:    ret i32 [[CAST_VTRUNC]]
+// CHECK-NEXT:    [[TMP1:%.*]] = extractelement <16 x i32> [[TMP0]], i32 2
+// CHECK-NEXT:    ret i32 [[TMP1]]
 //
 int Return13(int4x4 A) {
     return A._13;
@@ -52,9 +49,8 @@ int Return13(int4x4 A) {
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [4 x <4 x i32>], align 4
 // CHECK-NEXT:    store <16 x i32> [[A]], ptr [[A_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i32>, ptr [[A_ADDR]], align 4
-// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> 
poison, <1 x i32> <i32 3>
-// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x i32> [[TMP1]], i32 0
-// CHECK-NEXT:    ret i32 [[CAST_VTRUNC]]
+// CHECK-NEXT:    [[TMP1:%.*]] = extractelement <16 x i32> [[TMP0]], i32 3
+// CHECK-NEXT:    ret i32 [[TMP1]]
 //
 int Return14(int4x4 A) {
     return A._14;
@@ -66,9 +62,8 @@ int Return14(int4x4 A) {
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [4 x <4 x i32>], align 4
 // CHECK-NEXT:    store <16 x i32> [[A]], ptr [[A_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i32>, ptr [[A_ADDR]], align 4
-// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> 
poison, <1 x i32> <i32 4>
-// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x i32> [[TMP1]], i32 0
-// CHECK-NEXT:    ret i32 [[CAST_VTRUNC]]
+// CHECK-NEXT:    [[TMP1:%.*]] = extractelement <16 x i32> [[TMP0]], i32 4
+// CHECK-NEXT:    ret i32 [[TMP1]]
 //
 int Return21(int4x4 A) {
     return A._21;
@@ -80,9 +75,8 @@ int Return21(int4x4 A) {
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [4 x <4 x i32>], align 4
 // CHECK-NEXT:    store <16 x i32> [[A]], ptr [[A_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i32>, ptr [[A_ADDR]], align 4
-// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> 
poison, <1 x i32> <i32 5>
-// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x i32> [[TMP1]], i32 0
-// CHECK-NEXT:    ret i32 [[CAST_VTRUNC]]
+// CHECK-NEXT:    [[TMP1:%.*]] = extractelement <16 x i32> [[TMP0]], i32 5
+// CHECK-NEXT:    ret i32 [[TMP1]]
 //
 int Return22(int4x4 A) {
     return A._22;
@@ -94,9 +88,8 @@ int Return22(int4x4 A) {
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [4 x <4 x i32>], align 4
 // CHECK-NEXT:    store <16 x i32> [[A]], ptr [[A_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i32>, ptr [[A_ADDR]], align 4
-// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> 
poison, <1 x i32> <i32 6>
-// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x i32> [[TMP1]], i32 0
-// CHECK-NEXT:    ret i32 [[CAST_VTRUNC]]
+// CHECK-NEXT:    [[TMP1:%.*]] = extractelement <16 x i32> [[TMP0]], i32 6
+// CHECK-NEXT:    ret i32 [[TMP1]]
 //
 int Return23(int4x4 A) {
     return A._23;
@@ -108,9 +101,8 @@ int Return23(int4x4 A) {
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [4 x <4 x i32>], align 4
 // CHECK-NEXT:    store <16 x i32> [[A]], ptr [[A_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i32>, ptr [[A_ADDR]], align 4
-// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> 
poison, <1 x i32> <i32 7>
-// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x i32> [[TMP1]], i32 0
-// CHECK-NEXT:    ret i32 [[CAST_VTRUNC]]
+// CHECK-NEXT:    [[TMP1:%.*]] = extractelement <16 x i32> [[TMP0]], i32 7
+// CHECK-NEXT:    ret i32 [[TMP1]]
 //
 int Return24(int4x4 A) {
     return A._24;
@@ -122,9 +114,8 @@ int Return24(int4x4 A) {
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [4 x <4 x i32>], align 4
 // CHECK-NEXT:    store <16 x i32> [[A]], ptr [[A_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i32>, ptr [[A_ADDR]], align 4
-// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> 
poison, <1 x i32> <i32 8>
-// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x i32> [[TMP1]], i32 0
-// CHECK-NEXT:    ret i32 [[CAST_VTRUNC]]
+// CHECK-NEXT:    [[TMP1:%.*]] = extractelement <16 x i32> [[TMP0]], i32 8
+// CHECK-NEXT:    ret i32 [[TMP1]]
 //
 int Return31(int4x4 A) {
     return A._31;
@@ -136,9 +127,8 @@ int Return31(int4x4 A) {
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [4 x <4 x i32>], align 4
 // CHECK-NEXT:    store <16 x i32> [[A]], ptr [[A_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i32>, ptr [[A_ADDR]], align 4
-// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> 
poison, <1 x i32> <i32 9>
-// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x i32> [[TMP1]], i32 0
-// CHECK-NEXT:    ret i32 [[CAST_VTRUNC]]
+// CHECK-NEXT:    [[TMP1:%.*]] = extractelement <16 x i32> [[TMP0]], i32 9
+// CHECK-NEXT:    ret i32 [[TMP1]]
 //
 int Return32(int4x4 A) {
     return A._32;
@@ -150,9 +140,8 @@ int Return32(int4x4 A) {
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [4 x <4 x i32>], align 4
 // CHECK-NEXT:    store <16 x i32> [[A]], ptr [[A_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i32>, ptr [[A_ADDR]], align 4
-// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> 
poison, <1 x i32> <i32 10>
-// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x i32> [[TMP1]], i32 0
-// CHECK-NEXT:    ret i32 [[CAST_VTRUNC]]
+// CHECK-NEXT:    [[TMP1:%.*]] = extractelement <16 x i32> [[TMP0]], i32 10
+// CHECK-NEXT:    ret i32 [[TMP1]]
 //
 int Return33(int4x4 A) {
     return A._33;
@@ -164,9 +153,8 @@ int Return33(int4x4 A) {
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [4 x <4 x i32>], align 4
 // CHECK-NEXT:    store <16 x i32> [[A]], ptr [[A_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i32>, ptr [[A_ADDR]], align 4
-// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> 
poison, <1 x i32> <i32 11>
-// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x i32> [[TMP1]], i32 0
-// CHECK-NEXT:    ret i32 [[CAST_VTRUNC]]
+// CHECK-NEXT:    [[TMP1:%.*]] = extractelement <16 x i32> [[TMP0]], i32 11
+// CHECK-NEXT:    ret i32 [[TMP1]]
 //
 int Return34(int4x4 A) {
     return A._34;
@@ -178,9 +166,8 @@ int Return34(int4x4 A) {
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [4 x <4 x i32>], align 4
 // CHECK-NEXT:    store <16 x i32> [[A]], ptr [[A_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i32>, ptr [[A_ADDR]], align 4
-// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> 
poison, <1 x i32> <i32 12>
-// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x i32> [[TMP1]], i32 0
-// CHECK-NEXT:    ret i32 [[CAST_VTRUNC]]
+// CHECK-NEXT:    [[TMP1:%.*]] = extractelement <16 x i32> [[TMP0]], i32 12
+// CHECK-NEXT:    ret i32 [[TMP1]]
 //
 int Return41(int4x4 A) {
     return A._41;
@@ -192,9 +179,8 @@ int Return41(int4x4 A) {
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [4 x <4 x i32>], align 4
 // CHECK-NEXT:    store <16 x i32> [[A]], ptr [[A_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i32>, ptr [[A_ADDR]], align 4
-// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> 
poison, <1 x i32> <i32 13>
-// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x i32> [[TMP1]], i32 0
-// CHECK-NEXT:    ret i32 [[CAST_VTRUNC]]
+// CHECK-NEXT:    [[TMP1:%.*]] = extractelement <16 x i32> [[TMP0]], i32 13
+// CHECK-NEXT:    ret i32 [[TMP1]]
 //
 int Return42(int4x4 A) {
     return A._42;
@@ -206,9 +192,8 @@ int Return42(int4x4 A) {
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [4 x <4 x i32>], align 4
 // CHECK-NEXT:    store <16 x i32> [[A]], ptr [[A_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i32>, ptr [[A_ADDR]], align 4
-// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> 
poison, <1 x i32> <i32 14>
-// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x i32> [[TMP1]], i32 0
-// CHECK-NEXT:    ret i32 [[CAST_VTRUNC]]
+// CHECK-NEXT:    [[TMP1:%.*]] = extractelement <16 x i32> [[TMP0]], i32 14
+// CHECK-NEXT:    ret i32 [[TMP1]]
 //
 int Return43(int4x4 A) {
     return A._43;
@@ -220,9 +205,8 @@ int Return43(int4x4 A) {
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [4 x <4 x i32>], align 4
 // CHECK-NEXT:    store <16 x i32> [[A]], ptr [[A_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i32>, ptr [[A_ADDR]], align 4
-// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> 
poison, <1 x i32> <i32 15>
-// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x i32> [[TMP1]], i32 0
-// CHECK-NEXT:    ret i32 [[CAST_VTRUNC]]
+// CHECK-NEXT:    [[TMP1:%.*]] = extractelement <16 x i32> [[TMP0]], i32 15
+// CHECK-NEXT:    ret i32 [[TMP1]]
 //
 int Return44(int4x4 A) {
     return A._44;

diff  --git 
a/clang/test/CodeGenHLSL/matrix-member-one-based-accessor-scalar-store.hlsl 
b/clang/test/CodeGenHLSL/matrix-member-one-based-accessor-scalar-store.hlsl
index 3098a09f67100..fb3a46170ebe0 100644
--- a/clang/test/CodeGenHLSL/matrix-member-one-based-accessor-scalar-store.hlsl
+++ b/clang/test/CodeGenHLSL/matrix-member-one-based-accessor-scalar-store.hlsl
@@ -12,11 +12,8 @@
 // CHECK-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
 // CHECK-NEXT:    store i32 [[I]], ptr [[I_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[I_ADDR]], align 4
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x i32> poison, 
i32 [[TMP0]], i64 0
-// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x i32> 
[[SPLAT_SPLATINSERT]], <1 x i32> poison, <1 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull 
[[META4:![0-9]+]], !align [[META5:![0-9]+]]
-// CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x i32> [[SPLAT_SPLAT]], i32 0
-// CHECK-NEXT:    store i32 [[TMP2]], ptr [[TMP1]], align 4
+// CHECK-NEXT:    store i32 [[TMP0]], ptr [[TMP1]], align 4
 // CHECK-NEXT:    ret void
 //
 void StoreScalarAtMat11(out int4x4 A, int I) {
@@ -31,12 +28,9 @@ void StoreScalarAtMat11(out int4x4 A, int I) {
 // CHECK-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
 // CHECK-NEXT:    store i32 [[I]], ptr [[I_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[I_ADDR]], align 4
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x i32> poison, 
i32 [[TMP0]], i64 0
-// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x i32> 
[[SPLAT_SPLATINSERT]], <1 x i32> poison, <1 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull 
[[META4]], !align [[META5]]
-// CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x i32> [[SPLAT_SPLAT]], i32 0
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 
0, i32 1
-// CHECK-NEXT:    store i32 [[TMP2]], ptr [[TMP3]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 
0, i32 1
+// CHECK-NEXT:    store i32 [[TMP0]], ptr [[TMP2]], align 4
 // CHECK-NEXT:    ret void
 //
 void StoreScalarAtMat12(out int4x4 A, int I) {
@@ -51,12 +45,9 @@ void StoreScalarAtMat12(out int4x4 A, int I) {
 // CHECK-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
 // CHECK-NEXT:    store i32 [[I]], ptr [[I_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[I_ADDR]], align 4
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x i32> poison, 
i32 [[TMP0]], i64 0
-// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x i32> 
[[SPLAT_SPLATINSERT]], <1 x i32> poison, <1 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull 
[[META4]], !align [[META5]]
-// CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x i32> [[SPLAT_SPLAT]], i32 0
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 
0, i32 2
-// CHECK-NEXT:    store i32 [[TMP2]], ptr [[TMP3]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 
0, i32 2
+// CHECK-NEXT:    store i32 [[TMP0]], ptr [[TMP2]], align 4
 // CHECK-NEXT:    ret void
 //
 void StoreScalarAtMat13(out int4x4 A, int I) {
@@ -71,12 +62,9 @@ void StoreScalarAtMat13(out int4x4 A, int I) {
 // CHECK-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
 // CHECK-NEXT:    store i32 [[I]], ptr [[I_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[I_ADDR]], align 4
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x i32> poison, 
i32 [[TMP0]], i64 0
-// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x i32> 
[[SPLAT_SPLATINSERT]], <1 x i32> poison, <1 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull 
[[META4]], !align [[META5]]
-// CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x i32> [[SPLAT_SPLAT]], i32 0
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 
0, i32 3
-// CHECK-NEXT:    store i32 [[TMP2]], ptr [[TMP3]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 
0, i32 3
+// CHECK-NEXT:    store i32 [[TMP0]], ptr [[TMP2]], align 4
 // CHECK-NEXT:    ret void
 //
 void StoreScalarAtMat14(out int4x4 A, int I) {
@@ -91,12 +79,9 @@ void StoreScalarAtMat14(out int4x4 A, int I) {
 // CHECK-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
 // CHECK-NEXT:    store i32 [[I]], ptr [[I_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[I_ADDR]], align 4
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x i32> poison, 
i32 [[TMP0]], i64 0
-// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x i32> 
[[SPLAT_SPLATINSERT]], <1 x i32> poison, <1 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull 
[[META4]], !align [[META5]]
-// CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x i32> [[SPLAT_SPLAT]], i32 0
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 
0, i32 4
-// CHECK-NEXT:    store i32 [[TMP2]], ptr [[TMP3]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 
0, i32 4
+// CHECK-NEXT:    store i32 [[TMP0]], ptr [[TMP2]], align 4
 // CHECK-NEXT:    ret void
 //
 void StoreScalarAtMat21(out int4x4 A, int I) {
@@ -111,12 +96,9 @@ void StoreScalarAtMat21(out int4x4 A, int I) {
 // CHECK-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
 // CHECK-NEXT:    store i32 [[I]], ptr [[I_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[I_ADDR]], align 4
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x i32> poison, 
i32 [[TMP0]], i64 0
-// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x i32> 
[[SPLAT_SPLATINSERT]], <1 x i32> poison, <1 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull 
[[META4]], !align [[META5]]
-// CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x i32> [[SPLAT_SPLAT]], i32 0
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 
0, i32 5
-// CHECK-NEXT:    store i32 [[TMP2]], ptr [[TMP3]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 
0, i32 5
+// CHECK-NEXT:    store i32 [[TMP0]], ptr [[TMP2]], align 4
 // CHECK-NEXT:    ret void
 //
 void StoreScalarAtMat22(out int4x4 A, int I) {
@@ -131,12 +113,9 @@ void StoreScalarAtMat22(out int4x4 A, int I) {
 // CHECK-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
 // CHECK-NEXT:    store i32 [[I]], ptr [[I_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[I_ADDR]], align 4
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x i32> poison, 
i32 [[TMP0]], i64 0
-// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x i32> 
[[SPLAT_SPLATINSERT]], <1 x i32> poison, <1 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull 
[[META4]], !align [[META5]]
-// CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x i32> [[SPLAT_SPLAT]], i32 0
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 
0, i32 6
-// CHECK-NEXT:    store i32 [[TMP2]], ptr [[TMP3]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 
0, i32 6
+// CHECK-NEXT:    store i32 [[TMP0]], ptr [[TMP2]], align 4
 // CHECK-NEXT:    ret void
 //
 void StoreScalarAtMat23(out int4x4 A, int I) {
@@ -151,12 +130,9 @@ void StoreScalarAtMat23(out int4x4 A, int I) {
 // CHECK-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
 // CHECK-NEXT:    store i32 [[I]], ptr [[I_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[I_ADDR]], align 4
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x i32> poison, 
i32 [[TMP0]], i64 0
-// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x i32> 
[[SPLAT_SPLATINSERT]], <1 x i32> poison, <1 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull 
[[META4]], !align [[META5]]
-// CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x i32> [[SPLAT_SPLAT]], i32 0
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 
0, i32 7
-// CHECK-NEXT:    store i32 [[TMP2]], ptr [[TMP3]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 
0, i32 7
+// CHECK-NEXT:    store i32 [[TMP0]], ptr [[TMP2]], align 4
 // CHECK-NEXT:    ret void
 //
 void StoreScalarAtMat24(out int4x4 A, int I) {
@@ -171,12 +147,9 @@ void StoreScalarAtMat24(out int4x4 A, int I) {
 // CHECK-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
 // CHECK-NEXT:    store i32 [[I]], ptr [[I_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[I_ADDR]], align 4
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x i32> poison, 
i32 [[TMP0]], i64 0
-// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x i32> 
[[SPLAT_SPLATINSERT]], <1 x i32> poison, <1 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull 
[[META4]], !align [[META5]]
-// CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x i32> [[SPLAT_SPLAT]], i32 0
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 
0, i32 8
-// CHECK-NEXT:    store i32 [[TMP2]], ptr [[TMP3]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 
0, i32 8
+// CHECK-NEXT:    store i32 [[TMP0]], ptr [[TMP2]], align 4
 // CHECK-NEXT:    ret void
 //
 void StoreScalarAtMat31(out int4x4 A, int I) {
@@ -191,12 +164,9 @@ void StoreScalarAtMat31(out int4x4 A, int I) {
 // CHECK-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
 // CHECK-NEXT:    store i32 [[I]], ptr [[I_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[I_ADDR]], align 4
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x i32> poison, 
i32 [[TMP0]], i64 0
-// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x i32> 
[[SPLAT_SPLATINSERT]], <1 x i32> poison, <1 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull 
[[META4]], !align [[META5]]
-// CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x i32> [[SPLAT_SPLAT]], i32 0
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 
0, i32 9
-// CHECK-NEXT:    store i32 [[TMP2]], ptr [[TMP3]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 
0, i32 9
+// CHECK-NEXT:    store i32 [[TMP0]], ptr [[TMP2]], align 4
 // CHECK-NEXT:    ret void
 //
 void StoreScalarAtMat32(out int4x4 A, int I) {
@@ -211,12 +181,9 @@ void StoreScalarAtMat32(out int4x4 A, int I) {
 // CHECK-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
 // CHECK-NEXT:    store i32 [[I]], ptr [[I_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[I_ADDR]], align 4
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x i32> poison, 
i32 [[TMP0]], i64 0
-// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x i32> 
[[SPLAT_SPLATINSERT]], <1 x i32> poison, <1 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull 
[[META4]], !align [[META5]]
-// CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x i32> [[SPLAT_SPLAT]], i32 0
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 
0, i32 10
-// CHECK-NEXT:    store i32 [[TMP2]], ptr [[TMP3]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 
0, i32 10
+// CHECK-NEXT:    store i32 [[TMP0]], ptr [[TMP2]], align 4
 // CHECK-NEXT:    ret void
 //
 void StoreScalarAtMat33(out int4x4 A, int I) {
@@ -231,12 +198,9 @@ void StoreScalarAtMat33(out int4x4 A, int I) {
 // CHECK-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
 // CHECK-NEXT:    store i32 [[I]], ptr [[I_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[I_ADDR]], align 4
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x i32> poison, 
i32 [[TMP0]], i64 0
-// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x i32> 
[[SPLAT_SPLATINSERT]], <1 x i32> poison, <1 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull 
[[META4]], !align [[META5]]
-// CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x i32> [[SPLAT_SPLAT]], i32 0
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 
0, i32 11
-// CHECK-NEXT:    store i32 [[TMP2]], ptr [[TMP3]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 
0, i32 11
+// CHECK-NEXT:    store i32 [[TMP0]], ptr [[TMP2]], align 4
 // CHECK-NEXT:    ret void
 //
 void StoreScalarAtMat34(out int4x4 A, int I) {
@@ -251,12 +215,9 @@ void StoreScalarAtMat34(out int4x4 A, int I) {
 // CHECK-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
 // CHECK-NEXT:    store i32 [[I]], ptr [[I_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[I_ADDR]], align 4
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x i32> poison, 
i32 [[TMP0]], i64 0
-// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x i32> 
[[SPLAT_SPLATINSERT]], <1 x i32> poison, <1 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull 
[[META4]], !align [[META5]]
-// CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x i32> [[SPLAT_SPLAT]], i32 0
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 
0, i32 12
-// CHECK-NEXT:    store i32 [[TMP2]], ptr [[TMP3]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 
0, i32 12
+// CHECK-NEXT:    store i32 [[TMP0]], ptr [[TMP2]], align 4
 // CHECK-NEXT:    ret void
 //
 void StoreScalarAtMat41(out int4x4 A, int I) {
@@ -271,12 +232,9 @@ void StoreScalarAtMat41(out int4x4 A, int I) {
 // CHECK-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
 // CHECK-NEXT:    store i32 [[I]], ptr [[I_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[I_ADDR]], align 4
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x i32> poison, 
i32 [[TMP0]], i64 0
-// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x i32> 
[[SPLAT_SPLATINSERT]], <1 x i32> poison, <1 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull 
[[META4]], !align [[META5]]
-// CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x i32> [[SPLAT_SPLAT]], i32 0
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 
0, i32 13
-// CHECK-NEXT:    store i32 [[TMP2]], ptr [[TMP3]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 
0, i32 13
+// CHECK-NEXT:    store i32 [[TMP0]], ptr [[TMP2]], align 4
 // CHECK-NEXT:    ret void
 //
 void StoreScalarAtMat42(out int4x4 A, int I) {
@@ -291,12 +249,9 @@ void StoreScalarAtMat42(out int4x4 A, int I) {
 // CHECK-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
 // CHECK-NEXT:    store i32 [[I]], ptr [[I_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[I_ADDR]], align 4
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x i32> poison, 
i32 [[TMP0]], i64 0
-// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x i32> 
[[SPLAT_SPLATINSERT]], <1 x i32> poison, <1 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull 
[[META4]], !align [[META5]]
-// CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x i32> [[SPLAT_SPLAT]], i32 0
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 
0, i32 14
-// CHECK-NEXT:    store i32 [[TMP2]], ptr [[TMP3]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 
0, i32 14
+// CHECK-NEXT:    store i32 [[TMP0]], ptr [[TMP2]], align 4
 // CHECK-NEXT:    ret void
 //
 void StoreScalarAtMat43(out int4x4 A, int I) {
@@ -311,12 +266,9 @@ void StoreScalarAtMat43(out int4x4 A, int I) {
 // CHECK-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
 // CHECK-NEXT:    store i32 [[I]], ptr [[I_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[I_ADDR]], align 4
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x i32> poison, 
i32 [[TMP0]], i64 0
-// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x i32> 
[[SPLAT_SPLATINSERT]], <1 x i32> poison, <1 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull 
[[META4]], !align [[META5]]
-// CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x i32> [[SPLAT_SPLAT]], i32 0
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 
0, i32 15
-// CHECK-NEXT:    store i32 [[TMP2]], ptr [[TMP3]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 
0, i32 15
+// CHECK-NEXT:    store i32 [[TMP0]], ptr [[TMP2]], align 4
 // CHECK-NEXT:    ret void
 //
 void StoreScalarAtMat44(out int4x4 A, int I) {

diff  --git 
a/clang/test/CodeGenHLSL/matrix-member-zero-based-accessor-scalar-load.hlsl 
b/clang/test/CodeGenHLSL/matrix-member-zero-based-accessor-scalar-load.hlsl
index 8626e2d0d68b5..5d542b8647eac 100644
--- a/clang/test/CodeGenHLSL/matrix-member-zero-based-accessor-scalar-load.hlsl
+++ b/clang/test/CodeGenHLSL/matrix-member-zero-based-accessor-scalar-load.hlsl
@@ -10,9 +10,8 @@
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [4 x <4 x float>], align 4
 // CHECK-NEXT:    store <16 x float> [[A]], ptr [[A_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load <16 x float>, ptr [[A_ADDR]], align 4
-// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x float> [[TMP0]], <16 x 
float> poison, <1 x i32> zeroinitializer
-// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x float> [[TMP1]], 
i32 0
-// CHECK-NEXT:    ret float [[CAST_VTRUNC]]
+// CHECK-NEXT:    [[TMP1:%.*]] = extractelement <16 x float> [[TMP0]], i32 0
+// CHECK-NEXT:    ret float [[TMP1]]
 //
 float Return00(float4x4 A) {
     return A._m00;
@@ -24,9 +23,8 @@ float Return00(float4x4 A) {
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [4 x <4 x float>], align 4
 // CHECK-NEXT:    store <16 x float> [[A]], ptr [[A_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load <16 x float>, ptr [[A_ADDR]], align 4
-// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x float> [[TMP0]], <16 x 
float> poison, <1 x i32> <i32 1>
-// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x float> [[TMP1]], 
i32 0
-// CHECK-NEXT:    ret float [[CAST_VTRUNC]]
+// CHECK-NEXT:    [[TMP1:%.*]] = extractelement <16 x float> [[TMP0]], i32 1
+// CHECK-NEXT:    ret float [[TMP1]]
 //
 float Return01(float4x4 A) {
     return A._m01;
@@ -38,9 +36,8 @@ float Return01(float4x4 A) {
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [4 x <4 x float>], align 4
 // CHECK-NEXT:    store <16 x float> [[A]], ptr [[A_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load <16 x float>, ptr [[A_ADDR]], align 4
-// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x float> [[TMP0]], <16 x 
float> poison, <1 x i32> <i32 2>
-// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x float> [[TMP1]], 
i32 0
-// CHECK-NEXT:    ret float [[CAST_VTRUNC]]
+// CHECK-NEXT:    [[TMP1:%.*]] = extractelement <16 x float> [[TMP0]], i32 2
+// CHECK-NEXT:    ret float [[TMP1]]
 //
 float Return02(float4x4 A) {
     return A._m02;
@@ -52,9 +49,8 @@ float Return02(float4x4 A) {
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [4 x <4 x float>], align 4
 // CHECK-NEXT:    store <16 x float> [[A]], ptr [[A_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load <16 x float>, ptr [[A_ADDR]], align 4
-// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x float> [[TMP0]], <16 x 
float> poison, <1 x i32> <i32 3>
-// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x float> [[TMP1]], 
i32 0
-// CHECK-NEXT:    ret float [[CAST_VTRUNC]]
+// CHECK-NEXT:    [[TMP1:%.*]] = extractelement <16 x float> [[TMP0]], i32 3
+// CHECK-NEXT:    ret float [[TMP1]]
 //
 float Return03(float4x4 A) {
     return A._m03;
@@ -66,9 +62,8 @@ float Return03(float4x4 A) {
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [4 x <4 x float>], align 4
 // CHECK-NEXT:    store <16 x float> [[A]], ptr [[A_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load <16 x float>, ptr [[A_ADDR]], align 4
-// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x float> [[TMP0]], <16 x 
float> poison, <1 x i32> <i32 4>
-// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x float> [[TMP1]], 
i32 0
-// CHECK-NEXT:    ret float [[CAST_VTRUNC]]
+// CHECK-NEXT:    [[TMP1:%.*]] = extractelement <16 x float> [[TMP0]], i32 4
+// CHECK-NEXT:    ret float [[TMP1]]
 //
 float Return10(float4x4 A) {
     return A._m10;
@@ -80,9 +75,8 @@ float Return10(float4x4 A) {
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [4 x <4 x float>], align 4
 // CHECK-NEXT:    store <16 x float> [[A]], ptr [[A_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load <16 x float>, ptr [[A_ADDR]], align 4
-// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x float> [[TMP0]], <16 x 
float> poison, <1 x i32> <i32 5>
-// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x float> [[TMP1]], 
i32 0
-// CHECK-NEXT:    ret float [[CAST_VTRUNC]]
+// CHECK-NEXT:    [[TMP1:%.*]] = extractelement <16 x float> [[TMP0]], i32 5
+// CHECK-NEXT:    ret float [[TMP1]]
 //
 float Return11(float4x4 A) {
     return A._m11;
@@ -94,9 +88,8 @@ float Return11(float4x4 A) {
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [4 x <4 x float>], align 4
 // CHECK-NEXT:    store <16 x float> [[A]], ptr [[A_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load <16 x float>, ptr [[A_ADDR]], align 4
-// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x float> [[TMP0]], <16 x 
float> poison, <1 x i32> <i32 6>
-// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x float> [[TMP1]], 
i32 0
-// CHECK-NEXT:    ret float [[CAST_VTRUNC]]
+// CHECK-NEXT:    [[TMP1:%.*]] = extractelement <16 x float> [[TMP0]], i32 6
+// CHECK-NEXT:    ret float [[TMP1]]
 //
 float Return12(float4x4 A) {
     return A._m12;
@@ -108,9 +101,8 @@ float Return12(float4x4 A) {
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [4 x <4 x float>], align 4
 // CHECK-NEXT:    store <16 x float> [[A]], ptr [[A_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load <16 x float>, ptr [[A_ADDR]], align 4
-// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x float> [[TMP0]], <16 x 
float> poison, <1 x i32> <i32 7>
-// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x float> [[TMP1]], 
i32 0
-// CHECK-NEXT:    ret float [[CAST_VTRUNC]]
+// CHECK-NEXT:    [[TMP1:%.*]] = extractelement <16 x float> [[TMP0]], i32 7
+// CHECK-NEXT:    ret float [[TMP1]]
 //
 float Return13(float4x4 A) {
     return A._m13;
@@ -122,9 +114,8 @@ float Return13(float4x4 A) {
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [4 x <4 x float>], align 4
 // CHECK-NEXT:    store <16 x float> [[A]], ptr [[A_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load <16 x float>, ptr [[A_ADDR]], align 4
-// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x float> [[TMP0]], <16 x 
float> poison, <1 x i32> <i32 8>
-// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x float> [[TMP1]], 
i32 0
-// CHECK-NEXT:    ret float [[CAST_VTRUNC]]
+// CHECK-NEXT:    [[TMP1:%.*]] = extractelement <16 x float> [[TMP0]], i32 8
+// CHECK-NEXT:    ret float [[TMP1]]
 //
 float Return20(float4x4 A) {
     return A._m20;
@@ -136,9 +127,8 @@ float Return20(float4x4 A) {
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [4 x <4 x float>], align 4
 // CHECK-NEXT:    store <16 x float> [[A]], ptr [[A_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load <16 x float>, ptr [[A_ADDR]], align 4
-// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x float> [[TMP0]], <16 x 
float> poison, <1 x i32> <i32 9>
-// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x float> [[TMP1]], 
i32 0
-// CHECK-NEXT:    ret float [[CAST_VTRUNC]]
+// CHECK-NEXT:    [[TMP1:%.*]] = extractelement <16 x float> [[TMP0]], i32 9
+// CHECK-NEXT:    ret float [[TMP1]]
 //
 float Return21(float4x4 A) {
     return A._m21;
@@ -150,9 +140,8 @@ float Return21(float4x4 A) {
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [4 x <4 x float>], align 4
 // CHECK-NEXT:    store <16 x float> [[A]], ptr [[A_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load <16 x float>, ptr [[A_ADDR]], align 4
-// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x float> [[TMP0]], <16 x 
float> poison, <1 x i32> <i32 10>
-// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x float> [[TMP1]], 
i32 0
-// CHECK-NEXT:    ret float [[CAST_VTRUNC]]
+// CHECK-NEXT:    [[TMP1:%.*]] = extractelement <16 x float> [[TMP0]], i32 10
+// CHECK-NEXT:    ret float [[TMP1]]
 //
 float Return22(float4x4 A) {
     return A._m22;
@@ -164,9 +153,8 @@ float Return22(float4x4 A) {
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [4 x <4 x float>], align 4
 // CHECK-NEXT:    store <16 x float> [[A]], ptr [[A_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load <16 x float>, ptr [[A_ADDR]], align 4
-// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x float> [[TMP0]], <16 x 
float> poison, <1 x i32> <i32 11>
-// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x float> [[TMP1]], 
i32 0
-// CHECK-NEXT:    ret float [[CAST_VTRUNC]]
+// CHECK-NEXT:    [[TMP1:%.*]] = extractelement <16 x float> [[TMP0]], i32 11
+// CHECK-NEXT:    ret float [[TMP1]]
 //
 float Return23(float4x4 A) {
     return A._m23;
@@ -178,9 +166,8 @@ float Return23(float4x4 A) {
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [4 x <4 x float>], align 4
 // CHECK-NEXT:    store <16 x float> [[A]], ptr [[A_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load <16 x float>, ptr [[A_ADDR]], align 4
-// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x float> [[TMP0]], <16 x 
float> poison, <1 x i32> <i32 12>
-// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x float> [[TMP1]], 
i32 0
-// CHECK-NEXT:    ret float [[CAST_VTRUNC]]
+// CHECK-NEXT:    [[TMP1:%.*]] = extractelement <16 x float> [[TMP0]], i32 12
+// CHECK-NEXT:    ret float [[TMP1]]
 //
 float Return30(float4x4 A) {
     return A._m30;
@@ -192,9 +179,8 @@ float Return30(float4x4 A) {
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [4 x <4 x float>], align 4
 // CHECK-NEXT:    store <16 x float> [[A]], ptr [[A_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load <16 x float>, ptr [[A_ADDR]], align 4
-// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x float> [[TMP0]], <16 x 
float> poison, <1 x i32> <i32 13>
-// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x float> [[TMP1]], 
i32 0
-// CHECK-NEXT:    ret float [[CAST_VTRUNC]]
+// CHECK-NEXT:    [[TMP1:%.*]] = extractelement <16 x float> [[TMP0]], i32 13
+// CHECK-NEXT:    ret float [[TMP1]]
 //
 float Return31(float4x4 A) {
     return A._m31;
@@ -206,9 +192,8 @@ float Return31(float4x4 A) {
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [4 x <4 x float>], align 4
 // CHECK-NEXT:    store <16 x float> [[A]], ptr [[A_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load <16 x float>, ptr [[A_ADDR]], align 4
-// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x float> [[TMP0]], <16 x 
float> poison, <1 x i32> <i32 14>
-// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x float> [[TMP1]], 
i32 0
-// CHECK-NEXT:    ret float [[CAST_VTRUNC]]
+// CHECK-NEXT:    [[TMP1:%.*]] = extractelement <16 x float> [[TMP0]], i32 14
+// CHECK-NEXT:    ret float [[TMP1]]
 //
 float Return32(float4x4 A) {
     return A._m32;
@@ -220,9 +205,8 @@ float Return32(float4x4 A) {
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [4 x <4 x float>], align 4
 // CHECK-NEXT:    store <16 x float> [[A]], ptr [[A_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load <16 x float>, ptr [[A_ADDR]], align 4
-// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x float> [[TMP0]], <16 x 
float> poison, <1 x i32> <i32 15>
-// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x float> [[TMP1]], 
i32 0
-// CHECK-NEXT:    ret float [[CAST_VTRUNC]]
+// CHECK-NEXT:    [[TMP1:%.*]] = extractelement <16 x float> [[TMP0]], i32 15
+// CHECK-NEXT:    ret float [[TMP1]]
 //
 float Return33(float4x4 A) {
     return A._m33;

diff  --git 
a/clang/test/CodeGenHLSL/matrix-member-zero-based-accessor-scalar-store.hlsl 
b/clang/test/CodeGenHLSL/matrix-member-zero-based-accessor-scalar-store.hlsl
index fb4fa267174b9..97dc7e7c2bd37 100644
--- a/clang/test/CodeGenHLSL/matrix-member-zero-based-accessor-scalar-store.hlsl
+++ b/clang/test/CodeGenHLSL/matrix-member-zero-based-accessor-scalar-store.hlsl
@@ -12,11 +12,8 @@
 // CHECK-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
 // CHECK-NEXT:    store float [[F]], ptr [[F_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F_ADDR]], align 4
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x float> 
poison, float [[TMP0]], i64 0
-// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x float> 
[[SPLAT_SPLATINSERT]], <1 x float> poison, <1 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull 
[[META4:![0-9]+]], !align [[META5:![0-9]+]]
-// CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x float> [[SPLAT_SPLAT]], 
i32 0
-// CHECK-NEXT:    store float [[TMP2]], ptr [[TMP1]], align 4
+// CHECK-NEXT:    store float [[TMP0]], ptr [[TMP1]], align 4
 // CHECK-NEXT:    ret void
 //
 void StoreScalarAtMat00(out float4x4 A, float F) {
@@ -31,12 +28,9 @@ void StoreScalarAtMat00(out float4x4 A, float F) {
 // CHECK-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
 // CHECK-NEXT:    store float [[F]], ptr [[F_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F_ADDR]], align 4
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x float> 
poison, float [[TMP0]], i64 0
-// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x float> 
[[SPLAT_SPLATINSERT]], <1 x float> poison, <1 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull 
[[META4]], !align [[META5]]
-// CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x float> [[SPLAT_SPLAT]], 
i32 0
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x float>, ptr [[TMP1]], i32 
0, i32 1
-// CHECK-NEXT:    store float [[TMP2]], ptr [[TMP3]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <16 x float>, ptr [[TMP1]], i32 
0, i32 1
+// CHECK-NEXT:    store float [[TMP0]], ptr [[TMP2]], align 4
 // CHECK-NEXT:    ret void
 //
 void StoreScalarAtMat01(out float4x4 A, float F) {
@@ -51,12 +45,9 @@ void StoreScalarAtMat01(out float4x4 A, float F) {
 // CHECK-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
 // CHECK-NEXT:    store float [[F]], ptr [[F_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F_ADDR]], align 4
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x float> 
poison, float [[TMP0]], i64 0
-// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x float> 
[[SPLAT_SPLATINSERT]], <1 x float> poison, <1 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull 
[[META4]], !align [[META5]]
-// CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x float> [[SPLAT_SPLAT]], 
i32 0
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x float>, ptr [[TMP1]], i32 
0, i32 2
-// CHECK-NEXT:    store float [[TMP2]], ptr [[TMP3]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <16 x float>, ptr [[TMP1]], i32 
0, i32 2
+// CHECK-NEXT:    store float [[TMP0]], ptr [[TMP2]], align 4
 // CHECK-NEXT:    ret void
 //
 void StoreScalarAtMat02(out float4x4 A, float F) {
@@ -71,12 +62,9 @@ void StoreScalarAtMat02(out float4x4 A, float F) {
 // CHECK-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
 // CHECK-NEXT:    store float [[F]], ptr [[F_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F_ADDR]], align 4
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x float> 
poison, float [[TMP0]], i64 0
-// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x float> 
[[SPLAT_SPLATINSERT]], <1 x float> poison, <1 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull 
[[META4]], !align [[META5]]
-// CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x float> [[SPLAT_SPLAT]], 
i32 0
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x float>, ptr [[TMP1]], i32 
0, i32 3
-// CHECK-NEXT:    store float [[TMP2]], ptr [[TMP3]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <16 x float>, ptr [[TMP1]], i32 
0, i32 3
+// CHECK-NEXT:    store float [[TMP0]], ptr [[TMP2]], align 4
 // CHECK-NEXT:    ret void
 //
 void StoreScalarAtMat03(out float4x4 A, float F) {
@@ -91,12 +79,9 @@ void StoreScalarAtMat03(out float4x4 A, float F) {
 // CHECK-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
 // CHECK-NEXT:    store float [[F]], ptr [[F_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F_ADDR]], align 4
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x float> 
poison, float [[TMP0]], i64 0
-// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x float> 
[[SPLAT_SPLATINSERT]], <1 x float> poison, <1 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull 
[[META4]], !align [[META5]]
-// CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x float> [[SPLAT_SPLAT]], 
i32 0
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x float>, ptr [[TMP1]], i32 
0, i32 4
-// CHECK-NEXT:    store float [[TMP2]], ptr [[TMP3]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <16 x float>, ptr [[TMP1]], i32 
0, i32 4
+// CHECK-NEXT:    store float [[TMP0]], ptr [[TMP2]], align 4
 // CHECK-NEXT:    ret void
 //
 void StoreScalarAtMat10(out float4x4 A, float F) {
@@ -111,12 +96,9 @@ void StoreScalarAtMat10(out float4x4 A, float F) {
 // CHECK-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
 // CHECK-NEXT:    store float [[F]], ptr [[F_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F_ADDR]], align 4
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x float> 
poison, float [[TMP0]], i64 0
-// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x float> 
[[SPLAT_SPLATINSERT]], <1 x float> poison, <1 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull 
[[META4]], !align [[META5]]
-// CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x float> [[SPLAT_SPLAT]], 
i32 0
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x float>, ptr [[TMP1]], i32 
0, i32 5
-// CHECK-NEXT:    store float [[TMP2]], ptr [[TMP3]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <16 x float>, ptr [[TMP1]], i32 
0, i32 5
+// CHECK-NEXT:    store float [[TMP0]], ptr [[TMP2]], align 4
 // CHECK-NEXT:    ret void
 //
 void StoreScalarAtMat11(out float4x4 A, float F) {
@@ -131,12 +113,9 @@ void StoreScalarAtMat11(out float4x4 A, float F) {
 // CHECK-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
 // CHECK-NEXT:    store float [[F]], ptr [[F_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F_ADDR]], align 4
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x float> 
poison, float [[TMP0]], i64 0
-// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x float> 
[[SPLAT_SPLATINSERT]], <1 x float> poison, <1 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull 
[[META4]], !align [[META5]]
-// CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x float> [[SPLAT_SPLAT]], 
i32 0
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x float>, ptr [[TMP1]], i32 
0, i32 6
-// CHECK-NEXT:    store float [[TMP2]], ptr [[TMP3]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <16 x float>, ptr [[TMP1]], i32 
0, i32 6
+// CHECK-NEXT:    store float [[TMP0]], ptr [[TMP2]], align 4
 // CHECK-NEXT:    ret void
 //
 void StoreScalarAtMat12(out float4x4 A, float F) {
@@ -151,12 +130,9 @@ void StoreScalarAtMat12(out float4x4 A, float F) {
 // CHECK-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
 // CHECK-NEXT:    store float [[F]], ptr [[F_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F_ADDR]], align 4
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x float> 
poison, float [[TMP0]], i64 0
-// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x float> 
[[SPLAT_SPLATINSERT]], <1 x float> poison, <1 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull 
[[META4]], !align [[META5]]
-// CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x float> [[SPLAT_SPLAT]], 
i32 0
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x float>, ptr [[TMP1]], i32 
0, i32 7
-// CHECK-NEXT:    store float [[TMP2]], ptr [[TMP3]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <16 x float>, ptr [[TMP1]], i32 
0, i32 7
+// CHECK-NEXT:    store float [[TMP0]], ptr [[TMP2]], align 4
 // CHECK-NEXT:    ret void
 //
 void StoreScalarAtMat13(out float4x4 A, float F) {
@@ -171,12 +147,9 @@ void StoreScalarAtMat13(out float4x4 A, float F) {
 // CHECK-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
 // CHECK-NEXT:    store float [[F]], ptr [[F_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F_ADDR]], align 4
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x float> 
poison, float [[TMP0]], i64 0
-// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x float> 
[[SPLAT_SPLATINSERT]], <1 x float> poison, <1 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull 
[[META4]], !align [[META5]]
-// CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x float> [[SPLAT_SPLAT]], 
i32 0
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x float>, ptr [[TMP1]], i32 
0, i32 8
-// CHECK-NEXT:    store float [[TMP2]], ptr [[TMP3]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <16 x float>, ptr [[TMP1]], i32 
0, i32 8
+// CHECK-NEXT:    store float [[TMP0]], ptr [[TMP2]], align 4
 // CHECK-NEXT:    ret void
 //
 void StoreScalarAtMat20(out float4x4 A, float F) {
@@ -191,12 +164,9 @@ void StoreScalarAtMat20(out float4x4 A, float F) {
 // CHECK-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
 // CHECK-NEXT:    store float [[F]], ptr [[F_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F_ADDR]], align 4
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x float> 
poison, float [[TMP0]], i64 0
-// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x float> 
[[SPLAT_SPLATINSERT]], <1 x float> poison, <1 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull 
[[META4]], !align [[META5]]
-// CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x float> [[SPLAT_SPLAT]], 
i32 0
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x float>, ptr [[TMP1]], i32 
0, i32 9
-// CHECK-NEXT:    store float [[TMP2]], ptr [[TMP3]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <16 x float>, ptr [[TMP1]], i32 
0, i32 9
+// CHECK-NEXT:    store float [[TMP0]], ptr [[TMP2]], align 4
 // CHECK-NEXT:    ret void
 //
 void StoreScalarAtMat21(out float4x4 A, float F) {
@@ -211,12 +181,9 @@ void StoreScalarAtMat21(out float4x4 A, float F) {
 // CHECK-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
 // CHECK-NEXT:    store float [[F]], ptr [[F_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F_ADDR]], align 4
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x float> 
poison, float [[TMP0]], i64 0
-// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x float> 
[[SPLAT_SPLATINSERT]], <1 x float> poison, <1 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull 
[[META4]], !align [[META5]]
-// CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x float> [[SPLAT_SPLAT]], 
i32 0
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x float>, ptr [[TMP1]], i32 
0, i32 10
-// CHECK-NEXT:    store float [[TMP2]], ptr [[TMP3]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <16 x float>, ptr [[TMP1]], i32 
0, i32 10
+// CHECK-NEXT:    store float [[TMP0]], ptr [[TMP2]], align 4
 // CHECK-NEXT:    ret void
 //
 void StoreScalarAtMat22(out float4x4 A, float F) {
@@ -231,12 +198,9 @@ void StoreScalarAtMat22(out float4x4 A, float F) {
 // CHECK-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
 // CHECK-NEXT:    store float [[F]], ptr [[F_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F_ADDR]], align 4
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x float> 
poison, float [[TMP0]], i64 0
-// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x float> 
[[SPLAT_SPLATINSERT]], <1 x float> poison, <1 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull 
[[META4]], !align [[META5]]
-// CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x float> [[SPLAT_SPLAT]], 
i32 0
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x float>, ptr [[TMP1]], i32 
0, i32 11
-// CHECK-NEXT:    store float [[TMP2]], ptr [[TMP3]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <16 x float>, ptr [[TMP1]], i32 
0, i32 11
+// CHECK-NEXT:    store float [[TMP0]], ptr [[TMP2]], align 4
 // CHECK-NEXT:    ret void
 //
 void StoreScalarAtMat23(out float4x4 A, float F) {
@@ -251,12 +215,9 @@ void StoreScalarAtMat23(out float4x4 A, float F) {
 // CHECK-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
 // CHECK-NEXT:    store float [[F]], ptr [[F_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F_ADDR]], align 4
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x float> 
poison, float [[TMP0]], i64 0
-// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x float> 
[[SPLAT_SPLATINSERT]], <1 x float> poison, <1 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull 
[[META4]], !align [[META5]]
-// CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x float> [[SPLAT_SPLAT]], 
i32 0
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x float>, ptr [[TMP1]], i32 
0, i32 12
-// CHECK-NEXT:    store float [[TMP2]], ptr [[TMP3]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <16 x float>, ptr [[TMP1]], i32 
0, i32 12
+// CHECK-NEXT:    store float [[TMP0]], ptr [[TMP2]], align 4
 // CHECK-NEXT:    ret void
 //
 void StoreScalarAtMat30(out float4x4 A, float F) {
@@ -271,12 +232,9 @@ void StoreScalarAtMat30(out float4x4 A, float F) {
 // CHECK-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
 // CHECK-NEXT:    store float [[F]], ptr [[F_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F_ADDR]], align 4
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x float> 
poison, float [[TMP0]], i64 0
-// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x float> 
[[SPLAT_SPLATINSERT]], <1 x float> poison, <1 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull 
[[META4]], !align [[META5]]
-// CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x float> [[SPLAT_SPLAT]], 
i32 0
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x float>, ptr [[TMP1]], i32 
0, i32 13
-// CHECK-NEXT:    store float [[TMP2]], ptr [[TMP3]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <16 x float>, ptr [[TMP1]], i32 
0, i32 13
+// CHECK-NEXT:    store float [[TMP0]], ptr [[TMP2]], align 4
 // CHECK-NEXT:    ret void
 //
 void StoreScalarAtMat31(out float4x4 A, float F) {
@@ -291,12 +249,9 @@ void StoreScalarAtMat31(out float4x4 A, float F) {
 // CHECK-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
 // CHECK-NEXT:    store float [[F]], ptr [[F_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F_ADDR]], align 4
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x float> 
poison, float [[TMP0]], i64 0
-// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x float> 
[[SPLAT_SPLATINSERT]], <1 x float> poison, <1 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull 
[[META4]], !align [[META5]]
-// CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x float> [[SPLAT_SPLAT]], 
i32 0
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x float>, ptr [[TMP1]], i32 
0, i32 14
-// CHECK-NEXT:    store float [[TMP2]], ptr [[TMP3]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <16 x float>, ptr [[TMP1]], i32 
0, i32 14
+// CHECK-NEXT:    store float [[TMP0]], ptr [[TMP2]], align 4
 // CHECK-NEXT:    ret void
 //
 void StoreScalarAtMat32(out float4x4 A, float F) {
@@ -311,12 +266,9 @@ void StoreScalarAtMat32(out float4x4 A, float F) {
 // CHECK-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
 // CHECK-NEXT:    store float [[F]], ptr [[F_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F_ADDR]], align 4
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x float> 
poison, float [[TMP0]], i64 0
-// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x float> 
[[SPLAT_SPLATINSERT]], <1 x float> poison, <1 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull 
[[META4]], !align [[META5]]
-// CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x float> [[SPLAT_SPLAT]], 
i32 0
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x float>, ptr [[TMP1]], i32 
0, i32 15
-// CHECK-NEXT:    store float [[TMP2]], ptr [[TMP3]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <16 x float>, ptr [[TMP1]], i32 
0, i32 15
+// CHECK-NEXT:    store float [[TMP0]], ptr [[TMP2]], align 4
 // CHECK-NEXT:    ret void
 //
 void StoreScalarAtMat33(out float4x4 A, float F) {


        
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to