[PATCH] D76791: [Matrix] Implement matrix index expressions ([][]).

Florian Hahn via Phabricator via cfe-commits Wed, 27 May 2020 13:38:08 -0700

fhahn updated this revision to Diff 266643.
fhahn marked 4 inline comments as done.
fhahn added a comment.


Addressed latest comments:

- Handle placeholder types in CreateBuiltinMatrixSubscriptExpr, and do not limit
  the handling to non-overload placeholder types there.
- Check !MatrixSubscriptExpr instead of ParenExpr.
- Only handle placeholder types for Base in ActOnMatrixSubscriptExpr. Only skip 
isMSPropertySubscriptExpr.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D76791/new/

https://reviews.llvm.org/D76791

Files:
  clang/include/clang/AST/ASTContext.h
  clang/include/clang/AST/BuiltinTypes.def
  clang/include/clang/AST/ComputeDependence.h
  clang/include/clang/AST/Expr.h
  clang/include/clang/AST/RecursiveASTVisitor.h
  clang/include/clang/AST/Stmt.h
  clang/include/clang/AST/Type.h
  clang/include/clang/Basic/DiagnosticSemaKinds.td
  clang/include/clang/Basic/Specifiers.h
  clang/include/clang/Basic/StmtNodes.td
  clang/include/clang/Sema/Initialization.h
  clang/include/clang/Sema/Sema.h
  clang/include/clang/Serialization/ASTBitCodes.h
  clang/lib/AST/ASTContext.cpp
  clang/lib/AST/ComputeDependence.cpp
  clang/lib/AST/Expr.cpp
  clang/lib/AST/ExprClassification.cpp
  clang/lib/AST/ExprConstant.cpp
  clang/lib/AST/ItaniumMangle.cpp
  clang/lib/AST/NSAPI.cpp
  clang/lib/AST/StmtPrinter.cpp
  clang/lib/AST/StmtProfile.cpp
  clang/lib/AST/TextNodeDumper.cpp
  clang/lib/AST/Type.cpp
  clang/lib/AST/TypeLoc.cpp
  clang/lib/CodeGen/CGExpr.cpp
  clang/lib/CodeGen/CGExprScalar.cpp
  clang/lib/CodeGen/CGValue.h
  clang/lib/CodeGen/CodeGenFunction.h
  clang/lib/Sema/SemaCast.cpp
  clang/lib/Sema/SemaExceptionSpec.cpp
  clang/lib/Sema/SemaExpr.cpp
  clang/lib/Sema/SemaInit.cpp
  clang/lib/Sema/TreeTransform.h
  clang/lib/Serialization/ASTCommon.cpp
  clang/lib/Serialization/ASTReader.cpp
  clang/lib/Serialization/ASTReaderStmt.cpp
  clang/lib/Serialization/ASTWriterStmt.cpp
  clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
  clang/test/CodeGen/matrix-type-operators.c
  clang/test/CodeGenCXX/matrix-type-operators.cpp
  clang/test/CodeGenObjC/matrix-type-operators.m
  clang/test/Sema/matrix-type-operators.c
  clang/test/SemaCXX/matrix-type-operators.cpp
  clang/test/SemaObjC/matrix-type-operators.m
  clang/tools/libclang/CXCursor.cpp
  llvm/include/llvm/IR/MatrixBuilder.h

Index: llvm/include/llvm/IR/MatrixBuilder.h
===================================================================
--- llvm/include/llvm/IR/MatrixBuilder.h
+++ llvm/include/llvm/IR/MatrixBuilder.h
@@ -155,15 +155,19 @@
     return B.CreateMul(LHS, ScalarVector);
   }
 
-  /// Extracts the element at (\p Row, \p Column) from \p Matrix.
-  Value *CreateExtractMatrix(Value *Matrix, Value *Row, Value *Column,
-                             unsigned NumRows, Twine const &Name = "") {
-
+  /// Extracts the element at (\p RowIdx, \p ColumnIdx) from \p Matrix.
+  Value *CreateExtractElement(Value *Matrix, Value *RowIdx, Value *ColumnIdx,
+                              unsigned NumRows, Twine const &Name = "") {
+
+    unsigned MaxWidth = std::max(RowIdx->getType()->getScalarSizeInBits(),
+                                 ColumnIdx->getType()->getScalarSizeInBits());
+    Type *IntTy = IntegerType::get(RowIdx->getType()->getContext(), MaxWidth);
+    RowIdx = B.CreateZExt(RowIdx, IntTy);
+    ColumnIdx = B.CreateZExt(ColumnIdx, IntTy);
+    Value *NumRowsV = B.getIntN(MaxWidth, NumRows);
     return B.CreateExtractElement(
-        Matrix,
-        B.CreateAdd(
-            B.CreateMul(Column, ConstantInt::get(Column->getType(), NumRows)),
-            Row));
+        Matrix, B.CreateAdd(B.CreateMul(ColumnIdx, NumRowsV), RowIdx),
+        "matext");
   }
 };
 
Index: clang/tools/libclang/CXCursor.cpp
===================================================================
--- clang/tools/libclang/CXCursor.cpp
+++ clang/tools/libclang/CXCursor.cpp
@@ -419,6 +419,11 @@
     K = CXCursor_ArraySubscriptExpr;
     break;
 
+  case Stmt::MatrixSubscriptExprClass:
+    // TODO: add support for MatrixSubscriptExpr.
+    K = CXCursor_UnexposedExpr;
+    break;
+
   case Stmt::OMPArraySectionExprClass:
     K = CXCursor_OMPArraySectionExpr;
     break;
Index: clang/test/SemaObjC/matrix-type-operators.m
===================================================================
--- /dev/null
+++ clang/test/SemaObjC/matrix-type-operators.m
@@ -0,0 +1,22 @@
+// RUN: %clang_cc1 -fsyntax-only -verify -fenable-matrix %s
+
+struct Foo {};
+__attribute__((objc_root_class))
+@interface FooValue
+@property struct Foo value;
+@end
+
+typedef double double4x4 __attribute__((matrix_type(4, 4)));
+
+// Check that we generate proper error messages for invalid placeholder types.
+//
+double test_index_placeholders(double4x4 m, FooValue *iv) {
+  return m[iv.value][iv.value];
+  // expected-error@-1 {{matrix row index is not an integer}}
+  // expected-error@-2 {{matrix column index is not an integer}}
+}
+
+double test_base_and_index_placeholders(FooValue *m, FooValue *iv) {
+  return m.value[iv.value][iv.value];
+  // expected-error@-1 {{subscripted value is not an array, pointer, or vector}}
+}
Index: clang/test/SemaCXX/matrix-type-operators.cpp
===================================================================
--- /dev/null
+++ clang/test/SemaCXX/matrix-type-operators.cpp
@@ -0,0 +1,116 @@
+// RUN: %clang_cc1 %s -fenable-matrix -pedantic -std=c++11 -verify -triple=x86_64-apple-darwin9
+
+typedef float sx5x10_t __attribute__((matrix_type(5, 10)));
+
+sx5x10_t get_matrix();
+
+void insert(sx5x10_t a, float f) {
+  // Non integer indexes.
+  a[3][f] = 0;
+  // expected-error@-1 {{matrix column index is not an integer}}
+  a[f][9] = 0;
+  // expected-error@-1 {{matrix row index is not an integer}}
+  a[f][f] = 0;
+  // expected-error@-1 {{matrix row index is not an integer}}
+  // expected-error@-2 {{matrix column index is not an integer}}
+  a[0][f] = 0;
+  // expected-error@-1 {{matrix column index is not an integer}}
+
+  // Invalid element type.
+  a[3][4] = &f;
+  // expected-error@-1 {{assigning to 'float' from incompatible type 'float *'; remove &}}
+
+  // Indexes outside allowed dimensions.
+  a[-1][3] = 10.0;
+  // expected-error@-1 {{matrix row index is outside the allowed range [0, 5)}}
+  a[3][-1] = 10.0;
+  // expected-error@-1 {{matrix column index is outside the allowed range [0, 10)}}
+  a[3][-1u] = 10.0;
+  // expected-error@-1 {{matrix column index is outside the allowed range [0, 10)}}
+  a[-1u][3] = 10.0;
+  // expected-error@-1 {{matrix row index is outside the allowed range [0, 5)}}
+  a[5][2] = 10.0;
+  // expected-error@-1 {{matrix row index is outside the allowed range [0, 5)}}
+  a[4][10] = 10.0;
+  // expected-error@-1 {{matrix column index is outside the allowed range [0, 10)}}
+  a[5][10.0] = f;
+  // expected-error@-1 {{matrix row index is outside the allowed range [0, 5)}}
+  // expected-error@-2 {{matrix column index is not an integer}}
+
+  get_matrix()[0][0] = f;
+  // expected-error@-1 {{expression is not assignable}}
+  get_matrix()[5][10.0] = f;
+  // expected-error@-1 {{matrix row index is outside the allowed range [0, 5)}}
+  // expected-error@-2 {{matrix column index is not an integer}}
+  get_matrix()[3] = 5.0;
+  // expected-error@-1 {{single subscript expressions are not allowed for matrix values}}
+
+  float &x = reinterpret_cast<float &>(a[3][3]);
+  // expected-error@-1 {{reinterpret_cast of a matrix element to 'float &' needs its address, which is not allowed}}
+
+  a[4, 5] = 5.0;
+  // expected-error@-1 {{comma expressions are not allowed as indices in matrix subscript expressions}}
+  // expected-warning@-2 {{expression result unused}}
+
+  a[4, 5, 4] = 5.0;
+  // expected-error@-1 {{comma expressions are not allowed as indices in matrix subscript expressions}}
+  // expected-warning@-2 {{expression result unused}}
+  // expected-warning@-3 {{expression result unused}}
+}
+
+void extract(sx5x10_t a, float f) {
+  // Non integer indexes.
+  float v1 = a[3][f];
+  // expected-error@-1 {{matrix column index is not an integer}}
+  float v2 = a[f][9];
+  // expected-error@-1 {{matrix row index is not an integer}}
+  float v3 = a[f][f];
+  // expected-error@-1 {{matrix row index is not an integer}}
+  // expected-error@-2 {{matrix column index is not an integer}}
+
+  // Invalid element type.
+  char *v4 = a[3][4];
+  // expected-error@-1 {{cannot initialize a variable of type 'char *' with an lvalue of type 'float'}}
+
+  // Indexes outside allowed dimensions.
+  float v5 = a[-1][3];
+  // expected-error@-1 {{matrix row index is outside the allowed range [0, 5)}}
+  float v6 = a[3][-1];
+  // expected-error@-1 {{matrix column index is outside the allowed range [0, 10)}}
+  float v8 = a[-1u][3];
+  // expected-error@-1 {{matrix row index is outside the allowed range [0, 5)}}
+  float v9 = a[5][2];
+  // expected-error@-1 {{matrix row index is outside the allowed range [0, 5)}}
+  float v10 = a[4][10];
+  // expected-error@-1 {{matrix column index is outside the allowed range [0, 10)}}
+  float v11 = a[5][10.0];
+  // expected-error@-1 {{matrix row index is outside the allowed range [0, 5)}}
+  // expected-error@-2 {{matrix column index is not an integer}}
+
+  float v12 = get_matrix()[0][0];
+  float v13 = get_matrix()[5][10.0];
+  // expected-error@-1 {{matrix row index is outside the allowed range [0, 5)}}
+  // expected-error@-2 {{matrix column index is not an integer}}
+}
+
+const float &const_subscript_reference(sx5x10_t m) {
+  return m[2][2];
+  // expected-warning@-1 {{returning reference to local temporary object}}
+}
+
+const float &const_subscript_reference(const sx5x10_t &m) {
+  return m[2][2];
+  // expected-warning@-1 {{returning reference to local temporary object}}
+}
+
+float &nonconst_subscript_reference(sx5x10_t m) {
+  return m[2][2];
+  // expected-error@-1 {{non-const reference cannot bind to matrix element}}
+}
+
+void incomplete_matrix_index_expr(sx5x10_t a, float f) {
+  float x = a[3];
+  // expected-error@-1 {{single subscript expressions are not allowed for matrix values}}
+  a[2] = f;
+  // expected-error@-1 {{single subscript expressions are not allowed for matrix values}}
+}
Index: clang/test/Sema/matrix-type-operators.c
===================================================================
--- /dev/null
+++ clang/test/Sema/matrix-type-operators.c
@@ -0,0 +1,104 @@
+// RUN: %clang_cc1 %s -fenable-matrix -pedantic -verify -triple=x86_64-apple-darwin9
+
+typedef float sx5x10_t __attribute__((matrix_type(5, 10)));
+
+sx5x10_t get_matrix();
+
+void insert(sx5x10_t a, float f) {
+  // Non integer indexes.
+  a[3][f] = 0;
+  // expected-error@-1 {{matrix column index is not an integer}}
+  a[f][9] = 0;
+  // expected-error@-1 {{matrix row index is not an integer}}
+  a[f][f] = 0;
+  // expected-error@-1 {{matrix row index is not an integer}}
+  // expected-error@-2 {{matrix column index is not an integer}}
+  a[0][f] = 0;
+  // expected-error@-1 {{matrix column index is not an integer}}
+
+  a[f][f] = 0;
+  // expected-error@-1 {{matrix row index is not an integer}}
+  // expected-error@-2 {{matrix column index is not an integer}}
+
+  // Invalid element type.
+  a[3][4] = &f;
+  // expected-error@-1 {{assigning to 'float' from incompatible type 'float *'; remove &}}
+
+  // Indexes outside allowed dimensions.
+  a[-1][3] = 10.0;
+  // expected-error@-1 {{matrix row index is outside the allowed range [0, 5)}}
+  a[3][-1] = 10.0;
+  // expected-error@-1 {{matrix column index is outside the allowed range [0, 10)}}
+  a[3][-1u] = 10.0;
+  // expected-error@-1 {{matrix column index is outside the allowed range [0, 10)}}
+  a[-1u][3] = 10.0;
+  // expected-error@-1 {{matrix row index is outside the allowed range [0, 5)}}
+  a[5][2] = 10.0;
+  // expected-error@-1 {{matrix row index is outside the allowed range [0, 5)}}
+  a[4][10] = 10.0;
+  // expected-error@-1 {{matrix column index is outside the allowed range [0, 10)}}
+  a[5][0] = f;
+  // expected-error@-1 {{matrix row index is outside the allowed range [0, 5)}}
+  (a[1])[1] = f;
+  // expected-error@-1 {{matrix row and column subscripts cannot be separated by any expression}}
+
+  a[3] = 5.0;
+  // expected-error@-1 {{single subscript expressions are not allowed for matrix values}}
+
+  (a[3]) = 5.0;
+  // expected-error@-1 {{single subscript expressions are not allowed for matrix values}}
+
+  get_matrix()[0][0] = f;
+  // expected-error@-1 {{expression is not assignable}}
+  get_matrix()[5][1] = f;
+  // expected-error@-1 {{matrix row index is outside the allowed range [0, 5)}}
+  get_matrix()[3] = 5.0;
+  // expected-error@-1 {{single subscript expressions are not allowed for matrix values}}
+
+  (get_matrix()[5])[10.0] = f;
+  // expected-error@-1 {{matrix row and column subscripts cannot be separated by any expression}}
+  (get_matrix()[3]) = 5.0;
+  // expected-error@-1 {{single subscript expressions are not allowed for matrix values}}
+
+  a([0])[0] = f;
+  // expected-error@-1 {{expected expression}}
+  a[0]([0]) = f;
+  // expected-error@-1 {{expected expression}}
+}
+
+void extract(sx5x10_t a, float f) {
+  // Non integer indexes.
+  float v1 = a[3][f];
+  // expected-error@-1 {{matrix column index is not an integer}}
+  float v2 = a[f][9];
+  // expected-error@-1 {{matrix row index is not an integer}}
+  float v3 = a[f][f];
+  // expected-error@-1 {{matrix row index is not an integer}}
+  // expected-error@-2 {{matrix column index is not an integer}}
+
+  // Invalid element type.
+  char *v4 = a[3][4];
+  // expected-error@-1 {{initializing 'char *' with an expression of incompatible type 'float'}}
+
+  // Indexes outside allowed dimensions.
+  float v5 = a[-1][3];
+  // expected-error@-1 {{matrix row index is outside the allowed range [0, 5)}}
+  float v6 = a[3][-1];
+  // expected-error@-1 {{matrix column index is outside the allowed range [0, 10)}}
+  float v8 = a[-1u][3];
+  // expected-error@-1 {{matrix row index is outside the allowed range [0, 5)}}
+  float v9 = a[5][2];
+  // expected-error@-1 {{matrix row index is outside the allowed range [0, 5)}}
+  float v10 = a[4][10];
+  // expected-error@-1 {{matrix column index is outside the allowed range [0, 10)}}
+  float v11 = a[5][9];
+  // expected-error@-1 {{matrix row index is outside the allowed range [0, 5)}}
+
+  float v12 = a[3];
+  // expected-error@-1 {{single subscript expressions are not allowed for matrix values}}
+}
+
+float *address_of_element(sx5x10_t *a) {
+  return &(*a)[0][1];
+  // expected-error@-1 {{address of matrix element requested}}
+}
Index: clang/test/CodeGenObjC/matrix-type-operators.m
===================================================================
--- /dev/null
+++ clang/test/CodeGenObjC/matrix-type-operators.m
@@ -0,0 +1,68 @@
+// RUN: %clang_cc1 -triple x86_64-apple-darwin10 -fenable-matrix -emit-llvm -disable-llvm-optzns -o - %s | FileCheck %s
+
+__attribute__((objc_root_class))
+@interface IntValue
+@property int value;
+@end
+
+typedef double double4x4 __attribute__((matrix_type(4, 4)));
+
+// Check that we correctly deal with placeholder expressions.
+
+double test_index_placeholders(double4x4 m, IntValue *iv) {
+  // CHECK-LABEL: define double @test_index_placeholders(<16 x double> %m, %0* %iv)
+  // CHECK-NEXT:  entry:
+  // CHECK-NEXT:    %m.addr = alloca [16 x double], align 8
+  // CHECK-NEXT:    %iv.addr = alloca %0*, align 8
+  // CHECK-NEXT:    %0 = bitcast [16 x double]* %m.addr to <16 x double>*
+  // CHECK-NEXT:    store <16 x double> %m, <16 x double>* %0, align 8
+  // CHECK-NEXT:    store %0* %iv, %0** %iv.addr, align 8
+  // CHECK-NEXT:    %1 = load %0*, %0** %iv.addr, align 8
+  // CHECK-NEXT:    %2 = load i8*, i8** @OBJC_SELECTOR_REFERENCES_, align 8, !invariant.load !7
+  // CHECK-NEXT:    %3 = bitcast %0* %1 to i8*
+  // CHECK-NEXT:    %call = call i32 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i32 (i8*, i8*)*)(i8* %3, i8* %2)
+  // CHECK-NEXT:    %4 = load %0*, %0** %iv.addr, align 8
+  // CHECK-NEXT:    %5 = load i8*, i8** @OBJC_SELECTOR_REFERENCES_, align 8, !invariant.load !7
+  // CHECK-NEXT:    %6 = bitcast %0* %4 to i8*
+  // CHECK-NEXT:    %call1 = call i32 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i32 (i8*, i8*)*)(i8* %6, i8* %5)
+  // CHECK-NEXT:    %7 = load <16 x double>, <16 x double>* %0, align 8
+  // CHECK-NEXT:    %8 = mul i32 %call1, 4
+  // CHECK-NEXT:    %9 = add i32 %8, %call
+  // CHECK-NEXT:    %matext = extractelement <16 x double> %7, i32 %9
+  // CHECK-NEXT:    ret double %matext
+  // CHECK-NEXT:  }
+
+  return m[iv.value][iv.value];
+}
+
+__attribute__((objc_root_class))
+@interface MatrixValue
+@property double4x4 value;
+@end
+
+double test_base_and_index_placeholders(MatrixValue *m, IntValue *iv) {
+  // CHECK-LABEL: define double @test_base_and_index_placeholders(%1* %m, %0* %iv)
+  // CHECK-NEXT:  entry:
+  // CHECK-NEXT:    %m.addr = alloca %1*, align 8
+  // CHECK-NEXT:    %iv.addr = alloca %0*, align 8
+  // CHECK-NEXT:    store %1* %m, %1** %m.addr, align 8
+  // CHECK-NEXT:    store %0* %iv, %0** %iv.addr, align 8
+  // CHECK-NEXT:    %0 = load %0*, %0** %iv.addr, align 8
+  // CHECK-NEXT:    %1 = load i8*, i8** @OBJC_SELECTOR_REFERENCES_, align 8, !invariant.load !7
+  // CHECK-NEXT:    %2 = bitcast %0* %0 to i8*
+  // CHECK-NEXT:    %call = call i32 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i32 (i8*, i8*)*)(i8* %2, i8* %1)
+  // CHECK-NEXT:    %3 = load %0*, %0** %iv.addr, align 8
+  // CHECK-NEXT:    %4 = load i8*, i8** @OBJC_SELECTOR_REFERENCES_, align 8, !invariant.load !7
+  // CHECK-NEXT:    %5 = bitcast %0* %3 to i8*
+  // CHECK-NEXT:    %call1 = call i32 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i32 (i8*, i8*)*)(i8* %5, i8* %4)
+  // CHECK-NEXT:    %6 = load %1*, %1** %m.addr, align 8
+  // CHECK-NEXT:    %7 = load i8*, i8** @OBJC_SELECTOR_REFERENCES_, align 8, !invariant.load !7
+  // CHECK-NEXT:    %8 = bitcast %1* %6 to i8*
+  // CHECK-NEXT:    %call2 = call <16 x double> bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to <16 x double> (i8*, i8*)*)(i8* %8, i8* %7)
+  // CHECK-NEXT:    %9 = mul i32 %call1, 4
+  // CHECK-NEXT:    %10 = add i32 %9, %call
+  // CHECK-NEXT:    %matext = extractelement <16 x double> %call2, i32 %10
+  // CHECK-NEXT:    ret double %matext
+
+  return m.value[iv.value][iv.value];
+}
Index: clang/test/CodeGenCXX/matrix-type-operators.cpp
===================================================================
--- /dev/null
+++ clang/test/CodeGenCXX/matrix-type-operators.cpp
@@ -0,0 +1,260 @@
+// RUN: %clang_cc1 -fenable-matrix -triple x86_64-apple-darwin %s -emit-llvm -disable-llvm-passes -o - -std=c++11 | FileCheck %s
+
+typedef double dx5x5_t __attribute__((matrix_type(5, 5)));
+using fx2x3_t = float __attribute__((matrix_type(2, 3)));
+
+void insert_fp(dx5x5_t *a, double d, fx2x3_t *b, float e) {
+  (*a)[0u][1u] = d;
+  (*b)[1u][0u] = e;
+
+  // CHECK-LABEL: @_Z9insert_fpPU11matrix_typeLm5ELm5EddPU11matrix_typeLm2ELm3Eff(
+  // CHECK-NEXT: entry:
+  // CHECK-NEXT:    %a.addr = alloca [25 x double]*, align 8
+  // CHECK-NEXT:    %d.addr = alloca double, align 8
+  // CHECK-NEXT:    %b.addr = alloca [6 x float]*, align 8
+  // CHECK-NEXT:    %e.addr = alloca float, align 4
+  // CHECK-NEXT:    store [25 x double]* %a, [25 x double]** %a.addr, align 8
+  // CHECK-NEXT:    store double %d, double* %d.addr, align 8
+  // CHECK-NEXT:    store [6 x float]* %b, [6 x float]** %b.addr, align 8
+  // CHECK-NEXT:    store float %e, float* %e.addr, align 4
+  // CHECK-NEXT:    %0 = load double, double* %d.addr, align 8
+  // CHECK-NEXT:    %1 = load [25 x double]*, [25 x double]** %a.addr, align 8
+  // CHECK-NEXT:    %2 = bitcast [25 x double]* %1 to <25 x double>*
+  // CHECK-NEXT:    %3 = load <25 x double>, <25 x double>* %2, align 8
+  // CHECK-NEXT:    %matins = insertelement <25 x double> %3, double %0, i32 5
+  // CHECK-NEXT:    store <25 x double> %matins, <25 x double>* %2, align 8
+  // CHECK-NEXT:    %4 = load float, float* %e.addr, align 4
+  // CHECK-NEXT:    %5 = load [6 x float]*, [6 x float]** %b.addr, align 8
+  // CHECK-NEXT:    %6 = bitcast [6 x float]* %5 to <6 x float>*
+  // CHECK-NEXT:    %7 = load <6 x float>, <6 x float>* %6, align 4
+  // CHECK-NEXT:    %matins1 = insertelement <6 x float> %7, float %4, i32 1
+  // CHECK-NEXT:    store <6 x float> %matins1, <6 x float>* %6, align 4
+  // CHECK-NEXT:    ret void
+}
+
+typedef int ix9x3_t __attribute__((matrix_type(9, 3)));
+
+void insert_int(ix9x3_t *a, int i) {
+  (*a)[4u][1u] = i;
+
+  // CHECK-LABEL: @_Z10insert_intPU11matrix_typeLm9ELm3Eii(
+  // CHECK-NEXT: entry:
+  // CHECK-NEXT:    %a.addr = alloca [27 x i32]*, align 8
+  // CHECK-NEXT:    %i.addr = alloca i32, align 4
+  // CHECK-NEXT:    store [27 x i32]* %a, [27 x i32]** %a.addr, align 8
+  // CHECK-NEXT:    store i32 %i, i32* %i.addr, align 4
+  // CHECK-NEXT:    %0 = load i32, i32* %i.addr, align 4
+  // CHECK-NEXT:    %1 = load [27 x i32]*, [27 x i32]** %a.addr, align 8
+  // CHECK-NEXT:    %2 = bitcast [27 x i32]* %1 to <27 x i32>*
+  // CHECK-NEXT:    %3 = load <27 x i32>, <27 x i32>* %2, align 4
+  // CHECK-NEXT:    %matins = insertelement <27 x i32> %3, i32 %0, i32 13
+  // CHECK-NEXT:    store <27 x i32> %matins, <27 x i32>* %2, align 4
+  // CHECK-NEXT:    ret void
+}
+
+template <typename EltTy, unsigned Rows, unsigned Columns>
+struct MyMatrix {
+  using matrix_t = EltTy __attribute__((matrix_type(Rows, Columns)));
+
+  matrix_t value;
+};
+
+template <typename EltTy, unsigned Rows, unsigned Columns>
+void insert(MyMatrix<EltTy, Rows, Columns> &Mat, EltTy e) {
+  Mat.value[1u][0u] = e;
+}
+
+void test_template(unsigned *Ptr1, unsigned E1, float *Ptr2, float E2) {
+
+  // CHECK-LABEL: define void @_Z13test_templatePjjPff(i32* %Ptr1, i32 %E1, float* %Ptr2, float %E2)
+  // CHECK-NEXT:  entry:
+  // CHECK-NEXT:    %Ptr1.addr = alloca i32*, align 8
+  // CHECK-NEXT:    %E1.addr = alloca i32, align 4
+  // CHECK-NEXT:    %Ptr2.addr = alloca float*, align 8
+  // CHECK-NEXT:    %E2.addr = alloca float, align 4
+  // CHECK-NEXT:    %Mat1 = alloca %struct.MyMatrix, align 4
+  // CHECK-NEXT:    %Mat2 = alloca %struct.MyMatrix.0, align 4
+  // CHECK-NEXT:    store i32* %Ptr1, i32** %Ptr1.addr, align 8
+  // CHECK-NEXT:    store i32 %E1, i32* %E1.addr, align 4
+  // CHECK-NEXT:    store float* %Ptr2, float** %Ptr2.addr, align 8
+  // CHECK-NEXT:    store float %E2, float* %E2.addr, align 4
+  // CHECK-NEXT:    %0 = load i32*, i32** %Ptr1.addr, align 8
+  // CHECK-NEXT:    %1 = bitcast i32* %0 to [4 x i32]*
+  // CHECK-NEXT:    %2 = bitcast [4 x i32]* %1 to <4 x i32>*
+  // CHECK-NEXT:    %3 = load <4 x i32>, <4 x i32>* %2, align 4
+  // CHECK-NEXT:    %value = getelementptr inbounds %struct.MyMatrix, %struct.MyMatrix* %Mat1, i32 0, i32 0
+  // CHECK-NEXT:    %4 = bitcast [4 x i32]* %value to <4 x i32>*
+  // CHECK-NEXT:    store <4 x i32> %3, <4 x i32>* %4, align 4
+  // CHECK-NEXT:    %5 = load i32, i32* %E1.addr, align 4
+  // CHECK-NEXT:    call void @_Z6insertIjLj2ELj2EEvR8MyMatrixIT_XT0_EXT1_EES1_(%struct.MyMatrix* nonnull align 4 dereferenceable(16) %Mat1, i32 %5)
+  // CHECK-NEXT:    %6 = load float*, float** %Ptr2.addr, align 8
+  // CHECK-NEXT:    %7 = bitcast float* %6 to [24 x float]*
+  // CHECK-NEXT:    %8 = bitcast [24 x float]* %7 to <24 x float>*
+  // CHECK-NEXT:    %9 = load <24 x float>, <24 x float>* %8, align 4
+  // CHECK-NEXT:    %value1 = getelementptr inbounds %struct.MyMatrix.0, %struct.MyMatrix.0* %Mat2, i32 0, i32 0
+  // CHECK-NEXT:    %10 = bitcast [24 x float]* %value1 to <24 x float>*
+  // CHECK-NEXT:    store <24 x float> %9, <24 x float>* %10, align 4
+  // CHECK-NEXT:    %11 = load float, float* %E2.addr, align 4
+  // CHECK-NEXT:    call void @_Z6insertIfLj3ELj8EEvR8MyMatrixIT_XT0_EXT1_EES1_(%struct.MyMatrix.0* nonnull align 4 dereferenceable(96) %Mat2, float %11)
+  // CHECK-NEXT:    ret void
+
+  // CHECK-LABEL: define linkonce_odr void @_Z6insertIjLj2ELj2EEvR8MyMatrixIT_XT0_EXT1_EES1_(%struct.MyMatrix* nonnull align 4 dereferenceable(16) %Mat, i32 %e)
+  // CHECK-NEXT:  entry:
+  // CHECK-NEXT:    %Mat.addr = alloca %struct.MyMatrix*, align 8
+  // CHECK-NEXT:    %e.addr = alloca i32, align 4
+  // CHECK-NEXT:    store %struct.MyMatrix* %Mat, %struct.MyMatrix** %Mat.addr, align 8
+  // CHECK-NEXT:    store i32 %e, i32* %e.addr, align 4
+  // CHECK-NEXT:    %0 = load i32, i32* %e.addr, align 4
+  // CHECK-NEXT:    %1 = load %struct.MyMatrix*, %struct.MyMatrix** %Mat.addr, align 8
+  // CHECK-NEXT:    %value = getelementptr inbounds %struct.MyMatrix, %struct.MyMatrix* %1, i32 0, i32 0
+  // CHECK-NEXT:    %2 = bitcast [4 x i32]* %value to <4 x i32>*
+  // CHECK-NEXT:    %3 = load <4 x i32>, <4 x i32>* %2, align 4
+  // CHECK-NEXT:    %matins = insertelement <4 x i32> %3, i32 %0, i32 1
+  // CHECK-NEXT:    store <4 x i32> %matins, <4 x i32>* %2, align 4
+  // CHECK-NEXT:    ret void
+
+  // CHECK-LABEL: define linkonce_odr void @_Z6insertIfLj3ELj8EEvR8MyMatrixIT_XT0_EXT1_EES1_(%struct.MyMatrix.0* nonnull align 4 dereferenceable(96) %Mat, float %e)
+  // CHECK-NEXT:  entry:
+  // CHECK-NEXT:    %Mat.addr = alloca %struct.MyMatrix.0*, align 8
+  // CHECK-NEXT:    %e.addr = alloca float, align 4
+  // CHECK-NEXT:    store %struct.MyMatrix.0* %Mat, %struct.MyMatrix.0** %Mat.addr, align 8
+  // CHECK-NEXT:    store float %e, float* %e.addr, align 4
+  // CHECK-NEXT:    %0 = load float, float* %e.addr, align 4
+  // CHECK-NEXT:    %1 = load %struct.MyMatrix.0*, %struct.MyMatrix.0** %Mat.addr, align 8
+  // CHECK-NEXT:    %value = getelementptr inbounds %struct.MyMatrix.0, %struct.MyMatrix.0* %1, i32 0, i32 0
+  // CHECK-NEXT:    %2 = bitcast [24 x float]* %value to <24 x float>*
+  // CHECK-NEXT:    %3 = load <24 x float>, <24 x float>* %2, align 4
+  // CHECK-NEXT:    %matins = insertelement <24 x float> %3, float %0, i32 1
+  // CHECK-NEXT:    store <24 x float> %matins, <24 x float>* %2, align 4
+  // CHECK-NEXT:    ret void
+
+  MyMatrix<unsigned, 2, 2> Mat1;
+  Mat1.value = *((decltype(Mat1)::matrix_t *)Ptr1);
+  insert(Mat1, E1);
+
+  MyMatrix<float, 3, 8> Mat2;
+  Mat2.value = *((decltype(Mat2)::matrix_t *)Ptr2);
+  insert(Mat2, E2);
+}
+
+typedef float fx3x3_t __attribute__((matrix_type(3, 3)));
+void extract1(dx5x5_t a, fx3x3_t b, ix9x3_t c) {
+  // CHECK-LABEL: @_Z8extract1U11matrix_typeLm5ELm5EdU11matrix_typeLm3ELm3EfU11matrix_typeLm9ELm3Ei(
+  // CHECK-NEXT:  entry:
+  // CHECK-NEXT:    %a.addr = alloca [25 x double], align 8
+  // CHECK-NEXT:    %b.addr = alloca [9 x float], align 4
+  // CHECK-NEXT:    %c.addr = alloca [27 x i32], align 4
+  // CHECK-NEXT:    %v1 = alloca double, align 8
+  // CHECK-NEXT:    %v2 = alloca float, align 4
+  // CHECK-NEXT:    %v3 = alloca i32, align 4
+  // CHECK-NEXT:    %0 = bitcast [25 x double]* %a.addr to <25 x double>*
+  // CHECK-NEXT:    store <25 x double> %a, <25 x double>* %0, align 8
+  // CHECK-NEXT:    %1 = bitcast [9 x float]* %b.addr to <9 x float>*
+  // CHECK-NEXT:    store <9 x float> %b, <9 x float>* %1, align 4
+  // CHECK-NEXT:    %2 = bitcast [27 x i32]* %c.addr to <27 x i32>*
+  // CHECK-NEXT:    store <27 x i32> %c, <27 x i32>* %2, align 4
+  // CHECK-NEXT:    %3 = load <25 x double>, <25 x double>* %0, align 8
+  // CHECK-NEXT:    %matext = extractelement <25 x double> %3, i32 17
+  // CHECK-NEXT:    store double %matext, double* %v1, align 8
+  // CHECK-NEXT:    %4 = load <9 x float>, <9 x float>* %1, align 4
+  // CHECK-NEXT:    %matext1 = extractelement <9 x float> %4, i32 5
+  // CHECK-NEXT:    store float %matext1, float* %v2, align 4
+  // CHECK-NEXT:    %5 = load <27 x i32>, <27 x i32>* %2, align 4
+  // CHECK-NEXT:    %matext2 = extractelement <27 x i32> %5, i32 10
+  // CHECK-NEXT:    store i32 %matext2, i32* %v3, align 4
+  // CHECK-NEXT:    ret void
+
+  double v1 = a[2][3];
+  float v2 = b[2][1];
+  int v3 = c[1][1];
+}
+
+template <typename EltTy, unsigned Rows, unsigned Columns>
+EltTy extract(MyMatrix<EltTy, Rows, Columns> &Mat) {
+  return Mat.value[1u][0u];
+}
+
+void test_extract_template(unsigned *Ptr1, float *Ptr2) {
+  // CHECK-LABEL: define void @_Z21test_extract_templatePjPf(i32* %Ptr1, float* %Ptr2)
+  // CHECK-NEXT:  entry:
+  // CHECK-NEXT:    %Ptr1.addr = alloca i32*, align 8
+  // CHECK-NEXT:    %Ptr2.addr = alloca float*, align 8
+  // CHECK-NEXT:    %Mat1 = alloca %struct.MyMatrix, align 4
+  // CHECK-NEXT:    %v1 = alloca i32, align 4
+  // CHECK-NEXT:    store i32* %Ptr1, i32** %Ptr1.addr, align 8
+  // CHECK-NEXT:    store float* %Ptr2, float** %Ptr2.addr, align 8
+  // CHECK-NEXT:    %0 = load i32*, i32** %Ptr1.addr, align 8
+  // CHECK-NEXT:    %1 = bitcast i32* %0 to [4 x i32]*
+  // CHECK-NEXT:    %2 = bitcast [4 x i32]* %1 to <4 x i32>*
+  // CHECK-NEXT:    %3 = load <4 x i32>, <4 x i32>* %2, align 4
+  // CHECK-NEXT:    %value = getelementptr inbounds %struct.MyMatrix, %struct.MyMatrix* %Mat1, i32 0, i32 0
+  // CHECK-NEXT:    %4 = bitcast [4 x i32]* %value to <4 x i32>*
+  // CHECK-NEXT:    store <4 x i32> %3, <4 x i32>* %4, align 4
+  // CHECK-NEXT:    %call = call i32 @_Z7extractIjLj2ELj2EET_R8MyMatrixIS0_XT0_EXT1_EE(%struct.MyMatrix* nonnull align 4 dereferenceable(16) %Mat1)
+  // CHECK-NEXT:    store i32 %call, i32* %v1, align 4
+  // CHECK-NEXT:    ret void
+
+  // CHECK-LABEL: define linkonce_odr i32 @_Z7extractIjLj2ELj2EET_R8MyMatrixIS0_XT0_EXT1_EE(%struct.MyMatrix* nonnull align 4 dereferenceable(16) %Mat)
+  // CHECK-NEXT:  entry:
+  // CHECK-NEXT:    %Mat.addr = alloca %struct.MyMatrix*, align 8
+  // CHECK-NEXT:    store %struct.MyMatrix* %Mat, %struct.MyMatrix** %Mat.addr, align 8
+  // CHECK-NEXT:    %0 = load %struct.MyMatrix*, %struct.MyMatrix** %Mat.addr, align 8
+  // CHECK-NEXT:    %value = getelementptr inbounds %struct.MyMatrix, %struct.MyMatrix* %0, i32 0, i32 0
+  // CHECK-NEXT:    %1 = bitcast [4 x i32]* %value to <4 x i32>*
+  // CHECK-NEXT:    %2 = load <4 x i32>, <4 x i32>* %1, align 4
+  // CHECK-NEXT:    %matext = extractelement <4 x i32> %2, i32 1
+  // CHECK-NEXT:    ret i32 %matext
+
+  MyMatrix<unsigned, 2, 2> Mat1;
+  Mat1.value = *((decltype(Mat1)::matrix_t *)Ptr1);
+  unsigned v1 = extract(Mat1);
+}
+
+using double4x4 = double __attribute__((matrix_type(4, 4)));
+
+template <class R, class C>
+auto matrix_subscript(double4x4 m, R r, C c) -> decltype(m[r][c]) {}
+
+double test_matrix_subscript(double4x4 m) {
+  // CHECK-LABEL: define double @_Z21test_matrix_subscriptU11matrix_typeLm4ELm4Ed(<16 x double> %m)
+  // CHECK-NEXT:  entry:
+  // CHECK-NEXT:    %m.addr = alloca [16 x double], align 8
+  // CHECK-NEXT:    %0 = bitcast [16 x double]* %m.addr to <16 x double>*
+  // CHECK-NEXT:    store <16 x double> %m, <16 x double>* %0, align 8
+  // CHECK-NEXT:    %1 = load <16 x double>, <16 x double>* %0, align 8
+  // CHECK-NEXT:    %call = call nonnull align 8 dereferenceable(8) double* @_Z16matrix_subscriptIiiEDTixixfp_fp0_fp1_EU11matrix_typeLm4ELm4EdT_T0_(<16 x double> %1, i32 1, i32 2)
+  // CHECK-NEXT:    %2 = load double, double* %call, align 8
+  // CHECK-NEXT:    ret double %2
+  // CHECK-NEXT:  }
+
+  // CHECK-LABEL:  define linkonce_odr nonnull align 8 dereferenceable(8) double* @_Z16matrix_subscriptIiiEDTixixfp_fp0_fp1_EU11matrix_typeLm4ELm4EdT_T0_(<16 x double> %m, i32 %r, i32 %c)
+  // CHECK-NEXT:  entry:
+  // CHECK-NEXT:    %m.addr = alloca [16 x double], align 8
+  // CHECK-NEXT:    %r.addr = alloca i32, align 4
+  // CHECK-NEXT:    %c.addr = alloca i32, align 4
+  // CHECK-NEXT:    %0 = bitcast [16 x double]* %m.addr to <16 x double>*
+  // CHECK-NEXT:    store <16 x double> %m, <16 x double>* %0, align 8
+  // CHECK-NEXT:    store i32 %r, i32* %r.addr, align 4
+  // CHECK-NEXT:    store i32 %c, i32* %c.addr, align 4
+  // CHECK-NEXT:    call void @llvm.trap()
+  // CHECK-NEXT:    unreachable
+  // CHECK-NEXT:  }
+
+  return matrix_subscript(m, 1, 2);
+}
+
+const double &test_matrix_subscript_reference(const double4x4 m) {
+  // CHECK-LABEL: define nonnull align 8 dereferenceable(8) double* @_Z31test_matrix_subscript_referenceU11matrix_typeLm4ELm4Ed(<16 x double> %m)
+  // CHECK-NEXT:  entry:
+  // CHECK-NEXT:    %m.addr = alloca [16 x double], align 8
+  // CHECK-NEXT:    %ref.tmp = alloca double, align 8
+  // CHECK-NEXT:    %0 = bitcast [16 x double]* %m.addr to <16 x double>*
+  // CHECK-NEXT:    store <16 x double> %m, <16 x double>* %0, align 8
+  // CHECK-NEXT:    %1 = load <16 x double>, <16 x double>* %0, align 8
+  // CHECK-NEXT:    %matext = extractelement <16 x double> %1, i32 4
+  // CHECK-NEXT:    store double %matext, double* %ref.tmp, align 8
+  // CHECK-NEXT:    ret double* %ref.tmp
+  // CHECK-NEXT:  }
+
+  return m[0][1];
+}
Index: clang/test/CodeGen/matrix-type-operators.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/matrix-type-operators.c
@@ -0,0 +1,314 @@
+// RUN: %clang_cc1 -fenable-matrix -triple x86_64-apple-darwin %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s
+
+// Tests for the matrix type operators.
+
+typedef double dx5x5_t __attribute__((matrix_type(5, 5)));
+typedef float fx2x3_t __attribute__((matrix_type(2, 3)));
+
+// Check that we can use matrix index expression on different floating point
+// matrixes and indices.
+void insert_fp(dx5x5_t a, double d, fx2x3_t b, float e, int j, unsigned k) {
+  // CHECK-LABEL: define void @insert_fp(<25 x double> %a, double %d, <6 x float> %b, float %e, i32 %j, i32 %k)
+  // CHECK-NEXT:  entry:
+  // CHECK-NEXT:    %a.addr = alloca [25 x double], align 8
+  // CHECK-NEXT:    %d.addr = alloca double, align 8
+  // CHECK-NEXT:    %b.addr = alloca [6 x float], align 4
+  // CHECK-NEXT:    %e.addr = alloca float, align 4
+  // CHECK-NEXT:    %j.addr = alloca i32, align 4
+  // CHECK-NEXT:    %k.addr = alloca i32, align 4
+  // CHECK-NEXT:    %0 = bitcast [25 x double]* %a.addr to <25 x double>*
+  // CHECK-NEXT:    store <25 x double> %a, <25 x double>* %0, align 8
+  // CHECK-NEXT:    store double %d, double* %d.addr, align 8
+  // CHECK-NEXT:    %1 = bitcast [6 x float]* %b.addr to <6 x float>*
+  // CHECK-NEXT:    store <6 x float> %b, <6 x float>* %1, align 4
+  // CHECK-NEXT:    store float %e, float* %e.addr, align 4
+  // CHECK-NEXT:    store i32 %j, i32* %j.addr, align 4
+  // CHECK-NEXT:    store i32 %k, i32* %k.addr, align 4
+  // CHECK-NEXT:    %2 = load double, double* %d.addr, align 8
+  // CHECK-NEXT:    %3 = load <25 x double>, <25 x double>* %0, align 8
+  // CHECK-NEXT:    %matins = insertelement <25 x double> %3, double %2, i64 5
+  // CHECK-NEXT:    store <25 x double> %matins, <25 x double>* %0, align 8
+  a[0ll][1u] = d;
+
+  // CHECK-NEXT:    %4 = load float, float* %e.addr, align 4
+  // CHECK-NEXT:    %5 = load <6 x float>, <6 x float>* %1, align 4
+  // CHECK-NEXT:    %matins1 = insertelement <6 x float> %5, float %4, i32 1
+  // CHECK-NEXT:    store <6 x float> %matins1, <6 x float>* %1, align 4
+  b[1][0] = e;
+
+  // CHECK-NEXT:    %6 = load double, double* %d.addr, align 8
+  // CHECK-NEXT:    %7 = load <25 x double>, <25 x double>* %0, align 8
+  // CHECK-NEXT:    %matins2 = insertelement <25 x double> %7, double %6, i32 1
+  // CHECK-NEXT:    store <25 x double> %matins2, <25 x double>* %0, align 8
+  a[1][0u] = d;
+
+  // CHECK-NEXT:    %8 = load float, float* %e.addr, align 4
+  // CHECK-NEXT:    %9 = load <6 x float>, <6 x float>* %1, align 4
+  // CHECK-NEXT:    %matins3 = insertelement <6 x float> %9, float %8, i64 3
+  // CHECK-NEXT:    store <6 x float> %matins3, <6 x float>* %1, align 4
+  b[1ull][1] = e;
+
+  // CHECK-NEXT:    %10 = load float, float* %e.addr, align 4
+  // CHECK-NEXT:    %11 = load i32, i32* %j.addr, align 4
+  // CHECK-NEXT:    %12 = load i32, i32* %k.addr, align 4
+  // CHECK-NEXT:    %13 = mul i32 %12, 2
+  // CHECK-NEXT:    %14 = add i32 %13, %11
+  // CHECK-NEXT:    %15 = load <6 x float>, <6 x float>* %1, align 4
+  // CHECK-NEXT:    %matins4 = insertelement <6 x float> %15, float %10, i32 %14
+  // CHECK-NEXT:    store <6 x float> %matins4, <6 x float>* %1, align 4
+  b[j][k] = e;
+
+  // CHECK-NEXT:    %16 = load float, float* %e.addr, align 4
+  // CHECK-NEXT:    %17 = load i32, i32* %j.addr, align 4
+  // CHECK-NEXT:    %18 = load i32, i32* %k.addr, align 4
+  // CHECK-NEXT:    %19 = mul i32 %18, 2
+  // CHECK-NEXT:    %20 = add i32 %19, %17
+  // CHECK-NEXT:    %21 = load <6 x float>, <6 x float>* %1, align 4
+  // CHECK-NEXT:    %matins5 = insertelement <6 x float> %21, float %16, i32 %20
+  // CHECK-NEXT:    store <6 x float> %matins5, <6 x float>* %1, align 4
+  // CHECK-NEXT:    ret void
+  (b)[j][k] = e;
+}
+
+// Check that we can use matrix index expressions on integer matrixes.
+typedef int ix9x3_t __attribute__((matrix_type(9, 3)));
+void insert_int(ix9x3_t a, int i) {
+  // CHECK-LABEL: define void @insert_int(<27 x i32> %a, i32 %i)
+  // CHECK-NEXT:  entry:
+  // CHECK-NEXT:    %a.addr = alloca [27 x i32], align 4
+  // CHECK-NEXT:    %i.addr = alloca i32, align 4
+  // CHECK-NEXT:    %0 = bitcast [27 x i32]* %a.addr to <27 x i32>*
+  // CHECK-NEXT:    store <27 x i32> %a, <27 x i32>* %0, align 4
+  // CHECK-NEXT:    store i32 %i, i32* %i.addr, align 4
+  // CHECK-NEXT:    %1 = load i32, i32* %i.addr, align 4
+  // CHECK-NEXT:    %2 = load <27 x i32>, <27 x i32>* %0, align 4
+  // CHECK-NEXT:    %matins = insertelement <27 x i32> %2, i32 %1, i32 13
+  // CHECK-NEXT:    store <27 x i32> %matins, <27 x i32>* %0, align 4
+  // CHECK-NEXT:    ret void
+
+  a[4u][1u] = i;
+}
+
+// Check that we can use matrix index expressions on FP and integer
+// matrixes.
+typedef int ix9x3_t __attribute__((matrix_type(9, 3)));
+void insert_int_fp(ix9x3_t *a, int i, fx2x3_t b, float e, short j, unsigned long long k) {
+  // CHECK-LABEL: define void @insert_int_fp([27 x i32]* %a, i32 %i, <6 x float> %b, float %e, i16 signext %j, i64 %k)
+  // CHECK-NEXT:  entry:
+  // CHECK-NEXT:    %a.addr = alloca [27 x i32]*, align 8
+  // CHECK-NEXT:    %i.addr = alloca i32, align 4
+  // CHECK-NEXT:    %b.addr = alloca [6 x float], align 4
+  // CHECK-NEXT:    %e.addr = alloca float, align 4
+  // CHECK-NEXT:    %j.addr = alloca i16, align 2
+  // CHECK-NEXT:    %k.addr = alloca i64, align 8
+  // CHECK-NEXT:    store [27 x i32]* %a, [27 x i32]** %a.addr, align 8
+  // CHECK-NEXT:    store i32 %i, i32* %i.addr, align 4
+  // CHECK-NEXT:    %0 = bitcast [6 x float]* %b.addr to <6 x float>*
+  // CHECK-NEXT:    store <6 x float> %b, <6 x float>* %0, align 4
+  // CHECK-NEXT:    store float %e, float* %e.addr, align 4
+  // CHECK-NEXT:    store i16 %j, i16* %j.addr, align 2
+  // CHECK-NEXT:    store i64 %k, i64* %k.addr, align 8
+  // CHECK-NEXT:    %1 = load i32, i32* %i.addr, align 4
+  // CHECK-NEXT:    %2 = load [27 x i32]*, [27 x i32]** %a.addr, align 8
+  // CHECK-NEXT:    %3 = bitcast [27 x i32]* %2 to <27 x i32>*
+  // CHECK-NEXT:    %4 = load <27 x i32>, <27 x i32>* %3, align 4
+  // CHECK-NEXT:    %matins = insertelement <27 x i32> %4, i32 %1, i32 13
+  // CHECK-NEXT:    store <27 x i32> %matins, <27 x i32>* %3, align 4
+  (*a)[4u][1u] = i;
+
+  // CHECK-NEXT:    %5 = load float, float* %e.addr, align 4
+  // CHECK-NEXT:    %6 = load <6 x float>, <6 x float>* %0, align 4
+  // CHECK-NEXT:    %matins1 = insertelement <6 x float> %6, float %5, i32 3
+  // CHECK-NEXT:    store <6 x float> %matins1, <6 x float>* %0, align 4
+  b[1u][1u] = e;
+
+  // CHECK-NEXT:    %7 = load float, float* %e.addr, align 4
+  // CHECK-NEXT:    %8 = load i16, i16* %j.addr, align 2
+  // CHECK-NEXT:    %9 = load i64, i64* %k.addr, align 8
+  // CHECK-NEXT:    %10 = zext i16 %8 to i64
+  // CHECK-NEXT:    %11 = mul i64 %9, 2
+  // CHECK-NEXT:    %12 = add i64 %11, %10
+  // CHECK-NEXT:    %13 = load <6 x float>, <6 x float>* %0, align 4
+  // CHECK-NEXT:    %matins2 = insertelement <6 x float> %13, float %7, i64 %12
+  // CHECK-NEXT:    store <6 x float> %matins2, <6 x float>* %0, align 4
+  // CHECK-NEXT:    ret void
+  b[j][k] = e;
+}
+
+// Check that we can use overloaded matrix index expressions on matrixes with
+// matching dimensions, but different element types.
+typedef double dx3x3_t __attribute__((matrix_type(3, 3)));
+typedef float fx3x3_t __attribute__((matrix_type(3, 3)));
+void insert_matching_dimensions(dx3x3_t a, double i, fx3x3_t b, float e, long int j, char k) {
+  // CHECK-LABEL: define void @insert_matching_dimensions(<9 x double> %a, double %i, <9 x float> %b, float %e, i64 %j, i8 signext %k) #3 {
+  // CHECK-NEXT:  entry:
+  // CHECK-NEXT:    %a.addr = alloca [9 x double], align 8
+  // CHECK-NEXT:    %i.addr = alloca double, align 8
+  // CHECK-NEXT:    %b.addr = alloca [9 x float], align 4
+  // CHECK-NEXT:    %e.addr = alloca float, align 4
+  // CHECK-NEXT:    %j.addr = alloca i64, align 8
+  // CHECK-NEXT:    %k.addr = alloca i8, align 1
+  // CHECK-NEXT:    %0 = bitcast [9 x double]* %a.addr to <9 x double>*
+  // CHECK-NEXT:    store <9 x double> %a, <9 x double>* %0, align 8
+  // CHECK-NEXT:    store double %i, double* %i.addr, align 8
+  // CHECK-NEXT:    %1 = bitcast [9 x float]* %b.addr to <9 x float>*
+  // CHECK-NEXT:    store <9 x float> %b, <9 x float>* %1, align 4
+  // CHECK-NEXT:    store float %e, float* %e.addr, align 4
+  // CHECK-NEXT:    store i64 %j, i64* %j.addr, align 8
+  // CHECK-NEXT:    store i8 %k, i8* %k.addr, align 1
+  // CHECK-NEXT:    %2 = load double, double* %i.addr, align 8
+  // CHECK-NEXT:    %3 = load <9 x double>, <9 x double>* %0, align 8
+  // CHECK-NEXT:    %matins = insertelement <9 x double> %3, double %2, i32 5
+  // CHECK-NEXT:    store <9 x double> %matins, <9 x double>* %0, align 8
+  a[2u][1u] = i;
+
+  // CHECK-NEXT:    %4 = load float, float* %e.addr, align 4
+  // CHECK-NEXT:    %5 = load <9 x float>, <9 x float>* %1, align 4
+  // CHECK-NEXT:    %matins1 = insertelement <9 x float> %5, float %4, i32 7
+  // CHECK-NEXT:    store <9 x float> %matins1, <9 x float>* %1, align 4
+  b[1u][2u] = e;
+
+  // CHECK-NEXT:    %6 = load double, double* %i.addr, align 8
+  // CHECK-NEXT:    %conv = fptrunc double %6 to float
+  // CHECK-NEXT:    %7 = load i64, i64* %j.addr, align 8
+  // CHECK-NEXT:    %8 = load i8, i8* %k.addr, align 1
+  // CHECK-NEXT:    %9 = zext i8 %8 to i64
+  // CHECK-NEXT:    %10 = mul i64 %9, 3
+  // CHECK-NEXT:    %11 = add i64 %10, %7
+  // CHECK-NEXT:    %12 = load <9 x float>, <9 x float>* %1, align 4
+  // CHECK-NEXT:    %matins2 = insertelement <9 x float> %12, float %conv, i64 %11
+  // CHECK-NEXT:    store <9 x float> %matins2, <9 x float>* %1, align 4
+  // CHECK-NEXT:    ret void
+  b[j][k] = i;
+}
+
+void extract1(dx5x5_t a, fx3x3_t b, ix9x3_t c, unsigned long j) {
+  // CHECK-LABEL: @extract1(
+  // CHECK-NEXT:  entry:
+  // CHECK-NEXT:    %a.addr = alloca [25 x double], align 8
+  // CHECK-NEXT:    %b.addr = alloca [9 x float], align 4
+  // CHECK-NEXT:    %c.addr = alloca [27 x i32], align 4
+  // CHECK-NEXT:    %j.addr = alloca i64, align 8
+  // CHECK-NEXT:    %v1 = alloca double, align 8
+  // CHECK-NEXT:    %v2 = alloca float, align 4
+  // CHECK-NEXT:    %v3 = alloca i32, align 4
+  // CHECK-NEXT:    %v4 = alloca i32, align 4
+  // CHECK-NEXT:    %0 = bitcast [25 x double]* %a.addr to <25 x double>*
+  // CHECK-NEXT:    store <25 x double> %a, <25 x double>* %0, align 8
+  // CHECK-NEXT:    %1 = bitcast [9 x float]* %b.addr to <9 x float>*
+  // CHECK-NEXT:    store <9 x float> %b, <9 x float>* %1, align 4
+  double v1 = a[2][3];
+
+  // CHECK-NEXT:    %2 = bitcast [27 x i32]* %c.addr to <27 x i32>*
+  // CHECK-NEXT:    store <27 x i32> %c, <27 x i32>* %2, align 4
+  // CHECK-NEXT:    store i64 %j, i64* %j.addr, align 8
+  // CHECK-NEXT:    %3 = load <25 x double>, <25 x double>* %0, align 8
+  // CHECK-NEXT:    %matext = extractelement <25 x double> %3, i32 17
+  // CHECK-NEXT:    store double %matext, double* %v1, align 8
+  float v2 = b[2][1];
+
+  // CHECK-NEXT:    %4 = load <9 x float>, <9 x float>* %1, align 4
+  // CHECK-NEXT:    %matext1 = extractelement <9 x float> %4, i32 5
+  // CHECK-NEXT:    store float %matext1, float* %v2, align 4
+  // CHECK-NEXT:    %5 = load <27 x i32>, <27 x i32>* %2, align 4
+  // CHECK-NEXT:    %matext2 = extractelement <27 x i32> %5, i32 10
+  // CHECK-NEXT:    store i32 %matext2, i32* %v3, align 4
+  int v3 = c[1][1];
+
+  // CHECK-NEXT:    %6 = load i64, i64* %j.addr, align 8
+  // CHECK-NEXT:    %7 = load i64, i64* %j.addr, align 8
+  // CHECK-NEXT:    %8 = load <27 x i32>, <27 x i32>* %2, align 4
+  // CHECK-NEXT:    %9 = mul i64 %7, 9
+  // CHECK-NEXT:    %10 = add i64 %9, %6
+  // CHECK-NEXT:    %matext3 = extractelement <27 x i32> %8, i64 %10
+  // CHECK-NEXT:    store i32 %matext3, i32* %v4, align 4
+  // CHECK-NEXT:    ret void
+  int v4 = c[j][j];
+}
+
+typedef double dx3x2_t __attribute__((matrix_type(3, 2)));
+double test_extract_matrix_pointer(dx5x5_t *ptr, dx3x2_t **ptr2) {
+  // CHECK-LABEL: define double @test_extract_matrix_pointer([25 x double]* %ptr, [6 x double]** %ptr2)
+  // CHECK-NEXT:  entry:
+  // CHECK-NEXT:    %ptr.addr = alloca [25 x double]*, align 8
+  // CHECK-NEXT:    %ptr2.addr = alloca [6 x double]**, align 8
+  // CHECK-NEXT:    store [25 x double]* %ptr, [25 x double]** %ptr.addr, align 8
+  // CHECK-NEXT:    store [6 x double]** %ptr2, [6 x double]*** %ptr2.addr, align 8
+  // CHECK-NEXT:    %0 = load [25 x double]*, [25 x double]** %ptr.addr, align 8
+  // CHECK-NEXT:    %arrayidx = getelementptr inbounds [25 x double], [25 x double]* %0, i64 0
+  // CHECK-NEXT:    %1 = bitcast [25 x double]* %arrayidx to <25 x double>*
+  // CHECK-NEXT:    %2 = load <25 x double>, <25 x double>* %1, align 8
+  // CHECK-NEXT:    %matext = extractelement <25 x double> %2, i32 17
+  // CHECK-NEXT:    %3 = load [6 x double]**, [6 x double]*** %ptr2.addr, align 8
+  // CHECK-NEXT:    %arrayidx1 = getelementptr inbounds [6 x double]*, [6 x double]** %3, i64 1
+  // CHECK-NEXT:    %4 = load [6 x double]*, [6 x double]** %arrayidx1, align 8
+  // CHECK-NEXT:    %arrayidx2 = getelementptr inbounds [6 x double], [6 x double]* %4, i64 2
+  // CHECK-NEXT:    %5 = bitcast [6 x double]* %arrayidx2 to <6 x double>*
+  // CHECK-NEXT:    %6 = load <6 x double>, <6 x double>* %5, align 8
+  // CHECK-NEXT:    %matext3 = extractelement <6 x double> %6, i32 3
+  // CHECK-NEXT:    %add = fadd double %matext, %matext3
+  // CHECK-NEXT:    %7 = load [6 x double]**, [6 x double]*** %ptr2.addr, align 8
+  // CHECK-NEXT:    %add.ptr = getelementptr inbounds [6 x double]*, [6 x double]** %7, i64 4
+  // CHECK-NEXT:    %8 = load [6 x double]*, [6 x double]** %add.ptr, align 8
+  // CHECK-NEXT:    %add.ptr4 = getelementptr inbounds [6 x double], [6 x double]* %8, i64 6
+  // CHECK-NEXT:    %9 = bitcast [6 x double]* %add.ptr4 to <6 x double>*
+  // CHECK-NEXT:    %10 = load <6 x double>, <6 x double>* %9, align 8
+  // CHECK-NEXT:    %matext5 = extractelement <6 x double> %10, i32 1
+  // CHECK-NEXT:    %add6 = fadd double %add, %matext5
+  // CHECK-NEXT:    ret double %add6
+
+  return (ptr[0])[2][3] + ptr2[1][2][0][1] + (*(*(ptr2 + 4) + 6))[1][0];
+}
+void insert_extract(dx5x5_t a, fx3x3_t b, unsigned long j, short k) {
+  // CHECK-LABEL: define void @insert_extract(<25 x double> %a, <9 x float> %b, i64 %j, i16 signext %k)
+  // CHECK-NEXT:  entry:
+  // CHECK-NEXT:    %a.addr = alloca [25 x double], align 8
+  // CHECK-NEXT:    %b.addr = alloca [9 x float], align 4
+  // CHECK-NEXT:    %j.addr = alloca i64, align 8
+  // CHECK-NEXT:    %k.addr = alloca i16, align 2
+  // CHECK-NEXT:    %0 = bitcast [25 x double]* %a.addr to <25 x double>*
+  // CHECK-NEXT:    store <25 x double> %a, <25 x double>* %0, align 8
+  // CHECK-NEXT:    %1 = bitcast [9 x float]* %b.addr to <9 x float>*
+  // CHECK-NEXT:    store <9 x float> %b, <9 x float>* %1, align 4
+  // CHECK-NEXT:    store i64 %j, i64* %j.addr, align 8
+  // CHECK-NEXT:    store i16 %k, i16* %k.addr, align 2
+  // CHECK-NEXT:    %2 = load i64, i64* %j.addr, align 8
+  // CHECK-NEXT:    %3 = load i16, i16* %k.addr, align 2
+  // CHECK-NEXT:    %4 = load <9 x float>, <9 x float>* %1, align 4
+  // CHECK-NEXT:    %5 = zext i16 %3 to i64
+  // CHECK-NEXT:    %6 = mul i64 %5, 3
+  // CHECK-NEXT:    %7 = add i64 %6, %2
+  // CHECK-NEXT:    %matext = extractelement <9 x float> %4, i64 %7
+  // CHECK-NEXT:    %conv = fpext float %matext to double
+  // CHECK-NEXT:    %8 = load <25 x double>, <25 x double>* %0, align 8
+  // CHECK-NEXT:    %matins = insertelement <25 x double> %8, double %conv, i32 17
+  // CHECK-NEXT:    store <25 x double> %matins, <25 x double>* %0, align 8
+  a[2][3] = b[j][k];
+
+  // CHECK-NEXT:    %9 = load <9 x float>, <9 x float>* %1, align 4
+  // CHECK-NEXT:    %matext1 = extractelement <9 x float> %9, i32 3
+  // CHECK-NEXT:    %10 = load i16, i16* %k.addr, align 2
+  // CHECK-NEXT:    %11 = load i64, i64* %j.addr, align 8
+  // CHECK-NEXT:    %12 = zext i16 %10 to i64
+  // CHECK-NEXT:    %13 = mul i64 %11, 3
+  // CHECK-NEXT:    %14 = add i64 %13, %12
+  // CHECK-NEXT:    %15 = load <9 x float>, <9 x float>* %1, align 4
+  // CHECK-NEXT:    %matins2 = insertelement <9 x float> %15, float %matext1, i64 %14
+  // CHECK-NEXT:    store <9 x float> %matins2, <9 x float>* %1, align 4
+  b[k][j] = b[0][1];
+
+  // CHECK-NEXT:    %16 = load i16, i16* %k.addr, align 2
+  // CHECK-NEXT:    %17 = load <9 x float>, <9 x float>* %1, align 4
+  // CHECK-NEXT:    %18 = zext i16 %16 to i32
+  // CHECK-NEXT:    %19 = mul i32 %18, 3
+  // CHECK-NEXT:    %20 = add i32 %19, 0
+  // CHECK-NEXT:    %matext3 = extractelement <9 x float> %17, i32 %20
+  // CHECK-NEXT:    %21 = load i64, i64* %j.addr, align 8
+  // CHECK-NEXT:    %22 = mul i64 %21, 3
+  // CHECK-NEXT:    %23 = add i64 %22, 2
+  // CHECK-NEXT:    %24 = load <9 x float>, <9 x float>* %1, align 4
+  // CHECK-NEXT:    %matins4 = insertelement <9 x float> %24, float %matext3, i64 %23
+  // CHECK-NEXT:    store <9 x float> %matins4, <9 x float>* %1, align 4
+  // CHECK-NEXT:    ret void
+  b[2][j] = b[0][k];
+}
Index: clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
===================================================================
--- clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
+++ clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
@@ -1515,6 +1515,10 @@
       Bldr.addNodes(Dst);
       break;
 
+    case Stmt::MatrixSubscriptExprClass:
+      llvm_unreachable("Support for MatrixSubscriptExpr is not implemented.");
+      break;
+
     case Stmt::GCCAsmStmtClass:
       Bldr.takeNodes(Pred);
       VisitGCCAsmStmt(cast<GCCAsmStmt>(S), Pred, Dst);
Index: clang/lib/Serialization/ASTWriterStmt.cpp
===================================================================
--- clang/lib/Serialization/ASTWriterStmt.cpp
+++ clang/lib/Serialization/ASTWriterStmt.cpp
@@ -772,6 +772,15 @@
   Code = serialization::EXPR_ARRAY_SUBSCRIPT;
 }
 
+void ASTStmtWriter::VisitMatrixSubscriptExpr(MatrixSubscriptExpr *E) {
+  VisitExpr(E);
+  Record.AddStmt(E->getBase()); // Operand order mirrors ASTStmtReader.
+  Record.AddStmt(E->getRowIdx());
+  Record.AddStmt(E->getColumnIdx());
+  Record.AddSourceLocation(E->getRBracketLoc());
+  Code = serialization::EXPR_MATRIX_SUBSCRIPT; // Reader dispatches on this.
+}
+
 void ASTStmtWriter::VisitOMPArraySectionExpr(OMPArraySectionExpr *E) {
   VisitExpr(E);
   Record.AddStmt(E->getBase());
Index: clang/lib/Serialization/ASTReaderStmt.cpp
===================================================================
--- clang/lib/Serialization/ASTReaderStmt.cpp
+++ clang/lib/Serialization/ASTReaderStmt.cpp
@@ -907,6 +907,14 @@
   E->setRBracketLoc(readSourceLocation());
 }
 
+void ASTStmtReader::VisitMatrixSubscriptExpr(MatrixSubscriptExpr *E) {
+  VisitExpr(E);
+  E->setBase(Record.readSubExpr()); // Same order as the writer: base,
+  E->setRowIdx(Record.readSubExpr()); // then row index,
+  E->setColumnIdx(Record.readSubExpr()); // then column index,
+  E->setRBracketLoc(readSourceLocation()); // then the ']' location.
+}
+
 void ASTStmtReader::VisitOMPArraySectionExpr(OMPArraySectionExpr *E) {
   VisitExpr(E);
   E->setBase(Record.readSubExpr());
@@ -2926,6 +2934,10 @@
       S = new (Context) ArraySubscriptExpr(Empty);
       break;
 
+    case EXPR_MATRIX_SUBSCRIPT:
+      S = new (Context) MatrixSubscriptExpr(Empty);
+      break;
+
     case EXPR_OMP_ARRAY_SECTION:
       S = new (Context) OMPArraySectionExpr(Empty);
       break;
Index: clang/lib/Serialization/ASTReader.cpp
===================================================================
--- clang/lib/Serialization/ASTReader.cpp
+++ clang/lib/Serialization/ASTReader.cpp
@@ -7007,6 +7007,9 @@
     case PREDEF_TYPE_BUILTIN_FN:
       T = Context.BuiltinFnTy;
       break;
+    case PREDEF_TYPE_INCOMPLETE_MATRIX_IDX:
+      T = Context.IncompleteMatrixIdxTy;
+      break;
     case PREDEF_TYPE_OMP_ARRAY_SECTION:
       T = Context.OMPArraySectionTy;
       break;
Index: clang/lib/Serialization/ASTCommon.cpp
===================================================================
--- clang/lib/Serialization/ASTCommon.cpp
+++ clang/lib/Serialization/ASTCommon.cpp
@@ -240,6 +240,9 @@
   case BuiltinType::BuiltinFn:
     ID = PREDEF_TYPE_BUILTIN_FN;
     break;
+  case BuiltinType::IncompleteMatrixIdx:
+    ID = PREDEF_TYPE_INCOMPLETE_MATRIX_IDX;
+    break;
   case BuiltinType::OMPArraySection:
     ID = PREDEF_TYPE_OMP_ARRAY_SECTION;
     break;
Index: clang/lib/Sema/TreeTransform.h
===================================================================
--- clang/lib/Sema/TreeTransform.h
+++ clang/lib/Sema/TreeTransform.h
@@ -2419,6 +2419,17 @@
                                              RBracketLoc);
   }
 
+  /// Build a new matrix subscript expression.
+  ///
+  /// By default, performs semantic analysis to build the new expression.
+  /// Subclasses may override this routine to provide different behavior.
+  ExprResult RebuildMatrixSubscriptExpr(Expr *Base, Expr *RowIdx,
+                                        Expr *ColumnIdx,
+                                        SourceLocation RBracketLoc) {
+    return getSema().CreateBuiltinMatrixSubscriptExpr(Base, RowIdx, ColumnIdx,
+                                                      RBracketLoc);
+  }
+
   /// Build a new array section expression.
   ///
   /// By default, performs semantic analysis to build the new expression.
@@ -10277,6 +10288,29 @@
       /*FIXME:*/ E->getLHS()->getBeginLoc(), RHS.get(), E->getRBracketLoc());
 }
 
+template <typename Derived>
+ExprResult
+TreeTransform<Derived>::TransformMatrixSubscriptExpr(MatrixSubscriptExpr *E) {
+  ExprResult Base = getDerived().TransformExpr(E->getBase());
+  if (Base.isInvalid())
+    return ExprError();
+
+  ExprResult RowIdx = getDerived().TransformExpr(E->getRowIdx());
+  if (RowIdx.isInvalid())
+    return ExprError();
+
+  ExprResult ColumnIdx = getDerived().TransformExpr(E->getColumnIdx());
+  if (ColumnIdx.isInvalid())
+    return ExprError();
+
+  if (!getDerived().AlwaysRebuild() && Base.get() == E->getBase() &&
+      RowIdx.get() == E->getRowIdx() && ColumnIdx.get() == E->getColumnIdx())
+    return E; // No operand changed and no forced rebuild: reuse the node.
+
+  return getDerived().RebuildMatrixSubscriptExpr(
+      Base.get(), RowIdx.get(), ColumnIdx.get(), E->getRBracketLoc());
+}
+
 template <typename Derived>
 ExprResult
 TreeTransform<Derived>::TransformOMPArraySectionExpr(OMPArraySectionExpr *E) {
Index: clang/lib/Sema/SemaInit.cpp
===================================================================
--- clang/lib/Sema/SemaInit.cpp
+++ clang/lib/Sema/SemaInit.cpp
@@ -3494,6 +3494,7 @@
   case FK_NonConstLValueReferenceBindingToTemporary:
   case FK_NonConstLValueReferenceBindingToBitfield:
   case FK_NonConstLValueReferenceBindingToVectorElement:
+  case FK_NonConstLValueReferenceBindingToMatrixElement:
   case FK_NonConstLValueReferenceBindingToUnrelated:
   case FK_RValueReferenceBindingToLValue:
   case FK_ReferenceAddrspaceMismatchTemporary:
@@ -4687,7 +4688,8 @@
 /// which a reference can never bind). Attempting to bind a reference to
 /// such a glvalue will always create a temporary.
 static bool isNonReferenceableGLValue(Expr *E) {
-  return E->refersToBitField() || E->refersToVectorElement();
+  return E->refersToBitField() || E->refersToVectorElement() ||
+         E->refersToMatrixElement();
 }
 
 /// Reference initialization without resolving overloaded functions.
@@ -4808,6 +4810,9 @@
         else if (Initializer->refersToVectorElement())
           FK = InitializationSequence::
               FK_NonConstLValueReferenceBindingToVectorElement;
+        else if (Initializer->refersToMatrixElement())
+          FK = InitializationSequence::
+              FK_NonConstLValueReferenceBindingToMatrixElement;
         else
           llvm_unreachable("unexpected kind of compatible initializer");
         break;
@@ -8925,6 +8930,11 @@
       << Args[0]->getSourceRange();
     break;
 
+  case FK_NonConstLValueReferenceBindingToMatrixElement:
+    S.Diag(Kind.getLocation(), diag::err_reference_bind_to_matrix_element)
+        << DestType.isVolatileQualified() << Args[0]->getSourceRange();
+    break;
+
   case FK_RValueReferenceBindingToLValue:
     S.Diag(Kind.getLocation(), diag::err_lvalue_to_rvalue_ref)
       << DestType.getNonReferenceType() << OnlyArg->getType()
@@ -9270,6 +9280,10 @@
       OS << "non-const lvalue reference bound to vector element";
       break;
 
+    case FK_NonConstLValueReferenceBindingToMatrixElement:
+      OS << "non-const lvalue reference bound to matrix element";
+      break;
+
     case FK_NonConstLValueReferenceBindingToUnrelated:
       OS << "non-const lvalue reference bound to unrelated type";
       break;
Index: clang/lib/Sema/SemaExpr.cpp
===================================================================
--- clang/lib/Sema/SemaExpr.cpp
+++ clang/lib/Sema/SemaExpr.cpp
@@ -4546,6 +4546,12 @@
     base = result.get();
   }
 
+  // Check if base and idx form a MatrixSubscriptExpr.
+  ExprResult MaybeMatrixSubscript =
+      ActOnMatrixSubscriptExpr(S, base, idx, rbLoc);
+  if (!MaybeMatrixSubscript.isUnset())
+    return MaybeMatrixSubscript;
+
   // A comma-expression as the index is deprecated in C++2a onwards.
   if (getLangOpts().CPlusPlus20 &&
       ((isa<BinaryOperator>(idx) && cast<BinaryOperator>(idx)->isCommaOp()) ||
@@ -4621,6 +4627,121 @@
   return Res;
 }
 
+ExprResult Sema::ActOnMatrixSubscriptExpr(Scope *S, Expr *Base, Expr *Idx,
+                                          SourceLocation RBLoc) {
+  // Helper to check for comma expressions, which are not allowed as indices for
+  // matrix subscript expressions.
+  auto CheckAndReportCommaError = [this, Base, RBLoc](Expr *E) {
+    if (isa<BinaryOperator>(E) && cast<BinaryOperator>(E)->isCommaOp()) {
+      Diag(E->getExprLoc(), diag::err_matrix_subscript_comma)
+          << SourceRange(Base->getBeginLoc(), RBLoc);
+      return true;
+    }
+    return false;
+  };
+
+  // The matrix subscript operator ([][]) is considered a single operator.
+  // Separating the index expressions by parentheses is not allowed.
+  if (Base->getType()->isSpecificPlaceholderType(
+          BuiltinType::IncompleteMatrixIdx) &&
+      !isa<MatrixSubscriptExpr>(Base)) {
+    Diag(Base->getExprLoc(), diag::err_matrix_separate_incomplete_index)
+        << SourceRange(Base->getBeginLoc(), RBLoc);
+    return ExprError();
+  }
+
+  // If the base is either a MatrixSubscriptExpr or a matrix type, try to create
+  // a new MatrixSubscriptExpr.
+  auto *SubscriptE = dyn_cast<MatrixSubscriptExpr>(Base);
+  if (SubscriptE) {
+    if (CheckAndReportCommaError(Idx))
+      return ExprError();
+
+    assert(SubscriptE->isIncomplete() &&
+           "base has to be an incomplete matrix subscript");
+    return CreateBuiltinMatrixSubscriptExpr(
+        SubscriptE->getBase(), SubscriptE->getRowIdx(), Idx, RBLoc);
+  }
+
+  bool IsMSPropertySubscript = isMSPropertySubscriptExpr(*this, Base);
+  if (!IsMSPropertySubscript) {
+    ExprResult Result = CheckPlaceholderExpr(Base);
+    if (Result.isInvalid())
+      return ExprError(); // An invalid base is an error, not "no match".
+    Base = Result.get();
+  }
+
+  if (Base->getType()->isMatrixType()) {
+    if (CheckAndReportCommaError(Idx))
+      return ExprError();
+
+    return CreateBuiltinMatrixSubscriptExpr(Base, Idx, nullptr, RBLoc);
+  }
+
+  // No matrix subscript could be formed and no matrix subscript related error
+  // has been generated.
+  return ExprEmpty();
+}
+
+ExprResult Sema::CreateBuiltinMatrixSubscriptExpr(Expr *Base, Expr *RowIdx,
+                                                  Expr *ColumnIdx,
+                                                  SourceLocation RBLoc) {
+  ExprResult RowR = CheckPlaceholderExpr(RowIdx);
+  if (RowR.isInvalid())
+    return RowR;
+  RowIdx = RowR.get();
+
+  ExprResult BaseR = CheckPlaceholderExpr(Base);
+  if (BaseR.isInvalid())
+    return BaseR;
+  Base = BaseR.get();
+
+  if (!ColumnIdx) // Row index only so far; result has the placeholder type.
+    return new (Context) MatrixSubscriptExpr(
+        Base, RowIdx, ColumnIdx, Context.IncompleteMatrixIdxTy, RBLoc);
+
+  // Build an unanalyzed expression if any of the operands is type-dependent.
+  if (Base->isTypeDependent() || RowIdx->isTypeDependent() ||
+      ColumnIdx->isTypeDependent())
+    return new (Context) MatrixSubscriptExpr(Base, RowIdx, ColumnIdx,
+                                             Context.DependentTy, RBLoc);
+
+  ExprResult ColumnR = CheckPlaceholderExpr(ColumnIdx);
+  if (ColumnR.isInvalid())
+    return ColumnR;
+  ColumnIdx = ColumnR.get();
+
+  // Check that IndexExpr is an integer expression. If it is an integer
+  // constant expression, also check that its value lies in [0, Dim), where
+  // Dim is the number of elements in the corresponding dimension.
+  auto IsIndexValid = [&](Expr *IndexExpr, unsigned Dim, bool IsColumnIdx) {
+    if (!IndexExpr->getType()->isIntegerType() &&
+        !IndexExpr->isTypeDependent()) {
+      Diag(IndexExpr->getBeginLoc(), diag::err_matrix_index_not_integer)
+          << IsColumnIdx;
+      return false;
+    }
+
+    llvm::APSInt Idx;
+    if (IndexExpr->isIntegerConstantExpr(Idx, Context) &&
+        (Idx < 0 || Idx >= Dim)) {
+      Diag(IndexExpr->getBeginLoc(), diag::err_matrix_index_outside_range)
+          << IsColumnIdx << Dim;
+      return false;
+    }
+    return true;
+  };
+
+  auto *MTy = Base->getType()->getAs<ConstantMatrixType>();
+  bool RowIdxValid = IsIndexValid(RowIdx, MTy->getNumRows(), false);
+  bool ColumnIdxValid = IsIndexValid(ColumnIdx, MTy->getNumColumns(), true);
+  if (!RowIdxValid || !ColumnIdxValid) // Both indices were checked above.
+    return ExprError();
+
+  return new (Context) MatrixSubscriptExpr(Base, RowIdx, ColumnIdx,
+                                           MTy->getElementType(), RBLoc);
+}
+
 void Sema::CheckAddressOfNoDeref(const Expr *E) {
   ExpressionEvaluationContextRecord &LastRecord = ExprEvalContexts.back();
   const Expr *StrippedExpr = E->IgnoreParenImpCasts();
@@ -5935,6 +6056,7 @@
   // These are always invalid as call arguments and should be reported.
   case BuiltinType::BoundMember:
   case BuiltinType::BuiltinFn:
+  case BuiltinType::IncompleteMatrixIdx:
   case BuiltinType::OMPArraySection:
   case BuiltinType::OMPArrayShaping:
   case BuiltinType::OMPIterator:
@@ -12896,13 +13018,14 @@
 }
 
 namespace {
-  enum {
-    AO_Bit_Field = 0,
-    AO_Vector_Element = 1,
-    AO_Property_Expansion = 2,
-    AO_Register_Variable = 3,
-    AO_No_Error = 4
-  };
+enum {
+  AO_Bit_Field = 0,
+  AO_Vector_Element = 1,
+  AO_Property_Expansion = 2,
+  AO_Register_Variable = 3,
+  AO_Matrix_Element = 4,
+  AO_No_Error = 5
+};
 }
 /// Diagnose invalid operand for address of operations.
 ///
@@ -13069,6 +13192,9 @@
   } else if (op->getObjectKind() == OK_VectorComponent) {
     // The operand cannot be an element of a vector
     AddressOfError = AO_Vector_Element;
+  } else if (op->getObjectKind() == OK_MatrixComponent) {
+    // The operand cannot be an element of a matrix.
+    AddressOfError = AO_Matrix_Element;
   } else if (dcl) { // C99 6.5.3.2p1
     // We have an lvalue with a decl. Make sure the decl is not declared
     // with the register storage-class specifier.
@@ -18861,6 +18987,13 @@
     return ExprError();
   }
 
+  case BuiltinType::IncompleteMatrixIdx:
+    Diag(cast<MatrixSubscriptExpr>(E->IgnoreParens())
+             ->getRowIdx()
+             ->getBeginLoc(),
+         diag::err_matrix_incomplete_index);
+    return ExprError();
+
   // Expressions of unknown type.
   case BuiltinType::OMPArraySection:
     Diag(E->getBeginLoc(), diag::err_omp_array_section_use);
Index: clang/lib/Sema/SemaExceptionSpec.cpp
===================================================================
--- clang/lib/Sema/SemaExceptionSpec.cpp
+++ clang/lib/Sema/SemaExceptionSpec.cpp
@@ -1299,6 +1299,7 @@
 
     // Some might be dependent for other reasons.
   case Expr::ArraySubscriptExprClass:
+  case Expr::MatrixSubscriptExprClass:
   case Expr::OMPArraySectionExprClass:
   case Expr::OMPArrayShapingExprClass:
   case Expr::OMPIteratorExprClass:
Index: clang/lib/Sema/SemaCast.cpp
===================================================================
--- clang/lib/Sema/SemaCast.cpp
+++ clang/lib/Sema/SemaCast.cpp
@@ -2089,6 +2089,9 @@
       return TC_NotApplicable;
       // FIXME: Use a specific diagnostic for the rest of these cases.
     case OK_VectorComponent: inappropriate = "vector element";      break;
+    case OK_MatrixComponent:
+      inappropriate = "matrix element";
+      break;
     case OK_ObjCProperty:    inappropriate = "property expression"; break;
     case OK_ObjCSubscript:   inappropriate = "container subscripting expression";
                              break;
Index: clang/lib/CodeGen/CodeGenFunction.h
===================================================================
--- clang/lib/CodeGen/CodeGenFunction.h
+++ clang/lib/CodeGen/CodeGenFunction.h
@@ -3628,6 +3628,7 @@
   LValue EmitUnaryOpLValue(const UnaryOperator *E);
   LValue EmitArraySubscriptExpr(const ArraySubscriptExpr *E,
                                 bool Accessed = false);
+  LValue EmitMatrixSubscriptExpr(const MatrixSubscriptExpr *E);
   LValue EmitOMPArraySectionExpr(const OMPArraySectionExpr *E,
                                  bool IsLowerBound = true);
   LValue EmitExtVectorElementExpr(const ExtVectorElementExpr *E);
Index: clang/lib/CodeGen/CGValue.h
===================================================================
--- clang/lib/CodeGen/CGValue.h
+++ clang/lib/CodeGen/CGValue.h
@@ -170,7 +170,8 @@
     VectorElt,    // This is a vector element l-value (V[i]), use getVector*
     BitField,     // This is a bitfield l-value, use getBitfield*.
     ExtVectorElt, // This is an extended vector subset, use getExtVectorComp
-    GlobalReg     // This is a register l-value, use getGlobalReg()
+    GlobalReg,    // This is a register l-value, use getGlobalReg()
+    MatrixElt     // This is a matrix element, use getVector*
   } LVType;
 
   llvm::Value *V;
@@ -254,6 +255,7 @@
   bool isBitField() const { return LVType == BitField; }
   bool isExtVectorElt() const { return LVType == ExtVectorElt; }
   bool isGlobalReg() const { return LVType == GlobalReg; }
+  bool isMatrixElt() const { return LVType == MatrixElt; }
 
   bool isVolatileQualified() const { return Quals.hasVolatile(); }
   bool isRestrictQualified() const { return Quals.hasRestrict(); }
@@ -337,8 +339,14 @@
   Address getVectorAddress() const {
     return Address(getVectorPointer(), getAlignment());
   }
-  llvm::Value *getVectorPointer() const { assert(isVectorElt()); return V; }
-  llvm::Value *getVectorIdx() const { assert(isVectorElt()); return VectorIdx; }
+  llvm::Value *getVectorPointer() const {
+    assert(isVectorElt() || isMatrixElt());
+    return V;
+  }
+  llvm::Value *getVectorIdx() const {
+    assert(isVectorElt() || isMatrixElt());
+    return VectorIdx;
+  }
 
   // extended vector elements.
   Address getExtVectorAddress() const {
@@ -430,6 +438,18 @@
     return R;
   }
 
+  static LValue MakeMatrixElt(Address matAddress, llvm::Value *Idx,
+                              QualType type, LValueBaseInfo BaseInfo,
+                              TBAAAccessInfo TBAAInfo) {
+    LValue R;
+    R.LVType = MatrixElt;
+    R.V = matAddress.getPointer();
+    R.VectorIdx = Idx;
+    R.Initialize(type, type.getQualifiers(), matAddress.getAlignment(),
+                 BaseInfo, TBAAInfo);
+    return R;
+  }
+
   RValue asAggregateRValue(CodeGenFunction &CGF) const {
     return RValue::getAggregate(getAddress(CGF), isVolatileQualified());
   }
Index: clang/lib/CodeGen/CGExprScalar.cpp
===================================================================
--- clang/lib/CodeGen/CGExprScalar.cpp
+++ clang/lib/CodeGen/CGExprScalar.cpp
@@ -37,6 +37,7 @@
 #include "llvm/IR/GlobalVariable.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/IntrinsicsPowerPC.h"
+#include "llvm/IR/MatrixBuilder.h"
 #include "llvm/IR/Module.h"
 #include <cstdarg>
 
@@ -577,6 +578,7 @@
   }
 
   Value *VisitArraySubscriptExpr(ArraySubscriptExpr *E);
+  Value *VisitMatrixSubscriptExpr(MatrixSubscriptExpr *E);
   Value *VisitShuffleVectorExpr(ShuffleVectorExpr *E);
   Value *VisitConvertVectorExpr(ConvertVectorExpr *E);
   Value *VisitMemberExpr(MemberExpr *E);
@@ -1808,6 +1810,22 @@
   return Builder.CreateExtractElement(Base, Idx, "vecext");
 }
 
+Value *ScalarExprEmitter::VisitMatrixSubscriptExpr(MatrixSubscriptExpr *E) {
+  TestAndClearIgnoreResultAssign();
+
+  // Handle the matrix case.  The base must be a matrix, the indices must be
+  // integer values.
+  Value *RowIdx = Visit(E->getRowIdx());
+  Value *ColumnIdx = Visit(E->getColumnIdx());
+  Value *Matrix = Visit(E->getBase());
+
+  // TODO: Should we emit bounds checks with SanitizerKind::ArrayBounds?
+  llvm::MatrixBuilder<CGBuilderTy> MB(Builder);
+  return MB.CreateExtractElement(
+      Matrix, RowIdx, ColumnIdx,
+      E->getBase()->getType()->getAs<ConstantMatrixType>()->getNumRows());
+}
+
 static int getMaskElt(llvm::ShuffleVectorInst *SVI, unsigned Idx,
                       unsigned Off) {
   int MV = SVI->getMaskValue(Idx);
Index: clang/lib/CodeGen/CGExpr.cpp
===================================================================
--- clang/lib/CodeGen/CGExpr.cpp
+++ clang/lib/CodeGen/CGExpr.cpp
@@ -1369,6 +1369,8 @@
     return EmitUnaryOpLValue(cast<UnaryOperator>(E));
   case Expr::ArraySubscriptExprClass:
     return EmitArraySubscriptExpr(cast<ArraySubscriptExpr>(E));
+  case Expr::MatrixSubscriptExprClass:
+    return EmitMatrixSubscriptExpr(cast<MatrixSubscriptExpr>(E));
   case Expr::OMPArraySectionExprClass:
     return EmitOMPArraySectionExpr(cast<OMPArraySectionExpr>(E));
   case Expr::ExtVectorElementExprClass:
@@ -1894,6 +1896,8 @@
   if (LV.isGlobalReg())
     return EmitLoadOfGlobalRegLValue(LV);
 
+  assert(!LV.isMatrixElt() &&
+         "loads of matrix element LValues should be handled elsewhere");
   assert(LV.isBitField() && "Unknown LValue type!");
   return EmitLoadOfBitfieldLValue(LV, Loc);
 }
@@ -1999,6 +2003,19 @@
   return RValue::get(Call);
 }
 
+// Store the specified rvalue into the specified matrix element.
+static void EmitStoreThroughMatrixEltLValue(RValue Src, LValue Dst,
+                                            CodeGenFunction &CGF) {
+  Address DstAddr = MaybeConvertMatrixAddress(
+      Address(Dst.getVectorPointer(),
+              CGF.getContext().getTypeAlignInChars(Dst.getType())),
+      CGF);
+  llvm::Value *Vec = CGF.Builder.CreateLoad(DstAddr);
+  Vec = CGF.Builder.CreateInsertElement(Vec, Src.getScalarVal(),
+                                        Dst.getVectorIdx(), "matins");
+
+  CGF.Builder.CreateStore(Vec, DstAddr, Dst.isVolatileQualified());
+}
 
 /// EmitStoreThroughLValue - Store the specified rvalue into the specified
 /// lvalue, where both are guaranteed to the have the same type, and that type
@@ -2025,6 +2042,9 @@
     if (Dst.isGlobalReg())
       return EmitStoreThroughGlobalRegLValue(Src, Dst);
 
+    if (Dst.isMatrixElt())
+      return EmitStoreThroughMatrixEltLValue(Src, Dst, *this);
+
     assert(Dst.isBitField() && "Unknown LValue type");
     return EmitStoreThroughBitfieldLValue(Src, Dst);
   }
@@ -3755,6 +3775,28 @@
   return LV;
 }
 
+LValue CodeGenFunction::EmitMatrixSubscriptExpr(const MatrixSubscriptExpr *E) {
+  assert(
+      !E->isIncomplete() &&
+      "incomplete matrix subscript expressions should be rejected during Sema");
+  LValue Base = EmitLValue(E->getBase());
+  llvm::Value *RowIdx = EmitScalarExpr(E->getRowIdx());
+  llvm::Value *ColIdx = EmitScalarExpr(E->getColumnIdx());
+  unsigned MaxWidth = std::max(RowIdx->getType()->getScalarSizeInBits(),
+                               ColIdx->getType()->getScalarSizeInBits());
+  llvm::Type *IntTy = llvm::IntegerType::get(getLLVMContext(), MaxWidth);
+  RowIdx = Builder.CreateZExt(RowIdx, IntTy);
+  ColIdx = Builder.CreateZExt(ColIdx, IntTy);
+  llvm::Value *NumRows = Builder.getIntN(
+      MaxWidth,
+      E->getBase()->getType()->getAs<ConstantMatrixType>()->getNumRows());
+  llvm::Value *FinalIdx =
+      Builder.CreateAdd(Builder.CreateMul(ColIdx, NumRows), RowIdx);
+  return LValue::MakeMatrixElt(Base.getAddress(*this), FinalIdx,
+                               E->getBase()->getType(), Base.getBaseInfo(),
+                               TBAAAccessInfo());
+}
+
 static Address emitOMPArraySectionBase(CodeGenFunction &CGF, const Expr *Base,
                                        LValueBaseInfo &BaseInfo,
                                        TBAAAccessInfo &TBAAInfo,
Index: clang/lib/AST/TypeLoc.cpp
===================================================================
--- clang/lib/AST/TypeLoc.cpp
+++ clang/lib/AST/TypeLoc.cpp
@@ -403,6 +403,7 @@
   case BuiltinType::Id:
 #include "clang/Basic/AArch64SVEACLETypes.def"
   case BuiltinType::BuiltinFn:
+  case BuiltinType::IncompleteMatrixIdx:
   case BuiltinType::OMPArraySection:
   case BuiltinType::OMPArrayShaping:
   case BuiltinType::OMPIterator:
Index: clang/lib/AST/Type.cpp
===================================================================
--- clang/lib/AST/Type.cpp
+++ clang/lib/AST/Type.cpp
@@ -3025,6 +3025,8 @@
     return "queue_t";
   case OCLReserveID:
     return "reserve_id_t";
+  case IncompleteMatrixIdx:
+    return "<incomplete matrix index type>";
   case OMPArraySection:
     return "<OpenMP array section type>";
   case OMPArrayShaping:
@@ -4045,6 +4047,7 @@
 #include "clang/Basic/AArch64SVEACLETypes.def"
     case BuiltinType::BuiltinFn:
     case BuiltinType::NullPtr:
+    case BuiltinType::IncompleteMatrixIdx:
     case BuiltinType::OMPArraySection:
     case BuiltinType::OMPArrayShaping:
     case BuiltinType::OMPIterator:
Index: clang/lib/AST/TextNodeDumper.cpp
===================================================================
--- clang/lib/AST/TextNodeDumper.cpp
+++ clang/lib/AST/TextNodeDumper.cpp
@@ -163,6 +163,9 @@
       case OK_VectorComponent:
         OS << " vectorcomponent";
         break;
+      case OK_MatrixComponent:
+        OS << " matrixcomponent";
+        break;
       }
     }
   }
Index: clang/lib/AST/StmtProfile.cpp
===================================================================
--- clang/lib/AST/StmtProfile.cpp
+++ clang/lib/AST/StmtProfile.cpp
@@ -1208,6 +1208,10 @@
   VisitExpr(S);
 }
 
+void StmtProfiler::VisitMatrixSubscriptExpr(const MatrixSubscriptExpr *S) {
+  VisitExpr(S);
+}
+
 void StmtProfiler::VisitOMPArraySectionExpr(const OMPArraySectionExpr *S) {
   VisitExpr(S);
 }
Index: clang/lib/AST/StmtPrinter.cpp
===================================================================
--- clang/lib/AST/StmtPrinter.cpp
+++ clang/lib/AST/StmtPrinter.cpp
@@ -1337,6 +1337,16 @@
   OS << "]";
 }
 
+void StmtPrinter::VisitMatrixSubscriptExpr(MatrixSubscriptExpr *Node) {
+  PrintExpr(Node->getBase());
+  OS << "[";
+  PrintExpr(Node->getRowIdx());
+  OS << "]";
+  OS << "[";
+  PrintExpr(Node->getColumnIdx());
+  OS << "]";
+}
+
 void StmtPrinter::VisitOMPArraySectionExpr(OMPArraySectionExpr *Node) {
   PrintExpr(Node->getBase());
   OS << "[";
Index: clang/lib/AST/NSAPI.cpp
===================================================================
--- clang/lib/AST/NSAPI.cpp
+++ clang/lib/AST/NSAPI.cpp
@@ -482,6 +482,7 @@
   case BuiltinType::Half:
   case BuiltinType::PseudoObject:
   case BuiltinType::BuiltinFn:
+  case BuiltinType::IncompleteMatrixIdx:
   case BuiltinType::OMPArraySection:
   case BuiltinType::OMPArrayShaping:
   case BuiltinType::OMPIterator:
Index: clang/lib/AST/ItaniumMangle.cpp
===================================================================
--- clang/lib/AST/ItaniumMangle.cpp
+++ clang/lib/AST/ItaniumMangle.cpp
@@ -4234,6 +4234,15 @@
     break;
   }
 
+  case Expr::MatrixSubscriptExprClass: {
+    const MatrixSubscriptExpr *ME = cast<MatrixSubscriptExpr>(E);
+    Out << "ixix";
+    mangleExpression(ME->getBase());
+    mangleExpression(ME->getRowIdx());
+    mangleExpression(ME->getColumnIdx());
+    break;
+  }
+
   case Expr::CompoundAssignOperatorClass: // fallthrough
   case Expr::BinaryOperatorClass: {
     const BinaryOperator *BO = cast<BinaryOperator>(E);
Index: clang/lib/AST/ExprConstant.cpp
===================================================================
--- clang/lib/AST/ExprConstant.cpp
+++ clang/lib/AST/ExprConstant.cpp
@@ -14184,6 +14184,7 @@
   case Expr::ImaginaryLiteralClass:
   case Expr::StringLiteralClass:
   case Expr::ArraySubscriptExprClass:
+  case Expr::MatrixSubscriptExprClass:
   case Expr::OMPArraySectionExprClass:
   case Expr::OMPArrayShapingExprClass:
   case Expr::OMPIteratorExprClass:
Index: clang/lib/AST/ExprClassification.cpp
===================================================================
--- clang/lib/AST/ExprClassification.cpp
+++ clang/lib/AST/ExprClassification.cpp
@@ -224,6 +224,10 @@
     }
     return Cl::CL_LValue;
 
+  // Subscripting matrix types behaves like member accesses.
+  case Expr::MatrixSubscriptExprClass:
+    return ClassifyInternal(Ctx, cast<MatrixSubscriptExpr>(E)->getBase());
+
     // C++ [expr.prim.general]p3: The result is an lvalue if the entity is a
     //   function or variable and a prvalue otherwise.
   case Expr::DeclRefExprClass:
Index: clang/lib/AST/Expr.cpp
===================================================================
--- clang/lib/AST/Expr.cpp
+++ clang/lib/AST/Expr.cpp
@@ -3439,6 +3439,7 @@
 
   case ParenExprClass:
   case ArraySubscriptExprClass:
+  case MatrixSubscriptExprClass:
   case OMPArraySectionExprClass:
   case OMPArrayShapingExprClass:
   case OMPIteratorExprClass:
Index: clang/lib/AST/ComputeDependence.cpp
===================================================================
--- clang/lib/AST/ComputeDependence.cpp
+++ clang/lib/AST/ComputeDependence.cpp
@@ -83,6 +83,12 @@
   return E->getLHS()->getDependence() | E->getRHS()->getDependence();
 }
 
+ExprDependence clang::computeDependence(MatrixSubscriptExpr *E) {
+  return E->getBase()->getDependence() | E->getRowIdx()->getDependence() |
+         (E->getColumnIdx() ? E->getColumnIdx()->getDependence()
+                            : ExprDependence::None);
+}
+
 ExprDependence clang::computeDependence(CompoundLiteralExpr *E) {
   return toExprDependence(E->getTypeSourceInfo()->getType()->getDependence()) |
          turnTypeToValueDependence(E->getInitializer()->getDependence());
Index: clang/lib/AST/ASTContext.cpp
===================================================================
--- clang/lib/AST/ASTContext.cpp
+++ clang/lib/AST/ASTContext.cpp
@@ -1388,6 +1388,8 @@
     InitBuiltinType(OMPArrayShapingTy, BuiltinType::OMPArrayShaping);
     InitBuiltinType(OMPIteratorTy, BuiltinType::OMPIterator);
   }
+  if (LangOpts.MatrixTypes)
+    InitBuiltinType(IncompleteMatrixIdxTy, BuiltinType::IncompleteMatrixIdx);
 
   // C99 6.2.5p11.
   FloatComplexTy      = getComplexType(FloatTy);
Index: clang/include/clang/Serialization/ASTBitCodes.h
===================================================================
--- clang/include/clang/Serialization/ASTBitCodes.h
+++ clang/include/clang/Serialization/ASTBitCodes.h
@@ -1057,7 +1057,10 @@
       /// The placeholder type for OpenMP iterator expression.
       PREDEF_TYPE_OMP_ITERATOR = 71,
 
-      /// OpenCL image types with auto numeration
+      /// A placeholder type for incomplete matrix index operations.
+      PREDEF_TYPE_INCOMPLETE_MATRIX_IDX = 72,
+
+    /// OpenCL image types with auto numeration
 #define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \
       PREDEF_TYPE_##Id##_ID,
 #include "clang/Basic/OpenCLImageTypes.def"
@@ -1597,6 +1600,9 @@
       /// An ArraySubscriptExpr record.
       EXPR_ARRAY_SUBSCRIPT,
 
+      /// A MatrixSubscriptExpr record.
+      EXPR_MATRIX_SUBSCRIPT,
+
       /// A CallExpr record.
       EXPR_CALL,
 
Index: clang/include/clang/Sema/Sema.h
===================================================================
--- clang/include/clang/Sema/Sema.h
+++ clang/include/clang/Sema/Sema.h
@@ -4903,6 +4903,13 @@
                                      Expr *Idx, SourceLocation RLoc);
   ExprResult CreateBuiltinArraySubscriptExpr(Expr *Base, SourceLocation LLoc,
                                              Expr *Idx, SourceLocation RLoc);
+
+  ExprResult ActOnMatrixSubscriptExpr(Scope *S, Expr *Base, Expr *Idx,
+                                      SourceLocation RLoc);
+  ExprResult CreateBuiltinMatrixSubscriptExpr(Expr *Base, Expr *RowIdx,
+                                              Expr *ColumnIdx,
+                                              SourceLocation RBLoc);
+
   ExprResult ActOnOMPArraySectionExpr(Expr *Base, SourceLocation LBLoc,
                                       Expr *LowerBound, SourceLocation ColonLoc,
                                       Expr *Length, SourceLocation RBLoc);
Index: clang/include/clang/Sema/Initialization.h
===================================================================
--- clang/include/clang/Sema/Initialization.h
+++ clang/include/clang/Sema/Initialization.h
@@ -999,6 +999,9 @@
     /// Non-const lvalue reference binding to a vector element.
     FK_NonConstLValueReferenceBindingToVectorElement,
 
+    /// Non-const lvalue reference binding to a matrix element.
+    FK_NonConstLValueReferenceBindingToMatrixElement,
+
     /// Non-const lvalue reference binding to an lvalue of unrelated
     /// type.
     FK_NonConstLValueReferenceBindingToUnrelated,
Index: clang/include/clang/Basic/StmtNodes.td
===================================================================
--- clang/include/clang/Basic/StmtNodes.td
+++ clang/include/clang/Basic/StmtNodes.td
@@ -69,6 +69,7 @@
 def OffsetOfExpr : StmtNode<Expr>;
 def UnaryExprOrTypeTraitExpr : StmtNode<Expr>;
 def ArraySubscriptExpr : StmtNode<Expr>;
+def MatrixSubscriptExpr : StmtNode<Expr>;
 def OMPArraySectionExpr : StmtNode<Expr>;
 def OMPIteratorExpr : StmtNode<Expr>;
 def CallExpr : StmtNode<Expr>;
Index: clang/include/clang/Basic/Specifiers.h
===================================================================
--- clang/include/clang/Basic/Specifiers.h
+++ clang/include/clang/Basic/Specifiers.h
@@ -154,7 +154,10 @@
     /// An Objective-C array/dictionary subscripting which reads an
     /// object or writes at the subscripted array/dictionary element via
     /// Objective-C method calls.
-    OK_ObjCSubscript
+    OK_ObjCSubscript,
+
+    /// A matrix component is a single element of a matrix.
+    OK_MatrixComponent
   };
 
   /// The reason why a DeclRefExpr does not constitute an odr-use.
Index: clang/include/clang/Basic/DiagnosticSemaKinds.td
===================================================================
--- clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -2032,6 +2032,8 @@
   "bit-field%select{| %1}2">;
 def err_reference_bind_to_vector_element : Error<
   "%select{non-const|volatile}0 reference cannot bind to vector element">;
+def err_reference_bind_to_matrix_element : Error<
+  "%select{non-const|volatile}0 reference cannot bind to matrix element">;
 def err_reference_var_requires_init : Error<
   "declaration of reference variable %0 requires an initializer">;
 def err_reference_without_init : Error<
@@ -6375,7 +6377,7 @@
 def err_static_block_func : Error<
   "function declared in block scope cannot have 'static' storage class">;
 def err_typecheck_address_of : Error<"address of %select{bit-field"
-  "|vector element|property expression|register variable}0 requested">;
+  "|vector element|property expression|register variable|matrix element}0 requested">;
 def ext_typecheck_addrof_void : Extension<
   "ISO C forbids taking the address of an expression of type 'void'">;
 def err_unqualified_pointer_member_function : Error<
@@ -10751,6 +10753,16 @@
 
 def err_builtin_matrix_disabled: Error<
   "matrix types extension is disabled. Pass -fenable-matrix to enable it">;
+def err_matrix_index_not_integer: Error<
+  "matrix %select{row|column}0 index is not an integer">;
+def err_matrix_index_outside_range: Error<
+  "matrix %select{row|column}0 index is outside the allowed range [0, %1)">;
+def err_matrix_incomplete_index: Error<
+  "single subscript expressions are not allowed for matrix values">;
+def err_matrix_separate_incomplete_index: Error<
+  "matrix row and column subscripts cannot be separated by any expression">;
+def err_matrix_subscript_comma: Error<
+  "comma expressions are not allowed as indices in matrix subscript expressions">;
 
 def err_preserve_field_info_not_field : Error<
   "__builtin_preserve_field_info argument %0 not a field access">;
Index: clang/include/clang/AST/Type.h
===================================================================
--- clang/include/clang/AST/Type.h
+++ clang/include/clang/AST/Type.h
@@ -2050,7 +2050,8 @@
   bool isComplexIntegerType() const;            // GCC _Complex integer type.
   bool isVectorType() const;                    // GCC vector type.
   bool isExtVectorType() const;                 // Extended vector type.
-  bool isConstantMatrixType() const;            // Matrix type.
+  bool isMatrixType() const;                    // Matrix type.
+  bool isConstantMatrixType() const;            // Constant matrix type.
   bool isDependentAddressSpaceType() const;     // value-dependent address space qualifier
   bool isObjCObjectPointerType() const;         // pointer to ObjC object
   bool isObjCRetainableType() const;            // ObjC object or block pointer
@@ -6744,6 +6745,10 @@
   return isa<ExtVectorType>(CanonicalType);
 }
 
+inline bool Type::isMatrixType() const {
+  return isa<MatrixType>(CanonicalType);
+}
+
 inline bool Type::isConstantMatrixType() const {
   return isa<ConstantMatrixType>(CanonicalType);
 }
Index: clang/include/clang/AST/Stmt.h
===================================================================
--- clang/include/clang/AST/Stmt.h
+++ clang/include/clang/AST/Stmt.h
@@ -445,8 +445,9 @@
     unsigned IsType : 1; // true if operand is a type, false if an expression.
   };
 
-  class ArraySubscriptExprBitfields {
+  class ArrayOrMatrixSubscriptExprBitfields {
     friend class ArraySubscriptExpr;
+    friend class MatrixSubscriptExpr;
 
     unsigned : NumExprBits;
 
@@ -999,7 +1000,7 @@
     CharacterLiteralBitfields CharacterLiteralBits;
     UnaryOperatorBitfields UnaryOperatorBits;
     UnaryExprOrTypeTraitExprBitfields UnaryExprOrTypeTraitExprBits;
-    ArraySubscriptExprBitfields ArraySubscriptExprBits;
+    ArrayOrMatrixSubscriptExprBitfields ArrayOrMatrixSubscriptExprBits;
     CallExprBitfields CallExprBits;
     MemberExprBitfields MemberExprBits;
     CastExprBitfields CastExprBits;
Index: clang/include/clang/AST/RecursiveASTVisitor.h
===================================================================
--- clang/include/clang/AST/RecursiveASTVisitor.h
+++ clang/include/clang/AST/RecursiveASTVisitor.h
@@ -2588,6 +2588,7 @@
 // over the children.
 DEF_TRAVERSE_STMT(AddrLabelExpr, {})
 DEF_TRAVERSE_STMT(ArraySubscriptExpr, {})
+DEF_TRAVERSE_STMT(MatrixSubscriptExpr, {})
 DEF_TRAVERSE_STMT(OMPArraySectionExpr, {})
 DEF_TRAVERSE_STMT(OMPArrayShapingExpr, {})
 DEF_TRAVERSE_STMT(OMPIteratorExpr, {})
Index: clang/include/clang/AST/Expr.h
===================================================================
--- clang/include/clang/AST/Expr.h
+++ clang/include/clang/AST/Expr.h
@@ -471,6 +471,11 @@
   /// Returns whether this expression refers to a vector element.
   bool refersToVectorElement() const;
 
+  /// Returns whether this expression refers to a matrix element.
+  bool refersToMatrixElement() const {
+    return getObjectKind() == OK_MatrixComponent;
+  }
+
   /// Returns whether this expression refers to a global register
   /// variable.
   bool refersToGlobalRegisterVar() const;
@@ -2584,7 +2589,7 @@
       : Expr(ArraySubscriptExprClass, t, VK, OK) {
     SubExprs[LHS] = lhs;
     SubExprs[RHS] = rhs;
-    ArraySubscriptExprBits.RBracketLoc = rbracketloc;
+    ArrayOrMatrixSubscriptExprBits.RBracketLoc = rbracketloc;
     setDependence(computeDependence(this));
   }
 
@@ -2621,10 +2626,10 @@
   SourceLocation getEndLoc() const { return getRBracketLoc(); }
 
   SourceLocation getRBracketLoc() const {
-    return ArraySubscriptExprBits.RBracketLoc;
+    return ArrayOrMatrixSubscriptExprBits.RBracketLoc;
   }
   void setRBracketLoc(SourceLocation L) {
-    ArraySubscriptExprBits.RBracketLoc = L;
+    ArrayOrMatrixSubscriptExprBits.RBracketLoc = L;
   }
 
   SourceLocation getExprLoc() const LLVM_READONLY {
@@ -2644,6 +2649,84 @@
   }
 };
 
+/// MatrixSubscriptExpr - Matrix subscript expression for the MatrixType
+/// extension.
+/// MatrixSubscriptExpr can be either incomplete (only Base and RowIdx are set
+/// so far, the type is IncompleteMatrixIdx) or complete (Base, RowIdx and
+/// ColumnIdx refer to valid expressions). Incomplete matrix expressions only
+/// exist during the initial construction of the AST.
+class MatrixSubscriptExpr : public Expr {
+  enum { BASE, ROW_IDX, COLUMN_IDX, END_EXPR };
+  Stmt *SubExprs[END_EXPR];
+
+public:
+  MatrixSubscriptExpr(Expr *Base, Expr *RowIdx, Expr *ColumnIdx, QualType T,
+                      SourceLocation RBracketLoc)
+      : Expr(MatrixSubscriptExprClass, T, Base->getValueKind(),
+             OK_MatrixComponent) {
+    SubExprs[BASE] = Base;
+    SubExprs[ROW_IDX] = RowIdx;
+    SubExprs[COLUMN_IDX] = ColumnIdx;
+    ArrayOrMatrixSubscriptExprBits.RBracketLoc = RBracketLoc;
+    setDependence(computeDependence(this));
+  }
+
+  /// Create an empty matrix subscript expression.
+  explicit MatrixSubscriptExpr(EmptyShell Shell)
+      : Expr(MatrixSubscriptExprClass, Shell) {}
+
+  bool isIncomplete() const {
+    bool IsIncomplete = hasPlaceholderType(BuiltinType::IncompleteMatrixIdx);
+    assert((SubExprs[COLUMN_IDX] || IsIncomplete) &&
+           "expressions without column index must be marked as incomplete");
+    return IsIncomplete;
+  }
+  Expr *getBase() { return cast<Expr>(SubExprs[BASE]); }
+  const Expr *getBase() const { return cast<Expr>(SubExprs[BASE]); }
+  void setBase(Expr *E) { SubExprs[BASE] = E; }
+
+  Expr *getRowIdx() { return cast<Expr>(SubExprs[ROW_IDX]); }
+  const Expr *getRowIdx() const { return cast<Expr>(SubExprs[ROW_IDX]); }
+  void setRowIdx(Expr *E) { SubExprs[ROW_IDX] = E; }
+
+  Expr *getColumnIdx() { return cast_or_null<Expr>(SubExprs[COLUMN_IDX]); }
+  const Expr *getColumnIdx() const {
+    assert(!isIncomplete() &&
+           "cannot get the column index of an incomplete expression");
+    return cast<Expr>(SubExprs[COLUMN_IDX]);
+  }
+  void setColumnIdx(Expr *E) { SubExprs[COLUMN_IDX] = E; }
+
+  SourceLocation getBeginLoc() const LLVM_READONLY {
+    return getBase()->getBeginLoc();
+  }
+
+  SourceLocation getEndLoc() const { return getRBracketLoc(); }
+
+  SourceLocation getExprLoc() const LLVM_READONLY {
+    return getBase()->getExprLoc();
+  }
+
+  SourceLocation getRBracketLoc() const {
+    return ArrayOrMatrixSubscriptExprBits.RBracketLoc;
+  }
+  void setRBracketLoc(SourceLocation L) {
+    ArrayOrMatrixSubscriptExprBits.RBracketLoc = L;
+  }
+
+  static bool classof(const Stmt *T) {
+    return T->getStmtClass() == MatrixSubscriptExprClass;
+  }
+
+  // Iterators
+  child_range children() {
+    return child_range(&SubExprs[0], &SubExprs[0] + END_EXPR);
+  }
+  const_child_range children() const {
+    return const_child_range(&SubExprs[0], &SubExprs[0] + END_EXPR);
+  }
+};
+
 /// CallExpr - Represents a function call (C99 6.5.2.2, C++ [expr.call]).
 /// CallExpr itself represents a normal function call, e.g., "f(x, 2)",
 /// while its subclasses may represent alternative syntax that (semantically)
Index: clang/include/clang/AST/ComputeDependence.h
===================================================================
--- clang/include/clang/AST/ComputeDependence.h
+++ clang/include/clang/AST/ComputeDependence.h
@@ -28,6 +28,7 @@
 class UnaryOperator;
 class UnaryExprOrTypeTraitExpr;
 class ArraySubscriptExpr;
+class MatrixSubscriptExpr;
 class CompoundLiteralExpr;
 class CastExpr;
 class BinaryOperator;
@@ -108,6 +109,7 @@
 ExprDependence computeDependence(UnaryOperator *E);
 ExprDependence computeDependence(UnaryExprOrTypeTraitExpr *E);
 ExprDependence computeDependence(ArraySubscriptExpr *E);
+ExprDependence computeDependence(MatrixSubscriptExpr *E);
 ExprDependence computeDependence(CompoundLiteralExpr *E);
 ExprDependence computeDependence(CastExpr *E);
 ExprDependence computeDependence(BinaryOperator *E);
Index: clang/include/clang/AST/BuiltinTypes.def
===================================================================
--- clang/include/clang/AST/BuiltinTypes.def
+++ clang/include/clang/AST/BuiltinTypes.def
@@ -310,6 +310,9 @@
 // context.
 PLACEHOLDER_TYPE(ARCUnbridgedCast, ARCUnbridgedCastTy)
 
+// A placeholder type for incomplete matrix index expressions.
+PLACEHOLDER_TYPE(IncompleteMatrixIdx, IncompleteMatrixIdxTy)
+
 // A placeholder type for OpenMP array sections.
 PLACEHOLDER_TYPE(OMPArraySection, OMPArraySectionTy)
 
Index: clang/include/clang/AST/ASTContext.h
===================================================================
--- clang/include/clang/AST/ASTContext.h
+++ clang/include/clang/AST/ASTContext.h
@@ -978,6 +978,7 @@
 #include "clang/Basic/OpenCLImageTypes.def"
   CanQualType OCLSamplerTy, OCLEventTy, OCLClkEventTy;
   CanQualType OCLQueueTy, OCLReserveIDTy;
+  CanQualType IncompleteMatrixIdxTy;
   CanQualType OMPArraySectionTy, OMPArrayShapingTy, OMPIteratorTy;
 #define EXT_OPAQUE_TYPE(ExtType, Id, Ext) \
   CanQualType Id##Ty;

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D76791: [Matrix] Implement matrix index expressions ([][]).

Reply via email to