[clang] [llvm] [Clang] Allow vector and matrix type attributes for sub-byte _BitInt (PR #140253)

Dmitry Sidorov via cfe-commits Mon, 19 May 2025 06:18:35 -0700

https://github.com/MrSidims updated 
https://github.com/llvm/llvm-project/pull/140253


>From 67935acd84b032209d45bad22db8e1a9e72c1dcd Mon Sep 17 00:00:00 2001
From: "Sidorov, Dmitry" <dmitry.sido...@intel.com>
Date: Fri, 16 May 2025 06:55:10 -0700
Subject: [PATCH 1/2] [Clang] Allow vector and matrix type attributes for
 sub-byte _BitInt

Signed-off-by: Sidorov, Dmitry <dmitry.sido...@intel.com>
---
 clang/include/clang/Basic/DiagnosticSemaKinds.td |  3 +--
 clang/lib/Sema/SemaType.cpp                      |  4 ++--
 clang/test/SemaCXX/ext-int.cpp                   | 12 ++----------
 clang/test/SemaCXX/matrix-type.cpp               |  3 +--
 4 files changed, 6 insertions(+), 16 deletions(-)

diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index f0bd5a1174020..9f20c07882901 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -3257,8 +3257,7 @@ def err_attribute_too_few_arguments : Error<
   "%0 attribute takes at least %1 argument%s1">;
 def err_attribute_invalid_vector_type : Error<"invalid vector element type %0">;
 def err_attribute_invalid_bitint_vector_type : Error<
-  "'_BitInt' %select{vector|matrix}0 element width must be %select{a power of 2|"
-  "at least as wide as 'CHAR_BIT'}1">;
+  "'_BitInt' %select{vector|matrix}0 element width must be a power of 2">;
 def err_attribute_invalid_matrix_type : Error<"invalid matrix element type %0">;
 def err_attribute_bad_neon_vector_size : Error<
   "Neon vector size must be 64 or 128 bits">;
diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp
index 9ed2326f151a3..28d441234262b 100644
--- a/clang/lib/Sema/SemaType.cpp
+++ b/clang/lib/Sema/SemaType.cpp
@@ -2321,9 +2321,9 @@ static bool CheckBitIntElementType(Sema &S, SourceLocation AttrLoc,
                                    bool ForMatrixType = false) {
   // Only support _BitInt elements with byte-sized power of 2 NumBits.
   unsigned NumBits = BIT->getNumBits();
-  if (!llvm::isPowerOf2_32(NumBits) || NumBits < 8)
+  if (!llvm::isPowerOf2_32(NumBits))
     return S.Diag(AttrLoc, diag::err_attribute_invalid_bitint_vector_type)
-           << ForMatrixType << (NumBits < 8);
+           << ForMatrixType;
   return false;
 }
 
diff --git a/clang/test/SemaCXX/ext-int.cpp b/clang/test/SemaCXX/ext-int.cpp
index d974221e774a7..5c566dafed931 100644
--- a/clang/test/SemaCXX/ext-int.cpp
+++ b/clang/test/SemaCXX/ext-int.cpp
@@ -84,17 +84,9 @@ struct is_same<T,T> {
 };
 
 // Reject vector types:
-// expected-error@+1{{'_BitInt' vector element width must be at least as wide as 'CHAR_BIT'}}
-typedef _BitInt(2) __attribute__((vector_size(16))) VecTy;
-// expected-error@+1{{'_BitInt' vector element width must be at least as wide as 'CHAR_BIT'}}
-typedef _BitInt(2) __attribute__((ext_vector_type(32))) OtherVecTy;
-// expected-error@+1{{'_BitInt' vector element width must be at least as wide as 'CHAR_BIT'}}
-typedef _BitInt(4) __attribute__((vector_size(16))) VecTy2;
-// expected-error@+1{{'_BitInt' vector element width must be at least as wide as 'CHAR_BIT'}}
-typedef _BitInt(4) __attribute__((ext_vector_type(32))) OtherVecTy2;
-// expected-error@+1{{'_BitInt' vector element width must be at least as wide as 'CHAR_BIT'}}
+// expected-error@+1{{'_BitInt' vector element width must be a power of 2}}
 typedef _BitInt(5) __attribute__((vector_size(16))) VecTy3;
-// expected-error@+1{{'_BitInt' vector element width must be at least as wide as 'CHAR_BIT'}}
+// expected-error@+1{{'_BitInt' vector element width must be a power of 2}}
 typedef _BitInt(5) __attribute__((ext_vector_type(32))) OtherVecTy3;
 // expected-error@+1{{'_BitInt' vector element width must be a power of 2}}
 typedef _BitInt(37) __attribute__((vector_size(16))) VecTy4;
diff --git a/clang/test/SemaCXX/matrix-type.cpp b/clang/test/SemaCXX/matrix-type.cpp
index bb7a8421ca9e3..186d3b6b35208 100644
--- a/clang/test/SemaCXX/matrix-type.cpp
+++ b/clang/test/SemaCXX/matrix-type.cpp
@@ -31,8 +31,7 @@ void matrix_unsupported_element_type() {
 }
 
 void matrix_unsupported_bit_int() {
-  using m1 = _BitInt(2) __attribute__((matrix_type(4, 4))); // expected-error{{'_BitInt' matrix element width must be at least as wide as 'CHAR_BIT'}}
-  using m2 = _BitInt(7) __attribute__((matrix_type(4, 4))); // expected-error{{'_BitInt' matrix element width must be at least as wide as 'CHAR_BIT'}}
+  using m2 = _BitInt(7) __attribute__((matrix_type(4, 4))); // expected-error{{'_BitInt' matrix element width must be a power of 2}}
   using m3 = _BitInt(9) __attribute__((matrix_type(4, 4))); // expected-error{{'_BitInt' matrix element width must be a power of 2}}
   using m4 = _BitInt(12) __attribute__((matrix_type(4, 4))); // expected-error{{'_BitInt' matrix element width must be a power of 2}}
   using m5 = _BitInt(8) __attribute__((matrix_type(4, 4)));

>From 513b17b19ed9c416e48fe8cf590081ccc0cb2f91 Mon Sep 17 00:00:00 2001
From: "Sidorov, Dmitry" <dmitry.sido...@intel.com>
Date: Mon, 19 May 2025 06:17:59 -0700
Subject: [PATCH 2/2] Add tests

Signed-off-by: Sidorov, Dmitry <dmitry.sido...@intel.com>
---
 clang/test/CodeGenCXX/ext-int.cpp             | 35 ++++++++++++++
 .../test/CodeGenCXX/matrix-vector-bit-int.cpp | 48 +++++++++++++++++--
 llvm/test/CodeGen/X86/vec-2bit-int.ll         | 34 +++++++++++++
 3 files changed, 114 insertions(+), 3 deletions(-)
 create mode 100644 llvm/test/CodeGen/X86/vec-2bit-int.ll

diff --git a/clang/test/CodeGenCXX/ext-int.cpp b/clang/test/CodeGenCXX/ext-int.cpp
index f470398ec2095..a75b3701e36ef 100644
--- a/clang/test/CodeGenCXX/ext-int.cpp
+++ b/clang/test/CodeGenCXX/ext-int.cpp
@@ -571,6 +571,41 @@ void VectorTest(uint16_t4 first, uint16_t4 second) {
   // CHECK: %[[ADD:.+]] = add <3 x i16> %[[Shuffle]], %[[Shuffle1]]
 }
 
+typedef unsigned _BitInt(4) uint4_t4 __attribute__((ext_vector_type(4)));
+void VectorTest(uint4_t4 first, uint4_t4 second) {
+  // LIN64: define{{.*}} void @_Z10VectorTestDv4_DU4_S0_(i32 %{{.+}}, i32 %{{.+}})
+  // LIN32: define{{.*}} void @_Z10VectorTestDv4_DU4_S0_(<4 x i4> %{{.+}}, <4 x i4> %{{.+}})
+  // WIN64: define dso_local void @"?VectorTest@@YAXT?$__vector@U?$_UBitInt@$03@__clang@@$03@__clang@@0@Z"(<4 x i4> %{{.+}}, <4 x i4> %{{.+}})
+  // WIN32: define dso_local void @"?VectorTest@@YAXT?$__vector@U?$_UBitInt@$03@__clang@@$03@__clang@@0@Z"(<4 x i4> inreg %{{.+}}, <4 x i4> inreg %{{.+}})
+  first.xzw + second.zwx;
+  // CHECK: %[[Shuffle:.+]] = shufflevector <4 x i4> %{{.+}}, <4 x i4> poison, <3 x i32> <i32 0, i32 2, i32 3>
+  // CHECK: %[[Shuffle1:.+]] = shufflevector <4 x i4> %{{.+}}, <4 x i4> poison, <3 x i32> <i32 2, i32 3, i32 0>
+  // CHECK: %[[ADD:.+]] = add <3 x i4> %[[Shuffle]], %[[Shuffle1]]
+}
+
+typedef unsigned _BitInt(2) uint2_t2 __attribute__((ext_vector_type(2)));
+uint2_t2 TestBitIntVector2x2Alloca(uint2_t2 v1, uint2_t2 v2) {
+  // LIN64: define dso_local i16 @_Z25TestBitIntVector2x2AllocaDv2_DU2_S0_(i16 %[[V1Coerce:.+]], i16 %[[V2Coerce:.+]])
+  // LIN64: %[[RetVal:.+]] = alloca <2 x i2>, align 2
+  // LIN64: %[[V1Addr:.+]] = alloca <2 x i2>, align 2
+  // LIN64: %[[V2Addr:.+]] = alloca <2 x i2>, align 2
+  // LIN64: %[[RetValCoerce:.+]] = alloca i16, align 2
+  // LIN64: call void @llvm.memcpy.p0.p0.i64(ptr align 2 %[[RetValCoerce]], ptr align 2 %[[RetVal]], i64 1, i1 false)
+  // LIN64: %[[Ret:.+]] = load i16, ptr %[[RetValCoerce]], align 2
+  // LIN64: ret i16 %[[Ret]]
+
+  // LIN32: define dso_local <2 x i2> @_Z25TestBitIntVector2x2AllocaDv2_DU2_S0_(<2 x i2> %{{.+}}, <2 x i2> %{{.+}})
+  // LIN32: %[[V1Addr:.+]] = alloca <2 x i2>, align 2
+  // LIN32: %[[V2Addr:.+]] = alloca <2 x i2>, align 2
+  // LIN32: ret <2 x i2> %[[Ret:.+]]
+
+  // WIN: define dso_local <2 x i2> @"?TestBitIntVector2x2Alloca@@YAT?$__vector@U?$_UBitInt@$01@__clang@@$01@__clang@@T12@0@Z"(<2 x i2>{{.*}}, <2 x i2>{{.*}})
+  // WIN: %[[V1:.+]] = alloca <2 x i2>, align 2
+  // WIN: %[[V2:.+]] = alloca <2 x i2>, align 2
+  // WIN: ret <2 x i2> %[[Ret:.+]]
+  return v1 + v2;
+}
+
 // Ensure that these types don't alias the normal int types.
 void TBAATest(_BitInt(sizeof(int) * 8) ExtInt,
               unsigned _BitInt(sizeof(int) * 8) ExtUInt,
diff --git a/clang/test/CodeGenCXX/matrix-vector-bit-int.cpp b/clang/test/CodeGenCXX/matrix-vector-bit-int.cpp
index 7dc3b6bd59822..2e7531b334ecb 100644
--- a/clang/test/CodeGenCXX/matrix-vector-bit-int.cpp
+++ b/clang/test/CodeGenCXX/matrix-vector-bit-int.cpp
@@ -7,6 +7,8 @@ using i32x3 = _BitInt(32) __attribute__((ext_vector_type(3)));
 using i32x3x3 = _BitInt(32) __attribute__((matrix_type(3, 3)));
 using i512x3 = _BitInt(512) __attribute__((ext_vector_type(3)));
 using i512x3x3 = _BitInt(512) __attribute__((matrix_type(3, 3)));
+using i4x3 = _BitInt(4) __attribute__((ext_vector_type(3)));
+using i4x3x3 = _BitInt(4) __attribute__((matrix_type(3, 3)));
 
 // CHECK-LABEL: define dso_local i32 @_Z2v1Dv3_DB8_(
 // CHECK-SAME: i32 [[A_COERCE:%.*]]) #[[ATTR0:[0-9]+]] {
@@ -68,8 +70,34 @@ i512x3 v3(i512x3 a) {
   return a + a;
 }
 
+// CHECK-LABEL: define dso_local i32 @_Z2v4Dv3_DB4_(
+// CHECK-SAME: i32 [[A_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[RETVAL:%.*]] = alloca <3 x i4>, align 4
+// CHECK-NEXT:    [[A:%.*]] = alloca <3 x i4>, align 4
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca <3 x i4>, align 4
+// CHECK-NEXT:    [[RETVAL_COERCE:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store i32 [[A_COERCE]], ptr [[A]], align 4
+// CHECK-NEXT:    [[LOADVECN:%.*]] = load <4 x i4>, ptr [[A]], align 4
+// CHECK-NEXT:    [[A1:%.*]] = shufflevector <4 x i4> [[LOADVECN]], <4 x i4> poison, <3 x i32> <i32 0, i32 1, i32 2>
+// CHECK-NEXT:    [[EXTRACTVEC:%.*]] = shufflevector <3 x i4> [[A1]], <3 x i4> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
+// CHECK-NEXT:    store <4 x i4> [[EXTRACTVEC]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[LOADVECN2:%.*]] = load <4 x i4>, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[EXTRACTVEC3:%.*]] = shufflevector <4 x i4> [[LOADVECN2]], <4 x i4> poison, <3 x i32> <i32 0, i32 1, i32 2>
+// CHECK-NEXT:    [[LOADVECN4:%.*]] = load <4 x i4>, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[EXTRACTVEC5:%.*]] = shufflevector <4 x i4> [[LOADVECN4]], <4 x i4> poison, <3 x i32> <i32 0, i32 1, i32 2>
+// CHECK-NEXT:    [[ADD:%.*]] = add <3 x i4> [[EXTRACTVEC3]], [[EXTRACTVEC5]]
+// CHECK-NEXT:    store <3 x i4> [[ADD]], ptr [[RETVAL]], align 4
+// CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[RETVAL_COERCE]], ptr align 4 [[RETVAL]], i64 2, i1 false)
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[RETVAL_COERCE]], align 4
+// CHECK-NEXT:    ret i32 [[TMP0]]
+//
+i4x3 v4(i4x3 a) {
+  return a + a;
+}
+
 // CHECK-LABEL: define dso_local noundef <9 x i8> @_Z2m1u11matrix_typeILm3ELm3EDB8_E(
-// CHECK-SAME: <9 x i8> noundef [[A:%.*]]) #[[ATTR3:[0-9]+]] {
+// CHECK-SAME: <9 x i8> noundef [[A:%.*]]) #[[ATTR4:[0-9]+]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [9 x i8], align 1
 // CHECK-NEXT:    store <9 x i8> [[A]], ptr [[A_ADDR]], align 1
@@ -83,7 +111,7 @@ i8x3x3 m1(i8x3x3 a) {
 }
 
 // CHECK-LABEL: define dso_local noundef <9 x i32> @_Z2m2u11matrix_typeILm3ELm3EDB32_E(
-// CHECK-SAME: <9 x i32> noundef [[A:%.*]]) #[[ATTR4:[0-9]+]] {
+// CHECK-SAME: <9 x i32> noundef [[A:%.*]]) #[[ATTR5:[0-9]+]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [9 x i32], align 4
 // CHECK-NEXT:    store <9 x i32> [[A]], ptr [[A_ADDR]], align 4
@@ -97,7 +125,7 @@ i32x3x3 m2(i32x3x3 a) {
 }
 
 // CHECK-LABEL: define dso_local noundef <9 x i512> @_Z2m3u11matrix_typeILm3ELm3EDB512_E(
-// CHECK-SAME: <9 x i512> noundef [[A:%.*]]) #[[ATTR5:[0-9]+]] {
+// CHECK-SAME: <9 x i512> noundef [[A:%.*]]) #[[ATTR6:[0-9]+]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [9 x i512], align 8
 // CHECK-NEXT:    store <9 x i512> [[A]], ptr [[A_ADDR]], align 8
@@ -109,3 +137,17 @@ i32x3x3 m2(i32x3x3 a) {
 i512x3x3 m3(i512x3x3 a) {
   return a + a;
 }
+
+// CHECK-LABEL: define dso_local noundef <9 x i4> @_Z2m4u11matrix_typeILm3ELm3EDB4_E(
+// CHECK-SAME: <9 x i4> noundef [[A:%.*]]) #[[ATTR7:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [9 x i4], align 1
+// CHECK-NEXT:    store <9 x i4> [[A]], ptr [[A_ADDR]], align 1
+// CHECK-NEXT:    [[TMP0:%.*]] = load <9 x i4>, ptr [[A_ADDR]], align 1
+// CHECK-NEXT:    [[TMP1:%.*]] = load <9 x i4>, ptr [[A_ADDR]], align 1
+// CHECK-NEXT:    [[TMP2:%.*]] = add <9 x i4> [[TMP0]], [[TMP1]]
+// CHECK-NEXT:    ret <9 x i4> [[TMP2]]
+//
+i4x3x3 m4(i4x3x3 a) {
+  return a + a;
+}
diff --git a/llvm/test/CodeGen/X86/vec-2bit-int.ll b/llvm/test/CodeGen/X86/vec-2bit-int.ll
new file mode 100644
index 0000000000000..2e172fd45d33b
--- /dev/null
+++ b/llvm/test/CodeGen/X86/vec-2bit-int.ll
@@ -0,0 +1,34 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s
+
+define dso_local <2 x i2> @foo(<2 x i2> %v1, <2 x i2> %v2) {
+; CHECK-LABEL: foo:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movq %xmm1, %rax
+; CHECK-NEXT:    andb $3, %al
+; CHECK-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
+; CHECK-NEXT:    movq %xmm2, %rcx
+; CHECK-NEXT:    shlb $2, %cl
+; CHECK-NEXT:    orb %al, %cl
+; CHECK-NEXT:    andb $15, %cl
+; CHECK-NEXT:    movb %cl, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    movq %xmm0, %rax
+; CHECK-NEXT:    andb $3, %al
+; CHECK-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
+; CHECK-NEXT:    movq %xmm2, %rcx
+; CHECK-NEXT:    shlb $2, %cl
+; CHECK-NEXT:    orb %al, %cl
+; CHECK-NEXT:    andb $15, %cl
+; CHECK-NEXT:    movb %cl, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    paddq %xmm1, %xmm0
+; CHECK-NEXT:    retq
+ entry:
+   %v2.addr = alloca <2 x i2>, align 2
+   %v1.addr = alloca <2 x i2>, align 2
+   store <2 x i2> %v2, ptr %v2.addr, align 2
+   store <2 x i2> %v1, ptr %v1.addr, align 2
+   %0 = load <2 x i2>, ptr %v1.addr, align 2
+   %1 = load <2 x i2>, ptr %v2.addr, align 2
+   %add = add <2 x i2> %0, %1
+   ret <2 x i2> %add
+}

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [llvm] [Clang] Allow vector and matrix type attributes for sub-byte _BitInt (PR #140253)

Reply via email to