[PATCH] D133668: [HLSL] Use _BitInt(16) for int16_t to avoid promote to int.

2022-10-18 Thread Xiang Li via Phabricator via cfe-commits
python3kgae updated this revision to Diff 468617.
python3kgae added a comment.

Switch back to `short` and disable integer promotion for HLSL.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D133668/new/

https://reviews.llvm.org/D133668

Files:
  clang/lib/Sema/SemaExpr.cpp
  clang/test/CodeGenHLSL/basic_types.hlsl
  clang/test/CodeGenHLSL/builtins/abs.hlsl
  clang/test/CodeGenHLSL/int16_t_add.hlsl
  clang/test/SemaHLSL/BitInt128.hlsl


Index: clang/test/SemaHLSL/BitInt128.hlsl
===
--- /dev/null
+++ clang/test/SemaHLSL/BitInt128.hlsl
@@ -0,0 +1,7 @@
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -x hlsl -ast-dump -o - %s -verify
+
+// expected-error@+1 {{_BitInt is not supported on this target}}
+_BitInt(128) i128;
+
+// expected-error@+1 {{_BitInt is not supported on this target}}
+unsigned _BitInt(128) u128;
Index: clang/test/CodeGenHLSL/int16_t_add.hlsl
===
--- /dev/null
+++ clang/test/CodeGenHLSL/int16_t_add.hlsl
@@ -0,0 +1,24 @@
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
+// RUN:   dxil-pc-shadermodel6.3-library %s -D__HLSL_ENABLE_16_BIT \
+// RUN:   -emit-llvm -disable-llvm-passes -O3 -o - | FileCheck %s
+
+// Make sure generate i16 add.
+// CHECK: add nsw i16 %
+int16_t add(int16_t a, int16_t b) {
+  return a + b;
+}
+// CHECK: define noundef <2 x i16> @
+// CHECK: add <2 x i16>
+int16_t2 add(int16_t2 a, int16_t2 b) {
+  return a + b;
+}
+// CHECK: define noundef <3 x i16> @
+// CHECK: add <3 x i16>
+int16_t3 add(int16_t3 a, int16_t3 b) {
+  return a + b;
+}
+// CHECK: define noundef <4 x i16> @
+// CHECK: add <4 x i16>
+int16_t4 add(int16_t4 a, int16_t4 b) {
+  return a + b;
+}
Index: clang/test/CodeGenHLSL/builtins/abs.hlsl
===
--- clang/test/CodeGenHLSL/builtins/abs.hlsl
+++ clang/test/CodeGenHLSL/builtins/abs.hlsl
@@ -7,8 +7,7 @@
 
 
 // CHECK: define noundef signext i16 @
-// FIXME: int16_t is promoted to i32 now. Change to abs.i16 once it is fixed.
-// CHECK: call i32 @llvm.abs.i32(
+// CHECK: call i16 @llvm.abs.i16(
 int16_t test_abs_int16_t ( int16_t p0 ) {
   return abs ( p0 );
 }
Index: clang/test/CodeGenHLSL/basic_types.hlsl
===
--- clang/test/CodeGenHLSL/basic_types.hlsl
+++ clang/test/CodeGenHLSL/basic_types.hlsl
@@ -4,7 +4,7 @@
 
 
 // CHECK:"?uint16_t_Val@@3GA" = global i16 0, align 2
-// CHECK:"?int16_t_Val@@3FA" = global i16 0, align 2
+// CHECK:"?int16_t_Val@@3FA" = global i16 0
 // CHECK:"?uint_Val@@3IA" = global i32 0, align 4
 // CHECK:"?uint64_t_Val@@3KA" = global i64 0, align 8
 // CHECK:"?int64_t_Val@@3JA" = global i64 0, align 8
Index: clang/lib/Sema/SemaExpr.cpp
===
--- clang/lib/Sema/SemaExpr.cpp
+++ clang/lib/Sema/SemaExpr.cpp
@@ -837,7 +837,9 @@
   E = ImpCastExprToType(E, PTy, CK_IntegralCast).get();
   return E;
 }
-if (Ty->isPromotableIntegerType()) {
+if (Ty->isPromotableIntegerType() &&
+// Avoid promote integer type to int.
+!getLangOpts().HLSL) {
   QualType PT = Context.getPromotedIntegerType(Ty);
   E = ImpCastExprToType(E, PT, CK_IntegralCast).get();
   return E;


Index: clang/test/SemaHLSL/BitInt128.hlsl
===
--- /dev/null
+++ clang/test/SemaHLSL/BitInt128.hlsl
@@ -0,0 +1,7 @@
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -x hlsl -ast-dump -o - %s -verify
+
+// expected-error@+1 {{_BitInt is not supported on this target}}
+_BitInt(128) i128;
+
+// expected-error@+1 {{_BitInt is not supported on this target}}
+unsigned _BitInt(128) u128;
Index: clang/test/CodeGenHLSL/int16_t_add.hlsl
===
--- /dev/null
+++ clang/test/CodeGenHLSL/int16_t_add.hlsl
@@ -0,0 +1,24 @@
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
+// RUN:   dxil-pc-shadermodel6.3-library %s -D__HLSL_ENABLE_16_BIT \
+// RUN:   -emit-llvm -disable-llvm-passes -O3 -o - | FileCheck %s
+
+// Make sure generate i16 add.
+// CHECK: add nsw i16 %
+int16_t add(int16_t a, int16_t b) {
+  return a + b;
+}
+// CHECK: define noundef <2 x i16> @
+// CHECK: add <2 x i16>
+int16_t2 add(int16_t2 a, int16_t2 b) {
+  return a + b;
+}
+// CHECK: define noundef <3 x i16> @
+// CHECK: add <3 x i16>
+int16_t3 add(int16_t3 a, int16_t3 b) {
+  return a + b;
+}
+// CHECK: define noundef <4 x i16> @
+// CHECK: add <4 x i16>
+int16_t4 add(int16_t4 a, int16_t4 b) {
+  return a + b;
+}
Index: clang/test/CodeGenHLSL/builtins/abs.hlsl
===
--- clang/test/CodeGenHLSL/builtins/abs.hlsl
+++ clang/test/CodeGenHLSL/builtins/abs.hlsl
@@ -7,8 +7,7 @@
 
 
 // CHECK: define noundef 

[PATCH] D133668: [HLSL] Use _BitInt(16) for int16_t to avoid promote to int.

2022-10-10 Thread John McCall via Phabricator via cfe-commits
rjmccall added a comment.

If you have `char`, would you want it to promote?  Because turning `char` to 
`_BitInt(8)` is breaking with C on other grounds (like aliasing), for better or 
worse.  So if you just don't want promotion, maybe you really should just 
disable promotion.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D133668/new/

https://reviews.llvm.org/D133668

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D133668: [HLSL] Use _BitInt(16) for int16_t to avoid promote to int.

2022-10-10 Thread Chris Bieneman via Phabricator via cfe-commits
beanz added a comment.

In D133668#3847871 , @rjmccall wrote:

> But that's purely on the implementation level, right?  Everything is 
> implicitly vectorized and you're just specifying the computation of a single 
> lane, but as far as that lane-wise computation is concerned, you just have 
> expressions which produce scalar values.

Yes, with the caveat that the language doesn't dictate the maximum SIMD width, 
but some features have minimum widths. The language source (and IR) operate on 
one lane of scalar and vector values, but we do have cross-SIMD lane 
operations, and true scalar (uniform) values, so we have to model the full 
breadth of parallel fun...

> If you don't otherwise have 16-bit (or 8-bit?) types, and it's the type 
> behavior you want, I'm fine with you just using `_BitInt`.  I just want to 
> make sure I understand the situation well enough to feel confident that 
> you've considered the alternatives.

We don't currently have 8-bit types (although I fully expect someone will want 
them because ML seems to love small data types). I suspect that integer 
promotion for types smaller than `int` will likely never make sense for 
HLSL (or really any SPMD/implicit-SIMD programming model).

My inclination is that we should define all our smaller than `int` fixed-size 
integer types as `_BitInt` to opt-out of promotion. Along with that I expect 
that we'll disable `short` under HLSL. We will still have `char`, but the 
intent for `char` is really for the _extremely_ limited cases where strings get 
used (i.e. `printf` for debugging).


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D133668/new/

https://reviews.llvm.org/D133668

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D133668: [HLSL] Use _BitInt(16) for int16_t to avoid promote to int.

2022-10-10 Thread John McCall via Phabricator via cfe-commits
rjmccall added a comment.

In D133668#3847807 , @beanz wrote:

> In D133668#3847163 , @rjmccall 
> wrote:
>
>> Sure, but it's extremely easy to unpromote that arithmetic for most 
>> operators, and I'm sure LLVM already has a pass that will do that.
>
> Okay... but HLSL explicitly doesn't promote. Having the compiler rely on an 
> optimization to generate correct code is undesirable. Especially since we 
> want debug builds to behave correctly.
>
>> Alternatively, if you're worried about your ability to unpromote, and since 
>> you're breaking strict conformance anyway, have you considered just removing 
>> the initial promotion to `int` from the usual arithmetic conversions?  I'm 
>> pretty sure the rest of the rules hang together just fine if you do.  Then 
>> you have a uniform rule that permits easier vectorization for all small 
>> integer types rather than being sensitive specifically to using the 
>> `int16_t` typedef.
>
> HLSL isn't conformant to C or C++. One of the big areas that things get wonky 
> is that every `int` is actually an implicit SIMD vector of a 
> hardware-determined size.

But that's purely on the implementation level, right?  Everything is implicitly 
vectorized and you're just specifying the computation of a single lane, but as 
far as that lane-wise computation is concerned, you just have expressions which 
produce scalar values.

> We could disable promotion for HLSL. That is what we do in DXC. Part of what 
> made me think `BitInt` was a better solution is that HLSL doesn't have the 
> `short` keyword at all. The only 16-bit int types we support are the 
> `[u]int16_t` explicit-sized typedefs. If you think disabling promotion under 
> the HLSL language mode is a better approach, we can do that instead.

If you don't otherwise have 16-bit (or 8-bit?) types, and it's the type 
behavior you want, I'm fine with you just using `_BitInt`.  I just want to make 
sure I understand the situation well enough to feel confident that you've 
considered the alternatives.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D133668/new/

https://reviews.llvm.org/D133668

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D133668: [HLSL] Use _BitInt(16) for int16_t to avoid promote to int.

2022-10-10 Thread Chris Bieneman via Phabricator via cfe-commits
beanz added a comment.

In D133668#3847163 , @rjmccall wrote:

> Sure, but it's extremely easy to unpromote that arithmetic for most 
> operators, and I'm sure LLVM already has a pass that will do that.

Okay... but HLSL explicitly doesn't promote. Having the compiler rely on an 
optimization to generate correct code is undesirable. Especially since we want 
debug builds to behave correctly.

> Alternatively, if you're worried about your ability to unpromote, and since 
> you're breaking strict conformance anyway, have you considered just removing 
> the initial promotion to `int` from the usual arithmetic conversions?  I'm 
> pretty sure the rest of the rules hang together just fine if you do.  Then 
> you have a uniform rule that permits easier vectorization for all small 
> integer types rather than being sensitive specifically to using the `int16_t` 
> typedef.

HLSL isn't conformant to C or C++. One of the big areas that things get wonky 
is that every `int` is actually an implicit SIMD vector of a 
hardware-determined size.

We could disable promotion for HLSL. That is what we do in DXC. Part of what 
made me think `BitInt` was a better solution is that HLSL doesn't have the 
`short` keyword at all. The only 16-bit int types we support are the 
`[u]int16_t` explicit-sized typedefs. If you think disabling promotion under 
the HLSL language mode is a better approach, we can do that instead.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D133668/new/

https://reviews.llvm.org/D133668

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D133668: [HLSL] Use _BitInt(16) for int16_t to avoid promote to int.

2022-10-10 Thread John McCall via Phabricator via cfe-commits
rjmccall added a comment.

Sure, but it's extremely easy to unpromote that arithmetic for most operators, 
and I'm sure LLVM already has a pass that will do that.  The only thing that 
blocks unpromotion is when you feed a promoted result into something that's 
sensitive to working on a wider value, like comparison or division, or of 
course an assignment to a wider type.

Alternatively, if you're worried about your ability to unpromote, and since 
you're breaking strict conformance anyway, have you considered just removing 
the initial promotion to `int` from the usual arithmetic conversions?  I'm 
pretty sure the rest of the rules hang together just fine if you do.  Then you 
have a uniform rule that permits easier vectorization for all small integer 
types rather than being sensitive specifically to using the `int16_t` typedef.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D133668/new/

https://reviews.llvm.org/D133668

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D133668: [HLSL] Use _BitInt(16) for int16_t to avoid promote to int.

2022-10-09 Thread Chris Bieneman via Phabricator via cfe-commits
beanz added a comment.

Avoiding argument promotion is one part of what we need, but not all of it. For 
example if you take this trial code:

  const RWBuffer<int16_t2> In;
  RWBuffer<int16_t> Out;
  
  [numthreads(1,1,1)]
  void main(uint GI : SV_GroupIndex) {
    Out[GI] = In[GI].x + In[GI].y;
  }

Following C rules, clang promotes the `short` math to `int` math, so the IR for 
`main` looks like:

  ; Function Attrs: noinline norecurse nounwind optnone
  define internal void @"?main@@YAXI@Z"(i32 noundef %GI) #2 {
  entry:
    %GI.addr = alloca i32, align 4
    store i32 %GI, ptr %GI.addr, align 4
    %0 = load i32, ptr %GI.addr, align 4
    %call = call noundef nonnull align 4 dereferenceable(4) ptr @"??A?$RWBuffer@T?$__vector@F$01@__clang@@@hlsl@@QBAAAT?$__vector@F$01@__clang@@I@Z"(ptr noundef nonnull align 4 dereferenceable(4) @In, i32 noundef %0)
    %1 = load <2 x i16>, ptr %call, align 4
    %2 = extractelement <2 x i16> %1, i32 0
    %conv = sext i16 %2 to i32
    %3 = load i32, ptr %GI.addr, align 4
    %call1 = call noundef nonnull align 4 dereferenceable(4) ptr @"??A?$RWBuffer@T?$__vector@F$01@__clang@@@hlsl@@QBAAAT?$__vector@F$01@__clang@@I@Z"(ptr noundef nonnull align 4 dereferenceable(4) @In, i32 noundef %3)
    %4 = load <2 x i16>, ptr %call1, align 4
    %5 = extractelement <2 x i16> %4, i32 1
    %conv2 = sext i16 %5 to i32
    %add = add nsw i32 %conv, %conv2
    %conv3 = trunc i32 %add to i16
    %6 = load i32, ptr %GI.addr, align 4
    %call4 = call noundef nonnull align 2 dereferenceable(2) ptr @"??A?$RWBuffer@F@hlsl@@QFI@Z"(ptr noundef nonnull align 4 dereferenceable(4) @"?Out@@3V?$RWBuffer@F@hlsl@@A", i32 noundef %6)
    store i16 %conv3, ptr %call4, align 2
    ret void
  }

Because of the implicit vector nature of HLSL, these promotions and truncations 
would be extremely expensive. Using `_BitInt` allows us a language-header only 
solution that opts HLSL's `[u]int16_t` out of `Sema::UsualUnaryConversions`.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D133668/new/

https://reviews.llvm.org/D133668

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D133668: [HLSL] Use _BitInt(16) for int16_t to avoid promote to int.

2022-10-09 Thread John McCall via Phabricator via cfe-commits
rjmccall added a comment.

If your goal is just to pass 16-bit types in some specific way without 
promotion, you can just do that in the ABI.  C only requires `short` arguments 
to be promoted to `int` in variadic or unprototyped positions, and it's not 
legal under the C type compatibility rules to call a function prototyped with a 
`short` parameter using an unprototyped function type.  (Not that I expect that 
you care deeply about supporting unprototyped function calls in HLSL anyway.)  
So if you're looking at existing targets and feeling constrained by their ABI 
decision to promote small integer types, be aware that you can simply decide 
not to do that; those targets are mostly constrained by very old decisions that 
were informed by the state of languages and libraries a long time ago, and those 
decisions do not need to be carried forward to new targets.

But if you're sure you want to do this this way, your tests look fine.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D133668/new/

https://reviews.llvm.org/D133668

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D133668: [HLSL] Use _BitInt(16) for int16_t to avoid promote to int.

2022-10-07 Thread Xiang Li via Phabricator via cfe-commits
python3kgae added a comment.

Hi, @rjmccall and @efriedma,
Could you take a look at this PR?
Or should I find someone else to review it?

Thanks
Xiang


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D133668/new/

https://reviews.llvm.org/D133668

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D133668: [HLSL] Use _BitInt(16) for int16_t to avoid promote to int.

2022-09-28 Thread Xiang Li via Phabricator via cfe-commits
python3kgae added a comment.

Gentle ping.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D133668/new/

https://reviews.llvm.org/D133668

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D133668: [HLSL] Use _BitInt(16) for int16_t to avoid promote to int.

2022-09-19 Thread Aaron Ballman via Phabricator via cfe-commits
aaron.ballman added a comment.

LGTM, but please wait a bit to land for the codegen reviewers to weigh in on 
those tests.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D133668/new/

https://reviews.llvm.org/D133668

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D133668: [HLSL] Use _BitInt(16) for int16_t to avoid promote to int.

2022-09-19 Thread Xiang Li via Phabricator via cfe-commits
python3kgae updated this revision to Diff 461308.
python3kgae added a comment.

Limit max bitint width to 64 for HLSL.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D133668/new/

https://reviews.llvm.org/D133668

Files:
  clang/lib/Basic/Targets/DirectX.h
  clang/lib/Headers/hlsl/hlsl_basic_types.h
  clang/test/CodeGenHLSL/basic_types.hlsl
  clang/test/CodeGenHLSL/builtins/abs.hlsl
  clang/test/CodeGenHLSL/int16_t_add.hlsl
  clang/test/SemaHLSL/BitInt128.hlsl

Index: clang/test/SemaHLSL/BitInt128.hlsl
===
--- /dev/null
+++ clang/test/SemaHLSL/BitInt128.hlsl
@@ -0,0 +1,7 @@
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -x hlsl -ast-dump -o - %s -verify
+
+// expected-error@+1 {{signed _BitInt of bit sizes greater than 64 not supported}}
+_BitInt(128) i128;
+
+// expected-error@+1 {{unsigned _BitInt of bit sizes greater than 64 not supported}}
+unsigned _BitInt(128) u128;
Index: clang/test/CodeGenHLSL/int16_t_add.hlsl
===
--- /dev/null
+++ clang/test/CodeGenHLSL/int16_t_add.hlsl
@@ -0,0 +1,24 @@
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
+// RUN:   dxil-pc-shadermodel6.3-library %s -D__HLSL_ENABLE_16_BIT \
+// RUN:   -emit-llvm -disable-llvm-passes -O3 -o - | FileCheck %s
+
+// Make sure generate i16 add.
+// CHECK: add nsw i16 %
+int16_t add(int16_t a, int16_t b) {
+  return a + b;
+}
+// CHECK: define noundef <2 x i16> @
+// CHECK: add <2 x i16>
+int16_t2 add(int16_t2 a, int16_t2 b) {
+  return a + b;
+}
+// CHECK: define noundef <3 x i16> @
+// CHECK: add <3 x i16>
+int16_t3 add(int16_t3 a, int16_t3 b) {
+  return a + b;
+}
+// CHECK: define noundef <4 x i16> @
+// CHECK: add <4 x i16>
+int16_t4 add(int16_t4 a, int16_t4 b) {
+  return a + b;
+}
Index: clang/test/CodeGenHLSL/builtins/abs.hlsl
===
--- clang/test/CodeGenHLSL/builtins/abs.hlsl
+++ clang/test/CodeGenHLSL/builtins/abs.hlsl
@@ -7,8 +7,7 @@
 
 
 // CHECK: define noundef signext i16 @
-// FIXME: int16_t is promoted to i32 now. Change to abs.i16 once it is fixed.
-// CHECK: call i32 @llvm.abs.i32(
+// CHECK: call i16 @llvm.abs.i16(
 int16_t test_abs_int16_t ( int16_t p0 ) {
   return abs ( p0 );
 }
Index: clang/test/CodeGenHLSL/basic_types.hlsl
===
--- clang/test/CodeGenHLSL/basic_types.hlsl
+++ clang/test/CodeGenHLSL/basic_types.hlsl
@@ -3,17 +3,17 @@
 // RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s
 
 
-// CHECK:"?uint16_t_Val@@3GA" = global i16 0, align 2
-// CHECK:"?int16_t_Val@@3FA" = global i16 0, align 2
+// CHECK:"?uint16_t_Val@@3U?$_UBitInt@$0BA@@__clang@@A" = global i16 0, align 2
+// CHECK:"?int16_t_Val@@3U?$_BitInt@$0BA@@__clang@@A" = global i16 0, align 2
 // CHECK:"?uint_Val@@3IA" = global i32 0, align 4
 // CHECK:"?uint64_t_Val@@3KA" = global i64 0, align 8
 // CHECK:"?int64_t_Val@@3JA" = global i64 0, align 8
-// CHECK:"?int16_t2_Val@@3T?$__vector@F$01@__clang@@A" = global <2 x i16> zeroinitializer, align 4
-// CHECK:"?int16_t3_Val@@3T?$__vector@F$02@__clang@@A" = global <3 x i16> zeroinitializer, align 8
-// CHECK:"?int16_t4_Val@@3T?$__vector@F$03@__clang@@A" = global <4 x i16> zeroinitializer, align 8
-// CHECK:"?uint16_t2_Val@@3T?$__vector@G$01@__clang@@A" = global <2 x i16> zeroinitializer, align 4
-// CHECK:"?uint16_t3_Val@@3T?$__vector@G$02@__clang@@A" = global <3 x i16> zeroinitializer, align 8
-// CHECK:"?uint16_t4_Val@@3T?$__vector@G$03@__clang@@A" = global <4 x i16> zeroinitializer, align 8
+// CHECK:"?int16_t2_Val@@3T?$__vector@U?$_BitInt@$0BA@@__clang@@$01@__clang@@A" = global <2 x i16> zeroinitializer, align 4
+// CHECK:"?int16_t3_Val@@3T?$__vector@U?$_BitInt@$0BA@@__clang@@$02@__clang@@A" = global <3 x i16> zeroinitializer, align 8
+// CHECK:"?int16_t4_Val@@3T?$__vector@U?$_BitInt@$0BA@@__clang@@$03@__clang@@A" = global <4 x i16> zeroinitializer, align 8
+// CHECK:"?uint16_t2_Val@@3T?$__vector@U?$_UBitInt@$0BA@@__clang@@$01@__clang@@A" = global <2 x i16> zeroinitializer, align 4
+// CHECK:"?uint16_t3_Val@@3T?$__vector@U?$_UBitInt@$0BA@@__clang@@$02@__clang@@A" = global <3 x i16> zeroinitializer, align 8
+// CHECK:"?uint16_t4_Val@@3T?$__vector@U?$_UBitInt@$0BA@@__clang@@$03@__clang@@A" = global <4 x i16> zeroinitializer, align 8
 // CHECK:"?int2_Val@@3T?$__vector@H$01@__clang@@A" = global <2 x i32> zeroinitializer, align 8
 // CHECK:"?int3_Val@@3T?$__vector@H$02@__clang@@A" = global <3 x i32> zeroinitializer, align 16
 // CHECK:"?int4_Val@@3T?$__vector@H$03@__clang@@A" = global <4 x i32> zeroinitializer, align 16
Index: clang/lib/Headers/hlsl/hlsl_basic_types.h
===
--- clang/lib/Headers/hlsl/hlsl_basic_types.h
+++ clang/lib/Headers/hlsl/hlsl_basic_types.h
@@ -13,8 +13,8 @@
 
 #ifdef __HLSL_ENABLE_16_BIT
 // 

[PATCH] D133668: [HLSL] Use _BitInt(16) for int16_t to avoid promote to int.

2022-09-19 Thread Aaron Ballman via Phabricator via cfe-commits
aaron.ballman added reviewers: rjmccall, efriedma.
aaron.ballman added a comment.

Adding some codegen reviewers for awareness.




Comment at: clang/lib/Basic/Targets/DirectX.h:66
 
+  bool hasBitIntType() const override { return true; }
   bool hasFeature(StringRef Feature) const override {

beanz wrote:
> aaron.ballman wrote:
> > This change requires more testing/thought, IMO -- do you support 128-bit 
> > operations? When we bump that limit to be arbitrarily high, should DX have 
> > the arbitrary limits or do you need to enforce something lower? Have you 
> > considered how you want to pack these into structures or other data layout 
> > considerations?
> Yea, we definitely need to set the max width to 64 for DirectX.
Nothing seems to have handled this comment yet. Be sure to add a Sema test for 
that as well.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D133668/new/

https://reviews.llvm.org/D133668

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D133668: [HLSL] Use _BitInt(16) for int16_t to avoid promote to int.

2022-09-19 Thread Xiang Li via Phabricator via cfe-commits
python3kgae updated this revision to Diff 461251.
python3kgae added a comment.

Rebase and update test.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D133668/new/

https://reviews.llvm.org/D133668

Files:
  clang/lib/Basic/Targets/DirectX.h
  clang/lib/Headers/hlsl/hlsl_basic_types.h
  clang/test/CodeGenHLSL/basic_types.hlsl
  clang/test/CodeGenHLSL/builtins/abs.hlsl
  clang/test/CodeGenHLSL/int16_t_add.hlsl


Index: clang/test/CodeGenHLSL/int16_t_add.hlsl
===
--- /dev/null
+++ clang/test/CodeGenHLSL/int16_t_add.hlsl
@@ -0,0 +1,24 @@
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
+// RUN:   dxil-pc-shadermodel6.3-library %s -D__HLSL_ENABLE_16_BIT \
+// RUN:   -emit-llvm -disable-llvm-passes -O3 -o - | FileCheck %s
+
+// Make sure generate i16 add.
+// CHECK: add nsw i16 %
+int16_t add(int16_t a, int16_t b) {
+  return a + b;
+}
+// CHECK: define noundef <2 x i16> @
+// CHECK: add <2 x i16>
+int16_t2 add(int16_t2 a, int16_t2 b) {
+  return a + b;
+}
+// CHECK: define noundef <3 x i16> @
+// CHECK: add <3 x i16>
+int16_t3 add(int16_t3 a, int16_t3 b) {
+  return a + b;
+}
+// CHECK: define noundef <4 x i16> @
+// CHECK: add <4 x i16>
+int16_t4 add(int16_t4 a, int16_t4 b) {
+  return a + b;
+}
Index: clang/test/CodeGenHLSL/builtins/abs.hlsl
===
--- clang/test/CodeGenHLSL/builtins/abs.hlsl
+++ clang/test/CodeGenHLSL/builtins/abs.hlsl
@@ -7,8 +7,7 @@
 
 
 // CHECK: define noundef signext i16 @
-// FIXME: int16_t is promoted to i32 now. Change to abs.i16 once it is fixed.
-// CHECK: call i32 @llvm.abs.i32(
+// CHECK: call i16 @llvm.abs.i16(
 int16_t test_abs_int16_t ( int16_t p0 ) {
   return abs ( p0 );
 }
Index: clang/test/CodeGenHLSL/basic_types.hlsl
===
--- clang/test/CodeGenHLSL/basic_types.hlsl
+++ clang/test/CodeGenHLSL/basic_types.hlsl
@@ -3,17 +3,17 @@
 // RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s
 
 
-// CHECK:"?uint16_t_Val@@3GA" = global i16 0, align 2
-// CHECK:"?int16_t_Val@@3FA" = global i16 0, align 2
+// CHECK:"?uint16_t_Val@@3U?$_UBitInt@$0BA@@__clang@@A" = global i16 0, align 2
+// CHECK:"?int16_t_Val@@3U?$_BitInt@$0BA@@__clang@@A" = global i16 0, align 2
 // CHECK:"?uint_Val@@3IA" = global i32 0, align 4
 // CHECK:"?uint64_t_Val@@3KA" = global i64 0, align 8
 // CHECK:"?int64_t_Val@@3JA" = global i64 0, align 8
-// CHECK:"?int16_t2_Val@@3T?$__vector@F$01@__clang@@A" = global <2 x i16> zeroinitializer, align 4
-// CHECK:"?int16_t3_Val@@3T?$__vector@F$02@__clang@@A" = global <3 x i16> zeroinitializer, align 8
-// CHECK:"?int16_t4_Val@@3T?$__vector@F$03@__clang@@A" = global <4 x i16> zeroinitializer, align 8
-// CHECK:"?uint16_t2_Val@@3T?$__vector@G$01@__clang@@A" = global <2 x i16> zeroinitializer, align 4
-// CHECK:"?uint16_t3_Val@@3T?$__vector@G$02@__clang@@A" = global <3 x i16> zeroinitializer, align 8
-// CHECK:"?uint16_t4_Val@@3T?$__vector@G$03@__clang@@A" = global <4 x i16> zeroinitializer, align 8
+// CHECK:"?int16_t2_Val@@3T?$__vector@U?$_BitInt@$0BA@@__clang@@$01@__clang@@A" = global <2 x i16> zeroinitializer, align 4
+// CHECK:"?int16_t3_Val@@3T?$__vector@U?$_BitInt@$0BA@@__clang@@$02@__clang@@A" = global <3 x i16> zeroinitializer, align 8
+// CHECK:"?int16_t4_Val@@3T?$__vector@U?$_BitInt@$0BA@@__clang@@$03@__clang@@A" = global <4 x i16> zeroinitializer, align 8
+// CHECK:"?uint16_t2_Val@@3T?$__vector@U?$_UBitInt@$0BA@@__clang@@$01@__clang@@A" = global <2 x i16> zeroinitializer, align 4
+// CHECK:"?uint16_t3_Val@@3T?$__vector@U?$_UBitInt@$0BA@@__clang@@$02@__clang@@A" = global <3 x i16> zeroinitializer, align 8
+// CHECK:"?uint16_t4_Val@@3T?$__vector@U?$_UBitInt@$0BA@@__clang@@$03@__clang@@A" = global <4 x i16> zeroinitializer, align 8
 // CHECK:"?int2_Val@@3T?$__vector@H$01@__clang@@A" = global <2 x i32> zeroinitializer, align 8
 // CHECK:"?int3_Val@@3T?$__vector@H$02@__clang@@A" = global <3 x i32> zeroinitializer, align 16
 // CHECK:"?int4_Val@@3T?$__vector@H$03@__clang@@A" = global <4 x i32> zeroinitializer, align 16
Index: clang/lib/Headers/hlsl/hlsl_basic_types.h
===
--- clang/lib/Headers/hlsl/hlsl_basic_types.h
+++ clang/lib/Headers/hlsl/hlsl_basic_types.h
@@ -13,8 +13,8 @@
 
 #ifdef __HLSL_ENABLE_16_BIT
 // 16-bit integer.
-typedef unsigned short uint16_t;
-typedef short int16_t;
+typedef unsigned _BitInt(16) uint16_t;
+typedef _BitInt(16) int16_t;
 #endif
 
 // unsigned 32-bit integer.
Index: clang/lib/Basic/Targets/DirectX.h
===
--- clang/lib/Basic/Targets/DirectX.h
+++ clang/lib/Basic/Targets/DirectX.h
@@ -65,6 +65,7 @@
   void getTargetDefines(const LangOptions &Opts,
 MacroBuilder &Builder) const override;
 
+  bool hasBitIntType() const override { 

[PATCH] D133668: [HLSL] Use _BitInt(16) for int16_t to avoid promote to int.

2022-09-14 Thread Chris Bieneman via Phabricator via cfe-commits
beanz added inline comments.



Comment at: clang/lib/Basic/Targets/DirectX.h:66
 
+  bool hasBitIntType() const override { return true; }
   bool hasFeature(StringRef Feature) const override {

aaron.ballman wrote:
> This change requires more testing/thought, IMO -- do you support 128-bit 
> operations? When we bump that limit to be arbitrarily high, should DX have 
> the arbitrary limits or do you need to enforce something lower? Have you 
> considered how you want to pack these into structures or other data layout 
> considerations?
Yea, we definitely need to set the max width to 64 for DirectX.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D133668/new/

https://reviews.llvm.org/D133668

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D133668: [HLSL] Use _BitInt(16) for int16_t to avoid promote to int.

2022-09-12 Thread Xiang Li via Phabricator via cfe-commits
python3kgae added a comment.

In D133668#3784636 , @python3kgae 
wrote:

> In D133668#3784607 , @aaron.ballman 
> wrote:
>
>> In D133668#3783975 , @beanz wrote:
>>
>>> In D133668#3783489 , 
>>> @aaron.ballman wrote:
>>>
>>>> Okay, that's good to know! If you don't intend to *ever* conform to the 
>>>> standard in this area, then I think this approach is very reasonable. But 
>>>> you should know up front that you're constraining yourself here. (Changing 
>>>> the underlying type in the future is an ABI break: 
>>>> https://godbolt.org/z/P6ndrzMab, note the name mangling.)
>>>
>>> We have the benefit of ABI escape hatches. HLSL itself doesn't define a 
>>> permanently stable ABI since GPU hardware and runtime ABIs change too 
>>> frequently. We instead revision our ABI every few years as the DirectX and 
>>> Vulkan specifications evolve.
>>>
>>> My hope is that as the HLSL language and our runtime ABIs evolve we'll be 
>>> more and more conformant to the C standard, but there are some odd areas 
>>> that we might never quite get there on.
>>>
>>> The 16-bit integer math is an interesting case. Because GPUs are inherently 
>>> SIMD machines, on many architectures you can handle twice as many 16-bit 
>>> operations per instruction as 32-bit (yay vectors!). Combine that with 
>>> HLSL's SPMD programming model and all scalar math is actually vector math. 
>>> This makes integer promotion for 16-bit types severely limiting. As a 
>>> result I don't suspect we'll ever want to conform to C here.
>>
>> Ah, good to know!
>>
>> Btw, it looks like precommit CI is finding failures here.
>
> It is strange that the tests passed locally while CI failed on this PR. But 
> in https://reviews.llvm.org/D133634, I hit failures locally while CI passed 
> all tests.
> I'll check and fix the local failures I can repro first.

This might be an issue with the stacked PR. It should be OK once rebased onto 
https://reviews.llvm.org/D133634.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D133668/new/

https://reviews.llvm.org/D133668

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D133668: [HLSL] Use _BitInt(16) for int16_t to avoid promote to int.

2022-09-12 Thread Xiang Li via Phabricator via cfe-commits
python3kgae added a comment.

In D133668#3784607 , @aaron.ballman 
wrote:

> In D133668#3783975 , @beanz wrote:
>
>> In D133668#3783489 , 
>> @aaron.ballman wrote:
>>
>>> Okay, that's good to know! If you don't intend to *ever* conform to the 
>>> standard in this area, then I think this approach is very reasonable. But 
>>> you should know up front that you're constraining yourself here. (Changing 
>>> the underlying type in the future is an ABI break: 
>>> https://godbolt.org/z/P6ndrzMab, note the name mangling.)
>>
>> We have the benefit of ABI escape hatches. HLSL itself doesn't define a 
>> permanently stable ABI since GPU hardware and runtime ABIs change too 
>> frequently. We instead revision our ABI every few years as the DirectX and 
>> Vulkan specifications evolve.
>>
>> My hope is that as the HLSL language and our runtime ABIs evolve we'll be 
>> more and more conformant to the C standard, but there are some odd areas 
>> that we might never quite get there on.
>>
>> The 16-bit integer math is an interesting case. Because GPUs are inherently 
>> SIMD machines, on many architectures you can handle twice as many 16-bit 
>> operations per instruction as 32-bit (yay vectors!). Combine that with 
>> HLSL's SPMD programming model and all scalar math is actually vector math. 
>> This makes integer promotion for 16-bit types severely limiting. As a result 
>> I don't suspect we'll ever want to conform to C here.
>
> Ah, good to know!
>
> Btw, it looks like precommit CI is finding failures here.

It is strange that the tests passed locally while CI failed on this PR. But in 
https://reviews.llvm.org/D133634, I hit failures locally while CI passed all 
tests.
I'll check and fix the local failures I can repro first.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D133668/new/

https://reviews.llvm.org/D133668

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D133668: [HLSL] Use _BitInt(16) for int16_t to avoid promote to int.

2022-09-12 Thread Aaron Ballman via Phabricator via cfe-commits
aaron.ballman added a comment.

In D133668#3783975 , @beanz wrote:

> In D133668#3783489 , @aaron.ballman 
> wrote:
>
>> Okay, that's good to know! If you don't intend to *ever* conform to the 
>> standard in this area, then I think this approach is very reasonable. But 
>> you should know up front that you're constraining yourself here. (Changing 
>> the underlying type in the future is an ABI break: 
>> https://godbolt.org/z/P6ndrzMab, note the name mangling.)
>
> We have the benefit of ABI escape hatches. HLSL itself doesn't define a 
> permanently stable ABI since GPU hardware and runtime ABIs change too 
> frequently. We instead revision our ABI every few years as the DirectX and 
> Vulkan specifications evolve.
>
> My hope is that as the HLSL language and our runtime ABIs evolve we'll be 
> more and more conformant to the C standard, but there are some odd areas that 
> we might never quite get there on.
>
> The 16-bit integer math is an interesting case. Because GPUs are inherently 
> SIMD machines, on many architectures you can handle twice as many 16-bit 
> operations per instruction as 32-bit (yay vectors!). Combine that with HLSL's 
> SPMD programming model and all scalar math is actually vector math. This 
> makes integer promotion for 16-bit types severely limiting. As a result I 
> don't suspect we'll ever want to conform to C here.

Ah, good to know!

Btw, it looks like precommit CI is finding failures here.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D133668/new/

https://reviews.llvm.org/D133668

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D133668: [HLSL] Use _BitInt(16) for int16_t to avoid promote to int.

2022-09-12 Thread Chris Bieneman via Phabricator via cfe-commits
beanz added a comment.

In D133668#3783489 , @aaron.ballman 
wrote:

> Okay, that's good to know! If you don't intend to *ever* conform to the 
> standard in this area, then I think this approach is very reasonable. But you 
> should know up front that you're constraining yourself here. (Changing the 
> underlying type in the future is an ABI break: 
> https://godbolt.org/z/P6ndrzMab, note the name mangling.)

We have the benefit of ABI escape hatches. HLSL itself doesn't define a 
permanently stable ABI since GPU hardware and runtime ABIs change too 
frequently. We instead revision our ABI every few years as the DirectX and 
Vulkan specifications evolve.

My hope is that as the HLSL language and our runtime ABIs evolve we'll be more 
and more conformant to the C standard, but there are some odd areas that we 
might never quite get there on.

The 16-bit integer math is an interesting case. Because GPUs are inherently 
SIMD machines, on many architectures you can handle twice as many 16-bit 
operations per instruction as 32-bit (yay vectors!). Combine that with HLSL's 
SPMD programming model and all scalar math is actually vector math. This makes 
integer promotion for 16-bit types severely limiting. As a result I don't 
suspect we'll ever want to conform to C here.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D133668/new/

https://reviews.llvm.org/D133668

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D133668: [HLSL] Use _BitInt(16) for int16_t to avoid promote to int.

2022-09-12 Thread Aaron Ballman via Phabricator via cfe-commits
aaron.ballman added a comment.

In D133668#3783090 , @beanz wrote:

> HLSL deviates from C here. HLSL doesn't actually have `short` (although I'm 
> actually not sure we should disable it in Clang). We do have `int16_t`, but 
> we don't promote `int16_t` to `int`. We discussed changing codegen to disable 
> promotion for HLSL, but it seemed more straightforward to me to just define 
> `int16_t` as `_BitInt(16)`.

Okay, that's good to know! If you don't intend to *ever* conform to the 
standard in this area, then I think this approach is very reasonable. But you 
should know up front that you're constraining yourself here. (Changing the 
underlying type in the future is an ABI break: https://godbolt.org/z/P6ndrzMab, 
note the name mangling.)




Comment at: clang/lib/Basic/Targets/DirectX.h:66
 
+  bool hasBitIntType() const override { return true; }
   bool hasFeature(StringRef Feature) const override {

This change requires more testing/thought, IMO -- do you support 128-bit 
operations? When we bump that limit to be arbitrarily high, should DX have the 
arbitrary limits or do you need to enforce something lower? Have you considered 
how you want to pack these into structures or other data layout considerations?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D133668/new/

https://reviews.llvm.org/D133668

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D133668: [HLSL] Use _BitInt(16) for int16_t to avoid promote to int.

2022-09-11 Thread Chris Bieneman via Phabricator via cfe-commits
beanz added a comment.

HLSL deviates from C here. HLSL doesn't actually have `short` (although I'm 
actually not sure we should disable it in Clang). We do have `int16_t`, but we 
don't promote `int16_t` to `int`. We discussed changing codegen to disable 
promotion for HLSL, but it seemed more straightforward to me to just define 
`int16_t` as `_BitInt(16)`.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D133668/new/

https://reviews.llvm.org/D133668

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D133668: [HLSL] Use _BitInt(16) for int16_t to avoid promote to int.

2022-09-11 Thread Aaron Ballman via Phabricator via cfe-commits
aaron.ballman added a comment.

Drive-by comment before I get into the review: does HLSL intend to follow the 
standard in terms of behavior of intN_t? If yes, then this doesn't follow the 
behavior allowed by the standard or the direction WG14 chose. We discussed 
https://www.open-std.org/jtc1/sc22/wg14/www/docs/n2960.pdf at our Jul 2022 
meeting, and this particular topic was part of that paper. The result of the 
preference poll was:

  Opinion poll: Which stdint.h types should be allowed to be bit-precise 
integer types?
  0) Leave it as is - [u]intN_t may not be bit-precise, but [u]intptr_t and 
[u]intmax_t are unclear.
   (no one asked for this direction)
  1) None of [u]intN_t, [u]intptr_t and [u]intmax_t.
   9 / 5 / 4 (this direction)
  2) None of [u]intN_t, [u]intptr_t and [u]intmax_t, unless they are wider than 
int.
   7 / 7 / 5 (not this direction)

So we decided explicitly to not allow intN_t to be defined in terms of a 
bit-precise integer type.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D133668/new/

https://reviews.llvm.org/D133668

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D133668: [HLSL] Use _BitInt(16) for int16_t to avoid promote to int.

2022-09-11 Thread Xiang Li via Phabricator via cfe-commits
python3kgae created this revision.
python3kgae added reviewers: beanz, pow2clk, Anastasia, aaron.ballman, bogner.
Herald added a project: All.
python3kgae requested review of this revision.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

short will be promoted to int in UsualUnaryConversions.
To avoid it, switch to _BitInt(16) for int16_t.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D133668

Files:
  clang/lib/AST/MicrosoftMangle.cpp
  clang/lib/Basic/Targets/DirectX.h
  clang/lib/Headers/hlsl/hlsl_basic_types.h
  clang/test/CodeGenHLSL/int16_t_add.hlsl


Index: clang/test/CodeGenHLSL/int16_t_add.hlsl
===
--- /dev/null
+++ clang/test/CodeGenHLSL/int16_t_add.hlsl
@@ -0,0 +1,24 @@
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
+// RUN:   dxil-pc-shadermodel6.3-library %s -D__HLSL_ENABLE_16_BIT \
+// RUN:   -emit-llvm -disable-llvm-passes -O3 -o - | FileCheck %s
+
+// Make sure generate i16 add.
+// CHECK: add nsw i16 %
+int16_t add(int16_t a, int16_t b) {
+  return a + b;
+}
+// CHECK: define noundef <2 x i16> @
+// CHECK: add <2 x i16>
+int16_t2 add(int16_t2 a, int16_t2 b) {
+  return a + b;
+}
+// CHECK: define noundef <3 x i16> @
+// CHECK: add <3 x i16>
+int16_t3 add(int16_t3 a, int16_t3 b) {
+  return a + b;
+}
+// CHECK: define noundef <4 x i16> @
+// CHECK: add <4 x i16>
+int16_t4 add(int16_t4 a, int16_t4 b) {
+  return a + b;
+}
Index: clang/lib/Headers/hlsl/hlsl_basic_types.h
===
--- clang/lib/Headers/hlsl/hlsl_basic_types.h
+++ clang/lib/Headers/hlsl/hlsl_basic_types.h
@@ -13,8 +13,8 @@
 
 #ifdef __HLSL_ENABLE_16_BIT
 // 16-bit integer.
-typedef unsigned short uint16_t;
-typedef short int16_t;
+typedef unsigned _BitInt(16) uint16_t;
+typedef _BitInt(16) int16_t;
 #endif
 
 // unsigned 32-bit integer.
Index: clang/lib/Basic/Targets/DirectX.h
===
--- clang/lib/Basic/Targets/DirectX.h
+++ clang/lib/Basic/Targets/DirectX.h
@@ -63,6 +63,7 @@
   void getTargetDefines(const LangOptions &Opts,
 MacroBuilder &Builder) const override;
 
+  bool hasBitIntType() const override { return true; }
   bool hasFeature(StringRef Feature) const override {
 return Feature == "directx";
   }
Index: clang/lib/AST/MicrosoftMangle.cpp
===
--- clang/lib/AST/MicrosoftMangle.cpp
+++ clang/lib/AST/MicrosoftMangle.cpp
@@ -3073,14 +3073,17 @@
 
 void MicrosoftCXXNameMangler::mangleType(const VectorType *T, Qualifiers Quals,
  SourceRange Range) {
-  const BuiltinType *ET = T->getElementType()->getAs<BuiltinType>();
-  assert(ET && "vectors with non-builtin elements are unsupported");
+  QualType EltTy = T->getElementType();
+  const BuiltinType *ET = EltTy->getAs<BuiltinType>();
+  const BitIntType *BitIntTy = EltTy->getAs<BitIntType>();
+  assert((ET || BitIntTy) &&
+ "vectors with non-builtin/_BitInt elements are unsupported");
   uint64_t Width = getASTContext().getTypeSize(T);
   // Pattern match exactly the typedefs in our intrinsic headers.  Anything 
that
   // doesn't match the Intel types uses a custom mangling below.
   size_t OutSizeBefore = Out.tell();
   if (!isa<ExtVectorType>(T)) {
-if (getASTContext().getTargetInfo().getTriple().isX86()) {
+if (getASTContext().getTargetInfo().getTriple().isX86() && ET) {
   if (Width == 64 && ET->getKind() == BuiltinType::LongLong) {
 mangleArtificialTagType(TTK_Union, "__m64");
   } else if (Width >= 128) {
@@ -3105,7 +3108,11 @@
 MicrosoftCXXNameMangler Extra(Context, Stream);
 Stream << "?$";
 Extra.mangleSourceName("__vector");
-Extra.mangleType(QualType(ET, 0), Range, QMM_Escape);
+if (ET)
+  Extra.mangleType(QualType(ET, 0), Range, QMM_Escape);
+else
+  Extra.mangleType(QualType(BitIntTy, 0), Range, QMM_Escape);
+
 Extra.mangleIntegerLiteral(llvm::APSInt::getUnsigned(T->getNumElements()));
 
 mangleArtificialTagType(TTK_Union, TemplateMangling, {"__clang"});


Index: clang/test/CodeGenHLSL/int16_t_add.hlsl
===
--- /dev/null
+++ clang/test/CodeGenHLSL/int16_t_add.hlsl
@@ -0,0 +1,24 @@
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
+// RUN:   dxil-pc-shadermodel6.3-library %s -D__HLSL_ENABLE_16_BIT \
+// RUN:   -emit-llvm -disable-llvm-passes -O3 -o - | FileCheck %s
+
+// Make sure generate i16 add.
+// CHECK: add nsw i16 %
+int16_t add(int16_t a, int16_t b) {
+  return a + b;
+}
+// CHECK: define noundef <2 x i16> @
+// CHECK: add <2 x i16>
+int16_t2 add(int16_t2 a, int16_t2 b) {
+  return a + b;
+}
+// CHECK: define noundef <3 x i16> @
+// CHECK: add <3 x i16>
+int16_t3 add(int16_t3 a, int16_t3 b) {
+  return a + b;
+}
+// CHECK: define noundef <4 x i16> @
+// CHECK: add <4 x i16>
+int16_t4 add(int16_t4 a,