[PATCH] D115611: [X86][BF16] delete `typedef unsigned short __bfloat16`
FreddyYe abandoned this revision. FreddyYe added a comment. Agree with @pengfei. Sorry for the noise. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D115611/new/ https://reviews.llvm.org/D115611 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D115611: [X86][BF16] delete `typedef unsigned short __bfloat16`
pengfei requested changes to this revision. pengfei added a comment. This revision now requires changes to proceed. I would suggest we drop the change. Sorry for my fickleness :( Comment at: clang/lib/Headers/avx512bf16intrin.h:32 /// \param __A ///A bfloat data. /// \returns A float data whose sign field and exponent field keep unchanged, Sorry, when I reviewed the doxygen, I found a new problem. If we want to use `unsigned short`, we may need to change it here too. But `unsigned short` is not clear to users, since they actually want to convert a bfloat type rather than an integer. On the other hand, the double-underscore naming convention is reserved for compiler use, and we have already used lots of such names for X86 intrinsics. A very similar one is `__mmask16`. So I think using `__bfloat16` here is consistent with the existing types we have defined. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D115611/new/ https://reviews.llvm.org/D115611 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D115611: [X86][BF16] delete `typedef unsigned short __bfloat16`
skan accepted this revision. skan added a comment. This revision is now accepted and ready to land. LGTM Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D115611/new/ https://reviews.llvm.org/D115611 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D115611: [X86][BF16] delete `typedef unsigned short __bfloat16`
FreddyYe updated this revision to Diff 394137. FreddyYe added a comment. clang-format Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D115611/new/ https://reviews.llvm.org/D115611 Files: clang/lib/Headers/avx512bf16intrin.h clang/lib/Headers/avx512vlbf16intrin.h clang/test/CodeGen/X86/avx512bf16-builtins.c clang/test/CodeGen/X86/avx512vlbf16-builtins.c Index: clang/test/CodeGen/X86/avx512vlbf16-builtins.c === --- clang/test/CodeGen/X86/avx512vlbf16-builtins.c +++ clang/test/CodeGen/X86/avx512vlbf16-builtins.c @@ -162,7 +162,7 @@ return _mm256_mask_dpbf16_ps(D, U, A, B); } -__bfloat16 test_mm_cvtness_sbh(float A) { +unsigned short test_mm_cvtness_sbh(float A) { // CHECK-LABEL: @test_mm_cvtness_sbh // CHECK: @llvm.x86.avx512bf16.mask.cvtneps2bf16.128 // CHECK: ret i16 %{{.*}} Index: clang/test/CodeGen/X86/avx512bf16-builtins.c === --- clang/test/CodeGen/X86/avx512bf16-builtins.c +++ clang/test/CodeGen/X86/avx512bf16-builtins.c @@ -4,7 +4,7 @@ #include -float test_mm_cvtsbh_ss(__bfloat16 A) { +float test_mm_cvtsbh_ss(unsigned short A) { // CHECK-LABEL: @test_mm_cvtsbh_ss // CHECK: zext i16 %{{.*}} to i32 // CHECK: shl i32 %{{.*}}, 16 Index: clang/lib/Headers/avx512vlbf16intrin.h === --- clang/lib/Headers/avx512vlbf16intrin.h +++ clang/lib/Headers/avx512vlbf16intrin.h @@ -413,7 +413,8 @@ ///A float data. /// \returns A bf16 data whose sign field and exponent field keep unchanged, ///and fraction field is truncated to 7 bits. 
-static __inline__ __bfloat16 __DEFAULT_FN_ATTRS128 _mm_cvtness_sbh(float __A) { +static __inline__ unsigned short __DEFAULT_FN_ATTRS128 +_mm_cvtness_sbh(float __A) { __v4sf __V = {__A, 0, 0, 0}; __v8hi __R = __builtin_ia32_cvtneps2bf16_128_mask( (__v4sf)__V, (__v8hi)_mm_undefined_si128(), (__mmask8)-1); Index: clang/lib/Headers/avx512bf16intrin.h === --- clang/lib/Headers/avx512bf16intrin.h +++ clang/lib/Headers/avx512bf16intrin.h @@ -15,7 +15,6 @@ typedef short __m512bh __attribute__((__vector_size__(64), __aligned__(64))); typedef short __m256bh __attribute__((__vector_size__(32), __aligned__(32))); -typedef unsigned short __bfloat16; #define __DEFAULT_FN_ATTRS512 \ __attribute__((__always_inline__, __nodebug__, __target__("avx512bf16"), \ @@ -33,7 +32,7 @@ ///A bfloat data. /// \returns A float data whose sign field and exponent field keep unchanged, ///and fraction field is extended to 23 bits. -static __inline__ float __DEFAULT_FN_ATTRS _mm_cvtsbh_ss(__bfloat16 __A) { +static __inline__ float __DEFAULT_FN_ATTRS _mm_cvtsbh_ss(unsigned short __A) { return __builtin_ia32_cvtsbf162ss_32(__A); } Index: clang/test/CodeGen/X86/avx512vlbf16-builtins.c === --- clang/test/CodeGen/X86/avx512vlbf16-builtins.c +++ clang/test/CodeGen/X86/avx512vlbf16-builtins.c @@ -162,7 +162,7 @@ return _mm256_mask_dpbf16_ps(D, U, A, B); } -__bfloat16 test_mm_cvtness_sbh(float A) { +unsigned short test_mm_cvtness_sbh(float A) { // CHECK-LABEL: @test_mm_cvtness_sbh // CHECK: @llvm.x86.avx512bf16.mask.cvtneps2bf16.128 // CHECK: ret i16 %{{.*}} Index: clang/test/CodeGen/X86/avx512bf16-builtins.c === --- clang/test/CodeGen/X86/avx512bf16-builtins.c +++ clang/test/CodeGen/X86/avx512bf16-builtins.c @@ -4,7 +4,7 @@ #include -float test_mm_cvtsbh_ss(__bfloat16 A) { +float test_mm_cvtsbh_ss(unsigned short A) { // CHECK-LABEL: @test_mm_cvtsbh_ss // CHECK: zext i16 %{{.*}} to i32 // CHECK: shl i32 %{{.*}}, 16 Index: clang/lib/Headers/avx512vlbf16intrin.h === --- 
clang/lib/Headers/avx512vlbf16intrin.h +++ clang/lib/Headers/avx512vlbf16intrin.h @@ -413,7 +413,8 @@ ///A float data. /// \returns A bf16 data whose sign field and exponent field keep unchanged, ///and fraction field is truncated to 7 bits. -static __inline__ __bfloat16 __DEFAULT_FN_ATTRS128 _mm_cvtness_sbh(float __A) { +static __inline__ unsigned short __DEFAULT_FN_ATTRS128 +_mm_cvtness_sbh(float __A) { __v4sf __V = {__A, 0, 0, 0}; __v8hi __R = __builtin_ia32_cvtneps2bf16_128_mask( (__v4sf)__V, (__v8hi)_mm_undefined_si128(), (__mmask8)-1); Index: clang/lib/Headers/avx512bf16intrin.h === --- clang/lib/Headers/avx512bf16intrin.h +++ clang/lib/Headers/avx512bf16intrin.h @@ -15,7 +15,6 @@ typedef short __m512bh __attribute__((__vector_size__(64), __aligned__(64))); typedef short __m256bh __attribute__((__vector_size__(32), __aligned__(32))); -typedef unsigned short
[PATCH] D115611: [X86][BF16] delete `typedef unsigned short __bfloat16`
FreddyYe updated this revision to Diff 394134. FreddyYe added a comment. changed into `unsigned short` Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D115611/new/ https://reviews.llvm.org/D115611 Files: clang/lib/Headers/avx512bf16intrin.h clang/lib/Headers/avx512vlbf16intrin.h clang/test/CodeGen/X86/avx512bf16-builtins.c clang/test/CodeGen/X86/avx512vlbf16-builtins.c Index: clang/test/CodeGen/X86/avx512vlbf16-builtins.c === --- clang/test/CodeGen/X86/avx512vlbf16-builtins.c +++ clang/test/CodeGen/X86/avx512vlbf16-builtins.c @@ -162,7 +162,7 @@ return _mm256_mask_dpbf16_ps(D, U, A, B); } -__bfloat16 test_mm_cvtness_sbh(float A) { +unsigned short test_mm_cvtness_sbh(float A) { // CHECK-LABEL: @test_mm_cvtness_sbh // CHECK: @llvm.x86.avx512bf16.mask.cvtneps2bf16.128 // CHECK: ret i16 %{{.*}} Index: clang/test/CodeGen/X86/avx512bf16-builtins.c === --- clang/test/CodeGen/X86/avx512bf16-builtins.c +++ clang/test/CodeGen/X86/avx512bf16-builtins.c @@ -4,7 +4,7 @@ #include -float test_mm_cvtsbh_ss(__bfloat16 A) { +float test_mm_cvtsbh_ss(unsigned short A) { // CHECK-LABEL: @test_mm_cvtsbh_ss // CHECK: zext i16 %{{.*}} to i32 // CHECK: shl i32 %{{.*}}, 16 Index: clang/lib/Headers/avx512vlbf16intrin.h === --- clang/lib/Headers/avx512vlbf16intrin.h +++ clang/lib/Headers/avx512vlbf16intrin.h @@ -413,7 +413,7 @@ ///A float data. /// \returns A bf16 data whose sign field and exponent field keep unchanged, ///and fraction field is truncated to 7 bits. 
-static __inline__ __bfloat16 __DEFAULT_FN_ATTRS128 _mm_cvtness_sbh(float __A) { +static __inline__ unsigned short __DEFAULT_FN_ATTRS128 _mm_cvtness_sbh(float __A) { __v4sf __V = {__A, 0, 0, 0}; __v8hi __R = __builtin_ia32_cvtneps2bf16_128_mask( (__v4sf)__V, (__v8hi)_mm_undefined_si128(), (__mmask8)-1); Index: clang/lib/Headers/avx512bf16intrin.h === --- clang/lib/Headers/avx512bf16intrin.h +++ clang/lib/Headers/avx512bf16intrin.h @@ -15,7 +15,6 @@ typedef short __m512bh __attribute__((__vector_size__(64), __aligned__(64))); typedef short __m256bh __attribute__((__vector_size__(32), __aligned__(32))); -typedef unsigned short __bfloat16; #define __DEFAULT_FN_ATTRS512 \ __attribute__((__always_inline__, __nodebug__, __target__("avx512bf16"), \ @@ -33,7 +32,7 @@ ///A bfloat data. /// \returns A float data whose sign field and exponent field keep unchanged, ///and fraction field is extended to 23 bits. -static __inline__ float __DEFAULT_FN_ATTRS _mm_cvtsbh_ss(__bfloat16 __A) { +static __inline__ float __DEFAULT_FN_ATTRS _mm_cvtsbh_ss(unsigned short __A) { return __builtin_ia32_cvtsbf162ss_32(__A); } Index: clang/test/CodeGen/X86/avx512vlbf16-builtins.c === --- clang/test/CodeGen/X86/avx512vlbf16-builtins.c +++ clang/test/CodeGen/X86/avx512vlbf16-builtins.c @@ -162,7 +162,7 @@ return _mm256_mask_dpbf16_ps(D, U, A, B); } -__bfloat16 test_mm_cvtness_sbh(float A) { +unsigned short test_mm_cvtness_sbh(float A) { // CHECK-LABEL: @test_mm_cvtness_sbh // CHECK: @llvm.x86.avx512bf16.mask.cvtneps2bf16.128 // CHECK: ret i16 %{{.*}} Index: clang/test/CodeGen/X86/avx512bf16-builtins.c === --- clang/test/CodeGen/X86/avx512bf16-builtins.c +++ clang/test/CodeGen/X86/avx512bf16-builtins.c @@ -4,7 +4,7 @@ #include -float test_mm_cvtsbh_ss(__bfloat16 A) { +float test_mm_cvtsbh_ss(unsigned short A) { // CHECK-LABEL: @test_mm_cvtsbh_ss // CHECK: zext i16 %{{.*}} to i32 // CHECK: shl i32 %{{.*}}, 16 Index: clang/lib/Headers/avx512vlbf16intrin.h === --- clang/lib/Headers/avx512vlbf16intrin.h 
+++ clang/lib/Headers/avx512vlbf16intrin.h @@ -413,7 +413,7 @@ ///A float data. /// \returns A bf16 data whose sign field and exponent field keep unchanged, ///and fraction field is truncated to 7 bits. -static __inline__ __bfloat16 __DEFAULT_FN_ATTRS128 _mm_cvtness_sbh(float __A) { +static __inline__ unsigned short __DEFAULT_FN_ATTRS128 _mm_cvtness_sbh(float __A) { __v4sf __V = {__A, 0, 0, 0}; __v8hi __R = __builtin_ia32_cvtneps2bf16_128_mask( (__v4sf)__V, (__v8hi)_mm_undefined_si128(), (__mmask8)-1); Index: clang/lib/Headers/avx512bf16intrin.h === --- clang/lib/Headers/avx512bf16intrin.h +++ clang/lib/Headers/avx512bf16intrin.h @@ -15,7 +15,6 @@ typedef short __m512bh __attribute__((__vector_size__(64), __aligned__(64))); typedef short __m256bh __attribute__((__vector_size__(32), __aligned__(32))); -typedef
[PATCH] D115611: [X86][BF16] delete `typedef unsigned short __bfloat16`
craig.topper added inline comments. Comment at: clang/lib/Headers/avx512vlbf16intrin.h:416 ///and fraction field is truncated to 7 bits. -static __inline__ __bfloat16 __DEFAULT_FN_ATTRS128 _mm_cvtness_sbh(float __A) { +static __inline__ short __DEFAULT_FN_ATTRS128 _mm_cvtness_sbh(float __A) { __v4sf __V = {__A, 0, 0, 0}; pengfei wrote: > pengfei wrote: > > FreddyYe wrote: > > > craig.topper wrote: > > > > I'm not sure if this change is a good idea this late. Users could have > > > > been dependent on it being an unsigned value before. I believe this > > > > changes the behavior of this code > > > > > > > > ``` > > > > int result = _mm_cvtness_sbh(X) > > > > ``` > > > > > > > > Previously it would have zero extended, but now it will sign extend. > > > Yes, this should be a huge concern. > > > > > > Notice that intrinsic update has just documented these two intrinsics on > > > 12/7/2021. So maybe we still have change to change it? And it's more > > > theory right to do sign extension from a bfloat16 to int32. > > I think this is the problem that we choose integer to represent BF16. > > Neither zero extend nor sign extend makes sense to a floating type. But > > considering the MSB of floating point is sign bit. Sign extend should be > > better in theory. > > Maybe it's a good approach to use `__bf16`, but it is supported only by > > Clang. We can't use it for intrinsics. > > Anyway, I'm fine with keeping zero extend here. @craig.topper , do you > > think it's acceptable to you if we just change `__bfloat16` to `short`? > Sorry, I mean `unsigned short` I'm fine with just changing it to `unsigned short` Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D115611/new/ https://reviews.llvm.org/D115611 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D115611: [X86][BF16] delete `typedef unsigned short __bfloat16`
pengfei added inline comments. Comment at: clang/lib/Headers/avx512vlbf16intrin.h:416 ///and fraction field is truncated to 7 bits. -static __inline__ __bfloat16 __DEFAULT_FN_ATTRS128 _mm_cvtness_sbh(float __A) { +static __inline__ short __DEFAULT_FN_ATTRS128 _mm_cvtness_sbh(float __A) { __v4sf __V = {__A, 0, 0, 0}; pengfei wrote: > FreddyYe wrote: > > craig.topper wrote: > > > I'm not sure if this change is a good idea this late. Users could have > > > been dependent on it being an unsigned value before. I believe this > > > changes the behavior of this code > > > > > > ``` > > > int result = _mm_cvtness_sbh(X) > > > ``` > > > > > > Previously it would have zero extended, but now it will sign extend. > > Yes, this should be a huge concern. > > > > Notice that intrinsic update has just documented these two intrinsics on > > 12/7/2021. So maybe we still have change to change it? And it's more theory > > right to do sign extension from a bfloat16 to int32. > I think this is the problem that we choose integer to represent BF16. Neither > zero extend nor sign extend makes sense to a floating type. But considering > the MSB of floating point is sign bit. Sign extend should be better in theory. > Maybe it's a good approach to use `__bf16`, but it is supported only by > Clang. We can't use it for intrinsics. > Anyway, I'm fine with keeping zero extend here. @craig.topper , do you think > it's acceptable to you if we just change `__bfloat16` to `short`? Sorry, I mean `unsigned short` Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D115611/new/ https://reviews.llvm.org/D115611 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D115611: [X86][BF16] delete `typedef unsigned short __bfloat16`
pengfei added inline comments. Comment at: clang/lib/Headers/avx512vlbf16intrin.h:416 ///and fraction field is truncated to 7 bits. -static __inline__ __bfloat16 __DEFAULT_FN_ATTRS128 _mm_cvtness_sbh(float __A) { +static __inline__ short __DEFAULT_FN_ATTRS128 _mm_cvtness_sbh(float __A) { __v4sf __V = {__A, 0, 0, 0}; FreddyYe wrote: > craig.topper wrote: > > I'm not sure if this change is a good idea this late. Users could have been > > dependent on it being an unsigned value before. I believe this changes the > > behavior of this code > > > > ``` > > int result = _mm_cvtness_sbh(X) > > ``` > > > > Previously it would have zero extended, but now it will sign extend. > Yes, this should be a huge concern. > > Notice that intrinsic update has just documented these two intrinsics on > 12/7/2021. So maybe we still have change to change it? And it's more theory > right to do sign extension from a bfloat16 to int32. I think this is the problem with choosing an integer to represent BF16. Neither zero extension nor sign extension makes sense for a floating-point type. But considering that the MSB of a floating-point value is the sign bit, sign extension should be better in theory. Maybe it's a good approach to use `__bf16`, but it is supported only by Clang. We can't use it for intrinsics. Anyway, I'm fine with keeping zero extension here. @craig.topper, would it be acceptable to you if we just change `__bfloat16` to `short`? Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D115611/new/ https://reviews.llvm.org/D115611 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D115611: [X86][BF16] delete `typedef unsigned short __bfloat16`
FreddyYe added inline comments. Comment at: clang/lib/Headers/avx512vlbf16intrin.h:416 ///and fraction field is truncated to 7 bits. -static __inline__ __bfloat16 __DEFAULT_FN_ATTRS128 _mm_cvtness_sbh(float __A) { +static __inline__ short __DEFAULT_FN_ATTRS128 _mm_cvtness_sbh(float __A) { __v4sf __V = {__A, 0, 0, 0}; craig.topper wrote: > I'm not sure if this change is a good idea this late. Users could have been > dependent on it being an unsigned value before. I believe this changes the > behavior of this code > > ``` > int result = _mm_cvtness_sbh(X) > ``` > > Previously it would have zero extended, but now it will sign extend. Yes, this should be a huge concern. Note that the intrinsic update only just documented these two intrinsics on 12/7/2021, so maybe we still have a chance to change it? And in theory it's more correct to do sign extension from a bfloat16 to int32. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D115611/new/ https://reviews.llvm.org/D115611 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D115611: [X86][BF16] delete `typedef unsigned short __bfloat16`
craig.topper added inline comments. Comment at: clang/lib/Headers/avx512vlbf16intrin.h:416 ///and fraction field is truncated to 7 bits. -static __inline__ __bfloat16 __DEFAULT_FN_ATTRS128 _mm_cvtness_sbh(float __A) { +static __inline__ short __DEFAULT_FN_ATTRS128 _mm_cvtness_sbh(float __A) { __v4sf __V = {__A, 0, 0, 0}; I'm not sure if this change is a good idea this late. Users could have been dependent on it being an unsigned value before. I believe this changes the behavior of this code ``` int result = _mm_cvtness_sbh(X) ``` Previously it would have zero extended, but now it will sign extend. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D115611/new/ https://reviews.llvm.org/D115611 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D115611: [X86][BF16] delete `typedef unsigned short __bfloat16`
FreddyYe created this revision. Herald added a subscriber: pengfei. FreddyYe requested review of this revision. Herald added a project: clang. Herald added a subscriber: cfe-commits. The name `__bfloat16` may mislead users into thinking its real type is bfloat16, but in fact it is not. Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D115611 Files: clang/lib/CodeGen/CGBuiltin.cpp clang/lib/Headers/avx512bf16intrin.h clang/lib/Headers/avx512vlbf16intrin.h clang/test/CodeGen/X86/avx512bf16-builtins.c clang/test/CodeGen/X86/avx512vlbf16-builtins.c Index: clang/test/CodeGen/X86/avx512vlbf16-builtins.c === --- clang/test/CodeGen/X86/avx512vlbf16-builtins.c +++ clang/test/CodeGen/X86/avx512vlbf16-builtins.c @@ -162,7 +162,7 @@ return _mm256_mask_dpbf16_ps(D, U, A, B); } -__bfloat16 test_mm_cvtness_sbh(float A) { +short test_mm_cvtness_sbh(float A) { // CHECK-LABEL: @test_mm_cvtness_sbh // CHECK: @llvm.x86.avx512bf16.mask.cvtneps2bf16.128 // CHECK: ret i16 %{{.*}} Index: clang/test/CodeGen/X86/avx512bf16-builtins.c === --- clang/test/CodeGen/X86/avx512bf16-builtins.c +++ clang/test/CodeGen/X86/avx512bf16-builtins.c @@ -4,9 +4,9 @@ #include -float test_mm_cvtsbh_ss(__bfloat16 A) { +float test_mm_cvtsbh_ss(short A) { // CHECK-LABEL: @test_mm_cvtsbh_ss - // CHECK: zext i16 %{{.*}} to i32 + // CHECK: sext i16 %{{.*}} to i32 // CHECK: shl i32 %{{.*}}, 16 // CHECK: bitcast i32 %{{.*}} to float // CHECK: ret float %{{.*}} Index: clang/lib/Headers/avx512vlbf16intrin.h === --- clang/lib/Headers/avx512vlbf16intrin.h +++ clang/lib/Headers/avx512vlbf16intrin.h @@ -413,7 +413,7 @@ ///A float data. /// \returns A bf16 data whose sign field and exponent field keep unchanged, ///and fraction field is truncated to 7 bits. 
-static __inline__ __bfloat16 __DEFAULT_FN_ATTRS128 _mm_cvtness_sbh(float __A) { +static __inline__ short __DEFAULT_FN_ATTRS128 _mm_cvtness_sbh(float __A) { __v4sf __V = {__A, 0, 0, 0}; __v8hi __R = __builtin_ia32_cvtneps2bf16_128_mask( (__v4sf)__V, (__v8hi)_mm_undefined_si128(), (__mmask8)-1); Index: clang/lib/Headers/avx512bf16intrin.h === --- clang/lib/Headers/avx512bf16intrin.h +++ clang/lib/Headers/avx512bf16intrin.h @@ -15,7 +15,6 @@ typedef short __m512bh __attribute__((__vector_size__(64), __aligned__(64))); typedef short __m256bh __attribute__((__vector_size__(32), __aligned__(32))); -typedef unsigned short __bfloat16; #define __DEFAULT_FN_ATTRS512 \ __attribute__((__always_inline__, __nodebug__, __target__("avx512bf16"), \ @@ -33,7 +32,7 @@ ///A bfloat data. /// \returns A float data whose sign field and exponent field keep unchanged, ///and fraction field is extended to 23 bits. -static __inline__ float __DEFAULT_FN_ATTRS _mm_cvtsbh_ss(__bfloat16 __A) { +static __inline__ float __DEFAULT_FN_ATTRS _mm_cvtsbh_ss(short __A) { return __builtin_ia32_cvtsbf162ss_32(__A); } Index: clang/lib/CodeGen/CGBuiltin.cpp === --- clang/lib/CodeGen/CGBuiltin.cpp +++ clang/lib/CodeGen/CGBuiltin.cpp @@ -12479,8 +12479,8 @@ const CallExpr *E, ArrayRef Ops) { llvm::Type *Int32Ty = CGF.Builder.getInt32Ty(); - Value *ZeroExt = CGF.Builder.CreateZExt(Ops[0], Int32Ty); - Value *Shl = CGF.Builder.CreateShl(ZeroExt, 16); + Value *SignExt = CGF.Builder.CreateSExt(Ops[0], Int32Ty); + Value *Shl = CGF.Builder.CreateShl(SignExt, 16); llvm::Type *ResultType = CGF.ConvertType(E->getType()); Value *BitCast = CGF.Builder.CreateBitCast(Shl, ResultType); return BitCast; Index: clang/test/CodeGen/X86/avx512vlbf16-builtins.c === --- clang/test/CodeGen/X86/avx512vlbf16-builtins.c +++ clang/test/CodeGen/X86/avx512vlbf16-builtins.c @@ -162,7 +162,7 @@ return _mm256_mask_dpbf16_ps(D, U, A, B); } -__bfloat16 test_mm_cvtness_sbh(float A) { +short test_mm_cvtness_sbh(float A) { // CHECK-LABEL: 
@test_mm_cvtness_sbh // CHECK: @llvm.x86.avx512bf16.mask.cvtneps2bf16.128 // CHECK: ret i16 %{{.*}} Index: clang/test/CodeGen/X86/avx512bf16-builtins.c === --- clang/test/CodeGen/X86/avx512bf16-builtins.c +++ clang/test/CodeGen/X86/avx512bf16-builtins.c @@ -4,9 +4,9 @@ #include -float test_mm_cvtsbh_ss(__bfloat16 A) { +float test_mm_cvtsbh_ss(short A) { // CHECK-LABEL: @test_mm_cvtsbh_ss - // CHECK: zext i16 %{{.*}} to i32 + // CHECK: sext i16 %{{.*}} to i32 // CHECK: shl i32 %{{.*}}, 16 // CHECK: bitcast i32 %{{.*}} to float // CHECK: ret float %{{.*}} Index: