[PATCH] D120411: [X86] Replace __m[128|256|512]bh with __m[128|256|512]i and mark the former deprecated

2022-02-25 Thread Andy Kaylor via Phabricator via cfe-commits
andrew.w.kaylor requested changes to this revision.
andrew.w.kaylor added a comment.
This revision now requires changes to proceed.

Replacing `__m128bh` with `__m128i` does not prevent arithmetic operations on 
the type.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D120411/new/

https://reviews.llvm.org/D120411

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D120411: [X86] Replace __m[128|256|512]bh with __m[128|256|512]i and mark the former deprecated

2022-02-23 Thread Phoebe Wang via Phabricator via cfe-commits
pengfei created this revision.
pengfei added reviewers: skan, RKSimon, craig.topper, FreddyYe, LuoYuanke.
pengfei requested review of this revision.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

As discussed on D120395, we should prohibit arithmetic operations for
__m[128|256|512]bh as well. But they may be used as ABI types in the future,
so replace them with __m[128|256|512]i.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D120411

Files:
  clang/lib/Headers/avx512bf16intrin.h
  clang/lib/Headers/avx512vlbf16intrin.h
  clang/test/CodeGen/X86/avx512bf16-builtins.c
  clang/test/CodeGen/X86/avx512bf16-error.c
  clang/test/CodeGen/X86/avx512vlbf16-builtins.c

Index: clang/test/CodeGen/X86/avx512vlbf16-builtins.c
===
--- clang/test/CodeGen/X86/avx512vlbf16-builtins.c
+++ clang/test/CodeGen/X86/avx512vlbf16-builtins.c
@@ -4,127 +4,127 @@
 
 #include <immintrin.h>
 
-__m128bh test_mm_cvtne2ps2bf16(__m128 A, __m128 B) {
+__m128i test_mm_cvtne2ps2bf16(__m128 A, __m128 B) {
   // CHECK-LABEL: @test_mm_cvtne2ps2bf16
   // CHECK: @llvm.x86.avx512bf16.cvtne2ps2bf16.128
-  // CHECK: ret <8 x i16> %{{.*}}
+  // CHECK: ret <2 x i64> %{{.*}}
   return _mm_cvtne2ps_pbh(A, B);
 }
 
-__m128bh test_mm_maskz_cvtne2ps2bf16(__m128 A, __m128 B, __mmask8 U) {
+__m128i test_mm_maskz_cvtne2ps2bf16(__m128 A, __m128 B, __mmask8 U) {
   // CHECK-LABEL: @test_mm_maskz_cvtne2ps2bf16
   // CHECK: @llvm.x86.avx512bf16.cvtne2ps2bf16.128
   // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
-  // CHECK: ret <8 x i16> %{{.*}}
+  // CHECK: ret <2 x i64> %{{.*}}
   return _mm_maskz_cvtne2ps_pbh(U, A, B);
 }
 
-__m128bh test_mm_mask_cvtne2ps2bf16(__m128bh C, __mmask8 U, __m128 A, __m128 B) {
+__m128i test_mm_mask_cvtne2ps2bf16(__m128i C, __mmask8 U, __m128 A, __m128 B) {
   // CHECK-LABEL: @test_mm_mask_cvtne2ps2bf16
   // CHECK: @llvm.x86.avx512bf16.cvtne2ps2bf16.128
   // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
-  // CHECK: ret <8 x i16> %{{.*}}
+  // CHECK: ret <2 x i64> %{{.*}}
   return _mm_mask_cvtne2ps_pbh(C, U, A, B);
 }
 
-__m256bh test_mm256_cvtne2ps2bf16(__m256 A, __m256 B) {
+__m256i test_mm256_cvtne2ps2bf16(__m256 A, __m256 B) {
   // CHECK-LABEL: @test_mm256_cvtne2ps2bf16
   // CHECK: @llvm.x86.avx512bf16.cvtne2ps2bf16.256
-  // CHECK: ret <16 x i16> %{{.*}}
+  // CHECK: ret <4 x i64> %{{.*}}
   return _mm256_cvtne2ps_pbh(A, B);
 }
 
-__m256bh test_mm256_maskz_cvtne2ps2bf16(__m256 A, __m256 B, __mmask16 U) {
+__m256i test_mm256_maskz_cvtne2ps2bf16(__m256 A, __m256 B, __mmask16 U) {
   // CHECK-LABEL: @test_mm256_maskz_cvtne2ps2bf16
   // CHECK: @llvm.x86.avx512bf16.cvtne2ps2bf16.256
   // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
-  // CHECK: ret <16 x i16> %{{.*}}
+  // CHECK: ret <4 x i64> %{{.*}}
   return _mm256_maskz_cvtne2ps_pbh(U, A, B);
 }
 
-__m256bh test_mm256_mask_cvtne2ps2bf16(__m256bh C, __mmask16 U, __m256 A, __m256 B) {
+__m256i test_mm256_mask_cvtne2ps2bf16(__m256i C, __mmask16 U, __m256 A, __m256 B) {
   // CHECK-LABEL: @test_mm256_mask_cvtne2ps2bf16
   // CHECK: @llvm.x86.avx512bf16.cvtne2ps2bf16.256
   // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
-  // CHECK: ret <16 x i16> %{{.*}}
+  // CHECK: ret <4 x i64> %{{.*}}
   return _mm256_mask_cvtne2ps_pbh(C, U, A, B);
 }
 
-__m512bh test_mm512_cvtne2ps2bf16(__m512 A, __m512 B) {
+__m512i test_mm512_cvtne2ps2bf16(__m512 A, __m512 B) {
   // CHECK-LABEL: @test_mm512_cvtne2ps2bf16
   // CHECK: @llvm.x86.avx512bf16.cvtne2ps2bf16.512
-  // CHECK: ret <32 x i16> %{{.*}}
+  // CHECK: ret <8 x i64> %{{.*}}
   return _mm512_cvtne2ps_pbh(A, B);
 }
 
-__m512bh test_mm512_maskz_cvtne2ps2bf16(__m512 A, __m512 B, __mmask32 U) {
+__m512i test_mm512_maskz_cvtne2ps2bf16(__m512 A, __m512 B, __mmask32 U) {
   // CHECK-LABEL: @test_mm512_maskz_cvtne2ps2bf16
   // CHECK: @llvm.x86.avx512bf16.cvtne2ps2bf16.512
   // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
-  // CHECK: ret <32 x i16> %{{.*}}
+  // CHECK: ret <8 x i64> %{{.*}}
   return _mm512_maskz_cvtne2ps_pbh(U, A, B);
 }
 
-__m512bh test_mm512_mask_cvtne2ps2bf16(__m512bh C, __mmask32 U, __m512 A, __m512 B) {
+__m512i test_mm512_mask_cvtne2ps2bf16(__m512i C, __mmask32 U, __m512 A, __m512 B) {
   // CHECK-LABEL: @test_mm512_mask_cvtne2ps2bf16
   // CHECK: @llvm.x86.avx512bf16.cvtne2ps2bf16.512
   // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
-  // CHECK: ret <32 x i16> %{{.*}}
+  // CHECK: ret <8 x i64> %{{.*}}
   return _mm512_mask_cvtne2ps_pbh(C, U, A, B);
 }
 
-__m128bh test_mm_cvtneps2bf16(__m128 A) {
+__m128i test_mm_cvtneps2bf16(__m128 A) {
   // CHECK-LABEL: @test_mm_cvtneps2bf16
   // CHECK: @llvm.x86.avx512bf16.mask.cvtneps2bf16.128
-  // CHECK: ret <8 x i16> %{{.*}}
+  // CHECK: ret <2 x i64> %{{.*}}
   return _mm_cvtneps_pbh(A);
 }