[PATCH] D82665: [AArch64][SVE] Add bfloat16 to outstanding tuple vector intrinsics

Cullen Rhodes via Phabricator via cfe-commits Fri, 26 Jun 2020 10:23:40 -0700

c-rhodes created this revision.
c-rhodes added reviewers: sdesmalen, kmclaughlin, fpetrogalli.
Herald added subscribers: danielkiss, psnobl, rkruppe, kristof.beyls, tschuett.
Herald added a reviewer: efriedma.
Herald added projects: clang, LLVM.


- svget2/3/4
- svset2/3/4
- svcreate2/3/4
- svundef/2/3/4


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D82665

Files:
  clang/include/clang/Basic/arm_sve.td
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create2-bfloat.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create3-bfloat.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create4-bfloat.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get2-bfloat.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get3-bfloat.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get4-bfloat.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set2-bfloat.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set3-bfloat.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set4-bfloat.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_undef-bfloat.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_undef2-bfloat.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_undef3-bfloat.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_undef4-bfloat.c
  llvm/test/CodeGen/AArch64/sve-intrinsics-create-tuple.ll

Index: llvm/test/CodeGen/AArch64/sve-intrinsics-create-tuple.ll
===================================================================
--- llvm/test/CodeGen/AArch64/sve-intrinsics-create-tuple.ll
+++ llvm/test/CodeGen/AArch64/sve-intrinsics-create-tuple.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple aarch64 -mattr=+sve -asm-verbose=1 < %s | FileCheck %s
+; RUN: llc -mtriple aarch64 -mattr=+sve,+bf16 -asm-verbose=1 < %s | FileCheck %s
 
 ;
 ; SVCREATE2 (i8)
@@ -94,6 +94,37 @@
 }
 
 ;
+; SVCREATE2 (bfloat)
+;
+
+define <vscale x 8 x bfloat> @test_svcreate2_bf16_vec0(i1 %p, <vscale x 8 x bfloat> %z0, <vscale x 8 x bfloat> %z1) local_unnamed_addr #0 {
+; CHECK-LABEL: test_svcreate2_bf16_vec0:
+; CHECK: // %L2
+; CHECK-NEXT: ret
+  %tuple = tail call <vscale x 16 x bfloat> @llvm.aarch64.sve.tuple.create2.nxv16bf16.nxv8bf16(<vscale x 8 x bfloat> %z0, <vscale x 8 x bfloat> %z1)
+  br i1 %p, label %L1, label %L2
+L1:
+  ret <vscale x 8 x bfloat> undef
+L2:
+  %extract = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.tuple.get.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> %tuple, i32 0)
+  ret <vscale x 8 x bfloat> %extract
+}
+
+define <vscale x 8 x bfloat> @test_svcreate2_bf16_vec1(i1 %p, <vscale x 8 x bfloat> %z0, <vscale x 8 x bfloat> %z1) local_unnamed_addr #0 {
+; CHECK-LABEL: test_svcreate2_bf16_vec1:
+; CHECK: // %L2
+; CHECK-NEXT: mov z0.d, z1.d
+; CHECK-NEXT: ret
+  %tuple = tail call <vscale x 16 x bfloat> @llvm.aarch64.sve.tuple.create2.nxv16bf16.nxv8bf16(<vscale x 8 x bfloat> %z0, <vscale x 8 x bfloat> %z1)
+  br i1 %p, label %L1, label %L2
+L1:
+  ret <vscale x 8 x bfloat> undef
+L2:
+  %extract = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.tuple.get.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> %tuple, i32 1)
+  ret <vscale x 8 x bfloat> %extract
+}
+
+;
 ; SVCREATE2 (i32)
 ;
 
@@ -278,6 +309,7 @@
   %extract = tail call <vscale x 8 x i16> @llvm.aarch64.sve.tuple.get.nxv8i16.nxv24i16(<vscale x 24 x i16> %tuple, i32 2)
   ret <vscale x 8 x i16> %extract
 }
+
 ;
 ; SVCREATE3 (half)
 ;
@@ -309,6 +341,36 @@
   ret <vscale x 8 x half> %extract
 }
 
+;
+; SVCREATE3 (bfloat)
+;
+
+define <vscale x 8 x bfloat> @test_svcreate3_bf16_vec0(i1 %p, <vscale x 8 x bfloat> %z0, <vscale x 8 x bfloat> %z1, <vscale x 8 x bfloat> %z2) local_unnamed_addr #0 {
+; CHECK-LABEL: test_svcreate3_bf16_vec0:
+; CHECK: // %L2
+; CHECK-NEXT: ret
+  %tuple = tail call <vscale x 24 x bfloat> @llvm.aarch64.sve.tuple.create3.nxv24bf16.nxv8bf16(<vscale x 8 x bfloat> %z0, <vscale x 8 x bfloat> %z1, <vscale x 8 x bfloat> %z2)
+  br i1 %p, label %L1, label %L2
+L1:
+  ret <vscale x 8 x bfloat> undef
+L2:
+  %extract = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.tuple.get.nxv8bf16.nxv24bf16(<vscale x 24 x bfloat> %tuple, i32 0)
+  ret <vscale x 8 x bfloat> %extract
+}
+
+define <vscale x 8 x bfloat> @test_svcreate3_bf16_vec2(i1 %p, <vscale x 8 x bfloat> %z0, <vscale x 8 x bfloat> %z1, <vscale x 8 x bfloat> %z2) local_unnamed_addr #0 {
+; CHECK-LABEL: test_svcreate3_bf16_vec2:
+; CHECK: // %L2
+; CHECK-NEXT: mov z0.d, z2.d
+; CHECK-NEXT: ret
+  %tuple = tail call <vscale x 24 x bfloat> @llvm.aarch64.sve.tuple.create3.nxv24bf16.nxv8bf16(<vscale x 8 x bfloat> %z0, <vscale x 8 x bfloat> %z1, <vscale x 8 x bfloat> %z2)
+  br i1 %p, label %L1, label %L2
+L1:
+  ret <vscale x 8 x bfloat> undef
+L2:
+  %extract = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.tuple.get.nxv8bf16.nxv24bf16(<vscale x 24 x bfloat> %tuple, i32 2)
+  ret <vscale x 8 x bfloat> %extract
+}
 
 ;
 ; SVCREATE3 (i32)
@@ -528,6 +590,37 @@
 }
 
 ;
+; SVCREATE4 (bfloat)
+;
+
+define <vscale x 8 x bfloat> @test_svcreate4_bf16_vec0(i1 %p, <vscale x 8 x bfloat> %z0, <vscale x 8 x bfloat> %z1, <vscale x 8 x bfloat> %z2, <vscale x 8 x bfloat> %z3) local_unnamed_addr #0 {
+; CHECK-LABEL: test_svcreate4_bf16_vec0:
+; CHECK: // %L2
+; CHECK-NEXT: ret
+  %tuple = tail call <vscale x 32 x bfloat> @llvm.aarch64.sve.tuple.create4.nxv32bf16.nxv8bf16(<vscale x 8 x bfloat> %z0, <vscale x 8 x bfloat> %z1, <vscale x 8 x bfloat> %z2, <vscale x 8 x bfloat> %z3)
+  br i1 %p, label %L1, label %L2
+L1:
+  ret <vscale x 8 x bfloat> undef
+L2:
+  %extract = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.tuple.get.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> %tuple, i32 0)
+  ret <vscale x 8 x bfloat> %extract
+}
+
+define <vscale x 8 x bfloat> @test_svcreate4_bf16_vec3(i1 %p, <vscale x 8 x bfloat> %z0, <vscale x 8 x bfloat> %z1, <vscale x 8 x bfloat> %z2, <vscale x 8 x bfloat> %z3) local_unnamed_addr #0 {
+; CHECK-LABEL: test_svcreate4_bf16_vec3:
+; CHECK: // %L2
+; CHECK-NEXT: mov z0.d, z3.d
+; CHECK-NEXT: ret
+  %tuple = tail call <vscale x 32 x bfloat> @llvm.aarch64.sve.tuple.create4.nxv32bf16.nxv8bf16(<vscale x 8 x bfloat> %z0, <vscale x 8 x bfloat> %z1, <vscale x 8 x bfloat> %z2, <vscale x 8 x bfloat> %z3)
+  br i1 %p, label %L1, label %L2
+L1:
+  ret <vscale x 8 x bfloat> undef
+L2:
+  %extract = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.tuple.get.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> %tuple, i32 3)
+  ret <vscale x 8 x bfloat> %extract
+}
+
+;
 ; SVCREATE4 (i32)
 ;
 
@@ -651,11 +744,12 @@
   ret <vscale x 2 x double> %extract
 }
 
-attributes #0 = { nounwind "target-features"="+sve" }
+attributes #0 = { nounwind "target-features"="+sve,+bf16" }
 
 declare <vscale x 4 x double>  @llvm.aarch64.sve.tuple.create2.nxv4f64.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
 declare <vscale x 8 x float>  @llvm.aarch64.sve.tuple.create2.nxv8f32.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
 declare <vscale x 16 x half>  @llvm.aarch64.sve.tuple.create2.nxv16f16.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 16 x bfloat>  @llvm.aarch64.sve.tuple.create2.nxv16bf16.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
 declare <vscale x 4 x i64>  @llvm.aarch64.sve.tuple.create2.nxv4i64.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
 declare <vscale x 8 x i32>  @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
 declare <vscale x 16 x i16> @llvm.aarch64.sve.tuple.create2.nxv16i16.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
@@ -664,6 +758,7 @@
 declare <vscale x 6 x double>  @llvm.aarch64.sve.tuple.create3.nxv6f64.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)
 declare <vscale x 12 x float> @llvm.aarch64.sve.tuple.create3.nxv12f32.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
 declare <vscale x 24 x half> @llvm.aarch64.sve.tuple.create3.nxv24f16.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 24 x bfloat> @llvm.aarch64.sve.tuple.create3.nxv24bf16.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
 declare <vscale x 6 x i64>  @llvm.aarch64.sve.tuple.create3.nxv6i64.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
 declare <vscale x 12 x i32> @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
 declare <vscale x 24 x i16> @llvm.aarch64.sve.tuple.create3.nxv24i16.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
@@ -672,6 +767,7 @@
 declare <vscale x 8 x double>  @llvm.aarch64.sve.tuple.create4.nxv8f64.nxv2f64 (<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)
 declare <vscale x 16 x float>  @llvm.aarch64.sve.tuple.create4.nxv16f32.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
 declare <vscale x 32 x half>  @llvm.aarch64.sve.tuple.create4.nxv32f16.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 32 x bfloat>  @llvm.aarch64.sve.tuple.create4.nxv32bf16.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
 declare <vscale x  8 x i64> @llvm.aarch64.sve.tuple.create4.nxv8i64.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
 declare <vscale x 16 x i32> @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
 declare <vscale x 32 x i16> @llvm.aarch64.sve.tuple.create4.nxv32i16.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
@@ -693,6 +789,10 @@
 declare <vscale x 2 x i64> @llvm.aarch64.sve.tuple.get.nxv2i64.nxv6i64(<vscale x 6 x i64>, i32 immarg)
 declare <vscale x 2 x i64> @llvm.aarch64.sve.tuple.get.nxv2i64.nxv8i64(<vscale x 8 x i64>, i32 immarg)
 
+declare <vscale x 8 x bfloat> @llvm.aarch64.sve.tuple.get.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat>, i32 immarg)
+declare <vscale x 8 x bfloat> @llvm.aarch64.sve.tuple.get.nxv8bf16.nxv24bf16(<vscale x 24 x bfloat>, i32 immarg)
+declare <vscale x 8 x bfloat> @llvm.aarch64.sve.tuple.get.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat>, i32 immarg)
+
 declare <vscale x 8 x half> @llvm.aarch64.sve.tuple.get.nxv8f16.nxv16f16(<vscale x 16 x half>, i32 immarg)
 declare <vscale x 8 x half> @llvm.aarch64.sve.tuple.get.nxv8f16.nxv24f16(<vscale x 24 x half>, i32 immarg)
 declare <vscale x 8 x half> @llvm.aarch64.sve.tuple.get.nxv8f16.nxv32f16(<vscale x 32 x half>, i32 immarg)
Index: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_undef4-bfloat.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_undef4-bfloat.c
@@ -0,0 +1,12 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -D__ARM_FEATURE_SVE_BF16 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error -verify-ignore-unexpected=note %s
+
+#include <arm_sve.h>
+
+svbfloat16x4_t test_svundef4_bf16()
+{
+  // CHECK-LABEL: test_svundef4_bf16
+  // CHECK: ret <vscale x 32 x bfloat> undef
+  // expected-warning@+1 {{implicit declaration of function 'svundef4_bf16'}}
+  return svundef4_bf16();
+}
Index: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_undef3-bfloat.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_undef3-bfloat.c
@@ -0,0 +1,12 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -D__ARM_FEATURE_SVE_BF16 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error -verify-ignore-unexpected=note %s
+
+#include <arm_sve.h>
+
+svbfloat16x3_t test_svundef3_bf16()
+{
+  // CHECK-LABEL: test_svundef3_bf16
+  // CHECK: ret <vscale x 24 x bfloat> undef
+  // expected-warning@+1 {{implicit declaration of function 'svundef3_bf16'}}
+  return svundef3_bf16();
+}
Index: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_undef2-bfloat.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_undef2-bfloat.c
@@ -0,0 +1,12 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -D__ARM_FEATURE_SVE_BF16 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error -verify-ignore-unexpected=note %s
+
+#include <arm_sve.h>
+
+svbfloat16x2_t test_svundef2_bf16()
+{
+  // CHECK-LABEL: test_svundef2_bf16
+  // CHECK: ret <vscale x 16 x bfloat> undef
+  // expected-warning@+1 {{implicit declaration of function 'svundef2_bf16'}}
+  return svundef2_bf16();
+}
Index: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_undef-bfloat.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_undef-bfloat.c
@@ -0,0 +1,12 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -D__ARM_FEATURE_SVE_BF16 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error -verify-ignore-unexpected=note %s
+
+#include <arm_sve.h>
+
+svbfloat16_t test_svundef_bf16()
+{
+  // CHECK-LABEL: test_svundef_bf16
+  // CHECK: ret <vscale x 8 x bfloat> undef
+  // expected-warning@+1 {{implicit declaration of function 'svundef_bf16'}}
+  return svundef_bf16();
+}
Index: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set4-bfloat.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set4-bfloat.c
@@ -0,0 +1,22 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -D__ARM_FEATURE_SVE_BF16 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -D__ARM_FEATURE_SVE_BF16 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error -verify-ignore-unexpected=note %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+
+svbfloat16x4_t test_svset4_bf16(svbfloat16x4_t tuple, svbfloat16_t x)
+{
+  // CHECK-LABEL: test_svset4_bf16
+  // CHECK: %[[INSERT:.*]] = call <vscale x 32 x bfloat> @llvm.aarch64.sve.tuple.set.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> %tuple, i32 1, <vscale x 8 x bfloat> %x)
+  // CHECK-NEXT: ret <vscale x 32 x bfloat> %[[INSERT]]
+  // expected-warning@+1 {{implicit declaration of function 'svset4_bf16'}}
+  return SVE_ACLE_FUNC(svset4,_bf16,,)(tuple, 1, x);
+}
Index: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set3-bfloat.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set3-bfloat.c
@@ -0,0 +1,22 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -D__ARM_FEATURE_SVE_BF16 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -D__ARM_FEATURE_SVE_BF16 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error -verify-ignore-unexpected=note %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+
+svbfloat16x3_t test_svset3_bf16(svbfloat16x3_t tuple, svbfloat16_t x)
+{
+  // CHECK-LABEL: test_svset3_bf16
+  // CHECK: %[[INSERT:.*]] = call <vscale x 24 x bfloat> @llvm.aarch64.sve.tuple.set.nxv24bf16.nxv8bf16(<vscale x 24 x bfloat> %tuple, i32 0, <vscale x 8 x bfloat> %x)
+  // CHECK-NEXT: ret <vscale x 24 x bfloat> %[[INSERT]]
+  // expected-warning@+1 {{implicit declaration of function 'svset3_bf16'}}
+  return SVE_ACLE_FUNC(svset3,_bf16,,)(tuple, 0, x);
+}
Index: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set2-bfloat.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set2-bfloat.c
@@ -0,0 +1,21 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -D__ARM_FEATURE_SVE_BF16 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -D__ARM_FEATURE_SVE_BF16 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error -verify-ignore-unexpected=note %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+svbfloat16x2_t test_svset2_bf16(svbfloat16x2_t tuple, svbfloat16_t x)
+{
+  // CHECK-LABEL: test_svset2_bf16
+  // CHECK: %[[INSERT:.*]] = call <vscale x 16 x bfloat> @llvm.aarch64.sve.tuple.set.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> %tuple, i32 1, <vscale x 8 x bfloat> %x)
+  // CHECK-NEXT: ret <vscale x 16 x bfloat> %[[INSERT]]
+  // expected-warning@+1 {{implicit declaration of function 'svset2_bf16'}}
+  return SVE_ACLE_FUNC(svset2,_bf16,,)(tuple, 1, x);
+}
Index: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get4-bfloat.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get4-bfloat.c
@@ -0,0 +1,21 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -D__ARM_FEATURE_SVE_BF16 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -D__ARM_FEATURE_SVE_BF16 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error -verify-ignore-unexpected=note %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+svbfloat16_t test_svget4_bf16(svbfloat16x4_t tuple)
+{
+  // CHECK-LABEL: test_svget4_bf16
+  // CHECK: %[[EXT:.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.tuple.get.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> %tuple, i32 2)
+  // CHECK-NEXT: ret <vscale x 8 x bfloat> %[[EXT]]
+  // expected-warning@+1 {{implicit declaration of function 'svget4_bf16'}}
+  return SVE_ACLE_FUNC(svget4,_bf16,,)(tuple, 2);
+}
Index: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get3-bfloat.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get3-bfloat.c
@@ -0,0 +1,21 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -D__ARM_FEATURE_SVE_BF16 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -D__ARM_FEATURE_SVE_BF16 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error -verify-ignore-unexpected=note %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+svbfloat16_t test_svget3_bf16(svbfloat16x3_t tuple)
+{
+  // CHECK-LABEL: test_svget3_bf16
+  // CHECK: %[[EXT:.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.tuple.get.nxv8bf16.nxv24bf16(<vscale x 24 x bfloat> %tuple, i32 1)
+  // CHECK-NEXT: ret <vscale x 8 x bfloat> %[[EXT]]
+  // expected-warning@+1 {{implicit declaration of function 'svget3_bf16'}}
+  return SVE_ACLE_FUNC(svget3,_bf16,,)(tuple, 1);
+}
Index: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get2-bfloat.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get2-bfloat.c
@@ -0,0 +1,21 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -D__ARM_FEATURE_SVE_BF16 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -D__ARM_FEATURE_SVE_BF16 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error -verify-ignore-unexpected=note %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+svbfloat16_t test_svget2_bf16(svbfloat16x2_t tuple)
+{
+  // CHECK-LABEL: test_svget2_bf16
+  // CHECK: %[[EXT:.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.tuple.get.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> %tuple, i32 0)
+  // CHECK-NEXT: ret <vscale x 8 x bfloat> %[[EXT]]
+  // expected-warning@+1 {{implicit declaration of function 'svget2_bf16'}}
+  return SVE_ACLE_FUNC(svget2,_bf16,,)(tuple, 0);
+}
Index: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create4-bfloat.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create4-bfloat.c
@@ -0,0 +1,21 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -D__ARM_FEATURE_SVE_BF16 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -D__ARM_FEATURE_SVE_BF16 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error -verify-ignore-unexpected=note %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+svbfloat16x4_t test_svcreate4_bf16(svbfloat16_t x0, svbfloat16_t x1, svbfloat16_t x2, svbfloat16_t x4)
+{
+  // CHECK-LABEL: test_svcreate4_bf16
+  // CHECK: %[[CREATE:.*]] = call <vscale x 32 x bfloat> @llvm.aarch64.sve.tuple.create4.nxv32bf16.nxv8bf16(<vscale x 8 x bfloat> %x0, <vscale x 8 x bfloat> %x1, <vscale x 8 x bfloat> %x2, <vscale x 8 x bfloat> %x4)
+  // CHECK-NEXT: ret <vscale x 32 x bfloat> %[[CREATE]]
+  // expected-warning@+1 {{implicit declaration of function 'svcreate4_bf16'}}
+  return SVE_ACLE_FUNC(svcreate4,_bf16,,)(x0, x1, x2, x4);
+}
Index: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create3-bfloat.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create3-bfloat.c
@@ -0,0 +1,21 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -D__ARM_FEATURE_SVE_BF16 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -D__ARM_FEATURE_SVE_BF16 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error -verify-ignore-unexpected=note %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+svbfloat16x3_t test_svcreate3_bf16(svbfloat16_t x0, svbfloat16_t x1, svbfloat16_t x2)
+{
+  // CHECK-LABEL: test_svcreate3_bf16
+  // CHECK: %[[CREATE:.*]] = call <vscale x 24 x bfloat> @llvm.aarch64.sve.tuple.create3.nxv24bf16.nxv8bf16(<vscale x 8 x bfloat> %x0, <vscale x 8 x bfloat> %x1, <vscale x 8 x bfloat> %x2)
+  // CHECK-NEXT: ret <vscale x 24 x bfloat> %[[CREATE]]
+  // expected-warning@+1 {{implicit declaration of function 'svcreate3_bf16'}}
+  return SVE_ACLE_FUNC(svcreate3,_bf16,,)(x0, x1, x2);
+}
Index: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create2-bfloat.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create2-bfloat.c
@@ -0,0 +1,21 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -D__ARM_FEATURE_SVE_BF16 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -D__ARM_FEATURE_SVE_BF16 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error -verify-ignore-unexpected=note %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+svbfloat16x2_t test_svcreate2_bf16(svbfloat16_t x0, svbfloat16_t x1)
+{
+  // CHECK-LABEL: test_svcreate2_bf16
+  // CHECK: %[[CREATE:.*]] = call <vscale x 16 x bfloat> @llvm.aarch64.sve.tuple.create2.nxv16bf16.nxv8bf16(<vscale x 8 x bfloat> %x0, <vscale x 8 x bfloat> %x1)
+  // CHECK-NEXT: ret <vscale x 16 x bfloat> %[[CREATE]]
+  // expected-warning@+1 {{implicit declaration of function 'svcreate2_bf16'}}
+  return SVE_ACLE_FUNC(svcreate2,_bf16,,)(x0, x1);
+}
Index: clang/include/clang/Basic/arm_sve.td
===================================================================
--- clang/include/clang/Basic/arm_sve.td
+++ clang/include/clang/Basic/arm_sve.td
@@ -1414,6 +1414,16 @@
 def SVCREATE_3 : SInst<"svcreate3[_{d}]", "3ddd",  "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_tuple_create3", [IsTupleCreate]>;
 def SVCREATE_4 : SInst<"svcreate4[_{d}]", "4dddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_tuple_create4", [IsTupleCreate]>;
 
+let ArchGuard = "defined(__ARM_FEATURE_SVE_BF16)" in {
+def SVUNDEF_1_BF16 : SInst<"svundef_{d}",  "d", "b", MergeNone, "", [IsUndef]>;
+def SVUNDEF_2_BF16 : SInst<"svundef2_{d}", "2", "b", MergeNone, "", [IsUndef]>;
+def SVUNDEF_3_BF16 : SInst<"svundef3_{d}", "3", "b", MergeNone, "", [IsUndef]>;
+def SVUNDEF_4_BF16 : SInst<"svundef4_{d}", "4", "b", MergeNone, "", [IsUndef]>;
+
+def SVCREATE_2_BF16 : SInst<"svcreate2[_{d}]", "2dd",   "b", MergeNone, "aarch64_sve_tuple_create2", [IsTupleCreate]>;
+def SVCREATE_3_BF16 : SInst<"svcreate3[_{d}]", "3ddd",  "b", MergeNone, "aarch64_sve_tuple_create3", [IsTupleCreate]>;
+def SVCREATE_4_BF16 : SInst<"svcreate4[_{d}]", "4dddd", "b", MergeNone, "aarch64_sve_tuple_create4", [IsTupleCreate]>;
+}
 
 ////////////////////////////////////////////////////////////////////////////////
 // Vector insertion and extraction
@@ -1425,6 +1435,16 @@
 def SVSET_3 : SInst<"svset3[_{d}]", "33id", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_tuple_set", [IsTupleSet], [ImmCheck<1, ImmCheck0_2>]>;
 def SVSET_4 : SInst<"svset4[_{d}]", "44id", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_tuple_set", [IsTupleSet], [ImmCheck<1, ImmCheck0_3>]>;
 
+let ArchGuard = "defined(__ARM_FEATURE_SVE_BF16)" in {
+def SVGET_2_BF16 : SInst<"svget2[_{d}]", "d2i", "b", MergeNone, "aarch64_sve_tuple_get", [IsTupleGet], [ImmCheck<1, ImmCheck0_1>]>;
+def SVGET_3_BF16 : SInst<"svget3[_{d}]", "d3i", "b", MergeNone, "aarch64_sve_tuple_get", [IsTupleGet], [ImmCheck<1, ImmCheck0_2>]>;
+def SVGET_4_BF16 : SInst<"svget4[_{d}]", "d4i", "b", MergeNone, "aarch64_sve_tuple_get", [IsTupleGet], [ImmCheck<1, ImmCheck0_3>]>;
+
+def SVSET_2_BF16 : SInst<"svset2[_{d}]", "22id", "b", MergeNone, "aarch64_sve_tuple_set", [IsTupleSet], [ImmCheck<1, ImmCheck0_1>]>;
+def SVSET_3_BF16 : SInst<"svset3[_{d}]", "33id", "b", MergeNone, "aarch64_sve_tuple_set", [IsTupleSet], [ImmCheck<1, ImmCheck0_2>]>;
+def SVSET_4_BF16 : SInst<"svset4[_{d}]", "44id", "b", MergeNone, "aarch64_sve_tuple_set", [IsTupleSet], [ImmCheck<1, ImmCheck0_3>]>;
+}
+
 ////////////////////////////////////////////////////////////////////////////////
 // SVE2 WhileGE/GT
 let ArchGuard = "defined(__ARM_FEATURE_SVE2)" in {

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D82665: [AArch64][SVE] Add bfloat16 to outstanding tuple vector intrinsics

Reply via email to