[PATCH] D82298: [AArch64][SVE] Add bfloat16 support to load intrinsics

2020-06-24 Thread Sander de Smalen via Phabricator via cfe-commits
sdesmalen added inline comments.



Comment at: llvm/test/CodeGen/AArch64/sve-intrinsics-ld1-addressing-mode-reg-imm.ll:217
+  %base_scalar = bitcast <vscale x 8 x bfloat>* %base to bfloat*
+  %load = call <vscale x 8 x bfloat> @llvm.aarch64.sve.ld1.nxv8bf16(<vscale x 8 x i1> %pg, bfloat* %base_scalar)
+  ret <vscale x 8 x bfloat> %load

Sorry, I only just spotted this in D82182, but to match these intrinsics the
llc command needs to be passed `+bf16` (and the patterns in the .td file need to
be predicated accordingly).
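For illustration, the kind of RUN line meant here would look roughly like the following — a sketch, not the exact line from the patch:

```
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+bf16 < %s | FileCheck %s
```

with the matching patterns in AArch64SVEInstrInfo.td guarded by a predicate along the lines of `Predicates = [HasBF16]`.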


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D82298/new/

https://reviews.llvm.org/D82298





[PATCH] D82298: [AArch64][SVE] Add bfloat16 support to load intrinsics

2020-06-24 Thread Kerry McLaughlin via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rG3d6cab271c7c: [AArch64][SVE] Add bfloat16 support to load 
intrinsics (authored by kmclaughlin).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D82298/new/

https://reviews.llvm.org/D82298

Files:
  clang/include/clang/Basic/arm_sve.td
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1-bfloat.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1rq-bfloat.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1-bfloat.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnf1-bfloat.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnt1-bfloat.c
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
  llvm/test/CodeGen/AArch64/sve-intrinsics-ld1-addressing-mode-reg-imm.ll
  llvm/test/CodeGen/AArch64/sve-intrinsics-ld1-addressing-mode-reg-reg.ll
  llvm/test/CodeGen/AArch64/sve-intrinsics-ld1.ll
  llvm/test/CodeGen/AArch64/sve-intrinsics-loads-ff.ll
  llvm/test/CodeGen/AArch64/sve-intrinsics-loads-nf.ll
  llvm/test/CodeGen/AArch64/sve-intrinsics-loads.ll
  llvm/test/CodeGen/AArch64/sve-masked-ldst-nonext.ll

Index: llvm/test/CodeGen/AArch64/sve-masked-ldst-nonext.ll
===
--- llvm/test/CodeGen/AArch64/sve-masked-ldst-nonext.ll
+++ llvm/test/CodeGen/AArch64/sve-masked-ldst-nonext.ll
@@ -87,6 +87,14 @@
   ret <vscale x 8 x half> %load
 }
 
+define <vscale x 8 x bfloat> @masked_load_nxv8bf16(<vscale x 8 x bfloat> *%a, <vscale x 8 x i1> %mask) nounwind {
+; CHECK-LABEL: masked_load_nxv8bf16:
+; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call <vscale x 8 x bfloat> @llvm.masked.load.nxv8bf16(<vscale x 8 x bfloat> *%a, i32 2, <vscale x 8 x i1> %mask, <vscale x 8 x bfloat> undef)
+  ret <vscale x 8 x bfloat> %load
+}
+
 ;
 ; Masked Stores
 ;
@@ -182,6 +190,7 @@
 declare <vscale x 4 x float> @llvm.masked.load.nxv4f32(<vscale x 4 x float>*, i32, <vscale x 4 x i1>, <vscale x 4 x float>)
 declare <vscale x 4 x half> @llvm.masked.load.nxv4f16(<vscale x 4 x half>*, i32, <vscale x 4 x i1>, <vscale x 4 x half>)
 declare <vscale x 8 x half> @llvm.masked.load.nxv8f16(<vscale x 8 x half>*, i32, <vscale x 8 x i1>, <vscale x 8 x half>)
+declare <vscale x 8 x bfloat> @llvm.masked.load.nxv8bf16(<vscale x 8 x bfloat>*, i32, <vscale x 8 x i1>, <vscale x 8 x bfloat>)
 
 declare void @llvm.masked.store.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>*, i32, <vscale x 2 x i1>)
 declare void @llvm.masked.store.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>*, i32, <vscale x 4 x i1>)
Index: llvm/test/CodeGen/AArch64/sve-intrinsics-loads.ll
===
--- llvm/test/CodeGen/AArch64/sve-intrinsics-loads.ll
+++ llvm/test/CodeGen/AArch64/sve-intrinsics-loads.ll
@@ -97,6 +97,23 @@
   ret <vscale x 8 x half> %res
 }
 
+define <vscale x 8 x bfloat> @ld1rqh_bf16(<vscale x 8 x i1> %pred, bfloat* %addr) {
+; CHECK-LABEL: ld1rqh_bf16:
+; CHECK: ld1rqh { z0.h }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %res = call <vscale x 8 x bfloat> @llvm.aarch64.sve.ld1rq.nxv8bf16(<vscale x 8 x i1> %pred, bfloat* %addr)
+  ret <vscale x 8 x bfloat> %res
+}
+
+define <vscale x 8 x bfloat> @ld1rqh_bf16_imm(<vscale x 8 x i1> %pred, bfloat* %addr) {
+; CHECK-LABEL: ld1rqh_bf16_imm:
+; CHECK: ld1rqh { z0.h }, p0/z, [x0, #-16]
+; CHECK-NEXT: ret
+  %ptr = getelementptr inbounds bfloat, bfloat* %addr, i16 -8
+  %res = call <vscale x 8 x bfloat> @llvm.aarch64.sve.ld1rq.nxv8bf16(<vscale x 8 x i1> %pred, bfloat* %ptr)
+  ret <vscale x 8 x bfloat> %res
+}
+
 ;
 ; LD1RQW
 ;
@@ -208,6 +225,15 @@
   ret <vscale x 8 x half> %res
 }
 
+define <vscale x 8 x bfloat> @ldnt1h_bf16(<vscale x 8 x i1> %pred, bfloat* %addr) {
+; CHECK-LABEL: ldnt1h_bf16:
+; CHECK: ldnt1h { z0.h }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %res = call <vscale x 8 x bfloat> @llvm.aarch64.sve.ldnt1.nxv8bf16(<vscale x 8 x i1> %pred,
+                                                                     bfloat* %addr)
+  ret <vscale x 8 x bfloat> %res
+}
+
 ;
 ; LDNT1W
 ;
@@ -498,6 +524,7 @@
 declare <vscale x 4 x i32> @llvm.aarch64.sve.ld1rq.nxv4i32(<vscale x 4 x i1>, i32*)
 declare <vscale x 2 x i64> @llvm.aarch64.sve.ld1rq.nxv2i64(<vscale x 2 x i1>, i64*)
 declare <vscale x 8 x half> @llvm.aarch64.sve.ld1rq.nxv8f16(<vscale x 8 x i1>, half*)
+declare <vscale x 8 x bfloat> @llvm.aarch64.sve.ld1rq.nxv8bf16(<vscale x 8 x i1>, bfloat*)
 declare <vscale x 4 x float> @llvm.aarch64.sve.ld1rq.nxv4f32(<vscale x 4 x i1>, float*)
 declare <vscale x 2 x double> @llvm.aarch64.sve.ld1rq.nxv2f64(<vscale x 2 x i1>, double*)
 
@@ -506,6 +533,7 @@
 declare <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.nxv4i32(<vscale x 4 x i1>, i32*)
 declare <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.nxv2i64(<vscale x 2 x i1>, i64*)
 declare <vscale x 8 x half> @llvm.aarch64.sve.ldnt1.nxv8f16(<vscale x 8 x i1>, half*)
+declare <vscale x 8 x bfloat> @llvm.aarch64.sve.ldnt1.nxv8bf16(<vscale x 8 x i1>, bfloat*)
 declare <vscale x 4 x float> @llvm.aarch64.sve.ldnt1.nxv4f32(<vscale x 4 x i1>, float*)
 declare <vscale x 2 x double> @llvm.aarch64.sve.ldnt1.nxv2f64(<vscale x 2 x i1>, double*)
 
Index: llvm/test/CodeGen/AArch64/sve-intrinsics-loads-nf.ll
===
--- llvm/test/CodeGen/AArch64/sve-intrinsics-loads-nf.ll
+++ llvm/test/CodeGen/AArch64/sve-intrinsics-loads-nf.ll
@@ -140,6 +140,14 @@
   ret <vscale x 8 x half> %load
 }
 
+define <vscale x 8 x bfloat> @ldnf1h_bf16(<vscale x 8 x i1> %pg, bfloat* %a) {
+; CHECK-LABEL: ldnf1h_bf16:
+; CHECK: ldnf1h { z0.h }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call <vscale x 8 x bfloat> @llvm.aarch64.sve.ldnf1.nxv8bf16(<vscale x 8 x i1> %pg, bfloat* %a)
+  ret <vscale x 8 x bfloat> %load
+}
+
 define <vscale x 8 x half> @ldnf1h_f16_inbound(<vscale x 8 x i1> %pg, half* %a) {
 ; CHECK-LABEL: ldnf1h_f16_inbound:
 ; CHECK: ldnf1h { z0.h }, p0/z, [x0, #1, mul vl]
@@ -151,6 +159,17 @@
   ret <vscale x 8 x half> %load
 }
 
+define <vscale x 8 x bfloat> @ldnf1h_bf16_inbound(<vscale x 8 x i1> %pg, bfloat* %a) {
+; CHECK-LABEL: ldnf1h_bf16_inbound:
+; CHECK: ldnf1h { z0.h }, p0/z, [x0, #1, mul vl]
+; CHECK-NEXT: ret
+  %base_scalable = bitcast bfloat* %a to <vscale x 8 x bfloat>*
+  %base = getelementptr <vscale x 8 x bfloat>, <vscale x 8 x bfloat>* %base_scalable, i64 1
+  %base_scalar = bitcast <vscale x 8 x bfloat>* %base to bfloat*
+  %load = call <vscale x 8 x bfloat> @llvm.aarch64.sve.ldnf1.nxv8bf16(<vscale x 8 x i1> %pg, bfloat* %base_scalar)
+  ret <vscale x 8 x bfloat> %load
+}
+
 define  @ldnf1b_

[PATCH] D82298: [AArch64][SVE] Add bfloat16 support to load intrinsics

2020-06-24 Thread Sander de Smalen via Phabricator via cfe-commits
sdesmalen accepted this revision.
sdesmalen added a comment.

LGTM




Comment at: clang/include/clang/Basic/arm_sve.td:275
+let ArchGuard = "defined(__ARM_FEATURE_SVE_BF16)" in {
+  def SVLD1_BF  : MInst<"svld1[_{2}]",  "dPc",  "b", [IsLoad], MemEltTyDefault, "aarch64_sve_ld1">;
+  def SVLD1_VNUM_BF : MInst<"svld1_vnum[_{2}]", "dPcl", "b", [IsLoad], MemEltTyDefault, "aarch64_sve_ld1">;

micro nit: this doesn't match the column indentation of the code around it.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D82298/new/

https://reviews.llvm.org/D82298





[PATCH] D82298: [AArch64][SVE] Add bfloat16 support to load intrinsics

2020-06-23 Thread Francesco Petrogalli via Phabricator via cfe-commits
fpetrogalli accepted this revision.
fpetrogalli added a comment.
This revision is now accepted and ready to land.

LGTM, thanks!


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D82298/new/

https://reviews.llvm.org/D82298





[PATCH] D82298: [AArch64][SVE] Add bfloat16 support to load intrinsics

2020-06-23 Thread Kerry McLaughlin via Phabricator via cfe-commits
kmclaughlin updated this revision to Diff 272759.
kmclaughlin added a comment.

- Moved bfloat tests into separate files
- Added checks to the bfloat test files which test the warnings given when
`__ARM_FEATURE_SVE_BF16` is omitted from the RUN line


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D82298/new/

https://reviews.llvm.org/D82298

Files:
  clang/include/clang/Basic/arm_sve.td
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1-bfloat.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1rq-bfloat.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1-bfloat.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnf1-bfloat.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnt1-bfloat.c
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
  llvm/test/CodeGen/AArch64/sve-intrinsics-ld1-addressing-mode-reg-imm.ll
  llvm/test/CodeGen/AArch64/sve-intrinsics-ld1-addressing-mode-reg-reg.ll
  llvm/test/CodeGen/AArch64/sve-intrinsics-ld1.ll
  llvm/test/CodeGen/AArch64/sve-intrinsics-loads-ff.ll
  llvm/test/CodeGen/AArch64/sve-intrinsics-loads-nf.ll
  llvm/test/CodeGen/AArch64/sve-intrinsics-loads.ll
  llvm/test/CodeGen/AArch64/sve-masked-ldst-nonext.ll

[PATCH] D82298: [AArch64][SVE] Add bfloat16 support to load intrinsics

2020-06-23 Thread David Sherwood via Phabricator via cfe-commits
david-arm added inline comments.



Comment at: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnf1.c:2-4
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -D__ARM_FEATURE_SVE_BF16 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -D__ARM_FEATURE_SVE_BF16 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -D__ARM_FEATURE_SVE_BF16 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -o - %s >/dev/null 2>%t

fpetrogalli wrote:
> With @sdesmalen we were thinking that maybe it is better to duplicate the
> run lines to have the BF16 intrinsics tested separately:
> 
> ```
>  RUN: %clang_cc1 -D__ARM_FEATURE_SVE ... -target-feature +sve ...
>  RUN: %clang_cc1 -DENABLE_BF16_TEST -D__ARM_FEATURE_SVE -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -D__ARM_FEATURE_SVE_BF16 ... -target-feature +sve -target-feature +bf16 ...
> ```
> 
> and wrap the BF16 tests in `#ifdef ENABLE_BF16_TEST ... #endif`.
> 
> This will make sure that the non-BF16 tests are not erroneously associated
> with the BF16 flags.
> 
> Please apply this to all the run lines involving BF16 modified in this patch.
> 
Is that definite? I mean, there is a difference between "we were thinking" and
"this is how we are going to do things in future". :) Just to avoid unnecessary
code changes, that's all. I presume existing tests already written in the same
way (committed in the last week or so) would be changed too?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D82298/new/

https://reviews.llvm.org/D82298





[PATCH] D82298: [AArch64][SVE] Add bfloat16 support to load intrinsics

2020-06-23 Thread Sander de Smalen via Phabricator via cfe-commits
sdesmalen added inline comments.



Comment at: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnf1.c:2-4
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -D__ARM_FEATURE_SVE_BF16 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -D__ARM_FEATURE_SVE_BF16 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -D__ARM_FEATURE_SVE_BF16 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -o - %s >/dev/null 2>%t

david-arm wrote:
> fpetrogalli wrote:
> > With @sdesmalen we were thinking that maybe it is better to duplicate the
> > run lines to have the BF16 intrinsics tested separately:
> > 
> > ```
> >  RUN: %clang_cc1 -D__ARM_FEATURE_SVE ... -target-feature +sve ...
> >  RUN: %clang_cc1 -DENABLE_BF16_TEST -D__ARM_FEATURE_SVE -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -D__ARM_FEATURE_SVE_BF16 ... -target-feature +sve -target-feature +bf16 ...
> > ```
> > 
> > and wrap the BF16 tests in `#ifdef ENABLE_BF16_TEST ... #endif`.
> > 
> > This will make sure that the non-BF16 tests are not erroneously associated
> > with the BF16 flags.
> > 
> > Please apply this to all the run lines involving BF16 modified in this
> > patch.
> > 
> Is that definite? I mean, there is a difference between "we were thinking" and
> "this is how we are going to do things in future". :) Just to avoid
> unnecessary code changes, that's all. I presume existing tests already written
> in the same way (committed in the last week or so) would be changed too?
The other bfloat tests are currently in separate files (suffixed `-bfloat.c`).
@fpetrogalli and I indeed discussed that we could do this all in the same file using
`#ifdef`s, but for now I'd actually prefer we stick with the approach we have
gone down (a specific test file for bfloat) until we've changed this for existing
tests (in a separate patch).

So for now just move these tests to a separate file, and please also add RUN
lines like we've done for the SVE2 tests to check that we get diagnostics if
`+sve` is passed (without `+bf16`).
(This actually hasn't been done yet for some of the newly introduced bfloat
tests, so we'll need to fix that.)
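As an illustration of the kind of negative RUN line meant here — a sketch modelled on the SVE2 tests, where the flags and the exact diagnostic text are assumptions rather than the committed lines:

```
// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fsyntax-only -verify -verify-ignore-unexpected=error -verify-ignore-unexpected=note %s
#include <arm_sve.h>

svbfloat16_t test_svld1_bf16(svbool_t pg, const bfloat16_t *base) {
  // Compiled with +sve but without +bf16, the bfloat16 overload is not
  // declared, so a diagnostic along these lines is expected:
  // expected-warning@+1 {{implicit declaration of function 'svld1_bf16'}}
  return svld1_bf16(pg, base);
}
```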


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D82298/new/

https://reviews.llvm.org/D82298





[PATCH] D82298: [AArch64][SVE] Add bfloat16 support to load intrinsics

2020-06-22 Thread Francesco Petrogalli via Phabricator via cfe-commits
fpetrogalli requested changes to this revision.
fpetrogalli added inline comments.
This revision now requires changes to proceed.



Comment at: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnf1.c:2-4
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -D__ARM_FEATURE_SVE_BF16 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -D__ARM_FEATURE_SVE_BF16 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -D__ARM_FEATURE_SVE_BF16 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -o - %s >/dev/null 2>%t

With @sdesmalen we were thinking that maybe it is better to duplicate the run
lines to have the BF16 intrinsics tested separately:

```
 RUN: %clang_cc1 -D__ARM_FEATURE_SVE ... -target-feature +sve ...
 RUN: %clang_cc1 -DENABLE_BF16_TEST -D__ARM_FEATURE_SVE -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -D__ARM_FEATURE_SVE_BF16 ... -target-feature +sve -target-feature +bf16 ...
```

and wrap the BF16 tests in `#ifdef ENABLE_BF16_TEST ... #endif`.

This will make sure that the non-BF16 tests are not erroneously associated with
the BF16 flags.

Please apply this to all the run lines involving BF16 modified in this patch.
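To make the suggestion concrete, the guarded part of a test file would look roughly like this (a sketch — the test body is illustrative, not taken from the patch):

```
#ifdef ENABLE_BF16_TEST
// Only compiled by the second RUN line, which defines ENABLE_BF16_TEST
// together with the BF16 feature macros and -target-feature +bf16.
svbfloat16_t test_svld1_bf16(svbool_t pg, const bfloat16_t *base) {
  return svld1_bf16(pg, base);
}
#endif // ENABLE_BF16_TEST
```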



Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D82298/new/

https://reviews.llvm.org/D82298





[PATCH] D82298: [AArch64][SVE] Add bfloat16 support to load intrinsics

2020-06-22 Thread Kerry McLaughlin via Phabricator via cfe-commits
kmclaughlin created this revision.
kmclaughlin added reviewers: sdesmalen, c-rhodes, efriedma, stuij, fpetrogalli, 
david-arm.
Herald added subscribers: llvm-commits, cfe-commits, danielkiss, psnobl, 
rkruppe, hiraditya, kristof.beyls, tschuett.
Herald added projects: clang, LLVM.

Bfloat16 support added for the following intrinsics:

- LD1
- LD1RQ
- LDNT1
- LDNF1
- LDFF1
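
At the C level these map to the ACLE bfloat16 load intrinsics. A usage sketch (the helper name is illustrative, and it assumes `__ARM_FEATURE_SVE_BF16` / `-target-feature +bf16`):

```
#include <arm_sve.h>

// Sketch: predicated contiguous load of the first n bfloat16 elements.
svbfloat16_t load_first_n(const bfloat16_t *src, uint64_t n) {
  svbool_t pg = svwhilelt_b16(0ull, n); // predicate enabling lanes [0, n)
  return svld1_bf16(pg, src);           // contiguous load, lowers to ld1h
}
```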


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D82298

Files:
  clang/include/clang/Basic/arm_sve.td
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1rq.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnf1.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnt1.c
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
  llvm/test/CodeGen/AArch64/sve-intrinsics-ld1-addressing-mode-reg-imm.ll
  llvm/test/CodeGen/AArch64/sve-intrinsics-ld1-addressing-mode-reg-reg.ll
  llvm/test/CodeGen/AArch64/sve-intrinsics-ld1.ll
  llvm/test/CodeGen/AArch64/sve-intrinsics-loads-ff.ll
  llvm/test/CodeGen/AArch64/sve-intrinsics-loads-nf.ll
  llvm/test/CodeGen/AArch64/sve-intrinsics-loads.ll
  llvm/test/CodeGen/AArch64/sve-masked-ldst-nonext.ll

Index: llvm/test/CodeGen/AArch64/sve-masked-ldst-nonext.ll
===
--- llvm/test/CodeGen/AArch64/sve-masked-ldst-nonext.ll
+++ llvm/test/CodeGen/AArch64/sve-masked-ldst-nonext.ll
@@ -87,6 +87,14 @@
   ret <vscale x 8 x half> %load
 }
 
+define <vscale x 8 x bfloat> @masked_load_nxv8bf16(<vscale x 8 x bfloat> *%a, <vscale x 8 x i1> %mask) nounwind {
+; CHECK-LABEL: masked_load_nxv8bf16:
+; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call <vscale x 8 x bfloat> @llvm.masked.load.nxv8bf16(<vscale x 8 x bfloat> *%a, i32 2, <vscale x 8 x i1> %mask, <vscale x 8 x bfloat> undef)
+  ret <vscale x 8 x bfloat> %load
+}
+
 ;
 ; Masked Stores
 ;
@@ -182,6 +190,7 @@
 declare <vscale x 4 x float> @llvm.masked.load.nxv4f32(<vscale x 4 x float>*, i32, <vscale x 4 x i1>, <vscale x 4 x float>)
 declare <vscale x 4 x half> @llvm.masked.load.nxv4f16(<vscale x 4 x half>*, i32, <vscale x 4 x i1>, <vscale x 4 x half>)
 declare <vscale x 8 x half> @llvm.masked.load.nxv8f16(<vscale x 8 x half>*, i32, <vscale x 8 x i1>, <vscale x 8 x half>)
+declare <vscale x 8 x bfloat> @llvm.masked.load.nxv8bf16(<vscale x 8 x bfloat>*, i32, <vscale x 8 x i1>, <vscale x 8 x bfloat>)
 
 declare void @llvm.masked.store.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>*, i32, <vscale x 2 x i1>)
 declare void @llvm.masked.store.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>*, i32, <vscale x 4 x i1>)
Index: llvm/test/CodeGen/AArch64/sve-intrinsics-loads.ll
===
--- llvm/test/CodeGen/AArch64/sve-intrinsics-loads.ll
+++ llvm/test/CodeGen/AArch64/sve-intrinsics-loads.ll
@@ -97,6 +97,23 @@
   ret <vscale x 8 x half> %res
 }
 
+define <vscale x 8 x bfloat> @ld1rqh_bf16(<vscale x 8 x i1> %pred, bfloat* %addr) {
+; CHECK-LABEL: ld1rqh_bf16:
+; CHECK: ld1rqh { z0.h }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %res = call <vscale x 8 x bfloat> @llvm.aarch64.sve.ld1rq.nxv8bf16(<vscale x 8 x i1> %pred, bfloat* %addr)
+  ret <vscale x 8 x bfloat> %res
+}
+
+define <vscale x 8 x bfloat> @ld1rqh_bf16_imm(<vscale x 8 x i1> %pred, bfloat* %addr) {
+; CHECK-LABEL: ld1rqh_bf16_imm:
+; CHECK: ld1rqh { z0.h }, p0/z, [x0, #-16]
+; CHECK-NEXT: ret
+  %ptr = getelementptr inbounds bfloat, bfloat* %addr, i16 -8
+  %res = call <vscale x 8 x bfloat> @llvm.aarch64.sve.ld1rq.nxv8bf16(<vscale x 8 x i1> %pred, bfloat* %ptr)
+  ret <vscale x 8 x bfloat> %res
+}
+
 ;
 ; LD1RQW
 ;
@@ -208,6 +225,15 @@
   ret <vscale x 8 x half> %res
 }
 
+define <vscale x 8 x bfloat> @ldnt1h_bf16(<vscale x 8 x i1> %pred, bfloat* %addr) {
+; CHECK-LABEL: ldnt1h_bf16:
+; CHECK: ldnt1h { z0.h }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %res = call <vscale x 8 x bfloat> @llvm.aarch64.sve.ldnt1.nxv8bf16(<vscale x 8 x i1> %pred,
+                                                                     bfloat* %addr)
+  ret <vscale x 8 x bfloat> %res
+}
+
 ;
 ; LDNT1W
 ;
@@ -474,6 +500,7 @@
 declare <vscale x 4 x i32> @llvm.aarch64.sve.ld1rq.nxv4i32(<vscale x 4 x i1>, i32*)
 declare <vscale x 2 x i64> @llvm.aarch64.sve.ld1rq.nxv2i64(<vscale x 2 x i1>, i64*)
 declare <vscale x 8 x half> @llvm.aarch64.sve.ld1rq.nxv8f16(<vscale x 8 x i1>, half*)
+declare <vscale x 8 x bfloat> @llvm.aarch64.sve.ld1rq.nxv8bf16(<vscale x 8 x i1>, bfloat*)
 declare <vscale x 4 x float> @llvm.aarch64.sve.ld1rq.nxv4f32(<vscale x 4 x i1>, float*)
 declare <vscale x 2 x double> @llvm.aarch64.sve.ld1rq.nxv2f64(<vscale x 2 x i1>, double*)
 
@@ -482,6 +509,7 @@
 declare <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.nxv4i32(<vscale x 4 x i1>, i32*)
 declare <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.nxv2i64(<vscale x 2 x i1>, i64*)
 declare <vscale x 8 x half> @llvm.aarch64.sve.ldnt1.nxv8f16(<vscale x 8 x i1>, half*)
+declare <vscale x 8 x bfloat> @llvm.aarch64.sve.ldnt1.nxv8bf16(<vscale x 8 x i1>, bfloat*)
 declare <vscale x 4 x float> @llvm.aarch64.sve.ldnt1.nxv4f32(<vscale x 4 x i1>, float*)
 declare <vscale x 2 x double> @llvm.aarch64.sve.ldnt1.nxv2f64(<vscale x 2 x i1>, double*)
 
Index: llvm/test/CodeGen/AArch64/sve-intrinsics-loads-nf.ll
===
--- llvm/test/CodeGen/AArch64/sve-intrinsics-loads-nf.ll
+++ llvm/test/CodeGen/AArch64/sve-intrinsics-loads-nf.ll
@@ -140,6 +140,14 @@
   ret <vscale x 8 x half> %load
 }
 
+define <vscale x 8 x bfloat> @ldnf1h_bf16(<vscale x 8 x i1> %pg, bfloat* %a) {
+; CHECK-LABEL: ldnf1h_bf16:
+; CHECK: ldnf1h { z0.h }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %load = call <vscale x 8 x bfloat> @llvm.aarch64.sve.ldnf1.nxv8bf16(<vscale x 8 x i1> %pg, bfloat* %a)
+  ret <vscale x 8 x bfloat> %load
+}
+
 define <vscale x 8 x half> @ldnf1h_f16_inbound(<vscale x 8 x i1> %pg, half* %a) {
 ; CHECK-LABEL: ldnf1h_f16_inbound:
 ; CHECK: ldnf1h { z0.h }, p0/z, [x0, #1, mul vl]
@@ -151,6 +159,17 @@
   ret <vscale x 8 x half> %load
 }
 
+define <vscale x 8 x bfloat> @ldnf1h_bf16_inbound(<vscale x 8 x i1> %pg, bfloat* %a) {
+; CHECK-LABEL: ldnf1h_bf16_inbound:
+; CHECK: ldnf1h { z0.h }, p0/z, [x0, #1, mul vl]
+; CHECK-NEXT: ret
+  %base_scalable = bitcast bfloat* %a to <vscale x 8 x bfloat>*
+  %base = getelementptr <vscale x 8 x bfloat>, <vscale x 8 x bfloat>* %base_scalable, i64 1
+  %base_scalar = bitcast <vscale x 8 x bfloat>* %base to bfloat*
+  %load = call <vscale x 8 x bfloat> @llvm.aarch64.sve.ldnf1.nxv8bf16(<vscale x 8 x i1> %pg, bfloat* %base_scalar)
+  ret <vscale x 8 x bfloat> %load
+}