[PATCH] D71000: [AArch64][SVE] Implement intrinsics for non-temporal loads & stores

2019-12-11 Thread Kerry McLaughlin via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rG3f5bf35f868d: [AArch64][SVE] Implement intrinsics for 
non-temporal loads & stores (authored by kmclaughlin).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D71000/new/

https://reviews.llvm.org/D71000

Files:
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
  llvm/lib/Target/AArch64/AArch64InstrInfo.td
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/test/CodeGen/AArch64/sve-intrinsics-loads.ll
  llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll

Index: llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll
@@ -0,0 +1,95 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+;
+; STNT1B
+;
+
+define void @stnt1b_i8(<vscale x 16 x i8> %data, <vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
+; CHECK-LABEL: stnt1b_i8:
+; CHECK: stnt1b { z0.b }, p0, [x0, #0]
+; CHECK-NEXT: ret
+  call void @llvm.aarch64.sve.stnt1.nxv16i8(<vscale x 16 x i8> %data,
+                                            <vscale x 16 x i1> %pred,
+                                            <vscale x 16 x i8>* %addr)
+  ret void
+}
+
+;
+; STNT1H
+;
+
+define void @stnt1h_i16( %data,  %pred, * %addr) {
+; CHECK-LABEL: stnt1h_i16:
+; CHECK: stnt1h { z0.h }, p0, [x0, #0, lsl #1]
+; CHECK-NEXT: ret
+  call void @llvm.aarch64.sve.stnt1.nxv8i16( %data,
+ %pred,
+* %addr)
+  ret void
+}
+
+define void @stnt1h_f16( %data,  %pred, * %addr) {
+; CHECK-LABEL: stnt1h_f16:
+; CHECK: stnt1h { z0.h }, p0, [x0, #0, lsl #1]
+; CHECK-NEXT: ret
+  call void @llvm.aarch64.sve.stnt1.nxv8f16( %data,
+ %pred,
+* %addr)
+  ret void
+}
+
+;
+; STNT1W
+;
+
+define void @stnt1w_i32( %data,  %pred, * %addr) {
+; CHECK-LABEL: stnt1w_i32:
+; CHECK: stnt1w { z0.s }, p0, [x0, #0, lsl #2]
+; CHECK-NEXT: ret
+  call void @llvm.aarch64.sve.stnt1.nxv4i32( %data,
+ %pred,
+* %addr)
+  ret void
+}
+
+define void @stnt1w_f32( %data,  %pred, * %addr) {
+; CHECK-LABEL: stnt1w_f32:
+; CHECK: stnt1w { z0.s }, p0, [x0, #0, lsl #2]
+; CHECK-NEXT: ret
+  call void @llvm.aarch64.sve.stnt1.nxv4f32( %data,
+ %pred,
+* %addr)
+  ret void
+}
+
+;
+; STNT1D
+;
+
+define void @stnt1d_i64( %data,  %pred, * %addr) {
+; CHECK-LABEL: stnt1d_i64:
+; CHECK: stnt1d { z0.d }, p0, [x0, #0, lsl #3]
+; CHECK-NEXT: ret
+  call void @llvm.aarch64.sve.stnt1.nxv2i64( %data,
+ %pred,
+* %addr)
+  ret void
+}
+
+define void @stnt1d_f64( %data,  %pred, * %addr) {
+; CHECK-LABEL: stnt1d_f64:
+; CHECK: stnt1d { z0.d }, p0, [x0, #0, lsl #3]
+; CHECK-NEXT: ret
+  call void @llvm.aarch64.sve.stnt1.nxv2f64( %data,
+ %pred,
+* %addr)
+  ret void
+}
+
+declare void @llvm.aarch64.sve.stnt1.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, <vscale x 16 x i8>*)
+declare void @llvm.aarch64.sve.stnt1.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, <vscale x 8 x i16>*)
+declare void @llvm.aarch64.sve.stnt1.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>*)
+declare void @llvm.aarch64.sve.stnt1.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>*)
+declare void @llvm.aarch64.sve.stnt1.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i1>, <vscale x 8 x half>*)
+declare void @llvm.aarch64.sve.stnt1.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, <vscale x 4 x float>*)
+declare void @llvm.aarch64.sve.stnt1.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, <vscale x 2 x double>*)
Index: llvm/test/CodeGen/AArch64/sve-intrinsics-loads.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-intrinsics-loads.ll
@@ -0,0 +1,88 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+;
+; LDNT1B
+;
+
+define <vscale x 16 x i8> @ldnt1b_i8(<vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
+; CHECK-LABEL: ldnt1b_i8:
+; CHECK: ldnt1b { z0.b }, p0/z, [x0, #0]
+; CHECK-NEXT: ret
+  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.ldnt1.nxv16i8(<vscale x 16 x i1> %pred,
+                                                                 <vscale x 16 x i8>* %addr)
+  ret <vscale x 16 x i8> %res
+}
+
+;
+; LDNT1H
+;
+
+define  @ldnt1h_i16( %pred, * %addr) {
+; CHECK-LABEL: ldnt1h_i16:
+; CHECK: ldnt1h { z0.h }, p0/z, [x0, #0, lsl #1]
+; CHECK-NEXT: ret
+  %res = call  @llvm.aarch64.sve.ldnt1.nxv8i16( %pred,
+ * %addr)
+  ret  %res
+}
+
+define  @ldnt1h_f16( %pred, * %addr) {
+; CHECK-LABEL: ldnt1h_f16:
+; CHECK: ldnt1h { z0.h }, p0/z, [x0, #0, lsl #1]
+; CHECK-NEXT: ret
+  %res = call  @llvm.aarch64.sve.ldnt1.nxv8f16( %pred,
+  * %addr)
+  ret  %res
+}
+
+;
+; LDNT1W
+;
+
+define  @ldnt1w_i32( %pred, * %addr) {
+; CHECK-LABEL: ldnt1w_i32:
+; CHECK: ldnt1w { z0.s }, p0/z, [x0, #0, lsl #2]
+; CHECK-NEXT: 

[PATCH] D71000: [AArch64][SVE] Implement intrinsics for non-temporal loads & stores

2019-12-11 Thread Kerry McLaughlin via Phabricator via cfe-commits
kmclaughlin updated this revision to Diff 233298.
kmclaughlin marked an inline comment as done.
kmclaughlin added a comment.

- Changed 'Offset' value used by getMaskedLoad & getMaskedStore to scalar type


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D71000/new/

https://reviews.llvm.org/D71000

Files:
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
  llvm/lib/Target/AArch64/AArch64InstrInfo.td
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/test/CodeGen/AArch64/sve-intrinsics-loads.ll
  llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll

Index: llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll
@@ -0,0 +1,95 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+;
+; STNT1B
+;
+
+define void @stnt1b_i8( %data,  %pred, * %addr) {
+; CHECK-LABEL: stnt1b_i8:
+; CHECK: stnt1b { z0.b }, p0, [x0, #0]
+; CHECK-NEXT: ret
+  call void @llvm.aarch64.sve.stnt1.nxv16i8( %data,
+ %pred,
+* %addr)
+  ret void
+}
+
+;
+; STNT1H
+;
+
+define void @stnt1h_i16( %data,  %pred, * %addr) {
+; CHECK-LABEL: stnt1h_i16:
+; CHECK: stnt1h { z0.h }, p0, [x0, #0, lsl #1]
+; CHECK-NEXT: ret
+  call void @llvm.aarch64.sve.stnt1.nxv8i16( %data,
+ %pred,
+* %addr)
+  ret void
+}
+
+define void @stnt1h_f16( %data,  %pred, * %addr) {
+; CHECK-LABEL: stnt1h_f16:
+; CHECK: stnt1h { z0.h }, p0, [x0, #0, lsl #1]
+; CHECK-NEXT: ret
+  call void @llvm.aarch64.sve.stnt1.nxv8f16( %data,
+ %pred,
+* %addr)
+  ret void
+}
+
+;
+; STNT1W
+;
+
+define void @stnt1w_i32( %data,  %pred, * %addr) {
+; CHECK-LABEL: stnt1w_i32:
+; CHECK: stnt1w { z0.s }, p0, [x0, #0, lsl #2]
+; CHECK-NEXT: ret
+  call void @llvm.aarch64.sve.stnt1.nxv4i32( %data,
+ %pred,
+* %addr)
+  ret void
+}
+
+define void @stnt1w_f32( %data,  %pred, * %addr) {
+; CHECK-LABEL: stnt1w_f32:
+; CHECK: stnt1w { z0.s }, p0, [x0, #0, lsl #2]
+; CHECK-NEXT: ret
+  call void @llvm.aarch64.sve.stnt1.nxv4f32( %data,
+ %pred,
+* %addr)
+  ret void
+}
+
+;
+; STNT1D
+;
+
+define void @stnt1d_i64( %data,  %pred, * %addr) {
+; CHECK-LABEL: stnt1d_i64:
+; CHECK: stnt1d { z0.d }, p0, [x0, #0, lsl #3]
+; CHECK-NEXT: ret
+  call void @llvm.aarch64.sve.stnt1.nxv2i64( %data,
+ %pred,
+* %addr)
+  ret void
+}
+
+define void @stnt1d_f64( %data,  %pred, * %addr) {
+; CHECK-LABEL: stnt1d_f64:
+; CHECK: stnt1d { z0.d }, p0, [x0, #0, lsl #3]
+; CHECK-NEXT: ret
+  call void @llvm.aarch64.sve.stnt1.nxv2f64( %data,
+ %pred,
+* %addr)
+  ret void
+}
+
+declare void @llvm.aarch64.sve.stnt1.nxv16i8(, , *)
+declare void @llvm.aarch64.sve.stnt1.nxv8i16(, , *)
+declare void @llvm.aarch64.sve.stnt1.nxv4i32(, , *)
+declare void @llvm.aarch64.sve.stnt1.nxv2i64(, , *)
+declare void @llvm.aarch64.sve.stnt1.nxv8f16(, , *)
+declare void @llvm.aarch64.sve.stnt1.nxv4f32(, , *)
+declare void @llvm.aarch64.sve.stnt1.nxv2f64(, , *)
Index: llvm/test/CodeGen/AArch64/sve-intrinsics-loads.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-intrinsics-loads.ll
@@ -0,0 +1,88 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+;
+; LDNT1B
+;
+
+define  @ldnt1b_i8( %pred, * %addr) {
+; CHECK-LABEL: ldnt1b_i8:
+; CHECK: ldnt1b { z0.b }, p0/z, [x0, #0]
+; CHECK-NEXT: ret
+  %res = call  @llvm.aarch64.sve.ldnt1.nxv16i8( %pred,
+ * %addr)
+  ret  %res
+}
+
+;
+; LDNT1H
+;
+
+define  @ldnt1h_i16( %pred, * %addr) {
+; CHECK-LABEL: ldnt1h_i16:
+; CHECK: ldnt1h { z0.h }, p0/z, [x0, #0, lsl #1]
+; CHECK-NEXT: ret
+  %res = call  @llvm.aarch64.sve.ldnt1.nxv8i16( %pred,
+ * %addr)
+  ret  %res
+}
+
+define  @ldnt1h_f16( %pred, * %addr) {
+; CHECK-LABEL: ldnt1h_f16:
+; CHECK: ldnt1h { z0.h }, p0/z, [x0, #0, lsl #1]
+; CHECK-NEXT: ret
+  %res = call  @llvm.aarch64.sve.ldnt1.nxv8f16( %pred,
+  * %addr)
+  ret  %res
+}
+
+;
+; LDNT1W
+;
+
+define  @ldnt1w_i32( %pred, * %addr) {
+; CHECK-LABEL: ldnt1w_i32:
+; CHECK: ldnt1w { z0.s }, p0/z, [x0, #0, lsl #2]
+; CHECK-NEXT: ret
+  %res = call  

[PATCH] D71000: [AArch64][SVE] Implement intrinsics for non-temporal loads & stores

2019-12-09 Thread Eli Friedman via Phabricator via cfe-commits
efriedma accepted this revision.
efriedma added a comment.
This revision is now accepted and ready to land.

LGTM




Comment at: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp:10800
+MINode->getOperand(3), DAG.getUNDEF(LoadVT),
+MINode->getOperand(2), PassThru,
+MINode->getMemoryVT(), MINode->getMemOperand(),

Offset (the fifth argument) is supposed to be a scalar, not a vector.  Probably 
nothing actually checks that for loads that aren't pre/post-indexed.  (Same 
applies to MSTORE.)


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D71000/new/

https://reviews.llvm.org/D71000



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D71000: [AArch64][SVE] Implement intrinsics for non-temporal loads & stores

2019-12-09 Thread Kerry McLaughlin via Phabricator via cfe-commits
kmclaughlin marked 2 inline comments as done.
kmclaughlin added inline comments.



Comment at: llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp:6587
+  else if (!Size)
+Size = MemVT.getStoreSize().getKnownMinSize();
 

efriedma wrote:
> In order for alias analysis to correctly handle a MachineMemOperand, the 
> "Size" of an operation has to be conservative, in the sense that the the 
> number of bytes accessed must be at most "Size". Otherwise we'll assume two 
> operations don't alias when they actually do.
> 
> For a scaled vector, we don't know the size, so we have to conservatively 
> pass "MemoryLocation::UnknownSize".
Thanks @efriedma, I have changed this to use MemoryLocation::UnknownSize


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D71000/new/

https://reviews.llvm.org/D71000



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D71000: [AArch64][SVE] Implement intrinsics for non-temporal loads & stores

2019-12-09 Thread Kerry McLaughlin via Phabricator via cfe-commits
kmclaughlin updated this revision to Diff 232870.
kmclaughlin added a comment.

- Set 'Size' to MemoryLocation::UnknownSize for scalable vectors in 
getMemIntrinsicNode
- Ensure MLOAD zeroes inactive lanes by using a zero value for the PassThru in 
getMaskedLoad


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D71000/new/

https://reviews.llvm.org/D71000

Files:
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
  llvm/lib/Target/AArch64/AArch64InstrInfo.td
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/test/CodeGen/AArch64/sve-intrinsics-loads.ll
  llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll

Index: llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll
@@ -0,0 +1,95 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+;
+; STNT1B
+;
+
+define void @stnt1b_i8( %data,  %pred, * %addr) {
+; CHECK-LABEL: stnt1b_i8:
+; CHECK: stnt1b { z0.b }, p0, [x0, #0]
+; CHECK-NEXT: ret
+  call void @llvm.aarch64.sve.stnt1.nxv16i8( %data,
+ %pred,
+* %addr)
+  ret void
+}
+
+;
+; STNT1H
+;
+
+define void @stnt1h_i16( %data,  %pred, * %addr) {
+; CHECK-LABEL: stnt1h_i16:
+; CHECK: stnt1h { z0.h }, p0, [x0, #0, lsl #1]
+; CHECK-NEXT: ret
+  call void @llvm.aarch64.sve.stnt1.nxv8i16( %data,
+ %pred,
+* %addr)
+  ret void
+}
+
+define void @stnt1h_f16( %data,  %pred, * %addr) {
+; CHECK-LABEL: stnt1h_f16:
+; CHECK: stnt1h { z0.h }, p0, [x0, #0, lsl #1]
+; CHECK-NEXT: ret
+  call void @llvm.aarch64.sve.stnt1.nxv8f16( %data,
+ %pred,
+* %addr)
+  ret void
+}
+
+;
+; STNT1W
+;
+
+define void @stnt1w_i32( %data,  %pred, * %addr) {
+; CHECK-LABEL: stnt1w_i32:
+; CHECK: stnt1w { z0.s }, p0, [x0, #0, lsl #2]
+; CHECK-NEXT: ret
+  call void @llvm.aarch64.sve.stnt1.nxv4i32( %data,
+ %pred,
+* %addr)
+  ret void
+}
+
+define void @stnt1w_f32( %data,  %pred, * %addr) {
+; CHECK-LABEL: stnt1w_f32:
+; CHECK: stnt1w { z0.s }, p0, [x0, #0, lsl #2]
+; CHECK-NEXT: ret
+  call void @llvm.aarch64.sve.stnt1.nxv4f32( %data,
+ %pred,
+* %addr)
+  ret void
+}
+
+;
+; STNT1D
+;
+
+define void @stnt1d_i64( %data,  %pred, * %addr) {
+; CHECK-LABEL: stnt1d_i64:
+; CHECK: stnt1d { z0.d }, p0, [x0, #0, lsl #3]
+; CHECK-NEXT: ret
+  call void @llvm.aarch64.sve.stnt1.nxv2i64( %data,
+ %pred,
+* %addr)
+  ret void
+}
+
+define void @stnt1d_f64( %data,  %pred, * %addr) {
+; CHECK-LABEL: stnt1d_f64:
+; CHECK: stnt1d { z0.d }, p0, [x0, #0, lsl #3]
+; CHECK-NEXT: ret
+  call void @llvm.aarch64.sve.stnt1.nxv2f64( %data,
+ %pred,
+* %addr)
+  ret void
+}
+
+declare void @llvm.aarch64.sve.stnt1.nxv16i8(, , *)
+declare void @llvm.aarch64.sve.stnt1.nxv8i16(, , *)
+declare void @llvm.aarch64.sve.stnt1.nxv4i32(, , *)
+declare void @llvm.aarch64.sve.stnt1.nxv2i64(, , *)
+declare void @llvm.aarch64.sve.stnt1.nxv8f16(, , *)
+declare void @llvm.aarch64.sve.stnt1.nxv4f32(, , *)
+declare void @llvm.aarch64.sve.stnt1.nxv2f64(, , *)
Index: llvm/test/CodeGen/AArch64/sve-intrinsics-loads.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-intrinsics-loads.ll
@@ -0,0 +1,88 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+;
+; LDNT1B
+;
+
+define  @ldnt1b_i8( %pred, * %addr) {
+; CHECK-LABEL: ldnt1b_i8:
+; CHECK: ldnt1b { z0.b }, p0/z, [x0, #0]
+; CHECK-NEXT: ret
+  %res = call  @llvm.aarch64.sve.ldnt1.nxv16i8( %pred,
+ * %addr)
+  ret  %res
+}
+
+;
+; LDNT1H
+;
+
+define  @ldnt1h_i16( %pred, * %addr) {
+; CHECK-LABEL: ldnt1h_i16:
+; CHECK: ldnt1h { z0.h }, p0/z, [x0, #0, lsl #1]
+; CHECK-NEXT: ret
+  %res = call  @llvm.aarch64.sve.ldnt1.nxv8i16( %pred,
+ * %addr)
+  ret  %res
+}
+
+define  @ldnt1h_f16( %pred, * %addr) {
+; CHECK-LABEL: ldnt1h_f16:
+; CHECK: ldnt1h { z0.h }, p0/z, [x0, #0, lsl #1]
+; CHECK-NEXT: ret
+  %res = call  @llvm.aarch64.sve.ldnt1.nxv8f16( %pred,
+  * %addr)
+  ret  %res
+}
+
+;
+; LDNT1W
+;
+
+define  @ldnt1w_i32( %pred, * %addr) {
+; CHECK-LABEL: ldnt1w_i32:
+; CHECK: ldnt1w { z0.s }, p0/z, [x0, #0, 

[PATCH] D71000: [AArch64][SVE] Implement intrinsics for non-temporal loads & stores

2019-12-04 Thread Eli Friedman via Phabricator via cfe-commits
efriedma added inline comments.



Comment at: llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp:6587
+  else if (!Size)
+Size = MemVT.getStoreSize().getKnownMinSize();
 

In order for alias analysis to correctly handle a MachineMemOperand, the "Size" 
of an operation has to be conservative, in the sense that the number of 
bytes accessed must be at most "Size". Otherwise we'll assume two operations 
don't alias when they actually do.

For a scaled vector, we don't know the size, so we have to conservatively pass 
"MemoryLocation::UnknownSize".


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D71000/new/

https://reviews.llvm.org/D71000



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D71000: [AArch64][SVE] Implement intrinsics for non-temporal loads & stores

2019-12-04 Thread Kerry McLaughlin via Phabricator via cfe-commits
kmclaughlin updated this revision to Diff 232072.
kmclaughlin edited the summary of this revision.
kmclaughlin added a comment.

- Removed AArch64 specific ISDNodes for MLOAD & MSTORE


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D71000/new/

https://reviews.llvm.org/D71000

Files:
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
  llvm/lib/Target/AArch64/AArch64InstrInfo.td
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/test/CodeGen/AArch64/sve-intrinsics-loads.ll
  llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll

Index: llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll
@@ -0,0 +1,95 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+;
+; STNT1B
+;
+
+define void @stnt1b_i8( %data,  %pred, * %addr) {
+; CHECK-LABEL: stnt1b_i8:
+; CHECK: stnt1b { z0.b }, p0, [x0, #0]
+; CHECK-NEXT: ret
+  call void @llvm.aarch64.sve.stnt1.nxv16i8( %data,
+ %pred,
+* %addr)
+  ret void
+}
+
+;
+; STNT1H
+;
+
+define void @stnt1h_i16( %data,  %pred, * %addr) {
+; CHECK-LABEL: stnt1h_i16:
+; CHECK: stnt1h { z0.h }, p0, [x0, #0, lsl #1]
+; CHECK-NEXT: ret
+  call void @llvm.aarch64.sve.stnt1.nxv8i16( %data,
+ %pred,
+* %addr)
+  ret void
+}
+
+define void @stnt1h_f16( %data,  %pred, * %addr) {
+; CHECK-LABEL: stnt1h_f16:
+; CHECK: stnt1h { z0.h }, p0, [x0, #0, lsl #1]
+; CHECK-NEXT: ret
+  call void @llvm.aarch64.sve.stnt1.nxv8f16( %data,
+ %pred,
+* %addr)
+  ret void
+}
+
+;
+; STNT1W
+;
+
+define void @stnt1w_i32( %data,  %pred, * %addr) {
+; CHECK-LABEL: stnt1w_i32:
+; CHECK: stnt1w { z0.s }, p0, [x0, #0, lsl #2]
+; CHECK-NEXT: ret
+  call void @llvm.aarch64.sve.stnt1.nxv4i32( %data,
+ %pred,
+* %addr)
+  ret void
+}
+
+define void @stnt1w_f32( %data,  %pred, * %addr) {
+; CHECK-LABEL: stnt1w_f32:
+; CHECK: stnt1w { z0.s }, p0, [x0, #0, lsl #2]
+; CHECK-NEXT: ret
+  call void @llvm.aarch64.sve.stnt1.nxv4f32( %data,
+ %pred,
+* %addr)
+  ret void
+}
+
+;
+; STNT1D
+;
+
+define void @stnt1d_i64( %data,  %pred, * %addr) {
+; CHECK-LABEL: stnt1d_i64:
+; CHECK: stnt1d { z0.d }, p0, [x0, #0, lsl #3]
+; CHECK-NEXT: ret
+  call void @llvm.aarch64.sve.stnt1.nxv2i64( %data,
+ %pred,
+* %addr)
+  ret void
+}
+
+define void @stnt1d_f64( %data,  %pred, * %addr) {
+; CHECK-LABEL: stnt1d_f64:
+; CHECK: stnt1d { z0.d }, p0, [x0, #0, lsl #3]
+; CHECK-NEXT: ret
+  call void @llvm.aarch64.sve.stnt1.nxv2f64( %data,
+ %pred,
+* %addr)
+  ret void
+}
+
+declare void @llvm.aarch64.sve.stnt1.nxv16i8(, , *)
+declare void @llvm.aarch64.sve.stnt1.nxv8i16(, , *)
+declare void @llvm.aarch64.sve.stnt1.nxv4i32(, , *)
+declare void @llvm.aarch64.sve.stnt1.nxv2i64(, , *)
+declare void @llvm.aarch64.sve.stnt1.nxv8f16(, , *)
+declare void @llvm.aarch64.sve.stnt1.nxv4f32(, , *)
+declare void @llvm.aarch64.sve.stnt1.nxv2f64(, , *)
Index: llvm/test/CodeGen/AArch64/sve-intrinsics-loads.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-intrinsics-loads.ll
@@ -0,0 +1,88 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+;
+; LDNT1B
+;
+
+define  @ldnt1b_i8( %pred, * %addr) {
+; CHECK-LABEL: ldnt1b_i8:
+; CHECK: ldnt1b { z0.b }, p0/z, [x0, #0]
+; CHECK-NEXT: ret
+  %res = call  @llvm.aarch64.sve.ldnt1.nxv16i8( %pred,
+ * %addr)
+  ret  %res
+}
+
+;
+; LDNT1H
+;
+
+define  @ldnt1h_i16( %pred, * %addr) {
+; CHECK-LABEL: ldnt1h_i16:
+; CHECK: ldnt1h { z0.h }, p0/z, [x0, #0, lsl #1]
+; CHECK-NEXT: ret
+  %res = call  @llvm.aarch64.sve.ldnt1.nxv8i16( %pred,
+ * %addr)
+  ret  %res
+}
+
+define  @ldnt1h_f16( %pred, * %addr) {
+; CHECK-LABEL: ldnt1h_f16:
+; CHECK: ldnt1h { z0.h }, p0/z, [x0, #0, lsl #1]
+; CHECK-NEXT: ret
+  %res = call  @llvm.aarch64.sve.ldnt1.nxv8f16( %pred,
+  * %addr)
+  ret  %res
+}
+
+;
+; LDNT1W
+;
+
+define  @ldnt1w_i32( %pred, * %addr) {
+; CHECK-LABEL: ldnt1w_i32:
+; CHECK: ldnt1w { z0.s }, p0/z, [x0, #0, lsl #2]
+; CHECK-NEXT: ret
+  %res = call  @llvm.aarch64.sve.ldnt1.nxv4i32( 

[PATCH] D71000: [AArch64][SVE] Implement intrinsics for non-temporal loads & stores

2019-12-04 Thread Kerry McLaughlin via Phabricator via cfe-commits
kmclaughlin created this revision.
kmclaughlin added reviewers: sdesmalen, paulwalker-arm, dancgr, mgudim, 
efriedma.
Herald added subscribers: psnobl, rkruppe, hiraditya, kristof.beyls, tschuett.
Herald added a reviewer: rengolin.
Herald added a project: LLVM.

Adds the following intrinsics:

- llvm.aarch64.sve.ldnt1
- llvm.aarch64.sve.stnt1

This patch also adds the MLOAD & MSTORE AArch64ISD nodes, setting
the MONonTemporal flag when used with the intrinsics above.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D71000

Files:
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
  llvm/lib/Target/AArch64/AArch64ISelLowering.h
  llvm/lib/Target/AArch64/AArch64InstrInfo.td
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/test/CodeGen/AArch64/sve-intrinsics-loads.ll
  llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll

Index: llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll
@@ -0,0 +1,95 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+;
+; STNT1B
+;
+
+define void @stnt1b_i8( %data,  %pred, * %addr) {
+; CHECK-LABEL: stnt1b_i8:
+; CHECK: stnt1b { z0.b }, p0, [x0, #0]
+; CHECK-NEXT: ret
+  call void @llvm.aarch64.sve.stnt1.nxv16i8( %data,
+ %pred,
+* %addr)
+  ret void
+}
+
+;
+; STNT1H
+;
+
+define void @stnt1h_i16( %data,  %pred, * %addr) {
+; CHECK-LABEL: stnt1h_i16:
+; CHECK: stnt1h { z0.h }, p0, [x0, #0, lsl #1]
+; CHECK-NEXT: ret
+  call void @llvm.aarch64.sve.stnt1.nxv8i16( %data,
+ %pred,
+* %addr)
+  ret void
+}
+
+define void @stnt1h_f16( %data,  %pred, * %addr) {
+; CHECK-LABEL: stnt1h_f16:
+; CHECK: stnt1h { z0.h }, p0, [x0, #0, lsl #1]
+; CHECK-NEXT: ret
+  call void @llvm.aarch64.sve.stnt1.nxv8f16( %data,
+ %pred,
+* %addr)
+  ret void
+}
+
+;
+; STNT1W
+;
+
+define void @stnt1w_i32( %data,  %pred, * %addr) {
+; CHECK-LABEL: stnt1w_i32:
+; CHECK: stnt1w { z0.s }, p0, [x0, #0, lsl #2]
+; CHECK-NEXT: ret
+  call void @llvm.aarch64.sve.stnt1.nxv4i32( %data,
+ %pred,
+* %addr)
+  ret void
+}
+
+define void @stnt1w_f32( %data,  %pred, * %addr) {
+; CHECK-LABEL: stnt1w_f32:
+; CHECK: stnt1w { z0.s }, p0, [x0, #0, lsl #2]
+; CHECK-NEXT: ret
+  call void @llvm.aarch64.sve.stnt1.nxv4f32( %data,
+ %pred,
+* %addr)
+  ret void
+}
+
+;
+; STNT1D
+;
+
+define void @stnt1d_i64( %data,  %pred, * %addr) {
+; CHECK-LABEL: stnt1d_i64:
+; CHECK: stnt1d { z0.d }, p0, [x0, #0, lsl #3]
+; CHECK-NEXT: ret
+  call void @llvm.aarch64.sve.stnt1.nxv2i64( %data,
+ %pred,
+* %addr)
+  ret void
+}
+
+define void @stnt1d_f64( %data,  %pred, * %addr) {
+; CHECK-LABEL: stnt1d_f64:
+; CHECK: stnt1d { z0.d }, p0, [x0, #0, lsl #3]
+; CHECK-NEXT: ret
+  call void @llvm.aarch64.sve.stnt1.nxv2f64( %data,
+ %pred,
+* %addr)
+  ret void
+}
+
+declare void @llvm.aarch64.sve.stnt1.nxv16i8(, , *)
+declare void @llvm.aarch64.sve.stnt1.nxv8i16(, , *)
+declare void @llvm.aarch64.sve.stnt1.nxv4i32(, , *)
+declare void @llvm.aarch64.sve.stnt1.nxv2i64(, , *)
+declare void @llvm.aarch64.sve.stnt1.nxv8f16(, , *)
+declare void @llvm.aarch64.sve.stnt1.nxv4f32(, , *)
+declare void @llvm.aarch64.sve.stnt1.nxv2f64(, , *)
Index: llvm/test/CodeGen/AArch64/sve-intrinsics-loads.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-intrinsics-loads.ll
@@ -0,0 +1,88 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+;
+; LDNT1B
+;
+
+define  @ldnt1b_i8( %pred, * %addr) {
+; CHECK-LABEL: ldnt1b_i8:
+; CHECK: ldnt1b { z0.b }, p0/z, [x0, #0]
+; CHECK-NEXT: ret
+  %res = call  @llvm.aarch64.sve.ldnt1.nxv16i8( %pred,
+ * %addr)
+  ret  %res
+}
+
+;
+; LDNT1H
+;
+
+define  @ldnt1h_i16( %pred, * %addr) {
+; CHECK-LABEL: ldnt1h_i16:
+; CHECK: ldnt1h { z0.h }, p0/z, [x0, #0, lsl #1]
+; CHECK-NEXT: ret
+  %res = call  @llvm.aarch64.sve.ldnt1.nxv8i16( %pred,
+ * %addr)
+  ret  %res
+}
+
+define  @ldnt1h_f16( %pred, * %addr) {
+; CHECK-LABEL: ldnt1h_f16:
+; CHECK: ldnt1h { z0.h }, p0/z, [x0, #0, lsl #1]
+; CHECK-NEXT: ret
+  %res = call  @llvm.aarch64.sve.ldnt1.nxv8f16(