[PATCH] D114540: Big-endian version of vpermxor

2021-11-30 Thread Tarique Islam via Phabricator via cfe-commits
tislam updated this revision to Diff 390683.
tislam marked 2 inline comments as done.
tislam added a comment.

- The P9 RUN step on the big-endian platform has been
removed from the lit test.
- Fixed formatting in `llvm/lib/Target/PowerPC/PPCInstrVSX.td`


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D114540/new/

https://reviews.llvm.org/D114540

Files:
  clang/include/clang/Basic/BuiltinsPPC.def
  clang/test/CodeGen/builtins-ppc-crypto.c
  llvm/include/llvm/IR/IntrinsicsPowerPC.td
  llvm/lib/Target/PowerPC/PPCInstrVSX.td
  llvm/test/CodeGen/PowerPC/crypto_bifs_be.ll

Index: llvm/test/CodeGen/PowerPC/crypto_bifs_be.ll
===
--- /dev/null
+++ llvm/test/CodeGen/PowerPC/crypto_bifs_be.ll
@@ -0,0 +1,165 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr8 < %s | FileCheck %s --check-prefixes=CHECK-LE-P8
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr9 < %s | FileCheck %s --check-prefixes=CHECK-P9
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -mcpu=pwr8 < %s | FileCheck %s --check-prefixes=CHECK-BE-P8
+
+define <16 x i8> @test_vpermxorb() local_unnamed_addr {
+; CHECK-LE-P8-LABEL: test_vpermxorb:
+; CHECK-LE-P8:   # %bb.0: # %entry
+; CHECK-LE-P8-NEXT:addis 3, 2, .LCPI0_0@toc@ha
+; CHECK-LE-P8-NEXT:addis 4, 2, .LCPI0_1@toc@ha
+; CHECK-LE-P8-NEXT:addi 3, 3, .LCPI0_0@toc@l
+; CHECK-LE-P8-NEXT:lvx 2, 0, 3
+; CHECK-LE-P8-NEXT:addi 3, 4, .LCPI0_1@toc@l
+; CHECK-LE-P8-NEXT:lvx 3, 0, 3
+; CHECK-LE-P8-NEXT:vpermxor 2, 3, 2, 2
+; CHECK-LE-P8-NEXT:blr
+;
+; CHECK-P9-LABEL: test_vpermxorb:
+; CHECK-P9:   # %bb.0: # %entry
+; CHECK-P9-NEXT:addis 3, 2, .LCPI0_0@toc@ha
+; CHECK-P9-NEXT:addi 3, 3, .LCPI0_0@toc@l
+; CHECK-P9-NEXT:lxv 34, 0(3)
+; CHECK-P9-NEXT:addis 3, 2, .LCPI0_1@toc@ha
+; CHECK-P9-NEXT:addi 3, 3, .LCPI0_1@toc@l
+; CHECK-P9-NEXT:lxv 35, 0(3)
+; CHECK-P9-NEXT:vpermxor 2, 3, 2, 2
+; CHECK-P9-NEXT:blr
+;
+; CHECK-BE-P8-LABEL: test_vpermxorb:
+; CHECK-BE-P8:   # %bb.0: # %entry
+; CHECK-BE-P8-NEXT:addis 3, 2, .LCPI0_0@toc@ha
+; CHECK-BE-P8-NEXT:addis 4, 2, .LCPI0_1@toc@ha
+; CHECK-BE-P8-NEXT:addi 3, 3, .LCPI0_0@toc@l
+; CHECK-BE-P8-NEXT:addi 4, 4, .LCPI0_1@toc@l
+; CHECK-BE-P8-NEXT:lxvw4x 34, 0, 3
+; CHECK-BE-P8-NEXT:lxvw4x 35, 0, 4
+; CHECK-BE-P8-NEXT:vpermxor 2, 3, 2, 2
+; CHECK-BE-P8-NEXT:blr
+entry:
+  %0 = tail call <16 x i8> @llvm.ppc.altivec.crypto.vpermxor.be(<16 x i8> , <16 x i8> , <16 x i8> )
+  ret <16 x i8> %0
+}
+
+declare <16 x i8> @llvm.ppc.altivec.crypto.vpermxor.be(<16 x i8>, <16 x i8>, <16 x i8>)
+
+define <8 x i16> @test_vpermxorh() local_unnamed_addr {
+; CHECK-LE-P8-LABEL: test_vpermxorh:
+; CHECK-LE-P8:   # %bb.0: # %entry
+; CHECK-LE-P8-NEXT:addis 3, 2, .LCPI1_0@toc@ha
+; CHECK-LE-P8-NEXT:addis 4, 2, .LCPI1_1@toc@ha
+; CHECK-LE-P8-NEXT:addi 3, 3, .LCPI1_0@toc@l
+; CHECK-LE-P8-NEXT:lvx 2, 0, 3
+; CHECK-LE-P8-NEXT:addi 3, 4, .LCPI1_1@toc@l
+; CHECK-LE-P8-NEXT:lvx 3, 0, 3
+; CHECK-LE-P8-NEXT:vpermxor 2, 3, 2, 2
+; CHECK-LE-P8-NEXT:blr
+;
+; CHECK-P9-LABEL: test_vpermxorh:
+; CHECK-P9:   # %bb.0: # %entry
+; CHECK-P9-NEXT:addis 3, 2, .LCPI1_0@toc@ha
+; CHECK-P9-NEXT:addi 3, 3, .LCPI1_0@toc@l
+; CHECK-P9-NEXT:lxv 34, 0(3)
+; CHECK-P9-NEXT:addis 3, 2, .LCPI1_1@toc@ha
+; CHECK-P9-NEXT:addi 3, 3, .LCPI1_1@toc@l
+; CHECK-P9-NEXT:lxv 35, 0(3)
+; CHECK-P9-NEXT:vpermxor 2, 3, 2, 2
+; CHECK-P9-NEXT:blr
+;
+; CHECK-BE-P8-LABEL: test_vpermxorh:
+; CHECK-BE-P8:   # %bb.0: # %entry
+; CHECK-BE-P8-NEXT:addis 3, 2, .LCPI1_0@toc@ha
+; CHECK-BE-P8-NEXT:addis 4, 2, .LCPI1_1@toc@ha
+; CHECK-BE-P8-NEXT:addi 3, 3, .LCPI1_0@toc@l
+; CHECK-BE-P8-NEXT:addi 4, 4, .LCPI1_1@toc@l
+; CHECK-BE-P8-NEXT:lxvw4x 34, 0, 3
+; CHECK-BE-P8-NEXT:lxvw4x 35, 0, 4
+; CHECK-BE-P8-NEXT:vpermxor 2, 3, 2, 2
+; CHECK-BE-P8-NEXT:blr
+entry:
+  %0 = tail call <16 x i8> @llvm.ppc.altivec.crypto.vpermxor.be(<16 x i8> , <16 x i8> , <16 x i8> )
+  %1 = bitcast <16 x i8> %0 to <8 x i16>
+  ret <8 x i16> %1
+}
+
+define <4 x i32> @test_vpermxorw() local_unnamed_addr {
+; CHECK-LE-P8-LABEL: test_vpermxorw:
+; CHECK-LE-P8:   # %bb.0: # %entry
+; CHECK-LE-P8-NEXT:addis 3, 2, .LCPI2_0@toc@ha
+; CHECK-LE-P8-NEXT:addis 4, 2, .LCPI2_1@toc@ha
+; CHECK-LE-P8-NEXT:addi 3, 3, .LCPI2_0@toc@l
+; CHECK-LE-P8-NEXT:lvx 2, 0, 3
+; CHECK-LE-P8-NEXT:addi 3, 4, .LCPI2_1@toc@l
+; CHECK-LE-P8-NEXT:lvx 3, 0, 3
+; CHECK-LE-P8-NEXT:vpermxor 2, 3, 2, 2
+; CHECK-LE-P8-NEXT:blr
+;
+; CHECK-P9-LABEL: test_vpermxorw:
+; CHECK-P9:   # %bb.0: # %entry
+; CHECK-P9-NEXT:addis 3, 2, .LCPI2_0@toc@ha
+; CHECK-P9-NEXT:addi 3, 3, 

[PATCH] D114540: Big-endian version of vpermxor

2021-11-25 Thread Tarique Islam via Phabricator via cfe-commits
tislam updated this revision to Diff 389775.
tislam edited the summary of this revision.
tislam added a comment.

- Added test for `vpermxor_be` in `clang/test/CodeGen/builtins-ppc-crypto.c`.
- Placed `vpermxor_be` under `[HasVSX, HasP8Altivec]` in 
`llvm/lib/Target/PowerPC/PPCInstrVSX.td`.
- Updated test `llvm/test/CodeGen/PowerPC/crypto_bifs_be.ll`.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D114540/new/

https://reviews.llvm.org/D114540

Files:
  clang/include/clang/Basic/BuiltinsPPC.def
  clang/test/CodeGen/builtins-ppc-crypto.c
  llvm/include/llvm/IR/IntrinsicsPowerPC.td
  llvm/lib/Target/PowerPC/PPCInstrVSX.td
  llvm/test/CodeGen/PowerPC/crypto_bifs_be.ll

Index: llvm/test/CodeGen/PowerPC/crypto_bifs_be.ll
===
--- /dev/null
+++ llvm/test/CodeGen/PowerPC/crypto_bifs_be.ll
@@ -0,0 +1,167 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr8 < %s | FileCheck %s --check-prefixes=CHECK-LE-P8
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr9 < %s | FileCheck %s --check-prefixes=CHECK-P9
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -mcpu=pwr8 < %s | FileCheck %s --check-prefixes=CHECK-BE-P8
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -mcpu=pwr9 < %s | FileCheck %s --check-prefixes=CHECK-P9
+
+define <16 x i8> @test_vpermxorb() local_unnamed_addr {
+; CHECK-LE-P8-LABEL: test_vpermxorb:
+; CHECK-LE-P8:   # %bb.0: # %entry
+; CHECK-LE-P8-NEXT:addis 3, 2, .LCPI0_0@toc@ha
+; CHECK-LE-P8-NEXT:addis 4, 2, .LCPI0_1@toc@ha
+; CHECK-LE-P8-NEXT:addi 3, 3, .LCPI0_0@toc@l
+; CHECK-LE-P8-NEXT:lvx 2, 0, 3
+; CHECK-LE-P8-NEXT:addi 3, 4, .LCPI0_1@toc@l
+; CHECK-LE-P8-NEXT:lvx 3, 0, 3
+; CHECK-LE-P8-NEXT:vpermxor 2, 3, 2, 2
+; CHECK-LE-P8-NEXT:blr
+;
+; CHECK-P9-LABEL: test_vpermxorb:
+; CHECK-P9:   # %bb.0: # %entry
+; CHECK-P9-NEXT:addis 3, 2, .LCPI0_0@toc@ha
+; CHECK-P9-NEXT:addi 3, 3, .LCPI0_0@toc@l
+; CHECK-P9-NEXT:lxv 34, 0(3)
+; CHECK-P9-NEXT:addis 3, 2, .LCPI0_1@toc@ha
+; CHECK-P9-NEXT:addi 3, 3, .LCPI0_1@toc@l
+; CHECK-P9-NEXT:lxv 35, 0(3)
+; CHECK-P9-NEXT:vpermxor 2, 3, 2, 2
+; CHECK-P9-NEXT:blr
+;
+; CHECK-BE-P8-LABEL: test_vpermxorb:
+; CHECK-BE-P8:   # %bb.0: # %entry
+; CHECK-BE-P8-NEXT:addis 3, 2, .LCPI0_0@toc@ha
+; CHECK-BE-P8-NEXT:addis 4, 2, .LCPI0_1@toc@ha
+; CHECK-BE-P8-NEXT:addi 3, 3, .LCPI0_0@toc@l
+; CHECK-BE-P8-NEXT:addi 4, 4, .LCPI0_1@toc@l
+; CHECK-BE-P8-NEXT:lxvw4x 34, 0, 3
+; CHECK-BE-P8-NEXT:lxvw4x 35, 0, 4
+; CHECK-BE-P8-NEXT:vpermxor 2, 3, 2, 2
+; CHECK-BE-P8-NEXT:blr
+entry:
+  %0 = tail call <16 x i8> @llvm.ppc.altivec.crypto.vpermxor.be(<16 x i8> , <16 x i8> , <16 x i8> )
+  ret <16 x i8> %0
+}
+
+declare <16 x i8> @llvm.ppc.altivec.crypto.vpermxor.be(<16 x i8>, <16 x i8>, <16 x i8>)
+
+define <8 x i16> @test_vpermxorh() local_unnamed_addr {
+; CHECK-LE-P8-LABEL: test_vpermxorh:
+; CHECK-LE-P8:   # %bb.0: # %entry
+; CHECK-LE-P8-NEXT:addis 3, 2, .LCPI1_0@toc@ha
+; CHECK-LE-P8-NEXT:addis 4, 2, .LCPI1_1@toc@ha
+; CHECK-LE-P8-NEXT:addi 3, 3, .LCPI1_0@toc@l
+; CHECK-LE-P8-NEXT:lvx 2, 0, 3
+; CHECK-LE-P8-NEXT:addi 3, 4, .LCPI1_1@toc@l
+; CHECK-LE-P8-NEXT:lvx 3, 0, 3
+; CHECK-LE-P8-NEXT:vpermxor 2, 3, 2, 2
+; CHECK-LE-P8-NEXT:blr
+;
+; CHECK-P9-LABEL: test_vpermxorh:
+; CHECK-P9:   # %bb.0: # %entry
+; CHECK-P9-NEXT:addis 3, 2, .LCPI1_0@toc@ha
+; CHECK-P9-NEXT:addi 3, 3, .LCPI1_0@toc@l
+; CHECK-P9-NEXT:lxv 34, 0(3)
+; CHECK-P9-NEXT:addis 3, 2, .LCPI1_1@toc@ha
+; CHECK-P9-NEXT:addi 3, 3, .LCPI1_1@toc@l
+; CHECK-P9-NEXT:lxv 35, 0(3)
+; CHECK-P9-NEXT:vpermxor 2, 3, 2, 2
+; CHECK-P9-NEXT:blr
+;
+; CHECK-BE-P8-LABEL: test_vpermxorh:
+; CHECK-BE-P8:   # %bb.0: # %entry
+; CHECK-BE-P8-NEXT:addis 3, 2, .LCPI1_0@toc@ha
+; CHECK-BE-P8-NEXT:addis 4, 2, .LCPI1_1@toc@ha
+; CHECK-BE-P8-NEXT:addi 3, 3, .LCPI1_0@toc@l
+; CHECK-BE-P8-NEXT:addi 4, 4, .LCPI1_1@toc@l
+; CHECK-BE-P8-NEXT:lxvw4x 34, 0, 3
+; CHECK-BE-P8-NEXT:lxvw4x 35, 0, 4
+; CHECK-BE-P8-NEXT:vpermxor 2, 3, 2, 2
+; CHECK-BE-P8-NEXT:blr
+entry:
+  %0 = tail call <16 x i8> @llvm.ppc.altivec.crypto.vpermxor.be(<16 x i8> , <16 x i8> , <16 x i8> )
+  %1 = bitcast <16 x i8> %0 to <8 x i16>
+  ret <8 x i16> %1
+}
+
+define <4 x i32> @test_vpermxorw() local_unnamed_addr {
+; CHECK-LE-P8-LABEL: test_vpermxorw:
+; CHECK-LE-P8:   # %bb.0: # %entry
+; CHECK-LE-P8-NEXT:addis 3, 2, .LCPI2_0@toc@ha
+; CHECK-LE-P8-NEXT:addis 4, 2, .LCPI2_1@toc@ha
+; CHECK-LE-P8-NEXT:addi 3, 3, .LCPI2_0@toc@l
+; CHECK-LE-P8-NEXT:lvx 2, 0, 3
+; CHECK-LE-P8-NEXT:addi 3, 4, .LCPI2_1@toc@l
+; CHECK-LE-P8-NEXT:lvx 3, 0, 3
+; 

[PATCH] D114540: Big-endian version of vpermxor

2021-11-24 Thread Tarique Islam via Phabricator via cfe-commits
tislam created this revision.
tislam added a reviewer: nemanjai.
Herald added subscribers: kbarton, hiraditya.
tislam requested review of this revision.
Herald added projects: clang, LLVM.
Herald added subscribers: llvm-commits, cfe-commits.

A big-endian version of `vpermxor`, named `vpermxor_be`, is added to LLVM and 
Clang. `vpermxor_be` can be called directly on a little-endian platform.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D114540

Files:
  clang/include/clang/Basic/BuiltinsPPC.def
  llvm/include/llvm/IR/IntrinsicsPowerPC.td
  llvm/lib/Target/PowerPC/PPCInstrVSX.td
  llvm/test/CodeGen/PowerPC/crypto_bifs_be.ll

Index: llvm/test/CodeGen/PowerPC/crypto_bifs_be.ll
===
--- /dev/null
+++ llvm/test/CodeGen/PowerPC/crypto_bifs_be.ll
@@ -0,0 +1,94 @@
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr8 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr9 < %s | FileCheck %s
+
+; Function Attrs: nounwind
+define <16 x i8> @test_vpermxorb() #0 {
+entry:
+  %a = alloca <16 x i8>, align 16
+  %b = alloca <16 x i8>, align 16
+  %c = alloca <16 x i8>, align 16
+  store <16 x i8> , <16 x i8>* %a, align 16
+  store <16 x i8> , <16 x i8>* %b, align 16
+  store <16 x i8> , <16 x i8>* %c, align 16
+  %0 = load <16 x i8>,  <16 x i8>* %a, align 16
+  %1 = load <16 x i8>,  <16 x i8>* %b, align 16
+  %2 = load <16 x i8>,  <16 x i8>* %c, align 16
+  %3 = call <16 x i8> @llvm.ppc.altivec.crypto.vpermxor.be(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2)
+  ret <16 x i8> %3
+; CHECK-NOT: xxlnor
+; CHECK: vpermxor 2,
+}
+
+; Function Attrs: nounwind readnone
+declare <16 x i8> @llvm.ppc.altivec.crypto.vpermxor.be(<16 x i8>, <16 x i8>, <16 x i8>) #1
+
+; Function Attrs: nounwind
+define <8 x i16> @test_vpermxorh() #0 {
+entry:
+  %a = alloca <8 x i16>, align 16
+  %b = alloca <8 x i16>, align 16
+  %c = alloca <8 x i16>, align 16
+  store <8 x i16> , <8 x i16>* %a, align 16
+  store <8 x i16> , <8 x i16>* %b, align 16
+  store <8 x i16> , <8 x i16>* %c, align 16
+  %0 = load <8 x i16>,  <8 x i16>* %a, align 16
+  %1 = bitcast <8 x i16> %0 to <16 x i8>
+  %2 = load <8 x i16>,  <8 x i16>* %b, align 16
+  %3 = bitcast <8 x i16> %2 to <16 x i8>
+  %4 = load <8 x i16>,  <8 x i16>* %c, align 16
+  %5 = bitcast <8 x i16> %4 to <16 x i8>
+  %6 = call <16 x i8> @llvm.ppc.altivec.crypto.vpermxor.be(<16 x i8> %1, <16 x i8> %3, <16 x i8> %5)
+  %7 = bitcast <16 x i8> %6 to <8 x i16>
+  ret <8 x i16> %7
+; CHECK-NOT: xxlnor
+; CHECK: vpermxor 2,
+}
+
+; Function Attrs: nounwind
+define <4 x i32> @test_vpermxorw() #0 {
+entry:
+  %a = alloca <4 x i32>, align 16
+  %b = alloca <4 x i32>, align 16
+  %c = alloca <4 x i32>, align 16
+  store <4 x i32> , <4 x i32>* %a, align 16
+  store <4 x i32> , <4 x i32>* %b, align 16
+  store <4 x i32> , <4 x i32>* %c, align 16
+  %0 = load <4 x i32>,  <4 x i32>* %a, align 16
+  %1 = bitcast <4 x i32> %0 to <16 x i8>
+  %2 = load <4 x i32>,  <4 x i32>* %b, align 16
+  %3 = bitcast <4 x i32> %2 to <16 x i8>
+  %4 = load <4 x i32>,  <4 x i32>* %c, align 16
+  %5 = bitcast <4 x i32> %4 to <16 x i8>
+  %6 = call <16 x i8> @llvm.ppc.altivec.crypto.vpermxor.be(<16 x i8> %1, <16 x i8> %3, <16 x i8> %5)
+  %7 = bitcast <16 x i8> %6 to <4 x i32>
+  ret <4 x i32> %7
+; CHECK-NOT: xxlnor
+; CHECK: vpermxor 2,
+}
+
+; Function Attrs: nounwind
+define <2 x i64> @test_vpermxord() #0 {
+entry:
+  %a = alloca <2 x i64>, align 16
+  %b = alloca <2 x i64>, align 16
+  %c = alloca <2 x i64>, align 16
+  store <2 x i64> , <2 x i64>* %a, align 16
+  store <2 x i64> , <2 x i64>* %b, align 16
+  store <2 x i64> , <2 x i64>* %c, align 16
+  %0 = load <2 x i64>,  <2 x i64>* %a, align 16
+  %1 = bitcast <2 x i64> %0 to <16 x i8>
+  %2 = load <2 x i64>,  <2 x i64>* %b, align 16
+  %3 = bitcast <2 x i64> %2 to <16 x i8>
+  %4 = load <2 x i64>,  <2 x i64>* %c, align 16
+  %5 = bitcast <2 x i64> %4 to <16 x i8>
+  %6 = call <16 x i8> @llvm.ppc.altivec.crypto.vpermxor.be(<16 x i8> %1, <16 x i8> %3, <16 x i8> %5)
+  %7 = bitcast <16 x i8> %6 to <2 x i64>
+  ret <2 x i64> %7
+; CHECK-NOT: xxlnor
+; CHECK: vpermxor 2,
+}
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
Index: llvm/lib/Target/PowerPC/PPCInstrVSX.td
===
--- llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -2491,11 +2491,16 @@
 
 // These Altivec patterns are here because we need a VSX instruction to match
 // the intrinsic (but only for little endian system).
-let Predicates = [HasVSX, IsLittleEndian, HasP8Altivec] in
+let Predicates = [HasVSX, IsLittleEndian,