[PATCH] D109902: [PowerPC] Improved codegen related to xscvdpsxws/xscvdpuxws

2021-09-30 Thread Albion Fung via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG4195ed995993: [PowerPC] Improved codegen related to 
xscvdpsxws/xscvdpuxws (authored by Conanap).

Changed prior to commit:
  https://reviews.llvm.org/D109902?vs=375780=376327#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D109902/new/

https://reviews.llvm.org/D109902

Files:
  llvm/lib/Target/PowerPC/PPCInstrVSX.td
  llvm/test/CodeGen/PowerPC/test-vector-insert.ll
  llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll

Index: llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll
===
--- llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll
+++ llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll
@@ -13,12 +13,8 @@
 ; CHECK-P8-LABEL: test2elt:
 ; CHECK-P8:   # %bb.0: # %entry
 ; CHECK-P8-NEXT:xxswapd vs0, v2
-; CHECK-P8-NEXT:xscvdpuxws f1, v2
-; CHECK-P8-NEXT:xscvdpuxws f0, f0
-; CHECK-P8-NEXT:mffprwz r3, f1
-; CHECK-P8-NEXT:mtvsrwz v2, r3
-; CHECK-P8-NEXT:mffprwz r4, f0
-; CHECK-P8-NEXT:mtvsrwz v3, r4
+; CHECK-P8-NEXT:xscvdpuxws v2, v2
+; CHECK-P8-NEXT:xscvdpuxws v3, f0
 ; CHECK-P8-NEXT:vmrghw v2, v2, v3
 ; CHECK-P8-NEXT:xxswapd vs0, v2
 ; CHECK-P8-NEXT:mffprd r3, f0
@@ -26,26 +22,18 @@
 ;
 ; CHECK-P9-LABEL: test2elt:
 ; CHECK-P9:   # %bb.0: # %entry
-; CHECK-P9-NEXT:xscvdpuxws f0, v2
-; CHECK-P9-NEXT:mffprwz r3, f0
 ; CHECK-P9-NEXT:xxswapd vs0, v2
-; CHECK-P9-NEXT:mtvsrwz v3, r3
-; CHECK-P9-NEXT:xscvdpuxws f0, f0
-; CHECK-P9-NEXT:mffprwz r3, f0
-; CHECK-P9-NEXT:mtvsrwz v2, r3
+; CHECK-P9-NEXT:xscvdpuxws v3, v2
+; CHECK-P9-NEXT:xscvdpuxws v2, f0
 ; CHECK-P9-NEXT:vmrghw v2, v3, v2
 ; CHECK-P9-NEXT:mfvsrld r3, v2
 ; CHECK-P9-NEXT:blr
 ;
 ; CHECK-BE-LABEL: test2elt:
 ; CHECK-BE:   # %bb.0: # %entry
-; CHECK-BE-NEXT:xscvdpuxws f0, v2
-; CHECK-BE-NEXT:mffprwz r3, f0
 ; CHECK-BE-NEXT:xxswapd vs0, v2
-; CHECK-BE-NEXT:mtvsrwz v3, r3
-; CHECK-BE-NEXT:xscvdpuxws f0, f0
-; CHECK-BE-NEXT:mffprwz r3, f0
-; CHECK-BE-NEXT:mtvsrwz v2, r3
+; CHECK-BE-NEXT:xscvdpuxws v3, v2
+; CHECK-BE-NEXT:xscvdpuxws v2, f0
 ; CHECK-BE-NEXT:vmrgow v2, v3, v2
 ; CHECK-BE-NEXT:mfvsrd r3, v2
 ; CHECK-BE-NEXT:blr
@@ -305,12 +293,8 @@
 ; CHECK-P8-LABEL: test2elt_signed:
 ; CHECK-P8:   # %bb.0: # %entry
 ; CHECK-P8-NEXT:xxswapd vs0, v2
-; CHECK-P8-NEXT:xscvdpsxws f1, v2
-; CHECK-P8-NEXT:xscvdpsxws f0, f0
-; CHECK-P8-NEXT:mffprwz r3, f1
-; CHECK-P8-NEXT:mtvsrwz v2, r3
-; CHECK-P8-NEXT:mffprwz r4, f0
-; CHECK-P8-NEXT:mtvsrwz v3, r4
+; CHECK-P8-NEXT:xscvdpsxws v2, v2
+; CHECK-P8-NEXT:xscvdpsxws v3, f0
 ; CHECK-P8-NEXT:vmrghw v2, v2, v3
 ; CHECK-P8-NEXT:xxswapd vs0, v2
 ; CHECK-P8-NEXT:mffprd r3, f0
@@ -318,26 +302,18 @@
 ;
 ; CHECK-P9-LABEL: test2elt_signed:
 ; CHECK-P9:   # %bb.0: # %entry
-; CHECK-P9-NEXT:xscvdpsxws f0, v2
-; CHECK-P9-NEXT:mffprwz r3, f0
 ; CHECK-P9-NEXT:xxswapd vs0, v2
-; CHECK-P9-NEXT:mtvsrwz v3, r3
-; CHECK-P9-NEXT:xscvdpsxws f0, f0
-; CHECK-P9-NEXT:mffprwz r3, f0
-; CHECK-P9-NEXT:mtvsrwz v2, r3
+; CHECK-P9-NEXT:xscvdpsxws v3, v2
+; CHECK-P9-NEXT:xscvdpsxws v2, f0
 ; CHECK-P9-NEXT:vmrghw v2, v3, v2
 ; CHECK-P9-NEXT:mfvsrld r3, v2
 ; CHECK-P9-NEXT:blr
 ;
 ; CHECK-BE-LABEL: test2elt_signed:
 ; CHECK-BE:   # %bb.0: # %entry
-; CHECK-BE-NEXT:xscvdpsxws f0, v2
-; CHECK-BE-NEXT:mffprwz r3, f0
 ; CHECK-BE-NEXT:xxswapd vs0, v2
-; CHECK-BE-NEXT:mtvsrwz v3, r3
-; CHECK-BE-NEXT:xscvdpsxws f0, f0
-; CHECK-BE-NEXT:mffprwz r3, f0
-; CHECK-BE-NEXT:mtvsrwz v2, r3
+; CHECK-BE-NEXT:xscvdpsxws v3, v2
+; CHECK-BE-NEXT:xscvdpsxws v2, f0
 ; CHECK-BE-NEXT:vmrgow v2, v3, v2
 ; CHECK-BE-NEXT:mfvsrd r3, v2
 ; CHECK-BE-NEXT:blr
Index: llvm/test/CodeGen/PowerPC/test-vector-insert.ll
===
--- llvm/test/CodeGen/PowerPC/test-vector-insert.ll
+++ llvm/test/CodeGen/PowerPC/test-vector-insert.ll
@@ -17,8 +17,8 @@
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
 ; RUN:  -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
 ; RUN:  -mcpu=pwr9 < %s | FileCheck %s --check-prefix=CHECK-BE-P9
-; xscvdpsxws and uxws is only available on Power7 and above
-; Codgen is different for LE Power7 and Power8
+; xscvdpsxws and xscvdpsxws is only available on Power7 and above
+; Codgen is different for Power7, Power8, and Power9.
 
 define dso_local <4 x i32> @test(<4 x i32> %a, double %b) {
 ; CHECK-LE-P7-LABEL: test:
@@ -38,20 +38,16 @@
 ;
 ; CHECK-LE-P8-LABEL: test:
 ; CHECK-LE-P8:   # %bb.0: # %entry
-; CHECK-LE-P8-NEXT:xscvdpsxws f0, f1
+; CHECK-LE-P8-NEXT:xscvdpsxws v3, f1
 ; CHECK-LE-P8-NEXT:addis r3, 

[PATCH] D109902: [PowerPC] Improved codegen related to xscvdpsxws/xscvdpuxws

2021-09-30 Thread Amy Kwan via Phabricator via cfe-commits
amyk added a comment.

Additional nits regarding comments.




Comment at: llvm/test/CodeGen/PowerPC/test-vector-insert.ll:20
 ; RUN:  -mcpu=pwr9 < %s | FileCheck %s --check-prefix=CHECK-BE-P9
 ; xscvdpsxws and uxws is only available on Power7 and above
 ; Codgen is different for LE Power7 and Power8





Comment at: llvm/test/CodeGen/PowerPC/test-vector-insert.ll:21
 ; xscvdpsxws and uxws is only available on Power7 and above
 ; Codgen is different for LE Power7 and Power8
 




CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D109902/new/

https://reviews.llvm.org/D109902

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D109902: [PowerPC] Improved codegen related to xscvdpsxws/xscvdpuxws

2021-09-30 Thread Nemanja Ivanovic via Phabricator via cfe-commits
nemanjai accepted this revision.
nemanjai added a comment.
This revision is now accepted and ready to land.

LGTM.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D109902/new/

https://reviews.llvm.org/D109902

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D109902: [PowerPC] Improved codegen related to xscvdpsxws/xscvdpuxws

2021-09-28 Thread Albion Fung via Phabricator via cfe-commits
Conanap updated this revision to Diff 375780.
Conanap marked an inline comment as done.
Conanap added a comment.

Updated correct version of the patch


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D109902/new/

https://reviews.llvm.org/D109902

Files:
  llvm/lib/Target/PowerPC/PPCInstrVSX.td
  llvm/test/CodeGen/PowerPC/test-vector-insert.ll
  llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll

Index: llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll
===
--- llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll
+++ llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll
@@ -13,12 +13,8 @@
 ; CHECK-P8-LABEL: test2elt:
 ; CHECK-P8:   # %bb.0: # %entry
 ; CHECK-P8-NEXT:xxswapd vs0, v2
-; CHECK-P8-NEXT:xscvdpuxws f1, v2
-; CHECK-P8-NEXT:xscvdpuxws f0, f0
-; CHECK-P8-NEXT:mffprwz r3, f1
-; CHECK-P8-NEXT:mtvsrwz v2, r3
-; CHECK-P8-NEXT:mffprwz r4, f0
-; CHECK-P8-NEXT:mtvsrwz v3, r4
+; CHECK-P8-NEXT:xscvdpuxws v2, v2
+; CHECK-P8-NEXT:xscvdpuxws v3, f0
 ; CHECK-P8-NEXT:vmrghw v2, v2, v3
 ; CHECK-P8-NEXT:xxswapd vs0, v2
 ; CHECK-P8-NEXT:mffprd r3, f0
@@ -26,26 +22,18 @@
 ;
 ; CHECK-P9-LABEL: test2elt:
 ; CHECK-P9:   # %bb.0: # %entry
-; CHECK-P9-NEXT:xscvdpuxws f0, v2
-; CHECK-P9-NEXT:mffprwz r3, f0
 ; CHECK-P9-NEXT:xxswapd vs0, v2
-; CHECK-P9-NEXT:mtvsrwz v3, r3
-; CHECK-P9-NEXT:xscvdpuxws f0, f0
-; CHECK-P9-NEXT:mffprwz r3, f0
-; CHECK-P9-NEXT:mtvsrwz v2, r3
+; CHECK-P9-NEXT:xscvdpuxws v3, v2
+; CHECK-P9-NEXT:xscvdpuxws v2, f0
 ; CHECK-P9-NEXT:vmrghw v2, v3, v2
 ; CHECK-P9-NEXT:mfvsrld r3, v2
 ; CHECK-P9-NEXT:blr
 ;
 ; CHECK-BE-LABEL: test2elt:
 ; CHECK-BE:   # %bb.0: # %entry
-; CHECK-BE-NEXT:xscvdpuxws f0, v2
-; CHECK-BE-NEXT:mffprwz r3, f0
 ; CHECK-BE-NEXT:xxswapd vs0, v2
-; CHECK-BE-NEXT:mtvsrwz v3, r3
-; CHECK-BE-NEXT:xscvdpuxws f0, f0
-; CHECK-BE-NEXT:mffprwz r3, f0
-; CHECK-BE-NEXT:mtvsrwz v2, r3
+; CHECK-BE-NEXT:xscvdpuxws v3, v2
+; CHECK-BE-NEXT:xscvdpuxws v2, f0
 ; CHECK-BE-NEXT:vmrgow v2, v3, v2
 ; CHECK-BE-NEXT:mfvsrd r3, v2
 ; CHECK-BE-NEXT:blr
@@ -305,12 +293,8 @@
 ; CHECK-P8-LABEL: test2elt_signed:
 ; CHECK-P8:   # %bb.0: # %entry
 ; CHECK-P8-NEXT:xxswapd vs0, v2
-; CHECK-P8-NEXT:xscvdpsxws f1, v2
-; CHECK-P8-NEXT:xscvdpsxws f0, f0
-; CHECK-P8-NEXT:mffprwz r3, f1
-; CHECK-P8-NEXT:mtvsrwz v2, r3
-; CHECK-P8-NEXT:mffprwz r4, f0
-; CHECK-P8-NEXT:mtvsrwz v3, r4
+; CHECK-P8-NEXT:xscvdpsxws v2, v2
+; CHECK-P8-NEXT:xscvdpsxws v3, f0
 ; CHECK-P8-NEXT:vmrghw v2, v2, v3
 ; CHECK-P8-NEXT:xxswapd vs0, v2
 ; CHECK-P8-NEXT:mffprd r3, f0
@@ -318,26 +302,18 @@
 ;
 ; CHECK-P9-LABEL: test2elt_signed:
 ; CHECK-P9:   # %bb.0: # %entry
-; CHECK-P9-NEXT:xscvdpsxws f0, v2
-; CHECK-P9-NEXT:mffprwz r3, f0
 ; CHECK-P9-NEXT:xxswapd vs0, v2
-; CHECK-P9-NEXT:mtvsrwz v3, r3
-; CHECK-P9-NEXT:xscvdpsxws f0, f0
-; CHECK-P9-NEXT:mffprwz r3, f0
-; CHECK-P9-NEXT:mtvsrwz v2, r3
+; CHECK-P9-NEXT:xscvdpsxws v3, v2
+; CHECK-P9-NEXT:xscvdpsxws v2, f0
 ; CHECK-P9-NEXT:vmrghw v2, v3, v2
 ; CHECK-P9-NEXT:mfvsrld r3, v2
 ; CHECK-P9-NEXT:blr
 ;
 ; CHECK-BE-LABEL: test2elt_signed:
 ; CHECK-BE:   # %bb.0: # %entry
-; CHECK-BE-NEXT:xscvdpsxws f0, v2
-; CHECK-BE-NEXT:mffprwz r3, f0
 ; CHECK-BE-NEXT:xxswapd vs0, v2
-; CHECK-BE-NEXT:mtvsrwz v3, r3
-; CHECK-BE-NEXT:xscvdpsxws f0, f0
-; CHECK-BE-NEXT:mffprwz r3, f0
-; CHECK-BE-NEXT:mtvsrwz v2, r3
+; CHECK-BE-NEXT:xscvdpsxws v3, v2
+; CHECK-BE-NEXT:xscvdpsxws v2, f0
 ; CHECK-BE-NEXT:vmrgow v2, v3, v2
 ; CHECK-BE-NEXT:mfvsrd r3, v2
 ; CHECK-BE-NEXT:blr
Index: llvm/test/CodeGen/PowerPC/test-vector-insert.ll
===
--- llvm/test/CodeGen/PowerPC/test-vector-insert.ll
+++ llvm/test/CodeGen/PowerPC/test-vector-insert.ll
@@ -38,20 +38,16 @@
 ;
 ; CHECK-LE-P8-LABEL: test:
 ; CHECK-LE-P8:   # %bb.0: # %entry
-; CHECK-LE-P8-NEXT:xscvdpsxws f0, f1
+; CHECK-LE-P8-NEXT:xscvdpsxws v3, f1
 ; CHECK-LE-P8-NEXT:addis r3, r2, .LCPI0_0@toc@ha
 ; CHECK-LE-P8-NEXT:addi r3, r3, .LCPI0_0@toc@l
-; CHECK-LE-P8-NEXT:lvx v3, 0, r3
-; CHECK-LE-P8-NEXT:mffprwz r4, f0
-; CHECK-LE-P8-NEXT:mtvsrwz v4, r4
-; CHECK-LE-P8-NEXT:vperm v2, v4, v2, v3
+; CHECK-LE-P8-NEXT:lvx v4, 0, r3
+; CHECK-LE-P8-NEXT:vperm v2, v3, v2, v4
 ; CHECK-LE-P8-NEXT:blr
 ;
 ; CHECK-LE-P9-LABEL: test:
 ; CHECK-LE-P9:   # %bb.0: # %entry
 ; CHECK-LE-P9-NEXT:xscvdpsxws f0, f1
-; CHECK-LE-P9-NEXT:mffprwz r3, f0
-; CHECK-LE-P9-NEXT:mtfprwz f0, r3
 ; CHECK-LE-P9-NEXT:xxinsertw v2, vs0, 0
 ; CHECK-LE-P9-NEXT:blr
 ;
@@ -70,9 +66,7 @@
 ;
 ; CHECK-BE-P8-LABEL: test:
 ; CHECK-BE-P8:   # %bb.0: # %entry
-; CHECK-BE-P8-NEXT:xscvdpsxws f0, f1
-; 

[PATCH] D109902: [PowerPC] Improved codegen related to xscvdpsxws/xscvdpuxws

2021-09-28 Thread Nemanja Ivanovic via Phabricator via cfe-commits
nemanjai requested changes to this revision.
nemanjai added a comment.
This revision now requires changes to proceed.

I really don't understand what happened now. It seems that you have simply 
reverted to an older version of this patch. The test case appears to not have 
been pre-committed any longer, the Power9 patterns and test cases are gone. 
What has happened here?


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D109902/new/

https://reviews.llvm.org/D109902

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D109902: [PowerPC] Improved codegen related to xscvdpsxws/xscvdpuxws

2021-09-28 Thread Albion Fung via Phabricator via cfe-commits
Conanap marked 2 inline comments as done.
Conanap added inline comments.



Comment at: llvm/lib/Target/PowerPC/PPCInstrVSX.td:4150
+0))>;
+def : Pat<(v4i32 (insertelt v4i32:$VEC, DblToUInt.A, 0)),
+  (v4i32 (XXINSERTW v4i32:$VEC,

nemanjai wrote:
> What am I missing here? It appears to me that this pattern is exactly the 
> same as the one above it. The same appears to be the case for all of these. 
> Also, why the change in naming convention?
This one is for the unsigned version, the above is for the signed version. 
Originally, using `DblToInt.A` gave me problems as `$A` are now used in two 
places, so I've changed the other variable names instead. I'll use `DblToInt.B` 
instead to match the convention, thanks!


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D109902/new/

https://reviews.llvm.org/D109902

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D109902: [PowerPC] Improved codegen related to xscvdpsxws/xscvdpuxws

2021-09-28 Thread Albion Fung via Phabricator via cfe-commits
Conanap updated this revision to Diff 375667.
Conanap added a comment.

Added context, changed var names in aptterns


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D109902/new/

https://reviews.llvm.org/D109902

Files:
  llvm/lib/Target/PowerPC/PPCInstrVSX.td
  llvm/test/CodeGen/PowerPC/test-vector-insert.ll
  llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll

Index: llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll
===
--- llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll
+++ llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll
@@ -13,12 +13,8 @@
 ; CHECK-P8-LABEL: test2elt:
 ; CHECK-P8:   # %bb.0: # %entry
 ; CHECK-P8-NEXT:xxswapd vs0, v2
-; CHECK-P8-NEXT:xscvdpuxws f1, v2
-; CHECK-P8-NEXT:xscvdpuxws f0, f0
-; CHECK-P8-NEXT:mffprwz r3, f1
-; CHECK-P8-NEXT:mtvsrwz v2, r3
-; CHECK-P8-NEXT:mffprwz r4, f0
-; CHECK-P8-NEXT:mtvsrwz v3, r4
+; CHECK-P8-NEXT:xscvdpuxws v2, v2
+; CHECK-P8-NEXT:xscvdpuxws v3, f0
 ; CHECK-P8-NEXT:vmrghw v2, v2, v3
 ; CHECK-P8-NEXT:xxswapd vs0, v2
 ; CHECK-P8-NEXT:mffprd r3, f0
@@ -26,26 +22,18 @@
 ;
 ; CHECK-P9-LABEL: test2elt:
 ; CHECK-P9:   # %bb.0: # %entry
-; CHECK-P9-NEXT:xscvdpuxws f0, v2
-; CHECK-P9-NEXT:mffprwz r3, f0
 ; CHECK-P9-NEXT:xxswapd vs0, v2
-; CHECK-P9-NEXT:mtvsrwz v3, r3
-; CHECK-P9-NEXT:xscvdpuxws f0, f0
-; CHECK-P9-NEXT:mffprwz r3, f0
-; CHECK-P9-NEXT:mtvsrwz v2, r3
+; CHECK-P9-NEXT:xscvdpuxws v3, v2
+; CHECK-P9-NEXT:xscvdpuxws v2, f0
 ; CHECK-P9-NEXT:vmrghw v2, v3, v2
 ; CHECK-P9-NEXT:mfvsrld r3, v2
 ; CHECK-P9-NEXT:blr
 ;
 ; CHECK-BE-LABEL: test2elt:
 ; CHECK-BE:   # %bb.0: # %entry
-; CHECK-BE-NEXT:xscvdpuxws f0, v2
-; CHECK-BE-NEXT:mffprwz r3, f0
 ; CHECK-BE-NEXT:xxswapd vs0, v2
-; CHECK-BE-NEXT:mtvsrwz v3, r3
-; CHECK-BE-NEXT:xscvdpuxws f0, f0
-; CHECK-BE-NEXT:mffprwz r3, f0
-; CHECK-BE-NEXT:mtvsrwz v2, r3
+; CHECK-BE-NEXT:xscvdpuxws v3, v2
+; CHECK-BE-NEXT:xscvdpuxws v2, f0
 ; CHECK-BE-NEXT:vmrgow v2, v3, v2
 ; CHECK-BE-NEXT:mfvsrd r3, v2
 ; CHECK-BE-NEXT:blr
@@ -305,12 +293,8 @@
 ; CHECK-P8-LABEL: test2elt_signed:
 ; CHECK-P8:   # %bb.0: # %entry
 ; CHECK-P8-NEXT:xxswapd vs0, v2
-; CHECK-P8-NEXT:xscvdpsxws f1, v2
-; CHECK-P8-NEXT:xscvdpsxws f0, f0
-; CHECK-P8-NEXT:mffprwz r3, f1
-; CHECK-P8-NEXT:mtvsrwz v2, r3
-; CHECK-P8-NEXT:mffprwz r4, f0
-; CHECK-P8-NEXT:mtvsrwz v3, r4
+; CHECK-P8-NEXT:xscvdpsxws v2, v2
+; CHECK-P8-NEXT:xscvdpsxws v3, f0
 ; CHECK-P8-NEXT:vmrghw v2, v2, v3
 ; CHECK-P8-NEXT:xxswapd vs0, v2
 ; CHECK-P8-NEXT:mffprd r3, f0
@@ -318,26 +302,18 @@
 ;
 ; CHECK-P9-LABEL: test2elt_signed:
 ; CHECK-P9:   # %bb.0: # %entry
-; CHECK-P9-NEXT:xscvdpsxws f0, v2
-; CHECK-P9-NEXT:mffprwz r3, f0
 ; CHECK-P9-NEXT:xxswapd vs0, v2
-; CHECK-P9-NEXT:mtvsrwz v3, r3
-; CHECK-P9-NEXT:xscvdpsxws f0, f0
-; CHECK-P9-NEXT:mffprwz r3, f0
-; CHECK-P9-NEXT:mtvsrwz v2, r3
+; CHECK-P9-NEXT:xscvdpsxws v3, v2
+; CHECK-P9-NEXT:xscvdpsxws v2, f0
 ; CHECK-P9-NEXT:vmrghw v2, v3, v2
 ; CHECK-P9-NEXT:mfvsrld r3, v2
 ; CHECK-P9-NEXT:blr
 ;
 ; CHECK-BE-LABEL: test2elt_signed:
 ; CHECK-BE:   # %bb.0: # %entry
-; CHECK-BE-NEXT:xscvdpsxws f0, v2
-; CHECK-BE-NEXT:mffprwz r3, f0
 ; CHECK-BE-NEXT:xxswapd vs0, v2
-; CHECK-BE-NEXT:mtvsrwz v3, r3
-; CHECK-BE-NEXT:xscvdpsxws f0, f0
-; CHECK-BE-NEXT:mffprwz r3, f0
-; CHECK-BE-NEXT:mtvsrwz v2, r3
+; CHECK-BE-NEXT:xscvdpsxws v3, v2
+; CHECK-BE-NEXT:xscvdpsxws v2, f0
 ; CHECK-BE-NEXT:vmrgow v2, v3, v2
 ; CHECK-BE-NEXT:mfvsrd r3, v2
 ; CHECK-BE-NEXT:blr
Index: llvm/test/CodeGen/PowerPC/test-vector-insert.ll
===
--- /dev/null
+++ llvm/test/CodeGen/PowerPC/test-vector-insert.ll
@@ -0,0 +1,219 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:  -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
+; RUN:  -mcpu=pwr8 < %s | FileCheck %s --check-prefix=CHECK-LE-P8
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:  -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
+; RUN:  -mcpu=pwr7 < %s | FileCheck %s --check-prefix=CHECK-LE-P7
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:  -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
+; RUN:  -mcpu=pwr7 < %s | FileCheck %s --check-prefix=CHECK-BE-P7
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:  -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
+; RUN:  -mcpu=pwr8 < %s | FileCheck %s --check-prefix=CHECK-BE-P8
+; xscvdpsxws and uxws is only available on Power7 and above
+; Codgen is different for LE Power7 and Power8
+
+define dso_local <4 x i32> @test(<4 x 

[PATCH] D109902: [PowerPC] Improved codegen related to xscvdpsxws/xscvdpuxws

2021-09-28 Thread Nemanja Ivanovic via Phabricator via cfe-commits
nemanjai requested changes to this revision.
nemanjai added a comment.
This revision now requires changes to proceed.

I cannot tell without context whether the codegen changes for the `f32->i32` 
convert + insert case (for example `@test2` in 
`test/CodeGen/PowerPC/test-vector-insert.ll`) but it should change.




Comment at: llvm/lib/Target/PowerPC/PPCInstrVSX.td:4145
   (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 0))>;
+def : Pat<(v4i32 (insertelt v4i32:$VEC, DblToInt.A, 0)),
+  (v4i32 (XXINSERTW v4i32:$VEC,

I assume this is the big endian Power9 block (and the below is the little 
endian). However, I can't confirm that since the context for the patch has 
disappeared somehow.



Comment at: llvm/lib/Target/PowerPC/PPCInstrVSX.td:4150
+0))>;
+def : Pat<(v4i32 (insertelt v4i32:$VEC, DblToUInt.A, 0)),
+  (v4i32 (XXINSERTW v4i32:$VEC,

What am I missing here? It appears to me that this pattern is exactly the same 
as the one above it. The same appears to be the case for all of these. Also, 
why the change in naming convention?


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D109902/new/

https://reviews.llvm.org/D109902

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D109902: [PowerPC] Improved codegen related to xscvdpsxws/xscvdpuxws

2021-09-24 Thread Albion Fung via Phabricator via cfe-commits
Conanap updated this revision to Diff 374906.
Conanap added a comment.

This modifies a test case introduced in this commit: 
https://github.com/llvm/llvm-project/commit/3678df5ae6618eec656ae0ea0dab3be09d73bc9a


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D109902/new/

https://reviews.llvm.org/D109902

Files:
  llvm/lib/Target/PowerPC/PPCInstrVSX.td
  llvm/test/CodeGen/PowerPC/test-vector-insert.ll
  llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll

Index: llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll
===
--- llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll
+++ llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll
@@ -13,12 +13,8 @@
 ; CHECK-P8-LABEL: test2elt:
 ; CHECK-P8:   # %bb.0: # %entry
 ; CHECK-P8-NEXT:xxswapd vs0, v2
-; CHECK-P8-NEXT:xscvdpuxws f1, v2
-; CHECK-P8-NEXT:xscvdpuxws f0, f0
-; CHECK-P8-NEXT:mffprwz r3, f1
-; CHECK-P8-NEXT:mtvsrwz v2, r3
-; CHECK-P8-NEXT:mffprwz r4, f0
-; CHECK-P8-NEXT:mtvsrwz v3, r4
+; CHECK-P8-NEXT:xscvdpuxws v2, v2
+; CHECK-P8-NEXT:xscvdpuxws v3, f0
 ; CHECK-P8-NEXT:vmrghw v2, v2, v3
 ; CHECK-P8-NEXT:xxswapd vs0, v2
 ; CHECK-P8-NEXT:mffprd r3, f0
@@ -26,26 +22,18 @@
 ;
 ; CHECK-P9-LABEL: test2elt:
 ; CHECK-P9:   # %bb.0: # %entry
-; CHECK-P9-NEXT:xscvdpuxws f0, v2
-; CHECK-P9-NEXT:mffprwz r3, f0
 ; CHECK-P9-NEXT:xxswapd vs0, v2
-; CHECK-P9-NEXT:mtvsrwz v3, r3
-; CHECK-P9-NEXT:xscvdpuxws f0, f0
-; CHECK-P9-NEXT:mffprwz r3, f0
-; CHECK-P9-NEXT:mtvsrwz v2, r3
+; CHECK-P9-NEXT:xscvdpuxws v3, v2
+; CHECK-P9-NEXT:xscvdpuxws v2, f0
 ; CHECK-P9-NEXT:vmrghw v2, v3, v2
 ; CHECK-P9-NEXT:mfvsrld r3, v2
 ; CHECK-P9-NEXT:blr
 ;
 ; CHECK-BE-LABEL: test2elt:
 ; CHECK-BE:   # %bb.0: # %entry
-; CHECK-BE-NEXT:xscvdpuxws f0, v2
-; CHECK-BE-NEXT:mffprwz r3, f0
 ; CHECK-BE-NEXT:xxswapd vs0, v2
-; CHECK-BE-NEXT:mtvsrwz v3, r3
-; CHECK-BE-NEXT:xscvdpuxws f0, f0
-; CHECK-BE-NEXT:mffprwz r3, f0
-; CHECK-BE-NEXT:mtvsrwz v2, r3
+; CHECK-BE-NEXT:xscvdpuxws v3, v2
+; CHECK-BE-NEXT:xscvdpuxws v2, f0
 ; CHECK-BE-NEXT:vmrgow v2, v3, v2
 ; CHECK-BE-NEXT:mfvsrd r3, v2
 ; CHECK-BE-NEXT:blr
@@ -305,12 +293,8 @@
 ; CHECK-P8-LABEL: test2elt_signed:
 ; CHECK-P8:   # %bb.0: # %entry
 ; CHECK-P8-NEXT:xxswapd vs0, v2
-; CHECK-P8-NEXT:xscvdpsxws f1, v2
-; CHECK-P8-NEXT:xscvdpsxws f0, f0
-; CHECK-P8-NEXT:mffprwz r3, f1
-; CHECK-P8-NEXT:mtvsrwz v2, r3
-; CHECK-P8-NEXT:mffprwz r4, f0
-; CHECK-P8-NEXT:mtvsrwz v3, r4
+; CHECK-P8-NEXT:xscvdpsxws v2, v2
+; CHECK-P8-NEXT:xscvdpsxws v3, f0
 ; CHECK-P8-NEXT:vmrghw v2, v2, v3
 ; CHECK-P8-NEXT:xxswapd vs0, v2
 ; CHECK-P8-NEXT:mffprd r3, f0
@@ -318,26 +302,18 @@
 ;
 ; CHECK-P9-LABEL: test2elt_signed:
 ; CHECK-P9:   # %bb.0: # %entry
-; CHECK-P9-NEXT:xscvdpsxws f0, v2
-; CHECK-P9-NEXT:mffprwz r3, f0
 ; CHECK-P9-NEXT:xxswapd vs0, v2
-; CHECK-P9-NEXT:mtvsrwz v3, r3
-; CHECK-P9-NEXT:xscvdpsxws f0, f0
-; CHECK-P9-NEXT:mffprwz r3, f0
-; CHECK-P9-NEXT:mtvsrwz v2, r3
+; CHECK-P9-NEXT:xscvdpsxws v3, v2
+; CHECK-P9-NEXT:xscvdpsxws v2, f0
 ; CHECK-P9-NEXT:vmrghw v2, v3, v2
 ; CHECK-P9-NEXT:mfvsrld r3, v2
 ; CHECK-P9-NEXT:blr
 ;
 ; CHECK-BE-LABEL: test2elt_signed:
 ; CHECK-BE:   # %bb.0: # %entry
-; CHECK-BE-NEXT:xscvdpsxws f0, v2
-; CHECK-BE-NEXT:mffprwz r3, f0
 ; CHECK-BE-NEXT:xxswapd vs0, v2
-; CHECK-BE-NEXT:mtvsrwz v3, r3
-; CHECK-BE-NEXT:xscvdpsxws f0, f0
-; CHECK-BE-NEXT:mffprwz r3, f0
-; CHECK-BE-NEXT:mtvsrwz v2, r3
+; CHECK-BE-NEXT:xscvdpsxws v3, v2
+; CHECK-BE-NEXT:xscvdpsxws v2, f0
 ; CHECK-BE-NEXT:vmrgow v2, v3, v2
 ; CHECK-BE-NEXT:mfvsrd r3, v2
 ; CHECK-BE-NEXT:blr
Index: llvm/test/CodeGen/PowerPC/test-vector-insert.ll
===
--- llvm/test/CodeGen/PowerPC/test-vector-insert.ll
+++ llvm/test/CodeGen/PowerPC/test-vector-insert.ll
@@ -38,20 +38,16 @@
 ;
 ; CHECK-LE-P8-LABEL: test:
 ; CHECK-LE-P8:   # %bb.0: # %entry
-; CHECK-LE-P8-NEXT:xscvdpsxws f0, f1
+; CHECK-LE-P8-NEXT:xscvdpsxws v3, f1
 ; CHECK-LE-P8-NEXT:addis r3, r2, .LCPI0_0@toc@ha
 ; CHECK-LE-P8-NEXT:addi r3, r3, .LCPI0_0@toc@l
-; CHECK-LE-P8-NEXT:lvx v3, 0, r3
-; CHECK-LE-P8-NEXT:mffprwz r4, f0
-; CHECK-LE-P8-NEXT:mtvsrwz v4, r4
-; CHECK-LE-P8-NEXT:vperm v2, v4, v2, v3
+; CHECK-LE-P8-NEXT:lvx v4, 0, r3
+; CHECK-LE-P8-NEXT:vperm v2, v3, v2, v4
 ; CHECK-LE-P8-NEXT:blr
 ;
 ; CHECK-LE-P9-LABEL: test:
 ; CHECK-LE-P9:   # %bb.0: # %entry
 ; CHECK-LE-P9-NEXT:xscvdpsxws f0, f1
-; CHECK-LE-P9-NEXT:mffprwz r3, f0
-; CHECK-LE-P9-NEXT:mtfprwz f0, r3
 ; CHECK-LE-P9-NEXT:xxinsertw v2, vs0, 0
 ; CHECK-LE-P9-NEXT:blr
 ;
@@ -70,9 +66,7 @@
 ;
 ; CHECK-BE-P8-LABEL: test:
 ; CHECK-BE-P8:   # %bb.0: 

[PATCH] D109902: [PowerPC] Improved codegen related to xscvdpsxws/xscvdpuxws

2021-09-23 Thread Nemanja Ivanovic via Phabricator via cfe-commits
nemanjai requested changes to this revision.
nemanjai added a comment.
This revision now requires changes to proceed.

There isn't enough information in the test to determine whether this adequately 
improves code generation (in fact, whether it improves code generation at all).




Comment at: llvm/test/CodeGen/PowerPC/test-vector-insert.ll:1
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \

Test cases where we improve code generation should be pre-committed as an NFC 
change so that the review shows just the differences in code generation. It is 
not easy to evaluate whether the patch improves the code or not without seeing 
how it changed.



Comment at: llvm/test/CodeGen/PowerPC/test-vector-insert.ll:5
+; RUN:  -mcpu=pwr8 < %s | FileCheck %s --check-prefix=CHECK-LE-P8
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:  -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \

Code generation on Power9 is important as well. Please add that test.



Comment at: llvm/test/CodeGen/PowerPC/test-vector-insert.ll:15
+; xscvdpsxws and uxws is only available on Power7 and above
+; Codgen is different for LE Power7 and Power8
+

nit: s/Codgen/Codegen


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D109902/new/

https://reviews.llvm.org/D109902

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D109902: [PowerPC] Improved codegen related to xscvdpsxws/xscvdpuxws

2021-09-23 Thread Amy Kwan via Phabricator via cfe-commits
amyk accepted this revision as: amyk.
amyk added a comment.
This revision is now accepted and ready to land.

LGTM. Thanks for addressing comments.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D109902/new/

https://reviews.llvm.org/D109902

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D109902: [PowerPC] Improved codegen related to xscvdpsxws/xscvdpuxws

2021-09-23 Thread Albion Fung via Phabricator via cfe-commits
Conanap updated this revision to Diff 374614.
Conanap added a comment.

Added P7  and P8  run 
lines for BE


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D109902/new/

https://reviews.llvm.org/D109902

Files:
  llvm/lib/Target/PowerPC/PPCInstrVSX.td
  llvm/test/CodeGen/PowerPC/test-vector-insert.ll
  llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll

Index: llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll
===
--- llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll
+++ llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll
@@ -13,12 +13,8 @@
 ; CHECK-P8-LABEL: test2elt:
 ; CHECK-P8:   # %bb.0: # %entry
 ; CHECK-P8-NEXT:xxswapd vs0, v2
-; CHECK-P8-NEXT:xscvdpuxws f1, v2
-; CHECK-P8-NEXT:xscvdpuxws f0, f0
-; CHECK-P8-NEXT:mffprwz r3, f1
-; CHECK-P8-NEXT:mtvsrwz v2, r3
-; CHECK-P8-NEXT:mffprwz r4, f0
-; CHECK-P8-NEXT:mtvsrwz v3, r4
+; CHECK-P8-NEXT:xscvdpuxws v2, v2
+; CHECK-P8-NEXT:xscvdpuxws v3, f0
 ; CHECK-P8-NEXT:vmrghw v2, v2, v3
 ; CHECK-P8-NEXT:xxswapd vs0, v2
 ; CHECK-P8-NEXT:mffprd r3, f0
@@ -26,26 +22,18 @@
 ;
 ; CHECK-P9-LABEL: test2elt:
 ; CHECK-P9:   # %bb.0: # %entry
-; CHECK-P9-NEXT:xscvdpuxws f0, v2
-; CHECK-P9-NEXT:mffprwz r3, f0
 ; CHECK-P9-NEXT:xxswapd vs0, v2
-; CHECK-P9-NEXT:mtvsrwz v3, r3
-; CHECK-P9-NEXT:xscvdpuxws f0, f0
-; CHECK-P9-NEXT:mffprwz r3, f0
-; CHECK-P9-NEXT:mtvsrwz v2, r3
+; CHECK-P9-NEXT:xscvdpuxws v3, v2
+; CHECK-P9-NEXT:xscvdpuxws v2, f0
 ; CHECK-P9-NEXT:vmrghw v2, v3, v2
 ; CHECK-P9-NEXT:mfvsrld r3, v2
 ; CHECK-P9-NEXT:blr
 ;
 ; CHECK-BE-LABEL: test2elt:
 ; CHECK-BE:   # %bb.0: # %entry
-; CHECK-BE-NEXT:xscvdpuxws f0, v2
-; CHECK-BE-NEXT:mffprwz r3, f0
 ; CHECK-BE-NEXT:xxswapd vs0, v2
-; CHECK-BE-NEXT:mtvsrwz v3, r3
-; CHECK-BE-NEXT:xscvdpuxws f0, f0
-; CHECK-BE-NEXT:mffprwz r3, f0
-; CHECK-BE-NEXT:mtvsrwz v2, r3
+; CHECK-BE-NEXT:xscvdpuxws v3, v2
+; CHECK-BE-NEXT:xscvdpuxws v2, f0
 ; CHECK-BE-NEXT:vmrgow v2, v3, v2
 ; CHECK-BE-NEXT:mfvsrd r3, v2
 ; CHECK-BE-NEXT:blr
@@ -305,12 +293,8 @@
 ; CHECK-P8-LABEL: test2elt_signed:
 ; CHECK-P8:   # %bb.0: # %entry
 ; CHECK-P8-NEXT:xxswapd vs0, v2
-; CHECK-P8-NEXT:xscvdpsxws f1, v2
-; CHECK-P8-NEXT:xscvdpsxws f0, f0
-; CHECK-P8-NEXT:mffprwz r3, f1
-; CHECK-P8-NEXT:mtvsrwz v2, r3
-; CHECK-P8-NEXT:mffprwz r4, f0
-; CHECK-P8-NEXT:mtvsrwz v3, r4
+; CHECK-P8-NEXT:xscvdpsxws v2, v2
+; CHECK-P8-NEXT:xscvdpsxws v3, f0
 ; CHECK-P8-NEXT:vmrghw v2, v2, v3
 ; CHECK-P8-NEXT:xxswapd vs0, v2
 ; CHECK-P8-NEXT:mffprd r3, f0
@@ -318,26 +302,18 @@
 ;
 ; CHECK-P9-LABEL: test2elt_signed:
 ; CHECK-P9:   # %bb.0: # %entry
-; CHECK-P9-NEXT:xscvdpsxws f0, v2
-; CHECK-P9-NEXT:mffprwz r3, f0
 ; CHECK-P9-NEXT:xxswapd vs0, v2
-; CHECK-P9-NEXT:mtvsrwz v3, r3
-; CHECK-P9-NEXT:xscvdpsxws f0, f0
-; CHECK-P9-NEXT:mffprwz r3, f0
-; CHECK-P9-NEXT:mtvsrwz v2, r3
+; CHECK-P9-NEXT:xscvdpsxws v3, v2
+; CHECK-P9-NEXT:xscvdpsxws v2, f0
 ; CHECK-P9-NEXT:vmrghw v2, v3, v2
 ; CHECK-P9-NEXT:mfvsrld r3, v2
 ; CHECK-P9-NEXT:blr
 ;
 ; CHECK-BE-LABEL: test2elt_signed:
 ; CHECK-BE:   # %bb.0: # %entry
-; CHECK-BE-NEXT:xscvdpsxws f0, v2
-; CHECK-BE-NEXT:mffprwz r3, f0
 ; CHECK-BE-NEXT:xxswapd vs0, v2
-; CHECK-BE-NEXT:mtvsrwz v3, r3
-; CHECK-BE-NEXT:xscvdpsxws f0, f0
-; CHECK-BE-NEXT:mffprwz r3, f0
-; CHECK-BE-NEXT:mtvsrwz v2, r3
+; CHECK-BE-NEXT:xscvdpsxws v3, v2
+; CHECK-BE-NEXT:xscvdpsxws v2, f0
 ; CHECK-BE-NEXT:vmrgow v2, v3, v2
 ; CHECK-BE-NEXT:mfvsrd r3, v2
 ; CHECK-BE-NEXT:blr
Index: llvm/test/CodeGen/PowerPC/test-vector-insert.ll
===
--- /dev/null
+++ llvm/test/CodeGen/PowerPC/test-vector-insert.ll
@@ -0,0 +1,219 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:  -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
+; RUN:  -mcpu=pwr8 < %s | FileCheck %s --check-prefix=CHECK-LE-P8
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:  -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
+; RUN:  -mcpu=pwr7 < %s | FileCheck %s --check-prefix=CHECK-LE-P7
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:  -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
+; RUN:  -mcpu=pwr7 < %s | FileCheck %s --check-prefix=CHECK-BE-P7
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:  -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
+; RUN:  -mcpu=pwr8 < %s | FileCheck %s --check-prefix=CHECK-BE-P8
+; xscvdpsxws and uxws is only available on Power7 and above
+; Codgen is different for LE Power7 and 

[PATCH] D109902: [PowerPC] Improved codegen related to xscvdpsxws/xscvdpuxws

2021-09-23 Thread Albion Fung via Phabricator via cfe-commits
Conanap marked 3 inline comments as done.
Conanap added inline comments.



Comment at: llvm/test/CodeGen/PowerPC/test-vector-insert.ll:8
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-aix \
+; RUN:  -mcpu=pwr7 < %s | FileCheck %s
+

kamaub wrote:
> It seems like this run line is redundant, it produces the same assembly as 
> the big endian specific line above. Maybe the `-mcpu=pwr7` can be moved to 
> the first Little-endian specific run line? That line currently only test the 
> target cpu of the test machine.
good point, thanks!


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D109902/new/

https://reviews.llvm.org/D109902

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D109902: [PowerPC] Improved codegen related to xscvdpsxws/xscvdpuxws

2021-09-23 Thread Albion Fung via Phabricator via cfe-commits
Conanap updated this revision to Diff 374578.
Conanap added a comment.

Updated test cases, fixed a typo


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D109902/new/

https://reviews.llvm.org/D109902

Files:
  llvm/lib/Target/PowerPC/PPCInstrVSX.td
  llvm/test/CodeGen/PowerPC/test-vector-insert.ll
  llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll

Index: llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll
===
--- llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll
+++ llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll
@@ -13,12 +13,8 @@
 ; CHECK-P8-LABEL: test2elt:
 ; CHECK-P8:   # %bb.0: # %entry
 ; CHECK-P8-NEXT:xxswapd vs0, v2
-; CHECK-P8-NEXT:xscvdpuxws f1, v2
-; CHECK-P8-NEXT:xscvdpuxws f0, f0
-; CHECK-P8-NEXT:mffprwz r3, f1
-; CHECK-P8-NEXT:mtvsrwz v2, r3
-; CHECK-P8-NEXT:mffprwz r4, f0
-; CHECK-P8-NEXT:mtvsrwz v3, r4
+; CHECK-P8-NEXT:xscvdpuxws v2, v2
+; CHECK-P8-NEXT:xscvdpuxws v3, f0
 ; CHECK-P8-NEXT:vmrghw v2, v2, v3
 ; CHECK-P8-NEXT:xxswapd vs0, v2
 ; CHECK-P8-NEXT:mffprd r3, f0
@@ -26,26 +22,18 @@
 ;
 ; CHECK-P9-LABEL: test2elt:
 ; CHECK-P9:   # %bb.0: # %entry
-; CHECK-P9-NEXT:xscvdpuxws f0, v2
-; CHECK-P9-NEXT:mffprwz r3, f0
 ; CHECK-P9-NEXT:xxswapd vs0, v2
-; CHECK-P9-NEXT:mtvsrwz v3, r3
-; CHECK-P9-NEXT:xscvdpuxws f0, f0
-; CHECK-P9-NEXT:mffprwz r3, f0
-; CHECK-P9-NEXT:mtvsrwz v2, r3
+; CHECK-P9-NEXT:xscvdpuxws v3, v2
+; CHECK-P9-NEXT:xscvdpuxws v2, f0
 ; CHECK-P9-NEXT:vmrghw v2, v3, v2
 ; CHECK-P9-NEXT:mfvsrld r3, v2
 ; CHECK-P9-NEXT:blr
 ;
 ; CHECK-BE-LABEL: test2elt:
 ; CHECK-BE:   # %bb.0: # %entry
-; CHECK-BE-NEXT:xscvdpuxws f0, v2
-; CHECK-BE-NEXT:mffprwz r3, f0
 ; CHECK-BE-NEXT:xxswapd vs0, v2
-; CHECK-BE-NEXT:mtvsrwz v3, r3
-; CHECK-BE-NEXT:xscvdpuxws f0, f0
-; CHECK-BE-NEXT:mffprwz r3, f0
-; CHECK-BE-NEXT:mtvsrwz v2, r3
+; CHECK-BE-NEXT:xscvdpuxws v3, v2
+; CHECK-BE-NEXT:xscvdpuxws v2, f0
 ; CHECK-BE-NEXT:vmrgow v2, v3, v2
 ; CHECK-BE-NEXT:mfvsrd r3, v2
 ; CHECK-BE-NEXT:blr
@@ -305,12 +293,8 @@
 ; CHECK-P8-LABEL: test2elt_signed:
 ; CHECK-P8:   # %bb.0: # %entry
 ; CHECK-P8-NEXT:xxswapd vs0, v2
-; CHECK-P8-NEXT:xscvdpsxws f1, v2
-; CHECK-P8-NEXT:xscvdpsxws f0, f0
-; CHECK-P8-NEXT:mffprwz r3, f1
-; CHECK-P8-NEXT:mtvsrwz v2, r3
-; CHECK-P8-NEXT:mffprwz r4, f0
-; CHECK-P8-NEXT:mtvsrwz v3, r4
+; CHECK-P8-NEXT:xscvdpsxws v2, v2
+; CHECK-P8-NEXT:xscvdpsxws v3, f0
 ; CHECK-P8-NEXT:vmrghw v2, v2, v3
 ; CHECK-P8-NEXT:xxswapd vs0, v2
 ; CHECK-P8-NEXT:mffprd r3, f0
@@ -318,26 +302,18 @@
 ;
 ; CHECK-P9-LABEL: test2elt_signed:
 ; CHECK-P9:   # %bb.0: # %entry
-; CHECK-P9-NEXT:xscvdpsxws f0, v2
-; CHECK-P9-NEXT:mffprwz r3, f0
 ; CHECK-P9-NEXT:xxswapd vs0, v2
-; CHECK-P9-NEXT:mtvsrwz v3, r3
-; CHECK-P9-NEXT:xscvdpsxws f0, f0
-; CHECK-P9-NEXT:mffprwz r3, f0
-; CHECK-P9-NEXT:mtvsrwz v2, r3
+; CHECK-P9-NEXT:xscvdpsxws v3, v2
+; CHECK-P9-NEXT:xscvdpsxws v2, f0
 ; CHECK-P9-NEXT:vmrghw v2, v3, v2
 ; CHECK-P9-NEXT:mfvsrld r3, v2
 ; CHECK-P9-NEXT:blr
 ;
 ; CHECK-BE-LABEL: test2elt_signed:
 ; CHECK-BE:   # %bb.0: # %entry
-; CHECK-BE-NEXT:xscvdpsxws f0, v2
-; CHECK-BE-NEXT:mffprwz r3, f0
 ; CHECK-BE-NEXT:xxswapd vs0, v2
-; CHECK-BE-NEXT:mtvsrwz v3, r3
-; CHECK-BE-NEXT:xscvdpsxws f0, f0
-; CHECK-BE-NEXT:mffprwz r3, f0
-; CHECK-BE-NEXT:mtvsrwz v2, r3
+; CHECK-BE-NEXT:xscvdpsxws v3, v2
+; CHECK-BE-NEXT:xscvdpsxws v2, f0
 ; CHECK-BE-NEXT:vmrgow v2, v3, v2
 ; CHECK-BE-NEXT:mfvsrd r3, v2
 ; CHECK-BE-NEXT:blr
Index: llvm/test/CodeGen/PowerPC/test-vector-insert.ll
===
--- /dev/null
+++ llvm/test/CodeGen/PowerPC/test-vector-insert.ll
@@ -0,0 +1,184 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:  -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
+; RUN:  -mcpu=pwr8 < %s | FileCheck %s --check-prefix=CHECK-LE-P8
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:  -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
+; RUN:  -mcpu=pwr7 < %s | FileCheck %s --check-prefix=CHECK-LE-P7
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:  -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
+; RUN:  -mcpu=pwr7 < %s | FileCheck %s --check-prefix=CHECK-BE
+; xscvdpsxws and uxws is only available on Power7 and above
+; Codgen is different for LE Power7 and Power8
+
+define dso_local <4 x i32> @test(<4 x i32> %a, double %b) {
+; CHECK-LE-P8-LABEL: test:
+; CHECK-LE-P8:   # %bb.0: # %entry
+; CHECK-LE-P8-NEXT:xscvdpsxws v3, f1
+; CHECK-LE-P8-NEXT:addis r3, r2, .LCPI0_0@toc@ha
+; CHECK-LE-P8-NEXT:

[PATCH] D109902: [PowerPC] Improved codegen related to xscvdpsxws/xscvdpuxws

2021-09-23 Thread Amy Kwan via Phabricator via cfe-commits
amyk added inline comments.



Comment at: llvm/lib/Target/PowerPC/PPCInstrVSX.td:2815
+def : Pat<(v4i32 (PPCSToV DblToUInt.A)),
+  (v4i32 (SUBREG_TO_REG (i64 1), (XSCVDPSXWS f64:$A), sub_64))>;
 defm : ScalToVecWPermute<

This should be `XSCVDPUXWS`?



Comment at: llvm/test/CodeGen/PowerPC/test-vector-insert.ll:2
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; xscvdpsxws and uxws is only available on Power7 and above
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \

nit: Move this comment under the RUN lines.



Comment at: llvm/test/CodeGen/PowerPC/test-vector-insert.ll:3
+; xscvdpsxws and uxws is only available on Power7 and above
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:  -mcpu=pwr8 < %s | FileCheck %s --check-prefix=CHECK-LE

I'm not sure why P8 is LE run only line, and P7 is BE run line only. 
Maybe we should have LE/BE run lines for both P7 and P8 for more coverage. 
Furthermore, if both the LE/BE checks end up the same, we can do `CHECK-P7` and 
`CHECK-P8`. 

Also, since this looks like it's a Linux test, please add 
`-ppc-asm-full-reg-names -ppc-vsr-nums-as-vr`.



Comment at: llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll:16
 ; CHECK-P8-NEXT:xxswapd vs0, v2
-; CHECK-P8-NEXT:xscvdpuxws f1, v2
-; CHECK-P8-NEXT:xscvdpuxws f0, f0
-; CHECK-P8-NEXT:mffprwz r3, f1
-; CHECK-P8-NEXT:mtvsrwz v2, r3
-; CHECK-P8-NEXT:mffprwz r4, f0
-; CHECK-P8-NEXT:mtvsrwz v3, r4
+; CHECK-P8-NEXT:xscvdpsxws v2, v2
+; CHECK-P8-NEXT:xscvdpsxws v3, f0

This is an unsigned test case, so should be `xscvdpuxws`, right?


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D109902/new/

https://reviews.llvm.org/D109902

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D109902: [PowerPC] Improved codegen related to xscvdpsxws/xscvdpuxws

2021-09-21 Thread Albion Fung via Phabricator via cfe-commits
Conanap updated this revision to Diff 374059.
Conanap marked an inline comment as done.
Conanap added a comment.

Removed AIX test line as it has the same code gen as BE


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D109902/new/

https://reviews.llvm.org/D109902

Files:
  llvm/lib/Target/PowerPC/PPCInstrVSX.td
  llvm/test/CodeGen/PowerPC/test-vector-insert.ll
  llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll

Index: llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll
===
--- llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll
+++ llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll
@@ -13,12 +13,8 @@
 ; CHECK-P8-LABEL: test2elt:
 ; CHECK-P8:   # %bb.0: # %entry
 ; CHECK-P8-NEXT:xxswapd vs0, v2
-; CHECK-P8-NEXT:xscvdpuxws f1, v2
-; CHECK-P8-NEXT:xscvdpuxws f0, f0
-; CHECK-P8-NEXT:mffprwz r3, f1
-; CHECK-P8-NEXT:mtvsrwz v2, r3
-; CHECK-P8-NEXT:mffprwz r4, f0
-; CHECK-P8-NEXT:mtvsrwz v3, r4
+; CHECK-P8-NEXT:xscvdpsxws v2, v2
+; CHECK-P8-NEXT:xscvdpsxws v3, f0
 ; CHECK-P8-NEXT:vmrghw v2, v2, v3
 ; CHECK-P8-NEXT:xxswapd vs0, v2
 ; CHECK-P8-NEXT:mffprd r3, f0
@@ -26,26 +22,18 @@
 ;
 ; CHECK-P9-LABEL: test2elt:
 ; CHECK-P9:   # %bb.0: # %entry
-; CHECK-P9-NEXT:xscvdpuxws f0, v2
-; CHECK-P9-NEXT:mffprwz r3, f0
 ; CHECK-P9-NEXT:xxswapd vs0, v2
-; CHECK-P9-NEXT:mtvsrwz v3, r3
-; CHECK-P9-NEXT:xscvdpuxws f0, f0
-; CHECK-P9-NEXT:mffprwz r3, f0
-; CHECK-P9-NEXT:mtvsrwz v2, r3
+; CHECK-P9-NEXT:xscvdpsxws v3, v2
+; CHECK-P9-NEXT:xscvdpsxws v2, f0
 ; CHECK-P9-NEXT:vmrghw v2, v3, v2
 ; CHECK-P9-NEXT:mfvsrld r3, v2
 ; CHECK-P9-NEXT:blr
 ;
 ; CHECK-BE-LABEL: test2elt:
 ; CHECK-BE:   # %bb.0: # %entry
-; CHECK-BE-NEXT:xscvdpuxws f0, v2
-; CHECK-BE-NEXT:mffprwz r3, f0
 ; CHECK-BE-NEXT:xxswapd vs0, v2
-; CHECK-BE-NEXT:mtvsrwz v3, r3
-; CHECK-BE-NEXT:xscvdpuxws f0, f0
-; CHECK-BE-NEXT:mffprwz r3, f0
-; CHECK-BE-NEXT:mtvsrwz v2, r3
+; CHECK-BE-NEXT:xscvdpsxws v3, v2
+; CHECK-BE-NEXT:xscvdpsxws v2, f0
 ; CHECK-BE-NEXT:vmrgow v2, v3, v2
 ; CHECK-BE-NEXT:mfvsrd r3, v2
 ; CHECK-BE-NEXT:blr
@@ -305,12 +293,8 @@
 ; CHECK-P8-LABEL: test2elt_signed:
 ; CHECK-P8:   # %bb.0: # %entry
 ; CHECK-P8-NEXT:xxswapd vs0, v2
-; CHECK-P8-NEXT:xscvdpsxws f1, v2
-; CHECK-P8-NEXT:xscvdpsxws f0, f0
-; CHECK-P8-NEXT:mffprwz r3, f1
-; CHECK-P8-NEXT:mtvsrwz v2, r3
-; CHECK-P8-NEXT:mffprwz r4, f0
-; CHECK-P8-NEXT:mtvsrwz v3, r4
+; CHECK-P8-NEXT:xscvdpsxws v2, v2
+; CHECK-P8-NEXT:xscvdpsxws v3, f0
 ; CHECK-P8-NEXT:vmrghw v2, v2, v3
 ; CHECK-P8-NEXT:xxswapd vs0, v2
 ; CHECK-P8-NEXT:mffprd r3, f0
@@ -318,26 +302,18 @@
 ;
 ; CHECK-P9-LABEL: test2elt_signed:
 ; CHECK-P9:   # %bb.0: # %entry
-; CHECK-P9-NEXT:xscvdpsxws f0, v2
-; CHECK-P9-NEXT:mffprwz r3, f0
 ; CHECK-P9-NEXT:xxswapd vs0, v2
-; CHECK-P9-NEXT:mtvsrwz v3, r3
-; CHECK-P9-NEXT:xscvdpsxws f0, f0
-; CHECK-P9-NEXT:mffprwz r3, f0
-; CHECK-P9-NEXT:mtvsrwz v2, r3
+; CHECK-P9-NEXT:xscvdpsxws v3, v2
+; CHECK-P9-NEXT:xscvdpsxws v2, f0
 ; CHECK-P9-NEXT:vmrghw v2, v3, v2
 ; CHECK-P9-NEXT:mfvsrld r3, v2
 ; CHECK-P9-NEXT:blr
 ;
 ; CHECK-BE-LABEL: test2elt_signed:
 ; CHECK-BE:   # %bb.0: # %entry
-; CHECK-BE-NEXT:xscvdpsxws f0, v2
-; CHECK-BE-NEXT:mffprwz r3, f0
 ; CHECK-BE-NEXT:xxswapd vs0, v2
-; CHECK-BE-NEXT:mtvsrwz v3, r3
-; CHECK-BE-NEXT:xscvdpsxws f0, f0
-; CHECK-BE-NEXT:mffprwz r3, f0
-; CHECK-BE-NEXT:mtvsrwz v2, r3
+; CHECK-BE-NEXT:xscvdpsxws v3, v2
+; CHECK-BE-NEXT:xscvdpsxws v2, f0
 ; CHECK-BE-NEXT:vmrgow v2, v3, v2
 ; CHECK-BE-NEXT:mfvsrd r3, v2
 ; CHECK-BE-NEXT:blr
Index: llvm/test/CodeGen/PowerPC/test-vector-insert.ll
===
--- /dev/null
+++ llvm/test/CodeGen/PowerPC/test-vector-insert.ll
@@ -0,0 +1,118 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; xscvdpsxws and uxws is only available on Power7 and above
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:  -mcpu=pwr8 < %s | FileCheck %s --check-prefix=CHECK-LE
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:  -mcpu=pwr7 < %s | FileCheck %s --check-prefix=CHECK-BE
+
+define dso_local <4 x i32> @test(<4 x i32> %a, double %b) {
+; CHECK-LE-LABEL: test:
+; CHECK-LE:   # %bb.0: # %entry
+; CHECK-LE-NEXT:xscvdpsxws 35, 1
+; CHECK-LE-NEXT:addis 3, 2, .LCPI0_0@toc@ha
+; CHECK-LE-NEXT:addi 3, 3, .LCPI0_0@toc@l
+; CHECK-LE-NEXT:lvx 4, 0, 3
+; CHECK-LE-NEXT:vperm 2, 3, 2, 4
+; CHECK-LE-NEXT:blr
+;
+; CHECK-BE-LABEL: test:
+; CHECK-BE:   # %bb.0: # %entry
+; CHECK-BE-NEXT:xscvdpsxws 0, 1
+; CHECK-BE-NEXT:addi 3, 1, -4
+; CHECK-BE-NEXT:stfiwx 0, 0, 3
+; CHECK-BE-NEXT: 

[PATCH] D109902: [PowerPC] Improved codegen related to xscvdpsxws/xscvdpuxws

2021-09-21 Thread Kamau Bridgeman via Phabricator via cfe-commits
kamaub added a reviewer: kamaub.
kamaub added a comment.

This patch seems almost ready to land to me, I'm just a bit concerned about the 
testing coverage, 
is the little endian testing case suppose to target `pwr7` as the big endian 
test does? The default 
FileCheck line seems redundant to me, did I misunderstand?




Comment at: llvm/test/CodeGen/PowerPC/test-vector-insert.ll:8
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-aix \
+; RUN:  -mcpu=pwr7 < %s | FileCheck %s
+

It seems like this run line is redundant, it produces the same assembly as the 
big endian specific line above. Maybe the `-mcpu=pwr7` can be moved to the 
first Little-endian specific run line? That line currently only test the target 
cpu of the test machine.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D109902/new/

https://reviews.llvm.org/D109902

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D109902: [PowerPC] Improved codegen related to xscvdpsxws/xscvdpuxws

2021-09-16 Thread Albion Fung via Phabricator via cfe-commits
Conanap updated this revision to Diff 373045.
Conanap edited the summary of this revision.
Conanap added a comment.

Removed complexity and restored a test case


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D109902/new/

https://reviews.llvm.org/D109902

Files:
  llvm/lib/Target/PowerPC/PPCInstrVSX.td
  llvm/test/CodeGen/PowerPC/test-vector-insert.ll
  llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll

Index: llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll
===
--- llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll
+++ llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll
@@ -13,12 +13,8 @@
 ; CHECK-P8-LABEL: test2elt:
 ; CHECK-P8:   # %bb.0: # %entry
 ; CHECK-P8-NEXT:xxswapd vs0, v2
-; CHECK-P8-NEXT:xscvdpuxws f1, v2
-; CHECK-P8-NEXT:xscvdpuxws f0, f0
-; CHECK-P8-NEXT:mffprwz r3, f1
-; CHECK-P8-NEXT:mtvsrwz v2, r3
-; CHECK-P8-NEXT:mffprwz r4, f0
-; CHECK-P8-NEXT:mtvsrwz v3, r4
+; CHECK-P8-NEXT:xscvdpsxws v2, v2
+; CHECK-P8-NEXT:xscvdpsxws v3, f0
 ; CHECK-P8-NEXT:vmrghw v2, v2, v3
 ; CHECK-P8-NEXT:xxswapd vs0, v2
 ; CHECK-P8-NEXT:mffprd r3, f0
@@ -26,26 +22,18 @@
 ;
 ; CHECK-P9-LABEL: test2elt:
 ; CHECK-P9:   # %bb.0: # %entry
-; CHECK-P9-NEXT:xscvdpuxws f0, v2
-; CHECK-P9-NEXT:mffprwz r3, f0
 ; CHECK-P9-NEXT:xxswapd vs0, v2
-; CHECK-P9-NEXT:mtvsrwz v3, r3
-; CHECK-P9-NEXT:xscvdpuxws f0, f0
-; CHECK-P9-NEXT:mffprwz r3, f0
-; CHECK-P9-NEXT:mtvsrwz v2, r3
+; CHECK-P9-NEXT:xscvdpsxws v3, v2
+; CHECK-P9-NEXT:xscvdpsxws v2, f0
 ; CHECK-P9-NEXT:vmrghw v2, v3, v2
 ; CHECK-P9-NEXT:mfvsrld r3, v2
 ; CHECK-P9-NEXT:blr
 ;
 ; CHECK-BE-LABEL: test2elt:
 ; CHECK-BE:   # %bb.0: # %entry
-; CHECK-BE-NEXT:xscvdpuxws f0, v2
-; CHECK-BE-NEXT:mffprwz r3, f0
 ; CHECK-BE-NEXT:xxswapd vs0, v2
-; CHECK-BE-NEXT:mtvsrwz v3, r3
-; CHECK-BE-NEXT:xscvdpuxws f0, f0
-; CHECK-BE-NEXT:mffprwz r3, f0
-; CHECK-BE-NEXT:mtvsrwz v2, r3
+; CHECK-BE-NEXT:xscvdpsxws v3, v2
+; CHECK-BE-NEXT:xscvdpsxws v2, f0
 ; CHECK-BE-NEXT:vmrgow v2, v3, v2
 ; CHECK-BE-NEXT:mfvsrd r3, v2
 ; CHECK-BE-NEXT:blr
@@ -305,12 +293,8 @@
 ; CHECK-P8-LABEL: test2elt_signed:
 ; CHECK-P8:   # %bb.0: # %entry
 ; CHECK-P8-NEXT:xxswapd vs0, v2
-; CHECK-P8-NEXT:xscvdpsxws f1, v2
-; CHECK-P8-NEXT:xscvdpsxws f0, f0
-; CHECK-P8-NEXT:mffprwz r3, f1
-; CHECK-P8-NEXT:mtvsrwz v2, r3
-; CHECK-P8-NEXT:mffprwz r4, f0
-; CHECK-P8-NEXT:mtvsrwz v3, r4
+; CHECK-P8-NEXT:xscvdpsxws v2, v2
+; CHECK-P8-NEXT:xscvdpsxws v3, f0
 ; CHECK-P8-NEXT:vmrghw v2, v2, v3
 ; CHECK-P8-NEXT:xxswapd vs0, v2
 ; CHECK-P8-NEXT:mffprd r3, f0
@@ -318,26 +302,18 @@
 ;
 ; CHECK-P9-LABEL: test2elt_signed:
 ; CHECK-P9:   # %bb.0: # %entry
-; CHECK-P9-NEXT:xscvdpsxws f0, v2
-; CHECK-P9-NEXT:mffprwz r3, f0
 ; CHECK-P9-NEXT:xxswapd vs0, v2
-; CHECK-P9-NEXT:mtvsrwz v3, r3
-; CHECK-P9-NEXT:xscvdpsxws f0, f0
-; CHECK-P9-NEXT:mffprwz r3, f0
-; CHECK-P9-NEXT:mtvsrwz v2, r3
+; CHECK-P9-NEXT:xscvdpsxws v3, v2
+; CHECK-P9-NEXT:xscvdpsxws v2, f0
 ; CHECK-P9-NEXT:vmrghw v2, v3, v2
 ; CHECK-P9-NEXT:mfvsrld r3, v2
 ; CHECK-P9-NEXT:blr
 ;
 ; CHECK-BE-LABEL: test2elt_signed:
 ; CHECK-BE:   # %bb.0: # %entry
-; CHECK-BE-NEXT:xscvdpsxws f0, v2
-; CHECK-BE-NEXT:mffprwz r3, f0
 ; CHECK-BE-NEXT:xxswapd vs0, v2
-; CHECK-BE-NEXT:mtvsrwz v3, r3
-; CHECK-BE-NEXT:xscvdpsxws f0, f0
-; CHECK-BE-NEXT:mffprwz r3, f0
-; CHECK-BE-NEXT:mtvsrwz v2, r3
+; CHECK-BE-NEXT:xscvdpsxws v3, v2
+; CHECK-BE-NEXT:xscvdpsxws v2, f0
 ; CHECK-BE-NEXT:vmrgow v2, v3, v2
 ; CHECK-BE-NEXT:mfvsrd r3, v2
 ; CHECK-BE-NEXT:blr
Index: llvm/test/CodeGen/PowerPC/test-vector-insert.ll
===
--- /dev/null
+++ llvm/test/CodeGen/PowerPC/test-vector-insert.ll
@@ -0,0 +1,172 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; xscvdpsxws and uxws is only available on Power7 and above
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:  < %s | FileCheck %s --check-prefix=CHECK-LE
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:  -mcpu=pwr7 < %s | FileCheck %s --check-prefix=CHECK-BE
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-aix \
+; RUN:  -mcpu=pwr7 < %s | FileCheck %s
+
+define dso_local <4 x i32> @test(<4 x i32> %a, double %b) {
+; CHECK-LE-LABEL: test:
+; CHECK-LE:   # %bb.0: # %entry
+; CHECK-LE-NEXT:xscvdpsxws 35, 1
+; CHECK-LE-NEXT:addis 3, 2, .LCPI0_0@toc@ha
+; CHECK-LE-NEXT:addi 3, 3, .LCPI0_0@toc@l
+; CHECK-LE-NEXT:lvx 4, 0, 3
+; CHECK-LE-NEXT:vperm 2, 3, 2, 4
+; CHECK-LE-NEXT:blr
+;
+; CHECK-BE-LABEL: test:
+; CHECK-BE:   # %bb.0: # %entry
+; CHECK-BE-NEXT:xscvdpsxws 0, 1

[PATCH] D109902: [PowerPC] Improved codegen related to xscvdpsxws/xscvdpuxws

2021-09-16 Thread Albion Fung via Phabricator via cfe-commits
Conanap updated this revision to Diff 373009.
Conanap added a comment.

Removed unintended change, moved pattern to more appropriate location, reduced 
added complexity to 600 as it still works.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D109902/new/

https://reviews.llvm.org/D109902

Files:
  llvm/lib/Target/PowerPC/PPCInstrVSX.td
  llvm/test/CodeGen/PowerPC/build-vector-tests.ll
  llvm/test/CodeGen/PowerPC/test-vector-insert.ll
  llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll

Index: llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll
===
--- llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll
+++ llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll
@@ -13,12 +13,8 @@
 ; CHECK-P8-LABEL: test2elt:
 ; CHECK-P8:   # %bb.0: # %entry
 ; CHECK-P8-NEXT:xxswapd vs0, v2
-; CHECK-P8-NEXT:xscvdpuxws f1, v2
-; CHECK-P8-NEXT:xscvdpuxws f0, f0
-; CHECK-P8-NEXT:mffprwz r3, f1
-; CHECK-P8-NEXT:mtvsrwz v2, r3
-; CHECK-P8-NEXT:mffprwz r4, f0
-; CHECK-P8-NEXT:mtvsrwz v3, r4
+; CHECK-P8-NEXT:xscvdpsxws v2, v2
+; CHECK-P8-NEXT:xscvdpsxws v3, f0
 ; CHECK-P8-NEXT:vmrghw v2, v2, v3
 ; CHECK-P8-NEXT:xxswapd vs0, v2
 ; CHECK-P8-NEXT:mffprd r3, f0
@@ -26,26 +22,18 @@
 ;
 ; CHECK-P9-LABEL: test2elt:
 ; CHECK-P9:   # %bb.0: # %entry
-; CHECK-P9-NEXT:xscvdpuxws f0, v2
-; CHECK-P9-NEXT:mffprwz r3, f0
 ; CHECK-P9-NEXT:xxswapd vs0, v2
-; CHECK-P9-NEXT:mtvsrwz v3, r3
-; CHECK-P9-NEXT:xscvdpuxws f0, f0
-; CHECK-P9-NEXT:mffprwz r3, f0
-; CHECK-P9-NEXT:mtvsrwz v2, r3
+; CHECK-P9-NEXT:xscvdpsxws v3, v2
+; CHECK-P9-NEXT:xscvdpsxws v2, f0
 ; CHECK-P9-NEXT:vmrghw v2, v3, v2
 ; CHECK-P9-NEXT:mfvsrld r3, v2
 ; CHECK-P9-NEXT:blr
 ;
 ; CHECK-BE-LABEL: test2elt:
 ; CHECK-BE:   # %bb.0: # %entry
-; CHECK-BE-NEXT:xscvdpuxws f0, v2
-; CHECK-BE-NEXT:mffprwz r3, f0
 ; CHECK-BE-NEXT:xxswapd vs0, v2
-; CHECK-BE-NEXT:mtvsrwz v3, r3
-; CHECK-BE-NEXT:xscvdpuxws f0, f0
-; CHECK-BE-NEXT:mffprwz r3, f0
-; CHECK-BE-NEXT:mtvsrwz v2, r3
+; CHECK-BE-NEXT:xscvdpsxws v3, v2
+; CHECK-BE-NEXT:xscvdpsxws v2, f0
 ; CHECK-BE-NEXT:vmrgow v2, v3, v2
 ; CHECK-BE-NEXT:mfvsrd r3, v2
 ; CHECK-BE-NEXT:blr
@@ -305,12 +293,8 @@
 ; CHECK-P8-LABEL: test2elt_signed:
 ; CHECK-P8:   # %bb.0: # %entry
 ; CHECK-P8-NEXT:xxswapd vs0, v2
-; CHECK-P8-NEXT:xscvdpsxws f1, v2
-; CHECK-P8-NEXT:xscvdpsxws f0, f0
-; CHECK-P8-NEXT:mffprwz r3, f1
-; CHECK-P8-NEXT:mtvsrwz v2, r3
-; CHECK-P8-NEXT:mffprwz r4, f0
-; CHECK-P8-NEXT:mtvsrwz v3, r4
+; CHECK-P8-NEXT:xscvdpsxws v2, v2
+; CHECK-P8-NEXT:xscvdpsxws v3, f0
 ; CHECK-P8-NEXT:vmrghw v2, v2, v3
 ; CHECK-P8-NEXT:xxswapd vs0, v2
 ; CHECK-P8-NEXT:mffprd r3, f0
@@ -318,26 +302,18 @@
 ;
 ; CHECK-P9-LABEL: test2elt_signed:
 ; CHECK-P9:   # %bb.0: # %entry
-; CHECK-P9-NEXT:xscvdpsxws f0, v2
-; CHECK-P9-NEXT:mffprwz r3, f0
 ; CHECK-P9-NEXT:xxswapd vs0, v2
-; CHECK-P9-NEXT:mtvsrwz v3, r3
-; CHECK-P9-NEXT:xscvdpsxws f0, f0
-; CHECK-P9-NEXT:mffprwz r3, f0
-; CHECK-P9-NEXT:mtvsrwz v2, r3
+; CHECK-P9-NEXT:xscvdpsxws v3, v2
+; CHECK-P9-NEXT:xscvdpsxws v2, f0
 ; CHECK-P9-NEXT:vmrghw v2, v3, v2
 ; CHECK-P9-NEXT:mfvsrld r3, v2
 ; CHECK-P9-NEXT:blr
 ;
 ; CHECK-BE-LABEL: test2elt_signed:
 ; CHECK-BE:   # %bb.0: # %entry
-; CHECK-BE-NEXT:xscvdpsxws f0, v2
-; CHECK-BE-NEXT:mffprwz r3, f0
 ; CHECK-BE-NEXT:xxswapd vs0, v2
-; CHECK-BE-NEXT:mtvsrwz v3, r3
-; CHECK-BE-NEXT:xscvdpsxws f0, f0
-; CHECK-BE-NEXT:mffprwz r3, f0
-; CHECK-BE-NEXT:mtvsrwz v2, r3
+; CHECK-BE-NEXT:xscvdpsxws v3, v2
+; CHECK-BE-NEXT:xscvdpsxws v2, f0
 ; CHECK-BE-NEXT:vmrgow v2, v3, v2
 ; CHECK-BE-NEXT:mfvsrd r3, v2
 ; CHECK-BE-NEXT:blr
Index: llvm/test/CodeGen/PowerPC/test-vector-insert.ll
===
--- /dev/null
+++ llvm/test/CodeGen/PowerPC/test-vector-insert.ll
@@ -0,0 +1,172 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; xscvdpsxws and uxws is only available on Power7 and above
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:  < %s | FileCheck %s --check-prefix=CHECK-LE
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:  -mcpu=pwr7 < %s | FileCheck %s --check-prefix=CHECK-BE
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-aix \
+; RUN:  -mcpu=pwr7 < %s | FileCheck %s
+
+define dso_local <4 x i32> @test(<4 x i32> %a, double %b) {
+; CHECK-LE-LABEL: test:
+; CHECK-LE:   # %bb.0: # %entry
+; CHECK-LE-NEXT:xscvdpsxws 35, 1
+; CHECK-LE-NEXT:addis 3, 2, .LCPI0_0@toc@ha
+; CHECK-LE-NEXT:addi 3, 3, .LCPI0_0@toc@l
+; CHECK-LE-NEXT:lvx 4, 0, 3
+; CHECK-LE-NEXT:vperm 2, 3, 2, 4
+; CHECK-LE-NEXT:blr
+;
+; 

[PATCH] D109902: [PowerPC] Improved codegen related to xscvdpsxws/xscvdpuxws

2021-09-16 Thread Albion Fung via Phabricator via cfe-commits
Conanap created this revision.
Conanap added reviewers: PowerPC, nemanjai, saghir.
Conanap added projects: PowerPC, LLVM, clang.
Herald added subscribers: kbarton, hiraditya.
Conanap requested review of this revision.

This patch removes the uneccessary mf/mtvsr generated in conjunction
with xscvdpsxws/xscvdpuxws.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D109902

Files:
  llvm/lib/Target/PowerPC/PPCInstrVSX.td
  llvm/test/CodeGen/PowerPC/build-vector-tests.ll
  llvm/test/CodeGen/PowerPC/test-vector-insert.ll
  llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll

Index: llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll
===
--- llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll
+++ llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll
@@ -13,12 +13,8 @@
 ; CHECK-P8-LABEL: test2elt:
 ; CHECK-P8:   # %bb.0: # %entry
 ; CHECK-P8-NEXT:xxswapd vs0, v2
-; CHECK-P8-NEXT:xscvdpuxws f1, v2
-; CHECK-P8-NEXT:xscvdpuxws f0, f0
-; CHECK-P8-NEXT:mffprwz r3, f1
-; CHECK-P8-NEXT:mtvsrwz v2, r3
-; CHECK-P8-NEXT:mffprwz r4, f0
-; CHECK-P8-NEXT:mtvsrwz v3, r4
+; CHECK-P8-NEXT:xscvdpsxws v2, v2
+; CHECK-P8-NEXT:xscvdpsxws v3, f0
 ; CHECK-P8-NEXT:vmrghw v2, v2, v3
 ; CHECK-P8-NEXT:xxswapd vs0, v2
 ; CHECK-P8-NEXT:mffprd r3, f0
@@ -26,26 +22,18 @@
 ;
 ; CHECK-P9-LABEL: test2elt:
 ; CHECK-P9:   # %bb.0: # %entry
-; CHECK-P9-NEXT:xscvdpuxws f0, v2
-; CHECK-P9-NEXT:mffprwz r3, f0
 ; CHECK-P9-NEXT:xxswapd vs0, v2
-; CHECK-P9-NEXT:mtvsrwz v3, r3
-; CHECK-P9-NEXT:xscvdpuxws f0, f0
-; CHECK-P9-NEXT:mffprwz r3, f0
-; CHECK-P9-NEXT:mtvsrwz v2, r3
+; CHECK-P9-NEXT:xscvdpsxws v3, v2
+; CHECK-P9-NEXT:xscvdpsxws v2, f0
 ; CHECK-P9-NEXT:vmrghw v2, v3, v2
 ; CHECK-P9-NEXT:mfvsrld r3, v2
 ; CHECK-P9-NEXT:blr
 ;
 ; CHECK-BE-LABEL: test2elt:
 ; CHECK-BE:   # %bb.0: # %entry
-; CHECK-BE-NEXT:xscvdpuxws f0, v2
-; CHECK-BE-NEXT:mffprwz r3, f0
 ; CHECK-BE-NEXT:xxswapd vs0, v2
-; CHECK-BE-NEXT:mtvsrwz v3, r3
-; CHECK-BE-NEXT:xscvdpuxws f0, f0
-; CHECK-BE-NEXT:mffprwz r3, f0
-; CHECK-BE-NEXT:mtvsrwz v2, r3
+; CHECK-BE-NEXT:xscvdpsxws v3, v2
+; CHECK-BE-NEXT:xscvdpsxws v2, f0
 ; CHECK-BE-NEXT:vmrgow v2, v3, v2
 ; CHECK-BE-NEXT:mfvsrd r3, v2
 ; CHECK-BE-NEXT:blr
@@ -305,12 +293,8 @@
 ; CHECK-P8-LABEL: test2elt_signed:
 ; CHECK-P8:   # %bb.0: # %entry
 ; CHECK-P8-NEXT:xxswapd vs0, v2
-; CHECK-P8-NEXT:xscvdpsxws f1, v2
-; CHECK-P8-NEXT:xscvdpsxws f0, f0
-; CHECK-P8-NEXT:mffprwz r3, f1
-; CHECK-P8-NEXT:mtvsrwz v2, r3
-; CHECK-P8-NEXT:mffprwz r4, f0
-; CHECK-P8-NEXT:mtvsrwz v3, r4
+; CHECK-P8-NEXT:xscvdpsxws v2, v2
+; CHECK-P8-NEXT:xscvdpsxws v3, f0
 ; CHECK-P8-NEXT:vmrghw v2, v2, v3
 ; CHECK-P8-NEXT:xxswapd vs0, v2
 ; CHECK-P8-NEXT:mffprd r3, f0
@@ -318,26 +302,18 @@
 ;
 ; CHECK-P9-LABEL: test2elt_signed:
 ; CHECK-P9:   # %bb.0: # %entry
-; CHECK-P9-NEXT:xscvdpsxws f0, v2
-; CHECK-P9-NEXT:mffprwz r3, f0
 ; CHECK-P9-NEXT:xxswapd vs0, v2
-; CHECK-P9-NEXT:mtvsrwz v3, r3
-; CHECK-P9-NEXT:xscvdpsxws f0, f0
-; CHECK-P9-NEXT:mffprwz r3, f0
-; CHECK-P9-NEXT:mtvsrwz v2, r3
+; CHECK-P9-NEXT:xscvdpsxws v3, v2
+; CHECK-P9-NEXT:xscvdpsxws v2, f0
 ; CHECK-P9-NEXT:vmrghw v2, v3, v2
 ; CHECK-P9-NEXT:mfvsrld r3, v2
 ; CHECK-P9-NEXT:blr
 ;
 ; CHECK-BE-LABEL: test2elt_signed:
 ; CHECK-BE:   # %bb.0: # %entry
-; CHECK-BE-NEXT:xscvdpsxws f0, v2
-; CHECK-BE-NEXT:mffprwz r3, f0
 ; CHECK-BE-NEXT:xxswapd vs0, v2
-; CHECK-BE-NEXT:mtvsrwz v3, r3
-; CHECK-BE-NEXT:xscvdpsxws f0, f0
-; CHECK-BE-NEXT:mffprwz r3, f0
-; CHECK-BE-NEXT:mtvsrwz v2, r3
+; CHECK-BE-NEXT:xscvdpsxws v3, v2
+; CHECK-BE-NEXT:xscvdpsxws v2, f0
 ; CHECK-BE-NEXT:vmrgow v2, v3, v2
 ; CHECK-BE-NEXT:mfvsrd r3, v2
 ; CHECK-BE-NEXT:blr
Index: llvm/test/CodeGen/PowerPC/test-vector-insert.ll
===
--- /dev/null
+++ llvm/test/CodeGen/PowerPC/test-vector-insert.ll
@@ -0,0 +1,172 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; xscvdpsxws and uxws is only available on Power7 and above
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:  < %s | FileCheck %s --check-prefix=CHECK-LE
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:  -mcpu=pwr7 < %s | FileCheck %s --check-prefix=CHECK-BE
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-aix \
+; RUN:  -mcpu=pwr7 < %s | FileCheck %s
+
+define dso_local <4 x i32> @test(<4 x i32> %a, double %b) {
+; CHECK-LE-LABEL: test:
+; CHECK-LE:   # %bb.0: # %entry
+; CHECK-LE-NEXT:xscvdpsxws 35, 1
+; CHECK-LE-NEXT:addis 3, 2, .LCPI0_0@toc@ha
+; CHECK-LE-NEXT:addi 3, 3, .LCPI0_0@toc@l
+; CHECK-LE-NEXT: