[PATCH 08/13] [APX EGPR] Handle GPR16 only vector move insns

2023-09-22 Thread Hongyu Wang
For vector move insns like vmovdqa/vmovdqu, their EVEX counterparts
require an explicit suffix 64/32/16/8. The usage of these instructions
is prohibited under AVX10_1 or AVX512F, so we select
vmovaps/vmovups for vector load/store insns that contain EGPR if
there is no AVX512VL, and keep the original move insn selection
otherwise.
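
As a reading aid, here is a minimal, self-contained sketch of the selection
order described above, restricted to 32/64-bit integer elements (the
byte/word cases additionally consult TARGET_AVX512BW; a sketch of that
sub-case follows the i386.cc hunks below). The helper name and the bool
parameters are hypothetical stand-ins for the TARGET_* checks; the real
logic is the ix86_get_ssemov change in this patch.

/* Illustrative sketch only, not GCC code.  */
#include <cstdio>

static const char *
pick_int_move (bool evex_reg_p, bool egpr_p, bool avx512vl_p,
               bool misaligned_p, int elem_bits)
{
  /* EVEX-only SSE registers, or an EGPR address when AVX512VL is
     available: keep the element-size-suffixed EVEX integer moves.  */
  if (evex_reg_p || (egpr_p && avx512vl_p))
    {
      if (elem_bits == 32)
        return misaligned_p ? "vmovdqu32" : "vmovdqa32";
      return misaligned_p ? "vmovdqu64" : "vmovdqa64";
    }
  /* EGPR address without AVX512VL: avoid the suffix-less integer moves
     and fall back to vmovaps/vmovups.  */
  if (egpr_p)
    return misaligned_p ? "vmovups" : "vmovaps";
  /* Neither EVEX registers nor EGPR: the original move selection.  */
  return misaligned_p ? "vmovdqu" : "vmovdqa";
}

int
main ()
{
  /* 256-bit integer load from an EGPR base with -mavx2 -mapxf only.  */
  printf ("%s\n", pick_int_move (false, true, false, true, 64)); /* vmovups */
  /* The same load when AVX512VL is also enabled.  */
  printf ("%s\n", pick_int_move (false, true, true, true, 64));  /* vmovdqu64 */
}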

gcc/ChangeLog:

* config/i386/i386.cc (ix86_get_ssemov): Check if egpr is used,
adjust mnemonic for vmovdqu/vmovdqa.
* config/i386/sse.md (*<extract_type>_vinsert<shuffletype><extract_suf>_0):
Check if egpr is used, adjust mnemonic for vmovdqu/vmovdqa.
(avx_vec_concat<mode>): Likewise, and separate alternative 0 to
avx_noavx512f.

Co-authored-by: Kong Lingling 
Co-authored-by: Hongtao Liu 
---
 gcc/config/i386/i386.cc | 42 +++--
 gcc/config/i386/sse.md  | 34 +++--
 2 files changed, 60 insertions(+), 16 deletions(-)

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index ea94663eb68..5d47c2af25e 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -5478,6 +5478,12 @@ ix86_get_ssemov (rtx *operands, unsigned size,
   bool evex_reg_p = (size == 64
 || EXT_REX_SSE_REG_P (operands[0])
 || EXT_REX_SSE_REG_P (operands[1]));
+
+  bool egpr_p = (TARGET_APX_EGPR
+&& (x86_extended_rex2reg_mentioned_p (operands[0])
+|| x86_extended_rex2reg_mentioned_p (operands[1])));
+  bool egpr_vl = egpr_p && TARGET_AVX512VL;
+
   machine_mode scalar_mode;
 
   const char *opcode = NULL;
@@ -5550,12 +5556,18 @@ ix86_get_ssemov (rtx *operands, unsigned size,
{
case E_HFmode:
case E_BFmode:
- if (evex_reg_p)
+ if (evex_reg_p || egpr_vl)
opcode = (misaligned_p
  ? (TARGET_AVX512BW
 ? "vmovdqu16"
 : "vmovdqu64")
  : "vmovdqa64");
+ else if (egpr_p)
+   opcode = (misaligned_p
+ ? (TARGET_AVX512BW
+? "vmovdqu16"
+: "%vmovups")
+ : "%vmovaps");
  else
opcode = (misaligned_p
  ? (TARGET_AVX512BW
@@ -5570,8 +5582,10 @@ ix86_get_ssemov (rtx *operands, unsigned size,
  opcode = misaligned_p ? "%vmovupd" : "%vmovapd";
  break;
case E_TFmode:
- if (evex_reg_p)
+ if (evex_reg_p || egpr_vl)
opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
+ else if (egpr_p)
+   opcode = misaligned_p ? "%vmovups" : "%vmovaps";
  else
opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
  break;
@@ -5584,12 +5598,18 @@ ix86_get_ssemov (rtx *operands, unsigned size,
   switch (scalar_mode)
{
case E_QImode:
- if (evex_reg_p)
+ if (evex_reg_p || egpr_vl)
opcode = (misaligned_p
  ? (TARGET_AVX512BW
 ? "vmovdqu8"
 : "vmovdqu64")
  : "vmovdqa64");
+ else if (egpr_p)
+   opcode = (misaligned_p
+ ? (TARGET_AVX512BW
+? "vmovdqu8"
+: "%vmovups")
+ : "%vmovaps");
  else
opcode = (misaligned_p
  ? (TARGET_AVX512BW
@@ -5598,12 +5618,18 @@ ix86_get_ssemov (rtx *operands, unsigned size,
  : "%vmovdqa");
  break;
case E_HImode:
- if (evex_reg_p)
+ if (evex_reg_p || egpr_vl)
opcode = (misaligned_p
  ? (TARGET_AVX512BW
 ? "vmovdqu16"
 : "vmovdqu64")
  : "vmovdqa64");
+ else if (egpr_p)
+   opcode = (misaligned_p
+ ? (TARGET_AVX512BW
+? "vmovdqu16"
+: "%vmovups")
+ : "%vmovaps");
  else
opcode = (misaligned_p
  ? (TARGET_AVX512BW
@@ -5612,16 +5638,20 @@ ix86_get_ssemov (rtx *operands, unsigned size,
  : "%vmovdqa");
  break;
case E_SImode:
- if (evex_reg_p)
+ if (evex_reg_p || egpr_vl)
opcode = misaligned_p ? "vmovdqu32" : "vmovdqa32";
+ else if (egpr_p)
+   opcode = misaligned_p ? "%vmovups" : "%vmovaps";
  else
opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
  break;
case E_DImode:
case E_TImode:
case E_OImode:
- if (evex_reg_p)
+ if (evex_reg_p || egpr_vl)
opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
+ else if (egpr_p)
+   opcode = misaligned_p ? "%vmovups" : "%vmovaps";
  else
opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
  break;
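
For the byte/word-element cases in the hunks above, TARGET_AVX512BW also
enters the picture. A hedged sketch of just the E_HImode branch, with the
same hypothetical naming as the sketch near the top of this mail:

/* Illustrative sketch only, not GCC code: mirrors the E_HImode case.  */
#include <cstdio>

static const char *
pick_hi_move (bool evex_reg_p, bool egpr_p, bool avx512vl_p,
              bool avx512bw_p, bool misaligned_p)
{
  if (evex_reg_p || (egpr_p && avx512vl_p))
    return misaligned_p ? (avx512bw_p ? "vmovdqu16" : "vmovdqu64")
                        : "vmovdqa64";
  /* EGPR without AVX512VL: the patch still picks vmovdqu16 when
     AVX512BW is enabled, otherwise it falls back to the FP moves.  */
  if (egpr_p)
    return misaligned_p ? (avx512bw_p ? "vmovdqu16" : "vmovups")
                        : "vmovaps";
  return misaligned_p ? (avx512bw_p ? "vmovdqu16" : "vmovdqu")
                      : "vmovdqa";
}

int
main ()
{
  /* Misaligned word-element load from an EGPR base, -mavx2 -mapxf.  */
  printf ("%s\n", pick_hi_move (false, true, false, false, true)); /* vmovups */
  /* Same, but with AVX512BW enabled (still no AVX512VL).  */
  printf ("%s\n", pick_hi_move (false, true, false, true, true));  /* vmovdqu16 */
}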

Re: [PATCH 08/13] [APX EGPR] Handle GPR16 only vector move insns

2023-09-01 Thread Jakub Jelinek via Gcc-patches
On Fri, Sep 01, 2023 at 07:34:16PM +0800, Hongyu Wang wrote:
> > On Fri, Sep 01, 2023 at 05:07:53PM +0800, Hongyu Wang wrote:
> > > Jakub Jelinek via Gcc-patches wrote on Thu, Aug 31, 2023 at 17:44:
> > > >
> > > > On Thu, Aug 31, 2023 at 04:20:19PM +0800, Hongyu Wang via Gcc-patches 
> > > > wrote:
> > > > > For vector move insns like vmovdqa/vmovdqu, their evex counterparts
> > > > > requrire explicit suffix 64/32/16/8. The usage of these instruction
> > > > > are prohibited under AVX10_1 or AVX512F, so for AVX2+APX_F we select
> > > > > vmovaps/vmovups for vector load/store insns that contains EGPR.
> > > >
> > > > Why not make it dependent on AVX512VL?
> > > > I.e. if egpr_p && TARGET_AVX512VL, still use vmovdqu16 or vmovdqa16
> > > > and the like, and only if !evex_reg_p && egpr_p && !TARGET_AVX512VL
> > > > fall back to what you're doing?
> > >
> > > I'm not sure if it is necessary, as on hardware there is no difference 
> > > between
> > > vmovdqu16/vmovups. If vmovups already has the capability to represent
> > > EGPR why do we need to distinguish them under VL?
> >
> > On the Intel HW you're currently planning.
> > Will that be the case for AMD as well?
> > Some insns are documented to move float or double vectors while others
> > integer vectors (of different element sizes).
> > Or is vmovups with GPR32 at least encoded smaller than vmovdqu{16,32,64}?
> 
> With GPR32 they have same encoding size. If we need to strictly follow
> the meaning of mnemonics,
> I will adjust as you suggested. Thanks.

I think it is useful, even if just for those who try to read the
assembler/disassembler.  Of course, if there are cases where only one of
those has to be used (say -mavx -mno-avx2 and 256-bit integer vector moves),
there is no way around that and one just uses what is available.

Jakub



Re: [PATCH 08/13] [APX EGPR] Handle GPR16 only vector move insns

2023-09-01 Thread Hongyu Wang via Gcc-patches
Jakub Jelinek wrote on Fri, Sep 1, 2023 at 17:20:
>
> On Fri, Sep 01, 2023 at 05:07:53PM +0800, Hongyu Wang wrote:
> > Jakub Jelinek via Gcc-patches wrote on Thu, Aug 31, 2023 at 17:44:
> > >
> > > On Thu, Aug 31, 2023 at 04:20:19PM +0800, Hongyu Wang via Gcc-patches 
> > > wrote:
> > > > For vector move insns like vmovdqa/vmovdqu, their evex counterparts
> > > > requrire explicit suffix 64/32/16/8. The usage of these instruction
> > > > are prohibited under AVX10_1 or AVX512F, so for AVX2+APX_F we select
> > > > vmovaps/vmovups for vector load/store insns that contains EGPR.
> > >
> > > Why not make it dependent on AVX512VL?
> > > I.e. if egpr_p && TARGET_AVX512VL, still use vmovdqu16 or vmovdqa16
> > > and the like, and only if !evex_reg_p && egpr_p && !TARGET_AVX512VL
> > > fall back to what you're doing?
> >
> > I'm not sure if it is necessary, as on hardware there is no difference 
> > between
> > vmovdqu16/vmovups. If vmovups already has the capability to represent
> > EGPR why do we need to distinguish them under VL?
>
> On the Intel HW you're currently planning.
> Will that be the case for AMD as well?
> Some insns are documented to move float or double vectors while others
> integer vectors (of different element sizes).
> Or is vmovups with GPR32 at least encoded smaller than vmovdqu{16,32,64}?

With GPR32 they have the same encoding size. If we need to strictly follow
the meaning of the mnemonics, I will adjust as you suggested. Thanks.


>
> Jakub
>


Re: [PATCH 08/13] [APX EGPR] Handle GPR16 only vector move insns

2023-09-01 Thread Jakub Jelinek via Gcc-patches
On Fri, Sep 01, 2023 at 05:07:53PM +0800, Hongyu Wang wrote:
> Jakub Jelinek via Gcc-patches wrote on Thu, Aug 31, 2023 at 17:44:
> >
> > On Thu, Aug 31, 2023 at 04:20:19PM +0800, Hongyu Wang via Gcc-patches wrote:
> > > For vector move insns like vmovdqa/vmovdqu, their evex counterparts
> > > requrire explicit suffix 64/32/16/8. The usage of these instruction
> > > are prohibited under AVX10_1 or AVX512F, so for AVX2+APX_F we select
> > > vmovaps/vmovups for vector load/store insns that contains EGPR.
> >
> > Why not make it dependent on AVX512VL?
> > I.e. if egpr_p && TARGET_AVX512VL, still use vmovdqu16 or vmovdqa16
> > and the like, and only if !evex_reg_p && egpr_p && !TARGET_AVX512VL
> > fall back to what you're doing?
> 
> I'm not sure if it is necessary, as on hardware there is no difference between
> vmovdqu16/vmovups. If vmovups already has the capability to represent
> EGPR why do we need to distinguish them under VL?

On the Intel HW you're currently planning.
Will that be the case for AMD as well?
Some insns are documented to move float or double vectors while others
integer vectors (of different element sizes).
Or is vmovups with GPR32 at least encoded smaller than vmovdqu{16,32,64}?

Jakub



Re: [PATCH 08/13] [APX EGPR] Handle GPR16 only vector move insns

2023-09-01 Thread Hongyu Wang via Gcc-patches
Jakub Jelinek via Gcc-patches wrote on Thu, Aug 31, 2023 at 17:44:
>
> On Thu, Aug 31, 2023 at 04:20:19PM +0800, Hongyu Wang via Gcc-patches wrote:
> > For vector move insns like vmovdqa/vmovdqu, their evex counterparts
> > requrire explicit suffix 64/32/16/8. The usage of these instruction
> > are prohibited under AVX10_1 or AVX512F, so for AVX2+APX_F we select
> > vmovaps/vmovups for vector load/store insns that contains EGPR.
>
> Why not make it dependent on AVX512VL?
> I.e. if egpr_p && TARGET_AVX512VL, still use vmovdqu16 or vmovdqa16
> and the like, and only if !evex_reg_p && egpr_p && !TARGET_AVX512VL
> fall back to what you're doing?

I'm not sure if it is necessary, as on hardware there is no difference between
vmovdqu16 and vmovups. If vmovups already has the capability to represent
EGPR, why do we need to distinguish them under VL?

> >
> > gcc/ChangeLog:
> >
> >   * config/i386/i386.cc (ix86_get_ssemov): Check if egpr is used,
> >   adjust mnemonic for vmovduq/vmovdqa.
> >   * config/i386/sse.md (*<extract_type>_vinsert<shuffletype><extract_suf>_0):
> >   Check if egpr is used, adjust mnemonic for vmovdqu/vmovdqa.
> >   (avx_vec_concat<mode>): Likewise, and separate alternative 0 to
> >   avx_noavx512f.
>
> Jakub
>


Re: [PATCH 08/13] [APX EGPR] Handle GPR16 only vector move insns

2023-08-31 Thread Jakub Jelinek via Gcc-patches
On Thu, Aug 31, 2023 at 04:20:19PM +0800, Hongyu Wang via Gcc-patches wrote:
> For vector move insns like vmovdqa/vmovdqu, their evex counterparts
> requrire explicit suffix 64/32/16/8. The usage of these instruction
> are prohibited under AVX10_1 or AVX512F, so for AVX2+APX_F we select
> vmovaps/vmovups for vector load/store insns that contains EGPR.

Why not make it dependent on AVX512VL?
I.e. if egpr_p && TARGET_AVX512VL, still use vmovdqu16 or vmovdqa16
and the like, and only if !evex_reg_p && egpr_p && !TARGET_AVX512VL
fall back to what you're doing?
> 
> gcc/ChangeLog:
> 
>   * config/i386/i386.cc (ix86_get_ssemov): Check if egpr is used,
>   adjust mnemonic for vmovduq/vmovdqa.
>   * config/i386/sse.md 
> (*_vinsert_0):
>   Check if egpr is used, adjust mnemonic for vmovdqu/vmovdqa.
>   (avx_vec_concat): Likewise, and separate alternative 0 to
>   avx_noavx512f.

Jakub



[PATCH 08/13] [APX EGPR] Handle GPR16 only vector move insns

2023-08-31 Thread Hongyu Wang via Gcc-patches
For vector move insns like vmovdqa/vmovdqu, their EVEX counterparts
require an explicit suffix 64/32/16/8. The usage of these instructions
is prohibited under AVX10_1 or AVX512F, so for AVX2+APX_F we select
vmovaps/vmovups for vector load/store insns that contain EGPR.
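
For contrast with the AVX512VL-aware selection in the later revision of this
patch, here is a minimal sketch of the rule as posted in this revision, again
with a hypothetical helper name and only the 32/64-bit element cases:

/* Illustrative sketch only, not GCC code: in this revision an EGPR
   address always forces the vmovaps/vmovups fallback, regardless of
   AVX512VL.  */
#include <cstdio>

static const char *
pick_int_move_v1 (bool evex_reg_p, bool egpr_p, bool misaligned_p)
{
  if (evex_reg_p)
    return misaligned_p ? "vmovdqu64" : "vmovdqa64";
  if (egpr_p)
    return misaligned_p ? "vmovups" : "vmovaps";
  return misaligned_p ? "vmovdqu" : "vmovdqa";
}

int
main ()
{
  /* EGPR-based 256-bit integer load: always the FP move in this revision.  */
  printf ("%s\n", pick_int_move_v1 (false, true, true)); /* vmovups */
}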

gcc/ChangeLog:

* config/i386/i386.cc (ix86_get_ssemov): Check if egpr is used,
adjust mnemonic for vmovdqu/vmovdqa.
* config/i386/sse.md (*<extract_type>_vinsert<shuffletype><extract_suf>_0):
Check if egpr is used, adjust mnemonic for vmovdqu/vmovdqa.
(avx_vec_concat<mode>): Likewise, and separate alternative 0 to
avx_noavx512f.
---
 gcc/config/i386/i386.cc | 31 ++-
 gcc/config/i386/sse.md  | 34 --
 2 files changed, 54 insertions(+), 11 deletions(-)

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 412f3aefc43..f5d642948bc 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -5469,6 +5469,11 @@ ix86_get_ssemov (rtx *operands, unsigned size,
   bool evex_reg_p = (size == 64
 || EXT_REX_SSE_REG_P (operands[0])
 || EXT_REX_SSE_REG_P (operands[1]));
+
+  bool egpr_p = (TARGET_APX_EGPR
+&& (x86_extended_rex2reg_mentioned_p (operands[0])
+|| x86_extended_rex2reg_mentioned_p (operands[1])));
+
   machine_mode scalar_mode;
 
   const char *opcode = NULL;
@@ -5547,6 +5552,12 @@ ix86_get_ssemov (rtx *operands, unsigned size,
 ? "vmovdqu16"
 : "vmovdqu64")
  : "vmovdqa64");
+ else if (egpr_p)
+   opcode = (misaligned_p
+ ? (TARGET_AVX512BW
+? "vmovdqu16"
+: "%vmovups")
+ : "%vmovaps");
  else
opcode = (misaligned_p
  ? (TARGET_AVX512BW
@@ -5563,6 +5574,8 @@ ix86_get_ssemov (rtx *operands, unsigned size,
case E_TFmode:
  if (evex_reg_p)
opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
+ else if (egpr_p)
+   opcode = misaligned_p ? "%vmovups" : "%vmovaps";
  else
opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
  break;
@@ -5581,6 +5594,12 @@ ix86_get_ssemov (rtx *operands, unsigned size,
 ? "vmovdqu8"
 : "vmovdqu64")
  : "vmovdqa64");
+ else if (egpr_p)
+   opcode = (misaligned_p
+ ? (TARGET_AVX512BW
+? "vmovdqu8"
+: "%vmovups")
+ : "%vmovaps");
  else
opcode = (misaligned_p
  ? (TARGET_AVX512BW
@@ -5589,12 +5608,18 @@ ix86_get_ssemov (rtx *operands, unsigned size,
  : "%vmovdqa");
  break;
case E_HImode:
- if (evex_reg_p)
+ if (evex_reg_p || egpr_p)
opcode = (misaligned_p
  ? (TARGET_AVX512BW
 ? "vmovdqu16"
 : "vmovdqu64")
  : "vmovdqa64");
+ else if (egpr_p)
+   opcode = (misaligned_p
+ ? (TARGET_AVX512BW
+? "vmovdqu16"
+: "%vmovups")
+ : "%vmovaps");
  else
opcode = (misaligned_p
  ? (TARGET_AVX512BW
@@ -5605,6 +5630,8 @@ ix86_get_ssemov (rtx *operands, unsigned size,
case E_SImode:
  if (evex_reg_p)
opcode = misaligned_p ? "vmovdqu32" : "vmovdqa32";
+ else if (egpr_p)
+   opcode = misaligned_p ? "%vmovups" : "%vmovaps";
  else
opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
  break;
@@ -5613,6 +5640,8 @@ ix86_get_ssemov (rtx *operands, unsigned size,
case E_OImode:
  if (evex_reg_p)
opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
+ else if (egpr_p)
+   opcode = misaligned_p ? "%vmovups" : "%vmovaps";
  else
opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
  break;
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 192e746fda3..bd6674d34f9 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -18918,6 +18918,12 @@ (define_insn "*<extract_type>_vinsert<shuffletype><extract_suf>_0"
 {
   if (which_alternative == 0)
 return "vinsert\t{$0, %2, %1, %0|%0, %1, %2, 0}";
+  bool egpr_used = (TARGET_APX_EGPR
+   && x86_extended_rex2reg_mentioned_p (operands[2]));
+  const char *align_templ = egpr_used ? "vmovdqa\t{%2, %x0|%x0, %2}"
+ : "vmovaps\t{%2, %x0|%x0, %2}";
+  const char *unalign_templ = egpr_used ? "vmovdqu\t{%2, %x0|%x0, %2}"
+   : "vmovups\t{%2, %x0|%x0, %2}";
  switch (<MODE>mode)
 {
 case E_V8DFmode:
@@ -18933,17 +18939,17 @@ (define_insn "*<extract_type>_vinsert<shuffletype><extract_suf>_0"
 case