On Tue, Feb 04, 2020 at 11:16:06AM +0100, Uros Bizjak wrote:
> If it works OK, I'd rather see this functionality implemented as an
> epilogue_completed guarded splitter. In the .md files, there are
> already cases where we split at this point, and where it is assumed
> that allocated registers won't change anymore. Also, please don't make
> the functionality conditional on flag_split_ra. This way, we would
> always get new patterns in the debug dumps, so in case something goes
> wrong, one could at least clearly see the full pattern.

The following patch seems to work on the testcase; I'll bootstrap/regtest it soon.
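
To make the intended RTL concrete (the register picked below is just an
illustration), suppose IPA-RA determines that only xmm2 is live across
the call.  ix86_add_reg_usage_to_vzeroupper now emits only the set for
that live register:

     (parallel [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)
                (set (reg:V2DI xmm2) (reg:V2DI xmm2))])

and the epilogue_completed splitter later rewrites it into the full form,
adding explicit clobbers for every SSE register not mentioned in the
pattern, so that for IPA-RA purposes it is clear the instruction clobbers
them:

     (parallel [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)
                (clobber (reg:V2DI xmm0))
                (clobber (reg:V2DI xmm1))
                (set (reg:V2DI xmm2) (reg:V2DI xmm2))
                (clobber (reg:V2DI xmm3))
                ...
                (clobber (reg:V2DI xmm15))])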

2020-02-04  Jakub Jelinek  <ja...@redhat.com>

        PR target/92190
        * config/i386/i386-features.c (ix86_add_reg_usage_to_vzeroupper): Only
        include sets and not clobbers in the vzeroupper pattern.
        * config/i386/sse.md (*avx_vzeroupper): Change from define_insn to
        define_insn_and_split, split if epilogue_completed and not all
        xmm0-xmm15 registers are mentioned in the pattern and add clobbers
        for the missing registers at that point.

        * gcc.target/i386/pr92190.c: New test.

--- gcc/config/i386/i386-features.c.jj  2020-02-04 11:40:58.755611428 +0100
+++ gcc/config/i386/i386-features.c     2020-02-04 11:51:33.602148491 +0100
@@ -1764,29 +1764,32 @@ convert_scalars_to_vector (bool timode_p
 
      (set (reg:V2DF R) (reg:V2DF R))
 
-   which preserves the low 128 bits but clobbers the upper bits.
-   For a dead register we just use:
-
-     (clobber (reg:V2DF R))
-
-   which invalidates any previous contents of R and stops R from becoming
-   live across the vzeroupper in future.  */
+   which preserves the low 128 bits but clobbers the upper bits.  */
 
 static void
 ix86_add_reg_usage_to_vzeroupper (rtx_insn *insn, bitmap live_regs)
 {
   rtx pattern = PATTERN (insn);
   unsigned int nregs = TARGET_64BIT ? 16 : 8;
-  rtvec vec = rtvec_alloc (nregs + 1);
-  RTVEC_ELT (vec, 0) = XVECEXP (pattern, 0, 0);
+  unsigned int npats = nregs;
   for (unsigned int i = 0; i < nregs; ++i)
     {
       unsigned int regno = GET_SSE_REGNO (i);
+      if (!bitmap_bit_p (live_regs, regno))
+       npats--;
+    }
+  if (npats == 0)
+    return;
+  rtvec vec = rtvec_alloc (npats + 1);
+  RTVEC_ELT (vec, 0) = XVECEXP (pattern, 0, 0);
+  for (unsigned int i = 0, j = 0; i < nregs; ++i)
+    {
+      unsigned int regno = GET_SSE_REGNO (i);
+      if (!bitmap_bit_p (live_regs, regno))
+       continue;
       rtx reg = gen_rtx_REG (V2DImode, regno);
-      if (bitmap_bit_p (live_regs, regno))
-       RTVEC_ELT (vec, i + 1) = gen_rtx_SET (reg, reg);
-      else
-       RTVEC_ELT (vec, i + 1) = gen_rtx_CLOBBER (VOIDmode, reg);
+      ++j;
+      RTVEC_ELT (vec, j) = gen_rtx_SET (reg, reg);
     }
   XVEC (pattern, 0) = vec;
   df_insn_rescan (insn);
--- gcc/config/i386/sse.md.jj   2020-02-04 11:40:58.813610563 +0100
+++ gcc/config/i386/sse.md      2020-02-04 11:58:31.544909659 +0100
@@ -19815,11 +19815,43 @@ (define_expand "avx_vzeroupper"
   [(parallel [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)])]
   "TARGET_AVX")
 
-(define_insn "*avx_vzeroupper"
+(define_insn_and_split "*avx_vzeroupper"
   [(match_parallel 0 "vzeroupper_pattern"
      [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)])]
   "TARGET_AVX"
-  "vzeroupper"
+{
+  if (XVECLEN (operands[0], 0) != (TARGET_64BIT ? 16 : 8) + 1)
+    return "#";
+  else
+    return "vzeroupper";
+}
+  "epilogue_completed
+   && XVECLEN (operands[0], 0) != (TARGET_64BIT ? 16 : 8) + 1"
+  [(match_dup 0)]
+{
+  /* For IPA-RA purposes, make it clear the instruction clobbers
+     even XMM registers not mentioned explicitly in the pattern.  */
+  unsigned int nregs = TARGET_64BIT ? 16 : 8;
+  unsigned int npats = XVECLEN (operands[0], 0);
+  rtvec vec = rtvec_alloc (nregs + 1);
+  RTVEC_ELT (vec, 0) = XVECEXP (operands[0], 0, 0);
+  for (unsigned int i = 0, j = 1; i < nregs; ++i)
+    {
+      unsigned int regno = GET_SSE_REGNO (i);
+      if (j < npats
+         && REGNO (SET_DEST (XVECEXP (operands[0], 0, j))) == regno)
+       {
+         RTVEC_ELT (vec, i + 1) = XVECEXP (operands[0], 0, j);
+         j++;
+       }
+      else
+       {
+         rtx reg = gen_rtx_REG (V2DImode, regno);
+         RTVEC_ELT (vec, i + 1) = gen_rtx_CLOBBER (VOIDmode, reg);
+       }
+    }
+  XVEC (operands[0], 0) = vec;
+}
   [(set_attr "type" "sse")
    (set_attr "modrm" "0")
    (set_attr "memory" "none")
--- gcc/testsuite/gcc.target/i386/pr92190.c.jj  2020-02-04 11:51:33.608148402 +0100
+++ gcc/testsuite/gcc.target/i386/pr92190.c     2020-02-04 11:51:33.608148402 +0100
@@ -0,0 +1,19 @@
+/* PR target/92190 */
+/* { dg-do compile { target { *-*-linux* && lp64 } } } */
+/* { dg-options "-mabi=ms -O2 -mavx512f" } */
+
+typedef char VC __attribute__((vector_size (16)));
+typedef int VI __attribute__((vector_size (16 * sizeof 0)));
+VC a;
+VI b;
+void bar (VI);
+void baz (VC);
+
+void
+foo (void)
+{
+  VC k = a;
+  VI n = b;
+  bar (n);
+  baz (k);
+}


        Jakub