On Tue, Feb 4, 2020 at 1:06 PM Richard Sandiford
<richard.sandif...@arm.com> wrote:
>
> Uros Bizjak <ubiz...@gmail.com> writes:
> > On Tue, Feb 4, 2020 at 12:13 PM Uros Bizjak <ubiz...@gmail.com> wrote:
> >>
> >> On Tue, Feb 4, 2020 at 12:05 PM Jakub Jelinek <ja...@redhat.com> wrote:
> >> >
> >> > On Tue, Feb 04, 2020 at 11:16:06AM +0100, Uros Bizjak wrote:
> >> > > If it works OK, I'd rather see this functionality implemented as an
> >> > > epilogue_completed guarded splitter. In the .md files, there are
> >> > > already cases where we split at this point, and where it is assumed
> >> > > that allocated registers won't change anymore. Also, please don't make
> >> > > the functionality conditional on flag_split_ra. This way, we would
> >> > > always get new patterns in the debug dumps, so in case something goes
> >> > > wrong, one could at least clearly see the full pattern.
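For reference, the general shape of such an epilogue_completed guarded
splitter is roughly as sketched below; the pattern name, unspec and target
macro are made up purely for illustration and are not taken from i386.md:

  ;; The "#" output template says this insn always has to be split before
  ;; it can be output.  A split condition starting with "&&" is appended
  ;; to the insn condition, so the splitter only fires for
  ;; TARGET_EXAMPLE && epilogue_completed, i.e. once hard register
  ;; allocation can no longer change.
  (define_insn_and_split "*example_move"
    [(set (match_operand:DI 0 "register_operand" "=r")
          (unspec:DI [(match_operand:DI 1 "register_operand" "r")]
                     UNSPEC_EXAMPLE))]
    "TARGET_EXAMPLE"
    "#"
    "&& epilogue_completed"
    [(set (match_dup 0) (match_dup 1))])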
> >> >
> >> > The following seems to work on the testcase, will bootstrap/regtest it soon.
> >> >
> >> > 2020-02-04  Jakub Jelinek  <ja...@redhat.com>
> >> >
> >> >         PR target/92190
> >> >         * config/i386/i386-features.c (ix86_add_reg_usage_to_vzeroupper):
> >> >         Only include sets and not clobbers in the vzeroupper pattern.
> >> >         * config/i386/sse.md (*avx_vzeroupper): Change from define_insn
> >> >         to define_insn_and_split, split if epilogue_completed and not all
> >> >         xmm0-xmm15 registers are mentioned in the pattern and add clobbers
> >> >         for the missing registers at that point.
> >> >
> >> >         * gcc.target/i386/pr92190.c: New test.
> >>
> >> A && is missing in the split condition to inherit TARGET_AVX.
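(That is, the split condition wants to be

   "&& epilogue_completed
    && XVECLEN (operands[0], 0) != (TARGET_64BIT ? 16 : 8) + 1"

so that it is implicitly ANDed with the TARGET_AVX condition of the
define_insn_and_split.)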
> >
> > Also, you don't need to emit "#" in the output template. This is just
> > for extra safety; we can live without it. Please see e.g. "*tzcnt<mode>_1".
>
> IMO it's better to keep it here, where we're relying on the split
> happening for correctness.  There's no theoretical guarantee that
> a split ever happens otherwise.

In this case, I think it would be better to have two patterns: a
define_insn_and_split whose output template is just "#", and a separate
"real" define_insn.
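Roughly along the lines of the sketch below; the "*avx_vzeroupper_1" name,
the exact conditions and the elided preparation statements are only meant
as an illustration, not as a worked-out patch:

  ;; The "real" pattern: matches only when all of xmm0-xmm15 (xmm0-xmm7
  ;; for -m32) are accounted for, so it can always be output as-is.
  (define_insn "*avx_vzeroupper"
    [(match_parallel 0 "vzeroupper_pattern"
       [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)])]
    "TARGET_AVX && XVECLEN (operands[0], 0) == (TARGET_64BIT ? 16 : 8) + 1"
    "vzeroupper"
    [(set_attr "type" "sse")
     (set_attr "modrm" "0")
     (set_attr "memory" "none")])

  ;; Never output directly; always split after reload into the pattern
  ;; above by adding clobbers for the SSE registers not mentioned.
  (define_insn_and_split "*avx_vzeroupper_1"
    [(match_parallel 0 "vzeroupper_pattern"
       [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)])]
    "TARGET_AVX && XVECLEN (operands[0], 0) != (TARGET_64BIT ? 16 : 8) + 1"
    "#"
    "&& epilogue_completed"
    [(match_dup 0)]
  {
    /* Rewrite operands[0] here exactly as in the splitter of the patch
       below, keeping the existing sets and adding (clobber (reg:V2DI N))
       for every SSE register that is not mentioned.  */
  })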

Uros.

> Thanks,
> Richard


> >> OK with this change.
> >>
> >> Thanks,
> >> Uros.
> >>
> >> >
> >> > --- gcc/config/i386/i386-features.c.jj  2020-02-04 11:40:58.755611428 +0100
> >> > +++ gcc/config/i386/i386-features.c     2020-02-04 11:51:33.602148491 +0100
> >> > @@ -1764,29 +1764,32 @@ convert_scalars_to_vector (bool timode_p
> >> >
> >> >       (set (reg:V2DF R) (reg:V2DF R))
> >> >
> >> > -   which preserves the low 128 bits but clobbers the upper bits.
> >> > -   For a dead register we just use:
> >> > -
> >> > -     (clobber (reg:V2DF R))
> >> > -
> >> > -   which invalidates any previous contents of R and stops R from becoming
> >> > -   live across the vzeroupper in future.  */
> >> > +   which preserves the low 128 bits but clobbers the upper bits.  */
> >> >
> >> >  static void
> >> >  ix86_add_reg_usage_to_vzeroupper (rtx_insn *insn, bitmap live_regs)
> >> >  {
> >> >    rtx pattern = PATTERN (insn);
> >> >    unsigned int nregs = TARGET_64BIT ? 16 : 8;
> >> > -  rtvec vec = rtvec_alloc (nregs + 1);
> >> > -  RTVEC_ELT (vec, 0) = XVECEXP (pattern, 0, 0);
> >> > +  unsigned int npats = nregs;
> >> >    for (unsigned int i = 0; i < nregs; ++i)
> >> >      {
> >> >        unsigned int regno = GET_SSE_REGNO (i);
> >> > +      if (!bitmap_bit_p (live_regs, regno))
> >> > +       npats--;
> >> > +    }
> >> > +  if (npats == 0)
> >> > +    return;
> >> > +  rtvec vec = rtvec_alloc (npats + 1);
> >> > +  RTVEC_ELT (vec, 0) = XVECEXP (pattern, 0, 0);
> >> > +  for (unsigned int i = 0, j = 0; i < nregs; ++i)
> >> > +    {
> >> > +      unsigned int regno = GET_SSE_REGNO (i);
> >> > +      if (!bitmap_bit_p (live_regs, regno))
> >> > +       continue;
> >> >        rtx reg = gen_rtx_REG (V2DImode, regno);
> >> > -      if (bitmap_bit_p (live_regs, regno))
> >> > -       RTVEC_ELT (vec, i + 1) = gen_rtx_SET (reg, reg);
> >> > -      else
> >> > -       RTVEC_ELT (vec, i + 1) = gen_rtx_CLOBBER (VOIDmode, reg);
> >> > +      ++j;
> >> > +      RTVEC_ELT (vec, j) = gen_rtx_SET (reg, reg);
> >> >      }
> >> >    XVEC (pattern, 0) = vec;
> >> >    df_insn_rescan (insn);
> >> > --- gcc/config/i386/sse.md.jj   2020-02-04 11:40:58.813610563 +0100
> >> > +++ gcc/config/i386/sse.md      2020-02-04 11:58:31.544909659 +0100
> >> > @@ -19815,11 +19815,43 @@ (define_expand "avx_vzeroupper"
> >> >    [(parallel [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)])]
> >> >    "TARGET_AVX")
> >> >
> >> > -(define_insn "*avx_vzeroupper"
> >> > +(define_insn_and_split "*avx_vzeroupper"
> >> >    [(match_parallel 0 "vzeroupper_pattern"
> >> >       [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)])]
> >> >    "TARGET_AVX"
> >> > -  "vzeroupper"
> >> > +{
> >> > +  if (XVECLEN (operands[0], 0) != (TARGET_64BIT ? 16 : 8) + 1)
> >> > +    return "#";
> >> > +  else
> >> > +    return "vzeroupper";
> >> > +}
> >> > +  "epilogue_completed
> >>
> >> && epilogue_completed
> >>
> >> > +   && XVECLEN (operands[0], 0) != (TARGET_64BIT ? 16 : 8) + 1"
> >> > +  [(match_dup 0)]
> >> > +{
> >> > +  /* For IPA-RA purposes, make it clear the instruction clobbers
> >> > +     even XMM registers not mentioned explicitly in the pattern.  */
> >> > +  unsigned int nregs = TARGET_64BIT ? 16 : 8;
> >> > +  unsigned int npats = XVECLEN (operands[0], 0);
> >> > +  rtvec vec = rtvec_alloc (nregs + 1);
> >> > +  RTVEC_ELT (vec, 0) = XVECEXP (operands[0], 0, 0);
> >> > +  for (unsigned int i = 0, j = 1; i < nregs; ++i)
> >> > +    {
> >> > +      unsigned int regno = GET_SSE_REGNO (i);
> >> > +      if (j < npats
> >> > +         && REGNO (SET_DEST (XVECEXP (operands[0], 0, j))) == regno)
> >> > +       {
> >> > +         RTVEC_ELT (vec, i + 1) = XVECEXP (operands[0], 0, j);
> >> > +         j++;
> >> > +       }
> >> > +      else
> >> > +       {
> >> > +         rtx reg = gen_rtx_REG (V2DImode, regno);
> >> > +         RTVEC_ELT (vec, i + 1) = gen_rtx_CLOBBER (VOIDmode, reg);
> >> > +       }
> >> > +    }
> >> > +  XVEC (operands[0], 0) = vec;
> >> > +}
> >> >    [(set_attr "type" "sse")
> >> >     (set_attr "modrm" "0")
> >> >     (set_attr "memory" "none")
> >> > --- gcc/testsuite/gcc.target/i386/pr92190.c.jj  2020-02-04 11:51:33.608148402 +0100
> >> > +++ gcc/testsuite/gcc.target/i386/pr92190.c     2020-02-04 11:51:33.608148402 +0100
> >> > @@ -0,0 +1,19 @@
> >> > +/* PR target/92190 */
> >> > +/* { dg-do compile { target { *-*-linux* && lp64 } } } */
> >> > +/* { dg-options "-mabi=ms -O2 -mavx512f" } */
> >> > +
> >> > +typedef char VC __attribute__((vector_size (16)));
> >> > +typedef int VI __attribute__((vector_size (16 * sizeof 0)));
> >> > +VC a;
> >> > +VI b;
> >> > +void bar (VI);
> >> > +void baz (VC);
> >> > +
> >> > +void
> >> > +foo (void)
> >> > +{
> >> > +  VC k = a;
> >> > +  VI n = b;
> >> > +  bar (n);
> >> > +  baz (k);
> >> > +}
> >> >
> >> >
> >> >         Jakub
> >> >
