Re: [PATCH] x86: Properly implement AMX-TILE load/store intrinsics

2024-02-26 Thread Hongtao Liu
On Mon, Feb 26, 2024 at 6:30 PM H.J. Lu  wrote:
>
> On Sun, Feb 25, 2024 at 8:25 PM H.J. Lu  wrote:
> >
> > On Sun, Feb 25, 2024 at 7:03 PM Hongtao Liu  wrote:
> > >
> > > On Mon, Feb 26, 2024 at 10:37 AM H.J. Lu  wrote:
> > > >
> > > > On Sun, Feb 25, 2024 at 6:03 PM Hongtao Liu  wrote:
> > > > >
> > > > > On Mon, Feb 26, 2024 at 5:11 AM H.J. Lu  wrote:
> > > > > >
> > > > > > ldtilecfg and sttilecfg take a 512-byte memory block.  With
> > > > > > _tile_loadconfig implemented as
> > > > > >
> > > > > > extern __inline void
> > > > > > __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> > > > > > _tile_loadconfig (const void *__config)
> > > > > > {
> > > > > >   __asm__ volatile ("ldtilecfg\t%X0" :: "m" (*((const void 
> > > > > > **)__config)));
> > > > > > }
> > > > > >
> > > > > > GCC sees:
> > > > > >
> > > > > > (parallel [
> > > > > >   (asm_operands/v ("ldtilecfg   %X0") ("") 0
> > > > > >[(mem/f/c:DI (plus:DI (reg/f:DI 77 virtual-stack-vars)
> > > > > >  (const_int -64 [0xffc0])) [1 
> > > > > > MEM[(const void * *)&tile_data]+0 S8 A128])]
> > > > > >[(asm_input:DI ("m"))]
> > > > > >(clobber (reg:CC 17 flags))])
> > > > > >
> > > > > > and the memory operand size is 1 byte.  As the result, the rest of 
> > > > > > 511
> > > > > > bytes is ignored by GCC.  Implement ldtilecfg and sttilecfg 
> > > > > > intrinsics
> > > > > > with a pointer to BLKmode to honor the 512-byte memory block.
> > > > > >
> > > > > > gcc/ChangeLog:
> > > > > >
> > > > > > PR target/114098
> > > > > > * config/i386/amxtileintrin.h (_tile_loadconfig): Use
> > > > > > __builtin_ia32_ldtilecfg.
> > > > > > (_tile_storeconfig): Use __builtin_ia32_sttilecfg.
> > > > > > * config/i386/i386-builtin.def (BDESC): Add
> > > > > > __builtin_ia32_ldtilecfg and __builtin_ia32_sttilecfg.
> > > > > > * config/i386/i386-expand.cc (ix86_expand_builtin): Handle
> > > > > > IX86_BUILTIN_LDTILECFG and IX86_BUILTIN_STTILECFG.
> > > > > > * config/i386/i386.md (ldtilecfg): New pattern.
> > > > > > (sttilecfg): Likewise.
> > > > > >
> > > > > > gcc/testsuite/ChangeLog:
> > > > > >
> > > > > > PR target/114098
> > > > > > * gcc.target/i386/amxtile-4.c: New test.
> > > > > > ---
> > > > > >  gcc/config/i386/amxtileintrin.h   |  4 +-
> > > > > >  gcc/config/i386/i386-builtin.def  |  4 ++
> > > > > >  gcc/config/i386/i386-expand.cc| 19 
> > > > > >  gcc/config/i386/i386.md   | 24 ++
> > > > > >  gcc/testsuite/gcc.target/i386/amxtile-4.c | 55 
> > > > > > +++
> > > > > >  5 files changed, 104 insertions(+), 2 deletions(-)
> > > > > >  create mode 100644 gcc/testsuite/gcc.target/i386/amxtile-4.c
> > > > > >
> > > > > > diff --git a/gcc/config/i386/amxtileintrin.h 
> > > > > > b/gcc/config/i386/amxtileintrin.h
> > > > > > index d1a26e0fea5..5081b326498 100644
> > > > > > --- a/gcc/config/i386/amxtileintrin.h
> > > > > > +++ b/gcc/config/i386/amxtileintrin.h
> > > > > > @@ -39,14 +39,14 @@ extern __inline void
> > > > > >  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> > > > > >  _tile_loadconfig (const void *__config)
> > > > > >  {
> > > > > > -  __asm__ volatile ("ldtilecfg\t%X0" :: "m" (*((const void 
> > > > > > **)__config)));
> > > > > > +  __builtin_ia32_ldtilecfg (__config);
> > > > > >  }
> > > > > >
> > > > > >  extern __inline void
> > > > > >  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> > > > > >  _tile_storeconfig (void *__config)
> > > > > >  {
> > > > > > -  __asm__ volatile ("sttilecfg\t%X0" : "=m" (*((void 
> > > > > > **)__config)));
> > > > > > +  __builtin_ia32_sttilecfg (__config);
> > > > > >  }
> > > > > >
> > > > > >  extern __inline void
> > > > > > diff --git a/gcc/config/i386/i386-builtin.def 
> > > > > > b/gcc/config/i386/i386-builtin.def
> > > > > > index 729355230b8..88dd7f8857f 100644
> > > > > > --- a/gcc/config/i386/i386-builtin.def
> > > > > > +++ b/gcc/config/i386/i386-builtin.def
> > > > > > @@ -126,6 +126,10 @@ BDESC (OPTION_MASK_ISA_XSAVES | 
> > > > > > OPTION_MASK_ISA_64BIT, 0, CODE_FOR_nothing, "__b
> > > > > >  BDESC (OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, 0, 
> > > > > > CODE_FOR_nothing, "__builtin_ia32_xrstors64", 
> > > > > > IX86_BUILTIN_XRSTORS64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64)
> > > > > >  BDESC (OPTION_MASK_ISA_XSAVEC | OPTION_MASK_ISA_64BIT, 0, 
> > > > > > CODE_FOR_nothing, "__builtin_ia32_xsavec64", IX86_BUILTIN_XSAVEC64, 
> > > > > > UNKNOWN, (int) VOID_FTYPE_PVOID_INT64)
> > > > > >
> > > > > > +/* LDFILECFG and STFILECFG.  */
> > > > > > +BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_AMX_TILE, 
> > > > > > CODE_FOR_ldtilecfg, "__builtin_ia32_ldtilecfg", 
> > > > > > IX86_BUILTIN_LDTILECFG, UNKNOWN, (int) VOID_FTYPE_PCVOID)
> > > > > > +BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_AMX_TILE, 
> > 

Re: [PATCH] x86: Properly implement AMX-TILE load/store intrinsics

2024-02-26 Thread H.J. Lu
On Sun, Feb 25, 2024 at 8:25 PM H.J. Lu  wrote:
>
> On Sun, Feb 25, 2024 at 7:03 PM Hongtao Liu  wrote:
> >
> > On Mon, Feb 26, 2024 at 10:37 AM H.J. Lu  wrote:
> > >
> > > On Sun, Feb 25, 2024 at 6:03 PM Hongtao Liu  wrote:
> > > >
> > > > On Mon, Feb 26, 2024 at 5:11 AM H.J. Lu  wrote:
> > > > >
> > > > > ldtilecfg and sttilecfg take a 512-byte memory block.  With
> > > > > _tile_loadconfig implemented as
> > > > >
> > > > > extern __inline void
> > > > > __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> > > > > _tile_loadconfig (const void *__config)
> > > > > {
> > > > >   __asm__ volatile ("ldtilecfg\t%X0" :: "m" (*((const void 
> > > > > **)__config)));
> > > > > }
> > > > >
> > > > > GCC sees:
> > > > >
> > > > > (parallel [
> > > > >   (asm_operands/v ("ldtilecfg   %X0") ("") 0
> > > > >[(mem/f/c:DI (plus:DI (reg/f:DI 77 virtual-stack-vars)
> > > > >  (const_int -64 [0xffc0])) [1 
> > > > > MEM[(const void * *)&tile_data]+0 S8 A128])]
> > > > >[(asm_input:DI ("m"))]
> > > > >(clobber (reg:CC 17 flags))])
> > > > >
> > > > > and the memory operand size is 1 byte.  As the result, the rest of 511
> > > > > bytes is ignored by GCC.  Implement ldtilecfg and sttilecfg intrinsics
> > > > > with a pointer to BLKmode to honor the 512-byte memory block.
> > > > >
> > > > > gcc/ChangeLog:
> > > > >
> > > > > PR target/114098
> > > > > * config/i386/amxtileintrin.h (_tile_loadconfig): Use
> > > > > __builtin_ia32_ldtilecfg.
> > > > > (_tile_storeconfig): Use __builtin_ia32_sttilecfg.
> > > > > * config/i386/i386-builtin.def (BDESC): Add
> > > > > __builtin_ia32_ldtilecfg and __builtin_ia32_sttilecfg.
> > > > > * config/i386/i386-expand.cc (ix86_expand_builtin): Handle
> > > > > IX86_BUILTIN_LDTILECFG and IX86_BUILTIN_STTILECFG.
> > > > > * config/i386/i386.md (ldtilecfg): New pattern.
> > > > > (sttilecfg): Likewise.
> > > > >
> > > > > gcc/testsuite/ChangeLog:
> > > > >
> > > > > PR target/114098
> > > > > * gcc.target/i386/amxtile-4.c: New test.
> > > > > ---
> > > > >  gcc/config/i386/amxtileintrin.h   |  4 +-
> > > > >  gcc/config/i386/i386-builtin.def  |  4 ++
> > > > >  gcc/config/i386/i386-expand.cc| 19 
> > > > >  gcc/config/i386/i386.md   | 24 ++
> > > > >  gcc/testsuite/gcc.target/i386/amxtile-4.c | 55 
> > > > > +++
> > > > >  5 files changed, 104 insertions(+), 2 deletions(-)
> > > > >  create mode 100644 gcc/testsuite/gcc.target/i386/amxtile-4.c
> > > > >
> > > > > diff --git a/gcc/config/i386/amxtileintrin.h 
> > > > > b/gcc/config/i386/amxtileintrin.h
> > > > > index d1a26e0fea5..5081b326498 100644
> > > > > --- a/gcc/config/i386/amxtileintrin.h
> > > > > +++ b/gcc/config/i386/amxtileintrin.h
> > > > > @@ -39,14 +39,14 @@ extern __inline void
> > > > >  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> > > > >  _tile_loadconfig (const void *__config)
> > > > >  {
> > > > > -  __asm__ volatile ("ldtilecfg\t%X0" :: "m" (*((const void 
> > > > > **)__config)));
> > > > > +  __builtin_ia32_ldtilecfg (__config);
> > > > >  }
> > > > >
> > > > >  extern __inline void
> > > > >  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> > > > >  _tile_storeconfig (void *__config)
> > > > >  {
> > > > > -  __asm__ volatile ("sttilecfg\t%X0" : "=m" (*((void **)__config)));
> > > > > +  __builtin_ia32_sttilecfg (__config);
> > > > >  }
> > > > >
> > > > >  extern __inline void
> > > > > diff --git a/gcc/config/i386/i386-builtin.def 
> > > > > b/gcc/config/i386/i386-builtin.def
> > > > > index 729355230b8..88dd7f8857f 100644
> > > > > --- a/gcc/config/i386/i386-builtin.def
> > > > > +++ b/gcc/config/i386/i386-builtin.def
> > > > > @@ -126,6 +126,10 @@ BDESC (OPTION_MASK_ISA_XSAVES | 
> > > > > OPTION_MASK_ISA_64BIT, 0, CODE_FOR_nothing, "__b
> > > > >  BDESC (OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, 0, 
> > > > > CODE_FOR_nothing, "__builtin_ia32_xrstors64", IX86_BUILTIN_XRSTORS64, 
> > > > > UNKNOWN, (int) VOID_FTYPE_PVOID_INT64)
> > > > >  BDESC (OPTION_MASK_ISA_XSAVEC | OPTION_MASK_ISA_64BIT, 0, 
> > > > > CODE_FOR_nothing, "__builtin_ia32_xsavec64", IX86_BUILTIN_XSAVEC64, 
> > > > > UNKNOWN, (int) VOID_FTYPE_PVOID_INT64)
> > > > >
> > > > > +/* LDFILECFG and STFILECFG.  */
> > > > > +BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_AMX_TILE, 
> > > > > CODE_FOR_ldtilecfg, "__builtin_ia32_ldtilecfg", 
> > > > > IX86_BUILTIN_LDTILECFG, UNKNOWN, (int) VOID_FTYPE_PCVOID)
> > > > > +BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_AMX_TILE, 
> > > > > CODE_FOR_ldtilecfg, "__builtin_ia32_sttilecfg", 
> > > > > IX86_BUILTIN_STTILECFG, UNKNOWN, (int) VOID_FTYPE_PVOID)
> > > > CODE_FOR_sttilecfg.
> > >
> > > It is unused.  I changed both to CODE_FOR_nothing.
> > >
> > > > > +
> > > > >  /* SSE */
> > > > >  BDESC (OPTION_MASK_ISA_SSE, 0, COD

Re: [PATCH] x86: Properly implement AMX-TILE load/store intrinsics

2024-02-25 Thread H.J. Lu
On Sun, Feb 25, 2024 at 7:03 PM Hongtao Liu  wrote:
>
> On Mon, Feb 26, 2024 at 10:37 AM H.J. Lu  wrote:
> >
> > On Sun, Feb 25, 2024 at 6:03 PM Hongtao Liu  wrote:
> > >
> > > On Mon, Feb 26, 2024 at 5:11 AM H.J. Lu  wrote:
> > > >
> > > > ldtilecfg and sttilecfg take a 512-byte memory block.  With
> > > > _tile_loadconfig implemented as
> > > >
> > > > extern __inline void
> > > > __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> > > > _tile_loadconfig (const void *__config)
> > > > {
> > > >   __asm__ volatile ("ldtilecfg\t%X0" :: "m" (*((const void 
> > > > **)__config)));
> > > > }
> > > >
> > > > GCC sees:
> > > >
> > > > (parallel [
> > > >   (asm_operands/v ("ldtilecfg   %X0") ("") 0
> > > >[(mem/f/c:DI (plus:DI (reg/f:DI 77 virtual-stack-vars)
> > > >  (const_int -64 [0xffc0])) [1 
> > > > MEM[(const void * *)&tile_data]+0 S8 A128])]
> > > >[(asm_input:DI ("m"))]
> > > >(clobber (reg:CC 17 flags))])
> > > >
> > > > and the memory operand size is 1 byte.  As the result, the rest of 511
> > > > bytes is ignored by GCC.  Implement ldtilecfg and sttilecfg intrinsics
> > > > with a pointer to BLKmode to honor the 512-byte memory block.
> > > >
> > > > gcc/ChangeLog:
> > > >
> > > > PR target/114098
> > > > * config/i386/amxtileintrin.h (_tile_loadconfig): Use
> > > > __builtin_ia32_ldtilecfg.
> > > > (_tile_storeconfig): Use __builtin_ia32_sttilecfg.
> > > > * config/i386/i386-builtin.def (BDESC): Add
> > > > __builtin_ia32_ldtilecfg and __builtin_ia32_sttilecfg.
> > > > * config/i386/i386-expand.cc (ix86_expand_builtin): Handle
> > > > IX86_BUILTIN_LDTILECFG and IX86_BUILTIN_STTILECFG.
> > > > * config/i386/i386.md (ldtilecfg): New pattern.
> > > > (sttilecfg): Likewise.
> > > >
> > > > gcc/testsuite/ChangeLog:
> > > >
> > > > PR target/114098
> > > > * gcc.target/i386/amxtile-4.c: New test.
> > > > ---
> > > >  gcc/config/i386/amxtileintrin.h   |  4 +-
> > > >  gcc/config/i386/i386-builtin.def  |  4 ++
> > > >  gcc/config/i386/i386-expand.cc| 19 
> > > >  gcc/config/i386/i386.md   | 24 ++
> > > >  gcc/testsuite/gcc.target/i386/amxtile-4.c | 55 +++
> > > >  5 files changed, 104 insertions(+), 2 deletions(-)
> > > >  create mode 100644 gcc/testsuite/gcc.target/i386/amxtile-4.c
> > > >
> > > > diff --git a/gcc/config/i386/amxtileintrin.h 
> > > > b/gcc/config/i386/amxtileintrin.h
> > > > index d1a26e0fea5..5081b326498 100644
> > > > --- a/gcc/config/i386/amxtileintrin.h
> > > > +++ b/gcc/config/i386/amxtileintrin.h
> > > > @@ -39,14 +39,14 @@ extern __inline void
> > > >  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> > > >  _tile_loadconfig (const void *__config)
> > > >  {
> > > > -  __asm__ volatile ("ldtilecfg\t%X0" :: "m" (*((const void 
> > > > **)__config)));
> > > > +  __builtin_ia32_ldtilecfg (__config);
> > > >  }
> > > >
> > > >  extern __inline void
> > > >  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> > > >  _tile_storeconfig (void *__config)
> > > >  {
> > > > -  __asm__ volatile ("sttilecfg\t%X0" : "=m" (*((void **)__config)));
> > > > +  __builtin_ia32_sttilecfg (__config);
> > > >  }
> > > >
> > > >  extern __inline void
> > > > diff --git a/gcc/config/i386/i386-builtin.def 
> > > > b/gcc/config/i386/i386-builtin.def
> > > > index 729355230b8..88dd7f8857f 100644
> > > > --- a/gcc/config/i386/i386-builtin.def
> > > > +++ b/gcc/config/i386/i386-builtin.def
> > > > @@ -126,6 +126,10 @@ BDESC (OPTION_MASK_ISA_XSAVES | 
> > > > OPTION_MASK_ISA_64BIT, 0, CODE_FOR_nothing, "__b
> > > >  BDESC (OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, 0, 
> > > > CODE_FOR_nothing, "__builtin_ia32_xrstors64", IX86_BUILTIN_XRSTORS64, 
> > > > UNKNOWN, (int) VOID_FTYPE_PVOID_INT64)
> > > >  BDESC (OPTION_MASK_ISA_XSAVEC | OPTION_MASK_ISA_64BIT, 0, 
> > > > CODE_FOR_nothing, "__builtin_ia32_xsavec64", IX86_BUILTIN_XSAVEC64, 
> > > > UNKNOWN, (int) VOID_FTYPE_PVOID_INT64)
> > > >
> > > > +/* LDFILECFG and STFILECFG.  */
> > > > +BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_AMX_TILE, 
> > > > CODE_FOR_ldtilecfg, "__builtin_ia32_ldtilecfg", IX86_BUILTIN_LDTILECFG, 
> > > > UNKNOWN, (int) VOID_FTYPE_PCVOID)
> > > > +BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_AMX_TILE, 
> > > > CODE_FOR_ldtilecfg, "__builtin_ia32_sttilecfg", IX86_BUILTIN_STTILECFG, 
> > > > UNKNOWN, (int) VOID_FTYPE_PVOID)
> > > CODE_FOR_sttilecfg.
> >
> > It is unused.  I changed both to CODE_FOR_nothing.
> >
> > > > +
> > > >  /* SSE */
> > > >  BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_movv4sf_internal, 
> > > > "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) 
> > > > VOID_FTYPE_PFLOAT_V4SF)
> > > >  BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_sse_movntv4sf, 
> > > > "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) 
> > > > VOID_

Re: [PATCH] x86: Properly implement AMX-TILE load/store intrinsics

2024-02-25 Thread Hongtao Liu
On Mon, Feb 26, 2024 at 10:37 AM H.J. Lu  wrote:
>
> On Sun, Feb 25, 2024 at 6:03 PM Hongtao Liu  wrote:
> >
> > On Mon, Feb 26, 2024 at 5:11 AM H.J. Lu  wrote:
> > >
> > > ldtilecfg and sttilecfg take a 512-byte memory block.  With
> > > _tile_loadconfig implemented as
> > >
> > > extern __inline void
> > > __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> > > _tile_loadconfig (const void *__config)
> > > {
> > >   __asm__ volatile ("ldtilecfg\t%X0" :: "m" (*((const void **)__config)));
> > > }
> > >
> > > GCC sees:
> > >
> > > (parallel [
> > >   (asm_operands/v ("ldtilecfg   %X0") ("") 0
> > >[(mem/f/c:DI (plus:DI (reg/f:DI 77 virtual-stack-vars)
> > >  (const_int -64 [0xffc0])) [1 
> > > MEM[(const void * *)&tile_data]+0 S8 A128])]
> > >[(asm_input:DI ("m"))]
> > >(clobber (reg:CC 17 flags))])
> > >
> > > and the memory operand size is 1 byte.  As the result, the rest of 511
> > > bytes is ignored by GCC.  Implement ldtilecfg and sttilecfg intrinsics
> > > with a pointer to BLKmode to honor the 512-byte memory block.
> > >
> > > gcc/ChangeLog:
> > >
> > > PR target/114098
> > > * config/i386/amxtileintrin.h (_tile_loadconfig): Use
> > > __builtin_ia32_ldtilecfg.
> > > (_tile_storeconfig): Use __builtin_ia32_sttilecfg.
> > > * config/i386/i386-builtin.def (BDESC): Add
> > > __builtin_ia32_ldtilecfg and __builtin_ia32_sttilecfg.
> > > * config/i386/i386-expand.cc (ix86_expand_builtin): Handle
> > > IX86_BUILTIN_LDTILECFG and IX86_BUILTIN_STTILECFG.
> > > * config/i386/i386.md (ldtilecfg): New pattern.
> > > (sttilecfg): Likewise.
> > >
> > > gcc/testsuite/ChangeLog:
> > >
> > > PR target/114098
> > > * gcc.target/i386/amxtile-4.c: New test.
> > > ---
> > >  gcc/config/i386/amxtileintrin.h   |  4 +-
> > >  gcc/config/i386/i386-builtin.def  |  4 ++
> > >  gcc/config/i386/i386-expand.cc| 19 
> > >  gcc/config/i386/i386.md   | 24 ++
> > >  gcc/testsuite/gcc.target/i386/amxtile-4.c | 55 +++
> > >  5 files changed, 104 insertions(+), 2 deletions(-)
> > >  create mode 100644 gcc/testsuite/gcc.target/i386/amxtile-4.c
> > >
> > > diff --git a/gcc/config/i386/amxtileintrin.h 
> > > b/gcc/config/i386/amxtileintrin.h
> > > index d1a26e0fea5..5081b326498 100644
> > > --- a/gcc/config/i386/amxtileintrin.h
> > > +++ b/gcc/config/i386/amxtileintrin.h
> > > @@ -39,14 +39,14 @@ extern __inline void
> > >  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> > >  _tile_loadconfig (const void *__config)
> > >  {
> > > -  __asm__ volatile ("ldtilecfg\t%X0" :: "m" (*((const void 
> > > **)__config)));
> > > +  __builtin_ia32_ldtilecfg (__config);
> > >  }
> > >
> > >  extern __inline void
> > >  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> > >  _tile_storeconfig (void *__config)
> > >  {
> > > -  __asm__ volatile ("sttilecfg\t%X0" : "=m" (*((void **)__config)));
> > > +  __builtin_ia32_sttilecfg (__config);
> > >  }
> > >
> > >  extern __inline void
> > > diff --git a/gcc/config/i386/i386-builtin.def 
> > > b/gcc/config/i386/i386-builtin.def
> > > index 729355230b8..88dd7f8857f 100644
> > > --- a/gcc/config/i386/i386-builtin.def
> > > +++ b/gcc/config/i386/i386-builtin.def
> > > @@ -126,6 +126,10 @@ BDESC (OPTION_MASK_ISA_XSAVES | 
> > > OPTION_MASK_ISA_64BIT, 0, CODE_FOR_nothing, "__b
> > >  BDESC (OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, 0, 
> > > CODE_FOR_nothing, "__builtin_ia32_xrstors64", IX86_BUILTIN_XRSTORS64, 
> > > UNKNOWN, (int) VOID_FTYPE_PVOID_INT64)
> > >  BDESC (OPTION_MASK_ISA_XSAVEC | OPTION_MASK_ISA_64BIT, 0, 
> > > CODE_FOR_nothing, "__builtin_ia32_xsavec64", IX86_BUILTIN_XSAVEC64, 
> > > UNKNOWN, (int) VOID_FTYPE_PVOID_INT64)
> > >
> > > +/* LDFILECFG and STFILECFG.  */
> > > +BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_AMX_TILE, 
> > > CODE_FOR_ldtilecfg, "__builtin_ia32_ldtilecfg", IX86_BUILTIN_LDTILECFG, 
> > > UNKNOWN, (int) VOID_FTYPE_PCVOID)
> > > +BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_AMX_TILE, 
> > > CODE_FOR_ldtilecfg, "__builtin_ia32_sttilecfg", IX86_BUILTIN_STTILECFG, 
> > > UNKNOWN, (int) VOID_FTYPE_PVOID)
> > CODE_FOR_sttilecfg.
>
> It is unused.  I changed both to CODE_FOR_nothing.
>
> > > +
> > >  /* SSE */
> > >  BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_movv4sf_internal, 
> > > "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) 
> > > VOID_FTYPE_PFLOAT_V4SF)
> > >  BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_sse_movntv4sf, 
> > > "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) 
> > > VOID_FTYPE_PFLOAT_V4SF)
> > > diff --git a/gcc/config/i386/i386-expand.cc 
> > > b/gcc/config/i386/i386-expand.cc
> > > index a4d3369f01b..17993eb837f 100644
> > > --- a/gcc/config/i386/i386-expand.cc
> > > +++ b/gcc/config/i386/i386-expand.cc
> > > @@ -14152,6 +14152,25 @@ ix86_expand_built

Re: [PATCH] x86: Properly implement AMX-TILE load/store intrinsics

2024-02-25 Thread H.J. Lu
On Sun, Feb 25, 2024 at 6:03 PM Hongtao Liu  wrote:
>
> On Mon, Feb 26, 2024 at 5:11 AM H.J. Lu  wrote:
> >
> > ldtilecfg and sttilecfg take a 512-byte memory block.  With
> > _tile_loadconfig implemented as
> >
> > extern __inline void
> > __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> > _tile_loadconfig (const void *__config)
> > {
> >   __asm__ volatile ("ldtilecfg\t%X0" :: "m" (*((const void **)__config)));
> > }
> >
> > GCC sees:
> >
> > (parallel [
> >   (asm_operands/v ("ldtilecfg   %X0") ("") 0
> >[(mem/f/c:DI (plus:DI (reg/f:DI 77 virtual-stack-vars)
> >  (const_int -64 [0xffc0])) [1 
> > MEM[(const void * *)&tile_data]+0 S8 A128])]
> >[(asm_input:DI ("m"))]
> >(clobber (reg:CC 17 flags))])
> >
> > and the memory operand size is 1 byte.  As the result, the rest of 511
> > bytes is ignored by GCC.  Implement ldtilecfg and sttilecfg intrinsics
> > with a pointer to BLKmode to honor the 512-byte memory block.
> >
> > gcc/ChangeLog:
> >
> > PR target/114098
> > * config/i386/amxtileintrin.h (_tile_loadconfig): Use
> > __builtin_ia32_ldtilecfg.
> > (_tile_storeconfig): Use __builtin_ia32_sttilecfg.
> > * config/i386/i386-builtin.def (BDESC): Add
> > __builtin_ia32_ldtilecfg and __builtin_ia32_sttilecfg.
> > * config/i386/i386-expand.cc (ix86_expand_builtin): Handle
> > IX86_BUILTIN_LDTILECFG and IX86_BUILTIN_STTILECFG.
> > * config/i386/i386.md (ldtilecfg): New pattern.
> > (sttilecfg): Likewise.
> >
> > gcc/testsuite/ChangeLog:
> >
> > PR target/114098
> > * gcc.target/i386/amxtile-4.c: New test.
> > ---
> >  gcc/config/i386/amxtileintrin.h   |  4 +-
> >  gcc/config/i386/i386-builtin.def  |  4 ++
> >  gcc/config/i386/i386-expand.cc| 19 
> >  gcc/config/i386/i386.md   | 24 ++
> >  gcc/testsuite/gcc.target/i386/amxtile-4.c | 55 +++
> >  5 files changed, 104 insertions(+), 2 deletions(-)
> >  create mode 100644 gcc/testsuite/gcc.target/i386/amxtile-4.c
> >
> > diff --git a/gcc/config/i386/amxtileintrin.h 
> > b/gcc/config/i386/amxtileintrin.h
> > index d1a26e0fea5..5081b326498 100644
> > --- a/gcc/config/i386/amxtileintrin.h
> > +++ b/gcc/config/i386/amxtileintrin.h
> > @@ -39,14 +39,14 @@ extern __inline void
> >  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> >  _tile_loadconfig (const void *__config)
> >  {
> > -  __asm__ volatile ("ldtilecfg\t%X0" :: "m" (*((const void **)__config)));
> > +  __builtin_ia32_ldtilecfg (__config);
> >  }
> >
> >  extern __inline void
> >  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> >  _tile_storeconfig (void *__config)
> >  {
> > -  __asm__ volatile ("sttilecfg\t%X0" : "=m" (*((void **)__config)));
> > +  __builtin_ia32_sttilecfg (__config);
> >  }
> >
> >  extern __inline void
> > diff --git a/gcc/config/i386/i386-builtin.def 
> > b/gcc/config/i386/i386-builtin.def
> > index 729355230b8..88dd7f8857f 100644
> > --- a/gcc/config/i386/i386-builtin.def
> > +++ b/gcc/config/i386/i386-builtin.def
> > @@ -126,6 +126,10 @@ BDESC (OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, 
> > 0, CODE_FOR_nothing, "__b
> >  BDESC (OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, 0, 
> > CODE_FOR_nothing, "__builtin_ia32_xrstors64", IX86_BUILTIN_XRSTORS64, 
> > UNKNOWN, (int) VOID_FTYPE_PVOID_INT64)
> >  BDESC (OPTION_MASK_ISA_XSAVEC | OPTION_MASK_ISA_64BIT, 0, 
> > CODE_FOR_nothing, "__builtin_ia32_xsavec64", IX86_BUILTIN_XSAVEC64, 
> > UNKNOWN, (int) VOID_FTYPE_PVOID_INT64)
> >
> > +/* LDFILECFG and STFILECFG.  */
> > +BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_AMX_TILE, 
> > CODE_FOR_ldtilecfg, "__builtin_ia32_ldtilecfg", IX86_BUILTIN_LDTILECFG, 
> > UNKNOWN, (int) VOID_FTYPE_PCVOID)
> > +BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_AMX_TILE, 
> > CODE_FOR_ldtilecfg, "__builtin_ia32_sttilecfg", IX86_BUILTIN_STTILECFG, 
> > UNKNOWN, (int) VOID_FTYPE_PVOID)
> CODE_FOR_sttilecfg.

It is unused.  I changed both to CODE_FOR_nothing.

> > +
> >  /* SSE */
> >  BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_movv4sf_internal, 
> > "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) 
> > VOID_FTYPE_PFLOAT_V4SF)
> >  BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_sse_movntv4sf, 
> > "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) 
> > VOID_FTYPE_PFLOAT_V4SF)
> > diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
> > index a4d3369f01b..17993eb837f 100644
> > --- a/gcc/config/i386/i386-expand.cc
> > +++ b/gcc/config/i386/i386-expand.cc
> > @@ -14152,6 +14152,25 @@ ix86_expand_builtin (tree exp, rtx target, rtx 
> > subtarget,
> > emit_insn (pat);
> >return 0;
> >
> > +case IX86_BUILTIN_LDTILECFG:
> > +case IX86_BUILTIN_STTILECFG:
> > +  arg0 = CALL_EXPR_ARG (exp, 0);
> > +  op0 = expand_normal (arg0);
> > +
> > +  if (!address_oper

Re: [PATCH] x86: Properly implement AMX-TILE load/store intrinsics

2024-02-25 Thread Hongtao Liu
On Mon, Feb 26, 2024 at 5:11 AM H.J. Lu  wrote:
>
> ldtilecfg and sttilecfg take a 512-byte memory block.  With
> _tile_loadconfig implemented as
>
> extern __inline void
> __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> _tile_loadconfig (const void *__config)
> {
>   __asm__ volatile ("ldtilecfg\t%X0" :: "m" (*((const void **)__config)));
> }
>
> GCC sees:
>
> (parallel [
>   (asm_operands/v ("ldtilecfg   %X0") ("") 0
>[(mem/f/c:DI (plus:DI (reg/f:DI 77 virtual-stack-vars)
>  (const_int -64 [0xffc0])) [1 MEM[(const 
> void * *)&tile_data]+0 S8 A128])]
>[(asm_input:DI ("m"))]
>(clobber (reg:CC 17 flags))])
>
> and the memory operand size is 1 byte.  As the result, the rest of 511
> bytes is ignored by GCC.  Implement ldtilecfg and sttilecfg intrinsics
> with a pointer to BLKmode to honor the 512-byte memory block.
>
> gcc/ChangeLog:
>
> PR target/114098
> * config/i386/amxtileintrin.h (_tile_loadconfig): Use
> __builtin_ia32_ldtilecfg.
> (_tile_storeconfig): Use __builtin_ia32_sttilecfg.
> * config/i386/i386-builtin.def (BDESC): Add
> __builtin_ia32_ldtilecfg and __builtin_ia32_sttilecfg.
> * config/i386/i386-expand.cc (ix86_expand_builtin): Handle
> IX86_BUILTIN_LDTILECFG and IX86_BUILTIN_STTILECFG.
> * config/i386/i386.md (ldtilecfg): New pattern.
> (sttilecfg): Likewise.
>
> gcc/testsuite/ChangeLog:
>
> PR target/114098
> * gcc.target/i386/amxtile-4.c: New test.
> ---
>  gcc/config/i386/amxtileintrin.h   |  4 +-
>  gcc/config/i386/i386-builtin.def  |  4 ++
>  gcc/config/i386/i386-expand.cc| 19 
>  gcc/config/i386/i386.md   | 24 ++
>  gcc/testsuite/gcc.target/i386/amxtile-4.c | 55 +++
>  5 files changed, 104 insertions(+), 2 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/amxtile-4.c
>
> diff --git a/gcc/config/i386/amxtileintrin.h b/gcc/config/i386/amxtileintrin.h
> index d1a26e0fea5..5081b326498 100644
> --- a/gcc/config/i386/amxtileintrin.h
> +++ b/gcc/config/i386/amxtileintrin.h
> @@ -39,14 +39,14 @@ extern __inline void
>  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
>  _tile_loadconfig (const void *__config)
>  {
> -  __asm__ volatile ("ldtilecfg\t%X0" :: "m" (*((const void **)__config)));
> +  __builtin_ia32_ldtilecfg (__config);
>  }
>
>  extern __inline void
>  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
>  _tile_storeconfig (void *__config)
>  {
> -  __asm__ volatile ("sttilecfg\t%X0" : "=m" (*((void **)__config)));
> +  __builtin_ia32_sttilecfg (__config);
>  }
>
>  extern __inline void
> diff --git a/gcc/config/i386/i386-builtin.def 
> b/gcc/config/i386/i386-builtin.def
> index 729355230b8..88dd7f8857f 100644
> --- a/gcc/config/i386/i386-builtin.def
> +++ b/gcc/config/i386/i386-builtin.def
> @@ -126,6 +126,10 @@ BDESC (OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, 
> 0, CODE_FOR_nothing, "__b
>  BDESC (OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, 0, CODE_FOR_nothing, 
> "__builtin_ia32_xrstors64", IX86_BUILTIN_XRSTORS64, UNKNOWN, (int) 
> VOID_FTYPE_PVOID_INT64)
>  BDESC (OPTION_MASK_ISA_XSAVEC | OPTION_MASK_ISA_64BIT, 0, CODE_FOR_nothing, 
> "__builtin_ia32_xsavec64", IX86_BUILTIN_XSAVEC64, UNKNOWN, (int) 
> VOID_FTYPE_PVOID_INT64)
>
> +/* LDFILECFG and STFILECFG.  */
> +BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_AMX_TILE, CODE_FOR_ldtilecfg, 
> "__builtin_ia32_ldtilecfg", IX86_BUILTIN_LDTILECFG, UNKNOWN, (int) 
> VOID_FTYPE_PCVOID)
> +BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_AMX_TILE, CODE_FOR_ldtilecfg, 
> "__builtin_ia32_sttilecfg", IX86_BUILTIN_STTILECFG, UNKNOWN, (int) 
> VOID_FTYPE_PVOID)
This entry should use CODE_FOR_sttilecfg.
> +
>  /* SSE */
>  BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_movv4sf_internal, 
> "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) 
> VOID_FTYPE_PFLOAT_V4SF)
>  BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_sse_movntv4sf, 
> "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) 
> VOID_FTYPE_PFLOAT_V4SF)
> diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
> index a4d3369f01b..17993eb837f 100644
> --- a/gcc/config/i386/i386-expand.cc
> +++ b/gcc/config/i386/i386-expand.cc
> @@ -14152,6 +14152,25 @@ ix86_expand_builtin (tree exp, rtx target, rtx 
> subtarget,
> emit_insn (pat);
>return 0;
>
> +case IX86_BUILTIN_LDTILECFG:
> +case IX86_BUILTIN_STTILECFG:
> +  arg0 = CALL_EXPR_ARG (exp, 0);
> +  op0 = expand_normal (arg0);
> +
> +  if (!address_operand (op0, VOIDmode))
> +   {
> + op0 = convert_memory_address (Pmode, op0);
> + op0 = copy_addr_to_reg (op0);
> +   }
> +  op0 = gen_rtx_MEM (BLKmode, op0);
Maybe we can just use XImode here and adjust the patterns to use XI.
> +  if (fcode == IX86_BUILTIN_LDTILECFG)
> +   icode = CODE_FOR_ldtilecfg;
> +  else
> 

Re: [PATCH] x86: Properly implement AMX-TILE load/store intrinsics

2024-02-25 Thread Hongyu Wang
Thanks for fixing this! Didn't notice that the pointer conversion can
cause this issue...

Would it be possible to use a local array instead, like

char a[64] = (char *)p
__asm__ volatile ("ldtilecfg\t%X0" :: "m" (a));

If not, the two patterns can use the "m" constraint instead of "jm",
since APX supports the EGPR extension for AMX instructions.


[PATCH] x86: Properly implement AMX-TILE load/store intrinsics

2024-02-25 Thread H.J. Lu
ldtilecfg and sttilecfg take a 512-byte memory block.  With
_tile_loadconfig implemented as

extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_tile_loadconfig (const void *__config)
{
  __asm__ volatile ("ldtilecfg\t%X0" :: "m" (*((const void **)__config)));
}

GCC sees:

(parallel [
  (asm_operands/v ("ldtilecfg   %X0") ("") 0
   [(mem/f/c:DI (plus:DI (reg/f:DI 77 virtual-stack-vars)
 (const_int -64 [0xffffffffffffffc0])) [1 MEM[(const void * *)&tile_data]+0 S8 A128])]
   [(asm_input:DI ("m"))]
   (clobber (reg:CC 17 flags))])

and the memory operand size is 1 byte.  As a result, the remaining 511
bytes are ignored by GCC.  Implement the ldtilecfg and sttilecfg intrinsics
with a pointer to BLKmode to honor the 512-byte memory block.

gcc/ChangeLog:

PR target/114098
* config/i386/amxtileintrin.h (_tile_loadconfig): Use
__builtin_ia32_ldtilecfg.
(_tile_storeconfig): Use __builtin_ia32_sttilecfg.
* config/i386/i386-builtin.def (BDESC): Add
__builtin_ia32_ldtilecfg and __builtin_ia32_sttilecfg.
* config/i386/i386-expand.cc (ix86_expand_builtin): Handle
IX86_BUILTIN_LDTILECFG and IX86_BUILTIN_STTILECFG.
* config/i386/i386.md (ldtilecfg): New pattern.
(sttilecfg): Likewise.

gcc/testsuite/ChangeLog:

PR target/114098
* gcc.target/i386/amxtile-4.c: New test.
---
 gcc/config/i386/amxtileintrin.h   |  4 +-
 gcc/config/i386/i386-builtin.def  |  4 ++
 gcc/config/i386/i386-expand.cc| 19 
 gcc/config/i386/i386.md   | 24 ++
 gcc/testsuite/gcc.target/i386/amxtile-4.c | 55 +++
 5 files changed, 104 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/amxtile-4.c

diff --git a/gcc/config/i386/amxtileintrin.h b/gcc/config/i386/amxtileintrin.h
index d1a26e0fea5..5081b326498 100644
--- a/gcc/config/i386/amxtileintrin.h
+++ b/gcc/config/i386/amxtileintrin.h
@@ -39,14 +39,14 @@ extern __inline void
 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _tile_loadconfig (const void *__config)
 {
-  __asm__ volatile ("ldtilecfg\t%X0" :: "m" (*((const void **)__config)));
+  __builtin_ia32_ldtilecfg (__config);
 }
 
 extern __inline void
 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _tile_storeconfig (void *__config)
 {
-  __asm__ volatile ("sttilecfg\t%X0" : "=m" (*((void **)__config)));
+  __builtin_ia32_sttilecfg (__config);
 }
 
 extern __inline void
diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
index 729355230b8..88dd7f8857f 100644
--- a/gcc/config/i386/i386-builtin.def
+++ b/gcc/config/i386/i386-builtin.def
@@ -126,6 +126,10 @@ BDESC (OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, 0, CODE_FOR_nothing, "__b
 BDESC (OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, 0, CODE_FOR_nothing, "__builtin_ia32_xrstors64", IX86_BUILTIN_XRSTORS64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64)
 BDESC (OPTION_MASK_ISA_XSAVEC | OPTION_MASK_ISA_64BIT, 0, CODE_FOR_nothing, "__builtin_ia32_xsavec64", IX86_BUILTIN_XSAVEC64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64)
 
+/* LDTILECFG and STTILECFG.  */
+BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_AMX_TILE, CODE_FOR_ldtilecfg, "__builtin_ia32_ldtilecfg", IX86_BUILTIN_LDTILECFG, UNKNOWN, (int) VOID_FTYPE_PCVOID)
+BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_AMX_TILE, CODE_FOR_ldtilecfg, "__builtin_ia32_sttilecfg", IX86_BUILTIN_STTILECFG, UNKNOWN, (int) VOID_FTYPE_PVOID)
+
 /* SSE */
 BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_movv4sf_internal, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF)
 BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF)
diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index a4d3369f01b..17993eb837f 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -14152,6 +14152,25 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
emit_insn (pat);
   return 0;
 
+case IX86_BUILTIN_LDTILECFG:
+case IX86_BUILTIN_STTILECFG:
+  arg0 = CALL_EXPR_ARG (exp, 0);
+  op0 = expand_normal (arg0);
+
+  if (!address_operand (op0, VOIDmode))
+   {
+ op0 = convert_memory_address (Pmode, op0);
+ op0 = copy_addr_to_reg (op0);
+   }
+  op0 = gen_rtx_MEM (BLKmode, op0);
+  if (fcode == IX86_BUILTIN_LDTILECFG)
+   icode = CODE_FOR_ldtilecfg;
+  else
+   icode = CODE_FOR_sttilecfg;
+  pat = GEN_FCN (icode) (op0);
+  emit_insn (pat);
+  return 0;
+
 case IX86_BUILTIN_LLWPCB:
   arg0 = CALL_EXPR_ARG (exp, 0);
   op0 = expand_normal (arg0);
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 6a26d966a0e..0ede6adac2f 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/confi