Re: [PATCH 3/3] i386: Enable AVX512 memory broadcast for INT andnot

2018-10-21 Thread Uros Bizjak
On Mon, Oct 22, 2018 at 12:59 AM H.J. Lu  wrote:
>
> Many AVX512 vector operations can broadcast from a scalar memory source.
> This patch enables memory broadcast for INT andnot operations.
>
> gcc/
>
> PR target/72782
> * config/i386/sse.md (*andnot3_bst): New.

This pattern should be named ..._bcst, to be consistent with others.

> gcc/testsuite/
>
> PR target/72782
> * gcc.target/i386/avx512f-andn-di-zmm-1.c: New test.
> * gcc.target/i386/avx512f-andn-si-zmm-1.c: Likewise.
> * gcc.target/i386/avx512f-andn-si-zmm-2.c: Likewise.
> * gcc.target/i386/avx512f-andn-si-zmm-3.c: Likewise.
> * gcc.target/i386/avx512f-andn-si-zmm-4.c: Likewise.
> * gcc.target/i386/avx512f-andn-si-zmm-5.c: Likewise.
> * gcc.target/i386/avx512vl-andn-si-xmm-1.c: Likewise.
> * gcc.target/i386/avx512vl-andn-si-ymm-1.c: Likewise.

OK with updated pattern name and ChangeLog.

Thanks,
Uros.

>  gcc/config/i386/sse.md  | 13 +
>  .../gcc.target/i386/avx512f-andn-di-zmm-1.c | 12 
>  .../gcc.target/i386/avx512f-andn-si-zmm-1.c | 12 
>  .../gcc.target/i386/avx512f-andn-si-zmm-2.c | 12 
>  .../gcc.target/i386/avx512f-andn-si-zmm-3.c | 12 
>  .../gcc.target/i386/avx512f-andn-si-zmm-4.c | 12 
>  .../gcc.target/i386/avx512f-andn-si-zmm-5.c | 12 
>  .../gcc.target/i386/avx512vl-andn-si-xmm-1.c| 12 
>  .../gcc.target/i386/avx512vl-andn-si-ymm-1.c| 12 
>  9 files changed, 109 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-andn-di-zmm-1.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-andn-si-zmm-1.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-andn-si-zmm-2.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-andn-si-zmm-3.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-andn-si-zmm-4.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-andn-si-zmm-5.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-andn-si-xmm-1.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-andn-si-ymm-1.c
>
> diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> index 29f390ead1f..05bd5781804 100644
> --- a/gcc/config/i386/sse.md
> +++ b/gcc/config/i386/sse.md
> @@ -12070,6 +12070,19 @@
>   ]
>   (const_string "")))])
>
> +(define_insn "*andnot3_bst"

*andnot3_bcst

> +  [(set (match_operand:VI 0 "register_operand" "=v")
> +   (and:VI
> + (not:VI48_AVX512VL
> +(match_operand:VI48_AVX512VL 1 "register_operand" "v"))
> + (vec_duplicate:VI48_AVX512VL
> +   (match_operand: 2 "memory_operand" "m"]
> +  "TARGET_AVX512F"
> +  "vpandn\t{%2, %1, %0|%0, %1, %2}"
> +  [(set_attr "type" "sselog")
> +   (set_attr "prefix" "evex")
> +   (set_attr "mode" "")])
> +
>  (define_insn "*andnot3_mask"
>[(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
> (vec_merge:VI48_AVX512VL
> diff --git a/gcc/testsuite/gcc.target/i386/avx512f-andn-di-zmm-1.c 
> b/gcc/testsuite/gcc.target/i386/avx512f-andn-di-zmm-1.c
> new file mode 100644
> index 000..1450d3c1914
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/avx512f-andn-di-zmm-1.c
> @@ -0,0 +1,12 @@
> +/* { dg-do compile } */
> +/* { dg-options "-mavx512f -O2" } */
> +/* { dg-final { scan-assembler-times "vpandnq\[ 
> \\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */
> +/* { dg-final { scan-assembler-not "vpbroadcastq\[^\n\]*%zmm\[0-9\]+" } } */
> +
> +#define type __m512i
> +#define vec 512
> +#define op andnot
> +#define suffix epi64
> +#define SCALAR long long
> +
> +#include "avx512-binop-1.h"
> diff --git a/gcc/testsuite/gcc.target/i386/avx512f-andn-si-zmm-1.c 
> b/gcc/testsuite/gcc.target/i386/avx512f-andn-si-zmm-1.c
> new file mode 100644
> index 000..c9d8a820295
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/avx512f-andn-si-zmm-1.c
> @@ -0,0 +1,12 @@
> +/* { dg-do compile } */
> +/* { dg-options "-mavx512f -O2" } */
> +/* { dg-final { scan-assembler-times "vpandnd\[ 
> \\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */
> +/* { dg-final { scan-assembler-not "vpbroadcastd\[^\n\]*%zmm\[0-9\]+" } } */
> +
> +#define type __m512i
> +#define vec 512
> +#define op andnot
> +#define suffix epi32
> +#define SCALAR int
> +
> +#include "avx512-binop-1.h"
> diff --git a/gcc/testsuite/gcc.target/i386/avx512f-andn-si-zmm-2.c 
> b/gcc/testsuite/gcc.target/i386/avx512f-andn-si-zmm-2.c
> new file mode 100644
> index 000..a9608ca095d
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/avx512f-andn-si-zmm-2.c
> @@ -0,0 +1,12 @@
> +/* { dg-do compile } */
> +/* { dg-options "-mavx512f -O2" } */
> +/* { dg-final { scan-assembler-times "vpbroadcastd\[^\n\]*%zmm\[0-9\]+" 1 } 
> } */
>

Re: [PATCH 2/3] i386: Enable AVX512 memory broadcast for INT logic

2018-10-21 Thread Uros Bizjak
On Mon, Oct 22, 2018 at 12:59 AM H.J. Lu  wrote:
>
> Many AVX512 vector operations can broadcast from a scalar memory source.
> This patch enables memory broadcast for INT logic operations.
>
> gcc/
>
> PR target/72782
> * config/i386/sse.md (*3_bcst): New.
>
> gcc/testsuite/
>
> PR target/72782
> * gcc.target/i386/avx512f-and-di-zmm-1.c: New test.
> * gcc.target/i386/avx512f-and-si-zmm-1.c: Likewise.
> * gcc.target/i386/avx512f-and-si-zmm-2.c: Likewise.
> * gcc.target/i386/avx512f-and-si-zmm-3.c: Likewise.
> * gcc.target/i386/avx512f-and-si-zmm-4.c: Likewise.
> * gcc.target/i386/avx512f-and-si-zmm-5.c: Likewise.
> * gcc.target/i386/avx512f-and-si-zmm-6.c: Likewise.
> * gcc.target/i386/avx512f-or-di-zmm-1.c: Likewise.
> * gcc.target/i386/avx512f-or-si-zmm-1.c: Likewise.
> * gcc.target/i386/avx512f-or-si-zmm-2.c: Likewise.
> * gcc.target/i386/avx512f-or-si-zmm-3.c: Likewise.
> * gcc.target/i386/avx512f-or-si-zmm-4.c: Likewise.
> * gcc.target/i386/avx512f-or-si-zmm-5.c: Likewise.
> * gcc.target/i386/avx512f-or-si-zmm-6.c: Likewise.
> * gcc.target/i386/avx512f-xor-di-zmm-1.c: Likewise.
> * gcc.target/i386/avx512f-xor-si-zmm-1.c: Likewise.
> * gcc.target/i386/avx512f-xor-si-zmm-2.c: Likewise.
> * gcc.target/i386/avx512f-xor-si-zmm-3.c: Likewise.
> * gcc.target/i386/avx512f-xor-si-zmm-4.c: Likewise.
> * gcc.target/i386/avx512f-xor-si-zmm-5.c: Likewise.
> * gcc.target/i386/avx512f-xor-si-zmm-6.c: Likewise.
> * gcc.target/i386/avx512vl-and-si-xmm-1.c: Likewise.
> * gcc.target/i386/avx512vl-and-si-ymm-1.c: Likewise.
> * gcc.target/i386/avx512vl-or-si-xmm-1.c: Likewise.
> * gcc.target/i386/avx512vl-or-si-ymm-1.c: Likewise.
> * gcc.target/i386/avx512vl-xor-si-xmm-1.c: Likewise.
> * gcc.target/i386/avx512vl-xor-si-ymm-1.c: Likewise.

OK.

Thanks,
Uros.

> ---
>  gcc/config/i386/sse.md   | 12 
>  gcc/testsuite/gcc.target/i386/avx512f-and-di-zmm-1.c | 12 
>  gcc/testsuite/gcc.target/i386/avx512f-and-si-zmm-1.c | 12 
>  gcc/testsuite/gcc.target/i386/avx512f-and-si-zmm-2.c | 12 
>  gcc/testsuite/gcc.target/i386/avx512f-and-si-zmm-3.c | 12 
>  gcc/testsuite/gcc.target/i386/avx512f-and-si-zmm-4.c | 12 
>  gcc/testsuite/gcc.target/i386/avx512f-and-si-zmm-5.c | 12 
>  gcc/testsuite/gcc.target/i386/avx512f-and-si-zmm-6.c | 12 
>  gcc/testsuite/gcc.target/i386/avx512f-or-di-zmm-1.c  | 12 
>  gcc/testsuite/gcc.target/i386/avx512f-or-si-zmm-1.c  | 12 
>  gcc/testsuite/gcc.target/i386/avx512f-or-si-zmm-2.c  | 12 
>  gcc/testsuite/gcc.target/i386/avx512f-or-si-zmm-3.c  | 12 
>  gcc/testsuite/gcc.target/i386/avx512f-or-si-zmm-4.c  | 12 
>  gcc/testsuite/gcc.target/i386/avx512f-or-si-zmm-5.c  | 12 
>  gcc/testsuite/gcc.target/i386/avx512f-or-si-zmm-6.c  | 12 
>  gcc/testsuite/gcc.target/i386/avx512f-xor-di-zmm-1.c | 12 
>  gcc/testsuite/gcc.target/i386/avx512f-xor-si-zmm-1.c | 12 
>  gcc/testsuite/gcc.target/i386/avx512f-xor-si-zmm-2.c | 12 
>  gcc/testsuite/gcc.target/i386/avx512f-xor-si-zmm-3.c | 12 
>  gcc/testsuite/gcc.target/i386/avx512f-xor-si-zmm-4.c | 12 
>  gcc/testsuite/gcc.target/i386/avx512f-xor-si-zmm-5.c | 12 
>  gcc/testsuite/gcc.target/i386/avx512f-xor-si-zmm-6.c | 12 
>  .../gcc.target/i386/avx512vl-and-si-xmm-1.c  | 12 
>  .../gcc.target/i386/avx512vl-and-si-ymm-1.c  | 12 
>  gcc/testsuite/gcc.target/i386/avx512vl-or-si-xmm-1.c | 12 
>  gcc/testsuite/gcc.target/i386/avx512vl-or-si-ymm-1.c | 12 
>  .../gcc.target/i386/avx512vl-xor-si-xmm-1.c  | 12 
>  .../gcc.target/i386/avx512vl-xor-si-ymm-1.c  | 12 
>  28 files changed, 336 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-and-di-zmm-1.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-and-si-zmm-1.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-and-si-zmm-2.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-and-si-zmm-3.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-and-si-zmm-4.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-and-si-zmm-5.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-and-si-zmm-6.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-or-di-zmm-1.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-or-si-zmm-1.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-or-si-zmm-2.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-or-si-zmm-3.c
>  create mode 100644 gcc/testsuite/gcc.target/i

Re: [PATCH 1/3] i386: Enable AVX512 memory broadcast for INT add

2018-10-21 Thread Uros Bizjak
On Mon, Oct 22, 2018 at 12:59 AM H.J. Lu  wrote:
>
> Many AVX512 vector operations can broadcast from a scalar memory source.
> This patch enables memory broadcast for INT add operations.
>
> gcc/
>
> PR target/72782
> * config/i386/sse.md (avx512bcst): Updated for V4SI, V2DI, V8SI,
> V4DI, V16SI and V8DI.
> (*minus3_bcst): New.
> (*plus3_bcst): Likewise.

These patterns are named *add... and *sub.

> gcc/testsuite/
>
> PR target/72782
> * gcc.target/i386/avx512f-add-di-zmm-1.c: New test.
> * gcc.target/i386/avx512f-add-si-zmm-1.c: Likewise.
> * gcc.target/i386/avx512f-add-si-zmm-2.c: Likewise.
> * gcc.target/i386/avx512f-add-si-zmm-3.c: Likewise.
> * gcc.target/i386/avx512f-add-si-zmm-4.c: Likewise.
> * gcc.target/i386/avx512f-add-si-zmm-5.c: Likewise.
> * gcc.target/i386/avx512f-add-si-zmm-6.c: Likewise.
> * gcc.target/i386/avx512f-sub-di-zmm-1.c: Likewise.
> * gcc.target/i386/avx512f-sub-si-zmm-1.c: Likewise.
> * gcc.target/i386/avx512f-sub-si-zmm-2.c: Likewise.
> * gcc.target/i386/avx512f-sub-si-zmm-3.c: Likewise.
> * gcc.target/i386/avx512f-sub-si-zmm-4.c: Likewise.
> * gcc.target/i386/avx512f-sub-si-zmm-5.c: Likewise.
> * gcc.target/i386/avx512vl-add-si-xmm-1.c: Likewise.
> * gcc.target/i386/avx512vl-add-si-ymm-1.c: Likewise.
> * gcc.target/i386/avx512vl-sub-si-xmm-1.c: Likewise.
> * gcc.target/i386/avx512vl-sub-si-ymm-1.c: Likewise.

OK with an updated pattern name and ChangeLog.

Thanks,
Uros.

> ---
>  gcc/config/i386/sse.md| 29 ++-
>  .../gcc.target/i386/avx512f-add-di-zmm-1.c| 12 
>  .../gcc.target/i386/avx512f-add-si-zmm-1.c| 12 
>  .../gcc.target/i386/avx512f-add-si-zmm-2.c| 12 
>  .../gcc.target/i386/avx512f-add-si-zmm-3.c| 12 
>  .../gcc.target/i386/avx512f-add-si-zmm-4.c| 12 
>  .../gcc.target/i386/avx512f-add-si-zmm-5.c| 12 
>  .../gcc.target/i386/avx512f-add-si-zmm-6.c| 12 
>  .../gcc.target/i386/avx512f-sub-di-zmm-1.c| 12 
>  .../gcc.target/i386/avx512f-sub-si-zmm-1.c| 12 
>  .../gcc.target/i386/avx512f-sub-si-zmm-2.c| 12 
>  .../gcc.target/i386/avx512f-sub-si-zmm-3.c| 12 
>  .../gcc.target/i386/avx512f-sub-si-zmm-4.c| 12 
>  .../gcc.target/i386/avx512f-sub-si-zmm-5.c| 12 
>  .../gcc.target/i386/avx512vl-add-si-xmm-1.c   | 12 
>  .../gcc.target/i386/avx512vl-add-si-ymm-1.c   | 12 
>  .../gcc.target/i386/avx512vl-sub-si-xmm-1.c   | 12 
>  .../gcc.target/i386/avx512vl-sub-si-ymm-1.c   | 12 
>  18 files changed, 232 insertions(+), 1 deletion(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-add-di-zmm-1.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-add-si-zmm-1.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-add-si-zmm-2.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-add-si-zmm-3.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-add-si-zmm-4.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-add-si-zmm-5.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-add-si-zmm-6.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-sub-di-zmm-1.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-sub-si-zmm-1.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-sub-si-zmm-2.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-sub-si-zmm-3.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-sub-si-zmm-4.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-sub-si-zmm-5.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-add-si-xmm-1.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-add-si-ymm-1.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-sub-si-xmm-1.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-sub-si-ymm-1.c
>
> diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> index 2c702ceed2d..2d4fac3f8f7 100644
> --- a/gcc/config/i386/sse.md
> +++ b/gcc/config/i386/sse.md
> @@ -660,7 +660,10 @@
> V16SF V8DF])
>
>  (define_mode_attr avx512bcst
> -  [(V4SF "%{1to4%}") (V2DF "%{1to2%}")
> +  [(V4SI "%{1to4%}") (V2DI "%{1to2%}")
> +   (V8SI "%{1to8%}") (V4DI "%{1to4%}")
> +   (V16SI "%{1to16%}") (V8DI "%{1to8%}")
> +   (V4SF "%{1to4%}") (V2DF "%{1to2%}")
> (V8SF "%{1to8%}") (V4DF "%{1to4%}")
> (V16SF "%{1to16%}") (V8DF "%{1to8%}")])
>
> @@ -10408,6 +10411,30 @@
> (set_attr "prefix" "orig,vex")
> (set_attr "mode" "")])
>
> +(define_insn "*sub3_bcst"
> +  [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
> +   (minus:VI48_AVX512VL
> + (match_operand:VI48_AVX512VL 1 "register_operand" "v")
> + (vec_duplicate:VI48_AVX512VL
> +   (match_operand: 

Re: [PATCH 14/14] Add D Phobos config, makefiles, and testsuite.

2018-10-21 Thread Iain Buclaw
On Tue, 16 Oct 2018 at 19:01, Richard Sandiford
 wrote:
>
> Iain Buclaw  writes:
> > diff --git a/libphobos/d_rules.am b/libphobos/d_rules.am
> > new file mode 100644
> > index 000..b16cf5052d2
> > --- /dev/null
> > +++ b/libphobos/d_rules.am
> > @@ -0,0 +1,60 @@
> > +# This file contains some common rules for D source compilation
> > +# used for libdruntime and libphobos
> > +
> > +# If there are no sources with known extension (i.e. only D sources)
> > +# automake forgets to set this
>
> Needs a copyright notice and licence.
>
> > +# AC_LANG(D)
> > +# ---
> > +# (we have to use GDC as variable prefix as our GCC patches set GDC
> > +#  GDC_FOR_BUILD etc. If we ever want to support other D compilers all
> > +#  names need to be changed to DC)
>
> Seems like this is still talking about GDC as a separate project.
>
> > +  # This checks to see if the host supports the compiler-generated builtins
> > +  # for atomic operations for various integral sizes. Note, this is 
> > intended
> > +  # to be an all-or-nothing switch, so all the atomic operations that are
> > +  # used should be checked.
> > +  AC_MSG_CHECKING([for atomic builtins for byte])
> > +  AC_CACHE_VAL(druntime_cv_atomic_byte, [
> > +AC_TRY_LINK(
> > +  [import gcc.builtins;], [
> > +  shared(byte) c1;
> > +   byte c2, c3;
> > +   __atomic_compare_exchange_1(&c1, &c2, c3, false, 5, 5);
> > +   __atomic_load_1(&c1, 5);
> > +   __atomic_store_1(&c1, c2, 5);
> > +   return 0;
> > +  ],
> > +  [druntime_cv_atomic_byte=yes],
> > +  [druntime_cv_atomic_byte=no])
> > +  ])
> > +  AC_MSG_RESULT($druntime_cv_atomic_byte)
>
> Link tests generally don't work for newlib targets, since they often
> require a specific command-line option to specify the target system.
> But perhaps you don't support newlib targets anyway.  Either way,
> it shouldn't hold up acceptance.
>
> > --- /dev/null
> > +++ b/libphobos/src/Makefile.am
> > @@ -0,0 +1,211 @@
> > +# Makefile for the Phobos standard library.
> > +# Copyright (C) 2012-2017 Free Software Foundation, Inc.
>
> 2012-2018.
>
> > diff --git a/libphobos/testsuite/Makefile.am 
> > b/libphobos/testsuite/Makefile.am
> > new file mode 100644
> > index 000..dd99d9d871e
> > --- /dev/null
> > +++ b/libphobos/testsuite/Makefile.am
> > @@ -0,0 +1,15 @@
> > +## Process this file with automake to produce Makefile.in.
> > +
> > +AUTOMAKE_OPTIONS = foreign dejagnu
> > +
> > +# Setup the testing framework, if you have one
> > +EXPECT = $(shell if test -f $(top_builddir)/../expect/expect; then \
> > +echo $(top_builddir)/../expect/expect; else echo expect; fi)
> > +
> > +_RUNTEST = $(shell if test -f $(top_srcdir)/../dejagnu/runtest; then \
> > +  echo $(top_srcdir)/../dejagnu/runtest; else echo runtest; fi)
> > +RUNTEST = "$(_RUNTEST) $(AM_RUNTESTFLAGS)"
> > +
> > +AM_MAKEFLAGS = "EXEEXT=$(EXEEXT)"
> > +
> > +CLEANFILES = *.log *.sum
>
> Should probably have a copyright & licence here too, even though
> it's small, since it could grow in future.
>
> > +// { dg-shouldfail "static_dtor_exception" }
> > +// { dg-output "object.Exception@.*: static_dtor_exception" }
> > +// Issue 16594
> > +import core.stdc.stdio;
>
> Which bug tracker is this referring to?  Maybe a URI would be better,
> to avoid confusion with GCC's bugzilla.  Same for other bugzilla
> references in later tests.  Or just remove if the tracker isn't public.
>
> OK otherwise, thanks.
>
> I think that's the last of the unreviewed patches.  Let me know
> if I missed one.
>

Attaching updates as per above comments.

Regards
--
Iain
diff --git a/libphobos/configure b/libphobos/configure
index 42be84d394d..3f6522f0fe3 100755
--- a/libphobos/configure
+++ b/libphobos/configure
@@ -4562,7 +4562,7 @@ $as_echo "no" >&6; }
as_fn_error "can't compile D sources!" "$LINENO" 5
 fi
 rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
-  ac_ext=c
+  ac_ext=c
 ac_cpp='$CPP $CPPFLAGS'
 ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
 ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
diff --git a/libphobos/d_rules.am b/libphobos/d_rules.am
index b16cf5052d2..958b7c1209b 100644
--- a/libphobos/d_rules.am
+++ b/libphobos/d_rules.am
@@ -1,5 +1,19 @@
-# This file contains some common rules for D source compilation
-# used for libdruntime and libphobos
+## Common rules for D source compilation used in all Makefile.am's.
+## Copyright (C) 2016-2018 Free Software Foundation, Inc.
+##
+## GCC is free software; you can redistribute it and/or modify
+## it under the terms of the GNU General Public License as published by
+## the Free Software Foundation; either version 3, or (at your option)
+## any later version.
+##
+## GCC is distributed in the hope that it will be useful,
+## but WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+## GNU General Public License for mor

Re: [PATCH 10/14] Add GDC Testsuite files.

2018-10-21 Thread Iain Buclaw
On Fri, 21 Sep 2018 at 22:47, Mike Stump  wrote:
>
> On Sep 17, 2018, at 5:37 PM, Iain Buclaw  wrote:
> >
> > This patch adds a number of further tests, which were added as part of
> > fixing gdc-specific bugs.
>
> Ok.  Trivial, and self-review applicable.

Attaching Dejagnu file changes to support what was done in 08/14.

Regards
--
Iain

---
diff --git a/gcc/testsuite/gdc.test/d_do_test.exp b/gcc/testsuite/gdc.test/d_do_test.exp
index a3c49165bbd..246ac850a20 100644
--- a/gcc/testsuite/gdc.test/d_do_test.exp
+++ b/gcc/testsuite/gdc.test/d_do_test.exp
@@ -22,16 +22,16 @@ load_lib gdc-dg.exp
 # Convert DMD arguments to GDC equivalent
 #
 
-proc gdc-convert-args { base args } {
+proc gdc-convert-args { args } {
 set out ""
 
 foreach arg [split [lindex $args 0] " "] {
 # List of switches kept in ASCII collated order.
 if { [regexp -- {^-I([\w+/-]+)} $arg pattern path] } {
-lappend out "-I$base/$path"
+lappend out "-I$path"
 
 } elseif { [regexp -- {^-J([\w+/-]+)} $arg pattern path] } {
-lappend out "-J$base/$path"
+lappend out "-J$path"
 
 } elseif [string match "-allinst" $arg] {
 lappend out "-fall-instantiations"
@@ -158,6 +158,9 @@ proc gdc-copy-extra { base extra } {
 #
 #   COMPILE_SEPARATELY: Not handled.
 #   EXECUTE_ARGS:   Parameters to add to the execution of the test.
+#   COMPILED_IMPORTS:   List of modules files that are imported by the main
+#   source file that should be included in compilation.
+#   Currently handled the same as EXTRA_SOURCES.
 #   EXTRA_SOURCES:  List of extra sources to build and link along with
 #   the test.
 #   EXTRA_FILES:List of extra files to copy for the test runs.
@@ -207,7 +210,7 @@ proc dmd2dg { base test } {
 
 } elseif [regexp -- {PERMUTE_ARGS\s*:\s*(.*)} $copy_line match args] {
 # PERMUTE_ARGS is handled by gdc-do-test.
-set PERMUTE_ARGS [gdc-convert-args $base $args]
+set PERMUTE_ARGS [gdc-convert-args $args]
 regsub -- {PERMUTE_ARGS.*$} $copy_line "" out_line
 
 } elseif [regexp -- {EXECUTE_ARGS\s*:\s*(.*)} $copy_line match args] {
@@ -219,7 +222,7 @@ proc dmd2dg { base test } {
 
 } elseif [regexp -- {REQUIRED_ARGS\s*:\s*(.*)} $copy_line match args] {
 # Convert all listed arguments to from dmd to gdc-style.
-set new_option "{ dg-additional-options \"[gdc-convert-args $base $args]\" }"
+set new_option "{ dg-additional-options \"[gdc-convert-args $args]\" }"
 regsub -- {REQUIRED_ARGS.*$} $copy_line $new_option out_line
 
 } elseif [regexp -- {EXTRA_SOURCES\s*:\s*(.*)} $copy_line match sources] {
@@ -241,7 +244,7 @@ proc dmd2dg { base test } {
 regsub -- {EXTRA_CPP_SOURCES.*$} $copy_line $new_option out_line
 
 } elseif [regexp -- {EXTRA_FILES\s*:\s*(.*)} $copy_line match files] {
-# Copy all sources to the testsuite build directory.
+# Copy all files to the testsuite build directory.
 foreach import $files {
 # print "Import: $base $type/$import"
 gdc-copy-extra $base "$type/$import"
@@ -249,6 +252,15 @@ proc dmd2dg { base test } {
 set new_option "{ dg-additional-files \"$files\" }"
 regsub -- {EXTRA_FILES.*$} $copy_line $new_option out_line
 
+} elseif [regexp -- {COMPILED_IMPORTS\s*:\s*(.*)} $copy_line match sources] {
+# Copy all sources to the testsuite build directory.
+foreach import $sources {
+# print "Import: $base $type/$import"
+gdc-copy-extra $base "$type/$import"
+}
+set new_option "{ dg-additional-sources \"$sources\" }"
+regsub -- {COMPILED_IMPORTS.*$} $copy_line $new_option out_line
+
 }
 
 puts $fdout $out_line


Re: [PATCH 12/14] Add GDC/GCC builtins and runtime support (part of D runtime)

2018-10-21 Thread Iain Buclaw
On Wed, 19 Sep 2018 at 22:45, Iain Buclaw  wrote:
>
> On 18 September 2018 at 02:37, Iain Buclaw  wrote:
> > This patch adds GCC builtins and runtime support for GDC compiled code.
> >
> >   - module __entrypoint defines the C main function.  Its contents are
> > parsed and compiled in during compilation, but only if needed.
> >   - module gcc.attribute exposes GDC-specific attributes.
> >   - module gcc.backtrace implements backtrace support for GDC.
> >   - module gcc.builtins exposes GCC builtins to D code.
> >   - module gcc.config exposes things determined at configure time to D code.
> >   - module gcc.deh implements D unwind EH.
> >   - module gcc.libbacktrace defines C bindings to libbacktrace.
> >   - module gcc.unwind defines C bindings to libgcc unwind library.
> >   - libgphobos.spec contains a list of libraries to link in that are
> > dependencies of D runtime and/or the Phobos standard library.  It is
> > used by the GDC driver.
> >
>
> https://gcc.gnu.org/ml/gcc-patches/2017-09/msg00735.html
>
> It looks like this patch was previously approved.  All sources here
> are linked as part of druntime, but would be maintained in tree by
> myself and other D front end maintainers.
>

Updated copyright years as per request in 14/14, plus additional
changes that have been made since original patch.

Regards
--
Iain

---
diff --git a/libphobos/libdruntime/gcc/attribute.d b/libphobos/libdruntime/gcc/attribute.d
index 2498c27e7cc..8ca772122bb 100644
--- a/libphobos/libdruntime/gcc/attribute.d
+++ b/libphobos/libdruntime/gcc/attribute.d
@@ -1,5 +1,5 @@
 // GNU D Compiler attribute support declarations.
-// Copyright (C) 2013-2017 Free Software Foundation, Inc.
+// Copyright (C) 2013-2018 Free Software Foundation, Inc.
 
 // GCC is free software; you can redistribute it and/or modify it under
 // the terms of the GNU General Public License as published by the Free
diff --git a/libphobos/libdruntime/gcc/backtrace.d b/libphobos/libdruntime/gcc/backtrace.d
index 9c76e6117e8..37a7fa729b8 100644
--- a/libphobos/libdruntime/gcc/backtrace.d
+++ b/libphobos/libdruntime/gcc/backtrace.d
@@ -1,5 +1,5 @@
 // GNU D Compiler routines for stack backtrace support.
-// Copyright (C) 2013-2017 Free Software Foundation, Inc.
+// Copyright (C) 2013-2018 Free Software Foundation, Inc.
 
 // GCC is free software; you can redistribute it and/or modify it under
 // the terms of the GNU General Public License as published by the Free
@@ -271,13 +271,23 @@ static if (BACKTRACE_SUPPORTED && !BACKTRACE_USES_MALLOC)
 
 int opApply(scope ApplyCallback dg) const
 {
+initLibBacktrace();
+
 // If backtrace_simple produced an error report it and exit
 if (!state || error != 0)
 {
 size_t pos = 0;
 SymbolOrError symError;
-symError.errnum = error;
-symError.msg = errorBuf.ptr;
+if (!state)
+{
+symError.msg = "libbacktrace failed to initialize\0";
+symError.errnum = 1;
+}
+else
+{
+symError.errnum = error;
+symError.msg = errorBuf.ptr;
+}
 
 return dg(pos, symError);
 }
@@ -342,7 +352,7 @@ static if (BACKTRACE_SUPPORTED && !BACKTRACE_USES_MALLOC)
 
 int   error = 0;
 int _firstFrame = 0;
-char[128] errorBuf;
+char[128] errorBuf = "\0";
 }
 }
 else
diff --git a/libphobos/libdruntime/gcc/builtins.d b/libphobos/libdruntime/gcc/builtins.d
index e4e0bf3481f..a84d60bafa8 100644
--- a/libphobos/libdruntime/gcc/builtins.d
+++ b/libphobos/libdruntime/gcc/builtins.d
@@ -1,5 +1,5 @@
 /* GNU D Compiler bindings for built-in functions and types.
-   Copyright (C) 2006-2017 Free Software Foundation, Inc.
+   Copyright (C) 2006-2018 Free Software Foundation, Inc.
 
 GCC is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free
diff --git a/libphobos/libdruntime/gcc/config.d.in b/libphobos/libdruntime/gcc/config.d.in
index ae61a7195df..89f807d3835 100644
--- a/libphobos/libdruntime/gcc/config.d.in
+++ b/libphobos/libdruntime/gcc/config.d.in
@@ -1,5 +1,5 @@
 // GNU D Compiler configure constants.
-// Copyright (C) 2015-2017 Free Software Foundation, Inc.
+// Copyright (C) 2015-2018 Free Software Foundation, Inc.
 
 // GCC is free software; you can redistribute it and/or modify it under
 // the terms of the GNU General Public License as published by the Free
diff --git a/libphobos/libdruntime/gcc/deh.d b/libphobos/libdruntime/gcc/deh.d
index 15746f6430c..8323f4aa076 100644
--- a/libphobos/libdruntime/gcc/deh.d
+++ b/libphobos/libdruntime/gcc/deh.d
@@ -1,5 +1,5 @@
 // GNU D Compiler exception personality routines.
-// Copyright (C) 2011-2017 Free Software Foundation, Inc.
+// Copyright (C) 2011-2018 Free Software Founda

[PATCH v2] Avoid unnecessarily numbered clone symbols

2018-10-21 Thread Michael Ploujnikov
Continuing from https://gcc.gnu.org/ml/gcc-patches/2018-10/msg01258.html

Fixed up the code after the change to concat suggested by Bernhard
Reutner.

One outstanding question remains:

To write an exact replacement for numbered_clone_function_name (apart
from the numbering), I also need to copy the double underscore
prefixing behaviour done by ASM_PN_FORMAT, which is used by
ASM_FORMAT_PRIVATE_NAME (right?). Does that mean that I can't use my
suffixed_function_name to replace the very similar looking code in
cgraph_node::create_virtual_clone? Or is that code just missing the
double underscore prefix by mistake?


- Michael
From 74435e1d8c5984eaee766d7940eeffbe565fcc2e Mon Sep 17 00:00:00 2001
From: Michael Ploujnikov 
Date: Tue, 7 Aug 2018 20:36:53 -0400
Subject: [PATCH 1/4] Rename clone_function_name_1 and clone_function_name to
 clarify usage.

gcc:
2018-10-19  Michael Ploujnikov  

   * gcc/cgraph.h: Rename clone_function_name_1 to
 numbered_clone_function_name_1. Rename clone_function_name to
 numbered_clone_function_name.
   * cgraphclones.c: Ditto.
   * config/rs6000/rs6000.c: Ditto.
   * lto/lto-partition.c: Ditto.
   * multiple_target.c: Ditto.
   * omp-expand.c: Ditto.
   * omp-low.c: Ditto.
   * omp-simd-clone.c: Ditto.
   * symtab.c: Ditto.
---
 gcc/cgraph.h   |  4 ++--
 gcc/cgraphclones.c | 22 +-
 gcc/config/rs6000/rs6000.c |  2 +-
 gcc/lto/lto-partition.c|  4 ++--
 gcc/multiple_target.c  |  8 
 gcc/omp-expand.c   |  2 +-
 gcc/omp-low.c  |  4 ++--
 gcc/omp-simd-clone.c   |  2 +-
 gcc/symtab.c   |  3 ++-
 9 files changed, 28 insertions(+), 23 deletions(-)

diff --git gcc/cgraph.h gcc/cgraph.h
index a8b1b4c..3583f7e 100644
--- gcc/cgraph.h
+++ gcc/cgraph.h
@@ -2368,8 +2368,8 @@ basic_block init_lowered_empty_function (tree, bool, profile_count);
 tree thunk_adjust (gimple_stmt_iterator *, tree, bool, HOST_WIDE_INT, tree);
 /* In cgraphclones.c  */
 
-tree clone_function_name_1 (const char *, const char *);
-tree clone_function_name (tree decl, const char *);
+tree numbered_clone_function_name_1 (const char *, const char *);
+tree numbered_clone_function_name (tree decl, const char *);
 
 void tree_function_versioning (tree, tree, vec *,
 			   bool, bitmap, bool, bitmap, basic_block);
diff --git gcc/cgraphclones.c gcc/cgraphclones.c
index 6e84a31..bc59dc2 100644
--- gcc/cgraphclones.c
+++ gcc/cgraphclones.c
@@ -316,7 +316,8 @@ duplicate_thunk_for_node (cgraph_node *thunk, cgraph_node *node)
   gcc_checking_assert (!DECL_RESULT (new_decl));
   gcc_checking_assert (!DECL_RTL_SET_P (new_decl));
 
-  DECL_NAME (new_decl) = clone_function_name (thunk->decl, "artificial_thunk");
+  DECL_NAME (new_decl) = numbered_clone_function_name (thunk->decl,
+		   "artificial_thunk");
   SET_DECL_ASSEMBLER_NAME (new_decl, DECL_NAME (new_decl));
 
   new_thunk = cgraph_node::create (new_decl);
@@ -514,11 +515,11 @@ cgraph_node::create_clone (tree new_decl, profile_count prof_count,
 
 static GTY(()) unsigned int clone_fn_id_num;
 
-/* Return a new assembler name for a clone with SUFFIX of a decl named
-   NAME.  */
+/* Return NAME appended with string SUFFIX and a unique unspecified
+   number.  */
 
 tree
-clone_function_name_1 (const char *name, const char *suffix)
+numbered_clone_function_name_1 (const char *name, const char *suffix)
 {
   size_t len = strlen (name);
   char *tmp_name, *prefix;
@@ -531,13 +532,15 @@ clone_function_name_1 (const char *name, const char *suffix)
   return get_identifier (tmp_name);
 }
 
-/* Return a new assembler name for a clone of DECL with SUFFIX.  */
+/* Return a new assembler name for a clone of DECL.  Apart from the
+   string SUFFIX, the new name will end with a unique unspecified
+   number.  */
 
 tree
-clone_function_name (tree decl, const char *suffix)
+numbered_clone_function_name (tree decl, const char *suffix)
 {
   tree name = DECL_ASSEMBLER_NAME (decl);
-  return clone_function_name_1 (IDENTIFIER_POINTER (name), suffix);
+  return numbered_clone_function_name_1 (IDENTIFIER_POINTER (name), suffix);
 }
 
 
@@ -585,7 +588,8 @@ cgraph_node::create_virtual_clone (vec redirect_callers,
   strcpy (name + len + 1, suffix);
   name[len] = '.';
   DECL_NAME (new_decl) = get_identifier (name);
-  SET_DECL_ASSEMBLER_NAME (new_decl, clone_function_name (old_decl, suffix));
+  SET_DECL_ASSEMBLER_NAME (new_decl,
+			   numbered_clone_function_name (old_decl,suffix));
   SET_DECL_RTL (new_decl, NULL);
 
   new_node = create_clone (new_decl, count, false,
@@ -964,7 +968,7 @@ cgraph_node::create_version_clone_with_body
   = build_function_decl_skip_args (old_decl, args_to_skip, skip_return);
 
   /* Generate a new name for the new version. */
-  DECL_NAME (new_decl) = clone_function_name (old_decl, suffix);
+  DECL_NAME (new_decl) = numbered_clone_function_name (old_decl, suffix);
   SET_DECL_ASSEMBLER_NAME (new_decl, DECL_NAME (ne

Re: [PATCH 09/14] Add D2 Testsuite Dejagnu files.

2018-10-21 Thread Iain Buclaw
On Fri, 21 Sep 2018 at 21:59, Mike Stump  wrote:
>
> Ok.

Richard requested that all tests use issue links rather than numbers
so as not to confuse with gcc's bugzilla tracker.

Posting just the patch overlay to original that applies just that.

Regards
--
Iain

---


09-v4v5-d-testsuite-dejagnu.patch.xz
Description: Binary data


Re: [PATCH 08/14] Add D2 Testsuite files.

2018-10-21 Thread Iain Buclaw
On Tue, 25 Sep 2018 at 00:58, Iain Buclaw  wrote:
>
> On 24 September 2018 at 20:28, Mike Stump  wrote:
> > On Sep 21, 2018, at 2:38 PM, Iain Buclaw  wrote:
> >>
> >> On 21 September 2018 at 22:54, Mike Stump  wrote:
> >>> On Sep 17, 2018, at 5:36 PM, Iain Buclaw  wrote:
> 
>  This patch adds part of the D2 testsuite, which includes D source code
>  files that are considered compilable; files that are considered
>  uncompilable, but should not ICE; and files that should execute on
>  targets with crash or assertion failures.
> >>>
> >>> Ok.  [ not needed, you can self-review things like this no problem ]
> >>>
> >>> I see you are sneaking in Alice in Wonderland...
> >>>
> >>
> >> I had forgotten about that test.  I recall another project written in
> >> D (Dustmite) ran into some trouble with Debian due to the Project
> >> Gutenberg small print, which they ended up removing.
> >>
> >> The "small print" section in the text file says:
> >>
> >>DISTRIBUTION UNDER "PROJECT GUTENBERG-tm"
> >>You may distribute copies of this etext electronically, or by
> >>disk, book or any other medium if you either delete this
> >>"Small Print!" and all other references to Project Gutenberg,
> >>or:
> >>[...]
> >>
> >> I'll look into convincing upstream to also do the same.
> >
> > Oh, if you know of any reason why a file cannot be distributed, you cannot 
> > check that in ever.  So, you must resolve any outstanding issues before 
> > committing.   So, if Gutenberg has restrictions on distribution, you have 
> > to resolve those first, before it hit git.
> >
> > If it had hit git, please either remove such for now until resolved, or 
> > resolve any issues.  Thanks.
>
> As I said a day later, I've pushed the removal of it upstream and have
> propagated it down.  Thanks for noticing though; had you not mentioned
> Alice, I would not have remembered the trouble Dustmite had with
> Debian in the first place.
>

Attaching the diff to the original, rather than the whole patch again.

Only other change apart from noted small print removal is supporting a
COMPILED_IMPORTS option in test sources.

Regards
--
Iain

---


08-v4v5-d-testsuite.patch.xz
Description: Binary data


Re: [PATCH 06/14] Add patches for D language support in GCC proper.

2018-10-21 Thread Iain Buclaw
On Wed, 19 Sep 2018 at 22:21, Iain Buclaw  wrote:
>
> On 18 September 2018 at 02:35, Iain Buclaw  wrote:
> >
> > This patch adds D language support to GCC itself.
> >
>
> Likewise, this was approved so long as all prerequisites have been
> approved also.
>
> https://gcc.gnu.org/ml/gcc-patches/2017-09/msg00609.html
>

Making one small change here off the back of review from 02/14, adding
D to testsuite/gcc.misc-tests/help.exp.

Regards
--
Iain

---
gcc/ChangeLog:

* config/powerpcspe/powerpcspe.c (rs6000_output_function_epilogue):
Support GNU D by using 0 as the language type.
* config/rs6000/rs6000.c (rs6000_output_function_epilogue):
Support GNU D by using 0 as the language type.
* dwarf2out.c (is_dlang): New function.
(gen_compile_unit_die): Use DW_LANG_D for D.
(declare_in_namespace): Return module die for D, instead of adding
extra declarations into the namespace.
(gen_namespace_die): Generate DW_TAG_module for D.
(gen_decl_die, dwarf2out_decl): Handle CONST_DECLs for D.
(prune_unused_types_walk_local_classes): Handle
DW_TAG_interface_type.
(prune_unused_types_walk): Handle DW_TAG_interface_type same as
other kinds of aggregates.
* gcc.c (default_compilers): Add entries for ".d", ".dd" and ".di".

gcc/po/ChangeLog:

* EXCLUDES: Add sources from d/dfrontend.

gcc/testsuite/ChangeLog:

* gcc.misc-tests/help.exp: Add D to option descriptions check.
---
 gcc/config/powerpcspe/powerpcspe.c|  5 ++--
 gcc/config/rs6000/rs6000.c|  5 ++--
 gcc/dwarf2out.c   | 23 +
 gcc/gcc.c |  1 +
 gcc/po/EXCLUDES   | 40 ++
 gcc/testsuite/gcc.misc-tests/help.exp |  2 +-
 6 files changed, 66 insertions(+), 10 deletions(-)

diff --git a/gcc/config/powerpcspe/powerpcspe.c b/gcc/config/powerpcspe/powerpcspe.c
index dea1eab1188..8b157174473 100644
--- a/gcc/config/powerpcspe/powerpcspe.c
+++ b/gcc/config/powerpcspe/powerpcspe.c
@@ -32026,11 +32026,12 @@ rs6000_output_function_epilogue (FILE *file)
 	 use language_string.
 	 C is 0.  Fortran is 1.  Pascal is 2.  Ada is 3.  C++ is 9.
 	 Java is 13.  Objective-C is 14.  Objective-C++ isn't assigned
-	 a number, so for now use 9.  LTO, Go and JIT aren't assigned numbers
-	 either, so for now use 0.  */
+	 a number, so for now use 9.  LTO, Go, D and JIT aren't assigned
+	 numbers either, so for now use 0.  */
   if (lang_GNU_C ()
 	  || ! strcmp (language_string, "GNU GIMPLE")
 	  || ! strcmp (language_string, "GNU Go")
+	  || ! strcmp (language_string, "GNU D")
 	  || ! strcmp (language_string, "libgccjit"))
 	i = 0;
   else if (! strcmp (language_string, "GNU F77")
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index a36e6140ecb..97ca2dac289 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -28405,11 +28405,12 @@ rs6000_output_function_epilogue (FILE *file)
 	 use language_string.
 	 C is 0.  Fortran is 1.  Pascal is 2.  Ada is 3.  C++ is 9.
 	 Java is 13.  Objective-C is 14.  Objective-C++ isn't assigned
-	 a number, so for now use 9.  LTO, Go and JIT aren't assigned numbers
-	 either, so for now use 0.  */
+	 a number, so for now use 9.  LTO, Go, D, and JIT aren't assigned
+	 numbers either, so for now use 0.  */
   if (lang_GNU_C ()
 	  || ! strcmp (language_string, "GNU GIMPLE")
 	  || ! strcmp (language_string, "GNU Go")
+	  || ! strcmp (language_string, "GNU D")
 	  || ! strcmp (language_string, "libgccjit"))
 	i = 0;
   else if (! strcmp (language_string, "GNU F77")
diff --git a/gcc/dwarf2out.c b/gcc/dwarf2out.c
index 48c50378622..47877faebd7 100644
--- a/gcc/dwarf2out.c
+++ b/gcc/dwarf2out.c
@@ -5521,6 +5521,16 @@ is_ada (void)
   return lang == DW_LANG_Ada95 || lang == DW_LANG_Ada83;
 }
 
+/* Return TRUE if the language is D.  */
+
+static inline bool
+is_dlang (void)
+{
+  unsigned int lang = get_AT_unsigned (comp_unit_die (), DW_AT_language);
+
+  return lang == DW_LANG_D;
+}
+
 /* Remove the specified attribute if present.  Return TRUE if removal
was successful.  */
 
@@ -24550,6 +24560,8 @@ gen_compile_unit_die (const char *filename)
 	language = DW_LANG_ObjC;
   else if (strcmp (language_string, "GNU Objective-C++") == 0)
 	language = DW_LANG_ObjC_plus_plus;
+  else if (strcmp (language_string, "GNU D") == 0)
+	language = DW_LANG_D;
   else if (dwarf_version >= 5 || !dwarf_strict)
 	{
 	  if (strcmp (language_string, "GNU Go") == 0)
@@ -26150,7 +26162,7 @@ declare_in_namespace (tree thing, dw_die_ref context_die)
 
   if (ns_context != context_die)
 {
-  if (is_fortran ())
+  if (is_fortran () || is_dlang ())
 	return ns_context;
   if (DECL_P (thing))
 	gen_decl_die (thing, NULL, NULL, ns_context);
@@ -26173,7 +26185,7 @@ gen_namespace_die (tree decl, dw_die_ref context_die)
 {
   /* Output a real namespace or module.  */
   context_die = setup_namespace_context (dec

Re: [PATCH 07/14] Add patches for D language support in GCC targets.

2018-10-21 Thread Iain Buclaw
On Tue, 16 Oct 2018 at 17:28, Richard Sandiford
 wrote:
>
> Iain Buclaw  writes:
> > diff --git a/gcc/Makefile.in b/gcc/Makefile.in
> > index 4b7cec82382..0b2daa320c3 100644
> > --- a/gcc/Makefile.in
> > +++ b/gcc/Makefile.in
> > @@ -2496,6 +2525,7 @@ s-tm-texi: build/genhooks$(build_exeext) 
> > $(srcdir)/doc/tm.texi.in
> > && ( test $(srcdir)/doc/tm.texi -nt $(srcdir)/target.def \
> >   || test $(srcdir)/doc/tm.texi -nt $(srcdir)/c-family/c-target.def 
> > \
> >   || test $(srcdir)/doc/tm.texi -nt 
> > $(srcdir)/common/common-target.def \
> > + || test $(srcdir)/doc/tm.texi -nt $(srcdir)/d/d-target.def \
> > ); then \
> > echo >&2 ; \
> > echo You should edit $(srcdir)/doc/tm.texi.in rather than 
> > $(srcdir)/doc/tm.texi . >&2 ; \
> > [...]
> > @@ -2784,7 +2815,7 @@ build/genrecog.o : genrecog.c $(RTL_BASE_H) 
> > $(BCONFIG_H) $(SYSTEM_H)\
> >$(CORETYPES_H) $(GTM_H) errors.h $(READ_MD_H) $(GENSUPPORT_H)
> >   \
> >$(HASH_TABLE_H) inchash.h
> >  build/genhooks.o : genhooks.c $(TARGET_DEF) $(C_TARGET_DEF)  \
> > -  $(COMMON_TARGET_DEF) $(BCONFIG_H) $(SYSTEM_H) errors.h
> > +  $(COMMON_TARGET_DEF) $(D_TARGET_DEF) $(BCONFIG_H) $(SYSTEM_H) errors.h
> >  build/genmddump.o : genmddump.c $(RTL_BASE_H) $(BCONFIG_H) $(SYSTEM_H) 
> >   \
> >$(CORETYPES_H) $(GTM_H) errors.h $(READ_MD_H) $(GENSUPPORT_H)
> >  build/genmatch.o : genmatch.c $(BCONFIG_H) $(SYSTEM_H) \
>
> I was initially a bit worried about this because it makes the build
> depend on the existence of the d/ directory.  But it doesn't look
> like we try to ship separate tarballs for specific source languages
> any more, so that's probably not a problem.
>
> > @@ -10659,6 +10668,22 @@ unloaded. The default is to return false.
> >  Return target-specific mangling context of @var{decl} or @code{NULL_TREE}.
> >  @end deftypefn
> >
> > +@node D Language and ABI
> > +@section D ABI parameters
> > +@cindex parameters, d abi
> > +
> > +@deftypefn {D Target Hook} void TARGET_D_CPU_VERSIONS (void)
> > +Declare all environmental version identifiers relating to the target CPU 
> > using the function @code{builtin_version}, which takes a string 
> > representing the name of the version.  Version identifiers predefined by 
> > this hook apply to all modules and being compiled and imported.
> > +@end deftypefn
>
> "and being"?  Does this mean "that are being"?
>
> > +@deftypefn {D Target Hook} void TARGET_D_OS_VERSIONS (void)
> > +Similarly to @code{TARGET_D_CPU_VERSIONS}, but is used for versions 
> > relating to the target operating system.
> > +@end deftypefn
> > +
> > +@deftypefn {D Target Hook} unsigned TARGET_D_CRITSEC_SIZE (void)
> > +Returns the size of the data structure used by the targeted operating 
> > system for critical sections and monitors.  For example, on Microsoft 
> > Windows this would return the @code{sizeof(CRITICAL_SECTION)}, while other 
> > platforms that implement pthreads would return 
> > @code{sizeof(pthread_mutex_t)}.
> > +@end deftypefn
>
> Please reflow the .def so that these fit within 80 chars.
>
> OK otherwise if no target maintainer objects to the changes to their port
> (I think they've had plenty of time already :-))
>

Regenerated tm.texi with corrections.

Regards
--
Iain

---
gcc/ChangeLog

* gcc/Makefile.in (tm_d_file_list, tm_d_include_list,
TM_D_H, D_TARGET_DEF, D_TARGET_H, D_TARGET_OBJS): New variables.
(tm_d.h, cs-tm_d.h, default-d.o, d/d-target-hooks-def.h,
s-d-target-hooks-def-h): New rules.
(s-tm-texi): Also check timestamp on d-target.def.
(generated_files): Add TM_D_H and d-target-hooks-def.h.
(build/genhooks.o): Also depend on D_TARGET_DEF.
* gcc/config.gcc (tm_d_file, d_target_objs, target_has_targetdm):
New variables.
* config/aarch64/aarch64-d.c: New file.
* config/aarch64/aarch64-linux.h (GNU_USER_TARGET_D_CRITSEC_SIZE):
Define.
* config/aarch64/aarch64-protos.h (aarch64_d_target_versions): New
prototype.
* config/aarch64/aarch64.h (TARGET_D_CPU_VERSIONS): Define.
* config/aarch64/t-aarch64 (aarch64-d.o): New rule.
* config/arm/arm-d.c: New file.
* config/arm/arm-protos.h (arm_d_target_versions): New prototype.
* config/arm/arm.h (TARGET_D_CPU_VERSIONS): Define.
* config/arm/linux-eabi.h (EXTRA_TARGET_D_OS_VERSIONS): Define.
* config/arm/t-arm (arm-d.o): New rule.
* config/default-d.c: New file.
* config/glibc-d.c: New file.
* config/gnu.h (GNU_USER_TARGET_D_OS_VERSIONS): Define.
* config/i386/i386-d.c: New file.
* config/i386/i386-protos.h (ix86_d_target_versions): New prototype.
* config/i386/i386.h (TARGET_D_CPU_VERSIONS): Define.
* config/i386/linux-common.h (EXTRA_TARGET_D_OS_VERSIONS,
GNU_USER_TARGET_D_CRITSEC_SIZE): Define.
* config/i386/t-i386 (i386-d.o): New rule.
* config/kfreebsd-gnu.h (GNU_USER_TARGET_D_OS_VERSIONS): Define.
* config/kopensolaris-gnu.h (GNU_USER_TARGET_D_OS_VERSIO

Re: [PATCH 04/14] Add D front-end (GDC) config, Makefile, and manpages.

2018-10-21 Thread Iain Buclaw
On Tue, 16 Oct 2018 at 11:31, Richard Sandiford
 wrote:
>
> I'll pretend to be a doc reviewer...
>
> Iain Buclaw  writes:
> > +@page
> > +@vskip 0pt plus 1filll
> > +Published by the Free Software Foundation @*
> > +51 Franklin Street, Fifth Floor@*
> > +Boston, MA 02110-1301, USA@*
>
> Would be good to copy the expanded GNU Press stuff from gcc.texi.
>
> > +The @command{gdc} command is a frontend to @command{gcc} and supports many
> > +of the same options.
>
> A bit pedantic, but I think this should be something like:
>
>   The @command{gdc} command is the GNU compiler for the D language and
>   supports many of the same options as @command{gcc}.
>
> > +@menu
> > +* Input and Output files::  Controlling the kind of output:
> > +an executable, object files, assembler files,
> > +* Runtime Options:: Options controlling runtime behaviour
>
> "behavior"
>
> > +* Directory Options::   Where to find module files
> > +* Code Generation:: Options controlling the output of gdc
> > +* Warnings::Options controlling warnings specific to gdc
> > +* Linking:: Options influceing the linking step
>
> "influencing"
>
> > +* Developer Options::   Options you won't use
>
> Don't know if this came from an earlier version of the main GCC manual,
> but it seems a bit off-message for the FSF.  Something boring like:
>
>   Options useful for developers of gdc
>
> might be better.
>
> > +For any given input file, the file name suffix determines what kind of
> > +compilation is done.  The following kinds of input file names are 
> > supported:
> > +
> > +@table @gcctabopt
> > +@item @var{file}.d
> > +D source files.
> > +@item @var{file}.dd
> > +Ddoc source files.
> > +@item @var{file}.di
> > +D interface files.
> > +@end table
> > +
> > +You can specify more than one input file on the @command{gdc} command line,
> > +in which case they will all be compiled.  If you specify a
>
> The documentation convention is to avoid the future tense for things
> like this.  s/will all be/are all/ doesn't read very naturally so it
> might need to be reworded.
>
> > +@code{-o @var{file}} option, all the input files will be compiled together,
>
> Same here.
>
> > +@item -fno-bounds-check
> > +@cindex @option{-fbounds-check}
> > +@cindex @option{-fno-bounds-check}
> > +Turns off array bounds checking for all functions, which can improve
> > +performance for code that uses array extensively.  Note that this
>
> "uses arrays extensively"?  Or maybe some markup is missing from "array".
>
> > +can result in unpredictable behavior if the code in question actually
> > +does violate array bounds constraints.  It is safe to use this option
> > +if you are sure that your code will never throw a @code{RangeError}.
>
> s/will never throw/never throws/
>
> > +@item -fno-builtin
> > +@cindex @option{-fbuiltin}
> > +@cindex @option{-fno-builtin}
> > +Don't recognize built-in functions that do not begin with
> > +@samp{__builtin_} as prefix.  By default, the compiler will recognize
> > +when a function in the @code{core.stdc} package is a built-in function.
>
> maybe "unless they begin with the prefix @samp{__builtin_}"?
> s/will recognize/recognizes/
>
> > +@table @samp
> > +@item level
> > +@cindex @option{-fdebug=level}
> > +Sets the debug level to @var{level}, any @code{debug} code <= @var{level}
> > +is compiled into the program.
> > +@item ident
> > +@cindex @option{-fdebug=ident}
> > +Turns on compilation of any @code{debug} code identified by @var{ident}.
> > +@end table
>
> Should be @var rather than @samp in the @table.  Also @var{...} in
> the @option{...}s.
>
> > +@item -fno-moduleinfo
> > +@cindex @option{-fmoduleinfo}
> > +@cindex @option{-fno-moduleinfo}
> > +Turns off generation of the @code{ModuleInfo} and related functions
> > +that would become unreferenced without it, which may allow linking
> > +to programs not written in D.  Functions that will not be generated
>
> s/will not be/are not/
>
> > +include module constructor and destructors (@code{static this} and
>
> s/constructor/constructors/?  Or is there only one of each, in which
> case maybe "the module constructor and destructor".
>
> > +@item -frelease
> > +@cindex @option{-frelease}
> > +@cindex @option{-fno-release}
> > +Turns on compiling in release mode, which means not emitting runtime
> > +checks for contracts and asserts.  Array bounds checking is not done
> > +for @code{@@system} and @code{@@trusted} functions, and assertion
> > +failures are undefined behaviour.
>
> "behavior"
>
> > +This is equivalent to compiling with the following options:
> > +
> > +@example
> > +gdc -fno-assert -fbounds-check=safe -fno-invariants \
> > +-fno-postconditions -fno-preconditions -fno-switch-errors
> > +@end example
> > +
> > +@item -fno-switch-errors
> > +@cindex @option{-fswitch-errors}
> > +@cindex @option{-fno-switch-errors}
> > +This option controls what code should be generated when no case is
>
> s/s

Re: [PATCH 02/14] Add D frontend (GDC) implementation.

2018-10-21 Thread Iain Buclaw
On Sat, 20 Oct 2018 at 11:03, Richard Sandiford
 wrote:
>
> Iain Buclaw  writes:
> > On 14 October 2018 at 17:29, Richard Sandiford
> >  wrote:
> >> [Sorry if this turns out to be a dup]
> >>
> >> Iain Buclaw  writes:
> >>> +/* Build nodes that are used by the D front-end.
> >>> +   These are distinct from C types.  */
> >>> +
> >>> +static void
> >>> +d_build_d_type_nodes (void)
> >>> +{
> >>> +  /* Integral types.  */
> >>> +  byte_type_node = make_signed_type (8);
> >>> +  ubyte_type_node = make_unsigned_type (8);
> >>> +
> >>> +  short_type_node = make_signed_type (16);
> >>> +  ushort_type_node = make_unsigned_type (16);
> >>> +
> >>> +  int_type_node = make_signed_type (32);
> >>> +  uint_type_node = make_unsigned_type (32);
> >>> +
> >>> +  long_type_node = make_signed_type (64);
> >>> +  ulong_type_node = make_unsigned_type (64);
> >>
> >> It's a bit confusing for the D type to be long_type_node but the C/ABI type
> >> to be long_integer_type_node.  The D type is surely an integer too. :-)
> >> With this coming among the handling of built-in functions, it initially
> >> looked related, and I was wondering how it was safe on ILP32 systems
> >> before realising the difference.
> >>
> >> Maybe prefixing them all with "d_" would be too ugly, but it would at
> >> least be good to clarify the comment to say that these are "distinct
> >> type nodes" (rather than just distinct definitions, as I'd initially
> >> assumed) and that they're not used outside the frontend, or by the C
> >> imports.
> >>
> >
> > If prefixing with "d_", perhaps dropping the "_node" would make them
> > sufficiently not ugly (d_long_type, d_uint_type, d_byte_type).
>
> Sounds good to me FWIW.
>
> >>> +/* Helper routine for all error routines.  Reports a diagnostic 
> >>> specified by
> >>> +   KIND at the explicit location LOC, where the message FORMAT has not 
> >>> yet
> >>> +   been translated by the gcc diagnostic routines.  */
> >>> +
> >>> +static void ATTRIBUTE_GCC_DIAG(3,0)
> >>> +d_diagnostic_report_diagnostic (const Loc& loc, int opt, const char 
> >>> *format,
> >>> + va_list ap, diagnostic_t kind, bool 
> >>> verbatim)
> >>> +{
> >>> +  va_list argp;
> >>> +  va_copy (argp, ap);
> >>> +
> >>> +  if (loc.filename || !verbatim)
> >>> +{
> >>> +  rich_location rich_loc (line_table, get_linemap (loc));
> >>> +  diagnostic_info diagnostic;
> >>> +  char *xformat = expand_format (format);
> >>> +
> >>> +  diagnostic_set_info (&diagnostic, xformat, &argp, &rich_loc, kind);
> >>
> >> How does this work with translation?  xgettext will only see the original
> >> format string, not the result of expand_format.  Do you have some scripting
> >> to do the same format mangling when collecting the translation strings?
> >> Same concern:
> >>
> >
> > These diagnostic routines handle errors coming from the dmd front-end,
> > which are not translated - all sources are listed under po/EXCLUDES in
> > another patch.
>
> OK.  In that case I think you want to use diagnostic_set_info_translated
> instead of diagnostic_set_info, so that we don't try to translate things
> that aren't meant to be translated.  Also it would be good to reword the
> comment above the function, since "where the message FORMAT has not yet
> been translated by the gcc diagnostic routines" made it sound like these
> messages were supposed to be translated at some point, which is where the
> confusion started. :-)
>
> >>> +/* Write a little-endian 32-bit VALUE to BUFFER.  */
> >>> +
> >>> +void
> >>> +Port::writelongLE (unsigned value, void *buffer)
> >>> +{
> >>> +unsigned char *p = (unsigned char*) buffer;
> >>> +
> >>> +p[0] = (unsigned) value;
> >>> +p[1] = (unsigned) value >> 8;
> >>> +p[2] = (unsigned) value >> 16;
> >>> +p[3] = (unsigned) value >> 24;
> >>> +}
> >>> ...
> >>> +/* Write a big-endian 32-bit VALUE to BUFFER.  */
> >>> +
> >>> +void
> >>> +Port::writelongBE (unsigned value, void *buffer)
> >>> +{
> >>> +unsigned char *p = (unsigned char*) buffer;
> >>> +
> >>> +p[0] = (unsigned) value >> 24;
> >>> +p[1] = (unsigned) value >> 16;
> >>> +p[2] = (unsigned) value >> 8;
> >>> +p[3] = (unsigned) value;
> >>> +}
> >>
> >> Overindented bodies.  Missing space before "*" in "(unsigned char*)"
> >> in all these functions.
> >>
> >> Obviously this stuff assumes host CHAR_BIT == 8, but let's be realistic :-)
> >> Is it also used in ways that require the target BITS_PER_UNIT to be 8
> >> as well?  That could realistically be a different value (and we've had
> >> ports like that in the past).
> >>
> >
> > These read(long|word)(BE|LE) functions should only ever be used when
> > reading the BOM of a UTF-16 or UTF-32 file.
> >
> > I've done a grep, and the write(long|word)(BE|LE) are no longer used
> > by the dmd frontend, so there's little point keeping them around.
> >
> > If there's any utility in libiberty or another location then I'd be
> > more than happy to delegate this a

Re: [PATCH 01/14] Add D front-end (DMD) language implementation and license.

2018-10-21 Thread Iain Buclaw
On Wed, 19 Sep 2018 at 20:46, Iain Buclaw  wrote:
>
> On 18 September 2018 at 02:31, Iain Buclaw  wrote:
> > This patch adds the DMD front-end proper and license (Boost) files,
> > comprised of a lexer, parser, and semantic analyzer.
> >
> > ftp://ftp.gdcproject.org/patches/v4/01-v4-d-frontend-dmd.patch
> >
>
> I've dug up the history of what was reviewed before, so perhaps we can
> avoid repeating what has previously been discussed.
>
> https://gcc.gnu.org/ml/gcc-patches/2017-09/msg00582.html
>
> All sources in this patch are mainlined on github, and we are just a
> downstream user.
>
> The FSF has said that there's no problem including these in gcc as they are.
>

Following the review of other parts, attaching just the difference to
the current patch to keep size down.

I'll upload the unabridged patch series later if needed.

Changelog entries are in 02/14.

Regards
--
Iain

---


01-v4v5-d-frontend-dmd.patch.xz
Description: Binary data


Re: [PATCH, rs6000] Use unaligned vector types for some pointer casts

2018-10-21 Thread Bill Schmidt
On 10/20/18 10:53 AM, Segher Boessenkool wrote:
> Hi!
>
> On Fri, Oct 19, 2018 at 04:27:27PM -0500, Bill Schmidt wrote:
>> The x86 intrinsic compatibility headers contain a couple of instances of
>> undefined behavior where a cast to an aligned type is used when that
>> alignment is not guaranteed by the expression to be cast from.  This
>> patch fixes that problem by replacing the aligned types with unaligned
>> versions of the same type.
> How did you find these?  What I'm after is, did you find all instances?

Jinsong found these by using the test cases for these header files when
incorporating the headers into Clang.  I will ask him whether he scanned
for additional similar cases.

>
>> --- gcc/config/rs6000/xmmintrin.h(revision 265318)
>> +++ gcc/config/rs6000/xmmintrin.h(working copy)
>> @@ -85,6 +85,9 @@
>> vector types, and their scalar components.  */
>>  typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__));
>>  
>> +/* Unaligned version of the same type.  */
>> +typedef float __m128_u __attribute__ ((__vector_size__ (16), 
>> __may_alias__));
> This is identical to __m128.  Do you want aligned(1) as well?

Argh, yes.  I failed to copy this correctly from Jinsong.  I will re-test before
applying.

Thanks!
Bill

>
> Okay for trunk if you want that; if not, this needs explanation (a code
> comment or similar).
>
> Thanks!
>
>
> Segher
>



Re: [patch, fortran] Implement FINDLOC

2018-10-21 Thread Dominique d'Humières
Hi Thomas,

With your patch, compiling the following test

program logtest3 
   implicit none 
   logical :: x = .true. 
   integer, parameter :: I_FINDLOC_BACK(1) = findloc([1,1],1, & 
  back=x) 
end program logtest3 

gives an ICE

gfc: internal compiler error: Segmentation fault: 11 signal terminated program 
f951

I see some kind of "infinite" recursion

…
frame #899971: 0x000100037e44 f951`gfc_check_init_expr(gfc_expr*) 
[inlined] check_init_expr_arguments(e=0x00014c34bd80) at expr.c:2374
frame #899972: 0x000100037e24 f951`gfc_check_init_expr(gfc_expr*) 
[inlined] check_conversion(e=0x00014c34bd80)
frame #899973: 0x000100037e1d 
f951`gfc_check_init_expr(e=0x00014c34bd80)
frame #899974: 0x000100037e44 f951`gfc_check_init_expr(gfc_expr*) 
[inlined] check_init_expr_arguments(e=0x00014c34bc40) at expr.c:2374
frame #899975: 0x000100037e24 f951`gfc_check_init_expr(gfc_expr*) 
[inlined] check_conversion(e=0x00014c34bc40)
frame #899976: 0x000100037e1d 
f951`gfc_check_init_expr(e=0x00014c34bc40)
frame #899977: 0x000100037e44 f951`gfc_check_init_expr(gfc_expr*) 
[inlined] check_init_expr_arguments(e=0x00014c34bb00) at expr.c:2374
frame #899978: 0x000100037e24 f951`gfc_check_init_expr(gfc_expr*) 
[inlined] check_conversion(e=0x00014c34bb00)
frame #899979: 0x000100037e1d 
f951`gfc_check_init_expr(e=0x00014c34bb00)
frame #899980: 0x000100037e44 f951`gfc_check_init_expr(gfc_expr*) 
[inlined] check_init_expr_arguments(e=0x00014c34b9c0) at expr.c:2374
frame #899981: 0x000100037e24 f951`gfc_check_init_expr(gfc_expr*) 
[inlined] check_conversion(e=0x00014c34b9c0)
frame #899982: 0x000100037e1d 
f951`gfc_check_init_expr(e=0x00014c34b9c0)

Also in gfortran.dg/findloc_4.f90 should not the lines

  print *,findloc(a,value=1.5,dim=2,back=.true.)
  print *,findloc(a,value=1,dim=1,mask=lo)

be converted to tests?

Thanks for working on the implementation of FINDLOC.

Dominique

[PATCH 3/3] i386: Enable AVX512 memory broadcast for INT andnot

2018-10-21 Thread H.J. Lu
Many AVX512 vector operations can broadcast from a scalar memory source.
This patch enables memory broadcast for INT andnot operations.

gcc/

PR target/72782
* config/i386/sse.md (*andnot3_bst): New.

gcc/testsuite/

PR target/72782
* gcc.target/i386/avx512f-andn-di-zmm-1.c: New test.
* gcc.target/i386/avx512f-andn-si-zmm-1.c: Likewise.
* gcc.target/i386/avx512f-andn-si-zmm-2.c: Likewise.
* gcc.target/i386/avx512f-andn-si-zmm-3.c: Likewise.
* gcc.target/i386/avx512f-andn-si-zmm-4.c: Likewise.
* gcc.target/i386/avx512f-andn-si-zmm-5.c: Likewise.
* gcc.target/i386/avx512vl-andn-si-xmm-1.c: Likewise.
* gcc.target/i386/avx512vl-andn-si-ymm-1.c: Likewise.
---
 gcc/config/i386/sse.md  | 13 +
 .../gcc.target/i386/avx512f-andn-di-zmm-1.c | 12 
 .../gcc.target/i386/avx512f-andn-si-zmm-1.c | 12 
 .../gcc.target/i386/avx512f-andn-si-zmm-2.c | 12 
 .../gcc.target/i386/avx512f-andn-si-zmm-3.c | 12 
 .../gcc.target/i386/avx512f-andn-si-zmm-4.c | 12 
 .../gcc.target/i386/avx512f-andn-si-zmm-5.c | 12 
 .../gcc.target/i386/avx512vl-andn-si-xmm-1.c| 12 
 .../gcc.target/i386/avx512vl-andn-si-ymm-1.c| 12 
 9 files changed, 109 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-andn-di-zmm-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-andn-si-zmm-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-andn-si-zmm-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-andn-si-zmm-3.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-andn-si-zmm-4.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-andn-si-zmm-5.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-andn-si-xmm-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-andn-si-ymm-1.c

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 29f390ead1f..05bd5781804 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -12070,6 +12070,19 @@
  ]
  (const_string "")))])
 
+(define_insn "*andnot<mode>3_bst"
+  [(set (match_operand:VI 0 "register_operand" "=v")
+   (and:VI
+ (not:VI48_AVX512VL
+(match_operand:VI48_AVX512VL 1 "register_operand" "v"))
+ (vec_duplicate:VI48_AVX512VL
+   (match_operand:<ssescalarmode> 2 "memory_operand" "m"))))]
+  "TARGET_AVX512F"
+  "vpandn<ssemodesuffix>\t{%2<avx512bcst>, %1, %0|%0, %1, %2<avx512bcst>}"
+  [(set_attr "type" "sselog")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<sseinsnmode>")])
+
 (define_insn "*andnot3_mask"
   [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
(vec_merge:VI48_AVX512VL
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-andn-di-zmm-1.c 
b/gcc/testsuite/gcc.target/i386/avx512f-andn-di-zmm-1.c
new file mode 100644
index 000..1450d3c1914
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-andn-di-zmm-1.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vpandnq\[ 
\\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */
+/* { dg-final { scan-assembler-not "vpbroadcastq\[^\n\]*%zmm\[0-9\]+" } } */
+
+#define type __m512i
+#define vec 512
+#define op andnot
+#define suffix epi64
+#define SCALAR long long
+
+#include "avx512-binop-1.h"
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-andn-si-zmm-1.c 
b/gcc/testsuite/gcc.target/i386/avx512f-andn-si-zmm-1.c
new file mode 100644
index 000..c9d8a820295
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-andn-si-zmm-1.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vpandnd\[ 
\\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */
+/* { dg-final { scan-assembler-not "vpbroadcastd\[^\n\]*%zmm\[0-9\]+" } } */
+
+#define type __m512i
+#define vec 512
+#define op andnot
+#define suffix epi32
+#define SCALAR int
+
+#include "avx512-binop-1.h"
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-andn-si-zmm-2.c 
b/gcc/testsuite/gcc.target/i386/avx512f-andn-si-zmm-2.c
new file mode 100644
index 000..a9608ca095d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-andn-si-zmm-2.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vpbroadcastd\[^\n\]*%zmm\[0-9\]+" 1 } } 
*/
+/* { dg-final { scan-assembler-times "vpandnd\[^\n\]*%zmm\[0-9\]+" 1 } } */
+
+#define type __m512i
+#define vec 512
+#define op andnot
+#define suffix epi32
+#define SCALAR int
+
+#include "avx512-binop-2.h"
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-andn-si-zmm-3.c 
b/gcc/testsuite/gcc.target/i386/avx512f-andn-si-zmm-3.c
new file mode 100644
index 000..71751fc874c
--- /dev/null
+++ b/gcc/tes

[PATCH 1/3] i386: Enable AVX512 memory broadcast for INT add

2018-10-21 Thread H.J. Lu
Many AVX512 vector operations can broadcast from a scalar memory source.
This patch enables memory broadcast for INT add operations.

gcc/

PR target/72782
* config/i386/sse.md (avx512bcst): Updated for V4SI, V2DI, V8SI,
V4DI, V16SI and V8DI.
(*minus3_bcst): New.
(*plus3_bcst): Likewise.

gcc/testsuite/

PR target/72782
* gcc.target/i386/avx512f-add-di-zmm-1.c: New test.
* gcc.target/i386/avx512f-add-si-zmm-1.c: Likewise.
* gcc.target/i386/avx512f-add-si-zmm-2.c: Likewise.
* gcc.target/i386/avx512f-add-si-zmm-3.c: Likewise.
* gcc.target/i386/avx512f-add-si-zmm-4.c: Likewise.
* gcc.target/i386/avx512f-add-si-zmm-5.c: Likewise.
* gcc.target/i386/avx512f-add-si-zmm-6.c: Likewise.
* gcc.target/i386/avx512f-sub-di-zmm-1.c: Likewise.
* gcc.target/i386/avx512f-sub-si-zmm-1.c: Likewise.
* gcc.target/i386/avx512f-sub-si-zmm-2.c: Likewise.
* gcc.target/i386/avx512f-sub-si-zmm-3.c: Likewise.
* gcc.target/i386/avx512f-sub-si-zmm-4.c: Likewise.
* gcc.target/i386/avx512f-sub-si-zmm-5.c: Likewise.
* gcc.target/i386/avx512vl-add-si-xmm-1.c: Likewise.
* gcc.target/i386/avx512vl-add-si-ymm-1.c: Likewise.
* gcc.target/i386/avx512vl-sub-si-xmm-1.c: Likewise.
* gcc.target/i386/avx512vl-sub-si-ymm-1.c: Likewise.
---
 gcc/config/i386/sse.md| 29 ++-
 .../gcc.target/i386/avx512f-add-di-zmm-1.c| 12 
 .../gcc.target/i386/avx512f-add-si-zmm-1.c| 12 
 .../gcc.target/i386/avx512f-add-si-zmm-2.c| 12 
 .../gcc.target/i386/avx512f-add-si-zmm-3.c| 12 
 .../gcc.target/i386/avx512f-add-si-zmm-4.c| 12 
 .../gcc.target/i386/avx512f-add-si-zmm-5.c| 12 
 .../gcc.target/i386/avx512f-add-si-zmm-6.c| 12 
 .../gcc.target/i386/avx512f-sub-di-zmm-1.c| 12 
 .../gcc.target/i386/avx512f-sub-si-zmm-1.c| 12 
 .../gcc.target/i386/avx512f-sub-si-zmm-2.c| 12 
 .../gcc.target/i386/avx512f-sub-si-zmm-3.c| 12 
 .../gcc.target/i386/avx512f-sub-si-zmm-4.c| 12 
 .../gcc.target/i386/avx512f-sub-si-zmm-5.c| 12 
 .../gcc.target/i386/avx512vl-add-si-xmm-1.c   | 12 
 .../gcc.target/i386/avx512vl-add-si-ymm-1.c   | 12 
 .../gcc.target/i386/avx512vl-sub-si-xmm-1.c   | 12 
 .../gcc.target/i386/avx512vl-sub-si-ymm-1.c   | 12 
 18 files changed, 232 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-add-di-zmm-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-add-si-zmm-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-add-si-zmm-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-add-si-zmm-3.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-add-si-zmm-4.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-add-si-zmm-5.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-add-si-zmm-6.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-sub-di-zmm-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-sub-si-zmm-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-sub-si-zmm-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-sub-si-zmm-3.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-sub-si-zmm-4.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-sub-si-zmm-5.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-add-si-xmm-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-add-si-ymm-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-sub-si-xmm-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-sub-si-ymm-1.c

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 2c702ceed2d..2d4fac3f8f7 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -660,7 +660,10 @@
V16SF V8DF])
 
 (define_mode_attr avx512bcst
-  [(V4SF "%{1to4%}") (V2DF "%{1to2%}")
+  [(V4SI "%{1to4%}") (V2DI "%{1to2%}")
+   (V8SI "%{1to8%}") (V4DI "%{1to4%}")
+   (V16SI "%{1to16%}") (V8DI "%{1to8%}")
+   (V4SF "%{1to4%}") (V2DF "%{1to2%}")
(V8SF "%{1to8%}") (V4DF "%{1to4%}")
(V16SF "%{1to16%}") (V8DF "%{1to8%}")])
 
@@ -10408,6 +10411,30 @@
(set_attr "prefix" "orig,vex")
(set_attr "mode" "")])
 
+(define_insn "*sub3_bcst"
+  [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
+   (minus:VI48_AVX512VL
+ (match_operand:VI48_AVX512VL 1 "register_operand" "v")
+ (vec_duplicate:VI48_AVX512VL
+   (match_operand: 2 "memory_operand" "m"]
+  "TARGET_AVX512F && ix86_binary_operator_ok (MINUS, mode, operands)"
+  "vpsub\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "sseiadd")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "")])
+
+(define_insn "*plus3_bcst"
+  [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
+   (plus:VI4

[PATCH 2/3] i386: Enable AVX512 memory broadcast for INT logic

2018-10-21 Thread H.J. Lu
Many AVX512 vector operations can broadcast from a scalar memory source.
This patch enables memory broadcast for INT logic operations.

gcc/

PR target/72782
* config/i386/sse.md (*3_bcst): New.

gcc/testsuite/

PR target/72782
* gcc.target/i386/avx512f-and-di-zmm-1.c: New test.
* gcc.target/i386/avx512f-and-si-zmm-1.c: Likewise.
* gcc.target/i386/avx512f-and-si-zmm-2.c: Likewise.
* gcc.target/i386/avx512f-and-si-zmm-3.c: Likewise.
* gcc.target/i386/avx512f-and-si-zmm-4.c: Likewise.
* gcc.target/i386/avx512f-and-si-zmm-5.c: Likewise.
* gcc.target/i386/avx512f-and-si-zmm-6.c: Likewise.
* gcc.target/i386/avx512f-or-di-zmm-1.c: Likewise.
* gcc.target/i386/avx512f-or-si-zmm-1.c: Likewise.
* gcc.target/i386/avx512f-or-si-zmm-2.c: Likewise.
* gcc.target/i386/avx512f-or-si-zmm-3.c: Likewise.
* gcc.target/i386/avx512f-or-si-zmm-4.c: Likewise.
* gcc.target/i386/avx512f-or-si-zmm-5.c: Likewise.
* gcc.target/i386/avx512f-or-si-zmm-6.c: Likewise.
* gcc.target/i386/avx512f-xor-di-zmm-1.c: Likewise.
* gcc.target/i386/avx512f-xor-si-zmm-1.c: Likewise.
* gcc.target/i386/avx512f-xor-si-zmm-2.c: Likewise.
* gcc.target/i386/avx512f-xor-si-zmm-3.c: Likewise.
* gcc.target/i386/avx512f-xor-si-zmm-4.c: Likewise.
* gcc.target/i386/avx512f-xor-si-zmm-5.c: Likewise.
* gcc.target/i386/avx512f-xor-si-zmm-6.c: Likewise.
* gcc.target/i386/avx512vl-and-si-xmm-1.c: Likewise.
* gcc.target/i386/avx512vl-and-si-ymm-1.c: Likewise.
* gcc.target/i386/avx512vl-or-si-xmm-1.c: Likewise.
* gcc.target/i386/avx512vl-or-si-ymm-1.c: Likewise.
* gcc.target/i386/avx512vl-xor-si-xmm-1.c: Likewise.
* gcc.target/i386/avx512vl-xor-si-ymm-1.c: Likewise.
---
 gcc/config/i386/sse.md   | 12 
 gcc/testsuite/gcc.target/i386/avx512f-and-di-zmm-1.c | 12 
 gcc/testsuite/gcc.target/i386/avx512f-and-si-zmm-1.c | 12 
 gcc/testsuite/gcc.target/i386/avx512f-and-si-zmm-2.c | 12 
 gcc/testsuite/gcc.target/i386/avx512f-and-si-zmm-3.c | 12 
 gcc/testsuite/gcc.target/i386/avx512f-and-si-zmm-4.c | 12 
 gcc/testsuite/gcc.target/i386/avx512f-and-si-zmm-5.c | 12 
 gcc/testsuite/gcc.target/i386/avx512f-and-si-zmm-6.c | 12 
 gcc/testsuite/gcc.target/i386/avx512f-or-di-zmm-1.c  | 12 
 gcc/testsuite/gcc.target/i386/avx512f-or-si-zmm-1.c  | 12 
 gcc/testsuite/gcc.target/i386/avx512f-or-si-zmm-2.c  | 12 
 gcc/testsuite/gcc.target/i386/avx512f-or-si-zmm-3.c  | 12 
 gcc/testsuite/gcc.target/i386/avx512f-or-si-zmm-4.c  | 12 
 gcc/testsuite/gcc.target/i386/avx512f-or-si-zmm-5.c  | 12 
 gcc/testsuite/gcc.target/i386/avx512f-or-si-zmm-6.c  | 12 
 gcc/testsuite/gcc.target/i386/avx512f-xor-di-zmm-1.c | 12 
 gcc/testsuite/gcc.target/i386/avx512f-xor-si-zmm-1.c | 12 
 gcc/testsuite/gcc.target/i386/avx512f-xor-si-zmm-2.c | 12 
 gcc/testsuite/gcc.target/i386/avx512f-xor-si-zmm-3.c | 12 
 gcc/testsuite/gcc.target/i386/avx512f-xor-si-zmm-4.c | 12 
 gcc/testsuite/gcc.target/i386/avx512f-xor-si-zmm-5.c | 12 
 gcc/testsuite/gcc.target/i386/avx512f-xor-si-zmm-6.c | 12 
 .../gcc.target/i386/avx512vl-and-si-xmm-1.c  | 12 
 .../gcc.target/i386/avx512vl-and-si-ymm-1.c  | 12 
 gcc/testsuite/gcc.target/i386/avx512vl-or-si-xmm-1.c | 12 
 gcc/testsuite/gcc.target/i386/avx512vl-or-si-ymm-1.c | 12 
 .../gcc.target/i386/avx512vl-xor-si-xmm-1.c  | 12 
 .../gcc.target/i386/avx512vl-xor-si-ymm-1.c  | 12 
 28 files changed, 336 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-and-di-zmm-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-and-si-zmm-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-and-si-zmm-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-and-si-zmm-3.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-and-si-zmm-4.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-and-si-zmm-5.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-and-si-zmm-6.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-or-di-zmm-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-or-si-zmm-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-or-si-zmm-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-or-si-zmm-3.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-or-si-zmm-4.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-or-si-zmm-5.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-or-si-zmm-6.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-

V3 [PATCH] i386: Use scalar operand in floating point vec_dup patterns

2018-10-21 Thread H.J. Lu
On 10/21/18, H.J. Lu  wrote:
> On 10/17/18, H.J. Lu  wrote:
>> Since vector registers are also used for scalar floating point values,
>> we can use scalar operand in floating point vec_dup patterns, which
>> enables combiner to generate
>>
>> (set (reg:V8SF 84)
>>  (vec_duplicate:V8SF (mem/c:SF (symbol_ref:DI ("y")
>>
>> For AVX512 broadcast instructions from integer register operand, we only
>> need to broadcast integer to integer vectors.
>>
>> gcc/
>>
>>  PR target/87537
>>  * config/i386/i386-builtin-types.def: Replace
>>  CODE_FOR_avx2_vec_dupv4sf, CODE_FOR_avx2_vec_dupv8sf and
>>  CODE_FOR_avx2_vec_dupv4df with CODE_FOR_vec_dupv4sf,
>>  CODE_FOR_vec_dupv8sf and CODE_FOR_vec_dupv4df, respectively.
>>  * config/i386/i386.c (expand_vec_perm_1): Replace
>>  gen_avx512f_vec_dupv16sf_1, gen_avx2_vec_dupv8sf_1 and
>>  gen_avx512f_vec_dupv8df_1 with gen_avx512f_vec_dupv16sf,
>>  gen_vec_dupv8sf and gen_avx512f_vec_dupv8df, respectively.
>>  Duplicate them from scalar operand.
>>  * config/i386/i386.md (SF to DF splitter): Replace
>>  gen_avx512f_vec_dupv16sf_1 with gen_avx512f_vec_dupv16sf.
>>  * config/i386/sse.md (VF48_AVX512VL): New.
>>  (avx2_vec_dup): Removed.
>>  (avx2_vec_dupv8sf_1): Likewise.
>>  (avx512f_vec_dup_1): Likewise.
>>  (avx2_vec_dupv4df): Likewise.
>>  (_vec_dup:V48_AVX512VL): Likewise.
>>  (_vec_dup:VF48_AVX512VL): New.
>>  (_vec_dup:VI48_AVX512VL): Likewise.
>>  (_vec_dup_gpr): Replace
>>  V48_AVX512VL with VI48_AVX512VL.
>>  (*avx_vperm_broadcast_): Replace gen_avx2_vec_dupv8sf with
>>  gen_vec_dupv8sf.
>>
>> gcc/testsuite/
>>
>>  PR target/87537
>>  * gcc.target/i386/avx2-vbroadcastss_ps256-1.c: Updated.
>>  * gcc.target/i386/avx512vl-vbroadcast-3.c: Likewise.
>
> Here is the updated patch. I added const_vector_duplicate_operand to
> handle constant vector broadcast from memory.  OK for trunk?

Here is the updated patch with a testcase for const_vector_duplicate_operand.
We should split

(set (reg:V16SF 86)
 (const_vector:V16SF
   [(const_double:SF 2.0e+0 [0x0.8p+2]) repeated x16])

to

(set (reg:V16SF 86)
 (vec_duplicate:V16SF (mem/u/c:SF (symbol_ref/u:DI ("*.LC1")

only before register allocation and we shouldn't split special SSE constants.
OK for trunk?

Thanks.

-- 
H.J.
From b8809a6ae86c32cb97f38ecda5d6c8b167b4259c Mon Sep 17 00:00:00 2001
From: "H.J. Lu" 
Date: Tue, 2 Oct 2018 14:27:55 -0700
Subject: [PATCH] i386: Use scalar operand in floating point vec_dup patterns

Since vector registers are also used for scalar floating point values,
we can use scalar operand in floating point vec_dup patterns, which
enables combiner to generate

(set (reg:V8SF 84)
 (vec_duplicate:V8SF (mem/c:SF (symbol_ref:DI ("y")

const_vector_duplicate_operand is added for constant vector broadcast.
We split

(set (reg:V16SF 86)
 (const_vector:V16SF
   [(const_double:SF 2.0e+0 [0x0.8p+2]) repeated x16])

to

(set (reg:V16SF 86)
 (vec_duplicate:V16SF (mem/u/c:SF (symbol_ref/u:DI ("*.LC1")

before register allocation so that IRA can turn

(set (reg:V16SF 86)
 (vec_duplicate:V16SF (mem/u/c:SF (symbol_ref/u:DI ("*.LC1")
(set (reg:V16SF 90)
 (plus:V16SF (reg/v:V16SF 85 [ x ])
		 (reg:V16SF 86)))

into

(set (reg:V16SF 90)
 (plus:V16SF
   (vec_duplicate:V16SF (mem/u/c:SF (symbol_ref/u:DI ("*.LC1"
   (reg/v:V16SF 85 [ x ])))

For AVX512 broadcast instructions from integer register operand, we only
need to broadcast integer to integer vectors.

gcc/

	PR target/87537
	* config/i386/i386-builtin-types.def: Replace
	CODE_FOR_avx2_vec_dupv4sf, CODE_FOR_avx2_vec_dupv8sf and
	CODE_FOR_avx2_vec_dupv4df with CODE_FOR_vec_dupv4sf,
	CODE_FOR_vec_dupv8sf and CODE_FOR_vec_dupv4df, respectively.
	* config/i386/i386.c (expand_vec_perm_1): Replace
	gen_avx512f_vec_dupv16sf_1, gen_avx2_vec_dupv8sf_1 and
	gen_avx512f_vec_dupv8df_1 with gen_avx512f_vec_dupv16sf,
	gen_vec_dupv8sf and gen_avx512f_vec_dupv8df, respectively.
	Duplicate them from scalar operand.
	* config/i386/i386.md (SF to DF splitter): Replace
	gen_avx512f_vec_dupv16sf_1 with gen_avx512f_vec_dupv16sf.
	* config/i386/predicates.md (const_vector_duplicate_operand): New.
	* config/i386/sse.md (VF48_AVX512VL): New.
	(avx2_vec_dup): Removed.
	(avx2_vec_dupv8sf_1): Likewise.
	(avx512f_vec_dup_1): Likewise.
	(avx2_vec_dupv4df): Likewise.
	(_vec_dup:V48_AVX512VL): Likewise.
	(_vec_dup:VF48_AVX512VL): New.
	(*_const_vec_dup): Likewise.
	(_vec_dup:VI48_AVX512VL): Likewise.
	(_vec_dup_gpr): Replace
	V48_AVX512VL with VI48_AVX512VL.
	(*avx_vperm_broadcast_): Replace gen_avx2_vec_dupv8sf with
	gen_vec_dupv8sf.

gcc/testsuite/

	PR target/87537
	* gcc.target/i386/avx2-vbroadcastss_ps256-1.c: Updated.
	* gcc.target/i386/avx512vl-vbroadcast-3.c: Likewise.
	* gcc.target/i386/avx512-binop-7.h: New file.
	* gcc.target/i386/avx512f-add-sf-zmm-7.c: Likewise.
	* gcc.target/i386/pr87537-2.c: Likewise.
	* g

[[C++ PATCH]] Implement C++2a P0330R2 - Literal Suffixes for ptrdiff_t and size_t

2018-10-21 Thread Ed Smith-Rowland

All,

This patch implements C++2a proposal P0330R2 Literal Suffixes for 
ptrdiff_t and size_t*.  It's not official yet but looks very likely to 
pass.  It is incomplete because I'm looking for some opinions. (We also 
might wait 'till it actually passes).


This paper takes the direction of a language change rather than a 
library change through C++11 literal operators.  This was after feedback 
on that paper after a few iterations.


As coded in this patch, integer suffixes involving 'z' are errors in C 
and warnings for C++ <= 17 (in addition to the usual warning about 
implementation suffixes shadowing user-defined ones).


OTOH, the 'z' suffix is not currently legal - it can't break 
currently-correct code in any C/C++ dialect.  Furthermore, I suspect the 
language direction was chosen to accommodate a similar addition to C20.


I'm thinking of making this feature available as an extension to all of 
C/C++ perhaps with appropriate pedwarn.


Opinions?

Ed Smith-Rowland

[*] http://www.open-std.org/JTC1/SC22/WG21/docs/papers/2018/p0330r2.html


Index: gcc/c-family/c-cppbuiltin.c
===
--- gcc/c-family/c-cppbuiltin.c (revision 265343)
+++ gcc/c-family/c-cppbuiltin.c (working copy)
@@ -975,6 +975,11 @@
  cpp_define (pfile, "__cpp_structured_bindings=201606");
  cpp_define (pfile, "__cpp_variadic_using=201611");
}
+  if (cxx_dialect > cxx17)
+   {
+ /* Set feature test macros for C++2a.  */
+ cpp_define (pfile, "__cpp_ptrdiff_t_suffix=201811");
+   }
   if (flag_concepts)
cpp_define (pfile, "__cpp_concepts=201507");
   if (flag_tm)
Index: gcc/c-family/c-lex.c
===
--- gcc/c-family/c-lex.c(revision 265343)
+++ gcc/c-family/c-lex.c(working copy)
@@ -766,6 +766,14 @@
 type = ((flags & CPP_N_UNSIGNED)
? widest_unsigned_literal_type_node
: widest_integer_literal_type_node);
+  else if (flags & CPP_N_PTRDIFF_T)
+{
+  /* itk refers to fundamental types not aliased size types.  */
+  if (flags & CPP_N_UNSIGNED)
+   type = size_type_node;
+  else
+   type = ptrdiff_type_node;
+}
   else
 {
   type = integer_types[itk];
Index: gcc/c-family/c.opt
===
--- gcc/c-family/c.opt  (revision 265343)
+++ gcc/c-family/c.opt  (working copy)
@@ -699,6 +699,10 @@
 C ObjC C++ ObjC++ CPP(cpp_warn_long_long) CppReason(CPP_W_LONG_LONG) 
Var(warn_long_long) Init(-1) Warning LangEnabledBy(C ObjC,Wc90-c99-compat)
 Do not warn about using \"long long\" when -pedantic.
 
+Wptrdiff_t-literals
+C ObjC C++ ObjC++ CPP(ptrdiff_t_literals) CppReason(CPP_W_PTRDIFF_T_LITERALS) 
Var(warn_ptrdiff_t_literals) Init(1) Warning LangEnabledBy(C ObjC C++ ObjC++)
+Warn when \"z\" or \"Z\" is used as a numeric literal suffix for C or C++yy, 
yy <= 17.
+
 Wmain
 C ObjC C++ ObjC++ Var(warn_main) Init(-1) Warning LangEnabledBy(C ObjC,Wall, 
2, 0)
 Warn about suspicious declarations of \"main\".
Index: gcc/testsuite/g++.dg/cpp0x/udlit-shadow-neg.C
===
--- gcc/testsuite/g++.dg/cpp0x/udlit-shadow-neg.C   (revision 265343)
+++ gcc/testsuite/g++.dg/cpp0x/udlit-shadow-neg.C   (working copy)
@@ -17,6 +17,30 @@
 operator"" ull(unsigned long long int k)  // { dg-warning "integer 
suffix|shadowed by implementation" }
 { return k; }
 
+unsigned long long int
+operator"" z(unsigned long long int k)  // { dg-warning "integer 
suffix|shadowed by implementation" }
+{ return k; }
+
+unsigned long long int
+operator"" uz(unsigned long long int k)  // { dg-warning "integer 
suffix|shadowed by implementation" }
+{ return k; }
+
+unsigned long long int
+operator"" zu(unsigned long long int k)  // { dg-warning "integer 
suffix|shadowed by implementation" }
+{ return k; }
+
+unsigned long long int
+operator"" Z(unsigned long long int k)  // { dg-warning "integer 
suffix|shadowed by implementation" }
+{ return k; }
+
+unsigned long long int
+operator"" UZ(unsigned long long int k)  // { dg-warning "integer 
suffix|shadowed by implementation" }
+{ return k; }
+
+unsigned long long int
+operator"" ZU(unsigned long long int k)  // { dg-warning "integer 
suffix|shadowed by implementation" }
+{ return k; }
+
 //  Namespaces are no hiding place.
 namespace Long
 {
@@ -37,6 +61,30 @@
 operator"" ull(unsigned long long int k)  // { dg-warning "integer 
suffix|shadowed by implementation" }
 { return k; }
 
+unsigned long long int
+operator"" z(unsigned long long int k)  // { dg-warning "integer 
suffix|shadowed by implementation" }
+{ return k; }
+
+unsigned long long int
+operator"" uz(unsigned long long int k)  // { dg-warning "integer 
suffix|shadowed by implementation" }
+{ return k; }
+
+unsigned long long int
+operator"" zu(unsigned long long int k)  // { dg-warning "integer 
suffix

Fwd: [patch, fortran] Implement FINDLOC

2018-10-21 Thread Thomas Koenig

Hi,

again rejected due to being overly large...

The whole message can be found at

https://gcc.gnu.org/ml/fortran/2018-10/msg00102.html

 Weitergeleitete Nachricht 
Betreff: [patch, fortran] Implement FINDLOC
Datum: Sun, 21 Oct 2018 19:36:35 +0200
Von: Thomas Koenig 
An: fort...@gcc.gnu.org , gcc-patches 



Hello world,

here is the implementation of FINDLOC.  This is another
step towards full F2008 compliance (we're not that far
away, actually, modulo a few bugs, of course).

This was quite a big piece of work, but at least I ended
up understanding a bit about trans-*.

Regression-tested.

OK for trunk?

Regards

Thomas

2017-10-21  Thomas Koenig  

PR fortran/54613
* gfortran.h (gfc_isym_id): Add GFC_ISYM_FINDLOC.
(gfc_check_f): Add f6fl field.
(gfc_simplify_f): Add f6 field.
(gfc_resolve_f): Likewise.
(gfc_type_letter): Add optional logical_equals_int flag.
* check.c (intrinsic_type_check): New function.
(gfc_check_findloc): New function.
* intrinsics.c (gfc_type_letter): If logical_equals_int is
set, act accordingly.
(add_sym_5ml):  Reformat comment.
(add_sym_6fl): New function.
(add_functions): Add findloc.
(check_arglist): Add sixth argument, handle it.
(resolve_intrinsic): Likewise.
(check_specific): Handle findloc.
* intrinsic.h (gfc_check_findloc): Add prototype.
(gfc_simplify_findloc): Likewise.
(gfc_resolve_findloc): Likewise.
(MAX_INTRINSIC_ARGS): Adjust.
* iresolve.c (gfc_resolve_findloc): New function.
* simplify.c (gfc_simplify_minmaxloc): Make static.
(simplify_findloc_to_scalar): New function.
(simplify_findloc_nodim): New function.
(simplify_findloc_to_array): New function.
(gfc_simplify_findloc): New function.
(gfc_conv_intrinsic_findloc): New function.
(gfc_conv_intrinsic_function): Handle GFC_ISYM_FINDLOC.
(gfc_is_intrinsic_libcall): Likewise.

2017-10-21  Thomas Koenig  

PR fortran/54613
* Makefile.am: Add files for findloc.
* Makefile.in: Regenerated.
* libgfortran.h (gfc_array_index_type): Add.
(gfc_array_s1): Add using GFC_UINTEGER_1.
(gfc_array_s4): Likewise.
Replace unnecessary comment.
(HAVE_GFC_UINTEGER_1): Define.
(HAVE_GFC_UINTEGER_4): Define.
* m4/findloc0.m4: New file.
* m4/findloc0s.m4: New file.
* m4/findloc1.m4: New file.
* m4/findloc1s.m4: New file.
* m4/findloc2s.m4: New file.
* m4/ifindloc0.m4: New file.
* m4/ifindloc1.m4: New file.
* m4/ifindloc2.m4: New file.
* m4/iparm.m4: Use unsigned integer for characters.
 * generated/findloc0_c16.c: New file.
 * generated/findloc0_c4.c: New file.
 * generated/findloc0_c8.c: New file.
 * generated/findloc0_i1.c: New file.
 * generated/findloc0_i16.c: New file.
 * generated/findloc0_i2.c: New file.
 * generated/findloc0_i4.c: New file.
 * generated/findloc0_i8.c: New file.
 * generated/findloc0_r16.c: New file.
 * generated/findloc0_r4.c: New file.
 * generated/findloc0_r8.c: New file.
 * generated/findloc0_s1.c: New file.
 * generated/findloc0_s4.c: New file.
 * generated/findloc1_c16.c: New file.
 * generated/findloc1_c4.c: New file.
 * generated/findloc1_c8.c: New file.
 * generated/findloc1_i1.c: New file.
 * generated/findloc1_i16.c: New file.
 * generated/findloc1_i2.c: New file.
 * generated/findloc1_i4.c: New file.
 * generated/findloc1_i8.c: New file.
 * generated/findloc1_r16.c: New file.
 * generated/findloc1_r4.c: New file.
 * generated/findloc1_r8.c: New file.
 * generated/findloc1_s1.c: New file.
 * generated/findloc1_s4.c: New file.
 * generated/findloc2_s1.c: New file.
 * generated/findloc2_s4.c: New file.
 * generated/maxloc0_16_s1.c: Regenerated.
 * generated/maxloc0_16_s4.c: Regenerated.
 * generated/maxloc0_4_s1.c: Regenerated.
 * generated/maxloc0_4_s4.c: Regenerated.
 * generated/maxloc0_8_s1.c: Regenerated.
 * generated/maxloc0_8_s4.c: Regenerated.
 * generated/maxloc1_16_s1.c: Regenerated.
 * generated/maxloc1_16_s4.c: Regenerated.
 * generated/maxloc1_4_s1.c: Regenerated.
 * generated/maxloc1_4_s4.c: Regenerated.
 * generated/maxloc1_8_s1.c: Regenerated.
 * generated/maxloc1_8_s4.c: Regenerated.
 * generated/maxloc2_16_s1.c: Regenerated.
 * generated/maxloc2_16_s4.c: Regenerated.
 * generated/maxloc2_4_s1.c: Regenerated.
 * generated/maxloc2_4_s4.c: Regenerated.
 * generated/maxloc2_8_s1.c: Regenerated.
 * generated/maxloc2_8_s4.c: Regenerated.
  

[Patch, fortran] PR71880 - pointer to allocatable character

2018-10-21 Thread Paul Richard Thomas
Committed as revision 265353.

> Not quite fixed. The lhs character length doesn't get set and so it
> fails at runtime. I will commit the patch as 'obvious'.
>
> Paul
>
> 2018-10-19  Paul Thomas  
>
> PR fortran/71880
> * trans-expr.c (gfc_trans_pointer_assignment): Set the string
> length for array valued deferred length lhs.
>
> 2018-10-19  Paul Thomas  
>
> PR fortran/71880
> * gfortran.dg/deferred_character_31.f90 : New test.


Re: gOlogy: do not change code in isolate-paths for warnings only

2018-10-21 Thread Richard Biener
On October 21, 2018 10:06:06 AM GMT+02:00, Alexandre Oliva  
wrote:
>The isolate-paths pass is activated by various -f flags, but also by
>-Wnull-dereference.  Most of its codegen changes are conditioned on at
>least one of the -f flags, but those that detect, warn about and
>isolate paths that return the address of local variables are enabled
>even if the pass is activated only by -Wnull-dereference.
>
>-W flags should not cause codegen changes, so this patch makes the
>codegen changes conditional on the presence of any of the -f flags
>that activate the pass.  Should we have a separate option to activate
>only this kind of transformation?

I don't think this is necessary. 

>Regstrapped on x86_64- and i686-linux-gnu.  Ok to install?

OK. 

Richard. 

>
>for  gcc/ChangeLog
>
>   * gimple-ssa-isolate-paths.c
>   (find_implicit_erroneous_behavior): Do not change code if the
>   pass is running for warnings only.
>   (find_explicit_erroneous_behavior): Likewise.
>---
> gcc/gimple-ssa-isolate-paths.c |   17 +
> 1 file changed, 13 insertions(+), 4 deletions(-)
>
>diff --git a/gcc/gimple-ssa-isolate-paths.c
>b/gcc/gimple-ssa-isolate-paths.c
>index e1fab61bedab..880836c21aa7 100644
>--- a/gcc/gimple-ssa-isolate-paths.c
>+++ b/gcc/gimple-ssa-isolate-paths.c
>@@ -431,7 +431,9 @@ find_implicit_erroneous_behavior (void)
>   "declared here");
> }
> 
>-if (gimple_bb (use_stmt) == bb)
>+if ((flag_isolate_erroneous_paths_dereference
>+ || flag_isolate_erroneous_paths_attribute)
>+&& gimple_bb (use_stmt) == bb)
>   {
> duplicate = isolate_path (bb, duplicate, e,
>   use_stmt, lhs, true);
>@@ -553,9 +555,16 @@ find_explicit_erroneous_behavior (void)
> inform (DECL_SOURCE_LOCATION(valbase),
> "declared here");
> }
>-tree zero = build_zero_cst (TREE_TYPE (val));
>-gimple_return_set_retval (return_stmt, zero);
>-update_stmt (stmt);
>+
>+/* Do not modify code if the user only asked for
>+   warnings.  */
>+if (flag_isolate_erroneous_paths_dereference
>+|| flag_isolate_erroneous_paths_attribute)
>+  {
>+tree zero = build_zero_cst (TREE_TYPE (val));
>+gimple_return_set_retval (return_stmt, zero);
>+update_stmt (stmt);
>+  }
>   }
>   }
>   }



Re: [PATCH 1/4] i386: Enable AVX512 memory broadcast for FMSUB

2018-10-21 Thread Uros Bizjak
On Sun, Oct 21, 2018 at 6:04 PM Uros Bizjak  wrote:
>
> On Sat, Oct 20, 2018 at 8:46 AM H.J. Lu  wrote:
> >
> > Many AVX512 vector operations can broadcast from a scalar memory source.
> > This patch enables memory broadcast for FMSUB operations.  In order to
> > support AVX512 memory broadcast for FMSUB, FMSUB builtin functions are
> > also added, instead of passing the negated value to FMA builtin functions.
> >
> > gcc/
> >
> > PR target/72782
> > * config/i386/avx512fintrin.h (_mm512_fmsub_round_pd): Use
> > __builtin_ia32_vfmsubpd512_mask.
> > (_mm512_mask_fmsub_round_pd): Likewise.
> > (_mm512_fmsub_pd): Likewise.
> > (_mm512_mask_fmsub_pd): Likewise.
> > (_mm512_maskz_fmsub_round_pd): Use
> > __builtin_ia32_vfmsubpd512_maskz.
> > (_mm512_maskz_fmsub_pd): Likewise.
> > (_mm512_fmsub_round_ps): Use __builtin_ia32_vfmsubps512_mask.
> > (_mm512_mask_fmsub_round_ps): Likewise.
> > (_mm512_fmsub_ps): Likewise.
> > (_mm512_mask_fmsub_ps): Likewise.
> > (_mm512_maskz_fmsub_round_ps): Use
> > __builtin_ia32_vfmsubps512_maskz.
> > (_mm512_maskz_fmsub_ps): Likewise.
> > * config/i386/avx512vlintrin.h (_mm256_mask_fmsub_pd): Use
> > __builtin_ia32_vfmsubpd256_mask.
> > (_mm256_maskz_fmsub_pd): Use __builtin_ia32_vfmsubpd256_maskz.
> > (_mm_mask_fmsub_pd): Use __builtin_ia32_vfmaddpd128_mask
> > (_mm_maskz_fmsub_pd): Use __builtin_ia32_vfmsubpd128_maskz.
> > (_mm256_mask_fmsub_ps): Use __builtin_ia32_vfmsubps256_mask.
> > (_mm256_mask_fmsub_ps): Use __builtin_ia32_vfmsubps256_mask.
> > (_mm256_maskz_fmsub_ps): Use __builtin_ia32_vfmsubps256_maskz.
> > (_mm_mask_fmsub_ps): Use __builtin_ia32_vfmsubps128_mask.
> > (_mm_maskz_fmsub_ps): Use __builtin_ia32_vfmsubps128_maskz.
> > * config/i386/fmaintrin.h (_mm_fmsub_pd): Use
> > __builtin_ia32_vfmsubpd.
> > (_mm256_fmsub_pd): Use __builtin_ia32_vfmsubpd256.
> > (_mm_fmsub_ps): Use __builtin_ia32_vfmsubps.
> > (_mm256_fmsub_ps): Use __builtin_ia32_vfmsubps256.
> > (_mm_fmsub_sd): Use __builtin_ia32_vfmsubsd3.
> > (_mm_fmsub_ss): Use __builtin_ia32_vfmsubss3.
> > * config/i386/i386-builtin.def: Add
> > __builtin_ia32_vfmsubpd256_mask,
> > __builtin_ia32_vfmsubpd256_maskz,
> > __builtin_ia32_vfmsubpd128_mask,
> > __builtin_ia32_vfmsubpd128_maskz,
> > __builtin_ia32_vfmsubps256_mask,
> > __builtin_ia32_vfmsubps256_maskz,
> > __builtin_ia32_vfmsubps128_mask,
> > __builtin_ia32_vfmsubps128_maskz,
> > __builtin_ia32_vfmsubpd512_mask,
> > __builtin_ia32_vfmsubpd512_maskz,
> > __builtin_ia32_vfmsubps512_mask,
> > __builtin_ia32_vfmsubps512_maskz, __builtin_ia32_vfmsubss3,
> > __builtin_ia32_vfmsubsd3, __builtin_ia32_vfmsubps,
> > __builtin_ia32_vfmsubpd, __builtin_ia32_vfmsubps256 and.
> > __builtin_ia32_vfmsubpd256.
> > * config/i386/sse.md (fma4i_fmsub_): New.
> > (_fmsub__maskz): Likewise.
> > (*fma_fmsub__bcst_1):
> > Likewise.
> > (*fma_fmsub__bcst_2):
> > Likewise.
> > (*fma_fmsub__bcst_3):
> > Likewise.
> > (fmai_vmfmsub_): Likewise.
> >
> > gcc/testsuite/
> >
> > PR target/72782
> > * gcc.target/i386/avx512f-fmsub-df-zmm-1.c: New test.
> > * gcc.target/i386/avx512f-fmsub-sf-zmm-1.c: Likewise.
> > * gcc.target/i386/avx512f-fmsub-sf-zmm-2.c: Likewise.
> > * gcc.target/i386/avx512f-fmsub-sf-zmm-3.c: Likewise.
> > * gcc.target/i386/avx512f-fmsub-sf-zmm-4.c: Likewise.
> > * gcc.target/i386/avx512f-fmsub-sf-zmm-5.c: Likewise.
> > * gcc.target/i386/avx512f-fmsub-sf-zmm-6.c: Likewise.
> > * gcc.target/i386/avx512f-fmsub-sf-zmm-7.c: Likewise.
> > * gcc.target/i386/avx512f-fmsub-sf-zmm-8.c: Likewise.
> > * gcc.target/i386/avx512vl-fmsub-sf-xmm-1.c: Likewise.
> > * gcc.target/i386/avx512vl-fmsub-sf-ymm-1.c: Likewise.
>
> LGTM.

LGTM for the whole patch series (all patches implement the same approach).

Thanks,
Uros.


Re: [PATCH 1/4] i386: Enable AVX512 memory broadcast for FMSUB

2018-10-21 Thread Uros Bizjak
On Sat, Oct 20, 2018 at 8:46 AM H.J. Lu  wrote:
>
> Many AVX512 vector operations can broadcast from a scalar memory source.
> This patch enables memory broadcast for FMSUB operations.  In order to
> support AVX512 memory broadcast for FMSUB, FMSUB builtin functions are
> also added, instead of passing the negated value to FMA builtin functions.
>
> gcc/
>
> PR target/72782
> * config/i386/avx512fintrin.h (_mm512_fmsub_round_pd): Use
> __builtin_ia32_vfmsubpd512_mask.
> (_mm512_mask_fmsub_round_pd): Likewise.
> (_mm512_fmsub_pd): Likewise.
> (_mm512_mask_fmsub_pd): Likewise.
> (_mm512_maskz_fmsub_round_pd): Use
> __builtin_ia32_vfmsubpd512_maskz.
> (_mm512_maskz_fmsub_pd): Likewise.
> (_mm512_fmsub_round_ps): Use __builtin_ia32_vfmsubps512_mask.
> (_mm512_mask_fmsub_round_ps): Likewise.
> (_mm512_fmsub_ps): Likewise.
> (_mm512_mask_fmsub_ps): Likewise.
> (_mm512_maskz_fmsub_round_ps): Use
> __builtin_ia32_vfmsubps512_maskz.
> (_mm512_maskz_fmsub_ps): Likewise.
> * config/i386/avx512vlintrin.h (_mm256_mask_fmsub_pd): Use
> __builtin_ia32_vfmsubpd256_mask.
> (_mm256_maskz_fmsub_pd): Use __builtin_ia32_vfmsubpd256_maskz.
> (_mm_mask_fmsub_pd): Use __builtin_ia32_vfmaddpd128_mask
> (_mm_maskz_fmsub_pd): Use __builtin_ia32_vfmsubpd128_maskz.
> (_mm256_mask_fmsub_ps): Use __builtin_ia32_vfmsubps256_mask.
> (_mm256_mask_fmsub_ps): Use __builtin_ia32_vfmsubps256_mask.
> (_mm256_maskz_fmsub_ps): Use __builtin_ia32_vfmsubps256_maskz.
> (_mm_mask_fmsub_ps): Use __builtin_ia32_vfmsubps128_mask.
> (_mm_maskz_fmsub_ps): Use __builtin_ia32_vfmsubps128_maskz.
> * config/i386/fmaintrin.h (_mm_fmsub_pd): Use
> __builtin_ia32_vfmsubpd.
> (_mm256_fmsub_pd): Use __builtin_ia32_vfmsubpd256.
> (_mm_fmsub_ps): Use __builtin_ia32_vfmsubps.
> (_mm256_fmsub_ps): Use __builtin_ia32_vfmsubps256.
> (_mm_fmsub_sd): Use __builtin_ia32_vfmsubsd3.
> (_mm_fmsub_ss): Use __builtin_ia32_vfmsubss3.
> * config/i386/i386-builtin.def: Add
> __builtin_ia32_vfmsubpd256_mask,
> __builtin_ia32_vfmsubpd256_maskz,
> __builtin_ia32_vfmsubpd128_mask,
> __builtin_ia32_vfmsubpd128_maskz,
> __builtin_ia32_vfmsubps256_mask,
> __builtin_ia32_vfmsubps256_maskz,
> __builtin_ia32_vfmsubps128_mask,
> __builtin_ia32_vfmsubps128_maskz,
> __builtin_ia32_vfmsubpd512_mask,
> __builtin_ia32_vfmsubpd512_maskz,
> __builtin_ia32_vfmsubps512_mask,
> __builtin_ia32_vfmsubps512_maskz, __builtin_ia32_vfmsubss3,
> __builtin_ia32_vfmsubsd3, __builtin_ia32_vfmsubps,
> __builtin_ia32_vfmsubpd, __builtin_ia32_vfmsubps256 and
> __builtin_ia32_vfmsubpd256.
> * config/i386/sse.md (fma4i_fmsub_): New.
> (_fmsub__maskz): Likewise.
> (*fma_fmsub__bcst_1):
> Likewise.
> (*fma_fmsub__bcst_2):
> Likewise.
> (*fma_fmsub__bcst_3):
> Likewise.
> (fmai_vmfmsub_): Likewise.
>
> gcc/testsuite/
>
> PR target/72782
> * gcc.target/i386/avx512f-fmsub-df-zmm-1.c: New test.
> * gcc.target/i386/avx512f-fmsub-sf-zmm-1.c: Likewise.
> * gcc.target/i386/avx512f-fmsub-sf-zmm-2.c: Likewise.
> * gcc.target/i386/avx512f-fmsub-sf-zmm-3.c: Likewise.
> * gcc.target/i386/avx512f-fmsub-sf-zmm-4.c: Likewise.
> * gcc.target/i386/avx512f-fmsub-sf-zmm-5.c: Likewise.
> * gcc.target/i386/avx512f-fmsub-sf-zmm-6.c: Likewise.
> * gcc.target/i386/avx512f-fmsub-sf-zmm-7.c: Likewise.
> * gcc.target/i386/avx512f-fmsub-sf-zmm-8.c: Likewise.
> * gcc.target/i386/avx512vl-fmsub-sf-xmm-1.c: Likewise.
> * gcc.target/i386/avx512vl-fmsub-sf-ymm-1.c: Likewise.

LGTM.

Thanks,
Uros.

> ---
>  gcc/config/i386/avx512fintrin.h   | 60 +++
>  gcc/config/i386/avx512vlintrin.h  | 32 
>  gcc/config/i386/fmaintrin.h   | 24 +++---
>  gcc/config/i386/i386-builtin.def  | 18 +
>  gcc/config/i386/sse.md| 77 +++
>  .../gcc.target/i386/avx512f-fmsub-df-zmm-1.c  | 12 +++
>  .../gcc.target/i386/avx512f-fmsub-sf-zmm-1.c  | 12 +++
>  .../gcc.target/i386/avx512f-fmsub-sf-zmm-2.c  | 12 +++
>  .../gcc.target/i386/avx512f-fmsub-sf-zmm-3.c  | 12 +++
>  .../gcc.target/i386/avx512f-fmsub-sf-zmm-4.c  | 12 +++
>  .../gcc.target/i386/avx512f-fmsub-sf-zmm-5.c  | 12 +++
>  .../gcc.target/i386/avx512f-fmsub-sf-zmm-6.c  | 12 +++
>  .../gcc.target/i386/avx512f-fmsub-sf-zmm-7.c  | 12 +++
>  .../gcc.target/i386/avx512f-fmsub-sf-zmm-8.c  | 12 +++
>  .../gcc.target/i386/avx512vl-fmsub-sf-xmm-1.c | 12 +++
>  .../gcc.target/i386/avx512vl-fmsub-sf-ymm-1.c | 12 +++
>  16 files changed, 285 insertions(+), 58 deletions(-)

Re: [patch] new API for value_range

2018-10-21 Thread Aldy Hernandez
Is this fixed by Richard's patch to 87640?  if so, perhaps this is a
duplicate of said PR.
On Sun, Oct 21, 2018 at 3:34 AM H.J. Lu  wrote:
>
> On Wed, Oct 17, 2018 at 7:39 AM Aldy Hernandez  wrote:
> >
> >
> >
> > On 10/17/18 6:50 AM, Richard Biener wrote:
> > > On Thu, Oct 11, 2018 at 8:25 PM Aldy Hernandez  wrote:
> > >>
> > >>
> > >>
> > >> On 10/11/18 5:47 AM, Richard Biener wrote:
> > >>> On Thu, Oct 11, 2018 at 10:19 AM Aldy Hernandez  
> > >>> wrote:
> > 
> >  Hi Richard.  Thanks for reviewing.
> > 
> >  On 10/10/18 6:27 AM, Richard Biener wrote:
> > > On Tue, Oct 9, 2018 at 6:23 PM Aldy Hernandez  
> > > wrote:
> > >>
> > >> I'm assuming the silence on the RFC means nobody is viscerally 
> > >> opposed
> > >> to it, so here goes the actual implementation ;-).
> > >>
> > >>FWI: 
> > >> https://gcc.gnu.org/ml/gcc-patches/2018-10/msg00157.html
> > >>
> > >> My aim is no change to the current functionality, but there are some
> > >> things that changed slightly (with no appreciable change in
> > >> bootstrapability or tests).
> > >>
> > >> 1.  Primarily, we were building value_ranges by modifying them 
> > >> in-flight
> > >> with no regards to the validity of the resulting range.  By enforcing
> > >> the API, I noticed we periodically built VR_VARYING / VR_UNDEFINED, 
> > >> but
> > >> left the equivalence bits uncleared.  This comment in the original
> > >> header file indicates that this is invalid behavior:
> > >>
> > >>   /* Set of SSA names whose value ranges are equivalent to this 
> > >> one.
> > >>  This set is only valid when TYPE is VR_RANGE or 
> > >> VR_ANTI_RANGE.  */
> > >>
> > >> The API now enforces this upon construction.
> > >>
> > >> 2. I also saw us setting min/max when VARYING or UNDEFINED was set.
> > >> This is invalid.  Although these values were being ignored, the API 
> > >> now
> > >> enforces this.
> > >>
> > > >> 3. I saw one case in set_value_range_with_overflow() where we were
> > >> building an invalid range with swapped ranges, where we were silently
> > >> depending on somebody further up the call chain to swap them for us.
> > >> I've fixed this at creation.
> > >>
> > >> 4. There is one assert in ipcp_vr_lattice which I hope to remove, but
> > >> left as proof that the original VR_UNDEFINED set was not necessary, 
> > >> as
> > >> it is now done by default on an empty constructor:
> > >>
> > >> -  void init () { m_vr.type = VR_UNDEFINED; }
> > >> +  void init () { gcc_assert (m_vr.undefined_p ()); }
> > >>
> > >> One last note.  The file tree-vrp.c already has a cripple API of 
> > >> sorts
> > >> in the form of functions (set_value_range_to_varying, etc).  I have
> > >> tried to keep those functions available, by calling the API under the
> > >> covers, but would be okay in removing them altogether as a follow-up.
> > >>
> > >> Please refer to the RFC wrt the min/max/vrtype accessors, as well as 
> > >> the
> > >> new tree type field.
> > >>
> > >> I am quoting the class declaration below to make it easy to review 
> > >> at a
> > >> high level.
> > >>
> > >> Tested on x86-64 Linux.  All languages, including Ada and Go.
> > >>
> > >> OK for trunk?
> > >
> > > Reviewing in patch order.
> > >
> > >> Aldy
> > >>
> > >> class GTY((for_user)) value_range
> > >> {
> > >>  public:
> > >>   value_range ();
> > >>   value_range (tree type);
> > >>   value_range (value_range_type, tree type, tree, tree, bitmap = 
> > >> NULL);
> > >>   bool operator== (const value_range &) const;
> > >>   bool operator!= (const value_range &) const;
> > >>   void intersect (const value_range *);
> > >>   void union_ (const value_range *);
> > >
> > > with trailing underscore?  seriously?
> > 
> >  Hey!  You complained about Union() last year, at which point the
> >  consensus was that trailing underscores would be ok for symbol names
> >  that clashed with keywords.
> > >>>
> > >>> ;)
> > >>>
> > >>> I also thought about union_into / union_with.  As opposed to a 
> > >>> hypothetical
> > >>>
> > >>> value_range union (const value_range& a, const value_range& b)
> > >>>
> > >>> function.
> > >>>
> >  And yes, it was also discussed whether we should overload | and ^ for
> >  union and intersection, but was denied for readability and what have 
> >  yous.
> > 
> > >
> > >>   /* Like operator== but ignore equivalence bitmap.  */
> > >>   bool ignore_equivs_equal_p (const value_range &) const;
> > >>   /* Like a operator= but update equivalence bitmap efficiently. 
> > >>  */
> > >>   void copy_with_equiv_update (const value_range *

[PATCH,FORTRAN] Fix memory leak of gsymbol

2018-10-21 Thread Bernhard Reutner-Fischer
Hi!

Regtested on x86_64-unknown-linux, installing on
aldot/fortran-fe-stringpool.

We did not free global symbols. For a simplified abstract_type_3.f03
valgrind reports:

96 bytes in 1 blocks are still reachable in loss record 461 of 602
   at 0x48377D5: calloc (vg_replace_malloc.c:711)
   by 0x21257C3: xcalloc (xmalloc.c:162)
   by 0x98611B: gfc_get_gsymbol(char const*) (symbol.c:4341)
   by 0x932C58: parse_module() (parse.c:5912)
   by 0x9336F8: gfc_parse_file() (parse.c:6236)
   by 0x991449: gfc_be_parse_file() (f95-lang.c:204)
   by 0x11D8EDE: compile_file() (toplev.c:455)
   by 0x11DB9C3: do_compile() (toplev.c:2170)
   by 0x11DBCAF: toplev::main(int, char**) (toplev.c:2305)
   by 0x2045D37: main (main.c:39)

This patch reduces leaks to

 LEAK SUMMARY:
definitely lost: 344 bytes in 1 blocks
indirectly lost: 3,024 bytes in 4 blocks
  possibly lost: 0 bytes in 0 blocks
-   still reachable: 1,576,174 bytes in 2,277 blocks
+   still reachable: 1,576,078 bytes in 2,276 blocks
 suppressed: 0 bytes in 0 blocks

gcc/fortran/ChangeLog:

2018-10-21  Bernhard Reutner-Fischer  

* parse.c (clean_up_modules): Free gsym.
---
 gcc/fortran/parse.c | 18 +++---
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/gcc/fortran/parse.c b/gcc/fortran/parse.c
index b7265c42f58..f7c369a17ac 100644
--- a/gcc/fortran/parse.c
+++ b/gcc/fortran/parse.c
@@ -6066,7 +6066,7 @@ resolve_all_program_units (gfc_namespace 
*gfc_global_ns_list)
 
 
 static void
-clean_up_modules (gfc_gsymbol *gsym)
+clean_up_modules (gfc_gsymbol *&gsym)
 {
   if (gsym == NULL)
 return;
@@ -6074,14 +6074,18 @@ clean_up_modules (gfc_gsymbol *gsym)
   clean_up_modules (gsym->left);
   clean_up_modules (gsym->right);
 
-  if (gsym->type != GSYM_MODULE || !gsym->ns)
+  if (gsym->type != GSYM_MODULE)
 return;
 
-  gfc_current_ns = gsym->ns;
-  gfc_derived_types = gfc_current_ns->derived_types;
-  gfc_done_2 ();
-  gsym->ns = NULL;
-  return;
+  if (gsym->ns)
+{
+  gfc_current_ns = gsym->ns;
+  gfc_derived_types = gfc_current_ns->derived_types;
+  gfc_done_2 ();
+  gsym->ns = NULL;
+}
+  free (gsym);
+  gsym = NULL;
 }
 
 
-- 
2.19.1



Re: [PATCH 1/2] i386: Enable AVX512 memory broadcast for FP mul

2018-10-21 Thread H.J. Lu
On 10/21/18, Uros Bizjak  wrote:
> On Sat, Oct 20, 2018 at 11:47 PM H.J. Lu  wrote:
>>
>> On 10/20/18, Uros Bizjak  wrote:
>> > On Fri, Oct 19, 2018 at 11:08 PM H.J. Lu  wrote:
>> >>
>> >> Many AVX512 vector operations can broadcast from a scalar memory
>> >> source.
>> >> This patch enables memory broadcast for FP mul operations.
>> >>
>> >> gcc/
>> >>
>> >> PR target/72782
>> >> * config/i386/sse.md (*mul3_bcst_1): New.
>> >> (*mul3_bcst_2): Likewise.
>> >>
>> >> gcc/testsuite/
>> >>
>> >> PR target/72782
>> >> * gcc.target/i386/avx512f-mul-df-zmm-1.c: New test.
>> >> * gcc.target/i386/avx512f-mul-sf-zmm-1.c: Likewise.
>> >> * gcc.target/i386/avx512f-mul-sf-zmm-2.c: Likewise.
>> >> * gcc.target/i386/avx512f-mul-sf-zmm-3.c: Likewise.
>> >> * gcc.target/i386/avx512f-mul-sf-zmm-4.c: Likewise.
>> >> * gcc.target/i386/avx512f-mul-sf-zmm-5.c: Likewise.
>> >> * gcc.target/i386/avx512f-mul-sf-zmm-6.c: Likewise.
>> >> * gcc.target/i386/avx512vl-mul-sf-xmm-1.c: Likewise.
>> >> * gcc.target/i386/avx512vl-mul-sf-ymm-1.c: Likewise.
>> >> ---
>> >>  gcc/config/i386/sse.md| 24
>> >> +++
>> >>  .../gcc.target/i386/avx512f-mul-df-zmm-1.c| 12 ++
>> >>  .../gcc.target/i386/avx512f-mul-sf-zmm-1.c| 12 ++
>> >>  .../gcc.target/i386/avx512f-mul-sf-zmm-2.c| 12 ++
>> >>  .../gcc.target/i386/avx512f-mul-sf-zmm-3.c| 12 ++
>> >>  .../gcc.target/i386/avx512f-mul-sf-zmm-4.c| 12 ++
>> >>  .../gcc.target/i386/avx512f-mul-sf-zmm-5.c| 12 ++
>> >>  .../gcc.target/i386/avx512f-mul-sf-zmm-6.c| 12 ++
>> >>  .../gcc.target/i386/avx512vl-mul-sf-xmm-1.c   | 12 ++
>> >>  .../gcc.target/i386/avx512vl-mul-sf-ymm-1.c   | 12 ++
>> >>  10 files changed, 132 insertions(+)
>> >>  create mode 100644
>> >> gcc/testsuite/gcc.target/i386/avx512f-mul-df-zmm-1.c
>> >>  create mode 100644
>> >> gcc/testsuite/gcc.target/i386/avx512f-mul-sf-zmm-1.c
>> >>  create mode 100644
>> >> gcc/testsuite/gcc.target/i386/avx512f-mul-sf-zmm-2.c
>> >>  create mode 100644
>> >> gcc/testsuite/gcc.target/i386/avx512f-mul-sf-zmm-3.c
>> >>  create mode 100644
>> >> gcc/testsuite/gcc.target/i386/avx512f-mul-sf-zmm-4.c
>> >>  create mode 100644
>> >> gcc/testsuite/gcc.target/i386/avx512f-mul-sf-zmm-5.c
>> >>  create mode 100644
>> >> gcc/testsuite/gcc.target/i386/avx512f-mul-sf-zmm-6.c
>> >>  create mode 100644
>> >> gcc/testsuite/gcc.target/i386/avx512vl-mul-sf-xmm-1.c
>> >>  create mode 100644
>> >> gcc/testsuite/gcc.target/i386/avx512vl-mul-sf-ymm-1.c
>> >>
>> >> diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
>> >> index 411c78ae8d3..a73659e6bd2 100644
>> >> --- a/gcc/config/i386/sse.md
>> >> +++ b/gcc/config/i386/sse.md
>> >> @@ -1754,6 +1754,30 @@
>> >> (set_attr "btver2_decode" "direct,double")
>> >> (set_attr "mode" "")])
>> >>
>> >> +(define_insn "*mul3_bcst_1"
>> >> +  [(set (match_operand:VF_AVX512 0 "register_operand" "=v")
>> >> +   (mult:VF_AVX512
>> >> + (match_operand:VF_AVX512 1 "register_operand" "v")
>> >> + (vec_duplicate:VF_AVX512
>> >> +(match_operand: 2 "memory_operand"
>> >> "m"]
>> >> +  "TARGET_AVX512F && "
>> >> +  "vmul\t{%2, %1,
>> >> %0|%0, %1, %2<>}"
>> >> +  [(set_attr "prefix" "evex")
>> >> +   (set_attr "type" "ssemul")
>> >> +   (set_attr "mode" "")])
>> >> +
>> >> +(define_insn "*mul3_bcst_2"
>> >> +  [(set (match_operand:VF_AVX512 0 "register_operand" "=v")
>> >> +   (mult:VF_AVX512
>> >> + (vec_duplicate:VF_AVX512
>> >> +(match_operand: 1 "memory_operand" "m"))
>> >> + (match_operand:VF_AVX512 2 "register_operand" "v")))]
>> >> +  "TARGET_AVX512F && "
>> >> +  "vmul\t{%1, %2,
>> >> %0|%0, %2, %1<>}"
>> >> +  [(set_attr "prefix" "evex")
>> >> +   (set_attr "type" "ssemul")
>> >> +   (set_attr "mode" "")])
>> >
>> > Do we really need two patterns here? IIRC, the compiler canonicalizes
>> > commutative binops so that they have memory operand in the second
>> > place. We have vec_duplicate here, so this may not be the case, but
>> > please investigate if we really need two patterns for commutative
>> > binops.
>> >
>>
>> Only one pattern is needed.   For
>>
>>(set (reg:V16SF 89) (vec_duplicate:V16SF (reg:SF 91)))
>>(set (reg:V16SF 95) (mult:V16SF (reg:V16SF 87) (reg:V16SF 89)))
>>
>> combiner prefers
>>
>>(set (reg:V16SF 95)
>>   (mult:V16SF
>> (vec_duplicate:V16SF (reg:SF 91))
>> (reg:V16SF 87)))
>>
>> instead of
>>
>>(set (reg:V16SF 95)
>>   (mult:V16SF
>> (reg:V16SF 87)
>> (vec_duplicate:V16SF (reg:SF 91
>>
>> commutation is performed at
>>
>> (set (reg:V16SF 95) (mult:V16SF (reg:V16SF 87) (reg:V16SF 89)))
>>
>> Here is the updated patch.  OK for trunk?
>
> No need for a big comment, this is due to RTX operator precedence in
> commutati

Re: [Patch, fortran] PR85603 - ICE with character array substring assignment

2018-10-21 Thread Thomas Koenig

Hi Paul,



2018-10-18  Paul Thomas  

 PR fortran/85603
 * frontend-passes.c (get_len_call): New function to generate a
 call to intrinsic LEN.
 (create_var): Use this to make length expressions for variable
 rhs string lengths.
 Clean up some white space issues.

2018-10-18  Paul Thomas  

 PR fortran/85603
 * gfortran.dg/deferred_character_23.f90 : Check reallocation is
 occurring as it should and a regression caused by version 1 of
 this patch.


OK.

Thanks for the patch!

Regards

Thomas


Re: [PATCH] i386: Add missing AVX512VL or/xor intrinsics

2018-10-21 Thread Uros Bizjak
On Sun, Oct 21, 2018 at 2:03 AM H.J. Lu  wrote:
>
> gcc/
>
> PR target/87662
> * i386/avx512vlintrin.h (_mm256_or_epi32): New.
> (_mm_or_epi32): Likewise.
> (_mm256_xor_epi32): Likewise.
> (_mm_xor_epi32): Likewise.
> (_mm256_or_epi64): Likewise.
> (_mm_or_epi64): Likewise.
> (_mm256_xor_epi64): Likewise.
> (_mm_xor_epi64): Likewise.
>
> gcc/testsuite/
>
> PR target/87662
> * gcc.target/i386/pr87662.c

LGTM.

Thanks,
Uros.

> ---
>  gcc/config/i386/avx512vlintrin.h| 48 
>  gcc/testsuite/gcc.target/i386/pr87662.c | 76 +
>  2 files changed, 124 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr87662.c
>
> diff --git a/gcc/config/i386/avx512vlintrin.h 
> b/gcc/config/i386/avx512vlintrin.h
> index 68b5537845b..a4fb0b0ac00 100644
> --- a/gcc/config/i386/avx512vlintrin.h
> +++ b/gcc/config/i386/avx512vlintrin.h
> @@ -4855,6 +4855,12 @@ _mm256_maskz_or_epi32 (__mmask8 __U, __m256i __A, 
> __m256i __B)
> (__mmask8) __U);
>  }
>
> +extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, 
> __artificial__))
> +_mm256_or_epi32 (__m256i __A, __m256i __B)
> +{
> +  return (__m256i) ((__v8su)__A | (__v8su)__B);
> +}
> +
>  extern __inline __m128i
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_mask_or_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
> @@ -4876,6 +4882,12 @@ _mm_maskz_or_epi32 (__mmask8 __U, __m128i __A, __m128i 
> __B)
> (__mmask8) __U);
>  }
>
> +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, 
> __artificial__))
> +_mm_or_epi32 (__m128i __A, __m128i __B)
> +{
> +  return (__m128i) ((__v4su)__A | (__v4su)__B);
> +}
> +
>  extern __inline __m256i
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm256_mask_xor_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
> @@ -4898,6 +4910,12 @@ _mm256_maskz_xor_epi32 (__mmask8 __U, __m256i __A, 
> __m256i __B)
>  (__mmask8) __U);
>  }
>
> +extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, 
> __artificial__))
> +_mm256_xor_epi32 (__m256i __A, __m256i __B)
> +{
> +  return (__m256i) ((__v8su)__A ^ (__v8su)__B);
> +}
> +
>  extern __inline __m128i
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_mask_xor_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
> @@ -4920,6 +4938,12 @@ _mm_maskz_xor_epi32 (__mmask8 __U, __m128i __A, 
> __m128i __B)
>  (__mmask8) __U);
>  }
>
> +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, 
> __artificial__))
> +_mm_xor_epi32 (__m128i __A, __m128i __B)
> +{
> +  return (__m128i) ((__v4su)__A ^ (__v4su)__B);
> +}
> +
>  extern __inline __m128
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m128d __A)
> @@ -7340,6 +7364,12 @@ _mm256_maskz_or_epi64 (__mmask8 __U, __m256i __A, 
> __m256i __B)
> (__mmask8) __U);
>  }
>
> +extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, 
> __artificial__))
> +_mm256_or_epi64 (__m256i __A, __m256i __B)
> +{
> +  return (__m256i) ((__v4du)__A | (__v4du)__B);
> +}
> +
>  extern __inline __m128i
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_mask_or_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
> @@ -7361,6 +7391,12 @@ _mm_maskz_or_epi64 (__mmask8 __U, __m128i __A, __m128i 
> __B)
> (__mmask8) __U);
>  }
>
> +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, 
> __artificial__))
> +_mm_or_epi64 (__m128i __A, __m128i __B)
> +{
> +  return (__m128i) ((__v2du)__A | (__v2du)__B);
> +}
> +
>  extern __inline __m256i
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm256_mask_xor_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
> @@ -7383,6 +7419,12 @@ _mm256_maskz_xor_epi64 (__mmask8 __U, __m256i __A, 
> __m256i __B)
>  (__mmask8) __U);
>  }
>
> +extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, 
> __artificial__))
> +_mm256_xor_epi64 (__m256i __A, __m256i __B)
> +{
> +  return (__m256i) ((__v4du)__A ^ (__v4du)__B);
> +}
> +
>  extern __inline __m128i
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_mask_xor_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
> @@ -7405,6 +7447,12 @@ _mm_maskz_xor_epi64 (__mmask8 __U, __m128i __A, 
> __m128i __B)
>  (__mmask8) __U);
>  }
>
> +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, 
> __artificial__))
> +_mm_xor_

Re: [PATCH 1/2] i386: Enable AVX512 memory broadcast for FP mul

2018-10-21 Thread Uros Bizjak
On Sat, Oct 20, 2018 at 11:47 PM H.J. Lu  wrote:
>
> On 10/20/18, Uros Bizjak  wrote:
> > On Fri, Oct 19, 2018 at 11:08 PM H.J. Lu  wrote:
> >>
> >> Many AVX512 vector operations can broadcast from a scalar memory source.
> >> This patch enables memory broadcast for FP mul operations.
> >>
> >> gcc/
> >>
> >> PR target/72782
> >> * config/i386/sse.md (*mul3_bcst_1): New.
> >> (*mul3_bcst_2): Likewise.
> >>
> >> gcc/testsuite/
> >>
> >> PR target/72782
> >> * gcc.target/i386/avx512f-mul-df-zmm-1.c: New test.
> >> * gcc.target/i386/avx512f-mul-sf-zmm-1.c: Likewise.
> >> * gcc.target/i386/avx512f-mul-sf-zmm-2.c: Likewise.
> >> * gcc.target/i386/avx512f-mul-sf-zmm-3.c: Likewise.
> >> * gcc.target/i386/avx512f-mul-sf-zmm-4.c: Likewise.
> >> * gcc.target/i386/avx512f-mul-sf-zmm-5.c: Likewise.
> >> * gcc.target/i386/avx512f-mul-sf-zmm-6.c: Likewise.
> >> * gcc.target/i386/avx512vl-mul-sf-xmm-1.c: Likewise.
> >> * gcc.target/i386/avx512vl-mul-sf-ymm-1.c: Likewise.
> >> ---
> >>  gcc/config/i386/sse.md| 24 +++
> >>  .../gcc.target/i386/avx512f-mul-df-zmm-1.c| 12 ++
> >>  .../gcc.target/i386/avx512f-mul-sf-zmm-1.c| 12 ++
> >>  .../gcc.target/i386/avx512f-mul-sf-zmm-2.c| 12 ++
> >>  .../gcc.target/i386/avx512f-mul-sf-zmm-3.c| 12 ++
> >>  .../gcc.target/i386/avx512f-mul-sf-zmm-4.c| 12 ++
> >>  .../gcc.target/i386/avx512f-mul-sf-zmm-5.c| 12 ++
> >>  .../gcc.target/i386/avx512f-mul-sf-zmm-6.c| 12 ++
> >>  .../gcc.target/i386/avx512vl-mul-sf-xmm-1.c   | 12 ++
> >>  .../gcc.target/i386/avx512vl-mul-sf-ymm-1.c   | 12 ++
> >>  10 files changed, 132 insertions(+)
> >>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-mul-df-zmm-1.c
> >>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-mul-sf-zmm-1.c
> >>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-mul-sf-zmm-2.c
> >>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-mul-sf-zmm-3.c
> >>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-mul-sf-zmm-4.c
> >>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-mul-sf-zmm-5.c
> >>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-mul-sf-zmm-6.c
> >>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-mul-sf-xmm-1.c
> >>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-mul-sf-ymm-1.c
> >>
> >> diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> >> index 411c78ae8d3..a73659e6bd2 100644
> >> --- a/gcc/config/i386/sse.md
> >> +++ b/gcc/config/i386/sse.md
> >> @@ -1754,6 +1754,30 @@
> >> (set_attr "btver2_decode" "direct,double")
> >> (set_attr "mode" "")])
> >>
> >> +(define_insn "*mul3_bcst_1"
> >> +  [(set (match_operand:VF_AVX512 0 "register_operand" "=v")
> >> +   (mult:VF_AVX512
> >> + (match_operand:VF_AVX512 1 "register_operand" "v")
> >> + (vec_duplicate:VF_AVX512
> >> +(match_operand: 2 "memory_operand" "m"]
> >> +  "TARGET_AVX512F && "
> >> +  "vmul\t{%2, %1,
> >> %0|%0, %1, %2<>}"
> >> +  [(set_attr "prefix" "evex")
> >> +   (set_attr "type" "ssemul")
> >> +   (set_attr "mode" "")])
> >> +
> >> +(define_insn "*mul3_bcst_2"
> >> +  [(set (match_operand:VF_AVX512 0 "register_operand" "=v")
> >> +   (mult:VF_AVX512
> >> + (vec_duplicate:VF_AVX512
> >> +(match_operand: 1 "memory_operand" "m"))
> >> + (match_operand:VF_AVX512 2 "register_operand" "v")))]
> >> +  "TARGET_AVX512F && "
> >> +  "vmul\t{%1, %2,
> >> %0|%0, %2, %1<>}"
> >> +  [(set_attr "prefix" "evex")
> >> +   (set_attr "type" "ssemul")
> >> +   (set_attr "mode" "")])
> >
> > Do we really need two patterns here? IIRC, the compiler canonicalizes
> > commutative binops so that they have memory operand in the second
> > place. We have vec_duplicate here, so this may not be the case, but
> > please investigate if we really need two patterns for commutative
> > binops.
> >
>
> Only one pattern is needed.   For
>
>(set (reg:V16SF 89) (vec_duplicate:V16SF (reg:SF 91)))
>(set (reg:V16SF 95) (mult:V16SF (reg:V16SF 87) (reg:V16SF 89)))
>
> combiner prefers
>
>(set (reg:V16SF 95)
>   (mult:V16SF
> (vec_duplicate:V16SF (reg:SF 91))
> (reg:V16SF 87)))
>
> instead of
>
>(set (reg:V16SF 95)
>   (mult:V16SF
> (reg:V16SF 87)
> (vec_duplicate:V16SF (reg:SF 91
>
> commutation is performed at
>
> (set (reg:V16SF 95) (mult:V16SF (reg:V16SF 87) (reg:V16SF 89)))
>
> Here is the updated patch.  OK for trunk?

No need for a big comment, this is due to RTX operator precedence in
commutative operators.

OK with the above change.

Please also remove plus part from

*3_bcst_1

and rename it together with

*add3_bcst_2

to ..._bcst, without suffix.

Thanks,
Uros.

>
> --
> H.J.


[PATCH][PR 87633] Do not generate unordered integer comparisons

2018-10-21 Thread Yuri Gribov
Hi all,

My recent patch which replaced
  (float_type)int_var1 CMP (float_type)int_var2
with
  int_var1 CMP int_var2
may generate unordered comparisons of integer values which are both
unnecessary and may also upset later passes.

This patch fixes the problem and bootstraps without regressions on
x86_64-linux-gnu.

Ok to commit?

-Yury


pr87633-1.patch
Description: Binary data


V2 [PATCH] i386: Use scalar operand in floating point vec_dup patterns

2018-10-21 Thread H.J. Lu
On 10/17/18, H.J. Lu  wrote:
> Since vector registers are also used for scalar floating point values,
> we can use scalar operand in floating point vec_dup patterns, which
> enables combiner to generate
>
> (set (reg:V8SF 84)
>  (vec_duplicate:V8SF (mem/c:SF (symbol_ref:DI ("y")
>
> For AVX512 broadcast instructions from integer register operand, we only
> need to broadcast integer to integer vectors.
>
> gcc/
>
>   PR target/87537
>   * config/i386/i386-builtin-types.def: Replace
>   CODE_FOR_avx2_vec_dupv4sf, CODE_FOR_avx2_vec_dupv8sf and
>   CODE_FOR_avx2_vec_dupv4df with CODE_FOR_vec_dupv4sf,
>   CODE_FOR_vec_dupv8sf and CODE_FOR_vec_dupv4df, respectively.
>   * config/i386/i386.c (expand_vec_perm_1): Replace
>   gen_avx512f_vec_dupv16sf_1, gen_avx2_vec_dupv8sf_1 and
>   gen_avx512f_vec_dupv8df_1 with gen_avx512f_vec_dupv16sf,
>   gen_vec_dupv8sf and gen_avx512f_vec_dupv8df, respectively.
>   Duplicate them from scalar operand.
>   * config/i386/i386.md (SF to DF splitter): Replace
>   gen_avx512f_vec_dupv16sf_1 with gen_avx512f_vec_dupv16sf.
>   * config/i386/sse.md (VF48_AVX512VL): New.
>   (avx2_vec_dup): Removed.
>   (avx2_vec_dupv8sf_1): Likewise.
>   (avx512f_vec_dup_1): Likewise.
>   (avx2_vec_dupv4df): Likewise.
>   (_vec_dup:V48_AVX512VL): Likewise.
>   (_vec_dup:VF48_AVX512VL): New.
>   (_vec_dup:VI48_AVX512VL): Likewise.
>   (_vec_dup_gpr): Replace
>   V48_AVX512VL with VI48_AVX512VL.
>   (*avx_vperm_broadcast_): Replace gen_avx2_vec_dupv8sf with
>   gen_vec_dupv8sf.
>
> gcc/testsuite/
>
>   PR target/87537
>   * gcc.target/i386/avx2-vbroadcastss_ps256-1.c: Updated.
>   * gcc.target/i386/avx512vl-vbroadcast-3.c: Likewise.

Here is the updated patch. I added const_vector_duplicate_operand to
handle constant vector broadcast from memory.  OK for trunk?

Thanks.

-- 
H.J.
From 1944aa2489a2e5365fdcd609d4810badec262598 Mon Sep 17 00:00:00 2001
From: "H.J. Lu" 
Date: Tue, 2 Oct 2018 14:27:55 -0700
Subject: [PATCH] i386: Use scalar operand in floating point vec_dup patterns

Since vector registers are also used for scalar floating point values,
we can use scalar operand in floating point vec_dup patterns, which
enables combiner to generate

(set (reg:V8SF 84)
 (vec_duplicate:V8SF (mem/c:SF (symbol_ref:DI ("y")

For AVX512 broadcast instructions from integer register operand, we only
need to broadcast integer to integer vectors.

gcc/

	PR target/87537
	* config/i386/i386-builtin-types.def: Replace
	CODE_FOR_avx2_vec_dupv4sf, CODE_FOR_avx2_vec_dupv8sf and
	CODE_FOR_avx2_vec_dupv4df with CODE_FOR_vec_dupv4sf,
	CODE_FOR_vec_dupv8sf and CODE_FOR_vec_dupv4df, respectively.
	* config/i386/i386.c (expand_vec_perm_1): Replace
	gen_avx512f_vec_dupv16sf_1, gen_avx2_vec_dupv8sf_1 and
	gen_avx512f_vec_dupv8df_1 with gen_avx512f_vec_dupv16sf,
	gen_vec_dupv8sf and gen_avx512f_vec_dupv8df, respectively.
	Duplicate them from scalar operand.
	* config/i386/i386.md (SF to DF splitter): Replace
	gen_avx512f_vec_dupv16sf_1 with gen_avx512f_vec_dupv16sf.
	* config/i386/predicates.md (const_vector_duplicate_operand): New.
	* config/i386/sse.md (VF48_AVX512VL): New.
	(avx2_vec_dup): Removed.
	(avx2_vec_dupv8sf_1): Likewise.
	(avx512f_vec_dup_1): Likewise.
	(avx2_vec_dupv4df): Likewise.
	(_vec_dup:V48_AVX512VL): Likewise.
	(_vec_dup:VF48_AVX512VL): New.
	(*_const_vec_dup): Likewise.
	(_vec_dup:VI48_AVX512VL): Likewise.
	(_vec_dup_gpr): Replace
	V48_AVX512VL with VI48_AVX512VL.
	(*avx_vperm_broadcast_): Replace gen_avx2_vec_dupv8sf with
	gen_vec_dupv8sf.

gcc/testsuite/

	PR target/87537
	* gcc.target/i386/avx2-vbroadcastss_ps256-1.c: Updated.
	* gcc.target/i386/avx512vl-vbroadcast-3.c: Likewise.
	* gcc.target/i386/pr87537-2.c: New test.
	* gcc.target/i386/pr87537-3.c: Likewise.
	* gcc.target/i386/pr87537-4.c: Likewise.
	* gcc.target/i386/pr87537-5.c: Likewise.
	* gcc.target/i386/pr87537-6.c: Likewise.
	* gcc.target/i386/pr87537-7.c: Likewise.
	* gcc.target/i386/pr87537-8.c: Likewise.
	* gcc.target/i386/pr87537-9.c: Likewise.
---
 gcc/config/i386/i386-builtin.def  |  6 +-
 gcc/config/i386/i386.c| 28 +-
 gcc/config/i386/i386.md   |  2 +-
 gcc/config/i386/predicates.md | 12 +++
 gcc/config/i386/sse.md| 95 +++
 .../i386/avx2-vbroadcastss_ps256-1.c  |  3 +-
 .../gcc.target/i386/avx512vl-vbroadcast-3.c   |  5 +-
 gcc/testsuite/gcc.target/i386/pr87537-2.c | 12 +++
 gcc/testsuite/gcc.target/i386/pr87537-3.c | 12 +++
 gcc/testsuite/gcc.target/i386/pr87537-4.c | 12 +++
 gcc/testsuite/gcc.target/i386/pr87537-5.c | 12 +++
 gcc/testsuite/gcc.target/i386/pr87537-6.c | 12 +++
 gcc/testsuite/gcc.target/i386/pr87537-7.c | 12 +++
 gcc/testsuite/gcc.target/i386/pr87537-8.c | 12 +++
 gcc/testsuite/gcc.target/i386/pr87537-9.c | 12 +++
 15 files changed, 177 insertions

gOlogy: fix debug binds in auto-inc-dec

2018-10-21 Thread Alexandre Oliva
As auto_inc_dec pass combines incs and mems from different insns, it
often causes regs to temporarily hold a value different from the one
it would before the transformation.  Debug insns within that range
would therefore end up binding to the wrong expression after the
transformation.

This patch adjusts debug binds in the affected range.

Regstrapped on x86_64-, i686-, ppc64-, ppc64el-, and aarch64-linux-gnu.
Ok to install?

for  gcc/ChangeLog

* auto-inc-dec.c: Include valtrack.h.  Improve comments.
(reg_next_debug_use): New.
(attempt_change): Propagate adjusted expression into affected
debug insns.
(merge_in_block): Track uses in debug insns.
(pass_inc_dec::execute): Allocate and release
reg_next_debug_use.
---
 gcc/auto-inc-dec.c |  128 +++-
 1 file changed, 125 insertions(+), 3 deletions(-)

diff --git a/gcc/auto-inc-dec.c b/gcc/auto-inc-dec.c
index e6dc1c30d716..064b8afd4ff9 100644
--- a/gcc/auto-inc-dec.c
+++ b/gcc/auto-inc-dec.c
@@ -36,6 +36,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "tree-pass.h"
 #include "dbgcnt.h"
 #include "print-rtl.h"
+#include "valtrack.h"
 
 /* This pass was originally removed from flow.c. However there is
almost nothing that remains of that code.
@@ -53,6 +54,21 @@ along with GCC; see the file COPYING3.  If not see
...
*(a += c) pre
 
+or, alternately,
+
+   a <- b + c
+   ...
+   *b
+
+becomes
+
+   a <- b
+   ...
+   *(a += c) post
+
+This uses a post-add, but it's handled as FORM_PRE_ADD because
+the "increment" insn appears before the memory access.
+
 
   (2) FORM_PRE_INC
a += c
@@ -61,6 +77,7 @@ along with GCC; see the file COPYING3.  If not see
 
 becomes
 
+   ...
*(a += c) pre
 
 
@@ -75,8 +92,8 @@ along with GCC; see the file COPYING3.  If not see
 becomes
 
b <- a
-   ...
*(b += c) post
+   ...
 
 
   (4) FORM_POST_INC
@@ -87,6 +104,8 @@ along with GCC; see the file COPYING3.  If not see
 becomes
 
*(a += c) post
+   ...
+
 
   There are three types of values of c.
 
@@ -393,6 +412,7 @@ dump_mem_insn (FILE *file)
must be compared with the current block.
 */
 
+static rtx_insn **reg_next_debug_use = NULL;
 static rtx_insn **reg_next_use = NULL;
 static rtx_insn **reg_next_inc_use = NULL;
 static rtx_insn **reg_next_def = NULL;
@@ -509,27 +529,83 @@ attempt_change (rtx new_addr, rtx inc_reg)
   gcc_assert (mov_insn);
   emit_insn_before (mov_insn, inc_insn.insn);
   regno = REGNO (inc_insn.reg0);
+  /* ??? Could REGNO possibly be used in MEM_INSN other than in
+the MEM address, and still die there, so that move_dead_notes
+would incorrectly move the note?  */
   if (reg_next_use[regno] == mem_insn.insn)
move_dead_notes (mov_insn, mem_insn.insn, inc_insn.reg0);
   else
move_dead_notes (mov_insn, inc_insn.insn, inc_insn.reg0);
 
   regno = REGNO (inc_insn.reg_res);
+  if (reg_next_debug_use && reg_next_debug_use[regno]
+ && BLOCK_FOR_INSN (reg_next_debug_use[regno]) == bb)
+   {
+ rtx adjres = gen_rtx_PLUS (GET_MODE (inc_insn.reg_res),
+inc_insn.reg_res, inc_insn.reg1);
+ if (dump_file)
+   fprintf (dump_file, "adjusting debug insns\n");
+ propagate_for_debug (PREV_INSN (reg_next_debug_use[regno]),
+  mem_insn.insn,
+  inc_insn.reg_res, adjres, bb);
+ reg_next_debug_use[regno] = NULL;
+   }
   reg_next_def[regno] = mov_insn;
   reg_next_use[regno] = NULL;
+
   regno = REGNO (inc_insn.reg0);
+  if (reg_next_debug_use && reg_next_debug_use[regno]
+ && BLOCK_FOR_INSN (reg_next_debug_use[regno]) == bb
+ && find_reg_note (mov_insn, REG_DEAD, inc_insn.reg0))
+   {
+ if (dump_file)
+   fprintf (dump_file, "remapping debug insns\n");
+ propagate_for_debug (PREV_INSN (reg_next_debug_use[regno]),
+  mem_insn.insn,
+  inc_insn.reg0, inc_insn.reg_res, bb);
+ reg_next_debug_use[regno] = NULL;
+   }
   reg_next_use[regno] = mov_insn;
   df_recompute_luids (bb);
   break;
 
 case FORM_POST_INC:
   regno = REGNO (inc_insn.reg_res);
+  if (reg_next_debug_use && reg_next_debug_use[regno]
+ && BLOCK_FOR_INSN (reg_next_debug_use[regno]) == bb)
+   {
+ rtx adjres = gen_rtx_MINUS (GET_MODE (inc_insn.reg_res),
+ inc_insn.reg_res, inc_insn.reg1);
+ if (dump_file)
+   fprintf (dump_file, "adjusting debug insns\n");
+ propagate_for_debug (PREV_INSN (reg_next_debug_use[regno]),
+ 

gOlogy: do not change code in isolate-paths for warnings only

2018-10-21 Thread Alexandre Oliva
The isolate-paths pass is activated by various -f flags, but also by
-Wnull-dereference.  Most of its codegen changes are conditioned on at
least one of the -f flags, but those that detect, warn about and
isolate paths that return the address of local variables are enabled
even if the pass is activated only by -Wnull-dereference.

-W flags should not cause codegen changes, so this patch makes the
codegen changes conditional on the presence of any of the -f flags
that activate the pass.  Should we have a separate option to activate
only this kind of transformation?

Regstrapped on x86_64- and i686-linux-gnu.  Ok to install?


for  gcc/ChangeLog

* gimple-ssa-isolate-paths.c
(find_implicit_erroneous_behavior): Do not change code if the
pass is running for warnings only.
(find_explicit_erroneous_behavior): Likewise.
---
 gcc/gimple-ssa-isolate-paths.c |   17 +
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/gcc/gimple-ssa-isolate-paths.c b/gcc/gimple-ssa-isolate-paths.c
index e1fab61bedab..880836c21aa7 100644
--- a/gcc/gimple-ssa-isolate-paths.c
+++ b/gcc/gimple-ssa-isolate-paths.c
@@ -431,7 +431,9 @@ find_implicit_erroneous_behavior (void)
"declared here");
  }
 
- if (gimple_bb (use_stmt) == bb)
+ if ((flag_isolate_erroneous_paths_dereference
+  || flag_isolate_erroneous_paths_attribute)
+ && gimple_bb (use_stmt) == bb)
{
  duplicate = isolate_path (bb, duplicate, e,
use_stmt, lhs, true);
@@ -553,9 +555,16 @@ find_explicit_erroneous_behavior (void)
  inform (DECL_SOURCE_LOCATION(valbase),
  "declared here");
  }
- tree zero = build_zero_cst (TREE_TYPE (val));
- gimple_return_set_retval (return_stmt, zero);
- update_stmt (stmt);
+
+ /* Do not modify code if the user only asked for
+warnings.  */
+ if (flag_isolate_erroneous_paths_dereference
+ || flag_isolate_erroneous_paths_attribute)
+   {
+ tree zero = build_zero_cst (TREE_TYPE (val));
+ gimple_return_set_retval (return_stmt, zero);
+ update_stmt (stmt);
+   }
}
}
}


-- 
Alexandre Oliva, freedom fighter   https://FSFLA.org/blogs/lxo
Be the change, be Free! FSF Latin America board member
GNU Toolchain Engineer   Free Software Evangelist
Hay que enGNUrecerse, pero sin perder la terGNUra jamás-GNUChe