[PATCH 4/4] i386: Update AVX512 FMSUB/FNMADD/FNMSUB tests

2018-10-19 Thread H.J. Lu
Update AVX512 tests to test the newly added FMSUB, FNMADD and FNMSUB
builtin functions.

PR target/72782
* gcc.target/i386/avx-1.c (__builtin_ia32_vfmsubpd512_mask): New.
(__builtin_ia32_vfmsubpd512_maskz): Likewise.
(__builtin_ia32_vfmsubps512_mask): Likewise.
(__builtin_ia32_vfmsubps512_maskz): Likewise.
(__builtin_ia32_vfnmaddpd512_mask3): Likewise.
(__builtin_ia32_vfnmaddpd512_maskz): Likewise.
(__builtin_ia32_vfnmaddps512_mask3): Likewise.
(__builtin_ia32_vfnmaddps512_maskz): Likewise.
(__builtin_ia32_vfnmsubpd512_maskz): Likewise.
(__builtin_ia32_vfnmsubps512_maskz): Likewise.
* gcc.target/i386/sse-13.c
(__builtin_ia32_vfmsubpd512_mask): Likewise.
(__builtin_ia32_vfmsubpd512_maskz): Likewise.
(__builtin_ia32_vfmsubps512_mask): Likewise.
(__builtin_ia32_vfmsubps512_maskz): Likewise.
(__builtin_ia32_vfnmaddpd512_mask3): Likewise.
(__builtin_ia32_vfnmaddpd512_maskz): Likewise.
(__builtin_ia32_vfnmaddps512_mask3): Likewise.
(__builtin_ia32_vfnmaddps512_maskz): Likewise.
(__builtin_ia32_vfnmsubpd512_maskz): Likewise.
(__builtin_ia32_vfnmsubps512_maskz): Likewise.
* gcc.target/i386/sse-23.c
(__builtin_ia32_vfmsubpd512_mask): Likewise.
(__builtin_ia32_vfmsubpd512_maskz): Likewise.
(__builtin_ia32_vfmsubps512_mask): Likewise.
(__builtin_ia32_vfmsubps512_maskz): Likewise.
(__builtin_ia32_vfnmaddpd512_mask3): Likewise.
(__builtin_ia32_vfnmaddpd512_maskz): Likewise.
(__builtin_ia32_vfnmaddps512_mask3): Likewise.
(__builtin_ia32_vfnmaddps512_maskz): Likewise.
(__builtin_ia32_vfnmsubpd512_maskz): Likewise.
(__builtin_ia32_vfnmsubps512_maskz): Likewise.
---
 gcc/testsuite/gcc.target/i386/avx-1.c  | 10 ++
 gcc/testsuite/gcc.target/i386/sse-13.c | 10 ++
 gcc/testsuite/gcc.target/i386/sse-23.c | 10 ++
 3 files changed, 30 insertions(+)

diff --git a/gcc/testsuite/gcc.target/i386/avx-1.c b/gcc/testsuite/gcc.target/i386/avx-1.c
index c877f9996b3..f67bc5f5044 100644
--- a/gcc/testsuite/gcc.target/i386/avx-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx-1.c
@@ -351,16 +351,26 @@
 #define __builtin_ia32_vfmaddsubps512_maskz(A, B, C, D, E) __builtin_ia32_vfmaddsubps512_maskz(A, B, C, D, 8)
 #define __builtin_ia32_vfmsubaddpd512_mask3(A, B, C, D, E) __builtin_ia32_vfmsubaddpd512_mask3(A, B, C, D, 8)
 #define __builtin_ia32_vfmsubaddps512_mask3(A, B, C, D, E) __builtin_ia32_vfmsubaddps512_mask3(A, B, C, D, 8)
+#define __builtin_ia32_vfmsubpd512_mask(A, B, C, D, E) __builtin_ia32_vfmsubpd512_mask(A, B, C, D, 8)
 #define __builtin_ia32_vfmsubpd512_mask3(A, B, C, D, E) __builtin_ia32_vfmsubpd512_mask3(A, B, C, D, 8)
+#define __builtin_ia32_vfmsubpd512_maskz(A, B, C, D, E) __builtin_ia32_vfmsubpd512_maskz(A, B, C, D, 8)
+#define __builtin_ia32_vfmsubps512_mask(A, B, C, D, E) __builtin_ia32_vfmsubps512_mask(A, B, C, D, 8)
 #define __builtin_ia32_vfmsubps512_mask3(A, B, C, D, E) __builtin_ia32_vfmsubps512_mask3(A, B, C, D, 8)
+#define __builtin_ia32_vfmsubps512_maskz(A, B, C, D, E) __builtin_ia32_vfmsubps512_maskz(A, B, C, D, 8)
 #define __builtin_ia32_vfmsubsd3_mask3(A, B, C, D, E) __builtin_ia32_vfmsubsd3_mask3(A, B, C, D, 8)
 #define __builtin_ia32_vfmsubss3_mask3(A, B, C, D, E) __builtin_ia32_vfmsubss3_mask3(A, B, C, D, 8)
 #define __builtin_ia32_vfnmaddpd512_mask(A, B, C, D, E) __builtin_ia32_vfnmaddpd512_mask(A, B, C, D, 8)
+#define __builtin_ia32_vfnmaddpd512_mask3(A, B, C, D, E) __builtin_ia32_vfnmaddpd512_mask3(A, B, C, D, 8)
+#define __builtin_ia32_vfnmaddpd512_maskz(A, B, C, D, E) __builtin_ia32_vfnmaddpd512_maskz(A, B, C, D, 8)
 #define __builtin_ia32_vfnmaddps512_mask(A, B, C, D, E) __builtin_ia32_vfnmaddps512_mask(A, B, C, D, 8)
+#define __builtin_ia32_vfnmaddps512_mask3(A, B, C, D, E) __builtin_ia32_vfnmaddps512_mask3(A, B, C, D, 8)
+#define __builtin_ia32_vfnmaddps512_maskz(A, B, C, D, E) __builtin_ia32_vfnmaddps512_maskz(A, B, C, D, 8)
 #define __builtin_ia32_vfnmsubpd512_mask(A, B, C, D, E) __builtin_ia32_vfnmsubpd512_mask(A, B, C, D, 8)
 #define __builtin_ia32_vfnmsubpd512_mask3(A, B, C, D, E) __builtin_ia32_vfnmsubpd512_mask3(A, B, C, D, 8)
+#define __builtin_ia32_vfnmsubpd512_maskz(A, B, C, D, E) __builtin_ia32_vfnmsubpd512_maskz(A, B, C, D, 8)
 #define __builtin_ia32_vfnmsubps512_mask(A, B, C, D, E) __builtin_ia32_vfnmsubps512_mask(A, B, C, D, 8)
 #define __builtin_ia32_vfnmsubps512_mask3(A, B, C, D, E) __builtin_ia32_vfnmsubps512_mask3(A, B, C, D, 8)
+#define __builtin_ia32_vfnmsubps512_maskz(A, B, C, D, E) __builtin_ia32_vfnmsubps512_maskz(A, B, C, D, 8)
 #define __builtin_ia32_vpermilpd512_mask(A, E, C, D) __builtin_ia32_vpermilpd512_mask(A, 1, C, D)
 #define __builtin_ia32_vpermilps512_mask(A, E, C, D) __builtin_ia32_vpermilps512_mask(A, 1, C, D)
 
diff --git a/gcc/testsuite/gcc.ta

[PATCH 3/4] i386: Enable AVX512 memory broadcast for FNMSUB

2018-10-19 Thread H.J. Lu
Many AVX512 vector operations can broadcast from a scalar memory source.
This patch enables memory broadcast for FNMSUB operations.  In order to
support AVX512 memory broadcast for FNMSUB, FNMSUB builtin functions are
also added, instead of passing the negated value to FMA builtin functions.

gcc/

PR target/72782
* config/i386/avx512fintrin.h (_mm512_fnmsub_round_pd): Use
__builtin_ia32_vfnmsubpd512_mask.
(_mm512_mask_fnmsub_round_pd): Likewise.
(_mm512_fnmsub_pd): Likewise.
(_mm512_mask_fnmsub_pd): Likewise.
(_mm512_maskz_fnmsub_round_pd): Use
__builtin_ia32_vfnmsubpd512_maskz.
(_mm512_maskz_fnmsub_pd): Likewise.
(_mm512_fnmsub_round_ps): Use __builtin_ia32_vfnmsubps512_mask.
(_mm512_mask_fnmsub_round_ps): Likewise.
(_mm512_fnmsub_ps): Likewise.
(_mm512_mask_fnmsub_ps): Likewise.
(_mm512_maskz_fnmsub_round_ps): Use
__builtin_ia32_vfnmsubps512_maskz.
(_mm512_maskz_fnmsub_ps): Likewise.
* config/i386/avx512vlintrin.h (_mm256_mask_fnmsub_pd): Use
__builtin_ia32_vfnmsubpd256_mask.
(_mm256_maskz_fnmsub_pd): Use __builtin_ia32_vfnmsubpd256_maskz.
(_mm_mask_fnmsub_pd): Use __builtin_ia32_vfnmsubpd128_mask.
(_mm_maskz_fnmsub_pd): Use __builtin_ia32_vfnmsubpd128_maskz.
(_mm256_mask_fnmsub_ps): Use __builtin_ia32_vfnmsubps256_mask.
(_mm256_maskz_fnmsub_ps): Use __builtin_ia32_vfnmsubps256_maskz.
(_mm_mask_fnmsub_ps): Use __builtin_ia32_vfnmsubps128_mask.
(_mm_maskz_fnmsub_ps): Use __builtin_ia32_vfnmsubps128_maskz.
* config/i386/fmaintrin.h (_mm_fnmsub_pd): Use
__builtin_ia32_vfnmsubpd.
(_mm256_fnmsub_pd): Use __builtin_ia32_vfnmsubpd256.
(_mm_fnmsub_ps): Use __builtin_ia32_vfnmsubps.
(_mm256_fnmsub_ps): Use __builtin_ia32_vfnmsubps256.
(_mm_fnmsub_sd): Use __builtin_ia32_vfnmsubsd3.
(_mm_fnmsub_ss): Use __builtin_ia32_vfnmsubss3.
* config/i386/i386-builtin.def: Add
__builtin_ia32_vfnmsubpd256_mask,
__builtin_ia32_vfnmsubpd256_maskz,
__builtin_ia32_vfnmsubpd128_mask,
__builtin_ia32_vfnmsubpd128_maskz,
__builtin_ia32_vfnmsubps256_mask,
__builtin_ia32_vfnmsubps256_maskz,
__builtin_ia32_vfnmsubps128_mask,
__builtin_ia32_vfnmsubps128_maskz,
__builtin_ia32_vfnmsubpd512_mask,
__builtin_ia32_vfnmsubpd512_maskz,
__builtin_ia32_vfnmsubps512_mask,
__builtin_ia32_vfnmsubps512_maskz, __builtin_ia32_vfnmsubss3,
__builtin_ia32_vfnmsubsd3, __builtin_ia32_vfnmsubps,
__builtin_ia32_vfnmsubpd, __builtin_ia32_vfnmsubps256 and
__builtin_ia32_vfnmsubpd256.
* config/i386/sse.md (fma4i_fnmsub_): New.
(_fnmsub__maskz): Likewise.
(*fma_fnmsub__bcst_1):
Likewise.
(*fma_fnmsub__bcst_2):
Likewise.
(*fma_fnmsub__bcst_3):
Likewise.
(fmai_vmfnmsub_): Likewise.

gcc/testsuite/

PR target/72782
* gcc.target/i386/avx512f-fnmsub-df-zmm-1.c: New test.
* gcc.target/i386/avx512f-fnmsub-sf-zmm-1.c: Likewise.
* gcc.target/i386/avx512f-fnmsub-sf-zmm-2.c: Likewise.
* gcc.target/i386/avx512f-fnmsub-sf-zmm-3.c: Likewise.
* gcc.target/i386/avx512f-fnmsub-sf-zmm-4.c: Likewise.
* gcc.target/i386/avx512f-fnmsub-sf-zmm-5.c: Likewise.
* gcc.target/i386/avx512f-fnmsub-sf-zmm-6.c: Likewise.
* gcc.target/i386/avx512f-fnmsub-sf-zmm-7.c: Likewise.
* gcc.target/i386/avx512f-fnmsub-sf-zmm-8.c: Likewise.
* gcc.target/i386/avx512vl-fnmsub-sf-xmm-1.c: Likewise.
* gcc.target/i386/avx512vl-fnmsub-sf-ymm-1.c: Likewise.
---
 gcc/config/i386/avx512fintrin.h   | 80 +-
 gcc/config/i386/avx512vlintrin.h  | 32 
 gcc/config/i386/fmaintrin.h   | 24 +++---
 gcc/config/i386/i386-builtin.def  | 12 +++
 gcc/config/i386/sse.md| 82 +++
 .../gcc.target/i386/avx512f-fnmsub-df-zmm-1.c | 12 +++
 .../gcc.target/i386/avx512f-fnmsub-sf-zmm-1.c | 12 +++
 .../gcc.target/i386/avx512f-fnmsub-sf-zmm-2.c | 12 +++
 .../gcc.target/i386/avx512f-fnmsub-sf-zmm-3.c | 12 +++
 .../gcc.target/i386/avx512f-fnmsub-sf-zmm-4.c | 12 +++
 .../gcc.target/i386/avx512f-fnmsub-sf-zmm-5.c | 12 +++
 .../gcc.target/i386/avx512f-fnmsub-sf-zmm-6.c | 12 +++
 .../gcc.target/i386/avx512f-fnmsub-sf-zmm-7.c | 12 +++
 .../gcc.target/i386/avx512f-fnmsub-sf-zmm-8.c | 12 +++
 .../i386/avx512vl-fnmsub-sf-xmm-1.c   | 12 +++
 .../i386/avx512vl-fnmsub-sf-ymm-1.c   | 12 +++
 16 files changed, 294 insertions(+), 68 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-fnmsub-df-zmm-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-fnmsub-sf-zmm-1.c
 create mode 100644 gcc/

[PATCH 1/4] i386: Enable AVX512 memory broadcast for FMSUB

2018-10-19 Thread H.J. Lu
Many AVX512 vector operations can broadcast from a scalar memory source.
This patch enables memory broadcast for FMSUB operations.  In order to
support AVX512 memory broadcast for FMSUB, FMSUB builtin functions are
also added, instead of passing the negated value to FMA builtin functions.

gcc/

PR target/72782
* config/i386/avx512fintrin.h (_mm512_fmsub_round_pd): Use
__builtin_ia32_vfmsubpd512_mask.
(_mm512_mask_fmsub_round_pd): Likewise.
(_mm512_fmsub_pd): Likewise.
(_mm512_mask_fmsub_pd): Likewise.
(_mm512_maskz_fmsub_round_pd): Use
__builtin_ia32_vfmsubpd512_maskz.
(_mm512_maskz_fmsub_pd): Likewise.
(_mm512_fmsub_round_ps): Use __builtin_ia32_vfmsubps512_mask.
(_mm512_mask_fmsub_round_ps): Likewise.
(_mm512_fmsub_ps): Likewise.
(_mm512_mask_fmsub_ps): Likewise.
(_mm512_maskz_fmsub_round_ps): Use
__builtin_ia32_vfmsubps512_maskz.
(_mm512_maskz_fmsub_ps): Likewise.
* config/i386/avx512vlintrin.h (_mm256_mask_fmsub_pd): Use
__builtin_ia32_vfmsubpd256_mask.
(_mm256_maskz_fmsub_pd): Use __builtin_ia32_vfmsubpd256_maskz.
(_mm_mask_fmsub_pd): Use __builtin_ia32_vfmsubpd128_mask.
(_mm_maskz_fmsub_pd): Use __builtin_ia32_vfmsubpd128_maskz.
(_mm256_mask_fmsub_ps): Use __builtin_ia32_vfmsubps256_mask.
(_mm256_maskz_fmsub_ps): Use __builtin_ia32_vfmsubps256_maskz.
(_mm_mask_fmsub_ps): Use __builtin_ia32_vfmsubps128_mask.
(_mm_maskz_fmsub_ps): Use __builtin_ia32_vfmsubps128_maskz.
* config/i386/fmaintrin.h (_mm_fmsub_pd): Use
__builtin_ia32_vfmsubpd.
(_mm256_fmsub_pd): Use __builtin_ia32_vfmsubpd256.
(_mm_fmsub_ps): Use __builtin_ia32_vfmsubps.
(_mm256_fmsub_ps): Use __builtin_ia32_vfmsubps256.
(_mm_fmsub_sd): Use __builtin_ia32_vfmsubsd3.
(_mm_fmsub_ss): Use __builtin_ia32_vfmsubss3.
* config/i386/i386-builtin.def: Add
__builtin_ia32_vfmsubpd256_mask,
__builtin_ia32_vfmsubpd256_maskz,
__builtin_ia32_vfmsubpd128_mask,
__builtin_ia32_vfmsubpd128_maskz,
__builtin_ia32_vfmsubps256_mask,
__builtin_ia32_vfmsubps256_maskz,
__builtin_ia32_vfmsubps128_mask,
__builtin_ia32_vfmsubps128_maskz,
__builtin_ia32_vfmsubpd512_mask,
__builtin_ia32_vfmsubpd512_maskz,
__builtin_ia32_vfmsubps512_mask,
__builtin_ia32_vfmsubps512_maskz, __builtin_ia32_vfmsubss3,
__builtin_ia32_vfmsubsd3, __builtin_ia32_vfmsubps,
__builtin_ia32_vfmsubpd, __builtin_ia32_vfmsubps256 and
__builtin_ia32_vfmsubpd256.
* config/i386/sse.md (fma4i_fmsub_): New.
(_fmsub__maskz): Likewise.
(*fma_fmsub__bcst_1):
Likewise.
(*fma_fmsub__bcst_2):
Likewise.
(*fma_fmsub__bcst_3):
Likewise.
(fmai_vmfmsub_): Likewise.

gcc/testsuite/

PR target/72782
* gcc.target/i386/avx512f-fmsub-df-zmm-1.c: New test.
* gcc.target/i386/avx512f-fmsub-sf-zmm-1.c: Likewise.
* gcc.target/i386/avx512f-fmsub-sf-zmm-2.c: Likewise.
* gcc.target/i386/avx512f-fmsub-sf-zmm-3.c: Likewise.
* gcc.target/i386/avx512f-fmsub-sf-zmm-4.c: Likewise.
* gcc.target/i386/avx512f-fmsub-sf-zmm-5.c: Likewise.
* gcc.target/i386/avx512f-fmsub-sf-zmm-6.c: Likewise.
* gcc.target/i386/avx512f-fmsub-sf-zmm-7.c: Likewise.
* gcc.target/i386/avx512f-fmsub-sf-zmm-8.c: Likewise.
* gcc.target/i386/avx512vl-fmsub-sf-xmm-1.c: Likewise.
* gcc.target/i386/avx512vl-fmsub-sf-ymm-1.c: Likewise.
---
 gcc/config/i386/avx512fintrin.h   | 60 +++
 gcc/config/i386/avx512vlintrin.h  | 32 
 gcc/config/i386/fmaintrin.h   | 24 +++---
 gcc/config/i386/i386-builtin.def  | 18 +
 gcc/config/i386/sse.md| 77 +++
 .../gcc.target/i386/avx512f-fmsub-df-zmm-1.c  | 12 +++
 .../gcc.target/i386/avx512f-fmsub-sf-zmm-1.c  | 12 +++
 .../gcc.target/i386/avx512f-fmsub-sf-zmm-2.c  | 12 +++
 .../gcc.target/i386/avx512f-fmsub-sf-zmm-3.c  | 12 +++
 .../gcc.target/i386/avx512f-fmsub-sf-zmm-4.c  | 12 +++
 .../gcc.target/i386/avx512f-fmsub-sf-zmm-5.c  | 12 +++
 .../gcc.target/i386/avx512f-fmsub-sf-zmm-6.c  | 12 +++
 .../gcc.target/i386/avx512f-fmsub-sf-zmm-7.c  | 12 +++
 .../gcc.target/i386/avx512f-fmsub-sf-zmm-8.c  | 12 +++
 .../gcc.target/i386/avx512vl-fmsub-sf-xmm-1.c | 12 +++
 .../gcc.target/i386/avx512vl-fmsub-sf-ymm-1.c | 12 +++
 16 files changed, 285 insertions(+), 58 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-fmsub-df-zmm-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-fmsub-sf-zmm-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-fmsub-sf-zmm-2.c
 create mode 100644 gcc/testsuite/g

[PATCH 2/4] i386: Enable AVX512 memory broadcast for FNMADD

2018-10-19 Thread H.J. Lu
Many AVX512 vector operations can broadcast from a scalar memory source.
This patch enables memory broadcast for FNMADD operations.  In order to
support AVX512 memory broadcast for FNMADD, FNMADD builtin functions are
also added, instead of passing the negated value to FMA builtin functions.

gcc/

PR target/72782
* config/i386/avx512fintrin.h (_mm512_fnmadd_round_pd): Use
__builtin_ia32_vfnmaddpd512_mask.
(_mm512_mask_fnmadd_round_pd): Likewise.
(_mm512_fnmadd_pd): Likewise.
(_mm512_mask_fnmadd_pd): Likewise.
(_mm512_maskz_fnmadd_round_pd): Use
__builtin_ia32_vfnmaddpd512_maskz.
(_mm512_maskz_fnmadd_pd): Likewise.
(_mm512_fnmadd_round_ps): Use __builtin_ia32_vfnmaddps512_mask.
(_mm512_mask_fnmadd_round_ps): Likewise.
(_mm512_fnmadd_ps): Likewise.
(_mm512_mask_fnmadd_ps): Likewise.
(_mm512_maskz_fnmadd_round_ps): Use
__builtin_ia32_vfnmaddps512_maskz.
(_mm512_maskz_fnmadd_ps): Likewise.
* config/i386/avx512vlintrin.h (_mm256_mask_fnmadd_pd): Use
__builtin_ia32_vfnmaddpd256_mask.
(_mm256_maskz_fnmadd_pd): Use __builtin_ia32_vfnmaddpd256_maskz.
(_mm_mask_fnmadd_pd): Use __builtin_ia32_vfnmaddpd128_mask.
(_mm_maskz_fnmadd_pd): Use __builtin_ia32_vfnmaddpd128_maskz.
(_mm256_mask_fnmadd_ps): Use __builtin_ia32_vfnmaddps256_mask.
(_mm256_maskz_fnmadd_ps): Use __builtin_ia32_vfnmaddps256_maskz.
(_mm_mask_fnmadd_ps): Use __builtin_ia32_vfnmaddps128_mask.
(_mm_maskz_fnmadd_ps): Use __builtin_ia32_vfnmaddps128_maskz.
* config/i386/fmaintrin.h (_mm_fnmadd_pd): Use
__builtin_ia32_vfnmaddpd.
(_mm256_fnmadd_pd): Use __builtin_ia32_vfnmaddpd256.
(_mm_fnmadd_ps): Use __builtin_ia32_vfnmaddps.
(_mm256_fnmadd_ps): Use __builtin_ia32_vfnmaddps256.
(_mm_fnmadd_sd): Use __builtin_ia32_vfnmaddsd3.
(_mm_fnmadd_ss): Use __builtin_ia32_vfnmaddss3.
* config/i386/i386-builtin.def: Add
__builtin_ia32_vfnmaddpd256_mask,
__builtin_ia32_vfnmaddpd256_maskz,
__builtin_ia32_vfnmaddpd128_mask,
__builtin_ia32_vfnmaddpd128_maskz,
__builtin_ia32_vfnmaddps256_mask,
__builtin_ia32_vfnmaddps256_maskz,
__builtin_ia32_vfnmaddps128_mask,
__builtin_ia32_vfnmaddps128_maskz,
__builtin_ia32_vfnmaddpd512_mask,
__builtin_ia32_vfnmaddpd512_maskz,
__builtin_ia32_vfnmaddps512_mask,
__builtin_ia32_vfnmaddps512_maskz, __builtin_ia32_vfnmaddss3,
__builtin_ia32_vfnmaddsd3, __builtin_ia32_vfnmaddps,
__builtin_ia32_vfnmaddpd, __builtin_ia32_vfnmaddps256 and
__builtin_ia32_vfnmaddpd256.
* config/i386/sse.md (fma4i_fnmadd_): New.
(_fnmadd__maskz): Likewise.
(*fma_fnmadd__bcst_1):
Likewise.
(*fma_fnmadd__bcst_2):
Likewise.
(*fma_fnmadd__bcst_3):
Likewise.
(fmai_vmfnmadd_): Likewise.

gcc/testsuite/

PR target/72782
* gcc.target/i386/avx512f-fnmadd-df-zmm-1.c: New test.
* gcc.target/i386/avx512f-fnmadd-sf-zmm-1.c: Likewise.
* gcc.target/i386/avx512f-fnmadd-sf-zmm-2.c: Likewise.
* gcc.target/i386/avx512f-fnmadd-sf-zmm-3.c: Likewise.
* gcc.target/i386/avx512f-fnmadd-sf-zmm-4.c: Likewise.
* gcc.target/i386/avx512f-fnmadd-sf-zmm-5.c: Likewise.
* gcc.target/i386/avx512f-fnmadd-sf-zmm-6.c: Likewise.
* gcc.target/i386/avx512f-fnmadd-sf-zmm-7.c: Likewise.
* gcc.target/i386/avx512f-fnmadd-sf-zmm-8.c: Likewise.
* gcc.target/i386/avx512vl-fnmadd-sf-xmm-1.c: Likewise.
* gcc.target/i386/avx512vl-fnmadd-sf-ymm-1.c: Likewise.
---
 gcc/config/i386/avx512fintrin.h   | 124 +-
 gcc/config/i386/avx512vlintrin.h  |  64 -
 gcc/config/i386/fmaintrin.h   |  24 ++--
 gcc/config/i386/i386-builtin.def  |  20 +++
 gcc/config/i386/sse.md|  77 +++
 .../gcc.target/i386/avx512f-fnmadd-df-zmm-1.c |  12 ++
 .../gcc.target/i386/avx512f-fnmadd-sf-zmm-1.c |  12 ++
 .../gcc.target/i386/avx512f-fnmadd-sf-zmm-2.c |  12 ++
 .../gcc.target/i386/avx512f-fnmadd-sf-zmm-3.c |  12 ++
 .../gcc.target/i386/avx512f-fnmadd-sf-zmm-4.c |  12 ++
 .../gcc.target/i386/avx512f-fnmadd-sf-zmm-5.c |  12 ++
 .../gcc.target/i386/avx512f-fnmadd-sf-zmm-6.c |  12 ++
 .../gcc.target/i386/avx512f-fnmadd-sf-zmm-7.c |  12 ++
 .../gcc.target/i386/avx512f-fnmadd-sf-zmm-8.c |  12 ++
 .../i386/avx512vl-fnmadd-sf-xmm-1.c   |  12 ++
 .../i386/avx512vl-fnmadd-sf-ymm-1.c   |  12 ++
 16 files changed, 335 insertions(+), 106 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-fnmadd-df-zmm-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-fnmadd-sf-zmm-1.c
 create mode 100644 gcc/tes

Re: [PATCH v2 1/3] or1k: libgcc: initial support for openrisc

2018-10-19 Thread Stafford Horne
On Thu, Oct 18, 2018 at 05:55:35PM -0600, Jeff Law wrote:
> On 10/18/18 2:06 PM, Stafford Horne wrote:
> > On Thu, Oct 18, 2018 at 03:22:56PM +0200, Sebastian Huber wrote:
> >> Hello,
> >>
> >> is there a chance to get the or1k support integrated before the GCC 9 stage
> >> 3?
> > 
> > Hello,
> > 
> > I would definitely like that and that is my goal.  It seems the limiting 
> > factor
> > is getting technical review and signoff on this set of patches.
> > 
> > I will send out a PATCH v3 with a few minor enhancements gathered since v2 
> > today
> > or tomorrow.  Then I will try to ping a few people if I don't get reviews by 
> > next
> > week.
> Also note that for a port with minimal bleed out (and I think the or1k
> qualifies) we can still integrate it during stage3.  But obviously it'd
> be better to get it in during stage1.

Thanks for the info.  It's good to know the hard deadline is not this month for
me.

Did you get any chance to look at the second patch series for the OpenRISC gcc
patches?  I added the function comments you were asking for.  Also Richard fixed
up a few other things you were mentioning.

I understand you must be busy with the upcoming lockdown.

-Stafford


Re: [RFC] GCC support for live-patching

2018-10-19 Thread Andi Kleen
> > Is it because you generate something manually and want to limit that
> > work,
> 
> I think that this is one of the reasons. 
> and as mentioned in my writeup, the targeted users of this new functionality 
> is for live-patching users who generate
> patches by hand. 

Ok just means they need better tooling.

> in which, it explains that these new options are for helping live-patching 
> users who create patches entirely by hand, including kernel
> live-patching scheme kGraft, and one of our internal customers. 

It sounds to me the problem is not gcc here, but an inefficient scheme to 
create patches.

> the major reason is to control the code size explosion of manual patches. 
> It’s the request from our internal customer. 

So essentially you want to disable inlining.

The Linux kernel code heavily relies on inlining to optimize constants
and remove unnecessary code paths.

For example I cannot even imagine how horrible the code for get/put/copy_*_user 
would be if you just disabled inlining on it. That's a fairly common 
coding pattern in core kernel code and it's not going away. 

I think the time that is spent here pessimizing code would be far better
spent creating better tools to create patches. There's a reason
why nearly all people stopped writing things manually in assembler and moved
to compilers. Same reasons should apply to patches.

> I think that the current option 
> -fease-live-patching=inline-clone  -flive-patching-list
> 
> should automatically ONLY enable inline+clone optimization (disable all 
> other ipa optimization/analyses at the same time) and generate the impacted 
> function
> list for each of the function.

Dwarf2+ has all the information that is needed to find inlines and clones 
already.

e.g. systemtap and gdb and perf probes and most other debuggers support it fine
to find all copies of a given line.

I wrote parsing tools for that too and it's not too difficult to use.

-Andi



[PATCH] Fix ICE with address of (static) compound literal (PR middle-end/87647)

2018-10-19 Thread Jakub Jelinek
Hi!

COMPOUND_LITERAL_EXPRs are removed from static initializers in
record_references_in_initializer, unfortunately decode_addr_const can be
called from const_hash_1 from output_constant_def before that happens
and as record_references_in_initializer needs a varpool node, we can't call
it during the hashing used to check if we have such a constant already.

The following patch handles COMPOUND_LITERAL_EXPRs like they will be handled
later on for the purpose of hashing them.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2018-10-19  Jakub Jelinek  

PR middle-end/87647
* varasm.c (decode_addr_const): Handle COMPOUND_LITERAL_EXPR.

* gcc.c-torture/compile/pr87647.c: New test.

--- gcc/varasm.c.jj 2018-10-11 09:05:48.124511816 +0200
+++ gcc/varasm.c2018-10-19 13:27:41.921160889 +0200
@@ -2953,6 +2953,11 @@ decode_addr_const (tree exp, struct addr
   gen_rtx_SYMBOL_REF (Pmode, "origin of addresses"));
   break;
 
+case COMPOUND_LITERAL_EXPR:
+  gcc_assert (COMPOUND_LITERAL_EXPR_DECL (target));
+  x = DECL_RTL (COMPOUND_LITERAL_EXPR_DECL (target));
+  break;
+
 default:
   gcc_unreachable ();
 }
--- gcc/testsuite/gcc.c-torture/compile/pr87647.c.jj2018-10-19 
13:30:28.797388068 +0200
+++ gcc/testsuite/gcc.c-torture/compile/pr87647.c   2018-10-19 
13:30:00.778853626 +0200
@@ -0,0 +1,15 @@
+/* PR middle-end/87647 */
+
+struct A {};
+struct A *const b = &(struct A) {};
+struct B { char *s; struct A *t; };
+void bar (struct B *);
+
+void
+foo (void)
+{
+  struct B a[] = { "", b, "", b, "", b, "", b, "", b, "", b, "", b, "", b,
+  "", b, "", b, "", b, "", b, "", b, "", b, "", b, "", b,
+  "", b };
+  bar (a);
+}

Jakub


[committed] Diagnose ordered construct without depend clauses in ordered(n) loop

2018-10-19 Thread Jakub Jelinek
Hi!

We ICE on the following invalid testcase, because we failed to diagnose if
ordered construct without depend clause binds to loop with ordered(n) clause
(i.e. doacross loop).

Fixed thusly, bootstrapped/regtested on x86_64-linux and i686-linux,
committed to trunk so far.

Not happy about the sink-3.c extra diagnostics, when I find time, I'll
change the depend sink clause parsing so that the clauses aren't removed,
but kept say with error_mark_node OMP_CLAUSE_DECL and can be removed only during
gimplification together with the whole construct.

2018-10-19  Jakub Jelinek  

PR middle-end/85488
PR middle-end/87649
* omp-low.c (check_omp_nesting_restrictions): Diagnose ordered without
depend closely nested inside of loop with ordered clause with
a parameter.

* c-c++-common/gomp/doacross-2.c: New test.
* c-c++-common/gomp/sink-3.c: Expect another error during error
recovery.

--- gcc/omp-low.c.jj2018-08-27 17:50:42.804505831 +0200
+++ gcc/omp-low.c   2018-10-19 11:43:46.924579912 +0200
@@ -2762,14 +2762,25 @@ check_omp_nesting_restrictions (gimple *
  case GIMPLE_OMP_FOR:
if (gimple_omp_for_kind (ctx->stmt) == GF_OMP_FOR_KIND_TASKLOOP)
  goto ordered_in_taskloop;
-   if (omp_find_clause (gimple_omp_for_clauses (ctx->stmt),
-OMP_CLAUSE_ORDERED) == NULL)
+   tree o;
+   o = omp_find_clause (gimple_omp_for_clauses (ctx->stmt),
+OMP_CLAUSE_ORDERED);
+   if (o == NULL)
  {
error_at (gimple_location (stmt),
  "%<ordered%> region must be closely nested inside "
  "a loop region with an %<ordered%> clause");
return false;
  }
+   if (OMP_CLAUSE_ORDERED_EXPR (o) != NULL_TREE
+   && omp_find_clause (c, OMP_CLAUSE_DEPEND) == NULL_TREE)
+ {
+   error_at (gimple_location (stmt),
+ "%<ordered%> region without %<depend%> clause may "
+ "not be closely nested inside a loop region with "
+ "an %<ordered%> clause with a parameter");
+   return false;
+ }
return true;
  case GIMPLE_OMP_TARGET:
if (gimple_omp_target_kind (ctx->stmt)
--- gcc/testsuite/c-c++-common/gomp/doacross-2.c.jj 2018-10-19 
11:38:19.185057188 +0200
+++ gcc/testsuite/c-c++-common/gomp/doacross-2.c2018-10-19 
11:38:11.027193588 +0200
@@ -0,0 +1,49 @@
+/* PR middle-end/87649 */
+
+void
+foo (void)
+{
+  int i;
+  #pragma omp for ordered(1)
+  for (i = 0; i < 64; i++)
+{
+  #pragma omp ordered  /* { dg-error "'ordered' region 
without 'depend' clause may not be closely nested inside a loop region with an 
'ordered' clause with a parameter" } */
+  ;
+}
+  #pragma omp for ordered(1)
+  for (i = 0; i < 64; i++)
+{
+  #pragma omp ordered threads  /* { dg-error "'ordered' region 
without 'depend' clause may not be closely nested inside a loop region with an 
'ordered' clause with a parameter" } */
+  ;
+}
+}
+
+void
+bar (void)
+{
+  int i;
+  #pragma omp for ordered
+  for (i = 0; i < 64; i++)
+{
+  #pragma omp ordered depend(source)   /* { dg-error "'ordered' 
construct with 'depend' clause must be closely nested inside a loop with 
'ordered' clause with a parameter" } */
+  #pragma omp ordered depend(sink: i - 1)  /* { dg-error "'ordered' 
construct with 'depend' clause must be closely nested inside a loop with 
'ordered' clause with a parameter" } */
+}
+  #pragma omp for
+  for (i = 0; i < 64; i++)
+{
+  #pragma omp ordered depend(source)   /* { dg-error "'ordered' 
construct with 'depend' clause must be closely nested inside a loop with 
'ordered' clause with a parameter" } */
+  #pragma omp ordered depend(sink: i - 1)  /* { dg-error "'ordered' 
construct with 'depend' clause must be closely nested inside a loop with 
'ordered' clause with a parameter" } */
+}
+  #pragma omp for
+  for (i = 0; i < 64; i++)
+{
+  #pragma omp ordered  /* { dg-error "'ordered' region 
must be closely nested inside a loop region with an 'ordered' clause" } */
+  ;
+}
+  #pragma omp for
+  for (i = 0; i < 64; i++)
+{
+  #pragma omp ordered threads  /* { dg-error "'ordered' region 
must be closely nested inside a loop region with an 'ordered' clause" } */
+  ;
+}
+}
--- gcc/testsuite/c-c++-common/gomp/sink-3.c.jj 2015-10-13 20:57:40.712493500 
+0200
+++ gcc/testsuite/c-c++-common/gomp/sink-3.c2018-10-19 12:03:27.326879161 
+0200
@@ -14,7 +14,7 @@ foo ()
   for (i=0; i < 100; ++i)
 {
 #pragma omp ordered depend(sink:poo-1,paa+1) /* { dg-error 
"poo.*declared.*paa.*declared" } */
-bar(&i);
+bar(&i);/* { dg-error "may not be closely 
neste

Re: [RFC][PATCH LRA] WIP patch to fix one part of PR87507

2018-10-19 Thread Peter Bergner
On 10/19/18 4:16 PM, Peter Bergner wrote:
> Thoughts?  I'll note that this does not fix the S390 bugs, since those seem
> to be due to problems with early clobber operands and "matching" constraint
> operands.  I'm still working on that and hope to have something soon.
[snip]
>   * lra-constraints.c (process_alt_operands): Abort on illegal hard
>   register usage.  Prefer reloading non hard register operands.

I stand corrected.  Using this patch, plus Segher's combine patch, I am
able to bootstrap s390x-linux.  I'm running the test suite to see whether
that looks clean as well.  Maybe those s390 issues were related to combine
pushing hard regs into patterns and we just weren't handling them well?

Jeff, maybe once Segher commits his patch, can you give this patch a try
on your testers?

Peter



Avoid unnecessarily numbered clone symbols

2018-10-19 Thread Michael Ploujnikov
While working on
https://gcc.gnu.org/ml/gcc-patches/2018-09/msg00228.html I've
accumulated a few easy patches.

The first one renames the functions in question to hopefully encourage
proper future usage. The other ones use the unnumbered version of the
clone name function where I've verified the numbers are not
needed. I've verified these by doing a full bootstrap and a regression
test, by instrumenting the code and by understanding and following the
surrounding code to convince myself that the numbering is indeed not
needed. For the cold functions I've also confirmed with Sriraman
Tallam that they don't need to be numbered.



Regards,
- Michael
From 0bbcf3b8c20498f4d861e088ff7ab38e2a43800b Mon Sep 17 00:00:00 2001
From: Michael Ploujnikov 
Date: Tue, 7 Aug 2018 20:36:53 -0400
Subject: [PATCH 1/4] Rename clone_function_name_1 and clone_function_name to
 clarify usage.

gcc:
2018-10-19  Michael Ploujnikov  

   * gcc/cgraph.h: Rename clone_function_name_1 to
 numbered_clone_function_name_1. Rename clone_function_name to
 numbered_clone_function_name.
   * cgraphclones.c: Ditto.
   * config/rs6000/rs6000.c: Ditto.
   * lto/lto-partition.c: Ditto.
   * multiple_target.c: Ditto.
   * omp-expand.c: Ditto.
   * omp-low.c: Ditto.
   * omp-simd-clone.c: Ditto.
   * symtab.c: Ditto.
---
 gcc/cgraph.h   |  4 ++--
 gcc/cgraphclones.c | 20 +++-
 gcc/config/rs6000/rs6000.c |  2 +-
 gcc/lto/lto-partition.c|  4 ++--
 gcc/multiple_target.c  |  8 
 gcc/omp-expand.c   |  2 +-
 gcc/omp-low.c  |  4 ++--
 gcc/omp-simd-clone.c   |  2 +-
 gcc/symtab.c   |  2 +-
 9 files changed, 25 insertions(+), 23 deletions(-)

diff --git gcc/cgraph.h gcc/cgraph.h
index a8b1b4c..3583f7e 100644
--- gcc/cgraph.h
+++ gcc/cgraph.h
@@ -2368,8 +2368,8 @@ basic_block init_lowered_empty_function (tree, bool, profile_count);
 tree thunk_adjust (gimple_stmt_iterator *, tree, bool, HOST_WIDE_INT, tree);
 /* In cgraphclones.c  */
 
-tree clone_function_name_1 (const char *, const char *);
-tree clone_function_name (tree decl, const char *);
+tree numbered_clone_function_name_1 (const char *, const char *);
+tree numbered_clone_function_name (tree decl, const char *);
 
 void tree_function_versioning (tree, tree, vec *,
 			   bool, bitmap, bool, bitmap, basic_block);
diff --git gcc/cgraphclones.c gcc/cgraphclones.c
index 6e84a31..cdb183d 100644
--- gcc/cgraphclones.c
+++ gcc/cgraphclones.c
@@ -316,7 +316,7 @@ duplicate_thunk_for_node (cgraph_node *thunk, cgraph_node *node)
   gcc_checking_assert (!DECL_RESULT (new_decl));
   gcc_checking_assert (!DECL_RTL_SET_P (new_decl));
 
-  DECL_NAME (new_decl) = clone_function_name (thunk->decl, "artificial_thunk");
+  DECL_NAME (new_decl) = numbered_clone_function_name (thunk->decl, "artificial_thunk");
   SET_DECL_ASSEMBLER_NAME (new_decl, DECL_NAME (new_decl));
 
   new_thunk = cgraph_node::create (new_decl);
@@ -514,11 +514,11 @@ cgraph_node::create_clone (tree new_decl, profile_count prof_count,
 
 static GTY(()) unsigned int clone_fn_id_num;
 
-/* Return a new assembler name for a clone with SUFFIX of a decl named
-   NAME.  */
+/* Return NAME appended with string SUFFIX and a unique unspecified
+   number.  */
 
 tree
-clone_function_name_1 (const char *name, const char *suffix)
+numbered_clone_function_name_1 (const char *name, const char *suffix)
 {
   size_t len = strlen (name);
   char *tmp_name, *prefix;
@@ -531,13 +531,15 @@ clone_function_name_1 (const char *name, const char *suffix)
   return get_identifier (tmp_name);
 }
 
-/* Return a new assembler name for a clone of DECL with SUFFIX.  */
+/* Return a new assembler name for a clone of DECL. Apart from the
+   string SUFFIX, the new name will end with a unique unspecified
+   number.  */
 
 tree
-clone_function_name (tree decl, const char *suffix)
+numbered_clone_function_name (tree decl, const char *suffix)
 {
   tree name = DECL_ASSEMBLER_NAME (decl);
-  return clone_function_name_1 (IDENTIFIER_POINTER (name), suffix);
+  return numbered_clone_function_name_1 (IDENTIFIER_POINTER (name), suffix);
 }
 
 
@@ -585,7 +587,7 @@ cgraph_node::create_virtual_clone (vec redirect_callers,
   strcpy (name + len + 1, suffix);
   name[len] = '.';
   DECL_NAME (new_decl) = get_identifier (name);
-  SET_DECL_ASSEMBLER_NAME (new_decl, clone_function_name (old_decl, suffix));
+  SET_DECL_ASSEMBLER_NAME (new_decl, numbered_clone_function_name (old_decl, suffix));
   SET_DECL_RTL (new_decl, NULL);
 
   new_node = create_clone (new_decl, count, false,
@@ -964,7 +966,7 @@ cgraph_node::create_version_clone_with_body
   = build_function_decl_skip_args (old_decl, args_to_skip, skip_return);
 
   /* Generate a new name for the new version. */
-  DECL_NAME (new_decl) = clone_function_name (old_decl, suffix);
+  DECL_NAME (new_decl) = numbered_clone_function_name (old_decl, suffix);
   SET_DECL_ASSEMBLER_NAME (new_decl, DECL_NAM

Re: [Patc, fortran] PR85603 - ICE with character array substring assignment

2018-10-19 Thread Dominique d'Humières
Reduced test

! { dg-do compile }
MODULE TN4
  IMPLICIT NONE
  PRIVATE
  INTEGER,PARAMETER::SH4=KIND('a')
  TYPE,PUBLIC::TOP
CHARACTER(:,KIND=SH4),ALLOCATABLE::ROR
CHARACTER(:,KIND=SH4),ALLOCATABLE::VI8
  CONTAINS
PROCEDURE,NON_OVERRIDABLE::SB=>TPX
  END TYPE TOP
CONTAINS
  SUBROUTINE TPX(TP6,PP4,BA3)
CLASS(TOP),INTENT(INOUT)::TP6
INTEGER,INTENT(IN)::PP4
TYPE(TOP),INTENT(OUT)::BA3
BA3%ROR=TP6%ROR(PP4:)
BA3%VI8=TP6%ROR(PP4:)
TP6%ROR=TP6%ROR(:PP4-1)
TP6%VI8=TP6%ROR(:PP4-1)
  END SUBROUTINE TPX
END MODULE TN4
! https://groups.google.com/forum/#!topic/comp.lang.fortran/nV3TlRlVKBc

TIA

Dominique

> Le 19 oct. 2018 à 23:39, Dominique d'Humières  a écrit :
> 
> Hi Paul,
> 
> I get a regression with your patch:
> 
> obfuscated_tn4.f90:300:0:
> 
>  300 | TP6%ROR=TP6%ROR(:PP4-1)
>  | 
> internal compiler error: in gfc_trans_deferred_vars, at 
> fortran/trans-decl.c:4754
> 
> 
> I’ll try to reduce the test.
> 
> Dominique
> 



[PATCH] Skip tests for GNU extensions when testing with strict mode

2018-10-19 Thread Jonathan Wakely

Tests for the implicit allocator rebinding extension will fail if the
extension is disabled, so skip them.

* testsuite/23_containers/array/requirements/explicit_instantiation/
3.cc: Skip test when compiled with a -std=c++NN strict mode.
* testsuite/23_containers/deque/requirements/explicit_instantiation/
3.cc: Likewise.
* testsuite/23_containers/forward_list/requirements/
explicit_instantiation/3.cc: Likewise.
* testsuite/23_containers/list/requirements/explicit_instantiation/
3.cc: Likewise.
* testsuite/23_containers/map/requirements/explicit_instantiation/
3.cc: Likewise.
* testsuite/23_containers/multimap/requirements/explicit_instantiation/
3.cc: Likewise.
* testsuite/23_containers/multiset/requirements/explicit_instantiation/
3.cc: Likewise.
* testsuite/23_containers/set/requirements/explicit_instantiation/
3.cc: Likewise.
* testsuite/23_containers/unordered_map/requirements/
explicit_instantiation/3.cc: Likewise.
* testsuite/23_containers/unordered_multimap/requirements/
explicit_instantiation/3.cc: Likewise.
* testsuite/23_containers/unordered_multiset/requirements/
explicit_instantiation/3.cc: Likewise.
* testsuite/23_containers/unordered_set/requirements/
explicit_instantiation/3.cc: Likewise.
* testsuite/23_containers/vector/ext_pointer/explicit_instantiation/
3.cc: Likewise.
* testsuite/23_containers/vector/requirements/explicit_instantiation/
3.cc: Likewise.
  
Tested powerpc64le-linux, committed to trunk.


commit 2bcdb54ddf73aebf4935fde4ee9e0eabbdd18d83
Author: Jonathan Wakely 
Date:   Fri Oct 19 22:23:00 2018 +0100

Skip tests for GNU extensions when testing with strict mode

Tests for the implicit allocator rebinding extension will fail if the
extension is disabled, so skip them.

* testsuite/23_containers/array/requirements/explicit_instantiation/
3.cc: Skip test when compiled with a -std=c++NN strict mode.
* testsuite/23_containers/deque/requirements/explicit_instantiation/
3.cc: Likewise.
* testsuite/23_containers/forward_list/requirements/
explicit_instantiation/3.cc: Likewise.
* testsuite/23_containers/list/requirements/explicit_instantiation/
3.cc: Likewise.
* testsuite/23_containers/map/requirements/explicit_instantiation/
3.cc: Likewise.
* 
testsuite/23_containers/multimap/requirements/explicit_instantiation/
3.cc: Likewise.
* 
testsuite/23_containers/multiset/requirements/explicit_instantiation/
3.cc: Likewise.
* testsuite/23_containers/set/requirements/explicit_instantiation/
3.cc: Likewise.
* testsuite/23_containers/unordered_map/requirements/
explicit_instantiation/3.cc: Likewise.
* testsuite/23_containers/unordered_multimap/requirements/
explicit_instantiation/3.cc: Likewise.
* testsuite/23_containers/unordered_multiset/requirements/
explicit_instantiation/3.cc: Likewise.
* testsuite/23_containers/unordered_set/requirements/
explicit_instantiation/3.cc: Likewise.
* testsuite/23_containers/vector/ext_pointer/explicit_instantiation/
3.cc: Likewise.
* 
testsuite/23_containers/vector/requirements/explicit_instantiation/
3.cc: Likewise.

diff --git 
a/libstdc++-v3/testsuite/23_containers/array/requirements/explicit_instantiation/3.cc
 
b/libstdc++-v3/testsuite/23_containers/array/requirements/explicit_instantiation/3.cc
index 56c6492c176..000281c6c56 100644
--- 
a/libstdc++-v3/testsuite/23_containers/array/requirements/explicit_instantiation/3.cc
+++ 
b/libstdc++-v3/testsuite/23_containers/array/requirements/explicit_instantiation/3.cc
@@ -1,4 +1,5 @@
 // { dg-do compile { target c++11 } }
+// { dg-skip-if "no extensions in strict dialects" { *-*-* } { "-std=c++*" } }
 
 // 2010-05-20  Paolo Carlini  
 //
@@ -20,6 +21,7 @@
 // .
 
 // This file tests explicit instantiation of library containers
+// with an allocator for a different value_type (which is a GNU extension).
 
 #include 
 #include 
diff --git 
a/libstdc++-v3/testsuite/23_containers/deque/requirements/explicit_instantiation/3.cc
 
b/libstdc++-v3/testsuite/23_containers/deque/requirements/explicit_instantiation/3.cc
index ced9c45d0cf..22aa6cf836f 100644
--- 
a/libstdc++-v3/testsuite/23_containers/deque/requirements/explicit_instantiation/3.cc
+++ 
b/libstdc++-v3/testsuite/23_containers/deque/requirements/explicit_instantiation/3.cc
@@ -15,12 +15,13 @@
 // with this library; see the file COPYING3.  If not see
 // .
 
-
 // This file tests explicit instantiation of library containers
+// wit

[PATCH] Fix testsuite failures due to extra errors in strict dialects

2018-10-19 Thread Jonathan Wakely

When __STRICT_ANSI__ is defined the incorrect allocators used in these
tests also trigger an additional static assertion. Prune those extra
errors so that the tests don't fail when built with strict dialects.

* testsuite/23_containers/deque/48101_neg.cc: Prune additional errors
printed when __STRICT_ANSI__ is defined.
* testsuite/23_containers/forward_list/48101_neg.cc: Likewise.
* testsuite/23_containers/list/48101_neg.cc: Likewise.
* testsuite/23_containers/multiset/48101_neg.cc: Likewise.
* testsuite/23_containers/set/48101_neg.cc: Likewise.
* testsuite/23_containers/unordered_multiset/48101_neg.cc: Likewise.
* testsuite/23_containers/unordered_set/48101_neg.cc: Likewise.
* testsuite/23_containers/vector/48101_neg.cc: Likewise.
  
Tested powerpc64le-linux, committed to trunk.


commit 5d8820b07f70730a263165592bba567fa6ee5e99
Author: Jonathan Wakely 
Date:   Fri Oct 19 22:20:16 2018 +0100

Fix testsuite failures due to extra errors in strict dialects

    When __STRICT_ANSI__ is defined the incorrect allocators used in these
    tests also trigger an additional static assertion. Prune those extra
    errors so that the tests don't fail when built with strict dialects.

* testsuite/23_containers/deque/48101_neg.cc: Prune additional 
errors
printed when __STRICT_ANSI__ is defined.
* testsuite/23_containers/forward_list/48101_neg.cc: Likewise.
* testsuite/23_containers/list/48101_neg.cc: Likewise.
* testsuite/23_containers/multiset/48101_neg.cc: Likewise.
* testsuite/23_containers/set/48101_neg.cc: Likewise.
* testsuite/23_containers/unordered_multiset/48101_neg.cc: Likewise.
* testsuite/23_containers/unordered_set/48101_neg.cc: Likewise.
* testsuite/23_containers/vector/48101_neg.cc: Likewise.

diff --git a/libstdc++-v3/testsuite/23_containers/deque/48101_neg.cc 
b/libstdc++-v3/testsuite/23_containers/deque/48101_neg.cc
index cee0c9fde47..1f9e3e3b932 100644
--- a/libstdc++-v3/testsuite/23_containers/deque/48101_neg.cc
+++ b/libstdc++-v3/testsuite/23_containers/deque/48101_neg.cc
@@ -27,3 +27,4 @@ test01()
 
 // { dg-error "non-const, non-volatile value_type" "" { target *-*-* } 0 }
 // { dg-prune-output "std::allocator<.* has no member named " }
+// { dg-prune-output "must have the same value_type as its allocator" }
diff --git a/libstdc++-v3/testsuite/23_containers/forward_list/48101_neg.cc 
b/libstdc++-v3/testsuite/23_containers/forward_list/48101_neg.cc
index fea18df8d42..46163d17a95 100644
--- a/libstdc++-v3/testsuite/23_containers/forward_list/48101_neg.cc
+++ b/libstdc++-v3/testsuite/23_containers/forward_list/48101_neg.cc
@@ -27,3 +27,4 @@ test01()
 
 // { dg-error "non-const, non-volatile value_type" "" { target *-*-* } 0 }
 // { dg-prune-output "std::allocator<.* has no member named " }
+// { dg-prune-output "must have the same value_type as its allocator" }
diff --git a/libstdc++-v3/testsuite/23_containers/list/48101_neg.cc 
b/libstdc++-v3/testsuite/23_containers/list/48101_neg.cc
index 478903a92c5..45848e50809 100644
--- a/libstdc++-v3/testsuite/23_containers/list/48101_neg.cc
+++ b/libstdc++-v3/testsuite/23_containers/list/48101_neg.cc
@@ -27,3 +27,4 @@ test01()
 
 // { dg-error "non-const, non-volatile value_type" "" { target *-*-* } 0 }
 // { dg-prune-output "std::allocator<.* has no member named " }
+// { dg-prune-output "must have the same value_type as its allocator" }
diff --git a/libstdc++-v3/testsuite/23_containers/multiset/48101_neg.cc 
b/libstdc++-v3/testsuite/23_containers/multiset/48101_neg.cc
index b815f86219b..3b607f985ad 100644
--- a/libstdc++-v3/testsuite/23_containers/multiset/48101_neg.cc
+++ b/libstdc++-v3/testsuite/23_containers/multiset/48101_neg.cc
@@ -29,3 +29,4 @@ test01()
 // { dg-error "non-const, non-volatile value_type" "" { target *-*-* } 0 }
 // { dg-error "comparison object must be invocable" "" { target *-*-* } 0 }
 // { dg-prune-output "std::allocator<.* has no member named " }
+// { dg-prune-output "must have the same value_type as its allocator" }
diff --git a/libstdc++-v3/testsuite/23_containers/set/48101_neg.cc 
b/libstdc++-v3/testsuite/23_containers/set/48101_neg.cc
index 9cd728d9bc6..bf0f4210594 100644
--- a/libstdc++-v3/testsuite/23_containers/set/48101_neg.cc
+++ b/libstdc++-v3/testsuite/23_containers/set/48101_neg.cc
@@ -29,3 +29,4 @@ test01()
 // { dg-error "non-const, non-volatile value_type" "" { target *-*-* } 0 }
 // { dg-error "comparison object must be invocable" "" { target *-*-* } 0 }
 // { dg-prune-output "std::allocator<.* has no member named " }
+// { dg-prune-output "must have the same value_type as its allocator" }
diff --git 
a/libstdc++-v3/testsuite/23_containers/unordered_multiset/48101_neg.cc 
b/libstdc++-v3/testsuite/23_containers/unordered_multiset/48101_neg.cc
index a06c302ad6b..fb239501711 100644
--- a/libstdc++-v3/testsuite/23_contain

[PATCH] Conditionally disable tests of non-standard extensions

2018-10-19 Thread Jonathan Wakely

These tests include uses of the extension to allow allocators with the
wrong value_type in containers. Skip those parts of the tests when
__STRICT_ANSI__ is defined.

* testsuite/23_containers/forward_list/requirements/
explicit_instantiation/5.cc [__STRICT_ANSI__]: Don't test non-standard
extension.
* testsuite/23_containers/list/requirements/explicit_instantiation/
5.cc [__STRICT_ANSI__]: Likewise.
* testsuite/23_containers/map/requirements/explicit_instantiation/5.cc
[__STRICT_ANSI__]: Likewise.
* testsuite/23_containers/multimap/requirements/explicit_instantiation/
5.cc [__STRICT_ANSI__]: Likewise.
* testsuite/23_containers/multiset/requirements/explicit_instantiation/
5.cc [__STRICT_ANSI__]: Likewise.
* testsuite/23_containers/set/requirements/explicit_instantiation/5.cc
[__STRICT_ANSI__]: Likewise.
* testsuite/23_containers/unordered_map/requirements/debug_container.cc
[__STRICT_ANSI__]: Likewise.
* testsuite/23_containers/unordered_map/requirements/
explicit_instantiation/5.cc [__STRICT_ANSI__]: Likewise.
* testsuite/23_containers/unordered_multimap/requirements/
explicit_instantiation/5.cc [__STRICT_ANSI__]: Likewise.
* testsuite/23_containers/unordered_multiset/requirements/
explicit_instantiation/5.cc [__STRICT_ANSI__]: Likewise.
* testsuite/23_containers/unordered_set/requirements/
explicit_instantiation/5.cc [__STRICT_ANSI__]: Likewise.
  
Tested powerpc64le-linux, committed to trunk.


commit e6703d881a2ec1cf8463d7a7864b2382b4cb4432
Author: Jonathan Wakely 
Date:   Fri Oct 19 22:15:20 2018 +0100

Conditionally disable tests of non-standard extensions

    These tests include uses of the extension to allow allocators with the
    wrong value_type in containers. Skip those parts of the tests when
    __STRICT_ANSI__ is defined.

* testsuite/23_containers/forward_list/requirements/
explicit_instantiation/5.cc [__STRICT_ANSI__]: Don't test 
non-standard
extension.
* testsuite/23_containers/list/requirements/explicit_instantiation/
5.cc [__STRICT_ANSI__]: Likewise.
* 
testsuite/23_containers/map/requirements/explicit_instantiation/5.cc
[__STRICT_ANSI__]: Likewise.
* 
testsuite/23_containers/multimap/requirements/explicit_instantiation/
5.cc [__STRICT_ANSI__]: Likewise.
* 
testsuite/23_containers/multiset/requirements/explicit_instantiation/
5.cc [__STRICT_ANSI__]: Likewise.
* 
testsuite/23_containers/set/requirements/explicit_instantiation/5.cc
[__STRICT_ANSI__]: Likewise.
* 
testsuite/23_containers/unordered_map/requirements/debug_container.cc
[__STRICT_ANSI__]: Likewise.
* testsuite/23_containers/unordered_map/requirements/
explicit_instantiation/5.cc [__STRICT_ANSI__]: Likewise.
* testsuite/23_containers/unordered_multimap/requirements/
explicit_instantiation/5.cc [__STRICT_ANSI__]: Likewise.
* testsuite/23_containers/unordered_multiset/requirements/
explicit_instantiation/5.cc [__STRICT_ANSI__]: Likewise.
* testsuite/23_containers/unordered_set/requirements/
explicit_instantiation/5.cc [__STRICT_ANSI__]: Likewise.

diff --git 
a/libstdc++-v3/testsuite/23_containers/forward_list/requirements/explicit_instantiation/5.cc
 
b/libstdc++-v3/testsuite/23_containers/forward_list/requirements/explicit_instantiation/5.cc
index 2060ac7db6d..beb2da4f3ac 100644
--- 
a/libstdc++-v3/testsuite/23_containers/forward_list/requirements/explicit_instantiation/5.cc
+++ 
b/libstdc++-v3/testsuite/23_containers/forward_list/requirements/explicit_instantiation/5.cc
@@ -25,4 +25,6 @@
 
 // libstdc++/50118
 template class std::forward_list>;
+#ifndef __STRICT_ANSI__
 template class std::forward_list>;
+#endif
diff --git 
a/libstdc++-v3/testsuite/23_containers/list/requirements/explicit_instantiation/5.cc
 
b/libstdc++-v3/testsuite/23_containers/list/requirements/explicit_instantiation/5.cc
index c34afd0d42f..36d0ac33f13 100644
--- 
a/libstdc++-v3/testsuite/23_containers/list/requirements/explicit_instantiation/5.cc
+++ 
b/libstdc++-v3/testsuite/23_containers/list/requirements/explicit_instantiation/5.cc
@@ -24,4 +24,6 @@
 
 // libstdc++/50118
 template class std::list >;
+#ifndef __STRICT_ANSI__
 template class std::list >;
+#endif
diff --git 
a/libstdc++-v3/testsuite/23_containers/map/requirements/explicit_instantiation/5.cc
 
b/libstdc++-v3/testsuite/23_containers/map/requirements/explicit_instantiation/5.cc
index 9dde5bca362..2e86fb03156 100644
--- 
a/libstdc++-v3/testsuite/23_containers/map/requirements/explicit_instantiation/5.cc
+++ 
b/libstdc++-v3/testsuite/23_containers/map/requirements/explicit_instantiation/5.cc
@@ -22,8 +22,12 @@
 
 // { dg-d

[PATCH] Fix tests that use allocators with incorrect value types

2018-10-19 Thread Jonathan Wakely

As a GNU extension we allow containers to be instantiated with
allocators that use a different value type from the container, and
automatically rebind the allocator to the correct type. This extension
is disabled in strict modes (when __STRICT_ANSI__ is defined, i.e.
-std=c++NN dialects). These testcases unintentionally rely on the
extension and so fail for strict modes.

Tests which intentionally make use of the extension will still fail in
strict dialects, but will be addressed in a later change.

* testsuite/20_util/scoped_allocator/1.cc: Use allocator with correct
value type for the container.
* testsuite/23_containers/forward_list/cons/14.cc: Likewise.
* testsuite/23_containers/map/56613.cc: Likewise.
* testsuite/23_containers/unordered_map/55043.cc: Likewise.
* testsuite/23_containers/unordered_map/allocator/copy.cc: Likewise.
* testsuite/23_containers/unordered_map/allocator/copy_assign.cc:
Likewise.
* testsuite/23_containers/unordered_map/allocator/minimal.cc:
Likewise.
* testsuite/23_containers/unordered_map/allocator/move.cc: Likewise.
* testsuite/23_containers/unordered_map/allocator/move_assign.cc:
Likewise.
* testsuite/23_containers/unordered_map/allocator/noexcept.cc:
Likewise.
* testsuite/23_containers/unordered_map/cons/81891.cc: Likewise.
* testsuite/23_containers/unordered_map/requirements/exception/
basic.cc: Likewise.
* testsuite/23_containers/unordered_map/requirements/exception/
generation_prohibited.cc: Likewise.
* testsuite/23_containers/unordered_map/requirements/exception/
propagation_consistent.cc: Likewise.
* testsuite/23_containers/unordered_multimap/55043.cc: Likewise.
* testsuite/23_containers/unordered_multimap/allocator/copy.cc:
Likewise.
* testsuite/23_containers/unordered_multimap/allocator/copy_assign.cc:
Likewise.
* testsuite/23_containers/unordered_multimap/allocator/minimal.cc:
Likewise.
* testsuite/23_containers/unordered_multimap/allocator/move.cc:
Likewise.
* testsuite/23_containers/unordered_multimap/allocator/move_assign.cc:
Likewise.
* testsuite/23_containers/unordered_multimap/allocator/noexcept.cc:
Likewise.
* testsuite/23_containers/unordered_multimap/requirements/exception/
basic.cc: Likewise.
* testsuite/23_containers/unordered_multimap/requirements/exception/
generation_prohibited.cc: Likewise.
* testsuite/23_containers/unordered_multimap/requirements/exception/
propagation_consistent.cc: Likewise.
* testsuite/23_containers/unordered_multimap/requirements/
explicit_instantiation/5.cc: Likewise.
* testsuite/ext/malloc_allocator/sanity.cc: Likewise.
  
Tested powerpc64le-linux, committed to trunk.


commit fe9420347c0f639ff056cee6abbbe69191e55a2c
Author: Jonathan Wakely 
Date:   Fri Oct 19 22:06:26 2018 +0100

Fix tests that use allocators with incorrect value types

As a GNU extension we allow containers to be instantiated with
allocators that use a different value type from the container, and
automatically rebind the allocator to the correct type. This extension
is disabled in strict modes (when __STRICT_ANSI__ is defined, i.e.
-std=c++NN dialects). These testcases unintentionally rely on the
extension and so fail for strict modes.

Tests which intentionally make use of the extension will still fail in
strict dialects, but will be addressed in a later change.

* testsuite/20_util/scoped_allocator/1.cc: Use allocator with 
correct
value type for the container.
* testsuite/23_containers/forward_list/cons/14.cc: Likewise.
* testsuite/23_containers/map/56613.cc: Likewise.
* testsuite/23_containers/unordered_map/55043.cc: Likewise.
* testsuite/23_containers/unordered_map/allocator/copy.cc: Likewise.
* testsuite/23_containers/unordered_map/allocator/copy_assign.cc:
Likewise.
* testsuite/23_containers/unordered_map/allocator/minimal.cc:
Likewise.
* testsuite/23_containers/unordered_map/allocator/move.cc: Likewise.
* testsuite/23_containers/unordered_map/allocator/move_assign.cc:
Likewise.
* testsuite/23_containers/unordered_map/allocator/noexcept.cc:
Likewise.
* testsuite/23_containers/unordered_map/cons/81891.cc: Likewise.
* testsuite/23_containers/unordered_map/requirements/exception/
basic.cc: Likewise.
* testsuite/23_containers/unordered_map/requirements/exception/
generation_prohibited.cc: Likewise.
* testsuite/23_containers/unordered_map/requirements/exception/
propagation_consistent.cc: Likewise.
   

[PATCH] Disable tests that only pass for GNU dialects

2018-10-19 Thread Jonathan Wakely

The airy and hypergeometric functions are non-standard extensions and
are only defined for -std=gnu++NN dialects, not -std=c++NN ones.

* ext/special_functions/airy_ai/check_nan.cc: Skip test for
non-standard extension when a strict -std=c++NN dialect is used.
* ext/special_functions/airy_ai/check_value.cc: Likewise.
* ext/special_functions/airy_ai/compile.cc: Likewise.
* ext/special_functions/airy_bi/check_nan.cc: Likewise.
* ext/special_functions/airy_bi/check_value.cc: Likewise.
* ext/special_functions/airy_bi/compile.cc: Likewise.
* ext/special_functions/conf_hyperg/check_nan.cc: Likewise.
* ext/special_functions/conf_hyperg/check_value.cc: Likewise.
* ext/special_functions/conf_hyperg/compile.cc: Likewise.
* ext/special_functions/hyperg/check_nan.cc: Likewise.
* ext/special_functions/hyperg/check_value.cc: Likewise.
* ext/special_functions/hyperg/compile.cc: Likewise.
  
Tested powerpc64le-linux, committed to trunk.


commit af8f4eee0b824de168fde8b8d7423360fdc8f6ae
Author: Jonathan Wakely 
Date:   Fri Oct 19 21:23:56 2018 +0100

Disable tests that only pass for GNU dialects

The airy and hypergeometric functions are non-standard extensions and
are only defined for -std=gnu++NN dialects, not -std=c++NN ones.

* ext/special_functions/airy_ai/check_nan.cc: Skip test for
non-standard extension when a strict -std=c++NN dialect is used.
* ext/special_functions/airy_ai/check_value.cc: Likewise.
* ext/special_functions/airy_ai/compile.cc: Likewise.
* ext/special_functions/airy_bi/check_nan.cc: Likewise.
* ext/special_functions/airy_bi/check_value.cc: Likewise.
* ext/special_functions/airy_bi/compile.cc: Likewise.
* ext/special_functions/conf_hyperg/check_nan.cc: Likewise.
* ext/special_functions/conf_hyperg/check_value.cc: Likewise.
* ext/special_functions/conf_hyperg/compile.cc: Likewise.
* ext/special_functions/hyperg/check_nan.cc: Likewise.
* ext/special_functions/hyperg/check_value.cc: Likewise.
* ext/special_functions/hyperg/compile.cc: Likewise.

diff --git a/libstdc++-v3/testsuite/ext/special_functions/airy_ai/check_nan.cc 
b/libstdc++-v3/testsuite/ext/special_functions/airy_ai/check_nan.cc
index c5473a35aea..563bf891b8d 100644
--- a/libstdc++-v3/testsuite/ext/special_functions/airy_ai/check_nan.cc
+++ b/libstdc++-v3/testsuite/ext/special_functions/airy_ai/check_nan.cc
@@ -2,6 +2,7 @@
 // { dg-require-c-std "" }
 // { dg-options "-D__STDCPP_WANT_MATH_SPEC_FUNCS__" }
 // { dg-add-options ieee }
+// { dg-skip-if "no extensions in strict dialects" { *-*-* } { "-std=c++*" } }
 
 // Copyright (C) 2016-2018 Free Software Foundation, Inc.
 //
diff --git 
a/libstdc++-v3/testsuite/ext/special_functions/airy_ai/check_value.cc 
b/libstdc++-v3/testsuite/ext/special_functions/airy_ai/check_value.cc
index 8c4c5362b86..e92267bfffa 100644
--- a/libstdc++-v3/testsuite/ext/special_functions/airy_ai/check_value.cc
+++ b/libstdc++-v3/testsuite/ext/special_functions/airy_ai/check_value.cc
@@ -1,5 +1,6 @@
 // { dg-do run { target c++11 } }
 // { dg-options "-D__STDCPP_WANT_MATH_SPEC_FUNCS__" }
+// { dg-skip-if "no extensions in strict dialects" { *-*-* } { "-std=c++*" } }
 //
 // Copyright (C) 2016-2018 Free Software Foundation, Inc.
 //
diff --git a/libstdc++-v3/testsuite/ext/special_functions/airy_ai/compile.cc 
b/libstdc++-v3/testsuite/ext/special_functions/airy_ai/compile.cc
index a4d5006dafc..581701a6bae 100644
--- a/libstdc++-v3/testsuite/ext/special_functions/airy_ai/compile.cc
+++ b/libstdc++-v3/testsuite/ext/special_functions/airy_ai/compile.cc
@@ -1,5 +1,6 @@
 // { dg-do compile { target c++11 } }
 // { dg-options "-D__STDCPP_WANT_MATH_SPEC_FUNCS__" }
+// { dg-skip-if "no extensions in strict dialects" { *-*-* } { "-std=c++*" } }
 
 // Copyright (C) 2016-2018 Free Software Foundation, Inc.
 //
diff --git a/libstdc++-v3/testsuite/ext/special_functions/airy_bi/check_nan.cc 
b/libstdc++-v3/testsuite/ext/special_functions/airy_bi/check_nan.cc
index 4b852882b0b..07649603356 100644
--- a/libstdc++-v3/testsuite/ext/special_functions/airy_bi/check_nan.cc
+++ b/libstdc++-v3/testsuite/ext/special_functions/airy_bi/check_nan.cc
@@ -2,6 +2,7 @@
 // { dg-require-c-std "" }
 // { dg-options "-D__STDCPP_WANT_MATH_SPEC_FUNCS__" }
 // { dg-add-options ieee }
+// { dg-skip-if "no extensions in strict dialects" { *-*-* } { "-std=c++*" } }
 
 // Copyright (C) 2016-2018 Free Software Foundation, Inc.
 //
diff --git 
a/libstdc++-v3/testsuite/ext/special_functions/airy_bi/check_value.cc 
b/libstdc++-v3/testsuite/ext/special_functions/airy_bi/check_value.cc
index d830fbf8aa5..d10c6c5f15c 100644
--- a/libstdc++-v3/testsuite/ext/special_functions/airy_bi/check_value.cc
+++ b/libstdc++-v3/testsuite/ext/special_functions/airy_bi/check_value.cc
@@ -1,5 +1,6 @@
 /

[PATCH] Remove duplicate tests

2018-10-19 Thread Jonathan Wakely

These tests originally existed to check the containers in C++11 mode,
when the default was C++98 mode. Now that the default is C++14 (and we
run most tests for all modes) it serves no purpose to have two copies of
the tests when neither is explicitly using -std=gnu++98 anyway.

* testsuite/23_containers/list/requirements/explicit_instantiation/
5_c++0x.cc: Remove redundant test that is functionally identical to
the 5.cc test.
* testsuite/23_containers/map/requirements/explicit_instantiation/
5_c++0x.cc: Likewise.
* testsuite/23_containers/multimap/requirements/explicit_instantiation/
5_c++0x.cc: Likewise.
* testsuite/23_containers/multiset/requirements/explicit_instantiation/
5_c++0x.cc: Likewise.
* testsuite/23_containers/set/requirements/explicit_instantiation/
5_c++0x.cc: Likewise.

Tested powerpc64le-linux, committed to trunk.

commit db34c4bc1eabb9eadf9f742d158b80e006be7f5c
Author: Jonathan Wakely 
Date:   Fri Oct 19 20:40:01 2018 +0100

Remove duplicate tests

These tests originally existed to check the containers in C++11 mode,
when the default was C++98 mode. Now that the default is C++14 (and we
run most tests for all modes) it serves no purpose to have two copies of
the tests when neither is explicitly using -std=gnu++98 anyway.

* testsuite/23_containers/list/requirements/explicit_instantiation/
5_c++0x.cc: Remove redundant test that is functionally identical to
the 5.cc test.
* testsuite/23_containers/map/requirements/explicit_instantiation/
5_c++0x.cc: Likewise.
* 
testsuite/23_containers/multimap/requirements/explicit_instantiation/
5_c++0x.cc: Likewise.
* 
testsuite/23_containers/multiset/requirements/explicit_instantiation/
5_c++0x.cc: Likewise.
* testsuite/23_containers/set/requirements/explicit_instantiation/
5_c++0x.cc: Likewise.

diff --git 
a/libstdc++-v3/testsuite/23_containers/list/requirements/explicit_instantiation/5_c++0x.cc
 
b/libstdc++-v3/testsuite/23_containers/list/requirements/explicit_instantiation/5_c++0x.cc
deleted file mode 100644
index e93a8c75707..000
--- 
a/libstdc++-v3/testsuite/23_containers/list/requirements/explicit_instantiation/5_c++0x.cc
+++ /dev/null
@@ -1,28 +0,0 @@
-
-// Copyright (C) 2011-2018 Free Software Foundation, Inc.
-//
-// This file is part of the GNU ISO C++ Library.  This library is free
-// software; you can redistribute it and/or modify it under the
-// terms of the GNU General Public License as published by the
-// Free Software Foundation; either version 3, or (at your option)
-// any later version.
-
-// This library is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-// GNU General Public License for more details.
-
-// You should have received a copy of the GNU General Public License along
-// with this library; see the file COPYING3.  If not see
-// .
-
-// This file tests explicit instantiation of library containers
-
-#include 
-#include 
-
-// { dg-do compile { target c++11 } }
-
-// libstdc++/50118
-template class std::list>;
-template class std::list>;
diff --git 
a/libstdc++-v3/testsuite/23_containers/map/requirements/explicit_instantiation/5_c++0x.cc
 
b/libstdc++-v3/testsuite/23_containers/map/requirements/explicit_instantiation/5_c++0x.cc
deleted file mode 100644
index 88bd861f631..000
--- 
a/libstdc++-v3/testsuite/23_containers/map/requirements/explicit_instantiation/5_c++0x.cc
+++ /dev/null
@@ -1,30 +0,0 @@
-
-// Copyright (C) 2011-2018 Free Software Foundation, Inc.
-//
-// This file is part of the GNU ISO C++ Library.  This library is free
-// software; you can redistribute it and/or modify it under the
-// terms of the GNU General Public License as published by the
-// Free Software Foundation; either version 3, or (at your option)
-// any later version.
-
-// This library is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-// GNU General Public License for more details.
-
-// You should have received a copy of the GNU General Public License along
-// with this library; see the file COPYING3.  If not see
-// .
-
-// This file tests explicit instantiation of library containers
-
-#include 
-#include 
-
-// { dg-do compile { target c++11 } }
-
-// libstdc++/50118
-template class std::map,
-   __gnu_test::ExplicitConsAlloc>;
-template class std::map,
-   __gnu_test::ExplicitConsAlloc>;
diff --git 
a/libstdc++-v3/testsuite/23_containers/multimap/requirements/explicit_instantiation/5_c++0x.cc
 
b/libstdc++-v3/t

Re: [Patc, fortran] PR85603 - ICE with character array substring assignment

2018-10-19 Thread Dominique d'Humières
Hi Paul,

I get a regression with your patch:

obfuscated_tn4.f90:300:0:

  300 | TP6%ROR=TP6%ROR(:PP4-1)
  | 
internal compiler error: in gfc_trans_deferred_vars, at 
fortran/trans-decl.c:4754


I’ll try to reduce the test.

Dominique



[PATCH, rs6000] Use unaligned vector types for some pointer casts

2018-10-19 Thread Bill Schmidt
Hi,

The x86 intrinsic compatibility headers contain a couple of instances of
undefined behavior where a cast to an aligned type is used when that
alignment is not guaranteed by the expression to be cast from.  This
patch fixes that problem by replacing the aligned types with unaligned
versions of the same type.

Bootstrapped and tested on powerpc64le-linux-gnu with no regressions.
Is this okay for trunk?

(I also cleaned up a badly formatted comment in the neighborhood.)

Thanks,
Bill


2018-10-19  Bill Schmidt  
Jinsong Ji  

* config/rs6000/emmintrin.h (_MM_SHUFFLE2): Comment cleanup.
(_mm_storeu_pd): Use unaligned vector type for pointer cast.
(_mm_maskmoveu_si128): Likewise.
* config/rs6000/xmmintrin.h (__m128_u): New typedef.
(_mm_storeu_ps): Use unaligned vector type for pointer cast.


Index: gcc/config/rs6000/emmintrin.h
===
--- gcc/config/rs6000/emmintrin.h   (revision 265318)
+++ gcc/config/rs6000/emmintrin.h   (working copy)
@@ -85,7 +85,7 @@ typedef double __m128d __attribute__ ((__vector_si
 typedef long long __m128i_u __attribute__ ((__vector_size__ (16), 
__may_alias__, __aligned__ (1)));
 typedef double __m128d_u __attribute__ ((__vector_size__ (16), __may_alias__, 
__aligned__ (1)));
 
-/* Define two value permute mask */
+/* Define two value permute mask.  */
 #define _MM_SHUFFLE2(x,y) (((x) << 1) | (y))
 
 /* Create a vector with element 0 as F and the rest zero.  */
@@ -201,7 +201,7 @@ _mm_store_pd (double *__P, __m128d __A)
 extern __inline void __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
 _mm_storeu_pd (double *__P, __m128d __A)
 {
-  *(__m128d *)__P = __A;
+  *(__m128d_u *)__P = __A;
 }
 
 /* Stores the lower DPFP value.  */
@@ -2175,7 +2175,7 @@ _mm_maskmoveu_si128 (__m128i __A, __m128i __B, cha
 {
   __v2du hibit = { 0x7f7f7f7f7f7f7f7fUL, 0x7f7f7f7f7f7f7f7fUL};
   __v16qu mask, tmp;
-  __m128i *p = (__m128i*)__C;
+  __m128i_u *p = (__m128i_u*)__C;
 
   tmp = (__v16qu)_mm_loadu_si128(p);
   mask = (__v16qu)vec_cmpgt ((__v16qu)__B, (__v16qu)hibit);
Index: gcc/config/rs6000/xmmintrin.h
===
--- gcc/config/rs6000/xmmintrin.h   (revision 265318)
+++ gcc/config/rs6000/xmmintrin.h   (working copy)
@@ -85,6 +85,9 @@
vector types, and their scalar components.  */
 typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__));
 
+/* Unaligned version of the same type.  */
+typedef float __m128_u __attribute__ ((__vector_size__ (16), __may_alias__, __aligned__ (1)));
+
 /* Internal data types for implementing the intrinsics.  */
 typedef float __v4sf __attribute__ ((__vector_size__ (16)));
 
@@ -172,7 +175,7 @@ _mm_store_ps (float *__P, __m128 __A)
 extern __inline void __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
 _mm_storeu_ps (float *__P, __m128 __A)
 {
-  *(__m128 *)__P = __A;
+  *(__m128_u *)__P = __A;
 }
 
 /* Store four SPFP values in reverse order.  The address must be aligned.  */



[PATCH, rs6000] Don't use __vector __m64 for compatibility reasons

2018-10-19 Thread Bill Schmidt
Hi,

For historical reasons, there are different interpretations of whether a
type "__vector <type>" is allowed when <type> is a typedef.  For maximum
compatibility between compilers, this patch removes some such cases from
the x86 intrinsic compatibility headers.

Bootstrapped and tested on powerpc64le-linux-gnu with no regressions.
Is this okay for trunk?

Thanks!
Bill


2018-10-19  Bill Schmidt  
Jinsong Ji  

* config/rs6000/emmintrin.h (_mm_movemask_pd): Replace __vector
__m64 with __vector unsigned long long for compatibility.
(_mm_movemask_epi8): Likewise.
* config/rs6000/xmmintrin.h (_mm_cvtps_pi32): Likewise.
(_mm_cvttps_pi32): Likewise.
(_mm_cvtpi32_ps): Likewise.
(_mm_cvtps_pi16): Likewise.
(_mm_loadh_pi): Likewise.
(_mm_storeh_pi): Likewise.
(_mm_movehl_ps): Likewise.
(_mm_movelh_ps): Likewise.
(_mm_loadl_pi): Likewise.
(_mm_storel_pi): Likewise.
(_mm_movemask_ps): Likewise.
(_mm_shuffle_pi16): Likewise.


Index: gcc/config/rs6000/emmintrin.h
===
--- gcc/config/rs6000/emmintrin.h   (revision 265318)
+++ gcc/config/rs6000/emmintrin.h   (working copy)
@@ -1228,7 +1228,7 @@ _mm_loadl_pd (__m128d __A, double const *__B)
 extern __inline int __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
 _mm_movemask_pd (__m128d  __A)
 {
-  __vector __m64 result;
+  __vector unsigned long long result;
   static const __vector unsigned int perm_mask =
 {
 #ifdef __LITTLE_ENDIAN__
@@ -1238,8 +1238,9 @@ _mm_movemask_pd (__m128d  __A)
 #endif
 };
 
-  result = (__vector __m64) vec_vbpermq ((__vector unsigned char) __A,
-(__vector unsigned char) perm_mask);
+  result = ((__vector unsigned long long)
+   vec_vbpermq ((__vector unsigned char) __A,
+(__vector unsigned char) perm_mask));
 
 #ifdef __LITTLE_ENDIAN__
   return result[1];
@@ -2012,7 +2013,7 @@ _mm_min_epu8 (__m128i __A, __m128i __B)
 extern __inline int __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
 _mm_movemask_epi8 (__m128i __A)
 {
-  __vector __m64 result;
+  __vector unsigned long long result;
   static const __vector unsigned char perm_mask =
 {
 #ifdef __LITTLE_ENDIAN__
@@ -2024,8 +2025,9 @@ _mm_movemask_epi8 (__m128i __A)
 #endif
 };
 
-  result = (__vector __m64) vec_vbpermq ((__vector unsigned char) __A,
-(__vector unsigned char) perm_mask);
+  result = ((__vector unsigned long long)
+   vec_vbpermq ((__vector unsigned char) __A,
+(__vector unsigned char) perm_mask));
 
 #ifdef __LITTLE_ENDIAN__
   return result[1];
Index: gcc/config/rs6000/xmmintrin.h
===
--- gcc/config/rs6000/xmmintrin.h   (revision 265318)
+++ gcc/config/rs6000/xmmintrin.h   (working copy)
@@ -985,12 +985,12 @@ _mm_cvtps_pi32 (__m128 __A)
 {
   /* Splat two lower SPFP values to both halves.  */
   __v4sf temp, rounded;
-  __vector __m64 result;
+  __vector unsigned long long result;
 
   /* Splat two lower SPFP values to both halves.  */
   temp = (__v4sf) vec_splat ((__vector long long)__A, 0);
   rounded = vec_rint(temp);
-  result = (__vector __m64) vec_cts (rounded, 0);
+  result = (__vector unsigned long long) vec_cts (rounded, 0);
 
   return ((__m64) __builtin_unpack_vector_int128 ((__vector __int128)result, 
0));
 }
@@ -1043,11 +1043,11 @@ extern __inline __m64 __attribute__((__gnu_inline_
 _mm_cvttps_pi32 (__m128 __A)
 {
   __v4sf temp;
-  __vector __m64 result;
+  __vector unsigned long long result;
 
   /* Splat two lower SPFP values to both halves.  */
   temp = (__v4sf) vec_splat ((__vector long long)__A, 0);
-  result = (__vector __m64) vec_cts (temp, 0);
+  result = (__vector unsigned long long) vec_cts (temp, 0);
 
   return ((__m64) __builtin_unpack_vector_int128 ((__vector __int128)result, 
0));
 }
@@ -1103,8 +1103,9 @@ _mm_cvtpi32_ps (__m128__A, __m64__
   vm1 = (__vector signed int) __builtin_pack_vector_int128 (__B, __B);
   vf1 = (__vector float) vec_ctf (vm1, 0);
 
-  return ((__m128) (__vector __m64)
-{ ((__vector __m64)vf1) [0], ((__vector __m64)__A) [1]});
+  return ((__m128) (__vector unsigned long long)
+{ ((__vector unsigned long long)vf1) [0],
+   ((__vector unsigned long long)__A) [1]});
 }
 
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
@@ -1201,11 +1202,11 @@ _mm_cvtps_pi16(__m128 __A)
 {
   __v4sf rounded;
   __vector signed int temp;
-  __vector __m64 result;
+  __vector unsigned long long result;
 
   rounded = vec_rint(__A);
   temp = vec_cts (rounded, 0);
-  result = (__vector __m64) vec_pack (temp, temp);
+  result = (__vector unsigned long long) vec_pack (temp, temp);
 
   return ((__m64) __builtin_unpack_vector

[RFC][PATCH LRA] WIP patch to fix one part of PR87507

2018-10-19 Thread Peter Bergner
Vlad, Jeff and Segher,

I think I have determined what is happening with the aarch64 test case that
is failing after my r264897 commit.  It appears my patch is just exposing
an issue in lra-constraints.c:process_alt_operands() when processing an insn
with early clobber operands.  Jeff & Segher, I have reverted all of my
previous patches we were making to lra-lives.c for the analysis below and
am just using straight trunk revision 264897 (ie, my last patch).

The aarch64 test case is below.  Note that the first 2 input/output operands
are early clobber and are used with asm defined hard regs (x0 and x1).
Also notice that oldval1 and oldval2 are pseudos with the same values as
those in/out operands input values.

long
__cmpxchg_double (unsigned long arg)
{
  unsigned long old1 = 1;
  unsigned long old2 = arg;
  unsigned long new1 = 2;
  unsigned long new2 = 3;
  void *ptr = 0;

  unsigned long oldval1 = old1;
  unsigned long oldval2 = old2;
  register unsigned long x0 asm ("x0") = old1;
  register unsigned long x1 asm ("x1") = old2;
  register unsigned long x2 asm ("x2") = new1;
  register unsigned long x3 asm ("x3") = new2;
  register unsigned long x4 asm ("x4") = (unsigned long) ptr;
  asm volatile ("casp %[old1], %[old2], %[new1], %[new2], %[v]\n"
"eor  %[old1], %[old1], %[oldval1]\n"
"eor  %[old2], %[old2], %[oldval2]\n"
"orr  %[old1], %[old1], %[old2]\n"
: [old1] "+&r" (x0), [old2] "+&r" (x1), [v] "+Q" (* (unsigned 
long *) ptr)
: [new1] "r" (x2), [new2] "r" (x3), [ptr] "r" (x4), [oldval1] 
"r" (oldval1), [oldval2] "r" (oldval2)
: "x16", "x17", "x30");
  return x0;
}

After IRA, we have the following register assignment and rtl.  The only
difference between before my patch and after my patch is that r92 used
to be assigned to a hardreg that is not x0 - x4, which hides the lurking
problem.

r92 -> x1, r93 -> x5, r94 -> x6

(insn 2 3 8 2 (set (reg/v:DI 92 [arg]) (reg:DI 0 x0)) (REG_DEAD (reg:DI 0 x0)))
(insn 8 2 7 2 (set (reg/v:DI 2 x2 [*x2]) (const_int 2)))
(insn 7 8 6 2 (set (reg/v:DI 1 x1 [*x1]) (reg/v:DI 92 [arg])))
(insn 6 7 9 2 (set (reg/v:DI 0 x0 [*x0]) (const_int 1)))
(insn 9 6 12 2 (set (reg/v:DI 3 x3 [*x3]) (const_int 3)))
(insn 12 9 10 2 (set (reg:DI 94 [*x0]) (reg/v:DI 0 x0 [*x0])))
(insn 10 12 11 2 (set (reg/v:DI 4 x4 [*x4]) (const_int 0)))
(insn 11 10 14 2 (set (reg/f:DI 93) (const_int 0)))
(insn 14 11 21 2 (parallel [
(set (reg/v:DI 0 x0 [*x0])
(asm_operands/v:DI ("casp %0, %1, %3, %4, %2
eor %0, %0, %6
eor %1, %1, %7
orr %0, %0, %1
") ("=&r") 0 [(reg/v:DI 2 x2 [*x2])
  (reg/v:DI 3 x3 [*x3])
  (reg/v:DI 4 x4 [*x4])
  (reg:DI 94 [*x0])
  (reg/v:DI 92 [arg])
  (reg/v:DI 0 x0 [*x0])
  (reg/v:DI 1 x1 [*x1])
  (mem:DI (reg/f:DI 93))]
 [(asm_input:DI ("r"))
  (asm_input:DI ("r"))
  (asm_input:DI ("r"))
  (asm_input:DI ("r"))
  (asm_input:DI ("r"))
  (asm_input:DI ("0"))
  (asm_input:DI ("1"))
  (asm_input:DI ("Q"))])
(set (reg/v:DI 1 x1 [*x1])

") ("=&r") 1 []
(set (mem:DI (reg/f:DI 93) [1 MEM[(long unsigned intD.11 *)0B]+0 S8 
A128])

") ("=Q") 2 []
(clobber (reg:DI 30 x30))
(clobber (reg:DI 17 x17))
(clobber (reg:DI 16 x16))
]) "slub-min.c":17 -1
 (expr_list:REG_DEAD (reg:DI 94 [*x0])
(expr_list:REG_DEAD (reg/f:DI 93)
(expr_list:REG_DEAD (reg/v:DI 92 [arg])
(expr_list:REG_DEAD (reg/v:DI 4 x4 [*x4])
(expr_list:REG_DEAD (reg/v:DI 3 x3 [*x3])
(expr_list:REG_DEAD (reg/v:DI 2 x2 [*x2])
(expr_list:REG_UNUSED (reg:DI 30 x30)
(expr_list:REG_UNUSED (reg:DI 17 x17)
(expr_list:REG_UNUSED (reg:DI 16 x16)
(expr_list:REG_UNUSED (reg/v:DI 1 x1 
[*x1])
(nil
(insn 21 14 23 2 (use (reg/i:DI 0 x0)))


In lra-constraints.c:process_alt_operands(), we notice that pseudo 92 is
assigned to x1 and that an early clobber operand is also assigned to x1, or
rather, that it uses x1 explicitly.  This is enough to trigger reload(s),
but the problem is we end up trying to reload the early clobber operand
which has been forced into x1 via register asm assignment, instead of
pseudo 92 which conflicts with it.

The problematic code in question is:

  /* If earlyclobber operand conflicts with another
 non-matching operand which is actually the same register
 as the earlyclobber operand, it is better to reload the
 another operand as an operand matching the earlyclobber

[PATCH 1/2] i386: Enable AVX512 memory broadcast for FP mul

2018-10-19 Thread H.J. Lu
Many AVX512 vector operations can broadcast from a scalar memory source.
This patch enables memory broadcast for FP mul operations.

gcc/

PR target/72782
* config/i386/sse.md (*mul3_bcst_1): New.
(*mul3_bcst_2): Likewise.

gcc/testsuite/

PR target/72782
* gcc.target/i386/avx512f-mul-df-zmm-1.c: New test.
* gcc.target/i386/avx512f-mul-sf-zmm-1.c: Likewise.
* gcc.target/i386/avx512f-mul-sf-zmm-2.c: Likewise.
* gcc.target/i386/avx512f-mul-sf-zmm-3.c: Likewise.
* gcc.target/i386/avx512f-mul-sf-zmm-4.c: Likewise.
* gcc.target/i386/avx512f-mul-sf-zmm-5.c: Likewise.
* gcc.target/i386/avx512f-mul-sf-zmm-6.c: Likewise.
* gcc.target/i386/avx512vl-mul-sf-xmm-1.c: Likewise.
* gcc.target/i386/avx512vl-mul-sf-ymm-1.c: Likewise.
---
 gcc/config/i386/sse.md| 24 +++
 .../gcc.target/i386/avx512f-mul-df-zmm-1.c| 12 ++
 .../gcc.target/i386/avx512f-mul-sf-zmm-1.c| 12 ++
 .../gcc.target/i386/avx512f-mul-sf-zmm-2.c| 12 ++
 .../gcc.target/i386/avx512f-mul-sf-zmm-3.c| 12 ++
 .../gcc.target/i386/avx512f-mul-sf-zmm-4.c| 12 ++
 .../gcc.target/i386/avx512f-mul-sf-zmm-5.c| 12 ++
 .../gcc.target/i386/avx512f-mul-sf-zmm-6.c| 12 ++
 .../gcc.target/i386/avx512vl-mul-sf-xmm-1.c   | 12 ++
 .../gcc.target/i386/avx512vl-mul-sf-ymm-1.c   | 12 ++
 10 files changed, 132 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-mul-df-zmm-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-mul-sf-zmm-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-mul-sf-zmm-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-mul-sf-zmm-3.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-mul-sf-zmm-4.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-mul-sf-zmm-5.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-mul-sf-zmm-6.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-mul-sf-xmm-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-mul-sf-ymm-1.c

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 411c78ae8d3..a73659e6bd2 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -1754,6 +1754,30 @@
(set_attr "btver2_decode" "direct,double")
(set_attr "mode" "")])
 
+(define_insn "*mul3_bcst_1"
+  [(set (match_operand:VF_AVX512 0 "register_operand" "=v")
+   (mult:VF_AVX512
+ (match_operand:VF_AVX512 1 "register_operand" "v")
+ (vec_duplicate:VF_AVX512
+(match_operand: 2 "memory_operand" "m"]
+  "TARGET_AVX512F && "
+  "vmul\t{%2, %1, 
%0|%0, %1, %2<>}"
+  [(set_attr "prefix" "evex")
+   (set_attr "type" "ssemul")
+   (set_attr "mode" "")])
+
+(define_insn "*mul3_bcst_2"
+  [(set (match_operand:VF_AVX512 0 "register_operand" "=v")
+   (mult:VF_AVX512
+ (vec_duplicate:VF_AVX512
+(match_operand: 1 "memory_operand" "m"))
+ (match_operand:VF_AVX512 2 "register_operand" "v")))]
+  "TARGET_AVX512F && "
+  "vmul\t{%1, %2, 
%0|%0, %2, %1<>}"
+  [(set_attr "prefix" "evex")
+   (set_attr "type" "ssemul")
+   (set_attr "mode" "")])
+
 (define_insn 
"_vm3"
   [(set (match_operand:VF_128 0 "register_operand" "=x,v")
(vec_merge:VF_128
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-mul-df-zmm-1.c 
b/gcc/testsuite/gcc.target/i386/avx512f-mul-df-zmm-1.c
new file mode 100644
index 000..e3c51986fe2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-mul-df-zmm-1.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vmulpd\[ 
\\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */
+/* { dg-final { scan-assembler-not "vbroadcastsd\[^\n\]*%zmm\[0-9\]+" } } */
+
+#define type __m512d
+#define vec 512
+#define op mul
+#define suffix pd
+#define SCALAR double
+
+#include "avx512-binop-1.h"
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-mul-sf-zmm-1.c 
b/gcc/testsuite/gcc.target/i386/avx512f-mul-sf-zmm-1.c
new file mode 100644
index 000..14bccca276a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-mul-sf-zmm-1.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vmulps\[ 
\\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */
+/* { dg-final { scan-assembler-not "vbroadcastss\[^\n\]*%zmm\[0-9\]+" } } */
+
+#define type __m512
+#define vec 512
+#define op mul
+#define suffix ps
+#define SCALAR float
+
+#include "avx512-binop-1.h"
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-mul-sf-zmm-2.c 
b/gcc/testsuite/gcc.target/i386/avx512f-mul-sf-zmm-2.c
new file mode 100644
index 000..8293324084b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-mul-sf-zmm-2.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-opt

[PATCH 2/2] i386: Enable AVX512 memory broadcast for FP div

2018-10-19 Thread H.J. Lu
Many AVX512 vector operations can broadcast from a scalar memory source.
This patch enables memory broadcast for FP div operations.

gcc/

PR target/72782
* config/i386/sse.md (*_div3_bcst): New.

gcc/testsuite/

PR target/72782
* gcc.target/i386/avx512f-div-df-zmm-1.c: New test.
* gcc.target/i386/avx512f-div-sf-zmm-1.c: Likewise.
* gcc.target/i386/avx512f-div-sf-zmm-2.c: Likewise.
* gcc.target/i386/avx512f-div-sf-zmm-3.c: Likewise.
* gcc.target/i386/avx512f-div-sf-zmm-4.c: Likewise.
* gcc.target/i386/avx512f-div-sf-zmm-5.c: Likewise.
* gcc.target/i386/avx512vl-div-sf-xmm-1.c: Likewise.
* gcc.target/i386/avx512vl-div-sf-ymm-1.c: Likewise.
---
 gcc/config/i386/sse.md   | 12 
 gcc/testsuite/gcc.target/i386/avx512f-div-df-zmm-1.c | 12 
 gcc/testsuite/gcc.target/i386/avx512f-div-sf-zmm-1.c | 12 
 gcc/testsuite/gcc.target/i386/avx512f-div-sf-zmm-2.c | 12 
 gcc/testsuite/gcc.target/i386/avx512f-div-sf-zmm-3.c | 12 
 gcc/testsuite/gcc.target/i386/avx512f-div-sf-zmm-4.c | 12 
 gcc/testsuite/gcc.target/i386/avx512f-div-sf-zmm-5.c | 12 
 .../gcc.target/i386/avx512vl-div-sf-xmm-1.c  | 12 
 .../gcc.target/i386/avx512vl-div-sf-ymm-1.c  | 12 
 9 files changed, 108 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-div-df-zmm-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-div-sf-zmm-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-div-sf-zmm-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-div-sf-zmm-3.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-div-sf-zmm-4.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-div-sf-zmm-5.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-div-sf-xmm-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-div-sf-ymm-1.c

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index a73659e6bd2..635a6902d33 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -1836,6 +1836,18 @@
(set_attr "prefix" "")
(set_attr "mode" "")])
 
+(define_insn "*_div3_bcst"
+  [(set (match_operand:VF_AVX512 0 "register_operand" "=v")
+   (div:VF_AVX512
+ (match_operand:VF_AVX512 1 "register_operand" "v")
+ (vec_duplicate:VF_AVX512
+(match_operand: 2 "memory_operand" "m"]
+  "TARGET_AVX512F && "
+  "vdiv\t{%2, %1, 
%0|%0, %1, %2<>}"
+  [(set_attr "prefix" "evex")
+(set_attr "type" "ssediv")
+   (set_attr "mode" "")])
+
 (define_insn "_rcp2"
   [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
(unspec:VF1_128_256
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-div-df-zmm-1.c 
b/gcc/testsuite/gcc.target/i386/avx512f-div-df-zmm-1.c
new file mode 100644
index 000..7c40112bbcc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-div-df-zmm-1.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vdivpd\[ 
\\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */
+/* { dg-final { scan-assembler-not "vbroadcastsd\[^\n\]*%zmm\[0-9\]+" } } */
+
+#define type __m512d
+#define vec 512
+#define op div
+#define suffix pd
+#define SCALAR double
+
+#include "avx512-binop-1.h"
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-div-sf-zmm-1.c 
b/gcc/testsuite/gcc.target/i386/avx512f-div-sf-zmm-1.c
new file mode 100644
index 000..b131929eeba
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-div-sf-zmm-1.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vdivps\[ 
\\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */
+/* { dg-final { scan-assembler-not "vbroadcastss\[^\n\]*%zmm\[0-9\]+" } } */
+
+#define type __m512
+#define vec 512
+#define op div
+#define suffix ps
+#define SCALAR float
+
+#include "avx512-binop-1.h"
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-div-sf-zmm-2.c 
b/gcc/testsuite/gcc.target/i386/avx512f-div-sf-zmm-2.c
new file mode 100644
index 000..373e3c63c03
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-div-sf-zmm-2.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vbroadcastss\[^\n\]*%zmm\[0-9\]+" 1 } } 
*/
+/* { dg-final { scan-assembler-times "vdivps\[^\n\]*%zmm\[0-9\]+" 1 } } */
+
+#define type __m512
+#define vec 512
+#define op div
+#define suffix ps
+#define SCALAR float
+
+#include "avx512-binop-2.h"
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-div-sf-zmm-3.c 
b/gcc/testsuite/gcc.target/i386/avx512f-div-sf-zmm-3.c
new file mode 100644
index 000..84e78198175
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-div-sf-zmm-3.c
@@ -0,0 +1,12 @@
+/* { dg-do compile

Re: C++: is there a good way to check for a valid no-op conversion?

2018-10-19 Thread Eric Gallager
On 10/17/18, Martin Sebor  wrote:
> On 10/16/2018 02:06 PM, David Malcolm wrote:
>> I've been extending -fopt-info to cover inlining, and I added a %S
>> format code to dump_printf which accepts a symtab_node *.
>>
>> Unfortunately, -Wformat doesn't like the fact that I'm passing in a
>> subclass pointer (cgraph_node *), e.g.:
>>
>> ipa-inline.c: In function ‘unsigned int early_inliner(function*)’:
>> ipa-inline.c:2769:21: error: format ‘%S’ expects argument of type
>> ‘symtab_node*’,
>> but argument 3 has type ‘cgraph_node*’ [-Werror=format=]
>> 2769 |"Flattening %S\n", node);
>>   |~^ 
>>   | | |
>>   | | cgraph_node*
>>   | symtab_node*
>>
>> I could fix this by changing my format converter so that explicitly
>> takes a cgraph_node *, but I wondered if it would be better to instead
>> teach -Wformat to accept non-virtual subclass pointers, so that %S can
>> handle symtab_node * and its two subclasses.
>
> It would have helped in the gcall* vs gimple* case as well.  It
> would be nice to teach -Wformat about these conversions in general.
> (on a somewhat related note, other than pedantic conformance, I
> don't think there is value in -Wformat complaining about %p with
> non-void* object pointer arguments either).

That sounds like something worth splitting out once there's a separate
-Wformat-pedantic flag as per bug 67479:
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67479

>
> Martin
>
>>
>> Does this sound sane, and is there a conversion function I can call for
>> this case?  cp_convert_to_pointer seemed the closest match.
>>
>> Thanks
>> Dave
>>
>


[committed] v3: gccint.texi: add user experience guidelines

2018-10-19 Thread David Malcolm
On Thu, 2018-10-18 at 22:25 -0600, Sandra Loosemore wrote:
> On 10/18/2018 03:12 PM, David Malcolm wrote:
> 
> > Here's an updated version of the patch, addressing your above
> > comments,
> > and those from Martin and Richard (I hope).
> 
> Thanks, this one looks more readable.  Some more specific comments 
> included inline below.
> 
> > I have a couple of texinfo questions:
> > 
> > (a) the guidelines frequently have contrasting pairs
> > of examples showing how to do something vs how not to do it.  Is
> > there
> > a way of marking these up in texinfo beyond just @smallexample?
> > (and manually putting in "BAD" and "OK", as I've done)
> 
> No, there's no markup for this.  I think the brief comments in the
> code 
> example and longer discussion in the surrounding text is fine.

Thanks.

> > (b) what's the best way of showing example output from gcc?  In
> > particular I wasn't able to properly express the single quotes
> > emitted by
> > GCC's %qs, %<, and %> directives: everything I've tried so far has
> > issues
> > in at least one of the pdf vs the html output.  I've settled for
> > using
> > single quotes, which is easy to emit via LANG=C and looks OK in
> > html,
> > but less good in pdf.
> 
> I don't understand this question.  Isn't the best way to show single 
> quotes in the output, single quotes?  :-S

Indeed :)  They work in "make html", but "make pdf" emits
U+2019 RIGHT SINGLE QUOTATION MARK for all of them.  Although this
looks OK at the ends of the quoted string, it looks strange at their
starts.

I'll use single quotes as per LANG=C output.

> > +@cindex diagnostics, true positive
> > +@cindex false positive
> > +@cindex true positive
> > +
> > +Warnings should have a good @dfn{signal-to-noise ratio}: we should
> > have few
> > +@dfn{false positives} (falsely issuing a warning when no warning
> > is
> > +warranted) and few @dfn{false negatives} (failing to issue a
> > warning when
> > +one @emph{is} justified).
> > +
> > +Note that a ``false positive'' can mean, in practice, a warning
> > that the
> 
> No quote markup needed there.

Removed.

> > +@noindent
> > +This will emit either one diagnostic with two locations:
> 
> s/will emit/emits/
> 
> > +Avoid using the @code{input_location} global, and the diagnostic
> > functions
> > +that implicitly use it - use @code{error_at} and @code{warning_at}
> > rather
> 
> Long dashes in Texinfo are marked up as '---' (three hyphens) with
> no 
> surrounding whitespace.

Fixed.

> > +@noindent
> > +@anchor{input_location_example}
> > +For example, in the example of imprecise wording
> > +above, the diagnostic was generated using @code{warning}:
> 
> How about rephrasing that as
> 
> For example, generating the diagnostic using @code{warning} results
> in 
> the imprecise wording in the example above:
> 
> which puts it both in the present tense and active voice.

This section is more concerned with the location than with the
imprecise wording, so I changed it to:

"For example, in the example of imprecise wording above, generating the
diagnostic using @code{warning}:"

> > +would lead to:

followed by "leads to:" here.

> > +@smallexample
> > +// OK: use location of attribute, with a secondary location
> > +demo.c:1:24: warning: attribute 'noinline' on variable 'foo' was
> > ignored [-Wattributes]
> 
> The above line seems long enough that it might overflow into the
> right 
> margin.  You probably want to use -fmessage-length=70 or something
> like 
> that for these examples.

Done.

> > +@subsection Coding Conventions
> > +
> > +See the @uref{https://gcc.gnu.org/codingconventions.html#Diagnosti
> > cs,
> > +diagnostics section} of the GCC coding conventions.
> > +
> > +In the C++ frontend, when comparing two types in a message, use
> > @code{%H}
> 
> s/frontend/front end/

Fixed

> I think you should be using @samp markup, rather than @code, on all 
> instances of these %-format directives throughout the running text.

Done.

> > +and @code{%I} rather tha @code{%T}, as this allows the diagnostics
> 
> s/tha/than/

Fixed.

> > +subsystem to highlight differences between template-based types.
> > +For example, rather than using @code{%qT}:
> > +
> > +@smallexample
> > +  // BAD: a pair of %qT used in C++ frontend for type comparison
> 
> s/frontend/front end/ again

Fixed.

> > +  error_at (loc, "could not convert %qE from %qT to %qT", expr,
> > +TREE_TYPE (expr), type);
> > +@end smallexample
> > +
> > +@noindent
> > +which could lead to:
> > +
> > +@smallexample
> > +error: could not convert 'map()' from
> > 'map' to 'map'
> 
> That line looks too long too.

Fixed.

> 
> > +@end smallexample
> > +
> > +@noindent
> > +using @code{%H} and @code{%I} (via @code{%qH} and @code{%qI}):
> > +
> > +@smallexample
> > +  // OK: compare types in C++ frontend via %qH and %qI
> 
> s/frontend/front end/ again

Fixed.

> > +  error_at (loc, "could not convert %qE from %qH to %qI", expr,
> > +TREE_TYPE (expr), type);
> > +@end small

Go patch committed: Don't export functions with special names

2018-10-19 Thread Ian Lance Taylor
This patch changes the Go frontend to not export any functions with
special names.  This keeps init functions from appearing in the export
data.  Checking for special names in general means that we don't need
to check specifically for nested functions or thunks, which have
special names.  Bootstrapped and ran Go testsuite on
x86_64-pc-linux-gnu.  Committed to mainline.

Ian
Index: gcc/go/gofrontend/MERGE
===
--- gcc/go/gofrontend/MERGE (revision 265297)
+++ gcc/go/gofrontend/MERGE (working copy)
@@ -1,4 +1,4 @@
-6f4bce815786ff3803741355f7f280e4e2c89668
+e1dc92a6037a3f81ea1b8ea8fb6207af33505f0c
 
 The first line of this file holds the git revision number of the last
 merge done from the gofrontend repository.
Index: gcc/go/gofrontend/export.cc
===
--- gcc/go/gofrontend/export.cc (revision 265296)
+++ gcc/go/gofrontend/export.cc (working copy)
@@ -75,12 +75,8 @@ should_export(Named_object* no)
   if (Gogo::is_hidden_name(no->name()))
 return false;
 
-  // We don't export nested functions.
-  if (no->is_function() && no->func_value()->enclosing() != NULL)
-return false;
-
-  // We don't export thunks.
-  if (no->is_function() && Gogo::is_thunk(no))
+  // We don't export various special functions.
+  if (Gogo::is_special_name(no->name()))
 return false;
 
   // Methods are exported with the type, not here.


Re: [PATCH 07/14] Add patches for D language support in GCC targets.

2018-10-19 Thread Iain Buclaw
On Tue, 16 Oct 2018 at 17:28, Richard Sandiford
 wrote:
>
> Iain Buclaw  writes:
> > diff --git a/gcc/Makefile.in b/gcc/Makefile.in
> > index 4b7cec82382..0b2daa320c3 100644
> > --- a/gcc/Makefile.in
> > +++ b/gcc/Makefile.in
> > @@ -2496,6 +2525,7 @@ s-tm-texi: build/genhooks$(build_exeext) 
> > $(srcdir)/doc/tm.texi.in
> > && ( test $(srcdir)/doc/tm.texi -nt $(srcdir)/target.def \
> >   || test $(srcdir)/doc/tm.texi -nt $(srcdir)/c-family/c-target.def 
> > \
> >   || test $(srcdir)/doc/tm.texi -nt 
> > $(srcdir)/common/common-target.def \
> > + || test $(srcdir)/doc/tm.texi -nt $(srcdir)/d/d-target.def \
> > ); then \
> > echo >&2 ; \
> > echo You should edit $(srcdir)/doc/tm.texi.in rather than 
> > $(srcdir)/doc/tm.texi . >&2 ; \
> > [...]
> > @@ -2784,7 +2815,7 @@ build/genrecog.o : genrecog.c $(RTL_BASE_H) 
> > $(BCONFIG_H) $(SYSTEM_H)\
> >$(CORETYPES_H) $(GTM_H) errors.h $(READ_MD_H) $(GENSUPPORT_H)
> >   \
> >$(HASH_TABLE_H) inchash.h
> >  build/genhooks.o : genhooks.c $(TARGET_DEF) $(C_TARGET_DEF)  \
> > -  $(COMMON_TARGET_DEF) $(BCONFIG_H) $(SYSTEM_H) errors.h
> > +  $(COMMON_TARGET_DEF) $(D_TARGET_DEF) $(BCONFIG_H) $(SYSTEM_H) errors.h
> >  build/genmddump.o : genmddump.c $(RTL_BASE_H) $(BCONFIG_H) $(SYSTEM_H) 
> >   \
> >$(CORETYPES_H) $(GTM_H) errors.h $(READ_MD_H) $(GENSUPPORT_H)
> >  build/genmatch.o : genmatch.c $(BCONFIG_H) $(SYSTEM_H) \
>
> I was initially a bit worried about this because it makes the build
> depend on the existence of the d/ directory.  But it doesn't look
> like we try to ship separate tarballs for specific source languages
> any more, so that's probably not a problem.
>
> > @@ -10659,6 +10668,22 @@ unloaded. The default is to return false.
> >  Return target-specific mangling context of @var{decl} or @code{NULL_TREE}.
> >  @end deftypefn
> >
> > +@node D Language and ABI
> > +@section D ABI parameters
> > +@cindex parameters, d abi
> > +
> > +@deftypefn {D Target Hook} void TARGET_D_CPU_VERSIONS (void)
> > +Declare all environmental version identifiers relating to the target CPU 
> > using the function @code{builtin_version}, which takes a string 
> > representing the name of the version.  Version identifiers predefined by 
> > this hook apply to all modules and being compiled and imported.
> > +@end deftypefn
>
> "and being"?  Does this mean "that are being"?
>
> > +@deftypefn {D Target Hook} void TARGET_D_OS_VERSIONS (void)
> > +Similarly to @code{TARGET_D_CPU_VERSIONS}, but is used for versions 
> > relating to the target operating system.
> > +@end deftypefn
> > +
> > +@deftypefn {D Target Hook} unsigned TARGET_D_CRITSEC_SIZE (void)
> > +Returns the size of the data structure used by the targeted operating 
> > system for critical sections and monitors.  For example, on Microsoft 
> > Windows this would return the @code{sizeof(CRITICAL_SECTION)}, while other 
> > platforms that implement pthreads would return 
> > @code{sizeof(pthread_mutex_t)}.
> > +@end deftypefn
>
> Please reflow the .def so that these fit within 80 chars.
>

This file is auto-generated, but looking at other entries, some remain
within 80 chars, others are one line such as this.  I'll see if I can
spot the difference in the tm.texi.in file.

> OK otherwise if no target maintainer objects to the changes to their port
> (I think they've had plenty of time already :-))
>

I can cc them just to make sure there's no doubt about that.

-- 
Iain


[PATCH v2] MIPS: Default to --with-llsc for the R5900 Linux target as well

2018-10-19 Thread Fredrik Noring
The Linux kernel requires and emulates LL and SC for the R5900 too.  The
special --without-llsc default for the R5900 is therefore not applicable
in that case.

Reviewed-by: Maciej W. Rozycki 
---
Changes in v2:
- Double spacing instead of single spacing in commit message

---
 gcc/config.gcc | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/gcc/config.gcc b/gcc/config.gcc
index 720e6a7373d..68c34b16123 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -3711,14 +3711,14 @@ fi
 # Infer a default setting for --with-llsc.
 if test x$with_llsc = x; then
   case ${target} in
-mips64r5900-*-* | mips64r5900el-*-* | mipsr5900-*-* | mipsr5900el-*-*)
-  # The R5900 doesn't support LL(D) and SC(D).
-  with_llsc=no
-  ;;
 mips*-*-linux*)
   # The kernel emulates LL and SC where necessary.
   with_llsc=yes
   ;;
+mips64r5900-*-* | mips64r5900el-*-* | mipsr5900-*-* | mipsr5900el-*-*)
+  # The R5900 doesn't support LL(D) and SC(D).
+  with_llsc=no
+  ;;
   esac
 fi
 
-- 
2.18.1



Re: [PATCH] MIPS: Default to --with-llsc for the R5900 Linux target as well

2018-10-19 Thread Fredrik Noring
Thank you for your review, Maciej,

> > The Linux kernel requires and emulates LL and SC for the R5900 too. The
> 
>  Two spaces after a full stop please (in commit descriptions too).

Fixed in v2, to be posted shortly.

> I hope this helps you get a general maintainer's approval.

Thanks! It looks like Matthew Fortune is the MIPS port maintainer, so I'm
cc'ing him on this patch.

>  Also your change is small enough to be acceptable right away without a 
> copyright assignment to FSF, however I do recommend that you make one for 
> GCC as well (and also glibc and possibly GDB), as I imagine you'll come up 
> with more changes as you progress with your effort.

Good, yes, I mentioned GCC when I posted the form to the FSF. Implementing
the -mfix-r5900 option is one such change. A small number of fixes related
to floating point instructions, MIPS16, etc. are also needed to make GCC
work with the R5900 target in a reasonable manner.

>  Thank you for your contribution!

You are welcome! :)

Fredrik


Re: C++ PATCH to implement C++20 P0892R2 - explicit(bool) [v3]

2018-10-19 Thread Marek Polacek
Ping.

On Fri, Oct 12, 2018 at 12:32:43PM -0400, Marek Polacek wrote:
> On Fri, Oct 12, 2018 at 02:26:45AM -0400, Jason Merrill wrote:
> > On Thu, Oct 11, 2018 at 8:25 PM Marek Polacek  wrote:
> > >
> > > On Thu, Oct 11, 2018 at 11:35:23AM -0400, Jason Merrill wrote:
> > > > > +   /* [dcl.fct.spec]
> > > > > +  "the constant-expression, if supplied, shall be a 
> > > > > contextually
> > > > > +  converted constant expression of type bool."  */
> > > > > +   expr = build_explicit_specifier (expr, tf_warning_or_error);
> > > > > +   /* We could evaluate it -- mark the decl as appropriate.  */
> > > > > +   if (expr == boolean_true_node)
> > > > > + set_and_check_decl_spec_loc (decl_specs, ds_explicit, 
> > > > > token);
> > > > > +   else if (explicit_specifier)
> > > > > + /* The expression was value-dependent.  Remember it so that 
> > > > > we can
> > > > > +substitute it later.  */
> > > > > + *explicit_specifier = expr;
> > > >
> > > > What if expr == boolean_false_node?
> > >
> > > Then we proceed like no explicit was present and the decl isn't marked as
> > > explicit/nonconverting.  Perhaps I could have made this clearer with
> > >
> > >   else if (expr == boolean_false_node)
> > > /* Don't mark the decl as explicit.  */;
> > >
> > > or somesuch.
> > 
> > Yes, and also so that we don't store the false as a "dependent" specifier.
> 
> Oh, absolutely.  Fixed.
> 
> > > > > +  /* Handle explicit(dependent-expr).  */
> > > > > +  if (DECL_HAS_DEPENDENT_EXPLICIT_SPEC_P (t))
> > > > > +   {
> > > > > + tree spec = lookup_explicit_specifier (t);
> > > > > + spec = tsubst_copy_and_build (spec, args, complain, in_decl,
> > > > > +   /*function_p=*/false,
> > > > > +   /*i_c_e_p=*/true);
> > > > > + spec = build_explicit_specifier (spec, complain);
> > > > > + DECL_NONCONVERTING_P (t) = (spec == boolean_true_node);
> > > > > +   }
> > > >
> > > > What if spec is still dependent, e.g. after partial substitution of a
> > > > member template?
> > >
> > > Something like this?
> > >
> > > template<bool N> struct A {
> > >   template<typename T>
> > >   explicit(N) operator T();
> > > };
> > >
> > > void
> > > bar ()
> > > {
> > >   A<false> a;
> > >   int i = a;
> > > }
> > >
> > > This also seemed to work: if spec is still dependent, the decl isn't 
> > > marked as
> > > DECL_NONCONVERTING_P, and we'll try again after deduction 
> > > (fn_type_unification
> > > in add_template_candidate).
> > 
> > Does it also work if N is true?  What if the specifier depends on
> > template parameters from both the enclosing class and the member
> > template?
> 
> All of that seems to work.  I've added explicit12.C and explicit13.C tests to
> cover that.  Please check if that's what you had in mind.  Thanks,
> 
> Bootstrapped/regtested on x86_64-linux, ok for trunk?
> 
> 2018-10-12  Marek Polacek  
> 
>   Implement P0892R2, explicit(bool).
>   * c-cppbuiltin.c (c_cpp_builtins): Define __cpp_explicit_bool.
> 
>   * call.c (add_template_candidate_real): Return if the declaration is
>   explicit and we're only looking for non-converting constructor.
>   * cp-tree.h (lang_decl_fn): Add has_dependent_explicit_spec_p bit.
>   (DECL_HAS_DEPENDENT_EXPLICIT_SPEC_P): New macro.
>   (build_explicit_specifier, store_explicit_specifier): Declare.
>   * decl.c (build_explicit_specifier): New function.
>   * parser.c (cp_parser_decl_specifier_seq): Add explicit_specifier
>   parameter.  Pass it down to cp_parser_function_specifier_opt.
>   (cp_parser_function_specifier_opt): Add explicit_specifier parameter.
>   : Parse C++20 explicit(bool).
>   (cp_parser_explicit_instantiation): Update call to
>   cp_parser_function_specifier_opt.
>   (cp_parser_member_declaration): Have cp_parser_decl_specifier_seq save
>   the explicit-specifier.  Save it using store_explicit_specifier.
>   (cp_parser_single_declaration): Likewise.
>   * pt.c (store_explicit_specifier, lookup_explicit_specifier): New.
>   (tsubst_function_decl): Handle explicit(dependent-expr).
> 
>   * g++.dg/cpp2a/explicit1.C: New test.
>   * g++.dg/cpp2a/explicit10.C: New test.
>   * g++.dg/cpp2a/explicit11.C: New test.
>   * g++.dg/cpp2a/explicit12.C: New test.
>   * g++.dg/cpp2a/explicit13.C: New test.
>   * g++.dg/cpp2a/explicit2.C: New test.
>   * g++.dg/cpp2a/explicit3.C: New test.
>   * g++.dg/cpp2a/explicit4.C: New test.
>   * g++.dg/cpp2a/explicit5.C: New test.
>   * g++.dg/cpp2a/explicit6.C: New test.
>   * g++.dg/cpp2a/explicit7.C: New test.
>   * g++.dg/cpp2a/explicit8.C: New test.
>   * g++.dg/cpp2a/explicit9.C: New test.
> 
>   * testsuite/20_util/any/cons/explicit.cc: Adjust dg-error.
>   * testsuite/20_util/pair/cons/explicit_construct.cc: Likewise.
>   * testsuite/20_ut

Re: PATCH to enable testing C++17 by default

2018-10-19 Thread Mike Stump
On Oct 17, 2018, at 2:19 PM, Jeff Law  wrote:
>> 2018-10-17  Marek Polacek  
>> 
>>  * g++.dg/*.C: Use target c++17 instead of explicit dg-options.
>>  * lib/g++-dg.exp: Don't test C++11 by default.  Add C++17 to
>>  the list of default stds to test.
> Given this follows Jason's recommendations from the thread, OK for the
> trunk.
> 
> I'll leave it up to Jason to decide when to add 2a and whether or not to
> drop something at that time.

Gosh, I was going to say the same thing.

[PATCH] rs6000: Put CR0 first in REG_ALLOC_ORDER

2018-10-19 Thread Segher Boessenkool
IRA and LRA prefer to use CR7 (which is first in REG_ALLOC_ORDER) over
CR0, although the latter often is cheaper ("x" vs. "y" constraints).
We should figure out why this is and fix it; but until that is done,
this patch makes CR0 the first allocated register: it improves the
current code, and it is required for later patches to be effective.

(It changes two testcases to no longer look at what CR field is
allocated).

Committing to trunk.


2018-10-19  Segher Boessenkool  

* config/rs6000/rs6000.h (REG_ALLOC_ORDER): Move 68 (that is, CR0) to
be the first CR field allocated.

gcc/testsuite/
* gcc.target/powerpc/safe-indirect-jump-2.c: Do not check assigned CR
field number.
* gcc.target/powerpc/safe-indirect-jump-3.c: Ditto.

---
 gcc/config/rs6000/rs6000.h  | 2 +-
 gcc/testsuite/gcc.target/powerpc/safe-indirect-jump-2.c | 6 ++
 gcc/testsuite/gcc.target/powerpc/safe-indirect-jump-3.c | 6 ++
 3 files changed, 5 insertions(+), 9 deletions(-)

diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index eddb834..785e414 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -951,7 +951,7 @@ enum data_align { align_abi, align_opt, align_both };
33, \
63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, \
50, 49, 48, 47, 46, \
-   75, 73, 74, 69, 68, 72, 71, 70, \
+   68, 75, 73, 74, 69, 72, 71, 70, \
MAYBE_R2_AVAILABLE  \
9, 10, 8, 7, 6, 5, 4,   \
3, EARLY_R12 11, 0, \
diff --git a/gcc/testsuite/gcc.target/powerpc/safe-indirect-jump-2.c 
b/gcc/testsuite/gcc.target/powerpc/safe-indirect-jump-2.c
index d3d040f..d6fc6a3 100644
--- a/gcc/testsuite/gcc.target/powerpc/safe-indirect-jump-2.c
+++ b/gcc/testsuite/gcc.target/powerpc/safe-indirect-jump-2.c
@@ -27,8 +27,6 @@ int foo (int x)
   return spaz (x) / 2;
 }
 
-/* The following assumes CR7 as the first chosen volatile.  */
-
-/* { dg-final { scan-assembler "crset 30" } } */
-/* { dg-final { scan-assembler "beqctr- 7" } } */
+/* { dg-final { scan-assembler "crset" } } */
+/* { dg-final { scan-assembler "beqctr-" } } */
 /* { dg-final { scan-assembler {b \$} } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/safe-indirect-jump-3.c 
b/gcc/testsuite/gcc.target/powerpc/safe-indirect-jump-3.c
index c338e30..87881fb 100644
--- a/gcc/testsuite/gcc.target/powerpc/safe-indirect-jump-3.c
+++ b/gcc/testsuite/gcc.target/powerpc/safe-indirect-jump-3.c
@@ -46,8 +46,6 @@ int foo (int x)
   return a;
 }
 
-/* The following assumes CR7 as the first chosen volatile.  */
-
-/* { dg-final { scan-assembler "crset 30" } } */
-/* { dg-final { scan-assembler "beqctr- 7" } } */
+/* { dg-final { scan-assembler "crset" } } */
+/* { dg-final { scan-assembler "beqctr-" } } */
 /* { dg-final { scan-assembler {b \$} } } */
-- 
1.8.3.1



Re: [PATCH] Add sinh(tanh(x)) and cosh(tanh(x)) rules

2018-10-19 Thread Wilco Dijkstra
Hi,

>> Maybe I am crazy, or the labels here are wrong, but that looks like the
>> error is three times as *big* after the patch.  I.e. it worsened instead
>> of improving.

This error is actually 1ULP, so just a rounding error. Can't expect any better 
than that!

> with input  :  = 9.988079071044921875e-01
> cosh: before:  = 2.048000e+03
> cosh: after :  = 2.048000244140625000e+03
> cosh: mpfr  :  = 2.0486103515897848424084406334262726138617589463e+03
> error before:  = 6.10351589784842408440633426272613861758946325324235e-05
> error after :  = 1.83105466021515759155936657372738613824105367467577e-04

It may be less confusing to print relative error or ULP error...

Wilco


[PATCH] Fix PR87645

2018-10-19 Thread Richard Biener


The following backports limiting match.pd recursion together with a
new testcase, also applied to trunk.

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied.

2018-10-19  Richard Biener  

PR middle-end/87645
Backport from mainline
2018-07-12  Richard Biener  

* tree-ssa-sccvn.c (mprts_hook_cnt): Remove.
(vn_lookup_simplify_result): Remove recursion limit applied
here.
(vn_nary_build_or_lookup_1): Adjust.
(try_to_simplify): Likewise.
* gimple-match-head.c (gimple_resimplify1): Instead apply one
here.
(gimple_resimplify2): Likewise.
(gimple_resimplify3): Likewise.
(gimple_resimplify4): Likewise.

* gcc.dg/torture/pr87645.c: New testcase.

Index: gcc/tree-ssa-sccvn.c
===
--- gcc/tree-ssa-sccvn.c(revision 265312)
+++ gcc/tree-ssa-sccvn.c(working copy)
@@ -1650,7 +1650,6 @@ vn_reference_lookup_or_insert_for_pieces
 }
 
 static vn_nary_op_t vn_nary_op_insert_stmt (gimple *stmt, tree result);
-static unsigned mprts_hook_cnt;
 
 /* Hook for maybe_push_res_to_seq, lookup the expression in the VN tables.  */
 
@@ -1672,22 +1671,8 @@ vn_lookup_simplify_result (code_helper r
ops[i] = CONSTRUCTOR_ELT (ops_[0], i)->value;
 }
   vn_nary_op_t vnresult = NULL;
-  tree res = vn_nary_op_lookup_pieces (length, (tree_code) rcode,
-  type, ops, &vnresult);
-  /* We can end up endlessly recursing simplifications if the lookup above
- presents us with a def-use chain that mirrors the original simplification.
- See PR80887 for an example.  Limit successful lookup artificially
- to 10 times if we are called as mprts_hook.  */
-  if (res
-  && mprts_hook
-  && --mprts_hook_cnt == 0)
-{
-  if (dump_file && (dump_flags & TDF_DETAILS))
-   fprintf (dump_file, "Resetting mprts_hook after too many "
-"invocations.\n");
-  mprts_hook = NULL;
-}
-  return res;
+  return vn_nary_op_lookup_pieces (length, (tree_code) rcode,
+  type, ops, &vnresult);
 }
 
 /* Return a value-number for RCODE OPS... either by looking up an existing
@@ -1704,7 +1689,6 @@ vn_nary_build_or_lookup_1 (code_helper r
  So first simplify and lookup this expression to see if it
  is already available.  */
   mprts_hook = vn_lookup_simplify_result;
-  mprts_hook_cnt = 9;
   bool res = false;
   switch (TREE_CODE_LENGTH ((tree_code) rcode))
 {
@@ -3979,7 +3963,6 @@ try_to_simplify (gassign *stmt)
 
   /* First try constant folding based on our current lattice.  */
   mprts_hook = vn_lookup_simplify_result;
-  mprts_hook_cnt = 9;
   tem = gimple_fold_stmt_to_constant_1 (stmt, vn_valueize, vn_valueize);
   mprts_hook = NULL;
   if (tem
Index: gcc/gimple-match-head.c
===
--- gcc/gimple-match-head.c (revision 265312)
+++ gcc/gimple-match-head.c (working copy)
@@ -100,17 +100,34 @@ gimple_resimplify1 (gimple_seq *seq,
}
 }
 
+  /* Limit recursion, there are cases like PR80887 and others, for
+ example when value-numbering presents us with unfolded expressions
+ that we are really not prepared to handle without eventual
+ oscillation like ((_50 + 0) + 8) where _50 gets mapped to _50
+ itself as available expression.  */
+  static unsigned depth;
+  if (depth > 10)
+{
+  if (dump_file && (dump_flags & TDF_FOLDING))
+   fprintf (dump_file, "Aborting expression simplification due to "
+"deep recursion\n");
+  return false;
+}
+
+  ++depth;
   code_helper res_code2;
   tree res_ops2[3] = {};
   if (gimple_simplify (&res_code2, res_ops2, seq, valueize,
   *res_code, type, res_ops[0]))
 {
+  --depth;
   *res_code = res_code2;
   res_ops[0] = res_ops2[0];
   res_ops[1] = res_ops2[1];
   res_ops[2] = res_ops2[2];
   return true;
 }
+  --depth;
 
   return false;
 }
@@ -160,17 +177,30 @@ gimple_resimplify2 (gimple_seq *seq,
   canonicalized = true;
 }
 
+  /* Limit recursion, see gimple_resimplify1.  */
+  static unsigned depth;
+  if (depth > 10)
+{
+  if (dump_file && (dump_flags & TDF_FOLDING))
+   fprintf (dump_file, "Aborting expression simplification due to "
+"deep recursion\n");
+  return false;
+}
+
+  ++depth;
   code_helper res_code2;
   tree res_ops2[3] = {};
   if (gimple_simplify (&res_code2, res_ops2, seq, valueize,
   *res_code, type, res_ops[0], res_ops[1]))
 {
+  --depth;
   *res_code = res_code2;
   res_ops[0] = res_ops2[0];
   res_ops[1] = res_ops2[1];
   res_ops[2] = res_ops2[2];
   return true;
 }
+  --depth;
 
   return canonicalized;
 }
@@ -219,18 +249,31 @@ gimple_resimplify3 (gimple_seq *seq,
   canonicalized = true;
 }
 
+  /*

Re: [PATCH] Add sinh(tanh(x)) and cosh(tanh(x)) rules

2018-10-19 Thread Wilco Dijkstra
Jakub Jelinek wrote:

> At this point this seems like something that shouldn't be done inline
> anymore, so either we don't do this optimization at all, because the errors
> are far bigger than what is acceptable even for -ffast-math, or we have a
> library function that does the sinh (tanh (x)) and cosh (tanh (x))
> computations somewhere (libm, libgcc, ...) that handles all the cornercases.

The FMA version should not have any accuracy issues. Without FMA it's
harder, but it's not that different from the sin(atan(x)) simplification which 
also
requires two separate cases. So it's more a question how much effort we
want to spend optimizing for targets which do not support FMA. 

Wilco


Re: [PATCH] Add sinh(tanh(x)) and cosh(tanh(x)) rules

2018-10-19 Thread Giuliano Augusto Faulin Belinassi
> Maybe I am crazy, or the labels here are wrong, but that looks like the
> error is three times as *big* after the patch.  I.e. it worsened instead
> of improving.

Oh, sorry. I was not clear in my previous message.
The error did not improve with regard to the original formula. What I
meant is with regard to the original (1-x*x) simplification. But you
are right, the above error is about 3 times bigger than the original
formula, but before the error was about 300 times bigger.

You are not crazy :P
On Fri, Oct 19, 2018 at 10:46 AM Segher Boessenkool
 wrote:
>
> Hi all,
>
> On Fri, Oct 19, 2018 at 09:21:07AM -0300, Giuliano Augusto Faulin Belinassi 
> wrote:
> > > Did you enable FMA? I'd expect 1 - x*x to be accurate with FMA, so the 
> > > relative error
> > > should be much better. If there is no FMA, 2*(1-fabs(x)) - (1-fabs(x))^2 
> > > should be
> > > more accurate when abs(x)>0.5 and still much faster.
> >
> > No, but I will check how to enable it if FMA is available.
> > I did a minor test with your formula and the precision improved a lot.
> > Here is an example for floats
> >
> > with input  :  = 9.988079071044921875e-01
> > cosh: before:  = 2.048000e+03
> > cosh: after :  = 2.048000244140625000e+03
> > cosh: mpfr  :  = 2.0486103515897848424084406334262726138617589463e+03
> > error before:  = 6.10351589784842408440633426272613861758946325324235e-05
> > error after :  = 1.83105466021515759155936657372738613824105367467577e-04
>
> Maybe I am crazy, or the labels here are wrong, but that looks like the
> error is three times as *big* after the patch.  I.e. it worsened instead
> of improving.
>
>
> Segher


Re: [PATCH] Add sinh(tanh(x)) and cosh(tanh(x)) rules

2018-10-19 Thread Jakub Jelinek
On Fri, Oct 19, 2018 at 01:39:01PM +, Wilco Dijkstra wrote:
> >> Did you enable FMA? I'd expect 1 - x*x to be accurate with FMA, so the 
> >> relative error
> >> should be much better. If there is no FMA, 2*(1-fabs(x)) - (1-fabs(x))^2 
> >> should be
> >> more accurate when abs(x)>0.5 and still much faster.
> >
> >No, but I will check how to enable it if FMA is available.
> > I did a minor test with your formula and the precision improved a lot.
> 
> > But now I am puzzled about how did you come up with that formula :-).
> > I am able to prove equality, but how did you know it was going to be
> > more precise?
> 
> Basically when x is close to 1, the top N bits in the mantissa of x will be ones.
> Then x*x has one bits in the top 2*N bits in the mantissa. Ie. we lose N bits 
> of
> useful information in the multiply - problematic when N gets close to the 
> number
> of mantissa bits. In contrast FMA computes the fully accurate result due to
> cancellation of the top 2*N one-bits in the subtract.
> 
> If we can use (1-x) instead of x in the evaluation, we avoid losing accuracy 
> in the
> multiply when x is close to 1. Then it's basic algebra to find an equivalent 
> formula
> that can produce 1-x^2 using 1-x. For example (1+x)*(1-x) will work fine too 
> (using 1+x loses 1 low bit of x).
> 
> Note that these alternative evaluations lose accuracy close to 0 in exactly 
> the
> same way, so if no FMA is available you'd need to select between the 2 cases.

At this point this seems like something that shouldn't be done inline
anymore, so either we don't do this optimization at all, because the errors
are far bigger than what is acceptable even for -ffast-math, or we have a
library function that does the sinh (tanh (x)) and cosh (tanh (x))
computations somewhere (libm, libgcc, ...) that handles all the cornercases.

Jakub


Re: [PATCH] Add splay-tree "view" for bitmap

2018-10-19 Thread Richard Biener
On Fri, 19 Oct 2018, Richard Biener wrote:

> On Fri, 19 Oct 2018, Richard Sandiford wrote:
> 
> > Richard Biener  writes:
> > > On October 18, 2018 11:05:32 PM GMT+02:00, Richard Sandiford
> > >  wrote:
> > >>Richard Biener  writes:
> > >>> On Thu, 18 Oct 2018, Richard Sandiford wrote:
> >  What's the performance like for more reasonable testcases?  E.g. is
> > >>there
> >  a measurable change either way in --enable-checking=release for some
> > >>gcc
> >  .iis at -g or -O2 -g?
> > >>>
> > >>> I did a quick check on my set of cc1files (still .i, .ii ones tend to
> > >>> be unusable quite quickly...).  Most of them compile too quickly
> > >>> to make any difference appear other than noise.  Multi-second
> > >>differences
> > >>> like for PR63155 should be the exception but our O(n) bitmap
> > >>> implementation really makes some parts of GCC quadratic where it
> > >>> doesn't appear so.
> > >>>
> > >>> Is there a reason you expect it to be ever slower?
> > >>
> > >>During recent stage3s I've tried to look at profiles of cc1plus
> > >>to see whether there was something easy we could do to improve
> > >>compile times.  And bitmap operations always showed up near the
> > >>top of the profile.  There were always some pathological queries
> > >>in which the linear search really hurt, but whenever I tried "simple"
> > >>ways to avoid the obvious cases, they made those queries quicker
> > >>but slowed things down overall.  It seemed that adding any extra logic
> > >>to the queries hurt because even a small slowdown in common lookups
> > >>overwhelmed a big saving in less common lookups.
> > >
> > > Yeah. I also noticed some 'obvious' shortcomings in the heuristics...
> > > I guess in the end well predicted branches in the out of line code are
> > > important...
> > >
> > >>
> > >>But there again I was looking to speed up more "normal" cases, not ones
> > >>like the PR.
> > >>
> > >>FWIW I've tried it on a local x86_64 box and it slows down current
> > >>optabs.ii at -O2 -g by ~0.35% (reproducible).  I see similar slowdowns
> > >>for the other files I tried.  But that's hardly a huge amount, and
> > >>probably a price worth paying for the speed-up in the PR.
> > >
> > > Just to make sure what to reproduce - this is with checking disabled?
> > > And with or without the hunks actually making use of the splay tree
> > > path?
> > 
> > Yeah, with an --enable-checking=release stage3:
> > 
> >./cc1plus optabs.ii -o /dev/null -O2 -g
> > 
> > using the optabs.ii from the unpatched --enable-checking=release build.
> > 
> > It was the whole patch vs. without the patch.
> 
> OK, so there are almost no hits from the SSA propagator or out-of-SSA
> but only from "unchanged" paths:
> 
> -    2.90%     2.90%    23  cc1plus  cc1plus   [.] bitmap_set_bit
>    - bitmap_set_bit
>       + 0.79% df_lr_bb_local_compute
>       + 0.38% insert_updated_phi_nodes_for
>       + 0.27% sched_analyze_reg
>       + 0.23% walk_aliased_vdefs_1
>       + 0.13% get_continuation_for_phi
>       + 0.13% add_control_edge
>       + 0.13% df_md_bb_local_compute_process_def
>       + 0.13% mark_for_renaming
>       + 0.13% (anonymous namespace)::pass_rtl_cprop::execute
>       + 0.13% compute_dominance_frontiers
>       + 0.12% df_simulate_initialize_backwards
> 
> it's also interesting that most branch misses (for bitmap_set_bit)
> are from
> 
>   bool res = (ptr->bits[word_num] & bit_val) == 0;
>   if (res)
> ptr->bits[word_num] |= bit_val;
>   return res;
> 
> I'm not sure how "bad" a mispredicted branch is here.  I guess
> if it is predicted to be taken (skip the write) then it's bad
> but if it is predicted the other way around it should be a matter
> of not retiring the store...  But I am not a CPU guy.  I guess
> unconditionally updating the memory wouldn't be so bad after all
> and it might also help combine in using architecture specific
> optimizations like using some CC flags of the OR operation
> to get at the comparison result.  Can you benchmark a difference
> for this?

So I can also see mispredicts on

  if (!head->tree_form)
element = bitmap_list_find_element (head, indx, usage);
  else
element = bitmap_tree_find_element (head, indx);

which I could significantly reduce via re-ordering (thus
w/o any branch history this currently gets predicted to the
tree variant).

That suggests to instead of the above runtime checks do
bitmap_tree_bit_p / bit

[PATCH] Fix compilation error with _GLIBCXX_PARALLEL

2018-10-19 Thread Jonathan Wakely

* include/bits/regex_executor.tcc (_Backref_matcher::_M_apply): Use
_GLIBCXX_STD_A to refer to normal mode algorithms.
* testsuite/28_regex/headers/regex/parallel_mode.cc: New test.
* testsuite/28_regex/headers/regex/std_c++0x_neg.cc: Remove empty
whitespace.

Tested x86_64-linux, committed to trunk.

This is a regression in GCC 8.1 and later so I'll commit to the branch
too.


commit fd6e19c6bfc3131563c740e749b44c51ef5419ff
Author: Jonathan Wakely 
Date:   Fri Oct 19 14:38:19 2018 +0100

Fix compilation error with _GLIBCXX_PARALLEL

* include/bits/regex_executor.tcc (_Backref_matcher::_M_apply): Use
_GLIBCXX_STD_A to refer to normal mode algorithms.
* testsuite/28_regex/headers/regex/parallel_mode.cc: New test.
* testsuite/28_regex/headers/regex/std_c++0x_neg.cc: Remove empty
whitespace.

diff --git a/libstdc++-v3/include/bits/regex_executor.tcc 
b/libstdc++-v3/include/bits/regex_executor.tcc
index b3238894f5d..68f1213cb64 100644
--- a/libstdc++-v3/include/bits/regex_executor.tcc
+++ b/libstdc++-v3/include/bits/regex_executor.tcc
@@ -366,11 +366,11 @@ namespace __detail
   _BiIter __actual_end)
   {
if (!_M_icase)
- return std::__equal4(__expected_begin, __expected_end,
+ return _GLIBCXX_STD_A::__equal4(__expected_begin, __expected_end,
   __actual_begin, __actual_end);
typedef std::ctype<_CharT> __ctype_type;
const auto& __fctyp = use_facet<__ctype_type>(_M_traits.getloc());
-   return std::__equal4(__expected_begin, __expected_end,
+   return _GLIBCXX_STD_A::__equal4(__expected_begin, __expected_end,
 __actual_begin, __actual_end,
 [this, &__fctyp](_CharT __lhs, _CharT __rhs)
 {
diff --git a/libstdc++-v3/testsuite/28_regex/headers/regex/parallel_mode.cc 
b/libstdc++-v3/testsuite/28_regex/headers/regex/parallel_mode.cc
new file mode 100644
index 000..d32df55c966
--- /dev/null
+++ b/libstdc++-v3/testsuite/28_regex/headers/regex/parallel_mode.cc
@@ -0,0 +1,22 @@
+// Copyright (C) 2018 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3.  If not see
+// <http://www.gnu.org/licenses/>.
+
+// { dg-do compile { target c++11 } }
+// { dg-require-normal-mode "" }
+
+#define _GLIBCXX_PARALLEL
+#include <regex>
diff --git a/libstdc++-v3/testsuite/28_regex/headers/regex/std_c++0x_neg.cc 
b/libstdc++-v3/testsuite/28_regex/headers/regex/std_c++0x_neg.cc
index 44aa140eeea..85899aab0dd 100644
--- a/libstdc++-v3/testsuite/28_regex/headers/regex/std_c++0x_neg.cc
+++ b/libstdc++-v3/testsuite/28_regex/headers/regex/std_c++0x_neg.cc
@@ -21,6 +21,3 @@
 #include <regex>
 
 // { dg-error "ISO C.. 2011" "" { target *-*-* } 32 }
-
-
-


Re: [PATCH] Add sinh(tanh(x)) and cosh(tanh(x)) rules

2018-10-19 Thread Segher Boessenkool
Hi all,

On Fri, Oct 19, 2018 at 09:21:07AM -0300, Giuliano Augusto Faulin Belinassi 
wrote:
> > Did you enable FMA? I'd expect 1 - x*x to be accurate with FMA, so the 
> > relative error
> > should be much better. If there is no FMA, 2*(1-fabs(x)) - (1-fabs(x))^2 
> > should be
> > more accurate when abs(x)>0.5 and still much faster.
> 
> No, but I will check how to enable it if FMA is available.
> I did a minor test with your formula and the precision improved a lot.
> Here is an example for floats
> 
> with input  :  = 9.988079071044921875e-01
> cosh: before:  = 2.048000e+03
> cosh: after :  = 2.048000244140625000e+03
> cosh: mpfr  :  = 2.0486103515897848424084406334262726138617589463e+03
> error before:  = 6.10351589784842408440633426272613861758946325324235e-05
> error after :  = 1.83105466021515759155936657372738613824105367467577e-04

Maybe I am crazy, or the labels here are wrong, but that looks like the
error is three times as *big* after the patch.  I.e. it worsened instead
of improving.


Segher


Re: [ARM/FDPIC v3 12/21] [ARM] FDPIC: Restore r9 after we call __aeabi_read_tp

2018-10-19 Thread Christophe Lyon

On 11/10/2018 15:34, Christophe Lyon wrote:

We call __aeabi_read_tp() to get the thread pointer. Since this is a
function call, we have to restore the FDPIC register afterwards.

2018-XX-XX  Christophe Lyon  
Mickaël Guêné 

gcc/
* config/arm/arm.c (arm_load_tp): Add FDPIC support.
* config/arm/arm.md (load_tp_soft_fdpic): New pattern.
(load_tp_soft): Disable in FDPIC mode.

Change-Id: I0a2e3466c9afb869ad8e844083ad178de014658e

diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index d7b7d99..d3a60cb 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -8646,7 +8646,25 @@ arm_load_tp (rtx target)
  
rtx tmp;
  
-  emit_insn (gen_load_tp_soft ());

+  if (TARGET_FDPIC)
+   {
+ rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
+
+ emit_insn (gen_load_tp_soft_fdpic ());
+
+ /* Restore r9.  */
+ XVECEXP (par, 0, 0)
+   = gen_rtx_UNSPEC (VOIDmode,
+ gen_rtvec (2, gen_rtx_REG (Pmode, FDPIC_REGNUM),
+get_hard_reg_initial_val (Pmode, 
FDPIC_REGNUM)),
+ UNSPEC_PIC_RESTORE);
+ XVECEXP (par, 0, 1) = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, 
FDPIC_REGNUM));
+ XVECEXP (par, 0, 2)
+   = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, FDPIC_REGNUM));


There is the same problem here as in patch 04/21, fixed in follow-up version.


+ emit_insn (par);
+   }
+  else
+   emit_insn (gen_load_tp_soft ());
  
tmp = gen_rtx_REG (SImode, R0_REGNUM);

emit_move_insn (target, tmp);
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 09a0701..6fea087 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -11485,12 +11485,25 @@
  )
  
  ;; Doesn't clobber R1-R3.  Must use r0 for the first operand.

+(define_insn "load_tp_soft_fdpic"
+  [(set (reg:SI 0) (unspec:SI [(const_int 0)] UNSPEC_TLS))
+   (clobber (reg:SI 9))
+   (clobber (reg:SI LR_REGNUM))
+   (clobber (reg:SI IP_REGNUM))
+   (clobber (reg:CC CC_REGNUM))]
+  "TARGET_SOFT_TP && TARGET_FDPIC"
+  "bl\\t__aeabi_read_tp\\t@ load_tp_soft"
+  [(set_attr "conds" "clob")
+   (set_attr "type" "branch")]
+)
+
+;; Doesn't clobber R1-R3.  Must use r0 for the first operand.
  (define_insn "load_tp_soft"
[(set (reg:SI 0) (unspec:SI [(const_int 0)] UNSPEC_TLS))
 (clobber (reg:SI LR_REGNUM))
 (clobber (reg:SI IP_REGNUM))
 (clobber (reg:CC CC_REGNUM))]
-  "TARGET_SOFT_TP"
+  "TARGET_SOFT_TP && !TARGET_FDPIC"
"bl\\t__aeabi_read_tp\\t@ load_tp_soft"
[(set_attr "conds" "clob")
 (set_attr "type" "branch")]





Re: [ARM/FDPIC v3 04/21] [ARM] FDPIC: Add support for FDPIC for arm architecture

2018-10-19 Thread Christophe Lyon

On 12/10/2018 12:45, Richard Earnshaw (lists) wrote:

On 11/10/18 14:34, Christophe Lyon wrote:

The FDPIC register is hard-coded to r9, as defined in the ABI.

We have to disable tailcall optimizations if we don't know if the
target function is in the same module. If not, we have to set r9 to
the value associated with the target module.

When generating a symbol address, we have to take into account whether
it is a pointer to data or to a function, because different
relocations are needed.

2018-XX-XX  Christophe Lyon  
Mickaël Guêné 

* config/arm/arm-c.c (__FDPIC__): Define new pre-processor macro
in FDPIC mode.
* config/arm/arm-protos.h (arm_load_function_descriptor): Declare
new function.
* config/arm/arm.c (arm_option_override): Define pic register to
FDPIC_REGNUM.
(arm_function_ok_for_sibcall) Disable sibcall optimization if we


Missing colon.


have no decl or go through PLT.
(arm_load_pic_register): Handle TARGET_FDPIC.
(arm_is_segment_info_known): New function.
(arm_pic_static_addr): Add support for FDPIC.
(arm_load_function_descriptor): New function.
(arm_assemble_integer): Add support for FDPIC.
* config/arm/arm.h (PIC_OFFSET_TABLE_REG_CALL_CLOBBERED):
Define. (FDPIC_REGNUM): New define.
* config/arm/arm.md (call): Add support for FDPIC.
(call_value): Likewise.
(*restore_pic_register_after_call): New pattern.
(untyped_call): Disable if FDPIC.
(untyped_return): Likewise.
* config/arm/unspecs.md (UNSPEC_PIC_RESTORE): New.



Other comments inline.


diff --git a/gcc/config/arm/arm-c.c b/gcc/config/arm/arm-c.c
index 4471f79..90733cc 100644
--- a/gcc/config/arm/arm-c.c
+++ b/gcc/config/arm/arm-c.c
@@ -202,6 +202,8 @@ arm_cpu_builtins (struct cpp_reader* pfile)
builtin_define ("__ARM_EABI__");
  }
  
+  def_or_undef_macro (pfile, "__FDPIC__", TARGET_FDPIC);

+
def_or_undef_macro (pfile, "__ARM_ARCH_EXT_IDIV__", TARGET_IDIV);
def_or_undef_macro (pfile, "__ARM_FEATURE_IDIV", TARGET_IDIV);
  
diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h

index 0dfb3ac..28cafa8 100644
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -136,6 +136,7 @@ extern int arm_max_const_double_inline_cost (void);
  extern int arm_const_double_inline_cost (rtx);
  extern bool arm_const_double_by_parts (rtx);
  extern bool arm_const_double_by_immediates (rtx);
+extern rtx arm_load_function_descriptor (rtx funcdesc);
  extern void arm_emit_call_insn (rtx, rtx, bool);
  bool detect_cmse_nonsecure_call (tree);
  extern const char *output_call (rtx *);
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 8810df5..92ae24b 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -3470,6 +3470,14 @@ arm_option_override (void)
if (flag_pic && TARGET_VXWORKS_RTP)
  arm_pic_register = 9;
  
+  /* If in FDPIC mode then force arm_pic_register to be r9.  */

+  if (TARGET_FDPIC)
+{
+  arm_pic_register = FDPIC_REGNUM;
+  if (TARGET_ARM_ARCH < 7)
+   error ("FDPIC mode is not supported on architectures older than Armv7");


What properties of FDPIC impose this requirement?  Does it also apply to
Armv8-m.baseline?


In fact, there was miscommunication on my side, resulting in a misunderstanding 
between Kyrill and myself, which I badly translated into this condition.

My initial plan was to submit a patch series tested on v7, and send the patches needed to 
support older architectures as a follow-up. The proper restriction is actually "CPUs 
that do not support ARM or Thumb2". As you may have noticed during the iterations of 
this patch series, I had failed to remove partial Thumb1 support hunks.

So really this should be rephrased, and rewritten as "FDPIC mode is supported on 
architecture versions that support ARM or Thumb-2", if that suits you. And the 
condition should thus be:
if (! TARGET_ARM && ! TARGET_THUMB2)
  error ("...")

This would also exclude Armv8-m.baseline, since it doesn't support Thumb2.
As a side note, I tried to build GCC master (without my patches) 
--with-cpu=cortex-m23, and both targets arm-eabi and arm-linux-gnueabi failed 
to build.

For arm-eabi, there are problems in newlib:
newlib/libc/sys/arm/crt0.S:145: Error: lo register required -- `add sl,r2,#256'
newlib/libc/sys/arm/trap.S:88: Error: lo register required -- `sub ip,sp,ip'

For arm-linux-gnueabi, the failure happens while building libgcc:
/home/christophe.lyon/src/GCC/sources/newlib/newlib/libc/machine/arm/setjmp.S:169:
 Error: selected processor does not support ARM opcodes
/newlib/newlib/libc/machine/arm/setjmp.S:176: Error: attempt to use an ARM 
instruction on a Thumb-only processor -- `stmea a1!,{ v1-v7,fp,ip,sp,lr }'
/newlib/newlib/libc/machine/arm/setjmp.S:186: Error: attempt to use an ARM 
instruction on a Thumb-only processor -- `mov a1,#0'
/newlib/newlib/libc/machine/arm/set

Re: [PATCH] Add sinh(tanh(x)) and cosh(tanh(x)) rules

2018-10-19 Thread Wilco Dijkstra
Hi,

>> Did you enable FMA? I'd expect 1 - x*x to be accurate with FMA, so the 
>> relative error
>> should be much better. If there is no FMA, 2*(1-fabs(x)) - (1-fabs(x))^2 
>> should be
>> more accurate when abs(x)>0.5 and still much faster.
>
>No, but I will check how to enable it if FMA is available.
> I did a minor test with your formula and the precision improved a lot.

> But now I am puzzled about how did you come up with that formula :-).
> I am able to prove equality, but how did you know it was going to be
> more precise?

Basically when x is close to 1, the top N bits in the mantissa of x will be ones.
Then x*x has one bits in the top 2*N bits in the mantissa. Ie. we lose N bits of
useful information in the multiply - problematic when N gets close to the number
of mantissa bits. In contrast FMA computes the fully accurate result due to
cancellation of the top 2*N one-bits in the subtract.

If we can use (1-x) instead of x in the evaluation, we avoid losing accuracy in 
the
multiply when x is close to 1. Then it's basic algebra to find an equivalent 
formula
that can produce 1-x^2 using 1-x. For example (1+x)*(1-x) will work fine too 
(using 1+x loses 1 low bit of x).

Note that these alternative evaluations lose accuracy close to 0 in exactly the
same way, so if no FMA is available you'd need to select between the 2 cases.

Wilco

[PATCH] Fix testsuite failures in Debug Mode

2018-10-19 Thread Jonathan Wakely

This fixes the following testsuite failures on ia32 when compiled with
-D_GLIBCXX_DEBUG:

FAIL: 23_containers/map/modifiers/erase/dr130-linkage-check.cc
FAIL: 23_containers/multimap/modifiers/erase/dr130-linkage-check.cc
FAIL: 23_containers/multiset/modifiers/erase/dr130-linkage-check.cc
FAIL: 23_containers/set/modifiers/erase/dr130-linkage-check.cc

The normal mode containers already use the abi-tag to mangle these
overloads differently, but the debug mode versions weren't fixed.

* include/debug/map.h (map::erase(iterator)): Add abi-tag so that
C++11 version mangles differently from incompatible C++98 version.
* include/debug/multimap.h (multimap::erase(iterator)): Likewise.
* include/debug/multiset.h (multiset::erase(iterator))
(multiset::erase(const_iterator, const_iterator)): Likewise.
* include/debug/set.h (set::erase(iterator))
(set::erase(const_iterator, const_iterator)): Likewise.

Tested x86_64-linux (with -m32) and committed to trunk.


commit 4bf815f73702a25d66a715445913a676edab3784
Author: Jonathan Wakely 
Date:   Fri Oct 19 13:46:56 2018 +0100

Fix testsuite failures in Debug Mode

This fixes the following testsuite failures on ia32 when compiled with
-D_GLIBCXX_DEBUG:

FAIL: 23_containers/map/modifiers/erase/dr130-linkage-check.cc
FAIL: 23_containers/multimap/modifiers/erase/dr130-linkage-check.cc
FAIL: 23_containers/multiset/modifiers/erase/dr130-linkage-check.cc
FAIL: 23_containers/set/modifiers/erase/dr130-linkage-check.cc

The normal mode containers already use the abi-tag to mangle these
overloads differently, but the debug mode versions weren't fixed.

* include/debug/map.h (map::erase(iterator)): Add abi-tag so that
C++11 version mangles differently from incompatible C++98 version.
* include/debug/multimap.h (multimap::erase(iterator)): Likewise.
* include/debug/multiset.h (multiset::erase(iterator))
(multiset::erase(const_iterator, const_iterator)): Likewise.
* include/debug/set.h (set::erase(iterator))
(set::erase(const_iterator, const_iterator)): Likewise.

diff --git a/libstdc++-v3/include/debug/map.h b/libstdc++-v3/include/debug/map.h
index 6821fc561e4..cb29e9ee2a5 100644
--- a/libstdc++-v3/include/debug/map.h
+++ b/libstdc++-v3/include/debug/map.h
@@ -482,6 +482,7 @@ namespace __debug
return { _Base::erase(__position.base()), this };
   }
 
+  _GLIBCXX_ABI_TAG_CXX11
   iterator
   erase(iterator __position)
   { return erase(const_iterator(__position)); }
diff --git a/libstdc++-v3/include/debug/multimap.h 
b/libstdc++-v3/include/debug/multimap.h
index d16ed47ab74..38e38c8c1ce 100644
--- a/libstdc++-v3/include/debug/multimap.h
+++ b/libstdc++-v3/include/debug/multimap.h
@@ -361,6 +361,7 @@ namespace __debug
return { _Base::erase(__position.base()), this };
   }
 
+  _GLIBCXX_ABI_TAG_CXX11
   iterator
   erase(iterator __position)
   { return erase(const_iterator(__position)); }
diff --git a/libstdc++-v3/include/debug/multiset.h 
b/libstdc++-v3/include/debug/multiset.h
index bf154ecad6e..2dd2f731627 100644
--- a/libstdc++-v3/include/debug/multiset.h
+++ b/libstdc++-v3/include/debug/multiset.h
@@ -324,6 +324,7 @@ namespace __debug
 #endif // C++17
 
 #if __cplusplus >= 201103L
+  _GLIBCXX_ABI_TAG_CXX11
   iterator
   erase(const_iterator __position)
   {
@@ -358,6 +359,7 @@ namespace __debug
   }
 
 #if __cplusplus >= 201103L
+  _GLIBCXX_ABI_TAG_CXX11
   iterator
   erase(const_iterator __first, const_iterator __last)
   {
diff --git a/libstdc++-v3/include/debug/set.h b/libstdc++-v3/include/debug/set.h
index c406fb424f4..d8dbaf268a2 100644
--- a/libstdc++-v3/include/debug/set.h
+++ b/libstdc++-v3/include/debug/set.h
@@ -338,6 +338,7 @@ namespace __debug
 #endif // C++17
 
 #if __cplusplus >= 201103L
+  _GLIBCXX_ABI_TAG_CXX11
   iterator
   erase(const_iterator __position)
   {
@@ -370,6 +371,7 @@ namespace __debug
   }
 
 #if __cplusplus >= 201103L
+  _GLIBCXX_ABI_TAG_CXX11
   iterator
   erase(const_iterator __first, const_iterator __last)
   {


Re: [wwwdocs] Announce Solaris 10 obsoletion

2018-10-19 Thread Gerald Pfeifer
On Tue, 16 Oct 2018, Rainer Orth wrote:
> The following patch documents the Solaris 10 obsoletion in the GCC 9
> changes.html.  I've based this on the GCC 4.9 text which allowed for
> obsoletion of several targets.  Tested by inspection in Firefox.
> 
> Ok to install?

Yes.  And technically as maintainer you don't need to ask for 
approval, though I'm happy to have a look when asked. :-)

> As a followup, we should update criteria.html to name
> sparc-sun-solaris2.11 as primary platform.  Although mechanical, I
> suspect this requires SC approval?

We haven't formally defined a policy, but for adjustments like 
that to (primary) platforms we've been deferring to the respective 
maintainers all along.  So, just go for it unless anyone fervently
disagrees.

Gerald (with spotty/non-existent connectivity most of this week)


Re: [00/10][RFC] Splitting the C and C++ concept of "complete type"

2018-10-19 Thread Joseph Myers
On Fri, 19 Oct 2018, Richard Sandiford wrote:

> Joseph Myers  writes:
> > On Thu, 18 Oct 2018, Richard Sandiford wrote:
> >> - Type introspection for things like parsing format strings
> >> 
> >>   It sounded like the type descriptors would be fixed-sized types,
> >>   a bit like a C version of std::type_info.
> >
> > It wasn't clear if people might also want to e.g. extract a list of all 
> > members of a structure type from such an object (which of course could 
> > either involve variable-sized data, or fixed-size data pointing to arrays, 
> > or something else along those lines).
> 
> OK.  But wouldn't that basically be a tree structure?  Or a flexible
> array if flattened?  It doesn't sound like it would need changes to

I don't know (but you mention flexible arrays, and initializers for 
flexible array members, where the size of the object ends up bigger than 
sizeof its type, are also a GNU extension).  Raise that question in the 
WG14 discussion; I'm not the right person to answer questions around 
everyone else's ideas for extensions to the C type system.  As far as I'm 
concerned, this is all a preliminary exploration of ideas that might or 
might not end up involving type system additions, and WG14 is a much 
better place for that than separate single-implementation discussions - 
the point should be to float and explore possible ideas in this space, and 
their benefits and disadvantages, rather than pushing too early for one 
particular approach.  And given how much C++ tends to use class-based 
interfaces where C uses built-in types (complex numbers, decimal floating 
point, ...), I definitely do not want to start from an assumption that the 
right interface or language concepts for this in C++ should look like 
those in C.

For me, thinking of SVE types as something like VLAs but passed by value 
seems a more natural model in C than having them sizeless - but if they 
are sizeless, that pushes them closer to other ideas for types that might 
also be sizeless (and if those other use cases are indeed best specified 
using sizeless types, that provides more justification for using sizeless 
types for SVE).

> > Is there something wrong with a model in C++ where these types have
> > some fixed small sizeof (which carries through to sizeof for
> > containing types), but where different ABIs are used for them, and
> > where much the same raw memory operations on them are disallowed as
> > would be disallowed for a class-based implementation?  (Whether
> > implemented entirely in the compiler or through some combination of
> > the compiler and class implementations in a header - though with the
> > latter you might still need some new language feature, albeit only for
> > use within the header rather than more generally.)
> 
> Having different ABIs would defeat the primary purpose of the extension,
> which is to provide access to the single-vector SVE ABI types in C and C++.

My suggestion is that the ABI for C++ would be different from that 
resulting for a class-based implementation using purely standard C++ (the 
difference being to make it the same as the SVE C API - as with the 
decimal floating-point classes).

-- 
Joseph S. Myers
jos...@codesourcery.com


[PATCH] Fix PR87657

2018-10-19 Thread Richard Biener


The following fixes an ICE I introduced in the x86 backend by not 
considering word_mode vectorization.

Bootstrap & regtest running on x86_64-unknown-linux-gnu, will apply
after that succeeded.

Richard.

2018-10-19  Richard Biener  

PR target/87657
* config/i386/i386.c (ix86_builtin_vectorization_cost): Use
TYPE_VECTOR_SUBPARTS and avoid relying on vector mode.

* gcc.target/i386/pr87657.c: New testcase.

Index: gcc/config/i386/i386.c
===
--- gcc/config/i386/i386.c  (revision 265312)
+++ gcc/config/i386/i386.c  (working copy)
@@ -45173,9 +45173,8 @@ ix86_builtin_vectorization_cost (enum ve
 
   case vec_construct:
{
- gcc_assert (VECTOR_MODE_P (mode));
  /* N element inserts into SSE vectors.  */
- int cost = GET_MODE_NUNITS (mode) * ix86_cost->sse_op;
+ int cost = TYPE_VECTOR_SUBPARTS (vectype) * ix86_cost->sse_op;
  /* One vinserti128 for combining two SSE vectors for AVX256.  */
  if (GET_MODE_BITSIZE (mode) == 256)
cost += ix86_vec_cost (mode, ix86_cost->addss);
Index: gcc/testsuite/gcc.target/i386/pr87657.c
===
--- gcc/testsuite/gcc.target/i386/pr87657.c (nonexistent)
+++ gcc/testsuite/gcc.target/i386/pr87657.c (working copy)
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-slp-vectorize -fno-vect-cost-model -mno-sse" } */
+
+int x;
+
+void foo (short a, short b)
+{
+  ((short *)&x)[0] = a;
+  ((short *)&x)[1] = b;
+}
+
+#if __SIZEOF_LONG__ == 8
+long y;
+
+void bar (short a, short b)
+{
+  ((short *)&y)[0] = a;
+  ((short *)&y)[1] = b;
+  ((short *)&y)[2] = a;
+  ((short *)&y)[3] = b;
+}
+#endif


Re: [PATCH] Add sinh(tanh(x)) and cosh(tanh(x)) rules

2018-10-19 Thread Giuliano Augusto Faulin Belinassi
Hello,

> Did you enable FMA? I'd expect 1 - x*x to be accurate with FMA, so the 
> relative error
> should be much better. If there is no FMA, 2*(1-fabs(x)) - (1-fabs(x))^2 
> should be
> more accurate when abs(x)>0.5 and still much faster.

No, but I will check how to enable it if FMA is available.
I did a minor test with your formula and the precision improved a lot.
Here is an example for floats

with input  :  = 9.988079071044921875e-01
cosh: before:  = 2.048000e+03
cosh: after :  = 2.048000244140625000e+03
cosh: mpfr  :  = 2.0486103515897848424084406334262726138617589463e+03
error before:  = 6.10351589784842408440633426272613861758946325324235e-05
error after :  = 1.83105466021515759155936657372738613824105367467577e-04

But now I am puzzled about how did you come up with that formula :-).
I am able to prove equality, but how did you know it was going to be
more precise?
On Thu, Oct 18, 2018 at 7:41 PM Wilco Dijkstra  wrote:
>
> Hi,
>
> > Well, I compared the results before and after the simplifications with a 
> > 512-bit
> > precise mpfr value. Unfortunately, I found that sometimes the error is very
> > noticeable :-( .
>
> Did you enable FMA? I'd expect 1 - x*x to be accurate with FMA, so the 
> relative error
> should be much better. If there is no FMA, 2*(1-fabs(x)) - (1-fabs(x))^2 
> should be
> more accurate when abs(x)>0.5 and still much faster.
>
> Wilco
>
>


Re: [PATCH] Add splay-tree "view" for bitmap

2018-10-19 Thread Richard Biener
On Thu, 18 Oct 2018, David Malcolm wrote:

> On Thu, 2018-10-18 at 15:09 +0200, Richard Biener wrote:
> > PR63155 made me pick up this old work from Steven, it turns our
> > linked-list implementation to a two-mode one with one being a
> > splay tree featuring O(log N) complexity for find/remove.
> > 
> > Over Stevens original patch I added a bitmap_tree_to_vec helper
> > that I use from the debug/print methods to avoid changing view
> > there.  In theory the bitmap iterator could get a "stack"
> > as well and we could at least support EXECUTE_IF_SET_IN_BITMAP.
> > 
> > This can be used to fix the two biggest bottlenecks in the PRs
> > testcase, namely SSA propagator worklist handling and out-of-SSA
> > coalesce list building.  perf shows the following data, first
> > unpatched, second patched - also watch the third column (samples)
> > when comparing percentages.
> > 
> [...snip...]
> 
> > Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.
> > 
> > Any objections?
> > 
> > Thanks,
> > Richard.
> > 
> > 2018-10-18  Steven Bosscher 
> > Richard Biener  
> > 
> > * bitmap.h: Update data structure documentation, including a
> > description of bitmap views as either linked-lists or splay
> > trees.
> 
> [...snip...]
> 
> From a "correctness" perspective, we have some existing unit-test
> coverage for bitmap via selftests in bitmap.c.  Perhaps those tests
> could be generalized to verify that the two different implementations
> work, and that the conversions work correctly?
> 
> e.g. currently we have:
> 
> static void
> test_clear_bit_in_middle ()
> {
>   bitmap b = bitmap_gc_alloc ();
> 
>   /* Set b to [100..200].  */
>   bitmap_set_range (b, 100, 100);
>   ASSERT_EQ (100, bitmap_count_bits (b));
> 
>   /* Clear a bit in the middle.  */
>   bool changed = bitmap_clear_bit (b, 150);
>   ASSERT_TRUE (changed);
>   ASSERT_EQ (99, bitmap_count_bits (b));
>   ASSERT_TRUE (bitmap_bit_p (b, 149));
>   ASSERT_FALSE (bitmap_bit_p (b, 150));
>   ASSERT_TRUE (bitmap_bit_p (b, 151));
> }
> 
> Maybe this could change to:
> 
> static void
> test_clear_bit_in_middle ()
> {
>   bitmap b = bitmap_gc_alloc ();
> 
>   FOR_EACH_BITMAP_IMPL (b)
> {
>   /* Set b to [100..200].  */
>   bitmap_set_range (b, 100, 100);
>   ASSERT_EQ (100, bitmap_count_bits (b));
> }
> 
>   bool first_time = true;
>   /* Clear a bit in the middle.  */
>   FOR_EACH_BITMAP_IMPL (b)
> {
>   if (first_time)
> {
>   bool changed = bitmap_clear_bit (b, 150);
>   ASSERT_TRUE (changed);
>   first_time = false;
> }
>   ASSERT_EQ (99, bitmap_count_bits (b));
>   ASSERT_TRUE (bitmap_bit_p (b, 149));
>   ASSERT_FALSE (bitmap_bit_p (b, 150));
>   ASSERT_TRUE (bitmap_bit_p (b, 151));
> }
> }
> 
> ...or somesuch, where maybe FOR_EACH_BITMAP_IMPL (b) could try linked-
> list, then splay tree, then linked-list, converting "b" as it goes.  
> This would hopefully give us a lot of test coverage for the various
> operations in both modes, and for the conversion routines (in both
> directions, assuming that both directions are supported).

Hmm, unfortunately the splay-tree variant doesn't implement
bitmap_count_bits or bitmap_set_range.

Note some of the missing functionality might need implementation
of a bitmap element iterator (maybe I should transform my
bitmap_tree_to_vec to that instead).

But I'm quite sure bitmaps are extensively tested with GCC
bootstrap ;)

Richard.

> Hope this is constructive
> Dave
> 
> 

-- 
Richard Biener 
SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 
21284 (AG Nuernberg)


Re: [PATCH] Add splay-tree "view" for bitmap

2018-10-19 Thread Richard Biener
On Fri, 19 Oct 2018, Richard Sandiford wrote:

> Richard Biener  writes:
> > On October 18, 2018 11:05:32 PM GMT+02:00, Richard Sandiford
> >  wrote:
> >>Richard Biener  writes:
> >>> On Thu, 18 Oct 2018, Richard Sandiford wrote:
> >>>
>  Richard Biener  writes:
>  > PR63155 made me pick up this old work from Steven, it turns our
>  > linked-list implementation to a two-mode one with one being a
>  > splay tree featuring O(log N) complexity for find/remove.
>  >
>  > Over Stevens original patch I added a bitmap_tree_to_vec helper
>  > that I use from the debug/print methods to avoid changing view
>  > there.  In theory the bitmap iterator could get a "stack"
>  > as well and we could at least support EXECUTE_IF_SET_IN_BITMAP.
>  >
>  > This can be used to fix the two biggest bottlenecks in the PRs
>  > testcase, namely SSA propagator worklist handling and out-of-SSA
>  > coalesce list building.  perf shows the following data, first
>  > unpatched, second patched - also watch the third column (samples)
>  > when comparing percentages.
>  >
>  > -O0
>  > -   18.19%17.35%   407  cc1  cc1   [.]
> >>bitmap_set_b▒
>  >- bitmap_set_bit   
> >>▒
>  >   + 8.77% create_coalesce_list_for_region 
> >>▒
>  >   + 4.21% calculate_live_ranges   
> >>▒
>  >   + 2.02% build_ssa_conflict_graph
> >>▒
>  >   + 1.66% insert_phi_nodes_for
> >>▒
>  >   + 0.86% coalesce_ssa_name  
>  > patched:
>  > -   12.39%10.48%   129  cc1  cc1   [.]
> >>bitmap_set_b▒
>  >- bitmap_set_bit   
> >>▒
>  >   + 5.27% calculate_live_ranges   
> >>▒
>  >   + 2.76% insert_phi_nodes_for
> >>▒
>  >   + 1.90% create_coalesce_list_for_region 
> >>▒
>  >   + 1.63% build_ssa_conflict_graph
> >>▒
>  >   + 0.35% coalesce_ssa_name   
>  >
>  > -O1
>  > -   17.53%17.53%   842  cc1  cc1   [.]
> >>bitmap_set_b▒
>  >- bitmap_set_bit   
> >>▒
>  >   + 12.39% add_ssa_edge   
> >>▒
>  >   + 1.48% create_coalesce_list_for_region 
> >>▒
>  >   + 0.82% solve_constraints   
> >>▒
>  >   + 0.71% calculate_live_ranges   
> >>▒
>  >   + 0.64% add_implicit_graph_edge 
> >>▒
>  >   + 0.41% insert_phi_nodes_for
> >>▒
>  >   + 0.34% build_ssa_conflict_graph  
>  > patched:
>  > -5.79% 5.00%   167  cc1  cc1   [.]
> >>bitmap_set_b▒
>  >- bitmap_set_bit   
> >>▒
>  >   + 1.41% add_ssa_edge
> >>▒
>  >   + 0.88% calculate_live_ranges   
> >>▒
>  >   + 0.75% add_implicit_graph_edge 
> >>▒
>  >   + 0.68% solve_constraints   
> >>▒
>  >   + 0.48% insert_phi_nodes_for
> >>▒
>  >   + 0.45% build_ssa_conflict_graph   
>  >
>  > -O3
>  > -   12.37%12.34%  1145  cc1  cc1   [.]
> >>bitmap_set_b▒
>  >- bitmap_set_bit   
> >>▒
>  >   + 9.14% add_ssa_edge
> >>▒
>  >   + 0.80% create_coalesce_list_for_region 
> >>▒
>  >   + 0.69% add_implicit_graph_edge 
> >>▒
>  >   + 0.54% solve_constraints   
> >>▒
>  >   + 0.34% calculate_live_ranges   
> >>▒
>  >   + 0.27% insert_phi_nodes_for
> >>▒
>  >   + 0.21% build_ssa_conflict_graph 
>  > -4.36% 3.86%   227  cc1  cc1   [.]
> >>bitmap_set_b▒
>  >- bitmap_set_bit 

Re: [PATCH] Add splay-tree "view" for bitmap

2018-10-19 Thread Richard Biener
On Fri, 19 Oct 2018, Steven Bosscher wrote:

> On Fri, Oct 19, 2018 at 8:46 AM Richard Biener <> wrote:
> > Yeah. I also noticed some 'obvious' shortcomings in the heuristics...
> > I guess in the end well predicted branches in the out of line code are 
> > important...

I specifically meant the fact that we happily update ->current to
->first and we do not check ->first->index == indx.  We also
decide when we want to walk backwards from current via
head->indx / 2 < indx rather than (head->indx - head->first->indx) / 2
+ head->first->indx < indx - but that's a heuristic assuming evenly
distributed set bits anyway.

> What also would help is to put bitmaps on their own obstack to improve
> cache locality.

That's true.  I guess increasing bitmap_element size to cover a
whole cache-line would be excessive ;)  On lp64 targets its size
is currently 40 bytes which makes multiple ones not a perfect fit
for a usual cacheline (128 bytes).

> 
> As for the patch, I never hacked it with "production code" in mind, it
> was just a proof of concept. Not all of it is optimal or even safe
> as-is. For example you probably should add
> "gcc_checking_assert(!(BITMAP)->tree-form)" tests in the
> bmp_iter_*_init functions.

Those are already there.

> And perhaps semi-splaying trees work better
> for the use cases of GCC (x.f. "Rehabilitation of an unloved child:
> semi-splaying"). I implemented classic splay trees because I could not
> find a semi-splay tree implementation in any of the usual text books
> while classic splay tree implementations were given in all of those
> books ;-)

I think the classic splay tree mimics best the existing behavior
of the linked-list implementation (in find_element).  Another
thing to note would be that the ->current cache is basically
unused for the tree representation (it should always equal ->first),
but we do neither document that fact nor exploit it by not updating
->indx or ->current.

Anyway, I'll give the paper a read.

Overall it's clear that there are places in GCC and testcases that
make it necessary to address the linear complexity of our bitmap
implementation...

Thanks,
Richard.


Re: [00/10][RFC] Splitting the C and C++ concept of "complete type"

2018-10-19 Thread Richard Sandiford
Joseph Myers  writes:
> On Thu, 18 Oct 2018, Richard Sandiford wrote:
>> - Type introspection for things like parsing format strings
>> 
>>   It sounded like the type descriptors would be fixed-sized types,
>>   a bit like a C version of std::type_info.
>
> It wasn't clear if people might also want to e.g. extract a list of all 
> members of a structure type from such an object (which of course could 
> either involve variable-sized data, or fixed-size data pointing to arrays, 
> or something else along those lines).

OK.  But wouldn't that basically be a tree structure?  Or a flexible
array if flattened?  It doesn't sound like it would need changes to
the type system.  We can already describe this kind of thing with tree
types in GCC.  (The memory needed to represent the data could of course
be allocated using a single block of stack if that's what's wanted.)

>> So I didn't see anything there that was really related, or anything that
>> relied on sizeof being variable (which as I say seems to be a very high
>> hurdle for C++).
>
> The references you gave regarding the removal of one version of VLAs from 
> C++ didn't seem to make clear whether there were supposed to be general 
> issues with variable-size types fitting in the overall C++ object model, 
> or whether the concerns were more specific to things in the particular 
> proposal - but in either case, the SVE proposals would need to be compared 
> to the actual specific concerns.

But this is also one of my concerns about moving this discussion to the
WG14 list.  It doesn't seem to be publicly readable, and I only knew
about the bignum discussion because you gave me a direct link to the
first article in the thread.  I had to read the rest by wgetting the
individual messages.  So any objections raised there would presumably
be shrouded in mystery to most people, and wouldn't e.g. show up in a
web search.

If we move it to a different forum, I'd rather it be a public one that
would treat C and C++ equally.  But maybe such a thing doesn't exist. :-)

> Anyway, the correct model in C++ need not be the same as the correct model 
> in C.  For example, for decimal floating point, C++ chose a class-based 
> model whereas C chose _Decimal* keywords (and then there's some compiler 
> magic to use appropriate ABIs for std::decimal types, I think).
>
> If you were implementing the SVE API for C++ for non-SVE hardware, you 
> might have a class-based implementation where the class internally 
> contains a pointer to underlying storage and does allocation / 
> deallocation, for example - sizeof would give some fixed small size to the 
> objects with that class type, but e.g. copying them with memcpy would not 
> work correctly (and would be diagnosed with -Wclass-memaccess).

One important point here is that the SVE API isn't a new API whose
primary target happens to be SVE.  It's an API whose *only* target
is SVE.  Anyone wanting to write vector code that runs on non-SVE
hardware should use something that's designed to be cross-platform,
(e.g. P0214 or whatever).  They certainly shouldn't be using this.

Like other vector intrinsics, the SVE ACLE is supposed to be the last
line of defence before resorting to asm, and isn't designed to be any
more portable than asm would be.

> Is there something wrong with a model in C++ where these types have
> some fixed small sizeof (which carries through to sizeof for
> containing types), but where different ABIs are used for them, and
> where much the same raw memory operations on them are disallowed as
> would be disallowed for a class-based implementation?  (Whether
> implemented entirely in the compiler or through some combination of
> the compiler and class implementations in a header - though with the
> latter you might still need some new language feature, albeit only for
> use within the header rather than more generally.)

Having different ABIs would defeat the primary purpose of the extension,
which is to provide access to the single-vector SVE ABI types in C and C++.
We want types that in both C and C++ represent the contents of SVE vector
and predicate registers.  E.g.:

  svfloat64_t vector_sin(svbool_t pg, svfloat64_t vx)

has to map pg to a predicate register (P0), vx to a vector register (Z0)
and return the result in a vector register (Z0), in both C and C++.

The main objection to the details of the sizeless type proposal seems
to be that sizeof was too useful for us to make it invalid.  But if
sizeof has different values for C and C++, wouldn't that defeat the
point?  Users would be forced to use the SVE vector length functions
after all.  Also, for:

  void (*update_vector)(svfloat64_t *px);

how would the caller of update_vector know whether the target
function is using the C or the C++ representation of svfloat64_t
when accessing *px?

> Even if that model doesn't work for some reason, it doesn't mean the only 
> alternatives for C++ are something like VLAs or a new concept of sizeless 
> types for C++ 

Re: [PATCH] i386: Enable AVX512 memory broadcast for FP add

2018-10-19 Thread H.J. Lu
On 10/19/18, Uros Bizjak  wrote:
> On Thu, Oct 18, 2018 at 11:44 PM H.J. Lu  wrote:
>>
>> Many AVX512 vector operations can broadcast from a scalar memory source.
>> This patch enables memory broadcast for FP add operations.
>>
>> gcc/
>>
>> PR target/72782
>> * config/i386/sse.md
>> (*3_bcst_1): New.
>> (*add3_bcst_2): Likewise.
>>
>> gcc/testsuite/
>>
>> PR target/72782
>> * gcc.target/i386/avx512-binop-1.h: New file.
>> * gcc.target/i386/avx512-binop-2.h: Likewise.
>> * gcc.target/i386/avx512-binop-3.h: Likewise.
>> * gcc.target/i386/avx512-binop-4.h: Likewise.
>> * gcc.target/i386/avx512-binop-5.h: Likewise.
>> * gcc.target/i386/avx512-binop-6.h: Likewise.
>> * gcc.target/i386/avx512f-add-df-zmm-1.c: Likewise.
>> * gcc.target/i386/avx512f-add-sf-zmm-1.c: Likewise.
>> * gcc.target/i386/avx512f-add-sf-zmm-2.c: Likewise.
>> * gcc.target/i386/avx512f-add-sf-zmm-3.c: Likewise.
>> * gcc.target/i386/avx512f-add-sf-zmm-4.c: Likewise.
>> * gcc.target/i386/avx512f-add-sf-zmm-5.c: Likewise.
>> * gcc.target/i386/avx512f-add-sf-zmm-6.c: Likewise.
>> * gcc.target/i386/avx512f-sub-df-zmm-1.c: Likewise.
>> * gcc.target/i386/avx512f-sub-sf-zmm-1.c: Likewise.
>> * gcc.target/i386/avx512f-sub-sf-zmm-2.c: Likewise.
>> * gcc.target/i386/avx512f-sub-sf-zmm-3.c: Likewise.
>> * gcc.target/i386/avx512f-sub-sf-zmm-4.c: Likewise.
>> * gcc.target/i386/avx512f-sub-sf-zmm-5.c: Likewise.
>> * gcc.target/i386/avx512vl-add-sf-xmm-1.c: Likewise.
>> * gcc.target/i386/avx512vl-add-sf-ymm-1.c: Likewise.
>> * gcc.target/i386/avx512vl-sub-sf-xmm-1.c: Likewise.
>> * gcc.target/i386/avx512vl-sub-sf-ymm-1.c: Likewise.
>
> Please use "register_operand" when only registers are involved. Please
> change "nonimmediate_operand" to "register_operand" also in your
> previous FMA patch.
>
> OK with that change.
>

This is the patch I am checking in.

Thanks.

-- 
H.J.
From 32d29bd28539dd51fff39df43cbfe1979a426328 Mon Sep 17 00:00:00 2001
From: "H.J. Lu" 
Date: Tue, 2 Oct 2018 15:43:49 -0700
Subject: [PATCH] i386: Enable AVX512 memory broadcast for FP add

Many AVX512 vector operations can broadcast from a scalar memory source.
This patch enables memory broadcast for FP add operations.

gcc/

	PR target/72782
	* config/i386/sse.md
	(*3_bcst_1): New.
	(*add3_bcst_2): Likewise.

gcc/testsuite/

	PR target/72782
	* gcc.target/i386/avx512-binop-1.h: New file.
	* gcc.target/i386/avx512-binop-2.h: Likewise.
	* gcc.target/i386/avx512-binop-3.h: Likewise.
	* gcc.target/i386/avx512-binop-4.h: Likewise.
	* gcc.target/i386/avx512-binop-5.h: Likewise.
	* gcc.target/i386/avx512-binop-6.h: Likewise.
	* gcc.target/i386/avx512f-add-df-zmm-1.c: Likewise.
	* gcc.target/i386/avx512f-add-sf-zmm-1.c: Likewise.
	* gcc.target/i386/avx512f-add-sf-zmm-2.c: Likewise.
	* gcc.target/i386/avx512f-add-sf-zmm-3.c: Likewise.
	* gcc.target/i386/avx512f-add-sf-zmm-4.c: Likewise.
	* gcc.target/i386/avx512f-add-sf-zmm-5.c: Likewise.
	* gcc.target/i386/avx512f-add-sf-zmm-6.c: Likewise.
	* gcc.target/i386/avx512f-sub-df-zmm-1.c: Likewise.
	* gcc.target/i386/avx512f-sub-sf-zmm-1.c: Likewise.
	* gcc.target/i386/avx512f-sub-sf-zmm-2.c: Likewise.
	* gcc.target/i386/avx512f-sub-sf-zmm-3.c: Likewise.
	* gcc.target/i386/avx512f-sub-sf-zmm-4.c: Likewise.
	* gcc.target/i386/avx512f-sub-sf-zmm-5.c: Likewise.
	* gcc.target/i386/avx512vl-add-sf-xmm-1.c: Likewise.
	* gcc.target/i386/avx512vl-add-sf-ymm-1.c: Likewise.
	* gcc.target/i386/avx512vl-sub-sf-xmm-1.c: Likewise.
	* gcc.target/i386/avx512vl-sub-sf-ymm-1.c: Likewise.
---
 gcc/config/i386/sse.md| 28 +++
 .../gcc.target/i386/avx512-binop-1.h  | 12 
 .../gcc.target/i386/avx512-binop-2.h  | 12 
 .../gcc.target/i386/avx512-binop-3.h  | 15 ++
 .../gcc.target/i386/avx512-binop-4.h  | 12 
 .../gcc.target/i386/avx512-binop-5.h  | 14 ++
 .../gcc.target/i386/avx512-binop-6.h  | 14 ++
 .../gcc.target/i386/avx512f-add-df-zmm-1.c| 12 
 .../gcc.target/i386/avx512f-add-sf-zmm-1.c| 12 
 .../gcc.target/i386/avx512f-add-sf-zmm-2.c| 12 
 .../gcc.target/i386/avx512f-add-sf-zmm-3.c| 12 
 .../gcc.target/i386/avx512f-add-sf-zmm-4.c| 12 
 .../gcc.target/i386/avx512f-add-sf-zmm-5.c| 12 
 .../gcc.target/i386/avx512f-add-sf-zmm-6.c| 12 
 .../gcc.target/i386/avx512f-sub-df-zmm-1.c| 12 
 .../gcc.target/i386/avx512f-sub-sf-zmm-1.c| 12 
 .../gcc.target/i386/avx512f-sub-sf-zmm-2.c| 12 
 .../gcc.target/i386/avx512f-sub-sf-zmm-3.c| 12 
 .../gcc.target/i386/avx512f-sub-sf-zmm-4.c| 12 
 .../gcc.target/i386/avx512f-sub-sf-zmm-5.c| 12 
 .../gcc.target/i386/avx512vl-add-sf-xmm-1.

Re: [PATCH] i386: Enable AVX512 memory broadcast for FP add

2018-10-19 Thread H.J. Lu
On 10/19/18, Uros Bizjak  wrote:
> On Thu, Oct 18, 2018 at 11:44 PM H.J. Lu  wrote:
>>
>> Many AVX512 vector operations can broadcast from a scalar memory source.
>> This patch enables memory broadcast for FP add operations.
>>
>> gcc/
>>
>> PR target/72782
>> * config/i386/sse.md
>> (*3_bcst_1): New.
>> (*add3_bcst_2): Likewise.
>>
>> gcc/testsuite/
>>
>> PR target/72782
>> * gcc.target/i386/avx512-binop-1.h: New file.
>> * gcc.target/i386/avx512-binop-2.h: Likewise.
>> * gcc.target/i386/avx512-binop-3.h: Likewise.
>> * gcc.target/i386/avx512-binop-4.h: Likewise.
>> * gcc.target/i386/avx512-binop-5.h: Likewise.
>> * gcc.target/i386/avx512-binop-6.h: Likewise.
>> * gcc.target/i386/avx512f-add-df-zmm-1.c: Likewise.
>> * gcc.target/i386/avx512f-add-sf-zmm-1.c: Likewise.
>> * gcc.target/i386/avx512f-add-sf-zmm-2.c: Likewise.
>> * gcc.target/i386/avx512f-add-sf-zmm-3.c: Likewise.
>> * gcc.target/i386/avx512f-add-sf-zmm-4.c: Likewise.
>> * gcc.target/i386/avx512f-add-sf-zmm-5.c: Likewise.
>> * gcc.target/i386/avx512f-add-sf-zmm-6.c: Likewise.
>> * gcc.target/i386/avx512f-sub-df-zmm-1.c: Likewise.
>> * gcc.target/i386/avx512f-sub-sf-zmm-1.c: Likewise.
>> * gcc.target/i386/avx512f-sub-sf-zmm-2.c: Likewise.
>> * gcc.target/i386/avx512f-sub-sf-zmm-3.c: Likewise.
>> * gcc.target/i386/avx512f-sub-sf-zmm-4.c: Likewise.
>> * gcc.target/i386/avx512f-sub-sf-zmm-5.c: Likewise.
>> * gcc.target/i386/avx512vl-add-sf-xmm-1.c: Likewise.
>> * gcc.target/i386/avx512vl-add-sf-ymm-1.c: Likewise.
>> * gcc.target/i386/avx512vl-sub-sf-xmm-1.c: Likewise.
>> * gcc.target/i386/avx512vl-sub-sf-ymm-1.c: Likewise.
>
> Please use "register_operand" when only registers are involved. Please
> change "nonimmediate_operand" to "register_operand" also in your
> previous FMA patch.

I am checking in this patch.

-- 
H.J.
From 1cf96b0e724ac0f2d533dc1b8cc1589176431535 Mon Sep 17 00:00:00 2001
From: "H.J. Lu" 
Date: Fri, 19 Oct 2018 01:48:31 -0700
Subject: [PATCH] i386: Use register_operand in AVX512 FMA with memory
 broadcast

Use "register_operand" in AVX512 FMA with memory broadcast when only
registers are allowed.

	* config/i386/sse.md
	(*fma_fmadd__bcst_1):
	Replace nonimmediate_operand with register_operand.
	(*fma_fmadd__bcst_2):
	Likewise.
	(*fma_fmadd__bcst_3):
	Likewise.
---
 gcc/ChangeLog  | 10 ++
 gcc/config/i386/sse.md | 12 ++--
 2 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 88ec6863128..ca94f822d75 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,13 @@
+2018-10-19  H.J. Lu  
+
+	* config/i386/sse.md
+	(*fma_fmadd__bcst_1):
+	Replace nonimmediate_operand with register_operand.
+	(*fma_fmadd__bcst_2):
+	Likewise.
+	(*fma_fmadd__bcst_3):
+	Likewise.
+
 2018-10-19  Eric Botcazou  
 
 	* cfgexpand.c (expand_one_var): Use specific wording in error message
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 71684d63423..06144dc4662 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -3749,8 +3749,8 @@
 (define_insn "*fma_fmadd__bcst_1"
   [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v")
 	(fma:VF_AVX512
-	  (match_operand:VF_AVX512 1 "nonimmediate_operand" "0,v")
-	  (match_operand:VF_AVX512 2 "nonimmediate_operand" "v,0")
+	  (match_operand:VF_AVX512 1 "register_operand" "0,v")
+	  (match_operand:VF_AVX512 2 "register_operand" "v,0")
 	  (vec_duplicate:VF_AVX512
 	(match_operand: 3 "memory_operand" "m,m"]
   "TARGET_AVX512F && "
@@ -3763,8 +3763,8 @@
 	(fma:VF_AVX512
 	  (vec_duplicate:VF_AVX512
 	(match_operand: 1 "memory_operand" "m,m"))
-	  (match_operand:VF_AVX512 2 "nonimmediate_operand" "0,v")
-	  (match_operand:VF_AVX512 3 "nonimmediate_operand" "v,0")))]
+	  (match_operand:VF_AVX512 2 "register_operand" "0,v")
+	  (match_operand:VF_AVX512 3 "register_operand" "v,0")))]
   "TARGET_AVX512F && "
   "@
vfmadd132\t{%1, %3, %0|%0, %3, %1}
@@ -3775,10 +3775,10 @@
 (define_insn "*fma_fmadd__bcst_3"
   [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v")
 	(fma:VF_AVX512
-	  (match_operand:VF_AVX512 1 "nonimmediate_operand" "0,v")
+	  (match_operand:VF_AVX512 1 "register_operand" "0,v")
 	  (vec_duplicate:VF_AVX512
 	(match_operand: 2 "memory_operand" "m,m"))
-	  (match_operand:VF_AVX512 3 "nonimmediate_operand" "v,0")))]
+	  (match_operand:VF_AVX512 3 "register_operand" "v,0")))]
   "TARGET_AVX512F && "
   "@
vfmadd132\t{%2, %3, %0|%0, %3, %2}
-- 
2.17.2



V2 [PATCH] i386: Add pass_remove_partial_avx_dependency

2018-10-19 Thread H.J. Lu
On 10/18/18, Jan Hubicka  wrote:
>> we need to generate
>>
>>  vxorp[ds]   %xmmN, %xmmN, %xmmN
>>  ...
>>  vcvtss2sd   f(%rip), %xmmN, %xmmX
>>  ...
>>  vcvtsi2ss   i(%rip), %xmmN, %xmmY
>>
>> to avoid partial XMM register stall.  This patch adds a pass to generate
>> a single
>>
>>  vxorps  %xmmN, %xmmN, %xmmN
>>
>> at function entry, which is shared by all SF and DF conversions, instead
>> of generating one
>>
>>  vxorp[ds]   %xmmN, %xmmN, %xmmN
>>
>> for each SF/DF conversion.
>>
>> Performance impacts on SPEC CPU 2017 rate with 1 copy using
>>
>> -Ofast -march=native -mfpmath=sse -fno-associative-math -funroll-loops
>>
>> are
>>
>> 1. On Broadwell server:
>>
>> 500.perlbench_r (-0.82%)
>> 502.gcc_r (0.73%)
>> 505.mcf_r (-0.24%)
>> 520.omnetpp_r (-2.22%)
>> 523.xalancbmk_r (-1.47%)
>> 525.x264_r (0.31%)
>> 531.deepsjeng_r (0.27%)
>> 541.leela_r (0.85%)
>> 548.exchange2_r (-0.11%)
>> 557.xz_r (-0.34%)
>> Geomean: (-0.23%)
>>
>> 503.bwaves_r (0.00%)
>> 507.cactuBSSN_r (-1.88%)
>> 508.namd_r (0.00%)
>> 510.parest_r (-0.56%)
>> 511.povray_r (0.49%)
>> 519.lbm_r (-1.28%)
>> 521.wrf_r (-0.28%)
>> 526.blender_r (0.55%)
>> 527.cam4_r (-0.20%)
>> 538.imagick_r (2.52%)
>> 544.nab_r (-0.18%)
>> 549.fotonik3d_r (-0.51%)
>> 554.roms_r (-0.22%)
>> Geomean: (0.00%)
>
> I wonder why the patch seems to have more effect on specint that should not
> care much
> about float<->double conversions?

These are within noise range.

>> number of vxorp[ds]:
>>
>> before   after   difference
>> 14570    4515    -69%
>>
>> OK for trunk?
>
> This looks very nice though.
>

> +  if (v4sf_const0)
> +{
> +  /* Generate a single vxorps at function entry and perform df
> +  rescan. */
> +  bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
> +  insn = BB_HEAD (bb);
> +  set = gen_rtx_SET (v4sf_const0, CONST0_RTX (V4SFmode));
> +  set_insn = emit_insn_after (set, insn);
> +  df_insn_rescan (set_insn);
> +  df_process_deferred_rescans ();
> +}
>
> It seems suboptimal to place the const0 at the entry of function - if the
> conversion happens in a cold region of the function this will just increase
> register
> pressure.  I guess right answer would be to look for the postdominance
> frontier

Did you mean "the nearest common dominator"?

> of the set of all uses of the zero register?
>

Here is the updated patch, which adds a pass to generate a single

vxorps  %xmmN, %xmmN, %xmmN

at entry of the nearest common dominator for basic blocks with SF/DF
conversions.  OK for trunk?

Thanks.


-- 
H.J.
From e2a437f48778ae9586f2038220840ecc41566f69 Mon Sep 17 00:00:00 2001
From: "H.J. Lu" 
Date: Wed, 15 Aug 2018 09:58:31 -0700
Subject: [PATCH] i386: Add pass_remove_partial_avx_dependency

With -mavx, for

[hjl@gnu-cfl-1 skx-2]$ cat foo.i
extern float f;
extern double d;
extern int i;

void
foo (void)
{
  d = f;
  f = i;
}

we need to generate

	vxorp[ds]	%xmmN, %xmmN, %xmmN
	...
	vcvtss2sd	f(%rip), %xmmN, %xmmX
	...
	vcvtsi2ss	i(%rip), %xmmN, %xmmY

to avoid partial XMM register stall.  This patch adds a pass to generate
a single

	vxorps		%xmmN, %xmmN, %xmmN

at entry of the nearest common dominator for basic blocks with SF/DF
conversions, instead of generating one

	vxorp[ds]	%xmmN, %xmmN, %xmmN

for each SF/DF conversion.

Performance impacts on SPEC CPU 2017 rate with 1 copy using

-Ofast -march=native -mfpmath=sse -fno-associative-math -funroll-loops

are

1. On Broadwell server:

500.perlbench_r (-0.82%)
502.gcc_r (0.73%)
505.mcf_r (-0.24%)
520.omnetpp_r (-2.22%)
523.xalancbmk_r (-1.47%)
525.x264_r (0.31%)
531.deepsjeng_r (0.27%)
541.leela_r (0.85%)
548.exchange2_r (-0.11%)
557.xz_r (-0.34%)
Geomean: (-0.23%)

503.bwaves_r (0.00%)
507.cactuBSSN_r (-1.88%)
508.namd_r (0.00%)
510.parest_r (-0.56%)
511.povray_r (0.49%)
519.lbm_r (-1.28%)
521.wrf_r (-0.28%)
526.blender_r (0.55%)
527.cam4_r (-0.20%)
538.imagick_r (2.52%)
544.nab_r (-0.18%)
549.fotonik3d_r (-0.51%)
554.roms_r (-0.22%)
Geomean: (0.00%)

2. On Skylake client:

500.perlbench_r (-0.29%)
502.gcc_r (-0.36%)
505.mcf_r (1.77%)
520.omnetpp_r (-0.26%)
523.xalancbmk_r (-3.69%)
525.x264_r (-0.32%)
531.deepsjeng_r (0.00%)
541.leela_r (-0.46%)
548.exchange2_r (0.00%)
557.xz_r (0.00%)
Geomean: (-0.34%)

503.bwaves_r (0.00%)
507.cactuBSSN_r (-0.56%)
508.namd_r (0.87%)
510.parest_r (0.00%)
511.povray_r (-0.73%)
519.lbm_r (0.84%)
521.wrf_r (0.00%)
526.blender_r (-0.81%)
527.cam4_r (-0.43%)
538.imagick_r (2.55%)
544.nab_r (0.28%)
549.fotonik3d_r (0.00%)
554.roms_r (0.32%)
Geomean: (0.12%)

3. On Skylake server:

500.perlbench_r (-0.55%)
502.gcc_r (0.69%)
505.mcf_r (0.00%)
520.omnetpp_r (-0.33%)
523.xalancbmk_r (-0.21%)
525.x264_r (-0.27%)
531.deepsjeng_r (0.00%)
541.leela_r (0.00%)
548.exchange2_r (-0.11%)
557.xz_r (0.00%)
Geomean: (0.00%)

503.bwaves_r (0.58%)
507.cactuBSSN_r (0.00%)
508.namd_r (0.00%)
510.parest_r (0.18%)
511.povray_r (-0.58%)
519.lbm_r (0.25%)
521.wrf_

[gomp5] Reject range-based for loops with ordered(n) clause

2018-10-19 Thread Jakub Jelinek
Hi!

The spec says:
"The loops associated with an ordered clause with a parameter may not include
range-for loops."
This patch implements this restriction.  Committed to gomp-5_0-branch.

2018-10-19  Jakub Jelinek  

* parser.c (cp_parser_omp_for_loop): Disallow ordered clause with
argument for range-for loops.

* g++.dg/gomp/doacross-1.C: New test.

--- gcc/cp/parser.c.jj  2018-10-18 10:17:14.098009735 +0200
+++ gcc/cp/parser.c 2018-10-19 10:36:05.631549494 +0200
@@ -36139,6 +36139,11 @@ cp_parser_omp_for_loop (cp_parser *parse
pre_body = this_pre_body;
}
 
+ if (ordered_cl)
+   error_at (OMP_CLAUSE_LOCATION (ordered_cl),
+ "%<ordered%> clause with parameter on "
+ "range-based %<for%> loop");
+
  goto parse_close_paren;
}
}
--- gcc/testsuite/g++.dg/gomp/doacross-1.C.jj   2018-10-19 10:32:30.132148779 
+0200
+++ gcc/testsuite/g++.dg/gomp/doacross-1.C  2018-10-19 10:37:44.930890996 
+0200
@@ -0,0 +1,21 @@
+// { dg-do compile { target c++11 } }
+// { dg-options "-fopenmp" }
+
+int a[42];
+
+void
+foo ()
+{
+  #pragma omp for ordered (1)  // { dg-error "'ordered' clause with parameter on range-based 'for' loop" }
+  for (auto x : a)
+;
+}
+
+void
+bar ()
+{
+  #pragma omp for ordered (2)  // { dg-error "'ordered' clause with parameter on range-based 'for' loop" }
+  for (int i = 0; i < 1; i++)
+for (auto x : a)
+  ;
+}


Jakub


[PATCH] S/390: Make "b" constraint match literal pool references

2018-10-19 Thread Ilya Leoshkevich
Improves the code generation by getting rid of redundant LAs, as seen
in the following example:

-   la  %r1,0(%r13)
-   lg  %r4,0(%r1)
+   lg  %r4,0(%r13)

This also makes it possible to proceed with the merge of movdi_64 and movdi_larl.
Currently LRA decides to spill literal pool references back to the
literal pool, because it preliminarily chooses alternatives with
CT_MEMORY constraints without calling
satisfies_memory_constraint_p (). Later on it notices that the
constraint is wrong and fixes it by spilling.  The constraint in this
case is "b", and the operand is a literal pool reference.  There is
no reason to reject them.  The current behavior was introduced,
apparently unintentionally, by
https://gcc.gnu.org/ml/gcc-patches/2010-09/msg00812.html

The patch affects a little bit more than mentioned in the subject,
because it changes s390_loadrelative_operand_p (), which is called not
only for checking the "b" constraint.  However, the only caller for
which it should really not accept literal pool references is
s390_check_qrst_address (), so it was changed to explicitly do so.

gcc/ChangeLog:

2018-10-18  Ilya Leoshkevich  

* config/s390/s390.c (s390_loadrelative_operand_p): Accept
literal pool references.
(s390_check_qrst_address): Adapt to the new behavior of
s390_loadrelative_operand_p ().

gcc/testsuite/ChangeLog:

2018-10-18  Ilya Leoshkevich  

* gcc.target/s390/litpool-int.c: New test.
---
 gcc/config/s390/s390.c  |  9 +
 gcc/testsuite/gcc.target/s390/litpool-int.c | 12 
 2 files changed, 17 insertions(+), 4 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/s390/litpool-int.c

diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
index ed307c3598b..0b7e44b62e1 100644
--- a/gcc/config/s390/s390.c
+++ b/gcc/config/s390/s390.c
@@ -3110,8 +3110,7 @@ s390_legitimate_address_without_index_p (rtx op)
Valid addresses are single references or a sum of a reference and a
constant integer. Return these parts in SYMREF and ADDEND.  You can
pass NULL in REF and/or ADDEND if you are not interested in these
-   values.  Literal pool references are *not* considered symbol
-   references.  */
+   values.  */
 
 static bool
 s390_loadrelative_operand_p (rtx addr, rtx *symref, HOST_WIDE_INT *addend)
@@ -3130,7 +3129,7 @@ s390_loadrelative_operand_p (rtx addr, rtx *symref, 
HOST_WIDE_INT *addend)
   addr = XEXP (addr, 0);
 }
 
-  if ((GET_CODE (addr) == SYMBOL_REF && !CONSTANT_POOL_ADDRESS_P (addr))
+  if (GET_CODE (addr) == SYMBOL_REF
   || (GET_CODE (addr) == UNSPEC
  && (XINT (addr, 1) == UNSPEC_GOTENT
  || XINT (addr, 1) == UNSPEC_PLT)))
@@ -3153,6 +3152,7 @@ s390_loadrelative_operand_p (rtx addr, rtx *symref, 
HOST_WIDE_INT *addend)
 static int
 s390_check_qrst_address (char c, rtx op, bool lit_pool_ok)
 {
+  rtx symref;
   struct s390_address addr;
   bool decomposed = false;
 
@@ -3161,7 +3161,8 @@ s390_check_qrst_address (char c, rtx op, bool lit_pool_ok)
 
   /* This check makes sure that no symbolic address (except literal
  pool references) are accepted by the R or T constraints.  */
-  if (s390_loadrelative_operand_p (op, NULL, NULL))
+  if (s390_loadrelative_operand_p (op, &symref, NULL)
+  && (!lit_pool_ok || !CONSTANT_POOL_ADDRESS_P (symref)))
 return 0;
 
   /* Ensure literal pool references are only accepted if LIT_POOL_OK.  */
diff --git a/gcc/testsuite/gcc.target/s390/litpool-int.c 
b/gcc/testsuite/gcc.target/s390/litpool-int.c
new file mode 100644
index 000..a6bf0c01f4f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/litpool-int.c
@@ -0,0 +1,12 @@
+/* Test that we do not generate useless LAs.  */
+
+/* { dg-do compile } */
+/* { dg-options "-march=z10 -O1" } */
+
+int a;
+
+void b()
+{
+  a /= 100;
+  /* { dg-final { scan-assembler-not {(?n)\n\tla\t%r\d+,.+\(%r13\)\n} } } */
+}
-- 
2.19.0



Re: [PATCH] Add splay-tree "view" for bitmap

2018-10-19 Thread Steven Bosscher
On Fri, Oct 19, 2018 at 8:46 AM Richard Biener <> wrote:
> Yeah. I also noticed some 'obvious' shortcomings in the heuristics...
> I guess in the end well predicted branches in the out of line code are 
> important...

What also would help is to put bitmaps on their own obstack to improve
cache locality.

As for the patch, I never hacked it with "production code" in mind, it
was just a proof of concept. Not all of it is optimal or even safe
as-is. For example you probably should add
"gcc_checking_assert(!(BITMAP)->tree-form)" tests in the
bmp_iter_*_init functions. And perhaps semi-splaying trees work better
for the use cases of GCC (cf. "Rehabilitation of an unloved child:
semi-splaying"). I implemented classic splay trees because I could not
find a semi-splay tree implementation in any of the usual text books
while classic splay tree implementations were given in all of those
books ;-)

Ciao!
Steven


Re: [RFC] GCC support for live-patching

2018-10-19 Thread Bernhard Reutner-Fischer
On 18 October 2018 19:34:52 CEST, Qing Zhao  wrote:

>A. an option to control GCC's IPA optimizations to provide a safe 
>compilation for live-patching purpose. At the same time, provides
>multiple-level control of patch code-size and run time performance 
>tradeoff. 
>
>-fease-live-patching={none|only-inline-static|inline|inline-clone}

s/-fease-live-patching/-flive-patching/g

please.
TIA


Re: [PATCH] Add splay-tree "view" for bitmap

2018-10-19 Thread Richard Sandiford
Richard Biener  writes:
> On October 18, 2018 11:05:32 PM GMT+02:00, Richard Sandiford
>  wrote:
>>Richard Biener  writes:
>>> On Thu, 18 Oct 2018, Richard Sandiford wrote:
>>>
 Richard Biener  writes:
 > PR63155 made me pick up this old work from Steven, it turns our
 > linked-list implementation to a two-mode one with one being a
 > splay tree featuring O(log N) complexity for find/remove.
 >
 > Over Steven's original patch I added a bitmap_tree_to_vec helper
 > that I use from the debug/print methods to avoid changing view
 > there.  In theory the bitmap iterator could get a "stack"
 > as well and we could at least support EXECUTE_IF_SET_IN_BITMAP.
 >
 > This can be used to fix the two biggest bottlenecks in the PRs
 > testcase, namely SSA propagator worklist handling and out-of-SSA
 > coalesce list building.  perf shows the following data, first
 > unpatched, second patched - also watch the third column (samples)
 > when comparing percentages.
 >
 > -O0
 > -   18.19%17.35%   407  cc1  cc1   [.]
>>bitmap_set_b▒
 >- bitmap_set_bit   
>>▒
 >   + 8.77% create_coalesce_list_for_region 
>>▒
 >   + 4.21% calculate_live_ranges   
>>▒
 >   + 2.02% build_ssa_conflict_graph
>>▒
 >   + 1.66% insert_phi_nodes_for
>>▒
 >   + 0.86% coalesce_ssa_name  
 > patched:
 > -   12.39%10.48%   129  cc1  cc1   [.]
>>bitmap_set_b▒
 >- bitmap_set_bit   
>>▒
 >   + 5.27% calculate_live_ranges   
>>▒
 >   + 2.76% insert_phi_nodes_for
>>▒
 >   + 1.90% create_coalesce_list_for_region 
>>▒
 >   + 1.63% build_ssa_conflict_graph
>>▒
 >   + 0.35% coalesce_ssa_name   
 >
 > -O1
 > -   17.53%17.53%   842  cc1  cc1   [.]
>>bitmap_set_b▒
 >- bitmap_set_bit   
>>▒
 >   + 12.39% add_ssa_edge   
>>▒
 >   + 1.48% create_coalesce_list_for_region 
>>▒
 >   + 0.82% solve_constraints   
>>▒
 >   + 0.71% calculate_live_ranges   
>>▒
 >   + 0.64% add_implicit_graph_edge 
>>▒
 >   + 0.41% insert_phi_nodes_for
>>▒
 >   + 0.34% build_ssa_conflict_graph  
 > patched:
 > -5.79% 5.00%   167  cc1  cc1   [.]
>>bitmap_set_b▒
 >- bitmap_set_bit   
>>▒
 >   + 1.41% add_ssa_edge
>>▒
 >   + 0.88% calculate_live_ranges   
>>▒
 >   + 0.75% add_implicit_graph_edge 
>>▒
 >   + 0.68% solve_constraints   
>>▒
 >   + 0.48% insert_phi_nodes_for
>>▒
 >   + 0.45% build_ssa_conflict_graph   
 >
 > -O3
 > -   12.37%12.34%  1145  cc1  cc1   [.]
>>bitmap_set_b▒
 >- bitmap_set_bit   
>>▒
 >   + 9.14% add_ssa_edge
>>▒
 >   + 0.80% create_coalesce_list_for_region 
>>▒
 >   + 0.69% add_implicit_graph_edge 
>>▒
 >   + 0.54% solve_constraints   
>>▒
 >   + 0.34% calculate_live_ranges   
>>▒
 >   + 0.27% insert_phi_nodes_for
>>▒
 >   + 0.21% build_ssa_conflict_graph 
 > -4.36% 3.86%   227  cc1  cc1   [.]
>>bitmap_set_b▒
 >- bitmap_set_bit   
>>▒
 >   + 0.98% add_ssa_edge
>>▒
 >   + 0.86% add_implicit_graph_edge 
>>▒
 >   + 0.64% solve_constraints

Improve wording for error on too large non-local frames

2018-10-19 Thread Eric Botcazou
The compiler currently issues a warning/error mentioning a variable "frame", 
which is not very user-friendly.  This is changed to using the same wording as 
frame_offset_overflow, i.e. "total size of local objects".

Tested on x86-64/Linux, applied on the mainline as obvious.

Btw, in most cases, the compiler now issues both a warning and an error when a 
variable has too large a size.  What's the rationale for this oddity?  IMO it 
needs to make a choice here, it's either a warning or an error.


2018-10-19  Eric Botcazou  

* cfgexpand.c (expand_one_var): Use specific wording in error message
for non-local frame variables.
* stor-layout.c (layout_decl): Do not issue a warning for them.


2018-10-19  Eric Botcazou  

* gnat.dg/frame_overflow2.adb: New test.

-- 
Eric Botcazou

Index: cfgexpand.c
===
--- cfgexpand.c	(revision 265259)
+++ cfgexpand.c	(working copy)
@@ -1674,7 +1674,12 @@ expand_one_var (tree var, bool toplevel,
   /* Reject variables which cover more than half of the address-space.  */
   if (really_expand)
 	{
-	  error ("size of variable %q+D is too large", var);
+	  if (DECL_NONLOCAL_FRAME (var))
+	error_at (DECL_SOURCE_LOCATION (current_function_decl),
+		  "total size of local objects is too large");
+	  else
+	error_at (DECL_SOURCE_LOCATION (var),
+		  "size of variable %q+D is too large", var);
 	  expand_one_error_var (var);
 	}
 }
Index: stor-layout.c
===
--- stor-layout.c	(revision 265259)
+++ stor-layout.c	(working copy)
@@ -755,8 +755,8 @@ layout_decl (tree decl, unsigned int kno
 DECL_SIZE_UNIT (decl) = variable_size (DECL_SIZE_UNIT (decl));
 
   /* If requested, warn about definitions of large data objects.  */
-  if ((code == VAR_DECL || code == PARM_DECL)
-  && ! DECL_EXTERNAL (decl))
+  if ((code == PARM_DECL || (code == VAR_DECL && !DECL_NONLOCAL_FRAME (decl)))
+  && !DECL_EXTERNAL (decl))
 {
   tree size = DECL_SIZE_UNIT (decl);
 
-- { dg-do compile }

with System;

procedure Frame_Overflow2 is -- { dg-error "too large" }

  type Index_T is range 1 .. 2**(System.Word_Size - 1) - 1;

  type SetArray is array (Index_T) of Boolean;

  type Set is record
Store: SetArray := (Others => False);
  end record;

  Phi: constant Set := (Store => (Others => False));

  function F return Set is
  begin
return Phi;
  end;

begin
  null;
end;


Re: [PATCH] i386: Enable AVX512 memory broadcast for FP add

2018-10-19 Thread Uros Bizjak
On Thu, Oct 18, 2018 at 11:44 PM H.J. Lu  wrote:
>
> Many AVX512 vector operations can broadcast from a scalar memory source.
> This patch enables memory broadcast for FP add operations.
>
> gcc/
>
> PR target/72782
> * config/i386/sse.md
> (*<plusminus_insn><mode>3<mask_name>_bcst_1): New.
> (*add<mode>3<mask_name>_bcst_2): Likewise.
>
> gcc/testsuite/
>
> PR target/72782
> * gcc.target/i386/avx512-binop-1.h: New file.
> * gcc.target/i386/avx512-binop-2.h: Likewise.
> * gcc.target/i386/avx512-binop-3.h: Likewise.
> * gcc.target/i386/avx512-binop-4.h: Likewise.
> * gcc.target/i386/avx512-binop-5.h: Likewise.
> * gcc.target/i386/avx512-binop-6.h: Likewise.
> * gcc.target/i386/avx512f-add-df-zmm-1.c: Likewise.
> * gcc.target/i386/avx512f-add-sf-zmm-1.c: Likewise.
> * gcc.target/i386/avx512f-add-sf-zmm-2.c: Likewise.
> * gcc.target/i386/avx512f-add-sf-zmm-3.c: Likewise.
> * gcc.target/i386/avx512f-add-sf-zmm-4.c: Likewise.
> * gcc.target/i386/avx512f-add-sf-zmm-5.c: Likewise.
> * gcc.target/i386/avx512f-add-sf-zmm-6.c: Likewise.
> * gcc.target/i386/avx512f-sub-df-zmm-1.c: Likewise.
> * gcc.target/i386/avx512f-sub-sf-zmm-1.c: Likewise.
> * gcc.target/i386/avx512f-sub-sf-zmm-2.c: Likewise.
> * gcc.target/i386/avx512f-sub-sf-zmm-3.c: Likewise.
> * gcc.target/i386/avx512f-sub-sf-zmm-4.c: Likewise.
> * gcc.target/i386/avx512f-sub-sf-zmm-5.c: Likewise.
> * gcc.target/i386/avx512vl-add-sf-xmm-1.c: Likewise.
> * gcc.target/i386/avx512vl-add-sf-ymm-1.c: Likewise.
> * gcc.target/i386/avx512vl-sub-sf-xmm-1.c: Likewise.
> * gcc.target/i386/avx512vl-sub-sf-ymm-1.c: Likewise.

Please use "register_operand" when only registers are involved. Please
change "nonimmediate_operand" to "register_operand" also in your
previous FMA patch.

OK with that change.

Thanks,
Uros.

>  gcc/config/i386/sse.md| 28 +++
>  .../gcc.target/i386/avx512-binop-1.h  | 12 
>  .../gcc.target/i386/avx512-binop-2.h  | 12 
>  .../gcc.target/i386/avx512-binop-3.h  | 15 ++
>  .../gcc.target/i386/avx512-binop-4.h  | 12 
>  .../gcc.target/i386/avx512-binop-5.h  | 14 ++
>  .../gcc.target/i386/avx512-binop-6.h  | 14 ++
>  .../gcc.target/i386/avx512f-add-df-zmm-1.c| 12 
>  .../gcc.target/i386/avx512f-add-sf-zmm-1.c| 12 
>  .../gcc.target/i386/avx512f-add-sf-zmm-2.c| 12 
>  .../gcc.target/i386/avx512f-add-sf-zmm-3.c| 12 
>  .../gcc.target/i386/avx512f-add-sf-zmm-4.c| 12 
>  .../gcc.target/i386/avx512f-add-sf-zmm-5.c| 12 
>  .../gcc.target/i386/avx512f-add-sf-zmm-6.c| 12 
>  .../gcc.target/i386/avx512f-sub-df-zmm-1.c| 12 
>  .../gcc.target/i386/avx512f-sub-sf-zmm-1.c| 12 
>  .../gcc.target/i386/avx512f-sub-sf-zmm-2.c| 12 
>  .../gcc.target/i386/avx512f-sub-sf-zmm-3.c| 12 
>  .../gcc.target/i386/avx512f-sub-sf-zmm-4.c| 12 
>  .../gcc.target/i386/avx512f-sub-sf-zmm-5.c| 12 
>  .../gcc.target/i386/avx512vl-add-sf-xmm-1.c   | 12 
>  .../gcc.target/i386/avx512vl-add-sf-ymm-1.c   | 12 
>  .../gcc.target/i386/avx512vl-sub-sf-xmm-1.c   | 12 
>  .../gcc.target/i386/avx512vl-sub-sf-ymm-1.c   | 12 
>  24 files changed, 311 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512-binop-1.h
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512-binop-2.h
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512-binop-3.h
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512-binop-4.h
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512-binop-5.h
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512-binop-6.h
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-add-df-zmm-1.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-add-sf-zmm-1.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-add-sf-zmm-2.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-add-sf-zmm-3.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-add-sf-zmm-4.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-add-sf-zmm-5.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-add-sf-zmm-6.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-sub-df-zmm-1.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-sub-sf-zmm-1.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-sub-sf-zmm-2.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-sub-sf-zmm-3.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-sub-sf-zmm-4.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-sub-sf-zmm-5.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-add-sf-xmm-1.c
>  create m

Re: [PATCH] Reset insn priority after inc/ref replacement in haifa sched

2018-10-19 Thread Robin Dapp
> Still OK :-)

Committed as r265304.

Regards
 Robin



Re: [PATCH] i386: Enable AVX512 memory broadcast for FP add

2018-10-19 Thread Uros Bizjak
On Thu, Oct 18, 2018 at 11:44 PM H.J. Lu  wrote:
>
> Many AVX512 vector operations can broadcast from a scalar memory source.
> This patch enables memory broadcast for FP add operations.
>
> gcc/
>
> PR target/72782
> * config/i386/sse.md
> (*<plusminus_insn><mode>3<mask_name>_bcst_1): New.
> (*add<mode>3<mask_name>_bcst_2): Likewise.
>
> gcc/testsuite/
>
> PR target/72782
> * gcc.target/i386/avx512-binop-1.h: New file.
> * gcc.target/i386/avx512-binop-2.h: Likewise.
> * gcc.target/i386/avx512-binop-3.h: Likewise.
> * gcc.target/i386/avx512-binop-4.h: Likewise.
> * gcc.target/i386/avx512-binop-5.h: Likewise.
> * gcc.target/i386/avx512-binop-6.h: Likewise.
> * gcc.target/i386/avx512f-add-df-zmm-1.c: Likewise.
> * gcc.target/i386/avx512f-add-sf-zmm-1.c: Likewise.
> * gcc.target/i386/avx512f-add-sf-zmm-2.c: Likewise.
> * gcc.target/i386/avx512f-add-sf-zmm-3.c: Likewise.
> * gcc.target/i386/avx512f-add-sf-zmm-4.c: Likewise.
> * gcc.target/i386/avx512f-add-sf-zmm-5.c: Likewise.
> * gcc.target/i386/avx512f-add-sf-zmm-6.c: Likewise.
> * gcc.target/i386/avx512f-sub-df-zmm-1.c: Likewise.
> * gcc.target/i386/avx512f-sub-sf-zmm-1.c: Likewise.
> * gcc.target/i386/avx512f-sub-sf-zmm-2.c: Likewise.
> * gcc.target/i386/avx512f-sub-sf-zmm-3.c: Likewise.
> * gcc.target/i386/avx512f-sub-sf-zmm-4.c: Likewise.
> * gcc.target/i386/avx512f-sub-sf-zmm-5.c: Likewise.
> * gcc.target/i386/avx512vl-add-sf-xmm-1.c: Likewise.
> * gcc.target/i386/avx512vl-add-sf-ymm-1.c: Likewise.
> * gcc.target/i386/avx512vl-sub-sf-xmm-1.c: Likewise.
> * gcc.target/i386/avx512vl-sub-sf-ymm-1.c: Likewise.

LGTM.

Thanks,
Uros.

> ---
>  gcc/config/i386/sse.md| 28 +++
>  .../gcc.target/i386/avx512-binop-1.h  | 12 
>  .../gcc.target/i386/avx512-binop-2.h  | 12 
>  .../gcc.target/i386/avx512-binop-3.h  | 15 ++
>  .../gcc.target/i386/avx512-binop-4.h  | 12 
>  .../gcc.target/i386/avx512-binop-5.h  | 14 ++
>  .../gcc.target/i386/avx512-binop-6.h  | 14 ++
>  .../gcc.target/i386/avx512f-add-df-zmm-1.c| 12 
>  .../gcc.target/i386/avx512f-add-sf-zmm-1.c| 12 
>  .../gcc.target/i386/avx512f-add-sf-zmm-2.c| 12 
>  .../gcc.target/i386/avx512f-add-sf-zmm-3.c| 12 
>  .../gcc.target/i386/avx512f-add-sf-zmm-4.c| 12 
>  .../gcc.target/i386/avx512f-add-sf-zmm-5.c| 12 
>  .../gcc.target/i386/avx512f-add-sf-zmm-6.c| 12 
>  .../gcc.target/i386/avx512f-sub-df-zmm-1.c| 12 
>  .../gcc.target/i386/avx512f-sub-sf-zmm-1.c| 12 
>  .../gcc.target/i386/avx512f-sub-sf-zmm-2.c| 12 
>  .../gcc.target/i386/avx512f-sub-sf-zmm-3.c| 12 
>  .../gcc.target/i386/avx512f-sub-sf-zmm-4.c| 12 
>  .../gcc.target/i386/avx512f-sub-sf-zmm-5.c| 12 
>  .../gcc.target/i386/avx512vl-add-sf-xmm-1.c   | 12 
>  .../gcc.target/i386/avx512vl-add-sf-ymm-1.c   | 12 
>  .../gcc.target/i386/avx512vl-sub-sf-xmm-1.c   | 12 
>  .../gcc.target/i386/avx512vl-sub-sf-ymm-1.c   | 12 
>  24 files changed, 311 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512-binop-1.h
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512-binop-2.h
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512-binop-3.h
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512-binop-4.h
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512-binop-5.h
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512-binop-6.h
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-add-df-zmm-1.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-add-sf-zmm-1.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-add-sf-zmm-2.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-add-sf-zmm-3.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-add-sf-zmm-4.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-add-sf-zmm-5.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-add-sf-zmm-6.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-sub-df-zmm-1.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-sub-sf-zmm-1.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-sub-sf-zmm-2.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-sub-sf-zmm-3.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-sub-sf-zmm-4.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-sub-sf-zmm-5.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-add-sf-xmm-1.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-add-sf-ymm-1.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-sub-sf-xmm-1.c
>  create mode 100644 gcc