[PATCH] Fix insn does not satisfy its constraints: sse2_lshrv1ti3

2022-06-06 Thread liuhongt via Gcc-patches
21114(define_insn_and_split "ssse3_palignrdi"
21115  [(set (match_operand:DI 0 "register_operand" "=y,x,Yv")
21116(unspec:DI [(match_operand:DI 1 "register_operand" "0,0,Yv")
21117(match_operand:DI 2 "register_mmxmem_operand" 
"ym,x,Yv")
21118(match_operand:SI 3 "const_0_to_255_mul_8_operand")]
21119   UNSPEC_PALIGNR))]
21120  "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"

Alternative 2 requires Yw instead of Yv since it's splitted to vpsrldq
which requires AVX512VL & AVX512BW for evex version.

Bootstrapped and regtested on x86_64-pc-linu-gnu{-m32,}.
Ready to push to trunk.

gcc/ChangeLog:

PR target/105854
* config/i386/sse.md (ssse3_palignrdi): Change alternative 2
from Yv to Yw.

gcc/testsuite/ChangeLog:

* gcc.target/i386/pr105854.c: New test.
---
 gcc/config/i386/sse.md   |  6 ++---
 gcc/testsuite/gcc.target/i386/pr105854.c | 32 
 2 files changed, 35 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr105854.c

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 62688f8e29d..200308445db 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -21123,9 +21123,9 @@ (define_insn "_palignr"
(set_attr "mode" "")])
 
 (define_insn_and_split "ssse3_palignrdi"
-  [(set (match_operand:DI 0 "register_operand" "=y,x,Yv")
-   (unspec:DI [(match_operand:DI 1 "register_operand" "0,0,Yv")
-   (match_operand:DI 2 "register_mmxmem_operand" "ym,x,Yv")
+  [(set (match_operand:DI 0 "register_operand" "=y,x,Yw")
+   (unspec:DI [(match_operand:DI 1 "register_operand" "0,0,Yw")
+   (match_operand:DI 2 "register_mmxmem_operand" "ym,x,Yw")
(match_operand:SI 3 "const_0_to_255_mul_8_operand")]
   UNSPEC_PALIGNR))]
   "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
diff --git a/gcc/testsuite/gcc.target/i386/pr105854.c 
b/gcc/testsuite/gcc.target/i386/pr105854.c
new file mode 100644
index 000..28abef67915
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr105854.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-O -fcaller-saves -mavx512vl -mno-avx512bw" } */
+
+typedef int __attribute__((__vector_size__ (8))) T;
+typedef signed char __attribute__((__vector_size__ (64))) U;
+typedef int __attribute__((__vector_size__ (16))) V;
+typedef long long __attribute__((__vector_size__ (8))) W;
+typedef int __attribute__((__vector_size__ (64))) X;
+typedef _Decimal128 __attribute__((__vector_size__ (64))) D;
+
+D d;
+T t;
+U u;
+V v;
+W w;
+
+void
+foo (void)
+{
+  T t0 = t;
+  T t1 = (T) __builtin_ia32_palignr (w, (W) { }, 0);
+  U u1 = __builtin_shufflevector (u, u, 7, 6, 2, 3, 6, 4, 5, 2, 3, 8, 3, 2, 0,
+ 4, 0, 6, 2, 2, 5, 3, 1, 0, 7, 5, 3, 3, 7, 6,
+ 2, 0, 4, 5, 4, 1, 7, 7, 0, 6, 1, 9, 3, 0, 3,
+ 5, 5, 0, 0, 2, 1, 5, 4, 8, 7,
+ 2, 1, 1, 6, 4, 9, 9, 1, 5, 0, 2);
+  V v1 = v;
+  d += 0.;
+  U u0 = u + u + u1 + (U) d;
+  V v0 = ((X)u0)[0] + v + v;
+  t = (T) (long) (__int128) v0 + t + t + t1;
+}
-- 
2.18.1



Re: [PATCH] Update document for VECTOR_MODES_WITH_PREFIX

2022-06-06 Thread Kewen.Lin via Gcc-patches
on 2022/6/6 18:01, Richard Sandiford wrote:
> "Kewen.Lin"  writes:
>> Hi,
>>
>> r10-3912 updated the format of VECTOR_MODES_WITH_PREFIX by
>> adding one more parameter ORDER, the related document is out
>> of date.  So update the document for ORDER.
>>
>> Is it ok for trunk?
>>
>> BR,
>> Kewen
>> -
>>
>> gcc/ChangeLog:
>>
>>  * machmode.def (VECTOR_MODES_WITH_PREFIX): Update document for
>>  parameter ORDER.
> 
> OK, thanks.

Thanks Richard!  Pushed as r13-997.

BR,
Kewen


[PATCH] RISC-V: Compute default ABI from -mcpu or -march

2022-06-06 Thread wangpc via Gcc-patches
If -mcpu or -march is specified and there is no -mabi, we will calculate
default ABI from arch string provided by -march or defined in CPU info.

gcc/ChangeLog:

* common/config/riscv/riscv-common.cc (compute_default_abi): 
Implementation
to calculate -mabi from arch string.
(riscv_expand_abi_from_arch): New spec function to calcalute -mabi from 
arch
string provided by -march option.
(riscv_expand_abi_from_cpu): New spec function to find CPU info and 
calculate
-mabi from arch string defined in CPU info.
* config/riscv/riscv.h (EXTRA_SPEC_FUNCTIONS): Add above spec functions.
(OPTION_DEFAULT_SPECS): Use new spec functions to calculate -mabi and 
-march
has higher priority than -mcpu.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/mabi-1.c: ilp32e test.
* gcc.target/riscv/mabi-2.c: ilp32 test.
* gcc.target/riscv/mabi-3.c: ilp32f test.
* gcc.target/riscv/mabi-4.c: ilp32d test.
* gcc.target/riscv/mabi-5.c: lp64 test.
* gcc.target/riscv/mabi-6.c: lp64f test.
* gcc.target/riscv/mabi-7.c: lp64d test.
* gcc.target/riscv/mabi-8.c: -march override -mcpu.
---
 gcc/common/config/riscv/riscv-common.cc | 66 +
 gcc/config/riscv/riscv.h| 15 --
 gcc/testsuite/gcc.target/riscv/mabi-1.c |  7 +++
 gcc/testsuite/gcc.target/riscv/mabi-2.c |  7 +++
 gcc/testsuite/gcc.target/riscv/mabi-3.c |  7 +++
 gcc/testsuite/gcc.target/riscv/mabi-4.c |  7 +++
 gcc/testsuite/gcc.target/riscv/mabi-5.c |  7 +++
 gcc/testsuite/gcc.target/riscv/mabi-6.c |  7 +++
 gcc/testsuite/gcc.target/riscv/mabi-7.c |  7 +++
 gcc/testsuite/gcc.target/riscv/mabi-8.c |  7 +++
 10 files changed, 134 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/mabi-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/mabi-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/mabi-3.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/mabi-4.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/mabi-5.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/mabi-6.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/mabi-7.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/mabi-8.c

diff --git a/gcc/common/config/riscv/riscv-common.cc 
b/gcc/common/config/riscv/riscv-common.cc
index 0e5be2ce105..f8e40549d18 100644
--- a/gcc/common/config/riscv/riscv-common.cc
+++ b/gcc/common/config/riscv/riscv-common.cc
@@ -1266,6 +1266,72 @@ riscv_default_mtune (int argc, const char **argv)
 return default_mtune;
 }
 
+/* Compute default -mabi option from arch string.  */
+
+static const char *
+compute_default_abi (const char* arch_str)
+{
+  location_t loc = UNKNOWN_LOCATION;
+
+  riscv_parse_arch_string (arch_str, NULL, loc);
+
+  if (current_subset_list->xlen () == 64)
+{
+  if (current_subset_list->lookup ("d", RISCV_DONT_CARE_VERSION,
+RISCV_DONT_CARE_VERSION))
+   return "lp64d";
+  if (current_subset_list->lookup ("f", RISCV_DONT_CARE_VERSION,
+RISCV_DONT_CARE_VERSION))
+   return "lp64f";
+  return "lp64";
+}
+  else
+{
+  if (current_subset_list->lookup ("e", RISCV_DONT_CARE_VERSION,
+RISCV_DONT_CARE_VERSION))
+   return "ilp32e";
+  if (current_subset_list->lookup ("d", RISCV_DONT_CARE_VERSION,
+RISCV_DONT_CARE_VERSION))
+   return "ilp32d";
+  if (current_subset_list->lookup ("f", RISCV_DONT_CARE_VERSION,
+RISCV_DONT_CARE_VERSION))
+   return "ilp32f";
+  return "ilp32";
+}
+}
+
+/* Expand default -mabi option from -march option.  */
+
+const char *
+riscv_expand_abi_from_arch (int argc, const char **argv)
+{
+  gcc_assert (argc == 1);
+  return xasprintf ("-mabi=%s", compute_default_abi (argv[0]));
+}
+
+/* Expand default -mabi option from -mcpu option.  */
+
+const char *
+riscv_expand_abi_from_cpu (int argc, const char **argv)
+{
+  gcc_assert (argc > 0 && argc <= 2);
+  const char *default_abi_str = NULL;
+  if (argc >= 2)
+default_abi_str = argv[1];
+
+  const riscv_cpu_info *cpu = riscv_find_cpu (argv[0]);
+
+  if (cpu == NULL)
+{
+  if (default_abi_str == NULL)
+   return "";
+  else
+   return xasprintf ("-mabi=%s", default_abi_str);
+}
+  else
+return xasprintf ("-mabi=%s", compute_default_abi (cpu->arch));
+}
+
 /* Expand arch string with implied extensions from -mcpu option.  */
 
 const char *
diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h
index 6f7f4d3fbdc..14cd695f37f 100644
--- a/gcc/config/riscv/riscv.h
+++ b/gcc/config/riscv/riscv.h
@@ -50,15 +50,20 @@ along with GCC; see the file COPYING3.  If not see
 extern const char *riscv_expand_arch (int argc, const char **argv);
 extern const char *riscv_expand_arch_from_cpu (int argc, const char 

Re: [PATCH,RS6000 2/5] Rework the RS6000_BTM defines.

2022-06-06 Thread Kewen.Lin via Gcc-patches
Hi Will,

The whole series looks good to me, thanks!  IMHO one place can be
further refactored, not sure if it's worth to updating together in
this series, it's ...

on 2022/6/7 06:05, will schmidt wrote:
> [PATCH,RS6000 2/5) Rework the RS6000_BTM defines.
> 
> The RS6000_BTM_ definitions are mostly unused after the rs6000
> builtin code was reworked.  The remaining references can be replaced
> with the OPTION_MASK_ and MASK_ equivalents.
> 
> This patch remvoes the defines:
> RS6000_BTM_FRES, RS6000_BTM_FRSQRTE, RS6000_BTM_FRSQRTES,
> RS6000_BTM_POPCNTD, RS6000_BTM_CELL, RS6000_BTM_DFP,
> RS6000_BTM_HARD_FLOAT, RS6000_BTM_LDBL128, RS6000_BTM_64BIT,
> RS6000_BTM_POWERPC64, RS6000_BTM_FLOAT128, RS6000_BTM_FLOAT128_HW
> RS6000_BTM_MMA, RS6000_BTM_P10.
> 
> I note that the BTM -> OPTION_MASK mappings are not always 1-to-1.
> in particular the BTM_FRES and BTM_FRSQRTE values were both mapped to
> OPTION_MASK_PPC_GFXOPT, while the BTM_FRE and BTM_FRSQRTES both mapped
> to OPTION_MASK_POPCNTB.  In total I spent quite a bit of time
> double-checking these since it looked like copy/paste errors.  I split
> some of these changes out into a subsequent patch to limit the amount
> of potential confusion in any particular patch.
> 
> gcc/
>   * config/rs6000/rs6000-c.cc: Update comments.
>   * config/rs6000/rs6000.cc (RS6000_BTM_FRES, RS6000_BTM_FRSQRTE,
>   RS6000_BTM_FRSQRTES, RS6000_BTM_POPCNTD, RS6000_BTM_CELL,
>   RS6000_BTM_64BIT, RS6000_BTM_POWERPC64, RS6000_BTM_DFP,
>   RS6000_BTM_HARD_FLOAT,RS6000_BTM_LDBL128, RS6000_BTM_FLOAT128,
>   RS6000_BTM_FLOAT128_HW, RS6000_BTM_MMA, RS6000_BTM_P10): Replace
>   with OPTION_MASK_PPC_GFXOPT, OPTION_MASK_PPC_GFXOPT,
>   OPTION_MASK_POPCNTB, OPTION_MASK_POPCNTD,
>   OPTION_MASK_FPRND, MASK_64BIT, MASK_POWERPC64,
>   OPTION_MASK_DFP, OPTION_MASK_SOFT_FLOAT, OPTION_MASK_MULTIPLE,
>   OPTION_MASK_FLOAT128_KEYWORD, OPTION_MASK_FLOAT128_HW,
>   OPTION_MASK_MMA, OPTION_MASK_POWER10.
>   * config/rs6000/rs6000.h (RS6000_BTM_FRES, RS6000_BTM_FRSQRTE,
>   RS6000_BTM_FRSQRTES, RS6000_BTM_POPCNTD, RS6000_BTM_CELL,
>   RS6000_BTM_DFP, RS6000_BTM_HARD_FLOAT, RS6000_BTM_LDBL128,
>   RS6000_BTM_64BIT, RS6000_BTM_POWERPC64, RS6000_BTM_FLOAT128,
>   RS6000_BTM_FLOAT128_HW, RS6000_BTM_MMA, RS6000_BTM_P10): Delete.
> 
> diff --git a/gcc/config/rs6000/rs6000-c.cc b/gcc/config/rs6000/rs6000-c.cc
> index 9c8cbd7a66e4..4c99afc761ae 100644
> --- a/gcc/config/rs6000/rs6000-c.cc
> +++ b/gcc/config/rs6000/rs6000-c.cc
> @@ -594,13 +594,13 @@ rs6000_target_modify_macros (bool define_p, 
> HOST_WIDE_INT flags,
>   via the target attribute/pragma.  */
>if ((flags & OPTION_MASK_FLOAT128_HW) != 0)
>  rs6000_define_or_undefine_macro (define_p, "__FLOAT128_HARDWARE__");
>  
>/* options from the builtin masks.  */
> -  /* Note that RS6000_BTM_CELL is enabled only if (rs6000_cpu ==
> - PROCESSOR_CELL) (e.g. -mcpu=cell).  */
> -  if ((bu_mask & RS6000_BTM_CELL) != 0)
> +  /* Note that OPTION_MASK_FPRND is enabled only if
> + (rs6000_cpu == PROCESSOR_CELL) (e.g. -mcpu=cell).  */
> +  if ((bu_mask & OPTION_MASK_FPRND) != 0)
>  rs6000_define_or_undefine_macro (define_p, "__PPU__");
>  

... here.  In function rs6000_target_modify_macros, bu_mask is used by
two places, the beginning debug outputting and the above OPTION_MASK_FPRND
check.  I wonder if we can get rid of bu_mask and just use sth. like:

(rs6000_cpu == PROCESSOR_CELL) && (flags & OPTION_MASK_FPRND)

// the others are using "flags &", it's passed by rs6000_isa_flags,
// should be the same as just using OPTION_MASK_FPRND.

If we drop bu_mask in function rs6000_target_modify_macros, function
rs6000_builtin_mask_calculate will have only one use place in function
rs6000_option_override_internal.  IMHO this function
rs6000_builtin_mask_calculate also becomes stale after built-in function
rewriting and needs some updates with new bif framework later.

BR,
Kewen


Re: [PATCH v3, rs6000] Implemented f[min/max]_optab by xs[min/max]dp [PR103605]

2022-06-06 Thread HAO CHEN GUI via Gcc-patches
Hi,

On 2/6/2022 上午 5:01, Segher Boessenkool wrote:
> Hi!
> 
> Some more nitpicking...
> 
> On Wed, May 18, 2022 at 04:52:26PM +0800, HAO CHEN GUI wrote:
>>const double __builtin_vsx_xsmaxdp (double, double);
>> -XSMAXDP smaxdf3 {}
>> +XSMAXDP fmaxdf3 {}
>>
>>const double __builtin_vsx_xsmindp (double, double);
>> -XSMINDP smindf3 {}
>> +XSMINDP fmindf3 {}
> 
> Are s{min,max}df3 still used after this?

Expands reduc_s[min|max]_scal are still using s[min|max]df3.

OPTAB_D (reduc_smax_scal_optab, "reduc_smax_scal_$a")
OPTAB_D (reduc_smin_scal_optab, "reduc_smin_scal_$a")

Also we could implement reduc_f[min|max]_scal after committing
this patch.

Thanks.
Gui Haochen

> 
>> +   UNSPEC_FMAX
>> +   UNSPEC_FMIN
> 
> Pity we have to do this as an unspec still, this should be handled by
> some generic code, with some new operator (fmin/fmax would be obvious
> names :-) )
> 
>> +(define_insn "f3"
>> +  [(set (match_operand:SFDF 0 "vsx_register_operand" "=wa")
>> +(unspec:SFDF [(match_operand:SFDF 1 "vsx_register_operand" "wa")
>> +  (match_operand:SFDF 2 "vsx_register_operand" "wa")]
>> +  FMINMAX))]
>> +"TARGET_VSX"
>> +"xsdp %x0,%x1,%x2"
>> +[(set_attr "type" "fp")]
>> +)
> 
> Indentation is broken here, correct is
> 
> (define_insn "f3"
>   [(set (match_operand:SFDF 0 "vsx_register_operand" "=wa")
>   (unspec:SFDF [(match_operand:SFDF 1 "vsx_register_operand" "wa")
> (match_operand:SFDF 2 "vsx_register_operand" "wa")]
>FMINMAX))]
>   "TARGET_VSX"
>   "xsdp %x0,%x1,%x2"
>   [(set_attr "type" "fp")])
> 
> (FMINMAX has the same indent as the preceding [, its sibling;
> "TARGET_VSX" and the next two lines are indented like the same thing
> before it at the same level (the "[(set"); the finishing ) does never
> start a new line).
> 
> 
> Segher


[PATCH 3/3] Adjust MMA tests to account for no store vector pair.

2022-06-06 Thread Michael Meissner via Gcc-patches
[PATCH 3/3] Adjust MMA tests to account for no store vector pair.

In changing the default for generating the store vector pair instructions,
I had to adjust several of the MMA tests to remove checking for these
instructions.  Mostly I just deleted the scan-assembler lines checking for
stxvp.  In two of the tests, I added the -mstore-vector-pair option since
the point of the test was to check for specific cases with store vector
pair instructions.

I have built bootstrap compilers and run the regression tests on three
different systems:

1)  Little endian power10 using the --with-cpu=power10 option.

2)  Little endian power9 using the --with-cpu=power9 option.

3)  Big endian power8 using the --with-cpu=power8 option.  On this system,
both 64-bit and 32-bit code generation was tested.

There were no regressions in the runs.  Can I check this patch into the
trunk?  If there are no changes needed for the backports, can I check this
code into the active branches after a burn-in period?

2022-06-06   Michael Meissner  

gcc/testsuite/

* gcc.target/powerpc/mma-builtin-1.c: Eliminate checking for store
vector pair instructions.
* gcc.target/powerpc/mma-builtin-10-pair.c: Likewise.
* gcc.target/powerpc/mma-builtin-10-quit.c: Likewise.
* gcc.target/powerpc/mma-builtin-2.c: Likewise.
* gcc.target/powerpc/mma-builtin-3.c: Likewise.
* gcc.target/powerpc/mma-builtin-4.c: Likewise.
* gcc.target/powerpc/mma-builtin-5.c: Likewise.
* gcc.target/powerpc/mma-builtin-6.c: Likewise.
* gcc.target/powerpc/mma-builtin-7.c: Likewise.
* gcc.target/powerpc/mma-builtin-9.c: Likewise.
* gcc.target/powerpc/mma-builtin-8.c: Add -mstore-vector-pair.
* gcc.target/powerpc/pr102976.c: Likewise.
---
 gcc/testsuite/gcc.target/powerpc/mma-builtin-1.c   | 1 -
 gcc/testsuite/gcc.target/powerpc/mma-builtin-10-pair.c | 2 --
 gcc/testsuite/gcc.target/powerpc/mma-builtin-10-quad.c | 2 --
 gcc/testsuite/gcc.target/powerpc/mma-builtin-2.c   | 1 -
 gcc/testsuite/gcc.target/powerpc/mma-builtin-3.c   | 1 -
 gcc/testsuite/gcc.target/powerpc/mma-builtin-4.c   | 2 --
 gcc/testsuite/gcc.target/powerpc/mma-builtin-5.c   | 2 --
 gcc/testsuite/gcc.target/powerpc/mma-builtin-6.c   | 1 -
 gcc/testsuite/gcc.target/powerpc/mma-builtin-7.c   | 2 --
 gcc/testsuite/gcc.target/powerpc/mma-builtin-8.c   | 2 +-
 gcc/testsuite/gcc.target/powerpc/mma-builtin-9.c   | 2 --
 gcc/testsuite/gcc.target/powerpc/pr102976.c| 6 +-
 12 files changed, 6 insertions(+), 18 deletions(-)

diff --git a/gcc/testsuite/gcc.target/powerpc/mma-builtin-1.c 
b/gcc/testsuite/gcc.target/powerpc/mma-builtin-1.c
index 69ee826e1be..47b45b00403 100644
--- a/gcc/testsuite/gcc.target/powerpc/mma-builtin-1.c
+++ b/gcc/testsuite/gcc.target/powerpc/mma-builtin-1.c
@@ -260,7 +260,6 @@ foo13b (__vector_quad *dst, __vector_quad *src, vec_t *vec)
 
 /* { dg-final { scan-assembler-times {\mlxv\M} 40 } } */
 /* { dg-final { scan-assembler-times {\mlxvp\M} 12 } } */
-/* { dg-final { scan-assembler-times {\mstxvp\M} 40 } } */
 /* { dg-final { scan-assembler-times {\mxxmfacc\M} 20 } } */
 /* { dg-final { scan-assembler-times {\mxxmtacc\M} 6 } } */
 /* { dg-final { scan-assembler-times {\mxvbf16ger2\M} 1 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/mma-builtin-10-pair.c 
b/gcc/testsuite/gcc.target/powerpc/mma-builtin-10-pair.c
index d8748d8e7d0..9522673d83e 100644
--- a/gcc/testsuite/gcc.target/powerpc/mma-builtin-10-pair.c
+++ b/gcc/testsuite/gcc.target/powerpc/mma-builtin-10-pair.c
@@ -16,6 +16,4 @@ foo (__vector_pair *dst, vec_t *src)
 }
 
 /* { dg-final { scan-assembler-not {\mlxv\M} } } */
-/* { dg-final { scan-assembler-not {\mstxv\M} } } */
 /* { dg-final { scan-assembler-times {\mlxvp\M} 1 } } */
-/* { dg-final { scan-assembler-times {\mstxvp\M} 2 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/mma-builtin-10-quad.c 
b/gcc/testsuite/gcc.target/powerpc/mma-builtin-10-quad.c
index 02342c76f5f..3cbdffc15ba 100644
--- a/gcc/testsuite/gcc.target/powerpc/mma-builtin-10-quad.c
+++ b/gcc/testsuite/gcc.target/powerpc/mma-builtin-10-quad.c
@@ -16,8 +16,6 @@ foo (__vector_quad *dst, vec_t *src)
 }
 
 /* { dg-final { scan-assembler-not {\mlxv\M} } } */
-/* { dg-final { scan-assembler-not {\mstxv\M} } } */
 /* { dg-final { scan-assembler-times {\mlxvp\M} 4 } } */
 /* { dg-final { scan-assembler-times {\mxxmtacc\M} 2 } } */
 /* { dg-final { scan-assembler-times {\mxxmfacc\M} 2 } } */
-/* { dg-final { scan-assembler-times {\mstxvp\M} 4 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/mma-builtin-2.c 
b/gcc/testsuite/gcc.target/powerpc/mma-builtin-2.c
index 0230d727657..5943702d8f3 100644
--- a/gcc/testsuite/gcc.target/powerpc/mma-builtin-2.c
+++ b/gcc/testsuite/gcc.target/powerpc/mma-builtin-2.c
@@ -59,7 +59,6 @@ foo3 (__vector_quad *dst, __vector_quad *src, vec_t *vec, 
__vector_pair *pvecp)
 /* { dg-final { scan-assembler-times 

[PATCH 2/3] Disable generating load/store vector pairs for block copies.

2022-06-06 Thread Michael Meissner via Gcc-patches
[PATCH 2/3] Disable generating load/store vector pairs for block copies.

If the store vector pair instruction is disabled, do not generate block
copies that use load and store vector pair instructions.

I have built bootstrap compilers and run the regression tests on three
different systems:

1)  Little endian power10 using the --with-cpu=power10 option.

2)  Little endian power9 using the --with-cpu=power9 option.

3)  Big endian power8 using the --with-cpu=power8 option.  On this system,
both 64-bit and 32-bit code generation was tested.

There were no regressions in the runs.  Can I check this patch into the
trunk?  If there are no changes needed for the backports, can I check this
code into the active branches after a burn-in period?

2022-06-06   Michael Meissner  

gcc/

* config/rs6000/rs6000-string.cc (expand_block_move): If the store
vector pair instructions are disabled, do not generate block
copies using load and store vector pairs.
---
 gcc/config/rs6000/rs6000-string.cc | 12 +++-
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-string.cc 
b/gcc/config/rs6000/rs6000-string.cc
index 59d901ac68d..1b18e043269 100644
--- a/gcc/config/rs6000/rs6000-string.cc
+++ b/gcc/config/rs6000/rs6000-string.cc
@@ -2787,14 +2787,16 @@ expand_block_move (rtx operands[], bool might_overlap)
   rtx src, dest;
   bool move_with_length = false;
 
-  /* Use OOmode for paired vsx load/store.  Use V2DI for single
-unaligned vsx load/store, for consistency with what other
-expansions (compare) already do, and so we can use lxvd2x on
-p8.  Order is VSX pair unaligned, VSX unaligned, Altivec, VSX
-with length < 16 (if allowed), then gpr load/store.  */
+  /* Use OOmode for paired vsx load/store unless the store vector pair
+instructions are disabled.  Use V2DI for single unaligned vsx
+load/store, for consistency with what other expansions (compare)
+already do, and so we can use lxvd2x on p8.  Order is VSX pair
+unaligned, VSX unaligned, Altivec, VSX with length < 16 (if allowed),
+then gpr load/store.  */
 
   if (TARGET_MMA && TARGET_BLOCK_OPS_UNALIGNED_VSX
  && TARGET_BLOCK_OPS_VECTOR_PAIR
+ && TARGET_STORE_VECTOR_PAIR
  && bytes >= 32
  && (align >= 256 || !STRICT_ALIGNMENT))
{
-- 
2.35.3


-- 
Michael Meissner, IBM
PO Box 98, Ayer, Massachusetts, USA, 01432
email: meiss...@linux.ibm.com


[PATCH 1/3] Disable generating store vector pair.

2022-06-06 Thread Michael Meissner via Gcc-patches
[PATCH 1/3] Disable generating store vector pair.

Testing has revealed that the power10 has some slowdowns if the store
vector pair instruction is generated in some cases.  This patch disables
generating the store vector pair instructions (stxvp, pstxvp, and stxvpx)
unless an undocumented switch is used.  It is anticipated that perhaps
with future machines we can generate the store vector pair instruction.

This patch does a split after reload to convert a store vector pair
instruction into a pair of store vector instructions.

We do continue to generate the load vector pair instructions (lxvp, plxvp,
and lxvpx), since we have found that in code that heavily uses MMA, it is
still a win to generate the load vector pair instructions.

There are two future patches planed:

1)  Disable block moves from generating load/store vector pair
instructions unless the the store vector pair instructions are
being generted.

2)  Make the built-in functions for generating store vector pair
always generate those instructions even if store vector pair
instructions are disabled.

I have built bootstrap compilers and run the regression tests on three
different systems:

1)  Little endian power10 using the --with-cpu=power10 option.

2)  Little endian power9 using the --with-cpu=power9 option.

3)  Big endian power8 using the --with-cpu=power8 option.  On this system,
both 64-bit and 32-bit code generation was tested.

There were no regressions in the runs except for the tests that are
modified in patch #3 in these series of patches.  Can I check this patch
into the trunk?  If there are no changes needed for the backports, can I
check this code into the active branches after a burn-in period?

2022-06-06   Michael Meissner  

gcc/

* config/rs6000/mma.md (movoo): Disable generating store vector
pair instructions unless these are enabled by the user.
(movxo): Likewise.
* config/rs6000/rs6000.cc (rs6000_setup_reg_addr_masks): If store
vector pair instructions are disabled, do not allow vector pair
addresses to be indexed.
(rs6000_split_multireg_move): Do not split XOmode stores into two
store vector pair instructions unless store vector pair
instructions are enabled.
* config/rs6000/rs6000.md (isa attribute): Add stxvp attribute.
(enabled attribute): Disable alternative using store vector pair
instructions unless they are enabled.
* config/rs6000/rs6000.opt (-mstore-vector-pair): New option.

gcc/testsuite/

* gcc.target/powerpc/p10-store-vector-pair-1.c: New test.
* gcc.target/powerpc/p10-store-vector-pair-2.c: New test.
---
 gcc/config/rs6000/mma.md  | 41 ++
 gcc/config/rs6000/rs6000.cc   |  9 +-
 gcc/config/rs6000/rs6000.md   |  8 +-
 gcc/config/rs6000/rs6000.opt  |  4 +
 .../powerpc/p10-store-vector-pair-1.c | 82 +++
 .../powerpc/p10-store-vector-pair-2.c | 81 ++
 6 files changed, 206 insertions(+), 19 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/p10-store-vector-pair-1.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/p10-store-vector-pair-2.c

diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md
index a183b6a168a..9b5f243b88d 100644
--- a/gcc/config/rs6000/mma.md
+++ b/gcc/config/rs6000/mma.md
@@ -274,26 +274,35 @@ (define_expand "movoo"
   DONE;
 })
 
+;; By default for power10, do not generate the stxvp/pstxvp/stxvpx
+;; instructions.  Instead, split these instructions into two separate store
+;; vector instructions.  We do always generate a lxvp/plxvp/lxvpx instruction.
+;; We leave in the support for generating stxvp/pstxvp/stxvpx in future
+;; machines.
 (define_insn_and_split "*movoo"
-  [(set (match_operand:OO 0 "nonimmediate_operand" "=wa,m,wa")
-   (match_operand:OO 1 "input_operand" "m,wa,wa"))]
+  [(set (match_operand:OO 0 "nonimmediate_operand" "=wa,m, o, wa")
+(match_operand:OO 1 "input_operand" "m, wa,wa,wa"))]
   "TARGET_MMA
&& (gpc_reg_operand (operands[0], OOmode)
|| gpc_reg_operand (operands[1], OOmode))"
   "@
lxvp%X1 %x0,%1
stxvp%X0 %x1,%0
+   #
#"
   "&& reload_completed
-   && (!MEM_P (operands[0]) && !MEM_P (operands[1]))"
+   && ((MEM_P (operands[0]) && !TARGET_STORE_VECTOR_PAIR)
+   || (!MEM_P (operands[0]) && !MEM_P (operands[1])))"
   [(const_int 0)]
 {
   rs6000_split_multireg_move (operands[0], operands[1]);
   DONE;
 }
-  [(set_attr "type" "vecload,vecstore,veclogical")
-   (set_attr "size" "256")
-   (set_attr "length" "*,*,8")])
+  [(set_attr "type"   "vecload,vecstore,vecstore,veclogical")
+   (set_attr "max_prefixed_insns" "*,  *,   2,   *")
+   (set_attr "length" "*,  *,   *,   8")
+   (set_attr "isa""*,  stxvp,   *,   *")])
+   

[PATCH, 0/3] Disable generating store vector pair.

2022-06-06 Thread Michael Meissner via Gcc-patches
[PATCH 0/3] Disable generating store vector pair.

Testing has revealed that the power10 has some slowdowns if the store vector
pair instruction is generated in some cases.  This patch disables generating
the store vector pair instructions (stxvp, pstxvp, and stxvpx) unless an
undocumented switch (-mstore-vector-pair) is used.  It is anticipated that
perhaps with future machines we can generate the store vector pair instruction.

This patch does a split after reload to convert a store vector pair
instruction into a pair of store vector instructions.

We do continue to generate the load vector pair instructions (lxvp, plxvp,
and lxvpx), since we have found that in code that heavily uses MMA, it is
still a win to generate the load vector pair instructions.

There are 3 patches in this set:

1)  Disable the generation of the stxvp, stxvpx, and pstxvp instructions
for stores of OOmode and XOmodes.

2)  Disable block moves from generating load/store vector pair
instructions unless the the store vector pair instructions are
being generted.  With patch #1 installed, the block move code will
generate a load vector pair and store vector pair combination, but
after reload, the store vector pair instructions are split into two
separate store vector instructions.

2)  Fix up the mma test suite to deal with store vector pair not being
generated by default.  In most of the tests, I just deleted the lines
that counted the store vector pair instructions.  In a few of the
tests, I explicitly passed the -mstore-vector-pair instruction since
the point of the test was to generate store vector pair instructions.

There is a 4th patch that Peter Bergner will be developing.  This patch will
update the built-in functions for load and store vector pair, so that these
built-ins will always generate the lxvp and stxvp instructions.

I have built bootstrap compilers and run the regression tests on three
different systems:

1)  Little endian power10 using the --with-cpu=power10 option.

2)  Little endian power9 using the --with-cpu=power9 option.

3)  Big endian power8 using the --with-cpu=power8 option.  On this system,
both 64-bit and 32-bit code generation was tested.

Once all 3 patches have been applied, there are no regressions in the runs.

-- 
Michael Meissner, IBM
PO Box 98, Ayer, Massachusetts, USA, 01432
email: meiss...@linux.ibm.com


[PATCH, V3] Optimize vec_splats of constant vec_extract for V2DI/V2DF, PR target 99293

2022-06-06 Thread Michael Meissner via Gcc-patches
Optimize vec_splats of constant vec_extract for V2DI/V2DF, PR target 99293.

This is version 3 of the patch.  The original patch was:

| Date: Mon, 28 Mar 2022 12:26:02 -0400
| Subject: [PATCH 1/4] Optimize vec_splats of constant vec_extract for 
V2DI/V2DF, PR target 99293.
| Message-ID: 
| https://gcc.gnu.org/pipermail/gcc-patches/2022-March/592420.html

Version 2 of the patch was:

| Date: Fri, 13 May 2022 10:49:26 -0400
| Subject: [PATCH] Optimize vec_splats of constant V2DI/V2DF vec_extract, PR 
target/99293
| Message-ID: 
| https://gcc.gnu.org/pipermail/gcc-patches/2022-May/594797.html

The differences between version 2 and version 3 was to clean up the description
of what the patch does, and to make the example test case clear.

In PR target/99293, it was pointed out that doing:

vector long long dest0, dest1, src;
/* ... */
dest0 = vec_splats (vec_extract (src, 0));
dest1 = vec_splats (vec_extract (src, 1));

would generate slower code.

It generates the following code on power8:

;; vec_splats (vec_extract (src, 0))
xxpermdi 0,34,34,3
xxpermdi 34,0,0,0

;; vec_splats (vec_extract (src, 1))
xxlor 0,34,34
xxpermdi 34,0,0,0

However on power9 and power10 it generates:

;; vec_splats (vec_extract (src, 0))
mfvsld 3,34
mtvsrdd 34,9,9

;; vec_splats (vec_extract (src, 1))
mfvsrd 9,34
mtvsrdd 34,9,9

This is due to the power9 having the mfvsrld instruction which can extract
either 64-bit element into a GPR.  While there are alternatives for both
vector registers and GPR registers, the register allocator prefers to put
DImode into GPR registers.

In this case, it is better to have a single combiner pattern that can generate
a single xxpermdi, instead of 2 insnsns (the extract and then the concat).
This is true if the two operations are move from vector register and move to
vector register.  As Segher pointed out in a previous version of the patch, the
combiner already tries doing creating a (vec_duplicate (vec_select ...))
pattern, but we didn't provide one.

This patch reworks vsx_xxspltd_ for V2DImode and V2DFmode so that it now
uses VEC_DUPLICATE, which the combiner checks for.

I have built Spec 2017 with this patch installed, and the cam4_r benchmark
is the only benchmark that generated different code (3 mfvsrld/mtvsrdd
pairs of instructions were replaced with xxpermdi).

I have built bootstrap versions on the following systems and I have run
the regression tests.  There were no regressions in the runs:

Power9 little endian, --with-cpu=power9
Power10 little endian, --with-cpu=power10
Power8 big endian, --with-cpu=power8 (both 32-bit & 64-bit tests)

Can I install this into the trunk?  After a burn-in period, can I backport
and install this into GCC 11 and GCC 10 branches?

2022-06-06   Michael Meissner  

gcc/
PR target/99293
* config/rs6000/rs6000-p8swap.cc (rtx_is_swappable_p): Remove
UNSPEC_VSX_XXSPLTD case.
* config/rs6000/vsx.md (UNSPEC_VSX_XXSPLTD): Delete.
(vsx_xxspltd_): Rewrite to use VEC_DUPLICATE.

gcc/testsuite:
PR target/99293
* gcc.target/powerpc/builtins-1.c: Update insn count.
* gcc.target/powerpc/pr99293.c: New test.
---
 gcc/config/rs6000/rs6000-p8swap.cc|  1 -
 gcc/config/rs6000/vsx.md  | 19 +++
 gcc/testsuite/gcc.target/powerpc/builtins-1.c |  2 +-
 gcc/testsuite/gcc.target/powerpc/pr99293.c| 51 +++
 4 files changed, 62 insertions(+), 11 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pr99293.c

diff --git a/gcc/config/rs6000/rs6000-p8swap.cc 
b/gcc/config/rs6000/rs6000-p8swap.cc
index 275702fee1b..3160fcbdeca 100644
--- a/gcc/config/rs6000/rs6000-p8swap.cc
+++ b/gcc/config/rs6000/rs6000-p8swap.cc
@@ -807,7 +807,6 @@ rtx_is_swappable_p (rtx op, unsigned int *special)
  case UNSPEC_VUPKLU_V4SF:
return 0;
  case UNSPEC_VSPLT_DIRECT:
- case UNSPEC_VSX_XXSPLTD:
*special = SH_SPLAT;
return 1;
  case UNSPEC_REDUC_PLUS:
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 1b75538f42f..a1a1ce95195 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -296,7 +296,6 @@ (define_c_enum "unspec"
UNSPEC_VSX_XXPERM
 
UNSPEC_VSX_XXSPLTW
-   UNSPEC_VSX_XXSPLTD
UNSPEC_VSX_DIVSD
UNSPEC_VSX_DIVUD
UNSPEC_VSX_DIVSQ
@@ -4673,16 +4672,18 @@ (define_insn "vsx_vsplt_di"
 ;; V2DF/V2DI splat for use by vec_splat builtin
 (define_insn "vsx_xxspltd_"
   [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
-(unspec:VSX_D [(match_operand:VSX_D 1 "vsx_register_operand" "wa")
-  (match_operand:QI 2 "u5bit_cint_operand" "i")]
-  UNSPEC_VSX_XXSPLTD))]
+   (vec_duplicate:VSX_D
+(vec_select:
+ (match_operand:VSX_D 1 "gpc_reg_operand" 

Re: [PATCH] libgccjit: Fix infinite recursion in gt_ggc_mx_lang_tree_node

2022-06-06 Thread David Malcolm via Gcc-patches
On Thu, 2022-06-02 at 21:23 -0400, Antoni Boucher via Gcc-patches
wrote:
> Sorry, forgot to attach the patch.
> 
> Here it is.
> 
> On Thu, 2022-06-02 at 21:20 -0400, Antoni Boucher via Jit wrote:
> > Hi.
> > The attached patch fix bug 105827:
> > https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105827
> > 
> > I'm not sure how to test this, so please share ideas.

Do you have a reproducer for this?

With garbage-collections issues in libgccjit, you can set:

  gcc_jit_context_set_bool_option (ctxt,
   GCC_JIT_BOOL_OPTION_SELFCHECK_GC,
   1);

which will force a full garbage collection at every opportunity that
the collector considers doing one (rather than following heuristics)

This really slows things down, but makes reproducing crashes much more
deterministic, often turning "it crashes every now and then" to "it
crashes every time" (and the test suite runs with this enabled).

> > 
> > Thanks for the review.
> 

> diff --git a/gcc/jit/dummy-frontend.cc b/gcc/jit/dummy-frontend.cc
> index 84ff359bfe3..8bb5d03d630 100644
> --- a/gcc/jit/dummy-frontend.cc
> +++ b/gcc/jit/dummy-frontend.cc
> @@ -506,13 +506,14 @@ struct GTY(()) lang_identifier
>  
>  /* The resulting tree type.  */
>  
> +/* See lang_tree_node in gcc/c/c-decl.cc.  */
>  union GTY((desc ("TREE_CODE (&%h.generic) == IDENTIFIER_NODE"),
> -chain_next ("CODE_CONTAINS_STRUCT (TREE_CODE (&%h.generic), 
> TS_COMMON) ? ((union lang_tree_node *) TREE_CHAIN (&%h.generic)) : NULL")))
> -lang_tree_node
> -{
> -  union tree_node GTY((tag ("0"),
> -desc ("tree_node_structure (&%h)"))) generic;
> -  struct lang_identifier GTY((tag ("1"))) identifier;
> +   chain_next ("(union lang_tree_node *) jit_tree_chain_next 
> (&%h.generic)"))) lang_tree_node
> + {
> +  union tree_node GTY ((tag ("0"),
> + desc ("tree_node_structure (&%h)")))
> +generic;
> +  struct lang_identifier GTY ((tag ("1"))) identifier;
>  };

Those GTY markings on gcc/jit/dummy-frontend.cc's lang_tree_node union
have been like that since my initial proof-of-concept patch back in
2013:
  https://gcc.gnu.org/legacy-ml/gcc-patches/2013-10/msg00228.html
so presumably I simply copied and pasted that from another frontend
when I was initially prototyping libgccjit.  There was an element of
"cargo cult programming" as I was getting the project started.

Jakub had changed the C and C++ frontends in June 2011 with
563007852e8d19b66ec8c1e42e431efaaa967dc6, which introduced the
c_tree_chain_next that you're now copying in your patch, so presumably
I copied from a different frontend.  My notes say I created the first
prototype in July 2013, so that's when I would have copied the
code.

Dave








Re: [PATCH v2] libgccjit: allow common objects in $(EXTRA_GCC_OBJS) and $(EXTRA_OBJS)

2022-06-06 Thread David Malcolm via Gcc-patches
On Mon, 2022-05-30 at 18:09 +0800, Xi Ruoyao wrote:
> Ping.  I'd like to see libgccjit working on LoongArch so I would be
> able
> to submit a Rust port to upstream.
> 
> If the result is NACK I'd like to know alternative approaches to fix
> the
> build failure.
> 
> I doubt if "j...@gcc.gnu.org" is really used, so CC'ed the JIT
> maintainer
> listed in MAINTAINERS.

Sorry about that, it turns out there's something wrong with my email
filters.  Unfortunately, I haven't been able to fix them, so please CC
me directly on jit-related patches, and do ping me if I seem not to
have seen one.  Sorry again.

> 
> On Thu, 2022-05-19 at 16:10 +0800, Yang Yujie wrote:
> > Hello,
> > 
> > This patch fixes libgccjit build failure on loongarch* targets,
> > and could probably be useful for future ports.
> > 
> > For now, libgccjit is linked with objects from $(EXTRA_GCC_OBJS)
> > and
> > libbackend.a, which contains object files from $(EXTRA_OBJS).
> > 
> > This effectively forbids any overlap between those two lists, i.e.
> > all
> > target-specific shared code between the gcc driver and compiler
> > executables must go into gcc/common/config//-common.cc,
> > which feels a bit inconvenient when there are a lot of "common"
> > stuff
> > that we want to put into separate source files.
> > 
> > By linking libgccjit with $(EXTRA_GCC_OBJS_EXCLUSIVE), which
> > contains
> > all elements from $(EXTRA_GCC_OBJS) but not $(EXTRA_OBJS), this
> > problem
> > can be alleviated.

Yujie: thanks for figuring this out, and coming up with a solution. 
Presumably libgccjit is the only GCC "frontend" that also has driver
code directly linked into it, and thus the only one that needs this
workaround.

> > 
> > This patch does not affect any other target architecture than
> > loongarch,
> > and has been bootstrapped and regression-tested on loongarch64-
> > linux-
> > gnuf64
> > an x86_64-pc-linux-gnu.
> > 
> > Any recommendations? Please review. Thanks a lot.

The patch looks good to me.

Thanks.
Dave


> > 
> > Yujie
> > 
> > * gcc/jit/ChangeLog:
> > 
> > * Make-lang.in: only link objects from $(EXTRA_GCC_OBJS)
> > that's not in $(EXTRA_OBJS) into libgccjit.
> > ---
> >  gcc/jit/Make-lang.in | 9 +++--
> >  1 file changed, 7 insertions(+), 2 deletions(-)
> > 
> > diff --git a/gcc/jit/Make-lang.in b/gcc/jit/Make-lang.in
> > index 6e10abfd0ac..248ec45b729 100644
> > --- a/gcc/jit/Make-lang.in
> > +++ b/gcc/jit/Make-lang.in
> > @@ -157,18 +157,23 @@ LIBGCCJIT_EXTRA_OPTS =
> > $(LIBGCCJIT_VERSION_SCRIPT_OPTION) \
> >  endif
> >  endif
> >  
> > +# Only link objects from $(EXTRA_GCC_OBJS) that's not already
> > +# included in libbackend.a ($(EXTRA_OBJS)).
> > +EXTRA_GCC_OBJS_EXCLUSIVE = $(foreach _obj1, $(EXTRA_GCC_OBJS), \
> > +   $(if $(filter $(_obj1), $(EXTRA_OBJS)),, $(_obj1)))
> > +
> >  # We avoid using $(BACKEND) from Makefile.in in order to avoid
> > pulling
> >  # in main.o
> >  $(LIBGCCJIT_FILENAME): $(jit_OBJS) \
> > libbackend.a libcommon-target.a libcommon.a \
> > $(CPPLIB) $(LIBDECNUMBER) \
> > $(LIBDEPS) $(srcdir)/jit/libgccjit.map \
> > -   $(EXTRA_GCC_OBJS) $(jit.prev)
> > +   $(EXTRA_GCC_OBJS_EXCLUSIVE) $(jit.prev)
> > @$(call LINK_PROGRESS,$(INDEX.jit),start)
> > +$(LLINKER) $(ALL_LINKERFLAGS) $(LDFLAGS) -o $@ -shared \
> >  $(jit_OBJS) libbackend.a libcommon-target.a
> > libcommon.a \
> >  $(CPPLIB) $(LIBDECNUMBER) $(EXTRA_GCC_LIBS) $(LIBS)
> > $(BACKENDLIBS) \
> > -    $(EXTRA_GCC_OBJS) \
> > +    $(EXTRA_GCC_OBJS_EXCLUSIVE) \
> >  $(LIBGCCJIT_EXTRA_OPTS)
> > @$(call LINK_PROGRESS,$(INDEX.jit),end)
> >  
> 




Re: [PATCH] c++: Allow mixing GNU/std-style attributes [PR69585]

2022-06-06 Thread Joseph Myers
On Fri, 3 Jun 2022, Marek Polacek via Gcc-patches wrote:

> However, the patch does not touch the C FE.  The C FE doesn't have
> a counterpart to C++'s cp_parser_attributes_opt -- it only has
> c_parser_transaction_attributes (which parses both GNU and [[]]
> attributes), but that's TM-specific.  The C FE seems to use either
> c_parser_gnu_attributes or c_parser_std_attribute_specifier_sequence.
> As a consequence, this works:
> 
>   [[maybe_unused]] __attribute__((deprecated)) void f2 ();
> 
> but this doesn't:
> 
>   __attribute__((deprecated)) [[maybe_unused]] void f1 ();
> 
> I'm not sure what, if anything, should be done about this.

In many places, both GNU and [[]] attributes are valid, but appertain to 
different entities, so it would be hard to define semantics for mixing 
them.  At the start of a declaration like that, for example, GNU 
attributes can be mixed arbitrarily among the declaration specifiers (and 
appertain to the declaration) - but [[]] attributes can appear only before 
the declaration specifiers (appertaining to the declaration) or after them 
(appertaining to the type determined by those declaration specifiers), not 
in the middle.

-- 
Joseph S. Myers
jos...@codesourcery.com


[patch] libgompd: Add thread handles

2022-06-06 Thread Ahmed Sayed Mousse via Gcc-patches
This patch is the initial implementation of OpenMP-API specs book section
20.5.5 with title "Thread Handles"

libgomp/ChangeLog

2022-05-06 Ahmed Sayed 

* Makefile.am (libgompd_la_SOURCES): Add ompd-threads.c.

* Makefile.in: Regenerate.

* ompd-support.h ( gompd_thread_initial_tls_bias ): New Variable.

* ompd-support.c ( gompd_thread_initial_tls_bias ): New Variable.

( gompd_load ): ( gompd_thread_initial_tls_bias ): Initialized with
_tls_data - pthread_self ().

* ompd-threads.c: New.
diff --git a/libgomp/Makefile.am b/libgomp/Makefile.am
index 6d913a93e7f..23f5bede1bf 100644
--- a/libgomp/Makefile.am
+++ b/libgomp/Makefile.am
@@ -94,7 +94,7 @@ libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c 
env.c error.c \
priority_queue.c affinity-fmt.c teams.c allocator.c oacc-profiling.c \
oacc-target.c ompd-support.c

-libgompd_la_SOURCES = ompd-init.c ompd-helper.c ompd-icv.c
+libgompd_la_SOURCES = ompd-init.c ompd-helper.c ompd-icv.c ompd-threads.c

 include $(top_srcdir)/plugin/Makefrag.am

diff --git a/libgomp/Makefile.in b/libgomp/Makefile.in
index 40f896b5f03..7acdcbf31d5 100644
--- a/libgomp/Makefile.in
+++ b/libgomp/Makefile.in
@@ -233,7 +233,8 @@ am_libgomp_la_OBJECTS = alloc.lo atomic.lo barrier.lo 
critical.lo \
affinity-fmt.lo teams.lo allocator.lo oacc-profiling.lo \
oacc-target.lo ompd-support.lo $(am__objects_1)
 libgomp_la_OBJECTS = $(am_libgomp_la_OBJECTS)
-am_libgompd_la_OBJECTS = ompd-init.lo ompd-helper.lo ompd-icv.lo
+am_libgompd_la_OBJECTS = ompd-init.lo ompd-helper.lo ompd-icv.lo \
+   ompd-threads.lo
 libgompd_la_OBJECTS = $(am_libgompd_la_OBJECTS)
 AM_V_P = $(am__v_P_@AM_V@)
 am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
@@ -583,7 +584,7 @@ libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c 
env.c \
oacc-async.c oacc-plugin.c oacc-cuda.c priority_queue.c \
affinity-fmt.c teams.c allocator.c oacc-profiling.c \
oacc-target.c ompd-support.c $(am__append_7)
-libgompd_la_SOURCES = ompd-init.c ompd-helper.c ompd-icv.c
+libgompd_la_SOURCES = ompd-init.c ompd-helper.c ompd-icv.c ompd-threads.c

 # Nvidia PTX OpenACC plugin.
 @PLUGIN_NVPTX_TRUE@libgomp_plugin_nvptx_version_info = -version-info 
$(libtool_VERSION)
@@ -801,6 +802,7 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ompd-icv.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ompd-init.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ompd-support.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ompd-threads.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ordered.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/parallel.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/priority_queue.Plo@am__quote@
diff --git a/libgomp/ompd-support.c b/libgomp/ompd-support.c
index d8a7174b2f3..c7da3ef98a6 100644
--- a/libgomp/ompd-support.c
+++ b/libgomp/ompd-support.c
@@ -36,6 +36,10 @@
 const char **ompd_dll_locations = NULL;
 __UINT64_TYPE__ gompd_state;

+#if (defined HAVE_TLS || defined USE_EMUTLS)
+__UINT64_TYPE__ gompd_thread_initial_tls_bias;
+#endif
+
 void
 gompd_load (void)
 {
@@ -61,7 +65,11 @@ gompd_load (void)
   = (__UINT64_TYPE__) & (((struct gomp_thread *) NULL)->handle);
 __UINT64_TYPE__ gompd_sizeof_gomp_thread_handle
   = sizeof (((struct gomp_thread *) NULL)->handle);
+  #elif (defined HAVE_TLS || defined USE_EMUTLS)
+gompd_thread_initial_tls_bias = (__UINT64_TYPE__) \
+(_tls_data - pthread_self ());
   #endif
+
   gomp_debug (2, "OMP OMPD active\n");
   static const char *ompd_dll_locations_array[2]
 = {"libgompd" SONAME_SUFFIX (1) , NULL};
diff --git a/libgomp/ompd-support.h b/libgomp/ompd-support.h
index 39d55161132..2dd88af2d73 100644
--- a/libgomp/ompd-support.h
+++ b/libgomp/ompd-support.h
@@ -69,6 +69,10 @@
 void gompd_load (void);
 extern __UINT64_TYPE__ gompd_state;

+#if (defined HAVE_TLS || defined USE_EMUTLS)
+extern __UINT64_TYPE__ gompd_thread_initial_tls_bias;
+#endif
+
 #define OMPD_ENABLED 0x1

 #define GOMPD_FOREACH_ACCESS(gompd_access) \
diff --git a/libgomp/ompd-support.h.gch b/libgomp/ompd-support.h.gch
new file mode 100644
index 000..ee98f7091c0
Binary files /dev/null and b/libgomp/ompd-support.h.gch differ
diff --git a/libgomp/ompd-threads.c b/libgomp/ompd-threads.c
new file mode 100644
index 000..bc370e1af9c
--- /dev/null
+++ b/libgomp/ompd-threads.c
@@ -0,0 +1,310 @@
+/* Copyright (C) The GNU Toolchain Authors.
+   Contributed by Ahmed Sayed .
+   This file is part of the GNU Offloading and Multi Processing Library
+   (libgomp).
+
+   Libgomp is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+   

Re: [PATCH] libgccjit: Support getting the size of a float

2022-06-06 Thread David Malcolm via Gcc-patches
On Thu, 2022-06-02 at 23:09 -0400, Antoni Boucher via Gcc-patches
wrote:
> Hi.
> The attached patch fix bug 105829:
> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105829
> 
> Thanks for the review.

> diff --git a/gcc/jit/libgccjit.cc b/gcc/jit/libgccjit.cc
> index cc6486c9cad..12e7679988b 100644
> --- a/gcc/jit/libgccjit.cc
> +++ b/gcc/jit/libgccjit.cc
> @@ -545,8 +545,8 @@ gcc_jit_type_get_size (gcc_jit_type *type)
>  {
>RETURN_VAL_IF_FAIL (type, -1, NULL, NULL, "NULL type");
>RETURN_VAL_IF_FAIL
> -(type->is_int (), -1, NULL, NULL,
> - "only getting the size of an integer type is supported for now");
> +(type->is_int () || type->is_float (), -1, NULL, NULL,
> + "only getting the size of an int or float type is supported for now");

Reword to:

  "only getting the size of integer or floating-point types is supported for 
now"

to avoid using the words "int" and "float" to refer to those kinds of
types, and to follow:
  https://gcc.gnu.org/codingconventions.html#Spelling

>return type->get_size ();
>  }
>  
> diff --git a/gcc/testsuite/jit.dg/test-types.c 
> b/gcc/testsuite/jit.dg/test-types.c
> index 6836597d14e..53bdeafed61 100644
> --- a/gcc/testsuite/jit.dg/test-types.c
> +++ b/gcc/testsuite/jit.dg/test-types.c
> @@ -489,4 +489,7 @@ verify_code (gcc_jit_context *ctxt, gcc_jit_result 
> *result)
>  CHECK (gcc_jit_compatible_types (
>gcc_jit_context_get_type (ctxt, GCC_JIT_TYPE_LONG),
>gcc_jit_context_get_type (ctxt, GCC_JIT_TYPE_INT64_T)));
> +
> +  CHECK_VALUE (gcc_jit_type_get_size (gcc_jit_context_get_type (ctxt, 
> GCC_JIT_TYPE_FLOAT)), 4);
> +  CHECK_VALUE (gcc_jit_type_get_size (gcc_jit_context_get_type (ctxt, 
> GCC_JIT_TYPE_DOUBLE)), 8);

I'm not sure these sizes are 4 and 8 on every target we support.  I got
curious and found e.g. gcc/config/rl78/rl78.h has:

#define FLOAT_TYPE_SIZE 32
#define DOUBLE_TYPE_SIZE32 /*64*/

Maybe use sizeof (float) and sizeof (double) in the assertion, rather
than hardcoding the values? (though this still assumes host == target).

Otherwise LGTM - OK for trunk with those nits fixed.

Thanks
Dave




[PATCH,RS6000 4/5] Replace MASK_ with OPTION_MASK_

2022-06-06 Thread will schmidt via Gcc-patches
[PATCH,RS6000 4/5] Replace MASK_ with OPTION_MASK_

This replaces the MASK_ references with OPTION_MASK_
and removes the now unused defines.

This patch removes the defines for
MASK_ALTIVEC, MASK_CMPB, MASK_CRYPTO, MASK_DFP,
MASK_DIRECT_MOVE, MASK_DLMZB, MASK_EABI, MASK_FLOAT128_KEYWORD,
MASK_FLOAT128_HW, MASK_FPRND, MASK_P8_FUSION, MASK_HARD_FLOAT,
MASK_HTM, MASK_MFCRF, MASK_MMA, MASK_MULHW, MASK_MULTIPLE,
MASK_NO_UPDATE.

gcc/
* config/rs6000/aix71.h (TARGET_DEFAULT): Replace MASK_MFCRF with
OPTION_MASK_MFCRF.
* config/rs6000/darwin.h (TARGET_DEFAULT): Replace MASK_MULTIPLE with
OPTION_MASK_MULTIPLE.
* config/rs6000/darwin64-biarch.h (TARGET_DEFAULT): Same.
* config/rs6000/default.h (TARGET_DEFAULT): Replace MASK_MFCRF with
OPTION_MASK_MFCRF.
* config/rs6000/eabi.h (TARGET_DEFAULT): Replace MASK_EABI with
OPTION_MASK_EABI.
* config/rs6000/eabialtivec.h (TARGET_DEFAULT): Same.
* config/rs6000/linuxaltivec.h (TARGET_DEFAULT): Replace
MASK_ALTIVEC with OPTION_MASK_ALTIVEC.
* config/rs6000/rs6000-cpus.def (MASK_ALTIVEC, MASK_CMPB,
MASK_CRYPTO, MASK_DFP, MASK_DIRECT_MOVE, MASK_DLMZB, MASK_EABI,
MASK_FLOAT128_KEYWORD, MASK_FLOAT128_HW, MASK_FPRND,
MASK_P8_FUSION, MASK_HARD_FLOAT, MASK_HTM, MASK_ISEL, MASK_MFCRF,
MASK_MMA, MASK_MULHW, MASK_MULTIPLE, MASK_NO_UPDATE):
Replace with
OPTION_MASK_ALTIVEC, OPTION_MASK_CMPB, OPTION_MASK_CRYPTO,
OPTION_MASK_DFP, OPTION_MASK_DIRECT_MOVE, OPTION_MASK_DLMZB,
OPTION_MASK_EABI, OPTION_MASK_FLOAT128_KEYWORD,
OPTION_MASK_FLOAT128_HW, OPTION_MASK_FPRND, OPTION_MASK_P8_FUSION,
OPTION_MASK_HARD_FLOAT, OPTION_MASK_HTM, OPTION_MASK_ISEL,
OPTION_MASK_MFCRF, OPTION_MASK_MMA, OPTION_MASK_MULHW,
OPTION_MASK_MULTIPLE, OPTION_MASK_NO_UPDATE.
* config/rs6000/rs6000.cc (rs6000_darwin_file_start): Replace
MASK_MFCRF, MASK_ALTIVEC with OPTION_MASK_MFCRF, OPTION_MASK_ALTIVEC.
* config/rs6000/rs6000.h (TARGET_DEFAULT): Replace MASK_MULTIPLE
with OPTION_MASK_MULTIPLE.
(MASK_ALTIVEC, MASK_CMPB, MASK_CRYPTO, MASK_DFP,
MASK_DIRECT_MOVE, MASK_DLMZB, MASK_EABI, MASK_FLOAT128_KEYWORD,
MASK_FLOAT128_HW, MASK_FPRND, MASK_P8_FUSION, MASK_HARD_FLOAT,
MASK_HTM, MASK_ISEL, MASK_MFCRF, MASK_MMA, MASK_MULHW,
MASK_MULTIPLE, MASK_NO_UPDATE): Delete.
* config/rs6000/vxworks.h (TARGET_DEFAULT): Replace MASK_EABI
with OPTION_MASK_EABI.

diff --git a/gcc/config/rs6000/aix71.h b/gcc/config/rs6000/aix71.h
index 57e07bcc65ee..3f7e6e380ca8 100644
--- a/gcc/config/rs6000/aix71.h
+++ b/gcc/config/rs6000/aix71.h
@@ -135,13 +135,14 @@ do {  
\
 #include "rs6000-cpus.def"
 #undef RS6000_CPU
 
 #undef  TARGET_DEFAULT
 #ifdef RS6000_BI_ARCH
-#define TARGET_DEFAULT (MASK_PPC_GPOPT | MASK_PPC_GFXOPT | MASK_MFCRF | 
MASK_POWERPC64 | MASK_64BIT)
+#define TARGET_DEFAULT (MASK_PPC_GPOPT | MASK_PPC_GFXOPT \
+   | OPTION_MASK_MFCRF | MASK_POWERPC64 | MASK_64BIT)
 #else
-#define TARGET_DEFAULT (MASK_PPC_GPOPT | MASK_PPC_GFXOPT | MASK_MFCRF)
+#define TARGET_DEFAULT (MASK_PPC_GPOPT | MASK_PPC_GFXOPT | OPTION_MASK_MFCRF)
 #endif
 
 #undef  PROCESSOR_DEFAULT
 #define PROCESSOR_DEFAULT PROCESSOR_POWER7
 #undef  PROCESSOR_DEFAULT64
diff --git a/gcc/config/rs6000/darwin.h b/gcc/config/rs6000/darwin.h
index b5cef42610f7..ec02022c6a9f 100644
--- a/gcc/config/rs6000/darwin.h
+++ b/gcc/config/rs6000/darwin.h
@@ -365,11 +365,11 @@
 /* Default target flag settings.  Despite the fact that STMW/LMW
serializes, it's still a big code size win to use them.  Use FSEL by
default as well.  */
 
 #undef  TARGET_DEFAULT
-#define TARGET_DEFAULT (MASK_MULTIPLE | MASK_PPC_GFXOPT)
+#define TARGET_DEFAULT (OPTION_MASK_MULTIPLE | MASK_PPC_GFXOPT)
 
 /* Darwin always uses IBM long double, never IEEE long double.  */
 #undef  TARGET_IEEEQUAD
 #define TARGET_IEEEQUAD 0
 
diff --git a/gcc/config/rs6000/darwin64-biarch.h 
b/gcc/config/rs6000/darwin64-biarch.h
index 57b0fab084e3..a53e567f8b73 100644
--- a/gcc/config/rs6000/darwin64-biarch.h
+++ b/gcc/config/rs6000/darwin64-biarch.h
@@ -19,11 +19,11 @@
along with GCC; see the file COPYING3.  If not see
.  */
 
 #undef  TARGET_DEFAULT
 #define TARGET_DEFAULT (MASK_POWERPC64 | MASK_64BIT \
-   | MASK_MULTIPLE | MASK_PPC_GFXOPT)
+   | OPTION_MASK_MULTIPLE | MASK_PPC_GFXOPT)
 
 #undef DARWIN_ARCH_SPEC
 #define DARWIN_ARCH_SPEC "%{m32:ppc;:ppc64}"
 
 /* Actually, there's really only 970 as an active option.  */
diff --git a/gcc/config/rs6000/default64.h b/gcc/config/rs6000/default64.h
index 4bf0feef2f8e..f3a81404eff3 100644
--- a/gcc/config/rs6000/default64.h
+++ b/gcc/config/rs6000/default64.h
@@ -22,14 +22,16 @@ along with GCC; see the file COPYING3.  If not see
 

[PATCH,RS6000 5/5] Replace MASK_ usage with OPTION_MASK_

2022-06-06 Thread will schmidt via Gcc-patches
[PATCH,RS6000 5/5] Replace MASK_ usage with OPTION_MASK_

This continues the changes of replacing the MASK_ defines
with their OPTION_MASK_ equivalents.

This patch removes the defines for
MASK_P8_VECTOR, MASK_P9_VECTOR, MASK_P9_MISC, MASK_POPCNTB,
MASK_POPCNTD, MASK_PPC_GFXOPT, MASK_PPC_GPOPT, MASK_RECIP_PRECISION,
MASK_SOFT_FLOAT, MASK_VSX, MASK_POWER10, MASK_P10_FUSION.

gcc/
* config/rs6000/aix71.h (MASK_PPC_GPOPT, MASK_PPC_GFXOPT): Replace with
OPTION_MASK_PPC_GPOPT, OPTION_MASK_PPC_GFXOPT.
* config/rs6000/darwin.h (MASK_PPC_GFXOPT): Replace with
OPTION_MASK_PPC_GFXOPT.
* config/rs6000/darwin64-biarch.h (MASK_PPC_GFXOPT): Same.
* config/rs6000/default64.h (MASK_PPC_GPOPT, MASK_PPC_GFXOPT): Replace 
with
OPTION_MASK_PPC_GPOPT, OPTION_MASK_PPC_GFXOPT.
* config/rs6000/rs6000-c.cc: Update comment.
* config/rs6000/rs6000-cpus.def: Update RS6000_CPU macro calls.
* config/rs6000/rs6000.cc (rs6000_darwin_file_start): Replace
MASK_PPC_GPOPT with OPTION_MASK_PPC_GPOPT.
(rs6000_builtin_mask_names): Replace MASK_PPC_GFXOPT, MASK_POPCNTB
with OPTION_MASK_PPC_GFXOPT, OPTION_MASK_POPCNTB.
* config/rs6000/rs6000.h: (MASK_P8_VECTOR, MASK_P9_VECTOR,
MASK_P9_MISC, MASK_POPCNTB, MASK_POPCNTD, MASK_PPC_GFXOPT,
MASK_PPC_GPOPT, MASK_RECIP_PRECISION, MASK_SOFT_FLOAT,
MASK_VSX, MASK_POWER10, MASK_P10_FUSION): Delete.

diff --git a/gcc/config/rs6000/aix71.h b/gcc/config/rs6000/aix71.h
index 3f7e6e380ca8..323d7c884d18 100644
--- a/gcc/config/rs6000/aix71.h
+++ b/gcc/config/rs6000/aix71.h
@@ -135,14 +135,15 @@ do {  
\
 #include "rs6000-cpus.def"
 #undef RS6000_CPU
 
 #undef  TARGET_DEFAULT
 #ifdef RS6000_BI_ARCH
-#define TARGET_DEFAULT (MASK_PPC_GPOPT | MASK_PPC_GFXOPT \
+#define TARGET_DEFAULT (OPTION_MASK_PPC_GPOPT | OPTION_MASK_PPC_GFXOPT \
| OPTION_MASK_MFCRF | MASK_POWERPC64 | MASK_64BIT)
 #else
-#define TARGET_DEFAULT (MASK_PPC_GPOPT | MASK_PPC_GFXOPT | OPTION_MASK_MFCRF)
+#define TARGET_DEFAULT (OPTION_MASK_PPC_GPOPT | OPTION_MASK_PPC_GFXOPT \
+   | OPTION_MASK_MFCRF)
 #endif
 
 #undef  PROCESSOR_DEFAULT
 #define PROCESSOR_DEFAULT PROCESSOR_POWER7
 #undef  PROCESSOR_DEFAULT64
diff --git a/gcc/config/rs6000/darwin.h b/gcc/config/rs6000/darwin.h
index ec02022c6a9f..6a8845eb3bb7 100644
--- a/gcc/config/rs6000/darwin.h
+++ b/gcc/config/rs6000/darwin.h
@@ -365,11 +365,11 @@
 /* Default target flag settings.  Despite the fact that STMW/LMW
serializes, it's still a big code size win to use them.  Use FSEL by
default as well.  */
 
 #undef  TARGET_DEFAULT
-#define TARGET_DEFAULT (OPTION_MASK_MULTIPLE | MASK_PPC_GFXOPT)
+#define TARGET_DEFAULT (OPTION_MASK_MULTIPLE | OPTION_MASK_PPC_GFXOPT)
 
 /* Darwin always uses IBM long double, never IEEE long double.  */
 #undef  TARGET_IEEEQUAD
 #define TARGET_IEEEQUAD 0
 
diff --git a/gcc/config/rs6000/darwin64-biarch.h 
b/gcc/config/rs6000/darwin64-biarch.h
index a53e567f8b73..6515bcc8bf5a 100644
--- a/gcc/config/rs6000/darwin64-biarch.h
+++ b/gcc/config/rs6000/darwin64-biarch.h
@@ -19,11 +19,11 @@
along with GCC; see the file COPYING3.  If not see
.  */
 
 #undef  TARGET_DEFAULT
 #define TARGET_DEFAULT (MASK_POWERPC64 | MASK_64BIT \
-   | OPTION_MASK_MULTIPLE | MASK_PPC_GFXOPT)
+   | OPTION_MASK_MULTIPLE | OPTION_MASK_PPC_GFXOPT)
 
 #undef DARWIN_ARCH_SPEC
 #define DARWIN_ARCH_SPEC "%{m32:ppc;:ppc64}"
 
 /* Actually, there's really only 970 as an active option.  */
diff --git a/gcc/config/rs6000/default64.h b/gcc/config/rs6000/default64.h
index f3a81404eff3..0bec94935e2b 100644
--- a/gcc/config/rs6000/default64.h
+++ b/gcc/config/rs6000/default64.h
@@ -28,10 +28,10 @@ along with GCC; see the file COPYING3.  If not see
| MASK_LITTLE_ENDIAN)
 #undef ASM_DEFAULT_SPEC
 #define ASM_DEFAULT_SPEC "-mpower8"
 #else
 #undef TARGET_DEFAULT
-#define TARGET_DEFAULT (MASK_PPC_GFXOPT | MASK_PPC_GPOPT \
+#define TARGET_DEFAULT (OPTION_MASK_PPC_GFXOPT | OPTION_MASK_PPC_GPOPT \
| OPTION_MASK_MFCRF | MASK_POWERPC64 | MASK_64BIT)
 #undef ASM_DEFAULT_SPEC
 #define ASM_DEFAULT_SPEC "-mpower4"
 #endif
diff --git a/gcc/config/rs6000/rs6000-c.cc b/gcc/config/rs6000/rs6000-c.cc
index 4c99afc761ae..0d13645040ff 100644
--- a/gcc/config/rs6000/rs6000-c.cc
+++ b/gcc/config/rs6000/rs6000-c.cc
@@ -382,11 +382,11 @@ rs6000_target_modify_macros (bool define_p, HOST_WIDE_INT 
flags,
 
  3. If either of the above two conditions apply except that the
TARGET_DEFAULT macro is defined to equal zero, and
TARGET_POWERPC64 and
a) BYTES_BIG_ENDIAN and the flag to be enabled is either
-  MASK_PPC_GFXOPT or MASK_POWERPC64 (flags for "powerpc64"
+  OPTION_MASK_PPC_GFXOPT or MASK_POWERPC64 (flags for "powerpc64"
   

[PATCH, RS6000 3/5] Rework the RS6000_BTM defines, continued.

2022-06-06 Thread will schmidt via Gcc-patches
[PATCH, RS6000 3/5] Rework the RS6000_BTM defines, continued.

The RS6000_BTM_ definitions are mostly unused after
the rs6000 builtin code was reworked.   This cleans
up the remaining RS6000_BTM_ references by replacing
them with their OPTION_MASK_ equivalents.

This patch removes the defines
RS6000_BTM_MODULO, RS6000_BTM_ALTIVEC, RS6000_BTM_CMPB,
RS6000_BTM_VSX, RS6000_BTM_P8_VECTOR, RS6000_BTM_P9_VECTOR,
RS6000_BTM_P9_MISC, RS6000_BTM_CRYPTO, RS6000_BTM_HTM,
RS6000_BTM_FRE.

gcc/
* config/rs6000/rs6000.cc (RS6000_BTM_ALTIVEC, RS6000_BTM_CMPB,
RS6000_BTM_VSX, RS6000_BTM_FRE, RS6000_BTM_P8_VECTOR,
RS6000_BTM_P9_VECTOR, RS6000_BTM_P9_MISC, RS6000_BTM_MODULO,
RS6000_BTM_CRYPTO, RS6000_BTM_HTM): Replace with OPTION_MASK_ALTIVEC,
OPTION_MASK_CMPB, OPTION_MASK_VSX, OPTION_MASK_POPCNTB,
OPTION_MASK_P8_VECTOR, OPTION_MASK_P9_VECTOR, OPTION_MASK_P9_MISC,
OPTION_MASK_MODULO, OPTION_MASK_CRYPTO, OPTION_MASK_HTM.
* config/rs6000/rs6000.h (RS6000_BTM_MODULO, RS6000_BTM_ALTIVEC,
RS6000_BTM_CMPB, RS6000_BTM_VSX, RS6000_BTM_P8_VECTOR,
RS6000_BTM_P9_VECTOR, RS6000_BTM_P9_MISC, RS6000_BTM_CRYPTO,
RS6000_BTM_HTM, RS6000_BTM_FRE): Remove.

diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 253110910bfa..6b7a6db9a445 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -3377,27 +3377,27 @@ darwin_rs6000_override_options (void)
bits, and some options are no longer in target_flags.  */
 
 HOST_WIDE_INT
 rs6000_builtin_mask_calculate (void)
 {
-  return (((TARGET_ALTIVEC)? RS6000_BTM_ALTIVEC   : 0)
- | ((TARGET_CMPB)  ? RS6000_BTM_CMPB  : 0)
- | ((TARGET_VSX)   ? RS6000_BTM_VSX   : 0)
- | ((TARGET_FRE)   ? RS6000_BTM_FRE   : 0)
+  return (((TARGET_ALTIVEC)? OPTION_MASK_ALTIVEC: 0)
+ | ((TARGET_CMPB)  ? OPTION_MASK_CMPB   : 0)
+ | ((TARGET_VSX)   ? OPTION_MASK_VSX: 0)
+ | ((TARGET_FRE)   ? OPTION_MASK_POPCNTB: 0)
  | ((TARGET_FRES)  ? OPTION_MASK_PPC_GFXOPT : 0)
  | ((TARGET_FRSQRTE)   ? OPTION_MASK_PPC_GFXOPT : 0)
  | ((TARGET_FRSQRTES)  ? OPTION_MASK_POPCNTB: 0)
  | ((TARGET_POPCNTD)   ? OPTION_MASK_POPCNTD: 0)
  | ((rs6000_cpu == PROCESSOR_CELL) ? OPTION_MASK_FPRND  : 0)
- | ((TARGET_P8_VECTOR) ? RS6000_BTM_P8_VECTOR : 0)
- | ((TARGET_P9_VECTOR) ? RS6000_BTM_P9_VECTOR : 0)
- | ((TARGET_P9_MISC)   ? RS6000_BTM_P9_MISC   : 0)
- | ((TARGET_MODULO)? RS6000_BTM_MODULO: 0)
+ | ((TARGET_P8_VECTOR) ? OPTION_MASK_P8_VECTOR  : 0)
+ | ((TARGET_P9_VECTOR) ? OPTION_MASK_P9_VECTOR  : 0)
+ | ((TARGET_P9_MISC)   ? OPTION_MASK_P9_MISC: 0)
+ | ((TARGET_MODULO)? OPTION_MASK_MODULO : 0)
  | ((TARGET_64BIT) ? MASK_64BIT : 0)
  | ((TARGET_POWERPC64) ? MASK_POWERPC64 : 0)
- | ((TARGET_CRYPTO)? RS6000_BTM_CRYPTO: 0)
- | ((TARGET_HTM)   ? RS6000_BTM_HTM   : 0)
+ | ((TARGET_CRYPTO)? OPTION_MASK_CRYPTO : 0)
+ | ((TARGET_HTM)   ? OPTION_MASK_HTM: 0)
  | ((TARGET_DFP)   ? OPTION_MASK_DFP: 0)
  | ((TARGET_HARD_FLOAT)? OPTION_MASK_SOFT_FLOAT : 0)
  | ((TARGET_LONG_DOUBLE_128
  && TARGET_HARD_FLOAT
  && !TARGET_IEEEQUAD)  ? OPTION_MASK_MULTIPLE   : 0)
@@ -24044,23 +24044,23 @@ static struct rs6000_opt_mask const 
rs6000_opt_masks[] =
 };
 
 /* Builtin mask mapping for printing the flags.  */
 static struct rs6000_opt_mask const rs6000_builtin_mask_names[] =
 {
-  { "altivec",  RS6000_BTM_ALTIVEC,false, false },
-  { "vsx",  RS6000_BTM_VSX,false, false },
-  { "fre",  RS6000_BTM_FRE,false, false },
+  { "altivec",  OPTION_MASK_ALTIVEC,   false, false },
+  { "vsx",  OPTION_MASK_VSX,   false, false },
+  { "fre",  OPTION_MASK_POPCNTB,   false, false },
   { "fres", OPTION_MASK_PPC_GFXOPT, false, false },
   { "frsqrte",  OPTION_MASK_PPC_GFXOPT, false, false },
   { "frsqrtes", OPTION_MASK_POPCNTB,   false, false },
   { "popcntd",  OPTION_MASK_POPCNTD,   false, false },
   { "cell", OPTION_MASK_FPRND, false, false },
-  { "power8-vector",RS6000_BTM_P8_VECTOR,  false, false },
-  { "power9-vector",RS6000_BTM_P9_VECTOR,  false, false },
-  { "power9-misc",  RS6000_BTM_P9_MISC,false, false },
-  { "crypto",  

[PATCH,RS6000 2/5] Rework the RS6000_BTM defines.

2022-06-06 Thread will schmidt via Gcc-patches
[PATCH,RS6000 2/5) Rework the RS6000_BTM defines.

The RS6000_BTM_ definitions are mostly unused after the rs6000
builtin code was reworked.  The remaining references can be replaced
with the OPTION_MASK_ and MASK_ equivalents.

This patch remvoes the defines:
RS6000_BTM_FRES, RS6000_BTM_FRSQRTE, RS6000_BTM_FRSQRTES,
RS6000_BTM_POPCNTD, RS6000_BTM_CELL, RS6000_BTM_DFP,
RS6000_BTM_HARD_FLOAT, RS6000_BTM_LDBL128, RS6000_BTM_64BIT,
RS6000_BTM_POWERPC64, RS6000_BTM_FLOAT128, RS6000_BTM_FLOAT128_HW
RS6000_BTM_MMA, RS6000_BTM_P10.

I note that the BTM -> OPTION_MASK mappings are not always 1-to-1.
in particular the BTM_FRES and BTM_FRSQRTE values were both mapped to
OPTION_MASK_PPC_GFXOPT, while the BTM_FRE and BTM_FRSQRTES both mapped
to OPTION_MASK_POPCNTB.  In total I spent quite a bit of time
double-checking these since it looked like copy/paste errors.  I split
some of these changes out into a subsequent patch to limit the amount
of potential confusion in any particular patch.

gcc/
* config/rs6000/rs6000-c.cc: Update comments.
* config/rs6000/rs6000.cc (RS6000_BTM_FRES, RS6000_BTM_FRSQRTE,
RS6000_BTM_FRSQRTES, RS6000_BTM_POPCNTD, RS6000_BTM_CELL,
RS6000_BTM_64BIT, RS6000_BTM_POWERPC64, RS6000_BTM_DFP,
RS6000_BTM_HARD_FLOAT,RS6000_BTM_LDBL128, RS6000_BTM_FLOAT128,
RS6000_BTM_FLOAT128_HW, RS6000_BTM_MMA, RS6000_BTM_P10): Replace
with OPTION_MASK_PPC_GFXOPT, OPTION_MASK_PPC_GFXOPT,
OPTION_MASK_POPCNTB, OPTION_MASK_POPCNTD,
OPTION_MASK_FPRND, MASK_64BIT, MASK_POWERPC64,
OPTION_MASK_DFP, OPTION_MASK_SOFT_FLOAT, OPTION_MASK_MULTIPLE,
OPTION_MASK_FLOAT128_KEYWORD, OPTION_MASK_FLOAT128_HW,
OPTION_MASK_MMA, OPTION_MASK_POWER10.
* config/rs6000/rs6000.h (RS6000_BTM_FRES, RS6000_BTM_FRSQRTE,
RS6000_BTM_FRSQRTES, RS6000_BTM_POPCNTD, RS6000_BTM_CELL,
RS6000_BTM_DFP, RS6000_BTM_HARD_FLOAT, RS6000_BTM_LDBL128,
RS6000_BTM_64BIT, RS6000_BTM_POWERPC64, RS6000_BTM_FLOAT128,
RS6000_BTM_FLOAT128_HW, RS6000_BTM_MMA, RS6000_BTM_P10): Delete.

diff --git a/gcc/config/rs6000/rs6000-c.cc b/gcc/config/rs6000/rs6000-c.cc
index 9c8cbd7a66e4..4c99afc761ae 100644
--- a/gcc/config/rs6000/rs6000-c.cc
+++ b/gcc/config/rs6000/rs6000-c.cc
@@ -594,13 +594,13 @@ rs6000_target_modify_macros (bool define_p, HOST_WIDE_INT 
flags,
  via the target attribute/pragma.  */
   if ((flags & OPTION_MASK_FLOAT128_HW) != 0)
 rs6000_define_or_undefine_macro (define_p, "__FLOAT128_HARDWARE__");
 
   /* options from the builtin masks.  */
-  /* Note that RS6000_BTM_CELL is enabled only if (rs6000_cpu ==
- PROCESSOR_CELL) (e.g. -mcpu=cell).  */
-  if ((bu_mask & RS6000_BTM_CELL) != 0)
+  /* Note that OPTION_MASK_FPRND is enabled only if
+ (rs6000_cpu == PROCESSOR_CELL) (e.g. -mcpu=cell).  */
+  if ((bu_mask & OPTION_MASK_FPRND) != 0)
 rs6000_define_or_undefine_macro (define_p, "__PPU__");
 
   /* Tell the user if we support the MMA instructions.  */
   if ((flags & OPTION_MASK_MMA) != 0)
 rs6000_define_or_undefine_macro (define_p, "__MMA__");
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index d4defc855d02..253110910bfa 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -3381,32 +3381,32 @@ rs6000_builtin_mask_calculate (void)
 {
   return (((TARGET_ALTIVEC)? RS6000_BTM_ALTIVEC   : 0)
  | ((TARGET_CMPB)  ? RS6000_BTM_CMPB  : 0)
  | ((TARGET_VSX)   ? RS6000_BTM_VSX   : 0)
  | ((TARGET_FRE)   ? RS6000_BTM_FRE   : 0)
- | ((TARGET_FRES)  ? RS6000_BTM_FRES  : 0)
- | ((TARGET_FRSQRTE)   ? RS6000_BTM_FRSQRTE   : 0)
- | ((TARGET_FRSQRTES)  ? RS6000_BTM_FRSQRTES  : 0)
- | ((TARGET_POPCNTD)   ? RS6000_BTM_POPCNTD   : 0)
- | ((rs6000_cpu == PROCESSOR_CELL) ? RS6000_BTM_CELL  : 0)
+ | ((TARGET_FRES)  ? OPTION_MASK_PPC_GFXOPT : 0)
+ | ((TARGET_FRSQRTE)   ? OPTION_MASK_PPC_GFXOPT : 0)
+ | ((TARGET_FRSQRTES)  ? OPTION_MASK_POPCNTB: 0)
+ | ((TARGET_POPCNTD)   ? OPTION_MASK_POPCNTD: 0)
+ | ((rs6000_cpu == PROCESSOR_CELL) ? OPTION_MASK_FPRND  : 0)
  | ((TARGET_P8_VECTOR) ? RS6000_BTM_P8_VECTOR : 0)
  | ((TARGET_P9_VECTOR) ? RS6000_BTM_P9_VECTOR : 0)
  | ((TARGET_P9_MISC)   ? RS6000_BTM_P9_MISC   : 0)
  | ((TARGET_MODULO)? RS6000_BTM_MODULO: 0)
- | ((TARGET_64BIT) ? RS6000_BTM_64BIT : 0)
- | ((TARGET_POWERPC64) ? RS6000_BTM_POWERPC64 : 0)
+ | ((TARGET_64BIT) ? MASK_64BIT : 0)
+ | ((TARGET_POWERPC64) ? MASK_POWERPC64 : 0)
  | ((TARGET_CRYPTO)? 

[PATCH,RS6000 1/5] Clean-up MASK_ and RS6000_BTM_ definitions.

2022-06-06 Thread will schmidt via Gcc-patches
[PATCH,RS6000 1/5] Clean-up MASK_ and RS6000_BTM_ definitions.

Hi,

This patch removes the defines that are no longer used, and
updates the comment for the set of MASK_ defines.

This patch removes the defines for
MASK_REGNAMES, MASK_PROTOTYPE, RS6000_BTM_ALWAYS, RS6000_BTM_COMMON.

gcc/
* config/rs6000/rs6000.c (RS6000_BTM_COMMON, RS6000_BTM_ALWAYS,
MASK_REGNAMES, OPTION_MASK_REGNAMES, MASK_PROTOTYPE,
OPTION_MASK_PROTOTYPE, MASK_UPDATE, OPTION_MASK_UPDATE): Remove.

diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index 3b8941a86584..2ff17a16e43c 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -503,12 +503,13 @@ extern int rs6000_vector_align[];
answers if the arguments are not in the normal range.  */
 #define TARGET_MINMAX  (TARGET_HARD_FLOAT && TARGET_PPC_GFXOPT \
 && (TARGET_P9_MINMAX || !flag_trapping_math))
 
 /* In switching from using target_flags to using rs6000_isa_flags, the options
-   machinery creates OPTION_MASK_ instead of MASK_.  For now map
-   OPTION_MASK_ back into MASK_.  */
+   machinery creates OPTION_MASK_ instead of MASK_.  The MASK_
+   options that have not yet been replaced by their OPTION_MASK_
+   equivalents are defined here.  */
 #define MASK_ALTIVEC   OPTION_MASK_ALTIVEC
 #define MASK_CMPB  OPTION_MASK_CMPB
 #define MASK_CRYPTOOPTION_MASK_CRYPTO
 #define MASK_DFP   OPTION_MASK_DFP
 #define MASK_DIRECT_MOVE   OPTION_MASK_DIRECT_MOVE
@@ -534,11 +535,10 @@ extern int rs6000_vector_align[];
 #define MASK_PPC_GFXOPTOPTION_MASK_PPC_GFXOPT
 #define MASK_PPC_GPOPT OPTION_MASK_PPC_GPOPT
 #define MASK_RECIP_PRECISION   OPTION_MASK_RECIP_PRECISION
 #define MASK_SOFT_FLOATOPTION_MASK_SOFT_FLOAT
 #define MASK_STRICT_ALIGN  OPTION_MASK_STRICT_ALIGN
-#define MASK_UPDATEOPTION_MASK_UPDATE
 #define MASK_VSX   OPTION_MASK_VSX
 #define MASK_POWER10   OPTION_MASK_POWER10
 #define MASK_P10_FUSIONOPTION_MASK_P10_FUSION
 
 #ifndef IN_LIBGCC2
@@ -551,18 +551,10 @@ extern int rs6000_vector_align[];
 
 #ifdef TARGET_LITTLE_ENDIAN
 #define MASK_LITTLE_ENDIAN OPTION_MASK_LITTLE_ENDIAN
 #endif
 
-#ifdef TARGET_REGNAMES
-#define MASK_REGNAMES  OPTION_MASK_REGNAMES
-#endif
-
-#ifdef TARGET_PROTOTYPE
-#define MASK_PROTOTYPE OPTION_MASK_PROTOTYPE
-#endif
-
 #ifdef TARGET_MODULO
 #define RS6000_BTM_MODULO  OPTION_MASK_MODULO
 #endif
 
 
@@ -2250,11 +2242,10 @@ extern int frame_pointer_needed;
 
 
 /* Builtin targets.  For now, we reuse the masks for those options that are in
target flags, and pick a random bit for ldbl128, which isn't in
target_flags.  */
-#define RS6000_BTM_ALWAYS  0   /* Always enabled.  */
 #define RS6000_BTM_ALTIVEC MASK_ALTIVEC/* VMX/altivec vectors.  */
 #define RS6000_BTM_CMPBMASK_CMPB   /* ISA 2.05: compare 
bytes.  */
 #define RS6000_BTM_VSX MASK_VSX/* VSX (vector/scalar).  */
 #define RS6000_BTM_P8_VECTOR   MASK_P8_VECTOR  /* ISA 2.07 vector.  */
 #define RS6000_BTM_P9_VECTOR   MASK_P9_VECTOR  /* ISA 3.0 vector.  */
@@ -2275,32 +2266,10 @@ extern int frame_pointer_needed;
 #define RS6000_BTM_FLOAT128MASK_FLOAT128_KEYWORD /* IEEE 128-bit float.  */
 #define RS6000_BTM_FLOAT128_HW MASK_FLOAT128_HW /* IEEE 128-bit float h/w.  */
 #define RS6000_BTM_MMA MASK_MMA/* ISA 3.1 MMA.  */
 #define RS6000_BTM_P10 MASK_POWER10
 
-#define RS6000_BTM_COMMON  (RS6000_BTM_ALTIVEC \
-| RS6000_BTM_VSX   \
-| RS6000_BTM_P8_VECTOR \
-| RS6000_BTM_P9_VECTOR \
-| RS6000_BTM_P9_MISC   \
-| RS6000_BTM_MODULO\
-| RS6000_BTM_CRYPTO\
-| RS6000_BTM_FRE   \
-| RS6000_BTM_FRES  \
-| RS6000_BTM_FRSQRTE   \
-| RS6000_BTM_FRSQRTES  \
-| RS6000_BTM_HTM   \
-| RS6000_BTM_POPCNTD   \
-| RS6000_BTM_CELL  \
-| RS6000_BTM_DFP   \
-| RS6000_BTM_HARD_FLOAT\
-| RS6000_BTM_LDBL128   \
-   

[PATCH,RS6000 0/5] Clean up MASK_ and RS6000_BTM_ defines

2022-06-06 Thread will schmidt via Gcc-patches
Hi,
  This series cleans up the assorted MASK_, OPTION_MASK_,
and RS6000_BTM_ defines that we have sprinkled through the
rs6000 target code.

The MASK_ entries are currently defined as their OPTION_MASK_
equivalents since their introduction when the rs6000_isa_flags was
added via commit 4d9675496a28ef6184f2a9c3ac5e6e3ea63606c1 .
This series replaces references to the MASK_ entries with their
OPTION_MASK equivalents as much as possible.

The RS6000_BTM_ defines are mostly unused since the built-in rewrites
from late 2021 and early 2022, and the remaining usage is
straightforward to replace with OPTION_MASK_ values.

The OPTION_MASK_ definitions themselves remain.

Due to size and to keep some of these changes clean I have split this
into several parts.

After this series there are a few remaining MASK_ entries
(MASK_POWERPC64, MASK_64BIT and MASK_LITTLE_ENDIAN) which are
conditionally defined, and potentially more invasive to resolve.
Those are deliberately not addressed as part of this series.

This has cleanly regtested (no functional change).  When approved
this series will be committed as a group, though it should be
bisectable.

OK for trunk?

1/5: Remove unused defines and touch up comments.
2/5: Rework RS6000_BTM_foo defines, part 1.
3/5: Rework RS6000_BTM_foo defines, part 2.
4/5: Rework MASK_foo defines, part 1.
5/5. Rework MASK_foo defines, part 2.



[PATCH] libcpp: Update cpp_wcwidth() to Unicode 14.0.0

2022-06-06 Thread Lewis Hyatt via Gcc-patches
Hello-

The attached patch upgrades the cpp_wcwidth() function (needed for
computing display columns in diagnostics output) from Unicode 13 to
Unicode 14. I just mechanically followed the procedure in
contrib/unicode/README with nothing unexpected coming up. I attached it
compressed since it's a bit large, and not really human readable
anyway. Please let me know if it's OK to commit. I did bootstrap/regtest
all languages with no issues on x86-64 Linux. (Although I had to revert
locally one unrelated commit that seems to break bootstrap for Ada on the
master branch currently.)

BTW, is this something simple enough I should just commit it without bugging
the list for approval?

Thanks!

-Lewis

==

Subject: [PATCH] libcpp: Update cpp_wcwidth() to Unicode 14.0.0

The procedure detailed in contrib/unicode/README was followed with nothing
notable coming up. The glibc scripts did not require any update, so the
only change was retrieving new versions of the Unicode data files and
rerunning gen_wcwidth.py.

contrib/ChangeLog:

* unicode/EastAsianWidth.txt: Update to Unicode 14.0.0.
* unicode/PropList.txt: Likewise.
* unicode/README: Likewise.
* unicode/UnicodeData.txt: Likewise.

libcpp/ChangeLog:

* generated_cpp_wcwidth.h: Generated from updated Unicode data files.


unicode_14.txt.gz
Description: application/gunzip


Re: [PATCH] Fix behavior of using enum in dependent contexts [PR105787]

2022-06-06 Thread Jason Merrill via Gcc-patches

On 6/6/22 15:39, Michael Colavita via Gcc-patches wrote:

Per #105787, "using enum" in a dependent context leads to an ICE. This
is because the type substitution logic doesn't properly juggle the
context and abstract origin for CONST_DECLs introduced via using enum.
When we are performing type substitution on the CONST_DECL, we want to
do so in the context of the original enum type. When we return the
resulting value, we want to replace the context with the class in which
the CONST_DECL exists. This amounts to using the abstract origin for the
type substitution, and performing the same clone/abstract origin update
as we do for parsing "using enum" in non-dependent contexts.


Thanks for the patch!  It looks like you don't have a copyright 
assignment on file, so we need you to certify the DCO in the commit 
message of your patch; see


  https://gcc.gnu.org/contribute.html#legal

for more details.

Also, per the section on e-mail subject lines further down in that page, 
you need a "c++:" tag in your subject line.



PR c++/105787 - internal compiler error: tree check: expected enumeral_type, 
have record_type in tsubst_copy

 PR c++/105787

gcc/cp/ChangeLog:

 * pt.cc (tsubst_copy): Handle CONST_DECLs involving "using enum"
   in dependent contexts.

gcc/testsuite/ChangeLog:

 * g++.dg/cpp2a/using-enum-10.C: New test.
 * g++.dg/cpp2a/using-enum-11.C: New test.
---
  gcc/cp/pt.cc   | 24 +-
  gcc/testsuite/g++.dg/cpp2a/using-enum-10.C | 35 
  gcc/testsuite/g++.dg/cpp2a/using-enum-11.C | 38 ++
  3 files changed, 96 insertions(+), 1 deletion(-)
  create mode 100644 gcc/testsuite/g++.dg/cpp2a/using-enum-10.C
  create mode 100644 gcc/testsuite/g++.dg/cpp2a/using-enum-11.C

diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc
index 6de8e496859..c11c682ded7 100644
--- a/gcc/cp/pt.cc
+++ b/gcc/cp/pt.cc
@@ -16892,6 +16892,7 @@ tsubst_copy (tree t, tree args, tsubst_flags_t 
complain, tree in_decl)
{
tree enum_type;
tree v;
+   tree original;


Note that existing variable declarations at the beginning of the block 
are vestiges of when GCC was written in C; now that we can write C++11, 
it's better to declare variables at the point when they are first 
initialized.



if (DECL_TEMPLATE_PARM_P (t))
  return tsubst_copy (DECL_INITIAL (t), args, complain, in_decl);
@@ -16903,6 +16904,14 @@ tsubst_copy (tree t, tree args, tsubst_flags_t 
complain, tree in_decl)
if (args == NULL_TREE)
  return scalar_constant_value (t);
  
+	/* When the CONST_DECL is a class-scope clone from using enum, we want

+  to perform the type substitution in the context of the original enum
+  (the abstract origin), but return a value in the context of the
+  class. */
+   original = t;
+   if (CONST_DECL_USING_P (original))
+ t = DECL_ABSTRACT_ORIGIN (original);
+
/* Unfortunately, we cannot just call lookup_name here.
   Consider:
  
@@ -16921,7 +16930,20 @@ tsubst_copy (tree t, tree args, tsubst_flags_t complain, tree in_decl)

 v != NULL_TREE;
 v = TREE_CHAIN (v))
  if (TREE_PURPOSE (v) == DECL_NAME (t))
-   return TREE_VALUE (v);
+   {
+ tree value = TREE_VALUE (v);
+ if (!CONST_DECL_USING_P (original))
+   return value;
+ else
+   {
+ value = copy_decl (value);
+ DECL_ARTIFICIAL (value) = true;
+ DECL_CONTEXT (value) = DECL_CONTEXT (original);
+ DECL_IGNORED_P (value) = true;
+ DECL_ABSTRACT_ORIGIN (value) = DECL_ABSTRACT_ORIGIN 
(original);
+ return value;


We shouldn't be creating new decls in tsubst_copy, which is called for 
uses of a decl in an expression; if instantiate_class_template (calling 
tsubst_decl) is doing something wrong when instantiating the clone, 
that's the place to fix it.



+   }
+   }
  
  	  /* We didn't find the name.  That should never happen; if

 name-lookup found it during preliminary parsing, we
diff --git a/gcc/testsuite/g++.dg/cpp2a/using-enum-10.C 
b/gcc/testsuite/g++.dg/cpp2a/using-enum-10.C
new file mode 100644
index 000..ab4cb648e03
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp2a/using-enum-10.C
@@ -0,0 +1,35 @@
+// PR c++/105787
+// { dg-do compile { target c++20 } }
+
+enum class fruit { orange, apple };
+
+struct A {
+public:
+  using enum fruit;
+private:
+};
+
+struct B {
+protected:
+  using enum fruit;
+public:
+};
+
+struct C {
+private:
+  using enum fruit;
+public:
+};
+
+template  struct D {
+  char a1 = (char) A::orange;
+  char a2 = (char) A::apple;
+  char b1 = (char) B::orange; // { dg-error "protected" }
+  char b2 = (char) B::apple; // { dg-error "protected" }
+  char c1 = (char) C::orange; // { dg-error "private" }
+  char 

[PATCH] Fix behavior of using enum in dependent contexts [PR105787]

2022-06-06 Thread Michael Colavita via Gcc-patches
Per #105787, "using enum" in a dependent context leads to an ICE. This
is because the type substitution logic doesn't properly juggle the
context and abstract origin for CONST_DECLs introduced via using enum.
When we are performing type substitution on the CONST_DECL, we want to
do so in the context of the original enum type. When we return the
resulting value, we want to replace the context with the class in which
the CONST_DECL exists. This amounts to using the abstract origin for the
type substitution, and performing the same clone/abstract origin update
as we do for parsing "using enum" in non-dependent contexts.

PR c++/105787 - internal compiler error: tree check: expected enumeral_type, 
have record_type in tsubst_copy

PR c++/105787

gcc/cp/ChangeLog:

* pt.cc (tsubst_copy): Handle CONST_DECLs involving "using enum"
  in dependent contexts.

gcc/testsuite/ChangeLog:

* g++.dg/cpp2a/using-enum-10.C: New test.
* g++.dg/cpp2a/using-enum-11.C: New test.
---
 gcc/cp/pt.cc   | 24 +-
 gcc/testsuite/g++.dg/cpp2a/using-enum-10.C | 35 
 gcc/testsuite/g++.dg/cpp2a/using-enum-11.C | 38 ++
 3 files changed, 96 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/g++.dg/cpp2a/using-enum-10.C
 create mode 100644 gcc/testsuite/g++.dg/cpp2a/using-enum-11.C

diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc
index 6de8e496859..c11c682ded7 100644
--- a/gcc/cp/pt.cc
+++ b/gcc/cp/pt.cc
@@ -16892,6 +16892,7 @@ tsubst_copy (tree t, tree args, tsubst_flags_t 
complain, tree in_decl)
   {
tree enum_type;
tree v;
+   tree original;
 
if (DECL_TEMPLATE_PARM_P (t))
  return tsubst_copy (DECL_INITIAL (t), args, complain, in_decl);
@@ -16903,6 +16904,14 @@ tsubst_copy (tree t, tree args, tsubst_flags_t 
complain, tree in_decl)
if (args == NULL_TREE)
  return scalar_constant_value (t);
 
+   /* When the CONST_DECL is a class-scope clone from using enum, we want
+  to perform the type substitution in the context of the original enum
+  (the abstract origin), but return a value in the context of the
+  class. */
+   original = t;
+   if (CONST_DECL_USING_P (original))
+ t = DECL_ABSTRACT_ORIGIN (original);
+
/* Unfortunately, we cannot just call lookup_name here.
   Consider:
 
@@ -16921,7 +16930,20 @@ tsubst_copy (tree t, tree args, tsubst_flags_t 
complain, tree in_decl)
 v != NULL_TREE;
 v = TREE_CHAIN (v))
  if (TREE_PURPOSE (v) == DECL_NAME (t))
-   return TREE_VALUE (v);
+   {
+ tree value = TREE_VALUE (v);
+ if (!CONST_DECL_USING_P (original))
+   return value;
+ else
+   {
+ value = copy_decl (value);
+ DECL_ARTIFICIAL (value) = true;
+ DECL_CONTEXT (value) = DECL_CONTEXT (original);
+ DECL_IGNORED_P (value) = true;
+ DECL_ABSTRACT_ORIGIN (value) = DECL_ABSTRACT_ORIGIN 
(original);
+ return value;
+   }
+   }
 
  /* We didn't find the name.  That should never happen; if
 name-lookup found it during preliminary parsing, we
diff --git a/gcc/testsuite/g++.dg/cpp2a/using-enum-10.C 
b/gcc/testsuite/g++.dg/cpp2a/using-enum-10.C
new file mode 100644
index 000..ab4cb648e03
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp2a/using-enum-10.C
@@ -0,0 +1,35 @@
+// PR c++/105787
+// { dg-do compile { target c++20 } }
+
+enum class fruit { orange, apple };
+
+struct A {
+public:
+  using enum fruit;
+private:
+};
+
+struct B {
+protected:
+  using enum fruit;
+public:
+};
+
+struct C {
+private:
+  using enum fruit;
+public:
+};
+
+template  struct D {
+  char a1 = (char) A::orange;
+  char a2 = (char) A::apple;
+  char b1 = (char) B::orange; // { dg-error "protected" }
+  char b2 = (char) B::apple; // { dg-error "protected" }
+  char c1 = (char) C::orange; // { dg-error "private" }
+  char c2 = (char) C::apple; // { dg-error "private" }
+};
+
+int main() {
+D<0> d;
+}
diff --git a/gcc/testsuite/g++.dg/cpp2a/using-enum-11.C 
b/gcc/testsuite/g++.dg/cpp2a/using-enum-11.C
new file mode 100644
index 000..1899e05c68d
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp2a/using-enum-11.C
@@ -0,0 +1,38 @@
+// PR c++/105787
+// { dg-do compile { target c++20 } }
+
+enum class fruit { orange, apple };
+
+struct A {
+public:
+  using fruit::apple;
+  using fruit::orange;
+private:
+};
+
+struct B {
+protected:
+  using fruit::apple;
+  using fruit::orange;
+public:
+};
+
+struct C {
+private:
+  using fruit::apple;
+  using fruit::orange;
+public:
+};
+
+template  struct D {
+  char a1 = (char) A::orange;
+  char a2 = (char) A::apple;
+  char b1 = (char) B::orange; // { dg-error "protected" }
+  char b2 = (char) B::apple; // { dg-error "protected" }
+  char c1 = 

Re: [PATCH] c++: function NTTP argument considered unused [PR53164, PR105848]

2022-06-06 Thread Jason Merrill via Gcc-patches

On 6/6/22 14:27, Patrick Palka wrote:

On Thu, 7 Oct 2021, Jason Merrill wrote:


On 10/7/21 11:17, Patrick Palka wrote:

On Wed, 6 Oct 2021, Jason Merrill wrote:


On 10/6/21 15:52, Patrick Palka wrote:

On Wed, 6 Oct 2021, Patrick Palka wrote:


On Tue, 5 Oct 2021, Jason Merrill wrote:


On 10/5/21 15:17, Patrick Palka wrote:

On Mon, 4 Oct 2021, Patrick Palka wrote:


When passing a function template as the argument to a function
NTTP
inside a template, we resolve it to the right specialization
ahead
of
time via resolve_address_of_overloaded_function, though the call
to
mark_used within defers odr-using it until instantiation time
(as
usual).
But at instantiation time we end up never calling mark_used on
the
specialization.

This patch fixes this by adding a call to mark_used in
convert_nontype_argument_function.

PR c++/53164

gcc/cp/ChangeLog:

* pt.c (convert_nontype_argument_function): Call mark_used.

gcc/testsuite/ChangeLog:

* g++.dg/template/non-dependent16.C: New test.
---
 gcc/cp/pt.c |  3 +++
 gcc/testsuite/g++.dg/template/non-dependent16.C | 16

 2 files changed, 19 insertions(+)
 create mode 100644
gcc/testsuite/g++.dg/template/non-dependent16.C

diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c
index f950f4a21b7..5e819c9598c 100644
--- a/gcc/cp/pt.c
+++ b/gcc/cp/pt.c
@@ -6668,6 +6668,9 @@ convert_nontype_argument_function (tree
type,
tree
expr,
   return NULL_TREE;
 }
 +  if (!mark_used (fn_no_ptr, complain) && !(complain &
tf_error))
+return NULL_TREE;
+
   linkage = decl_linkage (fn_no_ptr);
   if (cxx_dialect >= cxx11 ? linkage == lk_none : linkage !=
lk_external)
 {
diff --git a/gcc/testsuite/g++.dg/template/non-dependent16.C
b/gcc/testsuite/g++.dg/template/non-dependent16.C
new file mode 100644
index 000..b7dca8f6752
--- /dev/null
+++ b/gcc/testsuite/g++.dg/template/non-dependent16.C
@@ -0,0 +1,16 @@
+// PR c++/53164
+
+template
+void f(T) {
+  T::fail; // { dg-error "not a member" }
+}
+
+template
+struct A { };
+
+template
+void g() {
+  A a;
+}


I should mention that the original testcase in the PR was slightly
different than this one in that it also performed a call to the
NTTP,
e.g.

  template
  struct A {
static void h() {
  p(0);
}
  };

  template
  void g() {
A::h();
  }

  templated void g<0>();

and not even the call was enough to odr-use f, apparently because
the
CALL_EXPR case of tsubst_expr calls mark_used on the callee only
when
it's a FUNCTION_DECL, but in this case after substitution it's an
ADDR_EXPR of a FUNCTION_DECL.  Fixing this by looking through the
ADDR_EXPR
worked, but IIUC the call isn't necessary for f to be odr-used,
simply
using f as a template argument should be sufficient, so it seems
the
above is better fix.


I agree that pedantically the use happens when substituting into the
use
of
A, but convert_nontype_argument_function seems like a weird place
to
implement that; it's only called again during instantiation of A,
when we
instantiate the injected-class-name.  If A isn't instantiated,
e.g.
if 'a'
is a pointer to A, we again don't instantiate f.


I see, makes sense..  I'm not sure where else we can mark the use,
then.
Since we resolve the OVERLOAD f to the FUNCTION_DECL f ahead of
time (during which mark_used doesn't actually instantiate f
because
we're inside a template), at instantiation time the type A is
already
non-dependent so tsubst_aggr_type avoids doing the work that would end
up calling convert_nontype_argument_function.



I see that clang doesn't reject your testcase, either, but MSVC and
icc
do
(even with 'a' a pointer): https://godbolt.org/z/MGE6TcMch


FWIW although Clang doesn't reject 'A a;', it does reject
'using type = A;' weirdly enough: https://godbolt.org/z/T9qEn6bWW


Shall we just go with the other more specific approach, that makes
sure
the CALL_EXPR case of tsubst_expr calls mark_used when the callee is
an
ADDR_EXPR?  Something like (bootstrapped and regtested):


Err, this approach is wrong because by stripping the ADDR_EXPR here we
end up checking access of the unwrapped FUNCTION_DECL again after
substituting into the call.  So we incorrectly reject e.g.

 template
 void g() {
   P(); // error: ‘static void A::h()’ is private within this context
 }

 struct A {
   void f() {
 g();
   }
 private:
   static void h();
 };

since A::h isn't accessible from g.


I guess you could call mark_used directly instead of stripping the
ADDR_EXPR.


That seems to work nicely, how does the below look?  Bootstrapped and
regtested on x86_64-pc-linux-gnu.



Or for the general problem, perhaps we could mark the TEMPLATE_INFO or
TI_ARGS
to indicate that we still need to mark_used the arguments when we
encounter
A again during instantiation?


That sounds plausible, though I suppose it might not be worth 

Re: [PATCH] x86: harmonize __builtin_ia32_psadbw*() types

2022-06-06 Thread H.J. Lu via Gcc-patches
On Sun, Jun 5, 2022 at 7:27 PM Hongtao Liu via Gcc-patches
 wrote:
>
> On Mon, Jun 6, 2022 at 3:17 AM Uros Bizjak via Gcc-patches
>  wrote:
> >
> > On Thu, Jun 2, 2022 at 5:04 PM Jan Beulich  wrote:
> > >
> > > The 64-bit, 128-bit, and 512-bit variants have VDI return type, in
> > > line with instruction behavior. Make the 256-bit builtin match, thus
> > > also making it match the insn it expands to (using VI8_AVX2_AVX512BW).
> > >
> > > gcc/
> > >
> > > * config/i386/i386-builtin.def (__builtin_ia32_psadbw256):
> > > Change type.
> > > * config/i386/i386-builtin-types.def: New function type
> > > (V4DI, V32QI, V32QI).
> > > * config/i386/i386-expand.cc (ix86_expand_args_builtin): Handle
> > > V4DI_FTYPE_V32QI_V32QI.
> >
> > LGTM, but please let HJ have the final approval.
> I think it was just a typo and not intentional, so Ok for the trunk.

LGTM too.

Thanks.

> >
> > Uros.
> >
> > >
> > > --- a/gcc/config/i386/i386-builtin.def
> > > +++ b/gcc/config/i386/i386-builtin.def
> > > @@ -1217,7 +1217,7 @@ BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR
> > >  BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_mulv8si3, 
> > > "__builtin_ia32_pmulld256"  , IX86_BUILTIN_PMULLD256  , UNKNOWN, (int) 
> > > V8SI_FTYPE_V8SI_V8SI)
> > >  BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_vec_widen_umult_even_v8si, 
> > > "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256, UNKNOWN, (int) 
> > > V4DI_FTYPE_V8SI_V8SI)
> > >  BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_iorv4di3, 
> > > "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) 
> > > V4DI_FTYPE_V4DI_V4DI)
> > > -BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_avx2_psadbw, 
> > > "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) 
> > > V16HI_FTYPE_V32QI_V32QI)
> > > +BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_avx2_psadbw, 
> > > "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) 
> > > V4DI_FTYPE_V32QI_V32QI)
> > >  BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_avx2_pshufbv32qi3, 
> > > "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) 
> > > V32QI_FTYPE_V32QI_V32QI)
> > >  BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_avx2_pshufdv3, 
> > > "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) 
> > > V8SI_FTYPE_V8SI_INT)
> > >  BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_avx2_pshufhwv3, 
> > > "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) 
> > > V16HI_FTYPE_V16HI_INT)
> > > --- a/gcc/config/i386/i386-builtin-types.def
> > > +++ b/gcc/config/i386/i386-builtin-types.def
> > > @@ -516,6 +516,7 @@ DEF_FUNCTION_TYPE (V8DI, V8DI, V2DI, INT
> > >  DEF_FUNCTION_TYPE (V8DI, V8DI, V2DI, INT, V8DI, UQI)
> > >  DEF_FUNCTION_TYPE (V8DI, V8DI, V4DI, INT, V8DI, UQI)
> > >  DEF_FUNCTION_TYPE (V4DI, V8SI, V8SI)
> > > +DEF_FUNCTION_TYPE (V4DI, V32QI, V32QI)
> > >  DEF_FUNCTION_TYPE (V8DI, V64QI, V64QI)
> > >  DEF_FUNCTION_TYPE (V4DI, V4DI, V2DI)
> > >  DEF_FUNCTION_TYPE (V4DI, PCV4DI, V4DI)
> > > --- a/gcc/config/i386/i386-expand.cc
> > > +++ b/gcc/config/i386/i386-expand.cc
> > > @@ -10359,6 +10359,7 @@ ix86_expand_args_builtin (const struct b
> > >  case V8SI_FTYPE_V16HI_V16HI:
> > >  case V4DI_FTYPE_V4DI_V4DI:
> > >  case V4DI_FTYPE_V8SI_V8SI:
> > > +case V4DI_FTYPE_V32QI_V32QI:
> > >  case V8DI_FTYPE_V64QI_V64QI:
> > >if (comparison == UNKNOWN)
> > > return ix86_expand_binop_builtin (icode, exp, target);
> > >
>
>
>
> --
> BR,
> Hongtao



-- 
H.J.


Re: [C++ PATCH take #2] PR c++/96442: Improved error recovery in enumerations.

2022-06-06 Thread Jason Merrill via Gcc-patches

On 6/5/22 06:09, Roger Sayle wrote:


Hi Jason,
My apologies for the long delay, but I've finally got around to
implementing your suggested improvements (implied by your review):
https://gcc.gnu.org/pipermail/gcc-patches/2022-March/591504.html
of my patch for PR c++/96442:
https://gcc.gnu.org/pipermail/gcc-patches/2022-February/590716.html

The "How does that happen?" is insightful and leads to a cleaner
solution, setting ENUM_UNDERLYING_TYPE to integer_type_node when
issuing an error, so that this invariant holds during the parser's
error recovery.  I've also moved the new testcase to the g++.dg/parse
subdirectory as per your feedback on my previous ICE-on-invalid fixes.

This patch has been tested on x86_64-pc-linunx-gnu with make bootstrap
and make -k check with no new (unexpected) failures.  Ok for mainline?


OK.


2022-06-05  Roger Sayle  

gcc/cp/ChangeLog
 PR c++/96442
 * decl.cc (start_enum): When emitting a "must be integral" error,
 set ENUM_UNDERLYING_TYPE to integer_type_node, to avoid an ICE
 downstream in build_enumeration.

gcc/testsuite/ChangeLog
 PR c++/96442
 * g++.dg/parse/pr96442.C: New test cae.

Thanks again,
Roger
--


-Original Message-
From: Jason Merrill 
Sent: 10 March 2022 05:06
To: Roger Sayle ; gcc-patches@gcc.gnu.org
Subject: Re: [C++ PATCH] PR c++/96442: Another improved error recovery in
enumerations.

On 2/22/22 08:02, Roger Sayle wrote:


This patch resolves PR c++/96442, another ICE-after-error regression.
In this case, invalid code attempts to use a non-integral type as the
underlying type for an enumeration (a record_type in the example given
in the bugzilla PR), for which the parser emits an error message but
allows the inappropriate type to leak to downstream code.


How does that happen?

Would it help to change dependent_type_p in start_enum to
WILDCARD_TYPE_P?


The minimal
safe fix is to double check that the enumeration's underlying type
EUTYPE satisfies INTEGRAL_TYPE_P before calling int_fits_type_p in
build_enumerator.  This is a one line fix, but correcting indentation
and storing a common subexpression in a variable makes the change look
a little bigger.

This patch has been tested on x86_64-pc-linunx-gnu with make bootstrap
and make -k check with no new (unexpected) failures.  Ok for mainline?


2022-02-22  Roger Sayle  

gcc/cp/ChangeLog
PR c++/96442
* decl.cc (build_enumeration): Check ENUM_UNDERLYING_TYPE is
INTEGRAL_TYPE_P before calling int_fits_type_p.

gcc/testsuite/ChangeLog
PR c++/96442
* g++.dg/pr96442.C: New test cae.


Thanks in advance,
Roger
--







[PATCH] c++: function NTTP argument considered unused [PR53164, PR105848]

2022-06-06 Thread Patrick Palka via Gcc-patches
On Thu, 7 Oct 2021, Jason Merrill wrote:

> On 10/7/21 11:17, Patrick Palka wrote:
> > On Wed, 6 Oct 2021, Jason Merrill wrote:
> > 
> > > On 10/6/21 15:52, Patrick Palka wrote:
> > > > On Wed, 6 Oct 2021, Patrick Palka wrote:
> > > > 
> > > > > On Tue, 5 Oct 2021, Jason Merrill wrote:
> > > > > 
> > > > > > On 10/5/21 15:17, Patrick Palka wrote:
> > > > > > > On Mon, 4 Oct 2021, Patrick Palka wrote:
> > > > > > > 
> > > > > > > > When passing a function template as the argument to a function
> > > > > > > > NTTP
> > > > > > > > inside a template, we resolve it to the right specialization
> > > > > > > > ahead
> > > > > > > > of
> > > > > > > > time via resolve_address_of_overloaded_function, though the call
> > > > > > > > to
> > > > > > > > mark_used within defers odr-using it until instantiation time
> > > > > > > > (as
> > > > > > > > usual).
> > > > > > > > But at instantiation time we end up never calling mark_used on
> > > > > > > > the
> > > > > > > > specialization.
> > > > > > > > 
> > > > > > > > This patch fixes this by adding a call to mark_used in
> > > > > > > > convert_nontype_argument_function.
> > > > > > > > 
> > > > > > > > PR c++/53164
> > > > > > > > 
> > > > > > > > gcc/cp/ChangeLog:
> > > > > > > > 
> > > > > > > > * pt.c (convert_nontype_argument_function): Call 
> > > > > > > > mark_used.
> > > > > > > > 
> > > > > > > > gcc/testsuite/ChangeLog:
> > > > > > > > 
> > > > > > > > * g++.dg/template/non-dependent16.C: New test.
> > > > > > > > ---
> > > > > > > > gcc/cp/pt.c |  3 +++
> > > > > > > > gcc/testsuite/g++.dg/template/non-dependent16.C | 16
> > > > > > > > 
> > > > > > > > 2 files changed, 19 insertions(+)
> > > > > > > > create mode 100644
> > > > > > > > gcc/testsuite/g++.dg/template/non-dependent16.C
> > > > > > > > 
> > > > > > > > diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c
> > > > > > > > index f950f4a21b7..5e819c9598c 100644
> > > > > > > > --- a/gcc/cp/pt.c
> > > > > > > > +++ b/gcc/cp/pt.c
> > > > > > > > @@ -6668,6 +6668,9 @@ convert_nontype_argument_function (tree
> > > > > > > > type,
> > > > > > > > tree
> > > > > > > > expr,
> > > > > > > >   return NULL_TREE;
> > > > > > > > }
> > > > > > > > +  if (!mark_used (fn_no_ptr, complain) && !(complain &
> > > > > > > > tf_error))
> > > > > > > > +return NULL_TREE;
> > > > > > > > +
> > > > > > > >   linkage = decl_linkage (fn_no_ptr);
> > > > > > > >   if (cxx_dialect >= cxx11 ? linkage == lk_none : linkage !=
> > > > > > > > lk_external)
> > > > > > > > {
> > > > > > > > diff --git a/gcc/testsuite/g++.dg/template/non-dependent16.C
> > > > > > > > b/gcc/testsuite/g++.dg/template/non-dependent16.C
> > > > > > > > new file mode 100644
> > > > > > > > index 000..b7dca8f6752
> > > > > > > > --- /dev/null
> > > > > > > > +++ b/gcc/testsuite/g++.dg/template/non-dependent16.C
> > > > > > > > @@ -0,0 +1,16 @@
> > > > > > > > +// PR c++/53164
> > > > > > > > +
> > > > > > > > +template
> > > > > > > > +void f(T) {
> > > > > > > > +  T::fail; // { dg-error "not a member" }
> > > > > > > > +}
> > > > > > > > +
> > > > > > > > +template
> > > > > > > > +struct A { };
> > > > > > > > +
> > > > > > > > +template
> > > > > > > > +void g() {
> > > > > > > > +  A a;
> > > > > > > > +}
> > > > > > > 
> > > > > > > I should mention that the original testcase in the PR was slightly
> > > > > > > different than this one in that it also performed a call to the
> > > > > > > NTTP,
> > > > > > > e.g.
> > > > > > > 
> > > > > > >  template
> > > > > > >  struct A {
> > > > > > >static void h() {
> > > > > > >  p(0);
> > > > > > >}
> > > > > > >  };
> > > > > > > 
> > > > > > >  template
> > > > > > >  void g() {
> > > > > > >A::h();
> > > > > > >  }
> > > > > > > 
> > > > > > >  templated void g<0>();
> > > > > > > 
> > > > > > > and not even the call was enough to odr-use f, apparently because
> > > > > > > the
> > > > > > > CALL_EXPR case of tsubst_expr calls mark_used on the callee only
> > > > > > > when
> > > > > > > it's a FUNCTION_DECL, but in this case after substitution it's an
> > > > > > > ADDR_EXPR of a FUNCTION_DECL.  Fixing this by looking through the
> > > > > > > ADDR_EXPR
> > > > > > > worked, but IIUC the call isn't necessary for f to be odr-used,
> > > > > > > simply
> > > > > > > using f as a template argument should be sufficient, so it seems
> > > > > > > the
> > > > > > > above is better fix.
> > > > > > 
> > > > > > I agree that pedantically the use happens when substituting into the
> > > > > > use
> > > > > > of
> > > > > > A, but convert_nontype_argument_function seems like a weird place
> > > > > > to
> > > > > > implement that; it's only called again during instantiation of A,
> > > > > > when we
> > > > > > instantiate the injected-class-name.  If A isn't instantiated,
> > > > > > e.g.
> > > > > > if 'a'
> > > > > > 

Re: [PATCH]: libgompd add parallel handle functions

2022-06-06 Thread Jakub Jelinek via Gcc-patches
On Mon, Jun 06, 2022 at 07:32:31PM +0200, Mohamed Atef wrote:
> So for both cases one should read the value of *team and if it's NULL, the
> function returns some error state (eg. ompd_rc_unavailable)

No, I think for team NULL it should simply push something different
to ompd_parallel_handle_t, something that would mean for uses of the handle
that it is not a normal explicit parallel, but the implicit parallel
and handle those cases differently.
E.g. if one has
int
main ()
{
  #pragma omp parallel
  sleep (1024);
}
then when you get the explicit parallel's handle, there is struct gomp_team
one can store and work with, but its enclosing parallel isn't non-existent,
it is an implicit parallel, only its enclosing parallel doesn't exist.

Unfortunately, it isn't that easy.  We sometimes do create struct gomp_team
even for the implicit parallel.
See libgomp/target.c (GOMP_target_ext) which is there for cases like
int
main ()
{
  #pragma omp target nowait
  something;
  something_else;
  #pragma omp taskwait
}
where we want the asynchronous target to be really asynchronous with
something_else; and need struct gomp_team for that.
But in the case of artificial struct gomp_team for this case
thr->ts.level will be 0 rather than > 0.

> > ompd_get_task_parallel_handle when you'll have struct gomp_task *
> > and want the struct gomp_team it is in.
> > I'm afraid the library doesn't track this, it doesn't need it for anything.
> > One possibility to resolve this is perhaps if all functions that
> > allocate ompd_task_handle_t can't know the corresponding struct gomp_thread
> > too, then you could store in the private structure or ompd_task_handle_t
> > both struct gomp_task * and struct gomp_thread *.
> >
> I will ask the guys to try this if it's impossible then we delay this
> function.

Perhaps the function can be added but could just error unconditionally
until some solution is found.

BTW, when looking at the patch, I found I've missed some things in the first
already committed patch.

+  #define gompd_init_access(t, m)  \   

  
+gompd_access_##t##_##m = (__UINT64_TYPE__) & (((struct t *) NULL)->m); 

  
This is UB, should be using offsetof (struct t, m) instead.

Also, using __UINT64_TYPE__ for those offsets or sizes seems to be very
excessive, on x86_64-linux the largest struct from looking at debug info
is struct gomp_team right now with 1344 bytes.
So for the time being, I think using __UINT16_TYPE__ for all those sizes and
offsets should be 4 times as compact.

And, it would be nice to initialize those at least when possible at compile
time, not in gompd_load, offsetof of a non-VLA type is a constant
expression, similarly sizeof, so making all those const and initialized
directly would mean they don't waste a writable section (so all processes
can share those).  Probably it would be nice to stick them at least for ELF
into a names section so that they'd be together and not needing to be ever
touched unless OMPD is enabled.
Of course, I don't rule out the possibility of some values needing to be
initialized at runtime, they'd simply just not be const like the rest.

Jakub



Re: [PATCH]: libgompd add parallel handle functions

2022-06-06 Thread Jakub Jelinek via Gcc-patches
On Mon, Jun 06, 2022 at 05:13:24PM +0200, Mohamed Sayed via Gcc-patches wrote:
> > 2022-06-06  Mohamed Sayed  
> >
> >
> > * Makefile.am: (libgompd_la_SOURCES): Add ompd-parallel.c.

The ChangeLog formatting is wrong.  The above line should be:
* Makefile.am (libgompd_la_SOURCES): Add ompd-parallel.c.
i.e. tab, * space filename and if there is something in the
file that is being modified, followed by space ( name )
All this then followed by : and description.

> > * Makefile.in: Regenerate.
> > * libgompd.map: Add ompd_get_curr_parallel_handle,
> > ompd_get_enclosing_parallel_handle, ompd_rel_parallel_handle
> > and ompd_parallel_handle_compare symbol versions.

We aren't very consistent on the map files, either it should be
* libgompd.map (ompd_get_curr_parallel_handle,
ompd_get_enclosing_parallel_handle, ompd_rel_parallel_handle,
ompd_parallel_handle_compare): Export at OMPD_5.1.
or
* libgompd.map (OMPD_5.1): Export ompd_get_curr_parallel_handle,
ompd_get_enclosing_parallel_handle, ompd_rel_parallel_handle and
ompd_parallel_handle_compare.

> > * ompd-support.h:() : Add gompd_access (gomp_team_state, team) and
> > gompd_access (gomp_team, prev_ts).

This is totally wrong.
You are modifying the GOMPD_FOREACH_ACCESS macro I think.
So you should say
* ompd-support.h (GOMPD_FOREACH_ACCESS): Add
gompd_access (gomp_team_state, team) and
gompd_access (gomp_team, prev_ts).

> +return ompd_rc_stale_handle;
> +
> +  CHECK (parallel_handle->ah);
> +
> +  ompd_address_space_context_t *context = parallel_handle->ah->context;  
> +  ompd_address_t symbol_address = parallel_handle->th;
> +  ompd_address_t temp_symbol_address = {OMPD_SEGMENT_UNSPECIFIED, 0};
> +  ompd_word_t temp_offset;
> +  ompd_rc_t ret;
> +
> +  /* get prev_ts offset.  */
> +  GET_VALUE (context, NULL, "gompd_access_gomp_team_prev_ts",
> +  temp_offset, temp_offset, target_sizes.sizeof_long_long, 1, ret,
> + temp_symbol_address);

Formatting.  You can see the two lines are badly indented, each one in
a different wrong way.  The indenting rule is that the lines after the first
one in this case should start aligned below context on the GET_VALUE
line, and each 8 spaces should be replaced by a tab.

> +
> +  symbol_address.address += temp_offset;
> +
> +  /* get team offset.  */
> +  GET_VALUE (context, NULL, "gompd_access_gomp_team_state_team",
> +  temp_offset, temp_offset, target_sizes.sizeof_long_long, 1, ret,
> + temp_symbol_address);

Likewise.
> +
> +  symbol_address.address += temp_offset;
> +
> +  ret = callbacks->read_memory (context, NULL, _address,
> +target_sizes.sizeof_pointer,
> +_address.address);
> +  CHECK_RET (ret); 
> +  ret = callbacks->alloc_memory (sizeof (ompd_parallel_handle_t),
> + (void **) (enc_par_handle));

Why the ()s around enc_par_handle?
Anyway, seems like an aliasing violation, if the callback stores
through void **, then you should just pass it address of a local
void * variable and *enc_par_handle = (ompd_parallel_handle_t) var;

> +ompd_rc_t
> +ompd_rel_parallel_handle (ompd_parallel_handle_t *parallel_handle)
> +{
> +  if (parallel_handle == NULL)
> +return ompd_rc_stale_handle;
> +
> +  ompd_rc_t ret = callbacks->free_memory ((void *) (parallel_handle));

Likewise (both aliasing and the unnecessary ()s.
>  #define GOMPD_SIZES(gompd_size) \
>gompd_size (gomp_thread) \
>gompd_size (gomp_task_icv) \
> -  gompd_size (gomp_task)
> +  gompd_size (gomp_task) 
> +  

Why?

Jakub



Re: [PATCH]: libgompd add parallel handle functions

2022-06-06 Thread Mohamed Atef via Gcc-patches
في الاثنين، ٦ يونيو، ٢٠٢٢ ٧:١٣ م Jakub Jelinek via Gcc-patches <
gcc-patches@gcc.gnu.org> كتب:

> On Mon, Jun 06, 2022 at 01:48:22AM +0200, Mohamed Sayed via Gcc-patches
> wrote:
> > This patch adds parallel region handles specified in section 5.5.3.
> > >From examining libgomp code, I found that struct gomp_team describes the
> > parallel region.
>
> I think it would be nice to first find out what the different
> ompd_{parallel,thread,task}_handle_t should actually contain.
>
> In GOMP, the first one maps to struct gomp_team, the middle one to
> struct gomp_thread and the last one to struct gomp_task.
>
> Functions that create those are:
> For ompd_thread_handle_t that is ompd_get_thread_in_parallel and
> ompd_get_thread_handle.
> For ompd_parallel_handle_t that is ompd_get_curr_parallel_handle,
> ompd_get_enclosing_parallel_handle and ompd_get_task_parallel_handle.
> For ompd_task_handle_t that is ompd_get_curr_task_handle,
> ompd_get_generating_task_handle, ompd_get_scheduling_task_handle and
> ompd_get_task_in_parallel.
>
> What those handles point to is something libgompd allocates using
> the allocator callback and it is up to the library what it puts there,
> typically it should contain the address of those corresponding
> gomp structures and whatever else is needed (say address space handle
> or some contexts).
>
> > The Thread handle gives the address of gomp_thread so, I tried to
> > access *team
> > gomp_thread->ts->team.
>
> Actually gomp_thread's ts.team (ts is not a pointer, but a nested struct).
>
I think the implementation is correct But there's a typo in the comment.

>
> > The parallel handle is the team pointer in team state.
> > I have a question about ompd_get_task_parallel_handle
> > https://www.openmp.org/spec-html/5.0/openmpsu218.html
> > How can i reach gomp_team from gomp_taske
> > And the union in gomp_task has two entries gomp_sem_t and gomp_team
>
> So, for the 4 functions you want to implement:
> ompd_get_curr_parallel_handle when you'll have struct gomp_thread *
> and want struct gomp_team * you load thr->ts.team.
> Note, the implicit parallel is usually represented by NULL in thr->ts.team,
> that case then means it has just a single thread etc.
>
> ompd_get_enclosing_parallel_handle when you'll have struct gomp_team *
> and want the enclosing team.  team->prev_ts.team is what you are after,
> if it is non-NULL, it is the enclosing parallel, if it is NULL, I think
> one should verify e.g. host teams construct in that case, but
> otherwise it is the implicit parallel that one probably needs to represent
> somehow too.
>
So for both cases one should read the value of *team and if it's NULL, the
function returns some error state (eg. ompd_rc_unavailable)

>
> ompd_get_task_parallel_handle when you'll have struct gomp_task *
> and want the struct gomp_team it is in.
> I'm afraid the library doesn't track this, it doesn't need it for anything.
> One possibility to resolve this is perhaps if all functions that
> allocate ompd_task_handle_t can't know the corresponding struct gomp_thread
> too, then you could store in the private structure or ompd_task_handle_t
> both struct gomp_task * and struct gomp_thread *.
>
I will ask the guys to try this if it's impossible then we delay this
function.

> If that is impossible, we could add such a pointer, but it would increase
> both memory and runtime overhead of the library.
>
> Jakub
>
>


RE: [ping][vect-patterns] Refactor widen_plus/widen_minus as internal_fns

2022-06-06 Thread Joel Hutton via Gcc-patches
> > Patches attached. They already incorporated the .cc rename, now
> > rebased to be after the change to tree.h
> 
> @@ -1412,8 +1412,7 @@ vect_recog_widen_op_pattern (vec_info *vinfo,
>2, oprnd, half_type, unprom, vectype);
> 
>tree var = vect_recog_temp_ssa_var (itype, NULL);
> -  gimple *pattern_stmt = gimple_build_assign (var, wide_code,
> - oprnd[0], oprnd[1]);
> +  gimple *pattern_stmt = gimple_build (var, wide_code, oprnd[0],
> oprnd[1]);
> 
> 
> you should be able to do without the new gimple_build overload
> by using
> 
>gimple_seq stmts = NULL;
>gimple_build (, wide_code, itype, oprnd[0], oprnd[1]);
>gimple *pattern_stmt = gimple_seq_last_stmt (stmts);
> 
> because 'gimple_build' is an existing API.

Done.

The gimple_build overload was at the request of Richard Sandiford, I assume 
removing it is ok with you Richard S?
>From Richard Sandiford:
> For example, I think we should hide this inside a new:
> 
>   gimple_build (var, wide_code, oprnd[0], oprnd[1]);
> 
> that works directly on code_helper, similarly to the new code_helper 
> gimple_build interfaces.




> 
> 
> -  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
> +  if (gimple_get_lhs (stmt) == NULL_TREE ||
> +  TREE_CODE(gimple_get_lhs (stmt)) != SSA_NAME)
>  return false;
> 
> || go to the next line, space after TREE_CODE
> 

Done.

> +  bool widen_arith = false;
> +  gimple_match_op res_op;
> +  if (!gimple_extract_op (stmt, _op))
> +return false;
> +  code = res_op.code;
> +  op_type = res_op.num_ops;
> +
> +  if (is_gimple_assign (stmt))
> +  {
> +  widen_arith = (code == WIDEN_PLUS_EXPR
> +|| code == WIDEN_MINUS_EXPR
> +|| code == WIDEN_MULT_EXPR
> +|| code == WIDEN_LSHIFT_EXPR);
> + }
> +  else
> +  widen_arith = gimple_call_flags (stmt) & ECF_WIDEN;
> 
> there seem to be formatting issues.  Also shouldn't you check
> if (res_op.code.is_tree_code ()) instead if is_gimple_assign?
> I also don't like the ECF_WIDEN "trick", just do as with the
> tree codes and explicitely enumerate widening ifns here.
> 

Done. I've set widen_arith to False for the first patch as the second patch 
introduces the widening ifns.

> gimple_extract_op is a bit heavy-weight as well, so maybe
> instead simply do
> 
>   if (is_gimple_assign (stmt))
> {
>   code = gimple_assign_rhs_code (stmt);
> ...
> }
>   else if (gimple_call_internal_p (stmt))
> {
>   code = gimple_call_internal_fn (stmt);
> ...
> }
>   else
> return false;

The patch was originally written as above, it was changed to use 
gimple_extract_op at the request of Richard Sandiford. I prefer 
gimple_extract_op as it's more compact, but I don't have strong feelings. If 
the Richards can agree on either version I'm happy.


>From Richard Sandiford:
> > +  if (is_gimple_assign (stmt))
> > +  {
> > +code_or_ifn = gimple_assign_rhs_code (stmt);  }  else
> > +code_or_ifn = gimple_call_combined_fn (stmt);
> 
> It might be possible to use gimple_extract_op here (only recently added).
> This would also provide the number of operands directly, instead of 
> needing "op_type".  It would also provide an array of operands.


> 
> +  code_helper c1=MAX_TREE_CODES, c2=MAX_TREE_CODES;
> 
> spaces before/after '='
> 

Done.

> @@ -12061,13 +12105,16 @@ supportable_widening_operation (vec_info
> *vinfo,
>if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
>  std::swap (c1, c2);
> 
> +
>if (code == FIX_TRUNC_EXPR)
>  {
> 
> unnecessary whitespace change.
> 
Fixed.

> diff --git a/gcc/tree.h b/gcc/tree.h
> index
> f84958933d51144bb6ce7cc41eca5f7f06814550..e51e34c051d9b91d1c02a4b2
> fefdb2b15606a36f
> 100644
> --- a/gcc/tree.h
> +++ b/gcc/tree.h
> @@ -92,6 +92,10 @@ public:
>bool is_fn_code () const { return rep < 0; }
>bool is_internal_fn () const;
>bool is_builtin_fn () const;
> +  enum tree_code as_tree_code () const { return is_tree_code () ?
> +(tree_code)* this : MAX_TREE_CODES; }
> +  combined_fn as_fn_code () const { return is_fn_code () ? (combined_fn)
> *this
> +: CFN_LAST;}
> 
> hmm, the other as_* functions we have are not member functions.
> Also this semantically differs from the tree_code () conversion
> operator (that one was supposed to be "cheap").  The existing
> as_internal_fn for example is documented as being valid only if
> the code is actually an internal fn.  I see you are introducing
> the new function as convenience to get a "safe" not-a-X value,
> so maybe they should be called safe_as_tree_code () instead?
> 
SGTM. Done

> 
>int get_rep () const { return rep; }
>bool operator== (const code_helper ) { return rep == other.rep; }
>bool operator!= (const code_helper ) { return rep != other.rep; }
> @@ -6657,6 +6661,54 @@ extern unsigned fndecl_dealloc_argno (tree);
> if nonnull, set the second argument to the referenced enclosing
> 

Re: [PATCH]: libgompd add parallel handle functions

2022-06-06 Thread Jakub Jelinek via Gcc-patches
On Mon, Jun 06, 2022 at 01:48:22AM +0200, Mohamed Sayed via Gcc-patches wrote:
> This patch adds parallel region handles specified in section 5.5.3.
> >From examining libgomp code, I found that struct gomp_team describes the
> parallel region.

I think it would be nice to first find out what the different
ompd_{parallel,thread,task}_handle_t should actually contain.

In GOMP, the first one maps to struct gomp_team, the middle one to
struct gomp_thread and the last one to struct gomp_task.

Functions that create those are:
For ompd_thread_handle_t that is ompd_get_thread_in_parallel and
ompd_get_thread_handle.
For ompd_parallel_handle_t that is ompd_get_curr_parallel_handle,
ompd_get_enclosing_parallel_handle and ompd_get_task_parallel_handle.
For ompd_task_handle_t that is ompd_get_curr_task_handle,
ompd_get_generating_task_handle, ompd_get_scheduling_task_handle and
ompd_get_task_in_parallel.

What those handles point to is something libgompd allocates using
the allocator callback and it is up to the library what it puts there,
typically it should contain the address of those corresponding
gomp structures and whatever else is needed (say address space handle
or some contexts).

> The Thread handle gives the address of gomp_thread so, I tried to
> access *team
> gomp_thread->ts->team.

Actually gomp_thread's ts.team (ts is not a pointer, but a nested struct).

> The parallel handle is the team pointer in team state.
> I have a question about ompd_get_task_parallel_handle
> https://www.openmp.org/spec-html/5.0/openmpsu218.html
> How can i reach gomp_team from gomp_taske
> And the union in gomp_task has two entries gomp_sem_t and gomp_team

So, for the 4 functions you want to implement:
ompd_get_curr_parallel_handle when you'll have struct gomp_thread *
and want struct gomp_team * you load thr->ts.team.
Note, the implicit parallel is usually represented by NULL in thr->ts.team,
that case then means it has just a single thread etc.

ompd_get_enclosing_parallel_handle when you'll have struct gomp_team *
and want the enclosing team.  team->prev_ts.team is what you are after,
if it is non-NULL, it is the enclosing parallel, if it is NULL, I think
one should verify e.g. host teams construct in that case, but
otherwise it is the implicit parallel that one probably needs to represent
somehow too.

ompd_get_task_parallel_handle when you'll have struct gomp_task *
and want the struct gomp_team it is in.
I'm afraid the library doesn't track this, it doesn't need it for anything.
One possibility to resolve this is perhaps if all functions that
allocate ompd_task_handle_t can't know the corresponding struct gomp_thread
too, then you could store in the private structure or ompd_task_handle_t
both struct gomp_task * and struct gomp_thread *.
If that is impossible, we could add such a pointer, but it would increase
both memory and runtime overhead of the library.

Jakub



Re: [committed] amdgcn: Remove LLVM 9 assembler/linker support

2022-06-06 Thread Andrew Stubbs

On 27/05/2022 20:16, Thomas Schwinge wrote:

Hi Andrew!

On 2022-05-24T16:27:52+0100, Andrew Stubbs  wrote:

I've committed this patch to set the minimum required LLVM version, for
the assembler and linker, to 13.0.1. An upgrade from LLVM 9 is a
prerequisite for the gfx90a support, and 13.0.1 is now the oldest
version not known to have compatibility issues.

The patch removes all the obsolete feature detection tests from
configure and adds a new version test. Likewise the version dependencies
in the backend are removed.


I've not otherwise reviewed your
commit r13-742-g8086230e7ac619c0b0eeb6e15df7975ac214725f
"amdgcn: Remove LLVM 9 assembler/linker support", but happened to notice
that your commit also removed the ARM-target 'HAVE_GAS_ARM_EXTENDED_ARCH'
-- is that intentional; I suppose not?  That had been added by Richard
Earnshaw in commit r12-3725-g4e7a92c0ff3871d955ca8fb133f869b216d7224d
"arm: pass architecture extensions to assembler if supported".


Ooops, this was not intentional. I've just committed the attached to 
reinstate it.


Richard, please confirm that it works as intended.

The problem was that the ARM test was inserted into a case statement 
that I created solely for the use of amdgcn. When I then removed that 
block of code I didn't notice the additional, unrelated code hiding in 
there.


Apologies for the inconvenience.

Andrewarm: reinstate HAVE_GAS_ARM_EXTENDED_ARCH

The check was removed by accident.

gcc/ChangeLog:

* config.in: Regenerate.
* configure: Regenerate.
* configure.ac: Reinstate HAVE_GAS_ARM_EXTENDED_ARCH test.

diff --git a/gcc/config.in b/gcc/config.in
index 6a4f8856c4f..16bb963b45b 100644
--- a/gcc/config.in
+++ b/gcc/config.in
@@ -1331,6 +1331,13 @@
 #endif
 
 
+/* Define if your Arm assembler permits context-specific feature extensions.
+   */
+#ifndef USED_FOR_TARGET
+#undef HAVE_GAS_ARM_EXTENDED_ARCH
+#endif
+
+
 /* Define if your assembler supports .balign and .p2align. */
 #ifndef USED_FOR_TARGET
 #undef HAVE_GAS_BALIGN_AND_P2ALIGN
diff --git a/gcc/configure b/gcc/configure
index 095065ed01a..a52574d02b5 100755
--- a/gcc/configure
+++ b/gcc/configure
@@ -731,10 +731,10 @@ gcc_cv_dsymutil
 gcc_cv_otool
 gcc_cv_readelf
 gcc_cv_objdump
-ORIGINAL_NM_FOR_TARGET
-gcc_cv_nm
 ORIGINAL_OBJCOPY_FOR_TARGET
 gcc_cv_objcopy
+ORIGINAL_NM_FOR_TARGET
+gcc_cv_nm
 ORIGINAL_LD_GOLD_FOR_TARGET
 ORIGINAL_LD_BFD_FOR_TARGET
 ORIGINAL_LD_FOR_TARGET
@@ -19676,7 +19676,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 19676 "configure"
+#line 19679 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
@@ -19782,7 +19782,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 19782 "configure"
+#line 19785 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
@@ -29118,6 +29118,44 @@ $as_echo "$gcc_cv_as_version, ok" >&6; }
 ;;
 esac
 
+case "$target" in
+  arm*)
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for assembler 
for arm accepts context-specific architecture extensions" >&5
+$as_echo_n "checking assembler for assembler for arm accepts context-specific 
architecture extensions... " >&6; }
+if ${gcc_cv_as_arm_option_extensions+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  gcc_cv_as_arm_option_extensions=no
+  if test x$gcc_cv_as != x; then
+$as_echo '.text
+   .thumb
+   .syntax unified
+   vmov.f32 s0, s1' > conftest.s
+if { ac_try='$gcc_cv_as $gcc_cv_as_flags -march=armv8.1-m.main+mve -o 
conftest.o conftest.s >&5'
+  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+  (eval $ac_try) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; }
+then
+   gcc_cv_as_arm_option_extensions=yes
+else
+  echo "configure: failed program was" >&5
+  cat conftest.s >&5
+fi
+rm -f conftest.o conftest.s
+  fi
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: 
$gcc_cv_as_arm_option_extensions" >&5
+$as_echo "$gcc_cv_as_arm_option_extensions" >&6; }
+if test $gcc_cv_as_arm_option_extensions = yes; then
+
+$as_echo "#define HAVE_GAS_ARM_EXTENDED_ARCH 1" >>confdefs.h
+
+fi
+
+esac
+
 # ??? Not all targets support dwarf2 debug_line, even within a version
 # of gas.  Moreover, we need to emit a valid instruction to trigger any
 # info to the output file.  So, as supported targets are added to gas 2.11,
diff --git a/gcc/configure.ac b/gcc/configure.ac
index e098b905622..5fe826aa4eb 100644
--- a/gcc/configure.ac
+++ b/gcc/configure.ac
@@ -5427,6 +5427,19 @@ case "$target" in
 ;;
 esac
 
+case "$target" in
+  arm*)
+gcc_GAS_CHECK_FEATURE([assembler for arm accepts context-specific 
architecture extensions],
+  gcc_cv_as_arm_option_extensions,
+  [-march=armv8.1-m.main+mve],
+  [.text
+   .thumb
+   .syntax unified
+   

Re: [PING] PR middle-end/95126: Expand small const structs as immediate constants

2022-06-06 Thread Rainer Orth
Hi Roger,

>> I just tried this on i386-pc-solaris2.11: unfortunately, it made no
> difference.
>
> Awesome!  Very many thanks for trying this.  Alas it confirms that the patch
> I'm currently spinning won't have any affect.  So by a process of
> elimination,
> the miscompilation must be triggered by one of the other two changes:
>
> diff --git a/gcc/expr.cc b/gcc/expr.cc
> index fb062dc..37f1405 100644
> --- a/gcc/expr.cc
> +++ b/gcc/expr.cc
> @@ -4871,7 +4871,7 @@ emit_push_insn (rtx x, machine_mode mode, tree type,
> rtx s
>   /* If source is a constant VAR_DECL with a simple constructor,
>   store the constructor to the stack instead of moving it.  */
>   const_tree decl;
> - if (partial == 0
> + if (0 && partial == 0
>   && MEM_P (xinner)
>   && SYMBOL_REF_P (XEXP (xinner, 0))
>   && (decl = SYMBOL_REF_DECL (XEXP (xinner, 0))) != NULL_TREE
> @@ -10603,7 +10603,7 @@ expand_expr_real_1 (tree exp, rtx target,
> machine_mode t
> }
>/* Expand const VAR_DECLs with CONSTRUCTOR initializers that
>  have scalar integer modes to a reg via store_constructor.  */
> -  if (TREE_READONLY (exp)
> +  if (0 && TREE_READONLY (exp)
>   && !TREE_SIDE_EFFECTS (exp)
>   && (modifier == EXPAND_NORMAL || modifier == EXPAND_STACK_PARM)
>   && immediate_const_ctor_p (DECL_INITIAL (exp))

I've tried both chunks individually: the second one allowed the build to
finish successfully.

Thanks.
Rainer

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


Re: [PATCH]: libgompd add parallel handle functions

2022-06-06 Thread Mohamed Sayed via Gcc-patches
This is the final diff

On Mon, Jun 6, 2022 at 1:48 AM Mohamed Sayed 
wrote:

> This patch adds parallel region handles specified in section 5.5.3.
> From examining libgomp code, I found that struct gomp_team describes the
> parallel region.
> The Thread handle gives the address of gomp_thread so, I tried to
> access *team
> gomp_thread->ts->team.
> The parallel handle is the team pointer in team state.
> I have a question about ompd_get_task_parallel_handle
> https://www.openmp.org/spec-html/5.0/openmpsu218.html
> How can i reach gomp_team from gomp_task
> And the union in gomp_task has two entries gomp_sem_t and gomp_team
>
> libgomp/ChangeLog
>
> 2022-06-06  Mohamed Sayed  
>
>
> * Makefile.am: (libgompd_la_SOURCES): Add ompd-parallel.c.
> * Makefile.in: Regenerate.
> * libgompd.map: Add ompd_get_curr_parallel_handle,
> ompd_get_enclosing_parallel_handle, ompd_rel_parallel_handle
> and ompd_parallel_handle_compare symbol versions.
> * ompd-support.h:() : Add gompd_access (gomp_team_state, team) and
> gompd_access (gomp_team, prev_ts).
>
>
diff --git a/libgomp/Makefile.am b/libgomp/Makefile.am
index 6d913a93e7f..4e215450b25 100644
--- a/libgomp/Makefile.am
+++ b/libgomp/Makefile.am
@@ -94,7 +94,7 @@ libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c 
env.c error.c \
priority_queue.c affinity-fmt.c teams.c allocator.c oacc-profiling.c \
oacc-target.c ompd-support.c
 
-libgompd_la_SOURCES = ompd-init.c ompd-helper.c ompd-icv.c
+libgompd_la_SOURCES = ompd-init.c ompd-helper.c ompd-icv.c ompd-parallel.c
 
 include $(top_srcdir)/plugin/Makefrag.am
 
diff --git a/libgomp/Makefile.in b/libgomp/Makefile.in
index 40f896b5f03..ab66ad1c8f0 100644
--- a/libgomp/Makefile.in
+++ b/libgomp/Makefile.in
@@ -233,7 +233,8 @@ am_libgomp_la_OBJECTS = alloc.lo atomic.lo barrier.lo 
critical.lo \
affinity-fmt.lo teams.lo allocator.lo oacc-profiling.lo \
oacc-target.lo ompd-support.lo $(am__objects_1)
 libgomp_la_OBJECTS = $(am_libgomp_la_OBJECTS)
-am_libgompd_la_OBJECTS = ompd-init.lo ompd-helper.lo ompd-icv.lo
+am_libgompd_la_OBJECTS = ompd-init.lo ompd-helper.lo ompd-icv.lo \
+   ompd-parallel.lo
 libgompd_la_OBJECTS = $(am_libgompd_la_OBJECTS)
 AM_V_P = $(am__v_P_@AM_V@)
 am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
@@ -583,7 +584,7 @@ libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c 
env.c \
oacc-async.c oacc-plugin.c oacc-cuda.c priority_queue.c \
affinity-fmt.c teams.c allocator.c oacc-profiling.c \
oacc-target.c ompd-support.c $(am__append_7)
-libgompd_la_SOURCES = ompd-init.c ompd-helper.c ompd-icv.c
+libgompd_la_SOURCES = ompd-init.c ompd-helper.c ompd-icv.c ompd-parallel.c
 
 # Nvidia PTX OpenACC plugin.
 @PLUGIN_NVPTX_TRUE@libgomp_plugin_nvptx_version_info = -version-info 
$(libtool_VERSION)
@@ -800,6 +801,7 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ompd-helper.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ompd-icv.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ompd-init.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ompd-parallel.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ompd-support.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ordered.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/parallel.Plo@am__quote@
diff --git a/libgomp/libgompd.map b/libgomp/libgompd.map
index 85bdc3695f6..1662dc56962 100644
--- a/libgomp/libgompd.map
+++ b/libgomp/libgompd.map
@@ -16,6 +16,10 @@ OMPD_5.1 {
 ompd_thread_handle_compare;
 ompd_get_thread_id;
 ompd_get_device_from_thread;
+ompd_get_curr_parallel_handle;
+ompd_get_enclosing_parallel_handle;
+ompd_rel_parallel_handle;
+ompd_parallel_handle_compare;
   local:
 *;
 };
diff --git a/libgomp/ompd-parallel.c b/libgomp/ompd-parallel.c
new file mode 100644
index 000..3a278f7428a
--- /dev/null
+++ b/libgomp/ompd-parallel.c
@@ -0,0 +1,144 @@
+/* Copyright (C) The GNU Toolchain Authors.
+   Contributed by Mohamed Sayed .
+   This file is part of the GNU Offloading and Multi Processing Library
+   (libgomp).
+   Libgomp is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+   more details.
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 

RE: [PATCH 2/3][ARM] STAR-MC1 CPU Support - arm: Add individual star-mc1 cost tables and cost functions

2022-06-06 Thread Kyrylo Tkachov via Gcc-patches
Hi jasonwucj,

> -Original Message-
> From: Gcc-patches  bounces+kyrylo.tkachov=arm@gcc.gnu.org> On Behalf Of Chung-Ju Wu
> via Gcc-patches
> Sent: Thursday, May 26, 2022 8:18 AM
> To: Richard Earnshaw ; gcc-patches  patc...@gcc.gnu.org>
> Cc: jason...@anshingtek.com.tw
> Subject: [PATCH 2/3][ARM] STAR-MC1 CPU Support - arm: Add individual
> star-mc1 cost tables and cost functions
> 
> Hi,
> 
> Attached is the patch to provide star-mc1 specific cost functions and tables.
> Given these individual implementation, developers are able to make
> their own adjustment to fine-tune star-mc1 performance without affecting
> other cpu configurations.
> 
> Bootstrapped and tested on arm-none-eabi.
> 
> Is it OK for trunk?

It looks like arm_star_mc1_tune, star_mc1_extra_costs and 
arm_star_mc1_branch_cost are identical to arm_v7m_tune, v7m_extra_costs and 
arm_cortex_m_branch_cost.
I'd rather not duplicate those structures and functions in the master branch, 
as they provide a maintenance burden to the community.
If some tuning parameters need to be modified in the future for better 
performance we can create star-mc1-specific structures on demand then.
Thus, I think we don't want this patch.
Thanks,
Kyrill


RE: [PATCH 1/3][ARM] STAR-MC1 CPU Support - arm: Add star-mc1 core

2022-06-06 Thread Kyrylo Tkachov via Gcc-patches
Hi jasonwucj,

> -Original Message-
> From: Gcc-patches  bounces+kyrylo.tkachov=arm@gcc.gnu.org> On Behalf Of Chung-Ju Wu
> via Gcc-patches
> Sent: Thursday, May 26, 2022 8:18 AM
> To: Richard Earnshaw ; gcc-patches  patc...@gcc.gnu.org>
> Cc: jason...@anshingtek.com.tw
> Subject: [PATCH 1/3][ARM] STAR-MC1 CPU Support - arm: Add star-mc1 core
> 
> Hi,
> 
> STAR-MC1 is an embedded processor with armv8m architecture. Majorly it
> is designed to meet the requirements of AIoT application performance,
> power consumption and security. Early this month, star-mc1 is supported by
> the latest releases of MDK and CMSIS. For the completeness of Arm
> ecosystem,
> it would be great if we can have star-mc1 support in official GCC as well.
> 
> Attached is the patch to support star-mc1 cpu in GCC:
> 
> * Fundamental of -mcpu=star-mc1 option
>- Based on latest upstream commit:
> https://gcc.gnu.org/g:3dff965cae6709a5fd1b7b05c51c3c8aba786961
>- Add star-mc1 cpu in arm-cpus.in and regenerate necessary
> implementation
> 
> * Include VLLDM bugfix
>- CVE-2021-35465 also affects star-mc1 configuration [1]
>- We apply quirk_vlldm strategy for star-mc1 cpu
> 
> Successfully bootstrapped and tested on arm-none-eabi.
> 
> Is it OK for trunk?

This is okay (together with the documentation additions in 3/3)
Thanks for the patch,
Kyrill

> 
> [1] https://www.cve.org/CVERecord?id=CVE-2021-35465
> 
> Regards,
> jasonwucj


RE: [PATCH 3/3][ARM] STAR-MC1 CPU Support - docs: Add star-mc1 core

2022-06-06 Thread Kyrylo Tkachov via Gcc-patches
Hi jasonwucj,

> -Original Message-
> From: Gcc-patches  bounces+kyrylo.tkachov=arm@gcc.gnu.org> On Behalf Of Chung-Ju Wu
> via Gcc-patches
> Sent: Thursday, May 26, 2022 8:19 AM
> To: Richard Earnshaw ; gcc-patches  patc...@gcc.gnu.org>
> Cc: jason...@anshingtek.com.tw
> Subject: [PATCH 3/3][ARM] STAR-MC1 CPU Support - docs: Add star-mc1 core
> 
> Hi,
> 
> This is the patch to add star-mc1 in the Arm -mtune and
> -mfix-cmse-cve-2021-35465 sections of gcc invoke.texi documentation.
> 
> Is it OK for trunk?

This is okay but I'd rather have it merged with patch 1/3 (I'll review that 
separately).
There's no reason to keep such a simple documentation addition separate from 
the patch that actually adds the feature being documented.
Thanks,
Kyrill

> 
> Regards,
> jasonwucj


[PATCH] PR middle-end/105853: Call store_constructor directly from calls.cc.

2022-06-06 Thread Roger Sayle

This patch fixes both ICE regressions PR middle-end/105853 and
PR target/105856 caused by my recent patch to expand small const structs
as immediate constants.  That patch updated code generation in three
places: two in expr.cc that call store_constructor directly, and the
third in calls.cc's load_register_parameters that expands its CONSTRUCTOR
via expand_expr, as store_constructor is local/static to expr.cc, and
the "public" API, should usually simply forward the constructor to the
appropriate store_constructor function.

Alas, despite the clean regression testing on multiple targets, the above
ICEs show that expand_expr isn't a suitable proxy for store_constructor,
and things that (I'd assumed) shouldn't affect how/whether a struct is
placed in a register [such as whether the struct is considered packed/
aligned or not] actually interfere with the optimization that is being
attempted.

The (proposed) solution is to export store_constructor (and it's helper
function int_expr_size) from expr.cc, by removing their static qualifier
and prototyping both functions in expr.h, so they can be called directly
from load_register_parameters in calls.cc.  This cures both ICEs, but
almost as important produces much better code generation than GCC 12.

For PR 105853, GCC 12 generates:

compose_nd_na_ipv6_src:
movzx eax, WORD PTR eth_addr_zero[rip+2]
movzx edx, WORD PTR eth_addr_zero[rip]
movzx edi, WORD PTR eth_addr_zero[rip+4]
sal rax, 16
or rax, rdx
sal rdi, 32
or rdi, rax
xor eax, eax
jmp packet_set_nd
eth_addr_zero:  .zero 6

where now (with this fix) GCC 13 generates:
compose_nd_na_ipv6_src:
xorl%edi, %edi
xorl%eax, %eax
jmp packet_set_nd

Likewise, for PR 105856 on ARM, we'd previously generate:
g_329_3:
movw r3, #:lower16:.LANCHOR0
movt r3, #:upper16:.LANCHOR0
ldr r0, [r3]
b func_19

but with this optimization we now generate:
g_329_3:
mov r0, #6
b   func_19


This patch has been tested on x86_64-pc-linux-gnu with make bootstrap
and make -k check with no new failures.  I've also confirmed that on a
cross-compiler to arm-linux-gnueabihf --with-arch=armv6 this fixes the
target specific ICE in PR105856.  The make check is currently running
with --target_board=unix{-m32}, OK for mainline if that also passes?

My sincere apologies for the inconvenience.


2022-06-06  Roger Sayle  

gcc/ChangeLog
PR middle-end/105853
PR target/105856
* calls.cc (load_register_parameters): Call store_constructor
(and int_Expr_size) directly instead of expanding via expand_expr.
* expr.cc (static void store_constructor): Don't prototype here.
(static HOST_WIDE_INT int_expr_size): Likewise.
(store_constructor): No longer static.
(int_expr_size): Likewise, no longer static.
* expr.h (store_constructor): Prototype here.
(int_expr_size): Prototype here.

gcc/testsuite/ChangeLog
PR middle-end/105853
PR target/105856
* gcc.dg/pr105853.c: New test case.
* gcc.dg/pr105856.c: New test case.


Roger
--

diff --git a/gcc/calls.cc b/gcc/calls.cc
index a4336c1..f4e1299 100644
--- a/gcc/calls.cc
+++ b/gcc/calls.cc
@@ -2186,10 +2186,11 @@ load_register_parameters (struct arg_data *args, int 
num_actuals,
   && immediate_const_ctor_p (DECL_INITIAL (tree_value)))
{
  rtx target = gen_reg_rtx (word_mode);
- rtx x = expand_expr (DECL_INITIAL (tree_value),
-  target, word_mode, EXPAND_NORMAL);
+ store_constructor (DECL_INITIAL (tree_value), target, 0,
+int_expr_size (DECL_INITIAL (tree_value)),
+false);
  reg = gen_rtx_REG (word_mode, REGNO (reg));
- emit_move_insn (reg, x);
+ emit_move_insn (reg, target);
}
  else if (partial == 0 || args[i].pass_on_stack)
{
diff --git a/gcc/expr.cc b/gcc/expr.cc
index fb062dc..85cb414 100644
--- a/gcc/expr.cc
+++ b/gcc/expr.cc
@@ -84,7 +84,6 @@ static void emit_block_move_via_loop (rtx, rtx, rtx, 
unsigned);
 static void clear_by_pieces (rtx, unsigned HOST_WIDE_INT, unsigned int);
 static rtx_insn *compress_float_constant (rtx, rtx);
 static rtx get_subtarget (rtx);
-static void store_constructor (tree, rtx, int, poly_int64, bool);
 static rtx store_field (rtx, poly_int64, poly_int64, poly_uint64, poly_uint64,
machine_mode, tree, alias_set_type, bool, bool);
 
@@ -100,7 +99,6 @@ static void do_tablejump (rtx, machine_mode, rtx, rtx, rtx,
  profile_probability);
 static rtx const_vector_from_tree (tree);
 static tree tree_expr_size (const_tree);
-static HOST_WIDE_INT int_expr_size (const_tree);
 static void convert_mode_scalar (rtx, rtx, int);
 
 
@@ -6757,7 +6755,7 @@ 

Re: [PATCH, OpenMP, v2] Implement uses_allocators clause for target regions

2022-06-06 Thread Jakub Jelinek via Gcc-patches
On Mon, Jun 06, 2022 at 09:38:30PM +0800, Chung-Lin Tang wrote:
> I'll file a bugzilla for the target construct.

Thanks.

> That said, can we delay FIELD_DECL support for uses_allocators? (which is 
> target construct only)
> Since it appears to be not trivial at the moment.

Sure.
But would be nice to file a PR to track it once the patch is committed.

Jakub



Re: [PATCH, OpenMP, v2] Implement uses_allocators clause for target regions

2022-06-06 Thread Chung-Lin Tang




On 2022/6/6 9:22 下午, Jakub Jelinek wrote:

On Mon, Jun 06, 2022 at 09:19:18PM +0800, Chung-Lin Tang wrote:

On 2022/5/31 6:02 PM, Jakub Jelinek wrote:

5) for C++, we should handle FIELD_DECLs, but it shouldn't be hard, just
 look how it is handled for private too

Jakub


About private() for non-static members, is it really working right now?


Perhaps we have a bug that we should file in bugzilla and should fix.

Can you try omp parallel or omp target in the test instead?


I see it works for omp parallel/task, gimplify results:

void C::foo (struct C * const this)
{
  omp_allocator_handle_t a [value-expr: ((struct C *) this)->a];

  #pragma omp parallel private(a)
{
  a = 0;
}
}

I'll file a bugzilla for the target construct.

That said, can we delay FIELD_DECL support for uses_allocators? (which is 
target construct only)
Since it appears to be not trivial at the moment.

Thanks,
Chung-Lin



A simple test:

struct C {
   omp_allocator_handle_t a;
   void foo (void) {
 #pragma omp target private (a)
  a = (omp_allocator_handle_t) 0;
   }
};

int main (void)
{
   C c;
   c.foo ();
   return 0;
}


Jakub



Re: [PATCH, OpenMP, v2] Implement uses_allocators clause for target regions

2022-06-06 Thread Jakub Jelinek via Gcc-patches
On Mon, Jun 06, 2022 at 09:19:18PM +0800, Chung-Lin Tang wrote:
> On 2022/5/31 6:02 PM, Jakub Jelinek wrote:
> > 5) for C++, we should handle FIELD_DECLs, but it shouldn't be hard, just
> > look how it is handled for private too
> > 
> > Jakub
> 
> About private() for non-static members, is it really working right now?

Perhaps we have a bug that we should file in bugzilla and should fix.

Can you try omp parallel or omp target in the test instead?

> A simple test:
> 
> struct C {
>   omp_allocator_handle_t a;
>   void foo (void) {
> #pragma omp target private (a)
>  a = (omp_allocator_handle_t) 0;
>   }
> };
> 
> int main (void)
> {
>   C c;
>   c.foo ();
>   return 0;
> }

Jakub



Re: [PATCH, OpenMP, v2] Implement uses_allocators clause for target regions

2022-06-06 Thread Chung-Lin Tang

On 2022/5/31 6:02 PM, Jakub Jelinek wrote:

5) for C++, we should handle FIELD_DECLs, but it shouldn't be hard, just
look how it is handled for private too

Jakub


About private() for non-static members, is it really working right now?
A simple test:

struct C {
  omp_allocator_handle_t a;
  void foo (void) {
#pragma omp target private (a)
 a = (omp_allocator_handle_t) 0;
  }
};

int main (void)
{
  C c;
  c.foo ();
  return 0;
}

After C++ front-end processing we get:

{
omp_allocator_handle_t D.2823 [value-expr: ((struct C *) this)->a];
  #pragma omp target private(D.2823)
{
  {
<;
  }
}
}

The OMP field privatization seems to be doing something here.
However gimplify turns this into:

void C::foo (struct C * const this)
{
  omp_allocator_handle_t a [value-expr: ((struct C *) this)->a];

  #pragma omp target num_teams(1) thread_limit(0) private(a) \
  map(alloc:MEM[(char *)this] [len: 0]) map(firstprivate:this [pointer 
assign, bias: 0])
{
  this->a = 0;
}
}

This doesn't look quite right for private clause. I don't quite expect a 
zero-length mapping of this[:0],
nor reverting the gimple to use "this->a" for a private copy.

Chung-Lin


RE: [PING] PR middle-end/95126: Expand small const structs as immediate constants

2022-06-06 Thread Roger Sayle


Hi Rainer,

> > The one experiment I'd like to be able to try, to investigate the
> > cause/cure of this, is:
> >
> > diff --git a/gcc/calls.cc b/gcc/calls.cc index a4336c1..05fdd24 100644
> > --- a/gcc/calls.cc
> > +++ b/gcc/calls.cc
> > @@ -2177,7 +2177,7 @@ load_register_parameters (struct arg_data *args,
> > int num_a
> >  VAR_DECL with a simple constructor, expand that constructor
> >  via a pseudo rather than read from (possibly misaligned)
> >  memory.  PR middle-end/95126.  */
> > - else if (nregs == 1
> > + else if (0 && nregs == 1
> >&& partial == 0
> >&& !args[i].pass_on_stack
> >&& VAR_P (tree_value)
> >
> > My "small const structs" patch affected code generation in three
> > places, and identifying which of these is causing the miscompilation
> > issue for gnat will help narrow down the problem, or worst case allow
> > reverting less of the problematic patch.
> 
> I just tried this on i386-pc-solaris2.11: unfortunately, it made no
difference.

Awesome!  Very many thanks for trying this.  Alas it confirms that the patch
I'm currently spinning won't have any affect.  So by a process of
elimination,
the miscompilation must be triggered by one of the other two changes:

diff --git a/gcc/expr.cc b/gcc/expr.cc
index fb062dc..37f1405 100644
--- a/gcc/expr.cc
+++ b/gcc/expr.cc
@@ -4871,7 +4871,7 @@ emit_push_insn (rtx x, machine_mode mode, tree type,
rtx s
  /* If source is a constant VAR_DECL with a simple constructor,
  store the constructor to the stack instead of moving it.  */
  const_tree decl;
- if (partial == 0
+ if (0 && partial == 0
  && MEM_P (xinner)
  && SYMBOL_REF_P (XEXP (xinner, 0))
  && (decl = SYMBOL_REF_DECL (XEXP (xinner, 0))) != NULL_TREE
@@ -10603,7 +10603,7 @@ expand_expr_real_1 (tree exp, rtx target,
machine_mode t
}
   /* Expand const VAR_DECLs with CONSTRUCTOR initializers that
 have scalar integer modes to a reg via store_constructor.  */
-  if (TREE_READONLY (exp)
+  if (0 && TREE_READONLY (exp)
  && !TREE_SIDE_EFFECTS (exp)
  && (modifier == EXPAND_NORMAL || modifier == EXPAND_STACK_PARM)
  && immediate_const_ctor_p (DECL_INITIAL (exp))


p.s.  I've just set up a build environment on another machine where I'm
using 
GNAT 11.3.1 20220421 (Red Hat 11.3.1-2)
as the bootstrap compiler, but it'll be a few hours before I'm investigating
in
gdb.

Many thanks again for your help.  Sorry again for the
breakage/inconvenience.

Roger
==




Re: [PING] PR middle-end/95126: Expand small const structs as immediate constants

2022-06-06 Thread Rainer Orth
Andreas Schwab  writes:

> On Jun 06 2022, Roger Sayle wrote:
>
>> gcc -std=gnu99 -c -g  -gnatpg -gnatwns -gnata -W -Wall -I- -I.
>> -Iada/generated -Iada -I../../gcc/gcc/ada ../../gcc/gcc/ada/osint.adb -o
>> ada/osint.o
>> osint.adb:438:31: "strlen" not declared in "CRTL"
>> osint.adb:441:14: "strncpy" not declared in "CRTL"
>> osint.adb:675:21: "strlen" not declared in "CRTL"
>> osint.adb:728:16: "Open_Append" is undefined
>> osint.adb:1108:41: "int64" not declared in "CRTL"
>> osint.adb:3144:28: "strlen" not declared in "CRTL"
>> osint.adb:3147:11: "strncpy" not declared in "CRTL"
>
> What's your bootstrap compiler?

FWIW, I'm using GCC 9 (9.4.0 on Solaris/x86, 9.3.0 on Linux/x86_64) as
bootstrap compiler.

Rainer

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


Re: [PING] PR middle-end/95126: Expand small const structs as immediate constants

2022-06-06 Thread Rainer Orth
Hi Roger,

> My sincere apologies for the breakage.  I'm currently regression testing a
> solution to the ICEs introduced by my "small const struct" patch, which
> fingers-crossed might also fix this Ada bootstrap.  For a while when
> Rainer also reported the bootstrap failure is visible, on
> x86_64-pc-linux-gnu,
> I thought I'd be able to diagnose and fix the issue myself, but alas my
> --enable-languages="all" builds (still) fail earlier with:
>
> gcc -std=gnu99 -c -g  -gnatpg -gnatwns -gnata -W -Wall -I- -I.
> -Iada/generated -Iada -I../../gcc/gcc/ada ../../gcc/gcc/ada/osint.adb -o
> ada/osint.o
> osint.adb:438:31: "strlen" not declared in "CRTL"
> osint.adb:441:14: "strncpy" not declared in "CRTL"
> osint.adb:675:21: "strlen" not declared in "CRTL"
> osint.adb:728:16: "Open_Append" is undefined
> osint.adb:1108:41: "int64" not declared in "CRTL"
> osint.adb:3144:28: "strlen" not declared in "CRTL"
> osint.adb:3147:11: "strncpy" not declared in "CRTL"
>
> The one experiment I'd like to be able to try, to investigate the cause/cure
> of this, is:
>
> diff --git a/gcc/calls.cc b/gcc/calls.cc
> index a4336c1..05fdd24 100644
> --- a/gcc/calls.cc
> +++ b/gcc/calls.cc
> @@ -2177,7 +2177,7 @@ load_register_parameters (struct arg_data *args, int
> num_a
>  VAR_DECL with a simple constructor, expand that constructor
>  via a pseudo rather than read from (possibly misaligned)
>  memory.  PR middle-end/95126.  */
> - else if (nregs == 1
> + else if (0 && nregs == 1
>&& partial == 0
>&& !args[i].pass_on_stack
>&& VAR_P (tree_value)
>
> My "small const structs" patch affected code generation in three places, and
> identifying
> which of these is causing the miscompilation issue for gnat will help narrow
> down the
> problem, or worst case allow reverting less of the problematic patch.

I just tried this on i386-pc-solaris2.11: unfortunately, it made no
difference.

Rainer

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


Re: [PING] PR middle-end/95126: Expand small const structs as immediate constants

2022-06-06 Thread Andreas Schwab
On Jun 06 2022, Roger Sayle wrote:

> Hi Andreas, 
>> > gcc -std=gnu99 -c -g  -gnatpg -gnatwns -gnata -W -Wall -I- -I.
>> > -Iada/generated -Iada -I../../gcc/gcc/ada ../../gcc/gcc/ada/osint.adb
>> > -o ada/osint.o
>> > osint.adb:438:31: "strlen" not declared in "CRTL"
>> > osint.adb:441:14: "strncpy" not declared in "CRTL"
>> > osint.adb:675:21: "strlen" not declared in "CRTL"
>> > osint.adb:728:16: "Open_Append" is undefined
>> > osint.adb:1108:41: "int64" not declared in "CRTL"
>> > osint.adb:3144:28: "strlen" not declared in "CRTL"
>> > osint.adb:3147:11: "strncpy" not declared in "CRTL"
>> 
>> What's your bootstrap compiler?
>
> GNAT 4.8.5 20150623 (Red Hat 4.8.5-44)

That's very much out of date.

In order to build GNAT, the Ada compiler, you need a working GNAT
compiler (GCC version 5.1 or later).

-- 
Andreas Schwab, sch...@linux-m68k.org
GPG Key fingerprint = 7578 EB47 D4E5 4D69 2510  2552 DF73 E780 A9DA AEC1
"And now for something completely different."


Re: [x86 PATCH] Double word implementation of and; cmp to not; test optimization.

2022-06-06 Thread Uros Bizjak via Gcc-patches
On Sun, Jun 5, 2022 at 7:19 PM Roger Sayle  wrote:
>
>
> This patch extends the recent and;cmp to not;test optimization to also
> perform this transformation for TImode on TARGET_64BIT and DImode on -m32,
> One motivation for this is that it's a step to fixing the current failure
> of gcc.target/i386/pr65105-5.c on -m32.
>
> A more direct benefit for x86_64 is that the following code:
>
> int foo(__int128 x, __int128 y)
> {
>   return (x & y) == y;
> }
>
> improves (with -O2 -mbmi) from:
>
> movq%rdi, %r8
> movq%rsi, %rdi
> movq%rdx, %rsi
> andq%rcx, %rdi
> movq%r8, %rax
> andq%rdx, %rax
> movq%rdi, %rdx
> xorq%rsi, %rax
> xorq%rcx, %rdx
> orq %rdx, %rax
> sete%al
> movzbl  %al, %eax
> ret
>
> to the much better:
>
> movq%rdi, %r8
> movq%rsi, %rdi
> andn%rdx, %r8, %rax
> andn%rcx, %rdi, %rsi
> orq %rsi, %rax
> sete%al
> movzbl  %al, %eax
> ret
>
> The major theme of this patch is to generalize many of i386.md's
> *di3_doubleword patterns to become *_doubleword patterns, i.e.
> whenever there exists a "double word" optimization for DImode with -m32,
> there should be an equivalent TImode optimization on TARGET_64BIT.
>
> The following patch has been tested on x86_64-pc-linux-gnu with
> make bootstrap and make -k check, where on TARGET_64BIT there are
> no new failures, but paradoxically with --target_board=unix{-m32}
> the other dg-final clause in gcc.target/i386/pr65105-5.c now fails.
> Counter-intuitively, this is progress, and pr65105-5.c may now be
> fixed (without using peephole2) simply by tweaking the STV pass to
> handle andn/test (in a follow-up patch).
> OK for mainline?
>
>
> 2022-06-05  Roger Sayle  
>
> gcc/ChangeLog
> * config/i386/i386.cc (ix86_rtx_costs) : Provide costs
> for double word comparisons and tests (comparisons against zero).
> * config/i386/i386.md (*test_not_doubleword): Split DWI
> and;cmp into andn;cmp $0 as a pre-reload splitter.
> (define_expand and3): Generalize from SWIM1248x to SWIDWI.
> (define_insn_and_split "*anddi3_doubleword"): Rename/generalize...
> (define_insn_and_split "*and3_doubleword"): ... to this.
> (define_insn "*andndi3_doubleword"): Rename and generalize...
> (define_insn "*andn3_doubleword): ... to this.
> (define_split): Split andn when TARGET_BMI for both  modes.
> (define_split): Split andn when !TARGET_BMI for both  modes.
> (define_expand 3): Generalize from SWIM1248x to
> SWIDWI.
> (define_insn_and_split "*3_doubleword): Generalize
> from DI mode to both  modes.
>
> gcc/testsuite/ChangeLog
> * gcc.target/i386/testnot-3.c: New test case.

 (define_expand "and3"
-  [(set (match_operand:SWIM1248x 0 "nonimmediate_operand")
-(and:SWIM1248x (match_operand:SWIM1248x 1 "nonimmediate_operand")
-   (match_operand:SWIM1248x 2 "")))]
+  [(set (match_operand:SWIDWI 0 "nonimmediate_operand")
+(and:SWIDWI (match_operand:SWIDWI 1 "nonimmediate_operand")
+(match_operand:SWIDWI 2 "")))]


SWIM1248x should be changed to SDWIM mode iterator, not SWIDWI:

;; Math-dependant integer modes with DImode.
(define_mode_iterator SWIM1248x
[(QI "TARGET_QIMODE_MATH")
 (HI "TARGET_HIMODE_MATH")
 SI DI])

;; All math-dependant single and double word integer modes.
(define_mode_iterator SDWIM [(QI "TARGET_QIMODE_MATH")
 (HI "TARGET_HIMODE_MATH")
 SI DI (TI "TARGET_64BIT")])

but

;; SWI and DWI together.
(define_mode_iterator SWIDWI [QI HI SI DI (TI "TARGET_64BIT")])

does not handle QI and HI correctly w.r.t. TARGET_[QI,HI]MODE_MATH.

Uros.


RE: [PING] PR middle-end/95126: Expand small const structs as immediate constants

2022-06-06 Thread Roger Sayle


Hi Andreas, 
> > gcc -std=gnu99 -c -g  -gnatpg -gnatwns -gnata -W -Wall -I- -I.
> > -Iada/generated -Iada -I../../gcc/gcc/ada ../../gcc/gcc/ada/osint.adb
> > -o ada/osint.o
> > osint.adb:438:31: "strlen" not declared in "CRTL"
> > osint.adb:441:14: "strncpy" not declared in "CRTL"
> > osint.adb:675:21: "strlen" not declared in "CRTL"
> > osint.adb:728:16: "Open_Append" is undefined
> > osint.adb:1108:41: "int64" not declared in "CRTL"
> > osint.adb:3144:28: "strlen" not declared in "CRTL"
> > osint.adb:3147:11: "strncpy" not declared in "CRTL"
> 
> What's your bootstrap compiler?

GNAT 4.8.5 20150623 (Red Hat 4.8.5-44)

I'm currently investigating other options...




Re: [PING] PR middle-end/95126: Expand small const structs as immediate constants

2022-06-06 Thread Andreas Schwab
On Jun 06 2022, Roger Sayle wrote:

> gcc -std=gnu99 -c -g  -gnatpg -gnatwns -gnata -W -Wall -I- -I.
> -Iada/generated -Iada -I../../gcc/gcc/ada ../../gcc/gcc/ada/osint.adb -o
> ada/osint.o
> osint.adb:438:31: "strlen" not declared in "CRTL"
> osint.adb:441:14: "strncpy" not declared in "CRTL"
> osint.adb:675:21: "strlen" not declared in "CRTL"
> osint.adb:728:16: "Open_Append" is undefined
> osint.adb:1108:41: "int64" not declared in "CRTL"
> osint.adb:3144:28: "strlen" not declared in "CRTL"
> osint.adb:3147:11: "strncpy" not declared in "CRTL"

What's your bootstrap compiler?

-- 
Andreas Schwab, sch...@linux-m68k.org
GPG Key fingerprint = 7578 EB47 D4E5 4D69 2510  2552 DF73 E780 A9DA AEC1
"And now for something completely different."


Re: [1/2] PR96463 - aarch64 specific changes

2022-06-06 Thread Richard Sandiford via Gcc-patches
Prathamesh Kulkarni  writes:
>> >  {
>> >/* The pattern matching functions above are written to look for a small
>> >   number to begin the sequence (0, 1, N/2).  If we begin with an index
>> > @@ -24084,6 +24112,12 @@ aarch64_expand_vec_perm_const_1 (struct 
>> > expand_vec_perm_d *d)
>> > || d->vec_flags == VEC_SVE_PRED)
>> >&& known_gt (nelt, 1))
>> >  {
>> > +  /* If operand and result modes differ, then only check
>> > +  for dup case.  */
>> > +  if (d->vmode != op_mode)
>> > + return (d->vec_flags == VEC_SVE_DATA)
>> > + ? aarch64_evpc_sve_dup (d, op_mode) : false;
>> > +
>>
>> I think it'd be more future-proof to format this as:
>>
>> if (d->vmod == d->op_mode)
>>   {
>> …existing code…
>>   }
>> else
>>   {
>> if (aarch64_evpc_sve_dup (d))
>>   return true;
>>   }
>>
>> with the d->vec_flags == VEC_SVE_DATA check being in aarch64_evpc_sve_dup,
>> alongside the op_mode check.  I think we'll be adding more checks here
>> over time.
> Um I was wondering if we should structure it as:
> if (d->vmode == d->op_mode)
>   {
>  ...existing code...
>   }
> if (aarch64_evpc_sve_dup (d))
>   return true;
>
> So we check for dup irrespective of  d->vmode == d->op_mode ?

Yeah, I can see the attraction of that.  I think the else is better
though because the fallback TBL handling will (rightly) come at the end
of the existing code.  Without the else, we'd have specific tests like
DUP after generic ones like TBL, so the reader would have to work out
for themselves that DUP and TBL handle disjoint cases.

>> >if (aarch64_evpc_rev_local (d))
>> >   return true;
>> >else if (aarch64_evpc_rev_global (d))
>> > @@ -24105,7 +24139,12 @@ aarch64_expand_vec_perm_const_1 (struct 
>> > expand_vec_perm_d *d)
>> >else if (aarch64_evpc_reencode (d))
>> >   return true;
>> >if (d->vec_flags == VEC_SVE_DATA)
>> > - return aarch64_evpc_sve_tbl (d);
>> > + {
>> > +   if (aarch64_evpc_sve_tbl (d))
>> > + return true;
>> > +   else if (aarch64_evpc_sve_dup (d, op_mode))
>> > + return true;
>> > + }
>> >else if (d->vec_flags == VEC_ADVSIMD)
>> >   return aarch64_evpc_tbl (d);
>> >  }
>>
>> Is this part still needed, given the above?
>>
>> Thanks,
>> Richard
>>
>> > @@ -24119,9 +24158,6 @@ aarch64_vectorize_vec_perm_const (machine_mode 
>> > vmode, machine_mode op_mode,
>> > rtx target, rtx op0, rtx op1,
>> > const vec_perm_indices )
>> >  {
>> > -  if (vmode != op_mode)
>> > -return false;
>> > -
>> >struct expand_vec_perm_d d;
>> >
>> >/* Check whether the mask can be applied to a single vector.  */
>> > @@ -24154,10 +24190,10 @@ aarch64_vectorize_vec_perm_const (machine_mode 
>> > vmode, machine_mode op_mode,
>> >d.testing_p = !target;
>> >
>> >if (!d.testing_p)
>> > -return aarch64_expand_vec_perm_const_1 ();
>> > +return aarch64_expand_vec_perm_const_1 (, op_mode);
>> >
>> >rtx_insn *last = get_last_insn ();
>> > -  bool ret = aarch64_expand_vec_perm_const_1 ();
>> > +  bool ret = aarch64_expand_vec_perm_const_1 (, op_mode);
>> >gcc_assert (last == get_last_insn ());
>> >
>> >return ret;
>
> diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc 
> b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
> index bee410929bd..1a804b1ab73 100644
> --- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
> +++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
> @@ -44,6 +44,7 @@
>  #include "aarch64-sve-builtins-shapes.h"
>  #include "aarch64-sve-builtins-base.h"
>  #include "aarch64-sve-builtins-functions.h"
> +#include "ssa.h"
>  
>  using namespace aarch64_sve;
>  
> @@ -1207,6 +1208,64 @@ public:
>  insn_code icode = code_for_aarch64_sve_ld1rq (e.vector_mode (0));
>  return e.use_contiguous_load_insn (icode);
>}
> +
> +  gimple *
> +  fold (gimple_folder ) const override
> +  {
> +tree arg0 = gimple_call_arg (f.call, 0);
> +tree arg1 = gimple_call_arg (f.call, 1);
> +
> +/* Transform:
> +   lhs = svld1rq ({-1, -1, ... }, arg1)
> +   into:
> +   tmp = mem_ref [(int * {ref-all}) arg1]
> +   lhs = vec_perm_expr.
> +   on little endian target.
> +   vectype is the corresponding ADVSIMD type.  */
> +
> +if (!BYTES_BIG_ENDIAN
> + && integer_all_onesp (arg0))
> +  {
> + tree lhs = gimple_call_lhs (f.call);
> + tree lhs_type = TREE_TYPE (lhs);
> + poly_uint64 lhs_len = TYPE_VECTOR_SUBPARTS (lhs_type);
> + tree eltype = TREE_TYPE (lhs_type);
> +
> + scalar_mode elmode = GET_MODE_INNER (TYPE_MODE (lhs_type));
> + machine_mode vq_mode = aarch64_vq_mode (elmode).require ();
> + tree vectype = build_vector_type_for_mode (eltype, vq_mode);
> +
> + tree elt_ptr_type
> +   = build_pointer_type_for_mode (eltype, VOIDmode, true);
> + tree zero = 

Re: [x86 PATCH] Double word implementation of and; cmp to not; test optimization.

2022-06-06 Thread Uros Bizjak via Gcc-patches
On Sun, Jun 5, 2022 at 7:19 PM Roger Sayle  wrote:
>
>
> This patch extends the recent and;cmp to not;test optimization to also
> perform this transformation for TImode on TARGET_64BIT and DImode on -m32,
> One motivation for this is that it's a step to fixing the current failure
> of gcc.target/i386/pr65105-5.c on -m32.
>
> A more direct benefit for x86_64 is that the following code:
>
> int foo(__int128 x, __int128 y)
> {
>   return (x & y) == y;
> }
>
> improves (with -O2 -mbmi) from:
>
> movq%rdi, %r8
> movq%rsi, %rdi
> movq%rdx, %rsi
> andq%rcx, %rdi
> movq%r8, %rax
> andq%rdx, %rax
> movq%rdi, %rdx
> xorq%rsi, %rax
> xorq%rcx, %rdx
> orq %rdx, %rax
> sete%al
> movzbl  %al, %eax
> ret
>
> to the much better:
>
> movq%rdi, %r8
> movq%rsi, %rdi
> andn%rdx, %r8, %rax
> andn%rcx, %rdi, %rsi
> orq %rsi, %rax
> sete%al
> movzbl  %al, %eax
> ret
>
> The major theme of this patch is to generalize many of i386.md's
> *di3_doubleword patterns to become *_doubleword patterns, i.e.
> whenever there exists a "double word" optimization for DImode with -m32,
> there should be an equivalent TImode optimization on TARGET_64BIT.
>
> The following patch has been tested on x86_64-pc-linux-gnu with
> make bootstrap and make -k check, where on TARGET_64BIT there are
> no new failures, but paradoxically with --target_board=unix{-m32}
> the other dg-final clause in gcc.target/i386/pr65105-5.c now fails.
> Counter-intuitively, this is progress, and pr65105-5.c may now be
> fixed (without using peephole2) simply by tweaking the STV pass to
> handle andn/test (in a follow-up patch).
> OK for mainline?
>
>
> 2022-06-05  Roger Sayle  
>
> gcc/ChangeLog
> * config/i386/i386.cc (ix86_rtx_costs) : Provide costs
> for double word comparisons and tests (comparisons against zero).
> * config/i386/i386.md (*test_not_doubleword): Split DWI
> and;cmp into andn;cmp $0 as a pre-reload splitter.
> (define_expand and3): Generalize from SWIM1248x to SWIDWI.
> (define_insn_and_split "*anddi3_doubleword"): Rename/generalize...
> (define_insn_and_split "*and3_doubleword"): ... to this.
> (define_insn "*andndi3_doubleword"): Rename and generalize...
> (define_insn "*andn3_doubleword): ... to this.
> (define_split): Split andn when TARGET_BMI for both  modes.
> (define_split): Split andn when !TARGET_BMI for both  modes.
> (define_expand 3): Generalize from SWIM1248x to
> SWIDWI.
> (define_insn_and_split "*3_doubleword): Generalize
> from DI mode to both  modes.
>
> gcc/testsuite/ChangeLog
> * gcc.target/i386/testnot-3.c: New test case.

-(define_insn_and_split "*anddi3_doubleword"
-  [(set (match_operand:DI 0 "nonimmediate_operand")
- (and:DI
- (match_operand:DI 1 "nonimmediate_operand")
- (match_operand:DI 2 "x86_64_szext_general_operand")))
+(define_insn_and_split "*and3_doubleword"
+  [(set (match_operand: 0 "nonimmediate_operand")
+ (and:
+ (match_operand: 1 "nonimmediate_operand")
+ (match_operand: 2 "")))
(clobber (reg:CC FLAGS_REG))]
-  "!TARGET_64BIT
-   && ix86_binary_operator_ok (AND, DImode, operands)
+  "ix86_binary_operator_ok (AND, mode, operands)
&& ix86_pre_reload_split ()"
   "#"
   "&& 1"
-  [(const_int 0)]
+  [(parallel
+[(set (match_dup 0) (and:DWIH (match_dup 1) (match_dup 2)))
+ (clobber (reg:CC FLAGS_REG))])
+   (parallel
+[(set (match_dup 3) (and:DWIH (match_dup 4) (match_dup 5)))
+ (clobber (reg:CC FLAGS_REG))])]

Please also note that the above pattern generation will never be
reached, you have unconditional DONE in the insn preparation
statement.

 {
-  split_double_mode (DImode, [0], 3, [0], [3]);
+  split_double_mode (mode, [0], 3, [0], [3]);

   if (operands[2] == const0_rtx)
 emit_move_insn (operands[0], const0_rtx);
   else if (operands[2] == constm1_rtx)
 emit_move_insn (operands[0], operands[1]);
   else
-emit_insn (gen_andsi3 (operands[0], operands[1], operands[2]));
+ix86_expand_binary_operator (AND, mode, [0]);

   if (operands[5] == const0_rtx)
 emit_move_insn (operands[3], const0_rtx);
   else if (operands[5] == constm1_rtx)
 emit_move_insn (operands[3], operands[4]);
   else
-emit_insn (gen_andsi3 (operands[3], operands[4], operands[5]));
+ix86_expand_binary_operator (AND, mode, [3]);

   DONE;
 })

-(define_insn_and_split "*di3_doubleword"
-  [(set (match_operand:DI 0 "nonimmediate_operand")
- (any_or:DI
- (match_operand:DI 1 "nonimmediate_operand")
- (match_operand:DI 2 "x86_64_szext_general_operand")))
+(define_insn_and_split "*3_doubleword"
+  [(set (match_operand: 0 "nonimmediate_operand")
+ (any_or:
+ (match_operand: 1 "nonimmediate_operand")
+ (match_operand: 2 "")))
(clobber (reg:CC 

Re: [PATCH] Update document for VECTOR_MODES_WITH_PREFIX

2022-06-06 Thread Richard Sandiford via Gcc-patches
"Kewen.Lin"  writes:
> Hi,
>
> r10-3912 updated the format of VECTOR_MODES_WITH_PREFIX by
> adding one more parameter ORDER, the related document is out
> of date.  So update the document for ORDER.
>
> Is it ok for trunk?
>
> BR,
> Kewen
> -
>
> gcc/ChangeLog:
>
>   * machmode.def (VECTOR_MODES_WITH_PREFIX): Update document for
>   parameter ORDER.

OK, thanks.

Richard

> ---
>  gcc/machmode.def | 5 +++--
>  1 file changed, 3 insertions(+), 2 deletions(-)
>
> diff --git a/gcc/machmode.def b/gcc/machmode.def
> index b62a5fbc683..d62563601fc 100644
> --- a/gcc/machmode.def
> +++ b/gcc/machmode.def
> @@ -142,9 +142,10 @@ along with GCC; see the file COPYING3.  If not see
>   than two bytes (if CLASS is FLOAT).  CLASS must be INT or
>   FLOAT.  The names follow the same rule as VECTOR_MODE uses.
>
> - VECTOR_MODES_WITH_PREFIX (PREFIX, CLASS, WIDTH);
> + VECTOR_MODES_WITH_PREFIX (PREFIX, CLASS, WIDTH, ORDER);
>   Like VECTOR_MODES, but start the mode names with PREFIX instead
> - of the usual "V".
> + of the usual "V".  ORDER is the top-level sorting order of the
> + mode, with smaller numbers indicating a higher priority.
>
>   VECTOR_BOOL_MODE (NAME, COUNT, COMPONENT, BYTESIZE)
>  Create a vector mode called NAME that contains COUNT boolean
> --
> 2.27.0


Re: [PATCH] c++ bugfix: inconsistent runtime beahavior when have -O2

2022-06-06 Thread Andrew Pinski via Gcc-patches
On Mon, Jun 6, 2022 at 2:12 AM Hongbo Liu via Gcc-patches
 wrote:
>
> Hi,
>
> This is my first time using email patch, please correct me if there is any 
> error. Thanks!
>
>
> I found a c++ optimization bug first introduced via 
> commit520d5ad337eaa15860a5a964daf7ca46cf31c029, which will make program 
> compiled with -O2 have unexpected behavior.

Except your testcase violates C/C++ aliasing rules.
You store to offset_delta via int64_t (which is long on x86_64) but
then do a load via long long.
If you fix the testcase, it works correctly at -O2 and above. If you
don't want strict aliasing to be turned on you can use
-fno-strict-aliasing.

Your patch is wrong as now none of the modref data will be used which
is exposing the bug in the testcase and it is a bug in the source code
you are compiling.

As for how to add the testcase,
https://gcc.gnu.org/wiki/HowToPrepareATestcase does have some
information.
So does https://gcc.gnu.org/onlinedocs/gccint/Testsuites.html#Testsuites .


Thanks,
Andrew Pinski

>
>
> How to produce
>
>
> 1. Checkout to latest master 
> commit:c4d702fb3c1e2f6e1bc8711da81bff59543b1b19
> 2. Build and install the gcc;
> 3. Compile the attached file `test.cc` with command: `latest-g++ -std=c++20 
> -g0 -O1 test.cc -o test_o1`;
> 4. The output of `./test_o1` will be `offset delta: 5`;
> 5. Compile the attached file `test.cc` with command: `latest-g++ 
> -std=c++20 -g0 -O2 test.cc -o test_o2`;
> 6. The output of `./test_o2` will be `offset delta: 0`;
>
>
> The program behavior with `-O2` is inconsistent with `-O1` and unexpected.
>
>
> How to fix
>
>
> The bug was introduced via commit 520d5ad337eaa15860a5a964daf7ca46cf31c029. 
> The attached patch file could fix the problem and make `-O2` and `-O1` have 
> same behavior.
>
>
> Request for suggestion
>
>
> I could not find and document about how to add this kind of test, could 
> anyone give me some suggestions?


[PATCH] Mips: Fix the ASAN shadow offset hook for the n32 ABI

2022-06-06 Thread Dimitrije Milosevic
Fix the ASAN shadow offset hook for the n32 ABI.

gcc/ChangeLog:

* config/mips/mips.cc (mips_asan_shadow_offset): Reformat
to handle the N32 ABI.
* config/mips/mips.h (SUBTARGET_SHADOW_OFFSET): Remove
the macro, as it is not needed anymore.

---

 gcc/config/mips/mips.cc | 7 ++-
 gcc/config/mips/mips.h  | 7 ---
 2 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/gcc/config/mips/mips.cc b/gcc/config/mips/mips.cc
index 2dce4007678..91e651c458e 100644
--- a/gcc/config/mips/mips.cc
+++ b/gcc/config/mips/mips.cc
@@ -22745,7 +22745,12 @@ mips_constant_alignment (const_tree exp, HOST_WIDE_INT 
align)
 static unsigned HOST_WIDE_INT
 mips_asan_shadow_offset (void)
 {
-  return SUBTARGET_SHADOW_OFFSET;
+  if (mips_abi == ABI_N32)
+return (HOST_WIDE_INT_1 << 29);
+  if (POINTER_SIZE == 64)
+return (HOST_WIDE_INT_1 << 37);
+  else
+return HOST_WIDE_INT_C (0x0aaa);
 }

 /* Implement TARGET_STARTING_FRAME_OFFSET.  See mips_compute_frame_info
diff --git a/gcc/config/mips/mips.h b/gcc/config/mips/mips.h
index 858bbba3a36..0029864fdcd 100644
--- a/gcc/config/mips/mips.h
+++ b/gcc/config/mips/mips.h
@@ -3463,10 +3463,3 @@ struct GTY(())  machine_function {
&& !TARGET_MICROMIPS && !TARGET_FIX_24K)

 #define NEED_INDICATE_EXEC_STACK 0
-
-/* Define the shadow offset for asan. Other OS's can override in the
-   respective tm.h files.  */
-#ifndef SUBTARGET_SHADOW_OFFSET
-#define SUBTARGET_SHADOW_OFFSET \
-  (POINTER_SIZE == 64 ? HOST_WIDE_INT_1 << 37 : HOST_WIDE_INT_C (0x0aaa))
-#endif

---



Re: [x86 PATCH] Double word implementation of and; cmp to not; test optimization.

2022-06-06 Thread Uros Bizjak via Gcc-patches
On Mon, Jun 6, 2022 at 10:23 AM Roger Sayle  wrote:
>
>
> Hi Uros,
> > > The major theme of this patch is to generalize many of i386.md's
> > > *di3_doubleword patterns to become *_doubleword patterns, i.e.
> > > whenever there exists a "double word" optimization for DImode with
> > > -m32, there should be an equivalent TImode optimization on TARGET_64BIT.
> >
> > No, please do not mix two different themes in one patch.
> >
> > OTOH, the only TImode optimization that can be used with SSE registers is 
> > with
> > logic instructions and some constant shifts, but there is no TImode 
> > arithmetic. I
> > assume your end goal is to introduce STV for TImode on 64-bit targets, 
> > because
> > DImode patterns for x86_32 were introduced to avoid early decomposition by
> > middle end and to split instructions that STV didn't convert to vector 
> > instructions
> > after STV pass. So, let's start with basic V1TImode support before 
> > optimizations
> > are introduced.
>
> I'm not sure I understand.  What basic V1TImode support do you/we want next?
>
> This testcase and worked example with this patch shows its benefits without 
> STV
> nor using V1TI mode vectors.  As explained in the subject, and;cmp can be 
> turned
> into the cheaper not;cmp $0, for TImode (and DImode with -m32) in the same way
> as we currently do for SImode everywhere.  Having double word modes visible to
> combine, allows it to work its magic.  A recent patch ensured that double word
> compares were visible to combine, this optimization just required that double
> word logic (AND, IOR and XOR) are visible after combine, and in fact for -m32 
> DImode
> they already are, it's just that TImode is inconsistent, leading to missed 
> optimizations.
> Likewise, STV can't choose between implementations before there are 
> alternative
> Implementations to choose from.

Let me clarify my statement:

When double-mode patterns are NOT present, the middle-end splits
double-mode operations to word-mode at expansion time, taking into
account constant propagation on split operations, etc. The reason that
DImode patterns are present are due to STV on a 32-bit target, which
wants double-word operations to be unsplit until STV pass.
Unfortunately, this approach inhibits constant propagation, and the
missing functionality was implemented in a "manual way" when
operations are split to word-mode. Without targeting STV, optimization
opportunities are quite small (one of them is the conversion you
proposed above), so there is no pressing need to introduce TImode
operations.

So, by extending all DImode logic patterns to also handle TImode on
x86_64, we can also use them to implement TImode STV pass on x86_64.
This is something that would have a noticeable impact on the generated
code.

Uros.

> As always I'm happy to do things in the order you want (modulo my 36 hour spin
> cycle), in fact the reason this is being done now is that you recommended it 
> best
> to fix pr65105-5.c after the "double word comparison", which I fully agree 
> with,
> as it leads to a better solution that doesn’t require peephole2 (in your own 
> words,
> "why isn't this being done in combine?").
>
> I'm also certainly misunderstanding.  Which piece needs to be done next?
>
> Perhaps I should have used the term "the common theme" rather than
> "the major theme" that may have made it sound like there were unrelated
> or Independent bits in this patch.  But there are no V1TI changes in it.
>
> Thanks in advance, for any clarification.
>
> Cheers,
> Roger
> --
>
>


[PATCH] c++ bugfix: inconsistent runtime beahavior when have -O2

2022-06-06 Thread Hongbo Liu via Gcc-patches
Hi,

This is my first time using email patch, please correct me if there is any 
error. Thanks!


I found a c++ optimization bug first introduced via 
commit520d5ad337eaa15860a5a964daf7ca46cf31c029, which will make program 
compiled with -O2 have unexpected behavior.


How to produce


1. Checkout to latest master 
commit:c4d702fb3c1e2f6e1bc8711da81bff59543b1b19
2. Build and install the gcc;
3. Compile the attached file `test.cc` with command: `latest-g++ -std=c++20 -g0 
-O1 test.cc -o test_o1`;
4. The output of `./test_o1` will be `offset delta: 5`;
5. Compile the attached file `test.cc` with command: `latest-g++ 
-std=c++20 -g0 -O2 test.cc -o test_o2`;
6. The output of `./test_o2` will be `offset delta: 0`;


The program behavior with `-O2` is inconsistent with `-O1` and unexpected.


How to fix


The bug was introduced via commit 520d5ad337eaa15860a5a964daf7ca46cf31c029. The 
attached patch file could fix the problem and make `-O2` and `-O1` have same 
behavior.


Request for suggestion


I could not find and document about how to add this kind of test, could anyone 
give me some suggestions?

test.cc
Description: Binary data


0001-PATCH-c-bugfix-inconsistent-logic-when-have-O2.patch
Description: Binary data


Re: [PATCH] Simplify vec_unpack of uniform_vector_p constructors in match.pd.

2022-06-06 Thread Richard Sandiford via Gcc-patches
Richard Biener  writes:
> On Sat, May 21, 2022 at 5:31 PM Roger Sayle  
> wrote:
>> This patch simplifies vec_unpack_hi_expr/vec_unpack_lo_expr of a uniform
>> constructor or vec_duplicate operand.  The motivation is from PR 105621
>> where after optimization, we're left with:
>>
>>   vect_cst__21 = {c_8(D), c_8(D), c_8(D), c_8(D)};
>>   vect_iftmp.7_4 = [vec_unpack_hi_expr] vect_cst__21;
>>
>> It turns out that there are no constant folding/simplification patterns
>> in match.pd, but the above can be simplified further to the equivalent:
>>
>>   _20 = (long int) c_8(D);
>>   vect_iftmp.7_4 = [vec_duplicate_expr] _20;
>>
>> which on x86-64 results in one less instruction, replacing pshufd $0
>> then punpackhq, with punpcklqdq.  This transformation is also useful
>> for helping CSE to spot that unpack_hi and unpack_lo are equivalent.
>>
>> This patch has been tested on x86_64-pc-linux-gnu with make bootstrap
>> and make -k check with no new failures.  Ok for mainline?
>
> I think we need a way to query whether the target can do a VEC_DUPLICATE_EXPR.
> Currently we only ever have them for VL vectors and expand via
> expand_vector_broadcast which eventually simply gives up when there's no
> vec_duplicate or vec_init optabs suitable.
>
> IIRC with the VEC_PERM extension we should be able to handle
> VEC_DUPLICATE via VEC_PERM?  (but we don't yet accept a scalar
> input, just V1?)

Yeah, should be possible.  Not sure whether it would really help though.
A VEC_PERM_EXPR with only one scalar argument could only have one sensible
permute mask[*], so there'd be a bit of false generality.

Maybe allowing scalar arguments would be more useful for 2 distinct
scalar arguments, but then I guess the question is: why stop at 2?
So if we go down the route of accepting scalars, it might be more
consistent to make VEC_PERM_EXPR support any number of operands
and use it as a replacement for CONSTRUCTOR as well.

Thanks,
Richard

[*] At least until we support “don't care” elements.  However, like I
mentioned before, I'd personally prefer a “don't care” mask to be
a separate operand, rather than treating something like -1 as a
special value.  Special values like that don't really fit the
current encoding scheme for VL constants, but a separate mask would.

A separate don't-care mask would also work for variable permute masks.
>
> I see most targets have picked up vec_duplicate but sparc, but still
> we'd need to check the specific mode.  I think we can disregart
> vec_init checking and only apply the transforms when vec_duplicate
> is available.
>
> Richard.
>
>>
>> 2022-05-21  Roger Sayle  
>>
>> gcc/ChangeLog
>> * match.pd (simplify vec_unpack_hi): Simplify VEC_UNPACK_*_EXPR
>> of uniform vector constructors and vec_duplicate.
>>
>> gcc/testsuite/ChangeLog
>> * g++.dg/vect/pr105621.cc: New test case.
>>
>>
>> Thanks in advance,
>> Roger
>> --
>>


Re: [PATCH] RISC-V:Fix a bug that is the CMO builtins are missing parameter

2022-06-06 Thread Kito Cheng via Gcc-patches
On Mon, Jun 6, 2022 at 3:29 PM  wrote:
>
> From: yulong 
>
> We changed the RTL mode and builtins format about zicbom and zicboz 
> subextensions.
>
> gcc/ChangeLog:
>
> * config/riscv/riscv-cmo.def (RISCV_BUILTIN): changed 
> "RISCV_SI(DI)_FTYPE" to "RISCV_SI(DI)_FTPYE_SI(DI)"
> * config/riscv/riscv-ftypes.def (0): deleted DEF_RISCV_FTYPE (0,(SI)) 
> and DEF_RISCV_FTYPE (0,(DI))
> * config/riscv/riscv.md: added a immediate_operand about cbo.clean, 
> cbo.flush, cbo.inval and cbo.zero instructions
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/riscv/cmo-zicbom-1.c: added a parameter
> * gcc.target/riscv/cmo-zicbom-2.c: added a parameter
> * gcc.target/riscv/cmo-zicboz-1.c: added a parameter
> * gcc.target/riscv/cmo-zicboz-2.c: added a parameter
>
> ---
>  gcc/config/riscv/riscv-cmo.def| 16 
>  gcc/config/riscv/riscv-ftypes.def |  2 --
>  gcc/config/riscv/riscv.md | 12 
>  gcc/testsuite/gcc.target/riscv/cmo-zicbom-1.c |  6 +++---
>  gcc/testsuite/gcc.target/riscv/cmo-zicbom-2.c |  6 +++---
>  gcc/testsuite/gcc.target/riscv/cmo-zicboz-1.c |  2 +-
>  gcc/testsuite/gcc.target/riscv/cmo-zicboz-2.c |  2 +-
>  7 files changed, 24 insertions(+), 22 deletions(-)
>
> diff --git a/gcc/config/riscv/riscv-cmo.def b/gcc/config/riscv/riscv-cmo.def
> index b30ecf96ec1..d43cbf62954 100644
> --- a/gcc/config/riscv/riscv-cmo.def
> +++ b/gcc/config/riscv/riscv-cmo.def
> @@ -1,16 +1,16 @@
>  // zicbom
> -RISCV_BUILTIN (clean_si, "zicbom_cbo_clean", RISCV_BUILTIN_DIRECT, 
> RISCV_SI_FTYPE, clean32),
> -RISCV_BUILTIN (clean_di, "zicbom_cbo_clean", RISCV_BUILTIN_DIRECT, 
> RISCV_DI_FTYPE, clean64),
> +RISCV_BUILTIN (clean_si, "zicbom_cbo_clean", RISCV_BUILTIN_DIRECT, 
> RISCV_SI_FTYPE_SI, clean32),
> +RISCV_BUILTIN (clean_di, "zicbom_cbo_clean", RISCV_BUILTIN_DIRECT, 
> RISCV_DI_FTYPE_DI, clean64),

That should be RISCV_VOID_FTYPE_SI and RISCV_VOID_FTYPE_DI, it takes
one argument but no return value.

>
> -RISCV_BUILTIN (flush_si, "zicbom_cbo_flush", RISCV_BUILTIN_DIRECT, 
> RISCV_SI_FTYPE, flush32),
> -RISCV_BUILTIN (flush_di, "zicbom_cbo_flush", RISCV_BUILTIN_DIRECT, 
> RISCV_DI_FTYPE, flush64),
> +RISCV_BUILTIN (flush_si, "zicbom_cbo_flush", RISCV_BUILTIN_DIRECT, 
> RISCV_SI_FTYPE_SI, flush32),
> +RISCV_BUILTIN (flush_di, "zicbom_cbo_flush", RISCV_BUILTIN_DIRECT, 
> RISCV_DI_FTYPE_DI, flush64),
>
> -RISCV_BUILTIN (inval_si, "zicbom_cbo_inval", RISCV_BUILTIN_DIRECT, 
> RISCV_SI_FTYPE, inval32),
> -RISCV_BUILTIN (inval_di, "zicbom_cbo_inval", RISCV_BUILTIN_DIRECT, 
> RISCV_DI_FTYPE, inval64),
> +RISCV_BUILTIN (inval_si, "zicbom_cbo_inval", RISCV_BUILTIN_DIRECT, 
> RISCV_SI_FTYPE_SI, inval32),
> +RISCV_BUILTIN (inval_di, "zicbom_cbo_inval", RISCV_BUILTIN_DIRECT, 
> RISCV_DI_FTYPE_DI, inval64),
>
>  // zicboz
> -RISCV_BUILTIN (zero_si, "zicboz_cbo_zero", RISCV_BUILTIN_DIRECT, 
> RISCV_SI_FTYPE, zero32),
> -RISCV_BUILTIN (zero_di, "zicboz_cbo_zero", RISCV_BUILTIN_DIRECT, 
> RISCV_DI_FTYPE, zero64),
> +RISCV_BUILTIN (zero_si, "zicboz_cbo_zero", RISCV_BUILTIN_DIRECT, 
> RISCV_SI_FTYPE_SI, zero32),
> +RISCV_BUILTIN (zero_di, "zicboz_cbo_zero", RISCV_BUILTIN_DIRECT, 
> RISCV_DI_FTYPE_DI, zero64),
>
>  // zicbop
>  RISCV_BUILTIN (prefetchi_si, "zicbop_cbo_prefetchi", RISCV_BUILTIN_DIRECT, 
> RISCV_SI_FTYPE_SI, prefetchi32),
> diff --git a/gcc/config/riscv/riscv-ftypes.def 
> b/gcc/config/riscv/riscv-ftypes.def
> index 62421292ce7..445eb8ee05d 100644
> --- a/gcc/config/riscv/riscv-ftypes.def
> +++ b/gcc/config/riscv/riscv-ftypes.def
> @@ -28,7 +28,5 @@ along with GCC; see the file COPYING3.  If not see
>
>  DEF_RISCV_FTYPE (0, (USI))
>  DEF_RISCV_FTYPE (1, (VOID, USI))
> -DEF_RISCV_FTYPE (0, (SI))
> -DEF_RISCV_FTYPE (0, (DI))
>  DEF_RISCV_FTYPE (1, (SI, SI))
>  DEF_RISCV_FTYPE (1, (DI, DI))
> diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
> index b8ab0cf169a..2d7d94eebd3 100644
> --- a/gcc/config/riscv/riscv.md
> +++ b/gcc/config/riscv/riscv.md
> @@ -2893,28 +2893,32 @@
>[(set_attr "length" "12")])
>
>  (define_insn "riscv_clean_"
> -  [(unspec_volatile:X [(match_operand:X 0 "register_operand" "r")]
> +  [(unspec_volatile:X [(match_operand:X 0 "register_operand" "r")
> +   (match_operand:X 1 "immediate_operand" "i")]

No need for a second input operand.

>  UNSPECV_CLEAN)]
>"TARGET_ZICBOM"
>"cbo.clean\t%a0"
>  )
>
>  (define_insn "riscv_flush_"
> -  [(unspec_volatile:X [(match_operand:X 0 "register_operand" "r")]
> +  [(unspec_volatile:X [(match_operand:X 0 "register_operand" "r")
> +   (match_operand:X 1 "immediate_operand" "i")]
>  UNSPECV_FLUSH)]
>"TARGET_ZICBOM"
>"cbo.flush\t%a0"
>  )
>
>  (define_insn "riscv_inval_"
> -  [(unspec_volatile:X [(match_operand:X 0 "register_operand" "r")]
> +  [(unspec_volatile:X [(match_operand:X 0 "register_operand" "r")
> +   (match_operand:X 1 "immediate_operand" 

PING^1 [PATCH v3] rs6000: Adjust mov optabs for opaque modes [PR103353]

2022-06-06 Thread Kewen.Lin via Gcc-patches
Hi,

Gentle ping https://gcc.gnu.org/pipermail/gcc-patches/2022-May/595209.html

BR,
Kewen

on 2022/5/18 22:07, Kewen.Lin via Gcc-patches wrote:
> Hi,
> 
> As PR103353 shows, we may want to continue to expand a MMA built-in
> function like a normal function, even if we have already emitted
> error messages about some missing required conditions.  As shown in
> that PR, without one explicit mov optab on OOmode provided, it would
> call emit_move_insn recursively.
> 
> So this patch is to allow the mov pattern to be generated when we are
> expanding to RTL and have seen errors even without MMA supported, it's
> expected that the generated pattern would not cause further ICEs as the
> compilation would stop soon after expanding.
> 
> Bootstrapped and regtested on powerpc64-linux-gnu P8 and
> powerpc64le-linux-gnu P9 and P10.
> 
> v3: Update test case with dg-excess-errors.
> 
> v2: Polish some comments and add one test case as Will and Peter suggested.
> https://gcc.gnu.org/pipermail/gcc-patches/2022-April/592916.html
> 
> v1: https://gcc.gnu.org/pipermail/gcc-patches/2022-March/591150.html
> 
> Is it ok for trunk?
> 
> BR,
> Kewen
> -
>   PR target/103353
> 
> gcc/ChangeLog:
> 
>   * config/rs6000/mma.md (define_expand movoo): Move TARGET_MMA condition
>   check to preparation statements and add handlings for !TARGET_MMA.
>   (define_expand movxo): Likewise.
> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.target/powerpc/pr103353.c: New test.
> ---
>  gcc/config/rs6000/mma.md| 42 ++---
>  gcc/testsuite/gcc.target/powerpc/pr103353.c | 22 +++
>  2 files changed, 58 insertions(+), 6 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/pr103353.c
> 
> diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md
> index 907c9d6d516..746a77a0957 100644
> --- a/gcc/config/rs6000/mma.md
> +++ b/gcc/config/rs6000/mma.md
> @@ -268,10 +268,25 @@ (define_int_attr avvi4i4i4  
> [(UNSPEC_MMA_PMXVI8GER4PP   "pmxvi8ger4pp")
>  (define_expand "movoo"
>[(set (match_operand:OO 0 "nonimmediate_operand")
>   (match_operand:OO 1 "input_operand"))]
> -  "TARGET_MMA"
> +  ""
>  {
> -  rs6000_emit_move (operands[0], operands[1], OOmode);
> -  DONE;
> +  if (TARGET_MMA) {
> +rs6000_emit_move (operands[0], operands[1], OOmode);
> +DONE;
> +  }
> +  /* Opaque modes are only expected to be available when MMA is supported,
> + but PR103353 shows we may want to continue to expand a MMA built-in
> + function, even if we have already emitted error messages about some
> + missing required conditions.  As shown in that PR, without one
> + explicit mov optab on OOmode provided, it would call emit_move_insn
> + recursively.  So we allow this pattern to be generated when we are
> + expanding to RTL and have seen errors, even though there is no MMA
> + support.  It would not cause further ICEs as the compilation would
> + stop soon after expanding.  */
> +  else if (currently_expanding_to_rtl && seen_error ())
> +;
> +  else
> +gcc_unreachable ();
>  })
> 
>  (define_insn_and_split "*movoo"
> @@ -300,10 +315,25 @@ (define_insn_and_split "*movoo"
>  (define_expand "movxo"
>[(set (match_operand:XO 0 "nonimmediate_operand")
>   (match_operand:XO 1 "input_operand"))]
> -  "TARGET_MMA"
> +  ""
>  {
> -  rs6000_emit_move (operands[0], operands[1], XOmode);
> -  DONE;
> +  if (TARGET_MMA) {
> +rs6000_emit_move (operands[0], operands[1], XOmode);
> +DONE;
> +  }
> +  /* Opaque modes are only expected to be available when MMA is supported,
> + but PR103353 shows we may want to continue to expand a MMA built-in
> + function, even if we have already emitted error messages about some
> + missing required conditions.  As shown in that PR, without one
> + explicit mov optab on XOmode provided, it would call emit_move_insn
> + recursively.  So we allow this pattern to be generated when we are
> + expanding to RTL and have seen errors, even though there is no MMA
> + support.  It would not cause further ICEs as the compilation would
> + stop soon after expanding.  */
> +  else if (currently_expanding_to_rtl && seen_error ())
> +;
> +  else
> +gcc_unreachable ();
>  })
> 
>  (define_insn_and_split "*movxo"
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr103353.c 
> b/gcc/testsuite/gcc.target/powerpc/pr103353.c
> new file mode 100644
> index 000..5d519fb1b7b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pr103353.c
> @@ -0,0 +1,22 @@
> +/* { dg-require-effective-target powerpc_altivec_ok } */
> +/* If the default cpu type is power10 or later, MMA is enabled by default.
> +   To keep the test point available all the time, this case specifies
> +   -mdejagnu-cpu=power6 to make it be tested without MMA.  */
> +/* { dg-options "-maltivec -mdejagnu-cpu=power6" } */
> +
> +/* Verify there is no ICE and don't check the error messages on MMA
> +   

PING^1 [PATCH v3] rs6000: Fix the check of bif argument number [PR104482]

2022-06-06 Thread Kewen.Lin via Gcc-patches
Hi,

Gentle ping https://gcc.gnu.org/pipermail/gcc-patches/2022-May/595209.html

BR,
Kewen

on 2022/5/18 22:07, Kewen.Lin via Gcc-patches wrote:
> Hi,
> 
> As PR104482 shown, it's one regression about the handlings when
> the argument number is more than the one of built-in function
> prototype.  The new bif support only catches the case that the
> argument number is less than the one of function prototype, but
> it misses the case that the argument number is more than the one
> of function prototype.  Because it uses "n != expected_args",
> n is updated in
> 
>for (n = 0; !VOID_TYPE_P (TREE_VALUE (fnargs)) && n < nargs;
> fnargs = TREE_CHAIN (fnargs), n++)
> 
> , it's restricted to be less than or equal to expected_args with
> the guard !VOID_TYPE_P (TREE_VALUE (fnargs)), so it's wrong.
> 
> The fix is to use nargs instead, also move the checking hunk's
> location ahead to avoid useless further scanning when the counts
> mismatch.
> 
> Bootstrapped and regtested on powerpc64-linux-gnu P8 and
> powerpc64le-linux-gnu P9 and P10.
> 
> v3: Update test case with dg-excess-errors.
> 
> v2: Add one test case and refine commit logs.
> https://gcc.gnu.org/pipermail/gcc-patches/2022-April/593155.html
> 
> v1: https://gcc.gnu.org/pipermail/gcc-patches/2022-March/591768.html
> 
> Is it ok for trunk?
> 
> BR,
> Kewen
> -
>   PR target/104482
> 
> gcc/ChangeLog:
> 
>   * config/rs6000/rs6000-c.cc (altivec_resolve_overloaded_builtin): Fix
>   the equality check for argument number, and move this hunk ahead.
> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.target/powerpc/pr104482.c: New test.
> ---
>  gcc/config/rs6000/rs6000-c.cc   | 60 ++---
>  gcc/testsuite/gcc.target/powerpc/pr104482.c | 16 ++
>  2 files changed, 46 insertions(+), 30 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/pr104482.c
> 
> diff --git a/gcc/config/rs6000/rs6000-c.cc b/gcc/config/rs6000/rs6000-c.cc
> index 9c8cbd7a66e..61881f29230 100644
> --- a/gcc/config/rs6000/rs6000-c.cc
> +++ b/gcc/config/rs6000/rs6000-c.cc
> @@ -1756,6 +1756,36 @@ altivec_resolve_overloaded_builtin (location_t loc, 
> tree fndecl,
>vec *arglist = static_cast *> 
> (passed_arglist);
>unsigned int nargs = vec_safe_length (arglist);
> 
> +  /* If the number of arguments did not match the prototype, return NULL
> + and the generic code will issue the appropriate error message.  Skip
> + this test for functions where we don't fully describe all the possible
> + overload signatures in rs6000-overload.def (because they aren't relevant
> + to the expansion here).  If we don't, we get confusing error messages.  
> */
> +  /* As an example, for vec_splats we have:
> +
> +; There are no actual builtins for vec_splats.  There is special handling for
> +; this in altivec_resolve_overloaded_builtin in rs6000-c.cc, where the call
> +; is replaced by a constructor.  The single overload here causes
> +; __builtin_vec_splats to be registered with the front end so that can 
> happen.
> +[VEC_SPLATS, vec_splats, __builtin_vec_splats]
> +  vsi __builtin_vec_splats (vsi);
> +ABS_V4SI SPLATS_FAKERY
> +
> +So even though __builtin_vec_splats accepts all vector types, the
> +infrastructure cheats and just records one prototype.  We end up getting
> +an error message that refers to this specific prototype even when we
> +are handling a different argument type.  That is completely confusing
> +to the user, so it's best to let these cases be handled individually
> +in the resolve_vec_splats, etc., helper functions.  */
> +
> +  if (expected_args != nargs
> +  && !(fcode == RS6000_OVLD_VEC_PROMOTE
> +|| fcode == RS6000_OVLD_VEC_SPLATS
> +|| fcode == RS6000_OVLD_VEC_EXTRACT
> +|| fcode == RS6000_OVLD_VEC_INSERT
> +|| fcode == RS6000_OVLD_VEC_STEP))
> +return NULL;
> +
>for (n = 0;
> !VOID_TYPE_P (TREE_VALUE (fnargs)) && n < nargs;
> fnargs = TREE_CHAIN (fnargs), n++)
> @@ -1816,36 +1846,6 @@ altivec_resolve_overloaded_builtin (location_t loc, 
> tree fndecl,
>types[n] = type;
>  }
> 
> -  /* If the number of arguments did not match the prototype, return NULL
> - and the generic code will issue the appropriate error message.  Skip
> - this test for functions where we don't fully describe all the possible
> - overload signatures in rs6000-overload.def (because they aren't relevant
> - to the expansion here).  If we don't, we get confusing error messages.  
> */
> -  /* As an example, for vec_splats we have:
> -
> -; There are no actual builtins for vec_splats.  There is special handling for
> -; this in altivec_resolve_overloaded_builtin in rs6000-c.cc, where the call
> -; is replaced by a constructor.  The single overload here causes
> -; __builtin_vec_splats to be registered with the front end so that can 
> happen.
> -[VEC_SPLATS, vec_splats, __builtin_vec_splats]
> -  vsi 

PING^1 [PATCH] rs6000: Handle unresolved overloaded builtin [PR105485]

2022-06-06 Thread Kewen.Lin via Gcc-patches
Hi,

Gentle ping https://gcc.gnu.org/pipermail/gcc-patches/2022-May/594699.html

BR,
Kewen

on 2022/5/13 13:29, Kewen.Lin via Gcc-patches wrote:
> Hi,
> 
> PR105485 exposes that new builtin function framework doesn't handle
> unresolved overloaded builtin function well.  With new builtin
> function support, we don't have builtin info for any overloaded
> rs6000_gen_builtins enum, since they are expected to be resolved to
> one specific instance.  So when function rs6000_gimple_fold_builtin
> faces one unresolved overloaded builtin, the access for builtin info
> becomes out of bound and gets ICE then.
> 
> We should not try to fold one unresolved overloaded builtin there
> and as the previous support we should emit one error message during
> expansion phase like "unresolved overload for builtin ...".
> 
> Bootstrapped and regtested on powerpc64-linux-gnu P8 and
> powerpc64le-linux-gnu P9 and P10.
> 
> Is it ok for trunk?
> 
> BR,
> Kewen
> -
>   PR target/105485
> 
> gcc/ChangeLog:
> 
>   * config/rs6000/rs6000-builtin.cc (rs6000_gimple_fold_builtin): Add
>   the handling for unresolved overloaded builtin function.
>   (rs6000_expand_builtin): Likewise.
> 
> gcc/testsuite/ChangeLog:
> 
>   * g++.target/powerpc/pr105485.C: New test.
> 
> ---
>  gcc/config/rs6000/rs6000-builtin.cc | 13 +
>  gcc/testsuite/g++.target/powerpc/pr105485.C |  9 +
>  2 files changed, 22 insertions(+)
>  create mode 100644 gcc/testsuite/g++.target/powerpc/pr105485.C
> 
> diff --git a/gcc/config/rs6000/rs6000-builtin.cc 
> b/gcc/config/rs6000/rs6000-builtin.cc
> index e925ba9fad9..e102305c90c 100644
> --- a/gcc/config/rs6000/rs6000-builtin.cc
> +++ b/gcc/config/rs6000/rs6000-builtin.cc
> @@ -1294,6 +1294,11 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
>enum tree_code bcode;
>gimple *g;
> 
> +  /* For an unresolved overloaded builtin, return early here since there
> + is no builtin info for it and we are unable to fold it.  */
> +  if (fn_code > RS6000_OVLD_NONE)
> +return false;
> +
>size_t uns_fncode = (size_t) fn_code;
>enum insn_code icode = rs6000_builtin_info[uns_fncode].icode;
>const char *fn_name1 = rs6000_builtin_info[uns_fncode].bifname;
> @@ -3295,6 +3300,14 @@ rs6000_expand_builtin (tree exp, rtx target, rtx /* 
> subtarget */,
>tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
>enum rs6000_gen_builtins fcode
>  = (enum rs6000_gen_builtins) DECL_MD_FUNCTION_CODE (fndecl);
> +
> +  /* Emit error message if it's an unresolved overloaded builtin.  */
> +  if (fcode > RS6000_OVLD_NONE)
> +{
> +  error ("unresolved overload for builtin %qF", fndecl);
> +  return const0_rtx;
> +}
> +
>size_t uns_fcode = (size_t)fcode;
>enum insn_code icode = rs6000_builtin_info[uns_fcode].icode;
> 
> diff --git a/gcc/testsuite/g++.target/powerpc/pr105485.C 
> b/gcc/testsuite/g++.target/powerpc/pr105485.C
> new file mode 100644
> index 000..a3b8290df8c
> --- /dev/null
> +++ b/gcc/testsuite/g++.target/powerpc/pr105485.C
> @@ -0,0 +1,9 @@
> +/* It's to verify no ICE here, ignore error/warning messages since
> +   they are not test points here.  */
> +/* { dg-excess-errors "pr105485" } */
> +
> +template  void __builtin_vec_vslv();
> +typedef  __attribute__((altivec(vector__))) char T;
> +T b (T c, T d) {
> +return __builtin_vec_vslv(c, d);
> +}


Re: [PATCH-1, rs6000] Replace shift and ior insns with one rotate and mask insn for bswap pattern [PR93453]

2022-06-06 Thread Kewen.Lin via Gcc-patches
Hi Haochen,

on 2022/6/6 10:21, HAO CHEN GUI wrote:
> Hi,
>   This patch replaces shift and ior insns with one rotate and mask
> insn for the split patterns which are for DI byte swap on Power6 and
> before. The test cases shows the optimization.

Nit: I noticed two splitting which are updated in this patch are guarded with 

  "TARGET_POWERPC64 && !TARGET_LDBRX && reload_completed"

and

  "TARGET_POWERPC64 && !TARGET_P9_VECTOR && reload_completed"

separately, "Power6 and before" doesn't match the later condition well.

Maybe you can remove "Power6 and before" in commit log to avoid confusion.  :)

> 
>   Bootstrapped and tested on ppc64 Linux BE and LE with no regressions.
> Is this okay for trunk? Any recommendations? Thanks a lot.
> 
> ChangeLog
> 2022-06-06 Haochen Gui 
> 
> gcc/
>   * config/rs6000/rs6000.md (split for DI load byte swap): Merge shift
>   and ior insns to one rotate and mask insn.
>   (split for DI register byte swap): Likewise.

Nit: (define_split for bswapdi load) and (define_split for bswapdi reg)?

> 
> gcc/testsuite/
>   * gcc.target/powerpc/pr93453-1.c: New.
> 
> patch.diff
> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
> index bf85baa5370..2e38195aaac 100644
> --- a/gcc/config/rs6000/rs6000.md
> +++ b/gcc/config/rs6000/rs6000.md
> @@ -2828,8 +2828,8 @@ (define_split
>emit_insn (gen_bswapsi2 (dest_32, word2));
>  }
> 
> -  emit_insn (gen_ashldi3 (op3, op3, GEN_INT (32)));
> -  emit_insn (gen_iordi3 (dest, dest, op3));
> +  emit_insn (gen_rotldi3_insert_3 (dest, op3, GEN_INT (32), dest,
> +GEN_INT ((HOST_WIDE_INT_1U << 32) - 1)));

Very nit: I found two other used places of gen_rotldi3_insert_3 are using
GEN_INT (0x) directly, maybe it's good to keep this same with them ...

>DONE;
>  })
> 
> @@ -2914,10 +2914,10 @@ (define_split
>rtx op3_si  = simplify_gen_subreg (SImode, op3, DImode, lo_off);
> 
>emit_insn (gen_lshrdi3 (op2, src, GEN_INT (32)));
> -  emit_insn (gen_bswapsi2 (dest_si, src_si));
> -  emit_insn (gen_bswapsi2 (op3_si, op2_si));
> -  emit_insn (gen_ashldi3 (dest, dest, GEN_INT (32)));
> -  emit_insn (gen_iordi3 (dest, dest, op3));
> +  emit_insn (gen_bswapsi2 (op3_si, src_si));
> +  emit_insn (gen_bswapsi2 (dest_si, op2_si));
> +  emit_insn (gen_rotldi3_insert_3 (dest, op3, GEN_INT (32), dest,
> +GEN_INT ((HOST_WIDE_INT_1U << 32) - 1)));


... Same here.

OK with those nits fixed.  Thanks!

BR,
Kewen

>DONE;
>  })
> 
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr93453-1.c 
> b/gcc/testsuite/gcc.target/powerpc/pr93453-1.c
> new file mode 100644
> index 000..4271886561f
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pr93453-1.c
> @@ -0,0 +1,14 @@
> +/* { dg-do compile { target lp64 } } */
> +/* { dg-options "-mdejagnu-cpu=power6 -O2" } */
> +
> +unsigned long load_byte_reverse (unsigned long *in)
> +{
> +   return __builtin_bswap64 (*in);
> +}
> +
> +unsigned long byte_reverse (unsigned long in)
> +{
> +   return __builtin_bswap64 (in);
> +}
> +
> +/* { dg-final { scan-assembler-times {\mrldimi\M} 2 } } */





RE: [PING] PR middle-end/95126: Expand small const structs as immediate constants

2022-06-06 Thread Roger Sayle


Hi Rainer (and Andreas),
My sincere apologies for the breakage.  I'm currently regression testing a
solution to the ICEs introduced by my "small const struct" patch, which
fingers-crossed might also fix this Ada bootstrap.  For a while when
Rainer also reported the bootstrap failure is visible, on
x86_64-pc-linux-gnu,
I thought I'd be able to diagnose and fix the issue myself, but alas my
--enable-languages="all" builds (still) fail earlier with:

gcc -std=gnu99 -c -g  -gnatpg -gnatwns -gnata -W -Wall -I- -I.
-Iada/generated -Iada -I../../gcc/gcc/ada ../../gcc/gcc/ada/osint.adb -o
ada/osint.o
osint.adb:438:31: "strlen" not declared in "CRTL"
osint.adb:441:14: "strncpy" not declared in "CRTL"
osint.adb:675:21: "strlen" not declared in "CRTL"
osint.adb:728:16: "Open_Append" is undefined
osint.adb:1108:41: "int64" not declared in "CRTL"
osint.adb:3144:28: "strlen" not declared in "CRTL"
osint.adb:3147:11: "strncpy" not declared in "CRTL"

The one experiment I'd like to be able to try, to investigate the cause/cure
of this, is:

diff --git a/gcc/calls.cc b/gcc/calls.cc
index a4336c1..05fdd24 100644
--- a/gcc/calls.cc
+++ b/gcc/calls.cc
@@ -2177,7 +2177,7 @@ load_register_parameters (struct arg_data *args, int
num_a
 VAR_DECL with a simple constructor, expand that constructor
 via a pseudo rather than read from (possibly misaligned)
 memory.  PR middle-end/95126.  */
- else if (nregs == 1
+ else if (0 && nregs == 1
   && partial == 0
   && !args[i].pass_on_stack
   && VAR_P (tree_value)

My "small const structs" patch affected code generation in three places, and
identifying
which of these is causing the miscompilation issue for gnat will help narrow
down the
problem, or worst case allow reverting less of the problematic patch.

Again my deep apologies for the breakage.  Hopefully the fix I'm testing
will cure this as
well (but an ICE is different symptom to a silent miscompilation).

Sorry again,
Roger
--

> -Original Message-
> From: Rainer Orth 
> Sent: 05 June 2022 21:31
> To: Andreas Schwab 
> Cc: Roger Sayle ; gcc-patches@gcc.gnu.org
> Subject: Re: [PING] PR middle-end/95126: Expand small const structs as
> immediate constants
> 
> Andreas Schwab  writes:
> 
> > This breaks Ada on aarch64 in stage3, probably a miscompiled stage2
> > compiler.  For example:
> >
> > /opt/gcc/gcc-20220605/Build/./prev-gcc/xgcc
> > -B/opt/gcc/gcc-20220605/Build/./prev-gcc/
> > -B/usr/aarch64-suse-linux/bin/ -B/usr/aarch64-suse-linux/bin/
> > -B/usr/aarch64-suse-linux/lib/ -isystem
> > /usr/aarch64-suse-linux/include -isystem
> > /usr/aarch64-suse-linux/sys-include -fchecking=1 -c -g -O2
> > -fchecking=1 -gnatpg -gnata -W -Wall -nostdinc -I- -I. -Iada/generated
> > -Iada -I../../gcc/ada -Iada/libgnat -I../../gcc/ada/libgnat
> > -Iada/gcc-interface -I../../gcc/ada/gcc-interface
> > ../../gcc/ada/spark_xrefs.adb -o ada/spark_xrefs.o
> > +===GNAT BUG
> > +DETECTED==+
> > | 13.0.0 20220605 (experimental) [master ad6919374be]
(aarch64-suse-linux)
> |
> > | Assert_Failure failed precondition from sinfo-nodes.ads:5419
|
> > | Error detected at types.ads:53:28
|
> > | Compiling ../../gcc/ada/spark_xrefs.adb
|
> > | Please submit a bug report; see https://gcc.gnu.org/bugs/ .
|
> > | Use a subject line meaningful to you and us to track the bug.
|
> > | Include the entire contents of this bug box in the report.
|
> > | Include the exact command that you entered.
|
> > | Also include sources listed below.
|
> >
> +
> =
> > +=+
> 
> Confirmed: this also happens on i386-pc-solaris2.11,
sparc-sun-solaris2.11, and
> x86_64-pc-linux-gnu.



RE: [x86 PATCH] Double word implementation of and; cmp to not; test optimization.

2022-06-06 Thread Roger Sayle


Hi Uros,
> > The major theme of this patch is to generalize many of i386.md's
> > *di3_doubleword patterns to become *_doubleword patterns, i.e.
> > whenever there exists a "double word" optimization for DImode with
> > -m32, there should be an equivalent TImode optimization on TARGET_64BIT.
> 
> No, please do not mix two different themes in one patch.
> 
> OTOH, the only TImode optimization that can be used with SSE registers is with
> logic instructions and some constant shifts, but there is no TImode 
> arithmetic. I
> assume your end goal is to introduce STV for TImode on 64-bit targets, because
> DImode patterns for x86_32 were introduced to avoid early decomposition by
> middle end and to split instructions that STV didn't convert to vector 
> instructions
> after STV pass. So, let's start with basic V1TImode support before 
> optimizations
> are introduced.

I'm not sure I understand.  What basic V1TImode support do you/we want next?

This testcase and worked example with this patch shows its benefits without STV
nor using V1TI mode vectors.  As explained in the subject, and;cmp can be turned
into the cheaper not;cmp $0, for TImode (and DImode with -m32) in the same way
as we currently do for SImode everywhere.  Having double word modes visible to
combine, allows it to work its magic.  A recent patch ensured that double word
compares were visible to combine, this optimization just required that double
word logic (AND, IOR and XOR) are visible after combine, and in fact for -m32 
DImode
they already are, it's just that TImode is inconsistent, leading to missed 
optimizations.
Likewise, STV can't choose between implementations before there are alternative
Implementations to choose from.

As always I'm happy to do things in the order you want (modulo my 36 hour spin
cycle), in fact the reason this is being done now is that you recommended it 
best
to fix pr65105-5.c after the "double word comparison", which I fully agree with,
as it leads to a better solution that doesn’t require peephole2 (in your own 
words,
"why isn't this being done in combine?").

I'm also certainly misunderstanding.  Which piece needs to be done next?

Perhaps I should have used the term "the common theme" rather than
"the major theme" that may have made it sound like there were unrelated
or Independent bits in this patch.  But there are no V1TI changes in it.

Thanks in advance, for any clarification.

Cheers,
Roger
--




Re: [PATCH v2, rs6000] Fix ICE on expand bcd__ [PR100736]

2022-06-06 Thread HAO CHEN GUI via Gcc-patches



On 2/6/2022 下午 5:04, Segher Boessenkool wrote:
> Hi!
> 
> On Thu, Jun 02, 2022 at 01:30:04PM +0800, HAO CHEN GUI wrote:
>> Segher,
>>   Does BCD comparison return false when either operand is invalid coding?
> 
> It sets all of LT, GT, and EQ to 0 (it normally sets exactly one of them
> to 1).  It sets bit 3 (the "SO" bit usually) to 1.
> 
> That is what the machine insns do.  What the builtins do is undefined as
> far as I know?  If So we can do whatever is most convenient, so, not
> handle it specifically at all, just go with what falls out.

We defined the following unordered BCD builtins in rs6000-builtin.def.
They check the bit 3 for overflow.

  const signed int __builtin_bcdadd_ov_v1ti (vsq, vsq, const int<1>);
BCDADD_OV_V1TI bcdadd_unordered_v1ti {}

  const signed int __builtin_bcdadd_ov_v16qi (vsc, vsc, const int<1>);
BCDADD_OV_V16QI bcdadd_unordered_v16qi {}

Also Xlc defines three BCD builtins for overflow and invalid coding.
https://www.ibm.com/docs/en/xl-c-and-cpp-linux/16.1.1?topic=functions-bcd-test-add-subtract-overflow
Shall GCC keep up with Xlc? Please advise.

Thanks
Gui Haochen

> 
>> If yes, the result could be 3-way. We can check gt and eq bits for ge.
> 
> You can check the LT bit, instead: it is only one branch insn, and also
> only one setbc[r] insn (it can be slightly more expensive if you can use
> only older insns).
> 
>> We still can't use crnot to only check lt bit as there could be invalid
>> coding.
>>   Also, do you think finite-math-only excludes invalid coding? Seems GCC
>> doesn't clear define it.
> 
> This is not floating-point code at all, it should not be influenced at
> all by finite-math-only!
> 
> 
> Segher


[PATCH] RISC-V:Fix a bug that is the CMO builtins are missing parameter

2022-06-06 Thread shiyulong
From: yulong 

We changed the RTL mode and builtins format about zicbom and zicboz 
subextensions.

gcc/ChangeLog:

* config/riscv/riscv-cmo.def (RISCV_BUILTIN): changed 
"RISCV_SI(DI)_FTYPE" to "RISCV_SI(DI)_FTPYE_SI(DI)"
* config/riscv/riscv-ftypes.def (0): deleted DEF_RISCV_FTYPE (0,(SI)) 
and DEF_RISCV_FTYPE (0,(DI))
* config/riscv/riscv.md: added a immediate_operand about cbo.clean, 
cbo.flush, cbo.inval and cbo.zero instructions

gcc/testsuite/ChangeLog:

* gcc.target/riscv/cmo-zicbom-1.c: added a parameter
* gcc.target/riscv/cmo-zicbom-2.c: added a parameter
* gcc.target/riscv/cmo-zicboz-1.c: added a parameter
* gcc.target/riscv/cmo-zicboz-2.c: added a parameter

---
 gcc/config/riscv/riscv-cmo.def| 16 
 gcc/config/riscv/riscv-ftypes.def |  2 --
 gcc/config/riscv/riscv.md | 12 
 gcc/testsuite/gcc.target/riscv/cmo-zicbom-1.c |  6 +++---
 gcc/testsuite/gcc.target/riscv/cmo-zicbom-2.c |  6 +++---
 gcc/testsuite/gcc.target/riscv/cmo-zicboz-1.c |  2 +-
 gcc/testsuite/gcc.target/riscv/cmo-zicboz-2.c |  2 +-
 7 files changed, 24 insertions(+), 22 deletions(-)

diff --git a/gcc/config/riscv/riscv-cmo.def b/gcc/config/riscv/riscv-cmo.def
index b30ecf96ec1..d43cbf62954 100644
--- a/gcc/config/riscv/riscv-cmo.def
+++ b/gcc/config/riscv/riscv-cmo.def
@@ -1,16 +1,16 @@
 // zicbom
-RISCV_BUILTIN (clean_si, "zicbom_cbo_clean", RISCV_BUILTIN_DIRECT, 
RISCV_SI_FTYPE, clean32),
-RISCV_BUILTIN (clean_di, "zicbom_cbo_clean", RISCV_BUILTIN_DIRECT, 
RISCV_DI_FTYPE, clean64),
+RISCV_BUILTIN (clean_si, "zicbom_cbo_clean", RISCV_BUILTIN_DIRECT, 
RISCV_SI_FTYPE_SI, clean32),
+RISCV_BUILTIN (clean_di, "zicbom_cbo_clean", RISCV_BUILTIN_DIRECT, 
RISCV_DI_FTYPE_DI, clean64),
 
-RISCV_BUILTIN (flush_si, "zicbom_cbo_flush", RISCV_BUILTIN_DIRECT, 
RISCV_SI_FTYPE, flush32),
-RISCV_BUILTIN (flush_di, "zicbom_cbo_flush", RISCV_BUILTIN_DIRECT, 
RISCV_DI_FTYPE, flush64),
+RISCV_BUILTIN (flush_si, "zicbom_cbo_flush", RISCV_BUILTIN_DIRECT, 
RISCV_SI_FTYPE_SI, flush32),
+RISCV_BUILTIN (flush_di, "zicbom_cbo_flush", RISCV_BUILTIN_DIRECT, 
RISCV_DI_FTYPE_DI, flush64),
 
-RISCV_BUILTIN (inval_si, "zicbom_cbo_inval", RISCV_BUILTIN_DIRECT, 
RISCV_SI_FTYPE, inval32),
-RISCV_BUILTIN (inval_di, "zicbom_cbo_inval", RISCV_BUILTIN_DIRECT, 
RISCV_DI_FTYPE, inval64),
+RISCV_BUILTIN (inval_si, "zicbom_cbo_inval", RISCV_BUILTIN_DIRECT, 
RISCV_SI_FTYPE_SI, inval32),
+RISCV_BUILTIN (inval_di, "zicbom_cbo_inval", RISCV_BUILTIN_DIRECT, 
RISCV_DI_FTYPE_DI, inval64),
 
 // zicboz
-RISCV_BUILTIN (zero_si, "zicboz_cbo_zero", RISCV_BUILTIN_DIRECT, 
RISCV_SI_FTYPE, zero32),
-RISCV_BUILTIN (zero_di, "zicboz_cbo_zero", RISCV_BUILTIN_DIRECT, 
RISCV_DI_FTYPE, zero64),
+RISCV_BUILTIN (zero_si, "zicboz_cbo_zero", RISCV_BUILTIN_DIRECT, 
RISCV_SI_FTYPE_SI, zero32),
+RISCV_BUILTIN (zero_di, "zicboz_cbo_zero", RISCV_BUILTIN_DIRECT, 
RISCV_DI_FTYPE_DI, zero64),
 
 // zicbop
 RISCV_BUILTIN (prefetchi_si, "zicbop_cbo_prefetchi", RISCV_BUILTIN_DIRECT, 
RISCV_SI_FTYPE_SI, prefetchi32),
diff --git a/gcc/config/riscv/riscv-ftypes.def 
b/gcc/config/riscv/riscv-ftypes.def
index 62421292ce7..445eb8ee05d 100644
--- a/gcc/config/riscv/riscv-ftypes.def
+++ b/gcc/config/riscv/riscv-ftypes.def
@@ -28,7 +28,5 @@ along with GCC; see the file COPYING3.  If not see
 
 DEF_RISCV_FTYPE (0, (USI))
 DEF_RISCV_FTYPE (1, (VOID, USI))
-DEF_RISCV_FTYPE (0, (SI))
-DEF_RISCV_FTYPE (0, (DI))
 DEF_RISCV_FTYPE (1, (SI, SI))
 DEF_RISCV_FTYPE (1, (DI, DI))
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index b8ab0cf169a..2d7d94eebd3 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -2893,28 +2893,32 @@
   [(set_attr "length" "12")])
 
 (define_insn "riscv_clean_"
-  [(unspec_volatile:X [(match_operand:X 0 "register_operand" "r")]
+  [(unspec_volatile:X [(match_operand:X 0 "register_operand" "r")
+   (match_operand:X 1 "immediate_operand" "i")]
 UNSPECV_CLEAN)]
   "TARGET_ZICBOM"
   "cbo.clean\t%a0"
 )
 
 (define_insn "riscv_flush_"
-  [(unspec_volatile:X [(match_operand:X 0 "register_operand" "r")]
+  [(unspec_volatile:X [(match_operand:X 0 "register_operand" "r")
+   (match_operand:X 1 "immediate_operand" "i")]
 UNSPECV_FLUSH)]
   "TARGET_ZICBOM"
   "cbo.flush\t%a0"
 )
 
 (define_insn "riscv_inval_"
-  [(unspec_volatile:X [(match_operand:X 0 "register_operand" "r")]
+  [(unspec_volatile:X [(match_operand:X 0 "register_operand" "r")
+   (match_operand:X 1 "immediate_operand" "i")]
 UNSPECV_INVAL)]
   "TARGET_ZICBOM"
   "cbo.inval\t%a0"
 )
 
 (define_insn "riscv_zero_"
-  [(unspec_volatile:X [(match_operand:X 0 "register_operand" "r")]
+  [(unspec_volatile:X [(match_operand:X 0 "register_operand" "r")
+   (match_operand:X 1 "immediate_operand" "i")]
 UNSPECV_ZERO)]
   "TARGET_ZICBOZ"
   "cbo.zero\t%a0"
diff --git 

[PATCH] Update document for VECTOR_MODES_WITH_PREFIX

2022-06-06 Thread Kewen.Lin via Gcc-patches
Hi,

r10-3912 updated the format of VECTOR_MODES_WITH_PREFIX by
adding one more parameter ORDER, the related document is out
of date.  So update the document for ORDER.

Is it ok for trunk?

BR,
Kewen
-

gcc/ChangeLog:

* machmode.def (VECTOR_MODES_WITH_PREFIX): Update document for
parameter ORDER.
---
 gcc/machmode.def | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/gcc/machmode.def b/gcc/machmode.def
index b62a5fbc683..d62563601fc 100644
--- a/gcc/machmode.def
+++ b/gcc/machmode.def
@@ -142,9 +142,10 @@ along with GCC; see the file COPYING3.  If not see
than two bytes (if CLASS is FLOAT).  CLASS must be INT or
FLOAT.  The names follow the same rule as VECTOR_MODE uses.

- VECTOR_MODES_WITH_PREFIX (PREFIX, CLASS, WIDTH);
+ VECTOR_MODES_WITH_PREFIX (PREFIX, CLASS, WIDTH, ORDER);
Like VECTOR_MODES, but start the mode names with PREFIX instead
-   of the usual "V".
+   of the usual "V".  ORDER is the top-level sorting order of the
+   mode, with smaller numbers indicating a higher priority.

  VECTOR_BOOL_MODE (NAME, COUNT, COMPONENT, BYTESIZE)
 Create a vector mode called NAME that contains COUNT boolean
--
2.27.0


[PATCH] inline: Rebuild target option node for caller [PR105459]

2022-06-06 Thread Kewen.Lin via Gcc-patches
Hi,

PR105459 exposes one issue in inline_call handling that when it
decides to copy FP flags from callee to caller and rebuild the
optimization node for caller fndecl, it's possible that the target
option node is also necessary to be rebuilt.  Without updating
target option node early, it can make nodes share the same target
option node wrongly, later when we want to unshare it somewhere
(like in target hook) it can get unexpected results, like ICE on
uninitialized secondary member of target globals exposed in this PR.

Commit r12-3721 makes it get exact fp_expression info and causes
more optimization chances then exposes this issue.  Commit r11-5855
introduces two target options to shadow flag_excess_precision and
flag_unsafe_math_optimizations and shows the need to rebuild target
node in inline_call when optimization node changes.

As commented in PR105459, I tried to postpone init_function_start
in cgraph_node::expand, but abandoned it since I thought it just
concealed the issue.  And I also tried to adjust the target node
when current function switching, but failed since we get the NULL
cfun and fndecl in WPA phase.

Bootstrapped and regtested on x86_64-redhat-linux, powerpc64-linux-gnu
P8 and powerpc64le-linux-gnu P9.

Any thoughts?  Is it OK for trunk?

BR,
Kewen
-

PR tree-optimization/105459

gcc/ChangeLog:

* ipa-inline-transform.cc (inline_call): Rebuild target option node
once optimization node gets rebuilt.

gcc/testsuite/ChangeLog:

* gcc.dg/lto/pr105459_0.c: New test.
---
 gcc/ipa-inline-transform.cc   | 50 +--
 gcc/testsuite/gcc.dg/lto/pr105459_0.c | 35 +++
 2 files changed, 83 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/lto/pr105459_0.c

diff --git a/gcc/ipa-inline-transform.cc b/gcc/ipa-inline-transform.cc
index 07288e57c73..edba58377f4 100644
--- a/gcc/ipa-inline-transform.cc
+++ b/gcc/ipa-inline-transform.cc
@@ -52,6 +52,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "ipa-modref.h"
 #include "symtab-thunks.h"
 #include "symtab-clones.h"
+#include "target.h"

 int ncalls_inlined;
 int nfunctions_inlined;
@@ -469,8 +470,53 @@ inline_call (struct cgraph_edge *e, bool update_original,
 }

   /* Reload global optimization flags.  */
-  if (reload_optimization_node && DECL_STRUCT_FUNCTION (to->decl) == cfun)
-set_cfun (cfun, true);
+  if (reload_optimization_node)
+{
+  /* Only need to check and update target option node
+when target_option_default_node is not NULL.  */
+  if (target_option_default_node)
+   {
+ /* Save the current context for optimization and target option
+node.  */
+ tree old_optimize
+   = build_optimization_node (_options, _options_set);
+ tree old_target_opt
+   = build_target_option_node (_options, _options_set);
+
+ /* Restore optimization with new optimizatin node.  */
+ tree new_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (to->decl);
+ if (old_optimize != new_optimize)
+   cl_optimization_restore (_options, _options_set,
+TREE_OPTIMIZATION (new_optimize));
+
+ /* Restore target option with the one from caller fndecl.  */
+ tree cur_target_opt = DECL_FUNCTION_SPECIFIC_TARGET (to->decl);
+ if (!cur_target_opt)
+   cur_target_opt = target_option_default_node;
+ cl_target_option_restore (_options, _options_set,
+   TREE_TARGET_OPTION (cur_target_opt));
+
+ /* Update target option as optimization changes.  */
+ targetm.target_option.override ();
+
+ /* Rebuild target option node for caller fndecl and replace
+with it if the node changes.  */
+ tree new_target_opt
+   = build_target_option_node (_options, _options_set);
+ if (cur_target_opt != new_target_opt)
+   DECL_FUNCTION_SPECIFIC_TARGET (to->decl) = new_target_opt;
+
+ /* Restore the context with previous saved nodes.  */
+ if (old_optimize != new_optimize)
+   cl_optimization_restore (_options, _options_set,
+TREE_OPTIMIZATION (old_optimize));
+ cl_target_option_restore (_options, _options_set,
+   TREE_TARGET_OPTION (old_target_opt));
+   }
+
+  if (DECL_STRUCT_FUNCTION (to->decl) == cfun)
+   set_cfun (cfun, true);
+}

   /* If aliases are involved, redirect edge to the actual destination and
  possibly remove the aliases.  */
diff --git a/gcc/testsuite/gcc.dg/lto/pr105459_0.c 
b/gcc/testsuite/gcc.dg/lto/pr105459_0.c
new file mode 100644
index 000..c799e6ef23d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/lto/pr105459_0.c
@@ -0,0 +1,35 @@
+/* { dg-lto-do link } */
+/* { dg-lto-options { { -flto -O1 } } } */
+
+double m;
+int n;
+
+__attribute__ ((optimize