[PATCH] RISC-V: Handle g extension in multilib-generator

2019-08-05 Thread Kito Cheng
gcc/ChangeLog

* config/riscv/multilib-generator (canonical_order): Add 'g'.
(arch_canonicalize): Support rv32g and rv64g and fix error
handling.
---
 gcc/config/riscv/multilib-generator | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/gcc/config/riscv/multilib-generator 
b/gcc/config/riscv/multilib-generator
index e58231c2756..a4125ff01fd 100755
--- a/gcc/config/riscv/multilib-generator
+++ b/gcc/config/riscv/multilib-generator
@@ -36,17 +36,17 @@ abis = collections.OrderedDict()
 required = []
 reuse = []
 
-canonical_order = "mafdqlcbjtpvn"
+canonical_order = "mafdgqlcbjtpvn"
 
 def arch_canonicalize(arch):
   # TODO: Support Z, S, H, or X extensions.
   # TODO: Support implied extensions, e.g. D implied F in latest spec.
   # TODO: Support extension version.
   new_arch = ""
-  if arch[:5] in ['rv32e', 'rv32i', 'rv64i']:
+  if arch[:5] in ['rv32e', 'rv32i', 'rv32g', 'rv64i', 'rv64g']:
 new_arch = arch[:5]
   else:
-raise Exception("Unexpected arch: `%d`" % arch[:5])
+raise Exception("Unexpected arch: `%s`" % arch[:5])
 
   # Find any Z, S, H or X
   long_ext_prefixes = ['z', 's', 'h', 'x']
-- 
2.17.1



Re: [PATCH] RISC-V: Handle extensions combination correctly in multilib-generator.

2019-08-05 Thread Kito Cheng
Hi Jim, Andreas:

Thanks for your review :)

Committed with English improvements and ChangeLog update as r274137

On Tue, Aug 6, 2019 at 5:48 AM Jim Wilson  wrote:
>
> On Mon, Aug 5, 2019 at 1:20 AM Kito Cheng  wrote:
> > gcc/ChangeLog
> > * gcc/config/riscv/multilib-generator: Handle extensions
> > combination correctly.
>
> A ChangeLog entry should generally describe what a patch changes, not
> what it does.  So this should mention a new variable canonical_order,
> a new function arch_canonicalize, and the call to pass alts through
> it.
>
> > +raise Exception("Unexpect arch: `%d`" % arch[:5])
>
> Unexpect -> Unexpected
>
> > +  long_ext_prefixs = ['z', 's', 'h', 'x']
>
> prefixs -> prefixes
>
> > +  # Concat rest multi-char extensions.
>
> rest multi-char -> rest of the multi-char
>
> This looks good to me with the minor English improvements Andreas and
> I suggested.
>
> Jim


[PATCH v2 18/18] Use DEBUG_INSN_P macro. Autogenerated patch by running ../contrib/rtl-pred.sh DEBUG_INSN_P

2019-08-05 Thread Arvind Sankar
2019-08-05  Arvind Sankar  

gcc/ChangeLog:

* reload1.c: Convert GET_CODE (..) == DEBUG_INSN to
DEBUG_INSN_P (..).
* reorg.c: Likewise.

 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/gcc/reload1.c b/gcc/reload1.c
index 1a68d0567fc..d30badc0c4f 100644
--- a/gcc/reload1.c
+++ b/gcc/reload1.c
@@ -2850,7 +2850,7 @@ eliminate_regs_1 (rtx x, machine_mode mem_mode, rtx insn,
  || known_eq (x_size, new_size))
  )
return adjust_address_nv (new_rtx, GET_MODE (x), SUBREG_BYTE (x));
- else if (insn && GET_CODE (insn) == DEBUG_INSN)
+ else if (insn && DEBUG_INSN_P (insn))
return gen_rtx_raw_SUBREG (GET_MODE (x), new_rtx, SUBREG_BYTE (x));
  else
return gen_rtx_SUBREG (GET_MODE (x), new_rtx, SUBREG_BYTE (x));
diff --git a/gcc/reorg.c b/gcc/reorg.c
index 79684952595..4ec3d7efea6 100644
--- a/gcc/reorg.c
+++ b/gcc/reorg.c
@@ -1510,7 +1510,7 @@ redundant_insn (rtx insn, rtx_insn *target, const 
vec &delay_list)
  || GET_CODE (pat) == CLOBBER_HIGH)
continue;
 
-  if (GET_CODE (trial) == DEBUG_INSN)
+  if (DEBUG_INSN_P (trial))
continue;
 
   if (rtx_sequence *seq = dyn_cast  (pat))
@@ -1609,7 +1609,7 @@ redundant_insn (rtx insn, rtx_insn *target, const 
vec &delay_list)
  || GET_CODE (pat) == CLOBBER_HIGH)
continue;
 
-  if (GET_CODE (trial) == DEBUG_INSN)
+  if (DEBUG_INSN_P (trial))
continue;
 
   if (rtx_sequence *seq = dyn_cast  (pat))
@@ -2047,7 +2047,7 @@ fill_simple_delay_slots (int non_jumps_p)
continue;
 
  /* And DEBUG_INSNs never go into delay slots.  */
- if (GET_CODE (trial) == DEBUG_INSN)
+ if (DEBUG_INSN_P (trial))
continue;
 
  /* Check for resource conflict first, to avoid unnecessary
@@ -2174,7 +2174,7 @@ fill_simple_delay_slots (int non_jumps_p)
continue;
 
  /* And DEBUG_INSNs do not go in delay slots.  */
- if (GET_CODE (trial) == DEBUG_INSN)
+ if (DEBUG_INSN_P (trial))
continue;
 
  /* If this already has filled delay slots, get the insn needing
@@ -2442,7 +2442,7 @@ fill_slots_from_thread (rtx_jump_insn *insn, rtx 
condition,
  || GET_CODE (pat) == CLOBBER_HIGH)
continue;
 
-  if (GET_CODE (trial) == DEBUG_INSN)
+  if (DEBUG_INSN_P (trial))
continue;
 
   /* If TRIAL conflicts with the insns ahead of it, we lose.  Also,
-- 
2.21.0



[PATCH v2 17/18] Use SYMBOL_REF_P macro.

2019-08-05 Thread Arvind Sankar
2019-08-05  Arvind Sankar  

gcc/ChangeLog:

* dwarf2out.c: Convert GET_CODE (..) == SYMBOL_REF to
SYMBOL_REF_P (..).

 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/gcc/dwarf2out.c b/gcc/dwarf2out.c
index b2b4f6d82b2..ea38963d177 100644
--- a/gcc/dwarf2out.c
+++ b/gcc/dwarf2out.c
@@ -23747,8 +23747,7 @@ gen_variable_die (tree decl, tree origin, dw_die_ref 
context_die)
  if (single_element_loc_list_p (loc)
  && loc->expr->dw_loc_opc == DW_OP_addr
  && loc->expr->dw_loc_next == NULL
- && GET_CODE (loc->expr->dw_loc_oprnd1.v.val_addr)
-== SYMBOL_REF)
+ && SYMBOL_REF_P (loc->expr->dw_loc_oprnd1.v.val_addr))
{
  rtx x = loc->expr->dw_loc_oprnd1.v.val_addr;
  loc->expr->dw_loc_oprnd1.v.val_addr
-- 
2.21.0



[PATCH v2 14/18] Use LABEL_REF_P macro. Autogenerated patch by running ../contrib/rtl-pred.sh LABEL_REF

2019-08-05 Thread Arvind Sankar
2019-08-05  Arvind Sankar  

gcc/ChangeLog:

* alias.c: Convert GET_CODE (..) == LABEL_REF to
LABEL_REF_P (..).
* bb-reorder.c: Likewise.
* cfgbuild.c: Likewise.
* cfgexpand.c: Likewise.
* cfgrtl.c: Likewise.
* combine.c: Likewise.
* config/aarch64/aarch64.c: Likewise.
* config/alpha/predicates.md: Likewise.
* config/arc/arc.c: Likewise.
* config/arc/arc.h: Likewise.
* config/arm/arm.c: Likewise.
* config/arm/thumb1.md: Likewise.
* config/avr/avr.c: Likewise.
* config/bfin/bfin.c: Likewise.
* config/bfin/bfin.h: Likewise.
* config/bfin/predicates.md: Likewise.
* config/c6x/c6x.c: Likewise.
* config/c6x/c6x.md: Likewise.
* config/c6x/predicates.md: Likewise.
* config/cr16/cr16.c: Likewise.
* config/cr16/cr16.h: Likewise.
* config/cris/cris.c: Likewise.
* config/cris/cris.md: Likewise.
* config/csky/constraints.md: Likewise.
* config/csky/csky.c: Likewise.
* config/csky/csky.h: Likewise.
* config/darwin.c: Likewise.
* config/epiphany/epiphany.h: Likewise.
* config/epiphany/predicates.md: Likewise.
* config/frv/frv.c: Likewise.
* config/frv/predicates.md: Likewise.
* config/ft32/constraints.md: Likewise.
* config/ft32/ft32.c: Likewise.
* config/ft32/ft32.md: Likewise.
* config/ft32/predicates.md: Likewise.
* config/gcn/gcn.c: Likewise.
* config/gcn/gcn.md: Likewise.
* config/h8300/h8300.h: Likewise.
* config/i386/i386.c: Likewise.
* config/i386/i386.h: Likewise.
* config/i386/i386.md: Likewise.
* config/i386/predicates.md: Likewise.
* config/iq2000/iq2000.c: Likewise.
* config/iq2000/iq2000.h: Likewise.
* config/iq2000/predicates.md: Likewise.
* config/lm32/lm32.c: Likewise.
* config/lm32/lm32.h: Likewise.
* config/lm32/lm32.md: Likewise.
* config/m32r/constraints.md: Likewise.
* config/m32r/m32r.c: Likewise.
* config/m32r/m32r.h: Likewise.
* config/m32r/predicates.md: Likewise.
* config/m68k/constraints.md: Likewise.
* config/m68k/m68k.c: Likewise.
* config/m68k/m68k.h: Likewise.
* config/m68k/predicates.md: Likewise.
* config/mcore/constraints.md: Likewise.
* config/mcore/mcore.c: Likewise.
* config/mcore/mcore.h: Likewise.
* config/mcore/predicates.md: Likewise.
* config/microblaze/microblaze.c: Likewise.
* config/microblaze/predicates.md: Likewise.
* config/mips/mips.c: Likewise.
* config/mmix/mmix.c: Likewise.
* config/mmix/predicates.md: Likewise.
* config/mn10300/mn10300.c: Likewise.
* config/mn10300/mn10300.h: Likewise.
* config/moxie/constraints.md: Likewise.
* config/moxie/moxie.c: Likewise.
* config/moxie/predicates.md: Likewise.
* config/nds32/nds32-cost.c: Likewise.
* config/nds32/nds32-md-auxiliary.c: Likewise.
* config/nds32/nds32.h: Likewise.
* config/nios2/nios2.c: Likewise.
* config/nvptx/nvptx.c: Likewise.
* config/nvptx/nvptx.md: Likewise.
* config/pa/pa.c: Likewise.
* config/pa/pa.h: Likewise.
* config/pa/pa.md: Likewise.
* config/pa/predicates.md: Likewise.
* config/riscv/riscv.c: Likewise.
* config/rs6000/freebsd64.h: Likewise.
* config/rs6000/linux64.h: Likewise.
* config/rs6000/rs6000.c: Likewise.
* config/rs6000/rs6000.h: Likewise.
* config/rs6000/rtems.h: Likewise.
* config/rs6000/sysv4.h: Likewise.
* config/rs6000/xcoff.h: Likewise.
* config/rx/rx.c: Likewise.
* config/s390/predicates.md: Likewise.
* config/s390/s390.c: Likewise.
* config/s390/s390.h: Likewise.
* config/sh/predicates.md: Likewise.
* config/sh/sh.c: Likewise.
* config/sh/sh.h: Likewise.
* config/sparc/predicates.md: Likewise.
* config/sparc/sparc.c: Likewise.
* config/sparc/sparc.md: Likewise.
* config/spu/constraints.md: Likewise.
* config/spu/spu.c: Likewise.
* config/tilegx/predicates.md: Likewise.
* config/tilegx/tilegx.c: Likewise.
* config/tilepro/predicates.md: Likewise.
* config/tilepro/tilepro.c: Likewise.
* config/v850/v850.c: Likewise.
* config/vax/predicates.md: Likewise.
* config/vax/vax.c: Likewise.
* config/xtensa/xtensa.c: Likewise.
* config/xtensa/xtensa.h: Likewise.
* cprop.c: Likewise.
* cse.c: Likewise.
* dbxout.c: Likewise.
* explow.c: Likewise.
* expr.c: Likewise.
* final.c: Likewise.
* genattrtab.c: Likewise.
* gensupport.c: Likewise.
* ifcvt.c: Likewi

[PATCH v2 06/18] Use CONST_DOUBLE_P macro. Autogenerated patch by running ../contrib/rtl-pred.sh CONST_DOUBLE

2019-08-05 Thread Arvind Sankar
2019-08-05  Arvind Sankar  

gcc/ChangeLog:

* config/aarch64/aarch64.c: Convert
GET_CODE (..) == CONST_DOUBLE to CONST_DOUBLE_P (..).
* config/aarch64/aarch64.md: Likewise.
* config/arc/arc.c: Likewise.
* config/arc/arc.md: Likewise.
* config/arc/fpx.md: Likewise.
* config/bfin/bfin.md: Likewise.
* config/c6x/c6x.h: Likewise.
* config/c6x/c6x.md: Likewise.
* config/cr16/predicates.md: Likewise.
* config/cris/cris.c: Likewise.
* config/cris/cris.md: Likewise.
* config/csky/csky.c: Likewise.
* config/darwin.c: Likewise.
* config/fr30/fr30.c: Likewise.
* config/frv/frv.c: Likewise.
* config/frv/frv.h: Likewise.
* config/gcn/gcn-valu.md: Likewise.
* config/gcn/gcn.c: Likewise.
* config/ia64/ia64.c: Likewise.
* config/ia64/vect.md: Likewise.
* config/iq2000/iq2000.md: Likewise.
* config/lm32/lm32.c: Likewise.
* config/m32r/m32r.c: Likewise.
* config/m68k/m68k.c: Likewise.
* config/m68k/m68k.md: Likewise.
* config/m68k/predicates.md: Likewise.
* config/mcore/mcore.c: Likewise.
* config/microblaze/microblaze.c: Likewise.
* config/mips/mips.c: Likewise.
* config/mips/mips.md: Likewise.
* config/mmix/mmix.c: Likewise.
* config/mmix/predicates.md: Likewise.
* config/nds32/constraints.md: Likewise.
* config/nds32/nds32-predicates.c: Likewise.
* config/nds32/nds32.c: Likewise.
* config/nds32/nds32.h: Likewise.
* config/pa/pa.c: Likewise.
* config/pa/pa.md: Likewise.
* config/pdp11/pdp11.c: Likewise.
* config/s390/s390.c: Likewise.
* config/sh/sh.c: Likewise.
* config/sh/sh.h: Likewise.
* config/sparc/predicates.md: Likewise.
* config/sparc/sparc.c: Likewise.
* config/sparc/sparc.md: Likewise.
* config/spu/predicates.md: Likewise.
* config/spu/spu.c: Likewise.
* config/tilegx/tilegx.c: Likewise.
* config/tilepro/tilepro.c: Likewise.
* config/v850/predicates.md: Likewise.
* config/v850/v850.c: Likewise.
* config/vax/vax.c: Likewise.
* config/vax/vax.md: Likewise.
* config/visium/visium.c: Likewise.
* config/xtensa/xtensa.c: Likewise.
* defaults.h: Likewise.
* genpreds.c: Likewise.
* recog.c: Likewise.
* reg-stack.c: Likewise.

 59 files changed, 161 insertions(+), 161 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index a526b8be522..68ad4858c76 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -7057,7 +7057,7 @@ aarch64_reinterpret_float_as_int (rtx value, unsigned 
HOST_WIDE_INT *intval)
 }
 
   scalar_float_mode mode;
-  if (GET_CODE (value) != CONST_DOUBLE
+  if (!CONST_DOUBLE_P (value)
   || !is_a  (GET_MODE (value), &mode)
   || GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT
   /* Only support up to DF mode.  */
@@ -7097,7 +7097,7 @@ aarch64_float_const_rtx_p (rtx x)
  mov/movk pairs over ldr/adrp pairs.  */
   unsigned HOST_WIDE_INT ival;
 
-  if (GET_CODE (x) == CONST_DOUBLE
+  if (CONST_DOUBLE_P (x)
   && SCALAR_FLOAT_MODE_P (mode)
   && aarch64_reinterpret_float_as_int (x, &ival))
 {
@@ -7136,7 +7136,7 @@ aarch64_can_const_movi_rtx_p (rtx x, machine_mode mode)
   scalar_int_mode imode;
   unsigned HOST_WIDE_INT ival;
 
-  if (GET_CODE (x) == CONST_DOUBLE
+  if (CONST_DOUBLE_P (x)
   && SCALAR_FLOAT_MODE_P (mode))
 {
   if (!aarch64_reinterpret_float_as_int (x, &ival))
@@ -14426,7 +14426,7 @@ aarch64_sve_float_arith_immediate_p (rtx x, bool 
negate_p)
   REAL_VALUE_TYPE r;
 
   if (!const_vec_duplicate_p (x, &elt)
-  || GET_CODE (elt) != CONST_DOUBLE)
+  || !CONST_DOUBLE_P (elt))
 return false;
 
   r = *CONST_DOUBLE_REAL_VALUE (elt);
@@ -14452,7 +14452,7 @@ aarch64_sve_float_mul_immediate_p (rtx x)
   /* GCC will never generate a multiply with an immediate of 2, so there is no
  point testing for it (even though it is a valid constant).  */
   return (const_vec_duplicate_p (x, &elt)
- && GET_CODE (elt) == CONST_DOUBLE
+ && CONST_DOUBLE_P (elt)
  && real_equal (CONST_DOUBLE_REAL_VALUE (elt), &dconsthalf));
 }
 
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 873f2760cce..1fe96d5f772 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -1246,7 +1246,7 @@
   }
 
 if (GET_CODE (operands[0]) == MEM
-&& ! (GET_CODE (operands[1]) == CONST_DOUBLE
+&& ! (CONST_DOUBLE_P (operands[1])
  && aarch64_float_const_zero_rtx_p (operands[1])))
   operands[1] = force_reg (mode, operands[1]);
   }
diff --git a/gcc/config/arc/arc.c b/gcc/config/arc/arc.c
index 486053c98ca..abdb51fbb31 100644
--- a/gc

[PATCH v2 15/18] Use LABEL_REF_P macro.

2019-08-05 Thread Arvind Sankar
2019-08-05  Arvind Sankar  

gcc/ChangeLog:

* rtlanal.c: Convert GET_CODE (..) == LABEL_REF to
LABEL_REF_P (..).

 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/gcc/rtlanal.c b/gcc/rtlanal.c
index cb6c8902353..a8becc85047 100644
--- a/gcc/rtlanal.c
+++ b/gcc/rtlanal.c
@@ -3347,8 +3347,7 @@ computed_jump_p (const rtx_insn *insn)
 
  for (i = len - 1; i >= 0; i--)
if (GET_CODE (XVECEXP (pat, 0, i)) == USE
-   && (GET_CODE (XEXP (XVECEXP (pat, 0, i), 0))
-   == LABEL_REF))
+   && LABEL_REF_P (XEXP (XVECEXP (pat, 0, i), 0)))
  {
has_use_labelref = 1;
break;
-- 
2.21.0



[PATCH v2 09/18] Add CONST_STRING_P rtx_code predicate.

2019-08-05 Thread Arvind Sankar
2019-08-05  Arvind Sankar  

gcc/ChangeLog:

* rtl.h: Add a predicate macro for checking CONST_STRING.

 1 file changed, 3 insertions(+)

diff --git a/gcc/rtl.h b/gcc/rtl.h
index 45e2b85867d..d02772b65be 100644
--- a/gcc/rtl.h
+++ b/gcc/rtl.h
@@ -821,6 +821,9 @@ struct GTY(()) rtvec_def {
 /* Predicate yielding nonzero iff X is an rtx for a vector const.  */
 #define CONST_VECTOR_P(X) (GET_CODE (X) == CONST_VECTOR)
 
+/* Predicate yielding nonzero iff X is an rtx for an rtl string const.  */
+#define CONST_STRING_P(X) (GET_CODE (X) == CONST_STRING)
+
 /* Predicate yielding true iff X is an rtx for a integer const.  */
 #if TARGET_SUPPORTS_WIDE_INT
 #define CONST_SCALAR_INT_P(X) \
-- 
2.21.0



[PATCH v2 10/18] Use CONST_STRING_P macro. Autogenerated patch by running ../contrib/rtl-pred.sh CONST_STRING

2019-08-05 Thread Arvind Sankar
2019-08-05  Arvind Sankar  

gcc/ChangeLog:

* config/avr/avr.c: Convert GET_CODE (..) == CONST_STRING to
CONST_STRING_P (..).
* dwarf2out.c: Likewise.
* genattrtab.c: Likewise.
* gensupport.c: Likewise.

 4 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/gcc/config/avr/avr.c b/gcc/config/avr/avr.c
index 760e9371a01..015d36728a3 100644
--- a/gcc/config/avr/avr.c
+++ b/gcc/config/avr/avr.c
@@ -2965,7 +2965,7 @@ avr_print_operand (FILE *file, rtx x, int code)
   REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), val);
   fprintf (file, "0x%lx", val);
 }
-  else if (GET_CODE (x) == CONST_STRING)
+  else if (CONST_STRING_P (x))
 fputs (XSTR (x, 0), file);
   else if (code == 'j')
 fputs (cond_string (GET_CODE (x)), file);
diff --git a/gcc/dwarf2out.c b/gcc/dwarf2out.c
index c84885a24bb..7417551b120 100644
--- a/gcc/dwarf2out.c
+++ b/gcc/dwarf2out.c
@@ -18421,7 +18421,7 @@ loc_list_from_tree_1 (tree loc, int want_address,
  val &= GET_MODE_MASK (DECL_MODE (loc));
ret = int_loc_descriptor (val);
  }
-   else if (GET_CODE (rtl) == CONST_STRING)
+   else if (CONST_STRING_P (rtl))
  {
expansion_failed (loc, NULL_RTX, "CONST_STRING");
return 0;
@@ -19687,7 +19687,7 @@ add_const_value_attribute (dw_die_ref die, rtx rtl)
   return false;
 
 case MEM:
-  if (GET_CODE (XEXP (rtl, 0)) == CONST_STRING
+  if (CONST_STRING_P (XEXP (rtl, 0))
  && MEM_READONLY_P (rtl)
  && GET_MODE (rtl) == BLKmode)
{
@@ -20165,7 +20165,7 @@ add_location_or_const_value_attribute (dw_die_ref die, 
tree decl, bool cache_p)
  the location.  */
 
   rtl = rtl_for_decl_location (decl);
-  if (rtl && (CONSTANT_P (rtl) || GET_CODE (rtl) == CONST_STRING)
+  if (rtl && (CONSTANT_P (rtl) || CONST_STRING_P (rtl))
   && add_const_value_attribute (die, rtl))
 return true;
 
@@ -20186,7 +20186,7 @@ add_location_or_const_value_attribute (dw_die_ref die, 
tree decl, bool cache_p)
   rtl = NOTE_VAR_LOCATION_LOC (node->loc);
   if (GET_CODE (rtl) == EXPR_LIST)
rtl = XEXP (rtl, 0);
-  if ((CONSTANT_P (rtl) || GET_CODE (rtl) == CONST_STRING)
+  if ((CONSTANT_P (rtl) || CONST_STRING_P (rtl))
  && add_const_value_attribute (die, rtl))
 return true;
 }
@@ -29855,7 +29855,7 @@ resolve_one_addr (rtx *addr)
 {
   rtx rtl = *addr;
 
-  if (GET_CODE (rtl) == CONST_STRING)
+  if (CONST_STRING_P (rtl))
 {
   size_t len = strlen (XSTR (rtl, 0)) + 1;
   tree t = build_string (len, XSTR (rtl, 0));
@@ -29960,7 +29960,7 @@ optimize_one_addr_into_implicit_ptr (dw_loc_descr_ref 
loc)
   offset = INTVAL (XEXP (XEXP (rtl, 0), 1));
   rtl = XEXP (XEXP (rtl, 0), 0);
 }
-  if (GET_CODE (rtl) == CONST_STRING)
+  if (CONST_STRING_P (rtl))
 {
   size_t len = strlen (XSTR (rtl, 0)) + 1;
   tree t = build_string (len, XSTR (rtl, 0));
diff --git a/gcc/genattrtab.c b/gcc/genattrtab.c
index cdf0b5c12dc..6b24323f112 100644
--- a/gcc/genattrtab.c
+++ b/gcc/genattrtab.c
@@ -739,7 +739,7 @@ check_attr_test (file_location loc, rtx exp, attr_desc 
*attr)
  else
{
  for (av = attr2->first_value; av; av = av->next)
-   if (GET_CODE (av->value) == CONST_STRING
+   if (CONST_STRING_P (av->value)
&& ! strcmp (XSTR (exp, 1), XSTR (av->value, 0)))
  break;
 
@@ -892,7 +892,7 @@ check_attr_value (file_location loc, rtx exp, class 
attr_desc *attr)
}
 
   for (av = attr->first_value; av; av = av->next)
-   if (GET_CODE (av->value) == CONST_STRING
+   if (CONST_STRING_P (av->value)
&& ! strcmp (XSTR (av->value, 0), XSTR (exp, 0)))
  break;
 
@@ -5000,7 +5000,7 @@ make_automaton_attrs (void)
{
  if (val == tune_attr->default_val)
continue;
- gcc_assert (GET_CODE (val->value) == CONST_STRING);
+ gcc_assert (CONST_STRING_P (val->value));
  fprintf (dfa_file,
   "extern int internal_dfa_insn_code_%s (rtx_insn *);\n",
   XSTR (val->value, 0));
@@ -5012,7 +5012,7 @@ make_automaton_attrs (void)
{
  if (val == tune_attr->default_val)
continue;
- gcc_assert (GET_CODE (val->value) == CONST_STRING);
+ gcc_assert (CONST_STRING_P (val->value));
  fprintf (latency_file,
   "extern int insn_default_latency_%s (rtx_insn *);\n",
   XSTR (val->value, 0));
@@ -5024,7 +5024,7 @@ make_automaton_attrs (void)
{
  if (val == tune_attr->default_val)
continue;
- gcc_assert (GET_CODE (val->value) == CONST_STRING);
+ gcc_assert (CONST_STRING_P (val->value));
  fprintf (attr_file,
   "extern int internal_dfa_insn_code_%s (rtx_insn *);\n"
   "extern int insn_default_late

[PATCH v2 05/18] Use CONST_FIXED_P macro. Autogenerated patch by running ../contrib/rtl-pred.sh CONST_FIXED

2019-08-05 Thread Arvind Sankar
2019-08-05  Arvind Sankar  

gcc/ChangeLog:

* cfgexpand.c: Convert GET_CODE (..) == CONST_FIXED
to CONST_FIXED_P.
* config/spu/spu.c: Likewise.
* varasm.c: Likewise.

 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/gcc/cfgexpand.c b/gcc/cfgexpand.c
index 33af991573f..b141a57d3af 100644
--- a/gcc/cfgexpand.c
+++ b/gcc/cfgexpand.c
@@ -5481,7 +5481,7 @@ expand_debug_locations (void)
gcc_assert (mode == GET_MODE (val)
|| (GET_MODE (val) == VOIDmode
&& (CONST_SCALAR_INT_P (val)
-   || GET_CODE (val) == CONST_FIXED
+   || CONST_FIXED_P (val)
|| GET_CODE (val) == LABEL_REF)));
  }
 
diff --git a/gcc/config/spu/spu.c b/gcc/config/spu/spu.c
index 0e21e28b2e7..e6b25ab3b98 100644
--- a/gcc/config/spu/spu.c
+++ b/gcc/config/spu/spu.c
@@ -5985,7 +5985,7 @@ spu_expand_vector_init (rtx target, rtx vals)
   x = XVECEXP (vals, 0, i);
   if (!(CONST_INT_P (x)
|| GET_CODE (x) == CONST_DOUBLE
-   || GET_CODE (x) == CONST_FIXED))
+   || CONST_FIXED_P (x)))
++n_var;
   else
{
@@ -6026,7 +6026,7 @@ spu_expand_vector_init (rtx target, rtx vals)
  x = XVECEXP (constant_parts_rtx, 0, i);
  if (!(CONST_INT_P (x)
|| GET_CODE (x) == CONST_DOUBLE
-   || GET_CODE (x) == CONST_FIXED))
+   || CONST_FIXED_P (x)))
XVECEXP (constant_parts_rtx, 0, i) = first_constant;
}
 
@@ -6046,7 +6046,7 @@ spu_expand_vector_init (rtx target, rtx vals)
  x = XVECEXP (vals, 0, i);
  if (!(CONST_INT_P (x)
|| GET_CODE (x) == CONST_DOUBLE
-   || GET_CODE (x) == CONST_FIXED))
+   || CONST_FIXED_P (x)))
{
  if (!register_operand (x, GET_MODE (x)))
x = force_reg (GET_MODE (x), x);
diff --git a/gcc/varasm.c b/gcc/varasm.c
index e886cdc71b8..0e833b9bc58 100644
--- a/gcc/varasm.c
+++ b/gcc/varasm.c
@@ -2813,7 +2813,7 @@ assemble_integer (rtx x, unsigned int size, unsigned int 
align, int force)
 
   subsize = size > UNITS_PER_WORD? UNITS_PER_WORD : 1;
   subalign = MIN (align, subsize * BITS_PER_UNIT);
-  if (GET_CODE (x) == CONST_FIXED)
+  if (CONST_FIXED_P (x))
mclass = GET_MODE_CLASS (GET_MODE (x));
   else
mclass = MODE_INT;
-- 
2.21.0



[PATCH v2 08/18] Use CONST_VECTOR_P macro. Autogenerated patch by running ../contrib/rtl-pred.sh CONST_VECTOR

2019-08-05 Thread Arvind Sankar
2019-08-05  Arvind Sankar  

gcc/ChangeLog:

* common.md: Convert GET_CODE (..) == CONST_VECTOR to
CONST_VECTOR_P (..).
* config/aarch64/aarch64.c: Likewise.
* config/alpha/alpha.c: Likewise.
* config/arc/arc.c: Likewise.
* config/arc/simdext.md: Likewise.
* config/arm/arm.c: Likewise.
* config/bfin/bfin.md: Likewise.
* config/darwin.c: Likewise.
* config/gcn/gcn-valu.md: Likewise.
* config/gcn/gcn.c: Likewise.
* config/i386/i386-expand.c: Likewise.
* config/i386/i386.c: Likewise.
* config/i386/predicates.md: Likewise.
* config/i386/sse.md: Likewise.
* config/ia64/ia64.c: Likewise.
* config/mips/mips.c: Likewise.
* config/nds32/nds32-dspext.md: Likewise.
* config/nds32/predicates.md: Likewise.
* config/rs6000/predicates.md: Likewise.
* config/rs6000/rs6000-p8swap.c: Likewise.
* config/rs6000/rs6000.c: Likewise.
* config/s390/s390.c: Likewise.
* config/spu/predicates.md: Likewise.
* config/spu/spu.c: Likewise.
* config/tilegx/tilegx.c: Likewise.
* config/tilepro/tilepro.c: Likewise.
* dwarf2out.c: Likewise.
* emit-rtl.c: Likewise.
* explow.c: Likewise.
* rtlanal.c: Likewise.
* simplify-rtx.c: Likewise.
* varasm.c: Likewise.

 32 files changed, 112 insertions(+), 112 deletions(-)

diff --git a/gcc/common.md b/gcc/common.md
index 42c574029b5..ee93f203bd2 100644
--- a/gcc/common.md
+++ b/gcc/common.md
@@ -80,14 +80,14 @@
 (define_constraint "E"
   "Matches a floating-point constant."
   (ior (match_test "CONST_DOUBLE_AS_FLOAT_P (op)")
-   (match_test "GET_CODE (op) == CONST_VECTOR
+   (match_test "CONST_VECTOR_P (op)
&& GET_MODE_CLASS (GET_MODE (op)) == MODE_VECTOR_FLOAT")))
 
 ;; There is no longer a distinction between "E" and "F".
 (define_constraint "F"
   "Matches a floating-point constant."
   (ior (match_test "CONST_DOUBLE_AS_FLOAT_P (op)")
-   (match_test "GET_CODE (op) == CONST_VECTOR
+   (match_test "CONST_VECTOR_P (op)
&& GET_MODE_CLASS (GET_MODE (op)) == MODE_VECTOR_FLOAT")))
 
 (define_constraint "X"
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 68ad4858c76..9bdbc198dae 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -3429,7 +3429,7 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm,
}
  emit_insn (gen_vec_duplicate (dest, op));
}
-  else if (GET_CODE (imm) == CONST_VECTOR
+  else if (CONST_VECTOR_P (imm)
   && !GET_MODE_NUNITS (GET_MODE (imm)).is_constant ())
aarch64_expand_sve_const_vector (dest, imm);
   else
@@ -7503,7 +7503,7 @@ aarch64_const_vec_all_in_range_p (rtx vec,
  HOST_WIDE_INT minval,
  HOST_WIDE_INT maxval)
 {
-  if (GET_CODE (vec) != CONST_VECTOR
+  if (!CONST_VECTOR_P (vec)
   || GET_MODE_CLASS (GET_MODE (vec)) != MODE_VECTOR_INT)
 return false;
 
@@ -13203,7 +13203,7 @@ aarch64_legitimate_constant_p (machine_mode mode, rtx x)
   /* Support CSE and rematerialization of common constants.  */
   if (CONST_INT_P (x)
   || (CONST_DOUBLE_P (x) && GET_MODE_CLASS (mode) == MODE_FLOAT)
-  || GET_CODE (x) == CONST_VECTOR)
+  || CONST_VECTOR_P (x))
 return true;
 
   /* Do not allow vector struct mode constants for Advanced SIMD.
@@ -14621,7 +14621,7 @@ aarch64_simd_valid_immediate (rtx op, 
simd_immediate_info *info,
   scalar_mode elt_mode = GET_MODE_INNER (mode);
   rtx base, step;
   unsigned int n_elts;
-  if (GET_CODE (op) == CONST_VECTOR
+  if (CONST_VECTOR_P (op)
   && CONST_VECTOR_DUPLICATE_P (op))
 n_elts = CONST_VECTOR_NPATTERNS (op);
   else if ((vec_flags & VEC_SVE_DATA)
@@ -14636,7 +14636,7 @@ aarch64_simd_valid_immediate (rtx op, 
simd_immediate_info *info,
*info = simd_immediate_info (elt_mode, base, step);
   return true;
 }
-  else if (GET_CODE (op) == CONST_VECTOR
+  else if (CONST_VECTOR_P (op)
   && CONST_VECTOR_NUNITS (op).is_constant (&n_elts))
 /* N_ELTS set above.  */;
   else
@@ -15096,7 +15096,7 @@ aarch64_simd_make_constant (rtx vals)
   int n_const = 0;
   int i;
 
-  if (GET_CODE (vals) == CONST_VECTOR)
+  if (CONST_VECTOR_P (vals))
 const_vec = vals;
   else if (GET_CODE (vals) == PARALLEL)
 {
@@ -16548,7 +16548,7 @@ aarch64_expand_sve_vec_perm (rtx target, rtx op0, rtx 
op1, rtx sel)
   rtx sel_reg = force_reg (sel_mode, sel);
 
   /* Check if the sel only references the first values vector.  */
-  if (GET_CODE (sel) == CONST_VECTOR
+  if (CONST_VECTOR_P (sel)
   && aarch64_const_vec_all_in_range_p (sel, 0, nunits - 1))
 {
   emit_unspec2 (target, UNSPEC_TBL, op0, sel_reg);
@@ -16570,7 +16570,7 @@ aarch64_expand_sve_vec_perm (rtx target, rtx op0, rtx 
op1, rtx sel)
   rtx 

[PATCH v2 07/18] Add CONST_VECTOR_P rtx_code predicate.

2019-08-05 Thread Arvind Sankar
2019-08-05  Arvind Sankar  

gcc/ChangeLog:

* rtl.h: Add a predicate macro for checking CONST_VECTOR.

 1 file changed, 3 insertions(+)

diff --git a/gcc/rtl.h b/gcc/rtl.h
index 28b5a82d651..45e2b85867d 100644
--- a/gcc/rtl.h
+++ b/gcc/rtl.h
@@ -818,6 +818,9 @@ struct GTY(()) rtvec_def {
 #define CONST_DOUBLE_AS_FLOAT_P(X) \
   (GET_CODE (X) == CONST_DOUBLE && GET_MODE (X) != VOIDmode)
 
+/* Predicate yielding nonzero iff X is an rtx for a vector const.  */
+#define CONST_VECTOR_P(X) (GET_CODE (X) == CONST_VECTOR)
+
 /* Predicate yielding true iff X is an rtx for a integer const.  */
 #if TARGET_SUPPORTS_WIDE_INT
 #define CONST_SCALAR_INT_P(X) \
-- 
2.21.0



[PATCH v2 03/18] Use CONST_INT_P macro.

2019-08-05 Thread Arvind Sankar
2019-08-05  Arvind Sankar  

gcc/ChangeLog:

* combine-stack-adj.c: Convert GET_CODE (..) == CONST_INT
to CONST_INT_P (..).

 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/gcc/combine-stack-adj.c b/gcc/combine-stack-adj.c
index 3638a1b10ee..f98a0d54c98 100644
--- a/gcc/combine-stack-adj.c
+++ b/gcc/combine-stack-adj.c
@@ -634,8 +634,7 @@ combine_stack_adjustments_for_block (basic_block bb)
  && GET_CODE (XEXP (XEXP (dest, 0), 1)) == PLUS
  && XEXP (XEXP (XEXP (dest, 0), 1), 0)
 == stack_pointer_rtx
- && GET_CODE (XEXP (XEXP (XEXP (dest, 0), 1), 1))
-== CONST_INT
+ && CONST_INT_P (XEXP (XEXP (XEXP (dest, 0), 1), 1))
  && INTVAL (XEXP (XEXP (XEXP (dest, 0), 1), 1))
 == -last_sp_adjust))
  && XEXP (XEXP (dest, 0), 0) == stack_pointer_rtx
-- 
2.21.0



[PATCH v2 04/18] Use CONST_WIDE_INT_P macro. Autogenerated patch by running ../contrib/rtl-pred.sh CONST_WIDE_INT_P

2019-08-05 Thread Arvind Sankar
2019-08-05  Arvind Sankar  

gcc/ChangeLog:

* config/darwin.c: Convert GET_CODE (..) == CONST_WIDE_INT
to CONST_WIDE_INT_P (..).
* config/s390/s390.c: Likewise.
* rtlanal.c: Likewise.

 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/gcc/config/darwin.c b/gcc/config/darwin.c
index d38867e4227..6eaccd73a9f 100644
--- a/gcc/config/darwin.c
+++ b/gcc/config/darwin.c
@@ -1751,19 +1751,19 @@ machopic_select_rtx_section (machine_mode mode, rtx x,
 {
   if (GET_MODE_SIZE (mode) == 8
   && (CONST_INT_P (x)
- || GET_CODE (x) == CONST_WIDE_INT
+ || CONST_WIDE_INT_P (x)
  || GET_CODE (x) == CONST_DOUBLE))
 return darwin_sections[literal8_section];
   else if (GET_MODE_SIZE (mode) == 4
   && (CONST_INT_P (x)
-  || GET_CODE (x) == CONST_WIDE_INT
+  || CONST_WIDE_INT_P (x)
   || GET_CODE (x) == CONST_DOUBLE))
 return darwin_sections[literal4_section];
   else if (HAVE_GAS_LITERAL16
   && TARGET_64BIT
   && GET_MODE_SIZE (mode) == 16
   && (CONST_INT_P (x)
-  || GET_CODE (x) == CONST_WIDE_INT
+  || CONST_WIDE_INT_P (x)
   || GET_CODE (x) == CONST_DOUBLE
   || GET_CODE (x) == CONST_VECTOR))
 return darwin_sections[literal16_section];
diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
index c863514325a..4c55707367d 100644
--- a/gcc/config/s390/s390.c
+++ b/gcc/config/s390/s390.c
@@ -4187,7 +4187,7 @@ legitimate_reload_constant_p (rtx op)
 return true;
 
   /* Accept double-word operands that can be split.  */
-  if (GET_CODE (op) == CONST_WIDE_INT
+  if (CONST_WIDE_INT_P (op)
   || (CONST_INT_P (op)
  && trunc_int_for_mode (INTVAL (op), word_mode) != INTVAL (op)))
 {
@@ -4418,7 +4418,7 @@ s390_reload_symref_address (rtx reg, rtx mem, rtx 
scratch, bool tomem)
   /* Reload might have pulled a constant out of the literal pool.
  Force it back in.  */
   if (CONST_INT_P (mem) || GET_CODE (mem) == CONST_DOUBLE
-  || GET_CODE (mem) == CONST_WIDE_INT
+  || CONST_WIDE_INT_P (mem)
   || GET_CODE (mem) == CONST_VECTOR
   || GET_CODE (mem) == CONST)
 mem = force_const_mem (GET_MODE (reg), mem);
diff --git a/gcc/rtlanal.c b/gcc/rtlanal.c
index 268a38799d6..7da11b399ba 100644
--- a/gcc/rtlanal.c
+++ b/gcc/rtlanal.c
@@ -6051,7 +6051,7 @@ split_double (rtx value, rtx *first, rtx *second)
}
}
 }
-  else if (GET_CODE (value) == CONST_WIDE_INT)
+  else if (CONST_WIDE_INT_P (value))
 {
   /* All of this is scary code and needs to be converted to
 properly work with any size integer.  */
-- 
2.21.0



[PATCH v2 01/18] Fix CONST_DOUBLE_AS_FLOAT_P comment.

2019-08-05 Thread Arvind Sankar
2019-08-05  Arvind Sankar  

gcc/ChangeLog:

* rtl.h: Fix comment for CONST_DOUBLE_AS_FLOAT_P and move
it together with the other CONST_DOUBLE predicates.

 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/gcc/rtl.h b/gcc/rtl.h
index 039ab05f951..28b5a82d651 100644
--- a/gcc/rtl.h
+++ b/gcc/rtl.h
@@ -814,6 +814,10 @@ struct GTY(()) rtvec_def {
 #define CONST_DOUBLE_AS_INT_P(X) \
   (GET_CODE (X) == CONST_DOUBLE && GET_MODE (X) == VOIDmode)
 
+/* Predicate yielding true iff X is an rtx for a floating point const.  */
+#define CONST_DOUBLE_AS_FLOAT_P(X) \
+  (GET_CODE (X) == CONST_DOUBLE && GET_MODE (X) != VOIDmode)
+
 /* Predicate yielding true iff X is an rtx for a integer const.  */
 #if TARGET_SUPPORTS_WIDE_INT
 #define CONST_SCALAR_INT_P(X) \
@@ -823,10 +827,6 @@ struct GTY(()) rtvec_def {
   (CONST_INT_P (X) || CONST_DOUBLE_AS_INT_P (X))
 #endif
 
-/* Predicate yielding true iff X is an rtx for a double-int.  */
-#define CONST_DOUBLE_AS_FLOAT_P(X) \
-  (GET_CODE (X) == CONST_DOUBLE && GET_MODE (X) != VOIDmode)
-
 /* Predicate yielding nonzero iff X is a label insn.  */
 #define LABEL_P(X) (GET_CODE (X) == CODE_LABEL)
 
-- 
2.21.0



Re: Use predicates for RTL objects

2019-08-05 Thread Arvind Sankar
Here's the patches split up. The ones that say autogenerated were
generated from the script below.

I haven't included that as a patch yet since not sure about the
copyright/licensing boilerplate to insert in it.

contrib/rtl-pred.sh:

#!/bin/sh

# 
codes="CONST_INT|CONST_WIDE_INT|CONST_FIXED|CONST_DOUBLE|CONST_VECTOR|CONST_STRING|REG|SUBREG|MEM|LABEL_REF|SYMBOL_REF|DEBUG_INSN"

if test $# != 1 -a $# != 2; then
echo "Usage: $0  []" >&2
exit 1
fi

rtx_code="$1"
rtx_pred="$2"

if [ "x${rtx_pred}" == "x" ]; then
rtx_pred=${rtx_code}
fi

find . -path ./testsuite -prune -o -type f -regex '.*\.\(c\|cc\|h\|md\)' \! 
-path ./rtl.h -print | xargs \
perl -pi -e 's/\bGET_CODE[ ]?(\((?:(?>[^()]+)|(?1))*\)) (?|(!)=|==) 
('${rtx_code}')\b/$2'${rtx_pred}'_P $1/g'



Re: [PATCH V5, rs6000] Support vrotr3 for int vector types

2019-08-05 Thread Kewen.Lin
Hi Segher,

on 2019/8/6 上午5:21, Segher Boessenkool wrote:
> On Mon, Aug 05, 2019 at 11:41:41AM +0800, Kewen.Lin wrote:
>> on 2019/8/4 上午4:52, Segher Boessenkool wrote:
>>> On Fri, Aug 02, 2019 at 04:59:44PM +0800, Kewen.Lin wrote:
> There are two cases: either all elements are rotated by the same amount,
> or they are not.  When they are, on p8 and later we can always use
> xxspltib, which allows immediates 0..255, and the rotates look only at
> the low bits they need, in that element, for that element (so you can
> always splat it to all bytes, all but the low-order bytes are just
> ignored by the rotate insns; before p8, we use vsplti[bhw], and those
> allow -16..15, so for vrlw you do *not* want to mask it with 31.
> There is some mechanics with easy_altivec_constant that should help
> here.  Maybe it can use some improvement.
> 
> The other case is if not all shift counts are the same.  I'm not sure
> we actually care much about this case :-)
> 

Got it, I think even for the "other" case, the neg operation without masking
is also safe since the hardware instructions do ignore the unrelated bits.

>> I thought -mdejagnu-cpu=power8 can only ensure power8 cpu setting takes
>> preference, but can't guarantee the current driver supports power8
>> compilation.  Per your comments, I guess since gcc configuration doesn't
>> have without-cpu= etc., the power8 support should be always guaranteed?
> 
> The compiler always supports all CPUs.
> 
> If you are using something like -maltivec, things are different: you
> might have selected a CPU that does not allow -maltivec, so we do need
> altivec_ok.  But if you use -mcpu=power8 (or -mdejagnu-cpu=power8), you
> can use all p8 insns, including the vector ones (unless you disable
> them again with -mno-vsx or similar; just don't do that ;-) )
> 
> [ In the past, it was possible to configure the compiler without support
> for p8 vector insns, if your assembler doesn't support them.  We do not
> do this anymore: now, if your compiler does support things that your
> assembler does not, you'll get errors from that assembler if you try to
> use those instructions.  Which is fine, just make sure you use a new
> enough assembler for the GCC version you use.  This always has been true,
> but with a somewhat larger window of allowed versions.  But this "don't
> support all insns if the assembler does not" means we need to test a lot
> more configurations (or leave them untested, even worse).
> 
> As a side effect, most *_ok now do nothing.  *_hw of course is still
> needed to check if the test system allows running the testcase.  ]
> 

Thanks a lot for the detailed explanation!  I'll remove p8vector_ok.

>> gcc/ChangeLog
>>
>> 2019-08-05  Kewen Lin  
>>
>>  * config/rs6000/vector.md (vrotr3): New define_expand.
>>
>> gcc/testsuite/ChangeLog
>>
>> 2019-08-05  Kewen Lin  
>>
>>  * gcc.target/powerpc/vec_rotate-1.c: New test.
>>  * gcc.target/powerpc/vec_rotate-2.c: New test.
>>  * gcc.target/powerpc/vec_rotate-3.c: New test.
>>  * gcc.target/powerpc/vec_rotate-4.c: New test.
> 
> Approved for trunk (with or without the p8vector_ok change).  Thank you!
> 

Thank you!  :)


Kewen



[PATCH][c++] Do not warn about unused macros while processing #pragma GCC optimize

2019-08-05 Thread Piotr H. Dabrowski
Fixes c++/91318.

libcpp/ChangeLog:

2019-08-06  Piotr Henryk Dabrowski  

PR c++/91318
* include/cpplib.h: Added cpp_define_unused(), 
cpp_define_formatted_unused()
* directives.c: Likewise.

gcc/c-family/ChangeLog:

2019-08-06  Piotr Henryk Dabrowski  

PR c++/91318
* c-cppbuiltin.c: c_cpp_builtins_optimize_pragma(): use 
cpp_define_unused()


diff --git a/gcc/c-family/c-cppbuiltin.c b/gcc/c-family/c-cppbuiltin.c
index d389f8ca4a0..47d0cefb85a 100644
--- a/gcc/c-family/c-cppbuiltin.c
+++ b/gcc/c-family/c-cppbuiltin.c
@@ -576,41 +576,41 @@ c_cpp_builtins_optimize_pragma (cpp_reader *pfile, tree 
prev_tree,
   /* Other target-independent built-ins determined by command-line
  options.  */
   if (!prev->x_optimize_size && cur->x_optimize_size)
-cpp_define (pfile, "__OPTIMIZE_SIZE__");
+cpp_define_unused (pfile, "__OPTIMIZE_SIZE__");
   else if (prev->x_optimize_size && !cur->x_optimize_size)
 cpp_undef (pfile, "__OPTIMIZE_SIZE__");
 
   if (!prev->x_optimize && cur->x_optimize)
-cpp_define (pfile, "__OPTIMIZE__");
+cpp_define_unused (pfile, "__OPTIMIZE__");
   else if (prev->x_optimize && !cur->x_optimize)
 cpp_undef (pfile, "__OPTIMIZE__");
 
   prev_fast_math = fast_math_flags_struct_set_p (prev);
   cur_fast_math  = fast_math_flags_struct_set_p (cur);
   if (!prev_fast_math && cur_fast_math)
-cpp_define (pfile, "__FAST_MATH__");
+cpp_define_unused (pfile, "__FAST_MATH__");
   else if (prev_fast_math && !cur_fast_math)
 cpp_undef (pfile, "__FAST_MATH__");
 
   if (!prev->x_flag_signaling_nans && cur->x_flag_signaling_nans)
-cpp_define (pfile, "__SUPPORT_SNAN__");
+cpp_define_unused (pfile, "__SUPPORT_SNAN__");
   else if (prev->x_flag_signaling_nans && !cur->x_flag_signaling_nans)
 cpp_undef (pfile, "__SUPPORT_SNAN__");
 
   if (!prev->x_flag_errno_math && cur->x_flag_errno_math)
 cpp_undef (pfile, "__NO_MATH_ERRNO__");
   else if (prev->x_flag_errno_math && !cur->x_flag_errno_math)
-cpp_define (pfile, "__NO_MATH_ERRNO__");
+cpp_define_unused (pfile, "__NO_MATH_ERRNO__");
 
   if (!prev->x_flag_finite_math_only && cur->x_flag_finite_math_only)
 {
   cpp_undef (pfile, "__FINITE_MATH_ONLY__");
-  cpp_define (pfile, "__FINITE_MATH_ONLY__=1");
+  cpp_define_unused (pfile, "__FINITE_MATH_ONLY__=1");
 }
   else if (prev->x_flag_finite_math_only && !cur->x_flag_finite_math_only)
 {
   cpp_undef (pfile, "__FINITE_MATH_ONLY__");
-  cpp_define (pfile, "__FINITE_MATH_ONLY__=0");
+  cpp_define_unused (pfile, "__FINITE_MATH_ONLY__=0");
 }
 }
 
diff --git a/libcpp/directives.c b/libcpp/directives.c
index ddf8979d513..9a774c9ed04 100644
--- a/libcpp/directives.c
+++ b/libcpp/directives.c
@@ -2392,6 +2392,15 @@ cpp_define (cpp_reader *pfile, const char *str)
   run_directive (pfile, T_DEFINE, buf, count);
 }
 
+/* Like cpp_define, but does not warn about unused macro.  */
+void
+cpp_define_unused (cpp_reader *pfile, const char *str)
+{
+unsigned char warn_unused_macros = CPP_OPTION (pfile, warn_unused_macros);
+CPP_OPTION (pfile, warn_unused_macros) = 0;
+cpp_define (pfile, str);
+CPP_OPTION (pfile, warn_unused_macros) = warn_unused_macros;
+}
 
 /* Use to build macros to be run through cpp_define() as
described above.
@@ -2411,6 +2420,20 @@ cpp_define_formatted (cpp_reader *pfile, const char 
*fmt, ...)
   free (ptr);
 }
 
+/* Like cpp_define_formatted, but does not warn about unused macro.  */
+void
+cpp_define_formatted_unused (cpp_reader *pfile, const char *fmt, ...)
+{
+  char *ptr;
+
+  va_list ap;
+  va_start (ap, fmt);
+  ptr = xvasprintf (fmt, ap);
+  va_end (ap);
+
+  cpp_define_unused (pfile, ptr);
+  free (ptr);
+}
 
 /* Slight variant of the above for use by initialize_builtins.  */
 void
diff --git a/libcpp/include/cpplib.h b/libcpp/include/cpplib.h
index a645f8136a6..8d3f9082601 100644
--- a/libcpp/include/cpplib.h
+++ b/libcpp/include/cpplib.h
@@ -1052,8 +1052,12 @@ extern cppchar_t cpp_host_to_exec_charset (cpp_reader *, 
cppchar_t);
 /* Used to register macros and assertions, perhaps from the command line.
The text is the same as the command line argument.  */
 extern void cpp_define (cpp_reader *, const char *);
+extern void cpp_define_unused (cpp_reader *, const char *);
 extern void cpp_define_formatted (cpp_reader *pfile, 
  const char *fmt, ...) ATTRIBUTE_PRINTF_2;
+extern void cpp_define_formatted_unused (cpp_reader *pfile,
+const char *fmt,
+...) ATTRIBUTE_PRINTF_2;
 extern void cpp_assert (cpp_reader *, const char *);
 extern void cpp_undef (cpp_reader *, const char *);
 extern void cpp_unassert (cpp_reader *, const char *);


Re: C++ PATCH for DR 2413 - typename in conversion-function-ids

2019-08-05 Thread Jason Merrill
OK.

On Mon, Aug 5, 2019 at 8:03 PM Marek Polacek  wrote:
>
> While updating the C++ DR table, I noticed that one of the new DRs, DR 2413,
> is trivial:
> ---
> The “Down with typename!” paper, P0634R3, overlooked the case of a 
> conversion-type-id
> in a conversion-function-id:
>
>   template struct S {
> operator typename T::X(); // typename is not helpful here.
>   };
>
> This context should be added to the list of contexts in which a qualified-id 
> is assumed
> to name a type.
> ---
>
> It's still in "drafting", but it looks like a shoo-in to me.
>
> Bootstrapped/regtested on x86_64-linux, ok for trunk?
>
> 2019-08-05  Marek Polacek  
>
> DR 2413 - typename in conversion-function-ids.
> * parser.c (cp_parser_conversion_type_id): Call
> cp_parser_type_specifier_seq with CP_PARSER_FLAGS_TYPENAME_OPTIONAL
> instead of CP_PARSER_FLAGS_NONE.
>
> * g++.dg/cpp2a/typename17.C: New test.
>
> diff --git gcc/cp/parser.c gcc/cp/parser.c
> index 86857e7d468..83e6d24a9c1 100644
> --- gcc/cp/parser.c
> +++ gcc/cp/parser.c
> @@ -14844,8 +14844,9 @@ cp_parser_conversion_type_id (cp_parser* parser)
>parser->type_definition_forbidden_message
>  = G_("types may not be defined in a conversion-type-id");
>
> -  /* Parse the type-specifiers.  */
> -  cp_parser_type_specifier_seq (parser, CP_PARSER_FLAGS_NONE,
> +  /* Parse the type-specifiers.  DR 2413 clarifies that `typename' is
> + optional in conversion-type-id.  */
> +  cp_parser_type_specifier_seq (parser, CP_PARSER_FLAGS_TYPENAME_OPTIONAL,
> /*is_declaration=*/false,
> /*is_trailing_return=*/false,
> &type_specifiers);
> diff --git gcc/testsuite/g++.dg/cpp2a/typename17.C 
> gcc/testsuite/g++.dg/cpp2a/typename17.C
> new file mode 100644
> index 000..bf534f1717f
> --- /dev/null
> +++ gcc/testsuite/g++.dg/cpp2a/typename17.C
> @@ -0,0 +1,6 @@
> +// DR 2413 - typename in conversion-function-ids.
> +// { dg-do compile { target c++2a } }
> +
> +template struct S {
> +  operator T::X();
> +};


[committed] add test for PR 50476

2019-08-05 Thread Martin Sebor

The expected warning is now issued so I resolved the bug and
committed the test below in r274135.

As an aside, the warning points to the pointer dereference
but mentions the name of the local variable to which it points.
Since the local variable is defined in a different function
whose name isn't printed the warning is liable to be hard to
deal with in non-trivial code.  I think it needs to print
the inlining stack to be more user-friendly.

Martin

Index: gcc/testsuite/gcc.dg/uninit-pr50476.c
===
--- gcc/testsuite/gcc.dg/uninit-pr50476.c   (nonexistent)
+++ gcc/testsuite/gcc.dg/uninit-pr50476.c   (working copy)
@@ -0,0 +1,18 @@
+/* PR middle-end/50476 - Warn of pointer set to object whose lifetime 
is limited

+   { dg-do compile }
+   { dg-options "-O1 -Wall" } */
+
+int *x = 0;
+
+void f (void)
+{
+  int y = 1;
+  x = &y;
+}
+
+int g (void)
+{
+  f ();
+
+  return *x;// { dg-warning "\\\[-Wuninitialized" }
+}


[committed] add C++ test for PR60517

2019-08-05 Thread Martin Sebor

The expected diagnostic is now issued so I resolved the bug
and committed the test in r274130.

Martin

Index: gcc/testsuite/g++.dg/pr60517.C
===
--- gcc/testsuite/g++.dg/pr60517.C  (nonexistent)
+++ gcc/testsuite/g++.dg/pr60517.C  (revision 274130)
@@ -0,0 +1,22 @@
+// PR c++/60517 - warning/error for taking address of member of a temporary
+// object
+// { dg-do compile }
+
+class B
+{
+public:
+  double x[2];
+};
+
+class A
+{
+  B b;
+public:
+  B getB () { return b; }
+};
+
+double foo (A a)
+{
+  double * x = &(a.getB().x[0]);   // { dg-error "taking address of 
rvalue" }

+  return x[0];
+}


C++ PATCH for DR 2413 - typename in conversion-function-ids

2019-08-05 Thread Marek Polacek
While updating the C++ DR table, I noticed that one of the new DRs, DR 2413,
is trivial:
---
The “Down with typename!” paper, P0634R3, overlooked the case of a 
conversion-type-id
in a conversion-function-id:

  template struct S {
operator typename T::X(); // typename is not helpful here.
  };

This context should be added to the list of contexts in which a qualified-id is 
assumed
to name a type.
---

It's still in "drafting", but it looks like a shoo-in to me.

Bootstrapped/regtested on x86_64-linux, ok for trunk?

2019-08-05  Marek Polacek  

DR 2413 - typename in conversion-function-ids.
* parser.c (cp_parser_conversion_type_id): Call
cp_parser_type_specifier_seq with CP_PARSER_FLAGS_TYPENAME_OPTIONAL
instead of CP_PARSER_FLAGS_NONE.

* g++.dg/cpp2a/typename17.C: New test.

diff --git gcc/cp/parser.c gcc/cp/parser.c
index 86857e7d468..83e6d24a9c1 100644
--- gcc/cp/parser.c
+++ gcc/cp/parser.c
@@ -14844,8 +14844,9 @@ cp_parser_conversion_type_id (cp_parser* parser)
   parser->type_definition_forbidden_message
 = G_("types may not be defined in a conversion-type-id");
 
-  /* Parse the type-specifiers.  */
-  cp_parser_type_specifier_seq (parser, CP_PARSER_FLAGS_NONE,
+  /* Parse the type-specifiers.  DR 2413 clarifies that `typename' is
+ optional in conversion-type-id.  */
+  cp_parser_type_specifier_seq (parser, CP_PARSER_FLAGS_TYPENAME_OPTIONAL,
/*is_declaration=*/false,
/*is_trailing_return=*/false,
&type_specifiers);
diff --git gcc/testsuite/g++.dg/cpp2a/typename17.C 
gcc/testsuite/g++.dg/cpp2a/typename17.C
new file mode 100644
index 000..bf534f1717f
--- /dev/null
+++ gcc/testsuite/g++.dg/cpp2a/typename17.C
@@ -0,0 +1,6 @@
+// DR 2413 - typename in conversion-function-ids.
+// { dg-do compile { target c++2a } }
+
+template struct S {
+  operator T::X();
+};


Re: [doc PATCH] document variable attribute alias

2019-08-05 Thread Sandra Loosemore

On 8/5/19 4:43 PM, Martin Sebor wrote:

On 8/4/19 9:45 PM, Sandra Loosemore wrote:

[snip]

OK with that fixed.


Done in r274127.

Since people tend to refer to the manual for older versions
of the compiler I'd like to make this change on supported
release branches as well.  If you see any problem with it
please let me know, otherwise I'll go ahead later this week.


OK with me.  My understanding is that backporting documentation fixes is 
always allowed unless the branch is frozen.


-Sandra


Re: [PATCH] Add future.md placeholder to PowerPC

2019-08-05 Thread Michael Meissner
Whoops, I forgot to include the ChangeLog entry when I committed subversion id
274030 to add future.md.  I just added it in subversion id 274129.

2019-08-02  Michael Meissner  

* config/rs6000/future.md: New file.
* config/rs6000/rs6000.md: Include future.md.
* config/rs6000/t-rs6000 (MD_INCLUDES): Add future.md.

-- 
Michael Meissner, IBM
IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA
email: meiss...@linux.ibm.com, phone: +1 (978) 899-4797


[PATCH] RISC-V: Fix C ABI for flattened struct with 0-length bitfield.

2019-08-05 Thread Jim Wilson
This fixes a problem with the GCC implementation of the ABI, where we are
accidentally emitting different code for the same struct when compiled by the
C and C++ compilers.  This was found by LLVM developers comparing the LLVM ABI
to the GCC ABI.

This affects any struct with one or more zero-length bitfields, exactly two
non-zero length fields, one of which must have an FP type that can fit in an
FP register, and the other can be an FP type that fits in an FP register or
an integer type that fits in an integer register or a integer bit-field that
can fit in an integer register.  Also, the target must have FP register
support.

The affected structures are a bit obscure and not very useful, so it is hoped
no real code will be affected.  I've done an open-embedded world build with an
instrumented compiler, and I didn't see any case that triggered my code.  Not
everything built though, since some stuff still doesn't have RISC-V support
yet.  I did have over 30,000 tasks run, so quite a bit of stuff did build.

The fundamental problem is that the RISC-V backend is not checking for
zero-length bit-fields when deciding if a struct field can be passed in a FP
register or not.  Meanwhile, the C++ front end is stripping zero-length
bit-fields after struct layout.  So when compiling as C++ we decide that the
FP struct fields can be passed in FP regs.  But when compiling as C we decide
that there are too many struct fields and they all get passed in integer
registers.

This issue has already been discussed with the RISC-V psABI group and the
RISC-V LLVM developers and this is how we have agreed to move forward, by
changing the ABI implemented by the GCC C (but not C++) compiler.

I will wait a few days before committing in case someone wants to discuss this.

Jim

gcc/
PR target/91229
* config/riscv/riscv.c (riscv_flatten_aggregate_field): New arg
ignore_zero_width_bit_field_p.  Skip zero size bitfields when true.
Pass into recursive call.
(riscv_flatten_aggregate_argument): New arg.  Pass to
riscv_flatten_aggregate_field.
(riscv_pass_aggregate_in_fpr_pair_p): New local warned.  Call
riscv_flatten_aggregate_argument twice, with false and true as last
arg.  Process result twice.  Compare results and warn if different.
(riscv_pass_aggregate_in_fpr_and_gpr_p): Likewise.

gcc/testsuite/
* gcc.target/riscv/flattened-struct-abi-1.c: New test.
* gcc.target/riscv/flattened-struct-abi-2.c: New test.
---
 gcc/config/riscv/riscv.c  | 92 +++
 .../gcc.target/riscv/flattened-struct-abi-1.c |  9 ++
 .../gcc.target/riscv/flattened-struct-abi-2.c |  9 ++
 3 files changed, 94 insertions(+), 16 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/flattened-struct-abi-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/flattened-struct-abi-2.c

diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c
index e274f1bd10f..78a87e2b1d6 100644
--- a/gcc/config/riscv/riscv.c
+++ b/gcc/config/riscv/riscv.c
@@ -2383,7 +2383,8 @@ typedef struct {
 static int
 riscv_flatten_aggregate_field (const_tree type,
   riscv_aggregate_field fields[2],
-  int n, HOST_WIDE_INT offset)
+  int n, HOST_WIDE_INT offset,
+  bool ignore_zero_width_bit_field_p)
 {
   switch (TREE_CODE (type))
 {
@@ -2400,8 +2401,21 @@ riscv_flatten_aggregate_field (const_tree type,
if (!TYPE_P (TREE_TYPE (f)))
  return -1;
 
-   HOST_WIDE_INT pos = offset + int_byte_position (f);
-   n = riscv_flatten_aggregate_field (TREE_TYPE (f), fields, n, pos);
+   /* The C++ front end strips zero-length bit-fields from structs.
+  So we need to ignore them in the C front end to make C code
+  compatible with C++ code.  */
+   if (ignore_zero_width_bit_field_p
+   && DECL_BIT_FIELD (f)
+   && (DECL_SIZE (f) == NULL_TREE
+   || integer_zerop (DECL_SIZE (f
+ ;
+   else
+ {
+   HOST_WIDE_INT pos = offset + int_byte_position (f);
+   n = riscv_flatten_aggregate_field (TREE_TYPE (f),
+  fields, n, pos,
+  
ignore_zero_width_bit_field_p);
+ }
if (n < 0)
  return -1;
  }
@@ -2414,7 +2428,8 @@ riscv_flatten_aggregate_field (const_tree type,
tree index = TYPE_DOMAIN (type);
tree elt_size = TYPE_SIZE_UNIT (TREE_TYPE (type));
int n_subfields = riscv_flatten_aggregate_field (TREE_TYPE (type),
-subfields, 0, offset);
+subfields, 0, offset,
+  

Re: [doc PATCH] document variable attribute alias

2019-08-05 Thread Martin Sebor

On 8/4/19 9:45 PM, Sandra Loosemore wrote:

On 7/31/19 5:05 PM, Martin Sebor wrote:

It was pointed out recently in another forum that GCC doesn't
document attribute alias for variables.  It was also noted in
the same discussion that the semantics of accessing aliases
and their targets can have "surprising" effects because GCC
(as do other compilers) assumes that distinct declarations
with external linkage denote distinct entities.

The attached patch adds text to the manual describing attribute
alias for variables and pointing out the caveat of accessing
the same object using both the alias and the target.

Martin


One minor nit:


+@item alias ("@var{target}")
+@cindex @code{alias} variable attribute
+The @code{alias} variable attribute causes the declaration to be emitted
+as an alias for another symbol known as an @dfn{alias target}.  Except
+for top-level qualifiers the alias target must have the same type as
+the alias.  For instance, the following
+
+@smallexample
+int var_target;
+extern int __attribute__ ((alias ("var_target"))) var_alias;
+@end smallexample
+
+defines @code{var_alias} to be an alias for the @code{var_target} 
variable.



Please use @noindent on the continuation of the sentence after the @end 
smallexample.


OK with that fixed.


Done in r274127.

Since people tend to refer to the manual for older versions
of the compiler I'd like to make this change on supported
release branches as well.  If you see any problem with it
please let me know, otherwise I'll go ahead later this week.

Thanks
Martin


Re: [PATCH 1/3] add -Wstruct-not-pod, -Wclass-is-pod, -Wmismatched-tags (PR 61339)

2019-08-05 Thread Jason Merrill
On Mon, Aug 5, 2019 at 5:50 PM Martin Sebor  wrote:
>
> On 8/5/19 1:25 PM, Jason Merrill wrote:
> > On 8/1/19 7:35 PM, Martin Sebor wrote:
> >> On 8/1/19 12:09 PM, Jason Merrill wrote:
> >>> On 7/22/19 12:34 PM, Martin Sebor wrote:
>  Ping: https://gcc.gnu.org/ml/gcc-patches/2019-07/msg00622.html
> 
>  On 7/8/19 3:58 PM, Martin Sebor wrote:
> > The attached patch implements three new warnings:
> >
> >   *  -Wstruct-not-pod triggers for struct definitions that are not
> >  POD structs,
> >   *  -Wclass-is-pod triggers for class definitions that satisfy
> >  the requirements on POD structs, and
> >   *  -Wmismatched-tags that triggers for class and struct declarations
> >  with class-key that doesn't match either their definition or
> >  the first declaration (if no definition is provided).
> >
> > The implementation of -Wclass-is-pod and -Wstruct-not-pod is fairly
> > straightforward but the -Wmismatched-tags solution is slightly
> > unusual.
> > It collects struct and class declarations first and diagnoses
> > mismatches
> > only after the whole tramslation unit has been processed.  This is so
> > that the definition of a class can guide which declarations to
> > diagnose
> > no matter which come first.
> >>>
> >>> As Jonathan and I were saying in the connected discussion, the *pod
> >>> warnings enforce questionable coding guidelines, so I'd rather not
> >>> have them in the compiler.
> >>
> >> What specifically do you consider questionable?
> >
> >> When defining a new class one has to decide whether to use 'struct'
> >> and when to use 'class'.  Since there's no difference, adopting any
> >> convention to help make a consistent choice seems like a natural
> >> thing to do.  You could say (as I think someone in this thread did):
> >> "make it 'class' when it has a ctor or dtor."  Someone else might
> >> add: "or assignment."  Someone else still might go even further
> >> and include "private members or bases or virtual functions."  Since
> >> all the type definitions end up equivalent regardless of the class-key,
> >> all these choices seem equally reasonable.  But all of them are also
> >> arbitrary.  The only difference is that by being based on a common
> >> (and at least historically, widely relied on) property, the POD
> >> convention is less arbitrary.  That's probably why it's apparently
> >> quite popular (as evident from the Stack Exchange thread).
> >
> > Yes, all of them are somewhat arbitrary.  This one seems like a poor
> > choice, for the reasons Jon and I gave in the other thread.  IMO
> > 'aggregate' would be a better property to use.
> >
> > Which Stack Exchange thread do you mean?
>
> The one I mentioned here:
>https://gcc.gnu.org/ml/gcc-patches/2019-07/msg01472.html
>
> i.e. the most popular answer to the question
>
>When should you use a class vs a struct in C++?
>
> is
>
>I would recommend using structs as plain-old-data structures
>without any class-like features, and using classes as aggregate
>data structures with private data and member functions.
>
>https://stackoverflow.com/questions/54585
>
> My goal with the warning is to provide a way to help enforce
> the convention we know is in use, both in GCC (at least until
> it's removed) and in other projects.  I'm not interested in
> inventing something new even if it's "better" (in some sense)
> than the convention we know is in use.

We don't know that this is in use as a strict division, anywhere.  All
the answers at that URL are using the term pretty loosely.  The top
answer says, "I would recommend using structs as plain-old-data
structures without any class-like features, and using classes as
aggregate data structures with private data and member functions."

A struct containing a std::string is not using any class-like
features, but is not a POD.  I oppose a coding convention that
requires the user to change that to a class with public: at the top.
And so I oppose introducing warnings to enforce such a convention.

> That said, as the C++ standard itself says, PODs (or in recent
> revisions, trivial standard-layout types) are most commonly
> associated with interoperability with C (and other languages).
> Aggregates can have data members of types that don't exist in
> C (references, pointers to members).  So a convention that uses
> the keyword 'struct' to reflect the common interoperability
> aspect or that wants to convey that a C++ struct looks and feels
> like one would expect of a C struct, will want to be based on
> the concept of POD, not that of an aggregate.

Note that the mention of pointers to members was removed from the POD
definition in C++03.  And then the term itself was deemed not a useful
category.

To me, the important C-like look and feel of a 'struct' is that it is
intended for users to access the data directly, rather than through
member functions.  A struct with a string qualif

Re: [C++ Patch] One more cp_expr_loc_or_input_loc

2019-08-05 Thread Jason Merrill
OK.

On Mon, Aug 5, 2019 at 5:09 PM Paolo Carlini  wrote:
>
> Hi,
>
> just spotted an additional error which can benefit from
> cp_expr_loc_or_input_loc. Tested x86_64-linux.
>
> Thanks, Paolo.
>
> 
>


[wwwdocs] Update C++ DR table

2019-08-05 Thread Marek Polacek
A couple of new DRs, plus some status updates.

Applied to CVS.

Index: cxx-dr-status.html
===
RCS file: /cvs/gcc/wwwdocs/htdocs/projects/cxx-dr-status.html,v
retrieving revision 1.25
diff -u -r1.25 cxx-dr-status.html
--- cxx-dr-status.html  1 Aug 2019 15:04:03 -   1.25
+++ cxx-dr-status.html  5 Aug 2019 22:14:44 -
@@ -4793,11 +4793,11 @@
   ?
   
 
-
+
   http://wg21.link/cwg682";>682
-  drafting
+  tentatively ready
   Missing description of lookup of template aliases
-  -
+  ?
   
 
 
@@ -12897,11 +12897,11 @@
   ?
   
 
-
+
   http://wg21.link/cwg1839";>1839
-  drafting
+  review
   Lookup of block-scope extern declarations
-  -
+  ?
   https://gcc.gnu.org/PR86181";>PR86181
 
 
@@ -15473,11 +15473,11 @@
   ?
   
 
-
+
   http://wg21.link/cwg2207";>2207
-  drafting
+  tentatively ready
   Alignment of allocation function return value
-  -
+  ?
   
 
 
@@ -15620,11 +15620,11 @@
   ?
   
 
-
+
   http://wg21.link/cwg2228";>2228
-  drafting
+  review
   Ambiguity resolution for cast to function type
-  -
+  ?
   
 
 
@@ -15657,7 +15657,7 @@
 
 
   http://wg21.link/cwg2233";>2233
-  tentatively ready
+  DRWP
   Function parameter packs following default arguments
   ?
   
@@ -15769,7 +15769,7 @@
 
 
   http://wg21.link/cwg2249";>2249
-  tentatively ready
+  DRWP
   identifiers and id-expressions
   ?
   
@@ -16021,7 +16021,7 @@
 
 
   http://wg21.link/cwg2285";>2285
-  tentatively ready
+  DRWP
   Issues with structured bindings
   ?
   
@@ -16124,11 +16124,11 @@
   ?
   
 
-
+
   http://wg21.link/cwg2300";>2300
-  drafting
+  tentatively ready
   Lambdas in multiple definitions
-  -
+  ?
   
 
 
@@ -16483,7 +16483,7 @@
 
 
   http://wg21.link/cwg2351";>2351
-  tentatively ready
+  DRWP
   void{}
   ?
   
@@ -16518,7 +16518,7 @@
 
 
   http://wg21.link/cwg2356";>2356
-  tentatively ready
+  DRWP
   Base class copy and move constructors should not be inherited
   ?
   
@@ -16588,7 +16588,7 @@
 
 
   http://wg21.link/cwg2366";>2366
-  drafting
+  tentatively ready
   Can default initialization be constant initialization?
   ?
   
@@ -16656,11 +16656,11 @@
   ?
   
 
-
+
   http://wg21.link/cwg2376";>2376
-  drafting
+  tentatively ready
   Class template argument deduction with array declarator
-  -
+  ?
   
 
 
@@ -16740,12 +16740,12 @@
   ?
   
 
-
+
   http://wg21.link/cwg2388";>2388
-  drafting
+  NAD
   Applicability of contract-attribute-specifiers
-  -
-  
+  Yes
+  contracts removed
 
 
   http://wg21.link/cwg2389";>2389
@@ -16754,11 +16754,11 @@
   -
   
 
-
+
   http://wg21.link/cwg2390";>2390
-  drafting
+  tentatively ready
   Is the argument of __has_cpp_attribute 
macro-expanded?
-  -
+  ?
   
 
 
@@ -16824,11 +16824,11 @@
   -
   
 
-
+
   http://wg21.link/cwg2400";>2400
-  drafting
+  tentatively ready
   Constexpr virtual functions and temporary objects
-  -
+  ?
   
 
 
@@ -16845,11 +16845,130 @@
   -
   
 
+
+  http://wg21.link/cwg2403";>2403
+  open
+  Temporary materialization and base/member initialization
+  -
+  
+
+
+  http://wg21.link/cwg2404";>2404
+  tentatively ready
+  [[no_unique_address]] and allocation order 
+  ?
+  
+
+
+  http://wg21.link/cwg2405";>2405
+  drafting
+  Additional type-dependent expressions
+  -
+  
+
+
+  http://wg21.link/cwg2406";>2406
+  tentatively ready
+  [[fallthrough]] attribute and iteration statements
+  ?
+  
+
+
+  http://wg21.link/cwg2407";>2407
+  review
+  Missing entry in Annex C for defaulted comparison operators
+  ?
+  
+
+
+  http://wg21.link/cwg2408";>2408
+  open
+  Temporaries and previously-initialized elements in aggregate 
initialization
+  -
+  
+
+
+  http://wg21.link/cwg2409";>2409
+  drafting
+  Explicit specializations of constexpr static data members
+  -
+  
+
+
+  http://wg21.link/cwg2410";>2410
+  review
+  Implicit calls of immediate functions
+  ?
+  
+
+
+  http://wg21.link/cwg2411";>2411
+  open
+  Comparison of pointers to members in template non-type arguments
+  -
+  
+
+
+  http://wg21

Re: [PATCH 02/10, OpenACC] Add OpenACC target kinds for decomposed kernels regions

2019-08-05 Thread Kwok Cheung Yeung
I have run the whole patch series through check_GNU_style.sh and fixed 
up the formatting where indicated. Do I need to post the reformatted 
patchset?


Thanks

Kwok

On 18/07/2019 10:24 am, Jakub Jelinek wrote:

On Wed, Jul 17, 2019 at 10:04:10PM +0100, Kwok Cheung Yeung wrote:

@@ -2319,7 +2339,8 @@ scan_omp_for (gomp_for *stmt, omp_context *outer_ctx)
  {
omp_context *tgt = enclosing_target_ctx (outer_ctx);

-  if (!tgt || is_oacc_parallel (tgt))
+  if (!tgt || (is_oacc_parallel (tgt)
+&& !was_originally_oacc_kernels (tgt)))
for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
  {
char const *check = NULL;


Please watch up formatting, the above doesn't use tabs where it should.
Have you run the series through contrib/check_GNU_style.sh ?

Otherwise, no concerns about this particular patch, assuming Thomas is ok
with it.

Jakub



Re: [PATCH 06/10, OpenACC] Adjust parallelism of loops in gang-single parts of OpenACC kernels regions

2019-08-05 Thread Kwok Cheung Yeung
The change to patch 04 (Turn OpenACC kernels regions into a sequence of 
parallel regions) necessitates an additional include of 
'diagnostic-core.h' in omp-oacc-kernels.c, as it is no longer indirectly 
included by 'cp/cp-tree.h'.


Kwok

On 17/07/2019 10:12 pm, Kwok Cheung Yeung wrote:

Loops in gang-single parts of kernels regions cannot be executed in
gang-redundant mode. If the user specified gang clauses on such loops, 
emit an error and remove these clauses. Adjust automatic partitioning to 
exclude gang partitioning in gang-single regions.




Re: [PATCH 04/10, OpenACC] Turn OpenACC kernels regions into a sequence of, parallel regions

2019-08-05 Thread Kwok Cheung Yeung

On 18/07/2019 10:30 am, Jakub Jelinek wrote:

On Wed, Jul 17, 2019 at 10:06:07PM +0100, Kwok Cheung Yeung wrote:

--- a/gcc/omp-oacc-kernels.c
+++ b/gcc/omp-oacc-kernels.c
@@ -30,6 +30,7 @@ along with GCC; see the file COPYING3.  If not see
  #include "backend.h"
  #include "target.h"
  #include "tree.h"
+#include "cp/cp-tree.h"


No, you certainly don't want to do this.  Use langhooks if needed, though
that can be only for stuff done before IPA.  After IPA, because of LTO FE, you
must not rely on anything that is not in the IL generically.



I have modified the patch to use the get_generic_function_decl langhook 
to determine whether current_function_decl is an instantiation of a 
template (in this case, we don't care what the generic decl is - just 
whether the function decl has one).



+  /* Build the gang-single region.  */
+  gimple *single_region
+= gimple_build_omp_target (
+NULL,
+GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE,
+gang_single_clause);


Formatting, both lack of tab uses, and ( at the end of line is very ugly.
Either try to use shorter GF_OMP_TARGET_KIND names, or say set a local
variable to that value and use that as an argument to the function to make
it shorter and more readable.



I have fixed this and other formatting problems in the patch, though 
this particular code is modified in patch 08 (New OpenACC kernels region 
decompose algorithm) anyway.


Kwok


2019-07-16  Gergö Barany  
Kwok Cheung Yeung  

gcc/
* omp-oacc-kernels.c: Include langhooks.h.
(top_level_omp_for_in_stmt): New function.
(make_gang_single_region): Likewise.
(transform_kernels_loop_clauses, make_gang_parallel_loop_region):
Likewise.
(flatten_binds): Likewise.
(make_data_region_try_statement): Likewise.
(maybe_build_inner_data_region): Likewise.
(decompose_kernels_region_body): Likewise.
(transform_kernels_region): Delegate to decompose_kernels_region_body
and make_data_region_try_statement.

gcc/testsuite/
* c-c++-common/goacc/kernels-conversion.c: Test for a gang-single
region.
* gfortran.dg/goacc/kernels-conversion.f95: Likewise.
---
 gcc/omp-oacc-kernels.c | 562 
-

 .../c-c++-common/goacc/kernels-conversion.c|  11 +-
 .../gfortran.dg/goacc/kernels-conversion.f95   |  11 +-
 3 files changed, 561 insertions(+), 23 deletions(-)

diff --git a/gcc/omp-oacc-kernels.c b/gcc/omp-oacc-kernels.c
index 5ec24f3..03c18dc 100644
--- a/gcc/omp-oacc-kernels.c
+++ b/gcc/omp-oacc-kernels.c
@@ -30,6 +30,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "backend.h"
 #include "target.h"
 #include "tree.h"
+#include "langhooks.h"
 #include "gimple.h"
 #include "tree-pass.h"
 #include "cgraph.h"
@@ -45,16 +46,552 @@ along with GCC; see the file COPYING3.  If not see
For now, the translation is as follows:
- The entire kernels region is turned into a data region with clauses
  taken from the kernels region.  New "create" clauses are added 
for all

- variables declared at the top level in the kernels region.  */
+ variables declared at the top level in the kernels region.
+   - Any loop annotated with an OpenACC loop directive is wrapped in a new
+ parallel region.  Gang/worker/vector annotations are copied from the
+ original kernels region if present.
+ * Loops without an explicit "independent" or "seq" annotation get an
+   "auto" annotation; other annotations are preserved on the loop or
+   moved to the new surrounding parallel region.  Which annotations are
+   moved is determined by the constraints in the OpenACC spec; for
+   example, loops in the kernels region may have a gang clause, but
+   such annotations must now be moved to the new parallel region.
+   - Any sequences of other code (non-loops, non-OpenACC loops) are wrapped
+ in new "gang-single" parallel regions: Worker/vector annotations are
+ copied from the original kernels region if present, but num_gangs is
+ explicitly set to 1.  */
+
+/* Helper function for decompose_kernels_region_body.  If STMT contains a
+   "top-level" OMP_FOR statement, returns a pointer to that statement;
+   returns NULL otherwise.
+
+   A "top-level" OMP_FOR statement is one that is possibly accompanied by
+   small snippets of setup code.  Specifically, this function accepts an
+   OMP_FOR possibly wrapped in a singleton bind and a singleton try
+   statement to allow for a local loop variable, but not an OMP_FOR
+   statement nested in any other constructs.  Alternatively, it accepts a
+   non-singleton bind containing only assignments and then an OMP_FOR
+   statement at the very end.  The former style can be generated by the C
+   frontend, the latter by the Fortran frontend.  */
+
+static gimple *
+top_level_omp_for_in_stmt (gimple *stmt)
+{
+  if (gimple_code (s

Re: [PATCH 1/3] add -Wstruct-not-pod, -Wclass-is-pod, -Wmismatched-tags (PR 61339)

2019-08-05 Thread Martin Sebor

On 8/5/19 1:25 PM, Jason Merrill wrote:

On 8/1/19 7:35 PM, Martin Sebor wrote:

On 8/1/19 12:09 PM, Jason Merrill wrote:

On 7/22/19 12:34 PM, Martin Sebor wrote:

Ping: https://gcc.gnu.org/ml/gcc-patches/2019-07/msg00622.html

On 7/8/19 3:58 PM, Martin Sebor wrote:

The attached patch implements three new warnings:

  *  -Wstruct-not-pod triggers for struct definitions that are not
 POD structs,
  *  -Wclass-is-pod triggers for class definitions that satisfy
 the requirements on POD structs, and
  *  -Wmismatched-tags that triggers for class and struct declarations
 with class-key that doesn't match either their definition or
 the first declaration (if no definition is provided).

The implementation of -Wclass-is-pod and -Wstruct-not-pod is fairly
straightforward but the -Wmismatched-tags solution is slightly 
unusual.
It collects struct and class declarations first and diagnoses 
mismatches

only after the whole tramslation unit has been processed.  This is so
that the definition of a class can guide which declarations to 
diagnose

no matter which come first.


As Jonathan and I were saying in the connected discussion, the *pod 
warnings enforce questionable coding guidelines, so I'd rather not 
have them in the compiler.


What specifically do you consider questionable?



When defining a new class one has to decide whether to use 'struct'
and when to use 'class'.  Since there's no difference, adopting any
convention to help make a consistent choice seems like a natural
thing to do.  You could say (as I think someone in this thread did):
"make it 'class' when it has a ctor or dtor."  Someone else might
add: "or assignment."  Someone else still might go even further
and include "private members or bases or virtual functions."  Since
all the type definitions end up equivalent regardless of the class-key,
all these choices seem equally reasonable.  But all of them are also
arbitrary.  The only difference is that by being based on a common
(and at least historically, widely relied on) property, the POD
convention is less arbitrary.  That's probably why it's apparently
quite popular (as evident from the Stack Exchange thread).


Yes, all of them are somewhat arbitrary.  This one seems like a poor 
choice, for the reasons Jon and I gave in the other thread.  IMO 
'aggregate' would be a better property to use.


Which Stack Exchange thread do you mean?


The one I mentioned here:
  https://gcc.gnu.org/ml/gcc-patches/2019-07/msg01472.html

i.e. the most popular answer to the question

  When should you use a class vs a struct in C++?

is

  I would recommend using structs as plain-old-data structures
  without any class-like features, and using classes as aggregate
  data structures with private data and member functions.

  https://stackoverflow.com/questions/54585

My goal with the warning is to provide a way to help enforce
the convention we know is in use, both in GCC (at least until
it's removed) and in other projects.  I'm not interested in
inventing something new even if it's "better" (in some sense)
than the convention we know is in use.

That said, as the C++ standard itself says, PODs (or in recent
revisions, trivial standard-layout types) are most commonly
associated with interoperability with C (and other languages).
Aggregates can have data members of types that don't exist in
C (references, pointers to members).  So a convention that uses
the keyword 'struct' to reflect the common interoperability
aspect or that wants to convey that a C++ struct looks and feels
like one would expect of a C struct, will want to be based on
the concept of POD, not that of an aggregate.

-Wmismatched-tags is useful to have, given the MSVC/clang situation, 
but I wonder about memory consumption from all the record keeping.  
Do you have any overhead measurements?


I did think about the overhead but not knowing if the patch would
even be considered I didn't spend time optimizing it.

In an instrumented build of GCC with the patch I just did I have
collected the following stats for the number of elements in
the rec2loc hash table (for 998 files):

   rec2loc elements   locvec elements
   min:   0 0
   max:   2,785 3,303
   mean:    537 1,043
   median:  526 1,080

The locvec data are based on totals for the whole hash table, so
in the worst case the hash_map has 2,785 elements, and the sum of
all locvec lengths in all those elements is 3,303.  Each rec2loc
element takes up 16 bytes, plus the size of the locvec data.

If my math is right (which doesn't happen too often) in the worst
case the bookkeeping overhead is 43KB for the hash_map plus on
the order of 140KB for the vectors (just doubling the number of
elements to account for capacity = 2 X size, times 24 for
the flag_func_loc_t element type).  So 183K in the worst case
in GCC.


183k cumulative over all the GCC sources doesn't sound excessive, but...


There are a few ways t

Re: [PATCH] RISC-V: Handle extensions combination correctly in multilib-generator.

2019-08-05 Thread Jim Wilson
On Mon, Aug 5, 2019 at 1:20 AM Kito Cheng  wrote:
> gcc/ChangeLog
> * gcc/config/riscv/multilib-generator: Handle extensions
> combination correctly.

A ChangeLog entry should generally describe what a patch changes, not
what it does.  So this should mention a new variable canonical_order,
a new function arch_canonicalize, and the call to pass alts through
it.

> +raise Exception("Unexpect arch: `%d`" % arch[:5])

Unexpect -> Unexpected

> +  long_ext_prefixs = ['z', 's', 'h', 'x']

prefixs -> prefixes

> +  # Concat rest multi-char extensions.

rest multi-char -> rest of the multi-char

This looks good to me with the minor English improvements Andreas and
I suggested.

Jim


Re: [PATCH V5, rs6000] Support vrotr3 for int vector types

2019-08-05 Thread Segher Boessenkool
On Mon, Aug 05, 2019 at 11:41:41AM +0800, Kewen.Lin wrote:
> on 2019/8/4 上午4:52, Segher Boessenkool wrote:
> > On Fri, Aug 02, 2019 at 04:59:44PM +0800, Kewen.Lin wrote:
> >> As to the predicate name and usage, I checked the current vector shifts, 
> >> they don't need to check const_vector specially (like right to left 
> >> conversion), excepting for the one "vec_shr_", but it checks for
> >> scalar const int.
> > 
> > I don't understand why we want to expand rotate-by-vector-of-immediates
> > if we have no insns for that?  If you just use vint_operand, what happens
> > then?
> 
> You are right, if we just use vint_operand, the functionality should be the
> same, the only small difference is the adjusted constant rotation number 
> isn't masked, but it would be fine for functionality.
> 
> One example for ULL >r 8, with const vector handling, it gets
>   xxspltib 33,56
> 
> Without the handling, it gets 
>   xxspltib 33,248
> 
> But I agree that it's trivial and unified it as below attached patch.

There are two cases: either all elements are rotated by the same amount,
or they are not.  When they are, on p8 and later we can always use
xxspltib, which allows immediates 0..255, and the rotates look only at
the low bits they need, in that element, for that element (so you can
always splat it to all bytes, all but the low-order bytes are just
ignored by the rotate insns; before p8, we use vsplti[bhw], and those
allow -16..15, so for vrlw you do *not* want to mask it with 31.
There is some mechanics with easy_altivec_constant that should help
here.  Maybe it can use some improvement.

The other case is if not all shift counts are the same.  I'm not sure
we actually care much about this case :-)

> >> +/* { dg-options "-O3" } */
> >> +/* { dg-require-effective-target powerpc_altivec_ok } */
> > 
> > If you use altivec_ok, you need to use -maltivec in the options, too.
> > This test should probably work with -O2 as well; use that, if possible.
> 
> Sorry, the test case depends on vectorization which isn't enabled at -O2
> by default.

Ah yes, that is pretty obvious.

> >> +/* { dg-require-effective-target powerpc_p8vector_ok } */
> > 
> > I don't think we need this anymore?  Not sure.
> 
> I thought -mdejagnu-cpu=power8 can only ensure power8 cpu setting takes
> preference, but can't guarantee the current driver supports power8
> compilation.  As your comments, I guess since gcc configuration don't
> have without-cpu= etc., the power8 support should be always guaranteed?

The compiler always supports all CPUs.

If you are using something like -maltivec, things are different: you
might have selected a CPU that does not allow -maltivec, so we do need
altivec_ok.  But if you use -mcpu=power8 (or -mdejagnu-cpu=power8), you
can use all p8 insns, including the vector ones (unless you disable
them again with -mno-vsx or similar; just don't do that ;-) )

[ In the past, it was possible to configure the compiler without support
for p8 vector insns, if your assembler doesn't support them.  We do not
do this anymore: now, if your compiler does support things that your
assembler does not, you'll get errors from that assembler if you try to
use those instructions.  Which is fine, just make sure you use a new
enough assembler for the GCC version you use.  This always has been true,
but with a somewhat larger window of allowed versions.  But this "don't
support all insns if the assembler does not" means we need to test a lot
more configurations (or leave them untested, even worse).

As a side effect, most *_ok now do nothing.  *_hw of course is still
needed to check if the test system allows running the testcase.  ]

> gcc/ChangeLog
> 
> 2019-08-05  Kewen Lin  
> 
>   * config/rs6000/vector.md (vrotr3): New define_expand.
> 
> gcc/testsuite/ChangeLog
> 
> 2019-08-05  Kewen Lin  
> 
>   * gcc.target/powerpc/vec_rotate-1.c: New test.
>   * gcc.target/powerpc/vec_rotate-2.c: New test.
>   * gcc.target/powerpc/vec_rotate-3.c: New test.
>   * gcc.target/powerpc/vec_rotate-4.c: New test.

Approved for trunk (with or without the p8vector_ok change).  Thank you!


Segher


[C++ Patch] One more cp_expr_loc_or_input_loc

2019-08-05 Thread Paolo Carlini

Hi,

just spotted an additional error which can benefit from 
cp_expr_loc_or_input_loc. Tested x86_64-linux.


Thanks, Paolo.



/cp
2019-08-05  Paolo Carlini  

* decl.c (check_array_designated_initializer): Use
cp_expr_loc_or_input_loc in one place.

/testsuite
2019-08-05  Paolo Carlini  

* g++.dg/cpp0x/desig1.C: Check location too.
Index: cp/decl.c
===
--- cp/decl.c   (revision 274124)
+++ cp/decl.c   (working copy)
@@ -5520,8 +5520,9 @@ check_array_designated_initializer (constructor_el
sorry ("non-trivial designated initializers not supported");
}
   else
-   error ("C99 designator %qE is not an integral constant-expression",
-  ce->index);
+   error_at (cp_expr_loc_or_input_loc (ce->index),
+ "C99 designator %qE is not an integral constant-expression",
+ ce->index);
 
   return false;
 }
Index: testsuite/g++.dg/cpp0x/desig1.C
===
--- testsuite/g++.dg/cpp0x/desig1.C (revision 274123)
+++ testsuite/g++.dg/cpp0x/desig1.C (working copy)
@@ -25,5 +25,5 @@ struct C
   constexpr operator SE() const { return SE::se0; }
 };
 
-int c[] = { [C()] = 0 }; // { dg-error "integral constant-expression" }
+int c[] = { [C()] = 0 }; // { dg-error "14:C99 designator .C\\\(\\\). is not 
an integral constant-expression" }
 // { dg-warning "does not allow C99 designated 
initializers" "" { target *-*-* } .-1 }


PING^1 [PATCH] i386: Separate costs of pseudo registers from hard registers

2019-08-05 Thread H.J. Lu
On Tue, Jul 23, 2019 at 2:57 PM H.J. Lu  wrote:
>
> On Mon, Jun 24, 2019 at 9:16 AM H.J. Lu  wrote:
> >
> > On Mon, Jun 24, 2019 at 6:37 AM Richard Biener  wrote:
> > >
> > > On Thu, 20 Jun 2019, Jan Hubicka wrote:
> > >
> > > > > > Currently, costs of moves are also used for costs of RTL 
> > > > > > expressions.   This
> > > > > > patch:
> > > > > >
> > > > > > https://gcc.gnu.org/ml/gcc-patches/2018-02/msg00405.html
> > > > > >
> > > > > > includes:
> > > > > >
> > > > > > diff --git a/gcc/config/i386/x86-tune-costs.h 
> > > > > > b/gcc/config/i386/x86-tune-costs.h
> > > > > > index e943d13..8409a5f 100644
> > > > > > --- a/gcc/config/i386/x86-tune-costs.h
> > > > > > +++ b/gcc/config/i386/x86-tune-costs.h
> > > > > > @@ -1557,7 +1557,7 @@ struct processor_costs skylake_cost = {
> > > > > >{4, 4, 4}, /* cost of loading integer registers
> > > > > >  in QImode, HImode and SImode.
> > > > > >  Relative to reg-reg move (2).  */
> > > > > > -  {6, 6, 6}, /* cost of storing integer registers */
> > > > > > +  {6, 6, 3}, /* cost of storing integer registers */
> > > > > >2, /* cost of reg,reg fld/fst */
> > > > > >{6, 6, 8}, /* cost of loading fp registers
> > > > > >  in SFmode, DFmode and XFmode */
> > > >
> > > > Well, it seems that the patch was fixing things on wrong spot - the
> > > > tables are intended to be mostly latency based. I think we ought to
> > > > document divergences from these including benchmarks where the change
> > > > helped. Otherwise it is very hard to figure out why the entry does not
> > > > match the reality.
> > > > > >
> > > > > > It lowered the cost for SImode store and made it cheaper than 
> > > > > > SSE<->integer
> > > > > > register move.  It caused a regression:
> > > > > >
> > > > > > https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90878
> > > > > >
> > > > > > Since the cost for SImode store is also used to compute scalar_store
> > > > > > in ix86_builtin_vectorization_cost, it changed loop costs in
> > > > > >
> > > > > > void
> > > > > > foo (long p2, long *diag, long d, long i)
> > > > > > {
> > > > > >   long k;
> > > > > >   k = p2 < 3 ? p2 + p2 : p2 + 3;
> > > > > >   while (i < k)
> > > > > > diag[i++] = d;
> > > > > > }
> > > > > >
> > > > > > As the result, the loop is unrolled 4 times with -O3 -march=skylake,
> > > > > > instead of 3.
> > > > > >
> > > > > > My patch separates costs of moves from costs of RTL expressions.  
> > > > > > We have
> > > > > > a follow up patch which restores the cost for SImode store back to 
> > > > > > 6 and leave
> > > > > > the cost of scalar_store unchanged.  It keeps loop unrolling 
> > > > > > unchanged and
> > > > > > improves powf performance in glibc by 30%.  We are collecting SPEC 
> > > > > > CPU 2017
> > > > > > data now.
> > > >
> > > > I have seen the problem with scalar_store with AMD tuning as well.
> > > > It seems to make SLP vectorizer to be happy about idea of turning
> > > > sequence of say integer tores into code which moves all the values into
> > > > AVX register and then does one vector store.
> > > >
> > > > The cost basically compare cost of N scalar stores to 1 scalar store +
> > > > vector construction. Vector construction then N*sse_op+addss.
> > > >
> > > > With testcase:
> > > >
> > > > short array[8];
> > > > test (short a,short b,short c,short d,short e,short f,short g,short h)
> > > > {
> > > >   array[0]=a;
> > > >   array[1]=b;
> > > >   array[2]=c;
> > > >   array[3]=d;
> > > >   array[4]=e;
> > > >   array[5]=f;
> > > >   array[6]=g;
> > > >   array[7]=h;
> > > > }
> > > > int iarray[8];
> > > > test2 (int a,int b,int c,int d,int e,int f,int g,int h)
> > > > {
> > > >   iarray[0]=a;
> > > >   iarray[1]=b;
> > > >   iarray[2]=c;
> > > >   iarray[3]=d;
> > > >   iarray[4]=e;
> > > >   iarray[5]=f;
> > > >   iarray[6]=g;
> > > >   iarray[7]=h;
> > > > }
> > > >
> > > > I get the following codegen:
> > > >
> > > >
> > > > test:
> > > > vmovd   %edi, %xmm0
> > > > vmovd   %edx, %xmm2
> > > > vmovd   %r8d, %xmm1
> > > > vmovd   8(%rsp), %xmm3
> > > > vpinsrw $1, 16(%rsp), %xmm3, %xmm3
> > > > vpinsrw $1, %esi, %xmm0, %xmm0
> > > > vpinsrw $1, %ecx, %xmm2, %xmm2
> > > > vpinsrw $1, %r9d, %xmm1, %xmm1
> > > > vpunpckldq  %xmm2, %xmm0, %xmm0
> > > > vpunpckldq  %xmm3, %xmm1, %xmm1
> > > > vpunpcklqdq %xmm1, %xmm0, %xmm0
> > > > vmovaps %xmm0, array(%rip)
> > > > ret
> > > >
> > > > test2:
> > > > vmovd   %r8d, %xmm5
> > > > vmovd   %edx, %xmm6
> > > > vmovd   %edi, %xmm7
> > > > vpinsrd $1, %r9d, %xmm5, %xmm1
> > > > vpinsrd $1, %ecx, %xmm6, %xmm3
> > > > vpinsrd $1, %esi, %xmm7, %xmm0
> > > > vpunpcklqdq %xmm3, %xmm0, %xmm0
> > > > vmovd   16(%rbp), %xmm4
> > > > vpinsrd $1, 24(%rbp), %xmm4, %xmm2
> > > > vpunpcklqdq %xmm2, %xmm1, %xmm1
> > > > vinserti128 $0x1, 

[PATCH] [LRA] Fix wrong-code PR 91109

2019-08-05 Thread Bernd Edlinger
Hi!


PR 91109 is a wrong-code bug, where LRA is using a scratch register
which is actually not available for use, and thus gets clobbered
when it should not.  That seems to be mostly because the live range
info of the cloned scratch register is not working with the way update_scratch_ops
sets up the new register.  Moreover, for the new register there is
a much better alternative free register available, so that simply not
trying to re-use the previous hard register assignment solves the problem.

For more background please see the bugzilla PR 91109.

Since I am able to reproduce this bug with latest gcc-9 branch, I want
to ask right away, if it is okay to back-port after a while.


Boot-strapped and reg-tested on x86_64-pc-linux-gnu and armv7-linux-gnueabihf.
Is it OK for trunk?


Thanks,
Bernd.


2019-07-30  Bernd Edlinger  

	PR tree-optimization/91109
	* lra-remat.c (update_scratch_ops): Remove assignment of the
	hard register.

Index: gcc/lra-remat.c
===
--- gcc/lra-remat.c	(revision 273767)
+++ gcc/lra-remat.c	(working copy)
@@ -1021,7 +1021,6 @@ get_hard_regs (struct lra_insn_reg *reg, int &nreg
 static void
 update_scratch_ops (rtx_insn *remat_insn)
 {
-  int hard_regno;
   lra_insn_recog_data_t id = lra_get_insn_recog_data (remat_insn);
   struct lra_static_insn_data *static_id = id->insn_static_data;
   for (int i = 0; i < static_id->n_operands; i++)
@@ -1032,17 +1031,9 @@ update_scratch_ops (rtx_insn *remat_insn)
   int regno = REGNO (*loc);
   if (! lra_former_scratch_p (regno))
 	continue;
-  hard_regno = reg_renumber[regno];
   *loc = lra_create_new_reg (GET_MODE (*loc), *loc,
  lra_get_allocno_class (regno),
  "scratch pseudo copy");
-  if (hard_regno >= 0)
-	{
-	  reg_renumber[REGNO (*loc)] = hard_regno;
-	  if (lra_dump_file)
-	fprintf (lra_dump_file, "	 Assigning the same %d to r%d\n",
-		 REGNO (*loc), hard_regno);
-	}
   lra_register_new_scratch_op (remat_insn, i, id->icode);
 }
   


[PATCH] Sync MIPS support from libffi master repository

2019-08-05 Thread Aurelien Jarno
This updates the libffi MIPS support up to commit 746dbe3a6a79, with the
exception of commit bd72848c7af9 which prefixes the ALIGN macro with
FFI_ for all ports.

These patches, with the exception of the softfloat one, have been used
on the Debian GCC packages for quite some time.

libffi/Changelog:

2019-08-05  Aurelien Jarno  

Import from upstream
* src/mips/ffi.c (ffi_call_O32, ffi_call_N32,
ffi_closure_mips_inner_O32, ffi_closure_mips_inner_N32): Adjust
interface.
(ffi_call_int): Renamed from ffi_call.
(ffi_call, ffi_call_go, ffi_prep_go_closure): New functions.
(ffi_prep_closure_loc): Define jr instruction for R6.
* src/mips/ffitarget.h (FFI_GO_CLOSURES): Define.
(FFI_TRAMPOLINE_SIZE): Define to 56 for N64.
Test for __linux__ instead of linux.
* src/mips/n32.S (ffi_go_closure_N32): New function.
(ffi_call_N32): Adjust code for softfloat.
(.set mips4): Guard with !defined(__mips_isa_rev) ||
(__mips_isa_rev<6).
* src/mips/o32.S (ffi_go_closure_O32): New function.
(ffi_call_O32): Adjust code for softfloat.

diff --git a/libffi/src/mips/ffi.c b/libffi/src/mips/ffi.c
index 5d0dd70cb32..70a2081b498 100644
--- a/libffi/src/mips/ffi.c
+++ b/libffi/src/mips/ffi.c
@@ -581,14 +581,15 @@ ffi_status ffi_prep_cif_machdep(ffi_cif *cif)
 /* Low level routine for calling O32 functions */
 extern int ffi_call_O32(void (*)(char *, extended_cif *, int, int), 
extended_cif *, unsigned, 
-   unsigned, unsigned *, void (*)(void));
+   unsigned, unsigned *, void (*)(void), void *closure);
 
 /* Low level routine for calling N32 functions */
 extern int ffi_call_N32(void (*)(char *, extended_cif *, int, int), 
extended_cif *, unsigned, 
-   unsigned, void *, void (*)(void));
+   unsigned, void *, void (*)(void), void *closure);
 
-void ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
+void ffi_call_int(ffi_cif *cif, void (*fn)(void), void *rvalue, 
+ void **avalue, void *closure)
 {
   extended_cif ecif;
 
@@ -610,7 +611,7 @@ void ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, 
void **avalue)
 case FFI_O32:
 case FFI_O32_SOFT_FLOAT:
   ffi_call_O32(ffi_prep_args, &ecif, cif->bytes, 
-  cif->flags, ecif.rvalue, fn);
+  cif->flags, ecif.rvalue, fn, closure);
   break;
 #endif
 
@@ -642,7 +643,7 @@ void ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, 
void **avalue)
 #endif
  }
 ffi_call_N32(ffi_prep_args, &ecif, cif->bytes,
- cif->flags, rvalue_copy, fn);
+ cif->flags, rvalue_copy, fn, closure);
 if (copy_rvalue)
   memcpy(ecif.rvalue, rvalue_copy + copy_offset, cif->rtype->size);
   }
@@ -655,11 +656,27 @@ void ffi_call(ffi_cif *cif, void (*fn)(void), void 
*rvalue, void **avalue)
 }
 }
 
+void
+ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
+{
+  ffi_call_int (cif, fn, rvalue, avalue, NULL);
+}
+
+void
+ffi_call_go (ffi_cif *cif, void (*fn)(void), void *rvalue,
+void **avalue, void *closure)
+{
+  ffi_call_int (cif, fn, rvalue, avalue, closure);
+}
+
+
 #if FFI_CLOSURES
 #if defined(FFI_MIPS_O32)
 extern void ffi_closure_O32(void);
+extern void ffi_go_closure_O32(void);
 #else
 extern void ffi_closure_N32(void);
+extern void ffi_go_closure_N32(void);
 #endif /* FFI_MIPS_O32 */
 
 ffi_status
@@ -698,7 +715,11 @@ ffi_prep_closure_loc (ffi_closure *closure,
   /* lui  $12,high(codeloc) */
   tramp[2] = 0x3c0c | ((unsigned)codeloc >> 16);
   /* jr   $25  */
+#if !defined(__mips_isa_rev) || (__mips_isa_rev<6)
   tramp[3] = 0x0328;
+#else
+  tramp[3] = 0x0329;
+#endif
   /* ori  $12,low(codeloc)  */
   tramp[4] = 0x358c | ((unsigned)codeloc & 0x);
 #else
@@ -726,7 +747,11 @@ ffi_prep_closure_loc (ffi_closure *closure,
   /* ori  $25,low(fn)  */
   tramp[10] = 0x3739 | ((unsigned long)fn  & 0x);
   /* jr   $25  */
+#if !defined(__mips_isa_rev) || (__mips_isa_rev<6)
   tramp[11] = 0x0328;
+#else
+  tramp[11] = 0x0329;
+#endif
   /* ori  $12,low(codeloc)  */
   tramp[12] = 0x358c | ((unsigned long)codeloc & 0x);
 
@@ -762,17 +787,17 @@ ffi_prep_closure_loc (ffi_closure *closure,
  * Based on the similar routine for sparc.
  */
 int
-ffi_closure_mips_inner_O32 (ffi_closure *closure,
+ffi_closure_mips_inner_O32 (ffi_cif *cif,
+void (*fun)(ffi_cif*, void*, void**, void*),
+   void *user_data,
void *rvalue, ffi_arg *ar,
double *fpr)
 {
-  ffi_cif *cif;
   void **avaluep;
   ffi_arg *avalue;
   ffi_type **arg_types;
   int i, avn, argn, seen_int;
 
-  cif = closure->cif;
   avalue = alloca (cif->nargs * sizeof (ffi_

Re: C++ PATCH for c++/91264 - detect modifying const objects in constexpr

2019-08-05 Thread Jason Merrill

On 7/31/19 3:26 PM, Marek Polacek wrote:

One of the features of constexpr is that it doesn't allow UB; and such UB must
be detected at compile-time.  So running your code in a context that requires
a constant expression should ensure that the code in question is free of UB.
In effect, constexpr can serve as a sanitizer.  E.g. this article describes in
in more detail:


[dcl.type.cv]p4 says "Any attempt to modify a const object during its lifetime
results in undefined behavior." However, as the article above points out, we
aren't detecting that case in constexpr evaluation.

This patch fixes that.  It's not that easy, though, because we have to keep in
mind [class.ctor]p5:
"A constructor can be invoked for a const, volatile or const volatile object.
const and volatile semantics are not applied on an object under construction.
They come into effect when the constructor for the most derived object ends."

I handled this by keeping a hash set which tracks objects under construction.
I considered other options, such as going up call_stack, but that wouldn't
work with trivial constructor/op=.  It was also interesting to find out that
the definition of TREE_HAS_CONSTRUCTOR says "When appearing in a FIELD_DECL,
it means that this field has been duly initialized in its constructor" though
nowhere in the codebase do we set TREE_HAS_CONSTRUCTOR on a FIELD_DECL as far
as I can see.  Unfortunately, using this bit proved useless for my needs here.



Also, be mindful of mutable subobjects.

Does this approach look like an appropriate strategy for tracking objects'
construction?


For scalar objects, we should be able to rely on INIT_EXPR vs. 
MODIFY_EXPR to distinguish between initialization and modification; for 
class objects, I wonder about setting a flag on the CONSTRUCTOR after 
initialization is complete to indicate that the value is now constant.


Jason


[Committed] PR fortran/91372 -- Fix parsing of DATA with implied do-loop

2019-08-05 Thread Steve Kargl
I've committed the attached patch as obvious.  My previous
patch was too strict (and incorrect) in the interpretation
of 

R837 data-stmt  is DATA data-stmt-set [ [ , ] data-stmt-set ] ...

The previous patch required whitespace after DATA, but it seems
that it is valid to do 'DATA(x(i),i=1,2)/1.,2./' where the implied
do-loop nestles up against DATA.

2019-08-05  Steven g. Kargl  

PR fortran/91372
* decl.c (gfc_match_data): Allow an implied do-loop to nestle against
DATA.

2019-08-05  Steven g. Kargl  

PR fortran/91372
* gfortran.dg/pr91372.f90: New test.

-- 
Steve
Index: gcc/fortran/decl.c
===
--- gcc/fortran/decl.c	(revision 274119)
+++ gcc/fortran/decl.c	(working copy)
@@ -624,9 +624,10 @@ gfc_match_data (void)
   char c;
 
   /* DATA has been matched.  In free form source code, the next character
- needs to be whitespace.  Check that here.  */
+ needs to be whitespace or '(' from an implied do-loop.  Check that
+ here.  */
   c = gfc_peek_ascii_char ();
-  if (gfc_current_form == FORM_FREE && !gfc_is_whitespace (c))
+  if (gfc_current_form == FORM_FREE && !gfc_is_whitespace (c) && c != '(')
 return MATCH_NO;
 
   /* Before parsing the rest of a DATA statement, check F2008:c1206.  */


Re: [patch][aarch64]: add intrinsics for vld1(q)_x4 and vst1(q)_x4

2019-08-05 Thread Jason Merrill

On 7/18/19 1:18 PM, James Greenhalgh wrote:

On Mon, Jun 10, 2019 at 06:21:05PM +0100, Sylvia Taylor wrote:

Greetings,

This patch adds the intrinsic functions for:
- vld1__x4
- vst1__x4
- vld1q__x4
- vst1q__x4

Bootstrapped and tested on aarch64-none-linux-gnu.

Ok for trunk? If yes, I don't have any commit rights, so can someone
please commit it on my behalf.


Hi,

I'm concerned by this strategy for implementing the arm_neon.h builtins:


+__extension__ extern __inline int8x8x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld1_s8_x4 (const int8_t *__a)
+{
+  union { int8x8x4_t __i; __builtin_aarch64_simd_xi __o; } __au;
+  __au.__o
+= __builtin_aarch64_ld1x4v8qi ((const __builtin_aarch64_simd_qi *) __a);
+  return __au.__i;
+}


As far as I know this is undefined behaviour in C++11. This was the best
resource I could find pointing to the relevant standards paragraphs.

   
https://stackoverflow.com/questions/11373203/accessing-inactive-union-member-and-undefined-behavior


Correct, it is undefined behavior in C++.


That said, GCC explicitly allows it, so maybe this is fine?

   
https://gcc.gnu.org/onlinedocs/gcc-9.1.0/gcc/Optimize-Options.html#Type-punning


I don't know the relevant details of the TBAA implementation, but that 
certainly sounds like GCC uses C semantics for this pattern even in C++, 
so it should work as expected when compiled with GCC.


Jason


Re: [PATCH][RFC][x86] Fix PR91154, add SImode smax, allow SImode add in SSE regs

2019-08-05 Thread Uros Bizjak
On Mon, Aug 5, 2019 at 3:29 PM Richard Biener  wrote:

> > > > > > > (define_mode_iterator MAXMIN_IMODE [SI "TARGET_SSE4_1"] [DI 
> > > > > > > "TARGET_AVX512F"])
> > > > > > >
> > > > > > > and then we need to split DImode for 32bits, too.
> > > > > >
> > > > > > For now, please add "TARGET_64BIT && TARGET_AVX512F" for DImode
> > > > > > condition, I'll provide _doubleword splitter later.
> > > > >
> > > > > Shouldn't that be TARGET_AVX512VL instead?  Or does the insn use %g0 
> > > > > etc.
> > > > > to force use of %zmmN?
> > > >
> > > > It generates V4SI mode, so - yes, AVX512VL.
> > >
> > > case SMAX:
> > > case SMIN:
> > > case UMAX:
> > > case UMIN:
> > >   if ((mode == DImode && (!TARGET_64BIT || !TARGET_AVX512VL))
> > >   || (mode == SImode && !TARGET_SSE4_1))
> > > return false;
> > >
> > > so there's no way to use AVX512VL for 32bit?
> >
> > There is a way, but on 32bit targets, we need to split DImode
> > operation to a sequence of SImode operations for unconverted pattern.
> > This is of course doable, but somehow more complex than simply
> > emitting a DImode compare + DImode cmove, which is what current
> > splitter does. So, a follow-up task.
>
> Ah, OK.  So for the above condition we can elide the !TARGET_64BIT
> check we just need to properly split if we enable the scalar minmax
> pattern for DImode on 32bits, the STV conversion would go fine.

Yes, that is correct.

Uros.


Re: [PATCH 1/3] add -Wstruct-not-pod, -Wclass-is-pod, -Wmismatched-tags (PR 61339)

2019-08-05 Thread Jason Merrill

On 8/1/19 7:35 PM, Martin Sebor wrote:

On 8/1/19 12:09 PM, Jason Merrill wrote:

On 7/22/19 12:34 PM, Martin Sebor wrote:

Ping: https://gcc.gnu.org/ml/gcc-patches/2019-07/msg00622.html

On 7/8/19 3:58 PM, Martin Sebor wrote:

The attached patch implements three new warnings:

  *  -Wstruct-not-pod triggers for struct definitions that are not
 POD structs,
  *  -Wclass-is-pod triggers for class definitions that satisfy
 the requirements on POD structs, and
  *  -Wmismatched-tags that triggers for class and struct declarations
 with class-key that doesn't match either their definition or
 the first declaration (if no definition is provided).

The implementation of -Wclass-is-pod and -Wstruct-not-pod is fairly
straightforward but the -Wmismatched-tags solution is slightly unusual.
It collects struct and class declarations first and diagnoses 
mismatches

only after the whole translation unit has been processed.  This is so
that the definition of a class can guide which declarations to diagnose
no matter which come first.


As Jonathan and I were saying in the connected discussion, the *pod 
warnings enforce questionable coding guidelines, so I'd rather not 
have them in the compiler.


What specifically do you consider questionable?



When defining a new class one has to decide whether to use 'struct'
and when to use 'class'.  Since there's no difference, adopting any
convention to help make a consistent choice seems like a natural
thing to do.  You could say (as I think someone in this thread did):
"make it 'class' when it has a ctor or dtor."  Someone else might
add: "or assignment."  Someone else still might go even further
and include "private members or bases or virtual functions."  Since
all the type definitions end up equivalent regardless of the class-key,
all these choices seem equally reasonable.  But all of them are also
arbitrary.  The only difference is that by being based on a common
(and at least historically, widely relied on) property, the POD
convention is less arbitrary.  That's probably why it's apparently
quite popular (as evident from the Stack Exchange thread).


Yes, all of them are somewhat arbitrary.  This one seems like a poor 
choice, for the reasons Jon and I gave in the other thread.  IMO 
'aggregate' would be a better property to use.


Which Stack Exchange thread do you mean?

-Wmismatched-tags is useful to have, given the MSVC/clang situation, 
but I wonder about memory consumption from all the record keeping.  Do 
you have any overhead measurements?


I did think about the overhead but not knowing if the patch would
even be considered I didn't spend time optimizing it.

In an instrumented build of GCC with the patch I just did I have
collected the following stats for the number of elements in
the rec2loc hash table (for 998 files):

   rec2loc elements   locvec elements
   min:   0 0
   max:   2,785 3,303
   mean:    537 1,043
   median:  526 1,080

The locvec data are based on totals for the whole hash table, so
in the worst case the hash_map has 2,785 elements, and the sum of
all locvec lengths in all those elements is 3,303.  Each rec2loc
element takes up 16 bytes, plus the size of the locvec data.

If my math is right (which doesn't happen too often) in the worst
case the bookkeeping overhead is 43KB for the hash_map plus on
the order of 140KB for the vectors (just doubling the number of
elements to account for capacity = 2 X size, times 24 for
the flag_func_loc_t element type).  So 183K in the worst case
in GCC.


183k cumulative over all the GCC sources doesn't sound excessive, but...


There are a few ways to reduce that if it seems excessive.

One is by avoiding some waste in flag_func_loc_t which is

   pair>>

tree could come first and tag_types and the bool could share
space.  That should bring it down to 16 in LP64, for about
30% off the 183K.

Beyond that, we could change the algorithm to discard records
for prior declarations after the first definition has been seen
(and any mismatches diagnosed).

We could also only collect one record for each definition
in system headers rather than one for every declaration and
reference.


...these all sound worthwhile.

Jason


Re: [C++ Patch] Improve delete_sanity locations

2019-08-05 Thread Jason Merrill

On 8/2/19 8:27 AM, David Malcolm wrote:

On Fri, 2019-08-02 at 14:02 +0200, Paolo Carlini wrote:

Hi,

On 31/07/19 21:39, David Malcolm wrote:

[snip]

I don't care for "cp_expr_loc_or_loc".

By "_or_here" do you mean "or input_location"?

Calling it "cp_expr_loc_or_input_location" would spell out what it
does, but would be rather long.


Thanks for your feedback. At this point I don't feel very strongly
about
the issue, and, well, hopefully it's just temporary material, but I
tested the below which proposes the name cp_expr_loc_or_input_loc.
Seems
a good balance to me. What do you think?


I like it, FWIW.


OK.

Jason



Re: [PATCH]: Fix PR c++/88095, class template argument deduction for literal operator templates per P0732 for C++2a

2019-08-05 Thread Jason Merrill

On 8/2/19 9:59 AM, Tom Honermann wrote:

This patch fixes PR c++/88095:
- Bug 88095 - class nontype template parameter UDL string literals 
doesn't accepts deduction placeholder

- https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88095.

It also addresses a latent issue; literal operator templates with 
template parameter packs of literal class type were previously accepted. 
  The patch corrects this and adds a test (udlit-class-nttp-neg.C).


In the change to gcc/cp/parser.c, it is not clear to me whether the 
'TREE_CODE (TREE_TYPE (parm)) == TEMPLATE_TYPE_PARM' comparison is 
necessary; it might be that 'CLASS_PLACEHOLDER_TEMPLATE' suffices on its 
own.


template_placeholder_p would be a shorter way to write these, but I 
think even better would be to just change CLASS_TYPE_P to 
MAYBE_CLASS_TYPE_P.  I'll make that change and commit the patch, since 
it looks like you don't have commit access yet.


If accepted, I'd like to request this change be applied to gcc 9 as it 
is needed for one of the char8_t remediation approaches documented in 
P1423, and may be helpful for existing code bases impacted by the 
char8_t changes adopted via P0482 for C++20.

- https://wg21.link/p1423#emulate


Seems reasonable.  It may be too late to make 9.2 at this point, though.

Jason


Re: [Committed] PR fortran/90985 -- DATA must be followed by whitespace

2019-08-05 Thread Steve Kargl
On Mon, Aug 05, 2019 at 12:01:36PM -0700, H.J. Lu wrote:
> On Fri, Aug 2, 2019 at 4:51 PM Steve Kargl
>  wrote:
> >
> > In free-form source code, DATA must be followed by whitespace.
> > This patch checks for whitespace, and if none is found, returns
> > MATCH_NO to give other matchers a chance to run.
> >
> > 2019-08-02  Steven G. Kargl  
> >
> > PR fortran/90985
> > * decl.c (gfc_match_data): In free-form code, DATA be followed by
> > whitespace.
> >
> > 2019-08-02  Steven G. Kargl  
> >
> > PR fortran/90985
> > * gfortran.dg/pr90985.f90: New test.
> >
> 
> This may have caused:
> 
> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=91372
> 

Yep.

-- 
Steve


Re: [Committed] PR fortran/90985 -- DATA must be followed by whitespace

2019-08-05 Thread H.J. Lu
On Fri, Aug 2, 2019 at 4:51 PM Steve Kargl
 wrote:
>
> In free-form source code, DATA must be followed by whitespace.
> This patch checks for whitespace, and if none is found, returns
> MATCH_NO to give other matchers a chance to run.
>
> 2019-08-02  Steven G. Kargl  
>
> PR fortran/90985
> * decl.c (gfc_match_data): In free-form code, DATA be followed by
> whitespace.
>
> 2019-08-02  Steven G. Kargl  
>
> PR fortran/90985
> * gfortran.dg/pr90985.f90: New test.
>

This may have caused:

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=91372

-- 
H.J.


Re: C++ PATCH for c++/91338 - P1161R3: Deprecate a[b,c]

2019-08-05 Thread Marek Polacek
On Mon, Aug 05, 2019 at 02:31:15PM -0400, Jason Merrill wrote:
> On 8/3/19 9:54 PM, Marek Polacek wrote:
> > This patch implements P1161R3, Deprecate uses of the comma operator in
> > subscripting expressions:
> > 
> > which made its way to C++20.  New [depr.comma.subscript] shows:
> > 
> >void f(int *a, int b, int c) {
> >  a[b,c]; // deprecated
> >  a[(b,c)];   // OK
> >}
> > 
> > I've added a new option, -Wcomma-subscript, so that this warning can be
> > selectively turned off in c++2a, where it is enabled by default, and turned
> > on in lower standards, to check if your codebase has any occurrences of it,
> > without needing to change the -std=.
> 
> > +  /* -Wcomma-subscript is enabled by default in C++20.  */
> > +  if (!global_options_set.x_warn_comma_subscript)
> > +warn_comma_subscript = cxx_dialect >= cxx2a;
> 
> Let's add "&& warn_deprecated" here, so -Wno-deprecated also turns it off.

Fixed, and new test added.

> OK with that change.

I'm checking this in:

2019-08-05  Marek Polacek  

PR c++/91338 - Implement P1161R3: Deprecate a[b,c].
* c-opts.c (c_common_post_options): Enable -Wcomma-subscript by
default for C++2a, unless -Wno-deprecated.
* c.opt (Wcomma-subscript): New warning.

* parser.c (cp_parser_postfix_open_square_expression): Warn about uses
of a comma operator within a subscripting expression.
(cp_parser_skip_to_closing_square_bracket_1): New function, made out
of...
(cp_parser_skip_to_closing_square_bracket): ...this.

* doc/invoke.texi: Document -Wcomma-subscript.

* g++.dg/cpp2a/comma1.C: New test.
* g++.dg/cpp2a/comma2.C: New test.
* g++.dg/cpp2a/comma3.C: New test.
* g++.dg/cpp2a/comma4.C: New test.

diff --git gcc/c-family/c-opts.c gcc/c-family/c-opts.c
index e97bbdf5c6f..2d4af63cde4 100644
--- gcc/c-family/c-opts.c
+++ gcc/c-family/c-opts.c
@@ -916,6 +916,10 @@ c_common_post_options (const char **pfilename)
   if (!global_options_set.x_warn_register)
 warn_register = cxx_dialect >= cxx17;
 
+  /* -Wcomma-subscript is enabled by default in C++20.  */
+  if (!global_options_set.x_warn_comma_subscript)
+warn_comma_subscript = (cxx_dialect >= cxx2a && warn_deprecated);
+
   /* Declone C++ 'structors if -Os.  */
   if (flag_declone_ctor_dtor == -1)
 flag_declone_ctor_dtor = optimize_size;
diff --git gcc/c-family/c.opt gcc/c-family/c.opt
index 4c8b0026000..257cadfa5f1 100644
--- gcc/c-family/c.opt
+++ gcc/c-family/c.opt
@@ -428,6 +428,10 @@ Wclobbered
 C ObjC C++ ObjC++ Var(warn_clobbered) Warning EnabledBy(Wextra)
 Warn about variables that might be changed by \"longjmp\" or \"vfork\".
 
+Wcomma-subscript
+C++ ObjC++ Var(warn_comma_subscript) Warning
+Warn about uses of a comma operator within a subscripting expression.
+
 Wcomment
 C ObjC C++ ObjC++ CPP(warn_comments) CppReason(CPP_W_COMMENTS) 
Var(cpp_warn_comment) Init(0) Warning LangEnabledBy(C ObjC C++ ObjC++,Wall)
 Warn about possibly nested block comments, and C++ comments spanning more than 
one physical line.
diff --git gcc/cp/parser.c gcc/cp/parser.c
index ebeffdb775f..1a5ae147b84 100644
--- gcc/cp/parser.c
+++ gcc/cp/parser.c
@@ -2669,6 +2669,8 @@ static bool cp_parser_init_statement_p
   (cp_parser *);
 static bool cp_parser_skip_to_closing_square_bracket
   (cp_parser *);
+static int cp_parser_skip_to_closing_square_bracket_1
+  (cp_parser *, enum cpp_ttype);
 
 /* Concept-related syntactic transformations */
 
@@ -7522,7 +7524,33 @@ cp_parser_postfix_open_square_expression (cp_parser 
*parser,
  index = cp_parser_braced_list (parser, &expr_nonconst_p);
}
   else
-   index = cp_parser_expression (parser);
+   {
+ /* [depr.comma.subscript]: A comma expression appearing as
+the expr-or-braced-init-list of a subscripting expression
+is deprecated.  A parenthesized comma expression is not
+deprecated.  */
+ if (warn_comma_subscript)
+   {
+ /* Save tokens so that we can put them back.  */
+ cp_lexer_save_tokens (parser->lexer);
+
+ /* Look for ',' that is not nested in () or {}.  */
+ if (cp_parser_skip_to_closing_square_bracket_1 (parser,
+ CPP_COMMA) == -1)
+   {
+ auto_diagnostic_group d;
+ warning_at (cp_lexer_peek_token (parser->lexer)->location,
+ OPT_Wcomma_subscript,
+ "top-level comma expression in array subscript "
+ "is deprecated");
+   }
+
+ /* Roll back the tokens we skipped.  */
+ cp_lexer_rollback_tokens (parser->lexer);
+   }
+
+ index = cp_parser_expression (parser);
+   }
 }
 
 

Re: Use predicates for RTL objects

2019-08-05 Thread Arvind Sankar
On Mon, Aug 05, 2019 at 01:29:26PM -0500, Segher Boessenkool wrote:
> On Mon, Aug 05, 2019 at 02:14:50PM -0400, Arvind Sankar wrote:
> > On Mon, Aug 05, 2019 at 12:48:46PM -0500, Segher Boessenkool wrote:
> > > > -/* Predicate yielding true iff X is an rtx for a double-int.  */
> > > > +/* Predicate yielding true iff X is an rtx for a floating point 
> > > > constant.  */
> > > >  #define CONST_DOUBLE_AS_FLOAT_P(X) \
> > > >(GET_CODE (X) == CONST_DOUBLE && GET_MODE (X) != VOIDmode)
> > > 
> > > Is const_double really only used for floating point these days?
> 
> > Are you asking if the CONST_DOUBLE_AS_INT_P possibility is dead code
> > now? That's beyond my current level of understanding of the code,
> > hopefully someone else will chime in.
> 
> I am asking if the change to this comment is correct.
> 

Ah. The comment was originally copy-pasted from the one for
CONST_DOUBLE_AS_INT_P, and it seemed clearly wrong. The comment for
CONST_DOUBLE_P (which only checks the code and not the machine mode)
says it can either be a double-int or a floating point constant.


Re: C++ PATCH for c++/91338 - P1161R3: Deprecate a[b,c]

2019-08-05 Thread Jason Merrill

On 8/3/19 9:54 PM, Marek Polacek wrote:

This patch implements P1161R3, Deprecate uses of the comma operator in
subscripting expressions:

which made its way to C++20.  New [depr.comma.subscript] shows:

   void f(int *a, int b, int c) {
 a[b,c]; // deprecated
 a[(b,c)];   // OK
   }

I've added a new option, -Wcomma-subscript, so that this warning can be
selectively turned off in c++2a, where it is enabled by default, and turned
on in lower standards, to check if your codebase has any occurrences of it,
without needing to change the -std=.



+  /* -Wcomma-subscript is enabled by default in C++20.  */
+  if (!global_options_set.x_warn_comma_subscript)
+warn_comma_subscript = cxx_dialect >= cxx2a;


Let's add "&& warn_deprecated" here, so -Wno-deprecated also turns it off.

OK with that change.

Jason


Re: Use predicates for RTL objects

2019-08-05 Thread Segher Boessenkool
On Mon, Aug 05, 2019 at 02:14:50PM -0400, Arvind Sankar wrote:
> On Mon, Aug 05, 2019 at 12:48:46PM -0500, Segher Boessenkool wrote:
> > First: do you have a copyright assignment?  See
> >   https://gcc.gnu.org/contribute.html
> > for instructions.
> 
> Nope, is this really substantial enough to warrant one?

Some might say it is, the sheer amount of code changed :-)  If some
maintainer okays it without assignment, that is fine with me of course.

This is also easier if it is all machine-generated and nice simple
patches :-)

> > This is easier to review, and even to commit as obvious, if you did a
> > patch per macro, certainly for the new macros; and, put the script in
> > contrib/, and then say with every patch that is just the output of the
> > script that that is the case.
> 
> Ok, I can split it up that way.

Thanks!

> > Please mention the exact macros you now use, and/or the actual rtx codes.
> That'll be easier once it's split up by macro. I'll combine the backend +
> target code into one patch per macro.

Looking forward to it.

In general, writing changelogs for simpler patches is easier :-)

> > >   * gcc/config/rx/constraints.md, gcc/config/rx/rx.c, gcc/config/rx/rx.h: 
> > > Likewise.
> > 
> > And you normally have a separate entry for every file.
> 
> I tried to make the changelog a bit shorter by combining filenames into
> the same line-- should be easier all around to generate that with one entry 
> per file.

Yeah.

> > > -/* Predicate yielding true iff X is an rtx for a double-int.  */
> > > +/* Predicate yielding true iff X is an rtx for a floating point 
> > > constant.  */
> > >  #define CONST_DOUBLE_AS_FLOAT_P(X) \
> > >(GET_CODE (X) == CONST_DOUBLE && GET_MODE (X) != VOIDmode)
> > 
> > Is const_double really only used for floating point these days?

> Are you asking if the CONST_DOUBLE_AS_INT_P possibility is dead code
> now? That's beyond my current level of understanding of the code,
> hopefully someone else will chime in.

I am asking if the change to this comment is correct.

Thanks,


Segher


Re: Monotonically increasing counter (was Re: [Contrib PATCH] Add scripts to convert GCC repo from SVN to Git)

2019-08-05 Thread Jason Merrill

On 8/5/19 11:34 AM, Jakub Jelinek wrote:

On Mon, Aug 05, 2019 at 11:20:09AM -0400, Jason Merrill wrote:

I agree.  But for those who want a monotonically increasing
identifier, there's already one in git: CommitDate.  In the discussion
of this issue four years ago,


While commit date is monotonically increasing, it has the problem that at
certain dates there are very few commits, at others many.  When doing
bisection by hand, one does the revision computation (min+max)/2 in head
(it doesn't have to be precise of course, just roughly, and isn't perfect
either, because in svn all of trunk and branches contribute to the revision
numbers), with dates it would be division into further uneven chunks.


That's true, but is it a major problem?  If you have multiple commits on 
one day, you (can) have multiple binaries with the same date and 
different times, and you can adjust your heuristic for choosing the next 
bisection point accordingly.  Over longer periods, the number of commits 
per day averages out.



Could we tag the branchpoints, say when we branch off gcc 10, we tag the
branchpoint as tags/r11 and then we could use r11-123 as 123th commit on the
trunk since the branchpoint, and let bugzilla and web redirection handle
those rNN- style identifiers?
git describe --all --match 'r[0-9]*' ... | sed ...
to map hashes etc. to these rNN- identifiers and something to map them
back to hashes say for git web?


Well, having such tags would allow git describe to produce identifiers 
that you might find more readable.  For instance, if I do


git tag -a -m 'GCC 9 branchpoint' b9 $(git merge-base trunk gcc-9-branch)
git describe trunk

I get

b9-2260-gdb868bacf6a

i.e. commit #2260 since tag b9, with abbreviated hash gdb868bacf6a.

or if I do

git tag -a -m'Beginning of Time' r1 3cf0d8938a953ef13e57239613d42686f152b4fe
git describe --match r1 trunk

r1-170718-gdb868bacf6a

These tags don't need to be shared, this works fine locally.

Note that when you feed such an identifier to other git commands, they 
ignore the first two parts and just use the hash.


This might be a good alternative to dates for you, and people could 
refer to them interchangeably with raw hashes in the web interface.


Jason


Re: Use predicates for RTL objects

2019-08-05 Thread Arvind Sankar
On Mon, Aug 05, 2019 at 12:48:46PM -0500, Segher Boessenkool wrote:
> Hi Arvind,
> 
> First: do you have a copyright assignment?  See
>   https://gcc.gnu.org/contribute.html
> for instructions.

Nope, is this really substantial enough to warrant one?

> 
> This is easier to review, and even to commit as obvious, if you did a
> patch per macro, certainly for the new macros; and, put the script in
> contrib/, and then say with every patch that is just the output of the
> script that that is the case.

Ok, I can split it up that way.

> 
> Some (mostly changelog) comments:
> 
> On Mon, Aug 05, 2019 at 01:09:10PM -0400, Arvind Sankar wrote:
> > * gcc/alias.c, gcc/asan.c, gcc/bb-reorder.c, gcc/bt-load.c: Use 
> > predicate macros for rtx_code comparisons.
> 
> Many of your changelog lines are much too long.  Don't use more than 80
> columns (including the leading tab, which is 8 columns).
> 
> Please mention the exact macros you now use, and/or the actual rtx codes.
That'll be easier once it's split up by macro. I'll combine the backend +
target code into one patch per macro.
> 
> Filenames are relative to the directory containing the changelog file
> itself, so you shouldn't have the gcc/ in those filenames.
> 
> > * gcc/config/microblaze/predicates.md, 
> 
> There shouldn't be trailing spaces.
> 
> > * gcc/config/rx/constraints.md, gcc/config/rx/rx.c, gcc/config/rx/rx.h: 
> > Likewise.
> 
> And you normally have a separate entry for every file.
> 

I tried to make the changelog a bit shorter by combining filenames into
the same line-- should be easier all around to generate that with one entry per 
file.
> > -/* Predicate yielding true iff X is an rtx for a double-int.  */
> > +/* Predicate yielding true iff X is an rtx for a floating point constant.  
> > */
> >  #define CONST_DOUBLE_AS_FLOAT_P(X) \
> >(GET_CODE (X) == CONST_DOUBLE && GET_MODE (X) != VOIDmode)
> 
> Is const_double really only used for floating point these days?
> 
> 
> Segher
Are you asking if the CONST_DOUBLE_AS_INT_P possibility is dead code
now? That's beyond my current level of understanding of the code,
hopefully someone else will chime in.


Re: [RFC][tree-vect]PR 88915: Further vectorize second loop when versioning

2019-08-05 Thread Andre Vieira (lists)

Hi Richard,

Thanks for the feedback! See comments inline.

On 01/08/2019 16:26, Richard Biener wrote:

On Tue, 30 Jul 2019, Andre Vieira (lists) wrote:




On 30/07/2019 13:16, Andre Vieira (lists) wrote:

Hi Richard,

I believe this is in line with what you were explaining to me earlier. The
one thing I haven't quite done here is the jump around for if there is no
prolog peeling. Though I think skip_vectors introduces the jumping we need.

The main gist of it is:
1) if '--param vect-epilogues-nomask=1' is passed we refrain from adding the
versioning threshold check when versioning a loop at first,
2) later we allow the vectorizer to use skip_vectors to basically check
niters to be able to skip the vectorized loop on to the right epilogue,
3) after vectorizing the epilogue, we check whether this was a versioned
loop and whether we skipped adding the versioning threshold, if so we add a
threshold based on the last VF

There is a caveat here, if we don't vectorize the epilogue, because say our
architecture doesn't know how, we will end up with a regression.
Let's say we have a loop for which our target can vectorize for 32x but not
16x, we get:

if (alias & threshold_for_16x ())
{
    if (niters > 31)
      vectorized_31 ();
    else
      scalar ();
}
else
   scalar ();

Imagine our iteration count is 18, all we hit is the scalar loop, but now we
go through 1x alias and 2x niters. Whereas potentially we could have done
with just 1x niters.


True.  Note we should swap the checks to

   if (threshold_for_16x && alias)


I agree, I just haven't figured out what the best way of doing it. I am 
trying TRUTH_ANDIF_EXPR now, but fold_build2 turns that into a 
TRUTH_AND_EXPR.  Can I assume it does that for aarch64 because it can 
use conditional compare to merge the two checks? To be fair the code 
generated for aarch64 for the two checks I am looking at doesn't look 
too bad.


I am pondering figuring out why fold decides to transform it back into a 
TRUTH_AND_EXPR. Otherwise I think I might just try splitting the basic 
block.  Though I am not entirely sure adding an extra jump in there is 
always a good thing.



A mitigation for this could be to only add the threshold check if we
actually vectorized the last loop, I believe a:
'if (LOOP_VINFO_VECTORIZABLE_P (loop_vinfo)) return NULL;' inside that new
"else" in vect_transform_loop would do the trick. Though then we will still
have that extra alias check... >
I am going to hack around in the back-end to "fake" a situation like this
and see what happens.


Right should have quickly tried this before sending the email, what happens is
actually vect_transform_loop never gets called because try_vectorize_loop_1
will recognize it can't vectorize the epilogue. So we end up with the
"mitigation" result I suggested, where we simply don't have a versioning
threshold which might still not be ideal.


I think the main issue is how we have implemented epilogue vectorization.
Ideally when vectorizing the loop () we'd recognize all VFs we can handle
and thus know beforehand.  I think that's already done for the sake
of openmp simd now so doing this when epilogue vectorization is enabled
as well wouldn't be too bad - so then we know, at the time we do the
versioning, what and how many vectorized epilogues we create.  See
vect_analyze_loop and the loop over vector sizes.



I think I managed to do this by passing a boolean initialized to the 
PARAM_VALUE (PARAM_VECT_EPILOGUES_NOMASK) to vect_analyze_loop and also 
changing the looping over vector_sizes to count the number of 
vectorizable loops whilst using the same code path as loop->simdlen if 
vect_epilogues_nomask is true.


Then I set vect_epilogues_nomask to false if the number of vectorized 
loops isn't higher than 1. I'll follow up with a new patch for you to 
see, which will most likely make more sense than what I just wrote up 
there ;)




Another potential issue arises when the profitability threshold obtained
from the cost model isn't guaranteed to be either LE or EQ to the versioning
threshold. In that case there are two separate checks, which now we no
longer will attempt to fold.


And I just realized I don't take the cost model threshold into account when
creating the new threshold check, I guess if it is ordered_p we should again
take the max there between the new threshold check and the cost threshold for
the new check.


Also see https://gcc.gnu.org/ml/gcc-patches/2018-06/msg01397.html for
a patch that computes costs of each possible VF, with that we could
compute a better combined estimate for minimum number of iters
(also considering the extra jumps to reach the main VF/2 loop).

Started to look at this. Just to check something, this patch enables 
picking a lower VF loop if it determines that its single iteration cost 
is more than N times lower than the higher VF loop, where N is the 
ratio between higher VF and lower VF, no?


Sounds like a good check to have, but I don't see how this is related to 
m

[PATCH] Add --with-static-standard-libraries to the top level

2019-08-05 Thread Tom Tromey
gdb should normally not be linked with -static-libstdc++.  Currently
this has not caused problems, but it's incompatible with catching an
exception thrown from a shared library -- and a subsequent patch
changes gdb to do just this.

This patch adds a new --with-static-standard-libraries flag to the
top-level configure.  It defaults to "auto", which means enabled if
gcc is being built, and disabled otherwise.

Tom

2019-07-27  Tom Tromey  

* configure: Rebuild.
* configure.ac: Add --with-static-standard-libraries.

diff --git a/configure.ac b/configure.ac
index 854f71a34e5..7433badc217 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1602,6 +1602,19 @@ AC_ARG_WITH(stage1-libs,
 [stage1_libs=])
 AC_SUBST(stage1_libs)
 
+# Whether or not to use -static-libstdc++ and -static-libgcc.  The
+# default is yes if gcc is being built; no otherwise.  The reason for
+# this default is that gdb is sometimes linked against GNU Source
+# Highlight, which is a shared library that uses C++ exceptions.  In
+# this case, -static-libstdc++ will cause crashes.
+AC_ARG_WITH(static-standard-libraries,
+[AS_HELP_STRING([--with-static-standard-libraries],
+[use -static-libstdc++ and -static-libgcc (default=auto)])],
+[], [with_static_standard_libraries=auto])
+if test "$with_static_standard_libraries" = auto; then
+  with_static_standard_libraries=$have_compiler
+fi
+
 # Linker flags to use for stage1 or when not bootstrapping.
 AC_ARG_WITH(stage1-ldflags,
 [AS_HELP_STRING([--with-stage1-ldflags=FLAGS], [linker flags for stage1])],
@@ -1614,7 +1627,8 @@ AC_ARG_WITH(stage1-ldflags,
  # In stage 1, default to linking libstdc++ and libgcc statically with GCC
  # if supported.  But if the user explicitly specified the libraries to use,
  # trust that they are doing what they want.
- if test "$stage1_libs" = "" -a "$have_static_libs" = yes; then
+ if test "$with_static_standard_libraries" = yes -a "$stage1_libs" = "" \
+ -a "$have_static_libs" = yes; then
stage1_ldflags="-static-libstdc++ -static-libgcc"
  fi])
 AC_SUBST(stage1_ldflags)
-- 
2.20.1



[PATCH] issue a correct fix-it hint for bad argument in GCC diagnostics (PR 80619)

2019-08-05 Thread Martin Sebor

When the argument to a directive with a length modifier like %lu
in a call to a GCC diagnostic function such as warning()) is not
of the right integer type, GCC suggests to replace the length
modifier in the directive with 'w'.  For instance:

  warning: format ‘%lu’ expects argument of type ‘long unsigned int’, 
but argument 2 has type ‘int’ [-Wformat=]

   f ("%lu", 0);
   ~~^
   %wu

To issue the right hint (i.e., %u in the message above) the attached
patch introduces a new format_lengths enumerator to represent the 'w'
modifier.

Tested on x86_64-linux.

Martin
PR c/80619 - bad fix-it hint for GCC %lu directive with int argument: %wu

gcc/c-family/ChangeLog:

	PR c/80619
	* c-format.c (printf_length_specs): Set FMT_LEN_w for "w".
	(asm_fprintf_length_spec): Same.
	* c-format.h (format_lengths): Add FMT_LEN_w.

gcc/testsuite/ChangeLog:

	PR c/80619
	* gcc.dg/format/pr80619.c: New test.


diff --git a/gcc/c-family/c-format.c b/gcc/c-family/c-format.c
index 6363fa4f686..1fa551957d9 100644
--- a/gcc/c-family/c-format.c
+++ b/gcc/c-family/c-format.c
@@ -497,7 +497,7 @@ static const format_length_info printf_length_specs[] =
 static const format_length_info asm_fprintf_length_specs[] =
 {
   { "l", FMT_LEN_l, STD_C89, "ll", FMT_LEN_ll, STD_C89, 0 },
-  { "w", FMT_LEN_none, STD_C89, NO_FMT, 0 },
+  { "w", FMT_LEN_w, STD_C89, NO_FMT, 0 },
   { NO_FMT, NO_FMT, 0 }
 };
 
@@ -505,7 +505,7 @@ static const format_length_info asm_fprintf_length_specs[] =
 static const format_length_info gcc_diag_length_specs[] =
 {
   { "l", FMT_LEN_l, STD_C89, "ll", FMT_LEN_ll, STD_C89, 0 },
-  { "w", FMT_LEN_none, STD_C89, NO_FMT, 0 },
+  { "w", FMT_LEN_w, STD_C89, NO_FMT, 0 },
   { NO_FMT, NO_FMT, 0 }
 };
 
diff --git a/gcc/c-family/c-format.h b/gcc/c-family/c-format.h
index 972ba46f109..6aa68dfe883 100644
--- a/gcc/c-family/c-format.h
+++ b/gcc/c-family/c-format.h
@@ -36,6 +36,7 @@ enum format_lengths
   FMT_LEN_H,
   FMT_LEN_D,
   FMT_LEN_DD,
+  FMT_LEN_w,   /* GCC's HOST_WIDE_INT.  */
   FMT_LEN_MAX
 };
 
diff --git a/gcc/testsuite/gcc.dg/format/pr80619.c b/gcc/testsuite/gcc.dg/format/pr80619.c
new file mode 100644
index 000..c9f0496a1c2
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/format/pr80619.c
@@ -0,0 +1,89 @@
+/* PR c/80619 - bad fix-it hint for GCC %lu directive with int argument: %wu
+   { dg-do compile }
+   { dg-options "-Wall -fdiagnostics-show-caret" } */
+
+void T (const char*, ...) __attribute__ ((format (__gcc_diag__, 1, 2)));
+
+void test_suggested_modifier (void)
+{
+  T ("%ld", 0); // { dg-warning "format '%ld' expects argument of type 'long int', but argument 2 has type 'int'" }
+  /* { dg-begin-multiline-output "" }
+   T ("%ld", 0);
+   ~~^   ~
+ |   |
+ |   int
+ long int
+   %d
+   { dg-end-multiline-output "" } */
+
+  T ("%li", 0); // { dg-warning "format '%li' expects argument of type 'long int', but argument 2 has type 'int'" }
+  /* { dg-begin-multiline-output "" }
+   T ("%li", 0);
+   ~~^   ~
+ |   |
+ |   int
+ long int
+   %i
+   { dg-end-multiline-output "" } */
+
+  T ("%lu", 0); // { dg-warning "format '%lu' expects argument of type 'long unsigned int', but argument 2 has type 'int'" }
+  /* { dg-begin-multiline-output "" }
+   T ("%lu", 0);
+   ~~^   ~
+ |   |
+ |   int
+ long unsigned int
+   %u
+   { dg-end-multiline-output "" } */
+
+  T ("%lx", 0); // { dg-warning "format '%lx' expects argument of type 'long unsigned int', but argument 2 has type 'int'" }
+  /* { dg-begin-multiline-output "" }
+   T ("%lx", 0);
+   ~~^   ~
+ |   |
+ |   int
+ long unsigned int
+   %x
+   { dg-end-multiline-output "" } */
+
+  T ("%lli", 0);// { dg-warning "format '%lli' expects argument of type 'long long int', but argument 2 has type 'int'" }
+  /* { dg-begin-multiline-output "" }
+   T ("%lli", 0);
+   ~~~^   ~
+  |   |
+  |   int
+  long long int
+   %i
+   { dg-end-multiline-output "" } */
+
+  T ("%llo", 0);// { dg-warning "format '%llo' expects argument of type 'long long unsigned int', but argument 2 has type 'int'" }
+  /* { dg-begin-multiline-output "" }
+   T ("%llo", 0);
+   ~~~^   ~
+  |   |
+  |   int
+  long long unsigned int
+   %o
+   { dg-end-multiline-output "" } */
+
+  T ("%llu", 0);// { dg-warning "format '%llu' expects argument of type 'long long unsigned int', but argument 2 has type 'int'" }
+  /* { dg-begin-multiline-output "" }
+   T ("%llu", 0);
+   ~~~^   ~
+  |   |
+  |   int
+  long long unsigned int
+   %u
+   { dg-end-multiline-output "" } */
+
+  T ("%llx", 0);// { dg-warning "format '%llx' expects argument of type 'long long unsigned int', but argument 2 has type 'int'" }
+  /* { dg-begin-multiline-output "" }
+   T ("%llx", 0);
+   ~~~^   ~
+  |   |
+  |   int
+ 

Re: Use predicates for RTL objects

2019-08-05 Thread Segher Boessenkool
Hi Arvind,

First: do you have a copyright assignment?  See
  https://gcc.gnu.org/contribute.html
for instructions.

This is easier to review, and even to commit as obvious, if you did a
patch per macro, certainly for the new macros; and, put the script in
contrib/, and then say with every patch that is just the output of the
script that that is the case.

Some (mostly changelog) comments:

On Mon, Aug 05, 2019 at 01:09:10PM -0400, Arvind Sankar wrote:
>   * gcc/alias.c, gcc/asan.c, gcc/bb-reorder.c, gcc/bt-load.c: Use 
> predicate macros for rtx_code comparisons.

Many of your changelog lines are much too long.  Don't use more than 80
columns (including the leading tab, which is 8 columns).

Please mention the exact macros you now use, and/or the actual rtx codes.

Filenames are relative to the directory containing the changelog file
itself, so you shouldn't have the gcc/ in those filenames.

>   * gcc/config/microblaze/predicates.md, 

There shouldn't be trailing spaces.

>   * gcc/config/rx/constraints.md, gcc/config/rx/rx.c, gcc/config/rx/rx.h: 
> Likewise.

And you normally have a separate entry for every file.

> -/* Predicate yielding true iff X is an rtx for a double-int.  */
> +/* Predicate yielding true iff X is an rtx for a floating point constant.  */
>  #define CONST_DOUBLE_AS_FLOAT_P(X) \
>(GET_CODE (X) == CONST_DOUBLE && GET_MODE (X) != VOIDmode)

Is const_double really only used for floating point these days?


Segher


[wwwdocs] Another C++20 table update

2019-08-05 Thread Marek Polacek
I've opened a bunch of PRs to track the status of C++2a features in G++.  This
patch updates the C++2a table accordingly.

While at it, I made a small clarification regarding P1152R4: only certain uses
of volatile are being deprecated, not volatile itself.

Index: cxx-status.html
===
RCS file: /cvs/gcc/wwwdocs/htdocs/projects/cxx-status.html,v
retrieving revision 1.92
diff -u -r1.92 cxx-status.html
--- cxx-status.html 5 Aug 2019 15:13:48 -   1.92
+++ cxx-status.html 5 Aug 2019 16:53:26 -
@@ -320,7 +320,7 @@
 
 
   http://wg21.link/p0784r7";>P0784R7
-  No
+  No (https://gcc.gnu.org/PR91369";>PR91369)

 
 
@@ -354,14 +354,14 @@
 
Parenthesized initialization of aggregates 
   http://wg21.link/p0960r3";>P0960R3
-  No
+  No (https://gcc.gnu.org/PR91363";>PR91363)
__cpp_aggregate_paren_init >= 201902
 
 
Stronger Unicode requirements 
   http://wg21.link/p1041r4";>P1041R4
   http://wg21.link/p1139r2";>P1139R2
-  No
+  No (https://gcc.gnu.org/PR91370";>PR91370)

 
 
@@ -380,45 +380,45 @@
   
 
 
-  Deprecating volatile
+  Deprecating some uses of volatile
   http://wg21.link/p1152r4";>P1152R4
-  No
+  No (https://gcc.gnu.org/PR91361";>PR91361)
   
 
 
   [[nodiscard("with reason")]]
   http://wg21.link/p1301r4";>P1301R4
-  No
+  No (https://gcc.gnu.org/PR91368";>PR91368)
   
 
 
   using enum
   http://wg21.link/p1099r5";>P1099R5
-  No
+  No (https://gcc.gnu.org/PR91367";>PR91367)
   
 
 
   Class template argument deduction for aggregates
   http://wg21.link/p1816r0";>P1816R0
-  No
+  No (https://gcc.gnu.org/PR91366";>PR91366)
   
 
 
   Class template argument deduction for alias templates
   http://wg21.link/p1814r0";>P1814R0
-  No
+  No (https://gcc.gnu.org/PR91365";>PR91365)
   
 
 
   Permit conversions to arrays of unknown bound
   http://wg21.link/p0388r4";>P0388R4
-  No
+  No (https://gcc.gnu.org/PR91364";>PR91364)
   
 
 
   constinit
   http://wg21.link/p1143r2";>P1143R2
-  No
+  No (https://gcc.gnu.org/PR91360";>PR91360)
   
 
   


Re: [Contrib PATCH] Add scripts to convert GCC repo from SVN to Git

2019-08-05 Thread Mike Stump
On Aug 2, 2019, at 4:06 AM, Richard Biener  wrote:
> 
> IMHO voting is bike-shedding.
> 
> Those who do the work decide.  _They_ may ask questions _and_ decide whether
> to listen to the answer.

I'd tend to agree.  I also think the recent conversion work is a fine solution, 
and that my preference for that might influence my agreement here.

I don't think we should maintain a requirement that we have monotonic numbers 
going forward.  That's just not the git way.  I've been known to do git log, 
and then manually pick start and end, and then bisect based upon not date, but 
out of a large hash list.  The concern that some dates have a ton and other 
dates have few doesn't come into play, as each hash is 1 unit of work, so a 
list of 10235 hashes can be trivially split into 2, 3, 4 or 1042, if you have 
the machines for it.

Re: wrap math.h for M_PI et al in target/i386 tests

2019-08-05 Thread Mike Stump



> On Jul 30, 2019, at 5:22 AM, Uros Bizjak  wrote:
>> Most but not all of the tests that expect M_PI, M_PI_2 and/or M_PI_4
>> to be defined in math.h explicitly exclude one target system that does
>> not satisfy this non-standard assumption.
>> 
>> This patch introduces a wrapper header that includes math.h and then
>> conditionally supplies the missing non-standard macro definitions.
>> With that, we can drop the dg-skip-if "no M_PI" exclusions.
>> 
>> for  gcc/testsuite/ChangeLog
>> 
>> * gcc.target/i386/math_m_pi.h: New.
> 
> LGTM, patch also needs approval from testsuite maintainer (CC'd).

Not unreasonable for the x86 maintainer to approve patches like this.  It 
doesn't impact any non-386 target.

Ok.

Re: Monotonically increasing counter (was Re: [Contrib PATCH] Add scripts to convert GCC repo from SVN to Git)

2019-08-05 Thread Richard Earnshaw (lists)

On 05/08/2019 16:34, Jakub Jelinek wrote:

On Mon, Aug 05, 2019 at 11:20:09AM -0400, Jason Merrill wrote:

I agree.  But for those who want a monotonically increasing
identifier, there's already one in git: CommitDate.  In the discussion
of this issue four years ago,


While commit date is monotonically increasing, it has the problem that at
certain dates there are very few commits, at others many.  When doing
bisection by hand, one does the revision computation (min+max)/2 in head
(it doesn't have to be precise of course, just roughly, and isn't perfect
either, because in svn all of trunk and branches contribute to the revision
numbers), with dates it would be division into further uneven chunks.

Could we tag the branchpoints, say when we branch off gcc 10, we tag the
branchpoint as tags/r11 and then we could use r11-123 as 123th commit on the
trunk since the branchpoint, and let bugzilla and web redirection handle
those rNN- style identifiers?
git describe --all --match 'r[0-9]*' ... | sed ...
to map hashes etc. to these rNN- identifiers and something to map them
back to hashes say for git web?

Jakub



git rev-list --reverse branchtag..branchname

Will list all the revs on that branch from branchtag through to the head 
of the branch.  I guess you could then count the individual revs on that 
list to index them.


R


Re: Monotonically increasing counter (was Re: [Contrib PATCH] Add scripts to convert GCC repo from SVN to Git)

2019-08-05 Thread Jakub Jelinek
On Mon, Aug 05, 2019 at 11:20:09AM -0400, Jason Merrill wrote:
> I agree.  But for those who want a monotonically increasing
> identifier, there's already one in git: CommitDate.  In the discussion
> of this issue four years ago,

While commit date is monotonically increasing, it has the problem that at
certain dates there are very few commits, at others many.  When doing
bisection by hand, one does the revision computation (min+max)/2 in head
(it doesn't have to be precise of course, just roughly, and isn't perfect
either, because in svn all of trunk and branches contribute to the revision
numbers), with dates it would be division into further uneven chunks.

Could we tag the branchpoints, say when we branch off gcc 10, we tag the
branchpoint as tags/r11 and then we could use r11-123 as the 123rd commit on the
trunk since the branchpoint, and let bugzilla and web redirection handle
those rNN- style identifiers?
git describe --all --match 'r[0-9]*' ... | sed ...
to map hashes etc. to these rNN- identifiers and something to map them
back to hashes say for git web?

Jakub


Monotonically increasing counter (was Re: [Contrib PATCH] Add scripts to convert GCC repo from SVN to Git)

2019-08-05 Thread Jason Merrill
On Mon, Aug 5, 2019 at 9:20 AM Martin Liška  wrote:

> Based on the IRC discussion with Jakub, a key element of the transition is 
> still missing.
> Jakub requests that monotonically increasing revisions (aka rXXX) be 
> assigned
> to future git revisions. These will be linked from bugzilla and 
> http://gcc.gnu.org/rN
>
> I don't like the suggested requirement and I would prefer to use git hashes 
> for both bugzilla
> links and general references to revisions. That's what all projects using git 
> do.

I agree.  But for those who want a monotonically increasing
identifier, there's already one in git: CommitDate.  In the discussion
of this issue four years ago,

https://gcc.gnu.org/ml/gcc/2015-09/threads.html#00028

I provided a set of git aliases to generate and use reposurgeon-style
action stamps for naming commits.  For Jakub's use-case, the committer
part of the action stamp is probably unnecessary, just the date/time
part should be enough.

Looking at it again, I notice that the different timezones in the
committer date would interfere with sorting, so this update to the
stamp alias uses UTC unconditionally:

stamp = "!f(){ TZ=UTC git show -s
--date='format-local:%Y-%m-%dT%H:%M:%SZ' --format='%cd!%ce'
${1:+\"$@\"}; }; f"

To drop the committer from the stamp, remove "!%ce" from the format argument.

Jakub, it seems to me that this should do the trick for you; binaries
would be named by date/time rather than by revision.  What do you
think?

Jason


[committed][MSP430] Add "cleanup-saved-temps" directive to gcc.target/msp430/pr80993.c

2019-08-05 Thread Jozef Lawrynowicz
This fixes linker errors about different memory models in object files
when the large/small memory models are tested one after the other in the same
build directory, by cleaning up the temporary LTO files required to be
generated for this test.

Applied on trunk.
>From a64c89e01c11b167e7113a49914eac79f4c1044c Mon Sep 17 00:00:00 2001
From: jozefl 
Date: Mon, 5 Aug 2019 14:02:35 +
Subject: [PATCH] 2019-08-05  Jozef Lawrynowicz  

	* gcc.target/msp430/pr80993.c: Add cleanup-saved-temps to final
	actions.


git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@274116 138bc75d-0d04-0410-961f-82ee72b054a4
---
 gcc/testsuite/ChangeLog   | 5 +
 gcc/testsuite/gcc.target/msp430/pr80993.c | 1 +
 2 files changed, 6 insertions(+)

diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index cff940e0b28..ca99dc67f1b 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2019-08-05  Jozef Lawrynowicz  
+
+	* gcc.target/msp430/pr80993.c: Add cleanup-saved-temps to final
+	actions.
+
 2019-08-05  Martin Liska  
 
 	PR c++/91334
diff --git a/gcc/testsuite/gcc.target/msp430/pr80993.c b/gcc/testsuite/gcc.target/msp430/pr80993.c
index 4da5cf95532..6cea2b85434 100644
--- a/gcc/testsuite/gcc.target/msp430/pr80993.c
+++ b/gcc/testsuite/gcc.target/msp430/pr80993.c
@@ -1,6 +1,7 @@
 /* { dg-do link } */
 /* { dg-options "--save-temps -msim -flto -Os" } */
 /* { dg-final { scan-file "pr80993.exe.ltrans0.s" no_ref_handler } } */
+/* { dg-final { cleanup-saved-temps } } */
 
 void __attribute__((interrupt)) no_ref_handler (void)
 {
-- 
2.17.1



Re: [PATCH] Handle new operators with no arguments in DCE.

2019-08-05 Thread Marc Glisse

On Mon, 5 Aug 2019, Martin Liška wrote:


You are right. It can really lead to confusion of the DCE.

What we have is DECL_ABSTRACT_ORIGIN(decl) which we can use to indicate 
operators
that were somehow modified by an IPA optimization.


Looks similar to the cgraph_node->clone_of that Richard was talking about. 
I have no idea which one would be best.



Do you believe it will be sufficient?


In DCE only consider the operator delete that are not clones? (possibly 
the same for operator new? I don't know how much we can change the return 
value of a function in a clone) I guess that should work, and it wouldn't 
impact the common case with default, global operator new/delete.


An alternative would be to clear the DECL_IS_OPERATOR_DELETE flag when 
creating a clone (possibly only if it modified the first parameter?). 
There is probably not much information in the fact that a function that 
doesn't even take a pointer used to be an operator delete.



By the way, I just thought of something, now that we handle class-specific 
operator new/delete (but you could do the same with the global replacable 
ones as well):


#include 
int count = 0;
struct A {
  __attribute__((malloc,noinline))
  static void* operator new(unsigned long sz){++count;return ::operator 
new(sz);}
  static void operator delete(void* ptr){--count;::operator delete(ptr);}
};
int main(){
  delete new A;
  printf("%d\n",count);
}

If we do not inline anything, we can remove the pair and nothing touches 
count. If we inline both new and delete, we can then remove the inner pair 
instead, count increases and decreases, fine. If we inline only one of 
them, and DCE the mismatched pair new/delete, we get something 
inconsistent (count is -1).


This seems to indicate we should check that the new and delete match 
somehow...


--
Marc Glisse


Re: [PATCH][RFC][x86] Fix PR91154, add SImode smax, allow SImode add in SSE regs

2019-08-05 Thread Richard Biener
On Mon, 5 Aug 2019, Uros Bizjak wrote:

> On Mon, Aug 5, 2019 at 3:04 PM Richard Biener  wrote:
> >
> > On Mon, 5 Aug 2019, Uros Bizjak wrote:
> >
> > > On Mon, Aug 5, 2019 at 2:54 PM Jakub Jelinek  wrote:
> > > >
> > > > On Mon, Aug 05, 2019 at 02:51:01PM +0200, Uros Bizjak wrote:
> > > > > > (define_mode_iterator MAXMIN_IMODE [SI "TARGET_SSE4_1"] [DI 
> > > > > > "TARGET_AVX512F"])
> > > > > >
> > > > > > and then we need to split DImode for 32bits, too.
> > > > >
> > > > > For now, please add "TARGET_64BIT && TARGET_AVX512F" for DImode
> > > > > condition, I'll provide _doubleword splitter later.
> > > >
> > > > Shouldn't that be TARGET_AVX512VL instead?  Or does the insn use %g0 
> > > > etc.
> > > > to force use of %zmmN?
> > >
> > > It generates V4SI mode, so - yes, AVX512VL.
> >
> > case SMAX:
> > case SMIN:
> > case UMAX:
> > case UMIN:
> >   if ((mode == DImode && (!TARGET_64BIT || !TARGET_AVX512VL))
> >   || (mode == SImode && !TARGET_SSE4_1))
> > return false;
> >
> > so there's no way to use AVX512VL for 32bit?
> 
> There is a way, but on 32bit targets, we need to split DImode
> operation to a sequence of SImode operations for unconverted pattern.
> This is of course doable, but somehow more complex than simply
> emitting a DImode compare + DImode cmove, which is what current
> splitter does. So, a follow-up task.

Ah, OK.  So for the above condition we can elide the !TARGET_64BIT
check we just need to properly split if we enable the scalar minmax
pattern for DImode on 32bits, the STV conversion would go fine.

Richard.


Re: [Contrib PATCH] Add scripts to convert GCC repo from SVN to Git

2019-08-05 Thread Martin Liška
On 8/3/19 12:31 AM, Jason Merrill wrote:
> On Fri, Aug 2, 2019 at 7:35 AM Martin Liška  wrote:
>>
>> On 8/2/19 1:06 PM, Richard Biener wrote:
>>> On Fri, Aug 2, 2019 at 1:01 PM Martin Liška  wrote:

 On 8/2/19 12:54 PM, Maxim Kuvyrkov wrote:
>> On Aug 2, 2019, at 1:26 PM, Martin Liška  wrote:
>>
>> On 8/2/19 10:41 AM, Maxim Kuvyrkov wrote:
>>> In the end, I don't care much to which version of the repo we switch, 
>>> as long as we switch.
>>
>> Hi Maxim.
>>
>> I really appreciate that you've been working on that. Same as you I 
>> would like to see
>> any change that will lead to a git repository.
>>
>> I have couple of questions about the upcoming Cauldron:
>>
>> - Are you planning to attend?
>
> Unfortunately, I won't attend this time.

 I see.

>
>> - Would it be possible to prepare a voting during e.g. Steering 
>> Committee where
>>  we'll vote about transition options?
>> - Would it make sense to do an online questionnaire in advance in order
>>  to guess what's prevailing opinion?
>>
>> If you are interested, I can help you?
>
> Let's organize an online survey now.  While most active GCC developers 
> will attend Cauldron, many others will not, so we shouldn't rely on 
> Cauldron to make any final decisions.

 Sure, online is the best option as all active community members can vote.

>
> Martin, would you please organize the survey?

 Yes, but I haven't followed the discussion in recent weeks. Is the only 
 question
 whether we want the current GIT mirror or your rebased git repository?
 Is Eric Raymond's transition still in play or not?
>>>
>>> 1) Stay with SVN
>>> 2) Switch to the existing GIT mirror
>>> 3) Wait for ERS to complete his conversion to GIT
>>> 4) Use the existing new conversion to GIT fixing authors and commit messages
>>> 5) I don't care
>>> 6) I don't care as long as we switch to GIT
>>>
 Are there any other sub-question regarding commit message format, git 
 hooks, etc.
 that will deserve a place in the questionnaire?
>>>
>>> No, please do not make it unnecessarily complicated.  Maybe the questionaire
>>> can include a free-form text field for more comments.
>>>
>>> Btw, I do not believe we should do this kind of voting.  Who's eligible to 
>>> vote?
>>> Is the vote anonymous?  What happens when the majority (what is the 
>>> majority?)
>>> votes for option N?
>>>
>>> IMHO voting is bike-shedding.
>>>
>>> Those who do the work decide.  _They_ may ask questions _and_ decide whether
>>> to listen to the answer.
>>>
>>> Richard.
>>>
 Thank,
 Martin

>
> Regards,
>
> --
> Maxim Kuvyrkov
> www.linaro.org
>

>>
>> So Richi is suggesting to finish all necessary for transition before we'll 
>> vote.
>> That should include bugzilla reporting script and maybe other git hooks?
>> Do we have a checklist of these? Jason?
> 
> As far as I can see, the SVN hooks only send email to the *cvs and
> gcc-bugzilla lists, that shouldn't be hard to mimic.
> 
> I think we also want to decide on policies for creating branches/tags,
> deleting refs, or pushing non-fast-forward updates.  In the current
> mirror you can delete branches in your own subdirectory, but not other
> branches.
> 
> Jason
> 

Hello.

Based on the IRC discussion with Jakub, a key element of the transition is 
still missing.
Jakub requests that monotonically increasing revisions (aka rXXX) be 
assigned
to future git revisions. These will be linked from bugzilla and 
http://gcc.gnu.org/rN

I don't like the suggested requirement and I would prefer to use git hashes for 
both bugzilla
links and general references to revisions. That's what all projects using git 
do.

As it's still unresolved, I'm not planning to organize any GIT transition 
survey.

Martin



Re: [PATCH][RFC][x86] Fix PR91154, add SImode smax, allow SImode add in SSE regs

2019-08-05 Thread Uros Bizjak
On Mon, Aug 5, 2019 at 3:04 PM Richard Biener  wrote:
>
> On Mon, 5 Aug 2019, Uros Bizjak wrote:
>
> > On Mon, Aug 5, 2019 at 2:54 PM Jakub Jelinek  wrote:
> > >
> > > On Mon, Aug 05, 2019 at 02:51:01PM +0200, Uros Bizjak wrote:
> > > > > (define_mode_iterator MAXMIN_IMODE [SI "TARGET_SSE4_1"] [DI 
> > > > > "TARGET_AVX512F"])
> > > > >
> > > > > and then we need to split DImode for 32bits, too.
> > > >
> > > > For now, please add "TARGET_64BIT && TARGET_AVX512F" for DImode
> > > > condition, I'll provide _doubleword splitter later.
> > >
> > > Shouldn't that be TARGET_AVX512VL instead?  Or does the insn use %g0 etc.
> > > to force use of %zmmN?
> >
> > It generates V4SI mode, so - yes, AVX512VL.
>
> case SMAX:
> case SMIN:
> case UMAX:
> case UMIN:
>   if ((mode == DImode && (!TARGET_64BIT || !TARGET_AVX512VL))
>   || (mode == SImode && !TARGET_SSE4_1))
> return false;
>
> so there's no way to use AVX512VL for 32bit?

There is a way, but on 32bit targets, we need to split DImode
operation to a sequence of SImode operations for unconverted pattern.
This is of course doable, but somehow more complex than simply
emitting a DImode compare + DImode cmove, which is what current
splitter does. So, a follow-up task.

Uros.


Re: [PATCH][RFC][x86] Fix PR91154, add SImode smax, allow SImode add in SSE regs

2019-08-05 Thread Richard Biener
On Mon, 5 Aug 2019, Uros Bizjak wrote:

> On Mon, Aug 5, 2019 at 2:54 PM Jakub Jelinek  wrote:
> >
> > On Mon, Aug 05, 2019 at 02:51:01PM +0200, Uros Bizjak wrote:
> > > > (define_mode_iterator MAXMIN_IMODE [SI "TARGET_SSE4_1"] [DI 
> > > > "TARGET_AVX512F"])
> > > >
> > > > and then we need to split DImode for 32bits, too.
> > >
> > > For now, please add "TARGET_64BIT && TARGET_AVX512F" for DImode
> > > condition, I'll provide _doubleword splitter later.
> >
> > Shouldn't that be TARGET_AVX512VL instead?  Or does the insn use %g0 etc.
> > to force use of %zmmN?
> 
> It generates V4SI mode, so - yes, AVX512VL.

case SMAX:
case SMIN:
case UMAX:
case UMIN:
  if ((mode == DImode && (!TARGET_64BIT || !TARGET_AVX512VL))
  || (mode == SImode && !TARGET_SSE4_1))
return false;

so there's no way to use AVX512VL for 32bit?

Richard.


Re: [PATCH][RFC][x86] Fix PR91154, add SImode smax, allow SImode add in SSE regs

2019-08-05 Thread Uros Bizjak
On Mon, Aug 5, 2019 at 2:54 PM Jakub Jelinek  wrote:
>
> On Mon, Aug 05, 2019 at 02:51:01PM +0200, Uros Bizjak wrote:
> > > (define_mode_iterator MAXMIN_IMODE [SI "TARGET_SSE4_1"] [DI 
> > > "TARGET_AVX512F"])
> > >
> > > and then we need to split DImode for 32bits, too.
> >
> > For now, please add "TARGET_64BIT && TARGET_AVX512F" for DImode
> > condition, I'll provide _doubleword splitter later.
>
> Shouldn't that be TARGET_AVX512VL instead?  Or does the insn use %g0 etc.
> to force use of %zmmN?

It generates V4SI mode, so - yes, AVX512VL.

Thanks,
Uros.


Re: [PATCH][RFC][x86] Fix PR91154, add SImode smax, allow SImode add in SSE regs

2019-08-05 Thread Jakub Jelinek
On Mon, Aug 05, 2019 at 02:51:01PM +0200, Uros Bizjak wrote:
> > (define_mode_iterator MAXMIN_IMODE [SI "TARGET_SSE4_1"] [DI 
> > "TARGET_AVX512F"])
> >
> > and then we need to split DImode for 32bits, too.
> 
> For now, please add "TARGET_64BIT && TARGET_AVX512F" for DImode
> condition, I'll provide _doubleword splitter later.

Shouldn't that be TARGET_AVX512VL instead?  Or does the insn use %g0 etc.
to force use of %zmmN?

Jakub


Re: [PATCH] Handle new operators with no arguments in DCE.

2019-08-05 Thread Martin Liška
On 8/5/19 1:57 PM, Marc Glisse wrote:
> On Mon, 5 Aug 2019, Martin Liška wrote:
> 
>> On 8/5/19 9:07 AM, Marc Glisse wrote:
>>> On Mon, 5 Aug 2019, Martin Liška wrote:
>>>
 I'm sending fix for the ICE. The issue is that we can end up
 with a ctor without an argument (when not being used).
>>>
>>> Ah, I didn't realize that after cloning and drastically changing the 
>>> signature it would still count as operator new/delete. Is getting down to 0 
>>> arguments the only bad thing that can happen? Can't we have an operator 
>>> delete (void*, void*) where the first argument gets optimized out and we 
>>> end up optimizing as if the second argument was actually the memory being 
>>> released? Should we do some sanity-checking when propagating the new/delete 
>>> flags to clones?
>>>
>>
>> It can theoretically happen, but it should be properly handled in the 
>> following
>> code:
>>
>>   810    if (is_delete_operator
>>   811    || gimple_call_builtin_p (stmt, BUILT_IN_FREE))
>>   812  {
>>   813    /* It can happen that a user delete operator has the 
>> pointer
>>   814   argument optimized out already.  */
>>   815    if (gimple_call_num_args (stmt) == 0)
>>   816  continue;
>>   817
>>   818    tree ptr = gimple_call_arg (stmt, 0);
>>   819    gimple *def_stmt;
>>   820    tree def_callee;
>>   821    /* If the pointer we free is defined by an allocation
>>   822   function do not add the call to the worklist.  */
>>   823    if (TREE_CODE (ptr) == SSA_NAME
>>   824    && is_gimple_call (def_stmt = SSA_NAME_DEF_STMT 
>> (ptr))
>>   825    && (def_callee = gimple_call_fndecl (def_stmt))
>>   826    && ((DECL_BUILT_IN_CLASS (def_callee) == 
>> BUILT_IN_NORMAL
>>   827 && (DECL_FUNCTION_CODE (def_callee) == 
>> BUILT_IN_ALIGNED_ALLOC
>>   828 || DECL_FUNCTION_CODE (def_callee) == 
>> BUILT_IN_MALLOC
>>   829 || DECL_FUNCTION_CODE (def_callee) == 
>> BUILT_IN_CALLOC))
>>   830    || DECL_IS_REPLACEABLE_OPERATOR_NEW_P 
>> (def_callee)))
>>   831  {
>>   832    /* Delete operators can have alignment and (or) 
>> size as next
>>   833   arguments.  When being a SSA_NAME, they must be 
>> marked
>>   834   as necessary.  */
>>   835    if (is_delete_operator && gimple_call_num_args 
>> (stmt) >= 2)
>>   836  for (unsigned i = 1; i < gimple_call_num_args 
>> (stmt); i++)
>>   837    {
>>   838  tree arg = gimple_call_arg (stmt, i);
>>   839  if (TREE_CODE (arg) == SSA_NAME)
>>   840    mark_operand_necessary (arg);
>>   841    }
>>
>> Where we verify that first argument of delete call is defined as a LHS of a 
>> new operator.
> 
> What I am saying is that it may be the wrong operator new.
> 
> Imagine something like:
> 
> struct A {
>   A();
>   __attribute__((malloc)) static void* operator new(unsigned long sz, 
> void*,bool);
>   static void operator delete(void* ptr, void*,bool);
> };
> int main(){
>   int*p=new int;
>   new(p,true) A;
> }
> 
> At the beginning, it has
> 
>     D.2321 = A::operator new (1, p, 1);
> 
> and
> 
>     A::operator delete (D.2321, p, 1);
> 
> Now imagine we have access to the body of the functions, and the last one is 
> transformed into
> 
>     operator delete.clone (p)

You are right. It can really confuse the DCE.

What we have is DECL_ABSTRACT_ORIGIN(decl) which we can use to indicate 
operators
that were somehow modified by an IPA optimization. Do you believe it will be 
sufficient?

Thanks,
Martin

> 
> (the first argument was removed)
> p does come from an operator new, so we do see a matched pair new+delete, but 
> it is the wrong pair.
> 
> I admit that's rather unlikely, but with clones that have a different 
> signature, many assumptions we could make on the functions become unsafe, and 
> I am not clear on the scale of this issue.
> 



Re: [PATCH][RFC][x86] Fix PR91154, add SImode smax, allow SImode add in SSE regs

2019-08-05 Thread Uros Bizjak
On Mon, Aug 5, 2019 at 2:43 PM Uros Bizjak  wrote:
>
> On Mon, Aug 5, 2019 at 1:50 PM Richard Biener  wrote:
> >
> > On Sun, 4 Aug 2019, Uros Bizjak wrote:
> >
> > > On Sat, Aug 3, 2019 at 7:26 PM Richard Biener  wrote:
> > > >
> > > > On Thu, 1 Aug 2019, Uros Bizjak wrote:
> > > >
> > > > > On Thu, Aug 1, 2019 at 11:28 AM Richard Biener  
> > > > > wrote:
> > > > >
> > > >  So you unconditionally add a smaxdi3 pattern - indeed this looks
> > > >  necessary even when going the STV route.  The actual regression
> > > >  for the testcase could also be solved by turning the smaxsi3
> > > >  back into a compare and jump rather than a conditional move 
> > > >  sequence.
> > > >  So I wonder how you'd do that given that there's 
> > > >  pass_if_after_reload
> > > >  after pass_split_after_reload and I'm not sure we can split
> > > >  as late as pass_split_before_sched2 (there's also a split _after_
> > > >  sched2 on x86 it seems).
> > > > 
> > > >  So how would you go implement {s,u}{min,max}{si,di}3 for the
> > > >  case STV doesn't end up doing any transform?
> > > > >>>
> > > > >>> If STV doesn't transform the insn, then a pre-reload splitter splits
> > > > >>> the insn back to compare+cmove.
> > > > >>
> > > > >> OK, that would work.  But there's no way to force a jumpy sequence 
> > > > >> then
> > > > >> which we know is faster than compare+cmove because later RTL
> > > > >> if-conversion passes happily re-discover the smax (or conditional 
> > > > >> move)
> > > > >> sequence.
> > > > >>
> > > > >>> However, considering the SImode move
> > > > >>> from/to int/xmm register is relatively cheap, the cost function 
> > > > >>> should
> > > > >>> be tuned so that STV always converts smaxsi3 pattern.
> > > > >>
> > > > >> Note that on both Zen and even more so bdverN the int/xmm transition
> > > > >> makes it no longer profitable but a _lot_ slower than the cmp/cmov
> > > > >> sequence... (for the loop in hmmer which is the only one I see
> > > > >> any effect of any of my patches).  So identifying chains that
> > > > >> start/end in memory is important for cost reasons.
> > > > >
> > > > > Please note that the cost function also considers the cost of move
> > > > > from/to xmm. So, the cost of the whole chain would disable the
> > > > > transformation.
> > > > >
> > > > >> So I think the splitting has to happen after the last if-conversion
> > > > >> pass (and thus we may need to allocate a scratch register for this
> > > > >> purpose?)
> > > > >
> > > > > I really hope that the underlying issue will be solved by a machine
> > > > > dependant pass inserted somewhere after the pre-reload split. This
> > > > > way, we can split unconverted smax to the cmove, and this later pass
> > > > > would handle jcc and cmove instructions. Until then... yes your
> > > > > proposed approach is one of the ways to avoid unwanted if-conversion,
> > > > > although sometimes we would like to split to cmove instead.
> > > >
> > > > So the following makes STV also consider SImode chains, re-using the
> > > > DImode chain code.  I've kept a simple incomplete smaxsi3 pattern
> > > > and also did not alter the {SI,DI}mode chain cost function - it's
> > > > quite off for TARGET_64BIT.  With this I get the expected conversion
> > > > for the testcase derived from hmmer.
> > > >
> > > > No further testing sofar.
> > > >
> > > > Is it OK to re-use the DImode chain code this way?  I'll clean things
> > > > up some more of course.
> > >
> > > Yes, the approach looks OK to me. It makes chain building mode
> > > agnostic, and the chain building can be used for
> > > a) DImode x86_32 (as is now), but maybe 64bit minmax operation can be 
> > > added.
> > > b) SImode x86_32 and x86_64 (this will be mainly used for SImode
> > > minmax and surrounding SImode operations)
> > > c) DImode x86_64 (also, mainly used for DImode minmax and surrounding
> > > DImode operations)
> > >
> > > > Still need help with the actual patterns for minmax and how the 
> > > > splitters
> > > > should look like.
> > >
> > > Please look at the attached patch. Maybe we can add memory_operand as
> > > operand 1 and operand 2 predicate, but let's keep things simple for
> > > now.
> >
> > Thanks.  The attached patch makes the patch cleaner and it survives
> > "some" barebone testing.  It also touches the cost function to
> > avoid being too overly trigger-happy.  I've also ended up using
> > ix86_cost->sse_op instead of COSTS_N_INSN-based magic.  In
> > particular we estimated GPR reg-reg move as COST_N_INSNS(2) while
> > move costs shouldn't be wrapped in COST_N_INSNS.
> > IMHO we should probably disregard any reg-reg moves for costing pre-RA.
> > At least with the current code every reg-reg move biases in favor of
> > SSE...
> >
> > And we're simply adding move and non-move costs in 'gain', somewhat
> > mixing apples and oranges?  We could separate those and require
> > both to be a net positive win?
> >
> > Still using

Re: [PATCH][RFC][x86] Fix PR91154, add SImode smax, allow SImode add in SSE regs

2019-08-05 Thread Uros Bizjak
On Mon, Aug 5, 2019 at 1:50 PM Richard Biener  wrote:
>
> On Sun, 4 Aug 2019, Uros Bizjak wrote:
>
> > On Sat, Aug 3, 2019 at 7:26 PM Richard Biener  wrote:
> > >
> > > On Thu, 1 Aug 2019, Uros Bizjak wrote:
> > >
> > > > On Thu, Aug 1, 2019 at 11:28 AM Richard Biener  
> > > > wrote:
> > > >
> > >  So you unconditionally add a smaxdi3 pattern - indeed this looks
> > >  necessary even when going the STV route.  The actual regression
> > >  for the testcase could also be solved by turning the smaxsi3
> > >  back into a compare and jump rather than a conditional move sequence.
> > >  So I wonder how you'd do that given that there's pass_if_after_reload
> > >  after pass_split_after_reload and I'm not sure we can split
> > >  as late as pass_split_before_sched2 (there's also a split _after_
> > >  sched2 on x86 it seems).
> > > 
> > >  So how would you go implement {s,u}{min,max}{si,di}3 for the
> > >  case STV doesn't end up doing any transform?
> > > >>>
> > > >>> If STV doesn't transform the insn, then a pre-reload splitter splits
> > > >>> the insn back to compare+cmove.
> > > >>
> > > >> OK, that would work.  But there's no way to force a jumpy sequence then
> > > >> which we know is faster than compare+cmove because later RTL
> > > >> if-conversion passes happily re-discover the smax (or conditional move)
> > > >> sequence.
> > > >>
> > > >>> However, considering the SImode move
> > > >>> from/to int/xmm register is relatively cheap, the cost function should
> > > >>> be tuned so that STV always converts smaxsi3 pattern.
> > > >>
> > > >> Note that on both Zen and even more so bdverN the int/xmm transition
> > > >> makes it no longer profitable but a _lot_ slower than the cmp/cmov
> > > >> sequence... (for the loop in hmmer which is the only one I see
> > > >> any effect of any of my patches).  So identifying chains that
> > > >> start/end in memory is important for cost reasons.
> > > >
> > > > Please note that the cost function also considers the cost of move
> > > > from/to xmm. So, the cost of the whole chain would disable the
> > > > transformation.
> > > >
> > > >> So I think the splitting has to happen after the last if-conversion
> > > >> pass (and thus we may need to allocate a scratch register for this
> > > >> purpose?)
> > > >
> > > > I really hope that the underlying issue will be solved by a machine
> > > > dependent pass inserted somewhere after the pre-reload split. This
> > > > way, we can split unconverted smax to the cmove, and this later pass
> > > > would handle jcc and cmove instructions. Until then... yes your
> > > > proposed approach is one of the ways to avoid unwanted if-conversion,
> > > > although sometimes we would like to split to cmove instead.
> > >
> > > So the following makes STV also consider SImode chains, re-using the
> > > DImode chain code.  I've kept a simple incomplete smaxsi3 pattern
> > > and also did not alter the {SI,DI}mode chain cost function - it's
> > > quite off for TARGET_64BIT.  With this I get the expected conversion
> > > for the testcase derived from hmmer.
> > >
> > > No further testing sofar.
> > >
> > > Is it OK to re-use the DImode chain code this way?  I'll clean things
> > > up some more of course.
> >
> > Yes, the approach looks OK to me. It makes chain building mode
> > agnostic, and the chain building can be used for
> > a) DImode x86_32 (as is now), but maybe 64bit minmax operation can be added.
> > b) SImode x86_32 and x86_64 (this will be mainly used for SImode
> > minmax and surrounding SImode operations)
> > c) DImode x86_64 (also, mainly used for DImode minmax and surrounding
> > DImode operations)
> >
> > > Still need help with the actual patterns for minmax and how the splitters
> > > should look like.
> >
> > Please look at the attached patch. Maybe we can add memory_operand as
> > operand 1 and operand 2 predicate, but let's keep things simple for
> > now.
>
> Thanks.  The attached patch makes the patch cleaner and it survives
> "some" barebone testing.  It also touches the cost function to
> avoid being too overly trigger-happy.  I've also ended up using
> ix86_cost->sse_op instead of COSTS_N_INSN-based magic.  In
> particular we estimated GPR reg-reg move as COST_N_INSNS(2) while
> move costs shouldn't be wrapped in COST_N_INSNS.
> IMHO we should probably disregard any reg-reg moves for costing pre-RA.
> At least with the current code every reg-reg move biases in favor of
> SSE...
>
> And we're simply adding move and non-move costs in 'gain', somewhat
> mixing apples and oranges?  We could separate those and require
> both to be a net positive win?
>
> Still using -mtune=bdverN exposes that some cost tables have xmm and gpr
> costs as apples and oranges... (so it never triggers for Bulldozer)
>
> I now run into
>
> /space/rguenther/src/svn/trunk-bisect/libgcc/libgcov-driver.c:509:1:
> error: unrecognizable insn:
> (insn 116 115 1511 8 (set (subreg:V2DI (reg/v:DI 87 [ run_max ]) 0)
>   

Re: [PATCH][RFC][x86] Fix PR91154, add SImode smax, allow SImode add in SSE regs

2019-08-05 Thread Uros Bizjak
On Mon, Aug 5, 2019 at 1:50 PM Richard Biener  wrote:
>
> On Sun, 4 Aug 2019, Uros Bizjak wrote:
>
> > On Sat, Aug 3, 2019 at 7:26 PM Richard Biener  wrote:
> > >
> > > On Thu, 1 Aug 2019, Uros Bizjak wrote:
> > >
> > > > On Thu, Aug 1, 2019 at 11:28 AM Richard Biener  
> > > > wrote:
> > > >
> > >  So you unconditionally add a smaxdi3 pattern - indeed this looks
> > >  necessary even when going the STV route.  The actual regression
> > >  for the testcase could also be solved by turning the smaxsi3
> > >  back into a compare and jump rather than a conditional move sequence.
> > >  So I wonder how you'd do that given that there's pass_if_after_reload
> > >  after pass_split_after_reload and I'm not sure we can split
> > >  as late as pass_split_before_sched2 (there's also a split _after_
> > >  sched2 on x86 it seems).
> > > 
> > >  So how would you go implement {s,u}{min,max}{si,di}3 for the
> > >  case STV doesn't end up doing any transform?
> > > >>>
> > > >>> If STV doesn't transform the insn, then a pre-reload splitter splits
> > > >>> the insn back to compare+cmove.
> > > >>
> > > >> OK, that would work.  But there's no way to force a jumpy sequence then
> > > >> which we know is faster than compare+cmove because later RTL
> > > >> if-conversion passes happily re-discover the smax (or conditional move)
> > > >> sequence.
> > > >>
> > > >>> However, considering the SImode move
> > > >>> from/to int/xmm register is relatively cheap, the cost function should
> > > >>> be tuned so that STV always converts smaxsi3 pattern.
> > > >>
> > > >> Note that on both Zen and even more so bdverN the int/xmm transition
> > > >> makes it no longer profitable but a _lot_ slower than the cmp/cmov
> > > >> sequence... (for the loop in hmmer which is the only one I see
> > > >> any effect of any of my patches).  So identifying chains that
> > > >> start/end in memory is important for cost reasons.
> > > >
> > > > Please note that the cost function also considers the cost of move
> > > > from/to xmm. So, the cost of the whole chain would disable the
> > > > transformation.
> > > >
> > > >> So I think the splitting has to happen after the last if-conversion
> > > >> pass (and thus we may need to allocate a scratch register for this
> > > >> purpose?)
> > > >
> > > > I really hope that the underlying issue will be solved by a machine
> > > > dependent pass inserted somewhere after the pre-reload split. This
> > > > way, we can split unconverted smax to the cmove, and this later pass
> > > > would handle jcc and cmove instructions. Until then... yes your
> > > > proposed approach is one of the ways to avoid unwanted if-conversion,
> > > > although sometimes we would like to split to cmove instead.
> > >
> > > So the following makes STV also consider SImode chains, re-using the
> > > DImode chain code.  I've kept a simple incomplete smaxsi3 pattern
> > > and also did not alter the {SI,DI}mode chain cost function - it's
> > > quite off for TARGET_64BIT.  With this I get the expected conversion
> > > for the testcase derived from hmmer.
> > >
> > > No further testing sofar.
> > >
> > > Is it OK to re-use the DImode chain code this way?  I'll clean things
> > > up some more of course.
> >
> > Yes, the approach looks OK to me. It makes chain building mode
> > agnostic, and the chain building can be used for
> > a) DImode x86_32 (as is now), but maybe 64bit minmax operation can be added.
> > b) SImode x86_32 and x86_64 (this will be mainly used for SImode
> > minmax and surrounding SImode operations)
> > c) DImode x86_64 (also, mainly used for DImode minmax and surrounding
> > DImode operations)
> >
> > > Still need help with the actual patterns for minmax and how the splitters
> > > should look like.
> >
> > Please look at the attached patch. Maybe we can add memory_operand as
> > operand 1 and operand 2 predicate, but let's keep things simple for
> > now.
>
> Thanks.  The attached patch makes the patch cleaner and it survives
> "some" barebone testing.  It also touches the cost function to
> avoid being too overly trigger-happy.  I've also ended up using
> ix86_cost->sse_op instead of COSTS_N_INSN-based magic.  In
> particular we estimated GPR reg-reg move as COST_N_INSNS(2) while
> move costs shouldn't be wrapped in COST_N_INSNS.
> IMHO we should probably disregard any reg-reg moves for costing pre-RA.
> At least with the current code every reg-reg move biases in favor of
> SSE...

This is currently a bit mixed-up area in x86 target support. HJ is
looking into this [1] and I hope Honza can review the patch.

> And we're simply adding move and non-move costs in 'gain', somewhat
> mixing apples and oranges?  We could separate those and require
> both to be a net positive win?
>
> Still using -mtune=bdverN exposes that some cost tables have xmm and gpr
> costs as apples and oranges... (so it never triggers for Bulldozer)
>
> I now run into
>
> /space/rguenther/src/svn/trunk-bisect/libgcc/

Re: [patch] Fix tree-optimization/91169

2019-08-05 Thread Richard Biener
On Mon, Aug 5, 2019 at 10:28 AM Eric Botcazou  wrote:
>
> > Testing went OK but it looks like acats doesn't honor
> > RUNTESTFLAGS so I got no multilib testing for it :/
> > And the PR didn't contain sth I could plug into gnat.dg so I checked
> > with visual inspection of dumps on the reduced testcase.
>
> Sorry about that, gnat.dg/array37.adb now attached.
>
> > I notice pretty-printing is also confused about the wrapping ;)
> >
> >   static struct p__intarray___PAD intarray = {.F={-100, [0]=0, 100}};
> >
> > the [0]= is not necessary.
> >
> > Anyway, I can see bogus IL before and still after the proposed patch :/
> > This is because I forgot to adjust cfield handling of setting index.
> >
> > So I'm testing the adjusted patch attached which fixes the IL
> > and for testing have patched gcc/testsuite/ada/acats/run_all.sh
> > to add -m32.
>
> Thanks!

Fixed as follows - I've added checking asserts that wrapping doesn't
occur and also fixed another bug which treated

 { .el = { RANGE [1, 4], 0 }, .el = { NULL_TREE, 1 } }

as setting [2] to 1.

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied.

Richard.


fix-pr91169
Description: Binary data


Re: [PATCH][RFC][x86] Fix PR91154, add SImode smax, allow SImode add in SSE regs

2019-08-05 Thread Uros Bizjak
On Mon, Aug 5, 2019 at 2:16 PM Richard Biener  wrote:
>
> On Mon, 5 Aug 2019, Uros Bizjak wrote:
>
> > > dimode_{scalar_to_vector_candidate_p,remove_non_convertible_regs}
> > > functions to drop the dimode_ prefix - is that OK or do you
> > > prefer some other prefix?
> >
> > No, please just drop the prefix.
>
> just noticed this applies to the derived dimode_scalar_chain class
> as well where I can't simply drop the prefix.  So would
> general_scalar_chain /
> general_{scalar_to_vector_candidate_p,remove_non_convertible_regs}
> be OK?

I don't want to bikeshed too much here ;) Whatever fits you best.

Uros.


Re: [PATCH][RFC][x86] Fix PR91154, add SImode smax, allow SImode add in SSE regs

2019-08-05 Thread Richard Biener
On Mon, 5 Aug 2019, Uros Bizjak wrote:

> > dimode_{scalar_to_vector_candidate_p,remove_non_convertible_regs}
> > functions to drop the dimode_ prefix - is that OK or do you
> > prefer some other prefix?
> 
> No, please just drop the prefix.

just noticed this applies to the derived dimode_scalar_chain class
as well where I can't simply drop the prefix.  So would
general_scalar_chain / 
general_{scalar_to_vector_candidate_p,remove_non_convertible_regs}
be OK?

Richard.


Re: [PATCH] Handle new operators with no arguments in DCE.

2019-08-05 Thread Richard Biener
On Mon, Aug 5, 2019 at 8:44 AM Martin Liška  wrote:
>
> On 8/2/19 11:34 PM, H.J. Lu wrote:
> > On Tue, Jul 2, 2019 at 4:50 AM Martin Liška  wrote:
> >>
> >> Second part.
> >>
> >> Martin
> >
> > This caused:
> >
> > https://gcc.gnu.org/bugzilla/show_bug.cgi?id=91334
> >
>
> Hi.
>
> I'm sending fix for the ICE. The issue is that we can end up
> with a ctor without an argument (when not being used).
>
> Patch can bootstrap on x86_64-linux-gnu and survives regression tests.
>
> Ready to be installed?

OK as a stop-gap for now, please discuss stuff further with Marc, I think
he's right that this isn't a good solution.  Can we check cgraph_node->clone_of
or so and simply disregard all clones?  We do not record the "original"
parameter position...

Richard.

> Thanks,
> Martin


Re: Protect tree_to_shwi call in unmodified_param_1

2019-08-05 Thread Richard Biener
On Mon, Aug 5, 2019 at 10:58 AM Richard Sandiford
 wrote:
>
> unmodified_param_1 used tree_to_shwi without first checking
> tree_fits_shwi_p.  This is needed by the SVE ACLE support and
> is hard to test independently.
>
> Tested on aarch64-linux-gnu, armeb-eabi and x86_64-linux-gnu.
> OK to install?

Hmm, a better fix would be to use poly_X for *size_p.  There are also
quite a number of callers with NULL size_p which you unduly
pessimize for SVE.

So, move the check to the if (size_p) case and add a FIXME
about SVE regs and poly_*?

OK with that.

Richard.

> Richard
>
>
> 2019-08-05  Richard Sandiford  
>
> gcc/
> * ipa-fnsummary.c (unmodified_parm_1): Test tree_fits_shwi_p
> before calling to tree_to_shwi.
>
> Index: gcc/ipa-fnsummary.c
> ===
> --- gcc/ipa-fnsummary.c 2019-07-16 09:11:50.509067138 +0100
> +++ gcc/ipa-fnsummary.c 2019-08-05 09:57:28.553617299 +0100
> @@ -927,7 +927,8 @@ unmodified_parm_1 (ipa_func_body_info *f
>/* SSA_NAME referring to parm default def?  */
>if (TREE_CODE (op) == SSA_NAME
>&& SSA_NAME_IS_DEFAULT_DEF (op)
> -  && TREE_CODE (SSA_NAME_VAR (op)) == PARM_DECL)
> +  && TREE_CODE (SSA_NAME_VAR (op)) == PARM_DECL
> +  && tree_fits_shwi_p (TYPE_SIZE (TREE_TYPE (op
>  {
>if (size_p)
> *size_p = tree_to_shwi (TYPE_SIZE (TREE_TYPE (op)));


Re: Make function_code a 32-bit field

2019-08-05 Thread Richard Biener
On Mon, Aug 5, 2019 at 10:56 AM Richard Sandiford
 wrote:
>
> Adding SVE intrinsics on top of the existing AArch64 intrinsics blows
> the 12-bit function_code in tree_function_decl.  That bitfield has no
> spare bits, but it comes at the end of the structure and is preceded
> by a pointer, so on LP64 hosts there's currently a 32-bit hole at end.
>
> This patch therefore makes function_code an independent field and
> moves the bitfield to the 32-bit hole.
>
> I wondered about instead making function_code 16 bits, so that the
> patch leaves 28 spare bits instead of just 12.  That seemed a bit
> short-term though; I can't guarantee that we won't blow 16 bits once
> the SVE2 functions are added...
>
> If we run out of bits again, we can start chomping from the top
> of the enum.  E.g. 24 bits should surely be enough, but there's
> no point paying the overhead of the masking until we need it.
>
> Tested on aarch64-linux-gnu, armeb-eabi and x86_64-linux-gnu.
> OK to install?

OK.

Richard.

> Richard
>
>
> 2019-08-05  Richard Sandiford  
>
> gcc/
> * tree-core.h (tree_function_decl): Make function_code an
> independent field.  Group the remaining bitfields into bytes
> > and move decl_type so that it continues to be at a byte boundary.
> Leave 12 bits for future expansion.
>
> Index: gcc/tree-core.h
> ===
> --- gcc/tree-core.h 2019-08-05 09:55:26.0 +0100
> +++ gcc/tree-core.h 2019-08-05 09:55:26.626500651 +0100
> @@ -1857,36 +1857,32 @@ struct GTY(()) tree_function_decl {
>tree vindex;
>
>/* In a FUNCTION_DECL for which DECL_BUILT_IN holds, this is
> - DECL_FUNCTION_CODE.  Otherwise unused.
> - ???  The bitfield needs to be able to hold all target function
> - codes as well.  */
> -  ENUM_BITFIELD(built_in_function) function_code : 12;
> -  ENUM_BITFIELD(built_in_class) built_in_class : 2;
> + DECL_FUNCTION_CODE.  Otherwise unused.  */
> +  enum built_in_function function_code;
>
> +  ENUM_BITFIELD(built_in_class) built_in_class : 2;
>unsigned static_ctor_flag : 1;
>unsigned static_dtor_flag : 1;
> -
>unsigned uninlinable : 1;
>unsigned possibly_inlined : 1;
>unsigned novops_flag : 1;
>unsigned returns_twice_flag : 1;
> +
>unsigned malloc_flag : 1;
>unsigned declared_inline_flag : 1;
>unsigned no_inline_warning_flag : 1;
> -
>unsigned no_instrument_function_entry_exit : 1;
> -
> -  /* Align the bitfield to boundary of a byte.  */
> -  ENUM_BITFIELD(function_decl_type) decl_type: 2;
> -
>unsigned no_limit_stack : 1;
>unsigned disregard_inline_limits : 1;
>unsigned pure_flag : 1;
>unsigned looping_const_or_pure_flag : 1;
> +
> +  /* Align the bitfield to boundary of a byte.  */
> +  ENUM_BITFIELD(function_decl_type) decl_type: 2;
>unsigned has_debug_args_flag : 1;
>unsigned versioned_function : 1;
>
> -  /* 0 bits left.  */
> +  /* 12 bits left for future expansion.  */
>  };
>
>  struct GTY(()) tree_translation_unit_decl {


Re: Fold MASK_LOAD/STORE with an all-true mask

2019-08-05 Thread Richard Biener
On Mon, Aug 5, 2019 at 10:50 AM Richard Sandiford
 wrote:
>
> This patch folds IFN_MASK_LOAD and IFN_MASK_STOREs to normal accesses
> if the mask is all-true.  This can happen for fully-masked loops that
> didn't actually need to be (which could be handled by the vectoriser
> instead), or for unrolled fully-masked loops whose first iteration is
> guaranteed to operate on a full vector.  It's also useful when the
> accesses are generated directly by intrinsics (to follow for SVE).
>
> Tested on aarch64-linux-gnu, armeb-eabi and x86_64-linux-gnu.
> OK to install?

OK.

> Richard
>
>
> 2019-08-05  Richard Sandiford  
>
> gcc/
> * gimple-fold.c (gimple_fold_mask_load_store_mem_ref)
> (gimple_fold_mask_load, gimple_fold_mask_store): New functions.
> (gimple_fold_call): Use them to fold IFN_MASK_LOAD and
> IFN_MASK_STORE.
>
> gcc/testsuite/
> * gcc.target/aarch64/sve/mask_load_1.c: New test.
>
> Index: gcc/gimple-fold.c
> ===
> --- gcc/gimple-fold.c   2019-08-05 09:47:38.821896600 +0100
> +++ gcc/gimple-fold.c   2019-08-05 09:49:29.233091006 +0100
> @@ -4180,6 +4180,63 @@ arith_overflowed_p (enum tree_code code,
>return wi::min_precision (wres, sign) > TYPE_PRECISION (type);
>  }
>
> +/* If IFN_MASK_LOAD/STORE call CALL is unconditional, return a MEM_REF
> +   for the memory it references, otherwise return null.  VECTYPE is the
> +   type of the memory vector.  */
> +
> +static tree
> +gimple_fold_mask_load_store_mem_ref (gcall *call, tree vectype)
> +{
> +  tree ptr = gimple_call_arg (call, 0);
> +  tree alias_align = gimple_call_arg (call, 1);
> +  tree mask = gimple_call_arg (call, 2);
> +  if (!tree_fits_uhwi_p (alias_align) || !integer_all_onesp (mask))
> +return NULL_TREE;
> +
> +  unsigned HOST_WIDE_INT align = tree_to_uhwi (alias_align) * BITS_PER_UNIT;
> +  if (TYPE_ALIGN (vectype) != align)
> +vectype = build_aligned_type (vectype, align);
> +  tree offset = build_zero_cst (TREE_TYPE (alias_align));
> +  return fold_build2 (MEM_REF, vectype, ptr, offset);
> +}
> +
> +/* Try to fold IFN_MASK_LOAD call CALL.  Return true on success.  */
> +
> +static bool
> +gimple_fold_mask_load (gimple_stmt_iterator *gsi, gcall *call)
> +{
> +  tree lhs = gimple_call_lhs (call);
> +  if (!lhs)
> +return false;
> +
> +  if (tree rhs = gimple_fold_mask_load_store_mem_ref (call, TREE_TYPE (lhs)))
> +{
> +  gassign *new_stmt = gimple_build_assign (lhs, rhs);
> +  gimple_set_location (new_stmt, gimple_location (call));
> +  gimple_move_vops (new_stmt, call);
> +  gsi_replace (gsi, new_stmt, false);
> +  return true;
> +}
> +  return false;
> +}
> +
> +/* Try to fold IFN_MASK_STORE call CALL.  Return true on success.  */
> +
> +static bool
> +gimple_fold_mask_store (gimple_stmt_iterator *gsi, gcall *call)
> +{
> +  tree rhs = gimple_call_arg (call, 3);
> +  if (tree lhs = gimple_fold_mask_load_store_mem_ref (call, TREE_TYPE (rhs)))
> +{
> +  gassign *new_stmt = gimple_build_assign (lhs, rhs);
> +  gimple_set_location (new_stmt, gimple_location (call));
> +  gimple_move_vops (new_stmt, call);
> +  gsi_replace (gsi, new_stmt, false);
> +  return true;
> +}
> +  return false;
> +}
> +
>  /* Attempt to fold a call statement referenced by the statement iterator GSI.
> The statement may be replaced by another statement, e.g., if the call
> simplifies to a constant value. Return true if any changes were made.
> @@ -4409,6 +4466,12 @@ gimple_fold_call (gimple_stmt_iterator *
>   subcode = MULT_EXPR;
>   cplx_result = true;
>   break;
> +   case IFN_MASK_LOAD:
> + changed |= gimple_fold_mask_load (gsi, stmt);
> + break;
> +   case IFN_MASK_STORE:
> + changed |= gimple_fold_mask_store (gsi, stmt);
> + break;
> default:
>   break;
> }
> Index: gcc/testsuite/gcc.target/aarch64/sve/mask_load_1.c
> ===
> --- /dev/null   2019-07-30 08:53:31.317691683 +0100
> +++ gcc/testsuite/gcc.target/aarch64/sve/mask_load_1.c  2019-08-05 
> 09:49:29.233091006 +0100
> @@ -0,0 +1,12 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=256 
> -fdump-tree-optimized" } */
> +
> +void
> +f (int *x)
> +{
> +  for (int i = 0; i < 8; ++i)
> +x[i] += 1;
> +}
> +
> +/* { dg-final { scan-tree-dump { = MEM } "optimized" } } */
> +/* { dg-final { scan-tree-dump { MEM  \[[^]]*\] = } 
> "optimized" } } */


Re: Add a gimple_move_vops helper function

2019-08-05 Thread Richard Biener
On Mon, Aug 5, 2019 at 10:49 AM Richard Sandiford
 wrote:
>
> I needed to add another instance of this idiom, so thought it'd
> be worth having a helper function.
>
> Tested on aarch64-linux-gnu, armeb-eabi and x86_64-linux-gnu.
> OK to install?

OK.

> Richard
>
>
> 2019-08-05  Richard Sandiford  
>
> gcc/
> * gimple.h (gimple_move_vops): Declare.
> * gimple.c (gimple_move_vops): New function
> * gimple-fold.c (replace_call_with_call_and_fold)
> (gimple_fold_builtin_memory_op, gimple_fold_builtin_memset)
> (gimple_fold_builtin_stpcpy, fold_builtin_atomic_compare_exchange)
> (gimple_fold_call): Use it.
> * ipa-param-manipulation.c (ipa_modify_call_arguments): Likewise.
> * tree-call-cdce.c (use_internal_fn): Likewise.
> * tree-if-conv.c (predicate_load_or_store): Likewise.
> * tree-ssa-ccp.c (optimize_atomic_bit_test_and): Likewise.
> * tree-ssa-math-opts.c (pass_cse_reciprocals::execute): Likewise.
> * tree-ssa-propagate.c (finish_update_gimple_call): Likewise.
> (update_call_from_tree): Likewise.
> * tree-vect-stmts.c (vectorizable_load): Likewise.
> * tree-vectorizer.c (adjust_simduid_builtins): Likewise.
>
> Index: gcc/gimple.h
> ===
> --- gcc/gimple.h2019-07-29 09:39:50.038162991 +0100
> +++ gcc/gimple.h2019-08-05 09:47:38.821896600 +0100
> @@ -1532,6 +1532,7 @@ void gimple_assign_set_rhs_with_ops (gim
>  tree gimple_get_lhs (const gimple *);
>  void gimple_set_lhs (gimple *, tree);
>  gimple *gimple_copy (gimple *);
> +void gimple_move_vops (gimple *, gimple *);
>  bool gimple_has_side_effects (const gimple *);
>  bool gimple_could_trap_p_1 (gimple *, bool, bool);
>  bool gimple_could_trap_p (gimple *);
> Index: gcc/gimple.c
> ===
> --- gcc/gimple.c2019-07-29 09:39:50.034163022 +0100
> +++ gcc/gimple.c2019-08-05 09:47:38.821896600 +0100
> @@ -2057,6 +2057,18 @@ gimple_copy (gimple *stmt)
>return copy;
>  }
>
> +/* Move OLD_STMT's vuse and vdef operands to NEW_STMT, on the assumption
> +   that OLD_STMT is about to be removed.  */
> +
> +void
> +gimple_move_vops (gimple *new_stmt, gimple *old_stmt)
> +{
> +  tree vdef = gimple_vdef (old_stmt);
> +  gimple_set_vuse (new_stmt, gimple_vuse (old_stmt));
> +  gimple_set_vdef (new_stmt, vdef);
> +  if (vdef && TREE_CODE (vdef) == SSA_NAME)
> +SSA_NAME_DEF_STMT (vdef) = new_stmt;
> +}
>
>  /* Return true if statement S has side-effects.  We consider a
> statement to have side effects if:
> Index: gcc/gimple-fold.c
> ===
> --- gcc/gimple-fold.c   2019-07-12 11:33:27.712037291 +0100
> +++ gcc/gimple-fold.c   2019-08-05 09:47:38.821896600 +0100
> @@ -641,14 +641,7 @@ replace_call_with_call_and_fold (gimple_
>gimple *stmt = gsi_stmt (*gsi);
>gimple_call_set_lhs (repl, gimple_call_lhs (stmt));
>gimple_set_location (repl, gimple_location (stmt));
> -  if (gimple_vdef (stmt)
> -  && TREE_CODE (gimple_vdef (stmt)) == SSA_NAME)
> -{
> -  gimple_set_vdef (repl, gimple_vdef (stmt));
> -  SSA_NAME_DEF_STMT (gimple_vdef (repl)) = repl;
> -}
> -  if (gimple_vuse (stmt))
> -gimple_set_vuse (repl, gimple_vuse (stmt));
> +  gimple_move_vops (repl, stmt);
>gsi_replace (gsi, repl, false);
>fold_stmt (gsi);
>  }
> @@ -832,11 +825,7 @@ gimple_fold_builtin_memory_op (gimple_st
> = gimple_build_assign (fold_build2 (MEM_REF, desttype,
> dest, off0),
>srcmem);
> - gimple_set_vuse (new_stmt, gimple_vuse (stmt));
> - gimple_set_vdef (new_stmt, gimple_vdef (stmt));
> - if (gimple_vdef (new_stmt)
> - && TREE_CODE (gimple_vdef (new_stmt)) == SSA_NAME)
> -   SSA_NAME_DEF_STMT (gimple_vdef (new_stmt)) = new_stmt;
> + gimple_move_vops (new_stmt, stmt);
>   if (!lhs)
> {
>   gsi_replace (gsi, new_stmt, false);
> @@ -1097,11 +1086,7 @@ gimple_fold_builtin_memory_op (gimple_st
> = gimple_build_assign (fold_build2 (MEM_REF, desttype, dest, off0),
>fold_build2 (MEM_REF, srctype, src, off0));
>  set_vop_and_replace:
> -  gimple_set_vuse (new_stmt, gimple_vuse (stmt));
> -  gimple_set_vdef (new_stmt, gimple_vdef (stmt));
> -  if (gimple_vdef (new_stmt)
> - && TREE_CODE (gimple_vdef (new_stmt)) == SSA_NAME)
> -   SSA_NAME_DEF_STMT (gimple_vdef (new_stmt)) = new_stmt;
> +  gimple_move_vops (new_stmt, stmt);
>if (!lhs)
> {
>   gsi_replace (gsi, new_stmt, false);
> @@ -1273,13 +1258,7 @@ gimple_fold_b

Re: [PATCH][RFC][x86] Fix PR91154, add SImode smax, allow SImode add in SSE regs

2019-08-05 Thread Uros Bizjak
On Mon, Aug 5, 2019 at 1:50 PM Richard Biener  wrote:
>
> On Sun, 4 Aug 2019, Uros Bizjak wrote:
>
> > On Sat, Aug 3, 2019 at 7:26 PM Richard Biener  wrote:
> > >
> > > On Thu, 1 Aug 2019, Uros Bizjak wrote:
> > >
> > > > On Thu, Aug 1, 2019 at 11:28 AM Richard Biener  
> > > > wrote:
> > > >
> > >  So you unconditionally add a smaxdi3 pattern - indeed this looks
> > >  necessary even when going the STV route.  The actual regression
> > >  for the testcase could also be solved by turning the smaxsi3
> > >  back into a compare and jump rather than a conditional move sequence.
> > >  So I wonder how you'd do that given that there's pass_if_after_reload
> > >  after pass_split_after_reload and I'm not sure we can split
> > >  as late as pass_split_before_sched2 (there's also a split _after_
> > >  sched2 on x86 it seems).
> > > 
> > >  So how would you go implement {s,u}{min,max}{si,di}3 for the
> > >  case STV doesn't end up doing any transform?
> > > >>>
> > > >>> If STV doesn't transform the insn, then a pre-reload splitter splits
> > > >>> the insn back to compare+cmove.
> > > >>
> > > >> OK, that would work.  But there's no way to force a jumpy sequence then
> > > >> which we know is faster than compare+cmove because later RTL
> > > >> if-conversion passes happily re-discover the smax (or conditional move)
> > > >> sequence.
> > > >>
> > > >>> However, considering the SImode move
> > > >>> from/to int/xmm register is relatively cheap, the cost function should
> > > >>> be tuned so that STV always converts smaxsi3 pattern.
> > > >>
> > > >> Note that on both Zen and even more so bdverN the int/xmm transition
> > > >> makes it no longer profitable but a _lot_ slower than the cmp/cmov
> > > >> sequence... (for the loop in hmmer which is the only one I see
> > > >> any effect of any of my patches).  So identifying chains that
> > > >> start/end in memory is important for cost reasons.
> > > >
> > > > Please note that the cost function also considers the cost of move
> > > > from/to xmm. So, the cost of the whole chain would disable the
> > > > transformation.
> > > >
> > > >> So I think the splitting has to happen after the last if-conversion
> > > >> pass (and thus we may need to allocate a scratch register for this
> > > >> purpose?)
> > > >
> > > > I really hope that the underlying issue will be solved by a machine
> > > dependent pass inserted somewhere after the pre-reload split. This
> > > > way, we can split unconverted smax to the cmove, and this later pass
> > > > would handle jcc and cmove instructions. Until then... yes your
> > > > proposed approach is one of the ways to avoid unwanted if-conversion,
> > > > although sometimes we would like to split to cmove instead.
> > >
> > > So the following makes STV also consider SImode chains, re-using the
> > > DImode chain code.  I've kept a simple incomplete smaxsi3 pattern
> > > and also did not alter the {SI,DI}mode chain cost function - it's
> > > quite off for TARGET_64BIT.  With this I get the expected conversion
> > > for the testcase derived from hmmer.
> > >
> > > No further testing sofar.
> > >
> > > Is it OK to re-use the DImode chain code this way?  I'll clean things
> > > up some more of course.
> >
> > Yes, the approach looks OK to me. It makes chain building mode
> > agnostic, and the chain building can be used for
> > a) DImode x86_32 (as is now), but maybe 64bit minmax operation can be added.
> > b) SImode x86_32 and x86_64 (this will be mainly used for SImode
> > minmax and surrounding SImode operations)
> > c) DImode x86_64 (also, mainly used for DImode minmax and surrounding
> > DImode operations)
> >
> > > Still need help with the actual patterns for minmax and how the splitters
> > > should look like.
> >
> > Please look at the attached patch. Maybe we can add memory_operand as
> > operand 1 and operand 2 predicate, but let's keep things simple for
> > now.
>
> Thanks.  The attached patch makes the patch cleaner and it survives
> "some" barebone testing.  It also touches the cost function to
> avoid being too overly trigger-happy.  I've also ended up using
> ix86_cost->sse_op instead of COSTS_N_INSN-based magic.  In
> particular we estimated GPR reg-reg move as COST_N_INSNS(2) while
> move costs shouldn't be wrapped in COST_N_INSNS.
> IMHO we should probably disregard any reg-reg moves for costing pre-RA.
> At least with the current code every reg-reg move biases in favor of
> SSE...
>
> And we're simply adding move and non-move costs in 'gain', somewhat
> mixing apples and oranges?  We could separate those and require
> both to be a net positive win?
>
> Still using -mtune=bdverN exposes that some cost tables have xmm and gpr
> costs as apples and oranges... (so it never triggers for Bulldozer)
>
> I now run into
>
> /space/rguenther/src/svn/trunk-bisect/libgcc/libgcov-driver.c:509:1:
> error: unrecognizable insn:
> (insn 116 115 1511 8 (set (subreg:V2DI (reg/v:DI 87 [ run_max ]) 0)
>   

Re: [PATCH] Handle new operators with no arguments in DCE.

2019-08-05 Thread Marc Glisse

On Mon, 5 Aug 2019, Martin Liška wrote:


On 8/5/19 9:07 AM, Marc Glisse wrote:

On Mon, 5 Aug 2019, Martin Liška wrote:


I'm sending fix for the ICE. The issue is that we can end up
with a ctor without an argument (when not being used).


Ah, I didn't realize that after cloning and drastically changing the signature 
it would still count as operator new/delete. Is getting down to 0 arguments the 
only bad thing that can happen? Can't we have an operator delete (void*, void*) 
where the first argument gets optimized out and we end up optimizing as if the 
second argument was actually the memory being released? Should we do some 
sanity-checking when propagating the new/delete flags to clones?



It can theoretically happen, but it should be properly handled in the following
code:

  810if (is_delete_operator
  811|| gimple_call_builtin_p (stmt, BUILT_IN_FREE))
  812  {
  813/* It can happen that a user delete operator has the 
pointer
  814   argument optimized out already.  */
  815if (gimple_call_num_args (stmt) == 0)
  816  continue;
  817
  818tree ptr = gimple_call_arg (stmt, 0);
  819gimple *def_stmt;
  820tree def_callee;
  821/* If the pointer we free is defined by an allocation
  822   function do not add the call to the worklist.  */
  823if (TREE_CODE (ptr) == SSA_NAME
  824&& is_gimple_call (def_stmt = SSA_NAME_DEF_STMT (ptr))
  825&& (def_callee = gimple_call_fndecl (def_stmt))
  826&& ((DECL_BUILT_IN_CLASS (def_callee) == 
BUILT_IN_NORMAL
  827 && (DECL_FUNCTION_CODE (def_callee) == 
BUILT_IN_ALIGNED_ALLOC
  828 || DECL_FUNCTION_CODE (def_callee) == 
BUILT_IN_MALLOC
  829 || DECL_FUNCTION_CODE (def_callee) == 
BUILT_IN_CALLOC))
  830|| DECL_IS_REPLACEABLE_OPERATOR_NEW_P 
(def_callee)))
  831  {
  832/* Delete operators can have alignment and (or) size 
as next
  833   arguments.  When being a SSA_NAME, they must be 
marked
  834   as necessary.  */
  835if (is_delete_operator && gimple_call_num_args (stmt) 
>= 2)
  836  for (unsigned i = 1; i < gimple_call_num_args 
(stmt); i++)
  837{
  838  tree arg = gimple_call_arg (stmt, i);
  839  if (TREE_CODE (arg) == SSA_NAME)
  840mark_operand_necessary (arg);
  841}

Where we verify that first argument of delete call is defined as a LHS of a new 
operator.


What I am saying is that it may be the wrong operator new.

Imagine something like:

struct A {
  A();
  __attribute__((malloc)) static void* operator new(unsigned long sz, 
void*,bool);
  static void operator delete(void* ptr, void*,bool);
};
int main(){
  int*p=new int;
  new(p,true) A;
}

At the beginning, it has

D.2321 = A::operator new (1, p, 1);

and

A::operator delete (D.2321, p, 1);

Now imagine we have access to the body of the functions, and the last one 
is transformed into


operator delete.clone (p)

(the first argument was removed)
p does come from an operator new, so we do see a matched pair new+delete, 
but it is the wrong pair.


I admit that's rather unlikely, but with clones that have a different 
signature, many assumptions we could make on the functions become unsafe, 
and I am not clear on the scale of this issue.


--
Marc Glisse


Re: [PATCH][RFC][x86] Fix PR91154, add SImode smax, allow SImode add in SSE regs

2019-08-05 Thread Richard Biener
On Sun, 4 Aug 2019, Uros Bizjak wrote:

> On Sat, Aug 3, 2019 at 7:26 PM Richard Biener  wrote:
> >
> > On Thu, 1 Aug 2019, Uros Bizjak wrote:
> >
> > > On Thu, Aug 1, 2019 at 11:28 AM Richard Biener  wrote:
> > >
> >  So you unconditionally add a smaxdi3 pattern - indeed this looks
> >  necessary even when going the STV route.  The actual regression
> >  for the testcase could also be solved by turing the smaxsi3
> >  back into a compare and jump rather than a conditional move sequence.
> >  So I wonder how you'd do that given that there's pass_if_after_reload
> >  after pass_split_after_reload and I'm not sure we can split
> >  as late as pass_split_before_sched2 (there's also a split _after_
> >  sched2 on x86 it seems).
> > 
> >  So how would you go implement {s,u}{min,max}{si,di}3 for the
> >  case STV doesn't end up doing any transform?
> > >>>
> > >>> If STV doesn't transform the insn, then a pre-reload splitter splits
> > >>> the insn back to compare+cmove.
> > >>
> > >> OK, that would work.  But there's no way to force a jumpy sequence then
> > >> which we know is faster than compare+cmove because later RTL
> > >> if-conversion passes happily re-discover the smax (or conditional move)
> > >> sequence.
> > >>
> > >>> However, considering the SImode move
> > >>> from/to int/xmm register is relatively cheap, the cost function should
> > >>> be tuned so that STV always converts smaxsi3 pattern.
> > >>
> > >> Note that on both Zen and even more so bdverN the int/xmm transition
> > >> makes it no longer profitable but a _lot_ slower than the cmp/cmov
> > >> sequence... (for the loop in hmmer which is the only one I see
> > >> any effect of any of my patches).  So identifying chains that
> > >> start/end in memory is important for cost reasons.
> > >
> > > Please note that the cost function also considers the cost of move
> > > from/to xmm. So, the cost of the whole chain would disable the
> > > transformation.
> > >
> > >> So I think the splitting has to happen after the last if-conversion
> > >> pass (and thus we may need to allocate a scratch register for this
> > >> purpose?)
> > >
> > > I really hope that the underlying issue will be solved by a machine
> > > dependant pass inserted somewhere after the pre-reload split. This
> > > way, we can split unconverted smax to the cmove, and this later pass
> > > would handle jcc and cmove instructions. Until then... yes your
> > > proposed approach is one of the ways to avoid unwanted if-conversion,
> > > although sometimes we would like to split to cmove instead.
> >
> > So the following makes STV also consider SImode chains, re-using the
> > DImode chain code.  I've kept a simple incomplete smaxsi3 pattern
> > and also did not alter the {SI,DI}mode chain cost function - it's
> > quite off for TARGET_64BIT.  With this I get the expected conversion
> > for the testcase derived from hmmer.
> >
> > No further testing sofar.
> >
> > Is it OK to re-use the DImode chain code this way?  I'll clean things
> > up some more of course.
> 
> Yes, the approach looks OK to me. It makes chain building mode
> agnostic, and the chain building can be used for
> a) DImode x86_32 (as is now), but maybe 64bit minmax operation can be added.
> b) SImode x86_32 and x86_64 (this will be mainly used for SImode
> minmax and surrounding SImode operations)
> c) DImode x86_64 (also, mainly used for DImode minmax and surrounding
> DImode operations)
> 
> > Still need help with the actual patterns for minmax and how the splitters
> > should look like.
> 
> Please look at the attached patch. Maybe we can add memory_operand as
> operand 1 and operand 2 predicate, but let's keep things simple for
> now.

Thanks.  The attached patch makes the patch cleaner and it survives
"some" barebone testing.  It also touches the cost function to
avoid being overly trigger-happy.  I've also ended up using
ix86_cost->sse_op instead of COSTS_N_INSNS-based magic.  In
particular we estimated GPR reg-reg move as COSTS_N_INSNS(2) while
move costs shouldn't be wrapped in COSTS_N_INSNS.
IMHO we should probably disregard any reg-reg moves for costing pre-RA.
At least with the current code every reg-reg move biases in favor of 
SSE...

And we're simply adding move and non-move costs in 'gain', somewhat
mixing apples and oranges?  We could separate those and require
both to be a net positive win?

Still using -mtune=bdverN exposes that some cost tables have xmm and gpr
costs as apples and oranges... (so it never triggers for Bulldozer)

I now run into

/space/rguenther/src/svn/trunk-bisect/libgcc/libgcov-driver.c:509:1: 
error: unrecognizable insn:
(insn 116 115 1511 8 (set (subreg:V2DI (reg/v:DI 87 [ run_max ]) 0)
(smax:V2DI (subreg:V2DI (reg/v:DI 87 [ run_max ]) 0)
(subreg:V2DI (reg:DI 349 [ MEM[base: _261, offset: 0B] ]) 0))) 
-1
 (expr_list:REG_DEAD (reg:DI 349 [ MEM[base: _261, offset: 0B] ])
(expr_list:REG_UNUSED (reg:CC 17 flags)
  

[PATCH 2/2][MIPS][RFC] Emit .note.GNU-stack for hard-float linux targets.

2019-08-05 Thread Dragan Mladjenovic
From: "Dragan Mladjenovic" 

libgcc/ChangeLog:

2019-08-05  Dragan Mladjenovic  

* config/mips/gnustack.h: Check for TARGET_LIBC_GNUSTACK also.

gcc/ChangeLog:

2019-08-05  Dragan Mladjenovic  

* config.in: Regenerated.
* config/mips/linux.h (NEED_INDICATE_EXEC_STACK): Define to 1
for TARGET_LIBC_GNUSTACK.
* configure: Regenerated.
* configure.ac: Define TARGET_LIBC_GNUSTACK if the glibc version
found is 2.31 or greater.
---
 gcc/config/mips/linux.h   |  4 
 gcc/configure.ac  | 12 
 libgcc/config/mips/gnustack.h |  2 +-
 3 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/gcc/config/mips/linux.h b/gcc/config/mips/linux.h
index 1fa72ef..30b22e7 100644
--- a/gcc/config/mips/linux.h
+++ b/gcc/config/mips/linux.h
@@ -53,4 +53,8 @@ along with GCC; see the file COPYING3.  If not see
 
 #undef NEED_INDICATE_EXEC_STACK
 
+#ifdef TARGET_LIBC_GNUSTACK
+#define NEED_INDICATE_EXEC_STACK 1
+#else
 #define NEED_INDICATE_EXEC_STACK TARGET_SOFT_FLOAT
+#endif
diff --git a/gcc/configure.ac b/gcc/configure.ac
index c620dd2..ab080c8 100644
--- a/gcc/configure.ac
+++ b/gcc/configure.ac
@@ -6143,6 +6143,18 @@ if test x$gcc_cv_libc_provides_hwcap_in_tcb = xyes; then
[Define if your target C Library provides the AT_HWCAP value in the 
TCB])
 fi
 
+# Check if the target LIBC handles PT_GNU_STACK.
+gcc_cv_libc_gnustack=unknown
+case "$target" in
+  mips*-*-linux*)
+GCC_GLIBC_VERSION_GTE_IFELSE([2], [31], [gcc_cv_libc_gnustack=yes], )
+;;
+esac
+if test x$gcc_cv_libc_gnustack = xyes; then
+  AC_DEFINE(TARGET_LIBC_GNUSTACK, 1,
+[Define if your target C Library properly handles PT_GNU_STACK])
+fi
+
 AC_MSG_CHECKING(dl_iterate_phdr in target C library)
 gcc_cv_target_dl_iterate_phdr=unknown
 case "$target" in
diff --git a/libgcc/config/mips/gnustack.h b/libgcc/config/mips/gnustack.h
index 6d5f618..a67c4b2 100644
--- a/libgcc/config/mips/gnustack.h
+++ b/libgcc/config/mips/gnustack.h
@@ -1,6 +1,6 @@
 #include "config.h"
 #if defined(__ELF__) && defined(__linux__)
-#if defined (__mips_soft_float)
+#if defined (TARGET_LIBC_GNUSTACK) || defined (__mips_soft_float)
 .section .note.GNU-stack,"",%progbits
 .previous
 #endif
-- 
1.9.1



[PATCH 1/2][MIPS] Emit .note.GNU-stack for soft-float linux targets.

2019-08-05 Thread Dragan Mladjenovic
From: "Dragan Mladjenovic" 

gcc/ChangeLog:

2019-08-05  Dragan Mladjenovic  

* config/mips/linux.h (NEED_INDICATE_EXEC_STACK): Define to
TARGET_SOFT_FLOAT.
* config/mips/mips.c (TARGET_ASM_FILE_END): Define to ...
(mips_asm_file_end): New function. Delegate to
file_end_indicate_exec_stack if NEED_INDICATE_EXEC_STACK is true.
* config/mips/mips.h (NEED_INDICATE_EXEC_STACK): Define to 0.

libgcc/ChangeLog:

2019-08-05  Dragan Mladjenovic  

* config/mips/gnustack.h: New file.
* config/mips/crti.S: Include gnustack.h.
* config/mips/crtn.S: Likewise.
* config/mips/mips16.S: Likewise.
* config/mips/vr4120-div.S: Likewise.
---
 gcc/config/mips/linux.h |  4 
 gcc/config/mips/mips.c  | 11 +++
 gcc/config/mips/mips.h  |  2 ++
 libgcc/config/mips/crti.S   |  3 +++
 libgcc/config/mips/crtn.S   |  3 +++
 libgcc/config/mips/gnustack.h   |  7 +++
 libgcc/config/mips/mips16.S |  3 +++
 libgcc/config/mips/vr4120-div.S |  3 +++
 8 files changed, 36 insertions(+)
 create mode 100644 libgcc/config/mips/gnustack.h

diff --git a/gcc/config/mips/linux.h b/gcc/config/mips/linux.h
index 6f79ac9..1fa72ef 100644
--- a/gcc/config/mips/linux.h
+++ b/gcc/config/mips/linux.h
@@ -50,3 +50,7 @@ along with GCC; see the file COPYING3.  If not see
 #define GNU_USER_DYNAMIC_LINKERN32 \
   CHOOSE_DYNAMIC_LINKER (GLIBC_DYNAMIC_LINKERN32, UCLIBC_DYNAMIC_LINKERN32, \
  BIONIC_DYNAMIC_LINKERN32, MUSL_DYNAMIC_LINKERN32)
+
+#undef NEED_INDICATE_EXEC_STACK
+
+#define NEED_INDICATE_EXEC_STACK TARGET_SOFT_FLOAT
diff --git a/gcc/config/mips/mips.c b/gcc/config/mips/mips.c
index e0535b1..66ef23a 100644
--- a/gcc/config/mips/mips.c
+++ b/gcc/config/mips/mips.c
@@ -22522,6 +22522,13 @@ mips_starting_frame_offset (void)
 return 0;
   return crtl->outgoing_args_size + MIPS_GP_SAVE_AREA_SIZE;
 }
+
+static void
+mips_asm_file_end (void)
+{
+  if (NEED_INDICATE_EXEC_STACK)
+file_end_indicate_exec_stack ();
+}
 
 /* Initialize the GCC target structure.  */
 #undef TARGET_ASM_ALIGNED_HI_OP
@@ -22829,6 +22836,10 @@ mips_starting_frame_offset (void)
 #undef TARGET_STARTING_FRAME_OFFSET
 #define TARGET_STARTING_FRAME_OFFSET mips_starting_frame_offset
 
+#undef TARGET_ASM_FILE_END
+#define TARGET_ASM_FILE_END mips_asm_file_end
+
+
 struct gcc_target targetm = TARGET_INITIALIZER;
 
 #include "gt-mips.h"
diff --git a/gcc/config/mips/mips.h b/gcc/config/mips/mips.h
index a5be7fa3..22a1b3c 100644
--- a/gcc/config/mips/mips.h
+++ b/gcc/config/mips/mips.h
@@ -3461,3 +3461,5 @@ struct GTY(())  machine_function {
   (TARGET_LOAD_STORE_PAIRS \
&& (TUNE_P5600 || TUNE_I6400 || TUNE_P6600) \
&& !TARGET_MICROMIPS && !TARGET_FIX_24K)
+
+#define NEED_INDICATE_EXEC_STACK 0
diff --git a/libgcc/config/mips/crti.S b/libgcc/config/mips/crti.S
index 44409c7..8733415 100644
--- a/libgcc/config/mips/crti.S
+++ b/libgcc/config/mips/crti.S
@@ -21,6 +21,9 @@ a copy of the GCC Runtime Library Exception along with this 
program;
 see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 .  */
 
+/* An executable stack is *not* required for these functions.  */
+#include "gnustack.h"
+
 /* 4 slots for argument spill area.  1 for cpreturn, 1 for stack.
Return spill offset of 40 and 20.  Aligned to 16 bytes for n32.  */
 
diff --git a/libgcc/config/mips/crtn.S b/libgcc/config/mips/crtn.S
index b56d77c..f897906 100644
--- a/libgcc/config/mips/crtn.S
+++ b/libgcc/config/mips/crtn.S
@@ -21,6 +21,9 @@ a copy of the GCC Runtime Library Exception along with this 
program;
 see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 .  */
 
+/* An executable stack is *not* required for these functions.  */
+#include "gnustack.h"
+
 /* 4 slots for argument spill area.  1 for cpreturn, 1 for stack.
Return spill offset of 40 and 20.  Aligned to 16 bytes for n32.  */
 
diff --git a/libgcc/config/mips/gnustack.h b/libgcc/config/mips/gnustack.h
new file mode 100644
index 000..6d5f618
--- /dev/null
+++ b/libgcc/config/mips/gnustack.h
@@ -0,0 +1,7 @@
+#include "config.h"
+#if defined(__ELF__) && defined(__linux__)
+#if defined (__mips_soft_float)
+.section .note.GNU-stack,"",%progbits
+.previous
+#endif
+#endif
diff --git a/libgcc/config/mips/mips16.S b/libgcc/config/mips/mips16.S
index 8cfd9e4..1f4df43 100644
--- a/libgcc/config/mips/mips16.S
+++ b/libgcc/config/mips/mips16.S
@@ -21,6 +21,9 @@ a copy of the GCC Runtime Library Exception along with this 
program;
 see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 .  */
 
+/* An executable stack is *not* required for these functions.  */
+#include "gnustack.h"
+
 #include "auto-host.h"
 
 #if defined(__mips_micromips) || defined(__mips_soft_float) \
diff --git a/libgcc/config/mips/vr4120-div.S b/libgcc/config/mips/vr4120-

Re: [PATCH v2] RISC-V: Promote type correctly for libcalls

2019-08-05 Thread Kito Cheng
Committed to gcc-9 as r274108 and gcc-8 as r274113

On Mon, Aug 5, 2019 at 3:29 PM Richard Biener  wrote:
>
> On Mon, 5 Aug 2019, Kito Cheng wrote:
>
> > Committed as r274107
> >
> > Hi Jakub, Richard:
> >
> > This patch is fix ABI bug for libcall on RISC-V, we've also tested on
> > gcc 8 and 9, it's ok for gcc 9 and 8?
>
> Works for me.
>
> Thanks,
> Richard.
>
> > Thanks.
> >
> > On Fri, Aug 2, 2019 at 11:52 PM Jim Wilson  wrote:
> > >
> > > On Fri, Aug 2, 2019 at 12:11 AM Kito Cheng  wrote:
> > > > gcc/ChangeLog
> > > > * config/riscv/riscv.c (riscv_promote_function_mode): New.
> > > > (TARGET_PROMOTE_FUNCTION_MODE): Use riscv_promote_function_mode.
> > > > gcc/testsuite/ChangeLog
> > > > * gcc.target/riscv/promote-type-for-libcall.c: New.
> > >
> > > Thanks.  This looks good, and fails with gcc mainline without the patch.
> > >
> > > Jim
> >
>
> --
> Richard Biener 
> SUSE Linux GmbH, Maxfeldstrasse 5, 90409 Nuernberg, Germany;
> GF: Felix Imendörffer, Mary Higgins, Sri Rasiah; HRB 21284 (AG Nürnberg)


[PATCH 0/2] [MIPS] Emit .note.GNU-stack for linux targets.

2019-08-05 Thread Dragan Mladjenovic
From: "Dragan Mladjenovic" 

Greetings,

These patches enable emitting .note.GNU-stack by default on mips linux targets.
First one enables it unconditionally for soft-float builds while the second one
enables it for hard-float build if gcc is configured against the future version
of glibc that should enable safe usage of PT_GNU_STACK [1].


[1] https://sourceware.org/ml/libc-alpha/2019-08/msg00065.html

Best regards,

Dragan

Dragan Mladjenovic (2):
  Emit .note.GNU-stack for soft-float linux targets.
  Emit .note.GNU-stack for hard-float linux targets.

 gcc/config.in   |  6 ++
 gcc/config/mips/linux.h |  8 +++
 gcc/config/mips/mips.c  | 11 ++
 gcc/config/mips/mips.h  |  2 ++
 gcc/configure   | 47 +
 gcc/configure.ac| 12 +++
 libgcc/config/mips/crti.S   |  3 +++
 libgcc/config/mips/crtn.S   |  3 +++
 libgcc/config/mips/gnustack.h   |  7 ++
 libgcc/config/mips/mips16.S |  3 +++
 libgcc/config/mips/vr4120-div.S |  3 +++
 11 files changed, 96 insertions(+), 9 deletions(-)
 create mode 100644 libgcc/config/mips/gnustack.h

-- 
1.9.1



Re: [PATCH][RFC][x86] Fix PR91154, add SImode smax, allow SImode add in SSE regs

2019-08-05 Thread Richard Sandiford
Uros Bizjak  writes:
> On Mon, Aug 5, 2019 at 12:12 PM Richard Sandiford
>  wrote:
>>
>> Uros Bizjak  writes:
>> > On Mon, Aug 5, 2019 at 11:13 AM Richard Sandiford
>> >  wrote:
>> >>
>> >> Uros Bizjak  writes:
>> >> > On Sat, Aug 3, 2019 at 7:26 PM Richard Biener  wrote:
>> >> >>
>> >> >> On Thu, 1 Aug 2019, Uros Bizjak wrote:
>> >> >>
>> >> >> > On Thu, Aug 1, 2019 at 11:28 AM Richard Biener  
>> >> >> > wrote:
>> >> >> >
>> >> >>  So you unconditionally add a smaxdi3 pattern - indeed this looks
>> >> >>  necessary even when going the STV route.  The actual regression
>> >> >>  for the testcase could also be solved by turing the smaxsi3
>> >> >>  back into a compare and jump rather than a conditional move 
>> >> >>  sequence.
>> >> >>  So I wonder how you'd do that given that there's 
>> >> >>  pass_if_after_reload
>> >> >>  after pass_split_after_reload and I'm not sure we can split
>> >> >>  as late as pass_split_before_sched2 (there's also a split _after_
>> >> >>  sched2 on x86 it seems).
>> >> >> 
>> >> >>  So how would you go implement {s,u}{min,max}{si,di}3 for the
>> >> >>  case STV doesn't end up doing any transform?
>> >> >> >>>
>> >> >> >>> If STV doesn't transform the insn, then a pre-reload splitter 
>> >> >> >>> splits
>> >> >> >>> the insn back to compare+cmove.
>> >> >> >>
>> >> >> >> OK, that would work.  But there's no way to force a jumpy sequence 
>> >> >> >> then
>> >> >> >> which we know is faster than compare+cmove because later RTL
>> >> >> >> if-conversion passes happily re-discover the smax (or conditional 
>> >> >> >> move)
>> >> >> >> sequence.
>> >> >> >>
>> >> >> >>> However, considering the SImode move
>> >> >> >>> from/to int/xmm register is relatively cheap, the cost function 
>> >> >> >>> should
>> >> >> >>> be tuned so that STV always converts smaxsi3 pattern.
>> >> >> >>
>> >> >> >> Note that on both Zen and even more so bdverN the int/xmm transition
>> >> >> >> makes it no longer profitable but a _lot_ slower than the cmp/cmov
>> >> >> >> sequence... (for the loop in hmmer which is the only one I see
>> >> >> >> any effect of any of my patches).  So identifying chains that
>> >> >> >> start/end in memory is important for cost reasons.
>> >> >> >
>> >> >> > Please note that the cost function also considers the cost of move
>> >> >> > from/to xmm. So, the cost of the whole chain would disable the
>> >> >> > transformation.
>> >> >> >
>> >> >> >> So I think the splitting has to happen after the last if-conversion
>> >> >> >> pass (and thus we may need to allocate a scratch register for this
>> >> >> >> purpose?)
>> >> >> >
>> >> >> > I really hope that the underlying issue will be solved by a machine
>> >> >> > dependant pass inserted somewhere after the pre-reload split. This
>> >> >> > way, we can split unconverted smax to the cmove, and this later pass
>> >> >> > would handle jcc and cmove instructions. Until then... yes your
>> >> >> > proposed approach is one of the ways to avoid unwanted if-conversion,
>> >> >> > although sometimes we would like to split to cmove instead.
>> >> >>
>> >> >> So the following makes STV also consider SImode chains, re-using the
>> >> >> DImode chain code.  I've kept a simple incomplete smaxsi3 pattern
>> >> >> and also did not alter the {SI,DI}mode chain cost function - it's
>> >> >> quite off for TARGET_64BIT.  With this I get the expected conversion
>> >> >> for the testcase derived from hmmer.
>> >> >>
>> >> >> No further testing sofar.
>> >> >>
>> >> >> Is it OK to re-use the DImode chain code this way?  I'll clean things
>> >> >> up some more of course.
>> >> >
>> >> > Yes, the approach looks OK to me. It makes chain building mode
>> >> > agnostic, and the chain building can be used for
>> >> > a) DImode x86_32 (as is now), but maybe 64bit minmax operation can be 
>> >> > added.
>> >> > b) SImode x86_32 and x86_64 (this will be mainly used for SImode
>> >> > minmax and surrounding SImode operations)
>> >> > c) DImode x86_64 (also, mainly used for DImode minmax and surrounding
>> >> > DImode operations)
>> >> >
>> >> >> Still need help with the actual patterns for minmax and how the 
>> >> >> splitters
>> >> >> should look like.
>> >> >
>> >> > Please look at the attached patch. Maybe we can add memory_operand as
>> >> > operand 1 and operand 2 predicate, but let's keep things simple for
>> >> > now.
>> >> >
>> >> > Uros.
>> >> >
>> >> > Index: i386.md
>> >> > ===
>> >> > --- i386.md   (revision 274008)
>> >> > +++ i386.md   (working copy)
>> >> > @@ -17721,6 +17721,27 @@
>> >> >  std::swap (operands[4], operands[5]);
>> >> >  })
>> >> >
>> >> > +;; min/max patterns
>> >> > +
>> >> > +(define_code_attr smaxmin_rel [(smax "ge") (smin "le")])
>> >> > +
>> >> > +(define_insn_and_split "3"
>> >> > +  [(set (match_operand:SWI48 0 "register_operand")
>> >> > + (smaxmin:SWI48 (match_operand:SWI48 1 "register_operand"

Re: [PATCH][RFC][x86] Fix PR91154, add SImode smax, allow SImode add in SSE regs

2019-08-05 Thread Uros Bizjak
On Mon, Aug 5, 2019 at 12:12 PM Richard Sandiford
 wrote:
>
> Uros Bizjak  writes:
> > On Mon, Aug 5, 2019 at 11:13 AM Richard Sandiford
> >  wrote:
> >>
> >> Uros Bizjak  writes:
> >> > On Sat, Aug 3, 2019 at 7:26 PM Richard Biener  wrote:
> >> >>
> >> >> On Thu, 1 Aug 2019, Uros Bizjak wrote:
> >> >>
> >> >> > On Thu, Aug 1, 2019 at 11:28 AM Richard Biener  
> >> >> > wrote:
> >> >> >
> >> >>  So you unconditionally add a smaxdi3 pattern - indeed this looks
> >> >>  necessary even when going the STV route.  The actual regression
> >> >>  for the testcase could also be solved by turing the smaxsi3
> >> >>  back into a compare and jump rather than a conditional move 
> >> >>  sequence.
> >> >>  So I wonder how you'd do that given that there's 
> >> >>  pass_if_after_reload
> >> >>  after pass_split_after_reload and I'm not sure we can split
> >> >>  as late as pass_split_before_sched2 (there's also a split _after_
> >> >>  sched2 on x86 it seems).
> >> >> 
> >> >>  So how would you go implement {s,u}{min,max}{si,di}3 for the
> >> >>  case STV doesn't end up doing any transform?
> >> >> >>>
> >> >> >>> If STV doesn't transform the insn, then a pre-reload splitter splits
> >> >> >>> the insn back to compare+cmove.
> >> >> >>
> >> >> >> OK, that would work.  But there's no way to force a jumpy sequence 
> >> >> >> then
> >> >> >> which we know is faster than compare+cmove because later RTL
> >> >> >> if-conversion passes happily re-discover the smax (or conditional 
> >> >> >> move)
> >> >> >> sequence.
> >> >> >>
> >> >> >>> However, considering the SImode move
> >> >> >>> from/to int/xmm register is relatively cheap, the cost function 
> >> >> >>> should
> >> >> >>> be tuned so that STV always converts smaxsi3 pattern.
> >> >> >>
> >> >> >> Note that on both Zen and even more so bdverN the int/xmm transition
> >> >> >> makes it no longer profitable but a _lot_ slower than the cmp/cmov
> >> >> >> sequence... (for the loop in hmmer which is the only one I see
> >> >> >> any effect of any of my patches).  So identifying chains that
> >> >> >> start/end in memory is important for cost reasons.
> >> >> >
> >> >> > Please note that the cost function also considers the cost of move
> >> >> > from/to xmm. So, the cost of the whole chain would disable the
> >> >> > transformation.
> >> >> >
> >> >> >> So I think the splitting has to happen after the last if-conversion
> >> >> >> pass (and thus we may need to allocate a scratch register for this
> >> >> >> purpose?)
> >> >> >
> >> >> > I really hope that the underlying issue will be solved by a machine
> >> >> > dependant pass inserted somewhere after the pre-reload split. This
> >> >> > way, we can split unconverted smax to the cmove, and this later pass
> >> >> > would handle jcc and cmove instructions. Until then... yes your
> >> >> > proposed approach is one of the ways to avoid unwanted if-conversion,
> >> >> > although sometimes we would like to split to cmove instead.
> >> >>
> >> >> So the following makes STV also consider SImode chains, re-using the
> >> >> DImode chain code.  I've kept a simple incomplete smaxsi3 pattern
> >> >> and also did not alter the {SI,DI}mode chain cost function - it's
> >> >> quite off for TARGET_64BIT.  With this I get the expected conversion
> >> >> for the testcase derived from hmmer.
> >> >>
> >> >> No further testing sofar.
> >> >>
> >> >> Is it OK to re-use the DImode chain code this way?  I'll clean things
> >> >> up some more of course.
> >> >
> >> > Yes, the approach looks OK to me. It makes chain building mode
> >> > agnostic, and the chain building can be used for
> >> > a) DImode x86_32 (as is now), but maybe 64bit minmax operation can be 
> >> > added.
> >> > b) SImode x86_32 and x86_64 (this will be mainly used for SImode
> >> > minmax and surrounding SImode operations)
> >> > c) DImode x86_64 (also, mainly used for DImode minmax and surrounding
> >> > DImode operations)
> >> >
> >> >> Still need help with the actual patterns for minmax and how the 
> >> >> splitters
> >> >> should look like.
> >> >
> >> > Please look at the attached patch. Maybe we can add memory_operand as
> >> > operand 1 and operand 2 predicate, but let's keep things simple for
> >> > now.
> >> >
> >> > Uros.
> >> >
> >> > Index: i386.md
> >> > ===
> >> > --- i386.md   (revision 274008)
> >> > +++ i386.md   (working copy)
> >> > @@ -17721,6 +17721,27 @@
> >> >  std::swap (operands[4], operands[5]);
> >> >  })
> >> >
> >> > +;; min/max patterns
> >> > +
> >> > +(define_code_attr smaxmin_rel [(smax "ge") (smin "le")])
> >> > +
> >> > +(define_insn_and_split "3"
> >> > +  [(set (match_operand:SWI48 0 "register_operand")
> >> > + (smaxmin:SWI48 (match_operand:SWI48 1 "register_operand")
> >> > +(match_operand:SWI48 2 "register_operand")))
> >> > +   (clobber (reg:CC FLAGS_REG))]
> >> > +  "TARGET_STV && TARGET

Re: [PATCH][RFC][x86] Fix PR91154, add SImode smax, allow SImode add in SSE regs

2019-08-05 Thread Richard Sandiford
Uros Bizjak  writes:
> On Mon, Aug 5, 2019 at 11:13 AM Richard Sandiford
>  wrote:
>>
>> Uros Bizjak  writes:
>> > On Sat, Aug 3, 2019 at 7:26 PM Richard Biener  wrote:
>> >>
>> >> On Thu, 1 Aug 2019, Uros Bizjak wrote:
>> >>
>> >> > On Thu, Aug 1, 2019 at 11:28 AM Richard Biener  
>> >> > wrote:
>> >> >
>> >>  So you unconditionally add a smaxdi3 pattern - indeed this looks
>> >>  necessary even when going the STV route.  The actual regression
> >>  for the testcase could also be solved by turning the smaxsi3
>> >>  back into a compare and jump rather than a conditional move sequence.
>> >>  So I wonder how you'd do that given that there's pass_if_after_reload
>> >>  after pass_split_after_reload and I'm not sure we can split
>> >>  as late as pass_split_before_sched2 (there's also a split _after_
>> >>  sched2 on x86 it seems).
>> >> 
>> >>  So how would you go implement {s,u}{min,max}{si,di}3 for the
>> >>  case STV doesn't end up doing any transform?
>> >> >>>
>> >> >>> If STV doesn't transform the insn, then a pre-reload splitter splits
>> >> >>> the insn back to compare+cmove.
>> >> >>
>> >> >> OK, that would work.  But there's no way to force a jumpy sequence then
>> >> >> which we know is faster than compare+cmove because later RTL
>> >> >> if-conversion passes happily re-discover the smax (or conditional move)
>> >> >> sequence.
>> >> >>
>> >> >>> However, considering the SImode move
>> >> >>> from/to int/xmm register is relatively cheap, the cost function should
>> >> >>> be tuned so that STV always converts smaxsi3 pattern.
>> >> >>
>> >> >> Note that on both Zen and even more so bdverN the int/xmm transition
>> >> >> makes it no longer profitable but a _lot_ slower than the cmp/cmov
>> >> >> sequence... (for the loop in hmmer which is the only one I see
>> >> >> any effect of any of my patches).  So identifying chains that
>> >> >> start/end in memory is important for cost reasons.
>> >> >
>> >> > Please note that the cost function also considers the cost of move
>> >> > from/to xmm. So, the cost of the whole chain would disable the
>> >> > transformation.
>> >> >
>> >> >> So I think the splitting has to happen after the last if-conversion
>> >> >> pass (and thus we may need to allocate a scratch register for this
>> >> >> purpose?)
>> >> >
>> >> > I really hope that the underlying issue will be solved by a machine
>> >> > dependant pass inserted somewhere after the pre-reload split. This
>> >> > way, we can split unconverted smax to the cmove, and this later pass
>> >> > would handle jcc and cmove instructions. Until then... yes your
>> >> > proposed approach is one of the ways to avoid unwanted if-conversion,
>> >> > although sometimes we would like to split to cmove instead.
>> >>
>> >> So the following makes STV also consider SImode chains, re-using the
>> >> DImode chain code.  I've kept a simple incomplete smaxsi3 pattern
>> >> and also did not alter the {SI,DI}mode chain cost function - it's
>> >> quite off for TARGET_64BIT.  With this I get the expected conversion
>> >> for the testcase derived from hmmer.
>> >>
> >> >> No further testing so far.
>> >>
>> >> Is it OK to re-use the DImode chain code this way?  I'll clean things
>> >> up some more of course.
>> >
>> > Yes, the approach looks OK to me. It makes chain building mode
>> > agnostic, and the chain building can be used for
>> > a) DImode x86_32 (as is now), but maybe 64bit minmax operation can be 
>> > added.
>> > b) SImode x86_32 and x86_64 (this will be mainly used for SImode
>> > minmax and surrounding SImode operations)
>> > c) DImode x86_64 (also, mainly used for DImode minmax and surrounding
>> > DImode operations)
>> >
>> >> Still need help with the actual patterns for minmax and how the splitters
>> >> should look like.
>> >
>> > Please look at the attached patch. Maybe we can add memory_operand as
>> > operand 1 and operand 2 predicate, but let's keep things simple for
>> > now.
>> >
>> > Uros.
>> >
>> > Index: i386.md
>> > ===
>> > --- i386.md   (revision 274008)
>> > +++ i386.md   (working copy)
>> > @@ -17721,6 +17721,27 @@
>> >  std::swap (operands[4], operands[5]);
>> >  })
>> >
>> > +;; min/max patterns
>> > +
>> > +(define_code_attr smaxmin_rel [(smax "ge") (smin "le")])
>> > +
>> > +(define_insn_and_split "<code><mode>3"
>> > +  [(set (match_operand:SWI48 0 "register_operand")
>> > + (smaxmin:SWI48 (match_operand:SWI48 1 "register_operand")
>> > +(match_operand:SWI48 2 "register_operand")))
>> > +   (clobber (reg:CC FLAGS_REG))]
>> > +  "TARGET_STV && TARGET_SSE4_1
>> > +   && can_create_pseudo_p ()"
>> > +  "#"
>> > +  "&& 1"
>> > +  [(set (reg:CCGC FLAGS_REG)
>> > + (compare:CCGC (match_dup 1)(match_dup 2)))
>> > +   (set (match_dup 0)
>> > + (if_then_else:SWI48
>> > +   (<smaxmin_rel> (reg:CCGC FLAGS_REG)(const_int 0))
>> > +   (match_dup 1)
>> > +   (match_dup 2)))])
>> > +
>>
>> Th

Re: [PATCH][RFC][x86] Fix PR91154, add SImode smax, allow SImode add in SSE regs

2019-08-05 Thread Uros Bizjak
On Mon, Aug 5, 2019 at 11:13 AM Richard Sandiford
 wrote:
>
> Uros Bizjak  writes:
> > On Sat, Aug 3, 2019 at 7:26 PM Richard Biener  wrote:
> >>
> >> On Thu, 1 Aug 2019, Uros Bizjak wrote:
> >>
> >> > On Thu, Aug 1, 2019 at 11:28 AM Richard Biener  wrote:
> >> >
> >>  So you unconditionally add a smaxdi3 pattern - indeed this looks
> >>  necessary even when going the STV route.  The actual regression
> >>  for the testcase could also be solved by turning the smaxsi3
> >>  back into a compare and jump rather than a conditional move sequence.
> >>  So I wonder how you'd do that given that there's pass_if_after_reload
> >>  after pass_split_after_reload and I'm not sure we can split
> >>  as late as pass_split_before_sched2 (there's also a split _after_
> >>  sched2 on x86 it seems).
> >> 
> >>  So how would you go implement {s,u}{min,max}{si,di}3 for the
> >>  case STV doesn't end up doing any transform?
> >> >>>
> >> >>> If STV doesn't transform the insn, then a pre-reload splitter splits
> >> >>> the insn back to compare+cmove.
> >> >>
> >> >> OK, that would work.  But there's no way to force a jumpy sequence then
> >> >> which we know is faster than compare+cmove because later RTL
> >> >> if-conversion passes happily re-discover the smax (or conditional move)
> >> >> sequence.
> >> >>
> >> >>> However, considering the SImode move
> >> >>> from/to int/xmm register is relatively cheap, the cost function should
> >> >>> be tuned so that STV always converts smaxsi3 pattern.
> >> >>
> >> >> Note that on both Zen and even more so bdverN the int/xmm transition
> >> >> makes it no longer profitable but a _lot_ slower than the cmp/cmov
> >> >> sequence... (for the loop in hmmer which is the only one I see
> >> >> any effect of any of my patches).  So identifying chains that
> >> >> start/end in memory is important for cost reasons.
> >> >
> >> > Please note that the cost function also considers the cost of move
> >> > from/to xmm. So, the cost of the whole chain would disable the
> >> > transformation.
> >> >
> >> >> So I think the splitting has to happen after the last if-conversion
> >> >> pass (and thus we may need to allocate a scratch register for this
> >> >> purpose?)
> >> >
> >> > I really hope that the underlying issue will be solved by a machine
> >> > dependent pass inserted somewhere after the pre-reload split. This
> >> > way, we can split unconverted smax to the cmove, and this later pass
> >> > would handle jcc and cmove instructions. Until then... yes your
> >> > proposed approach is one of the ways to avoid unwanted if-conversion,
> >> > although sometimes we would like to split to cmove instead.
> >>
> >> So the following makes STV also consider SImode chains, re-using the
> >> DImode chain code.  I've kept a simple incomplete smaxsi3 pattern
> >> and also did not alter the {SI,DI}mode chain cost function - it's
> >> quite off for TARGET_64BIT.  With this I get the expected conversion
> >> for the testcase derived from hmmer.
> >>
> >> No further testing so far.
> >>
> >> Is it OK to re-use the DImode chain code this way?  I'll clean things
> >> up some more of course.
> >
> > Yes, the approach looks OK to me. It makes chain building mode
> > agnostic, and the chain building can be used for
> > a) DImode x86_32 (as is now), but maybe 64bit minmax operation can be added.
> > b) SImode x86_32 and x86_64 (this will be mainly used for SImode
> > minmax and surrounding SImode operations)
> > c) DImode x86_64 (also, mainly used for DImode minmax and surrounding
> > DImode operations)
> >
> >> Still need help with the actual patterns for minmax and how the splitters
> >> should look like.
> >
> > Please look at the attached patch. Maybe we can add memory_operand as
> > operand 1 and operand 2 predicate, but let's keep things simple for
> > now.
> >
> > Uros.
> >
> > Index: i386.md
> > ===
> > --- i386.md   (revision 274008)
> > +++ i386.md   (working copy)
> > @@ -17721,6 +17721,27 @@
> >  std::swap (operands[4], operands[5]);
> >  })
> >
> > +;; min/max patterns
> > +
> > +(define_code_attr smaxmin_rel [(smax "ge") (smin "le")])
> > +
> > +(define_insn_and_split "<code><mode>3"
> > +  [(set (match_operand:SWI48 0 "register_operand")
> > + (smaxmin:SWI48 (match_operand:SWI48 1 "register_operand")
> > +(match_operand:SWI48 2 "register_operand")))
> > +   (clobber (reg:CC FLAGS_REG))]
> > +  "TARGET_STV && TARGET_SSE4_1
> > +   && can_create_pseudo_p ()"
> > +  "#"
> > +  "&& 1"
> > +  [(set (reg:CCGC FLAGS_REG)
> > + (compare:CCGC (match_dup 1)(match_dup 2)))
> > +   (set (match_dup 0)
> > + (if_then_else:SWI48
> > +   (<smaxmin_rel> (reg:CCGC FLAGS_REG)(const_int 0))
> > +   (match_dup 1)
> > +   (match_dup 2)))])
> > +
>
> The pattern could in theory be matched after the last pre-RA split pass
> has run, so I think the pattern still needs to have constraints and be
> matchab

Re: [PATCH] Handle new operators with no arguments in DCE.

2019-08-05 Thread Martin Liška
On 8/5/19 9:07 AM, Marc Glisse wrote:
> On Mon, 5 Aug 2019, Martin Liška wrote:
> 
>> I'm sending fix for the ICE. The issue is that we can end up
>> with a ctor without an argument (when not being used).
> 
> Ah, I didn't realize that after cloning and drastically changing the 
> signature it would still count as operator new/delete. Is getting down to 0 
> arguments the only bad thing that can happen? Can't we have an operator 
> delete (void*, void*) where the first argument gets optimized out and we end 
> up optimizing as if the second argument was actually the memory being 
> released? Should we do some sanity-checking when propagating the new/delete 
> flags to clones?
> 

It can theoretically happen, but it should be properly handled in the following
code:

   810if (is_delete_operator
   811|| gimple_call_builtin_p (stmt, BUILT_IN_FREE))
   812  {
   813/* It can happen that a user delete operator has the 
pointer
   814   argument optimized out already.  */
   815if (gimple_call_num_args (stmt) == 0)
   816  continue;
   817  
   818tree ptr = gimple_call_arg (stmt, 0);
   819gimple *def_stmt;
   820tree def_callee;
   821/* If the pointer we free is defined by an allocation
   822   function do not add the call to the worklist.  */
   823if (TREE_CODE (ptr) == SSA_NAME
   824&& is_gimple_call (def_stmt = SSA_NAME_DEF_STMT (ptr))
   825&& (def_callee = gimple_call_fndecl (def_stmt))
   826&& ((DECL_BUILT_IN_CLASS (def_callee) == 
BUILT_IN_NORMAL
   827 && (DECL_FUNCTION_CODE (def_callee) == 
BUILT_IN_ALIGNED_ALLOC
   828 || DECL_FUNCTION_CODE (def_callee) == 
BUILT_IN_MALLOC
   829 || DECL_FUNCTION_CODE (def_callee) == 
BUILT_IN_CALLOC))
   830|| DECL_IS_REPLACEABLE_OPERATOR_NEW_P 
(def_callee)))
   831  {
   832/* Delete operators can have alignment and (or) size 
as next
   833   arguments.  When being a SSA_NAME, they must be 
marked
   834   as necessary.  */
   835if (is_delete_operator && gimple_call_num_args (stmt) 
>= 2)
   836  for (unsigned i = 1; i < gimple_call_num_args 
(stmt); i++)
   837{
   838  tree arg = gimple_call_arg (stmt, i);
   839  if (TREE_CODE (arg) == SSA_NAME)
   840mark_operand_necessary (arg);
   841}

Where we verify that first argument of delete call is defined as a LHS of a new 
operator.

Martin


Re: [PATCH][RFC][x86] Fix PR91154, add SImode smax, allow SImode add in SSE regs

2019-08-05 Thread Richard Sandiford
Uros Bizjak  writes:
> On Sat, Aug 3, 2019 at 7:26 PM Richard Biener  wrote:
>>
>> On Thu, 1 Aug 2019, Uros Bizjak wrote:
>>
>> > On Thu, Aug 1, 2019 at 11:28 AM Richard Biener  wrote:
>> >
>>  So you unconditionally add a smaxdi3 pattern - indeed this looks
>>  necessary even when going the STV route.  The actual regression
>>  for the testcase could also be solved by turning the smaxsi3
>>  back into a compare and jump rather than a conditional move sequence.
>>  So I wonder how you'd do that given that there's pass_if_after_reload
>>  after pass_split_after_reload and I'm not sure we can split
>>  as late as pass_split_before_sched2 (there's also a split _after_
>>  sched2 on x86 it seems).
>> 
>>  So how would you go implement {s,u}{min,max}{si,di}3 for the
>>  case STV doesn't end up doing any transform?
>> >>>
>> >>> If STV doesn't transform the insn, then a pre-reload splitter splits
>> >>> the insn back to compare+cmove.
>> >>
>> >> OK, that would work.  But there's no way to force a jumpy sequence then
>> >> which we know is faster than compare+cmove because later RTL
>> >> if-conversion passes happily re-discover the smax (or conditional move)
>> >> sequence.
>> >>
>> >>> However, considering the SImode move
>> >>> from/to int/xmm register is relatively cheap, the cost function should
>> >>> be tuned so that STV always converts smaxsi3 pattern.
>> >>
>> >> Note that on both Zen and even more so bdverN the int/xmm transition
>> >> makes it no longer profitable but a _lot_ slower than the cmp/cmov
>> >> sequence... (for the loop in hmmer which is the only one I see
>> >> any effect of any of my patches).  So identifying chains that
>> >> start/end in memory is important for cost reasons.
>> >
>> > Please note that the cost function also considers the cost of move
>> > from/to xmm. So, the cost of the whole chain would disable the
>> > transformation.
>> >
>> >> So I think the splitting has to happen after the last if-conversion
>> >> pass (and thus we may need to allocate a scratch register for this
>> >> purpose?)
>> >
>> > I really hope that the underlying issue will be solved by a machine
>> > dependent pass inserted somewhere after the pre-reload split. This
>> > way, we can split unconverted smax to the cmove, and this later pass
>> > would handle jcc and cmove instructions. Until then... yes your
>> > proposed approach is one of the ways to avoid unwanted if-conversion,
>> > although sometimes we would like to split to cmove instead.
>>
>> So the following makes STV also consider SImode chains, re-using the
>> DImode chain code.  I've kept a simple incomplete smaxsi3 pattern
>> and also did not alter the {SI,DI}mode chain cost function - it's
>> quite off for TARGET_64BIT.  With this I get the expected conversion
>> for the testcase derived from hmmer.
>>
>> No further testing so far.
>>
>> Is it OK to re-use the DImode chain code this way?  I'll clean things
>> up some more of course.
>
> Yes, the approach looks OK to me. It makes chain building mode
> agnostic, and the chain building can be used for
> a) DImode x86_32 (as is now), but maybe 64bit minmax operation can be added.
> b) SImode x86_32 and x86_64 (this will be mainly used for SImode
> minmax and surrounding SImode operations)
> c) DImode x86_64 (also, mainly used for DImode minmax and surrounding
> DImode operations)
>
>> Still need help with the actual patterns for minmax and how the splitters
>> should look like.
>
> Please look at the attached patch. Maybe we can add memory_operand as
> operand 1 and operand 2 predicate, but let's keep things simple for
> now.
>
> Uros.
>
> Index: i386.md
> ===
> --- i386.md   (revision 274008)
> +++ i386.md   (working copy)
> @@ -17721,6 +17721,27 @@
>  std::swap (operands[4], operands[5]);
>  })
>  
> +;; min/max patterns
> +
> +(define_code_attr smaxmin_rel [(smax "ge") (smin "le")])
> +
> +(define_insn_and_split "<code><mode>3"
> +  [(set (match_operand:SWI48 0 "register_operand")
> + (smaxmin:SWI48 (match_operand:SWI48 1 "register_operand")
> +(match_operand:SWI48 2 "register_operand")))
> +   (clobber (reg:CC FLAGS_REG))]
> +  "TARGET_STV && TARGET_SSE4_1
> +   && can_create_pseudo_p ()"
> +  "#"
> +  "&& 1"
> +  [(set (reg:CCGC FLAGS_REG)
> + (compare:CCGC (match_dup 1)(match_dup 2)))
> +   (set (match_dup 0)
> + (if_then_else:SWI48
> +   (<smaxmin_rel> (reg:CCGC FLAGS_REG)(const_int 0))
> +   (match_dup 1)
> +   (match_dup 2)))])
> +

The pattern could in theory be matched after the last pre-RA split pass
has run, so I think the pattern still needs to have constraints and be
matchable even without can_create_pseudo_p.  It looks like the split
above should work post-RA.

A bit pedantic, because the pattern's probably fine in practice...

Thanks,
Richard

>  ;; Conditional addition patterns
>  (define_expand "addcc"
>[(match_operand:SWI 0 "register_operand")

[C] Fix bogus nested enum error message

2019-08-05 Thread Richard Sandiford
For:

enum a { A };
enum a { B };

we emit a bogus error about nested definitions before the real error:

foo.c:2:6: error: nested redefinition of ‘enum a’
2 | enum a { B };
  |  ^
foo.c:2:6: error: redeclaration of ‘enum a’
foo.c:1:6: note: originally defined here
1 | enum a { A };
  |  ^

This is because we weren't clearing C_TYPE_BEING_DEFINED once the
definition was over.

I think it's OK to clear C_TYPE_BEING_DEFINED even for a definition
that actually is nested (and so whose outer definition is still open),
since we'll already have given an error by then.  It means that second
and subsequent attempts to define a nested enum will usually get the
redeclaration error instead of the nested error, but that seems just
as accurate (nested_first and nested_second in the test).  The only
exception is if the first nested enum was also invalid by being empty,
but then the enum as a whole has already produced two errors
(nested_empty in the test).

Tested on aarch64-linux-gnu, armeb-eabi and x86_64-linux-gnu.
OK to install?

Richard


2019-08-05  Richard Sandiford  

gcc/c/
* c-decl.c (finish_enum): Clear C_TYPE_BEING_DEFINED.

gcc/testsuite/
* gcc.dg/pr79983.c (enum E): Don't allow an error about nested
definitions.
* gcc.dg/enum-redef-1.c: New test.

Index: gcc/c/c-decl.c
===
--- gcc/c/c-decl.c  2019-07-29 09:39:47.630182350 +0100
+++ gcc/c/c-decl.c  2019-08-05 09:58:36.509124799 +0100
@@ -8781,6 +8781,8 @@ finish_enum (tree enumtype, tree values,
   && !in_sizeof && !in_typeof && !in_alignof)
 struct_parse_info->struct_types.safe_push (enumtype);
 
+  C_TYPE_BEING_DEFINED (enumtype) = 0;
+
   return enumtype;
 }
 
Index: gcc/testsuite/gcc.dg/pr79983.c
===
--- gcc/testsuite/gcc.dg/pr79983.c  2019-03-08 18:15:07.100852863 +
+++ gcc/testsuite/gcc.dg/pr79983.c  2019-08-05 09:58:36.509124799 +0100
@@ -8,7 +8,7 @@ struct S { int i, j; }; /* { dg-error "r
 
 enum E;
 enum E { A, B, C }; /* { dg-message "originally defined here" } */
-enum E { D, F }; /* { dg-error "nested redefinition of 'enum E'|redeclaration 
of 'enum E'" } */
+enum E { D, F }; /* { dg-error "redeclaration of 'enum E'" } */
 
 union U;
 union U { int i; }; /* { dg-message "originally defined here" } */
Index: gcc/testsuite/gcc.dg/enum-redef-1.c
===
--- /dev/null   2019-07-30 08:53:31.317691683 +0100
+++ gcc/testsuite/gcc.dg/enum-redef-1.c 2019-08-05 09:58:36.509124799 +0100
@@ -0,0 +1,29 @@
+enum a { A };
+enum a { B }; /* { dg-bogus "nested redefinition" } */
+/* { dg-error "redeclaration of 'enum a'" "" { target *-*-* } .-1 } */
+
+enum empty {}; /* { dg-error "empty enum is invalid" } */
+enum empty {}; /* { dg-bogus "nested redefinition" } */
+/* { dg-error "empty enum is invalid" "" { target *-*-* } .-1 } */
+
+enum nested_first {
+  C1 = sizeof(enum nested_first { C1a }), /* { dg-error "nested redefinition 
of 'enum nested_first" } */
+  C2 = sizeof(enum nested_first { C2a }) /* { dg-error "redeclaration of 'enum 
nested_first'" "" } */
+};
+
+enum nested_second {
+  D1,
+  D2 = sizeof(enum nested_second { D2a }), /* { dg-error "nested redefinition 
of 'enum nested_second" } */
+  D3 = sizeof(enum nested_second { D3a }) /* { dg-error "redeclaration of 
'enum nested_second'" "" } */
+};
+
+enum nested_repeat { E };
+enum nested_repeat { /* { dg-error "redeclaration of 'enum nested_repeat'" "" 
} */
+  F = sizeof(enum nested_repeat { Fa }) /* { dg-error "nested redefinition of 
'enum nested_repeat" } */
+};
+
+enum nested_empty {
+  G1 = sizeof(enum nested_empty {}), /* { dg-error "nested redefinition of 
'enum nested_empty" } */
+  /* { dg-error "empty enum is invalid" "" { target *-*-* } .-1 } */
+  G2 = sizeof(enum nested_empty { G2a })
+};


Protect tree_to_shwi call in unmodified_param_1

2019-08-05 Thread Richard Sandiford
unmodified_parm_1 used tree_to_shwi without first checking
tree_fits_shwi_p.  This is needed by the SVE ACLE support and
is hard to test independently.

Tested on aarch64-linux-gnu, armeb-eabi and x86_64-linux-gnu.
OK to install?

Richard


2019-08-05  Richard Sandiford  

gcc/
* ipa-fnsummary.c (unmodified_parm_1): Test tree_fits_shwi_p
before calling tree_to_shwi.

Index: gcc/ipa-fnsummary.c
===
--- gcc/ipa-fnsummary.c 2019-07-16 09:11:50.509067138 +0100
+++ gcc/ipa-fnsummary.c 2019-08-05 09:57:28.553617299 +0100
@@ -927,7 +927,8 @@ unmodified_parm_1 (ipa_func_body_info *f
   /* SSA_NAME referring to parm default def?  */
   if (TREE_CODE (op) == SSA_NAME
   && SSA_NAME_IS_DEFAULT_DEF (op)
-  && TREE_CODE (SSA_NAME_VAR (op)) == PARM_DECL)
+  && TREE_CODE (SSA_NAME_VAR (op)) == PARM_DECL
+  && tree_fits_shwi_p (TYPE_SIZE (TREE_TYPE (op))))
 {
   if (size_p)
*size_p = tree_to_shwi (TYPE_SIZE (TREE_TYPE (op)));


Make function_code a 32-bit field

2019-08-05 Thread Richard Sandiford
Adding SVE intrinsics on top of the existing AArch64 intrinsics blows
the 12-bit function_code in tree_function_decl.  That bitfield has no
spare bits, but it comes at the end of the structure and is preceded
by a pointer, so on LP64 hosts there's currently a 32-bit hole at end.

This patch therefore makes function_code an independent field and
moves the bitfield to the 32-bit hole.

I wondered about instead making function_code 16 bits, so that the
patch leaves 28 spare bits instead of just 12.  That seemed a bit
short-term though; I can't guarantee that we won't blow 16 bits once
the SVE2 functions are added...

If we run out of bits again, we can start chomping from the top
of the enum.  E.g. 24 bits should surely be enough, but there's
no point paying the overhead of the masking until we need it.

Tested on aarch64-linux-gnu, armeb-eabi and x86_64-linux-gnu.
OK to install?

Richard


2019-08-05  Richard Sandiford  

gcc/
* tree-core.h (tree_function_decl): Make function_code an
independent field.  Group the remaining bitfields into bytes
and move decl_type so that it continues to be at a byte boundary.
Leave 12 bits for future expansion.

Index: gcc/tree-core.h
===
--- gcc/tree-core.h 2019-08-05 09:55:26.0 +0100
+++ gcc/tree-core.h 2019-08-05 09:55:26.626500651 +0100
@@ -1857,36 +1857,32 @@ struct GTY(()) tree_function_decl {
   tree vindex;
 
   /* In a FUNCTION_DECL for which DECL_BUILT_IN holds, this is
- DECL_FUNCTION_CODE.  Otherwise unused.
- ???  The bitfield needs to be able to hold all target function
- codes as well.  */
-  ENUM_BITFIELD(built_in_function) function_code : 12;
-  ENUM_BITFIELD(built_in_class) built_in_class : 2;
+ DECL_FUNCTION_CODE.  Otherwise unused.  */
+  enum built_in_function function_code;
 
+  ENUM_BITFIELD(built_in_class) built_in_class : 2;
   unsigned static_ctor_flag : 1;
   unsigned static_dtor_flag : 1;
-
   unsigned uninlinable : 1;
   unsigned possibly_inlined : 1;
   unsigned novops_flag : 1;
   unsigned returns_twice_flag : 1;
+
   unsigned malloc_flag : 1;
   unsigned declared_inline_flag : 1;
   unsigned no_inline_warning_flag : 1;
-
   unsigned no_instrument_function_entry_exit : 1;
-
-  /* Align the bitfield to boundary of a byte.  */
-  ENUM_BITFIELD(function_decl_type) decl_type: 2;
-
   unsigned no_limit_stack : 1;
   unsigned disregard_inline_limits : 1;
   unsigned pure_flag : 1;
   unsigned looping_const_or_pure_flag : 1;
+
+  /* Align the bitfield to boundary of a byte.  */
+  ENUM_BITFIELD(function_decl_type) decl_type: 2;
   unsigned has_debug_args_flag : 1;
   unsigned versioned_function : 1;
 
-  /* 0 bits left.  */
+  /* 12 bits left for future expansion.  */
 };
 
 struct GTY(()) tree_translation_unit_decl {


  1   2   >