Re: RFA: patch to fix PR55116
On Mon, Oct 29, 2012 at 5:11 PM, H.J. Lu hjl.to...@gmail.com wrote:
On Mon, Oct 29, 2012 at 4:41 PM, H.J. Lu hjl.to...@gmail.com wrote:
On Mon, Oct 29, 2012 at 9:38 AM, Vladimir Makarov vmaka...@redhat.com wrote:
On 12-10-29 12:21 PM, Richard Sandiford wrote:
Vladimir Makarov vmaka...@redhat.com writes:

H.J. in http://gcc.gnu.org/bugzilla/show_bug.cgi?id=55116 reported an interesting address:

(and:DI (subreg:DI (plus:SI (ashift:SI (reg:SI 96 [ glob_vol_int.22 ])
                                       (const_int 2 [0x2]))
                            (symbol_ref:SI ("glob_vol_int_arr") var_decl 0x703c2720 glob_vol_int_arr)) 0)
        (const_int 4294967295 [0xffffffff]))

which cannot be correctly extracted. Here `and' with `subreg' behaves as an address mutation. The following patch fixes the problem. Ok to commit, Richard?

Heh, I wondered if subregs might still be used like that, and was almost tempted to add them just in case. I think this particular case is really a failed canonicalisation and that:

  (and:DI (subreg:DI (foo:SI ...) 0) (const_int 0xffffffff))

ought to be:

  (zero_extend:DI (foo:SI ...))

Yes, that was my thought too. But I know I've approved MIPS patches to accept (and:DI ... (const_int 0xffffffff)) as an alternative.

Index: rtlanal.c
===
--- rtlanal.c (revision 192942)
+++ rtlanal.c (working copy)
@@ -5459,6 +5459,11 @@ strip_address_mutations (rtx *loc, enum
       else if (code == AND && CONST_INT_P (XEXP (*loc, 1)))
 	/* (and ... (const_int -X)) is used to align to X bytes.  */
 	loc = &XEXP (*loc, 0);
+      else if (code == SUBREG
+	       && ! REG_P (XEXP (*loc, 0)) && ! MEM_P (XEXP (*loc, 0)))
+	/* (subreg (operator ...) ...) usually inside and is used for
+	   mode conversion too.  */
+	loc = &XEXP (*loc, 0);

I think the condition should be:

      else if (code == SUBREG
	       && !OBJECT_P (SUBREG_REG (*loc))
	       && subreg_lowpart_p (*loc))

OK with that change, if it works.

Yes, it works. I've submitted the following patch.

It doesn't work right. I will create a new testcase.

This patch limits SUBREG to Pmode. Tested on Linux/x86-64. OK to install?

Thanks.

-- H.J.

gcc/

2012-10-29  H.J. Lu  hongjiu...@intel.com

	PR middle-end/55116
	* rtlanal.c (strip_address_mutations): Handle SUBREG only
	for Pmode.

gcc/testsuite/

2012-10-29  H.J. Lu  hongjiu...@intel.com

	PR middle-end/55116
	* gcc.target/i386/pr55116-2.c: New file.

diff --git a/gcc/rtlanal.c b/gcc/rtlanal.c
index 43d4cb8..d076ad6 100644
--- a/gcc/rtlanal.c
+++ b/gcc/rtlanal.c
@@ -5460,6 +5460,7 @@ strip_address_mutations (rtx *loc, enum rtx_code *outer_code)
 	/* (and ... (const_int -X)) is used to align to X bytes.  */
 	loc = &XEXP (*loc, 0);
       else if (code == SUBREG
+	       && GET_MODE (*loc) == Pmode
 	       && !OBJECT_P (SUBREG_REG (*loc))
 	       && subreg_lowpart_p (*loc))
 	/* (subreg (operator ...) ...) inside and is used for mode

diff --git a/gcc/testsuite/gcc.target/i386/pr55116-2.c b/gcc/testsuite/gcc.target/i386/pr55116-2.c
new file mode 100644
index 000..7ef8ead
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr55116-2.c
@@ -0,0 +1,86 @@
+/* { dg-do compile { target { ! { ia32 } } } } */
+/* { dg-options "-O2 -mx32 -maddress-mode=long" } */
+
+typedef struct rtx_def *rtx;
+enum rtx_code { MINUS };
+union rtunion_def {
+  rtx rt_rtx;
+};
+typedef union rtunion_def rtunion;
+struct rtx_def {
+  enum rtx_code code: 16;
+  union u {
+    rtunion fld[1];
+  }
+  u;
+};
+rtx simplify_binary_operation (enum rtx_code code, int mode,
+			       rtx op0, rtx op1);
+struct simplify_plus_minus_op_data {
+  rtx op;
+  short neg;
+};
+void simplify_plus_minus (enum rtx_code code, int mode, rtx op0, rtx op1)
+{
+  struct simplify_plus_minus_op_data ops[8];
+  rtx tem = (rtx) 0;
+  int n_ops = 2, input_ops = 2;
+  int changed, canonicalized = 0;
+  int i, j;
+  __builtin_memset (ops, 0, sizeof (ops));
+  do
+    {
+      changed = 0;
+      for (i = 0; i < n_ops; i++)
+	{
+	  rtx this_op = ops[i].op;
+	  int this_neg = ops[i].neg;
+	  enum rtx_code this_code = ((enum rtx_code) (this_op)->code);
+	  switch (this_code)
+	    {
+	    case MINUS:
+	      if (n_ops == 7)
+		return;
+	      n_ops++;
+	      input_ops++;
+	      changed = 1;
+	      canonicalized |= this_neg;
+	      break;
+	    }
+	}
+    }
+  while (changed);
+  do
+    {
+      j = n_ops - 1;
+      for (i = n_ops - 1; j >= 0; j--)
+	{
+	  rtx lhs = ops[j].op, rhs = ops[i].op;
+	  int lneg = ops[j].neg, rneg = ops[i].neg;
+	  if (lhs != 0 && rhs != 0)
+	    {
+	      enum rtx_code ncode = MINUS;
+	      if (((enum rtx_code) (lhs)->code) ==
Re: PATCH: PR rtl-optimization/55093: [4.8 Regression] [x32] -maddress-mode=long failed
On Mon, Oct 29, 2012 at 3:49 PM, H.J. Lu hjl.to...@gmail.com wrote:
On Mon, Oct 29, 2012 at 3:44 PM, H.J. Lu hjl.to...@gmail.com wrote:
On Mon, Oct 29, 2012 at 8:15 AM, Richard Sandiford rdsandif...@googlemail.com wrote:
H.J. Lu hjl.to...@gmail.com writes:

Hi, This patch changes get_elimination to check register number instead of RTX. Tested on Linux/x32 with -maddress-mode=long. OK to install?

FWIW, this doesn't sound right to me, at least not without more justification. The idea is that things like frame_pointer_rtx are supposed to be unique, so the original code:

   if ((ep = elimination_map[hard_regno]) != NULL)
-    return ep->from_rtx != reg ? NULL : ep;
+    return ep->from != hard_regno ? NULL : ep;

ought to be correct in itself. reload did the same thing:

  for (ep = reg_eliminate; ep < &reg_eliminate[NUM_ELIMINABLE_REGS]; ep++)
    if (ep->from_rtx == x && ep->can_eliminate)
      return plus_constant (Pmode, ep->to_rtx, ep->previous_offset);

It sounds on the face of it like the bug is elsewhere.

LRA has

  if (REG_P (reg) && (ep = get_elimination (reg)) != NULL)
    {
      rtx to_rtx = replace_p ? ep->to_rtx : ep->from_rtx;

      if (! replace_p)
	{
	  offset += (ep->offset - ep->previous_offset);
	  offset = trunc_int_for_mode (offset, GET_MODE (plus_cst_src));
	}

      if (GET_CODE (XEXP (plus_cst_src, 0)) == SUBREG)
	to_rtx = gen_lowpart (GET_MODE (XEXP (plus_cst_src, 0)), to_rtx);

Reload has

  rtx to_rtx = ep->to_rtx;
  offset += ep->offset;
  offset = trunc_int_for_mode (offset, GET_MODE (plus_cst_src));

  if (GET_CODE (XEXP (plus_cst_src, 0)) == SUBREG)
    to_rtx = gen_lowpart (GET_MODE (XEXP (plus_cst_src, 0)), to_rtx);

(gdb) call debug_rtx (ep->to_rtx)
(reg/f:DI 7 sp)
(gdb) call debug_rtx (ep->from_rtx)
(reg/f:DI 16 argp)
(gdb)

gen_lowpart returns (reg/f:DI 7 sp) for reload and (reg:SI 16 argp) for LRA. They are caused by

  if (FRAME_POINTER_REGNUM != ARG_POINTER_REGNUM
      /* We should convert arg register in LRA after the elimination
	 if it is possible.  */
      && xregno == ARG_POINTER_REGNUM
      && ! lra_in_progress)
    return -1;

It doesn't work in this case.

This testcase shows that LRA can't convert the arg register after the elimination. Here is a patch to remove the lra_in_progress check for ARG_POINTER_REGNUM. Tested on Linux/x86-64. OK to install?

Thanks.

-- H.J.

gcc/

2012-10-29  H.J. Lu  hongjiu...@intel.com

	PR rtl-optimization/55093
	* rtlanal.c (simplify_subreg_regno): Remove lra_in_progress
	check for ARG_POINTER_REGNUM.

gcc/testsuite/

2012-10-29  H.J. Lu  hongjiu...@intel.com

	PR rtl-optimization/55093
	* gcc.target/i386/pr55093.c: New file.

diff --git a/gcc/rtlanal.c b/gcc/rtlanal.c
index 43d4cb8..c1a7580 100644
--- a/gcc/rtlanal.c
+++ b/gcc/rtlanal.c
@@ -3494,10 +3494,7 @@ simplify_subreg_regno (unsigned int xregno, enum machine_mode xmode,
     return -1;

   if (FRAME_POINTER_REGNUM != ARG_POINTER_REGNUM
-      /* We should convert arg register in LRA after the elimination
-	 if it is possible.  */
-      && xregno == ARG_POINTER_REGNUM
-      && ! lra_in_progress)
+      && xregno == ARG_POINTER_REGNUM)
     return -1;

   if (xregno == STACK_POINTER_REGNUM

diff --git a/gcc/testsuite/gcc.target/i386/pr55093.c b/gcc/testsuite/gcc.target/i386/pr55093.c
new file mode 100644
index 000..76b4042
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr55093.c
@@ -0,0 +1,80 @@
+/* { dg-do compile { target { ! { ia32 } } } } */
+/* { dg-options "-O2 -mx32 -maddress-mode=long" } */
+
+typedef union tree_node *tree;
+typedef const union tree_node *const_tree;
+typedef struct {
+  unsigned long long low;
+  long long high;
+} double_int;
+struct real_value {
+};
+struct real_format {
+  int has_signed_zero;
+};
+extern const struct real_format *real_format_for_mode[];
+extern int real_isnegzero (const struct real_value *);
+enum tree_code { REAL_CST, SSA_NAME };
+struct tree_base {
+  enum tree_code code : 16;
+  union {
+    unsigned int version;
+  }
+  u;
+};
+extern void tree_check_failed (const_tree, const char *, int, const char *, ...)
+  __attribute__ ((__noreturn__));
+union tree_node {
+  struct tree_base base;
+};
+inline tree tree_check (tree __t, const char *__f, int __l, const char *__g, enum tree_code __c) {
+  if (((enum tree_code) (__t)->base.code) != __c)
+    tree_check_failed (__t, __f, __l, __g, __c, 0);
+  return __t;
+}
+struct prop_value_d {
+  int lattice_val;
+  tree value;
+  double_int mask;
+};
+typedef struct prop_value_d prop_value_t;
+static prop_value_t *const_val;
+static void canonicalize_float_value (prop_value_t *);
+typedef void (*ssa_prop_visit_stmt_fn) (prop_value_t);
+typedef void (*ssa_prop_visit_phi_fn) (void);
+typedef void
RE: GCC 4.8.0 Status Report (2012-10-29), Stage 1 to end soon
Hi Jakub,

We are working on the following.

1. bdver3 enablement. Review completed. Changes to be incorporated and checked in. http://gcc.gnu.org/ml/gcc-patches/2012-10/msg01131.html

2. btver2 basic enablement is done (http://gcc.gnu.org/ml/gcc-patches/2012-07/msg01018.html). Scheduler descriptions are being updated. This is architecture specific and we consider it not to be stage-1 material.

Regards
Ganesh

-----Original Message-----
From: Jakub Jelinek [mailto:ja...@redhat.com]
Sent: Monday, October 29, 2012 11:27 PM
To: g...@gcc.gnu.org
Cc: gcc-patches@gcc.gnu.org
Subject: GCC 4.8.0 Status Report (2012-10-29), Stage 1 to end soon

Status
======

I'd like to close the stage 1 phase of GCC 4.8 development on Monday, November 5th. If you still have patches for new features you'd like to see in GCC 4.8, please post them for review soon. Patches posted before the freeze, but reviewed shortly after the freeze, may still go in; further changes should be just bugfixes and documentation fixes.

Quality Data
============

Priority  #    Change from Last Report
--------  ---  -----------------------
P1        23   + 23
P2        77   +  8
P3        85   + 84
--------  ---  -----------------------
Total     185  +115

Previous Report
===============

http://gcc.gnu.org/ml/gcc/2012-03/msg00011.html

The next report will be sent by me again, announcing end of stage 1.
Re: [patch] Unify bitmap interface.
On Tue, Oct 30, 2012 at 8:23 AM, Lawrence Crowl cr...@googlers.com wrote:
On 10/29/12, Diego Novillo dnovi...@google.com wrote:
On Oct 29, 2012 Diego Novillo dnovi...@google.com wrote:

Just to make sure. Testing on ppc should be fast, for example.

And useless. Your patch does not touch ppc.

I've fixed the #if 0 and the remaining suggestions will happen in another patch. I've committed this one.

===

This patch implements the unification of the *bitmap interfaces as discussed. Essentially, we rename ebitmap and sbitmap functions to use the same names as the bitmap functions. This rename works because we can now overload on the bitmap type. Some macros now become inline functions to enable that overloading.

The sbitmap non-bool returning bitwise operations have been merged with the bool versions. Sometimes this merge involved modifying the non-bool version to compute the bool value, and sometimes modifying the bool version to add additional work from the non-bool version. The redundant routines have been removed.

The allocation functions have not been renamed, because we often do not have an argument on which to overload. The cardinality functions have not been renamed, because they have different parameters, and are thus not interchangeable. The iteration functions have not been renamed, because they are functionally different.

Tested on x86_64, contrib/config-list.mk testing passed.

Index: gcc/ChangeLog

Just one question: Should we change the name of functions sbitmap_intersection_of_succs/sbitmap_intersection_of_preds/sbitmap_union_of_succs/sbitmap_union_of_preds too? It might be a little confusing that sbitmap_* is used among bitmap_*.

-- Best Regards.
Adapt one fold-const optimization for vectors
Hello, one more optimization that needed help for vectors, it crashed on (xy)0. Because of PR 55001, testcases are awkward to add (I could do a x86-only one if needed). bootstrap+testsuite. 2012-10-30 Marc Glisse marc.gli...@inria.fr * fold-const.c (fold_binary_op_with_conditional_arg): Handle vectors. (fold_binary_loc): call it for VEC_COND_EXPR. -- Marc Glisse
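The transform `fold_binary_op_with_conditional_arg` performs can be modeled abstractly: an operation whose operand is a conditional gets distributed into both arms, `(cond ? b : c) OP d` becoming `cond ? (b OP d) : (c OP d)`; for vectors the condition is a `VEC_COND_EXPR` and the rewrite applies lanewise. A small Python model of both cases (illustration only, not the fold-const implementation):

```python
def fold_cond_binop(cond, b, c, d, op):
    """Scalar model: (cond ? b : c) OP d  ->  cond ? (b OP d) : (c OP d)."""
    return op(b, d) if cond else op(c, d)

def fold_vec_cond_binop(mask, b, c, d, op):
    """Vector model: the same rewrite applied independently per lane,
    as when the condition is a VEC_COND_EXPR."""
    return [op(bi, di) if m else op(ci, di)
            for m, bi, ci, di in zip(mask, b, c, d)]
```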
[patch,libgcc] m32r*rtems* add crtinit.o and crtfinit.o
Hi,

I would like to apply the patch below to trunk and the gcc-4.7 branch. This patch was originally submitted by Joel Sherrill back in May (http://gcc.gnu.org/ml/gcc-patches/2012-05/msg01180.html), but had never received any feedback. It has been part of the rtems-gcc patches since then.

Ralf

2012-05-16  Joel Sherrill  joel.sherr...@oarcorp.com

	* config.host (m32r-*-rtems*): Include crtinit.o and crtfinit.o
	as extra_parts.

diff --git a/libgcc/config.host b/libgcc/config.host
index 051d6b0..bbf21a9 100644
--- a/libgcc/config.host
+++ b/libgcc/config.host
@@ -693,6 +693,7 @@ m32r-*-elf*)
 	;;
 m32r-*-rtems*)
 	tmake_file="$tmake_file m32r/t-m32r t-fdpbit"
+	extra_parts="$extra_parts crtinit.o crtfini.o"
 	;;
 m32rle-*-elf*)
 	tmake_file=t-fdpbit
Re: [PATCH] Fix PR 53743 and other -freorder-blocks-and-partition failures (issue6823047)
On 30 October 2012 05:20, Teresa Johnson tejohn...@google.com wrote: Index: cfgrtl.c === --- cfgrtl.c(revision 192692) +++ cfgrtl.c(working copy) @@ -912,7 +912,8 @@ rtl_can_merge_blocks (basic_block a, basic_block b partition boundaries). See the comments at the top of bb-reorder.c:partition_hot_cold_basic_blocks for complete details. */ - if (BB_PARTITION (a) != BB_PARTITION (b)) + if (find_reg_note (BB_END (a), REG_CROSSING_JUMP, NULL_RTX) + || BB_PARTITION (a) != BB_PARTITION (b)) return false; /* Protect the loop latches. */ @@ -3978,7 +3979,8 @@ cfg_layout_can_merge_blocks_p (basic_block a, basi partition boundaries). See the comments at the top of bb-reorder.c:partition_hot_cold_basic_blocks for complete details. */ - if (BB_PARTITION (a) != BB_PARTITION (b)) + if (find_reg_note (BB_END (a), REG_CROSSING_JUMP, NULL_RTX) + || BB_PARTITION (a) != BB_PARTITION (b)) return false; /* Protect the loop latches. */ As this if() condition seems to be the canonical way to detect being in a different partition should it be moved out into a query function, and all of cfgrtl.c updated to use it? [Note I am not a maintainer and so can't approve/reject your patch]. Thanks, Matt -- Matthew Gretton-Dann Linaro Toolchain Working Group matthew.gretton-d...@linaro.org
Re: GCC 4.8.0 Status Report (2012-10-29), Stage 1 to end soon
On Mon, Oct 29, 2012 at 02:07:55PM -0400, David Miller wrote:

I'd like to close the stage 1 phase of GCC 4.8 development on Monday, November 5th. If you have still patches for new features you'd like to see in GCC 4.8, please post them for review soon. Patches posted before the freeze, but reviewed shortly after the freeze, may still go in, further changes should be just bugfixes and documentation fixes.

I'd like to get the Sparc cbcond stuff in (3 revisions posted) which is waiting for Eric B. to do some Solaris specific work.

That has been posted in stage 1, so it is certainly ok to commit it even during early stage 3. And, on a case by case basis exceptions are always possible. This hasn't changed in the last few years. By the "reviewed shortly after the freeze" I just want to say that e.g. having large intrusive patches posted now, but reviewed late December is already too late.

As for postponing end of stage 1 by a few weeks because of the storm, I'm afraid if we want to keep roughly timely releases we don't have that luxury. If you look at http://gcc.gnu.org/develop.html, ending stage 1 around end of October happened already for 4.6 and 4.7, for 4.5 it was a month earlier and for 4.4 even two months earlier. The 4.7 bugfixing went IMHO smoothly, but we certainly have to expect lots of bugfixing.

I'd also like to enable LRA for at least 32-bit sparc, even if I can't find the time to work on auditing 64-bit completely. I agree with Eric that it is better to enable it for the whole target together, rather than based on some options.

Enabling LRA in early stage 3 for some targets should be ok, if it doesn't require too large and intrusive changes to the generic code that could destabilize other targets.

	Jakub
[PATCH] PR 54472
Hello,

This PR is due to the selective scheduling missing the dependencies with implicit_sets. From the sched-deps.c code it looks like implicit sets generate anti dependencies with either sets, uses or clobbers, so that's what I've done with the below patch. Vlad, as it looks you've added implicit sets, does the above conclusion look correct? I will commit the patch then after bootstrapping and testing will complete.

Yours, Andrey

2012-10-30  Andrey Belevantsev  a...@ispras.ru

	PR rtl-optimization/54472
	* sel-sched-ir.c (has_dependence_note_reg_set): Handle implicit sets.
	(has_dependence_note_reg_clobber, has_dependence_note_reg_use):
	Likewise.

2012-10-30  Andrey Belevantsev  a...@ispras.ru

	PR rtl-optimization/54472
	* gcc.dg/pr54472.c: New test.

diff --git a/gcc/sel-sched-ir.c b/gcc/sel-sched-ir.c
index 2a7a170..220568a 100644
--- a/gcc/sel-sched-ir.c
+++ b/gcc/sel-sched-ir.c
@@ -3185,7 +3185,7 @@ has_dependence_note_reg_set (int regno)
 	  || reg_last->clobbers != NULL)
 	*dsp = (*dsp & ~SPECULATIVE) | DEP_OUTPUT;

-      if (reg_last->uses)
+      if (reg_last->uses || reg_last->implicit_sets)
 	*dsp = (*dsp & ~SPECULATIVE) | DEP_ANTI;
     }
 }
@@ -3205,7 +3205,7 @@ has_dependence_note_reg_clobber (int regno)
       if (reg_last->sets)
 	*dsp = (*dsp & ~SPECULATIVE) | DEP_OUTPUT;

-      if (reg_last->uses)
+      if (reg_last->uses || reg_last->implicit_sets)
 	*dsp = (*dsp & ~SPECULATIVE) | DEP_ANTI;
     }
 }
@@ -3225,7 +3225,7 @@ has_dependence_note_reg_use (int regno)
       if (reg_last->sets)
 	*dsp = (*dsp & ~SPECULATIVE) | DEP_TRUE;

-      if (reg_last->clobbers)
+      if (reg_last->clobbers || reg_last->implicit_sets)
 	*dsp = (*dsp & ~SPECULATIVE) | DEP_ANTI;

       /* Merge BE_IN_SPEC bits into *DSP when the dependency producer

diff --git a/gcc/testsuite/gcc.dg/pr54472.c b/gcc/testsuite/gcc.dg/pr54472.c
new file mode 100644
index 000..9395203
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr54472.c
@@ -0,0 +1,9 @@
+/* { dg-do compile { target powerpc*-*-* ia64-*-* x86_64-*-* } } */
+/* { dg-options "-O -fschedule-insns -fselective-scheduling" } */
+
+int main ()
+{
+  int a[3][3][3];
+  __builtin_memset (a, 0, sizeof a);
+  return 0;
+}
Add myself to MAINTAINERS
Adding myself to the list of members in write after approval.

Index: ChangeLog
===
--- ChangeLog (revision 192977)
+++ ChangeLog (working copy)
@@ -1,3 +1,7 @@
+2012-10-30  Ganesh Gopalasubramanian  ganesh.gopalasubraman...@amd.com
+
+	* MAINTAINERS (Write After Approval): Add myself.
+
 2012-10-26  James Greenhalgh  james.greenha...@arm.com

 	* MAINTAINERS (Write After Approval): Add myself.

Index: MAINTAINERS
===
--- MAINTAINERS (revision 192977)
+++ MAINTAINERS (working copy)
@@ -372,6 +372,7 @@
 Chao-ying Fu				f...@mips.com
 Gary Funck				g...@intrepid.com
 Pompapathi V Gadad			pompapathi.v.ga...@nsc.com
+Gopalasubramanian Ganesh		ganesh.gopalasubraman...@amd.com
 Kaveh Ghazi				gh...@gcc.gnu.org
 Matthew Gingell				ging...@gnat.com
 Tristan Gingold				ging...@adacore.com

Regards
Ganesh
Re: [PATCH, libstdc++] Make empty std::string storage readonly
On 08/30/2012 11:45 AM, Jonathan Wakely wrote: On 29 August 2012 13:25, Michael Haubenwallner wrote: On 08/28/2012 08:12 PM, Jonathan Wakely wrote: On 28 August 2012 18:27, Michael Haubenwallner wrote: Does it actually produce a segfault? I suppose it might on some platforms, but not all, so I'm not sure it's worth changing. Using this patch on my x86_64 Gentoo Linux Desktop with gcc-4.7.1 does segfault as expected - when I make sure the correct libstdc++ is used at runtime, having the '_S_empty_rep_storage' symbol in the .rodata section rather than .bss. If it works reliably on x86_64 then I think the patch is worth considering. I'm on holiday for a week, so maybe one of the other maintainers will deal with it first. Any chance to get this in for 4.8? Thank you! /haubi/
Re: Adapt one fold-const optimization for vectors
On Tue, Oct 30, 2012 at 08:05:13AM +0100, Marc Glisse wrote: Hello, one more optimization that needed help for vectors, it crashed on (xy)0. Because of PR 55001, testcases are awkward to add (I could do a x86-only one if needed). bootstrap+testsuite. 2012-10-30 Marc Glisse marc.gli...@inria.fr * fold-const.c (fold_binary_op_with_conditional_arg): Handle vectors. (fold_binary_loc): call it for VEC_COND_EXPR. Patch missing?
[SH, committed] PR 54963
Hello,

This is the latest proposed patch from the PR. Tested on rev 192482 with

make -k check RUNTESTFLAGS="--target_board=sh-sim\{-m2/-ml,-m2/-mb,-m2a/-mb,-m4/-ml,-m4/-mb,-m4a/-ml,-m4a/-mb}"

and no new failures. Pre-approved by Kaz in the PR. Committed as rev 192983.

Cheers, Oleg

gcc/ChangeLog:

	PR target/54963
	* config/sh/iterators.md (SIDI): New mode iterator.
	* config/sh/sh.md (negdi2): Use parallel around operation and
	T_REG clobber in expander.
	(*negdi2): Mark output operand as early clobbered.  Add T_REG
	clobber.  Split after reload.  Simplify split code.
	(abssi2, absdi2): Fold expanders into abs<mode>2.
	(*abssi2, *absdi2): Fold into *abs<mode>2 insn_and_split.
	Split insns before reload.
	(*negabssi2, *negabsdi2): Fold into *negabs<mode>2.  Add T_REG
	clobber.  Split insns before reload.
	(negsi_cond): Reformat.  Use emit_move_insn instead of gen_movsi.
	(negdi_cond): Reformat.  Use emit_move_insn instead of a pair of
	gen_movsi.  Split insn before reload.

Index: gcc/config/sh/sh.md
===
--- gcc/config/sh/sh.md (revision 192482)
+++ gcc/config/sh/sh.md (working copy)
@@ -5177,28 +5177,25 @@
 ;; Don't expand immediately because otherwise neg:DI (abs:DI) will not be
 ;; combined.
 (define_expand "negdi2"
-  [(set (match_operand:DI 0 "arith_reg_dest" "")
-	(neg:DI (match_operand:DI 1 "arith_reg_operand" "")))
-   (clobber (reg:SI T_REG))]
-  "TARGET_SH1"
-  "")
+  [(parallel [(set (match_operand:DI 0 "arith_reg_dest")
+		   (neg:DI (match_operand:DI 1 "arith_reg_operand")))
+	      (clobber (reg:SI T_REG))])]
+  "TARGET_SH1")

 (define_insn_and_split "*negdi2"
-  [(set (match_operand:DI 0 "arith_reg_dest" "=r")
-	(neg:DI (match_operand:DI 1 "arith_reg_operand" "r")))]
+  [(set (match_operand:DI 0 "arith_reg_dest" "=&r")
+	(neg:DI (match_operand:DI 1 "arith_reg_operand" "r")))
+   (clobber (reg:SI T_REG))]
   "TARGET_SH1"
   "#"
-  "TARGET_SH1"
+  "&& reload_completed"
   [(const_int 0)]
 {
-  rtx low_src = gen_lowpart (SImode, operands[1]);
-  rtx high_src = gen_highpart (SImode, operands[1]);
-  rtx low_dst = gen_lowpart (SImode, operands[0]);
-  rtx high_dst = gen_highpart (SImode, operands[0]);
-  emit_insn (gen_clrt ());
-  emit_insn (gen_negc (low_dst, low_src));
-  emit_insn (gen_negc (high_dst, high_src));
+  emit_insn (gen_negc (gen_lowpart (SImode, operands[0]),
+		       gen_lowpart (SImode, operands[1])));
+  emit_insn (gen_negc (gen_highpart (SImode, operands[0]),
+		       gen_highpart (SImode, operands[1])));
   DONE;
 })

@@ -5272,38 +5269,53 @@
 	  (const_int -1)))]
   "TARGET_SHMEDIA"
   "")

-(define_expand "abssi2"
-  [(set (match_operand:SI 0 "arith_reg_dest" "")
-	(abs:SI (match_operand:SI 1 "arith_reg_operand" "")))
+(define_expand "abs<mode>2"
+  [(parallel [(set (match_operand:SIDI 0 "arith_reg_dest")
+		   (abs:SIDI (match_operand:SIDI 1 "arith_reg_operand")))
+	      (clobber (reg:SI T_REG))])]
+  "TARGET_SH1")
+
+(define_insn_and_split "*abs<mode>2"
+  [(set (match_operand:SIDI 0 "arith_reg_dest")
+	(abs:SIDI (match_operand:SIDI 1 "arith_reg_operand")))
    (clobber (reg:SI T_REG))]
   "TARGET_SH1"
-  "")
-
-(define_insn_and_split "*abssi2"
-  [(set (match_operand:SI 0 "arith_reg_dest" "=r")
-	(abs:SI (match_operand:SI 1 "arith_reg_operand" "r")))]
-  "TARGET_SH1"
   "#"
-  "TARGET_SH1"
+  "&& can_create_pseudo_p ()"
   [(const_int 0)]
 {
-  emit_insn (gen_cmpgesi_t (operands[1], const0_rtx));
-  emit_insn (gen_negsi_cond (operands[0], operands[1], operands[1],
-			     const1_rtx));
+  if (<MODE>mode == SImode)
+    emit_insn (gen_cmpgesi_t (operands[1], const0_rtx));
+  else
+    {
+      rtx high_src = gen_highpart (SImode, operands[1]);
+      emit_insn (gen_cmpgesi_t (high_src, const0_rtx));
+    }
+
+  emit_insn (gen_neg<mode>_cond (operands[0], operands[1], operands[1],
+				 const1_rtx));
   DONE;
 })

-(define_insn_and_split "*negabssi2"
-  [(set (match_operand:SI 0 "arith_reg_dest" "=r")
-	(neg:SI (abs:SI (match_operand:SI 1 "arith_reg_operand" "r"))))]
+(define_insn_and_split "*negabs<mode>2"
+  [(set (match_operand:SIDI 0 "arith_reg_dest")
+	(neg:SIDI (abs:SIDI (match_operand:SIDI 1 "arith_reg_operand"))))
+   (clobber (reg:SI T_REG))]
   "TARGET_SH1"
   "#"
-  "TARGET_SH1"
+  "&& can_create_pseudo_p ()"
   [(const_int 0)]
 {
-  emit_insn (gen_cmpgesi_t (operands[1], const0_rtx));
-  emit_insn (gen_negsi_cond (operands[0], operands[1], operands[1],
-			     const0_rtx));
+  if (<MODE>mode == SImode)
+    emit_insn (gen_cmpgesi_t (operands[1], const0_rtx));
+  else
+    {
+      rtx high_src = gen_highpart (SImode, operands[1]);
+      emit_insn (gen_cmpgesi_t (high_src, const0_rtx));
+    }
+
+  emit_insn (gen_neg<mode>_cond (operands[0], operands[1], operands[1],
+				 const0_rtx));
   DONE;
 })

@@ -5316,10 +5328,10 @@
 (define_insn_and_split "negsi_cond"
   [(set (match_operand:SI 0 "arith_reg_dest" "=r,r")
-	(if_then_else:SI (eq:SI (reg:SI T_REG)
-				(match_operand:SI 3 "const_int_operand" "M,N"))
-			 (match_operand:SI 1
Re: [PATCH, libstdc++] Make empty std::string storage readonly
On 30 October 2012 09:05, Michael Haubenwallner wrote: Any chance to get this in for 4.8? I'm looking into it today.
Re: [PATCH] Fix debug info for expr and jump stmt
On Mon, Oct 29, 2012 at 05:10:10PM +0100, Richard Biener wrote:
On Mon, Oct 29, 2012 at 4:25 PM, Dehao Chen de...@google.com wrote:
On Mon, Oct 29, 2012 at 7:17 AM, Michael Matz m...@suse.de wrote:

Hi,

On Mon, 29 Oct 2012, Richard Biener wrote:

Well, you merely moved the bogus code to gimple-low.c. It is bogus because you unconditionally overwrite TREE_BLOCK of all operands (and all

Emm, then in gimple-low.c, we should probably not unconditionally overwrite gimple_block for stmts too?

gimple stmts have no block before gimple-low. And tree expressions don't have TREE_BLOCK before gimple-low either. So IMNSHO it is gimple-low.c that should set TREE_BLOCK of all the gimple stmts as well as all expressions in the operands. It is not overwriting anything; no frontend sets TREE_BLOCK for any expression. The way frontends associate IL with BLOCKs is by putting them inside of BIND_EXPR/GIMPLE_BIND after gimplification, and it is gimple-low's responsibility to set it. In 4.3 before tuples, it was solely gimple-low that set TREE_BLOCK initially. Before the location_t changes, again it was gimple-low that was the first setter of TREE_BLOCK, which was valid for all IS_EXPR_CODE_CLASS.

So, IMNSHO gimple-low should merge location_t with block for all gimple stmts and all tree expressions used in its operands. It shouldn't be set on trees that can be shared, so say decls etc. should keep using just location_t's without associated block. So perhaps the right test for gimple-low setting of block is CAN_HAVE_LOCATION_P (exp) && !tree_node_can_be_shared (exp).

	Jakub
Re: [Patch] Remove _GLIBCXX_HAVE_BROKEN_VSWPRINTF from mingw32-w64/os_defines.h
On 10/29/2012 21:05, JonY wrote: ChangeLog 2012-10-29 Jonathan Yong jo...@users.sourceforge.net * config/os/mingw32-w64/os_defines.h: Remove _GLIBCXX_HAVE_BROKEN_VSWPRINTF as no longer required. Index: libstdc++-v3/config/os/mingw32-w64/os_defines.h === --- libstdc++-v3/config/os/mingw32-w64/os_defines.h (revision 192802) +++ libstdc++-v3/config/os/mingw32-w64/os_defines.h (working copy) @@ -63,8 +63,9 @@ // See libstdc++/20806. #define _GLIBCXX_HAVE_DOS_BASED_FILESYSTEM 1 -// See libstdc++/37522. -#define _GLIBCXX_HAVE_BROKEN_VSWPRINTF 1 +// See libstdc++/37522. mingw-w64 stdio redirect for C++ +// #define _GLIBCXX_HAVE_BROKEN_VSWPRINTF 1 +// Workaround added for mingw-w64 trunk headers r5437 // See libstdc++/43738 // On native windows targets there is no ioctl function. And the existing Hi, Can I have this in before 4.8 branches? Maintainer is away for the few weeks, but OK'ed it on IRC. signature.asc Description: OpenPGP digital signature
Re: [PATCH, libstdc++] Make empty std::string storage readonly
On 30 October 2012 09:28, Jonathan Wakely wrote: On 30 October 2012 09:05, Michael Haubenwallner wrote: Any chance to get this in for 4.8? I'm looking into it today. Consider the case where one object file containing std::string().erase() is built with an older GCC without the fix for PR 40518, then it's linked to a new libstdc++.so where the empty rep is read-only. The program will attempt to write to the empty rep, but now it's read-only and will crash. I don't think we can apply it unless we change the library ABI so that no pre-PR40518 objects can link to a libstdc++.so containing a read-only empty rep.
Re: PATCH: PR rtl-optimization/55093: [4.8 Regression] [x32] -maddress-mode=long failed
H.J. Lu hjl.to...@gmail.com writes:

LRA has

  if (REG_P (reg) && (ep = get_elimination (reg)) != NULL)
    {
      rtx to_rtx = replace_p ? ep->to_rtx : ep->from_rtx;

      if (! replace_p)
	{
	  offset += (ep->offset - ep->previous_offset);
	  offset = trunc_int_for_mode (offset, GET_MODE (plus_cst_src));
	}

      if (GET_CODE (XEXP (plus_cst_src, 0)) == SUBREG)
	to_rtx = gen_lowpart (GET_MODE (XEXP (plus_cst_src, 0)), to_rtx);

Reload has

  rtx to_rtx = ep->to_rtx;
  offset += ep->offset;
  offset = trunc_int_for_mode (offset, GET_MODE (plus_cst_src));

  if (GET_CODE (XEXP (plus_cst_src, 0)) == SUBREG)
	to_rtx = gen_lowpart (GET_MODE (XEXP (plus_cst_src, 0)), to_rtx);

(gdb) call debug_rtx (ep->to_rtx)
(reg/f:DI 7 sp)
(gdb) call debug_rtx (ep->from_rtx)
(reg/f:DI 16 argp)
(gdb)

gen_lowpart returns (reg/f:DI 7 sp) for reload and (reg:SI 16 argp) for LRA. They are caused by

  if (FRAME_POINTER_REGNUM != ARG_POINTER_REGNUM
      /* We should convert arg register in LRA after the elimination
	 if it is possible.  */
      && xregno == ARG_POINTER_REGNUM
      && ! lra_in_progress)
    return -1;

It doesn't work in this case.

This testcase shows that LRA can't convert arg register after the elimination. Here is a patch to remove the lra_in_progress check for ARG_POINTER_REGNUM. Tested on Linux/x86-64. OK to install?

Thanks HJ. This looks good to me. As well as your testcase, I think it would be dangerous to reduce this kind of subreg during non-final elimination in cases where the argument pointer occupies more than one hard register (like avr IIRC). We could end up with something like ARG_POINTER_REGNUM+1, which wouldn't show up as an elimination register during the rest of LRA. It's important that we do get rid of the subreg during the final elimination stage, but I think alter_subreg already handles that case.

Since this code is outside the LRA files: patch is OK if Vlad agrees.

Richard
Re: [PATCH, libstdc++] Make empty std::string storage readonly
On 30 October 2012 10:11, Jonathan Wakely wrote: On 30 October 2012 09:28, Jonathan Wakely wrote: On 30 October 2012 09:05, Michael Haubenwallner wrote: Any chance to get this in for 4.8? I'm looking into it today. Consider the case where one object file containing std::string().erase() is built with an older GCC without the fix for PR 40518, then it's linked to a new libstdc++.so where the empty rep is read-only. The program will attempt to write to the empty rep, but now it's read-only and will crash. I don't think we can apply it unless we change the library ABI so that no pre-PR40518 objects can link to a libstdc++.so containing a read-only empty rep. If I can convince myself this can't happen then I'll commit it, but I need to look into it further this evening.
Re: RFA: patch to fix PR55116
H.J. Lu hjl.to...@gmail.com writes:
On Mon, Oct 29, 2012 at 5:11 PM, H.J. Lu hjl.to...@gmail.com wrote:
On Mon, Oct 29, 2012 at 4:41 PM, H.J. Lu hjl.to...@gmail.com wrote:
On Mon, Oct 29, 2012 at 9:38 AM, Vladimir Makarov vmaka...@redhat.com wrote:
On 12-10-29 12:21 PM, Richard Sandiford wrote:
Vladimir Makarov vmaka...@redhat.com writes:

H.J. in http://gcc.gnu.org/bugzilla/show_bug.cgi?id=55116 reported an interesting address:

(and:DI (subreg:DI (plus:SI (ashift:SI (reg:SI 96 [ glob_vol_int.22 ])
                                       (const_int 2 [0x2]))
                            (symbol_ref:SI ("glob_vol_int_arr") var_decl 0x703c2720 glob_vol_int_arr)) 0)
        (const_int 4294967295 [0xffffffff]))

which cannot be correctly extracted. Here `and' with `subreg' behaves as an address mutation. The following patch fixes the problem. Ok to commit, Richard?

Heh, I wondered if subregs might still be used like that, and was almost tempted to add them just in case. I think this particular case is really a failed canonicalisation and that:

  (and:DI (subreg:DI (foo:SI ...) 0) (const_int 0xffffffff))

ought to be:

  (zero_extend:DI (foo:SI ...))

Yes, that was my thought too. But I know I've approved MIPS patches to accept (and:DI ... (const_int 0xffffffff)) as an alternative.

Index: rtlanal.c
===
--- rtlanal.c (revision 192942)
+++ rtlanal.c (working copy)
@@ -5459,6 +5459,11 @@ strip_address_mutations (rtx *loc, enum
       else if (code == AND && CONST_INT_P (XEXP (*loc, 1)))
 	/* (and ... (const_int -X)) is used to align to X bytes.  */
 	loc = &XEXP (*loc, 0);
+      else if (code == SUBREG
+	       && ! REG_P (XEXP (*loc, 0)) && ! MEM_P (XEXP (*loc, 0)))
+	/* (subreg (operator ...) ...) usually inside and is used for
+	   mode conversion too.  */
+	loc = &XEXP (*loc, 0);

I think the condition should be:

      else if (code == SUBREG
	       && !OBJECT_P (SUBREG_REG (*loc))
	       && subreg_lowpart_p (*loc))

OK with that change, if it works.

Yes, it works. I've submitted the following patch.

It doesn't work right. I will create a new testcase.

This patch limits SUBREG to Pmode. Tested on Linux/x86-64. OK to install? Thanks.

The address in this case is:

(plus:SI (mult:SI (reg/v:SI 223 [orig:154 j ] [154])
		  (const_int 8 [0x8]))
	 (subreg:SI (plus:DI (reg/f:DI 20 frame)
			     (const_int 32 [0x20])) 0))

which after Uros's subreg simplification patch shouldn't be allowed: the subreg ought to be on the frame register rather than the plus. The attached patch seems to work for the testcase. Does it work more generally?

Richard

gcc/
	* lra-eliminations.c (lra_eliminate_regs_1): Use simplify_gen_subreg
	rather than gen_rtx_SUBREG.

Index: gcc/lra-eliminations.c
===
--- gcc/lra-eliminations.c (revision 192983)
+++ gcc/lra-eliminations.c (working copy)
@@ -550,7 +550,8 @@
 	      return x;
 	    }
 	  else
-	    return gen_rtx_SUBREG (GET_MODE (x), new_rtx, SUBREG_BYTE (x));
+	    return simplify_gen_subreg (GET_MODE (x), new_rtx,
+					GET_MODE (new_rtx), SUBREG_BYTE (x));
 	}

       return x;
[RFC] Heuristics to throttle the complete unrolling
Hi, for the past week or two I have been playing with ways to throttle down the complete unrolling heuristics. I made the complete unroller use the tree-ssa-loop-niter upper bound and unroll even in non-trivial cases, and this has turned out to increase the number of completely unrolled loops by a great amount, so one can see it as considerable code size growth at the -O3 SPEC build. http://gcc.opensuse.org/SPEC/CFP/sb-vangelis-head-64/Total-size_big.png it is the largest jump on the right hand side in both peak and base runs. There are also performance improvements, most importantly 11% on applu. The intuition is that complete unrolling is most profitable when IV tests are eliminated and a single basic block is created. When conditionals stay in the code it is not that good an idea, and functions containing calls are also less interesting for unrolling since the calls are slow and the optimization opportunities are not so great. This patch reduces unrolling on loops having many branches or calls on the hot path. I found that for the applu speedup the number of branches needs to be pretty high - about 32. The patch saves about half of the growth introduced (but on different benchmarks) and I think I can move all peeling to trees and reduce the peeling limits a bit, too. Does this sound sane? Any ideas? Honza Index: tree-ssa-loop-ivcanon.c === --- tree-ssa-loop-ivcanon.c (revision 192892) +++ tree-ssa-loop-ivcanon.c (working copy) @@ -140,6 +140,20 @@ struct loop_size instructions after exit are not executed. */ int last_iteration; int last_iteration_eliminated_by_peeling; + + /* If some IV computation will become constant. */ + bool constant_iv; + + /* Number of call stmts that are not a builtin and are pure or const + present on the hot path. */ + int num_pure_calls_on_hot_path; + /* Number of call stmts that are not a builtin and are not pure nor const + present on the hot path. */ + int num_non_pure_calls_on_hot_path; + /* Number of statements other than calls in the loop. */ + int non_call_stmts_on_hot_path; + /* Number of branches seen on the hot path. */ + int num_branches_on_hot_path; }; /* Return true if OP in STMT will be constant after peeling LOOP. */ @@ -188,7 +202,11 @@ constant_after_peeling (tree op, gimple return true; } -/* Computes an estimated number of insns in LOOP, weighted by WEIGHTS. +/* Computes an estimated number of insns in LOOP. + EXIT (if non-NULL) is an exit edge that will be eliminated in all but the last + iteration of the loop. + EDGE_TO_CANCEL (if non-NULL) is a non-exit edge eliminated in the last iteration + of the loop. Return results in SIZE, estimate benefits for complete unrolling exiting by EXIT. */ static void @@ -198,11 +216,17 @@ tree_estimate_loop_size (struct loop *lo gimple_stmt_iterator gsi; unsigned int i; bool after_exit; + VEC (basic_block, heap) *path = get_loop_hot_path (loop); size->overall = 0; size->eliminated_by_peeling = 0; size->last_iteration = 0; size->last_iteration_eliminated_by_peeling = 0; + size->num_pure_calls_on_hot_path = 0; + size->num_non_pure_calls_on_hot_path = 0; + size->non_call_stmts_on_hot_path = 0; + size->num_branches_on_hot_path = 0; + size->constant_iv = 0; if (dump_file && (dump_flags & TDF_DETAILS)) fprintf (dump_file, "Estimating sizes for loop %i\n", loop->num); @@ -221,6 +245,8 @@ tree_estimate_loop_size (struct loop *lo gimple stmt = gsi_stmt (gsi); int num = estimate_num_insns (stmt, eni_size_weights); bool likely_eliminated = false; + bool likely_eliminated_last = false; + bool likely_eliminated_peeled = false; if (dump_file && (dump_flags & TDF_DETAILS)) { @@ -231,11 +257,21 @@ tree_estimate_loop_size (struct loop *lo /* Look for reasons why we might optimize this stmt away. */ /* Exit conditional. */ - if (exit && body[i] == exit->src && stmt == last_stmt (exit->src)) + if (exit && body[i] == exit->src + && stmt == last_stmt (exit->src)) { if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, "Exit condition will be eliminated.\n"); - likely_eliminated = true; + fprintf (dump_file, "Exit condition will be eliminated " "in peeled copies.\n"); + likely_eliminated_peeled = true; + } + else if (edge_to_cancel && body[i] == edge_to_cancel->src + && stmt == last_stmt (edge_to_cancel->src)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Exit condition will be eliminated " "in last copy.\n"); + likely_eliminated_last = true; } /* Sets of IV variables */ else if (gimple_code (stmt) == GIMPLE_ASSIGN @@ -249,19 +285,22 @@
Re: [RFC] Heuristics to throttle the complete unrolling
On Tue, 30 Oct 2012, Jan Hubicka wrote: Hi, for the past week or two I have been playing with ways to throttle down the complete unrolling heuristics. I made the complete unroller use the tree-ssa-loop-niter upper bound and unroll even in non-trivial cases, and this has turned out to increase the number of completely unrolled loops by a great amount, so one can see it as considerable code size growth at the -O3 SPEC build. http://gcc.opensuse.org/SPEC/CFP/sb-vangelis-head-64/Total-size_big.png it is the largest jump on the right hand side in both peak and base runs. There are also performance improvements, most importantly 11% on applu. The intuition is that complete unrolling is most profitable when IV tests are eliminated and a single basic block is created. When conditionals stay in the code it is not that good an idea, and functions containing calls are also less interesting for unrolling since the calls are slow and the optimization opportunities are not so great. This patch reduces unrolling on loops having many branches or calls on the hot path. I found that for the applu speedup the number of branches needs to be pretty high - about 32. The patch saves about half of the growth introduced (but on different benchmarks) and I think I can move all peeling to trees and reduce the peeling limits a bit, too. Does this sound sane? Any ideas? Yes, this sounds ok (beware of the unrelated PARAM_MAX_ONCE_PEELED_INSNS removal in the patch below). Richard. Honza Index: tree-ssa-loop-ivcanon.c === --- tree-ssa-loop-ivcanon.c (revision 192892) +++ tree-ssa-loop-ivcanon.c (working copy) @@ -140,6 +140,20 @@ struct loop_size instructions after exit are not executed. */ int last_iteration; int last_iteration_eliminated_by_peeling; + + /* If some IV computation will become constant. */ + bool constant_iv; + + /* Number of call stmts that are not a builtin and are pure or const + present on the hot path. */ + int num_pure_calls_on_hot_path; + /* Number of call stmts that are not a builtin and are not pure nor const + present on the hot path. */ + int num_non_pure_calls_on_hot_path; + /* Number of statements other than calls in the loop. */ + int non_call_stmts_on_hot_path; + /* Number of branches seen on the hot path. */ + int num_branches_on_hot_path; }; /* Return true if OP in STMT will be constant after peeling LOOP. */ @@ -188,7 +202,11 @@ constant_after_peeling (tree op, gimple return true; } -/* Computes an estimated number of insns in LOOP, weighted by WEIGHTS. +/* Computes an estimated number of insns in LOOP. + EXIT (if non-NULL) is an exit edge that will be eliminated in all but the last + iteration of the loop. + EDGE_TO_CANCEL (if non-NULL) is a non-exit edge eliminated in the last iteration + of the loop. Return results in SIZE, estimate benefits for complete unrolling exiting by EXIT. */ static void @@ -198,11 +216,17 @@ tree_estimate_loop_size (struct loop *lo gimple_stmt_iterator gsi; unsigned int i; bool after_exit; + VEC (basic_block, heap) *path = get_loop_hot_path (loop); size->overall = 0; size->eliminated_by_peeling = 0; size->last_iteration = 0; size->last_iteration_eliminated_by_peeling = 0; + size->num_pure_calls_on_hot_path = 0; + size->num_non_pure_calls_on_hot_path = 0; + size->non_call_stmts_on_hot_path = 0; + size->num_branches_on_hot_path = 0; + size->constant_iv = 0; if (dump_file && (dump_flags & TDF_DETAILS)) fprintf (dump_file, "Estimating sizes for loop %i\n", loop->num); @@ -221,6 +245,8 @@ tree_estimate_loop_size (struct loop *lo gimple stmt = gsi_stmt (gsi); int num = estimate_num_insns (stmt, eni_size_weights); bool likely_eliminated = false; + bool likely_eliminated_last = false; + bool likely_eliminated_peeled = false; if (dump_file && (dump_flags & TDF_DETAILS)) { @@ -231,11 +257,21 @@ tree_estimate_loop_size (struct loop *lo /* Look for reasons why we might optimize this stmt away. */ /* Exit conditional. */ - if (exit && body[i] == exit->src && stmt == last_stmt (exit->src)) + if (exit && body[i] == exit->src + && stmt == last_stmt (exit->src)) { if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, "Exit condition will be eliminated.\n"); - likely_eliminated = true; + fprintf (dump_file, "Exit condition will be eliminated " "in peeled copies.\n"); + likely_eliminated_peeled = true; + } + else if (edge_to_cancel && body[i] == edge_to_cancel->src + && stmt == last_stmt (edge_to_cancel->src)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Exit condition will be eliminated
Re: RFA: patch to fix PR55116
On Tue, Oct 30, 2012 at 4:09 AM, Richard Sandiford rdsandif...@googlemail.com wrote: H.J. Lu hjl.to...@gmail.com writes: On Mon, Oct 29, 2012 at 5:11 PM, H.J. Lu hjl.to...@gmail.com wrote: On Mon, Oct 29, 2012 at 4:41 PM, H.J. Lu hjl.to...@gmail.com wrote: On Mon, Oct 29, 2012 at 9:38 AM, Vladimir Makarov vmaka...@redhat.com wrote: On 12-10-29 12:21 PM, Richard Sandiford wrote: Vladimir Makarov vmaka...@redhat.com writes: H.J. in http://gcc.gnu.org/bugzilla/show_bug.cgi?id=55116 reported an interesting address (and:DI (subreg:DI (plus:SI (ashift:SI (reg:SI 96 [ glob_vol_int.22 ]) (const_int 2 [0x2])) (symbol_ref:SI (glob_vol_int_arr) var_decl 0x703c2720 glob_vol_int_arr)) 0) (const_int 4294967295 [0xffffffff])) which cannot be correctly extracted. Here `and' with `subreg' behaves as an address mutation. The following patch fixes the problem. Ok to commit, Richard? Heh, I wondered if subregs might still be used like that, and was almost tempted to add them just in case. I think this particular case is really a failed canonicalisation and that: (and:DI (subreg:DI (foo:SI ...) 0) (const_int 0xffffffff)) ought to be: (zero_extend:DI (foo:SI ...)) Yes, that was my thought too. But I know I've approved MIPS patches to accept (and:DI ... (const_int 0xffffffff)) as an alternative. Index: rtlanal.c === --- rtlanal.c (revision 192942) +++ rtlanal.c (working copy) @@ -5459,6 +5459,11 @@ strip_address_mutations (rtx *loc, enum else if (code == AND && CONST_INT_P (XEXP (*loc, 1))) /* (and ... (const_int -X)) is used to align to X bytes. */ loc = &XEXP (*loc, 0); + else if (code == SUBREG + && ! REG_P (XEXP (*loc, 0)) + && ! MEM_P (XEXP (*loc, 0))) + /* (subreg (operator ...) ...) usually inside and is used for + mode conversion too. */ + loc = &XEXP (*loc, 0); I think the condition should be: else if (code == SUBREG && !OBJECT_P (SUBREG_REG (*loc)) && subreg_lowpart_p (*loc)) OK with that change, if it works. Yes, it works. I've submitted the following patch. It doesn't work right. I will create a new testcase. 
This patch limits SUBREG to Pmode. Tested on Linux/x86-64. OK to install? Thanks. The address in this case is: (plus:SI (mult:SI (reg/v:SI 223 [orig:154 j ] [154]) (const_int 8 [0x8])) (subreg:SI (plus:DI (reg/f:DI 20 frame) (const_int 32 [0x20])) 0)) which after Uros's subreg simplification patch shouldn't be allowed: the subreg ought to be on the frame register rather than the plus. The attached patch seems to work for the testcase. Does it work more generally? Richard gcc/ * lra-eliminations.c (lra_eliminate_regs_1): Use simplify_gen_subreg rather than gen_rtx_SUBREG. Index: gcc/lra-eliminations.c === --- gcc/lra-eliminations.c (revision 192983) +++ gcc/lra-eliminations.c (working copy) @@ -550,7 +550,8 @@ return x; } else - return gen_rtx_SUBREG (GET_MODE (x), new_rtx, SUBREG_BYTE (x)); + return simplify_gen_subreg (GET_MODE (x), new_rtx, + GET_MODE (new_rtx), SUBREG_BYTE (x)); } return x; I am running the full test. Thanks. -- H.J.
Re: Fix estimation of array accesses
On Mon, 29 Oct 2012, Jan Hubicka wrote: ICK ... Why not sth as simple as return num_ssa_operands (stmt, SSA_OP_USE); ? a[1][2] and b[2] really have the same cost, variable length objects have extra SSA operands in ARRAY_REF/COMPONENT_REF for the size. Thus, stmt cost somehow should reflect the number of dependent stmts. So in estimate_num_insns I'd try int estimate_num_insns (gimple stmt, eni_weights *weights) { unsigned cost, i; enum gimple_code code = gimple_code (stmt); tree lhs; tree rhs; switch (code) { case GIMPLE_ASSIGN: /* Initialize with the number of SSA uses, one is free. */ cost = num_ssa_operands (stmt, SSA_OP_USE); if (cost > 1) --cost; Hi, this is the updated patch I am testing after today's discussion. I decided to drop the ipa-inline-analysis changes and do that incrementally. So the patch now trashes tramp3d performance by increasing the need for early-inlining-insns, but it is not unexpected. The patch also fixes accounting of addr expr that previously got confused with a load. Does this seem sane? * tree-inline.c (estimate_operator_cost): Return 1 for non-trivial ADDR_EXPR operations. (estimate_num_insns): Do not confuse general single rhs with loads; account cost of non-trivial addr_expr for ASSIGN_EXPR, GIMPLE_RETURN and GIMPLE_ASM. Hi, this patch actually does not cause that much tramp3d fuzz and no differences in the testsuite due to unrolling changes. The changes are the constants when accounting loads and stores. A typical store has two SSA uses (one for the address and one for the value to store). Of course we lose the difference between array offset and pointer dereference. A typical load/address expression has one SSA use (for the address). Bootstrapped/regtested x86_64-linux, OK? ChangeLog? 
Honza Index: tree-inline.c === --- tree-inline.c (revision 192945) +++ tree-inline.c (working copy) @@ -3447,6 +3447,19 @@ estimate_operator_cost (enum tree_code c if (TREE_CODE (op2) != INTEGER_CST) return weights->div_mod_cost; return 1; + case ADDR_EXPR: + { + tree addr_base; + HOST_WIDE_INT addr_offset; + + addr_base = get_addr_base_and_unit_offset (TREE_OPERAND (op1, 0), + &addr_offset); + /* If the offset is variable or with non-zero offset, return 2. */ + if (!addr_base || addr_offset != 0 || TREE_CODE (addr_base) != MEM_REF + || !integer_zerop (TREE_OPERAND (addr_base, 1))) + return 1; The comment doesn't match the code. Why the TREE_CODE (addr_base) != MEM_REF check? If it isn't a MEM_REF then it is a plain decl, thus no dereference. So it's not clear what you want here ...? It looks like you want to pessimize variable addresses here, like &a.a[i]? Before, all ADDR_EXPR cost was zero. I'd say you want simply if (!addr_base || addr_offset != 0) return 1; no? Or even if (!addr_base || (addr_offset != 0 && TREE_CODE (addr_base) == MEM_REF)) return 1; that keeps &decl + CST as cost 0. Or even if (!addr_base) return 1; that even keeps ptr + CST as cost 0. Both because they are likely combined with some complex addressing mode later. + } + return 0; Inside the } please. default: /* We expect a copy assignment with no operator. */ @@ -3512,14 +3525,24 @@ estimate_num_insns (gimple stmt, eni_wei lhs = gimple_assign_lhs (stmt); rhs = gimple_assign_rhs1 (stmt); - if (is_gimple_reg (lhs)) - cost = 0; - else + /* Store. */ + if (gimple_vdef (stmt)) cost = estimate_move_cost (TREE_TYPE (lhs)); + else + cost = 0; That change seems bogus. A !is_gimple_reg lhs is always a store. - if (!is_gimple_reg (rhs) && !is_gimple_min_invariant (rhs)) + /* Load. */ + if (gimple_vuse (stmt)) cost += estimate_move_cost (TREE_TYPE (rhs)); Likewise. If rhs1 is not a register or a constant this is a load. All stores also have a VUSE so you are always accounting a store as an aggregate copy this way. 
+ /* Stores, loads and address expressions may have variable array + references in them. Account these. */ + if (gimple_vdef (stmt)) + cost += MAX (0, (int) num_ssa_operands (stmt, SSA_OP_USE) - 2); + else if (gimple_vuse (stmt) + || gimple_assign_rhs_code (stmt) == ADDR_EXPR) + cost += MAX (0, (int) num_ssa_operands (stmt, SSA_OP_USE) - 1); + Rather than doing this here in this awkward and bogus way (see above) why not do it in estimate_operator_cost? You added ADDR_EXPR already, so simply add DECLs and handled-components. Then you can do better than estimating from
Re: [patch] Unify bitmap interface.
On Tue, Oct 30, 2012 at 2:17 AM, Bin.Cheng amker.ch...@gmail.com wrote: On Tue, Oct 30, 2012 at 8:23 AM, Lawrence Crowl cr...@googlers.com wrote: On 10/29/12, Diego Novillo dnovi...@google.com wrote: On Oct 29, 2012 Diego Novillo dnovi...@google.com wrote: Just to make sure. Testing on ppc should be fast, for example. And useless. Your patch does not touch ppc. I've fixed the #if 0 and the remaining suggestions will happen in another patch. I've committed this one. === This patch implements the unification of the *bitmap interfaces as discussed. Essentially, we rename ebitmap and sbitmap functions to use the same names as the bitmap functions. This rename works because we can now overload on the bitmap type. Some macros now become inline functions to enable that overloading. The sbitmap non-bool returning bitwise operations have been merged with the bool versions. Sometimes this merge involved modifying the non-bool version to compute the bool value, and sometimes modifying the bool version to add additional work from the non-bool version. The redundant routines have been removed. The allocation functions have not been renamed, because we often do not have an argument on which to overload. The cardinality functions have not been renamed, because they have different parameters, and are thus not interchangeable. The iteration functions have not been renamed, because they are functionally different. Tested on x86_64, contrib/config-list.mk testing passed. Index: gcc/ChangeLog Just one question: Should we change the name of functions sbitmap_intersection_of_succs/sbitmap_intersection_of_preds/sbitmap_union_of_succs/sbitmap_union_of_preds too? It might be a little confusing that sbitmap_* is used among bitmap_*. Yes. Lawrence is proceeding with this unification in stages. The next few patches should rename these. The only two sets of functions that will remain separate for now are the iterators and the bitmap creation routines, I think. Lawrence? Diego.
Re: [patch] Unify bitmap interface.
On Mon, Oct 29, 2012 at 4:25 PM, Steven Bosscher stevenb@gmail.com wrote: On Mon, Oct 29, 2012 at 7:54 PM, Diego Novillo wrote: Sure. But the point is not to add more. We should mechanically strip all the #if 0 code from the tree, btw. No point keeping all that garbage around. Please no. A lot (if not most) of the #if 0 code serves as good documentation for why something is *not* done, other pieces are there to indicate possible enhancements, and some are useful for debugging. I never really bought into that line of reasoning. Documenting why an approach was not taken is better done in words than in code that will grow stale wrt the surrounding code. Similarly for possible enhancements. Prose is better than code in those cases. If debugging code is useful, then it can remain predicated on some debugging switch. Diego.
[PATCH] Fix PR55111
This fixes PR55111. Bootstrapped and tested on x86_64-unknown-linux-gnu, applied. Richard. 2012-10-30 Richard Biener rguent...@suse.de PR tree-optimization/55111 * tree-ssa-pre.c (eliminate_insert): Properly fold the built stmt. * gcc.dg/torture/pr55111.c: New testcase. Index: gcc/tree-ssa-pre.c === *** gcc/tree-ssa-pre.c (revision 192983) --- gcc/tree-ssa-pre.c (working copy) *** eliminate_insert (gimple_stmt_iterator * *** 3996,4003 tree res = make_temp_ssa_name (TREE_TYPE (val), NULL, pretmp); gimple tem = gimple_build_assign (res, ! build1 (TREE_CODE (expr), ! TREE_TYPE (expr), leader)); gsi_insert_before (gsi, tem, GSI_SAME_STMT); VN_INFO_GET (res)-valnum = val; --- 3996,4003 tree res = make_temp_ssa_name (TREE_TYPE (val), NULL, pretmp); gimple tem = gimple_build_assign (res, ! fold_build1 (TREE_CODE (expr), !TREE_TYPE (expr), leader)); gsi_insert_before (gsi, tem, GSI_SAME_STMT); VN_INFO_GET (res)-valnum = val; Index: gcc/testsuite/gcc.dg/torture/pr55111.c === *** gcc/testsuite/gcc.dg/torture/pr55111.c (revision 0) --- gcc/testsuite/gcc.dg/torture/pr55111.c (working copy) *** *** 0 --- 1,24 + /* { dg-do compile } */ + + int a, b, c; + long d; + unsigned long *e; + + int f(void) + { + for(;; a++) + { + if(c) + { + for(b = d = 0; b 1; b++) + e = d; + + --*e; + + if(d 0) + a = 0; + + return d; + } + } + }
Re: Adapt one fold-const optimization for vectors
On Tue, 30 Oct 2012, Marek Polacek wrote: On Tue, Oct 30, 2012 at 08:05:13AM +0100, Marc Glisse wrote: Hello, one more optimization that needed help for vectors, it crashed on (xy)0. Because of PR 55001, testcases are awkward to add (I could do an x86-only one if needed). bootstrap+testsuite. 2012-10-30 Marc Glisse marc.gli...@inria.fr * fold-const.c (fold_binary_op_with_conditional_arg): Handle vectors. (fold_binary_loc): Call it for VEC_COND_EXPR. Patch missing? Indeed, thanks for the notice. Here it is. -- Marc Glisse Index: fold-const.c === --- fold-const.c (revision 192976) +++ fold-const.c (working copy) @@ -5952,42 +5952,47 @@ static tree fold_binary_op_with_conditional_arg (location_t loc, enum tree_code code, tree type, tree op0, tree op1, tree cond, tree arg, int cond_first_p) { tree cond_type = cond_first_p ? TREE_TYPE (op0) : TREE_TYPE (op1); tree arg_type = cond_first_p ? TREE_TYPE (op1) : TREE_TYPE (op0); tree test, true_value, false_value; tree lhs = NULL_TREE; tree rhs = NULL_TREE; + enum tree_code cond_code = COND_EXPR; - if (TREE_CODE (cond) == COND_EXPR) + if (TREE_CODE (cond) == COND_EXPR + || TREE_CODE (cond) == VEC_COND_EXPR) { test = TREE_OPERAND (cond, 0); true_value = TREE_OPERAND (cond, 1); false_value = TREE_OPERAND (cond, 2); /* If this operand throws an expression, then it does not make sense to try to perform a logical or arithmetic operation involving it. */ if (VOID_TYPE_P (TREE_TYPE (true_value))) lhs = true_value; if (VOID_TYPE_P (TREE_TYPE (false_value))) rhs = false_value; } else { tree testtype = TREE_TYPE (cond); test = cond; true_value = constant_boolean_node (true, testtype); false_value = constant_boolean_node (false, testtype); } + if (TREE_CODE (TREE_TYPE (test)) == VECTOR_TYPE) + cond_code = VEC_COND_EXPR; + /* This transformation is only worthwhile if we don't have to wrap ARG in a SAVE_EXPR and the operation can be simplified on at least one of the branches once its pushed inside the COND_EXPR. */ if (!TREE_CONSTANT (arg) && (TREE_SIDE_EFFECTS (arg) || TREE_CONSTANT (true_value) || TREE_CONSTANT (false_value))) return NULL_TREE; arg = fold_convert_loc (loc, arg_type, arg); if (lhs == 0) @@ -6004,21 +6009,21 @@ fold_binary_op_with_conditional_arg (loc if (cond_first_p) rhs = fold_build2_loc (loc, code, type, false_value, arg); else rhs = fold_build2_loc (loc, code, type, arg, false_value); } /* Check that we have simplified at least one of the branches. */ if (!TREE_CONSTANT (arg) && !TREE_CONSTANT (lhs) && !TREE_CONSTANT (rhs)) return NULL_TREE; - return fold_build3_loc (loc, COND_EXPR, type, test, lhs, rhs); + return fold_build3_loc (loc, cond_code, type, test, lhs, rhs); } /* Subroutine of fold() that checks for the addition of +/- 0.0. If !NEGATE, return true if ADDEND is +/-0.0 and, for all X of type TYPE, X + ADDEND is the same as X. If NEGATE, return true if X - ADDEND is the same as X. X + 0 and X - 0 both give X when X is NaN, infinite, or nonzero @@ -9864,30 +9869,34 @@ fold_binary_loc (location_t loc, if (TREE_CODE (arg1) == COMPOUND_EXPR && reorder_operands_p (arg0, TREE_OPERAND (arg1, 0))) { tem = fold_build2_loc (loc, code, type, op0, fold_convert_loc (loc, TREE_TYPE (op1), TREE_OPERAND (arg1, 1))); return build2_loc (loc, COMPOUND_EXPR, type, TREE_OPERAND (arg1, 0), tem); } - if (TREE_CODE (arg0) == COND_EXPR || COMPARISON_CLASS_P (arg0)) + if (TREE_CODE (arg0) == COND_EXPR + || TREE_CODE (arg0) == VEC_COND_EXPR + || COMPARISON_CLASS_P (arg0)) { tem = fold_binary_op_with_conditional_arg (loc, code, type, op0, op1, arg0, arg1, /*cond_first_p=*/1); if (tem != NULL_TREE) return tem; } - if (TREE_CODE (arg1) == COND_EXPR || COMPARISON_CLASS_P (arg1)) + if (TREE_CODE (arg1) == COND_EXPR + || TREE_CODE (arg1) == VEC_COND_EXPR + || COMPARISON_CLASS_P (arg1)) { tem = fold_binary_op_with_conditional_arg (loc, code, type, op0, op1, arg1, arg0, /*cond_first_p=*/0); if (tem != NULL_TREE) return tem; } } switch (code)
Re: [patch] Apply conditional down cast to cgraph.h et.al.
On 2012-10-29 15:01, Lawrence Crowl wrote: On 10/27/12, Marc Glisse marc.gli...@inria.fr wrote: On Fri, 26 Oct 2012, Lawrence Crowl wrote: 2012-10-26 Lawrence Crowl cr...@google.com missing '' Fixed. * is-a.h: New. (is_a <T> (U*)): New. Test for is-a relationship. (as_a <T> (U*)): New. Treat as a derived type. (dyn_cast <T> (U*)): New. Conditionally cast based on is_a. I can't find this file in the patch... I forgot to svn add. Updated patch here. This patch implements generic type query and conversion functions, and applies them to the use of cgraph_node, varpool_node, and symtab_node. The functions are: bool is_a <TYPE> (pointer) Tests whether the pointer actually points to a more derived TYPE. TYPE *as_a <TYPE> (pointer) Converts pointer to a TYPE*. TYPE *dyn_cast <TYPE> (pointer) Converts pointer to TYPE* if and only if is_a <TYPE> (pointer). Otherwise, returns NULL. This function is essentially a checked down cast. These functions reduce compile time and increase type safety when treating a generic item as a more specific item. In essence, the code change is from if (symtab_function_p (node)) { struct cgraph_node *cnode = cgraph (node); } to if (cgraph_node *cnode = dyn_cast <cgraph_node> (node)) { } The necessary conditional test defines a variable that holds a known good pointer to the specific item and avoids subsequent conversion calls and the assertion checks that may come with them. When the property test is embedded within a larger condition, the variable declaration gets pulled out of the condition. (This leaves some room for using the variable inappropriately.) if (symtab_variable_p (node) && varpool (node)->finalized) varpool_analyze_node (varpool (node)); becomes varpool_node *vnode = dyn_cast <varpool_node> (node); if (vnode && vnode->finalized) varpool_analyze_node (vnode); Note that we have converted two sets of assertions in the calls to varpool into safe and efficient use of a variable. 
There are remaining calls to symtab_function_p and symtab_variable_p that do not involve a pointer to a more specific type. These have been converted to calls to the functions is_a <cgraph_node> and is_a <varpool_node>. The original predicate functions have been removed. The cgraph.h header defined both a struct and a function with the name varpool_node. This name overloading can cause some unintuitive error messages when, as is common in C++, one omits the struct keyword when using the type. I have renamed the function to varpool_node_for_decl. Tested on x86_64. Okay for trunk? Index: gcc/ChangeLog 2012-10-29 Lawrence Crowl cr...@google.com * is-a.h: New. (is_a <T> (U*)): New. Test for is-a relationship. (as_a <T> (U*)): New. Treat as a derived type. (dyn_cast <T> (U*)): New. Conditionally cast based on is_a. * cgraph.h (varpool_node): Rename to varpool_node_for_decl. Adjust callers to match. (is_a_helper <cgraph_node>::test (symtab_node_def *)): New. (is_a_helper <varpool_node>::test (symtab_node_def *)): New. (symtab_node_def::try_function): New. Change most calls to symtab_function_p with calls to dyn_cast <cgraph_node> (p). (symtab_node_def::try_variable): New. Change most calls to symtab_variable_p with calls to dyn_cast <varpool_node> (p). (symtab_function_p): Remove. Change callers to use is_a <cgraph_node> (p) instead. (symtab_variable_p): Remove. Change callers to use is_a <varpool_node> (p) instead. * cgraph.c (cgraph_node_for_asm): Remove redundant call to symtab_node_for_asm. * cgraphunit.c (symbol_finalized_and_needed): New. (symbol_finalized): New. (cgraph_analyze_functions): Split complicated conditionals out into above new functions. * Makefile.in (CGRAPH_H): Add is-a.h as used by cgraph.h. Thanks. I really like this cleanup. I have a few questions and comments below. Honza, the patch looks OK to me but it touches a bunch of cgraph code, could you please go over it? 
Index: gcc/is-a.h === --- gcc/is-a.h (revision 0) +++ gcc/is-a.h (revision 0) @@ -0,0 +1,118 @@ +/* Dynamic testing for abstract is-a relationships. + Copyright (C) 2012 Free Software Foundation, Inc. + Contributed by Lawrence Crowl. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have
Re: [patch] Unify bitmap interface.
On Tue, Oct 30, 2012 at 12:53 PM, Diego Novillo dnovi...@google.com wrote: On Mon, Oct 29, 2012 at 4:25 PM, Steven Bosscher stevenb@gmail.com wrote: On Mon, Oct 29, 2012 at 7:54 PM, Diego Novillo wrote: Sure. But the point is not to add more. We should mechanically strip all the #if 0 code from the tree, btw. No point keeping all that garbage around. Please no. A lot (if not most) of the #if 0 code serves as good documentation for why something is *not* done, other pieces are there to indicate possible enhancements, and some are useful for debugging. I never really bought into that line of reasoning. Documenting why an approach was not taken is better done in words than in code that will grow stale wrt the surrounding code. Similarly for possible enhancements. Prose is better than code in those cases. If debugging code is useful, then it can remain predicated on some debugging switch. I agree with both of you - #if 0 code, if it is useful as a comment, deserves being rewritten as a comment. Richard. Diego.
Re: Adapt one fold-const optimization for vectors
On Tue, Oct 30, 2012 at 1:14 PM, Marc Glisse marc.gli...@inria.fr wrote: On Tue, 30 Oct 2012, Marek Polacek wrote: On Tue, Oct 30, 2012 at 08:05:13AM +0100, Marc Glisse wrote: Hello, one more optimization that needed help for vectors, it crashed on (xy)0. Because of PR 55001, testcases are awkward to add (I could do a x86-only one if needed). bootstrap+testsuite. 2012-10-30 Marc Glisse marc.gli...@inria.fr * fold-const.c (fold_binary_op_with_conditional_arg): Handle vectors. (fold_binary_loc): call it for VEC_COND_EXPR. Patch missing? Indeed, thanks for the notice. Ok. Thanks, Richard. Here it is. -- Marc Glisse Index: fold-const.c === --- fold-const.c(revision 192976) +++ fold-const.c(working copy) @@ -5952,42 +5952,47 @@ static tree fold_binary_op_with_conditional_arg (location_t loc, enum tree_code code, tree type, tree op0, tree op1, tree cond, tree arg, int cond_first_p) { tree cond_type = cond_first_p ? TREE_TYPE (op0) : TREE_TYPE (op1); tree arg_type = cond_first_p ? TREE_TYPE (op1) : TREE_TYPE (op0); tree test, true_value, false_value; tree lhs = NULL_TREE; tree rhs = NULL_TREE; + enum tree_code cond_code = COND_EXPR; - if (TREE_CODE (cond) == COND_EXPR) + if (TREE_CODE (cond) == COND_EXPR + || TREE_CODE (cond) == VEC_COND_EXPR) { test = TREE_OPERAND (cond, 0); true_value = TREE_OPERAND (cond, 1); false_value = TREE_OPERAND (cond, 2); /* If this operand throws an expression, then it does not make sense to try to perform a logical or arithmetic operation involving it. 
*/ if (VOID_TYPE_P (TREE_TYPE (true_value))) lhs = true_value; if (VOID_TYPE_P (TREE_TYPE (false_value))) rhs = false_value; } else { tree testtype = TREE_TYPE (cond); test = cond; true_value = constant_boolean_node (true, testtype); false_value = constant_boolean_node (false, testtype); } + if (TREE_CODE (TREE_TYPE (test)) == VECTOR_TYPE) + cond_code = VEC_COND_EXPR; + /* This transformation is only worthwhile if we don't have to wrap ARG in a SAVE_EXPR and the operation can be simplified on at least one of the branches once it's pushed inside the COND_EXPR. */ if (!TREE_CONSTANT (arg) && (TREE_SIDE_EFFECTS (arg) || TREE_CONSTANT (true_value) || TREE_CONSTANT (false_value))) return NULL_TREE; arg = fold_convert_loc (loc, arg_type, arg); if (lhs == 0) @@ -6004,21 +6009,21 @@ fold_binary_op_with_conditional_arg (loc if (cond_first_p) rhs = fold_build2_loc (loc, code, type, false_value, arg); else rhs = fold_build2_loc (loc, code, type, arg, false_value); } /* Check that we have simplified at least one of the branches. */ if (!TREE_CONSTANT (arg) && !TREE_CONSTANT (lhs) && !TREE_CONSTANT (rhs)) return NULL_TREE; - return fold_build3_loc (loc, COND_EXPR, type, test, lhs, rhs); + return fold_build3_loc (loc, cond_code, type, test, lhs, rhs); } /* Subroutine of fold() that checks for the addition of +/- 0.0. If !NEGATE, return true if ADDEND is +/-0.0 and, for all X of type TYPE, X + ADDEND is the same as X. If NEGATE, return true if X - ADDEND is the same as X.
X + 0 and X - 0 both give X when X is NaN, infinite, or nonzero @@ -9864,30 +9869,34 @@ fold_binary_loc (location_t loc, if (TREE_CODE (arg1) == COMPOUND_EXPR && reorder_operands_p (arg0, TREE_OPERAND (arg1, 0))) { tem = fold_build2_loc (loc, code, type, op0, fold_convert_loc (loc, TREE_TYPE (op1), TREE_OPERAND (arg1, 1))); return build2_loc (loc, COMPOUND_EXPR, type, TREE_OPERAND (arg1, 0), tem); } - if (TREE_CODE (arg0) == COND_EXPR || COMPARISON_CLASS_P (arg0)) + if (TREE_CODE (arg0) == COND_EXPR + || TREE_CODE (arg0) == VEC_COND_EXPR + || COMPARISON_CLASS_P (arg0)) { tem = fold_binary_op_with_conditional_arg (loc, code, type, op0, op1, arg0, arg1, /*cond_first_p=*/1); if (tem != NULL_TREE) return tem; } - if (TREE_CODE (arg1) == COND_EXPR || COMPARISON_CLASS_P (arg1)) + if (TREE_CODE (arg1) == COND_EXPR + || TREE_CODE (arg1) == VEC_COND_EXPR + || COMPARISON_CLASS_P (arg1)) { tem = fold_binary_op_with_conditional_arg (loc, code, type, op0, op1, arg1,
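The fold being extended here is the familiar push of a binary operation into the arms of a conditional; the patch merely teaches it to build a VEC_COND_EXPR when the condition is a vector. A scalar sketch of the identity (illustration only, not the fold-const.c code itself):

```cpp
#include <cassert>

// (cond ? t : f) op arg  ==>  cond ? (t op arg) : (f op arg)
// The patch lets fold perform this rewrite when the conditional is a
// VEC_COND_EXPR, i.e. the condition is a vector selecting per element.
int original_form (bool cond, int t, int f, int arg)
{
  return (cond ? t : f) + arg;
}

int folded_form (bool cond, int t, int f, int arg)
{
  return cond ? (t + arg) : (f + arg);
}
```

The transformation is only applied when at least one branch simplifies, which is what the `TREE_CONSTANT` checks in the patch guard against.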
Re: [patch] Apply conditional down cast to cgraph.h et.al.
On Tue, Oct 30, 2012 at 1:20 PM, Diego Novillo dnovi...@google.com wrote: On 2012-10-29 15:01 , Lawrence Crowl wrote: On 10/27/12, Marc Glisse marc.gli...@inria.fr wrote: On Fri, 26 Oct 2012, Lawrence Crowl wrote: 2012-10-26 Lawrence Crowl cr...@google.com missing '' Fixed. * is-a.h: New. (is_a <T> (U*)): New. Test for is-a relationship. (as_a <T> (U*)): New. Treat as a derived type. (dyn_cast <T> (U*)): New. Conditionally cast based on is_a. I can't find this file in the patch... I forgot to svn add. Updated patch here. This patch implements generic type query and conversion functions, and applies them to the use of cgraph_node, varpool_node, and symtab_node. The functions are: bool is_a <TYPE> (pointer) Tests whether the pointer actually points to a more derived TYPE. TYPE *as_a <TYPE> (pointer) Converts pointer to a TYPE*. TYPE *dyn_cast <TYPE> (pointer) Converts pointer to TYPE* if and only if is_a <TYPE> (pointer). Otherwise, returns NULL. This function is essentially a checked down cast. These functions reduce compile time and increase type safety when treating a generic item as a more specific item. In essence, the code change is from if (symtab_function_p (node)) { struct cgraph_node *cnode = cgraph (node); } to if (cgraph_node *cnode = dyn_cast <cgraph_node> (node)) { } The necessary conditional test defines a variable that holds a known good pointer to the specific item and avoids subsequent conversion calls and the assertion checks that may come with them. When the property test is embedded within a larger condition, the variable declaration gets pulled out of the condition. (This leaves some room for using the variable inappropriately.) if (symtab_variable_p (node) && varpool (node)->finalized) varpool_analyze_node (varpool (node)); becomes varpool_node *vnode = dyn_cast <varpool_node> (node); if (vnode && vnode->finalized) varpool_analyze_node (vnode); Note that we have converted two sets of assertions in the calls to varpool into safe and efficient use of a variable.
There are remaining calls to symtab_function_p and symtab_variable_p that do not involve a pointer to a more specific type. These have been converted to calls to the functions is_a <cgraph_node> and is_a <varpool_node>. The original predicate functions have been removed. The cgraph.h header defined both a struct and a function with the name varpool_node. This name overloading can cause some unintuitive error messages when, as is common in C++, one omits the struct keyword when using the type. I have renamed the function to varpool_node_for_decl. Tested on x86_64. Okay for trunk? Index: gcc/ChangeLog 2012-10-29 Lawrence Crowl cr...@google.com * is-a.h: New. (is_a <T> (U*)): New. Test for is-a relationship. (as_a <T> (U*)): New. Treat as a derived type. (dyn_cast <T> (U*)): New. Conditionally cast based on is_a. * cgraph.h (varpool_node): Rename to varpool_node_for_decl. Adjust callers to match. (is_a_helper <cgraph_node>::test (symtab_node_def *)): New. (is_a_helper <varpool_node>::test (symtab_node_def *)): New. (symtab_node_def::try_function): New. Change most calls to symtab_function_p with calls to dyn_cast <cgraph_node> (p). (symtab_node_def::try_variable): New. Change most calls to symtab_variable_p with calls to dyn_cast <varpool_node> (p). (symtab_function_p): Remove. Change callers to use is_a <cgraph_node> (p) instead. (symtab_variable_p): Remove. Change callers to use is_a <varpool_node> (p) instead. * cgraph.c (cgraph_node_for_asm): Remove redundant call to symtab_node_for_asm. * cgraphunit.c (symbol_finalized_and_needed): New. (symbol_finalized): New. (cgraph_analyze_functions): Split complicated conditionals out into above new functions. * Makefile.in (CGRAPH_H): Add is-a.h as used by cgraph.h. Thanks. I really like this cleanup. I have a few questions and comments below. Honza, the patch looks OK to me but it touches a bunch of cgraph code, could you please go over it?
Index: gcc/is-a.h === --- gcc/is-a.h (revision 0) +++ gcc/is-a.h (revision 0) @@ -0,0 +1,118 @@ +/* Dynamic testing for abstract is-a relationships. + Copyright (C) 2012 Free Software Foundation, Inc. + Contributed by Lawrence Crowl. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT
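The mechanism Lawrence describes can be sketched as a standalone mini-model. This is a hypothetical illustration, not the actual contents of is-a.h: the node types, the `kind` tag field, and its values are invented here, whereas the real code dispatches on symtab_node_def.

```cpp
#include <cassert>
#include <cstddef>

// Invented stand-ins for the symtab hierarchy, tagged by an int "kind".
struct symtab_node_base { int kind; };   // 0 = function, 1 = variable
struct cgraph_node : symtab_node_base {};
struct varpool_node : symtab_node_base {};

// One is_a_helper specialization per target type carries the test.
template <typename T>
struct is_a_helper;

template <>
struct is_a_helper <cgraph_node>
{
  static bool test (symtab_node_base *p) { return p->kind == 0; }
};

template <>
struct is_a_helper <varpool_node>
{
  static bool test (symtab_node_base *p) { return p->kind == 1; }
};

// is_a: query only.  as_a: asserted downcast.  dyn_cast: checked downcast
// that yields NULL on failure, enabling the
// "if (cgraph_node *cnode = dyn_cast <cgraph_node> (node))" idiom.
template <typename T, typename U>
bool is_a (U *p) { return is_a_helper <T>::test (p); }

template <typename T, typename U>
T *as_a (U *p) { assert (is_a <T> (p)); return static_cast <T *> (p); }

template <typename T, typename U>
T *dyn_cast (U *p) { return is_a <T> (p) ? static_cast <T *> (p) : NULL; }
```

The design point is that the conditional test and the conversion happen once, so later uses of the narrowed pointer need no further assertion checks.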
[PATCH] Add gimple load/store predicates, use them from stmt estimates
As requested this adds predicates to check whether the lhs of an assign or call is a store and whether rhs1 of an assignment is a load. It uses this in place of the existing, slightly bogus, check in the stmt estimate code. Bootstrap and regtest running on x86_64-unknown-linux-gnu. Richard. 2012-10-30 Richard Biener rguent...@suse.de * gimple.h (gimple_store_p): New predicate. (gimple_assign_load_p): Likewise. * tree-inline.c (estimate_num_insns): Use it. Index: gcc/gimple.h === *** gcc/gimple.h(revision 192984) --- gcc/gimple.h(working copy) *** gimple_assign_single_p (gimple gs) *** 2041,2046 --- 2041,2071 gimple_assign_rhs_class (gs) == GIMPLE_SINGLE_RHS); } + /* Return true if GS performs a store to its lhs. */ + + static inline bool + gimple_store_p (gimple gs) + { + tree lhs = gimple_get_lhs (gs); + return lhs && !is_gimple_reg (lhs); + } + + /* Return true if GS is an assignment that loads from its rhs1. */ + + static inline bool + gimple_assign_load_p (gimple gs) + { + tree rhs; + if (!gimple_assign_single_p (gs)) + return false; + rhs = gimple_assign_rhs1 (gs); + if (TREE_CODE (rhs) == WITH_SIZE_EXPR) + return true; + rhs = get_base_address (rhs); + return (DECL_P (rhs) + || TREE_CODE (rhs) == MEM_REF || TREE_CODE (rhs) == TARGET_MEM_REF); + } + /* Return true if S is a type-cast assignment. */ Index: gcc/tree-inline.c === *** gcc/tree-inline.c (revision 192984) --- gcc/tree-inline.c (working copy) *** estimate_num_insns (gimple stmt, eni_wei *** 3512,3523 lhs = gimple_assign_lhs (stmt); rhs = gimple_assign_rhs1 (stmt); ! if (is_gimple_reg (lhs)) ! cost = 0; ! else ! cost = estimate_move_cost (TREE_TYPE (lhs)); ! if (!is_gimple_reg (rhs) && !is_gimple_min_invariant (rhs)) cost += estimate_move_cost (TREE_TYPE (rhs)); cost += estimate_operator_cost (gimple_assign_rhs_code (stmt), weights, --- 3512,3523 lhs = gimple_assign_lhs (stmt); rhs = gimple_assign_rhs1 (stmt); ! cost = 0; ! /* Account for the cost of moving to / from memory. */ ! if (gimple_store_p (stmt)) !
cost += estimate_move_cost (TREE_TYPE (lhs)); ! if (gimple_assign_load_p (stmt)) cost += estimate_move_cost (TREE_TYPE (rhs)); cost += estimate_operator_cost (gimple_assign_rhs_code (stmt), weights,
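The effect of the adjustment on the cost computation can be modeled in isolation. This is a toy model with invented structs and a flat move cost of 1, not the GCC code: the point is simply that a move cost is charged only when the statement really stores to or loads from memory, rather than whenever the lhs is not a register.

```cpp
// Toy model of the adjusted estimate_num_insns accounting.  The struct,
// its fields, and the flat per-move cost of 1 are all invented for
// illustration; in GCC the real predicates inspect the gimple statement.
struct toy_stmt
{
  bool lhs_is_memory;   // stands in for gimple_store_p
  bool rhs_is_memory;   // stands in for gimple_assign_load_p
};

int move_cost (const toy_stmt &s)
{
  int cost = 0;
  if (s.lhs_is_memory)  // store: account a move to memory
    cost += 1;
  if (s.rhs_is_memory)  // load: account a move from memory
    cost += 1;
  return cost;          // register-to-register assignments stay free
}
```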
RE: [PATCH] [AArch64] Add vcond, vcondu support.
-Original Message- From: gcc-patches-ow...@gcc.gnu.org [mailto:gcc-patches- ow...@gcc.gnu.org] On Behalf Of Marcus Shawcroft Sent: 15 October 2012 12:37 To: gcc-patches@gcc.gnu.org Subject: Re: [PATCH] [AArch64] Add vcond, vcondu support. On 09/10/12 12:08, James Greenhalgh wrote: Hi, This patch adds support for vcond and vcondu to the AArch64 backend. Tested with no regressions on aarch64-none-elf. OK for aarch64-branch? (If so, someone will have to commit for me, as I do not have commit rights.) Thanks James Greenhalgh --- 2012-09-11 James Greenhalgh james.greenha...@arm.com Tejas Belagod tejas.bela...@arm.com * config/aarch64/aarch64-simd.md (aarch64_simd_bsl<mode>_internal): New pattern. (aarch64_simd_bsl<mode>): Likewise. (aarch64_vcond_internal<mode>): Likewise. (vcondu<mode><mode>): Likewise. (vcond<mode><mode>): Likewise. * config/aarch64/iterators.md (UNSPEC_BSL): Add to define_constants. OK /Marcus Hi, Committed as revision 192985. Thanks, James Greenhalgh
Re: [patch] Apply conditional down cast to cgraph.h et.al.
2012-10-29 Lawrence Crowl cr...@google.com * is-a.h: New. (is_a <T> (U*)): New. Test for is-a relationship. (as_a <T> (U*)): New. Treat as a derived type. (dyn_cast <T> (U*)): New. Conditionally cast based on is_a. * cgraph.h (varpool_node): Rename to varpool_node_for_decl. Adjust callers to match. (is_a_helper <cgraph_node>::test (symtab_node_def *)): New. (is_a_helper <varpool_node>::test (symtab_node_def *)): New. (symtab_node_def::try_function): New. Change most calls to symtab_function_p with calls to dyn_cast <cgraph_node> (p). (symtab_node_def::try_variable): New. Change most calls to symtab_variable_p with calls to dyn_cast <varpool_node> (p). (symtab_function_p): Remove. Change callers to use is_a <cgraph_node> (p) instead. (symtab_variable_p): Remove. Change callers to use is_a <varpool_node> (p) instead. * cgraph.c (cgraph_node_for_asm): Remove redundant call to symtab_node_for_asm. * cgraphunit.c (symbol_finalized_and_needed): New. (symbol_finalized): New. (cgraph_analyze_functions): Split complicated conditionals out into above new functions. * Makefile.in (CGRAPH_H): Add is-a.h as used by cgraph.h. the patch is OK, thanks! Honza
Re: RFA: patch to fix PR55116
On Tue, Oct 30, 2012 at 4:38 AM, H.J. Lu hjl.to...@gmail.com wrote: On Tue, Oct 30, 2012 at 4:09 AM, Richard Sandiford rdsandif...@googlemail.com wrote: H.J. Lu hjl.to...@gmail.com writes: On Mon, Oct 29, 2012 at 5:11 PM, H.J. Lu hjl.to...@gmail.com wrote: On Mon, Oct 29, 2012 at 4:41 PM, H.J. Lu hjl.to...@gmail.com wrote: On Mon, Oct 29, 2012 at 9:38 AM, Vladimir Makarov vmaka...@redhat.com wrote: On 12-10-29 12:21 PM, Richard Sandiford wrote: Vladimir Makarov vmaka...@redhat.com writes: H.J. in http://gcc.gnu.org/bugzilla/show_bug.cgi?id=55116 reported an interesting address (and:DI (subreg:DI (plus:SI (ashift:SI (reg:SI 96 [ glob_vol_int.22 ]) (const_int 2 [0x2])) (symbol_ref:SI (glob_vol_int_arr) var_decl 0x703c2720 glob_vol_int_arr)) 0) (const_int 4294967295 [0xffffffff])) which cannot be correctly extracted. Here `and' with `subreg' behaves as an address mutation. The following patch fixes the problem. Ok to commit, Richard? Heh, I wondered if subregs might still be used like that, and was almost tempted to add them just in case. I think this particular case is really a failed canonicalisation and that: (and:DI (subreg:DI (foo:SI ...) 0) (const_int 0xffffffff)) ought to be: (zero_extend:DI (foo:SI ...)) Yes, that was my thought too. But I know I've approved MIPS patches to accept (and:DI ... (const_int 0xffffffff)) as an alternative. Index: rtlanal.c === --- rtlanal.c (revision 192942) +++ rtlanal.c (working copy) @@ -5459,6 +5459,11 @@ strip_address_mutations (rtx *loc, enum else if (code == AND && CONST_INT_P (XEXP (*loc, 1))) /* (and ... (const_int -X)) is used to align to X bytes. */ loc = XEXP (*loc, 0); + else if (code == SUBREG + && !REG_P (XEXP (*loc, 0)) + && !MEM_P (XEXP (*loc, 0))) + /* (subreg (operator ...) ...) usually inside and is used for + mode conversion too. */ + loc = XEXP (*loc, 0); I think the condition should be: else if (code == SUBREG && !OBJECT_P (SUBREG_REG (*loc)) && subreg_lowpart_p (*loc)) OK with that change, if it works. Yes, it works.
I've submitted the following patch. It doesn't work right. I will create a new testcase. This patch limits SUBREG to Pmode. Tested on Linux/x86-64. OK to install? Thanks. The address in this case is: (plus:SI (mult:SI (reg/v:SI 223 [orig:154 j ] [154]) (const_int 8 [0x8])) (subreg:SI (plus:DI (reg/f:DI 20 frame) (const_int 32 [0x20])) 0)) which after Uros's subreg simplification patch shouldn't be allowed: the subreg ought to be on the frame register rather than the plus. The attached patch seems to work for the testcase. Does it work more generally? Richard gcc/ * lra-eliminations.c (lra_eliminate_regs_1): Use simplify_gen_subreg rather than gen_rtx_SUBREG. Index: gcc/lra-eliminations.c === --- gcc/lra-eliminations.c (revision 192983) +++ gcc/lra-eliminations.c (working copy) @@ -550,7 +550,8 @@ return x; } else - return gen_rtx_SUBREG (GET_MODE (x), new_rtx, SUBREG_BYTE (x)); + return simplify_gen_subreg (GET_MODE (x), new_rtx, + GET_MODE (new_rtx), SUBREG_BYTE (x)); } return x; I am running the full test. It works. Can you check in your patch? I will check in my testcase. Thanks. -- H.J.
Re: RFA: patch to fix PR55116
H.J. Lu hjl.to...@gmail.com writes: On Tue, Oct 30, 2012 at 4:38 AM, H.J. Lu hjl.to...@gmail.com wrote: On Tue, Oct 30, 2012 at 4:09 AM, Richard Sandiford rdsandif...@googlemail.com wrote: The address in this case is: (plus:SI (mult:SI (reg/v:SI 223 [orig:154 j ] [154]) (const_int 8 [0x8])) (subreg:SI (plus:DI (reg/f:DI 20 frame) (const_int 32 [0x20])) 0)) which after Uros's subreg simplification patch shouldn't be allowed: the subreg ought to be on the frame register rather than the plus. The attached patch seems to work for the testcase. Does it work more generally? Richard gcc/ * lra-eliminations.c (lra_eliminate_regs_1): Use simplify_gen_subreg rather than gen_rtx_SUBREG. Index: gcc/lra-eliminations.c === --- gcc/lra-eliminations.c (revision 192983) +++ gcc/lra-eliminations.c (working copy) @@ -550,7 +550,8 @@ return x; } else - return gen_rtx_SUBREG (GET_MODE (x), new_rtx, SUBREG_BYTE (x)); + return simplify_gen_subreg (GET_MODE (x), new_rtx, + GET_MODE (new_rtx), SUBREG_BYTE (x)); } return x; I am running the full test. It works. Can you check in your patch? I will check in my testcase. Thanks. Vlad, is the patch OK? Richard
Re: GCC 4.8.0 Status Report (2012-10-29), Stage 1 to end soon
On Mon, Oct 29, 2012 at 1:56 PM, Jakub Jelinek ja...@redhat.com wrote: Status == I'd like to close the stage 1 phase of GCC 4.8 development on Monday, November 5th. If you have still patches for new features you'd like to see in GCC 4.8, please post them for review soon. Patches posted before the freeze, but reviewed shortly after the freeze, may still go in, further changes should be just bugfixes and documentation fixes. I will be committing the VEC overhaul soon. With any luck this week, but PCH and gengtype are giving me a lot of grief. Diego.
Re: [Patch] Potential fix for PR55033
On 10/26/2012 02:22 PM, Sebastian Huber wrote: Hello, here is a test case for PR55033. Is there something wrong with this test case? It compiles well with Alan's patch. -- Sebastian Huber, embedded brains GmbH Address : Obere Lagerstr. 30, D-82178 Puchheim, Germany Phone : +49 89 18 90 80 79-6 Fax : +49 89 18 90 80 79-9 E-Mail : sebastian.hu...@embedded-brains.de PGP : Public key available on request. Diese Nachricht ist keine geschäftliche Mitteilung im Sinne des EHUG.
Re: [Patch] Potential fix for PR55033
Hello, what needs to be done to get this committed? -- Sebastian Huber, embedded brains GmbH
Re: [PATCH] Add gimple load/store predicates, use them from stmt estimates
On Tue, 30 Oct 2012, Richard Biener wrote: As requested this adds predicates to check whether the lhs of an assign or call is a store and whether rhs1 of an assignment is a load. It uses this in place of the existing, slightly bogus, check in the stmt estimate code. Bootstrap and regtest running on x86_64-unknown-linux-gnu. Committed with the following adjustment: we would now inline foo, as we no longer account memory move costs for stmts like pInput_9 = MEM[(void *)pInput_1 + 8B]; adding noinline looks reasonable anyway (we'll fix the above to cost something again with a followup). Richard. Index: gcc/testsuite/gcc.dg/vect/slp-perm-2.c === *** gcc/testsuite/gcc.dg/vect/slp-perm-2.c (revision 192984) --- gcc/testsuite/gcc.dg/vect/slp-perm-2.c (working copy) *** *** 12,18 #define N 16 ! void foo (unsigned int *__restrict__ pInput, unsigned int *__restrict__ pOutput) { unsigned int i, a, b; --- 12,19 #define N 16 ! void __attribute__((noinline)) ! foo (unsigned int *__restrict__ pInput, unsigned int *__restrict__ pOutput) { unsigned int i, a, b;
Re: [PATCH] Fix debug info for expr and jump stmt
And tree expressions don't have TREE_BLOCK before gimple-low either. So IMNSHO it is gimple-low.c that should set TREE_BLOCK of all the gimple stmts as well as all expressions in the operands. It is not overwriting anything; no frontend sets TREE_BLOCK for any expression. The way frontends associate IL with BLOCKs is by putting them inside of BIND_EXPR/GIMPLE_BIND after gimplification, and it is gimple-low's responsibility to set it. In 4.3 before tuples, it was solely gimple-low that set TREE_BLOCK initially. Before the location_t changes, again it was gimple-low that was the first setter of TREE_BLOCK, which was valid for all IS_EXPR_CODE_CLASS. So, IMNSHO gimple-low should merge location_t with block for all gimple stmts and all tree expressions used in its operands. It shouldn't be set on trees that can be shared, so say decls etc. should keep using just location_t's without associated block. So perhaps the right test for gimple-low setting of block is CAN_HAVE_LOCATION_P (exp) && !tree_node_can_be_shared (exp). Jakub I kind of like this idea. What do you guys think? Thanks, Dehao
Re: RFA: patch to fix PR55116
On 10/30/2012 09:39 AM, Richard Sandiford wrote: H.J. Lu hjl.to...@gmail.com writes: On Tue, Oct 30, 2012 at 4:38 AM, H.J. Lu hjl.to...@gmail.com wrote: On Tue, Oct 30, 2012 at 4:09 AM, Richard Sandiford rdsandif...@googlemail.com wrote: The address in this case is: (plus:SI (mult:SI (reg/v:SI 223 [orig:154 j ] [154]) (const_int 8 [0x8])) (subreg:SI (plus:DI (reg/f:DI 20 frame) (const_int 32 [0x20])) 0)) which after Uros's subreg simplification patch shouldn't be allowed: the subreg ought to be on the frame register rather than the plus. The attached patch seems to work for the testcase. Does it work more generally? Richard gcc/ * lra-eliminations.c (lra_eliminate_regs_1): Use simplify_gen_subreg rather than gen_rtx_SUBREG. Index: gcc/lra-eliminations.c === --- gcc/lra-eliminations.c (revision 192983) +++ gcc/lra-eliminations.c (working copy) @@ -550,7 +550,8 @@ return x; } else - return gen_rtx_SUBREG (GET_MODE (x), new_rtx, SUBREG_BYTE (x)); + return simplify_gen_subreg (GET_MODE (x), new_rtx, + GET_MODE (new_rtx), SUBREG_BYTE (x)); } return x; I am running the full test. It works. Can you check in your patch? I will check in my testcase. Thanks. Vlad, is the patch OK? Sure. Thanks.
Re: [PATCH] Fix debug info for expr and jump stmt
On Tue, Oct 30, 2012 at 3:17 PM, Dehao Chen de...@google.com wrote: And tree expressions don't have TREE_BLOCK before gimple-low either. So IMNSHO it is gimple-low.c that should set TREE_BLOCK of all the gimple stmts as well as all expressions in the operands. It is not overwriting anything; no frontend sets TREE_BLOCK for any expression. The way frontends associate IL with BLOCKs is by putting them inside of BIND_EXPR/GIMPLE_BIND after gimplification, and it is gimple-low's responsibility to set it. In 4.3 before tuples, it was solely gimple-low that set TREE_BLOCK initially. Before the location_t changes, again it was gimple-low that was the first setter of TREE_BLOCK, which was valid for all IS_EXPR_CODE_CLASS. So, IMNSHO gimple-low should merge location_t with block for all gimple stmts and all tree expressions used in its operands. It shouldn't be set on trees that can be shared, so say decls etc. should keep using just location_t's without associated block. So perhaps the right test for gimple-low setting of block is CAN_HAVE_LOCATION_P (exp) && !tree_node_can_be_shared (exp). Jakub I kind of like this idea. What do you guys think? I question the need for BLOCK info on expression trees. If BLOCKs are relevant then the tree ends up referencing a declaration with a BLOCK as context, no? Thus, the case int tem, a; { int a; ... tem = a; } int b = tem + 5; where we may end up with gimple like b = a + 5; thus mixing two BLOCKs inside a stmt (and no expression trees to attach different BLOCKs) is no different from the case where we end up with expression trees. Thus my original question - why isn't a NULL BLOCK treated the same as UNKNOWN_LOCATION? Or rather, _why_ does Micha's patch not work? Did you analyze the guality fails? Thanks, Richard. Thanks, Dehao
Re: [PATCH] Replace const_vector with match_operand in sse.md
I changed the patch according to Uros' remarks. Please, have a look. Changelog: 2012-10-30 Andrey Turetskiy andrey.turets...@gmail.com * config/i386/i386.c (bdesc_args): Rename CODE_FOR_avx2_umulhrswv16hi3 to CODE_FOR_avx2_pmulhrswv16hi3. * config/i386/predicates.md (const1_operand): Extend for vectors. * config/i386/sse.md (ssse3_avx2): Extend. (ssedoublemode): Ditto. (sse2_avx2_uavg<mode>3): Merge avx2_uavgv32qi3, sse2_uavgv16qi3, avx2_uavgv16hi3 and sse2_uavgv8hi3 into one. (*sse2_avx2_uavg<mode>3): Merge *avx2_uavgv32qi3, *sse2_uavgv16qi3, *avx2_uavgv16hi3 and *sse2_uavgv8hi3 into one. (PMULHRSW): New. (ssse3_avx2_pmulhrsw<mode>3): Merge avx2_umulhrswv16hi3, ssse3_pmulhrswv8hi3 and ssse3_pmulhrswv4hi3 into one. (*avx2_pmulhrswv16hi3): Replace const_vector with match_operand. (*ssse3_pmulhrswv8hi3): Ditto. (*ssse3_pmulhrswv4hi3): Ditto. --- Best regards, Andrey Turetskiy On Wed, Oct 24, 2012 at 5:36 PM, Uros Bizjak ubiz...@gmail.com wrote: On Wed, Oct 24, 2012 at 3:01 PM, Andrey Turetskiy andrey.turets...@gmail.com wrote: On Tue, Oct 23, 2012 at 2:45 PM, Andrey Turetskiy andrey.turets...@gmail.com wrote: Hi, This patch replaces large const_vector constructions with match_operand in sse.md to decrease its size. Is it ok? No, you don't have to touch the generic expand machinery. In the expander, use (match_dup X), and declare operands[X] = CONST1_RTX (..some_mode..) in the preparation statement. In unnamed patterns, use the const1_operand operand predicate. You should extend the existing const1_operand in the same way as const0_operand. This approach is not compatible with named insn patterns, which duplicate its functionality as pattern matcher and as an expander. Uros. const_vector_replace.patch Description: Binary data
Re: [PATCH] Fix debug info for expr and jump stmt
Hi, On Tue, 30 Oct 2012, Richard Biener wrote: On Tue, Oct 30, 2012 at 3:17 PM, Dehao Chen de...@google.com wrote: And tree expressions don't have TREE_BLOCK before gimple-low either. So IMNSHO it is gimple-low.c that should set TREE_BLOCK of all the gimple stmts as well as all expression in the operands. That would be a step away from the ideal situation, so we rather should first analyze why the testcase fails with my patch. I expected some fallout and am actually surprised it's only one testcase :) What we should end up with in the ideal world is that we simply have no expressions in gimple (and hence no place to store any locations for them), except via gimple statements. I question the need of BLOCK info on expression trees. If BLOCKs are relevant then the tree ends up referencing a declaration with a BLOCK as context, no? Thus, the case int tem, a; { int a; ... tem = a; } int b = tem + 5; where we may end up with gimple like b = a + 5; thus mixing two BLOCKs inside a stmt (and no expression trees to attach different BLOCKs) is no different from the case where we end up with expression trees. Thus my original question - why isn't a NULL BLOCK treated the same as UNKNOWN_LOCATION? Since merging location and block a null BLOCK doesn't imply no location. It can very well have a location without a block. What we might want to imply is that a null BLOCK implies the BLOCK from the statement. But as you say, first we should find out why my patch breaks the one testcase. Or rather, _why_ does Michas patch not work? Did you analyze the guality fails? Ciao, Michael.
Re: [PR54693] loss of debug info in jump threading and loop ivopts
On Fri, Oct 26, 2012 at 8:30 AM, Alexandre Oliva aol...@redhat.com wrote: Both jump threading and loop induction variable optimizations were dropping useful debug information, and it took improvements in both for debug info about relevant variables in the enclosed testcase to survive all the way to the end. The first problem was that jump threading could bypass blocks containing debug stmts, losing the required bindings. The solution was to propagate bypassed debug insns into the target of the bypass. Even if the new confluence ends up introducing new PHI nodes, the propagated debug binds will resolve to them, just as intended. This is implemented in the 4th patch below: vta-jump-thread-prop-debug-pr54693.patch The second part of the problem was that, when performing loop ivopts, we'd often drop PHI nodes and other SSA names for unused ivs. Although we had code to propagate SSA DEFs into debug uses upon their removal, this couldn't take care of PHI nodes (no debug phis or conditional debug binds), and it was precisely at the removal of a PHI node that we dropped debug info for the loop in the provided testcase. Once Jakub figured out how to compute an unused iv out of available ivs (thanks!), it was easy to introduce debug temps with the expressions to compute them, so that debug binds would remain correct as long as the unused iv can still be computed out of the others. (If it can't, we'll still try propagation, but may end up losing at the PHI nodes). 
I had thought that replacing only the PHI nodes would suffice, but it turned out that replacing all unused iv defs with their equivalent used-IV expressions got us better coverage, so this is what the 5th patch below does: vta-ivopts-replace-dropped-phis-pr54693.patch + if (count > 1) + { + tree vexpr = make_node (DEBUG_EXPR_DECL); + DECL_ARTIFICIAL (vexpr) = 1; + TREE_TYPE (vexpr) = TREE_TYPE (comp); + if (SSA_NAME_VAR (def)) + DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def)); + else + DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr)); simply always use the TREE_TYPE path. TYPE_MODE is always valid for SSA_NAMEs. + FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def) + { + if (!gimple_debug_bind_p (stmt)) + continue; + + FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter) + SET_USE (use_p, comp); + + if (!comp) + BREAK_FROM_IMM_USE_STMT (imm_iter); how does comp magically become NULL_TREE here? Btw, what's all the fuzz with IV candidates, etc? At least for non-PHIs I don't see why the regular release_ssa_name way of doing things does not work. IVOPTs is slow enough ... Richard. Regression testing revealed -fcompare-debug regressions exposed by these patches. x86-specific code introduces pre-reload scheduling dependencies between calls and likely-spilled parameter registers, but it does so in a way that's AFAICT buggy, and fragile to the presence of debug insns at the top of a block: we won't introduce a dependency for the first insn of the block, even if we'd rather have such a dependency. This fails to achieve the intended effect, and it also causes codegen differences when the first insn in the block happens to be a debug insn, for then we will add the intended dependency. The first patch below, vta-stabilize-i386-call-args-sched-pr54693.patch, skips leading debug insns, so as to remove the difference, and moves the end of the backward scan to the insn before the first actual insn, so that we don't refrain from considering it for dependencies.
This in turn required an additional test to make sure we wouldn't go past the nondebug head if first_arg happened to be the head. The introduction of debug insns at new spots also exposed a bug in loop unrolling: we'd unroll a loop a different number of times depending on whether or not its latch is an empty block. The propagation or introduction of debug insns in previously-empty latch blocks caused loops to be unrolled a different number of times depending on the presence of the debug insns, which triggered -fcompare-debug errors. The fix was to count only nondebug insns towards the decision on whether the latch block was empty. This is implemented in the second patch below: vta-stabilize-loop-unroll-empty-latch-check-pr54693.patch Guality testsuite regressions given the patches above revealed that the fast DCE global dead debug substitution introduced for PR54551 was not correct: it was possible for us to visit, for the last time, a block with a REG used in debug stmts after its death before we visited the block with the debug use. As a result, we'd fail to emit a debug temp at the not-revisited block, and the debug temp bindings introduced at other blocks might be insufficient
Non-dominating loop bounds in tree-ssa-loop-niter 1/4
Hi, this is the first patch of a change to tree-ssa-loop-niter to consider bounds that are not in a block dominating the latch. This patch makes them be recorded, but they are not used yet. I plan to followup with: 1) patch to add simple shortest path walk at the end of estimate_numbers_of_iterations_loop determining bound of i.e. int a[10]; int b[10]; for (i=0;i<n;i++) if (t()) q(a[i]); else q(a[i]); 2) make complete loop unrolling kill all statements known to not be executed in the last iteration by __builtin_unreachable to silence part of -Warray-bounds warnings currently breaking bootstrap with -O3 3) make duplicate_loop_to_header_edge in peeling mode do the same, silencing the rest of the warnings 4) make branch prediction code drop the prediction on exits not dominating the latch. Bootstrapped/regtested x86_64-linux, OK? * tree-ssa-loop-niter.c (number_of_iterations_exit): New parameter EVERY_ITERATION with implicit value of true. (record_estimate): Check dominance relationship of the basic block we are estimating on instead of relying on UPPER to be false. (struct ilb_data): Drop RELIABLE. (idx_infer_loop_bounds): Update. (infer_loop_bounds_from_ref): Drop parameter RELIABLE. (infer_loop_bounds_from_array): Drop parameter RELIABLE. (infer_loop_bounds_from_undefined): Update comments and handling of RELIABLE. (estimate_numbers_of_iterations_loop): Record all bounds. Index: tree-ssa-loop-niter.c === --- tree-ssa-loop-niter.c (revision 192986) +++ tree-ssa-loop-niter.c (working copy) @@ -1793,12 +1793,15 @@ loop_only_exit_p (const struct loop *loo meaning described in comments at struct tree_niter_desc declaration), false otherwise. If WARN is true and -Wunsafe-loop-optimizations was given, warn if the optimizer is going to use - potentially unsafe assumptions. */ + potentially unsafe assumptions. + When EVERY_ITERATION is true, only tests that are known to be executed + every iteration are considered (i.e. only a test that alone bounds the loop).
+ */ bool number_of_iterations_exit (struct loop *loop, edge exit, struct tree_niter_desc *niter, - bool warn) + bool warn, bool every_iteration) { gimple stmt; tree type; @@ -1806,7 +1809,8 @@ number_of_iterations_exit (struct loop * enum tree_code code; affine_iv iv0, iv1; - if (!dominated_by_p (CDI_DOMINATORS, loop-latch, exit-src)) + if (every_iteration + !dominated_by_p (CDI_DOMINATORS, loop-latch, exit-src)) return false; niter-assumptions = boolean_false_node; @@ -2568,6 +2572,11 @@ record_estimate (struct loop *loop, tree loop-bounds = elt; } + /* If statement is executed on every path to the loop latch, we can directly + infer the upper bound on the # of iterations of the loop. */ + if (!dominated_by_p (CDI_DOMINATORS, loop-latch, gimple_bb (at_stmt))) +return; + /* Update the number of iteration estimates according to the bound. If at_stmt is an exit then the loop latch is executed at most BOUND times, otherwise it can be executed BOUND + 1 times. We will lower the estimate @@ -2651,7 +2660,6 @@ struct ilb_data { struct loop *loop; gimple stmt; - bool reliable; }; static bool @@ -2660,7 +2668,7 @@ idx_infer_loop_bounds (tree base, tree * struct ilb_data *data = (struct ilb_data *) dta; tree ev, init, step; tree low, high, type, next; - bool sign, upper = data-reliable, at_end = false; + bool sign, upper = true, at_end = false; struct loop *loop = data-loop; if (TREE_CODE (base) != ARRAY_REF) @@ -2737,14 +2745,12 @@ idx_infer_loop_bounds (tree base, tree * STMT is guaranteed to be executed in every iteration of LOOP.*/ static void -infer_loop_bounds_from_ref (struct loop *loop, gimple stmt, tree ref, - bool reliable) +infer_loop_bounds_from_ref (struct loop *loop, gimple stmt, tree ref) { struct ilb_data data; data.loop = loop; data.stmt = stmt; - data.reliable = reliable; for_each_index (ref, idx_infer_loop_bounds, data); } @@ -2753,7 +2759,7 @@ infer_loop_bounds_from_ref (struct loop executed in every iteration of LOOP. 
*/ static void -infer_loop_bounds_from_array (struct loop *loop, gimple stmt, bool reliable) +infer_loop_bounds_from_array (struct loop *loop, gimple stmt) { if (is_gimple_assign (stmt)) { @@ -2763,10 +2769,10 @@ infer_loop_bounds_from_array (struct loo /* For each memory access, analyze its access function and record a bound on the loop iteration domain. */ if (REFERENCE_CLASS_P (op0)) - infer_loop_bounds_from_ref (loop, stmt, op0, reliable); + infer_loop_bounds_from_ref (loop, stmt, op0); if (REFERENCE_CLASS_P
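The follow-up 1) example from the mail can be written out as a small compilable test case. This is a hypothetical reduction (function names `sum_guarded`, `t`-style guard replaced by a `flag` parameter are mine, not from the patch): the array accesses sit in conditional arms, so neither access's basic block dominates the loop latch, which is exactly the kind of bound the patch now records instead of discarding.

```c
/* Neither conditional arm dominates the loop latch, yet either access
   still bounds the loop to at most 10 iterations.  */
int a[10];
int b[10];

int sum_guarded (int n, int flag)
{
  int s = 0;
  for (int i = 0; i < n; i++)
    if (flag)
      s += a[i];   /* bound from a[] does not dominate the latch */
    else
      s += b[i];   /* neither does the bound from b[] */
  return s;
}
```

A later shortest-path walk (follow-up 1 in the mail) could then conclude the loop iterates at most 10 times even though no single access dominates the latch.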
Re: [PATCH] Fix debug info for expr and jump stmt
On Tue, Oct 30, 2012 at 3:49 PM, Michael Matz m...@suse.de wrote: Hi, On Tue, 30 Oct 2012, Richard Biener wrote: On Tue, Oct 30, 2012 at 3:17 PM, Dehao Chen de...@google.com wrote: And tree expressions don't have TREE_BLOCK before gimple-low either. So IMNSHO it is gimple-low.c that should set TREE_BLOCK of all the gimple stmts as well as all expressions in the operands. That would be a step away from the ideal situation, so we rather should first analyze why the testcase fails with my patch. I expected some fallout and am actually surprised it's only one testcase :) What we should end up with in the ideal world is that we simply have no expressions in gimple (and hence no place to store any locations for them), except via gimple statements. I question the need of BLOCK info on expression trees. If BLOCKs are relevant then the tree ends up referencing a declaration with a BLOCK as context, no? Thus, the case int tem, a; { int a; ... tem = a; } int b = tem + 5; where we may end up with gimple like b = a + 5; thus mixing two BLOCKs inside a stmt (and no expression trees to attach different BLOCKs) is no different from the case where we end up with expression trees. Thus my original question - why isn't a NULL BLOCK treated the same as UNKNOWN_LOCATION? Since the merging of location and block, a null BLOCK doesn't imply no location. It can very well have a location without a block. What we might want to imply is that a null BLOCK implies the BLOCK from the statement. But as you say, first we should find out why my patch breaks the one testcase. Yes, I mean we happily leave the stmt line location the same if we have a stmt with UNKNOWN_LOCATION (thus it inherits that of the previous stmt); we should do the same with BLOCKs - if a stmt has a location with a NULL BLOCK then it should inherit the block info from the previous stmt. Richard. Or rather, _why_ does Micha's patch not work? Did you analyze the guality fails? Ciao, Michael.
Re: [PATCH] Inter-bb range test optimization (PRs tree-optimization/19105, tree-optimization/21643, tree-optimization/46309)
On Fri, Oct 26, 2012 at 7:27 PM, Jakub Jelinek ja...@redhat.com wrote: Hi! This patch extends the optimize_range_tests optimization, so that it also handles the cases where the truth && or || has been gimplified as a series of GIMPLE_CONDs, or a mixture thereof and BIT_{AND,IOR}_EXPR stmts. Example of code it handles is e.g.:

<bb 2>:
v1_3 = a_2(D) != 3;
v2_4 = a_2(D) != 1;
v3_5 = a_2(D) != 4;
v4_6 = a_2(D) != 2;
v5_7 = a_2(D) != 7;
v6_8 = a_2(D) != 5;
v7_9 = a_2(D) != 8;
v8_10 = a_2(D) != 6;
_11 = v1_3 & v2_4;
if (_11 != 0) goto <bb 3>; else goto <bb 7>;

<bb 3>:
_13 = v3_5 & v4_6;
if (_13 != 0) goto <bb 4>; else goto <bb 7>;

<bb 4>:
_14 = v5_7 & v6_8;
if (_14 != 0) goto <bb 5>; else goto <bb 7>;

<bb 5>:
_15 = v7_9 & v8_10;
if (_15 != 0) goto <bb 6>; else goto <bb 7>;

or:

<bb 2>:
_3 = c_2(D) == 34;
_4 = c_2(D) == 32;
_5 = _3 | _4;
if (_5 != 0) goto <bb 4>; else goto <bb 3>;

<bb 3>:
_8 = c_2(D) <= 31;
_7 = (int) _8;

<bb 4>:
# _1 = PHI <_7(3), 1(2)>

It is implemented in reassociate_bb, as that is where all the infrastructure already is, but it isn't done as part of normal reassociation; rather, it runs before reassociating the first bb that represents a series of && or || operands. As post-dominator sons aren't particularly ordered, it can happen that the optimization is performed on the first, last or middle bb of the series of bbs, so it searches both forward and backward to find suitable basic blocks. The last bb in the series (last_bb) can be of the form of bb 3 in the second example, which is how certain sequences are gimplified when assigning a && or || result to a variable or returning it. Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? Ok, but the code could really, really use some more comments - functions not fitting in my 80x24 terminal without seeing any comment about what happens here are a maintenance nightmare. Thanks, Richard.
2012-10-26  Jakub Jelinek  ja...@redhat.com

PR tree-optimization/19105
PR tree-optimization/21643
PR tree-optimization/46309
* tree-ssa-reassoc.c (init_range_entry): Add STMT argument
and use it if EXP is NULL.
(update_range_test): Handle OPCODE equal to ERROR_MARK
and oe->op NULL.
(optimize_range_tests): Likewise.
(final_range_test_p, suitable_cond_bb, no_side_effect_bb, get_ops,
maybe_optimize_range_tests): New functions.
(reassociate_bb): Call maybe_optimize_range_tests if the last
stmt of a bb is a GIMPLE_COND that hasn't been visited yet.

* gcc.dg/pr19105.c: New test.
* gcc.dg/pr21643.c: New test.
* gcc.dg/pr46309-2.c: New test.
* gcc.c-torture/execute/pr46309.c: New test.

--- gcc/tree-ssa-reassoc.c.jj 2012-10-25 09:21:08.657049321 +0200
+++ gcc/tree-ssa-reassoc.c 2012-10-26 15:51:13.398025229 +0200
@@ -1,5 +1,5 @@
 /* Reassociation for trees.
-   Copyright (C) 2005, 2007, 2008, 2009, 2010, 2011
+   Copyright (C) 2005, 2007, 2008, 2009, 2010, 2011, 2012
    Free Software Foundation, Inc.
    Contributed by Daniel Berlin d...@dberlin.org
@@ -1713,10 +1713,12 @@ struct range_entry
 };

 /* This is similar to make_range in fold-const.c, but on top of
-   GIMPLE instead of trees.  */
+   GIMPLE instead of trees.  If EXP is non-NULL, it should be
+   an SSA_NAME and STMT argument is ignored, otherwise STMT
+   argument should be a GIMPLE_COND.  */

 static void
-init_range_entry (struct range_entry *r, tree exp)
+init_range_entry (struct range_entry *r, tree exp, gimple stmt)
 {
   int in_p;
   tree low, high;
@@ -1727,7 +1729,8 @@ init_range_entry (struct range_entry *r,
   r->strict_overflow_p = false;
   r->low = NULL_TREE;
   r->high = NULL_TREE;
-  if (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp)))
+  if (exp != NULL_TREE
+      && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
     return;

   /* Start with simply saying EXP != 0 and then look at the code of EXP
@@ -1735,12 +1738,14 @@ init_range_entry (struct range_entry *r,
      happen, but it doesn't seem worth worrying about this.  We continue
      the outer loop when we've changed something; otherwise we break
      the switch, which will break the while.  */

-  low = build_int_cst (TREE_TYPE (exp), 0);
+  low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
   high = low;
   in_p = 0;
   strict_overflow_p = false;
   is_bool = false;
-  if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
+  if (exp == NULL_TREE)
+    is_bool = true;
+  else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
     {
       if (TYPE_UNSIGNED (TREE_TYPE (exp)))
	 is_bool = true;
@@ -1752,25 +1757,35 @@ init_range_entry (struct range_entry *r,
   while (1)
     {
-      gimple stmt;
       enum tree_code code;
       tree arg0, arg1, exp_type;
       tree nexp;
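The first GIMPLE dump above corresponds roughly to a chain of `!=` tests joined by `&&`. As a hypothetical sketch (function names are mine, and the exact output form is up to reassoc), the whole chain is semantically a single range test, which is the kind of form the optimization aims to produce:

```c
/* Source-level shape of the first GIMPLE example: a chain of != tests
   combined with &&, spread by gimplification across several basic
   blocks.  */
int all_outside (int a)
{
  return a != 3 && a != 1 && a != 4 && a != 2
      && a != 7 && a != 5 && a != 8 && a != 6;
}

/* Equivalent single range test: true iff a is outside [1, 8].
   The unsigned subtraction trick folds both bounds into one compare.  */
int all_outside_range (int a)
{
  return (unsigned) (a - 1) > 7u;
}
```

The two functions agree for every input, which is what lets the pass replace the multi-block chain with one comparison.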
Re: RFA: hookize ADJUST_INSN_LENGTH (Was: RFA: Add lock_lenth attribute to support the ARC port)
On Sun, Oct 28, 2012 at 6:13 PM, Joern Rennecke joern.renne...@embecosm.com wrote: Quoting Richard Biener richard.guent...@gmail.com: Thus, you can allow the length to vary downwards as well as upwards across iterations with suitable definitions of the @code{length} attribute and/or @code{ADJUST_INSN_LENGTH}. Care has to be taken that this does not lead to infinite loops. I don't see that you can shrink length with just suitable lock_length and length attributes. I disagree there (for certain values of 'just'), but we can just agree to disagree on this point because... What seems to be the crucial difference is that you apply ADJUST_INSN_LENGTH after combining lock-length-max and length. But then you _decrease_ length with ADJUST_INSN_LENGTH ... Maybe what you really want is ADJUST_INSN_LENGTH_AFTER, which is applied afterwards? Thus, Well, actually, I found a number of problems with ADJUST_INSN_LENGTH: - it is not applied to the inner insns in a delay slot sequence. You can sort of work around this by stitching recursive calls together, but then you are faced with another problem: - You don't know what the length prior to ADJUST_INSN_LENGTH was. That was even worse with the non-unique uids, where you didn't know squat about the instructions in the delay slot. - As you said, I want the adjustment to happen after the maximum calculation. Well, usually. There are actually special cases where it would be useful to have some sort of maximum calculation in place, to break up alignment-driven cycles, but only applicable with a lesser priority than for the range-driven branch offset calculations. But adding yet another macro neither solves all these problems, nor would it align with our goal to move away from target macros.
I have now found an alternate way to make the ARC port terminate building its insn-attrtab.c - it involves using match_test "get_attr_length (insn) == 2" instead of eq_attr on [lock_]length with (const_int 2) - where I really want to know if the instruction was considered short in the previous iteration. So, I have made a patch to replace the ADJUST_INSN_LENGTH macro in final.c with an adjust_insn_length hook, for which a default implementation using ADJUST_INSN_LENGTH is provided in targhooks.c to provide for an easy transition. I've looked at the existing ports that use ADJUST_INSN_LENGTH, and some seem to prefer to return an adjustment to be added to the length, while others prefer to return the new length. The latter seemed to be slightly more numerous, so I went with that. The hook has two new parameters: - a flag that tells it if the insn in question is a delay sequence. The default hook implementation skips the invocation of ADJUST_INSN_LENGTH in this case for the sake of compatibility. - a pointer to int to set the number of iteration loops till the length locking feature is supposed to apply to this instruction length when using the increasing size calculations. The pointed-to value is initialized to zero, which means that length locking is always applied (assuming final.c uses the increasing length algorithm). Setting this to a higher number effectively gives the new instruction length a lower priority to be put into uid_lock_length. Note that "Care has to be taken that this does not lead to infinite loops." doesn't convince me that it is properly designed ;) With the hook mechanism, it is much harder to create an infinite loop inside shorten_branches. (It would involve something like setting iteration_threshold to MAX_INT and making the length decrease when niter is at MAX_INT, then letting integer overflow of niter take its course.
Making it impossible for a port maintainer to get things wrong is not a meaningful goal for GCC, but making it straightforward to get it right is.) This patch builds on: http://gcc.gnu.org/ml/gcc-patches/2012-10/msg02527.html Bootstrapped in revision 192879 on i686-pc-linux-gnu. Tested with config-list.mk on x86_64-unknown-linux-gnu. Apart from the iteration_threshold, the hookization would be straightforward. Now I cannot decipher from the patch what functional change it introduces ;) There are also only 10 users of ADJUST_INSN_LENGTH, not enough to warrant introducing a hook without removing the macro IMHO. Thanks, Richard.

2012-10-28  Joern Rennecke  joern.renne...@embecosm.com

* doc/tm.texi.in (@hook TARGET_ADJUST_INSN_LENGTH): Add.
* doc/tm.texi: Regenerate.
* final.c (get_attr_length_1): Use targetm.adjust_insn_length
instead of ADJUST_INSN_LENGTH.
(adjust_length): New function.
(shorten_branches): Use adjust_length instead of ADJUST_INSN_LENGTH.
* target.def (adjust_insn_length): New hook.
* targhooks.c (default_adjust_insn_length): New function.
* targhooks.h (default_adjust_insn_length): Declare.

diff -drup gcc-192879-haveattr/doc/tm.texi
Re: Non-dominating loop bounds in tree-ssa-loop-niter 1/4
On Tue, 30 Oct 2012, Jan Hubicka wrote: Hi, this is the first patch of a change to tree-ssa-loop-niter to consider bounds that are not in a block dominating the latch; the bounds are recorded but not yet used (full patch quoted upthread). Bootstrapped/regtested x86_64-linux, OK? Ok. Thanks, Richard.
Re: [PATCH] Fix debug info for expr and jump stmt
On Tue, Oct 30, 2012 at 03:38:11PM +0100, Richard Biener wrote: I question the need of BLOCK info on expression trees. If BLOCKs are relevant then the tree ends up referencing a declaration with a BLOCK as context, no? Thus, the case int tem, a; { int a; ... tem = a; } int b = tem + 5; where we may end up with gimple like b = a + 5; thus mixing two BLOCKs inside a stmt (and no expression trees to attach different BLOCKs) is no different from the case where we end up with expression trees. IMHO either we don't use locations at all on tree expressions (thus no source location nor block), or both. A source location always has an associated block it is present in. Of course for shared trees we can't put there any block, as it can appear anywhere. Thus my original question - why isn't a NULL BLOCK treated the same as UNKNOWN_LOCATION? Or rather, _why_ does Micha's patch not work? Did you analyze the guality fails? Micha's patch is degrading debug info quality. Whenever some expression has a different source location from the source location of the gimple stmt, then assuming that other source location is from the same block is wrong; it could very well be from a different one. On the testcase that fails with Micha's patch, we have:

[pr43479.c : 8:4] l_2 = l_1(D) + 1;
[pr43479.c : 8:4] # DEBUG l => l_2
[pr43479.c : 10:9] # DEBUG h => n_3(D)
[pr43479.c : 12:11] # DEBUG i => k_4(D)
[pr43479.c : 13:8] k_5 = k_4(D) + 1;
[pr43479.c : 13:8] # DEBUG k => k_5
[pr43479.c : 17:11] # DEBUG j => m_6(D)
[pr43479.c : 18:8] m_7 = m_6(D) + 1;
[pr43479.c : 18:8] # DEBUG m => m_7
[pr43479.c : 22:3] __asm__ __volatile__("" : : "r" k_5, "r" l_2);
[pr43479.c : 23:3] __asm__ __volatile__("" : : "r" m_7, "r" n_3(D));

where line 8 is from the outer block in the source, line 10 from the middle block, lines 12/13 from the first innermost block, and lines 17/18 from the second innermost block. But all of the l_2, k_5 and m_7 setters are TERed, so everything is emitted when expanding the two asm statements, and with Micha's patch the block used is the block of the asm statement, while previously each TERed statement got its own block. I'd say either we should do the TREE_BLOCK setting on all non-shareable trees during gimple-low and clear the block (but then likely the whole location? it doesn't make sense to say in the debugger that something has a certain source location when you can't print variables declared in that location) if copying expressions for use elsewhere, outside of the containing function. Or say during gimplification or gimple-low.c simply set t->exp.locus of all non-shareable expressions to UNKNOWN_LOCATION to make it clear we don't use it (wonder if that could affect debug info quality, perhaps not that much), but during expansion if creating trees from TERed stmts they need to be set back, or the current location/block adjusted accordingly. Jakub
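The lexical-block situation discussed in this thread can be sketched as a small hypothetical C function (names `f`, `tem` after Richard's example): after constant/copy propagation, the statement computing the final value mixes a value computed inside an inner lexical block with variables of the outer block, so no single BLOCK describes all operands of the combined gimple statement.

```c
/* tem is assigned inside an inner lexical block; the later use sits in
   the outer block.  Once gimple propagates the inner computation into
   the outer statement, one statement spans two BLOCKs.  */
int f (int n)
{
  int tem;
  {
    int a = n + 1;   /* declared in the inner block */
    tem = a;         /* may be propagated into the use below */
  }
  int b = tem + 5;   /* may become b = n + 6, mixing two BLOCKs */
  return b;
}
```

This is why a per-statement BLOCK alone cannot always describe where each operand's source location "lives".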
Re: [PATCH] PR c++/54955 - Fail to parse alignas expr at the beginning of a declaration
OK. Jason
Re: [PATCH] Fix debug info for expr and jump stmt
On Tue, Oct 30, 2012 at 4:03 PM, Jakub Jelinek ja...@redhat.com wrote: Micha's patch is degrading debug info quality. Whenever some expression has a different source location from that of the gimple stmt, assuming the other source location is from the same block is wrong (full analysis quoted upthread). So maybe TER (well, those looking up the stmt) should pick the location from the TERed statement properly then? Richard.
Re: [PR54693] loss of debug info in jump threading and loop ivopts
On Tue, Oct 30, 2012 at 03:51:19PM +0100, Richard Biener wrote:

+  FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
+    {
+      if (!gimple_debug_bind_p (stmt))
+	 continue;
+
+      FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
+	 SET_USE (use_p, comp);
+
+      if (!comp)
+	 BREAK_FROM_IMM_USE_STMT (imm_iter);

how does comp magically become NULL_TREE here? Looks like a pasto to me, from the first loop. BTW, I'd also think that the first loop should set count = 2 if the debug stmt already has a non-trivial expression (i.e. the rhs isn't just the SSA_NAME), to force a debug temp in that case to avoid creating too large debug stmts. Btw, what's all the fuzz with IV candidates, etc.? At least for non-PHIs I don't see why the regular release_ssa_name way of doing things does not work. IVOPTs is slow enough ... Even if it is a non-PHI, release_ssa_name will replace it with the definition, and then on another release_ssa_name again and again, and finally either be lucky enough that some SSA_NAME stays (is an IV that has been kept), but more often you'll just reach the PHI node and end up with a long list of:

DEBUG D#7 => NULL
DEBUG D#6 => D#7
DEBUG D#5 => D#6
DEBUG D#4 => D#5
DEBUG D#3 => D#4
DEBUG D#2 => D#3
DEBUG D#1 => D#2

(the NULL because of the PHI). We don't need to find an optimal IV replacement, so the code tries just a couple of them (perhaps the 64 should be a param, and perhaps could be lower by default); it just helps if the expression is smaller (smaller debug info), and if it contains as few SSA_NAMEs as possible (then it is more likely it will actually be useful). Jakub
Re: [PATCH] Fix debug info for expr and jump stmt
On Tue, Oct 30, 2012 at 04:15:38PM +0100, Richard Biener wrote: So maybe TER (well, those looking up the stmt) should pick the location from the TERed statement properly then? Perhaps, but Micha's patch doesn't do that. But in that case IMHO it still would help to set all expr locations to UNKNOWN_LOCATION during gimple lowering, to make it clear we ignore the locations. Jakub
Re: [PATCH] pass filtering for -fopt-info
On Tue, Oct 30, 2012 at 9:21 AM, Sharad Singhai sing...@google.com wrote: As per discussion in http://gcc.gnu.org/ml/gcc/2012-10/msg00225.html, I have added the -fopt-info pass filtering in the attached patch. The basic idea is that there are optimization pass groups and a user can selectively enable dumps for these group(s) via command-line syntax. Currently, I define the following optimization groups: 'loop', 'lto', 'inline', 'vec', and 'omp'. A pass can be in multiple groups. If a pass doesn't explicitly specify an optimization group, (denoted by OPTGROUP_NONE) then a group is assigned based on the pass type. These three are the obvious implicit groups: 'tree', 'ipa', and 'rtl'. Also there is a catch-all group, called 'optall'. The options for -fopt-info dumps verbosity remain 'optimized', 'missed', 'note', and 'all'. Since these two types of options, verbosity and optimization groups are non-overlapping, I have decided to freely mix them. Something like this works as expected, i.e., dump missed vectorizer info into vec.missed. gcc ... -fopt-info-vec-missed=vec.missed which is equivalent to gcc ... -fopt-info-missed-vec=vec.missed However, the order still matters, and it can be somewhat confusing. For example, gcc -fopt-info-vec-missed=vec.miss -fopt-info-vec-optimized=vec.opt will dump missed and optimized vectorizer info into vec.opt, while no vec.miss is produced. This is due to the fact that the latter group specifier, 'vec' overrides the first one. However, the 'missed' and 'optimized' are both honored as there is no conflict there. This is somewhat confusing. Hopefully, this type of usage would not be common. What I'd expect from that would be both vec.miss and vec.opt being populated ... (thus go away from the duality of dump files to primary dump file plus a set of alternate dump files). I have updated the documentation to include -fopt-info examples, and added some details about -fopt-info command line conflicts. 
I like it overall, not sure if we want to pre-populate the OPTGROUP set too much at this point. Like what is 'tree' or 'rtl' to users? nothing I think. 'ipa' yes. 'lto'? sounds redundant with 'ipa' to me. 'omp'? we don't have any optimizations here. Thus please drop TREE, RTL, LTO and OMP for now. Otherwise I'm leaving it for comments from other folks. Thanks, Richard. Thanks, Sharad
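A hypothetical demo of the group syntax being discussed (file name `demo.c` and the exact note text are assumptions, not from the patch): compiling the function below with `gcc -O2 -c -fopt-info-vec-missed=vec.missed demo.c` would, under the proposed option, collect the vectorizer's missed-optimization notes in vec.missed. The loop-carried dependence is the kind of thing that shows up there, since it prevents vectorization.

```c
/* In-place prefix sum: a[i] depends on the value just written to
   a[i-1], so the vectorizer reports a missed optimization rather
   than vectorizing the loop.  */
void prefix_sum (int *a, int n)
{
  for (int i = 1; i < n; i++)
    a[i] += a[i - 1];   /* dependence on the previous iteration */
}
```

Such a test case also makes it easy to check the ordering behavior discussed above: passing both `-fopt-info-vec-missed=...` and `-fopt-info-vec-optimized=...` on one command line exercises the conflict rules.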
Re: [PATCH] Fix debug info for expr and jump stmt
On Tue, Oct 30, 2012 at 4:21 PM, Jakub Jelinek ja...@redhat.com wrote: On Tue, Oct 30, 2012 at 04:15:38PM +0100, Richard Biener wrote: So maybe TER (well, those looking up the stmt) should pick the location from the TERed statement properly then? Perhaps, but Micha's patch doesn't do that. But in that case IMHO it still would help to set all expr locations to UNKNOWN_LOCATION during gimple lowering, to make it clear we ignore the locations. Yes indeed. Richard. Jakub
Re: PATCH: PR rtl-optimization/55093: [4.8 Regression] [x32] -maddress-mode=long failed
On 10/30/2012 06:34 AM, Richard Sandiford wrote: H.J. Lu hjl.to...@gmail.com writes: LRA has

  if (REG_P (reg) && (ep = get_elimination (reg)) != NULL)
    {
      rtx to_rtx = replace_p ? ep->to_rtx : ep->from_rtx;

      if (! replace_p)
	 {
	   offset += (ep->offset - ep->previous_offset);
	   offset = trunc_int_for_mode (offset, GET_MODE (plus_cst_src));
	 }

      if (GET_CODE (XEXP (plus_cst_src, 0)) == SUBREG)
	 to_rtx = gen_lowpart (GET_MODE (XEXP (plus_cst_src, 0)), to_rtx);

Reload has

      rtx to_rtx = ep->to_rtx;
      offset += ep->offset;
      offset = trunc_int_for_mode (offset, GET_MODE (plus_cst_src));

      if (GET_CODE (XEXP (plus_cst_src, 0)) == SUBREG)
	 to_rtx = gen_lowpart (GET_MODE (XEXP (plus_cst_src, 0)), to_rtx);

(gdb) call debug_rtx (ep->to_rtx)
(reg/f:DI 7 sp)
(gdb) call debug_rtx (ep->from_rtx)
(reg/f:DI 16 argp)
(gdb)

gen_lowpart returns (reg/f:DI 7 sp) for reload and (reg:SI 16 argp) for LRA. They are caused by

  if (FRAME_POINTER_REGNUM != ARG_POINTER_REGNUM
      /* We should convert arg register in LRA after the elimination
	  if it is possible.  */
      && xregno == ARG_POINTER_REGNUM
      && ! lra_in_progress)
    return -1;

It doesn't work in this case. This testcase shows that LRA can't convert the arg register after the elimination. Here is a patch to remove the lra_in_progress check for ARG_POINTER_REGNUM. Tested on Linux/x86-64. OK to install? Thanks HJ. This looks good to me. As well as your testcase, I think it would be dangerous to reduce this kind of subreg during non-final elimination in cases where the argument pointer occupies more than one hard register (like avr IIRC). We could end up with something like ARG_POINTER_REGNUM+1, which wouldn't show up as an elimination register during the rest of LRA. It's important that we do get rid of the subreg during the final elimination stage, but I think alter_subreg already handles that case. Since this code is outside the LRA files: patch is OK if Vlad agrees. I added this code for a reason, probably to solve some target problems. So I am not sure, but let us try.
It is ok for me to commit the patch if there are no regressions on x86/x86-64.
Re: [PATCH] Replace const_vector with match_operand in sse.md
On Tue, Oct 30, 2012 at 3:47 PM, Andrey Turetskiy andrey.turets...@gmail.com wrote: I changed the patch according to Uros' remarks. Please, have a look. Changelog:

2012-10-30  Andrey Turetskiy  andrey.turets...@gmail.com

* config/i386/i386.c (bdesc_args): Rename CODE_FOR_avx2_umulhrswv16hi3
to CODE_FOR_avx2_pmulhrswv16hi3.
* config/i386/predicates.md (const1_operand): Extend for vectors.
* config/i386/sse.md (ssse3_avx2): Extend.
(ssedoublemode): Ditto.
(<sse2_avx2>_uavg<mode>3): Merge avx2_uavgv32qi3, sse2_uavgv16qi3,
avx2_uavgv16hi3 and sse2_uavgv8hi3 into one.
(*<sse2_avx2>_uavg<mode>3): Merge *avx2_uavgv32qi3, *sse2_uavgv16qi3,
*avx2_uavgv16hi3 and *sse2_uavgv8hi3 into one.
(PMULHRSW): New.
(<ssse3_avx2>_pmulhrsw<mode>3): Merge avx2_umulhrswv16hi3,
ssse3_pmulhrswv8hi3 and ssse3_pmulhrswv4hi3 into one.
(*avx2_pmulhrswv16hi3): Replace const_vector with match_operand.
(*ssse3_pmulhrswv8hi3): Ditto.
(*ssse3_pmulhrswv4hi3): Ditto.

+{
+  ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);
+  operands[3] = CONST1_RTX (<MODE>mode);
+})

Please put the operands[3] initialization before the call to ix86_f_b_o_n_c. We don't want to pass uninitialized operands around.

+{
+  if (which_alternative == 0
+      && (<MODE>mode == V16QImode
+	   || <MODE>mode == V8HImode))
+    return "pavg<ssemodesuffix>\t{%2, %0|%0, %2}";
+  return "vpavg<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}";
+}
+  [(set (attr "isa")
+     (if_then_else
+	(match_test "which_alternative == 0
+		      && (<MODE>mode == V16QImode || <MODE>mode == V8HImode)")
+	(const_string "noavx")
+	(const_string "avx")))
+   (set (attr "prefix_data16")
+     (if_then_else (eq_attr "isa" "noavx")
+	(const_string "1")
+	(const_string "*")))
+   (set (attr "prefix")
+     (if_then_else (eq_attr "isa" "noavx")
+	(const_string "orig")
+	(const_string "vex")))

Uh, oh. Please note that the isa attribute enables or disables the alternative through the enabled attribute.
Just change the mode attribute to <sseinsnmode> and everything will magically work: - AVX2 implies AVX, so it enables alternative 1, while disabling alternative 0 (and vice versa when AVX is disabled through the noavx isa attribute). - Correct modes are conditionally enabled via the VI12_AVX2 iterator. - The base ISA level is enabled via the insn predicate (TARGET_SSE2). You have to track three dependent conditions to calculate how the insn pattern/mode/operand predicate are enabled ;) Uros.
Re: [PATCH] pass filtering for -fopt-info
On Tue, Oct 30, 2012 at 1:21 AM, Sharad Singhai sing...@google.com wrote: As per discussion in http://gcc.gnu.org/ml/gcc/2012-10/msg00225.html, I have added the -fopt-info pass filtering in the attached patch. The basic idea is that there are optimization pass groups and a user can selectively enable dumps for these group(s) via command-line syntax. Currently, I define the following optimization groups: 'loop', 'lto', 'inline', 'vec', and 'omp'. A pass can be in multiple groups. If a pass doesn't explicitly specify an optimization group, (denoted by OPTGROUP_NONE) then a group is assigned based on the pass type. These three are the obvious implicit groups: 'tree', 'ipa', and 'rtl'. I agree with Richard -- we don't need these implicit groups. Also there is a catch-all group, called 'optall'. Why is this needed? Isn't that the default? The options for -fopt-info dumps verbosity remain 'optimized', 'missed', 'note', and 'all'. Since these two types of options, verbosity and optimization groups are non-overlapping, I have decided to freely mix them. Something like this works as expected, i.e., dump missed vectorizer info into vec.missed. gcc ... -fopt-info-vec-missed=vec.missed which is equivalent to gcc ... -fopt-info-missed-vec=vec.missed However, the order still matters, and it can be somewhat confusing. For example, gcc -fopt-info-vec-missed=vec.miss -fopt-info-vec-optimized=vec.opt will dump missed and optimized vectorizer info into vec.opt, while no vec.miss is produced. This is due to the fact that the latter group specifier, 'vec' overrides the first one. However, the 'missed' and 'optimized' are both honored as there is no conflict there. This is somewhat confusing. Hopefully, this type of usage would not be common. Please document that only one alt dump file per optimization-group is supported. I have updated the documentation to include -fopt-info examples, and added some details about -fopt-info command line conflicts. thanks, David Thanks, Sharad
Re: [PATCH] Fix debug info for expr and jump stmt
I'd say either we should do the TREE_BLOCK setting on all non-shareable trees during gimple-low and clear the block (but then likely the whole location? It doesn't make sense to say in the debugger that something has a certain source location when you can't print variables declared in that location) if copying expressions for use elsewhere, outside of the containing function. Or, say during gimplification or gimple-low.c, simply set t->exp.locus of all non-shareable expressions to UNKNOWN_LOCATION to make it clear we don't use it (I wonder if that could affect debug info quality, perhaps not that much), but during expansion, if creating trees from TERed stmts, they need to be set back, or the current location/block adjusted accordingly.

The debugger isn't the only consumer of debug info, and other tools might need a finer granularity than a GIMPLE location, so clearing EXPR_LOCATION to work around a debug info size issue seems very short-sighted (to say the least). -- Eric Botcazou
[PING^2] [C++ PATCH] Add overflow checking to __cxa_vec_new[23]
On 09/17/2012 12:54 PM, Florian Weimer wrote: On 09/17/2012 12:15 PM, Paolo Carlini wrote: Hi, On 09/17/2012 11:51 AM, Florian Weimer wrote: On 08/21/2012 12:37 PM, Florian Weimer wrote: I don't think there are any callers out there, but let's fix this for completeness. A compiler emitting code to call this function would still have to perform overflow checks for the new T[n][m] case, so this interface is not as helpful as it looks at first glance. Tested on x86_64-redhat-linux-gnu. Ping? This function is apparently used by compilers based on the EDG front end, so it's not actually dead. Being code that touches the library, the patch should go to the libstdc++ mailing list too. Likewise the testcase should be in the libstdc++ testsuite, I guess. Oh, I thought that this wouldn't apply to internal C++ support code. Sorry. That said, I didn't really follow the details of your recent work. Who did? Jason? I would gently ping the same maintainer. Indeed, Jason reviewed that. Cc:ing. Ping? Patch is at: http://gcc.gnu.org/ml/gcc-patches/2012-08/msg01416.html Thanks, Florian -- Florian Weimer / Red Hat Product Security Team
Re: [PATCH] pass filtering for -fopt-info
On Tue, Oct 30, 2012 at 8:28 AM, Richard Biener richard.guent...@gmail.com wrote: On Tue, Oct 30, 2012 at 9:21 AM, Sharad Singhai sing...@google.com wrote: As per discussion in http://gcc.gnu.org/ml/gcc/2012-10/msg00225.html, I have added the -fopt-info pass filtering in the attached patch. The basic idea is that there are optimization pass groups and a user can selectively enable dumps for these group(s) via command-line syntax. Currently, I define the following optimization groups: 'loop', 'lto', 'inline', 'vec', and 'omp'. A pass can be in multiple groups. If a pass doesn't explicitly specify an optimization group, (denoted by OPTGROUP_NONE) then a group is assigned based on the pass type. These three are the obvious implicit groups: 'tree', 'ipa', and 'rtl'. Also there is a catch-all group, called 'optall'. The options for -fopt-info dumps verbosity remain 'optimized', 'missed', 'note', and 'all'. Since these two types of options, verbosity and optimization groups are non-overlapping, I have decided to freely mix them. Something like this works as expected, i.e., dump missed vectorizer info into vec.missed. gcc ... -fopt-info-vec-missed=vec.missed which is equivalent to gcc ... -fopt-info-missed-vec=vec.missed However, the order still matters, and it can be somewhat confusing. For example, gcc -fopt-info-vec-missed=vec.miss -fopt-info-vec-optimized=vec.opt will dump missed and optimized vectorizer info into vec.opt, while no vec.miss is produced. This is due to the fact that the latter group specifier, 'vec' overrides the first one. However, the 'missed' and 'optimized' are both honored as there is no conflict there. This is somewhat confusing. Hopefully, this type of usage would not be common. What I'd expect from that would be both vec.miss and vec.opt being populated ... (thus go away from the duality of dump files to primary dump file plus a set of alternate dump files). 
I have updated the documentation to include -fopt-info examples, and added some details about -fopt-info command line conflicts.

I like it overall, not sure if we want to pre-populate the OPTGROUP set too much at this point. Like, what is 'tree' or 'rtl' to users? Nothing, I think. 'ipa' yes. 'lto'? Sounds redundant with 'ipa' to me. 'omp'? We don't have any optimizations here.

OMP is a high-level transformation, and it seems to be a good candidate group, but this part does not need to be designed now. For instance, there are a bunch of FDO-related transformations (indirect call promotion, memcpy transformation, etc.), and coverage mismatch notes, etc., which are good candidates to be filtered. thanks, David

Thus please drop TREE, RTL, LTO and OMP for now. Otherwise I'm leaving it for comments from other folks. Thanks, Richard. Thanks, Sharad
Re: RFA: hookize ADJUST_INSN_LENGTH (Was: RFA: Add lock_length attribute to support the ARC port)
Quoting Richard Biener richard.guent...@gmail.com: Apart from the iteration_threshold the hookization would be straight-forward. Now I cannot decipher from the patch what functional change it introduces ;)

The only change occurs if we reach an iteration count of MAX_INT iterations, which should already be indicative of a problem. At the MAX_INTth iteration, we will apply the length locking logic to instructions inside a delay slot sequence as well. If we disregard this exceptional case, there should be no functional changes unless someone defines TARGET_ADJUST_INSN_LENGTH.

uid_lock_length gets allocated with XCNEWVEC. So, in particular, the elements corresponding to instructions inside delay slot sequences are initialized to zero. As the default hook sets *iter_threshold to MAX_INT when inside a delay sequence, and doesn't change the length, the max operation with uid_lock_length is a no-op, and *niter < iter_threshold is true, hence a length change results in updating the length immediately, without changing uid_lock_length.

In the case that we are not inside a delay slot sequence, the default hook leaves iter_threshold as 0, and applies ADJUST_INSN_LENGTH. Thus, when niter is 0 or larger, as is the case for the ordinary looping operation, we always find *niter >= iter_threshold, and thus apply the length locking mechanism. If we are in the preliminary pass, or doing the single !increasing iteration, niter is set to -1, so *niter < iter_threshold is always true, so again we update the length immediately.

There are also only 10 users of ADJUST_INSN_LENGTH, not enough to warrant introducing a hook without removing the macro IMHO. Ok, I can prepare a patch for that, even though it makes it even a bit less obvious that there's no functional change. What about the preparatory patch http://gcc.gnu.org/ml/gcc-patches/2012-10/msg02527.html ? Can I check that in now?
Re: [PING^2] [C++ PATCH] Add overflow checking to __cxa_vec_new[23]
Hi, Florian Weimer fwei...@redhat.com wrote: Ping? Patch is at: http://gcc.gnu.org/ml/gcc-patches/2012-08/msg01416.html

Sorry, I don't know the code well enough to review your patch, but since I'm in CC, I still don't understand why, instead of adding a full libstdc++ testcase, you are extending a C++ testcase, in old-deja even, normally considered legacy. Paolo
[PATCH][RFC] Sanity checking for -freorder-blocks-and-partition failures
Hello, Hot/cold partitioning is apparently a hot topic all of a sudden, which is a good thing of course, because it's in need of some TLC. The attached patch adds another check to the RTL cfg checking (verify_flow_info) for the partitioning: a hot block can never be dominated by a cold block (because a block dominated by a cold block must itself be cold). This trips in PR55121. I haven't tested this with any profiling tests, but it's bound to break things. From my POV, whatever gets broken by this patch was already broken to begin with :-) If you're in CC, it's because I hope you can help test this patch. Downside of this patch is that I need dominance info. If it's not available, I compute and free it. I'm not sure if this works if dominance info status is DOM_NO_FAST_QUERY, and I don't want to recompute in this case because IMHO a verifier should be a no-op from the POV of the rest of the compiler, and updating dominators would make this patch a not-a-no-op :-) Thoughts/comments? Ciao! Steven

	* cfgrtl.c (rtl_verify_flow_info_1): Verify that blocks in the hot
	partition are not dominated by blocks in the cold partition.

Index: cfgrtl.c
===
--- cfgrtl.c	(revision 191819)
+++ cfgrtl.c	(working copy)
@@ -2033,6 +2033,7 @@ rtl_verify_flow_info_1 (void)
   rtx x;
   int err = 0;
   basic_block bb;
+  bool have_partitions = false;
 
   /* Check the general integrity of the basic blocks.  */
   FOR_EACH_BB_REVERSE (bb)
@@ -2145,6 +2146,8 @@ rtl_verify_flow_info_1 (void)
	  n_eh++;
	else if (e->flags & EDGE_ABNORMAL)
	  n_abnormal++;
+
+	have_partitions |= is_crossing;
       }
 
     if (n_eh && !find_reg_note (BB_END (bb), REG_EH_REGION, NULL_RTX))
@@ -2263,6 +2268,40 @@ rtl_verify_flow_info_1 (void)
	}
     }
 
+  /* If there are partitions, do a sanity check on them: A basic block in
+     a cold partition cannot dominate a basic block in a hot partition.  */
+  VEC (basic_block, heap) *bbs_in_cold_partition = NULL;
+  if (have_partitions && !err)
+    FOR_EACH_BB (bb)
+      if ((BB_PARTITION (bb) == BB_COLD_PARTITION))
+	VEC_safe_push (basic_block, heap, bbs_in_cold_partition, bb);
+  if (! VEC_empty (basic_block, bbs_in_cold_partition))
+    {
+      bool dom_calculated_here = !dom_info_available_p (CDI_DOMINATORS);
+      basic_block son;
+
+      if (dom_calculated_here)
+	calculate_dominance_info (CDI_DOMINATORS);
+
+      while (! VEC_empty (basic_block, bbs_in_cold_partition))
+	{
+	  bb = VEC_pop (basic_block, bbs_in_cold_partition);
+	  if ((BB_PARTITION (bb) != BB_COLD_PARTITION))
+	    {
+	      error ("non-cold basic block %d dominated "
+		     "by a block in the cold partition", bb->index);
+	      err = 1;
+	    }
+	  for (son = first_dom_son (CDI_DOMINATORS, bb);
+	       son;
+	       son = next_dom_son (CDI_DOMINATORS, son))
+	    VEC_safe_push (basic_block, heap, bbs_in_cold_partition, son);
+	}
+
+      if (dom_calculated_here)
+	free_dominance_info (CDI_DOMINATORS);
+    }
+
   /* Clean up.  */
   return err;
 }
Re: [PATCH] Fix debug info for expr and jump stmt
On Tue, Oct 30, 2012 at 8:29 AM, Richard Biener richard.guent...@gmail.com wrote: On Tue, Oct 30, 2012 at 4:21 PM, Jakub Jelinek ja...@redhat.com wrote: On Tue, Oct 30, 2012 at 04:15:38PM +0100, Richard Biener wrote: So maybe TER (well, those looking up the stmt) should pick the location from the TERed statement properly then? Perhaps, but Micha's patch doesn't do that. But in that case IMHO it still would help to set all expr locations to UNKNOWN_LOCATION during gimple lowering, to make it clear we ignore the locations. Yes indeed.

I agree, but this looks like too bold a move at this point. Shall we do that in 4.8? BTW, I updated the patch to ensure pr43479.c works fine. The testing is still on-going. Dehao

gcc/ChangeLog: 2012-10-25 Dehao Chen de...@google.com

	* tree-eh.c (do_return_redirection): Set location for jump statement.
	(do_goto_redirection): Likewise.
	(frob_into_branch_around): Likewise.
	(lower_try_finally_nofallthru): Likewise.
	(lower_try_finally_copy): Likewise.
	(lower_try_finally_switch): Likewise.
	* expr.c (store_expr): Use current insn location instead of expr
	location.
	(expand_expr_real): Likewise.
	(expand_expr_real_1): Likewise.

gcc/testsuite/ChangeLog: 2012-10-25 Dehao Chen de...@google.com

	* g++.dg/debug/dwarf2/block.C: New testcase.

Index: testsuite/g++.dg/debug/dwarf2/block.C
===
--- testsuite/g++.dg/debug/dwarf2/block.C	(revision 0)
+++ testsuite/g++.dg/debug/dwarf2/block.C	(revision 0)
@@ -0,0 +1,29 @@
+// Compiler should not generate too many lexical blocks for this function.
+// { dg-do compile { target { i?86-*-* x86_64-*-* } } }
+// { dg-options "-O0 -fno-exceptions -g -dA" }
+
+union UElement {
+  void* pointer;
+  int integer;
+};
+struct UColToken {
+  unsigned source;
+  unsigned char **rulesToParseHdl;
+};
+
+int uhash_hashTokens(const union UElement k)
+{
+  int hash = 0;
+  struct UColToken *key = (struct UColToken *)k.pointer;
+  if (key != 0) {
+    int len = (key->source & 0xFF000000) >> 24;
+    int inc = ((len - 32) / 32) + 1;
+    const unsigned char *p = (key->source & 0x00FFFFFF)
+      + *(key->rulesToParseHdl);
+    const unsigned char *limit = p + len;
+    hash = *p + *limit;
+  }
+  return hash;
+}
+
+// { dg-final { scan-assembler-not "LBB10" } }

Index: tree-eh.c
===
--- tree-eh.c	(revision 192809)
+++ tree-eh.c	(working copy)
@@ -739,6 +739,7 @@ do_return_redirection (struct goto_queue_node *q,
   gimple_seq_add_seq (&q->repl_stmt, mod);
 
   x = gimple_build_goto (finlab);
+  gimple_set_location (x, q->location);
   gimple_seq_add_stmt (&q->repl_stmt, x);
 }
@@ -758,6 +759,7 @@ do_goto_redirection (struct goto_queue_node *q, tr
   gimple_seq_add_seq (&q->repl_stmt, mod);
 
   x = gimple_build_goto (finlab);
+  gimple_set_location (x, q->location);
   gimple_seq_add_stmt (&q->repl_stmt, x);
 }
@@ -857,6 +859,7 @@ frob_into_branch_around (gimple tp, eh_region regi
       if (!over)
	over = create_artificial_label (loc);
       x = gimple_build_goto (over);
+      gimple_set_location (x, loc);
       gimple_seq_add_stmt (&cleanup, x);
     }
   gimple_seq_add_seq (&eh_seq, cleanup);
@@ -1085,6 +1088,7 @@ lower_try_finally_nofallthru (struct leh_state *st
	  emit_post_landing_pad (&eh_seq, tf->region);
 
	  x = gimple_build_goto (lab);
+	  gimple_set_location (x, gimple_location (tf->try_finally_expr));
	  gimple_seq_add_stmt (&eh_seq, x);
	}
     }
@@ -1223,6 +1227,7 @@ lower_try_finally_copy (struct leh_state *state, s
       tmp = lower_try_finally_fallthru_label (tf);
       x = gimple_build_goto (tmp);
+      gimple_set_location (x, tf_loc);
       gimple_seq_add_stmt (&new_stmt, x);
     }
@@ -1395,6 +1400,7 @@ lower_try_finally_switch (struct leh_state *state,
       tmp = lower_try_finally_fallthru_label (tf);
       x = gimple_build_goto (tmp);
+      gimple_set_location (x, tf_loc);
       gimple_seq_add_stmt (&switch_body, x);
     }
@@ -1423,6 +1429,7 @@ lower_try_finally_switch (struct leh_state *state,
       gimple_seq_add_stmt (&eh_seq, x);
 
       x = gimple_build_goto (finally_label);
+      gimple_set_location (x, tf_loc);
       gimple_seq_add_stmt (&eh_seq, x);
 
       tmp = build_int_cst (integer_type_node, eh_index);

Index: expr.c
===
--- expr.c	(revision 192809)
+++ expr.c	(working copy)
@@ -5030,7 +5030,7 @@ store_expr (tree exp, rtx target, int call_param_p
 {
   rtx temp;
   rtx alt_rtl = NULL_RTX;
-  location_t loc = EXPR_LOCATION (exp);
+  location_t loc = curr_insn_location ();
 
   if (VOID_TYPE_P (TREE_TYPE (exp)))
     {
@@ -7869,31 +7869,7 @@ expand_expr_real (tree exp, rtx target, enum machi
   return ret ? ret : const0_rtx;
 }
 
-  /* If this
Re: [PATCH] Fix PR 53743 and other -freorder-blocks-and-partition failures (issue6823047)
On Tue, Oct 30, 2012 at 8:49 AM, Matthew Gretton-Dann wrote: On 30 October 2012 05:20, Teresa Johnson wrote: Index: cfgrtl.c === --- cfgrtl.c(revision 192692) +++ cfgrtl.c(working copy) @@ -912,7 +912,8 @@ rtl_can_merge_blocks (basic_block a, basic_block b partition boundaries). See the comments at the top of bb-reorder.c:partition_hot_cold_basic_blocks for complete details. */ - if (BB_PARTITION (a) != BB_PARTITION (b)) + if (find_reg_note (BB_END (a), REG_CROSSING_JUMP, NULL_RTX) + || BB_PARTITION (a) != BB_PARTITION (b)) return false; /* Protect the loop latches. */ @@ -3978,7 +3979,8 @@ cfg_layout_can_merge_blocks_p (basic_block a, basi partition boundaries). See the comments at the top of bb-reorder.c:partition_hot_cold_basic_blocks for complete details. */ - if (BB_PARTITION (a) != BB_PARTITION (b)) + if (find_reg_note (BB_END (a), REG_CROSSING_JUMP, NULL_RTX) + || BB_PARTITION (a) != BB_PARTITION (b)) return false; /* Protect the loop latches. */ As this if() condition seems to be the canonical way to detect being in a different partition should it be moved out into a query function, and all of cfgrtl.c updated to use it? Not just in cfgrtl.c but for example also in ifcvt.c (which currently only tests for notes, that's broken). Ciao! Steven
Re: [PATCH] Fix debug info for expr and jump stmt
BTW, one thing I found confusing is that in expr.c, some code is for the frontend, while some is for RTL. Shall we separate them into two files? And we don't expect to see EXPR_LOCATION on the RTL side. Thanks, Dehao
Re: [PING^2] [C++ PATCH] Add overflow checking to __cxa_vec_new[23]
On 10/30/2012 05:17 PM, Paolo Carlini wrote: Sorry, I don't know the code well enough to review your patch, but since I'm in CC, I still don't understand why, instead of adding a full libstdc++ testcase, you are extending a C++ testcase, in old-deja even, normally considered legacy.

AFAIK, this is the only place we have such a test. I suppose I could put it into testsuite/18_support, but I would have to duplicate a bit of the machinery of the original test case because I can't just write a class and take the address of its constructor and destructor (whose addresses are passed to the tested functions). I really didn't want to do that because there are some platform dependencies (the __ARM_EABI__ #ifdef). Not sure if this makes sense, but those were my reasons. -- Florian Weimer / Red Hat Product Security Team
Re: [PATCH] Fix debug info for expr and jump stmt
The debugger isn't the only consumer of debug info, and other tools might need a finer granularity than a GIMPLE location, so clearing EXPR_LOCATION to work around a debug info size issue seems very short-sighted (to say the least).

Hi Eric, There might be some misunderstanding here. Clearing EXPR_LOCATION is not a workaround to reduce debug size. It is aimed at making the GCC implementation cleaner. And before resetting it, we need to truly make sure nothing after gimple-low depends on it. Please let me know if you have other concerns. Thanks, Dehao -- Eric Botcazou
[PATCH, GCC 4.7] Backport fix for PR tree-optimization/53708
I'm hitting the same bug as in PR53708 when compiling GLIBC's dlfcn.c when vectorization is enabled on powerpc64-linux. A reduced test case is:

bergner@bns:~/gcc/BUGS cat foo.i
static void (*const init_array []) (void)
  __attribute__ ((section (".init_array"), aligned (sizeof (void *)), used))
  = { 0 };
bergner@bns:~/gcc/BUGS /home/bergner/gcc/build/gcc-fsf-4_7-base/gcc/xgcc -B/home/bergner/gcc/build/gcc-fsf-4_7-base/gcc -S -m64 -O3 -maltivec foo.i -o bad.s
bergner@bns:~/gcc/BUGS /home/bergner/gcc/build/gcc-fsf-4_7-pr53708/gcc/xgcc -B/home/bergner/gcc/build/gcc-fsf-4_7-pr53708/gcc -S -m64 -O3 -maltivec foo.i -o good.s
bergner@bns:~/gcc/BUGS diff -u bad.s good.s
--- bad.s	2012-10-30 10:41:15.0 -0500
+++ good.s	2012-10-30 10:41:23.0 -0500
@@ -2,7 +2,7 @@
 	.section	.toc,"aw"
 	.section	".text"
 	.section	.init_array,"a"
-	.align 4
+	.align 3
 	.type	init_array, @object
 	.size	init_array, 8
 init_array:

The above is bad, because the extra alignment causes the linker to add some null padding to the init_array, and the loader isn't expecting that and ends up segv'ing. I'd like to backport Richard's patch below to the 4.7 branch. The patch bootstrapped and regtested on powerpc64-linux with no regressions. Is it ok for the 4.7 branch? Peter

	Backport from mainline
	2012-06-19  Richard Guenther  rguent...@suse.de

	PR tree-optimization/53708
	* tree-vect-data-refs.c (vect_can_force_dr_alignment_p): Preserve
	user-supplied alignment and alignment of decls with the used
	attribute.

Index: gcc/tree-vect-data-refs.c
===
--- gcc/tree-vect-data-refs.c	(revision 192988)
+++ gcc/tree-vect-data-refs.c	(working copy)
@@ -4574,6 +4574,12 @@
   if (TREE_ASM_WRITTEN (decl))
     return false;
 
+  /* Do not override explicit alignment set by the user or the alignment
+     as specified by the ABI when the used attribute is set.  */
+  if (DECL_USER_ALIGN (decl)
+      || DECL_PRESERVE_P (decl))
+    return false;
+
   if (TREE_STATIC (decl))
     return (alignment <= MAX_OFILE_ALIGNMENT);
   else
Re: [PATCH][RFC] Sanity checking for -freorder-blocks-and-partition failures
On Tue, Oct 30, 2012 at 9:26 AM, Steven Bosscher stevenb@gmail.com wrote: Hello, Hot/cold partitioning is apparently a hot topic all of a sudden, which is a good thing of course, because it's in need of some TLC. The attached patch adds another check to the RTL cfg checking (verify_flow_info) for the partitioning: A hot block can never be dominated by a cold block (because the dominated block must also be cold). This trips in PR55121. I haven't tested this with any profiling tests, but it's bound to break things. From my POV, whatever gets broken by this patch was already broken to begin with :-) If you're in CC, it's because I hope you can help test this patch.

I will try testing your patch on top of mine with our fdo benchmarks. For the others on the cc list, you may need to include my patch as well for testing. Without it, -freorder-blocks-and-partition was DOA for me. For my patch, see http://gcc.gnu.org/ml/gcc-patches/2012-10/msg02692.html Teresa

Downside of this patch is that I need dominance info. If it's not available, I compute and free it. I'm not sure if this works if dominance info status is DOM_NO_FAST_QUERY, and I don't want to recompute in this case because IMHO a verifier should be a no-op from the POV of the rest of the compiler, and updating dominators would make this patch a not-a-no-op :-) Thoughts/comments? Ciao! Steven

-- Teresa Johnson | Software Engineer | tejohn...@google.com | 408-460-2413
Re: [PATCH] Fix debug info for expr and jump stmt
gcc/ChangeLog: 2012-10-25 Dehao Chen de...@google.com * tree-eh.c (do_return_redirection): Set location for jump statement. (do_goto_redirection): Likewise. (frob_into_branch_around): Likewise. (lower_try_finally_nofallthru): Likewise. (lower_try_finally_copy): Likewise. (lower_try_finally_switch): Likewise. * expr.c (store_expr): Use current insn location instead of expr location. (expand_expr_real): Likewise. (expand_expr_real_1): Likewise. gcc/testsuite/ChangeLog: 2012-10-25 Dehao Chen de...@google.com * g++.dg/debug/dwarf2/block.C: New testcase. This patch bootstrapped and passed gcc regression tests. Okay for trunk? Thanks, Dehao
Re: [PATCH] Fix PR 53743 and other -freorder-blocks-and-partition failures (issue6823047)
On Tue, Oct 30, 2012 at 6:20 AM, Teresa Johnson wrote:

Index: bb-reorder.c
===
--- bb-reorder.c	(revision 192692)
+++ bb-reorder.c	(working copy)
@@ -2188,6 +2188,8 @@ insert_section_boundary_note (void)
	first_partition = BB_PARTITION (bb);
       if (BB_PARTITION (bb) != first_partition)
	{
+	  /* There should be a barrier between text sections.  */
+	  emit_barrier_after (BB_END (bb->prev_bb));

So why isn't there one? There can't be a fall-through edge from one section to the other, so cfgrtl.c:fixup_reorder_chain should have added a barrier here already in the code under the comment:

  /* Now add jumps and labels as needed to match the blocks new
     outgoing edges.  */

Why isn't it doing that for you? BTW, something else I noted in cfgrtl.c: NOTE_INSN_SWITCH_TEXT_SECTIONS shouldn't be copied in duplicate_insn_chain, so the following is necessary for robustness:

Index: cfgrtl.c
===
--- cfgrtl.c	(revision 191819)
+++ cfgrtl.c	(working copy)
@@ -3615,7 +3615,6 @@
	      break;
 
	    case NOTE_INSN_EPILOGUE_BEG:
-	    case NOTE_INSN_SWITCH_TEXT_SECTIONS:
	      emit_note_copy (insn);
	      break;

There can be only one! One note to rule them all! etc.

Index: cfgrtl.c
===
--- cfgrtl.c	(revision 192692)
+++ cfgrtl.c	(working copy)
@@ -912,7 +912,8 @@ rtl_can_merge_blocks (basic_block a, basic_block b
      partition boundaries).  See the comments at the top of
      bb-reorder.c:partition_hot_cold_basic_blocks for complete details.  */
 
-  if (BB_PARTITION (a) != BB_PARTITION (b))
+  if (find_reg_note (BB_END (a), REG_CROSSING_JUMP, NULL_RTX)
+      || BB_PARTITION (a) != BB_PARTITION (b))
     return false;

My dislike for this whole scheme just continues to grow... How can there be a REG_CROSSING_JUMP note if BB_PARTITION (a) == BB_PARTITION (b)? That is a bug. We should not need the notes here. As long as we have the CFG, BB_PARTITION (a) == BB_PARTITION (b) should be the canonical way to check whether two blocks are in the same partition, and the EDGE_CROSSING flag should be set iff an edge crosses from one section to another. The REG_CROSSING_JUMP note should only be used to see if a JUMP_INSN may jump to another section, without having to check all successor edges. Any place where we have to check BB_PARTITION or edge->flags & EDGE_CROSSING *and* REG_CROSSING_JUMP indicates a bug in the partitioning updating.

Another BTW: sched-vis.c doesn't handle REG_CROSSING_JUMP notes, so slim RTL dumping breaks. I need this patchlet to make things work:

Index: sched-vis.c
===
--- sched-vis.c	(revision 191819)
+++ sched-vis.c	(working copy)
@@ -553,6 +553,11 @@
 {
   char t1[BUF_LEN], t2[BUF_LEN], t3[BUF_LEN];
 
+  if (! x)
+    {
+      sprintf (buf, "(nil)");
+      return;
+    }
   switch (GET_CODE (x))
     {
     case SET:

Ciao! Steven
Re: [PATCH] Replace const_vector with match_operand in sse.md
Thanks for the explanation, I understand it. I fixed the issue which you marked. Changelog is unchanged. --- Best regards, Andrey Turetskiy On Tue, Oct 30, 2012 at 7:40 PM, Uros Bizjak ubiz...@gmail.com wrote:

const_vector_replace.patch Description: Binary data
Re: [patch,libgcc] m32r*rtems* add crtinit.o and crtfinit.o
On Tue, Oct 30, 2012 at 12:10 AM, Ralf Corsepius ralf.corsep...@rtems.org wrote: I would like to apply the patch below to trunk and the gcc-4.7 branch. This patch was originally submitted by Joel Sherrill back in May (http://gcc.gnu.org/ml/gcc-patches/2012-05/msg01180.html), but had never received any feedback. It has been part of the rtems-gcc patches since then. You are an RTEMS maintainer; you don't need any additional approval to commit this patch to trunk. Ian
Re: [PATCH] Replace const_vector with match_operand in sse.md
On Tue, Oct 30, 2012 at 6:53 PM, Andrey Turetskiy andrey.turets...@gmail.com wrote: Thanks for the explanation, I understand it. I fixed the issue which you marked. The changelog is unchanged. I changed the patch according to Uros' remarks. Please, have a look. Changelog: 2012-10-30 Andrey Turetskiy andrey.turets...@gmail.com * config/i386/i386.c (bdesc_args): Rename CODE_FOR_avx2_umulhrswv16hi3 to CODE_FOR_avx2_pmulhrswv16hi3. * config/i386/predicates.md (const1_operand): Extend for vectors. * config/i386/sse.md (ssse3_avx2): Extend. (ssedoublemode): Ditto. (<sse2_avx2>_uavg<mode>3): Merge avx2_uavgv32qi3, sse2_uavgv16qi3, avx2_uavgv16hi3 and sse2_uavgv8hi3 into one. (*<sse2_avx2>_uavg<mode>3): Merge *avx2_uavgv32qi3, *sse2_uavgv16qi3, *avx2_uavgv16hi3 and *sse2_uavgv8hi3 into one. (PMULHRSW): New. (<ssse3_avx2>_pmulhrsw<mode>3): Merge avx2_umulhrswv16hi3, ssse3_pmulhrswv8hi3 and ssse3_pmulhrswv4hi3 into one. (*avx2_pmulhrswv16hi3): Replace const_vector with match_operand. Replace const_vector with const1_operand predicate. (*ssse3_pmulhrswv8hi3): Ditto. (*ssse3_pmulhrswv4hi3): Ditto. Yes, the patch is OK for mainline SVN. BTW: Probably, pmulhrsw insn patterns can be merged, too, but this can be a follow-up patch. Thanks, Uros.
Re: [patch] Unify bitmap interface.
On 10/30/12, Diego Novillo dnovi...@google.com wrote: On Oct 30, 2012 Bin.Cheng amker.ch...@gmail.com wrote: Just one question: Should we change the name of functions sbitmap_intersection_of_succs/sbitmap_intersection_of_preds/ sbitmap_union_of_succs/sbitmap_union_of_preds too? It might be a little confusing that sbitmap_* is used among bitmap_*. Yes. Lawrence is proceeding with this unification in stages. The next few patches should rename these. Actually, I didn't know about them because they are auxiliary routines declared outside of the bitmap files. I've gone ahead and changed them as well. The only two sets of functions that will remain separate for now are the iterators and the bitmap creation routines, I think. Lawrence? The iterator functions have been unified, but not the iterator type names. -- Lawrence Crowl
Non-dominating loop bounds in tree-ssa-loop-niter 2/4
Hi, this patch implements the second part of the planned change - determining loop bounds by shortest path discovery. This allows bounding the number of iterations of loops whose bounds are in statements that do not dominate the latch. I originally planned to implement this as part of maybe_lower_iteration_bound, but I found doing it separately is more readable. While both perform a walk of the loop body, they do so for different reasons. discover_iteration_bound_by_body_walk walks from header to latch, while maybe_lower_iteration_bound walks from header to the first statement with side effects, looking whether there is an exit on the way. Both walks can be skipped for many loops, but each for different reasons. Bootstrapped/regtested x86_64-linux, OK? * tree-ssa-loop-niter.c (double_int_cmp, bound_index, discover_iteration_bound_by_body_walk): New functions. (discover_iteration_bound_by_body_walk): Use it. * gcc.dg/tree-ssa/loop-38.c: New testcase. Index: tree-ssa-loop-niter.c === --- tree-ssa-loop-niter.c (revision 192989) +++ tree-ssa-loop-niter.c (working copy) @@ -2955,6 +2955,234 @@ gcov_type_to_double_int (gcov_type val) return ret; } +/* Compare double ints, callback for qsort. */ + +int +double_int_cmp (const void *p1, const void *p2) +{ + const double_int *d1 = (const double_int *)p1; + const double_int *d2 = (const double_int *)p2; + if (*d1 == *d2) +return 0; + if (d1->ult (*d2)) +return -1; + return 1; +} + +/* Return index of BOUND in BOUNDS array sorted in increasing order. + Lookup by binary search. */ + +int +bound_index (VEC (double_int, heap) *bounds, double_int bound) +{ + unsigned int end = VEC_length (double_int, bounds); + unsigned int begin = 0; + + /* Find a matching index by means of a binary search. 
*/ + while (begin != end) +{ + unsigned int middle = (begin + end) / 2; + double_int index = VEC_index (double_int, bounds, middle); + + if (index == bound) + return middle; + else if (index.ult (bound)) + begin = middle + 1; + else + end = middle; +} + gcc_unreachable (); +} + +/* Used to hold vector of queues of basic blocks below. */ +typedef VEC (basic_block, heap) *bb_queue; +DEF_VEC_P(bb_queue); +DEF_VEC_ALLOC_P(bb_queue,heap); + +/* We recorded loop bounds only for statements dominating loop latch (and thus + executed each loop iteration). If there are any bounds on statements not + dominating the loop latch we can improve the estimate by walking the loop + body and seeing if every path from loop header to loop latch contains + some bounded statement. */ + +static void +discover_iteration_bound_by_body_walk (struct loop *loop) +{ + pointer_map_t *bb_bounds; + struct nb_iter_bound *elt; + VEC (double_int, heap) *bounds = NULL; + VEC (bb_queue, heap) *queues = NULL; + bb_queue queue = NULL; + ptrdiff_t queue_index; + ptrdiff_t latch_index = 0; + pointer_map_t *visited; + + /* Discover what bounds may interest us. */ + for (elt = loop->bounds; elt; elt = elt->next) +{ + double_int bound = elt->bound; + + /* Exit terminates loop at given iteration, while non-exits produce undefined +effect on the next iteration. */ + if (!elt->is_exit) + { + bound += double_int_one; + /* Overflow, give up on this bound. */ + if (bound == double_int_zero) + continue; + } + + if (!loop->any_upper_bound + || bound.ult (loop->nb_iterations_upper_bound)) +VEC_safe_push (double_int, heap, bounds, bound); +} + + /* Exit early if there is nothing to do. */ + if (!bounds) +return; + + if (dump_file && (dump_flags & TDF_DETAILS)) +fprintf (dump_file, "Trying to walk loop body to reduce the bound.\n"); + + /* Sort the bounds in increasing order. 
*/ + qsort (VEC_address (double_int, bounds), VEC_length (double_int, bounds), +sizeof (double_int), double_int_cmp); + + /* For every basic block record the lowest bound that is guaranteed to + terminate the loop. */ + + bb_bounds = pointer_map_create (); + for (elt = loop->bounds; elt; elt = elt->next) +{ + double_int bound = elt->bound; + if (!elt->is_exit) + { + bound += double_int_one; + /* Overflow, give up on this bound. */ + if (bound == double_int_zero) + continue; + } + + if (!loop->any_upper_bound + || bound.ult (loop->nb_iterations_upper_bound)) + { + ptrdiff_t index = bound_index (bounds, bound); + void **entry = pointer_map_contains (bb_bounds, + gimple_bb (elt->stmt)); + if (!entry) + *pointer_map_insert (bb_bounds, +gimple_bb (elt->stmt)) = (void *)index; + else if ((ptrdiff_t)*entry > index) + *entry = (void *)index; + } +} +
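For readers following along, the kind of loop the body walk handles can be illustrated with a small hypothetical C testcase (this is not the new gcc.dg/tree-ssa/loop-38.c itself): neither array store dominates the latch, but every path through the loop body executes one of them, so together they bound the iteration count.

```c
#include <assert.h>

unsigned char a[128], b[128];

/* Neither store dominates the loop latch, but every path from header
   to latch goes through one of them, so each iteration is bounded by
   the array size even though no single bounded statement dominates.  */
int
walk (int n, int flag)
{
  int i, count = 0;
  for (i = 0; i < n; i++)
    {
      if (flag)
        a[i] = 1;
      else
        b[i] = 1;
      count++;
    }
  return count;
}
```

With the patch, the niter analysis should be able to derive an upper bound for such a loop from the body walk alone, since every header-to-latch path contains a bounded statement.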
Re: [PATCH, GCC 4.7] Backport fix for PR tree-optimization/53708
On Tue, 2012-10-30 at 11:58 -0500, Peter Bergner wrote: I'm hitting the same bug as in PR53708 when compiling GLIBC's dlfcn.c when vectorization is enabled on powerpc64-linux. A reduced test case is: bergner@bns:~/gcc/BUGS cat foo.i static void (*const init_array []) (void) __attribute__ ((section (".init_array"), aligned (sizeof (void *)), used)) = { 0 }; bergner@bns:~/gcc/BUGS /home/bergner/gcc/build/gcc-fsf-4_7-base/gcc/xgcc -B/home/bergner/gcc/build/gcc-fsf-4_7-base/gcc -S -m64 -O3 -maltivec foo.i -o bad.s bergner@bns:~/gcc/BUGS /home/bergner/gcc/build/gcc-fsf-4_7-pr53708/gcc/xgcc -B/home/bergner/gcc/build/gcc-fsf-4_7-pr53708/gcc -S -m64 -O3 -maltivec foo.i -o good.s bergner@bns:~/gcc/BUGS diff -u bad.s good.s --- bad.s 2012-10-30 10:41:15.0 -0500 +++ good.s 2012-10-30 10:41:23.0 -0500 @@ -2,7 +2,7 @@ .section .toc,"aw" .section .text .section .init_array,"a" -.align 4 +.align 3 .type init_array, @object .size init_array, 8 init_array: The above is bad, because the extra alignment causes the linker to add some null padding to the init_array and the loader isn't expecting that and ends up segv'ing. I'd like to backport Richard's patch below to the 4.7 branch. The patch bootstrapped and regtested on powerpc64-linux with no regressions. Commenting on Richard's question from the bugzilla: http://gcc.gnu.org/bugzilla/show_bug.cgi?id=53708#c10 I suppose if __attribute__((__aligned__)) truly does just set a minimum alignment value (and the documentation seems to say that) and the compiler is free to arbitrarily increase it, then the GLIBC code to scan the init_array needs to be tolerant of null values in init_array. Does everyone agree with that assessment? Peter
Re: [PATCH] Fix PR 53743 and other -freorder-blocks-and-partition failures (issue6823047)
On Tue, Oct 30, 2012 at 6:48 PM, Steven Bosscher stevenb@gmail.com wrote: On Tue, Oct 30, 2012 at 6:20 AM, Teresa Johnson wrote: Index: bb-reorder.c === --- bb-reorder.c (revision 192692) +++ bb-reorder.c (working copy) @@ -2188,6 +2188,8 @@ insert_section_boundary_note (void) first_partition = BB_PARTITION (bb); if (BB_PARTITION (bb) != first_partition) { + /* There should be a barrier between text sections. */ + emit_barrier_after (BB_END (bb->prev_bb)); So why isn't there one? There can't be a fall-through edge from one section to the other, so cfgrtl.c:fixup_reorder_chain should have added a barrier here already in the code under the comment: /* Now add jumps and labels as needed to match the blocks new outgoing edges. */ Why isn't it doing that for you? Maybe it's because fix_up_fall_thru_edges calls force_nonfallthru, which is incorrectly inserting JUMP_INSNs and BARRIERs in cfglayout mode. I'm going to test this patch: Index: cfgrtl.c === --- cfgrtl.c (revision 192889) +++ cfgrtl.c (working copy) @@ -1511,16 +1511,17 @@ force_nonfallthru_and_redirect (edge e, #endif } set_return_jump_label (BB_END (jump_block)); + emit_barrier_after (BB_END (jump_block)); } - else + else if (current_ir_type () == IR_RTL_CFGRTL) { rtx label = block_label (target); emit_jump_insn_after_setloc (gen_jump (label), BB_END (jump_block), loc); JUMP_LABEL (BB_END (jump_block)) = label; LABEL_NUSES (label)++; + emit_barrier_after (BB_END (jump_block)); } - emit_barrier_after (BB_END (jump_block)); redirect_edge_succ_nodup (e, target); if (abnormal_edge_flags)
Re: [PATCH, GCC 4.7] Backport fix for PR tree-optimization/53708
On Tue, Oct 30, 2012 at 01:43:33PM -0500, Peter Bergner wrote: Commenting on Richard's question from the bugzilla: http://gcc.gnu.org/bugzilla/show_bug.cgi?id=53708#c10 I suppose if __attribute__((__aligned__)) truly does just set a minimum alignment value (and the documentation seems to say that) and the compiler is free to arbitrarily increase it, then the GLIBC code to scan the init_array needs to be tolerant of null values in init_array. Does everyone agree with that assessment? I think the right test is for a user supplied section name, this approach is so common that I think we have to support it. E.g. Linux kernel, glibc, prelink all use this, gcc itself too (e.g. crtstuff.c). Jakub
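The "minimum, not exact" semantics under discussion can be seen in a small standalone sketch (hypothetical; it deliberately omits the section attribute so it can run anywhere): the attribute only guarantees a lower bound on alignment, which is why the compiler was free to emit .align 4 instead of .align 3.

```c
#include <assert.h>
#include <stdint.h>

/* aligned() requests a minimum alignment; the compiler may raise it
   further, e.g. for vectorized access, which is what introduced the
   padding GLIBC's init_array scan tripped over.  */
static void (*const fns[1]) (void)
  __attribute__ ((aligned (sizeof (void *)), used)) = { 0 };

int
pointer_aligned_p (void)
{
  /* We can only assert the guaranteed lower bound, not an exact value.  */
  return (uintptr_t) &fns % sizeof (void *) == 0;
}
```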
Re: Ping / update: RFA: replace #ifdef with if/#if for HAVE_ATTR_*
I can't approve the whole thing of course, but I like the idea. However... Joern Rennecke joern.renne...@embecosm.com writes: +@deftypevr {Target Hook} bool TARGET_HAVE_CC0 +@deftypevrx {Target Hook} {bool} TARGET_AUTO_INC_DEC +@deftypevrx {Target Hook} {bool} TARGET_STACK_REGS +@deftypevrx {Target Hook} {bool} TARGET_HAVE_ATTR_ENABLED +@deftypevrx {Target Hook} {bool} TARGET_HAVE_ATTR_LENGTH +These flags are automatically generated; you should not override them in @file{tm.c}. +@end deftypevr Unless this goes against something already discussed, I think it'd be better to leave these out until there's a concerted attempt to use them somewhere. On its own this isn't even a partial transition. :-) + /* We make an exception here to provide stub definitions for + insn_*_length* / get_attr_enabled functions. */ + puts ("#if !HAVE_ATTR_length\n" + "extern int hook_int_rtx_0 (rtx);\n" + "#define insn_default_length hook_int_rtx_0\n" + "#define insn_min_length hook_int_rtx_0\n" + "#define insn_variable_length_p hook_int_rtx_0\n" + "#define insn_current_length hook_int_rtx_0\n" + "#include \"insn-addr.h\"\n" + "#endif\n"); I'd prefer defaults that call gcc_unreachable, rather than silently return an arbitrary value. That said, + "#if !HAVE_ATTR_enabled\n" + "extern int hook_int_rtx_1 (rtx);\n" + "#define get_attr_enabled hook_int_rtx_1\n" + "#endif\n"); I agree that 1 is a safe default here. Richard
Re: [PATCH, GCC 4.7] Backport fix for PR tree-optimization/53708
On Tue, 2012-10-30 at 19:55 +0100, Jakub Jelinek wrote: On Tue, Oct 30, 2012 at 01:43:33PM -0500, Peter Bergner wrote: Commenting on Richard's question from the bugzilla: http://gcc.gnu.org/bugzilla/show_bug.cgi?id=53708#c10 I suppose if __attribute__((__aligned__)) truly does just set a minimum alignment value (and the documentation seems to say that) and the compiler is free to arbitrarily increase it, then the GLIBC code to scan the init_array needs to be tolerant of null values in init_array. Does everyone agree with that assessment? I think the right test is for a user supplied section name, this approach is so common that I think we have to support it. E.g. Linux kernel, glibc, prelink all use this, gcc itself too (e.g. crtstuff.c). Ok, then I'll bootstrap and regtest your suggested change while we wait for richi to comment. I'm fine with whatever you and richi decide is best. The ObjC guys should probably test it though too. I assume you think we should change the current trunk code as well? Peter
[committed] A global default for SLOW_UNALIGNED_ACCESS
This patch replaces three separate default definitions of SLOW_UNALIGNED_ACCESS with a single global one. Note that tm.texi requires SLOW_UNALIGNED_ACCESS to be true if STRICT_ALIGNMENT. Tested on x86_64-linux-gnu, powerpc64-linux-gnu and mipsisa64-elf. Applied as obvious. Richard gcc/ * defaults.h (SLOW_UNALIGNED_ACCESS): Provide default definition. * expmed.c (SLOW_UNALIGNED_ACCESS): Remove default definition. * expr.c (SLOW_UNALIGNED_ACCESS): Likewise. * lra-constraints.c (SLOW_UNALIGNED_ACCESS): Likewise. (simplify_operand_subreg): Don't check STRICT_ALIGNMENT here. Index: gcc/defaults.h === --- gcc/defaults.h 2012-08-02 21:10:06.0 +0100 +++ gcc/defaults.h 2012-10-28 10:30:47.340353996 + @@ -1218,6 +1218,10 @@ #define MINIMUM_ALIGNMENT(EXP,MODE,ALIGN #define ATTRIBUTE_ALIGNED_VALUE BIGGEST_ALIGNMENT #endif +#ifndef SLOW_UNALIGNED_ACCESS +#define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT +#endif + /* For most ports anything that evaluates to a constant symbolic or integer value is acceptable as a constant address. */ #ifndef CONSTANT_ADDRESS_P Index: gcc/expmed.c === --- gcc/expmed.c 2012-10-28 10:25:12.0 + +++ gcc/expmed.c 2012-10-28 10:30:44.178354004 + @@ -69,11 +69,6 @@ static rtx expand_sdiv_pow2 (enum machin /* Test whether a value is zero of a power of two. */ #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0) -#ifndef SLOW_UNALIGNED_ACCESS -#define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT -#endif - - /* Reduce conditional compilation elsewhere. */ #ifndef HAVE_insv #define HAVE_insv 0 Index: gcc/expr.c === --- gcc/expr.c 2012-10-25 10:08:06.0 +0100 +++ gcc/expr.c 2012-10-28 10:31:44.133353857 + @@ -189,12 +189,6 @@ #define STORE_BY_PIECES_P(SIZE, ALIGN) \ (move_by_pieces_ninsns (SIZE, ALIGN, STORE_MAX_PIECES + 1) \ < (unsigned int) MOVE_RATIO (optimize_insn_for_speed_p ())) #endif - -/* SLOW_UNALIGNED_ACCESS is nonzero if unaligned accesses are very slow. 
*/ - -#ifndef SLOW_UNALIGNED_ACCESS -#define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT -#endif /* This is run to set up which modes can be used directly in memory and to initialize the block move optab. It is run Index: gcc/lra-constraints.c === --- gcc/lra-constraints.c 2012-10-26 11:50:16.0 +0100 +++ gcc/lra-constraints.c 2012-10-28 10:32:02.499353813 + @@ -1105,10 +1105,6 @@ process_addr_reg (rtx *loc, rtx *before, return true; } -#ifndef SLOW_UNALIGNED_ACCESS -#define SLOW_UNALIGNED_ACCESS(mode, align) 0 -#endif - /* Make reloads for subreg in operand NOP with internal subreg mode REG_MODE, add new reloads for further processing. Return true if any reload was generated. */ @@ -1132,8 +1128,7 @@ simplify_operand_subreg (int nop, enum m address might violate the necessary alignment or the access might be slow. So take this into consideration. */ if ((MEM_P (reg) - && ((! STRICT_ALIGNMENT - && ! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (reg))) + && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (reg)) || MEM_ALIGN (reg) >= GET_MODE_ALIGNMENT (mode))) || (REG_P (reg) && REGNO (reg) < FIRST_PSEUDO_REGISTER)) {
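The invariant Richard notes can be demonstrated with a trivial sketch of the defaults.h idiom (STRICT_ALIGNMENT is hard-coded here purely for illustration): a port that defines neither macro now automatically satisfies the tm.texi requirement that SLOW_UNALIGNED_ACCESS be true whenever STRICT_ALIGNMENT is.

```c
/* Stand-in for a strict-alignment target; the real value comes from
   the target headers.  */
#define STRICT_ALIGNMENT 1

/* The defaults.h pattern added by the patch: only take effect when
   the port has not provided its own definition.  */
#ifndef SLOW_UNALIGNED_ACCESS
#define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
#endif

int
slow_unaligned_default (void)
{
  /* On a strict-alignment target the default is always true.  */
  return SLOW_UNALIGNED_ACCESS (0, 0);
}
```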
Re: User directed Function Multiversioning via Function Overloading (issue5752064)
On 10/27/2012 09:16 PM, Sriraman Tallam wrote: + /* See if there's a match. For functions that are multi-versioned, +all the versions match. */ if (same_type_p (target_fn_type, static_fn_type (fn))) - matches = tree_cons (fn, NULL_TREE, matches); + { + matches = tree_cons (fn, NULL_TREE, matches); + /* If versioned, push all possible versions into a vector. */ + if (DECL_FUNCTION_VERSIONED (fn)) + { + if (fn_ver_vec == NULL) + fn_ver_vec = VEC_alloc (tree, heap, 2); + VEC_safe_push (tree, heap, fn_ver_vec, fn); + } + } Why do we need to keep both a list and vector of the matches? +Call decls_match to make sure they are different because they are +versioned. */ + if (DECL_FUNCTION_VERSIONED (fn)) + { + for (match = TREE_CHAIN (matches); match; match = TREE_CHAIN (match)) + if (decls_match (fn, TREE_PURPOSE (match))) + break; + } What if you have multiple matches that aren't all versions of the same function? Why would it be a problem to have two separate declarations of the same function? + dispatcher_decl = targetm.get_function_versions_dispatcher (fn_ver_vec); Is the idea here that if you have some versions declared, then a call, then more versions declared, then another call, you will call two different dispatchers, where the first one will only dispatch to the versions declared before the first call? If not, why do we care about the set of declarations at this point? + /* Mark this functio to be output. */ + node->local.finalized = 1; Missing 'n' in function. @@ -14227,7 +14260,11 @@ cxx_comdat_group (tree decl) else break; } - name = DECL_ASSEMBLER_NAME (decl); + if (TREE_CODE (decl) == FUNCTION_DECL + && DECL_FUNCTION_VERSIONED (decl)) + name = DECL_NAME (decl); This would mean that f in the global namespace and f in namespace foo would end up in the same comdat group. Why do we need special handling here at all? 
dump_function_name (tree t, int flags) { - tree name = DECL_NAME (t); + tree name; + /* For function versions, use the assembler name as the decl name is + the same for all versions. */ + if (TREE_CODE (t) == FUNCTION_DECL + && DECL_FUNCTION_VERSIONED (t)) +name = DECL_ASSEMBLER_NAME (t); This shouldn't be necessary; we should print the target attribute when printing the function declaration. +Also, mark this function as needed if it is marked inline but +is a multi-versioned function. */ + if (((flag_keep_inline_functions + || DECL_FUNCTION_VERSIONED (fn)) This should be marked as needed by the code that builds the dispatcher. + /* For calls to a multi-versioned function, overload resolution + returns the function with the highest target priority, that is, + the version that will checked for dispatching first. If this + version is inlinable, a direct call to this version can be made + otherwise the call should go through the dispatcher. */ I'm a bit confused why people would want both dispatched calls and non-dispatched inlining; I would expect that if a function can be compiled differently enough on newer hardware to make versioning worthwhile, that would be a larger difference than the call overhead. + if (DECL_FUNCTION_VERSIONED (fn) + && !targetm.target_option.can_inline_p (current_function_decl, fn)) +{ + struct cgraph_node *dispatcher_node = NULL; + fn = get_function_version_dispatcher (fn); + if (fn == NULL) + return NULL; + dispatcher_node = cgraph_get_create_node (fn); + gcc_assert (dispatcher_node != NULL); + /* Mark this function to be output. */ + dispatcher_node->local.finalized = 1; +} Why do you need to mark this here? If you generate a call to the dispatcher, cgraph should mark it to be output automatically. + /* For candidates of a multi-versioned function, make the version with + the highest priority win. This version will be checked for dispatching + first. 
If this version can be inlined into the caller, the front-end + will simply make a direct call to this function. */ This is still too high in joust. I believe I said before that this code should come just above /* If the two function declarations represent the same function (this can happen with declarations in multiple scopes and arg-dependent lookup), arbitrarily choose one. But first make sure the default args we're using match. */ + /* For multiversioned functions, aggregate all the versions here for + generating the dispatcher body later if necessary. Check to see if + the dispatcher is already generated to avoid doing this more than + once. */ This caching
[0/8] Preparing for insv and ext(z)v optabs
I'm finishing off some patches to allow insv, extv and extzv to be defined as normal direct optabs (such as insvsi and insvdi rather than just insv). This series of patches does some groundwork to make that possible. The patches are not supposed to change the generated code. I checked that the assembly output for a set of gcc .ii files didn't change for x86_64-linux-gnu, powerpc64-linux-gnu and mips64-linux-gnu. Also regression-tested on x86_64-linux-gnu, powerpc64-linux-gnu and mips64-elf. Richard
[1/8] Remove redundant BLKmode test
This patch removes what I believe is a redundant check in store_bit_field_1 for whether the value to insert (i.e. the rhs) has BLKmode. We shouldn't see BLKmode values here, and even if we did, the only effect of the test is to fall through to store_fixed_bit_field, which can't handle BLKmode either. Specifically, store_fixed_bit_field would call: if (GET_MODE (value) != mode) value = convert_to_mode (mode, value, 1); and convert_to_mode ICEs on BLKmode values. Tested as described in the covering note. OK to install? Richard gcc/ * expmed.c (store_bit_field_1): Remove test for BLKmode values. Index: gcc/expmed.c === --- gcc/expmed.c 2012-10-28 10:40:22.533352589 + +++ gcc/expmed.c 2012-10-28 10:40:23.119352588 + @@ -670,7 +670,6 @@ store_bit_field_1 (rtx str_rtx, unsigned enum machine_mode op_mode = mode_for_extraction (EP_insv, 3); if (HAVE_insv - && GET_MODE (value) != BLKmode && bitsize > 0 && GET_MODE_BITSIZE (op_mode) >= bitsize /* Do not use insv for volatile bitfields when
[2/8] Remove redundant MAX_MACHINE_MODE tests
extract_bit_field_1 has a block beginning: /* If OP0 is a memory, try copying it to a register and seeing if a cheap register alternative is available. */ if (ext_mode != MAX_MACHINE_MODE && MEM_P (op0)) { and within it there are tests for whether ext_mode != MAX_MACHINE_MODE. This patch removes them. store_bit_field_1 has a similar block, but it uses: /* If OP0 is a memory, try copying it to a register and seeing if a cheap register alternative is available. */ if (HAVE_insv && MEM_P (op0)) { However, by definition, HAVE_insv is equivalent to op_mode != MAX_MACHINE_MODE, so this patch changes it to that form for consistency. It's then obvious that the corresponding op_mode != MAX_MACHINE_MODE tests are redundant too. Tested as described in the covering note. OK to install? Richard gcc/ * expmed.c (store_bit_field_1): Use OP_MODE to check whether an insv pattern is available. Remove redundant checks for OP_MODE being MAX_MACHINE_MODE. (extract_bit_field_1): Remove redundant checks for EXT_MODE being MAX_MACHINE_MODE. Index: gcc/expmed.c === --- gcc/expmed.c 2012-10-28 10:54:24.0 + +++ gcc/expmed.c 2012-10-28 10:54:53.715350457 + @@ -669,7 +669,7 @@ store_bit_field_1 (rtx str_rtx, unsigned in a word. */ enum machine_mode op_mode = mode_for_extraction (EP_insv, 3); - if (HAVE_insv + if (op_mode != MAX_MACHINE_MODE && bitsize > 0 && GET_MODE_BITSIZE (op_mode) >= bitsize /* Do not use insv for volatile bitfields when @@ -788,7 +788,7 @@ store_bit_field_1 (rtx str_rtx, unsigned /* If OP0 is a memory, try copying it to a register and seeing if a cheap register alternative is available. 
*/ - if (HAVE_insv && MEM_P (op0)) + if (op_mode != MAX_MACHINE_MODE && MEM_P (op0)) { enum machine_mode bestmode; unsigned HOST_WIDE_INT maxbits = MAX_FIXED_MODE_SIZE; @@ -803,13 +803,10 @@ store_bit_field_1 (rtx str_rtx, unsigned if (GET_MODE (op0) == BLKmode || GET_MODE_BITSIZE (GET_MODE (op0)) > maxbits - || (op_mode != MAX_MACHINE_MODE - && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (op_mode))) + || GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (op_mode)) bestmode = get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end, - MEM_ALIGN (op0), - (op_mode == MAX_MACHINE_MODE - ? VOIDmode : op_mode), + MEM_ALIGN (op0), op_mode, MEM_VOLATILE_P (op0)); else bestmode = GET_MODE (op0); @@ -1597,12 +1594,9 @@ extract_bit_field_1 (rtx str_rtx, unsign smallest mode containing the field. */ if (GET_MODE (op0) == BLKmode - || (ext_mode != MAX_MACHINE_MODE - && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (ext_mode))) + || GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (ext_mode)) bestmode = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0), - (ext_mode == MAX_MACHINE_MODE - ? VOIDmode : ext_mode), - MEM_VOLATILE_P (op0)); + ext_mode, MEM_VOLATILE_P (op0)); else bestmode = GET_MODE (op0);
[3/8] Split out insv and ext(z)v code
This patch splits out the code to handle insv and ext(z)v from store_bit_field_1 and extract_bit_field_1 respectively. I removed x prefixes from some of the variables and tried to make the placement of the REG and SUBREG handling more consistent, but there are no behavioural changes. Tested as described in the covering note. OK to install? Richard gcc/ * expmed.c (store_bit_field_using_insv): New function, split out from... (store_bit_field_1): ...here. (extract_bit_field_using_extv): New function, split out from... (extract_bit_field_1): ...here. Index: gcc/expmed.c === --- gcc/expmed.c 2012-10-29 12:56:45.260327155 + +++ gcc/expmed.c 2012-10-29 13:11:08.244325044 + @@ -404,6 +404,120 @@ lowpart_bit_field_p (unsigned HOST_WIDE_ return bitnum % BITS_PER_WORD == 0; } +/* Try to use an insv pattern to store VALUE into a field of OP0. + OP_MODE is the mode of the insertion and BITSIZE and BITNUM are + as for store_bit_field. */ + +static bool +store_bit_field_using_insv (rtx op0, unsigned HOST_WIDE_INT bitsize, + unsigned HOST_WIDE_INT bitnum, rtx value, + enum machine_mode op_mode) +{ + struct expand_operand ops[4]; + rtx value1; + rtx xop0 = op0; + rtx last = get_last_insn (); + bool copy_back = false; + + unsigned int unit = GET_MODE_BITSIZE (op_mode); + if (bitsize == 0 || bitsize > unit) +return false; + + if (MEM_P (xop0)) +{ + /* Get a reference to the first byte of the field. */ + xop0 = adjust_bitfield_address (xop0, byte_mode, bitnum / BITS_PER_UNIT); + bitnum %= BITS_PER_UNIT; +} + else +{ + /* Convert from counting within OP0 to counting in OP_MODE. */ + if (BYTES_BIG_ENDIAN) + bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0)); + + /* If xop0 is a register, we need it in OP_MODE +to make it acceptable to the format of insv. */ + if (GET_CODE (xop0) == SUBREG) + /* We can't just change the mode, because this might clobber op0, + and we will need the original value of op0 if insv fails. 
*/ + xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0)); + if (REG_P (xop0) && GET_MODE (xop0) != op_mode) + xop0 = gen_lowpart_SUBREG (op_mode, xop0); +} + + /* If the destination is a paradoxical subreg such that we need a + truncate to the inner mode, perform the insertion on a temporary and + truncate the result to the original destination. Note that we can't + just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N + X) 0)) is (reg:N X). */ + if (GET_CODE (xop0) == SUBREG + && REG_P (SUBREG_REG (xop0)) + && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)), +op_mode)) +{ + rtx tem = gen_reg_rtx (op_mode); + emit_move_insn (tem, xop0); + xop0 = tem; + copy_back = true; +} + + /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count + backwards from the size of the unit we are inserting into. + Otherwise, we count bits from the most significant on a + BYTES/BITS_BIG_ENDIAN machine. */ + + if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN) +bitnum = unit - bitsize - bitnum; + + /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */ + value1 = value; + if (GET_MODE (value) != op_mode) +{ + if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize) + { + /* Optimization: Don't bother really extending VALUE +if it has all the bits we will actually use. However, +if we must narrow it, be sure we do it correctly. */ + + if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode)) + { + rtx tmp; + + tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0); + if (! 
tmp) + tmp = simplify_gen_subreg (op_mode, + force_reg (GET_MODE (value), + value1), + GET_MODE (value), 0); + value1 = tmp; + } + else + value1 = gen_lowpart (op_mode, value1); + } + else if (CONST_INT_P (value)) + value1 = gen_int_mode (INTVAL (value), op_mode); + else + /* Parse phase is supposed to make VALUE's data type + match that of the component reference, which is a type + at least as wide as the field; so VALUE should have + a mode that corresponds to that type. */ + gcc_assert (CONSTANT_P (value)); +} + + create_fixed_operand (&ops[0], xop0); + create_integer_operand (&ops[1], bitsize); + create_integer_operand (&ops[2], bitnum); + create_input_operand (&ops[3], value1, op_mode); + if (maybe_expand_insn (CODE_FOR_insv, 4,
[4/8] Separate reg and mem uses of insv and ext(z)v
This patch simply separates out the MEM and non-MEM insv and ext(z)v cases. On its own, it's probably a wash whether this is an improvement or not, but it makes the optabs patches much easier. Tested as described in the covering note. OK to install? Richard gcc/ * expmed.c (store_bit_field_1): Move generation of MEM insvs to the MEM_P block. (extract_bit_field_1): Likewise extvs and extzvs. Index: gcc/expmed.c === --- gcc/expmed.c 2012-10-28 10:55:22.754350385 + +++ gcc/expmed.c 2012-10-28 10:55:29.249350370 + @@ -784,16 +784,7 @@ store_bit_field_1 (rtx str_rtx, unsigned enum machine_mode op_mode = mode_for_extraction (EP_insv, 3); if (op_mode != MAX_MACHINE_MODE - /* Do not use insv for volatile bitfields when - -fstrict-volatile-bitfields is in effect. */ - && !(MEM_P (op0) && MEM_VOLATILE_P (op0) - && flag_strict_volatile_bitfields > 0) - /* Do not use insv if the bit region is restricted and -op_mode integer at offset doesn't fit into the -restricted region. */ - && !(MEM_P (op0) && bitregion_end - && bitnum - (bitnum % BITS_PER_UNIT) + GET_MODE_BITSIZE (op_mode) - > bitregion_end + 1) + && !MEM_P (op0) && store_bit_field_using_insv (op0, bitsize, bitnum, value, op_mode)) return true; @@ -804,6 +795,18 @@ store_bit_field_1 (rtx str_rtx, unsigned enum machine_mode bestmode; unsigned HOST_WIDE_INT maxbits = MAX_FIXED_MODE_SIZE; + /* Do not use insv for volatile bitfields when + -fstrict-volatile-bitfields is in effect. */ + if (!(MEM_VOLATILE_P (op0) && flag_strict_volatile_bitfields > 0) + /* Do not use insv if the bit region is restricted and +an op_mode integer doesn't fit into the restricted region. */ + && !(bitregion_end + && (bitnum - (bitnum % BITS_PER_UNIT) + + GET_MODE_BITSIZE (op_mode) > bitregion_end + 1)) + && store_bit_field_using_insv (op0, bitsize, bitnum, value, op_mode)) + return true; + if (bitregion_end) maxbits = bitregion_end - bitregion_start + 1; @@ -1594,11 +1597,7 @@ extract_bit_field_1 (rtx str_rtx, unsign If OP0 is a register, it too fits within a word. 
*/ ext_mode = mode_for_extraction (unsignedp ? EP_extzv : EP_extv, 0); - if (ext_mode != MAX_MACHINE_MODE - /* Do not use extv/extzv for volatile bitfields when - -fstrict-volatile-bitfields is in effect. */ - && !(MEM_P (op0) && MEM_VOLATILE_P (op0) - && flag_strict_volatile_bitfields > 0)) + if (ext_mode != MAX_MACHINE_MODE && !MEM_P (op0)) { rtx result = extract_bit_field_using_extv (op0, bitsize, bitnum, unsignedp, target, mode, @@ -1613,6 +1612,17 @@ extract_bit_field_1 (rtx str_rtx, unsign { enum machine_mode bestmode; + /* Do not use extv/extzv for volatile bitfields when + -fstrict-volatile-bitfields is in effect. */ + if (!(MEM_VOLATILE_P (op0) && flag_strict_volatile_bitfields > 0)) + { + rtx result = extract_bit_field_using_extv (op0, bitsize, bitnum, +unsignedp, target, mode, +tmode, ext_mode); + if (result) + return result; + } + /* Get the mode to use for inserting into this field. If OP0 is BLKmode, get the smallest mode consistent with the alignment. If OP0 is a non-BLKmode object that is no
Re: [PATCH][RFC] Sanity checking for -freorder-blocks-and-partition failures
On Tue, Oct 30, 2012 at 5:59 PM, Teresa Johnson wrote:
> I will try testing your patch on top of mine with our fdo benchmarks.

Thanks.  But you should expect a lot of errors; hopefully you can make
something out of it for Bugzilla.

For the others on the cc list: you may need to include my patch as well for
testing.  Without it, -freorder-blocks-and-partition was DOA for me.  For my
patch, see http://gcc.gnu.org/ml/gcc-patches/2012-10/msg02692.html

Ah, the fate of a pass that isn't enabled by default.  From what I can tell,
looking at this code the past few hours, it's hopelessly broken at the moment:

* It doesn't work with cfglayout mode, even though it runs in between
  pass_into_cfg_layout_mode and pass_outof_cfg_layout_mode.

* Coming out of cfglayout mode, fixup_reorder_chain adds REG_CROSSING_JUMP
  notes -- but so does force_nonfallthru_and_redirect, called just before it.
  So we end up with more than one such note, or with notes on jumps that
  shouldn't have them.

* The scheduler doesn't respect the partitioning at all.  remove_notes
  happily removes the section split note.

* etc.

This pass may need some major work to be in good order again.

Ciao!
Steven
[5/8] Add narrow_bit_field_mem
This patch splits out a fairly common operation: that of narrowing a MEM to
a particular mode and adjusting the bit number accordingly.  I've kept with
"bit_field" rather than "bitfield" for consistency with the callers, although
we do have "bitfield" in adjust_bitfield_address.

Tested as described in the covering note.  OK to install?

Richard

gcc/
	* expmed.c (narrow_bit_field_mem): New function.
	(store_bit_field_using_insv, store_bit_field_1)
	(store_fixed_bit_field, extract_bit_field_1): Use it.

Index: gcc/expmed.c
===================================================================
--- gcc/expmed.c	2012-10-30 19:25:44.797368678 +0000
+++ gcc/expmed.c	2012-10-30 19:25:47.730368671 +0000
@@ -387,6 +387,23 @@ mode_for_extraction (enum extraction_pat
   return data->operand[opno].mode;
 }
 
+/* Adjust bitfield memory MEM so that it points to the first unit of
+   mode MODE that contains the bitfield at bit position BITNUM.
+   Set *NEW_BITNUM to the bit position of the field within the
+   new memory.  */
+
+static rtx
+narrow_bit_field_mem (rtx mem, enum machine_mode mode,
+		      unsigned HOST_WIDE_INT bitnum,
+		      unsigned HOST_WIDE_INT *new_bitnum)
+{
+  unsigned int unit = GET_MODE_BITSIZE (mode);
+  unsigned HOST_WIDE_INT offset = bitnum / unit * GET_MODE_SIZE (mode);
+  mem = adjust_bitfield_address (mem, mode, offset);
+  *new_bitnum = bitnum % unit;
+  return mem;
+}
+
 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
    a structure of mode STRUCT_MODE represents a lowpart subreg.  The
    subreg offset is then BITNUM / BITS_PER_UNIT.  */
@@ -424,11 +441,8 @@ store_bit_field_using_insv (rtx op0, uns
     return false;
 
   if (MEM_P (xop0))
-    {
-      /* Get a reference to the first byte of the field.  */
-      xop0 = adjust_bitfield_address (xop0, byte_mode, bitnum / BITS_PER_UNIT);
-      bitnum %= BITS_PER_UNIT;
-    }
+    /* Get a reference to the first byte of the field.  */
+    xop0 = narrow_bit_field_mem (xop0, byte_mode, bitnum, &bitnum);
   else
     {
       /* Convert from counting within OP0 to counting in OP_MODE.  */
@@ -831,18 +845,14 @@ store_bit_field_1 (rtx str_rtx, unsigned
	       && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
	{
	  rtx last, tempreg, xop0;
-	  unsigned int unit;
-	  unsigned HOST_WIDE_INT offset, bitpos;
+	  unsigned HOST_WIDE_INT bitpos;
 
	  last = get_last_insn ();
 
	  /* Adjust address to point to the containing unit of
	     that mode.  Compute the offset as a multiple of this unit,
	     counting in bytes.  */
-	  unit = GET_MODE_BITSIZE (bestmode);
-	  offset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
-	  bitpos = bitnum % unit;
-	  xop0 = adjust_bitfield_address (op0, bestmode, offset);
+	  xop0 = narrow_bit_field_mem (op0, bestmode, bitnum, &bitpos);
 
	  /* Fetch that unit, store the bitfield in it, then store
	     the unit.  */
@@ -975,9 +985,7 @@ store_fixed_bit_field (rtx op0, unsigned
	  return;
	}
 
-      HOST_WIDE_INT bit_offset = bitnum - bitnum % GET_MODE_BITSIZE (mode);
-      op0 = adjust_bitfield_address (op0, mode, bit_offset / BITS_PER_UNIT);
-      bitnum -= bit_offset;
+      op0 = narrow_bit_field_mem (op0, mode, bitnum, &bitnum);
     }
 
   mode = GET_MODE (op0);
@@ -1246,11 +1254,8 @@ extract_bit_field_using_extv (rtx op0, u
     return NULL_RTX;
 
   if (MEM_P (op0))
-    {
-      /* Get a reference to the first byte of the field.  */
-      op0 = adjust_bitfield_address (op0, byte_mode, bitnum / BITS_PER_UNIT);
-      bitnum %= BITS_PER_UNIT;
-    }
+    /* Get a reference to the first byte of the field.  */
+    op0 = narrow_bit_field_mem (op0, byte_mode, bitnum, &bitnum);
   else
     {
       /* Convert from counting within OP0 to counting in EXT_MODE.  */
@@ -1640,23 +1645,19 @@ extract_bit_field_1 (rtx str_rtx, unsign
	  && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
	       && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
	{
-	  unsigned HOST_WIDE_INT offset, bitpos;
-
-	  /* Compute the offset as a multiple of this unit,
-	     counting in bytes.  */
-	  unsigned int unit = GET_MODE_BITSIZE (bestmode);
-	  offset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
-	  bitpos = bitnum % unit;
+	  unsigned HOST_WIDE_INT bitpos;
+	  rtx xop0 = narrow_bit_field_mem (op0, bestmode, bitnum, &bitpos);
 
-	  /* Make sure the register is big enough for the whole field.  */
-	  if (bitpos + bitsize <= unit)
+	  /* Make sure the register is big enough for the whole field.
+	     (It might not be if bestmode == GET_MODE (op0) and the input
+	     code was invalid.)  */
+	  if (bitpos + bitsize <= GET_MODE_BITSIZE (bestmode))
	    {
-	      rtx last, result, xop0;
+	      rtx last, result;
[6/8] Simplify make_extraction
On a change of tack, this tackles some redundant code in combine.  It has
code to convert a variable bit position to the mode required by the bit
position operand to insv, extv or extzv:

[A]	  else if (pos_rtx != 0
		   && GET_MODE_SIZE (pos_mode) < GET_MODE_SIZE (GET_MODE (pos_rtx)))
	    pos_rtx = gen_lowpart (pos_mode, pos_rtx);

However, this is protected by an earlier:

[B]  if (pos_rtx && GET_MODE (pos_rtx) != VOIDmode
	  && GET_MODE_SIZE (pos_mode) < GET_MODE_SIZE (GET_MODE (pos_rtx)))
	pos_mode = GET_MODE (pos_rtx);

so [B] makes [A] redundant.  That leaves this block:

  /* Adjust mode of POS_RTX, if needed.  If we want a wider mode, we
     have to zero extend.  Otherwise, we can just use a SUBREG.  */
  if (pos_rtx != 0
      && GET_MODE_SIZE (pos_mode) > GET_MODE_SIZE (GET_MODE (pos_rtx)))
    {

as the only use of pos_mode, so [B] can go too.

Similarly, the memory case has:

[C]   /* If we have to change the mode of memory and cannot, the desired mode
	 is EXTRACTION_MODE.  */
      if (inner_mode != wanted_inner_mode
	  && (mode_dependent_address_p (XEXP (inner, 0), MEM_ADDR_SPACE (inner))
	      || MEM_VOLATILE_P (inner)
	      || pos_rtx))
	wanted_inner_mode = extraction_mode;

but those same conditions are repeated in the code that actually applies
wanted_inner_mode:

  /* If INNER has a wider mode, and this is a constant extraction, try to
     make it smaller and adjust the byte to point to the byte containing
     the value.  */
  if (wanted_inner_mode != VOIDmode
      && inner_mode != wanted_inner_mode
      && ! pos_rtx
      && GET_MODE_SIZE (wanted_inner_mode) < GET_MODE_SIZE (is_mode)
      && MEM_P (inner)
      && ! mode_dependent_address_p (XEXP (inner, 0), MEM_ADDR_SPACE (inner))
      && ! MEM_VOLATILE_P (inner))
    {

so [C] too is unnecessary.

Tested as described in the covering note.  OK to install?

Richard

gcc/
	* combine.c (make_extraction): Remove dead wanted_inner_mode-
	and pos_rtx-related code.
Index: gcc/combine.c
===================================================================
--- gcc/combine.c	2012-10-27 22:22:45.000000000 +0100
+++ gcc/combine.c	2012-10-27 22:32:26.811370582 +0100
@@ -7211,10 +7211,6 @@ make_extraction (enum machine_mode mode,
       && GET_MODE_SIZE (extraction_mode) < GET_MODE_SIZE (mode))
     extraction_mode = mode;
 
-  if (pos_rtx && GET_MODE (pos_rtx) != VOIDmode
-      && GET_MODE_SIZE (pos_mode) < GET_MODE_SIZE (GET_MODE (pos_rtx)))
-    pos_mode = GET_MODE (pos_rtx);
-
   /* If this is not from memory, the desired mode is the preferred mode
      for an extraction pattern's first input operand, or word_mode if there
      is none.  */
@@ -7231,14 +7227,6 @@ make_extraction (enum machine_mode mode,
	    wanted_inner_mode = GET_MODE_WIDER_MODE (wanted_inner_mode);
	  gcc_assert (wanted_inner_mode != VOIDmode);
	}
-
-      /* If we have to change the mode of memory and cannot, the desired mode
-	 is EXTRACTION_MODE.  */
-      if (inner_mode != wanted_inner_mode
-	  && (mode_dependent_address_p (XEXP (inner, 0), MEM_ADDR_SPACE (inner))
-	      || MEM_VOLATILE_P (inner)
-	      || pos_rtx))
-	wanted_inner_mode = extraction_mode;
     }
 
   orig_pos = pos;
@@ -7359,9 +7347,6 @@ make_extraction (enum machine_mode mode,
	}
       pos_rtx = temp;
     }
-  else if (pos_rtx != 0
-	   && GET_MODE_SIZE (pos_mode) < GET_MODE_SIZE (GET_MODE (pos_rtx)))
-    pos_rtx = gen_lowpart (pos_mode, pos_rtx);
 
   /* Make POS_RTX unless we already have it and it is correct.  If we don't
     have a POS_RTX but we do have an ORIG_POS_RTX, the latter must
Re: [PATCH][RFC] Sanity checking for -freorder-blocks-and-partition failures
On Tue, Oct 30, 2012 at 12:25 PM, Steven Bosscher stevenb@gmail.com wrote:
> On Tue, Oct 30, 2012 at 5:59 PM, Teresa Johnson wrote:
>> I will try testing your patch on top of mine with our fdo benchmarks.
>
> Thanks.  But you should expect a lot of errors; hopefully you can make
> something out of it for Bugzilla.
>
> For the others on the cc list: you may need to include my patch as well
> for testing.  Without it, -freorder-blocks-and-partition was DOA for me.
> For my patch, see http://gcc.gnu.org/ml/gcc-patches/2012-10/msg02692.html
>
> Ah, the fate of a pass that isn't enabled by default.

I see only 5 test cases under tree-prof for function splitting.  More test
cases are needed.

David

> From what I can tell, looking at this code the past few hours, it's
> hopelessly broken at the moment:
>
> * It doesn't work with cfglayout mode, even though it runs in between
>   pass_into_cfg_layout_mode and pass_outof_cfg_layout_mode.
>
> * Coming out of cfglayout mode, fixup_reorder_chain adds REG_CROSSING_JUMP
>   notes -- but so does force_nonfallthru_and_redirect, called just before
>   it.  So we end up with more than one such note, or with notes on jumps
>   that shouldn't have them.
>
> * The scheduler doesn't respect the partitioning at all.  remove_notes
>   happily removes the section split note.
>
> * etc.
>
> This pass may need some major work to be in good order again.
>
> Ciao!
> Steven
[7/8] BITS_BIG_ENDIAN vs. (zero_extract (const_int ...) ...)
Combine tries to optimise comparisons involving:

  (zero_extract (const_int X) (const_int 1) (var Y))

and so, on BITS_BIG_ENDIAN targets, it tries gamely to work out what mode X
actually has.  At the moment it tries reading the mode from operand 1 of
extzv, but that doesn't feel right, since we never use extzv itself with
this combination of operands.  (We only use it with combinations in which
the first zero_extract operand is variable and the third is constant.)
And extzv isn't necessarily a good indicator of what a matched zero_extract
does, since targets often have matchable zero_extract insns for more than
one mode.  E.g. powerpc (a BITS_BIG_ENDIAN target) has both SImode and
DImode patterns.

In practice, all BITS_BIG_ENDIAN targets that have an extzv pattern either
have an explicit word_mode operand (e.g. m68k) or leave it void, which
mode_for_extraction treats as word_mode.  Since word_mode is also the
default assumption when no extzv pattern is defined, I think it would be
more robust to assume/require word_mode across the board (as much as
anything can be called robust in this sort of situation).

Tested as described in the covering note.  OK to install?

Richard

gcc/
	* combine.c (simplify_comparison): If BITS_BIG_ENDIAN, always
	assume that zero_extracts of const_ints are doing word-sized
	extractions.

Index: gcc/combine.c
===================================================================
--- gcc/combine.c	2012-10-29 14:14:36.371315725 +0000
+++ gcc/combine.c	2012-10-29 14:29:26.800313546 +0000
@@ -11154,17 +11154,7 @@ simplify_comparison (enum rtx_code code,
	      && (i = exact_log2 (UINTVAL (XEXP (op0, 0)))) >= 0)
	    {
	      if (BITS_BIG_ENDIAN)
-		{
-		  enum machine_mode new_mode
-		    = mode_for_extraction (EP_extzv, 1);
-		  if (new_mode == MAX_MACHINE_MODE)
-		    i = BITS_PER_WORD - 1 - i;
-		  else
-		    {
-		      mode = new_mode;
-		      i = (GET_MODE_PRECISION (mode) - 1 - i);
-		    }
-		}
+		i = BITS_PER_WORD - 1 - i;
 
	      op0 = XEXP (op0, 2);
	      op1 = GEN_INT (i);
Re: [PATCH, GCC 4.7] Backport fix for PR tree-optimization/53708
On Tue, Oct 30, 2012 at 02:03:44PM -0500, Peter Bergner wrote:
> Ok, then I'll bootstrap and regtest your suggested change while we wait
> for richi to comment.  I'm fine with whatever you and richi decide is
> best.  The ObjC guys should probably test it though too.  I assume you
> think we should change the current trunk code as well?

Well, I haven't looked at the ObjC failures (I guess they are darwin-only
anyway), so I have no idea whether those set a section name or not.  So, if
my proposed test instead of the trunk one doesn't work for darwin, perhaps
it could be DECL_PRESERVED_P (decl) || (DECL_SECTION_NAME (decl) && !...).
I think the DECL_USER_ALIGN test is undesirable for that, though; it is
just fine to increase the alignment of anything -- after all, it is still
aligned properly then.

	Jakub