date:20121105

Re: [PATCH, i386]: AMD bdver3 enablement

2012-11-05 Thread Uros Bizjak

On Mon, Nov 5, 2012 at 8:33 AM, Gopalasubramanian, Ganesh
ganesh.gopalasubraman...@amd.com wrote:
 Couple of changes done with respect to the review comments.

 1. sseshuf type attribute is handled in unit attribute calculation.
 2. sseadd1 instruction attribute is handled in the new scheduler descriptions.

 The patch is attached as (patch.txt).
 The new file (bdver3.md) describing the pipelines is also attached.

-  [(set_attr type sselog)
+  [(set (attr type)
+ (if_then_else (eq_attr cpu bdver3)
+(const_string sseshuf)
+(const_string sselog)))
(set_attr length_immediate 1)
(set_attr prefix vex)
(set_attr mode V8SF)])
@@ -3911,7 +3914,10 @@
 }
 }
   [(set_attr isa noavx,avx)
-   (set_attr type sselog)
+   (set (attr type)
+ (if_then_else (eq_attr cpu bdver3)
+(const_string sseshuf)
+(const_string sselog)))
(set_attr length_immediate 1)
(set_attr prefix orig,vex)
(set_attr mode V4SF)])
@@ -4018,7 +4024,27 @@
vmovlps\t{%2, %1, %0|%0, %1, %2}
%vmovlps\t{%2, %0|%0, %2}
   [(set_attr isa noavx,avx,noavx,avx,*)
-   (set_attr type sselog,sselog,ssemov,ssemov,ssemov)
+   (set (attr type)
+(cond [(and (eq_attr cpu bdver3)
+ (eq_attr alternative 0))
+ (const_string sseshuf)
+   (and (eq_attr cpu bdver3)
+ (eq_attr alternative 1))
+ (const_string sseshuf)
+ (eq_attr alternative 2)
+ (const_string ssemov)
+ (eq_attr alternative 3)
+ (const_string ssemov)
+ (eq_attr alternative 4)
+ (const_string ssemov)
+  (and (not (eq_attr cpu bdver3))
+ (eq_attr alternative 0))
+ (const_string sselog)
+  (and (not (eq_attr cpu bdver3))
+ (eq_attr alternative 1))
+ (const_string sselog)
+   ]
+   (const_string * )))
(set_attr length_immediate 1,1,*,*,*)
(set_attr prefix orig,vex,orig,vex,maybe_vex)
(set_attr mode V4SF,V4SF,V2SF,V2SF,V2SF)])
@@ -4072,7 +4098,23 @@
vbroadcastss\t{%1, %0|%0, %1}
shufps\t{$0, %0, %0|%0, %0, 0}
   [(set_attr isa avx,avx,noavx)
-   (set_attr type sselog1,ssemov,sselog1)
+   (set (attr type)
+(cond [(and (eq_attr cpu bdver3)
+ (eq_attr alternative 0))
+ (const_string sseshuf)
+(and (eq_attr cpu bdver3)
+ (eq_attr alternative 2))
+ (const_string sseshuf)
+(eq_attr alternative 1)
+ (const_string ssemov)
+   (and (not (eq_attr cpu bdver3))
+ (eq_attr alternative 0))
+ (const_string sselog1)
+   (and (not (eq_attr cpu bdver3))
+ (eq_attr alternative 2))
+ (const_string sselog1)
+   ]
+   (const_string * )))

Please don't conditionally change type attribute. Change sselog{,1}
attribute unconditionally to sseshuf{,1} and handle them in the same
way as sselog{,1}.

In other words, add new attributes to all places where original
attributes are handled.

Otherwise, the patch looks good.

Uros.

[x86] Fix gcc.c-torture/compile/20080806-1.c failures

2012-11-05 Thread Richard Sandiford

This is a case where we had:

  (set (reg:HI foo) (plus:HI (reg:HI sp) (const_int X)))
  (clobber (reg:CC FLAGS_REG))

and the splitters decided to convert it to an LEA:

  (set (reg:SI foo) (plus:SI (subreg:SI (reg:HI sp) 0) (const_int X)))

But this fails to match, because ix86_address_subreg_operand
doesn't allow subregs of the stack pointer.

This shows up an inconsistency in the way the generic code handles
subregs of the stack pointer.  Normally we refuse to fold them, even
after reload, but the final_p case of alter_subreg folds them anyway.
That's how we ended up with the rather odd 16-bit sp.

However, even if the special alter_subreg case was removed
(so that we continued to use stack_pointer_rtx itself), we'd have:

  (set (reg:HI foo) (plus:HI (subreg:HI (reg:DI sp) 0) (const_int X)))
  (clobber (reg:CC FLAGS_REG))

which would get converted to:

  (set (reg:SI foo) (plus:SI (subreg:SI (reg:DI sp) 0) (const_int X)))

and we'd ICE in the same way.

The reason x86 rejects subregs of the stack pointer is this same refusal
to fold.  ix86_print_operand_address tries to simplify a SUBREG to a REG
and simplify_subreg wouldn't do anything for sp.

simplify_subreg isn't a lot of help at the output stage though.
If the insn stream contains a subreg that could be simplified but
hasn't been, then IMO that's a bug.  The cases we have to handle here
are those that can't be simplified (unless we decide at some point that
all registers must be simplifiable after reload, in which case we shouldn't
need to handle SUBREGs at all).

As things stand, I think we should be using true_regnum in this case instead.

Tested on x86_64-linux-gnu.  OK to install?

Richard


gcc/
PR target/55204
* config/i386/i386.c (ix86_address_subreg_operand): Remove stack
pointer check.
(print_reg): Use true_regnum rather than REGNO.
(ix86_print_operand_address): Remove SUBREG handling.

Index: gcc/config/i386/i386.c
===
--- gcc/config/i386/i386.c  2012-11-04 21:53:46.0 +
+++ gcc/config/i386/i386.c  2012-11-04 21:53:46.821356709 +
@@ -11784,10 +11784,6 @@ ix86_address_subreg_operand (rtx op)
   if (GET_MODE_SIZE (mode)  UNITS_PER_WORD)
 return false;
 
-  /* simplify_subreg does not handle stack pointer.  */
-  if (REGNO (op) == STACK_POINTER_REGNUM)
-return false;
-
   /* Allow only SUBREGs of non-eliminable hard registers.  */
   return register_no_elim_operand (op, mode);
 }
@@ -14084,13 +14080,6 @@ print_reg (rtx x, int code, FILE *file)
   const char *reg;
   bool duplicated = code == 'd'  TARGET_AVX;
 
-  gcc_assert (x == pc_rtx
- || (REGNO (x) != ARG_POINTER_REGNUM
-  REGNO (x) != FRAME_POINTER_REGNUM
-  REGNO (x) != FLAGS_REG
-  REGNO (x) != FPSR_REG
-  REGNO (x) != FPCR_REG));
-
   if (ASSEMBLER_DIALECT == ASM_ATT)
 putc ('%', file);
 
@@ -14101,6 +14090,13 @@ print_reg (rtx x, int code, FILE *file)
   return;
 }
 
+  unsigned int regno = true_regnum (x);
+  gcc_assert (regno != ARG_POINTER_REGNUM
+  regno != FRAME_POINTER_REGNUM
+  regno != FLAGS_REG
+  regno != FPSR_REG
+  regno != FPCR_REG);
+
   if (code == 'w' || MMX_REG_P (x))
 code = 2;
   else if (code == 'b')
@@ -14126,7 +14122,7 @@ print_reg (rtx x, int code, FILE *file)
 {
   gcc_assert (TARGET_64BIT);
   putc ('r', file);
-  fprint_ul (file, REGNO (x) - FIRST_REX_INT_REG + 8);
+  fprint_ul (file, regno - FIRST_REX_INT_REG + 8);
   switch (code)
{
  case 0:
@@ -14170,24 +14166,24 @@ print_reg (rtx x, int code, FILE *file)
 case 16:
 case 2:
 normal:
-  reg = hi_reg_name[REGNO (x)];
+  reg = hi_reg_name[regno];
   break;
 case 1:
-  if (REGNO (x) = ARRAY_SIZE (qi_reg_name))
+  if (regno = ARRAY_SIZE (qi_reg_name))
goto normal;
-  reg = qi_reg_name[REGNO (x)];
+  reg = qi_reg_name[regno];
   break;
 case 0:
-  if (REGNO (x) = ARRAY_SIZE (qi_high_reg_name))
+  if (regno = ARRAY_SIZE (qi_high_reg_name))
goto normal;
-  reg = qi_high_reg_name[REGNO (x)];
+  reg = qi_high_reg_name[regno];
   break;
 case 32:
   if (SSE_REG_P (x))
{
  gcc_assert (!duplicated);
  putc ('y', file);
- fputs (hi_reg_name[REGNO (x)] + 1, file);
+ fputs (hi_reg_name[regno] + 1, file);
  return;
}
   break;
@@ -14943,22 +14939,6 @@ ix86_print_operand_address (FILE *file,
 
   gcc_assert (ok);
 
-  if (parts.base  GET_CODE (parts.base) == SUBREG)
-{
-  rtx tmp = SUBREG_REG (parts.base);
-  parts.base = simplify_subreg (GET_MODE (parts.base),
-   tmp, GET_MODE (tmp), 0);
-  gcc_assert (parts.base != NULL_RTX);
-}
-
-  if (parts.index  GET_CODE (parts.index) == SUBREG)
-{
-  rtx

Re: [x86] Fix gcc.c-torture/compile/20080806-1.c failures

2012-11-05 Thread Uros Bizjak

On Mon, Nov 5, 2012 at 9:30 AM, Richard Sandiford
rdsandif...@googlemail.com wrote:
 This is a case where we had:

   (set (reg:HI foo) (plus:HI (reg:HI sp) (const_int X)))
   (clobber (reg:CC FLAGS_REG))

 and the splitters decided to convert it to an LEA:

   (set (reg:SI foo) (plus:SI (subreg:SI (reg:HI sp) 0) (const_int X)))

 But this fails to match, because ix86_address_subreg_operand
 doesn't allow subregs of the stack pointer.

 This shows up an inconsistency in the way the generic code handles
 subregs of the stack pointer.  Normally we refuse to fold them, even
 after reload, but the final_p case of alter_subreg folds them anyway.
 That's how we ended up with the rather odd 16-bit sp.

 However, even if the special alter_subreg case was removed
 (so that we continued to use stack_pointer_rtx itself), we'd have:

   (set (reg:HI foo) (plus:HI (subreg:HI (reg:DI sp) 0) (const_int X)))
   (clobber (reg:CC FLAGS_REG))

 which would get converted to:

   (set (reg:SI foo) (plus:SI (subreg:SI (reg:DI sp) 0) (const_int X)))

 and we'd ICE in the same way.

 The reason x86 rejects subregs of the stack pointer is this same refusal
 to fold.  ix86_print_operand_address tries to simplify a SUBREG to a REG
 and simplify_subreg wouldn't do anything for sp.

 simplify_subreg isn't a lot of help at the output stage though.
 If the insn stream contains a subreg that could be simplified but
 hasn't been, then IMO that's a bug.  The cases we have to handle here
 are those that can't be simplified (unless we decide at some point that
 all registers must be simplifiable after reload, in which case we shouldn't
 need to handle SUBREGs at all).

 As things stand, I think we should be using true_regnum in this case instead.

 Tested on x86_64-linux-gnu.  OK to install?

Let's ask H.J. to test this change on x32.

 gcc/
 PR target/55204
 * config/i386/i386.c (ix86_address_subreg_operand): Remove stack
 pointer check.
 (print_reg): Use true_regnum rather than REGNO.
 (ix86_print_operand_address): Remove SUBREG handling.

The patch is OK for mainline and 4.7, if it passes H.J.'s tests with
-maddress-mode={short,long} on x32.

 +  unsigned int regno = true_regnum (x);

I'd rather see the declaration at the beginning of the function.

Thanks,
Uros.

[SH] PR 54089 - Add support for rotcl instruction

2012-11-05 Thread Oleg Endo

Hello,

This patch adds support for SH's rotcl instruction.
While working on it, I've noticed that the DImode left shift by one insn
was not used anymore, and instead ended up as 'x + x'.  This
transformation was happening before/during RTL expansion.  The fix for
it was to adjust the costs for DImode plus/minus.

Tested on rev 193135 with
make -k check RUNTESTFLAGS=--target_board=sh-sim
\{-m2/-ml,-m2/-mb,-m2a/-mb,-m4/-ml,-m4/-mb,-m4a/-ml,-m4a/-mb}

and no new failures.
OK?

Cheers,
Oleg

gcc/ChangeLog:

PR target/54089
* config/sh/sh.c (and_xor_ior_costs, addsubcosts): Double the   
costs for ops larger than SImode.
* config/sh/sh.md (rotcl, *rotcl): New insns and splits.
(ashldi3_k): Convert to insn_and_split and use new rotcl insn.

testsuite/ChangeLog:

PR target/54089
* gcc.target/sh/pr54089-8.c: New.
* gcc.target/sh/pr54089-9.c: New.
Index: gcc/testsuite/gcc.target/sh/pr54089-9.c
===
--- gcc/testsuite/gcc.target/sh/pr54089-9.c	(revision 0)
+++ gcc/testsuite/gcc.target/sh/pr54089-9.c	(revision 0)
@@ -0,0 +1,63 @@
+/* Check that the rotcr instruction is generated.  */
+/* { dg-do compile { target sh*-*-* } } */
+/* { dg-options -O1 } */
+/* { dg-skip-if  { sh*-*-* } { -m5*} {  } }  */
+/* { dg-final { scan-assembler-times rotcl 4 } } */
+/* { dg-final { scan-assembler-not movt } } */
+/* { dg-final { scan-assembler-not or\t } } */
+/* { dg-final { scan-assembler-not rotl } } */
+/* { dg-final { scan-assembler-not and } } */
+
+typedef char bool;
+
+int
+test_00 (int* a, int* b)
+{
+  int i;
+  int r = 0;
+  for (i = 0; i  16; ++i)
+{
+  bool t = a[i] == b[i];
+  r = (r  1) | t;
+}
+  return r;
+}
+
+int
+test_01 (int* a, int* b)
+{
+  int i;
+  int r = 0;
+  for (i = 0; i  16; ++i)
+{
+  bool t = a[i] == b[i];
+  r = (r  2) | t;
+}
+  return r;
+}
+
+int
+test_02 (int* a, int* b)
+{
+  int i;
+  int r = 0;
+  for (i = 0; i  16; ++i)
+{
+  bool t = a[i] == b[i];
+  r = (r  3) | t;
+}
+  return r;
+}
+
+int
+test_03 (const bool* a)
+{
+  int i;
+  int r = 0;
+  for (i = 0; i  16; ++i)
+{
+  bool t = a[i];
+  r = (r  1) | (t  1);
+}
+  return r;
+}
Index: gcc/testsuite/gcc.target/sh/pr54089-8.c
===
--- gcc/testsuite/gcc.target/sh/pr54089-8.c	(revision 0)
+++ gcc/testsuite/gcc.target/sh/pr54089-8.c	(revision 0)
@@ -0,0 +1,203 @@
+/* Check that the rotcl instruction is generated.  */
+/* { dg-do compile { target sh*-*-* } } */
+/* { dg-options -O1 } */
+/* { dg-skip-if  { sh*-*-* } { -m5*} {  } }  */
+/* { dg-final { scan-assembler-times rotcl 28 } } */
+
+typedef char bool;
+
+long long
+test_00 (long long a)
+{
+  return a  1;
+}
+
+unsigned int
+test_01 (unsigned int a, int b, int c)
+{
+  bool r = b == c;
+  return ((a  1) | r);
+}
+
+unsigned int
+test_02 (unsigned int a, int b, int c)
+{
+  bool r = b == c;
+  return ((a  2) | r);
+}
+
+unsigned int
+test_03 (unsigned int a, int b, int c)
+{
+  bool r = b == c;
+  return ((a  3) | r);
+}
+
+unsigned int
+test_04 (unsigned int a, int b, int c)
+{
+  bool r = b == c;
+  return ((a  4) | r);
+}
+
+unsigned int
+test_05 (unsigned int a, int b, int c)
+{
+  bool r = b == c;
+  return ((a  5) | r);
+}
+
+unsigned int
+test_06 (unsigned int a, int b, int c)
+{
+  bool r = b == c;
+  return ((a  6) | r);
+}
+
+unsigned int
+test_07 (unsigned int a, int b, int c)
+{
+  bool r = b == c;
+  return ((a  7) | r);
+}
+
+unsigned int
+test_08 (unsigned int a, int b, int c)
+{
+  bool r = b == c;
+  return ((a  8) | r);
+}
+
+unsigned int
+test_09 (unsigned int a, int b, int c)
+{
+  bool r = b == c;
+  return ((a  31) | r);
+}
+
+unsigned int
+test_10 (unsigned int a, int b)
+{
+  /* 1x shlr, 1x rotcl  */
+  return (a  1) | (b  1);
+}
+
+unsigned int
+test_11 (unsigned int a, int b)
+{
+  /* 1x shlr, 1x rotcl (+1x add as shll)  */
+  return (a  2) | (b  1);
+}
+
+unsigned int
+test_12 (unsigned int a, int b)
+{
+  /* 1x shlr, 1x shll2, 1x rotcl  */
+  return (a  3) | (b  1);
+}
+
+unsigned int
+test_13 (unsigned int a, int b)
+{
+  /* 1x shll, 1x rotcl  */
+  bool r = b  0;
+  return (a  1) | r;
+}
+
+unsigned int
+test_14 (unsigned int a, int b, int c)
+{
+  bool r = b != c;
+  return ((a  1) | r);
+}
+
+unsigned int
+test_15 (unsigned int a, int b, int c)
+{
+  bool r = b != c;
+  return ((a  11) | r);
+}
+
+unsigned int
+test_16 (unsigned int a, int b, int c)
+{
+  bool r = b != c;
+  return ((a  3) | r);
+}
+
+unsigned int
+test_17 (unsigned int a, int b, int c)
+{
+  bool r = b != c;
+  return ((a  4) | r);
+}
+
+unsigned int
+test_18 (unsigned int a, int b, int c)
+{
+  bool r = b != c;
+  return ((a  5) | r);
+}
+
+unsigned int
+test_19 (unsigned int a, int b, int c)
+{
+  bool r = b != c;
+  return ((a  6) | r);
+}
+
+unsigned int
+test_20 (unsigned int a, int b, int c)
+{
+  bool r = b != c;
+  return ((a

Re: [PATCH, middle-end]: Fix mode-switching MODE_EXIT check with __builtin_apply/__builtin_return

2012-11-05 Thread Kaz Kojima

Uros Bizjak ubiz...@gmail.com wrote:
 2012-11-04  Vladimir Yakovlev  vladimir.b.yakov...@intel.com
   Uros Bizjak  ubiz...@gmail.com
 
 * mode-switching.c (create_pre_exit): Added code for
 maybe_builtin_apply case.
 
 Patch was bootstrapped and regression tested on x86_64-pc-linux-gnu,
 with vzeroupper patch [1] applied.
 
 I have added SH4 maintainer for possible comments.

I've confirmed that there are no new failures with the patch
on sh4-unknown-linux-gnu.
BTW, it looks that the copyright year of mode-switching.c
should be updated.


Regards,
kaz

Re: [PATCH, middle-end]: Fix mode-switching MODE_EXIT check with __builtin_apply/__builtin_return

2012-11-05 Thread Vladimir Yakovlev

Hellow, Kaz

I've updated copyright. Is it Ok?

Thanks,
Vladimir

--- a/gcc/mode-switching.c
+++ b/gcc/mode-switching.c
@@ -1,6 +1,6 @@
 /* CPU mode switching
Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2007, 2008,
-   2009, 2010 Free Software Foundation, Inc.
+   2009, 2010, 2011, 2012 Free Software Foundation, Inc.

 This file is part of GCC.


2012/11/5 Kaz Kojima kkoj...@rr.iij4u.or.jp:
 Uros Bizjak ubiz...@gmail.com wrote:
 2012-11-04  Vladimir Yakovlev  vladimir.b.yakov...@intel.com
   Uros Bizjak  ubiz...@gmail.com

 * mode-switching.c (create_pre_exit): Added code for
 maybe_builtin_apply case.

 Patch was bootstrapped and regression tested on x86_64-pc-linux-gnu,
 with vzeroupper patch [1] applied.

 I have added SH4 maintainer for possible comments.

 I've confirmed that there are no new failures with the patch
 on sh4-unknown-linux-gnu.
 BTW, it looks that the copyright year of mode-switching.c
 should be updated.


 Regards,
 kaz

Re: [x86] Fix gcc.c-torture/compile/20080806-1.c failures

2012-11-05 Thread H.J. Lu

On Mon, Nov 5, 2012 at 1:01 AM, Uros Bizjak ubiz...@gmail.com wrote:
 On Mon, Nov 5, 2012 at 9:30 AM, Richard Sandiford
 rdsandif...@googlemail.com wrote:
 This is a case where we had:

   (set (reg:HI foo) (plus:HI (reg:HI sp) (const_int X)))
   (clobber (reg:CC FLAGS_REG))

 and the splitters decided to convert it to an LEA:

   (set (reg:SI foo) (plus:SI (subreg:SI (reg:HI sp) 0) (const_int X)))

 But this fails to match, because ix86_address_subreg_operand
 doesn't allow subregs of the stack pointer.

 This shows up an inconsistency in the way the generic code handles
 subregs of the stack pointer.  Normally we refuse to fold them, even
 after reload, but the final_p case of alter_subreg folds them anyway.
 That's how we ended up with the rather odd 16-bit sp.

 However, even if the special alter_subreg case was removed
 (so that we continued to use stack_pointer_rtx itself), we'd have:

   (set (reg:HI foo) (plus:HI (subreg:HI (reg:DI sp) 0) (const_int X)))
   (clobber (reg:CC FLAGS_REG))

 which would get converted to:

   (set (reg:SI foo) (plus:SI (subreg:SI (reg:DI sp) 0) (const_int X)))

 and we'd ICE in the same way.

 The reason x86 rejects subregs of the stack pointer is this same refusal
 to fold.  ix86_print_operand_address tries to simplify a SUBREG to a REG
 and simplify_subreg wouldn't do anything for sp.

 simplify_subreg isn't a lot of help at the output stage though.
 If the insn stream contains a subreg that could be simplified but
 hasn't been, then IMO that's a bug.  The cases we have to handle here
 are those that can't be simplified (unless we decide at some point that
 all registers must be simplifiable after reload, in which case we shouldn't
 need to handle SUBREGs at all).

 As things stand, I think we should be using true_regnum in this case instead.

 Tested on x86_64-linux-gnu.  OK to install?

 Let's ask H.J. to test this change on x32.

 gcc/
 PR target/55204
 * config/i386/i386.c (ix86_address_subreg_operand): Remove stack
 pointer check.
 (print_reg): Use true_regnum rather than REGNO.
 (ix86_print_operand_address): Remove SUBREG handling.

 The patch is OK for mainline and 4.7, if it passes H.J.'s tests with
 -maddress-mode={short,long} on x32.


I am on it.  It will take a while.

H.J.

[PATCH, testsuite]: Fix gcc.dg/tree-ssa/cunroll-9.c failure

2012-11-05 Thread Uros Bizjak

Hello!

2012-11-05  Uros Bizjak  ubiz...@gmail.com

* gcc.dg/tree-ssa/cunroll-9.c: Dump cunrolli details.
Fix scan-tree-dump-times directive.

Tested on x86_64-pc-linux-gnu, committed to mainline SVN.

Uros.
Index: gcc.dg/tree-ssa/cunroll-9.c
===
--- gcc.dg/tree-ssa/cunroll-9.c (revision 193151)
+++ gcc.dg/tree-ssa/cunroll-9.c (working copy)
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options -O2 -fdump-tree-cunrolli } */
+/* { dg-options -O2 -fdump-tree-cunrolli-details } */
 void abort (void);
 int a[10];
 int b[11];
@@ -18,5 +18,5 @@
 }
   return sum;
 }
-/* { dg-final { scan-tree-dump-times 1 Removed pointless exit: cunroli } } 
*/
+/* { dg-final { scan-tree-dump-times Removed pointless exit: 1 cunroli } } 
*/
 /* { dg-final { cleanup-tree-dump cunroli } } */

Re: exception safety validation

2012-11-05 Thread Jonathan Wakely

On 4 November 2012 16:01, Jonathan Wakely wrote:
 On 3 November 2012 19:05, Paolo Carlini wrote:

 On 11/03/2012 03:43 PM, François Dumont wrote:

 Note that I run exception tests to validate it and the recently introduced
 23_containers/forward_list/allocator/noexcept.cc test doesn't work in debug
 mode. I think it needs a dg-require-normal-node attribute.

 ... if you reach an agreement about the right thing to do concerning this
 failure, I think the patch could otherwise go in.

 Yes, the patch looks good, please commit it and  I'll look into the
 forward_list failure, since my changes caused it.

I'll be committing a fix for the forward_list debug mode failure (and
some unrelated profile mode failures) tonight.

[PATCH] Fix up pr19105.c test regexp for ppc (PR testsuite/55188)

2012-11-05 Thread Jakub Jelinek

Hi!

Depending on branch cost, e.g. on ppc v != d  v != e
isn't folded in the FE.  On x86_64/i686 we have in *.original
for range1:
return (x != 0  (unsigned int) v != 2)  (unsigned int) v + 4294967293  1;
but on ppc
return ((x != 0  (unsigned int) v != 2)  (unsigned int) v != 3)  
(unsigned int) v != 4;
The inter-bb range test optimization optimizes both the same, into
x != 0  (unsigned int) v + 4294967294  2
equivalent (gimplified of course), but it prints different message that
the test was trying to match to see if the optimization happened.

The following patch adjusts the regexp.  Unfortunately I couldn't find a way
to insert there ERE () to make the regexp shorter
(wanted to have there .. and -.3, (3. and -.4, )?4. or
and -.3, (3. and -.4, |)4.
but neither worked, not with zero, one, two or three backslashes before (
and ).  So I've ended up duplicating the whole regexp with just | in between
which seems to work.  Tested on x86_64-linux (-m32/-m64) and powerpc64-linux
cross (-m32/-m64), ok for trunk?

2012-11-05  Jakub Jelinek  ja...@redhat.com

PR testsuite/55188
* gcc.dg/pr19105.c: Accept also optimizing
-[2, 2] and -[3, 3] and -[4, 4] range tests together.

--- gcc/testsuite/gcc.dg/pr19105.c.jj   2012-10-31 09:44:44.0 +0100
+++ gcc/testsuite/gcc.dg/pr19105.c  2012-11-05 11:29:33.921707151 +0100
@@ -17,6 +17,6 @@ int range2 (enum e v, int x)
   return x  (v != c  v != d  v != e);
 }
 
-/* { dg-final { scan-tree-dump-times Optimizing range tests v_\[0-9\]*.D. 
-.2, 2. and -.3, 4.\[\n\r\]* into 1 reassoc1 } } */
+/* { dg-final { scan-tree-dump-times Optimizing range tests v_\[0-9\]*.D. 
-.2, 2. and -.3, 4.\[\n\r\]* into|Optimizing range tests v_\[0-9\]*.D. -.2, 2. 
and -.3, 3. and -.4, 4.\[\n\r\]* into 1 reassoc1 } } */
 /* { dg-final { cleanup-tree-dump reassoc1 } } */
 

Jakub

Re: [RFH] subreg of a vector without going through memory

2012-11-05 Thread Richard Sandiford

Marc Glisse marc.gli...@inria.fr writes:
   * simplify-rtx.c (simplify_subreg): For vectors, create a VEC_SELECT.

Probably not helpful, sorry, but a subreg-vec_select transformation
feels like it's going in the wrong direction.  Going from vec_select
to subreg would be OK from a layering perspective (although whether
it's a good idea or not is another question), but not the other way.
E.g. we convert can convert truncate to subreg, but should never
convert subreg to truncate.

One problem is that subregs can be lvalues as well as rvalues,
whereas vec_select is always an rvalue.  Also, it's invalid to
form a non-lowpart subreg of a word or subword register, so having
general vec_select handling feels like it's coping with input that
ought to be rejected with a null return.

Richard

Re: [PATCH] Fix final_scan_insn to handle all comparisons in non-jump and cmove insn

2012-11-05 Thread Richard Sandiford

Andreas Schwab sch...@linux-m68k.org writes:
   * final.c (final_scan_insn) [HAVE_cc0]: Handle all comparison
   codes in non-jump and cmove insn.

OK, thanks.

Richard

[PATCH, testsuite]: Fix gcc.dg/torture/pr55018.c testsuite failure

2012-11-05 Thread Uros Bizjak

Hello!

Just another instance of testsuite PR51128.

2012-11-05  Uros Bizjak  ubiz...@gmail.com

PR testsuite/51128
* gcc.dg/torture/pr55018.c: Skip if -fno-fat-lto-objects was passed.

Tested on x86_64-pc-linux-gnu, committed to mainline SVN.

Uros.

Index: gcc.dg/torture/pr55018.c
===
--- gcc.dg/torture/pr55018.c(revision 193151)
+++ gcc.dg/torture/pr55018.c(working copy)
@@ -1,6 +1,7 @@
 /* PR tree-optimization/55018 */
 /* { dg-do compile } */
 /* { dg-options -fdump-tree-optimized } */
+/* { dg-skip-if  { *-*-* } { -fno-fat-lto-objects } {  } } */

 void
 foo (int x)

Re: [patch] Apply conditional down cast to cgraph.h et.al.

2012-11-05 Thread Eric Botcazou

 Done and committed.

That's great, thanks!

-- 
Eric Botcazou

[PATCH] Fix up value_format ICE for DWARF2_ADDR_SIZE 4 targets (PR target/55194)

2012-11-05 Thread Jakub Jelinek

Hi!

Apparently some targets have DWARF2_ADDR_SIZE  4, for dw_val_class_addr
when DW_FORM_addr can't be used value_format already handles
DWARF2_ADDR_SIZE 1, 2, 4 and 8, so I've committed this change as obvious.

2012-11-05  Jakub Jelinek  ja...@redhat.com

PR target/55194
* dwarf2out.c (value_format) case dw_val_class_high_pc: Handle
also DWARF2_ADDR_SIZE 1 and 2.

--- gcc/dwarf2out.c.jj  2012-10-26 21:03:45.0 +0200
+++ gcc/dwarf2out.c 2012-11-05 12:04:31.832255264 +0100
@@ -7597,12 +7597,16 @@ value_format (dw_attr_ref a)
 case dw_val_class_high_pc:
   switch (DWARF2_ADDR_SIZE)
{
- case 4:
-   return DW_FORM_data4;
- case 8:
-   return DW_FORM_data8;
- default:
-   gcc_unreachable ();
+   case 1:
+ return DW_FORM_data1;
+   case 2:
+ return DW_FORM_data2;
+   case 4:
+ return DW_FORM_data4;
+   case 8:
+ return DW_FORM_data8;
+   default:
+ gcc_unreachable ();
}
 
 default:

Jakub

Re: [PATCH] Stream cgraph_node.ipa_transforms_to_apply

2012-11-05 Thread Jan Hubicka

 Hi,
 
 the following patch adds streaming ofcgraph_node.ipa_transforms_to_apply
 so that transformation phases of IPA passes are run in LTO too.  It is
 done by simple streaming of pass.static_pass_number and then looking
 it up among all_regular_ipa_passes.
 
 Bootstrapped and tested on x86_64-linux, required to make aggregate
 IPA-CP work in LTO.
 
 OK for trunk?
 
 Thanks,
 
 Martin
 
 
 2012-11-03  Martin Jambor  mjam...@suse.cz
 
   * lto-cgraph.c: Include tree-pass.h.
   (lto_output_node): Stream node-ipa_transforms_to_apply.
   (input_node): Likewise.
   * Makefile.in (lto-cgraph.o): Add TREE_PASS_H to dependencies.
 +  count = streamer_read_hwi (ib);
 +  node-ipa_transforms_to_apply = NULL;
 +  for (i = 0; i  count; i++)
 +{
 +  struct opt_pass *pass;
 +  int pi = streamer_read_hwi (ib);
 +
 +  for (pass = all_regular_ipa_passes; pass; pass = pass-next)
 + if (pass-static_pass_number == pi)

passes.c compute vector translating IDs to pass structures, please export it 
and use it here;
OK with this change.

Honza
 +   {
 + VEC_safe_push (ipa_opt_pass, heap, node-ipa_transforms_to_apply,
 +(struct ipa_opt_pass_d *) pass);
 + break;
 +   }
 +  gcc_assert (pass);
 +}
 +
if (tag == LTO_symtab_analyzed_node)
  ref = streamer_read_hwi (ib);
  
 Index: src/gcc/Makefile.in
 ===
 --- src.orig/gcc/Makefile.in
 +++ src/gcc/Makefile.in
 @@ -2146,7 +2146,7 @@ lto-cgraph.o: lto-cgraph.c $(CONFIG_H) $
 $(HASHTAB_H) langhooks.h $(BASIC_BLOCK_H) \
 $(TREE_FLOW_H) $(CGRAPH_H) $(FUNCTION_H) $(GGC_H) $(DIAGNOSTIC_CORE_H) \
 $(EXCEPT_H) $(TIMEVAR_H) pointer-set.h $(LTO_STREAMER_H) \
 -   $(GCOV_IO_H) $(DATA_STREAMER_H) $(TREE_STREAMER_H)
 +   $(GCOV_IO_H) $(DATA_STREAMER_H) $(TREE_STREAMER_H) $(TREE_PASS_H)
  lto-streamer-in.o: lto-streamer-in.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
 $(TM_H) toplev.h $(DIAGNOSTIC_CORE_H) $(EXPR_H) $(FLAGS_H) $(PARAMS_H) \
 input.h $(HASHTAB_H) $(BASIC_BLOCK_H) $(TREE_FLOW_H) $(TREE_PASS_H) \

Fix autopar testcase

2012-11-05 Thread Jan Hubicka

Hi,
this testcase tests that we are about to bound number of iterations of
  for (k = 3; k  NA_1; k++)
for (i = 3; i  MA_1; i++)
  for (j = 3; j  MB_1; j++)
{
  t = T[i][j];
  T[i][j] = t+2+A[i][k]*B[j][k];
}


based on 

#define MB 100
#define NA 450
#define MA 400

int T[MA][MB],A[MA][NA],B[MB][NA];

Well, we used to be able to, but by bug.  The problem is that we are not able 
to hoist
the loop headers prior the loop and thus we do not have perfect nest and we do 
not know
if the body is always executed on an iteration of the outer loops.

This patch fixes the testcase by hoisting by hand.
I would bet I saw loop-ch.c patch doing this, but I do not see it in mainline?

Comitted.
Honza

*** pr499602.c  2012-11-05 12:18:21.771215185 +0100
--- pr49960.c   2012-11-05 12:17:43.696420662 +0100
***
*** 11,16 
--- 11,21 
  {
int i,j, t,k;
  
+   /* At the moment we are not able to hoist the loop headers out of the loop
+  nest.  */
+   if (MA_1  4 || NA_1  4 || MB_1  4)
+ return;
+ 
/* The outer most loop is not parallel because for different k's there
   is write-write dependency for T[i][j].  */

Re: GCC for GNU Hurd: MACH built-in preprocessor macro (was: gdb: FTBFS on hurd-i386 (for review))

2012-11-05 Thread Samuel Thibault

Thomas Schwinge, le Mon 05 Nov 2012 07:09:43 +0100, a écrit :
 Samuel, is there any way you can unpack all Debian source packages on a
 Debian machine, and run a recursive grep command (I can work out a
 suitable regex) to see where removing the MACH or __MACH built-in
 preprocessor macros might cause trouble?

Sure!

Samuel

Relax limits of early inliner for the forwarder functions

2012-11-05 Thread Jan Hubicka

Hi,
in 4.6 timeframe I limited early inlier growth to apply only for leaf functions.
This does not work really well, because with less propagation of address 
expressions
we are really not 100% succesfull on detecting C++ forwarders and predicting 
them
zero cost.   This patch simply makes the cost to be divided by number of 
callees, similarly
as in LLVM.

Bootstrapped/regtested x86_64-linux, benchmarked and comitted.
The patch seems consistent win in all benchmarks, most noticeably in tramp3d.

* ipa-inline.c (leaf_node_p): Rename to ...
(num_calls) ... this one.
(want_early_inline_function_p): Allow smal growth on non-leafs.
Index: ipa-inline.c
===
--- ipa-inline.c(revision 193134)
+++ ipa-inline.c(working copy)
@@ -380,17 +380,18 @@ can_early_inline_edge_p (struct cgraph_e
 }
 
 
-/* Return true when N is leaf function.  Accept cheap builtins
-   in leaf functions.  */
+/* Return number of calls in N.  Ignore cheap builtins.  */
 
-static bool
-leaf_node_p (struct cgraph_node *n)
+static int
+num_calls (struct cgraph_node *n)
 {
   struct cgraph_edge *e;
+  int num = 0;
+
   for (e = n-callees; e; e = e-next_callee)
 if (!is_inexpensive_builtin (e-callee-symbol.decl))
-  return false;
-  return true;
+  num++;
+  return num;
 }
 
 
@@ -414,6 +415,8 @@ want_early_inline_function_p (struct cgr
   else
 {
   int growth = estimate_edge_growth (e);
+  int n;
+
   if (growth = 0)
;
   else if (!cgraph_maybe_hot_edge_p (e)
@@ -427,22 +430,23 @@ want_early_inline_function_p (struct cgr
 growth);
  want_inline = false;
}
-  else if (!leaf_node_p (callee)
-   growth  0)
+  else if (growth  PARAM_VALUE (PARAM_EARLY_INLINING_INSNS))
{
  if (dump_file)
fprintf (dump_file,   will not early inline: %s/%i-%s/%i, 
-callee is not leaf and code would grow by %i\n,
+growth %i exceeds --param early-inlining-insns\n,
 xstrdup (cgraph_node_name (e-caller)), e-caller-uid,
 xstrdup (cgraph_node_name (callee)), callee-uid,
 growth);
  want_inline = false;
}
-  else if (growth  PARAM_VALUE (PARAM_EARLY_INLINING_INSNS))
+  else if ((n = num_calls (callee)) != 0
+   growth * (n + 1)  PARAM_VALUE (PARAM_EARLY_INLINING_INSNS))
{
  if (dump_file)
fprintf (dump_file,   will not early inline: %s/%i-%s/%i, 
-growth %i exceeds --param early-inlining-insns\n,
+growth %i exceeds --param early-inlining-insns 
+divided by number of calls\n,
 xstrdup (cgraph_node_name (e-caller)), e-caller-uid,
 xstrdup (cgraph_node_name (callee)), callee-uid,
 growth);

Minor fixes to niter and ivcanon

2012-11-05 Thread Jan Hubicka

Hi,
while proofreading niter and ivcanon code I noticed that we give up on exits in 
inner
loops.  I discussed this with Zdenek and it was done only because at the time
it was implemented we did nothing useful on outer loops.  We can now unloop 
them, so
I removed the restriction.

I also noticed that not_executed_last_iteration is initialized tice and we can 
record
realistic upper bound in ivcanon for loop with only one likely exit.

Bootstrapped/regtested x86_64-linux, comitted as obvious.

* tree-ssa-loop-niter.c (find_loop_niter): Remove 
just_once_each_iteration_p.
(maybe_lower_iteration_bound): Initialize not_executed_last_iteration 
to  NULL
* tree-ssa-loop-ivcanon.c (canonicalize_loop_induction_variables): Skip
just_once_each_iteration_p; record estimated bound when loop has only 
one
likely exit; test just_once_each_iteration_p before IV canon itself.

Index: tree-ssa-loop-niter.c
===
--- tree-ssa-loop-niter.c   (revision 193155)
+++ tree-ssa-loop-niter.c   (working copy)
@@ -1941,9 +1941,6 @@ find_loop_niter (struct loop *loop, edge
   *exit = NULL;
   FOR_EACH_VEC_ELT (edge, exits, i, ex)
 {
-  if (!just_once_each_iteration_p (loop, ex-src))
-   continue;
-
   if (!number_of_iterations_exit (loop, ex, desc, false))
continue;
 
@@ -3186,7 +3183,7 @@ discover_iteration_bound_by_body_walk (s
 static void
 maybe_lower_iteration_bound (struct loop *loop)
 {
-  pointer_set_t *not_executed_last_iteration = pointer_set_create ();
+  pointer_set_t *not_executed_last_iteration = NULL;
   struct nb_iter_bound *elt;
   bool found_exit = false;
   VEC (basic_block, heap) *queue = NULL;
Index: tree-ssa-loop-ivcanon.c
===
--- tree-ssa-loop-ivcanon.c (revision 193155)
+++ tree-ssa-loop-ivcanon.c (working copy)
@@ -760,11 +760,7 @@ canonicalize_loop_induction_variables (s
 
   niter = number_of_latch_executions (loop);
   if (TREE_CODE (niter) == INTEGER_CST)
-{
-  exit = single_exit (loop);
-  if (!just_once_each_iteration_p (loop, exit-src))
-   return false;
-}
+exit = single_exit (loop);
   else
 {
   /* If the loop has more than one exit, try checking all of them
@@ -785,7 +781,10 @@ canonicalize_loop_induction_variables (s
   /* We work exceptionally hard here to estimate the bound
  by find_loop_niter_by_eval.  Be sure to keep it for future.  */
   if (niter  TREE_CODE (niter) == INTEGER_CST)
-record_niter_bound (loop, tree_to_double_int (niter), false, true);
+{
+  record_niter_bound (loop, tree_to_double_int (niter),
+ exit == single_likely_exit (loop), true);
+}
 
   /* Force re-computation of loop bounds so we can remove redundant exits.  */
   maxiter = max_loop_iterations_int (loop);
@@ -813,7 +812,8 @@ canonicalize_loop_induction_variables (s
 return true;
 
   if (create_iv
-   niter  !chrec_contains_undetermined (niter))
+   niter  !chrec_contains_undetermined (niter)
+   exit  just_once_each_iteration_p (loop, exit-src))
 create_canonical_iv (loop, exit, niter);
 
   return modified;

Re: [PATCH] Update source location for PRE inserted stmt

2012-11-05 Thread Eric Botcazou

 Those compiler generated statements do not have source origins, but
 they need to have debug location information attached so that
 information collected for them can be correlated with program
 constructs (such as CFG). One of the natural way is to use the source
 location associated with the program point where they are inserted.

No, there is nothing natural (and this can even be wrong).  The statements 
must have the source location corresponding to the source construct they are 
generated for, which may be totally different from that of the insertion 
point.  Yes, that can generate jumpiness in GDB, but this is far better that 
breaking the coverage info by giving the same source location to instructions 
that have different coverage status.

-- 
Eric Botcazou

Re: [RFH] subreg of a vector without going through memory

2012-11-05 Thread Marc Glisse


On Mon, 5 Nov 2012, Richard Sandiford wrote:


Marc Glisse marc.gli...@inria.fr writes:

* simplify-rtx.c (simplify_subreg): For vectors, create a VEC_SELECT.


Probably not helpful, sorry, but a subreg-vec_select transformation
feels like it's going in the wrong direction.  Going from vec_select
to subreg would be OK from a layering perspective (although whether
it's a good idea or not is another question), but not the other way.
E.g. we convert can convert truncate to subreg, but should never
convert subreg to truncate.


Thanks, it is helpful to know what the right direction is supposed to be 
:-)



One problem is that subregs can be lvalues as well as rvalues,
whereas vec_select is always an rvalue.


Hmm indeed. Not sure how to test that.

Also, it's invalid to form a non-lowpart subreg of a word or subword 
register, so having general vec_select handling feels like it's coping 
with input that ought to be rejected with a null return.


Ah, even if it was the right direction, I should limit it to memory and 
registers larger than a word, ok.



So if I understand you, the x86 back-end is missing define_insns for movs 
of subregs. And I should duplicate in simplify_subreg any code from 
simplify_binary_operation(VEC_SELECT,...) that might apply. And maybe add 
code that recognizes when a VEC_SELECT could be a SUBREG, but maybe not.


(this other patch I was writing (untested) sounds less useful then, I'll 
just attach it here so it doesn't get lost)


Well, I guess there are worse issues with vectors to handle first, as can 
be seen when compiling this for SSE2, which produces no less than 36 mov 
instructions :-(


typedef double vec4 __attribute__((vector_size(32)));

void f(vec4*x){
  *x+=*x**x;
}

Good thing next stage1 is far away, I am getting way too confused...

--
Marc Glisse*** /data/repos/gcc/pristine/gcc/simplify-rtx.c 2012-11-03 19:59:03.541603422 
+0100
--- simplify-rtx.c  2012-11-05 12:35:15.843396247 +0100
*** simplify_binary_operation_1 (enum rtx_co
*** 3485,3528 
  rtx subop0, subop1;
  
  gcc_assert (i0  2  i1  2);
  subop0 = XEXP (trueop0, i0);
  subop1 = XEXP (trueop0, i1);
  
  return simplify_gen_binary (VEC_CONCAT, mode, subop0, subop1);
}
}
  
!   if (XVECLEN (trueop1, 0) == 1
!  CONST_INT_P (XVECEXP (trueop1, 0, 0))
!  GET_CODE (trueop0) == VEC_CONCAT)
{
! rtx vec = trueop0;
! int offset = INTVAL (XVECEXP (trueop1, 0, 0)) * GET_MODE_SIZE (mode);
  
! /* Try to find the element in the VEC_CONCAT.  */
! while (GET_MODE (vec) != mode
! GET_CODE (vec) == VEC_CONCAT)
{
! HOST_WIDE_INT vec_size = GET_MODE_SIZE (GET_MODE (XEXP (vec, 0)));
! if (offset  vec_size)
!   vec = XEXP (vec, 0);
  else
{
! offset -= vec_size;
! vec = XEXP (vec, 1);
}
! vec = avoid_constant_pool_reference (vec);
}
  
! if (GET_MODE (vec) == mode)
!   return vec;
}
  
return 0;
  case VEC_CONCAT:
{
enum machine_mode op0_mode = (GET_MODE (trueop0) != VOIDmode
  ? GET_MODE (trueop0)
  : GET_MODE_INNER (mode));
enum machine_mode op1_mode = (GET_MODE (trueop1) != VOIDmode
  ? GET_MODE (trueop1)
--- 3485,3562 
  rtx subop0, subop1;
  
  gcc_assert (i0  2  i1  2);
  subop0 = XEXP (trueop0, i0);
  subop1 = XEXP (trueop0, i1);
  
  return simplify_gen_binary (VEC_CONCAT, mode, subop0, subop1);
}
}
  
!   /* Detect if all the elements come from the same subpart of a concat.  
*/
!   if (GET_CODE (trueop0) == VEC_CONCAT)
{
! rtx new_op0 = NULL_RTX;
! rtx new_op1 = NULL_RTX;
! int first = 0;
! int second = 0;
! unsigned nelts_first_half = 1;
! if (VECTOR_MODE_P (GET_CODE (trueop0)))
!   {
! rtx first_half = XEXP (trueop0, 0);
! enum machine_mode mode0 = GET_MODE (first_half);
! int elt_size = GET_MODE_SIZE (GET_MODE_INNER (mode0));
! nelts_first_half = (GET_MODE_SIZE (mode0) / elt_size);
!   }
  
! for (int i = 0; i  XVECLEN (trueop1, 0); i++)
{
! rtx j = XVECEXP (trueop1, 0, i);
! if (!CONST_INT_P (j))
!   {
! first++;
! second++;
! break;
!   }
! if (INTVAL (j)  nelts_first_half)
!   first++;
  else
+   second++;
+   }
+ 
+ if (second == 0)
+   {
+ new_op0 = XEXP (trueop0, 0);
+ new_op1 = trueop1;
+   }

Re: [PATCH] Do not change scope for unknown locations

2012-11-05 Thread Eric Botcazou

 As Richard suggested, if a location is UNKNOWN_LOCATION, instead of
 change_scope to DECL_INITIAL (cfun-decl), we should let it inherit
 location from its previous instructions. This is implemented in the
 attached patch.
 
 Bootstrapped and passed gcc regression tests and gdb regression tests.
 
 Is it okay for trunk?
 
 Thanks,
 Dehao
 
 gcc/ChangeLog:
 2012-11-02  Dehao Chen  de...@google.com
 
 * final.c (reemit_insn_block_notes): Do not change scope if insn
 location is UNKNOWN_LOCATION.

OK, thanks.  What happens if you go one step farther and always continue?

-- 
Eric Botcazou

Re: GCC 4.8.0 Status Report (2012-10-29), Stage 1 to end soon

2012-11-05 Thread Peter Bergner

On Mon, 2012-10-29 at 18:56 +0100, Jakub Jelinek wrote:
 Status
 ==
 
 I'd like to close the stage 1 phase of GCC 4.8 development
 on Monday, November 5th.  If you have still patches for new features you'd
 like to see in GCC 4.8, please post them for review soon.  Patches
 posted before the freeze, but reviewed shortly after the freeze, may
 still go in, further changes should be just bugfixes and documentation
 fixes.

I'd like to post later today (hopefully this morning) a very minimal
configure patch that adds the -mcpu=power8 and -mtune=power8 compiler
options to gcc.  Currently, power8 will be an alias for power7, but
getting this path in now allows us to add power8 support to the
compiler without having to touch the arch independent configure script.

The only hang up at the moment is we're still determining the
assembler mnemonic we'll be releasing that the gcc configure script
will use to test for power6 assembler support.

Peter

Re: GCC 4.8.0 Status Report (2012-10-29), Stage 1 to end soon

2012-11-05 Thread Jakub Jelinek

On Mon, Nov 05, 2012 at 06:41:47AM -0600, Peter Bergner wrote:
 On Mon, 2012-10-29 at 18:56 +0100, Jakub Jelinek wrote:
  I'd like to close the stage 1 phase of GCC 4.8 development
  on Monday, November 5th.  If you have still patches for new features you'd
  like to see in GCC 4.8, please post them for review soon.  Patches
  posted before the freeze, but reviewed shortly after the freeze, may
  still go in, further changes should be just bugfixes and documentation
  fixes.
 
 I'd like to post later today (hopefully this morning) a very minimal
 configure patch that adds the -mcpu=power8 and -mtune=power8 compiler
 options to gcc.  Currently, power8 will be an alias for power7, but
 getting this path in now allows us to add power8 support to the
 compiler without having to touch the arch independent configure script.

config.gcc target specific hunks are part of the backend, the individual
target maintainers can approve changes to that, I really don't see a reason
to add a dummy alias now just for that.  If the power8 enablement is
approved and non-intrusive enough that it would be acceptable even during
stage 3, then so would be corresponding config.gcc changes.

Jakub

Re: [x86] Fix gcc.c-torture/compile/20080806-1.c failures

2012-11-05 Thread H.J. Lu

On Mon, Nov 5, 2012 at 2:21 AM, H.J. Lu hjl.to...@gmail.com wrote:
 On Mon, Nov 5, 2012 at 1:01 AM, Uros Bizjak ubiz...@gmail.com wrote:
 On Mon, Nov 5, 2012 at 9:30 AM, Richard Sandiford
 rdsandif...@googlemail.com wrote:
 This is a case where we had:

   (set (reg:HI foo) (plus:HI (reg:HI sp) (const_int X)))
   (clobber (reg:CC FLAGS_REG))

 and the splitters decided to convert it to an LEA:

   (set (reg:SI foo) (plus:SI (subreg:SI (reg:HI sp) 0) (const_int X)))

 But this fails to match, because ix86_address_subreg_operand
 doesn't allow subregs of the stack pointer.

 This shows up an inconsistency in the way the generic code handles
 subregs of the stack pointer.  Normally we refuse to fold them, even
 after reload, but the final_p case of alter_subreg folds them anyway.
 That's how we ended up with the rather odd 16-bit sp.

 However, even if the special alter_subreg case was removed
 (so that we continued to use stack_pointer_rtx itself), we'd have:

   (set (reg:HI foo) (plus:HI (subreg:HI (reg:DI sp) 0) (const_int X)))
   (clobber (reg:CC FLAGS_REG))

 which would get converted to:

   (set (reg:SI foo) (plus:SI (subreg:SI (reg:DI sp) 0) (const_int X)))

 and we'd ICE in the same way.

 The reason x86 rejects subregs of the stack pointer is this same refusal
 to fold.  ix86_print_operand_address tries to simplify a SUBREG to a REG
 and simplify_subreg wouldn't do anything for sp.

 simplify_subreg isn't a lot of help at the output stage though.
 If the insn stream contains a subreg that could be simplified but
 hasn't been, then IMO that's a bug.  The cases we have to handle here
 are those that can't be simplified (unless we decide at some point that
 all registers must be simplifiable after reload, in which case we shouldn't
 need to handle SUBREGs at all).

 As things stand, I think we should be using true_regnum in this case 
 instead.

 Tested on x86_64-linux-gnu.  OK to install?

 Let's ask H.J. to test this change on x32.

 gcc/
 PR target/55204
 * config/i386/i386.c (ix86_address_subreg_operand): Remove stack
 pointer check.
 (print_reg): Use true_regnum rather than REGNO.
 (ix86_print_operand_address): Remove SUBREG handling.

 The patch is OK for mainline and 4.7, if it passes H.J.'s tests with
 -maddress-mode={short,long} on x32.


 I am on it.  It will take a while.


I tested it with -maddress-mode={short,long} on x32.  There are
no regressions in GCC nor glibc testsuite.

Thanks.

-- 
H.J.

New badness metric for inliner

2012-11-05 Thread Jan Hubicka

Hi,
this patch implements new badness metric for inliner.  It is now based on
relative speedup, but not of calee, but of the caller + callee combo.  This
nicely encompasses the edge frequency and other parameters simplifying the
actual badness code. I re-added code considering growth after inlining into
all callees, but this time without any attempt to keep the value up to
date across inlining that has shown to be expensive to do.

The patch also adds two new inline hints: INLINE_HINT_declared_inline and
INLINE_HINT_cross_module. INLINE_HINT_declared_inline decreases badness of the
call so explicitely declared functions are more likely inlined.
INLINE_HINT_cross_module increase badness since programs are usually organized
in a way that cross module inlining is less important than inter-module
inlining still.

While the patch ended up quite simple, it is result of much experimentation and
bugfixing ;)
I have bootstrapped/regtested the patch on x86_64-linux and tested on SPEC2000,
SPEC2006, c++ benchmark suite and Mozilla.  The patch is generally quite
considerable win. Most nice speedup are seen on the C++ benchmark suite where
it leads to nice code size savings and speedups. (i.e. tramp3d is now 30%
smaller and faster than before) http://gcc.opensuse.org/c++bench-frescobaldi/

There are few benchmarks that are not on historically best scores over my
experiments - botan (that regress relative to 4.7), fatigue and c-ray. I will
address these incrementally.  I will also do incrementally tunning with profile
feedback.

Honza

* ipa-inline.c (compute_uninlined_call_time,
compute_inlined_call_time): New functions.
(RELATIVE_TIME_BENEFIT_RANGE): New macro.
(relative_time_benefit): Rewrite.
(edge_badness): Rewrite path with guessed profile and estimated profile.
* ipa-inline.h (INLINE_HINT_declared_inline, INLINE_HINT_cross_module):
New hints.
(struct inline_summary): Add GROWTH filed.
* ipa-inline-analysis.c (dump_inline_hints): Update.
(reset_inline_summary): Update.
(dump_inline_summary): Update.
(will_be_nonconstant_predicate): Cleanup to use gimple_store_p and
gimple_assign_load_p predicates.
(estimate_node_size_and_time): Drop INLINE_HINT_declared_inline hint.
(simple_edge_hints): New function.
(do_estimate_edge_time): Return time of invocation of callee rather
than the time scaled by edge frequency; update hints code.
(do_estimate_edge_hints): Update.
(do_estimate_growth): Cleanup.
Index: ipa-inline.c
===
--- ipa-inline.c(revision 193157)
+++ ipa-inline.c(working copy)
@@ -456,6 +456,42 @@ want_early_inline_function_p (struct cgr
   return want_inline;
 }
 
+/* Compute time of the edge-caller + edge-callee execution when inlining
+   does not happen.  */
+
+inline int
+compute_uninlined_call_time (struct inline_summary *callee_info,
+struct cgraph_edge *edge)
+{
+  int uninlined_call_time =
+RDIV ((gcov_type)callee_info-time * MAX (edge-frequency, 1),
+ CGRAPH_FREQ_BASE);
+  int caller_time = inline_summary (edge-caller-global.inlined_to
+   ? edge-caller-global.inlined_to
+   : edge-caller)-time;
+  return uninlined_call_time + caller_time;
+}
+
+/* Same as compute_uinlined_call_time but compute time when inlining
+   does happen.  */
+
+inline gcov_type
+compute_inlined_call_time (struct cgraph_edge *edge,
+  int edge_time)
+{
+  int caller_time = inline_summary (edge-caller-global.inlined_to
+   ? edge-caller-global.inlined_to
+   : edge-caller)-time;
+  int time = caller_time + RDIV ((edge_time - inline_edge_summary 
(edge)-call_stmt_time)
+* MAX (edge-frequency, 1),
+CGRAPH_FREQ_BASE);
+  /* Possible one roundoff error, but watch for overflows.  */
+  gcc_checking_assert (time = INT_MIN / 2);
+  if (time  0)
+time = 0;
+  return time;
+}
+
 /* Return true if we are interested in inlining small function.
When REPORT is true, report reason to dump file.  */
 
@@ -724,31 +760,41 @@ want_inline_function_to_all_callers_p (s
return true;
 }
 
+#define RELATIVE_TIME_BENEFIT_RANGE (INT_MAX / 64)
 
 /* Return relative time improvement for inlining EDGE in range
-   1...2^9.  */
+   1...RELATIVE_TIME_BENEFIT_RANGE  */
 
 static inline int
 relative_time_benefit (struct inline_summary *callee_info,
   struct cgraph_edge *edge,
-  int time_growth)
+  int edge_time)
 {
   int relbenefit;
-  gcov_type uninlined_call_time;
+  int uninlined_call_time = compute_uninlined_call_time (callee_info, edge);
+  int inlined_call_time = compute_inlined_call_time (edge,

Re: GCC 4.8.0 Status Report (2012-10-29), Stage 1 to end soon

2012-11-05 Thread Kenneth Zadeck



On 11/04/2012 11:54 AM, Richard Biener wrote:

On Thu, Nov 1, 2012 at 2:10 PM, Richard Sandiford
rdsandif...@googlemail.com wrote:

Kenneth Zadeck zad...@naturalbridge.com writes:

I would like you to respond to at least point 1 of this email.   In it
there is code from the rtl level that was written twice, once for the
case when the size of the mode is less than the size of a HWI and once
for the case where the size of the mode is less that 2 HWIs.

my patch changes this to one instance of the code that works no matter
how large the data passed to it is.

you have made a specific requirement for wide int to be a template that
can be instantiated in several sizes, one for 1 HWI, one for 2 HWI.   I
would like to know how this particular fragment is to be rewritten in
this model?   It seems that I would have to retain the structure where
there is one version of the code for each size that the template is
instantiated.

I think richi's argument was that wide_int should be split into two.
There should be a bare-metal class that just has a length and HWIs,
and the main wide_int class should be an extension on top of that
that does things to a bit precision instead.  Presumably with some
template magic so that the length (number of HWIs) is a constant for:

   typedef foo2 double_int;

and a variable for wide_int (because in wide_int the length would be
the number of significant HWIs rather than the size of the underlying
array).  wide_int would also record the precision and apply it after
the full HWI operation.

So the wide_int class would still provide as wide as we need arithmetic,
as in your rtl patch.  I don't think he was objecting to that.

That summarizes one part of my complaints / suggestions correctly.  In other
mails I suggested to not make it a template but a constant over object lifetime
'bitsize' (or maxlen) field.  Both suggestions likely require more thought than
I put into them.  The main reason is that with C++ you can abstract from where
wide-int information pieces are stored and thus use the arithmetic / operation
workers without copying the (source) wide-int objects.  Thus you should
be able to write adaptors for double-int storage, tree or RTX storage.
We had considered something along these lines and rejected it.   I am 
not really opposed to doing something like this, but it is not an 
obvious winning idea and is likely not to be a good idea.   Here was our 
thought process:


if you abstract away the storage inside a wide int, then you should be 
able to copy a pointer to the block of data from either the rtl level 
integer constant or the tree level one into the wide int.   It is 
certainly true that making a wide_int from one of these is an extremely 
common operation and doing this would avoid those copies.


However, this causes two problems:
1)  Mike's first cut at the CONST_WIDE_INT did two ggc allocations to 
make the object.   it created the base object and then it allocated the 
array.  Richard S noticed that we could just allocate one CONST_WIDE_INT 
that had the array in it.   Doing it this way saves one ggc allocation 
and one indirection when accessing the data within the CONST_WIDE_INT.   
Our plan is to use the same trick at the tree level.   So to avoid the 
copying, you seem to have to have a more expensive rep for 
CONST_WIDE_INT and INT_CST.


2) You are now stuck either ggcing the storage inside a wide_int when 
they are created as part of an expression or you have to play some game 
to represent the two different storage plans inside of wide_int.   
Clearly this is where you think that we should be going by suggesting 
that we abstract away the internal storage.   However, this comes at a 
price:   what is currently an array access in my patches would (i 
believe) become a function call.  From a performance point of view, i 
believe that this is a non starter. If you can figure out how to design 
this so that it is not a function call, i would consider this a viable 
option.


On the other side of this you are clearly correct that we are copying 
the data when we are making wide ints from INT_CSTs or CONST_WIDE_INTs. 
   But this is why we represent data inside of the wide_ints, the 
INT_CSTs and the CONST_WIDE_INTs in a compressed form.   Even with very 
big types, which are generally rare, the constants them selves are very 
small.   So the copy operation is a loop that almost always copies one 
element, even with tree-vrp which doubles the sizes of every type.


There is the third option which is that the storage inside the wide int 
is just ggced storage.  We rejected this because of the functional 
nature of wide-ints.There are zillions created, they can be stack 
allocated, and they last for very short periods of time.



As is probably obvious, I don't agree FWIW.  It seems like an unnecessary
complication without any clear use.  Especially since the number of

Maybe the double_int typedef is without any clear use.  Properly
abstracting from the

Re: [Committed] Fix 54524: Wrong code with some 64bit addition with registers as 32bits

2012-11-05 Thread Eric Botcazou

 Hmm, like you said originally in the PR, I think generating the invalid
 PLUS is the bug here.  I agree (as Richard B said) that an invalid PLUS
 shouldn't cause us to generate wrong code, but an assert seems better
 than a check.  Eric, what do you think?

I think that this boils down to deciding whether (PLUS X 0) is invalid or 
merely non-canonical.  If the former, then I agree that an assertion is 
appropriate; if the latter, then I don't think that we should stop the 
compiler because of it.

No strong opinion, although I'd lean towards the latter.  In any case, the 
piece of code which generates this PLUS needs also to be fixed (at least on 
the mainline).

-- 
Eric Botcazou

Re: [RFA:] PR55186 - gcc.dg/const-uniq-1.c fails due to vector not in the constant pool

2012-11-05 Thread Eric Botcazou

 But, for cris-elf (and reasonably the same for other targets)
 there might not be such a constant-pool entry in the first
 place: the vectors are too short to rule out piecewise
 initialization as optimal for size (counting the vectors once
 per use).  Let's increase them.  Twice might just break-even for
 cris-elf so let's make them four times as long.  Sanity-checked
 for x86_64 w/w.o -m32 for a gcc-version that includes the
 optimization for which the test-case tests (CCing author).

I don't know cris at all, but 4 times sounds quite a lot compared to the other 
architectures (I presume that the test passes on most of them).

 Ok to commit?
 
 gcc/testsuite:
   PR testsuite/55186
   * gcc/testsuite/gcc.dg/const-uniq-1.c (a): Increase length four times.

No objections by me though.

-- 
Eric Botcazou

Re: Ping / update: RFA: replace #ifdef with if/#if for HAVE_ATTR_*

2012-11-05 Thread Bernd Schmidt

On 10/31/2012 01:12 PM, Richard Sandiford wrote:

 OK with those changes for the rtl bits.  Can't approve the generator
 stuff though.

That's also OK.


Bernd

Re: GCC 4.8.0 Status Report (2012-10-29), Stage 1 to end soon

2012-11-05 Thread Peter Bergner

On Mon, 2012-11-05 at 13:53 +0100, Jakub Jelinek wrote:
 On Mon, Nov 05, 2012 at 06:41:47AM -0600, Peter Bergner wrote:
  I'd like to post later today (hopefully this morning) a very minimal
  configure patch that adds the -mcpu=power8 and -mtune=power8 compiler
  options to gcc.  Currently, power8 will be an alias for power7, but
  getting this path in now allows us to add power8 support to the
  compiler without having to touch the arch independent configure script.
 
 config.gcc target specific hunks are part of the backend, the individual
 target maintainers can approve changes to that, I really don't see a reason
 to add a dummy alias now just for that.  If the power8 enablement is
 approved and non-intrusive enough that it would be acceptable even during
 stage 3, then so would be corresponding config.gcc changes.

Well we also patch config.in and configure.ac/configure.  If those are
acceptable to be patched later too, then great.  If not, the patch
isn't really very large.  We did do this for power7 initially too:

  http://gcc.gnu.org/ml/gcc-patches/2008-08/msg00162.html

Peter

Re: GCC 4.8.0 Status Report (2012-10-29), Stage 1 to end soon

2012-11-05 Thread Jakub Jelinek

On Mon, Nov 05, 2012 at 08:40:00AM -0600, Peter Bergner wrote:
 Well we also patch config.in and configure.ac/configure.  If those are
 acceptable to be patched later too, then great.  If not, the patch

That is the same thing as config.gcc bits.

 isn't really very large.  We did do this for power7 initially too:
 
   http://gcc.gnu.org/ml/gcc-patches/2008-08/msg00162.html

But then power7 patch went in during stage1 of the n+1 release, and
wasn't really backported to release branch (just to distro vendor branches),
right?

Jakub

[committed] 3 backports to 4.7 branch

2012-11-05 Thread Jakub Jelinek

Hi!

I've backported 3 patches of mine to 4.7 branch, and committed
them after bootstrap/regtest on x86_64-linux and i686-linux.

Jakub
2012-11-05  Jakub Jelinek  ja...@redhat.com

Backported from mainline
2012-10-10  Jakub Jelinek  ja...@redhat.com

PR tree-optimization/54877
* tree-vect-loop.c (vect_is_simple_reduction_1): For MINUS_EXPR
use make_ssa_name instead of copy_ssa_name.

* gcc.dg/torture/pr54877.c: New test.

--- gcc/tree-vect-loop.c(revision 192321)
+++ gcc/tree-vect-loop.c(revision 192322)
@@ -2257,7 +2257,10 @@ vect_is_simple_reduction_1 (loop_vec_inf
   if (orig_code == MINUS_EXPR)
 {
   tree rhs = gimple_assign_rhs2 (def_stmt);
-  tree negrhs = make_ssa_name (SSA_NAME_VAR (rhs), NULL);
+  tree var = TREE_CODE (rhs) == SSA_NAME
+? SSA_NAME_VAR (rhs)
+: create_tmp_reg (TREE_TYPE (rhs), NULL);
+  tree negrhs = make_ssa_name (var, NULL);
   gimple negate_stmt = gimple_build_assign_with_ops (NEGATE_EXPR, negrhs,
 rhs, NULL);
   gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
--- gcc/testsuite/gcc.dg/torture/pr54877.c  (revision 0)
+++ gcc/testsuite/gcc.dg/torture/pr54877.c  (revision 192322)
@@ -0,0 +1,23 @@
+/* PR tree-optimization/54877 */
+/* { dg-do run } */
+/* { dg-options -ffast-math } */
+
+extern void abort (void);
+
+int
+foo (void)
+{
+  double d;
+  int i;
+  for (i = 0, d = 0; i  64; i++)
+d--;
+  return (int) d;
+}
+
+int
+main ()
+{
+  if (foo () != -64)
+abort ();
+  return 0;
+}
2012-11-05  Jakub Jelinek  ja...@redhat.com

Backported from mainline
2012-10-23  Jakub Jelinek  ja...@redhat.com

PR c++/54988
* decl2.c (cplus_decl_attributes): Don't return early
if attributes is NULL.

* c-c++-common/pr54988.c: New test.

--- gcc/cp/decl2.c  (revision 192721)
+++ gcc/cp/decl2.c  (revision 192722)
@@ -1309,8 +1309,7 @@ void
 cplus_decl_attributes (tree *decl, tree attributes, int flags)
 {
   if (*decl == NULL_TREE || *decl == void_type_node
-  || *decl == error_mark_node
-  || attributes == NULL_TREE)
+  || *decl == error_mark_node)
 return;
 
   if (processing_template_decl)
@@ -1319,8 +1318,6 @@ cplus_decl_attributes (tree *decl, tree
return;
 
   save_template_attributes (attributes, decl);
-  if (attributes == NULL_TREE)
-   return;
 }
 
   cp_check_const_attributes (attributes);
--- gcc/testsuite/c-c++-common/pr54988.c(revision 0)
+++ gcc/testsuite/c-c++-common/pr54988.c(revision 192722)
@@ -0,0 +1,20 @@
+/* PR c++/54988 */
+/* { dg-do compile } */
+/* { dg-options -O2 } */
+/* { dg-additional-options -msse2 { target { i?86-*-* x86_64-*-* } } } */
+
+#if defined(__i386__) || defined(__x86_64__)
+#pragma GCC target fpmath=sse
+#endif
+
+static inline __attribute__ ((always_inline)) int
+foo (int x)
+{
+  return x;
+}
+
+int
+bar (int x)
+{
+  return foo (x);
+}
2012-11-05  Jakub Jelinek  ja...@redhat.com

Backported from mainline
2012-10-24  Jakub Jelinek  ja...@redhat.com

PR debug/54828
* gimple.h (is_gimple_sizepos): New inline function.
* gimplify.c (gimplify_one_sizepos): Use it.  Remove useless
final assignment to expr variable.
* tree.c (RETURN_TRUE_IF_VAR): Return true also if
!TYPE_SIZES_GIMPLIFIED (type) and _t is going to be gimplified
into a local temporary.

* g++.dg/debug/pr54828.C: New test.

--- gcc/tree.c  (revision 192758)
+++ gcc/tree.c  (revision 192759)
@@ -8467,14 +8467,19 @@ variably_modified_type_p (tree type, tre
   tree t;
 
 /* Test if T is either variable (if FN is zero) or an expression containing
-   a variable in FN.  */
+   a variable in FN.  If TYPE isn't gimplified, return true also if
+   gimplify_one_sizepos would gimplify the expression into a local
+   variable.  */
 #define RETURN_TRUE_IF_VAR(T)  \
   do { tree _t = (T);  \
 if (_t != NULL_TREE
\
 _t != error_mark_node\
 TREE_CODE (_t) != INTEGER_CST\
 TREE_CODE (_t) != PLACEHOLDER_EXPR   \
-(!fn || walk_tree (_t, find_var_from_fn, fn, NULL)))\
+(!fn \
+   || (!TYPE_SIZES_GIMPLIFIED (type)   \
+!is_gimple_sizepos (_t)) \
+   || walk_tree (_t, find_var_from_fn, fn, NULL)))\
   return true;  } while (0)
 
   if (type == error_mark_node)
--- gcc/gimplify.c  (revision 192758)
+++ gcc/gimplify.c  (revision 192759)
@@ -7948,9 +7948,7 @@

Re: [PATCH] Do not change scope for unknown locations

2012-11-05 Thread Dehao Chen

 OK, thanks.  What happens if you go one step farther and always continue?

Because we do want location and its block to be tightly coupled. I may
want to add an assertion that if location is known, the block should
*not* be NULL.

Dehao


 --
 Eric Botcazou

Re: [PATCH] Update source location for PRE inserted stmt

2012-11-05 Thread Dehao Chen

 No, there is nothing natural (and this can even be wrong).  The statements
 must have the source location corresponding to the source construct they are
 generated for, which may be totally different from that of the insertion
 point.  Yes, that can generate jumpiness in GDB, but this is far better that
 breaking the coverage info by giving the same source location to instructions
 that have different coverage status.

For the unittest I provided, setting the inserted stmt with
UNKNOWN_LOCATION will:

* break the coverage
* increase jumpiness in gdb

Setting location to UNKNOWN_LOCATION is like setting it to random
because compiler may put this stmt as the entry point of a BB (as in
the unittest). Thus setting deterministic locations for inserted stmt
will improve debugability and reduce jumpiness.

Thanks,
Dehao


 --
 Eric Botcazou

Re: [PATCH 08/10] Factorize condition insertion code out of build_check_stmt

2012-11-05 Thread Jakub Jelinek

On Sat, Nov 03, 2012 at 12:03:45AM +0100, Dodji Seketeli wrote:
 +  int fallthrough_probability =
 +then_more_likely_p
 +? PROB_VERY_UNLIKELY
 +: PROB_ALWAYS - PROB_VERY_UNLIKELY;

Just a formatting nit, I think = needs to go on the next line, so

  int fallthrough_probability
= then_more_likely_p
  ? PROB_VERY_UNLIKELY
  : PROB_ALWAYS - PROB_VERY_UNLIKELY;

No need to repost for that.

Jakub

Re: [PATCH] Minor ipa-prop.c internal interface tweak

2012-11-05 Thread Jan Hubicka

 2012-10-31  Martin Jambor  mjam...@suse.cz
 
   * ipa-prop.c (ipa_get_param_decl_index_1): New function.
   (ipa_get_param_decl_index): Just call ipa_get_param_decl_index_1.
   (ipa_populate_param_decls): Accept descriptors parameter rather
   than the whole info.
   (load_from_unmodified_param): Likewise.
   (ipa_load_from_parm_agg_1): Likewise.
   (ipa_load_from_parm_agg): Extract descriptors from info.
   (compute_complex_assign_jump_func): Likewise.
   (ipa_analyze_indirect_call_uses): Likewise.
OK.

Thanks,
Honza
 -int
 -ipa_get_param_decl_index (struct ipa_node_params *info, tree ptree)
 +static int
 +ipa_get_param_decl_index_1 (VEC (ipa_param_descriptor_t, heap) *descriptors,
 + tree ptree)
  {
int i, count;
  
 -  count = ipa_get_param_count (info);
 +  count = VEC_length (ipa_param_descriptor_t, descriptors);
for (i = 0; i  count; i++)
 -if (ipa_get_param (info, i) == ptree)
 +if (VEC_index (ipa_param_descriptor_t, descriptors, i).decl == ptree)
return i;
  
return -1;
  }
  
 -/* Populate the param_decl field in parameter descriptors of INFO that
 -   corresponds to NODE.  */
 +/* Return index of the formal whose tree is PTREE in function which 
 corresponds
 +   to INFO.  */
 +
 +int
 +ipa_get_param_decl_index (struct ipa_node_params *info, tree ptree)
 +{
 +  return ipa_get_param_decl_index_1 (info-descriptors, ptree);
 +}

I think it would be nice to not have same comment for both functions and explain
the reason why we need _1 variant.

Honza

Re: [PATCH] Disable aggregate jump functions for bit-field stores

2012-11-05 Thread Jan Hubicka

 Hi,
 
 the patch below disables generation of aggregate jump functions from
 bit-field stores because currently we depend on type size of the value
 to determine the size of the stored value and that does not work with
 bit-fields, making it impossible for IPA-CP to organize them in their
 lattices.
 
 If we ever decide aggregate jump functions for bit-fields are worth
 the hassle, we might remove this limitation by storing and streaming
 the size of the memory reference alongside the offset in the jump
 functions (and IPA-CP lattices).
 
 Bootstrapped and tested on x86_64-linux, needed for the aggregate
 IPA-CP.  OK for trunk?
 
 Thanks,
 
 Martin
 
 
 2012-11-02  Martin Jambor  mjam...@suse.cz
 
   * ipa-prop.c (determine_known_aggregate_parts): Do not create
   aggregate jump functions for bit-fields.
 
 Index: src/gcc/ipa-prop.c
 ===
 --- src.orig/gcc/ipa-prop.c
 +++ src/gcc/ipa-prop.c
 @@ -1295,7 +1295,10 @@ determine_known_aggregate_parts (gimple
  
lhs = gimple_assign_lhs (stmt);
rhs = gimple_assign_rhs1 (stmt);
 -  if (!is_gimple_reg_type (rhs))
 +  if (!is_gimple_reg_type (rhs)
 +  || TREE_CODE (lhs) == BIT_FIELD_REF
 +   || (TREE_CODE (lhs) == COMPONENT_REF
 +DECL_BIT_FIELD (TREE_OPERAND (lhs, 1

I am not sure I understand motivation of this patch properly.  First I think
BIT_FIELD_REF can be hidden inside chain of other REFs so you should probably
look into all handled components.  What exactly goes wrong when your type size
is bigger than the bitfield?

Honza

Re: GCC 4.8.0 Status Report (2012-10-29), Stage 1 to end soon

2012-11-05 Thread David Malcolm

On Wed, 2012-10-31 at 11:13 +0100, Richard Biener wrote:
 On Mon, Oct 29, 2012 at 6:56 PM, Jakub Jelinek ja...@redhat.com wrote:
  Status
  ==
 
  I'd like to close the stage 1 phase of GCC 4.8 development
  on Monday, November 5th.  If you have still patches for new features you'd
  like to see in GCC 4.8, please post them for review soon.
 
 Reminds me of the stable plugin API for introspection.  David, Micha - what's
 the status here?  Adding this is certainly ok during stage3 and I think that
 we should have something in 4.8 to kick of further development here.

(sorry for the belated response, I was on vacation).

I'm currently leaning towards having the API as a separate source tree
that can be compiled against 4.6 through 4.8 onwards (hiding all
necessary compatibility cruft within it [1]), generating a library that
plugins can link against, providing a consistent C API across all of
these GCC versions.  Keeping it out-of-tree allows plugins to be written
that can work with older versions of gcc, and allows the plugin API to
change more rapidly than the rest of gcc (especially important for these
older gcc releases).  Distributions of gcc could build the plugin api at
the same time as gcc, albeit from a separate tarball.

When the API is more mature, we could merge it inside gcc proper, I
guess.

I'll try to post something later today.

Dave
[1] e.g C vs C++ linkage

patch to fix PR55151

2012-11-05 Thread Vladimir Makarov

  The following patch fixes PR55151.  The patch affects a sensitive 
part of LRA code.  So it took some time to find a PR solution.  For the 
test there were to many reloads into hard registers for an insn and LRA 
failed to assign hard registers to all reload pseudos.  There is another 
more costly alternative for the insn which accepts memory but we need to 
put pseudo value into memory for that.  LRA did not do it.  The patch 
adds this possibility.  The patch also discourage putting pseudo values 
into memory by increasing reject.  Without this code, several GCC tests 
checking assembler code quality failed.  The patch changes SPEC2000 code 
for a few tests but overall score is the same.


  The patch was successfully bootstrapped and tested on x86/x86-64.

  Committed as rev. 193170.


2012-11-05  Vladimir Makarov  vmaka...@redhat.com

PR rtl-optimization/55151
* lra-constraints.c (process_alt_operands): Permit putting reg
value into memory.  Increase reject for this case.

2012-11-05  Vladimir Makarov  vmaka...@redhat.com

PR rtl-optimization/55151
* gcc.dg/pr55151.c: New test.


Index: lra-constraints.c
===
--- lra-constraints.c   (revision 193169)
+++ lra-constraints.c   (working copy)
@@ -1581,7 +1581,9 @@ process_alt_operands (int only_alternati
case TARGET_MEM_CONSTRAINT:
  if (MEM_P (op) || spilled_pseudo_p (op))
win = true;
- if (CONST_POOL_OK_P (mode, op))
+ /* We can put constant or pseudo value into memory
+to satisfy the constraint.  */
+ if (CONST_POOL_OK_P (mode, op) || REG_P (op))
badop = false;
  constmemok = true;
  break;
@@ -1613,7 +1615,10 @@ process_alt_operands (int only_alternati
offsettable_nonstrict_memref_p (op))
  || spilled_pseudo_p (op))
win = true;
- if (CONST_POOL_OK_P (mode, op) || MEM_P (op))
+ /* We can put constant or pseudo value into memory
+or make memory address offsetable to satisfy the
+constraint.  */
+ if (CONST_POOL_OK_P (mode, op) || MEM_P (op) || REG_P (op))
badop = false;
  constmemok = true;
  offmemok = true;
@@ -1638,6 +1643,7 @@ process_alt_operands (int only_alternati
  if (CONST_INT_P (op)
  || (GET_CODE (op) == CONST_DOUBLE  mode == VOIDmode))
break;
+
case 'i':
  if (general_constant_p (op))
win = true;
@@ -1702,10 +1708,12 @@ process_alt_operands (int only_alternati
win = true;
 
  /* If we didn't already win, we can reload
-constants via force_const_mem, and other
-MEMs by reloading the address like for
+constants via force_const_mem or put the
+pseudo value into memory, or make other
+memory by reloading the address like for
 'o'.  */
- if (CONST_POOL_OK_P (mode, op) || MEM_P (op))
+ if (CONST_POOL_OK_P (mode, op)
+ || MEM_P (op) || REG_P (op))
badop = false;
  constmemok = true;
  offmemok = true;
@@ -1919,6 +1927,13 @@ process_alt_operands (int only_alternati
  += ira_reg_class_max_nregs[this_alternative][mode];
}
 
+ /* We are trying to spill pseudo into memory.  It is
+usually more costly than moving to a hard register
+although it might takes the same number of
+reloads.  */
+ if (no_regs_p  REG_P (op))
+   reject++;
+
  /* Input reloads can be inherited more often than output
 reloads can be removed, so penalize output
 reloads.  */
Index: testsuite/gcc.dg/pr55151.c
===
--- testsuite/gcc.dg/pr55151.c  (revision 0)
+++ testsuite/gcc.dg/pr55151.c  (working copy)
@@ -0,0 +1,13 @@
+/* PR rtl-optimization/55151 */
+/* { dg-do compile } */
+/* { dg-options -fPIC } */
+
+int a, b, c, d, e, f, g, h, i, j, k, l;
+void f4 (void)
+{
+  __asm__ volatile (:[a] =r,m (a),[b] =r,m (b),[c] =r,m (c),
+   [d] =r,m (d),[e] =r,m (e),[f] =r,m (f),
+   [g] =r,m (g),[h] =r,m (h),[i] =r,m (i),
+   [j] =r,m (j),[k] =r,m (k),[l] =r,m (l):[a],m (a),
+   [j],m (j), [k],m (k), [l],m (l));
+}

Implement simple peeling on tree level; remove RTL peeling code

2012-11-05 Thread Jan Hubicka

Hi,
this patch removes RTL loop peeling code and makes tree-cunroll pass to also
perform simple peeling (i.e. one done with profile feedback when loop is
expected to iterate just few times).

The motivation is
 1) I want to do some re-tunning of tree peeling heuristics first week(s) of
stage 3.  The work on making complette unrolling to use all loop bound
estimates actualy enabled a lot of extra unrolling oppurtunities showing
that we have to do something about the code growth.
The fact that both RTL and tree peeling code share the same --param bounds
leads to apples/orranges problem.
 2) peeling earlier is better because it enables optimization across the
peeled sequence, so current RTL peelng pass is too late for historical
reasons.
 3) there are some bugs in RTL level peeling logic that seems not worth to
solve.
 4) we may eventually retire the need for ivcanon pass, if it really is around
just to make RTL niter code work well.

In last weeks i was looking into the cases where we still peeled on RTL level
and not at tree level and I think I narrowed down the cases enough so the RTL
level peeling can be declared obsolette.  The remaining issues are mostly
related to pass ordering where we are not able to discover induction variables
early enough.  This happens for obscure testcases only and can be better solved
by another tree unrolling pass late in queue if neccesary, but I do not see a
need in it.

So I think I can say that RTL peeling has become obsolette. There are two
issues:

One thing RTL pass does is the accumulator var splitting, we do not have tree
equivalent for this, but I think this should be handled more generally and the
pass is disabled by default at all levels anyway.

Anohter thing I dislike about current situation is that all the complette
unroling/peeling code ended up ilogically in tree-ssa-loop-ivcanon.  I think it
should be moved into separate file and dismantled from ivcanon pass, but I
think it should wait for next stage1.

Bootstrapped/regtested x86_64-linux.  I am doing some additional benchmarking,
but with earlier version of patch there was no performance regressions on SPEC.

Honza

* loop-unroll.c: Update copyright and toplevel comment.
(decide_unrolling_and_peeling): Rename to
(decide_unrolling): ... this one.
(peel_loops_completely): Remove.
(decide_peel_simple): Remove.
(decide_peel_once_rolling): Remove.
(decide_peel_completely): Remove.
(peel_loop_simple): Remove.
(peel_loop_completely): Remove.
(unroll_and_peel_loops): Rename to ...
(unroll_loops): ... this one; handle only unrolling.
* cfgloop.h (lpt_dec): Remove LPT_PEEL_COMPLETELY and
LPT_PEEL_SIMPLE.
(UAP_PEEL): Remove.
(unroll_and_peel_loops): Remove.
(unroll_loops): New.
* passes.c (init_optimization_passes): Replace
pass_rtl_unroll_and_peel_loops by pass_rtl_unroll_loops.
* loop-init.c (gate_rtl_unroll_and_peel_loops,
rtl_unroll_and_peel_loops): Rename to ...
(gate_rtl_unroll_loops, rtl_unroll_loops): ... these; update.
(pass_rtl_unroll_and_peel_loops): Rename to ...
(pass_rtl_unroll_loops): ... this one.
* tree-pass.h (pass_rtl_unroll_and_peel_loops): Remove.
(pass_rtl_unroll_loops): New.
* tree-ssa-loop-ivcanon.c: Update toplevel comment.
(estimated_peeled_sequence_size, try_peel_loop): New.
(canonicalize_loop_induction_variables): Update.

* gcc.dg/tree-prof/peel-1.c: Update.
* gcc.dg/tree-prof/unroll-1.c: Update.
Index: tree-pass.h
===
--- tree-pass.h (revision 193160)
+++ tree-pass.h (working copy)
@@ -417,7 +417,7 @@ extern struct rtl_opt_pass pass_loop2;
 extern struct rtl_opt_pass pass_rtl_loop_init;
 extern struct rtl_opt_pass pass_rtl_move_loop_invariants;
 extern struct rtl_opt_pass pass_rtl_unswitch;
-extern struct rtl_opt_pass pass_rtl_unroll_and_peel_loops;
+extern struct rtl_opt_pass pass_rtl_unroll_loops;
 extern struct rtl_opt_pass pass_rtl_doloop;
 extern struct rtl_opt_pass pass_rtl_loop_done;
 
Index: testsuite/gcc.dg/tree-prof/unroll-1.c
===
--- testsuite/gcc.dg/tree-prof/unroll-1.c   (revision 193160)
+++ testsuite/gcc.dg/tree-prof/unroll-1.c   (working copy)
@@ -20,5 +20,5 @@ main()
   return 0;
 }
 /* { dg-final-use { scan-rtl-dump Considering unrolling loop with constant 
number of iterations loop2_unroll } } */
-/* { dg-final-use { cleanup-rtl-dump Not unrolling loop, doesn't roll } } */
-/* { dg-options -O3 -fdump-rtl-loop2_unroll -funroll-loops -fno-peel-loops } 
*/
+/* { dg-final-use { scan-rtl-dump Not unrolling loop, doesn't roll 
loop2_unroll } } */
+/* { dg-final-use { cleanup-rtl-dump loop2_unroll } } */
Index: testsuite/gcc.dg/tree-prof/peel-1.c

Re: [PR54693] loss of debug info in jump threading and loop ivopts

2012-11-05 Thread Jeff Law


On 11/03/2012 10:11 AM, Alexandre Oliva wrote:

On Nov  2, 2012, Alexandre Oliva aol...@redhat.com wrote:


On Nov  1, 2012, Jakub Jelinek ja...@redhat.com wrote:

Even for stmt frontiers it is IMHO undesirable to duplicate
(perhaps many times) the whole sequence, as one would then reply the var
changing sequence in the debugger perhaps once in the original bb, then
once or many times again in the successor bb.



Quite the opposite: when we do jump threading, we're *bypassing* the
entire block


What I wrote above was not entirely accurate.

We do bypass empty blocks, in thread_around_empty_block, but we
propagated debug stmts from their predecessors, that may actually be the
block duplicated for threading.  So, the debug stmts from the duplicated
block are entirely artificial, exclusively for purposes of SSA
resolution, while those from bypassed otherwise-empty blocks actually
tell the history of the computation.  All that said, even the latter are
useless right now, when their bindings happen to be immediately modified
by subsequent debug stmts.

Now, one of the problems with my patch was that it propagated debug
stmts over and over, when there was a long chain of empty blocks.  I'd
assumed they'd all be eliminated after the jump was redirected, but
they're only bypassed by the copy of the initial block.

Furthermore, when jump threading fails to determine the taken edge out
of e-dest, it proceeds to trying to threading through successors of
e-dest, and the current code would propagate debug stmts from e-dest
to single-pred successors at such attempt, even if no threading would
take place.

The patch below (based on a tree from before your first patch that's
already in) improves the situation: we only propagate debug stmts to the
final destination block, and only after we make sure we are performing
jump threading to that block.  Also, it refrains from propagating debug
bind stmts whose bindings are rendered inoperant by later bindings, in
the threaded blocks or at the top of the destination block.

I didn't try delaying the creation of the pointer set as you did, but
I'm now thinking using a linear on-stack vector for searches up to some
size might be even faster than creating the pointer set at the first
var.

Regstrapping on x86_64-linux-gnu and i686-linux-gnu.  Ok to install?

This is fine.  Thanks,
Jeff

Re: GCC 4.8.0 Status Report (2012-10-29), Stage 1 to end soon

2012-11-05 Thread Kenneth Zadeck


Jakub and Richi,

At this point I have decided to that i am not going to get the rest of 
the wide-int patches into a stable enough form for this round. The 
combination of still living without power at my house and some issues 
that i hit with the front ends has made it impossible to get this 
finished by today's deadline.


I do want patches 1-7 to go in (after proper review) but i am going to 
withdraw patch 8 for this round.


patches 1-5 deal with the rtl level.   These have been extensively 
tested and examined with the exception of patch 4, examined by 
Richard Sandiford.They clean up a lot of things at the rtl level 
that effect every port as well as fixing some outstanding regressions.


patches 6 and 7 are general cleanups at the tree level and can be 
justified as on their own without any regard to wide-int.They have 
also been extensively tested.


I am withdrawing patch 8 because it converted tree-vpn to use wide-ints 
but the benefit of this patch really cannot be seen without the rest of 
the tree level wide-int patches.


In the next couple of days i will resubmit patches 1-7 with the patch 
rot removed and the public comments folded into them.


Kenny

Re: [PATCH] Update source location for PRE inserted stmt

2012-11-05 Thread Xinliang David Li

For the example I listed, the new statement is generated for source
construct at program point (2). However unlike simple code motion, (2)
is not going away after PRE. How would setting the location of the new
statement at the insertion point break coverage? Besides, the new
statement won't create 'false' coverage for the insertion point either
as the there are existing statements at that point where the new stmt
inherits the location from.

David

On Mon, Nov 5, 2012 at 3:54 AM, Eric Botcazou ebotca...@adacore.com wrote:
 Those compiler generated statements do not have source origins, but
 they need to have debug location information attached so that
 information collected for them can be correlated with program
 constructs (such as CFG). One of the natural way is to use the source
 location associated with the program point where they are inserted.

 No, there is nothing natural (and this can even be wrong).  The statements
 must have the source location corresponding to the source construct they are
 generated for, which may be totally different from that of the insertion
 point.  Yes, that can generate jumpiness in GDB, but this is far better that
 breaking the coverage info by giving the same source location to instructions
 that have different coverage status.

 --
 Eric Botcazou

committed: Fix dbr processing of conditional returns

2012-11-05 Thread Joern Rennecke


fill_simple_delay_slots would call optimize_skip with a conditional return,
which would cause a segfault when it calls next_active_insn (RETURN).

Other places in reorg.c check that they are not operating on a return insn
before doing branch-insn only processing, so I added this check there too.

Bootstrapped on i686-pc-linux-gnu.

Committed as obviuous.
2012-11-05  Joern Rennecke  joern.renne...@embecosm.com

* reorg.c (fill_simple_delay_slots): Avoid calling optimize_skip
with a return instruction.

Index: reorg.c
===
--- reorg.c (revision 2686)
+++ reorg.c (revision 2687)
@@ -2196,7 +2196,8 @@ fill_simple_delay_slots (int non_jumps_p
   if (slots_filled != slots_to_fill
   delay_list == 0
   JUMP_P (insn)
-  (condjump_p (insn) || condjump_in_parallel_p (insn)))
+  (condjump_p (insn) || condjump_in_parallel_p (insn))
+  !ANY_RETURN_P (JUMP_LABEL (insn)))
{
  delay_list = optimize_skip (insn);
  if (delay_list)

Re: [PATCH] New configuration options to enable additional executable/startfile/shared library prefixes

2012-11-05 Thread Michael Meissner

On Sat, Nov 03, 2012 at 07:01:04PM -0400, David Edelsohn wrote:
 On Thu, Nov 1, 2012 at 5:17 PM, Michael Meissner
 meiss...@linux.vnet.ibm.com wrote:
 
  This patch adds 4 additional configuration switches, that allow the person
  building the compiler to add additional prefixes to search for additional
  executables and startfiles.
 
  If the backend has the appropriate tweaks installed, it will also add
  additional prefixes to search for the dynamic linker.  At the moment, only 
  the
  PowerPC Linux port has the modifications.  It is fairly easy to modify other
  targets if desired.
 
 Why does the documentation refer to powerpc64-linux when the changes
 affect all PowerPC Linux and affect the 32 bit dynamic linker search
 path?

Yes, obviously I should have included powerpc-linux as well as powerpc64-linux
in the documentation.  Thanks.  If it is approved, I will update the
documentation.

-- 
Michael Meissner, IBM
5 Technology Place Drive, M/S 2757, Westford, MA 01886-3141, USA
meiss...@linux.vnet.ibm.com fax +1 (978) 399-6899

libgo patch committed

2012-11-05 Thread Ian Lance Taylor

This patch from Shenghou Ma supports the os.Stat function on some more
operating systems.  Bootstrapped and ran Go testsuite on
x86_64-unknown-linux-gnu.  Committed to mainline.

Ian

diff -r 5d8ca2f79dba libgo/Makefile.am
--- a/libgo/Makefile.am	Fri Nov 02 16:37:20 2012 -0700
+++ b/libgo/Makefile.am	Mon Nov 05 09:33:12 2012 -0800
@@ -829,8 +829,28 @@
 if LIBGO_IS_SOLARIS
 go_os_stat_file = go/os/stat_solaris.go
 else
+if LIBGO_IS_LINUX
+go_os_stat_file = go/os/stat_atim.go
+else
+if LIBGO_IS_OPENBSD
+go_os_stat_file = go/os/stat_atim.go
+else
+if LIBGO_IS_DARWIN
+go_os_stat_file = go/os/stat_atimspec.go
+else
+if LIBGO_IS_FREEBSD
+go_os_stat_file = go/os/stat_atimspec.go
+else
+if LIBGO_IS_NETBSD
+go_os_stat_file = go/os/stat_atimspec.go
+else
 go_os_stat_file = go/os/stat.go
 endif
+endif
+endif
+endif
+endif
+endif
 
 go_os_files = \
 	$(go_os_dir_file) \
diff -r 5d8ca2f79dba libgo/go/os/stat_atim.go
--- /dev/null	Thu Jan 01 00:00:00 1970 +
+++ b/libgo/go/os/stat_atim.go	Mon Nov 05 09:33:12 2012 -0800
@@ -0,0 +1,61 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package os
+
+import (
+	syscall
+	time
+)
+
+func sameFile(sys1, sys2 interface{}) bool {
+	stat1 := sys1.(*syscall.Stat_t)
+	stat2 := sys2.(*syscall.Stat_t)
+	return stat1.Dev == stat2.Dev  stat1.Ino == stat2.Ino
+}
+
+func fileInfoFromStat(st *syscall.Stat_t, name string) FileInfo {
+	fs := fileStat{
+		name:basename(name),
+		size:int64(st.Size),
+		modTime: timespecToTime(st.Mtim),
+		sys: st,
+	}
+	fs.mode = FileMode(st.Mode  0777)
+	switch st.Mode  syscall.S_IFMT {
+	case syscall.S_IFBLK:
+		fs.mode |= ModeDevice
+	case syscall.S_IFCHR:
+		fs.mode |= ModeDevice | ModeCharDevice
+	case syscall.S_IFDIR:
+		fs.mode |= ModeDir
+	case syscall.S_IFIFO:
+		fs.mode |= ModeNamedPipe
+	case syscall.S_IFLNK:
+		fs.mode |= ModeSymlink
+	case syscall.S_IFREG:
+		// nothing to do
+	case syscall.S_IFSOCK:
+		fs.mode |= ModeSocket
+	}
+	if st.Modesyscall.S_ISGID != 0 {
+		fs.mode |= ModeSetgid
+	}
+	if st.Modesyscall.S_ISUID != 0 {
+		fs.mode |= ModeSetuid
+	}
+	if st.Modesyscall.S_ISVTX != 0 {
+		fs.mode |= ModeSticky
+	}
+	return fs
+}
+
+func timespecToTime(ts syscall.Timespec) time.Time {
+	return time.Unix(int64(ts.Sec), int64(ts.Nsec))
+}
+
+// For testing.
+func atime(fi FileInfo) time.Time {
+	return timespecToTime(fi.Sys().(*syscall.Stat_t).Atim)
+}
diff -r 5d8ca2f79dba libgo/go/os/stat_atimespec.go
--- /dev/null	Thu Jan 01 00:00:00 1970 +
+++ b/libgo/go/os/stat_atimespec.go	Mon Nov 05 09:33:12 2012 -0800
@@ -0,0 +1,61 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package os
+
+import (
+	syscall
+	time
+)
+
+func sameFile(sys1, sys2 interface{}) bool {
+	stat1 := sys1.(*syscall.Stat_t)
+	stat2 := sys2.(*syscall.Stat_t)
+	return stat1.Dev == stat2.Dev  stat1.Ino == stat2.Ino
+}
+
+func fileInfoFromStat(st *syscall.Stat_t, name string) FileInfo {
+	fs := fileStat{
+		name:basename(name),
+		size:int64(st.Size),
+		modTime: timespecToTime(st.Mtimspec),
+		sys: st,
+	}
+	fs.mode = FileMode(st.Mode  0777)
+	switch st.Mode  syscall.S_IFMT {
+	case syscall.S_IFBLK:
+		fs.mode |= ModeDevice
+	case syscall.S_IFCHR:
+		fs.mode |= ModeDevice | ModeCharDevice
+	case syscall.S_IFDIR:
+		fs.mode |= ModeDir
+	case syscall.S_IFIFO:
+		fs.mode |= ModeNamedPipe
+	case syscall.S_IFLNK:
+		fs.mode |= ModeSymlink
+	case syscall.S_IFREG:
+		// nothing to do
+	case syscall.S_IFSOCK:
+		fs.mode |= ModeSocket
+	}
+	if st.Modesyscall.S_ISGID != 0 {
+		fs.mode |= ModeSetgid
+	}
+	if st.Modesyscall.S_ISUID != 0 {
+		fs.mode |= ModeSetuid
+	}
+	if st.Modesyscall.S_ISVTX != 0 {
+		fs.mode |= ModeSticky
+	}
+	return fs
+}
+
+func timespecToTime(ts syscall.Timespec) time.Time {
+	return time.Unix(int64(ts.Sec), int64(ts.Nsec))
+}
+
+// For testing.
+func atime(fi FileInfo) time.Time {
+	return timespecToTime(fi.Sys().(*syscall.Stat_t).Atimspec)
+}
diff -r 5d8ca2f79dba libgo/go/os/stat_openbsd.go
--- a/libgo/go/os/stat_openbsd.go	Fri Nov 02 16:37:20 2012 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +
@@ -1,61 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package os
-
-import (
-	syscall
-	time
-)
-
-func sameFile(sys1, sys2 interface{}) bool {
-	stat1 := sys1.(*syscall.Stat_t)
-	stat2 := sys2.(*syscall.Stat_t)
-	return stat1.Dev == stat2.Dev  stat1.Ino == stat2.Ino
-}
-
-func fileInfoFromStat(st *syscall.Stat_t, name string) FileInfo {
-	fs := fileStat{
-		name:basename(name),
-		size:int64(st.Size),
-		modTime: timespecToTime(st.Mtim),
-		sys: st,
-	}
-	fs.mode = FileMode(st.Mode  0777)
-	switch st.Mode  syscall.S_IFMT {
-	case syscall.S_IFBLK:
-		fs.mode |=

Re: [PATCH] Disable aggregate jump functions for bit-field stores

2012-11-05 Thread Martin Jambor

On Mon, Nov 05, 2012 at 05:20:46PM +0100, Jan Hubicka wrote:
  Hi,
  
  the patch below disables generation of aggregate jump functions from
  bit-field stores because currently we depend on type size of the value
  to determine the size of the stored value and that does not work with
  bit-fields, making it impossible for IPA-CP to organize them in their
  lattices.
  
  If we ever decide aggregate jump functions for bit-fields are worth
  the hassle, we might remove this limitation by storing and streaming
  the size of the memory reference alongside the offset in the jump
  functions (and IPA-CP lattices).
  
  Bootstrapped and tested on x86_64-linux, needed for the aggregate
  IPA-CP.  OK for trunk?
  
  Thanks,
  
  Martin
  
  
  2012-11-02  Martin Jambor  mjam...@suse.cz
  
  * ipa-prop.c (determine_known_aggregate_parts): Do not create
  aggregate jump functions for bit-fields.
  
  Index: src/gcc/ipa-prop.c
  ===
  --- src.orig/gcc/ipa-prop.c
  +++ src/gcc/ipa-prop.c
  @@ -1295,7 +1295,10 @@ determine_known_aggregate_parts (gimple
   
 lhs = gimple_assign_lhs (stmt);
 rhs = gimple_assign_rhs1 (stmt);
  -  if (!is_gimple_reg_type (rhs))
  +  if (!is_gimple_reg_type (rhs)
  +  || TREE_CODE (lhs) == BIT_FIELD_REF
  + || (TREE_CODE (lhs) == COMPONENT_REF
  +  DECL_BIT_FIELD (TREE_OPERAND (lhs, 1
 
 I am not sure I understand motivation of this patch properly.  First I think
 BIT_FIELD_REF can be hidden inside chain of other REFs so you should probably
 look into all handled components.  What exactly goes wrong when your type size
 is bigger than the bitfield?
 

I hit an assert in the patch I am about to post (I am writing the
changelog right now) I hit an assert checking that stuff does not
overlap in IPA-CP aggregate lattices.  My initial reaction was that I
did not want to deal with bit-fields, unless really necessary ;-)

However, that assert can be easily changed to bottomizing the lattice,
I will re-test with that and if there are no problems, this patch can
be omitted.

Thanks,

Martin

libgo patch committed: NetBSD fix

2012-11-05 Thread Ian Lance Taylor

This patch from Shenghou Ma fixes gccgo on libgo by not losing the TLS
pointer.  This works around what I would describe as a bug in the NetBSD
libc.  Bootstrapped on x86_64-unknown-linux-gnu, not that that proves
much.  Committed to mainline.

Ian

diff -r df61836f495f libgo/runtime/proc.c
--- a/libgo/runtime/proc.c	Mon Nov 05 09:36:01 2012 -0800
+++ b/libgo/runtime/proc.c	Mon Nov 05 09:37:21 2012 -0800
@@ -82,7 +82,7 @@
 {
 }
 
-# else
+#else
 
 # if defined(__x86_64__)  defined(__sun__)
 
@@ -110,6 +110,28 @@
 	c-uc_mcontext.gregs[REG_FSBASE] = fs;
 }
 
+# elif defined(__NetBSD__)
+
+// NetBSD has a bug: setcontext clobbers tlsbase, we need to save
+// and restore it ourselves.
+
+static __thread __greg_t tlsbase;
+
+static inline void
+initcontext(void)
+{
+	ucontext_t c;
+
+	getcontext(c);
+	tlsbase = c.uc_mcontext._mc_tlsbase;
+}
+
+static inline void
+fixcontext(ucontext_t* c)
+{
+	c-uc_mcontext._mc_tlsbase = tlsbase;
+}
+
 # else
 
 #  error unknown case for SETCONTEXT_CLOBBERS_TLS

Re: [PING^2] [C++ PATCH] Add overflow checking to __cxa_vec_new[23]

2012-11-05 Thread Florian Weimer


On 11/02/2012 01:14 PM, Paolo Carlini wrote:

On 11/02/2012 01:09 PM, Florian Weimer wrote:

I looked at this again and made a new copy of the test case instead.
It has been successfully tested on x86_64-redhat-linux-gnu.

Is this okay for trunk?



Looks very nice to me, and after all the issue seems rather simple.
Let's say we wait another 2-3 days in case Jason and others have
comments, and then it's Ok for mainline.


Thanks.  I made a few formatting changes, so the attached version is 
what I committed.


--
Florian Weimer / Red Hat Product Security Team
2012-11-03  Florian Weimer  fwei...@redhat.com

	* libsupc++/vec.cc (compute_size): New.
	(__cxa_vec_new2, __cxa_vec_new3): Use it.
	* testsuite/18_support/cxa_vec.cc: New.

Index: libstdc++-v3/libsupc++/vec.cc
===
--- libstdc++-v3/libsupc++/vec.cc	(revision 193173)
+++ libstdc++-v3/libsupc++/vec.cc	(working copy)
@@ -1,7 +1,6 @@
 // New abi Support -*- C++ -*-
 
-// Copyright (C) 2000, 2001, 2003, 2004, 2009, 2011
-// Free Software Foundation, Inc.
+// Copyright (C) 2000-2012 Free Software Foundation, Inc.
 //  
 // This file is part of GCC.
 //
@@ -59,6 +58,19 @@
   globals-caughtExceptions = p-nextException;
   globals-uncaughtExceptions += 1;
 }
+
+// Compute the total size with overflow checking.
+std::size_t compute_size(std::size_t element_count,
+			 std::size_t element_size,
+			 std::size_t padding_size)
+{
+  if (element_size  element_count  std::size_t(-1) / element_size)
+	throw std::bad_alloc();
+  std::size_t size = element_count * element_size;
+  if (size + padding_size  size)
+	throw std::bad_alloc();
+  return size + padding_size;
+}
   }
 
   // Allocate and construct array.
@@ -83,7 +95,8 @@
 		 void *(*alloc) (std::size_t),
 		 void (*dealloc) (void *))
   {
-std::size_t size = element_count * element_size + padding_size;
+std::size_t size
+  = compute_size(element_count, element_size, padding_size);
 char *base = static_cast char * (alloc (size));
 if (!base)
   return base;
@@ -124,7 +137,8 @@
 		 void *(*alloc) (std::size_t),
 		 void (*dealloc) (void *, std::size_t))
   {
-std::size_t size = element_count * element_size + padding_size;
+std::size_t size
+  = compute_size(element_count, element_size, padding_size);
 char *base = static_castchar *(alloc (size));
 if (!base)
   return base;
Index: libstdc++-v3/testsuite/18_support/cxa_vec.cc
===
--- libstdc++-v3/testsuite/18_support/cxa_vec.cc	(revision 0)
+++ libstdc++-v3/testsuite/18_support/cxa_vec.cc	(working copy)
@@ -0,0 +1,64 @@
+// { dg-do run }
+// Avoid use of none-overridable new/delete operators in shared
+// { dg-options -static { target *-*-mingw* } }
+// Test __cxa_vec routines
+// Copyright (C) 2000-2012 Free Software Foundation, Inc.
+// Contributed by Nathan Sidwell 7 Apr 2000 nathan@nat...@codesourcery.com
+
+#include cxxabi.h
+#include stdio.h
+#include new
+#include stdlib.h
+#include setjmp.h
+
+// Allocate enough padding to hold an array cookie.
+#ifdef __ARM_EABI__
+static const size_t padding = 8;
+#else
+static const size_t padding = (sizeof (std::size_t));
+#endif
+
+// our pseudo ctors and dtors
+static abi::__cxa_cdtor_return_type ctor (void *x)
+{
+  abort ();
+}
+
+static abi::__cxa_cdtor_return_type dtor (void *x)
+{
+  abort ();
+}
+
+// allocate an array whose size causes an overflow during multiplication
+void test1 ()
+{
+  static const std::size_t large_size =
+std::size_t(1)  (sizeof(std::size_t) * 8 - 2);
+  try
+{
+  abi::__cxa_vec_new (large_size, 8, 0, ctor, dtor);
+  abort ();
+}
+  catch (std::bad_alloc )
+{
+}
+}
+
+// allocate an array whose size causes an overflow during addition
+void test2 ()
+{
+  try
+{
+  abi::__cxa_vec_new (std::size_t(-1) / 4, 4, padding, ctor, dtor);
+  abort ();
+}
+  catch (std::bad_alloc )
+{
+}
+}
+
+int main ()
+{
+  test1 ();
+  test2 ();
+}

PR 54805: __gthread_tsd* in vxlib-tls.c

2012-11-05 Thread rbmj


Hello all,

Since nobody has commented on bug 54805, and I'm pretty sure this is 
valid (and obvious), I'm just submitting it to the list.


This removes warnings about implicit declarations and fixes one of the 
function calls in vxlib-tls.c for vxworks targets.


I got the old prototypes from 
http://gcc.gnu.org/ml/gcc-patches/2005-08/msg01314.html


See bug for further details.

Someone please comment or commit :)

--
Robert Mason
From d008e235167796417cf6a8f68f7206dc4351b5c3 Mon Sep 17 00:00:00 2001
From: rbmj r...@verizon.net
Date: Wed, 26 Sep 2012 20:12:52 -0400
Subject: [PATCH] [libgcc] 	* config/vxlib-tls.c: Put declarations for
 kernel space TSD

---
 libgcc/config/vxlib-tls.c |   11 ++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/libgcc/config/vxlib-tls.c b/libgcc/config/vxlib-tls.c
index c469676..a2f5e34 100644
--- a/libgcc/config/vxlib-tls.c
+++ b/libgcc/config/vxlib-tls.c
@@ -102,6 +102,14 @@ extern void __gthread_set_tls_data (void
 extern void __gthread_enter_tls_dtor_context (void);
 extern void __gthread_leave_tls_dtor_context (void);
 
+#ifndef __RTP__
+
+extern void *__gthread_get_tsd_data(WIND_TCB *tcb);
+extern void __gthread_set_tsd_data(WIND_TCB *tcb, void *data);
+extern void __gthread_enter_tsd_dtor_context(WIND_TCB *tcb);
+extern void __gthread_leave_tsd_dtor_context(WIND_TCB *tcb);
+
+#endif /* __RTP__ */
 
 /* This is a global structure which records all of the active keys.
 
@@ -150,7 +158,7 @@ static __gthread_once_t tls_init_guard =
need to read tls_keys.dtor[key] atomically.  */
 
 static void
-tls_delete_hook (void *tcb ATTRIBUTE_UNUSED)
+tls_delete_hook (void *tcb)
 {
   struct tls_data *data;
   __gthread_key_t key;
@@ -185,7 +193,7 @@ tls_delete_hook (void *tcb ATTRIBUTE_UNU
 #ifdef __RTP__
   __gthread_leave_tls_dtor_context ();
 #else
-  __gthread_leave_tsd_dtor_context ();
+  __gthread_leave_tsd_dtor_context (tcb);
 #endif
 
 #ifdef __RTP__

-- 1.7.10.4

Re: [PATCH, middle-end]: Fix mode-switching MODE_EXIT check with __builtin_apply/__builtin_return

2012-11-05 Thread Eric Botcazou

 This sequence breaks assumption in mode-switching.c, that final
 function return register load operates in MODE_EXIT mode and triggere
 following code:
 
   for (j = n_entities - 1; j = 0; j--)
 {
   int e = entity_map[j];
   int mode = MODE_NEEDED (e, return_copy);
 
   if (mode != num_modes[e]  mode != MODE_EXIT (e))
 break;
 }
 
 As discussed above, modes of loads, generated from __builtin_apply
 have no connection to function return mode. mode-switching.c does
 detect __builtin_apply situation and raises maybe_builtin_apply flag,
 but doesn't use it to short-circuit wrong check. In proposed patch, we
 detect this situation and raise force_late_switch in the same way, as
 SH4 does for its late fpscr emission.

If I understand correctly, we need to insert the vzeroupper because the 
function returns double in SSE registers but we generate an OImode load 
instead of a DFmode load because of the __builtin_return.  So we're in the 
forced_late_switch case but we fail to recognize the tweaked return value load 
since the number of registers doesn't match.

If so, I'd rather add another special case, like for the SH4, instead of a 
generic bypass for maybe_builtin_apply, something along the lines of:

/* For the x86 with AVX, we might be using a larger load for a value
   returned in SSE registers and we want to put the final mode switch
   after this return value copy.  */
if (copy_start == ret_start
 nregs == hard_regno_nregs[ret_start][GET_MODE (ret_reg)]
 copy_num = nregs
 OBJECT_P (SET_SRC (return_copy_pat)))
  forced_late_switch = 1;

 2012-11-04  Vladimir Yakovlev  vladimir.b.yakov...@intel.com
   Uros Bizjak  ubiz...@gmail.com
 
 * mode-switching.c (create_pre_exit): Added code for
 maybe_builtin_apply case.

Present tense in ChangeLog.

-- 
Eric Botcazou

Re: [patch][RFC] Filename based shared library versioning on AIX

2012-11-05 Thread Michael Haubenwallner


On 11/02/2012 12:16 AM, David Edelsohn wrote:
 I would like to introduce filename-based shared library versioning (known as
 the soname in ELF world) for AIX, activated by the '--enable-aix-soname'
 configure flag.

 As discussed in the Bugzilla, I think this is a good feature and a
 nice trick to support SVR4-like shared object naming on AIX (I think
 this really is Linux/SVR4 convention, not ELF file format convention).

Thank you!

 As you mention, this scheme is not compatible with the current shared
 object naming scheme of GCC/libtool on AIX and not backward compatible,

Well, as long as the old sharedlibs were not created as standalone shared
objects (lib.so), this is similar to a normal soname-bump on AIX, in that
it is still possible for the package manager to transfer the old shared
objects (with F_LOADONLY flag set) into the new archives.

As far as I can see, gcc does not provide this libtool-option (environment
variable LDFLAGS=-brtl) at all for its libraries (for good reason).

 so it definitely should not be the default.

Yes, of course.

 As Joseph mentioned, these options needs to be documented, but
 otherwise they are a good feature to add.

Here's the updated patch.

Thank you!
/haubi/
From 3c9141f9df666ad972688b31e1cfecfbfd6ef6b6 Mon Sep 17 00:00:00 2001
From: Michael Haubenwallner michael.haubenwall...@salomon.at
Date: Fri, 16 Mar 2012 14:49:20 +0100
Subject: [PATCH] AIX: Filename-based shlib versioning for libgcc_s

2012-11-05  Michael Haubenwallner  michael.haubenwall...@salomon.at

	(libgcc_s) Optional filename-based shared library versioning on AIX.
	* gcc/doc/install.texi: Add --enable-aix-soname option.
	* Makefile.in (enable_aix_soname): Define.
	* config/rs6000/t-slibgcc-aix: Implement filename-based versioning.
	* configure.in: Accept --enable-aix-soname option.
	* configure: Recreate.
---
 gcc/doc/install.texi   |   29 +
 libgcc/Makefile.in |1 +
 libgcc/config/rs6000/t-slibgcc-aix |   79 +++-
 libgcc/configure   |   26 
 libgcc/configure.ac|   17 
 5 files changed, 141 insertions(+), 11 deletions(-)

diff --git a/gcc/doc/install.texi b/gcc/doc/install.texi
index 7b2441a..8f9425d 100644
--- a/gcc/doc/install.texi
+++ b/gcc/doc/install.texi
@@ -1336,6 +1336,30 @@ particularly useful if you intend to use several versions of GCC in
 parallel.  This is currently supported by @samp{libgfortran},
 @samp{libjava}, @samp{libmudflap}, @samp{libstdc++}, and @samp{libobjc}.
 
+@item @anchor{enable-aix-soname}--enable-aix-soname
+Traditional AIX shared library versioning (versioned @code{Shared Objects} as
+members of unversioned @code{Archive Library} files named @samp{lib.a}) causes
+numerous headaches for package managers.  However, @code{Import Files} as
+members of @code{Archive Library} files allow for @emph{filename-based} shared
+library versioning known on Linux/SVR4, where this is called the SONAME.
+But as they prevent static linking, @code{Import Files} may be used with
+@code{Runtime Linking} only, where the linker does search for @samp{lib.so}
+library filenames first.
+
+When enabled, a shared library supporting @option{--enable-aix-soname} is
+created as @code{Shared Archive Library} using the @samp{lib.so} filename, for
+use @emph{with} @code{Runtime Linking} @emph{only}, while the @samp{lib.a}
+filename is created as the plain static @code{Archive Library} - @emph{except}
+for @samp{libgcc_s}, which is not used in static linking, so @file{libgcc_s.a}
+is the symlink referring to the versioned @samp{lib.so} filename.
+
+Package managers can still
+@uref{./specific.html#transfer-aix-shobj,,transfer} the old @code{Shared
+Objects} to the static @code{Archive Library} file (pointed to by the symlink)
+@samp{lib.a} when merging into the live filesystem.
+
+@option{--enable-aix-soname} is currently supported by @samp{libgcc_s}.
+
 @item --enable-languages=@var{lang1},@var{lang2},@dots{}
 Specify that only a particular subset of compilers and
 their runtime libraries should be built.  For a list of valid values for
@@ -3690,6 +3714,7 @@ APAR IY26685 (AIX 4.3) or APAR IY25528 (AIX 5.1).  It also requires a
 fix for another AIX Assembler bug and a co-dependent AIX Archiver fix
 referenced as APAR IY53606 (AIX 5.2) or as APAR IY54774 (AIX 5.1)
 
+@anchor{transfer-aix-shobj}
 @samp{libstdc++} in GCC 3.4 increments the major version number of the
 shared object and GCC installation places the @file{libstdc++.a}
 shared library in a common location which will overwrite the and GCC
@@ -3720,6 +3745,10 @@ Archive the runtime-only shared object in the GCC 3.4
 % ar -q libstdc++.a libstdc++.so.4 libstdc++.so.5
 @end smallexample
 
+You may also need this procedure when you start using the
+@uref{./configure.html#enable-aix-soname,,@option{--enable-aix-soname}}
+configure option.
+
 Linking executables and shared libraries may produce warnings of
 duplicate

Re: [patch][RFC] Filename based shared library versioning on AIX

2012-11-05 Thread David Edelsohn

On Mon, Nov 5, 2012 at 1:10 PM, Michael Haubenwallner
michael.haubenwall...@salomon.at wrote:

 Well, as long as the old sharedlibs were not created as standalone shared
 objects (lib.so), this is similar to a normal soname-bump on AIX, in that
 it is still possible for the package manager to transfer the old shared
 objects (with F_LOADONLY flag set) into the new archives.

Yes, the old shared objects can be placed in the new archive, but one
also needs to ensure that the archive has the correct name, e.g.
libfoo.a not libfoo.so.x.y ad libfoo.so.

 As far as I can see, gcc does not provide this libtool-option (environment
 variable LDFLAGS=-brtl) at all for its libraries (for good reason).

I do not understand what you mean by gcc does not provide this libtool
option.  GCC does link libstdc++ with -G option, for use with -brtl,
but does not automatically link applications with -brtl.

Thanks, David

Re: [PATCH, middle-end]: Fix mode-switching MODE_EXIT check with __builtin_apply/__builtin_return

2012-11-05 Thread Uros Bizjak

On Mon, Nov 5, 2012 at 7:05 PM, Eric Botcazou ebotca...@adacore.com wrote:
 This sequence breaks assumption in mode-switching.c, that final
 function return register load operates in MODE_EXIT mode and triggere
 following code:

   for (j = n_entities - 1; j = 0; j--)
 {
   int e = entity_map[j];
   int mode = MODE_NEEDED (e, return_copy);

   if (mode != num_modes[e]  mode != MODE_EXIT (e))
 break;
 }

 As discussed above, modes of loads, generated from __builtin_apply
 have no connection to function return mode. mode-switching.c does
 detect __builtin_apply situation and raises maybe_builtin_apply flag,
 but doesn't use it to short-circuit wrong check. In proposed patch, we
 detect this situation and raise force_late_switch in the same way, as
 SH4 does for its late fpscr emission.

 If I understand correctly, we need to insert the vzeroupper because the
 function returns double in SSE registers but we generate an OImode load
 instead of a DFmode load because of the __builtin_return.  So we're in the
 forced_late_switch case but we fail to recognize the tweaked return value load
 since the number of registers doesn't match.

 If so, I'd rather add another special case, like for the SH4, instead of a
 generic bypass for maybe_builtin_apply, something along the lines of:

 /* For the x86 with AVX, we might be using a larger load for a value
returned in SSE registers and we want to put the final mode switch
after this return value copy.  */
 if (copy_start == ret_start
  nregs == hard_regno_nregs[ret_start][GET_MODE (ret_reg)]
  copy_num = nregs
  OBJECT_P (SET_SRC (return_copy_pat)))
   forced_late_switch = 1;

Yes, this approach also works.

I assume it is OK to commit attached patch?

2012-11-05  Eric Botcazou  ebotca...@adacore.com
Uros Bizjak  ubiz...@gmail.com

* mode-switching.c (create_pre_exit): Force late switch for
__builtin_return case, when value, returned in SSE register,
was loaded using OImode load.

Tested on x86_64-pc-linux-gnu, also with to-be-committed avx-vzeroupper-27.c

Thanks,
Uros.
Index: mode-switching.c
===
--- mode-switching.c(revision 193174)
+++ mode-switching.c(working copy)
@@ -1,6 +1,6 @@
 /* CPU mode switching
Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2007, 2008,
-   2009, 2010 Free Software Foundation, Inc.
+   2009, 2010, 2012  Free Software Foundation, Inc.
 
 This file is part of GCC.
 
@@ -342,6 +342,17 @@ create_pre_exit (int n_entities, int *entity_map,
  }
if (j = 0)
  {
+   /* For the x86 with AVX, we might be using a larger
+  load for a value returned in SSE registers and we
+  want to put the final mode switch after this
+  return value copy.  */
+   if (copy_start == ret_start
+nregs
+  == hard_regno_nregs[ret_start][GET_MODE 
(ret_reg)]
+copy_num = nregs
+OBJECT_P (SET_SRC (return_copy_pat)))
+ forced_late_switch = 1;
+
/* For the SH4, floating point loads depend on fpscr,
   thus we might need to put the final mode switch
   after the return value copy.  That is still OK,

Re: [PATCH] Do not change scope for unknown locations

2012-11-05 Thread Eric Botcazou

 Because we do want location and its block to be tightly coupled. I may
 want to add an assertion that if location is known, the block should
 *not* be NULL.

Yes, that would be a first step towards always continuing.  But that's 4.9 
material, let's install the posted patch as-is for 4.8.

-- 
Eric Botcazou

[patch][google/gcc-4_7] Extend expiration date for pr54127 (issue6817091)

2012-11-05 Thread Paul Pluzhnikov

Greetings,

This patch is for google/gcc-4_7 branch.

Thanks,


2012-11-05  Paul Pluzhnikov  ppluzhni...@google.com

* contrib/testsuite-management/powerpc-grtev3-linux-gnu.xfail:
extend expiration date for pr54127.

Index: contrib/testsuite-management/powerpc-grtev3-linux-gnu.xfail
===
--- contrib/testsuite-management/powerpc-grtev3-linux-gnu.xfail (revision 
193176)
+++ contrib/testsuite-management/powerpc-grtev3-linux-gnu.xfail (working copy)
@@ -1,9 +1,9 @@
 # Temporarily ignore gcc pr54127.
-expire=20121031 | FAIL: gcc.dg/torture/pr53589.c -O3 -g  (test for excess 
errors)
-expire=20121031 | FAIL: gcc.dg/torture/pr53589.c  -O3 -g  (internal compiler 
error)
+expire=20121231 | FAIL: gcc.dg/torture/pr53589.c -O3 -g  (test for excess 
errors)
+expire=20121231 | FAIL: gcc.dg/torture/pr53589.c  -O3 -g  (internal compiler 
error)
 # Temporarily ignore Google ref b/6983319.
-expire=20121031 | FAIL: gcc.target/powerpc/regnames-1.c (test for excess 
errors)
-expire=20121031 | FAIL: gcc.target/powerpc/regnames-1.c (internal compiler 
error)
+expire=20121231 | FAIL: gcc.target/powerpc/regnames-1.c (test for excess 
errors)
+expire=20121231 | FAIL: gcc.target/powerpc/regnames-1.c (internal compiler 
error)
 
 FAIL: gfortran.dg/bessel_6.f90  -O0  execution test
 FAIL: gfortran.dg/bessel_6.f90  -O1  execution test

--
This patch is available for review at http://codereview.appspot.com/6817091

Re: [PATCH, middle-end]: Fix mode-switching MODE_EXIT check with __builtin_apply/__builtin_return

2012-11-05 Thread Uros Bizjak

On Mon, Nov 5, 2012 at 7:34 PM, Uros Bizjak ubiz...@gmail.com wrote:
 On Mon, Nov 5, 2012 at 7:05 PM, Eric Botcazou ebotca...@adacore.com wrote:
 This sequence breaks assumption in mode-switching.c, that final
 function return register load operates in MODE_EXIT mode and triggere
 following code:

   for (j = n_entities - 1; j = 0; j--)
 {
   int e = entity_map[j];
   int mode = MODE_NEEDED (e, return_copy);

   if (mode != num_modes[e]  mode != MODE_EXIT (e))
 break;
 }

 As discussed above, modes of loads, generated from __builtin_apply
 have no connection to function return mode. mode-switching.c does
 detect __builtin_apply situation and raises maybe_builtin_apply flag,
 but doesn't use it to short-circuit wrong check. In proposed patch, we
 detect this situation and raise force_late_switch in the same way, as
 SH4 does for its late fpscr emission.

 If I understand correctly, we need to insert the vzeroupper because the
 function returns double in SSE registers but we generate an OImode load
 instead of a DFmode load because of the __builtin_return.  So we're in the
 forced_late_switch case but we fail to recognize the tweaked return value 
 load
 since the number of registers doesn't match.

 If so, I'd rather add another special case, like for the SH4, instead of a
 generic bypass for maybe_builtin_apply, something along the lines of:

 /* For the x86 with AVX, we might be using a larger load for a value
returned in SSE registers and we want to put the final mode switch
after this return value copy.  */
 if (copy_start == ret_start
  nregs == hard_regno_nregs[ret_start][GET_MODE (ret_reg)]
  copy_num = nregs
  OBJECT_P (SET_SRC (return_copy_pat)))
   forced_late_switch = 1;

 Yes, this approach also works.

 I assume it is OK to commit attached patch?

 2012-11-05  Eric Botcazou  ebotca...@adacore.com
 Uros Bizjak  ubiz...@gmail.com

 * mode-switching.c (create_pre_exit): Force late switch for
 __builtin_return case, when value, returned in SSE register,
 was loaded using OImode load.

 Tested on x86_64-pc-linux-gnu, also with to-be-committed avx-vzeroupper-27.c

Unfortunately the proposed patch fails the testcase from PR41993:

--cut here--
short retframe_short (void *rframe)
{
__builtin_return (rframe);
}
--cut here--

Uros.

 Thanks,
 Uros.

Re: [patch][google/gcc-4_7] Extend expiration date for pr54127 (issue6817091)

2012-11-05 Thread Ollie Wild

OK.

Ollie


On Mon, Nov 5, 2012 at 12:40 PM, Paul Pluzhnikov ppluzhni...@google.com wrote:
 Greetings,

 This patch is for google/gcc-4_7 branch.

 Thanks,


 2012-11-05  Paul Pluzhnikov  ppluzhni...@google.com

 * contrib/testsuite-management/powerpc-grtev3-linux-gnu.xfail:
 extend expiration date for pr54127.

 Index: contrib/testsuite-management/powerpc-grtev3-linux-gnu.xfail
 ===
 --- contrib/testsuite-management/powerpc-grtev3-linux-gnu.xfail (revision 
 193176)
 +++ contrib/testsuite-management/powerpc-grtev3-linux-gnu.xfail (working copy)
 @@ -1,9 +1,9 @@
  # Temporarily ignore gcc pr54127.
 -expire=20121031 | FAIL: gcc.dg/torture/pr53589.c -O3 -g  (test for excess 
 errors)
 -expire=20121031 | FAIL: gcc.dg/torture/pr53589.c  -O3 -g  (internal compiler 
 error)
 +expire=20121231 | FAIL: gcc.dg/torture/pr53589.c -O3 -g  (test for excess 
 errors)
 +expire=20121231 | FAIL: gcc.dg/torture/pr53589.c  -O3 -g  (internal compiler 
 error)
  # Temporarily ignore Google ref b/6983319.
 -expire=20121031 | FAIL: gcc.target/powerpc/regnames-1.c (test for excess 
 errors)
 -expire=20121031 | FAIL: gcc.target/powerpc/regnames-1.c (internal compiler 
 error)
 +expire=20121231 | FAIL: gcc.target/powerpc/regnames-1.c (test for excess 
 errors)
 +expire=20121231 | FAIL: gcc.target/powerpc/regnames-1.c (internal compiler 
 error)

  FAIL: gfortran.dg/bessel_6.f90  -O0  execution test
  FAIL: gfortran.dg/bessel_6.f90  -O1  execution test

 --
 This patch is available for review at http://codereview.appspot.com/6817091

Re: [x86] Fix gcc.c-torture/compile/20080806-1.c failures

2012-11-05 Thread Richard Sandiford

Uros Bizjak ubiz...@gmail.com writes:
 On Mon, Nov 5, 2012 at 9:30 AM, Richard Sandiford
 rdsandif...@googlemail.com wrote:
 gcc/
 PR target/55204
 * config/i386/i386.c (ix86_address_subreg_operand): Remove stack
 pointer check.
 (print_reg): Use true_regnum rather than REGNO.
 (ix86_print_operand_address): Remove SUBREG handling.

 The patch is OK for mainline and 4.7, if it passes H.J.'s tests with
 -maddress-mode={short,long} on x32.

 +  unsigned int regno = true_regnum (x);

 I'd rather see the declaration at the beginning of the function.

OK, thanks, applied to both with that change.

Richard

Re: [PATCH, middle-end]: Fix mode-switching MODE_EXIT check with __builtin_apply/__builtin_return

2012-11-05 Thread Uros Bizjak

On Mon, Nov 5, 2012 at 7:43 PM, Uros Bizjak ubiz...@gmail.com wrote:

 As discussed above, modes of loads, generated from __builtin_apply
 have no connection to function return mode. mode-switching.c does
 detect __builtin_apply situation and raises maybe_builtin_apply flag,
 but doesn't use it to short-circuit wrong check. In proposed patch, we
 detect this situation and raise force_late_switch in the same way, as
 SH4 does for its late fpscr emission.

 If I understand correctly, we need to insert the vzeroupper because the
 function returns double in SSE registers but we generate an OImode load
 instead of a DFmode load because of the __builtin_return.  So we're in the
 forced_late_switch case but we fail to recognize the tweaked return value 
 load
 since the number of registers doesn't match.

Actually, the complication with __buitlin_apply/__builtin_return is
that it blindly loads all possible return registers. So, in PR41993
case, we load SSE register using OImode load (that forces AVX dirty
state), even if we actually return %eax. In your patch it is assumed
that wide-load corresponds to the current function return register,
which is not the case.

Please note, that we enter the above code only in case the needed mode
of processed insn is different than MODE_EXIT. So in x86 case, we know
that this was due to hard-register load in wrong mode from
__builtin_{apply,return}. My patch also uses extra condition, where
hard-reg should be one of possible return registers, but not only
current function return register.

Uros.

Re: [PATCH/MIPS] Use ins/dins instruction when written manually

2012-11-05 Thread Richard Sandiford

Hi Andrew,

Andrew Pinski andrew.pin...@caviumnetworks.com writes:
 On Fri, Oct 5, 2012 at 8:43 PM, Andrew Pinski
 andrew.pin...@caviumnetworks.com wrote:
 On Sun, Aug 19, 2012 at 10:13 AM, Richard Sandiford
 rdsandif...@googlemail.com wrote:
 Andrew Pinski andrew.pin...@caviumnetworks.com writes:
   Right now we only produce ins when a zero_extract is used on the
 right hand side.  We can do better by adding some patterns which
 combine for the ins instruction.  This patch adds those patterns and a
 testcase which shows a simple example where the code is improved.

 Sorry for the delay in reviewing this.  Had you thought about trying to
 teach combine.c about this instead?  It doesn't look like any of the
 patterns are really providing more information about the underlying
 instruction.

 combine.c has some code to do this already if one of the src register
 is the same as the dest register; that is what make_field_assignment
 does.  Quickly looking at the code, the problem I doing it in
 make_field_assignment is there is no way to return that you need a
 copy of the value first unless I am missing something obvious.  Now I
 agree we should be optimize this in combine rather than these manual
 patterns.

 I now have a patch which implements this in combine which allows the
 backend not need to change.  I generate a SEQUENCE which then
 try_combine splits like we do for PARALLEL but keeping it in the
 correct order and allowing for the case where we are combing two
 instructions into two instructions.
 I hope to be able to post it later on Saturday.

Just wondering, what's the status of this?  Was worried that you might
have posted it and I'd missed it.

Richard

[patch, testsuite, mips] Fix gcc.dg/torture/mips-sdata-1.c

2012-11-05 Thread Steve Ellcey

The test gcc.dg/torture/mips-sdata-1.c fails when compiled with
-fno-fat-lto-objects because when that option is used no assembly code
is output and thus the scan does not find the '.sdata' line.

Checked with the mips-mti-elf target.  OK to checkin?

Steve Ellcey
sell...@mips.com

2012-11-05  Steve Ellcey  sell...@mips.com

* gcc.dg/torture/mips-sdata-1.c: Skip on -fno-fat-lto-objects.


diff --git a/gcc/testsuite/gcc.dg/torture/mips-sdata-1.c 
b/gcc/testsuite/gcc.dg/torture/mips-sdata-1.c
index 8ffd4d8..53c9e4f 100644
--- a/gcc/testsuite/gcc.dg/torture/mips-sdata-1.c
+++ b/gcc/testsuite/gcc.dg/torture/mips-sdata-1.c
@@ -1,6 +1,7 @@
 /* Check that sdata-accesses are applied regardless of size or ABI.  */
 /* { dg-options -mexplicit-relocs } */
 /* { dg-do compile { target mips*-*-elf* } } */
+/* { dg-skip-if  { *-*-* } { -fno-fat-lto-objects } {  } } */
 
 struct s { int x[4]; };
 struct s my_struct __attribute__((__section__(.sdata)));

Re: [PATCH/MIPS] Use ins/dins instruction when written manually

2012-11-05 Thread Andrew Pinski

On Mon, 2012-11-05 at 19:19 +, Richard Sandiford wrote:
 Hi Andrew,
 
 Andrew Pinski andrew.pin...@caviumnetworks.com writes:
  On Fri, Oct 5, 2012 at 8:43 PM, Andrew Pinski
  andrew.pin...@caviumnetworks.com wrote:
  On Sun, Aug 19, 2012 at 10:13 AM, Richard Sandiford
  rdsandif...@googlemail.com wrote:
  Andrew Pinski andrew.pin...@caviumnetworks.com writes:
Right now we only produce ins when a zero_extract is used on the
  right hand side.  We can do better by adding some patterns which
  combine for the ins instruction.  This patch adds those patterns and a
  testcase which shows a simple example where the code is improved.
 
  Sorry for the delay in reviewing this.  Had you thought about trying to
  teach combine.c about this instead?  It doesn't look like any of the
  patterns are really providing more information about the underlying
  instruction.
 
  combine.c has some code to do this already if one of the src register
  is the same as the dest register; that is what make_field_assignment
  does.  Quickly looking at the code, the problem I doing it in
  make_field_assignment is there is no way to return that you need a
  copy of the value first unless I am missing something obvious.  Now I
  agree we should be optimize this in combine rather than these manual
  patterns.
 
  I now have a patch which implements this in combine which allows the
  backend not need to change.  I generate a SEQUENCE which then
  try_combine splits like we do for PARALLEL but keeping it in the
  correct order and allowing for the case where we are combing two
  instructions into two instructions.
  I hope to be able to post it later on Saturday.
 
 Just wondering, what's the status of this?  Was worried that you might
 have posted it and I'd missed it.

I have not posted it yet.  I am still cleaning up the code and making
sure it does not cause regressions.

Thanks,
Andrew

 
 Richard

[v3] libstdc++/55215

2012-11-05 Thread Paolo Carlini


Hi,

tested x86_64-linux, committed to mainline (and soon 4_7-branch too).

Thanks,
Paolo.

//
2012-11-05  Paolo Carlini  paolo.carl...@oracle.com

PR libstdc++/55215
* include/bits/random.tcc (mersenne_twister_engine::seed(_Sseq)):
Assign state_size to _M_p.
* testsuite/26_numerics/random/mersenne_twister_engine/cons/55215.cc:
New.
* testsuite/26_numerics/random/independent_bits_engine/cons/55215.cc:
Likewise.
* testsuite/26_numerics/random/shuffle_order_engine/cons/55215.cc:
Likewise.
* testsuite/26_numerics/random/subtract_with_carry_engine/cons/
55215.cc: Likewise.
* testsuite/26_numerics/random/discard_block_engine/cons/55215.cc:
Likewise.
* testsuite/26_numerics/random/linear_congruential_engine/cons/
55215.cc: Likewise.
Index: testsuite/26_numerics/random/independent_bits_engine/cons/55215.cc
===
--- testsuite/26_numerics/random/independent_bits_engine/cons/55215.cc  
(revision 0)
+++ testsuite/26_numerics/random/independent_bits_engine/cons/55215.cc  
(working copy)
@@ -0,0 +1,60 @@
+// { dg-options -std=gnu++11 }
+// { dg-require-cstdint  }
+//
+// Copyright (C) 2012 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3.  If not see
+// http://www.gnu.org/licenses/.
+
+#include random
+#include testsuite_hooks.h
+
+int f(int x)
+{
+  std::seed_seq sq(x, x + 1);
+  auto rnd = std::independent_bits_enginestd::mt19937, 9,
+ std::uint_fast32_t(sq);
+  return std::uniform_int_distributionint()(rnd);
+}
+
+int g(int x)
+{
+  std::seed_seq sq(x, x + 1);
+  auto rnd = std::independent_bits_enginestd::mt19937, 9,
+ std::uint_fast32_t();
+  rnd.seed(sq);
+  return std::uniform_int_distributionint()(rnd);
+}
+
+void test01()
+{
+  bool test __attribute__((unused)) = true;
+
+  const int f1 = f(0);
+  const int f2 = f(0);
+
+  const int g1 = g(0);
+  const int g2 = g(0);
+  
+  VERIFY( f1 == f2 );
+  VERIFY( g1 == g2 );
+  VERIFY( f1 == g1 );
+}
+
+int main()
+{
+  test01();
+  return 0;
+}
Index: testsuite/26_numerics/random/discard_block_engine/cons/55215.cc
===
--- testsuite/26_numerics/random/discard_block_engine/cons/55215.cc 
(revision 0)
+++ testsuite/26_numerics/random/discard_block_engine/cons/55215.cc 
(working copy)
@@ -0,0 +1,58 @@
+// { dg-options -std=gnu++11 }
+// { dg-require-cstdint  }
+//
+// Copyright (C) 2012 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3.  If not see
+// http://www.gnu.org/licenses/.
+
+#include random
+#include testsuite_hooks.h
+
+int f(int x)
+{
+  std::seed_seq sq(x, x + 1);
+  auto rnd = std::ranlux24(sq);
+  return std::uniform_int_distributionint()(rnd);
+}
+
+int g(int x)
+{
+  std::seed_seq sq(x, x + 1);
+  auto rnd = std::ranlux24();
+  rnd.seed(sq);
+  return std::uniform_int_distributionint()(rnd);
+}
+
+void test01()
+{
+  bool test __attribute__((unused)) = true;
+
+  const int f1 = f(0);
+  const int f2 = f(0);
+
+  const int g1 = g(0);
+  const int g2 = g(0);
+  
+  VERIFY( f1 == f2 );
+  VERIFY( g1 == g2 );
+  VERIFY( f1 == g1 );
+}
+
+int main()
+{
+  test01();
+  return 0;
+}
Index: testsuite/26_numerics/random/subtract_with_carry_engine/cons/55215.cc
===
--- testsuite/26_numerics/random/subtract_with_carry_engine/cons/55215.cc   
(revision 0)
+++ testsuite/26_numerics/random/subtract_with_carry_engine/cons/55215.cc   
(working copy)
@@ -0,0 +1,58 @@

Re: New badness metric for inliner

2012-11-05 Thread David Miller


This broke the bootstrap on sparc:

/home/davem/src/GIT/GCC/build-sparc32-linux/./prev-gcc/g++ 
-B/home/davem/src/GIT/GCC/build-sparc32\
-linux/./prev-gcc/ -B/usr/local/sparc-unknown-linux-gnu/bin/ -nostdinc++ 
-B/home/davem/src/GIT/GCC\
/build-sparc32-linux/prev-sparc-unknown-linux-gnu/libstdc++-v3/src/.libs 
-B/home/davem/src/GIT/GCC\
/build-sparc32-linux/prev-sparc-unknown-linux-gnu/libstdc++-v3/libsupc++/.libs 
-I/home/davem/src/G\
IT/GCC/build-sparc32-linux/prev-sparc-unknown-linux-gnu/libstdc++-v3/include/sparc-unknown-linux-g\
nu 
-I/home/davem/src/GIT/GCC/build-sparc32-linux/prev-sparc-unknown-linux-gnu/libstdc++-v3/include\
 -I/home/davem/src/GIT/GCC/gcc/libstdc++-v3/libsupc++ 
-L/home/davem/src/GIT/GCC/build-sparc32-linu\
x/prev-sparc-unknown-linux-gnu/libstdc++-v3/src/.libs 
-L/home/davem/src/GIT/GCC/build-sparc32-linu\
x/prev-sparc-unknown-linux-gnu/libstdc++-v3/libsupc++/.libs -c   -g -O2 
-gtoggle -DIN_GCC   -fno-e\
xceptions -fno-rtti -fasynchronous-unwind-tables -W -Wall -Wno-narrowing 
-Wwrite-strings -Wcast-qu\
al -Wmissing-format-attribute -pedantic -Wno-long-long -Wno-variadic-macros 
-Wno-overlength-string\
s -Werror -fno-common  -DHAVE_CONFIG_H -I. -I. -I../../gcc/gcc 
-I../../gcc/gcc/. -I../../gcc/gcc/.\
./include -I../../gcc/gcc/../libcpp/include  -I../../gcc/gcc/../libdecnumber 
-I../../gcc/gcc/../li\
bdecnumber/dpd -I../libdecnumber -I../../gcc/gcc/../libbacktrace 
-DCLOOG_INT_GMP../../gcc/gcc/\
graphite-interchange.c -o graphite-interchange.o
../../gcc/gcc/graphite-interchange.c:645:1: internal compiler error: in 
relative_time_benefit, at \
ipa-inline.c:784
 }
 ^
0x10bcf1b relative_time_benefit
../../gcc/gcc/ipa-inline.c:784
0x10bd583 edge_badness
../../gcc/gcc/ipa-inline.c:894
0x10bdcdf update_edge_key
../../gcc/gcc/ipa-inline.c:962
0x10be68b update_callee_keys
../../gcc/gcc/ipa-inline.c:1141
0x10c00f7 inline_small_functions
../../gcc/gcc/ipa-inline.c:1594
0x10c0aa7 ipa_inline
../../gcc/gcc/ipa-inline.c:1769
Please submit a full bug report,
with preprocessed source if appropriate.
Please include the complete backtrace with any bug report.
See http://gcc.gnu.org/bugs.html for instructions.
make[3]: *** [graphite-interchange.o] Error 1

Re: [PATCH, middle-end]: Fix mode-switching MODE_EXIT check with __builtin_apply/__builtin_return

2012-11-05 Thread Eric Botcazou

 Unfortunately the proposed patch fails the testcase from PR41993:
 
 --cut here--
 short retframe_short (void *rframe)
 {
 __builtin_return (rframe);
 }

OK, so that's not only an issue with the mode of the return register, but with 
the return register itself.  Then the original patch is OK if the comment is 
reworked, something like:

   /* __builtin_return emits a sequence of loads to all return
  registers.  One of them might require another mode than
  MODE_EXIT, even if it is unrelated to the return value,
  so we want to put the final mode switch after it.  */

-- 
Eric Botcazou

Re: [PATCH] Vzeroupper placement/47440

2012-11-05 Thread Uros Bizjak

Hello!

 2012-11-04  Vladimir Yakovlev  vladimir.b.yakov...@intel.com

* mode-switching.c (create_pre_exit): Added code for 
 maybe_builtin_apply case.

The part above is already committed.

* config/i386/i386-protos.h (emit_i387_cw_initialization): Deleted.
(emit_vzero): Added prototype.
(ix86_mode_entry): Likewise.
(ix86_mode_exit): Likewise.
(ix86_emit_mode_set): Likewise.

* config/i386/i386.c (typedef struct block_info_def): Deleted.
(define BLOCK_INFO): Deleted.
(check_avx256_stores): Added checking for MEM_P.
(move_or_delete_vzeroupper_2): Deleted.
(move_or_delete_vzeroupper_1): Deleted.
(move_or_delete_vzeroupper): Deleted.
(ix86_maybe_emit_epilogue_vzeroupper): Deleted.
(function_pass_avx256_p): Deleted.
(ix86_function_ok_for_sibcall): Deleted disabling sibcall.
(nit_cumulative_args): Deleted initialization of of avx256 fields of
cfun-machine.
(ix86_emit_restore_sse_regs_using_mov): Deleted vzeroupper generation.
(ix86_expand_epilogue): Likewise.
(ix86_avx_u128_mode_needed): New.
(ix86_i387_mode_needed): Renamed ix86_mode_needed.
(ix86_mode_needed): New.
(ix86_avx_u128_mode_after): New.
(ix86_mode_after): New.
(ix86_avx_u128_mode_entry): New.
(ix86_mode_entry): New.
(ix86_avx_u128_mode_exit): New.
(ix86_mode_exit): New.
(ix86_emit_mode_set): New.
(ix86_expand_call): Deleted vzeroupper generation.
(ix86_split_call_vzeroupper): Deleted.
(ix86_init_machine_status): Initialzed optimize_mode_switching.
(ix86_expand_special_args_builtin): Changed.
(ix86_reorg): Deleted a call of move_or_delete_vzeroupper.

* config/i386/i386.h  (VALID_AVX256_REG_OR_OI_MODE): New.
(AVX_U128): New.
(avx_u128_state): New.
(NUM_MODES_FOR_MODE_SWITCHING): Added AVX_U128_ANY.
(MODE_AFTER): New.
(MODE_ENTRY): New.
(MODE_EXIT): New.
(EMIT_MODE_SET): Changed.
(machine_function): Deleted avx256 fields.

* config/i386/i386.md (UNSPEC_CALL_NEEDS_VZEROUPPER): Deleted.
(define_insn_and_split *call_vzeroupper): Deleted.
(define_insn_and_split *call_rex64_ms_sysv_vzeroupper): Deleted.
(define_insn_and_split *sibcall_vzeroupper): Deleted.
(define_insn_and_split *call_pop_vzeroupper): Deleted.
(define_insn_and_split *sibcall_pop_vzeroupper): Deleted.
(define_insn_and_split *call_value_vzeroupper): Deleted.
(define_insn_and_split *sibcall_value_vzeroupper): Deleted.
(define_insn_and_split *call_value_rex64_ms_sysv_vzeroupper): 
 Deleted.
(define_insn_and_split *call_value_pop_vzeroupper): Deleted.
(define_insn_and_split *sibcall_value_pop_vzeroupper): Deleted.
(define_expand return): Deleted vzeroupper emitting.
(define_expand simple_return): Deleted.

* config/i386/predicates.md (vzeroupper_operation): New.

* config/i386/sse.md (avx_vzeroupper): Changed.

 2012-11-04  Vladimir Yakovlev  vladimir.b.yakov...@intel.com

* gcc.target/i386/avx-vzeroupper-5.c: Changed scan-assembler-times.
gcc.target/i386/avx-vzeroupper-8.c: Likewise.
gcc.target/i386/avx-vzeroupper-9.c: Likewise.
gcc.target/i386/avx-vzeroupper-10.c: Likewise.
gcc.target/i386/avx-vzeroupper-11.c: Likewise.
gcc.target/i386/avx-vzeroupper-12.c: Likewise.
gcc.target/i386/avx-vzeroupper-19.c: Likewis.
gcc.target/i386/avx-vzeroupper-27.c: New.

OK for mainline SVN, please commit.

Thanks,
Uros.

[google] Add attributes: always_patch_for_instrumentation and never_patch_for_instrumentation (issue6821051)

2012-11-05 Thread Harshit Chopra

2012-11-05  Harshit Chopra  hars...@google.com

* gcc/c-family/c-common.c 
(handle_always_patch_for_instrumentation_attribute): Handle
  always_patch_for_instrumentation attribute and turn inlining off for the 
function.
(handle_never_patch_for_instrumentation_attribute): Handle
  never_patch_for_instrumentation attribute of a function.
* gcc/config/i386/i386.c (has_attribute): Checks the presence of an 
attribute.
(check_should_patch_current_function): Takes into account
  always_patch_for_instrumentation or never_patch_for_instrumentation attribute
  when deciding that a function should be patched.
* gcc/testsuite/gcc.target/i386/patch-functions-force-no-patching.c 
(int main): Test case
  to test for never_patch_for_instrumentation attribute.
* gcc/testsuite/gcc.target/i386/patch-functions-force-patching.c: Test 
case to test for
  always_patch_for_instrumentation attribute.

diff --git a/gcc/c-family/c-common.c b/gcc/c-family/c-common.c
index ab416ff..04b973f 100644
--- a/gcc/c-family/c-common.c
+++ b/gcc/c-family/c-common.c
@@ -396,6 +396,13 @@ static tree ignore_attribute (tree *, tree, tree, int, 
bool *);
 static tree handle_no_split_stack_attribute (tree *, tree, tree, int, bool *);
 static tree handle_fnspec_attribute (tree *, tree, tree, int, bool *);
 
+static tree handle_always_patch_for_instrumentation_attribute (tree *, tree,
+   tree, int,
+   bool *);
+static tree handle_never_patch_for_instrumentation_attribute (tree *, tree,
+  tree, int,
+  bool *);
+
 static void check_function_nonnull (tree, int, tree *);
 static void check_nonnull_arg (void *, tree, unsigned HOST_WIDE_INT);
 static bool nonnull_check_p (tree, unsigned HOST_WIDE_INT);
@@ -804,6 +811,13 @@ const struct attribute_spec c_common_attribute_table[] =
  The name contains space to prevent its usage in source code.  */
   { fn spec,   1, 1, false, true, true,
  handle_fnspec_attribute, false },
+  { always_patch_for_instrumentation, 0, 0, true,  false, false,
+  
handle_always_patch_for_instrumentation_attribute,
+  false },
+  { never_patch_for_instrumentation, 0, 0, true,  false, false,
+  handle_never_patch_for_instrumentation_attribute,
+  false },
+
   { NULL, 0, 0, false, false, false, NULL, false }
 };
 
@@ -9158,6 +9172,48 @@ handle_no_thread_safety_analysis_attribute (tree *node, 
tree name,
   return NULL_TREE;
 }
 
+/* Handle a always_patch_for_instrumentation attribute; arguments as in
+   struct attribute_spec.handler.  */
+
+static tree
+handle_always_patch_for_instrumentation_attribute (tree *node, tree name,
+   tree ARG_UNUSED (args),
+   int ARG_UNUSED (flags),
+   bool *no_add_attrs)
+{
+  if (TREE_CODE (*node) == FUNCTION_DECL)
+{
+  /* Disable inlining if forced instrumentation.  */
+  DECL_UNINLINABLE (*node) = 1;
+}
+  else
+{
+  warning (OPT_Wattributes, %qE attribute ignored, name);
+  *no_add_attrs = true;
+}
+  return NULL_TREE;
+}
+
+
+/* Handle a never_patch_for_instrumentation attribute; arguments as in
+   struct attribute_spec.handler.  */
+
+static tree
+handle_never_patch_for_instrumentation_attribute (tree *node, tree name,
+  tree ARG_UNUSED (args),
+  int ARG_UNUSED (flags),
+  bool *no_add_attrs)
+{
+  if (TREE_CODE (*node) != FUNCTION_DECL)
+{
+  warning (OPT_Wattributes, %qE attribute ignored, name);
+  *no_add_attrs = true;
+}
+  return NULL_TREE;
+}
+
+
+
 /* Check for valid arguments being passed to a function with FNTYPE.
There are NARGS arguments in the array ARGARRAY.  */
 void
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 6972ae6..4e0d770 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -10968,6 +10968,13 @@ ix86_expand_epilogue (int style)
returns.  */
 static bool patch_current_function_p = false;
 
+static inline bool
+has_attribute (const char* attribute_name)
+{
+  return lookup_attribute (attribute_name,
+   DECL_ATTRIBUTES (current_function_decl)) != NULL;
+}
+
 /* Return true if we patch the current function. By default a function
is patched if it has loops or if the number of insns is greater than
patch_functions_min_instructions (number of insns roughly translates
@@ -10983,6 +10990,13 @@

Re: [google] Add attributes: always_patch_for_instrumentation and never_patch_for_instrumentation (issue6821051)

2012-11-05 Thread Harshit Chopra

Thanks David for the review. My comments are inline.


On Sat, Nov 3, 2012 at 12:38 PM, Xinliang David Li davi...@google.com wrote:

 Harshit, Nov 5 is the gcc48 cutoff date. If you want to have the x-ray
 instrumentation feature into this release, you will need to port your
 patch and submit for trunk review now.


I am a bit too late now, I guess. If I target for the next release,
will it create any issues for the gcc48 release?




 On Tue, Oct 30, 2012 at 5:15 PM, Harshit Chopra hars...@google.com wrote:
  Adding function attributes: 'always_patch_for_instrumentation' and 
  'never_patch_for_instrumentation' to always patch a function or to never 
  patch a function, respectively, when given the option 
  -mpatch-functions-for-instrumentation. Additionally, the attribute 
  always_patch_for_instrumentation disables inlining of that function.
 
  Tested:
Tested by 'crosstool-validate.py --crosstool_ver=16 --testers=crosstool'
 
  ChangeLog:
 
  2012-10-30  Harshit Chopra hars...@google.com
 
  * gcc/c-family/c-common.c 
  (handle_always_patch_for_instrumentation_attribute): Handle
always_patch_for_instrumentation attribute and turn inlining off for the 
  function.
  (handle_never_patch_for_instrumentation_attribute): Handle 
  never_patch_for_instrumentation
attribute of a function.
  * gcc/config/i386/i386.c (check_should_patch_current_function): 
  Takes into account
always_patch_for_instrumentation or never_patch_for_instrumentation 
  attribute when
deciding that a function should be patched.
  * 
  gcc/testsuite/gcc.target/i386/patch-functions-force-no-patching.c: Test case
to test for never_patch_for_instrumentation attribute.
  * gcc/testsuite/gcc.target/i386/patch-functions-force-patching.c: 
  Test case to
test for always_patch_for_instrumentation attribute.
  * gcc/tree.h (struct GTY): Add fields for the two attributes and 
  macros to access
the fields.
  diff --git a/gcc/c-family/c-common.c b/gcc/c-family/c-common.c
  index ab416ff..998645d 100644
  --- a/gcc/c-family/c-common.c
  +++ b/gcc/c-family/c-common.c
  @@ -396,6 +396,13 @@ static tree ignore_attribute (tree *, tree, tree, int, 
  bool *);
   static tree handle_no_split_stack_attribute (tree *, tree, tree, int, bool 
  *);
   static tree handle_fnspec_attribute (tree *, tree, tree, int, bool *);
 
  +static tree handle_always_patch_for_instrumentation_attribute (tree *, 
  tree,
  +   tree, int,
  +   bool *);

 Move bool * to the previous line.


If I do that, it goes beyond the 80 char boundary.



  +static tree handle_never_patch_for_instrumentation_attribute (tree *, tree,
  +  tree, int,
  +  bool *);
  +

 Same here.


As above.



   static void check_function_nonnull (tree, int, tree *);
   static void check_nonnull_arg (void *, tree, unsigned HOST_WIDE_INT);
   static bool nonnull_check_p (tree, unsigned HOST_WIDE_INT);
  @@ -804,6 +811,13 @@ const struct attribute_spec c_common_attribute_table[] 
  =
The name contains space to prevent its usage in source code.  */
 { fn spec,   1, 1, false, true, true,
handle_fnspec_attribute, false },
  +  { always_patch_for_instrumentation, 0, 0, true,  false, false,
  +  
  handle_always_patch_for_instrumentation_attribute,
  +  false },
  +  { never_patch_for_instrumentation, 0, 0, true,  false, false,
  +  
  handle_never_patch_for_instrumentation_attribute,
  +  false },
  +
 { NULL, 0, 0, false, false, false, NULL, false }
   };
 
  @@ -9158,6 +9172,48 @@ handle_no_thread_safety_analysis_attribute (tree 
  *node, tree name,
 return NULL_TREE;
   }
 
  +/* Handle a always_patch_for_instrumentation attribute; arguments as in
  +   struct attribute_spec.handler.  */

 Add new line here


Done.



  +static tree
  +handle_always_patch_for_instrumentation_attribute (tree *node, tree name,
  +   tree ARG_UNUSED (args),
  +   int ARG_UNUSED (flags),
  +   bool *no_add_attrs)
  +{
  +  if (TREE_CODE (*node) == FUNCTION_DECL)
  +{
  +  DECL_FORCE_PATCHING_FOR_INSTRUMENTATION (*node) = 1;
  +  DECL_UNINLINABLE (*node) = 1;
  +}
  +  else
  +{
  +  warning (OPT_Wattributes, %qE attribute ignored, name);
  +  *no_add_attrs = true;
  +}
  +  return NULL_TREE;
  +}
  +
  +
  +/* Handle a never_patch_for_instrumentation attribute; arguments as in
  +   struct attribute_spec.handler.  */

 A new line here.


Done

Re: [PATCH 08/10] Factorize condition insertion code out of build_check_stmt

2012-11-05 Thread Dodji Seketeli

Jakub Jelinek ja...@redhat.com writes:

 On Sat, Nov 03, 2012 at 12:03:45AM +0100, Dodji Seketeli wrote:
 +  int fallthrough_probability =
 +then_more_likely_p
 +? PROB_VERY_UNLIKELY
 +: PROB_ALWAYS - PROB_VERY_UNLIKELY;

 Just a formatting nit, I think = needs to go on the next line, so

   int fallthrough_probability
 = then_more_likely_p
   ? PROB_VERY_UNLIKELY
   : PROB_ALWAYS - PROB_VERY_UNLIKELY;

OK.  Patch updated in my local tree.

 No need to repost for that.

OK, thanks.

-- 
Dodji

Re: [google] Add attributes: always_patch_for_instrumentation and never_patch_for_instrumentation (issue6821051)

2012-11-05 Thread Xinliang David Li

It does not hurt to submit the patch for review -- you need to provide
more background and motivation for this work
1) comparison with -finstrument-functions (runtime overhead etc)
2) use model difference (production binary ..)
3) Interesting examples of use cases (with graphs).

thanks,

David

On Mon, Nov 5, 2012 at 12:20 PM, Harshit Chopra hars...@google.com wrote:
 Thanks David for the review. My comments are inline.


 On Sat, Nov 3, 2012 at 12:38 PM, Xinliang David Li davi...@google.com wrote:

 Harshit, Nov 5 is the gcc48 cutoff date. If you want to have the x-ray
 instrumentation feature into this release, you will need to port your
 patch and submit for trunk review now.


 I am a bit too late now, I guess. If I target for the next release,
 will it create any issues for the gcc48 release?




 On Tue, Oct 30, 2012 at 5:15 PM, Harshit Chopra hars...@google.com wrote:
  Adding function attributes: 'always_patch_for_instrumentation' and 
  'never_patch_for_instrumentation' to always patch a function or to never 
  patch a function, respectively, when given the option 
  -mpatch-functions-for-instrumentation. Additionally, the attribute 
  always_patch_for_instrumentation disables inlining of that function.
 
  Tested:
Tested by 'crosstool-validate.py --crosstool_ver=16 --testers=crosstool'
 
  ChangeLog:
 
  2012-10-30  Harshit Chopra hars...@google.com
 
  * gcc/c-family/c-common.c 
  (handle_always_patch_for_instrumentation_attribute): Handle
always_patch_for_instrumentation attribute and turn inlining off for the 
  function.
  (handle_never_patch_for_instrumentation_attribute): Handle 
  never_patch_for_instrumentation
attribute of a function.
  * gcc/config/i386/i386.c (check_should_patch_current_function): 
  Takes into account
always_patch_for_instrumentation or never_patch_for_instrumentation 
  attribute when
deciding that a function should be patched.
  * 
  gcc/testsuite/gcc.target/i386/patch-functions-force-no-patching.c: Test 
  case
to test for never_patch_for_instrumentation attribute.
  * gcc/testsuite/gcc.target/i386/patch-functions-force-patching.c: 
  Test case to
test for always_patch_for_instrumentation attribute.
  * gcc/tree.h (struct GTY): Add fields for the two attributes and 
  macros to access
the fields.
  diff --git a/gcc/c-family/c-common.c b/gcc/c-family/c-common.c
  index ab416ff..998645d 100644
  --- a/gcc/c-family/c-common.c
  +++ b/gcc/c-family/c-common.c
  @@ -396,6 +396,13 @@ static tree ignore_attribute (tree *, tree, tree, 
  int, bool *);
   static tree handle_no_split_stack_attribute (tree *, tree, tree, int, 
  bool *);
   static tree handle_fnspec_attribute (tree *, tree, tree, int, bool *);
 
  +static tree handle_always_patch_for_instrumentation_attribute (tree *, 
  tree,
  +   tree, int,
  +   bool *);

 Move bool * to the previous line.


 If I do that, it goes beyond the 80 char boundary.



  +static tree handle_never_patch_for_instrumentation_attribute (tree *, 
  tree,
  +  tree, int,
  +  bool *);
  +

 Same here.


 As above.



   static void check_function_nonnull (tree, int, tree *);
   static void check_nonnull_arg (void *, tree, unsigned HOST_WIDE_INT);
   static bool nonnull_check_p (tree, unsigned HOST_WIDE_INT);
  @@ -804,6 +811,13 @@ const struct attribute_spec 
  c_common_attribute_table[] =
The name contains space to prevent its usage in source code.  */
 { fn spec,   1, 1, false, true, true,
handle_fnspec_attribute, false },
  +  { always_patch_for_instrumentation, 0, 0, true,  false, false,
  +  
  handle_always_patch_for_instrumentation_attribute,
  +  false },
  +  { never_patch_for_instrumentation, 0, 0, true,  false, false,
  +  
  handle_never_patch_for_instrumentation_attribute,
  +  false },
  +
 { NULL, 0, 0, false, false, false, NULL, false }
   };
 
  @@ -9158,6 +9172,48 @@ handle_no_thread_safety_analysis_attribute (tree 
  *node, tree name,
 return NULL_TREE;
   }
 
  +/* Handle a always_patch_for_instrumentation attribute; arguments as in
  +   struct attribute_spec.handler.  */

 Add new line here


 Done.



  +static tree
  +handle_always_patch_for_instrumentation_attribute (tree *node, tree name,
  +   tree ARG_UNUSED (args),
  +   int ARG_UNUSED (flags),
  +   bool *no_add_attrs)
  +{
  +  if (TREE_CODE (*node) == FUNCTION_DECL)
  +{
  +

Re: [patch, testsuite, mips] Fix gcc.dg/torture/mips-sdata-1.c

2012-11-05 Thread Richard Sandiford

Steve Ellcey  sell...@mips.com writes:
 diff --git a/gcc/testsuite/gcc.dg/torture/mips-sdata-1.c 
 b/gcc/testsuite/gcc.dg/torture/mips-sdata-1.c
 index 8ffd4d8..53c9e4f 100644
 --- a/gcc/testsuite/gcc.dg/torture/mips-sdata-1.c
 +++ b/gcc/testsuite/gcc.dg/torture/mips-sdata-1.c
 @@ -1,6 +1,7 @@
  /* Check that sdata-accesses are applied regardless of size or ABI.  */
  /* { dg-options -mexplicit-relocs } */
  /* { dg-do compile { target mips*-*-elf* } } */
 +/* { dg-skip-if  { *-*-* } { -fno-fat-lto-objects } {  } } */
  
  struct s { int x[4]; };
  struct s my_struct __attribute__((__section__(.sdata)));

The other tests that hit this problem have -ffat-lto-objects in the
dg-options line, so we might as well do the same here for consistency.
That's OK if it works.

Thanks,
Richard

Re: [patch, testsuite, mips] Fix gcc.dg/torture/mips-sdata-1.c

2012-11-05 Thread Richard Sandiford

Richard Sandiford rdsandif...@googlemail.com writes:
 Steve Ellcey  sell...@mips.com writes:
 diff --git a/gcc/testsuite/gcc.dg/torture/mips-sdata-1.c 
 b/gcc/testsuite/gcc.dg/torture/mips-sdata-1.c
 index 8ffd4d8..53c9e4f 100644
 --- a/gcc/testsuite/gcc.dg/torture/mips-sdata-1.c
 +++ b/gcc/testsuite/gcc.dg/torture/mips-sdata-1.c
 @@ -1,6 +1,7 @@
  /* Check that sdata-accesses are applied regardless of size or ABI.  */
  /* { dg-options -mexplicit-relocs } */
  /* { dg-do compile { target mips*-*-elf* } } */
 +/* { dg-skip-if  { *-*-* } { -fno-fat-lto-objects } {  } } */
  
  struct s { int x[4]; };
  struct s my_struct __attribute__((__section__(.sdata)));

 The other tests that hit this problem have -ffat-lto-objects in the
 dg-options line, so we might as well do the same here for consistency.
 That's OK if it works.

Sorry, scratch that.  I'd missed that it wasn't in gcc.target/mips
(although arguably it should be).

This sort of thing should usually be handled automatically by
scan-assembler, and is for me:

/foo/gcc/xgcc -B/foo/gcc/ /bar/gcc/testsuite/gcc.dg/torture/mips-sdata-1.c  
-fno-diagnostics-show-caret  -O2 -flto -fuse-linker-plugin -fno-fat-lto-objects 
-mexplicit-relocs -ffat-lto-objects -S   -isystem 
/foo/mipsisa64-elf/soft-float/newlib/targ-include -isystem 
/bar/newlib/libc/include  -EB -msoft-float -o mips-sdata-1.s

So the test passes all variations here.  I wonder what's different in
your case?

Richard

[PATCH] fix libgomp.c++/pr24455.C failures on darwin

2012-11-05 Thread Jack Howarth

Currently the following testcases are failing on x86_64-apple-darwin11/12...

FAIL: libgomp.c++/pr24455.C  -O0  (test for excess errors)
WARNING: libgomp.c++/pr24455.C  -O0  compilation failed to produce executable
FAIL: libgomp.c++/pr24455.C  -O1  (test for excess errors)
WARNING: libgomp.c++/pr24455.C  -O1  compilation failed to produce executable
FAIL: libgomp.c++/pr24455.C  -O2  (test for excess errors)
WARNING: libgomp.c++/pr24455.C  -O2  compilation failed to produce executable
FAIL: libgomp.c++/pr24455.C  -O3 -fomit-frame-pointer  (test for excess errors)
WARNING: libgomp.c++/pr24455.C  -O3 -fomit-frame-pointer  compilation failed to 
produce executable
FAIL: libgomp.c++/pr24455.C  -O3 -fomit-frame-pointer -funroll-loops  (test for 
excess errors)
WARNING: libgomp.c++/pr24455.C  -O3 -fomit-frame-pointer -funroll-loops  
compilation failed to produce executable
FAIL: libgomp.c++/pr24455.C  -O3 -fomit-frame-pointer -funroll-all-loops 
-finline-functions  (test for excess errors)
WARNING: libgomp.c++/pr24455.C  -O3 -fomit-frame-pointer -funroll-all-loops 
-finline-functions  compilation failed to produce executable
FAIL: libgomp.c++/pr24455.C  -O3 -g  (test for excess errors)
WARNING: libgomp.c++/pr24455.C  -O3 -g  compilation failed to produce executable
FAIL: libgomp.c++/pr24455.C  -Os  (test for excess errors)
WARNING: libgomp.c++/pr24455.C  -Os  compilation failed to produce executable
FAIL: libgomp.c++/tls-init1.C  -O  (test for excess errors)
WARNING: libgomp.c++/tls-init1.C  -O  compilation failed to produce executable

These can be fixed for darwin by passing -Wl,-undefined,dynamic_lookup on 
dg-options as has been
recommended by the darwin linker maintainer. Note that this should work for 
Xcode 3.2.6 and Xcode 4.4.1
and later. Between Xcode 4.2 and 4.4.0, radr://10466868, -undefined 
dynamic_lookup linker bug, broke
weak symbol dynamic lookup in the linker. Tested on x86_64-apple-darwin12. Okay 
for gcc trunk?
 Jack

libgomp/

2012-11-05  Jack Howarth howa...@bromo.med.uc.edu

* testsuite/libgomp.c++/pr24455.C: Use -Wl,-undefined,dynamic_lookup on 
darwin.

Index: libgomp/testsuite/libgomp.c++/pr24455.C
===
--- libgomp/testsuite/libgomp.c++/pr24455.C (revision 193168)
+++ libgomp/testsuite/libgomp.c++/pr24455.C (working copy)
@@ -1,6 +1,7 @@
 // { dg-do run }
 // { dg-additional-sources pr24455-1.C }
 // { dg-require-effective-target tls_runtime }
+// { dg-options -Wl,-undefined,dynamic_lookup { target *-*-darwin* } }
 
 extern C void abort (void);

Re: [PATCH Version 2][RFA]Improving register pressure directed hoist

2012-11-05 Thread Jeff Law


On 11/02/2012 02:34 AM, Bin Cheng wrote:



Also I don't understand why the bogus patch can catch more hoist
opportunities and improve code size, so please help if you have any idea
about this.
Well, perturbing the code, particularly in a way which is supposed to 
change the amount of register pressure is certainly going to affect the 
allocators and reload.


It shouldn't be that hard to look at results which differ between the 
two patches and analyze why they're different.




2012-11-02  Bin Chengbin.ch...@arm.com

* gcse.c: (struct bb_data): Add new fields, old_pressure, live_in
and backup.
(calculate_bb_reg_pressure): Initialize live_in and backup.
(update_bb_reg_pressure): New.
(should_hoist_expr_to_dom): Add new parameter from.
Monitor the change of reg pressure and use it to drive hoisting.
(hoist_code): Update LIVE and reg pressure information.

gcc/testsuite/ChangeLog
2012-11-02  Bin Chengbin.ch...@arm.com

* gcc.dg/hoist-register-pressure-3.c: New test.


+/* Update register pressure for BB when hoisting an expression from
+   instruction FROM, if live ranges of inputs are shrunk.  Also
+   maintain live_in information if live range of register referred
+   in FROM is shrunk.
+
+   Return 0 if register pressure doesn't change, otherwise return
+   the number by which register pressure is decreased.
+
+   NOTE: Register pressure won't be increased in this function.  */
+
+static int
+update_bb_reg_pressure (basic_block bb, rtx from,
+   enum reg_class pressure_class, int nregs)
+{
+  rtx dreg, insn;
+  basic_block succ_bb;
+  df_ref *op, op_ref;
+  edge succ;
+  edge_iterator ei;
+  int decreased_pressure = 0;
+
+  for (op = DF_INSN_USES (from); *op; op++)
+{
+  dreg = DF_REF_REAL_REG (*op);
+  /* The live range of register is shrunk only if it isn't:
+1. referred on any path from the end of this block to EXIT, or
+2. referred by insns other than FROM in this block.  */
+  FOR_EACH_EDGE (succ, ei, bb-succs)
+   {
+ succ_bb = succ-dest;
+ if (succ_bb == EXIT_BLOCK_PTR)
+   continue;
+
+ if (bitmap_bit_p (BB_DATA (succ_bb)-live_in, REGNO (dreg)))
+   break;
+   }
+  if (succ != NULL)
+   continue;
+
+  op_ref = DF_REG_USE_CHAIN (REGNO (dreg));
+  for (; op_ref; op_ref = DF_REF_NEXT_REG (op_ref))
+   {
+ if (!DF_REF_INSN_INFO (op_ref))
+   continue;
+
+ insn = DF_REF_INSN (op_ref);
+ if (BLOCK_FOR_INSN (insn) == bb
+  NONDEBUG_INSN_P (insn)  insn != from)
+   break;
+   }
+
+  /* Decrease register pressure and update live_in information for
+this block.  */
+  if (!op_ref)
+   {
+ decreased_pressure += nregs;
+ BB_DATA (bb)-max_reg_pressure[pressure_class] -= nregs;
+ bitmap_clear_bit (BB_DATA (bb)-live_in, REGNO (dreg));
+   }
+}
+  return decreased_pressure;
So we're looking to see if any of the registers used in FROM are used 
after from.  If none are used, then we decrease the register pressure by 
nregs which appears to be a property of the the registers *set* in FROM. 
 Is seems like there's some inconsistency here.  Or have I 
misunderstood something?


I'm not sure how much it matters in practice, except perhaps for 
conversions and the like where the source and destination operands are 
different modes.

jeff

Re: exception safety validation

2012-11-05 Thread François Dumont


I apply the attached patch.

2012-10-05  François Dumont  fdum...@gcc.gnu.org

* include/ext/throw_allocator.h (__throw_value_base): Add move
semantic, not throwing.
(__throw_value_limit): Likewise.
(__throw_value_random): Likewise.
* testsuite/util/exception/safety.h: Add validation of C++11
methods emplace/emplace_front/emplace_back/emplace_hint.
* testsuite/util/testsuite_container_traits.h: Signal emplace
support on deque, forward_list, list and vector.
* testsuite/23_containers/deque/requirements/exception/
propagation_consistent.cc: Remove dg-do run fail.

However I would still be interested in the original questions:
- why are the try{}catch(const __gnu_cxx::forced_error) { throw; } for ?
- does the Standard says that move constructor shall not throw ?

François


On 11/04/2012 05:04 PM, Jonathan Wakely wrote:

Ooh, I almost forgot, please remember to update the copyright years in
the changed files.  For trunk you can change them to ranges like
2005-2012

On 4 November 2012 16:01, Jonathan Wakely jwakely@gmail.com wrote:

On 3 November 2012 19:05, Paolo Carlini wrote:

On 11/03/2012 03:43 PM, François Dumont wrote:

Note that I run exception tests to validate it and the recently introduced
23_containers/forward_list/allocator/noexcept.cc test doesn't work in debug
mode. I think it needs a dg-require-normal-node attribute.

... if you reach an agreement about the right thing to do concerning this
failure, I think the patch could otherwise go in.

Yes, the patch looks good, please commit it and  I'll look into the
forward_list failure, since my changes caused it.

Thanks.


Index: include/ext/throw_allocator.h
===
--- include/ext/throw_allocator.h	(revision 193118)
+++ include/ext/throw_allocator.h	(working copy)
@@ -1,7 +1,6 @@
 // -*- C++ -*-
 
-// Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011
-// Free Software Foundation, Inc.
+// Copyright (C) 2005-2012 Free Software Foundation, Inc.
 //
 // This file is part of the GNU ISO C++ Library.  This library is free
 // software; you can redistribute it and/or modify it under the terms
@@ -467,6 +466,11 @@
   throw_value_base(const throw_value_base __v) : _M_i(__v._M_i)
   { throw_conditionally(); }
 
+#ifdef __GXX_EXPERIMENTAL_CXX0X__
+  // Shall not throw.
+  throw_value_base(throw_value_base) = default;
+#endif
+
   explicit throw_value_base(const std::size_t __i) : _M_i(__i)
   { throw_conditionally(); }
 #endif
@@ -479,7 +483,13 @@
 	return *this;
   }
 
+#ifdef __GXX_EXPERIMENTAL_CXX0X__
+  // Shall not throw.
   throw_value_base
+  operator=(throw_value_base) = default;
+#endif
+
+  throw_value_base
   operator++()
   {
 	throw_conditionally();
@@ -568,8 +578,24 @@
 throw_value_limit(const throw_value_limit __other)
 : base_type(__other._M_i) { }
 
+#ifdef __GXX_EXPERIMENTAL_CXX0X__
+throw_value_limit(throw_value_limit) = default;
+#endif
+
 explicit throw_value_limit(const std::size_t __i) : base_type(__i) { }
 #endif
+
+throw_value_limit
+operator=(const throw_value_limit __other)
+{
+  base_type::operator=(__other);
+  return *this;
+}
+
+#ifdef __GXX_EXPERIMENTAL_CXX0X__
+throw_value_limit
+operator=(throw_value_limit) = default;
+#endif
   };
 
   /// Type throwing via random condition.
@@ -583,9 +609,24 @@
 throw_value_random(const throw_value_random __other)
 : base_type(__other._M_i) { }
 
+#ifdef __GXX_EXPERIMENTAL_CXX0X__
+throw_value_random(throw_value_random) = default;
+#endif
 
 explicit throw_value_random(const std::size_t __i) : base_type(__i) { }
 #endif
+
+throw_value_random
+operator=(const throw_value_random __other)
+{
+  base_type::operator=(__other);
+  return *this;
+}
+
+#ifdef __GXX_EXPERIMENTAL_CXX0X__
+throw_value_random
+operator=(throw_value_random) = default;
+#endif
   };
 
 
Index: testsuite/util/testsuite_container_traits.h
===
--- testsuite/util/testsuite_container_traits.h	(revision 193118)
+++ testsuite/util/testsuite_container_traits.h	(working copy)
@@ -1,6 +1,6 @@
 // -*- C++ -*-
 
-// Copyright (C) 2009, 2010 Free Software Foundation, Inc.
+// Copyright (C) 2009-2012 Free Software Foundation, Inc.
 //
 // This file is part of the GNU ISO C++ Library.  This library is free
 // software; you can redistribute it and/or modify it under the terms
@@ -73,6 +73,7 @@
   typedef std::true_type	has_insert;
   typedef std::true_type	has_push_pop;
   typedef std::true_type	has_size_type_constructor;
+  typedef std::true_type	has_emplace;
 };
 
   templatetypename _Tp1, typename _Tp2
@@ -85,6 +86,7 @@
   typedef std::true_type	has_insert_after;
   typedef std::true_type	has_push_pop;
   typedef std::true_type	has_size_type_constructor;
+  typedef std::true_type

Re: [v3] Fix allocator-aware container requirements for forward_list

2012-11-05 Thread Jonathan Wakely

This updates the debug-mode and profile-mode forward_list code to
match my recent allocator changes to the default mode.  It also adds
checking for unswappable allocators, and fixes some tests that those
new checks showed were broken.

* include/profile/forward_list: Update to meet allocator-aware
requirements.
* include/debug/forward_list: Likewise.
* include/debug/vector: Verify allocators are swapped or equal.
* include/debug/macros.h (__glibcxx_check_equal_allocs): Define.
* include/debug/formatter.h: Add new debug message.
* src/c++11/debug.cc: Likewise.
* testsuite/23_containers/forward_list/allocator/swap.cc: Do not
swap containers with non-propagating, non-equal allocators.
* testsuite/23_containers/vector/allocator/swap.cc: Likewise.

Tested x86_64-linux, committed to trunk.
commit e537dbf92d79d68a4ff2f15864183da1cf17c821
Author: Jonathan Wakely jwakely@gmail.com
Date:   Mon Nov 5 00:15:12 2012 +

* include/profile/forward_list: Update to meet allocator-aware
requirements.
* include/debug/forward_list: Likewise.
* include/debug/vector: Verify allocators are swapped or equal.
* include/debug/macros.h (__glibcxx_check_equal_allocs): Define.
* include/debug/formatter.h: Add new debug message.
* src/c++11/debug.cc: Likewise.
* testsuite/23_containers/forward_list/allocator/swap.cc: Do not
swap containers with non-propagating, non-equal allocators.
* testsuite/23_containers/vector/allocator/swap.cc: Likewise.

diff --git a/libstdc++-v3/include/debug/formatter.h 
b/libstdc++-v3/include/debug/formatter.h
index 1d29d8c..d622ed1 100644
--- a/libstdc++-v3/include/debug/formatter.h
+++ b/libstdc++-v3/include/debug/formatter.h
@@ -114,7 +114,8 @@ namespace __gnu_debug
 __msg_self_move_assign,
 // unordered container buckets
 __msg_bucket_index_oob,
-__msg_valid_load_factor
+__msg_valid_load_factor,
+__msg_equal_allocs
   };
 
   class _Error_formatter
diff --git a/libstdc++-v3/include/debug/forward_list 
b/libstdc++-v3/include/debug/forward_list
index 8ad4336..61ae6ed 100644
--- a/libstdc++-v3/include/debug/forward_list
+++ b/libstdc++-v3/include/debug/forward_list
@@ -49,6 +49,12 @@ namespace __debug
 
   typedef typename _Base::iterator   _Base_iterator;
   typedef typename _Base::const_iterator _Base_const_iterator;
+
+  typedef typename __gnu_cxx::__alloc_traits_Alloc::template
+rebind_GLIBCXX_STD_C::_Fwd_list_node_Tp::other _Node_alloc_type;
+
+  typedef __gnu_cxx::__alloc_traits_Node_alloc_type _Node_alloc_traits;
+
 public:
   typedef typename _Base::reference reference;
   typedef typename _Base::const_reference   const_reference;
@@ -78,12 +84,15 @@ namespace __debug
   forward_list(forward_list __list, const _Alloc __al)
   : _Base(std::move(__list._M_base()), __al)
   {
-   this-_M_swap(__list);
+   if (__list.get_allocator() == __al)
+ this-_M_swap(__list);
+   else
+ __list._M_invalidate_all();
   }
 
   explicit
-  forward_list(size_type __n)
-  : _Base(__n)
+  forward_list(size_type __n, const _Alloc __al = _Alloc())
+  : _Base(__n, __al)
   { }
 
   forward_list(size_type __n, const _Tp __value,
@@ -128,12 +137,17 @@ namespace __debug
 
   forward_list
   operator=(forward_list __list)
+  noexcept(_Node_alloc_traits::_S_nothrow_move())
   {
-   // NB: DR 1204.
-   // NB: DR 675.
__glibcxx_check_self_move_assign(__list);
-   clear();
-   swap(__list);
+   bool xfer_memory = _Node_alloc_traits::_S_propagate_on_move_assign()
+   || __list.get_allocator() == this-get_allocator();
+   static_cast_Base(*this) = std::move(__list);
+   if (xfer_memory)
+ this-_M_swap(__list);
+   else
+ this-_M_invalidate_all();
+   __list._M_invalidate_all();
return *this;
   }
 
@@ -333,7 +347,10 @@ namespace __debug
 
   void
   swap(forward_list __list)
+  noexcept(_Node_alloc_traits::_S_nothrow_swap())
   {
+   if (!_Node_alloc_traits::_S_propagate_on_swap())
+ __glibcxx_check_equal_allocs(__list);
_Base::swap(__list);
this-_M_swap(__list);
   }
diff --git a/libstdc++-v3/include/debug/macros.h 
b/libstdc++-v3/include/debug/macros.h
index 3df0c9b..30606d5 100644
--- a/libstdc++-v3/include/debug/macros.h
+++ b/libstdc++-v3/include/debug/macros.h
@@ -333,6 +333,11 @@ _GLIBCXX_DEBUG_VERIFY(_F  0.0f,   
\
  _M_message(__gnu_debug::__msg_valid_load_factor)  \
   ._M_sequence(*this, this))
 
+#define __glibcxx_check_equal_allocs(_Other)   \
+_GLIBCXX_DEBUG_VERIFY(this-get_allocator() == _Other.get_allocator(), \
+ _M_message(__gnu_debug::__msg_equal_allocs)

Re: [patch, testsuite, mips] Fix gcc.dg/torture/mips-sdata-1.c

2012-11-05 Thread Steve Ellcey

On Mon, 2012-11-05 at 20:38 +, Richard Sandiford wrote:

 This sort of thing should usually be handled automatically by
 scan-assembler, and is for me:
 
 /foo/gcc/xgcc -B/foo/gcc/ /bar/gcc/testsuite/gcc.dg/torture/mips-sdata-1.c  
 -fno-diagnostics-show-caret 
 -O2 -flto -fuse-linker-plugin -fno-fat-lto-objects -mexplicit-relocs 
 -ffat-lto-objects -S  
 -isystem /foo/mipsisa64-elf/soft-float/newlib/targ-include -isystem 
 /bar/newlib/libc/include  -EB
 -msoft-float -o mips-sdata-1.s
 
 So the test passes all variations here.  I wonder what's different in
 your case?
 
 Richard

Well that is weird.  I took out my change and I still see the error:

Executing on
host: /local/home/sellcey/nightly/obj-mips-mti-elf/gcc/final/gcc/xgcc
-B/local/home/sellcey/nightly/obj-mips-mti-elf/gcc/final/gcc/ 
/local/home/sellcey/nightly/src/gcc/gcc/testsuite/gcc.dg/torture/mips-sdata-1.c 
 -fno-diagnostics-show-caret   -O2 -flto -fuse-linker-plugin 
-fno-fat-lto-objects  -mexplicit-relocs -S-o mips-sdata-1.s(timeout = 
300)
spawn /local/home/sellcey/nightly/obj-mips-mti-elf/gcc/final/gcc/xgcc
-B/local/home/sellcey/nightly/obj-mips-mti-elf/gcc/final/gcc/ 
/local/home/sellcey/nightly/src/gcc/gcc/testsuite/gcc.dg/torture/mips-sdata-1.c 
-fno-diagnostics-show-caret -O2 -flto -fuse-linker-plugin -fno-fat-lto-objects 
-mexplicit-relocs -S -o mips-sdata-1.s^M
PASS: gcc.dg/torture/mips-sdata-1.c  -O2 -flto -fuse-linker-plugin
-fno-fat-lto-objects  (test for excess errors)
FAIL: gcc.dg/torture/mips-sdata-1.c  -O2 -flto -fuse-linker-plugin
-fno-fat-lto-objects   scan-assembler gp_?rel\\(my_struct


Do you know where the -ffat-lto-objects option in your run came from?  I
don't have that in my results and my tree is up-to-date.

Steve

[patch] Fix PR tree-optimization/54986

2012-11-05 Thread Eric Botcazou

Hi,

this is a regression present on the 4.7 branch and caused by my fix for 
another regression: http://gcc.gnu.org/ml/gcc-patches/2012-07/msg00825.html

It turns out that canonicalize_constructor_val has side effects: on the 4.7 
branch, it calls add_referenced_var on the base variable of an address (it 
does something similar on mainline) and not calling it for the testcase leads 
to a segfault later because the var_ann of a global variable doesn't exist.

So the STRIP_NOPS is needed for the pattern matching code to work.  That's why 
the attached patch adds it back but makes it so that an appropriate cast is 
added back at the end if necessary.

Bootstrapped/regtested on x86_64-suse-linux, OK for mainline and 4.7 branch?


2012-11-05  Eric Botcazou  ebotca...@adacore.com

PR tree-optimization/54986
* gimple-fold.c (canonicalize_constructor_val): Strip again all no-op
conversions on entry but add them back on exit if needed.


2012-11-05  Eric Botcazou  ebotca...@adacore.com

* g++.dg/torture/20121005-1.C: New test.


-- 
Eric BotcazouIndex: gimple-fold.c
===
--- gimple-fold.c	(revision 193090)
+++ gimple-fold.c	(working copy)
@@ -139,7 +139,8 @@ can_refer_decl_in_current_unit_p (tree d
 tree
 canonicalize_constructor_val (tree cval, tree from_decl)
 {
-  STRIP_USELESS_TYPE_CONVERSION (cval);
+  tree orig_cval = cval;
+  STRIP_NOPS (cval);
   if (TREE_CODE (cval) == POINTER_PLUS_EXPR
TREE_CODE (TREE_OPERAND (cval, 1)) == INTEGER_CST)
 {
@@ -182,8 +183,12 @@ canonicalize_constructor_val (tree cval,
   /* Fixup types in global initializers.  */
   if (TREE_TYPE (TREE_TYPE (cval)) != TREE_TYPE (TREE_OPERAND (cval, 0)))
 	cval = build_fold_addr_expr (TREE_OPERAND (cval, 0));
+
+  if (!useless_type_conversion_p (TREE_TYPE (orig_cval), TREE_TYPE (cval)))
+	cval = fold_convert (TREE_TYPE (orig_cval), cval);
+  return cval;
 }
-  return cval;
+  return orig_cval;
 }
 
 /* If SYM is a constant variable with known value, return the value.
/* PR tree-optimization/54986 */
/* Reported by Remi Vanicat vani...@debian.org */
/* Reduced testcase by Markus Trippelsdorf mar...@trippelsdorf.de */

struct A;
struct B
{
  int *_ptr;
  bool operator==(B *p1)
  {
return p1-_ptr;
  }
};
struct C {
  A* ref_SYMBptr();
};
struct A
{
  B sommet;
};
typedef C *gen_op_context;
struct D
{
  D(gen_op_context) {}
};

D c(0);
const long d = (long)c;
B *const   e = (B *)d;

static bool
fn1(C p1)
{
  return p1.ref_SYMBptr()-sommet == e;
}

void
fn2()
{
  C b;
  fn1(b);
}

Re: [patch] Fix PR tree-optimization/54986

2012-11-05 Thread Jakub Jelinek

On Mon, Nov 05, 2012 at 10:16:31PM +0100, Eric Botcazou wrote:
 Bootstrapped/regtested on x86_64-suse-linux, OK for mainline and 4.7 branch?
 
 
 2012-11-05  Eric Botcazou  ebotca...@adacore.com
 
   PR tree-optimization/54986
   * gimple-fold.c (canonicalize_constructor_val): Strip again all no-op
   conversions on entry but add them back on exit if needed.
 
 
 2012-11-05  Eric Botcazou  ebotca...@adacore.com
 
   * g++.dg/torture/20121005-1.C: New test.

Ok, thanks.

Jakub

Re: [patch, testsuite, mips] Fix gcc.dg/torture/mips-sdata-1.c

2012-11-05 Thread Richard Sandiford

Steve Ellcey sell...@mips.com writes:
 On Mon, 2012-11-05 at 20:38 +, Richard Sandiford wrote:
 This sort of thing should usually be handled automatically by
 scan-assembler, and is for me:
 
 /foo/gcc/xgcc -B/foo/gcc/ /bar/gcc/testsuite/gcc.dg/torture/mips-sdata-1.c  
 -fno-diagnostics-show-caret 
 -O2 -flto -fuse-linker-plugin -fno-fat-lto-objects -mexplicit-relocs 
 -ffat-lto-objects -S  
 -isystem /foo/mipsisa64-elf/soft-float/newlib/targ-include -isystem 
 /bar/newlib/libc/include  -EB
 -msoft-float -o mips-sdata-1.s
 
 So the test passes all variations here.  I wonder what's different in
 your case?
 
 Richard

 Well that is weird.  I took out my change and I still see the error:

 Executing on
 host: /local/home/sellcey/nightly/obj-mips-mti-elf/gcc/final/gcc/xgcc
 -B/local/home/sellcey/nightly/obj-mips-mti-elf/gcc/final/gcc/ 
 /local/home/sellcey/nightly/src/gcc/gcc/testsuite/gcc.dg/torture/mips-sdata-1.c
   -fno-diagnostics-show-caret   -O2 -flto -fuse-linker-plugin 
 -fno-fat-lto-objects  -mexplicit-relocs -S-o mips-sdata-1.s(timeout = 
 300)
 spawn /local/home/sellcey/nightly/obj-mips-mti-elf/gcc/final/gcc/xgcc
 -B/local/home/sellcey/nightly/obj-mips-mti-elf/gcc/final/gcc/ 
 /local/home/sellcey/nightly/src/gcc/gcc/testsuite/gcc.dg/torture/mips-sdata-1.c
  -fno-diagnostics-show-caret -O2 -flto -fuse-linker-plugin 
 -fno-fat-lto-objects -mexplicit-relocs -S -o mips-sdata-1.s^M
 PASS: gcc.dg/torture/mips-sdata-1.c  -O2 -flto -fuse-linker-plugin
 -fno-fat-lto-objects  (test for excess errors)
 FAIL: gcc.dg/torture/mips-sdata-1.c  -O2 -flto -fuse-linker-plugin
 -fno-fat-lto-objects   scan-assembler gp_?rel\\(my_struct


 Do you know where the -ffat-lto-objects option in your run came from?  I
 don't have that in my results and my tree is up-to-date.

gcc.dg.exp has:

if [check_effective_target_lto] {
# When having plugin test both slim and fat LTO and plugin/nonplugin
# path.
if [check_linker_plugin_available] {
  set LTO_TORTURE_OPTIONS [list \
  { -O2 -flto -fno-use-linker-plugin -flto-partition=none } \
  { -O2 -flto -fuse-linker-plugin -fno-fat-lto-objects }
  ]
  set gcc_force_conventional_output -fcfat-lto-objects
} else {
  set LTO_TORTURE_OPTIONS [list \
  { -O2 -flto -flto-partition=none } \
  { -O2 -flto }
  ]
}
}

and scanasm.exp has:

proc scan-assembler_required_options { args } {
global gcc_force_conventional_output
return $gcc_force_conventional_output
}

(similarly for scan-assembler-not, etc.).  Those options are then
supposed to be added by gcc.dg.exp:gcc-dg-test-1:

# Let { dg-final { action } } force options as returned by an
# optional proc ${action}_required_options.
upvar 2 dg-final-code finalcode
foreach x [split $finalcode \n] {
set finalcmd [lindex $x 0]
if { [info procs ${finalcmd}_required_options] !=  } {
set req [${finalcmd}_required_options]
if { $req !=  } {
lappend extra_tool_flags $req
}
}
}

if { $extra_tool_flags !=  } {
lappend options additional_flags=$extra_tool_flags
}

set comp_output [$target_compile $prog $output_file $compile_type 
$options]

Does it work for you if you run it separately?  E.g. with:

make check-gcc RUNTESTFLAGS=dg-torture.exp=mips-sdata-1.c

Richard

Re: [patch, testsuite, mips] Fix gcc.dg/torture/mips-sdata-1.c

2012-11-05 Thread Steve Ellcey

On Mon, 2012-11-05 at 21:28 +, Richard Sandiford wrote:

 Does it work for you if you run it separately?  E.g. with:
 
 make check-gcc RUNTESTFLAGS=dg-torture.exp=mips-sdata-1.c
 
 Richard

Hm, I configured GCC with '--enable-languages=c,c++', I think this might
set ENABLE_LTO to '0' and cause check_effective_target_lto to return
FALSE and cause the -ffat-lto-objects flag to not be added.  Of course
this raises the question of why it is trying to do lto testing if I
didn't configure it in.  I will add 'lto' to my configure line and see
if that fixes the problem.

Steve Ellcey
sell...@mips.com

[v3] 55028

2012-11-05 Thread Benjamin De Kosnik


Missing exports for unordered_* containers in debug mode. 

The patches between mainline/4.7 are a slight bit different, but
equivalent.

tested x86/linux
tested x86/linux --enable-symvers=gnu-versioned-namespace
2012-11-05  Benjamin Kosnik  b...@redhat.com
Oleg Smolsky  o...@smolsky.net

	PR libstdc++/55028
	*  config/abi/pre/gnu-versioned-namespace.ver: Add symbols.
	* testsuite/23_containers/unordered_multimap/insert/55028-debug.cc: New.

diff --git a/libstdc++-v3/config/abi/pre/gnu-versioned-namespace.ver b/libstdc++-v3/config/abi/pre/gnu-versioned-namespace.ver
index 84210e4..d13c059 100644
--- a/libstdc++-v3/config/abi/pre/gnu-versioned-namespace.ver
+++ b/libstdc++-v3/config/abi/pre/gnu-versioned-namespace.ver
@@ -64,6 +64,7 @@ GLIBCXX_7.0 {
 # vtable
 _ZTVSt*;
 _ZTVNSt*;
+_ZTVN9__gnu_cxx3__718stdio_sync_filebufI[cw]NSt3__711char_traitsI[cw];
 
 # thunk
 _ZTv0_n24_NS*;
@@ -145,6 +146,14 @@ GLIBCXX_7.0 {
 _ZNK11__gnu_debug16_Error_formatter8_M_error*;
 _ZNK11__gnu_debug16_Error_formatter17_M_get_max_lengthEv;
 
+# __gnu_debug::_Safe_unordered_container_base
+# __gnu_debug::_Safe_local_iterator_base
+_ZN11__gnu_debug30_Safe_unordered_container_base7_M_swapERS0_;
+_ZN11__gnu_debug30_Safe_unordered_container_base13_M_detach_allEv;
+_ZN11__gnu_debug25_Safe_local_iterator_base9_M_attachEPNS_19_Safe_sequence_baseEb;
+_ZN11__gnu_debug25_Safe_local_iterator_base9_M_detachEv;
+
+
 # parallel mode
 _ZN14__gnu_parallel9_Settings3getEv;
 _ZN14__gnu_parallel9_Settings3setERS0_;
diff --git a/libstdc++-v3/testsuite/23_containers/unordered_multimap/insert/55028-debug.cc b/libstdc++-v3/testsuite/23_containers/unordered_multimap/insert/55028-debug.cc
new file mode 100644
index 000..fc17256
--- /dev/null
+++ b/libstdc++-v3/testsuite/23_containers/unordered_multimap/insert/55028-debug.cc
@@ -0,0 +1,40 @@
+// { dg-options -std=gnu++0x -D_GLIBCXX_DEBUG }
+//
+// Copyright (C) 2012 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3.  If not see
+// http://www.gnu.org/licenses/.
+
+// libstdc++/55028
+#include string
+#include unordered_map
+#include testsuite_hooks.h
+
+struct MyType
+{ };
+
+void test()
+{
+  // using MyMap = std::multimapstd::string, MyType *; // works
+  using MyMap = std::unordered_multimapstd::string, MyType*; // fails to link
+  MyMap m;
+  m.insert(std::make_pair(std::string(blah), new MyType));
+}
+
+int main()
+{
+  test();
+  return 0;
+}

RE: [Ping]FW: [PATCH] Cilk Plus merging to trunk (2 of n)

2012-11-05 Thread Iyer, Balaji V

Hello Joseph,
Here is the fixed patch with all your changes and the ChangeLog entries 
below.

gcc/ChangeLog
2012-11-05  Balaji V. Iyer  balaji.v.i...@intel.com

* Makefile.in (C_COMMON_OBJS): Added c-family/array-notation-common.o.
* doc/passes.texi (Cilk Plus Transformation): Documented array
notation and overall transformations for Cilk Plus.
* doc/invoke.texi (C Dialect Options): Documented -fcilkplus flag.
* doc/generic.texi (Storage References): Documented ARRAY_NOTATION_REF
tree addition.

gcc/c-family/ChangeLog
2012-11-05  Balaji V. Iyer  balaji.v.i...@intel.com

* c-common.h (build_array_notation_expr): New function declaration.
(ARRAY_NOTATION_ARRAY): Added new #define.
(ARRAY_NOTATION_CHECK): Likewise.
(ARRAY_NOTATION_START): Likewise.
(ARRAY_NOTATION_LENGTH): Likewise.
(ARRAY_NOTATION_STRIDE): Likewise.
(ARRAY_NOTATION_TYPE): Likewise.
(enum array_notation_reduce_type): Added new enumerator.
* c-common.def: Added new tree ARRAY_NOTATION_REF.
* c-common.c (c_define_builtins): Added a call to initialize array
notation builtin functions.
(c_common_init_ts): Set ARRAY_NOTATION_REF as typed.
* c-pretty-print.c (pp_c_postfix_expression): Added ARRAY_NOTATION_REF
case.
* c.opt (-fcilkplus): Define new command line switch.
* array-notation-common.c: New file.

gcc/c/ChangeLog
2012-11-05  Balaji V. Iyer  balaji.v.i...@intel.com

* c-typeck.c (build_array_ref): Added a check to see if array's index
is greater than one.  If true, then emit an error.
(build_function_call_vec): Exclude error reporting  checking for
builtin array-notation functions.
(convert_arguments): Likewise.
(c_finish_return): Added a check for array notations as a return
expression.  If true, then emit an error.
(c_finish_loop): Added a check for array notations in a loop condition.
If true then emit an error.
(lvalue_p): Added a ARRAY_NOTATION_REF case.
* Make-lang.in (C_AND_OBJC_OBJS): Added c-array-notation.o.
* c-parser.c (c_parser_compound_statement): Check if array notation code
is used in tree, if so, then transform them into appropriate C code.
(c_parser_expr_no_commas): Check if array notation is used in LHS or
RHS, if so, then build array notation expression instead of regular
modify.
(c_parser_postfix_expression_after_primary): Added a check for colon(s)
after square braces, if so then handle it like an array notation.  Also,
break up array notations in unary op if found.
(c_parser_direct_declarator_inner): Added a check for array notation.
(c_parser_compound_statement): Added a check for array notation in a
stmt.  If one is present, then expand array notation expr.
(c_parser_if_statement): Likewise.
(c_parser_switch_statement): Added a check for array notations in a
switch statement's condition.  If true, then output an error.
(c_parser_while_statement): Same as switch statement, but for a while.
(c_parser_do_statement): Same as switch statement, but for a do-while.
(c_parser_for_statement): Same as switch statement, but for a for-loop.
(c_parser_unary_expression): Check if array notation is used in a
pre-increment or pre-decrement expression.  If true, then expand them.
(c_parser_array_notation): New function.
* c-array-notation.c: New file.

gcc/testsuite/ChangeLog
2012-11-05  Balaji V. Iyer  balaji.v.i...@intel.com

* gcc.dg/cilk-plus/array_notation/execute/execute.exp: New script.
* gcc.dg/cilk-plus/array_notation/compile/compile.exp: Likewise.
* gcc.dg/cilk-plus/array_notation/errors/errors.exp: Likewise.
* gcc.dg/cilk-plus/array_notation/execute/sec_implicit_ex.c: New test.
* gcc.dg/cilk-plus/array_notation/execute/comma_exp.c: Likewise.
* gcc.dg/cilk-plus/array_notation/execute/conditional.c: Likewise.
* gcc.dg/cilk-plus/array_notation/execute/exec-once.c: Likewise.
* gcc.dg/cilk-plus/array_notation/execute/if_test.c: Likewise.
* gcc.dg/cilk-plus/array_notation/execute/n-ptr_test.c: Likewise.
* gcc.dg/cilk-plus/array_notation/execute/gather_scatter.c: Likewise.
* gcc.dg/cilk-plus/array_notation/execute/builtin_func_double2.c:
Likewise.
* gcc.dg/cilk-plus/array_notation/execute/builtin_func_double.c:
Likewise.
* gcc.dg/cilk-plus/array_notation/execute/builtin_fn_custom.c: Likewise.
* gcc.dg/cilk-plus/array_notation/execute/builtin_fn_mutating.c:
Likewise.
* gcc.dg/cilk-plus/array_notation/execute/array_test_ND.c: Likewise.
* gcc.dg/cilk-plus/array_notation/execute/array_test2.c: Likewise.
*

Re: patch to fix PR55151

2012-11-05 Thread H.J. Lu

On Mon, Nov 5, 2012 at 8:43 AM, Vladimir Makarov vmaka...@redhat.com wrote:
   The following patch fixes PR55151.  The patch affects a sensitive part of
 LRA code.  So it took some time to find a PR solution.  For the test there
 were to many reloads into hard registers for an insn and LRA failed to
 assign hard registers to all reload pseudos.  There is another more costly
 alternative for the insn which accepts memory but we need to put pseudo
 value into memory for that.  LRA did not do it.  The patch adds this
 possibility.  The patch also discourage putting pseudo values into memory by
 increasing reject.  Without this code, several GCC tests checking assembler
 code quality failed.  The patch changes SPEC2000 code for a few tests but
 overall score is the same.

   The patch was successfully bootstrapped and tested on x86/x86-64.

   Committed as rev. 193170.


 2012-11-05  Vladimir Makarov  vmaka...@redhat.com

 PR rtl-optimization/55151
 * lra-constraints.c (process_alt_operands): Permit putting reg
 value into memory.  Increase reject for this case.

 2012-11-05  Vladimir Makarov  vmaka...@redhat.com

 PR rtl-optimization/55151
 * gcc.dg/pr55151.c: New test.



It doesn't work on Linux/x86-64:

[hjl@gnu-tools-1 gcc]$
/export/build/gnu/gcc/build-x86_64-linux/gcc/xgcc
-B/export/build/gnu/gcc/build-x86_64-linux/gcc/
/export/gnu/import/git/gcc/gcc/testsuite/gcc.dg/pr55151.c
-fno-diagnostics-show-caret -fPIC -S  -o pr55151.s -mno-lra
[hjl@gnu-tools-1 gcc]$
/export/build/gnu/gcc/build-x86_64-linux/gcc/xgcc
-B/export/build/gnu/gcc/build-x86_64-linux/gcc/
/export/gnu/import/git/gcc/gcc/testsuite/gcc.dg/pr55151.c
-fno-diagnostics-show-caret -fPIC -S -m32 -o pr55151.s
/export/gnu/import/git/gcc/gcc/testsuite/gcc.dg/pr55151.c: In function ‘f4’:
/export/gnu/import/git/gcc/gcc/testsuite/gcc.dg/pr55151.c:13:1:
internal compiler error: in assign_by_spills, at lra-assigns.c:1217
0x8c7333 assign_by_spills
/export/gnu/import/git/gcc/gcc/lra-assigns.c:1217
0x8c7c5f lra_assign()
/export/gnu/import/git/gcc/gcc/lra-assigns.c:1369
0x8c25bd lra(_IO_FILE*)
/export/gnu/import/git/gcc/gcc/lra.c:2303
0x878004 do_reload
/export/gnu/import/git/gcc/gcc/ira.c:4624
0x878212 rest_of_handle_reload
/export/gnu/import/git/gcc/gcc/ira.c:4737
Please submit a full bug report,
with preprocessed source if appropriate.
Please include the complete backtrace with any bug report.
See http://gcc.gnu.org/bugs.html for instructions.
[hjl@gnu-tools-1 gcc]$


-- 
H.J.

Re: RFC: PR middle-end/55142: Check Pmode instead of ptr_mode for address mode

2012-11-05 Thread H.J. Lu

On Sat, Nov 3, 2012 at 1:14 AM, H.J. Lu hjl.to...@gmail.com wrote:
 Hi,

 The testcase shows -O -mx32 -maddress-mode=long -fPIC -S generates;

 x.i:22:37: internal compiler error: in plus_constant, at explow.c:88
   info[0x6eff - dyn-d_tag + 12] = dyn;

 expand_expr_real_2 has

   /* No sense saving up arithmetic to be done
  if it's all in the wrong mode to form part of an address.
  And force_operand won't know whether to sign-extend or
  zero-extend.  */
   if ((modifier != EXPAND_SUM  modifier != EXPAND_INITIALIZER)
   || mode != ptr_mode)

 With mode == SImode, Pmode == DImode and ptr_mode == SImode, we
 generate wrong address.  Instead of zero-extending address
 0xf7ffdd64, we sign-extend it to 0xf7ffdd64.  This patch
 checks Pmode instead of ptr_mode for address mode.  It fixes the testcase
 and generates a working x32 glibc. Is this patch correct?

 Thanks.


 H.J.
 ---
 gcc/

 2012-11-03  H.J. Lu  hongjiu...@intel.com

 PR middle-end/55142
 * expr.c (expand_expr_real_2): Check Pmode instead of ptr_mode
 for wrong address mode.

 gcc/testsuite/

 2012-11-03  H.J. Lu  hongjiu...@intel.com

 PR middle-end/55142
 * gcc.target/i386/pr55142.c: New file.

 diff --git a/gcc/expr.c b/gcc/expr.c
 index 0ad3b57..1600380 100644
 --- a/gcc/expr.c
 +++ b/gcc/expr.c
 @@ -8290,7 +8290,7 @@ expand_expr_real_2 (sepops ops, rtx target, enum 
 machine_mode tmode,
  And force_operand won't know whether to sign-extend or
  zero-extend.  */
if ((modifier != EXPAND_SUM  modifier != EXPAND_INITIALIZER)
 - || mode != ptr_mode)
 + || mode != Pmode)
 {
   expand_operands (treeop0, treeop1,
subtarget, op0, op1, EXPAND_NORMAL);
 @@ -8333,7 +8333,7 @@ expand_expr_real_2 (sepops ops, rtx target, enum 
 machine_mode tmode,
  And force_operand won't know whether to sign-extend or
  zero-extend.  */
if ((modifier != EXPAND_SUM  modifier != EXPAND_INITIALIZER)
 - || mode != ptr_mode)
 + || mode != Pmode)
 goto binop;

expand_operands (treeop0, treeop1,
 diff --git a/gcc/testsuite/gcc.target/i386/pr55142.c 
 b/gcc/testsuite/gcc.target/i386/pr55142.c
 new file mode 100644
 index 000..c8a9625
 --- /dev/null
 +++ b/gcc/testsuite/gcc.target/i386/pr55142.c
 @@ -0,0 +1,34 @@
 +/* { dg-do compile { target { ! { ia32 } } } } */
 +/* { dg-require-effective-target fpic } */
 +/* { dg-options -O2 -mx32 -maddress-mode=long -fpic } */
 +
 +typedef int int32_t;
 +typedef unsigned int uint32_t;
 +typedef int32_t Elf32_Sword;
 +typedef struct
 +{
 +  Elf32_Sword d_tag;
 +} Elf32_Dyn;
 +struct link_map
 +{
 +  Elf32_Dyn *l_ld;
 +  Elf32_Dyn *l_info[34];
 +};
 +extern struct link_map _dl_rtld_map __attribute__ ((visibility (hidden)));
 +static void elf_get_dynamic_info (struct link_map *l)
 +{
 +  Elf32_Dyn *dyn = l-l_ld;
 +  Elf32_Dyn **info;
 +  info = l-l_info;
 +  while (dyn-d_tag != 0)
 +{
 +  if ((uint32_t) (0x6eff - dyn-d_tag)  11)
 +   info[0x6eff - dyn-d_tag + 12] = dyn;
 +  ++dyn;
 +}
 +}
 +void
 +foo (void)
 +{
 +  elf_get_dynamic_info (_dl_rtld_map);
 +}

Any comments? I'd like to get it fixed for 4.8.  This patch only impacts
targets with Pmode != ptr_mode.  Richard, can you take a look at
this for mips?

Thanks.


-- 
H.J.

[Cilkplus] Merged with trunk at revision 193151

2012-11-05 Thread Iyer, Balaji V

Cilk Plus branch was merged with trunk at revision 193151. Committed as 
revision 193192.

Thanks,

Balaji V. Iyer.

[patch] PR55191 - ICE in tree-ssa-pre.c due to missing fake edge for an infinite loop

2012-11-05 Thread Steven Bosscher

Hello,

I hadn't expected that cfganal.c's reverse-CFG DFS would actually
depend on the order of the basic blocks. The attached patch fixes that
small oversight...

Bootstrappedtested on powerpc64-unknown-linux-gnu. OK for trunk?

Ciao!
Steven

* cfganal.c (connect_infinite_loops_to_exit): Call dfs_deadend
from here.
(flow_dfs_compute_reverse_execute): Don't call it here.

Index: cfganal.c
===
--- cfganal.c   (revision 193141)
+++ cfganal.c   (working copy)
@@ -452,6 +452,7 @@ void
 connect_infinite_loops_to_exit (void)
 {
   basic_block unvisited_block = EXIT_BLOCK_PTR;
+  basic_block deadend_block;
   struct depth_first_search_dsS dfs_ds;

   /* Perform depth-first search in the reverse graph to find nodes
@@ -467,8 +468,9 @@ connect_infinite_loops_to_exit (void)
   if (!unvisited_block)
break;

-  make_edge (unvisited_block, EXIT_BLOCK_PTR, EDGE_FAKE);
-  flow_dfs_compute_reverse_add_bb (dfs_ds, unvisited_block);
+  deadend_block = dfs_find_deadend (unvisited_block);
+  make_edge (deadend_block, EXIT_BLOCK_PTR, EDGE_FAKE);
+  flow_dfs_compute_reverse_add_bb (dfs_ds, deadend_block);
 }

   flow_dfs_compute_reverse_finish (dfs_ds);
@@ -958,7 +960,7 @@ flow_dfs_compute_reverse_execute (depth_first_sear
   /* Determine if there are unvisited basic blocks.  */
   FOR_BB_BETWEEN (bb, last_unvisited, NULL, prev_bb)
 if (!bitmap_bit_p (data-visited_blocks, bb-index))
-  return dfs_find_deadend (bb);
+  return bb;

   return NULL;
 }
Index: testsuite/gcc.dg/pr55191.c
===
--- testsuite/gcc.dg/pr55191.c  (revision 0)
+++ testsuite/gcc.dg/pr55191.c  (revision 0)
@@ -0,0 +1,14 @@
+/* PR tree-optimization/55191 */
+/* { dg-do compile } */
+/* { dg-options -O2 } */
+
+int a, b;
+
+void f(void)
+{
+  b = a || b;
+
+  for(a = 0; a  2; a++);
+  while(1);
+}
+

[Patch, Fortran, OOP] PR 54917: [4.7/4.8 Regression] TRANSFER on polymorphic variable causes ICE

2012-11-05 Thread Janus Weil

Hi all,

the attached patch implements support for polymorphic arguments to
TRANSFER. For details and discussion see the PR.

The patch was regtested successfully on x86_64-unknown-linux-gnu. Ok for trunk?

Btw, as part of the PR is a regression in 4.7 and trunk, I would like
to backport the target-memory.c part and the first test case to the
4.7 branch. Ok?

Cheers,
Janus


2012-11-05  Janus Weil  ja...@gcc.gnu.org

PR fortran/54917
* target-memory.c (gfc_target_expr_size,gfc_target_interpret_expr):
Handle BT_CLASS.
* trans-intrinsic.c (gfc_conv_intrinsic_transfer): Add support for
polymorphic arguments.

2012-11-05  Janus Weil  ja...@gcc.gnu.org

PR fortran/54917
* gfortran.dg/transfer_class_1.f90: New.
* gfortran.dg/transfer_class_2.f90: New.


pr54917.diff
Description: Binary data


transfer_class_1.f90
Description: Binary data


transfer_class_2.f90
Description: Binary data

Re: New badness metric for inliner

2012-11-05 Thread Jan Hubicka

 
 This broke the bootstrap on sparc:
 
 /home/davem/src/GIT/GCC/build-sparc32-linux/./prev-gcc/g++ 
 -B/home/davem/src/GIT/GCC/build-sparc32\
 -linux/./prev-gcc/ -B/usr/local/sparc-unknown-linux-gnu/bin/ -nostdinc++ 
 -B/home/davem/src/GIT/GCC\
 /build-sparc32-linux/prev-sparc-unknown-linux-gnu/libstdc++-v3/src/.libs 
 -B/home/davem/src/GIT/GCC\
 /build-sparc32-linux/prev-sparc-unknown-linux-gnu/libstdc++-v3/libsupc++/.libs
  -I/home/davem/src/G\
 IT/GCC/build-sparc32-linux/prev-sparc-unknown-linux-gnu/libstdc++-v3/include/sparc-unknown-linux-g\
 nu 
 -I/home/davem/src/GIT/GCC/build-sparc32-linux/prev-sparc-unknown-linux-gnu/libstdc++-v3/include\
  -I/home/davem/src/GIT/GCC/gcc/libstdc++-v3/libsupc++ 
 -L/home/davem/src/GIT/GCC/build-sparc32-linu\
 x/prev-sparc-unknown-linux-gnu/libstdc++-v3/src/.libs 
 -L/home/davem/src/GIT/GCC/build-sparc32-linu\
 x/prev-sparc-unknown-linux-gnu/libstdc++-v3/libsupc++/.libs -c   -g -O2 
 -gtoggle -DIN_GCC   -fno-e\
 xceptions -fno-rtti -fasynchronous-unwind-tables -W -Wall -Wno-narrowing 
 -Wwrite-strings -Wcast-qu\
 al -Wmissing-format-attribute -pedantic -Wno-long-long -Wno-variadic-macros 
 -Wno-overlength-string\
 s -Werror -fno-common  -DHAVE_CONFIG_H -I. -I. -I../../gcc/gcc 
 -I../../gcc/gcc/. -I../../gcc/gcc/.\
 ./include -I../../gcc/gcc/../libcpp/include  -I../../gcc/gcc/../libdecnumber 
 -I../../gcc/gcc/../li\
 bdecnumber/dpd -I../libdecnumber -I../../gcc/gcc/../libbacktrace 
 -DCLOOG_INT_GMP../../gcc/gcc/\
 graphite-interchange.c -o graphite-interchange.o
 ../../gcc/gcc/graphite-interchange.c:645:1: internal compiler error: in 
 relative_time_benefit, at \
 ipa-inline.c:784

I am sorry for that.  I added couple of sanity checks for overflow and it seems 
it triggers.
Is line 784 for you gcc_checking_assert (uninlined_call_time = 0);?

Honza

Re: [RFA:] PR55186 - gcc.dg/const-uniq-1.c fails due to vector not in the constant pool

2012-11-05 Thread Hans-Peter Nilsson

 From: Eric Botcazou ebotca...@adacore.com
 Date: Mon, 5 Nov 2012 15:29:11 +0100

  But, for cris-elf (and reasonably the same for other targets)
  there might not be such a constant-pool entry in the first
  place: the vectors are too short to rule out piecewise
  initialization as optimal for size (counting the vectors once
  per use).  Let's increase them.  Twice might just break-even for
  cris-elf so let's make them four times as long.  Sanity-checked
  for x86_64 w/w.o -m32 for a gcc-version that includes the
  optimization for which the test-case tests (CCing author).

 I don't know cris at all, but 4 times sounds quite a lot compared to the 
 other 
 architectures (I presume that the test passes on most of them).

That's right, because I *want* the size to be excessive compared
to the (expected max) in-line-limits of any target.

What I wrote above regarding numbers is incorrect; i misthought.
Counting just the size of the vector and emitted instructions,
it wouldn't make sense to just increase the vector, as the
size for the best-case instructions for initializing one item in
the 32-bit-int:
 moveq 1,$r2
 move.d $r2,[$r10+]
is four bytes; the size the corresponding constant-vector item.
The overhead for forming the address in a register is of course
smaller than the overhead for calling memcpy, as the former is a
subset of the latter. :)

What happens is that I increase the vector-length much above
what I see as the expected target-specific max-limit for the
in-lined memcpy/memset/whatever; from 32 to 128 for 32-bit
targets.  Yes, outliers are expected to have to adjust the
test-case anyway; this outlier is just the first one, I'm vainly
hoping there will be no others. :)

I could also have just changed the numbers in the vector from
those expected to be quickly formed (for CRIS, 0..31) to
e.g. random numbers, but then I'd have to deal with 32-bit vs.
16-bit-issues ...and also, to be absolutely correct I'd have to
deal with issues where the size of the emitted in-lined code
isn't fit for -Os compared to memcpy.  IIRC when -Os, gcc
middle-end *doesn't* emit rtl and add the sizes of the insns
(the insn attribute-size numbers) before deciding on emitting
insns or in-lined memcpy.  I *guess* the 32 bytes set [for CRIS]
by MOVE_RATIO is the only target-number that matters at present.
Since this minor can of worms wasn't the point of this
test-case, I opted for simply increasing the size of the vector,
keeping it simple.

 No objections by me though.

I take that as approval.  Committed as posted.

brgds, H-P

Re: New badness metric for inliner

2012-11-05 Thread David Miller

From: Jan Hubicka hubi...@ucw.cz
Date: Mon, 5 Nov 2012 23:19:16 +0100

 Is line 784 for you gcc_checking_assert (uninlined_call_time = 0);?

Yes.

Re: [patch, testsuite, mips] Fix gcc.dg/torture/mips-sdata-1.c

2012-11-05 Thread Steve Ellcey

On Mon, 2012-11-05 at 13:40 -0800, Steve Ellcey wrote:

 Hm, I configured GCC with '--enable-languages=c,c++', I think this might
 set ENABLE_LTO to '0' and cause check_effective_target_lto to return
 FALSE and cause the -ffat-lto-objects flag to not be added.  Of course
 this raises the question of why it is trying to do lto testing if I
 didn't configure it in.  I will add 'lto' to my configure line and see
 if that fixes the problem.
 
 Steve Ellcey
 sell...@mips.com

OK, that was a red herring.  The problem isn't
check_effective_target_lto but check_linker_plugin_available.
This is returning false because my compile can't find crt0.o.

% /local/home/sellcey/nightly/obj-mips-mti-elf/gcc/final/gcc/xgcc
-B/local/home/sellcey/nightly/obj-mips-mti-elf/gcc/final/gcc/ -v x.c
-Tmti32.ld  -o x

 /local/home/sellcey/nightly/obj-mips-mti-elf/gcc/final/gcc/collect2 -EB
-mips32r2 -melf32btsmip -o
x /local/home/sellcey/nightly/obj-mips-mti-elf/gcc/final/gcc/crti.o 
/local/home/sellcey/nightly/obj-mips-mti-elf/gcc/final/gcc/crtbegin.o 
-L/local/home/sellcey/nightly/obj-mips-mti-elf/gcc/final/gcc /tmp/ccgdsofE.o 
-lgcc -lgcc /local/home/sellcey/nightly/obj-mips-mti-elf/gcc/final/gcc/crtend.o 
/local/home/sellcey/nightly/obj-mips-mti-elf/gcc/final/gcc/crtn.o -T mti32.ld
/local/home/sellcey/nightly/install-mips-mti-elf/mips-mti-elf/bin/ld:
cannot find crt0.o
collect2: error: ld returned 1 exit status



If I run the compiler from the installed location, everything is fine:


% install-mips-mti-elf/bin/mips-mti-elf-gcc -v x.c -Tmti32.ld -o x

 
/local/home/sellcey/nightly/install-mips-mti-elf/libexec/gcc/mips-mti-elf/4.8.0/collect2
 -EB -mips32r2 -melf32btsmip -o x 
/local/home/sellcey/nightly/install-mips-mti-elf/lib/gcc/mips-mti-elf/4.8.0/crti.o
 
/local/home/sellcey/nightly/install-mips-mti-elf/lib/gcc/mips-mti-elf/4.8.0/crtbegin.o
 -L/local/home/sellcey/nightly/install-mips-mti-elf/lib/gcc/mips-mti-elf/4.8.0 
-L/local/home/sellcey/nightly/install-mips-mti-elf/lib/gcc/mips-mti-elf/4.8.0/../../../../mips-mti-elf/lib
 /tmp/cczngIXe.o -lgcc -lgcc 
/local/home/sellcey/nightly/install-mips-mti-elf/lib/gcc/mips-mti-elf/4.8.0/crtend.o
 
/local/home/sellcey/nightly/install-mips-mti-elf/lib/gcc/mips-mti-elf/4.8.0/crtn.o
 -T mti32.ld


I am not sure how ld is supposed to find crt0.o in the uninstalled
setup, it exists in obj-mips-mti-elf/newlib/mips-mti-elf/libgloss/mips.
Maybe it should find it in the installed location, it is there because I
have made and installed newlib before building and testing GCC.

Steve Ellcey
sell...@mips.com

Re: [patch, testsuite, mips] Fix gcc.dg/torture/mips-sdata-1.c

2012-11-05 Thread Richard Sandiford

Steve Ellcey sell...@mips.com writes:
 I am not sure how ld is supposed to find crt0.o in the uninstalled
 setup, it exists in obj-mips-mti-elf/newlib/mips-mti-elf/libgloss/mips.

The newlib and libgloss flags are usually added by the dejagnu
baseboard file.  E.g. for mips-sim.exp:

set_board_info cflags [libgloss_include_flags] [newlib_include_flags]
set_board_info ldflags [libgloss_link_flags] [newlib_link_flags]

Richard

[patch RFA middle-end] Fix PR target/41993

2012-11-05 Thread Kaz Kojima

Hi,

The attached patch is to solve PR target/41993 which will affect
targets using MODE_EXIT.
Without it, we can't find all return registers for __builtin_return
in mode-switching.c:create_pre_exit.  See the trail #4 by Uros in
the PR for the details.  The patch is tested with bootstrap and
regtested on i686-pc-linux-gnu with no new failures.  It's also
tested on cross sh4-unknown-linux-gnu.
OK for trunk?

Regards,
kaz
--
2012-11-05  Uros Bizjak  ubiz...@gmail.com
Kaz Kojima  kkoj...@gcc.gnu.org

PR target/41993
* mode-switching.c (create_pre_exit): Set return_copy to
last_insn when copy_start is a pseudo reg.

--- ORIG/trunk/gcc/mode-switching.c 2012-11-05 08:07:55.0 +0900
+++ trunk/gcc/mode-switching.c  2012-11-05 19:22:56.0 +0900
@@ -324,7 +324,10 @@ create_pre_exit (int n_entities, int *en
else
  break;
if (copy_start = FIRST_PSEUDO_REGISTER)
- break;
+ {
+   last_insn = return_copy;
+   continue;
+ }
copy_num
  = hard_regno_nregs[copy_start][GET_MODE (copy_reg)];

Re: [patch, testsuite, mips] Fix gcc.dg/torture/mips-sdata-1.c

2012-11-05 Thread Steve Ellcey

On Mon, 2012-11-05 at 22:55 +, Richard Sandiford wrote:
 Steve Ellcey sell...@mips.com writes:
  I am not sure how ld is supposed to find crt0.o in the uninstalled
  setup, it exists in obj-mips-mti-elf/newlib/mips-mti-elf/libgloss/mips.
 
 The newlib and libgloss flags are usually added by the dejagnu
 baseboard file.  E.g. for mips-sim.exp:
 
 set_board_info cflags [libgloss_include_flags] [newlib_include_flags]
 set_board_info ldflags [libgloss_link_flags] [newlib_link_flags]
 
 Richard

Yup, I have those in my baseboard file.  When you configure GCC for a
mips-*-elf target, do you use --with-sysroot=X?  I don't, and the
build seems to work fine, but I was wondering if this is why GCC
couldn't find crt0.o until after it had been installed (into the same
directory home as newlib was installed to).

Steve Ellcey
sell...@mips.com

Re: RFC: PR middle-end/55142: Check Pmode instead of ptr_mode for address mode

2012-11-05 Thread Richard Sandiford

H.J. Lu hjl.to...@gmail.com writes:
 On Sat, Nov 3, 2012 at 1:14 AM, H.J. Lu hjl.to...@gmail.com wrote:
 Hi,

 The testcase shows -O -mx32 -maddress-mode=long -fPIC -S generates;

 x.i:22:37: internal compiler error: in plus_constant, at explow.c:88
   info[0x6eff - dyn-d_tag + 12] = dyn;

 expand_expr_real_2 has

   /* No sense saving up arithmetic to be done
  if it's all in the wrong mode to form part of an address.
  And force_operand won't know whether to sign-extend or
  zero-extend.  */
   if ((modifier != EXPAND_SUM  modifier != EXPAND_INITIALIZER)
   || mode != ptr_mode)

 With mode == SImode, Pmode == DImode and ptr_mode == SImode, we
 generate wrong address.  Instead of zero-extending address
 0xf7ffdd64, we sign-extend it to 0xf7ffdd64.  This patch
 checks Pmode instead of ptr_mode for address mode.  It fixes the testcase
 and generates a working x32 glibc. Is this patch correct?

 Thanks.


 H.J.
 ---
 gcc/

 2012-11-03  H.J. Lu  hongjiu...@intel.com

 PR middle-end/55142
 * expr.c (expand_expr_real_2): Check Pmode instead of ptr_mode
 for wrong address mode.

 gcc/testsuite/

 2012-11-03  H.J. Lu  hongjiu...@intel.com

 PR middle-end/55142
 * gcc.target/i386/pr55142.c: New file.

 diff --git a/gcc/expr.c b/gcc/expr.c
 index 0ad3b57..1600380 100644
 --- a/gcc/expr.c
 +++ b/gcc/expr.c
 @@ -8290,7 +8290,7 @@ expand_expr_real_2 (sepops ops, rtx target, enum 
 machine_mode tmode,
  And force_operand won't know whether to sign-extend or
  zero-extend.  */
if ((modifier != EXPAND_SUM  modifier != EXPAND_INITIALIZER)
 - || mode != ptr_mode)
 + || mode != Pmode)
 {
   expand_operands (treeop0, treeop1,
subtarget, op0, op1, EXPAND_NORMAL);
 @@ -8333,7 +8333,7 @@ expand_expr_real_2 (sepops ops, rtx target, enum 
 machine_mode tmode,
  And force_operand won't know whether to sign-extend or
  zero-extend.  */
if ((modifier != EXPAND_SUM  modifier != EXPAND_INITIALIZER)
 - || mode != ptr_mode)
 + || mode != Pmode)
 goto binop;

expand_operands (treeop0, treeop1,
 diff --git a/gcc/testsuite/gcc.target/i386/pr55142.c 
 b/gcc/testsuite/gcc.target/i386/pr55142.c
 new file mode 100644
 index 000..c8a9625
 --- /dev/null
 +++ b/gcc/testsuite/gcc.target/i386/pr55142.c
 @@ -0,0 +1,34 @@
 +/* { dg-do compile { target { ! { ia32 } } } } */
 +/* { dg-require-effective-target fpic } */
 +/* { dg-options -O2 -mx32 -maddress-mode=long -fpic } */
 +
 +typedef int int32_t;
 +typedef unsigned int uint32_t;
 +typedef int32_t Elf32_Sword;
 +typedef struct
 +{
 +  Elf32_Sword d_tag;
 +} Elf32_Dyn;
 +struct link_map
 +{
 +  Elf32_Dyn *l_ld;
 +  Elf32_Dyn *l_info[34];
 +};
 +extern struct link_map _dl_rtld_map __attribute__ ((visibility (hidden)));
 +static void elf_get_dynamic_info (struct link_map *l)
 +{
 +  Elf32_Dyn *dyn = l-l_ld;
 +  Elf32_Dyn **info;
 +  info = l-l_info;
 +  while (dyn-d_tag != 0)
 +{
 +  if ((uint32_t) (0x6eff - dyn-d_tag)  11)
 +   info[0x6eff - dyn-d_tag + 12] = dyn;
 +  ++dyn;
 +}
 +}
 +void
 +foo (void)
 +{
 +  elf_get_dynamic_info (_dl_rtld_map);
 +}

 Any comments? I'd like to get it fixed for 4.8.

I can't approve this anyway, but the existing ptr_mode sounds right for
EXPAND_INTIALIZER.  I assume it would be for EXPAND_SUM too, although
I've always been a bit unsure what that means.

 This patch only impacts targets with Pmode != ptr_mode.  Richard, can
 you take a look at this for mips?

MIPS isn't affected FWIW.  Having Pmode != ptr_mode was just a temporary
experiment on the 3.4 rewrite branch.

Richard

Re: [Dwarf Fission] Implement Fission Proposal (issue6305113)

2012-11-05 Thread Cary Coutant

 +/* %:replace-extension spec function.  Replaces the extension of the
 +   first argument with the second argument.  */
 +
 +const char *
 +replace_extension_spec_func (int argc, const char **argv)
 +{
 +  char *name;
 +  char *p;
 +  char *result;
 +
 +  if (argc != 2)
 +fatal_error (too few arguments to %%:replace-extension);
 +
 +  name = xstrdup (argv[0]);
 +  p = strrchr (name, '.');
 +  if (p != NULL)
 +  *p = '\0';
 +
 +  result = concat (name, argv[1], NULL);
 +
 +  free (name);
 +  return result;
 +}

This doesn't do the right thing when there is no '.' in the last
component of the path. It should look for the last DIR_SEPARATOR,
then search for the last '.' after that.


 +/* Describe an entry into the .debug_addr section.  */
 +
 +enum ate_kind {
 +  ate_kind_rtx,
 +  ate_kind_rtx_dtprel,
 +  ate_kind_label
 +};
 +
 +typedef struct GTY(()) addr_table_entry_struct {
 +  enum ate_kind kind;
 +  unsigned int refcount;
 +  unsigned int index;
 +  union addr_table_entry_struct_union
 +{
 +  rtx GTY ((tag (ate_kind_rtx))) rtl;
 +  char * GTY ((tag (ate_kind_label))) label;
 +}
 +  GTY ((desc (%1.kind))) addr;

When kind == ate_kind_rtx_dtprel, we use the rtl field. I think this needs
to be covered for GC to work. As far as I know, gengtype doesn't support
multiple tags for one union member, so I think it needs to be something
like this:

  union addr_table_entry_struct_union
{
  rtx GTY ((tag (0))) rtl;
  char * GTY ((tag (1))) label;
}
  GTY ((desc ((%1.kind == ate_kind_label addr;


 +static void add_AT_lbl_id (dw_die_ref, enum dwarf_attribute, const char *,
 +   bool);

It turns out we never call add_AT_lbl_id with force_direct == true.
I don't think it's necessary to add this parameter here.


 +/* enum for tracking thread-local variables whose address is really an offset
 +   relative to the TLS pointer, which will need link-time relocation, but 
 will
 +   not need relocation by the DWARF consumer.  */
 +
 +enum dtprel_bool
 +  {
 +dtprel_false = 0,
 +dtprel_true = 1
 +  };

Extra indentation here.


 +static inline enum dwarf_location_atom
 +dw_addr_op (enum dtprel_bool dtprel)
 +{
 +  if (dtprel == dtprel_true)
 +return (dwarf_split_debug_info ? DW_OP_GNU_const_index
 +: (DWARF2_ADDR_SIZE == 4 ? DW_OP_const4u : DW_OP_const8u));
 +  else
 +return (dwarf_split_debug_info ? DW_OP_GNU_addr_index : DW_OP_addr);

Unnecessary parentheses here.


 +/* Return the index for any attribute that will be referenced with a
 +   DW_FORM_GNU_addr_index.  Strings have their indices handled differently to
 +   account for reference counting pruning.  */
 +
 +static inline unsigned int
 +AT_index (dw_attr_ref a)
 +{
 +  if (AT_class (a) == dw_val_class_str)
 +return a-dw_attr_val.v.val_str-index;
 +  else if (a-dw_attr_val.val_entry != NULL)
 +return a-dw_attr_val.val_entry-index;
 +  return NOT_INDEXED;
 +}

The comment seems out of date. DW_FORM_GNU_str_index should also be
mentioned, and it doesn't look like strings have their indices handled
differently (at least not here).


 +static void
 +remove_addr_table_entry (addr_table_entry *entry)
 +{
 +  addr_table_entry *node;
 +
 +  gcc_assert (dwarf_split_debug_info  addr_index_table);
 +  node = (addr_table_entry *) htab_find (addr_index_table, entry);
 +  node-refcount--;
 +  /* After an index is assigned, the table is frozen.  */
 +  gcc_assert (node-refcount  0 || node-index == NO_INDEX_ASSIGNED);

This shouldn't ever be called after we've assigned any indexes at all,
so I think it's always safe to asser that node-index == NO_INDEX_ASSIGNED.
We can also assert that the ref count should never go negative, so I think
you can rewrite this assert as:

  gcc_assert (node-refcount = 0  node-index == NO_INDEX_ASSIGNED);


 @@ -21215,7 +22086,7 @@ prune_unused_types_update_strings (dw_die_ref die)
   *slot = s;
 }
}
 -}
 + }

Accidental extra space?


 +static void
 +index_location_lists (dw_die_ref die)
 +{
 +  dw_die_ref c;
 +  dw_attr_ref a;
 +  unsigned ix;
 +
 +  FOR_EACH_VEC_ELT (dw_attr_node, die-die_attr, ix, a)
 +if (AT_class (a) == dw_val_class_loc_list)
 +  {
 +dw_loc_list_ref list = AT_loc_list (a);
 +dw_loc_list_ref curr;
 +for (curr = list; curr != NULL; curr = curr-dw_loc_next)
 +  {
 +/* Don't index an entry that has already been indexed
 +   or won't be output.  */
 +if (curr-begin_entry != NULL
 +   || (strcmp (curr-begin, curr-end) == 0  !curr-force))
 + continue;

Check the indentation here -- looks like these last two lines are missing
a space.


OK for trunk with these changes. Thanks!

-cary

1 2 >

1 - 100 of 122 matches

Mail list logo