Re: [PATCH] Use new dump scheme to emit loop unroll/peel summary info (issue6941070)

2012-12-20 Thread Bernhard Reutner-Fischer
On Mon, Dec 17, 2012 at 10:44:59PM -0800, Teresa Johnson wrote:
Index: tree-ssa-loop-ivcanon.c
===
--- tree-ssa-loop-ivcanon.c(revision 194516)
+++ tree-ssa-loop-ivcanon.c(working copy)
@@ -639,22 +639,24 @@ unloop_loops (bitmap loop_closed_ssa_invalidated,
 
 /* Tries to unroll LOOP completely, i.e. NITER times.
UL determines which loops we are allowed to unroll.
-   EXIT is the exit of the loop that should be eliminated.  
+   EXIT is the exit of the loop that should be eliminated.
MAXITER specfy bound on number of iterations, -1 if it is
-   not known or too large for HOST_WIDE_INT.  */
+   not known or too large for HOST_WIDE_INT. The location
+   LOCUS corresponding to the loop is used when emitting
+   a summary of the unroll to the dump file.  */
 
 static bool
 try_unroll_loop_completely (struct loop *loop,
   edge exit, tree niter,
   enum unroll_level ul,
-  HOST_WIDE_INT maxiter)
+  HOST_WIDE_INT maxiter,
+location_t locus)

whitespace damage?

Index: loop-unroll.c
===
--- loop-unroll.c  (revision 194516)
+++ loop-unroll.c  (working copy)
@@ -148,6 +148,61 @@ static void combine_var_copies_in_loop_exit (struc
basic_block);
 static rtx get_expansion (struct var_to_expand *);
 
+/* Emit a message summarizing the unroll or peel that will be
+   performed for LOOP, along with the loop's location LOCUS, if
+   appropriate given the dump or -fopt-info settings.  */
+
+static void
+report_unroll_peel(struct loop *loop, location_t locus)

missing space before (

contrib/check_GNU_style.sh generally says:
Dot, space, space, new sentence.
loop-dump.01.patch:223:+   not known or too large for HOST_WIDE_INT. The 
location
loop-dump.01.patch:514:+   * of the for or while statement, if possible. To do 
this, look

Dot, space, space, end of comment.
loop-dump.01.patch:504:+/* Return location corresponding to the loop control 
condition if possible. */
loop-dump.01.patch:541:+  /* Next check the latch, to see if it is non-empty. *
loop-dump.01.patch:555:+  /* If all else fails, simply return the current 
function location. */

There should be exactly one space between function name and parentheses.
loop-dump.01.patch:329:+report_unroll_peel(struct loop *loop, location_t locus)
loop-dump.01.patch:386:+  location_t locus = get_loop_location(loop);
loop-dump.01.patch:404:+  report_unroll_peel(loop, locus);
loop-dump.01.patch:412:+  location_t locus = get_loop_location(loop);
loop-dump.01.patch:429:+  report_unroll_peel(loop, locus);
loop-dump.01.patch:533:+  if ((exit = single_exit(loop)))

@@ -248,6 +305,7 @@ peel_loops_completely (int flags)
 
   if (loop->lpt_decision.decision == LPT_PEEL_COMPLETELY)
   {
+  report_unroll_peel(loop, locus);
 peel_loop_completely (loop);

whitespace damage? You seem to have this kind of whitespace error
throughout the patch. I take it you are aware of
http://gcc.gnu.org/wiki/FormattingCodeForGCC
and just forgot to have it on the machine you edited?

I seemingly have
$ cat ~/.vim/gcc_style.vim 
 " put this plugin into ~/.vim/gcc_style.vim and source it into your ~/.vimrc via
 " source ~/.vim/gcc_style.vim
if exists("g:loaded_gcc_style") || &cp
  finish
endif
let g:loaded_gcc_style = 1

augroup gcc_style
  autocmd BufReadPost,FileReadPost * call s:maybe_gcc_style()
augroup END
if exists("*s:maybe_gcc_style")
  finish
endif
let s:cpo_save = &cpo
set cpo&vim

function! s:maybe_gcc_style()
  let s:i = 1 + 0
  while s:i <= line("$") && s:i <= 25
let s:line = getline(s:i)
if s:line =~ '^\s*This\sfile\sis\spart\sof\sGCC.*'
   " gcc-mode
  set cino=:s,{s,n-s,2s,^-s
  set sw=2
  set sts=2
  set cindent
  set smartindent
  set autoindent
  break
else
  let s:i = s:i + 1
endif
  endwhile
endfunction

command! NoGCCstyle unlet! g:loaded_gcc_style | au! gcc_style
command! DoGCCstyle runtime gcc_style.vim
let &cpo = s:cpo_save

Index: cfgloop.c
===
--- cfgloop.c  (revision 194516)
+++ cfgloop.c  (working copy)
@@ -1666,3 +1666,59 @@ loop_exits_from_bb_p (struct loop *loop, basic_blo
 
   return false;
 }
+
+/* Return location corresponding to the loop control condition if possible. */
+
+location_t
+get_loop_location (struct loop *loop)
+{
+  rtx insn = NULL;
+  struct niter_desc *desc = NULL;
+  edge exit;
+
+  /* For a for or while loop, we would like to return the location
+   * of the for or while statement, if possible. To do this, look
+   * for the branch guarding the loop back-edge.
+   */

IIRC there is not supposed to be a * in comments.

Other than these nits i like it (but cannot approve it).

thanks,


[PATCH][ARM][thumb1] Reduce lr save for leaf function with non-far jump

2012-12-20 Thread Joey Ye
Current GCC thumb1 has an annoying problem: it always assumes far branches,
so it forces lr to be saved even when that is unnecessary. The most extreme
case complained about by a partner is:

// compiled with -mthumb -mcpu=cortex-m0 -Os.
void foo() { for (;;); }
=
foo:
push{lr}  // Crazy!!!
.L2:
b   .L2

The reason is that thumb1 far jumps are only resolved in the very late
shorten_branch pass. The prologue/epilogue pass doesn't actually know from
an insn's attributes whether a branch is far or not. It has to conservatively
save/restore lr whenever there is a branch.

This patch tries to fix it with a simple heuristic, i.e., using the function
size to decide whether a far jump is likely to be used. Function size
information is meaningful in the prologue/epilogue pass. The heuristic uses
the following check to decide whether lr should be saved for a far jump:

function_size * 3 >= 2048 // yes: save lr for a possible far jump; no: don't
save lr for a far jump

The scheme has an issue: if some corner case does break the above condition,
there is no way to fix things up other than to ICE. But the heuristic
condition is very conservative. It is based on the worst normal case, in which
each instruction is associated with a 4 byte literal ( (2+4)/2=3, inflating
the size by 3 times ). I can't think of a real case that triggers the ICE,
so I think it should work.
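
A minimal sketch of that check, with made-up names (illustrative only, not the
patch itself):

/* Illustrative only: thumb1 short branches reach roughly -2048..+2046
   bytes, and in the worst case every 2-byte insn drags in a 4-byte
   literal, i.e. the code can grow by about 3x ((2+4)/2 = 3).  */
static int
far_jump_possibly_needed (unsigned int function_size)
{
  return function_size * 3 >= 2048;
}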

Other approaches than the heuristic scheme are too expensive to implement
for this small size/performance issue. I did explore some, but none of them
persuaded me.

Tests passed:
* build libgcc, libstdc++, newlib, libm
* make check-gcc with cpu=cortex-m0
* Small and extreme test cases

ChangeLog:

2012-12-20  Joey Ye  joey...@arm.com

* config/arm/arm.c (thumb1_final_prescan_insn): Assert lr save
for real far jump.
(thumb_far_jump_used_p): Count instruction size and set
far_jump_used.

diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 327ef22..ad79451 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -21790,6 +21857,11 @@ thumb1_final_prescan_insn (rtx insn)
   else if (conds != CONDS_NOCOND)
cfun->machine->thumb1_cc_insn = NULL_RTX;
 }
+
+/* Check if unexpected far jump is used.  */
+if (cfun->machine->lr_save_eliminated
+    && get_attr_far_jump (insn) == FAR_JUMP_YES)
+  internal_error ("Unexpected thumb1 far jump");
 }
 
 int
@@ -21815,6 +21887,8 @@ static int
 thumb_far_jump_used_p (void)
 {
   rtx insn;
+  bool far_jump = false;
+  unsigned int func_size = 0;
 
   /* This test is only important for leaf functions.  */
   /* assert (!leaf_function_p ()); */
@@ -21870,6 +21944,26 @@ thumb_far_jump_used_p (void)
   && get_attr_far_jump (insn) == FAR_JUMP_YES
  )
{
+ far_jump = true;
+   }
+  func_size += get_attr_length (insn);
+}
+
+  /* Attribute far_jump will always be true for thumb1 before the
+     shorten_branch pass.  So checking the far_jump attribute before
+     shorten_branch isn't very useful.
+
+     The following heuristic tries to estimate more accurately whether
+     a far jump may finally be used.  The heuristic is very conservative,
+     as there is no chance to roll back a decision not to use a far jump.
+
+     Thumb1 long branch offset is -2048 to 2046.  The worst case is that
+     each 2-byte insn is associated with a 4 byte constant pool.  Using
+     function size 2048/3 as the threshold is conservative enough.  */
+  if (far_jump)
+{
+  if ((func_size * 3) >= 2048)
+{
  /* Record the fact that we have decided that
 the function does use far jumps.  */
  cfun->machine->far_jump_used = 1;







Re: Patch to enable unlimited polymorphism to gfortran

2012-12-20 Thread Dominique Dhumieres
Dear Paul,

Apparently you have forgotten to commit the update for
same_type_as_1.f03.

Dominique


[Patch, wwwdocs] Update Fortran part of the GCC 4.8 release notes

2012-12-20 Thread Tobias Burnus
The following patch updates the Fortran part of the GCC 4.8 release 
notes at http://gcc.gnu.org/gcc-4.8/changes.html#fortran


It adds quips for
- CLASS(*)
- The new BACKTRACE intrinsic
- A compatibility notice

I would like it if someone could comment on the latter. I think it is time
to explicitly inform users about compatibility issues with gfortran. So far,
smaller ABI changes were made all the time [affecting very special cases
or very experimental features] and the .mod version was different in
every release.


(The smaller ABI changes were related to code which required modules;
hence, the .mod version change forced users to re-compile. In fact, the
.mod version change in 4.8 has been done just to force recompilation.*
Thus, the past ABI breakages were, and this ABI breakage is, very unlikely
to lead to run-time/link-time issues.)


Comments? Suggestions?

Tobias

* The background for 4.8's ABI changes is: the module name was missing
from module-defined procedure-pointer variables, leading to a potential
naming clash with same-name variables in different modules. And the
deferred-length string ABI was changed because some systems didn't like
a "." in the assembler name of a variable.
Index: changes.html
===
RCS file: /cvs/gcc/wwwdocs/htdocs/gcc-4.8/changes.html,v
retrieving revision 1.73
diff -p -u -r1.73 changes.html
--- changes.html	19 Dec 2012 21:54:50 -	1.73
+++ changes.html	20 Dec 2012 09:43:10 -
@@ -243,6 +243,35 @@ B b(42); // OK
 
 <h3 id="fortran">Fortran</h3>
   <ul>
+<li>Compatibility notice:
+<ul>
+  <li>Module files: The version of the module files (<code>.mod</code>)
+has been incremented. Fortran <code>MODULE</code>s compiled by earlier
+GCC versions have to be recompiled, when they are <code>USE</code>d by
+files compiled with GCC 4.8, because GCC 4.8 is not able to read
+<code>.mod</code> file of earlier GCC versions; attempting to do so
+gives an error message. Note: The ABI of the produced assembler data
+itself has not changed; object files and libraries are fully compatible
+to older versions. (Except as noted below.)</li>
+  <li>ABI: Some internal names (name in the assembler/object file) have
+changed for symbols declared in the specification part of a module.
+If the module &ndash; or a file using such a symbol via use
+association &ndash; is recompiled, the module and all files which
+directly use such symbols have to be recompiled. The change only
+affects the following kind of module symbols:
+<ul>
+  <li>Procedure pointers. Note: C-interoperable function pointers
+(<code>type(c_funptr)</code>) are not affected nor are
+procedure-pointer components.</li>
+  <li>Deferred-length character strings.</li>
+</ul></li>
+  </ul></li>
+
+<li>The <a href="http://gcc.gnu.org/onlinedocs/gfortran/BACKTRACE.html">
+<code>BACKTRACE</code></a> intrinsic subroutine has been added. It shows
+a backtrace at an arbitrary place in user code; program execution
+continues normally afterwards.</li>
+ 
 <li>The <code><a
 href="http://gcc.gnu.org/onlinedocs/gfortran/Error-and-Warning-Options.html">
 -Wc-binding-type</a></code> warning option has been added (disabled
@@ -301,6 +330,12 @@ B b(42); // OK
 <a href="http://gcc.gnu.org/onlinedocs/gfortran/TMPDIR.html">user
 manual</a>.</li>
 
+<li><a href="http://gcc.gnu.org/wiki/Fortran2003Status">Fortran 2003</a>:
+  <li>Experimental support for unlimited polymorphic variables
+  (<code>CLASS(*)</code>) has been added.</li>
+<ul>
+</ul></li>
+
 <li><a href="http://gcc.gnu.org/wiki/TS29113Status">TS 29113</a>:
 <ul>
   <li>Assumed types (<code>TYPE(*)</code>) are now supported.</li>


Re: [PATCH] Fix PR gcov-profile/55734 for bootstrapping with older compilers (issue6980044)

2012-12-20 Thread Jakub Jelinek
On Wed, Dec 19, 2012 at 10:14:26PM -0800, Teresa Johnson wrote:
 Merged this pair into an #elif, but left the outer one (from the IN_LIBGCOV
 check) since it looks clearer.
 
 New patch:
 
 2012-12-19  Teresa Johnson  tejohn...@google.com
 Jakub Jelinek  ja...@redhat.com
 
 PR gcov-profile/55734
 * gcov-io.c (gcov_read_summary): Use __builtin_popcount instead
 of __builtin_popcountll when building libgcov.a, otherwise use
 popcount_hwi.
 (gcov_histo_index): When not building libgcov.a, use floor_log2
 instead of __builtin_clzll.

Okay, thanks.

Jakub


*ping* [patch, libfortran] Fix PR 30162, write with pipes

2012-12-20 Thread Thomas Koenig

Ping?

Thomas


Hi Janus,

Oops, right.  Here is the correct one.

Regards

 Thomas


wrong patch attached? It contains a hunk in frontend-passes.c, which
seems totally unrelated ...

Cheers,
Janus



2012/12/15 Thomas Koenig tkoe...@netcologne.de:

Hello world,

the attached patch fixes the regression and regtests cleanly.
No test case because I could not find anything portable
to create a FIFO in the testsuite.

OK for trunk and 4.7?

 Thomas

2012-12-15  Thomas Koenig  tkoe...@gcc.gnu.org

 PR libfortran/30162
 * io/unix.c (raw_tell):  If the lseek is done on a
 non-seekable file, return 0.









RE: [PATCH i386]: Enable push/pop in pro/epilogue for modern CPUs

2012-12-20 Thread Melik-adamyan, Areg
We checked,  no significant gains or losses.

-Original Message-
From: H.J. Lu [mailto:hjl.to...@gmail.com] 
Sent: Friday, December 14, 2012 1:03 AM
To: Jan Hubicka
Cc: Jakub Jelinek; Xinliang David Li; GCC Patches; Teresa Johnson; 
Melik-adamyan, Areg
Subject: Re: [PATCH i386]: Enable push/pop in pro/epilogue for modern CPUs

On Thu, Dec 13, 2012 at 12:40 PM, Jan Hubicka hubi...@ucw.cz wrote:
  Here we speak about memcpy/memset only.  I never got around to 
  modernize strlen and friends, unfortunately...
 
  memcmp and friends are different beasts.  They really need some TLC...

 memcpy and memset in glibc are also extremely fast.

 The default strategy now is to inline only when the block is known to 
 be small (either constant or via profile feedback, we do not really 
 use the info on upper bound of size of the copied object that would be 
 useful but not readily available at expansion time).

 You can try the test_stringop script I attached and send me the 
 results.  For

Areg, can you give it a try?  Thanks.

 me libc starts to be win only for rather large blocks (i.e. 8KB)


Which glibc are you using?

--
H.J.
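
To make the inlining strategy Honza describes in the quoted text concrete,
here is a minimal self-contained illustration (assumptions: typical -O2
behaviour on x86; the exact cutoff depends on target, tuning and GCC version):

#include <string.h>

void
copy_small (char *dst, const char *src)
{
  /* Small, constant size: GCC usually expands this inline.  */
  memcpy (dst, src, 16);
}

void
copy_unknown (char *dst, const char *src, size_t n)
{
  /* Size only known at run time: usually left as a call into glibc,
     which selects an optimized memcpy variant at load time.  */
  memcpy (dst, src, n);
}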


[PATCH] Fix PR55740

2012-12-20 Thread Richard Biener

The following fixes a fixup for loops when merging two basic-blocks.
We didn't handle merging two loop headers well, which the following
patch addresses.

LTO bootstrapped (which was broken before this patch) and tested
on x86_64-unknown-linux-gnu, applied.

Richard.

2012-12-20  Richard Biener  rguent...@suse.de

PR middle-end/55740
* cfghooks.c (merge_blocks): Properly handle merging of
two loop headers.

* g++.dg/torture/pr55740.C: New testcase.

Index: gcc/cfghooks.c
===
*** gcc/cfghooks.c  (revision 194610)
--- gcc/cfghooks.c  (working copy)
*** merge_blocks (basic_block a, basic_block
*** 724,734 
  
    cfg_hooks->merge_blocks (a, b);
  
-   /* If we merge a loop header into its predecessor, update the loop
-  structure.  */
if (current_loops != NULL)
  {
!   if (b->loop_father->header == b)
{
  remove_bb_from_loops (a);
  add_bb_to_loop (a, b->loop_father);
--- 724,746 
  
    cfg_hooks->merge_blocks (a, b);
  
if (current_loops != NULL)
  {
!   /* If the block we merge into is a loop header do nothing unless ... */
!   if (a->loop_father->header == a)
!   {
! /* ... we merge two loop headers, in which case we kill
!the inner loop.  */
! if (b->loop_father->header == b)
!   {
! b->loop_father->header = NULL;
! b->loop_father->latch = NULL;
! loops_state_set (LOOPS_NEED_FIXUP);
!   }
!   }
!   /* If we merge a loop header into its predecessor, update the loop
!structure.  */
!   else if (b->loop_father->header == b)
{
  remove_bb_from_loops (a);
  add_bb_to_loop (a, b->loop_father);
Index: gcc/testsuite/g++.dg/torture/pr55740.C
===
*** gcc/testsuite/g++.dg/torture/pr55740.C  (revision 0)
--- gcc/testsuite/g++.dg/torture/pr55740.C  (working copy)
***
*** 0 
--- 1,19 
+ // { dg-do compile }
+ 
+ static bool st_IsPathDelimiter( char c ) { return c == '/'; }
+ bool IsValidPath( char const * filename )
+ {
+   if ( !filename || filename[0] == 0 ) 
+ return false;
+   char const * run = filename;
+   while ( run && *run )   
+ {
+   if ( run[0] == '.' )   
+   if ( run[1] != '.' || ( !st_IsPathDelimiter( run[2] ) && run[2] != 0 ) 
)   
+ return false;   
+   while ( *run && !st_IsPathDelimiter( *run ) )
+   ++run;
+   if ( *run ) 
+   ++run;
+ }
+ }


Re: [PATCH] Fix PR55740

2012-12-20 Thread Steven Bosscher
On Thu, Dec 20, 2012 at 1:43 PM, Richard Biener wrote:
 --- 724,746 

 cfg_hooks->merge_blocks (a, b);

 if (current_loops != NULL)
   {
 !   /* If the block we merge into is a loop header do nothing unless ... 
 */
 !   if (a->loop_father->header == a)
 !   {
 ! /* ... we merge two loop headers, in which case we kill
 !the inner loop.  */

Before loops were maintained, we'd simply re-discover the nested
loops. Do we now lose this information?

How about adjusting the can_merge_blocks hook to reject merging loop headers?

Ciao!
Steven


Re: [PATCH] Fix PR55740

2012-12-20 Thread Richard Biener
On Thu, 20 Dec 2012, Steven Bosscher wrote:

 On Thu, Dec 20, 2012 at 1:43 PM, Richard Biener wrote:
  --- 724,746 
 
  cfg_hooks->merge_blocks (a, b);
 
  if (current_loops != NULL)
{
  !   /* If the block we merge into is a loop header do nothing unless 
  ... */
  !   if (a->loop_father->header == a)
  !   {
  ! /* ... we merge two loop headers, in which case we kill
  !the inner loop.  */
 
 Before loops were maintained, we'd simply re-discover the nested
 loops. Do we now lose this information?

When we merge loop headers we effectively merge two loops (in this
case cross-jumping merged the latches and the headers).  We re-discover
the nest when multiple latches remain and we disambiguate loops
with multiple latches.

 How about adjusting the can_merge_blocks hook to reject merging loop headers?

I didn't want to do that, but sure - that's another possibility.  In
this case the outer loop is really unnecessary.

Richard.


Re: [Patch, wwwdocs] Update Fortran part of the GCC 4.8 release notes

2012-12-20 Thread Paul Richard Thomas
Dear Tobias,

Could you note that class(*) is complete up to the restriction to
fixed length character values only?

Thanks

Paul

On 20 December 2012 10:55, Tobias Burnus bur...@net-b.de wrote:
 The following patch updates the Fortran part of the GCC 4.8 release notes at
 http://gcc.gnu.org/gcc-4.8/changes.html#fortran

 It adds quips for
 - CLASS(*)
 - The new BACKTRACE intrinsic
 - A compatibility notice

 I would like if someone could comment on the latter. I think it is time to
 explicitly inform about compatibility issues with gfortran. So far, smaller
 ABI changes were done all the time [affecting very special cases or very
 experimental features] and the .mod version was different in every release.

 (The smaller ABI changes were related to code which required modules, hence,
 the .mod version change forced users to re-compile. In fact, the .mod
 version change in 4.8 has just be done to force recompilation.* Thus, the
 past ABI breakages were and this ABI breakage is very unlikely to lead to
 run-time/link-time issues.)

 Comments? Suggestions?

 Tobias

 * The background for 4.8's ABI changes were: The module name was missing
 from module-defined procedure-pointer variables, leading to a potential
 naming clash with same-name variables in different modules. And the
 deferred-length string ABI was changed as some systems didn't like a . in
 the assembler name of a variable.



-- 
The knack of flying is learning how to throw yourself at the ground and miss.
   --Hitchhikers Guide to the Galaxy


[PATCH] Further restrict TER replacing over calls (PR55752)

2012-12-20 Thread Richard Biener

In the PR we perform expression replacement of an FP operation
across a builtin call that sets the FP control register.  This
patch restricts replacement across calls further, from allowing
all builtins to only allowing those without side-effects.

Allowing replacement over calls at all was to not pessimize
FP code generation for example for sqrt which is most often
expanded to a single instruction.
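
For context, a small illustration of the kind of code this is about
(illustrative only, assuming x86 SSE and <xmmintrin.h>; not taken from the
PR): the FP division must not be sunk past the MXCSR write.

#include <xmmintrin.h>

double
quotient_then_set_rounding (double x, double y)
{
  double q = x / y;   /* computed under the current rounding mode */

  /* Switch MXCSR to round-toward-negative-infinity; this expands to the
     ldmxcsr builtin, whose side-effect TER has to respect.  */
  _mm_setcsr ((_mm_getcsr () & ~0x6000u) | 0x2000u);

  return q;           /* TER must not substitute x / y down here */
}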

Bootstrap and regtest running on x86_64-unknown-linux-gnu.

Comments?

Thanks,
Richard.

2012-12-20  Richard Biener  rguent...@suse.de

PR middle-end/55752
* tree-ssa-ter.c (find_replaceable_in_bb): Only allow replacing
across calls with no side-effects.

Index: gcc/tree-ssa-ter.c
===
*** gcc/tree-ssa-ter.c  (revision 194632)
--- gcc/tree-ssa-ter.c  (working copy)
*** find_replaceable_in_bb (temp_expr_table_
*** 681,692 
kill_expr (tab, partition);
}
  
!   /* Increment counter if this is a non BUILT_IN call. We allow
!replacement over BUILT_IN calls since many will expand to inline
!insns instead of a true call.  */
if (is_gimple_call (stmt)
!       && !((fndecl = gimple_call_fndecl (stmt))
!            && DECL_BUILT_IN (fndecl)))
cur_call_cnt++;
  
/* Now see if we are creating a new expression or not.  */
--- 681,693 
kill_expr (tab, partition);
}
  
!   /* Increment counter if this is not a BUILT_IN call without
!side-effects.  We allow replacement over BUILT_IN calls
!since many will expand to inline insns instead of a true call.  */
if (is_gimple_call (stmt)
!       && (!((fndecl = gimple_call_fndecl (stmt))
!             && DECL_BUILT_IN (fndecl))
!           || gimple_has_side_effects (stmt)))
cur_call_cnt++;
  
/* Now see if we are creating a new expression or not.  */


Re: [PATCH] Further restrict TER replacing over calls (PR55752)

2012-12-20 Thread Jakub Jelinek
On Thu, Dec 20, 2012 at 02:51:55PM +0100, Richard Biener wrote:
 In the PR we perform expression replacement of an FP operation
 across a builtin call that sets the FP control register.  This
 patch restricts replacement across calls further, from allowing
 all builtins to only allowing those without side-effects.
 
 Allowing replacement over calls at all was to not pessimize
 FP code generation for example for sqrt which is most often
 expanded to a single instruction.
 
 Bootstrap and regtest running on x86_64-unknown-linux-gnu.
 
 Comments?

Wouldn't it be better to have there a list of known builtins over which it
is fine to do TER?  I'd bet most of memory or string builtins that don't
call malloc/free should be still ok, but they surely have side-effects.

 2012-12-20  Richard Biener  rguent...@suse.de
 
   PR middle-end/55752
   * tree-ssa-ter.c (find_replaceable_in_bb): Only allow replacing
   across calls with no side-effects.

Jakub


Re: [PATCH] Further restrict TER replacing over calls (PR55752)

2012-12-20 Thread Richard Biener
On Thu, 20 Dec 2012, Jakub Jelinek wrote:

 On Thu, Dec 20, 2012 at 02:51:55PM +0100, Richard Biener wrote:
  In the PR we perform expression replacement of an FP operation
  across a builtin call that sets the FP control register.  This
  patch restricts replacement across calls further, from allowing
  all builtins to only allowing those without side-effects.
  
  Allowing replacement over calls at all was to not pessimize
  FP code generation for example for sqrt which is most often
  expanded to a single instruction.
  
  Bootstrap and regtest running on x86_64-unknown-linux-gnu.
  
  Comments?
 
 Wouldn't it be better to have there a list of known builtins over which it
 is fine to do TER?  I'd bet most of memory or string builtins that don't
 call malloc/free should be still ok, but they surely have side-effects.

I'm not sure - the original reason was that replacing across calls
made us spill more because there was a call.  We agreed that replacing
across calls isn't usually a good idea but put in the (admittedly bad)
workaround to still allow doing so across likely-not-calls.
string builtins generally will expand to calls though.

I was thinking of even making it stronger and increment cur_call_cnt
when the stmt (even non-call) has side-effects (would for example
cover volatile asms or general volatile touching insns).

Richard.

  2012-12-20  Richard Biener  rguent...@suse.de
  
  PR middle-end/55752
  * tree-ssa-ter.c (find_replaceable_in_bb): Only allow replacing
  across calls with no side-effects.
 
   Jakub
 
 

-- 
Richard Biener rguent...@suse.de
SUSE / SUSE Labs
SUSE LINUX Products GmbH - Nuernberg - AG Nuernberg - HRB 16746
GF: Jeff Hawn, Jennifer Guild, Felix Imend


Re: [PATCH] Further restrict TER replacing over calls (PR55752)

2012-12-20 Thread Richard Biener
On Thu, 20 Dec 2012, Richard Biener wrote:

 On Thu, 20 Dec 2012, Jakub Jelinek wrote:
 
  On Thu, Dec 20, 2012 at 02:51:55PM +0100, Richard Biener wrote:
   In the PR we perform expression replacement of an FP operation
   across a builtin call that sets the FP control register.  This
   patch restricts replacement across calls further, from allowing
   all builtins to only allowing those without side-effects.
   
   Allowing replacement over calls at all was to not pessimize
   FP code generation for example for sqrt which is most often
   expanded to a single instruction.
   
   Bootstrap and regtest running on x86_64-unknown-linux-gnu.
   
   Comments?
  
  Wouldn't it be better to have there a list of known builtins over which it
  is fine to do TER?  I'd bet most of memory or string builtins that don't
  call malloc/free should be still ok, but they surely have side-effects.

Btw, it would need to be a target specific list as most xmm intrinsic
builtins are fine to replace over.

Richard.


Re: [PATCH i386]: Enable push/pop in pro/epilogue for modern CPUs

2012-12-20 Thread H.J. Lu
On Thu, Dec 20, 2012 at 4:13 AM, Melik-adamyan, Areg
areg.melik-adam...@intel.com wrote:
 We checked,  no significant gains or losses.

 -Original Message-
 From: H.J. Lu [mailto:hjl.to...@gmail.com]
 Sent: Friday, December 14, 2012 1:03 AM
 To: Jan Hubicka
 Cc: Jakub Jelinek; Xinliang David Li; GCC Patches; Teresa Johnson; 
 Melik-adamyan, Areg
 Subject: Re: [PATCH i386]: Enable push/pop in pro/epilogue for modern CPUs

 On Thu, Dec 13, 2012 at 12:40 PM, Jan Hubicka hubi...@ucw.cz wrote:
  Here we speak about memcpy/memset only.  I never got around to
  modernize strlen and friends, unfortunately...
 
   memcmp and friends are different beasts.  They really need some TLC...

 memcpy and memset in glibc are also extremely fast.

 The default strategy now is to inline only when the block is known to
 be small (either constant or via profile feedback, we do not really
 use the info on upper bound of size of the copied object that would be
  useful but not readily available at expansion time).

 You can try the test_stringop script I attached and send me the
 results.  For

 Areg, can you give it a try?  Thanks.


Hi Areg,

Did you mean inlined memcpy/memset are as fast as
the ones in libc.so on both ia32 and Intel64?

Please keep in mind that memcpy/memset in libc.a
may not be optimized.  You must not use -static for
linking.

-- 
H.J.


Re: [PATCH] Further restrict TER replacing over calls (PR55752)

2012-12-20 Thread Richard Biener
On Thu, 20 Dec 2012, Richard Biener wrote:

 On Thu, 20 Dec 2012, Jakub Jelinek wrote:
 
  On Thu, Dec 20, 2012 at 02:51:55PM +0100, Richard Biener wrote:
   In the PR we perform expression replacement of an FP operation
   across a builtin call that sets the FP control register.  This
   patch restricts replacement across calls further, from allowing
   all builtins to only allowing those without side-effects.
   
   Allowing replacement over calls at all was to not pessimize
   FP code generation for example for sqrt which is most often
   expanded to a single instruction.
   
   Bootstrap and regtest running on x86_64-unknown-linux-gnu.
   
   Comments?
  
  Wouldn't it be better to have there a list of known builtins over which it
  is fine to do TER?  I'd bet most of memory or string builtins that don't
  call malloc/free should be still ok, but they surely have side-effects.
 
 I'm not sure - the original reason was that replacing across calls
 made us spill more because there was a call.  We agreed that replacing
 across calls isn't usually a good idea but put in the (admittedly bad)
 workaround to still allow doing so across likely-not-calls.
 string builtins generally will expand to calls though.
 
 I was thinking of even making it stronger and increment cur_call_cnt
 when the stmt (even non-call) has side-effects (would for example
 cover volatile asms or general volatile touching insns).

Like so:

Index: gcc/tree-ssa-ter.c
===
--- gcc/tree-ssa-ter.c  (revision 194632)
+++ gcc/tree-ssa-ter.c  (working copy)
@@ -681,12 +681,13 @@ find_replaceable_in_bb (temp_expr_table_
kill_expr (tab, partition);
}
 
-  /* Increment counter if this is a non BUILT_IN call. We allow
-replacement over BUILT_IN calls since many will expand to inline
-insns instead of a true call.  */
-  if (is_gimple_call (stmt)
-      && !((fndecl = gimple_call_fndecl (stmt))
-           && DECL_BUILT_IN (fndecl)))
+  /* Increment counter if this is not a BUILT_IN call or a stmt with
+     side-effects.  We allow replacement over BUILT_IN calls
+     since many will expand to inline insns instead of a true call.  */
+  if (gimple_has_side_effects (stmt)
+      || (is_gimple_call (stmt)
+          && !((fndecl = gimple_call_fndecl (stmt))
+               && DECL_BUILT_IN (fndecl))))
cur_call_cnt++;
 
   /* Now see if we are creating a new expression or not.  */

Richard.


[patch] fix libstdc++/55741 - use Sleep on mingw

2012-12-20 Thread Jonathan Wakely
PR libstdc++/55741
* acinclude.m4 (GLIBCXX_ENABLE_LIBSTDCXX_TIME): Check for Sleep.
* config.h.in: Regenerate.
* configure: Regenerate.
* src/c++11/thread.cc (__sleep_for): Use Sleep if available.

Tested by Kai (thanks), committed to trunk.
commit 1149c65a987eba50ad0138a48729b020e7d8d0bd
Author: Jonathan Wakely jwakely@gmail.com
Date:   Thu Dec 20 14:29:54 2012 +

PR libstdc++/55741
* acinclude.m4 (GLIBCXX_ENABLE_LIBSTDCXX_TIME): Check for Sleep.
* config.h.in: Regenerate.
* configure: Regenerate.
* src/c++11/thread.cc (__sleep_for): Use Sleep if available.

diff --git a/libstdc++-v3/acinclude.m4 b/libstdc++-v3/acinclude.m4
index 281ee7e..2d4d7f0 100644
--- a/libstdc++-v3/acinclude.m4
+++ b/libstdc++-v3/acinclude.m4
@@ -1301,6 +1301,17 @@ AC_DEFUN([GLIBCXX_ENABLE_LIBSTDCXX_TIME], [
   AC_MSG_RESULT($ac_has_usleep)
   fi
 
+  if test x$ac_has_nanosleep$ac_has_sleep = xnono; then
+  AC_MSG_CHECKING([for Sleep])
+  AC_TRY_COMPILE([#include <windows.h>],
+ [Sleep(1)],
+ [ac_has_win32_sleep=yes],[ac_has_win32_sleep=no])
+  if test x$ac_has_win32_sleep = xyes; then
+AC_DEFINE(HAVE_WIN32_SLEEP,1, [Defined if Sleep exists.])
+  fi
+  AC_MSG_RESULT($ac_has_win32_sleep)
+  fi
+
   AC_SUBST(GLIBCXX_LIBS)
 
   CXXFLAGS=$ac_save_CXXFLAGS
diff --git a/libstdc++-v3/src/c++11/thread.cc b/libstdc++-v3/src/c++11/thread.cc
index fa86a1b..b04e6dc 100644
--- a/libstdc++-v3/src/c++11/thread.cc
+++ b/libstdc++-v3/src/c++11/thread.cc
@@ -61,6 +61,8 @@ static inline int get_nprocs()
 #ifndef _GLIBCXX_USE_NANOSLEEP
 # ifdef _GLIBCXX_HAVE_SLEEP
#  include <unistd.h>
+# elif defined(_GLIBCXX_HAVE_WIN32_SLEEP)
+#  include <windows.h>
 # else
 #  error No sleep function known for this target
 # endif
@@ -170,9 +172,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 static_cast<long>(__ns.count())
   };
 ::nanosleep(__ts, 0);
-#else
-# ifdef _GLIBCXX_HAVE_SLEEP
-#  ifdef _GLIBCXX_HAVE_USLEEP
+#elif defined(_GLIBCXX_HAVE_SLEEP)
+# ifdef _GLIBCXX_HAVE_USLEEP
 ::sleep(__s.count());
 if (__ns.count() > 0)
   {
@@ -181,10 +182,14 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   __us = 1;
 ::usleep(__us);
   }
-#  else
+# else
 ::sleep(__s.count() + (__ns = 100));
-#  endif
 # endif
+#elif defined(_GLIBCXX_HAVE_WIN32_SLEEP)
+unsigned long ms = __ns.count() / 100;
+if (__ns.count() > 0 && ms == 0)
+  ms = 1;
+::Sleep(chrono::milliseconds(__s).count() + ms);
 #endif
   }
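
As a side note, the millisecond conversion in the new Win32 branch can be
sketched standalone like this (a Windows-only sketch with assumed constants;
the committed code above operates on <chrono> durations instead):

#include <windows.h>

/* Sketch: sleep for secs seconds plus nsecs nanoseconds, rounding any
   nonzero sub-millisecond remainder up to 1 ms so the request never
   degenerates into Sleep(0).  */
static void
sleep_for_approx (unsigned long long secs, unsigned long long nsecs)
{
  unsigned long ms = (unsigned long) (nsecs / 1000000);
  if (nsecs > 0 && ms == 0)
    ms = 1;
  Sleep ((DWORD) (secs * 1000 + ms));
}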
 


Re: [PATCH i386]: Enable push/pop in pro/epilogue for modern CPUs

2012-12-20 Thread Jan Hubicka
 Hi Areg,
 
 Did you mean inlined memcpy/memset are as fast as
 the ones in libc.so on both ia32 and Intel64?

I would be interested in output of the stringop script.
 
 Please keep in mind that memcpy/memset in libc.a
 may not be optimized.  You must not use -static for
 linking.

In my setup I use dynamic linking...
(this is quite an annoying property in general - people tend to use --static
for performance-critical binaries to save the expense of PIC.  It would be
really cool to have a way to call the proper stringops based on the -march
switch)

Honza
 
 -- 
 H.J.


Re: [PATCH i386]: Enable push/pop in pro/epilogue for modern CPUs

2012-12-20 Thread Jan Hubicka
  Hi Areg,
  
  Did you mean inlined memcpy/memset are as fast as
  the ones in libc.so on both ia32 and Intel64?
 
 I would be interested in output of the stringop script.

Also as far as I can remember, none of spec2k6 benchmarks is really stringop
bound.  On Spec2k GCC was quite bound by memset (within alloc_rtx and bitmap
operations) but mostly by collecting page faults there.  Inlining that one made
quite a lot of difference on K8 hardware, but not on later chips.

Honza


Re: [PATCH] Further restrict TER replacing over calls (PR55752)

2012-12-20 Thread Richard Biener
On Thu, 20 Dec 2012, Richard Biener wrote:

 On Thu, 20 Dec 2012, Richard Biener wrote:
 
  On Thu, 20 Dec 2012, Jakub Jelinek wrote:
  
   On Thu, Dec 20, 2012 at 02:51:55PM +0100, Richard Biener wrote:
In the PR we perform expression replacement of an FP operation
across a builtin call that sets the FP control register.  This
patch restricts replacement across calls further, from allowing
all builtins to only allowing those without side-effects.

Allowing replacement over calls at all was to not pessimize
FP code generation for example for sqrt which is most often
expanded to a single instruction.

Bootstrap and regtest running on x86_64-unknown-linux-gnu.

Comments?
   
   Wouldn't it be better to have there a list of known builtins over which it
   is fine to do TER?  I'd bet most of memory or string builtins that don't
   call malloc/free should be still ok, but they surely have side-effects.
  
  I'm not sure - the original reason was that replacing across calls
  made us spill more because there was a call.  We agreed that replacing
  across calls isn't usually a good idea but put in the (admittedly bad)
  workaround to still allow doing so across likely-not-calls.
  string builtins generally will expand to calls though.
  
  I was thinking of even making it stronger and increment cur_call_cnt
  when the stmt (even non-call) has side-effects (would for example
  cover volatile asms or general volatile touching insns).

After discussing on IRC I am testing the following which adds
a target hook and just treats ldmxcsr and stmxcsr differently
as well as all volatile asms and internal functions.

Bootstrap  regtest on x86_64-unknown-linux-gnu running.

Ok for trunk?

Thanks,
Richard.

2012-12-20  Richard Biener  rguent...@suse.de

PR middle-end/55752
* target.def (sched): Add scheduling_barrier_p.
* targhooks.c (default_scheduling_barrier_p): New function.
* targhooks.h (default_scheduling_barrier_p): Declare.
* doc/tm.texi.in (TARGET_SCHED_SCHEDULING_BARRIER_P): Add.
* doc/tm.texi: Update.
* tree-ssa-ter.c: Include target.h.
(find_replaceable_in_bb): Do not schedule across volatile
asms or stmts the target thinks are scheduling barriers.
Do not treat internal functions as scheduling barrier by default.
* i386/i386.c (TARGET_SCHED_SCHEDULING_BARRIER_P): Override.
(ix86_scheduling_barrier_p): New function.  Handle
IX86_BUILTIN_LDMXCSR and IX86_BUILTIN_STMXCSR.
* Makefile.in (tree-ssa-ter.o): Add $(TARGET_H) dependency.

Index: gcc/target.def
===
*** gcc/target.def  (revision 194632)
--- gcc/target.def  (working copy)
*** parallelism required in output calculati
*** 939,944 
--- 939,954 
  int, (unsigned int opc, enum machine_mode mode),
  hook_int_uint_mode_1)
  
+ /* The following member value is a function that returns whether
+the statement is considered a barrier for scheduling.  By default
+this returns false.  */
+ DEFHOOK
+ (scheduling_barrier_p,
+ "This hook is called by TER to determine whether the statement is\n\
+ a scheduling barrier.",
+ bool, (gimple stmt),
+ default_scheduling_barrier_p)
+ 
  HOOK_VECTOR_END (sched)
  
  /* Functions relating to vectorization.  */
Index: gcc/targhooks.c
===
*** gcc/targhooks.c (revision 194632)
--- gcc/targhooks.c (working copy)
*** default_canonicalize_comparison (int *,
*** 1547,1550 
--- 1547,1557 
  {
  }
  
+ /* Default version of scheduling_barrier_p.  */
+ bool
+ default_scheduling_barrier_p (gimple)
+ {
+   return false;
+ }
+ 
  #include "gt-targhooks.h"
Index: gcc/targhooks.h
===
*** gcc/targhooks.h (revision 194632)
--- gcc/targhooks.h (working copy)
*** extern const char *default_pch_valid_p (
*** 195,197 
--- 195,199 
  extern void default_asm_output_ident_directive (const char*);
  
  extern bool default_member_type_forces_blk (const_tree, enum machine_mode);
+ 
+ extern bool default_scheduling_barrier_p (gimple);
Index: gcc/doc/tm.texi.in
===
*** gcc/doc/tm.texi.in  (revision 194632)
--- gcc/doc/tm.texi.in  (working copy)
*** in its second parameter.
*** 6737,6742 
--- 6737,6744 
  
  @hook TARGET_SCHED_REASSOCIATION_WIDTH
  
+ @hook TARGET_SCHED_SCHEDULING_BARRIER_P
+ 
  @node Sections
  @section Dividing the Output into Sections (Texts, Data, @dots{})
  @c the above section title is WAY too long.  maybe cut the part between
Index: gcc/doc/tm.texi
===
*** gcc/doc/tm.texi (revision 194632)
--- gcc/doc/tm.texi (working copy)
*** This hook is 
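
The i386 hunk is cut off above; purely as an illustration of the shape such a
hook could take (this sketch is not the actual patch):

/* Sketch only, not the committed implementation: treat the MXCSR
   load/store builtins as scheduling barriers for TER.  */
static bool
ix86_scheduling_barrier_p (gimple stmt)
{
  tree fndecl;

  if (is_gimple_call (stmt)
      && (fndecl = gimple_call_fndecl (stmt)) != NULL_TREE
      && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
    {
      enum ix86_builtins fcode
        = (enum ix86_builtins) DECL_FUNCTION_CODE (fndecl);
      if (fcode == IX86_BUILTIN_LDMXCSR || fcode == IX86_BUILTIN_STMXCSR)
        return true;
    }

  return false;
}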

Re: [PATCH i386]: Enable push/pop in pro/epilogue for modern CPUs

2012-12-20 Thread H.J. Lu
On Thu, Dec 20, 2012 at 7:06 AM, Jan Hubicka hubi...@ucw.cz wrote:
  Hi Areg,
 
  Did you mean inlined memcpy/memset are as fast as
  the ones in libc.so on both ia32 and Intel64?

 I would be interested in output of the stringop script.

 Also as far as I can remember, none of spec2k6 benchmarks is really stringop
 bound.  On Spec2k GCC was quite bound by memset (within alloc_rtx and bitmap
  operations) but mostly by collecting page faults there. Inlining that one made
 quite a lot of difference on K8 hardware, but not on later chips.


There is a GCC performance regression bug on EEMBC.  It turns out
that -static was used for linking, so the optimized memory functions weren't
used.  Removing -static fixed the performance regression.

-- 
H.J.


Re: [Patch, wwwdocs] Update Fortran part of the GCC 4.8 release notes

2012-12-20 Thread Tobias Burnus

Dear Paul,

Paul Richard Thomas wrote:

Could you note that class(*) is complete up to the restriction to fixed length 
character values only?


Done. See http://gcc.gnu.org/gcc-4.8/changes.html#fortran and 
http://gcc.gnu.org/wiki/GFortran#GCC4.8


I admit that the BACKTRACE announcement is slightly premature, but I 
assume that Janus will commit the patch very soon.


Tobias


Re: [PATCH] Fix combined tree for LTO

2012-12-20 Thread Thomas Schwinge
Hi!

On Sat, 10 Nov 2012 10:32:07 -0800, Andrew Pinski 
andrew.pin...@caviumnetworks.com wrote:
 2012-11-10  Andrew Pinski  apin...@cavium.com
 
 PR bootstrap/55202
 * configure.ac: Set PLUGIN_LD_SUFFIX to just ld if it was ld-new
 or collect-ld.
 * configure: Regenerate.

 Index: configure.ac
 ===
 --- configure.ac  (revision 193392)
 +++ configure.ac  (working copy)
 @@ -2003,6 +2003,12 @@ fi])
  
  ORIGINAL_PLUGIN_LD_FOR_TARGET=$gcc_cv_ld
  PLUGIN_LD_SUFFIX=`basename $gcc_cv_ld | sed -e s,$target_alias-,,`
 +# if the PLUGIN_LD is set ld-new, just have it as ld
 +# as that is the installed named.
 +if test x$PLUGIN_LD_SUFFIX == xld-new \
 +   || test x$PLUGIN_LD_SUFFIX == xcollect-ld ; then
 +  PLUGIN_LD_SUFFIX=ld
 +fi

Using dash, this caused:

checking for ld... /usr/bin/ld
[...]/gcc/configure: 21384: test: xld: unexpected operator
[...]/gcc/configure: 21385: test: xld: unexpected operator
checking whether we are using gold... no

Fixed in r194637:

PR bootstrap/55202
* configure.ac PLUGIN_LD_SUFFIX: Use POSIX shell syntax.
* configure: Regenerate.

diff --git gcc/configure.ac gcc/configure.ac
index c6f57bd..7abe7cf 100644
--- gcc/configure.ac
+++ gcc/configure.ac
@@ -2031,8 +2031,8 @@ ORIGINAL_PLUGIN_LD_FOR_TARGET=$gcc_cv_ld
 PLUGIN_LD_SUFFIX=`basename $gcc_cv_ld | sed -e s,$target_alias-,,`
 # if the PLUGIN_LD is set ld-new, just have it as ld
 # as that is the installed named.
-if test x$PLUGIN_LD_SUFFIX == xld-new \
-   || test x$PLUGIN_LD_SUFFIX == xcollect-ld ; then
+if test x$PLUGIN_LD_SUFFIX = xld-new \
+   || test x$PLUGIN_LD_SUFFIX = xcollect-ld ; then
   PLUGIN_LD_SUFFIX=ld
 fi
 AC_ARG_WITH(plugin-ld,


Grüße,
 Thomas




Fix PR55761

2012-12-20 Thread Paulo Matos
2012-12-20 Paulo Matos pma...@broadcom.com

PR tree-optimization/55761
* tree-tailcall.c (process_assignment): Use build_int_cst only
for integral types, for every other type that managed to pass
all conditions use fold_build1.





pr55761.patch
Description: pr55761.patch


Re: Fix PR55761

2012-12-20 Thread Richard Biener
On Thu, Dec 20, 2012 at 5:06 PM, Paulo Matos pma...@broadcom.com wrote:
 2012-12-20 Paulo Matos pma...@broadcom.com

 PR tree-optimization/55761
 * tree-tailcall.c (process_assignment): Use build_int_cst only for 
 integral types,
 for every other type that managed to pass all conditions use 
 fold_build1.

 case NEGATE_EXPR:
   if (FLOAT_TYPE_P (TREE_TYPE (op0)))
 *m = build_real (TREE_TYPE (op0), dconstm1);
+  else if (INTEGRAL_TYPE_P (TREE_TYPE (non_ass_var)))
+*m = build_int_cst (TREE_TYPE (non_ass_var), -1);
   else
-*m = build_int_cst (TREE_TYPE (op0), -1);
+*m = fold_build1 (NEGATE_EXPR, TREE_TYPE (non_ass_var), non_ass_var);

looks bogus (op0 vs. non_ass_var).  I'd rather use fold_unary here as I'm not
sure if callers handle a NEGATE_EXPR in *m.  And I'd use that unconditionally,
this last case looks like it will have very weak testing coverage.  Thus,

   *m = fold_unary (NEGATE_EXPR, TREE_TYPE (op0), op0);

and also in the MINUS_EXPR case.

Richard.


Re: [google 4.7] atomic update of profile counters (issue6965050)

2012-12-20 Thread Jan Hubicka
 On Wed, Dec 19, 2012 at 4:29 PM, Andrew Pinski pins...@gmail.com wrote:
 
  On Wed, Dec 19, 2012 at 12:08 PM, Rong Xu x...@google.com wrote:
   Hi,
  
   This patch adds support for atomically updating the profile counters.
   Tested with google internal benchmarks and fdo kernel build.
 
  I think you should use the __atomic_ functions instead of __sync_
  functions as they allow better performance for simple counters as you
  can use __ATOMIC_RELAXED.
 
 You are right. I think __ATOMIC_RELAXED should be OK here.
 Thanks for the suggestion.
 
 
  And this would be useful for the trunk also.  I was going to implement
  this exact thing this week but some other important stuff came up.
 
 I'll post trunk patch later.

Yes, I like that patch, too. Even if the costs are quite high (and this is why
atomic updates were sort of voted down in the past), the alternative of using
TLS has problems with too much per-thread memory.

While there are even more alternatives, like recording the changes and
committing them in blocks (say at function return), I guess some solution is
better than no solution.
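
For reference, the difference being discussed, in a minimal form (illustrative
only, not the libgcov patch itself):

#include <stdint.h>

int64_t counter;

void
bump_legacy (void)
{
  /* Older __sync_ builtin: implies full-barrier semantics.  */
  __sync_fetch_and_add (&counter, 1);
}

void
bump_relaxed (void)
{
  /* __atomic_ builtin with __ATOMIC_RELAXED: atomicity without any
     ordering guarantees, which is all a profile counter needs.  */
  __atomic_fetch_add (&counter, 1, __ATOMIC_RELAXED);
}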

Thanks,
Honza


RE: Fix PR55761

2012-12-20 Thread Paulo Matos
 -Original Message-
 From: Richard Biener [mailto:richard.guent...@gmail.com]
 Sent: 20 December 2012 16:13
 To: Paulo Matos
 Cc: gcc-patches@gcc.gnu.org
 Subject: Re: Fix PR55761
 
 On Thu, Dec 20, 2012 at 5:06 PM, Paulo Matos pma...@broadcom.com wrote:
  2012-12-20 Paulo Matos pma...@broadcom.com
 
  PR tree-optimization/55761
  * tree-tailcall.c (process_assignment): Use build_int_cst only for
 integral types,
  for every other type that managed to pass all conditions use
 fold_build1.
 
  case NEGATE_EXPR:
if (FLOAT_TYPE_P (TREE_TYPE (op0)))
  *m = build_real (TREE_TYPE (op0), dconstm1);
 +  else if (INTEGRAL_TYPE_P (TREE_TYPE (non_ass_var)))
 +*m = build_int_cst (TREE_TYPE (non_ass_var), -1);
else
 -*m = build_int_cst (TREE_TYPE (op0), -1);
 +*m = fold_build1 (NEGATE_EXPR, TREE_TYPE (non_ass_var),
 non_ass_var);
 
 looks bogus (op0 vs. non_ass_var). 

Correct. My mistake applying same MINUS_EXPR pattern to NEGATE_EXPR case.

 I'd rather use fold_unary here as I'm not
 sure if callers handle a NEGATE_EXPR in *m.  And I'd use that
 unconditionally,
 this last case looks like it will have very weak testing coverage.  Thus,
 
*m = fold_unary (NEGATE_EXPR, TREE_TYPE (op0), op0);
 
 and also in the MINUS_EXPR case.
 

Sounds reasonable. That would simplify it, it seems. Will fix patch and replace 
it in PR.

 Richard.



Re: [google 4.7] atomic update of profile counters (issue6965050)

2012-12-20 Thread Andrew Pinski
On Thu, Dec 20, 2012 at 8:20 AM, Jan Hubicka hubi...@ucw.cz wrote:
 On Wed, Dec 19, 2012 at 4:29 PM, Andrew Pinski pins...@gmail.com wrote:
 
  On Wed, Dec 19, 2012 at 12:08 PM, Rong Xu x...@google.com wrote:
   Hi,
  
    This patch adds support for atomically updating the profile counters.
   Tested with google internal benchmarks and fdo kernel build.
 
  I think you should use the __atomic_ functions instead of __sync_
  functions as they allow better performance for simple counters as you
  can use __ATOMIC_RELAXED.

 You are right. I think __ATOMIC_RELAXED should be OK here.
 Thanks for the suggestion.

 
  And this would be useful for the trunk also.  I was going to implement
  this exact thing this week but some other important stuff came up.

 I'll post trunk patch later.

 Yes, I like that patch, too. Even if the costs are quite high (and this is why
 atomic updates were sort of voted down in the past) the alternative of using
 TLS
 has problems with too much per-thread memory.

Actually sometimes (on some processors) atomic increments are cheaper
than doing a regular increment.  Mainly because there is an
instruction which can handle it in the L2 cache rather than populating
the L1.  Octeon is one such processor where this is true.

Thanks,
Andrew Pinski


 While there are even more alternatives, like recording the changes and
 committing them in blocks (say at function return), I guess some solution is
 better than no solution.

 Thanks,
 Honza


[PATCH] Fix postincrement/decrement of a bitfield (PR middle-end/55750)

2012-12-20 Thread Jakub Jelinek
Hi!

As the following testcase shows, the !is_gimple_min_lval code would, for bit
fields, want to take the address of those bitfields and dereference it, which
of course leads to ICEs.

As discussed with Richard on IRC, this code is not needed at all since the
PR48814 fix, so there is no need to teach it about bitfields; instead it
can simply be removed altogether.
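
To make the postfix case concrete, the semantics the gimplifier has to
implement for a bit-field are simply read, add, write back; no address is
ever taken (illustrative C, not a literal GIMPLE dump):

struct B { int m : 1; int n : 7; } b;

/* Equivalent of "b.n++": return the old value and store the incremented
   value back through the bit-field access itself.  */
static int
postfix_increment_n (void)
{
  int tmp = b.n;
  b.n = tmp + 1;
  return tmp;
}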

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2012-12-20  Jakub Jelinek  ja...@redhat.com

PR middle-end/55750
* gimplify.c (gimplify_self_mod_expr): Don't force lvalue to
pass is_gimple_min_lval.

* gcc.c-torture/execute/pr55750.c: New test.

--- gcc/gimplify.c.jj   2012-12-20 11:38:45.0 +0100
+++ gcc/gimplify.c  2012-12-20 14:45:42.586627882 +0100
@@ -2391,25 +2391,15 @@ gimplify_self_mod_expr (tree *expr_p, gi
   rhs = TREE_OPERAND (*expr_p, 1);
 
   /* For postfix operator, we evaluate the LHS to an rvalue and then use
- that as the result value and in the postqueue operation.  We also
- make sure to make lvalue a minimal lval, see
- gcc.c-torture/execute/20040313-1.c for an example where this matters.  */
+ that as the result value and in the postqueue operation.  */
   if (postfix)
 {
-  if (!is_gimple_min_lval (lvalue))
-   {
- mark_addressable (lvalue);
- lvalue = build_fold_addr_expr_loc (input_location, lvalue);
- gimplify_expr (lvalue, pre_p, post_p, is_gimple_val, fb_rvalue);
- lvalue = build_fold_indirect_ref_loc (input_location, lvalue);
-   }
   ret = gimplify_expr (lhs, pre_p, post_p, is_gimple_val, fb_rvalue);
   if (ret == GS_ERROR)
return ret;
-}
 
-  if (postfix)
-lhs = get_initialized_tmp_var (lhs, pre_p, NULL);
+  lhs = get_initialized_tmp_var (lhs, pre_p, NULL);
+}
 
   /* For POINTERs increment, use POINTER_PLUS_EXPR.  */
   if (POINTER_TYPE_P (TREE_TYPE (lhs)))
--- gcc/testsuite/gcc.c-torture/execute/pr55750.c.jj	2012-12-20 14:24:03.487344949 +0100
+++ gcc/testsuite/gcc.c-torture/execute/pr55750.c	2012-12-20 14:25:10.0 +0100
@@ -0,0 +1,29 @@
+/* PR middle-end/55750 */
+
+extern void abort (void);
+
+struct S
+{
+  int m : 1;
+  int n : 7;
+} arr[2];
+
+__attribute__((noinline, noclone)) void
+foo (unsigned i)
+{
+  arr[i].n++;
+}
+
+int
+main ()
+{
+  arr[0].m = -1;
+  arr[0].n = (1 << 6) - 1;
+  arr[1].m = 0;
+  arr[1].n = -1;
+  foo (0);
+  foo (1);
+  if (arr[0].m != -1 || arr[0].n != -(1 << 6) || arr[1].m != 0 || arr[1].n != 0)
+abort ();
+  return 0;
+}

Jakub


Re: [Patch, wwwdocs] Update Fortran part of the GCC 4.8 release notes

2012-12-20 Thread Janus Weil
 I admit that the BACKTRACE announcement is slightly premature, but I assume
 that Janus will commit the patch very soon.

yes, it's only a matter of a few hours now ;)

Cheers,
Janus


Re: [PATCH] Use new dump scheme to emit loop unroll/peel summary info (issue6941070)

2012-12-20 Thread Teresa Johnson
On Thu, Dec 20, 2012 at 1:21 AM, Bernhard Reutner-Fischer
rep.dot@gmail.com wrote:

Thanks for your comments. Responses inlined below, and new patch include below.

 On Mon, Dec 17, 2012 at 10:44:59PM -0800, Teresa Johnson wrote:
Index: tree-ssa-loop-ivcanon.c
===
--- tree-ssa-loop-ivcanon.c(revision 194516)
+++ tree-ssa-loop-ivcanon.c(working copy)
@@ -639,22 +639,24 @@ unloop_loops (bitmap loop_closed_ssa_invalidated,

 /* Tries to unroll LOOP completely, i.e. NITER times.
UL determines which loops we are allowed to unroll.
-   EXIT is the exit of the loop that should be eliminated.
+   EXIT is the exit of the loop that should be eliminated.
MAXITER specfy bound on number of iterations, -1 if it is
-   not known or too large for HOST_WIDE_INT.  */
+   not known or too large for HOST_WIDE_INT. The location
+   LOCUS corresponding to the loop is used when emitting
+   a summary of the unroll to the dump file.  */

 static bool
 try_unroll_loop_completely (struct loop *loop,
   edge exit, tree niter,
   enum unroll_level ul,
-  HOST_WIDE_INT maxiter)
+  HOST_WIDE_INT maxiter,
+location_t locus)

 whitespace damage?

This and the other location you pointed out below as possible
whitespace damage are because the surrounding lines use tab characters
whereas mine uses spaces. Is there a guideline on which one is correct
for gcc? I looked in the style guide but didn't find anything. The
existing code uses a mix of indentation via tabs and spaces. I have
fixed this location and the one you point out below to use a tab
character so that the diff goes away, but I haven't searched the patch
exhaustively for similar issues.


Index: loop-unroll.c
===
--- loop-unroll.c  (revision 194516)
+++ loop-unroll.c  (working copy)
@@ -148,6 +148,61 @@ static void combine_var_copies_in_loop_exit (struc
basic_block);
 static rtx get_expansion (struct var_to_expand *);

+/* Emit a message summarizing the unroll or peel that will be
+   performed for LOOP, along with the loop's location LOCUS, if
+   appropriate given the dump or -fopt-info settings.  */
+
+static void
+report_unroll_peel(struct loop *loop, location_t locus)

 missing space before (

 contrib/check_GNU_style.sh generally says:
 Dot, space, space, new sentence.
 loop-dump.01.patch:223:+   not known or too large for HOST_WIDE_INT. The 
 location
 loop-dump.01.patch:514:+   * of the for or while statement, if possible. To 
 do this, look

 Dot, space, space, end of comment.
 loop-dump.01.patch:504:+/* Return location corresponding to the loop control 
 condition if possible. */
 loop-dump.01.patch:541:+  /* Next check the latch, to see if it is non-empty. 
 *
 loop-dump.01.patch:555:+  /* If all else fails, simply return the current 
 function location. */

 There should be exactly one space between function name and parentheses.
 loop-dump.01.patch:329:+report_unroll_peel(struct loop *loop, location_t 
 locus)
 loop-dump.01.patch:386:+  location_t locus = get_loop_location(loop);
 loop-dump.01.patch:404:+  report_unroll_peel(loop, locus);
 loop-dump.01.patch:412:+  location_t locus = get_loop_location(loop);
 loop-dump.01.patch:429:+  report_unroll_peel(loop, locus);
 loop-dump.01.patch:533:+  if ((exit = single_exit(loop)))

I fixed all these and verified that check_GNU_style.sh no longer reports these.


@@ -248,6 +305,7 @@ peel_loops_completely (int flags)

   if (loop->lpt_decision.decision == LPT_PEEL_COMPLETELY)
   {
+  report_unroll_peel(loop, locus);
 peel_loop_completely (loop);

 whitespace damage? You seem to have this kind of whitespace error
 throughout the patch. I take it you are aware of
 http://gcc.gnu.org/wiki/FormattingCodeForGCC
 and just forgot to have it on the machine you edited?

This was the same issue described above (tab vs space). As noted
above, I fixed this instance too, but there may be others and I'm not
sure what is required or correct.


 I seemingly have
 $ cat ~/.vim/gcc_style.vim
  put this plugin into ~/.vim/gcc_style.vim and source it into your ~/.vimrc 
 via
  source ~/.vim/gcc_style.vim
 if exists("g:loaded_gcc_style") || &cp
   finish
 endif
 let g:loaded_gcc_style = 1

 augroup gcc_style
   autocmd BufReadPost,FileReadPost * call s:maybe_gcc_style()
 augroup END
 if exists("*s:maybe_gcc_style")
   finish
 endif
 let s:cpo_save = &cpo
 set cpo&vim

 function! s:maybe_gcc_style()
   let s:i = 1 + 0
   while s:i <= line("$") && s:i <= 25
 let s:line = getline(s:i)
 if s:line =~ '^\s*This\sfile\sis\spart\sof\sGCC.*'
gcc-mode
   set cino=:s,{s,n-s,2s,^-s
   set sw=2
   set sts=2
   set cindent
   set smartindent
   set autoindent
   break

Re: [PATCH] Fix postincrement/decrement of a bitfield (PR middle-end/55750)

2012-12-20 Thread rguenther
Jakub Jelinek ja...@redhat.com wrote:

Hi!

As the following testcase shows, the !is_gimple_min_lval code would for
bit
fields want to take address of those bitfields and dereference it,
which of
course leads to ICEs.

As discussed with Richard on IRC, this code is not needed at all since
PR48814 fix, so there is no need to teach it about bitfields and
instead it
can be just removed altogether.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

Ok.

Thanks,
Richard.

2012-12-20  Jakub Jelinek  ja...@redhat.com

   PR middle-end/55750
   * gimplify.c (gimplify_self_mod_expr): Don't force lvalue to
   pass is_gimple_min_lval.

   * gcc.c-torture/execute/pr55750.c: New test.

--- gcc/gimplify.c.jj  2012-12-20 11:38:45.0 +0100
+++ gcc/gimplify.c 2012-12-20 14:45:42.586627882 +0100
@@ -2391,25 +2391,15 @@ gimplify_self_mod_expr (tree *expr_p, gi
   rhs = TREE_OPERAND (*expr_p, 1);
 
 /* For postfix operator, we evaluate the LHS to an rvalue and then use
- that as the result value and in the postqueue operation.  We also
- make sure to make lvalue a minimal lval, see
- gcc.c-torture/execute/20040313-1.c for an example where this
matters.  */
+ that as the result value and in the postqueue operation.  */
   if (postfix)
 {
-  if (!is_gimple_min_lval (lvalue))
-  {
-mark_addressable (lvalue);
-lvalue = build_fold_addr_expr_loc (input_location, lvalue);
-gimplify_expr (lvalue, pre_p, post_p, is_gimple_val, fb_rvalue);
-lvalue = build_fold_indirect_ref_loc (input_location, lvalue);
-  }
   ret = gimplify_expr (lhs, pre_p, post_p, is_gimple_val, fb_rvalue);
   if (ret == GS_ERROR)
   return ret;
-}
 
-  if (postfix)
-lhs = get_initialized_tmp_var (lhs, pre_p, NULL);
+  lhs = get_initialized_tmp_var (lhs, pre_p, NULL);
+}
 
   /* For POINTERs increment, use POINTER_PLUS_EXPR.  */
   if (POINTER_TYPE_P (TREE_TYPE (lhs)))
--- gcc/testsuite/gcc.c-torture/execute/pr55750.c.jj   2012-12-20
14:24:03.487344949 +0100
+++ gcc/testsuite/gcc.c-torture/execute/pr55750.c  2012-12-20
14:25:10.0 +0100
@@ -0,0 +1,29 @@
+/* PR middle-end/55750 */
+
+extern void abort (void);
+
+struct S
+{
+  int m : 1;
+  int n : 7;
+} arr[2];
+
+__attribute__((noinline, noclone)) void
+foo (unsigned i)
+{
+  arr[i].n++;
+}
+
+int
+main ()
+{
+  arr[0].m = -1;
+  arr[0].n = (1 << 6) - 1;
+  arr[1].m = 0;
+  arr[1].n = -1;
+  foo (0);
+  foo (1);
+  if (arr[0].m != -1 || arr[0].n != -(1 << 6) || arr[1].m != 0 ||
arr[1].n != 0)
+abort ();
+  return 0;
+}

   Jakub


-- 
Sent from my Android phone with K-9 Mail. Please excuse my brevity.


[PATCH, ARM] Initial pipeline description for Cortex-A7

2012-12-20 Thread Greta Yorsh
Currently, GCC uses generic ARMv7-A tuning for Cortex-A7.
This patch adds an initial pipeline description for Cortex-A7. Details:
* integer/vfp is based on the pipeline description for Cortex-A5,
* models dual issue in limited circumstances using simple_alu_imm and
simple_alu_shift type attribute (introduced by a previous patch),
* basic neon timings.

No regression on qemu for arm-none-eabi target with cpu cortex-a7.

Bootstrap successful on Cortex-A15 (gcc configured with cpu cortex-a7).

Performance evaluation on Cortex-A7 hardware:

Coremark: 
* No change compared to generic tuning even though the generated assembly is
significantly different due to instruction scheduling. 
* Improvement compared to tuning for Cortex-A5: 4% improvement in arm mode
and 9% improvement in thumb mode.
CINT2000:
* compared to generic tuning, overall improvement of 1.9%.
* compared to tuning for Cortex-A5, overall improvement of 1.5%.
* in both cases, all benchmarks improved except 254.gap.
CFP2000:
* compared to generic tuning (which doesn't do much for FP), overall
improvement of 5.5%, all benchmarks improved.
* compared to Cortex-A5 tuning (as pipeline descriptions are nearly
identical) overall no change, but individual benchmarks mixed results.

Ok for trunk?

Thanks,
Greta

gcc/ChangeLog

2012-12-20  Greta Yorsh  greta.yo...@arm.com

* config/arm/cortex-a7.md: New file.
* config/arm/arm.md: Include cortex-a7.md.
(generic_sched): Don't use generic scheduler for Cortex-A7.
(generic_vfp): Likewise.
* config/arm/t-arm (arm_cpu_table): Likewise.
* config/arm/arm.c: (TARGET_SCHED_REORDER): Use arm_sched_reorder.
(arm_sched_reorder): New function.
(cortexa7_older_only, cortexa7_younger): Likewise.
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 84ce56f..ab6c88b 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -132,6 +132,7 @@ static void arm_output_function_prologue (FILE *, 
HOST_WIDE_INT);
 static int arm_comp_type_attributes (const_tree, const_tree);
 static void arm_set_default_type_attributes (tree);
 static int arm_adjust_cost (rtx, rtx, rtx, int);
+static int arm_sched_reorder (FILE *, int, rtx *, int *, int);
 static int optimal_immediate_sequence (enum rtx_code code,
   unsigned HOST_WIDE_INT val,
   struct four_ints *return_sequence);
@@ -366,6 +367,9 @@ static const struct attribute_spec arm_attribute_table[] =
 #undef  TARGET_SCHED_ADJUST_COST
 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
 
+#undef TARGET_SCHED_REORDER
+#define TARGET_SCHED_REORDER arm_sched_reorder
+
 #undef TARGET_REGISTER_MOVE_COST
 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
 
@@ -8680,6 +8684,164 @@ arm_memory_move_cost (enum machine_mode mode, 
reg_class_t rclass,
 }
 }
 
+
+/* Return true if and only if this insn can dual-issue only as older.  */
+static bool
+cortexa7_older_only (rtx insn)
+{
+  if (recog_memoized (insn) < 0)
+return false;
+
+  if (get_attr_insn (insn) == INSN_MOV)
+return false;
+
+  switch (get_attr_type (insn))
+{
+case TYPE_ALU_REG:
+case TYPE_LOAD_BYTE:
+case TYPE_LOAD1:
+case TYPE_STORE1:
+case TYPE_FFARITHS:
+case TYPE_FADDS:
+case TYPE_FFARITHD:
+case TYPE_FADDD:
+case TYPE_FCPYS:
+case TYPE_F_CVT:
+case TYPE_FCMPS:
+case TYPE_FCMPD:
+case TYPE_FCONSTS:
+case TYPE_FCONSTD:
+case TYPE_FMULS:
+case TYPE_FMACS:
+case TYPE_FMULD:
+case TYPE_FMACD:
+case TYPE_FDIVS:
+case TYPE_FDIVD:
+case TYPE_F_2_R:
+case TYPE_F_FLAG:
+case TYPE_F_LOADS:
+case TYPE_F_STORES:
+  return true;
+default:
+  return false;
+}
+}
+
+/* Return true if and only if this insn can dual-issue as younger.  */
+static bool
+cortexa7_younger (FILE *file, int verbose, rtx insn)
+{
+  if (recog_memoized (insn) < 0)
+{
+  if (verbose > 5)
+fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
+  return false;
+}
+
+  if (get_attr_insn (insn) == INSN_MOV)
+return true;
+
+  switch (get_attr_type (insn))
+{
+case TYPE_SIMPLE_ALU_IMM:
+case TYPE_SIMPLE_ALU_SHIFT:
+case TYPE_BRANCH:
+  return true;
+default:
+  return false;
+}
+}
+
+
+/* Look for an instruction that can dual issue only as an older
+   instruction, and move it in front of any instructions that can
+   dual-issue as younger, while preserving the relative order of all
+   other instructions in the ready list.  This is a heuristic to help
+   dual-issue in later cycles, by postponing issue of more flexible
+   instructions.  This heuristic may affect dual issue opportunities
+   in the current cycle.  */
+static void
+cortexa7_sched_reorder (FILE *file, int verbose, rtx *ready, int *n_readyp,
+int clock)
+{
+  int i;
+  int first_older_only = -1, first_younger = -1;
+
+  if (verbose > 5)
+fprintf 

[patch] fix install dependencies for target libraries

2012-12-20 Thread Matthias Klose
This was seen with the libgo installation [1], but from my point of view can
happen when the install target is called with -j >1, libtool seems to fall back
to the system libraries if the library in the install location is not available
(which is always the case if you install into an empty dir set with DESTDIR).
Currently it just works for a non-parallel install because the dependencies in
Makefile.def are created in the right order.

Ok for the trunk?

  Matthias

[1] http://gcc.gnu.org/ml/gcc-patches/2012-12/msg01192.html


2012-12-20  Matthias Klose  d...@ubuntu.com

	* Makefile.def (install-target-libgfortran): Depend on
	install-target-libquadmath, install-target-libgcc.
	(install-target-libsanitizer): Depend on install-target-libgcc.
	(install-target-libjava): Depend on install-target-libgcc.
	(install-target-libitm): Depend on install-target-libgcc.
	(install-target-libobjc): Depend on install-target-libgcc.
	(install-target-libstdc++-v3): Depend on install-target-libgcc.
	* Makefile.in: Regenerate.

Index: Makefile.def
===
--- Makefile.def	(Revision 194635)
+++ Makefile.def	(Arbeitskopie)
@@ -515,6 +515,13 @@
 dependencies = { module=all-target-libstdc++-v3; on=configure-target-libgomp; };
 
 dependencies = { module=install-target-libgo; on=install-target-libatomic; };
+dependencies = { module=install-target-libgfortran; on=install-target-libquadmath; };
+dependencies = { module=install-target-libgfortran; on=install-target-libgcc; };
+dependencies = { module=install-target-libsanitizer; on=install-target-libgcc; };
+dependencies = { module=install-target-libjava; on=install-target-libgcc; };
+dependencies = { module=install-target-libitm; on=install-target-libgcc; };
+dependencies = { module=install-target-libobjc; on=install-target-libgcc; };
+dependencies = { module=install-target-libstdc++-v3; on=install-target-libgcc; };
 
 // Target modules in the 'src' repository.
 lang_env_dependencies = { module=libtermcap; };


Re: [patch] fix install dependencies for target libraries

2012-12-20 Thread Ian Lance Taylor
On Thu, Dec 20, 2012 at 10:22 AM, Matthias Klose d...@ubuntu.com wrote:
 This was seen with the libgo installation [1], but from my point of view can
 happen when the install target is called with -j 1, libtool seems to fall 
 back
 to the system libraries if the library in the install location is not 
 available
 (which is always the case if you install into an empty dir set with DESTDIR).
 Currently it just works for a non-parallel install because the dependencies in
 Makefile.def are created in the right order.

 Ok for the trunk?

This is OK with a ChangeLog entry.

Thanks.

Ian


Re: [patch] fix install dependencies for target libraries

2012-12-20 Thread Matthias Klose
Am 20.12.2012 20:11, schrieb Ian Lance Taylor:
 On Thu, Dec 20, 2012 at 10:22 AM, Matthias Klose d...@ubuntu.com wrote:
 This was seen with the libgo installation [1], but from my point of view can
 happen when the install target is called with -j 1, libtool seems to fall 
 back
 to the system libraries if the library in the install location is not 
 available
 (which is always the case if you install into an empty dir set with DESTDIR).
 Currently it just works for a non-parallel install because the dependencies 
 in
 Makefile.def are created in the right order.

 Ok for the trunk?
 
 This is OK with a ChangeLog entry.

committed, with the ChangeLog entry from the original mail.

  Matthias



Re: [google 4.7] fdo build for linux kernel (issue 6968046)

2012-12-20 Thread Rong Xu
On Wed, Dec 19, 2012 at 5:22 PM, Rong Xu x...@google.com wrote:
 On Wed, Dec 19, 2012 at 5:04 PM,  davi...@google.com wrote:
 The change in gcov-io.h is from a different patch.

 sorry. here is the patch for gcov-io.h:

 Index: gcov-io.h
 ===
 --- gcov-io.h   (revision 194562)
 +++ gcov-io.h   (working copy)
 @@ -781,8 +781,8 @@
   unused) */

unsigned n_functions;/* number of functions */
 -  const struct gcov_fn_info *const *functions; /* pointer to pointers
 - to function information  */
 +  const struct gcov_fn_info **functions; /* pointer to pointers
 +   to function information  */
  };

  /* Information about a single imported module.  */
 @@ -988,8 +988,7 @@
  GCOV_LINKAGE void gcov_seek (gcov_position_t /*position*/) ATTRIBUTE_HIDDEN;
  GCOV_LINKAGE void gcov_truncate (void) ATTRIBUTE_HIDDEN;
  GCOV_LINKAGE gcov_unsigned_t gcov_string_length (const char *)
 ATTRIBUTE_HIDDEN;
 -GCOV_LINKAGE unsigned gcov_gcda_file_size (struct gcov_info *,
 -   struct gcov_summary *);
 +GCOV_LINKAGE unsigned gcov_gcda_file_size (struct gcov_info *);
  #else
  /* Available outside libgcov */
  GCOV_LINKAGE void gcov_sync (gcov_position_t /*base*/,


 David


 https://codereview.appspot.com/6968046/diff/1/gcc/gcov-io.c
 File gcc/gcov-io.c (right):

 https://codereview.appspot.com/6968046/diff/1/gcc/gcov-io.c#newcode688
 gcc/gcov-io.c:688:
 Have you compared this with this impl:

 while (x)
 {
    c++;
    x &= (x-1);
 }
 return c;


 I did not try this pimplier version. I can do a test on the dump speed
 and report back.

This simpler version is about 2% slower in dumping the profiles (average
of 10 dumps), but this is not a big deal.
I'll use this version.
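
For reference, a self-contained form of the loop being discussed (a
Kernighan-style bit count; the function name and prototype here are my own,
not part of the patch):

/* Each iteration clears the lowest set bit of x (x &= (x - 1)),
   so the loop body runs once per set bit.  */
static unsigned
count_set_bits (unsigned long long x)
{
  unsigned c = 0;
  while (x)
    {
      c++;
      x &= (x - 1);
    }
  return c;
}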

-Rong


 https://codereview.appspot.com/6968046/


Re: [fortran, patch] Allow displaying backtraces from user code

2012-12-20 Thread Janus Weil
 Attached is a new patch, which expands the documentation according to
 your proposal, and uses the name BACKTRACE. I hope that both Janne and
 Tobias can agree with this naming decision ...

 Looks fine from my side.

 Great, thanks. Janne?

 Yes, Ok for trunk.

Thanks again to both of you. Committed as r194648.

Cheers,
Janus


 Can you also add a quip to
 http://gcc.gnu.org/wiki/GFortran#GCC4.8 ?

 Sure, as soon as the patch is committed ...

 Cheers,
 Janus



 --
 Janne Blomqvist


Re: [google 4.7] atomic update of profile counters (issue6965050)

2012-12-20 Thread Rong Xu
We have this patch primarily for getting valid profile counts. We
observe that for some highly threaded programs, we are getting poor
counter values due to data races in the counter updates (e.g. the counter
value is only 15% of what it is supposed to be for a 10-thread program).

In general, enabling atomic updates slows down programs (for some of my
toy programs, it is a 3x slowdown). And that is the reason I use
options to control value and edge profile counter updates.

-Rong

On Thu, Dec 20, 2012 at 8:57 AM, Andrew Pinski pins...@gmail.com wrote:
 On Thu, Dec 20, 2012 at 8:20 AM, Jan Hubicka hubi...@ucw.cz wrote:
 On Wed, Dec 19, 2012 at 4:29 PM, Andrew Pinski pins...@gmail.com wrote:
 
  On Wed, Dec 19, 2012 at 12:08 PM, Rong Xu x...@google.com wrote:
   Hi,
  
   This patch adds the supprot of atomic update the profile counters.
   Tested with google internal benchmarks and fdo kernel build.
 
  I think you should use the __atomic_ functions instead of __sync_
  functions as they allow better performance for simple counters as you
  can use __ATOMIC_RELAXED.

 You are right. I think __ATOMIC_RELAXED should be OK here.
 Thanks for the suggestion.

 
  And this would be useful for the trunk also.  I was going to implement
  this exact thing this week but some other important stuff came up.

 I'll post trunk patch later.

 Yes, I like that patch, too. Even if the costs are quite high (and this is 
 why
 atomic updates was sort of voted down in the past) the alternative of using 
 TLS
 has problems with too-much per-thread memory.

 Actually sometimes (on some processors) atomic increments are cheaper
 than doing a regular incremental.  Mainly because there is an
 instruction which can handle it in the L2 cache rather than populating
 the L1.   Octeon is one such processor where this is true.

 Thanks,
 Andrew Pinski


 While there are even more alternatives, like recording the changes and
 commmiting them in blocks (say at function return), I guess some solution is
 better than no solution.

 Thanks,
 Honza


Re: [google 4.7] atomic update of profile counters (issue6965050)

2012-12-20 Thread Andrew Pinski
On Thu, Dec 20, 2012 at 11:35 AM, Rong Xu x...@google.com wrote:
 we have this patch primarily for getting valid profile counts. we
 observe that for some high-threaded programs, we are getting poor
 counter due to data racing of counter update (like counter value is
 only 15% of what it supposed to be for a 10-thread program).

I have seen much worse on Octeon running a 32-threaded program.  I
think it was only 1% of what it should have been.



 In general, enabling atomic updates slows down programs. (for my some
 of my toy programs, it has 3x slow down.) And that the reason I use
 options to control value and edge profile count.

I think on Octeon, the atomic updates would be a speedup because of
the atomic instruction which was added explicitly for incrementing a
statistics counter.  Internally at Cavium, I might just turn this on
by default as it even helps the one thread case :).

Thanks,
Andrew Pinski


 -Rong

 On Thu, Dec 20, 2012 at 8:57 AM, Andrew Pinski pins...@gmail.com wrote:
 On Thu, Dec 20, 2012 at 8:20 AM, Jan Hubicka hubi...@ucw.cz wrote:
 On Wed, Dec 19, 2012 at 4:29 PM, Andrew Pinski pins...@gmail.com wrote:
 
  On Wed, Dec 19, 2012 at 12:08 PM, Rong Xu x...@google.com wrote:
   Hi,
  
   This patch adds the supprot of atomic update the profile counters.
   Tested with google internal benchmarks and fdo kernel build.
 
  I think you should use the __atomic_ functions instead of __sync_
  functions as they allow better performance for simple counters as you
  can use __ATOMIC_RELAXED.

 You are right. I think __ATOMIC_RELAXED should be OK here.
 Thanks for the suggestion.

 
  And this would be useful for the trunk also.  I was going to implement
  this exact thing this week but some other important stuff came up.

 I'll post trunk patch later.

 Yes, I like that patch, too. Even if the costs are quite high (and this is 
 why
 atomic updates was sort of voted down in the past) the alternative of using 
 TLS
 has problems with too-much per-thread memory.

 Actually sometimes (on some processors) atomic increments are cheaper
 than doing a regular incremental.  Mainly because there is an
 instruction which can handle it in the L2 cache rather than populating
 the L1.   Octeon is one such processor where this is true.

 Thanks,
 Andrew Pinski


 While there are even more alternatives, like recording the changes and
 commmiting them in blocks (say at function return), I guess some solution is
 better than no solution.

 Thanks,
 Honza


Re: [google 4.7] fdo build for linux kernel (issue 6968046)

2012-12-20 Thread Xinliang David Li
It depends on the value distribution.

David

On Thu, Dec 20, 2012 at 11:30 AM, Rong Xu x...@google.com wrote:
 On Wed, Dec 19, 2012 at 5:22 PM, Rong Xu x...@google.com wrote:
 On Wed, Dec 19, 2012 at 5:04 PM,  davi...@google.com wrote:
 The change in gcov-io.h is from a different patch.

 sorry. here is the patch for gcov-io.h:

 Index: gcov-io.h
 ===
 --- gcov-io.h   (revision 194562)
 +++ gcov-io.h   (working copy)
 @@ -781,8 +781,8 @@
   unused) */

unsigned n_functions;/* number of functions */
 -  const struct gcov_fn_info *const *functions; /* pointer to pointers
 - to function information  */
 +  const struct gcov_fn_info **functions; /* pointer to pointers
 +   to function information  */
  };

  /* Information about a single imported module.  */
 @@ -988,8 +988,7 @@
  GCOV_LINKAGE void gcov_seek (gcov_position_t /*position*/) ATTRIBUTE_HIDDEN;
  GCOV_LINKAGE void gcov_truncate (void) ATTRIBUTE_HIDDEN;
  GCOV_LINKAGE gcov_unsigned_t gcov_string_length (const char *)
 ATTRIBUTE_HIDDEN;
 -GCOV_LINKAGE unsigned gcov_gcda_file_size (struct gcov_info *,
 -   struct gcov_summary *);
 +GCOV_LINKAGE unsigned gcov_gcda_file_size (struct gcov_info *);
  #else
  /* Available outside libgcov */
  GCOV_LINKAGE void gcov_sync (gcov_position_t /*base*/,


 David


 https://codereview.appspot.com/6968046/diff/1/gcc/gcov-io.c
 File gcc/gcov-io.c (right):

 https://codereview.appspot.com/6968046/diff/1/gcc/gcov-io.c#newcode688
 gcc/gcov-io.c:688:
 Have you compared this with this impl:

  while (x)
  {
     c++;
     x &= (x-1);
  }
  return c;


 I did not try this pimplier version. I can do a test on the dump speed
 and report back.

 This simpler version is about 2% slow in dumping the profiles (average
 of 10 dumps). But this is not a big deal.
 I'll use this this version.

 -Rong


 https://codereview.appspot.com/6968046/


[patch] std::unique_ptr<T[], D> improvements

2012-12-20 Thread Jonathan Wakely
This patch started when I noticed that it's not possible to construct
a shared_ptr<T> from unique_ptr<T[], D>, then I discovered we don't
use D::pointer if it exists, and there were a number of other
non-conformance issues with our std::unique_ptr<T[], D>.  I ended up
fixing them by implementing Geoffrey's proposed resolution for LWG
issue 2118, which isn't official yet but is better than what we had
before so is a step in the right direction, even if it ends up needing
further revision when 2118 is resolved.

* include/std/functional (_Require): Move to ...
* include/std/type_traits (_Require): ... here.
* include/bits/shared_ptr_base.h (__shared_count::_S_create_from_up):
Handle unique_ptr for arrays or with custom pointer types.
(__shared_ptr::__shared_ptr(unique_ptr<_Tp1, _Del>&&): Likewise.
* include/bits/unique_ptr.h (unique_ptr<_Tp[], _Dp>): Use
_Dp::pointer if defined. Implement proposed resolution of LWG 2118.
* testsuite/20_util/shared_ptr/cons/unique_ptr_array.cc: New.
* testsuite/20_util/unique_ptr/assign/cv_qual.cc: New.
* testsuite/20_util/unique_ptr/cons/array_convertible_neg.cc: New.
* testsuite/20_util/unique_ptr/cons/convertible_neg.cc: New.
* testsuite/20_util/unique_ptr/cons/cv_qual.cc: New.
* testsuite/20_util/unique_ptr/modifiers/cv_qual.cc: New.
* testsuite/20_util/unique_ptr/requirements/pointer_type_array.cc: New.
* testsuite/20_util/shared_ptr/cons/unique_ptr.cc: Adjust comments.
* testsuite/20_util/unique_ptr/cons/pointer_array_convertible_neg.cc:
Likewise.
* testsuite/20_util/unique_ptr/requirements/pointer_type.cc: Likewise.
* testsuite/20_util/bind/ref_neg.cc: Adjust dg-error line number.
* testsuite/20_util/declval/requirements/1_neg.cc: Likewise.
* testsuite/20_util/default_delete/48631_neg.cc: Likewise.
* testsuite/20_util/shared_ptr/cons/43820_neg.cc: Likewise.
* testsuite/20_util/unique_ptr/assign/48635_neg.cc: Likewise.
* testsuite/20_util/unique_ptr/modifiers/reset_neg.cc: Adjust
dg-error text.
* testsuite/20_util/unique_ptr/cons/ptr_deleter_neg.cc: Use
different instantiations so static_assert fails for each.

Thanks to Geoffrey and Lawrence for input and test cases.

Tested x86_64-linux, committed to trunk.
commit 907290c8077e6757c56fc64c9160c4bdaea86b90
Author: Jonathan Wakely jwakely@gmail.com
Date:   Thu Dec 20 17:57:33 2012 +

* include/std/functional (_Require): Move to ...
* include/std/type_traits (_Require): ... here.
* include/bits/shared_ptr_base.h (__shared_count::_S_create_from_up):
Handle unique_ptr for arrays or with custom pointer types.
(__shared_ptr::__shared_ptr(unique_ptr<_Tp1, _Del>&&): Likewise.
* include/bits/unique_ptr.h (unique_ptr<_Tp[], _Dp>): Use
_Dp::pointer if defined. Implement proposed resolution of LWG 2118.
* testsuite/20_util/shared_ptr/cons/unique_ptr_array.cc: New.
* testsuite/20_util/unique_ptr/assign/cv_qual.cc: New.
* testsuite/20_util/unique_ptr/cons/array_convertible_neg.cc: New.
* testsuite/20_util/unique_ptr/cons/convertible_neg.cc: New.
* testsuite/20_util/unique_ptr/cons/cv_qual.cc: New.
* testsuite/20_util/unique_ptr/modifiers/cv_qual.cc: New.
* testsuite/20_util/unique_ptr/requirements/pointer_type_array.cc: New.
* testsuite/20_util/shared_ptr/cons/unique_ptr.cc: Adjust comments.
* testsuite/20_util/unique_ptr/cons/pointer_array_convertible_neg.cc:
Likewise.
* testsuite/20_util/unique_ptr/requirements/pointer_type.cc: Likewise.
* testsuite/20_util/bind/ref_neg.cc: Adjust dg-error line number.
* testsuite/20_util/declval/requirements/1_neg.cc: Likewise.
* testsuite/20_util/default_delete/48631_neg.cc: Likewise.
* testsuite/20_util/shared_ptr/cons/43820_neg.cc: Likewise.
* testsuite/20_util/unique_ptr/assign/48635_neg.cc: Likewise.
* testsuite/20_util/unique_ptr/modifiers/reset_neg.cc: Adjust
dg-error text.
* testsuite/20_util/unique_ptr/cons/ptr_deleter_neg.cc: Use
different instantiations so static_assert fails for each.

diff --git a/libstdc++-v3/include/bits/shared_ptr_base.h 
b/libstdc++-v3/include/bits/shared_ptr_base.h
index ead3728..9d9fecb 100644
--- a/libstdc++-v3/include/bits/shared_ptr_base.h
+++ b/libstdc++-v3/include/bits/shared_ptr_base.h
@@ -616,7 +616,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	_S_create_from_up(std::unique_ptr<_Tp, _Del>&& __r,
 	  typename std::enable_if<!std::is_reference<_Del>::value>::type* = 0)
 	{
-	  return new _Sp_counted_deleter<_Tp*, _Del, std::allocator<void>,
+	  typedef typename unique_ptr<_Tp, _Del>::pointer _Ptr;
+	  return new _Sp_counted_deleter<_Ptr, _Del, std::allocator<void>,
 	    _Lp>(__r.get(), __r.get_deleter());
}
 
@@ 

Re: [google 4.7] fdo build for linux kernel (issue 6968046)

2012-12-20 Thread Rong Xu
That's right, but there is no way to predict the pattern.
What I meant was: as long as it does not introduce a major slow-down in
dumping the profile, I'd like to use the simpler version.
What do you think?

-Rong

On Thu, Dec 20, 2012 at 11:54 AM, Xinliang David Li davi...@google.com wrote:
 It  depends on the value distribution .

 David

 On Thu, Dec 20, 2012 at 11:30 AM, Rong Xu x...@google.com wrote:
 On Wed, Dec 19, 2012 at 5:22 PM, Rong Xu x...@google.com wrote:
 On Wed, Dec 19, 2012 at 5:04 PM,  davi...@google.com wrote:
 The change in gcov-io.h is from a different patch.

 sorry. here is the patch for gcov-io.h:

 Index: gcov-io.h
 ===
 --- gcov-io.h   (revision 194562)
 +++ gcov-io.h   (working copy)
 @@ -781,8 +781,8 @@
   unused) */

unsigned n_functions;/* number of functions */
 -  const struct gcov_fn_info *const *functions; /* pointer to pointers
 - to function information  
 */
 +  const struct gcov_fn_info **functions; /* pointer to pointers
 +   to function information  */
  };

  /* Information about a single imported module.  */
 @@ -988,8 +988,7 @@
  GCOV_LINKAGE void gcov_seek (gcov_position_t /*position*/) 
 ATTRIBUTE_HIDDEN;
  GCOV_LINKAGE void gcov_truncate (void) ATTRIBUTE_HIDDEN;
  GCOV_LINKAGE gcov_unsigned_t gcov_string_length (const char *)
 ATTRIBUTE_HIDDEN;
 -GCOV_LINKAGE unsigned gcov_gcda_file_size (struct gcov_info *,
 -   struct gcov_summary *);
 +GCOV_LINKAGE unsigned gcov_gcda_file_size (struct gcov_info *);
  #else
  /* Available outside libgcov */
  GCOV_LINKAGE void gcov_sync (gcov_position_t /*base*/,


 David


 https://codereview.appspot.com/6968046/diff/1/gcc/gcov-io.c
 File gcc/gcov-io.c (right):

 https://codereview.appspot.com/6968046/diff/1/gcc/gcov-io.c#newcode688
 gcc/gcov-io.c:688:
 Have you compared this with this impl:

 while (x)
 {
    c++;
    x &= (x-1);
 }
 return c;


 I did not try this pimplier version. I can do a test on the dump speed
 and report back.

 This simpler version is about 2% slow in dumping the profiles (average
 of 10 dumps). But this is not a big deal.
 I'll use this this version.

 -Rong


 https://codereview.appspot.com/6968046/


Re: [patch] fix install dependencies for target libraries

2012-12-20 Thread Tobias Burnus

Am 20.12.2012 19:22, schrieb Matthias Klose:

This was seen with the libgo installation [1], but from my point of view can
happen when the install target is called with -j 1, libtool seems to fall back
to the system libraries if the library in the install location is not available
(which is always the case if you install into an empty dir set with DESTDIR).
Currently it just works for a non-parallel install because the dependencies in
Makefile.def are created in the right order.

Ok for the trunk?


For the Fortran change: Can you test with --disable-libquadmath 
--disable-libquadmath-support? Will that work by silently ignoring the 
libquadmath dependence or will it break?



+dependencies = { module=install-target-libgfortran; 
on=install-target-libquadmath; };


Tobias


Re: [Patch, fortran] PR55763 - Issues with some simpler CLASS(*) programs

2012-12-20 Thread Tobias Burnus

Paul Richard Thomas wrote:

Thanks to Tobias for coming up so quickly with class(*) bugs!


That was simple: I could mine Reinhold Bader's collection. Only the 
ICE-on-invalid part of the test case is mine. Please credit him in/for 
the test case.



Bootstrapped and regtested on FC17/x86_64 - OK for trunk?


OK. Thanks for the quick patch. Hopefully, fixing the remaining issues 
of that PR will be as quick.


(Can you update the TODO list in the other CLASS(*) PR - or is it complete?)

Tobias


Re: [wwwdocs,Java] Obsolete GCJ FAQ entry for Solaris?

2012-12-20 Thread Gerald Pfeifer
PING.

On Fri, 2 Nov 2012, Gerald Pfeifer wrote:
 Rainer (or others),
 
 the FAQ entry below seems obsolete to me (dates back more than a
 decade).  Shall we remove it, or is there something else we still
 should document (in addition to gcc/doc/install.texi)?
 
 Gerald
 
 Index: faq.html
 ===
 RCS file: /cvs/gcc/wwwdocs/htdocs/java/faq.html,v
 retrieving revision 1.69
 diff -u -3 -p -r1.69 faq.html
 --- faq.html  2 Nov 2012 19:59:34 -   1.69
 +++ faq.html  2 Nov 2012 20:29:12 -
 @@ -36,7 +36,6 @@
ol
  lia href=#3_1I need something more recent than the last 
 release; how
should I build it?/a/li
 -lia href=#3_2Linker bug on Solaris/a/li
/ol
  /li
  lia href=#4_0Gcj Compile/Link Questions/a 
 @@ -278,33 +277,6 @@ $ gij HelloWorld
  /dd
/dl
  
 -  hr /
 -  h3a name=3_23.2 Linker bug on Solaris/a/h3
 -  dl 
 -dd
 -  There is a known problem with the a 
 href=http://gcc.gnu.org/ml/gcc-bugs/1999-10/msg00159.html; 
 -  native Solaris linker/a when using gcc/gcj. A good indication 
 you've 
 -  run into this problem is if you get an error that looks like the 
 following 
 -  when building libgcj: 
 -  pre
 -ld: warning: option -o appears more than once, first setting taken
 -ld: fatal: file libfoo.so: cannot open file: No such file or directory
 -ld: fatal: File processing errors. No output written to .libs/libfoo.so
 -collect2: ld returned 1 exit status
 -  /pre
 -  A known workaround for this and other reported link problems on 
 the 
 -  various releases of Solaris is to build gcc/gcj with the a 
 href=ftp://sources.redhat.com/pub/binutils/snapshots; 
 -  latest GNU binutils/a instead of the native Solaris ttld/tt. 
 The 
 -  most straightforward way to do this is to build and install 
 binutils, 
 -  and then reference it in the configure for gcc via 
 tt--with-ld=/path_to_binutils_install/bin/ld/tt 
 -  (tt--with-as/tt may also be similarly specified but is not 
 believed 
 -  to be required).
 -  br /
 -  Please note, gcc/gcj must be built using GNU ld prior to doing a 
 -  clean build of libgcj! 
 -/dd
 -  /dl
 -
h2a name=4_0Gcj Compile/Link Questions/a/h2
   
h3a name=4_14.1 Why do I get ttundefined reference to 
 `main'/tt 


fix libquadmath build regression

2012-12-20 Thread Alexandre Oliva
Revision 193063 brought calls to feraiseexcept() into libquadmath,
which caused a build regression on Fedora 16 (BLAG 160k actually) x86_64
while building an i686-linux-gnu native toolchain.

The problem is that glibc has an extern inline definition of
feraiseexcept that is introduced by including fenv.h (it's in
bits/fenv.h), and this definition requires SSE support regardless of
target arch or word width, so it doesn't work for an i686 native that
doesn't assume SSE registers and instructions are available.

This bug is fixed in newer versions of glibc, but I figured it wouldn't
hurt to have a work-around in place for libquadmath to build, detecting
that the extern inline in the header is broken and introducing a wrapper
that bypasses the header so as to use the out-of-line definition in the
math library.

Is this ok to install?

Deal with SSE-requiring extern inline in bits/fenv.h

From: Alexandre Oliva aol...@redhat.com

for  libquadmath/ChangeLog

	* configure.ac: Check that calling feraiseexcept compiles when
	fenv.h is included.  Define QUADMATH_FERAISEEXCEPT cpp macro
	and LIBQUAD_FERAISEEXCEPT conditional otherwise.
	* Makefile.am (libquadmath_la_SOURCES): Add
	math/feraiseexcept.c, conditional on LIBQUAD_FERAISEEXCEPT.
	* aclocal.m4: Rebuilt.
	* configure: Rebuilt.
	* config.h.in: Rebuilt.
	* Makefile.in: Rebuilt.
	* math/feraiseexcept.c: New file.
	* math/quadmath-imp.h (__quadmath_feraiseexcept): Declare.
	* math/ccoshq.c: Use QUADMATH_FERAISEEXCEPT macro to call, and
	to decide whether to call, feraiseexcept.
	* math/cexpq.c: Likewise.
	* math/csinhq.c: Likewise.
	* math/csinq.c: Likewise.
	* math/ctanhq.c: Likewise.
	* math/ctanq.c: Likewise.
	* math/ilogbq.c: Likewise.  Include fenv.h if HAVE_FENV_H.
---

 libquadmath/Makefile.am  |3 +
 libquadmath/Makefile.in  |  200 +++---
 libquadmath/aclocal.m4   |   74 +-
 libquadmath/config.h.in  |3 +
 libquadmath/configure|   58 +++
 libquadmath/configure.ac |   21 
 libquadmath/math/ccoshq.c|8 +-
 libquadmath/math/cexpq.c |   12 +-
 libquadmath/math/csinhq.c|   12 +-
 libquadmath/math/csinq.c |   12 +-
 libquadmath/math/ctanhq.c|4 -
 libquadmath/math/ctanq.c |4 -
 libquadmath/math/feraiseexcept.c |9 ++
 libquadmath/math/ilogbq.c|   16 ++-
 libquadmath/quadmath-imp.h   |5 +
 15 files changed, 318 insertions(+), 123 deletions(-)
 create mode 100644 libquadmath/math/feraiseexcept.c


diff --git a/libquadmath/Makefile.am b/libquadmath/Makefile.am
index 6c97ee8..9acf619 100644
--- a/libquadmath/Makefile.am
+++ b/libquadmath/Makefile.am
@@ -69,6 +69,9 @@ libquadmath_la_SOURCES = \
   printf/quadmath-printf.c printf/rshift.c printf/submul_1.c printf/sub_n.c \
   strtod/strtoflt128.c strtod/mpn2flt128.c strtod/tens_in_limb.c
 
+if LIBQUAD_FERAISEEXCEPT
+libquadmath_la_SOURCES += math/feraiseexcept.c
+endif
 
 # Work around what appears to be a GNU make bug handling MAKEFLAGS
 # values defined in terms of make variables, as is the case for CC and
diff --git a/libquadmath/configure.ac b/libquadmath/configure.ac
index c547da8..765dfea 100644
--- a/libquadmath/configure.ac
+++ b/libquadmath/configure.ac
@@ -150,6 +150,27 @@ else
   fi
 fi
 
+if test x$ac_cv_header_fenv_h = xyes; then
+  dnl Some versions of libc 2.16 for x86_64 have an extern inline
+  dnl definition of feraiseexcept in bits/fenv.h that requires SSE
+  dnl support, and they fail to compile with -m32 when targeting
+  dnl pre-x86_64 32-bit architectures.
+
+  dnl This wrapper enables us to bypass the inline definition and call
+  dnl the out-of-line feraiseexcept definition, because it does not
+  dnl include fenv.h itself.
+
+  AC_CACHE_CHECK([whether feraiseexcept is broken in fenv.h], [quadmath_cv_feraiseexcept_fenv_broken], [AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[#include <fenv.h>]], [feraiseexcept (FE_INVALID);])], [quadmath_cv_feraiseexcept_fenv_broken=no], [quadmath_cv_feraiseexcept_fenv_broken=yes])])
+  if test x$quadmath_cv_feraiseexcept_fenv_broken = xyes; then
+feraiseexcept=__quadmath_feraiseexcept
+  else
+feraiseexcept=feraiseexcept
+  fi
+  AC_DEFINE_UNQUOTED([QUADMATH_FERAISEEXCEPT], [$feraiseexcept],
+		 [Optional replacement for compile-time broken feraiseexcept.])
+fi
+AM_CONDITIONAL([LIBQUAD_FERAISEEXCEPT], [test x$ac_cv_header_fenv_h$quadmath_cv_feraiseexcept_fenv_broken = xyesyes])
+
 # Check for hidden visibility (copied from libssp).
 saved_CFLAGS="$CFLAGS"
 CFLAGS="$CFLAGS -Werror"
diff --git a/libquadmath/math/ccoshq.c b/libquadmath/math/ccoshq.c
index 8d55ad3..c1b24ac 100644
--- a/libquadmath/math/ccoshq.c
+++ b/libquadmath/math/ccoshq.c
@@ -89,9 +89,9 @@ ccoshq (__complex128 x)
 	  __imag__ retval = __real__ x == 0.0Q ? 0.0Q : nanq ("");
 	  __real__ retval = nanq ("") + nanq ("");
 
-#ifdef HAVE_FENV_H
+#ifdef QUADMATH_FERAISEEXCEPT
 	  if (icls == QUADFP_INFINITE)
-	

[PR libmudflap/53359] don't register symbols not emitted

2012-12-20 Thread Alexandre Oliva
libmudflap emits a global initializer that registers memory ranges for
global data symbols.  However, even if IPA decides not to emit a symbol
because it's unused, we'd still emit registration sequences for them in
some cases, which, in the PR testcase, would result in TOC references to
the undefined symbols.

This patch fixes the problem, avoiding registration for symbols that are
not present in the varpool.

Regstrapped on x86_64-linux-gnu and i686-linux-gnu; I've also verified
that it removes the TOC references on a ppc64-linux-gnu cross.

Ok to install?

don't let mudflap register global symbols that won't be emitted

From: Alexandre Oliva aol...@redhat.com

for  gcc/ChangeLog

	PR libmudflap/53359
	* tree-mudflap.c (mudflap_finish_file): Skip deferred decls
	not found in the symtab.
---

 gcc/tree-mudflap.c |4 
 1 files changed, 4 insertions(+), 0 deletions(-)


diff --git a/gcc/tree-mudflap.c b/gcc/tree-mudflap.c
index 90d0448..a9caaf2 100644
--- a/gcc/tree-mudflap.c
+++ b/gcc/tree-mudflap.c
@@ -1335,6 +1335,10 @@ mudflap_finish_file (void)
   if (! TREE_PUBLIC (obj)  ! TREE_ADDRESSABLE (obj))
 continue;
 
+	  /* If we're not emitting the symbol, don't register it.  */
+	  if (!symtab_get_node (obj))
+	continue;
+
   if (! COMPLETE_TYPE_P (TREE_TYPE (obj)))
 {
   warning (OPT_Wmudflap,


-- 
Alexandre Oliva, freedom fighterhttp://FSFLA.org/~lxoliva/
You must be the change you wish to see in the world. -- Gandhi
Be Free! -- http://FSFLA.org/   FSF Latin America board member
Free Software Evangelist  Red Hat Brazil Compiler Engineer


atomic update of profile counters (issue7000044)

2012-12-20 Thread Rong Xu
Hi,

This patch adds support for atomic update of profile counters. The goal is
to improve the poor counter values for highly threaded programs.

The atomic update is under a new option -fprofile-gen-atomic=N
N=0: default, no atomic update
N=1: atomic update edge counters.
N=2: atomic update some of value profile counters (currently indirect-call and 
one value profile).
N=3: both edge counter and the above value profile counters.
Other value: fall back to the default.

This patch is a simple port of the version in the google-4_7 branch. It uses
__atomic_fetch_add based on Andrew Pinski's suggestion. Note I did not apply
it to all the value profiles, as the indirect-call profile is the most
relevant one here.
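
As a standalone illustration of the primitive involved (my own minimal
sketch, not the libgcov code; the patch below goes through the
GCOV_TYPE_ATOMIC_FETCH_ADD_FN macro instead):

#include <stdint.h>

/* Bump a 64-bit counter with a relaxed atomic add; relaxed ordering is
   enough here because only the final accumulated value matters.  */
static inline void
counter_add (int64_t *counter, int64_t delta)
{
  __atomic_fetch_add (counter, delta, __ATOMIC_RELAXED);
}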

Tested with bootstrap.

Comments and suggestions are welcomed.

Thanks,

-Rong


2012-12-20  Rong Xu  x...@google.com

* libgcc/libgcov.c (__gcov_one_value_profiler_body_atomic): New
function. Atomic update profile counters.
(__gcov_one_value_profiler_atomic): Ditto.
(__gcov_indirect_call_profiler_atomic): Ditto.
* gcc/gcov-io.h: Macros for atomic update.
* gcc/common.opt: New option.
* gcc/tree-profile.c (gimple_init_edge_profiler): Atomic
update profile counters.
(gimple_gen_edge_profiler): Ditto.

Index: libgcc/libgcov.c
===
--- libgcc/libgcov.c(revision 194652)
+++ libgcc/libgcov.c(working copy)
@@ -1113,12 +1113,35 @@ __gcov_one_value_profiler_body (gcov_type *counter
   counters[2]++;
 }
 
+/* Atomic update version of __gcov_one_value_profile_body().  */
+static inline void 
+__gcov_one_value_profiler_body_atomic (gcov_type *counters, gcov_type value)
+{
+  if (value == counters[0])
+GCOV_TYPE_ATOMIC_FETCH_ADD_FN (&counters[1], 1, MEMMODEL_RELAXED);
+  else if (counters[1] == 0)
+{
+  counters[1] = 1; 
+  counters[0] = value;
+}
+  else 
+GCOV_TYPE_ATOMIC_FETCH_ADD_FN (&counters[1], -1, MEMMODEL_RELAXED);
+GCOV_TYPE_ATOMIC_FETCH_ADD_FN (&counters[2], 1, MEMMODEL_RELAXED);
+}
+
 #ifdef L_gcov_one_value_profiler
 void
 __gcov_one_value_profiler (gcov_type *counters, gcov_type value)
 {
   __gcov_one_value_profiler_body (counters, value);
 }
+
+void
+__gcov_one_value_profiler_atomic (gcov_type *counters, gcov_type value)
+{
+  __gcov_one_value_profiler_body_atomic (counters, value);
+}
+
 #endif
 
 #ifdef L_gcov_indirect_call_profiler
@@ -1153,6 +1176,17 @@ __gcov_indirect_call_profiler (gcov_type* counter,
   && *(void **) cur_func == *(void **) callee_func))
 __gcov_one_value_profiler_body (counter, value);
 }
+
+/* Atomic update version of __gcov_indirect_call_profiler().  */
+void
+__gcov_indirect_call_profiler_atomic (gcov_type* counter, gcov_type value,
+  void* cur_func, void* callee_func)
+{
+  if (cur_func == callee_func
+  || (VTABLE_USES_DESCRIPTORS && callee_func
+      && *(void **) cur_func == *(void **) callee_func))
+__gcov_one_value_profiler_body_atomic (counter, value);
+}
 #endif
 
 
Index: gcc/gcov-io.h
===
--- gcc/gcov-io.h   (revision 194652)
+++ gcc/gcov-io.h   (working copy)
@@ -202,7 +202,15 @@ typedef unsigned gcov_type_unsigned __attribute__
 #endif
 #endif
 
#if LONG_LONG_TYPE_SIZE > 32
+#define GCOV_TYPE_ATOMIC_FETCH_ADD_FN __atomic_fetch_add_8
+#define GCOV_TYPE_ATOMIC_FETCH_ADD BUILT_IN_ATOMIC_FETCH_ADD_8
+#else
+#define GCOV_TYPE_ATOMIC_FETCH_ADD_FN __atomic_fetch_add_4
+#define GCOV_TYPE_ATOMIC_FETCH_ADD BUILT_IN_ATOMIC_FETCH_ADD_4
+#endif
 
+
 #if defined (TARGET_POSIX_IO)
 #define GCOV_LOCKED 1
 #else
@@ -212,6 +220,18 @@ typedef unsigned gcov_type_unsigned __attribute__
 #else /* !IN_LIBGCOV */
 /* About the host */
 
#if LONG_LONG_TYPE_SIZE > 32
+#define GCOV_TYPE_ATOMIC_FETCH_ADD_FN __atomic_fetch_add_8
+#define GCOV_TYPE_ATOMIC_FETCH_ADD BUILT_IN_ATOMIC_FETCH_ADD_8
+#else
+#define GCOV_TYPE_ATOMIC_FETCH_ADD_FN __atomic_fetch_add_4
+#define GCOV_TYPE_ATOMIC_FETCH_ADD BUILT_IN_ATOMIC_FETCH_ADD_4
+#endif
+#define PROFILE_GEN_EDGE_ATOMIC (flag_profile_gen_atomic == 1 || \
+ flag_profile_gen_atomic == 3)
+#define PROFILE_GEN_VALUE_ATOMIC (flag_profile_gen_atomic == 2 || \
+  flag_profile_gen_atomic == 3)
+
 typedef unsigned gcov_unsigned_t;
 typedef unsigned gcov_position_t;
 /* gcov_type is typedef'd elsewhere for the compiler */
Index: gcc/common.opt
===
--- gcc/common.opt  (revision 194652)
+++ gcc/common.opt  (working copy)
@@ -1635,6 +1635,15 @@ fprofile-correction
 Common Report Var(flag_profile_correction)
 Enable correction of flow inconsistent profile data input
 
+; fprofile-gen-atomic=0: disable atomically update.
+; fprofile-gen-atomic=1: atomically update edge profile counters.
+; fprofile-gen-atomic=2: atomically update value 

Re: [Patch, PR 54128] ira.c change to fix mips bootstrap

2012-12-20 Thread Jakub Jelinek
On Fri, Aug 31, 2012 at 10:58:51AM -0700, Steve Ellcey  wrote:
 Here is my patch to fix the bootstrap comparision failure (PR 54128) on
 MIPS.  The reason for the comparision failure was a difference in
 register usage and I tracked it down to build_insn_chain which checked
 all instructions for register usage in order to set the dead_or_set
 and live_relevant_regs bitmaps instead of checking only non-debug
 instructions.  Changing INSN_P to NONDEBUG_INSN_P in build_insn_chain
 allowed me to bootstrap and caused no regressions.
 
 OK to checkin?

Given Alex' comments in the PR, the second hunk is definitely ok for trunk,
the first one can be applied too (but you can skip it too if you want, it
shouldn't make a difference).

 2012-08-31  Steve Ellcey  sell...@mips.com
 
   PR bootstrap/54128
   * ira.c (build_insn_chain): Check only NONDEBUG instructions for
   register usage.
 
 diff --git a/gcc/ira.c b/gcc/ira.c
 index 3825498..477c87b 100644
 --- a/gcc/ira.c
 +++ b/gcc/ira.c
 @@ -3341,7 +3341,7 @@ build_insn_chain (void)
 c-insn = insn;
 c-block = bb-index;
  
 -   if (INSN_P (insn))
 +   if (NONDEBUG_INSN_P (insn))
   for (def_rec = DF_INSN_UID_DEFS (uid); *def_rec; def_rec++)
 {
   df_ref def = *def_rec;
 @@ -3432,7 +3432,7 @@ build_insn_chain (void)
 bitmap_and_compl_into (live_relevant_regs, elim_regset);
 bitmap_copy (c-live_throughout, live_relevant_regs);
  
 -   if (INSN_P (insn))
 +   if (NONDEBUG_INSN_P (insn))
   for (use_rec = DF_INSN_UID_USES (uid); *use_rec; use_rec++)
 {
   df_ref use = *use_rec;

Jakub