[PATCH] lower-subreg and IBM long double

2013-08-19 Thread Alan Modra
On Tue, Jun 11, 2013 at 09:56:11AM +0930, Alan Modra wrote:
[snip] 
 It isn't hard to see why we are going wrong.  IBM long double is
 really a two element array of double, and the rs6000 backend uses
 subregs to access the elements.  The problem is that lower-subreg
 lowers to word_mode, so we get DImode.  word_mode makes sense for most
 targets where subregs of FP modes might be used to narrow an access
 for bit-twiddling operations on the sign bit.  It doesn't make sense
 for us.  We want DFmode for FP operations.  An example is the expander
 used by the testcase.

This is a repost of http://gcc.gnu.org/ml/gcc/2013-06/msg00053.html,
taking into account Joseph's doc review comments and adding a
testcase.  David has already acked the rs6000.c changes.  Bootstrapped
and regression tested powerpc64-linux.  OK to apply?

gcc/
* rs6000.c (TARGET_INIT_LOWER_SUBREG): Define.
(rs6000_init_lower_subreg): New function.
* lower-subreg.c (init_lower_subreg): Call targetm.init_lower_subreg.
* target.def (init_lower_subreg): New.
* doc/tm.texi.in (TARGET_INIT_LOWER_SUBREG): Document.
* doc/tm.texi: Regenerate.
gcc/testsuite/
* gcc.target/powerpc/fabsl.c: New test.

Index: gcc/config/rs6000/rs6000.c
===
--- gcc/config/rs6000/rs6000.c  (revision 201834)
+++ gcc/config/rs6000/rs6000.c  (working copy)
@@ -59,6 +59,7 @@
 #include opts.h
 #include tree-vectorizer.h
 #include dumpfile.h
+#include lower-subreg.h
 #if TARGET_XCOFF
 #include xcoffout.h  /* get declarations of xcoff_*_section_name */
 #endif
@@ -1364,6 +1365,8 @@ static const struct attribute_spec rs6000_attribut
 #define TARGET_RTX_COSTS rs6000_rtx_costs
 #undef TARGET_ADDRESS_COST
 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
+#undef TARGET_INIT_LOWER_SUBREG
+#define TARGET_INIT_LOWER_SUBREG rs6000_init_lower_subreg
 
 #undef TARGET_DWARF_REGISTER_SPAN
 #define TARGET_DWARF_REGISTER_SPAN rs6000_dwarf_register_span
@@ -27971,6 +27974,20 @@ rs6000_memory_move_cost (enum machine_mode mode, r
   return ret;
 }
 
+static void
+rs6000_init_lower_subreg (void *data)
+{
+  if (!TARGET_IEEEQUAD
+   TARGET_HARD_FLOAT
+   (TARGET_FPRS || TARGET_E500_DOUBLE)
+   TARGET_LONG_DOUBLE_128)
+{
+  struct target_lower_subreg *info = (struct target_lower_subreg *) data;
+  info-x_choices[0].move_modes_to_split[TFmode] = false;
+  info-x_choices[1].move_modes_to_split[TFmode] = false;
+}
+}
+
 /* Returns a code for a target-specific builtin that implements
reciprocal of the function, or NULL_TREE if not available.  */
 
Index: gcc/lower-subreg.c
===
--- gcc/lower-subreg.c  (revision 201834)
+++ gcc/lower-subreg.c  (working copy)
@@ -39,6 +39,7 @@ along with GCC; see the file COPYING3.  If not see
 #include tree-pass.h
 #include df.h
 #include lower-subreg.h
+#include target.h
 
 #ifdef STACK_GROWS_DOWNWARD
 # undef STACK_GROWS_DOWNWARD
@@ -287,6 +288,9 @@ init_lower_subreg (void)
   if (LOG_COSTS)
 fprintf (stderr, \nSpeed costs\n===\n\n);
   compute_costs (true, rtxes);
+
+  if (targetm.init_lower_subreg)
+targetm.init_lower_subreg (this_target_lower_subreg);
 }
 
 static bool
Index: gcc/target.def
===
--- gcc/target.def  (revision 201834)
+++ gcc/target.def  (working copy)
@@ -5110,6 +5110,14 @@ comparison code or operands.,
  void, (int *code, rtx *op0, rtx *op1, bool op0_preserve_value),
  default_canonicalize_comparison)
 
+/* Allow modification of subreg choices.  */
+DEFHOOK
+(init_lower_subreg,
+ This hook allows modification of the choices the lower_subreg pass \
+will make for particular subreg modes.  @var{data} is a pointer to a \
+@code{struct target_lower_subreg}.,
+ void, (void *data), NULL)
+
 DEFHOOKPOD
 (atomic_test_and_set_trueval,
  This value should be set if the result written by\
Index: gcc/doc/tm.texi.in
===
--- gcc/doc/tm.texi.in  (revision 201834)
+++ gcc/doc/tm.texi.in  (working copy)
@@ -4939,6 +4939,8 @@ Define this macro if a non-short-circuit operation
 
 @hook TARGET_ADDRESS_COST
 
+@hook TARGET_INIT_LOWER_SUBREG
+
 @node Scheduling
 @section Adjusting the Instruction Scheduler
 


-- 
Alan Modra
Australia Development Lab, IBM


Re: Type inheritance graph analysis speculative devirtualization, part 2/6 (type inheritance graph builder)

2013-08-19 Thread Richard Biener
Jan Hubicka hubi...@ucw.cz wrote:
Hi,
this patch implements the type inheritance graph builder. Once the
graph is
built it stays in memory and unchanged thorough the compilation (we do
not
expect to invent new virtual methods during the optimization)
The graph is dumped into new IPA dump file type-inheritance.

Construction does not work in LTO.  I will update and post the code to
merge
types based on ODR in an followup patch.

There is new function to lookup possible targets of given polymorphic
call and
function to dump those.  Callgraph construction is extended so
obviously
unreachable virtual functions are not lowered and are removed early. 
Result of
target lookups are stored in a global cache that is flushed each time
when any
virtual function that is target of a virtual call is removed.  This
probably
won't happen too many times in a practice - generally only after
remove_unreachable_function when given type of virtual calls becomes
dead.

I founded ipa-devirt file for the code.  It is supposed to be used by
IPA
passes, but also by the intraprocedural gimple OBJ_TYPE_REF folding.  I
think,
as a followup, we can move there other devirtualization related tools
and
analysis - get_binfo_at_offset, gimple_get_virt_method_for_binfo and
logic
checking dynamic type changes.

The construction and target lookup seems to work resonably well for
Firefox LTO
build.  The time spent by polymorphic call target lookup is about 2%. 
This is
still more than it needs to be.  Most of time is spent by parsing
vtables the
difficult way via gimple_get_virt_method_for_binfo.  This can be fixed
by
representing vtables directly in the type-inheritance graph, but also
by
implementing gimple_get_virt_method_for_binfo in less general way (it
goes
through folding ctor access, figuring out if method can be references
and doing
other expensive and unnecesary steps).

Bootstrapped/regtested ppc64-linux.  Will commit it tomorrow if there
are
no objections.  Any comments are welcome.

I believe you can get derived objects of derived type not visible in the tu, so 
the list of targets does only contain local methods and implicitly all methods 
only defined in other tus? That is, you cannot rely no that list to be complete?

Richard.

Honza

   * Makeifle-in (ipa-devirt.o): New.
   (GTFILES): Add ipa-utils.h and ipa-devirt.c
   * cgraphunit.c (decide_is_symbol_needed): Do not care about virtuals.
   (analyze_functions): Look into possible targets of polymorphic call.
   * dumpfile.c (dump_files): Add type-inheritance dump.
   * dumpfile.h (TDI_inheritance): New.
   * ipa-devirt.c: New file.
   * ipa-utils.h (odr_type_d): Forward declare.
   (odr_type): New type.
   (build_type_inheritance_graph): Declare.
   (possible_polymorphic_call_targets): Declare and introduce inline
   variant when only edge is pased.
   (dump_possible_polymorphic_call_targets): Likewise.
   * timevar.def (TV_IPA_INHERITANCE, TV_IPA_VIRTUAL_CALL): New.
   * tree.c (type_in_anonymous_namespace_p): Break out from ...
   (types_same_for_odr): ... here.
   * tree.h (type_in_anonymous_namespace_p): Declare.

   * g++.dg/ipa/type-inheritance-1.C: New testcase.

Index: Makefile.in
===
--- Makefile.in(revision 201824)
+++ Makefile.in(working copy)
@@ -1275,6 +1275,7 @@ OBJS = \
   init-regs.o \
   internal-fn.o \
   ipa-cp.o \
+  ipa-devirt.o \
   ipa-split.o \
   ipa-inline.o \
   ipa-inline-analysis.o \
@@ -2945,6 +2946,9 @@ ipa.o : ipa.c $(CONFIG_H) $(SYSTEM_H) co
$(TREE_PASS_H) $(GIMPLE_H) $(TARGET_H) $(GGC_H) pointer-set.h \
   $(IPA_UTILS_H) tree-inline.h $(HASH_TABLE_H) profile.h $(PARAMS_H) \
$(LTO_STREAMER_H) $(DATA_STREAMER_H)
+ipa-devirt.o : ipa-devirt.c $(CONFIG_H) $(SYSTEM_H) coretypes.h
$(TM_H) $(CGRAPH_H) \
+   $(GIMPLE_H) $(TARGET_H) $(GGC_H) pointer-set.h \
+   $(IPA_UTILS_H) $(HASH_TABLE_H) 
 ipa-prop.o : ipa-prop.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
langhooks.h $(GGC_H) $(TARGET_H) $(CGRAPH_H) $(IPA_PROP_H)
$(DIAGNOSTIC_H) \
$(TREE_FLOW_H) $(TM_H) $(TREE_PASS_H) $(FLAGS_H) $(TREE_H) \
@@ -3784,7 +3788,7 @@ GTFILES = $(CPP_ID_DATA_H) $(srcdir)/inp
$(srcdir)/cselib.h $(srcdir)/basic-block.h  $(srcdir)/ipa-ref.h
$(srcdir)/cgraph.h \
   $(srcdir)/reload.h $(srcdir)/caller-save.c $(srcdir)/symtab.c \
$(srcdir)/alias.c $(srcdir)/bitmap.c $(srcdir)/cselib.c
$(srcdir)/cgraph.c \
-  $(srcdir)/ipa-prop.c $(srcdir)/ipa-cp.c \
+  $(srcdir)/ipa-prop.c $(srcdir)/ipa-cp.c $(srcdir)/ipa-utils.h \
   $(srcdir)/dbxout.c \
   $(srcdir)/dwarf2out.h \
   $(srcdir)/dwarf2asm.c \
@@ -3826,7 +3830,7 @@ GTFILES = $(CPP_ID_DATA_H) $(srcdir)/inp
   $(srcdir)/ipa-inline.h \
   $(srcdir)/vtable-verify.c \
   $(srcdir)/asan.c \
-  $(srcdir)/tsan.c \
+  $(srcdir)/tsan.c $(srcdir)/ipa-devirt.c \
   @all_gtfiles@
 
# Compute the list of GT header files from the corresponding C sources,
Index: 

Re: [patch] [python libstdc++ printers] Fix gdb/15195

2013-08-19 Thread Jonathan Wakely
On 16 August 2013 16:28, Tom Tromey wrote:
 Phil == Phil Muldoon pmuld...@redhat.com writes:

 Phil Anyway, I have regenerated the patch with the fixes requested.

 Thanks.

 Phil 2013-08-16  Phil Muldoon  pmuld...@redhat.com

 Phil   PR gcc/53477

 I think this should say  PR libstdc++/53477

 Other than this nit, it looks good to me.
 I can't remember if I can approve these, but I think not.

I can though, so it's approved with the ChangeLog nit fixed.

Thanks.


Re: Type inheritance graph analysis speculative devirtualization, part 2/6 (type inheritance graph builder)

2013-08-19 Thread Jan Hubicka
 
 I believe you can get derived objects of derived type not visible in the tu, 
 so the list of targets does only contain local methods and implicitly all 
 methods only defined in other tus? That is, you cannot rely no that list to 
 be complete?

Yes, the list is in majority cases not complete. It is complete when you deal
with virtual method comming from object in local namespaces or final
classes/methods (this info we do not pass to middle-end yet). There is final
flag passed from possible_polymorphic_call_targets to you saying when this
happens.

I am currently using it for compile time unreachable code removal. I.e. to
remove known to be unreachable comdatexternal virtual function bodies rather
then pickling them all to LTO and remove them post inlining when we assume
devirt to no longer happen.  I alos use it for speculative devirtualization
(i.e when I only see one possible virtual call candidate on LTO I believe it is
likely to be the one called on runtime).

There are some cases where you can get more complete information but in general
this is not meant to replace the current code that is tracking lists of
possible dynamic object types reaching to given call.  It is kind of ortoghonal
to it.

Honza


Re: PING: Re: [patch] implement simd loops in trunk (OMP_SIMD)

2013-08-19 Thread Jakub Jelinek
On Thu, Aug 08, 2013 at 09:15:54PM +0200, Aldy Hernandez wrote:
 On 08/08/13 18:42, Richard Henderson wrote:
 On 08/08/2013 02:06 AM, Aldy Hernandez wrote:
 The hash is not really mapping the simd DECL to the simduid, since that's 
 just
 a matter of DECL_UID(simduid), but the OMP simd array to the index used to
 reference it (simduid), like thus:
 
  _7 = GOMP_SIMD_LANE (simduid.0)
  ...
  ...
  D.1737[_7] = stuff;
 
 decl_to_simduid is a mapping from the simduid.0 to the D.1737[] (the OMP 
 simd
 array).
 
 Agreed, or am I missing something?
 
 Agreed.  Yeah, that's a bit confusing.
 
 Ok, I will change the hash name and add some comments.

Do you have such an incremental patch?  I'd like to apply it to
gomp-4_0-branch so that we are in sync.

Jakub


Re: Fix class type lookup from OBJ_TYPE_REF

2013-08-19 Thread Jan Hubicka
 On Aug 17, 2013, at 8:54 AM, Jan Hubicka hubi...@ucw.cz wrote:
  Moreover objc apparently never produce any virtual functions/methods.
 
 Objective-C++ might.  :-)
Sure, those ought to be regular C++ methods though.
 
  Can someone explain me in greater detail how the objc use works?
 
 Objective-C uses it to manage code generation for post-increments of method 
 calls that involve a cast…  in greater detail, no, that's just from a quick 
 read of the code.

OK, the real question is what objective-C expects middle end to do with 
OBJ_TYPE_REF.
If it expects it to do some optimization, it is probably wrong, since I am not 
aware
of any OBJ_TYPE_REF code that will work on types w/o virtual tables.
If it expects to do nothing, I guess we ought to drop it at gimplification time.

Honza


Re: [RFC] Issues with intraprocedural devirtualization

2013-08-19 Thread Jan Hubicka
Hi,
here is variant of patch that drops the field walking from
gimple_extract_devirt_binfo_from_cst completely. As pointed out
by Jason, it is pointless since all structures have BINFO in C++
and thus get_binfo_at_offset will do the job.

I would like to return the code back eventually to handle arraysunions
but that can be done incrementally (and this is not the only place that
sufers from the problem)

Martin: I am still not quite certain about the dynamic type changing logic.
if this is the case ipa-prop needs to deal with and it handles only 0 offsets
within the outer type, I guess it can just test the offset by itself?

Honza

Bootstrapped/regtested x86_64-linux, OK?

* ipa-cp.c (ipa_get_indirect_edge_target_1): Update use
of gimple_extract_devirt_binfo_from_cst.
* gimple-fold.c (gimple_extract_devirt_binfo_from_cst): Rework.
(gimple_fold_call): Update use of gimple_extract_devirt_binfo_from_cst.
* ipa-prop.c (try_make_edge_direct_virtual_call): Likewise.
* gimple.h (gimple_extract_devirt_binfo_from_cst): Update.

Index: ipa-cp.c
===
*** ipa-cp.c(revision 201814)
--- ipa-cp.c(working copy)
*** ipa_get_indirect_edge_target_1 (struct c
*** 1541,1554 
if (TREE_CODE (t) != TREE_BINFO)
  {
tree binfo;
binfo = gimple_extract_devirt_binfo_from_cst
!(t, ie-indirect_info-otr_type);
if (!binfo)
return NULL_TREE;
!   binfo = get_binfo_at_offset (binfo, anc_offset, otr_type);
!   if (!binfo)
return NULL_TREE;
!   return gimple_get_virt_method_for_binfo (token, binfo);
  }
else
  {
--- 1541,1564 
if (TREE_CODE (t) != TREE_BINFO)
  {
tree binfo;
+   tree target, base_target;
binfo = gimple_extract_devirt_binfo_from_cst
!(t, ie-indirect_info-otr_type,
! ie-indirect_info-offset);
if (!binfo)
return NULL_TREE;
!   target = gimple_get_virt_method_for_binfo (token, binfo);
!   if (!target)
!   return NULL_TREE;
!   /* Constructors may be partially inlined.  We do not track
!  if type is in construction and during that time the
!virtual table may correspond to virtual table of the
!base type.  */
!   base_target = gimple_get_virt_method_for_binfo
!(token, TYPE_BINFO (ie-indirect_info-otr_type));
!   if (base_target != target)
return NULL_TREE;
!   return target;
  }
else
  {
Index: gimple-fold.c
===
*** gimple-fold.c   (revision 201814)
--- gimple-fold.c   (working copy)
*** gimple_fold_builtin (gimple stmt)
*** 1006,1021 
  /* Return a binfo to be used for devirtualization of calls based on an object
 represented by a declaration (i.e. a global or automatically allocated one)
 or NULL if it cannot be found or is not safe.  CST is expected to be an
!ADDR_EXPR of such object or the function will return NULL.  Currently it is
!safe to use such binfo only if it has no base binfo (i.e. no ancestors)
!EXPECTED_TYPE is type of the class virtual belongs to.  */
  
  tree
! gimple_extract_devirt_binfo_from_cst (tree cst, tree expected_type)
  {
HOST_WIDE_INT offset, size, max_size;
!   tree base, type, binfo;
!   bool last_artificial = false;
  
if (!flag_devirtualize
|| TREE_CODE (cst) != ADDR_EXPR
--- 1006,1025 
  /* Return a binfo to be used for devirtualization of calls based on an object
 represented by a declaration (i.e. a global or automatically allocated one)
 or NULL if it cannot be found or is not safe.  CST is expected to be an
!ADDR_EXPR of such object or the function will return NULL.
! 
!It is up to the caller to check for absence of dynamic type changes.
!Because constructors may be partially inlined and the virtual tables
!during construction may be overwritten by virtual tables by base types,
!it is also up to caller to verify that either all base types have
!the same virtual method or that this does not happen.  */
  
  tree
! gimple_extract_devirt_binfo_from_cst (tree cst, tree expected_type,
! HOST_WIDE_INT otr_offset)
  {
HOST_WIDE_INT offset, size, max_size;
!   tree base, type;
  
if (!flag_devirtualize
|| TREE_CODE (cst) != ADDR_EXPR
*** gimple_extract_devirt_binfo_from_cst (tr
*** 1028,1074 
if (!DECL_P (base)
|| max_size == -1
|| max_size != size
!   || TREE_CODE (type) != RECORD_TYPE)
! return NULL_TREE;
! 
!   /* Find the sub-object the constant actually refers to and mark whether it 
is
!  an artificial one (as opposed to a user-defined one).  */
!   while (true)
! {
!   HOST_WIDE_INT pos, size;
!   tree fld;
! 
!   if (types_same_for_odr 

Re: [PATCH, i386, MPX 1/X] Support of Intel MPX ISA

2013-08-19 Thread Ilya Enkovich
Ping

2013/8/12 Ilya Enkovich enkovich@gmail.com:
 2013/8/10 Joseph S. Myers jos...@codesourcery.com:
 On Mon, 29 Jul 2013, Ilya Enkovich wrote:

 Hi,

 Here is updated version of the patch. I removed redundant
 mode_for_bound, added comments to BOUND_TYPE and added -mmpx option.
 I also fixed bndmk/bndldx/bndstx constraints to avoid incorrect
 register allocation (created two new constraints for that).

 I think the -mmpx option should be documented in invoke.texi, and the new
 machine modes / mode class should be documented in rtl.texi where other
 machine modes / mode classes are documented.  Beyond that, I have no
 comments on this patch revision.

 --
 Joseph S. Myers
 jos...@codesourcery.com

 Thanks! Here is a new revision with -mmpx and new machine modes /
 class documented.
 Is it good to install to trunk?

 Thanks,
 Ilya
 ---
 2013-08-12  Ilya Enkovich  ilya.enkov...@intel.com

 * mode-classes.def (MODE_BOUND): New.
 * tree.def (BOUND_TYPE): New.
 * genmodes.c (complete_mode): Support MODE_BOUND.
 (BOUND_MODE): New.
 (make_bound_mode): New.
 * machmode.h (BOUND_MODE_P): New.
 * stor-layout.c (int_mode_for_mode): Support MODE_BOUND.
 (layout_type): Support BOUND_TYPE.
 * tree-pretty-print.c (dump_generic_node): Support BOUND_TYPE.
 * tree.c (build_int_cst_wide): Support BOUND_TYPE.
 (type_contains_placeholder_1): Likewise.
 * tree.h (BOUND_TYPE_P): New.
 * varasm.c (output_constant): Support BOUND_TYPE.
 * config/i386/constraints.md (B): New.
 (Ti): New.
 (Tb): New.
 * config/i386/i386-modes.def (BND32): New.
 (BND64): New.
 * config/i386/i386-protos.h (ix86_bnd_prefixed_insn_p): New.
 * config/i386/i386.c (isa_opts): Add mmpx.
 (regclass_map): Add bound registers.
 (dbx_register_map): Likewise.
 (dbx64_register_map): Likewise.
 (svr4_dbx_register_map): Likewise.
 (PTA_MPX): New.
 (ix86_option_override_internal) Support MPX ISA.
 (ix86_code_end): Add MPX bnd prefix.
 (output_set_got): Likewise.
 (ix86_output_call_insn): Likewise.
 (get_some_local_dynamic_name): Add '!' (MPX bnd) print prefix support.
 (ix86_print_operand_punct_valid_p): Likewise.
 (ix86_print_operand_address): Support UNSPEC_BNDMK_ADDR and
 UNSPEC_BNDMK_ADDR.
 (ix86_class_likely_spilled_p): Add bound regs support.
 (ix86_hard_regno_mode_ok): Likewise.
 (x86_order_regs_for_local_alloc): Likewise.
 (ix86_bnd_prefixed_insn_p): New.
 * config/i386/i386.h (FIRST_PSEUDO_REGISTER): Fix to new value.
 (FIXED_REGISTERS): Add bound registers.
 (CALL_USED_REGISTERS): Likewise.
 (REG_ALLOC_ORDER): Likewise.
 (HARD_REGNO_NREGS): Likewise.
 (TARGET_MPX): New.
 (VALID_BND_REG_MODE): New.
 (FIRST_BND_REG): New.
 (LAST_BND_REG): New.
 (reg_class): Add BND_REGS.
 (REG_CLASS_NAMES): Likewise.
 (REG_CLASS_CONTENTS): Likewise.
 (BND_REGNO_P): New.
 (ANY_BND_REG_P): New.
 (BNDmode): New.
 (HI_REGISTER_NAMES): Add bound registers.
 * config/i386/i386.md (UNSPEC_BNDMK): New.
 (UNSPEC_BNDMK_ADDR): New.
 (UNSPEC_BNDSTX): New.
 (UNSPEC_BNDLDX): New.
 (UNSPEC_BNDLDX_ADDR): New.
 (UNSPEC_BNDCL): New.
 (UNSPEC_BNDCU): New.
 (UNSPEC_BNDCN): New.
 (UNSPEC_MPX_FENCE): New.
 (BND0_REG): New.
 (BND1_REG): New.
 (type): Add mpxmov, mpxmk, mpxchk, mpxld, mpxst.
 (length_immediate): Likewise.
 (prefix_0f): Likewise.
 (memory): Likewise.
 (prefix_rep): Check for bnd prefix.
 (BND): New.
 (bnd_ptr): New.
 (BNDCHECK): New.
 (bndcheck): New.
 (*jcc_1): Add MPX bnd prefix and fix length.
 (*jcc_2): Likewise.
 (jump): Likewise.
 (simple_return_internal): Likewise.
 (simple_return_pop_internal): Likewise.
 (*indirect_jump): Add MPX bnd prefix.
 (*tablejump_1): Likewise.
 (simple_return_internal_long): Likewise.
 (simple_return_indirect_internal): Likewise.
 (mode_mk): New.
 (*mode_mk): New.
 (movmode): New.
 (*movmode_internal_mpx): New.
 (mode_bndcheck): New.
 (*mode_bndcheck): New.
 (mode_ldx): New.
 (*mode_ldx): New.
 (mode_stx): New.
 (*mode_stx): New.
 * config/i386/predicates.md (lea_address_operand): Rename to...
 (address_no_seg_operand): ... this.
 (address_mpx_no_base_operand): New.
 (address_mpx_no_index_operand): New.
 (bnd_mem_operator): New.
 * config/i386/i386.opt (mmpx): New.
 * doc/invoke.texi: Add documentation for the flags -mmpx, -mno-mpx.
 * doc/rtl.texi 

Re: PING: Re: [patch] implement simd loops in trunk (OMP_SIMD)

2013-08-19 Thread Jakub Jelinek
On Thu, Aug 01, 2013 at 09:38:31AM -1000, Richard Henderson wrote:
  +  if (simd
  + /*
  + || (fd-sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
  +  !fd-have_ordered)*/)
 
 Debugging leftovers or what?

gomp-4_0-branch contains also the || there, but for the trunk merge
I think Aldy can leave that comment out, I'll deal with it when merging the
trunk changes back to gomp-4_0-branch.

  +  /* Enforce simdlen 1 in simd loops with data sharing clauses referencing
  + variable sized vars.  That is unnecessarily hard to support and very
  + unlikely to result in vectorized code anyway.  */
  +  if (is_simd)
  +for (c = clauses; c ; c = OMP_CLAUSE_CHAIN (c))
  +  switch (OMP_CLAUSE_CODE (c))
  +   {
  +   case OMP_CLAUSE_REDUCTION:
  + if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
  +   max_vf = 1;
  + /* FALLTHRU */
  +   case OMP_CLAUSE_PRIVATE:
  +   case OMP_CLAUSE_FIRSTPRIVATE:
  +   case OMP_CLAUSE_LASTPRIVATE:
  +   case OMP_CLAUSE_LINEAR:
  + if (is_variable_sized (OMP_CLAUSE_DECL (c)))
  +   max_vf = 1;
  + break;
  +   default:
  + continue;
  +   }
 
 Comment is wrong, code is right, adjusting max_vf not simdlen.

max_vf = 1 causes
  c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE_SAFELEN);
  OMP_CLAUSE_SAFELEN_EXPR (c) = build_int_cst (integer_type_node,
   max_vf);
so I think the comment should just be changed s/simdlen/safelen/, always get
those 2 wrong...  simdlen is for elemental functions, safelen for simd
loops.
 
  +  /* If max_vf is non-NULL, then we can use only vectorization factor
  + up to the max_vf we chose.  So stick it into safelen clause.  */
  +  if (max_vf)
  +{
  +  tree c = find_omp_clause (gimple_omp_for_clauses (ctx-stmt),
  +   OMP_CLAUSE_SAFELEN);
 
 First, non-zero, not non-null -- max_vf is not a pointer.

Ack.

 Second, I don't understand why we're adjusting safelen.  The VF we choose
 for optimizing the loop (even 1), does not change the fact that there are
 no dependencies between loop iterations larger than that.

No, it adds depenencies.  We choose some size of the per-simd lane arrays,
and we must not allow larger vectorization factor than that, as every simd
lane should have different object.  The size is normally the maximum
possible value for a target, so it should be big enough, but we really need
it to be recorded, so that we don't generate invalid code.  max_vf = 1
is for the cases where we punt on handling it properly, those loops for now
can't be vectorized just because they had safelen clause (normal
vectorization rules will probably also punt on those).  In the future if
e.g. privatized per-SIMD lane VLAs are common we can improve that, but right
now I somehow doubt that would be the case.

 Did you want to be adding a _max_vf_ attribute to record stuff that we
 learned while examining the omp loop?  E.g. the max_vf=1 reduction above?
 
 Given the only adjustment we make to max_vf is to disable vectorization,
 did we in fact want to discard the simd attribute instead, making this a
 regular openmp loop?

See above, it is not only about disabling it, we record the sizes of the
arrays there.  And, for SAFELEN clause with 1, we actually don't set
loop-force_vect and set loop-safelen to 0 (i.e. normal loop).

  +   if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB;
  +   if (cond3 is )
  + adj = STEP3 - 1;
  +   else
  + adj = STEP3 + 1;
  +   count3 = (adj + N32 - N31) / STEP3;
  +   if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB;
  +   if (cond2 is )
  + adj = STEP2 - 1;
  +   else
  + adj = STEP2 + 1;
  +   count2 = (adj + N22 - N21) / STEP2;
  +   if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB;
 
 CEIL_DIV_EXPR, instead of TRUNC_DIV_EXPR and adjusting by hand?  Unless we
 can't generate the same code in the end because generically we don't know that
 the values involved must be negative for GT?

This is just moving code around (and merging multiple copies of it), the old
code was also using TRUNC_DIV_EXPR.  I think we can try CEIL_DIV_EXPR, but
I'd strongly prefer to only do it incrementally and separately from these
changes.

 I wonder if we do better mooshing all of the BBs together, creating one larger
 BB with all the computation and the unexpected jump at the end.  I.e.
 
   bool zero3, zero2, zero1, zero;
 
   zero3 = N32 c3 N31;
   count3 = (N32 - N31) /[cl] STEP3;
   zero2 = N22 c2 N21;
   count2 = (N22 - N21) /[cl] STEP2;
   zero1 = N12 c1 N11;
   count1 = (N12 - N11) /[cl] STEP1;
   zero = zero3 || zero2 || zero1;
   count = count1 * count2 * count3;
   if (__builtin_expect(zero, false)) goto zero_iter_bb;
 
 After all, we expect the zero=false, and thus we expect to have to evaluate 
 all
 of the comparison 

Re: Fix class type lookup from OBJ_TYPE_REF

2013-08-19 Thread Richard Biener
Jan Hubicka hubi...@ucw.cz wrote:
 On Aug 17, 2013, at 8:54 AM, Jan Hubicka hubi...@ucw.cz wrote:
  Moreover objc apparently never produce any virtual
functions/methods.
 
 Objective-C++ might.  :-)
Sure, those ought to be regular C++ methods though.
 
  Can someone explain me in greater detail how the objc use works?
 
 Objective-C uses it to manage code generation for post-increments of
method calls that involve a cast…  in greater detail, no, that's just
from a quick read of the code.

OK, the real question is what objective-C expects middle end to do with
OBJ_TYPE_REF.
If it expects it to do some optimization, it is probably wrong, since I
am not aware
of any OBJ_TYPE_REF code that will work on types w/o virtual tables.
If it expects to do nothing, I guess we ought to drop it at
gimplification time.

The handling is probably dependent no the runtime in use, so I suggest looking 
into Darwin specific files for example.

Richard.

Honza




[Patch] Fix empty grouping problem in regex

2013-08-19 Thread Tim Shen
Before this patch, it causes the executor into infinite loop.

Tested under x86_64. I'll test -m32 and check-debug before committing.

Thanks!

-- 
Tim Shen


changelog
Description: Binary data


emptygroup.patch
Description: Binary data


Re: [Patch] Fix empty grouping problem in regex

2013-08-19 Thread Jakub Jelinek
On Mon, Aug 19, 2013 at 06:07:10PM +0800, Tim Shen wrote:
 Before this patch, it causes the executor into infinite loop.

 Tested under x86_64. I'll test -m32 and check-debug before committing.

@@ -2371,8 +2372,10 @@
   if (__re._M_automaton == nullptr)
 return false;
   for (auto __cur = __first; __cur != __last; ++__cur) // Any KMP-like 
algo?
-if (__detail::__get_executor(__cur, __last, __m, __re, __flags)
-  -_M_search_from_first())
+{
+  __detail::__get_executor(__cur, __last, __m, __re, __flags)
+-_M_search_from_first();
+  if (__m.size()  0  __m[0].matched)
   {
 for (auto __it : __m)
   if (!__it.matched)
@@ -2387,6 +2390,7 @@
   (__m.suffix().first != __m.suffix().second);
 return true;
   }

Doesn't everything in between the last added line above and the first added
line below need reindenting by 2 spaces (plus of course transforming any 8
consecutive spaces into tabs)?

+}
   return false;
 }

Jakub


Re: Type inheritance graph analysis speculative devirtualization, part 2/6 (type inheritance graph builder)

2013-08-19 Thread Dodji Seketeli
Hello Jan,

Just some casual notes.

Jan Hubicka hubi...@ucw.cz a écrit:

[...]

 Index: ipa-devirt.c
 ===

[...]

 +/* Brief vocalburary:

[...]

 + OTR = OBJ_TYPE_REF
 +   This is Gimple representation of type information of a polymorphic 
 call.
 ^
Maybe insert a the here?

 +   It contains two parameters:
 +  otr_type is a type of class whose method is called.
 +  otr_token is index into virtual table where address is taken.

Likewise.

 +
 + BINFO
 +   This is the type inheritance information attached to each tree
 +   RECORD_TYPE by the C++ frotend.  It provides information about base
 +   types and virtual tables.
 +
 +   BINFO is linked to the RECORD_TYPE by TYPE_BINFO.
 +   BINFO also links to its type by BINFO_TYPE and to the virtual table by
 +   BINFO_VTABLE.
 +
 +   Base types of a given type are enumerated by BINFO_BASE_BINFO
 +   vector.  Members of this vectors are not BINFOs associated
 +   with a base type.  Rather they are new copies of BINFOs
 +   (base BINFOs). Their virtual tables may differ from
 +   virtual table of the base type.  Also BINFO_OFFSET specify
 ^^^
specifies.

 +   offset of the base within the type.
 ^
Maybe insert a the here?

 +
 +   In the case of single inheritance, the virtual table is shared
 +   and BINFO_VTABLE of base BINFO is NULL.  In the case of multiple
 +   inheritance the individual virtual tables are pointer to by
 +   BINFO_VTABLE of base binfos (that differs of BINFO_VTABLE of 
 +   binfo associated to the base type).
 +
 +   BINFO lookup for a given base type and offset can be done by
 +   get_binfo_at_offset.  It returns proper BINFO whose virtual table
 +   can be used for lookup of virtual methods associated with the
 +   base type.
 +
 + token
 +   This is an index of virtual method in virtual table associated
 +   to the type defining it. Token can be looked up from OBJ_TYPE_REF
 +   or from DECL_VINDEX of given virtual table.
^
Maybe insert a 'a' here?

 +
 + polymorphic (indirect) call
 +   This is callgraph represention of virtual method call.  Every
 +   polymorphic call contains otr_type and otr_token taken from
 +   original OBJ_TYPE_REF at callgraph construction time.
 +
 +   What we do here:
 +
 +   build_type_inheritance_graph triggers a construction of the type 
 inheritance
 +   graph.
 +
 + We reconstruct it based on types of methods we see in the unit.
 + This means that the graph is not complete. Types with no methods are not
 + inserted to the graph.  Also types without virtual methods are not
 ^^
into

 + represented at all, though it may be easy to add this.
 +  
 + The inheritance graph is represented as follows:
 +
 +   Vertices are structures odr_type.  Every odr_type may correspond
 +   to one or more tree type nodes that are equivalent by ODR rule.
 +   (the multiple type nodes appear only with linktime optimization)
 +
 +   Edges are repsented by odr_type-base and odr_type-derived_types.
^
Typo: represented

 +static inline bool
 +polymorphic_type_binfo_p (tree binfo)
 +{
 +  /* See if BINFO's type has an virtual table associtated with it.  */
 +  return BINFO_VTABLE (TYPE_BINFO (BINFO_TYPE (binfo)));

Just for my education, why not just:

return BINFO_VTABLE (binfo);
?

At the very least, I'd say there ought to be a comment here explaining
why this gotcha'.

[...]

Incidentally ...

 +/* See if BINFO's type match OTR_TYPE.  If so, lookup method
 +   in vtable of TYPE_BINFO and insert method to NODES array.
 +   Otherwise recurse to base BINFOs.
 +   This match what get_binfo_at_offset does, but with offset
 +   being unknown.
 +
 +   TYPE_BINFO is binfo holding an virtual table matching
 +   BINFO's type.  In the case of single inheritance, this
 +   is binfo of BINFO's type ancestor (vtable is shared),
 +   otherwise it is binfo of BINFO's type.
 +
 +   MATCHED_VTABLES tracks virtual tables we already did lookup
 +   for virtual function in.
 +  */
 +
 +static void
 +record_binfo (vec cgraph_node * nodes,
 +   tree binfo,
 +   tree otr_type,
 +   tree type_binfo,
 +   HOST_WIDE_INT otr_token,
 +   pointer_set_t *inserted,
 +   pointer_set_t *matched_vtables)
 +{
 +  tree type = BINFO_TYPE (binfo);
 +  int i;
 +  tree base_binfo;
 +
 +  gcc_checking_assert (BINFO_VTABLE (type_binfo));
 +
 +  if (types_same_for_odr (type, otr_type)
 +   !pointer_set_insert (matched_vtables, BINFO_VTABLE (type_binfo)))
 +{
 +  tree target = gimple_get_virt_method_for_binfo (otr_token, type_binfo);
 +  if (target)
 + maybe_record_node (nodes, target, inserted);
 +  

Re: [PATCH, vtv update] Fix /tmp directory issues in libvtv

2013-08-19 Thread Florian Weimer

On 08/17/2013 12:29 AM, Caroline Tice wrote:

OK, I *think* I have done as you requested.  I have to try the
environment variable before falling back on stderr (there's a program
we want to use this on that disables the ability to write to stderr).
I have added the secure_getenv stuff as you requested. The fixed patch
is attached.

Please review the patch and let me know if this is OK to commit.  Thanks!


This looks good, but I have problems regenerating the libvtv/configure 
file.  Could you please send me your copy?


I want to make sure that the secure_getenv symbol actually ends up in 
libvtv.so.


--
Florian Weimer / Red Hat Product Security Team


[PATCH/RFA] Do not set MULTILIB_DEFAULTS for arm*-*-linux-gnueabi* targets

2013-08-19 Thread Matthew Gretton-Dann
All,

The attached patch removes the setting of MULTILIB_DEFAULTS for
arm*-*-linux-gnueabi* targets.

The current setting of MULTILIB_DEFAULTS includes mfloat-abi=hard,
which for arm*-*-linux-gnueabi is not true.  This makes generating a
hard-float multilib impossible in this configuration.

In an off-list conversation with Ramana we decided that
MULTILIB_DEFAULTS should not be set for these targets, as we set
MULTILIB_OPTIONS to an empty string (as per the docs for
MULTILIB_DEFAULTS).

I added comments by the definition of MULTILIB_OPTIONS and
MULTILIB_DEFAULTS to make it clear that the two options are related.

Tested cross arm-none-linux-gnueabi.

OK for trunk?

Thanks,

Matt

gcc/ChangeLog:
2013-08-19  Matthew Gretton-Dann  matthew.gretton-d...@linaro.org

* config/arm/linux-elf.h (MULTILIB_DEFAULTS): Remove definition.
* config/arm/t-linux-eabi: (MULTILIB_OPTIONS): Document association
with MULTILIB_DEFAULTS.

-- 
Matthew Gretton-Dann
Linaro Toolchain Working Group
matthew.gretton-d...@linaro.org
diff --git a/gcc/config/arm/linux-elf.h b/gcc/config/arm/linux-elf.h
index 488efa4..475e220 100644
--- a/gcc/config/arm/linux-elf.h
+++ b/gcc/config/arm/linux-elf.h
@@ -44,9 +44,9 @@
 
 #define SUBTARGET_EXTRA_LINK_SPEC  -m  TARGET_LINKER_EMULATION  -p
 
+/* We do not have any MULTILIB_OPTIONS specified, so there are no
+   MULTILIB_DEFAULTS.  */
 #undef  MULTILIB_DEFAULTS
-#define MULTILIB_DEFAULTS \
-   { marm, mlittle-endian, mfloat-abi=hard, mno-thumb-interwork }
 
 /* Now we define the strings used to build the spec file.  */
 #undef  LIB_SPEC
diff --git a/gcc/config/arm/t-linux-eabi b/gcc/config/arm/t-linux-eabi
index 2f2f8ff..07e32b3 100644
--- a/gcc/config/arm/t-linux-eabi
+++ b/gcc/config/arm/t-linux-eabi
@@ -18,6 +18,8 @@
 
 # We do not build a Thumb multilib for Linux because the definition of
 # CLEAR_INSN_CACHE in linux-gas.h does not work in Thumb mode.
+# If you set MULTILIB_OPTIONS to a non-empty value you should also set
+# MULTILIB_DEFAULTS in linux-elf.h.
 MULTILIB_OPTIONS   =
 MULTILIB_DIRNAMES  =
 


[C++ Patch, obvious] Use cp_parser_lookup_name_simple more

2013-08-19 Thread Paolo Carlini

Hi,

was having a look to c++/58187 (looks like we are missing a lookup) and 
noticed this. It seems obvious to me, I'll commit it later today barring 
objections.


Tested x86_64-linux.

Thanks,
Paolo.


2013-08-19  Paolo Carlini  paolo.carl...@oracle.com

* parser.c (cp_parser_lambda_introducer, cp_parser_decltype_expr):
Use cp_parser_lookup_name_simple.
Index: parser.c
===
--- parser.c(revision 201835)
+++ parser.c(working copy)
@@ -8710,15 +8710,8 @@ cp_parser_lambda_introducer (cp_parser* parser, tr
 
  /* Turn the identifier into an id-expression.  */
  capture_init_expr
-= cp_parser_lookup_name
-(parser,
-capture_id,
- none_type,
- /*is_template=*/false,
- /*is_namespace=*/false,
- /*check_dependency=*/true,
- /*ambiguous_decls=*/NULL,
- capture_token-location);
+   = cp_parser_lookup_name_simple (parser, capture_id,
+   capture_token-location);
 
  if (capture_init_expr == error_mark_node)
{
@@ -11583,13 +11576,8 @@ cp_parser_decltype_expr (cp_parser *parser,
 
   if (identifier_p (expr))
/* Lookup the name we got back from the id-expression.  */
-   expr = cp_parser_lookup_name (parser, expr,
- none_type,
- /*is_template=*/false,
- /*is_namespace=*/false,
- /*check_dependency=*/true,
- /*ambiguous_decls=*/NULL,
- id_expr_start_token-location);
+   expr = cp_parser_lookup_name_simple (parser, expr,
+id_expr_start_token-location);
 
   if (expr
expr != error_mark_node


Re: [PATCH] Redesign pthread in LIB_SPEC for systems without libpthread

2013-08-19 Thread Pavel Chupin
 1. In config/gnu-user.h you define GNU_USER_TARGET_NO_PTHREADS_LIB_SPEC to 
 the main part of GNU_USER_TARGET_LIB_SPEC.  GNU_USER_TARGET_LIB_SPEC now 
 becomes '%{pthread:-lpthread}  GNU_USER_TARGET_NO_PTHREADS_LIB_SPEC'.
 2. In occurrences of LINUX_OR_ANDROID_LD you continue to use 
 GNU_USER_TARGET_LIB_SPEC for the first argument and 
 'GNU_USER_TARGET_NO_PTHREADS_LIB_SPEC   ANDROID_LIB_SPEC'  for the second 
 argument.

 This way you are operating with named macros instead of verbatim 
 %{pthread:-lpthread}, and, hopefully, this will be more clear to an outside 
 observer.


Done.


 Please see modified patch attached.
 I've built all 3 Android compilers, x86_64 and i686 and ran simple
 test with -pthread.

 Thanks for good test coverage.  I assume you have also tested a non-android 
 Linux target?

Yes. By x86_64 and i686 above I meant Linux non-android target. And I
built successfully all 3 Android compilers.


 The patch is OK if adjusted to the comments above (or good arguments provided 
 why your current patch is more straigh-forward).


Thanks. I'm OK with your suggestions.
Please see adjusted patch attached.

Updated ChangeLog:

2013-08-19  Pavel Chupin  pavel.v.chu...@intel.com

Fix LIB_SPEC for systems without libpthread
* config/gnu-user.h: Introduce GNU_USER_TARGET_NO_PTHREADS_LIB_SPEC.
* config/arm/linux-eabi.h: Use GNU_USER_TARGET_NO_PTHREADS_LIB_SPEC
for Android.
* config/i386/linux-common.h: Likewise.
* config/mips/linux-common.h: Likewise.

-- 
Pavel Chupin
Intel Corporation


0001-Fix-LIB_SPEC-for-systems-without-libpthread.patch
Description: Binary data


[Patch, Fortran, OOP] PR 58185: [4.8/4.9 Regression] ICE when selector in SELECT TYPE is non-polymorphic

2013-08-19 Thread Janus Weil
Hi all,

here is a small patch which does some cleanup to avoid an ICE on
invalid SELECT TYPE statements.

The first three hunks are just cosmetics, and the fourth one also
contains minor refactoring, where I pull some common code out of the
two branches of an if statement. The important part, however, is that
I prevent gfc_build_class_symbol from being called if no type symbol
is available.

Regtested on x86_64-unknown-linux-gnu. Ok for trunk and 4.8?

Cheers,
Janus


2013-08-19  Janus Weil  ja...@gcc.gnu.org

PR fortran/58185
* match.c (copy_ts_from_selector_to_associate): Don't build class
symbol if type is not available. Some cleanup.


2013-08-19  Janus Weil  ja...@gcc.gnu.org

PR fortran/58185
* gfortran.dg/select_type_34.f90: New.


pr58185.diff
Description: Binary data


select_type_34.f90
Description: Binary data


Re: [Patch, Fortran, F03] PR 46271: OpenMP default(none) and procedure pointers

2013-08-19 Thread Janus Weil
Committed as r201835 ...



2013/8/18 Janus Weil ja...@gcc.gnu.org:
 Hi all,

 here is a pretty-much-trivial patch for a problem with OpenMP and
 procedure pointers (proc-ptrs to functions are working, but not
 subroutines).

 Regtested on x86_64-unknown-linux-gnu. Will commit as obvious tomorrow
 if no one protests in the meantime ...

 Cheers,
 Janus



 2013-08-18  Janus Weil  ja...@gcc.gnu.org

 PR fortran/46271
 * openmp.c (resolve_omp_clauses): Bugfix for procedure pointers.


 2013-08-18  Janus Weil  ja...@gcc.gnu.org

 PR fortran/46271
 * gfortran.dg/gomp/proc_ptr_1.f90: New.


RFA: testsuite patches (1/6): keeps_null_pointer_checks effect on pta/alias dump files

2013-08-19 Thread Joern Rennecke

On keeps_null_pointer_checks, relative to other targets, we see some NULLs in
sets replaced with/merged into NONLOCAL.

Tested for avr with --target_board=atmega128-sim and native on  
i686-pc-linuc-gnu.


Ok to apply?
2013-08-14  Joern Rennecke  joern.renne...@embecosm.com

* gcc.dg/ipa/ipa-pta-14.c (scan-ipa-dump) [keeps_null_pointer_checks]:
Don't expect NULL in foo.result set.
* gcc.dg/tree-ssa/pta-escape-1.c (scan-tree-dump): Don't expect NULL
in ESCAPED set.
* gcc.dg/tree-ssa/pta-escape-2.c: Likewise.
* gcc.dg/tree-ssa/pta-escape-3.c: Likewise.

Index: gcc.dg/ipa/ipa-pta-14.c
===
--- gcc.dg/ipa/ipa-pta-14.c (revision 201835)
+++ gcc.dg/ipa/ipa-pta-14.c (working copy)
@@ -22,7 +22,7 @@ int main()
   a.p = (void *)c;
   p = foo(a, a);
   /* { dg-final { scan-ipa-dump foo.result = { NULL a\[^ \]* c\[^ \]* } 
pta { xfail *-*-* } } } */
-  /* { dg-final { scan-ipa-dump foo.result = { NULL a\[^ \]* a\[^ \]* c\[^ 
\]* } pta } } */
+  /* { dg-final { scan-ipa-dump foo.result = { NULL a\[^ \]* a\[^ \]* c\[^ 
\]* } pta { target { ! keeps_null_pointer_checks } } } } */
   ((struct X *)p)-p = (void *)0;
   if (a.p != (void *)0)
 abort ();
Index: gcc.dg/tree-ssa/pta-escape-1.c
===
--- gcc.dg/tree-ssa/pta-escape-1.c  (revision 201835)
+++ gcc.dg/tree-ssa/pta-escape-1.c  (working copy)
@@ -33,5 +33,5 @@ int main()
   return 0;
 }
 
-/* { dg-final { scan-tree-dump ESCAPED = { NULL ESCAPED NONLOCAL x } alias 
} } */
+/* { dg-final { scan-tree-dump ESCAPED = { NULL ESCAPED NONLOCAL x } alias 
{ target { ! keeps_null_pointer_checks } } } } */
 /* { dg-final { cleanup-tree-dump alias } } */
Index: gcc.dg/tree-ssa/pta-escape-2.c
===
--- gcc.dg/tree-ssa/pta-escape-2.c  (revision 201835)
+++ gcc.dg/tree-ssa/pta-escape-2.c  (working copy)
@@ -34,5 +34,5 @@ int main()
   return 0;
 }
 
-/* { dg-final { scan-tree-dump ESCAPED = { NULL ESCAPED NONLOCAL x } alias 
} } */
+/* { dg-final { scan-tree-dump ESCAPED = { NULL ESCAPED NONLOCAL x } alias 
{ target { ! keeps_null_pointer_checks } } } } */
 /* { dg-final { cleanup-tree-dump alias } } */
Index: gcc.dg/tree-ssa/pta-escape-3.c
===
--- gcc.dg/tree-ssa/pta-escape-3.c  (revision 201835)
+++ gcc.dg/tree-ssa/pta-escape-3.c  (working copy)
@@ -38,5 +38,5 @@ int main()
   return 0;
 }
 
-/* { dg-final { scan-tree-dump ESCAPED = { NULL ESCAPED NONLOCAL x } alias 
} } */
+/* { dg-final { scan-tree-dump ESCAPED = { NULL ESCAPED NONLOCAL x } alias 
{ target { ! keeps_null_pointer_checks } } } } */
 /* { dg-final { cleanup-tree-dump alias } } */


RFA: testsuite patches (2/6): [avr]: Set required branch cost for gcc.dg/tree-ssa/vrp87.c

2013-08-19 Thread Joern Rennecke

We need a minimum branch cost of 2 to make the expected optimization happen.

Tested for avr with --target_board=atmega128-sim and native on  
i686-pc-linuc-gnu.


Ok to apply?
2013-08-16  Joern Rennecke  joern.renne...@embecosm.com

* gcc.dg/tree-ssa/vrp87.c [avr-*-*] (dg-additional-options): New.

Index: gcc.dg/tree-ssa/vrp87.c
===
--- gcc.dg/tree-ssa/vrp87.c (revision 201835)
+++ gcc.dg/tree-ssa/vrp87.c (working copy)
@@ -1,6 +1,7 @@
 /* { dg-do compile { target { ! m68k*-*-* mmix*-*-* mep*-*-* bfin*-*-* 
v850*-*-* picochip*-*-* moxie*-*-* cris*-*-* m32c*-*-* fr30*-*-* mcore*-*-* 
powerpc*-*-* xtensa*-*-*} } } */
 
 /* { dg-options -O2 -fdump-tree-vrp2-details -fdump-tree-cddce2-details } */
+/* { dg-additional-options -mbranch-cost=2 { target avr-*-* } } */
 
 struct bitmap_head_def;
 typedef struct bitmap_head_def *bitmap;


RFA: testsuite patches (3/6): [avr]: ssa-dom-thread-4.c: expect 6 times Threaded.

2013-08-19 Thread Joern Rennecke
Tested for avr with --target_board=atmega128-sim and native on  
i686-pc-linuc-gnu.


Ok to apply?
2013-08-16  Joern Rennecke  joern.renne...@embecosm.com

* gcc.dg/tree-ssa/ssa-dom-thread-4.c [avr-*-*]: Expect 6 times
Threaded.

Index: gcc.dg/tree-ssa/ssa-dom-thread-4.c
===
--- gcc.dg/tree-ssa/ssa-dom-thread-4.c  (revision 201835)
+++ gcc.dg/tree-ssa/ssa-dom-thread-4.c  (working copy)
@@ -61,7 +61,7 @@ bitmap_ior_and_compl (bitmap dst, const_
zero.  */
 /* ARM Cortex-M0 defined LOGICAL_OP_NON_SHORT_CIRCUIT to false,
so skip below test.  */
-/* { dg-final { scan-tree-dump-times Threaded 3 dom1 { target { ! { 
mips*-*-* || { arm_cortex_m  arm_thumb1 } } } } } } */
+/* { dg-final { scan-tree-dump-times Threaded 3 dom1 { target { ! { { 
mips*-*-* avr-*-* } || { arm_cortex_m  arm_thumb1 } } } } } } */
 /* MIPS defines LOGICAL_OP_NON_SHORT_CIRCUIT to 0, so we split both
a_elt || b_elt and b_elt  kill_elt into two conditions each,
rather than using (var1 != 0) op (var2 != 0).  Also, as on other targets,
@@ -81,6 +81,8 @@ bitmap_ior_and_compl (bitmap dst, const_
   - kill_elt-indx == b_elt-indx in the second condition,
 skipping the known-true b_elt  kill_elt in the second
 condition.  */
-/* { dg-final { scan-tree-dump-times Threaded 6 dom1 { target mips*-*-* } 
} } */
+/* For avr, BRANCH_COST is by default 0, so the default
+   LOGICAL_OP_NON_SHORT_CIRCUIT definition also computes as 0.  */
+/* { dg-final { scan-tree-dump-times Threaded 6 dom1 { target mips*-*-* 
avr-*-* } } } */
 /* { dg-final { cleanup-tree-dump dom1 } } */
 


RFA: testsuite patches (4/6): vrp55.c can only thread when not keeping null pointer checks.

2013-08-19 Thread Joern Rennecke
Tested for avr with --target_board=atmega128-sim and native on  
i686-pc-linuc-gnu.


Ok to apply?
2013-08-17  Joern Rennecke  joern.renne...@embecosm.com

* gcc.dg/tree-ssa/vrp55.c: Use keeps_null_pointer_checks to determine
correct test response.

Index: gcc.dg/tree-ssa/vrp55.c
===
--- gcc.dg/tree-ssa/vrp55.c (revision 201835)
+++ gcc.dg/tree-ssa/vrp55.c (working copy)
@@ -9,6 +9,7 @@ fu (char *p, int x)
 arf ();
 }
 
-/* { dg-final { scan-tree-dump-times Threaded jump 1 vrp1 } } */
+/* { dg-final { scan-tree-dump-times Threaded jump 1 vrp1 { target { ! 
keeps_null_pointer_checks } } } } */
+/* { dg-final { scan-tree-dump-times Threaded jump 0 vrp1 { target {   
keeps_null_pointer_checks } } } } */
 /* { dg-final { cleanup-tree-dump vrp1 } } */
 


Committed: testsuite patches (5/6): Update error line number in gcc.target/avr/progmem-error-1.cpp

2013-08-19 Thread Joern Rennecke
Tested for avr with --target_board=atmega128-sim and native on  
i686-pc-linuc-gnu.


Committed as obvious.
2013-08-17  Joern Rennecke  joern.renne...@embecosm.com

* gcc.target/avr/progmem-error-1.cpp: Update linenumber of error.

Index: gcc.target/avr/progmem-error-1.cpp
===
--- gcc.target/avr/progmem-error-1.cpp  (revision 201835)
+++ gcc.target/avr/progmem-error-1.cpp  (working copy)
@@ -2,4 +2,4 @@
 
 #include progmem.h
 
-char str[] PROGMEM = Hallo; /* { dg-error must be const } */
+char str[] PROGMEM = Hallo; /* { dg-error must be const  { target 
avr-*-* } 1 } */


RFA: testsuite patches (6/6): More int16 / !size32plus patches

2013-08-19 Thread Joern Rennecke
Tested for avr with --target_board=atmega128-sim and native on  
i686-pc-linuc-gnu.


Ok to apply?
2013-08-18  Joern Rennecke  joern.renne...@embecosm.com

PR testsuite/52641
* gcc.dg/tree-ssa/pr31261.c [int16]: Change expected unsigned type.
* gcc.dg/tree-ssa/ssa-pre-21.c [! size32plus]: Mark as xfail.
* gcc.dg/tree-ssa/vector-4.c (SItype): New typedef.
(v4si): Use it.
* gcc.dg/tree-ssa/ssa-pre-30.c: Test requires int32.
* gcc.dg/tree-ssa/vrp58.c: Adjust scan expression for int16.

Index: gcc.dg/tree-ssa/pr31261.c
===
--- gcc.dg/tree-ssa/pr31261.c   (revision 201835)
+++ gcc.dg/tree-ssa/pr31261.c   (working copy)
@@ -35,6 +35,7 @@ f5 (int e)
 /* { dg-final { scan-tree-dump-times return -a \ 7; 1 original } } */
 /* { dg-final { scan-tree-dump-times return b \ 7; 1 original } } */
 /* { dg-final { scan-tree-dump-times return \\(char\\) -\\(unsigned char\\) c 
\ 31; 1 original } } */
-/* { dg-final { scan-tree-dump-times return \\(int\\) \\(12 - \\(unsigned 
int\\) d\\) \ 7; 1 original } } */
+/* { dg-final { scan-tree-dump-times return \\(int\\) \\(12 - \\(unsigned 
int\\) d\\) \ 7; 1 original { target { ! int16 } } } } */
+/* { dg-final { scan-tree-dump-times return \\(int\\) \\(12 - \\(unsigned 
short\\) d\\) \ 7; 1 original { target { int16 } } } } */
 /* { dg-final { scan-tree-dump-times return 12 - \\(e \ 7\\) \ 15; 1 
original } } */
 /* { dg-final { cleanup-tree-dump original } } */
Index: gcc.dg/tree-ssa/ssa-pre-21.c
===
--- gcc.dg/tree-ssa/ssa-pre-21.c(revision 201835)
+++ gcc.dg/tree-ssa/ssa-pre-21.c(working copy)
@@ -11,5 +11,5 @@ NumSift (long *array, unsigned long k)
 
 /* There should be only two loads left.  */
 
-/* { dg-final { scan-tree-dump-times = \\\*\[^\n;\]*; 2 pre } } */
+/* { dg-final { scan-tree-dump-times = \\\*\[^\n;\]*; 2 pre { xfail { ! 
size32plus } } } } */ /* xfail: PR tree-optimization/58169 */
 /* { dg-final { cleanup-tree-dump pre } } */
Index: gcc.dg/tree-ssa/vector-4.c
===
--- gcc.dg/tree-ssa/vector-4.c  (revision 201835)
+++ gcc.dg/tree-ssa/vector-4.c  (working copy)
@@ -1,7 +1,8 @@
 /* { dg-do compile } */
 /* { dg-options -w -O1 -fdump-tree-gimple } */
 
-typedef int v4si __attribute__ ((vector_size (16)));
+typedef int SItype __attribute__ ((mode (SI)));
+typedef SItype v4si __attribute__ ((vector_size (16)));
 
 v4si vs (v4si a, v4si b)
 {
Index: gcc.dg/tree-ssa/ssa-pre-30.c
===
--- gcc.dg/tree-ssa/ssa-pre-30.c(revision 201835)
+++ gcc.dg/tree-ssa/ssa-pre-30.c(working copy)
@@ -1,4 +1,5 @@
 /* { dg-do compile } */
+/* { dg-require-effective-target int32 } */
 /* { dg-options -O2 -fdump-tree-pre-details } */
 
 int f;
Index: gcc.dg/tree-ssa/vrp58.c
===
--- gcc.dg/tree-ssa/vrp58.c (revision 201835)
+++ gcc.dg/tree-ssa/vrp58.c (working copy)
@@ -8,5 +8,6 @@ foo (long long a, signed char b, signed
   return a + (short)bc;
 }
 
-/* { dg-final { scan-tree-dump Folded into vrp1 } } */
+/* { dg-final { scan-tree-dump Folded into vrp1 { target int32plus } } } */
+/* { dg-final { scan-tree-dump Folding statement: _\[0-9\]\* = \\(long long 
int\\) bc_\[0-9\]\*; vrp1 { target int16 } } } */
 /* { dg-final { cleanup-tree-dump vrp1 } } */


Re: [ping] Fix error recovery issue with alias

2013-08-19 Thread Dominique Dhumieres
 Can you try and add

  pragma Weak_External (Var);

on line 8 and see whether it passes?  If so, you can commit the patchlet.

The test still fails with the following change:

--- /opt/gcc/_clean/gcc/testsuite/gnat.dg/specs/linker_alias.ads
2013-08-18 17:39:22.0 +0200
+++ /opt/gcc/work/gcc/testsuite/gnat.dg/specs/linker_alias.ads  2013-08-19 
08:52:35.0 +0200
@@ -5,5 +5,6 @@ package Linker_Alias is
Var : Integer;  -- { dg-error aliased to undefined symbol }
pragma Export (C, Var, my_var);
pragma Linker_Alias (Var, var2);
+   pragma Weak_External (Var);
 
 end Linker_Alias;

Dominique


Re: Type inheritance graph analysis speculative devirtualization, part 2/6 (type inheritance graph builder)

2013-08-19 Thread Jan Hubicka
 Hello Jan,
 
 Just some casual notes.

Thank you! I will fix the typos shortly!
  +static inline bool
  +polymorphic_type_binfo_p (tree binfo)
  +{
  +  /* See if BINFO's type has an virtual table associtated with it.  */
  +  return BINFO_VTABLE (TYPE_BINFO (BINFO_TYPE (binfo)));
 
 Just for my education, why not just:
 
 return BINFO_VTABLE (binfo);
 ?
 
 At the very least, I'd say there ought to be a comment here explaining
 why this gotcha'.

I tried to explain in the toplevel comment, but I guess it should be repeated
here.  Every polymorphic type has multiple BINFOs.  One represents the type
itself and others represents the variants present as basetypes of its deriverd
types. Those do not neccesarily have BINFO_VTABLE set, so I want to look
up the basetype's BINFO.

  +  /* Walk bases.  */
  +  for (i = 0; BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
  +/* Walking bases that have no virtual method is pointless excercise.  
  */
  +if (polymorphic_type_binfo_p (base_binfo))
  +  record_binfo (nodes, base_binfo, otr_type,
  +   BINFO_VTABLE (base_binfo) ? base_binfo : type_binfo,
 
 ... here, as polymorphic_type_binfo_p (base_binfo) is true,
 shouldn't BINFO_VTABLE(base_info) be unconditionally true as well?

It is the same thing here.  In the case of single inheritance, the BINFOs
representing bases do not have VTABLE, since their VTABLE is shared with
the main VTABLE of the type we walk.

Honza
 
 
  +   otr_token, inserted,
  +   matched_vtables);
  +}
 
 Thanks.
 
 -- 
   Dodji


Re: [PATCH] Enable non-complex math builtins from C99 for Bionic

2013-08-19 Thread Kirill Yukhin
 This is OK, with function_gnu removed (nothing appears to use it), if no 
 OS port maintainers object to the changes for their OSes within the next 
 week.
Hello,
Week is over.
Comitted to MT: http://gcc.gnu.org/ml/gcc-cvs/2013-08/msg00447.html

--
Thanks, K


Re: [RS6000] Fix for PR57865, _savegpr64 breakage on spe

2013-08-19 Thread David Edelsohn
On Mon, Aug 19, 2013 at 12:26 AM, Alan Modra amo...@gmail.com wrote:
 When I made the following change
 -#define FIRST_SAVED_GP_REGNO 13
 +#define FIRST_SAVED_GP_REGNO (FIXED_R13 ? 14 : 13)
 in http://gcc.gnu.org/ml/gcc-patches/2012-04/msg01274.html, I checked
 all uses of FIRST_SAVED_GP_REGNO, but missed the signifigance of
 FIRST_SAVRES_REGISTER appearing in the ool_adjust calculation.

 Using FIRST_SAVRES_REGISTER in ool_adjust was not exactly the best
 choice of available constants.  Why use a value that is the minimum
 over gp, fp and vector regs, when what you need is specific to gp
 regs?  Fixed as follows, bootstrapped and regression tested.
 OK for mainline and 4.8?

 PR target/57865
 * config/rs6000/rs6000.c (rs6000_emit_prologue): Correct ool_adjust.
 (rs6000_emit_epilogue): Likewise.

Okay everywhere.

Thanks, David


Re: [PATCH, rs6000, generic builtins] Fix unary TDmode patterns and add DFP ABS builtins

2013-08-19 Thread David Edelsohn
On Fri, Aug 16, 2013 at 6:32 PM, Peter Bergner berg...@vnet.ibm.com wrote:

 Ok, updated to switch the order of the alternatives.  This works...just
 like the previous one.  I created two versions of the TD test case to
 test both alternatives and to make sure we get fmrs on the one and
 no fmrs on the other.

 Peter


 gcc/
 * builtins.def (BUILT_IN_FABSD32): New DFP ABS builtin.
 (BUILT_IN_FABSD64): Likewise.
 (BUILT_IN_FABSD128): Likewise.
 * builtins.c (expand_builtin): Add support for new DFP ABS builtins.
 (fold_builtin_1): Likewise.
 * config/rs6000/dfp.md (*negtd2_fpr): Handle non-overlapping 
 destination
 and source operands.
 (*abstd2_fpr): Likewise.
 (*nabstd2_fpr): Likewise.

 gcc/testsuite/
 * gcc.target/powerpc/dfp-dd-2.c: New test.
 * gcc.target/powerpc/dfp-td-2.c: Likewise.
 * gcc.target/powerpc/dfp-td-3.c: Likewise.

The last version is okay.

 And is the *negtd2_fpr hunk ok for the 4.8 branch?

The negtd2_fpr change is okay for the 4.8 branch.

Thanks, David


[C++ Patch] Add a pop_bindings_and_leave_scope ?!?

2013-08-19 Thread Paolo Carlini

Hi,

also wondered if we want to do something like this?? And the name of the 
helper function?


Thanks,
Paolo.

/
Index: name-lookup.c
===
--- name-lookup.c   (revision 201835)
+++ name-lookup.c   (working copy)
@@ -392,6 +392,17 @@ pop_binding (tree id, tree decl)
 }
 }
 
+/* Remove the bindings for the decls of the current level and leave
+   the current scope.  */
+
+void
+pop_bindings_and_leave_scope (void)
+{
+  for (tree t = getdecls (); t; t = DECL_CHAIN (t))
+pop_binding (DECL_NAME (t), t);
+  leave_scope ();
+}
+
 /* Strip non dependent using declarations.  */
 
 tree
Index: name-lookup.h
===
--- name-lookup.h   (revision 201835)
+++ name-lookup.h   (working copy)
@@ -89,6 +89,7 @@ typedef struct GTY(()) cxx_saved_binding {
 extern tree identifier_type_value (tree);
 extern void set_identifier_type_value (tree, tree);
 extern void pop_binding (tree, tree);
+extern void pop_bindings_and_leave_scope (void);
 extern tree constructor_name (tree);
 extern bool constructor_name_p (tree, tree);
 
Index: parser.c
===
--- parser.c(revision 201835)
+++ parser.c(working copy)
@@ -8809,7 +8802,6 @@ cp_parser_lambda_declarator_opt (cp_parser* parser
   tree param_list = void_list_node;
   tree attributes = NULL_TREE;
   tree exception_spec = NULL_TREE;
-  tree t;
 
   /* The lambda-declarator is optional, but must begin with an opening
  parenthesis if present.  */
@@ -8824,7 +8816,7 @@ cp_parser_lambda_declarator_opt (cp_parser* parser
 
   /* Default arguments shall not be specified in the
 parameter-declaration-clause of a lambda-declarator.  */
-  for (t = param_list; t; t = TREE_CHAIN (t))
+  for (tree t = param_list; t; t = TREE_CHAIN (t))
if (TREE_PURPOSE (t))
  pedwarn (DECL_SOURCE_LOCATION (TREE_VALUE (t)), OPT_Wpedantic,
   default argument specified for lambda parameter);
@@ -8853,10 +8845,7 @@ cp_parser_lambda_declarator_opt (cp_parser* parser
 
   /* The function parameters must be in scope all the way until after the
  trailing-return-type in case of decltype.  */
-  for (t = current_binding_level-names; t; t = DECL_CHAIN (t))
-   pop_binding (DECL_NAME (t), t);
-
-  leave_scope ();
+  pop_bindings_and_leave_scope ();
 }
 
   /* Create the function call operator.
@@ -16741,7 +16725,6 @@ cp_parser_direct_declarator (cp_parser* parser,
  tree params;
  unsigned saved_num_template_parameter_lists;
  bool is_declarator = false;
- tree t;
 
  /* In a member-declarator, the only valid interpretation
 of a parenthesis is the start of a
@@ -16830,9 +16813,7 @@ cp_parser_direct_declarator (cp_parser* parser,
}
 
  /* Remove the function parms from scope.  */
- for (t = current_binding_level-names; t; t = DECL_CHAIN (t))
-   pop_binding (DECL_NAME (t), t);
- leave_scope();
+ pop_bindings_and_leave_scope ();
 
  if (is_declarator)
/* Repeat the main loop.  */
@@ -24101,7 +24082,6 @@ cp_parser_cache_defarg (cp_parser *parser, bool ns
 the default argument; otherwise the default
 argument continues.  */
  bool error = false;
- tree t;
 
  /* Set ITALP so cp_parser_parameter_declaration_list
 doesn't decide to commit to this parse.  */
@@ -24123,9 +24103,7 @@ cp_parser_cache_defarg (cp_parser *parser, bool ns
{
  begin_scope (sk_function_parms, NULL_TREE);
  cp_parser_parameter_declaration_list (parser, error);
- for (t = current_binding_level-names; t; t = DECL_CHAIN (t))
-   pop_binding (DECL_NAME (t), t);
- leave_scope ();
+ pop_bindings_and_leave_scope ();
}
  if (!cp_parser_error_occurred (parser)  !error)
done = true;


Re: [PATCH] Sanitize block partitioning under -freorder-blocks-and-partition

2013-08-19 Thread Teresa Johnson
On Sat, Aug 17, 2013 at 1:44 PM, Jan Hubicka hubi...@ucw.cz wrote:

 I added both of these and ran into issues due to profile maintenance.
 For example, there were non-zero blocks in the cold section because
 pro_and_epilogue split a simple return block that was previously reach
 by both hot and cold paths. The new return block that was then only
 reached via the cold path did not have its count properly updated to
 reflect this, and since with this patch, blocks dominated by cold
 blocks are remarked cold, we ended up with a non-zero count block in
 the cold section. And there were 0 count blocks reached by non-zero
 edges because copyprop did not clean up edge weights after removing
 some branches and blocks, leading to non-zero edge weights that had
 previously targeted a branch that was removed, now targeting a 0 count
 block that the removed branch always branched around.

 I see, can you please send fixes for the problems you identified?
 Thanks for owrking on this!

I don't have fixes at this point - I just identified the phase and
transformation from looking at the dump. But I'll try to fix them soon
while I'm working on performance tuning for splitting. I have a
feeling there are probably a bunch of places where the profile isn't
getting updated properly, unfortunately.


 In any case, the good news is in that the cases I looked at, the
 splitting code is doing the right thing and these blocks that were
 marked cold really were cold. It would be great to fix the profile
 maintenance issues, but that in the meantime the above sanity checks
 are too aggressive.

 We can keep them and output info into dump file - it is what most of
 the profile sanity checking does anyway.

Ok, I will add that.


 Did you try to use Martin's linker script to turn text.unlikely
 section unexecutable?  I think that way we will easily find what really
 causes us to use it during startup of trained application (just like
 Martin does for gimp).

I haven't - where can I get that script?


 I think it makes sense to commit the current patch if possible, as it
 is making the splitting more sane.

 My only concern about the patch is that I am not convinced the dominator
 based code has chance to work reliably enough so we won't see too many
 accesses into the cold section.

Remember it isn't using dominance anymore. The latest patch was
instead ensuring the most frequent path between hot blocks and the
entry/exit are marked hot. That should be better than the dominance
approach used in the earlier version.

 We can commit it and work on better solution incrementally but it will
 probably mean replacing it later.  If you think it makes things easier
 to work on it incrementally, I think the patch is OK.

Yes, I think this is a big step forward from what is there now for
splitting, which does the splitting purely based on bb count in
isolation. I don't have a much better solution in mind yet.


  - I'll try building and profiling gimp myself to see if I can
  reproduce the issue with code executing out of the cold section.

 I have spent some time this week trying to get the latest gimp Martin
 pointed me to configured and built, but it took awhile to track down
 and configure/build all of the required versions of dependent
 packages. I'm still hitting some issues trying to get it compiled, so
 it may not yet be configured properly. I'll take a look again early
 next week.

 I do not think there is anything special about gimp.  You can probably
 take any other bigger app, like GCC itself. With profiledbootstrap
 and linker script to lock unlikely section you should get ICEs where
 we jump into cold secton and should not.

Ok, please point me to the linker script and I will try gcc
profiledbootstrap as well. I wanted to try gimp if possible as I
haven't seen this much jumping to the cold section in some of the
internal apps I tried.

Thanks,
Teresa


 Teresa

 patch for updating counts based on estimated frequencies to address
 inlined comdats with 0 profile counts:

 013-08-16  Teresa Johnson  tejohn...@google.com

 * tree-inline.c (copy_bb): Compute count based on frequency.
 (copy_edges_for_bb): Ditto.
 (copy_cfg_body): Ditto.
 (copy_body): Pass down frequency.
 (expand_call_inline): Ditto.
 (tree_function_versioning): Ditto.
 * predict.c (init_and_estimate_bb_frequencies): New function.
 (rebuild_frequencies): Invoke init_and_estimate_bb_frequencies.
 * predict.h (init_and_estimate_bb_frequencies): Declare.
 * profile.c (branch_prob): Invoke init_and_estimate_bb_frequencies.
 * ipa-inline-transform.c (update_noncloned_frequencies): Scale edge
 counts.
 (clone_inlined_nodes): Compute edge count scale if needed.

 I do not see why inliner needs to care about scaling more than it does right
 now.  So you have init_and_estimate_bb_frequencies that force profile guessing
 on a given function body. In addition to that I thing 

Re: [PATCH] Fix for PR c/57490

2013-08-19 Thread Jason Merrill

On 08/18/2013 07:42 PM, Iyer, Balaji V wrote:

On 08/16/2013 02:13 PM, Iyer, Balaji V wrote:

+  /* If it is a built-in array notation function, then the return type of
+ the function is the type of the array passed in as array notation.  */



Ah, then the comment should say ...is the element type of the array


Hmm, now it occurs to me to wonder why the CALL_EXPR doesn't already 
have the appropriate type?


Jason




Re: [C++ Patch] Add a pop_bindings_and_leave_scope ?!?

2013-08-19 Thread Jason Merrill

That looks good.

Jason


[RFA] Type inheritance graph analysis speculative devirtualization, part 4/7, ODR at LTO time

2013-08-19 Thread Jan Hubicka
Hi,
this patch makes inheritance graph builder to work on LTO.  Nothing but
ODR violation warnings and dump file is produced, yet.

The main change is to make types_same_for_odr and ODR hasher to use
assembler name of virtual tables of the type (if present) to establish type
equality. This is result of discussion with Jason at 
http://gcc.gnu.org/ml/gcc-patches/2013-08/msg00826.html
Jason, does the implemenetation seem sane?  

Multiple tree types can now be representations of single ODR type.  There is
new add_type_duplicate that output warnings on obvious mismatches and records
variant in odr_type-types array.  Those are used for debug output (it may
be interesting to get more of the unified by Richard's merging).

Trickier part is the code that merges BINFOs of ODR equivalent types.  This is
important since we do not want to keep external vtables around when internal
exists (external version may refer to external static methods that are internal
to our LTO unit). With ODR violations it may lead to wrong devirtualization,
but that is the case previously anyway.

Bootstrapped/regtested x86_64-linux, OK?

Honza

* Makefile.in (ipa-devirt.o): Add dependency on diagnostic.h
* ipa-devirt.c: Include diganostic.h
(odr_type_d): Add types and types_set.
(hash_type_name): Work for types with vtables during LTO.
(odr_hasher::remove): Fix comment; destroy types_set.
(add_type_duplicate): New function,
(get_odr_type): Use it.
(dump_type_inheritance_graph): Dump type duplicates.
* ipa.c (symtab_remove_unreachable_nodes): Build type inheritance
graph.
* tree.c (types_same_for_odr): Give exact answers on types with
virtual tables.
Index: Makefile.in
===
*** Makefile.in (revision 201836)
--- Makefile.in (working copy)
*** ipa.o : ipa.c $(CONFIG_H) $(SYSTEM_H) co
*** 2948,2954 
 $(LTO_STREAMER_H) $(DATA_STREAMER_H)
  ipa-devirt.o : ipa-devirt.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) 
$(CGRAPH_H) \
 $(GIMPLE_H) $(TARGET_H) $(GGC_H) pointer-set.h \
!$(IPA_UTILS_H) $(HASH_TABLE_H) 
  ipa-prop.o : ipa-prop.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
 langhooks.h $(GGC_H) $(TARGET_H) $(CGRAPH_H) $(IPA_PROP_H) $(DIAGNOSTIC_H) 
\
 $(TREE_FLOW_H) $(TM_H) $(TREE_PASS_H) $(FLAGS_H) $(TREE_H) \
--- 2948,2954 
 $(LTO_STREAMER_H) $(DATA_STREAMER_H)
  ipa-devirt.o : ipa-devirt.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) 
$(CGRAPH_H) \
 $(GIMPLE_H) $(TARGET_H) $(GGC_H) pointer-set.h \
!$(IPA_UTILS_H) $(HASH_TABLE_H) $(DIAGNOSTIC_H)
  ipa-prop.o : ipa-prop.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
 langhooks.h $(GGC_H) $(TARGET_H) $(CGRAPH_H) $(IPA_PROP_H) $(DIAGNOSTIC_H) 
\
 $(TREE_FLOW_H) $(TM_H) $(TREE_PASS_H) $(FLAGS_H) $(TREE_H) \
Index: ipa-devirt.c
===
*** ipa-devirt.c(revision 201836)
--- ipa-devirt.c(working copy)
*** along with GCC; see the file COPYING3.
*** 116,121 
--- 116,122 
  #include tree-pretty-print.h
  #include ipa-utils.h
  #include gimple.h
+ #include diagnostic.h
  
  /* The node of type inheritance graph.  For each type unique in
 One Defintion Rule (ODR) sense, we produce one node linking all 
*** along with GCC; see the file COPYING3.
*** 123,136 
  
  struct GTY(()) odr_type_d
  {
-   /* Unique ID indexing the type in odr_types array.  */
-   int id;
/* leader type.  */
tree type;
/* All bases.  */
vecodr_type GTY((skip)) bases;
/* All derrived types with virtual methods seen in unit.  */
vecodr_type GTY((skip)) derived_types;
/* Is it in anonymous namespace? */
bool anonymous_namespace;
  };
--- 124,143 
  
  struct GTY(()) odr_type_d
  {
/* leader type.  */
tree type;
/* All bases.  */
vecodr_type GTY((skip)) bases;
/* All derrived types with virtual methods seen in unit.  */
vecodr_type GTY((skip)) derived_types;
+ 
+   /* All equivalent types, if more than one.  */
+   vectree, va_gc *types;
+   /* Set of all equivalent types, if NON-NULL.  */
+   pointer_set_t * GTY((skip)) types_set;
+ 
+   /* Unique ID indexing the type in odr_types array.  */
+   int id;
/* Is it in anonymous namespace? */
bool anonymous_namespace;
  };
*** hash_type_name (tree t)
*** 172,177 
--- 179,204 
if (type_in_anonymous_namespace_p (t))
  return htab_hash_pointer (t);
  
+   /* For polymorphic types, we can simply hash the virtual table.  */
+   if (TYPE_BINFO (t)  BINFO_VTABLE (TYPE_BINFO (t)))
+ {
+   tree v = BINFO_VTABLE (TYPE_BINFO (t));
+   hashval_t hash = 0;
+ 
+   if (TREE_CODE (v) == POINTER_PLUS_EXPR)
+   {
+ hash = TREE_INT_CST_LOW (TREE_OPERAND (v, 1));
+ v = TREE_OPERAND (TREE_OPERAND (v, 0), 0);
+   }
+ 
+   v = DECL_ASSEMBLER_NAME (v);
+ 

Re: [Patch] Fix empty grouping problem in regex

2013-08-19 Thread Tim Shen
On Mon, Aug 19, 2013 at 6:17 PM, Jakub Jelinek ja...@redhat.com wrote:
 Doesn't everything in between the last added line above and the first added
 line below need reindenting by 2 spaces (plus of course transforming any 8
 consecutive spaces into tabs)?

Thanks for correcting the indentation :)

About tab vs spaces, I didn't notice(or care) this before. I searched
about tab vs spaces and found something interesting
here(http://gcc.gnu.org/ml/libstdc++/2008-07/msg00161.html).

So should I later propose a single patch to make every 8 spaces to a
tab, or just make changes for all my committed files in this patch?


-- 
Tim Shen


RE: [PATCH] Fix for PR c/57490

2013-08-19 Thread Iyer, Balaji V


 -Original Message-
 From: Jason Merrill [mailto:ja...@redhat.com]
 Sent: Monday, August 19, 2013 9:51 AM
 To: Iyer, Balaji V; Rainer Orth
 Cc: Jakub Jelinek; gcc-patches@gcc.gnu.org; Marek Polacek
 (pola...@redhat.com)
 Subject: Re: [PATCH] Fix for PR c/57490
 
 On 08/18/2013 07:42 PM, Iyer, Balaji V wrote:
  On 08/16/2013 02:13 PM, Iyer, Balaji V wrote:
  +  /* If it is a built-in array notation function, then the return type 
  of
  + the function is the type of the array passed in as array
  + notation.  */
 
  Ah, then the comment should say ...is the element type of the array
 
 Hmm, now it occurs to me to wonder why the CALL_EXPR doesn't already have
 the appropriate type?
 

Well, it is described in cilkplus.def. The return type of it changes based on 
the array that is passed in. So, it is given a fake type. Thus, we need to fix 
it up here.


 Jason
 



Re: [C++ Patch, obvious] Use cp_parser_lookup_name_simple more

2013-08-19 Thread Gabriel Dos Reis
On Mon, Aug 19, 2013 at 6:04 AM, Paolo Carlini paolo.carl...@oracle.com wrote:
 Hi,

 was having a look to c++/58187 (looks like we are missing a lookup) and
 noticed this. It seems obvious to me, I'll commit it later today barring
 objections.

OK.


 Tested x86_64-linux.

 Thanks,
 Paolo.

 


Re: [PATCH] Fix for PR c/57490

2013-08-19 Thread Jason Merrill

On 08/19/2013 10:06 AM, Iyer, Balaji V wrote:

Well, it is described in cilkplus.def. The return type of it changes based on 
the array that is passed in. So, it is given a fake type. Thus, we need to fix 
it up here.


Right, but it should be fixed up when the CALL_EXPR is created, rather 
than when that CALL_EXPR is used in another expression.


Jason



Re: [Patch] Fix empty grouping problem in regex

2013-08-19 Thread Tim Shen
Here's the correct version of changelog.


-- 
Tim Shen


changelog
Description: Binary data


Re: [Patch] Fix empty grouping problem in regex

2013-08-19 Thread Paolo Carlini

Hi,

On 08/19/2013 12:07 PM, Tim Shen wrote:

+// 2013-08-08  Tim Shentimshe...@gmail.com
+//
+// Copyright (C) 2013 Free Software Foundation, Inc.

Before committing, please adjust the date.

Thanks,
Paolo.


RE: [PATCH] Fix for PR c/57490

2013-08-19 Thread Iyer, Balaji V


 -Original Message-
 From: Jason Merrill [mailto:ja...@redhat.com]
 Sent: Monday, August 19, 2013 10:20 AM
 To: Iyer, Balaji V; Rainer Orth
 Cc: Jakub Jelinek; gcc-patches@gcc.gnu.org; Marek Polacek
 (pola...@redhat.com)
 Subject: Re: [PATCH] Fix for PR c/57490
 
 On 08/19/2013 10:06 AM, Iyer, Balaji V wrote:
  Well, it is described in cilkplus.def. The return type of it changes based 
  on the
 array that is passed in. So, it is given a fake type. Thus, we need to fix it 
 up here.
 
 Right, but it should be fixed up when the CALL_EXPR is created, rather than
 when that CALL_EXPR is used in another expression.
 

HI Jason,
I just want to make sure I get what you are saying. Are you suggesting 
that I do this in finish_call_expr() instead of cp_build_binary_op() ?

Thanks,

Balaji V. Iyer.

 Jason



Re: [PATCH] Fix for PR c/57490

2013-08-19 Thread Jason Merrill

On 08/19/2013 10:37 AM, Iyer, Balaji V wrote:

I just want to make sure I get what you are saying. Are you suggesting 
that I do this in finish_call_expr() instead of cp_build_binary_op() ?


I think build_cxx_call is the right place.

Jason




Re: RFA: testsuite patches (4/6): vrp55.c can only thread when not keeping null pointer checks.

2013-08-19 Thread Jeff Law

On 08/19/2013 05:56 AM, Joern Rennecke wrote:

Tested for avr with --target_board=atmega128-sim and native on
i686-pc-linuc-gnu.

Ok to apply?

20130819-4


2013-08-17  Joern Renneckejoern.renne...@embecosm.com

* gcc.dg/tree-ssa/vrp55.c: Use keeps_null_pointer_checks to determine
correct test response.

OK.
jeff


Re: RFA: testsuite patches (3/6): [avr]: ssa-dom-thread-4.c: expect 6 times Threaded.

2013-08-19 Thread Jeff Law

On 08/19/2013 05:53 AM, Joern Rennecke wrote:

Tested for avr with --target_board=atmega128-sim and native on
i686-pc-linuc-gnu.

Ok to apply?

20130819-3


2013-08-16  Joern Renneckejoern.renne...@embecosm.com

* gcc.dg/tree-ssa/ssa-dom-thread-4.c [avr-*-*]: Expect 6 times
Threaded.

OK
Jeff


Re: RFA: testsuite patches (6/6): More int16 / !size32plus patches

2013-08-19 Thread Jeff Law

On 08/19/2013 06:05 AM, Joern Rennecke wrote:

Tested for avr with --target_board=atmega128-sim and native on
i686-pc-linuc-gnu.

Ok to apply?

20130819-6


2013-08-18  Joern Renneckejoern.renne...@embecosm.com

PR testsuite/52641
* gcc.dg/tree-ssa/pr31261.c [int16]: Change expected unsigned type.
* gcc.dg/tree-ssa/ssa-pre-21.c [! size32plus]: Mark as xfail.
* gcc.dg/tree-ssa/vector-4.c (SItype): New typedef.
(v4si): Use it.
* gcc.dg/tree-ssa/ssa-pre-30.c: Test requires int32.
* gcc.dg/tree-ssa/vrp58.c: Adjust scan expression for int16.

OK.
Jeff


Re: [PATCH] Sanitize block partitioning under -freorder-blocks-and-partition

2013-08-19 Thread Jan Hubicka
 Remember it isn't using dominance anymore. The latest patch was
 instead ensuring the most frequent path between hot blocks and the
 entry/exit are marked hot. That should be better than the dominance
 approach used in the earlier version.

Indeed, that looks more resonable approach.
Can you point me to the last version of patch? Last one I remember still
walked dominators...
 
  We can commit it and work on better solution incrementally but it will
  probably mean replacing it later.  If you think it makes things easier
  to work on it incrementally, I think the patch is OK.
 
 Yes, I think this is a big step forward from what is there now for
 splitting, which does the splitting purely based on bb count in
 isolation. I don't have a much better solution in mind yet.
 
 
   - I'll try building and profiling gimp myself to see if I can
   reproduce the issue with code executing out of the cold section.
 
  I have spent some time this week trying to get the latest gimp Martin
  pointed me to configured and built, but it took awhile to track down
  and configure/build all of the required versions of dependent
  packages. I'm still hitting some issues trying to get it compiled, so
  it may not yet be configured properly. I'll take a look again early
  next week.
 
  I do not think there is anything special about gimp.  You can probably
  take any other bigger app, like GCC itself. With profiledbootstrap
  and linker script to lock unlikely section you should get ICEs where
  we jump into cold secton and should not.
 
 Ok, please point me to the linker script and I will try gcc
 profiledbootstrap as well. I wanted to try gimp if possible as I
 haven't seen this much jumping to the cold section in some of the
 internal apps I tried.

You can also discuss with Martin the systemtap script to plot disk accesses
during the startup.  It is very handy for analyzing the code layout issues

It may be interesting to get similar script taking traces from valgrind
and ploting the most frequent calls in the final layout ;)

Honza


Fwd: [regex] New enum type syntax_option_type

2013-08-19 Thread Tim Shen
-- Forwarded message --
From: Tim Shen timshe...@gmail.com
Date: Mon, Aug 19, 2013 at 10:26 PM
Subject: Re: [regex] New enum type syntax_option_type
To: Daniel Krügler daniel.krueg...@gmail.com
Cc: libstdc++ libstd...@gcc.gnu.org


On Sun, Aug 18, 2013 at 8:50 PM, Daniel Krügler
daniel.krueg...@gmail.com wrote:
 Due to Tim Shen's recent activities at regex improvements we have in
 libstdc++-v3/include/bits/regex_constants.h now

 enum syntax_option_type { .. };

 instead of the previous

 typedef unsigned int syntax_option_type;

 That change makes sense to me, but the question arises to me:
 Shouldn't this enum get an explicit underlying type, such as

 enum syntax_option_type : unsigned int { .. };

 to keep the ABI more stable even if new enumerators would be added?

 - Daniel

Great, let's test it now, though I don't think that it breaks something.


--
Tim Shen


-- 
Tim Shen


changelog
Description: Binary data


a.patch
Description: Binary data


Re: [PATCH] Sanitize block partitioning under -freorder-blocks-and-partition

2013-08-19 Thread Teresa Johnson
On Sat, Aug 17, 2013 at 1:44 PM, Jan Hubicka hubi...@ucw.cz wrote:

 patch for updating counts based on estimated frequencies to address
 inlined comdats with 0 profile counts:

 013-08-16  Teresa Johnson  tejohn...@google.com

 * tree-inline.c (copy_bb): Compute count based on frequency.
 (copy_edges_for_bb): Ditto.
 (copy_cfg_body): Ditto.
 (copy_body): Pass down frequency.
 (expand_call_inline): Ditto.
 (tree_function_versioning): Ditto.
 * predict.c (init_and_estimate_bb_frequencies): New function.
 (rebuild_frequencies): Invoke init_and_estimate_bb_frequencies.
 * predict.h (init_and_estimate_bb_frequencies): Declare.
 * profile.c (branch_prob): Invoke init_and_estimate_bb_frequencies.
 * ipa-inline-transform.c (update_noncloned_frequencies): Scale edge
 counts.
 (clone_inlined_nodes): Compute edge count scale if needed.

 I do not see why inliner needs to care about scaling more than it does right
 now.  So you have init_and_estimate_bb_frequencies that force profile guessing
 on a given function body. In addition to that I thing you need something like
 freqs_to_counts that will compute counts based on freqs with given scale
 (actually you can do that as part of propagation before frequencies are 
 scalled
 to the usual 0...FREQ_MAX scale and precision is lost).

 Because offline COMDAT functoin will be porduced for every COMDAT used, I 
 think
 it is bad to porduce any COMDAT (or any reachable function via calls with 
 non-0
 count) that has empty profile (either because it got lost by COMDAT merging
 or because of reading mismatch).

The approach this patch takes is to simply treat those functions the
same as we would if we didn't feed back profile data in the first
place, by using the frequencies. This is sufficient except when one is
inlined, which is why I have the special handling in the inliner
itself.


 So I guess you can just check functions with 0 count and non-0 count callers
 and initialize their guessed profile.
 Some capping will probably be needed to not propagate insanely large numbers..

But at profile read time we don't have access to the inter-module
calls. Presumably having guessed profiles for these routines should
help the O2 profile-use case as well (i.e. better optimized
out-of-line copy), so I wouldn't want to limit it to IPO or LIPO
compiles where we can identify inter-module call counts at some point
in the compilation.


 Since new direct calls can be discovered later, inline may want to do that
 again each time it inlines non-0 count call of COMDAT with 0 count...

How about an approach like this:
- Invoke init_and_estimate_bb_frequencies as I am doing to guess the
profiles at profile read time for functions with 0 counts.
- At inline time, invoke some kind of freqs_to_counts routine for any
0-count routine that is reached by non-zero call edges. It would take
the sum of all incoming call edge counts and synthesize counts for the
bbs using the guessed profile frequencies applied earlier by
init_and_estimate_bb_frequencies. Then the inliner can do its normal
bb count scaling.

Does that seem like a reasonable approach?

There is one other fix in this patch:
- The clone_inlined_nodes/update_noncloned_frequencies changes below
are handling a different case: 0-count call edge in this module, with
non-zero callee node count due to calls from other modules. It will
allow update_noncloned_frequencies to scale down the edge counts in
callee's cloned call tree. This was a fix I made for the
callgraph-based linker plugin function reordering, and not splitting
(since it is using both the node and edge weights to make ordering
decisions). Here's a description of the issue when I was debugging it:


In this case, because the callee we are inlining does not have any
other callers and is not external, we call
update_noncloned_frequencies from clone_inlined_nodes instead of
creating a clone. This routine does not attempt to scale the outgoing
edge weight counters on the callee, since the assumption must be that
there are no other callers so all the weight is attributed to the
current edge that we are inlining.

In this case this is clearly not correct, because the caller's count
is 0. I'm assuming that this is happening because the callee we are
inlining is a comdat, so its non-zero weights must have come from a
different module. It seems like update_noncloned_frequencies should go
ahead and scale the counts too.


For the above case, I think the right place to fix this is probably
during clone_inlined_nodes/update_noncloned_frequencies, as scaling is
handled by cgraph_clone_node in the case where we need cloning (also
called from clone_inlined_nodes).

Teresa


 Honza

 Index: tree-inline.c
 ===
 --- tree-inline.c   (revision 201644)
 +++ tree-inline.c   (working copy)
 @@ -1502,7 +1502,7 @@ 

Re: [PATCH,ARM] fix testsuite failures for arm-none-linux-gnueabihf

2013-08-19 Thread Richard Earnshaw
On 15/08/13 15:10, Charles Baylis wrote:
 Hi
 
 The attached patch fixes some tests which fail when testing gcc for a
 arm-none-linux-gnueabihf target because they do not expect to be built
 with a hard float ABI.
 
 The change in target-supports.exp fixes arm-fp16-ops-5.c and arm-fp16-ops-6.c.
 
 Tested on arm-none-linux-gnueabihf using qemu-arm, and does not cause
 any other tests to break.
 
 Comments? This is my first patch, so please point out anything wrong.
 
 

 
 
 2013-08-15  Charles Baylis  charles.bay...@linaro.org
 
 * gcc.dg/builtin-apply2.c: skip test on arm hardfloat ABI targets
 * gcc.dg/tls/pr42894.c: Use -mfloat-abi=soft as Thumb1 does
 not support hardfloat ABI
 * arm/thumb-ltu.c: Use -mfloat-abi=soft as Thumb1 does not
 support hardfloat ABI
 * target-supports.exp: don't force -mfloat-abi=soft when
 building for hardfloat target
 
 
 hf-fixes.txt
 
 
 Index: gcc/testsuite/gcc.dg/builtin-apply2.c
 ===
 --- gcc/testsuite/gcc.dg/builtin-apply2.c (revision 201726)
 +++ gcc/testsuite/gcc.dg/builtin-apply2.c (working copy)
 @@ -1,6 +1,7 @@
  /* { dg-do run } */
  /* { dg-skip-if Variadic funcs have all args on stack. Normal funcs have 
 args in registers. { aarch64*-*-* avr-*-*  } { * } {  } } */
  /* { dg-skip-if Variadic funcs use Base AAPCS.  Normal funcs use VFP 
 variant. { arm*-*-* } { -mfloat-abi=hard } {  } } */
 +/* { dg-skip-if Variadic funcs use Base AAPCS.  Normal funcs use VFP 
 variant. { arm*-*-gnueabihf } { * } { -mfloat-abi=soft* } } */
  


As you've noticed, basing the test's behaviour on the config variant
doesn't work reliably.  The builtin-apply2 test really should be skipped
if the current test variant is not soft-float.  We already have
check_effective_target_arm_hf_eabi in target-supports.exp that checks
whether __ARM_PCS_VFP is defined during a compilation.  So can  replace
both arm related lines in builtin-apply2 with

 /* { dg-skip-if Variadic funcs use Base AAPCS.  Normal funcs use VFP
variant. { arm*-*-*  arm_hf_eabi} { * } {  } } */

  /* PR target/12503 */
  /* Origin: pierre.nguyen-tu...@asim.lip6.fr */
 Index: gcc/testsuite/gcc.dg/tls/pr42894.c
 ===
 --- gcc/testsuite/gcc.dg/tls/pr42894.c(revision 201726)
 +++ gcc/testsuite/gcc.dg/tls/pr42894.c(working copy)
 @@ -1,6 +1,7 @@
  /* PR target/42894 */
  /* { dg-do compile } */
  /* { dg-options -march=armv5te -mthumb { target arm*-*-* } } */
 +/* { dg-options -march=armv5te -mthumb -mfloat-abi=soft { target 
 arm*-*-*hf } } */
  /* { dg-require-effective-target tls } */
  

Although the original PR was for Thumb1, this is a generic test.  I'm
not convinced that on ARM it should try to force thumb1.  Removing the
original dg-options line should solve the problem and we then get better
multi-lib testing as well.

  extern __thread int t;
 Index: gcc/testsuite/gcc.target/arm/thumb-ltu.c
 ===
 --- gcc/testsuite/gcc.target/arm/thumb-ltu.c  (revision 201726)
 +++ gcc/testsuite/gcc.target/arm/thumb-ltu.c  (working copy)
 @@ -1,6 +1,6 @@
  /* { dg-do compile } */
  /* { dg-skip-if incompatible options { arm*-*-* } { -march=* } { 
 -march=armv6 -march=armv6j -march=armv6z } } */
 -/* { dg-options -mcpu=arm1136jf-s -mthumb -O2 } */
 +/* { dg-options -mcpu=arm1136jf-s -mthumb -O2 -mfloat-abi=soft } */
  

This won't work if there's an explict -mfloat-abi={softfp,hard} on the
multilib options.  Probably the best thing to do here is to skip the
test if arm_thumb1_ok is not true.

  void f(unsigned a, unsigned b, unsigned c, unsigned d)
  {
 Index: gcc/testsuite/lib/target-supports.exp
 ===
 --- gcc/testsuite/lib/target-supports.exp (revision 201726)
 +++ gcc/testsuite/lib/target-supports.exp (working copy)
 @@ -2445,6 +2445,11 @@
   # Must generate floating-point instructions.
   return 0
  }
 +if [check-flags [list  { *-*-gnueabihf } { * } {  } ]] {
 +# Use existing float-abi and force an fpu which supports fp16

This should use arm_hf_eabi as described above.

 + set et_arm_fp16_flags -mfpu=vfpv4
 + return 1;
 +}
  if [check-flags [list  { *-*-* } { -mfpu=* } {  } ]] {
  # The existing -mfpu value is OK; use it, but add softfp.
   set et_arm_fp16_flags -mfloat-abi=softfp
 

Kyrill's comments re ChangeLogs also apply.

R.



RE: testsuite patches (3/6): [avr]: ssa-dom-thread-4.c: expect 6 times Threaded.

2013-08-19 Thread Weddington, Eric


 -Original Message-
 From: Joern Rennecke [mailto:joern.renne...@embecosm.com]
 Sent: Monday, August 19, 2013 5:53 AM
 To: gcc-patches@gcc.gnu.org
 Cc: Denis Chertykov; Anatoly Sokolov; Weddington, Eric
 Subject: RFA: testsuite patches (3/6): [avr]: ssa-dom-thread-4.c:
 expect 6 times Threaded.
 
 Tested for avr with --target_board=atmega128-sim and native on
 i686-pc-linuc-gnu.
 
 Ok to apply?

Approved, thanks.

Eric


RE: testsuite patches (2/6): [avr]: Set required branch cost for gcc.dg/tree-ssa/vrp87.c

2013-08-19 Thread Weddington, Eric

 -Original Message-
 From: Joern Rennecke [mailto:joern.renne...@embecosm.com]
 Sent: Monday, August 19, 2013 5:49 AM
 To: gcc-patches@gcc.gnu.org
 Cc: Denis Chertykov; Anatoly Sokolov; Weddington, Eric
 Subject: RFA: testsuite patches (2/6): [avr]: Set required branch cost
 for gcc.dg/tree-ssa/vrp87.c
 
 We need a minimum branch cost of 2 to make the expected optimization
 happen.
 
 Tested for avr with --target_board=atmega128-sim and native on
 i686-pc-linuc-gnu.
 
 Ok to apply?

Approved, thanks.

Eric


RE: Committed: testsuite patches (5/6): Update error line number in gcc.target/avr/progmem-error-1.cpp

2013-08-19 Thread Weddington, Eric


 -Original Message-
 From: Joern Rennecke [mailto:joern.renne...@embecosm.com]
 Sent: Monday, August 19, 2013 6:02 AM
 To: gcc-patches@gcc.gnu.org
 Cc: Denis Chertykov; Anatoly Sokolov; Weddington, Eric
 Subject: Committed: testsuite patches (5/6): Update error line number
 in gcc.target/avr/progmem-error-1.cpp
 
 Tested for avr with --target_board=atmega128-sim and native on
 i686-pc-linuc-gnu.
 
 Committed as obvious.

Thanks!
Eric


Re: [GOOGLE] Emit linkage_name when built with -gmlt and for abstract decls

2013-08-19 Thread Cary Coutant
 This patch emits linkage_name at -gmlt. It also make sure abstract
 decls' linkage_names are emitted so that inlined functions can also
 find linkage name.

 Bootstrapped and passed regression test.

 OK for google branches?

OK.

-cary


[patch] Adjust DECL_NAME of virtual clones

2013-08-19 Thread Eric Botcazou
Hi,

this patchlet adjusts the DECL_NAME of the cloned functions created by 
cgraph_create_virtual_clone.  It's currently set to a simple copy of the 
DECL_ASSEMBLER_NAME, which is mildly annoying because it comprises weird 
platform-independent tweaks, which results in a different output in error 
messages or in the -fstack-usage report for example.  With the patchlet, it's 
changed to a uniform name based on the DECL_NAME and using $ as concatenation 
character like in SRA.

Tested on x86_64-suse-linux, OK for the mainline?


2013-08-19  Eric Botcazou  ebotca...@adacore.com

* cgraphclones.c (cgraph_create_virtual_clone): Compute the DECL_NAME
of the clone from the DECL_NAME of the original function.
testsuite/
* gcc.dg/tree-ssa/ipa-cp-1.c: Adjust regexp.


-- 
Eric Botcazou
Index: cgraphclones.c
===
--- cgraphclones.c	(revision 201823)
+++ cgraphclones.c	(working copy)
@@ -252,7 +252,7 @@ cgraph_clone_node (struct cgraph_node *n
   return new_node;
 }
 
-/* Create a new name for clone of DECL, add SUFFIX.  Returns an identifier.  */
+/* Return a new assembler name for a clone of DECL with SUFFIX.  */
 
 static GTY(()) unsigned int clone_fn_id_num;
 
@@ -293,8 +293,9 @@ cgraph_create_virtual_clone (struct cgra
   tree old_decl = old_node-symbol.decl;
   struct cgraph_node *new_node = NULL;
   tree new_decl;
-  size_t i;
+  size_t len, i;
   struct ipa_replace_map *map;
+  char *name;
 
   if (!in_lto_p)
 gcc_checking_assert  (tree_versionable_function_p (old_decl));
@@ -318,8 +319,13 @@ cgraph_create_virtual_clone (struct cgra
  sometimes storing only clone decl instead of original.  */
 
   /* Generate a new name for the new version. */
-  DECL_NAME (new_decl) = clone_function_name (old_decl, suffix);
-  SET_DECL_ASSEMBLER_NAME (new_decl, DECL_NAME (new_decl));
+  len = IDENTIFIER_LENGTH (DECL_NAME (old_decl));
+  name = XALLOCAVEC (char, len + strlen (suffix) + 2);
+  memcpy (name, IDENTIFIER_POINTER (DECL_NAME (old_decl)), len);
+  name[len] = '$';
+  strcpy (name + len + 1, suffix);
+  DECL_NAME (new_decl) = get_identifier (name);
+  SET_DECL_ASSEMBLER_NAME (new_decl, clone_function_name (old_decl, suffix));
   SET_DECL_RTL (new_decl, NULL);
 
   new_node = cgraph_clone_node (old_node, new_decl, old_node-count,
Index: testsuite/gcc.dg/tree-ssa/ipa-cp-1.c
===
--- testsuite/gcc.dg/tree-ssa/ipa-cp-1.c	(revision 201823)
+++ testsuite/gcc.dg/tree-ssa/ipa-cp-1.c	(working copy)
@@ -16,5 +16,5 @@ blah ()
   very_long_function (1);
 }
 /* One appearance for dump, one self recursive call and one call from main.  */
-/* { dg-final { scan-tree-dump-times very_long_function.constprop.0 \\(\\) 3 optimized} } */
+/* { dg-final { scan-tree-dump-times very_long_function\\\$constprop \\(\\) 3 optimized} } */
 /* { dg-final { cleanup-tree-dump optimized } } */


Re: [GOOGLE] Add discriminator for inlined callsite

2013-08-19 Thread Cary Coutant
 -  expanded_location s = expand_location (BLOCK_SOURCE_LOCATION (stmt));
 +  location_t locus = BLOCK_SOURCE_LOCATION (stmt);
 +  expanded_location s = expand_location (locus);

if (dwarf_version = 3 || !dwarf_strict)
  {
add_AT_file (die, DW_AT_call_file, lookup_filename (s.file));
add_AT_unsigned (die, DW_AT_call_line, s.line);
 +  unsigned discr = get_discriminator_from_locus (locus);
 +  if (discr != 0)
 + add_AT_unsigned (die, DW_AT_discr, discr);

DW_AT_discr is used to identify the discriminant for variant
records; we should not reuse it for the discriminator, even if it's
the case that the other meaning could never apply to a
DW_TAG_inlined_subroutine. You should instead add a new
DW_AT_GNU_discriminator to include/dwarf2.def and use that. 0x2136
looks like the next value in the GNU vendor range. Note that
dwarf2.def lives in both GCC and binutils repos, so once you have
approval to commit to the GCC repo, you'll need to commit the same
change to the binutils repo as well.

-cary


Re: [PATCH, vtv update] Fix /tmp directory issues in libvtv

2013-08-19 Thread Florian Weimer

On 08/17/2013 12:29 AM, Caroline Tice wrote:

OK, I *think* I have done as you requested.  I have to try the
environment variable before falling back on stderr (there's a program
we want to use this on that disables the ability to write to stderr).
I have added the secure_getenv stuff as you requested. The fixed patch
is attached.

Please review the patch and let me know if this is OK to commit.  Thanks!


I found a packaged version of autoconf 2.64 and bootstrapped with 
--enable-vtable-verify.  It's a bit confusing that libvtv is always 
built, but ends up being empty.


It seems that HAVE_*SECURE_GETENV is not properly passed down to the 
compiler invocation:


/bin/bash ./libtool --tag=CXX   --mode=compile 
/home/fw/src/gnu/gcc/build/./gcc/xgcc 
-B/home/fw/src/gnu/gcc/build/./gcc/  -I. -I../../../git/libvtv 
-I../../../git/libvtv/../include  -D_GNU_SOURCE -Wall -Wextra 
-fno-exceptions -I./../libstdc++-v3/include 
-I./../libstdc++-v3/include/x86_64-unknown-linux-gnu 
-I../../../git/libvtv/../libstdc++-v3/libsupc++ 
-Wl,-u_vtable_map_vars_start,-u_vtable_map_vars_end -g -O2 -D_GNU_SOURCE 
-MT vtv_utils.lo -MD -MP -MF .deps/vtv_utils.Tpo -c -o vtv_utils.lo 
../../../git/libvtv/vtv_utils.cc
libtool: compile:  /home/fw/src/gnu/gcc/build/./gcc/xgcc 
-B/home/fw/src/gnu/gcc/build/./gcc/ -I. -I../../../git/libvtv 
-I../../../git/libvtv/../include -D_GNU_SOURCE -Wall -Wextra 
-fno-exceptions -I./../libstdc++-v3/include 
-I./../libstdc++-v3/include/x86_64-unknown-linux-gnu 
-I../../../git/libvtv/../libstdc++-v3/libsupc++ 
-Wl,-u_vtable_map_vars_start,-u_vtable_map_vars_end -g -O2 -D_GNU_SOURCE 
-MT vtv_utils.lo -MD -MP -MF .deps/vtv_utils.Tpo -c 
../../../git/libvtv/vtv_utils.cc  -fPIC -DPIC -o .libs/vtv_utils.o


As a result, the DSO ends up referencing getenv, even though 
secure_getenv is available (and has been detected by the autoconf check).


Sorry, I don't know what's wrong here.  I'm not familiar with the GCC 
autoconf machinery.  Perhaps you need a config.h.in file?


You can check this yourself with readelf -s libvtv.so | grep getenv. 
It should print a line containing secure_getenv or __secure_getenv, 
but not plain getenv.


--
Florian Weimer / Red Hat Product Security Team


RE: [PATCH] Fix for PR c/57490

2013-08-19 Thread Iyer, Balaji V


 -Original Message-
 From: Jason Merrill [mailto:ja...@redhat.com]
 Sent: Monday, August 19, 2013 10:46 AM
 To: Iyer, Balaji V; Rainer Orth
 Cc: Jakub Jelinek; gcc-patches@gcc.gnu.org; Marek Polacek
 (pola...@redhat.com)
 Subject: Re: [PATCH] Fix for PR c/57490
 
 On 08/19/2013 10:37 AM, Iyer, Balaji V wrote:
  I just want to make sure I get what you are saying. Are you suggesting
 that I do this in finish_call_expr() instead of cp_build_binary_op() ?
 
 I think build_cxx_call is the right place.

Attached, please find a fixed patch as you suggested. Is this Ok for trunk? 

Here is the ChangeLog entries.

gcc/cp/ChangeLog
2013-08-19  Balaji V. Iyer  balaji.v.i...@intel.com

PR c/57490
* cp-array-notation.c (cp_expand_cond_array_notations): Added a
check for truth values.
(expand_array_notation_exprs): Added truth values case.  Removed an
unwanted else.  Added for-loop to walk through subtrees in default
case.
* call.c (build_cxx_call): Inherited the type of the array notation for
certain built-in array notation functions.

gcc/c/ChangeLog
2013-08-19  Balaji V. Iyer  balaji.v.i...@intel.com

PR c/57490
* c-array-notation.c (fix_conditional_array_notations_1): Added a
check for truth values.
(expand_array_notation_exprs): Added truth values case.  Removed an
unwanted else.  Added for-loop to walk through subtrees in default
case.

gcc/testsuite/ChangeLog
2013-08-19  Balaji V. Iyer  balaji.v.i...@intel.com

PR c/57490
* c-c++-common/cilk-plus/AN/pr57490.c: New test.

 
 Jason
 

diff --git a/gcc/c/c-array-notation.c b/gcc/c/c-array-notation.c
index 7788f7b..5747bcb 100644
--- a/gcc/c/c-array-notation.c
+++ b/gcc/c/c-array-notation.c
@@ -906,6 +906,8 @@ fix_conditional_array_notations_1 (tree stmt)
 cond = COND_EXPR_COND (stmt);
   else if (TREE_CODE (stmt) == SWITCH_EXPR)
 cond = SWITCH_COND (stmt);
+  else if (truth_value_p (TREE_CODE (stmt)))
+cond = TREE_OPERAND (stmt, 0);
   else
 /* Otherwise dont even touch the statement.  */
 return stmt;
@@ -1232,6 +1234,12 @@ expand_array_notation_exprs (tree t)
 case BIND_EXPR:
   t = expand_array_notation_exprs (BIND_EXPR_BODY (t));
   return t;
+case TRUTH_ORIF_EXPR:
+case TRUTH_ANDIF_EXPR:
+case TRUTH_OR_EXPR:
+case TRUTH_AND_EXPR:
+case TRUTH_XOR_EXPR:
+case TRUTH_NOT_EXPR:
 case COND_EXPR:
   t = fix_conditional_array_notations (t);
 
@@ -1246,8 +1254,6 @@ expand_array_notation_exprs (tree t)
COND_EXPR_ELSE (t) =
  expand_array_notation_exprs (COND_EXPR_ELSE (t));
}
-  else
-   t = expand_array_notation_exprs (t);
   return t;
 case STATEMENT_LIST:
   {
@@ -1284,6 +1290,10 @@ expand_array_notation_exprs (tree t)
 Replace those with just void zero node.  */
   t = void_zero_node;
 default:
+  for (int ii = 0; ii  TREE_CODE_LENGTH (TREE_CODE (t)); ii++)
+   if (contains_array_notation_expr (TREE_OPERAND (t, ii)))
+ TREE_OPERAND (t, ii) =
+   expand_array_notation_exprs (TREE_OPERAND (t, ii));
   return t;
 }
   return t;
diff --git a/gcc/cp/call.c b/gcc/cp/call.c
index e493a79..e3af4b4 100644
--- a/gcc/cp/call.c
+++ b/gcc/cp/call.c
@@ -7205,6 +7205,33 @@ build_cxx_call (tree fn, int nargs, tree *argarray,
   if (MAYBE_CLASS_TYPE_P (TREE_TYPE (fn)))
fn = build_cplus_new (TREE_TYPE (fn), fn, complain);
 }
+
+  /* If it is a built-in array notation function, then the return type of
+ the function is the element type of the array passed in as array 
+ notation (i.e. the first parameter of the function).  */
+  if (flag_enable_cilkplus  TREE_CODE (fn) == CALL_EXPR) 
+{
+  enum built_in_function bif = 
+   is_cilkplus_reduce_builtin (CALL_EXPR_FN (fn));
+  if (bif == BUILT_IN_CILKPLUS_SEC_REDUCE_ADD
+ || bif == BUILT_IN_CILKPLUS_SEC_REDUCE_MUL
+ || bif == BUILT_IN_CILKPLUS_SEC_REDUCE_MAX
+ || bif == BUILT_IN_CILKPLUS_SEC_REDUCE_MIN
+ || bif == BUILT_IN_CILKPLUS_SEC_REDUCE
+ || bif == BUILT_IN_CILKPLUS_SEC_REDUCE_MUTATING)
+   { 
+ /* for bif == BUILT_IN_CILKPLUS_SEC_REDUCE_ALL_ZERO or
+BUILT_IN_CILKPLUS_SEC_REDUCE_ANY_ZERO or
+BUILT_IN_CILKPLUS_SEC_REDUCE_ANY_NONZERO or 
+BUILT_IN_CILKPLUS_SEC_REDUCE_ALL_NONZERO or
+BUILT_IN_CILKPLUS_SEC_REDUCE_MIN_IND or
+BUILT_IN_CILKPLUS_SEC_REDUCE_MAX_IND
+The pre-defined return-type is the correct one.  */
+ tree array_ntn = CALL_EXPR_ARG (fn, 0); 
+ TREE_TYPE (fn) = TREE_TYPE (array_ntn); 
+ return fn;
+   }
+} 
   return convert_from_reference (fn);
 }
 
diff --git a/gcc/cp/cp-array-notation.c b/gcc/cp/cp-array-notation.c
index eb6a70d..f4581f0 100644
--- a/gcc/cp/cp-array-notation.c
+++ b/gcc/cp/cp-array-notation.c
@@ -857,6 +857,19 

Re: [patch] Adjust DECL_NAME of virtual clones

2013-08-19 Thread David Edelsohn
 This patchlet adjusts the DECL_NAME of the cloned functions created by
cgraph_create_virtual_clone.  It's currently set to a simple copy of the
DECL_ASSEMBLER_NAME, which is mildly annoying because it comprises weird
platform-independent tweaks, which results in a different output in error
messages or in the -fstack-usage report for example.  With the patchlet, it's
changed to a uniform name based on the DECL_NAME and using $ as concatenation
character like in SRA.

Dollar sign is not a valid character in symbols on some platforms, such as AIX.

- David


Re: RFA: testsuite patches (2/6): [avr]: Set required branch cost for gcc.dg/tree-ssa/vrp87.c

2013-08-19 Thread Denis Chertykov
2013/8/19 Joern Rennecke joern.renne...@embecosm.com:
 We need a minimum branch cost of 2 to make the expected optimization happen.

 Tested for avr with --target_board=atmega128-sim and native on
 i686-pc-linuc-gnu.

 Ok to apply?

 2013-08-16  Joern Rennecke  joern.renne...@embecosm.com

 * gcc.dg/tree-ssa/vrp87.c [avr-*-*] (dg-additional-options): New.


Ok.

Denis


Re: [patch] documentation: clarify that Cilk Plus implementation is incomplete

2013-08-19 Thread Aldy Hernandez

On 08/18/13 05:21, Gerald Pfeifer wrote:

On Wed, 26 Jun 2013, Aldy Hernandez wrote:

This is a small cleanup to the Cilk Plus mention in our documentation,
but more importantly, it clarifies that the Cilk Plus implementation
in GCC is only partial.

OK for trunk?


 * doc/invoke.texi (-fcilkplus): Clarify that implementation is
 incomplete.

diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index dd82880..3150c8d 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -1804,13 +1804,17 @@ have support for @option{-pthread}.
@item -fcilkplus
@opindex fcilkplus
@cindex Enable Cilk Plus
-Enable the usage of Cilk Language extension features for C/C++.  When
the flag
-@option{-fcilkplus} is specified, all the Cilk Plus components are
converted -to the appropriate C/C++ code.  The present implementation
follows ABI version -0.9.  There are four major parts to Cilk Plus
language -extension: Array Notations, Cilk Keywords, SIMD annotations
and elemental -functions.  Detailed information about Cilk Plus can be
found at -@w{@uref{http://www.cilkplus.org}}. +When the option
@option{-fcilkplus} is specified, enable the usage of
+the Cilk Plus Language extension features for C/C++.  The present
+implementation follows ABI version 0.9.  This is an experimental
+feature that is only partially complete, and whose interface may
+change in future versions of GCC, as the official specification
+changes.  Currently only the array notation feature of the language
+specification has been implemented.  More features will be implemented
+in subsequent release cycles.
+
+Detailed information about Cilk Plus can be found at
+@w{@uref{http://www.cilkplus.org}}.

I would keep the first sentence as is, which is more in line how
we generally describe options.


Done (fixed capitalization problems in original sentence though).



No comma before as the official.


Fixed.



And personally I would omit the last sentence, but it's fine to
keep if you really want to leave it.


Agreed.

I have committed the attached patch.

Thanks.
commit d73291f1eb8feee7a2f6b91af11e1e00de401a42
Author: Aldy Hernandez al...@redhat.com
Date:   Mon Aug 19 12:18:31 2013 -0500

* doc/invoke.texi (-fcilkplus): Clarify that implementation is
incomplete.

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 6645df9..dd6559d 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,8 @@
+2013-08-19  Aldy Hernandez  al...@redhat.com
+
+   * doc/invoke.texi (-fcilkplus): Clarify that implementation is
+   incomplete.
+
 2013-08-19  Alexander Ivchenko  alexander.ivche...@intel.com
 
* target.def (TARGET_LIBC_HAS_FUNCTION): New target hook.
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index f6a4ec4..dae7605 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -1811,13 +1811,15 @@ have support for @option{-pthread}.
 @item -fcilkplus
 @opindex fcilkplus
 @cindex Enable Cilk Plus
-Enable the usage of Cilk Language extension features for C/C++.  When the flag
-@option{-fcilkplus} is specified, all the Cilk Plus components are converted 
-to the appropriate C/C++ code.  The present implementation follows ABI version 
-0.9.  There are four major parts to Cilk Plus language 
-extension: Array Notations, Cilk Keywords, SIMD annotations and elemental 
-functions.  Detailed information about Cilk Plus can be found at 
-@w{@uref{http://www.cilkplus.org}}. 
+Enable the usage of Cilk Plus language extension features for C/C++.
+When the option @option{-fcilkplus} is specified, enable the usage of
+the Cilk Plus Language extension features for C/C++.  The present
+implementation follows ABI version 0.9.  This is an experimental
+feature that is only partially complete, and whose interface may
+change in future versions of GCC as the official specification
+changes.  Currently only the array notation feature of the language
+specification has been implemented.  More features will be implemented
+in subsequent release cycles.
 
 @item -fgnu-tm
 @opindex fgnu-tm


[MIPS, committed] Fix rtl checking failure in gcc.dg/pr27531-1.c

2013-08-19 Thread Richard Sandiford
gcc.dg/pr27531-1.c was failing with rtl checking enabled for -mips16 -mabi=32
on mips64-linux-gnu.  The problem was that mips_adjust_insn_length didn't cope
properly with the new(ish) JUMP_TABLE_DATA rtx, which has no insn code.

Tested on mips64-linux-gnu and applied.

Richard


gcc/
* config/mips/mips.c (mips_adjust_insn_length): Add checks for
JUMP_P and INSN_P.

Index: gcc/config/mips/mips.c
===
--- gcc/config/mips/mips.c  2013-08-13 08:57:15.711421748 +0100
+++ gcc/config/mips/mips.c  2013-08-18 21:00:01.382816765 +0100
@@ -12297,6 +12297,7 @@ mips_adjust_insn_length (rtx insn, int l
   /* mips.md uses MAX_PIC_BRANCH_LENGTH as a placeholder for the length
  of a PIC long-branch sequence.  Substitute the correct value.  */
   if (length == MAX_PIC_BRANCH_LENGTH
+   JUMP_P (insn)
INSN_CODE (insn) = 0
get_attr_type (insn) == TYPE_BRANCH)
 {
@@ -12318,7 +12319,9 @@ mips_adjust_insn_length (rtx insn, int l
 length += TARGET_MIPS16 ? 2 : 4;
 
   /* See how many nops might be needed to avoid hardware hazards.  */
-  if (!cfun-machine-ignore_hazard_length_p  INSN_CODE (insn) = 0)
+  if (!cfun-machine-ignore_hazard_length_p
+   INSN_P (insn)
+   INSN_CODE (insn) = 0)
 switch (get_attr_hazard (insn))
   {
   case HAZARD_NONE:


[MIPS, committed] Fix mulsize-2.c test

2013-08-19 Thread Richard Sandiford
mulsize-2.c expected a subtraction, but when multiplying by 9 we should
have a shift and addition instead.  While there I made mulsize-1.c and
mulsize-2.c check for a shift, like mulsize-3.c does.

Tested on mips64-linux-gnu and applied.

Richard


gcc/testsuite/
* gcc.target/mips/mulsize-1.c: Check for SLL as well as SUBU.
* gcc.target/mips/mulsize-2.c: Check for ADDU rather than SUBU.
Check for SLL too.

Index: gcc/testsuite/gcc.target/mips/mulsize-1.c
===
--- gcc/testsuite/gcc.target/mips/mulsize-1.c   2013-07-17 08:36:11.089030684 
+0100
+++ gcc/testsuite/gcc.target/mips/mulsize-1.c   2013-08-19 18:32:51.558727758 
+0100
@@ -1,4 +1,5 @@
 /* { dg-final { scan-assembler \t.globl\tf7 } } */
+/* { dg-final { scan-assembler \tsll\t } } */
 /* { dg-final { scan-assembler \tsubu\t } } */
 /* { dg-final { scan-assembler-not \tli\t } } */
 /* { dg-final { scan-assembler-not \tmul\t } } */
Index: gcc/testsuite/gcc.target/mips/mulsize-2.c
===
--- gcc/testsuite/gcc.target/mips/mulsize-2.c   2013-07-17 08:36:11.089030684 
+0100
+++ gcc/testsuite/gcc.target/mips/mulsize-2.c   2013-08-19 18:33:00.567808120 
+0100
@@ -1,5 +1,6 @@
 /* { dg-final { scan-assembler \t.globl\tf9 } } */
-/* { dg-final { scan-assembler \tsubu\t } } */
+/* { dg-final { scan-assembler \tsll\t } } */
+/* { dg-final { scan-assembler \taddu\t } } */
 /* { dg-final { scan-assembler-not \tli\t } } */
 /* { dg-final { scan-assembler-not \tmul\t } } */
 int


Re: [PATCH] -mcmodel=large -fpic TLS GD and LD support gcc + binutils (PR target/58067)

2013-08-19 Thread H.J. Lu
On Wed, Aug 14, 2013 at 12:03 AM, Uros Bizjak ubiz...@gmail.com wrote:
 On Tue, Aug 13, 2013 at 9:42 PM, Jakub Jelinek ja...@redhat.com wrote:

 We right now ICE with -mcmodel=large -fpic on x86_64 on TLS GD and LD
 sequences, because obviously we can't call __tls_get_addr@plt there from code
 potentially more than 2GB away from the PLT slot.

 The attached patches add support for that in gcc and also teaches linker
 about those, because otherwise the linker will fail if you try to link such
 -mcmodel=large -fpic code into binaries or PIEs.

 To make transitions possible, we emit always
 leaq foo@tlsgd(%rip), %rdi
 movabsq $__tls_get_addr@pltoff, %rax
 addq $rbx, %rax
 call *%rax
 resp.
 leaq foo@tlsld(%rip), %rdi
 movabsq $__tls_get_addr@pltoff, %rax
 addq $rbx, %rax
 call *%rax
 sequences (22 bytes, 6 bytes longer than what we do for TLSGD for normal
 libraries).

 Bootstrapped/regtested on x86_64-linux and i686-linux, attached is also the
 sources I've used to test all the 3 different transitions.

 Ok for trunk and 4.8 branch (and binutils trunk)?

 The implementation for x86 is technically OK, but I wonder if these
 sequences should be documented in some authoritative document about
 TLS relocations. The ELF Handling For Thread-Local Storage document
 [1] doesn't mention various code models fo x86_64, so I was not able
 to cross-check the implementaton vs. documentation.

 [1] http://www.akkadia.org/drepper/tls.pdf


I agree.  We need to document the TLS code sequences
for PIC/non-PIC medium/large models first.


-- 
H.J.


Re: [PATCH] Sanitize block partitioning under -freorder-blocks-and-partition

2013-08-19 Thread Teresa Johnson
On Mon, Aug 19, 2013 at 8:09 AM, Jan Hubicka hubi...@ucw.cz wrote:
 Remember it isn't using dominance anymore. The latest patch was
 instead ensuring the most frequent path between hot blocks and the
 entry/exit are marked hot. That should be better than the dominance
 approach used in the earlier version.

 Indeed, that looks more resonable approach.
 Can you point me to the last version of patch? Last one I remember still
 walked dominators...

I've included the latest patch below. I still use dominators in the
post-cfg-optimization fixup (fixup_partitions), but not in the
partition sanitizing done during the partitioning itself
(sanitize_hot_paths). The former is looking for hot bbs newly
dominated by cold bbs after cfg transformations.


  We can commit it and work on better solution incrementally but it will
  probably mean replacing it later.  If you think it makes things easier
  to work on it incrementally, I think the patch is OK.

 Yes, I think this is a big step forward from what is there now for
 splitting, which does the splitting purely based on bb count in
 isolation. I don't have a much better solution in mind yet.

 
   - I'll try building and profiling gimp myself to see if I can
   reproduce the issue with code executing out of the cold section.
 
  I have spent some time this week trying to get the latest gimp Martin
  pointed me to configured and built, but it took awhile to track down
  and configure/build all of the required versions of dependent
  packages. I'm still hitting some issues trying to get it compiled, so
  it may not yet be configured properly. I'll take a look again early
  next week.
 
  I do not think there is anything special about gimp.  You can probably
  take any other bigger app, like GCC itself. With profiledbootstrap
  and linker script to lock unlikely section you should get ICEs where
  we jump into cold secton and should not.

 Ok, please point me to the linker script and I will try gcc
 profiledbootstrap as well. I wanted to try gimp if possible as I
 haven't seen this much jumping to the cold section in some of the
 internal apps I tried.

 You can also discuss with Martin the systemtap script to plot disk accesses
 during the startup.  It is very handy for analyzing the code layout issues

Ok. I am using linux perf to collect this info (fed through some
scripts that munge and plot the data).


 It may be interesting to get similar script taking traces from valgrind
 and ploting the most frequent calls in the final layout ;)

I think linux perf -g to get a callgraph should give similar data.

Teresa


 Honza

2013-08-05  Teresa Johnson  tejohn...@google.com
Steven Bosscher  ste...@gcc.gnu.org

* cfgrtl.c (fixup_new_cold_bb): New routine.
(commit_edge_insertions): Invoke fixup_partitions.
(find_partition_fixes): New routine.
(fixup_partitions): Ditto.
(verify_hot_cold_block_grouping): Update comments.
(rtl_verify_edges): Invoke find_partition_fixes.
(rtl_verify_bb_pointers): Update comments.
(rtl_verify_bb_layout): Ditto.
* basic-block.h (fixup_partitions): Declare.
* cfgcleanup.c (try_optimize_cfg): Invoke fixup_partitions.
* bb-reorder.c (sanitize_hot_paths): New function.
(find_rarely_executed_basic_blocks_and_crossing_edges): Invoke
sanitize_hot_paths.

Index: cfgrtl.c
===
--- cfgrtl.c(revision 201461)
+++ cfgrtl.c(working copy)
@@ -1341,6 +1341,43 @@ fixup_partition_crossing (edge e)
 }
 }

+/* Called when block BB has been reassigned to the cold partition,
+   because it is now dominated by another cold block,
+   to ensure that the region crossing attributes are updated.  */
+
+static void
+fixup_new_cold_bb (basic_block bb)
+{
+  edge e;
+  edge_iterator ei;
+
+  /* This is called when a hot bb is found to now be dominated
+ by a cold bb and therefore needs to become cold. Therefore,
+ its preds will no longer be region crossing. Any non-dominating
+ preds that were previously hot would also have become cold
+ in the caller for the same region. Any preds that were previously
+ region-crossing will be adjusted in fixup_partition_crossing.  */
+  FOR_EACH_EDGE (e, ei, bb-preds)
+{
+  fixup_partition_crossing (e);
+}
+
+  /* Possibly need to make bb's successor edges region crossing,
+ or remove stale region crossing.  */
+  FOR_EACH_EDGE (e, ei, bb-succs)
+{
+  /* We can't have fall-through edges across partition boundaries.
+ Note that force_nonfallthru will do any necessary partition
+ boundary fixup by calling fixup_partition_crossing itself.  */
+  if ((e-flags  EDGE_FALLTHRU)
+   BB_PARTITION (bb) != BB_PARTITION (e-dest)
+   e-dest != EXIT_BLOCK_PTR)
+force_nonfallthru (e);
+  else
+fixup_partition_crossing (e);
+}
+}
+
 /* Attempt to change code to 

Re: [Patch] Fix empty grouping problem in regex

2013-08-19 Thread Mike Stump
On Aug 19, 2013, at 7:03 AM, Tim Shen timshe...@gmail.com wrote:
 So should I later propose a single patch to make every 8 spaces to a
 tab, or just make changes for all my committed files in this patch?

One cannot merely replace 8 spaces with a tab, this doesn't work in general.

If you expand all tabs in the file (pr will do this), then do the replacement, 
then it is likely to work in the regex file.

Re: v3 of patch (was Re: [PATCH 11/11] Make opt_pass and gcc::pipeline be GC-managed)

2013-08-19 Thread Richard Henderson
On 08/16/2013 08:33 AM, David Malcolm wrote:
 I also tweaked the traversal hooks for opt_pass to emulate chain_next,
 since this is where the really deep callchains could otherwise occur.
 
 See the patch for details (given the subtleties I opted to put big
 comments in the relevant routines).
 
 (I also fixed the typo overide to override as noted by Bernhard)
 
 Successfully bootstrapped and tested on x86_64-unknown-linux-gnu, on top
 of the other patches (Note that the gengtype fix I committed just now as
 r201791 is also needed [1]).
 
 OK for trunk?

Ok.


r~


Re: [PATCH, rs6000, generic builtins] Fix unary TDmode patterns and add DFP ABS builtins

2013-08-19 Thread Peter Bergner
On Mon, 2013-08-19 at 09:17 -0400, David Edelsohn wrote:
 The last version is okay.

Jakub ack'd the builtins.{c,def] changes on IRC, so I have committed
this as revision 201849.


  And is the *negtd2_fpr hunk ok for the 4.8 branch?
 
 The negtd2_fpr change is okay for the 4.8 branch.

...and this as revision 201850.

Thank you Jakub and David!

Peter




Re: [PATCH] Fix for PR c/57490

2013-08-19 Thread Jason Merrill

On 08/19/2013 12:40 PM, Iyer, Balaji V wrote:

+  /* If it is a built-in array notation function, then the return type of
+ the function is the element type of the array passed in as array
+ notation (i.e. the first parameter of the function).  */
+  if (flag_enable_cilkplus  TREE_CODE (fn) == CALL_EXPR)

...

Let's move this a bit higher in the function, to...


  /* Check that arguments to builtin functions match the expectations.  */
  if (fndecl
   DECL_BUILT_IN (fndecl)
   DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
   !check_builtin_function_arguments (fndecl, nargs, argarray))
return error_mark_node;


...here.


  /* Some built-in function calls will be evaluated at compile-time in
 fold ().  Set optimize to 1 when folding __builtin_constant_p inside
 a constexpr function so that fold_builtin_1 doesn't fold it to 0.  */


OK with that change.

Jason



Re: [PATCH] Convert more passes to new dump framework

2013-08-19 Thread Teresa Johnson
Ping.
Thanks,
Teresa

On Mon, Aug 12, 2013 at 6:54 AM, Teresa Johnson tejohn...@google.com wrote:
 On Tue, Aug 6, 2013 at 10:23 PM, Teresa Johnson tejohn...@google.com wrote:
 On Tue, Aug 6, 2013 at 9:29 AM, Teresa Johnson tejohn...@google.com wrote:
 On Tue, Aug 6, 2013 at 9:01 AM, Martin Jambor mjam...@suse.cz wrote:
 Hi,

 On Tue, Aug 06, 2013 at 07:14:42AM -0700, Teresa Johnson wrote:
 On Tue, Aug 6, 2013 at 5:37 AM, Martin Jambor mjam...@suse.cz wrote:
  On Mon, Aug 05, 2013 at 10:37:00PM -0700, Teresa Johnson wrote:
  This patch ports messages to the new dump framework,
 
  It would be great this new framework was documented somewhere.  I lost
  track of what was agreed it would be and from the uses in the
  vectorizer I was never quite sure how to utilize it in other passes.

 Cc'ing Sharad who implemented this - Sharad, is this documented on a
 wiki or elsewhere?

 Thanks


 
  I'd also like to point out two other minor things inline:
 
  [...]
 
  2013-08-06  Teresa Johnson  tejohn...@google.com
  Dehao Chen  de...@google.com
 
  * dumpfile.c (dump_loc): Add column number to output, make 
  newlines
  consistent.
  * dumpfile.h (OPTGROUP_OTHER): Add and enable under 
  OPTGROUP_ALL.
  * ipa-inline-transform.c (clone_inlined_nodes):
  (cgraph_node_opt_info): New function.
  (cgraph_node_call_chain): Ditto.
  (dump_inline_decision): Ditto.
  (inline_call): Invoke dump_inline_decision.
  * doc/invoke.texi: Document optall -fopt-info flag.
  * profile.c (read_profile_edge_counts): Use new dump framework.
  (compute_branch_probabilities): Ditto.
  * passes.c (pass_manager::register_one_dump_file): Use 
  OPTGROUP_OTHER
  when pass not in any opt group.
  * value-prof.c (check_counter): Use new dump framework.
  (find_func_by_funcdef_no): Ditto.
  (check_ic_target): Ditto.
  * coverage.c (get_coverage_counts): Ditto.
  (coverage_init): Setup new dump framework.
  * ipa-inline.c (inline_small_functions): Set is_in_ipa_inline.
  * ipa-inline.h (is_in_ipa_inline): Declare.
 
  * testsuite/gcc.dg/pr40209.c: Use -fopt-info.
  * testsuite/gcc.dg/pr26570.c: Ditto.
  * testsuite/gcc.dg/pr32773.c: Ditto.
  * testsuite/g++.dg/tree-ssa/dom-invalid.C (struct C): Ditto.
 
 
  [...]
 
  Index: ipa-inline-transform.c
  ===
  --- ipa-inline-transform.c  (revision 201461)
  +++ ipa-inline-transform.c  (working copy)
  @@ -192,6 +192,108 @@ clone_inlined_nodes (struct cgraph_edge *e, bool 
  d
   }
 
 
  +#define MAX_INT_LENGTH 20
  +
  +/* Return NODE's name and profile count, if available.  */
  +
  +static const char *
  +cgraph_node_opt_info (struct cgraph_node *node)
  +{
  +  char *buf;
  +  size_t buf_size;
  +  const char *bfd_name = lang_hooks.dwarf_name (node-symbol.decl, 0);
  +
  +  if (!bfd_name)
  +bfd_name = unknown;
  +
  +  buf_size = strlen (bfd_name) + 1;
  +  if (profile_info)
  +buf_size += (MAX_INT_LENGTH + 3);
  +
  +  buf = (char *) xmalloc (buf_size);
  +
  +  strcpy (buf, bfd_name);
  +
  +  if (profile_info)
  +sprintf (buf, %s (HOST_WIDEST_INT_PRINT_DEC), buf, 
  node-count);
  +  return buf;
  +}
 
  I'm not sure if output of this function is aimed only at the user or
  if it is supposed to be used by gcc developers as well.  If the
  latter, an incredibly useful thing is to also dump node-symbol.order
  too.  We usually dump it after / sign separating it from node name.
  It is invaluable when examining decisions in C++ code where you can
  have lots of clones of a node (and also because existing dumps print
  it, it is easy to combine them).

 The output is useful for both power users doing performance tuning of
 their application, and by gcc developers. Adding the id is not so
 useful for the former, but I agree that it is very useful for compiler
 developers. In fact, in the google branch version we emit more verbose
 information (the lipo module id and the funcdef_no) to help uniquely
 identify the routines and to aid in post-processing by humans and
 tools. So it is probably useful to add something similar here too. Is
 the node-symbol.order more or less unique than the funcdef_no? I see
 that you added a patch a few months ago to print the
 node-symbol.order in the function header, and it also has the
 advantage as you note of matching up with existing ipa dumps.

 node-symbol.order is unique and if I remember correctly, it is not
 even recycled.  Clones, inline clones, thunks, every symbol table node
 gets its own symbol order so it should be more unique than funcdef_no.
 On the other hand it may be a bit cryptic for users but at the same
 time it is only one number.

 Ok, I am going to go ahead and add this to the output.



 
  [...]
 
  Index: ipa-inline.c
  

Re: [PATCH] Fix PR57451 (Incorrect debug ranges emitted for -freorder-blocks-and-partition -g)

2013-08-19 Thread Teresa Johnson
Ping.
Thanks,
Teresa

On Sun, Aug 11, 2013 at 9:35 PM, Teresa Johnson tejohn...@google.com wrote:
 This patch fixes PR rtl-optimizations/57451 by preventing scopes and
 therefore lexical blocks from crossing split section boundaries.
 This will prevent debug info generation from using DW_AT_low_pc/high_pc
 pairs across the section boundary.

 Bootstrapped and tested on x86_64-unknown-linux-gnu. With this patch,
 a profilebootstrap with -freorder-blocks-and-partition force-enabled
 also passes. Ok for trunk?

 Thanks,
 Teresa

 2013-08-11  Teresa Johnson  tejohn...@google.com

 PR rtl-optimizations/57451
 * final.c (reemit_insn_block_notes): Prevent lexical blocks
 from crossing split section boundaries.

 Index: final.c
 ===
 --- final.c (revision 201644)
 +++ final.c (working copy)
 @@ -1650,12 +1650,26 @@ reemit_insn_block_notes (void)
rtx insn, note;

insn = get_insns ();
 -  if (!active_insn_p (insn))
 -insn = next_active_insn (insn);
 -  for (; insn; insn = next_active_insn (insn))
 +  for (; insn; insn = next_insn (insn))
  {
tree this_block;

 +  /* Prevent lexical blocks from straddling section boundaries.  */
 +  if (NOTE_P (insn)  NOTE_KIND (insn) == 
 NOTE_INSN_SWITCH_TEXT_SECTIONS)
 +{
 +  for (tree s = cur_block; s != DECL_INITIAL (cfun-decl);
 +   s = BLOCK_SUPERCONTEXT (s))
 +{
 +  rtx note = emit_note_before (NOTE_INSN_BLOCK_END, insn);
 +  NOTE_BLOCK (note) = s;
 +  note = emit_note_after (NOTE_INSN_BLOCK_BEG, insn);
 +  NOTE_BLOCK (note) = s;
 +}
 +}
 +
 +  if (!active_insn_p (insn))
 +continue;
 +
/* Avoid putting scope notes between jump table and its label.  */
if (JUMP_TABLE_DATA_P (insn))
 continue;

 --
 Teresa Johnson | Software Engineer | tejohn...@google.com | 408-460-2413



-- 
Teresa Johnson | Software Engineer | tejohn...@google.com | 408-460-2413


Re: [GOOGLE] Add discriminator for inlined callsite

2013-08-19 Thread Dehao Chen
Patch updated. BTW, do you think this patch should actually go into trunk?

Thanks,
Dehao

Index: include/dwarf2.def
===
--- include/dwarf2.def (revision 201852)
+++ include/dwarf2.def (working copy)
@@ -390,6 +390,8 @@ DW_AT (DW_AT_GNU_ranges_base, 0x2132)
 DW_AT (DW_AT_GNU_addr_base, 0x2133)
 DW_AT (DW_AT_GNU_pubnames, 0x2134)
 DW_AT (DW_AT_GNU_pubtypes, 0x2135)
+/* Attribute for discriminator.  */
+DW_AT (DW_AT_GNU_discriminator, 0x2136)
 /* VMS extensions.  */
 DW_AT (DW_AT_VMS_rtnbeg_pd_address, 0x2201)
 /* GNAT extensions.  */
Index: gcc/dwarf2out.c
===
--- gcc/dwarf2out.c (revision 201852)
+++ gcc/dwarf2out.c (working copy)
@@ -18815,12 +18815,16 @@ gen_label_die (tree decl, dw_die_ref context_die)
 static inline void
 add_call_src_coords_attributes (tree stmt, dw_die_ref die)
 {
-  expanded_location s = expand_location (BLOCK_SOURCE_LOCATION (stmt));
+  location_t locus = BLOCK_SOURCE_LOCATION (stmt);
+  expanded_location s = expand_location (locus);

   if (dwarf_version = 3 || !dwarf_strict)
 {
   add_AT_file (die, DW_AT_call_file, lookup_filename (s.file));
   add_AT_unsigned (die, DW_AT_call_line, s.line);
+  unsigned discr = get_discriminator_from_locus (locus);
+  if (discr != 0)
+ add_AT_unsigned (die, DW_AT_GNU_discriminator, discr);
 }
 }


On Mon, Aug 19, 2013 at 9:31 AM, Cary Coutant ccout...@google.com wrote:
 -  expanded_location s = expand_location (BLOCK_SOURCE_LOCATION (stmt));
 +  location_t locus = BLOCK_SOURCE_LOCATION (stmt);
 +  expanded_location s = expand_location (locus);

if (dwarf_version = 3 || !dwarf_strict)
  {
add_AT_file (die, DW_AT_call_file, lookup_filename (s.file));
add_AT_unsigned (die, DW_AT_call_line, s.line);
 +  unsigned discr = get_discriminator_from_locus (locus);
 +  if (discr != 0)
 + add_AT_unsigned (die, DW_AT_discr, discr);

 DW_AT_discr is used to identify the discriminant for variant
 records; we should not reuse it for the discriminator, even if it's
 the case that the other meaning could never apply to a
 DW_TAG_inlined_subroutine. You should instead add a new
 DW_AT_GNU_discriminator to include/dwarf2.def and use that. 0x2136
 looks like the next value in the GNU vendor range. Note that
 dwarf2.def lives in both GCC and binutils repos, so once you have
 approval to commit to the GCC repo, you'll need to commit the same
 change to the binutils repo as well.

 -cary


Re: [PATCH] Fix PR57451 (Incorrect debug ranges emitted for -freorder-blocks-and-partition -g)

2013-08-19 Thread Jeff Law

On 08/19/2013 12:34 PM, Teresa Johnson wrote:

Ping.
Thanks,
Teresa

On Sun, Aug 11, 2013 at 9:35 PM, Teresa Johnson tejohn...@google.com wrote:

This patch fixes PR rtl-optimizations/57451 by preventing scopes and
therefore lexical blocks from crossing split section boundaries.
This will prevent debug info generation from using DW_AT_low_pc/high_pc
pairs across the section boundary.

Bootstrapped and tested on x86_64-unknown-linux-gnu. With this patch,
a profilebootstrap with -freorder-blocks-and-partition force-enabled
also passes. Ok for trunk?

Thanks,
Teresa

2013-08-11  Teresa Johnson  tejohn...@google.com

 PR rtl-optimizations/57451
 * final.c (reemit_insn_block_notes): Prevent lexical blocks
 from crossing split section boundaries.
This is fine.  Is there any way you can turn the steps mentioned in 
57451 into a dejagnu testcase?  Clearly it would only work in a native 
configuration due to the need to run the instrumented program.


jeff



Re: [GOOGLE] bug in discriminator assignment

2013-08-19 Thread Cary Coutant
 This patch fixes a bug in assigning discrminator. We should explicitly
 call the hash function when finding the next discriminator.

 Bootstrapped and passed regression tests.

 OK for google branches?

OK. Thanks.

-cary


Re: [GOOGLE] Add discriminator for inlined callsite

2013-08-19 Thread Cary Coutant
 Patch updated. BTW, do you think this patch should actually go into trunk?

This is OK for google branches. I don't think it can go into trunk
like this, as it relies on get_discriminator_from_locus. In trunk, we
only have the discriminator assigned to a basic block.

-cary


 Index: include/dwarf2.def
 ===
 --- include/dwarf2.def (revision 201852)
 +++ include/dwarf2.def (working copy)
 @@ -390,6 +390,8 @@ DW_AT (DW_AT_GNU_ranges_base, 0x2132)
  DW_AT (DW_AT_GNU_addr_base, 0x2133)
  DW_AT (DW_AT_GNU_pubnames, 0x2134)
  DW_AT (DW_AT_GNU_pubtypes, 0x2135)
 +/* Attribute for discriminator.  */
 +DW_AT (DW_AT_GNU_discriminator, 0x2136)
  /* VMS extensions.  */
  DW_AT (DW_AT_VMS_rtnbeg_pd_address, 0x2201)
  /* GNAT extensions.  */
 Index: gcc/dwarf2out.c
 ===
 --- gcc/dwarf2out.c (revision 201852)
 +++ gcc/dwarf2out.c (working copy)
 @@ -18815,12 +18815,16 @@ gen_label_die (tree decl, dw_die_ref context_die)
  static inline void
  add_call_src_coords_attributes (tree stmt, dw_die_ref die)
  {
 -  expanded_location s = expand_location (BLOCK_SOURCE_LOCATION (stmt));
 +  location_t locus = BLOCK_SOURCE_LOCATION (stmt);
 +  expanded_location s = expand_location (locus);

if (dwarf_version = 3 || !dwarf_strict)
  {
add_AT_file (die, DW_AT_call_file, lookup_filename (s.file));
add_AT_unsigned (die, DW_AT_call_line, s.line);
 +  unsigned discr = get_discriminator_from_locus (locus);
 +  if (discr != 0)
 + add_AT_unsigned (die, DW_AT_GNU_discriminator, discr);
  }
  }



Re: [GOOGLE] Add discriminator for inlined callsite

2013-08-19 Thread Cary Coutant
 Patch updated. BTW, do you think this patch should actually go into trunk?

 This is OK for google branches. I don't think it can go into trunk
 like this, as it relies on get_discriminator_from_locus. In trunk, we
 only have the discriminator assigned to a basic block.

Oh, but the dwarf2.def patch *should* probably go to trunk, just to
reserve the value we're using. The comment should provide a bit more
detail -- perhaps a pointer to a GCC wiki page?

-cary


Re: [PATCH] Sanitize block partitioning under -freorder-blocks-and-partition

2013-08-19 Thread Martin Liška
Dear Teresa,

On 19 August 2013 19:47, Teresa Johnson tejohn...@google.com wrote:
 On Mon, Aug 19, 2013 at 8:09 AM, Jan Hubicka hubi...@ucw.cz wrote:
 Remember it isn't using dominance anymore. The latest patch was
 instead ensuring the most frequent path between hot blocks and the
 entry/exit are marked hot. That should be better than the dominance
 approach used in the earlier version.

 Indeed, that looks more resonable approach.
 Can you point me to the last version of patch? Last one I remember still
 walked dominators...

 I've included the latest patch below. I still use dominators in the
 post-cfg-optimization fixup (fixup_partitions), but not in the
 partition sanitizing done during the partitioning itself
 (sanitize_hot_paths). The former is looking for hot bbs newly
 dominated by cold bbs after cfg transformations.


  We can commit it and work on better solution incrementally but it will
  probably mean replacing it later.  If you think it makes things easier
  to work on it incrementally, I think the patch is OK.

 Yes, I think this is a big step forward from what is there now for
 splitting, which does the splitting purely based on bb count in
 isolation. I don't have a much better solution in mind yet.

 
   - I'll try building and profiling gimp myself to see if I can
   reproduce the issue with code executing out of the cold section.
 
  I have spent some time this week trying to get the latest gimp Martin
  pointed me to configured and built, but it took awhile to track down
  and configure/build all of the required versions of dependent
  packages. I'm still hitting some issues trying to get it compiled, so
  it may not yet be configured properly. I'll take a look again early
  next week.
 
  I do not think there is anything special about gimp.  You can probably
  take any other bigger app, like GCC itself. With profiledbootstrap
  and linker script to lock unlikely section you should get ICEs where
  we jump into cold secton and should not.

 Ok, please point me to the linker script and I will try gcc
 profiledbootstrap as well. I wanted to try gimp if possible as I
 haven't seen this much jumping to the cold section in some of the
 internal apps I tried.

 You can also discuss with Martin the systemtap script to plot disk accesses
 during the startup.  It is very handy for analyzing the code layout issues

I send you as an attachment linker script that preserves
.text.unlikely, .text.exit, .text.startup and .text.hot sections. I
tries to modify memory access flags for .text.unlikely section to
read-only, but I am not so familiar with linker scripting. Maybe, you
can define with MEMORY command the read-only memory area for
.text.unlikely.

I am using for my graphing disabled read-ahead function in the Linux
kernel with systemtap script and the results are presented with
matplotlib library. If you are more interested in this tool-chain,
write me and I can send you these scripts. I was also using valgrind
to trace functions, so I will try to get locations of called functions
to identify which functions are called in corresponding sections.

Martin

 Ok. I am using linux perf to collect this info (fed through some
 scripts that munge and plot the data).


 It may be interesting to get similar script taking traces from valgrind
 and ploting the most frequent calls in the final layout ;)

 I think linux perf -g to get a callgraph should give similar data.

 Teresa


 Honza

 2013-08-05  Teresa Johnson  tejohn...@google.com
 Steven Bosscher  ste...@gcc.gnu.org

 * cfgrtl.c (fixup_new_cold_bb): New routine.
 (commit_edge_insertions): Invoke fixup_partitions.
 (find_partition_fixes): New routine.
 (fixup_partitions): Ditto.
 (verify_hot_cold_block_grouping): Update comments.
 (rtl_verify_edges): Invoke find_partition_fixes.
 (rtl_verify_bb_pointers): Update comments.
 (rtl_verify_bb_layout): Ditto.
 * basic-block.h (fixup_partitions): Declare.
 * cfgcleanup.c (try_optimize_cfg): Invoke fixup_partitions.
 * bb-reorder.c (sanitize_hot_paths): New function.
 (find_rarely_executed_basic_blocks_and_crossing_edges): Invoke
 sanitize_hot_paths.

 Index: cfgrtl.c
 ===
 --- cfgrtl.c(revision 201461)
 +++ cfgrtl.c(working copy)
 @@ -1341,6 +1341,43 @@ fixup_partition_crossing (edge e)
  }
  }

 +/* Called when block BB has been reassigned to the cold partition,
 +   because it is now dominated by another cold block,
 +   to ensure that the region crossing attributes are updated.  */
 +
 +static void
 +fixup_new_cold_bb (basic_block bb)
 +{
 +  edge e;
 +  edge_iterator ei;
 +
 +  /* This is called when a hot bb is found to now be dominated
 + by a cold bb and therefore needs to become cold. Therefore,
 + its preds will no longer be region crossing. Any non-dominating
 + preds that were previously hot 

Re: GDB hooks for debugging GCC

2013-08-19 Thread Jeff Law

On 08/02/2013 07:48 PM, David Malcolm wrote:

GDB 7.0 onwards supports hooks written in Python to improve the
quality-of-life within the debugger.  The best known are the
pretty-printing hooks [1], which we already use within libstdc++ for
printing better representations of STL containers.
So as I mentioned during the Cauldron, I really like this and feel it 
could simplify certain aspects of debugging.  What's even better is we 
can easily twiddle this stuff to further improve the experience as we 
use it more.  Of course I'm particularly interested in blocks  edges, 
so that's where you'll probably see further feedback from me.


[ ... ]



Note that this doesn't eliminate the .gdbinit script that some people
use, but it's arguably far superior, in that it happens automatically,
and for all values that are printed e.g. in backtraces, viewing struct
fields, etc.

Right.



I'd like to get this into trunk.  Some remaining issues:
   * installation; currently one has to manually copy it into place, as
noted above; it would be nice to automate things so that during a build
it gets copied to cc1-gdb.py, cc1plus-gdb.py etc
Perhaps add -iex add-auto-load-safe-path gcc source path in 
.gdbinit?  I can't imagine typing that every time I start up...








   * it hardcoded values in a few places rather than correctly looking up
enums
   * the kludge in the RTX printer mentioned above.
Given this stuff is in python, I'm comfortable letting you own it and 
decide if/when/how to fix these warts.


Jeff



Re: [PATCH] Redesign pthread in LIB_SPEC for systems without libpthread

2013-08-19 Thread Maxim Kuvyrkov
On 19/08/2013, at 11:09 PM, Pavel Chupin wrote:
...
 
 Thanks. I'm OK with your suggestions.
 Please see adjusted patch attached.

All good, thanks!

--
Maxim Kuvyrkov
www.kugelworks.com






Re: msp430 port

2013-08-19 Thread Jeff Law

On 07/30/2013 09:24 PM, DJ Delorie wrote:

[nickc added for comments about the bits he wrote]


... define these as

(define_predicate msp_general_operand
   (match_code mem,reg,subreg,const_int,const,symbol_ref,label_ref
{
   int save_volatile_ok = volatile_ok;
   volatile_ok = 1;
   int ret = general_operand (op, mode);
   volatile_ok = save_volatile_ok;
   return ret;
})

That said, why do they exist?


Because gcc refuses to optimize operations with volatile memory
references, even when the target has opcodes that follow the volatile
memory rules.  set bit in memory for example.  I've had to do
something like this for every port I've written, for the same reason,
despite arguing against gcc's pedantry about volatile memory accesses.
I'd say it's not as simple as you make it out to be.  You can't blindly 
combine operations on volatile memory.  ie, the programmer may have 
written them as separate statements and if they're volatile you should 
leave them alone.  With this change a series of statements involving 
volatiles might be combined into a single insn.  That's not good.


So while you will get better optimization in the presence of volatile, 
you'll also eventually get a mis-optimization of a volatile reference. 
And debugging it will be an absolute nightmare because whether or not it 
causes a visible problem will be dependent on the state of some hardware 
widget.


I'll note that *NO* ports in the official GCC sources do this and that's 
because it's wrong.  Unfortunately, I missed this when looking at the 
port or I would have called it out earlier.  What you may have done for 
ports that are still in private trees is not pertinent.




Jeff


Re: [GOOGLE] Emit linkage_name when built with -gmlt and for abstract decls

2013-08-19 Thread Dehao Chen
After rerunning test, this will fail one gcc regression test. So I
updated the patch to make sure all test will pass:

Index: gcc/dwarf2out.c
===
--- gcc/dwarf2out.c (revision 201850)
+++ gcc/dwarf2out.c (working copy)
@@ -16545,10 +16545,9 @@ add_src_coords_attributes (dw_die_ref die, tree de
 static void
 add_linkage_name (dw_die_ref die, tree decl)
 {
-  if (debug_info_level  DINFO_LEVEL_TERSE
+  if (debug_info_level  DINFO_LEVEL_NONE
(TREE_CODE (decl) == FUNCTION_DECL || TREE_CODE (decl) == VAR_DECL)
TREE_PUBLIC (decl)
-   !DECL_ABSTRACT (decl)
!(TREE_CODE (decl) == VAR_DECL  DECL_REGISTER (decl))
die-die_tag != DW_TAG_member)
 {
Index: gcc/testsuite/g++.dg/debug/dwarf2/cdtor-1.C
===
--- gcc/testsuite/g++.dg/debug/dwarf2/cdtor-1.C (revision 201850)
+++ gcc/testsuite/g++.dg/debug/dwarf2/cdtor-1.C (working copy)
@@ -14,4 +14,4 @@ main()
   K k;
 }

-// { dg-final {scan-assembler-times  DW_AT_\[MIPS_\]*linkage_name 2 } }
+// { dg-final {scan-assembler-times  DW_AT_\[MIPS_\]*linkage_name 4 } }

On Mon, Aug 19, 2013 at 9:22 AM, Cary Coutant ccout...@google.com wrote:
 This patch emits linkage_name at -gmlt. It also make sure abstract
 decls' linkage_names are emitted so that inlined functions can also
 find linkage name.

 Bootstrapped and passed regression test.

 OK for google branches?

 OK.

 -cary


Re: msp430 port

2013-08-19 Thread Jeff Law

On 08/19/2013 02:32 PM, Jeff Law wrote:


Ok, I think I got it all covered...  ended up copying most of the
pushm/popm from rx, added return but not simple_return, and merged
added a peephole for and-bic.

[ ... ignore ... hit send in the wrong window ]




Re: msp430 port

2013-08-19 Thread Jeff Law

On 08/14/2013 02:55 PM, DJ Delorie wrote:


+#endif /* GCC_MSP430_PROTOS_H */
Index: gcc/config/msp430/predicates.md
===
--- gcc/config/msp430/predicates.md (revision 0)
+++ gcc/config/msp430/predicates.md (revision 0)
+
+(define_predicate msp_volatile_memory_operand
+  (and (match_code mem)
+   (match_test (memory_address_addr_space_p (GET_MODE (op), XEXP (op, 0), 
MEM_ADDR_SPACE (op)
+)
+
+; TRUE for any valid general operand.  We do this because
+; general_operand refuses to match volatile memory refs.
+
+(define_predicate msp_general_operand
+  (ior (match_operand 0 general_operand)
+   (match_operand 0 msp_volatile_memory_operand))
+)
This is no good.  You really can't go around optimizing volatiles like 
you want to.  That's my only objection to the port at this point.


Given this is strictly an optimization issue and you want to pursue it 
further, my recommendation would be to get the port in without the 
hackery for volatiles and discuss optimization of volatiles 
independently of the base port submission.


Jeff


Re: msp430 port

2013-08-19 Thread DJ Delorie

 I'd say it's not as simple as you make it out to be.  You can't blindly 
 combine operations on volatile memory.

I'm not blindly combining them, I'm combining them when I know the
hardware will do the volatile-correct thing.

 ie, the programmer may have written them as separate statements and
 if they're volatile you should leave them alone.

Most of the programmers I know would expect port |= 0x80; to do a
hardware-specific set bits operation, not a series of
volatile-pedantic operations, especially when the ISA has a set of
insns specifically designed to do bit operations on volatile I/O
registers.

 With this change a series of statements involving 
 volatiles might be combined into a single insn.  That's not good.

If the insn does exactly the same thing as the operations, just
faster, why is it not good?

 I'll note that *NO* ports in the official GCC sources do this and that's 
 because it's wrong.

m32c, rl78, and sh do this (to varying degrees) in the official
sources.


Re: msp430 port

2013-08-19 Thread Jeff Law

On 08/19/2013 02:49 PM, DJ Delorie wrote:

I'd say it's not as simple as you make it out to be.  You can't blindly
combine operations on volatile memory.


I'm not blindly combining them, I'm combining them when I know the
hardware will do the volatile-correct thing.
You're missing the point.  If the programmer wrote two statements which 
hit volatile memory and you've got some pattern which matches those two 
statements, then with your change you'll end up combining them, that's 
wrong.





ie, the programmer may have written them as separate statements and
if they're volatile you should leave them alone.


Most of the programmers I know would expect port |= 0x80; to do a
hardware-specific set bits operation, not a series of
volatile-pedantic operations, especially when the ISA has a set of
insns specifically designed to do bit operations on volatile I/O
registers.
I fully understand that.  But that doesn't change the fact that if the 
programmer wrote separate statements with volatile operands you can't 
combine them.  Your change allows such combinations as far as I can tell.


You simply don't have enough context at any point to know if what you're 
doing is safe or not.



Jeff


Re: [PATCH i386 1/8] [AVX512] Adjust register classes.

2013-08-19 Thread Richard Henderson
On 08/07/2013 01:07 PM, Kirill Yukhin wrote:
 +  -1, -1, -1, -1, -1, -1, -1, -1,   /* new SSE registers 16-23*/
 +  -1, -1, -1, -1, -1, -1, -1, -1,   /* new SSE registers 24-31*/

Don't say new, say avx512 -- that comment will be there for 10 years.

 @@ -4080,6 +4111,8 @@ ix86_conditional_register_usage (void)
   fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = ;
for (i = FIRST_REX_SSE_REG; i = LAST_REX_SSE_REG; i++)
   fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = ;
 +  for (i = FIRST_EXT_REX_SSE_REG; i = LAST_EXT_REX_SSE_REG; i++)
 + fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = ;
  }
  
/*  See the definition of CALL_USED_REGISTERS in i386.h.  */
 @@ -4120,6 +4153,12 @@ ix86_conditional_register_usage (void)
  for (i = 0; i  FIRST_PSEUDO_REGISTER; i++)
if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
   fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = ;
 +
 +  /* If AVX512F is disabled, squash the registers.  */
 +  if (! TARGET_AVX512F)
 +for (i = 0; i  FIRST_PSEUDO_REGISTER; i++)
 +  if (TEST_HARD_REG_BIT (reg_class_contents[(int)EVEX_SSE_REGS], i))
 + fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = ;

Using TEST_HARD_REG_BIT is silly when FIRST/LAST_EXT_REX_SSE_REG is available.

 @@ -34285,14 +34346,20 @@ ix86_hard_regno_mode_ok (int regno, enum 
 machine_mode mode)
  {
/* We implement the move patterns for all vector modes into and
out of SSE registers, even when no operation instructions
 -  are available.  OImode move is available only when AVX is
 -  enabled.  */
 -  return ((TARGET_AVX  mode == OImode)
 -   || VALID_AVX256_REG_MODE (mode)
 -   || VALID_SSE_REG_MODE (mode)
 -   || VALID_SSE2_REG_MODE (mode)
 -   || VALID_MMX_REG_MODE (mode)
 -   || VALID_MMX_REG_MODE_3DNOW (mode));
 +  are available.  In xmm16-xmm31 we can store only 512 bit
 +  modes.  OImode move is available only when AVX is enabled.
 +  XImode is available only when AVX512F is enabled.  */
 +  return ((TARGET_AVX512F
 + (mode == XImode
 +|| VALID_AVX512F_REG_MODE (mode)
 +|| VALID_AVX512F_SCALAR_MODE (mode)))
 +   || (!EXT_REX_SSE_REGNO_P (regno)
 +((TARGET_AVX  mode == OImode)
 +   || VALID_AVX256_REG_MODE (mode)
 +   || VALID_SSE_REG_MODE (mode)
 +   || VALID_SSE2_REG_MODE (mode)
 +   || VALID_MMX_REG_MODE (mode)
 +   || VALID_MMX_REG_MODE_3DNOW (mode;

Break up this condition.  It's too big.  It'll probably be best to
split it up by regno first.

  
  (define_insn *movmode_internal
 -  [(set (match_operand:V16 0 nonimmediate_operand   =x,x ,m)
 - (match_operand:V16 1 nonimmediate_or_sse_const_operand  C ,xm,x))]
 +  [(set (match_operand:V16 0 nonimmediate_operand   =v,v ,m)
 + (match_operand:V16 1 nonimmediate_or_sse_const_operand  C ,vm,v))]
TARGET_SSE

We really need to change the mode iterator names here.  V16 is not a
good description of 16, 32, and 64 byte vectors.

  (register_operand (operands[0], MODEmode)
 || register_operand (operands[1], MODEmode))
  {
 +  int mode = get_attr_mode (insn);
switch (which_alternative)
  {
  case 0:
return standard_sse_constant_opcode (insn, operands[1]);
  case 1:
  case 2:
 -  switch (get_attr_mode (insn))
 +  /* There is no evex-encoded vmov* for sizes smaller than 64-bytes
 +  in avx512f, so we need to use workarounds, to access sse registers
 +  16-31, which are evex-only.  */
 +  if (TARGET_AVX512F  GET_MODE_SIZE (MODEmode)  64
 +(EXT_REX_SSE_REGNO_P (REGNO (operands[0]))
 +   || EXT_REX_SSE_REGNO_P (REGNO (operands[1]
 + {
 +   if (memory_operand (operands[0], MODEmode))
 + {
 +   if (GET_MODE_SIZE (MODEmode) == 32)
 + return vextractshuffletype64x4\t{$0x0, %g1, %0|%0, %g1, 
 0x0};
 +   else if (GET_MODE_SIZE (MODEmode) == 16)
 + return vextractshuffletype32x4\t{$0x0, %g1, %0|%0, %g1, 
 0x0};
 +   else
 + gcc_unreachable ();
 + }
 +   else if (memory_operand (operands[1], MODEmode))
 + {
 +   if (GET_MODE_SIZE (MODEmode) == 32)
 + return vinsertshuffletype64x4\t{$0x0, %1, %g0, %g0|%g0, %g0, 
 %1, $0x0};
 +   else if (GET_MODE_SIZE (MODEmode) == 16)
 + return vinsertshuffletype32x4\t{$0x0, %1, %g0, %g0|%g0, %g0, 
 %1, $0x0};
 +   else
 + gcc_unreachable ();
 + }

This creates a false dependency on the old %g0.  Wouldn't vbroadcast be better?

 @@ -603,9 +663,9 @@
  })
  
  (define_insn sse_loadussemodesuffixavxsizesuffix
 -  [(set (match_operand:VF 0 register_operand =x)
 +  [(set (match_operand:VF 0 register_operand =v)
   (unspec:VF
 -   

Re: [PATCH] Rerun df_analyze after delete_unmarked_insns during DCE

2013-08-19 Thread Jeff Law

On 07/20/2013 03:02 AM, Alexey Makhalov wrote:

Hello!

If delete_unmarked_insns deletes some insn, DF state might be
out of date, and, regs_ever_live might contain unused registers till the end.

Fixed by forcing regs_ever_live update and rerunning df_analyze () at 
fini_dce().

I found this bug on my target (not included into main sources). But I can send 
everything you need to reproduce this bug.

Using gcc-4.8.1

2013-07-20  Alexey Makhalov  makhal...@gmail.com  mailto:makhal...@gmail.com

* dce.c (fini_dce): Call df_analyze again just in case 
delete_unmarked_insns
removed anything.

Presumably DCE is removing the last reference to one or more hard registers?

Do you really need the df_analyze call?

Note that the DF machinery tracks the number of references to each hard 
reg and thus if DCE were to delete the last reference, that counter 
should go to zero.  That in turn should allow the call to 
df_compute_regs_ever_live to set regs_ever_live appropriately.


I'm guessing the call to df_analyze is there to ensure correct liveness 
is propagated so that you don't end up with an inconsistency between the 
life information and regs_ever_live?  Is that correct?


The patch is probably OK, I just want to make sure I understand the 
state you're in at the end of dce and more about how that's causing 
problems.


jeff




Re: [PATCH i386 2/8] [AVX512] Add mask registers.

2013-08-19 Thread Richard Henderson
On 08/14/2013 12:23 AM, Kirill Yukhin wrote:
 +  ;; For AVX512F mask support
 +  UNSPEC_KIOR
 +  UNSPEC_KXOR
 +  UNSPEC_KAND
 +  UNSPEC_KANDN

I thought we determined that you didn't need these,
that *Yk as a constraint was sufficient.

 +(define_insn kandnmode
 +(define_insn kandmode
 +(define_insn kiormode
 +(define_insn kxormode

Because otherwise there's nothing different between these...

 +(define_insn kxnormode
 +(define_insn kortestzhi
 +(define_insn kortestchi
 +(define_insn kunpckhi
  (define_insn *one_cmplmode2_1
 +(define_insn *one_cmplhi2_1
  (define_insn *one_cmplqi2_1

... and these.



r~


Re: msp430 port

2013-08-19 Thread DJ Delorie

 You're missing the point.  If the programmer wrote two statements
 which hit volatile memory and you've got some pattern which matches
 those two statements, then with your change you'll end up combining
 them, that's wrong.

I see nothing in the ISO spec that says you can't combine a volatile
read/modify/write cycle into a single insn.  The spec requires that
the volatile value is stable at sequence points, but does not (AFAICT)
require that the sequence point be between insns.

So even if the programmer wrote:

register int a;
extern volatile int b;

a = b;
a |= 0x54;
b = a;

The ISO spec seems to allow gcc to perform those operations in a
single physical insn, as long as the operations on 'b' are all
performed, and in the correct sequence.


Re: msp430 port

2013-08-19 Thread Richard Kenner
   register int a;
   extern volatile int b;
 
   a = b;
   a |= 0x54;
   b = a;
 
 The ISO spec seems to allow gcc to perform those operations in a
 single physical insn, as long as the operations on 'b' are all
 performed, and in the correct sequence.

And I believe it's also consistent with the traditional view
of volatile if that were done with a single read-modify-write insn.


Re: GDB hooks for debugging GCC

2013-08-19 Thread Steven Bosscher
On Mon, Aug 19, 2013 at 9:56 PM, Jeff Law l...@redhat.com wrote:
 On 08/02/2013 07:48 PM, David Malcolm wrote:

 GDB 7.0 onwards supports hooks written in Python to improve the
 quality-of-life within the debugger.  The best known are the
 pretty-printing hooks [1], which we already use within libstdc++ for
 printing better representations of STL containers.

 So as I mentioned during the Cauldron, I really like this and feel it could
 simplify certain aspects of debugging.  What's even better is we can easily
 twiddle this stuff to further improve the experience as we use it more.  Of
 course I'm particularly interested in blocks  edges, so that's where you'll
 probably see further feedback from me.

I assume you mean basic blocks and edges? For that, you can already
use the CFG pretty printers with a file attached to a pipe and sent
through XDot (http://code.google.com/p/jrfonseca/wiki/XDot) which is
written in Python and quite easy to integrate in a GDB Python routine.
(I did just that not too long ago but I can't find the code just now.)

Ciao!
Steven


Re: msp430 port

2013-08-19 Thread Oleg Endo
On Mon, 2013-08-19 at 16:49 -0400, DJ Delorie wrote:
  I'd say it's not as simple as you make it out to be.  You can't blindly 
  combine operations on volatile memory.
 
 I'm not blindly combining them, I'm combining them when I know the
 hardware will do the volatile-correct thing.
 
  ie, the programmer may have written them as separate statements and
  if they're volatile you should leave them alone.
 
 Most of the programmers I know would expect port |= 0x80; to do a
 hardware-specific set bits operation, not a series of
 volatile-pedantic operations, especially when the ISA has a set of
 insns specifically designed to do bit operations on volatile I/O
 registers.
 
  With this change a series of statements involving 
  volatiles might be combined into a single insn.  That's not good.
 
 If the insn does exactly the same thing as the operations, just
 faster, why is it not good?
 
  I'll note that *NO* ports in the official GCC sources do this and that's 
  because it's wrong.
 
 m32c, rl78, and sh do this (to varying degrees) in the official
 sources.

Just a note regarding the issue in SH:
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=52483

In this case, the problem is that the volatile handling in
'general_operand' prevents matching of 'complex' addresses and sign
extending mem loads.

The recent SH volatile mem load fix doesn't play with the 'volatile_ok'
variable before invoking 'general_operand', but rather avoids it
altogether, by not using 'general_operand' for matching mems.  In this
case it should be fine, since it's done in the predicate
'general_movsrc_operand' which is used for mem loads only and not in
arithmetic insns.  Maybe it would be better to handle this in a general
way somehow instead of having to do workarounds in each target.

Cheers,
Oleg



Re: GDB hooks for debugging GCC

2013-08-19 Thread Jeff Law

On 08/19/2013 03:28 PM, Steven Bosscher wrote:


I assume you mean basic blocks and edges? For that, you can already
use the CFG pretty printers with a file attached to a pipe and sent
through XDot (http://code.google.com/p/jrfonseca/wiki/XDot) which is
written in Python and quite easy to integrate in a GDB Python routine.
(I did just that not too long ago but I can't find the code just now.)
Not exactly what I was talking about, though I did have David do some 
work on CFG dumping a while back.  Think about the ability to do things 
like collapse regions and the like.


In this specific instance I was referring to getting meaningful data out 
of gdb when I do something like p bb or p e for a block and edge 
respectively.


Printing the pointer is useful, but printing information about the 
actual block/edge is far more useful  :-)  It'd save a lot of debug_bb 
(bb)  as well a p e-src-index and p e-dest-index in my 
interactive debug sessions.


jeff


Re: [PATCH i386 3/8] [AVX512] Add AVX-512 patterns.

2013-08-19 Thread Richard Henderson
  ;; All vector modes including V?TImode, used in move patterns.
  (define_mode_iterator V16
 -  [(V32QI TARGET_AVX) V16QI
 -   (V16HI TARGET_AVX) V8HI
 -   (V8SI TARGET_AVX) V4SI
 -   (V4DI TARGET_AVX) V2DI
 +  [(V64QI TARGET_AVX512F) (V32QI TARGET_AVX) V16QI
 +   (V32HI TARGET_AVX512F) (V16HI TARGET_AVX) V8HI
 +   (V16SI TARGET_AVX512F) (V8SI TARGET_AVX) V4SI
 +   (V8DI TARGET_AVX512F)  (V4DI TARGET_AVX) V2DI
 (V2TI TARGET_AVX) V1TI
 -   (V8SF TARGET_AVX) V4SF
 -   (V4DF TARGET_AVX) V2DF])
 +   (V16SF TARGET_AVX512F) (V8SF TARGET_AVX) V4SF
 +   (V8DF TARGET_AVX512F)  (V4DF TARGET_AVX) V2DF])

Let's rename this VMOVE, and apply only that change as a separate patch.

 +(define_mode_iterator VF_AVX512F
 +  [(V16SF TARGET_AVX512F) (V8SF TARGET_AVX) V4SF
 +   (V8DF TARGET_AVX512F) (V4DF TARGET_AVX) (V2DF TARGET_SSE2)])
 +

Why aren't you modifying VF instead?  Certainly this makes the comment for VF
be incorrect, as it no longer contains all vector float modes.

  ;; All SFmode vector float modes
  (define_mode_iterator VF1
[(V8SF TARGET_AVX) V4SF])
  
 +(define_mode_iterator VF1_AVX512F
 +  [(V16SF TARGET_AVX512F) (V8SF TARGET_AVX) V4SF])

Likewise.

  ;; All DFmode vector float modes
  (define_mode_iterator VF2
[(V4DF TARGET_AVX) V2DF])
  
 +(define_mode_iterator VF2_AVX512F
 +  [(V8DF TARGET_AVX512F) (V4DF TARGET_AVX) V2DF])

Likewise.

 +;; 128bit and 512bit float modes
 +(define_mode_iterator VF_128_512
 +  [V4SF V2DF V16SF V8DF])

Unused?

  (define_expand codemode2
 -  [(set (match_operand:VF 0 register_operand)
 - (absneg:VF
 -   (match_operand:VF 1 register_operand)))]
 +  [(set (match_operand:VF_AVX512F 0 register_operand)
 + (absneg:VF_AVX512F
 +   (match_operand:VF_AVX512F 1 register_operand)))]
TARGET_SSE
ix86_expand_fp_absneg_operator (CODE, MODEmode, operands); DONE;)

Fixing VF as I describe above appears to make a large portion
of the patch just go away.



r~


Re: [PATCH i386 3/8] [AVX512] Add AVX-512 patterns.

2013-08-19 Thread Richard Henderson
On 08/14/2013 12:26 AM, Kirill Yukhin wrote:
 Hello,
 Patch was rebased on top of trunk.
 
 It is applicable on top of [2/8] (which was rebased on new trunk today).
 
 Testing:
   1. Bootstrap pass.
   2. make check shows no regressions.
   3. Spec 2000  2006 build show no regressions both with and without 
 -mavx512f option.
   4. Spec 2000  2006 run shows no regressions without -m512d option.
 
 Thanks, K

This patch is still far too large.

I think you should split it up based on every single mode iterator that
you need to add or change.


r~


[GOOGLE] Assign discriminators for different callsites at a same line within one BB

2013-08-19 Thread Dehao Chen
This patch assigns discriminators for different callsites within the
same BB. This is needed for accurate profile attribution in AutoFDO.

Testing on going.

OK for google branches if test pass?

Thanks,
Dehao

Index: gcc/tree-cfg.c
===
--- gcc/tree-cfg.c (revision 201858)
+++ gcc/tree-cfg.c (working copy)
@@ -781,9 +781,25 @@ assign_discriminators (void)
 {
   edge e;
   edge_iterator ei;
+  gimple_stmt_iterator gsi;
   gimple last = last_stmt (bb);
   location_t locus = last ? gimple_location (last) : UNKNOWN_LOCATION;
+  location_t curr_locus = UNKNOWN_LOCATION;

+  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (gsi))
+ {
+  gimple stmt = gsi_stmt (gsi);
+  if (gimple_code (stmt) != GIMPLE_CALL)
+continue;
+  if (curr_locus == UNKNOWN_LOCATION ||
+  !same_line_p (curr_locus, gimple_location (stmt)))
+curr_locus = gimple_location (stmt);
+  else
+gimple_set_location (stmt, location_with_discriminator (
+ gimple_location (stmt),
+ next_discriminator_for_locus (gimple_location (stmt;
+ }
+
   if (locus == UNKNOWN_LOCATION)
  continue;


Re: [PATCH] Change the badness computation to ensure no integer-underflow

2013-08-19 Thread Dehao Chen
Hi, Richard,

Could you take a second look at this patch?

Thanks,
Dehao

On Mon, Jun 24, 2013 at 3:45 PM, Dehao Chen de...@google.com wrote:
 The original patch has some flaw. The new patch is attached.
 Bootstrapped and passed regression tests.

 Thanks,
 Dehao

 On Mon, Jun 24, 2013 at 9:44 AM, Dehao Chen de...@google.com wrote:
 Hi, Richard,

 I've updated the patch (as attached) to use sreal to compute badness.

 Thanks,
 Dehao

 On Mon, Jun 24, 2013 at 5:41 AM, Richard Biener
 richard.guent...@gmail.com wrote:
 On Sat, Jun 22, 2013 at 2:59 AM, Dehao Chen de...@google.com wrote:
 This patch prevents integer-underflow of badness computation in ipa-inline.

 Bootstrapped and passed regression tests.

 OK for trunk?

 Thanks,
 Dehao

 gcc/ChangeLog:
 2013-06-21  Dehao Chen  de...@google.com

 * ipa-inline.c (edge_badness): Fix integer underflow.

 Index: gcc/ipa-inline.c
 ===
 --- gcc/ipa-inline.c (revision 200326)
 +++ gcc/ipa-inline.c (working copy)
 @@ -888,11 +888,9 @@ edge_badness (struct cgraph_edge *edge, bool dump)
else if (max_count)
  {
int relbenefit = relative_time_benefit (callee_info, edge, 
 edge_time);
 -  badness =
 - ((int)
 - ((double) edge-count * INT_MIN / 2 / max_count /
 RELATIVE_TIME_BENEFIT_RANGE) *
 - relbenefit) / growth;
 -
 +  badness = ((int)((double) edge-count / max_count
 +  * relbenefit / RELATIVE_TIME_BENEFIT_RANGE * INT_MIN / 2)) / growth;
 +

 FP operations on the host are frowned upon if code generation depends
 on their outcome.  They all should use sreal_* as predict already does.
 Other than that I wonder why the final division is int (so we get truncation
 and not rounding)?  That increases the effect of doing the multiply by
 relbenefit in double.

 Richard.

/* Be sure that insanity of the profile won't lead to increasing 
 counts
   in the scalling and thus to overflow in the computation above.  */
gcc_assert (max_count = edge-count);


Re: [PATCH] Cilk Keywords (_Cilk_spawn and _Cilk_sync) for C

2013-08-19 Thread Aldy Hernandez

@@ -960,6 +960,7 @@ SCEV_H = tree-scalar-evolution.h $(GGC_H) tree-chrec.h 
$(PARAMS_H)
 OMEGA_H = omega.h $(PARAMS_H)
 TREE_DATA_REF_H = tree-data-ref.h $(OMEGA_H) graphds.h $(SCEV_H)
 TREE_INLINE_H = tree-inline.h
+CILK_H = cilk.h
 REAL_H = real.h $(MACHMODE_H)
 IRA_INT_H = ira.h ira-int.h $(CFGLOOP_H) alloc-pool.h


Doesn't cilk.h depend on tree.h?  So shouldn't that be:

CILK_H = cilk.h $(TREE_H)

Which would mean that whoever includes cilk.h doesn't need to include 
tree.h.  Although... what I would prefer is not to include tree.h from 
cilk.h at all, and have the caller/includer of cilk.h include tree.h first.



+c-family/cilk.o : c-family/cilk.c $(TREE_H) $(SYSTEM_H) $(CONFIG_H) toplev.h \
+   $(TREE_H) coretypes.h tree-iterator.h $(TREE_INLINE_H) $(CGRAPH_H) \
+   $(DIAGNOSTIC_CORE_H) $(GIMPLE_H) $(CILK_H) $(C_COMMON_H) langhooks.h


TREE_H is duplicated.

Also, you are using DIAGNOSTIC_CORE_H whereas c-family/cilk.c is using


+#include diagnostic.h



+/* Cilk Keywords builtins.  */
+#include cilk-builtins.def


keywords should be lowercase


diff --git a/gcc/c-family/c-common.h b/gcc/c-family/c-common.h
index dc430c3..ab77fa4 100644
--- a/gcc/c-family/c-common.h
+++ b/gcc/c-family/c-common.h
@@ -148,6 +148,9 @@ enum rid
   /* C++11 */
   RID_CONSTEXPR, RID_DECLTYPE, RID_NOEXCEPT, RID_NULLPTR, RID_STATIC_ASSERT,

+  /* Cilk Plus Keywords.  */
+  RID_CILK_SPAWN, RID_CILK_SYNC,
+


Same here.


+/* In cilk.c.  */
+extern tree insert_cilk_frame (tree);
+extern void cilk_init_builtins (void);
+extern int gimplify_cilk_spawn (tree *, gimple_seq *, gimple_seq *);
+extern int gimplify_cilk_sync (tree *, gimple_seq *, gimple_seq *);
+extern void c_cilk_install_body_w_frame_cleanup (tree, tree);
+extern bool cilk_valid_spawn (tree *);
+extern bool cilkplus_set_spawn_marker (location_t, tree);


You should be consistent with the prefix throughout the entire patch. 
For instance, now you have cilk_init_builtins() but 
cilkplus_set_spawn_marker().  Do whatever you like, but be consistent.


Also, insert_cilk_frame-- the prefix should be well...as a prefix should 
be...at the beginning.  However, gimplify_cilk_spawn/sync is ok since 
that is in keeping with the rest of the gimplify.c style.



+  /* IF the function has _Cilk_spawn in front of a function call inside it
+ i.e. it is a spawning function, then add the appropriate Cilk plus
+ functions inside.  */


Lowercase the F in IF.


+ c_parser_skip_until_found (parser, CPP_SEMICOLON, expected %;%);
+ if (!flag_enable_cilkplus)
+   error_at (loc, -fcilkplus must be enabled to use _Cilk_sync);


The error message should use %_Cilk_sync


+   case RID_CILK_SPAWN:
+ c_parser_consume_token (parser);
+ if (!flag_enable_cilkplus)
+   {
+ error_at (loc, -fcilkplus must be enabled to use _Cilk_spawn);
+ expr = c_parser_postfix_expression (parser);
+ expr.value = error_mark_node; 


Similarly with _Cilk_spawn.


+ if (c_parser_peek_token (parser)-keyword == RID_CILK_SPAWN)
+   {
+ error_at (input_location, consecutive _Cilk_spawn keywords 
+   are not permitted);


Similarly here-- for that matter, check the rest of the patch for any 
keywords in error messages, they should have %blah.


Also, avoid input_location when possible.  Surely, you can infer the 
actual location from the parser.



+  if (flag_enable_cilkplus  retval  TREE_CODE (retval) == CILK_SPAWN_STMT)
+{
+  error_at (loc, use of _Cilk_spawn in return statement is not allowed);
+  return error_mark_node;


s/in return/in a return/.  Also, use %_Cilk_spawn.


+/* Returns a tree of type CILK_SYNC_STMT if Cilk Plus is enabled.  Otherwise
+   return error_mark_node.  */
+
+tree
+c_build_cilk_sync (void)
+{
+  tree sync = build0 (CILK_SYNC_STMT, void_type_node);
+  TREE_SIDE_EFFECTS (sync) = 1;
+  return sync;
+}


Code seems correct and comment wrong.  Adjust accordingly.


diff --git a/gcc/cilk.h b/gcc/cilk.h
new file mode 100644
index 000..038316a
--- /dev/null
+++ b/gcc/cilk.h
@@ -0,0 +1,94 @@
+/* This file is part of the Intel(R) Cilk(TM) Plus support
+   This file contains Cilk Support files.
+   Copyright (C) 2013  Free Software Foundation, Inc.


Only one space after 2013.  Similarly in other files.


+
+#ifndef GCC_CILK_H
+#define GCC_CILK_H
+
+#include tree.h


As mentioned earlier, cilk.h should not include tree.h, the caller should.


+/* Frame status bits known to compiler.  */
+#define CILK_FRAME_STOLEN0x01


Unused?


+this keyword with a set of functions that are stored in the Cilk Runtime.


Lowercase Runtime.


+@file{c-family/cilk.c}.  In the spawn-helper, the gimplification function
+@code{gimplify_call_expr}, inserts a function call @code{__cilkrts_detach}.
+This function is expanded by @code{builtin_expand_cilk_detach} located in
+@file{c-family/cilk.c}.
+
+@item @code{_Cilk_sync}:
+@code{_Cilk_sync} is parsed 

Re: [PATCH, vtv update] Fix /tmp directory issues in libvtv

2013-08-19 Thread Caroline Tice
Hi All,

I could really use some help here from someone who has a better
understanding of how the config/Makefile system works than I do.

In my libvtv/configure.ac file, I have:

AC_GNU_SOURCE
AC_CHECK_FUNCS([__secure_getenv])

AC_GNU_SOURCE
AC_CHECK_FUNCS([secure_getenv])


This gets translated in my libvtv/configure file to:

for ac_func in __secure_getenv
do :
  ac_fn_c_check_func $LINENO __secure_getenv ac_cv_func___secure_getenv
if test x$ac_cv_func___secure_getenv = xyes; then :
  cat confdefs.h _ACEOF
#define HAVE___SECURE_GETENV 1
_ACEOF

fi
done



for ac_func in secure_getenv
do :
  ac_fn_c_check_func $LINENO secure_getenv ac_cv_func_secure_getenv
if test x$ac_cv_func_secure_getenv = xyes; then :
  cat confdefs.h _ACEOF
#define HAVE_SECURE_GETENV 1
_ACEOF

fi
done


After running 'make all', I look in the libvtv/config.log, I see

configure:4560: checking for __secure_getenv
configure:4560: /usr/local/google2/cmtice/gcc-fsf.clean.obj/./gcc/xgcc -B/usr/l\
ocal/google2/cmtice/gcc-fsf.clean.obj/./gcc/ -B/usr/local/x86_64-unknown-linux-\
gnu/bin/ -B/usr/local/x86_64-unknown-linux-gnu/lib/ -isystem /usr/local/x86_64-\
unknown-linux-gnu/include -isystem /usr/local/x86_64-unknown-linux-gnu/sys-incl\
ude-o conftest -g -O2   conftest.c  5
configure:4560: $? = 0
configure:4560: result: yes
configure:4575: checking for secure_getenv
configure:4575: /usr/local/google2/cmtice/gcc-fsf.clean.obj/./gcc/xgcc -B/usr/l\
ocal/google2/cmtice/gcc-fsf.clean.obj/./gcc/ -B/usr/local/x86_64-unknown-linux-\
gnu/bin/ -B/usr/local/x86_64-unknown-linux-gnu/lib/ -isystem /usr/local/x86_64-\
unknown-linux-gnu/include -isystem /usr/local/x86_64-unknown-linux-gnu/sys-incl\
ude-o conftest -g -O2   conftest.c  5
/tmp/cc2jF2RF.o: In function `main':
/usr/local/google2/cmtice/gcc-fsf.clean.obj/x86_64-unknown-linux-gnu/libvtv/con\
ftest.c:61: undefined reference to `secure_getenv'
collect2: error: ld returned 1 exit status
configure:4575: $? = 1
configure: failed program was:
[snip]
configure:4575: result: no


So it looks to me like the check for __secure_getenv succeeded, so
HAVE___SECURE_GETENV *should* have
been defined in confdefs.h, and the test for it in my program *should* succeed.


The source code in my program looks like this (at the moment):

#define secure_getenv getenv

#ifdef HAVE___SECURE_GETENV
#define secure_getenv __secure_getenv
#endif

[snip]

  logs_prefix = secure_getenv (VTV_LOGS_DIR);


BUT...when I check to see what version of the getenv symbol made it
into libvtv.so, it is the wrong version:

$ readelf -s libvtv.so | grep getenv
 4:  0 FUNCGLOBAL DEFAULT  UND
getenv@GLIBC_2.2.5 (2)
76:  0 FUNCGLOBAL DEFAULT  UND getenv@@GLIBC_2.2.5


If I alter the source program to by removing the #ifdef
HAVE___SECURE_GETENV check, and just force it to try to use
__secure_getenv, the program works properly, and the readelf -s
libvtv.so | grep getenv shows __secure_getenv as the function.


WHAT am I doing wrong?  Help?

-- Caroline Tice
cmt...@google.com


On Mon, Aug 19, 2013 at 9:37 AM, Florian Weimer fwei...@redhat.com wrote:
 On 08/17/2013 12:29 AM, Caroline Tice wrote:

 OK, I *think* I have done as you requested.  I have to try the
 environment variable before falling back on stderr (there's a program
 we want to use this on that disables the ability to write to stderr).
 I have added the secure_getenv stuff as you requested. The fixed patch
 is attached.

 Please review the patch and let me know if this is OK to commit.  Thanks!


 I found a packaged version of autoconf 2.64 and bootstrapped with
 --enable-vtable-verify.  It's a bit confusing that libvtv is always built,
 but ends up being empty.

 It seems that HAVE_*SECURE_GETENV is not properly passed down to the
 compiler invocation:

 /bin/bash ./libtool --tag=CXX   --mode=compile
 /home/fw/src/gnu/gcc/build/./gcc/xgcc -B/home/fw/src/gnu/gcc/build/./gcc/
 -I. -I../../../git/libvtv -I../../../git/libvtv/../include  -D_GNU_SOURCE
 -Wall -Wextra -fno-exceptions -I./../libstdc++-v3/include
 -I./../libstdc++-v3/include/x86_64-unknown-linux-gnu
 -I../../../git/libvtv/../libstdc++-v3/libsupc++
 -Wl,-u_vtable_map_vars_start,-u_vtable_map_vars_end -g -O2 -D_GNU_SOURCE -MT
 vtv_utils.lo -MD -MP -MF .deps/vtv_utils.Tpo -c -o vtv_utils.lo
 ../../../git/libvtv/vtv_utils.cc
 libtool: compile:  /home/fw/src/gnu/gcc/build/./gcc/xgcc
 -B/home/fw/src/gnu/gcc/build/./gcc/ -I. -I../../../git/libvtv
 -I../../../git/libvtv/../include -D_GNU_SOURCE -Wall -Wextra -fno-exceptions
 -I./../libstdc++-v3/include
 -I./../libstdc++-v3/include/x86_64-unknown-linux-gnu
 -I../../../git/libvtv/../libstdc++-v3/libsupc++
 -Wl,-u_vtable_map_vars_start,-u_vtable_map_vars_end -g -O2 -D_GNU_SOURCE -MT
 vtv_utils.lo -MD -MP -MF .deps/vtv_utils.Tpo -c
 ../../../git/libvtv/vtv_utils.cc  -fPIC -DPIC -o .libs/vtv_utils.o

 As a result, the DSO ends up referencing getenv, even though secure_getenv
 is available (and has been detected by the 

Re: [committed] Fix expand_mult (PR middle-end/56420)

2013-08-19 Thread Mike Stump
On Feb 21, 2013, at 1:35 PM, Jakub Jelinek ja...@redhat.com wrote:
 I've committed the following fix for the following testcase.
 When scalar_op1 is 0x8000 with 64-bit HWI,
 it matches EXACT_POWER_OF_2_OR_ZERO_P, but we should expand it as
 negation of the  63 shift rather than the  63 shift alone.
 
 The patch also improves multiplication by 0x8000
 in TImode, which can be done as  63 shift, and multiplication by
 -(((TImode)1)  N) for N 0 through 62, which can be also expanded as
 negation of left shift.

So, I noticed that I also need something like:

diff --git a/gcc/expr.c b/gcc/expr.c
index ea3ff90..0253c0a 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -729,6 +729,11 @@ convert_modes (enum machine_mode mode, enum machine_mode 
oldmode, rtx x, int uns
   return immed_double_int_const (val, mode);
 }
 
+  if (GET_MODE_BITSIZE (mode)  GET_MODE_BITSIZE (oldmode)
+   GET_MODE_CLASS (mode) == MODE_INT
+   CONST_SCALAR_INT_P (x))
+return x;
+
   /* We can do this with a gen_lowpart if both desired and current modes
  are integer, and this is either a constant integer, a register, or a
  non-volatile MEM.  Except for the constant case where MODE is no


to not have this test case fail on me.  I widen to a larger size and then 
compare.  The problem is the code just after this don't handle widening, only 
narrowing.  Since the constants have no modes and since we want identity, we 
can just return x directly.

Thoughts?

 Furthermore I've noticed several places where we could invoke signed integer
 overflows inside of the compiler and fixed them.
 
 Bootstrapped/regtested on x86_64-linux and i686-linux, acked by Richard
 on IRC, committed to trunk.
 
 2013-02-21  Jakub Jelinek  ja...@redhat.com
 
   PR middle-end/56420
   * expmed.c (EXACT_POWER_OF_2_OR_ZERO_P): Do subtraction in uhwi, to
   avoid signed wrapping.
   (expand_mult): Handle properly multiplication by
   ((dword_type) -1)  (BITS_PER_WORD - 1).  Improve multiplication by
   ((dword_type) 1)  (BITS_PER_WORD - 1).  Avoid undefined behavior
   in the compiler if coeff is HOST_WIDE_INT_MIN.
   (expand_divmod): Don't make ext_op1 static, change it's type to
   uhwi.  Avoid undefined behavior in -INTVAL (op1).
 
   * gcc.dg/torture/pr56420.c: New test.
 
 --- gcc/expmed.c.jj   2013-02-13 21:46:52.0 +0100
 +++ gcc/expmed.c  2013-02-21 19:25:05.692298011 +0100
 @@ -64,7 +64,8 @@ static rtx expand_smod_pow2 (enum machin
 static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
 
 /* Test whether a value is zero of a power of two.  */
 -#define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x)  ((x) - 1)) == 0)
 +#define EXACT_POWER_OF_2_OR_ZERO_P(x) \
 +  (((x)  ((x) - (unsigned HOST_WIDE_INT) 1)) == 0)
 
 struct init_expmed_rtl
 {
 @@ -3079,7 +3080,10 @@ expand_mult (enum machine_mode mode, rtx
 /* If we are multiplying in DImode, it may still be a win
to try to work with shifts and adds.  */
 if (CONST_DOUBLE_HIGH (scalar_op1) == 0
 -CONST_DOUBLE_LOW (scalar_op1)  0)
 +(CONST_DOUBLE_LOW (scalar_op1)  0
 +   || (CONST_DOUBLE_LOW (scalar_op1)  0
 +EXACT_POWER_OF_2_OR_ZERO_P
 +(CONST_DOUBLE_LOW (scalar_op1)
   {
 coeff = CONST_DOUBLE_LOW (scalar_op1);
 is_neg = false;
 @@ -3109,7 +3113,8 @@ expand_mult (enum machine_mode mode, rtx
use synth_mult.  */
 
   /* Special case powers of two.  */
 -  if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
 +  if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
 +!(is_neg  mode_bitsize  HOST_BITS_PER_WIDE_INT))
   return expand_shift (LSHIFT_EXPR, mode, op0,
floor_log2 (coeff), target, unsignedp);
 
 @@ -3124,13 +3129,24 @@ expand_mult (enum machine_mode mode, rtx
result is interpreted as an unsigned coefficient.
Exclude cost of op0 from max_cost to match the cost
calculation of the synth_mult.  */
 +   coeff = -(unsigned HOST_WIDE_INT) coeff;
 max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed)
 - neg_cost(speed, mode));
 -   if (max_cost  0
 -choose_mult_variant (mode, -coeff, algorithm,
 -   variant, max_cost))
 +   if (max_cost = 0)
 + goto skip_synth;
 +
 +   /* Special case powers of two.  */
 +   if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
 + {
 +   rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
 +floor_log2 (coeff), target, unsignedp);
 +   return expand_unop (mode, neg_optab, temp, target, 0);
 + }
 +
 +   if (choose_mult_variant (mode, coeff, algorithm, variant,
 +max_cost))
   {
 -   rtx temp = expand_mult_const (mode, op0, -coeff, NULL_RTX,
 +

  1   2   >