Enable reference removal for speculative edges

2013-09-09 Thread Jan Hubicka
Hi,
this hack I put on place since ipa-prop was breaking without it.  Martin fixed 
the problem
and I tested it can now be removed for Firefx build.

Bootstrapped/regtested x86_64-linux, comitted.

* ipa-prop.c (try_make_edge_direct_simple_call): Do not special case
speculative edges.

Index: ipa-prop.c
===
--- ipa-prop.c  (revision 202368)
+++ ipa-prop.c  (working copy)
@@ -2586,7 +2586,6 @@ try_make_edge_direct_simple_call (struct
   struct cgraph_edge *cs;
   tree target;
   bool agg_contents = ie-indirect_info-agg_contents;
-  bool speculative = ie-speculative;
 
   if (ie-indirect_info-agg_contents)
 target = ipa_find_agg_cst_for_param (jfunc-agg,
@@ -2598,8 +2597,7 @@ try_make_edge_direct_simple_call (struct
 return NULL;
   cs = ipa_make_edge_direct_to_target (ie, target);
 
-  /* FIXME: speculative edges can be handled.  */
-  if (cs  !agg_contents  !speculative)
+  if (cs  !agg_contents)
 {
   bool ok;
   gcc_checking_assert (cs-callee


RE: [PATCH GCC]Catch more MEM_REFs sharing common addressing part in gimple strength reduction

2013-09-09 Thread bin.cheng
Thanks for reviewing, I will correct all stupid spelling problem in the next 
version of patch.

On Mon, Sep 9, 2013 at 8:15 AM, Bill Schmidt wschm...@linux.vnet.ibm.com 
wrote:

+   int (i * S).
+   Otherwise, just return double int zero.  */

 This is sufficient, since you are properly checking the next_interp
 chain.  Another possible form would be

 X = (B + i) * 1,

 but if this is present, then one of the forms you're checking for should
 also be present, so there's no need to check the MULT_CANDs.
I'm not very sure here since I didn't check MULT_CAND in the patch.  Could you 
please explain more about this?


+
+static double_int
+backtrace_base_for_ref (tree *pbase)
+{
+  tree base_in = *pbase;
+  slsr_cand_t base_cand;
+
+  STRIP_NOPS (base_in);
+  if (TREE_CODE (base_in) != SSA_NAME)
+return tree_to_double_int (integer_zero_node);
+
+  base_cand = base_cand_from_table (base_in);
+
+  while (base_cand  base_cand-kind != CAND_PHI)
+{
+  if (base_cand-kind == CAND_ADD
+base_cand-index.is_one ()
+TREE_CODE (base_cand-stride) == INTEGER_CST)
+ {
+   /* X = B + (1 * S), S is integer constant.  */
+   *pbase = base_cand-base_expr;
+   return tree_to_double_int (base_cand-stride);
+ }
+  else if (base_cand-kind == CAND_ADD
+ TREE_CODE (base_cand-stride) == INTEGER_CST
+ integer_onep (base_cand-stride))
+{
+   /* X = B + (i * S), S is integer one.  */
+   *pbase = base_cand-base_expr;
+   return base_cand-index;
+ }
+
+  if (base_cand-next_interp)
+ base_cand = lookup_cand (base_cand-next_interp);
+  else
+ base_cand = NULL;
+}
+
+  return tree_to_double_int (integer_zero_node);
+}
+
 /* Look for the following pattern:

 *PBASE:MEM_REF (T1, C1)
@@ -767,8 +818,15 @@ slsr_process_phi (gimple phi, bool speed)

 *PBASE:T1
 *POFFSET:  MULT_EXPR (T2, C3)
-*PINDEX:   C1 + (C2 * C3) + C4  */
+*PINDEX:   C1 + (C2 * C3) + C4

+   When T2 is recorded by an CAND_ADD in the form of (T2' + C5), It
  ^  ^
  a  it

+   will be further restructured to:
+
+*PBASE:T1
+*POFFSET:  MULT_EXPR (T2', C3)
+*PINDEX:   C1 + (C2 * C3) + C4 + (C5 * C3)  */
+
 static bool
 restructure_reference (tree *pbase, tree *poffset, double_int
 *pindex,
 tree *ptype)
@@ -777,7 +835,7 @@ restructure_reference (tree *pbase, tree *poffset,
   double_int index = *pindex;
   double_int bpu = double_int::from_uhwi (BITS_PER_UNIT);
   tree mult_op0, mult_op1, t1, t2, type;
-  double_int c1, c2, c3, c4;
+  double_int c1, c2, c3, c4, c5;

   if (!base
   || !offset
@@ -823,11 +881,12 @@ restructure_reference (tree *pbase, tree
 *poffset,
 }

   c4 = index.udiv (bpu, FLOOR_DIV_EXPR);
+  c5 = backtrace_base_for_ref (t2);

   *pbase = t1;
-  *poffset = fold_build2 (MULT_EXPR, sizetype, t2,
-   double_int_to_tree (sizetype, c3));
-  *pindex = c1 + c2 * c3 + c4;
+  *poffset = size_binop (MULT_EXPR, fold_convert (sizetype, t2),
+  double_int_to_tree (sizetype, c3));

 I am not sure why you changed this call.  fold_build2 is a more
 efficient call than size_binop.  size_binop makes several checks that
 will fail in this case, and then calls fold_build2_loc, right?  Not a
 big deal but seems like changing it back would be better.  Perhaps I'm
 missing something (as usual ;).
I rely on size_binop to convert T2 into sizetype, because T2' may be in other 
kind of type.  Otherwise there will be ssa_verify error later.

Thanks.
bin






Re: [PATCH 1/4] Support lambda templates.

2013-09-09 Thread Adam Butcher

Hi Jason,

I've attached a patch that handles parameter packs in the conversion 
op.  I thought it best to get this reviewed independently before rolling 
up into the 'Support lambda templates' patch.


Do you think it's the right idea?  It seems to work okay but I've 
reworked the way the template op call is built (and built a separate 
call for the decltype expression).


Cheers,
Adam
commit 29891189d498f5c6e3aabac72db14b94a200182c
Author: Adam Butcher a...@jessamine.co.uk
Date:   Thu Aug 15 20:21:38 2013 +0100

[Intended for rollup into 1/4]: Handle parameter packs in lambda conversion 
op.

* lambda.c (maybe_add_lambda_conv_op): Move decls to point of use.
* TODO: parm pack changelog to be merged with PATCH 1/4.

diff --git a/gcc/cp/lambda.c b/gcc/cp/lambda.c
index e9bc7c5..3d17948 100644
--- a/gcc/cp/lambda.c
+++ b/gcc/cp/lambda.c
@@ -741,6 +741,22 @@ nonlambda_method_basetype (void)
   return TYPE_METHOD_BASETYPE (TREE_TYPE (fn));
 }
 
+/* Helper function for maybe_add_lambda_conv_op; build a CALL_EXPR with
+   indicated FN, and NARGS, but do not initialize the return type or any of the
+   argument slots.  */
+
+static tree
+prepare_op_call (tree fn, int nargs)
+{
+  tree t;
+
+  t = build_vl_exp (CALL_EXPR, nargs + 3);
+  CALL_EXPR_FN (t) = fn;
+  CALL_EXPR_STATIC_CHAIN (t) = NULL;
+
+  return t;
+}
+
 /* If the closure TYPE has a static op(), also add a conversion to function
pointer.  */
 
@@ -749,9 +765,6 @@ maybe_add_lambda_conv_op (tree type)
 {
   bool nested = (current_function_decl != NULL_TREE);
   tree callop = lambda_function (type);
-  tree rettype, name, fntype, fn, body, compound_stmt;
-  tree thistype, stattype, statfn, convfn, call, arg;
-  vectree, va_gc *argvec;
 
   if (LAMBDA_EXPR_CAPTURE_LIST (CLASSTYPE_LAMBDA_EXPR (type)) != NULL_TREE)
 return;
@@ -759,7 +772,7 @@ maybe_add_lambda_conv_op (tree type)
   if (processing_template_decl)
 return;
 
-  bool generic_lambda_p
+  bool const generic_lambda_p
 = (DECL_TEMPLATE_INFO (callop)
  DECL_TEMPLATE_RESULT (DECL_TI_TEMPLATE (callop)) == callop);
 
@@ -770,63 +783,127 @@ maybe_add_lambda_conv_op (tree type)
   return;
 }
 
+  /* Non-template conversion operators are defined directly with build_call_a
+ and using DIRECT_ARGVEC for arguments (including 'this').  Templates are
+ deferred and the CALL is built in-place.  In the case of a deduced return
+ call op, the decltype expression used as a substitute for the return type,
+ DECLTYPE_CALL is also built in-place.  The arguments of DECLTYPE_CALL in
+ the return expression may differ in flags from those in body CALL.  In
+ particular, parameter pack expansions are marked PACK_EXPANSION_LOCAL_P in
+ the body CALL, but not in DECLTYPE_CALL.  */
+
+  vectree, va_gc *direct_argvec;
+  tree decltype_call = 0, call;
   tree fn_result = TREE_TYPE (TREE_TYPE (callop));
-  tree fn_args = copy_list (DECL_CHAIN (DECL_ARGUMENTS (callop)));
 
   if (generic_lambda_p)
 {
-  /* Construct the dependent member call for the static member function
-'_FUN' and remove 'auto' from its return type to allow for simple
+  /* Prepare the dependent member call for the static member function
+'_FUN' and, potentially, prepare another call to be used in a decltype
+return expression for a deduced return call op to allow for simple
 implementation of the conversion operator.  */
 
   tree instance = build_nop (type, null_pointer_node);
-  argvec = make_tree_vector ();
-  for (arg = fn_args; arg; arg = DECL_CHAIN (arg))
-   {
- mark_exp_read (arg);
- vec_safe_push (argvec, convert_from_reference (arg));
-   }
-
   tree objfn = build_min (COMPONENT_REF, NULL_TREE,
  instance, DECL_NAME (callop), NULL_TREE);
-  call = build_nt_call_vec (objfn, argvec);
+  int nargs = list_length (DECL_ARGUMENTS (callop)) - 1;
 
+  call = prepare_op_call (objfn, nargs);
   if (type_uses_auto (fn_result))
+   decltype_call = prepare_op_call (objfn, nargs);
+}
+  else
+{
+  direct_argvec = make_tree_vector ();
+  direct_argvec-quick_push (build1 (NOP_EXPR,
+TREE_TYPE (DECL_ARGUMENTS (callop)),
+null_pointer_node));
+}
+
+  /* Copy CALLOP's argument list (as per 'copy_list') as FN_ARGS in order to
+ declare the static member function _FUN below.  For each arg append to
+ DIRECT_ARGVEC (for the non-template case) or populate the pre-allocated
+ call args (for the template case).  If a parameter pack is found, expand
+ it, flagging it as PACK_EXPANSION_LOCAL_P for the body call.  */
+
+  tree fn_args = NULL_TREE;
+  {
+int ix = 0;
+tree src = DECL_CHAIN (DECL_ARGUMENTS (callop));
+tree tgt;
+
+while (src)
+  {
+   tree new_node = copy_node (src);
+
+   if (!fn_args)
+ fn_args = 

Re: [PATCH, x86] Use vector moves in memmove expanding

2013-09-09 Thread Michael V. Zolotukhin
Hi HJ,
You were right, and the change from epilogue_size_needed to size_needed was the
rootcause of the bug.
Here is the obvious fix for that, regtested and bootstrapped on i386/x86_64.

Is it OK for trunk?

Changelog:
2013-09-09  Michael Zolotukhin  michael.v.zolotuk...@gmail.com

* config/i386/i386.c (ix86_expand_movmem): Fix epilogue generation.

---
Thanks, Michael
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index e2fa71a..1f07e6f 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -23329,7 +23329,7 @@ ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, 
rtx align_exp,
 
   if (count_exp != const0_rtx  epilogue_size_needed  1)
 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
-   size_needed);
+   epilogue_size_needed);
   if (jump_around_label)
 emit_label (jump_around_label);
   return true;


Re: [PATCH, x86] Use vector moves in memmove expanding

2013-09-09 Thread Jan Hubicka
 Hi HJ,
 You were right, and the change from epilogue_size_needed to size_needed was 
 the
 rootcause of the bug.
 Here is the obvious fix for that, regtested and bootstrapped on i386/x86_64.
 
 Is it OK for trunk?
 
 Changelog:
 2013-09-09  Michael Zolotukhin  michael.v.zolotuk...@gmail.com
 
   * config/i386/i386.c (ix86_expand_movmem): Fix epilogue generation.

OK,
Would you mind adding a testcase?
Honza


Re: RFC: patch to build GCC for arm with LRA

2013-09-09 Thread Yvan Roux
Hi,

Thanks for the review.  Here is the fixed self-contained patch to
enable LRA on AAarch32 and AArch64 (for those who want to give a try).
 I'm still working on the issues described previously and will send
rtlanal.c patch separately to prepare the move.

Thanks,
Yvan


On 9 September 2013 01:23, Vladimir Makarov vmaka...@redhat.com wrote:
 On 13-09-08 2:04 PM, Richard Sandiford wrote:

 Yvan Roux yvan.r...@linaro.org writes:

 @@ -5786,7 +5796,11 @@ get_index_scale (const struct address_info *info)
  info-index_term == XEXP (index, 0))
   return INTVAL (XEXP (index, 1));
   -  if (GET_CODE (index) == ASHIFT
 +  if ((GET_CODE (index) == ASHIFT
 +   || GET_CODE (index) == ASHIFTRT
 +   || GET_CODE (index) == LSHIFTRT
 +   || GET_CODE (index) == ROTATE
 +   || GET_CODE (index) == ROTATERT)
  CONST_INT_P (XEXP (index, 1))
  info-index_term == XEXP (index, 0))
   return (HOST_WIDE_INT) 1  INTVAL (XEXP (index, 1));

 This bit doesn't look right.  The scale is only 1  N for (ashift ... N).
 I think we have to stick to returning zero for the other codes.

 Thanks, Richard.  I missed this.

 I am agree it might be a problem although the probability of it is
 negligible as index reg hardly can be equivalent to something + constant
 displacement (except may be case reg+reg when each reg can be base or index
 but in this case we don't use shifts).  Still returning zero is a safe
 solution as it prevents such equivalence substitution completely.

 Yvan, could modify this part of code and resend it to Richard Henderson for
 approval.

 The other two (snipped) rtlanal.c hunks like fine to me FWIW.  Maybe now
 would be a good time to add an is this a shift code? predicate though,
 if we don't have one already.

 Yes, I think we could somehow to promote this info to LRA but I don't think
 it is necessary or urgent.  There is already an alternative solution -- just
 doing nothing as prohibiting equivalence substitution for complicated index
 part hardly worsens the generated code.



arm-lra.patch
Description: Binary data


Avoid ipa-profile dropping functions unlikely when profile is read

2013-09-09 Thread Jan Hubicka
Hi,
ipa-profile does propagation across the CFG in attempt to prove that function is
cold.  This is counter-productive when profile is read and we can easilly work
this out from count itself.   This patch makes it less agressive in this setting
and it also put ipa-profile into busyness to make count based decisions that was
previously done at predict.c in an inferrior way.

This patch fixes problems Martin Liska noticed with inkscape.

Honza

* ipa-profile.c: Add toplevel comment.
(ipa_propagate_frequency_1): Be more conservative when profile is read.
(contains_hot_call_p): New function.
(ipa_propagate_frequency): Set frequencies based on counts when
profile is read.
* predict.c (compute_function_frequency): Use PROFILE_READ gueard for
profile; do not tamper with profile after inlining if it is read.
Index: ipa-profile.c
===
--- ipa-profile.c   (revision 202366)
+++ ipa-profile.c   (working copy)
@@ -17,6 +17,33 @@ You should have received a copy of the G
 along with GCC; see the file COPYING3.  If not see
 http://www.gnu.org/licenses/.  */
 
+/* ipa-profile pass implements the following analysis propagating profille
+   inter-procedurally.
+
+   - Count histogram construction.  This is a histogram analyzing how much
+ time is spent executing statements with a given execution count read
+ from profile feedback. This histogram is complette only with LTO,
+ otherwise it contains information only about the current unit.
+
+ Similar histogram is also estimated by coverage runtime.  This histogram
+ is not dependent on LTO, but it suffers from various defects; first
+ gcov runtime is not weighting individual basic block by estimated 
execution
+ time and second the merging of multiple runs makes assumption that the
+ histogram distribution did not change.  Consequentely histogram 
constructed
+ here may be more precise.
+
+ The information is used to set hot/cold thresholds.
+   - Next speculative indirect call resolution is performed:  the local
+ profile pass assigns profile-id to each function and provide us with a
+ histogram specifying the most common target.  We look up the callgraph
+ node corresponding to the target and produce a speculative call.
+
+ This call may or may not survive through IPA optimization based on 
decision
+ of inliner. 
+   - Finally we propagate the following flags: unlikely executed, executed
+ once, executed at startup and executed at exit.  These flags are used to
+ control code size/performance threshold and and code placement (by 
producing
+ .text.unlikely/.text.hot/.text.startup/.text.exit subsections).  */
 #include config.h
 #include system.h
 #include coretypes.h
@@ -301,6 +328,18 @@ ipa_propagate_frequency_1 (struct cgraph
d-only_called_at_startup = 0;
   d-only_called_at_exit = edge-caller-only_called_at_exit;
}
+
+  /* When profile feedback is available, do not try to propagate too hard;
+counts are already good guide on function frequencies and roundoff
+errors can make us to push function into unlikely section even when
+it is executed by the train run.  Transfer the function only if all
+callers are unlikely executed.  */
+  if (profile_info  flag_branch_probabilities
+  (edge-caller-frequency != NODE_FREQUENCY_UNLIKELY_EXECUTED
+ || (edge-caller-global.inlined_to
+  edge-caller-global.inlined_to-frequency
+!= NODE_FREQUENCY_UNLIKELY_EXECUTED)))
+ d-maybe_unlikely_executed = false;
   if (!edge-frequency)
continue;
   switch (edge-caller-frequency)
@@ -332,6 +371,24 @@ ipa_propagate_frequency_1 (struct cgraph
   return edge != NULL;
 }
 
+/* Return ture if NODE contains hot calls.  */
+
+bool
+contains_hot_call_p (struct cgraph_node *node)
+{
+  struct cgraph_edge *e;
+  for (e = node-callees; e; e = e-next_callee)
+if (cgraph_maybe_hot_edge_p (e))
+  return true;
+else if (!e-inline_failed
+ contains_hot_call_p (e-callee))
+  return true;
+  for (e = node-indirect_calls; e; e = e-next_callee)
+if (cgraph_maybe_hot_edge_p (e))
+  return true;
+  return false;
+}
+
 /* See if the frequency of NODE can be updated based on frequencies of its
callers.  */
 bool
@@ -343,6 +400,7 @@ ipa_propagate_frequency (struct cgraph_n
   /* We can not propagate anything useful about externally visible functions
  nor about virtuals.  */
   if (!node-local.local
+  || node-symbol.alias
   || (flag_devirtualize  DECL_VIRTUAL_P (node-symbol.decl)))
 return false;
   gcc_assert (node-symbol.analyzed);
@@ -369,6 +427,36 @@ ipa_propagate_frequency (struct cgraph_n
  cgraph_node_name (node));
changed = true;
 }
+
+  /* With profile we can decide on hot/normal based 

Re: [PATCH, x86] Use vector moves in memmove expanding

2013-09-09 Thread Michael V. Zolotukhin
 OK,
 Would you mind adding a testcase?
Thanks, here is the patch with Eric's test.
OK to commit?

Changelog:
gcc:
2013-09-09  Michael Zolotukhin  michael.v.zolotuk...@gmail.com

* config/i386/i386.c (ix86_expand_movmem): Fix epilogue generation.

gcc/testsuite:
2013-09-09  Michael Zolotukhin  michael.v.zolotuk...@gmail.com

* gcc.target/i386/memcpy-2.c: New test.


Thanks, Michael
 Honza
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index e2fa71a..1f07e6f 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -23329,7 +23329,7 @@ ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, 
rtx align_exp,
 
   if (count_exp != const0_rtx  epilogue_size_needed  1)
 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
-   size_needed);
+   epilogue_size_needed);
   if (jump_around_label)
 emit_label (jump_around_label);
   return true;
diff --git a/gcc/testsuite/gcc.target/i386/memcpy-2.c 
b/gcc/testsuite/gcc.target/i386/memcpy-2.c
new file mode 100644
index 000..c8dfbe3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/memcpy-2.c
@@ -0,0 +1,22 @@
+/* { dg-do run } */
+/* { dg-require-effective-target ia32 } */
+/* { dg-options -O2 -march=pentiumpro -minline-all-stringops -fno-common } */
+
+static void __attribute__((noinline, noclone))
+my_memcpy (char *dest, const char *src, int n)
+{
+  __builtin_memcpy (dest, src, n);
+}
+
+int
+main (void)
+{
+  char a1[4], a2[4];
+  __builtin_memset (a1, 'a', 4);
+  __builtin_memset (a2, 'b', 4);
+  my_memcpy (a2, a1, 4);
+  if (a2[0] != 'a')
+__builtin_abort ();
+  return 0;
+}
+


Re: [Patch to gcc/function] PR 58362

2013-09-09 Thread Richard Biener
On Sun, 8 Sep 2013, Paolo Carlini wrote:

 Hi,
 
 this patchlet fixes the column # of the unused parameter warnings emitted by
 do_warn_unused_parameter by explicitly passing DECL_SOURCE_LOCATION (decl)
 instead of wrongly relying on '+', which in this case ends up meaning the
 location of the function declaration. Tested x86_64-linux.

I would have expected %q+D to use the location of the corresponding
decl, not some random other location.  So, isn't the bug in the
C++ frontend diagnostic machinery?

Thanks,
Richard.


Re: [PATCH, x86] Use vector moves in memmove expanding

2013-09-09 Thread Uros Bizjak
On Mon, Sep 9, 2013 at 9:42 AM, Michael V. Zolotukhin
michael.v.zolotuk...@gmail.com wrote:
 OK,
 Would you mind adding a testcase?
 Thanks, here is the patch with Eric's test.
 OK to commit?

 Changelog:
 gcc:
 2013-09-09  Michael Zolotukhin  michael.v.zolotuk...@gmail.com

 * config/i386/i386.c (ix86_expand_movmem): Fix epilogue generation.

 gcc/testsuite:
 2013-09-09  Michael Zolotukhin  michael.v.zolotuk...@gmail.com

 * gcc.target/i386/memcpy-2.c: New test.

+/* { dg-do run } */
+/* { dg-require-effective-target ia32 } */
+/* { dg-options -O2 -march=pentiumpro -minline-all-stringops -fno-common } */

Please use -mtune instead of -march.

Otherwise OK.

Thanks,
Uros.


Re: [PATCH, x86] Use vector moves in memmove expanding

2013-09-09 Thread Jakub Jelinek
On Mon, Sep 09, 2013 at 11:42:05AM +0400, Michael V. Zolotukhin wrote:
 --- /dev/null
 +++ b/gcc/testsuite/gcc.target/i386/memcpy-2.c
 @@ -0,0 +1,22 @@
 +/* { dg-do run } */
 +/* { dg-require-effective-target ia32 } */
 +/* { dg-options -O2 -march=pentiumpro -minline-all-stringops -fno-common } 
 */

I don't see anything i386 specific on the testcase, except the flags,
and don't see why you need -fno-common in there, there are no global vars.
So, I think it would be better to stick it into gcc.dg/torture/, drop
dg-require-* line and instead just add
/* { dg-additional-options -march=pentiumpro { target ia32 } } */
/* { dg-additional-options -minline-all-stringops { target { i?86-*-* 
x86_64-*-* } } } */
or so (and let it cycle through all the -O* options).

 +
 +static void __attribute__((noinline, noclone))
 +my_memcpy (char *dest, const char *src, int n)
 +{
 +  __builtin_memcpy (dest, src, n);
 +}
 +
 +int
 +main (void)
 +{
 +  char a1[4], a2[4];
 +  __builtin_memset (a1, 'a', 4);
 +  __builtin_memset (a2, 'b', 4);
 +  my_memcpy (a2, a1, 4);
 +  if (a2[0] != 'a')
 +__builtin_abort ();
 +  return 0;
 +}
 +


Jakub


Re: [PATCH, x86] Use vector moves in memmove expanding

2013-09-09 Thread Michael V. Zolotukhin
 I don't see anything i386 specific on the testcase, except the flags,
 and don't see why you need -fno-common in there, there are no global vars.
 So, I think it would be better to stick it into gcc.dg/torture/, drop
 dg-require-* line and instead just add
 /* { dg-additional-options -march=pentiumpro { target ia32 } } */
 /* { dg-additional-options -minline-all-stringops { target { i?86-*-* 
 x86_64-*-* } } } */
 or so (and let it cycle through all the -O* options).
Originally the test targeted a specific situation, happening on pentiumpro (and
thus ia32), because on pentium pro we want 64-bit alignment for 32-bit
rep-moves.  So, it reveals an issue when desired alignment is bigger than size
of move_mode.
I don't see if it could be helpful on other platforms, though if
you think it's worthwhile, I'll update the test as you suggested.

Michael
   Jakub


Re: [PATCH, x86] Use vector moves in memmove expanding

2013-09-09 Thread Jakub Jelinek
On Mon, Sep 09, 2013 at 11:52:49AM +0400, Michael V. Zolotukhin wrote:
  I don't see anything i386 specific on the testcase, except the flags,
  and don't see why you need -fno-common in there, there are no global vars.
  So, I think it would be better to stick it into gcc.dg/torture/, drop
  dg-require-* line and instead just add
  /* { dg-additional-options -march=pentiumpro { target ia32 } } */
  /* { dg-additional-options -minline-all-stringops { target { i?86-*-* 
  x86_64-*-* } } } */
  or so (and let it cycle through all the -O* options).
 Originally the test targeted a specific situation, happening on pentiumpro 
 (and
 thus ia32), because on pentium pro we want 64-bit alignment for 32-bit
 rep-moves.  So, it reveals an issue when desired alignment is bigger than size
 of move_mode.
 I don't see if it could be helpful on other platforms, though if
 you think it's worthwhile, I'll update the test as you suggested.

I think it is worthwhile, various targets have many different ways to expand
memcpy, admittedly i?86/x86_64 probably the biggest number of these, and
while right now you've encountered it on ia32 with certain options doesn't
mean that in a few years it couldn't hit some unrelated target, arm, sh,
sparc, whatever.

Jakub


[PATCH, ARM, LRA] Prepare ARM build with LRA

2013-09-09 Thread Yvan Roux
Hi,

here are the modifications, discussed in another thread, needed in
rtlanal.c by ARM targets (AArch32 and AArch64) to build GCC with LRA.

Is it ok for trunk ?

Thanks,
Yvan

2013-09-09  Yvan Roux  yvan.r...@linaro.org
Vladimir Makarov  vmaka...@redhat.com

* rtlanal.c (must_be_index_p, set_address_index): Add ASHIFTRT,
LSHIFTRT, ROTATE, ROTATERT and SIGN_EXTRACT handling.
(set_address_base): Add SIGN_EXTRACT handling.


arm-lra-rtl.patch
Description: Binary data


Re: [PATCH][RFC] Move IVOPTs closer to RTL expansion

2013-09-09 Thread Richard Biener
On Sun, 8 Sep 2013, pins...@gmail.com wrote:

 On Sep 8, 2013, at 7:01 PM, Bin.Cheng amker.ch...@gmail.com wrote:
 
  On Wed, Sep 4, 2013 at 5:20 PM, Richard Biener rguent...@suse.de wrote:
  
  The patch below moves IVOPTs out of the GIMPLE loop pipeline more
  closer to RTL expansion.  That's done for multiple reasons.
  
  First, the loop passes that at the moment preceede IVOPTs leave
  around IL that is in desparate need of basic re-optimization
  like CSE, constant propagation and DCE.  That puts extra load
  on IVOPTs and its cost model, increasing compile-time and
  possibly confusing it.
  
  Second, IVOPTs introduces lowered memory accesses that it
  expects to stay as is, likewise it produces auto-inc/dec
  sequences that it expects to stay as is until RTL expansion.
  Passes such as DOM can break this expectation and make the
  work done by IVOPTs a waste.
  
  I remember doing this excercise in the GCC 4.3 timeframe where
  benchmarking on x86_64 showed no gains or losses (but x86_64
  isn't very sensitive to IV choices).
  
  Any help with benchmarking this on targets other than x86_64
  is appreciated (I'll re-do x86_64).
  
  Bootstrapped and tested on x86_64-unknown-linux-gnu.
  
  General comments are of course also welcome.
  
  Thanks,
  Richard.
  
  2013-09-04  Richard Biener  rguent...@suse.de
  
 * passes.def: Move IVOPTs before final DCE pass.
 * tree-ssa-loop.c (tree_ssa_loop_ivopts): Adjust for being
 outside of the loop pipeline.
  
 * gcc.dg/tree-ssa/ivopts-3.c: Scan non-details dump.
 * gcc.dg/tree-ssa/reassoc-19.c: Be more permissive.
  
  Index: gcc/passes.def
  ===
  *** gcc/passes.def.orig 2013-09-04 10:57:33.0 +0200
  --- gcc/passes.def  2013-09-04 11:11:27.535952665 +0200
  *** along with GCC; see the file COPYING3.
  *** 221,227 
   NEXT_PASS (pass_complete_unroll);
   NEXT_PASS (pass_slp_vectorize);
   NEXT_PASS (pass_loop_prefetch);
  - NEXT_PASS (pass_iv_optimize);
   NEXT_PASS (pass_lim);
   NEXT_PASS (pass_tree_loop_done);
 POP_INSERT_PASSES ()
  --- 221,226 
  *** along with GCC; see the file COPYING3.
  *** 237,242 
  --- 236,246 
  opportunities.  */
 NEXT_PASS (pass_phi_only_cprop);
 NEXT_PASS (pass_vrp);
  +   /* IVOPTs lowers memory accesses and exposes auto-inc/dec
  +  opportunities.  Run it after the above passes cleaned up
  +the loop optimized IL but before DCE as IVOPTs generates
  +quite some garbage.  */
  +   NEXT_PASS (pass_iv_optimize);
 NEXT_PASS (pass_cd_dce);
 NEXT_PASS (pass_tracer);
  
  Index: gcc/tree-ssa-loop.c
  ===
  *** gcc/tree-ssa-loop.c.orig2013-09-04 10:57:32.0 +0200
  --- gcc/tree-ssa-loop.c 2013-09-04 11:11:27.536952677 +0200
  *** make_pass_loop_prefetch (gcc::context *c
  *** 906,915 
   static unsigned int
   tree_ssa_loop_ivopts (void)
   {
  !   if (number_of_loops (cfun) = 1)
  ! return 0;
  
  -   tree_ssa_iv_optimize ();
 return 0;
   }
  
  --- 906,924 
   static unsigned int
   tree_ssa_loop_ivopts (void)
   {
  !   loop_optimizer_init (LOOPS_NORMAL
  !  | LOOPS_HAVE_RECORDED_EXITS);
  !
  !   if (number_of_loops (cfun)  1)
  ! {
  !   rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
  !   scev_initialize ();
  !   tree_ssa_iv_optimize ();
  !   scev_finalize ();
  ! }
  !
  !   loop_optimizer_finalize ();
  
 return 0;
   }
  
  Index: gcc/testsuite/gcc.dg/tree-ssa/ivopts-3.c
  ===
  *** gcc/testsuite/gcc.dg/tree-ssa/ivopts-3.c.orig   2013-09-04 
  10:57:33.0 +0200
  --- gcc/testsuite/gcc.dg/tree-ssa/ivopts-3.c2013-09-04 
  11:11:27.559952952 +0200
  ***
  *** 1,5 
   /* { dg-do compile } */
  ! /* { dg-options -O2 -fdump-tree-ivopts-details } */
  
   void main (void)
   {
  --- 1,5 
   /* { dg-do compile } */
  ! /* { dg-options -O2 -fdump-tree-ivopts } */
  
   void main (void)
   {
  *** void main (void)
  *** 8,12 
   f2 ();
   }
  
  ! /* { dg-final { scan-tree-dump-times != 0 5 ivopts } }  */
   /* { dg-final { cleanup-tree-dump ivopts } }  */
  --- 8,12 
   f2 ();
   }
  
  ! /* { dg-final { scan-tree-dump-times != 0 1 ivopts } }  */
   /* { dg-final { cleanup-tree-dump ivopts } }  */
  Index: gcc/testsuite/gcc.dg/tree-ssa/reassoc-19.c
  ===
  *** gcc/testsuite/gcc.dg/tree-ssa/reassoc-19.c.orig 2012-12-18 
  14:24:58.0 +0100
  --- gcc/testsuite/gcc.dg/tree-ssa/reassoc-19.c  2013-09-04 
  11:13:30.895416700 +0200
  *** void foo(char* left, char* rite, int ele
  *** 

Re: *PING* Re: [PATCH 2/2] Fix HLE example in manual

2013-09-09 Thread Gerald Pfeifer
On Sun, 8 Sep 2013, Andi Kleen wrote:
 2013-06-13  Andi Kleen  a...@linux.intel.com
 
* doc/extend.texi: Dont use __atomic_clear in HLE
example.  Fix typo.
 I would like to to backport this to 4.8. Ok too?
 Ping!

That's documentation only, right?

Okay.

Gerald


Re: [Patch to gcc/function] PR 58362

2013-09-09 Thread Paolo Carlini

Hi Richard,

On 09/09/2013 09:46 AM, Richard Biener wrote:

On Sun, 8 Sep 2013, Paolo Carlini wrote:


Hi,

this patchlet fixes the column # of the unused parameter warnings emitted by
do_warn_unused_parameter by explicitly passing DECL_SOURCE_LOCATION (decl)
instead of wrongly relying on '+', which in this case ends up meaning the
location of the function declaration. Tested x86_64-linux.

I would have expected %q+D to use the location of the corresponding
decl, not some random other location.  So, isn't the bug in the
C++ frontend diagnostic machinery?
Well, first notice that the patch fixes the issue *both* for the C and 
C++ front-ends, that's why I added the testcase to c-c++-common. This 
isn't a C++ issue. Then notice that we do already have tens of cases 
where we use DECL_SOURCE_LOCATION + %qD, when we want to be precise 
about the location. The diagnostic machinery has this mechanism using + 
which uses location_of, which is often useful for expressions, but which 
very often we don't use for decls. In fact, some people, like Manuel, 
see the audit trail of the bug, find the mechanism quite confusing. Is 
there something specific you want me to check?


Thanks,
Paolo.


Re: Fix PR middle-end/57370

2013-09-09 Thread Richard Biener
On Fri, Sep 6, 2013 at 8:47 PM, Easwaran Raman era...@google.com wrote:
 On Fri, Sep 6, 2013 at 12:05 AM, Richard Biener
 richard.guent...@gmail.com wrote:
 On Thu, Sep 5, 2013 at 9:23 PM, Easwaran Raman era...@google.com wrote:
 On Tue, Aug 27, 2013 at 3:35 AM, Richard Biener
 richard.guent...@gmail.com wrote:
 On Thu, Aug 1, 2013 at 1:34 AM, Easwaran Raman era...@google.com wrote:
 I have a new patch that supersedes this. The new patch also fixes PR
 tree-optimization/57393 and PR tree-optimization/58011. Bootstraps and
 no test regression on x86_64/linux. Ok for trunk?

 2013-07-31  Easwaran Raman  era...@google.com

 PR middle-end/57370
 * tree-ssa-reassoc.c (build_and_add_sum): Fix UID assignment and reset
 of debug statements that cause inconsistent IR.

 Missing ChangeLog entry for the insert_stmt_after hunk which I do not like
 at all.  The other hunks are ok, but we need to work harder to preserve
 debug stmts - simply removing all is not going to fly.

 Richard.

 I looked into the problem related to the debug stmts in this failing
 test case in detail. Initially, the code sequence looks


 s$n_13 = MEM[(struct S *)s];
 # DEBUG s$n = s$n_13
 _2 = (double) s$n_13;
 _4 = _2 * a_3(D);
 _6 = (double) i_5(D);
 _7 = _4 * _6;
 _9 = (double) j_8(D);
 _10 = _7 * _9;
 bar (_10);
 # DEBUG D#2 = (double) k_12(D)
 # DEBUG D#1 = _7 * D#2
 # DEBUG t$n = (int) D#1

 After reassociation

 s$n_13 = MEM[(struct S *)s];
 # DEBUG s$n = s$n_13
 _2 = (double) s$n_13;
 _6 = (double) i_5(D);
 # DEBUG D#3 = _4 * _6
 _9 = (double) j_8(D);
 _4 = _9 * _2;
 _7 = _4 * a_3(D);
 _10 = _7 * _6;
 bar (_10);
 # DEBUG D#2 = (double) k_12(D)
 # DEBUG D#1 = D#3 * D#2
 # DEBUG t$n = (int) D#1

 In the above, # DEBUG D#3 = _4 * _6  appears above the statement that
 defines _4. But even if I move the def of D#3 below the statement
 defining _4, it is not sufficient. Before reassociation, t$n refers to
 the expression (s$n_13 * a_3(D) * i_5(D) * k_12(D)), but after
 reassociation it would refer to ( j_8(D) * s$n_13 *  i_5(D) *
 k_12(D)). It seems the correct fix is to discard the debug temps whose
 RHS contains a variable that is involved in reassociation and then
 reconstruct it. Is that the expected fix here?

 The value of the DEBUG expression changes because the value
 that _4 computes changes - that is a no-no that may not happen.
 We cannot re-use _4 (without previously releasing it and allocating
 it newly) with a new value.  releasing _4 should have fixed up the
 debug stmts.

 So - can you verify whether we are indeed just replacing the
 RHS of _4 = _2 * a_3(D) to _9 * _2 without changing the SSA name of
 that expression?

 I have confirmed that the SSA name of the LHS remains the same. The
 reassociation code simply calls gimple_assign_set_rhs2 to modify RHS2
 and then calls update_stmt.

That's definitely not allowed (though ISTR I remember that reassoc does
this kind of things :/) - it will result in wrong debug info.

Richard.

 - Easwaran

 Another reason why re-using the same LHS for another value is
 wrong is that annotated information like points-to sets, alignment
 and soon value-range information will be wrong.

 Thanks,
 Richard.

 - Easwaran


 testsuite/ChangeLog:
 2013-07-31  Easwaran Raman  era...@google.com

 PR middle-end/57370
 PR tree-optimization/57393
 PR tree-optimization/58011
 * gfortran.dg/reassoc_12.f90: New testcase.
 * gcc.dg/tree-ssa/reassoc-31.c: New testcase.
 * gcc.dg/tree-ssa/reassoc-31.c: New testcase.


 On Fri, Jul 12, 2013 at 7:57 AM, Easwaran Raman era...@google.com wrote:
 Ping.

 On Thu, Jun 27, 2013 at 10:15 AM, Easwaran Raman era...@google.com 
 wrote:
 A newly generated statement in build_and_add_sum  function of
 tree-ssa-reassoc.c has to be assigned the UID of its adjacent
 statement. In one instance, it was assigned the wrong uid (of an
 earlier phi statement) which messed up the IR and caused the test
 program to hang. Bootstraps and no test regressions on x86_64/linux.
 Ok for trunk?

 Thanks,
 Easwaran


 2013-06-27  Easwaran Raman  era...@google.com

 PR middle-end/57370
 * tree-ssa-reassoc.c (build_and_add_sum): Do not use the UID of 
 a phi
 node for a non-phi gimple statement.

 testsuite/ChangeLog:
 2013-06-27  Easwaran Raman  era...@google.com

 PR middle-end/57370
 * gfortran.dg/reassoc_12.f90: New testcase.


 Index: gcc/testsuite/gfortran.dg/reassoc_12.f90
 ===
 --- gcc/testsuite/gfortran.dg/reassoc_12.f90 (revision 0)
 +++ gcc/testsuite/gfortran.dg/reassoc_12.f90 (revision 0)
 @@ -0,0 +1,74 @@
 +! { dg-do compile }
 +! { dg-options -O2 -ffast-math }
 +! PR middle-end/57370
 +
 + SUBROUTINE xb88_lr_adiabatic_lda_calc(e_ndrho_ndrho_ndrho, 
 +   grad_deriv,npoints, sx)
 +IMPLICIT REAL*8 (t)
 +INTEGER, PARAMETER :: dp=8
 +REAL(kind=dp), DIMENSION(1:npoints) :: e_ndrho_ndrho_ndrho, 
 +   

Re: RFC: patch to build GCC for arm with LRA

2013-09-09 Thread Richard Sandiford
Yvan Roux yvan.r...@linaro.org writes:
 Thanks for the review.  Here is the fixed self-contained patch to
 enable LRA on AAarch32 and AArch64 (for those who want to give a try).
  I'm still working on the issues described previously and will send
 rtlanal.c patch separately to prepare the move.

Looks like the rtlanal.c patch contains some extra changes from last time.
Just FWIW:

 diff --git a/gcc/rtlanal.c b/gcc/rtlanal.c
 index 95a314f..04832cf 100644
 --- a/gcc/rtlanal.c
 +++ b/gcc/rtlanal.c
 @@ -5483,7 +5483,16 @@ must_be_base_p (rtx x)
  static bool
  must_be_index_p (rtx x)
  {
 -  return GET_CODE (x) == MULT || GET_CODE (x) == ASHIFT;
 +  rtx addr = x;
 +  if (GET_CODE (addr) == SIGN_EXTRACT)
 +addr = *strip_address_mutations (XEXP (addr, 0));
 +
 +  return (GET_CODE (x) == MULT
 +  || GET_CODE (x) == ASHIFT
 +  || GET_CODE (x) == ASHIFTRT
 +  || GET_CODE (x) == LSHIFTRT
 +  || GET_CODE (x) == ROTATE
 +  || GET_CODE (x) == ROTATERT);

This doesn't look right, since you don't use the stripped address (addr)
anywhere.

I think SIGN_EXTRACT from the lsb (i.e. when the third operand is 0 for
!BITS_BIG_ENDIAN or GET_MODE_PRECISION (mode) - 1 for BITS_BIG_ENDIAN)
should be treated as an address mutation, since it's really a combination
of a truncation and extension.  Maybe the comment could be changed from:

   Mutations either convert between modes or apply some kind of
   alignment.  */

to:

   Mutations either convert between modes or apply some kind of
   extension, truncation or alignment.  */

Then must_be_index_p should just be:

  return (GET_CODE (x) == MULT
  || GET_CODE (x) == ASHIFT
  || GET_CODE (x) == ASHIFTRT
  || GET_CODE (x) == LSHIFTRT
  || GET_CODE (x) == ROTATE
  || GET_CODE (x) == ROTATERT);

as in your earlier patch, since the function is only passed stripped rtxes.
(Although like I say I'd personally prefer something like:

  return GET_CODE (x) == MULT || shift_code_p (GET_CODE (x));

where shift_code_p is a new predicate.)

Thanks,
Richard


Re: RFC: patch to build GCC for arm with LRA

2013-09-09 Thread Richard Sandiford
Richard Sandiford rdsandif...@googlemail.com writes:
 I think SIGN_EXTRACT from the lsb (i.e. when the third operand is 0 for
 !BITS_BIG_ENDIAN or GET_MODE_PRECISION (mode) - 1 for BITS_BIG_ENDIAN)

Gah, GET_MODE_PRECISION (mode) - size for BITS_BIG_ENDIAN.

Richard


Re: Factor gimple structures out of gimple.h

2013-09-09 Thread Richard Biener
On Sat, Sep 7, 2013 at 12:26 AM, Diego Novillo dnovi...@google.com wrote:
 This patch introduces gimple-core.h, which contains just the data
 structures needed to define gimple. I left everything else in
 gimple.h

 The addition of alias.h to tree-ssa-alias.h is so that we can
 include tree-ssa-alias.h in isolation. It now includes everything
 it needs.

Because of alias_set_type?  That easily could move to coretypes.h.

 More discussion on rationale at the thread:
 http://gcc.gnu.org/ml/gcc-patches/2013-09/msg00300.html

 Tested on x86_64.

The split looks ok to me, given the plan.  But as there is still discussion
around it please hold off until that died down with a conclusion.

Thanks,
Richard.

 2013-09-06  Diego Novillo  dnovi...@google.com

 * Makefile.in (GIMPLE_CORE_H): New.
 (GIMPLE_H): Depend on GIMPLE_CORE_H.
 (TREE_SSA_ALIAS_H): New. Replace references to tree-ssa-alias.h with
 TREE_SSA_ALIAS_H.
 * gimple-core.h: New. Factor all gimple data structures out of ...
 * gimple.h: ... here.
 * tree-ssa-alias.h: Include alias.h.

 diff --git a/gcc/Makefile.in b/gcc/Makefile.in
 index a72b753..2be8846 100644
 --- a/gcc/Makefile.in
 +++ b/gcc/Makefile.in
 @@ -882,15 +882,19 @@ TREE_H = tree.h $(TREE_CORE_H)  tree-check.h
  REGSET_H = regset.h $(BITMAP_H) hard-reg-set.h
  BASIC_BLOCK_H = basic-block.h $(PREDICT_H) $(VEC_H) $(FUNCTION_H) \
 cfg-flags.def cfghooks.h
 -GIMPLE_H = gimple.h gimple.def gsstruct.def pointer-set.h $(VEC_H) \
 +GIMPLE_CORE_H = gimple-core.h $(CONFIG_H) $(SYSTEM_H) $(CORETYPES_H) \
 +   $(INPUT_H) gimple.def gsstruct.def $(TREE_SSA_ALIAS_H) \
 +   $(INTERNAL_FN_H) $(TREE_CORE_H)
 +GIMPLE_H = gimple.h $(GIMPLE_CORE_H) pointer-set.h $(VEC_H) \
 $(GGC_H) $(BASIC_BLOCK_H) $(TREE_H) tree-ssa-operands.h \
 -   tree-ssa-alias.h $(INTERNAL_FN_H) $(HASH_TABLE_H)
 +   $(HASH_TABLE_H)
  TRANS_MEM_H = trans-mem.h
  GCOV_IO_H = gcov-io.h gcov-iov.h auto-host.h
  COVERAGE_H = coverage.h $(GCOV_IO_H)
  DEMANGLE_H = $(srcdir)/../include/demangle.h
  RECOG_H = recog.h
  ALIAS_H = alias.h
 +TREE_SSA_ALIAS_H = tree-ssa-alias.h $(ALIAS_H)
  EMIT_RTL_H = emit-rtl.h
  FLAGS_H = flags.h flag-types.h $(OPTIONS_H)
  OPTIONS_H = options.h flag-types.h $(OPTIONS_H_EXTRA)
 @@ -946,7 +950,7 @@ TREE_PASS_H = tree-pass.h $(TIMEVAR_H) $(DUMPFILE_H)
  TREE_FLOW_H = tree-flow.h tree-flow-inline.h tree-ssa-operands.h \
 $(BITMAP_H) sbitmap.h $(BASIC_BLOCK_H) $(GIMPLE_H) \
 $(HASHTAB_H) $(CGRAPH_H) $(IPA_REFERENCE_H) \
 -   tree-ssa-alias.h
 +   $(TREE_SSA_ALIAS_H)
  TREE_HASHER_H = tree-hasher.h $(HASH_TABLE_H) $(TREE_FLOW_H)
  TREE_SSA_LIVE_H = tree-ssa-live.h $(PARTITION_H)
  SSAEXPAND_H = ssaexpand.h $(TREE_SSA_LIVE_H)
 @@ -2234,7 +2238,7 @@ test-dump.o : test-dump.c $(CONFIG_H) $(SYSTEM_H) 
 $(CORETYPES_H) \
 $(BITMAP_H) sbitmap.h sreal.h $(TREE_H) $(CXX_PARSER_H) $(DWARF2OUT_H) \
 $(GIMPLE_PRETTY_PRINT_H) $(BASIC_BLOCK_H) insn-config.h $(LRA_INT.H) \
 $(SEL_SCHED_DUMP_H) $(IRA_INT_H) $(TREE_DATA_REF_H) $(TREE_FLOW_H) \
 -   $(TREE_SSA_LIVE_H) tree-ssa-alias.h $(OMEGA_H) $(RTL_H)
 +   $(TREE_SSA_LIVE_H) $(TREE_SSA_ALIAS_H) $(OMEGA_H) $(RTL_H)
  tree.o: tree.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(TREE_H) \
 all-tree.def $(FLAGS_H) $(FUNCTION_H) $(PARAMS_H) \
 toplev.h $(DIAGNOSTIC_CORE_H) $(GGC_H) $(HASHTAB_H) $(TARGET_H) output.h 
 $(TM_P_H) \
 @@ -2722,7 +2726,7 @@ targhooks.o : targhooks.c $(CONFIG_H) $(SYSTEM_H) 
 coretypes.h $(TREE_H) \
 $(EXPR_H) $(TM_H) $(RTL_H) $(TM_P_H) $(FUNCTION_H) output.h 
 $(DIAGNOSTIC_CORE_H) \
 $(MACHMODE_H) $(TARGET_DEF_H) $(TARGET_H) $(GGC_H) gt-targhooks.h \
 $(OPTABS_H) $(RECOG_H) $(REGS_H) reload.h hard-reg-set.h intl.h $(OPTS_H) 
 \
 -   tree-ssa-alias.h $(TREE_FLOW_H)
 +   $(TREE_SSA_ALIAS_H) $(TREE_FLOW_H)
  common/common-targhooks.o : common/common-targhooks.c $(CONFIG_H) 
 $(SYSTEM_H) \
 coretypes.h $(INPUT_H) $(TM_H) $(COMMON_TARGET_H) 
 common/common-targhooks.h

 @@ -2746,7 +2750,7 @@ toplev.o : toplev.c $(CONFIG_H) $(SYSTEM_H) coretypes.h 
 $(TM_H) $(TREE_H) \
 langhooks.h insn-flags.h $(CFGLOOP_H) hosthooks.h \
 $(CGRAPH_H) $(COVERAGE_H) alloc-pool.h $(GGC_H) \
 $(OPTS_H) params.def tree-mudflap.h $(TREE_PASS_H) $(GIMPLE_H) \
 -   tree-ssa-alias.h $(PLUGIN_H) realmpfr.h tree-diagnostic.h \
 +   $(TREE_SSA_ALIAS_H) $(PLUGIN_H) realmpfr.h tree-diagnostic.h \
 $(TREE_PRETTY_PRINT_H) opts-diagnostic.h $(COMMON_TARGET_H) \
 tsan.h diagnostic-color.h $(CONTEXT_H) $(PASS_MANAGER_H)

 @@ -3303,7 +3307,7 @@ alias.o : alias.c $(CONFIG_H) $(SYSTEM_H) coretypes.h 
 $(DUMPFILE_H) \
 $(ALIAS_H) $(EMIT_RTL_H) $(GGC_H) $(FUNCTION_H) cselib.h $(TREE_H) 
 $(TM_P_H) \
 langhooks.h $(TARGET_H) gt-alias.h $(TIMEVAR_H) $(CGRAPH_H) \
 $(SPLAY_TREE_H) $(DF_H) \
 -   tree-ssa-alias.h pointer-set.h $(TREE_FLOW_H)
 +   $(TREE_SSA_ALIAS_H) pointer-set.h 

Re: [PATCH, bootstrap]: Initialize deref_align in ipa_modify_call_arguments to fix profiledbootstrap

2013-09-09 Thread Richard Biener
On Sat, Sep 7, 2013 at 10:15 AM, Uros Bizjak ubiz...@gmail.com wrote:
 Hello!

 It looks that it is too hard for the compiler to track deref_align
 initialization through dependent deref_base boolean. The patch bellow
 fixes may be used uninitialized warning that breaks
 profiledbootstrap.

 2013-09-07  Uros Bizjak  ubiz...@gmail.com

 * ipa-prop.c (ipa_modify_call_arguments): Initialize deref_align.

 Tested on x86_64-pc-linux-gnu with LTO profiledbootstrap.

 OK for mainline?

Ok.

Thanks,
Richard.

 Uros.


Re: operator new returns nonzero

2013-09-09 Thread Richard Biener
On Sat, Sep 7, 2013 at 11:00 PM, Marc Glisse marc.gli...@inria.fr wrote:
 On Sat, 7 Sep 2013, Mike Stump wrote:

 On Sep 7, 2013, at 12:27 PM, Marc Glisse marc.gli...@inria.fr wrote:

 Now flag_check_new should probably disable this optimization…


 Yes, this why my point.


 Ok, here it is (again, no proper testing until bootstrap is fixed)

I wonder what happens on targets where 0 is a valid address of an object
(stated by !flag_delete_null_pointer_checks)?

Richard.


 2013-09-07  Marc Glisse  marc.gli...@inria.fr

 PR c++/19476
 gcc/
 * fold-const.c (tree_expr_nonzero_warnv_p): Handle operator new.
 * tree-vrp.c (gimple_stmt_nonzero_warnv_p,
 stmt_interesting_for_vrp):
 Likewise.
 (vrp_visit_stmt): Remove duplicated code.

 gcc/testsuite/
 * g++.dg/tree-ssa/pr19476-1.C: New file.
 * g++.dg/tree-ssa/pr19476-2.C: Likewise.
 * g++.dg/tree-ssa/pr19476-3.C: Likewise.

 --
 Marc Glisse
 Index: testsuite/g++.dg/tree-ssa/pr19476-1.C
 ===
 --- testsuite/g++.dg/tree-ssa/pr19476-1.C   (revision 0)
 +++ testsuite/g++.dg/tree-ssa/pr19476-1.C   (revision 0)
 @@ -0,0 +1,15 @@
 +/* { dg-do compile } */
 +/* { dg-options -O -fdump-tree-ccp1 } */
 +
 +#include new
 +
 +int f(){
 +  return 33 + (0 == new(std::nothrow) int);
 +}
 +int g(){
 +  return 42 + (0 == new int[50]);
 +}
 +
 +/* { dg-final { scan-tree-dump return 42 ccp1 } } */
 +/* { dg-final { scan-tree-dump-not return 33 ccp1 } } */
 +/* { dg-final { cleanup-tree-dump ccp1 } } */

 Property changes on: testsuite/g++.dg/tree-ssa/pr19476-1.C
 ___
 Added: svn:keywords
+ Author Date Id Revision URL
 Added: svn:eol-style
+ native

 Index: testsuite/g++.dg/tree-ssa/pr19476-2.C
 ===
 --- testsuite/g++.dg/tree-ssa/pr19476-2.C   (revision 0)
 +++ testsuite/g++.dg/tree-ssa/pr19476-2.C   (revision 0)
 @@ -0,0 +1,17 @@
 +/* { dg-do compile } */
 +/* { dg-options -O2 -fdump-tree-optimized } */
 +
 +#include new
 +
 +int f(){
 +  int *p = new(std::nothrow) int;
 +  return 33 + (0 == p);
 +}
 +int g(){
 +  int *p = new int[50];
 +  return 42 + (0 == p);
 +}
 +
 +/* { dg-final { scan-tree-dump return 42 optimized } } */
 +/* { dg-final { scan-tree-dump-not return 33 optimized } } */
 +/* { dg-final { cleanup-tree-dump optimized } } */

 Property changes on: testsuite/g++.dg/tree-ssa/pr19476-2.C
 ___
 Added: svn:keywords
+ Author Date Id Revision URL
 Added: svn:eol-style
+ native

 Index: testsuite/g++.dg/tree-ssa/pr19476-3.C
 ===
 --- testsuite/g++.dg/tree-ssa/pr19476-3.C   (revision 0)
 +++ testsuite/g++.dg/tree-ssa/pr19476-3.C   (revision 0)
 @@ -0,0 +1,11 @@
 +/* { dg-do compile } */
 +/* { dg-options -O3 -fcheck-new -fdump-tree-optimized } */
 +
 +#include new
 +
 +int g(){
 +  return 42 + (0 == new int);
 +}
 +
 +/* { dg-final { scan-tree-dump-not return 42 optimized } } */
 +/* { dg-final { cleanup-tree-dump optimized } } */

 Property changes on: testsuite/g++.dg/tree-ssa/pr19476-3.C
 ___
 Added: svn:keywords
+ Author Date Id Revision URL
 Added: svn:eol-style
+ native

 Index: fold-const.c
 ===
 --- fold-const.c(revision 202351)
 +++ fold-const.c(working copy)
 @@ -16171,21 +16171,31 @@ tree_expr_nonzero_warnv_p (tree t, bool
  case MODIFY_EXPR:
  case BIND_EXPR:
return tree_expr_nonzero_warnv_p (TREE_OPERAND (t, 1),
 strict_overflow_p);

  case SAVE_EXPR:
return tree_expr_nonzero_warnv_p (TREE_OPERAND (t, 0),
 strict_overflow_p);

  case CALL_EXPR:
 -  return alloca_call_p (t);
 +  {
 +   tree fn = CALL_EXPR_FN (t);
 +   if (TREE_CODE (fn) != ADDR_EXPR) return false;
 +   tree fndecl = TREE_OPERAND (fn, 0);
 +   if (TREE_CODE (fndecl) != FUNCTION_DECL) return false;
 +   if (!flag_check_new
 +DECL_IS_OPERATOR_NEW (fndecl)
 +!TREE_NOTHROW (fndecl))
 + return true;
 +   return alloca_call_p (t);
 +  }

  default:
break;
  }
return false;
  }

  /* Return true when T is an address and is known to be nonzero.
 Handle warnings about undefined signed overflow.  */

 Index: tree-vrp.c
 ===
 --- tree-vrp.c  (revision 202351)
 +++ tree-vrp.c  (working copy)
 @@ -1047,21 +1047,29 @@ gimple_assign_nonzero_warnv_p (gimple st
 *STRICT_OVERFLOW_P.*/

  static bool
  gimple_stmt_nonzero_warnv_p (gimple stmt, bool *strict_overflow_p)
  {

Re: [RS6000] powerpc64 -mcmodel=medium large symbol offsets

2013-09-09 Thread Alan Modra
Revised patch with testcase.  This one also fixes a small problem with
reg_or_add_cint_operand in that any 32-bit value is valid for SImode.
Compare with reg_or_sub_cint_operand.

Bootstrapped and regression tested powerpc64-linux.  OK to apply?

gcc/
* config/rs6000/predicates.md (add_cint_operand): New.
(reg_or_add_cint_operand): Use add_cint_operand.
* config/rs6000/rs6000.md (largetoc_high_plus): Restrict offset
using add_cint_operand.
(largetoc_high_plus_aix, small_toc_ref): Likewise.
gcc/testsuite/
* gcc.target/powerpc/medium_offset.c: New.

Index: gcc/config/rs6000/predicates.md
===
--- gcc/config/rs6000/predicates.md (revision 202351)
+++ gcc/config/rs6000/predicates.md (working copy)
@@ -376,12 +376,18 @@
   (ior (match_code const_int)
(match_operand 0 gpc_reg_operand)))
 
+;; Return 1 if op is a constant integer valid for addition with addis, addi.
+(define_predicate add_cint_operand
+  (and (match_code const_int)
+   (match_test (unsigned HOST_WIDE_INT)
+ (INTVAL (op) + (mode == SImode ? 0x8000 : 0x80008000))
+(unsigned HOST_WIDE_INT) 0x1ll)))
+
 ;; Return 1 if op is a constant integer valid for addition
 ;; or non-special register.
 (define_predicate reg_or_add_cint_operand
   (if_then_else (match_code const_int)
-(match_test (unsigned HOST_WIDE_INT) (INTVAL (op) + 0x80008000)
- (unsigned HOST_WIDE_INT) 0x1ll)
+(match_operand 0 add_cint_operand)
 (match_operand 0 gpc_reg_operand)))
 
 ;; Return 1 if op is a constant integer valid for subtraction
@@ -1697,7 +1703,7 @@
 (define_predicate small_toc_ref
   (match_code unspec,plus)
 {
-  if (GET_CODE (op) == PLUS  CONST_INT_P (XEXP (op, 1)))
+  if (GET_CODE (op) == PLUS  add_cint_operand (XEXP (op, 1), mode))
 op = XEXP (op, 0);
 
   return GET_CODE (op) == UNSPEC  XINT (op, 1) == UNSPEC_TOCREL;
Index: gcc/config/rs6000/rs6000.md
===
--- gcc/config/rs6000/rs6000.md (revision 202351)
+++ gcc/config/rs6000/rs6000.md (working copy)
@@ -12207,7 +12209,7 @@
(unspec [(match_operand:DI 1  )
 (match_operand:DI 2 gpc_reg_operand b)]
UNSPEC_TOCREL)
-   (match_operand 3 const_int_operand n]
+   (match_operand:DI 3 add_cint_operand n]
TARGET_ELF  TARGET_CMODEL != CMODEL_SMALL
addis %0,%2,%1+%3@toc@ha)
 
@@ -12218,7 +12220,7 @@
(unspec [(match_operand:P 1  )
 (match_operand:P 2 gpc_reg_operand b)]
UNSPEC_TOCREL)
-   (match_operand 3 const_int_operand n]
+   (match_operand:P 3 add_cint_operand n]
TARGET_XCOFF  TARGET_CMODEL != CMODEL_SMALL
addis %0,%1+%3@u(%2))
 
Index: gcc/testsuite/gcc.target/powerpc/medium_offset.c
===
--- gcc/testsuite/gcc.target/powerpc/medium_offset.c(revision 0)
+++ gcc/testsuite/gcc.target/powerpc/medium_offset.c(revision 0)
@@ -0,0 +1,12 @@
+/* { dg-do compile { target { powerpc*-*-* } } } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options -O } */
+/* { dg-final { scan-assembler-not \\+4611686018427387904 } } */
+
+static int x;
+
+unsigned long
+foo (void)
+{
+  return ((unsigned long) x) - 0xc000;
+}

-- 
Alan Modra
Australia Development Lab, IBM


Re: [PATCH, x86] Use vector moves in memmove expanding

2013-09-09 Thread Michael V. Zolotukhin
 I think it is worthwhile, various targets have many different ways to expand
 memcpy, admittedly i?86/x86_64 probably the biggest number of these, and
 while right now you've encountered it on ia32 with certain options doesn't
 mean that in a few years it couldn't hit some unrelated target, arm, sh,
 sparc, whatever.
Ok, that makes sense.
I've never written torture tests before, could you please check if I did it
right?

Changelog:
gcc:
2013-09-09  Michael Zolotukhin  michael.v.zolotuk...@gmail.com

* config/i386/i386.c (ix86_expand_movmem): Fix epilogue generation.

gcc/testsuite:
2013-09-09  Michael Zolotukhin  michael.v.zolotuk...@gmail.com

* gcc.c-torture/execute/memcpy-3.c: New test.
* gcc.c-torture/execute/memcpy-3.x: New file.


Michael
   Jakub
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index a8d70bc..50e9fa9 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -23329,7 +23329,7 @@ ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, 
rtx align_exp,
 
   if (count_exp != const0_rtx  epilogue_size_needed  1)
 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
-   size_needed);
+   epilogue_size_needed);
   if (jump_around_label)
 emit_label (jump_around_label);
   return true;
diff --git a/gcc/testsuite/gcc.c-torture/execute/memcpy-3.c 
b/gcc/testsuite/gcc.c-torture/execute/memcpy-3.c
new file mode 100644
index 000..f1ceb88
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/memcpy-3.c
@@ -0,0 +1,18 @@
+static void __attribute__((noinline, noclone))
+my_memcpy (char *dest, const char *src, int n)
+{
+  __builtin_memcpy (dest, src, n);
+}
+
+int
+main (void)
+{
+  char a1[4], a2[4];
+  __builtin_memset (a1, 'a', 4);
+  __builtin_memset (a2, 'b', 4);
+  my_memcpy (a2, a1, 4);
+  if (a2[0] != 'a')
+__builtin_abort ();
+  return 0;
+}
+
diff --git a/gcc/testsuite/gcc.c-torture/execute/memcpy-3.x 
b/gcc/testsuite/gcc.c-torture/execute/memcpy-3.x
new file mode 100644
index 000..4458269
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/memcpy-3.x
@@ -0,0 +1,7 @@
+if { [istarget i?86-*-*] } {
+  set additional_flags -mtune=pentiumpro -minline-all-stringops
+}
+if { [istarget x86_64-*-*] } {
+  set additional_flags -minline-all-stringops
+}
+return 0


Re: [PATCH, x86] Use vector moves in memmove expanding

2013-09-09 Thread Jakub Jelinek
On Mon, Sep 09, 2013 at 01:09:12PM +0400, Michael V. Zolotukhin wrote:
 I've never written torture tests before, could you please check if I did it
 right?

Please don't introduce new *.x files, for tests where you need something
like that just stick it into gcc.dg/torture/ instead and use normal dg stuff
in there.

So:
/* { dg-do run } */
/* { dg-additional-options -march=pentiumpro { target ia32 } } */ 
   
/* { dg-additional-options -minline-all-stringops { target { i?86-*-* 
x86_64-*-* } } } */
in gcc.dg/torture/memcpy-1.c would do it.

Jakub


Re: operator new returns nonzero

2013-09-09 Thread Marc Glisse

On Mon, 9 Sep 2013, Richard Biener wrote:


On Sat, Sep 7, 2013 at 11:00 PM, Marc Glisse marc.gli...@inria.fr wrote:

On Sat, 7 Sep 2013, Mike Stump wrote:


On Sep 7, 2013, at 12:27 PM, Marc Glisse marc.gli...@inria.fr wrote:


Now flag_check_new should probably disable this optimization…



Yes, this why my point.



Ok, here it is (again, no proper testing until bootstrap is fixed)


I wonder what happens on targets where 0 is a valid address of an object
(stated by !flag_delete_null_pointer_checks)?


I am not at all familiar with those targets (I thought you had to write
asm to access 0 so the compiler doesn't mess with your code), but it
makes sense to me to test (flag_delete_null_pointer_checks 
!flag_check_new) instead of just !flag_check_new. Consider the patch
changed this way. (we have so many options, I wouldn't be surprised if
there is yet another one to check...)

--
Marc Glisse


Re: [PATCH, x86] Use vector moves in memmove expanding

2013-09-09 Thread Michael V. Zolotukhin
 Please don't introduce new *.x files, for tests where you need something
 like that just stick it into gcc.dg/torture/ instead and use normal dg stuff
 in there.
Thanks, fixed.  Ok to commit?

Changelog:
gcc:
2013-09-09  Michael Zolotukhin  michael.v.zolotuk...@gmail.com

* config/i386/i386.c (ix86_expand_movmem): Fix epilogue generation.

gcc/testsuite:
2013-09-09  Michael Zolotukhin  michael.v.zolotuk...@gmail.com

* gcc.dg/torture/memcpy-1.c: New test.


Michael
   Jakub
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index a8d70bc..50e9fa9 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -23329,7 +23329,7 @@ ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, 
rtx align_exp,
 
   if (count_exp != const0_rtx  epilogue_size_needed  1)
 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
-   size_needed);
+   epilogue_size_needed);
   if (jump_around_label)
 emit_label (jump_around_label);
   return true;
diff --git a/gcc/testsuite/gcc.dg/torture/memcpy-1.c 
b/gcc/testsuite/gcc.dg/torture/memcpy-1.c
new file mode 100644
index 000..f8e8974
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/memcpy-1.c
@@ -0,0 +1,22 @@
+/* { dg-do run } */
+/* { dg-additional-options -march=pentiumpro { target ia32 } } */
+/* { dg-additional-options -minline-all-stringops { target { i?86-*-* 
x86_64-*-* } } } */
+
+static void __attribute__((noinline, noclone))
+my_memcpy (char *dest, const char *src, int n)
+{
+  __builtin_memcpy (dest, src, n);
+}
+
+int
+main (void)
+{
+  char a1[4], a2[4];
+  __builtin_memset (a1, 'a', 4);
+  __builtin_memset (a2, 'b', 4);
+  my_memcpy (a2, a1, 4);
+  if (a2[0] != 'a')
+__builtin_abort ();
+  return 0;
+}
+


[committed] Use gen_int_mode instead of GEN_INT+trunc_int_for_mode

2013-09-09 Thread Richard Sandiford
Bootstrapped  regression-tested on x86_64-linxu-gnu.  Applied as obvious.

Thanks,
Richard


gcc/
* asan.c (asan_shadow_cst): Use gen_int_mode.

Index: gcc/asan.c
===
--- gcc/asan.c  2013-09-08 16:54:50.485108772 +0100
+++ gcc/asan.c  2013-09-08 16:59:51.380477968 +0100
@@ -869,7 +869,7 @@ asan_shadow_cst (unsigned char shadow_by
   for (i = 0; i  4; i++)
 val |= (unsigned HOST_WIDE_INT) shadow_bytes[BYTES_BIG_ENDIAN ? 3 - i : i]
(BITS_PER_UNIT * i);
-  return GEN_INT (trunc_int_for_mode (val, SImode));
+  return gen_int_mode (val, SImode);
 }
 
 /* Clear shadow memory at SHADOW_MEM, LEN bytes.  Can't call a library call 
here


Re: [PATCH, x86] Use vector moves in memmove expanding

2013-09-09 Thread Uros Bizjak
On Mon, Sep 9, 2013 at 11:23 AM, Michael V. Zolotukhin
michael.v.zolotuk...@gmail.com wrote:
 Please don't introduce new *.x files, for tests where you need something
 like that just stick it into gcc.dg/torture/ instead and use normal dg stuff
 in there.
 Thanks, fixed.  Ok to commit?

No -march in runtime tests, please.

Uros.


Re: PR bootstrap/58340

2013-09-09 Thread Gerald Pfeifer

On Sun, 8 Sep 2013, Jeff Law wrote:
This fixes the problem noted by Zhendong in c#4.  I'll be working 
through other reports of issues related to the tree-ssa-threadedge.c

to see if there's any lingering problems.


It also fixes my original report, PR bootstrap/58340 (on 
x86_64-unknown-freebsd).


Thanks,
Gerald


Re: [ping][PATCH][1 of 2] Add value range info to SSA_NAME for zero sign extension elimination in RTL

2013-09-09 Thread Richard Biener
On Mon, Sep 9, 2013 at 1:09 AM, Kugan kugan.vivekanandara...@linaro.org wrote:

 On 06/09/13 16:16, Richard Biener wrote:

 On 9/3/13 2:15 PM, Kugan wrote:

 Thanks Richard for reviewing.

 On 02/09/13 22:15, Richard Biener wrote:

 On Wed, Jul 3, 2013 at 2:25 PM, Kugan
 kugan.vivekanandara...@linaro.org wrote:

 On 17/06/13 18:33, Richard Biener wrote:


 On Mon, 17 Jun 2013, Kugan wrote:
 +/* Extract the value range of assigned exprassion for GIMPLE_ASSIGN
 stmt.
 +   If the extracted value range is valid, return true else return
 +   false.  */
 +static bool


 [snip]



  for (i = 0; i  num_vr_values; i++)
if (vr_value[i])
  {
tree name = ssa_name (i);
if (POINTER_TYPE_P (name))
  continue;
if (vr_value[i].type == VR_RANGE
|| vr_value[i].type == VR_ANTI_RANGE)
  tree_ssa_set_value_range (name, tree_to_double_int
 (vr_value[i].min), tree_to_double_int (vr_value[i].max),
 vr_value[i].type
 == VR_RANGE);
  }


 Thanks Richard for taking time to review it.

 I was doing something like what you are suggesting earlier but
 noticed some
 problems and that’s the reason why I changed.

 For example, for the following testcase from the test suite,

 unsigned long l = (unsigned long)-2;
 unsigned short s;

 int main () {
 long t = l + 1;
 s = l;
 if (s != (unsigned short) -2)
   abort ();
 exit (0);
 }

 with the following gimple stmts

 main ()
 {
 short unsigned int s.1;
 long unsigned int l.0;

 ;;   basic block 2, loop depth 0
 ;;pred:   ENTRY
 l.0_2 = l;
 s.1_3 = (short unsigned int) l.0_2;
 s = s.1_3;
 if (s.1_3 != 65534)
   goto bb 3;
 else
   goto bb 4;
 ;;succ:   3
 ;;4

 ;;   basic block 3, loop depth 0
 ;;pred:   2
 abort ();
 ;;succ:

 ;;   basic block 4, loop depth 0
 ;;pred:   2
 exit (0);
 ;;succ:

 }



 has the following value range.

 l.0_2: VARYING
 s.1_3: [0, +INF]


   From zero/sign extension point of view, the variable s.1_3 is
 expected to
 have a value that will overflow (or varying) as this is what is
 assigned to
 a smaller variable. extract_range_from_assignment initially
 calculates the
 value range as VARYING but later changed to [0, +INF] by
 extract_range_basic. What I need here is the value that will be
 assigned
 from the rhs expression and not the value that we will have with proper
 assignment.


 I don't understand this.  The relevant statement is

 s.1_3 = (short unsigned int) l.0_2;

 right?  You have value-ranges for both s.1_3 and l.0_2 as above.  And
 you clearly cannot optimize the truncation away (and if you could,
 you wond't need value-range information for that fact).

 This is true. But just by looking at the value range of s.1.3 we will
 only see [0 +INF], as we are transferring directly from the lattice to
 lhs its value range.

 [0, +INF] here tells us  vrp_val_is_max and it is not
 is_positive_overflow_infinity (or varying). Thats why we need to get the
 value range of RHS expression which will tell us the actual range. We
 can then use this range and see of we can fit it to lhs type without
 truncation.


 Well, my point is you want to look at the l.0_2 value-range for this.
 Storing the l.0_2 value-range for s.1_3 is wrong.


 Yes, tree SSA_NAME should have it's correct value range. But, assigning rhs
 expression's value range is not totally wrong , it is just that it can be
 conservative value range (please correct me if I am wrong here) in few
 cases, as it can have wider range.

If it's a sign-changing conversion it can be surely wrong.

 I can use the rhs value range in the above case. We can also eliminate
 redundant zero/sign extensions for gimple binary and ternary stmts. In this
 case we will have to calculate the value range.  We will have to reuse these
 logic in tree-vrp.

I fail to see the issue given no concrete example.

 Other option is to add another attribute in range_info_t to indicate if
 set_value_range_to_nonnegative is used in value range extraction.

Why should the result of this not be accurately represented in the lattice?

 What is your preferred solution please.

I don't know because I do not understand the problem at hand.

 I understand that the above code of mine needs to be changed but not
 convinced about the best way to do that.

 I can possibly re-factor extract_range_from_assignment to give me this
 information with an additional argument. Could you kindly let me know
 your
 preference.




 /* SSA name annotations.  */

 +  union vrp_info_type {
 +/* Pointer attributes used for alias analysis.  */
 +struct GTY ((tag (TREE_SSA_PTR_INFO))) ptr_info_def *ptr_info;
 +/* Value range attributes used for zero/sign extension
 elimination.
 */

 /* Value range information.  */

 +struct GTY ((tag (TREE_SSA_RANGE_INFO))) range_info_def
 *range_info;
 +  } GTY ((desc (%1.def_stmt  

Re: [Patch to gcc/function] PR 58362

2013-09-09 Thread Richard Biener
On Mon, 9 Sep 2013, Paolo Carlini wrote:

 Hi Richard,
 
 On 09/09/2013 09:46 AM, Richard Biener wrote:
  On Sun, 8 Sep 2013, Paolo Carlini wrote:
  
   Hi,
   
   this patchlet fixes the column # of the unused parameter warnings emitted
   by
   do_warn_unused_parameter by explicitly passing DECL_SOURCE_LOCATION (decl)
   instead of wrongly relying on '+', which in this case ends up meaning the
   location of the function declaration. Tested x86_64-linux.
  I would have expected %q+D to use the location of the corresponding
  decl, not some random other location.  So, isn't the bug in the
  C++ frontend diagnostic machinery?
 Well, first notice that the patch fixes the issue *both* for the C and C++
 front-ends, that's why I added the testcase to c-c++-common. This isn't a C++
 issue. Then notice that we do already have tens of cases where we use
 DECL_SOURCE_LOCATION + %qD, when we want to be precise about the location. The
 diagnostic machinery has this mechanism using + which uses location_of, which
 is often useful for expressions, but which very often we don't use for decls.
 In fact, some people, like Manuel, see the audit trail of the bug, find the
 mechanism quite confusing. Is there something specific you want me to check?

How is '+' in %q+D defined?  I failed to find documentation of the
diagnostic formats (but only searched for like two minutes).

Richard.


Re: [Patch to gcc/function] PR 58362

2013-09-09 Thread Richard Biener
On Mon, 9 Sep 2013, Richard Biener wrote:

 On Mon, 9 Sep 2013, Paolo Carlini wrote:
 
  Hi Richard,
  
  On 09/09/2013 09:46 AM, Richard Biener wrote:
   On Sun, 8 Sep 2013, Paolo Carlini wrote:
   
Hi,

this patchlet fixes the column # of the unused parameter warnings 
emitted
by
do_warn_unused_parameter by explicitly passing DECL_SOURCE_LOCATION 
(decl)
instead of wrongly relying on '+', which in this case ends up meaning 
the
location of the function declaration. Tested x86_64-linux.
   I would have expected %q+D to use the location of the corresponding
   decl, not some random other location.  So, isn't the bug in the
   C++ frontend diagnostic machinery?
  Well, first notice that the patch fixes the issue *both* for the C and C++
  front-ends, that's why I added the testcase to c-c++-common. This isn't a 
  C++
  issue. Then notice that we do already have tens of cases where we use
  DECL_SOURCE_LOCATION + %qD, when we want to be precise about the location. 
  The
  diagnostic machinery has this mechanism using + which uses location_of, 
  which
  is often useful for expressions, but which very often we don't use for 
  decls.
  In fact, some people, like Manuel, see the audit trail of the bug, find the
  mechanism quite confusing. Is there something specific you want me to check?
 
 How is '+' in %q+D defined?  I failed to find documentation of the
 diagnostic formats (but only searched for like two minutes).

That said, grepping for %q+D reveals quite some uses and it looks like
all of them expect the location being used to be that of the decl passed
to the diagnostic call, not some random other location.

Richard.


Re: [Patch to gcc/function] PR 58362

2013-09-09 Thread Jakub Jelinek
On Mon, Sep 09, 2013 at 11:37:31AM +0200, Richard Biener wrote:
 this patchlet fixes the column # of the unused parameter warnings 
 emitted
 by
 do_warn_unused_parameter by explicitly passing DECL_SOURCE_LOCATION 
 (decl)
 instead of wrongly relying on '+', which in this case ends up meaning 
 the
 location of the function declaration. Tested x86_64-linux.
I would have expected %q+D to use the location of the corresponding
decl, not some random other location.  So, isn't the bug in the
C++ frontend diagnostic machinery?
   Well, first notice that the patch fixes the issue *both* for the C and C++
   front-ends, that's why I added the testcase to c-c++-common. This isn't a 
   C++
   issue. Then notice that we do already have tens of cases where we use
   DECL_SOURCE_LOCATION + %qD, when we want to be precise about the 
   location. The
   diagnostic machinery has this mechanism using + which uses location_of, 
   which
   is often useful for expressions, but which very often we don't use for 
   decls.
   In fact, some people, like Manuel, see the audit trail of the bug, find 
   the
   mechanism quite confusing. Is there something specific you want me to 
   check?
  
  How is '+' in %q+D defined?  I failed to find documentation of the
  diagnostic formats (but only searched for like two minutes).
 
 That said, grepping for %q+D reveals quite some uses and it looks like
 all of them expect the location being used to be that of the decl passed
 to the diagnostic call, not some random other location.

The C++ FE locus handling is in very bad shape (C FE is much better, Aldy
and others have spent quite some time fixing all the issues).  I guess this
is just one of the many issues.  The most annoying to me is that the C++ FE
for function calls uses the location of the closing ) of the call expression
rather than the function name or at least opening (, so if you have a 
  call_something (one_arg,
  second_arg,
  third_arg,
  fourth_arg,
  fifth_arg);
you really don't see what is being called in the debugger when debugging
C++.

Jakub


Re: RFC: patch to build GCC for arm with LRA

2013-09-09 Thread Yvan Roux
Thanks for noticing it Richard, I made a refactoring mistake and addr
was supposed to be used instead of x.  In fact on AArch64 it occurs
that we don't have stripped rtxes at this step and we have some of the
form below, this if why I added the strip.

(insn 29 27 5 7 (set (mem:SI (plus:DI (sign_extract:DI (mult:DI
(subreg:DI (reg/v:SI 76 [ elt ]) 0)

I'll redo the patch...

Thanks
Yvan

On 9 September 2013 10:58, Richard Sandiford rdsandif...@googlemail.com wrote:
 Richard Sandiford rdsandif...@googlemail.com writes:
 I think SIGN_EXTRACT from the lsb (i.e. when the third operand is 0 for
 !BITS_BIG_ENDIAN or GET_MODE_PRECISION (mode) - 1 for BITS_BIG_ENDIAN)

 Gah, GET_MODE_PRECISION (mode) - size for BITS_BIG_ENDIAN.

 Richard


Re: [Patch to gcc/function] PR 58362

2013-09-09 Thread Richard Biener
On Mon, 9 Sep 2013, Jakub Jelinek wrote:

 On Mon, Sep 09, 2013 at 11:37:31AM +0200, Richard Biener wrote:
  this patchlet fixes the column # of the unused parameter warnings 
  emitted
  by
  do_warn_unused_parameter by explicitly passing DECL_SOURCE_LOCATION 
  (decl)
  instead of wrongly relying on '+', which in this case ends up 
  meaning the
  location of the function declaration. Tested x86_64-linux.
 I would have expected %q+D to use the location of the corresponding
 decl, not some random other location.  So, isn't the bug in the
 C++ frontend diagnostic machinery?
Well, first notice that the patch fixes the issue *both* for the C and 
C++
front-ends, that's why I added the testcase to c-c++-common. This isn't 
a C++
issue. Then notice that we do already have tens of cases where we use
DECL_SOURCE_LOCATION + %qD, when we want to be precise about the 
location. The
diagnostic machinery has this mechanism using + which uses location_of, 
which
is often useful for expressions, but which very often we don't use for 
decls.
In fact, some people, like Manuel, see the audit trail of the bug, find 
the
mechanism quite confusing. Is there something specific you want me to 
check?
   
   How is '+' in %q+D defined?  I failed to find documentation of the
   diagnostic formats (but only searched for like two minutes).
  
  That said, grepping for %q+D reveals quite some uses and it looks like
  all of them expect the location being used to be that of the decl passed
  to the diagnostic call, not some random other location.
 
 The C++ FE locus handling is in very bad shape (C FE is much better, Aldy
 and others have spent quite some time fixing all the issues).  I guess this
 is just one of the many issues. 

Well, in this case the patch should IMHO be a no-op.

-  warning (OPT_Wunused_parameter, unused parameter %q+D, decl);
+  warning_at (DECL_SOURCE_LOCATION (decl), OPT_Wunused_parameter,
+ unused parameter %qD, decl);

no?  Unless I misunderstand what %q+D should do.

Richard.


Re: [Patch to gcc/function] PR 58362

2013-09-09 Thread Paolo Carlini

Hi,

On 09/09/2013 11:33 AM, Richard Biener wrote:
How is '+' in %q+D defined? I failed to find documentation of the 
diagnostic formats (but only searched for like two minutes). 
You see, this is an issue ;) No seriously, in C++ I understand it by 
looking at error.c:3438: when '+' is there, cp_printer use location_of 
(t) instead of what is passed down in error_at or warning_at, etc. Then 
if you go to location_of, in the same file, you see the reason of the 
difference between DECL_SOURCE_LOCATION + %qD and %q+D:


  if (TREE_CODE (t) == PARM_DECL  DECL_CONTEXT (t))
t = DECL_CONTEXT (t);
  else if ...

thus, in case of PARM_DECLs, t becomes DECL_CONTEXT (t), thus the 
function declaration instead of the parameter declaration!!! See? (I'm 
learning the details with you)


Now, I think we have two options: either we do more or less what Marc  
I  Manuel figured out, or we try to change the definition of 
location_of, which impacts a lot of code...


Paolo.


Re: [Patch to gcc/function] PR 58362

2013-09-09 Thread Jakub Jelinek
On Mon, Sep 09, 2013 at 11:45:08AM +0200, Richard Biener wrote:
 Well, in this case the patch should IMHO be a no-op.
 
 -  warning (OPT_Wunused_parameter, unused parameter %q+D, decl);
 +  warning_at (DECL_SOURCE_LOCATION (decl), OPT_Wunused_parameter,
 +   unused parameter %qD, decl);
 
 no?  Unless I misunderstand what %q+D should do.

The question is how exactly is %q+D defined, if it is
warning_at (location_of (decl), OPT_Wunused_parameter, unused parameter %qD, 
decl); in this case, or 
DECL_SOURCE_LOCATION (decl) instead.

Jakub


[PATCH][Resend][tree-optimization] Fix PR58088

2013-09-09 Thread Kyrylo Tkachov
[Resending, since I was away and not pinging it]


Hi all,

In PR58088 the constant folder goes into an infinite recursion and runs out of
stack space because of two conflicting optimisations:
(X * C1)  C2 plays dirty when nested inside an IOR expression like so: ((X *
C1)  C2) | C4. One can undo the other leading to an infinite recursion.

Thanks to Marek for finding the IOR case.

This patch fixes that by checking in the IOR case that the change to C2 will
not conflict with the AND case transformation. Example testcases in the PR on
bugzilla.

This affects both trunk and 4.8 and regresses and bootstraps cleanly on both.

Bootstrapped on x86_64-linux-gnu and tested arm-none-eabi on qemu.

Ok for trunk and 4.8?

Thanks,
Kyrill

2013-09-09  Kyrylo Tkachov  kyrylo.tkac...@arm.com

PR tree-optimization/58088
* fold-const.c (mask_with_trailing_zeros): New function.
(fold_binary_loc): Make sure we don't recurse infinitely
when the X in (X  C1) | C2 is a tree of the form (Y * K1)  K2.
Use mask_with_trailing_zeros where appropriate.


2013-09-09  Kyrylo Tkachov  kyrylo.tkac...@arm.com

PR tree-optimization/58088
* gcc.c-torture/compile/pr58088.c: New test.

pr58088.patch
Description: Binary data


Re: [Patch to gcc/function] PR 58362

2013-09-09 Thread Paolo Carlini

Hi,

On 09/09/2013 11:45 AM, Richard Biener wrote:

Well, in this case the patch should IMHO be a no-op.

-  warning (OPT_Wunused_parameter, unused parameter %q+D, decl);
+  warning_at (DECL_SOURCE_LOCATION (decl), OPT_Wunused_parameter,
+ unused parameter %qD, decl);

no?  Unless I misunderstand what %q+D should do.

See my previous email Richard. The situation should be rather clear now.

Paolo.


Re: [Patch to gcc/function] PR 58362

2013-09-09 Thread Richard Biener
On Mon, 9 Sep 2013, Jakub Jelinek wrote:

 On Mon, Sep 09, 2013 at 11:45:08AM +0200, Richard Biener wrote:
  Well, in this case the patch should IMHO be a no-op.
  
  -  warning (OPT_Wunused_parameter, unused parameter %q+D, decl);
  +  warning_at (DECL_SOURCE_LOCATION (decl), OPT_Wunused_parameter,
  + unused parameter %qD, decl);
  
  no?  Unless I misunderstand what %q+D should do.
 
 The question is how exactly is %q+D defined, if it is
 warning_at (location_of (decl), OPT_Wunused_parameter, unused parameter 
 %qD, decl); in this case, or 
 DECL_SOURCE_LOCATION (decl) instead.

It can't be 'location_of' because that's a C++ FE speciality but
warning_at and %q+D are diagnostic machinery level.

Unless of course the meaning of %q+D depends on the frontend which
would make its use from the middle-end ill-defined.

Richard.


Re: [Patch to gcc/function] PR 58362

2013-09-09 Thread Paolo Carlini

On 09/09/2013 12:04 PM, Richard Biener wrote:

On Mon, 9 Sep 2013, Jakub Jelinek wrote:


On Mon, Sep 09, 2013 at 11:45:08AM +0200, Richard Biener wrote:

Well, in this case the patch should IMHO be a no-op.

-  warning (OPT_Wunused_parameter, unused parameter %q+D, decl);
+  warning_at (DECL_SOURCE_LOCATION (decl), OPT_Wunused_parameter,
+ unused parameter %qD, decl);

no?  Unless I misunderstand what %q+D should do.

The question is how exactly is %q+D defined, if it is
warning_at (location_of (decl), OPT_Wunused_parameter, unused parameter %qD, 
decl); in this case, or
DECL_SOURCE_LOCATION (decl) instead.

It can't be 'location_of' because that's a C++ FE speciality but
warning_at and %q+D are diagnostic machinery level.
Everything happens via call backs. Thus from the generic diagnostic 
machinery, you go to cp_printer for C++, thus location_of for C++. In C 
is different, but again there is, evidently, a mechanism which uses 
DECL_CONTEXT for PARM_DECLs which leads to an inaccurate location when 
we *really* want the location of the parameter (exactly as I explained 
for C++).


Paolo.


Re: [PATCH, x86] Use vector moves in memmove expanding

2013-09-09 Thread Michael V. Zolotukhin
 No -march in runtime tests, please.
Is mtune ok here?

Michael
 Uros.


Re: [Patch to gcc/function] PR 58362

2013-09-09 Thread Paolo Carlini

On 09/09/2013 12:08 PM, Paolo Carlini wrote:
Everything happens via call backs. Thus from the generic diagnostic 
machinery, you go to cp_printer for C++, thus location_of for C++. In 
C is different, but again there is, evidently, a mechanism which uses 
DECL_CONTEXT for PARM_DECLs which leads to an inaccurate location when 
we *really* want the location of the parameter (exactly as I explained 
for C++).
See line # 615 of pretty-print.c, that call leads you back to the 
front-end, thus cp_printer etc, for C++


Paolo.


Re: [Patch to gcc/function] PR 58362

2013-09-09 Thread Richard Biener
On Mon, 9 Sep 2013, Paolo Carlini wrote:

 On 09/09/2013 12:04 PM, Richard Biener wrote:
  On Mon, 9 Sep 2013, Jakub Jelinek wrote:
  
   On Mon, Sep 09, 2013 at 11:45:08AM +0200, Richard Biener wrote:
Well, in this case the patch should IMHO be a no-op.

-  warning (OPT_Wunused_parameter, unused parameter %q+D, decl);
+  warning_at (DECL_SOURCE_LOCATION (decl), OPT_Wunused_parameter,
+ unused parameter %qD, decl);

no?  Unless I misunderstand what %q+D should do.
   The question is how exactly is %q+D defined, if it is
   warning_at (location_of (decl), OPT_Wunused_parameter, unused parameter
   %qD, decl); in this case, or
   DECL_SOURCE_LOCATION (decl) instead.
  It can't be 'location_of' because that's a C++ FE speciality but
  warning_at and %q+D are diagnostic machinery level.
 Everything happens via call backs. Thus from the generic diagnostic machinery,
 you go to cp_printer for C++, thus location_of for C++. In C is different, but
 again there is, evidently, a mechanism which uses DECL_CONTEXT for PARM_DECLs
 which leads to an inaccurate location when we *really* want the location of
 the parameter (exactly as I explained for C++).

I understand that.  But I question it.  Why would that ever be useful?
Can't the places that want that simply use warning/error_at with the
proper location?

Richard.


Re: [PATCH, x86] Use vector moves in memmove expanding

2013-09-09 Thread Uros Bizjak
On Mon, Sep 9, 2013 at 12:12 PM, Michael V. Zolotukhin
michael.v.zolotuk...@gmail.com wrote:
 No -march in runtime tests, please.
 Is mtune ok here?

Yes.

Uros.


Re: [PATCH, x86] Use vector moves in memmove expanding

2013-09-09 Thread Jakub Jelinek
On Mon, Sep 09, 2013 at 02:12:35PM +0400, Michael V. Zolotukhin wrote:
  No -march in runtime tests, please.
 Is mtune ok here?

Please verify that with the s/-march/-mtune/ change without the i386.c fix the 
testcase
will fail with
make check-gcc RUNTESTFLAGS='--target_board=unix\{-m32,-m64\} 
dg-torture.exp=pr58*.c'
and with the i386.c fix will succeed (that is pretty much a general rule for
most of the testcases for some PR being added into the testsuite).

Jakub


Re: [PATCH, x86] Use vector moves in memmove expanding

2013-09-09 Thread Uros Bizjak
On Mon, Sep 9, 2013 at 12:17 PM, Jakub Jelinek ja...@redhat.com wrote:
 On Mon, Sep 09, 2013 at 02:12:35PM +0400, Michael V. Zolotukhin wrote:
  No -march in runtime tests, please.
 Is mtune ok here?

 Please verify that with the s/-march/-mtune/ change without the i386.c fix 
 the testcase

It will fail, I have checked it.

Uros.


Re: [Patch to gcc/function] PR 58362

2013-09-09 Thread Paolo Carlini

Hi,

On 09/09/2013 12:13 PM, Richard Biener wrote:

Everything happens via call backs. Thus from the generic diagnostic machinery,
you go to cp_printer for C++, thus location_of for C++. In C is different, but
again there is, evidently, a mechanism which uses DECL_CONTEXT for PARM_DECLs
which leads to an inaccurate location when we *really* want the location of
the parameter (exactly as I explained for C++).

I understand that.  But I question it.  Why would that ever be useful?
Can't the places that want that simply use warning/error_at with the
proper location?
Indeed, this is *exactly*, if I understand him correctly, what Manuel 
says: looking forward he thinks we should not have this magic relying on 
'+' and things happening behind the scenes, simply explicit locations 
and warning_at/error_at.


For the time being, however, we are stuck with the situation that 
relying on '+' in this case (like in *many* existing others) doesn't 
work, because, in C++, location_of does t = DECL_CONTEXT (t) which is 
almost right but not really, we want the location of the PARM_DECL. The 
C front-end does something very similar (as the inaccurate location shows).


What do you think?

Paolo.


[1/4] Using gen_int_mode instead of GEN_INT

2013-09-09 Thread Richard Sandiford
These four patches are the result of going through gcc/* looking for
places where there was an obvious mode associated with a GEN_INT and
where gen_int_mode could therefore be used instead.  Some of the GEN_INTs
did create noncanoical rtl before, and were trapped by Kenny's assert on
the wide-int branch.  However, most of the affected places will be benign.
I'm therefore trying to sell this as a cleanup and don't have any testcases.

I've split the patches up based on the reason why the mode is obvious.
In this first patch, the GEN_INTs are all used in calls to gen_rtx_*
and are all cases where the operand must have the same mode as the result.
(This excludes things like shift count operands, since they can have a
different mode from the shifted operand and the result.)

Tested on x86_64-linux-gnu.  OK to install?

Thanks,
Richard


gcc/
* alias.c (addr_side_effect_eval): Use gen_int_mode with the mode
of the associated gen_rtx_* call.
* caller-save.c (init_caller_save): Likewise.
* combine.c (find_split_point, make_extraction): Likewise.
(make_compound_operation): Likewise.
* dwarf2out.c (mem_loc_descriptor): Likewise.
* explow.c (plus_constant, probe_stack_range): Likewise.
* expmed.c (expand_mult_const): Likewise.
* expr.c (emit_single_push_insn_1, do_tablejump): Likewise.
* reload1.c (init_reload): Likewise.
* valtrack.c (cleanup_auto_inc_dec): Likewise.
* var-tracking.c (adjust_mems): Likewise.
* modulo-sched.c (sms_schedule): Likewise, but use gen_rtx_GT
rather than gen_rtx_fmt_ee.

Index: gcc/alias.c
===
--- gcc/alias.c 2013-09-08 17:18:39.842591328 +0100
+++ gcc/alias.c 2013-09-09 10:49:45.213460712 +0100
@@ -1891,15 +1891,15 @@ addr_side_effect_eval (rtx addr, int siz
 
 default:
   return addr;
 }
 
   if (offset)
 addr = gen_rtx_PLUS (GET_MODE (addr), XEXP (addr, 0),
-GEN_INT (offset));
+gen_int_mode (offset, GET_MODE (addr)));
   else
 addr = XEXP (addr, 0);
   addr = canon_rtx (addr);
 
   return addr;
 }
 
Index: gcc/caller-save.c
===
--- gcc/caller-save.c   2013-09-08 17:18:39.842591328 +0100
+++ gcc/caller-save.c   2013-09-09 10:49:45.213460712 +0100
@@ -235,15 +235,15 @@ init_caller_save (void)
 
   gcc_assert (i  FIRST_PSEUDO_REGISTER);
 
   addr_reg = gen_rtx_REG (Pmode, i);
 
   for (offset = 1  (HOST_BITS_PER_INT / 2); offset; offset = 1)
 {
-  address = gen_rtx_PLUS (Pmode, addr_reg, GEN_INT (offset));
+  address = gen_rtx_PLUS (Pmode, addr_reg, gen_int_mode (offset, Pmode));
 
   for (i = 0; i  FIRST_PSEUDO_REGISTER; i++)
if (regno_save_mode[i][1] != VOIDmode
   ! strict_memory_address_p (regno_save_mode[i][1], address))
  break;
 
   if (i == FIRST_PSEUDO_REGISTER)
Index: gcc/combine.c
===
--- gcc/combine.c   2013-09-08 17:18:39.842591328 +0100
+++ gcc/combine.c   2013-09-09 10:49:45.218460753 +0100
@@ -4721,21 +4721,22 @@ find_split_point (rtx *loc, rtx insn, bo
 AND or two shifts.  Use two shifts for field sizes where the
 constant might be too large.  We assume here that we can
 always at least get 8-bit constants in an AND insn, which is
 true for every current RISC.  */
 
  if (unsignedp  len = 8)
{
+ unsigned HOST_WIDE_INT mask
+   = ((unsigned HOST_WIDE_INT) 1  len) - 1;
  SUBST (SET_SRC (x),
 gen_rtx_AND (mode,
  gen_rtx_LSHIFTRT
  (mode, gen_lowpart (mode, inner),
   GEN_INT (pos)),
- GEN_INT (((unsigned HOST_WIDE_INT) 1  len)
-  - 1)));
+ gen_int_mode (mask, mode)));
 
  split = find_split_point (SET_SRC (x), insn, true);
  if (split  split != SET_SRC (x))
return split;
}
  else
{
@@ -4810,17 +4811,19 @@ find_split_point (rtx *loc, rtx insn, bo
   if (set_src  code == MINUS  GET_CODE (XEXP (x, 1)) == MULT
   GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
   exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))  0)
{
  enum machine_mode mode = GET_MODE (x);
  unsigned HOST_WIDE_INT this_int = INTVAL (XEXP (XEXP (x, 1), 1));
  HOST_WIDE_INT other_int = trunc_int_for_mode (-this_int, mode);
- SUBST (*loc, gen_rtx_PLUS (mode, gen_rtx_MULT (mode,
-XEXP (XEXP (x, 1), 0),
-GEN_INT (other_int)),
+ SUBST (*loc, 

Re: [PATCH, x86] Use vector moves in memmove expanding

2013-09-09 Thread Michael V. Zolotukhin
  Is mtune ok here?
 Yes.
Thanks, fixed.  Ok to commit?
I verified that the test fails without fix in i386.c and passes with it.

Changelog:
gcc:
2013-09-09  Michael Zolotukhin  michael.v.zolotuk...@gmail.com

* config/i386/i386.c (ix86_expand_movmem): Fix epilogue generation.

gcc/testsuite:
2013-09-09  Michael Zolotukhin  michael.v.zolotuk...@gmail.com

* gcc.dg/torture/memcpy-1.c: New test.

Michael
 Uros.
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index a8d70bc..50e9fa9 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -23329,7 +23329,7 @@ ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, 
rtx align_exp,
 
   if (count_exp != const0_rtx  epilogue_size_needed  1)
 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
-   size_needed);
+   epilogue_size_needed);
   if (jump_around_label)
 emit_label (jump_around_label);
   return true;
diff --git a/gcc/testsuite/gcc.dg/torture/memcpy-1.c 
b/gcc/testsuite/gcc.dg/torture/memcpy-1.c
new file mode 100644
index 000..290c789
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/memcpy-1.c
@@ -0,0 +1,22 @@
+/* { dg-do run } */
+/* { dg-additional-options -mtune=pentiumpro { target ia32 } } */
+/* { dg-additional-options -minline-all-stringops { target { i?86-*-* 
x86_64-*-* } } } */
+
+static void __attribute__((noinline, noclone))
+my_memcpy (char *dest, const char *src, int n)
+{
+  __builtin_memcpy (dest, src, n);
+}
+
+int
+main (void)
+{
+  char a1[4], a2[4];
+  __builtin_memset (a1, 'a', 4);
+  __builtin_memset (a2, 'b', 4);
+  my_memcpy (a2, a1, 4);
+  if (a2[0] != 'a')
+__builtin_abort ();
+  return 0;
+}
+


[2/4] Using gen_int_mode instead of GEN_INT

2013-09-09 Thread Richard Sandiford
Similar to patch 1, but here the calls are all to expand_* routines.

Tested in the same way as before.  OK to install?

Thanks,
Richard


gcc/
* asan.c (asan_clear_shadow): Use gen_int_mode with the mode
of the associated expand_* call.
(asan_emit_stack_protection): Likewise.
* builtins.c (round_trampoline_addr): Likewise.
* explow.c (allocate_dynamic_stack_space, probe_stack_range): Likewise.
* expmed.c (expand_smod_pow2, expand_sdiv_pow2, expand_divmod)
(emit_store_flag): Likewise.
* expr.c (emit_move_resolve_push, push_block, emit_single_push_insn_1)
(emit_push_insn, optimize_bitfield_assignment_op, expand_expr_real_1):
Likewise.
* function.c (instantiate_virtual_regs_in_insn): Likewise.
* ifcvt.c (noce_try_store_flag_constants): Likewise.
* loop-unroll.c (unroll_loop_runtime_iterations): Likewise.
* modulo-sched.c (generate_prolog_epilog): Likewise.
* optabs.c (expand_binop, widen_leading, expand_doubleword_clz)
(expand_ctz, expand_ffs, expand_unop): Likewise.

Index: gcc/asan.c
===
--- gcc/asan.c  2013-09-09 10:23:49.095277703 +0100
+++ gcc/asan.c  2013-09-09 10:55:59.743537289 +0100
@@ -897,15 +897,15 @@ asan_clear_shadow (rtx shadow_mem, HOST_
   top_label = gen_label_rtx ();
   addr = force_reg (Pmode, XEXP (shadow_mem, 0));
   shadow_mem = adjust_automodify_address (shadow_mem, SImode, addr, 0);
   end = force_reg (Pmode, plus_constant (Pmode, addr, len));
   emit_label (top_label);
 
   emit_move_insn (shadow_mem, const0_rtx);
-  tmp = expand_simple_binop (Pmode, PLUS, addr, GEN_INT (4), addr,
+  tmp = expand_simple_binop (Pmode, PLUS, addr, gen_int_mode (4, Pmode), addr,
  true, OPTAB_LIB_WIDEN);
   if (tmp != addr)
 emit_move_insn (addr, tmp);
   emit_cmp_and_jump_insns (addr, end, LT, NULL_RTX, Pmode, true, top_label);
   jump = get_last_insn ();
   gcc_assert (JUMP_P (jump));
   add_reg_note (jump, REG_BR_PROB, GEN_INT (REG_BR_PROB_BASE * 80 / 100));
@@ -962,25 +962,27 @@ asan_emit_stack_protection (rtx base, HO
   else
pp_string (asan_pp, 9 unknown);
   pp_space (asan_pp);
 }
   str_cst = asan_pp_string (asan_pp);
 
   /* Emit the prologue sequence.  */
-  base = expand_binop (Pmode, add_optab, base, GEN_INT (base_offset),
+  base = expand_binop (Pmode, add_optab, base,
+  gen_int_mode (base_offset, Pmode),
   NULL_RTX, 1, OPTAB_DIRECT);
   mem = gen_rtx_MEM (ptr_mode, base);
   emit_move_insn (mem, GEN_INT (ASAN_STACK_FRAME_MAGIC));
   mem = adjust_address (mem, VOIDmode, GET_MODE_SIZE (ptr_mode));
   emit_move_insn (mem, expand_normal (str_cst));
   shadow_base = expand_binop (Pmode, lshr_optab, base,
  GEN_INT (ASAN_SHADOW_SHIFT),
  NULL_RTX, 1, OPTAB_DIRECT);
   shadow_base = expand_binop (Pmode, add_optab, shadow_base,
- GEN_INT (targetm.asan_shadow_offset ()),
+ gen_int_mode (targetm.asan_shadow_offset (),
+   Pmode),
  NULL_RTX, 1, OPTAB_DIRECT);
   gcc_assert (asan_shadow_set != -1
   (ASAN_RED_ZONE_SIZE  ASAN_SHADOW_SHIFT) == 4);
   shadow_mem = gen_rtx_MEM (SImode, shadow_base);
   set_mem_alias_set (shadow_mem, asan_shadow_set);
   prev_offset = base_offset;
   for (l = length; l; l -= 2)
Index: gcc/builtins.c
===
--- gcc/builtins.c  2013-09-08 17:18:39.707590244 +0100
+++ gcc/builtins.c  2013-09-09 10:55:59.747537322 +0100
@@ -4858,16 +4858,16 @@ round_trampoline_addr (rtx tramp)
   /* If we don't need too much alignment, we'll have been guaranteed
  proper alignment by get_trampoline_type.  */
   if (TRAMPOLINE_ALIGNMENT = STACK_BOUNDARY)
 return tramp;
 
   /* Round address up to desired boundary.  */
   temp = gen_reg_rtx (Pmode);
-  addend = GEN_INT (TRAMPOLINE_ALIGNMENT / BITS_PER_UNIT - 1);
-  mask = GEN_INT (-TRAMPOLINE_ALIGNMENT / BITS_PER_UNIT);
+  addend = gen_int_mode (TRAMPOLINE_ALIGNMENT / BITS_PER_UNIT - 1, Pmode);
+  mask = gen_int_mode (-TRAMPOLINE_ALIGNMENT / BITS_PER_UNIT, Pmode);
 
   temp  = expand_simple_binop (Pmode, PLUS, tramp, addend,
   temp, 0, OPTAB_LIB_WIDEN);
   tramp = expand_simple_binop (Pmode, AND, temp, mask,
   temp, 0, OPTAB_LIB_WIDEN);
 
   return tramp;
Index: gcc/explow.c
===
--- gcc/explow.c2013-09-09 10:49:45.226460819 +0100
+++ gcc/explow.c2013-09-09 10:55:59.748537330 +0100
@@ -1351,15 +1351,16 @@ allocate_dynamic_stack_space (rtx size,
 by malloc does not meet REQUIRED_ALIGN, we increase SIZE to
 make sure we allocate enough space.  */

[3/4] Using gen_int_mode instead of GEN_INT

2013-09-09 Thread Richard Sandiford
Similar to patch 1, but here the calls are all to simplify_* routines.
In the 7414 combine.c hunk, the code is PLUS, AND, IOR or XOR.
In the hunk after that, the cval |= ... stuff is effectively doing
a trunc_int_for_mode by hand.

Tested in the same way as before.  OK to install?

Thanks,
Richard


gcc/
* combine.c (simplify_set, expand_field_assignment, extract_left_shift)
(force_to_mode, simplify_shift_const_1, simplify_comparison):
Use gen_int_mode with the mode of the associated simplify_* call.
* explow.c (probe_stack_range, anti_adjust_stack_and_probe): Likewise.
* expmed.c (expand_shift_1): Likewise.
* function.c (instantiate_virtual_regs_in_insn): Likewise.
* loop-iv.c (iv_number_of_iterations): Likewise.
* loop-unroll.c (unroll_loop_runtime_iterations): Likewise.
* simplify-rtx.c (simplify_binary_operation_1): Likewise.

Index: gcc/combine.c
===
--- gcc/combine.c   2013-09-09 10:49:45.218460753 +0100
+++ gcc/combine.c   2013-09-09 11:11:13.699055109 +0100
@@ -6370,16 +6370,17 @@ simplify_set (rtx x)
 
  if ((recog_for_combine (pat, other_insn, note)  0
! check_asm_operands (pat)))
{
  *cc_use = old_cc_use;
  other_changed = 0;
 
- op0 = simplify_gen_binary (XOR, GET_MODE (op0),
-op0, GEN_INT (mask));
+ op0 = simplify_gen_binary (XOR, GET_MODE (op0), op0,
+gen_int_mode (mask,
+  GET_MODE (op0)));
}
}
}
 
   if (other_changed)
undobuf.other_insn = other_insn;
 
@@ -6893,19 +6894,21 @@ expand_field_assignment (const_rtx x)
  else if (GET_CODE (pos) == MINUS
CONST_INT_P (XEXP (pos, 1))
(INTVAL (XEXP (pos, 1))
   == GET_MODE_PRECISION (GET_MODE (inner)) - len))
/* If position is ADJUST - X, new position is X.  */
pos = XEXP (pos, 0);
  else
-   pos = simplify_gen_binary (MINUS, GET_MODE (pos),
-  GEN_INT (GET_MODE_PRECISION (
-   GET_MODE (inner))
-   - len),
-  pos);
+   {
+ HOST_WIDE_INT prec = GET_MODE_PRECISION (GET_MODE (inner));
+ pos = simplify_gen_binary (MINUS, GET_MODE (pos),
+gen_int_mode (prec - len,
+  GET_MODE (pos)),
+pos);
+   }
}
}
 
   /* A SUBREG between two modes that occupy the same numbers of words
 can be done by moving the SUBREG to the source.  */
   else if (GET_CODE (SET_DEST (x)) == SUBREG
   /* We need SUBREGs to compute nonzero_bits properly.  */
@@ -6950,15 +6953,16 @@ expand_field_assignment (const_rtx x)
   /* Compute a mask of LEN bits, if we can do this on the host machine.  */
   if (len = HOST_BITS_PER_WIDE_INT)
break;
 
   /* Now compute the equivalent expression.  Make a copy of INNER
 for the SET_DEST in case it is a MEM into which we will substitute;
 we don't want shared RTL in that case.  */
-  mask = GEN_INT (((unsigned HOST_WIDE_INT) 1  len) - 1);
+  mask = gen_int_mode (((unsigned HOST_WIDE_INT) 1  len) - 1,
+  compute_mode);
   cleared = simplify_gen_binary (AND, compute_mode,
 simplify_gen_unary (NOT, compute_mode,
   simplify_gen_binary (ASHIFT,
compute_mode,
mask, pos),
   compute_mode),
 inner);
@@ -7414,17 +7418,19 @@ extract_left_shift (rtx x, int count)
 case PLUS:  case IOR:  case XOR:  case AND:
   /* If we can safely shift this constant and we find the inner shift,
 make a new operation.  */
   if (CONST_INT_P (XEXP (x, 1))
   (UINTVAL (XEXP (x, 1))
   unsigned HOST_WIDE_INT) 1  count)) - 1)) == 0
   (tem = extract_left_shift (XEXP (x, 0), count)) != 0)
-   return simplify_gen_binary (code, mode, tem,
-   GEN_INT (INTVAL (XEXP (x, 1))  count));
-
+   {
+ HOST_WIDE_INT val = INTVAL (XEXP (x, 1))  count;
+ return simplify_gen_binary (code, mode, tem,
+ gen_int_mode (val, mode));
+

[4/4] Using gen_int_mode instead of GEN_INT

2013-09-09 Thread Richard Sandiford
Similar to patch 1, but here the calls are involved in a SET instruction
(or equivalent) and are cases where the operand must have the same mode as
the SET destination.

Tested in the same way as before.  OK to install?

Thanks,
Richard


gcc/
* asan.c (asan_emit_stack_protection): Use gen_int_mode instead of
GEN_INT.
* builtins.c (expand_errno_check): Likewise.
* dwarf2cfi.c (init_return_column_size): Likewise.
* except.c (sjlj_mark_call_sites): Likewise.
* expr.c (move_by_pieces_1, store_by_pieces_2): Likewise.
* lra-constraints.c (emit_inc): Likewise.
* ree.c (combine_set_extension): Likewise.
* regmove.c (fixup_match_2): Likewise.
* reload1.c (inc_for_reload): Likewise.

Index: gcc/asan.c
===
--- gcc/asan.c  2013-09-09 10:55:59.743537289 +0100
+++ gcc/asan.c  2013-09-09 11:06:25.029673768 +0100
@@ -966,15 +966,15 @@ asan_emit_stack_protection (rtx base, HO
   str_cst = asan_pp_string (asan_pp);
 
   /* Emit the prologue sequence.  */
   base = expand_binop (Pmode, add_optab, base,
   gen_int_mode (base_offset, Pmode),
   NULL_RTX, 1, OPTAB_DIRECT);
   mem = gen_rtx_MEM (ptr_mode, base);
-  emit_move_insn (mem, GEN_INT (ASAN_STACK_FRAME_MAGIC));
+  emit_move_insn (mem, gen_int_mode (ASAN_STACK_FRAME_MAGIC, ptr_mode));
   mem = adjust_address (mem, VOIDmode, GET_MODE_SIZE (ptr_mode));
   emit_move_insn (mem, expand_normal (str_cst));
   shadow_base = expand_binop (Pmode, lshr_optab, base,
  GEN_INT (ASAN_SHADOW_SHIFT),
  NULL_RTX, 1, OPTAB_DIRECT);
   shadow_base = expand_binop (Pmode, add_optab, shadow_base,
  gen_int_mode (targetm.asan_shadow_offset (),
Index: gcc/builtins.c
===
--- gcc/builtins.c  2013-09-09 10:55:59.747537322 +0100
+++ gcc/builtins.c  2013-09-09 11:06:25.030673776 +0100
@@ -1963,15 +1963,16 @@ expand_errno_check (tree exp, rtx target
 {
 #ifdef GEN_ERRNO_RTX
   rtx errno_rtx = GEN_ERRNO_RTX;
 #else
   rtx errno_rtx
  = gen_rtx_MEM (word_mode, gen_rtx_SYMBOL_REF (Pmode, errno));
 #endif
-  emit_move_insn (errno_rtx, GEN_INT (TARGET_EDOM));
+  emit_move_insn (errno_rtx,
+ gen_int_mode (TARGET_EDOM, GET_MODE (errno_rtx)));
   emit_label (lab);
   return;
 }
 #endif
 
   /* Make sure the library call isn't expanded as a tail call.  */
   CALL_EXPR_TAILCALL (exp) = 0;
Index: gcc/dwarf2cfi.c
===
--- gcc/dwarf2cfi.c 2013-09-08 17:18:39.456588227 +0100
+++ gcc/dwarf2cfi.c 2013-09-09 11:06:25.028673760 +0100
@@ -242,15 +242,16 @@ expand_builtin_dwarf_sp_column (void)
which has mode MODE.  Initialize column C as a return address column.  */
 
 static void
 init_return_column_size (enum machine_mode mode, rtx mem, unsigned int c)
 {
   HOST_WIDE_INT offset = c * GET_MODE_SIZE (mode);
   HOST_WIDE_INT size = GET_MODE_SIZE (Pmode);
-  emit_move_insn (adjust_address (mem, mode, offset), GEN_INT (size));
+  emit_move_insn (adjust_address (mem, mode, offset),
+ gen_int_mode (size, mode));
 }
 
 /* Generate code to initialize the register size table.  */
 
 void
 expand_builtin_init_dwarf_reg_sizes (tree address)
 {
Index: gcc/except.c
===
--- gcc/except.c2013-09-08 17:18:39.456588227 +0100
+++ gcc/except.c2013-09-09 11:06:25.017673669 +0100
@@ -1152,15 +1152,15 @@ sjlj_mark_call_sites (void)
   before = insn;
   if (CALL_P (insn))
before = find_first_parameter_load (insn, NULL_RTX);
 
   start_sequence ();
   mem = adjust_address (crtl-eh.sjlj_fc, TYPE_MODE (integer_type_node),
sjlj_fc_call_site_ofs);
-  emit_move_insn (mem, GEN_INT (this_call_site));
+  emit_move_insn (mem, gen_int_mode (this_call_site, GET_MODE (mem)));
   p = get_insns ();
   end_sequence ();
 
   emit_insn_before (p, before);
   last_call_site = this_call_site;
 }
 }
Index: gcc/expr.c
===
--- gcc/expr.c  2013-09-09 10:55:59.754537379 +0100
+++ gcc/expr.c  2013-09-09 11:06:25.021673702 +0100
@@ -1067,34 +1067,40 @@ move_by_pieces_1 (insn_gen_fn genfun, ma
from1 = adjust_automodify_address (data-from, mode, data-from_addr,
   data-offset);
   else
from1 = adjust_address (data-from, mode, data-offset);
 
   if (HAVE_PRE_DECREMENT  data-explicit_inc_to  0)
emit_insn (gen_add2_insn (data-to_addr,
- GEN_INT (-(HOST_WIDE_INT)size)));
+ gen_int_mode (-(HOST_WIDE_INT) size,
+   

Re: [C++ Patch] PR 43452

2013-09-09 Thread Paolo Carlini

Hi,

On 09/09/2013 06:35 AM, Jason Merrill wrote:

On 09/05/2013 06:44 PM, Paolo Carlini wrote:

+   warning (0, possible problem detected in invocation of 
+  delete [] operator:))


The warning should probably be suppressible by some flag.
In fact clang has such a flag - I wasn't sure we wanted it because we 
come from an hard error - thus instead of inventing a name I choose the 
same name. Enabled by default anyway.


Thanks!

Paolo.

/
2013-09-09  Paolo Carlini  paolo.carl...@oracle.com

PR c++/43452
* doc/invoke.texi (-Wdelete-incomplete): Document it.

/c-family
2013-09-09  Paolo Carlini  paolo.carl...@oracle.com

PR c++/43452
* c.opt (Wdelete-incomplete): Add.

/cp
2013-09-09  Paolo Carlini  paolo.carl...@oracle.com

PR c++/43452
* init.c (build_vec_delete_1): When the type is incomplete emit a
warning, enabled by default (not an error).
(build_delete): Adjust to use OPT_Wdelete_incomplete.

/testsuite
2013-09-09  Paolo Carlini  paolo.carl...@oracle.com

PR c++/43452
* g++.dg/warn/Wdelete-incomplete-1.C: New.
* g++.dg/warn/Wdelete-incomplete-2.C: Likewise.
* g++.dg/init/delete1.C: Adjust.
Index: c-family/c.opt
===
--- c-family/c.opt  (revision 202384)
+++ c-family/c.opt  (working copy)
@@ -339,6 +339,10 @@ Wdeclaration-after-statement
 C ObjC Var(warn_declaration_after_statement) Warning
 Warn when a declaration is found after a statement
 
+Wdelete-incomplete
+C++ ObjC++ Var(warn_delete_incomplete) Init(1) Warning
+Warn when deleting a pointer to incomplete type
+
 Wdelete-non-virtual-dtor
 C++ ObjC++ Var(warn_delnonvdtor) Warning LangEnabledBy(C++ ObjC++,Wall)
 Warn about deleting polymorphic objects with non-virtual destructors
Index: cp/init.c
===
--- cp/init.c   (revision 202384)
+++ cp/init.c   (working copy)
@@ -3078,7 +3078,7 @@ build_vec_delete_1 (tree base, tree maxindex, tree
 {
   tree virtual_size;
   tree ptype = build_pointer_type (type = complete_type (type));
-  tree size_exp = size_in_bytes (type);
+  tree size_exp;
 
   /* Temporary variables used by the loop.  */
   tree tbase, tbase_init;
@@ -3106,6 +3106,23 @@ build_vec_delete_1 (tree base, tree maxindex, tree
   if (base == error_mark_node || maxindex == error_mark_node)
 return error_mark_node;
 
+  if (!COMPLETE_TYPE_P (type))
+{
+  if ((complain  tf_warning)
+  warning (OPT_Wdelete_incomplete,
+ possible problem detected in invocation of 
+ delete [] operator:))
+   {
+ cxx_incomplete_type_diagnostic (base, type, DK_WARNING);
+ inform (input_location, neither the destructor nor the 
+ class-specific operator delete [] will be called, 
+ even if they are declared when the class is defined);
+   }
+  return build_builtin_delete_call (base);
+} 
+
+  size_exp = size_in_bytes (type);
+
   if (! MAYBE_CLASS_TYPE_P (type) || TYPE_HAS_TRIVIAL_DESTRUCTOR (type))
 goto no_destructor;
 
@@ -3820,11 +3837,13 @@ build_delete (tree type, tree addr, special_functi
  if (!COMPLETE_TYPE_P (type))
{
  if ((complain  tf_warning)
-  warning (0, possible problem detected in invocation of 
+  warning (OPT_Wdelete_incomplete,
+ possible problem detected in invocation of 
  delete operator:))
{
  cxx_incomplete_type_diagnostic (addr, type, DK_WARNING);
- inform (input_location, neither the destructor nor the 
class-specific 
+ inform (input_location,
+ neither the destructor nor the class-specific 
  operator delete will be called, even if they are 
  declared when the class is defined);
}
Index: doc/invoke.texi
===
--- doc/invoke.texi (revision 202384)
+++ doc/invoke.texi (working copy)
@@ -240,8 +240,8 @@ Objective-C and Objective-C++ Dialects}.
 -Wno-attributes -Wno-builtin-macro-redefined @gol
 -Wc++-compat -Wc++11-compat -Wcast-align  -Wcast-qual  @gol
 -Wchar-subscripts -Wclobbered  -Wcomment -Wconditionally-supported  @gol
--Wconversion  -Wcoverage-mismatch  -Wno-cpp  -Wno-deprecated  @gol
--Wno-deprecated-declarations -Wdisabled-optimization  @gol
+-Wconversion -Wcoverage-mismatch  -Wdelete-incomplete -Wno-cpp  @gol
+-Wno-deprecated -Wno-deprecated-declarations -Wdisabled-optimization  @gol
 -Wno-div-by-zero -Wdouble-promotion -Wempty-body  -Wenum-compare @gol
 -Wno-endif-labels -Werror  -Werror=* @gol
 -Wfatal-errors  -Wfloat-equal  -Wformat  -Wformat=2 @gol
@@ -4490,6 +4490,12 @@ types. 

Simplify expmed.c:lshift_value

2013-09-09 Thread Richard Sandiford
expmed.c:lshift_value has:

  val = double_int::from_uhwi (INTVAL (value)).zext (bitsize);
  val = val.llshift (bitpos, HOST_BITS_PER_DOUBLE_INT);

but its only caller has already zero-extended INTVAL (value) from BITSIZE,
so we might as well pass that instead of the (value, bitsize) pair.

This isn't really much of a win on its own, but it makes the associated
wide-int change more obvious.

Tested on x86_64-linux-gnu.  OK to install?

Thanks,
Richard


gcc/
* expmed.c (lshift_value): Take an unsigned HOST_WIDE_INT instead
of an rtx/bitpos pair.
(store_fixed_bit_field): Update accordingly.

Index: gcc/expmed.c
===
--- gcc/expmed.c2013-09-08 11:52:28.995962127 +0100
+++ gcc/expmed.c2013-09-08 14:21:49.710092468 +0100
@@ -56,7 +56,7 @@ static rtx extract_fixed_bit_field (enum
unsigned HOST_WIDE_INT,
unsigned HOST_WIDE_INT, rtx, int, bool);
 static rtx mask_rtx (enum machine_mode, int, int, int);
-static rtx lshift_value (enum machine_mode, rtx, int, int);
+static rtx lshift_value (enum machine_mode, unsigned HOST_WIDE_INT, int);
 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
unsigned HOST_WIDE_INT, int);
 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
@@ -991,7 +991,7 @@ store_fixed_bit_field (rtx op0, unsigned
   || (bitsize == HOST_BITS_PER_WIDE_INT  v == -1))
all_one = 1;
 
-  value = lshift_value (mode, value, bitnum, bitsize);
+  value = lshift_value (mode, v, bitnum);
 }
   else
 {
@@ -1862,14 +1862,15 @@ mask_rtx (enum machine_mode mode, int bi
 }
 
 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
-   VALUE truncated to BITSIZE bits and then shifted left BITPOS bits.  */
+   VALUE  BITPOS.  */
 
 static rtx
-lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
+lshift_value (enum machine_mode mode, unsigned HOST_WIDE_INT value,
+ int bitpos)
 {
   double_int val;
   
-  val = double_int::from_uhwi (INTVAL (value)).zext (bitsize);
+  val = double_int::from_uhwi (value);
   val = val.llshift (bitpos, HOST_BITS_PER_DOUBLE_INT);
 
   return immed_double_int_const (val, mode);


Re: [Patch to gcc/function] PR 58362

2013-09-09 Thread Paolo Carlini

On 09/09/2013 11:37 AM, Richard Biener wrote:

That said, grepping for %q+D reveals quite some uses and it looks like
all of them expect the location being used to be that of the decl passed
to the diagnostic call, not some random other location.
If the decl is *not* a PARM_DECL, I expect %q+D to be often accurate. In 
fact, even when *is* a PARM_DECL what we have now is pretty decent, 
because normally the location of the corresponding FUNCTION_DECL isn't 
that far. The point is whether we want to be *more* accurate and point 
to the specific unused parameter, for C and C++, as clang and icc do.


Paolo.


Re: [Patch to gcc/function] PR 58362

2013-09-09 Thread Jakub Jelinek
On Mon, Sep 09, 2013 at 12:38:46PM +0200, Paolo Carlini wrote:
 On 09/09/2013 11:37 AM, Richard Biener wrote:
 That said, grepping for %q+D reveals quite some uses and it looks like
 all of them expect the location being used to be that of the decl passed
 to the diagnostic call, not some random other location.
 If the decl is *not* a PARM_DECL, I expect %q+D to be often
 accurate. In fact, even when *is* a PARM_DECL what we have now is
 pretty decent, because normally the location of the corresponding
 FUNCTION_DECL isn't that far. The point is whether we want to be
 *more* accurate and point to the specific unused parameter, for C
 and C++, as clang and icc do.

I guess the primary question is why location_of special cases the PARM_DECL
and in which case it is useful to do so, and whether the number of cases (if
any) when it is useful to do so is bigger than the number of place when it
is undesirable.

Jakub


Re: [Patch to gcc/function] PR 58362

2013-09-09 Thread Paolo Carlini

On 09/09/2013 12:41 PM, Jakub Jelinek wrote:

On Mon, Sep 09, 2013 at 12:38:46PM +0200, Paolo Carlini wrote:

On 09/09/2013 11:37 AM, Richard Biener wrote:

That said, grepping for %q+D reveals quite some uses and it looks like
all of them expect the location being used to be that of the decl passed
to the diagnostic call, not some random other location.

If the decl is *not* a PARM_DECL, I expect %q+D to be often
accurate. In fact, even when *is* a PARM_DECL what we have now is
pretty decent, because normally the location of the corresponding
FUNCTION_DECL isn't that far. The point is whether we want to be
*more* accurate and point to the specific unused parameter, for C
and C++, as clang and icc do.

I guess the primary question is why location_of special cases the PARM_DECL
and in which case it is useful to do so, and whether the number of cases (if
any) when it is useful to do so is bigger than the number of place when it
is undesirable.
I understand that. It seems to me a much bigger project and must be done 
for the C front-end too (I don't know the name of the equivalent of 
location_of, but the location is wrong for it too, there must be the 
equivalent of t = DECL_CONTEXT (t) for it too)


What can I tell you, I *may* be able to work on that, but not now, not 
for both front-ends.


Paolo.


Re: [Patch to gcc/function] PR 58362

2013-09-09 Thread Paolo Carlini

On 09/09/2013 12:44 PM, Paolo Carlini wrote:
I understand that. It seems to me a much bigger project and must be 
done for the C front-end too (I don't know the name of the equivalent 
of location_of, but the location is wrong for it too, there must be 
the equivalent of t = DECL_CONTEXT (t) for it too)
Sorry, I stand corrected, the C front-end is fine (yesterday I worked on 
other diagnostic issues impacting both). That changes the issue 
completely. Of course we want to only change location_of.


Paolo.


Re: [PATCH i386 2/8] [AVX512] Add mask registers.

2013-09-09 Thread Kirill Yukhin
Hello,
On 04 Sep 22:45, Kirill Yukhin wrote:
 Hello,
 
 PING.

PING.

--
Thanks, K 


Re: [PATCH i386 3/8] [AVX512] [1/n] Add AVX-512 patterns: VF iterator extended.

2013-09-09 Thread Kirill Yukhin
Hello,
On 06 Sep 17:41, Kirill Yukhin wrote:
 Hello,
 
 PING.
PING.

--
Thanks, K


Re: [4/4] Using gen_int_mode instead of GEN_INT

2013-09-09 Thread Richard Biener
On Mon, Sep 9, 2013 at 12:27 PM, Richard Sandiford
rdsandif...@googlemail.com wrote:
 Similar to patch 1, but here the calls are involved in a SET instruction
 (or equivalent) and are cases where the operand must have the same mode as
 the SET destination.

 Tested in the same way as before.  OK to install?

Ok.

Thanks,
Richard.

 Thanks,
 Richard


 gcc/
 * asan.c (asan_emit_stack_protection): Use gen_int_mode instead of
 GEN_INT.
 * builtins.c (expand_errno_check): Likewise.
 * dwarf2cfi.c (init_return_column_size): Likewise.
 * except.c (sjlj_mark_call_sites): Likewise.
 * expr.c (move_by_pieces_1, store_by_pieces_2): Likewise.
 * lra-constraints.c (emit_inc): Likewise.
 * ree.c (combine_set_extension): Likewise.
 * regmove.c (fixup_match_2): Likewise.
 * reload1.c (inc_for_reload): Likewise.

 Index: gcc/asan.c
 ===
 --- gcc/asan.c  2013-09-09 10:55:59.743537289 +0100
 +++ gcc/asan.c  2013-09-09 11:06:25.029673768 +0100
 @@ -966,15 +966,15 @@ asan_emit_stack_protection (rtx base, HO
str_cst = asan_pp_string (asan_pp);

/* Emit the prologue sequence.  */
base = expand_binop (Pmode, add_optab, base,
gen_int_mode (base_offset, Pmode),
NULL_RTX, 1, OPTAB_DIRECT);
mem = gen_rtx_MEM (ptr_mode, base);
 -  emit_move_insn (mem, GEN_INT (ASAN_STACK_FRAME_MAGIC));
 +  emit_move_insn (mem, gen_int_mode (ASAN_STACK_FRAME_MAGIC, ptr_mode));
mem = adjust_address (mem, VOIDmode, GET_MODE_SIZE (ptr_mode));
emit_move_insn (mem, expand_normal (str_cst));
shadow_base = expand_binop (Pmode, lshr_optab, base,
   GEN_INT (ASAN_SHADOW_SHIFT),
   NULL_RTX, 1, OPTAB_DIRECT);
shadow_base = expand_binop (Pmode, add_optab, shadow_base,
   gen_int_mode (targetm.asan_shadow_offset (),
 Index: gcc/builtins.c
 ===
 --- gcc/builtins.c  2013-09-09 10:55:59.747537322 +0100
 +++ gcc/builtins.c  2013-09-09 11:06:25.030673776 +0100
 @@ -1963,15 +1963,16 @@ expand_errno_check (tree exp, rtx target
  {
  #ifdef GEN_ERRNO_RTX
rtx errno_rtx = GEN_ERRNO_RTX;
  #else
rtx errno_rtx
   = gen_rtx_MEM (word_mode, gen_rtx_SYMBOL_REF (Pmode, errno));
  #endif
 -  emit_move_insn (errno_rtx, GEN_INT (TARGET_EDOM));
 +  emit_move_insn (errno_rtx,
 + gen_int_mode (TARGET_EDOM, GET_MODE (errno_rtx)));
emit_label (lab);
return;
  }
  #endif

/* Make sure the library call isn't expanded as a tail call.  */
CALL_EXPR_TAILCALL (exp) = 0;
 Index: gcc/dwarf2cfi.c
 ===
 --- gcc/dwarf2cfi.c 2013-09-08 17:18:39.456588227 +0100
 +++ gcc/dwarf2cfi.c 2013-09-09 11:06:25.028673760 +0100
 @@ -242,15 +242,16 @@ expand_builtin_dwarf_sp_column (void)
 which has mode MODE.  Initialize column C as a return address column.  */

  static void
  init_return_column_size (enum machine_mode mode, rtx mem, unsigned int c)
  {
HOST_WIDE_INT offset = c * GET_MODE_SIZE (mode);
HOST_WIDE_INT size = GET_MODE_SIZE (Pmode);
 -  emit_move_insn (adjust_address (mem, mode, offset), GEN_INT (size));
 +  emit_move_insn (adjust_address (mem, mode, offset),
 + gen_int_mode (size, mode));
  }

  /* Generate code to initialize the register size table.  */

  void
  expand_builtin_init_dwarf_reg_sizes (tree address)
  {
 Index: gcc/except.c
 ===
 --- gcc/except.c2013-09-08 17:18:39.456588227 +0100
 +++ gcc/except.c2013-09-09 11:06:25.017673669 +0100
 @@ -1152,15 +1152,15 @@ sjlj_mark_call_sites (void)
before = insn;
if (CALL_P (insn))
 before = find_first_parameter_load (insn, NULL_RTX);

start_sequence ();
mem = adjust_address (crtl-eh.sjlj_fc, TYPE_MODE (integer_type_node),
 sjlj_fc_call_site_ofs);
 -  emit_move_insn (mem, GEN_INT (this_call_site));
 +  emit_move_insn (mem, gen_int_mode (this_call_site, GET_MODE (mem)));
p = get_insns ();
end_sequence ();

emit_insn_before (p, before);
last_call_site = this_call_site;
  }
  }
 Index: gcc/expr.c
 ===
 --- gcc/expr.c  2013-09-09 10:55:59.754537379 +0100
 +++ gcc/expr.c  2013-09-09 11:06:25.021673702 +0100
 @@ -1067,34 +1067,40 @@ move_by_pieces_1 (insn_gen_fn genfun, ma
 from1 = adjust_automodify_address (data-from, mode, data-from_addr,
data-offset);
else
 from1 = adjust_address (data-from, mode, data-offset);

if (HAVE_PRE_DECREMENT  data-explicit_inc_to  0)
 

Re: [1/4] Using gen_int_mode instead of GEN_INT

2013-09-09 Thread Richard Biener
On Mon, Sep 9, 2013 at 12:21 PM, Richard Sandiford
rdsandif...@googlemail.com wrote:
 These four patches are the result of going through gcc/* looking for
 places where there was an obvious mode associated with a GEN_INT and
 where gen_int_mode could therefore be used instead.  Some of the GEN_INTs
 did create noncanoical rtl before, and were trapped by Kenny's assert on
 the wide-int branch.  However, most of the affected places will be benign.
 I'm therefore trying to sell this as a cleanup and don't have any testcases.

 I've split the patches up based on the reason why the mode is obvious.
 In this first patch, the GEN_INTs are all used in calls to gen_rtx_*
 and are all cases where the operand must have the same mode as the result.
 (This excludes things like shift count operands, since they can have a
 different mode from the shifted operand and the result.)

 Tested on x86_64-linux-gnu.  OK to install?

Ok.

Thanks,
Richard.

 Thanks,
 Richard


 gcc/
 * alias.c (addr_side_effect_eval): Use gen_int_mode with the mode
 of the associated gen_rtx_* call.
 * caller-save.c (init_caller_save): Likewise.
 * combine.c (find_split_point, make_extraction): Likewise.
 (make_compound_operation): Likewise.
 * dwarf2out.c (mem_loc_descriptor): Likewise.
 * explow.c (plus_constant, probe_stack_range): Likewise.
 * expmed.c (expand_mult_const): Likewise.
 * expr.c (emit_single_push_insn_1, do_tablejump): Likewise.
 * reload1.c (init_reload): Likewise.
 * valtrack.c (cleanup_auto_inc_dec): Likewise.
 * var-tracking.c (adjust_mems): Likewise.
 * modulo-sched.c (sms_schedule): Likewise, but use gen_rtx_GT
 rather than gen_rtx_fmt_ee.

 Index: gcc/alias.c
 ===
 --- gcc/alias.c 2013-09-08 17:18:39.842591328 +0100
 +++ gcc/alias.c 2013-09-09 10:49:45.213460712 +0100
 @@ -1891,15 +1891,15 @@ addr_side_effect_eval (rtx addr, int siz

  default:
return addr;
  }

if (offset)
  addr = gen_rtx_PLUS (GET_MODE (addr), XEXP (addr, 0),
 -GEN_INT (offset));
 +gen_int_mode (offset, GET_MODE (addr)));
else
  addr = XEXP (addr, 0);
addr = canon_rtx (addr);

return addr;
  }

 Index: gcc/caller-save.c
 ===
 --- gcc/caller-save.c   2013-09-08 17:18:39.842591328 +0100
 +++ gcc/caller-save.c   2013-09-09 10:49:45.213460712 +0100
 @@ -235,15 +235,15 @@ init_caller_save (void)

gcc_assert (i  FIRST_PSEUDO_REGISTER);

addr_reg = gen_rtx_REG (Pmode, i);

for (offset = 1  (HOST_BITS_PER_INT / 2); offset; offset = 1)
  {
 -  address = gen_rtx_PLUS (Pmode, addr_reg, GEN_INT (offset));
 +  address = gen_rtx_PLUS (Pmode, addr_reg, gen_int_mode (offset, Pmode));

for (i = 0; i  FIRST_PSEUDO_REGISTER; i++)
 if (regno_save_mode[i][1] != VOIDmode
! strict_memory_address_p (regno_save_mode[i][1], address))
   break;

if (i == FIRST_PSEUDO_REGISTER)
 Index: gcc/combine.c
 ===
 --- gcc/combine.c   2013-09-08 17:18:39.842591328 +0100
 +++ gcc/combine.c   2013-09-09 10:49:45.218460753 +0100
 @@ -4721,21 +4721,22 @@ find_split_point (rtx *loc, rtx insn, bo
  AND or two shifts.  Use two shifts for field sizes where the
  constant might be too large.  We assume here that we can
  always at least get 8-bit constants in an AND insn, which is
  true for every current RISC.  */

   if (unsignedp  len = 8)
 {
 + unsigned HOST_WIDE_INT mask
 +   = ((unsigned HOST_WIDE_INT) 1  len) - 1;
   SUBST (SET_SRC (x),
  gen_rtx_AND (mode,
   gen_rtx_LSHIFTRT
   (mode, gen_lowpart (mode, inner),
GEN_INT (pos)),
 - GEN_INT (((unsigned HOST_WIDE_INT) 1  len)
 -  - 1)));
 + gen_int_mode (mask, mode)));

   split = find_split_point (SET_SRC (x), insn, true);
   if (split  split != SET_SRC (x))
 return split;
 }
   else
 {
 @@ -4810,17 +4811,19 @@ find_split_point (rtx *loc, rtx insn, bo
if (set_src  code == MINUS  GET_CODE (XEXP (x, 1)) == MULT
GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))  0)
 {
   enum machine_mode mode = GET_MODE (x);
   unsigned HOST_WIDE_INT this_int = INTVAL (XEXP (XEXP (x, 1), 1));
   HOST_WIDE_INT other_int = trunc_int_for_mode (-this_int, mode);
 - SUBST (*loc, gen_rtx_PLUS (mode, 

Re: [2/4] Using gen_int_mode instead of GEN_INT

2013-09-09 Thread Richard Biener
On Mon, Sep 9, 2013 at 12:24 PM, Richard Sandiford
rdsandif...@googlemail.com wrote:
 Similar to patch 1, but here the calls are all to expand_* routines.

 Tested in the same way as before.  OK to install?

Ok.

Thanks,
Richard.

 Thanks,
 Richard


 gcc/
 * asan.c (asan_clear_shadow): Use gen_int_mode with the mode
 of the associated expand_* call.
 (asan_emit_stack_protection): Likewise.
 * builtins.c (round_trampoline_addr): Likewise.
 * explow.c (allocate_dynamic_stack_space, probe_stack_range): 
 Likewise.
 * expmed.c (expand_smod_pow2, expand_sdiv_pow2, expand_divmod)
 (emit_store_flag): Likewise.
 * expr.c (emit_move_resolve_push, push_block, emit_single_push_insn_1)
 (emit_push_insn, optimize_bitfield_assignment_op, expand_expr_real_1):
 Likewise.
 * function.c (instantiate_virtual_regs_in_insn): Likewise.
 * ifcvt.c (noce_try_store_flag_constants): Likewise.
 * loop-unroll.c (unroll_loop_runtime_iterations): Likewise.
 * modulo-sched.c (generate_prolog_epilog): Likewise.
 * optabs.c (expand_binop, widen_leading, expand_doubleword_clz)
 (expand_ctz, expand_ffs, expand_unop): Likewise.

 Index: gcc/asan.c
 ===
 --- gcc/asan.c  2013-09-09 10:23:49.095277703 +0100
 +++ gcc/asan.c  2013-09-09 10:55:59.743537289 +0100
 @@ -897,15 +897,15 @@ asan_clear_shadow (rtx shadow_mem, HOST_
top_label = gen_label_rtx ();
addr = force_reg (Pmode, XEXP (shadow_mem, 0));
shadow_mem = adjust_automodify_address (shadow_mem, SImode, addr, 0);
end = force_reg (Pmode, plus_constant (Pmode, addr, len));
emit_label (top_label);

emit_move_insn (shadow_mem, const0_rtx);
 -  tmp = expand_simple_binop (Pmode, PLUS, addr, GEN_INT (4), addr,
 +  tmp = expand_simple_binop (Pmode, PLUS, addr, gen_int_mode (4, Pmode), 
 addr,
   true, OPTAB_LIB_WIDEN);
if (tmp != addr)
  emit_move_insn (addr, tmp);
emit_cmp_and_jump_insns (addr, end, LT, NULL_RTX, Pmode, true, top_label);
jump = get_last_insn ();
gcc_assert (JUMP_P (jump));
add_reg_note (jump, REG_BR_PROB, GEN_INT (REG_BR_PROB_BASE * 80 / 100));
 @@ -962,25 +962,27 @@ asan_emit_stack_protection (rtx base, HO
else
 pp_string (asan_pp, 9 unknown);
pp_space (asan_pp);
  }
str_cst = asan_pp_string (asan_pp);

/* Emit the prologue sequence.  */
 -  base = expand_binop (Pmode, add_optab, base, GEN_INT (base_offset),
 +  base = expand_binop (Pmode, add_optab, base,
 +  gen_int_mode (base_offset, Pmode),
NULL_RTX, 1, OPTAB_DIRECT);
mem = gen_rtx_MEM (ptr_mode, base);
emit_move_insn (mem, GEN_INT (ASAN_STACK_FRAME_MAGIC));
mem = adjust_address (mem, VOIDmode, GET_MODE_SIZE (ptr_mode));
emit_move_insn (mem, expand_normal (str_cst));
shadow_base = expand_binop (Pmode, lshr_optab, base,
   GEN_INT (ASAN_SHADOW_SHIFT),
   NULL_RTX, 1, OPTAB_DIRECT);
shadow_base = expand_binop (Pmode, add_optab, shadow_base,
 - GEN_INT (targetm.asan_shadow_offset ()),
 + gen_int_mode (targetm.asan_shadow_offset (),
 +   Pmode),
   NULL_RTX, 1, OPTAB_DIRECT);
gcc_assert (asan_shadow_set != -1
(ASAN_RED_ZONE_SIZE  ASAN_SHADOW_SHIFT) == 4);
shadow_mem = gen_rtx_MEM (SImode, shadow_base);
set_mem_alias_set (shadow_mem, asan_shadow_set);
prev_offset = base_offset;
for (l = length; l; l -= 2)
 Index: gcc/builtins.c
 ===
 --- gcc/builtins.c  2013-09-08 17:18:39.707590244 +0100
 +++ gcc/builtins.c  2013-09-09 10:55:59.747537322 +0100
 @@ -4858,16 +4858,16 @@ round_trampoline_addr (rtx tramp)
/* If we don't need too much alignment, we'll have been guaranteed
   proper alignment by get_trampoline_type.  */
if (TRAMPOLINE_ALIGNMENT = STACK_BOUNDARY)
  return tramp;

/* Round address up to desired boundary.  */
temp = gen_reg_rtx (Pmode);
 -  addend = GEN_INT (TRAMPOLINE_ALIGNMENT / BITS_PER_UNIT - 1);
 -  mask = GEN_INT (-TRAMPOLINE_ALIGNMENT / BITS_PER_UNIT);
 +  addend = gen_int_mode (TRAMPOLINE_ALIGNMENT / BITS_PER_UNIT - 1, Pmode);
 +  mask = gen_int_mode (-TRAMPOLINE_ALIGNMENT / BITS_PER_UNIT, Pmode);

temp  = expand_simple_binop (Pmode, PLUS, tramp, addend,
temp, 0, OPTAB_LIB_WIDEN);
tramp = expand_simple_binop (Pmode, AND, temp, mask,
temp, 0, OPTAB_LIB_WIDEN);

return tramp;
 Index: gcc/explow.c
 ===
 --- gcc/explow.c2013-09-09 10:49:45.226460819 +0100
 +++ gcc/explow.c2013-09-09 

Re: [3/4] Using gen_int_mode instead of GEN_INT

2013-09-09 Thread Richard Biener
On Mon, Sep 9, 2013 at 12:27 PM, Richard Sandiford
rdsandif...@googlemail.com wrote:
 Similar to patch 1, but here the calls are all to simplify_* routines.
 In the 7414 combine.c hunk, the code is PLUS, AND, IOR or XOR.
 In the hunk after that, the cval |= ... stuff is effectively doing
 a trunc_int_for_mode by hand.

 Tested in the same way as before.  OK to install?

Ok.

Thanks,
Richard.

 Thanks,
 Richard


 gcc/
 * combine.c (simplify_set, expand_field_assignment, 
 extract_left_shift)
 (force_to_mode, simplify_shift_const_1, simplify_comparison):
 Use gen_int_mode with the mode of the associated simplify_* call.
 * explow.c (probe_stack_range, anti_adjust_stack_and_probe): Likewise.
 * expmed.c (expand_shift_1): Likewise.
 * function.c (instantiate_virtual_regs_in_insn): Likewise.
 * loop-iv.c (iv_number_of_iterations): Likewise.
 * loop-unroll.c (unroll_loop_runtime_iterations): Likewise.
 * simplify-rtx.c (simplify_binary_operation_1): Likewise.

 Index: gcc/combine.c
 ===
 --- gcc/combine.c   2013-09-09 10:49:45.218460753 +0100
 +++ gcc/combine.c   2013-09-09 11:11:13.699055109 +0100
 @@ -6370,16 +6370,17 @@ simplify_set (rtx x)

   if ((recog_for_combine (pat, other_insn, note)  0
 ! check_asm_operands (pat)))
 {
   *cc_use = old_cc_use;
   other_changed = 0;

 - op0 = simplify_gen_binary (XOR, GET_MODE (op0),
 -op0, GEN_INT (mask));
 + op0 = simplify_gen_binary (XOR, GET_MODE (op0), op0,
 +gen_int_mode (mask,
 +  GET_MODE (op0)));
 }
 }
 }

if (other_changed)
 undobuf.other_insn = other_insn;

 @@ -6893,19 +6894,21 @@ expand_field_assignment (const_rtx x)
   else if (GET_CODE (pos) == MINUS
 CONST_INT_P (XEXP (pos, 1))
 (INTVAL (XEXP (pos, 1))
== GET_MODE_PRECISION (GET_MODE (inner)) - len))
 /* If position is ADJUST - X, new position is X.  */
 pos = XEXP (pos, 0);
   else
 -   pos = simplify_gen_binary (MINUS, GET_MODE (pos),
 -  GEN_INT (GET_MODE_PRECISION (
 -   GET_MODE (inner))
 -   - len),
 -  pos);
 +   {
 + HOST_WIDE_INT prec = GET_MODE_PRECISION (GET_MODE (inner));
 + pos = simplify_gen_binary (MINUS, GET_MODE (pos),
 +gen_int_mode (prec - len,
 +  GET_MODE (pos)),
 +pos);
 +   }
 }
 }

/* A SUBREG between two modes that occupy the same numbers of words
  can be done by moving the SUBREG to the source.  */
else if (GET_CODE (SET_DEST (x)) == SUBREG
/* We need SUBREGs to compute nonzero_bits properly.  */
 @@ -6950,15 +6953,16 @@ expand_field_assignment (const_rtx x)
/* Compute a mask of LEN bits, if we can do this on the host machine.  
 */
if (len = HOST_BITS_PER_WIDE_INT)
 break;

/* Now compute the equivalent expression.  Make a copy of INNER
  for the SET_DEST in case it is a MEM into which we will substitute;
  we don't want shared RTL in that case.  */
 -  mask = GEN_INT (((unsigned HOST_WIDE_INT) 1  len) - 1);
 +  mask = gen_int_mode (((unsigned HOST_WIDE_INT) 1  len) - 1,
 +  compute_mode);
cleared = simplify_gen_binary (AND, compute_mode,
  simplify_gen_unary (NOT, compute_mode,
simplify_gen_binary (ASHIFT,
 compute_mode,
 mask, pos),
compute_mode),
  inner);
 @@ -7414,17 +7418,19 @@ extract_left_shift (rtx x, int count)
  case PLUS:  case IOR:  case XOR:  case AND:
/* If we can safely shift this constant and we find the inner shift,
  make a new operation.  */
if (CONST_INT_P (XEXP (x, 1))
(UINTVAL (XEXP (x, 1))
unsigned HOST_WIDE_INT) 1  count)) - 1)) == 0
(tem = extract_left_shift (XEXP (x, 0), count)) != 0)
 -   return simplify_gen_binary (code, mode, tem,
 -   GEN_INT (INTVAL (XEXP (x, 1))  count));

Re: Simplify expmed.c:lshift_value

2013-09-09 Thread Richard Biener
On Mon, Sep 9, 2013 at 12:31 PM, Richard Sandiford
rdsandif...@googlemail.com wrote:
 expmed.c:lshift_value has:

   val = double_int::from_uhwi (INTVAL (value)).zext (bitsize);
   val = val.llshift (bitpos, HOST_BITS_PER_DOUBLE_INT);

 but its only caller has already zero-extended INTVAL (value) from BITSIZE,
 so we might as well pass that instead of the (value, bitsize) pair.

 This isn't really much of a win on its own, but it makes the associated
 wide-int change more obvious.

 Tested on x86_64-linux-gnu.  OK to install?

Ok.

Thanks,
Richard.

 Thanks,
 Richard


 gcc/
 * expmed.c (lshift_value): Take an unsigned HOST_WIDE_INT instead
 of an rtx/bitpos pair.
 (store_fixed_bit_field): Update accordingly.

 Index: gcc/expmed.c
 ===
 --- gcc/expmed.c2013-09-08 11:52:28.995962127 +0100
 +++ gcc/expmed.c2013-09-08 14:21:49.710092468 +0100
 @@ -56,7 +56,7 @@ static rtx extract_fixed_bit_field (enum
 unsigned HOST_WIDE_INT,
 unsigned HOST_WIDE_INT, rtx, int, bool);
  static rtx mask_rtx (enum machine_mode, int, int, int);
 -static rtx lshift_value (enum machine_mode, rtx, int, int);
 +static rtx lshift_value (enum machine_mode, unsigned HOST_WIDE_INT, int);
  static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
 unsigned HOST_WIDE_INT, int);
  static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, 
 rtx);
 @@ -991,7 +991,7 @@ store_fixed_bit_field (rtx op0, unsigned
|| (bitsize == HOST_BITS_PER_WIDE_INT  v == -1))
 all_one = 1;

 -  value = lshift_value (mode, value, bitnum, bitsize);
 +  value = lshift_value (mode, v, bitnum);
  }
else
  {
 @@ -1862,14 +1862,15 @@ mask_rtx (enum machine_mode mode, int bi
  }

  /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
 -   VALUE truncated to BITSIZE bits and then shifted left BITPOS bits.  */
 +   VALUE  BITPOS.  */

  static rtx
 -lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
 +lshift_value (enum machine_mode mode, unsigned HOST_WIDE_INT value,
 + int bitpos)
  {
double_int val;

 -  val = double_int::from_uhwi (INTVAL (value)).zext (bitsize);
 +  val = double_int::from_uhwi (value);
val = val.llshift (bitpos, HOST_BITS_PER_DOUBLE_INT);

return immed_double_int_const (val, mode);


Re: [PATCH, x86] Use vector moves in memmove expanding

2013-09-09 Thread Uros Bizjak
On Mon, Sep 9, 2013 at 12:22 PM, Michael V. Zolotukhin
michael.v.zolotuk...@gmail.com wrote:
  Is mtune ok here?
 Yes.
 Thanks, fixed.  Ok to commit?
 I verified that the test fails without fix in i386.c and passes with it.

 Changelog:
 gcc:
 2013-09-09  Michael Zolotukhin  michael.v.zolotuk...@gmail.com

 * config/i386/i386.c (ix86_expand_movmem): Fix epilogue generation.

 gcc/testsuite:
 2013-09-09  Michael Zolotukhin  michael.v.zolotuk...@gmail.com

 * gcc.dg/torture/memcpy-1.c: New test.

OK.

Thanks,
Uros.


Re: [c++-concepts] Class template constraints

2013-09-09 Thread Andrew Sutton
Ok to commit? Attached is the doc fix patch. I'll send the TREE_TYPE
patch shortly.

Andrew
Andrew Sutton


On Sat, Sep 7, 2013 at 1:00 PM, Jason Merrill ja...@redhat.com wrote:
 On 09/06/2013 12:03 PM, Andrew Sutton wrote:

 +// Returns the template type of the class scope being entered. If we're
 +// entering a constrained class scope. TMPL is the most general template
 +// of the scope being entered, and TYPE is its type.


 TMPL is not part of the interface of fixup_template_type, so it should be
 documented when it is declared rather than before the function.

 OK with that tweak.

 +  tree cur_constr = TREE_TYPE (parms);


 In a separate patch, I'd like to use a different macro name for getting
 constraints from template parms.

 Jason



templates-2.patch
Description: Binary data


[RS6000] Fix PR58330 powerpc64 atomic store split in two

2013-09-09 Thread Alan Modra
This patch prevents the powerpc backend from combining a 64-bit
volatile load or store with a bswap insn when the resulting combined
insn will be implemented as two lwbrx or stwbrx machine insns.
Bootstrapped and regression tested powerpc64-linux.

PR target/58330
* config/rs6000/rs6000.md (bswapdi2_64bit): Disable for volatile mems.
gcc/testsuite/
* gcc.target/powerpc/pr58330.c: New.

Index: gcc/config/rs6000/rs6000.md
===
--- gcc/config/rs6000/rs6000.md (revision 202351)
+++ gcc/config/rs6000/rs6000.md (working copy)
@@ -2376,7 +2376,9 @@
(clobber (match_scratch:DI 3 =r,r,r))
(clobber (match_scratch:DI 4 =r,X,r))]
   TARGET_POWERPC64  !TARGET_LDBRX
-(REG_P (operands[0]) || REG_P (operands[1]))
+(REG_P (operands[0]) || REG_P (operands[1]))
+!(MEM_P (operands[0])  MEM_VOLATILE_P (operands[0]))
+!(MEM_P (operands[1])  MEM_VOLATILE_P (operands[1]))
   #
   [(set_attr length 16,12,36)])
 
Index: gcc/testsuite/gcc.target/powerpc/pr58330.c
===
--- gcc/testsuite/gcc.target/powerpc/pr58330.c  (revision 0)
+++ gcc/testsuite/gcc.target/powerpc/pr58330.c  (revision 0)
@@ -0,0 +1,11 @@
+/* { dg-do compile { target { powerpc*-*-* } } } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options -O -mno-popcntb } */
+/* { dg-final { scan-assembler-not stwbrx } } */
+
+void
+write_reverse (unsigned long *addr, unsigned long val)
+{
+  unsigned long reverse = __builtin_bswap64 (val);
+  __atomic_store_n (addr, reverse, __ATOMIC_RELAXED);
+}

-- 
Alan Modra
Australia Development Lab, IBM


Re: [c++-concepts] Class template constraints

2013-09-09 Thread Gabriel Dos Reis
Andrew Sutton andrew.n.sut...@gmail.com writes:

| Ok to commit? Attached is the doc fix patch. I'll send the TREE_TYPE
| patch shortly.

Yes, please -- that is what Jason earlier.  Let me know when youve
committed so that I can synchronize with trunk.

-- Gaby


Re: operator new returns nonzero

2013-09-09 Thread Gabriel Dos Reis
On Mon, Sep 9, 2013 at 4:06 AM, Richard Biener
richard.guent...@gmail.com wrote:
 On Sat, Sep 7, 2013 at 11:00 PM, Marc Glisse marc.gli...@inria.fr wrote:
 On Sat, 7 Sep 2013, Mike Stump wrote:

 On Sep 7, 2013, at 12:27 PM, Marc Glisse marc.gli...@inria.fr wrote:

 Now flag_check_new should probably disable this optimization…


 Yes, this why my point.


 Ok, here it is (again, no proper testing until bootstrap is fixed)

 I wonder what happens on targets where 0 is a valid address of an object
 (stated by !flag_delete_null_pointer_checks)?

We should distinguish between front-end notion of null pointer,
and backend notion of address zero.  The language, and therefore
the front-end, does not allow an object to have 'this' value to be
a null pointer, nor does is allow 'operator new' to return null pointer.

Consequently, if we have a target (which ones?) where zero is
a valid address for an object, that target should take precaution
to satisfy the requirement of the language.

-- Gaby


 Richard.


 2013-09-07  Marc Glisse  marc.gli...@inria.fr

 PR c++/19476
 gcc/
 * fold-const.c (tree_expr_nonzero_warnv_p): Handle operator new.
 * tree-vrp.c (gimple_stmt_nonzero_warnv_p,
 stmt_interesting_for_vrp):
 Likewise.
 (vrp_visit_stmt): Remove duplicated code.

 gcc/testsuite/
 * g++.dg/tree-ssa/pr19476-1.C: New file.
 * g++.dg/tree-ssa/pr19476-2.C: Likewise.
 * g++.dg/tree-ssa/pr19476-3.C: Likewise.

 --
 Marc Glisse
 Index: testsuite/g++.dg/tree-ssa/pr19476-1.C
 ===
 --- testsuite/g++.dg/tree-ssa/pr19476-1.C   (revision 0)
 +++ testsuite/g++.dg/tree-ssa/pr19476-1.C   (revision 0)
 @@ -0,0 +1,15 @@
 +/* { dg-do compile } */
 +/* { dg-options -O -fdump-tree-ccp1 } */
 +
 +#include new
 +
 +int f(){
 +  return 33 + (0 == new(std::nothrow) int);
 +}
 +int g(){
 +  return 42 + (0 == new int[50]);
 +}
 +
 +/* { dg-final { scan-tree-dump return 42 ccp1 } } */
 +/* { dg-final { scan-tree-dump-not return 33 ccp1 } } */
 +/* { dg-final { cleanup-tree-dump ccp1 } } */

 Property changes on: testsuite/g++.dg/tree-ssa/pr19476-1.C
 ___
 Added: svn:keywords
+ Author Date Id Revision URL
 Added: svn:eol-style
+ native

 Index: testsuite/g++.dg/tree-ssa/pr19476-2.C
 ===
 --- testsuite/g++.dg/tree-ssa/pr19476-2.C   (revision 0)
 +++ testsuite/g++.dg/tree-ssa/pr19476-2.C   (revision 0)
 @@ -0,0 +1,17 @@
 +/* { dg-do compile } */
 +/* { dg-options -O2 -fdump-tree-optimized } */
 +
 +#include new
 +
 +int f(){
 +  int *p = new(std::nothrow) int;
 +  return 33 + (0 == p);
 +}
 +int g(){
 +  int *p = new int[50];
 +  return 42 + (0 == p);
 +}
 +
 +/* { dg-final { scan-tree-dump return 42 optimized } } */
 +/* { dg-final { scan-tree-dump-not return 33 optimized } } */
 +/* { dg-final { cleanup-tree-dump optimized } } */

 Property changes on: testsuite/g++.dg/tree-ssa/pr19476-2.C
 ___
 Added: svn:keywords
+ Author Date Id Revision URL
 Added: svn:eol-style
+ native

 Index: testsuite/g++.dg/tree-ssa/pr19476-3.C
 ===
 --- testsuite/g++.dg/tree-ssa/pr19476-3.C   (revision 0)
 +++ testsuite/g++.dg/tree-ssa/pr19476-3.C   (revision 0)
 @@ -0,0 +1,11 @@
 +/* { dg-do compile } */
 +/* { dg-options -O3 -fcheck-new -fdump-tree-optimized } */
 +
 +#include new
 +
 +int g(){
 +  return 42 + (0 == new int);
 +}
 +
 +/* { dg-final { scan-tree-dump-not return 42 optimized } } */
 +/* { dg-final { cleanup-tree-dump optimized } } */

 Property changes on: testsuite/g++.dg/tree-ssa/pr19476-3.C
 ___
 Added: svn:keywords
+ Author Date Id Revision URL
 Added: svn:eol-style
+ native

 Index: fold-const.c
 ===
 --- fold-const.c(revision 202351)
 +++ fold-const.c(working copy)
 @@ -16171,21 +16171,31 @@ tree_expr_nonzero_warnv_p (tree t, bool
  case MODIFY_EXPR:
  case BIND_EXPR:
return tree_expr_nonzero_warnv_p (TREE_OPERAND (t, 1),
 strict_overflow_p);

  case SAVE_EXPR:
return tree_expr_nonzero_warnv_p (TREE_OPERAND (t, 0),
 strict_overflow_p);

  case CALL_EXPR:
 -  return alloca_call_p (t);
 +  {
 +   tree fn = CALL_EXPR_FN (t);
 +   if (TREE_CODE (fn) != ADDR_EXPR) return false;
 +   tree fndecl = TREE_OPERAND (fn, 0);
 +   if (TREE_CODE (fndecl) != FUNCTION_DECL) return false;
 +   if (!flag_check_new
 +DECL_IS_OPERATOR_NEW (fndecl)
 +!TREE_NOTHROW (fndecl))
 + return true;
 +   return alloca_call_p (t);

Re: operator new returns nonzero

2013-09-09 Thread Gabriel Dos Reis
On Mon, Sep 9, 2013 at 4:19 AM, Marc Glisse marc.gli...@inria.fr wrote:
 On Mon, 9 Sep 2013, Richard Biener wrote:

 On Sat, Sep 7, 2013 at 11:00 PM, Marc Glisse marc.gli...@inria.fr wrote:

 On Sat, 7 Sep 2013, Mike Stump wrote:

 On Sep 7, 2013, at 12:27 PM, Marc Glisse marc.gli...@inria.fr wrote:


 Now flag_check_new should probably disable this optimization…



 Yes, this why my point.



 Ok, here it is (again, no proper testing until bootstrap is fixed)


 I wonder what happens on targets where 0 is a valid address of an object
 (stated by !flag_delete_null_pointer_checks)?


 I am not at all familiar with those targets (I thought you had to write
 asm to access 0 so the compiler doesn't mess with your code), but it
 makes sense to me to test (flag_delete_null_pointer_checks 
 !flag_check_new) instead of just !flag_check_new. Consider the patch
 changed this way. (we have so many options, I wouldn't be surprised if
 there is yet another one to check…)

If we have such a target (do we?) where 0 is a valid address of an object,
I would not be surprised if it breaks in so many other ways, since the
language explicitly states that can never happen (programmers write
code depending on that).



 --
 Marc Glisse


[C++, diagnostic Patch] PR 58362

2013-09-09 Thread Paolo Carlini

Hi all, hi Gaby,

thus a more sensible try at fixing this issue: 
function.c:do_warn_unused_parameter uses %q+D for the error call. That 
means that cp/error.c:cp_printer does:


  if (set_locus  t != NULL)
*text-locus = location_of (t);

and it's important that location_of (t) is correct. However, for 
PARM_DECLs, location_of (still) does:


  if (TREE_CODE (t) == PARM_DECL  DECL_CONTEXT (t))
t = DECL_CONTEXT (t);
  else if ...
  ...

which means that for those we end up with the location of a 
FUNCTION_DECL or anyway not exactly the location of the PARM_DECL 
itself. I went through the C++ front-end and currently there are very 
few cases if any where location_of may be fed a PARM_DECL, and the very 
few seem Ok to me. The testsuite passes on x86_64-linux of course, thus 
I'm proposing the below. IMHO now that we are in Stage 1 the risk of 
breaking locations somewhere else is low enough. By the way, as 
discussed today elsewhere, the C front-end is already Ok.


Thanks!
Paolo.

//
/cp
2013-09-09  Paolo Carlini  paolo.carl...@oracle.com

PR c++/58362
* error.c (location_of): Don't handle PARM_DECLs specially.

/testsuite
2013-09-09  Paolo Carlini  paolo.carl...@oracle.com

PR c++/58362
* g++.dg/warn/Wunused-parm-5.C: New.
Index: cp/error.c
===
--- cp/error.c  (revision 202384)
+++ cp/error.c  (working copy)
@@ -2786,9 +2789,7 @@ lang_decl_name (tree decl, int v, bool translate)
 location_t
 location_of (tree t)
 {
-  if (TREE_CODE (t) == PARM_DECL  DECL_CONTEXT (t))
-t = DECL_CONTEXT (t);
-  else if (TYPE_P (t))
+  if (TYPE_P (t))
 {
   t = TYPE_MAIN_DECL (t);
   if (t == NULL_TREE)
Index: testsuite/g++.dg/warn/Wunused-parm-5.C
===
--- testsuite/g++.dg/warn/Wunused-parm-5.C  (revision 0)
+++ testsuite/g++.dg/warn/Wunused-parm-5.C  (working copy)
@@ -0,0 +1,14 @@
+// PR c++/58362
+// { dg-options -Wunused-parameter }
+
+void f1 (long s) { }  // { dg-warning 15:unused parameter 's' }
+
+void f2 (long s, int u) { }  // { dg-warning 15:unused parameter 's' }
+// { dg-warning 22:unused parameter 'u'  { target *-*-* } 6 }
+
+void f3 (long s);
+void f3 (long s) { }  // { dg-warning 15:unused parameter 's' }
+
+void f4 (long s, int u);
+void f4 (long s, int u) { }  // { dg-warning 15:unused parameter 's' }
+// { dg-warning 22:unused parameter 'u'  { target *-*-* } 13 }


Re: [Patch to gcc/function] PR 58362

2013-09-09 Thread Gabriel Dos Reis
On Mon, Sep 9, 2013 at 4:46 AM, Jakub Jelinek ja...@redhat.com wrote:
 On Mon, Sep 09, 2013 at 11:45:08AM +0200, Richard Biener wrote:
 Well, in this case the patch should IMHO be a no-op.

 -  warning (OPT_Wunused_parameter, unused parameter %q+D, decl);
 +  warning_at (DECL_SOURCE_LOCATION (decl), OPT_Wunused_parameter,
 +   unused parameter %qD, decl);

 no?  Unless I misunderstand what %q+D should do.

 The question is how exactly is %q+D defined, if it is
 warning_at (location_of (decl), OPT_Wunused_parameter, unused parameter 
 %qD, decl); in this case, or
 DECL_SOURCE_LOCATION (decl) instead.

The semantics of '%+D' was defined long before I got involved.

The way it was supposed to work is that we pick the location
of the decl being specified, instead of taking the current
location.  When we figured that was insufficient, we introduced
%H to say: pick this location.  For that reason, one can only have
on +D in a diagnostic message (I don't think we

-- Gaby


RE: [PATCH][AArch64] Fix FAIL: gcc.target/aarch64/cmn.c scan-assembler cmn\tw[0-9]

2013-09-09 Thread Kyrylo Tkachov
Hi Richard,

 On 29/07/13 14:58, Kyrylo Tkachov wrote:
  Hi all,
 
  Now that combine emits the canonical form for (eq (neg x) (y)) instead
 of (eq
  (x) (neg y)), this patch fixes up the corresponding pattern in aarch64
 to
  match that. This enables combine to properly generate the cmn
 instruction
  where appropriate.
 
  Tested aarch64-none-elf on model.
 
  Ok for trunk?
 
 
 No, this is wrong for inequalities, since in reality it's the second
 operand that's inverted.
 
 You'll need to use CC_SWP mode and then arrange for this to be correctly
 picked by select_cc_mode.
 

How's this?

aarch64_select_cc_mode is updated to return CC_SWPmode for these comparisons and
the MD pattern is updated accordingly.

Tested aarch64-none-elf on a model.

Is this ok?

Thanks,
Kyrill

2013-09-09  Kyrylo Tkachov  kyrylo.tkac...@arm.com

* config/aarch64/aarch64.c (aarch64_select_cc_mode): Return CC_SWP for
comparison with negated operand.
* config/aarch64/aarch64.md (compare_negmode): Match canonical RTL 
form.

2013-09-09  Kyrylo Tkachov  kyrylo.tkac...@arm.com

* gcc.target/aarch64/cmn-neg.c: New test.



aarch64-cmn.patch
Description: Binary data


Re: [Patch to gcc/function] PR 58362

2013-09-09 Thread Gabriel Dos Reis
On Mon, Sep 9, 2013 at 5:04 AM, Richard Biener rguent...@suse.de wrote:
 On Mon, 9 Sep 2013, Jakub Jelinek wrote:

 On Mon, Sep 09, 2013 at 11:45:08AM +0200, Richard Biener wrote:
  Well, in this case the patch should IMHO be a no-op.
 
  -  warning (OPT_Wunused_parameter, unused parameter %q+D, decl);
  +  warning_at (DECL_SOURCE_LOCATION (decl), OPT_Wunused_parameter,
  + unused parameter %qD, decl);
 
  no?  Unless I misunderstand what %q+D should do.

 The question is how exactly is %q+D defined, if it is
 warning_at (location_of (decl), OPT_Wunused_parameter, unused parameter 
 %qD, decl); in this case, or
 DECL_SOURCE_LOCATION (decl) instead.

 It can't be 'location_of' because that's a C++ FE speciality but
 warning_at and %q+D are diagnostic machinery level.

%+D was, and has for long, been a C++ FE-specific marker.

I don't recall when we decided to make that available to all front-ends.

 Unless of course the meaning of %q+D depends on the frontend which
 would make its use from the middle-end ill-defined.

We introduced xxx_at so that a different locus (different from the
decl and different from current locus, if ever defined) can be specified.

-- Gaby


Re: [Patch to gcc/function] PR 58362

2013-09-09 Thread Gabriel Dos Reis
On Mon, Sep 9, 2013 at 5:13 AM, Richard Biener rguent...@suse.de wrote:
 On Mon, 9 Sep 2013, Paolo Carlini wrote:

 On 09/09/2013 12:04 PM, Richard Biener wrote:
  On Mon, 9 Sep 2013, Jakub Jelinek wrote:
 
   On Mon, Sep 09, 2013 at 11:45:08AM +0200, Richard Biener wrote:
Well, in this case the patch should IMHO be a no-op.
   
-  warning (OPT_Wunused_parameter, unused parameter %q+D, decl);
+  warning_at (DECL_SOURCE_LOCATION (decl), OPT_Wunused_parameter,
+ unused parameter %qD, decl);
   
no?  Unless I misunderstand what %q+D should do.
   The question is how exactly is %q+D defined, if it is
   warning_at (location_of (decl), OPT_Wunused_parameter, unused parameter
   %qD, decl); in this case, or
   DECL_SOURCE_LOCATION (decl) instead.
  It can't be 'location_of' because that's a C++ FE speciality but
  warning_at and %q+D are diagnostic machinery level.
 Everything happens via call backs. Thus from the generic diagnostic 
 machinery,
 you go to cp_printer for C++, thus location_of for C++. In C is different, 
 but
 again there is, evidently, a mechanism which uses DECL_CONTEXT for PARM_DECLs
 which leads to an inaccurate location when we *really* want the location of
 the parameter (exactly as I explained for C++).

 I understand that.  But I question it.  Why would that ever be useful?
 Can't the places that want that simply use warning/error_at with the
 proper location?

I agree with Richard that if you want a locus that is not the current locus,
and is not the locus of the decl, you should use the _at version.

-- Gaby


Re: [Patch to gcc/function] PR 58362

2013-09-09 Thread Gabriel Dos Reis
On Mon, Sep 9, 2013 at 5:38 AM, Paolo Carlini paolo.carl...@oracle.com wrote:
 On 09/09/2013 11:37 AM, Richard Biener wrote:

 That said, grepping for %q+D reveals quite some uses and it looks like
 all of them expect the location being used to be that of the decl passed
 to the diagnostic call, not some random other location.

 If the decl is *not* a PARM_DECL, I expect %q+D to be often accurate. In
 fact, even when *is* a PARM_DECL what we have now is pretty decent, because
 normally the location of the corresponding FUNCTION_DECL isn't that far. The
 point is whether we want to be *more* accurate and point to the specific
 unused parameter, for C and C++, as clang and icc do.

I think the logic is simpler if we use the xxx_at form in these cases.

-- Gaby


PING^2: Re: [patch] implement Cilk Plus simd loops on trunk

2013-09-09 Thread Aldy Hernandez

Hi guys!

PING for both C and C++.

Thanks.


 Original Message 
Subject: Re: PING: Fwd: Re: [patch] implement Cilk Plus simd loops on trunk
Date: Tue, 27 Aug 2013 15:03:26 -0500
From: Aldy Hernandez al...@redhat.com
To: Richard Henderson r...@redhat.com
CC: jason merrill ja...@redhat.com,  gcc-patches gcc-patches@gcc.gnu.org

On 08/26/13 12:22, Richard Henderson wrote:

+static tree
+c_check_cilk_loop_incr (location_t loc, tree decl, tree incr)
+{
+  if (EXPR_HAS_LOCATION (incr))
+loc = EXPR_LOCATION (incr);
+
+  if (!incr)
+{
+  error_at (loc, missing increment);
+  return error_mark_node;
+}


Either these tests are swapped, or the second one isn't really needed.


Swapped.  Fixed.





+  switch (TREE_CODE (incr))
+{
+case POSTINCREMENT_EXPR:
+case PREINCREMENT_EXPR:
+case POSTDECREMENT_EXPR:
+case PREDECREMENT_EXPR:
+  if (TREE_OPERAND (incr, 0) != decl)
+   break;
+
+  // Bah... canonicalize into whatever OMP_FOR_INCR needs.
+  if (POINTER_TYPE_P (TREE_TYPE (decl))
+  TREE_OPERAND (incr, 1))
+   {
+ tree t = fold_convert_loc (loc,
+sizetype, TREE_OPERAND (incr, 1));
+
+ if (TREE_CODE (incr) == POSTDECREMENT_EXPR
+ || TREE_CODE (incr) == PREDECREMENT_EXPR)
+   t = fold_build1_loc (loc, NEGATE_EXPR, sizetype, t);
+ t = fold_build_pointer_plus (decl, t);
+ incr = build2 (MODIFY_EXPR, void_type_node, decl, t);
+   }
+  return incr;


Handling pointer types and pointer_plus_expr here (p++) ...


+case MODIFY_EXPR:
+  {
+   tree rhs;
+
+   if (TREE_OPERAND (incr, 0) != decl)
+ break;
+
+   rhs = TREE_OPERAND (incr, 1);
+   if (TREE_CODE (rhs) == PLUS_EXPR
+(TREE_OPERAND (rhs, 0) == decl
+   || TREE_OPERAND (rhs, 1) == decl)
+INTEGRAL_TYPE_P (TREE_TYPE (rhs)))
+ return incr;
+   else if (TREE_CODE (rhs) == MINUS_EXPR
+ TREE_OPERAND (rhs, 0) == decl
+ INTEGRAL_TYPE_P (TREE_TYPE (rhs)))
+ return incr;
+   // Otherwise fail because only PLUS_EXPR and MINUS_EXPR are
+   // allowed.
+   break;


... but not here (p += 1)?


I should make the code more obvious.  What I'm trying to do is generate
what the gimplifier for OMP_FOR is expecting.  OMP rewrites pointer
increment/decrement expressions into a corresponding MODIFY_EXPR.

I have abstracted the OMP code and shared it between both type checks,
and I have also added an assert in the gimplifier just in case some
future front-end extension generates OMP_FOR_INCR in a wonky way.





+c_validate_cilk_plus_loop (tree *tp, int *walk_subtrees, void *data)
+{
+  if (!tp || !*tp)
+return NULL_TREE;
+
+  bool *valid = (bool *) data;
+
+  switch (TREE_CODE (*tp))
+{
+case CALL_EXPR:
+  {
+   tree fndecl = CALL_EXPR_FN (*tp);
+
+   if (TREE_CODE (fndecl) == ADDR_EXPR)
+ fndecl = TREE_OPERAND (fndecl, 0);
+   if (TREE_CODE (fndecl) == FUNCTION_DECL)
+ {
+   if (setjmp_call_p (fndecl))
+ {
+   error_at (EXPR_LOCATION (*tp),
+ calls to setjmp are not allowed within loops 
+ annotated with #pragma simd);
+   *valid = false;
+   *walk_subtrees = 0;
+ }
+ }
+   break;


Why bother checking for setjmp?  While I agree it makes little sense, there are
plenty of other standard functions which also make no sense to use from within
#pragma simd.  What's likely to go wrong with a call to setjmp, as opposed to
getcontext, pthread_create, or even printf?


Sigh...the standard specifically disallows setjmp.




+  if (DECL_REGISTER (decl))
+{
+  error_at (loc, induction variable cannot be declared register);
+  return false;
+}


Why?


The standard :(.



All of the actual gimple changes look good.  You could commit those now if you
like to reduce the size of the next patch.


Ughh...got lazy on this round.  How about I commit the gimple changes
for the next round?

How does this look?





Re: [Patch to gcc/function] PR 58362

2013-09-09 Thread Gabriel Dos Reis
On Mon, Sep 9, 2013 at 5:41 AM, Jakub Jelinek ja...@redhat.com wrote:
 On Mon, Sep 09, 2013 at 12:38:46PM +0200, Paolo Carlini wrote:
 On 09/09/2013 11:37 AM, Richard Biener wrote:
 That said, grepping for %q+D reveals quite some uses and it looks like
 all of them expect the location being used to be that of the decl passed
 to the diagnostic call, not some random other location.
 If the decl is *not* a PARM_DECL, I expect %q+D to be often
 accurate. In fact, even when *is* a PARM_DECL what we have now is
 pretty decent, because normally the location of the corresponding
 FUNCTION_DECL isn't that far. The point is whether we want to be
 *more* accurate and point to the specific unused parameter, for C
 and C++, as clang and icc do.

 I guess the primary question is why location_of special cases the PARM_DECL
 and in which case it is useful to do so, and whether the number of cases (if
 any) when it is useful to do so is bigger than the number of place when it
 is undesirable.

Most likely historical reason, the exact sequence of which is lost to history.

-- Gaby


Re: [C++, diagnostic Patch] PR 58362

2013-09-09 Thread Gabriel Dos Reis
On Mon, Sep 9, 2013 at 7:49 AM, Paolo Carlini paolo.carl...@oracle.com wrote:
 Hi all, hi Gaby,

 thus a more sensible try at fixing this issue:
 function.c:do_warn_unused_parameter uses %q+D for the error call. That
 means that cp/error.c:cp_printer does:

   if (set_locus  t != NULL)
 *text-locus = location_of (t);

 and it's important that location_of (t) is correct. However, for PARM_DECLs,
 location_of (still) does:


   if (TREE_CODE (t) == PARM_DECL  DECL_CONTEXT (t))
 t = DECL_CONTEXT (t);
   else if ...
   ...

 which means that for those we end up with the location of a FUNCTION_DECL or
 anyway not exactly the location of the PARM_DECL itself. I went through the
 C++ front-end and currently there are very few cases if any where
 location_of may be fed a PARM_DECL, and the very few seem Ok to me. The
 testsuite passes on x86_64-linux of course, thus I'm proposing the below.
 IMHO now that we are in Stage 1 the risk of breaking locations somewhere
 else is low enough. By the way, as discussed today elsewhere, the C
 front-end is already Ok.

Patch OK; thanks!

-- Gaby


Re: [C++ diagnostic Patch] Partially fix c++/58363 and more

2013-09-09 Thread Gabriel Dos Reis
On Sun, Sep 8, 2013 at 3:26 PM, Paolo Carlini paolo.carl...@oracle.com wrote:
 Hi all, Gaby,

 I was having a look to c++/58363 and besides the main issue, that is we
 probably want to help the user and tell him/her something about destructors,
 etc, I noticed that we aren't able to pretty print the pseudo destructor
 expression at issue:

 cannot convert ‘f.#‘var_decl’ not supported by dump_type#type error::~’
 (type ‘void’) to type ‘int’

 Weird. Thus I went to cp-tree.def and found the very clear comment:

 /* A pseudo-destructor, of the form OBJECT.~DESTRUCTOR or
 OBJECT.SCOPE::~DESTRUCTOR. The first operand is the OBJECT. The
 second operand (if non-NULL) is the SCOPE. The third operand is
 the TYPE node corresponding to the DESTRUCTOR.

 which in fact is inconsistent with the code in error.c:dump_expr. As regards
 cxx-pretty-print.c, the code in postfix_expression seems largely Ok (that
 confirmed my analysis), only I don't think the case of NULL second operand
 is handled correctly. What do you think about the below? Certainly passes
 the testsuite and the pretty printing for 58363 is Ok.

Looks good.  OK to commit.

I have been hoping that we would have ditched dump_expr and consorts, in
favor of the C++ specific pretty printers, but now…

-- Gaby


[c++-concepts] merge from trunk

2013-09-09 Thread Gabriel Dos Reis

at revision 202396.  No conflict.

-- Gaby



[wide-int] Fix mode choice in convert_modes

2013-09-09 Thread Richard Sandiford
Enabling the CONST_INT assert showed that this branch-local code was
passing the wrong mode to std::make_pair.  The mode of X is OLDMODE,
and we're supposed to be converting it to MODE.

In the case where OLDMODE is VOIDmode, I think we have to assume
that every bit of the input is significant.

Tested on x86_64-linux-gnu.  OK to install?

This and the other patches that I just committed to trunk are enough
for a bootstrap and regression test to pass on x86_64-linux-gnu with
the assertion for a canonical CONST_INT enabled.  I think it would be
better to leave the assertion out until after the merge though,
so we can deal with any fallout separately.

Thanks,
Richard


Index: gcc/expr.c
===
--- gcc/expr.c  2013-09-09 11:32:33.734617409 +0100
+++ gcc/expr.c  2013-09-09 11:45:27.381001353 +0100
@@ -714,13 +714,14 @@ convert_modes (enum machine_mode mode, e
GET_MODE_CLASS (mode) == MODE_INT
(oldmode == VOIDmode || GET_MODE_CLASS (oldmode) == MODE_INT))
 {
-  wide_int w = std::make_pair (x, mode);
   /* If the caller did not tell us the old mode, then there is
-not much to do with respect to canonization.  */
-  if (oldmode != VOIDmode
-  GET_MODE_PRECISION (mode)  GET_MODE_PRECISION (oldmode))
-   w = wi::ext (w, GET_MODE_PRECISION (oldmode),
-unsignedp ? UNSIGNED : SIGNED);
+not much to do with respect to canonization.  We have to assume
+that all the bits are significant.  */
+  if (oldmode == VOIDmode)
+   oldmode = MAX_MODE_INT;
+  wide_int w = wide_int::from (std::make_pair (x, oldmode),
+  GET_MODE_PRECISION (mode),
+  unsignedp ? UNSIGNED : SIGNED);
   return immed_wide_int_const (w, mode);
 }
 


[PATCH] Fix PR58326

2013-09-09 Thread Richard Biener

This fixes PR58326, fix_bb_placements via unloop does not properly
mark all blocks with uses that are now eventually subject to
loop-closed SSA handling.

Fixed with the following, bootstrapped and tested on 
x86_64-unknown-linux-gnu, applied.

Richard.

2013-09-09  Richard Biener  rguent...@suse.de

PR middle-end/58326
* cfgloopmanip.c (fix_bb_placements): When fixing the placement
of a subloop record all its block as affecting loop-closed SSA form.

* gcc.dg/torture/pr58326-1.c: New testcase.
* gcc.dg/torture/pr58326-2.c: Likewise.

Index: gcc/cfgloopmanip.c
===
*** gcc/cfgloopmanip.c  (revision 202382)
--- gcc/cfgloopmanip.c  (working copy)
*** fix_bb_placements (basic_block from,
*** 223,237 
  if (!fix_loop_placement (from-loop_father, irred_invalidated))
continue;
  target_loop = loop_outer (from-loop_father);
}
else
{
  /* Ordinary basic block.  */
  if (!fix_bb_placement (from))
continue;
  if (loop_closed_ssa_invalidated)
bitmap_set_bit (loop_closed_ssa_invalidated, from-index);
- target_loop = from-loop_father;
}
  
FOR_EACH_EDGE (e, ei, from-succs)
--- 223,244 
  if (!fix_loop_placement (from-loop_father, irred_invalidated))
continue;
  target_loop = loop_outer (from-loop_father);
+ if (loop_closed_ssa_invalidated)
+   {
+ basic_block *bbs = get_loop_body (from-loop_father);
+ for (unsigned i = 0; i  from-loop_father-num_nodes; ++i)
+   bitmap_set_bit (loop_closed_ssa_invalidated, bbs[i]-index);
+ free (bbs);
+   }
}
else
{
  /* Ordinary basic block.  */
  if (!fix_bb_placement (from))
continue;
+ target_loop = from-loop_father;
  if (loop_closed_ssa_invalidated)
bitmap_set_bit (loop_closed_ssa_invalidated, from-index);
}
  
FOR_EACH_EDGE (e, ei, from-succs)
Index: gcc/testsuite/gcc.dg/torture/pr58326-1.c
===
*** gcc/testsuite/gcc.dg/torture/pr58326-1.c(revision 0)
--- gcc/testsuite/gcc.dg/torture/pr58326-1.c(working copy)
***
*** 0 
--- 1,23 
+ /* { dg-do compile } */
+ 
+ int a, *d; 
+ long b;
+ short c;
+ 
+ void foo ()
+ {
+   int e;
+ lbl:
+   for (c = 0; c  2; c++)
+ {
+   if (1  b)
+   break;
+   e = *d;
+   for (; a; a++)
+   {
+ *d = e;
+ if (b)
+   goto lbl;
+   }
+ }
+ }
Index: gcc/testsuite/gcc.dg/torture/pr58326-2.c
===
*** gcc/testsuite/gcc.dg/torture/pr58326-2.c(revision 0)
--- gcc/testsuite/gcc.dg/torture/pr58326-2.c(working copy)
***
*** 0 
--- 1,20 
+ /* { dg-do compile } */
+ 
+ int a, b, c, d;
+ 
+ void foo ()
+ {
+   int e;
+ 
+ lbl:
+   for (c = 0; c  2; c++)
+ {
+   e = d;
+   for (; a; a++)
+   {
+ d = e;
+ if (b)
+   goto lbl; 
+   }
+ }
+ }


Re: [C++ Patch] PR 43452

2013-09-09 Thread Jason Merrill

OK, thanks.

Jason


Re: [wide-int] Fix mode choice in convert_modes

2013-09-09 Thread Richard Biener
On Mon, Sep 9, 2013 at 3:11 PM, Richard Sandiford
rdsandif...@googlemail.com wrote:
 Enabling the CONST_INT assert showed that this branch-local code was
 passing the wrong mode to std::make_pair.  The mode of X is OLDMODE,
 and we're supposed to be converting it to MODE.

 In the case where OLDMODE is VOIDmode, I think we have to assume
 that every bit of the input is significant.

 Tested on x86_64-linux-gnu.  OK to install?

 This and the other patches that I just committed to trunk are enough
 for a bootstrap and regression test to pass on x86_64-linux-gnu with
 the assertion for a canonical CONST_INT enabled.  I think it would be
 better to leave the assertion out until after the merge though,
 so we can deal with any fallout separately.

If consuming code assumes CONST_INTs are canonical we better leave it
in though.

Richard.

 Thanks,
 Richard


 Index: gcc/expr.c
 ===
 --- gcc/expr.c  2013-09-09 11:32:33.734617409 +0100
 +++ gcc/expr.c  2013-09-09 11:45:27.381001353 +0100
 @@ -714,13 +714,14 @@ convert_modes (enum machine_mode mode, e
 GET_MODE_CLASS (mode) == MODE_INT
 (oldmode == VOIDmode || GET_MODE_CLASS (oldmode) == MODE_INT))
  {
 -  wide_int w = std::make_pair (x, mode);
/* If the caller did not tell us the old mode, then there is
 -not much to do with respect to canonization.  */
 -  if (oldmode != VOIDmode
 -  GET_MODE_PRECISION (mode)  GET_MODE_PRECISION (oldmode))
 -   w = wi::ext (w, GET_MODE_PRECISION (oldmode),
 -unsignedp ? UNSIGNED : SIGNED);
 +not much to do with respect to canonization.  We have to assume
 +that all the bits are significant.  */
 +  if (oldmode == VOIDmode)
 +   oldmode = MAX_MODE_INT;
 +  wide_int w = wide_int::from (std::make_pair (x, oldmode),
 +  GET_MODE_PRECISION (mode),
 +  unsignedp ? UNSIGNED : SIGNED);
return immed_wide_int_const (w, mode);
  }



Re: PR bootstrap/58340

2013-09-09 Thread Jeff Law

On 09/09/2013 03:26 AM, Gerald Pfeifer wrote:

On Sun, 8 Sep 2013, Jeff Law wrote:

This fixes the problem noted by Zhendong in c#4.  I'll be working
through other reports of issues related to the tree-ssa-threadedge.c
to see if there's any lingering problems.


It also fixes my original report, PR bootstrap/58340 (on
x86_64-unknown-freebsd).
Thanks for the confirmation.  The patch likely fixed the ia64 
mis-compare, but it'll be hours before I know that for sure.


I'm going to assume the the darwin and other x86_64 linux failures are 
fixed as well.  While they never failed for me, I can easily speculate 
how that bug would result incorrect codegen leading to a stage3 failure 
of that nature.


jeff


Re: [PATCH][AArch64] Fix FAIL: gcc.target/aarch64/cmn.c scan-assembler cmn\tw[0-9]

2013-09-09 Thread Richard Earnshaw
On 09/09/13 13:50, Kyrylo Tkachov wrote:
 Hi Richard,
 
 On 29/07/13 14:58, Kyrylo Tkachov wrote:
 Hi all,

 Now that combine emits the canonical form for (eq (neg x) (y)) instead
 of (eq
 (x) (neg y)), this patch fixes up the corresponding pattern in aarch64
 to
 match that. This enables combine to properly generate the cmn
 instruction
 where appropriate.

 Tested aarch64-none-elf on model.

 Ok for trunk?


 No, this is wrong for inequalities, since in reality it's the second
 operand that's inverted.

 You'll need to use CC_SWP mode and then arrange for this to be correctly
 picked by select_cc_mode.

 
 How's this?
 
 aarch64_select_cc_mode is updated to return CC_SWPmode for these comparisons 
 and
 the MD pattern is updated accordingly.
 
 Tested aarch64-none-elf on a model.
 
 Is this ok?
 
 Thanks,
 Kyrill
 
 2013-09-09  Kyrylo Tkachov  kyrylo.tkac...@arm.com
 
   * config/aarch64/aarch64.c (aarch64_select_cc_mode): Return CC_SWP for
   comparison with negated operand.
   * config/aarch64/aarch64.md (compare_negmode): Match canonical RTL 
 form.
   
 2013-09-09  Kyrylo Tkachov  kyrylo.tkac...@arm.com
 
   * gcc.target/aarch64/cmn-neg.c: New test.
 
 
 

OK.

R.




Re: [wide-int] Fix mode choice in convert_modes

2013-09-09 Thread Richard Sandiford
Richard Biener richard.guent...@gmail.com writes:
 On Mon, Sep 9, 2013 at 3:11 PM, Richard Sandiford
 rdsandif...@googlemail.com wrote:
 Enabling the CONST_INT assert showed that this branch-local code was
 passing the wrong mode to std::make_pair.  The mode of X is OLDMODE,
 and we're supposed to be converting it to MODE.

 In the case where OLDMODE is VOIDmode, I think we have to assume
 that every bit of the input is significant.

 Tested on x86_64-linux-gnu.  OK to install?

 This and the other patches that I just committed to trunk are enough
 for a bootstrap and regression test to pass on x86_64-linux-gnu with
 the assertion for a canonical CONST_INT enabled.  I think it would be
 better to leave the assertion out until after the merge though,
 so we can deal with any fallout separately.

 If consuming code assumes CONST_INTs are canonical we better leave it
 in though.

Yeah, I think the patch Kenny's working on is going to canonise CONST_INTs
via the scratch array, to avoid making any assumptions.  I think we should
keep that for the merge and only swap it for an assert later.

Thanks,
Richard


Re: Problems with LRA for KNF/KNC in GCC 4.9

2013-09-09 Thread Vladimir Makarov

On 13-08-22 9:37 AM, Andrey Turetskiy wrote:

Hi Vladimir,

I'm trying to apply KNF and KNC changes for GCC 4.9 (they have been
applied for 4.7 before), but I have some problems with LRA.
I've tried to build cross compiler for KNC. During the compiling of
libgcc/libgcc2.c the compiler becomes locked inside the loop in
function lra () and can't quit it. There is no error messages, just
infinite looping.
Could you please advise me, what I need to fix?
Patch and reload dump (first few iterations) are in attach.


Andrey, I've just wanted to start to work on your problem. 
Unfortunately, I can not apply your patch as it conflicts with avx512 
work recently added.


So could you send me an updated patch and the preprocessed file on which 
LRA cycles.


Please use gcc-patches mailing list as it more fit for your problem.

Thanks.



Re: [PATCH 1/4] Support lambda templates.

2013-09-09 Thread Jason Merrill

On 09/09/2013 03:22 AM, Adam Butcher wrote:

I've attached a patch that handles parameter packs in the conversion
op.  I thought it best to get this reviewed independently before rolling
up into the 'Support lambda templates' patch.

Do you think it's the right idea?  It seems to work okay but I've
reworked the way the template op call is built (and built a separate
call for the decltype expression).


Yes, that looks fine.

Jason




Re: [wide-int] Fix mode choice in convert_modes

2013-09-09 Thread Kenneth Zadeck

On 09/09/2013 09:56 AM, Richard Sandiford wrote:

Richard Biener richard.guent...@gmail.com writes:

On Mon, Sep 9, 2013 at 3:11 PM, Richard Sandiford
rdsandif...@googlemail.com wrote:

Enabling the CONST_INT assert showed that this branch-local code was
passing the wrong mode to std::make_pair.  The mode of X is OLDMODE,
and we're supposed to be converting it to MODE.

In the case where OLDMODE is VOIDmode, I think we have to assume
that every bit of the input is significant.

Tested on x86_64-linux-gnu.  OK to install?

This and the other patches that I just committed to trunk are enough
for a bootstrap and regression test to pass on x86_64-linux-gnu with
the assertion for a canonical CONST_INT enabled.  I think it would be
better to leave the assertion out until after the merge though,
so we can deal with any fallout separately.

If consuming code assumes CONST_INTs are canonical we better leave it
in though.

Yeah, I think the patch Kenny's working on is going to canonise CONST_INTs
via the scratch array, to avoid making any assumptions.  I think we should
keep that for the merge and only swap it for an assert later.

Thanks,
Richard

yes, this is what my patch does now.


Re: [PATCH v2 00/18] resurrect automatic dependencies

2013-09-09 Thread Tom Tromey
 Tom == Tom Tromey tro...@redhat.com writes:

Tom This is version 3 of my series to resurrect automatic dependencies for
Tom GCC.  Version 2 is here:
Tom http://gcc.gnu.org/ml/gcc-patches/2013-07/msg01386.html

Tom Ordinarily I would simply ping the existing patches, but Alexandre
Tom asked me to resend the series and CC him.

Ping.

I've rebased the series locally.  I can re-send it if you want.  Just
let me know; I don't want to spam the list.

Tom


[c++-concepts] pretty print fix

2013-09-09 Thread Andrew Sutton
The last merge didn't compile for me. Apparently some pretty printing
functions have disappeared that were being called in the concept
stuff. This patch just replaces the broken calls to pp_cxx_type_id
with pp-type_id.

Ok to commit?

Andrew Sutton


fix.patch
Description: Binary data


[AArch64] obvious - Fix parameter to vrsqrte_f64

2013-09-09 Thread James Greenhalgh

Hi,

vrsqrte_f64 is currently defined to take a float64x2_t, but it should
take a float64x1_t.

I've committed the attached, obvious fix as revision 202407.

James

---
2013-09-09  James Greenhalgh  james.greenha...@arm.com

* config/aarch64/arm_neon.h (vrsqrte_f64): Fix parameter type.
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index ac94516..23b1116 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -12764,11 +12764,11 @@ vrsqrte_f32 (float32x2_t a)
   return result;
 }
 
-__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
-vrsqrte_f64 (float64x2_t a)
+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
+vrsqrte_f64 (float64x1_t a)
 {
-  float64x2_t result;
-  __asm__ (frsqrte %0.2d,%1.2d
+  float64x1_t result;
+  __asm__ (frsqrte %d0,%d1
: =w(result)
: w(a)
: /* No clobbers */);

Re: [RS6000] powerpc64 -mcmodel=medium large symbol offsets

2013-09-09 Thread David Edelsohn
On Mon, Sep 9, 2013 at 5:07 AM, Alan Modra amo...@gmail.com wrote:
 Revised patch with testcase.  This one also fixes a small problem with
 reg_or_add_cint_operand in that any 32-bit value is valid for SImode.
 Compare with reg_or_sub_cint_operand.

 Bootstrapped and regression tested powerpc64-linux.  OK to apply?

 gcc/
 * config/rs6000/predicates.md (add_cint_operand): New.
 (reg_or_add_cint_operand): Use add_cint_operand.
 * config/rs6000/rs6000.md (largetoc_high_plus): Restrict offset
 using add_cint_operand.
 (largetoc_high_plus_aix, small_toc_ref): Likewise.
 gcc/testsuite/
 * gcc.target/powerpc/medium_offset.c: New.

Okay. This seems much better.

Thanks, David


RE: [PATCH GCC]Catch more MEM_REFs sharing common addressing part in gimple strength reduction

2013-09-09 Thread Bill Schmidt
On Mon, 2013-09-09 at 14:25 +0800, bin.cheng wrote:
 Thanks for reviewing, I will correct all stupid spelling problem in the next 
 version of patch.
 
 On Mon, Sep 9, 2013 at 8:15 AM, Bill Schmidt wschm...@linux.vnet.ibm.com 
 wrote:
 
 +   int (i * S).
 +   Otherwise, just return double int zero.  */
 
  This is sufficient, since you are properly checking the next_interp
  chain.  Another possible form would be
 
  X = (B + i) * 1,
 
  but if this is present, then one of the forms you're checking for should
  also be present, so there's no need to check the MULT_CANDs.
 I'm not very sure here since I didn't check MULT_CAND in the patch.  Could 
 you please explain more about this?

Sorry, perhaps I shouldn't have mentioned it.  I was simply stating
that, although a candidate representing B + i could be represented with
a CAND_MULT as shown, there is no need for you to check it (as you
don't) since there will also be a corresponding CAND_ADD in one of the
other forms.  Since you are walking the next_interp chain, this works.

In other words, the code is fine as is.  I was just thinking out loud
about other candidate types.

 
 
 +
 +static double_int
 +backtrace_base_for_ref (tree *pbase)
 +{
 +  tree base_in = *pbase;
 +  slsr_cand_t base_cand;
 +
 +  STRIP_NOPS (base_in);
 +  if (TREE_CODE (base_in) != SSA_NAME)
 +return tree_to_double_int (integer_zero_node);
 +
 +  base_cand = base_cand_from_table (base_in);
 +
 +  while (base_cand  base_cand-kind != CAND_PHI)
 +{
 +  if (base_cand-kind == CAND_ADD
 +base_cand-index.is_one ()
 +TREE_CODE (base_cand-stride) == INTEGER_CST)
 + {
 +   /* X = B + (1 * S), S is integer constant.  */
 +   *pbase = base_cand-base_expr;
 +   return tree_to_double_int (base_cand-stride);
 + }
 +  else if (base_cand-kind == CAND_ADD
 + TREE_CODE (base_cand-stride) == INTEGER_CST
 + integer_onep (base_cand-stride))
 +{
 +   /* X = B + (i * S), S is integer one.  */
 +   *pbase = base_cand-base_expr;
 +   return base_cand-index;
 + }
 +
 +  if (base_cand-next_interp)
 + base_cand = lookup_cand (base_cand-next_interp);
 +  else
 + base_cand = NULL;
 +}
 +
 +  return tree_to_double_int (integer_zero_node);
 +}
 +
  /* Look for the following pattern:
 
  *PBASE:MEM_REF (T1, C1)
 @@ -767,8 +818,15 @@ slsr_process_phi (gimple phi, bool speed)
 
  *PBASE:T1
  *POFFSET:  MULT_EXPR (T2, C3)
 -*PINDEX:   C1 + (C2 * C3) + C4  */
 +*PINDEX:   C1 + (C2 * C3) + C4
 
 +   When T2 is recorded by an CAND_ADD in the form of (T2' + C5), It
   ^  ^
   a  it
 
 +   will be further restructured to:
 +
 +*PBASE:T1
 +*POFFSET:  MULT_EXPR (T2', C3)
 +*PINDEX:   C1 + (C2 * C3) + C4 + (C5 * C3)  */
 +
  static bool
  restructure_reference (tree *pbase, tree *poffset, double_int
  *pindex,
  tree *ptype)
 @@ -777,7 +835,7 @@ restructure_reference (tree *pbase, tree *poffset,
double_int index = *pindex;
double_int bpu = double_int::from_uhwi (BITS_PER_UNIT);
tree mult_op0, mult_op1, t1, t2, type;
 -  double_int c1, c2, c3, c4;
 +  double_int c1, c2, c3, c4, c5;
 
if (!base
|| !offset
 @@ -823,11 +881,12 @@ restructure_reference (tree *pbase, tree
  *poffset,
  }
 
c4 = index.udiv (bpu, FLOOR_DIV_EXPR);
 +  c5 = backtrace_base_for_ref (t2);
 
*pbase = t1;
 -  *poffset = fold_build2 (MULT_EXPR, sizetype, t2,
 -   double_int_to_tree (sizetype, c3));
 -  *pindex = c1 + c2 * c3 + c4;
 +  *poffset = size_binop (MULT_EXPR, fold_convert (sizetype, t2),
 +  double_int_to_tree (sizetype, c3));
 
  I am not sure why you changed this call.  fold_build2 is a more
  efficient call than size_binop.  size_binop makes several checks that
  will fail in this case, and then calls fold_build2_loc, right?  Not a
  big deal but seems like changing it back would be better.  Perhaps I'm
  missing something (as usual ;).
 I rely on size_binop to convert T2 into sizetype, because T2' may be in other 
 kind of type.  Otherwise there will be ssa_verify error later.

OK, I see now.  I had thought this was handled by fold_build2, but
apparently not.  I guess all T2's formerly handled were already sizetype
as expected.  Thanks for the explanation!

Bill

 
 Thanks.
 bin
 
 
 
 



  1   2   >