decl, optimize) > 2
> || gimple_call_must_tail_p (ocall)
> - || (optimize == 2 && gimple_call_tail_p (ocall)))
> + || (opt_for_fn (e->caller->decl, optimize) == 2
> + && gimple_call_tail_p (ocall)))
>
On Thu, Dec 14, 2023 at 8:53 PM Alexandre Oliva wrote:
>
>
> The strub builtins are not suited for cross-unit inlining, they should
> only be inlined by the builtin expanders, if at all. While testing on
> sparc64, it occurred to me that, if libgcc was built with LTO enabled,
> lto1 might inline
When BB reduction vectorization picks up a chain with an ASM def
in it and that's inside the vectorized region we fail to get its
LHS. Instead of trying to get the correct def the following
avoids vectorizing such def and instead keeps it as def to add
in the epilog.
Bootstrapped and tested on
le, Each only need a single SELECT_VL
> + or MIN since beginning to calculate the number of elements
> + need to be processed in current iteration. */
> + length_update_cost = 1;
> else
> /* For increment IV stype, Each may need two MINs and one MINUS to
>update lengths in body for next iteration. */
>
--
Richard Biener
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)
The following makes the attempt at code-generating a constant/external
SLP node twice well-formed as that can happen when partitioning BB
vectorization attempts where we keep constants/externals unpartitioned.
Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.
PR
On Wed, Dec 13, 2023 at 5:51 PM Andrew Pinski wrote:
>
> After r14-2667-gceae1400cf24f329393e96dd9720, we force a constant to a
> register
> if it is shared with one of the other operands. The problem is that we used the
> comparison
> mode for the register but that could be different from the operand
BRANCH_EDGE (gimple_bb (cond_stmt))->dest))
> +{
> + new_code = EQ_EXPR;
> + cst = build_minus_one_cst (vectype);
> +}
> +
> + gimple_cond_set_condition (cond_stmt, new_code, new_temp, cst);
> + update_stmt (stmt);
> +
> + if (slp_node)
> +SLP_TREE_VEC_DEFS (slp_node).truncate (0);
> + else
> +STMT_VINFO_VEC_STMTS (stmt_info).truncate (0);
> +
> + if (!slp_node)
> +*vec_stmt = stmt;
> +
> + return true;
> +}
> +
> /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
> can handle all live statements in the node. Otherwise return true
> if STMT_INFO is not live or if vectorizable_live_operation can handle it.
> @@ -12949,7 +13154,9 @@ vect_analyze_stmt (vec_info *vinfo,
> || vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
> stmt_info, NULL, node)
> || vectorizable_recurr (as_a <loop_vec_info> (vinfo),
> -stmt_info, NULL, node, cost_vec));
> +stmt_info, NULL, node, cost_vec)
> + || vectorizable_early_exit (vinfo, stmt_info, NULL, NULL, node,
> + cost_vec));
>else
> {
>if (bb_vinfo)
> @@ -12972,7 +13179,10 @@ vect_analyze_stmt (vec_info *vinfo,
>NULL, NULL, node, cost_vec)
> || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
> cost_vec)
> - || vectorizable_phi (vinfo, stmt_info, NULL, node, cost_vec));
> + || vectorizable_phi (vinfo, stmt_info, NULL, node, cost_vec)
> + || vectorizable_early_exit (vinfo, stmt_info, NULL, NULL, node,
> + cost_vec));
> +
> }
>
>if (node)
> @@ -13131,6 +13341,12 @@ vect_transform_stmt (vec_info *vinfo,
>gcc_assert (done);
>break;
>
> +case loop_exit_ctrl_vec_info_type:
> + done = vectorizable_early_exit (vinfo, stmt_info, gsi, &vec_stmt,
> + slp_node, NULL);
> + gcc_assert (done);
> + break;
> +
> default:
>if (!STMT_VINFO_LIVE_P (stmt_info))
> {
> @@ -14321,6 +14537,14 @@ vect_get_vector_types_for_stmt (vec_info *vinfo,
> stmt_vec_info stmt_info,
> }
>else
> {
> + /* If we got here with a gcond it means that the target had no
> available vector
> + mode for the scalar type. We can't vectorize so abort. */
> + if (is_a <gcond *> (stmt))
> + return opt_result::failure_at (stmt,
> +"not vectorized:"
> +" unsupported data-type for gcond %T\n",
> +scalar_type);
> +
>if (data_reference *dr = STMT_VINFO_DATA_REF (stmt_info))
> scalar_type = TREE_TYPE (DR_REF (dr));
>else if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
>
--
Richard Biener
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)
ortant to decide between SELECT_VL and not
SELECT_VL then we'd need to start modeling address cost _at all_.
Richard.
> Thanks.
>
>
>
> juzhe.zh...@rivai.ai
>
> From: Richard Biener
> Date: 2023-12-14 18:46
> To: juzhe.zhong
> CC: gcc-patches; richard.sandiford;
))
> + /* For decrement IV style, we use a single SELECT_VL since
> + beginning to calculate the number of elements need to be
> + processed in current iteration, and a SHIFT operation to
> + compute the next memory address instead of adding vectorization
> + factor. */
>
> Am 14.12.2023 um 08:37 schrieb Jakub Jelinek :
>
> Hi!
>
> On top of the previously posted patch, this simplifies say (x * 16) / (x * 4)
> into 4. Unlike the previous pattern, this is something we didn't fold
> previously on GENERIC, so I think it shouldn't be all wrapped with #if
>
> Am 14.12.2023 um 08:35 schrieb Jakub Jelinek :
>
> Hi!
>
> The following testcase is optimized just on GENERIC (using
> strict_overflow_p = false;
> if (TREE_CODE (arg1) == INTEGER_CST
> && (tem = extract_muldiv (op0, arg1, code, NULL_TREE,
>
Pushed.
PR tree-optimization/110640
* gcc.dg/torture/pr110640.c: New testcase.
---
gcc/testsuite/gcc.dg/torture/pr110640.c | 22 ++
1 file changed, 22 insertions(+)
create mode 100644 gcc/testsuite/gcc.dg/torture/pr110640.c
diff --git
> Am 13.12.2023 um 17:12 schrieb Filip Kastl :
>
>
>>
Hi,
this is a patch that I submitted two months ago as an RFC. I added some
polish
since.
It is a new lightweight pass that removes redundant PHI functions and as a
bonus does basic copy
> Am 13.12.2023 um 17:07 schrieb Martin Jambor :
>
> Hi,
>
> sorry for getting to this only so late, my email backlog from my medical
> leave still isn't empty.
>
>> On Mon, Oct 16 2023, Richard Biener wrote:
>> The following addresses build_reconstruc
The following defers, for non-gather/scatter and non-pattern stmts,
setting of STMT_VINFO_VECTYPE until after we computed the desired
vectorization factor. This allows us to use larger vector types
when the vectorization factor and the preferred vector mode allow,
reducing the number of vector
The gather_load optab and friends require the offset vector mode to
have the same number of lanes as the data vector mode. Restrict the
vector type query to that when searching for a proper offset type.
* tree-vect-data-refs.cc (vect_gather_scatter_fn_p):
Use
The following changes the unsigned group_size argument to a poly_uint64
one to avoid too much special-casing in callers for VLA vectors when
passing down the effective maximum desirable vector size to vector
type query routines. The intent is to be able to pass down
the vectorization factor
The following makes sure to keep LOOP_VINFO_VECT_FACTOR at the
undetermined value zero until it is final, making LOOP_VINFO_VECT_FACTOR
an rvalue and changing some direct references to use the macro.
* tree-vectorizer.h (LOOP_VINFO_VECT_FACTOR): Make an rvalue.
* tree-vect-loop.cc
This reduces more calls to get_vectype_for_scalar_type.
* tree-vect-loop.cc (vect_transform_cycle_phi): Specify
the vector type for invariant/external defs.
* tree-vect-stmts.cc (vectorizable_shift): For invariant
or external shifted operands use the result vector
The following removes get_vectype_for_scalar_type calls when we
already have the vector type computed. It also avoids some
premature and possibly redundant or unnecessary check during
data-ref analysis for gathers.
* tree-vect-data-refs.cc (vect_analyze_data_refs): Do
not check
I've been asked to look into how to best relax the current restriction
of the vectorizer that it prefers to use a single vector size throughout
loop vectorization. That size is determined by the preferred_simd_mode
and the autovectorize_vector_modes hook for other-than-first iterations.
The
hat should be
>real warnings (e.g., testsuite/gcc.dg/pr18501.c). */
>NEXT_PASS (pass_cd_dce, false /* update_address_taken_p */);
> + NEXT_PASS (pass_sccopy);
>NEXT_PASS (pass_tail_calls);
>/* Split critical edges before late uninit warning to redu
beginning to calculate the number of elements need to be
> + processed in current iteration, and a SHIFT operation to
> + compute the next memory address instead of adding vectorization
> + factor. */
> + length_update_cost = 2;
&
m_data.pop ();
> m_data[m_data_cnt + 1] = t;
> --- gcc/testsuite/gcc.dg/bitint-53.c.jj 2023-12-12 18:28:16.203949817
> +0100
> +++ gcc/testsuite/gcc.dg/bitint-53.c 2023-12-12 18:27:47.307342133 +0100
> @@ -0,0 +1,17 @@
> +/* PR tree-optimization/112940 */
>
When investigating PR111591 with respect to TBAA and stack slot sharing
I noticed we're eventually scrapping a [TARGET_]MEM_REF offset when
rewriting the VAR_DECL base of the MEM_EXPR to use a pointer to the
partition instead. The following makes sure to preserve that.
Bootstrapped and tested on
On Wed, Dec 13, 2023 at 9:14 AM Di Zhao OS
wrote:
>
> Hello Richard,
>
> > -Original Message-
> > From: Richard Biener
> > Sent: Monday, December 11, 2023 7:01 PM
> > To: Di Zhao OS
> > Cc: gcc-patches@gcc.gnu.org
> > Subject: Re: [PATC
On Wed, Dec 13, 2023 at 4:05 AM Alexandre Oliva wrote:
>
> On Dec 12, 2023, Richard Biener wrote:
>
> > On Tue, Dec 12, 2023 at 3:03 AM Alexandre Oliva wrote:
>
> >> DECL_NOT_GIMPLE_REG_P (arg) = 0;
>
> > I wonder why you clear this at all?
>
> That co
.cc.jj 2023-12-09 10:19:57.809813529 +0100
> +++ gcc/attribs.cc2023-12-11 09:55:28.894461698 +0100
> @@ -315,7 +315,7 @@ void
> free_attr_data ()
> {
>for (auto x : ignored_attributes_table)
> -delete[] x;
> +delete x;
>ignored_attributes_table.release ();
>
The following avoids creating an unsupported VEC_PERM after vector
lowering from the pattern merging a bit-insert from a bit-field-ref
to a VEC_PERM. For the already existing s390 testcase we get
TImode vectors which later ICE during attempted expansion of
a vec_perm_const.
Pushed accidentally
The following does away with the fake edge adding as in the original
PR112961 fix and instead exposes handling of entry PHIs as additional
parameter of the region VN run.
Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.
PR tree-optimization/112991
PR
While tidying the prototype patch I've done for the reduced testcase
in PR111591 and in that process trying to produce a testcase that
is miscompiled by stack slot coalescing and the TBAA info that
remains un-altered I've realized we do not need to adjust TBAA info.
The following documents this
On Tue, 12 Dec 2023, Peter Bergner wrote:
> On 12/12/23 8:36 PM, Jason Merrill wrote:
> > This test is failing for me below C++17, I think you need
> >
> > // { dg-do compile { target c++17 } }
> > or
> > // { dg-require-effective-target c++17 }
>
> Sorry about that. Should we do the above or
> Am 12.12.2023 um 19:51 schrieb Peter Bergner :
>
> On 12/12/23 12:45 PM, Peter Bergner wrote:
>> +/* PR target/112822 */
>
> Oops, this should be:
>
> /* PR tree-optimization/112822 */
>
> It's fixed on my end.
Ok
Richard
> Peter
>
>
>
>
> Am 12.12.2023 um 17:50 schrieb Martin Jambor :
>
> Hi,
>
> PR 112822 revealed a corner case in load_assign_lhs_subreplacements
> where it creates invalid gimple: an assignment where on the LHS there
> is a complex variable which however is not a gimple register because
> it has partial
On Tue, Dec 12, 2023 at 3:38 PM Jan Hubicka wrote:
>
> Hi,
> this patch disables use of FMA in matrix multiplication loop for generic (for
> x86-64-v3) and zen4. I tested this on zen4 and Xeon Gold 6212U.
>
> For Intel this is neutral both on the matrix multiplication microbenchmark
>
The following makes sure to also process the (empty) latch when
performing CSE on the if-converted loop body. That's important
to get all uses of copies propagated out on the backedge as well.
To avoid CSE on the PHI nodes itself which is prohibitive
(see PR90402) this temporarily adds a fake
On Tue, 12 Dec 2023, Richard Sandiford wrote:
> Richard Biener writes:
> > The following avoids over/under-read of storage when vectorizing
> > a non-grouped load with SLP. Instead of forcing peeling for gaps
> > use a smaller load for the last vector which might acces
On Tue, Dec 12, 2023 at 10:05 AM Florian Weimer wrote:
>
> * Richard Biener:
>
> > If it were possible I'd axe x86_64-v4. Maybe we should add a x86_64-v3.5
> > that sits inbetween v3 and v4, offering AVX512 but restricted to 256bit
> > (and obviously not requiring
On Tue, Dec 12, 2023 at 7:12 AM liuhongt wrote:
>
> x86 doesn't support horizontal reduction instructions, reduc_op_scal_m
> is emulated with vec_extract_half + op(half vector length)
> Take that into account when calculating cost for vectorization.
>
> Bootstrapped and regtested on
On Tue, Dec 12, 2023 at 3:03 AM Alexandre Oliva wrote:
>
>
> When generating code for an internal strub wrapper, don't clear the
> DECL_NOT_GIMPLE_REG_P flag of volatile args, and gimplify them both
> before and after any conversion.
>
> While at that, move variable TMP into narrower scopes so
On Tue, 12 Dec 2023, Richard Sandiford wrote:
> Richard Biener writes:
> > On Mon, 11 Dec 2023, Tamar Christina wrote:
> >> @@ -5553,6 +5554,83 @@ integer_type_for_mask (tree var, vec_info *vinfo)
> >>return build_nonstandard_integer_type (def_stmt_in
The following avoids over/under-read of storage when vectorizing
a non-grouped load with SLP. Instead of forcing peeling for gaps
use a smaller load for the last vector which might access excess
elements. This builds upon the existing optimization avoiding
peeling for gaps, generalizing it to
cost_vec));
> +
> }
>
>if (node)
> @@ -13131,6 +13345,12 @@ vect_transform_stmt (vec_info *vinfo,
> gcc_assert (done);
>break;
>
> +case loop_exit_ctrl_vec_info_type:
> + done = vectorizable_early_exit (vinfo, stmt_info, gsi, &vec_stmt,
> + slp_node, NULL);
> + gcc_assert (done);
> + break;
> +
> default:
>if (!STMT_VINFO_LIVE_P (stmt_info))
> {
> @@ -14321,10 +14541,19 @@ vect_get_vector_types_for_stmt (vec_info *vinfo,
> stmt_vec_info stmt_info,
> }
>else
> {
> + gcond *cond = NULL;
>if (data_reference *dr = STMT_VINFO_DATA_REF (stmt_info))
> scalar_type = TREE_TYPE (DR_REF (dr));
>else if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
> scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
> + else if ((cond = dyn_cast <gcond *> (stmt)))
> + {
> + /* We can't convert the scalar type to boolean yet, since booleans
> have a
> + single bit precision and we need the vector boolean to be a
> + representation of the integer mask. So set the correct integer
> type and
> + convert to boolean vector once we have a vectype. */
> + scalar_type = TREE_TYPE (gimple_cond_lhs (cond));
You should get into the vect_use_mask_type_p (stmt_info) path for
early exit conditions (see above with regard to mask_precision).
> + }
>else
> scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
>
> @@ -14339,12 +14568,18 @@ vect_get_vector_types_for_stmt (vec_info *vinfo,
> stmt_vec_info stmt_info,
>"get vectype for scalar type: %T\n", scalar_type);
> }
>vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
> +
>if (!vectype)
> return opt_result::failure_at (stmt,
> "not vectorized:"
> " unsupported data-type %T\n",
> scalar_type);
>
> + /* If we were a gcond, convert the resulting type to a vector boolean
> type now
> + that we have the correct integer mask type. */
> + if (cond)
> + vectype = truth_type_for (vectype);
> +
which makes this moot.
Richard.
>if (dump_enabled_p ())
> dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
> }
>
--
Richard Biener
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)
The following builds upon the last fix, making sure we only value-number
to visited (un-)defs, otherwise prefer .VN_TOP.
Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.
PR tree-optimization/112939
* tree-ssa-sccvn.cc (visit_phi): When all args are undefined
> Am 11.12.2023 um 20:12 schrieb Jason Merrill :
>
> On 12/11/23 03:02, Richard Biener wrote:
>>> On Sun, 10 Dec 2023, Jason Merrill wrote:
>>> On 12/10/23 05:22, Richard Biener wrote:
>>>>> Am 09.12.2023 um 21:13 schrieb Jason Merrill :
>>>
; 在 2023年12月11日星期一,Richard Biener 写道:
>>
>> On Sun, Dec 10, 2023 at 4:00 PM xndcn wrote:
>> >
>> > Hi, I am a newbie in GCC, and I do not have access to git repo.
>> >
>> > I found some misleading error messages in verify_gimple_assign_single
>
On Wed, Nov 29, 2023 at 3:36 PM Di Zhao OS
wrote:
>
> > -Original Message-
> > From: Richard Biener
> > Sent: Tuesday, November 21, 2023 9:01 PM
> > To: Di Zhao OS
> > Cc: gcc-patches@gcc.gnu.org
> > Subject: Re: [PATCH v4]
_info stmt_info,
> loop_vec_info loop_vinfo,
>
>/* cond stmt other than loop exit cond. */
>gimple *stmt = STMT_VINFO_STMT (stmt_info);
> - if (is_a <gcond *> (stmt)
> + if (is_ctrl_stmt (stmt)
>&& LOOP_VINFO_LOOP_IV_COND (loop_vinfo) != stmt
>&& (!loop->inner
|| vectorizable_early_exit (vinfo, stmt_info, NULL, NULL, node,
> + cost_vec));
> +
> }
>
>if (node)
> @@ -13131,6 +13332,12 @@ vect_transform_stmt (vec_info *vinfo,
>gcc_assert (done);
>break;
>
&g
The following adds no_icf handling for variables where the attribute
was rejected. It also fixes the check for no_icf by checking both
the source and the targets decl.
Bootstrap / regtest running on x86_64-unknown-linux-gnu.
This would solve the AVR issue with merging of "progmem" attributed
On Mon, 11 Dec 2023, Tamar Christina wrote:
> > -Original Message-
> > From: Richard Biener
> > Sent: Monday, December 11, 2023 7:38 AM
> > To: Richard Sandiford
> > Cc: Tamar Christina ; gcc-patches@gcc.gnu.org; nd
> > ; j...@ventanamicro.com
>
s "-Og -fstrub=strict -fdump-rtl-expand" } */
> +/* { dg-options "-Og -fstrub=strict -fdump-rtl-expand -fno-stack-protector"
> } */
> /* { dg-require-effective-target strub } */
>
> /* At -Og, without -fno-inline, we fully expand enter, but neither update nor
>
> Jakub
>
>
--
Richard Biener
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)
On Mon, Dec 11, 2023 at 7:51 AM liuhongt wrote:
>
> > since you are looking at TYPE_PRECISION below you want
> > VECTOR_INTEGER_TYPE_P here as well? The alternative
> > would be to compare TYPE_SIZE.
> >
> > Some of the checks feel redundant but are probably good for
> > documentation purposes.
On Mon, Dec 11, 2023 at 7:39 AM Xi Ruoyao wrote:
>
> Ping again.
OK, sorry for the delay.
Richard.
> On Fri, 2023-12-01 at 13:44 +0800, Xi Ruoyao wrote:
> > Ping.
> >
> > On Fri, 2023-11-24 at 17:09 +0800, Xi Ruoyao wrote:
> > > With -fno-fp-int-builtin-inexact, trunc is not allowed to raise
>
On Sun, Dec 10, 2023 at 8:57 PM Andrew Pinski wrote:
>
> From: Andrew Pinski
>
> The check for the type seems unnecessary and gets in the way sometimes.
> Also with a patch I am working on for match.pd, it causes a failure to happen.
> Before my patch the IR was:
> _1 = BIT_FIELD_REF ;
> _2
On Sun, Dec 10, 2023 at 8:57 PM Andrew Pinski wrote:
>
> When I moved two_value to match.pd, I removed the check for the {0,+-1}
> as I had placed it after the {0,+-1} case for cond in match.pd.
> In the case of {0,+-1} and non boolean, before we would optimize those
> case to just `(convert)a`
On Sun, 10 Dec 2023, Jason Merrill wrote:
> On 12/10/23 05:22, Richard Biener wrote:
> >> Am 09.12.2023 um 21:13 schrieb Jason Merrill :
> >>
> >> On 11/2/23 21:18, Nathaniel Shead wrote:
> >>> Bootstrapped and regtested on x86-64_pc_linux_gnu.
> &
On Sun, Dec 10, 2023 at 4:00 PM xndcn wrote:
>
> Hi, I am a newbie in GCC, and I do not have access to git repo.
>
> I found some misleading error messages in verify_gimple_assign_single
> function of tree-cfg.cc. It prompts the error "invalid RHS for gimple memory
> store: ", but it checks lhs in
On Sun, Dec 10, 2023 at 10:30 AM Xi Ruoyao wrote:
>
> On Sun, 2023-12-10 at 01:21 -0800, Andrew Pinski wrote:
> > diff --git a/gcc/expr.cc b/gcc/expr.cc
> > index 6da51f2aca2..4686cacd22f 100644
> > --- a/gcc/expr.cc
> > +++ b/gcc/expr.cc
> > @@ -10209,8 +10209,9 @@ expand_expr_real_2 (sepops
On Sun, Dec 10, 2023 at 10:21 AM Andrew Pinski wrote:
>
> After r14-1655-g52c92fb3f40050 (and the other commits
> which touch zero_one_valued_p), we end up with
> `bool * a` but where the bool is an SSA name that might not
> have non-zero bits set on it (to 0x1) even though it
> does the
se
> > + basic_block bb = ip_normal_pos (loop);
> > + if (!bb)
> > +return NULL;
> > +
> > + edge exit = EDGE_SUCC (bb, 0);
> > + if (exit->dest == loop->latch)
> > +return EDGE_SUCC (bb, 1);
> > + return exit;
> > +#endif
> > }
> >
> > /* Function bb_in_loop_p
>
--
Richard Biener
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)
On Sat, Dec 9, 2023 at 8:05 AM Alexandre Oliva wrote:
>
> Scratch the previous one, the "slightly different version" I had before
> it was not entirely broken due to unnecessary, suboptimal and incorrect
> use of ctz. Here I have yet another implementation of that loop that
> should perform
On Sat, Dec 9, 2023 at 3:25 AM Alexandre Oliva wrote:
>
>
> smallest_int_mode_for_size may abort when the requested mode is not
> available. Call int_mode_for_size instead, that signals the
> unsatisfiable request in a more graceful way.
>
> Regstrapped on x86_64-linux-gnu. Ok to install?
>
>
>
On Wed, Nov 22, 2023 at 11:45 PM Jason Merrill wrote:
>
> Tested x86_64-pc-linux-gnu, OK for trunk?
OK
> -- 8< --
>
> -Waddress-of-packed-member, in addition to the documented warning about
> taking the address of a packed member, also warns about casting from
> a pointer to a TYPE_PACKED type
On Sat, Dec 9, 2023 at 3:09 AM Alexandre Oliva wrote:
>
> On Dec 7, 2023, Alexandre Oliva wrote:
>
> > Thanks for raising the issue. Maybe there should be at least a comment
> > there, and perhaps some asserts to check that pointer and reference
> > types don't make it to indirect_parms.
>
>
> Am 10.12.2023 um 12:21 schrieb Alexander Monakov :
>
>
> On Sun, 10 Dec 2023, Richard Biener wrote:
>
>>> It seems wrong to me: CLOBBER_EOL is documented to mean that the storage is
>>> expiring at that point as well, which a (pseudo-)destructor does no
> Am 09.12.2023 um 21:13 schrieb Jason Merrill :
>
> On 11/2/23 21:18, Nathaniel Shead wrote:
>> Bootstrapped and regtested on x86-64_pc_linux_gnu.
>> I'm not entirely sure if the change I made to have destructors clobber with
>> CLOBBER_EOL instead of CLOBBER_UNDEF is appropriate, but
> Am 09.12.2023 um 10:35 schrieb Jakub Jelinek :
>
> Hi!
>
> This function is never called when param_l1_cache_line_size is 0,
> but it uses int and unsigned int variables to hold alignment in
> bits, so for large param_l1_cache_line_size it is zero and e.g.
> DECL_ALIGN () %
On Fri, 8 Dec 2023, Tamar Christina wrote:
> > -Original Message-
> > From: Richard Biener
> > Sent: Friday, December 8, 2023 10:28 AM
> > To: Tamar Christina
> > Cc: gcc-patches@gcc.gnu.org; nd ; j...@ventanamicro.com
> > Subject: R
The following avoids spurious uninit diagnostics for SSA name
copies which mostly appear when the source is marked as abnormal
which prevents copy propagation.
To prevent regressions I remove the bail out for anonymous SSA
names in the PHI arg place from warn_uninitialized_phi leaving
that to
8,12 @@ vect_transform_stmt (vec_info *vinfo,
>gcc_assert (done);
>break;
>
> +case loop_exit_ctrl_vec_info_type:
> + done = vectorizable_early_exit (vinfo, stmt_info, gsi, &vec_stmt,
> + slp_node, NULL);
> + gcc_assert (done);
> + break;
> +
> default:
>if (!STMT_VINFO_LIVE_P (stmt_info))
> {
> @@ -14321,10 +14514,19 @@ vect_get_vector_types_for_stmt (vec_info *vinfo,
> stmt_vec_info stmt_info,
> }
>else
> {
> + gcond *cond = NULL;
>if (data_reference *dr = STMT_VINFO_DATA_REF (stmt_info))
> scalar_type = TREE_TYPE (DR_REF (dr));
>else if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
> scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
> + else if ((cond = dyn_cast <gcond *> (stmt)))
> + {
> + /* We can't convert the scalar type to boolean yet, since booleans
> have a
> + single bit precision and we need the vector boolean to be a
> + representation of the integer mask. So set the correct integer
> type and
> + convert to boolean vector once we have a vectype. */
> + scalar_type = TREE_TYPE (gimple_cond_lhs (cond));
> + }
>else
> scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
>
> @@ -14339,12 +14541,18 @@ vect_get_vector_types_for_stmt (vec_info *vinfo,
> stmt_vec_info stmt_info,
>"get vectype for scalar type: %T\n", scalar_type);
> }
>vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
> +
>if (!vectype)
> return opt_result::failure_at (stmt,
> "not vectorized:"
> " unsupported data-type %T\n",
> scalar_type);
>
> + /* If we were a gcond, convert the resulting type to a vector boolean
> type now
> + that we have the correct integer mask type. */
> + if (cond)
> + vectype = truth_type_for (vectype);
> +
>if (dump_enabled_p ())
> dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
> }
>
--
Richard Biener
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)
On Thu, Nov 16, 2023 at 11:49 AM liuhongt wrote:
>
> Update in V2:
> 1) Add some comments before the pattern.
> 2) Remove ? from view_convert.
>
> Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
> Ok for trunk?
>
> When I'm working on PR112443, I notice there's some misoptimizations:
>
The following removes the second GIMPLE function dump after
remove_ssa_form which used to rewrite the IL with the coalescing
result but has not done so for a long time now.
Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.
* tree-outof-ssa.cc (rewrite_out_of_ssa): Dump GIMPLE
cc.dg/bitint-52.c 2023-12-08 00:35:21.983205440 +0100
> @@ -0,0 +1,22 @@
> +/* PR tree-optimization/112902 */
> +/* { dg-do compile { target bitint } } */
> +/* { dg-options "-std=c23 -O2" } */
> +
> +double c;
> +#if __BITINT_MAXWIDTH__ >= 2048
> +_BitInt (512)
"-O2" } */
> +
> +float f;
> +#if __BITINT_MAXWIDTH__ >= 256
> +_BitInt(256) i;
> +
> +void
> +foo (void)
> +{
> + f *= 4 * i;
> +}
> +#endif
>
> Jakub
>
>
--
Richard Biener
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)
d.cc2023-12-07 11:57:17.869611646 +0100
> @@ -9044,7 +9044,7 @@ extend_h_i_d (void)
>if (reserve > 0
>&& ! h_i_d.space (reserve))
> {
> - h_i_d.safe_grow_cleared (3 * get_max_uid () / 2, true);
> + h_i_d.safe_grow_cleared (3U * get_max_uid () / 2, true);
>
On Thu, 7 Dec 2023, Hans-Peter Nilsson wrote:
> > Date: Mon, 4 Dec 2023 12:58:03 +0100 (CET)
> > From: Richard Biener
>
> > On Sat, 2 Dec 2023, Hans-Peter Nilsson wrote:
> > > > Date: Fri, 1 Dec 2023 08:07:14 +0100 (CET)
> > > > From: Rich
On Thu, Dec 7, 2023 at 6:52 PM Alexandre Oliva wrote:
>
> On Dec 7, 2023, Thomas Schwinge wrote:
>
> > Thank you for looking into this so promptly!
>
> You're welcome ;-)
>
>
> > during IPA pass: emutls
> > [...]/source-gcc/gcc/testsuite/c-c++-common/strub-unsupported-3.c:18:1:
> >
ion:
/* The following table lists the uses of each of the above flags and
for which types of nodes they are defined.
...
OK with that change. Let's see how it goes ...
Thanks,
Richard.
> Thanks,
> Hao
>
>
> From: Richard Biener
> Sent: Wednesday, Decem
On Thu, Dec 7, 2023 at 1:20 PM Richard Sandiford
wrote:
>
> Richard Biener writes:
> > On Wed, Dec 6, 2023 at 7:44 PM Philipp Tomsich
> > wrote:
> >>
> >> On Wed, 6 Dec 2023 at 23:32, Richard Biener
> >> wrote:
> >> >
> >>
On Mon, Dec 4, 2023 at 10:34 AM Uros Bizjak wrote:
>
> On Wed, Nov 29, 2023 at 1:25 PM Richard Biener
> wrote:
> >
> > On Wed, Nov 29, 2023 at 10:35 AM Uros Bizjak wrote:
> > >
> > > The compiler, configured with --enable-checking=yes,rtl,extra ICEs wi
On Thu, 7 Dec 2023, Jakub Jelinek wrote:
> On Thu, Dec 07, 2023 at 11:12:39AM +0100, Richard Biener wrote:
> > > 2023-12-07 Jakub Jelinek
> > >
> > > PR middle-end/112411
> > > * params.opt (-param=min-nondebug-insn-uid=): Add
> > >
> +/* { dg-skip-if "For 32-bit hosts such param is too much and even for 64-bit
> might require hundreds of GB of RAM" { *-*-* } { "--param
> min-nondebug-insn-uid=1073741824" } } */
>
> /*-*/
> /*--- Block sorting machinery ---*/
>
>
> Jakub
>
>
--
Richard Biener
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)
p-if "AArch64 does not support these bounds." { aarch64*-*-* } {
> "--param stack-clash-protection-*" } } */
> +/* { dg-skip-if "For 32-bit hosts such param is too much and even for 64-bit
> might require hundreds of GB of RAM" { *-*-* } { "--param
> min-nondebug-insn-uid=1073741824" } } */
>
> /*-*/
> /*--- Block sorting machinery ---*/
>
> Jakub
>
>
--
Richard Biener
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)
On Thu, Dec 7, 2023 at 8:54 AM Jakub Jelinek wrote:
>
> Hi!
>
> As can be seen in the second testcase, -fno-debug-cpp is actually
> implemented the same as -fdebug-cpp and so doesn't turn the debugging
> off.
>
> The following patch fixes that.
>
> Bootstrapped/regtested on x86_64-linux and
"-O2 -std=c23" } */
> +
> +struct S { _BitInt(64) b; };
> +
> +struct S
> +foo (_BitInt(64) p)
> +{
> + return (struct S) { p };
> +}
> +
> +#if __BITINT_MAXWIDTH__ >= 3924
> +struct T { _BitInt(3924) b; };
> +
> +struct T
> +bar (_BitInt(3924) p)
> +{
&
> +bar (void)
> +{
> + __builtin_add_overflow (g, h, );
> + __builtin_sub_overflow (i, j, );
> + __builtin_mul_overflow (k, l, );
> +}
> +#endif
> +
> +_BitInt(32) m, n, o, p, q, r;
> +
> +void
> +baz (void)
> +{
> + __builtin_add_overflow (m, n, );
&g
On Wed, 6 Dec 2023, Andre Vieira (lists) wrote:
> Hi,
>
> This patch addresses the issue reported in PR target/112787 by improving the
> compute type selection. We do this by not considering types with more
> elements
> than the type we are lowering since we'd reject such types anyway.
>
>
Anagnostakis
> wrote:
>>
>> Hi Richard,
>>
>> thanks for the useful comments.
>>
>> On Wed, Dec 6, 2023 at 4:32 PM Richard Biener
>> wrote:
>>>
>>> On Wed, Dec 6, 2023 at 2:48 PM Manos Anagnostakis
>>> wrote:
>>> &
On Wed, Dec 6, 2023 at 7:44 PM Philipp Tomsich wrote:
>
> On Wed, 6 Dec 2023 at 23:32, Richard Biener
> wrote:
> >
> > On Wed, Dec 6, 2023 at 2:48 PM Manos Anagnostakis
> > wrote:
> > >
> > > This is an RTL pass that detects store forwarding f
On Thu, Dec 7, 2023 at 4:34 AM Alexandre Oliva wrote:
>
> On Dec 6, 2023, Alexandre Oliva wrote:
>
> > Disabling the runtime bits is easy, once we determine what condition we
> > wish to test for. I suppose testing for target support in the compiler,
> > issuing a 'sorry' in case the feature
On Wed, Dec 6, 2023 at 11:12 PM Alexandre Oliva wrote:
>
> On Dec 6, 2023, Thomas Schwinge wrote:
>
> > As I understand things, this cannot be implemented (at the call site) for
> > nvptx, given that the callee's stack is not visible there: PTX is unusual
> > in that the concept of a "standard"
On Wed, Dec 6, 2023 at 2:48 PM Manos Anagnostakis
wrote:
>
> This is an RTL pass that detects store forwarding from stores to larger loads
> (load pairs).
>
> This optimization is SPEC2017-driven and was found to be beneficial for some
> benchmarks,
> through testing on ampere1/ampere1a
On Wed, Dec 6, 2023 at 10:46 AM Hao Liu OS wrote:
>
> Hi,
>
> Update the patch to fix problems in the test case:
> - add "-details" option to the dump command
> - add dg-require and target filters to avoid potential failures on platforms
> that don't support vectorization.
Interesting simple
piler struct __emutls_object doesn't have
> + a union in there and is only created when actually needed for
> + the calls to the builtins, so the builtins are created with void *
> + arguments rather than struct __emutls_object *. Avoid
> + -Wbuiltin-declaration-mismatch warnings.
s)
> + return res;
> }
> else if (gcall *call = dyn_cast <gcall *> (stmt_vinfo->stmt))
> {
> @@ -820,6 +835,8 @@ vect_mark_stmts_to_be_vectorized (loop_vec_info
> loop_vinfo, bool *fatal)
> return res;
> }
> }
> + else
> + gcc_unreachable ();
> }
>else
> FOR_EACH_PHI_OR_STMT_USE (use_p, stmt_vinfo->stmt, iter, SSA_OP_USE)
> @@ -13044,11 +13061,12 @@ vect_analyze_stmt (vec_info *vinfo,
>node_instance, cost_vec);
>if (!res)
> return res;
> - }
> +}
>
>switch (STMT_VINFO_DEF_TYPE (stmt_info))
> {
>case vect_internal_def:
> + case vect_condition_def:
> break;
>
>case vect_reduction_def:
> @@ -13081,6 +13099,7 @@ vect_analyze_stmt (vec_info *vinfo,
> {
>gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
>gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
> + || gimple_code (stmt_info->stmt) == GIMPLE_COND
> || (call && gimple_call_lhs (call) == NULL_TREE));
>*need_to_vectorize = true;
> }
> @@ -13855,6 +13874,8 @@ vect_is_simple_use (vec_info *vinfo, stmt_vec_info
> stmt, slp_tree slp_node,
> else
> *op = gimple_op (ass, operand + 1);
> }
> + else if (gcond *cond = dyn_cast <gcond *> (stmt->stmt))
> + *op = gimple_op (cond, operand);
>else if (gcall *call = dyn_cast <gcall *> (stmt->stmt))
> *op = gimple_call_arg (call, operand);
>else
> @@ -14465,6 +14486,8 @@ vect_get_vector_types_for_stmt (vec_info *vinfo,
> stmt_vec_info stmt_info,
>*nunits_vectype_out = NULL_TREE;
>
>if (gimple_get_lhs (stmt) == NULL_TREE
> + /* Allow vector conditionals through here. */
> + && !is_a <gcond *> (stmt)
>/* MASK_STORE has no lhs, but is ok. */
>&& !gimple_call_internal_p (stmt, IFN_MASK_STORE))
> {
> @@ -14481,7 +14504,7 @@ vect_get_vector_types_for_stmt (vec_info *vinfo,
> stmt_vec_info stmt_info,
> }
>
>return opt_result::failure_at (stmt,
> - "not vectorized: irregular stmt.%G", stmt);
> + "not vectorized: irregular stmt: %G",
> stmt);
> }
>
>tree vectype;
>
--
Richard Biener
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)
e
> if STMT_INFO is not live or if vectorizable_live_operation can handle it.
> @@ -12949,7 +13123,9 @@ vect_analyze_stmt (vec_info *vinfo,
> || vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
> stmt_info, NULL, node)
> || vectorizable_recurr (as_a (
;
> + /* Check if it's an induction and multiple exits. In this case there will be
> + a usage later on after peeling which is needed for the alternate exit. */
> + if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo)
> + && STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
> +{
> + if (dump_enabled_p ())
> + dump_printf_loc (MSG_NOTE, vect_location,
> +"vec_stmt_relevant_p: induction forced for "
> +"early break.\n");
> + *live_p = true;
> +
> +}
> +
>if (*live_p && *relevant == vect_unused_in_scope
>&& !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
> {
> @@ -1774,7 +1788,7 @@ compare_step_with_zero (vec_info *vinfo, stmt_vec_info
> stmt_info)
> /* If the target supports a permute mask that reverses the elements in
> a vector of type VECTYPE, return that mask, otherwise return null. */
>
> -static tree
> +tree
> perm_mask_for_reverse (tree vectype)
> {
>poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
> @@ -12720,20 +12734,27 @@ can_vectorize_live_stmts (vec_info *vinfo,
> stmt_vec_info stmt_info,
> bool vec_stmt_p,
> stmt_vector_for_cost *cost_vec)
> {
> + loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
>if (slp_node)
> {
>stmt_vec_info slp_stmt_info;
>unsigned int i;
>FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info)
> {
> - if (STMT_VINFO_LIVE_P (slp_stmt_info)
> + if ((STMT_VINFO_LIVE_P (slp_stmt_info)
> +|| (loop_vinfo
> +&& LOOP_VINFO_EARLY_BREAKS (loop_vinfo)
> +&& STMT_VINFO_DEF_TYPE (slp_stmt_info)
> + == vect_induction_def))
> && !vectorizable_live_operation (vinfo, slp_stmt_info, slp_node,
> slp_node_instance, i,
> vec_stmt_p, cost_vec))
> return false;
> }
> }
> - else if (STMT_VINFO_LIVE_P (stmt_info)
> + else if ((STMT_VINFO_LIVE_P (stmt_info)
> + || (LOOP_VINFO_EARLY_BREAKS (loop_vinfo)
> + && STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def))
> && !vectorizable_live_operation (vinfo, stmt_info,
> slp_node, slp_node_instance, -1,
> vec_stmt_p, cost_vec))
> diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
> index
> 15c7f75b1f3c61ab469f1b1970dae9c6ac1a9f55..974f617d54a14c903894dd20d60098ca259c96f2
> 100644
> --- a/gcc/tree-vectorizer.h
> +++ b/gcc/tree-vectorizer.h
> @@ -2248,6 +2248,7 @@ extern bool vect_is_simple_use (vec_info *,
> stmt_vec_info, slp_tree,
> enum vect_def_type *,
> tree *, stmt_vec_info * = NULL);
> extern bool vect_maybe_update_slp_op_vectype (slp_tree, tree);
> +extern tree perm_mask_for_reverse (tree);
> extern bool supportable_widening_operation (vec_info*, code_helper,
> stmt_vec_info, tree, tree,
> code_helper*, code_helper*,
>
--
Richard Biener
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)
On Wed, 6 Dec 2023, Tamar Christina wrote:
> > -----Original Message-----
> > From: Richard Biener
> > Sent: Wednesday, December 6, 2023 8:32 AM
> > To: Tamar Christina
> > Cc: gcc-patches@gcc.gnu.org; nd ; j...@ventanamicro.com
> > Subject: Re: [PATCH 12/
On Wed, 6 Dec 2023, Tamar Christina wrote:
> > -----Original Message-----
> > From: Richard Biener
> > Sent: Wednesday, December 6, 2023 8:18 AM
> > To: Tamar Christina
> > Cc: gcc-patches@gcc.gnu.org; nd ; j...@ventanamicro.com
> > Subject: Re: [PATC
701 - 800 of 25078 matches
Mail list logo