date:20230814

From: Pan Li 

This patch would like to support the rounding mode API for the
VFCVT.XU.F.V as the below samples.

* __riscv_vfcvt_xu_f_v_u32m1_rm
* __riscv_vfcvt_xu_f_v_u32m1_rm_m

Signed-off-by: Pan Li 

gcc/ChangeLog:

* config/riscv/riscv-vector-builtins-bases.cc
(vfcvt_xu_frm_obj): New declaration.
(BASE): Ditto.
* config/riscv/riscv-vector-builtins-bases.h: Ditto.
* config/riscv/riscv-vector-builtins-functions.def
(vfcvt_xu_frm): New intrinsic function definition.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/float-point-cvt-xu.c: New test.
---
 .../riscv/riscv-vector-builtins-bases.cc  |  2 ++
 .../riscv/riscv-vector-builtins-bases.h   |  1 +
 .../riscv/riscv-vector-builtins-functions.def |  1 +
 .../riscv/rvv/base/float-point-cvt-xu.c   | 29 +++
 4 files changed, 33 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/float-point-cvt-xu.c

diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.cc 
b/gcc/config/riscv/riscv-vector-builtins-bases.cc
index 754a53efd3d..8eb89a05580 100644
--- a/gcc/config/riscv/riscv-vector-builtins-bases.cc
+++ b/gcc/config/riscv/riscv-vector-builtins-bases.cc
@@ -2482,6 +2482,7 @@ static CONSTEXPR const vmv_v vfmv_v_obj;
 static CONSTEXPR const vfcvt_x vfcvt_x_obj;
 static CONSTEXPR const vfcvt_x_frm vfcvt_x_frm_obj;
 static CONSTEXPR const vfcvt_x vfcvt_xu_obj;
+static CONSTEXPR const vfcvt_x_frm vfcvt_xu_frm_obj;
 static CONSTEXPR const vfcvt_rtz_x vfcvt_rtz_x_obj;
 static CONSTEXPR const vfcvt_rtz_x vfcvt_rtz_xu_obj;
 static CONSTEXPR const vfcvt_f vfcvt_f_obj;
@@ -2732,6 +2733,7 @@ BASE (vfmv_v)
 BASE (vfcvt_x)
 BASE (vfcvt_x_frm)
 BASE (vfcvt_xu)
+BASE (vfcvt_xu_frm)
 BASE (vfcvt_rtz_x)
 BASE (vfcvt_rtz_xu)
 BASE (vfcvt_f)
diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.h 
b/gcc/config/riscv/riscv-vector-builtins-bases.h
index 50a7d7ffb6f..98b61655692 100644
--- a/gcc/config/riscv/riscv-vector-builtins-bases.h
+++ b/gcc/config/riscv/riscv-vector-builtins-bases.h
@@ -207,6 +207,7 @@ extern const function_base *const vfmv_v;
 extern const function_base *const vfcvt_x;
 extern const function_base *const vfcvt_x_frm;
 extern const function_base *const vfcvt_xu;
+extern const function_base *const vfcvt_xu_frm;
 extern const function_base *const vfcvt_rtz_x;
 extern const function_base *const vfcvt_rtz_xu;
 extern const function_base *const vfcvt_f;
diff --git a/gcc/config/riscv/riscv-vector-builtins-functions.def 
b/gcc/config/riscv/riscv-vector-builtins-functions.def
index 8b6a7cc49f3..613bbe7a855 100644
--- a/gcc/config/riscv/riscv-vector-builtins-functions.def
+++ b/gcc/config/riscv/riscv-vector-builtins-functions.def
@@ -446,6 +446,7 @@ DEF_RVV_FUNCTION (vfcvt_f, alu, full_preds, i_to_f_x_v_ops)
 DEF_RVV_FUNCTION (vfcvt_f, alu, full_preds, u_to_f_xu_v_ops)
 
 DEF_RVV_FUNCTION (vfcvt_x_frm, alu_frm, full_preds, f_to_i_f_v_ops)
+DEF_RVV_FUNCTION (vfcvt_xu_frm, alu_frm, full_preds, f_to_u_f_v_ops)
 
 // 13.18. Widening Floating-Point/Integer Type-Convert Instructions
 DEF_RVV_FUNCTION (vfwcvt_x, alu, full_preds, f_to_wi_f_v_ops)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/float-point-cvt-xu.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/float-point-cvt-xu.c
new file mode 100644
index 000..bb164b2b001
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/float-point-cvt-xu.c
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64 -O3 -Wno-psabi" } */
+
+#include "riscv_vector.h"
+
+vuint32m1_t
+test_riscv_vfcvt_xu_f_v_u32m1_rm (vfloat32m1_t op1, size_t vl) {
+  return __riscv_vfcvt_xu_f_v_u32m1_rm (op1, 0, vl);
+}
+
+vuint32m1_t
+test_vfcvt_xu_f_v_u32m1_rm_m (vbool32_t mask, vfloat32m1_t op1, size_t vl) {
+  return __riscv_vfcvt_xu_f_v_u32m1_rm_m (mask, op1, 1, vl);
+}
+
+vuint32m1_t
+test_riscv_vfcvt_xu_f_vv_u32m1 (vfloat32m1_t op1, size_t vl) {
+  return __riscv_vfcvt_xu_f_v_u32m1 (op1, vl);
+}
+
+vuint32m1_t
+test_vfcvt_xu_f_v_u32m1_m (vbool32_t mask, vfloat32m1_t op1, size_t vl) {
+  return __riscv_vfcvt_xu_f_v_u32m1_m (mask, op1, vl);
+}
+
+/* { dg-final { scan-assembler-times {vfcvt\.xu\.f\.v\s+v[0-9]+,\s*v[0-9]+} 4 } } */
+/* { dg-final { scan-assembler-times {frrm\s+[axs][0-9]+} 2 } } */
+/* { dg-final { scan-assembler-times {fsrm\s+[axs][0-9]+} 2 } } */
+/* { dg-final { scan-assembler-times {fsrmi\s+[01234]} 2 } } */
-- 
2.34.1

[PATCH] CRIS: Don't include tree.h in cris-protos.h, PR bootstrap/111021

2023-08-14 Thread Hans-Peter Nilsson via Gcc-patches

I'll commit this in a few hours pending testing.  It seems
trivial enough to be posted before testing is finished
though, now that it has passed the previous
point-of-breakage.  JFTR, I'm testing against the version
with the "first" breaking commit: r14-3092, not r14-3093 the
one with recog.h.

-- >8 --
While there's another patch that fixes the immediate error
in the PR by other means, the include of tree.h here is
something I prefer to avoid.

PR bootstrap/111021
* config/cris/cris-protos.h: Revert recent change.
* config/cris/cris.cc (cris_legitimate_address_p): Remove
code_helper unused parameter.
(cris_legitimate_address_p_hook): New wrapper function.
(TARGET_LEGITIMATE_ADDRESS_P): Change to
cris_legitimate_address_p_hook.
---
 gcc/config/cris/cris-protos.h |  5 +
 gcc/config/cris/cris.cc   | 13 +++--
 2 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/gcc/config/cris/cris-protos.h b/gcc/config/cris/cris-protos.h
index 58555943986c..666e04f9eeec 100644
--- a/gcc/config/cris/cris-protos.h
+++ b/gcc/config/cris/cris-protos.h
@@ -20,8 +20,6 @@ along with GCC; see the file COPYING3.  If not see
 
 /* Prototypes for the CRIS port.  */
 
-#include "tree.h" /* For ERROR_MARK.  */
-
 extern bool cris_simple_epilogue (void);
 #ifdef RTX_CODE
 extern const char *cris_op_str (rtx);
@@ -36,8 +34,7 @@ extern bool cris_base_or_autoincr_p (const_rtx, bool);
 extern bool cris_bdap_index_p (const_rtx, bool);
 extern void cris_reduce_compare (rtx *, rtx *, rtx *);
 extern bool cris_biap_index_p (const_rtx, bool);
-extern bool cris_legitimate_address_p (machine_mode, rtx, bool,
-  code_helper = ERROR_MARK);
+extern bool cris_legitimate_address_p (machine_mode, rtx, bool);
 extern bool cris_store_multiple_op_p (rtx);
 extern bool cris_movem_load_rest_p (rtx);
 extern void cris_asm_output_symbol_ref (FILE *, rtx);
diff --git a/gcc/config/cris/cris.cc b/gcc/config/cris/cris.cc
index 853c07920f07..4eaaf2184b63 100644
--- a/gcc/config/cris/cris.cc
+++ b/gcc/config/cris/cris.cc
@@ -168,6 +168,8 @@ static unsigned int cris_hard_regno_nregs (unsigned int, 
machine_mode);
 static bool cris_hard_regno_mode_ok (unsigned int, machine_mode);
 static HOST_WIDE_INT cris_static_rtx_alignment (machine_mode);
 static HOST_WIDE_INT cris_constant_alignment (const_tree, HOST_WIDE_INT);
+static bool cris_legitimate_address_p_hook (machine_mode, rtx, bool,
+   code_helper);
 
 /* This is the parsed result of the "-max-stack-stackframe=" option.  If
it (still) is zero, then there was no such option given.  */
@@ -217,7 +219,7 @@ int cris_cpu_version = CRIS_DEFAULT_CPU_VERSION;
 #define TARGET_INIT_LIBFUNCS cris_init_libfuncs
 
 #undef TARGET_LEGITIMATE_ADDRESS_P
-#define TARGET_LEGITIMATE_ADDRESS_P cris_legitimate_address_p
+#define TARGET_LEGITIMATE_ADDRESS_P cris_legitimate_address_p_hook
 
 #undef TARGET_PREFERRED_RELOAD_CLASS
 #define TARGET_PREFERRED_RELOAD_CLASS cris_preferred_reload_class
@@ -1536,8 +1538,15 @@ cris_biap_index_p (const_rtx x, bool strict)
 
 /* Worker function for TARGET_LEGITIMATE_ADDRESS_P.  */
 
+static bool
+cris_legitimate_address_p_hook (machine_mode mode, rtx x, bool strict,
+   code_helper)
+{
+  return cris_legitimate_address_p (mode, x, strict);
+}
+
 bool
-cris_legitimate_address_p (machine_mode mode, rtx x, bool strict, code_helper)
+cris_legitimate_address_p (machine_mode mode, rtx x, bool strict)
 {
   const_rtx x1, x2;
 
-- 
2.30.2

Re: IRA update_equiv_regs for (was Re: ICE for interim fix for PR/110748)





On 8/14/23 18:35, Vineet Gupta wrote:


On 8/11/23 17:04, Jeff Law wrote:


I'm wondering (naively) if there is some way to tune this - for a 
given backend. In general it would make sense to do the replacement, 
but not if the cost changes (e.g. consts could be embedded in x86 
insn freely, but not for RISC-V where this is costly and if something 
is split, it might have been intentional).

I'm not immediately aware of a way to tune.

When it comes to tuning, the toplevel question is whether we have any of 
the info we need to tune at the point where the transformation occurs. 
The two most obvious pieces here would be loop info and register pressure.


ie, do we have enough loop structure to know if the def is at a 
shallower loop nest than the use.  There's a reasonable chance we have 
this information as my recollection is this analysis is done fairly 
early in IRA.


But that means we likely don't have any sense of register pressure at 
the points between the def and use.   So the most useful metric for 
tuning isn't really available.


I'd argue that even if the register pressure were high, in some cases, 
there's just no way around it and RA needs to honor what the backend did 
a priori (split in this case), otherwise we end up with something which 
literally doesn't compute and leads to an ICE. I'm puzzled that in this 
case, an intentional implementation is getting in the way. So while I don't 
care about the -0.0 case in itself, it seems with the current framework 
we just can't achieve the results, other than the roundabout way of 
peephole2 you alluded to.
I think you'll run into a lot of resistance with that approach.   The 
fact is we're being a bit sneaky and telling a bit of a fib in the 
backend (claiming support for certain capabilities that don't actually 
exist).


As many have said, lie to GCC and ultimately it will get its revenge.  This 
is but one example.


When we lie to some parts of gcc, we may well trigger undesirable 
behavior later in the pipeline.  It's a tradeoff and sometimes we have 
to back out those little lies.










The one thing that stands out is we don't do this transformation at 
all when register pressure sensitive scheduling is enabled. And we 
really should be turning that on by default.  Our data shows register 
pressure sensitive scheduling is about a 6-7% cycle improvement on 
x264 as it avoids spilling in those key satd loops.



 /* Don't move insns if live range shrinkage or register
 pressure-sensitive scheduling were done because it will not
 improve allocation but likely worsen insn scheduling.  */
  if (optimize
  && !flag_live_range_shrinkage
  && !(flag_sched_pressure && flag_schedule_insns))
    combine_and_move_insns ();



So you might want to look at register pressure sensitive scheduling 
first.  If you go into x264_r from specint and look at 
x264_pixel_satd_8x4.  First verify the loops are fully unrolled. If 
they are, then look for 32bit loads/stores into the stack.  If you 
have them, then you're spilling and getting crappy performance.  Using 
register pressure sensitive scheduling should help significantly.


Is that -fira-loop-pressure ?

-fsched-pressure I think.




We've certainly seen that internally.  The plan was to submit a patch 
to make register pressure sensitive scheduling the default when the 
scheduler is enabled.  We just haven't pushed on it.  If you can 
verify that you're seeing spilling as well, then it'd certainly 
bolster the argument that register-pressure-sensitive-scheduling is 
desirable.


I can confirm that the loop is fully unrolled and there's a zillion 
stack spills there for intermediate computes (-Ofast 
-march=rv64gc_zba_zbb_zbs, no V in that build).
Yea, you'll take a big hit from those spills.  Good to get a 
confirmation that you're seeing it too.


The fix should be pretty simple.  We just turn on -fsched-pressure in 
the RV backend.



Jeff

Re: IRA update_equiv_regs for (was Re: ICE for interim fix for PR/110748)





On 8/12/23 10:44, Jivan Hakobyan wrote:

Yes, as Jeff mentioned, I have some work in that scope.

The first is related to address computation when it has a large constant 
part.

Suppose we have this code:

     int  consume (void *);
     int foo (void) {
        int x[1000];
        return consume (x);
     }

before IRA we have the following sequence
     19: r140:DI=0xf000
     20: r136:DI=r140:DI+0x60
       REG_EQUAL 0xf060
     8: a0:DI=frame:DI+r136:DI
       REG_DEAD r136:DI

but during IRA (eliminate_regs_in_insn) insn 8 transforms to
    8: a0:DI=r136:DI+0xfa0+frame:DI
         REG_DEAD r136:DI

and in the end, we get the wrong sequence.
    21: r136:DI=0xf060
       REG_EQUIV 0xf060
    25: r143:DI=0x1000
    26: r142:DI=r143:DI-0x60
       REG_DEAD r143:DI
       REG_EQUAL 0xfa0
    27: r142:DI=r142:DI+r136:DI
       REG_DEAD r136:DI
    8: a0:DI=r142:DI+frame:DI
       REG_DEAD r142:DI

My changes prevent that transformation.
I have tested on spec and did not get regressions.
Besides, it executed 40B fewer instructions.
Right.  And this looks like a generic failing of the register 
elimination code to simplify after eliminating fp/ap to sp.  It's a bit 
of a surprise as I thought that code had some simplification 
capabilities.   But clearly if it has that ability it isn't working 
well.  Part of me wondered if it's falling down due to constants not 
fitting in a 12 bit signed immediate.   I've got a TODO to look at your 
patch in this space.  Maybe tonight if I can keep moving things off my 
TODO list ;-)




The second piece of work is related to hoisting out loop invariant code.
I have a test case where SP + const can be hoisted out.
..
.L3:
       call foo
       addi a5,sp,16
       sh3add a0,a0,a5
...

Before IRA that code is already out of the loop, but IRA moves it back.
My approach was done in update_equiv_regs().
It prevents any move if its uses and defs are held in a single place, 
and used in the loop.

Currently, that improvement is under evaluation.
Yea, we're going to need to sit down with this.  IRA is working per 
design and we may be able to avoid these problems with -fsched-pressure, 
but it feels a bit hackish.



Jeff

Re: [2/2] RISC-V: Constant FP Optimization with 'Zfa'

2023-08-14 Thread Tsukasa OI via Gcc-patches

On 2023/08/14 21:51, Jin Ma wrote:
> Hi Tsukasa,
>   What a coincidence, I also implemented zfa extension, which also includes 
> fli related instructions :)

Hi, I'm glad to know that someone is working on this extension more
comprehensively (especially when "someone" is an experienced GCC
contributor).  I prefer your patch set in general and glad to learn from
your patch set and your response that my approach was not *that* bad as
I expected.

When a new extension gets available, I will be more confident making a
patch set for GCC (as I already do in GNU Binutils).

> 
> links: https://gcc.gnu.org/pipermail/gcc-patches/2023-August/627294.html
> 
>> +  if (!TARGET_HARD_FLOAT || !TARGET_ZFA)
>> +return result;
>> +  switch (GET_MODE (x))
>> +{
>> +case HFmode:
>> +  /* Not only 'Zfhmin', either 'Zfh' or 'Zvfh' is required.  */
>> +  if (!TARGET_ZFH && !TARGET_ZVFH)
> 
> When Zvfh means that zfh is also on, so there may be no need to judge
> the TARGET_ZVFH here. By the way,the format here seems wrong, maybe 'tab'
> is needed for alignment?

For indentation, I believe this is okay considering 3 indent (soft tab)
from the top (meaning 6 spaces).

For specification requirements, I think I'm correct.

The spec says that 'Zvfh' depends on 'Zve32f' and 'Zfhmin'.  'Zfhmin' is
a conversion-only 'Zfh' subset ('Zve32f' doesn't require any
FP16-related extensions).

Note that "fli.h" requires 'Zfa' and ('Zfh' and/or 'Zvfh').

So, 'Zfh' alone will not be sufficient to check requirements to the
"fli.h" instruction.  So, checking TARGET_ZFH || TARGET_ZVFH (for
existence of the "fli.h") should be correct and I think your patch needs
to be changed "in the long term".

"In the long term" means that, current GNU Binutils has a bug which
"fli.h" requires 'Zfa' and 'Zfh' ('Zfa' and 'Zvfh' does not work).
My initial 'Zfa' proposal (improved by Christoph Müllner and upstreamed
into master) intentionally ignored this case because I assumed that
approval/ratification of 'Zvfh' will take some time and we have a time
to fix before a release of Binutils following approval of both 'Zfa' and
'Zvfh' (it turned out to be wrong).

cf. 

So, "fixing" this part (on your patch) alone will not make the program
work (on the simulator) because current buggy GNU Binutils won't accept
it.  I'm working on it on the GNU Binutils side.

> 
>> +return result;
>> +  break;
>> +case SFmode: break;
>> +case DFmode: break;
> 
> Maybe we still have to judge TARGET_DOUBLE_FLOAT?

Indeed.  I just missed that.

> 
>> +default: return result;
>> +}
>> +
>> +  if (!CONST_DOUBLE_P (x))
>> +return result;
> 
> I think it might be better to judge whether x satisfies the CONST_DOUBLE_P
> before switch (GET_MODE (x)) above.

That's correct.  I think that's a part of leftover when I'm experimenting.

> 
>> +
>> +  r = *CONST_DOUBLE_REAL_VALUE (x);
>> +
>> +  if (REAL_VALUE_ISNAN (r))
>> +{
>> +  long reprs[2] = { 0 };
>> +  /* Compare with canonical NaN.  */
>> +  switch (GET_MODE (x))
>> +{
>> +case HFmode:
>> +  reprs[0] = real_to_target (NULL, &r,
>> + float_mode_for_size (16).require ());
>> +  /* 0x7e00: Canonical NaN for binary16.  */
>> +  if (reprs[0] != 0x7e00)
>> +return result;
>> +  break;
>> +case SFmode:
>> +  reprs[0] = real_to_target (NULL, &r,
>> + float_mode_for_size (32).require ());
>> +  /* 0x7fc0: Canonical NaN for binary32.  */
>> +  if (reprs[0] != 0x7fc0)
>> +return result;
>> +  break;
>> +case DFmode:
>> +  real_to_target (reprs, &r, float_mode_for_size (64).require ());
>> +  if (FLOAT_WORDS_BIG_ENDIAN)
>> +std::swap (reprs[0], reprs[1]);
>> +  /* 0x7ff8_: Canonical NaN for binary64.  */
>> +  if (reprs[0] != 0 || reprs[1] != 0x7ff8)
>> +return result;
>> +  break;
>> +default:
>> +  gcc_unreachable ();
>> +}
>> +  result.type = RISCV_FLOAT_CONST_NAN;
>> +  result.valid = true;
>> +  return result;
>> +}
>> +  else if (REAL_VALUE_ISINF (r))
>> +{
>> +  if (REAL_VALUE_NEGATIVE (r))
>> +return result;
>> +  result.type = RISCV_FLOAT_CONST_INF;
>> +  result.valid = true;
>> +  return result;
>> +}
>> +
>> +  bool sign = REAL_VALUE_NEGATIVE (r);
>> +  result.sign = sign;
>> +
>> +  r = real_value_abs (&r);
>> +  /* GCC internally does not use IEEE754-like encoding (where normalized
>> + significands are in the range [1, 2).  GCC uses [0.5, 1) (see real.cc).
>> + So, this exponent_p1 variable equals IEEE754 unbiased exponent + 1.  */
>> +  int exponent_p1 = REAL_EXP (&r);
>> +
>> +  /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
>> + highest (sign) bit, with a fixed binary point at bit point_pos.
>> + m1 holds the low part of the mantissa, m2

Re: [PATCH 2/3] ivopts: Call valid_mem_ref_p with code_helper [PR110248]

2023-08-14 Thread Hans-Peter Nilsson via Gcc-patches

> Date: Mon, 14 Aug 2023 16:47:40 +0800
> From: "Kewen.Lin via Gcc-patches" 

> on 2023/8/14 15:53, Jan-Benedict Glaw wrote:
> > echo timestamp > s-constrs-h
> > /var/lib/laminar/run/gcc-local/82/local-toolchain-install/bin/g++ 
> > -std=c++11 -c   -g -O2   -DIN_GCC-fno-exceptions -fno-rtti 
> > -fasynchronous-unwind-tables -W -Wall -Wno-narrowing -Wwrite-strings 
> > -Wcast-qual -Wmissing-format-attribute -Wconditionally-supported 
> > -Woverloaded-virtual -pedantic -Wno-long-long -Wno-variadic-macros 
> > -Wno-overlength-strings -Werror -fno-common  -DHAVE_CONFIG_H  
> > -DGENERATOR_FILE -I. -Ibuild -I../../gcc/gcc -I../../gcc/gcc/build 
> > -I../../gcc/gcc/../include  -I../../gcc/gcc/../libcpp/include  \
> >  -o build/gencondmd.o build/gencondmd.cc
> > In file included from ../../gcc/gcc/tree.h:23,
> >  from ../../gcc/gcc/recog.h:24,
> >  from build/gencondmd.cc:40:
> > ../../gcc/gcc/tree-core.h:145:10: fatal error: all-tree.def: No such file 
> > or directory
> >   145 | #include "all-tree.def"
> 
> 
> Thanks for reporting and sorry for the breakage.  This failure only gets 
> exposed if
> all-tree.def isn't generated before compiling these gen*.cc including recog.h 
> during the
> build.  It explains why I didn't catch this failure before.  I will check the 
> existing
> practice and post a patch soon.

I entered PR bootstrap/111021 for a similar breakage.  Looks
like I won't be able to work around it for CRIS (as alluded
in the PR) as recog.h was hacked too; not just
${TARGET}-protos.h. :(

Please consider defaulting to NULL or something like that
instead of introducing a tree.h-et-al dependency.

brgds, H-P

[PATCH] Makefile.in: Make recog.h depend on $(TREE_H)

Hi,

Commit r14-3093 introduced a random build failure on
build/gencondmd.cc building.  Since r14-3093 makes recog.h
include tree.h, which further includes (depends on) some
files that are generated during the building, such as:
all-tree.def, tree-check.h etc, when building file
build/gencondmd.cc, the build can fail if these dependencies
are not ready.  So this patch teaches the Makefile about this dependency.

Thank Jan-Benedict Glaw for testing this!

gcc/ChangeLog:

* Makefile.in (RECOG_H): Add $(TREE_H) as dependence.
---
 gcc/Makefile.in | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 2429128cbf2..9dddb65b45d 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -991,7 +991,7 @@ GIMPLE_H = gimple.h gimple.def gsstruct.def $(VEC_H) \
$(GGC_H) $(BASIC_BLOCK_H) $(TREE_H) tree-ssa-operands.h \
tree-ssa-alias.h $(INTERNAL_FN_H) $(HASH_TABLE_H) is-a.h
 GCOV_IO_H = gcov-io.h version.h auto-host.h gcov-counter.def
-RECOG_H = recog.h
+RECOG_H = recog.h $(TREE_H)
 EMIT_RTL_H = emit-rtl.h
 FLAGS_H = flags.h flag-types.h $(OPTIONS_H)
 OPTIONS_H = options.h flag-types.h $(OPTIONS_H_EXTRA)
--
2.31.1

Re: [PATCH v1] RISC-V: Support RVV VFCVT.X.F.V rounding mode intrinsic API

2023-08-14 Thread juzhe.zh...@rivai.ai

For conversion API, I would prefer kito review since I am not sure about this 
stuff.



juzhe.zh...@rivai.ai
 
From: pan2.li
Date: 2023-08-15 10:55
To: gcc-patches
CC: juzhe.zhong; pan2.li; yanzhang.wang; kito.cheng
Subject: [PATCH v1] RISC-V: Support RVV VFCVT.X.F.V rounding mode intrinsic API
From: Pan Li 
 
This patch would like to support the rounding mode API for the
VFCVT.X.F.V as the below samples.
 
* __riscv_vfcvt_x_f_v_i32m1_rm
* __riscv_vfcvt_x_f_v_i32m1_rm_m
 
Signed-off-by: Pan Li 
 
gcc/ChangeLog:
 
* config/riscv/riscv-vector-builtins-bases.cc
(class vfcvt_x_frm): New class for frm.
(vfcvt_x_frm_obj): New declaration.
(BASE): Ditto.
* config/riscv/riscv-vector-builtins-bases.h: Ditto.
* config/riscv/riscv-vector-builtins-functions.def
(vfcvt_x_frm): New intrinsic function definition.
 
gcc/testsuite/ChangeLog:
 
* gcc.target/riscv/rvv/base/float-point-cvt-x.c: New test.
---
.../riscv/riscv-vector-builtins-bases.cc  | 17 +++
.../riscv/riscv-vector-builtins-bases.h   |  1 +
.../riscv/riscv-vector-builtins-functions.def |  2 ++
.../riscv/rvv/base/float-point-cvt-x.c| 29 +++
4 files changed, 49 insertions(+)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/float-point-cvt-x.c
 
diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.cc 
b/gcc/config/riscv/riscv-vector-builtins-bases.cc
index f2124080ef9..754a53efd3d 100644
--- a/gcc/config/riscv/riscv-vector-builtins-bases.cc
+++ b/gcc/config/riscv/riscv-vector-builtins-bases.cc
@@ -660,6 +660,21 @@ public:
   }
};
+/* Implements below instructions for frm
+   - vfcvt_x
+*/
+template<int UNSPEC>
+class vfcvt_x_frm : public function_base
+{
+public:
+  bool has_rounding_mode_operand_p () const override { return true; }
+
+  rtx expand (function_expander &e) const override
+  {
+return e.use_exact_insn (code_for_pred_fcvt_x_f (UNSPEC, e.arg_mode (0)));
+  }
+};
+
/* Implements vrsub.  */
class vrsub : public function_base
{
@@ -2465,6 +2480,7 @@ static CONSTEXPR const vfclass vfclass_obj;
static CONSTEXPR const vmerge vfmerge_obj;
static CONSTEXPR const vmv_v vfmv_v_obj;
static CONSTEXPR const vfcvt_x vfcvt_x_obj;
+static CONSTEXPR const vfcvt_x_frm vfcvt_x_frm_obj;
static CONSTEXPR const vfcvt_x vfcvt_xu_obj;
static CONSTEXPR const vfcvt_rtz_x vfcvt_rtz_x_obj;
static CONSTEXPR const vfcvt_rtz_x vfcvt_rtz_xu_obj;
@@ -2714,6 +2730,7 @@ BASE (vfclass)
BASE (vfmerge)
BASE (vfmv_v)
BASE (vfcvt_x)
+BASE (vfcvt_x_frm)
BASE (vfcvt_xu)
BASE (vfcvt_rtz_x)
BASE (vfcvt_rtz_xu)
diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.h 
b/gcc/config/riscv/riscv-vector-builtins-bases.h
index 2a9381eec5e..50a7d7ffb6f 100644
--- a/gcc/config/riscv/riscv-vector-builtins-bases.h
+++ b/gcc/config/riscv/riscv-vector-builtins-bases.h
@@ -205,6 +205,7 @@ extern const function_base *const vfclass;
extern const function_base *const vfmerge;
extern const function_base *const vfmv_v;
extern const function_base *const vfcvt_x;
+extern const function_base *const vfcvt_x_frm;
extern const function_base *const vfcvt_xu;
extern const function_base *const vfcvt_rtz_x;
extern const function_base *const vfcvt_rtz_xu;
diff --git a/gcc/config/riscv/riscv-vector-builtins-functions.def 
b/gcc/config/riscv/riscv-vector-builtins-functions.def
index 34def6bb82f..8b6a7cc49f3 100644
--- a/gcc/config/riscv/riscv-vector-builtins-functions.def
+++ b/gcc/config/riscv/riscv-vector-builtins-functions.def
@@ -445,6 +445,8 @@ DEF_RVV_FUNCTION (vfcvt_rtz_xu, alu, full_preds, 
f_to_u_f_v_ops)
DEF_RVV_FUNCTION (vfcvt_f, alu, full_preds, i_to_f_x_v_ops)
DEF_RVV_FUNCTION (vfcvt_f, alu, full_preds, u_to_f_xu_v_ops)
+DEF_RVV_FUNCTION (vfcvt_x_frm, alu_frm, full_preds, f_to_i_f_v_ops)
+
// 13.18. Widening Floating-Point/Integer Type-Convert Instructions
DEF_RVV_FUNCTION (vfwcvt_x, alu, full_preds, f_to_wi_f_v_ops)
DEF_RVV_FUNCTION (vfwcvt_xu, alu, full_preds, f_to_wu_f_v_ops)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/float-point-cvt-x.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/float-point-cvt-x.c
new file mode 100644
index 000..e090f0f97e9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/float-point-cvt-x.c
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64 -O3 -Wno-psabi" } */
+
+#include "riscv_vector.h"
+
+vint32m1_t
+test_riscv_vfcvt_x_f_vv_i32m1_rm (vfloat32m1_t op1, size_t vl) {
+  return __riscv_vfcvt_x_f_v_i32m1_rm (op1, 0, vl);
+}
+
+vint32m1_t
+test_vfcvt_x_f_vv_i32m1_rm_m (vbool32_t mask, vfloat32m1_t op1, size_t vl) {
+  return __riscv_vfcvt_x_f_v_i32m1_rm_m (mask, op1, 1, vl);
+}
+
+vint32m1_t
+test_riscv_vfcvt_x_f_vv_i32m1 (vfloat32m1_t op1, size_t vl) {
+  return __riscv_vfcvt_x_f_v_i32m1 (op1, vl);
+}
+
+vint32m1_t
+test_vfcvt_x_f_vv_i32m1_m (vbool32_t mask, vfloat32m1_t op1, size_t vl) {
+  return __riscv_vfcvt_x_f_v_i32m1_m (mask, op1, vl);
+}
+
+/* { dg-final { scan-assembler-times {vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+} 4 } } */
+/* { dg-final { scan-assembler-times

[PATCH v1] RISC-V: Support RVV VFCVT.X.F.V rounding mode intrinsic API

From: Pan Li 

This patch would like to support the rounding mode API for the
VFCVT.X.F.V as the below samples.

* __riscv_vfcvt_x_f_v_i32m1_rm
* __riscv_vfcvt_x_f_v_i32m1_rm_m

Signed-off-by: Pan Li 

gcc/ChangeLog:

* config/riscv/riscv-vector-builtins-bases.cc
(class vfcvt_x_frm): New class for frm.
(vfcvt_x_frm_obj): New declaration.
(BASE): Ditto.
* config/riscv/riscv-vector-builtins-bases.h: Ditto.
* config/riscv/riscv-vector-builtins-functions.def
(vfcvt_x_frm): New intrinsic function definition.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/float-point-cvt-x.c: New test.
---
 .../riscv/riscv-vector-builtins-bases.cc  | 17 +++
 .../riscv/riscv-vector-builtins-bases.h   |  1 +
 .../riscv/riscv-vector-builtins-functions.def |  2 ++
 .../riscv/rvv/base/float-point-cvt-x.c| 29 +++
 4 files changed, 49 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/float-point-cvt-x.c

diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.cc 
b/gcc/config/riscv/riscv-vector-builtins-bases.cc
index f2124080ef9..754a53efd3d 100644
--- a/gcc/config/riscv/riscv-vector-builtins-bases.cc
+++ b/gcc/config/riscv/riscv-vector-builtins-bases.cc
@@ -660,6 +660,21 @@ public:
   }
 };
 
+/* Implements below instructions for frm
+   - vfcvt_x
+*/
+template<int UNSPEC>
+class vfcvt_x_frm : public function_base
+{
+public:
+  bool has_rounding_mode_operand_p () const override { return true; }
+
+  rtx expand (function_expander &e) const override
+  {
+return e.use_exact_insn (code_for_pred_fcvt_x_f (UNSPEC, e.arg_mode (0)));
+  }
+};
+
 /* Implements vrsub.  */
 class vrsub : public function_base
 {
@@ -2465,6 +2480,7 @@ static CONSTEXPR const vfclass vfclass_obj;
 static CONSTEXPR const vmerge vfmerge_obj;
 static CONSTEXPR const vmv_v vfmv_v_obj;
 static CONSTEXPR const vfcvt_x vfcvt_x_obj;
+static CONSTEXPR const vfcvt_x_frm vfcvt_x_frm_obj;
 static CONSTEXPR const vfcvt_x vfcvt_xu_obj;
 static CONSTEXPR const vfcvt_rtz_x vfcvt_rtz_x_obj;
 static CONSTEXPR const vfcvt_rtz_x vfcvt_rtz_xu_obj;
@@ -2714,6 +2730,7 @@ BASE (vfclass)
 BASE (vfmerge)
 BASE (vfmv_v)
 BASE (vfcvt_x)
+BASE (vfcvt_x_frm)
 BASE (vfcvt_xu)
 BASE (vfcvt_rtz_x)
 BASE (vfcvt_rtz_xu)
diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.h 
b/gcc/config/riscv/riscv-vector-builtins-bases.h
index 2a9381eec5e..50a7d7ffb6f 100644
--- a/gcc/config/riscv/riscv-vector-builtins-bases.h
+++ b/gcc/config/riscv/riscv-vector-builtins-bases.h
@@ -205,6 +205,7 @@ extern const function_base *const vfclass;
 extern const function_base *const vfmerge;
 extern const function_base *const vfmv_v;
 extern const function_base *const vfcvt_x;
+extern const function_base *const vfcvt_x_frm;
 extern const function_base *const vfcvt_xu;
 extern const function_base *const vfcvt_rtz_x;
 extern const function_base *const vfcvt_rtz_xu;
diff --git a/gcc/config/riscv/riscv-vector-builtins-functions.def 
b/gcc/config/riscv/riscv-vector-builtins-functions.def
index 34def6bb82f..8b6a7cc49f3 100644
--- a/gcc/config/riscv/riscv-vector-builtins-functions.def
+++ b/gcc/config/riscv/riscv-vector-builtins-functions.def
@@ -445,6 +445,8 @@ DEF_RVV_FUNCTION (vfcvt_rtz_xu, alu, full_preds, 
f_to_u_f_v_ops)
 DEF_RVV_FUNCTION (vfcvt_f, alu, full_preds, i_to_f_x_v_ops)
 DEF_RVV_FUNCTION (vfcvt_f, alu, full_preds, u_to_f_xu_v_ops)
 
+DEF_RVV_FUNCTION (vfcvt_x_frm, alu_frm, full_preds, f_to_i_f_v_ops)
+
 // 13.18. Widening Floating-Point/Integer Type-Convert Instructions
 DEF_RVV_FUNCTION (vfwcvt_x, alu, full_preds, f_to_wi_f_v_ops)
 DEF_RVV_FUNCTION (vfwcvt_xu, alu, full_preds, f_to_wu_f_v_ops)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/float-point-cvt-x.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/float-point-cvt-x.c
new file mode 100644
index 000..e090f0f97e9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/float-point-cvt-x.c
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64 -O3 -Wno-psabi" } */
+
+#include "riscv_vector.h"
+
+vint32m1_t
+test_riscv_vfcvt_x_f_vv_i32m1_rm (vfloat32m1_t op1, size_t vl) {
+  return __riscv_vfcvt_x_f_v_i32m1_rm (op1, 0, vl);
+}
+
+vint32m1_t
+test_vfcvt_x_f_vv_i32m1_rm_m (vbool32_t mask, vfloat32m1_t op1, size_t vl) {
+  return __riscv_vfcvt_x_f_v_i32m1_rm_m (mask, op1, 1, vl);
+}
+
+vint32m1_t
+test_riscv_vfcvt_x_f_vv_i32m1 (vfloat32m1_t op1, size_t vl) {
+  return __riscv_vfcvt_x_f_v_i32m1 (op1, vl);
+}
+
+vint32m1_t
+test_vfcvt_x_f_vv_i32m1_m (vbool32_t mask, vfloat32m1_t op1, size_t vl) {
+  return __riscv_vfcvt_x_f_v_i32m1_m (mask, op1, vl);
+}
+
+/* { dg-final { scan-assembler-times {vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+} 4 } } */
+/* { dg-final { scan-assembler-times {frrm\s+[axs][0-9]+} 2 } } */
+/* { dg-final { scan-assembler-times {fsrm\s+[axs][0-9]+} 2 } } */
+/* { dg-final { scan-assembler-times {fsrmi\s+[01234]} 2 } } */
-- 
2.34.1

Re: [PATCH] vect: Move VMAT_GATHER_SCATTER handlings from final loop nest

on 2023/8/14 22:16, Richard Sandiford wrote:
> "Kewen.Lin"  writes:
>> Hi Richard,
>>
>> on 2023/8/14 20:20, Richard Sandiford wrote:
>>> Thanks for the clean-ups.  But...
>>>
>>> "Kewen.Lin"  writes:
>>>> Hi,
>>>>
>>>> Following Richi's suggestion [1], this patch is to move the
>>>> handlings on VMAT_GATHER_SCATTER in the final loop nest
>>>> of function vectorizable_load to its own loop.  Basically
>>>> it duplicates the final loop nest, clean up some useless
>>>> set up code for the case of VMAT_GATHER_SCATTER, remove some
>>>> unreachable code.  Also remove the corresponding handlings
>>>> in the final loop nest.
>>>>
>>>> Bootstrapped and regtested on x86_64-redhat-linux,
>>>> aarch64-linux-gnu and powerpc64{,le}-linux-gnu.
>>>>
>>>> [1] https://gcc.gnu.org/pipermail/gcc-patches/2023-June/623329.html
>>>>
>>>> Is it ok for trunk?
>>>>
>>>> BR,
>>>> Kewen
>>>> -----
>>>>
>>>> gcc/ChangeLog:
>>>>
>>>> * tree-vect-stmts.cc (vectorizable_load): Move the handlings on
>>>> VMAT_GATHER_SCATTER in the final loop nest to its own loop,
>>>> and update the final nest accordingly.
>>>> ---
>>>>  gcc/tree-vect-stmts.cc | 361 +
>>>>  1 file changed, 219 insertions(+), 142 deletions(-)
>>>
>>> ...that seems like quite a lot of +s.  Is there nothing we can do to
>>> avoid the cut-&-paste?
>>
>> Thanks for the comments!  I'm not sure if I get your question, if we
>> want to move out the handlings of VMAT_GATHER_SCATTER, the new +s seem
>> inevitable?  Your concern is mainly about git blame history?
> 
> No, it was more that 219-142=77, so it seems like a lot of lines
> are being duplicated rather than simply being moved.  (Unlike for
> VMAT_LOAD_STORE_LANES, which was even a slight LOC saving, and so
> was a clear improvement.)
> 
> So I was just wondering if there was any obvious factoring-out that
> could be done to reduce the duplication.

ah, thanks for the clarification!

I think the main duplication is at the beginning and end of the loop body;
let's take a look at them in detail:

+  if (memory_access_type == VMAT_GATHER_SCATTER)
+{
+  gcc_assert (alignment_support_scheme == dr_aligned
+ || alignment_support_scheme == dr_unaligned_supported);
+  gcc_assert (!grouped_load && !slp_perm);
+
+  unsigned int inside_cost = 0, prologue_cost = 0;

// These above are newly added.

+  for (j = 0; j < ncopies; j++)
+   {
+ /* 1. Create the vector or array pointer update chain.  */
+ if (j == 0 && !costing_p)
+   {
+ if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
+   vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info,
+slp_node, &gs_info, &dataref_ptr,
+&vec_offsets);
+ else
+   dataref_ptr
+ = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
+ at_loop, offset, &dummy, gsi,
+ &ptr_incr, false, bump);
+   }
+ else if (!costing_p)
+   {
+ gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
+ if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info))
+   dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
+  gsi, stmt_info, bump);
+   }

// These are for dataref_ptr.  In the final loop nest we deal with more cases
on simd_lane_access_p and diff_first_stmt_info, but don't handle
STMT_VINFO_GATHER_SCATTER_P any more, so very little (one case) can be shared
between them.  IMHO factoring it out seems like overkill.

+
+ if (mask && !costing_p)
+   vec_mask = vec_masks[j];

// It's merged out from j == 0 and j != 0

+
+ gimple *new_stmt = NULL;
+ for (i = 0; i < vec_num; i++)
+   {
+ tree final_mask = NULL_TREE;
+ tree final_len = NULL_TREE;
+ tree bias = NULL_TREE;
+ if (!costing_p)
+   {
+ if (loop_masks)
+   final_mask
+ = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
+   vec_num * ncopies, vectype,
+   vec_num * j + i);
+ if (vec_mask)
+   final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
+  final_mask, vec_mask, gsi);
+
+ if (i > 0 && !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
+   dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
+  gsi, stmt_info, bump);
+   }

// This part is directly copied from the original; the original gets updated by
removing && !STMT_VINFO_GATHER_SCATTER_P.  Due to its size, I didn't consider
this before; would you prefer that I factor this part out?

+

Re: [PATCH v1 1/6] LoongArch: a symmetric multilib subdir layout

2023-08-14 Thread Yujie Yang

> I came up with another idea. What if we:
> 
> 1. Keep the "default" ABI libs in the toplevel directory. There is
> *always* a default ABI so treating it specially is not really nonsense.
> 2. Create a symlink for consistency. For example, if --with-abi=lp64d, -
> -with-multilib-list=lp64d,lp64s:
> 
>  * /usr/lib/gcc/loongarch64-linux-gnu/14.0.0 contains the lp64d
>libraries.
>  * /usr/lib/gcc/loongarch64-linux-gnu/14.0.0/lp64s contains the lp64s
>libraries.
>  * /usr/lib/gcc/loongarch64-linux-gnu/14.0.0/lp64d is a symlink to "."
> 
> Then we can refer to the lp64d libgcc.a with both
> /usr/lib/gcc/loongarch64-linux-gnu/14.0.0/lp64d/libgcc.a, and
> /usr/lib/gcc/loongarch64-linux-gnu/14.0.0/libgcc.a.
> 
> For referring to the default multilib, the non-suffixed
> /usr/lib/gcc/loongarch64-linux-gnu/14.0.0 path should be used; for
> referring lp64d (no matter what the default is),
> /usr/lib/gcc/loongarch64-linux-gnu/14.0.0/lp64d should be used.
> 
> The symlink can be created by the GCC building system or manually by the
> distro maintainer (or gcc packager).
> 
> Thoughts?
> 
> -- 
> Xi Ruoyao  School of Aerospace Science and
> Technology, Xidian University

Yes, this also eliminates the duplicate build.

In this case, the symlink would not really be necessary since the
toplevel directory is searched by the driver for all ABI configurations
anyways.

It is even easier to implement:

1. (gcc/config.gcc) stipulate that
   loongarch64-linux-gnu    == --with-abi=lp64d,
   loongarch64-linux-gnuf64 == --with-abi=lp64d,
   loongarch64-linux-gnuf32 == --with-abi=lp64f,
   loongarch64-linux-gnusf  == --with-abi=lp64s,
   and no customization is allowed.
   (maybe we can simply remove --with-abi?)

2. (config-ml.in) delete the "default" multisubdir from ${multidirs}.
   (which is base/lp64d for --with-abi=lp64d)

No other tweaking in config-ml.in is required.  So this seems to be
canonical.

The only problem is, I understand that triplets are essential to
the GNU build system, but should they always imply the default
ABI to be used when the compiler is invoked without an argument?

RE: [PATCH v4] Mode-Switching: Fix SET_SRC ICE for create_pre_exit

2023-08-14 Thread Li, Pan2 via Gcc-patches

Committed, as it passed both the bootstrap and the regression tests on x86. Thanks, Jeff.

Pan

-----Original Message-----
From: Jeff Law  
Sent: Tuesday, August 15, 2023 1:21 AM
To: Li, Pan2 ; gcc-patches@gcc.gnu.org
Cc: juzhe.zh...@rivai.ai; kito.ch...@sifive.com; Wang, Yanzhang 

Subject: Re: [PATCH v4] Mode-Switching: Fix SET_SRC ICE for create_pre_exit



On 8/12/23 18:56, pan2...@intel.com wrote:
> From: Pan Li 
> 
> In some cases, like gcc/testsuite/gcc.dg/pr78148.c on RISC-V, there will
> be only 1 operand when SET_SRC is applied in create_pre_exit, as in the example below.
> 
> (insn 13 9 14 2 (clobber (reg/i:TI 10 a0)) 
> "gcc/testsuite/gcc.dg/pr78148.c":24:1 -1
>(expr_list:REG_UNUSED (reg/i:TI 10 a0)
>  (nil)))
> 
> Unfortunately, SET_SRC requires at least 2 operands, so a segmentation
> fault occurs here. The SH4 part that results in the segmentation fault looks
> like it is only valid when the return_copy_pat is a load or something like
> that. Thus, this patch tries to fix it by restricting SET_SRC to SET insns.
> 
> Signed-off-by: Pan Li 
> 
> gcc/ChangeLog:
> 
>   * mode-switching.cc (create_pre_exit): Add SET insn check.
> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.target/riscv/mode-switch-ice-1.c: New test.
OK.  Thanks for the updated version.

jeff

RE: [PATCH] x86: Update model values for Raptorlake.

2023-08-14 Thread Cui, Lili via Gcc-patches

Sorry, I should have built the patch while backporting, and thanks for your 
report and suggestions.
I'll backport another patch to fix the problems after the bootstraps finish,
probably in a couple of hours.

Thank you!
Lili.

> -----Original Message-----
> From: Jonathan Wakely 
> Sent: Monday, August 14, 2023 10:26 PM
> To: Cui, Lili 
> Cc: gcc-patches@gcc.gnu.org; Liu, Hongtao 
> Subject: Re: [PATCH] x86: Update model values for Raptorlake.
> 
> On 14/08/23 15:19 +0100, Jonathan Wakely wrote:
> >On 14/08/23 04:37 +0000, Pan Li via Gcc-patches wrote:
> >>Committed as obvious, and backported to GCC13.
> >
> >Did you try building it on gcc-13?
> >
> >case 0x97:
> >case 0x9a:
> >case 0xbf:
> >  /* Alder Lake.  */
> >case 0xb7:
> >case 0xba:
> >case 0xbf:
> >  /* Raptor Lake.  */
> >
> >
> >This fails:
> >
> >In file included from /home/test/src/gcc-13/gcc/config/i386/driver-
> i386.cc:31:
> >/home/test/src/gcc-13/gcc/common/config/i386/cpuinfo.h: In function ‘const
> char* get_intel_cpu(__processor_model*, __processor_model2*, unsigned
> int*)’:
> >/home/test/src/gcc-13/gcc/common/config/i386/cpuinfo.h:543:5: error:
> duplicate case value
> >  543 | case 0xbf:
> >  | ^~~~
> >/home/test/src/gcc-13/gcc/common/config/i386/cpuinfo.h:539:5: note:
> previously used here
> >  539 | case 0xbf:
> >  | ^~~~
> >
> >Please fix or revert.
> 
> 
> The backported patch is not the same as the trunk one, it adds two new cases
> not one. But one of them is a duplicate of one you already added in January
> 2022, in 4bd5297f665fd3ba5691297c016809f3501e7fba
> 
> No matter how obvious a patch is, if it touches code (not just comments or
> docs) please don't commit without even building it once.
> 
> Also, backports should typically say something in the git commit message, e.g.
> using git gcc-backport (or git cherry-pick -x) will automatically add:
> 
> (cherry picked from commit 003016a40844701c48851020df672b70f3446bdb)
> 
> to the commit message.
> 
> 
> 
> 
> 
> >>Lili.
> >>
> >>
> >>Update model values for Raptorlake according to SDM.
> >>
> >>gcc/ChangeLog
> >>
> >>* common/config/i386/cpuinfo.h (get_intel_cpu): Add model value
> 0xba
> >>to Raptorlake.
> >>---
> >>gcc/common/config/i386/cpuinfo.h | 1 +
> >>1 file changed, 1 insertion(+)
> >>
> >>diff --git a/gcc/common/config/i386/cpuinfo.h
> >>b/gcc/common/config/i386/cpuinfo.h
> >>index ae48bc17771..dd7f0f6abfd 100644
> >>--- a/gcc/common/config/i386/cpuinfo.h
> >>+++ b/gcc/common/config/i386/cpuinfo.h
> >>@@ -537,6 +537,7 @@ get_intel_cpu (struct __processor_model
> *cpu_model,
> >>case 0x9a:
> >>  /* Alder Lake.  */
> >>case 0xb7:
> >>+case 0xba:
> >>case 0xbf:
> >>  /* Raptor Lake.  */
> >>case 0xaa:

RE: Bootstrap fail on GCC 13 (was: Re: [PATCH] x86: Update model values for Alderlake, Rocketlake and Raptorlake.)

2023-08-14 Thread Cui, Lili via Gcc-patches

Sorry, I should have built the patch while backporting.
I'll backport another patch to fix the problems after the bootstraps finish,
probably in a couple of hours.

Thank you!
Lili.

> -----Original Message-----
> From: Tobias Burnus 
> Sent: Monday, August 14, 2023 5:34 PM
> To: gcc-patches@gcc.gnu.org; Cui, Lili 
> Subject: Bootstrap fail on GCC 13 (was: Re: [PATCH] x86: Update model values
> for Alderlake, Rocketlake and Raptorlake.)
> 
> Hi,
> 
> your GCC 13 commit
> https://gcc.gnu.org/r13-7720-g0fa76e35a5f9e1 x86: Update model values for
> Raptorlake.
> 
> causes a build fail:
> 
> gcc/common/config/i386/cpuinfo.h: In function ‘const char*
> get_intel_cpu(__processor_model*, __processor_model2*, unsigned int*)’:
> gcc/common/config/i386/cpuinfo.h:543:5: error: duplicate case value
>543 | case 0xbf:
>| ^~~~
> gcc/common/config/i386/cpuinfo.h:539:5: note: previously used here
>539 | case 0xbf:
>| ^~~~
> 
> Your patch did:
> 
>   case 0x97:
>   case 0x9a:
>   case 0xbf:   << Existing case value
> /* Alder Lake.  */
>   case 0xb7:
> +case 0xba:
> +case 0xbf:  << Newly added same case value
> /* Raptor Lake.  */
> 
> 
> Tobias
> 
> On 29.06.23 05:06, Cui, Lili via Gcc-patches wrote:
> > I will directly commit this patch, it can be considered as an obvious patch.
> >
> > Thanks,
> > Lili.
> >
> >> -----Original Message-----
> >> From: Gcc-patches
> >>  On Behalf Of
> >> Cui, Lili via Gcc-patches
> >> Sent: Wednesday, June 28, 2023 6:52 PM
> >> To: gcc-patches@gcc.gnu.org
> >> Cc: Liu, Hongtao 
> >> Subject: [PATCH] x86: Update model values for Alderlake, Rocketlake
> >> and Raptorlake.
> >>
> >> Hi Hongtao,
> >>
> >> This patch is to update model values for Alderlake, Rocketlake and
> >> Raptorlake according to SDM.
> >>
> >> Ok for trunk?
> >>
> >> Thanks.
> >> Lili.
> >>
> >> Update model values for Alderlake, Rocketlake and Raptorlake
> >> according to SDM.
> >>
> >> gcc/ChangeLog
> >>
> >>  * common/config/i386/cpuinfo.h (get_intel_cpu): Remove model
> >> value 0xa8
> >>  from Rocketlake, move model value 0xbf from Alderlake to
> >> Raptorlake.
> >> ---
> >>   gcc/common/config/i386/cpuinfo.h | 3 +--
> >>   1 file changed, 1 insertion(+), 2 deletions(-)
> >>
> >> diff --git a/gcc/common/config/i386/cpuinfo.h
> >> b/gcc/common/config/i386/cpuinfo.h
> >> index 61559ed9de2..ae48bc17771 100644
> >> --- a/gcc/common/config/i386/cpuinfo.h
> >> +++ b/gcc/common/config/i386/cpuinfo.h
> >> @@ -463,7 +463,6 @@ get_intel_cpu (struct __processor_model
> >> *cpu_model,
> >> cpu_model->__cpu_subtype = INTEL_COREI7_SKYLAKE;
> >> break;
> >>   case 0xa7:
> >> -case 0xa8:
> >> /* Rocket Lake.  */
> >> cpu = "rocketlake";
> >> CHECK___builtin_cpu_is ("corei7"); @@ -536,9 +535,9 @@
> >> get_intel_cpu (struct __processor_model *cpu_model,
> >> break;
> >>   case 0x97:
> >>   case 0x9a:
> >> -case 0xbf:
> >> /* Alder Lake.  */
> >>   case 0xb7:
> >> +case 0xbf:
> >> /* Raptor Lake.  */
> >>   case 0xaa:
> >>   case 0xac:
> >> --
> >> 2.25.1
> -
> Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201,
> 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer:
> Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München;
> Registergericht München, HRB 106955

Re: [PATCH] RISC-V: Fix autovec_length_operand predicate[PR110989]

2023-08-14 Thread juzhe.zh...@rivai.ai

Ping. Seems no objection ?
Will commit it soon.

Thanks.


juzhe.zh...@rivai.ai
 
From: Juzhe-Zhong
Date: 2023-08-12 22:15
To: gcc-patches
CC: kito.cheng; kito.cheng; rdapp.gcc; jeffreyalaw; Juzhe-Zhong
Subject: [PATCH] RISC-V: Fix autovec_length_operand predicate[PR110989]
An incorrect configuration of the autovec_length_operand predicate was
discovered in PR110989, in the following situation:
 
vect__6.24_107 = .MASK_LEN_LOAD (vectp.22_105, 32B, mask__49.21_99, 
POLY_INT_CST [2, 2], 0); ---> dummy length = VF.
 
The current autovec length operand failed to recognize the VF dummy length.
 
-march=rv64gcv -mabi=lp64d --param=riscv-autovec-preference=scalable -Ofast 
-fno-schedule-insns -fno-schedule-insns2:
 
Before this patch:
 
srli a4,s0,2
addi a4,a4,-3
srli s0,s0,3
vsetvli a5,zero,e64,m1,ta,ma
vid.v v1
vmul.vx v1,v1,a4
addi a4,s0,-2
vadd.vx v1,v1,a4
addi a4,s0,-1
vslide1up.vx v2,v1,a4
vmv.v.x v1,a4
vand.vv v1,v2,v1
vl1re64.v v3,0(t2)
vrgather.vv v2,v3,v1
vmv.v.i v1,0
vmfeq.vv v0,v2,v1
vsetvli zero,s0,e32,mf2,ta,ma---> s0 = POLY (2,2)
vle32.v v3,0(t3),v0.t
vsetvli a5,zero,e64,m1,ta,ma
vmfne.vv v0,v2,v1
vsetvli zero,zero,e32,mf2,ta,ma
vfwcvt.f.x.v v1,v3
vsetvli zero,zero,e64,m1,ta,ma
vmerge.vvm v1,v1,v2,v0
vslidedown.vx v1,v1,a4
vfmv.f.s fa5,v1
j .L6
 
After this patch:
 
srli a4,s0,2
addi a4,a4,-3
srli s0,s0,3
vsetvli a5,zero,e64,m1,ta,ma
vid.v v1
vmul.vx v1,v1,a4
addi a4,s0,-2
vadd.vx v1,v1,a4
addi s0,s0,-1
vslide1up.vx v2,v1,s0
vmv.v.x v1,s0
vand.vv v1,v2,v1
vl1re64.v v3,0(t2)
vrgather.vv v2,v3,v1
vmv.v.i v1,0
vmfeq.vv v0,v2,v1
vle32.v v3,0(t3),v0.t
vmfne.vv v0,v2,v1
vsetvli zero,zero,e32,mf2,ta,ma
vfwcvt.f.x.v v1,v3
vsetvli zero,zero,e64,m1,ta,ma
vmerge.vvm v1,v1,v2,v0
vslidedown.vx v1,v1,s0
vfmv.f.s fa5,v1
j .L6
 
2 vsetvli insns are reduced.
 
gcc/ChangeLog:
 
* config/riscv/predicates.md: Fix predicate.
 
gcc/testsuite/ChangeLog:
 
* gcc.target/riscv/rvv/autovec/pr110989.c: Add vsetvli assembly check.
 
---
gcc/config/riscv/predicates.md| 5 +
gcc/testsuite/gcc.target/riscv/rvv/autovec/pr110989.c | 7 ++-
2 files changed, 7 insertions(+), 5 deletions(-)
 
diff --git a/gcc/config/riscv/predicates.md b/gcc/config/riscv/predicates.md
index 9db28c2def7..b6ebdcf55de 100644
--- a/gcc/config/riscv/predicates.md
+++ b/gcc/config/riscv/predicates.md
@@ -282,10 +282,7 @@
(define_special_predicate "autovec_length_operand"
   (ior (match_operand 0 "pmode_register_operand")
-   (ior (match_operand 0 "const_csr_operand")
-(match_test "rtx_equal_p (op, gen_int_mode
- (GET_MODE_NUNITS (GET_MODE (op)),
-   Pmode))"
+   (match_code "const_int,const_poly_int")))
(define_predicate "reg_or_mem_operand"
   (ior (match_operand 0 "register_operand")
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr110989.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr110989.c
index cf3b247e604..6e163a55c56 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr110989.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr110989.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-march=rv64gcv -mabi=lp64d 
--param=riscv-autovec-preference=scalable -Ofast" } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d 
--param=riscv-autovec-preference=scalable -Ofast -fno-schedule-insns 
-fno-schedule-insns2" } */
int a, b, c;
double *d;
@@ -9,3 +9,8 @@ void e() {
 f = *d ?: *(&a + c);
   b = f;
}
+
+/* { dg-final { scan-assembler-times {vsetvli} 3 } }  */
+/* { dg-final { scan-assembler-times 
{vsetvli\s+[a-x0-9]+,\s*zero,\s*e64,\s*m1,\s*t[au],\s*m[au]} 1 } } */
+/* { dg-final { scan-assembler-times 
{vsetvli\s+zero,\s*zero,\s*e64,\s*m1,\s*t[au],\s*m[au]} 1 } } */
+/* { dg-final { scan-assembler-times 
{vsetvli\s+zero,\s*zero,\s*e32,\s*mf2,\s*t[au],\s*m[au]} 1 } } */
-- 
2.36.1

Re: cpymem for RISCV with v extension

2023-08-14 Thread Joern Rennecke

On Fri, 4 Aug 2023 at 21:52, Jeff Law  wrote:

> > diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
> > index b4884a30872..e61110fa3ad 100644
> > --- a/gcc/config/riscv/riscv-v.cc
> > +++ b/gcc/config/riscv/riscv-v.cc
> > @@ -49,6 +49,7 @@
> >   #include "tm-constrs.h"
> >   #include "rtx-vector-builder.h"
> >   #include "targhooks.h"
> > +#include "predict.h"
> Not sure this is needed, but I didn't scan for it explicitly.  If it's
> not needed, then remove it.

It is needed to declare optimize_function_for_size_p .

Re: [PATCH v4 1/6] LoongArch: Add Loongson SX vector directive compilation framework.

2023-08-14 Thread PanChenghui

Yes, there are some conflicts in the -mlsx/-mlasx option implementation
because this patch set is based on the older option framework.  We will try
to resolve this problem later.

On Tue, 2023-08-15 at 09:18 +0800, Xi Ruoyao wrote:
> I guess there is a merge conflict with Yujie's "-msimd=" patch and
> you
> may need to collaborate to resolve it.  Maybe just add -msimd in this
> series.
> 
> On Tue, 2023-08-15 at 09:05 +0800, Chenghui Pan wrote:
> > From: Lulu Cheng 
> > 
> > gcc/ChangeLog:
> > 
> > * config/loongarch/genopts/loongarch-strings: Add
> > compilation framework.
> > * config/loongarch/genopts/loongarch.opt.in: Ditto.
> > * config/loongarch/loongarch-c.cc
> > (loongarch_cpu_cpp_builtins): Ditto.
> > * config/loongarch/loongarch-def.c: Ditto.
> > * config/loongarch/loongarch-def.h (N_ISA_EXT_TYPES):
> > Ditto.
> > (ISA_EXT_SIMD_LSX): Ditto.
> > (N_SWITCH_TYPES): Ditto.
> > (SW_LSX): Ditto.
> > (struct loongarch_isa): Ditto.
> > * config/loongarch/loongarch-driver.cc (APPEND_SWITCH):
> > Ditto.
> > (driver_get_normalized_m_opts): Ditto.
> > * config/loongarch/loongarch-driver.h
> > (driver_get_normalized_m_opts): Ditto.
> > * config/loongarch/loongarch-opts.cc
> > (loongarch_config_target): Ditto.
> > (isa_str): Ditto.
> > * config/loongarch/loongarch-opts.h (ISA_HAS_LSX): Ditto.
> > * config/loongarch/loongarch-str.h (OPTSTR_LSX): Ditto.
> > * config/loongarch/loongarch.opt: Ditto.
> 
> /* snip */
>

Re: [PATCH v1 1/6] LoongArch: a symmetric multilib subdir layout

2023-08-14 Thread Xi Ruoyao via Gcc-patches

On Mon, 2023-08-14 at 19:16 +0800, Xi Ruoyao wrote:
> On Mon, 2023-08-14 at 18:18 +0800, Yujie Yang wrote:
> > On Mon, Aug 14, 2023 at 03:48:53PM +0800, Xi Ruoyao wrote:
> > > On Mon, 2023-08-14 at 15:37 +0800, Yujie Yang wrote:
> > > > On Mon, Aug 14, 2023 at 01:38:40PM +0800, Xi Ruoyao wrote:
> > > > > On Mon, 2023-08-14 at 11:57 +0800, Yang Yujie wrote:
> > > > > 
> > > > > > However, for LoongArch, we do not want such a "toplevel" library
> > > > > > installation since the default ABI may change.  We expect all
> > > > > > multilib variants of libraries to be installed to their designated
> > > > > > ABI-specific subdirs (e.g. base/lp64d) of the GCC libdir, so that
> > > > > > the default ABI can be configured arbitrarily (with --with-abi)
> > > > > > while the gcc libdir layout stays consistent.  This could be
> > > > > > helpful for the distribution packaging of GCC libraries.
> > > > > 
> > > > > Have you tested a --disable-multilib configuration?  To me with --
> > > > > disable-configuration everything should be still in the toplevel
> > > > > directory, not any sub-directory.
> > > > 
> > > > That's a good point, sorry I missed --disable-multilib here.
> > > > 
> > > > However, you don't really need --disable-multilib since
> > > > the libraries are only built once in the default ABI configuration
> > > > as long as --with-multilib-list does not request anything more than
> > > > that.
> > > > 
> > > > Maybe we should force-enabling multilib in all cases.
> > > 
> > > I really don't like this.  Why must I always remind myself "hey, this
> > > is LoongArch, there is a different directory layout" when I don't need
> > > multilib at all?
> > > 
> > 
> > AFAIK, the two main uses of the multisubdir layout are in the C++
> > header directory and the GCC libdir (where libgcc.a resides), respectively.
> > The GCC libdir is fine since they are private to a user's GCC build.
> > However, the C++ header directory is shared across the system unless
> > an alternative sysroot is chosen, so the consistency of the multilib
> > layout matters.
> 
> The C++ header directory should also be considered private to the GCC
> build.  AFAIK no distro supports "overwriting a part of the system", so
> you cannot just install a custom GCC build and overwrite the system C++
> header directory.  For a cross compiler, the C++ header directory is
> $prefix/$target_triple/include/c++/$gcc_version/$multi_dir, the C++
> header in $sysroot/usr/include/c++ (if it ever exists) will not be used
> at all.
> 
> > So theoretically, the toplevel libraries should have the same ABI under
> > the target triplet.  However, for many architectures, the
> > "--with-abi + MULTILIB_DEFAULT" scheme may cause the toplevel to be
> > configured to have different meanings.
> 
> https://gcc.gnu.org/PR104085 is an example of the issue caused by the
> different meaning.
> 
> > So I think it's also a reasonable approach that we just simply eliminate
> > the ambiguous toplevel libraries and use a symmetric layout instead.
> 
> I don't like the inconsistency among different GCC ports.  If all ports
> use the same approach I'll not object.

I came up with another idea. What if we:

1. Keep the "default" ABI libs in the toplevel directory. There is
*always* a default ABI so treating it specially is not really nonsense.
2. Create a symlink for consistency. For example, if --with-abi=lp64d, -
-with-multilib-list=lp64d,lp64s:

 * /usr/lib/gcc/loongarch64-linux-gnu/14.0.0 contains the lp64d
   libraries.
 * /usr/lib/gcc/loongarch64-linux-gnu/14.0.0/lp64s contains the lp64s
   libraries.
 * /usr/lib/gcc/loongarch64-linux-gnu/14.0.0/lp64d is a symlink to "."

Then we can refer to the lp64d libgcc.a with both
/usr/lib/gcc/loongarch64-linux-gnu/14.0.0/lp64d/libgcc.a, and
/usr/lib/gcc/loongarch64-linux-gnu/14.0.0/libgcc.a.

For referring to the default multilib, the non-suffixed
/usr/lib/gcc/loongarch64-linux-gnu/14.0.0 path should be used; for
referring lp64d (no matter what the default is),
/usr/lib/gcc/loongarch64-linux-gnu/14.0.0/lp64d should be used.

The symlink can be created by the GCC building system or manually by the
distro maintainer (or gcc packager).

Thoughts?

-- 
Xi Ruoyao  School of Aerospace Science and
Technology, Xidian University

Re: RISCV test infrastructure for d / v / zfh extensions

2023-08-14 Thread Joern Rennecke

On Tue, 1 Aug 2023 at 14:44, Robin Dapp  wrote:
>
> Hi Joern,
>
> thanks, I believe this will help with testing.
>
> > +proc check_effective_target_riscv_v { } {
> > +return [check_no_compiler_messages riscv_ext_v assembly {
> > +   #ifndef __riscv_v
> > +   #error "Not __riscv_v"
> > +   #endif
> > +}]
> > +}
> This can be replaced by riscv_vector or vice versa.

Hmm, you are right.  I personally prefer my version because it allows
consistent naming of the different tests, and is also easily extensible when
new extensions need testing.  However, the riscv_vector name has the
advantage that it is more legible for people who are not used to dealing
with RISC-V extension names.  If we keep riscv_vector, it would make sense
to also give the other tests more verbose names, e.g. change riscv_d into
riscv_double_fp or even riscv_double_precision_floating_point.
It would be nice to hear other people's opinions on the naming.

> > +# Return 1 if we can execute code when using dg-add-options riscv_v
> > +
> > +proc check_effective_target_riscv_v_ok { } {
> > +# If the target already supports v without any added options,
> > +# we may assume we can execute just fine.
> > +if { [check_effective_target_riscv_v] } {
> > + return 1
> > +}
> > +
> > +# check if we can execute vector insns with the given hardware or
> > +# simulator
> > +set gcc_march [regsub {[[:alnum:]]*} [riscv_get_arch] &v]
> > +if { [check_runtime ${gcc_march}_exec {
> > +   int main() {  asm("vsetivli t0, 9, e8, m1, tu, ma"); return 0; } } 
> > "-march=${gcc_march}"] } {
> > + return 1
> > +}
> > +
> > +# Possible future extensions: If the target is a simulator, 
> > dg-add-options
> > +# might change its config to make it allow vector insns, or we might 
> > use
> > +# options to set special elf flags / sections to effect that.
> > +
> > +return 0
> > +}
> So in general we would add {dg-add-options riscv_v} for every
> test that requires compile-time vector support?
>
> For a run test we would check {dg-require-effective-target riscv_v_ok}
> before?

Yes.

> Would it make sense to skip the first check here
> (check_effective_target_riscv_v) so we have a proper runtime check?

My starting point was that changing global testsuite variables around -
as the original RISC-V vector patches did - is wrong.  The user asked to test
a particular target (or set of targets, for multilibs), and that target
is the one to test, so we can't just assume it has other hardware features
that are not implied by the target.
Conversely, the target that the user requested to test can be assumed to be
available for testing.  Testing that it actually works is a part of
the point of the test.  If I ask for a dejagnu test for a target that has
vector support, I would hope that the vector support is also tested, not
backing off if it finds that there is a problem with the target.

Although it should get tested anyway with generic tests, even though this
does not happen at the moment (this is for another PR I intend to open
and address).

> Right now we assume the runtime can execute vector instructions if
> the compiler can emit them.

The way I look at things, when the macro  __riscv_v is defined,
the compiler asserts that it is compiling for a target that has vector support,
because it was instructed by configuration / options to emit code for that
target.  Which we can take as evidence that dejagnu is run with options
to select that target (either explicitly or by default due to the
configuration of
the compiler under test)

>  You could replace riscv_vector_hw and
> riscv_zvfh_hw by your versions then and we'd have a clear separation
> between runtime and compile time.
> We would just need to make sure not to add "v" twice if it's already
> in the march string.

The check_effective_target_riscv_v test (or riscv_vector, if that name
is preferred) in add_options_for_riscv_v would be unaffected by such a
change.
This is purely a matter of what failure mode people want to see if the
execution target can't execute programs for the specified target.  Do
we want it to be a silent side-note in the verbose log file, or complain
for every single test?

> > +if { [string equal $gcc_march "imafd"] } {
> > + set gcc_march "g"
> > +}
> Wouldn't we want to always replace "imafd" with "g" for
> simplicity/consistency and not just the exact string?

The tests are arranged such that the exact string "imafd" will appear
there if that is supported.  Note that the test is inside the loop.

I see that the indentation looks garbled; I have to fix that.

> > +proc add_options_for_riscv_v { flags } {
> > +if { [lsearch $flags -march=*] >= 0 } {
> > + # If there are multiple -march flags, we have to adjust all of them.
> > + # ??? Is there a way to make the match specific to a full list 
> > element?
> > + # as it is, we might match something inside a string.
> > + return [regsub

Re: [PATCH v4 1/6] LoongArch: Add Loongson SX vector directive compilation framework.

2023-08-14 Thread Xi Ruoyao via Gcc-patches

I guess there is a merge conflict with Yujie's "-msimd=" patch and you
may need to collaborate to resolve it.  Maybe just add -msimd in this
series.

On Tue, 2023-08-15 at 09:05 +0800, Chenghui Pan wrote:
> From: Lulu Cheng 
> 
> gcc/ChangeLog:
> 
> * config/loongarch/genopts/loongarch-strings: Add compilation 
> framework.
> * config/loongarch/genopts/loongarch.opt.in: Ditto.
> * config/loongarch/loongarch-c.cc (loongarch_cpu_cpp_builtins): Ditto.
> * config/loongarch/loongarch-def.c: Ditto.
> * config/loongarch/loongarch-def.h (N_ISA_EXT_TYPES): Ditto.
> (ISA_EXT_SIMD_LSX): Ditto.
> (N_SWITCH_TYPES): Ditto.
> (SW_LSX): Ditto.
> (struct loongarch_isa): Ditto.
> * config/loongarch/loongarch-driver.cc (APPEND_SWITCH): Ditto.
> (driver_get_normalized_m_opts): Ditto.
> * config/loongarch/loongarch-driver.h (driver_get_normalized_m_opts): 
> Ditto.
> * config/loongarch/loongarch-opts.cc (loongarch_config_target): Ditto.
> (isa_str): Ditto.
> * config/loongarch/loongarch-opts.h (ISA_HAS_LSX): Ditto.
> * config/loongarch/loongarch-str.h (OPTSTR_LSX): Ditto.
> * config/loongarch/loongarch.opt: Ditto.

/* snip */

-- 
Xi Ruoyao 
School of Aerospace Science and Technology, Xidian University

Re: [PATCH v1 1/6] LoongArch: a symmetric multilib subdir layout

2023-08-14 Thread Yujie Yang

On Mon, Aug 14, 2023 at 03:48:53PM +0800, Xi Ruoyao wrote:
> On Mon, 2023-08-14 at 15:37 +0800, Yujie Yang wrote:
> > On Mon, Aug 14, 2023 at 01:38:40PM +0800, Xi Ruoyao wrote:
> > > On Mon, 2023-08-14 at 11:57 +0800, Yang Yujie wrote:
> > > 
> > > > However, for LoongArch, we do not want such a "toplevel" library
> > > > installation since the default ABI may change.  We expect all
> > > > multilib variants of libraries to be installed to their designated
> > > > ABI-specific subdirs (e.g. base/lp64d) of the GCC libdir, so that
> > > > the default ABI can be configured arbitrarily (with --with-abi)
> > > > while the gcc libdir layout stays consistent.  This could be
> > > > helpful for the distribution packaging of GCC libraries.
> > > 
> > > Have you tested a --disable-multilib configuration?  To me with --
> > > disable-configuration everything should be still in the toplevel
> > > directory, not any sub-directory.
> > 
> > That's a good point, sorry I missed --disable-multilib here.
> > 
> > However, you don't really need --disable-multilib since
> > the libraries are only built once in the default ABI configuration
> > as long as --with-multilib-list does not request anything more than
> > that.
> > 
> > Maybe we should force-enabling multilib in all cases.
> 
> I really don't like this.  Why must I always remind my self "hey, this
> is LoongArch, there is a different directory layout" when I don't need
> multilib at all?
> 

AFAIK, the two main uses of the multisubdir layout are in the C++
header directory and the GCC libdir (where libgcc.a resides), respectively.
The GCC libdir is fine since they are private to a user's GCC build.
However, the C++ header directory is shared across the system unless
an alternative sysroot is chosen, so the consistency of the multilib
layout matters.

So theoretically, the toplevel libraries should have the same ABI under
the target triplet.  However, for many architectures, the
"--with-abi + MULTILIB_DEFAULT" scheme may cause the toplevel to be
configured to have different meanings.

So I think it's also a reasonable approach that we just simply eliminate
the ambiguous toplevel libraries and use a symmetric layout instead.

Re: [PATCH v1 2/6] LoongArch: improved target configuration interface

2023-08-14 Thread Yujie Yang

On Mon, Aug 14, 2023 at 01:22:32PM +0800, Xi Ruoyao wrote:
> On Mon, 2023-08-14 at 11:57 +0800, Yang Yujie wrote:
> > The configure script and the GCC driver are updated so that
> > it is easier to customize and control GCC builds for targeting
> > different LoongArch implementations.
> > 
> > * Support options for LoongArch SIMD extensions:
> >   new configure options --with-simd={none,lsx,lasx};
> >   new driver options -m[no]-l[a]sx / -msimd={none,lsx,lasx}.
> 
> What's the relationship between -mlasx and -msimd=lasx?  What will
> happen if the user specifies -mlasx -msimd=none or -mlasx -msimd=lsx?
> 
> -- 
> Xi Ruoyao 
> School of Aerospace Science and Technology, Xidian University

At this moment we make sure all "flags" (that express a config "delta")
are processed after the "parameters", which is documented in the LoongArch
Toolchain Conventions[1].

So if -msimd=* and -m[no-]l[a]sx appear together on the driver command
line, the final configuration would be derived by first applying -msimd=*
and then the sequence of -m[no-]l[a]sx, in the order they appear.

This is similar to the relationship between -msoft-float and -mfpu / -mabi,
where -mfpu / -mabi are applied first, and -msoft-float modifies the existing
target configuration states (FPU, ABI).

[1] currently released at https://github.com/loongson/LoongArch-Documentation
/blob/main/docs/LoongArch-toolchain-conventions-EN.adoc

[PATCH v4 1/6] LoongArch: Add Loongson SX vector directive compilation framework.

2023-08-14 Thread Chenghui Pan

From: Lulu Cheng 

gcc/ChangeLog:

* config/loongarch/genopts/loongarch-strings: Add compilation framework.
* config/loongarch/genopts/loongarch.opt.in: Ditto.
* config/loongarch/loongarch-c.cc (loongarch_cpu_cpp_builtins): Ditto.
* config/loongarch/loongarch-def.c: Ditto.
* config/loongarch/loongarch-def.h (N_ISA_EXT_TYPES): Ditto.
(ISA_EXT_SIMD_LSX): Ditto.
(N_SWITCH_TYPES): Ditto.
(SW_LSX): Ditto.
(struct loongarch_isa): Ditto.
* config/loongarch/loongarch-driver.cc (APPEND_SWITCH): Ditto.
(driver_get_normalized_m_opts): Ditto.
* config/loongarch/loongarch-driver.h (driver_get_normalized_m_opts): 
Ditto.
* config/loongarch/loongarch-opts.cc (loongarch_config_target): Ditto.
(isa_str): Ditto.
* config/loongarch/loongarch-opts.h (ISA_HAS_LSX): Ditto.
* config/loongarch/loongarch-str.h (OPTSTR_LSX): Ditto.
* config/loongarch/loongarch.opt: Ditto.
---
 .../loongarch/genopts/loongarch-strings   |  3 +
 gcc/config/loongarch/genopts/loongarch.opt.in |  8 +-
 gcc/config/loongarch/loongarch-c.cc   |  7 ++
 gcc/config/loongarch/loongarch-def.c  |  4 +
 gcc/config/loongarch/loongarch-def.h  |  7 +-
 gcc/config/loongarch/loongarch-driver.cc  | 10 +++
 gcc/config/loongarch/loongarch-driver.h   |  1 +
 gcc/config/loongarch/loongarch-opts.cc| 82 ++-
 gcc/config/loongarch/loongarch-opts.h |  1 +
 gcc/config/loongarch/loongarch-str.h  |  2 +
 gcc/config/loongarch/loongarch.opt|  8 +-
 11 files changed, 128 insertions(+), 5 deletions(-)

diff --git a/gcc/config/loongarch/genopts/loongarch-strings 
b/gcc/config/loongarch/genopts/loongarch-strings
index a40998ead97..24a5025061f 100644
--- a/gcc/config/loongarch/genopts/loongarch-strings
+++ b/gcc/config/loongarch/genopts/loongarch-strings
@@ -40,6 +40,9 @@ OPTSTR_SOFT_FLOAT soft-float
 OPTSTR_SINGLE_FLOAT   single-float
 OPTSTR_DOUBLE_FLOAT   double-float
 
+# SIMD extensions
+OPTSTR_LSX lsx
+
 # -mabi=
 OPTSTR_ABI_BASE  abi
 STR_ABI_BASE_LP64Dlp64d
diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in 
b/gcc/config/loongarch/genopts/loongarch.opt.in
index 4b9b4ac273e..338d77a7e40 100644
--- a/gcc/config/loongarch/genopts/loongarch.opt.in
+++ b/gcc/config/loongarch/genopts/loongarch.opt.in
@@ -76,6 +76,9 @@ m@@OPTSTR_DOUBLE_FLOAT@@
 Target Driver RejectNegative Var(la_opt_switches) Mask(FORCE_F64) 
Negative(m@@OPTSTR_SOFT_FLOAT@@)
 Allow hardware floating-point instructions to cover both 32-bit and 64-bit 
operations.
 
+m@@OPTSTR_LSX@@
+Target RejectNegative Var(la_opt_switches) Mask(LSX) Negative(m@@OPTSTR_LSX@@)
+Enable LoongArch SIMD Extension (LSX).
 
 ;; Base target models (implies ISA & tune parameters)
 Enum
@@ -125,11 +128,14 @@ Target RejectNegative Joined ToLower Enum(abi_base) 
Var(la_opt_abi_base) Init(M_
 Variable
 int la_opt_abi_ext = M_OPTION_NOT_SEEN
 
-
 mbranch-cost=
 Target RejectNegative Joined UInteger Var(loongarch_branch_cost)
 -mbranch-cost=COST Set the cost of branches to roughly COST instructions.
 
+mmemvec-cost=
+Target RejectNegative Joined UInteger Var(loongarch_vector_access_cost) 
IntegerRange(1, 5)
+mmemvec-cost=COST  Set the cost of vector memory access instructions.
+
 mcheck-zero-division
 Target Mask(CHECK_ZERO_DIV)
 Trap on integer divide by zero.
diff --git a/gcc/config/loongarch/loongarch-c.cc 
b/gcc/config/loongarch/loongarch-c.cc
index 67911b78f28..b065921adc3 100644
--- a/gcc/config/loongarch/loongarch-c.cc
+++ b/gcc/config/loongarch/loongarch-c.cc
@@ -99,6 +99,13 @@ loongarch_cpu_cpp_builtins (cpp_reader *pfile)
   else
 builtin_define ("__loongarch_frlen=0");
 
+  if (ISA_HAS_LSX)
+{
+  builtin_define ("__loongarch_simd");
+  builtin_define ("__loongarch_sx");
+  builtin_define ("__loongarch_sx_width=128");
+}
+
   /* Native Data Sizes.  */
   builtin_define_with_int_value ("_LOONGARCH_SZINT", INT_TYPE_SIZE);
   builtin_define_with_int_value ("_LOONGARCH_SZLONG", LONG_TYPE_SIZE);
diff --git a/gcc/config/loongarch/loongarch-def.c 
b/gcc/config/loongarch/loongarch-def.c
index 6729c857f7c..28e24c62249 100644
--- a/gcc/config/loongarch/loongarch-def.c
+++ b/gcc/config/loongarch/loongarch-def.c
@@ -49,10 +49,12 @@ loongarch_cpu_default_isa[N_ARCH_TYPES] = {
   [CPU_LOONGARCH64] = {
   .base = ISA_BASE_LA64V100,
   .fpu = ISA_EXT_FPU64,
+  .simd = 0,
   },
   [CPU_LA464] = {
   .base = ISA_BASE_LA64V100,
   .fpu = ISA_EXT_FPU64,
+  .simd = ISA_EXT_SIMD_LSX,
   },
 };
 
@@ -147,6 +149,7 @@ loongarch_isa_ext_strings[N_ISA_EXT_TYPES] = {
   [ISA_EXT_FPU64] = STR_ISA_EXT_FPU64,
   [ISA_EXT_FPU32] = STR_ISA_EXT_FPU32,
   [ISA_EXT_NOFPU] = STR_ISA_EXT_NOFPU,
+  [ISA_EXT_SIMD_LSX] = OPTSTR_LSX,
 };
 
 const char*
@@ -176,6 +179,7 @@ loongarch_switch_strings[] = {
   [SW_SOFT_FLOAT]= OPTSTR_SOFT_FLOAT,
   [SW_SINGLE_FLOAT]  =

[PATCH v4 4/6] LoongArch: Add Loongson ASX vector directive compilation framework.

2023-08-14 Thread Chenghui Pan

From: Lulu Cheng 

gcc/ChangeLog:

* config/loongarch/genopts/loongarch-strings: Add compilation framework.
* config/loongarch/genopts/loongarch.opt.in: Ditto.
* config/loongarch/loongarch-c.cc (loongarch_cpu_cpp_builtins): Ditto.
* config/loongarch/loongarch-def.c: Ditto.
* config/loongarch/loongarch-def.h (N_ISA_EXT_TYPES): Ditto.
(ISA_EXT_SIMD_LASX): Ditto.
(N_SWITCH_TYPES): Ditto.
(SW_LASX): Ditto.
* config/loongarch/loongarch-driver.cc (driver_get_normalized_m_opts): 
Ditto.
* config/loongarch/loongarch-driver.h (driver_get_normalized_m_opts): 
Ditto.
* config/loongarch/loongarch-opts.cc (isa_str): Ditto.
* config/loongarch/loongarch-opts.h (ISA_HAS_LSX): Ditto.
(ISA_HAS_LASX): Ditto.
* config/loongarch/loongarch-str.h (OPTSTR_LASX): Ditto.
* config/loongarch/loongarch.opt: Ditto.
---
 gcc/config/loongarch/genopts/loongarch-strings |  1 +
 gcc/config/loongarch/genopts/loongarch.opt.in  |  4 
 gcc/config/loongarch/loongarch-c.cc| 11 +++
 gcc/config/loongarch/loongarch-def.c   |  4 +++-
 gcc/config/loongarch/loongarch-def.h   |  6 --
 gcc/config/loongarch/loongarch-driver.cc   |  2 +-
 gcc/config/loongarch/loongarch-driver.h|  1 +
 gcc/config/loongarch/loongarch-opts.cc |  9 -
 gcc/config/loongarch/loongarch-opts.h  |  4 +++-
 gcc/config/loongarch/loongarch-str.h   |  1 +
 gcc/config/loongarch/loongarch.opt |  4 
 11 files changed, 41 insertions(+), 6 deletions(-)

diff --git a/gcc/config/loongarch/genopts/loongarch-strings 
b/gcc/config/loongarch/genopts/loongarch-strings
index 24a5025061f..35d08f5967d 100644
--- a/gcc/config/loongarch/genopts/loongarch-strings
+++ b/gcc/config/loongarch/genopts/loongarch-strings
@@ -42,6 +42,7 @@ OPTSTR_DOUBLE_FLOAT   double-float
 
 # SIMD extensions
 OPTSTR_LSX lsx
+OPTSTR_LASXlasx
 
 # -mabi=
 OPTSTR_ABI_BASE  abi
diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in 
b/gcc/config/loongarch/genopts/loongarch.opt.in
index 338d77a7e40..afde23c9661 100644
--- a/gcc/config/loongarch/genopts/loongarch.opt.in
+++ b/gcc/config/loongarch/genopts/loongarch.opt.in
@@ -80,6 +80,10 @@ m@@OPTSTR_LSX@@
 Target RejectNegative Var(la_opt_switches) Mask(LSX) Negative(m@@OPTSTR_LSX@@)
 Enable LoongArch SIMD Extension (LSX).
 
+m@@OPTSTR_LASX@@
+Target RejectNegative Var(la_opt_switches) Mask(LASX) 
Negative(m@@OPTSTR_LASX@@)
+Enable LoongArch Advanced SIMD Extension (LASX).
+
 ;; Base target models (implies ISA & tune parameters)
 Enum
 Name(cpu_type) Type(int)
diff --git a/gcc/config/loongarch/loongarch-c.cc 
b/gcc/config/loongarch/loongarch-c.cc
index b065921adc3..2747fb9e472 100644
--- a/gcc/config/loongarch/loongarch-c.cc
+++ b/gcc/config/loongarch/loongarch-c.cc
@@ -104,8 +104,19 @@ loongarch_cpu_cpp_builtins (cpp_reader *pfile)
   builtin_define ("__loongarch_simd");
   builtin_define ("__loongarch_sx");
   builtin_define ("__loongarch_sx_width=128");
+
+  if (!ISA_HAS_LASX)
+   builtin_define ("__loongarch_simd_width=128");
 }
 
+  if (ISA_HAS_LASX)
+{
+  builtin_define ("__loongarch_asx");
+  builtin_define ("__loongarch_asx_width=256");
+  builtin_define ("__loongarch_simd_width=256");
+}
+
+
   /* Native Data Sizes.  */
   builtin_define_with_int_value ("_LOONGARCH_SZINT", INT_TYPE_SIZE);
   builtin_define_with_int_value ("_LOONGARCH_SZLONG", LONG_TYPE_SIZE);
diff --git a/gcc/config/loongarch/loongarch-def.c 
b/gcc/config/loongarch/loongarch-def.c
index 28e24c62249..bff92c86532 100644
--- a/gcc/config/loongarch/loongarch-def.c
+++ b/gcc/config/loongarch/loongarch-def.c
@@ -54,7 +54,7 @@ loongarch_cpu_default_isa[N_ARCH_TYPES] = {
   [CPU_LA464] = {
   .base = ISA_BASE_LA64V100,
   .fpu = ISA_EXT_FPU64,
-  .simd = ISA_EXT_SIMD_LSX,
+  .simd = ISA_EXT_SIMD_LASX,
   },
 };
 
@@ -150,6 +150,7 @@ loongarch_isa_ext_strings[N_ISA_EXT_TYPES] = {
   [ISA_EXT_FPU32] = STR_ISA_EXT_FPU32,
   [ISA_EXT_NOFPU] = STR_ISA_EXT_NOFPU,
   [ISA_EXT_SIMD_LSX] = OPTSTR_LSX,
+  [ISA_EXT_SIMD_LASX] = OPTSTR_LASX,
 };
 
 const char*
@@ -180,6 +181,7 @@ loongarch_switch_strings[] = {
   [SW_SINGLE_FLOAT]  = OPTSTR_SINGLE_FLOAT,
   [SW_DOUBLE_FLOAT]  = OPTSTR_DOUBLE_FLOAT,
   [SW_LSX]   = OPTSTR_LSX,
+  [SW_LASX]  = OPTSTR_LASX,
 };
 
 
diff --git a/gcc/config/loongarch/loongarch-def.h 
b/gcc/config/loongarch/loongarch-def.h
index f34cffcfb9b..0bbcdb03d22 100644
--- a/gcc/config/loongarch/loongarch-def.h
+++ b/gcc/config/loongarch/loongarch-def.h
@@ -64,7 +64,8 @@ extern const char* loongarch_isa_ext_strings[];
 #define ISA_EXT_FPU642
 #define N_ISA_EXT_FPU_TYPES   3
 #define ISA_EXT_SIMD_LSX  3
-#define N_ISA_EXT_TYPES  4
+#define ISA_EXT_SIMD_LASX 4
+#define N_ISA_EXT_TYPES  5
 
 /* enum abi_base */
 extern const char*

[PATCH v4 0/6] Add Loongson SX/ASX instruction support to LoongArch target.

2023-08-14 Thread Chenghui Pan

This is an update of:
https://gcc.gnu.org/pipermail/gcc-patches/2023-August/626194.html

This version of patch set only introduces some small simplifications of
implementation. Because I missed the size limitation of mail size, the
huge testsuite patches of v2 and v3 are not shown in the mail list. So,
testsuite patches are split from this patch set again and will be submitted 
independently in the future.

Binutils-gdb introduced LSX/LASX support since 2.41 release:
https://lists.gnu.org/archive/html/info-gnu/2023-07/msg9.html

Brief history of patch set version:
v1 -> v2:
- Reduce usage of "unspec" in RTL template.
- Append Support of ADDR_REG_REG in LSX and LASX.
- Constraint docs are appended in gcc/doc/md.texi and comment block.
- Codes related to vecarg are removed.
- Testsuite of LSX and LASX is added in v2. (Because of the size limitation of
  mail list, these patches are not shown)
- Adjust the loongarch_expand_vector_init() function to reduce instruction 
  output amount.
- Some minor implementation changes of RTL templates.

v2 -> v3:
- Revert vabsd/xvabsd RTL templates to unspec impl.
- Resolve warning in gcc/config/loongarch/loongarch.cc when bootstrapping 
  with BOOT_CFLAGS="-O2 -ftree-vectorize -fno-vect-cost-model -mlasx".
- Remove redundant definitions in lasxintrin.h.
- Refine commit info.

Lulu Cheng (6):
  LoongArch: Add Loongson SX vector directive compilation framework.
  LoongArch: Add Loongson SX base instruction support.
  LoongArch: Add Loongson SX directive builtin function support.
  LoongArch: Add Loongson ASX vector directive compilation framework.
  LoongArch: Add Loongson ASX base instruction support.
  LoongArch: Add Loongson ASX directive builtin function support.

 gcc/config.gcc|2 +-
 gcc/config/loongarch/constraints.md   |  131 +-
 .../loongarch/genopts/loongarch-strings   |4 +
 gcc/config/loongarch/genopts/loongarch.opt.in |   12 +-
 gcc/config/loongarch/lasx.md  | 5122 
 gcc/config/loongarch/lasxintrin.h | 5338 +
 gcc/config/loongarch/loongarch-builtins.cc| 2686 -
 gcc/config/loongarch/loongarch-c.cc   |   18 +
 gcc/config/loongarch/loongarch-def.c  |6 +
 gcc/config/loongarch/loongarch-def.h  |9 +-
 gcc/config/loongarch/loongarch-driver.cc  |   10 +
 gcc/config/loongarch/loongarch-driver.h   |2 +
 gcc/config/loongarch/loongarch-ftypes.def |  666 +-
 gcc/config/loongarch/loongarch-modes.def  |   39 +
 gcc/config/loongarch/loongarch-opts.cc|   89 +-
 gcc/config/loongarch/loongarch-opts.h |3 +
 gcc/config/loongarch/loongarch-protos.h   |   35 +
 gcc/config/loongarch/loongarch-str.h  |3 +
 gcc/config/loongarch/loongarch.cc | 4586 +-
 gcc/config/loongarch/loongarch.h  |  117 +-
 gcc/config/loongarch/loongarch.md |   56 +-
 gcc/config/loongarch/loongarch.opt|   12 +-
 gcc/config/loongarch/lsx.md   | 4481 ++
 gcc/config/loongarch/lsxintrin.h  | 5181 
 gcc/config/loongarch/predicates.md|  333 +-
 gcc/doc/md.texi   |   11 +
 26 files changed, 28668 insertions(+), 284 deletions(-)
 create mode 100644 gcc/config/loongarch/lasx.md
 create mode 100644 gcc/config/loongarch/lasxintrin.h
 create mode 100644 gcc/config/loongarch/lsx.md
 create mode 100644 gcc/config/loongarch/lsxintrin.h

-- 
2.36.0

RE: [PATCH v1] RISC-V: Support RVV VFREC7 rounding mode intrinsic API

2023-08-14 Thread Li, Pan2 via Gcc-patches

Committed, thanks Kito.

Pan

From: Kito Cheng 
Sent: Monday, August 14, 2023 11:02 PM
To: Li, Pan2 
Cc: Wang, Yanzhang ; gcc-patches 
; 钟居哲 
Subject: Re: [PATCH v1] RISC-V: Support RVV VFREC7 rounding mode intrinsic API

Checked with doc and llvm implementation, LGTM

Re: IRA update_equiv_regs for (was Re: ICE for interim fix for PR/110748)

2023-08-14 Thread Vineet Gupta




On 8/11/23 17:04, Jeff Law wrote:


I'm wondering (naively) if there is some way to tune this - for a 
given backend. In general it would make sense to do the replacement, 
but not if the cost changes (e.g. consts could be embedded in x86 
insn freely, but not for RISC-V where this is costly and if something 
is split, it might have been intentional).

I'm not immediately aware of a way to tune.

When it comes to tuning, the toplevel questions are do we have any of 
the info we need to tune at the point where the transformation occurs. 
The two most obvious pieces here would be loop info and register pressure.


ie, do we have enough loop structure to know if the def is at a 
shallower loop nest than the use.  There's a reasonable chance we have 
this information as my recollection is this analysis is done fairly 
early in IRA.


But that means we likely don't have any sense of register pressure at 
the points between the def and use.   So the most useful metric for 
tuning isn't really available.


I'd argue that even if the register pressure were high, in some cases, 
there's just no way around it and RA needs to honor what the backend did 
a priori (split in this case), otherwise we end up with something which 
doesn't compute literally and leads to ICE. I'm puzzled that in this 
case, intentional implementation is getting in the way. So while I don't 
care about the -0.0 case in itself, it seems with the current framework 
we can't just achieve the results, other than the roundabout way of 
peephole2 you alluded to.




The one thing that stands out is we don't do this transformation at 
all when register pressure sensitive scheduling is enabled. And we 
really should be turning that on by default.  Our data shows register 
pressure sensitive scheduling is about a 6-7% cycle improvement on 
x264 as it avoids spilling in those key satd loops.



 /* Don't move insns if live range shrinkage or register
 pressure-sensitive scheduling were done because it will not
 improve allocation but likely worsen insn scheduling.  */
  if (optimize
  && !flag_live_range_shrinkage
  && !(flag_sched_pressure && flag_schedule_insns))
    combine_and_move_insns ();



So you might want to look at register pressure sensitive scheduling 
first.  If you go into x264_r from specint and look at 
x264_pixel_satd_8x4.  First verify the loops are fully unrolled. If 
they are, then look for 32bit loads/stores into the stack.  If you 
have them, then you're spilling and getting crappy performance.  Using 
register pressure sensitive scheduling should help significantly.


Is that -fira-loop-pressure ?


We've certainly seen that internally.  The plan was to submit a patch 
to make register pressure sensitive scheduling the default when the 
scheduler is enabled.  We just haven't pushed on it.  If you can 
verify that you're seeing spilling as well, then it'd certainly 
bolster the argument that register-pressure-sensitive-scheduling is 
desirable.


I can confirm that the loop is fully unrolled and there's a zillion 
stack spills there for intermediate computes (-Ofast 
-march=rv64gc_zba_zbb_zbs, no V in that build).


Thx,
-Vineet

Is this a bug for __builtin_dynamic_object_size?

2023-08-14 Thread Qing Zhao via Gcc-patches

Hi, Sid,

For the following testing case:

#include <stdio.h>

#define noinline __attribute__((__noinline__))

static void noinline alloc_buf_more (int index)
{
  struct annotated {
long foo;
char b;
char array[index];
long c;
  } q, *p;

  p = &q;

  printf("the__bdos of p->array whole max is %d \n", 
__builtin_dynamic_object_size(p->array, 0)); 
  printf("the__bdos of p->array sub max is %d \n", 
__builtin_dynamic_object_size(p->array, 1));  
  printf("the__bdos of p->array whole min is %d \n", 
__builtin_dynamic_object_size(p->array, 2)); 
  printf("the__bdos of p->array sub min is %d \n", 
__builtin_dynamic_object_size(p->array, 3)); 

  return;
}

int main ()
{
  alloc_buf_more (10);
  return 0;
}

If I compile it with the latest upstream gcc and run it:

/home/opc/Install/latest-d/bin/gcc -O t.c
the__bdos of p->array whole max is 23 
the__bdos of p->array sub max is 23 
the__bdos of p->array whole min is 23 
the__bdos of p->array sub min is 23 

In which __builtin_dynamic_object_size(p->array, 0) and 
__builtin_dynamic_object_size(p->array, 1) return the same size, this seems 
wrong to me. 

There is one line in tree-object-size.cc might relate to this bug: (in the 
routine “addr_object_size”)

 603   if (! TYPE_SIZE_UNIT (TREE_TYPE (var))
 604   || ! tree_fits_uhwi_p (TYPE_SIZE_UNIT (TREE_TYPE (var)))
 605   || (pt_var_size && TREE_CODE (pt_var_size) == INTEGER_CST
 606   && tree_int_cst_lt (pt_var_size,
 607   TYPE_SIZE_UNIT (TREE_TYPE (var)
 608 var = pt_var;

I suspect that the above line 604 “ ! tree_fits_uhwi_p (TYPE_SIZE_UNIT 
(TREE_TYPE (var)))” relates to this bug, since the TYPESIZE of the VLA “array” 
is not an unsigned HOST_WIDE_INT, but we still can use its TYPESIZE for 
dynamic_object_size?

What do you think?

Thanks.

Qing

Re: [PATCH v9] RISC-V: Add the 'zfa' extension, version 0.2





On 8/14/23 00:10, Jin Ma wrote:

Additional links：
v10, the patch that needs to be reviewed again:
http://patchwork.ozlabs.org/project/gcc/patch/20230814055033.1995-1-ji...@linux.alibaba.com/

v9 and the previous review comments:
http://patchwork.ozlabs.org/project/gcc/patch/20230515131628.953-1-ji...@linux.alibaba.com/

Zfa patch in master branch of binutils-gdb
https://sourceware.org/git/?p=binutils-gdb.git;a=commit;h=1f3fc45bddc7147a2e59346a59290094137ef1e1
Will do.  We'll also have to evaluate against Tsukasa's work.  As we saw 
with Zicond there may be cases that are better handled by one vs the 
other and we may end up taking pieces from both.


jeff

Re: [PATCH] RISC-V: Handle no_insn in TARGET_SCHED_VARIABLE_ISSUE.

2023-08-14 Thread Stefan Schulze Frielinghaus via Gcc-patches





On 8/14/23 14:33, Edwin Lu wrote:


On 8/11/2023 6:29 AM, Jeff Law via Gcc-patches wrote:



On 8/10/23 21:45, Palmer Dabbelt wrote:



This seems pretty mechanical: just scrub through our MDs to check 
for any un-typed insns, then add the assert and fix the failures. 
You're more than welcome to have at it, but LMK if you want me to try 
and find some time for someone to do it -- certainly seems like a 
good way for someone new to dig in a bit.
Yes, definitely mechanical.  And yes, it's a good way for someone to 
start to get familiar with these bits -- I used the lack of types on 
some of the bitmanip insns to help ramp up Raphael and one of the RAU 
guys in this space.


Jeff


Hi, Palmer sent me this thread to take a look at. I can start working on 
this.
Sounds good.  The goal is to make sure that every insn has a type and 
once we hit that milestone we can enable the currently #if 0'd assert in 
riscv_sched_variable_issue to help ensure we don't introduce any new 
insns without types in the future.


If you have any questions, don't hesitate to reach out.

jeff

Re: [PATCH] Add support for vector conitional not

2023-08-14 Thread Andrew Pinski via Gcc-patches

On Mon, Aug 14, 2023 at 2:37 PM Richard Sandiford via Gcc-patches
 wrote:
>
> Andrew Pinski via Gcc-patches  writes:
> > Like the support conditional neg (r12-4470-g20dcda98ed376cb61c74b2c71),
> > this just adds conditional not too.
> > Also we should be able to turn `(a ? -1 : 0) ^ b` into a conditional
> > not.
> >
> > OK? Bootstrapped and tested on x86_64-linux-gnu and aarch64-linux-gnu.
> >
> > gcc/ChangeLog:
> >
> >   * internal-fn.def (COND_NOT): New internal function.
> >   * match.pd (UNCOND_UNARY, COND_UNARY): Add bit_not/not
> >   to the lists.
> >   (`vec (a ? -1 : 0) ^ b`): New pattern to convert
> >   into conditional not.
> >   * optabs.def (cond_one_cmpl): New optab.
> >   (cond_len_one_cmpl): Likewise.
> >
> > gcc/testsuite/ChangeLog:
> >
> >   PR target/110986
> >   * gcc.target/aarch64/sve/cond_unary_9.c: New test.
> > ---
> >  gcc/internal-fn.def   |  2 ++
> >  gcc/match.pd  | 15 --
> >  gcc/optabs.def|  2 ++
> >  .../gcc.target/aarch64/sve/cond_unary_9.c | 20 +++
> >  4 files changed, 37 insertions(+), 2 deletions(-)
> >  create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/cond_unary_9.c
> >
> > diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
> > index b3c410f4b6a..3e8693dfddb 100644
> > --- a/gcc/internal-fn.def
> > +++ b/gcc/internal-fn.def
> > @@ -69,6 +69,7 @@ along with GCC; see the file COPYING3.  If not see
> >   lround2.
> >
> > - cond_binary: a conditional binary optab, such as cond_add
> > +   - cond_unary: a conditional unary optab, such as cond_neg
> > - cond_ternary: a conditional ternary optab, such as cond_fma_rev
> >
> > - fold_left: for scalar = FN (scalar, vector), keyed off the vector mode
> > @@ -276,6 +277,7 @@ DEF_INTERNAL_COND_FN (FNMA, ECF_CONST, fnma, ternary)
> >  DEF_INTERNAL_COND_FN (FNMS, ECF_CONST, fnms, ternary)
> >
> >  DEF_INTERNAL_COND_FN (NEG, ECF_CONST, neg, unary)
> > +DEF_INTERNAL_COND_FN (NOT, ECF_CONST, one_cmpl, unary)
> >
> >  DEF_INTERNAL_OPTAB_FN (RSQRT, ECF_CONST, rsqrt, unary)
> >
> > diff --git a/gcc/match.pd b/gcc/match.pd
> > index 6791060891d..2ee6d24ccee 100644
> > --- a/gcc/match.pd
> > +++ b/gcc/match.pd
> > @@ -84,9 +84,9 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> >
> >  /* Unary operations and their associated IFN_COND_* function.  */
> >  (define_operator_list UNCOND_UNARY
> > -  negate)
> > +  negate bit_not)
> >  (define_operator_list COND_UNARY
> > -  IFN_COND_NEG)
> > +  IFN_COND_NEG IFN_COND_NOT)
> >
> >  /* Binary operations and their associated IFN_COND_* function.  */
> >  (define_operator_list UNCOND_BINARY
> > @@ -8482,6 +8482,17 @@ and,
> >  && is_truth_type_for (op_type, TREE_TYPE (@0)))
> >   (cond_op (bit_not @0) @2 @1)
> >
> > +/* `(a ? -1 : 0) ^ b` can be converted into a conditional not.  */
> > +(simplify
> > + (bit_xor:c (vec_cond @0 uniform_integer_cst_p@1 uniform_integer_cst_p@2) 
> > @3)
> > + (if (canonicalize_math_after_vectorization_p ()
> > +  && vectorized_internal_fn_supported_p (IFN_COND_NOT, type)
> > +  && is_truth_type_for (type, TREE_TYPE (@0)))
> > + (if (integer_all_onesp (@1) && integer_zerop (@2))
> > +  (IFN_COND_NOT @0 @3 @3))
> > +  (if (integer_all_onesp (@2) && integer_zerop (@1))
> > +   (vec_cond (bit_not @0) @3 @3
>
> Looks like this should be IFN_COND_NOT rather than vec_cond.

Yes that should have been IFN_COND_NOT, when I was converting it to be
explicitly IFN_COND_NOT rather than depending on vec_cond, I had
missed that part of the conversion.
Thanks for noticing that.

>
> LGTM otherwise, but please give Richi 24hrs to comment.

Will do.

Thanks,
Andrew


>
> Thanks,
> Richard
>
> > +
> >  /* Simplify:
> >
> >   a = a1 op a2
> > diff --git a/gcc/optabs.def b/gcc/optabs.def
> > index 1ea1947b3b5..a58819bc665 100644
> > --- a/gcc/optabs.def
> > +++ b/gcc/optabs.def
> > @@ -254,6 +254,7 @@ OPTAB_D (cond_fms_optab, "cond_fms$a")
> >  OPTAB_D (cond_fnma_optab, "cond_fnma$a")
> >  OPTAB_D (cond_fnms_optab, "cond_fnms$a")
> >  OPTAB_D (cond_neg_optab, "cond_neg$a")
> > +OPTAB_D (cond_one_cmpl_optab, "cond_one_cmpl$a")
> >  OPTAB_D (cond_len_add_optab, "cond_len_add$a")
> >  OPTAB_D (cond_len_sub_optab, "cond_len_sub$a")
> >  OPTAB_D (cond_len_smul_optab, "cond_len_mul$a")
> > @@ -278,6 +279,7 @@ OPTAB_D (cond_len_fms_optab, "cond_len_fms$a")
> >  OPTAB_D (cond_len_fnma_optab, "cond_len_fnma$a")
> >  OPTAB_D (cond_len_fnms_optab, "cond_len_fnms$a")
> >  OPTAB_D (cond_len_neg_optab, "cond_len_neg$a")
> > +OPTAB_D (cond_len_one_cmpl_optab, "cond_len_one_cmpl$a")
> >  OPTAB_D (cmov_optab, "cmov$a6")
> >  OPTAB_D (cstore_optab, "cstore$a4")
> >  OPTAB_D (ctrap_optab, "ctrap$a4")
> > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_9.c 
> > b/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_9.c
> > new file mode 100644
> > index 000..d6bc0409630
> > --- /dev/null
>

Re: [RFC] GCC Security policy

2023-08-14 Thread Siddhesh Poyarekar


On 2023-08-14 17:16, Alexander Monakov wrote:


On Mon, 14 Aug 2023, Siddhesh Poyarekar wrote:


1. It makes it clear to users of the project the scope in which the project
could be used and what safety it could reasonably expect from the project.  In
the context of GCC for example, it cannot expect the compiler to do a safety
check of untrusted sources; the compiler will consider #include "/etc/passwd"
just as valid code as #include  and as a result, the onus is on the
user environment to validate the input sources for safety.


Whoa, no. We shouldn't make such statements unless we are prepared to explain
to users how such validation can be practically implemented, which I'm sure
we cannot in this case, due to future extensions such as the #embed directive,
and ability to obfuscate filenames using the preprocessor.


There's no practical (programmatic) way to do such validation; it has to 
be a manual audit, which is why source code passed to the compiler has 
to be *trusted*.



I think it would be more honest to say that crafted sources can result in
arbitrary code execution with the privileges of the user invoking the compiler,
and hence the operator may want to ensure that no sensitive data is available
to that user (via measures ranging from plain UNIX permissions, to chroots,
to virtual machines, to air-gapped computers, depending on threat model).


Right, that's what we're essentially trying to convey in the security 
policy text.  It doesn't go into mechanisms for securing execution 
(because that's really beyond the scope of the *project's* policy IMO) 
but it states unambiguously that input to the compiler must be trusted:


"""
  ... It is necessary that
all source code inputs to the compiler are trusted, since it is
impossible for the driver to validate input source code beyond
conformance to a programming language standard...
"""


Resource consumption is another good reason to sandbox compilers.


Agreed, we make that specific recommendation in the context of libgccjit.

Thanks,
Sid

Re: [PATCH v4 2/8] libcpp: diagnostics: Support generated data in expanded locations

2023-08-14 Thread Lewis Hyatt via Gcc-patches

On Fri, Aug 11, 2023 at 07:02:49PM -0400, David Malcolm wrote:
> On Wed, 2023-08-09 at 18:14 -0400, Lewis Hyatt wrote:
> > The previous patch in this series introduced the concept of LC_GEN line
> > maps. This patch continues on the path to using them to improve _Pragma
> > diagnostics, by adding a new source_id SRC member to struct
> > expanded_location, which is populated by linemap_expand_location. This
> > member allows call sites to detect and handle when a location refers to
> > generated data rather than a plain file name.
> > 
> > The previous FILE member of expanded_location is preserved (although
> > redundant with SRC), so that call sites which do not and never will care
> > about generated data do not need to be concerned about it. Call sites that
> > will care are modified here, to use SRC rather than FILE for comparing
> > locations.
> 
> Thanks; this seems like a good approach.
> 
> 
> [...snip...]
> 
> > diff --git a/gcc/edit-context.cc b/gcc/edit-context.cc
> > index 6f5bc6b9d8f..15052aec417 100644
> > --- a/gcc/edit-context.cc
> > +++ b/gcc/edit-context.cc
> > @@ -295,7 +295,7 @@ edit_context::apply_fixit (const fixit_hint *hint)
> >  {
> >expanded_location start = expand_location (hint->get_start_loc ());
> >expanded_location next_loc = expand_location (hint->get_next_loc ());
> > -  if (start.file != next_loc.file)
> > +  if (start.src != next_loc.src || start.src.is_buffer ())
> >  return false;
> >if (start.line != next_loc.line)
> >  return false;
> 
> Thinking about fix-it hints, it makes sense to reject attempts to
> create fix-it hints within generated strings, as we can't apply them or
> visualize them.
> 
> Does anywhere in the patch kit do that?  Either of 
>   rich_location::maybe_add_fixit
> or
>   rich_location::reject_impossible_fixit
> would be good places to do that.
>

So rich_location::reject_impossible_fixit does reject them for _Pragmas now,
because what the frontend sees and passes to it is a virtual location, and it
always rejects virtual locations. But it doesn't reject arbitrary generated
data locations that may be created in an ordinary non-virtual location. I
think it's this one-line change to reject those:

-- >8 --

diff --git a/libcpp/line-map.cc b/libcpp/line-map.cc
index 835e8e1b8cd..382594637ad 100644
--- a/libcpp/line-map.cc
+++ b/libcpp/line-map.cc
@@ -2545,7 +2545,8 @@ rich_location::maybe_add_fixit (location_t start,
 = linemap_client_expand_location_to_spelling_point (next_loc,
LOCATION_ASPECT_START);
   /* They must be within the same file...  */
-  if (exploc_start.src != exploc_next_loc.src)
+  if (exploc_start.src != exploc_next_loc.src
+  || exploc_start.src.is_buffer ())
 {
   stop_supporting_fixits ();
   return;

-- >8 --

However, there are many selftests in diagnostic-show-locus.cc that actually
verify we generate the fixit hints for generated data, so I would need also to
change those to skip the test in this case as well. That looks like this:

-- >8 --

diff --git a/gcc/diagnostic-show-locus.cc b/gcc/diagnostic-show-locus.cc
index 62c60645e88..884c55e91e9 100644
--- a/gcc/diagnostic-show-locus.cc
+++ b/gcc/diagnostic-show-locus.cc
@@ -3824,6 +3824,8 @@ test_diagnostic_show_locus_one_liner (const 
line_table_case _)
   test_one_liner_simple_caret ();
   test_one_liner_caret_and_range ();
   test_one_liner_multiple_carets_and_ranges ();
+  if (!ltt.m_generated_data)
+{
   test_one_liner_fixit_insert_before ();
   test_one_liner_fixit_insert_after ();
   test_one_liner_fixit_remove ();
@@ -3835,6 +3837,7 @@ test_diagnostic_show_locus_one_liner (const 
line_table_case _)
   test_one_liner_many_fixits_2 ();
   test_one_liner_labels ();
 }
+}

 /* Version of all one-liner tests exercising multibyte awareness.  For
simplicity we stick to using two multibyte characters in the test, U+1F602
@@ -4419,6 +4422,8 @@ test_diagnostic_show_locus_one_liner_utf8 (const 
line_table_case _)
   test_one_liner_simple_caret_utf8 ();
   test_one_liner_caret_and_range_utf8 ();
   test_one_liner_multiple_carets_and_ranges_utf8 ();
+  if (!ltt.m_generated_data)
+{
   test_one_liner_fixit_insert_before_utf8 ();
   test_one_liner_fixit_insert_after_utf8 ();
   test_one_liner_fixit_remove_utf8 ();
@@ -4428,6 +4433,7 @@ test_diagnostic_show_locus_one_liner_utf8 (const 
line_table_case _)
   test_one_liner_fixit_validation_adhoc_locations_utf8 ();
   test_one_liner_many_fixits_1_utf8 ();
   test_one_liner_many_fixits_2_utf8 ();
+}
   test_one_liner_labels_utf8 ();
   test_one_liner_colorized_utf8 ();
 }
@@ -5726,15 +5732,15 @@ diagnostic_show_locus_cc_tests ()
   for_each_line_table_case (test_diagnostic_show_locus_one_liner, true);
   for_each_line_table_case (test_diagnostic_show_locus_one_liner_utf8, true);
   for_each_line_table_case (test_add_location_if_nearby, true);
-  for_each_line_table_case

Re: [PATCH] Add support for vector conditional not

2023-08-14 Thread Richard Sandiford via Gcc-patches

Andrew Pinski via Gcc-patches  writes:
> Like the support conditional neg (r12-4470-g20dcda98ed376cb61c74b2c71),
> this just adds conditional not too.
> Also we should be able to turn `(a ? -1 : 0) ^ b` into a conditional
> not.
>
> OK? Bootstrapped and tested on x86_64-linux-gnu and aarch64-linux-gnu.
>
> gcc/ChangeLog:
>
>   * internal-fn.def (COND_NOT): New internal function.
>   * match.pd (UNCOND_UNARY, COND_UNARY): Add bit_not/not
>   to the lists.
>   (`vec (a ? -1 : 0) ^ b`): New pattern to convert
>   into conditional not.
>   * optabs.def (cond_one_cmpl): New optab.
>   (cond_len_one_cmpl): Likewise.
>
> gcc/testsuite/ChangeLog:
>
>   PR target/110986
>   * gcc.target/aarch64/sve/cond_unary_9.c: New test.
> ---
>  gcc/internal-fn.def   |  2 ++
>  gcc/match.pd  | 15 --
>  gcc/optabs.def|  2 ++
>  .../gcc.target/aarch64/sve/cond_unary_9.c | 20 +++
>  4 files changed, 37 insertions(+), 2 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/cond_unary_9.c
>
> diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
> index b3c410f4b6a..3e8693dfddb 100644
> --- a/gcc/internal-fn.def
> +++ b/gcc/internal-fn.def
> @@ -69,6 +69,7 @@ along with GCC; see the file COPYING3.  If not see
>   lround2.
>  
> - cond_binary: a conditional binary optab, such as cond_add
> +   - cond_unary: a conditional unary optab, such as cond_neg
> - cond_ternary: a conditional ternary optab, such as cond_fma_rev
>  
> - fold_left: for scalar = FN (scalar, vector), keyed off the vector mode
> @@ -276,6 +277,7 @@ DEF_INTERNAL_COND_FN (FNMA, ECF_CONST, fnma, ternary)
>  DEF_INTERNAL_COND_FN (FNMS, ECF_CONST, fnms, ternary)
>  
>  DEF_INTERNAL_COND_FN (NEG, ECF_CONST, neg, unary)
> +DEF_INTERNAL_COND_FN (NOT, ECF_CONST, one_cmpl, unary)
>  
>  DEF_INTERNAL_OPTAB_FN (RSQRT, ECF_CONST, rsqrt, unary)
>  
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 6791060891d..2ee6d24ccee 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -84,9 +84,9 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>  
>  /* Unary operations and their associated IFN_COND_* function.  */
>  (define_operator_list UNCOND_UNARY
> -  negate)
> +  negate bit_not)
>  (define_operator_list COND_UNARY
> -  IFN_COND_NEG)
> +  IFN_COND_NEG IFN_COND_NOT)
>  
>  /* Binary operations and their associated IFN_COND_* function.  */
>  (define_operator_list UNCOND_BINARY
> @@ -8482,6 +8482,17 @@ and,
>  && is_truth_type_for (op_type, TREE_TYPE (@0)))
>   (cond_op (bit_not @0) @2 @1)
>  
> +/* `(a ? -1 : 0) ^ b` can be converted into a conditional not.  */
> +(simplify
> + (bit_xor:c (vec_cond @0 uniform_integer_cst_p@1 uniform_integer_cst_p@2) @3)
> + (if (canonicalize_math_after_vectorization_p ()
> +  && vectorized_internal_fn_supported_p (IFN_COND_NOT, type)
> +  && is_truth_type_for (type, TREE_TYPE (@0)))
> + (if (integer_all_onesp (@1) && integer_zerop (@2))
> +  (IFN_COND_NOT @0 @3 @3))
> +  (if (integer_all_onesp (@2) && integer_zerop (@1))
> +   (vec_cond (bit_not @0) @3 @3

Looks like this should be IFN_COND_NOT rather than vec_cond.

LGTM otherwise, but please give Richi 24hrs to comment.

Thanks,
Richard

> +
>  /* Simplify:
>  
>   a = a1 op a2
> diff --git a/gcc/optabs.def b/gcc/optabs.def
> index 1ea1947b3b5..a58819bc665 100644
> --- a/gcc/optabs.def
> +++ b/gcc/optabs.def
> @@ -254,6 +254,7 @@ OPTAB_D (cond_fms_optab, "cond_fms$a")
>  OPTAB_D (cond_fnma_optab, "cond_fnma$a")
>  OPTAB_D (cond_fnms_optab, "cond_fnms$a")
>  OPTAB_D (cond_neg_optab, "cond_neg$a")
> +OPTAB_D (cond_one_cmpl_optab, "cond_one_cmpl$a")
>  OPTAB_D (cond_len_add_optab, "cond_len_add$a")
>  OPTAB_D (cond_len_sub_optab, "cond_len_sub$a")
>  OPTAB_D (cond_len_smul_optab, "cond_len_mul$a")
> @@ -278,6 +279,7 @@ OPTAB_D (cond_len_fms_optab, "cond_len_fms$a")
>  OPTAB_D (cond_len_fnma_optab, "cond_len_fnma$a")
>  OPTAB_D (cond_len_fnms_optab, "cond_len_fnms$a")
>  OPTAB_D (cond_len_neg_optab, "cond_len_neg$a")
> +OPTAB_D (cond_len_one_cmpl_optab, "cond_len_one_cmpl$a")
>  OPTAB_D (cmov_optab, "cmov$a6")
>  OPTAB_D (cstore_optab, "cstore$a4")
>  OPTAB_D (ctrap_optab, "ctrap$a4")
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_9.c 
> b/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_9.c
> new file mode 100644
> index 000..d6bc0409630
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_9.c
> @@ -0,0 +1,20 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=256 
> -fdump-tree-optimized" } */
> +
> +/* This is a reduced version of cond_unary_5.c */
> +
> +void __attribute__ ((noipa))
> +f (short *__restrict r,
> +   short *__restrict a,
> +   short *__restrict pred)
> +{
> +  for (int i = 0; i < 1024; ++i)
> +r[i] = pred[i] != 0 ? ~(a[i]) : a[i];
> +}
> +
> +/* { dg-final { scan-assembler-times

Re: [RFC] GCC Security policy

2023-08-14 Thread Alexander Monakov

On Mon, 14 Aug 2023, Siddhesh Poyarekar wrote:

> 1. It makes it clear to users of the project the scope in which the project
> could be used and what safety it could reasonably expect from the project.  In
> the context of GCC for example, it cannot expect the compiler to do a safety
> check of untrusted sources; the compiler will consider #include "/etc/passwd"
> just as valid code as #include <stdio.h> and as a result, the onus is on the
> user environment to validate the input sources for safety.

Whoa, no. We shouldn't make such statements unless we are prepared to explain
to users how such validation can be practically implemented, which I'm sure
we cannot in this case, due to future extensions such as the #embed directive,
and ability to obfuscate filenames using the preprocessor.

I think it would be more honest to say that crafted sources can result in
arbitrary code execution with the privileges of the user invoking the compiler,
and hence the operator may want to ensure that no sensitive data is available
to that user (via measures ranging from plain UNIX permissions, to chroots,
to virtual machines, to air-gapped computers, depending on threat model).

Resource consumption is another good reason to sandbox compilers.

Alexander

Re: [PATCH] RISC-V: Handle no_insn in TARGET_SCHED_VARIABLE_ISSUE.

2023-08-14 Thread Edwin Lu




On 8/11/2023 6:29 AM, Jeff Law via Gcc-patches wrote:



On 8/10/23 21:45, Palmer Dabbelt wrote:



This seems pretty mechanical: just scrub through our MDs to check 
for any un-typed insns, then add the assert and fix the failures.  
You're more than welcome to have at it, but LMK if you want me to try 
and find some time for someone to do it -- certainly seems like a 
good way for someone new to dig in a bit.
Yes, definitely mechanical.  And yes, it's a good way for someone to 
start to get familiar with these bits -- I used the lack of types on 
some of the bitmanip insns to help ramp up Raphael and one of the RAU 
guys in this space.


Jeff


Hi, Palmer sent me this thread to take a look at. I can start working on 
this.


Edwin Lu

[pushed][LRA]: Process output stack pointer reloads before emitting reload insns

2023-08-14 Thread Vladimir Makarov via Gcc-patches


The patch fixes a failure to build the aarch64 port with my patch from yesterday.

The patch was successfully bootstrapped on x86-64 and aarch64.
commit c4760c0161f92b92361feba11836e3d066bb330c
Author: Vladimir N. Makarov 
Date:   Mon Aug 14 16:06:27 2023 -0400

[LRA]: Process output stack pointer reloads before emitting reload insns

Previous patch setting up asserts for processing stack pointer reloads
caught an error in code moving sp offset.  This resulted in failure of
building aarch64 port. The code wrongly processed insns beyond the
output reloads of the current insn.  This patch fixes it.

gcc/ChangeLog:

* lra-constraints.cc (curr_insn_transform): Process output stack
pointer reloads before emitting reload insns.

diff --git a/gcc/lra-constraints.cc b/gcc/lra-constraints.cc
index 8d9443adeb6..c718bedff32 100644
--- a/gcc/lra-constraints.cc
+++ b/gcc/lra-constraints.cc
@@ -4840,7 +4840,6 @@ curr_insn_transform (bool check_only_p)
/* Most probably there are no enough registers to satisfy asm insn: */
lra_asm_insn_error (curr_insn);
 }
-  lra_process_new_insns (curr_insn, before, after, "Inserting insn reload");
   if (goal_alt_out_sp_reload_p)
 {
   /* We have an output stack pointer reload -- update sp offset: */
@@ -4863,6 +4862,7 @@ curr_insn_transform (bool check_only_p)
  }
   lra_assert (done_p);
 }
+  lra_process_new_insns (curr_insn, before, after, "Inserting insn reload");
   return change_p;
 }

Re: [PATCH 0/3] fortran: fix length one character dummy args [PR110419]

2023-08-14 Thread Mikael Morin


Hello,

Le 13/08/2023 à 23:16, Harald Anlauf via Fortran a écrit :

Hi Mikael,

Am 09.08.23 um 22:21 schrieb Mikael Morin via Gcc-patches:

Hello,


(...)


I have regression tested this on x86_64-unknown-linux-gnu, and
powerpc64-unknown-linux-gnu (both -m32 and -m64).
OK for master?


this looks good to me.

There was only one thing I was uncertain what the right way is:
you chose to use mpz_cmp_ui in the length check in the new helper
function gfc_length_one_character_type_p, while in many other places
the length check uses mpz_cmp_si.

Admittedly, a negative (effective/declared) character length can never
occur, except maybe at intermediate times during resolution before this
is fixed up in accordance with the standard.  So this is probably more
a cosmetic decision, and you can decide to use either variant.


That's well spotted, but I think it doesn't matter in this case.
There are only two cases: whether the length is 1, and whether the 
length is different from 1.  In each of those two cases, mpz_cmp_si and 
mpz_cmp_ui both return either zero or non-zero.


I'm afraid of last-minute changes, so I prefer to keep the patch as is.

Thanks for the review.

Mikael

Re: [RFC] GCC Security policy

2023-08-14 Thread Siddhesh Poyarekar

On 2023-08-14 14:51, Richard Sandiford wrote:

I think it would help to clarify what the aim of the security policy is.
Specifically:

(1) What service do we want to provide to users by classifying one thing
as a security bug and another thing as not a security bug?

(2) What service do we want to provide to the GNU community by the same
classification?

I think it will be easier to agree on the classification if we first
agree on that.

I actually wanted to do a talk on this at the Cauldron this year and
*then* propose this for the gcc community, but I guess we could do this
early :)

So the core intent of a security policy for a project is to make clear
the security stance of the project, specifying to the extent possible
what kind of uses are considered safe and what kinds of bugs would be
considered security issues in the context of those uses.

There are a few advantages of doing this:

1. It makes it clear to users of the project the scope in which the
project could be used and what safety it could reasonably expect from
the project. In the context of GCC for example, it cannot expect the
compiler to do a safety check of untrusted sources; the compiler will
consider #include "/etc/passwd" just as valid code as #include <stdio.h>
and as a result, the onus is on the user environment to validate the
input sources for safety.

2. It helps the security community (Mitre and other CNAs and security
researchers) set correct expectations of the project so that they don't
cry wolf for every segfault or ICE under the pretext that code could
presumably be run as a service somehow and hence result in a "DoS".

3. This in turn helps stave off spurious CVE submissions that cause
needless churn in downstream distributions. LLVM is already starting to
see this[1] and it's only a matter of time before people start doing
this for GCC.

4. It helps make a distinction between important bugs and security bugs;
they're often conflated as one and the same thing. Security bugs are
special because they require different handling from those that do not
have a security impact, regardless of their actual importance.
Unfortunately one of the reasons they're special is because there's a
bunch of (pretty dumb) automation out there that rings alarm bells on
every single CVE. Without a clear understanding of the context under
which a project can be used, these alarm bells can be made unreasonably
loud (due to incorrect scoring, see the LLVM CVE for instance; just one
element in that vector changes the score from 0.0 to 5.5), causing
needless churn in not just the code base but in downstream releases and
end user environments.

5. This exercise is also a great start in developing an understanding of
which parts in GCC are security sensitive and in what sense. Runtime
libraries for example have a direct impact on application security.
Compiler impact is a little less direct. Hardening features have
another effect, but it's more mitigation-oriented than direct safety.
This also informs us about the impact of various project actions such as
bundling third-party libraries and development and maintenance of
tooling within GCC and will hopefully guide policies around those practices.

I hope this is a sufficient start. We don't necessarily want to get
into the business of acknowledging or rejecting security issues as
upstream at the moment (but see also the CNA discussion[2] of what we
intend to do in that space for glibc) but having uniform upstream
guidelines would be helpful to researchers as well as downstream
consumers to help decide what constitutes a security issue.

Thanks,
Sid

[1] https://nvd.nist.gov/vuln/detail/CVE-2023-29932
[2]
https://inbox.sourceware.org/libc-alpha/1a44f25a-5aa3-28b7-1ecb-b3991d44c...@gotplt.org/T/

Re: [PATCH] Fortran: Avoid accessing gfc_charlen when not looking at BT_CHARACTER (PR 110677)

2023-08-14 Thread Harald Anlauf via Gcc-patches


Hi Martin,

Am 14.08.23 um 19:39 schrieb Martin Jambor:

Hello,

this patch addresses an issue uncovered by the undefined behavior
sanitizer.  In function resolve_structure_cons in resolve.cc there is
a test starting with:

   if (cons->expr->ts.type == BT_CHARACTER && comp->ts.u.cl
  && comp->ts.u.cl->length
  && comp->ts.u.cl->length->expr_type == EXPR_CONSTANT

and UBSAN complained of loads from comp->ts.u.cl->length->expr_type of
integer value 1818451807, which is outside the value range of the expr_t
enum.  If I understand the code correctly, the entire load was
unwanted because comp->ts.type in those cases is BT_CLASS and not
BT_CHARACTER.  This patch simply adds a check to make sure it is only
accessed in those cases.

I have verified that the UBSAN failure goes away with this patch; it
also passes bootstrap and testing on x86_64-linux.  OK for master?


this looks good to me.

Looking at that code block, there is a potential other UB a few lines
below, where (hopefully integer) string lengths are to be passed to
mpz_cmp.

If the string length is ill-defined (e.g. non-integer), value.integer
is undefined.  We've seen this elsewhere, where on BE platforms that
undefined value was interpreted as some large integer and giving
failures on those platforms.  One could similarly add the following
checks here (on top of your patch):

diff --git a/gcc/fortran/resolve.cc b/gcc/fortran/resolve.cc
index e7c8d919bef..43095406c16 100644
--- a/gcc/fortran/resolve.cc
+++ b/gcc/fortran/resolve.cc
@@ -1401,6 +1401,8 @@ resolve_structure_cons (gfc_expr *expr, int init)
  && comp->ts.u.cl->length->expr_type == EXPR_CONSTANT
  && cons->expr->ts.u.cl && cons->expr->ts.u.cl->length
  && cons->expr->ts.u.cl->length->expr_type == EXPR_CONSTANT
+ && cons->expr->ts.u.cl->length->ts.type == BT_INTEGER
+ && comp->ts.u.cl->length->ts.type == BT_INTEGER
  && mpz_cmp (cons->expr->ts.u.cl->length->value.integer,
  comp->ts.u.cl->length->value.integer) != 0)
{

It is up to you whether you want to add this.

Thanks for the patch!

Harald



Thanks,

Martin



gcc/fortran/ChangeLog:

2023-08-14  Martin Jambor  

PR fortran/110677
* resolve.cc (resolve_structure_cons): Check comp->ts is character
type before accessing stuff through comp->ts.u.cl.
---
  gcc/fortran/resolve.cc | 5 +++--
  1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/gcc/fortran/resolve.cc b/gcc/fortran/resolve.cc
index e7c8d919bef..5b4dfc5fcd2 100644
--- a/gcc/fortran/resolve.cc
+++ b/gcc/fortran/resolve.cc
@@ -1396,8 +1396,9 @@ resolve_structure_cons (gfc_expr *expr, int init)
 the one of the structure, ensure this if the lengths are known at
 compile time and when we are dealing with PARAMETER or structure
 constructors.  */
-  if (cons->expr->ts.type == BT_CHARACTER && comp->ts.u.cl
- && comp->ts.u.cl->length
+  if (cons->expr->ts.type == BT_CHARACTER
+ && comp->ts.type == BT_CHARACTER
+ && comp->ts.u.cl && comp->ts.u.cl->length
  && comp->ts.u.cl->length->expr_type == EXPR_CONSTANT
  && cons->expr->ts.u.cl && cons->expr->ts.u.cl->length
  && cons->expr->ts.u.cl->length->expr_type == EXPR_CONSTANT

Re: [pushed][LRA]: Fix asserts for output stack pointer reloads

2023-08-14 Thread Vladimir Makarov via Gcc-patches




On 8/14/23 14:37, Prathamesh Kulkarni wrote:

On Mon, 14 Aug 2023 at 06:39, Vladimir Makarov via Gcc-patches
 wrote:

The following patch fixes useless asserts in my latest patch
implementing output stack pointer reloads.

Hi Vladimir,
It seems that this patch caused the following ICE on aarch64-linux-gnu
while building cp-demangle.c:
compile:  
/home/prathamesh.kulkarni/gnu-toolchain/gcc/master/stage1-build/./gcc/xgcc
-B/home/prathamesh.kulkarni/gnu-toolchain/gcc/master/stage1-build/./gcc/
-B/usr/local/aarch64-unknown-linux-gnu/bin/
-B/usr/local/aarch64-unknown-linux-gnu/lib/ -isystem
/usr/local/aarch64-unknown-linux-gnu/include -isystem
/usr/local/aarch64-unknown-linux-gnu/sys-include -DHAVE_CONFIG_H -I..
-I/home/prathamesh.kulkarni/gnu-toolchain/gcc/master/gcc/libstdc++-v3/../libiberty
-I/home/prathamesh.kulkarni/gnu-toolchain/gcc/master/gcc/libstdc++-v3/../include
-D_GLIBCXX_SHARED
-I/home/prathamesh.kulkarni/gnu-toolchain/gcc/master/stage1-build/aarch64-unknown-linux-gnu/libstdc++-v3/include/aarch64-unknown-linux-gnu
-I/home/prathamesh.kulkarni/gnu-toolchain/gcc/master/stage1-build/aarch64-unknown-linux-gnu/libstdc++-v3/include
-I/home/prathamesh.kulkarni/gnu-toolchain/gcc/master/gcc/libstdc++-v3/libsupc++
-g -O2 -DIN_GLIBCPP_V3 -Wno-error -c cp-demangle.c  -fPIC -DPIC -o
cp-demangle.o
during RTL pass: reload
cp-demangle.c: In function ‘d_demangle_callback.constprop’:
cp-demangle.c:6815:1: internal compiler error: in curr_insn_transform,
at lra-constraints.cc:4854
  6815 | }
   | ^
0xce6b37 curr_insn_transform
 ../../gcc/gcc/lra-constraints.cc:4854
0xce7887 lra_constraints(bool)
 ../../gcc/gcc/lra-constraints.cc:5478
0xccdfa7 lra(_IO_FILE*)
 ../../gcc/gcc/lra.cc:2419
0xc7e417 do_reload
 ../../gcc/gcc/ira.cc:5970
0xc7e417 execute
 ../../gcc/gcc/ira.cc:6156
Please submit a full bug report, with preprocessed source (by using
-freport-bug).
Please include the complete backtrace with any bug report.


Sorry, I should have bootstrapped my patch on aarch64.

The asserts actually seem very useful, as I found they caught a bug in 
my previous patch.


I'll push a patch fixing the problems after finishing bootstraps, 
probably in a couple of hours.


Thank you

Re: [RFC] GCC Security policy

2023-08-14 Thread Richard Sandiford via Gcc-patches

I think it would help to clarify what the aim of the security policy is.
Specifically:

(1) What service do we want to provide to users by classifying one thing
as a security bug and another thing as not a security bug?

(2) What service do we want to provide to the GNU community by the same
classification?

I think it will be easier to agree on the classification if we first
agree on that.

Siddhesh Poyarekar  writes:
> Hi,
>
> Here's the updated draft of the top part of the security policy with all 
> of the recommendations incorporated.
>
> Thanks,
> Sid
>
>
> What is a GCC security bug?
> ===
>
>  A security bug is one that threatens the security of a system or
>  network, or might compromise the security of data stored on it.
>  In the context of GCC there are multiple ways in which this might
>  happen and they're detailed below.
>
> Compiler drivers, programs, libgccjit and support libraries
> ---
>
>  The compiler driver processes source code, invokes other programs
>  such as the assembler and linker and generates the output result,
>  which may be assembly code or machine code.  It is necessary that
>  all source code inputs to the compiler are trusted, since it is
>  impossible for the driver to validate input source code beyond
>  conformance to a programming language standard.
>
>  The GCC JIT implementation, libgccjit, is intended to be plugged
>  into applications to translate input source code in the application
>  context.  Limitations that apply to the compiler
>  driver, apply here too in terms of sanitizing inputs, so it is
>  recommended that inputs are either sanitized by an external program
>  to allow only trusted, safe execution in the context of the
>  application or the JIT execution context is appropriately sandboxed
>  to contain the effects of any bugs in the JIT or its generated code
>  to the sandboxed environment.
>
>  Support libraries such as libiberty, libcc1, libvtv and libcpp have
>  been developed separately to share code with other tools such as
>  binutils and gdb.  These libraries again have similar challenges to
>  compiler drivers.  While they are expected to be robust against
>  arbitrary input, they should only be used with trusted inputs.
>
>  Libraries such as zlib that are bundled into GCC to build it will be
>  treated the same as the compiler drivers and programs as far as
>  security coverage is concerned.  However if you find an issue in
>  these libraries independent of their use in GCC, you should reach
>  out to their upstream projects to report them.
>
>  As a result, the only case for a potential security issue in all
>  these cases is when it ends up generating vulnerable output for
>  valid input source code.
>
>  As a result, the only case for a potential security issue in the
>  compiler is when it generates vulnerable application code for
>  trusted input source code that is conforming to the relevant
>  programming standard or extensions documented as supported by GCC
>  and the algorithm expressed in the source code does not have the
>  vulnerability.  The output application code could be considered
>  vulnerable if it produces an actual vulnerability in the target
>  application, specifically in the following cases:
>
>  - The application dereferences an invalid memory location despite
>the application sources being valid.
>  - The application reads from or writes to a valid but incorrect
>memory location, resulting in an information integrity issue or an
>information leak.
>  - The application ends up running in an infinite loop or with
>severe degradation in performance despite the input sources having
>no such issue, resulting in a Denial of Service.  Note that
>correct but non-performant code is not a security issue candidate,
>this only applies to incorrect code that may result in performance
>degradation severe enough to amount to a denial of service.
>  - The application crashes due to the generated incorrect code,
>resulting in a Denial of Service.

One difficulty is that wrong-code bugs are rarely confined to
a particular source code structure.  Something that causes a
miscompilation of a bounds check could later be discovered to cause a
miscompilation of something that is less obviously security-sensitive.
Or the same thing could happen in reverse.  And it's common for the
same bug to be reported multiple times, against different testcases.

The proposal says that certain kinds of wrong code could be a security
bug.  But what will be the criteria for deciding whether a wrong code
bug that *could* be classified as a security bug is in fact a security
bug?  Does someone have to show that at least one

Re: [PATCH V4] VECT: Support loop len control on EXTRACT_LAST vectorization

Hi everyone,

I have bootstrapped and regtested the patch below on s390.  For the
64-bit target I do not see any changes regarding the testsuite.  For the
31-bit target I see the following failures:

FAIL: gcc.dg/vect/no-scevccp-outer-14.c (internal compiler error: in require, 
at machmode.h:313)
FAIL: gcc.dg/vect/no-scevccp-outer-14.c (test for excess errors)
FAIL: gcc.dg/vect/pr50451.c (internal compiler error: in require, at 
machmode.h:313)
FAIL: gcc.dg/vect/pr50451.c (test for excess errors)
FAIL: gcc.dg/vect/pr50451.c -flto -ffat-lto-objects (internal compiler error: 
in require, at machmode.h:313)
FAIL: gcc.dg/vect/pr50451.c -flto -ffat-lto-objects (test for excess errors)
FAIL: gcc.dg/vect/pr53773.c (internal compiler error: in require, at 
machmode.h:313)
FAIL: gcc.dg/vect/pr53773.c (test for excess errors)
FAIL: gcc.dg/vect/pr53773.c -flto -ffat-lto-objects (internal compiler error: 
in require, at machmode.h:313)
FAIL: gcc.dg/vect/pr53773.c -flto -ffat-lto-objects (test for excess errors)
FAIL: gcc.dg/vect/pr71407.c (internal compiler error: in require, at 
machmode.h:313)
FAIL: gcc.dg/vect/pr71407.c (test for excess errors)
FAIL: gcc.dg/vect/pr71407.c -flto -ffat-lto-objects (internal compiler error: 
in require, at machmode.h:313)
FAIL: gcc.dg/vect/pr71407.c -flto -ffat-lto-objects (test for excess errors)
FAIL: gcc.dg/vect/pr71416-1.c (internal compiler error: in require, at 
machmode.h:313)
FAIL: gcc.dg/vect/pr71416-1.c (test for excess errors)
FAIL: gcc.dg/vect/pr71416-1.c -flto -ffat-lto-objects (internal compiler error: 
in require, at machmode.h:313)
FAIL: gcc.dg/vect/pr71416-1.c -flto -ffat-lto-objects (test for excess errors)
FAIL: gcc.dg/vect/pr94443.c (internal compiler error: in require, at 
machmode.h:313)
FAIL: gcc.dg/vect/pr94443.c (test for excess errors)
FAIL: gcc.dg/vect/pr94443.c -flto -ffat-lto-objects (internal compiler error: 
in require, at machmode.h:313)
FAIL: gcc.dg/vect/pr94443.c -flto -ffat-lto-objects (test for excess errors)
FAIL: gcc.dg/vect/pr97558.c (internal compiler error: in require, at 
machmode.h:313)
FAIL: gcc.dg/vect/pr97558.c (test for excess errors)
FAIL: gcc.dg/vect/pr97558.c -flto -ffat-lto-objects (internal compiler error: 
in require, at machmode.h:313)
FAIL: gcc.dg/vect/pr97558.c -flto -ffat-lto-objects (test for excess errors)
FAIL: gcc.dg/vect/vect-reduc-pattern-3.c -flto -ffat-lto-objects (internal 
compiler error: in require, at machmode.h:313)
FAIL: gcc.dg/vect/vect-reduc-pattern-3.c -flto -ffat-lto-objects (test for 
excess errors)
UNRESOLVED: gcc.dg/vect/no-scevccp-outer-14.c compilation failed to produce 
executable
UNRESOLVED: gcc.dg/vect/pr53773.c -flto -ffat-lto-objects  scan-tree-dump-times 
optimized "\\* 10" 2
UNRESOLVED: gcc.dg/vect/pr53773.c scan-tree-dump-times optimized "\\* 10" 2
UNRESOLVED: gcc.dg/vect/pr71416-1.c -flto -ffat-lto-objects compilation failed 
to produce executable
UNRESOLVED: gcc.dg/vect/pr71416-1.c compilation failed to produce executable
UNRESOLVED: gcc.dg/vect/vect-reduc-pattern-3.c -flto -ffat-lto-objects 
compilation failed to produce executable

I've randomly picked pr50451.c and ran gcc against it which results in:

during GIMPLE pass: vect
dump file: pr50451.c.174t.vect
/gcc-verify-workdir/patched/src/gcc/testsuite/gcc.dg/vect/pr50451.c: In 
function ‘foo’:
/gcc-verify-workdir/patched/src/gcc/testsuite/gcc.dg/vect/pr50451.c:5:1: 
internal compiler error: in require, at machmode.h:313
0x1265d21 opt_mode::require() const
/gcc-verify-workdir/patched/src/gcc/machmode.h:313
0x1d7e4e9 opt_mode::require() const
/gcc-verify-workdir/patched/src/gcc/vec.h:955
0x1d7e4e9 vect_verify_loop_lens
/gcc-verify-workdir/patched/src/gcc/tree-vect-loop.cc:1471
0x1da29ab vect_analyze_loop_2
/gcc-verify-workdir/patched/src/gcc/tree-vect-loop.cc:2929
0x1da40c7 vect_analyze_loop_1
/gcc-verify-workdir/patched/src/gcc/tree-vect-loop.cc:3330
0x1da499d vect_analyze_loop(loop*, vec_info_shared*)
/gcc-verify-workdir/patched/src/gcc/tree-vect-loop.cc:3484
0x1deed27 try_vectorize_loop_1
/gcc-verify-workdir/patched/src/gcc/tree-vectorizer.cc:1064
0x1deed27 try_vectorize_loop
/gcc-verify-workdir/patched/src/gcc/tree-vectorizer.cc:1180
0x1def5c1 execute
/gcc-verify-workdir/patched/src/gcc/tree-vectorizer.cc:1296
Please submit a full bug report, with preprocessed source (by using 
-freport-bug).
Please include the complete backtrace with any bug report.
See <https://gcc.gnu.org/bugs/> for instructions.

I will come back to this tomorrow.

Cheers,
Stefan

On Mon, Aug 14, 2023 at 08:45:21PM +0800, Kewen.Lin via Gcc-patches wrote:
> Hi Juzhe,
> 
> on 2023/8/14 20:08, juzhe.zh...@rivai.ai wrote:
> > Hi, Kewin.
> > 
> > Could you test 'can_vec_extract_var_idx_p' and send V5 patch when you pass 
> > the testing?
> 
> The below diff was bootstrapped and regress-tested on Power10 LE.  Comparing 
> to the
> previous v4, the only changes should be the proposed 
>

Re: [pushed][LRA]: Fix asserts for output stack pointer reloads

2023-08-14 Thread Prathamesh Kulkarni via Gcc-patches

On Mon, 14 Aug 2023 at 06:39, Vladimir Makarov via Gcc-patches
 wrote:
>
> The following patch fixes useless asserts in my latest patch
> implementing output stack pointer reloads.
Hi Vladimir,
It seems that this patch caused the following ICE on aarch64-linux-gnu
while building cp-demangle.c:
compile:  
/home/prathamesh.kulkarni/gnu-toolchain/gcc/master/stage1-build/./gcc/xgcc
-B/home/prathamesh.kulkarni/gnu-toolchain/gcc/master/stage1-build/./gcc/
-B/usr/local/aarch64-unknown-linux-gnu/bin/
-B/usr/local/aarch64-unknown-linux-gnu/lib/ -isystem
/usr/local/aarch64-unknown-linux-gnu/include -isystem
/usr/local/aarch64-unknown-linux-gnu/sys-include -DHAVE_CONFIG_H -I..
-I/home/prathamesh.kulkarni/gnu-toolchain/gcc/master/gcc/libstdc++-v3/../libiberty
-I/home/prathamesh.kulkarni/gnu-toolchain/gcc/master/gcc/libstdc++-v3/../include
-D_GLIBCXX_SHARED
-I/home/prathamesh.kulkarni/gnu-toolchain/gcc/master/stage1-build/aarch64-unknown-linux-gnu/libstdc++-v3/include/aarch64-unknown-linux-gnu
-I/home/prathamesh.kulkarni/gnu-toolchain/gcc/master/stage1-build/aarch64-unknown-linux-gnu/libstdc++-v3/include
-I/home/prathamesh.kulkarni/gnu-toolchain/gcc/master/gcc/libstdc++-v3/libsupc++
-g -O2 -DIN_GLIBCPP_V3 -Wno-error -c cp-demangle.c  -fPIC -DPIC -o
cp-demangle.o
during RTL pass: reload
cp-demangle.c: In function ‘d_demangle_callback.constprop’:
cp-demangle.c:6815:1: internal compiler error: in curr_insn_transform,
at lra-constraints.cc:4854
 6815 | }
  | ^
0xce6b37 curr_insn_transform
../../gcc/gcc/lra-constraints.cc:4854
0xce7887 lra_constraints(bool)
../../gcc/gcc/lra-constraints.cc:5478
0xccdfa7 lra(_IO_FILE*)
../../gcc/gcc/lra.cc:2419
0xc7e417 do_reload
../../gcc/gcc/ira.cc:5970
0xc7e417 execute
../../gcc/gcc/ira.cc:6156
Please submit a full bug report, with preprocessed source (by using
-freport-bug).
Please include the complete backtrace with any bug report.

Thanks,
Prathamesh

[PATCH 9/9] arm: [MVE intrinsics] rework vmullbq_poly vmulltq_poly

Implement vmull[bt]q_poly using the new MVE builtins framework.

2023-08-14  Christophe Lyon  

gcc/
* config/arm/arm-mve-builtins-base.cc (vmullbq_poly)
(vmulltq_poly): New.
* config/arm/arm-mve-builtins-base.def (vmullbq_poly)
(vmulltq_poly): New.
* config/arm/arm-mve-builtins-base.h (vmullbq_poly)
(vmulltq_poly): New.
* config/arm/arm_mve.h (vmulltq_poly): Remove.
(vmullbq_poly): Remove.
(vmullbq_poly_m): Remove.
(vmulltq_poly_m): Remove.
(vmullbq_poly_x): Remove.
(vmulltq_poly_x): Remove.
(vmulltq_poly_p8): Remove.
(vmullbq_poly_p8): Remove.
(vmulltq_poly_p16): Remove.
(vmullbq_poly_p16): Remove.
(vmullbq_poly_m_p8): Remove.
(vmullbq_poly_m_p16): Remove.
(vmulltq_poly_m_p8): Remove.
(vmulltq_poly_m_p16): Remove.
(vmullbq_poly_x_p8): Remove.
(vmullbq_poly_x_p16): Remove.
(vmulltq_poly_x_p8): Remove.
(vmulltq_poly_x_p16): Remove.
(__arm_vmulltq_poly_p8): Remove.
(__arm_vmullbq_poly_p8): Remove.
(__arm_vmulltq_poly_p16): Remove.
(__arm_vmullbq_poly_p16): Remove.
(__arm_vmullbq_poly_m_p8): Remove.
(__arm_vmullbq_poly_m_p16): Remove.
(__arm_vmulltq_poly_m_p8): Remove.
(__arm_vmulltq_poly_m_p16): Remove.
(__arm_vmullbq_poly_x_p8): Remove.
(__arm_vmullbq_poly_x_p16): Remove.
(__arm_vmulltq_poly_x_p8): Remove.
(__arm_vmulltq_poly_x_p16): Remove.
(__arm_vmulltq_poly): Remove.
(__arm_vmullbq_poly): Remove.
(__arm_vmullbq_poly_m): Remove.
(__arm_vmulltq_poly_m): Remove.
(__arm_vmullbq_poly_x): Remove.
(__arm_vmulltq_poly_x): Remove.
---
 gcc/config/arm/arm-mve-builtins-base.cc  |   2 +
 gcc/config/arm/arm-mve-builtins-base.def |   2 +
 gcc/config/arm/arm-mve-builtins-base.h   |   2 +
 gcc/config/arm/arm_mve.h | 248 ---
 4 files changed, 6 insertions(+), 248 deletions(-)

diff --git a/gcc/config/arm/arm-mve-builtins-base.cc 
b/gcc/config/arm/arm-mve-builtins-base.cc
index 3620c56865d..ed5eba656c1 100644
--- a/gcc/config/arm/arm-mve-builtins-base.cc
+++ b/gcc/config/arm/arm-mve-builtins-base.cc
@@ -331,6 +331,8 @@ FUNCTION_WITHOUT_N_NO_F (vmovntq, VMOVNTQ)
 FUNCTION_WITHOUT_N_NO_F (vmulhq, VMULHQ)
 FUNCTION (vmullbq_int, unspec_mve_function_exact_insn_vmull, (VMULLBQ_INT_S, 
VMULLBQ_INT_U, VMULLBQ_INT_M_S, VMULLBQ_INT_M_U))
 FUNCTION (vmulltq_int, unspec_mve_function_exact_insn_vmull, (VMULLTQ_INT_S, 
VMULLTQ_INT_U, VMULLTQ_INT_M_S, VMULLTQ_INT_M_U))
+FUNCTION (vmullbq_poly, unspec_mve_function_exact_insn_vmull_poly, 
(VMULLBQ_POLY_P, VMULLBQ_POLY_M_P))
+FUNCTION (vmulltq_poly, unspec_mve_function_exact_insn_vmull_poly, 
(VMULLTQ_POLY_P, VMULLTQ_POLY_M_P))
 FUNCTION_WITH_RTX_M_N (vmulq, MULT, VMULQ)
 FUNCTION_WITH_RTX_M_N_NO_F (vmvnq, NOT, VMVNQ)
 FUNCTION (vnegq, unspec_based_mve_function_exact_insn, (NEG, NEG, NEG, -1, -1, 
-1, VNEGQ_M_S, -1, VNEGQ_M_F, -1, -1, -1))
diff --git a/gcc/config/arm/arm-mve-builtins-base.def 
b/gcc/config/arm/arm-mve-builtins-base.def
index db811bec479..01dfbdef8a3 100644
--- a/gcc/config/arm/arm-mve-builtins-base.def
+++ b/gcc/config/arm/arm-mve-builtins-base.def
@@ -80,6 +80,8 @@ DEF_MVE_FUNCTION (vmovntq, binary_move_narrow, integer_16_32, 
m_or_none)
 DEF_MVE_FUNCTION (vmulhq, binary, all_integer, mx_or_none)
 DEF_MVE_FUNCTION (vmullbq_int, binary_widen, all_integer, mx_or_none)
 DEF_MVE_FUNCTION (vmulltq_int, binary_widen, all_integer, mx_or_none)
+DEF_MVE_FUNCTION (vmullbq_poly, binary_widen_poly, poly_8_16, mx_or_none)
+DEF_MVE_FUNCTION (vmulltq_poly, binary_widen_poly, poly_8_16, mx_or_none)
 DEF_MVE_FUNCTION (vmulq, binary_opt_n, all_integer, mx_or_none)
 DEF_MVE_FUNCTION (vmvnq, mvn, all_integer, mx_or_none)
 DEF_MVE_FUNCTION (vnegq, unary, all_signed, mx_or_none)
diff --git a/gcc/config/arm/arm-mve-builtins-base.h 
b/gcc/config/arm/arm-mve-builtins-base.h
index 5652fb7c701..c574c32ac53 100644
--- a/gcc/config/arm/arm-mve-builtins-base.h
+++ b/gcc/config/arm/arm-mve-builtins-base.h
@@ -104,6 +104,8 @@ extern const function_base *const vmovntq;
 extern const function_base *const vmulhq;
 extern const function_base *const vmullbq_int;
 extern const function_base *const vmulltq_int;
+extern const function_base *const vmullbq_poly;
+extern const function_base *const vmulltq_poly;
 extern const function_base *const vmulq;
 extern const function_base *const vmvnq;
 extern const function_base *const vnegq;
diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h
index 837864aaf29..b82d94e59bd 100644
--- a/gcc/config/arm/arm_mve.h
+++ b/gcc/config/arm/arm_mve.h
@@ -44,14 +44,10 @@
 #define vst4q(__addr, __value) __arm_vst4q(__addr, __value)
 #define vornq(__a, __b) __arm_vornq(__a, __b)
 #define vbicq(__a, __b) __arm_vbicq(__a, __b)
-#define vmulltq_poly(__a, __b) __arm_vmulltq_poly(__a, __b)
-#define

[PATCH 4/9] arm: [MVE intrinsics] rework vmullbq_int vmulltq_int

Implement vmullbq_int, vmulltq_int using the new MVE builtins
framework.

2023-08-14  Christophe Lyon  

gcc/
* config/arm/arm-mve-builtins-base.cc (vmullbq_int, vmulltq_int):
New.
* config/arm/arm-mve-builtins-base.def (vmullbq_int, vmulltq_int):
New.
* config/arm/arm-mve-builtins-base.h (vmullbq_int, vmulltq_int):
New.
* config/arm/arm_mve.h (vmulltq_int): Remove.
(vmullbq_int): Remove.
(vmullbq_int_m): Remove.
(vmulltq_int_m): Remove.
(vmullbq_int_x): Remove.
(vmulltq_int_x): Remove.
(vmulltq_int_u8): Remove.
(vmullbq_int_u8): Remove.
(vmulltq_int_s8): Remove.
(vmullbq_int_s8): Remove.
(vmulltq_int_u16): Remove.
(vmullbq_int_u16): Remove.
(vmulltq_int_s16): Remove.
(vmullbq_int_s16): Remove.
(vmulltq_int_u32): Remove.
(vmullbq_int_u32): Remove.
(vmulltq_int_s32): Remove.
(vmullbq_int_s32): Remove.
(vmullbq_int_m_s8): Remove.
(vmullbq_int_m_s32): Remove.
(vmullbq_int_m_s16): Remove.
(vmullbq_int_m_u8): Remove.
(vmullbq_int_m_u32): Remove.
(vmullbq_int_m_u16): Remove.
(vmulltq_int_m_s8): Remove.
(vmulltq_int_m_s32): Remove.
(vmulltq_int_m_s16): Remove.
(vmulltq_int_m_u8): Remove.
(vmulltq_int_m_u32): Remove.
(vmulltq_int_m_u16): Remove.
(vmullbq_int_x_s8): Remove.
(vmullbq_int_x_s16): Remove.
(vmullbq_int_x_s32): Remove.
(vmullbq_int_x_u8): Remove.
(vmullbq_int_x_u16): Remove.
(vmullbq_int_x_u32): Remove.
(vmulltq_int_x_s8): Remove.
(vmulltq_int_x_s16): Remove.
(vmulltq_int_x_s32): Remove.
(vmulltq_int_x_u8): Remove.
(vmulltq_int_x_u16): Remove.
(vmulltq_int_x_u32): Remove.
(__arm_vmulltq_int_u8): Remove.
(__arm_vmullbq_int_u8): Remove.
(__arm_vmulltq_int_s8): Remove.
(__arm_vmullbq_int_s8): Remove.
(__arm_vmulltq_int_u16): Remove.
(__arm_vmullbq_int_u16): Remove.
(__arm_vmulltq_int_s16): Remove.
(__arm_vmullbq_int_s16): Remove.
(__arm_vmulltq_int_u32): Remove.
(__arm_vmullbq_int_u32): Remove.
(__arm_vmulltq_int_s32): Remove.
(__arm_vmullbq_int_s32): Remove.
(__arm_vmullbq_int_m_s8): Remove.
(__arm_vmullbq_int_m_s32): Remove.
(__arm_vmullbq_int_m_s16): Remove.
(__arm_vmullbq_int_m_u8): Remove.
(__arm_vmullbq_int_m_u32): Remove.
(__arm_vmullbq_int_m_u16): Remove.
(__arm_vmulltq_int_m_s8): Remove.
(__arm_vmulltq_int_m_s32): Remove.
(__arm_vmulltq_int_m_s16): Remove.
(__arm_vmulltq_int_m_u8): Remove.
(__arm_vmulltq_int_m_u32): Remove.
(__arm_vmulltq_int_m_u16): Remove.
(__arm_vmullbq_int_x_s8): Remove.
(__arm_vmullbq_int_x_s16): Remove.
(__arm_vmullbq_int_x_s32): Remove.
(__arm_vmullbq_int_x_u8): Remove.
(__arm_vmullbq_int_x_u16): Remove.
(__arm_vmullbq_int_x_u32): Remove.
(__arm_vmulltq_int_x_s8): Remove.
(__arm_vmulltq_int_x_s16): Remove.
(__arm_vmulltq_int_x_s32): Remove.
(__arm_vmulltq_int_x_u8): Remove.
(__arm_vmulltq_int_x_u16): Remove.
(__arm_vmulltq_int_x_u32): Remove.
(__arm_vmulltq_int): Remove.
(__arm_vmullbq_int): Remove.
(__arm_vmullbq_int_m): Remove.
(__arm_vmulltq_int_m): Remove.
(__arm_vmullbq_int_x): Remove.
(__arm_vmulltq_int_x): Remove.
---
 gcc/config/arm/arm-mve-builtins-base.cc  |   2 +
 gcc/config/arm/arm-mve-builtins-base.def |   2 +
 gcc/config/arm/arm-mve-builtins-base.h   |   2 +
 gcc/config/arm/arm_mve.h | 648 ---
 4 files changed, 6 insertions(+), 648 deletions(-)

diff --git a/gcc/config/arm/arm-mve-builtins-base.cc 
b/gcc/config/arm/arm-mve-builtins-base.cc
index e31095ae112..3620c56865d 100644
--- a/gcc/config/arm/arm-mve-builtins-base.cc
+++ b/gcc/config/arm/arm-mve-builtins-base.cc
@@ -329,6 +329,8 @@ FUNCTION_WITHOUT_N_NO_F (vmovltq, VMOVLTQ)
 FUNCTION_WITHOUT_N_NO_F (vmovnbq, VMOVNBQ)
 FUNCTION_WITHOUT_N_NO_F (vmovntq, VMOVNTQ)
 FUNCTION_WITHOUT_N_NO_F (vmulhq, VMULHQ)
+FUNCTION (vmullbq_int, unspec_mve_function_exact_insn_vmull, (VMULLBQ_INT_S, 
VMULLBQ_INT_U, VMULLBQ_INT_M_S, VMULLBQ_INT_M_U))
+FUNCTION (vmulltq_int, unspec_mve_function_exact_insn_vmull, (VMULLTQ_INT_S, 
VMULLTQ_INT_U, VMULLTQ_INT_M_S, VMULLTQ_INT_M_U))
 FUNCTION_WITH_RTX_M_N (vmulq, MULT, VMULQ)
 FUNCTION_WITH_RTX_M_N_NO_F (vmvnq, NOT, VMVNQ)
 FUNCTION (vnegq, unspec_based_mve_function_exact_insn, (NEG, NEG, NEG, -1, -1, 
-1, VNEGQ_M_S, -1, VNEGQ_M_F, -1, -1, -1))
diff --git a/gcc/config/arm/arm-mve-builtins-base.def 
b/gcc/config/arm/arm-mve-builtins-base.def
index e7d466f2efd..db811bec479 100644
--- a/gcc/config/arm/arm-mve-builtins-base.def
+++

[PATCH 8/9] arm: [MVE intrinsics] add unspec_mve_function_exact_insn_vmull_poly

Introduce a function that will be used to build vmull[bt]q_poly
intrinsics that use poly types.

2023-08-14  Christophe Lyon  

gcc/
* config/arm/arm-mve-builtins-functions.h (class
unspec_mve_function_exact_insn_vmull_poly): New.
---
 gcc/config/arm/arm-mve-builtins-functions.h | 56 -
 1 file changed, 55 insertions(+), 1 deletion(-)

diff --git a/gcc/config/arm/arm-mve-builtins-functions.h 
b/gcc/config/arm/arm-mve-builtins-functions.h
index c0fc450f886..eba1f071af0 100644
--- a/gcc/config/arm/arm-mve-builtins-functions.h
+++ b/gcc/config/arm/arm-mve-builtins-functions.h
@@ -838,7 +838,6 @@ public:
   }
 };
 
-
 /* Map the vmull-related function directly to CODE (UNSPEC, UNSPEC, M)
where M is the vector mode associated with type suffix 0.  We need
this special case because the builtins have _int in their
@@ -912,6 +911,61 @@ public:
   }
 };
 
+/* Map the vmull_poly-related function directly to CODE (UNSPEC,
+   UNSPEC, M) where M is the vector mode associated with type suffix
+   0.  We need this special case because the builtins have _poly in
+   their names, and use the special poly type.  */
+class unspec_mve_function_exact_insn_vmull_poly : public function_base
+{
+public:
+  CONSTEXPR unspec_mve_function_exact_insn_vmull_poly (int unspec_for_poly,
+  int unspec_for_m_poly)
+: m_unspec_for_poly (unspec_for_poly),
+  m_unspec_for_m_poly (unspec_for_m_poly)
+  {}
+
+  /* The unspec codes associated with poly operations, for the
+ cases without and with the _m predicate respectively (the
+ latter is also used for the _x predicate).  */
+  int m_unspec_for_poly;
+  int m_unspec_for_m_poly;
+
+  rtx
+  expand (function_expander ) const override
+  {
+insn_code code;
+
+if (e.mode_suffix_id != MODE_none)
+  gcc_unreachable ();
+
+if (! e.type_suffix (0).poly_p)
+  gcc_unreachable ();
+
+switch (e.pred)
+  {
+  case PRED_none:
+   /* No predicate, no suffix.  */
+   code = code_for_mve_q_poly (m_unspec_for_poly, m_unspec_for_poly, 
e.vector_mode (0));
+   return e.use_exact_insn (code);
+
+  case PRED_m:
+   /* No suffix, "m" predicate.  */
+   code = code_for_mve_q_poly_m (m_unspec_for_m_poly, m_unspec_for_m_poly, 
e.vector_mode (0));
+   return e.use_cond_insn (code, 0);
+
+  case PRED_x:
+   /* No suffix, "x" predicate.  */
+   code = code_for_mve_q_poly_m (m_unspec_for_m_poly, m_unspec_for_m_poly, 
e.vector_mode (0));
+   return e.use_pred_x_insn (code);
+
+  default:
+   gcc_unreachable ();
+  }
+
+gcc_unreachable ();
+  }
+};
+
 } /* end namespace arm_mve */
 
 /* Declare the global function base NAME, creating it from an instance
-- 
2.34.1

[PATCH 7/9] arm: [MVE intrinsics] add binary_widen_poly shape

This patch adds the binary_widen_poly shape description.

2023-08-14  Christophe Lyon  

gcc/
* config/arm/arm-mve-builtins-shapes.cc (binary_widen_poly): New.
* config/arm/arm-mve-builtins-shapes.h (binary_widen_poly): New.
---
 gcc/config/arm/arm-mve-builtins-shapes.cc | 49 +++
 gcc/config/arm/arm-mve-builtins-shapes.h  |  1 +
 2 files changed, 50 insertions(+)

diff --git a/gcc/config/arm/arm-mve-builtins-shapes.cc 
b/gcc/config/arm/arm-mve-builtins-shapes.cc
index 761da4d8ece..23eb9d0e69b 100644
--- a/gcc/config/arm/arm-mve-builtins-shapes.cc
+++ b/gcc/config/arm/arm-mve-builtins-shapes.cc
@@ -1187,6 +1187,55 @@ struct binary_widen_def : public overloaded_base<0>
 };
 SHAPE (binary_widen)
 
+/* _t vfoo[_t0](_t, _t)
+
+   Example: vmullbq_poly.
+   uint32x4_t [__arm_]vmullbq_poly[_p16](uint16x8_t a, uint16x8_t b)
+   uint32x4_t [__arm_]vmullbq_poly_m[_p16](uint32x4_t inactive, uint16x8_t a, 
uint16x8_t b, mve_pred16_t p)
+   uint32x4_t [__arm_]vmullbq_poly_x[_p16](uint16x8_t a, uint16x8_t b, 
mve_pred16_t p)  */
+struct binary_widen_poly_def : public overloaded_base<0>
+{
+  void
+  build (function_builder , const function_group_info ,
+bool preserve_user_namespace) const override
+  {
+b.add_overloaded_functions (group, MODE_none, preserve_user_namespace);
+build_all (b, "vU0,vp0,vp0", group, MODE_none, preserve_user_namespace);
+  }
+
+  tree
+  resolve (function_resolver ) const override
+  {
+unsigned int i, nargs;
+type_suffix_index type;
+if (!r.check_gp_argument (2, i, nargs)
+   || (type = r.infer_vector_type (i - 1)) == NUM_TYPE_SUFFIXES)
+  return error_mark_node;
+
+/* infer_vector_type found the 'unsigned' version of the 'poly'
+   type we are looking for, so find the 'poly' type with the same
+   width.  */
+type = find_type_suffix (TYPE_poly, type_suffixes[type].element_bits);
+
+type_suffix_index wide_suffix
+  = find_type_suffix (TYPE_unsigned,
+ type_suffixes[type].element_bits * 2);
+
+/* Require the 'poly' type, require_matching_vector_type would try
+   and fail with the 'unsigned' one.  */
+if (!r.require_vector_type (i, type_suffixes[type].vector_type))
+  return error_mark_node;
+
+/* Check the inactive argument has the wide type.  */
+if ((r.pred == PRED_m)
+   && (r.infer_vector_type (0) != wide_suffix))
+  return r.report_no_such_form (type);
+
+return r.resolve_to (r.mode_suffix_id, type);
+  }
+};
+SHAPE (binary_widen_poly)
+
 /* _t vfoo[_n_t0](_t, const int)
 
Check that 'imm' is in the [1..#bits] range.
diff --git a/gcc/config/arm/arm-mve-builtins-shapes.h 
b/gcc/config/arm/arm-mve-builtins-shapes.h
index fa6ec4fc002..a93245321c9 100644
--- a/gcc/config/arm/arm-mve-builtins-shapes.h
+++ b/gcc/config/arm/arm-mve-builtins-shapes.h
@@ -57,6 +57,7 @@ namespace arm_mve
 extern const function_shape *const binary_widen;
 extern const function_shape *const binary_widen_n;
 extern const function_shape *const binary_widen_opt_n;
+extern const function_shape *const binary_widen_poly;
 extern const function_shape *const cmp;
 extern const function_shape *const create;
 extern const function_shape *const inherent;
-- 
2.34.1

[PATCH 3/9] arm: [MVE intrinsics] add binary_widen shape

This patch adds the binary_widen shape description.

2023-08-14  Christophe Lyon  

gcc/:

* config/arm/arm-mve-builtins-shapes.cc (binary_widen): New.
* config/arm/arm-mve-builtins-shapes.h (binary_widen): New.
---
 gcc/config/arm/arm-mve-builtins-shapes.cc | 42 +++
 gcc/config/arm/arm-mve-builtins-shapes.h  |  5 +--
 2 files changed, 45 insertions(+), 2 deletions(-)

diff --git a/gcc/config/arm/arm-mve-builtins-shapes.cc 
b/gcc/config/arm/arm-mve-builtins-shapes.cc
index 1f22201ac95..c8eb3351ef2 100644
--- a/gcc/config/arm/arm-mve-builtins-shapes.cc
+++ b/gcc/config/arm/arm-mve-builtins-shapes.cc
@@ -1129,6 +1129,48 @@ struct binary_rshift_narrow_unsigned_def : public 
overloaded_base<0>
 };
 SHAPE (binary_rshift_narrow_unsigned)
 
+/* _t vfoo[_t0](_t, _t)
+
+   Example: vmullbq.
+   int32x4_t [__arm_]vmullbq_int[_s16](int16x8_t a, int16x8_t b)
+   int32x4_t [__arm_]vmullbq_int_m[_s16](int32x4_t inactive, int16x8_t a, 
int16x8_t b, mve_pred16_t p)
+   int32x4_t [__arm_]vmullbq_int_x[_s16](int16x8_t a, int16x8_t b, 
mve_pred16_t p)  */
+struct binary_widen_def : public overloaded_base<0>
+{
+  void
+  build (function_builder , const function_group_info ,
+bool preserve_user_namespace) const override
+  {
+b.add_overloaded_functions (group, MODE_none, preserve_user_namespace);
+build_all (b, "vw0,v0,v0", group, MODE_none, preserve_user_namespace);
+  }
+
+  tree
+  resolve (function_resolver ) const override
+  {
+unsigned int i, nargs;
+type_suffix_index type;
+if (!r.check_gp_argument (2, i, nargs)
+   || (type = r.infer_vector_type (i - 1)) == NUM_TYPE_SUFFIXES)
+  return error_mark_node;
+
+type_suffix_index wide_suffix
+  = find_type_suffix (type_suffixes[type].tclass,
+ type_suffixes[type].element_bits * 2);
+
+if (!r.require_matching_vector_type (i, type))
+  return error_mark_node;
+
+/* Check the inactive argument has the wide type.  */
+if ((r.pred == PRED_m)
+   && (r.infer_vector_type (0) != wide_suffix))
+  return r.report_no_such_form (type);
+
+return r.resolve_to (r.mode_suffix_id, type);
+  }
+};
+SHAPE (binary_widen)
+
 /* _t vfoo[_n_t0](_t, const int)
 
Check that 'imm' is in the [1..#bits] range.
diff --git a/gcc/config/arm/arm-mve-builtins-shapes.h 
b/gcc/config/arm/arm-mve-builtins-shapes.h
index a1842f5845c..fa6ec4fc002 100644
--- a/gcc/config/arm/arm-mve-builtins-shapes.h
+++ b/gcc/config/arm/arm-mve-builtins-shapes.h
@@ -35,13 +35,13 @@ namespace arm_mve
   {
 
 extern const function_shape *const binary;
-extern const function_shape *const binary_lshift;
-extern const function_shape *const binary_lshift_r;
 extern const function_shape *const binary_acc_int32;
 extern const function_shape *const binary_acc_int64;
 extern const function_shape *const binary_acca_int32;
 extern const function_shape *const binary_acca_int64;
 extern const function_shape *const binary_imm32;
+extern const function_shape *const binary_lshift;
+extern const function_shape *const binary_lshift_r;
 extern const function_shape *const binary_lshift_unsigned;
 extern const function_shape *const binary_maxamina;
 extern const function_shape *const binary_maxavminav;
@@ -54,6 +54,7 @@ namespace arm_mve
 extern const function_shape *const binary_rshift;
 extern const function_shape *const binary_rshift_narrow;
 extern const function_shape *const binary_rshift_narrow_unsigned;
+extern const function_shape *const binary_widen;
 extern const function_shape *const binary_widen_n;
 extern const function_shape *const binary_widen_opt_n;
 extern const function_shape *const cmp;
-- 
2.34.1

[PATCH 6/9] arm: [MVE intrinsics] add support for U and p formats in parse_element_type

Introduce these two format specifiers to define the shape of
vmull[bt]q_poly intrinsics.

'U' is used to define a double-width unsigned type.
'p' is used to define an element of 'poly' type.

2023-08-14  Christophe Lyon  

gcc/
* config/arm/arm-mve-builtins-shapes.cc (parse_element_type): Add
support for 'U' and 'p' format specifiers.
---
 gcc/config/arm/arm-mve-builtins-shapes.cc | 16 
 1 file changed, 16 insertions(+)

diff --git a/gcc/config/arm/arm-mve-builtins-shapes.cc 
b/gcc/config/arm/arm-mve-builtins-shapes.cc
index c8eb3351ef2..761da4d8ece 100644
--- a/gcc/config/arm/arm-mve-builtins-shapes.cc
+++ b/gcc/config/arm/arm-mve-builtins-shapes.cc
@@ -61,10 +61,12 @@ apply_predication (const function_instance , tree 
return_type,
 
    [01]    - the element type in type suffix 0 or 1 of INSTANCE.
    h<elt>  - a half-sized version of <elt>
+   p<elt>  - a poly type with the same width as <elt>
    s<bits> - a signed type with the given number of bits
    s[01]   - a signed type with the same width as type suffix 0 or 1
    u<bits> - an unsigned type with the given number of bits
    u[01]   - an unsigned type with the same width as type suffix 0 or 1
+   U<elt>  - an unsigned type with the double width as <elt>
    w<elt>  - a double-sized version of <elt>
    x<bits> - a type with the given number of bits and same signedness
              as the next argument.
@@ -102,6 +104,20 @@ parse_element_type (const function_instance , 
const char *)
   type_suffixes[suffix].element_bits * 2);
 }
 
+   if (ch == 'U')
+{
+  type_suffix_index suffix = parse_element_type (instance, format);
+  return find_type_suffix (TYPE_unsigned,
+  type_suffixes[suffix].element_bits * 2);
+}
+
+   if (ch == 'p')
+{
+  type_suffix_index suffix = parse_element_type (instance, format);
+  return find_type_suffix (TYPE_poly,
+  type_suffixes[suffix].element_bits);
+}
+
   if (ch == 'x')
 {
   const char *next = format;
-- 
2.34.1

[PATCH 5/9] arm: [MVE intrinsics] add support for p8 and p16 polynomial types

Although they look like aliases for u8 and u16, we need to define them
so that we can handle p8 and p16 suffixes with the general framework.

They will be used by vmull[bt]q_poly intrinsics.

2023-08-14  Christophe Lyon  

gcc/
* config/arm/arm-mve-builtins.cc (type_suffixes): Handle poly_p
field.
(TYPES_poly_8_16): New.
(poly_8_16): New.
* config/arm/arm-mve-builtins.def (p8): New type suffix.
(p16): Likewise.
* config/arm/arm-mve-builtins.h (enum type_class_index): Add
TYPE_poly.
(struct type_suffix_info): Add poly_p field.
---
 gcc/config/arm/arm-mve-builtins.cc  | 6 ++
 gcc/config/arm/arm-mve-builtins.def | 2 ++
 gcc/config/arm/arm-mve-builtins.h   | 5 -
 3 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/gcc/config/arm/arm-mve-builtins.cc 
b/gcc/config/arm/arm-mve-builtins.cc
index 7eec9d2861c..fa8b0ad36b3 100644
--- a/gcc/config/arm/arm-mve-builtins.cc
+++ b/gcc/config/arm/arm-mve-builtins.cc
@@ -128,6 +128,7 @@ CONSTEXPR const type_suffix_info 
type_suffixes[NUM_TYPE_SUFFIXES + 1] = {
 TYPE_##CLASS == TYPE_signed || TYPE_##CLASS == TYPE_unsigned, \
 TYPE_##CLASS == TYPE_unsigned, \
 TYPE_##CLASS == TYPE_float, \
+TYPE_##CLASS == TYPE_poly, \
 0, \
 MODE },
 #include "arm-mve-builtins.def"
@@ -177,6 +178,10 @@ CONSTEXPR const type_suffix_info 
type_suffixes[NUM_TYPE_SUFFIXES + 1] = {
 #define TYPES_all_signed(S, D) \
   S (s8), S (s16), S (s32)
 
+/* _p8 _p16.  */
+#define TYPES_poly_8_16(S, D) \
+  S (p8), S (p16)
+
 /* _u8 _u16 _u32.  */
 #define TYPES_all_unsigned(S, D) \
   S (u8), S (u16), S (u32)
@@ -275,6 +280,7 @@ DEF_MVE_TYPES_ARRAY (integer_8);
 DEF_MVE_TYPES_ARRAY (integer_8_16);
 DEF_MVE_TYPES_ARRAY (integer_16_32);
 DEF_MVE_TYPES_ARRAY (integer_32);
+DEF_MVE_TYPES_ARRAY (poly_8_16);
 DEF_MVE_TYPES_ARRAY (signed_16_32);
 DEF_MVE_TYPES_ARRAY (signed_32);
 DEF_MVE_TYPES_ARRAY (reinterpret_integer);
diff --git a/gcc/config/arm/arm-mve-builtins.def 
b/gcc/config/arm/arm-mve-builtins.def
index e3f37876210..e2cf1baf370 100644
--- a/gcc/config/arm/arm-mve-builtins.def
+++ b/gcc/config/arm/arm-mve-builtins.def
@@ -63,6 +63,8 @@ DEF_MVE_TYPE_SUFFIX (u8, uint8x16_t, unsigned, 8, V16QImode)
 DEF_MVE_TYPE_SUFFIX (u16, uint16x8_t, unsigned, 16, V8HImode)
 DEF_MVE_TYPE_SUFFIX (u32, uint32x4_t, unsigned, 32, V4SImode)
 DEF_MVE_TYPE_SUFFIX (u64, uint64x2_t, unsigned, 64, V2DImode)
+DEF_MVE_TYPE_SUFFIX (p8, uint8x16_t, poly, 8, V16QImode)
+DEF_MVE_TYPE_SUFFIX (p16, uint16x8_t, poly, 16, V8HImode)
 #undef REQUIRES_FLOAT
 
 #define REQUIRES_FLOAT true
diff --git a/gcc/config/arm/arm-mve-builtins.h 
b/gcc/config/arm/arm-mve-builtins.h
index c9b51a0c77b..37b8223dfb2 100644
--- a/gcc/config/arm/arm-mve-builtins.h
+++ b/gcc/config/arm/arm-mve-builtins.h
@@ -146,6 +146,7 @@ enum type_class_index
   TYPE_float,
   TYPE_signed,
   TYPE_unsigned,
+  TYPE_poly,
   NUM_TYPE_CLASSES
 };
 
@@ -221,7 +222,9 @@ struct type_suffix_info
   unsigned int unsigned_p : 1;
   /* True if the suffix is for a floating-point type.  */
   unsigned int float_p : 1;
-  unsigned int spare : 13;
+  /* True if the suffix is for a polynomial type.  */
+  unsigned int poly_p : 1;
+  unsigned int spare : 12;
 
   /* The associated vector or predicate mode.  */
   machine_mode vector_mode : 16;
-- 
2.34.1

[PATCH 2/9] arm: [MVE intrinsics] add unspec_mve_function_exact_insn_vmull

Introduce a function that will be used to build vmull intrinsics with
the _int variant.

2023-08-14  Christophe Lyon  

gcc/
* config/arm/arm-mve-builtins-functions.h (class
unspec_mve_function_exact_insn_vmull): New.
---
 gcc/config/arm/arm-mve-builtins-functions.h | 74 +
 1 file changed, 74 insertions(+)

diff --git a/gcc/config/arm/arm-mve-builtins-functions.h 
b/gcc/config/arm/arm-mve-builtins-functions.h
index a6573844319..c0fc450f886 100644
--- a/gcc/config/arm/arm-mve-builtins-functions.h
+++ b/gcc/config/arm/arm-mve-builtins-functions.h
@@ -838,6 +838,80 @@ public:
   }
 };
 
+
+/* Map the vmull-related function directly to CODE (UNSPEC, UNSPEC, M)
+   where M is the vector mode associated with type suffix 0.  We need
+   this special case because the builtins have _int in their
+   names.  */
+class unspec_mve_function_exact_insn_vmull : public function_base
+{
+public:
+  CONSTEXPR unspec_mve_function_exact_insn_vmull (int unspec_for_sint,
+ int unspec_for_uint,
+ int unspec_for_m_sint,
+ int unspec_for_m_uint)
+: m_unspec_for_sint (unspec_for_sint),
+  m_unspec_for_uint (unspec_for_uint),
+  m_unspec_for_m_sint (unspec_for_m_sint),
+  m_unspec_for_m_uint (unspec_for_m_uint)
+  {}
+
+  /* The unspec code associated with signed-integer and
+ unsigned-integer operations respectively.  It covers the cases
+ with and without the _m predicate.  */
+  int m_unspec_for_sint;
+  int m_unspec_for_uint;
+  int m_unspec_for_m_sint;
+  int m_unspec_for_m_uint;
+
+  rtx
+  expand (function_expander ) const override
+  {
+insn_code code;
+
+if (! e.type_suffix (0).integer_p)
+  gcc_unreachable ();
+
+if (e.mode_suffix_id != MODE_none)
+  gcc_unreachable ();
+
+switch (e.pred)
+  {
+  case PRED_none:
+   /* No predicate, no suffix.  */
+   if (e.type_suffix (0).unsigned_p)
+ code = code_for_mve_q_int (m_unspec_for_uint, m_unspec_for_uint, 
e.vector_mode (0));
+   else
+ code = code_for_mve_q_int (m_unspec_for_sint, m_unspec_for_sint, 
e.vector_mode (0));
+
+   return e.use_exact_insn (code);
+
+  case PRED_m:
+   /* No suffix, "m" predicate.  */
+   if (e.type_suffix (0).unsigned_p)
+ code = code_for_mve_q_int_m (m_unspec_for_m_uint, 
m_unspec_for_m_uint, e.vector_mode (0));
+   else
+ code = code_for_mve_q_int_m (m_unspec_for_m_sint, 
m_unspec_for_m_sint, e.vector_mode (0));
+
+   return e.use_cond_insn (code, 0);
+
+  case PRED_x:
+   /* No suffix, "x" predicate.  */
+   if (e.type_suffix (0).unsigned_p)
+ code = code_for_mve_q_int_m (m_unspec_for_m_uint, 
m_unspec_for_m_uint, e.vector_mode (0));
+   else
+ code = code_for_mve_q_int_m (m_unspec_for_m_sint, 
m_unspec_for_m_sint, e.vector_mode (0));
+
+   return e.use_pred_x_insn (code);
+
+  default:
+   gcc_unreachable ();
+  }
+
+gcc_unreachable ();
+  }
+};
+
 } /* end namespace arm_mve */
 
 /* Declare the global function base NAME, creating it from an instance
-- 
2.34.1

[PATCH 1/9] arm: [MVE intrinsics] factorize vmullbq vmulltq

Factorize vmullbq, vmulltq so that they use the same parameterized
names.

2023-08-14  Christophe Lyon  

gcc/
* config/arm/iterators.md (mve_insn): Add vmullb, vmullt.
(isu): Add VMULLBQ_INT_S, VMULLBQ_INT_U, VMULLTQ_INT_S,
VMULLTQ_INT_U.
(supf): Add VMULLBQ_POLY_P, VMULLTQ_POLY_P, VMULLBQ_POLY_M_P,
VMULLTQ_POLY_M_P.
(VMULLBQ_INT, VMULLTQ_INT, VMULLBQ_INT_M, VMULLTQ_INT_M): Delete.
(VMULLxQ_INT, VMULLxQ_POLY, VMULLxQ_INT_M, VMULLxQ_POLY_M): New.
* config/arm/mve.md (mve_vmullbq_int_<supf><mode>)
(mve_vmulltq_int_<supf><mode>): Merge into ...
(@mve_<mve_insn>q_int_<supf><mode>) ... this.
(mve_vmulltq_poly_p<mode>, mve_vmullbq_poly_p<mode>): Merge into ...
(@mve_<mve_insn>q_poly_<supf><mode>): ... this.
(mve_vmullbq_int_m_<supf><mode>, mve_vmulltq_int_m_<supf><mode>): Merge
into ...
(@mve_<mve_insn>q_int_m_<supf><mode>): ... this.
(mve_vmullbq_poly_m_p<mode>, mve_vmulltq_poly_m_p<mode>): Merge into ...
(@mve_<mve_insn>q_poly_m_<supf><mode>): ... this.
---
 gcc/config/arm/iterators.md |  23 +++--
 gcc/config/arm/mve.md   | 100 
 2 files changed, 38 insertions(+), 85 deletions(-)

diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index b13ff53d36f..fb003bcd67b 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -917,6 +917,7 @@
 
 (define_int_attr mve_insn [
 (UNSPEC_VCADD90 "vcadd") (UNSPEC_VCADD270 "vcadd")
+(UNSPEC_VCMLA "vcmla") (UNSPEC_VCMLA90 "vcmla") 
(UNSPEC_VCMLA180 "vcmla") (UNSPEC_VCMLA270 "vcmla")
 (UNSPEC_VCMUL "vcmul") (UNSPEC_VCMUL90 "vcmul") 
(UNSPEC_VCMUL180 "vcmul") (UNSPEC_VCMUL270 "vcmul")
 (VABAVQ_P_S "vabav") (VABAVQ_P_U "vabav")
 (VABAVQ_S "vabav") (VABAVQ_U "vabav")
@@ -1044,6 +1045,13 @@
 (VMOVNTQ_S "vmovnt") (VMOVNTQ_U "vmovnt")
 (VMULHQ_M_S "vmulh") (VMULHQ_M_U "vmulh")
 (VMULHQ_S "vmulh") (VMULHQ_U "vmulh")
+(VMULLBQ_INT_M_S "vmullb") (VMULLBQ_INT_M_U "vmullb")
+(VMULLBQ_INT_S "vmullb") (VMULLBQ_INT_U "vmullb")
+(VMULLBQ_POLY_M_P "vmullb") (VMULLTQ_POLY_M_P "vmullt")
+(VMULLBQ_POLY_P "vmullb")
+(VMULLTQ_INT_M_S "vmullt") (VMULLTQ_INT_M_U "vmullt")
+(VMULLTQ_INT_S "vmullt") (VMULLTQ_INT_U "vmullt")
+(VMULLTQ_POLY_P "vmullt")
 (VMULQ_M_N_S "vmul") (VMULQ_M_N_U "vmul") (VMULQ_M_N_F "vmul")
 (VMULQ_M_S "vmul") (VMULQ_M_U "vmul") (VMULQ_M_F "vmul")
 (VMULQ_N_S "vmul") (VMULQ_N_U "vmul") (VMULQ_N_F "vmul")
@@ -1209,7 +1217,6 @@
 (VSUBQ_M_N_S "vsub") (VSUBQ_M_N_U "vsub") (VSUBQ_M_N_F "vsub")
 (VSUBQ_M_S "vsub") (VSUBQ_M_U "vsub") (VSUBQ_M_F "vsub")
 (VSUBQ_N_S "vsub") (VSUBQ_N_U "vsub") (VSUBQ_N_F "vsub")
-(UNSPEC_VCMLA "vcmla") (UNSPEC_VCMLA90 "vcmla") 
(UNSPEC_VCMLA180 "vcmla") (UNSPEC_VCMLA270 "vcmla")
 ])
 
 (define_int_attr isu[
@@ -1246,6 +1253,8 @@
 (VMOVNBQ_S "i") (VMOVNBQ_U "i")
 (VMOVNTQ_M_S "i") (VMOVNTQ_M_U "i")
 (VMOVNTQ_S "i") (VMOVNTQ_U "i")
+(VMULLBQ_INT_S "s") (VMULLBQ_INT_U "u")
+(VMULLTQ_INT_S "s") (VMULLTQ_INT_U "u")
 (VNEGQ_M_S "s")
 (VQABSQ_M_S "s")
 (VQMOVNBQ_M_S "s") (VQMOVNBQ_M_U "u")
@@ -2330,6 +2339,10 @@
   (VMLADAVQ_U "u") (VMULHQ_S "s") (VMULHQ_U "u")
   (VMULLBQ_INT_S "s") (VMULLBQ_INT_U "u") (VQADDQ_S "s")
   (VMULLTQ_INT_S "s") (VMULLTQ_INT_U "u") (VQADDQ_U "u")
+  (VMULLBQ_POLY_P "p")
+  (VMULLTQ_POLY_P "p")
+  (VMULLBQ_POLY_M_P "p")
+  (VMULLTQ_POLY_M_P "p")
   (VMULQ_N_S "s") (VMULQ_N_U "u") (VMULQ_S "s")
   (VMULQ_U "u")
   (VQADDQ_N_S "s") (VQADDQ_N_U "u")
@@ -2713,8 +2726,8 @@
 (define_int_iterator VMINVQ [VMINVQ_U VMINVQ_S])
 (define_int_iterator VMLADAVQ [VMLADAVQ_U VMLADAVQ_S])
 (define_int_iterator VMULHQ [VMULHQ_S VMULHQ_U])
-(define_int_iterator VMULLBQ_INT [VMULLBQ_INT_U VMULLBQ_INT_S])
-(define_int_iterator VMULLTQ_INT [VMULLTQ_INT_U VMULLTQ_INT_S])
+(define_int_iterator VMULLxQ_INT [VMULLBQ_INT_U VMULLBQ_INT_S VMULLTQ_INT_U 
VMULLTQ_INT_S])
+(define_int_iterator VMULLxQ_POLY [VMULLBQ_POLY_P VMULLTQ_POLY_P])
 (define_int_iterator VMULQ [VMULQ_U VMULQ_S])
 (define_int_iterator VMULQ_N [VMULQ_N_U VMULQ_N_S])
 (define_int_iterator VQADDQ [VQADDQ_U VQADDQ_S])
@@ -2815,7 +2828,8 @@
 (define_int_iterator VSLIQ_M_N [VSLIQ_M_N_U VSLIQ_M_N_S])
 (define_int_iterator VRSHLQ_M [VRSHLQ_M_S VRSHLQ_M_U])
 (define_int_iterator VMINQ_M [VMINQ_M_S VMINQ_M_U])
-(define_int_iterator VMULLBQ_INT_M [VMULLBQ_INT_M_U VMULLBQ_INT_M_S])
+(define_int_iterator VMULLxQ_INT_M [VMULLBQ_INT_M_U VMULLBQ_INT_M_S

Re: [PATCH v3] c++: extend cold, hot attributes to classes

2023-08-14 Thread Jason Merrill via Gcc-patches

On 8/11/23 09:18, Javier Martinez wrote:

Hi Jason,

Regarding the initialization example - no, the set of classes that we 
consider cold is more loosely defined.

On Thu, Aug 10, 2023 at 11:01 PM Jason Merrill > wrote:

 > Yes, but that's because the implicit op== isn't declared lazily like
 > some other special member functions (CLASSTYPE_LAZY_*/lazily_declare_fn)
 > which can happen after the class is complete.

I see, thanks. I have fixed this now by injecting it directly from 
lazily_declare_fn, works well. Doing it from grokclassfn instead seems 
to be a nuisance because the explicit method attribute might be 
processed after the class-propagated attribute is injected, which is the 
wrong way around for the desired precedence.

 > I think it would work to check for (flags & (ATTR_FLAG_FUNCTION_NEXT |
 > ATTR_FLAG_DECL_NEXT)) and return without warning in that case.  You'd
 > still set *no_add_attr.

Correct, done.

I have added the patch as an attachment, if it garbles it then I will 
use git-send-email next time.

That worked fine, thanks.

@@ -1110,6 +1110,28 @@ handle_hot_attribute (tree *node, tree name, tree 
ARG_UNUSED (args),
 {
   /* Attribute hot processing is done later with lookup_attribute.  */
 }
+  else if ((TREE_CODE (*node) == RECORD_TYPE
+   || TREE_CODE (*node) == UNION_TYPE)
+ && c_dialect_cxx ())

I think you also want to check for ATTR_FLAG_TYPE_IN_PLACE.

@@ -7866,6 +7891,10 @@ finish_struct (tree t, tree attributes)
   && !LAMBDA_TYPE_P (t))
 add_stmt (build_min (TAG_DEFN, t));

+  /* This must be done after all lazily declared special member functions

+ have been injected.  */
+  propagate_class_warmth_attribute (t);

Maybe call this in check_bases_and_members instead?

Jason

[PATCH] arm: [MVE intrinsics] Remove dead check for float type in parse_element_type

Fix a likely copy/paste error, where we check if ch == 'f' after we
checked it's either 's' or 'u'.

2023-08-14  Christophe Lyon  

gcc/
* config/arm/arm-mve-builtins-shapes.cc (parse_element_type):
Remove dead check.
---
 gcc/config/arm/arm-mve-builtins-shapes.cc | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/gcc/config/arm/arm-mve-builtins-shapes.cc 
b/gcc/config/arm/arm-mve-builtins-shapes.cc
index 1633084608e..23eb9d0e69b 100644
--- a/gcc/config/arm/arm-mve-builtins-shapes.cc
+++ b/gcc/config/arm/arm-mve-builtins-shapes.cc
@@ -80,8 +80,7 @@ parse_element_type (const function_instance , const 
char *)
 
   if (ch == 's' || ch == 'u')
 {
-  type_class_index tclass = (ch == 'f' ? TYPE_float
-: ch == 's' ? TYPE_signed
+  type_class_index tclass = (ch == 's' ? TYPE_signed
 : TYPE_unsigned);
   char *end;
   unsigned int bits = strtol (format, , 10);
-- 
2.34.1

[PATCH] arm: [MVE intrinsics] fix binary_acca_int32 and binary_acca_int64 shapes

Fix these two shapes, where we were failing to check the last
non-predicate parameter.

2023-08-14  Christophe Lyon  

gcc/
* config/arm/arm-mve-builtins-shapes.cc (binary_acca_int32): Fix loop 
bound.
(binary_acca_int64): Likewise.
---
 gcc/config/arm/arm-mve-builtins-shapes.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/config/arm/arm-mve-builtins-shapes.cc 
b/gcc/config/arm/arm-mve-builtins-shapes.cc
index 6d477a84330..1633084608e 100644
--- a/gcc/config/arm/arm-mve-builtins-shapes.cc
+++ b/gcc/config/arm/arm-mve-builtins-shapes.cc
@@ -455,7 +455,7 @@ struct binary_acca_int32_def : public overloaded_base<0>
|| (type = r.infer_vector_type (1)) == NUM_TYPE_SUFFIXES)
   return error_mark_node;
 
-unsigned int last_arg = i;
+unsigned int last_arg = i + 1;
 for (i = 1; i < last_arg; i++)
   if (!r.require_matching_vector_type (i, type))
return error_mark_node;
@@ -492,7 +492,7 @@ struct binary_acca_int64_def : public overloaded_base<0>
|| (type = r.infer_vector_type (1)) == NUM_TYPE_SUFFIXES)
   return error_mark_node;
 
-unsigned int last_arg = i;
+unsigned int last_arg = i + 1;
 for (i = 1; i < last_arg; i++)
   if (!r.require_matching_vector_type (i, type))
return error_mark_node;
-- 
2.34.1

Re: [PATCH v1] c++: follow DR 2386 and update implementation of get_tuple_size [PR110216]

2023-08-14 Thread Jason Merrill via Gcc-patches

On 8/12/23 04:16, gnaggnoyil wrote:

DR 2386 updated the tuple_size requirements for structured binding and
it now requires tuple_size to be considered only if
std::tuple_size names a complete class type with member value. GCC
before this patch does not follow the updated requirements, and this
patch is intended to implement them.

DR 2386
PR c++/110216

gcc/cp/ChangeLog:

* decl.cc (get_tuple_size): Update implementation to follow DR 2386.

gcc/testsuite/ChangeLog:

* g++.dg/cpp1z/decomp10.C: Update expected error message for DR 2386.
* g++.dg/cpp1z/pr110216.C: New test.

Signed-off-by: gnaggnoyil

Pushed, thanks!

Note that the GCC DCO policy (https://gcc.gnu.org/dco.html) requires
real names in the sign-off; in this case I've applied the patch anyway
because it is small enough that it's not legally significant for copyright.

I think if you want to contribute larger patches under this pseudonym,
you should file a copyright assignment with the FSF, which explicitly
allows this. "If a contributor wants the FSF to publish only a
pseudonym, that is ok. The contributor should say this, and state the
desired pseudonym, when answering the request- form. The actual legal
papers will use the real name, but the FSF will publish only the
pseudonym."[2]

Thanks again,
Jason

[1]
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/commit/Documentation/process?id=d4563201f33a022fc0353033d9dfeb1606a88330

[2] https://www.gnu.org/prep/maintain/maintain.html#Copyright-Papers

[PATCH] Fortran: Avoid accessing gfc_charlen when not looking at BT_CHARACTER (PR 110677)

2023-08-14 Thread Martin Jambor

Hello,

this patch addresses an issue uncovered by the undefined behavior
sanitizer.  In function resolve_structure_cons in resolve.cc there is
a test starting with:

  if (cons->expr->ts.type == BT_CHARACTER && comp->ts.u.cl
  && comp->ts.u.cl->length
  && comp->ts.u.cl->length->expr_type == EXPR_CONSTANT

and UBSAN complained of loads from comp->ts.u.cl->length->expr_type of
integer value 1818451807, which is outside the value range of the expr_t
enum.  If I understand the code correctly, the entire load was
unwanted because comp->ts.type in those cases is BT_CLASS and not
BT_CHARACTER.  This patch simply adds a check to make sure it is only
accessed when comp->ts.type is BT_CHARACTER.

I have verified that the UBSAN failure goes away with this patch; it
also passes bootstrap and testing on x86_64-linux.  OK for master?

Thanks,

Martin



gcc/fortran/ChangeLog:

2023-08-14  Martin Jambor  

PR fortran/110677
* resolve.cc (resolve_structure_cons): Check comp->ts is character
type before accessing stuff through comp->ts.u.cl.
---
 gcc/fortran/resolve.cc | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/gcc/fortran/resolve.cc b/gcc/fortran/resolve.cc
index e7c8d919bef..5b4dfc5fcd2 100644
--- a/gcc/fortran/resolve.cc
+++ b/gcc/fortran/resolve.cc
@@ -1396,8 +1396,9 @@ resolve_structure_cons (gfc_expr *expr, int init)
 the one of the structure, ensure this if the lengths are known at
 compile time and when we are dealing with PARAMETER or structure
 constructors.  */
-  if (cons->expr->ts.type == BT_CHARACTER && comp->ts.u.cl
- && comp->ts.u.cl->length
+  if (cons->expr->ts.type == BT_CHARACTER
+ && comp->ts.type == BT_CHARACTER
+ && comp->ts.u.cl && comp->ts.u.cl->length
  && comp->ts.u.cl->length->expr_type == EXPR_CONSTANT
  && cons->expr->ts.u.cl && cons->expr->ts.u.cl->length
  && cons->expr->ts.u.cl->length->expr_type == EXPR_CONSTANT
-- 
2.41.0

[pushed] c++: -fconcepts and __cpp_concepts

2023-08-14 Thread Jason Merrill via Gcc-patches

Tested x86_64-pc-linux-gnu, applying to trunk.

-- 8< --

Since -fconcepts no longer implies -fconcepts-ts, we shouldn't advertise TS
support with __cpp_concepts=201507L.  Also fix one case where -std=c++14
-fconcepts wasn't working (as found by range-v3 calendar).  Fixing other
cases is not a priority, probably better to reject that flag combination if
there are further issues.

gcc/c-family/ChangeLog:

* c-cppbuiltin.cc (c_cpp_builtins): Adjust __cpp_concepts.

gcc/cp/ChangeLog:

* parser.cc (cp_parser_simple_type_specifier): Handle -std=c++14
-fconcepts.
---
 gcc/c-family/c-cppbuiltin.cc | 2 +-
 gcc/cp/parser.cc | 5 +++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/gcc/c-family/c-cppbuiltin.cc b/gcc/c-family/c-cppbuiltin.cc
index 6bd4c1261a7..f2b12fd63db 100644
--- a/gcc/c-family/c-cppbuiltin.cc
+++ b/gcc/c-family/c-cppbuiltin.cc
@@ -1089,7 +1089,7 @@ c_cpp_builtins (cpp_reader *pfile)
}
   if (flag_concepts)
 {
- if (cxx_dialect >= cxx20)
+ if (cxx_dialect >= cxx20 || !flag_concepts_ts)
cpp_define (pfile, "__cpp_concepts=202002L");
   else
 cpp_define (pfile, "__cpp_concepts=201507L");
diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc
index 2d27376d988..7f646704d3f 100644
--- a/gcc/cp/parser.cc
+++ b/gcc/cp/parser.cc
@@ -20017,12 +20017,13 @@ cp_parser_simple_type_specifier (cp_parser* parser,
   /* Otherwise, look for a type-name.  */
   if (!type)
{
- if (cxx_dialect >= cxx17)
+ if (cxx_dialect >= cxx17 || flag_concepts)
cp_parser_parse_tentatively (parser);
 
  type = cp_parser_type_name (parser, (qualified_p && typename_p));
 
- if (cxx_dialect >= cxx17 && !cp_parser_parse_definitely (parser))
+ if ((cxx_dialect >= cxx17 || flag_concepts)
+ && !cp_parser_parse_definitely (parser))
type = NULL_TREE;
}
 

base-commit: 2d2b05f0691799f03062bf5c436462f14cad3e7c
-- 
2.39.3

Re: [PATCH v4] Mode-Switching: Fix SET_SRC ICE for create_pre_exit





On 8/12/23 18:56, pan2...@intel.com wrote:

From: Pan Li 

In some cases, like gcc/testsuite/gcc.dg/pr78148.c in RISC-V, there will
be only 1 operand when SET_SRC is applied in create_pre_exit, as in the
example below.

(insn 13 9 14 2 (clobber (reg/i:TI 10 a0)) 
"gcc/testsuite/gcc.dg/pr78148.c":24:1 -1
   (expr_list:REG_UNUSED (reg/i:TI 10 a0)
 (nil)))

Unfortunately, SET_SRC requires at least 2 operands, so this results in a
segmentation fault here. The SH4 part that results in the segmentation
fault looks like it is only valid when the return_copy_pat is a load or
something like that. Thus, this patch tries to fix it by restricting the
use of SET_SRC to SET insns.

Signed-off-by: Pan Li 

gcc/ChangeLog:

* mode-switching.cc (create_pre_exit): Add SET insn check.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/mode-switch-ice-1.c: New test.

OK.  Thanks for the updated version.

jeff

Re: [PATCH] Fix for bug libstdc++/110860

2023-08-14 Thread Jonathan Wakely via Gcc-patches

On Mon, 14 Aug 2023 at 10:58, Paul Dreik via Libstdc++ <
libstd...@gcc.gnu.org> wrote:

> The patch below fixes an issue with the fix already committed for
> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110860 which unfortunately
> was not sufficient for small magnitude floating point values.
>
> With the patch in place, the code now survives the fuzzing I used to
> find the problem in the first place. Tested on amd64.
>
> I prepared the patch using git show, which should include the signoff as
> instructed per the DCO.
>

I couldn't apply the patch directly from the email, but I'm not sure where
it got mangled. I just applied it by hand instead.

Pushed to trunk, thanks for the patch!

I'll push it to gcc-13 shortly too.




>
> Thanks, Paul
>
> 
> commit 848b8d948787495e64ed9c55d681eccf730b74fb
> Author: Paul Dreik 
> Date:   Mon Aug 14 11:52:30 2023 +0200
>
>  libstdc++: Avoid problematic use of log10 in std::format [PR110860]
>
>  If abs(__v) is smaller than one, the result will be on the
>  form 0.x. It is only if the magnitude is large that more digits
>  are needed before the decimal dot.
>
>  This uses frexp instead of log10 which should be less expensive
>  and have sufficient precision for the desired purpose.
>
>  It removes the problematic cases where log10 will be negative or not
>  fit in an int.
>
>  Signed-off-by: Paul Dreik 
>
> diff --git a/libstdc++-v3/include/std/format
> b/libstdc++-v3/include/std/format
> index f4520ff3f..729e3d4b9 100644
> --- a/libstdc++-v3/include/std/format
> +++ b/libstdc++-v3/include/std/format
> @@ -1490,14 +1490,22 @@ namespace __format
>   // If the buffer is too small it's probably because of a
> large
>   // precision, or a very large value in fixed format.
>   size_t __guess = 8 + __prec;
> - if (__fmt == chars_format::fixed && __v != 0) // +ddd.prec
> + if (__fmt == chars_format::fixed) // +ddd.prec
> {
> - if constexpr (is_same_v<_Fp, float>)
> -   __guess += __builtin_log10f(__v < 0.0f ? -__v : __v);
> - else if constexpr (is_same_v<_Fp, double>)
> -   __guess += __builtin_log10(__v < 0.0 ? -__v : __v);
> - else if constexpr (is_same_v<_Fp, long double>)
> -   __guess += __builtin_log10l(__v < 0.0l ? -__v : __v);
> + if constexpr (is_same_v<_Fp, float> || is_same_v<_Fp,
> double> ||
> is_same_v<_Fp, long double>)
> +   {
> + // the number of digits to the left of the decimal
> point
> + // is floor(log10(max(abs(__v),1)))+1
> + int __exp{};
> + if constexpr (is_same_v<_Fp, float>)
> +   __builtin_frexpf(__v, &__exp);
> + else if constexpr (is_same_v<_Fp, double>)
> +   __builtin_frexp(__v, &__exp);
> + else if constexpr (is_same_v<_Fp, long double>)
> +   __builtin_frexpl(__v, &__exp);
> + if (__exp>0)
> +   __guess += 1U + __exp * 4004U / 13301U; //
> log10(2) approx.
> +   }
>   else
> __guess += numeric_limits<_Fp>::max_exponent10;
> }
>

Re: [PATCH] tree-optimization/110991 - unroll size estimate after vectorization

2023-08-14 Thread Jan Hubicka via Gcc-patches

> The following testcase shows that we are bad at identifying inductions
> that will be optimized away after vectorizing them because SCEV doesn't
> handle vectorized defs.  The following rolls a simpler identification
> of SSA cycles covering a PHI and an assignment with a binary operator
> with a constant second operand.
> 
> Bootstrapped and tested on x86_64-unknown-linux-gnu.
> 
> Note, I also have a more general approach (will reply to this mail
> with an RFC).

Looks good to me.  This can clearly be generalized to more complicated
expressions, so is that what you plan to do next?

Honza
> 
> Any comments on this particular change?
> 
>   PR tree-optimization/110991
>   * tree-ssa-loop-ivcanon.cc (constant_after_peeling): Handle
>   VIEW_CONVERT_EXPR , handle more simple IV-like SSA cycles
>   that will end up constant.
> 
>   * gcc.dg/tree-ssa/cunroll-16.c: New testcase.
> ---
>  gcc/testsuite/gcc.dg/tree-ssa/cunroll-16.c | 17 
>  gcc/tree-ssa-loop-ivcanon.cc   | 46 +-
>  2 files changed, 62 insertions(+), 1 deletion(-)
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/cunroll-16.c
> 
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/cunroll-16.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/cunroll-16.c
> new file mode 100644
> index 000..9bb66ff8299
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/cunroll-16.c
> @@ -0,0 +1,17 @@
> +/* PR/110991 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-cunroll-details -fdump-tree-optimized" } */
> +
> +static unsigned char a;
> +static signed char b;
> +void foo(void);
> +int main() {
> +  a = 25;
> +  for (; a > 13; --a)
> +b = a > 127 ?: a << 3;
> +  if (!b)
> +foo();
> +}
> +
> +/* { dg-final { scan-tree-dump "optimized: loop with \[0-9\]\+ iterations 
> completely unrolled" "cunroll" } } */
> +/* { dg-final { scan-tree-dump-not "foo" "optimized" } } */
> diff --git a/gcc/tree-ssa-loop-ivcanon.cc b/gcc/tree-ssa-loop-ivcanon.cc
> index a895e8e65be..99e50ee2efe 100644
> --- a/gcc/tree-ssa-loop-ivcanon.cc
> +++ b/gcc/tree-ssa-loop-ivcanon.cc
> @@ -166,6 +166,11 @@ constant_after_peeling (tree op, gimple *stmt, class 
> loop *loop)
>if (CONSTANT_CLASS_P (op))
>  return true;
>  
> +  /* Get at the actual SSA operand.  */
> +  if (handled_component_p (op)
> +  && TREE_CODE (TREE_OPERAND (op, 0)) == SSA_NAME)
> +op = TREE_OPERAND (op, 0);
> +
>/* We can still fold accesses to constant arrays when index is known.  */
>if (TREE_CODE (op) != SSA_NAME)
>  {
> @@ -198,7 +203,46 @@ constant_after_peeling (tree op, gimple *stmt, class 
> loop *loop)
>tree ev = analyze_scalar_evolution (loop, op);
>if (chrec_contains_undetermined (ev)
>|| chrec_contains_symbols (ev))
> -return false;
> +{
> +  if (ANY_INTEGRAL_TYPE_P (TREE_TYPE (op)))
> + {
> +   gassign *ass = nullptr;
> +   gphi *phi = nullptr;
> +   if (is_a  (SSA_NAME_DEF_STMT (op)))
> + {
> +   ass = as_a  (SSA_NAME_DEF_STMT (op));
> +   if (TREE_CODE (gimple_assign_rhs1 (ass)) == SSA_NAME)
> + phi = dyn_cast 
> + (SSA_NAME_DEF_STMT (gimple_assign_rhs1  (ass)));
> + }
> +   else if (is_a  (SSA_NAME_DEF_STMT (op)))
> + {
> +   phi = as_a  (SSA_NAME_DEF_STMT (op));
> +   if (gimple_bb (phi) == loop->header)
> + {
> +   tree def = gimple_phi_arg_def_from_edge
> + (phi, loop_latch_edge (loop));
> +   if (TREE_CODE (def) == SSA_NAME
> +   && is_a  (SSA_NAME_DEF_STMT (def)))
> + ass = as_a  (SSA_NAME_DEF_STMT (def));
> + }
> + }
> +   if (ass && phi)
> + {
> +   tree rhs1 = gimple_assign_rhs1 (ass);
> +   if (gimple_assign_rhs_class (ass) == GIMPLE_BINARY_RHS
> +   && CONSTANT_CLASS_P (gimple_assign_rhs2 (ass))
> +   && rhs1 == gimple_phi_result (phi)
> +   && gimple_bb (phi) == loop->header
> +   && (gimple_phi_arg_def_from_edge (phi, loop_latch_edge (loop))
> +   == gimple_assign_lhs (ass))
> +   && (CONSTANT_CLASS_P (gimple_phi_arg_def_from_edge
> +  (phi, loop_preheader_edge (loop)
> + return true;
> + }
> + }
> +  return false;
> +}
>return true;
>  }
>  
> -- 
> 2.35.3

Re: [PATCH v2] analyzer: New option fanalyzer-show-events-in-system-headers [PR110543]

2023-08-14 Thread David Malcolm via Gcc-patches

On Mon, 2023-08-14 at 17:48 +0200, priour...@gmail.com wrote:
> From: benjamin priour 
> 
> Plenty useful, thanks David. I've adjusted some few things, especially
> the artifacts of earlier versions I missed when building the commit.
> 
> I didn't how to test for warnings within , I couldn't figure a 
> portable test.
> I cannot pinpoint the line the warning is issued at in an inline DejaGNU 
> directive,
> nor can I safely say the stack depth if I check a multiline-output (nor the 
> methods names)
> 
> In the end, I found out an alternative, I am checking for the presence of 
> event "entry of 'main'".
> Indeed, diagnostic_manager::finish_pruning comment's reads
> If all we're left with is in one function, then filter function entry events.
> The provided test case can only goes into main and std::* frames, so if 
> "entry of 'main'" exists,
> it means we are also going into std::* frames.
> 
> I've also adjusted the comment of prune_system_headers, analyzer.opt and 
> added an entry to invoker.texi.
> 
> Successfully regstrapped off trunk
> 54be338589ea93ad4ff53d22adde476a0582537b on x86_64-linux-gnu.

Thanks for the updated patch.

This is ready to push to trunk.

Dave

Avoid division by zero in fold_loop_internal_call

2023-08-14 Thread Jan Hubicka via Gcc-patches

Hi,
My patch to fix the profile after folding an internal call is missing a check
for the case where the profile was already zero before if-conversion.

Bootstrapped/regtested x86_64-linux, committed.

gcc/ChangeLog:

PR gcov-profile/110988
* tree-cfg.cc (fold_loop_internal_call): Avoid division by zero.

diff --git a/gcc/tree-cfg.cc b/gcc/tree-cfg.cc
index ab1f8067c54..105f4b1c953 100644
--- a/gcc/tree-cfg.cc
+++ b/gcc/tree-cfg.cc
@@ -7734,11 +7734,14 @@ fold_loop_internal_call (gimple *g, tree value)
 test.  This should not happen as the guarded code should
 start with pre-header.  */
  gcc_assert (single_pred_edge (taken_edge->dest));
- taken_edge->dest->count
-   = taken_edge->dest->count.apply_scale (new_count,
-  old_count);
- scale_strictly_dominated_blocks (taken_edge->dest,
-  new_count, old_count);
+ if (old_count.nonzero_p ())
+   {
+ taken_edge->dest->count
+   = taken_edge->dest->count.apply_scale (new_count,
+  old_count);
+ scale_strictly_dominated_blocks (taken_edge->dest,
+  new_count, old_count);
+   }
}
}
 }

[PATCH v2] analyzer: New option fanalyzer-show-events-in-system-headers [PR110543]

2023-08-14 Thread Benjamin Priour via Gcc-patches

From: benjamin priour 

Plenty useful, thanks David. I've adjusted a few things, especially
the artifacts of earlier versions I missed when building the commit.

I didn't know how to test for warnings within a system header; I couldn't
figure out a portable test.
I cannot pinpoint the line the warning is issued at in an inline DejaGNU 
directive,
nor can I safely state the stack depth if I check a multiline output (nor the 
method names).

In the end, I found an alternative: I am checking for the presence of the event 
"entry of 'main'".
Indeed, the comment on diagnostic_manager::finish_pruning reads
If all we're left with is in one function, then filter function entry events.
The provided test case can only go into main and std::* frames, so if "entry 
of 'main'" exists,
it means we are also going into std::* frames.

I've also adjusted the comment of prune_system_headers and analyzer.opt, and
added an entry to invoke.texi.

Successfully regstrapped off trunk
54be338589ea93ad4ff53d22adde476a0582537b on x86_64-linux-gnu.

Thanks,
Benjamin.

Patch below.


This patch introduces -fanalyzer-show-events-in-system-headers,
disabled by default.

This option reduces the noise of the analyzer emitted diagnostics
when dealing with system headers.
The new option only affects the display of the diagnostics,
but doesn't hinder the actual analysis.

Given a diagnostics path diving into a system header in the form
[
  prefix events...,
  system header call,
system header entry,
events within system headers...,
  system header return,
  suffix events...
]
then disabling the option (either by default or explicitly)
will shorten the path into:
[
  prefix events...,
  system header call,
  system header return,
  suffix events...
]

Signed-off-by: benjamin priour 

gcc/analyzer/ChangeLog:

PR analyzer/110543
* analyzer.opt: Add new option.
* diagnostic-manager.cc
(diagnostic_manager::prune_path): Call prune_system_headers.
(prune_frame): New function that deletes all events in a frame.
(diagnostic_manager::prune_system_headers): New function.
* diagnostic-manager.h: Add prune_system_headers declaration.

gcc/ChangeLog:

PR analyzer/110543
* doc/invoke.texi: Add documentation of
fanalyzer-show-events-in-system-headers

gcc/testsuite/ChangeLog:

PR analyzer/110543
* g++.dg/analyzer/fanalyzer-show-events-in-system-headers-default.C:
New test.
* g++.dg/analyzer/fanalyzer-show-events-in-system-headers-no.C:
New test.
* g++.dg/analyzer/fanalyzer-show-events-in-system-headers.C:
New test.
---
 gcc/analyzer/analyzer.opt |  4 +
 gcc/analyzer/diagnostic-manager.cc| 96 +++
 gcc/analyzer/diagnostic-manager.h |  1 +
 gcc/doc/invoke.texi   |  9 ++
 ...er-show-events-in-system-headers-default.C | 18 
 ...nalyzer-show-events-in-system-headers-no.C | 19 
 .../fanalyzer-show-events-in-system-headers.C | 14 +++
 7 files changed, 161 insertions(+)
 create mode 100644 
gcc/testsuite/g++.dg/analyzer/fanalyzer-show-events-in-system-headers-default.C
 create mode 100644 
gcc/testsuite/g++.dg/analyzer/fanalyzer-show-events-in-system-headers-no.C
 create mode 100644 
gcc/testsuite/g++.dg/analyzer/fanalyzer-show-events-in-system-headers.C

diff --git a/gcc/analyzer/analyzer.opt b/gcc/analyzer/analyzer.opt
index 2760aaa8151..7917473d122 100644
--- a/gcc/analyzer/analyzer.opt
+++ b/gcc/analyzer/analyzer.opt
@@ -290,6 +290,10 @@ fanalyzer-transitivity
 Common Var(flag_analyzer_transitivity) Init(0)
 Enable transitivity of constraints during analysis.
 
+fanalyzer-show-events-in-system-headers
+Common Var(flag_analyzer_show_events_in_system_headers) Init(0)
+Show events within system headers in analyzer execution paths.
+
 fanalyzer-call-summaries
 Common Var(flag_analyzer_call_summaries) Init(0)
 Approximate the effect of function calls to simplify analysis.
diff --git a/gcc/analyzer/diagnostic-manager.cc 
b/gcc/analyzer/diagnostic-manager.cc
index cfca305d552..430c4dc3d58 100644
--- a/gcc/analyzer/diagnostic-manager.cc
+++ b/gcc/analyzer/diagnostic-manager.cc
@@ -23,6 +23,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "system.h"
 #include "coretypes.h"
 #include "tree.h"
+#include "input.h"
 #include "pretty-print.h"
 #include "gcc-rich-location.h"
 #include "gimple-pretty-print.h"
@@ -2281,6 +2282,8 @@ diagnostic_manager::prune_path (checker_path *path,
   path->maybe_log (get_logger (), "path");
   prune_for_sm_diagnostic (path, sm, sval, state);
   prune_interproc_events (path);
+  if (! flag_analyzer_show_events_in_system_headers)
+prune_system_headers (path);
   consolidate_conditions (path);
   finish_pruning (path);
   path->maybe_log (get_logger (), "pruned");
@@ -2667,6 +2670,99 @@ diagnostic_manager::prune_interproc_events (checker_path 
*path) const
   while (changed);
 }
 
+/* Remove everything within [call

Re: [PATCH v2 1/2] libstdc++: Implement more maintainable <version> header

2023-08-14 Thread Arsen Arsenović via Gcc-patches


Arsen Arsenović  writes:

> This commit replaces the ad-hoc logic in <version> with an AutoGen
> database that (mostly) declaratively generates a version.h bit which
> combines all of the FTM logic across all headers together.
>
> This generated header defines macros of the form __glibcxx_foo,
> equivalent to their __cpp_lib_foo variants, according to rules specified
> in version.def and, optionally, if __glibcxx_want_foo or
> __glibcxx_want_all are defined, also defines __cpp_lib_foo forms with
> the same definition.
>
> libstdc++-v3/ChangeLog:
>
>   * include/Makefile.am (bits_freestanding): Add version.h.
>   (allcreated): Add version.h.
>   (${bits_srcdir}/version.h): New rule.  Regenerates
>   version.h out of version.{def,tpl}.
>   * include/Makefile.in: Regenerate.
>   * include/bits/version.def: New file.  Declares a list of
>   all feature test macros, their values and their preconditions.
>   * include/bits/version.tpl: New file.  Turns version.def
>   into a sequence of #if blocks.
>   * include/bits/version.h: New file.  Generated from
>   version.def.
>   * include/std/version: Replace with a __glibcxx_want_all define
>   and bits/version.h include.
> ---
> This patchset is a rebase of
> https://inbox.sourceware.org/libstdc++/20230429101640.1697750-1-ar...@aarsen.me/
>
> ... passing the same two checks (difall / vercmp) I wrote for the first
> pass.  Testsuite runs are still pending.
>
> Changes in this revision:
> - Replace the ${bits_srcdir}/version.h rule with a update-version phony,
> - Add the new __cpp_lib_chrono value,
> - Add __cpp_lib_{ranges_{contains,find_last,fold,iota}},
> - Add comments to various replaced conditions which summarize their
>   condition,
> - Correct a few minor errors spotted in review
>
> OK for trunk (if those testsuite runs end up clean)?

Same tests as the first time around passed on x86_64-pc-linux-gnu (that
is, regression testing + a large libstdc++ harness).
-- 
Arsen Arsenović


signature.asc
Description: PGP signature

Re: [PATCH v1] RISC-V: Support RVV VFREC7 rounding mode intrinsic API

2023-08-14 Thread Kito Cheng via Gcc-patches

Checked with doc and llvm implementation, LGTM

Re: [PATCH][RFC] tree-optimization/92335 - Improve sinking heuristics for vectorization

2023-08-14 Thread Prathamesh Kulkarni via Gcc-patches

On Mon, 7 Aug 2023 at 13:19, Richard Biener  wrote:
>
> On Mon, Aug 7, 2023 at 2:05 AM Prathamesh Kulkarni via Gcc-patches
>  wrote:
> >
> > On Thu, 3 Aug 2023 at 17:48, Richard Biener  wrote:
> > >
> > > On Thu, 3 Aug 2023, Richard Biener wrote:
> > >
> > > > On Thu, 3 Aug 2023, Richard Biener wrote:
> > > >
> > > > > On Thu, 3 Aug 2023, Prathamesh Kulkarni wrote:
> > > > >
> > > > > > On Wed, 2 Aug 2023 at 14:17, Richard Biener via Gcc-patches
> > > > > >  wrote:
> > > > > > >
> > > > > > > On Mon, 31 Jul 2023, Jeff Law wrote:
> > > > > > >
> > > > > > > >
> > > > > > > >
> > > > > > > > On 7/28/23 01:05, Richard Biener via Gcc-patches wrote:
> > > > > > > > > The following delays sinking of loads within the same 
> > > > > > > > > innermost
> > > > > > > > > loop when it was unconditional before.  That's a not uncommon
> > > > > > > > > issue preventing vectorization when masked loads are not 
> > > > > > > > > available.
> > > > > > > > >
> > > > > > > > > Bootstrapped and tested on x86_64-unknown-linux-gnu.
> > > > > > > > >
> > > > > > > > > I have a followup patch improving sinking that without this 
> > > > > > > > > would
> > > > > > > > > cause more of the problematic sinking - now that we have a 
> > > > > > > > > second
> > > > > > > > > sink pass after loop opts this looks like a reasonable 
> > > > > > > > > approach?
> > > > > > > > >
> > > > > > > > > OK?
> > > > > > > > >
> > > > > > > > > Thanks,
> > > > > > > > > Richard.
> > > > > > > > >
> > > > > > > > >  PR tree-optimization/92335
> > > > > > > > >  * tree-ssa-sink.cc (select_best_block): Before loop
> > > > > > > > >  optimizations avoid sinking unconditional loads/stores
> > > > > > > > >  in innermost loops to conditional executed places.
> > > > > > > > >
> > > > > > > > >  * gcc.dg/tree-ssa/ssa-sink-10.c: Disable vectorizing.
> > > > > > > > >  * gcc.dg/tree-ssa/predcom-9.c: Clone from ssa-sink-10.c,
> > > > > > > > >  expect predictive commoning to happen instead of sinking.
> > > > > > > > >  * gcc.dg/vect/pr65947-3.c: Adjust.
> > > > > > > > I think it's reasonable -- there's probably going to be cases 
> > > > > > > > where it's not
> > > > > > > > great, but more often than not I think it's going to be a 
> > > > > > > > reasonable
> > > > > > > > heuristic.
> > > > > > > >
> > > > > > > > If there is undesirable fallout, better to find it over the 
> > > > > > > > coming months than
> > > > > > > > next spring.  So I'd suggest we go forward now to give more 
> > > > > > > > time to find any
> > > > > > > > pathological cases (if they exist).
> > > > > > >
> > > > > > > Agreed, I've pushed this now.
> > > > > > Hi Richard,
> > > > > > After this patch (committed in 
> > > > > > 399c8dd44ff44f4b496223c7cc980651c4d6f6a0),
> > > > > > pr65947-7.c "failed" for aarch64-linux-gnu:
> > > > > > FAIL: gcc.dg/vect/pr65947-7.c scan-tree-dump-not vect "LOOP 
> > > > > > VECTORIZED"
> > > > > > FAIL: gcc.dg/vect/pr65947-7.c -flto -ffat-lto-objects
> > > > > > scan-tree-dump-not vect "LOOP VECTORIZED"
> > > > > >
> > > > > > /* { dg-final { scan-tree-dump-not "LOOP VECTORIZED" "vect" { 
> > > > > > target {
> > > > > > ! vect_fold_extract_last } } } } */
> > > > > >
> > > > > > With your commit, condition_reduction in pr65947-7.c gets vectorized
> > > > > > regardless of vect_fold_extract_last,
> > > > > > which gates the above test (which is an improvement, because the
> > > > > > function didn't get vectorized before the commit).
> > > > > >
> > > > > > The attached patch thus removes the gating on 
> > > > > > vect_fold_extract_last,
> > > > > > and the test passes again.
> > > > > > OK to commit ?
> > > > >
> > > > > OK.
> > > >
> > > > Or wait - the loop doesn't vectorize on x86_64, so I guess one
> > > > critical target condition is missing.  Can you figure out which?
> > >
> > > I see
> > >
> > > /space/rguenther/src/gcc/gcc/testsuite/gcc.dg/vect/pr65947-7.c:18:21:
> > > note:   vect_is_simple_use: operand last_19 = PHI ,
> > > type of def: reduction
> > > /space/rguenther/src/gcc/gcc/testsuite/gcc.dg/vect/pr65947-7.c:18:21:
> > > note:   vect_is_simple_use: vectype vector(4) int
> > > /space/rguenther/src/gcc/gcc/testsuite/gcc.dg/vect/pr65947-7.c:18:21:
> > > missed:   multiple types in double reduction or condition reduction or
> > > fold-left reduction.
> > > /space/rguenther/src/gcc/gcc/testsuite/gcc.dg/vect/pr65947-7.c:13:1:
> > > missed:   not vectorized: relevant phi not supported: last_19 = PHI
> > > 
> > > /space/rguenther/src/gcc/gcc/testsuite/gcc.dg/vect/pr65947-7.c:18:21:
> > > missed:  bad operation or unsupported loop bound.
> > Hi Richard,
> > Looking at the aarch64 vect dump, it seems the loop in
> > condition_reduction gets vectorized with V4HI mode
> > while fails for other modes in vectorizable_condition:
> >
> >   if ((double_reduc || reduction_type != TREE_CODE_REDUCTION)
> >   && ncopies > 1)
> > {
> >   if (dump_enabled_p ())
> > dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
>

RE: [PATCH v1] RISC-V: Support RVV VFREC7 rounding mode intrinsic API

2023-08-14 Thread Li, Pan2 via Gcc-patches

Thanks Kito for comments, updated in PATCH v2.

https://gcc.gnu.org/pipermail/gcc-patches/2023-August/627367.html

Pan

-Original Message-
From: Kito Cheng  
Sent: Monday, August 14, 2023 10:07 PM
To: 钟居哲 
Cc: Li, Pan2 ; gcc-patches ; Wang, 
Yanzhang 
Subject: Re: [PATCH v1] RISC-V: Support RVV VFREC7 rounding mode intrinsic API

> +template<int UNSPEC>

You don't need a template class here since it can only be UNSPEC_VFREC7.

> +class vfrec7_frm : public function_base
> +{
> +public:
> +  bool has_rounding_mode_operand_p () const override { return true; }
> +
> +  rtx expand (function_expander &e) const override
> +  {
> +return e.use_exact_insn (code_for_pred (UNSPEC, e.vector_mode ()));
> +  }
> +};
> +
> /* Implements vrsub.  */
> class vrsub : public function_base
> {
> @@ -2433,6 +2448,7 @@ static CONSTEXPR const unop vfsqrt_obj;
> static CONSTEXPR const unop_frm vfsqrt_frm_obj;
> static CONSTEXPR const float_misc vfrsqrt7_obj;
> static CONSTEXPR const float_misc vfrec7_obj;
> +static CONSTEXPR const vfrec7_frm vfrec7_frm_obj;

Then `static CONSTEXPR const vfrec7_frm vfrec7_frm_obj;` here

> static CONSTEXPR const binop vfmin_obj;
> static CONSTEXPR const binop vfmax_obj;
> static CONSTEXPR const float_misc vfsgnj_obj;

[PATCH v2] RISC-V: Support RVV VFREC7 rounding mode intrinsic API

From: Pan Li 

Update in v2:

1. Remove the template of vfrec7 frm class.
2. Update the vfrec7_frm_obj declaration.

Original logs:

This patch would like to support the rounding mode API for the
VFREC7 as the below samples.

* __riscv_vfrec7_v_f32m1_rm
* __riscv_vfrec7_v_f32m1_rm_m

Signed-off-by: Pan Li 

gcc/ChangeLog:

* config/riscv/riscv-vector-builtins-bases.cc
(class vfrec7_frm): New class for frm.
(vfrec7_frm_obj): New declaration.
(BASE): Ditto.
* config/riscv/riscv-vector-builtins-bases.h: Ditto.
* config/riscv/riscv-vector-builtins-functions.def
(vfrec7_frm): New intrinsic function definition.
* config/riscv/vector-iterators.md
(VFMISC): Remove VFREC7.
(misc_op): Ditto.
(float_insn_type): Ditto.
(VFMISC_FRM): New int iterator.
(misc_frm_op): New op for frm.
(float_frm_insn_type): New type for frm.
* config/riscv/vector.md (@pred_<misc_frm_op><mode>):
New pattern for misc frm.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/float-point-rec7.c: New test.
---
 .../riscv/riscv-vector-builtins-bases.cc  | 16 ++
 .../riscv/riscv-vector-builtins-bases.h   |  1 +
 .../riscv/riscv-vector-builtins-functions.def |  2 ++
 gcc/config/riscv/vector-iterators.md  | 12 +--
 gcc/config/riscv/vector.md| 23 ++
 .../riscv/rvv/base/float-point-rec7.c | 31 +++
 6 files changed, 82 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/float-point-rec7.c

diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.cc 
b/gcc/config/riscv/riscv-vector-builtins-bases.cc
index 2074dac0f16..f2124080ef9 100644
--- a/gcc/config/riscv/riscv-vector-builtins-bases.cc
+++ b/gcc/config/riscv/riscv-vector-builtins-bases.cc
@@ -646,6 +646,20 @@ public:
   }
 };
 
+/* Implements below instructions for frm
+   - vfrec7
+*/
+class vfrec7_frm : public function_base
+{
+public:
+  bool has_rounding_mode_operand_p () const override { return true; }
+
+  rtx expand (function_expander &e) const override
+  {
+return e.use_exact_insn (code_for_pred (UNSPEC_VFREC7, e.vector_mode ()));
+  }
+};
+
 /* Implements vrsub.  */
 class vrsub : public function_base
 {
@@ -2433,6 +2447,7 @@ static CONSTEXPR const unop vfsqrt_obj;
 static CONSTEXPR const unop_frm vfsqrt_frm_obj;
 static CONSTEXPR const float_misc vfrsqrt7_obj;
 static CONSTEXPR const float_misc vfrec7_obj;
+static CONSTEXPR const vfrec7_frm vfrec7_frm_obj;
 static CONSTEXPR const binop vfmin_obj;
 static CONSTEXPR const binop vfmax_obj;
 static CONSTEXPR const float_misc vfsgnj_obj;
@@ -2681,6 +2696,7 @@ BASE (vfsqrt)
 BASE (vfsqrt_frm)
 BASE (vfrsqrt7)
 BASE (vfrec7)
+BASE (vfrec7_frm)
 BASE (vfmin)
 BASE (vfmax)
 BASE (vfsgnj)
diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.h 
b/gcc/config/riscv/riscv-vector-builtins-bases.h
index 5c91381bd4c..2a9381eec5e 100644
--- a/gcc/config/riscv/riscv-vector-builtins-bases.h
+++ b/gcc/config/riscv/riscv-vector-builtins-bases.h
@@ -187,6 +187,7 @@ extern const function_base *const vfsqrt;
 extern const function_base *const vfsqrt_frm;
 extern const function_base *const vfrsqrt7;
 extern const function_base *const vfrec7;
+extern const function_base *const vfrec7_frm;
 extern const function_base *const vfmin;
 extern const function_base *const vfmax;
 extern const function_base *const vfsgnj;
diff --git a/gcc/config/riscv/riscv-vector-builtins-functions.def 
b/gcc/config/riscv/riscv-vector-builtins-functions.def
index a821aca6a4b..34def6bb82f 100644
--- a/gcc/config/riscv/riscv-vector-builtins-functions.def
+++ b/gcc/config/riscv/riscv-vector-builtins-functions.def
@@ -396,6 +396,8 @@ DEF_RVV_FUNCTION (vfrsqrt7, alu, full_preds, f_v_ops)
 // 13.10. Vector Floating-Point Reciprocal Estimate Instruction
 DEF_RVV_FUNCTION (vfrec7, alu, full_preds, f_v_ops)
 
+DEF_RVV_FUNCTION (vfrec7_frm, alu_frm, full_preds, f_v_ops)
+
 // 13.11. Vector Floating-Point MIN/MAX Instructions
 DEF_RVV_FUNCTION (vfmin, alu, full_preds, f_vvv_ops)
 DEF_RVV_FUNCTION (vfmin, alu, full_preds, f_vvf_ops)
diff --git a/gcc/config/riscv/vector-iterators.md 
b/gcc/config/riscv/vector-iterators.md
index 30808ceb241..9dd611e254b 100644
--- a/gcc/config/riscv/vector-iterators.md
+++ b/gcc/config/riscv/vector-iterators.md
@@ -1867,7 +1867,9 @@ (define_int_iterator VSAT_SHIFT_OP [UNSPEC_VSSRL 
UNSPEC_VSSRA])
 
 (define_int_iterator VMISC [UNSPEC_VMSBF UNSPEC_VMSIF UNSPEC_VMSOF])
 
-(define_int_iterator VFMISC [UNSPEC_VFRSQRT7 UNSPEC_VFREC7])
+(define_int_iterator VFMISC [UNSPEC_VFRSQRT7])
+
+(define_int_iterator VFMISC_FRM [UNSPEC_VFREC7])
 
 (define_int_iterator VFCVTS [UNSPEC_VFCVT UNSPEC_UNSIGNED_VFCVT])
 
@@ -1890,9 +1892,13 @@ (define_int_attr sat_insn_type [(UNSPEC_VAADDU "vaalu") 
(UNSPEC_VAADD "vaalu")
(UNSPEC_VNCLIPU "vnclip")])
 
 (define_int_attr misc_op [(UNSPEC_VMSBF "sbf") (UNSPEC_VMSIF

Re: [PATCH] x86: Update model values for Raptorlake.

2023-08-14 Thread Jonathan Wakely via Gcc-patches


On 14/08/23 15:19 +0100, Jonathan Wakely wrote:

On 14/08/23 04:37 +, Pan Li via Gcc-patches wrote:

Committed as obvious, and backported to GCC13.


Did you try building it on gcc-13?

   case 0x97:
   case 0x9a:
   case 0xbf:
 /* Alder Lake.  */
   case 0xb7:
   case 0xba:
   case 0xbf:
 /* Raptor Lake.  */


This fails:

In file included from /home/test/src/gcc-13/gcc/config/i386/driver-i386.cc:31:
/home/test/src/gcc-13/gcc/common/config/i386/cpuinfo.h: In function ‘const 
char* get_intel_cpu(__processor_model*, __processor_model2*, unsigned int*)’:
/home/test/src/gcc-13/gcc/common/config/i386/cpuinfo.h:543:5: error: duplicate 
case value
 543 | case 0xbf:
 | ^~~~
/home/test/src/gcc-13/gcc/common/config/i386/cpuinfo.h:539:5: note: previously 
used here
 539 | case 0xbf:
 | ^~~~

Please fix or revert.



The backported patch is not the same as the trunk one, it adds two new
cases not one. But one of them is a duplicate of one you already added
in January 2022, in 4bd5297f665fd3ba5691297c016809f3501e7fba

No matter how obvious a patch is, if it touches code (not just
comments or docs) please don't commit without even building it once.

Also, backports should typically say something in the git commit
message, e.g. using git gcc-backport (or git cherry-pick -x) will
automatically add:

(cherry picked from commit 003016a40844701c48851020df672b70f3446bdb)

to the commit message.






Lili.


Update model values for Raptorlake according to SDM.

gcc/ChangeLog

* common/config/i386/cpuinfo.h (get_intel_cpu): Add model value 0xba
to Raptorlake.
---
gcc/common/config/i386/cpuinfo.h | 1 +
1 file changed, 1 insertion(+)

diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h
index ae48bc17771..dd7f0f6abfd 100644
--- a/gcc/common/config/i386/cpuinfo.h
+++ b/gcc/common/config/i386/cpuinfo.h
@@ -537,6 +537,7 @@ get_intel_cpu (struct __processor_model *cpu_model,
   case 0x9a:
 /* Alder Lake.  */
   case 0xb7:
+case 0xba:
   case 0xbf:
 /* Raptor Lake.  */
   case 0xaa:

Re: [PATCH] x86: Update model values for Raptorlake.

2023-08-14 Thread Jonathan Wakely via Gcc-patches


On 14/08/23 04:37 +, Pan Li via Gcc-patches wrote:

Committed as obvious, and backported to GCC13.


Did you try building it on gcc-13?

case 0x97:
case 0x9a:
case 0xbf:
  /* Alder Lake.  */
case 0xb7:
case 0xba:
case 0xbf:
  /* Raptor Lake.  */


This fails:

In file included from /home/test/src/gcc-13/gcc/config/i386/driver-i386.cc:31:
/home/test/src/gcc-13/gcc/common/config/i386/cpuinfo.h: In function ‘const 
char* get_intel_cpu(__processor_model*, __processor_model2*, unsigned int*)’:
/home/test/src/gcc-13/gcc/common/config/i386/cpuinfo.h:543:5: error: duplicate 
case value
  543 | case 0xbf:
  | ^~~~
/home/test/src/gcc-13/gcc/common/config/i386/cpuinfo.h:539:5: note: previously 
used here
  539 | case 0xbf:
  | ^~~~

Please fix or revert.



Lili.


Update model values for Raptorlake according to SDM.

gcc/ChangeLog

* common/config/i386/cpuinfo.h (get_intel_cpu): Add model value 0xba
to Raptorlake.
---
gcc/common/config/i386/cpuinfo.h | 1 +
1 file changed, 1 insertion(+)

diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h
index ae48bc17771..dd7f0f6abfd 100644
--- a/gcc/common/config/i386/cpuinfo.h
+++ b/gcc/common/config/i386/cpuinfo.h
@@ -537,6 +537,7 @@ get_intel_cpu (struct __processor_model *cpu_model,
case 0x9a:
  /* Alder Lake.  */
case 0xb7:
+case 0xba:
case 0xbf:
  /* Raptor Lake.  */
case 0xaa:

Re: [PATCH] vect: Move VMAT_GATHER_SCATTER handlings from final loop nest

2023-08-14 Thread Richard Sandiford via Gcc-patches

"Kewen.Lin"  writes:
> Hi Richard,
>
> on 2023/8/14 20:20, Richard Sandiford wrote:
>> Thanks for the clean-ups.  But...
>> 
>> "Kewen.Lin"  writes:
>>> Hi,
>>>
>>> Following Richi's suggestion [1], this patch is to move the
>>> handlings on VMAT_GATHER_SCATTER in the final loop nest
>>> of function vectorizable_load to its own loop.  Basically
>>> it duplicates the final loop nest, clean up some useless
>>> set up code for the case of VMAT_GATHER_SCATTER, remove some
>>> unreachable code.  Also remove the corresponding handlings
>>> in the final loop nest.
>>>
>>> Bootstrapped and regtested on x86_64-redhat-linux,
>>> aarch64-linux-gnu and powerpc64{,le}-linux-gnu.
>>>
>>> [1] https://gcc.gnu.org/pipermail/gcc-patches/2023-June/623329.html
>>>
>>> Is it ok for trunk?
>>>
>>> BR,
>>> Kewen
>>> -
>>>
>>> gcc/ChangeLog:
>>>
>>> * tree-vect-stmts.cc (vectorizable_load): Move the handlings on
>>> VMAT_GATHER_SCATTER in the final loop nest to its own loop,
>>> and update the final nest accordingly.
>>> ---
>>>  gcc/tree-vect-stmts.cc | 361 +
>>>  1 file changed, 219 insertions(+), 142 deletions(-)
>> 
>> ...that seems like quite a lot of +s.  Is there nothing we can do to
>> avoid the cut-&-paste?
>
> Thanks for the comments!  I'm not sure if I get your question, if we
> want to move out the handlings of VMAT_GATHER_SCATTER, the new +s seem
> inevitable?  Your concern is mainly about git blame history?

No, it was more that 219-142=77, so it seems like a lot of lines
are being duplicated rather than simply being moved.  (Unlike for
VMAT_LOAD_STORE_LANES, which was even a slight LOC saving, and so
was a clear improvement.)

So I was just wondering if there was any obvious factoring-out that
could be done to reduce the duplication.

Thanks,
Richard

[PATCH][GCC] aarch64: Add support for Cortex-A720 CPU

2023-08-14 Thread Richard Ball via Gcc-patches


This patch adds support for the Cortex-A720 CPU to GCC.

No regressions on aarch64-none-elf.

Ok for master?

gcc/ChangeLog:

* config/aarch64/aarch64-cores.def (AARCH64_CORE): Add Cortex-
A720 CPU.
* config/aarch64/aarch64-tune.md: Regenerate.
* doc/invoke.texi: Document Cortex-A720 CPU.
diff --git a/gcc/config/aarch64/aarch64-cores.def 
b/gcc/config/aarch64/aarch64-cores.def
index 
dbac497ef3aab410eb81db185b2e9532186888bb..5369dd3dd0fe695a371261547c76f034c29b9bcd
 100644
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -176,6 +176,8 @@ AARCH64_CORE("cortex-a710",  cortexa710, cortexa57, V9A,  
(SVE2_BITPERM, MEMTAG,
 
 AARCH64_CORE("cortex-a715",  cortexa715, cortexa57, V9A,  (SVE2_BITPERM, 
MEMTAG, I8MM, BF16), neoversen2, 0x41, 0xd4d, -1)
 
+AARCH64_CORE("cortex-a720",  cortexa720, cortexa57, V9_2A,  (SVE2_BITPERM, 
MEMTAG), neoversen2, 0x41, 0xd81, -1)
+
 AARCH64_CORE("cortex-x2",  cortexx2, cortexa57, V9A,  (SVE2_BITPERM, MEMTAG, 
I8MM, BF16), neoversen2, 0x41, 0xd48, -1)
 
 AARCH64_CORE("cortex-x3",  cortexx3, cortexa57, V9A,  (SVE2_BITPERM, MEMTAG, 
I8MM, BF16), neoversen2, 0x41, 0xd4e, -1)
diff --git a/gcc/config/aarch64/aarch64-tune.md 
b/gcc/config/aarch64/aarch64-tune.md
index 
2170980dddb0d5d410a49631ad26ff2e346b39dd..12d610f0f6580096eed9cf3de8ad3239efde5e4b
 100644
--- a/gcc/config/aarch64/aarch64-tune.md
+++ b/gcc/config/aarch64/aarch64-tune.md
@@ -1,5 +1,5 @@
 ;; -*- buffer-read-only: t -*-
 ;; Generated automatically by gentune.sh from aarch64-cores.def
 (define_attr "tune"
-   
"cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,cortexx1c,ares,neoversen1,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,thunderx3t110,zeus,neoversev1,neoverse512tvb,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82,cortexa510,cortexa520,cortexa710,cortexa715,cortexx2,cortexx3,neoversen2,demeter,neoversev2"
+   
"cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,cortexx1c,ares,neoversen1,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,thunderx3t110,zeus,neoversev1,neoverse512tvb,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82,cortexa510,cortexa520,cortexa710,cortexa715,cortexa720,cortexx2,cortexx3,neoversen2,demeter,neoversev2"
(const (symbol_ref "((enum attr_tune) aarch64_tune)")))
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 
2c870d3c34b587ffc721b1f18f99ecd66d4217be..62537d9d09e25f864c27534b7ac2ec467ea24789
 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -20517,7 +20517,8 @@ performance of the code.  Permissible values for this 
option are:
 @samp{cortex-a75.cortex-a55}, @samp{cortex-a76.cortex-a55},
 @samp{cortex-r82}, @samp{cortex-x1}, @samp{cortex-x1c}, @samp{cortex-x2},
 @samp{cortex-x3}, @samp{cortex-a510}, @samp{cortex-a520}, @samp{cortex-a710},
-@samp{cortex-a715}, @samp{ampere1}, @samp{ampere1a}, and @samp{native}.
+@samp{cortex-a715}, @samp{cortex-a720}, @samp{ampere1}, @samp{ampere1a},
+and @samp{native}.
 
 The values @samp{cortex-a57.cortex-a53}, @samp{cortex-a72.cortex-a53},
 @samp{cortex-a73.cortex-a35}, @samp{cortex-a73.cortex-a53},

Re: [PATCH v2 0/3] RISC-V: Support ZC* extensions.

2023-08-14 Thread Kito Cheng via Gcc-patches

Pushed to the trunk, with slight updates like rename and update testcases :)


On Wed, Jun 7, 2023 at 10:28 PM Kito Cheng via Gcc-patches
 wrote:
>
> Thanks Jiawei, v2 patch set are LGTM, but I would like to defer this until
> binutils part has merged, I know you guys already implement that for a
> while, so I think it’s almost there :)
>
> Jiawei 於 2023年6月7日 週三，20:57寫道：
>
> > RISC-V Code Size Reduction(ZC*) extensions is a group of extensions
> > which define subsets of the existing C extension (Zca, Zcd, Zcf) and new
> > extensions(Zcb, Zcmp, Zcmt) which only contain 16-bit encodings.[1]
> >
> > The implementation of the RISC-V Code Size Reduction extension in GCC is
> > an important step towards making the RISC-V architecture more efficient.
> >
> > The cooperation with OpenHW group has played a crucial role in this effort,
> > with facilitating the implementation, testing and validation. Currently
> > works can also find in OpenHW group's github repo.[2]
> >
> > Thanks to Tariq Kurd, Ibrahim Abu Kharmeh for help with explaining the
> > specification, and Jeremy Bennett's patient guidance throughout the whole
> > development process.
> >
> > V2 changes:
> > Fix Kito's comments in first version, Eswin assisted in optimizing the
> > implementation of Zcmp extension:
> > https://gcc.gnu.org/pipermail/gcc-patches/2023-May/617440.html
> > https://gcc.gnu.org/pipermail/gcc-patches/2023-May/617442.html
> >
> > https://gcc.gnu.org/pipermail/gcc-patches/2023-June/620869.html
> >
> >
> > [1] github.com/riscv/riscv-code-size-reduction/tree/main/Zc-specification
> >
> > [2] github.com/openhwgroup/corev-gcc
> >
> > Co-Authored by: Charlie Keaney 
> > Co-Authored by: Mary Bennett 
> > Co-Authored by: Nandni Jamnadas 
> > Co-Authored by: Sinan Lin 
> > Co-Authored by: Simon Cook 
> > Co-Authored by: Shihua Liao 
> > Co-Authored by: Yulong Shi 
> >
> >   RISC-V: Minimal support for ZC extensions.
> >   RISC-V: Enable compressible features when use ZC* extensions.
> >   RISC-V: Add ZC* test for march args being passed.
> >
> >
> > Jiawei (3):
> >   RISC-V: Minimal support for ZC* extensions.
> >   RISC-V: Enable compressible features when use ZC* extensions.
> >   RISC-V: Add ZC* test for failed march args being passed.
> >
> >  gcc/common/config/riscv/riscv-common.cc   | 38 +++
> >  gcc/config/riscv/riscv-c.cc   |  2 +-
> >  gcc/config/riscv/riscv-opts.h | 16 ++
> >  gcc/config/riscv/riscv-shorten-memrefs.cc |  3 +-
> >  gcc/config/riscv/riscv.cc | 11 ---
> >  gcc/config/riscv/riscv.h  |  2 +-
> >  gcc/config/riscv/riscv.opt|  3 ++
> >  gcc/testsuite/gcc.target/riscv/arch-22.c  |  5 +++
> >  gcc/testsuite/gcc.target/riscv/arch-23.c  |  5 +++
> >  9 files changed, 78 insertions(+), 7 deletions(-)
> >  create mode 100644 gcc/testsuite/gcc.target/riscv/arch-22.c
> >  create mode 100644 gcc/testsuite/gcc.target/riscv/arch-23.c
> >
> > --
> > 2.25.1
> >
> >

Re: [PATCH v1] RISC-V: Support RVV VFREC7 rounding mode intrinsic API

2023-08-14 Thread Kito Cheng via Gcc-patches

> +template<int UNSPEC>

You don't need a template class here since it can only be UNSPEC_VFREC7.

> +class vfrec7_frm : public function_base
> +{
> +public:
> +  bool has_rounding_mode_operand_p () const override { return true; }
> +
> +  rtx expand (function_expander &e) const override
> +  {
> +return e.use_exact_insn (code_for_pred (UNSPEC, e.vector_mode ()));
> +  }
> +};
> +
> /* Implements vrsub.  */
> class vrsub : public function_base
> {
> @@ -2433,6 +2448,7 @@ static CONSTEXPR const unop vfsqrt_obj;
> static CONSTEXPR const unop_frm vfsqrt_frm_obj;
> static CONSTEXPR const float_misc vfrsqrt7_obj;
> static CONSTEXPR const float_misc vfrec7_obj;
> +static CONSTEXPR const vfrec7_frm vfrec7_frm_obj;

Then `static CONSTEXPR const vfrec7_frm vfrec7_frm_obj;` here

> static CONSTEXPR const binop vfmin_obj;
> static CONSTEXPR const binop vfmax_obj;
> static CONSTEXPR const float_misc vfsgnj_obj;

Re: [PATCH] vect: Move VMAT_LOAD_STORE_LANES handlings from final loop nest

On Mon, Aug 14, 2023 at 2:49 PM Kewen.Lin  wrote:
>
> Hi Richi,
>
> on 2023/8/14 20:04, Richard Biener wrote:
> > On Mon, Aug 14, 2023 at 10:54 AM Kewen.Lin  wrote:
> >>
> >> Hi,
> >>
> >> Following Richi's suggestion [1], this patch is to move the
> >> handlings on VMAT_LOAD_STORE_LANES in the final loop nest
> >> of function vectorizable_load to its own loop.  Basically
> >> it duplicates the final loop nest, clean up some useless
> >> set up code for the case of VMAT_LOAD_STORE_LANES, remove
> >> some unreachable code.  Also remove the corresponding
> >> handlings in the final loop nest.
> >>
> >> Bootstrapped and regtested on x86_64-redhat-linux,
> >> aarch64-linux-gnu and powerpc64{,le}-linux-gnu.
> >
> > OK (I guess the big diff is mostly because of re-indenting).
>
> Thanks!  Yes, there is some code in the original final loop nest like
>
> if (memory_access_type == VMAT_LOAD_STORE_LANES)
>   {
> ...
>   }
> else
>   {
> ...
>   }
>
> Then the else arm is fully re-indented.
>
> The other patch on VMAT_GATHER_SCATTER looks a bit better since
> it doesn't need re-indenting.

Yes, that's also because VMAT_LOAD_STORE_LANES isn't for SLP so
it even makes more sense to split that case out.

Richard.

> BR,
> Kewen
>
> >
> > Thanks,
> > Richard.
> >
> >> [1] https://gcc.gnu.org/pipermail/gcc-patches/2023-June/623329.html
> >>
> >> gcc/ChangeLog:
> >>
> >> * tree-vect-stmts.cc (vectorizable_load): Move the handlings on
> >> VMAT_LOAD_STORE_LANES in the final loop nest to its own loop,
> >> and update the final nest accordingly.
> >> ---
> >>  gcc/tree-vect-stmts.cc | 1275 
> >>  1 file changed, 634 insertions(+), 641 deletions(-)
> >>
> >> diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
> >> index 4f2d088484c..c361e16cb7b 100644
> >> --- a/gcc/tree-vect-stmts.cc
> >> +++ b/gcc/tree-vect-stmts.cc
> >> @@ -10332,7 +10332,129 @@ vectorizable_load (vec_info *vinfo,
> >> vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies, mask,
> >>&vec_masks, mask_vectype);
> >>  }
> >> +
> >>tree vec_mask = NULL_TREE;
> >> +  if (memory_access_type == VMAT_LOAD_STORE_LANES)
> >> +{
> >> +  gcc_assert (alignment_support_scheme == dr_aligned
> >> + || alignment_support_scheme == dr_unaligned_supported);
> >> +  gcc_assert (grouped_load && !slp);
> >> +
> >> +  unsigned int inside_cost = 0, prologue_cost = 0;
> >> +  for (j = 0; j < ncopies; j++)
> >> +   {
> >> + if (costing_p)
> >> +   {
> >> + /* An IFN_LOAD_LANES will load all its vector results,
> >> +regardless of which ones we actually need.  Account
> >> +for the cost of unused results.  */
> >> + if (first_stmt_info == stmt_info)
> >> +   {
> >> + unsigned int gaps = DR_GROUP_SIZE (first_stmt_info);
> >> + stmt_vec_info next_stmt_info = first_stmt_info;
> >> + do
> >> +   {
> >> + gaps -= 1;
> >> + next_stmt_info = DR_GROUP_NEXT_ELEMENT 
> >> (next_stmt_info);
> >> +   }
> >> + while (next_stmt_info);
> >> + if (gaps)
> >> +   {
> >> + if (dump_enabled_p ())
> >> +   dump_printf_loc (MSG_NOTE, vect_location,
> >> +"vect_model_load_cost: %d "
> >> +"unused vectors.\n",
> >> +gaps);
> >> + vect_get_load_cost (vinfo, stmt_info, gaps,
> >> + alignment_support_scheme,
> >> + misalignment, false, 
> >> &inside_cost,
> >> + &prologue_cost, cost_vec, 
> >> cost_vec,
> >> + true);
> >> +   }
> >> +   }
> >> + vect_get_load_cost (vinfo, stmt_info, 1, 
> >> alignment_support_scheme,
> >> + misalignment, false, &inside_cost,
> >> + &prologue_cost, cost_vec, cost_vec, 
> >> true);
> >> + continue;
> >> +   }
> >> +
> >> + /* 1. Create the vector or array pointer update chain.  */
> >> + if (j == 0)
> >> +   dataref_ptr
> >> + = vect_create_data_ref_ptr (vinfo, first_stmt_info, 
> >> aggr_type,
> >> + at_loop, offset, &dummy, gsi,
> >> + &ptr_incr, false, bump);
> >> + else
> >> +   {
> >> + gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
> >> + dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, 
> >> gsi,
> >> +stmt_info, bump);

Re: Re: [PATCH V4] VECT: Support loop len control on EXTRACT_LAST vectorization

On Mon, 14 Aug 2023, 钟居哲 wrote:

> Thanks Kewen.
> 
> But I saw there is 2 more files include:
> 
> +#include "memmodel.h"
> +#include "optabs.h"
> 
> Not sure whether Richard and Richi ok with that change ?

Yes, please just apply some common sense.

> Thanks.
> 
> 
> 
> juzhe.zh...@rivai.ai
>  
> From: Kewen.Lin
> Date: 2023-08-14 20:45
> To: juzhe.zh...@rivai.ai
> CC: Robin Dapp; richard.sandiford; rguenther; GCC Patches
> Subject: Re: [PATCH V4] VECT: Support loop len control on EXTRACT_LAST 
> vectorization
> Hi Juzhe,
>  
> on 2023/8/14 20:08, juzhe.zh...@rivai.ai wrote:
> > Hi, Kewen.
> > 
> > Could you test 'can_vec_extract_var_idx_p' and send V5 patch when you pass 
> > the testing?
>  
> The below diff was bootstrapped and regress-tested on Power10 LE.  Comparing 
> to the
> previous v4, the only changes should be the proposed 
> can_vec_extract_var_idx_p and
> its required new includes as below:
>  
> +#include "memmodel.h"
> +#include "optabs.h"
> Could you have a double check?
>  
> Since I just tested it on Power10, you have the full ownership on the patch, 
> I'd leave
> the v5 posting to you.  Thanks!
>  
> BR,
> Kewen
> -
> diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
> index bc3063c3615..5ae9f69c7eb 100644
> --- a/gcc/tree-vect-loop.cc
> +++ b/gcc/tree-vect-loop.cc
> @@ -32,6 +32,8 @@ along with GCC; see the file COPYING3.  If not see
> #include "tree-pass.h"
> #include "ssa.h"
> #include "optabs-tree.h"
> +#include "memmodel.h"
> +#include "optabs.h"
> #include "diagnostic-core.h"
> #include "fold-const.h"
> #include "stor-layout.h"
> @@ -10300,17 +10302,7 @@ vectorizable_live_operation (vec_info *vinfo, 
> stmt_vec_info stmt_info,
>/* No transformation required.  */
>if (loop_vinfo && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
> {
> -   if (!direct_internal_fn_supported_p (IFN_EXTRACT_LAST, vectype,
> -OPTIMIZE_FOR_SPEED))
> - {
> -   if (dump_enabled_p ())
> - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> - "can't operate on partial vectors "
> - "because the target doesn't support extract "
> - "last reduction.\n");
> -   LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
> - }
> -   else if (slp_node)
> +   if (slp_node)
> {
>   if (dump_enabled_p ())
> dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> @@ -10330,9 +10322,26 @@ vectorizable_live_operation (vec_info *vinfo, 
> stmt_vec_info stmt_info,
>   else
> {
>   gcc_assert (ncopies == 1 && !slp_node);
> -   vect_record_loop_mask (loop_vinfo,
> -  _VINFO_MASKS (loop_vinfo),
> -  1, vectype, NULL);
> +   if (direct_internal_fn_supported_p (IFN_EXTRACT_LAST, vectype,
> +   OPTIMIZE_FOR_SPEED))
> + vect_record_loop_mask (loop_vinfo,
> +_VINFO_MASKS (loop_vinfo),
> +1, vectype, NULL);
> +   else if (can_vec_extract_var_idx_p (
> + TYPE_MODE (vectype), TYPE_MODE (TREE_TYPE (vectype
> + vect_record_loop_len (loop_vinfo,
> +   _VINFO_LENS (loop_vinfo),
> +   1, vectype, 1);
> +   else
> + {
> +   if (dump_enabled_p ())
> + dump_printf_loc (
> +   MSG_MISSED_OPTIMIZATION, vect_location,
> +   "can't operate on partial vectors "
> +   "because the target doesn't support extract "
> +   "last reduction.\n");
> +   LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
> + }
> }
> }
>/* ???  Enable for loop costing as well.  */
> @@ -10358,7 +10367,9 @@ vectorizable_live_operation (vec_info *vinfo, 
> stmt_vec_info stmt_info,
>gimple *vec_stmt;
>if (slp_node)
>  {
> -  gcc_assert (!loop_vinfo || !LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
> +  gcc_assert (!loop_vinfo
> +   || (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
> +   && !LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)));
>  
>/* Get the correct slp vectorized stmt.  */
>vec_lhs = SLP_TREE_VEC_DEFS (slp_node)[vec_entry];
> @@ -10402,7 +10413,42 @@ vectorizable_live_operation (vec_info *vinfo, 
> stmt_vec_info stmt_info,
>  
>gimple_seq stmts = NULL;
>tree new_tree;
> -  if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
> +  if (LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
> + {
> +   /* Emit:
> +
> +SCALAR_RES = VEC_EXTRACT <VEC_LHS, LEN + BIAS - 1>
> +
> +  where VEC_LHS is the vectorized live-out result and MASK is
> +  the loop mask for the final iteration.  */
> +   gcc_assert (ncopies == 1 && !slp_node);
> +   gimple_seq tem = NULL;
> +   gimple_stmt_iterator gsi = gsi_last (tem);
> +   tree len
> + = vect_get_loop_len (loop_vinfo, ,
> + _VINFO_LENS (loop_vinfo),
> + 1, vectype, 0, 0);
> +
> +   /* BIAS - 1.  */
> +   signed char biasval = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
> +   tree bias_minus_one
> + = int_const_binop (MINUS_EXPR,
> +build_int_cst (TREE_TYPE (len), biasval),
> +build_one_cst (TREE_TYPE (len)));
> +
> +   /* LAST_INDEX = LEN + (BIAS - 1).  */
> +   tree last_index = gimple_build (,

Re: [PATCH] tree-optimization/110991 - unroll size estimate after vectorization

On Mon, 14 Aug 2023, Richard Biener wrote:

> The following testcase shows that we are bad at identifying inductions
> that will be optimized away after vectorizing them because SCEV doesn't
> handle vectorized defs.  The following rolls a simpler identification
> of SSA cycles covering a PHI and an assignment with a binary operator
> with a constant second operand.
> 
> Bootstrapped and tested on x86_64-unknown-linux-gnu.
> 
> Note, I also have a more general approach (will reply to this mail
> with an RFC).

So the following is an RFC, it replaces constant_after_peeling
with verifying all SSA operands are constants and then folding
the stmt, recording constant outcomes for further stmts becoming
constants.

We now traverse the loop body twice - once with the optimistic
constant initial values of IVs and after the first traversal
we drop these if the backedge value turns out non-constant.

We then use the outcomes from the second traversal for the size
estimate.

Now, we could use the sizes of the first traversal somehow
if we recorded them separately.  Maybe as followup.

I've again chickened out from doing the transform-with-value-numbering
approach, stopping when we hit a stmt copy limit.  The reason is
of course it's only reasonably simple if there's no branching in the
copied body (for example if we can resolve all branches during
unrolling).  Maybe we should really try harder here ...

I'm currently re-testing this (I made it less optimistic) and having
to fixup some fortran frontend -Warray-bounds diagnostics (meh) as
we now unroll something there.

Does this look better than trying to ad-hoc match the PHI "IV"s
that SCEV doesn't handle?

Thanks,
Richard.

>From 75bc2d108ebc23d513fa49664ffc6bcdb5559495 Mon Sep 17 00:00:00 2001
From: Richard Biener 
Date: Mon, 14 Aug 2023 12:02:41 +0200
Subject: [PATCH] test unroll
To: gcc-patches@gcc.gnu.org

---
 .../gcc.dg/fstack-protector-strong.c  |   4 +-
 gcc/tree-ssa-loop-ivcanon.cc  | 157 --
 2 files changed, 112 insertions(+), 49 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/fstack-protector-strong.c 
b/gcc/testsuite/gcc.dg/fstack-protector-strong.c
index 94dc3508f1a..fafa1917449 100644
--- a/gcc/testsuite/gcc.dg/fstack-protector-strong.c
+++ b/gcc/testsuite/gcc.dg/fstack-protector-strong.c
@@ -28,7 +28,7 @@ foo1 ()
 struct ArrayStruct
 {
   int a;
-  int array[10];
+  int array[18];
 };
 
 struct AA
@@ -43,7 +43,7 @@ foo2 ()
 {
   struct AA aa;
   int i;
-  for (i = 0; i < 10; ++i)
+  for (i = 0; i < 18; ++i)
 {
   aa.as.array[i] = i * (i-1) + i / 2;
 }
diff --git a/gcc/tree-ssa-loop-ivcanon.cc b/gcc/tree-ssa-loop-ivcanon.cc
index 99e50ee2efe..51543e43cbc 100644
--- a/gcc/tree-ssa-loop-ivcanon.cc
+++ b/gcc/tree-ssa-loop-ivcanon.cc
@@ -158,6 +158,7 @@ struct loop_size
   int num_branches_on_hot_path;
 };
 
+#if 0
 /* Return true if OP in STMT will be constant after peeling LOOP.  */
 
 static bool
@@ -245,6 +246,7 @@ constant_after_peeling (tree op, gimple *stmt, class loop 
*loop)
 }
   return true;
 }
+#endif
 
 /* Computes an estimated number of insns in LOOP.
EXIT (if non-NULL) is an exite edge that will be eliminated in all but last
@@ -276,6 +278,31 @@ tree_estimate_loop_size (class loop *loop, edge exit, edge 
edge_to_cancel,
 
   if (dump_file && (dump_flags & TDF_DETAILS))
 fprintf (dump_file, "Estimating sizes for loop %i\n", loop->num);
+
+  static hash_map *vals;
+  vals = new hash_map;
+  edge pe = loop_preheader_edge (loop);
+  for (auto si = gsi_start_phis (loop->header);
+   !gsi_end_p (si); gsi_next ())
+{
+  if (virtual_operand_p (gimple_phi_result (*si)))
+   continue;
+  tree val = gimple_phi_arg_def_from_edge (*si, pe);
+  if (CONSTANT_CLASS_P (val))
+   {
+ vals->put (gimple_phi_result (*si), val);
+ tree ev = analyze_scalar_evolution (loop, gimple_phi_result (*si));
+ if (!chrec_contains_undetermined (ev)
+ && !chrec_contains_symbols (ev))
+   size->constant_iv = true;
+   }
+}
+
+  auto els_valueize = [] (tree op) -> tree
+{ if (tree *val = vals->get (op)) return *val; return op; };
+
+  auto process_loop = [&] () -> bool
+{
   for (i = 0; i < loop->num_nodes; i++)
 {
   if (edge_to_cancel && body[i] != edge_to_cancel->src
@@ -322,54 +349,47 @@ tree_estimate_loop_size (class loop *loop, edge exit, 
edge edge_to_cancel,
 "in last copy.\n");
  likely_eliminated_last = true;
}
- /* Sets of IV variables  */
- if (gimple_code (stmt) == GIMPLE_ASSIGN
- && constant_after_peeling (gimple_assign_lhs (stmt), stmt, 
loop))
+ /* Stores are not eliminated.  */
+ if (gimple_vdef (stmt))
+   continue;
+ /* Below we are using constant folding to decide whether
+we can elide a stmt.  While for the first iteration we
+

[PATCH] tree-optimization/110991 - unroll size estimate after vectorization

The following testcase shows that we are bad at identifying inductions
that will be optimized away after vectorizing them because SCEV doesn't
handle vectorized defs.  The following rolls a simpler identification
of SSA cycles covering a PHI and an assignment with a binary operator
with a constant second operand.

Bootstrapped and tested on x86_64-unknown-linux-gnu.

Note, I also have a more general approach (will reply to this mail
with an RFC).

Any comments on this particular change?

PR tree-optimization/110991
* tree-ssa-loop-ivcanon.cc (constant_after_peeling): Handle
VIEW_CONVERT_EXPR <op>, handle more simple IV-like SSA cycles
that will end up constant.

* gcc.dg/tree-ssa/cunroll-16.c: New testcase.
---
 gcc/testsuite/gcc.dg/tree-ssa/cunroll-16.c | 17 
 gcc/tree-ssa-loop-ivcanon.cc   | 46 +-
 2 files changed, 62 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/cunroll-16.c

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/cunroll-16.c 
b/gcc/testsuite/gcc.dg/tree-ssa/cunroll-16.c
new file mode 100644
index 000..9bb66ff8299
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/cunroll-16.c
@@ -0,0 +1,17 @@
+/* PR/110991 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-cunroll-details -fdump-tree-optimized" } */
+
+static unsigned char a;
+static signed char b;
+void foo(void);
+int main() {
+  a = 25;
+  for (; a > 13; --a)
+b = a > 127 ?: a << 3;
+  if (!b)
+foo();
+}
+
+/* { dg-final { scan-tree-dump "optimized: loop with \[0-9\]\+ iterations 
completely unrolled" "cunroll" } } */
+/* { dg-final { scan-tree-dump-not "foo" "optimized" } } */
diff --git a/gcc/tree-ssa-loop-ivcanon.cc b/gcc/tree-ssa-loop-ivcanon.cc
index a895e8e65be..99e50ee2efe 100644
--- a/gcc/tree-ssa-loop-ivcanon.cc
+++ b/gcc/tree-ssa-loop-ivcanon.cc
@@ -166,6 +166,11 @@ constant_after_peeling (tree op, gimple *stmt, class loop 
*loop)
   if (CONSTANT_CLASS_P (op))
 return true;
 
+  /* Get at the actual SSA operand.  */
+  if (handled_component_p (op)
+  && TREE_CODE (TREE_OPERAND (op, 0)) == SSA_NAME)
+op = TREE_OPERAND (op, 0);
+
   /* We can still fold accesses to constant arrays when index is known.  */
   if (TREE_CODE (op) != SSA_NAME)
 {
@@ -198,7 +203,46 @@ constant_after_peeling (tree op, gimple *stmt, class loop 
*loop)
   tree ev = analyze_scalar_evolution (loop, op);
   if (chrec_contains_undetermined (ev)
   || chrec_contains_symbols (ev))
-return false;
+{
+  if (ANY_INTEGRAL_TYPE_P (TREE_TYPE (op)))
+   {
+ gassign *ass = nullptr;
+ gphi *phi = nullptr;
+ if (is_a  (SSA_NAME_DEF_STMT (op)))
+   {
+ ass = as_a  (SSA_NAME_DEF_STMT (op));
+ if (TREE_CODE (gimple_assign_rhs1 (ass)) == SSA_NAME)
+   phi = dyn_cast 
+   (SSA_NAME_DEF_STMT (gimple_assign_rhs1  (ass)));
+   }
+ else if (is_a  (SSA_NAME_DEF_STMT (op)))
+   {
+ phi = as_a  (SSA_NAME_DEF_STMT (op));
+ if (gimple_bb (phi) == loop->header)
+   {
+ tree def = gimple_phi_arg_def_from_edge
+   (phi, loop_latch_edge (loop));
+ if (TREE_CODE (def) == SSA_NAME
+ && is_a  (SSA_NAME_DEF_STMT (def)))
+   ass = as_a  (SSA_NAME_DEF_STMT (def));
+   }
+   }
+ if (ass && phi)
+   {
+ tree rhs1 = gimple_assign_rhs1 (ass);
+ if (gimple_assign_rhs_class (ass) == GIMPLE_BINARY_RHS
+ && CONSTANT_CLASS_P (gimple_assign_rhs2 (ass))
+ && rhs1 == gimple_phi_result (phi)
+ && gimple_bb (phi) == loop->header
+ && (gimple_phi_arg_def_from_edge (phi, loop_latch_edge (loop))
+ == gimple_assign_lhs (ass))
+ && (CONSTANT_CLASS_P (gimple_phi_arg_def_from_edge
+(phi, loop_preheader_edge (loop)
+   return true;
+   }
+   }
+  return false;
+}
   return true;
 }
 
-- 
2.35.3

Re: Re: [PATCH V4] VECT: Support loop len control on EXTRACT_LAST vectorization

2023-08-14 Thread 钟居哲

Thanks Kewen.

But I saw there are 2 more file includes:

+#include "memmodel.h"
+#include "optabs.h"

Not sure whether Richard and Richi ok with that change ?

Thanks.



juzhe.zh...@rivai.ai
 
From: Kewen.Lin
Date: 2023-08-14 20:45
To: juzhe.zh...@rivai.ai
CC: Robin Dapp; richard.sandiford; rguenther; GCC Patches
Subject: Re: [PATCH V4] VECT: Support loop len control on EXTRACT_LAST 
vectorization
Hi Juzhe,
 
on 2023/8/14 20:08, juzhe.zh...@rivai.ai wrote:
> Hi, Kewen.
> 
> Could you test 'can_vec_extract_var_idx_p' and send V5 patch when you pass 
> the testing?
 
The below diff was bootstrapped and regress-tested on Power10 LE.  Comparing to 
the
previous v4, the only changes should be the proposed can_vec_extract_var_idx_p 
and
its required new includes as below:
 
+#include "memmodel.h"
+#include "optabs.h"
Could you have a double check?
 
Since I just tested it on Power10, you have the full ownership on the patch, 
I'd leave
the v5 posting to you.  Thanks!
 
BR,
Kewen
-
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index bc3063c3615..5ae9f69c7eb 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -32,6 +32,8 @@ along with GCC; see the file COPYING3.  If not see
#include "tree-pass.h"
#include "ssa.h"
#include "optabs-tree.h"
+#include "memmodel.h"
+#include "optabs.h"
#include "diagnostic-core.h"
#include "fold-const.h"
#include "stor-layout.h"
@@ -10300,17 +10302,7 @@ vectorizable_live_operation (vec_info *vinfo, 
stmt_vec_info stmt_info,
   /* No transformation required.  */
   if (loop_vinfo && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
{
-   if (!direct_internal_fn_supported_p (IFN_EXTRACT_LAST, vectype,
-OPTIMIZE_FOR_SPEED))
- {
-   if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "can't operate on partial vectors "
- "because the target doesn't support extract "
- "last reduction.\n");
-   LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
- }
-   else if (slp_node)
+   if (slp_node)
{
  if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -10330,9 +10322,26 @@ vectorizable_live_operation (vec_info *vinfo, 
stmt_vec_info stmt_info,
  else
{
  gcc_assert (ncopies == 1 && !slp_node);
-   vect_record_loop_mask (loop_vinfo,
-  _VINFO_MASKS (loop_vinfo),
-  1, vectype, NULL);
+   if (direct_internal_fn_supported_p (IFN_EXTRACT_LAST, vectype,
+   OPTIMIZE_FOR_SPEED))
+ vect_record_loop_mask (loop_vinfo,
+_VINFO_MASKS (loop_vinfo),
+1, vectype, NULL);
+   else if (can_vec_extract_var_idx_p (
+ TYPE_MODE (vectype), TYPE_MODE (TREE_TYPE (vectype
+ vect_record_loop_len (loop_vinfo,
+   _VINFO_LENS (loop_vinfo),
+   1, vectype, 1);
+   else
+ {
+   if (dump_enabled_p ())
+ dump_printf_loc (
+   MSG_MISSED_OPTIMIZATION, vect_location,
+   "can't operate on partial vectors "
+   "because the target doesn't support extract "
+   "last reduction.\n");
+   LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
+ }
}
}
   /* ???  Enable for loop costing as well.  */
@@ -10358,7 +10367,9 @@ vectorizable_live_operation (vec_info *vinfo, 
stmt_vec_info stmt_info,
   gimple *vec_stmt;
   if (slp_node)
 {
-  gcc_assert (!loop_vinfo || !LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
+  gcc_assert (!loop_vinfo
+   || (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
+   && !LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)));
 
   /* Get the correct slp vectorized stmt.  */
   vec_lhs = SLP_TREE_VEC_DEFS (slp_node)[vec_entry];
@@ -10402,7 +10413,42 @@ vectorizable_live_operation (vec_info *vinfo, 
stmt_vec_info stmt_info,
 
   gimple_seq stmts = NULL;
   tree new_tree;
-  if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
+  if (LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
+ {
+   /* Emit:
+
+SCALAR_RES = VEC_EXTRACT <VEC_LHS, LEN + BIAS - 1>
+
+  where VEC_LHS is the vectorized live-out result and MASK is
+  the loop mask for the final iteration.  */
+   gcc_assert (ncopies == 1 && !slp_node);
+   gimple_seq tem = NULL;
+   gimple_stmt_iterator gsi = gsi_last (tem);
+   tree len
+ = vect_get_loop_len (loop_vinfo, ,
+ _VINFO_LENS (loop_vinfo),
+ 1, vectype, 0, 0);
+
+   /* BIAS - 1.  */
+   signed char biasval = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
+   tree bias_minus_one
+ = int_const_binop (MINUS_EXPR,
+build_int_cst (TREE_TYPE (len), biasval),
+build_one_cst (TREE_TYPE (len)));
+
+   /* LAST_INDEX = LEN + (BIAS - 1).  */
+   tree last_index = gimple_build (, PLUS_EXPR, TREE_TYPE (len),
+   len, bias_minus_one);
+
+   /* SCALAR_RES = VEC_EXTRACT <VEC_LHS, LEN + BIAS - 1>.  */
+   tree scalar_res
+ = gimple_build (, CFN_VEC_EXTRACT, TREE_TYPE (vectype),
+ vec_lhs_phi, last_index);
+
+   /* Convert the extracted vector element to the scalar type.  */
+   new_tree = gimple_convert (, lhs_type, scalar_res);
+ }
+  else if (LOOP_VINFO_FULLY_MASKED_P

Re: [PATCH v1] RISC-V: Support RVV VFREC7 rounding mode intrinsic API

2023-08-14 Thread 钟居哲

I defer this patch's review to kito since I am not sure whether vfrec7 needs 
rounding mode.



juzhe.zh...@rivai.ai
 
From: pan2.li
Date: 2023-08-14 20:49
To: gcc-patches
CC: juzhe.zhong; pan2.li; yanzhang.wang; kito.cheng
Subject: [PATCH v1] RISC-V: Support RVV VFREC7 rounding mode intrinsic API
From: Pan Li 
 
This patch would like to support the rounding mode API for the
VFREC7 as the below samples.
 
* __riscv_vfrec7_v_f32m1_rm
* __riscv_vfrec7_v_f32m1_rm_m
 
Signed-off-by: Pan Li 
 
gcc/ChangeLog:
 
* config/riscv/riscv-vector-builtins-bases.cc
(class vfrec7_frm): New class for frm.
(vfrec7_frm_obj): New declaration.
(BASE): Ditto.
* config/riscv/riscv-vector-builtins-bases.h: Ditto.
* config/riscv/riscv-vector-builtins-functions.def
(vfrec7_frm): New intrinsic function definition.
* config/riscv/vector-iterators.md
(VFMISC): Remove VFREC7.
(misc_op): Ditto.
(float_insn_type): Ditto.
(VFMISC_FRM): New int iterator.
(misc_frm_op): New op for frm.
(float_frm_insn_type): New type for frm.
* config/riscv/vector.md (@pred_):
New pattern for misc frm.
 
gcc/testsuite/ChangeLog:
 
* gcc.target/riscv/rvv/base/float-point-rec7.c: New test.
---
.../riscv/riscv-vector-builtins-bases.cc  | 17 ++
.../riscv/riscv-vector-builtins-bases.h   |  1 +
.../riscv/riscv-vector-builtins-functions.def |  2 ++
gcc/config/riscv/vector-iterators.md  | 12 +--
gcc/config/riscv/vector.md| 23 ++
.../riscv/rvv/base/float-point-rec7.c | 31 +++
6 files changed, 83 insertions(+), 3 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/float-point-rec7.c
 
diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.cc 
b/gcc/config/riscv/riscv-vector-builtins-bases.cc
index 2074dac0f16..249ac4e68cd 100644
--- a/gcc/config/riscv/riscv-vector-builtins-bases.cc
+++ b/gcc/config/riscv/riscv-vector-builtins-bases.cc
@@ -646,6 +646,21 @@ public:
   }
};
+/* Implements below instructions for frm
+   - vfrec7
+*/
+template
+class vfrec7_frm : public function_base
+{
+public:
+  bool has_rounding_mode_operand_p () const override { return true; }
+
+  rtx expand (function_expander ) const override
+  {
+return e.use_exact_insn (code_for_pred (UNSPEC, e.vector_mode ()));
+  }
+};
+
/* Implements vrsub.  */
class vrsub : public function_base
{
@@ -2433,6 +2448,7 @@ static CONSTEXPR const unop vfsqrt_obj;
static CONSTEXPR const unop_frm vfsqrt_frm_obj;
static CONSTEXPR const float_misc vfrsqrt7_obj;
static CONSTEXPR const float_misc vfrec7_obj;
+static CONSTEXPR const vfrec7_frm vfrec7_frm_obj;
static CONSTEXPR const binop vfmin_obj;
static CONSTEXPR const binop vfmax_obj;
static CONSTEXPR const float_misc vfsgnj_obj;
@@ -2681,6 +2697,7 @@ BASE (vfsqrt)
BASE (vfsqrt_frm)
BASE (vfrsqrt7)
BASE (vfrec7)
+BASE (vfrec7_frm)
BASE (vfmin)
BASE (vfmax)
BASE (vfsgnj)
diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.h 
b/gcc/config/riscv/riscv-vector-builtins-bases.h
index 5c91381bd4c..2a9381eec5e 100644
--- a/gcc/config/riscv/riscv-vector-builtins-bases.h
+++ b/gcc/config/riscv/riscv-vector-builtins-bases.h
@@ -187,6 +187,7 @@ extern const function_base *const vfsqrt;
extern const function_base *const vfsqrt_frm;
extern const function_base *const vfrsqrt7;
extern const function_base *const vfrec7;
+extern const function_base *const vfrec7_frm;
extern const function_base *const vfmin;
extern const function_base *const vfmax;
extern const function_base *const vfsgnj;
diff --git a/gcc/config/riscv/riscv-vector-builtins-functions.def 
b/gcc/config/riscv/riscv-vector-builtins-functions.def
index a821aca6a4b..34def6bb82f 100644
--- a/gcc/config/riscv/riscv-vector-builtins-functions.def
+++ b/gcc/config/riscv/riscv-vector-builtins-functions.def
@@ -396,6 +396,8 @@ DEF_RVV_FUNCTION (vfrsqrt7, alu, full_preds, f_v_ops)
// 13.10. Vector Floating-Point Reciprocal Estimate Instruction
DEF_RVV_FUNCTION (vfrec7, alu, full_preds, f_v_ops)
+DEF_RVV_FUNCTION (vfrec7_frm, alu_frm, full_preds, f_v_ops)
+
// 13.11. Vector Floating-Point MIN/MAX Instructions
DEF_RVV_FUNCTION (vfmin, alu, full_preds, f_vvv_ops)
DEF_RVV_FUNCTION (vfmin, alu, full_preds, f_vvf_ops)
diff --git a/gcc/config/riscv/vector-iterators.md 
b/gcc/config/riscv/vector-iterators.md
index 30808ceb241..9dd611e254b 100644
--- a/gcc/config/riscv/vector-iterators.md
+++ b/gcc/config/riscv/vector-iterators.md
@@ -1867,7 +1867,9 @@ (define_int_iterator VSAT_SHIFT_OP [UNSPEC_VSSRL 
UNSPEC_VSSRA])
(define_int_iterator VMISC [UNSPEC_VMSBF UNSPEC_VMSIF UNSPEC_VMSOF])
-(define_int_iterator VFMISC [UNSPEC_VFRSQRT7 UNSPEC_VFREC7])
+(define_int_iterator VFMISC [UNSPEC_VFRSQRT7])
+
+(define_int_iterator VFMISC_FRM [UNSPEC_VFREC7])
(define_int_iterator VFCVTS [UNSPEC_VFCVT UNSPEC_UNSIGNED_VFCVT])
@@ -1890,9 +1892,13 @@ (define_int_attr sat_insn_type [(UNSPEC_VAADDU "vaalu") 
(UNSPEC_VAADD "vaalu")
(UNSPEC_VNCLIPU "vnclip")])
(define_int_attr misc_op [(UNSPEC_VMSBF "sbf") (UNSPEC_VMSIF "sif") 
(UNSPEC_VMSOF "sof")
-

Re: [RFC] GCC Security policy

2023-08-14 Thread Siddhesh Poyarekar


Hi,

Here's the updated draft of the top part of the security policy with all 
of the recommendations incorporated.


Thanks,
Sid


What is a GCC security bug?
===========================

A security bug is one that threatens the security of a system or
network, or might compromise the security of data stored on it.
In the context of GCC there are multiple ways in which this might
happen and they're detailed below.

Compiler drivers, programs, libgccjit and support libraries
-----------------------------------------------------------

The compiler driver processes source code, invokes other programs
such as the assembler and linker and generates the output result,
which may be assembly code or machine code.  It is necessary that
all source code inputs to the compiler are trusted, since it is
impossible for the driver to validate input source code beyond
conformance to a programming language standard.

The GCC JIT implementation, libgccjit, is intended to be plugged
into applications to translate input source code in the application
context.  Limitations that apply to the compiler
driver apply here too in terms of sanitizing inputs, so it is
recommended that inputs are either sanitized by an external program
to allow only trusted, safe execution in the context of the
application or the JIT execution context is appropriately sandboxed
to contain the effects of any bugs in the JIT or its generated code
to the sandboxed environment.

Support libraries such as libiberty, libcc1, libvtv and libcpp have
been developed separately to share code with other tools such as
binutils and gdb.  These libraries again have similar challenges to
compiler drivers.  While they are expected to be robust against
arbitrary input, they should only be used with trusted inputs.

Libraries such as zlib that are bundled with GCC to build it will be
treated the same as the compiler drivers and programs as far as
security coverage is concerned.  However if you find an issue in
these libraries independent of their use in GCC, you should reach
out to their upstream projects to report them.

As a result, the only case for a potential security issue in all
these cases is when it ends up generating vulnerable output for
valid input source code.

As a result, the only case for a potential security issue in the
compiler is when it generates vulnerable application code for
trusted input source code that is conforming to the relevant
programming standard or extensions documented as supported by GCC
and the algorithm expressed in the source code does not have the
vulnerability.  The output application code could be considered
vulnerable if it produces an actual vulnerability in the target
application, specifically in the following cases:

- The application dereferences an invalid memory location despite
  the application sources being valid.
- The application reads from or writes to a valid but incorrect
  memory location, resulting in an information integrity issue or an
  information leak.
- The application ends up running in an infinite loop or with
  severe degradation in performance despite the input sources having
  no such issue, resulting in a Denial of Service.  Note that
  correct but non-performant code is not a security issue candidate;
  this only applies to incorrect code that may result in performance
  degradation severe enough to amount to a denial of service.
- The application crashes due to the generated incorrect code,
  resulting in a Denial of Service.

Language runtime libraries
--------------------------

GCC also builds and distributes libraries that are intended to be
used widely to implement runtime support for various programming
languages.  These include the following:

* libada
* libatomic
* libbacktrace
* libcc1
* libcody
* libcpp
* libdecnumber
* libffi
* libgcc
* libgfortran
* libgm2
* libgo
* libgomp
* libiberty
* libitm
* libobjc
* libphobos
* libquadmath
* libsanitizer
* libssp
* libstdc++

These libraries are intended to be used in arbitrary contexts and as
a result, bugs in these libraries may be evaluated for security
impact.  However, some of these libraries (e.g. libgo and libphobos)
are not maintained in the GCC project, so the GCC
project may not be the correct point of contact for them.  You are
encouraged to look at README files within those library directories
to locate the canonical security contact point for those projects
and include them in the report.  Once the issue is fixed in the
upstream project, the fix will be synced into GCC in a future
release.

Most security vulnerabilities in these runtime libraries arise when
an application

Re: [PATCH] vect: Move VMAT_GATHER_SCATTER handlings from final loop nest

Hi Richard,

on 2023/8/14 20:20, Richard Sandiford wrote:
> Thanks for the clean-ups.  But...
> 
> "Kewen.Lin"  writes:
>> Hi,
>>
>> Following Richi's suggestion [1], this patch is to move the
>> handlings on VMAT_GATHER_SCATTER in the final loop nest
>> of function vectorizable_load to its own loop.  Basically
>> it duplicates the final loop nest, clean up some useless
>> set up code for the case of VMAT_GATHER_SCATTER, remove some
>> unreachable code.  Also remove the corresponding handlings
>> in the final loop nest.
>>
>> Bootstrapped and regtested on x86_64-redhat-linux,
>> aarch64-linux-gnu and powerpc64{,le}-linux-gnu.
>>
>> [1] https://gcc.gnu.org/pipermail/gcc-patches/2023-June/623329.html
>>
>> Is it ok for trunk?
>>
>> BR,
>> Kewen
>> -
>>
>> gcc/ChangeLog:
>>
>>  * tree-vect-stmts.cc (vectorizable_load): Move the handlings on
>>  VMAT_GATHER_SCATTER in the final loop nest to its own loop,
>>  and update the final nest accordingly.
>> ---
>>  gcc/tree-vect-stmts.cc | 361 +
>>  1 file changed, 219 insertions(+), 142 deletions(-)
> 
> ...that seems like quite a lot of +s.  Is there nothing we can do to
> avoid the cut-&-paste?

Thanks for the comments!  I'm not sure I understand your question: if we
want to move out the handling of VMAT_GATHER_SCATTER, the new +s seem
inevitable.  Is your concern mainly about git blame history?

BR,
Kewen

> 
> Richard
> 
>>
>> diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
>> index c361e16cb7b..5e514eca19b 100644
>> --- a/gcc/tree-vect-stmts.cc
>> +++ b/gcc/tree-vect-stmts.cc
>> @@ -10455,6 +10455,218 @@ vectorizable_load (vec_info *vinfo,
>>return true;
>>  }
>>
>> +  if (memory_access_type == VMAT_GATHER_SCATTER)
>> +{
>> +  gcc_assert (alignment_support_scheme == dr_aligned
>> +  || alignment_support_scheme == dr_unaligned_supported);
>> +  gcc_assert (!grouped_load && !slp_perm);
>> +
>> +  unsigned int inside_cost = 0, prologue_cost = 0;
>> +  for (j = 0; j < ncopies; j++)
>> +{
>> +  /* 1. Create the vector or array pointer update chain.  */
>> +  if (j == 0 && !costing_p)
>> +{
>> +  if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
>> +vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info,
>> + slp_node, _info, _ptr,
>> + _offsets);
>> +  else
>> +dataref_ptr
>> +  = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
>> +  at_loop, offset, , gsi,
>> +  _incr, false, bump);
>> +}
>> +  else if (!costing_p)
>> +{
>> +  gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
>> +  if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info))
>> +dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
>> +   gsi, stmt_info, bump);
>> +}
>> +
>> +  if (mask && !costing_p)
>> +vec_mask = vec_masks[j];
>> +
>> +  gimple *new_stmt = NULL;
>> +  for (i = 0; i < vec_num; i++)
>> +{
>> +  tree final_mask = NULL_TREE;
>> +  tree final_len = NULL_TREE;
>> +  tree bias = NULL_TREE;
>> +  if (!costing_p)
>> +{
>> +  if (loop_masks)
>> +final_mask
>> +  = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
>> +vec_num * ncopies, vectype,
>> +vec_num * j + i);
>> +  if (vec_mask)
>> +final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
>> +   final_mask, vec_mask, gsi);
>> +
>> +  if (i > 0 && !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
>> +dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
>> +   gsi, stmt_info, bump);
>> +}
>> +
>> +  /* 2. Create the vector-load in the loop.  */
>> +  unsigned HOST_WIDE_INT align;
>> +  if (gs_info.ifn != IFN_LAST)
>> +{
>> +  if (costing_p)
>> +{
>> +  unsigned int cnunits = vect_nunits_for_cost (vectype);
>> +  inside_cost
>> += record_stmt_cost (cost_vec, cnunits, scalar_load,
>> +stmt_info, 0, vect_body);
>> +  continue;
>> +}
>> +  if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
>> +vec_offset = vec_offsets[vec_num * j + i];
>> +  tree zero = build_zero_cst (vectype);
>> +  tree scale = size_int (gs_info.scale);
>> +
>> +  if (gs_info.ifn == IFN_MASK_LEN_GATHER_LOAD)
>> +{
>> +

Re: [RFC] [v2] Extend fold_vec_perm to handle VLA vectors

2023-08-14 Thread Richard Sandiford via Gcc-patches

Prathamesh Kulkarni  writes:
> On Thu, 10 Aug 2023 at 21:27, Richard Sandiford
>  wrote:
>>
>> Prathamesh Kulkarni  writes:
>> >> static bool
>> >> is_simple_vla_size (poly_uint64 size)
>> >> {
>> >>   if (size.is_constant ())
>> >> return false;
>> >>   for (int i = 1; i < ARRAY_SIZE (size.coeffs); ++i)
>> >> if (size[i] != (i <= 1 ? size[0] : 0))
>> > Just wondering if this should be (i == 1 ? size[0] : 0) since i is
>> > initialized to 1 ?
>>
>> Both work.  I prefer <= 1 because it doesn't depend on the micro
>> optimisation to start at coefficient 1.  In a theoretical 3-indeterminate
>> poly_int, we want the first 2 coefficients to be nonzero and the rest to
>> be zero.
>>
>> > IIUC, is_simple_vla_size should return true for polynomials of first
>> > degree and having same coeff like 4 + 4x ?
>>
>> FWIW, poly_int only supports first-degree polynomials at the moment.
>> coeffs>2 means there is more than one indeterminate, rather than a
>> higher power.
> Oh OK, thanks for the clarification.
>>
>> >>   return false;
>> >>   return true;
>> >> }
>> >>
>> >>
>> >>   FOR_EACH_MODE_IN_CLASS (mode, MODE_VECTOR_INT)
>> >> {
>> >>   auto nunits = GET_MODE_NUNITS (mode);
>> >>   if (!is_simple_vla_size (nunits))
>> >> continue;
>> >>   if (nunits[0] ...)
>> >> test_... (mode);
>> >>   ...
>> >>
>> >> }
>> >>
>> >> test_vnx4si_v4si and test_v4si_vnx4si look good.  But with the
>> >> loop structure above, I think we can apply the test_vnx4si and
>> >> test_vnx16qi to more cases.  So the classification isn't the
>> >> exact number of elements, but instead a limit.
>> >>
>> >> I think the nunits[0] conditions for test_vnx4si are as follows
>> >> (inspection only, so could be wrong):
>> >>
>> >> > +/* Test cases where result and input vectors are VNx4SI  */
>> >> > +
>> >> > +static void
>> >> > +test_vnx4si (machine_mode vmode)
>> >> > +{
>> >> > +  /* Case 1: mask = {0, ...} */
>> >> > +  {
>> >> > +tree arg0 = build_vec_cst_rand (vmode, 2, 3, 1);
>> >> > +tree arg1 = build_vec_cst_rand (vmode, 2, 3, 1);
>> >> > +poly_uint64 len = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
>> >> > +
>> >> > +vec_perm_builder builder (len, 1, 1);
>> >> > +builder.quick_push (0);
>> >> > +vec_perm_indices sel (builder, 2, len);
>> >> > +tree res = fold_vec_perm_cst (TREE_TYPE (arg0), arg0, arg1, sel);
>> >> > +
>> >> > +tree expected_res[] = { vector_cst_elt (res, 0) };
>> > This should be { vector_cst_elt (arg0, 0) }; will fix in next patch.
>> >> > +validate_res (1, 1, res, expected_res);
>> >> > +  }
>> >>
>> >> nunits[0] >= 2 (could be all nunits if the inputs had 
>> >> nelts_per_pattern==1,
>> >> which I think would be better)
>> > IIUC, the vectors that can be used for a particular test should have
>> > nunits[0] >= res_npatterns,
>> > where res_npatterns is as computed in fold_vec_perm_cst without the
>> > canonicalization ?
>> > For above test -- res_npatterns = max(2, max (2, 1)) == 2, so we
>> > require nunits[0] >= 2 ?
>> > Which implies we can use above test for vectors with length 2 + 2x, 4 + 
>> > 4x, etc.
>>
>> Right, that's what I meant.  With the inputs as they stand it has to be
>> nunits[0] >= 2.  We need that to form the inputs correctly.  But if the
>> inputs instead had nelts_per_pattern == 1, the test would work for all
>> nunits.
> In the attached patch, I have reordered the tests based on min or max limit.
> For tests where sel_npatterns < 3 (ie dup sequence), I have kept input
> npatterns = 1,
> so we can test more vector modes, and also input npatterns matter only
> for stepped sequence in sel
> (Since for a dup pattern we don't enforce the constraint of selecting
> elements from same input pattern).
> Does it look OK ?
>
> For the following tests with input vectors having shape (1, 3)
> sel = {0, 1, 2, ...}  // (1, 3)
> res = { arg0[0], arg0[1], arg0[2], ... } // (1, 3)
>
> and sel = {len, len + 1, len + 2, ... }  // (1, 3)
> res = { arg1[0], arg1[1], arg1[2], ... } // (1, 3)
>
> Although res_npatterns = 1, I suppose these will need to be tested with
> vectors with length >= 4 + 4x,
> since index 2 can be ambiguous for length 2 + 2x?
> (In the patch, these are cases 2 and 3 in test_nunits_min_4)

Ah, yeah, fair point.  I guess that means:

+  /* Case 3: mask = {len, 0, 1, ...} // (1, 3)
+Test that stepped sequence of the pattern selects from arg0.
+res = { arg1[0], arg0[0], arg0[1], ... } // (1, 3)  */
+  {
+   tree arg0 = build_vec_cst_rand (vmode, 1, 3, 1);
+   tree arg1 = build_vec_cst_rand (vmode, 1, 3, 1);
+   poly_uint64 len = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
+
+   vec_perm_builder builder (len, 1, 3);
+   poly_uint64 mask_elems[] = { len, 0, 1 };
+   builder_push_elems (builder, mask_elems);
+
+   vec_perm_indices sel (builder, 2, len);
+   tree res = fold_vec_perm_cst (TREE_TYPE (arg0), arg0, arg1, sel);
+
+   tree expected_res[] = { ARG1(0),

RE: [2/2] RISC-V: Constant FP Optimization with 'Zfa'

2023-08-14 Thread Jin Ma via Gcc-patches

Hi Tsukasa,
  What a coincidence: I have also implemented the Zfa extension, which also
includes the fli-related instructions :)

links: https://gcc.gnu.org/pipermail/gcc-patches/2023-August/627294.html

> +  if (!TARGET_HARD_FLOAT || !TARGET_ZFA)
> +return result;
> +  switch (GET_MODE (x))
> +{
> +case HFmode:
> +  /* Not only 'Zfhmin', either 'Zfh' or 'Zvfh' is required.  */
> +  if (!TARGET_ZFH && !TARGET_ZVFH)

Zvfh implies that Zfh is also enabled, so there may be no need to check
TARGET_ZVFH here.  By the way, the formatting here seems wrong; maybe a tab
is needed for alignment?

> + return result;
> +  break;
> +case SFmode: break;
> +case DFmode: break;

Maybe we still have to check TARGET_DOUBLE_FLOAT for DFmode?

> +default: return result;
> +}
> +
> +  if (!CONST_DOUBLE_P (x))
> +return result;

I think it might be better to check whether x satisfies CONST_DOUBLE_P
before the switch (GET_MODE (x)) above.

> +
> +  r = *CONST_DOUBLE_REAL_VALUE (x);
> +
> +  if (REAL_VALUE_ISNAN (r))
> +{
> +  long reprs[2] = { 0 };
> +  /* Compare with canonical NaN.  */
> +  switch (GET_MODE (x))
> + {
> + case HFmode:
> +   reprs[0] = real_to_target (NULL, ,
> +  float_mode_for_size (16).require ());
> +   /* 0x7e00: Canonical NaN for binary16.  */
> +   if (reprs[0] != 0x7e00)
> + return result;
> +   break;
> + case SFmode:
> +   reprs[0] = real_to_target (NULL, ,
> +  float_mode_for_size (32).require ());
> +   /* 0x7fc0: Canonical NaN for binary32.  */
> +   if (reprs[0] != 0x7fc0)
> + return result;
> +   break;
> + case DFmode:
> +   real_to_target (reprs, , float_mode_for_size (64).require ());
> +   if (FLOAT_WORDS_BIG_ENDIAN)
> + std::swap (reprs[0], reprs[1]);
> +   /* 0x7ff8_: Canonical NaN for binary64.  */
> +   if (reprs[0] != 0 || reprs[1] != 0x7ff8)
> + return result;
> +   break;
> + default:
> +   gcc_unreachable ();
> + }
> +  result.type = RISCV_FLOAT_CONST_NAN;
> +  result.valid = true;
> +  return result;
> +}
> +  else if (REAL_VALUE_ISINF (r))
> +{
> +  if (REAL_VALUE_NEGATIVE (r))
> + return result;
> +  result.type = RISCV_FLOAT_CONST_INF;
> +  result.valid = true;
> +  return result;
> +}
> +
> +  bool sign = REAL_VALUE_NEGATIVE (r);
> +  result.sign = sign;
> +
> +  r = real_value_abs (&r);
> +  /* GCC internally does not use IEEE754-like encoding (where normalized
> + significands are in the range [1, 2).  GCC uses [0.5, 1) (see real.cc).
> + So, this exponent_p1 variable equals IEEE754 unbiased exponent + 1.  */
> +  int exponent_p1 = REAL_EXP (&r);
> +
> +  /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
> + highest (sign) bit, with a fixed binary point at bit point_pos.
> + m1 holds the low part of the mantissa, m2 the high part.
> + WARNING: If we ever have a representation using more than 2 * H_W_I - 1
> + bits for the mantissa, this can fail (low bits will be lost).  */
> +  bool fail = false;
> +  real_ldexp (&r, &r, (2 * HOST_BITS_PER_WIDE_INT - 1) - exponent_p1);
> +  wide_int w = real_to_integer (&r, &fail, HOST_BITS_PER_WIDE_INT * 2);
> +  if (fail)
> +return result;
> +
> +  /* If the low part of the mantissa has bits set we cannot represent
> + the value.  */
> +  if (w.ulow () != 0)
> +return result;
> +  /* We have rejected the lower HOST_WIDE_INT, so update our
> + understanding of how many bits lie in the mantissa and
> + look only at the high HOST_WIDE_INT.  */
> +  unsigned HOST_WIDE_INT mantissa = w.elt (1);
> +
> +  /* We cannot represent the value 0.0.  */
> +  if (mantissa == 0)
> +return result;
> +
> +  /* We can only represent values with a mantissa of the form 1.xx.  */
> +  unsigned HOST_WIDE_INT mask
> +  = ((unsigned HOST_WIDE_INT) 1 << (HOST_BITS_PER_WIDE_INT - 4)) - 1;
> +  if ((mantissa & mask) != 0)
> +return result;
> +  mantissa >>= HOST_BITS_PER_WIDE_INT - 4;
> +  /* Now the lowest 3-bits of mantissa should form (1.xx)b.  */
> +  gcc_assert (mantissa & (1u << 2));
> +  /* Mask out the highest bit.  */
> +  mantissa &= ~(1u << 2);
> +
> +  if (mantissa == 0)
> +{
> +  /* We cannot represent any values but -1.0.  */
> +  if (exponent_p1 != 1 && sign)
> + return result;
> +  switch (exponent_p1)
> + {
> + case -15: /* 1.0 * 2^(-16)  */
> + case -14: /* 1.0 * 2^(-15)  */
> + case -7:  /* 1.0 * 2^(- 8)  */
> + case -6:  /* 1.0 * 2^(- 7)  */
> + case 8:   /* 1.0 * 2^(+ 7)  */
> + case 9:   /* 1.0 * 2^(+ 8)  */
> + case 16:  /* 1.0 * 2^(+15)  */
> + case 17:  /* 1.0 * 2^(+16)  */
> +   break;
> + default:
> +   if (exponent_p1 >= -3 && exponent_p1 <= 5)
> + /* 1.0 * 2^[-4,4]  */
> + break;
> +   switch (GET_MODE (x))
> +

Re: [PATCH] vect: Move VMAT_LOAD_STORE_LANES handlings from final loop nest

Hi Richi,

on 2023/8/14 20:04, Richard Biener wrote:
> On Mon, Aug 14, 2023 at 10:54 AM Kewen.Lin  wrote:
>>
>> Hi,
>>
>> Following Richi's suggestion [1], this patch moves the
>> handling of VMAT_LOAD_STORE_LANES in the final loop nest
>> of function vectorizable_load to its own loop.  Basically
>> it duplicates the final loop nest, cleans up some useless
>> setup code for the case of VMAT_LOAD_STORE_LANES, and removes
>> some unreachable code.  It also removes the corresponding
>> handling in the final loop nest.
>>
>> Bootstrapped and regtested on x86_64-redhat-linux,
>> aarch64-linux-gnu and powerpc64{,le}-linux-gnu.
> 
> OK (I guess the big diff is mostly because of re-indenting).

Thanks!  Yes, there is some code in the original final loop nest like

if (memory_access_type == VMAT_LOAD_STORE_LANES)
  {
...
  }
else
  {
...
  }

Then the else arm is fully re-indented.

The other patch on VMAT_GATHER_SCATTER looks a bit better since
it doesn't need re-indenting.

BR,
Kewen

> 
> Thanks,
> Richard.
> 
>> [1] https://gcc.gnu.org/pipermail/gcc-patches/2023-June/623329.html
>>
>> gcc/ChangeLog:
>>
>> * tree-vect-stmts.cc (vectorizable_load): Move the handlings on
>> VMAT_LOAD_STORE_LANES in the final loop nest to its own loop,
>> and update the final nest accordingly.
>> ---
>>  gcc/tree-vect-stmts.cc | 1275 
>>  1 file changed, 634 insertions(+), 641 deletions(-)
>>
>> diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
>> index 4f2d088484c..c361e16cb7b 100644
>> --- a/gcc/tree-vect-stmts.cc
>> +++ b/gcc/tree-vect-stmts.cc
>> @@ -10332,7 +10332,129 @@ vectorizable_load (vec_info *vinfo,
>> vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies, mask,
>>_masks, mask_vectype);
>>  }
>> +
>>tree vec_mask = NULL_TREE;
>> +  if (memory_access_type == VMAT_LOAD_STORE_LANES)
>> +{
>> +  gcc_assert (alignment_support_scheme == dr_aligned
>> + || alignment_support_scheme == dr_unaligned_supported);
>> +  gcc_assert (grouped_load && !slp);
>> +
>> +  unsigned int inside_cost = 0, prologue_cost = 0;
>> +  for (j = 0; j < ncopies; j++)
>> +   {
>> + if (costing_p)
>> +   {
>> + /* An IFN_LOAD_LANES will load all its vector results,
>> +regardless of which ones we actually need.  Account
>> +for the cost of unused results.  */
>> + if (first_stmt_info == stmt_info)
>> +   {
>> + unsigned int gaps = DR_GROUP_SIZE (first_stmt_info);
>> + stmt_vec_info next_stmt_info = first_stmt_info;
>> + do
>> +   {
>> + gaps -= 1;
>> + next_stmt_info = DR_GROUP_NEXT_ELEMENT 
>> (next_stmt_info);
>> +   }
>> + while (next_stmt_info);
>> + if (gaps)
>> +   {
>> + if (dump_enabled_p ())
>> +   dump_printf_loc (MSG_NOTE, vect_location,
>> +"vect_model_load_cost: %d "
>> +"unused vectors.\n",
>> +gaps);
>> + vect_get_load_cost (vinfo, stmt_info, gaps,
>> + alignment_support_scheme,
>> + misalignment, false, _cost,
>> + _cost, cost_vec, cost_vec,
>> + true);
>> +   }
>> +   }
>> + vect_get_load_cost (vinfo, stmt_info, 1, 
>> alignment_support_scheme,
>> + misalignment, false, _cost,
>> + _cost, cost_vec, cost_vec, true);
>> + continue;
>> +   }
>> +
>> + /* 1. Create the vector or array pointer update chain.  */
>> + if (j == 0)
>> +   dataref_ptr
>> + = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
>> + at_loop, offset, , gsi,
>> + _incr, false, bump);
>> + else
>> +   {
>> + gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
>> + dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, 
>> gsi,
>> +stmt_info, bump);
>> +   }
>> + if (mask)
>> +   vec_mask = vec_masks[j];
>> +
>> + tree vec_array = create_vector_array (vectype, vec_num);
>> +
>> + tree final_mask = NULL_TREE;
>> + if (loop_masks)
>> +   final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
>> +ncopies, vectype, j);
>> + if (vec_mask)
>> +   final_mask =

[PATCH v1] RISC-V: Support RVV VFREC7 rounding mode intrinsic API

From: Pan Li 

This patch adds support for the rounding mode API for
VFREC7, as in the samples below.

* __riscv_vfrec7_v_f32m1_rm
* __riscv_vfrec7_v_f32m1_rm_m

Signed-off-by: Pan Li 

gcc/ChangeLog:

* config/riscv/riscv-vector-builtins-bases.cc
(class vfrec7_frm): New class for frm.
(vfrec7_frm_obj): New declaration.
(BASE): Ditto.
* config/riscv/riscv-vector-builtins-bases.h: Ditto.
* config/riscv/riscv-vector-builtins-functions.def
(vfrec7_frm): New intrinsic function definition.
* config/riscv/vector-iterators.md
(VFMISC): Remove VFREC7.
(misc_op): Ditto.
(float_insn_type): Ditto.
(VFMISC_FRM): New int iterator.
(misc_frm_op): New op for frm.
(float_frm_insn_type): New type for frm.
* config/riscv/vector.md (@pred_):
New pattern for misc frm.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/float-point-rec7.c: New test.
---
 .../riscv/riscv-vector-builtins-bases.cc  | 17 ++
 .../riscv/riscv-vector-builtins-bases.h   |  1 +
 .../riscv/riscv-vector-builtins-functions.def |  2 ++
 gcc/config/riscv/vector-iterators.md  | 12 +--
 gcc/config/riscv/vector.md| 23 ++
 .../riscv/rvv/base/float-point-rec7.c | 31 +++
 6 files changed, 83 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/float-point-rec7.c

diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.cc 
b/gcc/config/riscv/riscv-vector-builtins-bases.cc
index 2074dac0f16..249ac4e68cd 100644
--- a/gcc/config/riscv/riscv-vector-builtins-bases.cc
+++ b/gcc/config/riscv/riscv-vector-builtins-bases.cc
@@ -646,6 +646,21 @@ public:
   }
 };
 
+/* Implements below instructions for frm
+   - vfrec7
+*/
+template
+class vfrec7_frm : public function_base
+{
+public:
+  bool has_rounding_mode_operand_p () const override { return true; }
+
+  rtx expand (function_expander ) const override
+  {
+return e.use_exact_insn (code_for_pred (UNSPEC, e.vector_mode ()));
+  }
+};
+
 /* Implements vrsub.  */
 class vrsub : public function_base
 {
@@ -2433,6 +2448,7 @@ static CONSTEXPR const unop vfsqrt_obj;
 static CONSTEXPR const unop_frm vfsqrt_frm_obj;
 static CONSTEXPR const float_misc vfrsqrt7_obj;
 static CONSTEXPR const float_misc vfrec7_obj;
+static CONSTEXPR const vfrec7_frm vfrec7_frm_obj;
 static CONSTEXPR const binop vfmin_obj;
 static CONSTEXPR const binop vfmax_obj;
 static CONSTEXPR const float_misc vfsgnj_obj;
@@ -2681,6 +2697,7 @@ BASE (vfsqrt)
 BASE (vfsqrt_frm)
 BASE (vfrsqrt7)
 BASE (vfrec7)
+BASE (vfrec7_frm)
 BASE (vfmin)
 BASE (vfmax)
 BASE (vfsgnj)
diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.h 
b/gcc/config/riscv/riscv-vector-builtins-bases.h
index 5c91381bd4c..2a9381eec5e 100644
--- a/gcc/config/riscv/riscv-vector-builtins-bases.h
+++ b/gcc/config/riscv/riscv-vector-builtins-bases.h
@@ -187,6 +187,7 @@ extern const function_base *const vfsqrt;
 extern const function_base *const vfsqrt_frm;
 extern const function_base *const vfrsqrt7;
 extern const function_base *const vfrec7;
+extern const function_base *const vfrec7_frm;
 extern const function_base *const vfmin;
 extern const function_base *const vfmax;
 extern const function_base *const vfsgnj;
diff --git a/gcc/config/riscv/riscv-vector-builtins-functions.def 
b/gcc/config/riscv/riscv-vector-builtins-functions.def
index a821aca6a4b..34def6bb82f 100644
--- a/gcc/config/riscv/riscv-vector-builtins-functions.def
+++ b/gcc/config/riscv/riscv-vector-builtins-functions.def
@@ -396,6 +396,8 @@ DEF_RVV_FUNCTION (vfrsqrt7, alu, full_preds, f_v_ops)
 // 13.10. Vector Floating-Point Reciprocal Estimate Instruction
 DEF_RVV_FUNCTION (vfrec7, alu, full_preds, f_v_ops)
 
+DEF_RVV_FUNCTION (vfrec7_frm, alu_frm, full_preds, f_v_ops)
+
 // 13.11. Vector Floating-Point MIN/MAX Instructions
 DEF_RVV_FUNCTION (vfmin, alu, full_preds, f_vvv_ops)
 DEF_RVV_FUNCTION (vfmin, alu, full_preds, f_vvf_ops)
diff --git a/gcc/config/riscv/vector-iterators.md 
b/gcc/config/riscv/vector-iterators.md
index 30808ceb241..9dd611e254b 100644
--- a/gcc/config/riscv/vector-iterators.md
+++ b/gcc/config/riscv/vector-iterators.md
@@ -1867,7 +1867,9 @@ (define_int_iterator VSAT_SHIFT_OP [UNSPEC_VSSRL 
UNSPEC_VSSRA])
 
 (define_int_iterator VMISC [UNSPEC_VMSBF UNSPEC_VMSIF UNSPEC_VMSOF])
 
-(define_int_iterator VFMISC [UNSPEC_VFRSQRT7 UNSPEC_VFREC7])
+(define_int_iterator VFMISC [UNSPEC_VFRSQRT7])
+
+(define_int_iterator VFMISC_FRM [UNSPEC_VFREC7])
 
 (define_int_iterator VFCVTS [UNSPEC_VFCVT UNSPEC_UNSIGNED_VFCVT])
 
@@ -1890,9 +1892,13 @@ (define_int_attr sat_insn_type [(UNSPEC_VAADDU "vaalu") 
(UNSPEC_VAADD "vaalu")
(UNSPEC_VNCLIPU "vnclip")])
 
 (define_int_attr misc_op [(UNSPEC_VMSBF "sbf") (UNSPEC_VMSIF "sif") 
(UNSPEC_VMSOF "sof")
- (UNSPEC_VFRSQRT7 "rsqrt7") (UNSPEC_VFREC7 "rec7")])
+

Re: [PATCH V4] VECT: Support loop len control on EXTRACT_LAST vectorization

Hi Juzhe,

on 2023/8/14 20:08, juzhe.zh...@rivai.ai wrote:
> Hi, Kewen.
> 
> Could you test 'can_vec_extract_var_idx_p' and send the V5 patch once it
> passes testing?

The diff below was bootstrapped and regression-tested on Power10 LE.  Compared
to the previous v4, the only changes should be the proposed
can_vec_extract_var_idx_p check and its required new includes, as below:

+#include "memmodel.h"
+#include "optabs.h"
 
Could you double-check it?

Since I have only tested it on Power10 and you have full ownership of the patch,
I'd leave the v5 posting to you.  Thanks!

BR,
Kewen
-
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index bc3063c3615..5ae9f69c7eb 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -32,6 +32,8 @@ along with GCC; see the file COPYING3.  If not see
 #include "tree-pass.h"
 #include "ssa.h"
 #include "optabs-tree.h"
+#include "memmodel.h"
+#include "optabs.h"
 #include "diagnostic-core.h"
 #include "fold-const.h"
 #include "stor-layout.h"
@@ -10300,17 +10302,7 @@ vectorizable_live_operation (vec_info *vinfo, 
stmt_vec_info stmt_info,
   /* No transformation required.  */
   if (loop_vinfo && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
{
- if (!direct_internal_fn_supported_p (IFN_EXTRACT_LAST, vectype,
-  OPTIMIZE_FOR_SPEED))
-   {
- if (dump_enabled_p ())
-   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-"can't operate on partial vectors "
-"because the target doesn't support extract "
-"last reduction.\n");
- LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
-   }
- else if (slp_node)
+ if (slp_node)
{
  if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -10330,9 +10322,26 @@ vectorizable_live_operation (vec_info *vinfo, 
stmt_vec_info stmt_info,
  else
{
  gcc_assert (ncopies == 1 && !slp_node);
- vect_record_loop_mask (loop_vinfo,
-_VINFO_MASKS (loop_vinfo),
-1, vectype, NULL);
+ if (direct_internal_fn_supported_p (IFN_EXTRACT_LAST, vectype,
+ OPTIMIZE_FOR_SPEED))
+   vect_record_loop_mask (loop_vinfo,
+  _VINFO_MASKS (loop_vinfo),
+  1, vectype, NULL);
+ else if (can_vec_extract_var_idx_p (
+TYPE_MODE (vectype), TYPE_MODE (TREE_TYPE (vectype
+   vect_record_loop_len (loop_vinfo,
+ _VINFO_LENS (loop_vinfo),
+ 1, vectype, 1);
+ else
+   {
+ if (dump_enabled_p ())
+   dump_printf_loc (
+ MSG_MISSED_OPTIMIZATION, vect_location,
+ "can't operate on partial vectors "
+ "because the target doesn't support extract "
+ "last reduction.\n");
+ LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
+   }
}
}
   /* ???  Enable for loop costing as well.  */
@@ -10358,7 +10367,9 @@ vectorizable_live_operation (vec_info *vinfo, 
stmt_vec_info stmt_info,
   gimple *vec_stmt;
   if (slp_node)
 {
-  gcc_assert (!loop_vinfo || !LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
+  gcc_assert (!loop_vinfo
+ || (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
+ && !LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)));

   /* Get the correct slp vectorized stmt.  */
   vec_lhs = SLP_TREE_VEC_DEFS (slp_node)[vec_entry];
@@ -10402,7 +10413,42 @@ vectorizable_live_operation (vec_info *vinfo, 
stmt_vec_info stmt_info,

   gimple_seq stmts = NULL;
   tree new_tree;
-  if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
+  if (LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
+   {
+ /* Emit:
+
+  SCALAR_RES = VEC_EXTRACT 
+
+where VEC_LHS is the vectorized live-out result and MASK is
+the loop mask for the final iteration.  */
+ gcc_assert (ncopies == 1 && !slp_node);
+ gimple_seq tem = NULL;
+ gimple_stmt_iterator gsi = gsi_last (tem);
+ tree len
+   = vect_get_loop_len (loop_vinfo, ,
+_VINFO_LENS (loop_vinfo),
+1, vectype, 0, 0);
+
+ /* BIAS - 1.  */
+ signed char biasval = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
+ tree bias_minus_one
+   = int_const_binop (MINUS_EXPR,
+  build_int_cst (TREE_TYPE (len), biasval),
+

Re: [PATCH] vect: Move VMAT_GATHER_SCATTER handlings from final loop nest

2023-08-14 Thread Richard Sandiford via Gcc-patches

Thanks for the clean-ups.  But...

"Kewen.Lin"  writes:
> Hi,
>
> Following Richi's suggestion [1], this patch moves the
> handling of VMAT_GATHER_SCATTER in the final loop nest
> of function vectorizable_load to its own loop.  Basically
> it duplicates the final loop nest, cleans up some useless
> setup code for the case of VMAT_GATHER_SCATTER, and removes some
> unreachable code.  It also removes the corresponding handling
> in the final loop nest.
>
> Bootstrapped and regtested on x86_64-redhat-linux,
> aarch64-linux-gnu and powerpc64{,le}-linux-gnu.
>
> [1] https://gcc.gnu.org/pipermail/gcc-patches/2023-June/623329.html
>
> Is it ok for trunk?
>
> BR,
> Kewen
> -
>
> gcc/ChangeLog:
>
>   * tree-vect-stmts.cc (vectorizable_load): Move the handlings on
>   VMAT_GATHER_SCATTER in the final loop nest to its own loop,
>   and update the final nest accordingly.
> ---
>  gcc/tree-vect-stmts.cc | 361 +
>  1 file changed, 219 insertions(+), 142 deletions(-)

...that seems like quite a lot of +s.  Is there nothing we can do to
avoid the cut-&-paste?

Richard

>
> diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
> index c361e16cb7b..5e514eca19b 100644
> --- a/gcc/tree-vect-stmts.cc
> +++ b/gcc/tree-vect-stmts.cc
> @@ -10455,6 +10455,218 @@ vectorizable_load (vec_info *vinfo,
>return true;
>  }
>
> +  if (memory_access_type == VMAT_GATHER_SCATTER)
> +{
> +  gcc_assert (alignment_support_scheme == dr_aligned
> +   || alignment_support_scheme == dr_unaligned_supported);
> +  gcc_assert (!grouped_load && !slp_perm);
> +
> +  unsigned int inside_cost = 0, prologue_cost = 0;
> +  for (j = 0; j < ncopies; j++)
> + {
> +   /* 1. Create the vector or array pointer update chain.  */
> +   if (j == 0 && !costing_p)
> + {
> +   if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
> + vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info,
> +  slp_node, _info, _ptr,
> +  _offsets);
> +   else
> + dataref_ptr
> +   = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
> +   at_loop, offset, , gsi,
> +   _incr, false, bump);
> + }
> +   else if (!costing_p)
> + {
> +   gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
> +   if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info))
> + dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
> +gsi, stmt_info, bump);
> + }
> +
> +   if (mask && !costing_p)
> + vec_mask = vec_masks[j];
> +
> +   gimple *new_stmt = NULL;
> +   for (i = 0; i < vec_num; i++)
> + {
> +   tree final_mask = NULL_TREE;
> +   tree final_len = NULL_TREE;
> +   tree bias = NULL_TREE;
> +   if (!costing_p)
> + {
> +   if (loop_masks)
> + final_mask
> +   = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
> + vec_num * ncopies, vectype,
> + vec_num * j + i);
> +   if (vec_mask)
> + final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
> +final_mask, vec_mask, gsi);
> +
> +   if (i > 0 && !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
> + dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
> +gsi, stmt_info, bump);
> + }
> +
> +   /* 2. Create the vector-load in the loop.  */
> +   unsigned HOST_WIDE_INT align;
> +   if (gs_info.ifn != IFN_LAST)
> + {
> +   if (costing_p)
> + {
> +   unsigned int cnunits = vect_nunits_for_cost (vectype);
> +   inside_cost
> + = record_stmt_cost (cost_vec, cnunits, scalar_load,
> + stmt_info, 0, vect_body);
> +   continue;
> + }
> +   if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
> + vec_offset = vec_offsets[vec_num * j + i];
> +   tree zero = build_zero_cst (vectype);
> +   tree scale = size_int (gs_info.scale);
> +
> +   if (gs_info.ifn == IFN_MASK_LEN_GATHER_LOAD)
> + {
> +   if (loop_lens)
> + final_len
> +   = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
> +vec_num * ncopies, vectype,
> +vec_num * j + i, 1);
> +   else
> + final_len
>

[PATCH] RISC-V: Support MASK_LEN_{LOAD_LANES,STORE_LANES}

2023-08-14 Thread Juzhe-Zhong

This patch depends on middle-end support:
https://gcc.gnu.org/pipermail/gcc-patches/2023-August/627305.html

This patch allows us to auto-vectorize the following case:

#define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \
  void __attribute__ ((noinline, noclone)) \
  NAME##_8 (OUTTYPE *__restrict dest, INTYPE *__restrict src,  \
MASKTYPE *__restrict cond, intptr_t n) \
  {\
for (intptr_t i = 0; i < n; ++i)   \
  if (cond[i]) \
dest[i] = (src[i * 8] + src[i * 8 + 1] + src[i * 8 + 2]\
   + src[i * 8 + 3] + src[i * 8 + 4] + src[i * 8 + 5]  \
   + src[i * 8 + 6] + src[i * 8 + 7]); \
  }

#define TEST2(NAME, OUTTYPE, INTYPE)   \
  TEST_LOOP (NAME##_f32, OUTTYPE, INTYPE, int32_t)  
 \

#define TEST1(NAME, OUTTYPE)   \
  TEST2 (NAME##_i32, OUTTYPE, int32_t) \

#define TEST(NAME) \
  TEST1 (NAME##_i32, int32_t)  \

TEST (test)

ASM:

test_i32_i32_f32_8:
ble a3,zero,.L5
.L3:
vsetvli a4,a3,e8,mf4,ta,ma
vle32.v v0,0(a2)
vsetvli a5,zero,e32,m1,ta,ma
vmsne.viv0,v0,0
vsetvli zero,a4,e32,m1,ta,ma
vlseg8e32.v v8,(a1),v0.t
vsetvli a5,zero,e32,m1,ta,ma
sllia6,a4,2
vadd.vv v1,v9,v8
sllia7,a4,5
vadd.vv v1,v1,v10
sub a3,a3,a4
vadd.vv v1,v1,v11
vadd.vv v1,v1,v12
vadd.vv v1,v1,v13
vadd.vv v1,v1,v14
vadd.vv v1,v1,v15
vsetvli zero,a4,e32,m1,ta,ma
vse32.v v1,0(a0),v0.t
add a2,a2,a6
add a1,a1,a7
add a0,a0,a6
bne a3,zero,.L3
.L5:
ret

gcc/ChangeLog:

* config/riscv/autovec.md (vec_mask_len_load_lanes): New 
pattern.
(vec_mask_len_store_lanes): Ditto.
(2): Fix pattern for ICE.
(2): Ditto.
* config/riscv/riscv-protos.h (expand_lanes_load_store): New function.
* config/riscv/riscv-v.cc (get_mask_mode): Add tuple mode mask mode.
(expand_lanes_load_store): New function.
* config/riscv/vector-iterators.md: New iterator.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/gather-scatter/strided_load-2.c: Adapt 
tests.
* gcc.target/riscv/rvv/autovec/partial/slp-2.c: Ditto.
* gcc.target/riscv/rvv/autovec/partial/slp-4.c: Ditto.
* gcc.target/riscv/rvv/rvv.exp: Add lanes test.
* gcc.target/riscv/rvv/autovec/struct/mask_struct_load-1.c: New test.
* gcc.target/riscv/rvv/autovec/struct/mask_struct_load-2.c: New test.
* gcc.target/riscv/rvv/autovec/struct/mask_struct_load-3.c: New test.
* gcc.target/riscv/rvv/autovec/struct/mask_struct_load-4.c: New test.
* gcc.target/riscv/rvv/autovec/struct/mask_struct_load-5.c: New test.
* gcc.target/riscv/rvv/autovec/struct/mask_struct_load-6.c: New test.
* gcc.target/riscv/rvv/autovec/struct/mask_struct_load-7.c: New test.
* gcc.target/riscv/rvv/autovec/struct/mask_struct_load_run-1.c: New 
test.
* gcc.target/riscv/rvv/autovec/struct/mask_struct_load_run-2.c: New 
test.
* gcc.target/riscv/rvv/autovec/struct/mask_struct_load_run-3.c: New 
test.
* gcc.target/riscv/rvv/autovec/struct/mask_struct_load_run-4.c: New 
test.
* gcc.target/riscv/rvv/autovec/struct/mask_struct_load_run-5.c: New 
test.
* gcc.target/riscv/rvv/autovec/struct/mask_struct_load_run-6.c: New 
test.
* gcc.target/riscv/rvv/autovec/struct/mask_struct_load_run-7.c: New 
test.
* gcc.target/riscv/rvv/autovec/struct/mask_struct_store-1.c: New test.
* gcc.target/riscv/rvv/autovec/struct/mask_struct_store-2.c: New test.
* gcc.target/riscv/rvv/autovec/struct/mask_struct_store-3.c: New test.
* gcc.target/riscv/rvv/autovec/struct/mask_struct_store-4.c: New test.
* gcc.target/riscv/rvv/autovec/struct/mask_struct_store-5.c: New test.
* gcc.target/riscv/rvv/autovec/struct/mask_struct_store-6.c: New test.
* gcc.target/riscv/rvv/autovec/struct/mask_struct_store-7.c: New test.
* gcc.target/riscv/rvv/autovec/struct/mask_struct_store_run-1.c: New 
test.
* gcc.target/riscv/rvv/autovec/struct/mask_struct_store_run-2.c: New 
test.
* gcc.target/riscv/rvv/autovec/struct/mask_struct_store_run-3.c: New 
test.
* gcc.target/riscv/rvv/autovec/struct/mask_struct_store_run-4.c: New 
test.

Re: Re: [PATCH V4] VECT: Support loop len control on EXTRACT_LAST vectorization

2023-08-14 Thread juzhe.zh...@rivai.ai

Hi, Kewen.

Could you test 'can_vec_extract_var_idx_p' and send the V5 patch once it passes
testing?

Thanks.


juzhe.zh...@rivai.ai
 
From: Kewen.Lin
Date: 2023-08-14 17:19
To: Robin Dapp
CC: gcc-patches; richard.sandiford; rguenther; juzhe.zh...@rivai.ai
Subject: Re: [PATCH V4] VECT: Support loop len control on EXTRACT_LAST 
vectorization
Hi Robin,
 
on 2023/8/14 16:58, Robin Dapp wrote:
> Hi Kewen,
> 
>> I did a bootstrapping and regression testing on Power10 (LE) and found a lot 
>> of failures.
> 
> I think the problem is that just like for vec_set we're expecting
> the vec_extract expander not to fail.  It is probably passed not a
> const int here anymore and therefore fails to expand?
 
Thanks for the comments!  Yeah, I think the expectation doesn't hold
on Power, as our vec_extract optab only supports a constant index, that
is:
 
(define_expand "vec_extract"
  [(match_operand: 0 "register_operand")
   (match_operand:VEC_E 1 "vlogical_operand")
   (match_operand 2 "const_int_operand")]
  "VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)"
{
  rs6000_expand_vector_extract (operands[0], operands[1], operands[2]);
  DONE;
})
 
> 
> can_vec_extract_var_idx_p is supposed to check if the backend
> supports extracting a variable index.
 
OK, it sounds like this new capability needs a further check with
function can_vec_extract_var_idx_p to ensure the ifn expansion works
as expected.  I re-spun the patch by adding the below, per your comments:
 
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 07f3717ed9d..80ba5cae84a 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -10328,7 +10328,9 @@ vectorizable_live_operation (vec_info *vinfo, 
stmt_vec_info stmt_info,
   else if (convert_optab_handler (vec_extract_optab,
   TYPE_MODE (vectype),
   TYPE_MODE (TREE_TYPE (vectype)))
-   != CODE_FOR_nothing)
+ != CODE_FOR_nothing
+   && can_vec_extract_var_idx_p (
+ TYPE_MODE (vectype), TYPE_MODE (TREE_TYPE (vectype))))
 vect_record_loop_len (loop_vinfo,
   &LOOP_VINFO_LENS (loop_vinfo),
   1, vectype, 1);
 
BR,
Kewen

Re: Re: [PATCH V4] VECT: Support loop len control on EXTRACT_LAST vectorization

On Mon, 14 Aug 2023, juzhe.zh...@rivai.ai wrote:

> -   != CODE_FOR_nothing)
> + != CODE_FOR_nothing
> +   && can_vec_extract_var_idx_p (
> + TYPE_MODE (vectype), TYPE_MODE (TREE_TYPE 
> (vectype
> 
> I think the 'can_vec_extract_var_idx_p' check alone may be enough, so the 
> convert_optab_handler (vec_extract_optab,... check can be removed.
> Looking forward to more comments from Richi.

Yes, I think can_vec_extract_var_idx_p already does that so no need to
duplicate it here.

Richard.

> Thanks.
> 
> 
> juzhe.zh...@rivai.ai
>  
> From: Kewen.Lin
> Date: 2023-08-14 17:19
> To: Robin Dapp
> CC: gcc-patches; richard.sandiford; rguenther; juzhe.zh...@rivai.ai
> Subject: Re: [PATCH V4] VECT: Support loop len control on EXTRACT_LAST 
> vectorization
> Hi Robin,
>  
> on 2023/8/14 16:58, Robin Dapp wrote:
> > Hi Kewen,
> > 
> >> I did a bootstrapping and regression testing on Power10 (LE) and found a 
> >> lot of failures.
> > 
> > I think the problem is that just like for vec_set we're expecting
> > the vec_extract expander not to fail.  It is probably passed not a
> > const int here anymore and therefore fails to expand?
>  
> Thanks for the comments!  Yeah, I think the expectation doesn't hold
> on Power, as our vec_extract optab only support const index, that
> is:
>  
> (define_expand "vec_extract"
>   [(match_operand: 0 "register_operand")
>(match_operand:VEC_E 1 "vlogical_operand")
>(match_operand 2 "const_int_operand")]
>   "VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)"
> {
>   rs6000_expand_vector_extract (operands[0], operands[1], operands[2]);
>   DONE;
> })
>  
> > 
> > can_vec_extract_var_idx_p is supposed to check if the backend
> > supports extracting a variable index.
>  
> OK, it sounds that this new capability needs to further check with
> function can_vec_extract_var_idx_p to ensure the ifn expanding work
> as expected.  I re-spined by adding the below as your comments:
>  
> diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
> index 07f3717ed9d..80ba5cae84a 100644
> --- a/gcc/tree-vect-loop.cc
> +++ b/gcc/tree-vect-loop.cc
> @@ -10328,7 +10328,9 @@ vectorizable_live_operation (vec_info *vinfo, 
> stmt_vec_info stmt_info,
>else if (convert_optab_handler (vec_extract_optab,
>TYPE_MODE (vectype),
>TYPE_MODE (TREE_TYPE 
> (vectype)))
> -   != CODE_FOR_nothing)
> + != CODE_FOR_nothing
> +   && can_vec_extract_var_idx_p (
> + TYPE_MODE (vectype), TYPE_MODE (TREE_TYPE 
> (vectype
>  vect_record_loop_len (loop_vinfo,
>_VINFO_LENS (loop_vinfo),
>1, vectype, 1);
>  
> BR,
> Kewen
>  
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)

Re: [PATCH] vect: Move VMAT_LOAD_STORE_LANES handlings from final loop nest

On Mon, Aug 14, 2023 at 10:54 AM Kewen.Lin  wrote:
>
> Hi,
>
> Following Richi's suggestion [1], this patch moves the
> handling of VMAT_LOAD_STORE_LANES in the final loop nest
> of function vectorizable_load into its own loop.  Basically
> it duplicates the final loop nest, cleans up some useless
> setup code for the case of VMAT_LOAD_STORE_LANES, and removes
> some unreachable code.  It also removes the corresponding
> handling from the final loop nest.
>
> Bootstrapped and regtested on x86_64-redhat-linux,
> aarch64-linux-gnu and powerpc64{,le}-linux-gnu.

OK (I guess the big diff is mostly because of re-indenting).

Thanks,
Richard.

> [1] https://gcc.gnu.org/pipermail/gcc-patches/2023-June/623329.html
>
> gcc/ChangeLog:
>
> * tree-vect-stmts.cc (vectorizable_load): Move the handlings on
> VMAT_LOAD_STORE_LANES in the final loop nest to its own loop,
> and update the final nest accordingly.
> ---
>  gcc/tree-vect-stmts.cc | 1275 
>  1 file changed, 634 insertions(+), 641 deletions(-)
>
> diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
> index 4f2d088484c..c361e16cb7b 100644
> --- a/gcc/tree-vect-stmts.cc
> +++ b/gcc/tree-vect-stmts.cc
> @@ -10332,7 +10332,129 @@ vectorizable_load (vec_info *vinfo,
> vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies, mask,
>                                &vec_masks, mask_vectype);
>  }
> +
>tree vec_mask = NULL_TREE;
> +  if (memory_access_type == VMAT_LOAD_STORE_LANES)
> +{
> +  gcc_assert (alignment_support_scheme == dr_aligned
> + || alignment_support_scheme == dr_unaligned_supported);
> +  gcc_assert (grouped_load && !slp);
> +
> +  unsigned int inside_cost = 0, prologue_cost = 0;
> +  for (j = 0; j < ncopies; j++)
> +   {
> + if (costing_p)
> +   {
> + /* An IFN_LOAD_LANES will load all its vector results,
> +regardless of which ones we actually need.  Account
> +for the cost of unused results.  */
> + if (first_stmt_info == stmt_info)
> +   {
> + unsigned int gaps = DR_GROUP_SIZE (first_stmt_info);
> + stmt_vec_info next_stmt_info = first_stmt_info;
> + do
> +   {
> + gaps -= 1;
> + next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
> +   }
> + while (next_stmt_info);
> + if (gaps)
> +   {
> + if (dump_enabled_p ())
> +   dump_printf_loc (MSG_NOTE, vect_location,
> +"vect_model_load_cost: %d "
> +"unused vectors.\n",
> +gaps);
> + vect_get_load_cost (vinfo, stmt_info, gaps,
> + alignment_support_scheme,
> + misalignment, false, &inside_cost,
> + &prologue_cost, cost_vec, cost_vec,
> + true);
> +   }
> +   }
> + vect_get_load_cost (vinfo, stmt_info, 1, 
> alignment_support_scheme,
> + misalignment, false, &inside_cost,
> + &prologue_cost, cost_vec, cost_vec, true);
> + continue;
> +   }
> +
> + /* 1. Create the vector or array pointer update chain.  */
> + if (j == 0)
> +   dataref_ptr
> + = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
> + at_loop, offset, &dummy, gsi,
> + &ptr_incr, false, bump);
> + else
> +   {
> + gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
> + dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, 
> gsi,
> +stmt_info, bump);
> +   }
> + if (mask)
> +   vec_mask = vec_masks[j];
> +
> + tree vec_array = create_vector_array (vectype, vec_num);
> +
> + tree final_mask = NULL_TREE;
> + if (loop_masks)
> +   final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
> +ncopies, vectype, j);
> + if (vec_mask)
> +   final_mask = prepare_vec_mask (loop_vinfo, mask_vectype, 
> final_mask,
> +  vec_mask, gsi);
> +
> + gcall *call;
> + if (final_mask)
> +   {
> + /* Emit:
> +  VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
> +   VEC_MASK).  */
> + unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
> + tree alias_ptr = build_int_cst

Re: [PATCH] vect: Remove several useless VMAT_INVARIANT checks