[PATCH] x86: make VPTERNLOG* usable on less than 512-bit operands with just AVX512F

2023-06-13 Thread Jan Beulich via Gcc-patches
There's no reason to constrain this to AVX512VL, as the wider operation
is not usable for more narrow operands only when the possible memory
source is a non-broadcast one. This way even the scalar copysign3
can benefit from the operation being a single-insn one (leaving aside
moves which the compiler decides to insert for unclear reasons, and
leaving aside the fact that bcst_mem_operand() is too restrictive for
broadcast to be embedded right into VPTERNLOG*).

Along with this also request value duplication in
ix86_expand_copysign()'s call to ix86_build_signbit_mask(), eliminating
excess space allocation in .rodata.*, filled with zeros which are never
read.

gcc/

* config/i386/i386-expand.cc (ix86_expand_copysign): Request
value duplication by ix86_build_signbit_mask() when AVX512F and
not HFmode.
* config/i386/sse.md (*_vternlog_all): Convert to
2-alternative form. Adjust "mode" attribute. Add "enabled"
attribute.
(*_vpternlog_1): Relax to just TARGET_AVX512F.
(*_vpternlog_2): Likewise.
(*_vpternlog_3): Likewise.
---
I guess the underlying pattern, going along the lines of what
one_cmpl2 uses, can be applied elsewhere
as well.

HFmode could use embedded broadcast too for copysign and alike, but that
would need to be V2HF -> V8HF (for which I don't think there are any
existing patterns).

--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -2266,7 +2266,7 @@ ix86_expand_copysign (rtx operands[])
   else
 dest = NULL_RTX;
   op1 = lowpart_subreg (vmode, force_reg (mode, operands[2]), mode);
-  mask = ix86_build_signbit_mask (vmode, 0, 0);
+  mask = ix86_build_signbit_mask (vmode, TARGET_AVX512F && mode != HFmode, 0);
 
   if (CONST_DOUBLE_P (operands[1]))
 {
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -12399,11 +12399,11 @@
(set_attr "mode" "")])
 
 (define_insn "*_vternlog_all"
-  [(set (match_operand:V 0 "register_operand" "=v")
+  [(set (match_operand:V 0 "register_operand" "=v,v")
(unspec:V
- [(match_operand:V 1 "register_operand" "0")
-  (match_operand:V 2 "register_operand" "v")
-  (match_operand:V 3 "bcst_vector_operand" "vmBr")
+ [(match_operand:V 1 "register_operand" "0,0")
+  (match_operand:V 2 "register_operand" "v,v")
+  (match_operand:V 3 "bcst_vector_operand" "vBr,m")
   (match_operand:SI 4 "const_0_to_255_operand")]
  UNSPEC_VTERNLOG))]
   "TARGET_AVX512F
@@ -12411,10 +12411,22 @@
it's not real AVX512FP16 instruction.  */
   && (GET_MODE_SIZE (GET_MODE_INNER (mode)) >= 4
  || GET_CODE (operands[3]) != VEC_DUPLICATE)"
-  "vpternlog\t{%4, %3, %2, %0|%0, %2, %3, %4}"
+{
+  if (TARGET_AVX512VL)
+return "vpternlog\t{%4, %3, %2, %0|%0, %2, %3, %4}";
+  else
+return "vpternlog\t{%4, %g3, %g2, %g0|%g0, %g2, %g3, %4}";
+}
   [(set_attr "type" "sselog")
(set_attr "prefix" "evex")
-   (set_attr "mode" "")])
+   (set (attr "mode")
+(if_then_else (match_test "TARGET_AVX512VL")
+ (const_string "")
+ (const_string "XI")))
+   (set (attr "enabled")
+   (if_then_else (eq_attr "alternative" "1")
+ (symbol_ref " == 64 || TARGET_AVX512VL")
+ (const_string "*")))])
 
 ;; There must be lots of other combinations like
 ;;
@@ -12443,7 +12455,7 @@
  (any_logic2:V
(match_operand:V 3 "regmem_or_bitnot_regmem_operand")
(match_operand:V 4 "regmem_or_bitnot_regmem_operand"]
-  "( == 64 || TARGET_AVX512VL)
+  "TARGET_AVX512F
&& ix86_pre_reload_split ()
&& (rtx_equal_p (STRIP_UNARY (operands[1]),
STRIP_UNARY (operands[4]))
@@ -12527,7 +12539,7 @@
  (match_operand:V 2 "regmem_or_bitnot_regmem_operand"))
(match_operand:V 3 "regmem_or_bitnot_regmem_operand"))
  (match_operand:V 4 "regmem_or_bitnot_regmem_operand")))]
-  "( == 64 || TARGET_AVX512VL)
+  "TARGET_AVX512F
&& ix86_pre_reload_split ()
&& (rtx_equal_p (STRIP_UNARY (operands[1]),
STRIP_UNARY (operands[4]))
@@ -12610,7 +12622,7 @@
(match_operand:V 1 "regmem_or_bitnot_regmem_operand")
(match_operand:V 2 "regmem_or_bitnot_regmem_operand"))
  (match_operand:V 3 "regmem_or_bitnot_regmem_operand")))]
-  "( == 64 || TARGET_AVX512VL)
+  "TARGET_AVX512F
&& ix86_pre_reload_split ()"
   "#"
   "&& 1"


[PATCH] x86: make better use of VBROADCASTSS / VPBROADCASTD

2023-06-13 Thread Jan Beulich via Gcc-patches
... in vec_dupv4sf / *vec_dupv4si. The respective broadcast insns are
never longer (yet sometimes shorter) than the corresponding VSHUFPS /
VPSHUFD, due to the immediate operand of the shuffle insns balancing the
need for VEX3 in the broadcast ones. When EVEX encoding is required the
broadcast insns are always shorter.

Add two new alternatives each, one covering the AVX2 case and one
covering AVX512.

gcc/

* config/i386/sse.md (vec_dupv4sf): New AVX2 and AVX512F
alternatives using vbroadcastss.
(*vec_dupv4si): New AVX2 and AVX512F alternatives using
vpbroadcastd.
---
I'm working from the assumption that the isa attributes to the original
1st and 2nd alternatives don't need further restricting (to sse2_noavx2
or avx_noavx2 as applicable), as the new earlier alternatives cover all
operand forms already when at least AVX2 is enabled.

Isn't prefix_extra use bogus here? What extra prefix does vbroadcastss
use? (Same further down in *vec_dupv4si and avx2_vbroadcasti128_
and elsewhere.)

Is use of Yv for the source operand really necessary in *vec_dupv4si?
I.e. would scalar integer values be put in XMM{16...31} when AVX512VL
isn't enabled? If so (*movsi_internal / *movdi_internal suggest they
might), wouldn't *vec_dupv2di need to use Yv as well in its 3rd
alternative (or just m, as Yv is already covered by the 2nd one)?

--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -25798,38 +25798,42 @@
(const_int 1)))])
 
 (define_insn "vec_dupv4sf"
-  [(set (match_operand:V4SF 0 "register_operand" "=v,v,x")
+  [(set (match_operand:V4SF 0 "register_operand" "=Yv,v,v,v,x")
(vec_duplicate:V4SF
- (match_operand:SF 1 "nonimmediate_operand" "Yv,m,0")))]
+ (match_operand:SF 1 "nonimmediate_operand" "v,vm,Yv,m,0")))]
   "TARGET_SSE"
   "@
+   vbroadcastss\t{%1, %0|%0, %1}
+   vbroadcastss\t{%1, %g0|%g0, %1}
vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
vbroadcastss\t{%1, %0|%0, %1}
shufps\t{$0, %0, %0|%0, %0, 0}"
-  [(set_attr "isa" "avx,avx,noavx")
-   (set_attr "type" "sseshuf1,ssemov,sseshuf1")
-   (set_attr "length_immediate" "1,0,1")
-   (set_attr "prefix_extra" "0,1,*")
-   (set_attr "prefix" "maybe_evex,maybe_evex,orig")
-   (set_attr "mode" "V4SF")])
+  [(set_attr "isa" "avx2,avx512f,avx,avx,noavx")
+   (set_attr "type" "ssemov,ssemov,sseshuf1,ssemov,sseshuf1")
+   (set_attr "length_immediate" "0,0,1,0,1")
+   (set_attr "prefix_extra" "*,*,0,1,*")
+   (set_attr "prefix" "maybe_evex,evex,maybe_evex,maybe_evex,orig")
+   (set_attr "mode" "V4SF,V16SF,V4SF,V4SF,V4SF")])
 
 (define_insn "*vec_dupv4si"
-  [(set (match_operand:V4SI 0 "register_operand" "=v,v,x")
+  [(set (match_operand:V4SI 0 "register_operand" "=Yv,v,v,v,x")
(vec_duplicate:V4SI
- (match_operand:SI 1 "nonimmediate_operand" "Yv,m,0")))]
+ (match_operand:SI 1 "nonimmediate_operand" "vm,vm,Yv,m,0")))]
   "TARGET_SSE"
   "@
+   vpbroadcastd\t{%1, %0|%0, %1}
+   vpbroadcastd\t{%1, %g0|%g0, %1}
%vpshufd\t{$0, %1, %0|%0, %1, 0}
vbroadcastss\t{%1, %0|%0, %1}
shufps\t{$0, %0, %0|%0, %0, 0}"
-  [(set_attr "isa" "sse2,avx,noavx")
-   (set_attr "type" "sselog1,ssemov,sselog1")
-   (set_attr "length_immediate" "1,0,1")
-   (set_attr "prefix_extra" "0,1,*")
-   (set_attr "prefix" "maybe_vex,maybe_evex,orig")
-   (set_attr "mode" "TI,V4SF,V4SF")
+  [(set_attr "isa" "avx2,avx512f,sse2,avx,noavx")
+   (set_attr "type" "ssemov,ssemov,sselog1,ssemov,sselog1")
+   (set_attr "length_immediate" "0,0,1,0,1")
+   (set_attr "prefix_extra" "*,*,0,1,*")
+   (set_attr "prefix" "maybe_evex,evex,maybe_vex,maybe_evex,orig")
+   (set_attr "mode" "TI,XI,TI,V4SF,V4SF")
(set (attr "preferred_for_speed")
- (cond [(eq_attr "alternative" "1")
+ (cond [(eq_attr "alternative" "3")
  (symbol_ref "!TARGET_INTER_UNIT_MOVES_TO_VEC")
   ]
   (symbol_ref "true")))])


[PATCH] x86: add Bk and Br to comment list B's sub-chars

2023-06-13 Thread Jan Beulich via Gcc-patches
gcc/

* config/i386/constraints.md: Mention k and r for B.

--- a/gcc/config/i386/constraints.md
+++ b/gcc/config/i386/constraints.md
@@ -162,7 +162,9 @@
 ;;  g  GOT memory operand.
 ;;  m  Vector memory operand
 ;;  c  Constant memory operand
+;;  k  TLS address that allows insn using non-integer registers
 ;;  n  Memory operand without REX prefix
+;;  r  Broadcast memory operand
 ;;  s  Sibcall memory operand, not valid for TARGET_X32
 ;;  w  Call memory operand, not valid for TARGET_X32
 ;;  z  Constant call address operand.


[PATCH] x86/AVX512: use VMOVDDUP for broadcast to V2DF

2023-06-13 Thread Jan Beulich via Gcc-patches
Like is already the case for the AVX/AVX2 form, VMOVDDUP - acting on
double precision floating values - is more appropriate to use here, and
it can also result in shorter insn encodings when source is memory or
%xmm0...%xmm7, and no masking is applied (in allowing a 2-byte VEX
prefix then instead of a 3-byte one).

gcc/

* config/i386/sse.md (_vec_dup): Use
vmovddup.

--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -25724,9 +25724,9 @@
   "TARGET_AVX512F"
 {
   /*  There is no DF broadcast (in AVX-512*) to 128b register.
-  Mimic it with integer variant.  */
+  Mimic it with vmovddup, just like vec_dupv2df does.  */
   if (mode == V2DFmode)
-return "vpbroadcastq\t{%1, %0|%0, %q1}";
+return "vmovddup\t{%1, %0|%0, %q1}";
 
   return "vbroadcast\t{%1, 
%0|%0, %1}";
 }


Re: [PATCH v5 6/6] libstdc++: make std::is_object dispatch to new built-in traits

2023-06-13 Thread Ken Matsui via Gcc-patches
On Tue, Jun 13, 2023 at 10:10 PM François Dumont  wrote:
>
>
> On 13/06/2023 00:22, Ken Matsui via Libstdc++ wrote:
> > This patch gets std::is_object to dispatch to new built-in traits,
> > __is_function, __is_reference, and __is_void.
> >
> > libstdc++-v3/ChangeLog:
> >   * include/std/type_traits (is_object): Use new built-in traits,
> >   __is_function, __is_reference, and __is_void.
> >   (__is_object): Define this built-in-like macro.
> >   (is_object_v): Use built-in traits through the built-in-like macro.
> >
> > Signed-off-by: Ken Matsui 
> > ---
> >   libstdc++-v3/include/std/type_traits | 19 +++
> >   1 file changed, 19 insertions(+)
> >
> > diff --git a/libstdc++-v3/include/std/type_traits 
> > b/libstdc++-v3/include/std/type_traits
> > index 780fcc00135..93335f94385 100644
> > --- a/libstdc++-v3/include/std/type_traits
> > +++ b/libstdc++-v3/include/std/type_traits
> > @@ -682,11 +682,23 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> >   { };
> >
> > /// is_object
> > +#if __has_builtin(__is_function) && __has_builtin(__is_reference) \
> > +&& __has_builtin(__is_void)
> > +
> > +#define __is_object(_Tp) \
> > +  (!(__is_function(_Tp) || __is_reference(_Tp) || __is_void(_Tp)))
>
> Is this evaluation order random ? Are all those builtin functions
> performances equivalent ?
>
> I would have felt that __is_void is the simplest/fastest cause only for
> 'void' so would have put it first.

This particular order is derived from the original implementation:

```
   template
 struct is_object
 : public __not_<__or_, is_reference<_Tp>,
   is_void<_Tp>>>::type
 { };
```

From what I can see, it appears there shouldn't be any disparities in
performance based on these implementations in /gcc/cp/semantics.cc:

```
+case CPTK_IS_FUNCTION:
+  return type_code1 == FUNCTION_TYPE;

+case CPTK_IS_REFERENCE:
+  return type_code1 == REFERENCE_TYPE;

+case CPTK_IS_VOID:
+  return VOID_TYPE_P (type1);
```

VOID_TYPE_P: gcc/tree.h

```
/* Nonzero if this type is the (possibly qualified) void type.  */
#define VOID_TYPE_P(NODE) (TREE_CODE (NODE) == VOID_TYPE)
```

> > +
> > +  template
> > +struct is_object
> > +: public __bool_constant<__is_object(_Tp)>
> > +{ };
> > +#else
> > template
> >   struct is_object
> >   : public __not_<__or_, is_reference<_Tp>,
> > is_void<_Tp>>>::type
> >   { };
> > +#endif
> >
> > template
> >   struct is_member_pointer;
> > @@ -3235,8 +3247,15 @@ template 
> > inline constexpr bool is_arithmetic_v = is_arithmetic<_Tp>::value;
> >   template 
> > inline constexpr bool is_fundamental_v = is_fundamental<_Tp>::value;
> > +
> > +#ifdef __is_object
> > +template 
> > +  inline constexpr bool is_object_v = __is_object(_Tp);
> > +#else
> >   template 
> > inline constexpr bool is_object_v = is_object<_Tp>::value;
> > +#endif
> > +
> >   template 
> > inline constexpr bool is_scalar_v = is_scalar<_Tp>::value;
> >   template 


Re: [PATCH v5 6/6] libstdc++: make std::is_object dispatch to new built-in traits

2023-06-13 Thread François Dumont via Gcc-patches



On 13/06/2023 00:22, Ken Matsui via Libstdc++ wrote:

This patch gets std::is_object to dispatch to new built-in traits,
__is_function, __is_reference, and __is_void.

libstdc++-v3/ChangeLog:
* include/std/type_traits (is_object): Use new built-in traits,
__is_function, __is_reference, and __is_void.
(__is_object): Define this built-in-like macro.
(is_object_v): Use built-in traits through the built-in-like macro.

Signed-off-by: Ken Matsui 
---
  libstdc++-v3/include/std/type_traits | 19 +++
  1 file changed, 19 insertions(+)

diff --git a/libstdc++-v3/include/std/type_traits 
b/libstdc++-v3/include/std/type_traits
index 780fcc00135..93335f94385 100644
--- a/libstdc++-v3/include/std/type_traits
+++ b/libstdc++-v3/include/std/type_traits
@@ -682,11 +682,23 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
  { };
  
/// is_object

+#if __has_builtin(__is_function) && __has_builtin(__is_reference) \
+&& __has_builtin(__is_void)
+
+#define __is_object(_Tp) \
+  (!(__is_function(_Tp) || __is_reference(_Tp) || __is_void(_Tp)))


Is this evaluation order random ? Are all those builtin functions 
performances equivalent ?


I would have felt that __is_void is the simplest/fastest cause only for 
'void' so would have put it first.



+
+  template
+struct is_object
+: public __bool_constant<__is_object(_Tp)>
+{ };
+#else
template
  struct is_object
  : public __not_<__or_, is_reference<_Tp>,
is_void<_Tp>>>::type
  { };
+#endif
  
template

  struct is_member_pointer;
@@ -3235,8 +3247,15 @@ template 
inline constexpr bool is_arithmetic_v = is_arithmetic<_Tp>::value;
  template 
inline constexpr bool is_fundamental_v = is_fundamental<_Tp>::value;
+
+#ifdef __is_object
+template 
+  inline constexpr bool is_object_v = __is_object(_Tp);
+#else
  template 
inline constexpr bool is_object_v = is_object<_Tp>::value;
+#endif
+
  template 
inline constexpr bool is_scalar_v = is_scalar<_Tp>::value;
  template 


[PATCH] RISC-V: Use merge approach to optimize vector permutation

2023-06-13 Thread juzhe . zhong
From: Juzhe-Zhong 

This patch optimizes the permutation case that is suitable for the
merge approach.

Consider the following case:
typedef int8_t vnx16qi __attribute__((vector_size (16)));

#define MASK_16 0, 17, 2, 19, 4, 21, 6, 23, 8, 25, 10, 27, 12, 29, 14, 
31

void __attribute__ ((noipa))
merge0 (vnx16qi x, vnx16qi y, vnx16qi *out)
{
  vnx16qi v = __builtin_shufflevector ((vnx16qi) x, (vnx16qi) y, MASK_16);
  *(vnx16qi*)out = v;
} 

The gimple IR:
v_3 = VEC_PERM_EXPR ;

Selector = { 0, 17, 2, 19, 4, 21, 6, 23, 8, 25, 10, 27, 12, 29, 14, 31 }, the 
common expression:
{ 0, nunits + 1, 2, nunits + 3, 4, nunits + 5, ...  }

For this selector, we can use vmsltu + vmerge to optimize the codegen.

Before this patch:
merge0:
addia5,sp,16
vl1re8.vv3,0(a5)
li  a5,31
vsetivlizero,16,e8,m1,ta,mu
vmv.v.x v2,a5
lui a5,%hi(.LANCHOR0)
addia5,a5,%lo(.LANCHOR0)
vl1re8.vv1,0(a5)
vl1re8.vv4,0(sp)
vand.vv v1,v1,v2
vmsgeu.vi   v0,v1,16
vrgather.vv v2,v4,v1
vadd.vi v1,v1,-16
vrgather.vv v2,v3,v1,v0.t
vs1r.v  v2,0(a0)
ret

After this patch:
merge0:
addia5,sp,16
vl1re8.vv1,0(a5)
lui a5,%hi(.LANCHOR0)
addia5,a5,%lo(.LANCHOR0)
vsetivlizero,16,e8,m1,ta,ma
vl1re8.vv0,0(a5)
vl1re8.vv2,0(sp)
vmsltu.vi   v0,v0,16
vmerge.vvm  v1,v1,v2,v0
vs1r.v  v1,0(a0)
ret

The key of this optimization is that:
1. mask = vmsltu (selector, nunits)
2. result = vmerge (op0, op1, mask)

gcc/ChangeLog:

* config/riscv/riscv-v.cc (shuffle_merge_patterns): New pattern.
(expand_vec_perm_const_1): Add merge optimization.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/vls-vlmax/merge-1.c: New test.
* gcc.target/riscv/rvv/autovec/vls-vlmax/merge-2.c: New test.
* gcc.target/riscv/rvv/autovec/vls-vlmax/merge-3.c: New test.
* gcc.target/riscv/rvv/autovec/vls-vlmax/merge-4.c: New test.
* gcc.target/riscv/rvv/autovec/vls-vlmax/merge-5.c: New test.
* gcc.target/riscv/rvv/autovec/vls-vlmax/merge-6.c: New test.
* gcc.target/riscv/rvv/autovec/vls-vlmax/merge-7.c: New test.
* gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-1.c: New test.
* gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-2.c: New test.
* gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-3.c: New test.
* gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-4.c: New test.
* gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-5.c: New test.
* gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-6.c: New test.
* gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-7.c: New test.

---
 gcc/config/riscv/riscv-v.cc   |  52 +
 .../riscv/rvv/autovec/vls-vlmax/merge-1.c | 101 +
 .../riscv/rvv/autovec/vls-vlmax/merge-2.c | 103 +
 .../riscv/rvv/autovec/vls-vlmax/merge-3.c | 109 +
 .../riscv/rvv/autovec/vls-vlmax/merge-4.c | 122 ++
 .../riscv/rvv/autovec/vls-vlmax/merge-5.c |  76 +++
 .../riscv/rvv/autovec/vls-vlmax/merge-6.c |  51 +
 .../riscv/rvv/autovec/vls-vlmax/merge-7.c |  25 +++
 .../riscv/rvv/autovec/vls-vlmax/merge_run-1.c | 119 ++
 .../riscv/rvv/autovec/vls-vlmax/merge_run-2.c | 121 ++
 .../riscv/rvv/autovec/vls-vlmax/merge_run-3.c | 150 +
 .../riscv/rvv/autovec/vls-vlmax/merge_run-4.c | 210 ++
 .../riscv/rvv/autovec/vls-vlmax/merge_run-5.c |  89 
 .../riscv/rvv/autovec/vls-vlmax/merge_run-6.c |  59 +
 .../riscv/rvv/autovec/vls-vlmax/merge_run-7.c |  29 +++
 15 files changed, 1416 insertions(+)
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge-1.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge-2.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge-3.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge-4.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge-5.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge-6.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge-7.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-1.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-2.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-3.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-4.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-5.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-6.c
 create mode 100644 

Re: [PATCH] rs6000: replace '(const_int 0)' to 'unspec:BLK [(const_int 0)]' for stack_tie

2023-06-13 Thread Jiufu Guo via Gcc-patches


Hi,

Segher Boessenkool  writes:

> Hi!
>
> As I said in a reply to the original patch: not okay.  Sorry.

Thanks a lot for your comments!
I'm also thinking about other solutions:
1. "set (mem/c:BLK (reg/f:DI 1 1) (const_int 0 [0])"
  This is the existing pattern.  It may be read as an action
  to clean an unknown-size memory block.

2. "set (mem/c:BLK (reg/f:DI 1 1) unspec:blk (const_int 0 [0])
UNSPEC_TIE".
  Current patch is using this one.

3. "set (mem/c:DI (reg/f:DI 1 1) unspec:DI (const_int 0 [0])
UNSPEC_TIE".
   This avoids using BLK on unspec, but using DI.

4. "set (mem/c:BLK (reg/f:DI 1 1) unspec (const_int 0 [0])
UNSPEC_TIE"
   There is still a mode for the unspec.


>
> But some comments on this patch:
>
> On Tue, Jun 13, 2023 at 08:23:35PM +0800, Jiufu Guo wrote:
>> +  && XINT (SET_SRC (set), 1) == UNSPEC_TIE
>> +  && XVECEXP (SET_SRC (set), 0, 0) == const0_rtx);
>
> This makes it required that the operand of an UNSPEC_TIE unspec is a
> const_int 0.  This should be documented somewhere.  Ideally you would
> want no operand at all here, but every unspec has an operand.

Right!  Since UNSPEC_TIE is already checked, we may not need to check
the inner operand. Like " && XINT (SET_SRC (set), 1) == UNSPEC_TIE);".

>
>> +  RTVEC_ELT (p, i)
>> += gen_rtx_SET (mem, gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, const0_rtx),
>> +UNSPEC_TIE));
>
> If it is hard to indent your code, your code is trying to do too much.
> Just have an extra temporary?
>
>   rtx un = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, const0_rtx), 
> UNSPEC_TIE);
>   RTVEC_ELT (p, i) = gen_rtx_SET (mem, un);
>
> That is shorter even, and certainly more readable :-)

Yeap, thanks!

>
>> @@ -10828,7 +10829,9 @@ (define_expand "restore_stack_block"
>>operands[4] = gen_frame_mem (Pmode, operands[1]);
>>p = rtvec_alloc (1);
>>RTVEC_ELT (p, 0) = gen_rtx_SET (gen_frame_mem (BLKmode, operands[0]),
>> -  const0_rtx);
>> +  gen_rtx_UNSPEC (BLKmode,
>> +  gen_rtvec (1, const0_rtx),
>> +  UNSPEC_TIE));
>>operands[5] = gen_rtx_PARALLEL (VOIDmode, p);
>
> I have a hard time to see how this could ever be seen as clearer or more
> obvious or anything like that :-(

I was thinking about just invoking gen_stack_tie here.

BR,
Jeff (Jiufu Guo)

>
>
> Segher


Re: [PATCH] rs6000: replace '(const_int 0)' to 'unspec:BLK [(const_int 0)]' for stack_tie

2023-06-13 Thread Jiufu Guo via Gcc-patches


Hi Segher, David,

David Edelsohn  writes:

> On Tue, Jun 13, 2023 at 2:16 PM Segher Boessenkool
>  wrote:
>>
>> Hi!
>>
>> On Tue, Jun 13, 2023 at 10:15:49AM +0800, Jiufu Guo wrote:
>> > David Edelsohn  writes:
>> > >
>> > > This definitely seems to be a better solution.
>> > >
>> > > The TARGET_CONST_ANCHOR change should not be part of this patch.  Also
>> > > there is no ChangeLog for the patch.
>> >
>> > Thanks a lot for your quick review!! And sorry for the sending this patch
>> > in a hurry.  I would update the patch accordingly.
>>
>> > > This generally looks correct and consistent with other ports. I want
>> > > to give Segher a chance to double check it, if he wishes.
>>
>> The documentation is very clear that the only thing for which you can
>> have BLKmode is "mem".  Not unspec, only "mem".
>>
>> Let's not do this.  The existing code has clear and obvious semantics,
>> which is documented as well -- there is no reason to make it worse in
>> every respect.

Thanks for all your insightful comments!

Yeap, while "unspec:BLK" is already very widely used on various ports.
And it seems a few places use BLKmode without strictly aligning with
the documentation :( That is not a very good thing, but maybe there are
no better solutions.

For existing code "set (mem/c:BLK (reg/f:DI 1 1) (const_int 0 [0])"
Since it is a set, the operand set_src should be valid for
the mode of the set_dest. While set_src is 'const_int 0'.
And this 'set' may be misread as 'a memory is zeroed' or
'no-op to a mem'. Using unspec here would just say this is a special
operation instead of a normal 'const_int 0'.

BR,
Jeff (Jiufu Guo)

>
> Segher,
>
> Unfortunately, GCC now is inconsistent and this response is incorrect.
> The documentation is out of date or was ignored and the "facts on the
> ground" contradict your review.
>
> Yes, (const_int 0) is supposed to be a general no-op and BLKmode only
> is supposed to be used for MEM, but other major targets (arm, aarch64,
> riscv, s390) all use unspec:BLK and specifically UNSPEC_TIE.  rs6000
> is the only port that does not follow this convention.  The middle-end
> has adapted to the behavior of all of the other targets, whether that
> conformed to the documentation or not.  The rs6000 port needs to be
> fixed and Jiufu's approach is the correct one, consistent with all
> other targets for stack tie.  If the documentation differs, the
> documentation needs to be updated, not a different approach for the
> rs6000 port.  Jiufu's patch is correct.
>
> Thanks, David


Re: [RFC] Add stdckdint.h header for C23

2023-06-13 Thread Paul Eggert

On 6/12/23 23:28, Jakub Jelinek via Libc-alpha wrote:

On Mon, Jun 12, 2023 at 09:51:02PM +, Joseph Myers wrote:

On Sat, 10 Jun 2023, Jakub Jelinek via Gcc-patches wrote:


I have looked at gnulib stdckdint.h and they are full of workarounds
for various compilers, EDG doesn't do this, clang <= 14 can't multiply
__int128, ..., so I think the header belongs into the compiler rather
than C library, because it would be a nightmare to maintain it there.


I tend to agree. I don't see how to implement <stdckdint.h> in the C 
library, at least not for the C library's users.


It would be possible to implement <stdckdint.h> for C library internal 
use only, because then we could assume #include_next, and we could use 
the Gnulib implementation safely (that implementation is already present 
in glibc internals, just under a different name). This could well be worth 
doing, because glibc internally needs ckd_add (or something equivalent) 
but glibc can't yet assume that it's built with GCC 14 (or whatever GCC 
version eventually supports <stdckdint.h>).




There is always the possibility to have the header co-owned by both
the compiler and C library, limits.h style.
Just
#if __has_include_next(<stdckdint.h>)
# include_next <stdckdint.h>
#endif


I don't see how you could implement __has_include_next(<stdckdint.h>) 
for arbitrary non-GCC compilers, which is what we'd need for glibc 
users. For glibc internals we can use "#include_next" more readily, 
since we assume a new-enough GCC. I.e. we could do something like this:


   #if 14 <= __GNUC__
   # include_next <stdckdint.h>
   #else
   # define ckd_add(r, a, b) INT_ADD_WRAPV (a, b, &(r))
   #endif

where INT_ADD_WRAPV is the already-existing glibc internal macro, and 
where we invoke ckd_add only with arguments free of side effects.


Re: [PATCH v1] RISC-V: Align the predictor style for define_insn_and_split

2023-06-13 Thread juzhe.zh...@rivai.ai
LGTM. 



juzhe.zh...@rivai.ai
 
From: pan2.li
Date: 2023-06-14 10:15
To: gcc-patches
CC: juzhe.zhong; rdapp.gcc; jeffreyalaw; pan2.li; yanzhang.wang; kito.cheng
Subject: [PATCH v1] RISC-V: Align the predictor style for define_insn_and_split
From: Pan Li 
 
This patch is considered as the follow up of the below PATCH.
 
https://gcc.gnu.org/pipermail/gcc-patches/2023-June/621347.html
 
We align the predictor style for the define_insn_and_split patterns, as
suggested by Kito, to avoid potential issues before we hit them.
 
Signed-off-by: Pan Li 
 
gcc/ChangeLog:
 
* config/riscv/autovec-opt.md: Align the predictor style.
* config/riscv/autovec.md: Ditto.
---
gcc/config/riscv/autovec-opt.md | 20 ++--
gcc/config/riscv/autovec.md | 24 
2 files changed, 22 insertions(+), 22 deletions(-)
 
diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md
index aef28e445e1..fb1b07205aa 100644
--- a/gcc/config/riscv/autovec-opt.md
+++ b/gcc/config/riscv/autovec-opt.md
@@ -37,9 +37,9 @@ (define_insn_and_split 
"@pred_single_widen_mul"
  (match_operand: 4 "register_operand" "   vr,   vr"))
(match_operand:VWEXTI 3 "register_operand" "   vr,   vr"))
  (match_operand:VWEXTI 2 "vector_merge_operand"   "   vu,0")))]
-  "TARGET_VECTOR"
+  "TARGET_VECTOR && can_create_pseudo_p ()"
   "#"
-  "&& can_create_pseudo_p ()"
+  "&& 1"
   [(const_int 0)]
   {
 insn_code icode = code_for_pred_vf2 (, mode);
@@ -132,9 +132,9 @@ (define_insn_and_split "*not"
(bitmanip_bitwise:VB
  (not:VB (match_operand:VB 2 "register_operand" " vr"))
  (match_operand:VB 1 "register_operand" " vr")))]
-  "TARGET_VECTOR"
+  "TARGET_VECTOR && can_create_pseudo_p ()"
   "#"
-  "&& can_create_pseudo_p ()"
+  "&& 1"
   [(const_int 0)]
   {
 insn_code icode = code_for_pred_not (, mode);
@@ -159,9 +159,9 @@ (define_insn_and_split "*n"
  (any_bitwise:VB
(match_operand:VB 1 "register_operand" " vr")
(match_operand:VB 2 "register_operand" " vr"]
-  "TARGET_VECTOR"
+  "TARGET_VECTOR && can_create_pseudo_p ()"
   "#"
-  "&& can_create_pseudo_p ()"
+  "&& 1"
   [(const_int 0)]
   {
 insn_code icode = code_for_pred_n (, mode);
@@ -346,9 +346,9 @@ (define_insn_and_split 
"*vtrunc"
 (match_operand:VWEXTI 1 "register_operand" " vr,vr")
(any_extend:VWEXTI
   (match_operand: 2 "vector_shift_operand" " 
vr,vk")]
-  "TARGET_VECTOR"
+  "TARGET_VECTOR && can_create_pseudo_p ()"
   "#"
-  "&& can_create_pseudo_p ()"
+  "&& 1"
   [(const_int 0)]
{
   insn_code icode = code_for_pred_narrow (, mode);
@@ -364,9 +364,9 @@ (define_insn_and_split "*trunc"
   (any_shiftrt:VWEXTI
 (match_operand:VWEXTI 1 "register_operand"   " vr")
(match_operand: 2 "csr_operand" " rK"]
-  "TARGET_VECTOR"
+  "TARGET_VECTOR && can_create_pseudo_p ()"
   "#"
-  "&& can_create_pseudo_p ()"
+  "&& 1"
   [(const_int 0)]
{
   operands[2] = gen_lowpart (Pmode, operands[2]);
diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index eadc2c5b595..c23a625afe1 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -155,9 +155,9 @@ (define_insn_and_split "3"
 (any_shift:VI
  (match_operand:VI 1 "register_operand"" vr")
  (match_operand: 2 "csr_operand"  " rK")))]
-  "TARGET_VECTOR"
+  "TARGET_VECTOR && can_create_pseudo_p ()"
   "#"
-  "&& can_create_pseudo_p ()"
+  "&& 1"
   [(const_int 0)]
{
   operands[2] = gen_lowpart (Pmode, operands[2]);
@@ -180,9 +180,9 @@ (define_insn_and_split "v3"
 (any_shift:VI
  (match_operand:VI 1 "register_operand" " vr,vr")
  (match_operand:VI 2 "vector_shift_operand" " vr,vk")))]
-  "TARGET_VECTOR"
+  "TARGET_VECTOR && can_create_pseudo_p ()"
   "#"
-  "&& can_create_pseudo_p ()"
+  "&& 1"
   [(const_int 0)]
{
   riscv_vector::emit_vlmax_insn (code_for_pred (, mode),
@@ -205,9 +205,9 @@ (define_insn_and_split "3"
   [(set (match_operand:VB 0 "register_operand" "=vr")
(any_bitwise:VB (match_operand:VB 1 "register_operand" " vr")
(match_operand:VB 2 "register_operand" " vr")))]
-  "TARGET_VECTOR"
+  "TARGET_VECTOR && can_create_pseudo_p ()"
   "#"
-  "&& can_create_pseudo_p ()"
+  "&& 1"
   [(const_int 0)]
   {
 insn_code icode = code_for_pred (, mode);
@@ -227,9 +227,9 @@ (define_insn_and_split "3"
(define_insn_and_split "one_cmpl2"
   [(set (match_operand:VB 0 "register_operand" "=vr")
(not:VB (match_operand:VB 1 "register_operand" " vr")))]
-  "TARGET_VECTOR"
+  "TARGET_VECTOR && can_create_pseudo_p ()"
   "#"
-  "&& can_create_pseudo_p ()"
+  "&& 1"
   [(const_int 0)]
   {
 insn_code icode = code_for_pred_not (mode);
@@ -366,9 +366,9 @@ (define_insn_and_split "2"
   [(set (match_operand:VWEXTI 0 "register_operand" "=")
 (any_extend:VWEXTI
  (match_operand: 1 "register_operand" "vr")))]
-  "TARGET_VECTOR"
+  "TARGET_VECTOR && can_create_pseudo_p ()"
   "#"
-  "&& can_create_pseudo_p ()"

[PATCH v1] RISC-V: Align the predictor style for define_insn_and_split

2023-06-13 Thread Pan Li via Gcc-patches
From: Pan Li 

This patch is considered as the follow up of the below PATCH.

https://gcc.gnu.org/pipermail/gcc-patches/2023-June/621347.html

We align the predictor style for the define_insn_and_split patterns, as
suggested by Kito, to avoid potential issues before we hit them.

Signed-off-by: Pan Li 

gcc/ChangeLog:

* config/riscv/autovec-opt.md: Align the predictor style.
* config/riscv/autovec.md: Ditto.
---
 gcc/config/riscv/autovec-opt.md | 20 ++--
 gcc/config/riscv/autovec.md | 24 
 2 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md
index aef28e445e1..fb1b07205aa 100644
--- a/gcc/config/riscv/autovec-opt.md
+++ b/gcc/config/riscv/autovec-opt.md
@@ -37,9 +37,9 @@ (define_insn_and_split 
"@pred_single_widen_mul"
  (match_operand: 4 "register_operand" "   vr,   
vr"))
(match_operand:VWEXTI 3 "register_operand" "   vr,   
vr"))
  (match_operand:VWEXTI 2 "vector_merge_operand"   "   vu,
0")))]
-  "TARGET_VECTOR"
+  "TARGET_VECTOR && can_create_pseudo_p ()"
   "#"
-  "&& can_create_pseudo_p ()"
+  "&& 1"
   [(const_int 0)]
   {
 insn_code icode = code_for_pred_vf2 (, mode);
@@ -132,9 +132,9 @@ (define_insn_and_split "*not"
(bitmanip_bitwise:VB
  (not:VB (match_operand:VB 2 "register_operand" " vr"))
  (match_operand:VB 1 "register_operand" " vr")))]
-  "TARGET_VECTOR"
+  "TARGET_VECTOR && can_create_pseudo_p ()"
   "#"
-  "&& can_create_pseudo_p ()"
+  "&& 1"
   [(const_int 0)]
   {
 insn_code icode = code_for_pred_not (, mode);
@@ -159,9 +159,9 @@ (define_insn_and_split "*n"
  (any_bitwise:VB
(match_operand:VB 1 "register_operand" " vr")
(match_operand:VB 2 "register_operand" " vr"]
-  "TARGET_VECTOR"
+  "TARGET_VECTOR && can_create_pseudo_p ()"
   "#"
-  "&& can_create_pseudo_p ()"
+  "&& 1"
   [(const_int 0)]
   {
 insn_code icode = code_for_pred_n (, mode);
@@ -346,9 +346,9 @@ (define_insn_and_split 
"*vtrunc"
 (match_operand:VWEXTI 1 "register_operand" " vr,vr")
(any_extend:VWEXTI
   (match_operand: 2 "vector_shift_operand" " 
vr,vk")]
-  "TARGET_VECTOR"
+  "TARGET_VECTOR && can_create_pseudo_p ()"
   "#"
-  "&& can_create_pseudo_p ()"
+  "&& 1"
   [(const_int 0)]
 {
   insn_code icode = code_for_pred_narrow (, mode);
@@ -364,9 +364,9 @@ (define_insn_and_split "*trunc"
   (any_shiftrt:VWEXTI
 (match_operand:VWEXTI 1 "register_operand"   " vr")
(match_operand: 2 "csr_operand" " rK"]
-  "TARGET_VECTOR"
+  "TARGET_VECTOR && can_create_pseudo_p ()"
   "#"
-  "&& can_create_pseudo_p ()"
+  "&& 1"
   [(const_int 0)]
 {
   operands[2] = gen_lowpart (Pmode, operands[2]);
diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index eadc2c5b595..c23a625afe1 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -155,9 +155,9 @@ (define_insn_and_split "3"
 (any_shift:VI
  (match_operand:VI 1 "register_operand"" vr")
  (match_operand: 2 "csr_operand"  " rK")))]
-  "TARGET_VECTOR"
+  "TARGET_VECTOR && can_create_pseudo_p ()"
   "#"
-  "&& can_create_pseudo_p ()"
+  "&& 1"
   [(const_int 0)]
 {
   operands[2] = gen_lowpart (Pmode, operands[2]);
@@ -180,9 +180,9 @@ (define_insn_and_split "v3"
 (any_shift:VI
  (match_operand:VI 1 "register_operand" " vr,vr")
  (match_operand:VI 2 "vector_shift_operand" " vr,vk")))]
-  "TARGET_VECTOR"
+  "TARGET_VECTOR && can_create_pseudo_p ()"
   "#"
-  "&& can_create_pseudo_p ()"
+  "&& 1"
   [(const_int 0)]
 {
   riscv_vector::emit_vlmax_insn (code_for_pred (, mode),
@@ -205,9 +205,9 @@ (define_insn_and_split "3"
   [(set (match_operand:VB 0 "register_operand" "=vr")
(any_bitwise:VB (match_operand:VB 1 "register_operand" " vr")
(match_operand:VB 2 "register_operand" " vr")))]
-  "TARGET_VECTOR"
+  "TARGET_VECTOR && can_create_pseudo_p ()"
   "#"
-  "&& can_create_pseudo_p ()"
+  "&& 1"
   [(const_int 0)]
   {
 insn_code icode = code_for_pred (, mode);
@@ -227,9 +227,9 @@ (define_insn_and_split "3"
 (define_insn_and_split "one_cmpl2"
   [(set (match_operand:VB 0 "register_operand" "=vr")
(not:VB (match_operand:VB 1 "register_operand" " vr")))]
-  "TARGET_VECTOR"
+  "TARGET_VECTOR && can_create_pseudo_p ()"
   "#"
-  "&& can_create_pseudo_p ()"
+  "&& 1"
   [(const_int 0)]
   {
 insn_code icode = code_for_pred_not (mode);
@@ -366,9 +366,9 @@ (define_insn_and_split "2"
   [(set (match_operand:VWEXTI 0 "register_operand" "=")
 (any_extend:VWEXTI
  (match_operand: 1 "register_operand" "vr")))]
-  "TARGET_VECTOR"
+  "TARGET_VECTOR && can_create_pseudo_p ()"
   "#"
-  "&& can_create_pseudo_p ()"
+  "&& 1"
   [(const_int 0)]
 {
   insn_code icode = code_for_pred_vf2 (, mode);
@@ -409,9 

[PATCH, V6] Fix power10 fusion and -fstack-protector, PR target/105325

2023-06-13 Thread Michael Meissner via Gcc-patches
This patch fixes an issue where if you use the -fstack-protector and
-mcpu=power10 options and you have a large stack frame, the GCC compiler will
generate a LWA instruction with a large offset.

Unlike the previous versions of this patch, I dug into it, and I found it was
much more complex than I originally thought.

The important thing in the bug is that -fstack-protector is used, but it could
potentially happen with fused load-compare to any stack location when the stack
frame is larger than 32K without -fstack-protector.

Here is the initial fused insn that was created.  It refers to the
stack location based off of the virtual frame pointer:

(insn 6 5 7 2 (parallel [
(set (reg:CC 119)
 (compare:CC (mem/c:SI (plus:DI (reg/f:DI 110 sfp)
(const_int -4))
 (const_int 0 [0])))
(clobber (scratch:DI))
])
 (nil))

After the stack size is finalized, the frame pointer removed, and the post
reload phase is run, the insn is now:

(insn 6 5 7 2 (parallel [
(set (reg:CC 100 0 [119])
 (compare:CC (mem/c:SI (plus:DI (reg/f:DI 1 1)
(const_int 40044))
 (const_int 0 [0])))
(clobber (reg:DI 9 9 [120]))
])
 (nil))

When the split2 pass is run after reload has finished the ds_form_mem_operand
predicate that was used for lwa and ld no longer returns true.  This means that
since the operand predicates aren't recognized, it won't be split.  Thus, it
goes all of the way to final.  The automatic prefix instruction support was not
run because the type was changed from "load" to "fused_load_cmpi".  This meant
that it was assumed that the insn was only 8 bytes, and that we did not need to
prefix the lwa with a 'p'.

The solution involves:

1)  Don't use ds_form_mem_operand for ld and lwa, always use
non_update_memory_operand.

2)  Delete ds_form_mem_operand since it is no longer used.

3)  Use the "YZ" constraints for ld/lwa instead of "m".

4)  If we don't need to sign extend the lwa, convert it to lwz, and use
cmpwi instead of cmpdi.  Adjust the insn name to reflect the code
generated.

5)  Ensure that the insn using lwa will be recognized as having a prefixed
operand (and hence the instruction length is 16 bytes instead of 8
bytes).

5a) Set the prefixed and maybe_prefix attributes to know that
fused_load_cmpi are also load insns;

5b) In the case where we are just setting CC and not using the memory
afterward, set the clobber to use a DI register, and put an
explicit sign_extend operation in the split;

5c) Set the sign_extend attribute to "yes".

5d) 5a-5c are the things that prefixed_load_p in rs6000.cc checks to
ensure that lwa is treated as a ds-form instruction and not as
a d-form instruction (i.e. lwz).

6)  Add a new test case for this case.

7)  Adjust the insn counts in fusion-p10-ldcmpi.c.  Because we are no
longer using ds_form_mem_operand, the ld and lwa instructions will fuse
x-form (reg+reg) addresses in addition to ds-form (reg+offset or reg).

I have built bootstrap compilers and tested them on the following environments.
There were no regressions in any of the runs.

Little endian power10, long double is IBM 128-bit
Little endian power9, long double is IBM 128-bit
Little endian power9, long double is IEEE 128-bit
Big endian power8, long double is IBM 128-bit (32/64-bit tests run)

Can I check this patch into the master GCC branch?  After a waiting period, once
the previous changes to genfusion.pl are checked in, can I install this patch in
previous GCC compilers?

2023-06-12   Michael Meissner  

gcc/

* config/rs6000/genfusion.pl (gen_ld_cmpi_p10_one): Fix problems that
allowed prefixed lwa to be generated.
* config/rs6000/fusion.md: Regenerate.
* config/rs6000/predicates.md (ds_form_mem_operand): Delete.
* config/rs6000/rs6000.md (prefixed attribute): Add support for load
plus compare immediate fused insns.
(maybe_prefixed): Likewise.

gcc/testsuite/

* g++.target/powerpc/pr105325.C: New test.
* gcc/testsuite/gcc.target/powerpc/fusion-p10-ldcmpi.c: Update insn
counts.
---
 gcc/config/rs6000/fusion.md   | 27 +++---
 gcc/config/rs6000/genfusion.pl| 36 +++
 gcc/config/rs6000/predicates.md   | 14 
 gcc/config/rs6000/rs6000.md   |  4 +--
 gcc/testsuite/g++.target/powerpc/pr105325.C   | 26 ++
 .../gcc.target/powerpc/fusion-p10-ldcmpi.c| 16 +
 6 files changed, 81 insertions(+), 42 deletions(-)
 create mode 100644 gcc/testsuite/g++.target/powerpc/pr105325.C

diff 

Re: [PATCH] rs6000: replace '(const_int 0)' to 'unspec:BLK [(const_int 0)]' for stack_tie

2023-06-13 Thread Jiufu Guo via Gcc-patches
Hi,

Xi Ruoyao  writes:

> On Tue, 2023-06-13 at 20:23 +0800, Jiufu Guo via Gcc-patches wrote:
>
>> Compared with the previous version, this adds ChangeLog and removes
>> const_anchor parts.
>> https://gcc.gnu.org/pipermail/gcc-patches/2023-June/621356.html.
>
> [Off topic]
>
> const_anchor is just broken now.  See
> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104843 and the thread
> beginning at
> https://gcc.gnu.org/pipermail/gcc-patches/2022-March/591470.html.  If
> you want to use it for rs6000 I guess you need to fix it first...

Thanks so much for pointing out this.  It seems about supporting
negative value, right?

As you say: for 1. "g(0x8123, 0x81240001)", it would be fine.

The generated insns are:
(insn 5 2 6 2 (set (reg:DI 117)
(const_int -2128347135 [0x81240001])) "negative.c":5:3 681 
{*movdi_internal64}
 (nil))
(insn 6 5 7 2 (set (reg:DI 118)
(plus:DI (reg:DI 117)
(const_int -2 [0xfffe]))) "negative.c":5:3 66 {*adddi3}
 (expr_list:REG_EQUAL (const_int -2128347137 [0x8123])
(nil)))

While for 2. "g (0x7fff, 0x8001)", the generated rtl insns:
(insn 5 2 6 2 (set (reg:DI 117)
(const_int -2147483647 [0x8001])) "negative.c":5:3 681 
{*movdi_internal64}
 (nil))
(insn 7 6 8 2 (set (reg:DI 3 3)
(const_int 2147483647 [0x7fff])) "negative.c":5:3 681 
{*movdi_internal64}
 (nil))

The current const_anchor does not generate sth like: "r3 = r117 - 2"
But I would lean to say it is the limitation of current implementation:
"0x8001" and "0x7fff" hit different anchors(even these
two values are 'close' on some aspect.)

BR,
Jeff (Jiufu Guo)

>
> To me const_anchor needs a complete rework but I don't want to spend my
> time on it.


[PATCH] [x86] Use x instead of v for alternative 2 (v, BH) in mov_internal.

2023-06-13 Thread liuhongt via Gcc-patches
Since there's no evex version for vpcmpeq ymm, ymm, ymm.

Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
Ready to push to trunk and backport to GCC13.

gcc/ChangeLog:

PR target/110227
* config/i386/sse.md (mov_internal>): Use x instead of v
for alternative 2 since there's no evex version for vpcmpeqd
ymm, ymm, ymm.

gcc/testsuite/ChangeLog:

* gcc.target/i386/pr110227.c: New test.
---
 gcc/config/i386/sse.md   |  2 +-
 gcc/testsuite/gcc.target/i386/pr110227.c | 11 +++
 2 files changed, 12 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr110227.c

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 9bec09d354a..370ea6418a6 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -1324,7 +1324,7 @@ (define_expand "mov"
 
 (define_insn "mov_internal"
   [(set (match_operand:VMOVE 0 "nonimmediate_operand"
-"=v,v ,v,v ,m")
+"=v,v ,x,v ,m")
(match_operand:VMOVE 1 "nonimmediate_or_sse_const_operand"
 " C,,BH,vm,v"))]
   "TARGET_SSE
diff --git a/gcc/testsuite/gcc.target/i386/pr110227.c 
b/gcc/testsuite/gcc.target/i386/pr110227.c
new file mode 100644
index 000..9b59f5b6e49
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr110227.c
@@ -0,0 +1,11 @@
+/* { dg-do assemble { target { ! ia32 } } } */
+/* { dg-options " -O2 -mavx512vl" } */
+
+#include 
+
+void f()
+{
+  __m256i mask = _mm256_set_epi32(0, 0, 0, 0, -1, -1, -1, -1);
+  register __m256i reg asm("xmm16") = mask;
+  asm(""::"v"(reg));
+}
-- 
2.39.1.388.g2fc9e9ca3c



Re: [PATCH 1/4] rs6000: build constant via li;rotldi

2023-06-13 Thread Jiufu Guo via Gcc-patches


Hi,

David Edelsohn  writes:

> On Mon, Jun 12, 2023 at 11:30 PM Jiufu Guo  wrote:
>>
>>
>> Hi David,
>>
>> David Edelsohn  writes:
>> > On Wed, Jun 7, 2023 at 9:55 PM Jiufu Guo  wrote:
>> >
>> >  Hi,
>> >
>> >  This patch checks if a constant is possible to be rotated to/from a 
>> > positive
>> >  or negative value from "li". If so, we could use "li;rotldi" to build it.
>> >
>> >  Bootstrap and regtest pass on ppc64{,le}.
>> >  Is this ok for trunk?
>> >
>> >  BR,
>> >  Jeff (Jiufu)
>> >
>> >  gcc/ChangeLog:
>> >
>> >  * config/rs6000/rs6000.cc (can_be_rotated_to_positive_li): New 
>> > function.
>> >  (can_be_rotated_to_negative_li): New function.
>> >  (can_be_built_by_li_and_rotldi): New function.
>> >  (rs6000_emit_set_long_const): Call can_be_built_by_li_and_rotldi.
>> >
>> >  gcc/testsuite/ChangeLog:
>> >
>> >  * gcc.target/powerpc/const-build.c: New test.
>> >  ---
>> >   gcc/config/rs6000/rs6000.cc   | 64 +--
>> >   .../gcc.target/powerpc/const-build.c  | 54 
>> >   2 files changed, 112 insertions(+), 6 deletions(-)
>> >   create mode 100644 gcc/testsuite/gcc.target/powerpc/const-build.c
>> >
>> >  diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
>> >  index 42f49e4a56b..1dd0072350a 100644
>> >  --- a/gcc/config/rs6000/rs6000.cc
>> >  +++ b/gcc/config/rs6000/rs6000.cc
>> >  @@ -10258,6 +10258,48 @@ rs6000_emit_set_const (rtx dest, rtx source)
>> > return true;
>> >   }
>> >
>> >  +/* Check if C can be rotated to a positive value which 'li' instruction
>> >  +   is able to load.  If so, set *ROT to the number by which C is rotated,
>> >  +   and return true.  Return false otherwise.  */
>> >  +
>> >  +static bool
>> >  +can_be_rotated_to_positive_li (HOST_WIDE_INT c, int *rot)
>> >  +{
>> >  +  /* 49 leading zeros and 15 low bits on the positive value
>> >  + generated by 'li' instruction.  */
>> >  +  return can_be_rotated_to_lowbits (c, 15, rot);
>> >  +}
>> >  +
>> >  +/* Like can_be_rotated_to_positive_li, but check the negative value of 
>> > 'li'.  */
>> >  +
>> >  +static bool
>> >  +can_be_rotated_to_negative_li (HOST_WIDE_INT c, int *rot)
>> >  +{
>> >  +  return can_be_rotated_to_lowbits (~c, 15, rot);
>> >  +}
>> >  +
>> >  +/* Check if value C can be built by 2 instructions: one is 'li', another 
>> > is
>> >  +   rotldi.
>> >  +
>> >  +   If so, *SHIFT is set to the shift operand of rotldi(rldicl), and *MASK
>> >  +   is set to -1, and return true.  Return false otherwise.  */
>> >  +
>> >
>> > I look at this feature and it's good, but I don't fully understand the 
>> > benefit of this level of abstraction.  Ideally all of the above functions 
>> > would
>> > be inlined.  They aren't reused.
>> >
>> >  +static bool
>> >  +can_be_built_by_li_and_rotldi (HOST_WIDE_INT c, int *shift,
>> >  +  HOST_WIDE_INT *mask)
>> >  +{
>> >  +  int n;
>> >  +  if (can_be_rotated_to_positive_li (c, )
>> >  +  || can_be_rotated_to_negative_li (c, ))
>> >
>> > Why not
>> >
>> > /* Check if C or ~C can be rotated to a positive or negative value
>> > which 'li' instruction is able to load.  */
>> > if (can_be_rotated_to_lowbits (c, 15, )
>> > || can_be_rotated_to_lowbits (~c, 15, ))
>>
>>
>> Thanks a lot for your review!!
>>
>> Your suggestions could also achieve my goal of using a new function:
>> Using "can_be_rotated_to_positive_li" is just trying to get a
>> straightforward name.  Like yours, the code's comments would also
>> make it easy to understand.
>
> I recognize that you are trying to be consistent with the other
> functions that you add in later patches, but it feels like overkill in
Yes :)
> abstraction to me.  Or maybe combine positive_li and negative_li into a
> single function so that the abstraction serves a purpose other than a
> tail call and creating an alias for a specific invocation of
> can_be_rotated_to_lowbits.
Get it.

Thanks for your valuable suggestion!

BR,
Jeff (Jiufu Guo)

>
> Thanks, David
>
>>
>> BR,
>> Jeff (Jiufu Guo)
>> >
>> > ...
>> >
>> > This is a style of software engineering, but it seems overkill to me when 
>> > the function is a single line that tail calls another function.  Am I 
>> > missing
>> > something?
>> >
>> > The rest of this patch looks good.
>> >
>> > Thanks, David
>> >
>> >  +{
>> >  +  *mask = HOST_WIDE_INT_M1;
>> >  +  *shift = HOST_BITS_PER_WIDE_INT - n;
>> >  +  return true;
>> >  +}
>> >  +
>> >  +  return false;
>> >  +}
>> >  +
>> >   /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
>> >  Output insns to set DEST equal to the constant C as a series of
>> >  lis, ori and shl instructions.  */
>> >  @@ -10266,15 +10308,14 @@ static void
>> >   rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
>> >   {
>> > rtx temp;
>> >  +  int shift;
>> >  +  HOST_WIDE_INT mask;
>> > HOST_WIDE_INT ud1, ud2, ud3, ud4;
>> >
>> > 

Re: [PATCH v1] RISC-V: Bugfix for vec_init repeating auto vectorization in RV32

2023-06-13 Thread juzhe.zh...@rivai.ai
>> unsigned int elen = TARGET_VECTOR_ELEN_64 ? 64 : 32;
Add comment here to demonstrate why you pick up elen to set the LIMIT.
I understand:
1. -march=zve32* ===> ELEN = 32
-march=zve64* ===> ELEN = 64
2. both vmv.v.x and vmv.s.x are restricted to the ELEN
For example, When ELEN=32 (-march=zve32*)
vsetvli ...e64,m1
vmv.v.x/vmv.s.x
We can't support such code sequence.

You should demonstrate it clearly in the comments.

Otherwise, this patch LGTM.


juzhe.zh...@rivai.ai
 
From: pan2.li
Date: 2023-06-14 08:58
To: gcc-patches
CC: juzhe.zhong; rdapp.gcc; jeffreyalaw; pan2.li; yanzhang.wang; kito.cheng
Subject: [PATCH v1] RISC-V: Bugfix for vec_init repeating auto vectorization in 
RV32
From: Pan Li 
 
This patch would like to fix one bug exposed by RV32 test case
multiple_rgroup_run-2.c. The mask should be restricted by elen in
vector, and the condition between the vmv.s.x and the vmv.v.x should
take inner_bits_size rather than constants.
 
Passed both the rv32 and rv64 riscv/rvv tests.
 
Signed-off-by: Pan Li 
 
gcc/ChangeLog:
 
* config/riscv/riscv-v.cc (rvv_builder::get_merge_scalar_mask):
Take elen instead of scalar BITS_PER_WORD.
(expand_vector_init_merge_repeating_sequence): Use inner_bits_size
instead of scalar BITS_PER_WORD.
---
gcc/config/riscv/riscv-v.cc | 10 ++
1 file changed, 6 insertions(+), 4 deletions(-)
 
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index fb970344521..9270e258ca3 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -399,10 +399,11 @@ rvv_builder::get_merge_scalar_mask (unsigned int 
index_in_pattern) const
{
   unsigned HOST_WIDE_INT mask = 0;
   unsigned HOST_WIDE_INT base_mask = (1ULL << index_in_pattern);
+  unsigned int elen = TARGET_VECTOR_ELEN_64 ? 64 : 32;
-  gcc_assert (BITS_PER_WORD % npatterns () == 0);
+  gcc_assert (elen % npatterns () == 0);
-  int limit = BITS_PER_WORD / npatterns ();
+  int limit = elen / npatterns ();
   for (int i = 0; i < limit; i++)
 mask |= base_mask << (i * npatterns ());
@@ -1923,7 +1924,7 @@ expand_vector_init_merge_repeating_sequence (rtx target,
   rtx mask = gen_reg_rtx (mask_mode);
   rtx dup = gen_reg_rtx (dup_mode);
-  if (full_nelts <= BITS_PER_WORD) /* vmv.s.x.  */
+  if (full_nelts <= builder.inner_bits_size ()) /* vmv.s.x.  */
{
  rtx ops[] = {dup, gen_scalar_move_mask (dup_mask_mode),
RVV_VUNDEF (dup_mode), merge_mask};
@@ -1933,7 +1934,8 @@ expand_vector_init_merge_repeating_sequence (rtx target,
   else /* vmv.v.x.  */
{
  rtx ops[] = {dup, force_reg (GET_MODE_INNER (dup_mode), merge_mask)};
-   rtx vl = gen_int_mode (CEIL (full_nelts, BITS_PER_WORD), Pmode);
+   rtx vl = gen_int_mode (CEIL (full_nelts, builder.inner_bits_size ()),
+ Pmode);
  emit_nonvlmax_integer_move_insn (code_for_pred_broadcast (dup_mode),
   ops, vl);
}
-- 
2.34.1
 
 


[PATCH v1] RISC-V: Bugfix for vec_init repeating auto vectorization in RV32

2023-06-13 Thread Pan Li via Gcc-patches
From: Pan Li 

This patch would like to fix one bug exposed by RV32 test case
multiple_rgroup_run-2.c. The mask should be restricted by elen in
vector, and the condition between the vmv.s.x and the vmv.v.x should
take inner_bits_size rather than constants.

Passed both the rv32 and rv64 riscv/rvv tests.

Signed-off-by: Pan Li 

gcc/ChangeLog:

* config/riscv/riscv-v.cc (rvv_builder::get_merge_scalar_mask):
Take elen instead of scalar BITS_PER_WORD.
(expand_vector_init_merge_repeating_sequence): Use inner_bits_size
instead of scalar BITS_PER_WORD.
---
 gcc/config/riscv/riscv-v.cc | 10 ++
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index fb970344521..9270e258ca3 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -399,10 +399,11 @@ rvv_builder::get_merge_scalar_mask (unsigned int 
index_in_pattern) const
 {
   unsigned HOST_WIDE_INT mask = 0;
   unsigned HOST_WIDE_INT base_mask = (1ULL << index_in_pattern);
+  unsigned int elen = TARGET_VECTOR_ELEN_64 ? 64 : 32;
 
-  gcc_assert (BITS_PER_WORD % npatterns () == 0);
+  gcc_assert (elen % npatterns () == 0);
 
-  int limit = BITS_PER_WORD / npatterns ();
+  int limit = elen / npatterns ();
 
   for (int i = 0; i < limit; i++)
 mask |= base_mask << (i * npatterns ());
@@ -1923,7 +1924,7 @@ expand_vector_init_merge_repeating_sequence (rtx target,
   rtx mask = gen_reg_rtx (mask_mode);
   rtx dup = gen_reg_rtx (dup_mode);
 
-  if (full_nelts <= BITS_PER_WORD) /* vmv.s.x.  */
+  if (full_nelts <= builder.inner_bits_size ()) /* vmv.s.x.  */
{
  rtx ops[] = {dup, gen_scalar_move_mask (dup_mask_mode),
RVV_VUNDEF (dup_mode), merge_mask};
@@ -1933,7 +1934,8 @@ expand_vector_init_merge_repeating_sequence (rtx target,
   else /* vmv.v.x.  */
{
  rtx ops[] = {dup, force_reg (GET_MODE_INNER (dup_mode), merge_mask)};
- rtx vl = gen_int_mode (CEIL (full_nelts, BITS_PER_WORD), Pmode);
+ rtx vl = gen_int_mode (CEIL (full_nelts, builder.inner_bits_size ()),
+Pmode);
  emit_nonvlmax_integer_move_insn (code_for_pred_broadcast (dup_mode),
   ops, vl);
}
-- 
2.34.1



[PATCH] LoongArch: Set default alignment for functions and labels with -mtune

2023-06-13 Thread Xi Ruoyao via Gcc-patches
The LA464 micro-architecture is sensitive to alignment of code.  The
Loongson team has benchmarked various combinations of function and label
alignment; the results [1] show that 16-byte label alignment together with 32-byte
function alignment gives best results in terms of SPEC score.

Add a mtune-based table-driven mechanism to set the default of
-falign-{functions,labels}.  As LA464 is the first (and the only for
now) uarch supported by GCC, the same setting is also used for
the "generic" -mtune=loongarch64.  In the future we may set different
settings for LA{2,3,6}64 once we add the support for them.

Bootstrapped and regtested on loongarch64-linux-gnu.  Ok for trunk?

gcc/ChangeLog:

* config/loongarch/loongarch-tune.h (loongarch_align): New
struct.
* config/loongarch/loongarch-def.h (loongarch_cpu_align): New
array.
* config/loongarch/loongarch-def.c (loongarch_cpu_align): Define
the array.
* config/loongarch/loongarch.cc
(loongarch_option_override_internal): Set the value of
-falign-functions= if -falign-functions is enabled but no value
is given.  Likewise for -falign-labels=.
---
 gcc/config/loongarch/loongarch-def.c  | 12 
 gcc/config/loongarch/loongarch-def.h  |  1 +
 gcc/config/loongarch/loongarch-tune.h |  8 
 gcc/config/loongarch/loongarch.cc |  6 ++
 4 files changed, 27 insertions(+)

diff --git a/gcc/config/loongarch/loongarch-def.c 
b/gcc/config/loongarch/loongarch-def.c
index fc4ebbefede..6729c857f7c 100644
--- a/gcc/config/loongarch/loongarch-def.c
+++ b/gcc/config/loongarch/loongarch-def.c
@@ -72,6 +72,18 @@ loongarch_cpu_cache[N_TUNE_TYPES] = {
   },
 };
 
+struct loongarch_align
+loongarch_cpu_align[N_TUNE_TYPES] = {
+  [CPU_LOONGARCH64] = {
+.function = "32",
+.label = "16",
+  },
+  [CPU_LA464] = {
+.function = "32",
+.label = "16",
+  },
+};
+
 /* The following properties cannot be looked up directly using "cpucfg".
  So it is necessary to provide a default value for "unknown native"
  tune targets (i.e. -mtune=native while PRID does not correspond to
diff --git a/gcc/config/loongarch/loongarch-def.h 
b/gcc/config/loongarch/loongarch-def.h
index 778b1409956..fb8bb88eb52 100644
--- a/gcc/config/loongarch/loongarch-def.h
+++ b/gcc/config/loongarch/loongarch-def.h
@@ -144,6 +144,7 @@ extern int loongarch_cpu_issue_rate[];
 extern int loongarch_cpu_multipass_dfa_lookahead[];
 
 extern struct loongarch_cache loongarch_cpu_cache[];
+extern struct loongarch_align loongarch_cpu_align[];
 extern struct loongarch_rtx_cost_data loongarch_cpu_rtx_cost_data[];
 
 #ifdef __cplusplus
diff --git a/gcc/config/loongarch/loongarch-tune.h 
b/gcc/config/loongarch/loongarch-tune.h
index ba31c4f08c3..5c03262daff 100644
--- a/gcc/config/loongarch/loongarch-tune.h
+++ b/gcc/config/loongarch/loongarch-tune.h
@@ -48,4 +48,12 @@ struct loongarch_cache {
 int simultaneous_prefetches; /* number of parallel prefetch */
 };
 
+/* Alignment for functions and labels for best performance.  For new uarchs
+   the value should be measured via benchmarking.  See the documentation for
+   -falign-functions and -falign-labels in invoke.texi for the format.  */
+struct loongarch_align {
+  const char *function;/* default value for -falign-functions */
+  const char *label;   /* default value for -falign-labels */
+};
+
 #endif /* LOONGARCH_TUNE_H */
diff --git a/gcc/config/loongarch/loongarch.cc 
b/gcc/config/loongarch/loongarch.cc
index eb73d11b869..5b8b93eb24b 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -6249,6 +6249,12 @@ loongarch_option_override_internal (struct gcc_options 
*opts)
   && !opts->x_optimize_size)
 opts->x_flag_prefetch_loop_arrays = 1;
 
+  if (opts->x_flag_align_functions && !opts->x_str_align_functions)
+opts->x_str_align_functions = 
loongarch_cpu_align[LARCH_ACTUAL_TUNE].function;
+
+  if (opts->x_flag_align_labels && !opts->x_str_align_labels)
+opts->x_str_align_labels = loongarch_cpu_align[LARCH_ACTUAL_TUNE].label;
+
   if (TARGET_DIRECT_EXTERN_ACCESS && flag_shlib)
 error ("%qs cannot be used for compiling a shared library",
   "-mdirect-extern-access");
-- 
2.41.0



Re: [PATCH v3] i386: Allow -mlarge-data-threshold with -mcmodel=large

2023-06-13 Thread Fangrui Song via Gcc-patches
On Mon, Jun 12, 2023 at 11:16 PM Jan Beulich  wrote:

> On 13.06.2023 05:28, Fangrui Song wrote:
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/large-data.c
> > @@ -0,0 +1,13 @@
> > +/* { dg-do compile } */
> > +/* { dg-require-effective-target lp64 } */
> > +/* { dg-options "-O2 -mcmodel=large -mlarge-data-threshold=4" } */
> > +/* { dg-final { scan-assembler ".lbss" } } */
> > +/* { dg-final { scan-assembler ".bss" } } */
> > +/* { dg-final { scan-assembler ".ldata" } } */
> > +/* { dg-final { scan-assembler ".data" } } */
> > +/* { dg-final { scan-assembler ".lrodata" } } */
> > +/* { dg-final { scan-assembler ".rodata" } } */
>
> Aren't these regex-es, and hence the dots all need escaping or enclosing
> in square brackets?
>
> Jan
>

Good catch! I am not familiar with dg-* directives... I can send a v4, but
I'd like to know whether there are other comments.
(I don't have git write permission for gcc.)


-- 
宋方睿


[pushed] c/c++: use positive tone in missing header notes [PR84890]

2023-06-13 Thread David Malcolm via Gcc-patches
Quoting "How a computer should talk to people" (as quoted
in "Concepts Error Messages for Humans"):

"Various negative tones or actions are unfriendly: being manipulative,
not giving a second chance, talking down, using fashionable slang,
blaming. We must not seem to blame the person. We should avoid suggesting
that the person is inadequate. Phrases like "you forgot" may seem
harmless, but what if a computer said this to you four or five times in
two minutes? Anyway, the person may disagree, so why risk offense?"

Successfully bootstrapped & regtested on x86_64-pc-linux-gnu.
Pushed to trunk as r14-1798-g7474c46cf2d371.

gcc/c-family/ChangeLog:
PR c/84890
* known-headers.cc
(suggest_missing_header::~suggest_missing_header): Reword note to
avoid negative tone of "forgetting".

gcc/cp/ChangeLog:
PR c/84890
* name-lookup.cc (missing_std_header::~missing_std_header): Reword
note to avoid negative tone of "forgetting".

gcc/testsuite/ChangeLog:
PR c/84890
* g++.dg/cpp2a/srcloc3.C: Update expected message.
* g++.dg/lookup/missing-std-include-2.C: Likewise.
* g++.dg/lookup/missing-std-include-3.C: Likewise.
* g++.dg/lookup/missing-std-include-6.C: Likewise.
* g++.dg/lookup/missing-std-include.C: Likewise.
* g++.dg/spellcheck-inttypes.C: Likewise.
* g++.dg/spellcheck-stdint.C: Likewise.
* g++.dg/spellcheck-stdlib.C: Likewise.
* gcc.dg/spellcheck-inttypes.c: Likewise.
* gcc.dg/spellcheck-stdbool.c: Likewise.
* gcc.dg/spellcheck-stdint.c: Likewise.
* gcc.dg/spellcheck-stdlib.c: Likewise.
---
 gcc/c-family/known-headers.cc |  2 +-
 gcc/cp/name-lookup.cc |  2 +-
 gcc/testsuite/g++.dg/cpp2a/srcloc3.C  |  2 +-
 .../g++.dg/lookup/missing-std-include-2.C |  8 +--
 .../g++.dg/lookup/missing-std-include-3.C |  2 +-
 .../g++.dg/lookup/missing-std-include-6.C |  4 +-
 .../g++.dg/lookup/missing-std-include.C   | 16 +++---
 gcc/testsuite/g++.dg/spellcheck-inttypes.C| 54 +--
 gcc/testsuite/g++.dg/spellcheck-stdint.C  | 40 +++---
 gcc/testsuite/g++.dg/spellcheck-stdlib.C  | 28 +-
 gcc/testsuite/gcc.dg/spellcheck-inttypes.c| 52 +-
 gcc/testsuite/gcc.dg/spellcheck-stdbool.c |  6 +--
 gcc/testsuite/gcc.dg/spellcheck-stdint.c  | 40 +++---
 gcc/testsuite/gcc.dg/spellcheck-stdlib.c  | 34 ++--
 14 files changed, 145 insertions(+), 145 deletions(-)

diff --git a/gcc/c-family/known-headers.cc b/gcc/c-family/known-headers.cc
index de92cfd6f3c..3484c867ca0 100644
--- a/gcc/c-family/known-headers.cc
+++ b/gcc/c-family/known-headers.cc
@@ -320,6 +320,6 @@ suggest_missing_header::~suggest_missing_header ()
   maybe_add_include_fixit (, m_header_hint, true);
   inform (,
  "%qs is defined in header %qs;"
- " did you forget to %<#include %s%>?",
+ " this is probably fixable by adding %<#include %s%>",
  m_name_str, m_header_hint, m_header_hint);
 }
diff --git a/gcc/cp/name-lookup.cc b/gcc/cp/name-lookup.cc
index eb5c333b5ea..6ac58a35b56 100644
--- a/gcc/cp/name-lookup.cc
+++ b/gcc/cp/name-lookup.cc
@@ -6760,7 +6760,7 @@ class missing_std_header : public deferred_diagnostic
maybe_add_include_fixit (, header, true);
inform (,
"% is defined in header %qs;"
-   " did you forget to %<#include %s%>?",
+   " this is probably fixable by adding %<#include %s%>",
m_name_str, header, header);
   }
 else
diff --git a/gcc/testsuite/g++.dg/cpp2a/srcloc3.C 
b/gcc/testsuite/g++.dg/cpp2a/srcloc3.C
index 324e03cd548..c843e07fd4f 100644
--- a/gcc/testsuite/g++.dg/cpp2a/srcloc3.C
+++ b/gcc/testsuite/g++.dg/cpp2a/srcloc3.C
@@ -1,5 +1,5 @@
 // { dg-do compile { target c++20 } }
 
 auto x = __builtin_source_location (); // { dg-error "'source_location' is not 
a member of 'std'" }
-// { dg-message "std::source_location' is defined in header 
''; did you forget to '#include '" "" { 
target *-*-* } .-1 }
+// { dg-message "std::source_location' is defined in header 
''; this is probably fixable by adding '#include 
'" "" { target *-*-* } .-1 }
 // { dg-message "using '__builtin_source_location'" "" { target *-*-* } .-2 }
diff --git a/gcc/testsuite/g++.dg/lookup/missing-std-include-2.C 
b/gcc/testsuite/g++.dg/lookup/missing-std-include-2.C
index 51c604a9f1e..fa1ec0b0e66 100644
--- a/gcc/testsuite/g++.dg/lookup/missing-std-include-2.C
+++ b/gcc/testsuite/g++.dg/lookup/missing-std-include-2.C
@@ -16,10 +16,10 @@ namespace std
 void test (void)
 {
   std::string s ("hello world"); // { dg-error ".string. is not a member of 
.std." }
-  // { dg-message ".std::string. is defined in header ..; did you 
forget to .#include .?" "" { target *-*-* } .-1 }
+  // { dg-message ".std::string. is defined in header ..; this is 
probably fixable by adding .#include ." 

Fix templated conversion operator demangling

2023-06-13 Thread Nathan Sidwell via Gcc-patches
I came across this when working on the conversion operator deduction fix.  We'd 
successfully demangle an instantiation of 'template operator X & 
()', but fail for 'template operator X ()'.  The demangle printer 
was trying to specially handle the instantiation in the latter case -- seeing 
the template inst of X. That code appears to be completely unnecessary.  Added a 
bunch of conversion operator demangling tests.



nathan
--
Nathan SidwellFrom 5a897036187468d4ded330b90b2abdaff5061ed6 Mon Sep 17 00:00:00 2001
From: Nathan Sidwell 
Date: Mon, 12 Jun 2023 19:37:04 -0400
Subject: [PATCH] c++: Fix templated conversion operator demangling

Instantiations of templated conversion operators failed to demangle
for cases such as 'operator X', but worked for 'operator X
&', due to thinking the template instantiation of X was the
instantiation of the conversion operator itself.

	libiberty/
	* cp-demangle.c (d_print_conversion): Remove incorrect
	template instantiation handling.
	* testsuite/demangle-expected: Add testcases.
---
 libiberty/cp-demangle.c   | 28 +++
 libiberty/testsuite/demangle-expected | 27 ++
 2 files changed, 30 insertions(+), 25 deletions(-)

diff --git a/libiberty/cp-demangle.c b/libiberty/cp-demangle.c
index 18ab28fd028..3bd303a7544 100644
--- a/libiberty/cp-demangle.c
+++ b/libiberty/cp-demangle.c
@@ -6660,32 +6660,10 @@ d_print_conversion (struct d_print_info *dpi, int options,
   dpt.template_decl = dpi->current_template;
 }
 
-  if (d_left (dc)->type != DEMANGLE_COMPONENT_TEMPLATE)
-{
-  d_print_comp (dpi, options, d_left (dc));
-  if (dpi->current_template != NULL)
-	dpi->templates = dpt.next;
-}
-  else
-{
-  d_print_comp (dpi, options, d_left (d_left (dc)));
+  d_print_comp (dpi, options, d_left (dc));
 
-  /* For a templated cast operator, we need to remove the template
-	 parameters from scope after printing the operator name,
-	 so we need to handle the template printing here.  */
-  if (dpi->current_template != NULL)
-	dpi->templates = dpt.next;
-
-  if (d_last_char (dpi) == '<')
-	d_append_char (dpi, ' ');
-  d_append_char (dpi, '<');
-  d_print_comp (dpi, options, d_right (d_left (dc)));
-  /* Avoid generating two consecutive '>' characters, to avoid
-	 the C++ syntactic ambiguity.  */
-  if (d_last_char (dpi) == '>')
-	d_append_char (dpi, ' ');
-  d_append_char (dpi, '>');
-}
+  if (dpi->current_template != NULL)
+dpi->templates = dpt.next;
 }
 
 /* Initialize the information structure we use to pass around
diff --git a/libiberty/testsuite/demangle-expected b/libiberty/testsuite/demangle-expected
index 52dff883a18..0acd2d635db 100644
--- a/libiberty/testsuite/demangle-expected
+++ b/libiberty/testsuite/demangle-expected
@@ -1662,3 +1662,30 @@ X::F()::{lambda(int)#1}::operator()(int) const
 
 _Z1fIiEv1AIXnxtlT_EEE
 void f(A)
+
+_ZNO1Ycv1XEv
+Y::operator X() &&
+
+_ZNO1Ycv1XIT_EIvEEv
+Y::operator X() &&
+
+_ZNO1Y3bobEv
+Y::bob() &&
+
+_ZNR1Y3bobEv
+Y::bob() &
+
+_ZNKR1YcvRK1XIT_EIvEEv
+Y::operator X const&() const &
+
+_ZZN1XIiEcviEvE1y
+X::operator int()::y
+
+_ZZN1XIiEcv1ZIiEEvE1y
+X::operator Z()::y
+
+_ZZN1Xcv1ZIT_EIiEEvE1y
+X::operator Z()::y
+
+_ZZN1XIfEcv1ZIT_EIiEEvE1y
+X::operator Z()::y
-- 
2.40.1



Re: [PATCH] Fortran: fix passing of zero-sized array arguments to procedures [PR86277]

2023-06-13 Thread Harald Anlauf via Gcc-patches

Hi Steve,

On 6/13/23 19:45, Steve Kargl via Gcc-patches wrote:

On Mon, Jun 12, 2023 at 11:12:45PM +0200, Harald Anlauf via Fortran wrote:

Dear all,

the attached - actually rather small - patch is the result of a
rather intensive session with Mikael in an attempt to fix the
situation that we did not create proper temporaries when passing
zero-sized array arguments to procedures.  When the dummy argument
was declared as OPTIONAL, in many cases it was mis-detected as
non-present.  This also depended on the type of argument, and
was different for different intrinsic types, notably character,
and derived types, and should explain the rather large ratio of
the size of the provided testcases to the actual fix...

(What the patch does not address: we still generate too much code
for unneeded temporaries, often two temporaries instead of just
one.  I'll open a separate PR to track this.)

Regtested on x86_64-pc-linux-gnu.  OK for mainline?

If this survives long enough on 14-trunk, would this be eligible
for a backport to 13-branch in time for 13.2?



OK to commit.

I've reviewed the bugzilla exchange between Mikael and you,
and agree with committing this and opening a new PR to
track the unneeded temporaries issue.


this is tracked here:

  https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110241

Thanks for the review!

Harald



Re: [PATCH] Fix note_defect3 function

2023-06-13 Thread Akari Takahashi via Gcc-patches
Hi Jeff,

Thank you for your response. Regarding the divtab.cc file, I actually came
across it by accident while working on another task. I didn't have a
specific reason for investigating the file, but I noticed the issue and
thought it was worth bringing to your attention.

Thank you for taking care of this issue.

Best Regards,

Takahashi Akari

On Tue, Jun 13, 2023 at 10:50 PM Jeff Law  wrote:

>
>
> On 6/12/23 21:18, Akari Takahashi via Gcc-patches wrote:
> > Hello,
> >
> > I've noticed an issue with the note_defect3 function and have prepared a
> > patch to fix it. The function is missing a return statement, which is
> > causing undefined behavior. This patch adds the missing return statement,
> > ensuring that the function returns the correct value.
> >
> > Please consider applying this patch to correct the issue. Thank you.
> >
> > Best regards,
> >
> > Takahashi Akari
> Thanks.  Reviewing that file it looks like it was just supposed to be
> used for the sh5/sh5media processor.  My recollection is those were
> never actually produced and support for them was removed from GCC some
> time ago.  This file was missed during that removal.
>
> Rather than apply the patch, I think the better choice is to just remove
> the divtab.cc file unless someone has a real use for it.
>
> Is there a particular reason you were investigating the divtab.cc file?
>
> jeff
>


Re: [PATCH] rs6000: replace '(const_int 0)' to 'unspec:BLK [(const_int 0)]' for stack_tie

2023-06-13 Thread David Edelsohn via Gcc-patches
On Tue, Jun 13, 2023 at 2:16 PM Segher Boessenkool
 wrote:
>
> Hi!
>
> On Tue, Jun 13, 2023 at 10:15:49AM +0800, Jiufu Guo wrote:
> > David Edelsohn  writes:
> > >
> > > This definitely seems to be a better solution.
> > >
> > > The TARGET_CONST_ANCHOR change should not be part of this patch.  Also
> > > there is no ChangeLog for the patch.
> >
> > Thanks a lot for your quick review!! And sorry for the sending this patch
> > in a hurry.  I would update the patch accordingly.
>
> > > This generally looks correct and consistent with other ports. I want
> > > to give Segher a chance to double check it, if he wishes.
>
> The documentation is very clear that the only thing for which you can
> have BLKmode is "mem".  Not unspec, only "mem".
>
> Let's not do this.  The existing code has clear and obvious semantics,
> which is documented as well -- there is no reason to make it worse in
> every respect.

Segher,

Unfortunately, GCC now is inconsistent and this response is incorrect.
The documentation is out of date or was ignored and the "facts on the
ground" contradict your review.

Yes, (const_int 0) is supposed to be a general no-op and BLKmode only
is supposed to be used for MEM, but other major targets (arm, aarch64,
riscv, s390) all use unspec:BLK and specifically UNSPEC_TIE.  rs6000
is the only port that does not follow this convention.  The middle-end
has adapted to the behavior of all of the other targets, whether that
conformed to the documentation or not.  The rs6000 port needs to be
fixed and Jiufu's approach is the correct one, consistent with all
other targets for stack tie.  If the documentation differs, the
documentation needs to be updated, not a different approach for the
rs6000 port.  Jiufu's patch is correct.

Thanks, David


Re: [PATCH] RISC-V: Save and restore FCSR in interrupt functions to avoid program errors.

2023-06-13 Thread Palmer Dabbelt

On Tue, 13 Jun 2023 10:41:00 PDT (-0700), gcc-patches@gcc.gnu.org wrote:



On 6/13/23 00:41, Jin Ma wrote:

gcc/ChangeLog:

* config/riscv/riscv.cc (riscv_compute_frame_info): Allocate frame for 
FCSR.
(riscv_for_each_saved_reg): Save and restore FCSR in interrupt 
functions.
* config/riscv/riscv.md (riscv_frcsr): New patterns.
(riscv_fscsr): Likewise.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/interrupt-fcsr-1.c: New test.
* gcc.target/riscv/interrupt-fcsr-2.c: New test.
* gcc.target/riscv/interrupt-fcsr-3.c: New test.

Looks pretty good.  Just a couple minor updates and I think we can push
this to the trunk.


We should update the C API doc as well, it's a bit vague as to whether 
the CSRs are saved: it just says that any used registers are saved, it's 
not clear if registers includes CSRs.


Unless I'm missing something, we also need to save/restore the V CSRs in 
interrupt functions as well?  They're treated the same way in the C API 
doc, so applying the same logic seems reasonable -- I'm not sure we 
really want to save/restore something like vstart, though...


I opened a PR for the API doc: 
https://github.com/riscv-non-isa/riscv-c-api-doc/pull/42



diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index de30bf4e567..4ef9692b4db 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -4990,7 +4990,8 @@ riscv_compute_frame_info (void)
if (cfun->machine->interrupt_handler_p)
  {
HOST_WIDE_INT step1 = riscv_first_stack_step (frame, frame->total_size);
-  if (! POLY_SMALL_OPERAND_P ((frame->total_size - step1)))
+  if (! POLY_SMALL_OPERAND_P ((frame->total_size - step1))
+ || TARGET_HARD_FLOAT)
interrupt_save_prologue_temp = true;
  }

There's a comment before this IF block indicating when we need to save
the prologue temporary register (specifically in interrupt functions
with large frames).  That comment needs to be updated so that it
mentions interrupt functions on TARGET_HARD_FLOAT.


I think we're also missing Zfinx here: there's no F registers to save, 
but we should still have the same side effects visible in the CSRs.







@@ -5282,6 +5290,29 @@ riscv_for_each_saved_reg (poly_int64 sp_offset, 
riscv_save_restore_fn fn,
}
}

+  if (regno == RISCV_PROLOGUE_TEMP_REGNUM
+ && TARGET_HARD_FLOAT
+ && cfun->machine->interrupt_handler_p
+ && cfun->machine->frame.fmask)
+   {
+ unsigned int fcsr_size = GET_MODE_SIZE (SImode);
+ if (!epilogue)
+   {
+ riscv_save_restore_reg (word_mode, regno, offset, fn);
+ offset -= fcsr_size;
+ emit_insn (gen_riscv_frcsr (gen_rtx_REG (SImode, 
RISCV_PROLOGUE_TEMP_REGNUM)));
+ riscv_save_restore_reg (SImode, RISCV_PROLOGUE_TEMP_REGNUM, 
offset, riscv_save_reg);
+   }
+ else
+   {
+ riscv_save_restore_reg (SImode, RISCV_PROLOGUE_TEMP_REGNUM, 
offset - fcsr_size, riscv_restore_reg);
+ emit_insn (gen_riscv_fscsr (gen_rtx_REG (SImode, 
RISCV_PROLOGUE_TEMP_REGNUM)));
+ riscv_save_restore_reg (word_mode, regno, offset, fn);
+ offset -= fcsr_size;
+   }
+ continue;
+   }

Note there is a macro RISCV_PROLOGUE_TEMP(MODE) which will create the
REG expression for the prologue temporary in the given mode.  That way
you don't have to call gen_rtx_REG directly here.

Jeff


This got snipped, but the tests should only check for the CSR 
save/restore on F/D systems (from looking at them they'd fail on soft 
float targets).


[wwwdocs] gcc-14/changes.html + projects/gomp/: GCC 14 OpenMP update

2023-06-13 Thread Tobias Burnus

First update for OpenMP changes that made it into GCC 14.

Wording, technical and other comments are welcome as always.

I intend to commit the attached patch tomorrow.

Tobias

PS: There were a bunch of other useful changes, but those "only" improved
and fixed features already supported or added parsing-only support. Thus,
while the former is important for users, it is not for the release notes.
-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
gcc-14/changes.html + projects/gomp/: GCC 14 OpenMP update

 htdocs/gcc-14/changes.html  | 15 +++
 htdocs/projects/gomp/index.html | 10 ++
 2 files changed, 21 insertions(+), 4 deletions(-)

diff --git a/htdocs/gcc-14/changes.html b/htdocs/gcc-14/changes.html
index 55d566b8..c403c94f 100644
--- a/htdocs/gcc-14/changes.html
+++ b/htdocs/gcc-14/changes.html
@@ -37,6 +37,21 @@ a work-in-progress.
 
 General Improvements
 
+
+  https://gcc.gnu.org/projects/gomp/;>OpenMP
+  
+
+  The requires directive's unified_address
+  requirement is now fulfilled by both AMD GCN and nvptx devices.
+
+
+  OpenMP 5.2: The OMP_TARGET_OFFLOAD=mandatory handling has
+  been updated for the clarifications and changes of the 5.2 specification.
+  For Fortran, the list of directives permitted in Fortran pure procedures
+  was extended.
+
+  
+
 
 New Languages and Language specific improvements
 
diff --git a/htdocs/projects/gomp/index.html b/htdocs/projects/gomp/index.html
index 328d17bd..55066f68 100644
--- a/htdocs/projects/gomp/index.html
+++ b/htdocs/projects/gomp/index.html
@@ -142,7 +142,7 @@ filing a bug report.
 
 Implementation status in libgomp manual:
 https://gcc.gnu.org/onlinedocs/libgomp/OpenMP-Implementation-Status.html;
->Mainline (GCC 13),
+>Mainline (GCC 14),
 https://gcc.gnu.org/onlinedocs/gcc-13.1.0/libgomp/OpenMP-Implementation-Status.html;
 >GCC 13,
 https://gcc.gnu.org/onlinedocs/gcc-12.1.0/libgomp/OpenMP-Implementation-Status.html;
@@ -313,12 +313,14 @@ than listed, depending on resolved corner cases and optimizations.
   GCC9
   GCC12
   GCC13
+  GCC14
 
 
   (atomic_default_mem_order)
   (dynamic_allocators)
   complete but no non-host devices provides unified_address or
-  unified_shared_memory
+  unified_shared_memory
+  complete but no non-host devices provides unified_shared_memory
 
   
   
@@ -836,7 +838,7 @@ than listed, depending on resolved corner cases and optimizations.
   
   
 Extended list of directives permitted in Fortran pure procedures
-No
+GCC14
 
   
   
@@ -926,7 +928,7 @@ than listed, depending on resolved corner cases and optimizations.
   
   
 Initial value of default-device-var ICV with OMP_TARGET_OFFLOAD=mandatory
-No
+GCC14
 
   
   


[patch] OpenMP: Set default-device-var with OMP_TARGET_OFFLOAD=mandatory

2023-06-13 Thread Tobias Burnus

I intend to commit this tomorrow, unless there are comments.

It does as it says (see commit log): It initializes default-device-var
to the value using the algorithm described in OpenMP 5.2, which
depends on whether OMP_TARGET_OFFLOAD=mandatory was set.

NOTE: With -foffload=disable there is no binary code but still
devices get found - such that default-device-var == 0 (= first
nonhost device). Thus, in that case, libgomp runs the code on that
device but as no binary data is available, host fallback is used.
(Even if there would be executable code for another device on
the system.)
With mandatory, this unintended host fallback is detected and an
error is diagnosed. One can argue whether keeping the devices
makes sense (e.g. because in a dynamic library device code will
be loaded later) or not (don't list if no code is available).

Note that TR11 (future OpenMP 6.0) extends OMP_DEFAULT_DEVICE and
adds OMP_AVAILABLE_DEVICES, which permit finer-grained control over
the device, including OMP_DEFAULT_DEVICE=initial (and 'invalid'), which
the current scheme does not permit. (Well, there is
OMP_TARGET_OFFLOAD=disabled, but that's too big a hammer.)

Tobias

PS:  DejaGNU testing was done without offloading configured
and with remote testing on a system having an offload device,
which does not support setting environment variables.
Manual testing was done with offloading enabled and depending
on the testcase, running on a system with and/or without offloading
hardware.
-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
OpenMP: Set default-device-var with OMP_TARGET_OFFLOAD=mandatory

OMP_TARGET_OFFLOAD=mandatory handling was previously inconsistent. Hence, in
OpenMP 5.2 it was clarified/extended by having implications on the
default-device-var; additionally, omp_initial_device and omp_invalid_device
enum values/PARAMETERs were added; support for it was added
in r13-1066-g1158fe43407568 including aborting for omp_invalid_device and
non-conforming device numbers. Only the mandatory handling was missing.

Namely, while the default-device-var is usually initialized to value 0,
with 'mandatory' it must have the value 'omp_invalid_device' if and only if
zero non-host devices are available. (The OMP_DEFAULT_DEVICE env var
overrides this as it comes semantically after the initialization.)

To achieve this, default-device-var is now initialized to INT_MIN. If
there is no 'mandatory', it is set to 0 directly after env var parsing.
Otherwise, it is updated in gomp_target_init to either 0 or
omp_invalid_device. To ensure INT_MIN is never seen by the user, both
the omp_get_default_device API routine and omp_display_env (user call
and OMP_DISPLAY_ENV env var) call gomp_init_targets_once() in that case.

libgomp/ChangeLog:

	* env.c (gomp_default_icv_values): Init default_device_var to
	a nonconforming value - INT_MIN.
	(initialize_env): After env-var parsing, set default_device_var to
	device 0 unless OMP_TARGET_OFFLOAD=mandatory.
	(omp_display_env): If default_device_var is INT_MIN, call
	gomp_init_targets_once.
	* icv-device.c (omp_get_default_device): Likewise.
	* libgomp.texi (OMP_DEFAULT_DEVICE): Update init description.
	(OpenMP 5.2 Impl. Status): Mark OMP_TARGET_OFFLOAD=mandatory as 'Y'.
	* target.c (resolve_device): Improve error message for device-num < 0
	with 'mandatory' and no non-host devices available.
	(gomp_target_init): Set default-device-var if INT_MIN.
	* testsuite/libgomp.c/target-48.c: New test.
	* testsuite/libgomp.c/target-49.c: New test.
	* testsuite/libgomp.c/target-50.c: New test.
	* testsuite/libgomp.c/target-51.c: New test.
	* testsuite/libgomp.c/target-52.c: New test.
	* testsuite/libgomp.c/target-53.c: New test.
	* testsuite/libgomp.c/target-54.c: New test.

 libgomp/env.c| 13 --
 libgomp/icv-device.c |  4 +++
 libgomp/libgomp.texi |  4 ++-
 libgomp/target.c | 15 ++-
 libgomp/testsuite/libgomp.c/target-48.c  | 31 +++
 libgomp/testsuite/libgomp.c/target-49.c  | 18 +
 libgomp/testsuite/libgomp.c/target-50.c  | 27 
 libgomp/testsuite/libgomp.c/target-50a.c | 43 
 libgomp/testsuite/libgomp.c/target-51.c  | 24 ++
 libgomp/testsuite/libgomp.c/target-52.c  | 25 +++
 libgomp/testsuite/libgomp.c/target-53.c  | 22 
 libgomp/testsuite/libgomp.c/target-54.c  | 20 +++
 12 files changed, 242 insertions(+), 4 deletions(-)

diff --git a/libgomp/env.c b/libgomp/env.c
index e7a035b593c..25c0211dda1 100644
--- a/libgomp/env.c
+++ b/libgomp/env.c
@@ -62,13 +62,14 @@
 #include "secure_getenv.h"
 #include "environ.h"
 
-/* Default values of ICVs according to the 

Re: [PATCH] rs6000: replace '(const_int 0)' to 'unspec:BLK [(const_int 0)]' for stack_tie

2023-06-13 Thread Segher Boessenkool
Hi!

As I said in a reply to the original patch: not okay.  Sorry.

But some comments on this patch:

On Tue, Jun 13, 2023 at 08:23:35PM +0800, Jiufu Guo wrote:
> +   && XINT (SET_SRC (set), 1) == UNSPEC_TIE
> +   && XVECEXP (SET_SRC (set), 0, 0) == const0_rtx);

This makes it required that the operand of an UNSPEC_TIE unspec is a
const_int 0.  This should be documented somewhere.  Ideally you would
want no operand at all here, but every unspec has an operand.

> +  RTVEC_ELT (p, i)
> + = gen_rtx_SET (mem, gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, const0_rtx),
> + UNSPEC_TIE));

If it is hard to indent your code, your code is trying to do too much.
Just have an extra temporary?

  rtx un = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, const0_rtx), UNSPEC_TIE);
  RTVEC_ELT (p, i) = gen_rtx_SET (mem, un);

That is shorter even, and certainly more readable :-)

> @@ -10828,7 +10829,9 @@ (define_expand "restore_stack_block"
>operands[4] = gen_frame_mem (Pmode, operands[1]);
>p = rtvec_alloc (1);
>RTVEC_ELT (p, 0) = gen_rtx_SET (gen_frame_mem (BLKmode, operands[0]),
> -   const0_rtx);
> +   gen_rtx_UNSPEC (BLKmode,
> +   gen_rtvec (1, const0_rtx),
> +   UNSPEC_TIE));
>operands[5] = gen_rtx_PARALLEL (VOIDmode, p);

I have a hard time to see how this could ever be seen as clearer or more
obvious or anything like that :-(


Segher


Re: [PATCH] rs6000: replace '(const_int 0)' to 'unspec:BLK [(const_int 0)]' for stack_tie

2023-06-13 Thread Segher Boessenkool
Hi!

On Tue, Jun 13, 2023 at 10:15:49AM +0800, Jiufu Guo wrote:
> David Edelsohn  writes:
> >
> > This definitely seems to be a better solution.
> >
> > The TARGET_CONST_ANCHOR change should not be part of this patch.  Also
> > there is no ChangeLog for the patch.
> 
> Thanks a lot for your quick review!! And sorry for the sending this patch
> in a hurry.  I would update the patch accordingly.

> > This generally looks correct and consistent with other ports. I want
> > to give Segher a chance to double check it, if he wishes.

The documentation is very clear that the only thing for which you can
have BLKmode is "mem".  Not unspec, only "mem".

Let's not do this.  The existing code has clear and obvious semantics,
which is documented as well -- there is no reason to make it worse in
every respect!


Segher


[committed] Remove a couple mudflap remnants

2023-06-13 Thread Jeff Law via Gcc-patches


I happened to be digging into the specs to understand a build failure 
and spotted mflib and mfwrap.  Those were used by the mudflap system 
which we ripped out years ago and we just missed these.


I verified x86 still bootstraps after removing these bits.

Pushed to the trunk as obvious,

Jeff
commit b15d46e8057bf58b5e021011ee6e0c07d6cdf712
Author: Jeff Law 
Date:   Tue Jun 13 11:46:32 2023 -0600

Remove a couple mudflap remnants

I happened to be digging into the specs to understand a build
failure and spotted mflib and mfwrap.  Those were used by the
mudflap system which we ripped out years ago and we just missed
these.

I verified x86 still bootstraps after removing these bits.

Pushed to the trunk as obvious,
gcc/
* gcc.cc (LINK_COMMAND_SPEC): Remove mudflap spec handling.

diff --git a/gcc/gcc.cc b/gcc/gcc.cc
index 2ccca00d603..38155f8fa4b 100644
--- a/gcc/gcc.cc
+++ b/gcc/gcc.cc
@@ -1146,12 +1146,12 @@ proper position among the other output files.  */
"%{fuse-ld=*:-fuse-ld=%*} " LINK_COMPRESS_DEBUG_SPEC \
"%X %{o*} %{e*} %{N} %{n} %{r}\
 %{s} %{t} %{u*} %{z} %{Z} %{!nostdlib:%{!r:%{!nostartfiles:%S}}} \
-%{static|no-pie|static-pie:} %@{L*} %(mfwrap) %(link_libgcc) " \
+%{static|no-pie|static-pie:} %@{L*} %(link_libgcc) " \
 VTABLE_VERIFICATION_SPEC " " SANITIZER_EARLY_SPEC " %o "" \
 %{fopenacc|fopenmp|%:gt(%{ftree-parallelize-loops=*:%*} 1):\
%:include(libgomp.spec)%(link_gomp)}\
 %{fgnu-tm:%:include(libitm.spec)%(link_itm)}\
-%(mflib) " STACK_SPLIT_SPEC "\
+" STACK_SPLIT_SPEC "\
 %{fprofile-arcs|fprofile-generate*|coverage:-lgcov} " SANITIZER_SPEC " \
 %{!nostdlib:%{!r:%{!nodefaultlibs:%(link_ssp) %(link_gcc_c_sequence)}}}\
 %{!nostdlib:%{!r:%{!nostartfiles:%E}}} %{T*}  \n%(post_link) }}"


[r14-1712 Regression] FAIL: gcc.target/i386/sse2-packuswb-1.c execution test on Linux/x86_64

2023-06-13 Thread haochen.jiang via Gcc-patches
On Linux/x86_64,

921b841350c4fc298d09f6c5674663e0f4208610 is the first bad commit
commit 921b841350c4fc298d09f6c5674663e0f4208610
Author: Kyrylo Tkachov 
Date:   Mon Jun 12 11:42:29 2023 +0100

simplify-rtx: Implement constant folding of SS_TRUNCATE, US_TRUNCATE

caused

FAIL: gcc.target/i386/avx2-vpackssdw-2.c execution test
FAIL: gcc.target/i386/avx2-vpackusdw-2.c execution test
FAIL: gcc.target/i386/avx2-vpackuswb-2.c execution test
FAIL: gcc.target/i386/avx-vpackuswb-1.c execution test
FAIL: gcc.target/i386/sse2-packuswb-1.c execution test

with GCC configured with

../../gcc/configure 
--prefix=/export/users/haochenj/src/gcc-bisect/master/master/r14-1712/usr 
--enable-clocale=gnu --with-system-zlib --with-demangler-in-ld 
--with-fpmath=sse --enable-languages=c,c++,fortran --enable-cet --without-isl 
--enable-libmpx x86_64-linux --disable-bootstrap

To reproduce:

$ cd {build_dir}/gcc && make check 
RUNTESTFLAGS="i386.exp=gcc.target/i386/avx2-vpackssdw-2.c 
--target_board='unix{-m32}'"
$ cd {build_dir}/gcc && make check 
RUNTESTFLAGS="i386.exp=gcc.target/i386/avx2-vpackssdw-2.c 
--target_board='unix{-m32\ -march=cascadelake}'"
$ cd {build_dir}/gcc && make check 
RUNTESTFLAGS="i386.exp=gcc.target/i386/avx2-vpackssdw-2.c 
--target_board='unix{-m64}'"
$ cd {build_dir}/gcc && make check 
RUNTESTFLAGS="i386.exp=gcc.target/i386/avx2-vpackssdw-2.c 
--target_board='unix{-m64\ -march=cascadelake}'"
$ cd {build_dir}/gcc && make check 
RUNTESTFLAGS="i386.exp=gcc.target/i386/avx2-vpackusdw-2.c 
--target_board='unix{-m32}'"
$ cd {build_dir}/gcc && make check 
RUNTESTFLAGS="i386.exp=gcc.target/i386/avx2-vpackusdw-2.c 
--target_board='unix{-m32\ -march=cascadelake}'"
$ cd {build_dir}/gcc && make check 
RUNTESTFLAGS="i386.exp=gcc.target/i386/avx2-vpackusdw-2.c 
--target_board='unix{-m64}'"
$ cd {build_dir}/gcc && make check 
RUNTESTFLAGS="i386.exp=gcc.target/i386/avx2-vpackusdw-2.c 
--target_board='unix{-m64\ -march=cascadelake}'"
$ cd {build_dir}/gcc && make check 
RUNTESTFLAGS="i386.exp=gcc.target/i386/avx2-vpackuswb-2.c 
--target_board='unix{-m32}'"
$ cd {build_dir}/gcc && make check 
RUNTESTFLAGS="i386.exp=gcc.target/i386/avx2-vpackuswb-2.c 
--target_board='unix{-m32\ -march=cascadelake}'"
$ cd {build_dir}/gcc && make check 
RUNTESTFLAGS="i386.exp=gcc.target/i386/avx2-vpackuswb-2.c 
--target_board='unix{-m64}'"
$ cd {build_dir}/gcc && make check 
RUNTESTFLAGS="i386.exp=gcc.target/i386/avx2-vpackuswb-2.c 
--target_board='unix{-m64\ -march=cascadelake}'"
$ cd {build_dir}/gcc && make check 
RUNTESTFLAGS="i386.exp=gcc.target/i386/avx-vpackuswb-1.c 
--target_board='unix{-m32}'"
$ cd {build_dir}/gcc && make check 
RUNTESTFLAGS="i386.exp=gcc.target/i386/avx-vpackuswb-1.c 
--target_board='unix{-m32\ -march=cascadelake}'"
$ cd {build_dir}/gcc && make check 
RUNTESTFLAGS="i386.exp=gcc.target/i386/avx-vpackuswb-1.c 
--target_board='unix{-m64}'"
$ cd {build_dir}/gcc && make check 
RUNTESTFLAGS="i386.exp=gcc.target/i386/avx-vpackuswb-1.c 
--target_board='unix{-m64\ -march=cascadelake}'"
$ cd {build_dir}/gcc && make check 
RUNTESTFLAGS="i386.exp=gcc.target/i386/sse2-packuswb-1.c 
--target_board='unix{-m32}'"
$ cd {build_dir}/gcc && make check 
RUNTESTFLAGS="i386.exp=gcc.target/i386/sse2-packuswb-1.c 
--target_board='unix{-m32\ -march=cascadelake}'"
$ cd {build_dir}/gcc && make check 
RUNTESTFLAGS="i386.exp=gcc.target/i386/sse2-packuswb-1.c 
--target_board='unix{-m64}'"
$ cd {build_dir}/gcc && make check 
RUNTESTFLAGS="i386.exp=gcc.target/i386/sse2-packuswb-1.c 
--target_board='unix{-m64\ -march=cascadelake}'"

(Please do not reply to this email; for questions about this report, contact me 
at haochen dot jiang at intel.com)


Re: [PATCH] Fortran: fix passing of zero-sized array arguments to procedures [PR86277]

2023-06-13 Thread Steve Kargl via Gcc-patches
On Mon, Jun 12, 2023 at 11:12:45PM +0200, Harald Anlauf via Fortran wrote:
> Dear all,
> 
> the attached - actually rather small - patch is the result of a
> rather intensive session with Mikael in an attempt to fix the
> situation that we did not create proper temporaries when passing
> zero-sized array arguments to procedures.  When the dummy argument
> was declared as OPTIONAL, in many cases it was mis-detected as
> non-present.  This also depended on the type of argument, and
> was different for different intrinsic types, notably character,
> and derived types, and should explain the rather large ratio of
> the size of the provided testcases to the actual fix...
> 
> (What the patch does not address: we still generate too much code
> for unneeded temporaries, often two temporaries instead of just
> one.  I'll open a separate PR to track this.)
> 
> Regtested on x86_64-pc-linux-gnu.  OK for mainline?
> 
> If this survives long enough on 14-trunk, would this be eligible
> for a backport to 13-branch in time for 13.2?
> 

OK to commit.

I've reviewed the bugzilla exchange between Mikael and you,
and agree with committing this and opening a new PR to 
track the unneeded temporaries issue.

-- 
Steve


Re: [PATCH] RISC-V: Save and restore FCSR in interrupt functions to avoid program errors.

2023-06-13 Thread Jeff Law via Gcc-patches




On 6/13/23 00:41, Jin Ma wrote:

gcc/ChangeLog:

* config/riscv/riscv.cc (riscv_compute_frame_info): Allocate frame for 
FCSR.
(riscv_for_each_saved_reg): Save and restore FCSR in interrupt 
functions.
* config/riscv/riscv.md (riscv_frcsr): New patterns.
(riscv_fscsr): Likewise.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/interrupt-fcsr-1.c: New test.
* gcc.target/riscv/interrupt-fcsr-2.c: New test.
* gcc.target/riscv/interrupt-fcsr-3.c: New test.
Looks pretty good.  Just a couple minor updates and I think we can push 
this to the trunk.





diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index de30bf4e567..4ef9692b4db 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -4990,7 +4990,8 @@ riscv_compute_frame_info (void)
if (cfun->machine->interrupt_handler_p)
  {
HOST_WIDE_INT step1 = riscv_first_stack_step (frame, frame->total_size);
-  if (! POLY_SMALL_OPERAND_P ((frame->total_size - step1)))
+  if (! POLY_SMALL_OPERAND_P ((frame->total_size - step1))
+ || TARGET_HARD_FLOAT)
interrupt_save_prologue_temp = true;
  }
There's a comment before this IF block indicating when we need to save 
the prologue temporary register (specifically in interrupt functions 
with large frames).  That comment needs to be updated so that it 
mentions interrupt functions on TARGET_HARD_FLOAT.





@@ -5282,6 +5290,29 @@ riscv_for_each_saved_reg (poly_int64 sp_offset, 
riscv_save_restore_fn fn,
}
}
  
+  if (regno == RISCV_PROLOGUE_TEMP_REGNUM

+ && TARGET_HARD_FLOAT
+ && cfun->machine->interrupt_handler_p
+ && cfun->machine->frame.fmask)
+   {
+ unsigned int fcsr_size = GET_MODE_SIZE (SImode);
+ if (!epilogue)
+   {
+ riscv_save_restore_reg (word_mode, regno, offset, fn);
+ offset -= fcsr_size;
+ emit_insn (gen_riscv_frcsr (gen_rtx_REG (SImode, 
RISCV_PROLOGUE_TEMP_REGNUM)));
+ riscv_save_restore_reg (SImode, RISCV_PROLOGUE_TEMP_REGNUM, 
offset, riscv_save_reg);
+   }
+ else
+   {
+ riscv_save_restore_reg (SImode, RISCV_PROLOGUE_TEMP_REGNUM, 
offset - fcsr_size, riscv_restore_reg);
+ emit_insn (gen_riscv_fscsr (gen_rtx_REG (SImode, 
RISCV_PROLOGUE_TEMP_REGNUM)));
+ riscv_save_restore_reg (word_mode, regno, offset, fn);
+ offset -= fcsr_size;
+   }
+ continue;
+   }
Note there is a macro RISCV_PROLOGUE_TEMP(MODE) which will create the 
REG expression for the prologue temporary in the given mode.  That way 
you don't have to call gen_rtx_REG directly here.


Jeff


[committed] Remove sh5media divtab code

2023-06-13 Thread Jeff Law via Gcc-patches


Spurred by Akari Takahashi's patch to config/sh/divtab.cc, this removes 
divtab.cc completely.


divtab.cc was used to calculate a division table for the sh5 media 
processor.  GCC dropped support for that (unmanufactured) chip back in 
2016 and this file simply got missed AFAICT.


Pushed to the trunk,
Jeff
commit 99dec205a12f7dfa39fd43fc7c36d9974aaec526
Author: Jeff Law 
Date:   Tue Jun 13 11:10:21 2023 -0600

Remove sh5media divtab code

Spurred by Akari Takahashi's patch to config/sh/divtab.cc, this removes
divtab.cc completely.

divtab.cc was used to calculate a division table for the sh5 media
processor.  GCC dropped support for that (unmanufactured) chip back
in 2016 and this file simply got missed AFAICT.

gcc/
* config/sh/divtab.cc: Remove.

diff --git a/gcc/config/sh/divtab.cc b/gcc/config/sh/divtab.cc
deleted file mode 100644
index a69dd541370..000
--- a/gcc/config/sh/divtab.cc
+++ /dev/null
@@ -1,203 +0,0 @@
-/* Copyright (C) 2003-2023 Free Software Foundation, Inc.
-
-This file is free software; you can redistribute it and/or modify it
-under the terms of the GNU General Public License as published by the
-Free Software Foundation; either version 3, or (at your option) any
-later version.
-
-This file is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-General Public License for more details.
-
-Under Section 7 of GPL version 3, you are granted additional
-permissions described in the GCC Runtime Library Exception, version
-3.1, as published by the Free Software Foundation.
-
-You should have received a copy of the GNU General Public License and
-a copy of the GCC Runtime Library Exception along with this program;
-see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-.  */
-
-
-/* Calculate division table for SH5Media integer division
-   Contributed by Joern Rennecke
-   joern.renne...@superh.com  */
-
-#include 
-#include 
-
-#define BITS 5
-#define N_ENTRIES (1 << BITS)
-#define CUTOFF_BITS 20
-
-#define BIAS (-330)
-
-double max_defect = 0.;
-double max_defect_x;
-
-double min_defect = 1e9;
-double min_defect_x;
-
-double max_defect2 = 0.;
-double max_defect2_x;
-
-double min_defect2 = 0.;
-double min_defect2_x;
-
-double min_defect3 = 01e9;
-double min_defect3_x;
-int min_defect3_val;
-
-double max_defect3 = 0.;
-double max_defect3_x;
-int max_defect3_val;
-
-static double
-note_defect3 (int val, double d2, double y2d, double x)
-{
-  int cutoff_val = val >> CUTOFF_BITS;
-  double cutoff;
-  double defect;
-
-  if (val < 0)
-cutoff_val++;
-  cutoff = (cutoff_val * (1< max_defect3)
-{
-  max_defect3 = defect;
-  max_defect3_x = x;
-  max_defect3_val = val;
-}
-  if (defect < min_defect3)
-{
-  min_defect3 = defect;
-  min_defect3_x = x;
-  min_defect3_val = val;
-}
-}
-
-/* This function assumes 32-bit integers.  */
-static double
-calc_defect (double x, int constant, int factor)
-{
-  double y0 = (constant - (int) floor ((x * factor * 64.))) / 16384.;
-  double y1 = 2 * y0 -y0 * y0 * (x + BIAS / (1.*(1LL<<30)));
-  double y2d0, y2d;
-  int y2d1;
-  double d, d2;
-
-  y1 = floor (y1 * (1024 * 1024 * 1024)) / (1024 * 1024 * 1024);
-  d = y1 - 1 / x;
-  if (d > max_defect)
-{
-  max_defect = d;
-  max_defect_x = x;
-}
-  if (d < min_defect)
-{
-  min_defect = d;
-  min_defect_x = x;
-}
-  y2d0 = floor (y1 * x * (1LL << 60-16));
-  y2d1 = (int) (long long) y2d0;
-  y2d = - floor ((y1 - y0 / (1<<30-14)) * y2d1) / (1LL<<44);
-  d2 = y1 + y2d - 1/x;
-  if (d2 > max_defect2)
-{
-  max_defect2 = d2;
-  max_defect2_x = x;
-}
-  if (d2 < min_defect2)
-{
-  min_defect2 = d2;
-  min_defect2_x = x;
-}
-  /* zero times anything is trivially zero.  */
-  note_defect3 ((1 << CUTOFF_BITS) - 1, d2, y2d, x);
-  note_defect3 (1 << CUTOFF_BITS, d2, y2d, x);
-  note_defect3 ((1U << 31) - (1 << CUTOFF_BITS), d2, y2d, x);
-  note_defect3 ((1U << 31) - 1, d2, y2d, x);
-  note_defect3 (-1, d2, y2d, x);
-  note_defect3 (-(1 << CUTOFF_BITS), d2, y2d, x);
-  note_defect3 ((1U << 31) - (1 << CUTOFF_BITS) + 1, d2, y2d, x);
-  note_defect3 (-(1U << 31), d2, y2d, x);
-  return d;
-}
-
-int
-main ()
-{
-  int i;
-  unsigned char factors[N_ENTRIES];
-  short constants[N_ENTRIES];
-  int steps = N_ENTRIES / 2;
-  double step = 1. / steps;
-  double eps30 = 1. / (1024 * 1024 * 1024);
-
-  for (i = 0; i < N_ENTRIES; i++)
-{
-  double x_low = (i < steps ? 1. : -3.) + i * step;
-  double x_high = x_low + step - eps30;
-  double x_med;
-  int factor, constant;
-  double low_defect, med_defect, high_defect, max_defect;
-
-  factor = (1./x_low- 1./x_high) / step * 256. + 0.5;
-  if (factor == 256)
-   factor = 255;
-  factors[i] = factor;
-  /* 

Re: [PATCH] Add __builtin_iseqsig()

2023-06-13 Thread FX Coudert via Gcc-patches
ping


> Hi,
> 
> (It took me a while to get back to this.)
> 
> This is a new and improved version of the patch at 
> https://gcc.gnu.org/pipermail/gcc-patches/2022-October/602932.html
> It addresses the comment from Joseph that FE_INVALID should really be tested 
> in the case of both quiet and signaling NaNs, which is now done 
> systematically.
> 
> Bootstrapped and regtested on x86_64-pc-linux-gnu
> OK to commit?
> 
> FX


0001-Add-__builtin_iseqsig.patch
Description: Binary data


[committed] i386: Fix up whitespace in assembly

2023-06-13 Thread Jakub Jelinek via Gcc-patches
Hi!

I've noticed that standard_sse_constant_opcode emits some spurious
whitespace around tab, that isn't something which is done for
any other instruction and looks wrong.

Bootstrapped/regtested on x86_64-linux and i686-linux, committed to trunk
as obvious.

2023-06-13  Jakub Jelinek  

* config/i386/i386.cc (standard_sse_constant_opcode): Remove
superfluous spaces around \t for vpcmpeqd.

--- gcc/config/i386/i386.cc.jj  2023-06-12 15:47:21.855511203 +0200
+++ gcc/config/i386/i386.cc 2023-06-13 14:33:49.940464312 +0200
@@ -5358,19 +5358,19 @@ standard_sse_constant_opcode (rtx_insn *
   if (GET_MODE_SIZE (mode) == 64)
{
  gcc_assert (TARGET_AVX512F);
- return "vpcmpeqd \t %t0, %t0, %t0";
+ return "vpcmpeqd\t%t0, %t0, %t0";
}
   else if (GET_MODE_SIZE (mode) == 32)
{
  gcc_assert (TARGET_AVX);
- return "vpcmpeqd \t %x0, %x0, %x0";
+ return "vpcmpeqd\t%x0, %x0, %x0";
}
   gcc_unreachable ();
 }
   else if (vector_all_ones_zero_extend_quarter_operand (x, mode))
 {
   gcc_assert (TARGET_AVX512F);
-  return "vpcmpeqd \t %x0, %x0, %x0";
+  return "vpcmpeqd\t%x0, %x0, %x0";
 }
 
   gcc_unreachable ();

Jakub



[PATCH] libcpp: Diagnose #include after failed __has_include [PR80753]

2023-06-13 Thread Jakub Jelinek via Gcc-patches
Hi!

As can be seen in the testcase, we don't diagnose #include/#include_next
of a non-existent header if __has_include/__has_include_next is done for
that header first.
The problem is that we normally error the first time some header is not
found, but in the _cpp_FFK_HAS_INCLUDE case obviously don't want to diagnose
it, just expand it to 0.  And libcpp caches both successful includes and
unsuccessful ones.

The following patch fixes that by remembering that no error has been
diagnosed yet when __has_include* was used on the header, and diagnosing it
the first time the cache entry is used in normal mode.

I think _cpp_FFK_NORMAL is the only mode in which we normally diagnose
errors, for _cpp_FFK_PRE_INCLUDE that open_file_failed isn't reached
and for _cpp_FFK_FAKE neither.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk and
after a while for backports?

2023-06-13  Jakub Jelinek  

PR preprocessor/80753
libcpp/
* files.cc (struct _cpp_file): Add deferred_error bitfield.
(_cpp_find_file): When finding a file in cache with deferred_error
set in _cpp_FFK_NORMAL mode, call open_file_failed and clear the flag.
Set deferred_error in _cpp_FFK_HAS_INCLUDE mode if open_file_failed
hasn't been called.
gcc/testsuite/
* c-c++-common/missing-header-5.c: New test.

--- libcpp/files.cc.jj  2023-01-16 11:52:16.326730483 +0100
+++ libcpp/files.cc 2023-06-13 11:27:59.867465878 +0200
@@ -109,6 +109,10 @@ struct _cpp_file
   /* If this file is implicitly preincluded.  */
   bool implicit_preinclude : 1;
 
+  /* Set if a header wasn't found with __has_include or __has_include_next
+ and error should be emitted if it is included normally.  */
+  bool deferred_error : 1;
+
   /* > 0: Known C++ Module header unit, <0: known not.  ==0, unknown  */
   int header_unit : 2;
 };
@@ -523,7 +527,14 @@ _cpp_find_file (cpp_reader *pfile, const
   cpp_file_hash_entry *entry
 = search_cache ((struct cpp_file_hash_entry *) *hash_slot, start_dir);
   if (entry)
-return entry->u.file;
+{
+  if (entry->u.file->deferred_error && kind == _cpp_FFK_NORMAL)
+   {
+ open_file_failed (pfile, entry->u.file, angle_brackets, loc);
+ entry->u.file->deferred_error = false;
+   }
+  return entry->u.file;
+}
 
   _cpp_file *file = make_cpp_file (start_dir, fname);
   file->implicit_preinclude
@@ -589,6 +600,8 @@ _cpp_find_file (cpp_reader *pfile, const
 
if (kind != _cpp_FFK_HAS_INCLUDE)
  open_file_failed (pfile, file, angle_brackets, loc);
+   else
+ file->deferred_error = true;
break;
  }
 
--- gcc/testsuite/c-c++-common/missing-header-5.c.jj2023-06-13 
11:29:49.345931030 +0200
+++ gcc/testsuite/c-c++-common/missing-header-5.c   2023-06-13 
11:25:34.952497526 +0200
@@ -0,0 +1,15 @@
+/* PR preprocessor/80753 */
+/* { dg-do compile } */
+/* { dg-options "" } */
+
+#if __has_include("nonexistent.h")
+# error
+#endif
+
+#include "nonexistent.h"
+
+/* { dg-message "nonexistent.h" "nonexistent.h" { target *-*-* } 0 } */
+/* { dg-message "terminated" "terminated" { target *-*-* } 0 } */
+
+/* This declaration should not receive any diagnostic.  */
+foo bar;

Jakub



Re: [PATCH] vect: Vectorize via libfuncs

2023-06-13 Thread Jeff Law via Gcc-patches




On 6/13/23 09:55, Andrew Stubbs wrote:

Subject:
[PATCH] vect: Vectorize via libfuncs
From:
Andrew Stubbs 
Date:
6/13/23, 09:55

To:
"gcc-patches@gcc.gnu.org" 


This patch allows vectorization when operators are available as 
libfuncs, rather than only as insns.


This will be useful for amdgcn where we plan to vectorize loops that 
contain integer division or modulus, but don't want to generate inline 
instructions for the division algorithm every time.


The change should not affect architectures that do not define 
vector-mode libfuncs.


OK for mainline?

Andrew

230613-vect-allow-libfuncs.patch

vect: vectorize via libfuncs

This patch allows vectorization when the libfuncs are defined.

gcc/ChangeLog:

* tree-vect-generic.cc: Include optabs-libfuncs.h.
(get_compute_type): Check optab_libfunc.
* tree-vect-stmts.cc: Include optabs-libfuncs.h.
(vectorizable_operation): Check optab_libfunc.

diff --git a/gcc/tree-vect-generic.cc b/gcc/tree-vect-generic.cc
index b7d4a919c55..4d784a70c0d 100644
--- a/gcc/tree-vect-generic.cc
+++ b/gcc/tree-vect-generic.cc
@@ -44,6 +44,7 @@ along with GCC; see the file COPYING3.  If not see
  #include "gimple-fold.h"
  #include "gimple-match.h"
  #include "recog.h"  /* FIXME: for insn_data */
+#include "optabs-libfuncs.h"
  
  
  /* Build a ternary operation and gimplify it.  Emit code before GSI.

@@ -1714,7 +1715,8 @@ get_compute_type (enum tree_code code, optab op, tree 
type)
machine_mode compute_mode = TYPE_MODE (compute_type);
if (VECTOR_MODE_P (compute_mode))
{
- if (op && optab_handler (op, compute_mode) != CODE_FOR_nothing)
+ if (op && (optab_handler (op, compute_mode) != CODE_FOR_nothing
+|| optab_libfunc (op, compute_mode)))
Formatting nit.  Bring the && down and align it under OP.  That'll 
require re-indenting the optab_libfunc call as well.


OK with that nit.

jeff


Re: [PATCH] RISC-V: Basic VLS code gen for RISC-V

2023-06-13 Thread Jeff Law via Gcc-patches




On 6/12/23 17:39, juzhe.zhong wrote:
I take this work which is very important for VLA SLP too.  I will 
support VLS after I finish VLA SLP.
OK.  I think I'll mark Kito's patch as dropped and we'll wait for your 
implementation in this space.


jeff


[x86 PATCH] Convert ptestz of pandn into ptestc.

2023-06-13 Thread Roger Sayle

This patch is the next instalment in a set of backend patches around
improvements to ptest/vptest.  A previous patch optimized the sequence
t=pand(x,y); ptestz(t,t) into the equivalent ptestz(x,y), using the
property that ZF is set to (X) == 0.  This patch performs a similar
transformation, converting t=pandn(x,y); ptestz(t,t) into the (almost)
equivalent ptestc(y,x), using the property that the CF flag is set to
(~X) == 0.  The tricky bit is that this sets the CF flag instead of
the ZF flag, so we can only perform this transformation when we can
also convert the flags' consumer, as well as the producer.

For the test case:

int foo (__m128i x, __m128i y)
{
  __m128i a = x & ~y;
  return __builtin_ia32_ptestz128 (a, a);
}

With -O2 -msse4.1 we previously generated:

foo:pandn   %xmm0, %xmm1
xorl%eax, %eax
ptest   %xmm1, %xmm1
sete%al
ret

with this patch we now generate:

foo:xorl%eax, %eax
ptest   %xmm0, %xmm1
setc%al
ret

At the same time, this patch also provides alternative fixes for
PR target/109973 and PR target/110118, by recognizing that ptestc(x,x)
always sets the carry flag (X&~X is always zero).  This is achieved
both by recognizing the special case in ix86_expand_sse_ptest and with
a splitter to convert an eligible ptest into an stc.

The next piece is, of course, STV of "if (x & ~y)..."

This patch has been tested on x86_64-pc-linux-gnu with make bootstrap
and make -k check, both with and without --target_board=unix{-m32}
with no new failures.  Ok for mainline?

2023-06-13  Roger Sayle  

gcc/ChangeLog
* config/i386/i386-expand.cc (ix86_expand_sse_ptest): Recognize
expansion of ptestc with equal operands as returning const1_rtx.
* config/i386/i386.cc (ix86_rtx_costs): Provide accurate cost
estimates of UNSPEC_PTEST, where the ptest performs the PAND
or PANDN of its operands.
* config/i386/sse.md (define_split): Transform CCCmode UNSPEC_PTEST
of reg_equal_p operands into an x86_stc instruction.
(define_split): Split pandn/ptestz/setne into ptestc/setnc.
(define_split): Split pandn/ptestz/sete into ptestc/setc.
(define_split): Split pandn/ptestz/je into ptestc/jc.
(define_split): Split pandn/ptestz/jne into ptestc/jnc.

gcc/testsuite/ChangeLog
* gcc.target/i386/avx-vptest-4.c: New test case.
* gcc.target/i386/avx-vptest-5.c: Likewise.
* gcc.target/i386/avx-vptest-6.c: Likewise.
* gcc.target/i386/pr109973-1.c: Update test case.
* gcc.target/i386/pr109973-2.c: Likewise.
* gcc.target/i386/sse4_1-ptest-4.c: New test case.
* gcc.target/i386/sse4_1-ptest-5.c: Likewise.
* gcc.target/i386/sse4_1-ptest-6.c: Likewise.


Thanks in advance,
Roger
--

diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index def060a..1d11af2 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -10222,6 +10222,13 @@ ix86_expand_sse_ptest (const struct 
builtin_description *d, tree exp,
   machine_mode mode1 = insn_data[d->icode].operand[1].mode;
   enum rtx_code comparison = d->comparison;
 
+  /* ptest reg, reg sets the carry flag.  */
+  if (comparison == LTU
+  && (d->code == IX86_BUILTIN_PTESTC
+ || d->code == IX86_BUILTIN_PTESTC256)
+  && rtx_equal_p (op0, op1))
+return const1_rtx;
+
   if (VECTOR_MODE_P (mode0))
 op0 = safe_vector_operand (op0, mode0);
   if (VECTOR_MODE_P (mode1))
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 3a1444d..3e99e23 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -21423,16 +21423,23 @@ ix86_rtx_costs (rtx x, machine_mode mode, int 
outer_code_i, int opno,
   else if (XINT (x, 1) == UNSPEC_PTEST)
{
  *total = cost->sse_op;
- if (XVECLEN (x, 0) == 2
- && GET_CODE (XVECEXP (x, 0, 0)) == AND)
+ rtx test_op0 = XVECEXP (x, 0, 0);
+ if (!rtx_equal_p (test_op0, XVECEXP (x, 0, 1)))
+   return false;
+ if (GET_CODE (test_op0) == AND)
{
- rtx andop = XVECEXP (x, 0, 0);
- *total += rtx_cost (XEXP (andop, 0), GET_MODE (andop),
- AND, opno, speed)
-   + rtx_cost (XEXP (andop, 1), GET_MODE (andop),
-   AND, opno, speed);
- return true;
+ rtx and_op0 = XEXP (test_op0, 0);
+ if (GET_CODE (and_op0) == NOT)
+   and_op0 = XEXP (and_op0, 0);
+ *total += rtx_cost (and_op0, GET_MODE (and_op0),
+ AND, 0, speed)
+   + rtx_cost (XEXP (test_op0, 1), GET_MODE (and_op0),
+   AND, 1, speed);
}
+ else
+   *total = rtx_cost (test_op0, GET_MODE (test_op0),
+  UNSPEC, 0, speed);
+ 

[PATCH] vect: Vectorize via libfuncs

2023-06-13 Thread Andrew Stubbs
This patch allows vectorization when operators are available as 
libfuncs, rather than only as insns.


This will be useful for amdgcn where we plan to vectorize loops that 
contain integer division or modulus, but don't want to generate inline 
instructions for the division algorithm every time.


The change should not affect architectures that do not define 
vector-mode libfuncs.


OK for mainline?

Andrew

vect: vectorize via libfuncs

This patch allows vectorization when the libfuncs are defined.

gcc/ChangeLog:

* tree-vect-generic.cc: Include optabs-libfuncs.h.
(get_compute_type): Check optab_libfunc.
* tree-vect-stmts.cc: Include optabs-libfuncs.h.
(vectorizable_operation): Check optab_libfunc.

diff --git a/gcc/tree-vect-generic.cc b/gcc/tree-vect-generic.cc
index b7d4a919c55..4d784a70c0d 100644
--- a/gcc/tree-vect-generic.cc
+++ b/gcc/tree-vect-generic.cc
@@ -44,6 +44,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "gimple-fold.h"
 #include "gimple-match.h"
 #include "recog.h" /* FIXME: for insn_data */
+#include "optabs-libfuncs.h"
 
 
 /* Build a ternary operation and gimplify it.  Emit code before GSI.
@@ -1714,7 +1715,8 @@ get_compute_type (enum tree_code code, optab op, tree 
type)
   machine_mode compute_mode = TYPE_MODE (compute_type);
   if (VECTOR_MODE_P (compute_mode))
{
- if (op && optab_handler (op, compute_mode) != CODE_FOR_nothing)
+ if (op && (optab_handler (op, compute_mode) != CODE_FOR_nothing
+|| optab_libfunc (op, compute_mode)))
return compute_type;
  if (code == MULT_HIGHPART_EXPR
  && can_mult_highpart_p (compute_mode,
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index a7acc032d47..71a8cf2c6d4 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -56,6 +56,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "gimple-fold.h"
 #include "regs.h"
 #include "attribs.h"
+#include "optabs-libfuncs.h"
 
 /* For lang_hooks.types.type_for_mode.  */
 #include "langhooks.h"
@@ -6528,8 +6529,8 @@ vectorizable_operation (vec_info *vinfo,
  "no optab.\n");
  return false;
}
-  target_support_p = (optab_handler (optab, vec_mode)
- != CODE_FOR_nothing);
+  target_support_p = (optab_handler (optab, vec_mode) != CODE_FOR_nothing
+ || optab_libfunc (optab, vec_mode));
 }
 
   bool using_emulated_vectors_p = vect_emulated_vector_p (vectype);


[PATCH, OpenACC 2.7] Implement self clause for compute constructs

2023-06-13 Thread Chung-Lin Tang via Gcc-patches
Hi Thomas,
This patch implements the compiler side for the 'self' clause for compute 
constructs:
parallel, kernels, and serial.

As you know, the actual "local device" device type for libgomp is not yet 
implemented,
so the libgomp side is basically just a simple duplicate of what host-fallback 
is doing,
though everything else should be completed by this patch.

Tested on powerpc64le-linux/nvptx, x86_64-linux/amdgcn tests pending.
Is this okay for trunk?

Thanks,
Chung-Lin

2023-06-13  Chung-Lin Tang  

gcc/c/ChangeLog:

* c-parser.cc (c_parser_oacc_compute_clause_self): New function.
(c_parser_oacc_all_clauses): Add new 'bool compute_p = false'
parameter, add parsing of self clause when compute_p is true.
(OACC_KERNELS_CLAUSE_MASK): Add PRAGMA_OACC_CLAUSE_SELF.
(OACC_PARALLEL_CLAUSE_MASK): Likewise.
(OACC_SERIAL_CLAUSE_MASK): Likewise.
(c_parser_oacc_compute): Adjust call to c_parser_oacc_all_clauses to
set compute_p argument to true.
* c-typeck.cc (c_finish_omp_clauses): Add OMP_CLAUSE_SELF case.

gcc/cp/ChangeLog:

* parser.cc (cp_parser_oacc_compute_clause_self): New function.
(cp_parser_oacc_all_clauses): Add new 'bool compute_p = false'
parameter, add parsing of self clause when compute_p is true.
(OACC_KERNELS_CLAUSE_MASK): Add PRAGMA_OACC_CLAUSE_SELF.
(OACC_PARALLEL_CLAUSE_MASK): Likewise.
(OACC_SERIAL_CLAUSE_MASK): Likewise.
(cp_parser_oacc_compute): Adjust call to c_parser_oacc_all_clauses to
set compute_p argument to true.
* pt.cc (tsubst_omp_clauses): Add OMP_CLAUSE_SELF case.
* c-typeck.cc (c_finish_omp_clauses): Add OMP_CLAUSE_SELF case, merged
with OMP_CLAUSE_IF case.

gcc/fortran/ChangeLog:

* gfortran.h (typedef struct gfc_omp_clauses): Add self_expr field.
* openmp.cc (enum omp_mask2): Add OMP_CLAUSE_SELF.
(gfc_match_omp_clauses): Add handling for OMP_CLAUSE_SELF.
(OACC_PARALLEL_CLAUSES): Add OMP_CLAUSE_SELF.
(OACC_KERNELS_CLAUSES): Likewise.
(OACC_SERIAL_CLAUSES): Likewise.
(resolve_omp_clauses): Add handling for omp_clauses->self_expr.
* trans-openmp.cc (gfc_trans_omp_clauses): Add handling of
clauses->self_expr and building of OMP_CLAUSE_SELF tree clause.
(gfc_split_omp_clauses): Add handling of self_expr field copy.

gcc/ChangeLog:

* gimplify.cc (gimplify_scan_omp_clauses): Add OMP_CLAUSE_SELF case.
(gimplify_adjust_omp_clauses): Likewise.
* omp-expand.cc (expand_omp_target): Add OMP_CLAUSE_SELF expansion code.
* omp-low.cc (scan_sharing_clauses): Add OMP_CLAUSE_SELF case.
* tree-core.h (enum omp_clause_code): Add OMP_CLAUSE_SELF enum.
* tree-nested.cc (convert_nonlocal_omp_clauses): Add OMP_CLAUSE_SELF
case.
(convert_local_omp_clauses): Likewise.
* tree-pretty-print.cc (dump_omp_clause): Add OMP_CLAUSE_SELF case.
* tree.cc (omp_clause_num_ops): Add OMP_CLAUSE_SELF entry.
(omp_clause_code_name): Likewise.
* tree.h (OMP_CLAUSE_SELF_EXPR): New macro.

gcc/testsuite/ChangeLog:

* c-c++-common/goacc/self-clause-1.c: New test.
* c-c++-common/goacc/self-clause-2.c: New test.
* gfortran.dg/goacc/self.f95: New test.

include/ChangeLog:

* gomp-constants.h (GOACC_FLAG_LOCAL_DEVICE): New flag bit value.

libgomp/ChangeLog:

* oacc-parallel.c (GOACC_parallel_keyed): Add code to handle
GOACC_FLAG_LOCAL_DEVICE case.
* testsuite/libgomp.oacc-c-c++-common/self-1.c: New test.

From 449883981c8e1f707b47ff8f8dd70049b9ffda82 Mon Sep 17 00:00:00 2001
From: Chung-Lin Tang 
Date: Tue, 13 Jun 2023 08:44:31 -0700
Subject: [PATCH] OpenACC 2.7: Implement self clause for compute constructs

This patch implements the 'self' clause for compute constructs: parallel,
kernels, and serial. This clause conditionally uses the local device
(the host multi-core CPU) as the executing device of the compute region.

The actual implementation of the "local device" device type inside libgomp
(presumably using pthreads) is still not yet completed, so the libgomp
side is still implemented the exact same as host-fallback mode. (so as of now,
it essentially behaves like the 'if' clause with the condition inverted)

gcc/c/ChangeLog:

* c-parser.cc (c_parser_oacc_compute_clause_self): New function.
(c_parser_oacc_all_clauses): Add new 'bool compute_p = false'
parameter, add parsing of self clause when compute_p is true.
(OACC_KERNELS_CLAUSE_MASK): Add PRAGMA_OACC_CLAUSE_SELF.
(OACC_PARALLEL_CLAUSE_MASK): Likewise.
(OACC_SERIAL_CLAUSE_MASK): Likewise.
(c_parser_oacc_compute): Adjust call to c_parser_oacc_all_clauses to
set compute_p argument to true.
* c-typeck.cc (c_finish_omp_clauses): Add OMP_CLAUSE_SELF case.

gcc/cp/ChangeLog:

* parser.cc 

Re: [RFC] Add stdckdint.h header for C23

2023-06-13 Thread Joseph Myers
On Tue, 13 Jun 2023, Jakub Jelinek via Gcc-patches wrote:

> Yeah, having say __builtin_{clz,ctz,ffs,popcount,parity} variants which would
> be typegeneric and would allow say any normal integral or _BitInt type
> (or just unsigned versions thereof?) would be useful.  Even without _BitInt
> we have the problem that we don't have builtins for __uint128_t.
> 
> One question is if we should keep them UB on zero input or hardcode some 
> particular
> behavior for clz/ctz.  The backend defaults might not be appropriate, I
> think if we'd make it non-UB, using the precision of the type would be
> reasonable, whatever it is (__builtin_clzb ((unsigned _BitInt(126)) 0)
> might be 126 etc.).

FWIW the C2x stdbit.h operations all have defined semantics on special 
cases, except for the stdc_bit_ceil operations (where there's an NB 
comment on CD2 to be considered at next week's WG14 meeting requesting 
defined semantics there as well).  They're also all for unsigned 
arguments.  (Note there are also NB comments requesting removal of some of 
the operations as duplicates or near-duplicates of others.)

The stdbit.h header does seem naturally to be something for libc, given 
that (a) it has a lot of functions, not just type-generic macros, and (b) 
the type-generic macros are generally easy to implement (at least for the 
types supported in the standard) in a way that doesn't depend on any 
compiler extensions (or even on _Generic, many of them can be implemented 
just to call the function for unsigned long long).  It makes sense in due 
course for GCC to know the names there (after any get removed) as built-in 
functions, but mapping in a header to existing __builtin_* is generally 
easy until then.

-- 
Joseph S. Myers
jos...@codesourcery.com


Re: [ping] driver: Forward '-lgfortran', '-lm' to offloading compilation

2023-06-13 Thread Joseph Myers
On Tue, 13 Jun 2023, Thomas Schwinge wrote:

> Hi!
> 
> On 2023-06-05T14:25:18+0200, I wrote:
> > OK to push the attached
> > "driver: Forward '-lgfortran', '-lm' to offloading compilation"?
> > (We didn't have a PR open for that, or did we?)
> 
> Ping.

OK.

-- 
Joseph S. Myers
jos...@codesourcery.com


RE: [PATCH v2] machine descriptor: New compact syntax for insn and insn_split in Machine Descriptions.

2023-06-13 Thread Tamar Christina via Gcc-patches
Hi All,

Updated patch with feedback addressed.


Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.

Any feedback?

Thanks,
Tamar

gcc/ChangeLog:

* gensupport.cc (class conlist, add_constraints, add_attributes,
skip_spaces, expect_char, preprocess_compact_syntax,
parse_section_layout, parse_section, convert_syntax): New.
(process_rtx): Check for conversion.
* genoutput.cc (process_template): Check for unresolved iterators.
(class data): Add compact_syntax_p.
(gen_insn): Use it.
* gensupport.h (compact_syntax): New.
(hash-set.h): Include.

Co-Authored-By: Omar Tahir 

--- inline copy of patch ---

diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 
6a435eb44610960513e9739ac9ac1e8a27182c10..3bd1bcbc8beda9bbaea71c65118ecfa2cdace335
 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -27,6 +27,7 @@ See the next chapter for information on the C header file.
 from such an insn.
 * Output Statement::For more generality, write C code to output
 the assembler code.
+* Compact Syntax::  Compact syntax for writing machine descriptors.
 * Predicates::  Controlling what kinds of operands can be used
 for an insn.
 * Constraints:: Fine-tuning operand selection.
@@ -713,6 +714,167 @@ you can use @samp{*} inside of a @samp{@@} 
multi-alternative template:
 @end group
 @end smallexample
 
+@node Compact Syntax
+@section Compact Syntax
+@cindex compact syntax
+
+When a @code{define_insn} or @code{define_insn_and_split} has multiple
+alternatives it may be beneficial to use the compact syntax when specifying
+alternatives.
+
+This syntax puts the constraints and attributes on the same horizontal line as
+the instruction assembly template.
+
+As an example
+
+@smallexample
+@group
+(define_insn_and_split ""
+  [(set (match_operand:SI 0 "nonimmediate_operand" "=r,k,r,r,r,r")
+   (match_operand:SI 1 "aarch64_mov_operand"  " r,r,k,M,n,Usv"))]
+  ""
+  "@@
+   mov\\t%w0, %w1
+   mov\\t%w0, %w1
+   mov\\t%w0, %w1
+   mov\\t%w0, %1
+   #
+   * return aarch64_output_sve_cnt_immediate ('cnt', '%x0', operands[1]);"
+  "&& true"
+   [(const_int 0)]
+  @{
+ aarch64_expand_mov_immediate (operands[0], operands[1]);
+ DONE;
+  @}
+  [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,mov_imm")
+   (set_attr "arch"   "*,*,*,*,*,sve")
+   (set_attr "length" "4,4,4,4,*,  4")
+]
+)
+@end group
+@end smallexample
+
+can be better expressed as:
+
+@smallexample
+@group
+(define_insn_and_split ""
+  [(set (match_operand:SI 0 "nonimmediate_operand")
+   (match_operand:SI 1 "aarch64_mov_operand"))]
+  ""
+  @{@@ [cons: =0, 1; attrs: type, arch, length]
+ [r , r  ; mov_reg  , *   , 4] mov\t%w0, %w1
+ [k , r  ; mov_reg  , *   , 4] ^
+ [r , k  ; mov_reg  , *   , 4] ^
+ [r , M  ; mov_imm  , *   , 4] mov\t%w0, %1
+ [r , n  ; mov_imm  , *   , *] #
+ [r , Usv; mov_imm  , sve , 4] << aarch64_output_sve_cnt_immediate ("cnt", 
"%x0", operands[1]);
+  @}
+  "&& true"
+  [(const_int 0)]
+  @{
+aarch64_expand_mov_immediate (operands[0], operands[1]);
+DONE;
+  @}
+)
+@end group
+@end smallexample
+
+The syntax rules are as follows:
+@itemize @bullet
+@item
+Templates must start with @samp{@{@@} to use the new syntax.
+
+@item
+@samp{@{@@} is followed by a layout in square brackets which is @samp{cons:}
+followed by a comma-separated list of @code{match_operand}/@code{match_scratch}
+operand numbers, then a semicolon, followed by the same for attributes
+(@samp{attrs:}).  Operand modifiers can be placed in this section group as 
well.
+Both sections are optional (so you can use only @samp{cons}, or only
+@samp{attrs}, or both), and @samp{cons} must come before @samp{attrs} if
+present.
+
+@item
+Each alternative begins with any amount of whitespace.
+
+@item
+Following the whitespace is a comma-separated list of "constraints" and/or
+"attributes" within brackets @code{[]}, with sections separated by a semicolon.
+
+@item
+Should you want to copy the previous asm line, the symbol @code{^} can be used.
+This allows less copy pasting between alternatives and reduces the number of
+lines to update on changes.
+
+@item
+When using C functions for output, the idiom @samp{* return @var{function};}
+can be replaced with the shorthand @samp{<< @var{function};}.
+
+@item
+Following the closing @samp{]} is any amount of whitespace, and then the actual
+asm output.
+
+@item
+Spaces are allowed in the list (they will simply be removed).
+
+@item
+All constraint alternatives should be specified.  For example, a list of
+three blank alternatives should be written @samp{[,,]} rather than
+@samp{[]}.
+
+@item
+All attribute alternatives should be non-empty, with @samp{*}
+representing the default attribute value.  For example, a list of three
+default attribute values should be written @samp{[*,*,*]} rather than
+@samp{[]}.
+
+
+@item

Re: [RFC] Add stdckdint.h header for C23

2023-06-13 Thread Jakub Jelinek via Gcc-patches
On Tue, Jun 13, 2023 at 03:10:48PM +, Joseph Myers wrote:
> > So why does C2X say
> > Recommended practice
> > It is recommended to produce a diagnostic message if type2 or type3 are
> > not suitable integer types, or if *result is not a modifiable lvalue of
> > a suitable integer type.
> > ?
> > Or is it meant that a suitable integer type doesn't need to be necessarily
> > one that is listed in the previous paragraph?
> > Perhaps the checking could be guarded on #ifdef __STRICT_ANSI__, sure...
> 
> Diagnostics are better than doing something completely random - but making 
> it conditional when there are sensible semantics also makes sense.
> 
> It seems likely a future standard version will support these operations 
> for bit-precise types, at least.  (Bit-precise types are generally tricky 
> for type-generic operations; there's no standard way to select on them 
> with _Generic beyond listing individual types with specific widths, and no 
> standard way to determine the width of the bit-precise type of an 
> argument.  So implementing some type-generic operations for such types may 
> need new language extensions, prompting WG14 caution about requiring such 
> support - but this also makes support for such types in standard 
> type-generic macros in headers particularly valuable, precisely because 
> they can't be implemented purely in user code using standard language 
> features.)

Yeah, having say __builtin_{clz,ctz,ffs,popcount,parity} variants which would
be typegeneric and would allow say any normal integral or _BitInt type
(or just unsigned versions thereof?) would be useful.  Even without _BitInt
we have the problem that we don't have builtins for __uint128_t.

One question is if we should keep them UB on zero input or hardcode some 
particular
behavior for clz/ctz.  The backend defaults might not be appropriate, I
think if we'd make it non-UB, using the precision of the type would be
reasonable, whatever it is (__builtin_clzb ((unsigned _BitInt(126)) 0)
might be 126 etc.).

Jakub



Re: [PATCH] ipa: Self-DCE of uses of removed call LHSs (PR 108007)

2023-06-13 Thread Martin Jambor
Ping.

Thanks,

Martin

On Fri, May 12 2023, Martin Jambor wrote:
> Hi,
>
> PR 108007 is another manifestation where we rely on DCE to clean-up
> after IPA-SRA and if the user explicitly switches DCE off, IPA-SRA
> can leave behind statements which are fed uninitialized values and
> trap, even though their results are themselves never used.
>
> I have already fixed this for unused parameters in callees, this bug
> shows that almost the same thing can happen for removed returns, on
> the side of callers.  This means that the issue has to be fixed
> elsewhere, in call redirection.  This patch adds a function which
> recursively looks for uses of operations fed specific SSA names and
> removes them all.
>
> That would have been easy if it wasn't for debug statements during
> tree-inline (from which call redirection is also invoked).  Debug
> statements are decoupled from the rest at this point and iterating
> over uses of SSAs does not bring them up.  During tree-inline they are
> handled especially at the end, I assume in order to make sure that
> relative ordering of UIDs are the same with and without debug info.
>
> This means that during tree-inline we need to make a hash of killed
> SSAs, that we already have in copy_body_data, available to the
> function making the purging.  So the patch duly does also that, making
> the interface slightly ugly.
>
> Bootstrapped and tested on x86_64-linux.  OK for master?  (I am not sure
> the problem is grave enough to warrant backporting to release branches
> but can do that as well if people think I should.)
>
> Thanks,
>
> Martin
>
>
> gcc/ChangeLog:
>
> 2023-05-11  Martin Jambor  
>
>   PR ipa/108007
>   * cgraph.h (cgraph_edge): Add a parameter to
>   redirect_call_stmt_to_callee.
>   * ipa-param-manipulation.h (ipa_param_adjustments): Added a
>   parameter to modify_call.
>   * cgraph.cc (cgraph_edge::redirect_call_stmt_to_callee): New
>   parameter killed_ssas, pass it to padjs->modify_call.
>   * ipa-param-manipulation.cc (purge_transitive_uses): New function.
>   (ipa_param_adjustments::modify_call): New parameter killed_ssas.
>   Instead of substituting uses, invoke purge_transitive_uses.  If
>   hash of killed SSAs has not been provided, create a temporary one
>   and release SSAs that have been added to it.
>   * tree-inline.cc (redirect_all_calls): Create
>   id->killed_new_ssa_names earlier, pass it to edge redirection,
>   adjust a comment.
>   (copy_body): Release SSAs in id->killed_new_ssa_names.
>
> gcc/testsuite/ChangeLog:
>
> 2023-05-11  Martin Jambor  
>
>   PR ipa/108007
>   * gcc.dg/ipa/pr108007.c: New test.
> ---
>  gcc/cgraph.cc   | 10 +++-
>  gcc/cgraph.h|  9 ++-
>  gcc/ipa-param-manipulation.cc   | 85 +
>  gcc/ipa-param-manipulation.h|  3 +-
>  gcc/testsuite/gcc.dg/ipa/pr108007.c | 32 +++
>  gcc/tree-inline.cc  | 28 ++
>  6 files changed, 129 insertions(+), 38 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/ipa/pr108007.c
>
> diff --git a/gcc/cgraph.cc b/gcc/cgraph.cc
> index e8f9bec8227..5e923bf0557 100644
> --- a/gcc/cgraph.cc
> +++ b/gcc/cgraph.cc
> @@ -1403,11 +1403,17 @@ cgraph_edge::redirect_callee (cgraph_node *n)
> speculative indirect call, remove "speculative" of the indirect call and
> also redirect stmt to it's final direct target.
>  
> +   When called from within tree-inline, KILLED_SSAs has to contain the 
> pointer
> +   to killed_new_ssa_names within the copy_body_data structure and SSAs
> +   discovered to be useless (if LHS is removed) will be added to it, 
> otherwise
> +   it needs to be NULL.
> +
> It is up to caller to iteratively transform each "speculative"
> direct call as appropriate.  */
>  
>  gimple *
> -cgraph_edge::redirect_call_stmt_to_callee (cgraph_edge *e)
> +cgraph_edge::redirect_call_stmt_to_callee (cgraph_edge *e,
> +hash_set  *killed_ssas)
>  {
>tree decl = gimple_call_fndecl (e->call_stmt);
>gcall *new_stmt;
> @@ -1527,7 +1533,7 @@ cgraph_edge::redirect_call_stmt_to_callee (cgraph_edge 
> *e)
>   remove_stmt_from_eh_lp (e->call_stmt);
>  
>tree old_fntype = gimple_call_fntype (e->call_stmt);
> -  new_stmt = padjs->modify_call (e, false);
> +  new_stmt = padjs->modify_call (e, false, killed_ssas);
>cgraph_node *origin = e->callee;
>while (origin->clone_of)
>   origin = origin->clone_of;
> diff --git a/gcc/cgraph.h b/gcc/cgraph.h
> index f5f54769eda..c1a3691b6f5 100644
> --- a/gcc/cgraph.h
> +++ b/gcc/cgraph.h
> @@ -1833,9 +1833,16 @@ public:
>   speculative indirect call, remove "speculative" of the indirect call and
>   also redirect stmt to it's final direct target.
>  
> + When called from within tree-inline, KILLED_SSAs has to contain the
> + pointer to killed_new_ssa_names within the 

Re: [RFC] Add stdckdint.h header for C23

2023-06-13 Thread Joseph Myers
On Tue, 13 Jun 2023, Jakub Jelinek via Gcc-patches wrote:

> There is always the possibility to have the header co-owned by both
> the compiler and C library, limits.h style.
> Just 
> #if __has_include_next()
> # include_next 
> #endif
> perhaps guarded with some macro at the end of the GCC version and
> do the same at the start of the glibc version again perhaps with some macro.
> And leave the compiler specific part to the compiler (perhaps with some
> fallback in the libc version if the compiler specific part is missing) and
> have the library related part be provided by the C library?

This seems a reasonable approach.  If the structure types do make it into 
future standard versions, we'd need to work out exactly where the division 
is between compiler and library header responsibilities (where those 
pieces involving structure types but not library functions go, for 
example).  For operations using structure types with overflow flag we'd 
need also to work out to what extent it's appropriate to implement those 
purely in the header with _Generic versus adding new built-in functions or 
extending what the existing ones can do.

> > > What I'm struggling with is enforcing the weird restrictions
> > > C23 imposes on these.
> > 
> > It's not clear all those restrictions need to be enforced - this 
> > definitely seems like a case of undefined behavior to provide useful 
> > extension space, where for various of those restrictions there are unique 
> > sensible semantics to provide if the types in question are supported.
> 
> So why does C2X say
> Recommended practice
> It is recommended to produce a diagnostic message if type2 or type3 are
> not suitable integer types, or if *result is not a modifiable lvalue of
> a suitable integer type.
> ?
> Or is it meant that a suitable integer type doesn't need to be necessarily
> one that is listed in the previous paragraph?
> Perhaps the checking could be guarded on #ifdef __STRICT_ANSI__, sure...

Diagnostics are better than doing something completely random - but making 
it conditional when there are sensible semantics also makes sense.

It seems likely a future standard version will support these operations 
for bit-precise types, at least.  (Bit-precise types are generally tricky 
for type-generic operations; there's no standard way to select on them 
with _Generic beyond listing individual types with specific widths, and no 
standard way to determine the width of the bit-precise type of an 
argument.  So implementing some type-generic operations for such types may 
need new language extensions, prompting WG14 caution about requiring such 
support - but this also makes support for such types in standard 
type-generic macros in headers particularly valuable, precisely because 
they can't be implemented purely in user code using standard language 
features.)

-- 
Joseph S. Myers
jos...@codesourcery.com


RE: [PATCH] RISC-V: Remove duplicate `#include "riscv-vector-switch.def"`

2023-06-13 Thread Li, Pan2 via Gcc-patches
Committed, thanks Juzhe.

Pan

-Original Message-
From: Gcc-patches  On Behalf 
Of juzhe.zh...@rivai.ai
Sent: Tuesday, June 13, 2023 6:56 PM
To: 丁乐华 ; gcc-patches 
Subject: Re: [PATCH] RISC-V: Remove duplicate `#include 
"riscv-vector-switch.def"`

Send V2 patch with changelog.

Thanks.


juzhe.zh...@rivai.ai
 
From: Lehua Ding
Date: 2023-06-13 18:53
To: gcc-patches; juzhe.zhong
Subject: [PATCH] RISC-V: Remove duplicate `#include "riscv-vector-switch.def"` 
Hi,
 
This patch removes the duplicate `#include "riscv-vector-switch.def"` statement 
and adds #undef for ENTRY and TUPLE_ENTRY macros later.
 
Best,
Lehua
 
---
gcc/config/riscv/riscv-v.cc | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
 
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc index 
e1b85a5af91f..09c2abcbc623 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -1210,7 +1210,6 @@ struct mode_vtype_group
   ratio_for_min_vlen64[MODE##mode] = RATIO_FOR_MIN_VLEN64; 
\
   vlmul_for_for_vlen128[MODE##mode] = VLMUL_FOR_MIN_VLEN128;   
\
   ratio_for_for_vlen128[MODE##mode] = RATIO_FOR_MIN_VLEN128; -#include 
"riscv-vector-switch.def"
#define TUPLE_ENTRY(MODE, REQUIREMENT, SUBPART_MODE, NF, VLMUL_FOR_MIN_VLEN32, \
RATIO_FOR_MIN_VLEN32, VLMUL_FOR_MIN_VLEN64,\
RATIO_FOR_MIN_VLEN64, VLMUL_FOR_MIN_VLEN128,   \
@@ -1224,6 +1223,8 @@ struct mode_vtype_group
   vlmul_for_for_vlen128[MODE##mode] = VLMUL_FOR_MIN_VLEN128;   
\
   ratio_for_for_vlen128[MODE##mode] = RATIO_FOR_MIN_VLEN128; #include 
"riscv-vector-switch.def"
+#undef ENTRY
+#undef TUPLE_ENTRY
   }
};
--
2.36.3
 


Re: [PATCH] modula2: Fix bootstrap

2023-06-13 Thread Jakub Jelinek via Gcc-patches
On Wed, Jun 07, 2023 at 09:42:22AM +0100, Andre Vieira (lists) wrote:
> I do need those includes and sorry I broke your bootstrap it didn't show up
> on my aarch64-unknown-linux-gnu bootstrap, I'm guessing the rules there were
> just run in a different order. Glad you were able to fix it :)

Unfortunately, it doesn't really work.
My x86_64-linux bootstrap today died again with:
In file included from ../../gcc/m2/gm2-gcc/gcc-consolidation.h:74,
 from ../../gcc/m2/gm2-lang.cc:24:
../../gcc/internal-fn.h:24:10: fatal error: insn-opinit.h: No such file or 
directory
   24 | #include "insn-opinit.h"
  |  ^~~
compilation terminated.
/home/jakub/src/gcc/obj36/./prev-gcc/xg++ 
-B/home/jakub/src/gcc/obj36/./prev-gcc/ -B/usr/local/x86_64-pc-linux-gnu/bin/ 
-nostdinc++ 
-B/home/jakub/src/gcc/obj36/prev-x86_64-pc-linux-gnu/libstdc++-v3/src/.libs 
-B/home/jakub/src/gcc/obj36/prev-x86_64-pc-linux-gnu/libstdc++-v3/libsupc++/.libs
  
-I/home/jakub/src/gcc/obj36/prev-x86_64-pc-linux-gnu/libstdc++-v3/include/x86_64-pc-linux-gnu
  -I/home/jakub/src/gcc/obj36/prev-x86_64-pc-linux-gnu/libstdc++-v3/include  
-I/home/jakub/src/gcc/libstdc++-v3/libsupc++ 
-L/home/jakub/src/gcc/obj36/prev-x86_64-pc-linux-gnu/libstdc++-v3/src/.libs 
-L/home/jakub/src/gcc/obj36/prev-x86_64-pc-linux-gnu/libstdc++-v3/libsupc++/.libs
  -fno-PIE -c -g   -g -O2 -fchecking=1 -DIN_GCC -fno-exceptions -fno-rtti 
-fasynchronous-unwind-tables -W -Wall -Wno-narrowing -Wwrite-strings 
-Wcast-qual  -fno-common  -DHAVE_CONFIG_H \
 -I. -Im2/gm2-gcc -I../../gcc -I../../gcc/m2/gm2-gcc 
-I../../gcc/../include  -I../../gcc/../libcpp/include -I../../gcc/../libcody  
-I../../gcc/../libdecnumber -I../../gcc/../libdecnumber/bid -I../libdecnumber 
-I../../gcc/../libbacktrace   -I. -Im2/gm2-gcc -I../../gcc 
-I../../gcc/m2/gm2-gcc -I../../gcc/../include  -I../../gcc/../libcpp/include 
-I../../gcc/../libcody  -I../../gcc/../libdecnumber 
-I../../gcc/../libdecnumber/bid -I../libdecnumber -I../../gcc/../libbacktrace  
../../gcc/m2/gm2-gcc/m2type.cc -o m2/gm2-gcc/m2type.o
make[3]: *** [../../gcc/m2/Make-lang.in:570: m2/gm2-lang.o] Error 1
make[3]: *** Waiting for unfinished jobs
errors.  Dunno what is going on.
I've tried
--- gcc/m2/Make-lang.in.jj  2023-06-07 15:56:07.112684198 +0200
+++ gcc/m2/Make-lang.in 2023-06-13 16:08:55.409364765 +0200
@@ -511,7 +511,7 @@ GM2_LIBS_BOOT = m2/gm2-compiler-boot
 m2/gm2-libs-boot/libgm2.a \
 $(GM2-BOOT-O)
 
-$(GM2_C_OBJS) : | $(generated_files)
+m2_OBJS = $(GM2_C_OBJS)
 
 cc1gm2$(exeext): m2/stage1/cc1gm2$(exeext) $(m2.prev)
cp -p $< $@

but that doesn't really work either, this time not just random bootstrap
breakages from time to time, but all the time.
Including GM2_C_OBJS in m2_OBJS is I think the right thing, but that
results in predefining IN_GCC_FRONTEND macro and we have e.g.

/* Front ends should never have to include middle-end headers.  Enforce
   this by poisoning the header double-include protection defines.  */
#ifdef IN_GCC_FRONTEND
#pragma GCC poison GCC_RTL_H GCC_EXCEPT_H GCC_EXPR_H
#endif

in system.h to make sure that FE sources don't include rtl.h, except.h,
expr.h.  But m2/gm2-gcc/gcc-consolidation.h includes tons of the RTL
headers, rtl.h, df.h (twice), except.h; why?
Also, seems one of GM2_C_OBJS is some special copy of stor-layout.cc
which really isn't a FE file and so needs the RTL-ish headers.

Jakub



RE: [PATCH] RISC-V: Add comments of some functions

2023-06-13 Thread Li, Pan2 via Gcc-patches
Committed, thanks Jeff.

Pan

-Original Message-
From: Gcc-patches  On Behalf 
Of Jeff Law via Gcc-patches
Sent: Tuesday, June 13, 2023 10:06 PM
To: juzhe.zh...@rivai.ai; gcc-patches@gcc.gnu.org
Cc: kito.ch...@gmail.com; kito.ch...@sifive.com; pal...@dabbelt.com; 
pal...@rivosinc.com; rdapp@gmail.com
Subject: Re: [PATCH] RISC-V: Add comments of some functions



On 6/12/23 20:26, juzhe.zh...@rivai.ai wrote:
> From: Juzhe-Zhong 
> 
> gcc/ChangeLog:
> 
>  * config/riscv/riscv-v.cc (rvv_builder::single_step_npatterns_p): 
> Add comment.
>  (shuffle_generic_patterns): Ditto.
>  (expand_vec_perm_const_1): Ditto.
OK.  Thanks!
jeff


Re: [PATCH] RISC-V: Implement vec_set and vec_extract.

2023-06-13 Thread Jeff Law via Gcc-patches




On 6/13/23 00:50, Robin Dapp wrote:

I suggest we implement vector calling convention even though it is not ratified 
yet.
We can allow calling convention to be enabled only when 
--param=riscv-autovec-preference=fixed-vlmax.
We have such issue:
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110119 

if we don't have calling convention for fixed-vlmax.


Let's discuss this in the patchwork sync meeting later.
In fact I'd ask y'all start with this since my contribution would be 
minimal and I'll be in the car for the first ~30 minutes.


jeff


Re: [PATCH V2] RISC-V: Support RVV VLA SLP auto-vectorization

2023-06-13 Thread Jeff Law via Gcc-patches




On 6/12/23 20:27, juzhe.zh...@rivai.ai wrote:

Ok.
https://gcc.gnu.org/pipermail/gcc-patches/2023-June/thread.html 


I have added comments as you suggested.
Thanks.  With those changes I think the main patch here (RVV VLA SLP 
auto-vectorization) is good to go.


jeff


Re: [PATCH] RISC-V: Add comments of some functions

2023-06-13 Thread Jeff Law via Gcc-patches




On 6/12/23 20:26, juzhe.zh...@rivai.ai wrote:

From: Juzhe-Zhong 

gcc/ChangeLog:

 * config/riscv/riscv-v.cc (rvv_builder::single_step_npatterns_p): Add 
comment.
 (shuffle_generic_patterns): Ditto.
 (expand_vec_perm_const_1): Ditto.

OK.  Thanks!
jeff


RE: [PATCH V3] RISC-V: Add more SLP tests

2023-06-13 Thread Li, Pan2 via Gcc-patches
Committed, thanks Jeff.

Pan

-Original Message-
From: Gcc-patches  On Behalf 
Of Jeff Law via Gcc-patches
Sent: Tuesday, June 13, 2023 9:26 PM
To: juzhe.zh...@rivai.ai; gcc-patches@gcc.gnu.org
Cc: kito.ch...@gmail.com; kito.ch...@sifive.com; pal...@dabbelt.com; 
pal...@rivosinc.com; rdapp@gmail.com
Subject: Re: [PATCH V3] RISC-V: Add more SLP tests



On 6/13/23 05:38, juzhe.zh...@rivai.ai wrote:
> From: Juzhe-Zhong 
> 
> gcc/testsuite/ChangeLog:
> 
>  * gcc.target/riscv/rvv/autovec/partial/slp-10.c: New test.
>  * gcc.target/riscv/rvv/autovec/partial/slp-11.c: New test.
>  * gcc.target/riscv/rvv/autovec/partial/slp-13.c: New test.
>  * gcc.target/riscv/rvv/autovec/partial/slp-14.c: New test.
>  * gcc.target/riscv/rvv/autovec/partial/slp-15.c: New test.
>  * gcc.target/riscv/rvv/autovec/partial/slp_run-10.c: New test.
>  * gcc.target/riscv/rvv/autovec/partial/slp_run-11.c: New test.
>  * gcc.target/riscv/rvv/autovec/partial/slp_run-13.c: New test.
>  * gcc.target/riscv/rvv/autovec/partial/slp_run-14.c: New test.
>  * gcc.target/riscv/rvv/autovec/partial/slp_run-15.c: New test.
> 
> ---
OK
jeff



RE: [PATCH] RISC-V: Fix bug of VLA SLP auto-vectorization

2023-06-13 Thread Li, Pan2 via Gcc-patches
Committed, thanks Jeff.

Pan

-Original Message-
From: Gcc-patches  On Behalf 
Of Jeff Law via Gcc-patches
Sent: Tuesday, June 13, 2023 9:30 PM
To: Robin Dapp ; juzhe.zh...@rivai.ai; 
gcc-patches@gcc.gnu.org
Cc: kito.ch...@gmail.com; kito.ch...@sifive.com; pal...@dabbelt.com; 
pal...@rivosinc.com
Subject: Re: [PATCH] RISC-V: Fix bug of VLA SLP auto-vectorization



On 6/13/23 06:16, Robin Dapp wrote:
> Hi Juzhe,
> 
> LGTM.  You could also add the aarch64 test disclaimer here again, but 
> no need for a V2.
Agreed.

jeff


Ping * 2 : Fwd: [V9][PATCH 1/2] Handle component_ref to a structre/union field including flexible array member [PR101832]

2023-06-13 Thread Qing Zhao via Gcc-patches
Hi,

I’d like to ping this patch again for the Middle-end approval (on 
gcc/tree-object-size.cc change).
This is an important patch to Linux Kernel security. 

The patch has addressed all the comments and suggestions raised during the 
review process. 
The C FE and Doc changes have been approved.
Most of the Middle-end changes have been reviewed by Richard Biener and have been 
updated based on his suggestions.

The only change that has not been reviewed is the change in 
gcc/tree-object-size.cc, which is simple and straightforward.
Please review this change and let me know whether it’s okay for commit to trunk?

Thanks a lot!

Qing

> On Jun 5, 2023, at 11:12 AM, Qing Zhao via Gcc-patches 
>  wrote:
> 
> Ping on this patch.
> 
> The C FE and Doc changes have been approved.
> Please help to review and approve the Middle-end change.
> 
> Or provide guide on how to move this patch forward.
> 
> Thanks a lot for the help.
> 
> Qing
> 
> Begin forwarded message:
> 
> From: Qing Zhao mailto:qing.z...@oracle.com>>
> Subject: [V9][PATCH 1/2] Handle component_ref to a structre/union field 
> including flexible array member [PR101832]
> Date: May 30, 2023 at 2:30:28 PM EDT
> To: jos...@codesourcery.com, 
> richard.guent...@gmail.com, 
> ja...@redhat.com, 
> gcc-patches@gcc.gnu.org
> Cc: keesc...@chromium.org, 
> siddh...@gotplt.org, 
> uec...@tugraz.at, Qing Zhao 
> mailto:qing.z...@oracle.com>>
> 
> Richard or Jakub,
> 
> could you please review this patch and see whether it's Okay to commit?
> 
> thanks a lot.
> 
> Qing
> 
> ===
> 
> GCC extension accepts the case when a struct with a C99 flexible array member
> is embedded into another struct or union (possibly recursively) as the last
> field.
> __builtin_object_size should treat such struct as flexible size.
> 
> gcc/c/ChangeLog:
> 
> PR tree-optimization/101832
> * c-decl.cc (finish_struct): Set TYPE_INCLUDES_FLEXARRAY for
> struct/union type.
> 
> gcc/lto/ChangeLog:
> 
> PR tree-optimization/101832
> * lto-common.cc (compare_tree_sccs_1): Compare bit
> TYPE_NO_NAMED_ARGS_STDARG_P or TYPE_INCLUDES_FLEXARRAY properly
> for its corresponding type.
> 
> gcc/ChangeLog:
> 
> PR tree-optimization/101832
> * print-tree.cc (print_node): Print new bit 
> type_includes_flexarray.
> * tree-core.h (struct tree_type_common): Use bit no_named_args_stdarg_p
> as type_includes_flexarray for RECORD_TYPE or UNION_TYPE.
> * tree-object-size.cc (addr_object_size): Handle 
> structure/union type
> when it has flexible size.
> * tree-streamer-in.cc 
> (unpack_ts_type_common_value_fields): Stream
> in bit no_named_args_stdarg_p properly for its corresponding type.
> * tree-streamer-out.cc 
> (pack_ts_type_common_value_fields): Stream
> out bit no_named_args_stdarg_p properly for its corresponding type.
> * tree.h (TYPE_INCLUDES_FLEXARRAY): New macro TYPE_INCLUDES_FLEXARRAY.
> 
> gcc/testsuite/ChangeLog:
> 
> PR tree-optimization/101832
> * gcc.dg/builtin-object-size-pr101832.c: New test.
> 
> change TYPE_INCLUDES_FLEXARRAY to TYPE_INCLUDES_FLEXARRAY
> ---
> gcc/c/c-decl.cc   |  11 ++
> gcc/lto/lto-common.cc |   5 +-
> gcc/print-tree.cc |   5 +
> .../gcc.dg/builtin-object-size-pr101832.c | 134 ++
> gcc/tree-core.h   |   2 +
> gcc/tree-object-size.cc   |  
> 23 ++-
> gcc/tree-streamer-in.cc   |   
> 5 +-
> gcc/tree-streamer-out.cc  |  
>  5 +-
> gcc/tree.h|   7 +-
> 9 files changed, 192 insertions(+), 5 deletions(-)
> create mode 100644 gcc/testsuite/gcc.dg/builtin-object-size-pr101832.c
> 
> diff --git a/gcc/c/c-decl.cc 
> b/gcc/c/c-decl.cc
> index b5b491cf2da..0c718151f6d 100644
> --- a/gcc/c/c-decl.cc
> +++ b/gcc/c/c-decl.cc
> @@ -9282,6 +9282,17 @@ finish_struct (location_t loc, tree t, tree fieldlist, 
> tree attributes,
>  /* Set DECL_NOT_FLEXARRAY flag for FIELD_DECL x.  */
>  DECL_NOT_FLEXARRAY (x) = !is_flexible_array_member_p (is_last_field, x);
> 
> +  /* Set TYPE_INCLUDES_FLEXARRAY for the context of x, t.
> + when x is an array and is the last field.  */
> +  if (TREE_CODE (TREE_TYPE (x)) == ARRAY_TYPE)
> + TYPE_INCLUDES_FLEXARRAY (t)
> +  = is_last_field && flexible_array_member_type_p (TREE_TYPE (x));
> +  /* Recursively set 

Re: [PATCH] Fix note_defect3 function

2023-06-13 Thread Jeff Law via Gcc-patches




On 6/12/23 21:18, Akari Takahashi via Gcc-patches wrote:

Hello,

I've noticed an issue with the note_defect3 function and have prepared a
patch to fix it. The function is missing a return statement, which is
causing undefined behavior. This patch adds the missing return statement,
ensuring that the function returns the correct value.

Please consider applying this patch to correct the issue. Thank you.

Best regards,

Takahashi Akari
Thanks.  Reviewing that file it looks like it was just supposed to be 
used for the sh5/sh5media processor.  My recollection is those were 
never actually produced and support for them was removed from GCC some 
time ago.  This file was missed during that removal.


Rather than apply the patch, I think the better choice is to just remove 
the divtab.cc file unless someone has a real use for it.


Is there a particular reason you were investigating the divtab.cc file?

jeff


Re: [PATCH 1/4] rs6000: build constant via li;rotldi

2023-06-13 Thread David Edelsohn via Gcc-patches
On Mon, Jun 12, 2023 at 11:30 PM Jiufu Guo  wrote:
>
>
> Hi David,
>
> David Edelsohn  writes:
> > On Wed, Jun 7, 2023 at 9:55 PM Jiufu Guo  wrote:
> >
> >  Hi,
> >
> >  This patch checks if a constant is possible to be rotated to/from a 
> > positive
> >  or negative value from "li". If so, we could use "li;rotldi" to build it.
> >
> >  Bootstrap and regtest pass on ppc64{,le}.
> >  Is this ok for trunk?
> >
> >  BR,
> >  Jeff (Jiufu)
> >
> >  gcc/ChangeLog:
> >
> >  * config/rs6000/rs6000.cc (can_be_rotated_to_positive_li): New 
> > function.
> >  (can_be_rotated_to_negative_li): New function.
> >  (can_be_built_by_li_and_rotldi): New function.
> >  (rs6000_emit_set_long_const): Call can_be_built_by_li_and_rotldi.
> >
> >  gcc/testsuite/ChangeLog:
> >
> >  * gcc.target/powerpc/const-build.c: New test.
> >  ---
> >   gcc/config/rs6000/rs6000.cc   | 64 +--
> >   .../gcc.target/powerpc/const-build.c  | 54 
> >   2 files changed, 112 insertions(+), 6 deletions(-)
> >   create mode 100644 gcc/testsuite/gcc.target/powerpc/const-build.c
> >
> >  diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
> >  index 42f49e4a56b..1dd0072350a 100644
> >  --- a/gcc/config/rs6000/rs6000.cc
> >  +++ b/gcc/config/rs6000/rs6000.cc
> >  @@ -10258,6 +10258,48 @@ rs6000_emit_set_const (rtx dest, rtx source)
> > return true;
> >   }
> >
> >  +/* Check if C can be rotated to a positive value which 'li' instruction
> >  +   is able to load.  If so, set *ROT to the number by which C is rotated,
> >  +   and return true.  Return false otherwise.  */
> >  +
> >  +static bool
> >  +can_be_rotated_to_positive_li (HOST_WIDE_INT c, int *rot)
> >  +{
> >  +  /* 49 leading zeros and 15 low bits on the positive value
> >  + generated by 'li' instruction.  */
> >  +  return can_be_rotated_to_lowbits (c, 15, rot);
> >  +}
> >  +
> >  +/* Like can_be_rotated_to_positive_li, but check the negative value of 
> > 'li'.  */
> >  +
> >  +static bool
> >  +can_be_rotated_to_negative_li (HOST_WIDE_INT c, int *rot)
> >  +{
> >  +  return can_be_rotated_to_lowbits (~c, 15, rot);
> >  +}
> >  +
> >  +/* Check if value C can be built by 2 instructions: one is 'li', another 
> > is
> >  +   rotldi.
> >  +
> >  +   If so, *SHIFT is set to the shift operand of rotldi(rldicl), and *MASK
> >  +   is set to -1, and return true.  Return false otherwise.  */
> >  +
> >
> > I look at this feature and it's good, but I don't fully understand the 
> > benefit of this level of abstraction.  Ideally all of the above functions 
> > would
> > be inlined.  They aren't reused.
> >
> >  +static bool
> >  +can_be_built_by_li_and_rotldi (HOST_WIDE_INT c, int *shift,
> >  +  HOST_WIDE_INT *mask)
> >  +{
> >  +  int n;
> >  +  if (can_be_rotated_to_positive_li (c, &n)
> >  +  || can_be_rotated_to_negative_li (c, &n))
> >
> > Why not
> >
> > /* Check if C or ~C can be rotated to a positive or negative value
> > which 'li' instruction is able to load.  */
> > if (can_be_rotated_to_lowbits (c, 15, &n)
> > || can_be_rotated_to_lowbits (~c, 15, &n))
>
>
> Thanks a lot for your review!!
>
> Your suggestions could also achieve my goal of using a new function:
> Using "can_be_rotated_to_positive_li" is just trying to get a
> straightforward name.  Like yours, the code's comments would also
> make it easy to understand.

I recognize that you are trying to be consistent with the other
functions that you add in later patches, but it feels like overkill in
abstraction to me.  Or maybe combine positive_li and negative_li into a
single function so that the abstraction serves a purpose other than a
tail call and creating an alias for a specific invocation of
can_be_rotated_to_lowbits.

Thanks, David

>
> BR,
> Jeff (Jiufu Guo)
> >
> > ...
> >
> > This is a style of software engineering, but it seems overkill to me when 
> > the function is a single line that tail calls another function.  Am I 
> > missing
> > something?
> >
> > The rest of this patch looks good.
> >
> > Thanks, David
> >
> >  +{
> >  +  *mask = HOST_WIDE_INT_M1;
> >  +  *shift = HOST_BITS_PER_WIDE_INT - n;
> >  +  return true;
> >  +}
> >  +
> >  +  return false;
> >  +}
> >  +
> >   /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
> >  Output insns to set DEST equal to the constant C as a series of
> >  lis, ori and shl instructions.  */
> >  @@ -10266,15 +10308,14 @@ static void
> >   rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
> >   {
> > rtx temp;
> >  +  int shift;
> >  +  HOST_WIDE_INT mask;
> > HOST_WIDE_INT ud1, ud2, ud3, ud4;
> >
> > ud1 = c & 0x;
> >  -  c = c >> 16;
> >  -  ud2 = c & 0x;
> >  -  c = c >> 16;
> >  -  ud3 = c & 0x;
> >  -  c = c >> 16;
> >  -  ud4 = c & 0x;
> >  +  ud2 = (c >> 16) & 0x;
> >  +  ud3 = (c >> 32) & 0x;
> >  +  ud4 = (c >> 48) & 0x;
> >

Re: [PATCH] RISC-V: Fix bug of VLA SLP auto-vectorization

2023-06-13 Thread Jeff Law via Gcc-patches




On 6/13/23 06:16, Robin Dapp wrote:

Hi Juzhe,

LGTM.  You could also add the aarch64 test disclaimer here again,
but no need for a V2.

Agreed.

jeff


Re: [PATCH V3] RISC-V: Add more SLP tests

2023-06-13 Thread Jeff Law via Gcc-patches




On 6/13/23 05:38, juzhe.zh...@rivai.ai wrote:

From: Juzhe-Zhong 

gcc/testsuite/ChangeLog:

 * gcc.target/riscv/rvv/autovec/partial/slp-10.c: New test.
 * gcc.target/riscv/rvv/autovec/partial/slp-11.c: New test.
 * gcc.target/riscv/rvv/autovec/partial/slp-13.c: New test.
 * gcc.target/riscv/rvv/autovec/partial/slp-14.c: New test.
 * gcc.target/riscv/rvv/autovec/partial/slp-15.c: New test.
 * gcc.target/riscv/rvv/autovec/partial/slp_run-10.c: New test.
 * gcc.target/riscv/rvv/autovec/partial/slp_run-11.c: New test.
 * gcc.target/riscv/rvv/autovec/partial/slp_run-13.c: New test.
 * gcc.target/riscv/rvv/autovec/partial/slp_run-14.c: New test.
 * gcc.target/riscv/rvv/autovec/partial/slp_run-15.c: New test.

---

OK
jeff



[PATCH] Add -Wmissing-variable-declarations [PR65213].

2023-06-13 Thread Hamza Mahfooz
Resolves:
PR c/65213 - Extend -Wmissing-declarations to variables [i.e. add
-Wmissing-variable-declarations]

gcc/c-family/ChangeLog:

PR c/65213
* c.opt (-Wmissing-variable-declarations): New option.

gcc/c/ChangeLog:

PR c/65213
* c-decl.cc (start_decl): Handle -Wmissing-variable-declarations

gcc/ChangeLog:

PR c/65213
* doc/invoke.texi (-Wmissing-variable-declarations): Document
new option.

gcc/testsuite/ChangeLog:

PR c/65213
* gcc.dg/Wmissing-variable-declarations.c: New test.

Signed-off-by: Hamza Mahfooz 
---
 gcc/c-family/c.opt|  4 +++
 gcc/c/c-decl.cc   | 10 +-
 gcc/doc/invoke.texi   | 11 +--
 .../gcc.dg/Wmissing-variable-declarations.c   | 33 +++
 4 files changed, 55 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/Wmissing-variable-declarations.c

diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt
index cead1995561..42ad447f39b 100644
--- a/gcc/c-family/c.opt
+++ b/gcc/c-family/c.opt
@@ -1043,6 +1043,10 @@ Wmissing-prototypes
 C ObjC Var(warn_missing_prototypes) Warning
 Warn about global functions without prototypes.
 
+Wmissing-variable-declarations
+C ObjC Var(warn_missing_variable_declarations) Warning
+Warn about global variables without previous declarations.
+
 Wmudflap
 C ObjC C++ ObjC++ WarnRemoved
 
diff --git a/gcc/c/c-decl.cc b/gcc/c/c-decl.cc
index 1af51c4acfc..8e276b2a846 100644
--- a/gcc/c/c-decl.cc
+++ b/gcc/c/c-decl.cc
@@ -5340,6 +5340,7 @@ start_decl (struct c_declarator *declarator, struct 
c_declspecs *declspecs,
location_t *lastloc /* = NULL */)
 {
   tree decl;
+  tree old_decl;
   tree tem;
   tree expr = NULL_TREE;
   enum deprecated_states deprecated_state = DEPRECATED_NORMAL;
@@ -5360,7 +5361,9 @@ start_decl (struct c_declarator *declarator, struct 
c_declspecs *declspecs,
   if (!decl || decl == error_mark_node)
 return NULL_TREE;
 
-  if (tree lastdecl = lastloc ? lookup_last_decl (decl) : NULL_TREE)
+  old_decl = lookup_last_decl (decl);
+
+  if (tree lastdecl = lastloc ? old_decl : NULL_TREE)
 if (lastdecl != error_mark_node)
   *lastloc = DECL_SOURCE_LOCATION (lastdecl);
 
@@ -5372,6 +5375,11 @@ start_decl (struct c_declarator *declarator, struct 
c_declspecs *declspecs,
   && TREE_PUBLIC (decl))
 warning (OPT_Wmain, "%q+D is usually a function", decl);
 
+  if (warn_missing_variable_declarations && VAR_P (decl)
+  && !DECL_EXTERNAL (decl) && TREE_PUBLIC (decl) && old_decl == NULL_TREE)
+warning_at (DECL_SOURCE_LOCATION (decl), 
OPT_Wmissing_variable_declarations,
+   "no previous declaration for %qD", decl);
+
   if (initialized)
 /* Is it valid for this decl to have an initializer at all?
If not, set INITIALIZED to zero, which will indirectly
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 8fa3f9fae01..e9b51842234 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -496,8 +496,8 @@ Objective-C and Objective-C++ Dialects}.
 
 @item C and Objective-C-only Warning Options
 @gccoptlist{-Wbad-function-cast  -Wmissing-declarations
--Wmissing-parameter-type  -Wmissing-prototypes  -Wnested-externs
--Wold-style-declaration  -Wold-style-definition
+-Wmissing-parameter-type -Wmissing-prototypes -Wmissing-variable-declarations
+-Wnested-externs -Wold-style-declaration  -Wold-style-definition
 -Wstrict-prototypes  -Wtraditional  -Wtraditional-conversion
 -Wdeclaration-after-statement  -Wpointer-sign}
 
@@ -9565,6 +9565,13 @@ provide prototypes and a non-matching declaration 
declares an
 overload rather than conflict with an earlier declaration.
 Use @option{-Wmissing-declarations} to detect missing declarations in C++.
 
+@opindex Wmissing-variable-declarations
+@opindex Wno-missing-variable-declarations
+@item -Wmissing-variable-declarations @r{(C and Objective-C only)}
+Warn if a global variable is defined without a previous declaration.
+Use this option to detect global variables that do not have a matching
+extern declaration in a header file.
+
 @opindex Wmissing-declarations
 @opindex Wno-missing-declarations
 @item -Wmissing-declarations
diff --git a/gcc/testsuite/gcc.dg/Wmissing-variable-declarations.c 
b/gcc/testsuite/gcc.dg/Wmissing-variable-declarations.c
new file mode 100644
index 000..b292dbe8c22
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/Wmissing-variable-declarations.c
@@ -0,0 +1,33 @@
+/* { dg-do compile } */
+/* { dg-options "-Wmissing-variable-declarations" } */
+
+int b0; /* { dg-warning "no previous declaration for 'b0'" } */
+
+int b1 = 1; /* { dg-warning "no previous declaration for 'b1'" } */
+
+int b2; /* { dg-warning "no previous declaration for 'b2'" } */
+int b2 = 2; 
+
+struct {
+int g0;
+} b3; /* { dg-warning "no previous declaration for 'b3'" } */
+
+int b4; /* { dg-warning "no previous declaration for 'b4'" } */
+int b4 = 3;
+extern int b4;
+
+static 

Re: [PATCH] rs6000: replace '(const_int 0)' to 'unspec:BLK [(const_int 0)]' for stack_tie

2023-06-13 Thread Xi Ruoyao via Gcc-patches
On Tue, 2023-06-13 at 20:23 +0800, Jiufu Guo via Gcc-patches wrote:

> Compare with previous version, this addes ChangeLog and removes
> const_anchor parts.
> https://gcc.gnu.org/pipermail/gcc-patches/2023-June/621356.html.

[Off topic]

const_anchor is just broken now.  See
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104843 and the thread
beginning at
https://gcc.gnu.org/pipermail/gcc-patches/2022-March/591470.html.  If
you want to use it for rs6000 I guess you need to fix it first...

To me const_anchor needs a complete rework but I don't want to spend my
time on it.

-- 
Xi Ruoyao 
School of Aerospace Science and Technology, Xidian University


[PATCH] rs6000: replace '(const_int 0)' to 'unspec:BLK [(const_int 0)]' for stack_tie

2023-06-13 Thread Jiufu Guo via Gcc-patches
Hi,

For stack_tie, currently below insn is generated:
(insn 15 14 16 3 (parallel [
 (set (mem/c:BLK (reg/f:DI 1 1) [1  A8])
 (const_int 0 [0]))
 ]) "/home/guojiufu/temp/gdb.c":13:3 922 {stack_tie}
  (nil))

It is "(set (mem/c:BLK (reg/f:DI 1 1)) (const_int 0 [0]))".  This may
look like "a memory block is zeroed", while stack_tie is actually
more like a placeholder and does not generate anything.

To avoid potential misunderstanding, "UNSPEC:BLK [(const_int 0)].." could
be used here.

Compared with the previous version, this adds a ChangeLog and removes
the const_anchor parts.
https://gcc.gnu.org/pipermail/gcc-patches/2023-June/621356.html.

Bootstrap pass on ppc64{,le}.
Is this ok for trunk?

BR,
Jeff (Jiufu Guo)

gcc/ChangeLog:

* config/rs6000/predicates.md (tie_operand): Update to match new
stack_tie pattern.
* config/rs6000/rs6000-logue.cc (rs6000_emit_stack_tie): Update to
use the new stack_tie pattern.
* config/rs6000/rs6000.md (UNSPEC_TIE): New UNSPEC.
(restore_stack_block): Update to use the new stack_tie pattern.
(restore_stack_nonlocal): Likewise.
(stack_tie): Update pattern to use UNSPEC_TIE.
(stack_restore_tie): Likewise.  

---
 gcc/config/rs6000/predicates.md   | 11 +++
 gcc/config/rs6000/rs6000-logue.cc |  4 +++-
 gcc/config/rs6000/rs6000.md   | 14 ++
 4 files changed, 24 insertions(+), 9 deletions(-)

diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index a16ee30f0c0..4748cb37ce8 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -1854,10 +1854,13 @@ (define_predicate "stmw_operation"
 (define_predicate "tie_operand"
   (match_code "parallel")
 {
-  return (GET_CODE (XVECEXP (op, 0, 0)) == SET
- && MEM_P (XEXP (XVECEXP (op, 0, 0), 0))
- && GET_MODE (XEXP (XVECEXP (op, 0, 0), 0)) == BLKmode
- && XEXP (XVECEXP (op, 0, 0), 1) == const0_rtx);
+  rtx set = XVECEXP (op, 0, 0);
+  return (GET_CODE (set) == SET
+ && MEM_P (SET_DEST (set))
+ && GET_MODE (SET_DEST (set)) == BLKmode
+ && GET_CODE (SET_SRC (set)) == UNSPEC
+ && XINT (SET_SRC (set), 1) == UNSPEC_TIE
+ && XVECEXP (SET_SRC (set), 0, 0) == const0_rtx);
 })
 
 ;; Match a small code model toc reference (or medium and large
diff --git a/gcc/config/rs6000/rs6000-logue.cc 
b/gcc/config/rs6000/rs6000-logue.cc
index bc6b153b59f..b99f43a8282 100644
--- a/gcc/config/rs6000/rs6000-logue.cc
+++ b/gcc/config/rs6000/rs6000-logue.cc
@@ -1463,7 +1463,9 @@ rs6000_emit_stack_tie (rtx fp, bool hard_frame_needed)
   while (--i >= 0)
 {
   rtx mem = gen_frame_mem (BLKmode, regs[i]);
-  RTVEC_ELT (p, i) = gen_rtx_SET (mem, const0_rtx);
+  RTVEC_ELT (p, i)
+   = gen_rtx_SET (mem, gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, const0_rtx),
+   UNSPEC_TIE));
 }
 
   emit_insn (gen_stack_tie (gen_rtx_PARALLEL (VOIDmode, p)));
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index b0db8ae508d..fdcf8347812 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -158,6 +158,7 @@ (define_c_enum "unspec"
UNSPEC_HASHCHK
UNSPEC_XXSPLTIDP_CONST
UNSPEC_XXSPLTIW_CONST
+   UNSPEC_TIE
   ])
 
 ;;
@@ -10828,7 +10829,9 @@ (define_expand "restore_stack_block"
   operands[4] = gen_frame_mem (Pmode, operands[1]);
   p = rtvec_alloc (1);
   RTVEC_ELT (p, 0) = gen_rtx_SET (gen_frame_mem (BLKmode, operands[0]),
- const0_rtx);
+ gen_rtx_UNSPEC (BLKmode,
+ gen_rtvec (1, const0_rtx),
+ UNSPEC_TIE));
   operands[5] = gen_rtx_PARALLEL (VOIDmode, p);
 })
 
@@ -10866,7 +10869,9 @@ (define_expand "restore_stack_nonlocal"
   operands[5] = gen_frame_mem (Pmode, operands[3]);
   p = rtvec_alloc (1);
   RTVEC_ELT (p, 0) = gen_rtx_SET (gen_frame_mem (BLKmode, operands[0]),
- const0_rtx);
+ gen_rtx_UNSPEC (BLKmode,
+ gen_rtvec (1, const0_rtx),
+ UNSPEC_TIE));
   operands[6] = gen_rtx_PARALLEL (VOIDmode, p);
 })
 
@@ -13898,7 +13903,8 @@ (define_insn "*save_fpregs__r1"
 ; not be moved over loads from or stores to stack memory.
 (define_insn "stack_tie"
   [(match_parallel 0 "tie_operand"
-  [(set (mem:BLK (reg 1)) (const_int 0))])]
+  [(set (mem:BLK (reg 1))
+   (unspec:BLK [(const_int 0)] UNSPEC_TIE))])]
   ""
   ""
   [(set_attr "length" "0")])
@@ -13910,7 +13916,7 @@ (define_insn "stack_restore_tie"
   [(set (match_operand:SI 0 "gpc_reg_operand" "=r,r")
(plus:SI (match_operand:SI 1 "gpc_reg_operand" "r,r")
 (match_operand:SI 2 

[PATCH] Fix memory leak in loop header copying

2023-06-13 Thread Richard Biener via Gcc-patches


Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

* tree-ssa-loop-ch.cc (ch_base::copy_headers): Free loop BBs.
---
 gcc/tree-ssa-loop-ch.cc | 1 +
 1 file changed, 1 insertion(+)

diff --git a/gcc/tree-ssa-loop-ch.cc b/gcc/tree-ssa-loop-ch.cc
index 7fdef3bb11a..22252bee135 100644
--- a/gcc/tree-ssa-loop-ch.cc
+++ b/gcc/tree-ssa-loop-ch.cc
@@ -642,6 +642,7 @@ ch_base::copy_headers (function *fun)
   if (stmt_can_terminate_bb_p (gsi_stmt (bsi)))
 precise = false;
   }
+ free (bbs);
}
   if (precise
  && get_max_loop_iterations_int (loop) == 1)
-- 
2.35.3


Re: Re: [PATCH V3] RISC-V: Add more SLP tests

2023-06-13 Thread juzhe.zh...@rivai.ai
Ok. After floating-point binary. I will do floating-point ternary.
I think you do conversion next (widen floating point, float to int, int to 
float).

It seems that we have almost finished most of the autovec patterns in the RISC-V port.
What else can we do? My second middle-end patch (LEN_MASK_* load/store) is 
blocked, and it is a prerequisite for reduction,
if you understand how reduction works.
 
Maybe next you could find the way to optimize vv->vx ?

Thanks.


juzhe.zh...@rivai.ai
 
From: Robin Dapp
Date: 2023-06-13 20:00
To: juzhe.zhong; gcc-patches
CC: rdapp.gcc; kito.cheng; kito.cheng; palmer; palmer; jeffreyalaw
Subject: Re: [PATCH V3] RISC-V: Add more SLP tests
Hi Juzhe,
 
thanks, works for me as is.  I just hope somebody is going to take on the task
of making different LMUL SLP variants "scannable" at some point because
it would definitely increase our test coverage with these tests. (Or split
the tests manually and not iterate over LMUL)
 
Regards
Robin
 


Re: [PATCH] RISC-V: Fix bug of VLA SLP auto-vectorization

2023-06-13 Thread Robin Dapp via Gcc-patches
Hi Juzhe,

LGTM.  You could also add the aarch64 test disclaimer here again,
but no need for a V2. 

Regards
 Robin


[pushed] c++: mutable temps in rodata

2023-06-13 Thread Jason Merrill via Gcc-patches
Tested x86_64-pc-linux-gnu, applying to trunk.

-- 8< --

If the type of a temporary has mutable members, we can't set TREE_READONLY
on the VAR_DECL; this is parallel to the check in
cp_apply_type_quals_to_decl.

gcc/cp/ChangeLog:

* tree.cc (build_target_expr): Check TYPE_HAS_MUTABLE_P.

gcc/testsuite/ChangeLog:

* g++.dg/tree-ssa/initlist-opt6.C: New test.
---
 gcc/cp/tree.cc|  1 +
 gcc/testsuite/g++.dg/tree-ssa/initlist-opt6.C | 18 ++
 2 files changed, 19 insertions(+)
 create mode 100644 gcc/testsuite/g++.dg/tree-ssa/initlist-opt6.C

diff --git a/gcc/cp/tree.cc b/gcc/cp/tree.cc
index 751c9adeb62..799183dc646 100644
--- a/gcc/cp/tree.cc
+++ b/gcc/cp/tree.cc
@@ -522,6 +522,7 @@ build_target_expr (tree decl, tree value, tsubst_flags_t 
complain)
   if (CP_TYPE_CONST_NON_VOLATILE_P (type)
   && !TYPE_HAS_NONTRIVIAL_DESTRUCTOR (type)
   && !VOID_TYPE_P (TREE_TYPE (value))
+  && !TYPE_HAS_MUTABLE_P (type)
   && reduced_constant_expression_p (value))
 TREE_READONLY (decl) = true;
 
diff --git a/gcc/testsuite/g++.dg/tree-ssa/initlist-opt6.C 
b/gcc/testsuite/g++.dg/tree-ssa/initlist-opt6.C
new file mode 100644
index 000..ea1bf5d935e
--- /dev/null
+++ b/gcc/testsuite/g++.dg/tree-ssa/initlist-opt6.C
@@ -0,0 +1,18 @@
+// { dg-do compile { target c++11 } }
+// { dg-additional-options -fdump-tree-gimple }
+// { dg-final { scan-tree-dump-not {static const struct S} "gimple" } }
+
+// Test that mutable prevents putting this init-list array in rodata.
+
+#include <initializer_list>
+
+struct S {
+constexpr S(int i) : i(i) {}
+mutable int i;
+};
+
+void f(std::initializer_list<S>);
+
+int main() {
+f({1,2,3});
+}

base-commit: d438b67e005bf8fc9e4af26410bf69816c30e969
-- 
2.31.1



Re: [PATCH V3] RISC-V: Add more SLP tests

2023-06-13 Thread Robin Dapp via Gcc-patches
Hi Juzhe,

thanks, works for me as is.  I just hope somebody is going to take on the task
of making different LMUL SLP variants "scannable" at some point because
it would definitely increase our test coverage with these tests. (Or split
the tests manually and not iterate over LMUL)

Regards
 Robin


[PATCH V3] RISC-V: Add more SLP tests

2023-06-13 Thread juzhe . zhong
From: Juzhe-Zhong 

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/partial/slp-10.c: New test.
* gcc.target/riscv/rvv/autovec/partial/slp-11.c: New test.
* gcc.target/riscv/rvv/autovec/partial/slp-13.c: New test.
* gcc.target/riscv/rvv/autovec/partial/slp-14.c: New test.
* gcc.target/riscv/rvv/autovec/partial/slp-15.c: New test.
* gcc.target/riscv/rvv/autovec/partial/slp_run-10.c: New test.
* gcc.target/riscv/rvv/autovec/partial/slp_run-11.c: New test.
* gcc.target/riscv/rvv/autovec/partial/slp_run-13.c: New test.
* gcc.target/riscv/rvv/autovec/partial/slp_run-14.c: New test.
* gcc.target/riscv/rvv/autovec/partial/slp_run-15.c: New test.

---
 .../riscv/rvv/autovec/partial/slp-10.c| 32 +++
 .../riscv/rvv/autovec/partial/slp-11.c| 33 +++
 .../riscv/rvv/autovec/partial/slp-13.c| 34 +++
 .../riscv/rvv/autovec/partial/slp-14.c| 33 +++
 .../riscv/rvv/autovec/partial/slp-15.c| 35 
 .../riscv/rvv/autovec/partial/slp_run-10.c| 33 +++
 .../riscv/rvv/autovec/partial/slp_run-11.c| 33 +++
 .../riscv/rvv/autovec/partial/slp_run-13.c| 47 +++
 .../riscv/rvv/autovec/partial/slp_run-14.c| 57 +++
 .../riscv/rvv/autovec/partial/slp_run-15.c| 56 ++
 10 files changed, 393 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-10.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-11.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-13.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-14.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-15.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp_run-10.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp_run-11.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp_run-13.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp_run-14.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp_run-15.c

diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-10.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-10.c
new file mode 100644
index 000..c5215611e53
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-10.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param 
riscv-autovec-preference=scalable -fno-vect-cost-model 
-fdump-tree-optimized-details" } */
+
+#include <stdint-gcc.h>
+
+#define VEC_PERM(TYPE) 
\
+  TYPE __attribute__ ((noinline, noclone)) 
\
+  vec_slp_##TYPE (TYPE *restrict a, int n) 
\
+  {
\
+for (int i = 0; i < n; ++i)
\
+  {
\
+   a[i * 2] += 10;\
+   a[i * 2 + 1] += 17;\
+  }
\
+  }
+
+#define TEST_ALL(T)
\
+  T (int8_t)   
\
+  T (uint8_t)  
\
+  T (int16_t)  
\
+  T (uint16_t) 
\
+  T (int32_t)  
\
+  T (uint32_t) 
\
+  T (int64_t)  
\
+  T (uint64_t)
+
+TEST_ALL (VEC_PERM)
+
+/* { dg-final { scan-tree-dump-times "{ 10, 17, ... }" 8 "optimized" } } */
+/* This testcase is from aarch64 and floating-point operations are removed.
+   TODO: We will add floating-point operations back and make them as common 
test in the future.  */
+
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-11.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-11.c
new file mode 100644
index 000..ccb5ab6831d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-11.c
@@ -0,0 +1,33 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param 
riscv-autovec-preference=scalable -fno-vect-cost-model 
-fdump-tree-optimized-details" } */
+
+#include <stdint-gcc.h>
+
+#define VEC_PERM(TYPE) 

RE: [PATCH v5] RISC-V: Add vector psabi checking.

2023-06-13 Thread Li, Pan2 via Gcc-patches
Committed v6 with riscv.exp and rvv.exp passed, thanks Kito.

Pan

-Original Message-
From: Li, Pan2 
Sent: Monday, June 12, 2023 8:49 PM
To: Kito Cheng ; Wang, Yanzhang 
Cc: gcc-patches@gcc.gnu.org; juzhe.zh...@rivai.ai
Subject: RE: [PATCH v5] RISC-V: Add vector psabi checking.

Sure thing, will commit it after all riscv.exp rvv.exp pass.

Pan

-Original Message-
From: Kito Cheng  
Sent: Monday, June 12, 2023 8:43 PM
To: Wang, Yanzhang 
Cc: gcc-patches@gcc.gnu.org; juzhe.zh...@rivai.ai; Li, Pan2 
Subject: Re: [PATCH v5] RISC-V: Add vector psabi checking.

Hi Yan-Zhang:

OK with one minor comment; go ahead IF the regression is clean.

Hi Pan:

Could you help to verify this patch and commit if the regression is clean?

thanks :)

> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/rvv.exp 
> b/gcc/testsuite/gcc.target/riscv/rvv/rvv.exp
> index 5e69235a268..ad79d0e9a8d 100644
> --- a/gcc/testsuite/gcc.target/riscv/rvv/rvv.exp
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/rvv.exp
> @@ -43,7 +43,7 @@ dg-init
>  # Main loop.
>  set CFLAGS "$DEFAULT_CFLAGS -march=$gcc_march -mabi=$gcc_mabi -O3"

Add -Wno-psabi here rather than below, and also add it for
g++.target/riscv/rvv/rvv.exp

>  dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/base/*.\[cS\]]] \
> -   "" $CFLAGS
> +   "-Wno-psabi" $CFLAGS
>  gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/vsetvl/*.\[cS\]]] \
> "" $CFLAGS
>  dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/autovec/*.\[cS\]]] \


Re: [PATCH] middle-end, i386: Pattern recognize add/subtract with carry [PR79173]

2023-06-13 Thread Jakub Jelinek via Gcc-patches
On Tue, Jun 13, 2023 at 08:40:36AM +, Richard Biener wrote:
> I suspect re-association can wreck things even more here.  I have
> to say the matching code is very hard to follow, not sure if
> splitting out a function matching
> 
>_22 = .{ADD,SUB}_OVERFLOW (_6, _5);
>_23 = REALPART_EXPR <_22>;
>_24 = IMAGPART_EXPR <_22>;
> 
> from _23 and _24 would help?

I've outlined 3 most often used sequences of statements or checks
into 3 helper functions, hope that helps.

> > +  while (TREE_CODE (rhs[0]) == SSA_NAME && !rhs[3])
> > +   {
> > + gimple *g = SSA_NAME_DEF_STMT (rhs[0]);
> > + if (has_single_use (rhs[0])
> > + && is_gimple_assign (g)
> > + && (gimple_assign_rhs_code (g) == code
> > + || (code == MINUS_EXPR
> > + && gimple_assign_rhs_code (g) == PLUS_EXPR
> > + && TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST)))
> > +   {
> > + rhs[0] = gimple_assign_rhs1 (g);
> > + tree &r = rhs[2] ? rhs[3] : rhs[2];
> > + r = gimple_assign_rhs2 (g);
> > + if (gimple_assign_rhs_code (g) != code)
> > +   r = fold_build1 (NEGATE_EXPR, TREE_TYPE (r), r);
> 
> Can you use const_unop here?  In fact both will not reliably
> negate all constants (ick), so maybe we want a force_const_negate ()?

It is unsigned type NEGATE_EXPR of INTEGER_CST, so I think it should
work.  That said, changed it to const_unop and am just giving up on it
as if it wasn't a PLUS_EXPR with INTEGER_CST addend if const_unop doesn't
simplify.

> > + else if (addc_subc)
> > +   {
> > + if (!integer_zerop (arg2))
> > +   ;
> > + /* x = y + 0 + 0; x = y - 0 - 0; */
> > + else if (integer_zerop (arg1))
> > +   result = arg0;
> > + /* x = 0 + y + 0; */
> > + else if (subcode != MINUS_EXPR && integer_zerop (arg0))
> > +   result = arg1;
> > + /* x = y - y - 0; */
> > + else if (subcode == MINUS_EXPR
> > +  && operand_equal_p (arg0, arg1, 0))
> > +   result = integer_zero_node;
> > +   }
> 
> So this all performs simplifications but also constant folding.  In
> particular the match.pd re-simplification will invoke fold_const_call
> on all-constant argument function calls but does not do extra folding
> on partially constant arg cases but instead relies on patterns here.
> 
> Can you add all-constant arg handling to fold_const_call and
> consider moving cases like y + 0 + 0 to match.pd?

The reason I've done this here is that this is the spot where all other
similar internal functions are handled, be it the ubsan ones
- IFN_UBSAN_CHECK_{ADD,SUB,MUL}, or __builtin_*_overflow ones
- IFN_{ADD,SUB,MUL}_OVERFLOW, or these 2 new ones.  The code handles
there 2 constant arguments as well as various patterns that can be
simplified and has code to clean it up later, build a COMPLEX_CST,
or COMPLEX_EXPR etc. as needed.  So, I think we want to handle those
elsewhere, we should do it for all of those functions, but then
probably incrementally.

> > +@cindex @code{addc@var{m}5} instruction pattern
> > +@item @samp{addc@var{m}5}
> > +Adds operands 2, 3 and 4 (where the last operand is guaranteed to have
> > +only values 0 or 1) together, sets operand 0 to the result of the
> > +addition of the 3 operands and sets operand 1 to 1 iff there was no
> > +overflow on the unsigned additions, and to 0 otherwise.  So, it is
> > +an addition with carry in (operand 4) and carry out (operand 1).
> > +All operands have the same mode.
> 
> operand 1 set to 1 for no overflow sounds weird when specifying it
> as carry out - can you double check?

Fixed.

> > +@cindex @code{subc@var{m}5} instruction pattern
> > +@item @samp{subc@var{m}5}
> > +Similarly to @samp{addc@var{m}5}, except subtracts operands 3 and 4
> > +from operand 2 instead of adding them.  So, it is
> > +a subtraction with carry/borrow in (operand 4) and carry/borrow out
> > +(operand 1).  All operands have the same mode.
> > +
> 
> I wonder if we want to name them uaddc and usubc?  Or is this supposed
> to be simply the twos-complement "carry"?  I think the docs should
> say so then (note we do have uaddv and addv).

Makes sense, I've actually renamed even the internal functions etc.

Here is only lightly tested patch with everything but gimple-fold.cc
changed.

2023-06-13  Jakub Jelinek  

PR middle-end/79173
* internal-fn.def (UADDC, USUBC): New internal functions.
* internal-fn.cc (expand_UADDC, expand_USUBC): New functions.
(commutative_ternary_fn_p): Return true also for IFN_UADDC.
* optabs.def (uaddc5_optab, usubc5_optab): New optabs.
* tree-ssa-math-opts.cc (uaddc_cast, uaddc_ne0, uaddc_is_cplxpart,
match_uaddc_usubc): New functions.
(math_opts_dom_walker::after_dom_children): Call match_uaddc_usubc
for PLUS_EXPR, MINUS_EXPR, BIT_IOR_EXPR and BIT_XOR_EXPR unless
other optimizations have been 

Re: [ping] Add 'libgomp.{,oacc-}fortran/fortran-torture_execute_math.f90'

2023-06-13 Thread Tobias Burnus

On 13.06.23 12:42, Thomas Schwinge wrote:

On 2023-06-05T14:18:48+0200, I wrote:

OK to push the attached
"Add 'libgomp.{,oacc-}fortran/fortran-torture_execute_math.f90'"?


Subject: [PATCH] Add
  'libgomp.{,oacc-}fortran/fortran-torture_execute_math.f90'

  gcc/testsuite/
  * gfortran.fortran-torture/execute/math.f90: Enhance for optional
  OpenACC, OpenMP 'target' usage.


I think it is more readable with a linebreak here and with "OpenACC
'serial' and OpenMP ..." instead of "OpenACC, OpenMP".

What I would like to see is a hint somewhere in the commit log that the
libgomp files include the gfortran.fortran-torture file. I don't care
whether you add the hint before the changelog items as free text – or in
the bullet above (e.g. "as it is included in libgomp/testsuite") – or
after "New." in the following bullet list.


  libgomp/
  * testsuite/libgomp.fortran/fortran-torture_execute_math.f90: New.
  * testsuite/libgomp.oacc-fortran/fortran-torture_execute_math.f90:
  Likewise.



---
  .../gfortran.fortran-torture/execute/math.f90 | 23 +--
  .../fortran-torture_execute_math.f90  |  4 
  .../fortran-torture_execute_math.f90  |  5 
  3 files changed, 30 insertions(+), 2 deletions(-)
  create mode 100644 
libgomp/testsuite/libgomp.fortran/fortran-torture_execute_math.f90
  create mode 100644 
libgomp/testsuite/libgomp.oacc-fortran/fortran-torture_execute_math.f90

diff --git a/gcc/testsuite/gfortran.fortran-torture/execute/math.f90 
b/gcc/testsuite/gfortran.fortran-torture/execute/math.f90
index 17cc78f7a10..e71f669304f 100644
--- a/gcc/testsuite/gfortran.fortran-torture/execute/math.f90
+++ b/gcc/testsuite/gfortran.fortran-torture/execute/math.f90
@@ -1,9 +1,14 @@
  ! Program to test mathematical intrinsics
+
+! See also 
'libgomp/testsuite/libgomp.fortran/fortran-torture_execute_math.f90'; thus the 
'!$omp' directives.
+! See also 
'libgomp/testsuite/libgomp.oacc-fortran/fortran-torture_execute_math.f90'; thus 
the '!$acc' directives.


Likewise here: it is not completely obvious that this file is 'include'd
by the other testcases.

Maybe add a line "! This file is also included in:" and remove the "See
also" or some creative variant of it.

Minor remark: The OpenMP part is OK, but a strict reading of the spec
requires an 'omp declare target' if a subroutine is in a different
compilation unit. And according to the glossary, that's the case here. In
practice, it also works without as it is in the same translation unit.
(compilation unit = for C/C++: translation unit, for Fortran:
subprogram). I think the HPE/Cray compiler will complain, but maybe only
when used with modules and not with subroutine subprograms. (As many
compilers write a .mod file for modules, a late change of attributes can
be more problematic.)

Otherwise LGTM.

Tobias

PS: I assume that you have checked it both with an in-build-tree and
an in-install-tree testsuite run.


+
  subroutine dotest (n, val4, val8, known)
 implicit none
 real(kind=4) val4, known
 real(kind=8) val8
 integer n
+   !$acc routine seq

 if (abs (val4 - known) .gt. 0.001) STOP 1
 if (abs (real (val8, kind=4) - known) .gt. 0.001) STOP 2
@@ -14,17 +19,20 @@ subroutine dotestc (n, val4, val8, known)
 complex(kind=4) val4, known
 complex(kind=8) val8
 integer n
+   !$acc routine seq
+
 if (abs (val4 - known) .gt. 0.001) STOP 3
 if (abs (cmplx (val8, kind=4) - known) .gt. 0.001) STOP 4
  end subroutine

-program testmath
+subroutine testmath
 implicit none
 real(kind=4) r, two4, half4
 real(kind=8) q, two8, half8
 complex(kind=4) cr
 complex(kind=8) cq
 external dotest, dotestc
+   !$acc routine seq

 two4 = 2.0
 two8 = 2.0_8
@@ -96,5 +104,16 @@ program testmath
 cq = log ((-1.0_8, -1.0_8))
 call dotestc (21, cr, cq, (0.3466, -2.3562))

-end program
+end subroutine

+program main
+   implicit none
+   external testmath
+
+   !$acc serial
+   !$omp target
+   call testmath
+   !$acc end serial
+   !$omp end target
+
+end program
diff --git a/libgomp/testsuite/libgomp.fortran/fortran-torture_execute_math.f90 
b/libgomp/testsuite/libgomp.fortran/fortran-torture_execute_math.f90
new file mode 100644
index 000..3348a0bb3ad
--- /dev/null
+++ b/libgomp/testsuite/libgomp.fortran/fortran-torture_execute_math.f90
@@ -0,0 +1,4 @@
+! { dg-do run }
+! { dg-additional-options -foffload-options=-lm }
+
+include '../../../gcc/testsuite/gfortran.fortran-torture/execute/math.f90'
diff --git 
a/libgomp/testsuite/libgomp.oacc-fortran/fortran-torture_execute_math.f90 
b/libgomp/testsuite/libgomp.oacc-fortran/fortran-torture_execute_math.f90
new file mode 100644
index 000..1b2ac440762
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/fortran-torture_execute_math.f90
@@ -0,0 +1,5 @@
+! { dg-do run }
+!TODO { dg-prune-output {using 'vector_length \(32\)', ignoring 1} }
+! { dg-additional-options -foffload-options=-lm }
+

Re: [PATCH] New finish_compare_by_pieces target hook (for x86).

2023-06-13 Thread Richard Biener via Gcc-patches
On Mon, Jun 12, 2023 at 4:04 PM Roger Sayle  wrote:
>
>
> The following simple test case, from PR 104610, shows that memcmp () == 0
> can result in some bizarre code sequences on x86.
>
> int foo(char *a)
> {
> static const char t[] = "0123456789012345678901234567890";
> return __builtin_memcmp(a, &t[0], sizeof(t)) == 0;
> }
>
> with -O2 currently contains both:
> xorl%eax, %eax
> xorl$1, %eax
> and also
> movl$1, %eax
> xorl$1, %eax
>
> Changing the return type of foo to _Bool results in the equally
> bizarre:
> xorl%eax, %eax
> testl   %eax, %eax
> sete%al
> and also
> movl$1, %eax
> testl   %eax, %eax
> sete%al
>
> All these sequences set the result to a constant, but this optimization
> opportunity only occurs very late during compilation, by basic block
> duplication in the 322r.bbro pass, too late for CSE or peephole2 to
> do anything about it.  The problem is that the idiom expanded by
> compare_by_pieces for __builtin_memcmp_eq contains basic blocks that
> can't easily be optimized by if-conversion due to the multiple
> incoming edges on the fail block.
>
> In summary, compare_by_pieces generates code that looks like:
>
> if (x[0] != y[0]) goto fail_label;
> if (x[1] != y[1]) goto fail_label;
> ...
> if (x[n] != y[n]) goto fail_label;
> result = 1;
> goto end_label;
> fail_label:
> result = 0;
> end_label:
>
> In theory, the RTL if-conversion pass could be enhanced to tackle
> arbitrarily complex if-then-else graphs, but the solution proposed
> here is to allow suitable targets to perform if-conversion during
> compare_by_pieces.  The x86, for example, can take advantage that
> all of the above comparisons set and test the zero flag (ZF), which
> can then be used in combination with sete.  Hence compare_by_pieces
> could instead generate:
>
> if (x[0] != y[0]) goto fail_label;
> if (x[1] != y[1]) goto fail_label;
> ...
> if (x[n] != y[n]) goto fail_label;
> fail_label:
> sete result
>
> which requires one less basic block, and the redundant conditional
> branch to a label immediately after is cleaned up by GCC's existing
> RTL optimizations.
>
> For the test case above, where -O2 -msse4 previously generated:
>
> foo:movdqu  (%rdi), %xmm0
> pxor.LC0(%rip), %xmm0
> ptest   %xmm0, %xmm0
> je  .L5
> .L2:movl$1, %eax
> xorl$1, %eax
> ret
> .L5:movdqu  16(%rdi), %xmm0
> pxor.LC1(%rip), %xmm0
> ptest   %xmm0, %xmm0
> jne .L2
> xorl%eax, %eax
> xorl$1, %eax
> ret
>
> we now generate:
>
> foo:movdqu  (%rdi), %xmm0
> pxor.LC0(%rip), %xmm0
> ptest   %xmm0, %xmm0
> jne .L2
> movdqu  16(%rdi), %xmm0
> pxor.LC1(%rip), %xmm0
> ptest   %xmm0, %xmm0
> .L2:sete%al
> movzbl  %al, %eax
> ret
>
> Using a target hook allows the large amount of intelligence already in
> compare_by_pieces to be re-used by the i386 backend, but this can also
> help other backends with condition flags where the equality result can
> be materialized.
>
> This patch has been tested on x86_64-pc-linux-gnu with make bootstrap
> and make -k check, both with and without --target_board=unix{-m32}
> with no new failures.  Ok for mainline?

What's the guarantee that the zero flag is appropriately set on all
edges incoming now and forever?  Does this require target specific
knowledge on how do_compare_rtx_and_jump is emitting RTL?

Do you see matching this in ifcvt to be unreasonable?  I'm thinking
of "reducing" the incoming edges pairwise without actually looking
at the ifcvt code.

Thanks,
Richard.

>
> 2023-06-12  Roger Sayle  
>
> gcc/ChangeLog
> * config/i386/i386.cc (ix86_finish_compare_by_pieces): New
> function to provide a backend specific implementation.
> (TARGET_FINISH_COMPARE_BY_PIECES): Use the above function.
>
> * doc/tm.texi.in (TARGET_FINISH_COMPARE_BY_PIECES): New @hook.
> * doc/tm.texi: Regenerate.
>
> * expr.cc (compare_by_pieces): Call finish_compare_by_pieces in
> targetm to finalize the RTL expansion.  Move the current
> implementation to a default target hook.
> * target.def (finish_compare_by_pieces): New target hook to allow
> compare_by_pieces to be customized by the target.
> * targhooks.cc (default_finish_compare_by_pieces): Default
> implementation moved here from expr.cc's compare_by_pieces.
> * targhooks.h (default_finish_compare_by_pieces): Prototype.
>
> gcc/testsuite/ChangeLog
> * gcc.target/i386/pieces-memcmp-1.c: New test case.
>
>
> Thanks in advance,
> Roger
> --
>


Re: [PATCH V2] RISC-V: Remove duplicate `#include "riscv-vector-switch.def"`

2023-06-13 Thread Lehua Ding
LGTM. 
 Thanks.
 Will merge it soon.


Thank you for such a prompt reply.

 

Re: [PATCH V2] RISC-V: Remove duplicate `#include "riscv-vector-switch.def"`

2023-06-13 Thread juzhe.zh...@rivai.ai
LGTM. 
Thanks.
Will merge it soon.


juzhe.zh...@rivai.ai
 
From: Lehua Ding
Date: 2023-06-13 18:59
To: gcc-patches; juzhe.zhong
Subject: [PATCH V2] RISC-V: Remove duplicate `#include 
"riscv-vector-switch.def"`
Hi,
 
This patch removes the duplicate `#include "riscv-vector-switch.def"` statement
and adds #undef for the ENTRY and TUPLE_ENTRY macros later.
 
Best,
Lehua
 
gcc/ChangeLog:
 
* config/riscv/riscv-v.cc (struct mode_vtype_group): Remove duplicate 
#include.
(ENTRY): Undef.
(TUPLE_ENTRY): Undef.
 
---
gcc/config/riscv/riscv-v.cc | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
 
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index e1b85a5af91f..09c2abcbc623 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -1210,7 +1210,6 @@ struct mode_vtype_group
   ratio_for_min_vlen64[MODE##mode] = RATIO_FOR_MIN_VLEN64; 
\
   vlmul_for_for_vlen128[MODE##mode] = VLMUL_FOR_MIN_VLEN128;   
\
   ratio_for_for_vlen128[MODE##mode] = RATIO_FOR_MIN_VLEN128;
-#include "riscv-vector-switch.def"
#define TUPLE_ENTRY(MODE, REQUIREMENT, SUBPART_MODE, NF, VLMUL_FOR_MIN_VLEN32, \
RATIO_FOR_MIN_VLEN32, VLMUL_FOR_MIN_VLEN64,\
RATIO_FOR_MIN_VLEN64, VLMUL_FOR_MIN_VLEN128,   \
@@ -1224,6 +1223,8 @@ struct mode_vtype_group
   vlmul_for_for_vlen128[MODE##mode] = VLMUL_FOR_MIN_VLEN128;   
\
   ratio_for_for_vlen128[MODE##mode] = RATIO_FOR_MIN_VLEN128;
#include "riscv-vector-switch.def"
+#undef ENTRY
+#undef TUPLE_ENTRY
   }
};
-- 
2.36.3
 


[PATCH V2] RISC-V: Remove duplicate `#include "riscv-vector-switch.def"`

2023-06-13 Thread Lehua Ding
Hi,

This patch removes the duplicate `#include "riscv-vector-switch.def"` statement
and adds #undef for the ENTRY and TUPLE_ENTRY macros after the remaining #include.

Best,
Lehua

gcc/ChangeLog:

* config/riscv/riscv-v.cc (struct mode_vtype_group): Remove duplicate 
#include.
(ENTRY): Undef.
(TUPLE_ENTRY): Undef.

---
 gcc/config/riscv/riscv-v.cc | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index e1b85a5af91f..09c2abcbc623 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -1210,7 +1210,6 @@ struct mode_vtype_group
   ratio_for_min_vlen64[MODE##mode] = RATIO_FOR_MIN_VLEN64; 
\
   vlmul_for_for_vlen128[MODE##mode] = VLMUL_FOR_MIN_VLEN128;   
\
   ratio_for_for_vlen128[MODE##mode] = RATIO_FOR_MIN_VLEN128;
-#include "riscv-vector-switch.def"
 #define TUPLE_ENTRY(MODE, REQUIREMENT, SUBPART_MODE, NF, VLMUL_FOR_MIN_VLEN32, 
\
RATIO_FOR_MIN_VLEN32, VLMUL_FOR_MIN_VLEN64,\
RATIO_FOR_MIN_VLEN64, VLMUL_FOR_MIN_VLEN128,   \
@@ -1224,6 +1223,8 @@ struct mode_vtype_group
   vlmul_for_for_vlen128[MODE##mode] = VLMUL_FOR_MIN_VLEN128;   
\
   ratio_for_for_vlen128[MODE##mode] = RATIO_FOR_MIN_VLEN128;
 #include "riscv-vector-switch.def"
+#undef ENTRY
+#undef TUPLE_ENTRY
   }
 };
 
-- 
2.36.3



Re: [PATCH] RISC-V: Remove duplicate `#include "riscv-vector-switch.def"`

2023-06-13 Thread juzhe.zh...@rivai.ai
Send V2 patch with changelog.

Thanks.


juzhe.zh...@rivai.ai
 
From: Lehua Ding
Date: 2023-06-13 18:53
To: gcc-patches; juzhe.zhong
Subject: [PATCH] RISC-V: Remove duplicate `#include "riscv-vector-switch.def"`
Hi,
 
This patch removes the duplicate `#include "riscv-vector-switch.def"` statement
and adds #undef for the ENTRY and TUPLE_ENTRY macros after the remaining #include.
 
Best,
Lehua
 
---
gcc/config/riscv/riscv-v.cc | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
 
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index e1b85a5af91f..09c2abcbc623 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -1210,7 +1210,6 @@ struct mode_vtype_group
   ratio_for_min_vlen64[MODE##mode] = RATIO_FOR_MIN_VLEN64; 
\
   vlmul_for_for_vlen128[MODE##mode] = VLMUL_FOR_MIN_VLEN128;   
\
   ratio_for_for_vlen128[MODE##mode] = RATIO_FOR_MIN_VLEN128;
-#include "riscv-vector-switch.def"
#define TUPLE_ENTRY(MODE, REQUIREMENT, SUBPART_MODE, NF, VLMUL_FOR_MIN_VLEN32, \
RATIO_FOR_MIN_VLEN32, VLMUL_FOR_MIN_VLEN64,\
RATIO_FOR_MIN_VLEN64, VLMUL_FOR_MIN_VLEN128,   \
@@ -1224,6 +1223,8 @@ struct mode_vtype_group
   vlmul_for_for_vlen128[MODE##mode] = VLMUL_FOR_MIN_VLEN128;   
\
   ratio_for_for_vlen128[MODE##mode] = RATIO_FOR_MIN_VLEN128;
#include "riscv-vector-switch.def"
+#undef ENTRY
+#undef TUPLE_ENTRY
   }
};
-- 
2.36.3
 


[PATCH] RISC-V: Remove duplicate `#include "riscv-vector-switch.def"`

2023-06-13 Thread Lehua Ding
Hi,

This patch removes the duplicate `#include "riscv-vector-switch.def"` statement
and adds #undef for the ENTRY and TUPLE_ENTRY macros after the remaining #include.

Best,
Lehua

---
 gcc/config/riscv/riscv-v.cc | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index e1b85a5af91f..09c2abcbc623 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -1210,7 +1210,6 @@ struct mode_vtype_group
   ratio_for_min_vlen64[MODE##mode] = RATIO_FOR_MIN_VLEN64; 
\
   vlmul_for_for_vlen128[MODE##mode] = VLMUL_FOR_MIN_VLEN128;   
\
   ratio_for_for_vlen128[MODE##mode] = RATIO_FOR_MIN_VLEN128;
-#include "riscv-vector-switch.def"
 #define TUPLE_ENTRY(MODE, REQUIREMENT, SUBPART_MODE, NF, VLMUL_FOR_MIN_VLEN32, 
\
RATIO_FOR_MIN_VLEN32, VLMUL_FOR_MIN_VLEN64,\
RATIO_FOR_MIN_VLEN64, VLMUL_FOR_MIN_VLEN128,   \
@@ -1224,6 +1223,8 @@ struct mode_vtype_group
   vlmul_for_for_vlen128[MODE##mode] = VLMUL_FOR_MIN_VLEN128;   
\
   ratio_for_for_vlen128[MODE##mode] = RATIO_FOR_MIN_VLEN128;
 #include "riscv-vector-switch.def"
+#undef ENTRY
+#undef TUPLE_ENTRY
   }
 };
 
-- 
2.36.3



[ping] driver: Forward '-lgfortran', '-lm' to offloading compilation

2023-06-13 Thread Thomas Schwinge
Hi!

On 2023-06-05T14:25:18+0200, I wrote:
> OK to push the attached
> "driver: Forward '-lgfortran', '-lm' to offloading compilation"?
> (We didn't have a PR open for that, or did we?)

Ping.


Grüße
 Thomas


-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
>From 5d3cb866cad3bbcf47c5e66825e5710e86cc017e Mon Sep 17 00:00:00 2001
From: Thomas Schwinge 
Date: Mon, 5 Jun 2023 11:26:37 +0200
Subject: [PATCH] driver: Forward '-lgfortran', '-lm' to offloading compilation

..., so that users don't manually need to specify
'-foffload-options=-lgfortran', '-foffload-options=-lm' in addition to
'-lgfortran', '-lm' (specified manually, or implicitly by the driver).

	gcc/
	* gcc.cc (driver_handle_option): Forward host '-lgfortran', '-lm'
	to offloading compilation.
	* config/gcn/mkoffload.cc (main): Adjust.
	* config/nvptx/mkoffload.cc (main): Likewise.
	* doc/invoke.texi (foffload-options): Update example.
	libgomp/
	* testsuite/libgomp.fortran/fortran.exp (lang_link_flags): Don't
	set.
	* testsuite/libgomp.oacc-fortran/fortran.exp (lang_link_flags):
	Likewise.
	* testsuite/libgomp.c/simd-math-1.c: Remove
	'-foffload-options=-lm'.
	* testsuite/libgomp.fortran/fortran-torture_execute_math.f90:
	Likewise.
	* testsuite/libgomp.oacc-fortran/fortran-torture_execute_math.f90:
	Likewise.
---
 gcc/config/gcn/mkoffload.cc   | 12 
 gcc/config/nvptx/mkoffload.cc | 12 
 gcc/doc/invoke.texi   |  5 +-
 gcc/gcc.cc| 56 +++
 libgomp/testsuite/libgomp.c/simd-math-1.c |  1 -
 .../fortran-torture_execute_math.f90  |  1 -
 libgomp/testsuite/libgomp.fortran/fortran.exp |  2 -
 .../fortran-torture_execute_math.f90  |  1 -
 .../libgomp.oacc-fortran/fortran.exp  |  2 -
 9 files changed, 82 insertions(+), 10 deletions(-)

diff --git a/gcc/config/gcn/mkoffload.cc b/gcc/config/gcn/mkoffload.cc
index 988c12318fd..8b608bf024e 100644
--- a/gcc/config/gcn/mkoffload.cc
+++ b/gcc/config/gcn/mkoffload.cc
@@ -946,6 +946,18 @@ main (int argc, char **argv)
   else if (startswith (argv[i], STR))
 	gcn_stack_size = atoi (argv[i] + strlen (STR));
 #undef STR
+  /* Translate host into offloading libraries.  */
+  else if (strcmp (argv[i], "-l_GCC_gfortran") == 0
+	   || strcmp (argv[i], "-l_GCC_m") == 0)
+	{
+	  /* Elide '_GCC_'.  */
+	  size_t i_dst = strlen ("-l");
+	  size_t i_src = strlen ("-l_GCC_");
+	  char c;
+	  do
+	c = argv[i][i_dst++] = argv[i][i_src++];
+	  while (c != '\0');
+	}
 }
 
   if (!(fopenacc ^ fopenmp))
diff --git a/gcc/config/nvptx/mkoffload.cc b/gcc/config/nvptx/mkoffload.cc
index 6cdea45cffe..aaea9fb320d 100644
--- a/gcc/config/nvptx/mkoffload.cc
+++ b/gcc/config/nvptx/mkoffload.cc
@@ -649,6 +649,18 @@ main (int argc, char **argv)
   else if (strcmp (argv[i], "-dumpbase") == 0
 	   && i + 1 < argc)
 	dumppfx = argv[++i];
+  /* Translate host into offloading libraries.  */
+  else if (strcmp (argv[i], "-l_GCC_gfortran") == 0
+	   || strcmp (argv[i], "-l_GCC_m") == 0)
+	{
+	  /* Elide '_GCC_'.  */
+	  size_t i_dst = strlen ("-l");
+	  size_t i_src = strlen ("-l_GCC_");
+	  char c;
+	  do
+	c = argv[i][i_dst++] = argv[i][i_src++];
+	  while (c != '\0');
+	}
 }
   if (!(fopenacc ^ fopenmp))
 fatal_error (input_location, "either %<-fopenacc%> or %<-fopenmp%> "
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index d2d639c92d4..7b3a2a74459 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -2716,9 +2716,8 @@ the @option{-foffload-options=@var{target-list}=@var{options}} form.  The
 Typical command lines are
 
 @smallexample
--foffload-options=-lgfortran -foffload-options=-lm
--foffload-options="-lgfortran -lm" -foffload-options=nvptx-none=-latomic
--foffload-options=amdgcn-amdhsa=-march=gfx906 -foffload-options=-lm
+-foffload-options='-fno-math-errno -ffinite-math-only' -foffload-options=nvptx-none=-latomic
+-foffload-options=amdgcn-amdhsa=-march=gfx906 -foffload-options=-O3
 @end smallexample
 
 @opindex fopenacc
diff --git a/gcc/gcc.cc b/gcc/gcc.cc
index 2ccca00d603..15995206856 100644
--- a/gcc/gcc.cc
+++ b/gcc/gcc.cc
@@ -47,6 +47,9 @@ compilation is specified by a string called a "spec".  */
 #include "opts-jobserver.h"
 #include "common/common-target.h"
 
+#ifndef MATH_LIBRARY
+#define MATH_LIBRARY "m"
+#endif
 
 
 /* Manage the manipulation of env vars.
@@ -4117,6 +4120,48 @@ next_item:
 }
 }
 
+/* Forward certain options to offloading compilation.  */
+
+static void
+forward_offload_option (size_t opt_index, const char *arg, bool validated)
+{
+  switch (opt_index)
+{
+case OPT_l:
+  /* Use a '_GCC_' prefix and standard name ('-l_GCC_m' irrespective of the
+	 host's 'MATH_LIBRARY', for 

[ping] Add 'libgomp.{, oacc-}fortran/fortran-torture_execute_math.f90'

2023-06-13 Thread Thomas Schwinge
Hi!

On 2023-06-05T14:18:48+0200, I wrote:
> OK to push the attached
> "Add 'libgomp.{,oacc-}fortran/fortran-torture_execute_math.f90'"?

Ping.


Grüße
 Thomas


-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
>From 0d5095d8cd2d68113890a39a7fdb649198e576c1 Mon Sep 17 00:00:00 2001
From: Thomas Schwinge 
Date: Fri, 2 Jun 2023 23:11:00 +0200
Subject: [PATCH] Add
 'libgomp.{,oacc-}fortran/fortran-torture_execute_math.f90'

	gcc/testsuite/
	* gfortran.fortran-torture/execute/math.f90: Enhance for optional
	OpenACC, OpenMP 'target' usage.
	libgomp/
	* testsuite/libgomp.fortran/fortran-torture_execute_math.f90: New.
	* testsuite/libgomp.oacc-fortran/fortran-torture_execute_math.f90:
	Likewise.
---
 .../gfortran.fortran-torture/execute/math.f90 | 23 +--
 .../fortran-torture_execute_math.f90  |  4 
 .../fortran-torture_execute_math.f90  |  5 
 3 files changed, 30 insertions(+), 2 deletions(-)
 create mode 100644 libgomp/testsuite/libgomp.fortran/fortran-torture_execute_math.f90
 create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/fortran-torture_execute_math.f90

diff --git a/gcc/testsuite/gfortran.fortran-torture/execute/math.f90 b/gcc/testsuite/gfortran.fortran-torture/execute/math.f90
index 17cc78f7a10..e71f669304f 100644
--- a/gcc/testsuite/gfortran.fortran-torture/execute/math.f90
+++ b/gcc/testsuite/gfortran.fortran-torture/execute/math.f90
@@ -1,9 +1,14 @@
 ! Program to test mathematical intrinsics
+
+! See also 'libgomp/testsuite/libgomp.fortran/fortran-torture_execute_math.f90'; thus the '!$omp' directives.
+! See also 'libgomp/testsuite/libgomp.oacc-fortran/fortran-torture_execute_math.f90'; thus the '!$acc' directives.
+
 subroutine dotest (n, val4, val8, known)
implicit none
real(kind=4) val4, known
real(kind=8) val8
integer n
+   !$acc routine seq
 
if (abs (val4 - known) .gt. 0.001) STOP 1
if (abs (real (val8, kind=4) - known) .gt. 0.001) STOP 2
@@ -14,17 +19,20 @@ subroutine dotestc (n, val4, val8, known)
complex(kind=4) val4, known
complex(kind=8) val8
integer n
+   !$acc routine seq
+
if (abs (val4 - known) .gt. 0.001) STOP 3
if (abs (cmplx (val8, kind=4) - known) .gt. 0.001) STOP 4
 end subroutine
 
-program testmath
+subroutine testmath
implicit none
real(kind=4) r, two4, half4
real(kind=8) q, two8, half8
complex(kind=4) cr
complex(kind=8) cq
external dotest, dotestc
+   !$acc routine seq
 
two4 = 2.0
two8 = 2.0_8
@@ -96,5 +104,16 @@ program testmath
cq = log ((-1.0_8, -1.0_8))
call dotestc (21, cr, cq, (0.3466, -2.3562))
 
-end program
+end subroutine
 
+program main
+   implicit none
+   external testmath
+
+   !$acc serial
+   !$omp target
+   call testmath
+   !$acc end serial
+   !$omp end target
+
+end program
diff --git a/libgomp/testsuite/libgomp.fortran/fortran-torture_execute_math.f90 b/libgomp/testsuite/libgomp.fortran/fortran-torture_execute_math.f90
new file mode 100644
index 000..3348a0bb3ad
--- /dev/null
+++ b/libgomp/testsuite/libgomp.fortran/fortran-torture_execute_math.f90
@@ -0,0 +1,4 @@
+! { dg-do run }
+! { dg-additional-options -foffload-options=-lm }
+
+include '../../../gcc/testsuite/gfortran.fortran-torture/execute/math.f90'
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/fortran-torture_execute_math.f90 b/libgomp/testsuite/libgomp.oacc-fortran/fortran-torture_execute_math.f90
new file mode 100644
index 000..1b2ac440762
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/fortran-torture_execute_math.f90
@@ -0,0 +1,5 @@
+! { dg-do run }
+!TODO { dg-prune-output {using 'vector_length \(32\)', ignoring 1} }
+! { dg-additional-options -foffload-options=-lm }
+
+include '../../../gcc/testsuite/gfortran.fortran-torture/execute/math.f90'
-- 
2.34.1



[PATCH V2] RISC-V: Add more SLP tests

2023-06-13 Thread juzhe . zhong
From: Juzhe-Zhong 

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/partial/slp-10.c: New test.
* gcc.target/riscv/rvv/autovec/partial/slp-11.c: New test.
* gcc.target/riscv/rvv/autovec/partial/slp_run-10.c: New test.
* gcc.target/riscv/rvv/autovec/partial/slp_run-11.c: New test.

---
 .../riscv/rvv/autovec/partial/slp-10.c| 32 ++
 .../riscv/rvv/autovec/partial/slp-11.c| 33 +++
 .../riscv/rvv/autovec/partial/slp_run-10.c| 33 +++
 .../riscv/rvv/autovec/partial/slp_run-11.c| 33 +++
 4 files changed, 131 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-10.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-11.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp_run-10.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp_run-11.c

diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-10.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-10.c
new file mode 100644
index 000..c5215611e53
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-10.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param 
riscv-autovec-preference=scalable -fno-vect-cost-model 
-fdump-tree-optimized-details" } */
+
+#include 
+
+#define VEC_PERM(TYPE) 
\
+  TYPE __attribute__ ((noinline, noclone)) 
\
+  vec_slp_##TYPE (TYPE *restrict a, int n) 
\
+  {
\
+for (int i = 0; i < n; ++i)
\
+  {
\
+   a[i * 2] += 10;\
+   a[i * 2 + 1] += 17;\
+  }
\
+  }
+
+#define TEST_ALL(T)
\
+  T (int8_t)   
\
+  T (uint8_t)  
\
+  T (int16_t)  
\
+  T (uint16_t) 
\
+  T (int32_t)  
\
+  T (uint32_t) 
\
+  T (int64_t)  
\
+  T (uint64_t)
+
+TEST_ALL (VEC_PERM)
+
+/* { dg-final { scan-tree-dump-times "{ 10, 17, ... }" 8 "optimized" } } */
+/* This testcase is from aarch64 and floating-point operations are removed.
+   TODO: We will add floating-point operations back and make them as common 
test in the future.  */
+
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-11.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-11.c
new file mode 100644
index 000..ccb5ab6831d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-11.c
@@ -0,0 +1,33 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param 
riscv-autovec-preference=scalable -fno-vect-cost-model 
-fdump-tree-optimized-details" } */
+
+#include 
+
+#define VEC_PERM(TYPE) 
\
+  TYPE __attribute__ ((noinline, noclone)) 
\
+  vec_slp_##TYPE (TYPE *restrict a, int n) 
\
+  {
\
+for (int i = 0; i < n; ++i)
\
+  {
\
+   a[i * 4] += 41;\
+   a[i * 4 + 1] += 25;\
+   a[i * 4 + 2] += 31;\
+   a[i * 4 + 3] += 62;\
+  }
\
+  }
+
+#define TEST_ALL(T)
\
+  T (int8_t)   
\
+  T (uint8_t)  
\
+  T (int16_t)  
\
+  T (uint16_t) 
\
+  T 

Re: Re: [PATCH] RISC-V: Add more SLP tests

2023-06-13 Thread juzhe.zh...@rivai.ai
>> as the tests are mostly directly from aarch64's testsuite I would
>> advise comments on where they were taken from as well as a TODO that
>> they should become common tests for a specific target selector
>> (vect_scalable_supported or something).

Ok.

 
>> How about some assembly checks for the non-run tests?
No, I tried. I can't add assembly checks to the tests since SLP is performed
with different LMULs.
Different LMULs end up with different SLP styles, and their instructions are
quite different.
We could only do it if the assembly checks had a predicate recognizing
LMUL=M1/M2/M4/M8.



juzhe.zh...@rivai.ai
 
From: Robin Dapp
Date: 2023-06-13 18:08
To: juzhe.zhong; gcc-patches
CC: rdapp.gcc; kito.cheng; kito.cheng; palmer; palmer; jeffreyalaw
Subject: Re: [PATCH] RISC-V: Add more SLP tests
Hi Juzhe,
 
as the tests are mostly directly from aarch64's testsuite I would
advise comments on where they were taken from as well as a TODO that
they should become common tests for a specific target selector
(vect_scalable_supported or something).
 
How about some assembly checks for the non-run tests?
 
Regards
Robin
 
 


[PATCH] c++: Fix ICE with parameter pack of decltype(auto) [PR103497]

2023-06-13 Thread Nathaniel Shead via Gcc-patches
(Another) ping.

https://gcc.gnu.org/pipermail/gcc-patches/2023-April/616465.html

On Sat, Apr 22, 2023 at 04:25:13PM +1000, Nathaniel Shead wrote:
> Bootstrapped and tested on x86_64-pc-linux-gnu.
> 
> -- 8< --
> 
> This patch raises an error early when the decltype(auto) specifier is
> used as a parameter of a function. This prevents any issues with an
> unexpected tree type later on when performing the call.
> 
>   PR 103497
> 
> gcc/cp/ChangeLog:
> 
>   * parser.cc (cp_parser_simple_type_specifier): Add check for
>   decltype(auto) as function parameter.
> 
> gcc/testsuite/ChangeLog:
> 
>   * g++.dg/pr103497.C: New test.
> 
> Signed-off-by: Nathaniel Shead 
> ---
>  gcc/cp/parser.cc| 10 ++
>  gcc/testsuite/g++.dg/pr103497.C |  7 +++
>  2 files changed, 17 insertions(+)
>  create mode 100644 gcc/testsuite/g++.dg/pr103497.C
> 
> diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc
> index e5f032f2330..1415e07e152 100644
> --- a/gcc/cp/parser.cc
> +++ b/gcc/cp/parser.cc
> @@ -19884,6 +19884,16 @@ cp_parser_simple_type_specifier (cp_parser* parser,
>&& cp_lexer_peek_nth_token (parser->lexer, 2)->type != CPP_SCOPE)
>  {
>type = saved_checks_value (token->u.tree_check_value);
> +  /* Within a function parameter declaration, decltype(auto) is always an
> +  error.  */
> +  if (parser->auto_is_implicit_function_template_parm_p
> +   && TREE_CODE (type) == TEMPLATE_TYPE_PARM
> +   && AUTO_IS_DECLTYPE (type))
> + {
> +   error_at (token->location,
> + "cannot declare a parameter with %<decltype(auto)%>");
> +   type = error_mark_node;
> + }
>if (decl_specs)
>   {
> cp_parser_set_decl_spec_type (decl_specs, type,
> diff --git a/gcc/testsuite/g++.dg/pr103497.C b/gcc/testsuite/g++.dg/pr103497.C
> new file mode 100644
> index 000..bcd421c2907
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/pr103497.C
> @@ -0,0 +1,7 @@
> +// { dg-do compile { target c++14 } }
> +
> +void foo(decltype(auto)... args);  // { dg-error "parameter with 
> .decltype.auto..|no parameter packs" }
> +
> +int main() {
> +  foo();
> +}
> -- 
> 2.34.1
> 


[PATCH] c++: Report invalid id-expression in decltype [PR100482]

2023-06-13 Thread Nathaniel Shead via Gcc-patches
(Another) ping.

On Sun, Apr 30, 2023 at 12:00:05PM +1000, Nathaniel Shead wrote:
> This patch ensures that any errors raised by finish_id_expression when
> parsing a decltype expression are properly reported, rather than
> potentially going ignored and causing invalid code to be accepted.
> 
> We can also now remove the separate check for templates without args as
> this is also checked for in finish_id_expression.
> 
>   PR 100482
> 
> gcc/cp/ChangeLog:
> 
>   * parser.cc (cp_parser_decltype_expr): Report errors raised by
>   finish_id_expression.
> 
> gcc/testsuite/ChangeLog:
> 
>   * g++.dg/pr100482.C: New test.
> 
> Signed-off-by: Nathaniel Shead 
> ---
>  gcc/cp/parser.cc| 22 +++---
>  gcc/testsuite/g++.dg/pr100482.C | 11 +++
>  2 files changed, 22 insertions(+), 11 deletions(-)
>  create mode 100644 gcc/testsuite/g++.dg/pr100482.C
> 
> diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc
> index e5f032f2330..20ebcdc3cfd 100644
> --- a/gcc/cp/parser.cc
> +++ b/gcc/cp/parser.cc
> @@ -16508,10 +16508,6 @@ cp_parser_decltype_expr (cp_parser *parser,
>   expr = cp_parser_lookup_name_simple (parser, expr,
>id_expr_start_token->location);
>  
> -  if (expr && TREE_CODE (expr) == TEMPLATE_DECL)
> - /* A template without args is not a complete id-expression.  */
> - expr = error_mark_node;
> -
>if (expr
>&& expr != error_mark_node
>&& TREE_CODE (expr) != TYPE_DECL
> @@ -16532,13 +16528,17 @@ cp_parser_decltype_expr (cp_parser *parser,
> _msg,
>  id_expr_start_token->location));
>  
> -  if (expr == error_mark_node)
> -/* We found an id-expression, but it was something that we
> -   should not have found. This is an error, not something
> -   we can recover from, so note that we found an
> -   id-expression and we'll recover as gracefully as
> -   possible.  */
> -id_expression_or_member_access_p = true;
> +   if (error_msg)
> + {
> +   /* We found an id-expression, but it was something that we
> +  should not have found. This is an error, not something
> +  we can recover from, so report the error we found and
> +  we'll recover as gracefully as possible.  */
> +   cp_parser_parse_definitely (parser);
> +   cp_parser_error (parser, error_msg);
> +   id_expression_or_member_access_p = true;
> +   return error_mark_node;
> + }
>  }
>  
>if (expr
> diff --git a/gcc/testsuite/g++.dg/pr100482.C b/gcc/testsuite/g++.dg/pr100482.C
> new file mode 100644
> index 000..dcf6722fda5
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/pr100482.C
> @@ -0,0 +1,11 @@
> +// { dg-do compile { target c++11 } }
> +
> +namespace N {}
> +decltype(std) x;   // { dg-error "expected primary-expression" }
> +
> +struct S {};
> +decltype(S) y;  // { dg-error "argument to .decltype. must be an expression" 
> }
> +
> +template 
> +struct U {};
> +decltype(U) z;  // { dg-error "missing template arguments" }
> -- 
> 2.40.0
> 


[PATCH v2 0/3] c++: Track lifetimes in constant evaluation [PR70331,...]

2023-06-13 Thread Nathaniel Shead via Gcc-patches
(Another) ping. I also have some more changes on top of this patch set
if this looks good as it is.

https://gcc.gnu.org/pipermail/gcc-patches/2023-March/614811.html

Thanks!

On Wed, Mar 29, 2023 at 01:32:55PM +1100, Nathaniel Shead wrote:
> This is an update of the patch series at
> https://gcc.gnu.org/pipermail/gcc-patches/2023-March/614759.html
> 
> The main change is modifying the first patch to store the "expired" flag
> in the C++-specific lang_decl_base struct instead of tree_decl_common.
> The second and third patches to improve diagnostic locations are
> otherwise unchanged.
> 
> Bootstrapped and regression tested on x86_64 linux.
> 
> Nathaniel
> 
> ---
> 
> Nathaniel Shead (3):
>   c++: Track lifetimes in constant evaluation [PR70331,PR96630,PR98675]
>   c++: Improve constexpr error for dangling local variables
>   c++: Improve location information in constexpr evaluation
> 
>  gcc/cp/constexpr.cc   | 152 --
>  gcc/cp/cp-tree.h  |  10 +-
>  gcc/cp/module.cc  |   2 +
>  gcc/cp/semantics.cc   |   5 +-
>  gcc/cp/typeck.cc  |   5 +-
>  gcc/testsuite/g++.dg/cpp0x/constexpr-48089.C  |  10 +-
>  gcc/testsuite/g++.dg/cpp0x/constexpr-diag3.C  |   2 +-
>  gcc/testsuite/g++.dg/cpp0x/constexpr-ice20.C  |   2 +-
>  gcc/testsuite/g++.dg/cpp1y/constexpr-89481.C  |   3 +-
>  .../g++.dg/cpp1y/constexpr-lifetime1.C|  14 ++
>  .../g++.dg/cpp1y/constexpr-lifetime2.C|  20 +++
>  .../g++.dg/cpp1y/constexpr-lifetime3.C|  13 ++
>  .../g++.dg/cpp1y/constexpr-lifetime4.C|  11 ++
>  .../g++.dg/cpp1y/constexpr-lifetime5.C|  11 ++
>  gcc/testsuite/g++.dg/cpp1y/constexpr-union5.C |   4 +-
>  gcc/testsuite/g++.dg/cpp1y/pr68180.C  |   4 +-
>  .../g++.dg/cpp1z/constexpr-lambda6.C  |   4 +-
>  gcc/testsuite/g++.dg/cpp2a/bit-cast11.C   |  10 +-
>  gcc/testsuite/g++.dg/cpp2a/bit-cast12.C   |  10 +-
>  gcc/testsuite/g++.dg/cpp2a/bit-cast14.C   |  14 +-
>  gcc/testsuite/g++.dg/cpp2a/constexpr-98122.C  |   4 +-
>  .../g++.dg/cpp2a/constexpr-dynamic17.C|   5 +-
>  gcc/testsuite/g++.dg/cpp2a/constexpr-init1.C  |   5 +-
>  gcc/testsuite/g++.dg/cpp2a/constexpr-new12.C  |   6 +-
>  gcc/testsuite/g++.dg/cpp2a/constexpr-new3.C   |  10 +-
>  gcc/testsuite/g++.dg/ext/constexpr-vla2.C |   4 +-
>  gcc/testsuite/g++.dg/ext/constexpr-vla3.C |   4 +-
>  gcc/testsuite/g++.dg/ubsan/pr63956.C  |   4 +-
>  .../g++.dg/warn/Wreturn-local-addr-6.C|   3 -
>  .../25_algorithms/equal/constexpr_neg.cc  |   7 +-
>  30 files changed, 246 insertions(+), 112 deletions(-)
>  create mode 100644 gcc/testsuite/g++.dg/cpp1y/constexpr-lifetime1.C
>  create mode 100644 gcc/testsuite/g++.dg/cpp1y/constexpr-lifetime2.C
>  create mode 100644 gcc/testsuite/g++.dg/cpp1y/constexpr-lifetime3.C
>  create mode 100644 gcc/testsuite/g++.dg/cpp1y/constexpr-lifetime4.C
>  create mode 100644 gcc/testsuite/g++.dg/cpp1y/constexpr-lifetime5.C
> 
> -- 
> 2.34.1
> 


Re: [PATCH] RISC-V: Add more SLP tests

2023-06-13 Thread Robin Dapp via Gcc-patches
Hi Juzhe,

as the tests are mostly directly from aarch64's testsuite I would
advise comments on where they were taken from as well as a TODO that
they should become common tests for a specific target selector
(vect_scalable_supported or something).

How about some assembly checks for the non-run tests?

Regards
 Robin



[committed] libgomp/testsuite: Add requires-unified-addr-1.{c,f90} [PR109837]

2023-06-13 Thread Tobias Burnus

Add a testcase for "omp requires unified_address" as we didn't have one.

The feature itself worked since the beginning (hardware + implementation
wise); that the devices report 'omp requires unified_address' as
supported is newer: for nvptx since r13-3460-g131d18e928a3ea and for GCN
since r14-1584-gf1af7d65ff64fe (a week ago).

The test assumes (→ dg-output) that all offload devices support
unified_address; this implies: if an offloading device is available, it
also remains available after adding the unified-address requirement.
Goal: ensure that we don't end up with only host fallback.

Unified address implies: the pointer size is the same, such that no
'is_device_ptr' is required to convert an opaque pointer; it also
permits doing device-pointer arithmetic on the host. This
testcase also assumes that 'int' / 'integer' has the same size on host
and device. (If not: good luck with offloading in general!)

The test also makes the sound assumption that derived-type component
pointers are passed through with the derived-type itself such that the
pointer address remains well defined. In terms of the standard, it would
have an undefined association status.

Committed as Rev. r14-1783-gd5c58ad1ebaff9

Tobias
-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
commit d5c58ad1ebaff924c2546df074174cffb128feb8
Author: Tobias Burnus 
Date:   Tue Jun 13 11:27:47 2023 +0200

libgomp/testsuite: Add requires-unified-addr-1.{c,f90} [PR109837]

Add a testcase for 'omp requires unified_address' that is currently supported
by all devices but was not tested for.

libgomp/

PR libgomp/109837
* testsuite/libgomp.c-c++-common/requires-unified-addr-1.c: New test.
* testsuite/libgomp.fortran/requires-unified-addr-1.f90: New test.
---
 .../libgomp.c-c++-common/requires-unified-addr-1.c |  74 ++
 .../libgomp.fortran/requires-unified-addr-1.f90| 111 +
 2 files changed, 185 insertions(+)

diff --git a/libgomp/testsuite/libgomp.c-c++-common/requires-unified-addr-1.c b/libgomp/testsuite/libgomp.c-c++-common/requires-unified-addr-1.c
new file mode 100644
index 000..bff0a6b31ab
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c-c++-common/requires-unified-addr-1.c
@@ -0,0 +1,74 @@
+/* PR libgomp/109837 */
+
+#include 
+#include 
+#include 
+#include 
+
+#pragma omp requires unified_address
+
+#define N 15
+
+void
+test_device (int dev)
+{
+  struct st {
+int *ptr;
+int n;
+  };
+  struct st s;
+
+  s.n = 10;
+  s.ptr = (int *) omp_target_alloc (sizeof (int)*s.n, dev);
+  int *ptr1 = (int *) omp_target_alloc (sizeof (int)*N, dev);
+  assert (s.ptr != NULL);
+  assert (ptr1 != NULL);
+
+  int q[4] = {1,2,3,4};
+  int *qptr;
+  #pragma omp target enter data map(q) device(device_num: dev)
+  #pragma omp target data use_device_addr(q) device(device_num: dev)
+qptr = q;
+
+  #pragma omp target map(to:s) device(device_num: dev)
+  for (int i = 0; i < s.n; i++)
+s.ptr[i] = 23*i;
+
+  int *ptr2 = &s.ptr[3];
+
+  #pragma omp target firstprivate(qptr) map(tofrom:ptr2) device(device_num: dev)
+  for (int i = 0; i < 4; i++)
+*(qptr++) = ptr2[i];
+
+  #pragma omp target exit data map(q) device(device_num: dev)
+  for (int i = 0; i < 4; i++)
+q[i] = 23 * (i+3);
+
+  #pragma omp target map(to: ptr1) device(device_num: dev)
+  for (int i = 0; i < N; i++)
+ptr1[i] = 11*i;
+
+  int *ptr3 = (int *) malloc (sizeof (int)*N);
+  assert (0 == omp_target_memcpy(ptr3, ptr1, N * sizeof(int), 0, 0,
+ omp_get_initial_device(), dev));
+  for (int i = 0; i < N; i++)
+assert (ptr3[i] == 11*i);
+
+  free (ptr3);
+  omp_target_free (ptr1, dev);
+  omp_target_free (s.ptr, dev);
+}
+
+int
+main()
+{
+  int ntgts = omp_get_num_devices();
+  if (ntgts)
+fprintf (stderr, "Offloading devices exist\n");  /* { dg-output "Offloading devices exist(\n|\r\n|\r)" { target offload_device } } */
+  else
+fprintf (stderr, "Only host fallback\n");/* { dg-output "Only host fallback(\n|\r\n|\r)" { target { ! offload_device } } } */
+
+  for (int i = 0; i <= ntgts; i++)
+test_device (i);
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.fortran/requires-unified-addr-1.f90 b/libgomp/testsuite/libgomp.fortran/requires-unified-addr-1.f90
new file mode 100644
index 000..f5a5adf093b
--- /dev/null
+++ b/libgomp/testsuite/libgomp.fortran/requires-unified-addr-1.f90
@@ -0,0 +1,111 @@
+! PR libgomp/109837
+
+program main
+  use iso_c_binding
+  use iso_fortran_env
+  use omp_lib
+  implicit none (external, type)
+  !$omp requires unified_address
+
+  integer(c_intptr_t), parameter :: N = 15
+  integer :: i, ntgts
+
+  ntgts = omp_get_num_devices();
+  if (ntgts > 0) then
+write (ERROR_UNIT, '(a)') "Offloading devices exist" 

[PATCH] RISC-V: Fix bug of VLA SLP auto-vectorization

2023-06-13 Thread juzhe . zhong
From: Juzhe-Zhong 

Sorry for producing bugs in the previous VLA SLP patch.

Consider the following permutation:
_85 = VEC_PERM_EXPR <{ 99, 17, ... }, { 11, 80, ... }, { 0, POLY_INT_CST [4, 
4], 1, POLY_INT_CST [5, 4], 2, POLY_INT_CST [6, 4], ... }>;

The correct result should be:
_85 = { 99, 11, 17, 80, ... }

However, I got this wrong in the previous patch.

Code sequence before this patch:

set mask = { 0, 1, 0, 1, ... }
set v0 = { 99, 17, 99, 17, ... }
set v1 = { 11, 80, 11, 80, ... }
set index = viota (mask) = { 0, 0, 1, 1, 2, 2, ... }
set result = vrgather_mu (v0, v1, index, mask) = { 99, 11, 99, 80 }
The result is incorrect.

After this patch:

set mask = { 0, 1, 0, 1, ... }
set index = viota (mask) = { 0, 0, 1, 1, 2, 2, ... }
set v0 = vrgather ({ 99, 17, 99, 17, ... }, index) = { 99, 99, 17, 17, ... }
set v1 = { 11, 80, 11, 80, ... }
set result = vrgather_mu (v0, v1, index, mask) = { 99, 11, 17, 80 }
The result is what we expected.

This issue was discovered in the test I appended in this patch with 
--param=riscv-autovec-lmul=2.

gcc/ChangeLog:

* config/riscv/riscv-v.cc (emit_vlmax_decompress_insn): Fix bug.
(shuffle_decompress_patterns): Ditto.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/partial/slp-12.c: New test.
* gcc.target/riscv/rvv/autovec/partial/slp_run-12.c: New test.

---
 gcc/config/riscv/riscv-v.cc   |  8 ++---
 .../riscv/rvv/autovec/partial/slp-12.c| 33 +++
 .../riscv/rvv/autovec/partial/slp_run-12.c| 30 +
 3 files changed, 67 insertions(+), 4 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-12.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp_run-12.c

diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 3ce2eb7f2ad..d797326d736 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -866,7 +866,7 @@ emit_vlmax_masked_gather_mu_insn (rtx target, rtx op, rtx 
sel, rtx mask)
  e q r d c b v a  # v11 destination after vrgather using viota.m under mask
 */
 static void
-emit_vlmax_decompress_insn (rtx target, rtx op, rtx mask)
+emit_vlmax_decompress_insn (rtx target, rtx op0, rtx op1, rtx mask)
 {
   machine_mode data_mode = GET_MODE (target);
   machine_mode sel_mode = related_int_vector_mode (data_mode).require ();
@@ -876,7 +876,8 @@ emit_vlmax_decompress_insn (rtx target, rtx op, rtx mask)
   rtx sel = gen_reg_rtx (sel_mode);
   rtx iota_ops[] = {sel, mask};
   emit_vlmax_insn (code_for_pred_iota (sel_mode), RVV_UNOP, iota_ops);
-  emit_vlmax_masked_gather_mu_insn (target, op, sel, mask);
+  emit_vlmax_gather_insn (target, op0, sel);
+  emit_vlmax_masked_gather_mu_insn (target, op1, sel, mask);
 }
 
 /* Emit merge instruction.  */
@@ -2444,8 +2445,7 @@ shuffle_decompress_patterns (struct expand_vec_perm_d *d)
   rtx const_vec = gen_const_vector_dup (sel_mode, 1);
   rtx mask = gen_reg_rtx (mask_mode);
   expand_vec_cmp (mask, EQ, vid_repeat, const_vec);
-  emit_move_insn (d->target, op0);
-  emit_vlmax_decompress_insn (d->target, op1, mask);
+  emit_vlmax_decompress_insn (d->target, op0, op1, mask);
   return true;
 }
 
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-12.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-12.c
new file mode 100644
index 000..4131fd71a74
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-12.c
@@ -0,0 +1,33 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param 
riscv-autovec-preference=scalable -fno-vect-cost-model" } */
+
+#include 
+
+#define VEC_PERM(TYPE) 
\
+  TYPE __attribute__ ((noinline, noclone)) 
\
+  vec_slp_##TYPE (TYPE *restrict a, int n) 
\
+  {
\
+for (int i = 0; i < n; ++i)
\
+  {
\
+   a[i * 8] += 99;\
+   a[i * 8 + 1] += 11;\
+   a[i * 8 + 2] += 17;\
+   a[i * 8 + 3] += 80;\
+   a[i * 8 + 4] += 63;\
+   a[i * 8 + 5] += 37;\
+   a[i * 8 + 6] += 24;\
+   a[i * 8 + 7] += 81;\
+  }
\
+  }
+
+#define TEST_ALL(T)
\
+  T 

Re: [PATCH 4/4] rs6000: build constant via li/lis;rldic

2023-06-13 Thread Jiufu Guo via Gcc-patches


Hi David,

Thanks for your valuable comments!

David Edelsohn  writes:
>  
> On Wed, Jun 7, 2023 at 9:56 PM Jiufu Guo  wrote:
>
>  Hi,
>
>  This patch checks if a constant is possible to be built by "li;rldic".
>  We only need to take care of "negative li", other forms do not need to check.
>  For example, "negative lis" is just a "negative li" with an additional shift.
>
>  Bootstrap and regtest pass on ppc64{,le}.
>  Is this ok for trunk?
>
>  BR,
>  Jeff (Jiufu)
>
>  gcc/ChangeLog:
>
>  * config/rs6000/rs6000.cc (can_be_built_by_li_and_rldic): New 
> function.
>  (rs6000_emit_set_long_const): Call can_be_built_by_li_and_rldic.
>
> This is okay.
>
> Do you have any measurement of how expensive it is to test all of these 
> additional methods to generate a constant?  How much does this affect the
> compile time?

Yep, thanks for this very good question!
This patch mostly uses bitwise operations and if-conditions,
so it is not expected to be expensive.

Testcases were checked.  For example:
A case with ~1000 constants: most of them hit this feature.
With this feature, the compiling time is slightly faster.

0m1.985s(without patch) vs. 0m1.874s(with patch)
(Note: rs6000_emit_set_long_const does not occur in hot perf
functions, so this small time saving is probably not directly caused
by this feature.)

A case with ~1000 constants:(most are not hit by this feature)
0m2.493s(without patch) vs. 0m2.558s(with patch).

For runtime, with the patch there seems to be no visible
improvement in SPEC2017.  Still, I feel this patch is
doing the right thing: use fewer instructions to build the constant.

BR,
Jeff (Jiufu Guo)

>
> Thanks, David
>
>  
>  
>  gcc/testsuite/ChangeLog:
>
>  * gcc.target/powerpc/const-build.c: Add more tests.
>  ---
>   gcc/config/rs6000/rs6000.cc   | 61 ++-
>   .../gcc.target/powerpc/const-build.c  | 28 +
>   2 files changed, 88 insertions(+), 1 deletion(-)
>
>  diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
>  index 2a3fa733b45..cd04b6b5c82 100644
>  --- a/gcc/config/rs6000/rs6000.cc
>  +++ b/gcc/config/rs6000/rs6000.cc
>  @@ -10387,6 +10387,64 @@ can_be_built_by_li_lis_and_rldicr (HOST_WIDE_INT c, 
> int *shift,
> return false;
>   }
>
>  +/* Check if value C can be built by 2 instructions: one is 'li', another is
>  +   rldic.
>  +
>  +   If so, *SHIFT is set to the 'shift' operand of rldic; and *MASK is set
>  +   to the mask value about the 'mb' operand of rldic; and return true.
>  +   Return false otherwise.  */
>  +
>  +static bool
>  +can_be_built_by_li_and_rldic (HOST_WIDE_INT c, int *shift, HOST_WIDE_INT 
> *mask)
>  +{
>  +  /* There are 49 successive ones in the negative value of 'li'.  */
>  +  int ones = 49;
>  +
>  +  /* 1..1xx1..1: negative value of li --> 0..01..1xx0..0:
>  + right bits are shifted as 0's, and left 1's(and x's) are cleaned.  */
>  +  int tz = ctz_hwi (c);
>  +  int lz = clz_hwi (c);
>  +  int middle_ones = clz_hwi (~(c << lz));
>  +  if (tz + lz + middle_ones >= ones)
>  +{
>  +  *mask = ((1LL << (HOST_BITS_PER_WIDE_INT - tz - lz)) - 1LL) << tz;
>  +  *shift = tz;
>  +  return true;
>  +}
>  +
>  +  /* 1..1xx1..1 --> 1..1xx0..01..1: some 1's(following x's) are cleaned. */
>  +  int leading_ones = clz_hwi (~c);
>  +  int tailing_ones = ctz_hwi (~c);
>  +  int middle_zeros = ctz_hwi (c >> tailing_ones);
>  +  if (leading_ones + tailing_ones + middle_zeros >= ones)
>  +{
>  +  *mask = ~(((1ULL << middle_zeros) - 1ULL) << tailing_ones);
>  +  *shift = tailing_ones + middle_zeros;
>  +  return true;
>  +}
>  +
>  +  /* xx1..1xx: --> xx0..01..1xx: some 1's(following x's) are cleaned. */
>  +  /* Get the position for the first bit of successive 1.
>  + The 24th bit would be in successive 0 or 1.  */
>  +  HOST_WIDE_INT low_mask = (1LL << 24) - 1LL;
>  +  int pos_first_1 = ((c & (low_mask + 1)) == 0)
>  + ? clz_hwi (c & low_mask)
>  + : HOST_BITS_PER_WIDE_INT - ctz_hwi (~(c | low_mask));
>  +  middle_ones = clz_hwi (~c << pos_first_1);
>  +  middle_zeros = ctz_hwi (c >> (HOST_BITS_PER_WIDE_INT - pos_first_1));
>  +  if (pos_first_1 < HOST_BITS_PER_WIDE_INT
>  +  && middle_ones + middle_zeros < HOST_BITS_PER_WIDE_INT
>  +  && middle_ones + middle_zeros >= ones)
>  +{
>  +  *mask = ~(((1ULL << middle_zeros) - 1LL)
>  +   << (HOST_BITS_PER_WIDE_INT - pos_first_1));
>  +  *shift = HOST_BITS_PER_WIDE_INT - pos_first_1 + middle_zeros;
>  +  return true;
>  +}
>  +
>  +  return false;
>  +}
>  +
>   /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
>  Output insns to set DEST equal to the constant C as a series of
>  lis, ori and shl instructions.  */
>  @@ -10435,7 +10493,8 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT 
> c)
>   }
> else if (can_be_built_by_li_lis_and_rotldi (c, , )
>  

[PATCH][committed] arm: Extend -mtp= arguments

2023-06-13 Thread Kyrylo Tkachov via Gcc-patches
Hi all,

After discussing the -mtp= option with Arm's LLVM developers we'd like to extend
the functionality of the option somewhat.
There are actually 3 system registers that can be accessed for the thread 
pointer
in aarch32: tpidrurw, tpidruro, tpidrprw.  They are all read through the CP15 
co-processor
mechanism. The current -mtp=cp15 option reads the tpidruro register.
This patch extends -mtp to allow for the above three explicit tpidr names and
keeps -mtp=cp15 as an alias of -mtp=tpidruro for backwards compatibility.

There is more relevant discussion of the options at 
https://reviews.llvm.org/D152433 if you're interested.

Bootstrapped and tested on arm-none-linux-gnueabihf.
Pushing to trunk.
Thanks,
Kyrill

gcc/ChangeLog:

* config/arm/arm-opts.h (enum arm_tp_type): Remove TP_CP15.
Add TP_TPIDRURW, TP_TPIDRURO, TP_TPIDRPRW values.
* config/arm/arm-protos.h (arm_output_load_tpidr): Declare prototype.
* config/arm/arm.cc (arm_option_reconfigure_globals): Replace TP_CP15
with TP_TPIDRURO.
(arm_output_load_tpidr): Define.
* config/arm/arm.h (TARGET_HARD_TP): Define in terms of TARGET_SOFT_TP.
* config/arm/arm.md (load_tp_hard): Call arm_output_load_tpidr to output
assembly.
(reload_tp_hard): Likewise.
* config/arm/arm.opt (tpidrurw, tpidruro, tpidrprw): New values for
arm_tp_type.
* doc/invoke.texi (Arm Options, mtp): Document new values.

gcc/testsuite/ChangeLog:

* gcc.target/arm/mtp.c: New test.
* gcc.target/arm/mtp_1.c: New test.
* gcc.target/arm/mtp_2.c: New test.
* gcc.target/arm/mtp_3.c: New test.
* gcc.target/arm/mtp_4.c: New test.


mtp-arm.patch
Description: mtp-arm.patch


[PATCH] RISC-V: Add more SLP tests

2023-06-13 Thread juzhe . zhong
From: Juzhe-Zhong 

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/partial/slp-10.c: New test.
* gcc.target/riscv/rvv/autovec/partial/slp-11.c: New test.
* gcc.target/riscv/rvv/autovec/partial/slp_run-10.c: New test.
* gcc.target/riscv/rvv/autovec/partial/slp_run-11.c: New test.

---
 .../riscv/rvv/autovec/partial/slp-10.c| 30 ++
 .../riscv/rvv/autovec/partial/slp-11.c| 31 +++
 .../riscv/rvv/autovec/partial/slp_run-10.c| 30 ++
 .../riscv/rvv/autovec/partial/slp_run-11.c| 30 ++
 4 files changed, 121 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-10.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-11.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp_run-10.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp_run-11.c

diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-10.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-10.c
new file mode 100644
index 000..b33e85c5be2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-10.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param 
riscv-autovec-preference=scalable -fno-vect-cost-model 
-fdump-tree-optimized-details" } */
+
+#include 
+
+#define VEC_PERM(TYPE) 
\
+  TYPE __attribute__ ((noinline, noclone)) 
\
+  vec_slp_##TYPE (TYPE *restrict a, int n) 
\
+  {
\
+for (int i = 0; i < n; ++i)
\
+  {
\
+   a[i * 2] += 10;\
+   a[i * 2 + 1] += 17;\
+  }
\
+  }
+
+#define TEST_ALL(T)
\
+  T (int8_t)   
\
+  T (uint8_t)  
\
+  T (int16_t)  
\
+  T (uint16_t) 
\
+  T (int32_t)  
\
+  T (uint32_t) 
\
+  T (int64_t)  
\
+  T (uint64_t)
+
+TEST_ALL (VEC_PERM)
+
+/* { dg-final { scan-tree-dump-times "{ 10, 17, ... }" 8 "optimized" } } */
+
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-11.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-11.c
new file mode 100644
index 000..c62eced99f9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-11.c
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param 
riscv-autovec-preference=scalable -fno-vect-cost-model 
-fdump-tree-optimized-details" } */
+
+#include 
+
+#define VEC_PERM(TYPE) 
\
+  TYPE __attribute__ ((noinline, noclone)) 
\
+  vec_slp_##TYPE (TYPE *restrict a, int n) 
\
+  {
\
+for (int i = 0; i < n; ++i)
\
+  {
\
+   a[i * 4] += 41;\
+   a[i * 4 + 1] += 25;\
+   a[i * 4 + 2] += 31;\
+   a[i * 4 + 3] += 62;\
+  }
\
+  }
+
+#define TEST_ALL(T)
\
+  T (int8_t)   
\
+  T (uint8_t)  
\
+  T (int16_t)  
\
+  T (uint16_t) 
\
+  T (int32_t)  
\
+  T (uint32_t) 
\
+  T (int64_t) 

[PATCH][committed] aarch64: Extend -mtp= arguments

2023-06-13 Thread Kyrylo Tkachov via Gcc-patches
Hi all,

After discussing the -mtp= option with Arm's LLVM developers we'd like to extend
the functionality of the option somewhat.
First of all, there is another TPIDR register that can be used to read the 
thread pointer:
TPIDRRO_EL0 (which can also be accessed by AArch32 under another name) so it 
makes sense
to add -mtp=tpidrro_el0. This makes the existing arguments el0, el1, el2, el3 
somewhat
inconsistent in their naming so this patch introduces the more "full" names
tpidr_el0, tpidr_el1, tpidr_el2, tpidr_el3 and makes the above short names 
aliases of these new ones.
Long story short, we preserve backwards compatibility and add a new TPIDR 
register to access through
-mtp that wasn't available previously.
There is more relevant discussion of the options at 
https://reviews.llvm.org/D152433 if you're interested.

Bootstrapped and tested on aarch64-none-linux-gnu.
Pushing to trunk.
Thanks,
Kyrill

gcc/ChangeLog:

PR target/108779
* config/aarch64/aarch64-opts.h (enum aarch64_tp_reg): Add
AARCH64_TPIDRRO_EL0 value.
* config/aarch64/aarch64.cc (aarch64_output_load_tp):
* config/aarch64/aarch64.opt (tpidr_el0, tpidr_el1, tpidr_el2,
tpidr_el3, tpidrro_el0): New accepted values to -mtp=.
* doc/invoke.texi (AArch64 Options): Document new -mtp= options.

gcc/testsuite/ChangeLog:

PR target/108779
* gcc.target/aarch64/mtp_5.c: New test.
* gcc.target/aarch64/mtp_6.c: New test.
* gcc.target/aarch64/mtp_7.c: New test.
* gcc.target/aarch64/mtp_8.c: New test.
* gcc.target/aarch64/mtp_9.c: New test.


mtp-a64.patch
Description: mtp-a64.patch


[PATCHv3, rs6000] Add two peephole2 patterns for mr. insn

2023-06-13 Thread HAO CHEN GUI via Gcc-patches
Hi,
  This patch adds two peephole2 patterns which help convert certain insn
sequences to "mr." instruction. These insn sequences can't be combined in
combine pass.

  Compared to last version, it changes the new mode iterator name from "Q"
to "WORD".

  Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.

Thanks
Gui Haochen

ChangeLog
rs6000: Add two peephole patterns for "mr." insn

When investigating the issue mentioned in PR87871#c30 - if compare
and move pattern benefits before RA, I checked the assembly generated
for SPEC2017 and found that certain insn sequences aren't converted to
"mr." instructions.
The following two sequences are never combined into the "mr." pattern, as
there is no register link between them. This patch adds two peephole2
patterns to convert them to "mr." instructions.

cmp 0,3,0
mr 4,3

mr 4,3
cmp 0,3,0

The patch also creates a new mode iterator whose mode is decided by
TARGET_POWERPC64.  This mode iterator is used in "mr." and its split
pattern.  The original P iterator is wrong when -m32/-mpowerpc64 is set.
In this situation, "mr." should compare the whole 64-bit register
with 0 rather than just the low 32 bits.

gcc/
* config/rs6000/rs6000.md (peephole2 for compare_and_move): New.
(peephole2 for move_and_compare): New.
(mode_iterator WORD): New.  Set the mode to SI/DImode by
TARGET_POWERPC64.
(*mov_internal2): Change the mode iterator from P to WORD.
(split pattern for compare_and_move): Likewise.

gcc/testsuite/
* gcc.dg/rtl/powerpc/move_compare_peephole_32.c: New.
* gcc.dg/rtl/powerpc/move_compare_peephole_64.c: New.


patch.diff
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index b0db8ae508d..1f0fe85b9b5 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -491,6 +491,7 @@ (define_mode_iterator SDI [SI DI])
 ; The size of a pointer.  Also, the size of the value that a record-condition
 ; (one with a '.') will compare; and the size used for arithmetic carries.
 (define_mode_iterator P [(SI "TARGET_32BIT") (DI "TARGET_64BIT")])
+(define_mode_iterator WORD [(SI "!TARGET_POWERPC64") (DI "TARGET_POWERPC64")])

 ; Iterator to add PTImode along with TImode (TImode can go in VSX registers,
 ; PTImode is GPR only)
@@ -7879,9 +7880,9 @@ (define_split

 (define_insn "*mov_internal2"
   [(set (match_operand:CC 2 "cc_reg_operand" "=y,x,?y")
-   (compare:CC (match_operand:P 1 "gpc_reg_operand" "0,r,r")
+   (compare:CC (match_operand:WORD 1 "gpc_reg_operand" "0,r,r")
(const_int 0)))
-   (set (match_operand:P 0 "gpc_reg_operand" "=r,r,r") (match_dup 1))]
+   (set (match_operand:WORD 0 "gpc_reg_operand" "=r,r,r") (match_dup 1))]
   ""
   "@
cmpi %2,%0,0
@@ -7891,11 +7892,41 @@ (define_insn "*mov_internal2"
(set_attr "dot" "yes")
(set_attr "length" "4,4,8")])

+(define_peephole2
+  [(set (match_operand:CC 2 "cc_reg_operand" "")
+   (compare:CC (match_operand:WORD 1 "int_reg_operand" "")
+   (const_int 0)))
+   (set (match_operand:WORD 0 "int_reg_operand" "")
+   (match_dup 1))]
+  "!cc_reg_not_cr0_operand (operands[2], CCmode)"
+  [(parallel [(set (match_operand:CC 2 "cc_reg_operand" "=x")
+  (compare:CC (match_operand:WORD 1 "int_reg_operand" "r")
+  (const_int 0)))
+ (set (match_operand:WORD 0 "int_reg_operand" "=r")
+  (match_dup 1))])]
+  ""
+)
+
+(define_peephole2
+  [(set (match_operand:WORD 0 "int_reg_operand" "")
+   (match_operand:WORD 1 "int_reg_operand" ""))
+   (set (match_operand:CC 2 "cc_reg_operand" "")
+   (compare:CC (match_dup 1)
+   (const_int 0)))]
+  "!cc_reg_not_cr0_operand (operands[2], CCmode)"
+  [(parallel [(set (match_operand:CC 2 "cc_reg_operand" "=x")
+  (compare:CC (match_operand:GPR 1 "int_reg_operand" "r")
+  (const_int 0)))
+ (set (match_operand:WORD 0 "int_reg_operand" "=r")
+  (match_dup 1))])]
+  ""
+)
+
 (define_split
   [(set (match_operand:CC 2 "cc_reg_not_cr0_operand")
-   (compare:CC (match_operand:P 1 "gpc_reg_operand")
+   (compare:CC (match_operand:WORD 1 "gpc_reg_operand")
(const_int 0)))
-   (set (match_operand:P 0 "gpc_reg_operand") (match_dup 1))]
+   (set (match_operand:WORD 0 "gpc_reg_operand") (match_dup 1))]
   "reload_completed"
   [(set (match_dup 0) (match_dup 1))
(set (match_dup 2)
diff --git a/gcc/testsuite/gcc.dg/rtl/powerpc/move_compare_peephole_32.c 
b/gcc/testsuite/gcc.dg/rtl/powerpc/move_compare_peephole_32.c
new file mode 100644
index 000..29234dea7c7
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/rtl/powerpc/move_compare_peephole_32.c
@@ -0,0 +1,60 @@
+/* { dg-do compile { target powerpc*-*-* } } */
+/* { dg-skip-if "" { has_arch_ppc64 } } */
+/* { dg-options "-O2 -mregnames" } */
+
+/* Following instruction sequence is found in assembly of
+   Perl_block_start, 

Re: [PATCH] c, c++: Accept __builtin_classify_type (typename)

2023-06-13 Thread Jason Merrill via Gcc-patches

On 6/12/23 15:57, Jakub Jelinek wrote:

Hi!

As mentioned in my stdckdint.h mail, __builtin_classify_type has
a problem that argument promotion (the argument is passed to ...
prototyped builtin function) means that certain type classes will
simply never appear.
I think it is too late to change how it behaves, lots of code in the
wild might rely on the current behavior.

So, the following patch adds option to use a typename rather than
expression as the operand to the builtin, making it behave similarly
to sizeof, typeof or say the clang _Generic extension where the
first argument can be there not just expression, but also typename.

I think we have other prior art here, e.g. __builtin_va_arg also
expects typename.

I've added this to both C and C++, because it would be weird if it
supported it only in C and not in C++.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2023-06-12  Jakub Jelinek  

gcc/
* builtins.h (type_to_class): Declare.
* builtins.cc (type_to_class): No longer static.  Return
int rather than enum.
* doc/extend.texi (__builtin_classify_type): Document.
gcc/c/
* c-parser.cc (c_parser_postfix_expression_after_primary): Parse
__builtin_classify_type call with typename as argument.
gcc/cp/
* parser.cc (cp_parser_postfix_expression): Parse
__builtin_classify_type call with typename as argument.
* pt.cc (tsubst_copy_and_build): Handle __builtin_classify_type
with dependent typename as argument.
gcc/testsuite/
* c-c++-common/builtin-classify-type-1.c: New test.
* g++.dg/ext/builtin-classify-type-1.C: New test.
* g++.dg/ext/builtin-classify-type-2.C: New test.
* gcc.dg/builtin-classify-type-1.c: New test.

--- gcc/builtins.h.jj   2023-01-03 00:20:34.856089856 +0100
+++ gcc/builtins.h  2023-06-12 09:35:20.841902572 +0200
@@ -156,5 +156,6 @@ extern internal_fn associated_internal_f
  extern internal_fn replacement_internal_fn (gcall *);
  
  extern bool builtin_with_linkage_p (tree);

+extern int type_to_class (tree);
  
  #endif /* GCC_BUILTINS_H */

--- gcc/builtins.cc.jj  2023-05-20 15:31:09.03352 +0200
+++ gcc/builtins.cc 2023-06-12 09:35:31.709751296 +0200
@@ -113,7 +113,6 @@ static rtx expand_builtin_apply_args (vo
  static rtx expand_builtin_apply_args_1 (void);
  static rtx expand_builtin_apply (rtx, rtx, rtx);
  static void expand_builtin_return (rtx);
-static enum type_class type_to_class (tree);
  static rtx expand_builtin_classify_type (tree);
  static rtx expand_builtin_mathfn_3 (tree, rtx, rtx);
  static rtx expand_builtin_mathfn_ternary (tree, rtx, rtx);
@@ -1852,7 +1851,7 @@ expand_builtin_return (rtx result)
  
  /* Used by expand_builtin_classify_type and fold_builtin_classify_type.  */
  
-static enum type_class

+int
  type_to_class (tree type)
  {
switch (TREE_CODE (type))
--- gcc/doc/extend.texi.jj  2023-06-10 19:58:26.197478291 +0200
+++ gcc/doc/extend.texi 2023-06-12 18:06:24.629413024 +0200
@@ -14354,6 +14354,30 @@ need not be a constant.  @xref{Object Si
  description of the function.
  @enddefbuiltin
  
+@defbuiltin{int __builtin_classify_type (@var{arg})}

+@defbuiltinx{int __builtin_classify_type (@var{type})}
+The @code{__builtin_classify_type} returns a small integer with a category
+of @var{arg} argument's type, like void type, integer type, enumeral type,
+boolean type, pointer type, reference type, offset type, real type, complex
+type, function type, method type, record type, union type, array type,
+string type, etc.  When the argument is an expression, for
+backwards compatibility reasons the argument is promoted like arguments
+passed to @code{...} in a varargs function, so some classes are never returned
+in certain languages.  Alternatively, the argument of the built-in
+function can be a typename, such as the @code{typeof} specifier.
+
+@smallexample
+int a[2];
+__builtin_classify_type (a) == __builtin_classify_type (int[5]);
+__builtin_classify_type (a) == __builtin_classify_type (void*);
+__builtin_classify_type (typeof (a)) == __builtin_classify_type (int[5]);
+@end smallexample
+
+The first comparison will never be true, as @var{a} is implicitly converted
+to pointer.  The last two comparisons will be true as they classify
+pointers in the second case and arrays in the last case.
+@enddefbuiltin
+
  @defbuiltin{double __builtin_huge_val (void)}
  Returns a positive infinity, if supported by the floating-point format,
  else @code{DBL_MAX}.  This function is suitable for implementing the
--- gcc/c/c-parser.cc.jj2023-06-10 19:22:15.577205685 +0200
+++ gcc/c/c-parser.cc   2023-06-12 17:32:31.007413019 +0200
@@ -11213,6 +11213,32 @@ c_parser_postfix_expression_after_primar
literal_zero_mask = 0;
if (c_parser_next_token_is (parser, CPP_CLOSE_PAREN))
  exprlist = NULL;
+   else if (TREE_CODE (expr.value) == FUNCTION_DECL
+&& 

[PATCH] middle-end/110232 - fix native interpret of vector

2023-06-13 Thread Richard Biener via Gcc-patches
The following fixes native interpretation of a buffer as boolean
vector with bit-precision elements such as AVX512 vectors.  The
check whether the buffer covers the whole vector was broken for
bit-precision elements and the following instead implements it
based on the vector type size.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

PR middle-end/110232
* fold-const.cc (native_interpret_vector): Use TYPE_SIZE_UNIT
to check whether the buffer covers the whole vector.

* gcc.target/i386/pr110232.c: New testcase.
---
 gcc/fold-const.cc| 11 ---
 gcc/testsuite/gcc.target/i386/pr110232.c | 12 
 2 files changed, 16 insertions(+), 7 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr110232.c

diff --git a/gcc/fold-const.cc b/gcc/fold-const.cc
index 84b0d06b819..9ea055d4523 100644
--- a/gcc/fold-const.cc
+++ b/gcc/fold-const.cc
@@ -8796,16 +8796,13 @@ native_interpret_vector_part (tree type, const unsigned 
char *bytes,
 static tree
 native_interpret_vector (tree type, const unsigned char *ptr, unsigned int len)
 {
-  tree etype;
-  unsigned int size;
-  unsigned HOST_WIDE_INT count;
+  unsigned HOST_WIDE_INT size;
 
-  etype = TREE_TYPE (type);
-  size = GET_MODE_SIZE (SCALAR_TYPE_MODE (etype));
-  if (!TYPE_VECTOR_SUBPARTS (type).is_constant ()
-  || size * count > len)
+  if (!tree_to_poly_uint64 (TYPE_SIZE_UNIT (type)).is_constant ()
+  || size > len)
 return NULL_TREE;
 
+  unsigned HOST_WIDE_INT count = TYPE_VECTOR_SUBPARTS (type).to_constant ();
   return native_interpret_vector_part (type, ptr, len, count, 1);
 }
 
diff --git a/gcc/testsuite/gcc.target/i386/pr110232.c 
b/gcc/testsuite/gcc.target/i386/pr110232.c
new file mode 100644
index 000..43b74b15e00
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr110232.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=znver4 --param vect-partial-vector-usage=2 
-fno-vect-cost-model -fdump-tree-vect" } */
+
+int a[4096];
+
+void foo ()
+{
+  for (int i = 1; i < 4095; ++i)
+a[i] = 42;
+}
+
+/* { dg-final { scan-tree-dump-not "VIEW_CONVERT_EXPR" "vect" } } */
-- 
2.35.3


[PATCH] Fix disambiguation against .MASK_LOAD

2023-06-13 Thread Richard Biener via Gcc-patches
Alias analysis was treating .MASK_LOAD as loading a full vector
which means we disambiguate against decls smaller than the vector size.
This complements the previous patch handling .MASK_STORE and fixes
runtime execution FAILs of gfortran.dg/matmul_3.f90 and
gfortran.dg/inline_sum_2.f90 when using AVX512 with full masked loop
vectorization on Zen4.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

* tree-ssa-alias.cc (ref_maybe_used_by_call_p_1): For
.MASK_LOAD and friends set the size of the access to unknown.
---
 gcc/tree-ssa-alias.cc | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/gcc/tree-ssa-alias.cc b/gcc/tree-ssa-alias.cc
index b5476e8b41e..e1bc04b82ba 100644
--- a/gcc/tree-ssa-alias.cc
+++ b/gcc/tree-ssa-alias.cc
@@ -2829,6 +2829,9 @@ ref_maybe_used_by_call_p_1 (gcall *call, ao_ref *ref, 
bool tbaa_p)
  ao_ref_init_from_ptr_and_size (_ref,
 gimple_call_arg (call, 0),
 TYPE_SIZE_UNIT (TREE_TYPE (lhs)));
+ /* We cannot make this a known-size access since otherwise
+we disambiguate against refs to decls that are smaller.  */
+ rhs_ref.size = -1;
  rhs_ref.ref_alias_set = rhs_ref.base_alias_set
= tbaa_p ? get_deref_alias_set (TREE_TYPE
(gimple_call_arg (call, 1))) : 0;
@@ -3073,7 +3076,7 @@ call_may_clobber_ref_p_1 (gcall *call, ao_ref *ref, bool tbaa_p)
  ao_ref_init_from_ptr_and_size (&lhs_ref, gimple_call_arg (call, 0),
 TYPE_SIZE_UNIT (TREE_TYPE (rhs)));
  /* We cannot make this a known-size access since otherwise
-we disambiguate against stores to decls that are smaller.  */
+we disambiguate against refs to decls that are smaller.  */
  lhs_ref.size = -1;
  lhs_ref.ref_alias_set = lhs_ref.base_alias_set
= tbaa_p ? get_deref_alias_set
-- 
2.35.3


Re: [PATCH] middle-end, i386: Pattern recognize add/subtract with carry [PR79173]

2023-06-13 Thread Richard Biener via Gcc-patches
On Tue, 6 Jun 2023, Jakub Jelinek wrote:

> Hi!
> 
> The following patch introduces {add,sub}c5_optab and pattern recognizes
> various forms of add with carry and subtract with carry/borrow, see
> pr79173-{1,2,3,4,5,6}.c tests on what is matched.
> Primarily forms with 2 __builtin_add_overflow or __builtin_sub_overflow
> calls per limb (with just one for the least significant one), for
> add with carry even when it is hand written in C (for subtraction
> reassoc seems to change it too much so that the pattern recognition
> doesn't work).  __builtin_{add,sub}_overflow are standardized in C23
> under ckd_{add,sub} names, so it isn't any longer a GNU only extension.
> 
> Note, clang has for these (IMHO badly designed)
> __builtin_{add,sub}c{b,s,,l,ll} builtins which don't add/subtract just
> a single bit of carry, but basically add 3 unsigned values or
> subtract 2 unsigned values from one, and result in carry out of 0, 1, or 2
> because of that.  If we wanted to introduce those for clang compatibility,
> we could and lower them early to just two __builtin_{add,sub}_overflow
> calls and let the pattern matching in this patch recognize it later.
> 
> I've added expanders for this on ix86 and in addition to that
> added various peephole2s to make sure we get nice (and small) code
> for the common cases.  I think there are other PRs which request that
> e.g. for the _{addcarry,subborrow}_u{32,64} intrinsics, which the patch
> also improves.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
> 
> Would be nice if support for these optabs was added to many other targets,
> arm/aarch64 and powerpc* certainly have such instructions, I'd expect
> in fact that most targets do.
> 
> The _BitInt support I'm working on will also need this to emit reasonable
> code.
> 
> 2023-06-06  Jakub Jelinek  
> 
>   PR middle-end/79173
>   * internal-fn.def (ADDC, SUBC): New internal functions.
>   * internal-fn.cc (expand_ADDC, expand_SUBC): New functions.
>   (commutative_ternary_fn_p): Return true also for IFN_ADDC.
>   * optabs.def (addc5_optab, subc5_optab): New optabs.
>   * tree-ssa-math-opts.cc (match_addc_subc): New function.
>   (math_opts_dom_walker::after_dom_children): Call match_addc_subc
>   for PLUS_EXPR, MINUS_EXPR, BIT_IOR_EXPR and BIT_XOR_EXPR unless
>   other optimizations have been successful for those.
>   * gimple-fold.cc (gimple_fold_call): Handle IFN_ADDC and IFN_SUBC.
>   * gimple-range-fold.cc (adjust_imagpart_expr): Likewise.
>   * tree-ssa-dce.cc (eliminate_unnecessary_stmts): Likewise.
>   * doc/md.texi (addc<mode>5, subc<mode>5): Document new named
>   patterns.
>   * config/i386/i386.md (subborrow<mode>): Add alternative with
>   memory destination.
>   (addc<mode>5, subc<mode>5): New define_expand patterns.
>   (*sub<mode>_3, @add<mode>3_carry, addcarry<mode>, @sub<mode>3_carry,
>   subborrow<mode>, *add<mode>3_cc_overflow_1): Add define_peephole2
>   TARGET_READ_MODIFY_WRITE/-Os patterns to prefer using memory
>   destination in these patterns.
> 
>   * gcc.target/i386/pr79173-1.c: New test.
>   * gcc.target/i386/pr79173-2.c: New test.
>   * gcc.target/i386/pr79173-3.c: New test.
>   * gcc.target/i386/pr79173-4.c: New test.
>   * gcc.target/i386/pr79173-5.c: New test.
>   * gcc.target/i386/pr79173-6.c: New test.
>   * gcc.target/i386/pr79173-7.c: New test.
>   * gcc.target/i386/pr79173-8.c: New test.
>   * gcc.target/i386/pr79173-9.c: New test.
>   * gcc.target/i386/pr79173-10.c: New test.
> 
> --- gcc/internal-fn.def.jj   2023-06-05 10:38:06.670333685 +0200
> +++ gcc/internal-fn.def   2023-06-05 11:40:50.672212265 +0200
> @@ -381,6 +381,8 @@ DEF_INTERNAL_FN (ASAN_POISON_USE, ECF_LE
>  DEF_INTERNAL_FN (ADD_OVERFLOW, ECF_CONST | ECF_LEAF | ECF_NOTHROW, NULL)
>  DEF_INTERNAL_FN (SUB_OVERFLOW, ECF_CONST | ECF_LEAF | ECF_NOTHROW, NULL)
>  DEF_INTERNAL_FN (MUL_OVERFLOW, ECF_CONST | ECF_LEAF | ECF_NOTHROW, NULL)
> +DEF_INTERNAL_FN (ADDC, ECF_CONST | ECF_LEAF | ECF_NOTHROW, NULL)
> +DEF_INTERNAL_FN (SUBC, ECF_CONST | ECF_LEAF | ECF_NOTHROW, NULL)
>  DEF_INTERNAL_FN (TSAN_FUNC_EXIT, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL)
>  DEF_INTERNAL_FN (VA_ARG, ECF_NOTHROW | ECF_LEAF, NULL)
>  DEF_INTERNAL_FN (VEC_CONVERT, ECF_CONST | ECF_LEAF | ECF_NOTHROW, NULL)
> --- gcc/internal-fn.cc.jj 2023-05-15 19:12:24.080780016 +0200
> +++ gcc/internal-fn.cc   2023-06-06 09:38:46.333871169 +0200
> @@ -2722,6 +2722,44 @@ expand_MUL_OVERFLOW (internal_fn, gcall
>expand_arith_overflow (MULT_EXPR, stmt);
>  }
>  
> +/* Expand ADDC STMT.  */
> +
> +static void
> +expand_ADDC (internal_fn ifn, gcall *stmt)
> +{
> +  tree lhs = gimple_call_lhs (stmt);
> +  tree arg1 = gimple_call_arg (stmt, 0);
> +  tree arg2 = gimple_call_arg (stmt, 1);
> +  tree arg3 = gimple_call_arg (stmt, 2);
> +  tree type = TREE_TYPE (arg1);
> +  machine_mode mode = TYPE_MODE (type);
> +  insn_code icode = optab_handler (ifn == IFN_ADDC
> +   

Re: [PATCH] Add MinGW option -mcrtdll= for choosing C RunTime DLL library

2023-06-13 Thread LIU Hao via Gcc-patches

在 2023/6/13 14:29, Pali Rohár 写道:

Of course, just I'm not sure where to put the new paragraph. At the
beginning? Or after the text? What do you think?


Maybe just in front of 'This option is available for MinGW targets.' Also you 
may reword it as you like.



--
Best regards,
LIU Hao



OpenPGP_signature
Description: OpenPGP digital signature


  1   2   >