date:20231008

Re: [PATCH] ifcvt/vect: Emit COND_ADD for conditional scalar reduction.

2023-10-08 Thread Richard Sandiford

Robin Dapp  writes:
> Hi Tamar,
>
>> The only comment I have is whether you actually need this helper
>> function? It looks like all the uses of it are in cases you have, or
>> will call conditional_internal_fn_code directly.
> removed the cond_fn_p entirely in the attached v3.
>
> Bootstrapped and regtested on x86_64, aarch64 and power10.
>
> Regards
>  Robin
>
> Subject: [PATCH v3] ifcvt/vect: Emit COND_ADD for conditional scalar
>  reduction.
>
> As described in PR111401 we currently emit a COND and a PLUS expression
> for conditional reductions.  This makes it difficult to combine both
> into a masked reduction statement later.
> This patch improves that by directly emitting a COND_ADD during ifcvt and
> adjusting some vectorizer code to handle it.
>
> It also makes neutral_op_for_reduction return -0 if HONOR_SIGNED_ZEROS
> is true.
>
> gcc/ChangeLog:
>
>   PR middle-end/111401
>   * tree-if-conv.cc (convert_scalar_cond_reduction): Emit COND_ADD
>   if supported.
>   (predicate_scalar_phi): Add whitespace.
>   * tree-vect-loop.cc (fold_left_reduction_fn): Add IFN_COND_ADD.
>   (neutral_op_for_reduction): Return -0 for PLUS.
>   (vect_is_simple_reduction): Don't count else operand in
>   COND_ADD.
>   (vect_create_epilog_for_reduction): Fix whitespace.
>   (vectorize_fold_left_reduction): Add COND_ADD handling.
>   (vectorizable_reduction): Don't count else operand in COND_ADD.
>   (vect_transform_reduction): Add COND_ADD handling.
>   * tree-vectorizer.h (neutral_op_for_reduction): Add default
>   parameter.
>
> gcc/testsuite/ChangeLog:
>
>   * gcc.dg/vect/vect-cond-reduc-in-order-2-signed-zero.c: New test.
>   * gcc.target/riscv/rvv/autovec/cond/pr111401.c: New test.

The patch LGTM too FWIW, except...

> ---
>  .../vect-cond-reduc-in-order-2-signed-zero.c  | 141 
>  .../riscv/rvv/autovec/cond/pr111401.c | 139 
>  gcc/tree-if-conv.cc   |  63 ++--
>  gcc/tree-vect-loop.cc | 150 ++
>  gcc/tree-vectorizer.h |   2 +-
>  5 files changed, 451 insertions(+), 44 deletions(-)
>  create mode 100644 
> gcc/testsuite/gcc.dg/vect/vect-cond-reduc-in-order-2-signed-zero.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/pr111401.c
>
> diff --git 
> a/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-in-order-2-signed-zero.c 
> b/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-in-order-2-signed-zero.c
> new file mode 100644
> index 000..7b46e7d8a2a
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-in-order-2-signed-zero.c
> @@ -0,0 +1,141 @@
> +/* Make sure a -0 stays -0 when we perform a conditional reduction.  */
> +/* { dg-do run } */
> +/* { dg-require-effective-target vect_double } */
> +/* { dg-add-options ieee } */
> +/* { dg-additional-options "-std=gnu99 -fno-fast-math" } */
> +
> +#include "tree-vect.h"
> +
> +#include 
> +
> +#define N (VECTOR_BITS * 17)
> +
> +double __attribute__ ((noinline, noclone))
> +reduc_plus_double (double *restrict a, double init, int *cond, int n)
> +{
> +  double res = init;
> +  for (int i = 0; i < n; i++)
> +if (cond[i])
> +  res += a[i];
> +  return res;
> +}
> +
> +double __attribute__ ((noinline, noclone, optimize ("0")))
> +reduc_plus_double_ref (double *restrict a, double init, int *cond, int n)
> +{
> +  double res = init;
> +  for (int i = 0; i < n; i++)
> +if (cond[i])
> +  res += a[i];
> +  return res;
> +}
> +
> +double __attribute__ ((noinline, noclone))
> +reduc_minus_double (double *restrict a, double init, int *cond, int n)
> +{
> +  double res = init;
> +  for (int i = 0; i < n; i++)
> +if (cond[i])
> +  res -= a[i];
> +  return res;
> +}
> +
> +double __attribute__ ((noinline, noclone, optimize ("0")))
> +reduc_minus_double_ref (double *restrict a, double init, int *cond, int n)
> +{
> +  double res = init;
> +  for (int i = 0; i < n; i++)
> +if (cond[i])
> +  res -= a[i];
> +  return res;
> +}
> +
> +int __attribute__ ((optimize (1)))
> +main ()
> +{
> +  int n = 19;
> +  double a[N];
> +  int cond1[N], cond2[N];
> +
> +  for (int i = 0; i < N; i++)
> +{
> +  a[i] = (i * 0.1) * (i & 1 ? 1 : -1);
> +  cond1[i] = 0;
> +  cond2[i] = i & 4 ? 1 : 0;
> +  asm volatile ("" ::: "memory");
> +}
> +
> +  double res1 = reduc_plus_double (a, -0.0, cond1, n);
> +  double ref1 = reduc_plus_double_ref (a, -0.0, cond1, n);
> +  double res2 = reduc_minus_double (a, -0.0, cond1, n);
> +  double ref2 = reduc_minus_double_ref (a, -0.0, cond1, n);
> +  double res3 = reduc_plus_double (a, -0.0, cond1, n);
> +  double ref3 = reduc_plus_double_ref (a, -0.0, cond1, n);
> +  double res4 = reduc_minus_double (a, -0.0, cond1, n);
> +  double ref4 = reduc_minus_double_ref (a, -0.0, cond1, n);
> +
> +  if (res1 != ref1 || signbit (res1) != signbit (ref1))
> +__builtin_abort ();
> +  if (res2 != ref2 ||

Re: [PATCH][_GLIBCXX_INLINE_VERSION] Fix

2023-10-08 Thread François Dumont

I think we can do the same without the symbol alias feature. It's even
simpler because it does not require any maintenance when the version symbol
is bumped.


Here is what I'm testing; at least the exported symbol is fine.

François


On 08/10/2023 16:06, Iain Sandoe wrote:

Hi François,


On 21 Sep 2023, at 05:41, François Dumont  wrote:

Tests were successful, ok to commit ?

On 20/09/2023 19:51, François Dumont wrote:

libstdc++: [_GLIBCXX_INLINE_VERSION] Add handle_contract_violation symbol alias

libstdc++-v3/ChangeLog:

 * src/experimental/contract.cc
 [_GLIBCXX_INLINE_VERSION](handle_contract_violation): Provide symbol alias
 without version namespace decoration for gcc.

This does not work in the source on targets without support for symbol aliases 
(Darwin is one)
“../experimental/contract.cc:79:8: warning: alias definitions not supported in 
Mach-O; ignored”

- there might be a way to do it at link-time (for one symbol not too bad); I 
will have to poke at
   it a bit.
Iain


Here is what I'm testing eventually, ok to commit if successful ?

François

On 20/09/2023 11:32, Jonathan Wakely wrote:

On Wed, 20 Sept 2023 at 05:51, François Dumont via Libstdc++
 wrote:

libstdc++: Remove std::constract_violation from versioned namespace

Spelling mistake in contract_violation, and it's not
std::contract_violation, it's std::experimental::contract_violation


GCC expects this type to be in std namespace directly.

Again, it's in std::experimental not in std directly.

Will this change cause problems when including another experimental
header, which does put experimental below std::__8?

I think std::__8::experimental and std::experimental will become ambiguous.

Maybe we do want to remove the inline __8 namespace from all
experimental headers. That needs a bit more thought though.


libstdc++-v3/ChangeLog:

   * include/experimental/contract:
   Remove _GLIBCXX_BEGIN_NAMESPACE_VERSION/_GLIBCXX_END_NAMESPACE_VERSION.

This line is too long for the changelog.


It does fix 29 g++.dg/contracts in gcc testsuite.

Ok to commit ?

François

diff --git a/libstdc++-v3/src/experimental/contract.cc b/libstdc++-v3/src/experimental/contract.cc
index 504a6c041f1..7918e5ae53a 100644
--- a/libstdc++-v3/src/experimental/contract.cc
+++ b/libstdc++-v3/src/experimental/contract.cc
@@ -67,3 +67,12 @@ handle_contract_violation (const std::experimental::contract_violation 
   std::cerr << std::endl;
 #endif
 }
+
+#if _GLIBCXX_INLINE_VERSION
+// Provide symbol without version namespace decoration for gcc.
+extern "C"
+__attribute__ ((weak)) void
+_Z25handle_contract_violationRKNSt12experimental18contract_violationE
+(const std::experimental::contract_violation )
+{ handle_contract_violation(violation); }
+#endif

Re: [PATCH v14 16/40] c, c++: Use 16 bits for all use of enum rid for more keyword space

2023-10-08 Thread Ken Matsui

On Wed, Sep 27, 2023 at 6:57 AM Jason Merrill  wrote:
>
> On Tue, Sep 19, 2023 at 7:05 PM Ken Matsui  wrote:
>>
>> On Tue, Sep 19, 2023 at 9:59 AM Jason Merrill  wrote:
>> >
>> > On 9/15/23 19:51, Ken Matsui via Gcc-patches wrote:
>> > > Now that RID_MAX has reached 255, we need to update the bit sizes of 
>> > > every
>> > > use of the enum rid from 8 to 16 to support more keywords.
>> >
>> > Sorry to bring this up so late, but this does raise the question of
>> > whether we actually want to use keyword space for all these traits that
>> > will probably be used approximately once in a C++ translation unit.  I
>> > wonder if it would make sense to instead use e.g. RID_TRAIT for all of
>> > them and use gperf to look up the specific trait from the identifier?
>> >
>>
>> Thank you for your review. To use gperf, we might need to duplicate
>> the list of all traits defined in cp-trait.def. Modifying the traits
>> would require us to edit two files, but would it be acceptable?
>
>
> I think the gperf input could be generated from the .def with a simple script?
>

Thank you! Will do!

> Jason

[PATCH-2, rs6000] Enable vector mode for memory equality compare [PR111449]

2023-10-08 Thread HAO CHEN GUI

Hi,
  This patch enables vector mode for memory equality compare by adding
and implementing a new expander, cbranchv16qi4. The corresponding CC
register and compare code are set in rs6000_generate_compare. With the
patch, a 16-byte equality compare can be implemented with one vector
compare instruction rather than two 8-byte compares with branches.

  The test case is in the second patch which is rs6000 specific.

  Bootstrapped and tested on powerpc64-linux BE and LE with no
regressions.

Thanks
Gui Haochen

ChangeLog
rs6000: Enable vector compare for memory equality compare

gcc/
PR target/111449
* config/rs6000/altivec.md (cbranchv16qi4): New expand pattern.
* config/rs6000/rs6000.cc (rs6000_generate_compare): Generate insn
sequence for V16QImode equality compare.
* config/rs6000/rs6000.h (MOVE_MAX_PIECES): Define.
(COMPARE_MAX_PIECES): Define.

gcc/testsuite/
PR target/111449
* gcc.target/powerpc/pr111449.c: New.

patch.diff
diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index e8a596fb7e9..c69bf266402 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -2605,6 +2605,39 @@ (define_insn "altivec_vupklpx"
 }
   [(set_attr "type" "vecperm")])

+(define_expand "cbranchv16qi4"
+  [(use (match_operator 0 "equality_operator"
+   [(match_operand:V16QI 1 "gpc_reg_operand")
+(match_operand:V16QI 2 "gpc_reg_operand")]))
+   (use (match_operand 3))]
+  "VECTOR_UNIT_ALTIVEC_P (V16QImode)"
+{
+  if (!TARGET_P9_VECTOR
+  && MEM_P (operands[1])
+  && !altivec_indexed_or_indirect_operand (operands[1], V16QImode)
+  && MEM_P (operands[2])
+  && !altivec_indexed_or_indirect_operand (operands[2], V16QImode))
+{
+  /* Use direct move as the byte order doesn't matter for equality
+compare.  */
+  rtx reg_op1 = gen_reg_rtx (V16QImode);
+  rtx reg_op2 = gen_reg_rtx (V16QImode);
+  rs6000_emit_le_vsx_permute (reg_op1, operands[1], V16QImode);
+  rs6000_emit_le_vsx_permute (reg_op2, operands[2], V16QImode);
+  operands[1] = reg_op1;
+  operands[2] = reg_op2;
+}
+  else
+{
+  operands[1] = force_reg (V16QImode, operands[1]);
+  operands[2] = force_reg (V16QImode, operands[2]);
+}
+  rtx_code code = GET_CODE (operands[0]);
+  operands[0] = gen_rtx_fmt_ee (code, V16QImode, operands[1], operands[2]);
+  rs6000_emit_cbranch (V16QImode, operands);
+  DONE;
+})
+
 ;; Compare vectors producing a vector result and a predicate, setting CR6 to
 ;; indicate a combined status
 (define_insn "altivec_vcmpequ_p"
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index efe9adce1f8..0087d786840 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -15264,6 +15264,15 @@ rs6000_generate_compare (rtx cmp, machine_mode mode)
  else
emit_insn (gen_stack_protect_testsi (compare_result, op0, op1b));
}
+  else if (mode == V16QImode)
+   {
+ gcc_assert (code == EQ || code == NE);
+
+ rtx result_vector = gen_reg_rtx (V16QImode);
+ compare_result = gen_rtx_REG (CCmode, CR6_REGNO);
+ emit_insn (gen_altivec_vcmpequb_p (result_vector, op0, op1));
+ code = (code == NE) ? GE : LT;
+   }
   else
emit_insn (gen_rtx_SET (compare_result,
gen_rtx_COMPARE (comp_mode, op0, op1)));
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index 3503614efbd..dc33bca0802 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -1730,6 +1730,8 @@ typedef struct rs6000_args
in one reasonably fast instruction.  */
 #define MOVE_MAX (! TARGET_POWERPC64 ? 4 : 8)
 #define MAX_MOVE_MAX 8
+#define MOVE_MAX_PIECES (!TARGET_POWERPC64 ? 4 : 16)
+#define COMPARE_MAX_PIECES (!TARGET_POWERPC64 ? 4 : 16)

 /* Nonzero if access to memory by bytes is no faster than for words.
Also nonzero if doing byte operations (specifically shifts) in registers
diff --git a/gcc/testsuite/gcc.target/powerpc/pr111449.c 
b/gcc/testsuite/gcc.target/powerpc/pr111449.c
new file mode 100644
index 000..a8c30b92a41
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr111449.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-options "-maltivec -O2" } */
+/* { dg-require-effective-target has_arch_ppc64 } */
+
+/* Ensure vector comparison is used for 16-byte memory equality compare.  */
+
+int compare1 (const char* s1, const char* s2)
+{
+  return __builtin_memcmp (s1, s2, 16) == 0;
+}
+
+int compare2 (const char* s1)
+{
+  return __builtin_memcmp (s1, "0123456789012345", 16) == 0;
+}
+
+/* { dg-final { scan-assembler-times {\mvcmpequb\.} 2 } } */
+/* { dg-final { scan-assembler-not {\mcmpd\M} } } */

[PATCH-1, expand] Enable vector mode for compare_by_pieces [PR111449]

2023-10-08 Thread HAO CHEN GUI

Hi,
  Vector mode instructions are efficient on some targets (e.g. ppc64).
This patch enables vector mode for compare_by_pieces. The non-member
function widest_fixed_size_mode_for_size now takes a by_pieces_operation
as its second argument and decides whether vector mode is enabled based
on the type of operation. Currently only set and compare enable vector
mode, each with the corresponding optab check.

  The test case is in the second patch which is rs6000 specific.

  Bootstrapped and tested on x86 and powerpc64-linux BE and LE with no
regressions.

Thanks
Gui Haochen

ChangeLog
Expand: Enable vector mode for pieces compare

Vector mode compare instructions are efficient for equality compare on
rs6000. This patch refactors the by-pieces operation code to enable
vector mode for compare.

gcc/
PR target/111449
* expr.cc (widest_fixed_size_mode_for_size): Enable vector mode
for compare.  Replace the second argument with the type of pieces
operation.  Add optab checks for vector mode used in compare.
(by_pieces_ninsns): Pass the type of pieces operation to
widest_fixed_size_mode_for_size.
(class op_by_pieces_d): Add virtual function
widest_fixed_size_mode_for_size.
(op_by_pieces_d::op_by_pieces_d): Call outer function
widest_fixed_size_mode_for_size.
(op_by_pieces_d::get_usable_mode): Call class function
widest_fixed_size_mode_for_size.
(op_by_pieces_d::run): Likewise.
(class move_by_pieces_d): Declare function
widest_fixed_size_mode_for_size.
(move_by_pieces_d::widest_fixed_size_mode_for_size): Implement.
(class store_by_pieces_d): Declare function
widest_fixed_size_mode_for_size.
(store_by_pieces_d::widest_fixed_size_mode_for_size): Implement.
(can_store_by_pieces): Pass the type of pieces operation to
widest_fixed_size_mode_for_size.
(class compare_by_pieces_d): Declare function
widest_fixed_size_mode_for_size.
(compare_by_pieces_d::compare_by_pieces_d): Set m_qi_vector_mode
to true.
(compare_by_pieces_d::widest_fixed_size_mode_for_size): Implement.

patch.diff
diff --git a/gcc/expr.cc b/gcc/expr.cc
index d87346dc07f..9885404ee9c 100644
--- a/gcc/expr.cc
+++ b/gcc/expr.cc
@@ -992,8 +992,9 @@ alignment_for_piecewise_move (unsigned int max_pieces, 
unsigned int align)
that is narrower than SIZE bytes.  */

 static fixed_size_mode
-widest_fixed_size_mode_for_size (unsigned int size, bool qi_vector)
+widest_fixed_size_mode_for_size (unsigned int size, by_pieces_operation op)
 {
+  bool qi_vector = ((op == COMPARE_BY_PIECES) || op == SET_BY_PIECES);
   fixed_size_mode result = NARROWEST_INT_MODE;

   gcc_checking_assert (size > 1);
@@ -1009,8 +1010,13 @@ widest_fixed_size_mode_for_size (unsigned int size, bool 
qi_vector)
  {
if (GET_MODE_SIZE (candidate) >= size)
  break;
-   if (optab_handler (vec_duplicate_optab, candidate)
-   != CODE_FOR_nothing)
+   if ((op == SET_BY_PIECES
+&& optab_handler (vec_duplicate_optab, candidate)
+  != CODE_FOR_nothing)
+|| (op == COMPARE_BY_PIECES
+&& optab_handler (mov_optab, mode)
+   != CODE_FOR_nothing
+&& can_compare_p (EQ, mode, ccp_jump)))
  result = candidate;
  }

@@ -1061,8 +1067,7 @@ by_pieces_ninsns (unsigned HOST_WIDE_INT l, unsigned int 
align,
 {
   /* NB: Round up L and ALIGN to the widest integer mode for
 MAX_SIZE.  */
-  mode = widest_fixed_size_mode_for_size (max_size,
- op == SET_BY_PIECES);
+  mode = widest_fixed_size_mode_for_size (max_size, op);
   if (optab_handler (mov_optab, mode) != CODE_FOR_nothing)
{
  unsigned HOST_WIDE_INT up = ROUND_UP (l, GET_MODE_SIZE (mode));
@@ -1076,8 +1081,7 @@ by_pieces_ninsns (unsigned HOST_WIDE_INT l, unsigned int 
align,

   while (max_size > 1 && l > 0)
 {
-  mode = widest_fixed_size_mode_for_size (max_size,
- op == SET_BY_PIECES);
+  mode = widest_fixed_size_mode_for_size (max_size, op);
   enum insn_code icode;

   unsigned int modesize = GET_MODE_SIZE (mode);
@@ -1327,6 +1331,8 @@ class op_by_pieces_d
   virtual void finish_mode (machine_mode)
   {
   }
+  virtual fixed_size_mode widest_fixed_size_mode_for_size (unsigned int size)
+= 0;

  public:
   op_by_pieces_d (unsigned int, rtx, bool, rtx, bool, by_pieces_constfn,
@@ -1375,8 +1381,7 @@ op_by_pieces_d::op_by_pieces_d (unsigned int max_pieces, 
rtx to,
 {
   /* Find the mode of the largest comparison.  */
   fixed_size_mode mode
-   = widest_fixed_size_mode_for_size (m_max_size,
-  m_qi_vector_mode);
+   = ::widest_fixed_size_mode_for_size (m_max_size,

[PATCH] [i386] APX EGPR: fix missing pattern that prohibits egpr

2023-10-08 Thread Hongyu Wang

Thanks, also there is another pattern missed that should use "ja" instead
of Bm. Will commit below changes.

gcc/ChangeLog:

* config/i386/sse.md (vec_concatv2di): Replace constraint "m"
with "jm" for alternative 0 and 1 of operand 2.
(sse4_1_3): Replace constraint "Bm" with
"ja" for alternative 0 and 1 of operand2.
---
 gcc/config/i386/sse.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 6bffd749c6d..22e43eb3f92 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -16842,7 +16842,7 @@ (define_insn "*sse4_1_3"
   [(set (match_operand:VI24_128 0 "register_operand" "=Yr,*x,")
(umaxmin:VI24_128
  (match_operand:VI24_128 1 "vector_operand" "%0,0,")
- (match_operand:VI24_128 2 "vector_operand" "YrBm,*xBm,m")))]
+ (match_operand:VI24_128 2 "vector_operand" "Yrja,*xja,m")))]
   "TARGET_SSE4_1
&& 
&& !(MEM_P (operands[1]) && MEM_P (operands[2]))"
@@ -20638,7 +20638,7 @@ (define_insn "vec_concatv2di"
  (match_operand:DI 1 "register_operand"
  "  0, 0,x ,Yv,0,Yv,0,0,v")
  (match_operand:DI 2 "nonimmediate_operand"
- " jrm,jrm,rm,rm,x,Yv,x,m,m")))]
+ " jrjm,jrjm,rm,rm,x,Yv,x,m,m")))]
   "TARGET_SSE"
   "@
pinsrq\t{$1, %2, %0|%0, %2, 1}
-- 
2.31.1

Re: [PATCH] [i386] APX EGPR: fix missing pattern that prohibits egpr

2023-10-08 Thread Hongtao Liu

On Mon, Oct 9, 2023 at 10:05 AM Hongyu Wang  wrote:
>
> For vec_concatv2di, m constraint in alternative 0 and 1 could result in
> egpr allocated on operand 2 under -mapxf. Should use jm instead.
>
> Bootstrapped/regtested on x86-64-linux-gnu.
>
> Ok for trunk?
Ok.
>
> gcc/ChangeLog:
>
> * config/i386/sse.md (vec_concatv2di): Replace constraint "m"
> with "jm" for alternative 0 and 1 of operand 2.
> ---
>  gcc/config/i386/sse.md | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> index 6bffd749c6d..58672f46365 100644
> --- a/gcc/config/i386/sse.md
> +++ b/gcc/config/i386/sse.md
> @@ -20638,7 +20638,7 @@ (define_insn "vec_concatv2di"
>   (match_operand:DI 1 "register_operand"
>   "  0, 0,x ,Yv,0,Yv,0,0,v")
>   (match_operand:DI 2 "nonimmediate_operand"
> - " jrm,jrm,rm,rm,x,Yv,x,m,m")))]
> + " jrjm,jrjm,rm,rm,x,Yv,x,m,m")))]
>"TARGET_SSE"
>"@
> pinsrq\t{$1, %2, %0|%0, %2, 1}
> --
> 2.31.1
>


-- 
BR,
Hongtao

[PATCH] [i386] APX EGPR: fix missing pattern that prohibits egpr

2023-10-08 Thread Hongyu Wang

For vec_concatv2di, m constraint in alternative 0 and 1 could result in
egpr allocated on operand 2 under -mapxf. Should use jm instead.

Bootstrapped/regtested on x86-64-linux-gnu.

Ok for trunk?

gcc/ChangeLog:

* config/i386/sse.md (vec_concatv2di): Replace constraint "m"
with "jm" for alternative 0 and 1 of operand 2.
---
 gcc/config/i386/sse.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 6bffd749c6d..58672f46365 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -20638,7 +20638,7 @@ (define_insn "vec_concatv2di"
  (match_operand:DI 1 "register_operand"
  "  0, 0,x ,Yv,0,Yv,0,0,v")
  (match_operand:DI 2 "nonimmediate_operand"
- " jrm,jrm,rm,rm,x,Yv,x,m,m")))]
+ " jrjm,jrjm,rm,rm,x,Yv,x,m,m")))]
   "TARGET_SSE"
   "@
pinsrq\t{$1, %2, %0|%0, %2, 1}
-- 
2.31.1

[PATCH v4] c++: Check for indirect change of active union member in constexpr [PR101631,PR102286]

2023-10-08 Thread Nathaniel Shead

Ping for https://gcc.gnu.org/pipermail/gcc-patches/2023-September/631203.html

Rebased on top of current trunk and bootstrap + regtest on
x86_64-pc-linux-gnu now completed without errors.

-- >8 --

This patch adds checks for attempting to change the active member of a
union by methods other than a member access expression.

To be able to properly distinguish `*(&u.a) = ` from `u.a = `, this
patch redoes the solution for c++/59950 to avoid extraneous *&; it
seems that the only case that needed the workaround was when copying
empty classes.

This patch also ensures that constructors for a union field mark that
field as the active member before entering the call itself; this ensures
that modifications of the field within the constructor's body don't
cause false positives (as these will not appear to be member access
expressions). This means that we no longer need to start the lifetime of
empty union members after the constructor body completes.

As a drive-by fix, this patch also ensures that value-initialised unions
are considered to have activated their initial member for the purpose of
checking stores, which catches some additional mistakes pre-C++20.

PR c++/101631
PR c++/102286

gcc/cp/ChangeLog:

* call.cc (build_over_call): Fold more indirect refs for trivial
assignment op.
* class.cc (type_has_non_deleted_trivial_default_ctor): Create.
* constexpr.cc (cxx_eval_call_expression): Start lifetime of
union member before entering constructor.
(cxx_eval_store_expression): Activate member for
value-initialised union. Check for accessing inactive union
member indirectly.
* cp-tree.h (type_has_non_deleted_trivial_default_ctor):
Forward declare.

gcc/testsuite/ChangeLog:

* g++.dg/cpp1y/constexpr-89336-3.C: Fix union initialisation.
* g++.dg/cpp1y/constexpr-union6.C: New test.
* g++.dg/cpp2a/constexpr-union2.C: New test.
* g++.dg/cpp2a/constexpr-union3.C: New test.
* g++.dg/cpp2a/constexpr-union4.C: New test.
* g++.dg/cpp2a/constexpr-union5.C: New test.
* g++.dg/cpp2a/constexpr-union6.C: New test.

Signed-off-by: Nathaniel Shead 
---
 gcc/cp/call.cc|  11 +-
 gcc/cp/class.cc   |   8 ++
 gcc/cp/constexpr.cc   | 135 +-
 gcc/cp/cp-tree.h  |   1 +
 .../g++.dg/cpp1y/constexpr-89336-3.C  |   2 +-
 gcc/testsuite/g++.dg/cpp1y/constexpr-union6.C |  13 ++
 gcc/testsuite/g++.dg/cpp2a/constexpr-union2.C |  30 
 gcc/testsuite/g++.dg/cpp2a/constexpr-union3.C |  45 ++
 gcc/testsuite/g++.dg/cpp2a/constexpr-union4.C |  29 
 gcc/testsuite/g++.dg/cpp2a/constexpr-union5.C |  71 +
 gcc/testsuite/g++.dg/cpp2a/constexpr-union6.C |  43 ++
 11 files changed, 345 insertions(+), 43 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/cpp1y/constexpr-union6.C
 create mode 100644 gcc/testsuite/g++.dg/cpp2a/constexpr-union2.C
 create mode 100644 gcc/testsuite/g++.dg/cpp2a/constexpr-union3.C
 create mode 100644 gcc/testsuite/g++.dg/cpp2a/constexpr-union4.C
 create mode 100644 gcc/testsuite/g++.dg/cpp2a/constexpr-union5.C
 create mode 100644 gcc/testsuite/g++.dg/cpp2a/constexpr-union6.C

diff --git a/gcc/cp/call.cc b/gcc/cp/call.cc
index e8dafbd8ba6..c1fb8807d3f 100644
--- a/gcc/cp/call.cc
+++ b/gcc/cp/call.cc
@@ -10330,10 +10330,7 @@ build_over_call (struct z_candidate *cand, int flags, 
tsubst_flags_t complain)
   && DECL_OVERLOADED_OPERATOR_IS (fn, NOP_EXPR)
   && trivial_fn_p (fn))
 {
-  /* Don't use cp_build_fold_indirect_ref, op= returns an lvalue even if
-the object argument isn't one.  */
-  tree to = cp_build_indirect_ref (input_location, argarray[0],
-  RO_ARROW, complain);
+  tree to = cp_build_fold_indirect_ref (argarray[0]);
   tree type = TREE_TYPE (to);
   tree as_base = CLASSTYPE_AS_BASE (type);
   tree arg = argarray[1];
@@ -10341,7 +10338,11 @@ build_over_call (struct z_candidate *cand, int flags, 
tsubst_flags_t complain)
 
   if (is_really_empty_class (type, /*ignore_vptr*/true))
{
- /* Avoid copying empty classes.  */
+ /* Avoid copying empty classes, but ensure op= returns an lvalue even
+if the object argument isn't one. This isn't needed in other cases
+since MODIFY_EXPR is always considered an lvalue.  */
+ to = cp_build_addr_expr (to, tf_none);
+ to = cp_build_indirect_ref (input_location, to, RO_ARROW, complain);
  val = build2 (COMPOUND_EXPR, type, arg, to);
  suppress_warning (val, OPT_Wunused);
}
diff --git a/gcc/cp/class.cc b/gcc/cp/class.cc
index b71333af1f8..e31aeb8e68b 100644
--- a/gcc/cp/class.cc
+++ b/gcc/cp/class.cc
@@ -5688,6 +5688,14 @@ type_has_virtual_destructor (tree type)
   return (dtor && DECL_VIRTUAL_P (dtor));
 }

Re: [PATCH 1/6] aarch64: Sync system register information with Binutils

2023-10-08 Thread Ramana Radhakrishnan



> On 5 Oct 2023, at 14:04, Victor Do Nascimento  
> wrote:
> 
> External email: Use caution opening links or attachments
> 
> 
> On 10/5/23 12:42, Richard Earnshaw wrote:
>> 
>> 
>> On 03/10/2023 16:18, Victor Do Nascimento wrote:
>>> This patch adds the `aarch64-sys-regs.def' file to GCC, teaching
>>> the compiler about system registers known to the assembler and how
>>> these can be used.
>>> 
>>> The macros used to hold system register information reflect those in
>>> use by binutils, a design choice made to facilitate the sharing of data
>>> between different parts of the toolchain.
>>> 
>>> By aligning the representation of data common to different parts of
>>> the toolchain we can greatly reduce the duplication of work,
>>> facilitating the maintenance of the aarch64 back-end across different
>>> parts of the toolchain; any `SYSREG (...)' that is added in one
>>> project can just as easily be added to its counterpart.
>>> 
>>> GCC does not implement the full range of ISA flags present in
>>> Binutils.  Where this is the case, aliases must be added to aarch64.h
>>> with the unknown architectural extension being mapped to its
>>> associated base architecture, such that any flag present in Binutils
>>> and used in system register definitions is understood in GCC.  Again,
>>> this is done such that flags can be used interchangeably between
>>> projects making use of the aarch64-system-regs.def file.  This is done
>>> in the next patch in the series.
>>> 
>>> `.arch' directives missing from the emitted assembly files as a
>>> consequence of this aliasing are accounted for by the compiler using
>>> the S<op0>_<op1>_<Cn>_<Cm>_<op2> encoding of system registers when
>>> issuing mrs/msr instructions.  This design choice ensures the
>>> assembler will accept anything that was deemed acceptable by the
>>> compiler.
>>> 
>>> gcc/ChangeLog:
>>> 
>>>* gcc/config/aarch64/aarch64-system-regs.def: New.
>>> ---
>>>  gcc/config/aarch64/aarch64-sys-regs.def | 1059 +++
>>>  1 file changed, 1059 insertions(+)
>>>  create mode 100644 gcc/config/aarch64/aarch64-sys-regs.def
>> 
>> This file is supposed to be /identical/ to the one in GNU Binutils,
>> right?
> 
> You're right Richard.
> 
> We want the same file to be compatible with both parts of the toolchain
> and, consequently, there is no compelling reason as to why the copy of
> the file found in GCC should in any way diverge from its Binutils
> counterpart.
> 
>> If so, I think it needs to continue to say that it is part of
>> GNU Binutils, not part of GCC.  Ramana, has this happened before?  If
>> not, does the SC have a position here?
>> 

I’ve not had the time to delve into the patch, apologies.


Is the intention here to keep a copy of the file with the main copy being in 
binutils i.e. modifications are made in binutils and then sync’d with GCC at 
the same time ?


In which case the comments in the file should make the mechanics of updates 
abundantly clear.

Is there any reason why, if the two versions were different, you’d have
problems between gcc and binutils?

If so, what kinds of problems would they be? For example, would they be no
more than gas not knowing about a system register that GCC claimed to know,
because binutils and gcc were built with different versions of the system
register file?

Speaking for myself, I do not see this request being any different from the 
requests for imports from other repositories into the GCC repository.



>> R.
> 
> This does raise a very interesting question on the intellectual property
> front and one that is well beyond my competence to opine about.
> 
> Nonetheless, this is a question which may arise again if we abstract
> away more target description data into such .def files, as has been
> discussed for architectural feature flags (for example).
> 
> So what might be nice (but not necessarily tenable) is if we had
> appropriate provisions in place for where files were shared across
> different parts of the toolchain.
> 
> Something like "This file is a shared resource of GCC and Binutils."



This model of an additional shared repository with a build dependency would
turn the “copy in every dependent repository” problem into a “suitable hash
in every dependent repository” problem, which is certainly something to
consider.

And then the question comes for the GCC project about how many such 
dependencies with different repositories it tracks :) ? Perhaps git submodules 
can be considered , however I am not sure how much that has been looked at 
after the git conversion. 



regards
Ramana




> 
> Anyway, that's my two cents on the matter :).
> 
> Let's see what Ramana has to say on the matter.
> 
> V.
> 
> 
>>> diff --git a/gcc/config/aarch64/aarch64-sys-regs.def
>>> b/gcc/config/aarch64/aarch64-sys-regs.def
>>> new file mode 100644
>>> index 000..d77fee1d5e3
>>> --- /dev/null
>>> +++ b/gcc/config/aarch64/aarch64-sys-regs.def
>>> @@ -0,0 +1,1059 @@
>>> +/* Copyright (C) 2023 Free Software

[pushed] libcpp: eliminate LINEMAPS_{ORDINARY,MACRO}_MAPS

2023-10-08 Thread David Malcolm

Successfully bootstrapped & regtested on x86_64-pc-linux-gnu.
Pushed to trunk as r14-4480-g0a0ceb7a72fe0b.

libcpp/ChangeLog:
* include/line-map.h (LINEMAPS_ORDINARY_MAPS): Delete.
(LINEMAPS_MACRO_MAPS): Delete.
* line-map.cc (linemap_tracks_macro_expansion_locs_p): Update for
deletion of LINEMAPS_MACRO_MAPS.
(linemap_get_statistics): Likewise.
---
 libcpp/include/line-map.h | 16 
 libcpp/line-map.cc|  4 ++--
 2 files changed, 2 insertions(+), 18 deletions(-)

diff --git a/libcpp/include/line-map.h b/libcpp/include/line-map.h
index 30f2284b5d1..cb934e6b9b3 100644
--- a/libcpp/include/line-map.h
+++ b/libcpp/include/line-map.h
@@ -928,14 +928,6 @@ LINEMAPS_LAST_MAP (const line_maps *set, bool map_kind)
  LINEMAPS_USED (set, map_kind) - 1);
 }
 
-/* Returns a pointer to the memory region where ordinary maps are
-   allocated in the line table SET.  */
-inline line_map_ordinary *
-LINEMAPS_ORDINARY_MAPS (const line_maps *set)
-{
-  return set->info_ordinary.maps;
-}
-
 /* Returns the INDEXth ordinary map.  */
 inline line_map_ordinary *
 LINEMAPS_ORDINARY_MAP_AT (const line_maps *set, int index)
@@ -968,14 +960,6 @@ LINEMAPS_LAST_ORDINARY_MAP (const line_maps *set)
   return (line_map_ordinary *)LINEMAPS_LAST_MAP (set, false);
 }
 
-/* Returns a pointer to the beginning of the region where macro maps
-   are allocated.  */
-inline line_map_macro *
-LINEMAPS_MACRO_MAPS (const line_maps *set)
-{
-  return set->info_macro.maps;
-}
-
 /* Returns the INDEXth macro map.  */
 inline line_map_macro *
 LINEMAPS_MACRO_MAP_AT (const line_maps *set, int index)
diff --git a/libcpp/line-map.cc b/libcpp/line-map.cc
index 385d54c53b7..cd173530149 100644
--- a/libcpp/line-map.cc
+++ b/libcpp/line-map.cc
@@ -738,7 +738,7 @@ linemap_module_restore (line_maps *set, unsigned lwm)
 bool
 linemap_tracks_macro_expansion_locs_p (const line_maps *set)
 {
-  return LINEMAPS_MACRO_MAPS (set) != NULL;
+  return set->info_macro.maps != nullptr;
 }
 
 /* Create a macro map.  A macro map encodes source locations of tokens
@@ -2076,7 +2076,7 @@ linemap_get_statistics (const line_maps *set,
   macro_maps_allocated_size =
 LINEMAPS_MACRO_ALLOCATED (set) * sizeof (struct line_map_macro);
 
-  for (cur_map = LINEMAPS_MACRO_MAPS (set);
+  for (cur_map = set->info_macro.maps;
cur_map && cur_map <= LINEMAPS_LAST_MACRO_MAP (set);
++cur_map)
 {
-- 
2.26.3

[pushed] analyzer: improvements to out-of-bounds diagrams [PR111155]

2023-10-08 Thread David Malcolm

Update out-of-bounds diagrams to show existing string values,
and the initial write index within a string buffer.

For example, given the out-of-bounds write in strcat in:

void test (void)
{
  char buf[10];
  strcpy (buf, "hello");
  strcat (buf, " world!");
}

the diagram improves from:

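   ┌─────┬─────┬────┬────┬────┐┌─────┬─────┬─────┐
   │ [0] │ [1] │[2] │[3] │[4] ││ [5] │ [6] │ [7] │
   ├─────┼─────┼────┼────┼────┤├─────┼─────┼─────┤
   │ ' ' │ 'w' │'o' │'r' │'l' ││ 'd' │ '!' │ NUL │
   ├─────┴─────┴────┴────┴────┴┴─────┴─────┴─────┤
   │      string literal (type: 'char[8]')       │
   └─────────────────────────────────────────────┘
      │     │    │    │    │      │     │     │
      │     │    │    │    │      │     │     │
      v     v    v    v    v      v     v     v
  ┌─────┬────────────────┬────┐┌─────────────────┐
  │ [0] │      ...       │[9] ││                 │
  ├─────┴────────────────┴────┤│after valid range│
  │ 'buf' (type: 'char[10]')  ││                 │
  └───────────────────────────┘└─────────────────┘
  ├─────────────┬─────────────┤├────────┬────────┤
                │                       │
      ╭─────────┴────────╮    ╭─────────┴─────────╮
      │capacity: 10 bytes│    │overflow of 3 bytes│
      ╰──────────────────╯    ╰───────────────────╯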

to:

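                             ┌────┬────┬────┬────┬────┐┌─────┬─────┬─────┐
                             │[0] │[1] │[2] │[3] │[4] ││ [5] │ [6] │ [7] │
                             ├────┼────┼────┼────┼────┤├─────┼─────┼─────┤
                             │' ' │'w' │'o' │'r' │'l' ││ 'd' │ '!' │ NUL │
                             ├────┴────┴────┴────┴────┴┴─────┴─────┴─────┤
                             │     string literal (type: 'char[8]')      │
                             └───────────────────────────────────────────┘
                               │    │    │    │    │      │     │     │
                               │    │    │    │    │      │     │     │
                               v    v    v    v    v      v     v     v
  ┌─────┬────────────────────┬────┬──────────────┬────┐┌─────────────────┐
  │ [0] │        ...         │[5] │     ...      │[9] ││                 │
  ├─────┼────┬────┬────┬────┬┼────┼──────────────┴────┘│                 │
  │ 'h' │'e' │'l' │'l' │'o' ││NUL │                    │after valid range│
  ├─────┴────┴────┴────┴────┴┴────┴───────────────────┐│                 │
  │             'buf' (type: 'char[10]')              ││                 │
  └───────────────────────────────────────────────────┘└─────────────────┘
  ├─────────────────────────┬─────────────────────────┤├────────┬────────┤
                            │                                   │
                  ╭─────────┴────────╮                ╭─────────┴─────────╮
                  │capacity: 10 bytes│                │overflow of 3 bytes│
                  ╰──────────────────╯                ╰───────────────────╯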

Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
Pushed to trunk as r14-4477-gb365e9d57ad445.

gcc/analyzer/ChangeLog:
PR analyzer/111155
* access-diagram.cc (boundaries::boundaries): Add logger param
(boundaries::add): Add logging.
(boundaries::get_hard_boundaries_in_range): New.
(boundaries::m_logger): New field.
(boundaries::get_table_x_for_offset): Make public.
(class svalue_spatial_item): New.
(class compound_svalue_spatial_item): New.
(add_ellipsis_to_gaps): New.
(valid_region_spatial_item::valid_region_spatial_item): Add theme
param.  Initialize m_boundaries, m_existing_sval, and
m_existing_sval_spatial_item.
(valid_region_spatial_item::add_boundaries): Set m_boundaries.
Add boundaries for any m_existing_sval_spatial_item.
(valid_region_spatial_item::add_array_elements_to_table): Rewrite
creation of min/max index in terms of
maybe_add_array_index_to_table.  Rewrite ellipsis code using
add_ellipsis_to_gaps. Add index values for any hard boundaries
within the valid region.
(valid_region_spatial_item::maybe_add_array_index_to_table): New,
based on code formerly in add_array_elements_to_table.
(valid_region_spatial_item::make_table): Make use of
m_existing_sval_spatial_item, if any.
(valid_region_spatial_item::m_boundaries): New field.
(valid_region_spatial_item::m_existing_sval): New field.

[pushed] libcpp: eliminate LINEMAPS_{,ORDINARY_,MACRO_}CACHE

2023-10-08 Thread David Malcolm

It's simpler to use field access than to go through these inline
functions that look as if they are macros.

No functional change intended.

Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
Pushed to trunk as r14-4479-g45bae1809c3919.

libcpp/ChangeLog:
* include/line-map.h (maps_info_ordinary::cache): Rename to...
(maps_info_ordinary::m_cache): ...this.
(maps_info_macro::cache): Rename to...
(maps_info_macro::m_cache): ...this.
(LINEMAPS_CACHE): Delete.
(LINEMAPS_ORDINARY_CACHE): Delete.
(LINEMAPS_MACRO_CACHE): Delete.
* init.cc (read_original_filename): Update for adding "m_" prefix.
* line-map.cc (linemap_add): Eliminate LINEMAPS_ORDINARY_CACHE in
favor of a simple field access.
(linemap_enter_macro): Likewise for LINEMAPS_MACRO_CACHE.
(linemap_ordinary_map_lookup): Likewise for
LINEMAPS_ORDINARY_CACHE, twice.
(linemap_lookup_macro_index): Likewise for LINEMAPS_MACRO_CACHE.
---
 libcpp/include/line-map.h | 36 ++--
 libcpp/init.cc|  2 +-
 libcpp/line-map.cc| 12 ++--
 3 files changed, 13 insertions(+), 37 deletions(-)

diff --git a/libcpp/include/line-map.h b/libcpp/include/line-map.h
index 7a172f4c846..30f2284b5d1 100644
--- a/libcpp/include/line-map.h
+++ b/libcpp/include/line-map.h
@@ -733,7 +733,9 @@ struct GTY(()) maps_info_ordinary {
  or equal to ALLOCATED.  */
   unsigned int used;
 
-  mutable unsigned int cache;
+  /* The index of the last ordinary map that was looked up with
+ linemap_lookup.  */
+  mutable unsigned int m_cache;
 };
 
 struct GTY(()) maps_info_macro {
@@ -748,7 +750,9 @@ struct GTY(()) maps_info_macro {
  or equal to ALLOCATED.  */
   unsigned int used;
 
-  mutable unsigned int cache;
+  /* The index of the last macro map that was looked up with
+ linemap_lookup.  */
+  mutable unsigned int m_cache;
 };
 
 /* Data structure to associate a source_range together with an arbitrary
@@ -904,18 +908,6 @@ LINEMAPS_USED (line_maps *set, bool map_kind)
 return set->info_ordinary.used;
 }
 
-/* Returns the index of the last map that was looked up with
-   linemap_lookup. MAP_KIND shall be TRUE if we are interested in
-   macro maps, FALSE otherwise.  */
-inline unsigned int &
-LINEMAPS_CACHE (const line_maps *set, bool map_kind)
-{
-  if (map_kind)
-return set->info_macro.cache;
-  else
-return set->info_ordinary.cache;
-}
-
 /* Return the map at a given index.  */
 inline line_map *
 LINEMAPS_MAP_AT (const line_maps *set, bool map_kind, int index)
@@ -968,14 +960,6 @@ LINEMAPS_ORDINARY_USED (const line_maps *set)
   return LINEMAPS_USED (set, false);
 }
 
-/* Return the index of the last ordinary map that was looked up with
-   linemap_lookup.  */
-inline unsigned int &
-LINEMAPS_ORDINARY_CACHE (const line_maps *set)
-{
-  return LINEMAPS_CACHE (set, false);
-}
-
 /* Returns a pointer to the last ordinary map used in the line table
SET.  */
 inline line_map_ordinary *
@@ -1016,14 +1000,6 @@ LINEMAPS_MACRO_USED (const line_maps *set)
   return LINEMAPS_USED (set, true);
 }
 
-/* Return the index of the last macro map that was looked up with
-   linemap_lookup.  */
-inline unsigned int &
-LINEMAPS_MACRO_CACHE (const line_maps *set)
-{
-  return LINEMAPS_CACHE (set, true);
-}
-
 /* Returns the last macro map used in the line table SET.  */
 inline line_map_macro *
 LINEMAPS_LAST_MACRO_MAP (const line_maps *set)
diff --git a/libcpp/init.cc b/libcpp/init.cc
index 9a20f8d8176..b97d7a7b00e 100644
--- a/libcpp/init.cc
+++ b/libcpp/init.cc
@@ -788,7 +788,7 @@ read_original_filename (cpp_reader *pfile)
  penult[1].reason = penult[0].reason;
  penult[0] = penult[1];
  pfile->line_table->info_ordinary.used--;
- pfile->line_table->info_ordinary.cache = 0;
+ pfile->line_table->info_ordinary.m_cache = 0;
}
 
  return true;
diff --git a/libcpp/line-map.cc b/libcpp/line-map.cc
index 5b67a70429f..385d54c53b7 100644
--- a/libcpp/line-map.cc
+++ b/libcpp/line-map.cc
@@ -638,7 +638,7 @@ linemap_add (line_maps *set, enum lc_reason reason,
   map->sysp = sysp;
   map->to_file = to_file;
   map->to_line = to_line;
-  LINEMAPS_ORDINARY_CACHE (set) = LINEMAPS_ORDINARY_USED (set) - 1;
+  set->info_ordinary.m_cache = LINEMAPS_ORDINARY_USED (set) - 1;
   /* Do not store range_bits here.  That's readjusted in
  linemap_line_start.  */
   map->m_range_bits = map->m_column_and_range_bits = 0;
@@ -786,7 +786,7 @@ linemap_enter_macro (class line_maps *set, struct 
cpp_hashnode *macro_node,
   memset (MACRO_MAP_LOCATIONS (map), 0,
  2 * num_tokens * sizeof (location_t));
 
-  LINEMAPS_MACRO_CACHE (set) = LINEMAPS_MACRO_USED (set) - 1;
+  set->info_macro.m_cache = LINEMAPS_MACRO_USED (set) - 1;
 
   return map;
 }
@@ -1116,7 +1116,7 @@ linemap_ordinary_map_lookup (const line_maps *set, 
location_t line)
   if (set

[pushed] libcpp: eliminate LINEMAPS_LAST_ALLOCATED{, _ORDINARY, _MACRO}_MAP

2023-10-08 Thread David Malcolm

Nothing uses these; delete them.

Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
Pushed to trunk as r14-4478-ga73c80d99736f0.

libcpp/ChangeLog:
* include/line-map.h (LINEMAPS_LAST_ALLOCATED_MAP): Delete.
(LINEMAPS_LAST_ALLOCATED_ORDINARY_MAP): Delete.
(LINEMAPS_LAST_ALLOCATED_MACRO_MAP): Delete.
---
 libcpp/include/line-map.h | 25 -------------------------
 1 file changed, 25 deletions(-)

diff --git a/libcpp/include/line-map.h b/libcpp/include/line-map.h
index b353df45209..7a172f4c846 100644
--- a/libcpp/include/line-map.h
+++ b/libcpp/include/line-map.h
@@ -936,16 +936,6 @@ LINEMAPS_LAST_MAP (const line_maps *set, bool map_kind)
  LINEMAPS_USED (set, map_kind) - 1);
 }
 
-/* Returns the last map that was allocated in the line table SET.
-   MAP_KIND shall be TRUE if we are interested in macro maps, FALSE
-   otherwise.*/
-inline line_map *
-LINEMAPS_LAST_ALLOCATED_MAP (const line_maps *set, bool map_kind)
-{
-  return LINEMAPS_MAP_AT (set, map_kind,
- LINEMAPS_ALLOCATED (set, map_kind) - 1);
-}
-
 /* Returns a pointer to the memory region where ordinary maps are
allocated in the line table SET.  */
 inline line_map_ordinary *
@@ -994,14 +984,6 @@ LINEMAPS_LAST_ORDINARY_MAP (const line_maps *set)
   return (line_map_ordinary *)LINEMAPS_LAST_MAP (set, false);
 }
 
-/* Returns a pointer to the last ordinary map allocated the line table
-   SET.  */
-inline line_map_ordinary *
-LINEMAPS_LAST_ALLOCATED_ORDINARY_MAP (const line_maps *set)
-{
-  return (line_map_ordinary *)LINEMAPS_LAST_ALLOCATED_MAP (set, false);
-}
-
 /* Returns a pointer to the beginning of the region where macro maps
are allocated.  */
 inline line_map_macro *
@@ -1059,13 +1041,6 @@ LINEMAPS_MACRO_LOWEST_LOCATION (const line_maps *set)
  : MAX_LOCATION_T + 1;
 }
 
-/* Returns the last macro map allocated in the line table SET.  */
-inline line_map_macro *
-LINEMAPS_LAST_ALLOCATED_MACRO_MAP (const line_maps *set)
-{
-  return (line_map_macro *)LINEMAPS_LAST_ALLOCATED_MAP (set, true);
-}
-
 extern void *get_data_from_adhoc_loc (const line_maps *, location_t);
 extern unsigned get_discriminator_from_adhoc_loc (const line_maps *, 
location_t);
 extern location_t get_location_from_adhoc_loc (const line_maps *,
-- 
2.26.3

[pushed] libcpp: eliminate COMBINE_LOCATION_DATA

2023-10-08 Thread David Malcolm

This patch eliminates the function "COMBINE_LOCATION_DATA" (which hasn't
been a macro since r6-739-g0501dbd932a7e9) and the function
"get_combined_adhoc_loc" in favor of a new
line_maps::get_or_create_combined_loc member function.

No functional change intended.

Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
Pushed to trunk as r14-4476-g1f68a3e8727f36.

gcc/cp/ChangeLog:
* module.cc (module_state::read_location): Update for renaming of
get_combined_adhoc_loc.

gcc/ChangeLog:
* genmatch.cc (main): Update for "m_" prefix of some fields of
line_maps.
* input.cc (make_location): Update for removal of
COMBINE_LOCATION_DATA.
(dump_line_table_statistics): Update for "m_" prefix of some
fields of line_maps.
(location_with_discriminator): Update for removal of
COMBINE_LOCATION_DATA.
(line_table_test::line_table_test): Update for "m_" prefix of some
fields of line_maps.
* toplev.cc (general_init): Likewise.
* tree.cc (set_block): Update for removal of
COMBINE_LOCATION_DATA.
(set_source_range): Likewise.

libcpp/ChangeLog:
* include/line-map.h (line_maps::reallocator): Rename to...
(line_maps::m_reallocator): ...this.
(line_maps::round_alloc_size): Rename to...
(line_maps::m_round_alloc_size): ...this.
(line_maps::location_adhoc_data_map): Rename to...
(line_maps::m_location_adhoc_data_map): ...this.
(line_maps::num_optimized_ranges): Rename to...
(line_maps::m_num_optimized_ranges): ...this.
(line_maps::num_unoptimized_ranges): Rename to...
(line_maps::m_num_unoptimized_ranges): ...this.
(get_combined_adhoc_loc): Delete decl.
(COMBINE_LOCATION_DATA): Delete.
* lex.cc (get_location_for_byte_range_in_cur_line): Update for
removal of COMBINE_LOCATION_DATA.
(warn_about_normalization): Likewise.
(_cpp_lex_direct): Likewise.
* line-map.cc (line_maps::~line_maps): Update for "m_" prefix of
some fields of line_maps.
(rebuild_location_adhoc_htab): Likewise.
(can_be_stored_compactly_p): Convert to...
(line_maps::can_be_stored_compactly_p): ...this private member
function.
(get_combined_adhoc_loc): Convert to...
(line_maps::get_or_create_combined_loc): ...this public member
function.
(line_maps::make_location): Update for removal of
COMBINE_LOCATION_DATA.
(get_data_from_adhoc_loc): Update for "m_" prefix of some fields
of line_maps.
(get_discriminator_from_adhoc_loc): Likewise.
(get_location_from_adhoc_loc): Likewise.
(get_range_from_adhoc_loc): Convert to...
(line_maps::get_range_from_adhoc_loc): ...this private member
function.
(line_maps::get_range_from_loc): Update for conversion of
get_range_from_adhoc_loc to a member function.
(linemap_init): Update for "m_" prefix of some fields of
line_maps.
(line_map_new_raw): Likewise.
(linemap_enter_macro): Likewise.
(linemap_get_statistics): Likewise.
---
 gcc/cp/module.cc  |   3 +-
 gcc/genmatch.cc   |   4 +-
 gcc/input.cc  |  26 
 gcc/toplev.cc |   4 +-
 gcc/tree.cc   |  12 ++--
 libcpp/include/line-map.h |  40 ++--
 libcpp/lex.cc |  20 +++---
 libcpp/line-map.cc| 128 ++
 8 files changed, 118 insertions(+), 119 deletions(-)

diff --git a/gcc/cp/module.cc b/gcc/cp/module.cc
index 77c9edcbc04..e3fb2299d93 100644
--- a/gcc/cp/module.cc
+++ b/gcc/cp/module.cc
@@ -15969,7 +15969,8 @@ module_state::read_location (bytes_in &sec) const
range.m_finish = read_location (sec);
unsigned discriminator = sec.u ();
if (locus != loc && range.m_start != loc && range.m_finish != loc)
- locus = get_combined_adhoc_loc (line_table, locus, range, NULL, 
discriminator);
+ locus = line_table->get_or_create_combined_loc (locus, range,
+ nullptr, 
discriminator);
   }
   break;
 
diff --git a/gcc/genmatch.cc b/gcc/genmatch.cc
index 03d325efdf6..e9d7afa7728 100644
--- a/gcc/genmatch.cc
+++ b/gcc/genmatch.cc
@@ -5458,8 +5458,8 @@ main (int argc, char **argv)
 
   line_table = XCNEW (class line_maps);
   linemap_init (line_table, 0);
-  line_table->reallocator = xrealloc;
-  line_table->round_alloc_size = round_alloc_size;
+  line_table->m_reallocator = xrealloc;
+  line_table->m_round_alloc_size = round_alloc_size;
 
   r = cpp_create_reader (CLK_GNUC99, NULL, line_table);
   cpp_callbacks *cb = cpp_get_callbacks (r);
diff --git a/gcc/input.cc b/gcc/input.cc
index a0e7cb17455..fd09fccb0e3 100644
--- a/gcc/input.cc
+++ b/gcc/input.cc
@@ -1231,7 +1231,8 @@ location_t
 make_location (location_t caret, source_range src_range)
 {

[pushed] diagnostics: fix ICE on sarif output when source file is unreadable [PR111700]

2023-10-08 Thread David Malcolm

Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
Pushed to trunk as r14-4474-g94caa6a6b4bd73.

gcc/ChangeLog:
PR driver/111700
* input.cc (file_cache::add_file): Update leading comment to
clarify that it can fail.
(file_cache::lookup_or_add_file): Likewise.
(file_cache::get_source_file_content): Gracefully handle
lookup_or_add_file failing.

gcc/testsuite/ChangeLog:
PR driver/111700
* c-c++-common/diagnostic-format-sarif-file-pr111700.c: New test.
---
 gcc/input.cc | 12 ++--
 .../diagnostic-format-sarif-file-pr111700.c  | 12 
 2 files changed, 22 insertions(+), 2 deletions(-)
 create mode 100644 
gcc/testsuite/c-c++-common/diagnostic-format-sarif-file-pr111700.c

diff --git a/gcc/input.cc b/gcc/input.cc
index 0b00b8923db..1956f2fcbad 100644
--- a/gcc/input.cc
+++ b/gcc/input.cc
@@ -443,7 +443,10 @@ file_cache::evicted_cache_tab_entry (unsigned 
*highest_use_count)
accessed by caret diagnostic.  This cache is added to an array of
cache and can be retrieved by lookup_file_in_cache_tab.  This
function returns the created cache.  Note that only the last
-   num_file_slots files are cached.  */
+   num_file_slots files are cached.
+
+   This can return nullptr if the FILE_PATH can't be opened for
+   reading, or if the content can't be converted to the input_charset.  */
 
 file_cache_slot*
 file_cache::add_file (const char *file_path)
@@ -547,7 +550,10 @@ file_cache::~file_cache ()
 /* Lookup the cache used for the content of a given file accessed by
caret diagnostic.  If no cached file was found, create a new cache
for this file, add it to the array of cached file and return
-   it.  */
+   it.
+
+   This can return nullptr on a cache miss if FILE_PATH can't be opened for
+   reading, or if the content can't be converted to the input_charset.  */
 
 file_cache_slot*
 file_cache::lookup_or_add_file (const char *file_path)
@@ -1072,6 +1078,8 @@ char_span
 file_cache::get_source_file_content (const char *file_path)
 {
   file_cache_slot *c = lookup_or_add_file (file_path);
+  if (c == nullptr)
+return char_span (nullptr, 0);
   return c->get_full_file_content ();
 }
 
diff --git a/gcc/testsuite/c-c++-common/diagnostic-format-sarif-file-pr111700.c 
b/gcc/testsuite/c-c++-common/diagnostic-format-sarif-file-pr111700.c
new file mode 100644
index 0000000..06605accf6e
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/diagnostic-format-sarif-file-pr111700.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-fdiagnostics-format=sarif-file" } */
+
+# 0 "this-file-does-not-exist.c"
+#warning message
+
+/* Verify that some JSON was written to a file with the expected name.  */
+/* { dg-final { verify-sarif-file } } */
+
+/* ...and that it at least includes the warning
+   { dg-final { scan-sarif-file "\"message\": " } }
+ { dg-final { scan-sarif-file "\"text\": \"#warning message" } } */
-- 
2.26.3

[pushed] libcpp: "const" and other cleanups

2023-10-08 Thread David Malcolm

No functional change intended.

Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
Pushed to trunk as r14-4475-g25af7c1a806c0c.

gcc/ChangeLog:
* input.cc (make_location): Move implementation to
line_maps::make_location.

libcpp/ChangeLog:
* include/line-map.h (line_maps::pure_location_p): New decl.
(line_maps::get_pure_location): New decl.
(line_maps::get_range_from_loc): New decl.
(line_maps::get_start): New.
(line_maps::get_finish): New.
(line_maps::make_location): New decl.
(get_range_from_loc): Make line_maps param const.
(get_discriminator_from_loc): Likewise.
(pure_location_p): Likewise.
(get_pure_location): Likewise.
(linemap_check_files_exited): Likewise.
(linemap_tracks_macro_expansion_locs_p): Likewise.
(linemap_location_in_system_header_p): Likewise.
(linemap_location_from_macro_definition_p): Likewise.
(linemap_macro_map_loc_unwind_toward_spelling): Likewise.
(linemap_included_from_linemap): Likewise.
(first_map_in_common): Likewise.
(linemap_compare_locations): Likewise.
(linemap_location_before_p): Likewise.
(linemap_resolve_location): Likewise.
(linemap_unwind_toward_expansion): Likewise.
(linemap_unwind_to_first_non_reserved_loc): Likewise.
(linemap_expand_location): Likewise.
(linemap_get_file_highest_location): Likewise.
(linemap_get_statistics): Likewise.
(linemap_dump_location): Likewise.
(linemap_dump): Likewise.
(line_table_dump): Likewise.
* internal.h (linemap_get_expansion_line): Likewise.
(linemap_get_expansion_filename): Likewise.
* line-map.cc (can_be_stored_compactly_p): Likewise.
(get_data_from_adhoc_loc): Drop redundant "class".
(get_discriminator_from_adhoc_loc): Likewise.
(get_location_from_adhoc_loc): Likewise.
(get_range_from_adhoc_loc): Likewise.
(get_range_from_loc): Make const and move implementation to...
(line_maps::get_range_from_loc): ...this new function.
(get_discriminator_from_loc): Make line_maps param const.
(pure_location_p): Make const and move implementation to...
(line_maps::pure_location_p): ...this new function.
(get_pure_location): Make const and move implementation to...
(line_maps::get_pure_location): ...this new function.
(linemap_included_from_linemap): Make line_maps param const.
(linemap_check_files_exited): Likewise.
(linemap_tracks_macro_expansion_locs_p): Likewise.
(linemap_macro_map_loc_unwind_toward_spelling): Likewise.
(linemap_get_expansion_line): Likewise.
(linemap_get_expansion_filename): Likewise.
(linemap_location_in_system_header_p): Likewise.
(first_map_in_common_1): Likewise.
(linemap_compare_locations): Likewise.
(linemap_macro_loc_to_spelling_point): Likewise.
(linemap_macro_loc_to_def_point): Likewise.
(linemap_macro_loc_to_exp_point): Likewise.
(linemap_resolve_location): Likewise.
(linemap_location_from_macro_definition_p): Likewise.
(linemap_unwind_toward_expansion): Likewise.
(linemap_unwind_to_first_non_reserved_loc): Likewise.
(linemap_expand_location): Likewise.
(linemap_dump): Likewise.
(linemap_dump_location): Likewise.
(linemap_get_file_highest_location): Likewise.
(linemap_get_statistics): Likewise.
(line_table_dump): Likewise.
---
 gcc/input.cc  |  15 ++---
 libcpp/include/line-map.h |  69 ---
 libcpp/internal.h |   4 +-
 libcpp/line-map.cc| 135 ++
 4 files changed, 143 insertions(+), 80 deletions(-)

diff --git a/gcc/input.cc b/gcc/input.cc
index 1956f2fcbad..a0e7cb17455 100644
--- a/gcc/input.cc
+++ b/gcc/input.cc
@@ -1204,7 +1204,9 @@ expansion_point_location (location_t location)
 }
 
 /* Construct a location with caret at CARET, ranging from START to
-   finish e.g.
+   FINISH.
+
+   For example, consider:
 
  112
 12345678901234567890
@@ -1220,16 +1222,7 @@ expansion_point_location (location_t location)
 location_t
 make_location (location_t caret, location_t start, location_t finish)
 {
-  location_t pure_loc = get_pure_location (caret);
-  source_range src_range;
-  src_range.m_start = get_start (start);
-  src_range.m_finish = get_finish (finish);
-  location_t combined_loc = COMBINE_LOCATION_DATA (line_table,
-  pure_loc,
-  src_range,
-  NULL,
-  0);
-  return combined_loc;
+  return line_table->make_location (caret, start, finish);
 }
 
 /* Same as above, but taking a source

Re: Darwin: Replace environment runpath with embedded [PR88590]

2023-10-08 Thread Iain Sandoe

+ Jeff

> On 8 Oct 2023, at 14:07, Nathanael Nerode  wrote:
> 
> I hope a global maintainer can step up.  I've been on hiatus from GCC work 
> for some years, and this was never my part of the build system anyway -- and 
> I don't use Darwin -- so I'm not qualified to review it.  It looks fine but 
> it should be reviewed by someone who knows what they're doing.

Thanks Nathanael for taking a look, 

@Jeff as we discussed at the Cauldron, I suspected it might be difficult to get 
this review, so I would really appreciate it if you could cast an eye over it at 
some point,

thanks
Iain

> 
> On Wed, Sep 20, 2023, at 9:52 AM, FX Coudert wrote:
>> ping**2 for a build maintainer or global maintainer to review, please
>> It restores testing on darwin >= 21, which is not currently working
>> 
>> If no maintainer is available to review build system, could I get 
>> guidance on how to proceed further?
>> 
>> Thanks,
>> FX
>> 
>> 
>>> Le 12 sept. 2023 à 19:52, FX Coudert  a écrit :
>>> 
>>> Hi build maintainers,
>>> 
>>> May I ping this series of patches for review? In particular, they allow to 
>>> restore testing on darwin, which is currently broken with darwin >= 21, due 
>>> to DYLD_LIBRARY_PATH being systematically removed from the environment by 
>>> system tools.
>>> 
>>> It has been tested for two years on darwin, and would allow to restore 
>>> regular regtesting on that target. It is a big step to help prevent bugs 
>>> being undetected on this port.
>>> 
>>> The patchset was okayed from the driver point of view, but we need a build 
>>> reviewer (or global reviewer) to okay those bits. Original presentation of 
>>> the patches:
>>> 
>>> --
>>> I’d like to post an updated and rebased version of Iain Sandoe’s patches 
>>> for modern darwin, originally posted in November 2021: 
>>> https://gcc.gnu.org/pipermail/gcc-patches/2021-November/584775.html
>>> 
>>> The rationale in that message is pretty much unchanged, and the patches 
>>> have been since tested thoroughly on darwin (both Intel and ARM) for almost 
>>> two years now. We have been shipping Iain’s branch (including these 
>>> patches) since then in Homebrew and most other major distros of GCC on 
>>> Darwin. So I think it’s been very thoroughly tested.
>>> 
>>> The main comment that arose from review in the previous incarnation was the 
>>> need to at least offer the libtool part of the patch to upstream, in order 
>>> to reduce in the long term the divergence between our version and upstream. 
>>> I have done so in https://savannah.gnu.org/patch/index.php?10385
>>> 
>>> (I would also note that I have offered other suggestions of small snippets 
>>> that could be upstream in libtool for darwin, but have not received much 
>>> feedback for now: https://savannah.gnu.org/patch/?10371)
>>> --
>>> 
>>> 
>>> Thanks,
>>> FX
>>> 
>>> 
>>> 
 Le 29 août 2023 à 22:17, FX Coudert  a écrit :
 
> I think a build machinery review is needed.
 
 Thanks. CC’ing the relevant maintainers for review of the build part.
 The driver part and the darwin-specific part are already okayed.
 
 FX
>> 
>> 
>> Attachments:
>> * 0001-Driver-Provide-a-spec-to-insert-rpaths-for-compiler-.patch
>> * 0002-Darwin-Allow-for-configuring-Darwin-to-use-embedded-.patch
>> * 0003-Darwin-rpaths-Add-with-darwin-extra-rpath.patch
>> * 0004-Testsuite-allow-non-installed-testing-on-darwin.patch
>> * 0005-Doc-document-the-new-Darwin-options.patch

[ARC PATCH] Improved SImode shifts and rotates on !TARGET_BARREL_SHIFTER.

2023-10-08 Thread Roger Sayle


This patch completes the ARC back-end's transition to using pre-reload
splitters for SImode shifts and rotates on targets without a barrel
shifter.  The core part is that the shift_si3 define_insn is no longer
needed, as shifts and rotates that don't require a loop are split
before reload, and then because shift_si3_loop is the only caller
of output_shift, both can be significantly cleaned up and simplified.
The output_shift function (Claudiu's "the elephant in the room") is
renamed output_shift_loop, which handles just the four instruction
zero-overhead loop implementations.

Aside from the clean-ups, the user visible changes are much improved
implementations of SImode shifts and rotates on affected targets.

For the function:
unsigned int rotr_1 (unsigned int x) { return (x >> 1) | (x << 31); }

GCC with -O2 -mcpu=em would previously generate:

rotr_1: lsr_s r2,r0
bmsk_s r0,r0,0
ror r0,r0
j_s.d   [blink]
or_s r0,r0,r2

with this patch, we now generate:

j_s.d   [blink]
ror r0,r0

For the function:
unsigned int rotr_31 (unsigned int x) { return (x >> 31) | (x << 1); }

GCC with -O2 -mcpu=em would previously generate:

rotr_31:
mov_s   r2,r0   ;4
asl_s r0,r0
add.f 0,r2,r2
rlc r2,0
j_s.d   [blink]
or_s r0,r0,r2

with this patch we now generate an add.f followed by an adc:

rotr_31:
add.f   r0,r0,r0
j_s.d   [blink]
add.cs  r0,r0,1


Shifts by constants requiring a loop have been improved for even counts
by performing two operations in each iteration:

int shl10(int x) { return x >> 10; }

Previously looked like:

shl10:  mov.f lp_count, 10
lpnz 2f
asr r0,r0
nop
2:  # end single insn loop
j_s [blink]


And now becomes:

shl10:
mov lp_count,5
lp  2f
asr r0,r0
asr r0,r0
2:  # end single insn loop
j_s [blink]


So emulating ARC's SWAP on architectures that don't have it:

unsigned int rotr_16 (unsigned int x) { return (x >> 16) | (x << 16); }

previously required 10 instructions and ~70 cycles:

rotr_16:
mov_s   r2,r0   ;4
mov.f lp_count, 16
lpnz    2f
add r0,r0,r0
nop
2:  # end single insn loop
mov.f lp_count, 16
lpnz    2f
lsr r2,r2
nop
2:  # end single insn loop
j_s.d   [blink]
or_s    r0,r0,r2

now becomes just 4 instructions and ~18 cycles:

rotr_16:
mov lp_count,8
lp  2f
ror r0,r0
ror r0,r0
2:  # end single insn loop
j_s [blink]


This patch has been tested with a cross-compiler to arc-linux hosted
on x86_64-pc-linux-gnu and (partially) tested with the compile-only
portions of the testsuite with no regressions.  Ok for mainline, if
your own testing shows no issues?


2023-10-07  Roger Sayle  

gcc/ChangeLog
* config/arc/arc-protos.h (output_shift): Rename to...
(output_shift_loop): Tweak API to take an explicit rtx_code.
(arc_split_ashl): Prototype new function here.
(arc_split_ashr): Likewise.
(arc_split_lshr): Likewise.
(arc_split_rotl): Likewise.
(arc_split_rotr): Likewise.
* config/arc/arc.cc (output_shift): Delete local prototype.  Rename.
(output_shift_loop): New function replacing output_shift to output
a zero overhead loop for SImode shifts and rotates on ARC targets
without barrel shifter (i.e. no hardware support for these insns).
(arc_split_ashl): New helper function to split *ashlsi3_nobs.
(arc_split_ashr): New helper function to split *ashrsi3_nobs.
(arc_split_lshr): New helper function to split *lshrsi3_nobs.
(arc_split_rotl): New helper function to split *rotlsi3_nobs.
(arc_split_rotr): New helper function to split *rotrsi3_nobs.
* config/arc/arc.md (any_shift_rotate): New define_code_iterator.
(define_code_attr insn): New code attribute to map to pattern name.
(<insn>si3): New expander unifying previous ashlsi3,
ashrsi3 and lshrsi3 define_expands.  Adds rotlsi3 and rotrsi3.
(*<insn>si3_nobs): New define_insn_and_split that
unifies the previous *ashlsi3_nobs, *ashrsi3_nobs and *lshrsi3_nobs.
We now call arc_split_<insn> in arc.cc to implement each split.
(shift_si3): Delete define_insn, all shifts/rotates are now split.
(shift_si3_loop): Rename to...
(<insn>si3_loop): define_insn to handle loop implementations of
SImode shifts and rotates, calling output_shift_loop for template.
(rotrsi3): Rename to...
(*rotrsi3_insn): define_insn for TARGET_BARREL_SHIFTER's ror.
(*rotlsi3): New define_insn_and_split to transform left rotates
into right rotates before reload.
(rotlsi3_cnt1): New define_insn_and_split to implement a left
rotate by one bit using an add.f followed

Re: [PING^1][PATCH] rs6000: Change bitwise xor to inequality operator [PR106907]

2023-10-08 Thread jeevitha

Ping!

please review.

Thanks & Regards
Jeevitha

On 16/06/23 9:55 am, Peter Bergner wrote:
> On 6/12/23 6:18 AM, P Jeevitha wrote:
>> Bitwise xor performed on bool
>> is similar to checking inequality. So changed to inequality
>> operator (!=) instead of bitwise xor (^).
> [snip]
>> -  if (swapped ^ !BYTES_BIG_ENDIAN
> [snip]
>> +  if (swapped != !BYTES_BIG_ENDIAN
> 
> I know Andreas mentioned using "swapped != !BYTES_BIG_ENDIAN" in
> the bugzilla, but that's the same as "swapped == BYTES_BIG_ENDIAN",
> and it doesn't contain a double-negative and seems a little clearer.
> 
> It's up to Segher though...and if we go with this, then the ChangeLog
> entry needs to be updated slightly since we're no longer testing for
> inequality.
> 
> Peter
>

RE: [PATCH v4] [tree-optimization/110279] Consider FMA in get_reassociation_width

2023-10-08 Thread Di Zhao OS

Attached is a new version of the patch.

> -Original Message-
> From: Richard Biener 
> Sent: Friday, October 6, 2023 5:33 PM
> To: Di Zhao OS 
> Cc: gcc-patches@gcc.gnu.org
> Subject: Re: [PATCH v4] [tree-optimization/110279] Consider FMA in
> get_reassociation_width
> 
> On Thu, Sep 14, 2023 at 2:43 PM Di Zhao OS
>  wrote:
> >
> > This is a new version of the patch on "nested FMA".
> > Sorry for updating this after so long, I've been studying and
> > writing micro cases to sort out the cause of the regression.
> 
> Sorry for taking so long to reply.
> 
> > First, following previous discussion:
> > (https://gcc.gnu.org/pipermail/gcc-patches/2023-September/629080.html)
> >
> > 1. From testing more altered cases, I don't think the
> > problem is that reassociation works locally. In that:
> >
> >   1) On the example with multiplications:
> >
> > tmp1 = a + c * c + d * d + x * y;
> > tmp2 = x * tmp1;
> > result += (a + c + d + tmp2);
> >
> >   Given "result" rewritten by width=2, the performance is
> >   worse if we rewrite "tmp1" with width=2. In contrast, if we
> >   remove the multiplications from the example (and make "tmp1"
> >   not single used), and still rewrite "result" by width=2, then
> >   rewriting "tmp1" with width=2 is better. (Make sense because
> >   the tree's depth at "result" is still smaller if we rewrite
> >   "tmp1".)
> >
> >   2) I tried to modify the assembly code of the example without
> >   FMA, so the width of "result" is 4. On Ampere1 there's no
> >   obvious improvement. So although this is an interesting
> >   problem, it doesn't seem like the cause of the regression.
> 
> OK, I see.
> 
> > 2. From assembly code of the case with FMA, one problem is
> > that, rewriting "tmp1" to parallel didn't decrease the
> > minimum CPU cycles (taking MULT_EXPRs into account), but
> > increased code size, so the overhead is increased.
> >
> >a) When "tmp1" is not re-written to parallel:
> > fmadd d31, d2, d2, d30
> > fmadd d31, d3, d3, d31
> > fmadd d31, d4, d5, d31  //"tmp1"
> > fmadd d31, d31, d4, d3
> >
> >b) When "tmp1" is re-written to parallel:
> > fmul  d31, d4, d5
> > fmadd d27, d2, d2, d30
> > fmadd d31, d3, d3, d31
> > fadd  d31, d31, d27 //"tmp1"
> > fmadd d31, d31, d4, d3
> >
> > For version a), there are 3 dependent FMAs to calculate "tmp1".
> > For version b), there are also 3 dependent instructions in the
> > longer path: the 1st, 3rd and 4th.
> 
> Yes, it doesn't really change anything.  The patch has
> 
> +  /* If there's code like "acc = a * b + c * d + acc" in a tight loop, some
> + uarchs can execute results like:
> +
> +   _1 = a * b;
> +   _2 = .FMA (c, d, _1);
> +   acc_1 = acc_0 + _2;
> +
> + in parallel, while turning it into
> +
> +   _1 = .FMA(a, b, acc_0);
> +   acc_1 = .FMA(c, d, _1);
> +
> + hinders that, because then the first FMA depends on the result
> of preceding
> + iteration.  */
> 
> I can't see what can be run in parallel for the first case.  The .FMA
> depends on the multiplication a * b.  Iff the uarch somehow decomposes
> .FMA into multiply + add then the c * d multiply could run in parallel
> with the a * b multiply which _might_ be able to hide some of the
> latency of the full .FMA.  Like on x86 Zen FMA has a latency of 4
> cycles but a multiply only 3.  But I never got confirmation from any
> of the CPU designers that .FMAs are issued when the multiply
> operands are ready and the add operand can be forwarded.
> 
> I also wonder why the multiplications of the two-FMA sequence
> then cannot be executed at the same time?  So I have some doubt
> of the theory above.

The parallel execution for the code snippet above was the other
issue (previously discussed here:
https://gcc.gnu.org/pipermail/gcc-patches/2023-August/628960.html).
Sorry it's a bit confusing to include that here, but these 2 fixes
need to be combined to avoid new regressions. Since considering
FMA in get_reassociation_width produces more results of width=1,
so there would be more loop depending FMA chains.

> Iff this really is the reason for the sequence to execute with lower
> overall latency and we want to attack this on GIMPLE then I think
> we need a target hook telling us this fact (I also wonder if such
> behavior can be modeled in the scheduler pipeline description at all?)
> 
> > So it seems to me the current get_reassociation_width algorithm
> > isn't optimal in the presence of FMA. So I modified the patch to
> > improve get_reassociation_width, rather than check for code
> > patterns. (Although there could be some other complicated
> > factors so the regression is more obvious when there's "nested
> > FMA". But with this patch that should be avoided or reduced.)
> >
> > With this patch 508.namd_r 1-copy run has 7% improvement on
> > Ampere1, on Intel Xeon there's about 3%. While I'm still
> > collecting data on other CPUs,

Re: Re: [PATCH] TEST: Fix dump FAIL of vect-multitypes-16.c for RVV

2023-10-08 Thread 钟居哲

No. They are not the same property.

Maybe I should pretend RVV supports vect_pack/vect_unpack and enable all the 
tests in target-supports.exp?

juzhe.zh...@rivai.ai

From: Jeff Law
Date: 2023-10-08 23:09
To: Juzhe-Zhong; gcc-patches
CC: rguenther
Subject: Re: [PATCH] TEST: Fix dump FAIL of vect-multitypes-16.c for RVV

On 10/8/23 05:35, Juzhe-Zhong wrote:
> RVV (RISC-V Vector) doesn't enable vect_unpack, but we still vectorize this 
> case well.
> So, adjust dump check for RVV.
> 
> gcc/testsuite/ChangeLog:
> 
> * gcc.dg/vect/vect-multitypes-16.c: Fix dump FAIL of RVV.
I'd hoped to avoid a bunch of risc-v special casing in the generic part 
of the testsuite.  Basically the more we have target specific 
conditionals rather than conditionals using properties, the more likely 
we are to keep revisiting this stuff over time and possibly for other 
architectures as well.

What is it about risc-v's vector support that allows it to optimize this 
case?  Is it the same property that allows us to handle the outer loop 
vectorization tests that you changed in another patch?

Neither an ACK nor NAK right now.

Jeff

Re: Re: [PATCH] TEST: Fix vect_cond_arith_* dump checks for RVV

2023-10-08 Thread 钟居哲

It can't work. Still failed:

spawn -ignore SIGHUP 
/work/home/jzzhong/work/toolchain/riscv/build/dev-rv64gcv_zfh-lp64d-medany-newlib-spike-debug/build-gcc-newlib-stage2/gcc/xgcc
 
-B/work/home/jzzhong/work/toolchain/riscv/build/dev-rv64gcv_zfh-lp64d-medany-newlib-spike-debug/build-gcc-newlib-stage2/gcc/
 riscv_ext_v3079949.c -march=rv64gcv_zfh -mabi=lp64d -mcmodel=medany 
-fdiagnostics-plain-output -Wno-complain-wrong-lang -S -o riscv_ext_v3079949.s^M
PASS: gcc.dg/vect/vect-cond-arith-6.c scan-tree-dump-times vect "vectorizing 
stmts using SLP" 4
gcc.dg/vect/vect-cond-arith-6.c: pattern found 2 times
FAIL: gcc.dg/vect/vect-cond-arith-6.c scan-tree-dump-times optimized " = 
\\.COND(_LEN)?_ADD" 1
gcc.dg/vect/vect-cond-arith-6.c: pattern found 2 times
FAIL: gcc.dg/vect/vect-cond-arith-6.c scan-tree-dump-times optimized " = 
\\.COND(_LEN)?_SUB" 1
gcc.dg/vect/vect-cond-arith-6.c: pattern found 2 times
FAIL: gcc.dg/vect/vect-cond-arith-6.c scan-tree-dump-times optimized " = 
\\.COND(_LEN)?_MUL" 1
gcc.dg/vect/vect-cond-arith-6.c: pattern found 2 times
FAIL: gcc.dg/vect/vect-cond-arith-6.c scan-tree-dump-times optimized " = 
\\.COND(_LEN)?_RDIV" 1

I change it into:

/* { dg-final { scan-tree-dump-times { = \.COND_LEN_ADD} 1 "optimized" { target 
vect_double_cond_arith } } } */
/* { dg-final { scan-tree-dump-times { = \.COND_LEN_SUB} 1 "optimized" { target 
vect_double_cond_arith } } } */
/* { dg-final { scan-tree-dump-times { = \.COND_LEN_MUL} 1 "optimized" { target 
vect_double_cond_arith } } } */
/* { dg-final { scan-tree-dump-times { = \.COND_LEN_RDIV} 1 "optimized" { 
target vect_double_cond_arith } } } */

It works.

This also can work:

/* { dg-final { scan-tree-dump-times { = \.COND_L?E?N?_?ADD} 1 "optimized" { 
target vect_double_cond_arith } } } */
/* { dg-final { scan-tree-dump-times { = \.COND_L?E?N?_?SUB} 1 "optimized" { 
target vect_double_cond_arith } } } */
/* { dg-final { scan-tree-dump-times { = \.COND_L?E?N?_?MUL} 1 "optimized" { 
target vect_double_cond_arith } } } */
/* { dg-final { scan-tree-dump-times { = \.COND_L?E?N?_?RDIV} 1 "optimized" { 
target vect_double_cond_arith } } } */




juzhe.zh...@rivai.ai
 
From: Jeff Law
Date: 2023-10-08 23:18
To: 钟居哲; gcc-patches
CC: rguenther; rdapp.gcc
Subject: Re: [PATCH] TEST: Fix vect_cond_arith_* dump checks for RVV
 
 
On 10/7/23 16:02, 钟居哲 wrote:
> Do you mean change it like this ?
> 
> /* { dg-final { scan-tree-dump-times { = \.COND_L?E?N?_?RDIV} 1 "optimized" { 
> target vect_double_cond_arith } } } */
I was thinking something more like
COND(_LEN)?_ADD
 
The idea being we match _LEN conditionally as a group.
 
jeff

Re: [PATCH] TEST: Fix vect_cond_arith_* dump checks for RVV

2023-10-08 Thread Jeff Law





On 10/7/23 16:02, 钟居哲 wrote:

Do you mean change it like this ?

/* { dg-final { scan-tree-dump-times { = \.COND_L?E?N?_?RDIV} 1 "optimized" { 
target vect_double_cond_arith } } } */

I was thinking something more like
COND(_LEN)?_ADD

The idea being we match _LEN conditionally as a group.

jeff

Re: [PATCH] TEST: Fix dump FAIL of vect-multitypes-16.c for RVV

2023-10-08 Thread Jeff Law





On 10/8/23 05:35, Juzhe-Zhong wrote:

RVV (RISC-V Vector) doesn't enable vect_unpack, but we still vectorize this 
case well.
So, adjust dump check for RVV.

gcc/testsuite/ChangeLog:

* gcc.dg/vect/vect-multitypes-16.c: Fix dump FAIL of RVV.
I'd hoped to avoid a bunch of risc-v special casing in the generic part 
of the testsuite.  Basically the more we have target specific 
conditionals rather than conditionals using properties, the more likely 
we are to keep revisiting this stuff over time and possibly for other 
architectures as well.


What is it about risc-v's vector support that allows it to optimize this 
case?  Is it the same property that allows us to handle the outer loop 
vectorization tests that you changed in another patch?


Neither an ACK nor NAK right now.

Jeff

Re: [PATCH] LoongArch: Adjust makefile dependency for loongarch headers.

2023-10-08 Thread Jan-Benedict Glaw

On Sat, 2023-10-07 16:50:14 +0800, Yang Yujie  wrote:
> gcc/ChangeLog:
> 
>   * config.gcc: Add loongarch-driver.h to tm_files.
>   * config/loongarch/loongarch.h: Do not include loongarch-driver.h.
>   * config/loongarch/t-loongarch: Append loongarch-multilib.h to $(GTM_H)
>   instead of $(TM_H) for building generator programs.
> ---
>  gcc/config.gcc   | 2 +-
>  gcc/config/loongarch/loongarch.h | 3 ---
>  gcc/config/loongarch/t-loongarch | 3 ++-
>  3 files changed, 3 insertions(+), 5 deletions(-)

This patch fixes it for me:

http://toolchain.lug-owl.de/laminar/jobs/gcc-loongarch64-linux-gnuf64
http://toolchain.lug-owl.de/laminar/jobs/gcc-loongarch64-linux-gnuf32
http://toolchain.lug-owl.de/laminar/jobs/gcc-loongarch64-linux-gnusf
http://toolchain.lug-owl.de/laminar/jobs/gcc-loongarch64-linux

MfG, JBG

-- 


signature.asc
Description: PGP signature

Re: [PATCH][_GLIBCXX_INLINE_VERSION] Fix

2023-10-08 Thread Iain Sandoe

Hi François,

> On 21 Sep 2023, at 05:41, François Dumont  wrote:
> 
> Tests were successful, ok to commit ?
> 
> On 20/09/2023 19:51, François Dumont wrote:
>> libstdc++: [_GLIBCXX_INLINE_VERSION] Add handle_contract_violation symbol 
>> alias
>> 
>> libstdc++-v3/ChangeLog:
>> 
>> * src/experimental/contract.cc
>> [_GLIBCXX_INLINE_VERSION](handle_contract_violation): Provide symbol 
>> alias
>> without version namespace decoration for gcc.

This does not work in the source on targets without support for symbol aliases 
(Darwin is one)
“../experimental/contract.cc:79:8: warning: alias definitions not supported in 
Mach-O; ignored”

- there might be a way to do it at link-time (for one symbol not too bad); I 
will have to poke at
  it a bit.
Iain

>> 
>> Here is what I'm testing eventually, ok to commit if successful ?
>> 
>> François
>> 
>> On 20/09/2023 11:32, Jonathan Wakely wrote:
>>> On Wed, 20 Sept 2023 at 05:51, François Dumont via Libstdc++
>>>  wrote:
>>>> libstdc++: Remove std::constract_violation from versioned namespace
>>> Spelling mistake in contract_violation, and it's not
>>> std::contract_violation, it's std::experimental::contract_violation
>>> 
>>>> GCC expects this type to be in std namespace directly.
>>> Again, it's in std::experimental not in std directly.
>>> 
>>> Will this change cause problems when including another experimental
>>> header, which does put experimental below std::__8?
>>> 
>>> I think std::__8::experimental and std::experimental will become ambiguous.
>>> 
>>> Maybe we do want to remove the inline __8 namespace from all
>>> experimental headers. That needs a bit more thought though.
>>> 
>>>> libstdc++-v3/ChangeLog:
>>>> 
>>>>   * include/experimental/contract:
>>>>   Remove 
>>>> _GLIBCXX_BEGIN_NAMESPACE_VERSION/_GLIBCXX_END_NAMESPACE_VERSION.
>>> This line is too long for the changelog.
>>> 
>>>> It does fix 29 g++.dg/contracts in gcc testsuite.
>>>> 
>>>> Ok to commit ?
>>>> 
>>>> François

Re: [PATCH] Fix coroutine tests for libstdc++ gnu-version-namespace mode

2023-10-08 Thread Iain Sandoe

Hi François,

> On 23 Sep 2023, at 21:10, François Dumont  wrote:
> 
> I'm eventually fixing those tests the same way we manage this problem in 
> libstdc++ testsuite.
> 
>testsuite: Add optional libstdc++ version namespace in expected diagnostic
> 
> When libstdc++ is built with --enable-symvers=gnu-versioned-namespace 
> diagnostics are
> showing this namespace, currently __8.
> 
> gcc/testsuite/ChangeLog:
> 
> * testsuite/g++.dg/coroutines/coro-bad-alloc-00-bad-op-new.C: Add 
> optional
> '__8' version namespace in expected diagnostic.
> * testsuite/g++.dg/coroutines/coro-bad-alloc-01-bad-op-del.C: 
> Likewise.
> * testsuite/g++.dg/coroutines/coro-bad-alloc-02-no-op-new-nt.C: 
> Likewise.
> * 
> testsuite/g++.dg/coroutines/coro-bad-grooaf-01-grooaf-expected.C: Likewise.
> * testsuite/g++.dg/coroutines/pr97438.C: Likewise.
> * testsuite/g++.dg/coroutines/ramp-return-b.C: Likewise.
> 
> Tested under Linux x86_64.
> 
> I'm contributing to libstdc++ so I already have write access.
> 
> Ok to commit ?

As author of the tests, this LGTM as a suitable fix for now (at least, once the 
main
patch to fix versioned namespaces lands).

However, IMO, this could become quite painful as more g++ tests make use of std 
headers
(which is not really optional for facilities like this that are tightly-coupled 
between the FE and
the library).

For the future, it does seem that a more complete solution might be to 
introduce a
testsuite-wide definition for the C++ versioned std:: introducer, so that we 
can update it in one
place as the version changes.

So (as a thought experiment):
 - we’d have something of the form “CXX_STD” as a tcl global
 - we’d add the presence/absence of versioning to the relevant site.exp (which
   means recognising the versioning choice also in the GCC configure)
 - we’d migrate tests to using ${CXX_STD} instead of "std::__N”  in matches

… I guess an alternative could be to cook up some alternate warning/error/etc
   match functions that cater for arbitrary inline namespaces but that seems 
like a much
   more tricky and invasive testsuite change.

thoughts?
Iain

[PATCH] openmp: Add support for the 'indirect' clause in C/C++

2023-10-08 Thread Kwok Cheung Yeung


Hello

This patch adds support for the 'indirect' clause in the 'declare 
target' directive in C/C++ (Fortran to follow) and adds the necessary 
infrastructure to support indirect calls in target regions. This allows 
one to pass in pointers to functions that have been declared as indirect 
from the host to the target, then invoked via the passed-in pointer on 
the target device.


This is done by processing the functions declared as indirect in a 
similar way to regular kernels - they are added as a separate entry to 
the offload tables which are embedded into the target code by mkoffload. 
When the image is loaded, the host reads the target version of the 
offload table, then combines it with the host version to produce an 
address map. This map is then written to the device memory and a pointer 
is set to point to it.


The omp_device_lower pass now runs if any indirect functions are 
present. The pass searches for any indirect function calls, and runs a
new builtin BUILT_IN_GOMP_TARGET_MAP_INDIRECT_PTR to process the 
function pointer before making the indirect call.


The builtin (implemented by GOMP_target_map_indirect_ptr) searches 
through the address map, returning the target address if found, or the 
original address if not. I've added two search algorithms - a simple 
linear search through the map, and another which builds up a splay tree 
from the map and uses that to do the search. I've enabled the splay-tree 
version by default, but the linear search is useful for debugging 
purposes so I have kept it in.


The C++ support is currently limited to normal indirect calls - virtual 
calls on objects do not currently work. I believe the main issue is that 
the vtables are not currently copied across to the target. I have added 
some handling for OBJ_TYPE_REF to prevent the compiler from ICEing when 
it encounters a virtual call, but without the vtable this cannot work 
properly.


Tested on a x86_64 host with offloading to NVPTX and AMD GCN, and 
bootstrapped on a x86_64 host. Okay for mainline?


Thanks

Kwok

From 46129c254990a9fff4b6d8512f04ad8fa7d61f0e Mon Sep 17 00:00:00 2001
From: Kwok Cheung Yeung 
Date: Sun, 8 Oct 2023 13:50:25 +0100
Subject: [PATCH] openmp: Add support for the 'indirect' clause in C/C++

This adds support for the 'indirect' clause in the 'declare target'
directive.  Functions declared as indirect may be called via function
pointers passed from the host in offloaded code.

Virtual calls to member functions via the object pointer in C++ are
currently not supported in target regions.

2023-10-08  Kwok Cheung Yeung  

gcc/c-family/
* c-attribs.cc (c_common_attribute_table): Add attribute for
indirect functions.
* c-pragma.h (enum pragma_omp_clause): Add entry for indirect clause.

gcc/c/
* c-decl.cc (c_decl_attributes): Add attribute for indirect
functions.
* c-lang.h (c_omp_declare_target_attr): Add indirect field.
* c-parser.cc (c_parser_omp_clause_name): Handle indirect clause.
(c_parser_omp_clause_indirect): New.
(c_parser_omp_all_clauses): Handle indirect clause.
(OMP_DECLARE_TARGET_CLAUSE_MASK): Add indirect clause to mask.
(c_parser_omp_declare_target): Handle indirect clause.
(OMP_BEGIN_DECLARE_TARGET_CLAUSE_MASK): Add indirect clause to mask.
(c_parser_omp_begin): Handle indirect clause.
* c-typeck.cc (c_finish_omp_clauses): Handle indirect clause.

gcc/cp/
* cp-tree.h (cp_omp_declare_target_attr): Add indirect field.
* decl2.cc (cplus_decl_attributes): Add attribute for indirect
functions.
* parser.cc (cp_parser_omp_clause_name): Handle indirect clause.
(cp_parser_omp_clause_indirect): New.
(cp_parser_omp_all_clauses): Handle indirect clause.
(handle_omp_declare_target_clause): Add extra parameter.  Add
indirect attribute for indirect functions.
(OMP_DECLARE_TARGET_CLAUSE_MASK): Add indirect clause to mask.
(cp_parser_omp_declare_target): Handle indirect clause.
(OMP_BEGIN_DECLARE_TARGET_CLAUSE_MASK): Add indirect clause to mask.
(cp_parser_omp_begin): Handle indirect clause.
* semantics.cc (finish_omp_clauses): Handle indirect clause.

gcc/
* lto-cgraph.cc (enum LTO_symtab_tags): Add tag for indirect
functions.
(output_offload_tables): Write indirect functions.
(input_offload_tables): Read indirect functions.
* lto-section-names.h (OFFLOAD_IND_FUNC_TABLE_SECTION_NAME): New.
* omp-builtins.def (BUILT_IN_GOMP_TARGET_MAP_INDIRECT_PTR): New.
* omp-offload.cc (offload_ind_funcs): New.
(omp_discover_implicit_declare_target): Add functions marked with
'omp declare target indirect' to indirect functions list.
(omp_finish_file): Add indirect functions to section for offload
indirect functions.
(execute_omp_device_lower): Redirect indirect calls on target by

Re: Darwin: Replace environment runpath with embedded [PR88590]

2023-10-08 Thread Nathanael Nerode

I hope a global maintainer can step up.  I've been on hiatus from GCC work for 
some years, and this was never my part of the build system anyway -- and I 
don't use Darwin -- so I'm not qualified to review it.  It looks fine but it 
should be reviewed by someone who knows what they're doing.

On Wed, Sep 20, 2023, at 9:52 AM, FX Coudert wrote:
> ping**2 for a build maintainer or global maintainer to review, please
> It restores testing on darwin >= 21, which is not currently working
>
> If no maintainer is available to review build system, could I get 
> guidance on how to proceed further?
>
> Thanks,
> FX
>
>
>> Le 12 sept. 2023 à 19:52, FX Coudert  a écrit :
>> 
>> Hi build maintainers,
>> 
>> May I ping this series of patches for review? In particular, they allow to 
>> restore testing on darwin, which is currently broken with darwin >= 21, due 
>> to DYLD_LIBRARY_PATH being systematically removed from the environment by 
>> system tools.
>> 
>> It has been tested for two years on darwin, and would allow to restore 
>> regular regtesting on that target. It is a big step to help prevent bugs 
>> being undetected on this port.
>> 
>> The patchset was okayed from the driver point of view, but we need a build 
>> reviewer (or global reviewer) to okay those bits. Original presentation of 
>> the patches:
>> 
>> --
>> I’d like to post an updated and rebased version of Iain Sandoe’s patches for 
>> modern darwin, originally posted in November 2021: 
>> https://gcc.gnu.org/pipermail/gcc-patches/2021-November/584775.html
>> 
>> The rationale in that message is pretty much unchanged, and the patches have 
>> been since tested thoroughly on darwin (both Intel and ARM) for almost two 
>> years now. We have been shipping Iain’s branch (including these patches) 
>> since then in Homebrew and most other major distros of GCC on Darwin. So I 
>> think it’s been very thoroughly tested.
>> 
>> The main comment that arose from review in the previous incarnation was the 
>> need to at least offer the libtool part of the patch to upstream, in order 
>> to reduce in the long term the divergence between our version and upstream. 
>> I have done so in https://savannah.gnu.org/patch/index.php?10385
>> 
>> (I would also note that I have offered other suggestions of small snippets 
>> that could be upstream in libtool for darwin, but have not received much 
>> feedback for now: https://savannah.gnu.org/patch/?10371)
>> --
>> 
>> 
>> Thanks,
>> FX
>> 
>> 
>> 
>>> Le 29 août 2023 à 22:17, FX Coudert  a écrit :
>>> 
>>>> I think a build machinery review is needed.
>>> 
>>> Thanks. CC’ing the relevant maintainers for review of the build part.
>>> The driver part and the darwin-specific part are already okayed.
>>> 
>>> FX
>
>
> Attachments:
> * 0001-Driver-Provide-a-spec-to-insert-rpaths-for-compiler-.patch
> * 0002-Darwin-Allow-for-configuring-Darwin-to-use-embedded-.patch
> * 0003-Darwin-rpaths-Add-with-darwin-extra-rpath.patch
> * 0004-Testsuite-allow-non-installed-testing-on-darwin.patch
> * 0005-Doc-document-the-new-Darwin-options.patch

Ping: [PATCH v5] C, ObjC: Add -Wunterminated-string-initialization

2023-10-08 Thread Alejandro Colomar

Hi,

Gentle ping here.

Thanks,
Alex

On Sun, Oct 01, 2023 at 06:24:00PM +0200, Alejandro Colomar wrote:
> Warn about the following:
> 
> char  s[3] = "foo";
> 
> Initializing a char array with a string literal of the same length as
> the size of the array is usually a mistake.  Rarely is the case where
> one wants to create a non-terminated character sequence from a string
> literal.
> 
> In some cases, for writing faster code, one may want to use arrays
> instead of pointers, since that removes the need for storing an array of
> pointers apart from the strings themselves.
> 
> char  *log_levels[]   = { "info", "warning", "err" };
> vs.
> char  log_levels[][7] = { "info", "warning", "err" };
> 
> This forces the programmer to specify a size, which might change if a
> new entry is later added.  Having no way to enforce null termination is
> very dangerous, however, so it is useful to have a warning for this, so
> that the compiler can make sure that the programmer didn't make any
> mistakes.  This warning catches the bug above, so that the programmer
> will be able to fix it and write:
> 
> char  log_levels[][8] = { "info", "warning", "err" };
> 
> This warning already existed as part of -Wc++-compat, but this patch
> allows enabling it separately.  It is also included in -Wextra, since
> it may not always be desired (when unterminated character sequences are
> wanted), but it's likely to be desired in most cases.
> 
> Since Wc++-compat now includes this warning, the test has to be modified
> to expect the text of the new warning too, in .
> 
> Link: 
> Link: 
> Link: 
> 
> Acked-by: Doug McIlroy 
> Cc: "G. Branden Robinson" 
> Cc: Ralph Corderoy 
> Cc: Dave Kemper 
> Cc: Larry McVoy 
> Cc: Andrew Pinski 
> Cc: Jonathan Wakely 
> Cc: Andrew Clayton 
> Cc: Martin Uecker 
> Cc: David Malcolm 
> Signed-off-by: Alejandro Colomar 
> ---
> 
> v5:
> 
> -  Fix existing C++-compat tests.  [reported by ]
> 
> 
>  gcc/c-family/c.opt | 4 
>  gcc/c/c-typeck.cc  | 6 +++---
>  gcc/testsuite/gcc.dg/Wcxx-compat-14.c  | 2 +-
>  gcc/testsuite/gcc.dg/Wunterminated-string-initialization.c | 6 ++
>  4 files changed, 14 insertions(+), 4 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/Wunterminated-string-initialization.c
> 
> diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt
> index 44b9c862c14..e8f6b836836 100644
> --- a/gcc/c-family/c.opt
> +++ b/gcc/c-family/c.opt
> @@ -1407,6 +1407,10 @@ Wunsuffixed-float-constants
>  C ObjC Var(warn_unsuffixed_float_constants) Warning
>  Warn about unsuffixed float constants.
>  
> +Wunterminated-string-initialization
> +C ObjC Var(warn_unterminated_string_initialization) Warning LangEnabledBy(C 
> ObjC,Wextra || Wc++-compat)
> +Warn about character arrays initialized as unterminated character sequences 
> by a string literal.
> +
>  Wunused
>  C ObjC C++ ObjC++ LangEnabledBy(C ObjC C++ ObjC++,Wall)
>  ; documented in common.opt
> diff --git a/gcc/c/c-typeck.cc b/gcc/c/c-typeck.cc
> index e55e887da14..7df9de819ed 100644
> --- a/gcc/c/c-typeck.cc
> +++ b/gcc/c/c-typeck.cc
> @@ -8399,11 +8399,11 @@ digest_init (location_t init_loc, tree type, tree 
> init, tree origtype,
>   pedwarn_init (init_loc, 0,
> ("initializer-string for array of %qT "
>  "is too long"), typ1);
> -   else if (warn_cxx_compat
> +   else if (warn_unterminated_string_initialization
>  && compare_tree_int (TYPE_SIZE_UNIT (type), len) < 0)
> - warning_at (init_loc, OPT_Wc___compat,
> + warning_at (init_loc, OPT_Wunterminated_string_initialization,
>   ("initializer-string for array of %qT "
> -  "is too long for C++"), typ1);
> +  "is too long"), typ1);
> if (compare_tree_int (TYPE_SIZE_UNIT (type), len) < 0)
>   {
> unsigned HOST_WIDE_INT size
> diff --git a/gcc/testsuite/gcc.dg/Wcxx-compat-14.c 
> b/gcc/testsuite/gcc.dg/Wcxx-compat-14.c
> index 23783711be6..6df0ee197cc 100644
> --- a/gcc/testsuite/gcc.dg/Wcxx-compat-14.c
> +++ b/gcc/testsuite/gcc.dg/Wcxx-compat-14.c
> @@ -2,5 +2,5 @@
>  /* { dg-options "-Wc++-compat" } */
>  
>  char a1[] = "a";
> -char a2[1] = "a";/* { dg-warning "C\[+\]\[+\]" } */
> +char a2[1] = "a";/* { dg-warning "initializer-string for array of 'char' 
> is too long" } */
>  char a3[2] = "a";
> diff --git a/gcc/testsuite/gcc.dg/Wunterminated-string-initialization.c 
> b/gcc/testsuite/gcc.dg/Wunterminated-string-initialization.c
> new file mode 100644
> index 00000000000..13d5dbc6640
> --- /dev/null
> +++

[PATCH] TEST: Fix XPASS of outer loop vectorization tests for RVV

2023-10-08 Thread Juzhe-Zhong

Even though RVV doesn't enable vec_unpack/vec_pack, it succeeds on outer loop 
vectorizations.

Fix these following XPASS FAILs:

XPASS: gcc.dg/vect/no-scevccp-outer-16.c scan-tree-dump-times vect "OUTER LOOP 
VECTORIZED." 1
XPASS: gcc.dg/vect/no-scevccp-outer-17.c scan-tree-dump-times vect "OUTER LOOP 
VECTORIZED." 1
XPASS: gcc.dg/vect/no-scevccp-outer-19.c scan-tree-dump-times vect "OUTER LOOP 
VECTORIZED." 1
XPASS: gcc.dg/vect/no-scevccp-outer-21.c scan-tree-dump-times vect "OUTER LOOP 
VECTORIZED." 1

gcc/testsuite/ChangeLog:

* gcc.dg/vect/no-scevccp-outer-16.c: Fix XPASS for RVV.
* gcc.dg/vect/no-scevccp-outer-17.c: Ditto.
* gcc.dg/vect/no-scevccp-outer-19.c: Ditto.
* gcc.dg/vect/no-scevccp-outer-21.c: Ditto.

---
 gcc/testsuite/gcc.dg/vect/no-scevccp-outer-16.c | 2 +-
 gcc/testsuite/gcc.dg/vect/no-scevccp-outer-17.c | 2 +-
 gcc/testsuite/gcc.dg/vect/no-scevccp-outer-19.c | 2 +-
 gcc/testsuite/gcc.dg/vect/no-scevccp-outer-21.c | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-16.c 
b/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-16.c
index c7c2fa8a504..12179949e00 100644
--- a/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-16.c
+++ b/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-16.c
@@ -59,4 +59,4 @@ int main (void)
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 1 "vect" { xfail 
{ ! {vect_unpack } } } } } */
+/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 1 "vect" { xfail 
{ { ! {vect_unpack } } && { ! {riscv_v } } } } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-17.c 
b/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-17.c
index ba904a6c03e..86554a98169 100644
--- a/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-17.c
+++ b/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-17.c
@@ -65,4 +65,4 @@ int main (void)
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 1 "vect" { xfail 
{ ! {vect_unpack } } } } } */
+/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 1 "vect" { xfail 
{ { ! {vect_unpack } } && { ! {riscv_v } } } } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-19.c 
b/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-19.c
index 5cd4049d08c..624b54accf4 100644
--- a/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-19.c
+++ b/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-19.c
@@ -49,4 +49,4 @@ int main (void)
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 1 "vect" { xfail 
{ ! {vect_unpack } } } } } */
+/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 1 "vect" { xfail 
{ { ! {vect_unpack } } && { ! {riscv_v } } } } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-21.c 
b/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-21.c
index 72e53c2bfb0..b30a5d78819 100644
--- a/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-21.c
+++ b/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-21.c
@@ -59,4 +59,4 @@ int main (void)
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 1 "vect" { xfail 
{ ! { vect_pack_trunc } } } } } */
+/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 1 "vect" { xfail 
{ { ! {vect_pack_trunc } } && { ! {riscv_v } } } } } } */
-- 
2.36.3

[PATCH] TEST: Fix dump FAIL for RVV

2023-10-08 Thread Juzhe-Zhong

gcc/testsuite/ChangeLog:

* gcc.dg/vect/bb-slp-cond-1.c: Fix dump FAIL for RVV.
* gcc.dg/vect/pr57705.c: Ditto.

---
 gcc/testsuite/gcc.dg/vect/bb-slp-cond-1.c | 4 ++--
 gcc/testsuite/gcc.dg/vect/pr57705.c   | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-cond-1.c 
b/gcc/testsuite/gcc.dg/vect/bb-slp-cond-1.c
index c8024429e9c..e1ebc23505f 100644
--- a/gcc/testsuite/gcc.dg/vect/bb-slp-cond-1.c
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-cond-1.c
@@ -47,6 +47,6 @@ int main ()
 }
 
 /* { dg-final { scan-tree-dump {(no need for alias check [^\n]* when VF is 
1|no alias between [^\n]* when [^\n]* is outside \(-16, 16\))} "vect" { target 
vect_element_align } } } */
-/* { dg-final { scan-tree-dump-times "loop vectorized" 1 "vect" { target { 
vect_element_align && { ! amdgcn-*-* } } } } } */
-/* { dg-final { scan-tree-dump-times "loop vectorized" 2 "vect" { target 
amdgcn-*-* } } } */
+/* { dg-final { scan-tree-dump-times "loop vectorized" 1 "vect" { target { 
vect_element_align && { { ! amdgcn-*-* } && { ! riscv_v } } } } } } */
+/* { dg-final { scan-tree-dump-times "loop vectorized" 2 "vect" { target { 
amdgcn-*-* || riscv_v } } } } */
 
diff --git a/gcc/testsuite/gcc.dg/vect/pr57705.c 
b/gcc/testsuite/gcc.dg/vect/pr57705.c
index 39c32946d74..2dacea0a7a7 100644
--- a/gcc/testsuite/gcc.dg/vect/pr57705.c
+++ b/gcc/testsuite/gcc.dg/vect/pr57705.c
@@ -64,5 +64,5 @@ main ()
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 3 "vect" { target 
vect_pack_trunc } } } */
-/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 2 "vect" { target { ! 
vect_pack_trunc } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 3 "vect" { target { 
vect_pack_trunc || riscv_v } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 2 "vect" { target { { 
! vect_pack_trunc } && { ! riscv_v } } } } } */
-- 
2.36.3

[PATCH] TEST: Fix dump FAIL of vect-multitypes-16.c for RVV

2023-10-08 Thread Juzhe-Zhong

RVV (RISC-V Vector) doesn't enable vect_unpack, but we still vectorize this 
case well.
So, adjust dump check for RVV.

gcc/testsuite/ChangeLog:

* gcc.dg/vect/vect-multitypes-16.c: Fix dump FAIL of RVV.

---
 gcc/testsuite/gcc.dg/vect/vect-multitypes-16.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/vect/vect-multitypes-16.c 
b/gcc/testsuite/gcc.dg/vect/vect-multitypes-16.c
index a61f1a9a221..829a4d41601 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-multitypes-16.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-multitypes-16.c
@@ -35,6 +35,6 @@ int main (void)
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target 
vect_unpack } } } */
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" { target { 
! vect_unpack } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { 
vect_unpack || riscv_v } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" { target { 
{ ! vect_unpack } && { ! riscv_v } } } } } */
 
-- 
2.36.3

[PATCH] TEST: Fix dump FAIL for RVV (RISCV-V vector)

2023-10-08 Thread Juzhe-Zhong

As this showed: https://godbolt.org/z/3K9oK7fx3

ARM SVE shows FOLD_EXTRACT_LAST 2 times, whereas RVV shows it 4 times.

This is because RISC-V doesn't enable vec_pack_trunc, so we fail on the 
conversion and fold_extract_last during the first analysis.
Then we succeed the second time.

So RVV shows "FOLD_EXTRACT_LAST" 4 times.

gcc/testsuite/ChangeLog:

* gcc.dg/vect/vect-cond-reduc-4.c: Add vect_pack_trunc variant.

---
 gcc/testsuite/gcc.dg/vect/vect-cond-reduc-4.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-4.c 
b/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-4.c
index 8820075b1dc..8ea8c538713 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-4.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-4.c
@@ -42,6 +42,7 @@ main (void)
 }
 
 /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
-/* { dg-final { scan-tree-dump-times "optimizing condition reduction with 
FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */
+/* { dg-final { scan-tree-dump-times "optimizing condition reduction with 
FOLD_EXTRACT_LAST" 2 "vect" { target { vect_fold_extract_last && 
vect_pack_trunc } } } } */
+/* { dg-final { scan-tree-dump-times "optimizing condition reduction with 
FOLD_EXTRACT_LAST" 4 "vect" { target { { vect_fold_extract_last } && { ! 
vect_pack_trunc } } } } } */
 /* { dg-final { scan-tree-dump-times "condition expression based on integer 
induction." 2 "vect" { target { ! vect_fold_extract_last } } } } */
 
-- 
2.36.3

Re: [PATCH] Support g++ 4.8 as a host compiler.

2023-10-08 Thread Iain Sandoe




> On 8 Oct 2023, at 05:40, Jeff Law  wrote:
> On 10/7/23 15:30, Sam James wrote:
>> Jeff Law  writes:
>>> On 10/4/23 16:19, Roger Sayle wrote:
>>>> The recent patch to remove poly_int_pod triggers a bug in g++
>>>> 4.8.5's
>>>> C++ 11 support which mistakenly believes poly_uint16 has a non-trivial
>>>> constructor.  This in turn prohibits it from being used as a member in
>>>> a union (rtxunion) that constructed statically, resulting in a (fatal)
>>>> error during stage 1.  A workaround is to add an explicit constructor
>>>> to the problematic union, which allows mainline to be bootstrapped with
>>>> the system compiler on older RedHat 7 systems.
>>>> This patch has been tested on x86_64-pc-linux-gnu where it allows a
>>>> bootstrap to complete when using g++ 4.8.5 as the host compiler.
>>>> Ok for mainline?
>>>> 2023-10-04  Roger Sayle
>>>> gcc/ChangeLog
>>>> * rtl.h (rtx_def::u): Add explicit constructor to workaround
>>>> issue using g++ 4.8 as a host compiler.
>>> I think the bigger question is whether or not we're going to step
>>> forward on the minimum build requirements.
>>> 
>>> My recollection was we settled on gcc-4.8 for the benefit of RHEL 7
>>> and Centos 7 which are rapidly approaching EOL (June 2024).
>>> 
>>> I would certainly support stepping forward to a more modern compiler
>>> for the build requirements, which might make this patch obsolete.
>> See also richi and jakub's comments at 
>> https://inbox.sourceware.org/gcc-patches/mpt5y3ppio0@arm.com/T/#m985295bedaadb47aa0b9ba63b7cb69a660a108bb.
> Yea.  As Jakub notes, there's the cfarm situation, but I've had good success 
> with DTS on Centos 7 systems (I have to support some of those internally 
> within Ventana).  It quite literally "just works" though users would have to 
> enable it.
> 
> Alternately, update the cfarm hosts?

In practice, if one wants to test Ada and D, a newer toolchain is needed anyway 
- so at least some of us are already using self-built bootstrap toolchains.

Is there some blocker to installing a project-built toolchain on /opt, for 
example?  (admittedly it then becomes a point that someone has to take 
responsibility for providing it).
Iain

[PATCH] RISC-V: Support movmisalign of RVV VLA modes

2023-10-08 Thread Juzhe-Zhong

Previously, I removed the movmisalign pattern to fix the execution FAILs in 
this commit:
https://github.com/gcc-mirror/gcc/commit/f7bff24905a6959f85f866390db2fff1d6f95520

At first I thought that RVV doesn't allow misaligned accesses, so I removed 
that pattern.
However, after deeper investigation, re-reading the RVV ISA, and experimenting 
on SPIKE,
I realized I was wrong.

RVV ISA reference: 
https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#vector-memory-alignment-constraints

"If an element accessed by a vector memory instruction is not naturally aligned 
to the size of the element, 
 either the element is transferred successfully or an address misaligned 
exception is raised on that element."

It's obvious that RVV ISA does allow misaligned vector load/store.

I experimented and confirmed this on SPIKE:

[jzzhong@rios-cad122:/work/home/jzzhong/work/toolchain/riscv/gcc/gcc/testsuite/gcc.dg/vect]$~/work/toolchain/riscv/build/dev-rv64gcv_zfh-lp64d-medany-newlib-spike-debug/install/bin/spike
 --isa=rv64gcv --varch=vlen:128,elen:64 
~/work/toolchain/riscv/build/dev-rv64gcv_zfh-lp64d-medany-newlib-spike-debug/install/riscv64-unknown-elf/bin/pk64
  a.out
bbl loader
z   ra 00010158 sp 003ffb40 gp 00012c48
tp  t0 000110da t1 000f t2 
s0 00013460 s1  a0 00012ef5 a1 00012018
a2 00012a71 a3 000d a4 0004 a5 00012a71
a6 00012a71 a7 00012018 s2  s3 
s4  s5  s6  s7 
s8  s9  sA  sB 
t3  t4  t5  t6 
pc 00010258 va/inst 020660a7 sr 80026620
Store/AMO access fault!

[jzzhong@rios-cad122:/work/home/jzzhong/work/toolchain/riscv/gcc/gcc/testsuite/gcc.dg/vect]$~/work/toolchain/riscv/build/dev-rv64gcv_zfh-lp64d-medany-newlib-spike-debug/install/bin/spike
 --misaligned --isa=rv64gcv --varch=vlen:128,elen:64 
~/work/toolchain/riscv/build/dev-rv64gcv_zfh-lp64d-medany-newlib-spike-debug/install/riscv64-unknown-elf/bin/pk64
  a.out
bbl loader

We can see that the previously *FAILED* execution tests pass on SPIKE when 
--misaligned is specified.

So, to honor the RVV ISA spec, we should add the movmisalign pattern back, 
based on the investigation I have done, since
it can improve multiple vectorization tests and fix dump FAILs.

This patch fixes the following dump FAILs:

FAIL: gcc.dg/vect/vect-bitfield-read-2.c -flto -ffat-lto-objects  
scan-tree-dump-not optimized "Invalid sum"
FAIL: gcc.dg/vect/vect-bitfield-read-2.c scan-tree-dump-not optimized "Invalid 
sum"
FAIL: gcc.dg/vect/vect-bitfield-read-4.c -flto -ffat-lto-objects  
scan-tree-dump-not optimized "Invalid sum"
FAIL: gcc.dg/vect/vect-bitfield-read-4.c scan-tree-dump-not optimized "Invalid 
sum"
FAIL: gcc.dg/vect/vect-bitfield-write-2.c -flto -ffat-lto-objects  
scan-tree-dump-not optimized "Invalid sum"
FAIL: gcc.dg/vect/vect-bitfield-write-2.c scan-tree-dump-not optimized "Invalid 
sum"
FAIL: gcc.dg/vect/vect-bitfield-write-3.c -flto -ffat-lto-objects  
scan-tree-dump-not optimized "Invalid sum"
FAIL: gcc.dg/vect/vect-bitfield-write-3.c scan-tree-dump-not optimized "Invalid 
sum"

Consider the following case:

struct s {
unsigned i : 31;
char a : 4;
};

#define N 32
#define ELT0 {0x7FFFUL, 0}
#define ELT1 {0x7FFFUL, 1}
#define ELT2 {0x7FFFUL, 2}
#define ELT3 {0x7FFFUL, 3}
#define RES 48
struct s A[N]
  = { ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3,
  ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3,
  ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3,
  ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3};

int __attribute__ ((noipa))
f(struct s *ptr, unsigned n) {
int res = 0;
for (int i = 0; i < n; ++i)
  res += ptr[i].a;
return res;
}

-O3 -S -fno-vect-cost-model (default strict-align):

f:
mv  a4,a0
beq a1,zero,.L9
addiw   a5,a1,-1
li  a3,14
vsetivlizero,16,e64,m8,ta,ma
bleua5,a3,.L3
andia5,a0,127
bne a5,zero,.L3
srliw   a3,a1,4
sllia3,a3,7
li  a0,15
sllia0,a0,32
add a3,a3,a4
mv  a5,a4
li  a2,32
vmv.v.x v16,a0
vsetvli zero,zero,e32,m4,ta,ma
vmv.v.i v4,0
.L4:
vsetvli zero,zero,e64,m8,ta,ma
vle64.v v8,0(a5)
addia5,a5,128
vand.vv v8,v8,v16
vsetvli zero,zero,e32,m4,ta,ma
vnsrl.wxv8,v8,a2
vadd.vv v4,v4,v8
bne a5,a3,.L4
li  a3,0
andia5,a1,15
vmv.s.x v1,a3
andia3,a1,-16
vredsum.vs  v1,v4,v1
vmv.x.s a0,v1
mv  a2,a0
beq a5,zero,.L15

37 matches

Mail list logo