The following fixes the re-materialization of aggregates before calls
that take the address of a scalarized decl.  The issue is that we do
not know the appropriate effective type to use for the stores, so we
use ref-all accesses for the re-materialization to remain compatible
with TBAA info modref might have recorded.  The same holds true for
the re-load after the call.
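
For illustration, a minimal sketch of the kind of code this is about
(reduced and renamed from the testcase below; types and names are only
illustrative):

  struct S { unsigned char buf[16]; };

  void consume (const unsigned long long *p);

  void f (const struct S *s)
  {
    struct S ctx = *s;
    /* If SRA scalarizes ctx (possible when modref shows the callee only
       reads through the pointer), it has to re-materialize ctx before
       the call; the effective type of those stores (char vs. uint64) is
       unknown, hence the ref-all accesses.  */
    consume ((const unsigned long long *) ctx.buf);
  }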

Bootstrap and regtest running on x86_64-unknown-linux-gnu.

OK if that succeeds?

Thanks,
Richard.

        PR tree-optimization/121726
        * tree-sra.cc (build_ref_for_offset): Add force_ref_all
        parameter and use ptr_type_node as the alias pointer type
        when it is set.
        (build_ref_for_model): Add force_ref_all parameter and pass
        it through, forcing the use of build_ref_for_offset when set.
        (generate_subtree_copies): Likewise.
        (sra_modify_call_arg): Force ref-all accesses.

        * gcc.target/i386/pr121726.c: New testcase.
---
 gcc/testsuite/gcc.target/i386/pr121726.c | 117 +++++++++++++++++++++++
 gcc/tree-sra.cc                          |  43 +++++----
 2 files changed, 143 insertions(+), 17 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr121726.c

diff --git a/gcc/testsuite/gcc.target/i386/pr121726.c b/gcc/testsuite/gcc.target/i386/pr121726.c
new file mode 100644
index 00000000000..83cbbaf8ad6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr121726.c
@@ -0,0 +1,117 @@
+/* { dg-do run { target avx2_runtime } } */
+/* { dg-options "-O3 -mavx2" } */
+
+#include <stdint.h>
+#include <string.h>
+#include <immintrin.h>
+
+#define SPX_N 16
+
+typedef struct {
+    uint8_t pub_seed[SPX_N];
+} spx_ctx;
+
+typedef float __m256_u __attribute__ ((__vector_size__ (32), __may_alias__, __aligned__ (1)));
+typedef double __m256d_u __attribute__ ((__vector_size__ (32), __may_alias__, __aligned__ (1)));
+typedef long long __m256i_u __attribute__ ((__vector_size__ (32), __may_alias__, __aligned__ (1)));
+
+__attribute__((noinline))
+void Keccak(__m256i *states) { }
+
+__attribute__((noipa))
+void capture(uint8_t *v) { }
+
+static void thashx4(uint64_t *out0,
+             uint64_t *out1,
+             uint64_t *out2,
+             uint64_t *out3,
+             unsigned int inblocks,
+             const uint64_t* pub_seed) {
+    if (inblocks == 1 || inblocks == 2) {
+        __m256i state[25];
+        for (int i = 0; i < 25; i++) {
+            state[i] = _mm256_set1_epi64x(0);
+        }
+        for (int i = 0; i < 2; i++) {
+            state[i] = _mm256_set1_epi64x(pub_seed[i]);
+        }
+
+        /* Domain separator and padding. */
+        for (size_t i = 4 * inblocks + 4; i < 16; i++) {
+            state[i] = _mm256_set1_epi64x(0);
+        }
+
+        Keccak(&state[0]);
+
+        for (int i = 0; i < SPX_N / 8; i++) {
+            out0[i] = _mm256_extract_epi64(state[i], 0);
+            out1[i] = _mm256_extract_epi64(state[i], 1);
+            out2[i] = _mm256_extract_epi64(state[i], 2);
+            out3[i] = _mm256_extract_epi64(state[i], 3);
+        }
+    } else {
+        unsigned char buf0[inblocks * SPX_N];
+        unsigned char buf1[inblocks * SPX_N];
+        unsigned char buf2[inblocks * SPX_N];
+
+        memcpy(buf0, pub_seed, SPX_N);
+        memcpy(buf1, pub_seed, SPX_N);
+        memcpy(buf2, pub_seed, SPX_N);
+
+        capture(buf0);
+        capture(buf1);
+        capture(buf2);
+    }
+}
+
+static
+void wots_gen_leafx4(const spx_ctx *ctx) {
+       uint64_t dest[4];
+
+       thashx4(dest, dest, dest, dest, 1, (const uint64_t *) ctx->pub_seed);
+       thashx4(dest, dest, dest, dest, 3, (const uint64_t *) ctx->pub_seed);
+}
+
+void treehashx4_v2(const spx_ctx *ctx,
+                void (*gen_leafx4)( const spx_ctx *),
+                uint32_t* tree_addrx4
+                ) {
+    for (int i = 0; i < 2; i++) {
+        gen_leafx4( ctx );
+    }
+}
+
+__attribute__((noipa))
+void crypto_sign_signature(uint64_t *sg, uint8_t enable, const uint8_t *sk)
+{
+  spx_ctx ctx;
+
+  memcpy(ctx.pub_seed, sk, SPX_N);
+
+  const uint64_t* ptr = (const uint64_t *)&ctx.pub_seed[0];
+  thashx4(sg, sg, sg, sg, 1, ptr);
+
+  if (!enable)
+    return;
+
+  uint32_t tree_addrx4[32] = { 0 };
+
+  treehashx4_v2(&ctx, wots_gen_leafx4, tree_addrx4);
+}
+
+#define length_secret_key 64
+#define length_signature 64
+
+int main() {
+  uint8_t secret_key[length_secret_key];
+  uint64_t signature[length_signature];
+  memset(secret_key, 0, length_secret_key);
+  memset(signature, 0, length_signature * sizeof(uint64_t));
+
+  crypto_sign_signature(signature, 0, secret_key);
+
+  for (int i = 0; i < length_signature; ++i)
+    if (signature[i] != 0)
+      __builtin_abort ();
+  return 0;
+}
diff --git a/gcc/tree-sra.cc b/gcc/tree-sra.cc
index 151f6005ff3..edb03fe237b 100644
--- a/gcc/tree-sra.cc
+++ b/gcc/tree-sra.cc
@@ -1886,12 +1886,13 @@ make_fancy_name (tree expr)
    something for which get_addr_base_and_unit_offset returns NULL, gsi must
    be non-NULL and is used to insert new statements either before or below
    the current one as specified by INSERT_AFTER.  This function is not capable
-   of handling bitfields.  */
+   of handling bitfields.  If FORCE_REF_ALL is true then the memory access
+   will use alias-set zero.  */
 
 static tree
 build_ref_for_offset (location_t loc, tree base, poly_int64 offset,
                      bool reverse, tree exp_type, gimple_stmt_iterator *gsi,
-                     bool insert_after)
+                     bool insert_after, bool force_ref_all = false)
 {
   tree prev_base = base;
   tree off;
@@ -1929,19 +1930,22 @@ build_ref_for_offset (location_t loc, tree base, poly_int64 offset,
       else
        gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
 
-      off = build_int_cst (reference_alias_ptr_type (prev_base), byte_offset);
+      off = build_int_cst (force_ref_all ? ptr_type_node
+                          : reference_alias_ptr_type (prev_base), byte_offset);
       base = tmp;
     }
   else if (TREE_CODE (base) == MEM_REF)
     {
-      off = build_int_cst (TREE_TYPE (TREE_OPERAND (base, 1)),
+      off = build_int_cst (force_ref_all ? ptr_type_node
+                          : TREE_TYPE (TREE_OPERAND (base, 1)),
                           base_offset + byte_offset);
       off = int_const_binop (PLUS_EXPR, TREE_OPERAND (base, 1), off);
       base = unshare_expr (TREE_OPERAND (base, 0));
     }
   else
     {
-      off = build_int_cst (reference_alias_ptr_type (prev_base),
+      off = build_int_cst (force_ref_all ? ptr_type_node
+                          : reference_alias_ptr_type (prev_base),
                           base_offset + byte_offset);
       base = build_fold_addr_expr (unshare_expr (base));
     }
@@ -2004,12 +2008,13 @@ build_reconstructed_reference (location_t, tree base, struct access *model)
    build_ref_for_offset, furthermore, when GSI is NULL, the function expects
    that it re-builds the entire reference from a DECL to the final access and
    so will create a MEM_REF when OFFSET does not exactly match offset of
-   MODEL.  */
+   MODEL.  If FORCE_REF_ALL is true then the memory access will use
+   alias-set zero.  */
 
 static tree
 build_ref_for_model (location_t loc, tree base, HOST_WIDE_INT offset,
                     struct access *model, gimple_stmt_iterator *gsi,
-                    bool insert_after)
+                    bool insert_after, bool force_ref_all = false)
 {
   gcc_assert (offset >= 0);
   if (TREE_CODE (model->expr) == COMPONENT_REF
@@ -2021,7 +2026,7 @@ build_ref_for_model (location_t loc, tree base, HOST_WIDE_INT offset,
       offset -= int_bit_position (fld);
       exp_type = TREE_TYPE (TREE_OPERAND (model->expr, 0));
       t = build_ref_for_offset (loc, base, offset, model->reverse, exp_type,
-                               gsi, insert_after);
+                               gsi, insert_after, force_ref_all);
       /* The flag will be set on the record type.  */
       REF_REVERSE_STORAGE_ORDER (t) = 0;
       return fold_build3_loc (loc, COMPONENT_REF, TREE_TYPE (fld), t, fld,
@@ -2031,6 +2036,7 @@ build_ref_for_model (location_t loc, tree base, HOST_WIDE_INT offset,
     {
       tree res;
       if (model->grp_same_access_path
+         && !force_ref_all
          && !TREE_THIS_VOLATILE (base)
          && (TYPE_ADDR_SPACE (TREE_TYPE (base))
              == TYPE_ADDR_SPACE (TREE_TYPE (model->expr)))
@@ -2042,7 +2048,8 @@ build_ref_for_model (location_t loc, tree base, HOST_WIDE_INT offset,
        return res;
       else
        return build_ref_for_offset (loc, base, offset, model->reverse,
-                                    model->type, gsi, insert_after);
+                                    model->type, gsi, insert_after,
+                                    force_ref_all);
     }
 }
 
@@ -3924,16 +3931,18 @@ analyze_all_variable_accesses (void)
    replacements in the interval <start_offset, start_offset + chunk_size>,
    otherwise copy all.  GSI is a statement iterator used to place the new
    statements.  WRITE should be true when the statements should write from AGG
-   to the replacement and false if vice versa.  if INSERT_AFTER is true, new
+   to the replacement and false if vice versa.  If INSERT_AFTER is true, new
    statements will be added after the current statement in GSI, they will be
-   added before the statement otherwise.  */
+   added before the statement otherwise.  If FORCE_REF_ALL is true then
+   memory accesses will use alias-set zero.  */
 
 static void
 generate_subtree_copies (struct access *access, tree agg,
                         HOST_WIDE_INT top_offset,
                         HOST_WIDE_INT start_offset, HOST_WIDE_INT chunk_size,
                         gimple_stmt_iterator *gsi, bool write,
-                        bool insert_after, location_t loc)
+                        bool insert_after, location_t loc,
+                        bool force_ref_all = false)
 {
   /* Never write anything into constant pool decls.  See PR70602.  */
   if (!write && constant_decl_p (agg))
@@ -3951,7 +3960,7 @@ generate_subtree_copies (struct access *access, tree agg,
          gassign *stmt;
 
          expr = build_ref_for_model (loc, agg, access->offset - top_offset,
-                                     access, gsi, insert_after);
+                                     access, gsi, insert_after, force_ref_all);
 
          if (write)
            {
@@ -4001,7 +4010,7 @@ generate_subtree_copies (struct access *access, tree agg,
       if (access->first_child)
        generate_subtree_copies (access->first_child, agg, top_offset,
                                 start_offset, chunk_size, gsi,
-                                write, insert_after, loc);
+                                write, insert_after, loc, force_ref_all);
 
       access = access->next_sibling;
     }
@@ -4303,14 +4312,14 @@ sra_modify_call_arg (tree *expr, gimple_stmt_iterator *call_gsi,
   gimple *stmt = gsi_stmt (*call_gsi);
   location_t loc = gimple_location (stmt);
   generate_subtree_copies (access, base, 0, 0, 0, call_gsi, false, false,
-                          loc);
+                          loc, true);
 
   if (flags & EAF_NO_DIRECT_CLOBBER)
     return true;
 
   if (!stmt_ends_bb_p (stmt))
     generate_subtree_copies (access, base, 0, 0, 0, refresh_gsi, true,
-                            true, loc);
+                            true, loc, true);
   else
     {
       edge e;
@@ -4319,7 +4328,7 @@ sra_modify_call_arg (tree *expr, gimple_stmt_iterator *call_gsi,
        {
          gimple_stmt_iterator alt_gsi = gsi_start_edge (e);
          generate_subtree_copies (access, base, 0, 0, 0, &alt_gsi, true,
-                                  true, loc);
+                                  true, loc, true);
        }
     }
   return true;
-- 
2.51.0
