On Fri, Aug 28, 2020 at 05:07:11PM +0100, Richard Sandiford wrote: > Jakub Jelinek via Gcc-patches <gcc-patches@gcc.gnu.org> writes: > > + auto_vec<target_unit, 128> buffer; > > + auto_vec<constant_descriptor_rtx_data *, 128> vec; > > + object_allocator<constant_descriptor_rtx_data> > > + data_pool ("constant_descriptor_rtx_data_pool"); > > + int idx = 0; > > + size_t size = 0; > > + for (constant_descriptor_rtx *desc = pool->first; desc; desc = > > desc->next) > > + if (desc->mark > 0 > > + && ! (SYMBOL_REF_HAS_BLOCK_INFO_P (desc->sym) > > + && SYMBOL_REF_BLOCK (desc->sym))) > > + { > > + buffer.truncate (0); > > 128 isn't big enough for all targets (e.g. aarch64 with > -msve-vector-bits=2048), so I think we still need a reserve > call here.
You're right, thanks for spotting it, I've missed native_encode_rtx will do quick_push rather than safe_push. Updated patch below, it shouldn't be needed in the second loop, because the first loop should already grow it to the largest size. 2020-08-28 Jakub Jelinek <ja...@redhat.com> PR middle-end/54201 * varasm.c: Include alloc-pool.h. (output_constant_pool_contents): Emit desc->mark < 0 entries as aliases. (struct constant_descriptor_rtx_data): New type. (constant_descriptor_rtx_data_cmp): New function. (struct const_rtx_data_hasher): New type. (const_rtx_data_hasher::hash, const_rtx_data_hasher::equal): New methods. (optimize_constant_pool): New function. (output_shared_constant_pool): Call it if TARGET_SUPPORTS_ALIASES. --- gcc/varasm.c.jj 2020-07-28 15:39:10.091755086 +0200 +++ gcc/varasm.c 2020-08-28 18:21:58.943759578 +0200 @@ -57,6 +57,7 @@ along with GCC; see the file COPYING3. #include "asan.h" #include "rtl-iter.h" #include "file-prefix-map.h" /* remap_debug_filename() */ +#include "alloc-pool.h" #ifdef XCOFF_DEBUGGING_INFO #include "xcoffout.h" /* Needed for external data declarations. */ @@ -4198,7 +4199,27 @@ output_constant_pool_contents (struct rt class constant_descriptor_rtx *desc; for (desc = pool->first; desc ; desc = desc->next) - if (desc->mark) + if (desc->mark < 0) + { +#ifdef ASM_OUTPUT_DEF + const char *name = targetm.strip_name_encoding (XSTR (desc->sym, 0)); + char label[256]; + char buffer[256 + 32]; + const char *p; + + ASM_GENERATE_INTERNAL_LABEL (label, "LC", ~desc->mark); + p = targetm.strip_name_encoding (label); + if (desc->offset) + { + sprintf (buffer, "%s+%ld", p, desc->offset); + p = buffer; + } + ASM_OUTPUT_DEF (asm_out_file, name, p); +#else + gcc_unreachable (); +#endif + } + else if (desc->mark) { /* If the constant is part of an object_block, make sure that the constant has been positioned within its block, but do not @@ -4216,6 +4237,160 @@ output_constant_pool_contents (struct rt } } +struct constant_descriptor_rtx_data { + constant_descriptor_rtx *desc; + target_unit *bytes; + unsigned short size; + unsigned short offset; + unsigned int hash; +}; + +/* qsort callback to sort constant_descriptor_rtx_data * vector by + decreasing size. */ + +static int +constant_descriptor_rtx_data_cmp (const void *p1, const void *p2) +{ + constant_descriptor_rtx_data *const data1 + = *(constant_descriptor_rtx_data * const *) p1; + constant_descriptor_rtx_data *const data2 + = *(constant_descriptor_rtx_data * const *) p2; + if (data1->size > data2->size) + return -1; + if (data1->size < data2->size) + return 1; + if (data1->hash < data2->hash) + return -1; + gcc_assert (data1->hash > data2->hash); + return 1; +} + +struct const_rtx_data_hasher : nofree_ptr_hash<constant_descriptor_rtx_data> +{ + static hashval_t hash (constant_descriptor_rtx_data *); + static bool equal (constant_descriptor_rtx_data *, + constant_descriptor_rtx_data *); +}; + +/* Hash and compare functions for const_rtx_data_htab. */ + +hashval_t +const_rtx_data_hasher::hash (constant_descriptor_rtx_data *data) +{ + return data->hash; +} + +bool +const_rtx_data_hasher::equal (constant_descriptor_rtx_data *x, + constant_descriptor_rtx_data *y) +{ + if (x->hash != y->hash || x->size != y->size) + return 0; + unsigned int align1 = x->desc->align; + unsigned int align2 = y->desc->align; + unsigned int offset1 = (x->offset * BITS_PER_UNIT) & (align1 - 1); + unsigned int offset2 = (y->offset * BITS_PER_UNIT) & (align2 - 1); + if (offset1) + align1 = least_bit_hwi (offset1); + if (offset2) + align2 = least_bit_hwi (offset2); + if (align2 > align1) + return 0; + if (memcmp (x->bytes, y->bytes, x->size * sizeof (target_unit)) != 0) + return 0; + return 1; +} + +/* Attempt to optimize constant pool POOL. If it contains both CONST_VECTOR + constants and scalar constants with the values of CONST_VECTOR elements, + try to alias the scalar constants with the CONST_VECTOR elements. */ + +static void +optimize_constant_pool (struct rtx_constant_pool *pool) +{ + auto_vec<target_unit, 128> buffer; + auto_vec<constant_descriptor_rtx_data *, 128> vec; + object_allocator<constant_descriptor_rtx_data> + data_pool ("constant_descriptor_rtx_data_pool"); + int idx = 0; + size_t size = 0; + for (constant_descriptor_rtx *desc = pool->first; desc; desc = desc->next) + if (desc->mark > 0 + && ! (SYMBOL_REF_HAS_BLOCK_INFO_P (desc->sym) + && SYMBOL_REF_BLOCK (desc->sym))) + { + buffer.truncate (0); + buffer.reserve (GET_MODE_SIZE (desc->mode)); + if (native_encode_rtx (desc->mode, desc->constant, buffer, 0, + GET_MODE_SIZE (desc->mode))) + { + constant_descriptor_rtx_data *data = data_pool.allocate (); + data->desc = desc; + data->bytes = NULL; + data->size = GET_MODE_SIZE (desc->mode); + data->offset = 0; + data->hash = idx++; + size += data->size; + vec.safe_push (data); + } + } + if (idx) + { + vec.qsort (constant_descriptor_rtx_data_cmp); + unsigned min_size = vec.last ()->size; + target_unit *bytes = XNEWVEC (target_unit, size); + unsigned int i; + constant_descriptor_rtx_data *data; + hash_table<const_rtx_data_hasher> * htab + = new hash_table<const_rtx_data_hasher> (31); + size = 0; + FOR_EACH_VEC_ELT (vec, i, data) + { + buffer.truncate (0); + native_encode_rtx (data->desc->mode, data->desc->constant, + buffer, 0, data->size); + memcpy (bytes + size, buffer.address (), data->size); + data->bytes = bytes + size; + data->hash = iterative_hash (data->bytes, + data->size * sizeof (target_unit), 0); + size += data->size; + constant_descriptor_rtx_data **slot + = htab->find_slot_with_hash (data, data->hash, INSERT); + if (*slot) + { + data->desc->mark = ~(*slot)->desc->labelno; + data->desc->offset = (*slot)->offset; + } + else + { + unsigned int sz = 1 << floor_log2 (data->size); + + *slot = data; + for (sz >>= 1; sz >= min_size; sz >>= 1) + for (unsigned off = 0; off + sz <= data->size; off += sz) + { + constant_descriptor_rtx_data tmp; + tmp.desc = data->desc; + tmp.bytes = data->bytes + off; + tmp.size = sz; + tmp.offset = off; + tmp.hash = iterative_hash (tmp.bytes, + sz * sizeof (target_unit), 0); + slot = htab->find_slot_with_hash (&tmp, tmp.hash, INSERT); + if (*slot == NULL) + { + *slot = data_pool.allocate (); + **slot = tmp; + } + } + } + } + delete htab; + XDELETE (bytes); + } + data_pool.release (); +} + /* Mark all constants that are used in the current function, then write out the function's private constant pool. */ @@ -4251,6 +4426,10 @@ output_constant_pool (const char *fnname void output_shared_constant_pool (void) { + if (optimize + && TARGET_SUPPORTS_ALIASES) + optimize_constant_pool (shared_constant_pool); + output_constant_pool_contents (shared_constant_pool); } Jakub