Hi,

Changes from v1:
 - Add gather_scatter argument to support_vector_misalignment.
 - Don't rely on DR_BASE_ALIGNMENT.
 - Add IFN helpers and use them.
 - Add gather/scatter helper macros.
 - Clarify is_packed handling in docs.
This patch adds simple misalignment checks for gather/scatter
operations.  Previously we assumed that gathers and scatters perform
element accesses internally, so alignment does not matter.  The riscv
vector spec, however, explicitly states that vector operations are
allowed to fault on element-misaligned accesses.  Reasonable uarchs
won't, but...

For gather/scatter we have two paths in the vectorizer:

 (1) Regular analysis based on datarefs.  Here we can also create
     strided loads.
 (2) Non-affine accesses where each gather index is relative to the
     initial address.

The assumption this patch works off is that once the alignment of the
first scalar access is correct, all others will fall in line, as each
index is a multiple of the first element's size.
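To make that assumption concrete, here is a small sketch of the kind
of loop this is about (my illustration, not part of the patch or its
testsuite).  All gather offsets are multiples of the element size, so
whether the scalar accesses are element-aligned depends only on the
alignment of the base pointer:

  /* Hypothetical example: every address is BASE + 2 * IDX[i], so if
     BASE is 2-byte aligned all accesses are, and if BASE is only
     byte-aligned (e.g. it points into a packed buffer) all accesses
     are misaligned by the same amount.  */
  #include <stdint.h>

  uint16_t
  sum_gathered (const uint16_t *base, const int *idx, int n)
  {
    uint16_t sum = 0;
    for (int i = 0; i < n; i++)
      sum += base[idx[i]];   /* May vectorize into a gather load.  */
    return sum;
  }

On a uarch that faults on element-misaligned vector accesses the
misaligned variant must only be vectorized if the gather_scatter-aware
hook returns true.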
For (1) we have a dataref and can check it for alignment as in the
other cases.  For (2) this patch checks the object alignment of BASE
and compares it against the natural alignment of the current
vectype's unit.

The patch also adds a pointer argument to the gather/scatter IFNs
that carries the necessary alignment.  Most of the patch is therefore
mechanical in that it merely adjusts argument indices.

I tested the riscv version with a custom qemu build that faults on
element-misaligned vector accesses.  With this patch applied, there
is just a single fault left, which is due to PR120782 and will be
addressed separately.

Bootstrapped and regtested on x86, aarch64, and powerpc.  Regtested
on rv64gcv_zvl512b with and without unaligned vector support.

Regards
 Robin

gcc/ChangeLog:

	* config/aarch64/aarch64.cc
	(aarch64_builtin_support_vector_misalignment): Return true for
	gather_scatter.
	* config/arm/arm.cc (arm_builtin_support_vector_misalignment):
	Ditto.
	* config/epiphany/epiphany.cc
	(epiphany_support_vector_misalignment): Ditto.
	* config/gcn/gcn.cc (gcn_vectorize_support_vector_misalignment):
	Ditto.
	* config/loongarch/loongarch.cc
	(loongarch_builtin_support_vector_misalignment): Ditto.
	* config/riscv/riscv.cc (riscv_support_vector_misalignment):
	Always support known aligned types.
	* config/rs6000/rs6000.cc
	(rs6000_builtin_support_vector_misalignment): Ditto.
	* config/s390/s390.cc (s390_support_vector_misalignment): Ditto.
	* internal-fn.cc (expand_scatter_store_optab_fn): Change
	argument numbers.
	(expand_gather_load_optab_fn): Ditto.
	(internal_fn_len_index): Ditto.
	(internal_fn_else_index): Ditto.
	(internal_fn_mask_index): Ditto.
	(internal_fn_stored_value_index): Ditto.
	(internal_fn_alias_ptr_index): New helper.
	(internal_fn_offset_index): Ditto.
	(internal_fn_scale_index): Ditto.
	(internal_gather_scatter_fn_supported_p): Ditto.
	* internal-fn.h (internal_fn_offset_index): Declare.
	(internal_fn_scale_index): Ditto.
	(internal_fn_alias_ptr_index): Ditto.
	* optabs-query.cc (supports_vec_gather_load_p): Ditto.
	* target.def: Add gather_scatter argument and adjust docs.
	* doc/tm.texi: Ditto.
	* targhooks.cc (default_builtin_support_vector_misalignment):
	Add gather_scatter argument.
	* targhooks.h (default_builtin_support_vector_misalignment):
	Ditto.
	* tree-vect-data-refs.cc (vect_describe_gather_scatter_call):
	Handle alias_ptr.
	(vect_check_gather_scatter): Compute and set alias_ptr.
	* tree-vect-patterns.cc (vect_recog_gather_scatter_pattern):
	Ditto.
	* tree-vect-slp.cc (GATHER_SCATTER_OFFSET): Define.
	(vect_get_and_check_slp_defs): Use define.
	* tree-vect-stmts.cc (vect_truncate_gather_scatter_offset): Set
	alias_ptr.
	(get_group_load_store_type): Do not special-case gather/scatter.
	(get_load_store_type): Compute misalignment.
	(vectorizable_store): Remove alignment assert for
	scatter/gather.
	(vectorizable_load): Ditto.
	* tree-vectorizer.h (struct gather_scatter_info): Add alias_ptr.
	(GATHER_SCATTER_LEGACY_P): Define.
	(GATHER_SCATTER_IFN_P): Ditto.
	(GATHER_SCATTER_UNSUPPORTED_P): Ditto.

gcc/testsuite/ChangeLog:

	* lib/target-supports.exp: Fix riscv misalign supported check.
---
 gcc/config/aarch64/aarch64.cc         |  12 ++-
 gcc/config/arm/arm.cc                 |  11 ++-
 gcc/config/epiphany/epiphany.cc       |   8 +-
 gcc/config/gcn/gcn.cc                 |   5 +-
 gcc/config/loongarch/loongarch.cc     |   8 +-
 gcc/config/riscv/riscv.cc             |  29 +++++--
 gcc/config/rs6000/rs6000.cc           |   6 +-
 gcc/config/s390/s390.cc               |   6 +-
 gcc/doc/tm.texi                       |   8 +-
 gcc/internal-fn.cc                    |  96 +++++++++++++++++++----
 gcc/internal-fn.h                     |   3 +
 gcc/optabs-query.cc                   |   6 +-
 gcc/target.def                        |  13 +--
 gcc/targhooks.cc                      |   2 +
 gcc/targhooks.h                       |   2 +-
 gcc/testsuite/lib/target-supports.exp |   2 +-
 gcc/tree-vect-data-refs.cc            |  17 +++-
 gcc/tree-vect-patterns.cc             |  17 ++--
 gcc/tree-vect-slp.cc                  |  26 +++---
 gcc/tree-vect-stmts.cc                | 109 +++++++++++++++++---------
 gcc/tree-vectorizer.h                 |  12 +++
 21 files changed, 291 insertions(+), 107 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index abbb97768f5..f5438f7cb1c 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -356,7 +356,8 @@ static int aarch64_address_cost (rtx, machine_mode, addr_space_t, bool);
 static bool aarch64_builtin_support_vector_misalignment (machine_mode mode,
							 const_tree type,
							 int misalignment,
-							 bool is_packed);
+							 bool is_packed,
+							 bool gather_scatter);
 static machine_mode aarch64_simd_container_mode (scalar_mode, poly_int64);
 static bool aarch64_print_address_internal (FILE*, machine_mode, rtx,
					    aarch64_addr_query_type);
@@ -24319,10 +24320,14 @@ aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
 static bool
 aarch64_builtin_support_vector_misalignment (machine_mode mode,
					     const_tree type, int misalignment,
-					     bool is_packed)
+					     bool is_packed,
+					     bool gather_scatter)
 {
   if (TARGET_SIMD && STRICT_ALIGNMENT)
     {
+      if (gather_scatter)
+	return true;
+
       /* Return if movmisalign pattern is not supported for this mode.  */
       if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
	return false;
@@ -24332,7 +24337,8 @@ aarch64_builtin_support_vector_misalignment (machine_mode mode,
       return false;
     }
   return default_builtin_support_vector_misalignment (mode, type, misalignment,
-						      is_packed);
+						      is_packed,
+						      gather_scatter);
 }
 
 /* If VALS is a vector constant that can be loaded into a register
diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc
index bde06f3fa86..efa01fb8b8b 100644
--- a/gcc/config/arm/arm.cc
+++ b/gcc/config/arm/arm.cc
@@ -289,7 +289,8 @@ static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
						     const_tree type,
						     int misalignment,
-						     bool is_packed);
+						     bool is_packed,
+						     bool gather_scatter);
 static void arm_conditional_register_usage (void);
 static enum flt_eval_method arm_excess_precision (enum excess_precision_type);
 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
@@ -30661,12 +30662,15 @@ arm_vector_alignment_reachable (const_tree type, bool is_packed)
 static bool
 arm_builtin_support_vector_misalignment (machine_mode mode,
					 const_tree type, int misalignment,
-					 bool is_packed)
+					 bool is_packed, bool gather_scatter)
 {
   if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
     {
       HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
 
+      if (gather_scatter)
+	return true;
+
       if (is_packed)
	return align == 1;
 
@@ -30683,7 +30687,8 @@ arm_builtin_support_vector_misalignment (machine_mode mode,
     }
 
   return default_builtin_support_vector_misalignment (mode, type, misalignment,
-						      is_packed);
+						      is_packed,
+						      gather_scatter);
 }
 
 static void
diff --git a/gcc/config/epiphany/epiphany.cc b/gcc/config/epiphany/epiphany.cc
index 16626f85f7e..9d3419d70f7 100644
--- a/gcc/config/epiphany/epiphany.cc
+++ b/gcc/config/epiphany/epiphany.cc
@@ -2816,12 +2816,16 @@ epiphany_vector_alignment_reachable (const_tree type, bool is_packed)
 static bool
 epiphany_support_vector_misalignment (machine_mode mode, const_tree type,
-				      int misalignment, bool is_packed)
+				      int misalignment, bool is_packed,
+				      bool gather_scatter)
 {
+  if (gather_scatter)
+    return true;
   if (GET_MODE_SIZE (mode) == 8 && misalignment % 4 == 0)
     return true;
   return default_builtin_support_vector_misalignment (mode, type, misalignment,
-						      is_packed);
+						      is_packed,
+						      gather_scatter);
 }
 
 /* STRUCTURE_SIZE_BOUNDARY seems a bit crude in how it enlarges small
diff --git a/gcc/config/gcn/gcn.cc b/gcc/config/gcn/gcn.cc
index 0ce5a29fbb5..f06ab0be35f 100644
--- a/gcc/config/gcn/gcn.cc
+++ b/gcc/config/gcn/gcn.cc
@@ -5315,8 +5315,11 @@ gcn_preferred_vector_alignment (const_tree type)
 static bool
 gcn_vectorize_support_vector_misalignment (machine_mode ARG_UNUSED (mode),
					   const_tree type, int misalignment,
-					   bool is_packed)
+					   bool is_packed, bool gather_scatter)
 {
+  if (gather_scatter)
+    return true;
+
   if (is_packed)
     return false;
 
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
index f62e4163c71..8bd956a5274 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -11116,17 +11116,21 @@ static bool
 loongarch_builtin_support_vector_misalignment (machine_mode mode,
					       const_tree type,
					       int misalignment,
-					       bool is_packed)
+					       bool is_packed,
+					       bool gather_scatter)
 {
   if ((ISA_HAS_LSX || ISA_HAS_LASX) && STRICT_ALIGNMENT)
     {
+      if (gather_scatter)
+	return true;
       if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
	return false;
       if (misalignment == -1)
	return false;
     }
   return default_builtin_support_vector_misalignment (mode, type, misalignment,
-						      is_packed);
+						      is_packed,
+						      gather_scatter);
 }
 
 /* Return a PARALLEL containing NELTS elements, with element I equal
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 9a958fc5f77..51a744d8992 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -12132,14 +12132,31 @@ riscv_estimated_poly_value (poly_int64 val,
 /* Return true if the vector misalignment factor is supported by the
    target.  */
 bool
-riscv_support_vector_misalignment (machine_mode mode,
-				   const_tree type ATTRIBUTE_UNUSED,
-				   int misalignment,
-				   bool is_packed ATTRIBUTE_UNUSED)
+riscv_support_vector_misalignment (machine_mode mode, const_tree type,
+				   int misalignment, bool is_packed,
+				   bool gather_scatter)
 {
-  /* Depend on movmisalign pattern.  */
+  /* IS_PACKED is true if the corresponding scalar element is not naturally
+     aligned.  If the misalignment is unknown and the access is packed
+     we defer to the default hook which will check if movmisalign is present.
+     Movmisalign, in turn, depends on TARGET_VECTOR_MISALIGN_SUPPORTED.  */
+  if (misalignment == DR_MISALIGNMENT_UNKNOWN)
+    {
+      if (!is_packed)
+	return true;
+    }
+  else
+    {
+      /* If we know that misalignment is a multiple of the element size, we're
+	 good.  */
+      if (misalignment % TYPE_ALIGN_UNIT (type) == 0)
+	return true;
+    }
+
+  /* Otherwise fall back to movmisalign again.  */
   return default_builtin_support_vector_misalignment (mode, type, misalignment,
-						      is_packed);
+						      is_packed,
+						      gather_scatter);
 }
 
 /* Implement TARGET_VECTORIZE_GET_MASK_MODE.  */
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 7ee26e52b13..7cfe49c1aa0 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -4951,10 +4951,14 @@ static bool
 rs6000_builtin_support_vector_misalignment (machine_mode mode,
					    const_tree type,
					    int misalignment,
-					    bool is_packed)
+					    bool is_packed,
+					    bool gather_scatter)
 {
   if (TARGET_VSX)
     {
+      if (gather_scatter)
+	return true;
+
       if (TARGET_EFFICIENT_UNALIGNED_VSX)
	return true;
 
diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc
index 38267202f66..745a2c64b1e 100644
--- a/gcc/config/s390/s390.cc
+++ b/gcc/config/s390/s390.cc
@@ -17334,13 +17334,15 @@ static bool
 s390_support_vector_misalignment (machine_mode mode ATTRIBUTE_UNUSED,
				  const_tree type ATTRIBUTE_UNUSED,
				  int misalignment ATTRIBUTE_UNUSED,
-				  bool is_packed ATTRIBUTE_UNUSED)
+				  bool is_packed ATTRIBUTE_UNUSED,
+				  bool gather_scatter ATTRIBUTE_UNUSED)
 {
   if (TARGET_VX)
     return true;
 
   return default_builtin_support_vector_misalignment (mode, type, misalignment,
-						      is_packed);
+						      is_packed,
+						      gather_scatter);
 }
 
 /* The vector ABI requires vector types to be aligned on an 8 byte
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index 5e305643b3a..757fc1d0350 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -6382,12 +6382,14 @@ return type of the vectorized function shall be of vector type
 @var{vec_type_out} and the argument types should be @var{vec_type_in}.
 @end deftypefn
 
-@deftypefn {Target Hook} bool TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT (machine_mode @var{mode}, const_tree @var{type}, int @var{misalignment}, bool @var{is_packed})
+@deftypefn {Target Hook} bool TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT (machine_mode @var{mode}, const_tree @var{type}, int @var{misalignment}, bool @var{is_packed}, bool @var{gather_scatter})
 This hook should return true if the target supports misaligned vector
 store/load of a specific factor denoted in the @var{misalignment}
 parameter.  The vector store/load should be of machine mode @var{mode} and
-the elements in the vectors should be of type @var{type}.  @var{is_packed}
-parameter is true if the memory access is defined in a packed struct.
+the elements in the vectors should be of type @var{type}.  The
+@var{is_packed} parameter is true if the misalignment is unknown and the
+memory access is defined in a packed struct.  @var{gather_scatter} is true
+if the load/store is a gather or scatter.
 @end deftypefn
 
 @deftypefn {Target Hook} machine_mode TARGET_VECTORIZE_PREFERRED_SIMD_MODE (scalar_mode @var{mode})
diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
index 3f4ac937367..f620691f7dd 100644
--- a/gcc/internal-fn.cc
+++ b/gcc/internal-fn.cc
@@ -3654,8 +3654,8 @@ expand_scatter_store_optab_fn (internal_fn, gcall *stmt, direct_optab optab)
   internal_fn ifn = gimple_call_internal_fn (stmt);
   int rhs_index = internal_fn_stored_value_index (ifn);
   tree base = gimple_call_arg (stmt, 0);
-  tree offset = gimple_call_arg (stmt, 1);
-  tree scale = gimple_call_arg (stmt, 2);
+  tree offset = gimple_call_arg (stmt, internal_fn_offset_index (ifn));
+  tree scale = gimple_call_arg (stmt, internal_fn_scale_index (ifn));
   tree rhs = gimple_call_arg (stmt, rhs_index);
 
   rtx base_rtx = expand_normal (base);
@@ -3680,12 +3680,12 @@ expand_scatter_store_optab_fn (internal_fn, gcall *stmt, direct_optab optab)
 /* Expand {MASK_,}GATHER_LOAD call CALL using optab OPTAB.  */
 
 static void
-expand_gather_load_optab_fn (internal_fn, gcall *stmt, direct_optab optab)
+expand_gather_load_optab_fn (internal_fn ifn, gcall *stmt, direct_optab optab)
 {
   tree lhs = gimple_call_lhs (stmt);
   tree base = gimple_call_arg (stmt, 0);
-  tree offset = gimple_call_arg (stmt, 1);
-  tree scale = gimple_call_arg (stmt, 2);
+  tree offset = gimple_call_arg (stmt, internal_fn_offset_index (ifn));
+  tree scale = gimple_call_arg (stmt, internal_fn_scale_index (ifn));
 
   rtx lhs_rtx = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
   rtx base_rtx = expand_normal (base);
@@ -4936,11 +4936,13 @@ internal_fn_len_index (internal_fn fn)
       return 2;
 
     case IFN_MASK_LEN_SCATTER_STORE:
+      return 6;
+
     case IFN_MASK_LEN_STRIDED_LOAD:
       return 5;
 
     case IFN_MASK_LEN_GATHER_LOAD:
-      return 6;
+      return 7;
 
     case IFN_COND_LEN_FMA:
     case IFN_COND_LEN_FMS:
@@ -5044,7 +5046,7 @@ internal_fn_else_index (internal_fn fn)
 
     case IFN_MASK_GATHER_LOAD:
     case IFN_MASK_LEN_GATHER_LOAD:
-      return 5;
+      return 6;
 
     default:
       return -1;
@@ -5079,7 +5081,7 @@ internal_fn_mask_index (internal_fn fn)
     case IFN_MASK_SCATTER_STORE:
     case IFN_MASK_LEN_GATHER_LOAD:
    case IFN_MASK_LEN_SCATTER_STORE:
-      return 4;
+      return 5;
 
     case IFN_VCOND_MASK:
     case IFN_VCOND_MASK_LEN:
@@ -5104,10 +5106,11 @@ internal_fn_stored_value_index (internal_fn fn)
 
     case IFN_MASK_STORE:
     case IFN_MASK_STORE_LANES:
+      return 3;
     case IFN_SCATTER_STORE:
    case IFN_MASK_SCATTER_STORE:
    case IFN_MASK_LEN_SCATTER_STORE:
-      return 3;
+      return 4;
 
     case IFN_LEN_STORE:
       return 4;
@@ -5121,6 +5124,75 @@ internal_fn_stored_value_index (internal_fn fn)
     }
 }
 
+/* If FN has an alias pointer return its index, otherwise return -1.  */
+
+int
+internal_fn_alias_ptr_index (internal_fn fn)
+{
+  switch (fn)
+    {
+    case IFN_MASK_LOAD:
+    case IFN_MASK_LEN_LOAD:
+    case IFN_GATHER_LOAD:
+    case IFN_MASK_GATHER_LOAD:
+    case IFN_MASK_LEN_GATHER_LOAD:
+    case IFN_SCATTER_STORE:
+    case IFN_MASK_SCATTER_STORE:
+    case IFN_MASK_LEN_SCATTER_STORE:
+      return 1;
+
+    default:
+      return -1;
+    }
+}
+
+/* If FN is a gather/scatter return the index of its offset argument,
+   otherwise return -1.  */
+
+int
+internal_fn_offset_index (internal_fn fn)
+{
+  if (!internal_gather_scatter_fn_p (fn))
+    return -1;
+
+  switch (fn)
+    {
+    case IFN_GATHER_LOAD:
+    case IFN_MASK_GATHER_LOAD:
+    case IFN_MASK_LEN_GATHER_LOAD:
+    case IFN_SCATTER_STORE:
+    case IFN_MASK_SCATTER_STORE:
+    case IFN_MASK_LEN_SCATTER_STORE:
+      return 2;
+
+    default:
+      return -1;
+    }
+}
+
+/* If FN is a gather/scatter return the index of its scale argument,
+   otherwise return -1.  */
+
+int
+internal_fn_scale_index (internal_fn fn)
+{
+  if (!internal_gather_scatter_fn_p (fn))
+    return -1;
+
+  switch (fn)
+    {
+    case IFN_GATHER_LOAD:
+    case IFN_MASK_GATHER_LOAD:
+    case IFN_MASK_LEN_GATHER_LOAD:
+    case IFN_SCATTER_STORE:
+    case IFN_MASK_SCATTER_STORE:
+    case IFN_MASK_LEN_SCATTER_STORE:
+      return 3;
+
+    default:
+      return -1;
+    }
+}
 
 /* Store all supported else values for the optab referred to by
    ICODE in ELSE_VALS.  The index of the else operand must be
   specified in
@@ -5199,13 +5271,9 @@ internal_gather_scatter_fn_supported_p (internal_fn ifn, tree vector_type,
	  && insn_operand_matches (icode, 2 + output_ops, GEN_INT (unsigned_p))
	  && insn_operand_matches (icode, 3 + output_ops, GEN_INT (scale)));
 
-  /* For gather the optab's operand indices do not match the IFN's because
-     the latter does not have the extension operand (operand 3).  It is
-     implicitly added during expansion so we use the IFN's else index + 1.
-  */
   if (ok && elsvals)
     get_supported_else_vals
-      (icode, internal_fn_else_index (IFN_MASK_GATHER_LOAD) + 1, *elsvals);
+      (icode, internal_fn_else_index (IFN_MASK_GATHER_LOAD), *elsvals);
 
   return ok;
 }
diff --git a/gcc/internal-fn.h b/gcc/internal-fn.h
index afd4f8e64c7..d190d718240 100644
--- a/gcc/internal-fn.h
+++ b/gcc/internal-fn.h
@@ -239,6 +239,9 @@ extern int internal_fn_mask_index (internal_fn);
 extern int internal_fn_len_index (internal_fn);
 extern int internal_fn_else_index (internal_fn);
 extern int internal_fn_stored_value_index (internal_fn);
+extern int internal_fn_offset_index (internal_fn fn);
+extern int internal_fn_scale_index (internal_fn fn);
+extern int internal_fn_alias_ptr_index (internal_fn fn);
 extern bool internal_gather_scatter_fn_supported_p (internal_fn, tree,
						    tree, tree, int,
						    vec<int> * = nullptr);
diff --git a/gcc/optabs-query.cc b/gcc/optabs-query.cc
index f5ca98da818..5335d0d8401 100644
--- a/gcc/optabs-query.cc
+++ b/gcc/optabs-query.cc
@@ -719,13 +719,9 @@ supports_vec_gather_load_p (machine_mode mode, vec<int> *elsvals)
	= (icode != CODE_FOR_nothing) ? 1 : -1;
     }
 
-  /* For gather the optab's operand indices do not match the IFN's because
-     the latter does not have the extension operand (operand 3).  It is
-     implicitly added during expansion so we use the IFN's else index + 1.
-  */
   if (elsvals && icode != CODE_FOR_nothing)
     get_supported_else_vals
-      (icode, internal_fn_else_index (IFN_MASK_GATHER_LOAD) + 1, *elsvals);
+      (icode, internal_fn_else_index (IFN_MASK_GATHER_LOAD), *elsvals);
 
   return this_fn_optabs->supports_vec_gather_load[mode] > 0;
 }
diff --git a/gcc/target.def b/gcc/target.def
index 38903eb567a..c3d0732c8b6 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -1918,17 +1918,20 @@ implementation approaches itself.",
 NULL)
 
 /* Return true if the target supports misaligned store/load of a
-   specific factor denoted in the third parameter.  The last parameter
-   is true if the access is defined in a packed struct.  */
+   specific factor denoted in the third parameter.  The second to the last
+   parameter is true if the access is defined in a packed struct.  */
 DEFHOOK
 (support_vector_misalignment,
 "This hook should return true if the target supports misaligned vector\n\
store/load of a specific factor denoted in the @var{misalignment}\n\
parameter.  The vector store/load should be of machine mode @var{mode} and\n\
-the elements in the vectors should be of type @var{type}.  @var{is_packed}\n\
-parameter is true if the memory access is defined in a packed struct.",
+the elements in the vectors should be of type @var{type}.  The\n\
+@var{is_packed} parameter is true if the misalignment is unknown and the\n\
+memory access is defined in a packed struct.  @var{gather_scatter} is true\n\
+if the load/store is a gather or scatter.",
 bool,
- (machine_mode mode, const_tree type, int misalignment, bool is_packed),
+ (machine_mode mode, const_tree type, int misalignment, bool is_packed,
+  bool gather_scatter),
 default_builtin_support_vector_misalignment)
 
 /* Returns the preferred mode for SIMD operations for the specified
diff --git a/gcc/targhooks.cc b/gcc/targhooks.cc
index c79458e374e..d2426d8fd16 100644
--- a/gcc/targhooks.cc
+++ b/gcc/targhooks.cc
@@ -1556,6 +1556,8 @@ default_builtin_support_vector_misalignment (machine_mode mode,
					     int misalignment
					     ATTRIBUTE_UNUSED,
					     bool is_packed
+					     ATTRIBUTE_UNUSED,
+					     bool gather_scatter
					     ATTRIBUTE_UNUSED)
 {
   if (optab_handler (movmisalign_optab, mode) != CODE_FOR_nothing)
diff --git a/gcc/targhooks.h b/gcc/targhooks.h
index f16b58798c2..3fa20af0e11 100644
--- a/gcc/targhooks.h
+++ b/gcc/targhooks.h
@@ -114,7 +114,7 @@ extern bool default_builtin_vector_alignment_reachable (const_tree, bool);
 extern bool
 default_builtin_support_vector_misalignment (machine_mode mode,
					     const_tree,
-					     int, bool);
+					     int, bool, bool);
 extern machine_mode default_preferred_simd_mode (scalar_mode mode);
 extern machine_mode default_split_reduction (machine_mode);
 extern unsigned int default_autovectorize_vector_modes (vector_modes *, bool);
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 82e5c31e499..bac85d89b19 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -2428,7 +2428,7 @@ proc check_effective_target_riscv_v_misalign_ok { } {
	      = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
	    asm ("vsetivli zero,7,e8,m1,ta,ma");
	    asm ("addi a7,%0,1" : : "r" (a) : "a7" );
-	    asm ("vle8.v v8,0(a7)" : : : "v8");
+	    asm ("vle16.v v8,0(a7)" : : : "v8");
	    return 0;
	  } } "-march=${gcc_march}"] } {
	return 1
diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc
index ee040eb9888..3642123ff85 100644
--- a/gcc/tree-vect-data-refs.cc
+++ b/gcc/tree-vect-data-refs.cc
@@ -4531,10 +4531,14 @@ vect_describe_gather_scatter_call (stmt_vec_info stmt_info,
   info->ifn = gimple_call_internal_fn (call);
   info->decl = NULL_TREE;
   info->base = gimple_call_arg (call, 0);
-  info->offset = gimple_call_arg (call, 1);
+  info->alias_ptr = gimple_call_arg
+    (call, internal_fn_alias_ptr_index (info->ifn));
+  info->offset = gimple_call_arg
+    (call, internal_fn_offset_index (info->ifn));
   info->offset_dt = vect_unknown_def_type;
   info->offset_vectype = NULL_TREE;
-  info->scale = TREE_INT_CST_LOW (gimple_call_arg (call, 2));
+  info->scale = TREE_INT_CST_LOW (gimple_call_arg
+				  (call, internal_fn_scale_index (info->ifn)));
   info->element_type = TREE_TYPE (vectype);
   info->memory_type = TREE_TYPE (DR_REF (dr));
 }
@@ -4859,6 +4863,11 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
   info->ifn = ifn;
   info->decl = decl;
   info->base = base;
+
+  info->alias_ptr = build_int_cst
+    (reference_alias_ptr_type (DR_REF (dr)),
+     get_object_alignment (DR_REF (dr)));
+
   info->offset = off;
   info->offset_dt = vect_unknown_def_type;
   info->offset_vectype = offset_vectype;
@@ -7476,11 +7485,11 @@ vect_supportable_dr_alignment (vec_info *vinfo, dr_vec_info *dr_info,
     }
 
   bool is_packed = false;
-  tree type = TREE_TYPE (DR_REF (dr));
   if (misalignment == DR_MISALIGNMENT_UNKNOWN)
     is_packed = not_size_aligned (DR_REF (dr));
+  tree type = TREE_TYPE (DR_REF (dr));
   if (targetm.vectorize.support_vector_misalignment (mode, type, misalignment,
-						     is_packed))
+						     is_packed, false))
     return dr_unaligned_supported;
 
   /* Unsupported.  */
diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
index 0f6d6b77ea1..f0ddbf9660c 100644
--- a/gcc/tree-vect-patterns.cc
+++ b/gcc/tree-vect-patterns.cc
@@ -6042,12 +6042,14 @@ vect_recog_gather_scatter_pattern (vec_info *vinfo,
	  tree vec_els
	    = vect_get_mask_load_else (elsval, TREE_TYPE (gs_vectype));
-	  pattern_stmt = gimple_build_call_internal (gs_info.ifn, 6, base,
+	  pattern_stmt = gimple_build_call_internal (gs_info.ifn, 7, base,
+						     gs_info.alias_ptr,
						     offset, scale, zero, mask,
						     vec_els);
	}
       else
-	pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
+	pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5, base,
+						   gs_info.alias_ptr,
						   offset, scale, zero);
       tree lhs = gimple_get_lhs (stmt_info->stmt);
       tree load_lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
@@ -6057,12 +6059,13 @@ vect_recog_gather_scatter_pattern (vec_info *vinfo,
     {
       tree rhs = vect_get_store_rhs (stmt_info);
       if (mask != NULL)
-	pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5,
-						   base, offset, scale, rhs,
-						   mask);
+	pattern_stmt = gimple_build_call_internal (gs_info.ifn, 6,
+						   base, gs_info.alias_ptr,
+						   offset, scale, rhs, mask);
       else
-	pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4,
-						   base, offset, scale, rhs);
+	pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5,
+						   base, gs_info.alias_ptr,
+						   offset, scale, rhs);
     }
   gimple_call_set_nothrow (pattern_stmt, true);
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 1a703a9bae4..81fc3e1a5a1 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -507,19 +507,21 @@ vect_def_types_match (enum vect_def_type dta, enum vect_def_type dtb)
	      && (dtb == vect_external_def
		  || dtb == vect_constant_def)));
 }
 
+#define GATHER_SCATTER_OFFSET (-3)
+
 static const int no_arg_map[] = { 0 };
 static const int arg0_map[] = { 1, 0 };
-static const int arg1_map[] = { 1, 1 };
+static const int arg2_map[] = { 1, 2 };
 static const int arg2_arg3_map[] = { 2, 2, 3 };
-static const int arg1_arg3_map[] = { 2, 1, 3 };
-static const int arg1_arg4_arg5_map[] = { 3, 1, 4, 5 };
-static const int arg1_arg3_arg4_map[] = { 3, 1, 3, 4 };
+static const int arg1_arg3_map[] = { 2, 2, 4 };
+static const int arg2_arg5_arg6_map[] = { 3, 2, 5, 6 };
+static const int arg2_arg4_arg5_map[] = { 3, 2, 4, 5 };
 static const int arg3_arg2_map[] = { 2, 3, 2 };
 static const int op1_op0_map[] = { 2, 1, 0 };
-static const int off_map[] = { 1, -3 };
-static const int off_op0_map[] = { 2, -3, 0 };
-static const int off_arg2_arg3_map[] = { 3, -3, 2, 3 };
-static const int off_arg3_arg2_map[] = { 3, -3, 3, 2 };
+static const int off_map[] = { 1, GATHER_SCATTER_OFFSET };
+static const int off_op0_map[] = { 2, GATHER_SCATTER_OFFSET, 0 };
+static const int off_arg2_arg3_map[] = { 3, GATHER_SCATTER_OFFSET, 2, 3 };
+static const int off_arg3_arg2_map[] = { 3, GATHER_SCATTER_OFFSET, 3, 2 };
 static const int mask_call_maps[6][7] = {
  { 1, 1, },
  { 2, 1, 2, },
@@ -568,18 +570,18 @@ vect_get_operand_map (const gimple *stmt, bool gather_scatter_p = false,
	return gather_scatter_p ? off_arg2_arg3_map : arg2_arg3_map;
 
      case IFN_GATHER_LOAD:
-	return arg1_map;
+	return arg2_map;
 
      case IFN_MASK_GATHER_LOAD:
      case IFN_MASK_LEN_GATHER_LOAD:
-	return arg1_arg4_arg5_map;
+	return arg2_arg5_arg6_map;
 
      case IFN_SCATTER_STORE:
	return arg1_arg3_map;
 
      case IFN_MASK_SCATTER_STORE:
      case IFN_MASK_LEN_SCATTER_STORE:
-	return arg1_arg3_arg4_map;
+	return arg2_arg4_arg5_map;
 
      case IFN_MASK_STORE:
	return gather_scatter_p ? off_arg3_arg2_map : arg3_arg2_map;
@@ -691,7 +693,7 @@ vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char swap,
    {
      oprnd_info = (*oprnds_info)[i];
      int opno = map ? map[i] : int (i);
-      if (opno == -3)
+      if (opno == GATHER_SCATTER_OFFSET)
	{
	  gcc_assert (STMT_VINFO_GATHER_SCATTER_P (stmt_info));
	  if (!is_a <loop_vec_info> (vinfo)
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 69f5f6758a1..4c044683655 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -1803,6 +1803,9 @@ vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
   /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
      but we don't need to store that here.  */
   gs_info->base = NULL_TREE;
+  gs_info->alias_ptr = build_int_cst
+    (reference_alias_ptr_type (DR_REF (dr)),
+     get_object_alignment (DR_REF (dr)));
   gs_info->element_type = TREE_TYPE (vectype);
   gs_info->offset = fold_convert (offset_type, step);
   gs_info->offset_dt = vect_constant_def;
@@ -2360,8 +2363,9 @@ get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
	  || *memory_access_type == VMAT_CONTIGUOUS_REVERSE)
    *poffset = neg_ldst_offset;
 
-  if (*memory_access_type == VMAT_GATHER_SCATTER
-      || *memory_access_type == VMAT_ELEMENTWISE
+  if (*memory_access_type == VMAT_ELEMENTWISE
+      || (*memory_access_type == VMAT_GATHER_SCATTER
+	  && GATHER_SCATTER_LEGACY_P (*gs_info))
      || *memory_access_type == VMAT_STRIDED_SLP
      || *memory_access_type == VMAT_INVARIANT)
    {
@@ -2476,7 +2480,7 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
			     vls_type == VLS_LOAD ? "gather" : "scatter");
	  return false;
	}
-      else if (gs_info->ifn == IFN_LAST && !gs_info->decl)
+      else if (GATHER_SCATTER_UNSUPPORTED_P (*gs_info))
	{
	  if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant ()
	      || !TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype).is_constant ()
@@ -2492,9 +2496,36 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
	      return false;
	    }
	}
-      /* Gather-scatter accesses perform only component accesses, alignment
-	 is irrelevant for them.  */
-      *alignment_support_scheme = dr_unaligned_supported;
+
+      /* Gather-scatter accesses normally perform only component accesses so
+	 alignment is irrelevant for them.  Targets like riscv do care about
+	 scalar alignment in vector accesses, though, so check scalar
+	 alignment here.  We determined the alias pointer as well as the base
+	 alignment during pattern recognition and can re-use it here.
+
+	 As we do not have a dataref we only know the alignment of the
+	 base.  For now don't try harder to determine misalignment and
+	 just assume it is unknown.  We consider the type packed if its
+	 scalar alignment is lower than the natural alignment of a vector
+	 element's type.  */
+
+      tree inner_vectype = TREE_TYPE (vectype);
+
+      unsigned HOST_WIDE_INT scalar_align
+	= tree_to_uhwi (gs_info->alias_ptr);
+      unsigned HOST_WIDE_INT inner_vectype_sz
+	= tree_to_uhwi (TYPE_SIZE (inner_vectype));
+
+      bool is_misaligned = scalar_align < inner_vectype_sz;
+      bool is_packed = scalar_align > 1 && is_misaligned;
+
+      *misalignment = DR_MISALIGNMENT_UNKNOWN;
+
+      if (targetm.vectorize.support_vector_misalignment
+	  (TYPE_MODE (vectype), inner_vectype, *misalignment, is_packed, true))
+	*alignment_support_scheme = dr_unaligned_supported;
+      else
+	*alignment_support_scheme = dr_unaligned_unsupported;
    }
  else if (!get_group_load_store_type (vinfo, stmt_info, vectype, slp_node,
				       masked_p,
@@ -2535,17 +2566,18 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
				 "alignment.  With non-contiguous memory vectorization"
				 " could read out of bounds at %G ",
				 STMT_VINFO_STMT (stmt_info));
-      if (inbounds)
-	LOOP_VINFO_MUST_USE_PARTIAL_VECTORS_P (loop_vinfo) = true;
-      else
-	return false;
+	  if (inbounds)
+	    LOOP_VINFO_MUST_USE_PARTIAL_VECTORS_P (loop_vinfo) = true;
+	  else
+	    return false;
    }
 
  /* If this DR needs alignment for correctness, we must ensure the target
     alignment is a constant power-of-two multiple of the amount read per
     vector iteration or force masking.  */
  if (dr_safe_speculative_read_required (stmt_info)
-      && *alignment_support_scheme == dr_aligned)
+      && (*alignment_support_scheme == dr_aligned
+	  && *memory_access_type != VMAT_GATHER_SCATTER))
    {
      /* We can only peel for loops, of course.  */
      gcc_checking_assert (loop_vinfo);
@@ -8375,7 +8407,8 @@ vectorizable_store (vec_info *vinfo,
	}
      else if (memory_access_type != VMAT_LOAD_STORE_LANES
	       && (memory_access_type != VMAT_GATHER_SCATTER
-		   || (gs_info.decl && !VECTOR_BOOLEAN_TYPE_P (mask_vectype))))
+		   || (GATHER_SCATTER_LEGACY_P (gs_info)
+		       && !VECTOR_BOOLEAN_TYPE_P (mask_vectype))))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -8383,8 +8416,7 @@ vectorizable_store (vec_info *vinfo,
	  return false;
	}
      else if (memory_access_type == VMAT_GATHER_SCATTER
-	       && gs_info.ifn == IFN_LAST
-	       && !gs_info.decl)
+	       && GATHER_SCATTER_UNSUPPORTED_P (gs_info))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -8449,7 +8481,6 @@ vectorizable_store (vec_info *vinfo,
 
  if (dump_enabled_p ()
      && memory_access_type != VMAT_ELEMENTWISE
-      && memory_access_type != VMAT_GATHER_SCATTER
      && memory_access_type != VMAT_STRIDED_SLP
      && memory_access_type != VMAT_INVARIANT
      && alignment_support_scheme != dr_aligned)
@@ -9110,7 +9141,7 @@ vectorizable_store (vec_info *vinfo,
					     final_mask, vec_mask, gsi);
	    }
 
-	  if (gs_info.ifn != IFN_LAST)
+	  if (GATHER_SCATTER_IFN_P (gs_info))
	    {
	      if (costing_p)
		{
@@ -9150,30 +9181,37 @@ vectorizable_store (vec_info *vinfo,
		{
		  if (VECTOR_TYPE_P (TREE_TYPE (vec_offset)))
		    call = gimple_build_call_internal (
-		      IFN_MASK_LEN_SCATTER_STORE, 7, dataref_ptr,
+		      IFN_MASK_LEN_SCATTER_STORE, 8, dataref_ptr,
+		      gs_info.alias_ptr,
		      vec_offset, scale, vec_oprnd, final_mask, final_len,
		      bias);
		  else
		    /* Non-vector offset indicates that prefer to take
		       MASK_LEN_STRIDED_STORE instead of the
-		       IFN_MASK_SCATTER_STORE with direct stride arg.  */
+		       IFN_MASK_SCATTER_STORE with direct stride arg.
+		       Similar to the gather case we have checked the
+		       alignment for a scatter already and assume
+		       that the strided store has the same requirements.  */
		    call = gimple_build_call_internal (
		      IFN_MASK_LEN_STRIDED_STORE, 6, dataref_ptr,
		      vec_offset, vec_oprnd, final_mask, final_len, bias);
		}
	      else if (final_mask)
		call = gimple_build_call_internal
-		  (IFN_MASK_SCATTER_STORE, 5, dataref_ptr,
+		  (IFN_MASK_SCATTER_STORE, 6, dataref_ptr,
+		   gs_info.alias_ptr,
		   vec_offset, scale, vec_oprnd, final_mask);
	      else
-		call = gimple_build_call_internal (IFN_SCATTER_STORE, 4,
-						   dataref_ptr, vec_offset,
+		call = gimple_build_call_internal (IFN_SCATTER_STORE, 5,
+						   dataref_ptr,
+						   gs_info.alias_ptr,
+						   vec_offset,
						   scale, vec_oprnd);
	      gimple_call_set_nothrow (call, true);
	      vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
	      new_stmt = call;
	    }
-	  else if (gs_info.decl)
+	  else if (GATHER_SCATTER_LEGACY_P (gs_info))
	    {
	      /* The builtin decls path for scatter is legacy, x86 only.  */
	      gcc_assert (nunits.is_constant ()
@@ -10083,8 +10121,7 @@ vectorizable_load (vec_info *vinfo,
      return false;
    }
  else if (memory_access_type == VMAT_GATHER_SCATTER
-	   && gs_info.ifn == IFN_LAST
-	   && !gs_info.decl)
+	   && GATHER_SCATTER_UNSUPPORTED_P (gs_info))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -10634,7 +10671,6 @@ vectorizable_load (vec_info *vinfo,
      vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
    }
 
-  gcc_assert (alignment_support_scheme);
  vec_loop_masks *loop_masks
    = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
       ? &LOOP_VINFO_MASKS (loop_vinfo)
@@ -10654,10 +10690,12 @@ vectorizable_load (vec_info *vinfo,
 
  /* Targets with store-lane instructions must not require explicit
     realignment.  vect_supportable_dr_alignment always returns either
-     dr_aligned or dr_unaligned_supported for masked operations.  */
+     dr_aligned or dr_unaligned_supported for (non-length) masked
+     operations.  */
  gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
	       && !mask
	       && !loop_masks)
+	      || memory_access_type == VMAT_GATHER_SCATTER
	      || alignment_support_scheme == dr_aligned
	      || alignment_support_scheme == dr_unaligned_supported);
 
@@ -11002,8 +11040,6 @@ vectorizable_load (vec_info *vinfo,
 
  if (memory_access_type == VMAT_GATHER_SCATTER)
    {
-      gcc_assert (alignment_support_scheme == dr_aligned
-		  || alignment_support_scheme == dr_unaligned_supported);
      gcc_assert (!grouped_load && !slp_perm);
 
      unsigned int inside_cost = 0, prologue_cost = 0;
@@ -11046,7 +11082,7 @@ vectorizable_load (vec_info *vinfo,
 
	  /* 2. Create the vector-load in the loop.  */
	  unsigned HOST_WIDE_INT align;
-	  if (gs_info.ifn != IFN_LAST)
+	  if (GATHER_SCATTER_IFN_P (gs_info))
	    {
	      if (costing_p)
		{
@@ -11092,7 +11128,8 @@ vectorizable_load (vec_info *vinfo,
		{
		  if (VECTOR_TYPE_P (TREE_TYPE (vec_offset)))
		    call = gimple_build_call_internal (IFN_MASK_LEN_GATHER_LOAD,
-						       8, dataref_ptr,
+						       9, dataref_ptr,
+						       gs_info.alias_ptr,
						       vec_offset, scale, zero,
						       final_mask, vec_els,
						       final_len, bias);
@@ -11107,18 +11144,20 @@ vectorizable_load (vec_info *vinfo,
		}
	      else if (final_mask)
		call = gimple_build_call_internal (IFN_MASK_GATHER_LOAD,
-						   6, dataref_ptr,
+						   7, dataref_ptr,
+						   gs_info.alias_ptr,
						   vec_offset, scale,
						   zero, final_mask, vec_els);
	      else
-		call = gimple_build_call_internal (IFN_GATHER_LOAD, 4,
-						   dataref_ptr, vec_offset,
-						   scale, zero);
+		call = gimple_build_call_internal (IFN_GATHER_LOAD, 5,
+						   dataref_ptr,
+						   gs_info.alias_ptr,
+						   vec_offset, scale, zero);
	      gimple_call_set_nothrow (call, true);
	      new_stmt = call;
	      data_ref = NULL_TREE;
	    }
-	  else if (gs_info.decl)
+	  else if (GATHER_SCATTER_LEGACY_P (gs_info))
	    {
	      /* The builtin decls path for gather is legacy, x86 only.  */
	      gcc_assert (!final_len && nunits.is_constant ());
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 66a29648fb4..35615daf7a7 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -1557,6 +1557,10 @@ struct gather_scatter_info {
  /* The loop-invariant base value.  */
  tree base;
 
+  /* The TBAA alias pointer the value of which determines the alignment
+     of the scalar accesses.  */
+  tree alias_ptr;
+
  /* The original scalar offset, which is a non-loop-invariant SSA_NAME.  */
  tree offset;
 
@@ -1655,6 +1659,14 @@ struct gather_scatter_info {
 #define PURE_SLP_STMT(S)                  ((S)->slp_type == pure_slp)
 #define STMT_SLP_TYPE(S)                   (S)->slp_type
 
+#define GATHER_SCATTER_LEGACY_P(info) ((info).decl != NULL_TREE \
+				       && (info).ifn == IFN_LAST)
+#define GATHER_SCATTER_IFN_P(info) ((info).decl == NULL_TREE \
+				    && (info).ifn != IFN_LAST)
+#define GATHER_SCATTER_UNSUPPORTED_P(info) ((info).decl == NULL_TREE \
+					    && (info).ifn == IFN_LAST)
+
+
 /* Contains the scalar or vector costs for a vec_info.  */
 
 class vector_costs
 {
-- 
2.49.0