> On 30 Aug 2024, at 14:05, Richard Sandiford <richard.sandif...@arm.com> wrote:
>
> External email: Use caution opening links or attachments
>
>
> Jennifer Schmitz <jschm...@nvidia.com> writes:
>> This patch implements constant folding of binary operations for SVE
>> intrinsics by calling the constant-folding mechanism of the middle-end
>> for a given tree_code.
>> In fold-const.cc, the code for folding vector constants was moved from
>> const_binop to a new function vector_const_binop. This function takes a
>> function pointer as argument specifying how to fold the vector elements.
>> The code for folding operations where the first operand is a vector
>> constant and the second argument is an integer constant was also moved
>> into vector_const_binop to fold binary SVE intrinsics where the second
>> operand is an integer (_n).
>> In the aarch64 backend, the new function aarch64_const_binop was
>> created, which - in contrast to int_const_binop - does not treat
>> operations as overflowing. This function is passed as callback to
>> vector_const_binop during gimple folding in intrinsic implementations.
>> Because aarch64_const_binop calls poly_int_binop, the latter was made public.
>>
>> The patch was bootstrapped and regtested on aarch64-linux-gnu, no regression.
>> OK for mainline?
>>
>> Signed-off-by: Jennifer Schmitz <jschm...@nvidia.com>
>>
>> gcc/
>> * config/aarch64/aarch64-sve-builtins.cc (aarch64_const_binop):
>> New function to fold binary SVE intrinsics without overflow.
>> * config/aarch64/aarch64-sve-builtins.h: Declare aarch64_const_binop.
>> * fold-const.h: Declare vector_const_binop.
>> * fold-const.cc (const_binop): Remove cases for vector constants.
>> (vector_const_binop): New function that folds vector constants
>> element-wise.
>> (int_const_binop): Remove call to wide_int_binop.
>> (poly_int_binop): Add call to wide_int_binop.
>>
>> From 2a773d8289b5ec5ab2f2e0d03cbaa35b48bc44b2 Mon Sep 17 00:00:00 2001
>> From: Jennifer Schmitz <jschm...@nvidia.com>
>> Date: Thu, 29 Aug 2024 04:35:49 -0700
>> Subject: [PATCH 1/3] SVE intrinsics: Fold constant operands.
>>
>> This patch implements constant folding of binary operations for SVE
>> intrinsics by calling the constant-folding mechanism of the middle-end
>> for a given tree_code.
>> In fold-const.cc, the code for folding vector constants was moved from
>> const_binop to a new function vector_const_binop. This function takes a
>> function pointer as argument specifying how to fold the vector elements.
>> The code for folding operations where the first operand is a vector
>> constant and the second argument is an integer constant was also moved
>> into vector_const_binop to fold binary SVE intrinsics where the second
>> operand is an integer (_n).
>> In the aarch64 backend, the new function aarch64_const_binop was
>> created, which - in contrast to int_const_binop - does not treat
>> operations as overflowing. This function is passed as callback to
>> vector_const_binop during gimple folding in intrinsic implementations.
>> Because aarch64_const_binop calls poly_int_binop, the latter was made public.
>>
>> The patch was bootstrapped and regtested on aarch64-linux-gnu, no regression.
>> OK for mainline?
>>
>> Signed-off-by: Jennifer Schmitz <jschm...@nvidia.com>
>>
>> gcc/
>> * config/aarch64/aarch64-sve-builtins.cc (aarch64_const_binop):
>> New function to fold binary SVE intrinsics without overflow.
>> * config/aarch64/aarch64-sve-builtins.h: Declare aarch64_const_binop.
>> * fold-const.h: Declare vector_const_binop.
>> * fold-const.cc (const_binop): Remove cases for vector constants.
>> (vector_const_binop): New function that folds vector constants
>> element-wise.
>> (int_const_binop): Remove call to wide_int_binop.
>> (poly_int_binop): Add call to wide_int_binop.
>> ---
>>  gcc/config/aarch64/aarch64-sve-builtins.cc |  20 +++
>>  gcc/config/aarch64/aarch64-sve-builtins.h  |   1 +
>>  gcc/fold-const.cc                          | 192 +++++++++++----------
>>  gcc/fold-const.h                           |   5 +
>>  4 files changed, 131 insertions(+), 87 deletions(-)
>>
>> diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc
>> index 5ca9ec32b69..315d5ac4177 100644
>> --- a/gcc/config/aarch64/aarch64-sve-builtins.cc
>> +++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
>> @@ -3432,6 +3432,26 @@ is_ptrue (tree v, unsigned int step)
>>           && vector_cst_all_same (v, step));
>>  }
>>
>> +/* Try to fold constant arguments arg1 and arg2 using the given tree_code.
>> +   Operations are not treated as overflowing. */
>> +tree
>> +aarch64_const_binop (enum tree_code code, tree arg1, tree arg2)
>> +{
>> +  if (poly_int_tree_p (arg1) && poly_int_tree_p (arg2))
>> +    {
>> +      poly_wide_int poly_res;
>> +      tree type = TREE_TYPE (arg1);
>> +      signop sign = TYPE_SIGN (type);
>> +      wi::overflow_type overflow = wi::OVF_NONE;
>> +
>> +      if (!poly_int_binop (poly_res, code, arg1, arg2, sign, &overflow))
>> +        return NULL_TREE;
>> +      return force_fit_type (type, poly_res, false,
>> +                             TREE_OVERFLOW (arg1) | TREE_OVERFLOW (arg2));
>> +    }
>> +  return NULL_TREE;
>> +}
>> +
>
> This is ok, but for staging purposes, I think it should be part of
> the later patches that call it, with the function marked "static".

I moved the function into patch 2/3 and made it static.

>
>> gimple_folder::gimple_folder (const function_instance &instance, tree fndecl,
>>                               gimple_stmt_iterator *gsi_in, gcall *call_in)
>>   : function_call_info (gimple_location (call_in), instance, fndecl),
>> @@ -1677,82 +1763,14 @@ const_binop (enum tree_code code, tree arg1, tree arg2)
>>        && (simplified = simplify_const_binop (code, arg2, arg1, 1)))
>>      return simplified;
>>
>> -  if (TREE_CODE (arg1) == VECTOR_CST
>> -      && TREE_CODE (arg2) == VECTOR_CST
>> -      && known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg1)),
>> -                   TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg2))))
>> -    {
>> -      tree type = TREE_TYPE (arg1);
>> -      bool step_ok_p;
>> -      if (VECTOR_CST_STEPPED_P (arg1)
>> -          && VECTOR_CST_STEPPED_P (arg2))
>> -        /* We can operate directly on the encoding if:
>> -
>> -              a3 - a2 == a2 - a1 && b3 - b2 == b2 - b1
>> -            implies
>> -              (a3 op b3) - (a2 op b2) == (a2 op b2) - (a1 op b1)
>> -
>> -           Addition and subtraction are the supported operators
>> -           for which this is true. */
>> -        step_ok_p = (code == PLUS_EXPR || code == MINUS_EXPR);
>> -      else if (VECTOR_CST_STEPPED_P (arg1))
>> -        /* We can operate directly on stepped encodings if:
>> -
>> -             a3 - a2 == a2 - a1
>> -           implies:
>> -             (a3 op c) - (a2 op c) == (a2 op c) - (a1 op c)
>> -
>> -           which is true if (x -> x op c) distributes over addition. */
>> -        step_ok_p = distributes_over_addition_p (code, 1);
>> -      else
>> -        /* Similarly in reverse. */
>> -        step_ok_p = distributes_over_addition_p (code, 2);
>> -      tree_vector_builder elts;
>> -      if (!elts.new_binary_operation (type, arg1, arg2, step_ok_p))
>> -        return NULL_TREE;
>> -      unsigned int count = elts.encoded_nelts ();
>> -      for (unsigned int i = 0; i < count; ++i)
>> -        {
>> -          tree elem1 = VECTOR_CST_ELT (arg1, i);
>> -          tree elem2 = VECTOR_CST_ELT (arg2, i);
>> -
>> -          tree elt = const_binop (code, elem1, elem2);
>> +  if ((TREE_CODE (arg1) == VECTOR_CST
>> +       && TREE_CODE (arg2) == VECTOR_CST
>> +       && known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg1)),
>> +                    TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg2))))
>> +      || (TREE_CODE (arg1) == VECTOR_CST
>> +          && TREE_CODE (arg2) == INTEGER_CST))
>> +    return vector_const_binop (code, arg1, arg2, const_binop);
>
> Rather than do:
>
>   if ((TREE_CODE (arg1) == VECTOR_CST
>        && TREE_CODE (arg2) == VECTOR_CST
>        && known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg1)),
>                     TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg2))))
>       || (TREE_CODE (arg1) == VECTOR_CST
>           && TREE_CODE (arg2) == INTEGER_CST))
>     return vector_const_binop (code, arg1, arg2, const_binop);
>
>   return NULL_TREE;
>
> I think we should just tail-call vector_const_binop unconditionally:
>
>   return vector_const_binop (code, arg1, arg2, const_binop);
>
> That should be more efficient, and also protects against any risk
> that the conditions would get out of sync.
>
> OK for the target-independent bits with that change, if there is
> no objection by this time on Monday.
>
> Thanks,
> Richard

Done. I also moved
  && known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg1)),
               TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg2)))

to vector_const_binop (see the sketch in the P.S. below). The patch was
bootstrapped and tested again, no regression.

Best,
Jennifer
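P.S. To make the resulting split concrete, here is a simplified sketch
(element loops elided; the attached patch has the exact code, and the
callback parameter name below is only illustrative). const_binop now ends
with the unconditional tail call

  return vector_const_binop (code, arg1, arg2, const_binop);

and the operand checks that used to guard the vector cases live in
vector_const_binop itself:

  tree
  vector_const_binop (enum tree_code code, tree arg1, tree arg2,
                      tree (*elt_binop) (enum tree_code, tree, tree))
  {
    if (TREE_CODE (arg1) == VECTOR_CST
        && TREE_CODE (arg2) == VECTOR_CST
        && known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg1)),
                     TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg2))))
      {
        /* Fold the two vector constants element-wise, calling elt_binop
           on each pair of elements (the code moved from const_binop).  */
      }
    if (TREE_CODE (arg1) == VECTOR_CST
        && TREE_CODE (arg2) == INTEGER_CST)
      {
        /* Vector op scalar constant (e.g. shifts), likewise folded one
           element at a time via elt_binop.  */
      }
    return NULL_TREE;
  }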
0001-SVE-intrinsics-Refactor-const_binop-to-allow-constan.patch
Description: Binary data
>
>>
>> -          /* It is possible that const_binop cannot handle the given
>> -             code and return NULL_TREE */
>> -          if (elt == NULL_TREE)
>> -            return NULL_TREE;
>> -          elts.quick_push (elt);
>> -        }
>> -
>> -      return elts.build ();
>> -    }
>> -
>> -  /* Shifts allow a scalar offset for a vector. */
>> -  if (TREE_CODE (arg1) == VECTOR_CST
>> -      && TREE_CODE (arg2) == INTEGER_CST)
>> -    {
>> -      tree type = TREE_TYPE (arg1);
>> -      bool step_ok_p = distributes_over_addition_p (code, 1);
>> -      tree_vector_builder elts;
>> -      if (!elts.new_unary_operation (type, arg1, step_ok_p))
>> -        return NULL_TREE;
>> -      unsigned int count = elts.encoded_nelts ();
>> -      for (unsigned int i = 0; i < count; ++i)
>> -        {
>> -          tree elem1 = VECTOR_CST_ELT (arg1, i);
>> -
>> -          tree elt = const_binop (code, elem1, arg2);
>> -
>> -          /* It is possible that const_binop cannot handle the given
>> -             code and return NULL_TREE. */
>> -          if (elt == NULL_TREE)
>> -            return NULL_TREE;
>> -          elts.quick_push (elt);
>> -        }
>> -
>> -      return elts.build ();
>> -    }
>>    return NULL_TREE;
>>  }
>>
>> diff --git a/gcc/fold-const.h b/gcc/fold-const.h
>> index b82ef137e2f..3e3998b57b0 100644
>> --- a/gcc/fold-const.h
>> +++ b/gcc/fold-const.h
>> @@ -126,6 +126,9 @@ extern tree fold_vec_perm (tree, tree, tree, const vec_perm_indices &);
>>  extern bool wide_int_binop (wide_int &res, enum tree_code,
>>                              const wide_int &arg1, const wide_int &arg2,
>>                              signop, wi::overflow_type *);
>> +extern bool poly_int_binop (poly_wide_int &res, enum tree_code,
>> +                            const_tree, const_tree, signop,
>> +                            wi::overflow_type *);
>>  extern tree int_const_binop (enum tree_code, const_tree, const_tree, int = 1);
>>  #define build_fold_addr_expr(T)\
>>          build_fold_addr_expr_loc (UNKNOWN_LOCATION, (T))
>> @@ -218,6 +221,8 @@ extern bool simple_condition_p (tree);
>>  extern tree exact_inverse (tree, tree);
>>  extern bool expr_not_equal_to (tree t, const wide_int &);
>>  extern tree const_unop (enum tree_code, tree, tree);
>> +extern tree vector_const_binop (enum tree_code, tree, tree,
>> +                                tree (*) (enum tree_code, tree, tree));
>>  extern tree const_binop (enum tree_code, tree, tree, tree);
>>  extern bool negate_mathfn_p (combined_fn);
>>  extern const char *getbyterep (tree, unsigned HOST_WIDE_INT *);
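(A closing note on how the vector_const_binop declaration quoted above is
meant to be used from the aarch64 backend: patch 2/3 passes
aarch64_const_binop as the element-folding callback during gimple folding
of the intrinsics, roughly

  tree res = vector_const_binop (code, arg1, arg2, aarch64_const_binop);

so that constant vector arguments are folded element-wise without the
operation being treated as overflowing. The line above is only an
illustration; the exact call site is introduced in patch 2/3.)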