Hi, This patch is fixing an issue about parameter accessing if the parameter is struct type and passed through integer registers, and there is floating member is accessed. Like below code:
typedef struct DF {double a[4]; long l; } DF; double foo_df (DF arg){return arg.a[3];} On ppc64le, with trunk gcc, "std 6,-24(1) ; lfd 1,-24(1)" is generated. While instruction "mtvsrd 1, 6" would be enough for this case. This patch updates the behavior when loading floating members of a parameter: if that floating member is stored via integer register, then loading it as integer mode first, and converting it to floating mode. Compare with previous patch: https://gcc.gnu.org/pipermail/gcc-patches/2022-December/608872.html Previous version supports converion from DImode to DF/SF, this version also supports conversion from DImode to SI/HI/QI modes. I also tried to enhance CSE/DSE for this issue. But because the limitations (e.g. CSE does not like new pseudo, DSE is not good at cross-blocks), some cases (as this patch) can not be handled. Bootstrap and regtest passes on ppc64{,le}. Is this ok for trunk? Thanks for comments! BR, Jeff (Jiufu) PR target/108073 gcc/ChangeLog: * expr.cc (extract_subreg_from_loading_word): New function. (expand_expr_real_1): Call extract_subreg_from_loading_word. gcc/testsuite/ChangeLog: * g++.target/powerpc/pr102024.C: Updated. * gcc.target/powerpc/pr108073.c: New test. --- gcc/expr.cc | 76 +++++++++++++++++++++ gcc/testsuite/g++.target/powerpc/pr102024.C | 2 +- gcc/testsuite/gcc.target/powerpc/pr108073.c | 30 ++++++++ 3 files changed, 107 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gcc.target/powerpc/pr108073.c diff --git a/gcc/expr.cc b/gcc/expr.cc index d9407432ea5..6de4a985c8b 100644 --- a/gcc/expr.cc +++ b/gcc/expr.cc @@ -10631,6 +10631,69 @@ stmt_is_replaceable_p (gimple *stmt) return false; } +/* Return the content of the memory slot SOURCE as MODE. + SOURCE is based on BASE. BASE is a memory block that is stored via words. + + To get the content from SOURCE: + first load the word from the memory which covers the SOURCE slot first; + next return the word's subreg which offsets to SOURCE slot; + then convert to MODE as necessary. */ + +static rtx +extract_subreg_from_loading_word (machine_mode mode, rtx source, rtx base) +{ + rtx src_base = XEXP (source, 0); + poly_uint64 offset = MEM_OFFSET (source); + + if (GET_CODE (src_base) == PLUS && CONSTANT_P (XEXP (src_base, 1))) + { + offset += INTVAL (XEXP (src_base, 1)); + src_base = XEXP (src_base, 0); + } + + if (!rtx_equal_p (XEXP (base, 0), src_base)) + return NULL_RTX; + + /* Subreg(DI,n) -> DF/SF/SI/HI/QI */ + poly_uint64 word_size = GET_MODE_SIZE (word_mode); + poly_uint64 mode_size = GET_MODE_SIZE (mode); + poly_uint64 byte_off; + unsigned int start; + machine_mode int_mode; + if (known_ge (word_size, mode_size) && multiple_p (word_size, mode_size) + && int_mode_for_mode (mode).exists (&int_mode) + && can_div_trunc_p (offset, word_size, &start, &byte_off) + && multiple_p (byte_off, mode_size)) + { + rtx word_mem = copy_rtx (source); + PUT_MODE (word_mem, word_mode); + word_mem = adjust_address (word_mem, word_mode, -byte_off); + + rtx word_reg = gen_reg_rtx (word_mode); + emit_move_insn (word_reg, word_mem); + + poly_uint64 low_off = subreg_lowpart_offset (int_mode, word_mode); + if (!known_eq (byte_off, low_off)) + { + poly_uint64 shift_bytes = known_gt (byte_off, low_off) + ? byte_off - low_off + : low_off - byte_off; + word_reg = expand_shift (RSHIFT_EXPR, word_mode, word_reg, + shift_bytes * BITS_PER_UNIT, word_reg, 0); + } + + rtx int_subreg = gen_lowpart (int_mode, word_reg); + if (mode == int_mode) + return int_subreg; + + rtx int_mode_reg = gen_reg_rtx (int_mode); + emit_move_insn (int_mode_reg, int_subreg); + return gen_lowpart (mode, int_mode_reg); + } + + return NULL_RTX; +} + rtx expand_expr_real_1 (tree exp, rtx target, machine_mode tmode, enum expand_modifier modifier, rtx *alt_rtl, @@ -11812,6 +11875,19 @@ expand_expr_real_1 (tree exp, rtx target, machine_mode tmode, && modifier != EXPAND_WRITE) op0 = flip_storage_order (mode1, op0); + /* Accessing sub-field of struct parameter which passed via integer + registers. */ + if (mode == mode1 && TREE_CODE (tem) == PARM_DECL + && DECL_INCOMING_RTL (tem) && REG_P (DECL_INCOMING_RTL (tem)) + && GET_MODE (DECL_INCOMING_RTL (tem)) == BLKmode && MEM_P (op0) + && MEM_OFFSET_KNOWN_P (op0)) + { + rtx subreg + = extract_subreg_from_loading_word (mode, op0, DECL_RTL (tem)); + if (subreg) + op0 = subreg; + } + if (mode == mode1 || mode1 == BLKmode || mode1 == tmode || modifier == EXPAND_CONST_ADDRESS || modifier == EXPAND_INITIALIZER) diff --git a/gcc/testsuite/g++.target/powerpc/pr102024.C b/gcc/testsuite/g++.target/powerpc/pr102024.C index 769585052b5..c8995cae707 100644 --- a/gcc/testsuite/g++.target/powerpc/pr102024.C +++ b/gcc/testsuite/g++.target/powerpc/pr102024.C @@ -5,7 +5,7 @@ // Test that a zero-width bit field in an otherwise homogeneous aggregate // generates a psabi warning and passes arguments in GPRs. -// { dg-final { scan-assembler-times {\mstd\M} 4 } } +// { dg-final { scan-assembler-times {\mmtvsrd\M} 4 } } struct a_thing { diff --git a/gcc/testsuite/gcc.target/powerpc/pr108073.c b/gcc/testsuite/gcc.target/powerpc/pr108073.c new file mode 100644 index 00000000000..91c13d896b7 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr108073.c @@ -0,0 +1,30 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -save-temps" } */ + +typedef struct DF {double a[4]; short s1; short s2; short s3; short s4; } DF; +typedef struct SF {float a[4]; int i1; int i2; } SF; + +/* Each of below function contains one mtvsrd. */ +/* { dg-final { scan-assembler-times {\mmtvsrd\M} 3 {target { has_arch_ppc64 && has_arch_pwr8 } } } } */ +/* { dg-final { scan-assembler-not {\mlwz\M} {target { has_arch_ppc64 && has_arch_pwr8 } } } } */ +/* { dg-final { scan-assembler-not {\mlhz\M} {target { has_arch_ppc64 && has_arch_pwr8 } } } } */ +short __attribute__ ((noipa)) foo_hi (DF a, int flag){if (flag == 2)return a.s2+a.s3;return 0;} +int __attribute__ ((noipa)) foo_si (SF a, int flag){if (flag == 2)return a.i2+a.i1;return 0;} +double __attribute__ ((noipa)) foo_df (DF arg, int flag){if (flag == 2)return arg.a[3];else return 0.0;} +float __attribute__ ((noipa)) foo_sf (SF arg, int flag){if (flag == 2)return arg.a[2]; return 0;} +float __attribute__ ((noipa)) foo_sf1 (SF arg, int flag){if (flag == 2)return arg.a[1];return 0;} + +DF gdf = {{1.0,2.0,3.0,4.0}, 1, 2, 3, 4}; +SF gsf = {{1.0f,2.0f,3.0f,4.0f}, 1, 2}; + +int main() +{ + if (!(foo_hi (gdf, 2) == 5 && foo_si (gsf, 2) == 3 && foo_df (gdf, 2) == 4.0 + && foo_sf (gsf, 2) == 3.0 && foo_sf1 (gsf, 2) == 2.0)) + __builtin_abort (); + if (!(foo_hi (gdf, 1) == 0 && foo_si (gsf, 1) == 0 && foo_df (gdf, 1) == 0 + && foo_sf (gsf, 1) == 0 && foo_sf1 (gsf, 1) == 0)) + __builtin_abort (); + return 0; +} + -- 2.17.1