On Fri, Jan 31, 2025 at 10:09 PM Uros Bizjak <ubiz...@gmail.com> wrote: > > On Fri, Jan 31, 2025 at 2:54 PM Uros Bizjak <ubiz...@gmail.com> wrote: > > > > On Fri, Jan 31, 2025 at 2:36 PM H.J. Lu <hjl.to...@gmail.com> wrote: > > > > > > -fno-plt forces external call to indirect call via GOT memory. But > > > -mindirect-branch-register requires indirect call and jump via register. > > > For -mindirect-branch-register, expanding indirect call via register and > > > update call patterns and peepholes to disable indirect call via memory. > > > > > > gcc/ > > > > > > PR target/118713 > > > * config/i386/i386-expand.cc (ix86_expand_call): Force indirect > > > call via register for -mindirect-branch-register. > > > * config/i386/i386.md (*call): Disable indirect call via memory > > > for -mindirect-branch-register. > > > (*call_got_x32): Likewise. > > > (*sibcall_GOT_32): Likewise. > > > (*sibcall): Likewise. > > > (*sibcall_memory): Likewise. > > > (*call_pop): Likewise. > > > (*sibcall_pop): Likewise. > > > (*sibcall_pop_memory): Likewise. > > > (*call_value): Likewise. > > > (*call_value_got_x32): Likewise. > > > (*sibcall_value_GOT_32): Likewise. > > > (*sibcall_value): Likewise. > > > (*sibcall_value_memory): Likewise. > > > (*call_value_pop): Likewise. > > > (*sibcall_value_pop): Likewise. > > > (*sibcall_value_pop_memory): Likewise. > > > > > > gcc/testsuite/ > > > > > > PR target/118713 > > > * gcc.target/i386/pr118713-1-x32.c: New test. > > > * gcc.target/i386/pr118713-1.c: Likewise. > > > * gcc.target/i386/pr118713-2-x32.c: Likewise. > > > * gcc.target/i386/pr118713-2.c: Likewise. > > > * gcc.target/i386/pr118713-3-x32.c: Likewise. > > > * gcc.target/i386/pr118713-3.c: Likewise. > > > * gcc.target/i386/pr118713-4-x32.c: Likewise. > > > * gcc.target/i386/pr118713-4.c: Likewise. > > > * gcc.target/i386/pr118713-5-x32.c: Likewise. > > > * gcc.target/i386/pr118713-5.c: Likewise. > > > * gcc.target/i386/pr118713-6-x32.c: Likewise. > > > * gcc.target/i386/pr118713-6.c: Likewise. > > > * gcc.target/i386/pr118713-7-x32.c: Likewise. > > > * gcc.target/i386/pr118713-7.c: Likewise. > > > * gcc.target/i386/pr118713-8-x32.c: Likewise. > > > * gcc.target/i386/pr118713-8.c: Likewise. > > > * gcc.target/i386/pr118713-9-x32.c: Likewise. > > > * gcc.target/i386/pr118713-9.c: Likewise. > > > * gcc.target/i386/pr118713-10-x32.c: Likewise. > > > * gcc.target/i386/pr118713-10.c: Likewise. > > > * gcc.target/i386/pr118713-11-x32.c: Likewise. > > > * gcc.target/i386/pr118713-11.c: Likewise. > > > * gcc.target/i386/pr118713-12-x32.c: Likewise. > > > * gcc.target/i386/pr118713-12.c: Likewise. > > > > > > Co-Authored-By: Uros Bizjak <ubiz...@gmail.com> > > > Signed-off-by: H.J. Lu <hjl.to...@gmail.com> > > > --- > > > gcc/config/i386/i386-expand.cc | 20 ++-- > > > gcc/config/i386/i386.md | 98 +++++++++++++------ > > > .../gcc.target/i386/pr118713-1-x32.c | 8 ++ > > > gcc/testsuite/gcc.target/i386/pr118713-1.c | 14 +++ > > > .../gcc.target/i386/pr118713-10-x32.c | 8 ++ > > > gcc/testsuite/gcc.target/i386/pr118713-10.c | 15 +++ > > > .../gcc.target/i386/pr118713-11-x32.c | 8 ++ > > > gcc/testsuite/gcc.target/i386/pr118713-11.c | 14 +++ > > > .../gcc.target/i386/pr118713-12-x32.c | 8 ++ > > > gcc/testsuite/gcc.target/i386/pr118713-12.c | 14 +++ > > > .../gcc.target/i386/pr118713-2-x32.c | 8 ++ > > > gcc/testsuite/gcc.target/i386/pr118713-2.c | 15 +++ > > > .../gcc.target/i386/pr118713-3-x32.c | 8 ++ > > > gcc/testsuite/gcc.target/i386/pr118713-3.c | 14 +++ > > > .../gcc.target/i386/pr118713-4-x32.c | 8 ++ > > > gcc/testsuite/gcc.target/i386/pr118713-4.c | 14 +++ > > > .../gcc.target/i386/pr118713-5-x32.c | 8 ++ > > > gcc/testsuite/gcc.target/i386/pr118713-5.c | 13 +++ > > > .../gcc.target/i386/pr118713-6-x32.c | 15 +++ > > > gcc/testsuite/gcc.target/i386/pr118713-6.c | 14 +++ > > > .../gcc.target/i386/pr118713-7-x32.c | 8 ++ > > > gcc/testsuite/gcc.target/i386/pr118713-7.c | 13 +++ > > > .../gcc.target/i386/pr118713-8-x32.c | 8 ++ > > > gcc/testsuite/gcc.target/i386/pr118713-8.c | 13 +++ > > > .../gcc.target/i386/pr118713-9-x32.c | 8 ++ > > > gcc/testsuite/gcc.target/i386/pr118713-9.c | 14 +++ > > > 26 files changed, 353 insertions(+), 35 deletions(-) > > > create mode 100644 gcc/testsuite/gcc.target/i386/pr118713-1-x32.c > > > create mode 100644 gcc/testsuite/gcc.target/i386/pr118713-1.c > > > create mode 100644 gcc/testsuite/gcc.target/i386/pr118713-10-x32.c > > > create mode 100644 gcc/testsuite/gcc.target/i386/pr118713-10.c > > > create mode 100644 gcc/testsuite/gcc.target/i386/pr118713-11-x32.c > > > create mode 100644 gcc/testsuite/gcc.target/i386/pr118713-11.c > > > create mode 100644 gcc/testsuite/gcc.target/i386/pr118713-12-x32.c > > > create mode 100644 gcc/testsuite/gcc.target/i386/pr118713-12.c > > > create mode 100644 gcc/testsuite/gcc.target/i386/pr118713-2-x32.c > > > create mode 100644 gcc/testsuite/gcc.target/i386/pr118713-2.c > > > create mode 100644 gcc/testsuite/gcc.target/i386/pr118713-3-x32.c > > > create mode 100644 gcc/testsuite/gcc.target/i386/pr118713-3.c > > > create mode 100644 gcc/testsuite/gcc.target/i386/pr118713-4-x32.c > > > create mode 100644 gcc/testsuite/gcc.target/i386/pr118713-4.c > > > create mode 100644 gcc/testsuite/gcc.target/i386/pr118713-5-x32.c > > > create mode 100644 gcc/testsuite/gcc.target/i386/pr118713-5.c > > > create mode 100644 gcc/testsuite/gcc.target/i386/pr118713-6-x32.c > > > create mode 100644 gcc/testsuite/gcc.target/i386/pr118713-6.c > > > create mode 100644 gcc/testsuite/gcc.target/i386/pr118713-7-x32.c > > > create mode 100644 gcc/testsuite/gcc.target/i386/pr118713-7.c > > > create mode 100644 gcc/testsuite/gcc.target/i386/pr118713-8-x32.c > > > create mode 100644 gcc/testsuite/gcc.target/i386/pr118713-8.c > > > create mode 100644 gcc/testsuite/gcc.target/i386/pr118713-9-x32.c > > > create mode 100644 gcc/testsuite/gcc.target/i386/pr118713-9.c > > > > > > diff --git a/gcc/config/i386/i386-expand.cc > > > b/gcc/config/i386/i386-expand.cc > > > index 117f6f6f7eb..1d1614d2ccc 100644 > > > --- a/gcc/config/i386/i386-expand.cc > > > +++ b/gcc/config/i386/i386-expand.cc > > > @@ -10223,15 +10223,21 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx > > > callarg1, > > > && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF > > > && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode)) > > > fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, > > > 0))); > > > + else if (TARGET_INDIRECT_BRANCH_REGISTER && MEM_P (fnaddr)) > > > + { > > > + fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1); > > > + fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, > > > fnaddr)); > > > + } > > > /* Since x32 GOT slot is 64 bit with zero upper 32 bits, indirect > > > branch via x32 GOT slot is OK. */ > > > - else if (!(TARGET_X32 > > > - && MEM_P (fnaddr) > > > - && GET_CODE (XEXP (fnaddr, 0)) == ZERO_EXTEND > > > - && GOT_memory_operand (XEXP (XEXP (fnaddr, 0), 0), Pmode)) > > > - && (sibcall > > > - ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode) > > > - : !call_insn_operand (XEXP (fnaddr, 0), word_mode))) > > > + if (TARGET_X32 > > > + && MEM_P (fnaddr) > > > + && GET_CODE (XEXP (fnaddr, 0)) == ZERO_EXTEND > > > + && GOT_memory_operand (XEXP (XEXP (fnaddr, 0), 0), Pmode)) > > > + ; > > > + else if (sibcall > > > + ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode) > > > + : !call_insn_operand (XEXP (fnaddr, 0), word_mode)) > > > { > > > fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1); > > > fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, > > > fnaddr)); > > > diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md > > > index 52c02b6351a..0c9721bdc7a 100644 > > > --- a/gcc/config/i386/i386.md > > > +++ b/gcc/config/i386/i386.md > > > @@ -20122,18 +20122,23 @@ (define_expand "sibcall" > > > }) > > > > > > (define_insn "*call" > > > - [(call (mem:QI (match_operand:W 0 "call_insn_operand" "<c>BwBz")) > > > + [(call (mem:QI (match_operand:W 0 "call_insn_operand" "<c>,BwBz")) > > > (match_operand 1))] > > > "!SIBLING_CALL_P (insn)" > > > "* return ix86_output_call_insn (insn, operands[0]);" > > > - [(set_attr "type" "call")]) > > > + [(set_attr "type" "call") > > > + (set (attr "enabled") > > > + (if_then_else > > > + (eq_attr "alternative" "1") > > > + (symbol_ref "!TARGET_INDIRECT_BRANCH_REGISTER") > > > + (const_string "*")))]) > > > > Did you read my previous message? You don't need to add "enabled" > > attribute (and alternatives), because Bw and Bs already include: > > > > (define_constraint "Bs" > > "@internal Sibcall memory operand." > > (ior (and (not (match_test "TARGET_INDIRECT_BRANCH_REGISTER")) > > (not (match_test "TARGET_X32")) > > (match_operand 0 "sibcall_memory_operand")) > > (and (match_test "TARGET_X32") > > (match_test "Pmode == DImode") > > (match_operand 0 "GOT_memory_operand")))) > > > > (define_constraint "Bw" > > "@internal Call memory operand." > > (ior (and (not (match_test "TARGET_INDIRECT_BRANCH_REGISTER")) > > (not (match_test "TARGET_X32")) > > (match_operand 0 "memory_operand")) > > (and (match_test "TARGET_X32") > > (match_test "Pmode == DImode") > > (match_operand 0 "GOT_memory_operand")))) > > > > "Bz" constraint does not mention TARGET_INDIRECT_BRANCH_REGISTER. > > > > So, please investigate the condition and eventually fix Bs, Bw and Bz > > constraints. There is no need to introduce and disable alternatives > > when corresponding constraints are disabled. > > OTOH, you can use and disable alternatives, but then you don't need to > check TARGET_INDIRECT_BRANCH_REGISTER usage in "Bs" and "Bw" > constraint. The choice depends on the question if T_I_B_R fully > disables "Bs" and "Bw". If "Bz" is also invalid with > TARGET_INDIRECT_BRANCH_REGISTER, only then you should use alternatives > to disable "BwBz" resp "BsBz" alternatives in call patterns.
The problem is with Bz. The indirect branch is generated by ix86_output_call_insn which calls "x86_nopic_noplt_attribute_p (call_op))" to check if indirect branch should be generated on a symbol reference. I am testing this patch. > AFAICS, when T_I_B_R is active, it should fully disable "Bs" as well > as "Bw" constraint. As it is written now, it doesn't disable X32 GOT > memory operand, which is probably wrong. > > Also, TLS patterns use the "Bz" constraint, so it should not be > disabled with T_I_B_R. > > Uros. -- H.J.
diff --git a/gcc/config/i386/constraints.md b/gcc/config/i386/constraints.md index 9dd280568bd..d3d6e9e66f4 100644 --- a/gcc/config/i386/constraints.md +++ b/gcc/config/i386/constraints.md @@ -203,21 +203,21 @@ (define_special_memory_constraint "Br" (define_constraint "Bs" "@internal Sibcall memory operand." - (ior (and (not (match_test "TARGET_INDIRECT_BRANCH_REGISTER")) - (not (match_test "TARGET_X32")) - (match_operand 0 "sibcall_memory_operand")) - (and (match_test "TARGET_X32") - (match_test "Pmode == DImode") - (match_operand 0 "GOT_memory_operand")))) + (and (match_test "!TARGET_INDIRECT_BRANCH_REGISTER") + (ior (and (match_test "!TARGET_X32") + (match_operand 0 "sibcall_memory_operand")) + (and (match_test "TARGET_X32") + (match_test "Pmode == DImode") + (match_operand 0 "GOT_memory_operand"))))) (define_constraint "Bw" "@internal Call memory operand." - (ior (and (not (match_test "TARGET_INDIRECT_BRANCH_REGISTER")) - (not (match_test "TARGET_X32")) - (match_operand 0 "memory_operand")) - (and (match_test "TARGET_X32") - (match_test "Pmode == DImode") - (match_operand 0 "GOT_memory_operand")))) + (and (match_test "!TARGET_INDIRECT_BRANCH_REGISTER") + (ior (and (match_test "!TARGET_X32") + (match_operand 0 "memory_operand")) + (and (match_test "TARGET_X32") + (match_test "Pmode == DImode") + (match_operand 0 "GOT_memory_operand"))))) (define_constraint "Bz" "@internal Constant call address operand." diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc index 117f6f6f7eb..7a2df43e7cd 100644 --- a/gcc/config/i386/i386-expand.cc +++ b/gcc/config/i386/i386-expand.cc @@ -10225,13 +10225,21 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1, fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0))); /* Since x32 GOT slot is 64 bit with zero upper 32 bits, indirect branch via x32 GOT slot is OK. */ - else if (!(TARGET_X32 - && MEM_P (fnaddr) - && GET_CODE (XEXP (fnaddr, 0)) == ZERO_EXTEND - && GOT_memory_operand (XEXP (XEXP (fnaddr, 0), 0), Pmode)) - && (sibcall - ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode) - : !call_insn_operand (XEXP (fnaddr, 0), word_mode))) + if (TARGET_X32 + && MEM_P (fnaddr) + && GET_CODE (XEXP (fnaddr, 0)) == ZERO_EXTEND + && GOT_memory_operand (XEXP (XEXP (fnaddr, 0), 0), Pmode)) + { + if (TARGET_INDIRECT_BRANCH_REGISTER) + { + fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1); + fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, + fnaddr)); + } + } + else if (sibcall + ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode) + : !call_insn_operand (XEXP (fnaddr, 0), word_mode)) { fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1); fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr)); diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h index f122fd8a0a3..bea3fd4b2e2 100644 --- a/gcc/config/i386/i386-protos.h +++ b/gcc/config/i386/i386-protos.h @@ -371,6 +371,7 @@ extern int asm_preferred_eh_data_format (int, int); extern enum attr_cpu ix86_schedule; #endif +extern bool ix86_nopic_noplt_attribute_p (rtx call_op); extern const char * ix86_output_call_insn (rtx_insn *insn, rtx call_op); extern const char * ix86_output_indirect_jmp (rtx call_op); extern const char * ix86_output_function_return (bool long_p); diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index 11770aa8a50..f89201684a8 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -16724,7 +16724,7 @@ ix86_ifunc_ref_local_ok (void) This is currently used only with 64-bit or 32-bit GOT32X ELF targets to call the function marked "noplt" indirectly. */ -static bool +bool ix86_nopic_noplt_attribute_p (rtx call_op) { if (flag_pic || ix86_cmodel == CM_LARGE diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 52c02b6351a..d6ae3ee378a 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -20133,7 +20133,7 @@ (define_insn "*call_got_x32" [(call (mem:QI (zero_extend:DI (match_operand:SI 0 "GOT_memory_operand" "Bg"))) (match_operand 1))] - "TARGET_X32" + "TARGET_X32 && !TARGET_INDIRECT_BRANCH_REGISTER" { rtx fnaddr = gen_const_mem (DImode, XEXP (operands[0], 0)); return ix86_output_call_insn (insn, fnaddr); @@ -20338,7 +20338,7 @@ (define_insn "*call_value_got_x32" (zero_extend:DI (match_operand:SI 1 "GOT_memory_operand" "Bg"))) (match_operand 2)))] - "TARGET_X32" + "TARGET_X32 && !TARGET_INDIRECT_BRANCH_REGISTER" { rtx fnaddr = gen_const_mem (DImode, XEXP (operands[1], 0)); return ix86_output_call_insn (insn, fnaddr); @@ -20460,7 +20460,7 @@ (define_insn "*sibcall_value_pop_memory" (plus:SI (reg:SI SP_REG) (match_operand:SI 3 "immediate_operand" "i"))) (unspec [(const_int 0)] UNSPEC_PEEPSIB)] - "!TARGET_64BIT" + "!TARGET_64BIT && !TARGET_INDIRECT_BRANCH_REGISTER" "* return ix86_output_call_insn (insn, operands[1]);" [(set_attr "type" "callv")]) diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md index e733a474f9e..31e751ac329 100644 --- a/gcc/config/i386/predicates.md +++ b/gcc/config/i386/predicates.md @@ -670,7 +670,8 @@ (define_predicate "constant_call_address_operand" (match_code "symbol_ref") { if (ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC - || flag_force_indirect_call) + || flag_force_indirect_call + || ix86_nopic_noplt_attribute_p (op)) return false; if (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op)) return false;