Don't expand UNSPEC_TLS_LD_BASE to a call so that the RTL local copy propagation pass can eliminate multiple __tls_get_addr calls.
gcc/

	PR target/81501
	* config/i386/i386-protos.h (ix86_split_tls_local_dynamic_base_64):
	New.
	* config/i386/i386.cc (ix86_split_tls_local_dynamic_base_64): New.
	(legitimize_tls_address): Don't emit the 64-bit UNSPEC_TLS_LD_BASE
	as a call.
	* config/i386/i386.md (*tls_local_dynamic_base_64_<mode>): Renamed
	to ...
	(@tls_local_dynamic_base_call_64_<mode>): This.  Replace
	(match_operand 2) with (const_int 0).
	(@tls_local_dynamic_base_64_<mode>): Change call to unspec.
	(*tls_local_dynamic_base_64_<mode>): New.

gcc/testsuite/

	PR target/81501
	* gcc.target/i386/pr81501-1.c: New test.

OK for master?

Thanks.

--
H.J.
From d154b3bf2fb86c82a6291f1fae45fbbe0d74f4e4 Mon Sep 17 00:00:00 2001 From: "H.J. Lu" <hjl.to...@gmail.com> Date: Fri, 19 Aug 2022 11:50:41 -0700 Subject: [PATCH] x86-64: Don't expand UNSPEC_TLS_LD_BASE to a call Don't expand UNSPEC_TLS_LD_BASE to a call so that the RTL local copy propagation pass can eliminate multiple __tls_get_addr calls. gcc/ PR target/81501 * config/i386/i386-protos.h (ix86_split_tls_local_dynamic_base_64): New. * config/i386/i386.cc (ix86_split_tls_local_dynamic_base_64): New. (legitimize_tls_address): Don't emit the 64-bit UNSPEC_TLS_LD_BASE as a call. * config/i386/i386.md (*tls_local_dynamic_base_64_<mode>): Renamed to ... (@tls_local_dynamic_base_call_64_<mode>): This. Replace (match_operand 2) with (const_int 0). (@tls_local_dynamic_base_64_<mode>): Change call to unspec. (*tls_local_dynamic_base_64_<mode>): New. gcc/testsuite/ PR target/81501 * gcc.target/i386/pr81501-1.c: New test. Signed-off-by: H.J. Lu <hjl.to...@gmail.com> --- gcc/config/i386/i386-protos.h | 1 + gcc/config/i386/i386.cc | 42 +++++++++++++---------- gcc/config/i386/i386.md | 30 +++++++++++----- gcc/testsuite/gcc.target/i386/pr81501-1.c | 17 +++++++++ 4 files changed, 63 insertions(+), 27 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr81501-1.c diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h index c59b5a67e3a..a8850cd7311 100644 --- a/gcc/config/i386/i386-protos.h +++ b/gcc/config/i386/i386-protos.h @@ -287,6 +287,7 @@ extern rtx ix86_tls_module_base (void); extern bool ix86_gpr_tls_address_pattern_p (rtx); extern bool ix86_tls_address_pattern_p (rtx); extern rtx ix86_rewrite_tls_address (rtx); +extern void ix86_split_tls_local_dynamic_base_64 (rtx[]); extern void ix86_expand_vector_init (bool, rtx, rtx); extern void ix86_expand_vector_set (bool, rtx, rtx, int); diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index ddefc0f88d9..9ad11b122de 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ 
-12330,6 +12330,27 @@ ix86_tls_module_base (void) return ix86_tls_module_base_symbol; } +/* Split to CALL INSN to properly handle scratch registers. */ + +void +ix86_split_tls_local_dynamic_base_64 (rtx operands[]) +{ + rtx rax = gen_rtx_REG (Pmode, AX_REG); + rtx_insn *call_insn + = emit_call_insn (gen_tls_local_dynamic_base_call_64 (Pmode, rax, + operands[1])); + /* Indicate that this function can't jump to non-local gotos. */ + make_reg_eh_region_note_nothrow_nononlocal (call_insn); + RTL_CONST_CALL_P (call_insn) = 1; + + /* Attach a unique REG_EQUAL, to allow the RTL optimizers to share + the LD_BASE result with other LD model accesses. */ + rtx eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), + UNSPEC_TLS_LD_BASE); + rtx_insn *set_insn = emit_move_insn (operands[0], rax); + set_unique_reg_note (set_insn, REG_EQUAL, eqv); +} + /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is false if we expect this to be used for a memory address and true if we expect to load the address into a register. */ @@ -12442,25 +12463,8 @@ legitimize_tls_address (rtx x, enum tls_model model, bool for_mov) base = gen_reg_rtx (Pmode); if (TARGET_64BIT) - { - rtx rax = gen_rtx_REG (Pmode, AX_REG); - rtx_insn *insns; - rtx eqv; - - start_sequence (); - emit_call_insn - (gen_tls_local_dynamic_base_64 (Pmode, rax, caddr)); - insns = get_insns (); - end_sequence (); - - /* Attach a unique REG_EQUAL, to allow the RTL optimizers to - share the LD_BASE result with other LD model accesses. 
*/ - eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), - UNSPEC_TLS_LD_BASE); - - RTL_CONST_CALL_P (insns) = 1; - emit_libcall_block (insns, base, rax, eqv); - } + emit_insn (gen_tls_local_dynamic_base_64 (Pmode, base, + caddr)); else emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr)); } diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index e170da3b0e6..cf54a58ef96 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -23102,11 +23102,11 @@ (define_expand "tls_local_dynamic_base_32" "" "ix86_tls_descriptor_calls_expanded_in_cfun = true;") -(define_insn "*tls_local_dynamic_base_64_<mode>" +(define_insn "@tls_local_dynamic_base_call_64_<mode>" [(set (match_operand:P 0 "register_operand" "=a") (call:P (mem:QI (match_operand 1 "constant_call_address_operand" "Bz")) - (match_operand 2))) + (const_int 0))) (unspec:P [(reg:P SP_REG)] UNSPEC_TLS_LD_BASE)] "TARGET_64BIT" { @@ -23143,15 +23143,29 @@ (define_insn "*tls_local_dynamic_base_64_largepic" (set_attr "length" "22")]) (define_expand "@tls_local_dynamic_base_64_<mode>" - [(parallel - [(set (match_operand:P 0 "register_operand") - (call:P - (mem:QI (match_operand 1)) - (const_int 0))) - (unspec:P [(reg:P SP_REG)] UNSPEC_TLS_LD_BASE)])] + [(set (match_operand:P 0 "register_operand") + (unspec:P + [(match_operand 1 "constant_call_address_operand") + (reg:P SP_REG)] + UNSPEC_TLS_LD_BASE))] "TARGET_64BIT" "ix86_tls_descriptor_calls_expanded_in_cfun = true;") +;; Split to CALL INSN to properly handle scratch registers. +(define_insn_and_split "*tls_local_dynamic_base_64_<mode>" + [(set (match_operand:P 0 "register_operand") + (unspec:P + [(match_operand 1 "constant_call_address_operand") + (reg:P SP_REG)] + UNSPEC_TLS_LD_BASE))] + "TARGET_64BIT && ix86_pre_reload_split ()" + "#" + "" + [(const_int 0)] +{ + ix86_split_tls_local_dynamic_base_64 (operands); +}) + ;; Local dynamic of a single variable is a lose. Show combine how ;; to convert that back to global dynamic. 
diff --git a/gcc/testsuite/gcc.target/i386/pr81501-1.c b/gcc/testsuite/gcc.target/i386/pr81501-1.c new file mode 100644 index 00000000000..96d593c3e6d --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr81501-1.c @@ -0,0 +1,17 @@ +/* { dg-do compile { target { *-*-linux* && { ! ia32 } } } } */ +/* { dg-options "-O2 -fpic -fplt" } */ + +void a(long *); +int b(void); +void c(void); +static __thread long e; +long +d(void) +{ + a(&e); + if (b()) + c(); + return e; +} + +/* { dg-final { scan-assembler-times "call\[ \t\]__tls_get_addr@PLT" 1 } } */ -- 2.49.0