On Mon, Aug 18, 2025 at 4:50 AM H.J. Lu <hjl.to...@gmail.com> wrote: > > We can't place a TLS call before a conditional jump in a basic block like > > (code_label 13 11 14 4 2 (nil) [1 uses]) > (note 14 13 16 4 [bb 4] NOTE_INSN_BASIC_BLOCK) > (jump_insn 16 14 17 4 (set (pc) > (if_then_else (le (reg:CCNO 17 flags) > (const_int 0 [0])) > (label_ref 27) > (pc))) "x.c":10:21 discrim 1 1462 {*jcc} > (expr_list:REG_DEAD (reg:CCNO 17 flags) > (int_list:REG_BR_PROB 628353713 (nil))) > -> 27) > > since the TLS call will clobber flags register nor place a TLS call in a > basic block if any live caller-saved registers aren't dead at the end of > the basic block: > > ;; live in 6 [bp] 7 [sp] 16 [argp] 17 [flags] 19 [frame] 104 > ;; live gen 0 [ax] 102 106 108 116 117 118 120 > ;; live kill 5 [di] > > Instead, we should place such call before all register setting basic > blocks which dominate the current basic block. > > Keep track the replaced GNU and GNU2 TLS instructions. Use these info to > place the __tls_get_addr call and mark FLAGS register as dead. > > gcc/ > > PR target/121572 > * config/i386/i386-features.cc (replace_tls_call): Add a bitmap > argument and put the updated TLS instruction in the bitmap. > (ix86_get_dominator_for_reg): New. > (ix86_place_single_tls_call): Add 2 bitmap arguments for updated > GNU and GNU2 TLS instructions. Add the live flag register to the > bitmap. Insert the __tls_get_addr call before INSN if it replaces > a __tls_get_addr call. Mark FLAGS register as dead if INSN > replaced the GNU2 TLS instruction. Clear the live register bitmap > only for hard register. If there is a conditional jump in the > basic block or any live caller-saved registers aren't dead at the > end of the basic block, get the basic block which dominates all > basic blocks which set the live registers. > > gcc/testsuite/ > > PR target/121572 > * gcc.target/i386/pr121572-1a.c: New test. > * gcc.target/i386/pr121572-1b.c: Likewise. > * gcc.target/i386/pr121572-2a.c: Likewise. > * gcc.target/i386/pr121572-2b.c: Likewise. > > Signed-off-by: H.J. Lu <hjl.to...@gmail.com> > --- > gcc/config/i386/i386-features.cc | 136 ++++++++++++++++---- > gcc/testsuite/gcc.target/i386/pr121572-1a.c | 41 ++++++ > gcc/testsuite/gcc.target/i386/pr121572-1b.c | 18 +++ > gcc/testsuite/gcc.target/i386/pr121572-2a.c | 39 ++++++ > gcc/testsuite/gcc.target/i386/pr121572-2b.c | 6 + > 5 files changed, 215 insertions(+), 25 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/i386/pr121572-1a.c > create mode 100644 gcc/testsuite/gcc.target/i386/pr121572-1b.c > create mode 100644 gcc/testsuite/gcc.target/i386/pr121572-2a.c > create mode 100644 gcc/testsuite/gcc.target/i386/pr121572-2b.c > > diff --git a/gcc/config/i386/i386-features.cc > b/gcc/config/i386/i386-features.cc > index f0bdc5c1880..903f2b0b478 100644 > --- a/gcc/config/i386/i386-features.cc > +++ b/gcc/config/i386/i386-features.cc > @@ -3684,10 +3684,12 @@ ix86_broadcast_inner (rtx op, machine_mode mode, > return op; > } > > -/* Replace CALL instruction in TLS_CALL_INSNS with SET from SRC. */ > +/* Replace CALL instruction in TLS_CALL_INSNS with SET from SRC and > + put the updated instruction in UPDATED_TLS_INSNS. */ > > static void > -replace_tls_call (rtx src, auto_bitmap &tls_call_insns) > +replace_tls_call (rtx src, auto_bitmap &tls_call_insns, > + auto_bitmap &updated_tls_insns) > { > bitmap_iterator bi; > unsigned int id; > @@ -3716,6 +3718,9 @@ replace_tls_call (rtx src, auto_bitmap &tls_call_insns) > if (recog_memoized (set_insn) < 0) > gcc_unreachable (); > > + /* Put SET_INSN in UPDATED_TLS_INSNS. */ > + bitmap_set_bit (updated_tls_insns, INSN_UID (set_insn)); > + > if (dump_file) > { > fprintf (dump_file, "\nReplace:\n\n"); > @@ -3732,15 +3737,48 @@ replace_tls_call (rtx src, auto_bitmap > &tls_call_insns) > } > } > > +/* Return the basic block which dominates all basic blocks which set > + hard register REGNO used in basic block BB. */ > + > +static basic_block > +ix86_get_dominator_for_reg (unsigned int regno, basic_block bb) > +{ > + basic_block set_bb; > + auto_bitmap set_bbs; > + > + /* Get all BBs which set REGNO and dominate the current BB from all > + DEFs of REGNO. */ > + for (df_ref def = DF_REG_DEF_CHAIN (regno); > + def; > + def = DF_REF_NEXT_REG (def)) > + if (!DF_REF_IS_ARTIFICIAL (def) > + && !DF_REF_FLAGS_IS_SET (def, DF_REF_MAY_CLOBBER) > + && !DF_REF_FLAGS_IS_SET (def, DF_REF_MUST_CLOBBER)) > + { > + set_bb = DF_REF_BB (def); > + if (dominated_by_p (CDI_DOMINATORS, bb, set_bb)) > + bitmap_set_bit (set_bbs, set_bb->index); > + } > + > + bb = nearest_common_dominator_for_set (CDI_DOMINATORS, set_bbs); > + return bb; > +} > + > /* Generate a TLS call of KIND with VAL and copy the call result to DEST, > at entry of the nearest dominator for basic block map BBS, which is in > the fake loop that contains the whole function, so that there is only > - a single TLS CALL of KIND with VAL in the whole function. If > - TLSDESC_SET isn't nullptr, insert it before the TLS call. */ > + a single TLS CALL of KIND with VAL in the whole function. > + UPDATED_GNU_TLS_INSNS contains instructions which replace the GNU TLS > + instructions. UPDATED_GNU2_TLS_INSNS contains instructions which > + replace the GNU2 TLS instructions. If TLSDESC_SET isn't nullptr, > + insert it before the TLS call. */ > > static void > ix86_place_single_tls_call (rtx dest, rtx val, x86_cse_kind kind, > - bitmap bbs, rtx tlsdesc_set = nullptr) > + auto_bitmap &bbs, > + auto_bitmap &updated_gnu_tls_insns, > + auto_bitmap &updated_gnu2_tls_insns, > + rtx tlsdesc_set = nullptr) > { > basic_block bb = nearest_common_dominator_for_set (CDI_DOMINATORS, bbs); > while (bb->loop_father->latch > @@ -3748,6 +3786,7 @@ ix86_place_single_tls_call (rtx dest, rtx val, > x86_cse_kind kind, > bb = get_immediate_dominator (CDI_DOMINATORS, > bb->loop_father->header); > > +place_tls_call: > rtx_insn *insn = BB_HEAD (bb); > while (insn && !NONDEBUG_INSN_P (insn)) > { > @@ -3824,7 +3863,8 @@ ix86_place_single_tls_call (rtx dest, rtx val, > x86_cse_kind kind, > auto_bitmap live_caller_saved_regs; > bitmap in = df_live ? DF_LIVE_IN (bb) : DF_LR_IN (bb); > > - bool flags_live_p = bitmap_bit_p (in, FLAGS_REG); > + if (bitmap_bit_p (in, FLAGS_REG)) > + bitmap_set_bit (live_caller_saved_regs, FLAGS_REG); > > unsigned int i; > > @@ -3845,13 +3885,46 @@ ix86_place_single_tls_call (rtx dest, rtx val, > x86_cse_kind kind, > if (!NONDEBUG_INSN_P (insn)) > continue; > > + if (JUMP_P (insn)) > + { > + /* This must be a conditional jump. */ > + rtx label = JUMP_LABEL (insn); > + if (label == nullptr > + || ANY_RETURN_P (label) > + || !(LABEL_P (label) || SYMBOL_REF_P (label))) > + gcc_unreachable (); > + > + /* Place the call before all FLAGS_REG setting BBs since > + we can't place a call before nor after a conditional > + jump. */ > + bb = ix86_get_dominator_for_reg (FLAGS_REG, bb); > + goto place_tls_call; Can we put those codes related to find the **BEFORE/AFTER** into a function and called it recursively(or iteratively to decide find the final BEFORE/AFTER) So we can avoid those GOTO statements.
i.e switch (kind) { ... } ix86_place_single_tls_call_place (....) if (before) emit_insn_before (..); else if (after) emit_insn_after (...); else gcc_unreachable (); .... > + } > + > /* Check if FLAGS register is live. */ > set = single_set (insn); > if (set) > { > rtx dest = SET_DEST (set); > - if (REG_P (dest) && REGNO (dest) == FLAGS_REG) > - flags_live_p = true; > + if (REG_P (dest)) > + { > + if (bitmap_bit_p (updated_gnu_tls_insns, > + INSN_UID (insn))) > + { > + /* Insert the __tls_get_addr call before INSN which > + replaces a __tls_get_addr call. */ > + before = insn; > + goto insert_before; > + } > + if (bitmap_bit_p (updated_gnu2_tls_insns, > + INSN_UID (insn))) > + /* Mark FLAGS register as dead since FLAGS register > + would be clobbered by the GNU2 TLS instruction. */ > + bitmap_clear_bit (live_caller_saved_regs, > + FLAGS_REG); > + else if (REGNO (dest) == FLAGS_REG) > + bitmap_set_bit (live_caller_saved_regs, FLAGS_REG); > + } > } > > rtx link; > @@ -3863,29 +3936,30 @@ ix86_place_single_tls_call (rtx dest, rtx val, > x86_cse_kind kind, > for (i = REGNO (XEXP (link, 0)); > i < END_REGNO (XEXP (link, 0)); > i++) > - bitmap_clear_bit (live_caller_saved_regs, i); > - > - /* Check if FLAGS register is dead. */ > - if (REGNO (XEXP (link, 0)) == FLAGS_REG) > - flags_live_p = false; > + if (i < FIRST_PSEUDO_REGISTER) > + bitmap_clear_bit (live_caller_saved_regs, i); > > if (bitmap_empty_p (live_caller_saved_regs)) > { > - /* All live caller-saved registers are dead after > - this instruction. Since TLS instructions > - clobber FLAGS register, it must be dead where > - the TLS will be inserted after. */ > - if (flags_live_p) > - gcc_unreachable (); > after = insn; > goto insert_after; > } > } > } > > - /* All live caller-saved registers should be dead at the end > - of this basic block. */ > - gcc_unreachable (); > + /* If any live caller-saved registers aren't dead at the end > + of this basic block, get the basic block which dominates all > + basic blocks which set the remaining live registers. */ > + auto_bitmap set_bbs; > + bitmap_iterator bi; > + unsigned int id; > + EXECUTE_IF_SET_IN_BITMAP (live_caller_saved_regs, 0, id, bi) > + { > + basic_block set_bb = ix86_get_dominator_for_reg (id, bb); > + bitmap_set_bit (set_bbs, set_bb->index); > + } > + bb = nearest_common_dominator_for_set (CDI_DOMINATORS, set_bbs); > + goto place_tls_call; > } > > /* Emit the TLS CALL insn. */ > @@ -3895,7 +3969,10 @@ insert_after: > tls_insn = emit_insn_after (tls, after); > } > else > - tls_insn = emit_insn_before (tls, before); > + { > +insert_before: > + tls_insn = emit_insn_before (tls, before); > + } > > rtx_insn *tlsdesc_insn = nullptr; > if (tlsdesc_set) > @@ -4213,6 +4290,8 @@ pass_x86_cse::x86_cse (void) > basic_block bb; > rtx_insn *insn; > unsigned int i; > + auto_bitmap updated_gnu_tls_insns; > + auto_bitmap updated_gnu2_tls_insns; > > df_set_flags (DF_DEFER_INSN_RESCAN); > > @@ -4333,7 +4412,10 @@ pass_x86_cse::x86_cse (void) > case X86_CSE_TLS_LD_BASE: > case X86_CSE_TLSDESC: > broadcast_reg = gen_reg_rtx (load->mode); > - replace_tls_call (broadcast_reg, load->insns); > + replace_tls_call (broadcast_reg, load->insns, > + (load->kind == X86_CSE_TLSDESC > + ? updated_gnu2_tls_insns > + : updated_gnu_tls_insns)); > load->broadcast_reg = broadcast_reg; > break; > > @@ -4399,6 +4481,8 @@ pass_x86_cse::x86_cse (void) > load->val, > load->kind, > load->bbs, > + updated_gnu_tls_insns, > + updated_gnu2_tls_insns, > PATTERN (load->def_insn)); > break; > case X86_CSE_VEC_DUP: > @@ -4442,7 +4526,9 @@ pass_x86_cse::x86_cse (void) > ix86_place_single_tls_call (load->broadcast_reg, > load->val, > load->kind, > - load->bbs); > + load->bbs, > + updated_gnu_tls_insns, > + updated_gnu2_tls_insns); > break; > case X86_CSE_CONST0_VECTOR: > case X86_CSE_CONSTM1_VECTOR: > diff --git a/gcc/testsuite/gcc.target/i386/pr121572-1a.c > b/gcc/testsuite/gcc.target/i386/pr121572-1a.c > new file mode 100644 > index 00000000000..270d8ff5cb6 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr121572-1a.c > @@ -0,0 +1,41 @@ > +/* { dg-do compile { target *-*-linux* } } */ > +/* { dg-options "-O0 -fpic -fplt -mtls-dialect=gnu" } */ > +/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */ > +/* { dg-final { check-function-bodies "**" "" "" { target { ! ia32 } } > {^\t?\.} } } */ > + > +/* > +**bug: > +**.LFB[0-9]+: > +**... > +** leaq tv_cache@tlsld\(%rip\), %rdi > +** call __tls_get_addr@PLT > +** movl \$-1, %edi > +** mov[l|q] %[e|r]ax, %[e|r]bx > +** call val@PLT > +**... > +*/ > + > +extern __thread int tv_cache __attribute__ ((visibility ("hidden"))); > +extern void use_cache (int); > +extern int val (int v); > + > +__attribute__ ((optimize (2))) > +void > +bug (void) > +{ > + int compared = val (-1); > + > + if (compared == 0 || (compared > 0 && val (2) == 0)) > + { > + __builtin_trap (); > + } > + > + if (compared < 0) > + { > + use_cache (tv_cache); > + return; > + } > + > + use_cache (tv_cache); > + __builtin_trap (); > +} > diff --git a/gcc/testsuite/gcc.target/i386/pr121572-1b.c > b/gcc/testsuite/gcc.target/i386/pr121572-1b.c > new file mode 100644 > index 00000000000..8a6089109f5 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr121572-1b.c > @@ -0,0 +1,18 @@ > +/* { dg-do compile { target *-*-linux* } } */ > +/* { dg-options "-O0 -fpic -fplt -mtls-dialect=gnu2" } */ > +/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */ > +/* { dg-final { check-function-bodies "**" "" "" { target { ! ia32 } } > {^\t?\.} } } */ > + > +/* > +**bug: > +**.LFB[0-9]+: > +**... > +** lea[l|q] tv_cache@TLSDESC\(%rip\), %[e|r]ax > +** movl \$-1, %edi > +** call \*tv_cache@TLSCALL\(%[e|r]ax\) > +** mov[l|q] %[e|r]ax, %[e|r]bx > +** call val@PLT > +**... > +*/ > + > +#include "pr121572-1a.c" > diff --git a/gcc/testsuite/gcc.target/i386/pr121572-2a.c > b/gcc/testsuite/gcc.target/i386/pr121572-2a.c > new file mode 100644 > index 00000000000..38b254657d3 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr121572-2a.c > @@ -0,0 +1,39 @@ > +/* { dg-do compile { target *-*-linux* } } */ > +/* { dg-options "-O2 -fpic -fplt -mtls-dialect=gnu" } */ > + > +typedef enum > +{ > + MPFR_RNDN > +} mpfr_rnd_t; > +typedef int mpfr_t[1]; > +long __gmpfr_emin, mpfr_agm_expo_0; > +_Thread_local long __gmpfr_emax; > +int mpfr_agm_compare, mpfr_agm___trans_tmp_1; > +mpfr_t mpfr_agm_u; > +void mpfr_mul (int *, int, int, mpfr_rnd_t); > +int > +mpfr_agm (int op1) > +{ > + int op2 = 0; > + if (__builtin_expect (mpfr_agm_compare == 0, 0)) > + return 0; > + if (mpfr_agm_compare > 0) > + { > + int t = op1; > + op2 = t; > + } > + mpfr_agm_expo_0 = __gmpfr_emax; > + for (;;) > + { > + retry: > + mpfr_mul (mpfr_agm_u, op1, op2, MPFR_RNDN); > + if (0) > + goto retry; > + if (__builtin_expect (mpfr_agm___trans_tmp_1, 1)) > + break; > + } > + __gmpfr_emin = __gmpfr_emax; > + return 0; > +} > + > +/* { dg-final { scan-assembler-times "call\[ \t\]__tls_get_addr@PLT" 1 { > target { ! ia32 } } } } */ > diff --git a/gcc/testsuite/gcc.target/i386/pr121572-2b.c > b/gcc/testsuite/gcc.target/i386/pr121572-2b.c > new file mode 100644 > index 00000000000..33d70024324 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr121572-2b.c > @@ -0,0 +1,6 @@ > +/* { dg-do compile { target *-*-linux* } } */ > +/* { dg-options "-O2 -fpic -fplt -mtls-dialect=gnu2" } */ > + > +#include "pr121572-2a.c" > + > +/* { dg-final { scan-assembler-times "call\[ > \t\]\\*__gmpfr_emax@TLSCALL\\(%(?:r|e)ax\\)" 1 { target { ! ia32 } } } } */ > -- > 2.50.1 > -- BR, Hongtao