On Mon, Aug 18, 2025 at 4:50 AM H.J. Lu <hjl.to...@gmail.com> wrote:
>
> We can't place a TLS call before a conditional jump in a basic block like
>
> (code_label 13 11 14 4 2 (nil) [1 uses])
> (note 14 13 16 4 [bb 4] NOTE_INSN_BASIC_BLOCK)
> (jump_insn 16 14 17 4 (set (pc)
>         (if_then_else (le (reg:CCNO 17 flags)
>                 (const_int 0 [0]))
>             (label_ref 27)
>             (pc))) "x.c":10:21 discrim 1 1462 {*jcc}
>      (expr_list:REG_DEAD (reg:CCNO 17 flags)
>         (int_list:REG_BR_PROB 628353713 (nil)))
>  -> 27)
>
> since the TLS call will clobber flags register nor place a TLS call in a
> basic block if any live caller-saved registers aren't dead at the end of
> the basic block:
>
> ;; live  in      6 [bp] 7 [sp] 16 [argp] 17 [flags] 19 [frame] 104
> ;; live  gen     0 [ax] 102 106 108 116 117 118 120
> ;; live  kill    5 [di]
>
> Instead, we should place such call before all register setting basic
> blocks which dominate the current basic block.
>
> Keep track the replaced GNU and GNU2 TLS instructions.  Use these info to
> place the __tls_get_addr call and mark FLAGS register as dead.
>
> gcc/
>
>         PR target/121572
>         * config/i386/i386-features.cc (replace_tls_call): Add a bitmap
>         argument and put the updated TLS instruction in the bitmap.
>         (ix86_get_dominator_for_reg): New.
>         (ix86_place_single_tls_call): Add 2 bitmap arguments for updated
>         GNU and GNU2 TLS instructions.  Add the live flag register to the
>         bitmap.  Insert the __tls_get_addr call before INSN if it replaces
>         a __tls_get_addr call.  Mark FLAGS register as dead if INSN
>         replaced the GNU2 TLS instruction.  Clear the live register bitmap
>         only for hard register.  If there is a conditional jump in the
>         basic block or any live caller-saved registers aren't dead at the
>         end of the basic block, get the basic block which dominates all
>         basic blocks which set the live registers.
>
> gcc/testsuite/
>
>         PR target/121572
>         * gcc.target/i386/pr121572-1a.c: New test.
>         * gcc.target/i386/pr121572-1b.c: Likewise.
>         * gcc.target/i386/pr121572-2a.c: Likewise.
>         * gcc.target/i386/pr121572-2b.c: Likewise.
>
> Signed-off-by: H.J. Lu <hjl.to...@gmail.com>
> ---
>  gcc/config/i386/i386-features.cc            | 136 ++++++++++++++++----
>  gcc/testsuite/gcc.target/i386/pr121572-1a.c |  41 ++++++
>  gcc/testsuite/gcc.target/i386/pr121572-1b.c |  18 +++
>  gcc/testsuite/gcc.target/i386/pr121572-2a.c |  39 ++++++
>  gcc/testsuite/gcc.target/i386/pr121572-2b.c |   6 +
>  5 files changed, 215 insertions(+), 25 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr121572-1a.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr121572-1b.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr121572-2a.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr121572-2b.c
>
> diff --git a/gcc/config/i386/i386-features.cc 
> b/gcc/config/i386/i386-features.cc
> index f0bdc5c1880..903f2b0b478 100644
> --- a/gcc/config/i386/i386-features.cc
> +++ b/gcc/config/i386/i386-features.cc
> @@ -3684,10 +3684,12 @@ ix86_broadcast_inner (rtx op, machine_mode mode,
>    return op;
>  }
>
> -/* Replace CALL instruction in TLS_CALL_INSNS with SET from SRC.  */
> +/* Replace CALL instruction in TLS_CALL_INSNS with SET from SRC and
> +   put the updated instruction in UPDATED_TLS_INSNS.  */
>
>  static void
> -replace_tls_call (rtx src, auto_bitmap &tls_call_insns)
> +replace_tls_call (rtx src, auto_bitmap &tls_call_insns,
> +                 auto_bitmap &updated_tls_insns)
>  {
>    bitmap_iterator bi;
>    unsigned int id;
> @@ -3716,6 +3718,9 @@ replace_tls_call (rtx src, auto_bitmap &tls_call_insns)
>        if (recog_memoized (set_insn) < 0)
>         gcc_unreachable ();
>
> +      /* Put SET_INSN in UPDATED_TLS_INSNS.  */
> +      bitmap_set_bit (updated_tls_insns, INSN_UID (set_insn));
> +
>        if (dump_file)
>         {
>           fprintf (dump_file, "\nReplace:\n\n");
> @@ -3732,15 +3737,48 @@ replace_tls_call (rtx src, auto_bitmap 
> &tls_call_insns)
>      }
>  }
>
> +/* Return the basic block which dominates all basic blocks which set
> +   hard register REGNO used in basic block BB.  */
> +
> +static basic_block
> +ix86_get_dominator_for_reg (unsigned int regno, basic_block bb)
> +{
> +  basic_block set_bb;
> +  auto_bitmap set_bbs;
> +
> +  /* Get all BBs which set REGNO and dominate the current BB from all
> +     DEFs of REGNO.  */
> +  for (df_ref def = DF_REG_DEF_CHAIN (regno);
> +       def;
> +       def = DF_REF_NEXT_REG (def))
> +    if (!DF_REF_IS_ARTIFICIAL (def)
> +       && !DF_REF_FLAGS_IS_SET (def, DF_REF_MAY_CLOBBER)
> +       && !DF_REF_FLAGS_IS_SET (def, DF_REF_MUST_CLOBBER))
> +      {
> +       set_bb = DF_REF_BB (def);
> +       if (dominated_by_p (CDI_DOMINATORS, bb, set_bb))
> +         bitmap_set_bit (set_bbs, set_bb->index);
> +      }
> +
> +  bb = nearest_common_dominator_for_set (CDI_DOMINATORS, set_bbs);
> +  return bb;
> +}
> +
>  /* Generate a TLS call of KIND with VAL and copy the call result to DEST,
>     at entry of the nearest dominator for basic block map BBS, which is in
>     the fake loop that contains the whole function, so that there is only
> -   a single TLS CALL of KIND with VAL in the whole function.  If
> -   TLSDESC_SET isn't nullptr, insert it before the TLS call.  */
> +   a single TLS CALL of KIND with VAL in the whole function.
> +   UPDATED_GNU_TLS_INSNS contains instructions which replace the GNU TLS
> +   instructions.  UPDATED_GNU2_TLS_INSNS contains instructions which
> +   replace the GNU2 TLS instructions.  If TLSDESC_SET isn't nullptr,
> +   insert it before the TLS call.  */
>
>  static void
>  ix86_place_single_tls_call (rtx dest, rtx val, x86_cse_kind kind,
> -                           bitmap bbs, rtx tlsdesc_set = nullptr)
> +                           auto_bitmap &bbs,
> +                           auto_bitmap &updated_gnu_tls_insns,
> +                           auto_bitmap &updated_gnu2_tls_insns,
> +                           rtx tlsdesc_set = nullptr)
>  {
>    basic_block bb = nearest_common_dominator_for_set (CDI_DOMINATORS, bbs);
>    while (bb->loop_father->latch
> @@ -3748,6 +3786,7 @@ ix86_place_single_tls_call (rtx dest, rtx val, 
> x86_cse_kind kind,
>      bb = get_immediate_dominator (CDI_DOMINATORS,
>                                   bb->loop_father->header);
>
> +place_tls_call:
>    rtx_insn *insn = BB_HEAD (bb);
>    while (insn && !NONDEBUG_INSN_P (insn))
>      {
> @@ -3824,7 +3863,8 @@ ix86_place_single_tls_call (rtx dest, rtx val, 
> x86_cse_kind kind,
>    auto_bitmap live_caller_saved_regs;
>    bitmap in = df_live ? DF_LIVE_IN (bb) : DF_LR_IN (bb);
>
> -  bool flags_live_p = bitmap_bit_p (in, FLAGS_REG);
> +  if (bitmap_bit_p (in, FLAGS_REG))
> +    bitmap_set_bit (live_caller_saved_regs, FLAGS_REG);
>
>    unsigned int i;
>
> @@ -3845,13 +3885,46 @@ ix86_place_single_tls_call (rtx dest, rtx val, 
> x86_cse_kind kind,
>           if (!NONDEBUG_INSN_P (insn))
>             continue;
>
> +         if (JUMP_P (insn))
> +           {
> +             /* This must be a conditional jump.  */
> +             rtx label = JUMP_LABEL (insn);
> +             if (label == nullptr
> +                 || ANY_RETURN_P (label)
> +                 || !(LABEL_P (label) || SYMBOL_REF_P (label)))
> +               gcc_unreachable ();
> +
> +             /* Place the call before all FLAGS_REG setting BBs since
> +                we can't place a call before nor after a conditional
> +                jump.  */
> +             bb = ix86_get_dominator_for_reg (FLAGS_REG, bb);
> +             goto place_tls_call;
Can we put those codes related to find the **BEFORE/AFTER** into a
function and called it recursively(or iteratively to decide find the
final BEFORE/AFTER)
So we can avoid those GOTO statements.

i.e

switch (kind)
  {
    ...
  }

  ix86_place_single_tls_call_place (....)

  if (before)
  emit_insn_before (..);
  else if (after)
   emit_insn_after (...);
  else
    gcc_unreachable ();

....

> +           }
> +
>           /* Check if FLAGS register is live.  */
>           set = single_set (insn);
>           if (set)
>             {
>               rtx dest = SET_DEST (set);
> -             if (REG_P (dest) && REGNO (dest) == FLAGS_REG)
> -               flags_live_p = true;
> +             if (REG_P (dest))
> +               {
> +                 if (bitmap_bit_p (updated_gnu_tls_insns,
> +                                   INSN_UID (insn)))
> +                   {
> +                   /* Insert the __tls_get_addr call before INSN which
> +                      replaces a __tls_get_addr call.  */
> +                     before = insn;
> +                     goto insert_before;
> +                   }
> +                 if (bitmap_bit_p (updated_gnu2_tls_insns,
> +                                   INSN_UID (insn)))
> +                   /* Mark FLAGS register as dead since FLAGS register
> +                      would be clobbered by the GNU2 TLS instruction.  */
> +                   bitmap_clear_bit (live_caller_saved_regs,
> +                                     FLAGS_REG);
> +                 else if (REGNO (dest) == FLAGS_REG)
> +                   bitmap_set_bit (live_caller_saved_regs, FLAGS_REG);
> +               }
>             }
>
>           rtx link;
> @@ -3863,29 +3936,30 @@ ix86_place_single_tls_call (rtx dest, rtx val, 
> x86_cse_kind kind,
>                 for (i = REGNO (XEXP (link, 0));
>                      i < END_REGNO (XEXP (link, 0));
>                      i++)
> -                 bitmap_clear_bit (live_caller_saved_regs, i);
> -
> -               /* Check if FLAGS register is dead.  */
> -               if (REGNO (XEXP (link, 0)) == FLAGS_REG)
> -                 flags_live_p = false;
> +                 if (i < FIRST_PSEUDO_REGISTER)
> +                   bitmap_clear_bit (live_caller_saved_regs, i);
>
>                 if (bitmap_empty_p (live_caller_saved_regs))
>                   {
> -                   /* All live caller-saved registers are dead after
> -                      this instruction.  Since TLS instructions
> -                      clobber FLAGS register, it must be dead where
> -                      the TLS will be inserted after.  */
> -                   if (flags_live_p)
> -                     gcc_unreachable ();
>                     after = insn;
>                     goto insert_after;
>                   }
>               }
>         }
>
> -      /* All live caller-saved registers should be dead at the end
> -        of this basic block.  */
> -      gcc_unreachable ();
> +      /* If any live caller-saved registers aren't dead at the end
> +        of this basic block, get the basic block which dominates all
> +        basic blocks which set the remaining live registers.  */
> +      auto_bitmap set_bbs;
> +      bitmap_iterator bi;
> +      unsigned int id;
> +      EXECUTE_IF_SET_IN_BITMAP (live_caller_saved_regs, 0, id, bi)
> +       {
> +         basic_block set_bb = ix86_get_dominator_for_reg (id, bb);
> +         bitmap_set_bit (set_bbs, set_bb->index);
> +       }
> +      bb = nearest_common_dominator_for_set (CDI_DOMINATORS, set_bbs);
> +      goto place_tls_call;
>      }
>
>    /* Emit the TLS CALL insn.  */
> @@ -3895,7 +3969,10 @@ insert_after:
>        tls_insn = emit_insn_after (tls, after);
>      }
>    else
> -    tls_insn = emit_insn_before (tls, before);
> +    {
> +insert_before:
> +      tls_insn = emit_insn_before (tls, before);
> +    }
>
>    rtx_insn *tlsdesc_insn = nullptr;
>    if (tlsdesc_set)
> @@ -4213,6 +4290,8 @@ pass_x86_cse::x86_cse (void)
>    basic_block bb;
>    rtx_insn *insn;
>    unsigned int i;
> +  auto_bitmap updated_gnu_tls_insns;
> +  auto_bitmap updated_gnu2_tls_insns;
>
>    df_set_flags (DF_DEFER_INSN_RESCAN);
>
> @@ -4333,7 +4412,10 @@ pass_x86_cse::x86_cse (void)
>           case X86_CSE_TLS_LD_BASE:
>           case X86_CSE_TLSDESC:
>             broadcast_reg = gen_reg_rtx (load->mode);
> -           replace_tls_call (broadcast_reg, load->insns);
> +           replace_tls_call (broadcast_reg, load->insns,
> +                             (load->kind == X86_CSE_TLSDESC
> +                              ? updated_gnu2_tls_insns
> +                              : updated_gnu_tls_insns));
>             load->broadcast_reg = broadcast_reg;
>             break;
>
> @@ -4399,6 +4481,8 @@ pass_x86_cse::x86_cse (void)
>                                               load->val,
>                                               load->kind,
>                                               load->bbs,
> +                                             updated_gnu_tls_insns,
> +                                             updated_gnu2_tls_insns,
>                                               PATTERN (load->def_insn));
>                   break;
>                 case X86_CSE_VEC_DUP:
> @@ -4442,7 +4526,9 @@ pass_x86_cse::x86_cse (void)
>                   ix86_place_single_tls_call (load->broadcast_reg,
>                                               load->val,
>                                               load->kind,
> -                                             load->bbs);
> +                                             load->bbs,
> +                                             updated_gnu_tls_insns,
> +                                             updated_gnu2_tls_insns);
>                   break;
>                 case X86_CSE_CONST0_VECTOR:
>                 case X86_CSE_CONSTM1_VECTOR:
> diff --git a/gcc/testsuite/gcc.target/i386/pr121572-1a.c 
> b/gcc/testsuite/gcc.target/i386/pr121572-1a.c
> new file mode 100644
> index 00000000000..270d8ff5cb6
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr121572-1a.c
> @@ -0,0 +1,41 @@
> +/* { dg-do compile { target *-*-linux* } } */
> +/* { dg-options "-O0 -fpic -fplt -mtls-dialect=gnu" } */
> +/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc').  */
> +/* { dg-final { check-function-bodies "**" "" "" { target { ! ia32 } } 
> {^\t?\.} } } */
> +
> +/*
> +**bug:
> +**.LFB[0-9]+:
> +**...
> +**     leaq    tv_cache@tlsld\(%rip\), %rdi
> +**     call    __tls_get_addr@PLT
> +**     movl    \$-1, %edi
> +**     mov[l|q]        %[e|r]ax, %[e|r]bx
> +**     call    val@PLT
> +**...
> +*/
> +
> +extern __thread int tv_cache __attribute__ ((visibility ("hidden")));
> +extern void use_cache (int);
> +extern int val (int v);
> +
> +__attribute__ ((optimize (2)))
> +void
> +bug (void)
> +{
> +  int compared = val (-1);
> +
> +  if (compared == 0 || (compared > 0 && val (2) == 0))
> +    {
> +      __builtin_trap ();
> +    }
> +
> +  if (compared < 0)
> +    {
> +      use_cache (tv_cache);
> +      return;
> +    }
> +
> +  use_cache (tv_cache);
> +  __builtin_trap ();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/pr121572-1b.c 
> b/gcc/testsuite/gcc.target/i386/pr121572-1b.c
> new file mode 100644
> index 00000000000..8a6089109f5
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr121572-1b.c
> @@ -0,0 +1,18 @@
> +/* { dg-do compile { target *-*-linux* } } */
> +/* { dg-options "-O0 -fpic -fplt -mtls-dialect=gnu2" } */
> +/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc').  */
> +/* { dg-final { check-function-bodies "**" "" "" { target { ! ia32 } } 
> {^\t?\.} } } */
> +
> +/*
> +**bug:
> +**.LFB[0-9]+:
> +**...
> +**     lea[l|q]        tv_cache@TLSDESC\(%rip\), %[e|r]ax
> +**     movl    \$-1, %edi
> +**     call    \*tv_cache@TLSCALL\(%[e|r]ax\)
> +**     mov[l|q]        %[e|r]ax, %[e|r]bx
> +**     call    val@PLT
> +**...
> +*/
> +
> +#include "pr121572-1a.c"
> diff --git a/gcc/testsuite/gcc.target/i386/pr121572-2a.c 
> b/gcc/testsuite/gcc.target/i386/pr121572-2a.c
> new file mode 100644
> index 00000000000..38b254657d3
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr121572-2a.c
> @@ -0,0 +1,39 @@
> +/* { dg-do compile { target *-*-linux* } } */
> +/* { dg-options "-O2 -fpic -fplt -mtls-dialect=gnu" } */
> +
> +typedef enum
> +{
> +  MPFR_RNDN
> +} mpfr_rnd_t;
> +typedef int mpfr_t[1];
> +long __gmpfr_emin, mpfr_agm_expo_0;
> +_Thread_local long __gmpfr_emax;
> +int mpfr_agm_compare, mpfr_agm___trans_tmp_1;
> +mpfr_t mpfr_agm_u;
> +void mpfr_mul (int *, int, int, mpfr_rnd_t);
> +int
> +mpfr_agm (int op1)
> +{
> +  int op2 = 0;
> +  if (__builtin_expect (mpfr_agm_compare == 0, 0))
> +    return 0;
> +  if (mpfr_agm_compare > 0)
> +    {
> +      int t = op1;
> +      op2 = t;
> +    }
> +  mpfr_agm_expo_0 = __gmpfr_emax;
> +  for (;;)
> +    {
> +    retry:
> +      mpfr_mul (mpfr_agm_u, op1, op2, MPFR_RNDN);
> +      if (0)
> +        goto retry;
> +      if (__builtin_expect (mpfr_agm___trans_tmp_1, 1))
> +        break;
> +    }
> +  __gmpfr_emin = __gmpfr_emax;
> +  return 0;
> +}
> +
> +/* { dg-final { scan-assembler-times "call\[ \t\]__tls_get_addr@PLT" 1 { 
> target { ! ia32 } } } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr121572-2b.c 
> b/gcc/testsuite/gcc.target/i386/pr121572-2b.c
> new file mode 100644
> index 00000000000..33d70024324
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr121572-2b.c
> @@ -0,0 +1,6 @@
> +/* { dg-do compile { target *-*-linux* } } */
> +/* { dg-options "-O2 -fpic -fplt -mtls-dialect=gnu2" } */
> +
> +#include "pr121572-2a.c"
> +
> +/* { dg-final { scan-assembler-times "call\[ 
> \t\]\\*__gmpfr_emax@TLSCALL\\(%(?:r|e)ax\\)" 1 { target { ! ia32 } } } } */
> --
> 2.50.1
>


-- 
BR,
Hongtao

Reply via email to