We can't place a TLS call before a conditional jump in a basic block like

(code_label 13 11 14 4 2 (nil) [1 uses])
(note 14 13 16 4 [bb 4] NOTE_INSN_BASIC_BLOCK)
(jump_insn 16 14 17 4 (set (pc)
        (if_then_else (le (reg:CCNO 17 flags)
                (const_int 0 [0]))
            (label_ref 27)
            (pc))) "x.c":10:21 discrim 1 1462 {*jcc}
     (expr_list:REG_DEAD (reg:CCNO 17 flags)
        (int_list:REG_BR_PROB 628353713 (nil)))
 -> 27)

since the TLS call will clobber flags register nor place a TLS call in a
basic block if any live caller-saved registers aren't dead at the end of
the basic block:

;; live  in      6 [bp] 7 [sp] 16 [argp] 17 [flags] 19 [frame] 104
;; live  gen     0 [ax] 102 106 108 116 117 118 120
;; live  kill    5 [di]

Instead, we should place such call before all register setting basic
blocks which dominate the current basic block.

Keep track the replaced GNU and GNU2 TLS instructions.  Use these info to
place the __tls_get_addr call and mark FLAGS register as dead.

gcc/

        PR target/121572
        * config/i386/i386-features.cc (replace_tls_call): Add a bitmap
        argument and put the updated TLS instruction in the bitmap.
        (ix86_get_dominator_for_reg): New.
        (ix86_place_single_tls_call): Add 2 bitmap arguments for updated
        GNU and GNU2 TLS instructions.  Add the live flag register to the
        bitmap.  Insert the __tls_get_addr call before INSN if it replaces
        a __tls_get_addr call.  Mark FLAGS register as dead if INSN
        replaced the GNU2 TLS instruction.  Clear the live register bitmap
        only for hard register.  If there is a conditional jump in the
        basic block or any live caller-saved registers aren't dead at the
        end of the basic block, get the basic block which dominates all
        basic blocks which set the live registers.

gcc/testsuite/

        PR target/121572
        * gcc.target/i386/pr121572-1a.c: New test.
        * gcc.target/i386/pr121572-1b.c: Likewise.
        * gcc.target/i386/pr121572-2a.c: Likewise.
        * gcc.target/i386/pr121572-2b.c: Likewise.

Signed-off-by: H.J. Lu <hjl.to...@gmail.com>
---
 gcc/config/i386/i386-features.cc            | 136 ++++++++++++++++----
 gcc/testsuite/gcc.target/i386/pr121572-1a.c |  41 ++++++
 gcc/testsuite/gcc.target/i386/pr121572-1b.c |  18 +++
 gcc/testsuite/gcc.target/i386/pr121572-2a.c |  39 ++++++
 gcc/testsuite/gcc.target/i386/pr121572-2b.c |   6 +
 5 files changed, 215 insertions(+), 25 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr121572-1a.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr121572-1b.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr121572-2a.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr121572-2b.c

diff --git a/gcc/config/i386/i386-features.cc b/gcc/config/i386/i386-features.cc
index f0bdc5c1880..903f2b0b478 100644
--- a/gcc/config/i386/i386-features.cc
+++ b/gcc/config/i386/i386-features.cc
@@ -3684,10 +3684,12 @@ ix86_broadcast_inner (rtx op, machine_mode mode,
   return op;
 }
 
-/* Replace CALL instruction in TLS_CALL_INSNS with SET from SRC.  */
+/* Replace CALL instruction in TLS_CALL_INSNS with SET from SRC and
+   put the updated instruction in UPDATED_TLS_INSNS.  */
 
 static void
-replace_tls_call (rtx src, auto_bitmap &tls_call_insns)
+replace_tls_call (rtx src, auto_bitmap &tls_call_insns,
+                 auto_bitmap &updated_tls_insns)
 {
   bitmap_iterator bi;
   unsigned int id;
@@ -3716,6 +3718,9 @@ replace_tls_call (rtx src, auto_bitmap &tls_call_insns)
       if (recog_memoized (set_insn) < 0)
        gcc_unreachable ();
 
+      /* Put SET_INSN in UPDATED_TLS_INSNS.  */
+      bitmap_set_bit (updated_tls_insns, INSN_UID (set_insn));
+
       if (dump_file)
        {
          fprintf (dump_file, "\nReplace:\n\n");
@@ -3732,15 +3737,48 @@ replace_tls_call (rtx src, auto_bitmap &tls_call_insns)
     }
 }
 
+/* Return the basic block which dominates all basic blocks which set
+   hard register REGNO used in basic block BB.  */
+
+static basic_block
+ix86_get_dominator_for_reg (unsigned int regno, basic_block bb)
+{
+  basic_block set_bb;
+  auto_bitmap set_bbs;
+
+  /* Get all BBs which set REGNO and dominate the current BB from all
+     DEFs of REGNO.  */
+  for (df_ref def = DF_REG_DEF_CHAIN (regno);
+       def;
+       def = DF_REF_NEXT_REG (def))
+    if (!DF_REF_IS_ARTIFICIAL (def)
+       && !DF_REF_FLAGS_IS_SET (def, DF_REF_MAY_CLOBBER)
+       && !DF_REF_FLAGS_IS_SET (def, DF_REF_MUST_CLOBBER))
+      {
+       set_bb = DF_REF_BB (def);
+       if (dominated_by_p (CDI_DOMINATORS, bb, set_bb))
+         bitmap_set_bit (set_bbs, set_bb->index);
+      }
+
+  bb = nearest_common_dominator_for_set (CDI_DOMINATORS, set_bbs);
+  return bb;
+}
+
 /* Generate a TLS call of KIND with VAL and copy the call result to DEST,
    at entry of the nearest dominator for basic block map BBS, which is in
    the fake loop that contains the whole function, so that there is only
-   a single TLS CALL of KIND with VAL in the whole function.  If
-   TLSDESC_SET isn't nullptr, insert it before the TLS call.  */
+   a single TLS CALL of KIND with VAL in the whole function.
+   UPDATED_GNU_TLS_INSNS contains instructions which replace the GNU TLS
+   instructions.  UPDATED_GNU2_TLS_INSNS contains instructions which
+   replace the GNU2 TLS instructions.  If TLSDESC_SET isn't nullptr,
+   insert it before the TLS call.  */
 
 static void
 ix86_place_single_tls_call (rtx dest, rtx val, x86_cse_kind kind,
-                           bitmap bbs, rtx tlsdesc_set = nullptr)
+                           auto_bitmap &bbs,
+                           auto_bitmap &updated_gnu_tls_insns,
+                           auto_bitmap &updated_gnu2_tls_insns,
+                           rtx tlsdesc_set = nullptr)
 {
   basic_block bb = nearest_common_dominator_for_set (CDI_DOMINATORS, bbs);
   while (bb->loop_father->latch
@@ -3748,6 +3786,7 @@ ix86_place_single_tls_call (rtx dest, rtx val, 
x86_cse_kind kind,
     bb = get_immediate_dominator (CDI_DOMINATORS,
                                  bb->loop_father->header);
 
+place_tls_call:
   rtx_insn *insn = BB_HEAD (bb);
   while (insn && !NONDEBUG_INSN_P (insn))
     {
@@ -3824,7 +3863,8 @@ ix86_place_single_tls_call (rtx dest, rtx val, 
x86_cse_kind kind,
   auto_bitmap live_caller_saved_regs;
   bitmap in = df_live ? DF_LIVE_IN (bb) : DF_LR_IN (bb);
 
-  bool flags_live_p = bitmap_bit_p (in, FLAGS_REG);
+  if (bitmap_bit_p (in, FLAGS_REG))
+    bitmap_set_bit (live_caller_saved_regs, FLAGS_REG);
 
   unsigned int i;
 
@@ -3845,13 +3885,46 @@ ix86_place_single_tls_call (rtx dest, rtx val, 
x86_cse_kind kind,
          if (!NONDEBUG_INSN_P (insn))
            continue;
 
+         if (JUMP_P (insn))
+           {
+             /* This must be a conditional jump.  */
+             rtx label = JUMP_LABEL (insn);
+             if (label == nullptr
+                 || ANY_RETURN_P (label)
+                 || !(LABEL_P (label) || SYMBOL_REF_P (label)))
+               gcc_unreachable ();
+
+             /* Place the call before all FLAGS_REG setting BBs since
+                we can't place a call before nor after a conditional
+                jump.  */
+             bb = ix86_get_dominator_for_reg (FLAGS_REG, bb);
+             goto place_tls_call;
+           }
+
          /* Check if FLAGS register is live.  */
          set = single_set (insn);
          if (set)
            {
              rtx dest = SET_DEST (set);
-             if (REG_P (dest) && REGNO (dest) == FLAGS_REG)
-               flags_live_p = true;
+             if (REG_P (dest))
+               {
+                 if (bitmap_bit_p (updated_gnu_tls_insns,
+                                   INSN_UID (insn)))
+                   {
+                   /* Insert the __tls_get_addr call before INSN which
+                      replaces a __tls_get_addr call.  */
+                     before = insn;
+                     goto insert_before;
+                   }
+                 if (bitmap_bit_p (updated_gnu2_tls_insns,
+                                   INSN_UID (insn)))
+                   /* Mark FLAGS register as dead since FLAGS register
+                      would be clobbered by the GNU2 TLS instruction.  */
+                   bitmap_clear_bit (live_caller_saved_regs,
+                                     FLAGS_REG);
+                 else if (REGNO (dest) == FLAGS_REG)
+                   bitmap_set_bit (live_caller_saved_regs, FLAGS_REG);
+               }
            }
 
          rtx link;
@@ -3863,29 +3936,30 @@ ix86_place_single_tls_call (rtx dest, rtx val, 
x86_cse_kind kind,
                for (i = REGNO (XEXP (link, 0));
                     i < END_REGNO (XEXP (link, 0));
                     i++)
-                 bitmap_clear_bit (live_caller_saved_regs, i);
-
-               /* Check if FLAGS register is dead.  */
-               if (REGNO (XEXP (link, 0)) == FLAGS_REG)
-                 flags_live_p = false;
+                 if (i < FIRST_PSEUDO_REGISTER)
+                   bitmap_clear_bit (live_caller_saved_regs, i);
 
                if (bitmap_empty_p (live_caller_saved_regs))
                  {
-                   /* All live caller-saved registers are dead after
-                      this instruction.  Since TLS instructions
-                      clobber FLAGS register, it must be dead where
-                      the TLS will be inserted after.  */
-                   if (flags_live_p)
-                     gcc_unreachable ();
                    after = insn;
                    goto insert_after;
                  }
              }
        }
 
-      /* All live caller-saved registers should be dead at the end
-        of this basic block.  */
-      gcc_unreachable ();
+      /* If any live caller-saved registers aren't dead at the end
+        of this basic block, get the basic block which dominates all
+        basic blocks which set the remaining live registers.  */
+      auto_bitmap set_bbs;
+      bitmap_iterator bi;
+      unsigned int id;
+      EXECUTE_IF_SET_IN_BITMAP (live_caller_saved_regs, 0, id, bi)
+       {
+         basic_block set_bb = ix86_get_dominator_for_reg (id, bb);
+         bitmap_set_bit (set_bbs, set_bb->index);
+       }
+      bb = nearest_common_dominator_for_set (CDI_DOMINATORS, set_bbs);
+      goto place_tls_call;
     }
 
   /* Emit the TLS CALL insn.  */
@@ -3895,7 +3969,10 @@ insert_after:
       tls_insn = emit_insn_after (tls, after);
     }
   else
-    tls_insn = emit_insn_before (tls, before);
+    {
+insert_before:
+      tls_insn = emit_insn_before (tls, before);
+    }
 
   rtx_insn *tlsdesc_insn = nullptr;
   if (tlsdesc_set)
@@ -4213,6 +4290,8 @@ pass_x86_cse::x86_cse (void)
   basic_block bb;
   rtx_insn *insn;
   unsigned int i;
+  auto_bitmap updated_gnu_tls_insns;
+  auto_bitmap updated_gnu2_tls_insns;
 
   df_set_flags (DF_DEFER_INSN_RESCAN);
 
@@ -4333,7 +4412,10 @@ pass_x86_cse::x86_cse (void)
          case X86_CSE_TLS_LD_BASE:
          case X86_CSE_TLSDESC:
            broadcast_reg = gen_reg_rtx (load->mode);
-           replace_tls_call (broadcast_reg, load->insns);
+           replace_tls_call (broadcast_reg, load->insns,
+                             (load->kind == X86_CSE_TLSDESC
+                              ? updated_gnu2_tls_insns
+                              : updated_gnu_tls_insns));
            load->broadcast_reg = broadcast_reg;
            break;
 
@@ -4399,6 +4481,8 @@ pass_x86_cse::x86_cse (void)
                                              load->val,
                                              load->kind,
                                              load->bbs,
+                                             updated_gnu_tls_insns,
+                                             updated_gnu2_tls_insns,
                                              PATTERN (load->def_insn));
                  break;
                case X86_CSE_VEC_DUP:
@@ -4442,7 +4526,9 @@ pass_x86_cse::x86_cse (void)
                  ix86_place_single_tls_call (load->broadcast_reg,
                                              load->val,
                                              load->kind,
-                                             load->bbs);
+                                             load->bbs,
+                                             updated_gnu_tls_insns,
+                                             updated_gnu2_tls_insns);
                  break;
                case X86_CSE_CONST0_VECTOR:
                case X86_CSE_CONSTM1_VECTOR:
diff --git a/gcc/testsuite/gcc.target/i386/pr121572-1a.c 
b/gcc/testsuite/gcc.target/i386/pr121572-1a.c
new file mode 100644
index 00000000000..270d8ff5cb6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr121572-1a.c
@@ -0,0 +1,41 @@
+/* { dg-do compile { target *-*-linux* } } */
+/* { dg-options "-O0 -fpic -fplt -mtls-dialect=gnu" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc').  */
+/* { dg-final { check-function-bodies "**" "" "" { target { ! ia32 } } 
{^\t?\.} } } */
+
+/*
+**bug:
+**.LFB[0-9]+:
+**...
+**     leaq    tv_cache@tlsld\(%rip\), %rdi
+**     call    __tls_get_addr@PLT
+**     movl    \$-1, %edi
+**     mov[l|q]        %[e|r]ax, %[e|r]bx
+**     call    val@PLT
+**...
+*/
+
+extern __thread int tv_cache __attribute__ ((visibility ("hidden")));
+extern void use_cache (int);
+extern int val (int v);
+
+__attribute__ ((optimize (2)))
+void
+bug (void)
+{
+  int compared = val (-1);
+
+  if (compared == 0 || (compared > 0 && val (2) == 0))
+    {
+      __builtin_trap ();
+    }
+
+  if (compared < 0)
+    {
+      use_cache (tv_cache);
+      return;
+    }
+
+  use_cache (tv_cache);
+  __builtin_trap ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr121572-1b.c 
b/gcc/testsuite/gcc.target/i386/pr121572-1b.c
new file mode 100644
index 00000000000..8a6089109f5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr121572-1b.c
@@ -0,0 +1,18 @@
+/* { dg-do compile { target *-*-linux* } } */
+/* { dg-options "-O0 -fpic -fplt -mtls-dialect=gnu2" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc').  */
+/* { dg-final { check-function-bodies "**" "" "" { target { ! ia32 } } 
{^\t?\.} } } */
+
+/*
+**bug:
+**.LFB[0-9]+:
+**...
+**     lea[l|q]        tv_cache@TLSDESC\(%rip\), %[e|r]ax
+**     movl    \$-1, %edi
+**     call    \*tv_cache@TLSCALL\(%[e|r]ax\)
+**     mov[l|q]        %[e|r]ax, %[e|r]bx
+**     call    val@PLT
+**...
+*/
+
+#include "pr121572-1a.c"
diff --git a/gcc/testsuite/gcc.target/i386/pr121572-2a.c 
b/gcc/testsuite/gcc.target/i386/pr121572-2a.c
new file mode 100644
index 00000000000..38b254657d3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr121572-2a.c
@@ -0,0 +1,39 @@
+/* { dg-do compile { target *-*-linux* } } */
+/* { dg-options "-O2 -fpic -fplt -mtls-dialect=gnu" } */
+
+typedef enum
+{
+  MPFR_RNDN
+} mpfr_rnd_t;
+typedef int mpfr_t[1];
+long __gmpfr_emin, mpfr_agm_expo_0;
+_Thread_local long __gmpfr_emax;
+int mpfr_agm_compare, mpfr_agm___trans_tmp_1;
+mpfr_t mpfr_agm_u;
+void mpfr_mul (int *, int, int, mpfr_rnd_t);
+int
+mpfr_agm (int op1)
+{
+  int op2 = 0;
+  if (__builtin_expect (mpfr_agm_compare == 0, 0))
+    return 0;
+  if (mpfr_agm_compare > 0)
+    {
+      int t = op1;
+      op2 = t;
+    }
+  mpfr_agm_expo_0 = __gmpfr_emax;
+  for (;;)
+    {
+    retry:
+      mpfr_mul (mpfr_agm_u, op1, op2, MPFR_RNDN);
+      if (0)
+        goto retry;
+      if (__builtin_expect (mpfr_agm___trans_tmp_1, 1))
+        break;
+    }
+  __gmpfr_emin = __gmpfr_emax;
+  return 0;
+}
+
+/* { dg-final { scan-assembler-times "call\[ \t\]__tls_get_addr@PLT" 1 { 
target { ! ia32 } } } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr121572-2b.c 
b/gcc/testsuite/gcc.target/i386/pr121572-2b.c
new file mode 100644
index 00000000000..33d70024324
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr121572-2b.c
@@ -0,0 +1,6 @@
+/* { dg-do compile { target *-*-linux* } } */
+/* { dg-options "-O2 -fpic -fplt -mtls-dialect=gnu2" } */
+
+#include "pr121572-2a.c"
+
+/* { dg-final { scan-assembler-times "call\[ 
\t\]\\*__gmpfr_emax@TLSCALL\\(%(?:r|e)ax\\)" 1 { target { ! ia32 } } } } */
-- 
2.50.1

Reply via email to