Hi,

Zero-extending UNSPEC_TP leads to problems in ix86_decompose_address.  In
64-bit mode, for both x32 and x86-64, the thread pointer is an address stored
in %fs, which is a 64-bit segment register.  Since there is no direct access
to %fs from user space, the OS provides a system call to write/read %fs:

        int arch_prctl(int code, unsigned long addr);
        int arch_prctl(int code, unsigned long *addr);

       ARCH_SET_FS
              Set the 64-bit base for the FS register to addr.

       ARCH_GET_FS
              Return the 64-bit base value for the FS register of the
              current thread in the unsigned long pointed to by addr.

To avoid calling arch_prctl (ARCH_GET_FS, &addr) to read %fs, the OS arranges
for %fs to point to a struct:

typedef struct
{
  void *tcb;            /* Pointer to the TCB.  Not necessarily the
                           thread descriptor used by libpthread.  */
  ...
}

The OS sets up tcb == %fs.  Then we can use

(define_insn "*load_tp_<mode>"
  [(set (match_operand:P 0 "register_operand" "=r") 
        (unspec:P [(const_int 0)] UNSPEC_TP))]
  "!TARGET_X32"
  "mov{<imodesuffix>}\t{%%<tp_seg>:0, %0|%0, <iptrsize> PTR <tp_seg>:0}"
  [(set_attr "type" "imov")
   (set_attr "modrm" "0")
   (set_attr "length" "7")
   (set_attr "memory" "load")
   (set_attr "imm_disp" "false")])

instead of

void *
get_tp (void)
{
  unsigned long long addr;
  arch_prctl (ARCH_GET_FS, &addr);
  return (void *) addr;
}

Since the x32 address space is 32-bit, the upper 32 bits of %fs are always
zero and we can still use a pointer to store the %fs value.  To load TP into
an SImode or DImode register we can use

(define_insn "*load_tp_x32_<mode>"
  [(set (match_operand:SWI48x 0 "register_operand" "=r") 
        (unspec:SWI48x [(const_int 0)] UNSPEC_TP))]
  "TARGET_X32"
  "mov{l}\t{%%fs:0, %k0|%k0, DWORD PTR fs:0}"
  [(set_attr "type" "imov")
   (set_attr "modrm" "0")
   (set_attr "length" "7")
   (set_attr "memory" "load")
   (set_attr "imm_disp" "false")])

instead of calls to get_tp_si or get_tp_di:

void *
get_tp_si (void)
{
  unsigned long long addr;
  arch_prctl (ARCH_GET_FS, &addr);
  return (void *) (unsigned long) addr;
}

long long
get_tp_di (void)
{
  unsigned long long addr;
  arch_prctl (ARCH_GET_FS, &addr);
  return addr;
}


H.J.
---
gcc/

2012-03-24  H.J. Lu  <hongjiu...@intel.com>

        PR target/52698
        * config/i386/i386.c (ix86_decompose_address): Remove
        <case ZERO_EXTEND> for UNSPEC_TP references.
        (get_thread_pointer): Load UNSPEC_TP into tp_mode register
        directly.

        * config/i386/i386.md (*load_tp_x32): Removed.
        (*load_tp_x32_zext): Likewise.
        (*load_tp_x32_<mode>): New.

gcc/testsuite/

2012-03-24  H.J. Lu  <hongjiu...@intel.com>

        PR target/52698
        * gcc.target/i386/pr52698-1.c: New.
        * gcc.target/i386/pr52698-2.c: Likewise.

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index a21f2da..14c4056 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -11514,12 +11514,6 @@ ix86_decompose_address (rtx addr, struct ix86_address *out)
              scale = 1 << scale;
              break;
 
-           case ZERO_EXTEND:
-             op = XEXP (op, 0);
-             if (GET_CODE (op) != UNSPEC)
-               return 0;
-             /* FALLTHRU */
-
            case UNSPEC:
              if (XINT (op, 1) == UNSPEC_TP
                  && TARGET_TLS_DIRECT_SEG_REFS
@@ -12491,15 +12485,7 @@ legitimize_pic_address (rtx orig, rtx reg)
 static rtx
 get_thread_pointer (enum machine_mode tp_mode, bool to_reg)
 {
-  rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
-
-  if (GET_MODE (tp) != tp_mode)
-    {
-      gcc_assert (GET_MODE (tp) == SImode);
-      gcc_assert (tp_mode == DImode);
-
-      tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
-    }
+  rtx tp = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
 
   if (to_reg)
     tp = copy_to_mode_reg (tp_mode, tp);
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 2d20a52..ac6124e 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -12748,20 +12748,9 @@
 (define_mode_attr tp_seg [(SI "gs") (DI "fs")])
 
 ;; Load and add the thread base pointer from %<tp_seg>:0.
-(define_insn "*load_tp_x32"
-  [(set (match_operand:SI 0 "register_operand" "=r")
-       (unspec:SI [(const_int 0)] UNSPEC_TP))]
-  "TARGET_X32"
-  "mov{l}\t{%%fs:0, %0|%0, DWORD PTR fs:0}"
-  [(set_attr "type" "imov")
-   (set_attr "modrm" "0")
-   (set_attr "length" "7")
-   (set_attr "memory" "load")
-   (set_attr "imm_disp" "false")])
-
-(define_insn "*load_tp_x32_zext"
-  [(set (match_operand:DI 0 "register_operand" "=r")
-       (zero_extend:DI (unspec:SI [(const_int 0)] UNSPEC_TP)))]
+(define_insn "*load_tp_x32_<mode>"
+  [(set (match_operand:SWI48x 0 "register_operand" "=r")
+       (unspec:SWI48x [(const_int 0)] UNSPEC_TP))]
   "TARGET_X32"
   "mov{l}\t{%%fs:0, %k0|%k0, DWORD PTR fs:0}"
   [(set_attr "type" "imov")
diff --git a/gcc/testsuite/gcc.target/i386/pr52698-1.c b/gcc/testsuite/gcc.target/i386/pr52698-1.c
new file mode 100644
index 0000000..0395521
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr52698-1.c
@@ -0,0 +1,18 @@
+/* PR target/52698 */
+/* { dg-do compile { target { ! { ia32 } } } } */
+/* { dg-options "-mx32 -O2 -maddress-mode=short" } */
+
+extern void abort (void);
+static __thread unsigned char foo [32]
+  __attribute__ ((tls_model ("initial-exec"), aligned (sizeof (void *))));
+void
+test2 (void)
+{
+  unsigned int s;
+  for (s = 0; s < sizeof (foo); ++s)
+    {
+      if (foo [s] != s)
+       abort ();
+      foo [s] = sizeof (foo) - s;
+    }
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr52698-2.c b/gcc/testsuite/gcc.target/i386/pr52698-2.c
new file mode 100644
index 0000000..8ad470a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr52698-2.c
@@ -0,0 +1,18 @@
+/* PR target/52698 */
+/* { dg-do compile { target { ! { ia32 } } } } */
+/* { dg-options "-mx32 -O2 -maddress-mode=long" } */
+
+extern void abort (void);
+static __thread unsigned char foo [32]
+  __attribute__ ((tls_model ("initial-exec"), aligned (sizeof (void *))));
+void
+test2 (void)
+{
+  unsigned int s;
+  for (s = 0; s < sizeof (foo); ++s)
+    {
+      if (foo [s] != s)
+       abort ();
+      foo [s] = sizeof (foo) - s;
+    }
+}

Reply via email to