Re: [PATCH 02/10] addr32: Output REX prefix for UNSPEC_GOTNTPOFF

2012-03-05 Thread Uros Bizjak
On Fri, Mar 2, 2012 at 9:36 PM, H.J. Lu hongjiu...@intel.com wrote:

 X86-64 linker optimizes TLS_MODEL_INITIAL_EXEC to TLS_MODEL_LOCAL_EXEC
 by checking

        movq foo@gottpoff(%rip), %reg

 and

        addq foo@gottpoff(%rip), %reg

 It uses the REX prefix to avoid the last byte of the previous
 instruction.  With 32bit Pmode, we may not have the REX prefix and
 the last byte of the previous instruction may be an offset, which
 may look like a REX prefix.  IE-LE optimization will generate corrupted
 binary.  This patch makes sure we always output an REX pfrefix for
 UNSPEC_GOTNTPOFF.  OK for trunk?

Actually, linker has:

case R_X86_64_GOTTPOFF:
  /* Check transition from IE access model:
mov foo@gottpoff(%rip), %reg
add foo@gottpoff(%rip), %reg
   */

  /* Check REX prefix first.  */
  if (offset = 3  (offset + 4) = sec-size)
{
  val = bfd_get_8 (abfd, contents + offset - 3);
  if (val != 0x48  val != 0x4c)
{
  /* X32 may have 0x44 REX prefix or no REX prefix.  */
  if (ABI_64_P (abfd))
return FALSE;
}
}
  else
{
  /* X32 may not have any REX prefix.  */
  if (ABI_64_P (abfd))
return FALSE;
  if (offset  2 || (offset + 3)  sec-size)
return FALSE;
}

So, it should handle the case without REX just OK. If it doesn't, then
this is a bug in binutils.

Uros.


Re: [PATCH 02/10] addr32: Output REX prefix for UNSPEC_GOTNTPOFF

2012-03-05 Thread H.J. Lu
On Mon, Mar 5, 2012 at 7:31 AM, Uros Bizjak ubiz...@gmail.com wrote:
 On Fri, Mar 2, 2012 at 9:36 PM, H.J. Lu hongjiu...@intel.com wrote:

 X86-64 linker optimizes TLS_MODEL_INITIAL_EXEC to TLS_MODEL_LOCAL_EXEC
 by checking

        movq foo@gottpoff(%rip), %reg

 and

        addq foo@gottpoff(%rip), %reg

 It uses the REX prefix to avoid the last byte of the previous
 instruction.  With 32bit Pmode, we may not have the REX prefix and
 the last byte of the previous instruction may be an offset, which
 may look like a REX prefix.  IE-LE optimization will generate corrupted
 binary.  This patch makes sure we always output an REX pfrefix for
 UNSPEC_GOTNTPOFF.  OK for trunk?

 Actually, linker has:

    case R_X86_64_GOTTPOFF:
      /* Check transition from IE access model:
                mov foo@gottpoff(%rip), %reg
                add foo@gottpoff(%rip), %reg
       */

      /* Check REX prefix first.  */
      if (offset = 3  (offset + 4) = sec-size)
        {
          val = bfd_get_8 (abfd, contents + offset - 3);
          if (val != 0x48  val != 0x4c)
            {
              /* X32 may have 0x44 REX prefix or no REX prefix.  */
              if (ABI_64_P (abfd))
                return FALSE;
            }
        }
      else
        {
          /* X32 may not have any REX prefix.  */
          if (ABI_64_P (abfd))
            return FALSE;
          if (offset  2 || (offset + 3)  sec-size)
            return FALSE;
        }

 So, it should handle the case without REX just OK. If it doesn't, then
 this is a bug in binutils.


The last byte of the displacement in the previous instruction
may happen to look like a REX byte. In that case, linker
will overwrite the last byte of the previous instruction and
generate the wrong instruction sequence.

I need to update linker to enforce the REX byte check.

-- 
H.J.


Re: [PATCH 02/10] addr32: Output REX prefix for UNSPEC_GOTNTPOFF

2012-03-05 Thread Uros Bizjak
On Mon, Mar 5, 2012 at 6:03 PM, H.J. Lu hjl.to...@gmail.com wrote:

 X86-64 linker optimizes TLS_MODEL_INITIAL_EXEC to TLS_MODEL_LOCAL_EXEC
 by checking

        movq foo@gottpoff(%rip), %reg

 and

        addq foo@gottpoff(%rip), %reg

 It uses the REX prefix to avoid the last byte of the previous
 instruction.  With 32bit Pmode, we may not have the REX prefix and
 the last byte of the previous instruction may be an offset, which
 may look like a REX prefix.  IE-LE optimization will generate corrupted
 binary.  This patch makes sure we always output an REX pfrefix for
 UNSPEC_GOTNTPOFF.  OK for trunk?

 Actually, linker has:

    case R_X86_64_GOTTPOFF:
      /* Check transition from IE access model:
                mov foo@gottpoff(%rip), %reg
                add foo@gottpoff(%rip), %reg
       */

      /* Check REX prefix first.  */
      if (offset = 3  (offset + 4) = sec-size)
        {
          val = bfd_get_8 (abfd, contents + offset - 3);
          if (val != 0x48  val != 0x4c)
            {
              /* X32 may have 0x44 REX prefix or no REX prefix.  */
              if (ABI_64_P (abfd))
                return FALSE;
            }
        }
      else
        {
          /* X32 may not have any REX prefix.  */
          if (ABI_64_P (abfd))
            return FALSE;
          if (offset  2 || (offset + 3)  sec-size)
            return FALSE;
        }

 So, it should handle the case without REX just OK. If it doesn't, then
 this is a bug in binutils.


 The last byte of the displacement in the previous instruction
 may happen to look like a REX byte. In that case, linker
 will overwrite the last byte of the previous instruction and
 generate the wrong instruction sequence.

 I need to update linker to enforce the REX byte check.

One important observation: if we want to follow the x86_64 TLS spec
strictly, we have to use existing DImode patterns only. This also
means that we should NOT convert other TLS patterns to Pmode, since
they explicitly state movq and addq. If this is not the case, then we
need new TLS specification for X32.

Uros.

 --
 H.J.


Re: [PATCH 02/10] addr32: Output REX prefix for UNSPEC_GOTNTPOFF

2012-03-04 Thread Uros Bizjak
On Fri, Mar 2, 2012 at 9:36 PM, H.J. Lu hongjiu...@intel.com wrote:

 X86-64 linker optimizes TLS_MODEL_INITIAL_EXEC to TLS_MODEL_LOCAL_EXEC
 by checking

        movq foo@gottpoff(%rip), %reg

 and

        addq foo@gottpoff(%rip), %reg

 It uses the REX prefix to avoid the last byte of the previous
 instruction.  With 32bit Pmode, we may not have the REX prefix and
 the last byte of the previous instruction may be an offset, which
 may look like a REX prefix.  IE-LE optimization will generate corrupted
 binary.  This patch makes sure we always output an REX pfrefix for
 UNSPEC_GOTNTPOFF.  OK for trunk?

No, please implement this using UNSPEC in the same way as
tls_initial_exec_64_sun implements Sun linker quirk.

Uros.


Re: [PATCH 02/10] addr32: Output REX prefix for UNSPEC_GOTNTPOFF

2012-03-04 Thread H.J. Lu
On Sun, Mar 4, 2012 at 2:12 PM, Uros Bizjak ubiz...@gmail.com wrote:
 On Fri, Mar 2, 2012 at 9:36 PM, H.J. Lu hongjiu...@intel.com wrote:

 X86-64 linker optimizes TLS_MODEL_INITIAL_EXEC to TLS_MODEL_LOCAL_EXEC
 by checking

        movq foo@gottpoff(%rip), %reg

 and

        addq foo@gottpoff(%rip), %reg

 It uses the REX prefix to avoid the last byte of the previous
 instruction.  With 32bit Pmode, we may not have the REX prefix and
 the last byte of the previous instruction may be an offset, which
 may look like a REX prefix.  IE-LE optimization will generate corrupted
 binary.  This patch makes sure we always output an REX pfrefix for
 UNSPEC_GOTNTPOFF.  OK for trunk?

 No, please implement this using UNSPEC in the same way as
 tls_initial_exec_64_sun implements Sun linker quirk.


I am not sure how it can be done with UNSPEC cleanly.
Unlike the Sun linker issue, this is an instruction encoding
issue.  At instruction pattern level, there is no difference
between x32 and x86-64. I need to make sure that there is
always one and only one REX prefix with UNSPEC_GOTNTPOFF.
If REG is r8-r15, we shouldn't add another REX prefix.

-- 
H.J.


Re: [PATCH 02/10] addr32: Output REX prefix for UNSPEC_GOTNTPOFF

2012-03-04 Thread Uros Bizjak
On Sun, Mar 4, 2012 at 11:38 PM, H.J. Lu hjl.to...@gmail.com wrote:
 On Sun, Mar 4, 2012 at 2:12 PM, Uros Bizjak ubiz...@gmail.com wrote:
 On Fri, Mar 2, 2012 at 9:36 PM, H.J. Lu hongjiu...@intel.com wrote:

 X86-64 linker optimizes TLS_MODEL_INITIAL_EXEC to TLS_MODEL_LOCAL_EXEC
 by checking

        movq foo@gottpoff(%rip), %reg

 and

        addq foo@gottpoff(%rip), %reg

 It uses the REX prefix to avoid the last byte of the previous
 instruction.  With 32bit Pmode, we may not have the REX prefix and
 the last byte of the previous instruction may be an offset, which
 may look like a REX prefix.  IE-LE optimization will generate corrupted
 binary.  This patch makes sure we always output an REX pfrefix for
 UNSPEC_GOTNTPOFF.  OK for trunk?

 No, please implement this using UNSPEC in the same way as
 tls_initial_exec_64_sun implements Sun linker quirk.


 I am not sure how it can be done with UNSPEC cleanly.
 Unlike the Sun linker issue, this is an instruction encoding
 issue.  At instruction pattern level, there is no difference
 between x32 and x86-64. I need to make sure that there is
 always one and only one REX prefix with UNSPEC_GOTNTPOFF.
 If REG is r8-r15, we shouldn't add another REX prefix.

(define_insn tls_initial_exec_x32
  [(set (match_operand:SI 0 register_operand =r)
(unspec:SI
 [(match_operand:SI 1 tls_symbolic_operand )]
 UNSPEC_TLS_IE_X32))
   (clobber (reg:CC FLAGS_REG))]
  TARGET_X32
{
  if (!REX_INT_REG_P (operands[0])
fputs (\trex\n, asm_out_file);
  output_asm_insn
(mov{l}\t{%%fs:0, %0|%0, QWORD PTR fs:0}, operands);
  if (!REX_INT_REG_P (operands[0])
fputs (\trex\n, asm_out_file);
  return add{l}\t{%a1@gottpoff(%%rip), %0|%0, %a1@gottpoff[rip]};
}
  [(set_attr type multi)])

rex or rex64 prefix, whatever you wish.

Then generate this pattern from legitimize_tls_address,
TLS_MODEL_INITIAL_EXEC, see TARGET_SUN_TLS part.

Uros.


Re: [PATCH 02/10] addr32: Output REX prefix for UNSPEC_GOTNTPOFF

2012-03-04 Thread H.J. Lu
On Sun, Mar 4, 2012 at 2:52 PM, Uros Bizjak ubiz...@gmail.com wrote:
 On Sun, Mar 4, 2012 at 11:38 PM, H.J. Lu hjl.to...@gmail.com wrote:
 On Sun, Mar 4, 2012 at 2:12 PM, Uros Bizjak ubiz...@gmail.com wrote:
 On Fri, Mar 2, 2012 at 9:36 PM, H.J. Lu hongjiu...@intel.com wrote:

 X86-64 linker optimizes TLS_MODEL_INITIAL_EXEC to TLS_MODEL_LOCAL_EXEC
 by checking

        movq foo@gottpoff(%rip), %reg

 and

        addq foo@gottpoff(%rip), %reg

 It uses the REX prefix to avoid the last byte of the previous
 instruction.  With 32bit Pmode, we may not have the REX prefix and
 the last byte of the previous instruction may be an offset, which
 may look like a REX prefix.  IE-LE optimization will generate corrupted
 binary.  This patch makes sure we always output an REX pfrefix for
 UNSPEC_GOTNTPOFF.  OK for trunk?

 No, please implement this using UNSPEC in the same way as
 tls_initial_exec_64_sun implements Sun linker quirk.


 I am not sure how it can be done with UNSPEC cleanly.
 Unlike the Sun linker issue, this is an instruction encoding
 issue.  At instruction pattern level, there is no difference
 between x32 and x86-64. I need to make sure that there is
 always one and only one REX prefix with UNSPEC_GOTNTPOFF.
 If REG is r8-r15, we shouldn't add another REX prefix.

 (define_insn tls_initial_exec_x32
  [(set (match_operand:SI 0 register_operand =r)
        (unspec:SI
         [(match_operand:SI 1 tls_symbolic_operand )]
         UNSPEC_TLS_IE_X32))
   (clobber (reg:CC FLAGS_REG))]
  TARGET_X32
 {
  if (!REX_INT_REG_P (operands[0])
    fputs (\trex\n, asm_out_file);
  output_asm_insn
    (mov{l}\t{%%fs:0, %0|%0, QWORD PTR fs:0}, operands);
  if (!REX_INT_REG_P (operands[0])
    fputs (\trex\n, asm_out_file);
  return add{l}\t{%a1@gottpoff(%%rip), %0|%0, %a1@gottpoff[rip]};
 }
  [(set_attr type multi)])

 rex or rex64 prefix, whatever you wish.


Will it introduce bugs? The normal code look like

  off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
  off = gen_rtx_CONST (Pmode, off);
  if (pic)
off = gen_rtx_PLUS (Pmode, pic, off);
  off = gen_const_mem (Pmode, off);
  set_mem_alias_set (off, ix86_GOT_alias_set ());

  if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
{
  base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
  off = force_reg (Pmode, off);
  return gen_rtx_PLUS (Pmode, base, off);
}

There is a call to

 set_mem_alias_set (off, ix86_GOT_alias_set ());

-- 
H.J.


[PATCH 02/10] addr32: Output REX prefix for UNSPEC_GOTNTPOFF

2012-03-02 Thread H.J. Lu
X86-64 linker optimizes TLS_MODEL_INITIAL_EXEC to TLS_MODEL_LOCAL_EXEC
by checking

movq foo@gottpoff(%rip), %reg

and

addq foo@gottpoff(%rip), %reg

It uses the REX prefix to avoid the last byte of the previous
instruction.  With 32bit Pmode, we may not have the REX prefix and
the last byte of the previous instruction may be an offset, which
may look like a REX prefix.  IE-LE optimization will generate corrupted
binary.  This patch makes sure we always output an REX pfrefix for
UNSPEC_GOTNTPOFF.  OK for trunk?

Thanks.

H.J.

2012-03-02  H.J. Lu  hongjiu...@intel.com

* config/i386/i386-protos.h (ix86_output_rex_prefix_p): New.
* config/i386/i386.c (ix86_output_rex_prefix_p): Likewise.

* config/i386/i386.md (*movsi_internal): Output REX prefix if
needed.
(*addmode_1): Likewise.

diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index 630112f..a9b9d3f 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -277,6 +277,8 @@ extern void x86_output_aligned_bss (FILE *, tree, const 
char *,
 extern void x86_elf_aligned_common (FILE *, const char *,
unsigned HOST_WIDE_INT, int);
 
+extern bool ix86_output_rex_prefix_p (rtx, rtx);
+
 #ifdef RTX_CODE
 extern void ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *,
  enum rtx_code *, enum rtx_code *);
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index ac9c714..2cbfb64 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -14670,6 +14670,29 @@ i386_asm_output_addr_const_extra (FILE *file, rtx x)
 
   return true;
 }
+
+/* Since x64-64 linker IE-LE transition requires a REX prefix, we
+   output a REX prefix if there isn't one.  */
+
+bool
+ix86_output_rex_prefix_p (rtx dest, rtx op)
+{
+  if (!TARGET_X32
+  || GET_MODE (dest) != SImode
+  || REX_INT_REG_P (dest)
+  || !MEM_P (op))
+return false;
+
+  op = XEXP (op, 0);
+  if (GET_CODE (op) != CONST)
+return false;
+
+  op = XEXP (op, 0);
+  if (GET_CODE (op) != UNSPEC)
+return false;
+
+  return XINT (op, 1) == UNSPEC_GOTNTPOFF;
+}
 
 /* Split one or more double-mode RTL references into pairs of half-mode
references.  The RTL can be REG, offsettable MEM, integer constant, or
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 8fc7918..35b2673 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -2211,7 +2211,13 @@
   if (ix86_use_lea_for_mov (insn, operands))
return lea{l}\t{%a1, %0|%0, %a1};
   else
-   return mov{l}\t{%1, %0|%0, %1};
+   {
+ /* Output REX prefix if needed.  */
+ if (ix86_output_rex_prefix_p (operands[0], operands[1]))
+   return rex mov{l}\t{%1, %0|%0, %1};
+ else
+   return mov{l}\t{%1, %0|%0, %1};
+   }
 }
 }
   [(set (attr type)
@@ -5540,7 +5546,11 @@
   if (x86_maybe_negate_const_int (operands[2], MODEmode))
 return sub{imodesuffix}\t{%2, %0|%0, %2};
 
-  return add{imodesuffix}\t{%2, %0|%0, %2};
+  /* Output REX prefix if needed.  */
+  if (ix86_output_rex_prefix_p (operands[0], operands[2]))
+   return rex add{imodesuffix}\t{%2, %0|%0, %2};
+  else
+   return add{imodesuffix}\t{%2, %0|%0, %2};
 }
 }
   [(set (attr type)
-- 
1.7.6.5