Re: [PATCH 02/10] addr32: Output REX prefix for UNSPEC_GOTNTPOFF
On Fri, Mar 2, 2012 at 9:36 PM, H.J. Lu hongjiu...@intel.com wrote: X86-64 linker optimizes TLS_MODEL_INITIAL_EXEC to TLS_MODEL_LOCAL_EXEC by checking movq foo@gottpoff(%rip), %reg and addq foo@gottpoff(%rip), %reg It uses the REX prefix to avoid the last byte of the previous instruction. With 32bit Pmode, we may not have the REX prefix and the last byte of the previous instruction may be an offset, which may look like a REX prefix. IE-LE optimization will generate corrupted binary. This patch makes sure we always output an REX pfrefix for UNSPEC_GOTNTPOFF. OK for trunk? Actually, linker has: case R_X86_64_GOTTPOFF: /* Check transition from IE access model: mov foo@gottpoff(%rip), %reg add foo@gottpoff(%rip), %reg */ /* Check REX prefix first. */ if (offset = 3 (offset + 4) = sec-size) { val = bfd_get_8 (abfd, contents + offset - 3); if (val != 0x48 val != 0x4c) { /* X32 may have 0x44 REX prefix or no REX prefix. */ if (ABI_64_P (abfd)) return FALSE; } } else { /* X32 may not have any REX prefix. */ if (ABI_64_P (abfd)) return FALSE; if (offset 2 || (offset + 3) sec-size) return FALSE; } So, it should handle the case without REX just OK. If it doesn't, then this is a bug in binutils. Uros.
Re: [PATCH 02/10] addr32: Output REX prefix for UNSPEC_GOTNTPOFF
On Mon, Mar 5, 2012 at 7:31 AM, Uros Bizjak ubiz...@gmail.com wrote: On Fri, Mar 2, 2012 at 9:36 PM, H.J. Lu hongjiu...@intel.com wrote: X86-64 linker optimizes TLS_MODEL_INITIAL_EXEC to TLS_MODEL_LOCAL_EXEC by checking movq foo@gottpoff(%rip), %reg and addq foo@gottpoff(%rip), %reg It uses the REX prefix to avoid the last byte of the previous instruction. With 32bit Pmode, we may not have the REX prefix and the last byte of the previous instruction may be an offset, which may look like a REX prefix. IE-LE optimization will generate corrupted binary. This patch makes sure we always output an REX pfrefix for UNSPEC_GOTNTPOFF. OK for trunk? Actually, linker has: case R_X86_64_GOTTPOFF: /* Check transition from IE access model: mov foo@gottpoff(%rip), %reg add foo@gottpoff(%rip), %reg */ /* Check REX prefix first. */ if (offset = 3 (offset + 4) = sec-size) { val = bfd_get_8 (abfd, contents + offset - 3); if (val != 0x48 val != 0x4c) { /* X32 may have 0x44 REX prefix or no REX prefix. */ if (ABI_64_P (abfd)) return FALSE; } } else { /* X32 may not have any REX prefix. */ if (ABI_64_P (abfd)) return FALSE; if (offset 2 || (offset + 3) sec-size) return FALSE; } So, it should handle the case without REX just OK. If it doesn't, then this is a bug in binutils. The last byte of the displacement in the previous instruction may happen to look like a REX byte. In that case, linker will overwrite the last byte of the previous instruction and generate the wrong instruction sequence. I need to update linker to enforce the REX byte check. -- H.J.
Re: [PATCH 02/10] addr32: Output REX prefix for UNSPEC_GOTNTPOFF
On Mon, Mar 5, 2012 at 6:03 PM, H.J. Lu hjl.to...@gmail.com wrote: X86-64 linker optimizes TLS_MODEL_INITIAL_EXEC to TLS_MODEL_LOCAL_EXEC by checking movq foo@gottpoff(%rip), %reg and addq foo@gottpoff(%rip), %reg It uses the REX prefix to avoid the last byte of the previous instruction. With 32bit Pmode, we may not have the REX prefix and the last byte of the previous instruction may be an offset, which may look like a REX prefix. IE-LE optimization will generate corrupted binary. This patch makes sure we always output an REX pfrefix for UNSPEC_GOTNTPOFF. OK for trunk? Actually, linker has: case R_X86_64_GOTTPOFF: /* Check transition from IE access model: mov foo@gottpoff(%rip), %reg add foo@gottpoff(%rip), %reg */ /* Check REX prefix first. */ if (offset = 3 (offset + 4) = sec-size) { val = bfd_get_8 (abfd, contents + offset - 3); if (val != 0x48 val != 0x4c) { /* X32 may have 0x44 REX prefix or no REX prefix. */ if (ABI_64_P (abfd)) return FALSE; } } else { /* X32 may not have any REX prefix. */ if (ABI_64_P (abfd)) return FALSE; if (offset 2 || (offset + 3) sec-size) return FALSE; } So, it should handle the case without REX just OK. If it doesn't, then this is a bug in binutils. The last byte of the displacement in the previous instruction may happen to look like a REX byte. In that case, linker will overwrite the last byte of the previous instruction and generate the wrong instruction sequence. I need to update linker to enforce the REX byte check. One important observation: if we want to follow the x86_64 TLS spec strictly, we have to use existing DImode patterns only. This also means that we should NOT convert other TLS patterns to Pmode, since they explicitly state movq and addq. If this is not the case, then we need new TLS specification for X32. Uros. -- H.J.
Re: [PATCH 02/10] addr32: Output REX prefix for UNSPEC_GOTNTPOFF
On Fri, Mar 2, 2012 at 9:36 PM, H.J. Lu hongjiu...@intel.com wrote: X86-64 linker optimizes TLS_MODEL_INITIAL_EXEC to TLS_MODEL_LOCAL_EXEC by checking movq foo@gottpoff(%rip), %reg and addq foo@gottpoff(%rip), %reg It uses the REX prefix to avoid the last byte of the previous instruction. With 32bit Pmode, we may not have the REX prefix and the last byte of the previous instruction may be an offset, which may look like a REX prefix. IE-LE optimization will generate corrupted binary. This patch makes sure we always output an REX pfrefix for UNSPEC_GOTNTPOFF. OK for trunk? No, please implement this using UNSPEC in the same way as tls_initial_exec_64_sun implements Sun linker quirk. Uros.
Re: [PATCH 02/10] addr32: Output REX prefix for UNSPEC_GOTNTPOFF
On Sun, Mar 4, 2012 at 2:12 PM, Uros Bizjak ubiz...@gmail.com wrote: On Fri, Mar 2, 2012 at 9:36 PM, H.J. Lu hongjiu...@intel.com wrote: X86-64 linker optimizes TLS_MODEL_INITIAL_EXEC to TLS_MODEL_LOCAL_EXEC by checking movq foo@gottpoff(%rip), %reg and addq foo@gottpoff(%rip), %reg It uses the REX prefix to avoid the last byte of the previous instruction. With 32bit Pmode, we may not have the REX prefix and the last byte of the previous instruction may be an offset, which may look like a REX prefix. IE-LE optimization will generate corrupted binary. This patch makes sure we always output an REX pfrefix for UNSPEC_GOTNTPOFF. OK for trunk? No, please implement this using UNSPEC in the same way as tls_initial_exec_64_sun implements Sun linker quirk. I am not sure how it can be done with UNSPEC cleanly. Unlike the Sun linker issue, this is an instruction encoding issue. At instruction pattern level, there is no difference between x32 and x86-64. I need to make sure that there is always one and only one REX prefix with UNSPEC_GOTNTPOFF. If REG is r8-r15, we shouldn't add another REX prefix. -- H.J.
Re: [PATCH 02/10] addr32: Output REX prefix for UNSPEC_GOTNTPOFF
On Sun, Mar 4, 2012 at 11:38 PM, H.J. Lu hjl.to...@gmail.com wrote: On Sun, Mar 4, 2012 at 2:12 PM, Uros Bizjak ubiz...@gmail.com wrote: On Fri, Mar 2, 2012 at 9:36 PM, H.J. Lu hongjiu...@intel.com wrote: X86-64 linker optimizes TLS_MODEL_INITIAL_EXEC to TLS_MODEL_LOCAL_EXEC by checking movq foo@gottpoff(%rip), %reg and addq foo@gottpoff(%rip), %reg It uses the REX prefix to avoid the last byte of the previous instruction. With 32bit Pmode, we may not have the REX prefix and the last byte of the previous instruction may be an offset, which may look like a REX prefix. IE-LE optimization will generate corrupted binary. This patch makes sure we always output an REX pfrefix for UNSPEC_GOTNTPOFF. OK for trunk? No, please implement this using UNSPEC in the same way as tls_initial_exec_64_sun implements Sun linker quirk. I am not sure how it can be done with UNSPEC cleanly. Unlike the Sun linker issue, this is an instruction encoding issue. At instruction pattern level, there is no difference between x32 and x86-64. I need to make sure that there is always one and only one REX prefix with UNSPEC_GOTNTPOFF. If REG is r8-r15, we shouldn't add another REX prefix. (define_insn tls_initial_exec_x32 [(set (match_operand:SI 0 register_operand =r) (unspec:SI [(match_operand:SI 1 tls_symbolic_operand )] UNSPEC_TLS_IE_X32)) (clobber (reg:CC FLAGS_REG))] TARGET_X32 { if (!REX_INT_REG_P (operands[0]) fputs (\trex\n, asm_out_file); output_asm_insn (mov{l}\t{%%fs:0, %0|%0, QWORD PTR fs:0}, operands); if (!REX_INT_REG_P (operands[0]) fputs (\trex\n, asm_out_file); return add{l}\t{%a1@gottpoff(%%rip), %0|%0, %a1@gottpoff[rip]}; } [(set_attr type multi)]) rex or rex64 prefix, whatever you wish. Then generate this pattern from legitimize_tls_address, TLS_MODEL_INITIAL_EXEC, see TARGET_SUN_TLS part. Uros.
Re: [PATCH 02/10] addr32: Output REX prefix for UNSPEC_GOTNTPOFF
On Sun, Mar 4, 2012 at 2:52 PM, Uros Bizjak ubiz...@gmail.com wrote: On Sun, Mar 4, 2012 at 11:38 PM, H.J. Lu hjl.to...@gmail.com wrote: On Sun, Mar 4, 2012 at 2:12 PM, Uros Bizjak ubiz...@gmail.com wrote: On Fri, Mar 2, 2012 at 9:36 PM, H.J. Lu hongjiu...@intel.com wrote: X86-64 linker optimizes TLS_MODEL_INITIAL_EXEC to TLS_MODEL_LOCAL_EXEC by checking movq foo@gottpoff(%rip), %reg and addq foo@gottpoff(%rip), %reg It uses the REX prefix to avoid the last byte of the previous instruction. With 32bit Pmode, we may not have the REX prefix and the last byte of the previous instruction may be an offset, which may look like a REX prefix. IE-LE optimization will generate corrupted binary. This patch makes sure we always output an REX pfrefix for UNSPEC_GOTNTPOFF. OK for trunk? No, please implement this using UNSPEC in the same way as tls_initial_exec_64_sun implements Sun linker quirk. I am not sure how it can be done with UNSPEC cleanly. Unlike the Sun linker issue, this is an instruction encoding issue. At instruction pattern level, there is no difference between x32 and x86-64. I need to make sure that there is always one and only one REX prefix with UNSPEC_GOTNTPOFF. If REG is r8-r15, we shouldn't add another REX prefix. (define_insn tls_initial_exec_x32 [(set (match_operand:SI 0 register_operand =r) (unspec:SI [(match_operand:SI 1 tls_symbolic_operand )] UNSPEC_TLS_IE_X32)) (clobber (reg:CC FLAGS_REG))] TARGET_X32 { if (!REX_INT_REG_P (operands[0]) fputs (\trex\n, asm_out_file); output_asm_insn (mov{l}\t{%%fs:0, %0|%0, QWORD PTR fs:0}, operands); if (!REX_INT_REG_P (operands[0]) fputs (\trex\n, asm_out_file); return add{l}\t{%a1@gottpoff(%%rip), %0|%0, %a1@gottpoff[rip]}; } [(set_attr type multi)]) rex or rex64 prefix, whatever you wish. Will it introduce bugs? The normal code look like off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type); off = gen_rtx_CONST (Pmode, off); if (pic) off = gen_rtx_PLUS (Pmode, pic, off); off = gen_const_mem (Pmode, off); set_mem_alias_set (off, ix86_GOT_alias_set ()); if (TARGET_64BIT || TARGET_ANY_GNU_TLS) { base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS); off = force_reg (Pmode, off); return gen_rtx_PLUS (Pmode, base, off); } There is a call to set_mem_alias_set (off, ix86_GOT_alias_set ()); -- H.J.
[PATCH 02/10] addr32: Output REX prefix for UNSPEC_GOTNTPOFF
X86-64 linker optimizes TLS_MODEL_INITIAL_EXEC to TLS_MODEL_LOCAL_EXEC by checking movq foo@gottpoff(%rip), %reg and addq foo@gottpoff(%rip), %reg It uses the REX prefix to avoid the last byte of the previous instruction. With 32bit Pmode, we may not have the REX prefix and the last byte of the previous instruction may be an offset, which may look like a REX prefix. IE-LE optimization will generate corrupted binary. This patch makes sure we always output an REX pfrefix for UNSPEC_GOTNTPOFF. OK for trunk? Thanks. H.J. 2012-03-02 H.J. Lu hongjiu...@intel.com * config/i386/i386-protos.h (ix86_output_rex_prefix_p): New. * config/i386/i386.c (ix86_output_rex_prefix_p): Likewise. * config/i386/i386.md (*movsi_internal): Output REX prefix if needed. (*addmode_1): Likewise. diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h index 630112f..a9b9d3f 100644 --- a/gcc/config/i386/i386-protos.h +++ b/gcc/config/i386/i386-protos.h @@ -277,6 +277,8 @@ extern void x86_output_aligned_bss (FILE *, tree, const char *, extern void x86_elf_aligned_common (FILE *, const char *, unsigned HOST_WIDE_INT, int); +extern bool ix86_output_rex_prefix_p (rtx, rtx); + #ifdef RTX_CODE extern void ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *, enum rtx_code *, enum rtx_code *); diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index ac9c714..2cbfb64 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -14670,6 +14670,29 @@ i386_asm_output_addr_const_extra (FILE *file, rtx x) return true; } + +/* Since x64-64 linker IE-LE transition requires a REX prefix, we + output a REX prefix if there isn't one. */ + +bool +ix86_output_rex_prefix_p (rtx dest, rtx op) +{ + if (!TARGET_X32 + || GET_MODE (dest) != SImode + || REX_INT_REG_P (dest) + || !MEM_P (op)) +return false; + + op = XEXP (op, 0); + if (GET_CODE (op) != CONST) +return false; + + op = XEXP (op, 0); + if (GET_CODE (op) != UNSPEC) +return false; + + return XINT (op, 1) == UNSPEC_GOTNTPOFF; +} /* Split one or more double-mode RTL references into pairs of half-mode references. The RTL can be REG, offsettable MEM, integer constant, or diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 8fc7918..35b2673 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -2211,7 +2211,13 @@ if (ix86_use_lea_for_mov (insn, operands)) return lea{l}\t{%a1, %0|%0, %a1}; else - return mov{l}\t{%1, %0|%0, %1}; + { + /* Output REX prefix if needed. */ + if (ix86_output_rex_prefix_p (operands[0], operands[1])) + return rex mov{l}\t{%1, %0|%0, %1}; + else + return mov{l}\t{%1, %0|%0, %1}; + } } } [(set (attr type) @@ -5540,7 +5546,11 @@ if (x86_maybe_negate_const_int (operands[2], MODEmode)) return sub{imodesuffix}\t{%2, %0|%0, %2}; - return add{imodesuffix}\t{%2, %0|%0, %2}; + /* Output REX prefix if needed. */ + if (ix86_output_rex_prefix_p (operands[0], operands[2])) + return rex add{imodesuffix}\t{%2, %0|%0, %2}; + else + return add{imodesuffix}\t{%2, %0|%0, %2}; } } [(set (attr type) -- 1.7.6.5