Hello!
These two insns have implicit address operands, so when Pmode !=
word_mode (x32 with -maddress-mode=short) we have to instruct the
linker to emit 0x67 address override prefix.
The patch also changes *sse3_monitor pattern to emit mnemonic with
implicit operands, to avoid duplicating operands in reverse order for
Intel syntax.
2013-08-13 Uros Bizjak <[email protected]>
* config/i386/sse.md (*sse2_maskmovdqu): Emit addr32 prefix
when Pmode != word_mode. Add length_address attribute.
(sse3_monitor_<mode>): Merge from sse3_monitor and
sse3_monitor64_<mode> insn patterns. Emit addr32 prefix when
Pmode != word_mode. Update insn length attribute.
* config/i386/i386.c (ix86_option_override_internal): Update
ix86_gen_monitor selection for merged sse3_monitor insn.
Patch was tested on x86_64-pc-linux-gnu {,-m32} and committed to mainline SVN.
Uros.
Index: i386.c
===================================================================
--- i386.c (revision 201689)
+++ i386.c (working copy)
@@ -4170,24 +4170,19 @@ ix86_option_override_internal (bool main_args_p)
ix86_gen_leave = gen_leave_rex64;
if (Pmode == DImode)
{
- ix86_gen_monitor = gen_sse3_monitor64_di;
ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di;
ix86_gen_tls_local_dynamic_base_64
= gen_tls_local_dynamic_base_64_di;
}
else
{
- ix86_gen_monitor = gen_sse3_monitor64_si;
ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si;
ix86_gen_tls_local_dynamic_base_64
= gen_tls_local_dynamic_base_64_si;
}
}
else
- {
- ix86_gen_leave = gen_leave;
- ix86_gen_monitor = gen_sse3_monitor;
- }
+ ix86_gen_leave = gen_leave;
if (Pmode == DImode)
{
@@ -4199,6 +4194,7 @@ ix86_option_override_internal (bool main_args_p)
ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
+ ix86_gen_monitor = gen_sse3_monitor_di;
}
else
{
@@ -4210,6 +4206,7 @@ ix86_option_override_internal (bool main_args_p)
ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
+ ix86_gen_monitor = gen_sse3_monitor_si;
}
#ifdef USE_IX86_CLD
Index: sse.md
===================================================================
--- sse.md (revision 201689)
+++ sse.md (working copy)
@@ -7731,9 +7731,17 @@
(mem:V16QI (match_dup 0))]
UNSPEC_MASKMOV))]
"TARGET_SSE2"
- "%vmaskmovdqu\t{%2, %1|%1, %2}"
+{
+ /* We can't use %^ here due to ASM_OUTPUT_OPCODE processing
+ that requires %v to be at the beginning of the opcode name. */
+ if (Pmode != word_mode)
+ fputs ("\taddr32", asm_out_file);
+ return "%vmaskmovdqu\t{%2, %1|%1, %2}";
+}
[(set_attr "type" "ssemov")
(set_attr "prefix_data16" "1")
+ (set (attr "length_address")
+ (symbol_ref ("Pmode != word_mode")))
;; The implicit %rdi operand confuses default length_vex computation.
(set (attr "length_vex")
(symbol_ref ("3 + REX_SSE_REGNO_P (REGNO (operands[2]))")))
@@ -7781,26 +7789,18 @@
"mwait"
[(set_attr "length" "3")])
-(define_insn "sse3_monitor"
- [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
- (match_operand:SI 1 "register_operand" "c")
- (match_operand:SI 2 "register_operand" "d")]
- UNSPECV_MONITOR)]
- "TARGET_SSE3 && !TARGET_64BIT"
- "monitor\t%0, %1, %2"
- [(set_attr "length" "3")])
-
-(define_insn "sse3_monitor64_<mode>"
+(define_insn "sse3_monitor_<mode>"
[(unspec_volatile [(match_operand:P 0 "register_operand" "a")
(match_operand:SI 1 "register_operand" "c")
(match_operand:SI 2 "register_operand" "d")]
UNSPECV_MONITOR)]
- "TARGET_SSE3 && TARGET_64BIT"
+ "TARGET_SSE3"
;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
;; RCX and RDX are used. Since 32bit register operands are implicitly
;; zero extended to 64bit, we only need to set up 32bit registers.
- "monitor"
- [(set_attr "length" "3")])
+ "%^monitor"
+ [(set (attr "length")
+ (symbol_ref ("(Pmode != word_mode) + 3")))])
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;