Hello! The attached patch adds the 32-byte AVX modes to ix86_modes_tieable_p, handling them in the same way as the other SSE and MMX modes.
Additionally, the patch removes unneeded gen_lowpart calls from ix86_expand_vector_move_misalign. These calls are redundant because the mode function argument just duplicates the mode of the operands for convenience, so the operands already have that mode. 2012-03-28 Uros Bizjak <ubiz...@gmail.com> * config/i386/i386.c (ix86_modes_tieable_p): Handle 32-byte AVX modes. (ix86_expand_vector_move_misalign): Remove unneeded gen_lowpart calls. Tested on x86_64-pc-linux-gnu {,-m32} with and without -mfpmath=avx. Committed. Uros.
Index: i386.c =================================================================== --- i386.c (revision 185918) +++ i386.c (working copy) @@ -15831,17 +15831,18 @@ ix86_expand_vector_move_misalign (enum machine_mod switch (GET_MODE_SIZE (mode)) { case 16: - /* If we're optimizing for size, movups is the smallest. */ if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL) { op0 = gen_lowpart (V4SFmode, op0); op1 = gen_lowpart (V4SFmode, op1); emit_insn (gen_sse_movups (op0, op1)); - return; } - op0 = gen_lowpart (V16QImode, op0); - op1 = gen_lowpart (V16QImode, op1); - emit_insn (gen_sse2_movdqu (op0, op1)); + else + { + op0 = gen_lowpart (V16QImode, op0); + op1 = gen_lowpart (V16QImode, op1); + emit_insn (gen_sse2_movdqu (op0, op1)); + } break; case 32: op0 = gen_lowpart (V32QImode, op0); @@ -15853,27 +15854,22 @@ ix86_expand_vector_move_misalign (enum machine_mod } break; case MODE_VECTOR_FLOAT: - op0 = gen_lowpart (mode, op0); - op1 = gen_lowpart (mode, op1); - switch (mode) { case V4SFmode: emit_insn (gen_sse_movups (op0, op1)); break; - case V8SFmode: - ix86_avx256_split_vector_move_misalign (op0, op1); - break; case V2DFmode: if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL) { op0 = gen_lowpart (V4SFmode, op0); op1 = gen_lowpart (V4SFmode, op1); emit_insn (gen_sse_movups (op0, op1)); - return; } - emit_insn (gen_sse2_movupd (op0, op1)); + else + emit_insn (gen_sse2_movupd (op0, op1)); break; + case V8SFmode: case V4DFmode: ix86_avx256_split_vector_move_misalign (op0, op1); break; @@ -15918,8 +15914,6 @@ ix86_expand_vector_move_misalign (enum machine_mod if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL) { - op0 = gen_lowpart (V2DFmode, op0); - op1 = gen_lowpart (V2DFmode, op1); emit_insn (gen_sse2_movupd (op0, op1)); return; } @@ -15984,8 +15978,8 @@ ix86_expand_vector_move_misalign (enum machine_mod return; } - /* ??? Similar to above, only less clear because of quote - typeless stores unquote. */ + /* ??? Similar to above, only less clear + because of typeless stores. 
*/ if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES && GET_MODE_CLASS (mode) == MODE_VECTOR_INT) { @@ -15998,11 +15992,7 @@ ix86_expand_vector_move_misalign (enum machine_mod if (TARGET_SSE2 && mode == V2DFmode) { if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL) - { - op0 = gen_lowpart (V2DFmode, op0); - op1 = gen_lowpart (V2DFmode, op1); - emit_insn (gen_sse2_movupd (op0, op1)); - } + emit_insn (gen_sse2_movupd (op0, op1)); else { m = adjust_address (op0, DFmode, 0); @@ -31399,6 +31389,10 @@ ix86_modes_tieable_p (enum machine_mode mode1, enu /* If MODE2 is only appropriate for an SSE register, then tie with any other mode acceptable to SSE registers. */ + if (GET_MODE_SIZE (mode2) == 32 + && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2)) + return (GET_MODE_SIZE (mode1) == 32 + && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1)); if (GET_MODE_SIZE (mode2) == 16 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2)) return (GET_MODE_SIZE (mode1) == 16