------- Comment #6 from hjl dot tools at gmail dot com  2008-09-09 04:39 -------
This patch disables the SSE4.1 (V16QImode) and SSE2 (V8HImode) vector-init
optimizations when inter-unit moves are disabled, or when the upper half of
the vector's elements duplicates the lower half:

--- i386.c.sse2 2008-09-08 21:17:15.000000000 -0700
+++ i386.c      2008-09-08 21:36:38.000000000 -0700
@@ -26886,7 +26886,7 @@ static void
 ix86_expand_vector_init_interleave (enum machine_mode mode,
                                    rtx target, rtx *ops, int n)
 {
-  enum machine_mode first_imode, second_imode, third_imode;
+  enum machine_mode first_imode, second_imode, third_imode, inner_mode;
   int i, j;
   rtx op0, op1;
   rtx (*gen_load_even) (rtx, rtx, rtx);
@@ -26899,6 +26899,7 @@ ix86_expand_vector_init_interleave (enum
       gen_load_even = gen_vec_setv8hi;
       gen_interleave_first_low = gen_vec_interleave_lowv4si;
       gen_interleave_second_low = gen_vec_interleave_lowv2di;
+      inner_mode = HImode;
       first_imode = V4SImode;
       second_imode = V2DImode;
       third_imode = VOIDmode;
@@ -26907,6 +26908,7 @@ ix86_expand_vector_init_interleave (enum
       gen_load_even = gen_vec_setv16qi;
       gen_interleave_first_low = gen_vec_interleave_lowv8hi;
       gen_interleave_second_low = gen_vec_interleave_lowv4si;
+      inner_mode = QImode;
       first_imode = V8HImode;
       second_imode = V4SImode;
       third_imode = V2DImode;
@@ -26935,7 +26937,9 @@ ix86_expand_vector_init_interleave (enum
       emit_move_insn (op0, gen_lowpart (mode, op1));

       /* Load even elements into the second positon.  */
-      emit_insn ((*gen_load_even) (op0, ops [i + i + 1],
+      emit_insn ((*gen_load_even) (op0,
+                                  force_reg (inner_mode,
+                                             ops [i + i + 1]),
                                   const1_rtx));

       /* Cast vector to FIRST_IMODE vector.  */
@@ -26998,7 +27002,8 @@ ix86_expand_vector_init_general (bool mm
 {
   rtx ops[32], op0, op1;
   enum machine_mode half_mode = VOIDmode;
-  int n, i;
+  int n, i, h;
+  bool duplicated;

   switch (mode)
     {
@@ -27045,18 +27050,27 @@ half:
       return;

     case V16QImode:
-      if (!TARGET_SSE4_1)
+      if (!TARGET_SSE4_1 || !TARGET_INTER_UNIT_MOVES)
        break;
       /* FALLTHRU */

     case V8HImode:
-      if (!TARGET_SSE2)
+      if (!TARGET_SSE2 || !TARGET_INTER_UNIT_MOVES)
        break;

       n = GET_MODE_NUNITS (mode);
+      h = n >> 1;
+      duplicated = true;
       for (i = 0; i < n; i++)
-       ops[i] = XVECEXP (vals, 0, i);
-      ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
+       {
+         ops[i] = XVECEXP (vals, 0, i);
+         if (i >= h && !rtx_equal_p (ops[i], ops[i - h]))
+           duplicated = false;
+       }
+      if (duplicated)
+       break;
+
+      ix86_expand_vector_init_interleave (mode, target, ops, h);
       return;

     case V4HImode:


-- 


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=37434

Reply via email to