Hello,

For vectors, this patch combines a concat followed by a shuffle into a single operation. An example on x86 would be:

__m128d f(double d){
  __m128d x=_mm_setr_pd(-d,d);
  return _mm_shuffle_pd(x,x,1);
}

which was compiled as:

        vmovsd  .LC0(%rip), %xmm1
        vxorpd  %xmm0, %xmm1, %xmm1
        vunpcklpd       %xmm0, %xmm1, %xmm0
        vshufpd $1, %xmm0, %xmm0, %xmm0

and with the patch:

        vmovsd  .LC0(%rip), %xmm1
        vxorpd  %xmm0, %xmm1, %xmm1
        vunpcklpd       %xmm1, %xmm0, %xmm0

This happens a lot in my code, for interval arithmetic, where I have a number d, build an interval (-d,d) from it, then subtract that interval from another one, and subtraction is implemented as shufpd+addpd.

The patch is quite specialized, but I guessed I could start there, and it can always be generalized later.

For the testsuite, since the patch is not in a particular target, it would be better to have a generic test (in gcc.dg?), but I don't really know how to write a generic one, so would a test in gcc.target/i386 that scans the asm for shuf or perm be ok?

Ah, and if I use __builtin_shuffle instead of _mm_shuffle_pd, the patch works without -mavx, but with -mavx the compiler uses vpermilpd (i.e. a vec_select:V2DF (reg:V2DF) ...) instead of a vshufpd, so I'll probably want to handle that too later. I thought about doing a general transformation from vec_select(vec_concat(x,x),*) to vec_select(x,*) (reducing the indices in * so they fit), but that seemed way too dangerous.

--
Marc Glisse
Index: simplify-rtx.c
===================================================================
--- simplify-rtx.c      (revision 187228)
+++ simplify-rtx.c      (working copy)
@@ -3268,10 +3268,32 @@ simplify_binary_operation_1 (enum rtx_co
 
          if (GET_MODE (vec) == mode)
            return vec;
        }
 
+      /* If we build {a,b} then permute it, build the result directly.  */
+      if (XVECLEN (trueop1, 0) == 2
+         && CONST_INT_P (XVECEXP (trueop1, 0, 0))
+         && CONST_INT_P (XVECEXP (trueop1, 0, 1))
+         && GET_CODE (trueop0) == VEC_CONCAT
+         && rtx_equal_p (XEXP (trueop0, 0), XEXP (trueop0, 1))
+         && GET_CODE (XEXP (trueop0, 0)) == VEC_CONCAT
+         && GET_MODE (XEXP (trueop0, 0)) == mode)
+       {
+         int offset0 = INTVAL (XVECEXP (trueop1, 0, 0)) % 2;
+         int offset1 = INTVAL (XVECEXP (trueop1, 0, 1)) % 2;
+         rtx baseop  = XEXP (trueop0, 0);
+         rtx baseop0 = XEXP (baseop , 0);
+         rtx baseop1 = XEXP (baseop , 1);
+         baseop0 = avoid_constant_pool_reference (baseop0);
+         baseop1 = avoid_constant_pool_reference (baseop1);
+
+         return simplify_gen_binary (VEC_CONCAT, mode,
+            offset0 ? baseop1 : baseop0,
+            offset1 ? baseop1 : baseop0);
+       }
+
       return 0;
     case VEC_CONCAT:
       {
        enum machine_mode op0_mode = (GET_MODE (trueop0) != VOIDmode
                                      ? GET_MODE (trueop0)

Reply via email to