Hello,
this patch combines a vector concat followed by a shuffle (permutation) into
a single concat. An example on x86 would be:
/* Reproducer: build a vector from -d and d, then shuffle that vector with
   itself using immediate 1 (presumably this swaps the two lanes -- confirm
   against the _mm_shuffle_pd documentation).  */
__m128d f(double d){
/* Pack -d and d into a single vector value.  */
__m128d x=_mm_setr_pd(-d,d);
/* Both shuffle inputs are the same vector x.  */
return _mm_shuffle_pd(x,x,1);
}
which was compiled as:
vmovsd .LC0(%rip), %xmm1
vxorpd %xmm0, %xmm1, %xmm1
vunpcklpd %xmm0, %xmm1, %xmm0
vshufpd $1, %xmm0, %xmm0, %xmm0
and with the patch:
vmovsd .LC0(%rip), %xmm1
vxorpd %xmm0, %xmm1, %xmm1
vunpcklpd %xmm1, %xmm0, %xmm0
This happens a lot in my code, for interval arithmetic, where I have a
number d, build an interval (-d,d) from it, then subtract that interval
from another one, and subtraction is implemented as shufpd+addpd.
The patch is quite specialized, but I guessed I could start there, and it
can always be generalized later.
For the testsuite, since the patch is not specific to a particular target, it
would be better to have a generic test (in gcc.dg?), but I don't really know
how to write a generic one, so would a test in gcc.target/i386 that scans
the asm for shuf or perm be OK?
Ah, and if I use __builtin_shuffle instead of _mm_shuffle_pd, the patch
works without -mavx, but with -mavx the compiler uses vpermilpd (i.e. a
vec_select:V2DF (reg:V2DF) ...) instead of a vshufpd, so I'll probably want
to handle that too later. I thought about doing a general transformation from
vec_select(vec_concat(x,x),*) to vec_select(x,*) (reducing the indices in
* so they fit), but that seemed way too dangerous.
--
Marc Glisse
Index: simplify-rtx.c
===================================================================
--- simplify-rtx.c (revision 187228)
+++ simplify-rtx.c (working copy)
@@ -3268,10 +3268,32 @@ simplify_binary_operation_1 (enum rtx_co
if (GET_MODE (vec) == mode)
return vec;
}
+ /* If we build {a,b} then permute it, build the result directly. */
+ if (XVECLEN (trueop1, 0) == 2
+ && CONST_INT_P (XVECEXP (trueop1, 0, 0))
+ && CONST_INT_P (XVECEXP (trueop1, 0, 1))
+ && GET_CODE (trueop0) == VEC_CONCAT
+ && rtx_equal_p (XEXP (trueop0, 0), XEXP (trueop0, 1))
+ && GET_CODE (XEXP (trueop0, 0)) == VEC_CONCAT
+ && GET_MODE (XEXP (trueop0, 0)) == mode)
+ {
+ int offset0 = INTVAL (XVECEXP (trueop1, 0, 0)) % 2;
+ int offset1 = INTVAL (XVECEXP (trueop1, 0, 1)) % 2;
+ rtx baseop = XEXP (trueop0, 0);
+ rtx baseop0 = XEXP (baseop , 0);
+ rtx baseop1 = XEXP (baseop , 1);
+ baseop0 = avoid_constant_pool_reference (baseop0);
+ baseop1 = avoid_constant_pool_reference (baseop1);
+
+ return simplify_gen_binary (VEC_CONCAT, mode,
+ offset0 ? baseop1 : baseop0,
+ offset1 ? baseop1 : baseop0);
+ }
+
return 0;
case VEC_CONCAT:
{
enum machine_mode op0_mode = (GET_MODE (trueop0) != VOIDmode
? GET_MODE (trueop0)