Is it OK to use the following pattern?
The patch passed bootstrap and make check, but one test failed:
gcc/testsuite/gcc.target/i386/vect-rebuild.c
It failed on /* { dg-final { scan-assembler-times "\tv?permilpd\[ \t\]" 1 } } */
The instruction generated for that permutation is now palignr instead of
permilpd. However, both palignr and permilpd cost 1 tick and take 6 bytes
to encode.
I vote for modifying the test to scan for palignr:
/* { dg-final { scan-assembler-times "\tv?palignr\[ \t\]" 1 } } */
2014-06-04 Evgeny Stupachenko <[email protected]>
* config/i386/sse.md (*ssse3_palignr<mode>_perm): New.
* config/i386/predicates.md (palignr_operand): New.
Returns true if the permutation is suitable for the palignr instruction.
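To illustrate what the new predicate accepts, here is a minimal standalone
sketch in plain C (the helper name is hypothetical and only for illustration,
not part of the patch); it mirrors the loop in palignr_operand below and only
accepts selection vectors that are a rotation of 0 .. nelt-1:

/* Standalone illustration of the rotation check done by palignr_operand.
   is_palignr_perm is a hypothetical name used only for this example.  */
#include <stdio.h>

static int
is_palignr_perm (const int *perm, int nelt)
{
  int elt = perm[0];
  int i;

  /* Every element must follow its predecessor modulo nelt.  */
  for (i = 1; i < nelt; ++i)
    if (perm[i] != (elt + i) % nelt)
      return 0;
  return 1;
}

int
main (void)
{
  int rot[8]  = { 5, 6, 7, 0, 1, 2, 3, 4 };  /* accepted: rotation by 5 */
  int swap[8] = { 1, 0, 3, 2, 5, 4, 7, 6 };  /* rejected: not a rotation */

  printf ("%d %d\n", is_palignr_perm (rot, 8), is_palignr_perm (swap, 8));
  return 0;
}

(The program prints "1 0".)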
diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
index 2ef1384..8266f3e 100644
--- a/gcc/config/i386/predicates.md
+++ b/gcc/config/i386/predicates.md
@@ -1417,6 +1417,22 @@
return true;
})
+;; Return true if OP is a parallel for a palignr permute.
+(define_predicate "palignr_operand"
+ (and (match_code "parallel")
+ (match_code "const_int" "a"))
+{
+ int elt = INTVAL (XVECEXP (op, 0, 0));
+ int i, nelt = XVECLEN (op, 0);
+
+  /* Check that the element order in the permutation is suitable for palignr:
+     e.g. {5 6 7 0 1 2 3 4} corresponds to a palignr by 5 elements.  */
+ for (i = 1; i < nelt; ++i)
+ if (INTVAL (XVECEXP (op, 0, i)) != ((elt + i) % nelt))
+ return false;
+ return true;
+})
+
;; Return true if OP is a proper third operand to vpblendw256.
(define_predicate "avx2_pblendw_operand"
(match_code "const_int")
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index c91626b..5e8fd65 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -11454,6 +11454,36 @@
}
})
+(define_insn "*ssse3_palignr<mode>_perm"
+ [(set (match_operand:V_128 0 "register_operand" "=x,x")
+ (vec_select:V_128
+ (match_operand:V_128 1 "register_operand" "0,x")
+ (match_parallel 2 "palignr_operand"
+ [(match_operand 3 "const_int_operand" "n, n")])))]
+ "TARGET_SSSE3"
+{
+ enum machine_mode imode = GET_MODE_INNER (GET_MODE (operands[0]));
+ operands[2] = GEN_INT (INTVAL (operands[3]) * GET_MODE_SIZE (imode));
+
+ switch (which_alternative)
+ {
+ case 0:
+ return "palignr\t{%2, %1, %0|%0, %1, %2}";
+ case 1:
+ return "vpalignr\t{%2, %1, %1, %0|%0, %1, %1, %2}";
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "isa" "noavx,avx")
+ (set_attr "type" "sseishft")
+ (set_attr "atom_unit" "sishuf")
+ (set_attr "prefix_data16" "1,*")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "orig,vex")])
+
+
(define_insn "abs<mode>2"
[(set (match_operand:MMXMODEI 0 "register_operand" "=y")
(abs:MMXMODEI
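To make the output template above concrete: the insn takes the first index of
the selection vector and multiplies it by the element size to form the byte
immediate for palignr. A rough standalone sketch of that computation in plain
C (values chosen for the V8HI case, i.e. 2-byte elements; this is only an
illustration, not code from the patch):

/* For V8HI and the selection {5 6 7 0 1 2 3 4}: elt = 5, element size = 2,
   so the immediate is 10 and the noavx alternative emits
   "palignr $10, %xmm0, %xmm0" (the avx alternative would emit
   "vpalignr $10, %xmm1, %xmm1, %xmm0").  */
#include <stdio.h>

int
main (void)
{
  int elt = 5;        /* INTVAL of the first selection index */
  int elem_size = 2;  /* GET_MODE_SIZE of the inner mode, here HImode */
  int imm = elt * elem_size;

  printf ("palignr\t$%d, %%xmm0, %%xmm0\n", imm);
  return 0;
}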
On Mon, May 19, 2014 at 8:00 PM, Richard Henderson <[email protected]> wrote:
> On 05/05/2014 09:54 AM, Evgeny Stupachenko wrote:
>> @@ -42943,6 +42944,10 @@ expand_vec_perm_1 (struct expand_vec_perm_d *d)
>> if (expand_vec_perm_vpermil (d))
>> return true;
>>
>> + /* Try palignr on one operand. */
>> + if (d->one_operand_p && expand_vec_perm_palignr (d))
>> + return true;
>
> No, because unless in_order and SSSE3, expand_vec_perm_palignr generates at
> least 2 insns, and by contract expand_vec_perm_1 must generate only one.
>
> I think what might help you out is to have the rotate permutation matched
> directly, rather than have to have it converted to a shift.
>
> Thus I think you'd do well to start this series with a patch that adds a
> pattern of the form
>
> (define_insn "*ssse3_palignr<mode>_perm"
> [(set (match_operand:V_128 0 "register_operand" "=x,x")
> (vec_select:V_128
> (match_operand:V_128 1 "register_operand" "0,x")
> (match_operand:V_128 2 "nonimmediate_operand" "xm,xm")
> (match_parallel 3 "palign_operand"
> [(match_operand 4 "const_int_operand" "")]
> "TARGET_SSSE3"
> {
> enum machine_mode imode = GET_INNER_MODE (GET_MODE (operands[0]));
> operands[3] = GEN_INT (INTVAL (operands[4]) * GET_MODE_SIZE (imode));
>
> switch (which_alternative)
> {
> case 0:
> return "palignr\t{%3, %2, %0|%0, %2, %3}";
> case 1:
> return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
> default:
> gcc_unreachable ();
> }
> }
> [(set_attr "isa" "noavx,avx")
> (set_attr "type" "sseishft")
> (set_attr "atom_unit" "sishuf")
> (set_attr "prefix_data16" "1,*")
> (set_attr "prefix_extra" "1")
> (set_attr "length_immediate" "1")
> (set_attr "prefix" "orig,vex")])
>
> where the palign_operand function verifies that the constants are all in
> order.
> This is very similar to the way we define the broadcast type patterns.
>
> You'll need a similar pattern with a different predicate for the avx2 palignr,
> since it's not a simple increment, but also verifying the cross-lane
> constraint.
>
> With that as patch 1/1, I believe that will significantly tidy up what else
> you're attempting to change with this series.
>
>
>
> r~