Re: [PATCH] AVX2 permutation improvements

2012-03-20 Thread Richard Henderson
On 03/20/12 04:22, Jakub Jelinek wrote:
> 2012-03-20  Jakub Jelinek  
> 
>   PR target/52607
>   * config/i386/i386.md ("isa" attribute): Add avx2 and noavx2.
>   ("enabled" attribute): Handle avx2 and noavx2 isas.
>   * config/i386/sse.md (avx2_vec_dupv8sf_1, avx2_pbroadcast_1):
>   New insns.
>   (vec_dup): Add avx2 =x,x alternative.
>   (vec_dup splitter): Don't split if TARGET_AVX2.
>   (*avx_vperm_broadcast_): Don't split V4DFmode if TARGET_AVX2.
>   For TARGET_AVX2, V8SFmode and elt == 0 split into vbroadcastss.
>   * config/i386/i386.c (expand_vec_perm_pshufb): Emit also vpermps
>   for V8SFmode.
>   (expand_vec_perm_1): For broadcasts, use avx2_pbroadcast_1
>   if possible, handle also V8SFmode.

Ok.


r~


[PATCH] AVX2 permutation improvements

2012-03-20 Thread Jakub Jelinek
Hi!

This patch improves register -> register broadcast AVX2 permutations
and also starts using vpermps where possible for V8SFmode
permutations.  Bootstrapped/regtested on x86_64-linux and i686-linux,
ok for trunk?

2012-03-20  Jakub Jelinek  

PR target/52607
* config/i386/i386.md ("isa" attribute): Add avx2 and noavx2.
("enabled" attribute): Handle avx2 and noavx2 isas.
* config/i386/sse.md (avx2_vec_dupv8sf_1, avx2_pbroadcast<mode>_1):
New insns.
(vec_dup<mode>): Add avx2 =x,x alternative.
(vec_dup<mode> splitter): Don't split if TARGET_AVX2.
(*avx_vperm_broadcast_<mode>): Don't split V4DFmode if TARGET_AVX2.
For TARGET_AVX2, V8SFmode and elt == 0 split into vbroadcastss.
* config/i386/i386.c (expand_vec_perm_pshufb): Emit also vpermps
for V8SFmode.
(expand_vec_perm_1): For broadcasts, use avx2_pbroadcast_1
if possible, handle also V8SFmode.

--- gcc/config/i386/i386.md.jj  2012-03-20 08:51:30.937236938 +0100
+++ gcc/config/i386/i386.md 2012-03-20 08:54:50.742079909 +0100
@@ -639,7 +639,7 @@ (define_attr "use_carry" "0,1" (const_st
 (define_attr "movu" "0,1" (const_string "0"))
 
 ;; Used to control the "enabled" attribute on a per-instruction basis.
-(define_attr "isa" "base,sse2,sse2_noavx,sse3,sse4,sse4_noavx,noavx,avx,bmi2"
+(define_attr "isa" "base,sse2,sse2_noavx,sse3,sse4,sse4_noavx,noavx,avx,avx2,noavx2,bmi2"
   (const_string "base"))
 
 (define_attr "enabled" ""
@@ -652,6 +652,8 @@ (define_attr "enabled" ""
   (symbol_ref "TARGET_SSE4_1 && !TARGET_AVX")
 (eq_attr "isa" "avx") (symbol_ref "TARGET_AVX")
 (eq_attr "isa" "noavx") (symbol_ref "!TARGET_AVX")
+(eq_attr "isa" "avx2") (symbol_ref "TARGET_AVX2")
+(eq_attr "isa" "noavx2") (symbol_ref "!TARGET_AVX2")
 (eq_attr "isa" "bmi2") (symbol_ref "TARGET_BMI2")
]
(const_int 1)))
--- gcc/config/i386/sse.md.jj   2012-03-20 08:51:30.940236899 +0100
+++ gcc/config/i386/sse.md  2012-03-20 08:55:22.344898469 +0100
@@ -3808,6 +3808,18 @@ (define_insn "avx2_vec_dup"
 (set_attr "prefix" "vex")
 (set_attr "mode" "")])
 
+(define_insn "avx2_vec_dupv8sf_1"
+  [(set (match_operand:V8SF 0 "register_operand" "=x")
+   (vec_duplicate:V8SF
+ (vec_select:SF
+   (match_operand:V8SF 1 "register_operand" "x")
+   (parallel [(const_int 0)]))))]
+  "TARGET_AVX2"
+  "vbroadcastss\t{%x1, %0|%0, %x1}"
+  [(set_attr "type" "sselog1")
+(set_attr "prefix" "vex")
+(set_attr "mode" "V8SF")])
+
 (define_insn "vec_dupv4sf"
   [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
(vec_duplicate:V4SF
@@ -11876,6 +11888,19 @@ (define_insn "avx2_pbroadcast"
(set_attr "prefix" "vex")
(set_attr "mode" "")])
 
+(define_insn "avx2_pbroadcast<mode>_1"
+  [(set (match_operand:VI_256 0 "register_operand" "=x")
+   (vec_duplicate:VI_256
+ (vec_select:<ssescalarmode>
+   (match_operand:VI_256 1 "nonimmediate_operand" "xm")
+   (parallel [(const_int 0)]))))]
+  "TARGET_AVX2"
+  "vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}"
+  [(set_attr "type" "ssemov")
+   (set_attr "prefix_extra" "1")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "<sseinsnmode>")])
+
 (define_insn "avx2_permvarv8si"
   [(set (match_operand:V8SI 0 "register_operand" "=x")
(unspec:V8SI
@@ -11967,16 +11992,18 @@ (define_mode_iterator AVX_VEC_DUP_MODE
   [V8SI V8SF V4DI V4DF])
 
 (define_insn "vec_dup<mode>"
-  [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "=x,x")
+  [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "=x,x,x")
(vec_duplicate:AVX_VEC_DUP_MODE
- (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,?x")))]
+ (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,x,?x")))]
  "TARGET_AVX"
  "@
vbroadcast<ssescalarmodesuffix>\t{%1, %0|%0, %1}
+   vbroadcast<ssescalarmodesuffix>\t{%x1, %0|%0, %x1}
#"
   [(set_attr "type" "ssemov")
(set_attr "prefix_extra" "1")
(set_attr "prefix" "vex")
+   (set_attr "isa" "*,avx2,noavx2")
(set_attr "mode" "V8SF")])
 
 (define_insn "avx2_vbroadcasti128_<mode>"
@@ -11995,7 +12022,7 @@ (define_split
   [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand")
(vec_duplicate:AVX_VEC_DUP_MODE
  (match_operand:<ssescalarmode> 1 "register_operand")))]
-  "TARGET_AVX && reload_completed"
+  "TARGET_AVX && !TARGET_AVX2 && reload_completed"
   [(set (match_dup 2)
(vec_duplicate:<ssehalfvecmode> (match_dup 1)))
(set (match_dup 0)
@@ -12057,7 +12084,7 @@ (define_insn_and_split "*avx_vperm_broad
[(match_operand 3 "const_int_operand" "C,n,n")])))]
   "TARGET_AVX"
   "#"
-  "&& reload_completed"
+  "&& reload_completed && (<MODE>mode != V4DFmode || !TARGET_AVX2)"
   [(set (match_dup 0) (vec_duplicate:VF_256 (match_dup 1)))]
 {
   rtx op0 = operands[0], op1 = operands[1];
@@ -12067,6 +12094,13 @@ (define_insn_and_split "*avx_vperm_broad
 {
   int mask;
 
+  if (TARGET_AVX2 && elt == 0)
+   {
+ emit_insn (gen_vec_dup<mode> (op0, gen_lowpart (<ssescalarmode>mode,
+