The Neon vector-compare operations allow either a register or a vector
of zeros as the second comparison operand.  However, we currently permit
any immediate.  This can allow the optimizer to sink a non-trivial
constant into a loop when it would have been better left in a register.
Further, by hiding from the register allocators the register needed to
rematerialize the value, we can end up with worse code in some cases.

Fixed by only allowing zero or a register in the compare operations.

        * arm/predicates.md (zero_operand, reg_or_zero_operand): New predicates.
        * arm/neon.md (neon_vceq<mode>, neon_vcge<mode>, neon_vcgt<mode>): Use
        reg_or_zero_operand predicate.
        (neon_vcle<mode>, neon_vclt<mode>): Use zero_operand predicate.

Tested on both gcc-4.7 and trunk, but only committing it to trunk.

R.
--- config/arm/neon.md  (revision 185488)
+++ config/arm/neon.md  (local)
@@ -2114,7 +2114,7 @@ (define_insn "neon_vceq<mode>"
   [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
         (unspec:<V_cmp_result>
          [(match_operand:VDQW 1 "s_register_operand" "w,w")
-          (match_operand:VDQW 2 "nonmemory_operand" "w,Dz")
+          (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz")
           (match_operand:SI 3 "immediate_operand" "i,i")]
           UNSPEC_VCEQ))]
   "TARGET_NEON"
@@ -2133,7 +2133,7 @@ (define_insn "neon_vcge<mode>"
   [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
         (unspec:<V_cmp_result>
          [(match_operand:VDQW 1 "s_register_operand" "w,w")
-          (match_operand:VDQW 2 "nonmemory_operand" "w,Dz")
+          (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz")
           (match_operand:SI 3 "immediate_operand" "i,i")]
           UNSPEC_VCGE))]
   "TARGET_NEON"
@@ -2164,7 +2164,7 @@ (define_insn "neon_vcgt<mode>"
   [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
         (unspec:<V_cmp_result>
          [(match_operand:VDQW 1 "s_register_operand" "w,w")
-          (match_operand:VDQW 2 "nonmemory_operand" "w,Dz")
+          (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz")
            (match_operand:SI 3 "immediate_operand" "i,i")]
           UNSPEC_VCGT))]
   "TARGET_NEON"
@@ -2198,7 +2198,7 @@ (define_insn "neon_vcle<mode>"
   [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
         (unspec:<V_cmp_result>
          [(match_operand:VDQW 1 "s_register_operand" "w")
-          (match_operand:VDQW 2 "nonmemory_operand" "Dz")
+          (match_operand:VDQW 2 "zero_operand" "Dz")
           (match_operand:SI 3 "immediate_operand" "i")]
           UNSPEC_VCLE))]
   "TARGET_NEON"
@@ -2215,7 +2215,7 @@ (define_insn "neon_vclt<mode>"
   [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
         (unspec:<V_cmp_result>
          [(match_operand:VDQW 1 "s_register_operand" "w")
-          (match_operand:VDQW 2 "nonmemory_operand" "Dz")
+          (match_operand:VDQW 2 "zero_operand" "Dz")
           (match_operand:SI 3 "immediate_operand" "i")]
           UNSPEC_VCLT))]
   "TARGET_NEON"
--- config/arm/predicates.md    (revision 185488)
+++ config/arm/predicates.md    (local)
@@ -89,6 +89,15 @@ (define_predicate "vfp_register_operand"
                  && REGNO_REG_CLASS (REGNO (op)) == VFP_REGS)));
 })
 
+(define_predicate "zero_operand"
+  (and (match_code "const_int,const_double,const_vector")
+       (match_test "op == CONST0_RTX (mode)")))
+
+;; Match a register, or zero in the appropriate mode.
+(define_predicate "reg_or_zero_operand"
+  (ior (match_operand 0 "s_register_operand")
+       (match_operand 0 "zero_operand")))
+
 (define_special_predicate "subreg_lowpart_operator"
   (and (match_code "subreg")
        (match_test "subreg_lowpart_p (op)")))

Reply via email to