Hi,
This is the second patch. It rewrites the vcond patterns in terms of the
vcond_mask/vec_cmp patterns introduced in the first one, and it implements
vcond patterns that were missing from the current AArch64 backend. After this
patch, I have a simple follow-up change that enables the "vect_cond_mixed"
testing requirement on AArch64, which in turn enables various tests.
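As a rough illustration (the loop and function name below are my own example,
not taken from the patch or the testsuite), "vect_cond_mixed" covers
conditionals where the comparison is done on one type and the selected values
have a different type of the same width, e.g. a float comparison selecting
between ints:

  /* Illustrative only: float comparison, int selection; loops like this can
     be vectorized using the new mixed-type vcond patterns.  */
  void
  mixed_select (int *restrict r, float *restrict a, float *restrict b, int n)
  {
    for (int i = 0; i < n; i++)
      r[i] = a[i] < b[i] ? 3 : 5;
  }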
Bootstrapped & tested along with the first patch on AArch64. Is it OK?
Thanks,
bin
2016-06-07  Alan Lawrence  <alan.lawre...@arm.com>
	    Renlin Li  <renlin...@arm.com>
	    Bin Cheng  <bin.ch...@arm.com>
	* config/aarch64/iterators.md (V_cmp_mixed, v_cmp_mixed): New.
	* config/aarch64/aarch64-simd.md (<su><maxmin>v2di3): Call
	gen_vcondv2div2di instead of gen_aarch64_vcond_internalv2div2di.
	(aarch64_vcond_internal<mode><mode>): Delete pattern.
	(aarch64_vcond_internal<VDQF_COND:mode><VDQF:mode>): Ditto.
	(vcond<mode><mode>): Re-implement using vec_cmp and vcond_mask.
	(vcondu<mode><mode>): Ditto.
	(vcond<v_cmp_result><mode>): Delete.
	(vcond<v_cmp_mixed><mode>): New pattern.
	(vcondu<mode><v_cmp_mixed>): New pattern.
	(aarch64_cmtst<mode>): Adjust comment to refer to aarch64_vcond
	instead of aarch64_vcond_internal.
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 6ea35bf..e080b71 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -1053,7 +1053,7 @@
}
cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]);
- emit_insn (gen_aarch64_vcond_internalv2div2di (operands[0], operands[1],
+ emit_insn (gen_vcondv2div2di (operands[0], operands[1],
operands[2], cmp_fmt, operands[1], operands[2]));
DONE;
})
@@ -2202,314 +2202,6 @@
DONE;
})
-(define_expand "aarch64_vcond_internal<mode><mode>"
- [(set (match_operand:VSDQ_I_DI 0 "register_operand")
- (if_then_else:VSDQ_I_DI
- (match_operator 3 "comparison_operator"
- [(match_operand:VSDQ_I_DI 4 "register_operand")
- (match_operand:VSDQ_I_DI 5 "nonmemory_operand")])
- (match_operand:VSDQ_I_DI 1 "nonmemory_operand")
- (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
- "TARGET_SIMD"
-{
- rtx op1 = operands[1];
- rtx op2 = operands[2];
- rtx mask = gen_reg_rtx (<MODE>mode);
- enum rtx_code code = GET_CODE (operands[3]);
-
- /* Switching OP1 and OP2 is necessary for NE (to output a cmeq insn),
- and desirable for other comparisons if it results in FOO ? -1 : 0
- (this allows direct use of the comparison result without a bsl). */
- if (code == NE
- || (code != EQ
- && op1 == CONST0_RTX (<V_cmp_result>mode)
- && op2 == CONSTM1_RTX (<V_cmp_result>mode)))
- {
- op1 = operands[2];
- op2 = operands[1];
- switch (code)
- {
- case LE: code = GT; break;
- case LT: code = GE; break;
- case GE: code = LT; break;
- case GT: code = LE; break;
- /* No case EQ. */
- case NE: code = EQ; break;
- case LTU: code = GEU; break;
- case LEU: code = GTU; break;
- case GTU: code = LEU; break;
- case GEU: code = LTU; break;
- default: gcc_unreachable ();
- }
- }
-
- /* Make sure we can handle the last operand. */
- switch (code)
- {
- case NE:
- /* Normalized to EQ above. */
- gcc_unreachable ();
-
- case LE:
- case LT:
- case GE:
- case GT:
- case EQ:
- /* These instructions have a form taking an immediate zero. */
- if (operands[5] == CONST0_RTX (<MODE>mode))
- break;
- /* Fall through, as may need to load into register. */
- default:
- if (!REG_P (operands[5]))
- operands[5] = force_reg (<MODE>mode, operands[5]);
- break;
- }
-
- switch (code)
- {
- case LT:
- emit_insn (gen_aarch64_cmlt<mode> (mask, operands[4], operands[5]));
- break;
-
- case GE:
- emit_insn (gen_aarch64_cmge<mode> (mask, operands[4], operands[5]));
- break;
-
- case LE:
- emit_insn (gen_aarch64_cmle<mode> (mask, operands[4], operands[5]));
- break;
-
- case GT:
- emit_insn (gen_aarch64_cmgt<mode> (mask, operands[4], operands[5]));
- break;
-
- case LTU:
- emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[5], operands[4]));
- break;
-
- case GEU:
- emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[4], operands[5]));
- break;
-
- case LEU:
- emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[5], operands[4]));
- break;
-
- case GTU:
- emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[4], operands[5]));
- break;
-
- /* NE has been normalized to EQ above. */
- case EQ:
- emit_insn (gen_aarch64_cmeq<mode> (mask, operands[4], operands[5]));
- break;
-
- default:
- gcc_unreachable ();
- }
-
- /* If we have (a = (b CMP c) ? -1 : 0);
- Then we can simply move the generated mask. */
-
- if (op1 == CONSTM1_RTX (<V_cmp_result>mode)
- && op2 == CONST0_RTX (<V_cmp_result>mode))
- emit_move_insn (operands[0], mask);
- else
- {
- if (!REG_P (op1))
- op1 = force_reg (<MODE>mode, op1);
- if (!REG_P (op2))
- op2 = force_reg (<MODE>mode, op2);
- emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], mask,
- op1, op2));
- }
-
- DONE;
-})
-
-(define_expand "aarch64_vcond_internal<VDQF_COND:mode><VDQF:mode>"
- [(set (match_operand:VDQF_COND 0 "register_operand")
- (if_then_else:VDQF
- (match_operator 3 "comparison_operator"
- [(match_operand:VDQF 4 "register_operand")
- (match_operand:VDQF 5 "nonmemory_operand")])
- (match_operand:VDQF_COND 1 "nonmemory_operand")
- (match_operand:VDQF_COND 2 "nonmemory_operand")))]
- "TARGET_SIMD"
-{
- int inverse = 0;
- int use_zero_form = 0;
- int swap_bsl_operands = 0;
- rtx op1 = operands[1];
- rtx op2 = operands[2];
- rtx mask = gen_reg_rtx (<VDQF_COND:V_cmp_result>mode);
- rtx tmp = gen_reg_rtx (<VDQF_COND:V_cmp_result>mode);
-
- rtx (*base_comparison) (rtx, rtx, rtx);
- rtx (*complimentary_comparison) (rtx, rtx, rtx);
-
- switch (GET_CODE (operands[3]))
- {
- case GE:
- case GT:
- case LE:
- case LT:
- case EQ:
- if (operands[5] == CONST0_RTX (<MODE>mode))
- {
- use_zero_form = 1;
- break;
- }
- /* Fall through. */
- default:
- if (!REG_P (operands[5]))
- operands[5] = force_reg (<VDQF:MODE>mode, operands[5]);
- }
-
- switch (GET_CODE (operands[3]))
- {
- case LT:
- case UNLT:
- inverse = 1;
- /* Fall through. */
- case GE:
- case UNGE:
- case ORDERED:
- case UNORDERED:
- base_comparison = gen_aarch64_cmge<VDQF:mode>;
- complimentary_comparison = gen_aarch64_cmgt<VDQF:mode>;
- break;
- case LE:
- case UNLE:
- inverse = 1;
- /* Fall through. */
- case GT:
- case UNGT:
- base_comparison = gen_aarch64_cmgt<VDQF:mode>;
- complimentary_comparison = gen_aarch64_cmge<VDQF:mode>;
- break;
- case EQ:
- case NE:
- case UNEQ:
- base_comparison = gen_aarch64_cmeq<VDQF:mode>;
- complimentary_comparison = gen_aarch64_cmeq<VDQF:mode>;
- break;
- default:
- gcc_unreachable ();
- }
-
- switch (GET_CODE (operands[3]))
- {
- case LT:
- case LE:
- case GT:
- case GE:
- case EQ:
- /* The easy case. Here we emit one of FCMGE, FCMGT or FCMEQ.
- As a LT b <=> b GE a && a LE b <=> b GT a. Our transformations are:
- a GE b -> a GE b
- a GT b -> a GT b
- a LE b -> b GE a
- a LT b -> b GT a
- a EQ b -> a EQ b
- Note that there also exist direct comparison against 0 forms,
- so catch those as a special case. */
- if (use_zero_form)
- {
- inverse = 0;
- switch (GET_CODE (operands[3]))
- {
- case LT:
- base_comparison = gen_aarch64_cmlt<VDQF:mode>;
- break;
- case LE:
- base_comparison = gen_aarch64_cmle<VDQF:mode>;
- break;
- default:
- /* Do nothing, other zero form cases already have the correct
- base_comparison. */
- break;
- }
- }
-
- if (!inverse)
- emit_insn (base_comparison (mask, operands[4], operands[5]));
- else
- emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
- break;
- case UNLT:
- case UNLE:
- case UNGT:
- case UNGE:
- case NE:
- /* FCM returns false for lanes which are unordered, so if we use
- the inverse of the comparison we actually want to emit, then
- swap the operands to BSL, we will end up with the correct result.
- Note that a NE NaN and NaN NE b are true for all a, b.
-
- Our transformations are:
- a GE b -> !(b GT a)
- a GT b -> !(b GE a)
- a LE b -> !(a GT b)
- a LT b -> !(a GE b)
- a NE b -> !(a EQ b) */
-
- if (inverse)
- emit_insn (base_comparison (mask, operands[4], operands[5]));
- else
- emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
-
- swap_bsl_operands = 1;
- break;
- case UNEQ:
- /* We check (a > b || b > a). combining these comparisons give us
- true iff !(a != b && a ORDERED b), swapping the operands to BSL
- will then give us (a == b || a UNORDERED b) as intended. */
-
- emit_insn (gen_aarch64_cmgt<VDQF:mode> (mask, operands[4], operands[5]));
- emit_insn (gen_aarch64_cmgt<VDQF:mode> (tmp, operands[5], operands[4]));
- emit_insn (gen_ior<VDQF_COND:v_cmp_result>3 (mask, mask, tmp));
- swap_bsl_operands = 1;
- break;
- case UNORDERED:
- /* Operands are ORDERED iff (a > b || b >= a).
- Swapping the operands to BSL will give the UNORDERED case. */
- swap_bsl_operands = 1;
- /* Fall through. */
- case ORDERED:
- emit_insn (gen_aarch64_cmgt<VDQF:mode> (tmp, operands[4], operands[5]));
- emit_insn (gen_aarch64_cmge<VDQF:mode> (mask, operands[5], operands[4]));
- emit_insn (gen_ior<VDQF_COND:v_cmp_result>3 (mask, mask, tmp));
- break;
- default:
- gcc_unreachable ();
- }
-
- if (swap_bsl_operands)
- {
- op1 = operands[2];
- op2 = operands[1];
- }
-
- /* If we have (a = (b CMP c) ? -1 : 0);
- Then we can simply move the generated mask. */
-
- if (op1 == CONSTM1_RTX (<VDQF_COND:V_cmp_result>mode)
- && op2 == CONST0_RTX (<VDQF_COND:V_cmp_result>mode))
- emit_move_insn (operands[0], mask);
- else
- {
- if (!REG_P (op1))
- op1 = force_reg (<VDQF_COND:MODE>mode, op1);
- if (!REG_P (op2))
- op2 = force_reg (<VDQF_COND:MODE>mode, op2);
- emit_insn (gen_aarch64_simd_bsl<VDQF_COND:mode> (operands[0], mask,
- op1, op2));
- }
-
- DONE;
-})
-
(define_expand "vcond<mode><mode>"
[(set (match_operand:VALLDI 0 "register_operand")
(if_then_else:VALLDI
@@ -2520,26 +2212,50 @@
(match_operand:VALLDI 2 "nonmemory_operand")))]
"TARGET_SIMD"
{
- emit_insn (gen_aarch64_vcond_internal<mode><mode> (operands[0], operands[1],
- operands[2], operands[3],
+ rtx mask = gen_reg_rtx (<V_cmp_result>mode);
+ enum rtx_code code = GET_CODE (operands[3]);
+
+ emit_insn (gen_vec_cmp<mode><v_cmp_result>_internal (mask, operands[3],
operands[4], operands[5]));
+ /* See the comments of vec_cmp<mode><v_cmp_result>_internal: the opposite
+ result mask is computed for the operators below, so the mask has to be
+ inverted here. In this case we can save an inverting instruction by
+ simply swapping the two operands to bsl. */
+ if (code == NE || code == UNEQ || code == UNLT || code == UNLE
+ || code == UNGT || code == UNGE || code == UNORDERED)
+ std::swap (operands[1], operands[2]);
+
+ emit_insn (gen_vcond_mask_<mode><v_cmp_result> (operands[0], operands[1],
+ operands[2], mask));
DONE;
})
-(define_expand "vcond<v_cmp_result><mode>"
- [(set (match_operand:<V_cmp_result> 0 "register_operand")
- (if_then_else:<V_cmp_result>
+(define_expand "vcond<v_cmp_mixed><mode>"
+ [(set (match_operand:<V_cmp_mixed> 0 "register_operand")
+ (if_then_else:<V_cmp_mixed>
(match_operator 3 "comparison_operator"
- [(match_operand:VDQF 4 "register_operand")
- (match_operand:VDQF 5 "nonmemory_operand")])
- (match_operand:<V_cmp_result> 1 "nonmemory_operand")
- (match_operand:<V_cmp_result> 2 "nonmemory_operand")))]
+ [(match_operand:VDQF_COND 4 "register_operand")
+ (match_operand:VDQF_COND 5 "nonmemory_operand")])
+ (match_operand:<V_cmp_mixed> 1 "nonmemory_operand")
+ (match_operand:<V_cmp_mixed> 2 "nonmemory_operand")))]
"TARGET_SIMD"
{
- emit_insn (gen_aarch64_vcond_internal<v_cmp_result><mode> (
+ rtx mask = gen_reg_rtx (<V_cmp_result>mode);
+ enum rtx_code code = GET_CODE (operands[3]);
+
+ emit_insn (gen_vec_cmp<mode><v_cmp_result>_internal (mask, operands[3],
+ operands[4], operands[5]));
+ /* See the comments of vec_cmp<mode><v_cmp_result>_internal: the opposite
+ result mask is computed for the operators below, so the mask has to be
+ inverted here. In this case we can save an inverting instruction by
+ simply swapping the two operands to bsl. */
+ if (code == NE || code == UNEQ || code == UNLT || code == UNLE
+ || code == UNGT || code == UNGE || code == UNORDERED)
+ std::swap (operands[1], operands[2]);
+
+ emit_insn (gen_vcond_mask_<v_cmp_mixed><v_cmp_result> (
operands[0], operands[1],
- operands[2], operands[3],
- operands[4], operands[5]));
+ operands[2], mask));
DONE;
})
@@ -2553,9 +2269,48 @@
(match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
"TARGET_SIMD"
{
- emit_insn (gen_aarch64_vcond_internal<mode><mode> (operands[0], operands[1],
- operands[2], operands[3],
+ rtx mask = gen_reg_rtx (<MODE>mode);
+ enum rtx_code code = GET_CODE (operands[3]);
+
+ emit_insn (gen_vec_cmp<mode><mode>_internal (mask, operands[3],
operands[4], operands[5]));
+ /* See the comments of vec_cmp<mode><mode>_internal: the opposite result
+ mask is computed for the NE operator, so the mask has to be inverted
+ here. In this case we can save an inverting instruction by simply
+ swapping the two operands to bsl. */
+ if (code == NE)
+ std::swap (operands[1], operands[2]);
+
+ emit_insn (gen_vcond_mask_<mode><v_cmp_result> (operands[0], operands[1],
+ operands[2], mask));
+ DONE;
+})
+
+(define_expand "vcondu<mode><v_cmp_mixed>"
+ [(set (match_operand:VDQF 0 "register_operand")
+ (if_then_else:VDQF
+ (match_operator 3 "comparison_operator"
+ [(match_operand:<V_cmp_mixed> 4 "register_operand")
+ (match_operand:<V_cmp_mixed> 5 "nonmemory_operand")])
+ (match_operand:VDQF 1 "nonmemory_operand")
+ (match_operand:VDQF 2 "nonmemory_operand")))]
+ "TARGET_SIMD"
+{
+ rtx mask = gen_reg_rtx (<V_cmp_result>mode);
+ enum rtx_code code = GET_CODE (operands[3]);
+
+ emit_insn (gen_vec_cmp<v_cmp_mixed><v_cmp_mixed>_internal (
+ mask, operands[3],
+ operands[4], operands[5]));
+ /* See the comments of vec_cmp<mode><mode>_internal: the opposite result
+ mask is computed for the NE operator, so the mask has to be inverted
+ here. In this case we can save an inverting instruction by simply
+ swapping the two operands to bsl. */
+ if (code == NE)
+ std::swap (operands[1], operands[2]);
+
+ emit_insn (gen_vcond_mask_<mode><v_cmp_result> (operands[0], operands[1],
+ operands[2], mask));
DONE;
})
@@ -4156,7 +3911,7 @@
;; cmtst
;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst,
-;; we don't have any insns using ne, and aarch64_vcond_internal outputs
+;; we don't have any insns using ne, and aarch64_vcond outputs
;; not (neg (eq (and x y) 0))
;; which is rewritten by simplify_rtx as
;; plus (eq (and x y) 0) -1.
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 43b22d8..f53fe9d 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -610,6 +610,16 @@
(V2DF "v2di") (DF "di")
(SF "si")])
+;; Mode for vector conditional operations where the comparison has a
+;; different type from the lhs.
+(define_mode_attr V_cmp_mixed [(V2SI "V2SF") (V4SI "V4SF")
+ (V2DI "V2DF") (V2SF "V2SI")
+ (V4SF "V4SI") (V2DF "V2DI")])
+
+(define_mode_attr v_cmp_mixed [(V2SI "v2sf") (V4SI "v4sf")
+ (V2DI "v2df") (V2SF "v2si")
+ (V4SF "v4si") (V2DF "v2di")])
+
;; Lower case element modes (as used in shift immediate patterns).
(define_mode_attr ve_mode [(V8QI "qi") (V16QI "qi")
(V4HI "hi") (V8HI "hi")