Hi,

Following the review comments, I split the original patch at
https://gcc.gnu.org/ml/gcc-patches/2016-05/msg01182.html into two and
refined the comments.  This is the first part, implementing the
vcond_mask/vec_cmp patterns on AArch64.  These new patterns will be
used by the vcond patterns in the second patch.
Bootstrapped and tested on AArch64.  Is it OK?

2016-06-07  Alan Lawrence  <alan.lawre...@arm.com>
	    Renlin Li  <renlin...@arm.com>
	    Bin Cheng  <bin.ch...@arm.com>

	* config/aarch64/aarch64-simd.md (vec_cmp<mode><mode>): New pattern.
	(vec_cmp<mode><mode>_internal): New pattern.
	(vec_cmp<mode><v_cmp_result>): New pattern.
	(vec_cmp<mode><v_cmp_result>_internal): New pattern.
	(vec_cmpu<mode><mode>): New pattern.
	(vcond_mask_<mode><v_cmp_result>): New pattern.
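To make the intent concrete, here is a small illustrative C example (not
part of the patch; the function name is made up).  The vector comparison
is the sort of thing the new vec_cmp/vec_cmpu names expand (CMGT/FCMGT
and friends, producing a lane mask), and the masked select is what
vcond_mask expands (a BSL on that mask); with the follow-up vcond patch
this kind of loop should be vectorized through these expanders:

  /* Illustrative sketch only; assumes -O3 auto-vectorization on AArch64.
     The a[i] > b[i] test becomes a lane mask (vec_cmp -> CMGT) and the
     conditional select becomes a blend on that mask (vcond_mask -> BSL).  */
  void
  cond_select (int *restrict r, const int *restrict a,
               const int *restrict b, const int *restrict c, int n)
  {
    for (int i = 0; i < n; i++)
      r[i] = (a[i] > b[i]) ? b[i] : c[i];
  }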
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 6ea35bf..9437d02 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -2202,6 +2202,325 @@
   DONE;
 })
 
+(define_expand "vcond_mask_<mode><v_cmp_result>"
+  [(match_operand:VALLDI 0 "register_operand")
+   (match_operand:VALLDI 1 "nonmemory_operand")
+   (match_operand:VALLDI 2 "nonmemory_operand")
+   (match_operand:<V_cmp_result> 3 "register_operand")]
+  "TARGET_SIMD"
+{
+  /* If we have (a = (P) ? -1 : 0),
+     then we can simply move the generated mask (result must be int).  */
+  if (operands[1] == CONSTM1_RTX (<MODE>mode)
+      && operands[2] == CONST0_RTX (<MODE>mode))
+    emit_move_insn (operands[0], operands[3]);
+  /* Similarly, (a = (P) ? 0 : -1) is just inverting the generated mask.  */
+  else if (operands[1] == CONST0_RTX (<MODE>mode)
+           && operands[2] == CONSTM1_RTX (<MODE>mode))
+    emit_insn (gen_one_cmpl<v_cmp_result>2 (operands[0], operands[3]));
+  else
+    {
+      if (!REG_P (operands[1]))
+        operands[1] = force_reg (<MODE>mode, operands[1]);
+      if (!REG_P (operands[2]))
+        operands[2] = force_reg (<MODE>mode, operands[2]);
+      emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], operands[3],
+                                             operands[1], operands[2]));
+    }
+
+  DONE;
+})
+
+;; Patterns comparing two vectors to produce a mask.
+
+;; Internal pattern for vec_cmp.  It returns the expected result mask for
+;; comparison operators other than NE.  For the NE operator, it returns
+;; the opposite result mask.  This is the intended behavior so that we
+;; can save one mask-inverting instruction when using this pattern in
+;; vcond patterns.  In this case, it is the caller's responsibility
+;; to interpret and use the result mask correctly.
+(define_expand "vec_cmp<mode><mode>_internal"
+  [(set (match_operand:VSDQ_I_DI 0 "register_operand")
+        (match_operator 1 "comparison_operator"
+          [(match_operand:VSDQ_I_DI 2 "register_operand")
+           (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
+  "TARGET_SIMD"
+{
+  rtx mask = operands[0];
+  enum rtx_code code = GET_CODE (operands[1]);
+
+  if (operands[3] == CONST0_RTX (<MODE>mode)
+      && (code == NE || code == LE || code == LT
+          || code == GE || code == GT || code == EQ))
+    {
+      /* Some instructions have a form taking an immediate zero.  */
+      ;
+    }
+  else if (!REG_P (operands[3]))
+    {
+      /* Make sure we can handle the last operand.  */
+      operands[3] = force_reg (<MODE>mode, operands[3]);
+    }
+
+  switch (code)
+    {
+    case LT:
+      emit_insn (gen_aarch64_cmlt<mode> (mask, operands[2], operands[3]));
+      break;
+
+    case GE:
+      emit_insn (gen_aarch64_cmge<mode> (mask, operands[2], operands[3]));
+      break;
+
+    case LE:
+      emit_insn (gen_aarch64_cmle<mode> (mask, operands[2], operands[3]));
+      break;
+
+    case GT:
+      emit_insn (gen_aarch64_cmgt<mode> (mask, operands[2], operands[3]));
+      break;
+
+    case LTU:
+      emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[3], operands[2]));
+      break;
+
+    case GEU:
+      emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[2], operands[3]));
+      break;
+
+    case LEU:
+      emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[3], operands[2]));
+      break;
+
+    case GTU:
+      emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[2], operands[3]));
+      break;
+
+    case NE:
+      /* See the comment at the beginning of this pattern; we return the
+         opposite of the result mask for this operator, and it is the
+         caller's responsibility to invert the mask.
+
+         Fall through.  */
+
+    case EQ:
+      emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
+      break;
+
+    default:
+      gcc_unreachable ();
+    }
+
+  /* Don't invert the result mask for NE here; that is done in the caller.  */
+
+  DONE;
+})
+
+(define_expand "vec_cmp<mode><mode>"
+  [(set (match_operand:VSDQ_I_DI 0 "register_operand")
+        (match_operator 1 "comparison_operator"
+          [(match_operand:VSDQ_I_DI 2 "register_operand")
+           (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
+  "TARGET_SIMD"
+{
+  enum rtx_code code = GET_CODE (operands[1]);
+
+  emit_insn (gen_vec_cmp<mode><mode>_internal (operands[0],
+                                               operands[1], operands[2],
+                                               operands[3]));
+  /* See the comments for vec_cmp<mode><mode>_internal; the opposite result
+     mask is computed for the NE operator, so we need to invert the mask
+     here.  */
+  if (code == NE)
+    emit_insn (gen_one_cmpl<v_cmp_result>2 (operands[0], operands[0]));
+
+  DONE;
+})
+
+
+;; Internal pattern for vec_cmp.  Similar to vec_cmp<mode><mode>_internal,
+;; it returns the opposite result mask for the operators NE, UNEQ, UNLT,
+;; UNLE, UNGT, UNGE and UNORDERED.  This is the intended behavior so that
+;; we can save one mask-inverting instruction when using this pattern
+;; in vcond patterns.  In these cases, it is the caller's responsibility
+;; to interpret and use the result mask correctly.
+(define_expand "vec_cmp<mode><v_cmp_result>_internal"
+  [(set (match_operand:<V_cmp_result> 0 "register_operand")
+        (match_operator 1 "comparison_operator"
+          [(match_operand:VDQF 2 "register_operand")
+           (match_operand:VDQF 3 "nonmemory_operand")]))]
+  "TARGET_SIMD"
+{
+  int use_zero_form = 0;
+  enum rtx_code code = GET_CODE (operands[1]);
+  rtx tmp = gen_reg_rtx (<V_cmp_result>mode);
+
+  rtx (*comparison) (rtx, rtx, rtx);
+
+  if (operands[3] == CONST0_RTX (<MODE>mode)
+      && (code == LE || code == LT || code == GE || code == GT || code == EQ))
+    {
+      /* Some instructions have a form taking an immediate zero.  */
+      use_zero_form = 1;
+    }
+  else if (!REG_P (operands[3]))
+    {
+      /* Make sure we can handle the last operand.  */
+      operands[3] = force_reg (<MODE>mode, operands[3]);
+    }
+
+  switch (code)
+    {
+    case LT:
+      if (use_zero_form)
+        {
+          comparison = gen_aarch64_cmlt<mode>;
+          break;
+        }
+      /* Else, fall through.  */
+    case UNGE:
+      std::swap (operands[2], operands[3]);
+      /* Fall through.  */
+    case UNLE:
+    case GT:
+      comparison = gen_aarch64_cmgt<mode>;
+      break;
+    case LE:
+      if (use_zero_form)
+        {
+          comparison = gen_aarch64_cmle<mode>;
+          break;
+        }
+      /* Else, fall through.  */
+    case UNGT:
+      std::swap (operands[2], operands[3]);
+      /* Fall through.  */
+    case UNLT:
+    case GE:
+      comparison = gen_aarch64_cmge<mode>;
+      break;
+    case NE:
+    case EQ:
+      comparison = gen_aarch64_cmeq<mode>;
+      break;
+    case UNEQ:
+    case ORDERED:
+    case UNORDERED:
+      break;
+    default:
+      gcc_unreachable ();
+    }
+
+  switch (code)
+    {
+    case UNGT:
+    case UNGE:
+    case NE:
+    case UNLT:
+    case UNLE:
+      /* FCM returns false for lanes which are unordered, so if we use
+         the inverse of the comparison we actually want to emit and then
+         invert the result, we will end up with the correct result.
+         Note that a NE NaN and NaN NE b are true for all a, b.
+
+         Our transformations are:
+         a GE b -> !(b GT a)
+         a GT b -> !(b GE a)
+         a LE b -> !(a GT b)
+         a LT b -> !(a GE b)
+         a NE b -> !(a EQ b)
+
+         See the comment at the beginning of this pattern; we return the
+         opposite of the result mask for these operators, and it is the
+         caller's responsibility to invert the mask.
+
+         Fall through.  */
+    case LT:
+    case LE:
+    case GT:
+    case GE:
+    case EQ:
+      /* The easy case.  Here we emit one of FCMGE, FCMGT or FCMEQ.
+         As a LT b <=> b GT a && a LE b <=> b GE a, our transformations are:
+         a GE b -> a GE b
+         a GT b -> a GT b
+         a LE b -> b GE a
+         a LT b -> b GT a
+         a EQ b -> a EQ b
+         Note that there also exist direct comparison against 0 forms,
+         so catch those as a special case.  */
+
+      emit_insn (comparison (operands[0], operands[2], operands[3]));
+      break;
+
+    case UNEQ:
+      /* We first check (a > b || b > a), which is !UNEQ; inverting
+         this result will then give us (a == b || a UNORDERED b).
+
+         See the comment at the beginning of this pattern; we return the
+         opposite of the result mask for this operator, and it is the
+         caller's responsibility to invert the mask.  */
+
+      emit_insn (gen_aarch64_cmgt<mode> (operands[0],
+                                         operands[2], operands[3]));
+      emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[3], operands[2]));
+      emit_insn (gen_ior<v_cmp_result>3 (operands[0], operands[0], tmp));
+      break;
+    case UNORDERED:
+      /* Operands are ORDERED iff (a > b || b >= a), so we can compute
+         UNORDERED as !ORDERED.
+
+         See the comment at the beginning of this pattern; we return the
+         opposite of the result mask for this operator, and it is the
+         caller's responsibility to invert the mask.
+
+         Fall through.  */
+    case ORDERED:
+      emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[2], operands[3]));
+      emit_insn (gen_aarch64_cmge<mode> (operands[0],
+                                         operands[3], operands[2]));
+      emit_insn (gen_ior<v_cmp_result>3 (operands[0], operands[0], tmp));
+      break;
+    default:
+      gcc_unreachable ();
+    }
+
+  DONE;
+})
+
+(define_expand "vec_cmp<mode><v_cmp_result>"
+  [(set (match_operand:<V_cmp_result> 0 "register_operand")
+        (match_operator 1 "comparison_operator"
+          [(match_operand:VDQF 2 "register_operand")
+           (match_operand:VDQF 3 "nonmemory_operand")]))]
+  "TARGET_SIMD"
+{
+  enum rtx_code code = GET_CODE (operands[1]);
+
+  emit_insn (gen_vec_cmp<mode><v_cmp_result>_internal (operands[0],
+                                                       operands[1], operands[2],
+                                                       operands[3]));
+  /* See the comments for vec_cmp<mode><v_cmp_result>_internal; the opposite
+     result masks are computed for the operators below, so we need to invert
+     the mask here.  */
+  if (code == NE || code == UNEQ || code == UNLT || code == UNLE
+      || code == UNGT || code == UNGE || code == UNORDERED)
+    emit_insn (gen_one_cmpl<v_cmp_result>2 (operands[0], operands[0]));
+
+  DONE;
+})
+
+(define_expand "vec_cmpu<mode><mode>"
+  [(set (match_operand:VSDQ_I_DI 0 "register_operand")
+        (match_operator 1 "comparison_operator"
+          [(match_operand:VSDQ_I_DI 2 "register_operand")
+           (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
+  "TARGET_SIMD"
+{
+  emit_insn (gen_vec_cmp<mode><mode> (operands[0], operands[1],
+                                      operands[2], operands[3]));
+  DONE;
+})
+
 (define_expand "aarch64_vcond_internal<mode><mode>"
   [(set (match_operand:VSDQ_I_DI 0 "register_operand")
         (if_then_else:VSDQ_I_DI