Both patterns used !reload_completed as a condition, which is
questionable at best.  The branch pattern failed to include a
clobber of CC_REGNUM.  Both problems were unlikely to trigger
in practice, due to how the optimization pipeline is organized,
but let's fix them anyway.
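
For reference (not part of the fix itself), the transformation these
patterns implement replaces the mov/movk/cmp sequence for an equality
test against a 24-bit immediate, e.g.

        int f (unsigned int x) { return x == 0x123456; }

with something like the following for the cstore case (register choice
is only illustrative; the branch pattern ends in b.eq/b.ne instead of
the cset):

        sub     w1, w0, #0x123000
        subs    w1, w1, #0x456
        cset    w0, eq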

gcc:
        * config/aarch64/aarch64.cc (aarch64_gen_compare_split_imm24): New.
        * config/aarch64/aarch64-protos.h (aarch64_gen_compare_split_imm24):
        Declare.
        * config/aarch64/aarch64.md (*aarch64_bcond_wide_imm<GPI:mode>):
        Use it.  Add match_scratch and CC clobbers.  Use match_operator
        instead of iterator expansion.
        (*compare_cstore<mode>_insn): Likewise.
---
 gcc/config/aarch64/aarch64-protos.h |  1 +
 gcc/config/aarch64/aarch64.cc       | 37 +++++++++++++++
 gcc/config/aarch64/aarch64.md       | 74 ++++++++++-------------------
 3 files changed, 63 insertions(+), 49 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 7b9b16bd3bd..d26e1d5642e 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -1098,6 +1098,7 @@ bool aarch64_legitimate_address_p (machine_mode, rtx, bool,
                                   aarch64_addr_query_type = ADDR_QUERY_M);
 machine_mode aarch64_select_cc_mode (RTX_CODE, rtx, rtx);
 rtx aarch64_gen_compare_reg (RTX_CODE, rtx, rtx);
+rtx aarch64_gen_compare_split_imm24 (rtx, rtx, rtx);
 bool aarch64_maxmin_plus_const (rtx_code, rtx *, bool);
 rtx aarch64_load_tp (rtx);
 
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index c12365868b7..650da2ff95d 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -2882,6 +2882,43 @@ aarch64_gen_compare_reg_maybe_ze (RTX_CODE code, rtx x, rtx y,
   return aarch64_gen_compare_reg (code, x, y);
 }
 
+/* Split the 24-bit immediate IMM into two 12-bit halves, producing an
+   EQ/NE comparison of X against IMM.  TMP may be a scratch.  This
+   optimizes the sequence
+       mov     x0, #imm1
+       movk    x0, #imm2, lsl 16  // x0 contains CST
+       cmp     x1, x0
+   into the shorter
+       sub     tmp, x1, #(CST & 0xfff000)
+       subs    tmp, tmp, #(CST & 0x000fff).  */
+rtx
+aarch64_gen_compare_split_imm24 (rtx x, rtx imm, rtx tmp)
+{
+  HOST_WIDE_INT lo_imm = UINTVAL (imm) & 0xfff;
+  HOST_WIDE_INT hi_imm = UINTVAL (imm) & 0xfff000;
+  machine_mode mode = GET_MODE (x);
+
+  if (GET_CODE (tmp) == SCRATCH)
+    tmp = gen_reg_rtx (mode);
+
+  emit_insn (gen_add3_insn (tmp, x, GEN_INT (-hi_imm)));
+  /* TODO: We don't need the GPR result of the second insn.  */
+  switch (mode)
+    {
+    case E_SImode:
+      tmp = gen_addsi3_compare0 (tmp, tmp, GEN_INT (-lo_imm));
+      break;
+    case E_DImode:
+      tmp = gen_adddi3_compare0 (tmp, tmp, GEN_INT (-lo_imm));
+      break;
+    default:
+      gcc_unreachable ();
+    }
+  emit_insn (tmp);
+
+  return gen_rtx_REG (CC_NZmode, CC_REGNUM);
+}
+
 /* Generate conditional branch to LABEL, comparing X to 0 using CODE.
    Return the jump instruction.  */
 
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index c7b1b8b3860..ec7dea8de31 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -978,35 +978,24 @@
                      (const_string "yes")))]
 )
 
-;; For a 24-bit immediate CST we can optimize the compare for equality
-;; and branch sequence from:
-;;     mov     x0, #imm1
-;;     movk    x0, #imm2, lsl 16 /* x0 contains CST.  */
-;;     cmp     x1, x0
-;;     b<ne,eq> .Label
-;; into the shorter:
-;;     sub     x0, x1, #(CST & 0xfff000)
-;;     subs    x0, x0, #(CST & 0x000fff)
-;;     b<ne,eq> .Label
+;; For a 24-bit immediate CST we can optimize the compare for equality.
 (define_insn_and_split "*aarch64_bcond_wide_imm<GPI:mode>"
-  [(set (pc) (if_then_else (EQL (match_operand:GPI 0 "register_operand" "r")
-                               (match_operand:GPI 1 "aarch64_split_imm24" "n"))
-                          (label_ref:P (match_operand 2))
-                          (pc)))]
-  "!reload_completed"
+  [(set (pc) (if_then_else
+              (match_operator 0 "aarch64_equality_operator"
+               [(match_operand:GPI 1 "register_operand" "r")
+                (match_operand:GPI 2 "aarch64_split_imm24" "n")])
+              (label_ref (match_operand 3))
+              (pc)))
+   (clobber (reg:CC CC_REGNUM))
+   (clobber (match_scratch:GPI 4 "=r"))]
+  ""
   "#"
-  "&& true"
+  ""
   [(const_int 0)]
   {
-    HOST_WIDE_INT lo_imm = UINTVAL (operands[1]) & 0xfff;
-    HOST_WIDE_INT hi_imm = UINTVAL (operands[1]) & 0xfff000;
-    rtx tmp = gen_reg_rtx (<GPI:MODE>mode);
-    emit_insn (gen_add<GPI:mode>3 (tmp, operands[0], GEN_INT (-hi_imm)));
-    emit_insn (gen_add<GPI:mode>3_compare0 (tmp, tmp, GEN_INT (-lo_imm)));
-    rtx cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);
-    rtx cmp_rtx = gen_rtx_fmt_ee (<EQL:CMP>, <GPI:MODE>mode,
-                                 cc_reg, const0_rtx);
-    emit_jump_insn (gen_aarch64_bcond (cmp_rtx, cc_reg, operands[2]));
+    rtx cc_reg = aarch64_gen_compare_split_imm24 (operands[1], operands[2],
+                                                 operands[4]);
+    emit_jump_insn (gen_aarch64_bcond (operands[0], cc_reg, operands[3]));
     DONE;
   }
 )
@@ -4631,37 +4620,24 @@
   [(set_attr "type" "csel")]
 )
 
-;; For a 24-bit immediate CST we can optimize the compare for equality
-;; and branch sequence from:
-;;     mov     x0, #imm1
-;;     movk    x0, #imm2, lsl 16 /* x0 contains CST.  */
-;;     cmp     x1, x0
-;;     cset    x2, <ne,eq>
-;; into the shorter:
-;;     sub     x0, x1, #(CST & 0xfff000)
-;;     subs    x0, x0, #(CST & 0x000fff)
-;;     cset x2, <ne, eq>.
+;; For a 24-bit immediate CST we can optimize the compare for equality.
 (define_insn_and_split "*compare_cstore<mode>_insn"
   [(set (match_operand:GPI 0 "register_operand" "=r")
-        (EQL:GPI (match_operand:GPI 1 "register_operand" "r")
-                 (match_operand:GPI 2 "aarch64_split_imm24" "n")))
-   (clobber (reg:CC CC_REGNUM))]
-  "!reload_completed"
+       (match_operator:GPI 1 "aarch64_equality_operator"
+        [(match_operand:GPI 2 "register_operand" "r")
+         (match_operand:GPI 3 "aarch64_split_imm24" "n")]))
+   (clobber (reg:CC CC_REGNUM))
+   (clobber (match_scratch:GPI 4 "=r"))]
+  ""
   "#"
-  "&& true"
+  ""
   [(const_int 0)]
   {
-    HOST_WIDE_INT lo_imm = UINTVAL (operands[2]) & 0xfff;
-    HOST_WIDE_INT hi_imm = UINTVAL (operands[2]) & 0xfff000;
-    rtx tmp = gen_reg_rtx (<MODE>mode);
-    emit_insn (gen_add<mode>3 (tmp, operands[1], GEN_INT (-hi_imm)));
-    emit_insn (gen_add<mode>3_compare0 (tmp, tmp, GEN_INT (-lo_imm)));
-    rtx cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);
-    rtx cmp_rtx = gen_rtx_fmt_ee (<EQL:CMP>, <MODE>mode, cc_reg, const0_rtx);
-    emit_insn (gen_aarch64_cstore<mode> (operands[0], cmp_rtx, cc_reg));
+    rtx cc_reg = aarch64_gen_compare_split_imm24 (operands[2], operands[3],
+                                                 operands[4]);
+    emit_insn (gen_aarch64_cstore<mode> (operands[0], operands[1], cc_reg));
     DONE;
   }
-  [(set_attr "type" "csel")]
 )
 
 ;; zero_extend version of the above
-- 
2.34.1
