Hi Wilco,

On 9/9/19 6:06 PM, Wilco Dijkstra wrote:
ping


We currently use default mid-end expanders for logical DImode operations.
 These split operations without first splitting off complex immediates or
 memory operands.  The resulting expansions are non-optimal and allow for
 fewer LDRD/STRD opportunities.  So add back explicit expanders which ensure
 memory operands and immediates are handled more efficiently.


Makes sense to me.


 Bootstrap OK on armhf, regress passes.

 ChangeLog:
 2019-08-29  Wilco Dijkstra  <wdijk...@arm.com>

         * config/arm/arm.md (anddi3): Expand explicitly.
         (iordi3): Likewise.
         (xordi3): Likewise.
         (one_cmpldi2): Likewise.
         * config/arm/arm.c (const_ok_for_dimode_op): Return true if one
         of the constant parts is simple.
         * config/arm/predicates.md (arm_anddi_operand): Add predicate.
         (arm_iordi_operand): Add predicate.
         (arm_xordi_operand): Add predicate.

 --

 diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
 index fb57880fe0568be96a04aee1b7d230e77121e3f5..1fec00baa2a5e510ef2c02d9766432cc7cd0a17b 100644
 --- a/gcc/config/arm/arm.c
 +++ b/gcc/config/arm/arm.c
 @@ -4273,8 +4273,8 @@ const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
      case AND:
      case IOR:
      case XOR:
 -      return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
 -              && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
 +      return const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF
 +            || const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF;
      case PLUS:
        return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);

 diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
 index ed49c4beda138633a84b58fe345cf5ba99103ab7..738d42fd164f117f1dec1108a824d984ccd70d09 100644
 --- a/gcc/config/arm/arm.md
 +++ b/gcc/config/arm/arm.md
 @@ -2176,6 +2176,89 @@ (define_expand "divdf3"
    "")


 +; Expand logical operations.  The mid-end expander does not split off memory
 +; operands or complex immediates, which leads to fewer LDRD/STRD instructions.
 +; So an explicit expander is needed to generate better code.
 +
 +(define_expand "anddi3"
 +  [(set (match_operand:DI        0 "s_register_operand")
 +       (and:DI (match_operand:DI 1 "s_register_operand")
 +               (match_operand:DI 2 "arm_anddi_operand")))]
 +  "TARGET_32BIT"
 +  {
 +      rtx low  = simplify_gen_binary (AND, SImode,
 +                                     gen_lowpart (SImode, operands[1]),
 +                                     gen_lowpart (SImode, operands[2]));
 +      rtx high = simplify_gen_binary (AND, SImode,
 +                                     gen_highpart (SImode, operands[1]),
 +                                     gen_highpart_mode (SImode, DImode,
 +                                                        operands[2]));
 +
 +      emit_insn (gen_rtx_SET (gen_lowpart (SImode, operands[0]), low));
 +      emit_insn (gen_rtx_SET (gen_highpart (SImode, operands[0]), high));
 +      DONE;
 +  }
 +)
 +
 +(define_expand "iordi3"
 +  [(set (match_operand:DI        0 "s_register_operand")
 +       (ior:DI (match_operand:DI 1 "s_register_operand")
 +               (match_operand:DI 2 "arm_iordi_operand")))]
 +  "TARGET_32BIT"
 +  {
 +      rtx low  = simplify_gen_binary (IOR, SImode,
 +                                     gen_lowpart (SImode, operands[1]),
 +                                     gen_lowpart (SImode, operands[2]));
 +      rtx high = simplify_gen_binary (IOR, SImode,
 +                                     gen_highpart (SImode, operands[1]),
 +                                     gen_highpart_mode (SImode, DImode,
 +                                                        operands[2]));
 +
 +      emit_insn (gen_rtx_SET (gen_lowpart (SImode, operands[0]), low));
 +      emit_insn (gen_rtx_SET (gen_highpart (SImode, operands[0]), high));
 +      DONE;
 +  }
 +)
 +
 +(define_expand "xordi3"
 +  [(set (match_operand:DI        0 "s_register_operand")
 +       (xor:DI (match_operand:DI 1 "s_register_operand")
 +               (match_operand:DI 2 "arm_xordi_operand")))]
 +  "TARGET_32BIT"
 +  {
 +       rtx low  = simplify_gen_binary (XOR, SImode,
 +                                       gen_lowpart (SImode, operands[1]),
 +                                       gen_lowpart (SImode, operands[2]));
 +       rtx high = simplify_gen_binary (XOR, SImode,
 +                                       gen_highpart (SImode, operands[1]),
 +                                       gen_highpart_mode (SImode, DImode,
 +                                                          operands[2]));
 +
 +       emit_insn (gen_rtx_SET (gen_lowpart (SImode, operands[0]), low));
 +       emit_insn (gen_rtx_SET (gen_highpart (SImode, operands[0]), high));
 +       DONE;
 +  }
 +)

We should be able to "compress" the above three patterns (anddi3, iordi3 and xordi3) into a single define_expand using a code iterator.

Looks ok to me otherwise.

Thanks,

Kyrill

 +
 +(define_expand "one_cmpldi2"
 +  [(set (match_operand:DI 0 "s_register_operand")
 +       (not:DI (match_operand:DI 1 "s_register_operand")))]
 +  "TARGET_32BIT"
 +  {
 +      rtx low  = simplify_gen_unary (NOT, SImode,
 +                                    gen_lowpart (SImode, operands[1]),
 +                                    SImode);
 +      rtx high = simplify_gen_unary (NOT, SImode,
 +                                    gen_highpart_mode (SImode, DImode,
 +                                                       operands[1]),
 +                                    SImode);
 +
 +      emit_insn (gen_rtx_SET (gen_lowpart (SImode, operands[0]), low));
 +      emit_insn (gen_rtx_SET (gen_highpart (SImode, operands[0]), high));
 +      DONE;
 +  }
 +)
 +
  ;; Split DImode and, ior, xor operations.  Simply perform the logical
  ;; operation on the upper and lower halves of the registers.
  ;; This is needed for atomic operations in arm_split_atomic_op.
 diff --git a/gcc/config/arm/predicates.md b/gcc/config/arm/predicates.md
 index 59dc2e89534a8b85df1197bd7211af43c56fb18c..82a2c841a51e6da120303bf0037280da0d10b049 100644
 --- a/gcc/config/arm/predicates.md
 +++ b/gcc/config/arm/predicates.md
 @@ -206,6 +206,21 @@ (define_predicate "arm_adddi_operand"
         (and (match_code "const_int")
              (match_test "const_ok_for_dimode_op (INTVAL (op), PLUS)"))))

 +(define_predicate "arm_anddi_operand"
 +  (ior (match_operand 0 "s_register_operand")
 +       (and (match_code "const_int")
 +           (match_test "const_ok_for_dimode_op (INTVAL (op), AND)"))))
 +
 +(define_predicate "arm_iordi_operand"
 +  (ior (match_operand 0 "s_register_operand")
 +       (and (match_code "const_int")
 +           (match_test "const_ok_for_dimode_op (INTVAL (op), IOR)"))))
 +
 +(define_predicate "arm_xordi_operand"
 +  (ior (match_operand 0 "s_register_operand")
 +       (and (match_code "const_int")
 +           (match_test "const_ok_for_dimode_op (INTVAL (op), XOR)"))))
 +
  (define_predicate "arm_addimm_operand"
    (ior (match_operand 0 "arm_immediate_operand")
         (match_operand 0 "arm_neg_immediate_operand")))

Reply via email to