This is a patch for PR target/77729 on aarch64.  The generated code contains an
unneeded zero extend ('uxtb' in the original report, 'and' in the current
top-of-tree sources).

The patch looks a bit odd: it is a specialized define_insn that exists only for
the combine pass.  At some point in combine (I never did find out where), the
zero_extend is converted to an AND, so my instruction looks for an OR of a
constant and an AND expression, where one operand of the AND is a subreg and
the other is a constant.  If the two constants add up to 255 (e.g. an AND mask
of 223 with an OR constant of 32), that means that the AND is only being used
to mask out the upper bits of the register, without disturbing the bits set by
the constant we are using in the OR expression.

I also had to recognize this pattern in the aarch64 cost function; otherwise
combine would not use the new expression even when it matched the new
instruction, because it computed a higher cost for the combined expression
than for the original uncombined expressions.

Tested on aarch64 with a bootstrap and testsuite run that had no regressions.

OK to check in?


2017-09-13  Steve Ellcey  <sell...@cavium.com>

        PR target/77729
        * config/aarch64/aarch64.c (aarch64_rtx_costs):
        Handle cost of *iorqi3_uxtw instruction.
        * config/aarch64/aarch64.md (*iorqi3_uxtw): New
        instruction for combine phase.


2017-09-13  Steve Ellcey  <sell...@cavium.com>

        * gcc.target/aarch64/pr77729.c: New test.
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index de1fbdc..5266347 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -7433,6 +7433,19 @@ cost_plus:
 
           return true;
         }
+      /* Special cost test for *iorqi3_uxtw where the AND can be removed.  */
+      if (GET_MODE (x) == SImode
+	  && GET_CODE (XEXP (x, 0)) == AND
+	  && CONST_INT_P (XEXP (x, 1)))
+	{
+	  op0 = XEXP (XEXP (x, 0), 0);
+	  op1 = XEXP (XEXP (x, 0), 1);
+	  if (REG_P (SUBREG_REG (op0))
+	      && GET_MODE (SUBREG_REG (op0)) == QImode
+	      && CONST_INT_P (op1))
+	    if (INTVAL (XEXP (x, 1)) + INTVAL (op1) == 255)
+	      return true;
+	}
     /* Fall through.  */
     case XOR:
     case AND:
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index f8cdb06..6934c15 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -3441,6 +3441,21 @@
   [(set_attr "type" "logic_reg,logic_imm")]
 )
 
+;; Specialized OR instruction for combiner.  The AND is masking out bits
+;; not needed in the OR (doing a zero_extend).  The zero_extend is not
+;; needed because we know from the subreg that the upper part of the reg
+;; is zero.
+(define_insn "*iorqi3_uxtw"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+        (ior:SI (and:SI
+		  (subreg:SI (match_operand:QI 1 "register_operand" "r") 0)
+		  (match_operand:SI 2 "const_int_operand" "n"))
+		(match_operand:SI 3 "aarch64_logical_operand" "K")))]
+  "INTVAL (operands[2]) + INTVAL (operands[3]) == 255"
+  "orr\\t%w0, %w1, %3"
+  [(set_attr "type" "logic_imm")]
+)
+
 (define_insn "*and<mode>3_compare0"
   [(set (reg:CC_NZ CC_REGNUM)
 	(compare:CC_NZ
diff --git a/gcc/testsuite/gcc.target/aarch64/pr77729.c b/gcc/testsuite/gcc.target/aarch64/pr77729.c
index e69de29..2fcda9a 100644
--- a/gcc/testsuite/gcc.target/aarch64/pr77729.c
+++ b/gcc/testsuite/gcc.target/aarch64/pr77729.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+int TrieCase3_v1(const char *string)
+{
+    if((string[0] | 32) == 't') {
+        if((string[1] | 32) == 'a') {
+            if((string[2] | 32) == 'g') {
+                return 42;
+            }
+        }
+    }
+    return -1;
+}
+
+int TrieCase3_v2(const char *string)
+{
+    switch(string[0] | 32) {
+    case 't':
+        switch(string[1] | 32) {
+        case 'a':
+            switch(string[2] | 32) {
+            case 'g':
+                return 42;
+            }
+        }
+    }
+    return -1;
+}
+
+/* { dg-final { scan-assembler-not "and" } } */
+/* { dg-final { scan-assembler-not "uxtb" } } */

Reply via email to