Improve expansion of immediates which can be created from a bitmask
immediate and 2 MOVKs.  This reduces the number of 4-instruction
immediates in SPECINT/FP by 10-15%.

Passes regress, OK for commit?

gcc/ChangeLog:

        PR target/106583
        * config/aarch64/aarch64.cc (aarch64_internal_mov_immediate):
        Add support for a bitmask immediate with 2 MOVKs.

gcc/testsuite/ChangeLog:

        PR target/106583
        * gcc.target/aarch64/pr106583.c: New test.

---

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 926e81f028c82aac9a5fecc18f921f84399c24ae..1601d11710cb6132c80a77bb4fe2f8429519aa5a 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -5568,7 +5568,7 @@ aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
   one_match = ((~val & mask) == 0) + ((~val & (mask << 16)) == 0) +
     ((~val & (mask << 32)) == 0) + ((~val & (mask << 48)) == 0);
 
-  if (zero_match != 2 && one_match != 2)
+  if (zero_match < 2 && one_match < 2)
     {
       /* Try emitting a bitmask immediate with a movk replacing 16 bits.
         For a 64-bit bitmask try whether changing 16 bits to all ones or
@@ -5600,6 +5600,43 @@ aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
        }
     }
 
+  /* Try a bitmask plus 2 movk to generate the immediate in 3 instructions.  */
+  if (zero_match + one_match == 0)
+    {
+      mask = 0xffffffff;
+
+      for (i = 0; i < 64; i += 16)
+       {
+         val2 = val & ~mask;
+         if (aarch64_bitmask_imm (val2, mode))
+           break;
+         val2 = val | mask;
+         if (aarch64_bitmask_imm (val2, mode))
+           break;
+         val2 = val2 & ~mask;
+         val2 = val2 | (((val2 >> 32) | (val2 << 32)) & mask);
+         if (aarch64_bitmask_imm (val2, mode))
+           break;
+
+         mask = (mask << 16) | (mask >> 48);
+       }
+
+      if (i != 64)
+       {
+         if (generate)
+           {
+             emit_insn (gen_rtx_SET (dest, GEN_INT (val2)));
+             emit_insn (gen_insv_immdi (dest, GEN_INT (i),
+                                        GEN_INT ((val >> i) & 0xffff)));
+             i = (i + 16) & 63;
+             emit_insn (gen_insv_immdi (dest, GEN_INT (i),
+                                        GEN_INT ((val >> i) & 0xffff)));
+           }
+
+         return 3;
+       }
+    }
+
   /* Generate 2-4 instructions, skipping 16 bits of all zeroes or ones which
      are emitted by the initial mov.  If one_match > zero_match, skip set bits,
      otherwise skip zero bits.  */
diff --git a/gcc/testsuite/gcc.target/aarch64/pr106583.c b/gcc/testsuite/gcc.target/aarch64/pr106583.c
new file mode 100644
index 0000000000000000000000000000000000000000..f0a027a0950e506d4ddaacce5e151f57070948dc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr106583.c
@@ -0,0 +1,30 @@
+/* { dg-do assemble } */
+/* { dg-options "-O2 --save-temps" } */
+
+long f1 (void)
+{
+  return 0x7efefefefefefeff;
+}
+
+long f2 (void)
+{
+  return 0x12345678aaaaaaaa;
+}
+
+long f3 (void)
+{
+  return 0x1234cccccccc5678;
+}
+
+long f4 (void)
+{
+  return 0x7777123456787777;
+}
+
+long f5 (void)
+{
+  return 0x5555555512345678;
+}
+
+/* { dg-final { scan-assembler-times {\tmovk\t} 10 } } */
+/* { dg-final { scan-assembler-times {\tmov\t} 5 } } */

Reply via email to