Add an aarch64 SIMD optimization that converts mvn+shrn into mvni+subhn.  Replacing the not instruction with a constant (loaded via mvni) allows the constant load to be hoisted when the code is inside a loop, which enables better loop optimization.
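
For illustration, a narrowing bitwise-not such as the function below
currently compiles to an mvn followed by a shrn; with this patch it
instead becomes an mvni that materializes the all-ones constant plus a
subhn, and inside a loop the constant load can be hoisted out.  The
function name and the instruction sequences shown are illustrative only
and may differ from the actual code generation:

  #include <arm_neon.h>

  uint8x8_t
  not_narrow (uint16x8_t a)
  {
    /* ~a >> 8, narrowed to 8-bit lanes.  Before, roughly:
	 mvn	v0.16b, v0.16b
	 shrn	v0.8b, v0.8h, 8
       After, roughly:
	 mvni	v31.4s, 0
	 subhn	v0.8b, v31.8h, v0.8h
       since ~a == 0xffff - a for unsigned 16-bit lanes.  */
    return vshrn_n_u16 (vmvnq_u16 (a), 8);
  }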
Bootstrapped and regtested on aarch64-linux-gnu.

Signed-off-by: Remi Machet <rmac...@nvidia.com>

gcc/ChangeLog:

	* config/aarch64/aarch64-simd.md (*shrn_to_subhn_<mode>): Add
	pattern converting mvn+shrn into mvni+subhn.

gcc/testsuite/ChangeLog:

	* gcc.target/aarch64/simd/shrn2subhn.c: New test.

diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 6e30dc48934..f49e6fe6a26 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -5028,6 +5028,34 @@
   DONE;
 })
 
+;; Convert what would be a mvn+shrn into a mvni+subhn, because using a
+;; constant load rather than a not instruction allows for better loop
+;; optimization.  On some implementations the use of subhn also results
+;; in better throughput.
+(define_insn_and_split "*shrn_to_subhn_<mode>"
+  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=&w")
+	(truncate:<VNARROWQ>
+	  (lshiftrt:VQN
+	    (not:VQN (match_operand:VQN 1 "register_operand" "w"))
+	    (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_exact_top"))))]
+  "TARGET_SIMD"
+  "#"
+  "&& true"
+  [(const_int 0)]
+{
+  rtx tmp;
+  if (can_create_pseudo_p ())
+    tmp = gen_reg_rtx (<MODE>mode);
+  else
+    tmp = gen_rtx_REG (<MODE>mode, REGNO (operands[0]));
+  emit_insn (gen_move_insn (tmp,
+			    aarch64_simd_gen_const_vector_dup (<MODE>mode, -1)));
+  emit_insn (gen_aarch64_subhn<mode>_insn (operands[0], tmp,
+					   operands[1], operands[2]));
+  DONE;
+})
+
+
 ;; pmul.
 
 (define_insn "aarch64_pmul<mode>"
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/shrn2subhn.c b/gcc/testsuite/gcc.target/aarch64/simd/shrn2subhn.c
new file mode 100644
index 00000000000..d03af815671
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/shrn2subhn.c
@@ -0,0 +1,18 @@
+/* This test case checks that replacing a not+shift with a sub from -1 works.  */
+/* { dg-do compile } */
+/* { dg-additional-options "--save-temps -O1" } */
+/* { dg-final { scan-assembler-times "\\tsubhn\\t" 2 } } */
+
+#include <stdint.h>
+#include <arm_neon.h>
+#include <stdlib.h>
+
+uint8x8_t neg_narrow(uint16x8_t a) {
+  uint16x8_t b = vmvnq_u16(a);
+  return vshrn_n_u16(b, 8);
+}
+
+uint8x8_t neg_narrow_vsubhn(uint16x8_t a) {
+  uint16x8_t ones = vdupq_n_u16(0xffff);
+  return vsubhn_u16(ones, a);
+}