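Extend AdvSIMD constant materialization to recognize 128-bit integer vector
constants where the low 64 bits contain a duplicated scalar value and the high
64 bits are zero.  Such constants can be materialized with a single 64-bit
MOVI, because writing the 64-bit vector form zeroes the upper 64 bits of the
register.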
Bootstrapped and tested on aarch64-linux-gnu.

PR target/113926

gcc/ChangeLog:

        * config/aarch64/aarch64.cc (struct simd_immediate_info): Add width
        field to record AdvSIMD output vector width.
        (simd_immediate_info::simd_immediate_info): Initialize width to zero
        in all constructors.
        (aarch64_simd_valid_imm): Allow 128-bit AdvSIMD MOV immediates with
        zero high 64 bits to be materialized using 64-bit MOVI.
        (aarch64_output_simd_imm): Use recorded immediate width when outputting
        AdvSIMD immediates.

gcc/testsuite/ChangeLog:

        * gcc.target/aarch64/pr113926.c: New test.

Signed-off-by: Naveen <[email protected]>
---
 gcc/config/aarch64/aarch64.cc               | 52 +++++++++++++++++----
 gcc/testsuite/gcc.target/aarch64/pr113926.c | 43 +++++++++++++++++
 2 files changed, 86 insertions(+), 9 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/pr113926.c

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 3816df92b18..033a868d15e 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -169,6 +169,9 @@ struct simd_immediate_info
   /* The mode of the elements.  */
   scalar_mode elt_mode;
 
+  /* If nonzero, the vector width in bits for printing AdvSIMD immediates.  */
+  unsigned int width = 0;
+
   /* The instruction to use to move the immediate into a vector.  */
   insn_type insn;
 
@@ -203,7 +206,7 @@ struct simd_immediate_info
    ELT_MODE_IN and value VALUE_IN.  */
 inline simd_immediate_info
 ::simd_immediate_info (scalar_float_mode elt_mode_in, rtx value_in)
-  : elt_mode (elt_mode_in), insn (MOV)
+  : elt_mode (elt_mode_in), width (0), insn (MOV)
 {
   u.mov.value = value_in;
   u.mov.modifier = LSL;
@@ -218,7 +221,7 @@ inline simd_immediate_info
                       unsigned HOST_WIDE_INT value_in,
                       insn_type insn_in, modifier_type modifier_in,
                       unsigned int shift_in)
-  : elt_mode (elt_mode_in), insn (insn_in)
+  : elt_mode (elt_mode_in), width (0), insn (insn_in)
 {
   u.mov.value = gen_int_mode (value_in, elt_mode_in);
   u.mov.modifier = modifier_in;
@@ -229,7 +232,7 @@ inline simd_immediate_info
    and where element I is equal to BASE_IN + I * STEP_IN.  */
 inline simd_immediate_info
 ::simd_immediate_info (scalar_mode elt_mode_in, rtx base_in, rtx step_in)
-  : elt_mode (elt_mode_in), insn (INDEX)
+  : elt_mode (elt_mode_in), width (0), insn (INDEX)
 {
   u.index.base = base_in;
   u.index.step = step_in;
@@ -240,7 +243,7 @@ inline simd_immediate_info
 inline simd_immediate_info
 ::simd_immediate_info (scalar_int_mode elt_mode_in,
                       aarch64_svpattern pattern_in)
-  : elt_mode (elt_mode_in), insn (PTRUE)
+  : elt_mode (elt_mode_in), width (0), insn (PTRUE)
 {
   u.pattern = pattern_in;
 }
@@ -24491,12 +24494,35 @@ aarch64_simd_valid_imm (rtx op, simd_immediate_info *info,
        }
     }
 
-  /* The immediate must repeat every eight bytes.  */
-  unsigned int nbytes = bytes.length ();
-  for (unsigned i = 8; i < nbytes; ++i)
-    if (bytes[i] != bytes[i - 8])
+/* The immediate must normally repeat every eight bytes.  For MOV
+   also allow a 128-bit AdvSIMD constant whose high 64 bits are zero
+   since it can be materialized using a 64-bit MOVI.  */
+unsigned int nbytes = bytes.length ();
+unsigned int output_width = 0;
+bool repeats_every_8_bytes = true;
+
+for (unsigned int i = 8; i < nbytes; ++i)
+  if (bytes[i] != bytes[i - 8])
+    {
+      repeats_every_8_bytes = false;
+      break;
+    }
+
+if (!repeats_every_8_bytes)
+  {
+    if (which != AARCH64_CHECK_MOV
+    || !(vec_flags & VEC_ADVSIMD)
+    || aarch64_sve_mode_p (mode)
+    || nbytes != 16)
       return false;
 
+    for (unsigned int i = 8; i < nbytes; ++i)
+      if (bytes[i] != 0)
+    return false;
+
+    output_width = 64;
+  }
+
   /* Get the repeating 8-byte value as an integer.  No endian correction
      is needed here because bytes is already in lsb-first order.  */
   unsigned HOST_WIDE_INT val64 = 0;
@@ -24548,6 +24574,7 @@ aarch64_simd_valid_imm (rtx op, simd_immediate_info *info,
            {
              rtx float_val = const_double_from_real_value (r, fmode);
              *info = simd_immediate_info (fmode, float_val);
+             info->width = output_width;
            }
          return true;
        }
@@ -24557,7 +24584,11 @@ aarch64_simd_valid_imm (rtx op, simd_immediate_info *info,
     return aarch64_sve_valid_immediate (ival, imode, info, which);
 
   if (aarch64_advsimd_valid_immediate (val64, imode, info, which))
-    return true;
+    {
+      if (info)
+       info->width = output_width;
+      return true;
+    }
 
   if (TARGET_SVE)
     return aarch64_sve_valid_immediate (ival, imode, info, which);
@@ -26925,6 +26956,9 @@ aarch64_output_simd_imm (rtx const_vector, unsigned width,
   is_valid = aarch64_simd_valid_imm (const_vector, &info, which);
   gcc_assert (is_valid);
 
+  if (info.width != 0)
+    width = info.width;
+
   element_char = sizetochar (GET_MODE_BITSIZE (info.elt_mode));
   lane_count = width / GET_MODE_BITSIZE (info.elt_mode);
 
diff --git a/gcc/testsuite/gcc.target/aarch64/pr113926.c b/gcc/testsuite/gcc.target/aarch64/pr113926.c
new file mode 100644
index 00000000000..e6e07087e87
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr113926.c
@@ -0,0 +1,43 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+/* Test recognition of 128-bit integer SIMD constants where the low 64 bits
+   contain a duplicated scalar value and the high 64 bits are all zero.
+   Such constants should be materialized using a single MOVI instruction.
+   PR113926.  */
+
+typedef signed char v16qi __attribute__((vector_size(16)));
+typedef short v8hi __attribute__((vector_size(16)));
+typedef int v4si __attribute__((vector_size(16)));
+
+/* QI mode: low 64 bits duplicated, high 64 bits zero.  */
+v16qi
+f_qi (void)
+{
+  return (v16qi)
+    { 3, 3, 3, 3, 3, 3, 3, 3,
+      0, 0, 0, 0, 0, 0, 0, 0 };
+}
+
+/* HI mode: low 64 bits duplicated, high 64 bits zero.  */
+v8hi
+f_hi (void)
+{
+  return (v8hi)
+    { 2, 2, 2, 2,
+      0, 0, 0, 0 };
+}
+
+/* SI mode: low 64 bits duplicated, high 64 bits zero.  */
+v4si
+f_si (void)
+{
+  return (v4si)
+    { 1, 1, 0, 0 };
+}
+
+/* Each function should generate exactly one MOVI instruction.  */
+/* { dg-final { scan-assembler-times {\tmovi\tv[0-9]+\.8b, 0x3} 1 } } */
+/* { dg-final { scan-assembler-times {\tmovi\tv[0-9]+\.4h, 0x2} 1 } } */
+/* { dg-final { scan-assembler-times {\tmovi\tv[0-9]+\.2s, 0x1} 1 } } */
+/* { dg-final { scan-assembler-not {\tldr\tq[0-9]+,} } } */
-- 
2.34.1

Reply via email to