The svzero_mask_za intrinsic previously tried to use the shortest
combination of .b, .h, .s and .d tiles, mixing suffixes where necessary.
However, Iain S pointed out that LLVM instead requires all tiles in the
list to have the same suffix.  GAS accepts both forms, so this patch
generates the LLVM-friendly form.  For example, a mask of 0xab used to
produce "zero { za1.h, za0.d }" and now produces
"zero { za0.d, za1.d, za3.d, za5.d, za7.d }".

Tested on aarch64-linux-gnu & pushed.

Please revert the patch if it causes any problems.

Richard


gcc/
        * config/aarch64/aarch64.cc (aarch64_output_sme_zero_za): Require
        all tiles to have the same suffix.

gcc/testsuite/
        * gcc.target/aarch64/sme/acle-asm/zero_mask_za.c (zero_mask_za_ab)
        (zero_mask_za_d7, zero_mask_za_bf): Expect a list of .d tiles instead
        of a mixture.
---
 gcc/config/aarch64/aarch64.cc                 | 20 +++++++++++--------
 .../aarch64/sme/acle-asm/zero_mask_za.c       |  6 +++---
 2 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index a2e3d208d76..1beec94629d 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -13210,29 +13210,33 @@ aarch64_output_sme_zero_za (rtx mask)
   /* The last entry in the list has the form "za7.d }", but that's the
      same length as "za7.d, ".  */
   static char buffer[sizeof("zero\t{ ") + sizeof ("za7.d, ") * 8 + 1];
-  unsigned int i = 0;
-  i += snprintf (buffer + i, sizeof (buffer) - i, "zero\t");
-  const char *prefix = "{ ";
   for (auto &tile : tiles)
     {
       unsigned int tile_mask = tile.mask;
       unsigned int tile_index = 0;
+      unsigned int i = snprintf (buffer, sizeof (buffer), "zero\t");
+      const char *prefix = "{ ";
+      auto remaining_mask = mask_val;
       while (tile_mask < 0x100)
        {
-         if ((mask_val & tile_mask) == tile_mask)
+         if ((remaining_mask & tile_mask) == tile_mask)
            {
              i += snprintf (buffer + i, sizeof (buffer) - i, "%sza%d.%c",
                             prefix, tile_index, tile.letter);
              prefix = ", ";
-             mask_val &= ~tile_mask;
+             remaining_mask &= ~tile_mask;
            }
          tile_mask <<= 1;
          tile_index += 1;
        }
+      if (remaining_mask == 0)
+       {
+         gcc_assert (i + 3 <= sizeof (buffer));
+         snprintf (buffer + i, sizeof (buffer) - i, " }");
+         return buffer;
+       }
     }
-  gcc_assert (mask_val == 0 && i + 3 <= sizeof (buffer));
-  snprintf (buffer + i, sizeof (buffer) - i, " }");
-  return buffer;
+  gcc_unreachable ();
 }
 
 /* Return size in bits of an arithmetic operand which is shifted/scaled and
diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/zero_mask_za.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/zero_mask_za.c
index 9ce7331ebdd..2ba8f8cc332 100644
--- a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/zero_mask_za.c
+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/zero_mask_za.c
@@ -103,21 +103,21 @@ PROTO (zero_mask_za_aa, void, ()) { svzero_mask_za (0xaa); }
 
 /*
 ** zero_mask_za_ab:
-**     zero    { za1\.h, za0\.d }
+**     zero    { za0\.d, za1\.d, za3\.d, za5\.d, za7\.d }
 **     ret
 */
 PROTO (zero_mask_za_ab, void, ()) { svzero_mask_za (0xab); }
 
 /*
 ** zero_mask_za_d7:
-**     zero    { za0\.h, za1\.d, za7\.d }
+**     zero    { za0\.d, za1\.d, za2\.d, za4\.d, za6\.d, za7\.d }
 **     ret
 */
 PROTO (zero_mask_za_d7, void, ()) { svzero_mask_za (0xd7); }
 
 /*
 ** zero_mask_za_bf:
-**     zero    { za1\.h, za0\.s, za2\.d }
+**     zero    { za0\.d, za1\.d, za2\.d, za3\.d, za4\.d, za5\.d, za7\.d }
 **     ret
 */
 PROTO (zero_mask_za_bf, void, ()) { svzero_mask_za (0xbf); }
-- 
2.25.1

Reply via email to