Hi,

The mask-mode vec_init and vec_extract expanders assume that we can
create QImode vectors with the same number of units as the mask mode
has.  XTheadVector does not have fractional-LMUL modes and we ICE when
trying to expand the actual insns.

An obvious solution would be to simply disable the autovec expanders for
TARGET_XTHEADVECTOR, but exactly this optab/mode combination has no fallback
in the common expansion code.  That's a known problem and should be fixed
separately in extract_bit_field_1.

For now, though, we do not need to use small modes and can go with full
vectors instead.  The excess elements don't matter.
To that end, this patch uses paradoxical subregs on the source (for
vec_extract) and on the destination (for vec_init), leaving the
remaining elements undefined.

In order to obtain the right "full vector" I needed to adjust
get_m1_mode slightly.  By default it returns a VLA mode so when
introducing a subreg like above we can have
  (subreg:full_vector_mode (reg:small_vls_vector_mode) 0)
where full_vector_mode is a VLA mode and small_vls_vector_mode is a VLS
mode.  This won't be a valid subreg, so the patch adds a VLS_P
argument to get_m1_mode that makes it return a full VLS vector mode.

Regtested on rv64gcv_zvl512b.  As I didn't have theadvector hardware
available I made the new code paths unconditional (so they are active
for regular RVV) and re-tested.

I'll wait for the CI.  Even though it shows false negatives I think
it generally still works.

Regards
 Robin

        PR target/124147

gcc/ChangeLog:

        * config/riscv/autovec.md (vec_init<mode>qi): Work around
        fractional-LMUL modes for TARGET_XTHEADVECTOR.
        (vec_extract<mode>qi): Ditto.
        * config/riscv/riscv-protos.h (get_m1_mode): Export.
        * config/riscv/riscv-v.cc (get_m1_mode): Allow to get a VLS m1
        vector.

gcc/testsuite/ChangeLog:

        * gcc.target/riscv/rvv/xtheadvector/pr124147.c: New test.
---
 gcc/config/riscv/autovec.md                   | 43 ++++++++++++++++---
 gcc/config/riscv/riscv-protos.h               |  1 +
 gcc/config/riscv/riscv-v.cc                   | 13 ++++--
 .../riscv/rvv/xtheadvector/pr124147.c         | 11 +++++
 4 files changed, 57 insertions(+), 11 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/pr124147.c

diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index cc27a76c44f..30ccf4527af 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -419,9 +419,18 @@ (define_expand "vec_init<mode>qi"
    (match_operand 1 "")]
   "TARGET_VECTOR"
   {
-    /* Expand into a QImode vector.  */
-    machine_mode qimode = riscv_vector::get_vector_mode
+    /* Expand into a QImode vector.
+       For XTheadVector which does not have fractional-LMUL modes, we use
+       a full vector instead.  */
+    bool fractional_p = known_lt (GET_MODE_NUNITS (<MODE>mode),
+                                 BYTES_PER_RISCV_VECTOR);
+    machine_mode qimode;
+    if (!TARGET_XTHEADVECTOR || !fractional_p)
+      qimode = riscv_vector::get_vector_mode
        (QImode, GET_MODE_NUNITS (<MODE>mode)).require ();
+    else
+      qimode = riscv_vector::get_m1_mode
+       (QImode, GET_MODE_NUNITS (<MODE>mode).is_constant ()).require ();
     rtx tmp = gen_reg_rtx (qimode);
     riscv_vector::expand_vec_init (tmp, operands[1]);
 
@@ -433,7 +442,12 @@ (define_expand "vec_init<mode>qi"
     riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP, ops);
 
     /* Compare against zero.  */
-    riscv_vector::expand_vec_cmp (operands[0], NE, tmp2, CONST0_RTX (qimode));
+    rtx op0;
+    if (!TARGET_XTHEADVECTOR || !fractional_p)
+      op0 = operands[0];
+    else
+      op0 = gen_lowpart (riscv_vector::get_mask_mode (qimode), operands[0]);
+    riscv_vector::expand_vec_cmp (op0, NE, tmp2, CONST0_RTX (qimode));
     DONE;
   }
 )
@@ -1449,15 +1463,30 @@ (define_expand "vec_extract<mode>qi"
         [(match_operand          2 "nonmemory_operand")])))]
   "TARGET_VECTOR"
 {
-  /* Create an empty byte vector and set it to one under mask.  */
-  machine_mode qimode = riscv_vector::get_vector_mode
-      (QImode, GET_MODE_NUNITS (<MODE>mode)).require ();
+  /* Create an empty byte vector and set it to one under mask.
+     For XTheadVector which does not have fractional-LMUL modes, we use
+     a full vector instead.  */
+  bool fractional_p = known_lt (GET_MODE_NUNITS (<MODE>mode),
+                               BYTES_PER_RISCV_VECTOR);
+  machine_mode qimode;
+  if (!TARGET_XTHEADVECTOR || !fractional_p)
+    qimode = riscv_vector::get_vector_mode
+       (QImode, GET_MODE_NUNITS (<MODE>mode)).require ();
+  else
+    qimode = riscv_vector::get_m1_mode
+       (QImode, GET_MODE_NUNITS (<MODE>mode).is_constant ()).require ();
 
   rtx tmp1 = gen_reg_rtx (qimode);
   emit_move_insn (tmp1, gen_const_vec_duplicate (qimode, GEN_INT (0)));
   rtx ones = gen_const_vec_duplicate (qimode, GEN_INT (1));
 
-  rtx ops1[] = {tmp1, tmp1, ones, operands[1]};
+  rtx op1;
+  if (!TARGET_XTHEADVECTOR || !fractional_p)
+    op1 = operands[1];
+  else
+    op1 = gen_lowpart (riscv_vector::get_mask_mode (qimode), operands[1]);
+
+  rtx ops1[] = {tmp1, tmp1, ones, op1};
   riscv_vector::emit_vlmax_insn (code_for_pred_merge (qimode),
                                 riscv_vector::MERGE_OP, ops1);
 
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index cb6eaebc546..5fd7c5804d3 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -653,6 +653,7 @@ enum tail_policy get_prefer_tail_policy ();
 enum mask_policy get_prefer_mask_policy ();
 rtx get_avl_type_rtx (enum avl_type);
 opt_machine_mode get_lmul_mode (scalar_mode, int);
+opt_machine_mode get_m1_mode (machine_mode, bool = false);
 opt_machine_mode get_vector_mode (scalar_mode, poly_uint64);
 opt_machine_mode get_tuple_mode (machine_mode, unsigned int);
 bool simm5_p (rtx);
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 10a1109f10e..70c98b7c12c 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -2368,14 +2368,19 @@ get_lmul_mode (scalar_mode mode, int lmul)
   return E_VOIDmode;
 }
 
-/* Return the appropriate M1 mode for MODE.  */
+/* Return the appropriate LMUL1 mode for MODE.
+   If VLS_P is specified, get a VLS mode that represents a full
+   vector.  */
 
-static opt_machine_mode
-get_m1_mode (machine_mode mode)
+opt_machine_mode
+get_m1_mode (machine_mode mode, bool vls_p)
 {
   scalar_mode smode = GET_MODE_INNER (mode);
   unsigned int bytes = GET_MODE_SIZE (smode);
-  poly_uint64 m1_nunits = exact_div (BYTES_PER_RISCV_VECTOR, bytes);
+  poly_uint64 bytes_vector = BYTES_PER_RISCV_VECTOR;
+  if (vls_p)
+    bytes_vector = constant_lower_bound (bytes_vector);
+  poly_uint64 m1_nunits = exact_div (bytes_vector, bytes);
   return get_vector_mode (smode, m1_nunits);
 }
 
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/pr124147.c 
b/gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/pr124147.c
new file mode 100644
index 00000000000..2233a18f4c2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/pr124147.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mcpu=xt-c920 -mrvv-vector-bits=zvl" } */
+
+typedef __attribute__((__vector_size__(2 * sizeof(int)))) int V;
+
+V
+foo(V v)
+{
+  return v > 0;
+}
+
-- 
2.53.0

Reply via email to