https://gcc.gnu.org/bugzilla/show_bug.cgi?id=121949

--- Comment #1 from Tamar Christina <tnfchris at gcc dot gnu.org> ---
(In reply to Tamar Christina from comment #0)
> While the optimal solution may be to just extend row to a 64-bit IV, it's
> unclear why we didn't support unpacking in this case.

Indeed, just removing the check:

      incompatible_op1_vectype_p
        = (op1_vectype == NULL_TREE
           || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype),
                        TYPE_VECTOR_SUBPARTS (vectype))
           || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype));
      if (incompatible_op1_vectype_p
          && (SLP_TREE_DEF_TYPE (slp_op1) != vect_constant_def
              || slp_op1->refcnt != 1))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "unusable type for last operand in"
                             " vector/vector shift/rotate.\n");
          return false;
        }

gets the unpacked version as expected:

.L3:
        movprfx z0, z29
        uxtw    z0.d, p6/m, z29.d
        movprfx z27, z29
        sxtw    z27.d, p6/m, z29.d
        lsrr    z0.d, p6/m, z0.d, z30.d
        and     z0.d, z0.d, #0x1
        cmpne   p7.d, p7/z, z0.d, #0
        ld1d    z28.d, p7/z, [x2, x0, lsl 3]
        add     z28.d, z27.d, z28.d
        st1d    z28.d, p7, [x2, x0, lsl 3]
        add     z29.s, z29.s, z31.s
        add     x0, x0, x3
        whilelo p7.d, w0, w1
        b.any   .L3

So it's unclear to me why that check is there. I will check the git history.

Reply via email to