Christophe Lyon <christophe.l...@linaro.org> writes: > Thanks for the feedback. How about v2 attached? > Do you want me to merge neon_vec_unpack<US> and > mve_vec_unpack<US> and only have different assembly? > if (TARGET_HAVE_MVE) > return "vmovlb.<US>%#<V_sz_elem> %q0, %q1"; > else > return "vmovlb.<US>%#<V_sz_elem> %q0, %q1";
I think it'd be better to keep them separate, given that they have different attributes. > @@ -2199,10 +2222,23 @@ (define_insn "mve_vmovnbq_<supf><mode>" > [(set_attr "type" "mve_move") > ]) > > +;; vmovnb pattern used by the vec_pack_trunc expander to avoid the > +;; need for an extra register. “to avoid the need for an uninitailized input operand” might be clearer. > +(define_insn "@mve_vec_pack_trunc_lo_<mode>" > + [ > + (set (match_operand:<V_narrow_pack> 0 "s_register_operand" "=w") > + (unspec:<V_narrow_pack> [(match_operand:MVE_5 1 "s_register_operand" > "w")] > + VMOVNBQ_S)) > + ] > + "TARGET_HAVE_MVE" > + "vmovnb.i%#<V_sz_elem> %q0, %q1" > + [(set_attr "type" "mve_move") > +]) > + > […] > diff --git a/gcc/config/arm/vec-common.md b/gcc/config/arm/vec-common.md > index 430a92ce966..3afb3e1d891 100644 > --- a/gcc/config/arm/vec-common.md > +++ b/gcc/config/arm/vec-common.md > @@ -632,3 +632,91 @@ (define_expand "clz<mode>2" > "ARM_HAVE_<MODE>_ARITH > && !TARGET_REALLY_IWMMXT" > ) > + > +;; vmovl[tb] are not available for V4SI on MVE > +(define_expand "vec_unpack<US>_hi_<mode>" > + [(match_operand:<V_unpack> 0 "register_operand") > + (SE:<V_unpack> (match_operand:VU 1 "register_operand"))] > + "ARM_HAVE_<MODE>_ARITH > + && !TARGET_REALLY_IWMMXT > + && ! (<MODE>mode == V4SImode && TARGET_HAVE_MVE) > + && !BYTES_BIG_ENDIAN" > + { > + rtvec v = rtvec_alloc (<V_mode_nunits>/2); > + rtx t1; > + int i; > + for (i = 0; i < (<V_mode_nunits>/2); i++) > + RTVEC_ELT (v, i) = GEN_INT ((<V_mode_nunits>/2) + i); > + > + t1 = gen_rtx_PARALLEL (<MODE>mode, v); > + > + if (TARGET_NEON) > + emit_insn (gen_neon_vec_unpack<US>_hi_<mode> (operands[0], > + operands[1], > + t1)); > + else > + emit_insn (gen_mve_vec_unpack<US>_hi_<mode> (operands[0], > + operands[1], > + t1)); > + DONE; > + } > +) Since the patterns are the same for both MVE and Neon (a good thing!), it would be simpler to write this as: (define_insn "mve_vec_unpack<US>_lo_<mode>" [(set (match_operand:<V_unpack> 0 "register_operand" "=w") (SE:<V_unpack> (vec_select:<V_HALF> (match_operand:VU 1 "register_operand" "w") (match_dup 2))))] … and then set operands[2] to what is t1 above. There would then be no need to call emit_insn & DONE, and we could use the natural iterator (MVE_3) for the MVE patterns. Same idea for the lo version. > […] > +;; vmovn[tb] are not available for V2DI on MVE > +(define_expand "vec_pack_trunc_<mode>" > + [(set (match_operand:<V_narrow_pack> 0 "register_operand") > + (vec_concat:<V_narrow_pack> > + (truncate:<V_narrow> > + (match_operand:VN 1 "register_operand")) > + (truncate:<V_narrow> > + (match_operand:VN 2 "register_operand"))))] > + "ARM_HAVE_<MODE>_ARITH > + && !TARGET_REALLY_IWMMXT > + && ! (<MODE>mode == V2DImode && TARGET_HAVE_MVE) > + && !BYTES_BIG_ENDIAN" > + { > + if (TARGET_NEON) > + { > + emit_insn (gen_neon_quad_vec_pack_trunc_<mode> (operands[0], > operands[1], > + operands[2])); > + } > + else > + { > + rtx tmpreg = gen_reg_rtx (<V_narrow_pack>mode); > + emit_insn (gen_mve_vec_pack_trunc_lo (<MODE>mode, tmpreg, > operands[1])); > + emit_insn (gen_mve_vmovntq (VMOVNTQ_S, <MODE>mode, > + tmpreg, tmpreg, operands[2])); > + emit_move_insn (operands[0], tmpreg); vmovntq can assign directly to operands[0], without a separate move. OK with those changes, thanks. Richard > + } > + DONE; > + } > +) > […]