Pengxuan Zheng <quic_pzh...@quicinc.com> writes: > diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc > index 15f08cebeb1..98ce85dfdae 100644 > --- a/gcc/config/aarch64/aarch64.cc > +++ b/gcc/config/aarch64/aarch64.cc > @@ -23621,6 +23621,36 @@ aarch64_simd_valid_and_imm (rtx op) > return aarch64_simd_valid_imm (op, NULL, AARCH64_CHECK_AND); > } > > +/* Return true if OP is a valid SIMD and immediate which allows the and be
s/and be/and to be/ > + optimized as fmov. If ELT_SIZE is nonnull, it represents the size of the > + register for fmov. */ Maybe rename this to ELT_BITSIZE (see below), and say: If ELT_BITSIZE is nonnull, use it to return the number of bits to move. > +bool > +aarch64_simd_valid_and_imm_fmov (rtx op, unsigned int *elt_size) > +{ > + machine_mode mode = GET_MODE (op); > + gcc_assert (!aarch64_sve_mode_p (mode)); > + > + auto_vec<target_unit, 16> buffer; > + unsigned int n_bytes = GET_MODE_SIZE (mode).to_constant (); > + buffer.reserve (n_bytes); > + > + bool ok = native_encode_rtx (mode, op, buffer, 0, n_bytes); > + gcc_assert (ok); > + > + auto mask = native_decode_int (buffer, 0, n_bytes, n_bytes * > BITS_PER_UNIT); > + int set_bit = wi::exact_log2 (mask + 1); > + if ((set_bit == 16 && TARGET_SIMD_F16INST) > + || set_bit == 32 > + || set_bit == 64) > + { > + if (elt_size) > + *elt_size = set_bit / BITS_PER_UNIT; I didn't notice last time that the only consumer multiplies by BITS_PER_UNIT again, so how about making this: *elt_bitsize = set_bit; and removing the later multiplication. Please leave 24 hours for others to comment, but otherwise the patch is ok with those changes, thanks. Richard > + return true; > + } > + > + return false; > +} > + > /* Return true if OP is a valid SIMD xor immediate for SVE. */ > bool > aarch64_simd_valid_xor_imm (rtx op) > @@ -25757,6 +25787,26 @@ aarch64_float_const_representable_p (rtx x) > return aarch64_real_float_const_representable_p (r); > } > > +/* Returns the string with the fmov instruction which is equivalent to an and > + instruction with the SIMD immediate CONST_VECTOR. 
*/ > +char* > +aarch64_output_fmov (rtx const_vector) > +{ > + bool is_valid; > + static char templ[40]; > + char element_char; > + unsigned int elt_size; > + > + is_valid = aarch64_simd_valid_and_imm_fmov (const_vector, &elt_size); > + gcc_assert (is_valid); > + > + element_char = sizetochar (elt_size * BITS_PER_UNIT); > + snprintf (templ, sizeof (templ), "fmov\t%%%c0, %%%c1", > + element_char, element_char); > + > + return templ; > +} > + > /* Returns the string with the instruction for the SIMD immediate > * CONST_VECTOR of MODE and WIDTH. WHICH selects a move, and(bic) or orr. > */ > char* > diff --git a/gcc/config/aarch64/constraints.md > b/gcc/config/aarch64/constraints.md > index e8321c4d2fb..e9f69f823a6 100644 > --- a/gcc/config/aarch64/constraints.md > +++ b/gcc/config/aarch64/constraints.md > @@ -466,6 +466,13 @@ (define_constraint "Do" > (and (match_code "const_vector") > (match_test "aarch64_simd_valid_orr_imm (op)"))) > > +(define_constraint "Df" > + "@internal > + A constraint that matches a vector of immediates for and which can be > + optimized as fmov." > + (and (match_code "const_vector") > + (match_test "aarch64_simd_valid_and_imm_fmov (op)"))) > + > (define_constraint "Db" > "@internal > A constraint that matches vector of immediates for and/bic." 
> diff --git a/gcc/config/aarch64/predicates.md > b/gcc/config/aarch64/predicates.md > index 1ab1c696c62..2c6af831eae 100644 > --- a/gcc/config/aarch64/predicates.md > +++ b/gcc/config/aarch64/predicates.md > @@ -123,7 +123,8 @@ (define_predicate "aarch64_reg_or_orr_imm" > (define_predicate "aarch64_reg_or_and_imm" > (ior (match_operand 0 "register_operand") > (and (match_code "const_vector") > - (match_test "aarch64_simd_valid_and_imm (op)")))) > + (ior (match_test "aarch64_simd_valid_and_imm (op)") > + (match_test "aarch64_simd_valid_and_imm_fmov (op)"))))) > > (define_predicate "aarch64_reg_or_xor_imm" > (ior (match_operand 0 "register_operand") > diff --git a/gcc/testsuite/gcc.target/aarch64/fmov-1-be.c > b/gcc/testsuite/gcc.target/aarch64/fmov-1-be.c > new file mode 100644 > index 00000000000..65dd4f52d09 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/fmov-1-be.c > @@ -0,0 +1,149 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -mbig-endian" } */ > +/* { dg-final { check-function-bodies "**" "" "" } } */ > + > +typedef int v2si __attribute__ ((vector_size (8))); > +typedef float v2sf __attribute__ ((vector_size (8))); > +typedef short v4hi __attribute__ ((vector_size (8))); > +typedef char v8qi __attribute__ ((vector_size (8))); > +typedef long v2di __attribute__ ((vector_size (16))); > +typedef double v2df __attribute__ ((vector_size (16))); > +typedef int v4si __attribute__ ((vector_size (16))); > +typedef float v4sf __attribute__ ((vector_size (16))); > +typedef short v8hi __attribute__ ((vector_size (16))); > +typedef char v16qi __attribute__ ((vector_size (16))); > + > +/* > +** f_v4hi: > +** fmov s0, s0 > +** ret > +*/ > +v4hi > +f_v4hi (v4hi x) > +{ > + return x & (v4hi){ 0, 0, 0xffff, 0xffff }; > +} > + > +/* > +** g_v4hi: > +** movi d([0-9]+), 0xffff00000000ffff > +** and v0.8b, v0.8b, v\1.8b > +** ret > +*/ > +v4hi > +g_v4hi (v4hi x) > +{ > + return x & (v4hi){ 0xffff, 0, 0, 0xffff }; > +} > + > +/* > +** f_v8hi: > +** fmov s0, s0 > 
+** ret > +*/ > +v8hi > +f_v8hi (v8hi x) > +{ > + return x & (v8hi){ 0, 0, 0, 0, 0, 0, 0xffff, 0xffff }; > +} > + > +/* > +** g_v8hi: > +** fmov d0, d0 > +** ret > +*/ > +v8hi > +g_v8hi (v8hi x) > +{ > + return x & (v8hi){ 0, 0, 0, 0, 0xffff, 0xffff, 0xffff, 0xffff }; > +} > + > +/* > +** f_v2si: > +** fmov s0, s0 > +** ret > +*/ > +v2si > +f_v2si (v2si x) > +{ > + return x & (v2si){ 0, 0xffffffff }; > +} > + > +/* > +** f_v2di: > +** fmov d0, d0 > +** ret > +*/ > +v2di > +f_v2di (v2di x) > +{ > + return x & (v2di){ 0, 0xffffffffffffffff }; > +} > + > +/* > +** g_v2di: > +** fmov s0, s0 > +** ret > +*/ > +v2di > +g_v2di (v2di x) > +{ > + return x & (v2di){ 0, 0xffffffff }; > +} > + > +/* > +** f_v4si: > +** fmov s0, s0 > +** ret > +*/ > +v4si > +f_v4si (v4si x) > +{ > + return x & (v4si){ 0, 0, 0, 0xffffffff }; > +} > + > +/* > +** h_v4si: > +** fmov d0, d0 > +** ret > +*/ > +v4si > +h_v4si (v4si x) > +{ > + return x & (v4si){ 0, 0, 0xffffffff, 0xffffffff }; > +} > + > +/* > +** f_v8qi: > +** fmov s0, s0 > +** ret > +*/ > +v8qi > +f_v8qi (v8qi x) > +{ > + return x & (v8qi){ 0, 0, 0, 0, 0xff, 0xff, 0xff, 0xff }; > +} > + > +/* > +** f_v16qi: > +** fmov d0, d0 > +** ret > +*/ > +v16qi > +f_v16qi (v16qi x) > +{ > + return x & (v16qi){ 0, 0, 0, 0, 0, 0, 0, 0, > + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; > +} > + > +/* > +** g_v16qi: > +** fmov s0, s0 > +** ret > +*/ > +v16qi > +g_v16qi (v16qi x) > +{ > + return x & (v16qi){ 0, 0, 0, 0, 0, 0, 0, 0, > + 0, 0, 0, 0, 0xff, 0xff, 0xff, 0xff }; > +} > diff --git a/gcc/testsuite/gcc.target/aarch64/fmov-1-le.c > b/gcc/testsuite/gcc.target/aarch64/fmov-1-le.c > new file mode 100644 > index 00000000000..d969e2ab6b1 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/fmov-1-le.c > @@ -0,0 +1,149 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -mlittle-endian" } */ > +/* { dg-final { check-function-bodies "**" "" "" } } */ > + > +typedef int v2si __attribute__ ((vector_size (8))); > +typedef float v2sf 
__attribute__ ((vector_size (8))); > +typedef short v4hi __attribute__ ((vector_size (8))); > +typedef char v8qi __attribute__ ((vector_size (8))); > +typedef long v2di __attribute__ ((vector_size (16))); > +typedef double v2df __attribute__ ((vector_size (16))); > +typedef int v4si __attribute__ ((vector_size (16))); > +typedef float v4sf __attribute__ ((vector_size (16))); > +typedef short v8hi __attribute__ ((vector_size (16))); > +typedef char v16qi __attribute__ ((vector_size (16))); > + > +/* > +** f_v4hi: > +** fmov s0, s0 > +** ret > +*/ > +v4hi > +f_v4hi (v4hi x) > +{ > + return x & (v4hi){ 0xffff, 0xffff, 0, 0 }; > +} > + > +/* > +** g_v4hi: > +** movi d([0-9]+), 0xffff00000000ffff > +** and v0.8b, v0.8b, v\1.8b > +** ret > +*/ > +v4hi > +g_v4hi (v4hi x) > +{ > + return x & (v4hi){ 0xffff, 0, 0, 0xffff }; > +} > + > +/* > +** f_v8hi: > +** fmov s0, s0 > +** ret > +*/ > +v8hi > +f_v8hi (v8hi x) > +{ > + return x & (v8hi){ 0xffff, 0xffff, 0, 0, 0, 0, 0, 0 }; > +} > + > +/* > +** g_v8hi: > +** fmov d0, d0 > +** ret > +*/ > +v8hi > +g_v8hi (v8hi x) > +{ > + return x & (v8hi){ 0xffff, 0xffff, 0xffff, 0xffff, 0, 0, 0, 0 }; > +} > + > +/* > +** f_v2si: > +** fmov s0, s0 > +** ret > +*/ > +v2si > +f_v2si (v2si x) > +{ > + return x & (v2si){ 0xffffffff, 0 }; > +} > + > +/* > +** f_v2di: > +** fmov d0, d0 > +** ret > +*/ > +v2di > +f_v2di (v2di x) > +{ > + return x & (v2di){ 0xffffffffffffffff, 0 }; > +} > + > +/* > +** g_v2di: > +** fmov s0, s0 > +** ret > +*/ > +v2di > +g_v2di (v2di x) > +{ > + return x & (v2di){ 0xffffffff, 0 }; > +} > + > +/* > +** f_v4si: > +** fmov s0, s0 > +** ret > +*/ > +v4si > +f_v4si (v4si x) > +{ > + return x & (v4si){ 0xffffffff, 0, 0, 0 }; > +} > + > +/* > +** h_v4si: > +** fmov d0, d0 > +** ret > +*/ > +v4si > +h_v4si (v4si x) > +{ > + return x & (v4si){ 0xffffffff, 0xffffffff, 0, 0 }; > +} > + > +/* > +** f_v8qi: > +** fmov s0, s0 > +** ret > +*/ > +v8qi > +f_v8qi (v8qi x) > +{ > + return x & (v8qi){ 0xff, 0xff, 0xff, 0xff, 0, 0, 0, 
0 }; > +} > + > +/* > +** f_v16qi: > +** fmov d0, d0 > +** ret > +*/ > +v16qi > +f_v16qi (v16qi x) > +{ > + return x & (v16qi){ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, > + 0, 0, 0, 0, 0, 0, 0, 0 }; > +} > + > +/* > +** g_v16qi: > +** fmov s0, s0 > +** ret > +*/ > +v16qi > +g_v16qi (v16qi x) > +{ > + return x & (v16qi){ 0xff, 0xff, 0xff, 0xff, 0, 0, 0, 0, > + 0, 0, 0, 0, 0, 0, 0, 0 }; > +} > diff --git a/gcc/testsuite/gcc.target/aarch64/fmov-2-be.c > b/gcc/testsuite/gcc.target/aarch64/fmov-2-be.c > new file mode 100644 > index 00000000000..1e38066b4cf > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/fmov-2-be.c > @@ -0,0 +1,90 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -mbig-endian" } */ > +/* { dg-final { check-function-bodies "**" "" "" } } */ > + > +#pragma GCC target ("arch=armv8.2-a+fp16") > + > +typedef int v2si __attribute__ ((vector_size (8))); > +typedef short v4hi __attribute__ ((vector_size (8))); > +typedef char v8qi __attribute__ ((vector_size (8))); > +typedef long v2di __attribute__ ((vector_size (16))); > +typedef int v4si __attribute__ ((vector_size (16))); > +typedef short v8hi __attribute__ ((vector_size (16))); > +typedef char v16qi __attribute__ ((vector_size (16))); > + > +/* > +** f_v2di: > +** fmov h0, h0 > +** ret > +*/ > +v2di > +f_v2di (v2di x) > +{ > + return x & (v2di){ 0, 0xffff }; > +} > + > +/* > +** f_v4si: > +** fmov h0, h0 > +** ret > +*/ > +v4si > +f_v4si (v4si x) > +{ > + return x & (v4si){ 0, 0, 0, 0xffff }; > +} > + > +/* > +** f_v2si: > +** fmov h0, h0 > +** ret > +*/ > +v2si > +f_v2si (v2si x) > +{ > + return x & (v2si){ 0, 0xffff }; > +} > + > +/* > +** f_v8hi: > +** fmov h0, h0 > +** ret > +*/ > +v8hi > +f_v8hi (v8hi x) > +{ > + return x & (v8hi){ 0, 0, 0, 0, 0, 0, 0, 0xffff }; > +} > + > +/* > +** f_v4hi: > +** fmov h0, h0 > +** ret > +*/ > +v4hi > +f_v4hi (v4hi x) > +{ > + return x & (v4hi){ 0, 0, 0, 0xffff }; > +} > + > +/* > +** f_v16qi: > +** fmov h0, h0 > +** ret > +*/ > +v16qi > +f_v16qi 
(v16qi x) > +{ > + return x & (v16qi){ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xff, 0xff }; > +} > + > +/* > +** f_v8qi: > +** fmov h0, h0 > +** ret > +*/ > +v8qi > +f_v8qi (v8qi x) > +{ > + return x & (v8qi){ 0, 0, 0, 0, 0, 0, 0xff, 0xff }; > +} > diff --git a/gcc/testsuite/gcc.target/aarch64/fmov-2-le.c > b/gcc/testsuite/gcc.target/aarch64/fmov-2-le.c > new file mode 100644 > index 00000000000..7627680a0b2 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/fmov-2-le.c > @@ -0,0 +1,90 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -mlittle-endian" } */ > +/* { dg-final { check-function-bodies "**" "" "" } } */ > + > +#pragma GCC target ("arch=armv8.2-a+fp16") > + > +typedef int v2si __attribute__ ((vector_size (8))); > +typedef short v4hi __attribute__ ((vector_size (8))); > +typedef char v8qi __attribute__ ((vector_size (8))); > +typedef long v2di __attribute__ ((vector_size (16))); > +typedef int v4si __attribute__ ((vector_size (16))); > +typedef short v8hi __attribute__ ((vector_size (16))); > +typedef char v16qi __attribute__ ((vector_size (16))); > + > +/* > +** f_v2di: > +** fmov h0, h0 > +** ret > +*/ > +v2di > +f_v2di (v2di x) > +{ > + return x & (v2di){ 0xffff, 0 }; > +} > + > +/* > +** f_v4si: > +** fmov h0, h0 > +** ret > +*/ > +v4si > +f_v4si (v4si x) > +{ > + return x & (v4si){ 0xffff, 0, 0, 0 }; > +} > + > +/* > +** f_v2si: > +** fmov h0, h0 > +** ret > +*/ > +v2si > +f_v2si (v2si x) > +{ > + return x & (v2si){ 0xffff, 0 }; > +} > + > +/* > +** f_v8hi: > +** fmov h0, h0 > +** ret > +*/ > +v8hi > +f_v8hi (v8hi x) > +{ > + return x & (v8hi){ 0xffff, 0, 0, 0, 0, 0, 0, 0 }; > +} > + > +/* > +** f_v4hi: > +** fmov h0, h0 > +** ret > +*/ > +v4hi > +f_v4hi (v4hi x) > +{ > + return x & (v4hi){ 0xffff, 0, 0, 0 }; > +} > + > +/* > +** f_v16qi: > +** fmov h0, h0 > +** ret > +*/ > +v16qi > +f_v16qi (v16qi x) > +{ > + return x & (v16qi){ 0xff, 0xff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; > +} > + > +/* > +** f_v8qi: > +** fmov h0, h0 > +** 
ret > +*/ > +v8qi > +f_v8qi (v8qi x) > +{ > + return x & (v8qi){ 0xff, 0xff, 0, 0, 0, 0, 0, 0 }; > +}