Tamar Christina <tamar.christ...@arm.com> writes:
> Hi All,
>
> Our zero and sign extend and extract patterns are currently very limited and
> only work for the original register size of the instructions. i.e. limited by
> GPI patterns.  However these instructions extract bits and extend.  This means
> that any register size can be used as an input as long as the extraction makes
> logical sense.
>
> The majority of the attached testcases fail currently to optimize.
>
> Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
>
> Ok for master?
>
> Thanks,
> Tamar
>
> gcc/ChangeLog:
>
>       * config/aarch64/aarch64-simd.md (aarch64_get_lane<mode>): Drop reload
>       penalty.
>       * config/aarch64/aarch64.md
>       (*<ANY_EXTEND:optab><GPI:mode>_ashl<SHORT:mode>): Renamed to...
>       (*<ANY_EXTEND:optab><GPI:mode>_ashl<ALLX:mode>): ...this.
>       (*zero_extend<GPI:mode>_lshr<SHORT:mode>): Renamed to...
>       (*zero_extend<GPI:mode>_<optab><ALLX:mode>): ...this.
>       (*extend<GPI:mode>_ashr<SHORT:mode>): Rename to...
>       (*extend<GPI:mode>_<optab><ALLX:mode>): ...this.
>
> gcc/testsuite/ChangeLog:
>
>       * gcc.target/aarch64/bitmove_1.c: New test.
>       * gcc.target/aarch64/bitmove_2.c: New test.

Looks like a nice change, but some comments below.

>
> --- inline copy of patch -- 
> diff --git a/gcc/config/aarch64/aarch64-simd.md 
> b/gcc/config/aarch64/aarch64-simd.md
> index 
> 8bcc9e76b1cad4a2591fb176175db72d7a190d57..23909c62638b49722568da4555b33c71fd21337e
>  100644
> --- a/gcc/config/aarch64/aarch64-simd.md
> +++ b/gcc/config/aarch64/aarch64-simd.md
> @@ -4259,7 +4259,7 @@ (define_insn 
> "*aarch64_get_lane_zero_extend<GPI:mode><VDQV_L:mode>"
>  ;; Extracting lane zero is split into a simple move when it is between SIMD
>  ;; registers or a store.
>  (define_insn_and_split "aarch64_get_lane<mode>"
> -  [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=?r, w, 
> Utv")
> +  [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, 
> Utv")
>       (vec_select:<VEL>
>         (match_operand:VALL_F16_FULL 1 "register_operand" "w, w, w")
>         (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]

Which testcase does this help with?  It didn't look like the new tests
do any vector stuff.

> diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
> index 
> 85b400489cb382a01b0c469eff2b600a93805e31..3116feda4fe54e2a21dc3f990b6976d216874260
>  100644
> --- a/gcc/config/aarch64/aarch64.md
> +++ b/gcc/config/aarch64/aarch64.md
> @@ -5629,13 +5629,13 @@ (define_insn "*<optab>si3_insn2_uxtw"
>  )
>  
>  (define_insn "*<optab><mode>3_insn"
> -  [(set (match_operand:SHORT 0 "register_operand" "=r")
> -     (ASHIFT:SHORT (match_operand:SHORT 1 "register_operand" "r")
> +  [(set (match_operand:ALLI 0 "register_operand" "=r")
> +     (ASHIFT:ALLI (match_operand:ALLI 1 "register_operand" "r")
>                     (match_operand 2 "const_int_operand" "n")))]
>    "UINTVAL (operands[2]) < GET_MODE_BITSIZE (<MODE>mode)"
>  {
>    operands[3] = GEN_INT (<sizen> - UINTVAL (operands[2]));
> -  return "<bfshift>\t%w0, %w1, %2, %3";
> +  return "<bfshift>\t%<w>0, %<w>1, %2, %3";
>  }
>    [(set_attr "type" "bfx")]
>  )

Similar question here I guess.  There's a separate pattern for SI and DI
shifts, so I wouldn't have expected this to be necessary.

> @@ -5710,40 +5710,40 @@ (define_insn "*extrsi5_insn_di"
>    [(set_attr "type" "rotate_imm")]
>  )
>  
> -(define_insn "*<ANY_EXTEND:optab><GPI:mode>_ashl<SHORT:mode>"
> +(define_insn "*<ANY_EXTEND:optab><GPI:mode>_ashl<ALLX:mode>"
>    [(set (match_operand:GPI 0 "register_operand" "=r")
>       (ANY_EXTEND:GPI
> -      (ashift:SHORT (match_operand:SHORT 1 "register_operand" "r")
> +      (ashift:ALLX (match_operand:ALLX 1 "register_operand" "r")
>                      (match_operand 2 "const_int_operand" "n"))))]
> -  "UINTVAL (operands[2]) < GET_MODE_BITSIZE (<SHORT:MODE>mode)"
> +  "UINTVAL (operands[2]) < GET_MODE_BITSIZE (<ALLX:MODE>mode)"

It'd be better to avoid even defining si<-si or si<-di "extensions"
(even though nothing should try to match them), so how about adding:

  <GPI:sizen> > <ALLX:sizen> && 

or similar to the beginning of the condition?  The conditions for
the invalid combos will then be provably false at compile time and
the patterns will be compiled out.

Same comment for the others.

>  {
> -  operands[3] = GEN_INT (<SHORT:sizen> - UINTVAL (operands[2]));
> +  operands[3] = GEN_INT (<ALLX:sizen> - UINTVAL (operands[2]));
>    return "<su>bfiz\t%<GPI:w>0, %<GPI:w>1, %2, %3";
>  }
>    [(set_attr "type" "bfx")]
>  )
>  
> -(define_insn "*zero_extend<GPI:mode>_lshr<SHORT:mode>"
> +(define_insn "*zero_extend<GPI:mode>_<optab><ALLX:mode>"
>    [(set (match_operand:GPI 0 "register_operand" "=r")
>       (zero_extend:GPI
> -      (lshiftrt:SHORT (match_operand:SHORT 1 "register_operand" "r")
> -                      (match_operand 2 "const_int_operand" "n"))))]
> -  "UINTVAL (operands[2]) < GET_MODE_BITSIZE (<SHORT:MODE>mode)"
> +      (LSHIFTRT_ONLY:ALLX (match_operand:ALLX 1 "register_operand" "r")
> +                          (match_operand 2 "const_int_operand" "n"))))]
> +  "UINTVAL (operands[2]) < GET_MODE_BITSIZE (<ALLX:MODE>mode)"
>  {
> -  operands[3] = GEN_INT (<SHORT:sizen> - UINTVAL (operands[2]));
> +  operands[3] = GEN_INT (<ALLX:sizen> - UINTVAL (operands[2]));
>    return "ubfx\t%<GPI:w>0, %<GPI:w>1, %2, %3";
>  }
>    [(set_attr "type" "bfx")]
>  )

I think it'd better to stick to the hard-coded lshiftrt, since nothing
in the asm template is parameterised by the operation.  Using single-item
iterators is only really useful for "@" patterns.

Likewise for the ashiftrt pattern.

Thanks,
Richard

> -(define_insn "*extend<GPI:mode>_ashr<SHORT:mode>"
> +(define_insn "*extend<GPI:mode>_<optab><ALLX:mode>"
>    [(set (match_operand:GPI 0 "register_operand" "=r")
>       (sign_extend:GPI
> -      (ashiftrt:SHORT (match_operand:SHORT 1 "register_operand" "r")
> -                      (match_operand 2 "const_int_operand" "n"))))]
> -  "UINTVAL (operands[2]) < GET_MODE_BITSIZE (<SHORT:MODE>mode)"
> +      (ASHIFTRT_ONLY:ALLX (match_operand:ALLX 1 "register_operand" "r")
> +                          (match_operand 2 "const_int_operand" "n"))))]
> +  "UINTVAL (operands[2]) < GET_MODE_BITSIZE (<ALLX:MODE>mode)"
>  {
> -  operands[3] = GEN_INT (<SHORT:sizen> - UINTVAL (operands[2]));
> +  operands[3] = GEN_INT (<ALLX:sizen> - UINTVAL (operands[2]));
>    return "sbfx\\t%<GPI:w>0, %<GPI:w>1, %2, %3";
>  }
>    [(set_attr "type" "bfx")]
> diff --git a/gcc/testsuite/gcc.target/aarch64/bitmove_1.c 
> b/gcc/testsuite/gcc.target/aarch64/bitmove_1.c
> new file mode 100644
> index 
> 0000000000000000000000000000000000000000..8b0aa8af49cd070928bacc4995a321c7bfde58a6
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/bitmove_1.c
> @@ -0,0 +1,76 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-O3 -std=c99" } */
> +/* { dg-final { check-function-bodies "**" "" "" { target { le } } } } */
> +
> +#include <stdint.h>
> +
> +/*
> +** sfoo6:
> +**   asr     x0, x0, 16
> +**   ret
> +*/
> +int64_t sfoo6 (int32_t x)
> +{
> +  return x >> 16;
> +}
> +
> +/*
> +** ufoo6:
> +**   lsr     w0, w0, 30
> +**   ret
> +*/
> +uint64_t ufoo6 (uint32_t x)
> +{
> +  return x >> 30;
> +}
> +
> +/*
> +** ufoo6s:
> +**   ubfx    w0, w0, 7, 9
> +**   ret
> +*/
> +uint32_t ufoo6s (uint16_t x)
> +{
> +  return x >> 7;
> +}
> +
> +/*
> +** ufoo6h:
> +**   ubfx    w0, w0, 4, 4
> +**   ret
> +*/
> +uint16_t ufoo6h (uint8_t x)
> +{
> +  return x >> 4;
> +}
> +
> +/*
> +** sfoo62:
> +**   asr     x0, x0, 10
> +**   ret
> +*/
> +int64_t sfoo62 (int32_t x)
> +{
> +  return x >> 10;
> +}
> +
> +/*
> +** ufoo62:
> +**   lsr     w0, w0, 10
> +**   ret
> +*/
> +uint64_t ufoo62 (uint32_t x)
> +{
> +  return x >> 10;
> +}
> +
> +/*
> +** sfoo63:
> +**   asr     x0, x0, 10
> +**   ret
> +*/
> +int64_t sfoo63 (int32_t x)
> +{
> +  return x >> 10;
> +}
> +
> diff --git a/gcc/testsuite/gcc.target/aarch64/bitmove_2.c 
> b/gcc/testsuite/gcc.target/aarch64/bitmove_2.c
> new file mode 100644
> index 
> 0000000000000000000000000000000000000000..54b3071a3b4e2001f83337837e712c381683d23a
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/bitmove_2.c
> @@ -0,0 +1,76 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-O3 -std=c99" } */
> +/* { dg-final { check-function-bodies "**" "" "" { target { le } } } } */
> +
> +#include <stdint.h>
> +
> +/*
> +** sfoo6:
> +**   sbfiz   x0, x0, 16, 16
> +**   ret
> +*/
> +int64_t sfoo6 (int32_t x)
> +{
> +  return x << 16;
> +}
> +
> +/*
> +** ufoo6:
> +**   lsl     w0, w0, 30
> +**   ret
> +*/
> +uint64_t ufoo6 (uint32_t x)
> +{
> +  return x << 30;
> +}
> +
> +/*
> +** ufoo6s:
> +**   ubfiz   w0, w0, 7, 16
> +**   ret
> +*/
> +uint32_t ufoo6s (uint16_t x)
> +{
> +  return x << 7;
> +}
> +
> +/*
> +** ufoo6h:
> +**   uxtb    w0, w0
> +**   ubfiz   w0, w0, 4, 12
> +**   ret
> +*/
> +uint16_t ufoo6h (uint8_t x)
> +{
> +  return x << 4;
> +}
> +
> +/*
> +** sfoo62:
> +**   sbfiz   x0, x0, 10, 22
> +**   ret
> +*/
> +int64_t sfoo62 (int32_t x)
> +{
> +  return x << 10;
> +}
> +
> +/*
> +** ufoo62:
> +**   lsl     w0, w0, 10
> +**   ret
> +*/
> +uint64_t ufoo62 (uint32_t x)
> +{
> +  return x << 10;
> +}
> +
> +/*
> +** sfoo63:
> +**   sbfiz   x0, x0, 10, 22
> +**   ret
> +*/
> +int64_t sfoo63 (int32_t x)
> +{
> +  return x << 10;
> +}

Reply via email to