Re: [PATCH][AArch64] Cleanup move immediate code

2022-12-07 Thread Wilco Dijkstra via Gcc-patches
Hi Andreas,

Thanks for the report, I've committed the fix: 
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108006

Cheers,
Wilco


Re: [PATCH][AArch64] Cleanup move immediate code

2022-12-07 Thread Andreas Schwab via Gcc-patches
FAIL: gcc.target/aarch64/sve/cond_arith_5.c (internal compiler error: in 
aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/const_3.c (internal compiler error: in 
aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/loop_add_5.c (internal compiler error: in 
aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/mask_load_slp_1.c (internal compiler error: in 
aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/mul_highpart_3.c (internal compiler error: in 
aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/slp_13.c (internal compiler error: in 
aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/slp_2.c (internal compiler error: in 
aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/slp_8.c (internal compiler error: in 
aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/slp_9.c (internal compiler error: in 
aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/spill_4.c (internal compiler error: in 
aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/spill_6.c (internal compiler error: in 
aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/vcond_18.c (internal compiler error: in 
aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/vcond_19.c (internal compiler error: in 
aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/vcond_20.c (internal compiler error: in 
aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/vcond_3.c (internal compiler error: in 
aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/vcond_7.c (internal compiler error: in 
aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/acle/asm/dup_f16.c  -std=c90 -O0 -g -DTEST_FULL 
(internal compiler error: in aarch64_move_imm, at 
config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/acle/asm/dup_f16.c  -std=c90 -O0 -g 
-DTEST_OVERLOADS (internal compiler error: in aarch64_move_imm, at 
config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/acle/asm/dup_f16.c  -std=c90 -O1 -g -DTEST_FULL 
(internal compiler error: in aarch64_move_imm, at 
config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/acle/asm/dup_f16.c  -std=c90 -O1 -g 
-DTEST_OVERLOADS (internal compiler error: in aarch64_move_imm, at 
config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/acle/asm/dup_f16.c  -std=c99 -O2 -g -DTEST_FULL 
(internal compiler error: in aarch64_move_imm, at 
config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/acle/asm/dup_f16.c  -std=c99 -O2 -g 
-DTEST_OVERLOADS (internal compiler error: in aarch64_move_imm, at 
config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/acle/asm/dup_f16.c  -std=c11 -O3 -g -DTEST_FULL 
(internal compiler error: in aarch64_move_imm, at 
config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/acle/asm/dup_f16.c  -std=c11 -O3 -g 
-DTEST_OVERLOADS (internal compiler error: in aarch64_move_imm, at 
config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/acle/asm/dup_f16.c  -std=gnu90 -O2 
-fno-schedule-insns -DCHECK_ASM --save-temps -DTEST_FULL (internal compiler 
error: in aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/acle/asm/dup_f16.c  -std=gnu90 -O2 
-fno-schedule-insns -DCHECK_ASM --save-temps -DTEST_OVERLOADS (internal 
compiler error: in aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/acle/asm/dup_f16.c  -std=gnu99 -Ofast -g 
-DTEST_FULL (internal compiler error: in aarch64_move_imm, at 
config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/acle/asm/dup_f16.c  -std=gnu99 -Ofast -g 
-DTEST_OVERLOADS (internal compiler error: in aarch64_move_imm, at 
config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/acle/asm/dup_f16.c  -std=gnu11 -Os -g -DTEST_FULL 
(internal compiler error: in aarch64_move_imm, at 
config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/acle/asm/dup_f16.c  -std=gnu11 -Os -g 
-DTEST_OVERLOADS (internal compiler error: in aarch64_move_imm, at 
config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/acle/asm/dup_s16.c  -std=c90 -O0 -g -DTEST_FULL 
(internal compiler error: in aarch64_move_imm, at 
config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/acle/asm/dup_s16.c  -std=c90 -O0 -g 
-DTEST_OVERLOADS (internal compiler error: in aarch64_move_imm, at 
config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/acle/asm/dup_s16.c  -std=c90 -O1 -g -DTEST_FULL 
(internal compiler error: in aarch64_move_imm, at 
config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/acle/asm/dup_s16.c  -std=c90 -O1 -g 
-DTEST_OVERLOADS (internal compiler error: in aarch64_move_imm, at 

Re: [PATCH][AArch64] Cleanup move immediate code

2022-12-05 Thread Richard Sandiford via Gcc-patches
Wilco Dijkstra  writes:
> Hi Richard,
>
>> -  scalar_int_mode imode = (mode == HFmode
>> -    ? SImode
>> -    : int_mode_for_mode (mode).require ());
>> +  machine_mode imode = (mode == DFmode) ? DImode : SImode;
>
>> It looks like this might mishandle DDmode, if not now then in the future.
>> Seems safer to use known_eq (GET_MODE_SIZE (mode), 8)
>
> I've changed that, but it does not matter for the narrow modes as the result
> will be identical - only DDmode might get costed incorrectly.
>
>> Sorry for not noticing last time, but: rather than have
>> aarch64_zeroextended_move_imm (which is quite a complicated test),
>> could we just add an extra (default off) parameter to aarch64_move_imm
>> that suppresses the (val >> 32) == 0 test?
>
> That makes things more complicated again - ultimately I'd like to get rid of 
> the
> mode parameter since most callers use a fixed mode, and ones that don't are
> now creating and passing fake modes...

I guess we'll have to agree to disagree on that one.

> I've changed it like aarch64_move_imm and call aarch64_is_movz twice to
> check it is not a 64-bit MOVZ/MOVN.

Unlike with my suggestion, the result of the function is only meaningful
if the caller has checked for a valid move immediate first.  That is,
aarch64_zeroextended_move_imm takes for granted that the immediate is
valid for MOV Xn or MOV Wn, and the function returns false if the
immediate is valid for MOV Xn.  It seems like an odd way round to me,
since it inherently means checking the same thing twice.

Maybe a compromise would be to reverse the sense of the return value
(return true for MOV Xns rather than false) and call the function
something like aarch64_mov_xn_imm.  All callers could cope with that,
and:

(define_constraint "N"
  "A constant that can be used with a 64-bit MOV immediate operation."
  (and (match_code "const_int")
   (match_test "aarch64_move_imm (ival, DImode)")
   (match_test "!aarch64_zeroextended_move_imm (ival)")))

would become simply:

(define_constraint "N"
  "A constant that can be used with a 64-bit MOV immediate operation."
  (and (match_code "const_int")
   (match_test "aarch64_mov_xn_imm (ival)")))

I'm still not too happy with the duplication, since the structure of
aarch64_mov_xn_imm is obviously aarch64_move_imm with some bits taken
out.  If (somehow) one function learns a new trick, that trick would
need to be duplicated in the other function.  But like I say, I think
we'll have to agree to disagree on that.

So the patch is OK with the aarch64_mov_xn_imm change suggested above,
or let me know if you disagree.

Thanks,
Richard

>
> Cheers,
> Wilco
>
> v3: Use aarch64_is_movz, use known_eq
>
> Simplify, refactor and improve various move immediate functions.
> Allow 32-bit MOVI/N as a valid 64-bit immediate which removes special
> cases in aarch64_internal_mov_immediate.  Add new constraint so the movdi
> pattern only needs a single alternative for move immediate.
>
> Passes bootstrap and regress, OK for commit?
>
> gcc/ChangeLog:
>
>   * config/aarch64/aarch64.cc (aarch64_bitmask_imm): Use unsigned type.
> (aarch64_zeroextended_move_imm): New function.
> (aarch64_move_imm): Refactor, assert mode is SImode or DImode.
> (aarch64_internal_mov_immediate): Assert mode is SImode or DImode.
> Simplify special cases.
> (aarch64_uimm12_shift): Simplify code.
> (aarch64_clamp_to_uimm12_shift): Likewise.
> (aarch64_movw_imm): Rename to aarch64_is_movz.
> (aarch64_float_const_rtx_p): Pass either SImode or DImode to
> aarch64_internal_mov_immediate.
> (aarch64_rtx_costs): Likewise.
> * config/aarch64/aarch64.md (movdi_aarch64): Merge 'N' and 'M'
> constraints into single 'O'.
> (mov_aarch64): Likewise.
> * config/aarch64/aarch64-protos.h (aarch64_move_imm): Use unsigned.
> (aarch64_bitmask_imm): Likewise.
> (aarch64_uimm12_shift): Likewise.
> (aarch64_zeroextended_move_imm): New prototype.
> * config/aarch64/constraints.md: Add 'O' for 32/64-bit immediates,
> limit 'N' to 64-bit only moves.
>
> ---
>
> diff --git a/gcc/config/aarch64/aarch64-protos.h 
> b/gcc/config/aarch64/aarch64-protos.h
> index 
> 4be93c93c26e091f878bc8e4cf06e90888405fb2..8bce6ec7599edcc2e6a1d8006450f35c0ce7f61f
>  100644
> --- a/gcc/config/aarch64/aarch64-protos.h
> +++ b/gcc/config/aarch64/aarch64-protos.h
> @@ -756,7 +756,7 @@ void aarch64_post_cfi_startproc (void);
>  poly_int64 aarch64_initial_elimination_offset (unsigned, unsigned);
>  int aarch64_get_condition_code (rtx);
>  bool aarch64_address_valid_for_prefetch_p (rtx, bool);
> -bool aarch64_bitmask_imm (HOST_WIDE_INT val, machine_mode);
> +bool aarch64_bitmask_imm (unsigned HOST_WIDE_INT val, machine_mode);
>  unsigned HOST_WIDE_INT aarch64_and_split_imm1 (HOST_WIDE_INT val_in);
>  unsigned HOST_WIDE_INT aarch64_and_split_imm2 

Re: [PATCH][AArch64] Cleanup move immediate code

2022-12-05 Thread Wilco Dijkstra via Gcc-patches
Hi Richard,

> -  scalar_int_mode imode = (mode == HFmode
> -    ? SImode
> -    : int_mode_for_mode (mode).require ());
> +  machine_mode imode = (mode == DFmode) ? DImode : SImode;

> It looks like this might mishandle DDmode, if not now then in the future.
> Seems safer to use known_eq (GET_MODE_SIZE (mode), 8)

I've changed that, but it does not matter for the narrow modes as the result
will be identical - only DDmode might get costed incorrectly.

> Sorry for not noticing last time, but: rather than have
> aarch64_zeroextended_move_imm (which is quite a complicated test),
> could we just add an extra (default off) parameter to aarch64_move_imm
> that suppresses the (val >> 32) == 0 test?

That makes things more complicated again - ultimately I'd like to get rid of the
mode parameter since most callers use a fixed mode, and ones that don't are
now creating and passing fake modes... I've changed it like aarch64_move_imm
and call aarch64_is_movz twice to check it is not a 64-bit MOVZ/MOVN.

Cheers,
Wilco

v3: Use aarch64_is_movz, use known_eq

Simplify, refactor and improve various move immediate functions.
Allow 32-bit MOVI/N as a valid 64-bit immediate which removes special
cases in aarch64_internal_mov_immediate.  Add new constraint so the movdi
pattern only needs a single alternative for move immediate.

Passes bootstrap and regress, OK for commit?

gcc/ChangeLog:

* config/aarch64/aarch64.cc (aarch64_bitmask_imm): Use unsigned type.
(aarch64_zeroextended_move_imm): New function.
(aarch64_move_imm): Refactor, assert mode is SImode or DImode.
(aarch64_internal_mov_immediate): Assert mode is SImode or DImode.
Simplify special cases.
(aarch64_uimm12_shift): Simplify code.
(aarch64_clamp_to_uimm12_shift): Likewise.
(aarch64_movw_imm): Rename to aarch64_is_movz.
(aarch64_float_const_rtx_p): Pass either SImode or DImode to
aarch64_internal_mov_immediate.
(aarch64_rtx_costs): Likewise.
* config/aarch64/aarch64.md (movdi_aarch64): Merge 'N' and 'M'
constraints into single 'O'.
(mov_aarch64): Likewise.
* config/aarch64/aarch64-protos.h (aarch64_move_imm): Use unsigned.
(aarch64_bitmask_imm): Likewise.
(aarch64_uimm12_shift): Likewise.
(aarch64_zeroextended_move_imm): New prototype.
* config/aarch64/constraints.md: Add 'O' for 32/64-bit immediates,
limit 'N' to 64-bit only moves.

---

diff --git a/gcc/config/aarch64/aarch64-protos.h 
b/gcc/config/aarch64/aarch64-protos.h
index 
4be93c93c26e091f878bc8e4cf06e90888405fb2..8bce6ec7599edcc2e6a1d8006450f35c0ce7f61f
 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -756,7 +756,7 @@ void aarch64_post_cfi_startproc (void);
 poly_int64 aarch64_initial_elimination_offset (unsigned, unsigned);
 int aarch64_get_condition_code (rtx);
 bool aarch64_address_valid_for_prefetch_p (rtx, bool);
-bool aarch64_bitmask_imm (HOST_WIDE_INT val, machine_mode);
+bool aarch64_bitmask_imm (unsigned HOST_WIDE_INT val, machine_mode);
 unsigned HOST_WIDE_INT aarch64_and_split_imm1 (HOST_WIDE_INT val_in);
 unsigned HOST_WIDE_INT aarch64_and_split_imm2 (HOST_WIDE_INT val_in);
 bool aarch64_and_bitmask_imm (unsigned HOST_WIDE_INT val_in, machine_mode 
mode);
@@ -793,7 +793,7 @@ bool aarch64_masks_and_shift_for_bfi_p (scalar_int_mode, 
unsigned HOST_WIDE_INT,
unsigned HOST_WIDE_INT,
unsigned HOST_WIDE_INT);
 bool aarch64_zero_extend_const_eq (machine_mode, rtx, machine_mode, rtx);
-bool aarch64_move_imm (HOST_WIDE_INT, machine_mode);
+bool aarch64_move_imm (unsigned HOST_WIDE_INT, machine_mode);
 machine_mode aarch64_sve_int_mode (machine_mode);
 opt_machine_mode aarch64_sve_pred_mode (unsigned int);
 machine_mode aarch64_sve_pred_mode (machine_mode);
@@ -843,8 +843,9 @@ bool aarch64_sve_float_arith_immediate_p (rtx, bool);
 bool aarch64_sve_float_mul_immediate_p (rtx);
 bool aarch64_split_dimode_const_store (rtx, rtx);
 bool aarch64_symbolic_address_p (rtx);
-bool aarch64_uimm12_shift (HOST_WIDE_INT);
+bool aarch64_uimm12_shift (unsigned HOST_WIDE_INT);
 int aarch64_movk_shift (const wide_int_ref &, const wide_int_ref &);
+bool aarch64_zeroextended_move_imm (unsigned HOST_WIDE_INT);
 bool aarch64_use_return_insn_p (void);
 const char *aarch64_output_casesi (rtx *);
 
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 
a73741800c963ee6605fd2cfa918f4399da4bfdf..00269632eeb52c29ba2011c4c82274968b850d71
 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -5625,12 +5625,10 @@ aarch64_bitmask_imm (unsigned HOST_WIDE_INT val)
 
 /* Return true if VAL is a valid bitmask immediate for MODE.  */
 bool
-aarch64_bitmask_imm (HOST_WIDE_INT val_in, machine_mode mode)
+aarch64_bitmask_imm (unsigned HOST_WIDE_INT 

Re: [PATCH][AArch64] Cleanup move immediate code

2022-12-01 Thread Richard Sandiford via Gcc-patches
Wilco Dijkstra  writes:
> Hi Richard,
>
>> Just to make sure I understand: isn't it really just MOVN?  I would have
>> expected a 32-bit MOVZ to be equivalent to (and add no capabilities over)
>> a 64-bit MOVZ.
>
> The 32-bit MOVZ immediates are equivalent, MOVN never overlaps, and
> MOVI has some overlaps. Since we allow all 3 variants, the 2 alternatives
> in the movdi pattern are overlapping for MOVZ and MOVI immediates.
>
>> I agree the ctz trick is more elegant than (and an improvement over)
>> the current approach to testing for movz.  But I think the overall logic
>> is harder to follow than it was in the original version.  Initially
>> canonicalising val2 based on the sign bit seems unintuitive since we
>> still need to handle all four combinations of (top bit set, top bit clear)
>> x (low 48 bits set, low 48 bits clear).  I preferred the original
>> approach of testing once with the original value (for MOVZ) and once
>> with the inverted value (for MOVN).
>
> Yes, the canonicalization on the sign ends up requiring 2 special cases.
> Handling the MOVZ case first and then MOVN does avoid that, and makes
> things simpler overall, so I've used that approach in v2.
>
>> Don't the new cases boil down to: if mode is DImode and the upper 32 bits
>> are clear, we can test based on SImode instead?  In other words, couldn't
>> the "(val >> 32) == 0" part of the final test be done first, with the
>> effect of changing the mode to SImode?  Something like:
>
> Yes that works. I used masking of the top bits to avoid repeatedly testing the
> same condition. The new version removes most special cases and ends up
> both smaller and simpler:
>
>
> v2: Simplify the special cases in aarch64_move_imm, use aarch64_is_movz.
>
> Simplify, refactor and improve various move immediate functions.
> Allow 32-bit MOVZ/I/N as a valid 64-bit immediate which removes special
> cases in aarch64_internal_mov_immediate.  Add new constraint so the movdi
> pattern only needs a single alternative for move immediate.
>
> Passes bootstrap and regress, OK for commit?
>
> gcc/ChangeLog:
>
>   * config/aarch64/aarch64.cc (aarch64_bitmask_imm): Use unsigned type.
> (aarch64_zeroextended_move_imm): New function.
> (aarch64_move_imm): Refactor, assert mode is SImode or DImode.
> (aarch64_internal_mov_immediate): Assert mode is SImode or DImode.
> Simplify special cases.
> (aarch64_uimm12_shift): Simplify code.
> (aarch64_clamp_to_uimm12_shift): Likewise.
> (aarch64_movw_imm): Rename to aarch64_is_movz.
> (aarch64_float_const_rtx_p): Pass either SImode or DImode to
> aarch64_internal_mov_immediate.
> (aarch64_rtx_costs): Likewise.
> * config/aarch64/aarch64.md (movdi_aarch64): Merge 'N' and 'M'
> constraints into single 'O'.
> (mov_aarch64): Likewise.
> * config/aarch64/aarch64-protos.h (aarch64_move_imm): Use unsigned.
> (aarch64_bitmask_imm): Likewise.
> (aarch64_uimm12_shift): Likewise.
> (aarch64_zeroextended_move_imm): New prototype.
> * config/aarch64/constraints.md: Add 'O' for 32/64-bit immediates,
> limit 'N' to 64-bit only moves.
>
> ---
>
> diff --git a/gcc/config/aarch64/aarch64-protos.h 
> b/gcc/config/aarch64/aarch64-protos.h
> index 
> 4be93c93c26e091f878bc8e4cf06e90888405fb2..8bce6ec7599edcc2e6a1d8006450f35c0ce7f61f
>  100644
> --- a/gcc/config/aarch64/aarch64-protos.h
> +++ b/gcc/config/aarch64/aarch64-protos.h
> @@ -756,7 +756,7 @@ void aarch64_post_cfi_startproc (void);
>  poly_int64 aarch64_initial_elimination_offset (unsigned, unsigned);
>  int aarch64_get_condition_code (rtx);
>  bool aarch64_address_valid_for_prefetch_p (rtx, bool);
> -bool aarch64_bitmask_imm (HOST_WIDE_INT val, machine_mode);
> +bool aarch64_bitmask_imm (unsigned HOST_WIDE_INT val, machine_mode);
>  unsigned HOST_WIDE_INT aarch64_and_split_imm1 (HOST_WIDE_INT val_in);
>  unsigned HOST_WIDE_INT aarch64_and_split_imm2 (HOST_WIDE_INT val_in);
>  bool aarch64_and_bitmask_imm (unsigned HOST_WIDE_INT val_in, machine_mode 
> mode);
> @@ -793,7 +793,7 @@ bool aarch64_masks_and_shift_for_bfi_p (scalar_int_mode, 
> unsigned HOST_WIDE_INT,
>   unsigned HOST_WIDE_INT,
>   unsigned HOST_WIDE_INT);
>  bool aarch64_zero_extend_const_eq (machine_mode, rtx, machine_mode, rtx);
> -bool aarch64_move_imm (HOST_WIDE_INT, machine_mode);
> +bool aarch64_move_imm (unsigned HOST_WIDE_INT, machine_mode);
>  machine_mode aarch64_sve_int_mode (machine_mode);
>  opt_machine_mode aarch64_sve_pred_mode (unsigned int);
>  machine_mode aarch64_sve_pred_mode (machine_mode);
> @@ -843,8 +843,9 @@ bool aarch64_sve_float_arith_immediate_p (rtx, bool);
>  bool aarch64_sve_float_mul_immediate_p (rtx);
>  bool aarch64_split_dimode_const_store (rtx, rtx);
>  bool aarch64_symbolic_address_p (rtx);
> -bool aarch64_uimm12_shift (HOST_WIDE_INT);
> +bool aarch64_uimm12_shift 

Re: [PATCH][AArch64] Cleanup move immediate code

2022-11-29 Thread Wilco Dijkstra via Gcc-patches
Hi Richard,

> Just to make sure I understand: isn't it really just MOVN?  I would have
> expected a 32-bit MOVZ to be equivalent to (and add no capabilities over)
> a 64-bit MOVZ.

The 32-bit MOVZ immediates are equivalent, MOVN never overlaps, and
MOVI has some overlaps. Since we allow all 3 variants, the 2 alternatives
in the movdi pattern are overlapping for MOVZ and MOVI immediates.

> I agree the ctz trick is more elegant than (and an improvement over)
> the current approach to testing for movz.  But I think the overall logic
> is harder to follow than it was in the original version.  Initially
> canonicalising val2 based on the sign bit seems unintuitive since we
> still need to handle all four combinations of (top bit set, top bit clear)
> x (low 48 bits set, low 48 bits clear).  I preferred the original
> approach of testing once with the original value (for MOVZ) and once
> with the inverted value (for MOVN).

Yes, the canonicalization on the sign ends up requiring 2 special cases.
Handling the MOVZ case first and then MOVN does avoid that, and makes
things simpler overall, so I've used that approach in v2.

> Don't the new cases boil down to: if mode is DImode and the upper 32 bits
> are clear, we can test based on SImode instead?  In other words, couldn't
> the "(val >> 32) == 0" part of the final test be done first, with the
> effect of changing the mode to SImode?  Something like:

Yes that works. I used masking of the top bits to avoid repeatedly testing the
same condition. The new version removes most special cases and ends up
both smaller and simpler:


v2: Simplify the special cases in aarch64_move_imm, use aarch64_is_movz.

Simplify, refactor and improve various move immediate functions.
Allow 32-bit MOVZ/I/N as a valid 64-bit immediate which removes special
cases in aarch64_internal_mov_immediate.  Add new constraint so the movdi
pattern only needs a single alternative for move immediate.

Passes bootstrap and regress, OK for commit?

gcc/ChangeLog:

* config/aarch64/aarch64.cc (aarch64_bitmask_imm): Use unsigned type.
(aarch64_zeroextended_move_imm): New function.
(aarch64_move_imm): Refactor, assert mode is SImode or DImode.
(aarch64_internal_mov_immediate): Assert mode is SImode or DImode.
Simplify special cases.
(aarch64_uimm12_shift): Simplify code.
(aarch64_clamp_to_uimm12_shift): Likewise.
(aarch64_movw_imm): Rename to aarch64_is_movz.
(aarch64_float_const_rtx_p): Pass either SImode or DImode to
aarch64_internal_mov_immediate.
(aarch64_rtx_costs): Likewise.
* config/aarch64/aarch64.md (movdi_aarch64): Merge 'N' and 'M'
constraints into single 'O'.
(mov_aarch64): Likewise.
* config/aarch64/aarch64-protos.h (aarch64_move_imm): Use unsigned.
(aarch64_bitmask_imm): Likewise.
(aarch64_uimm12_shift): Likewise.
(aarch64_zeroextended_move_imm): New prototype.
* config/aarch64/constraints.md: Add 'O' for 32/64-bit immediates,
limit 'N' to 64-bit only moves.

---

diff --git a/gcc/config/aarch64/aarch64-protos.h 
b/gcc/config/aarch64/aarch64-protos.h
index 
4be93c93c26e091f878bc8e4cf06e90888405fb2..8bce6ec7599edcc2e6a1d8006450f35c0ce7f61f
 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -756,7 +756,7 @@ void aarch64_post_cfi_startproc (void);
 poly_int64 aarch64_initial_elimination_offset (unsigned, unsigned);
 int aarch64_get_condition_code (rtx);
 bool aarch64_address_valid_for_prefetch_p (rtx, bool);
-bool aarch64_bitmask_imm (HOST_WIDE_INT val, machine_mode);
+bool aarch64_bitmask_imm (unsigned HOST_WIDE_INT val, machine_mode);
 unsigned HOST_WIDE_INT aarch64_and_split_imm1 (HOST_WIDE_INT val_in);
 unsigned HOST_WIDE_INT aarch64_and_split_imm2 (HOST_WIDE_INT val_in);
 bool aarch64_and_bitmask_imm (unsigned HOST_WIDE_INT val_in, machine_mode 
mode);
@@ -793,7 +793,7 @@ bool aarch64_masks_and_shift_for_bfi_p (scalar_int_mode, 
unsigned HOST_WIDE_INT,
unsigned HOST_WIDE_INT,
unsigned HOST_WIDE_INT);
 bool aarch64_zero_extend_const_eq (machine_mode, rtx, machine_mode, rtx);
-bool aarch64_move_imm (HOST_WIDE_INT, machine_mode);
+bool aarch64_move_imm (unsigned HOST_WIDE_INT, machine_mode);
 machine_mode aarch64_sve_int_mode (machine_mode);
 opt_machine_mode aarch64_sve_pred_mode (unsigned int);
 machine_mode aarch64_sve_pred_mode (machine_mode);
@@ -843,8 +843,9 @@ bool aarch64_sve_float_arith_immediate_p (rtx, bool);
 bool aarch64_sve_float_mul_immediate_p (rtx);
 bool aarch64_split_dimode_const_store (rtx, rtx);
 bool aarch64_symbolic_address_p (rtx);
-bool aarch64_uimm12_shift (HOST_WIDE_INT);
+bool aarch64_uimm12_shift (unsigned HOST_WIDE_INT);
 int aarch64_movk_shift (const wide_int_ref &, const wide_int_ref &);
+bool aarch64_zeroextended_move_imm (unsigned HOST_WIDE_INT);
 bool 

Re: [PATCH][AArch64] Cleanup move immediate code

2022-11-24 Thread Richard Sandiford via Gcc-patches
Sorry for the very long delay in reviewing this.

Wilco Dijkstra  writes:
> Hi Richard,
>
> Here is the immediate cleanup splitoff from the previous patch:
>
> Simplify, refactor and improve various move immediate functions.
> Allow 32-bit MOVZ/N as a valid 64-bit immediate which removes special
> cases in aarch64_internal_mov_immediate.  Add new constraint so the movdi
> pattern only needs a single alternative for move immediate.

Just to make sure I understand: isn't it really just MOVN?  I would have
expected a 32-bit MOVZ to be equivalent to (and add no capabilities over)
a 64-bit MOVZ.

> Passes bootstrap and regress, OK for commit?
>
> gcc/ChangeLog:
>
> * config/aarch64/aarch64.cc (aarch64_bitmask_imm): Use unsigned type.
> (aarch64_zeroextended_move_imm): New function.
> (aarch64_move_imm): Refactor, assert mode is SImode or DImode.
> (aarch64_internal_mov_immediate): Assert mode is SImode or DImode.
> Simplify special cases.
> (aarch64_uimm12_shift): Simplify code.
> (aarch64_clamp_to_uimm12_shift): Likewise.
> (aarch64_movw_imm): Remove.
> (aarch64_float_const_rtx_p): Pass either SImode or DImode to
> aarch64_internal_mov_immediate.
> (aarch64_rtx_costs): Likewise.
> * config/aarch64/aarch64.md (movdi_aarch64): Merge 'N' and 'M'
> constraints into single 'O'.
> (mov_aarch64): Likewise.
> * config/aarch64/aarch64-protos.h (aarch64_move_imm): Use unsigned.
> (aarch64_bitmask_imm): Likewise.
> (aarch64_uimm12_shift): Likewise.
> (aarch64_zeroextended_move_imm): New prototype.
> * config/aarch64/constraints.md: Add 'O' for 32/64-bit immediates,
> limit 'N' to 64-bit only moves.
>
> ---
>
> diff --git a/gcc/config/aarch64/aarch64-protos.h 
> b/gcc/config/aarch64/aarch64-protos.h
> index 
> 1a71f02284137c64e7115b26e6aa00447596f105..a73bfa20acb9b92ae0475794c3f11c67d22feb97
>  100644
> --- a/gcc/config/aarch64/aarch64-protos.h
> +++ b/gcc/config/aarch64/aarch64-protos.h
> @@ -755,7 +755,7 @@ void aarch64_post_cfi_startproc (void);
>  poly_int64 aarch64_initial_elimination_offset (unsigned, unsigned);
>  int aarch64_get_condition_code (rtx);
>  bool aarch64_address_valid_for_prefetch_p (rtx, bool);
> -bool aarch64_bitmask_imm (HOST_WIDE_INT val, machine_mode);
> +bool aarch64_bitmask_imm (unsigned HOST_WIDE_INT val, machine_mode);
>  unsigned HOST_WIDE_INT aarch64_and_split_imm1 (HOST_WIDE_INT val_in);
>  unsigned HOST_WIDE_INT aarch64_and_split_imm2 (HOST_WIDE_INT val_in);
>  bool aarch64_and_bitmask_imm (unsigned HOST_WIDE_INT val_in, machine_mode 
> mode);
> @@ -792,7 +792,7 @@ bool aarch64_masks_and_shift_for_bfi_p (scalar_int_mode, 
> unsigned HOST_WIDE_INT,
> unsigned HOST_WIDE_INT,
> unsigned HOST_WIDE_INT);
>  bool aarch64_zero_extend_const_eq (machine_mode, rtx, machine_mode, rtx);
> -bool aarch64_move_imm (HOST_WIDE_INT, machine_mode);
> +bool aarch64_move_imm (unsigned HOST_WIDE_INT, machine_mode);
>  machine_mode aarch64_sve_int_mode (machine_mode);
>  opt_machine_mode aarch64_sve_pred_mode (unsigned int);
>  machine_mode aarch64_sve_pred_mode (machine_mode);
> @@ -842,8 +842,9 @@ bool aarch64_sve_float_arith_immediate_p (rtx, bool);
>  bool aarch64_sve_float_mul_immediate_p (rtx);
>  bool aarch64_split_dimode_const_store (rtx, rtx);
>  bool aarch64_symbolic_address_p (rtx);
> -bool aarch64_uimm12_shift (HOST_WIDE_INT);
> +bool aarch64_uimm12_shift (unsigned HOST_WIDE_INT);
>  int aarch64_movk_shift (const wide_int_ref &, const wide_int_ref &);
> +bool aarch64_zeroextended_move_imm (unsigned HOST_WIDE_INT);
>  bool aarch64_use_return_insn_p (void);
>  const char *aarch64_output_casesi (rtx *);
>
> diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
> index 
> 5d1ab5aa42b2cda0a655d2bc69c4df19da457ab3..798363bcc449c414de5bbb4f26b8e1c64a0cf71a
>  100644
> --- a/gcc/config/aarch64/aarch64.cc
> +++ b/gcc/config/aarch64/aarch64.cc
> @@ -5558,12 +5558,10 @@ aarch64_bitmask_imm (unsigned HOST_WIDE_INT val)
>
>  /* Return true if VAL is a valid bitmask immediate for MODE.  */
>  bool
> -aarch64_bitmask_imm (HOST_WIDE_INT val_in, machine_mode mode)
> +aarch64_bitmask_imm (unsigned HOST_WIDE_INT val, machine_mode mode)
>  {
>if (mode == DImode)
> -return aarch64_bitmask_imm (val_in);
> -
> -  unsigned HOST_WIDE_INT val = val_in;
> +return aarch64_bitmask_imm (val);
>
>if (mode == SImode)
>  return aarch64_bitmask_imm ((val & 0xffffffff) | (val << 32));
> @@ -5602,51 +5600,60 @@ aarch64_check_bitmask (unsigned HOST_WIDE_INT val,
>  }
>
>
> -/* Return true if val is an immediate that can be loaded into a
> -   register by a MOVZ instruction.  */
> -static bool
> -aarch64_movw_imm (HOST_WIDE_INT val, scalar_int_mode mode)
> +/* Return true if immediate VAL can only be created by using a 32-bit
> +   zero-extended move immediate, 

[PATCH][AArch64] Cleanup move immediate code

2022-11-01 Thread Wilco Dijkstra via Gcc-patches
Hi Richard,

Here is the immediate cleanup splitoff from the previous patch:

Simplify, refactor and improve various move immediate functions.
Allow 32-bit MOVZ/N as a valid 64-bit immediate which removes special
cases in aarch64_internal_mov_immediate.  Add new constraint so the movdi
pattern only needs a single alternative for move immediate.

Passes bootstrap and regress, OK for commit?

gcc/ChangeLog:

* config/aarch64/aarch64.cc (aarch64_bitmask_imm): Use unsigned type.
(aarch64_zeroextended_move_imm): New function.
(aarch64_move_imm): Refactor, assert mode is SImode or DImode.
(aarch64_internal_mov_immediate): Assert mode is SImode or DImode.
Simplify special cases.
(aarch64_uimm12_shift): Simplify code.
(aarch64_clamp_to_uimm12_shift): Likewise.
(aarch64_movw_imm): Remove.
(aarch64_float_const_rtx_p): Pass either SImode or DImode to
aarch64_internal_mov_immediate.
(aarch64_rtx_costs): Likewise.
* config/aarch64/aarch64.md (movdi_aarch64): Merge 'N' and 'M'
constraints into single 'O'.
(mov_aarch64): Likewise.
* config/aarch64/aarch64-protos.h (aarch64_move_imm): Use unsigned.
(aarch64_bitmask_imm): Likewise.
(aarch64_uimm12_shift): Likewise.
(aarch64_zeroextended_move_imm): New prototype.
* config/aarch64/constraints.md: Add 'O' for 32/64-bit immediates,
limit 'N' to 64-bit only moves.

---

diff --git a/gcc/config/aarch64/aarch64-protos.h 
b/gcc/config/aarch64/aarch64-protos.h
index 
1a71f02284137c64e7115b26e6aa00447596f105..a73bfa20acb9b92ae0475794c3f11c67d22feb97
 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -755,7 +755,7 @@ void aarch64_post_cfi_startproc (void);
 poly_int64 aarch64_initial_elimination_offset (unsigned, unsigned);
 int aarch64_get_condition_code (rtx);
 bool aarch64_address_valid_for_prefetch_p (rtx, bool);
-bool aarch64_bitmask_imm (HOST_WIDE_INT val, machine_mode);
+bool aarch64_bitmask_imm (unsigned HOST_WIDE_INT val, machine_mode);
 unsigned HOST_WIDE_INT aarch64_and_split_imm1 (HOST_WIDE_INT val_in);
 unsigned HOST_WIDE_INT aarch64_and_split_imm2 (HOST_WIDE_INT val_in);
 bool aarch64_and_bitmask_imm (unsigned HOST_WIDE_INT val_in, machine_mode 
mode);
@@ -792,7 +792,7 @@ bool aarch64_masks_and_shift_for_bfi_p (scalar_int_mode, 
unsigned HOST_WIDE_INT,
unsigned HOST_WIDE_INT,
unsigned HOST_WIDE_INT);
 bool aarch64_zero_extend_const_eq (machine_mode, rtx, machine_mode, rtx);
-bool aarch64_move_imm (HOST_WIDE_INT, machine_mode);
+bool aarch64_move_imm (unsigned HOST_WIDE_INT, machine_mode);
 machine_mode aarch64_sve_int_mode (machine_mode);
 opt_machine_mode aarch64_sve_pred_mode (unsigned int);
 machine_mode aarch64_sve_pred_mode (machine_mode);
@@ -842,8 +842,9 @@ bool aarch64_sve_float_arith_immediate_p (rtx, bool);
 bool aarch64_sve_float_mul_immediate_p (rtx);
 bool aarch64_split_dimode_const_store (rtx, rtx);
 bool aarch64_symbolic_address_p (rtx);
-bool aarch64_uimm12_shift (HOST_WIDE_INT);
+bool aarch64_uimm12_shift (unsigned HOST_WIDE_INT);
 int aarch64_movk_shift (const wide_int_ref &, const wide_int_ref &);
+bool aarch64_zeroextended_move_imm (unsigned HOST_WIDE_INT);
 bool aarch64_use_return_insn_p (void);
 const char *aarch64_output_casesi (rtx *);
 
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 
5d1ab5aa42b2cda0a655d2bc69c4df19da457ab3..798363bcc449c414de5bbb4f26b8e1c64a0cf71a
 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -5558,12 +5558,10 @@ aarch64_bitmask_imm (unsigned HOST_WIDE_INT val)
 
 /* Return true if VAL is a valid bitmask immediate for MODE.  */
 bool
-aarch64_bitmask_imm (HOST_WIDE_INT val_in, machine_mode mode)
+aarch64_bitmask_imm (unsigned HOST_WIDE_INT val, machine_mode mode)
 {
   if (mode == DImode)
-return aarch64_bitmask_imm (val_in);
-
-  unsigned HOST_WIDE_INT val = val_in;
+return aarch64_bitmask_imm (val);
 
   if (mode == SImode)
 return aarch64_bitmask_imm ((val & 0xffffffff) | (val << 32));
@@ -5602,51 +5600,60 @@ aarch64_check_bitmask (unsigned HOST_WIDE_INT val,
 }
 
 
-/* Return true if val is an immediate that can be loaded into a
-   register by a MOVZ instruction.  */
-static bool
-aarch64_movw_imm (HOST_WIDE_INT val, scalar_int_mode mode)
+/* Return true if immediate VAL can only be created by using a 32-bit
+   zero-extended move immediate, not by a 64-bit move.  */
+bool
+aarch64_zeroextended_move_imm (unsigned HOST_WIDE_INT val)
 {
-  if (GET_MODE_SIZE (mode) > 4)
-{
-  if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
-  || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
-   return 1;
-}
-  else
-{
-  /* Ignore sign extension.  */
-  val &= (HOST_WIDE_INT) 0xffffffff;
-}
-  return ((val & (((HOST_WIDE_INT) 0xffff)