Re: [PATCH 2/4]AArch64: add new tuning param and attribute for enabling conditional early clobber
Tamar Christina writes: >> -Original Message- >> From: Tamar Christina >> Sent: Wednesday, May 22, 2024 10:29 AM >> To: Richard Sandiford >> Cc: gcc-patches@gcc.gnu.org; nd ; Richard Earnshaw >> ; Marcus Shawcroft >> ; ktkac...@gcc.gnu.org >> Subject: RE: [PATCH 2/4]AArch64: add new tuning param and attribute for >> enabling conditional early clobber >> >> > >> > Sorry for the bike-shedding, but how about something like "avoid_pred_rmw"? >> > (I'm open to other suggestions.) Just looking for something that describes >> > either the architecture or the end result that we want to achieve. >> > And preferable something fairly short :) >> > >> > avoid_* would be consistent with the existing "avoid_cross_loop_fma". >> > >> > > + >> > > #undef AARCH64_EXTRA_TUNING_OPTION >> > > diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h >> > > index >> > >> bbf11faaf4b4340956094a983f8b0dc2649b2d27..76a18dd511f40ebb58ed12d5 >> > 6b46c74084ba7c3c 100644 >> > > --- a/gcc/config/aarch64/aarch64.h >> > > +++ b/gcc/config/aarch64/aarch64.h >> > > @@ -495,6 +495,11 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = >> > AARCH64_FL_SM_OFF; >> > > enabled through +gcs. */ >> > > #define TARGET_GCS (AARCH64_ISA_GCS) >> > > >> > > +/* Prefer different predicate registers for the output of a predicated >> > > operation >> > over >> > > +re-using an existing input predicate. */ >> > > +#define TARGET_SVE_PRED_CLOBBER (TARGET_SVE \ >> > > + && (aarch64_tune_params.extra_tuning_flags \ >> > > + & >> > AARCH64_EXTRA_TUNE_EARLY_CLOBBER_SVE_PRED_DEST)) >> > > >> > > /* Standard register usage. 
*/ >> > > >> > > diff --git a/gcc/config/aarch64/aarch64.md >> > > b/gcc/config/aarch64/aarch64.md >> > > index >> > >> dbde066f7478bec51a8703b017ea553aa98be309..1ecd1a2812969504bd5114a >> > 53473b478c5ddba82 100644 >> > > --- a/gcc/config/aarch64/aarch64.md >> > > +++ b/gcc/config/aarch64/aarch64.md >> > > @@ -445,6 +445,10 @@ (define_enum_attr "arch" "arches" (const_string >> > "any")) >> > > ;; target-independent code. >> > > (define_attr "is_call" "no,yes" (const_string "no")) >> > > >> > > +;; Indicates whether we want to enable the pattern with an optional >> > > early >> > > +;; clobber for SVE predicates. >> > > +(define_attr "pred_clobber" "no,yes" (const_string "no")) >> > > + >> > > ;; [For compatibility with Arm in pipeline models] >> > > ;; Attribute that specifies whether or not the instruction touches fp >> > > ;; registers. >> > > @@ -461,7 +465,8 @@ (define_attr "fp" "no,yes" >> > > (define_attr "arch_enabled" "no,yes" >> > >(if_then_else >> > > (ior >> > > - (eq_attr "arch" "any") >> > > + (and (eq_attr "arch" "any") >> > > + (eq_attr "pred_clobber" "no")) >> > > >> > > (and (eq_attr "arch" "rcpc8_4") >> > >(match_test "AARCH64_ISA_RCPC8_4")) >> > > @@ -488,7 +493,10 @@ (define_attr "arch_enabled" "no,yes" >> > >(match_test "TARGET_SVE")) >> > > >> > > (and (eq_attr "arch" "sme") >> > > - (match_test "TARGET_SME"))) >> > > + (match_test "TARGET_SME")) >> > > + >> > > + (and (eq_attr "pred_clobber" "yes") >> > > + (match_test "TARGET_SVE_PRED_CLOBBER"))) >> > >> > IMO it'd be bettero handle pred_clobber separately from arch, as a new >> > top-level AND: >> > >> > (and >> > (ior >> > (eq_attr "pred_clobber" "no") >> > (match_test "!TARGET_...")) >> > (ior >> > ...existing arch tests...)) >> > >> > > Bootstrapped Regtested on aarch64-none-linux-gnu and no issues. > > Ok for mas
RE: [PATCH 2/4]AArch64: add new tuning param and attribute for enabling conditional early clobber
> -Original Message- > From: Tamar Christina > Sent: Wednesday, May 22, 2024 10:29 AM > To: Richard Sandiford > Cc: gcc-patches@gcc.gnu.org; nd ; Richard Earnshaw > ; Marcus Shawcroft > ; ktkac...@gcc.gnu.org > Subject: RE: [PATCH 2/4]AArch64: add new tuning param and attribute for > enabling conditional early clobber > > > > > Sorry for the bike-shedding, but how about something like "avoid_pred_rmw"? > > (I'm open to other suggestions.) Just looking for something that describes > > either the architecture or the end result that we want to achieve. > > And preferable something fairly short :) > > > > avoid_* would be consistent with the existing "avoid_cross_loop_fma". > > > > > + > > > #undef AARCH64_EXTRA_TUNING_OPTION > > > diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h > > > index > > > bbf11faaf4b4340956094a983f8b0dc2649b2d27..76a18dd511f40ebb58ed12d5 > > 6b46c74084ba7c3c 100644 > > > --- a/gcc/config/aarch64/aarch64.h > > > +++ b/gcc/config/aarch64/aarch64.h > > > @@ -495,6 +495,11 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = > > AARCH64_FL_SM_OFF; > > > enabled through +gcs. */ > > > #define TARGET_GCS (AARCH64_ISA_GCS) > > > > > > +/* Prefer different predicate registers for the output of a predicated > > > operation > > over > > > +re-using an existing input predicate. */ > > > +#define TARGET_SVE_PRED_CLOBBER (TARGET_SVE \ > > > + && (aarch64_tune_params.extra_tuning_flags \ > > > + & > > AARCH64_EXTRA_TUNE_EARLY_CLOBBER_SVE_PRED_DEST)) > > > > > > /* Standard register usage. */ > > > > > > diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md > > > index > > > dbde066f7478bec51a8703b017ea553aa98be309..1ecd1a2812969504bd5114a > > 53473b478c5ddba82 100644 > > > --- a/gcc/config/aarch64/aarch64.md > > > +++ b/gcc/config/aarch64/aarch64.md > > > @@ -445,6 +445,10 @@ (define_enum_attr "arch" "arches" (const_string > > "any")) > > > ;; target-independent code. 
> > > (define_attr "is_call" "no,yes" (const_string "no")) > > > > > > +;; Indicates whether we want to enable the pattern with an optional early > > > +;; clobber for SVE predicates. > > > +(define_attr "pred_clobber" "no,yes" (const_string "no")) > > > + > > > ;; [For compatibility with Arm in pipeline models] > > > ;; Attribute that specifies whether or not the instruction touches fp > > > ;; registers. > > > @@ -461,7 +465,8 @@ (define_attr "fp" "no,yes" > > > (define_attr "arch_enabled" "no,yes" > > >(if_then_else > > > (ior > > > - (eq_attr "arch" "any") > > > + (and (eq_attr "arch" "any") > > > + (eq_attr "pred_clobber" "no")) > > > > > > (and (eq_attr "arch" "rcpc8_4") > > >(match_test "AARCH64_ISA_RCPC8_4")) > > > @@ -488,7 +493,10 @@ (define_attr "arch_enabled" "no,yes" > > >(match_test "TARGET_SVE")) > > > > > > (and (eq_attr "arch" "sme") > > > - (match_test "TARGET_SME"))) > > > + (match_test "TARGET_SME")) > > > + > > > + (and (eq_attr "pred_clobber" "yes") > > > + (match_test "TARGET_SVE_PRED_CLOBBER"))) > > > > IMO it'd be bettero handle pred_clobber separately from arch, as a new > > top-level AND: > > > > (and > > (ior > > (eq_attr "pred_clobber" "no") > > (match_test "!TARGET_...")) > > (ior > > ...existing arch tests...)) > > > Bootstrapped Regtested on aarch64-none-linux-gnu and no issues. Ok for master? Thanks, Tamar gcc/ChangeLog: * config/aarch64/aarch64-tuning-flags.def (AVOID_PRED_RMW): New. * config/aarch64/aarch64.h (TARGET_SVE_PRED_CLOBBER): New. * config/aarch64/aarch64.md (pred_clobber): New. (arch_enabled): Use it. -- inline copy of patch -- diff --git a/gcc/config/aarch64/aarch64-tuning-flags.def b/gcc/config/aarch64/aarch64-tuning-flags.def index d5bcaebce770f0b217aac783063d39135f754c77..a9f48f
RE: [PATCH 2/4]AArch64: add new tuning param and attribute for enabling conditional early clobber
> > Sorry for the bike-shedding, but how about something like "avoid_pred_rmw"? > (I'm open to other suggestions.) Just looking for something that describes > either the architecture or the end result that we want to achieve. > And preferable something fairly short :) > > avoid_* would be consistent with the existing "avoid_cross_loop_fma". > > > + > > #undef AARCH64_EXTRA_TUNING_OPTION > > diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h > > index > bbf11faaf4b4340956094a983f8b0dc2649b2d27..76a18dd511f40ebb58ed12d5 > 6b46c74084ba7c3c 100644 > > --- a/gcc/config/aarch64/aarch64.h > > +++ b/gcc/config/aarch64/aarch64.h > > @@ -495,6 +495,11 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = > AARCH64_FL_SM_OFF; > > enabled through +gcs. */ > > #define TARGET_GCS (AARCH64_ISA_GCS) > > > > +/* Prefer different predicate registers for the output of a predicated > > operation > over > > +re-using an existing input predicate. */ > > +#define TARGET_SVE_PRED_CLOBBER (TARGET_SVE \ > > +&& (aarch64_tune_params.extra_tuning_flags \ > > +& > AARCH64_EXTRA_TUNE_EARLY_CLOBBER_SVE_PRED_DEST)) > > > > /* Standard register usage. */ > > > > diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md > > index > dbde066f7478bec51a8703b017ea553aa98be309..1ecd1a2812969504bd5114a > 53473b478c5ddba82 100644 > > --- a/gcc/config/aarch64/aarch64.md > > +++ b/gcc/config/aarch64/aarch64.md > > @@ -445,6 +445,10 @@ (define_enum_attr "arch" "arches" (const_string > "any")) > > ;; target-independent code. > > (define_attr "is_call" "no,yes" (const_string "no")) > > > > +;; Indicates whether we want to enable the pattern with an optional early > > +;; clobber for SVE predicates. > > +(define_attr "pred_clobber" "no,yes" (const_string "no")) > > + > > ;; [For compatibility with Arm in pipeline models] > > ;; Attribute that specifies whether or not the instruction touches fp > > ;; registers. 
> > @@ -461,7 +465,8 @@ (define_attr "fp" "no,yes" > > (define_attr "arch_enabled" "no,yes" > >(if_then_else > > (ior > > - (eq_attr "arch" "any") > > + (and (eq_attr "arch" "any") > > +(eq_attr "pred_clobber" "no")) > > > > (and (eq_attr "arch" "rcpc8_4") > > (match_test "AARCH64_ISA_RCPC8_4")) > > @@ -488,7 +493,10 @@ (define_attr "arch_enabled" "no,yes" > > (match_test "TARGET_SVE")) > > > > (and (eq_attr "arch" "sme") > > -(match_test "TARGET_SME"))) > > +(match_test "TARGET_SME")) > > + > > + (and (eq_attr "pred_clobber" "yes") > > +(match_test "TARGET_SVE_PRED_CLOBBER"))) > > IMO it'd be bettero handle pred_clobber separately from arch, as a new > top-level AND: > > (and > (ior > (eq_attr "pred_clobber" "no") > (match_test "!TARGET_...")) > (ior > ...existing arch tests...)) > Bootstrapped Regtested on aarch64-none-linux-gnu and no issues. Ok for master? Thanks, Tamar gcc/ChangeLog: * config/aarch64/aarch64-tuning-flags.def (AVOID_PRED_RMW): New. * config/aarch64/aarch64.h (TARGET_SVE_PRED_CLOBBER): New. * config/aarch64/aarch64.md (pred_clobber): New. (arch_enabled): Use it. -- inline copy of patch -- diff --git a/gcc/config/aarch64/aarch64-tuning-flags.def b/gcc/config/aarch64/aarch64-tuning-flags.def index d5bcaebce770f0b217aac783063d39135f754c77..a9f48f5d3d4ea32fbf53086ba21eab4bc65b6dcb 100644 --- a/gcc/config/aarch64/aarch64-tuning-flags.def +++ b/gcc/config/aarch64/aarch64-tuning-flags.def @@ -48,4 +48,8 @@ AARCH64_EXTRA_TUNING_OPTION ("avoid_cross_loop_fma", AVOID_CROSS_LOOP_FMA) AARCH64_EXTRA_TUNING_OPTION ("fully_pipelined_fma", FULLY_PIPELINED_FMA) +/* Enable is the target prefers to use a fresh register for predicate outputs + rather than re-use an input predicate register. 
*/ +AARCH64_EXTRA_TUNING_OPTION ("avoid_pred_rmw", AVOID_PRED_RMW) + #undef AARCH64_EXTRA_TUNING_OPTION diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h index bbf11faaf4b4340956094a983f8b0dc2649b2d27..e7669e65d7dae5df2ba42c265079b1856a5c382b 100644 --- a/gcc/config/aarch64/aarch64.h +++ b/gcc/config/aarch64/aarch64.h @@ -495,6 +495,11 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF; enabled through +gcs. */ #define TARGET_GCS (AARCH64_ISA_GCS) +/* Prefer different predicate registers for the output of a predicated operation over +re-using an existing input predicate. */ +#define TARGET_SVE_PRED_CLOBBER (TARGET_SVE \ +&& (aarch64_tune_params.extra_tuning_flags \ +& AARCH64_EXTRA_TUNE_AVOID_PRED_RMW)) /* Standard register usage. */ diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index dbde066f7478bec51a8703b017ea553aa98be309..52e5adba4172e14b794b5df9394e58ce49ef8b7f 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/con
RE: [PATCH 2/4]AArch64: add new tuning param and attribute for enabling conditional early clobber
> -Original Message- > From: Richard Sandiford > Sent: Wednesday, May 15, 2024 11:56 AM > To: Tamar Christina > Cc: gcc-patches@gcc.gnu.org; nd ; Richard Earnshaw > ; Marcus Shawcroft > ; ktkac...@gcc.gnu.org > Subject: Re: [PATCH 2/4]AArch64: add new tuning param and attribute for > enabling conditional early clobber > > Tamar Christina writes: > > Hi All, > > > > This adds a new tuning parameter EARLY_CLOBBER_SVE_PRED_DEST for AArch64 > to > > allow us to conditionally enable the early clobber alternatives based on the > > tuning models. > > > > Bootstrapped Regtested on aarch64-none-linux-gnu and no issues. > > > > Ok for master? > > > > Thanks, > > Tamar > > > > gcc/ChangeLog: > > > > * config/aarch64/aarch64-tuning-flags.def > > (EARLY_CLOBBER_SVE_PRED_DEST): New. > > * config/aarch64/aarch64.h (TARGET_SVE_PRED_CLOBBER): New. > > * config/aarch64/aarch64.md (pred_clobber): New. > > (arch_enabled): Use it. > > > > --- > > diff --git a/gcc/config/aarch64/aarch64-tuning-flags.def > b/gcc/config/aarch64/aarch64-tuning-flags.def > > index > d5bcaebce770f0b217aac783063d39135f754c77..49fbad3ff28bc82b25c61ac50 > 1ccf533ec4b4c3f 100644 > > --- a/gcc/config/aarch64/aarch64-tuning-flags.def > > +++ b/gcc/config/aarch64/aarch64-tuning-flags.def > > @@ -48,4 +48,8 @@ AARCH64_EXTRA_TUNING_OPTION > ("avoid_cross_loop_fma", AVOID_CROSS_LOOP_FMA) > > > > AARCH64_EXTRA_TUNING_OPTION ("fully_pipelined_fma", > FULLY_PIPELINED_FMA) > > > > +/* Enable is the target prefers to use a fresh register for predicate > > outputs > > + rather than re-use an input predicate register. */ > > +AARCH64_EXTRA_TUNING_OPTION ("early_clobber_sve_pred_dest", > EARLY_CLOBBER_SVE_PRED_DEST) > > Sorry for the bike-shedding, but how about something like "avoid_pred_rmw"? > (I'm open to other suggestions.) Just looking for something that describes > either the architecture or the end result that we want to achieve. 
> And preferably something fairly short :) > > avoid_* would be consistent with the existing "avoid_cross_loop_fma". Sure, happy to, it's something we initially struggled with naming internally as well. It sounds like there's precedent for the avoid_ naming, so I'm happy to use it. Will respin with it. Thanks, Tamar > > > + > > #undef AARCH64_EXTRA_TUNING_OPTION > > diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h > > index > bbf11faaf4b4340956094a983f8b0dc2649b2d27..76a18dd511f40ebb58ed12d5 > 6b46c74084ba7c3c 100644 > > --- a/gcc/config/aarch64/aarch64.h > > +++ b/gcc/config/aarch64/aarch64.h > > @@ -495,6 +495,11 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = > AARCH64_FL_SM_OFF; > > enabled through +gcs. */ > > #define TARGET_GCS (AARCH64_ISA_GCS) > > > > +/* Prefer different predicate registers for the output of a predicated > > operation > over > > +re-using an existing input predicate. */ > > +#define TARGET_SVE_PRED_CLOBBER (TARGET_SVE \ > > +&& (aarch64_tune_params.extra_tuning_flags \ > > +& > AARCH64_EXTRA_TUNE_EARLY_CLOBBER_SVE_PRED_DEST)) > > > > /* Standard register usage. */ > > > > diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md > > index > dbde066f7478bec51a8703b017ea553aa98be309..1ecd1a2812969504bd5114a > 53473b478c5ddba82 100644 > > --- a/gcc/config/aarch64/aarch64.md > > +++ b/gcc/config/aarch64/aarch64.md > > @@ -445,6 +445,10 @@ (define_enum_attr "arch" "arches" (const_string > "any")) > > ;; target-independent code. > > (define_attr "is_call" "no,yes" (const_string "no")) > > > > +;; Indicates whether we want to enable the pattern with an optional early > > +;; clobber for SVE predicates. > > +(define_attr "pred_clobber" "no,yes" (const_string "no")) > > + > > ;; [For compatibility with Arm in pipeline models] > > ;; Attribute that specifies whether or not the instruction touches fp > > ;; registers. 
> > @@ -461,7 +465,8 @@ (define_attr "fp" "no,yes" > > (define_attr "arch_enabled" "no,yes" > >(if_then_else > > (ior > > - (eq_attr "arch" "any") > > + (and (eq_attr "arch" "any") > > +(eq_attr "pred_clobber" "no")) > > > > (and (eq_attr "arch" "rcpc8_4") > > (match_test "AARCH64_ISA_RCPC8_4")) > > @@ -488,7 +493,10 @@ (define_attr "arch_enabled" "no,yes" > > (match_test "TARGET_SVE")) > > > > (and (eq_attr "arch" "sme") > > -(match_test "TARGET_SME"))) > > +(match_test "TARGET_SME")) > > + > > + (and (eq_attr "pred_clobber" "yes") > > +(match_test "TARGET_SVE_PRED_CLOBBER"))) > > IMO it'd be better to handle pred_clobber separately from arch, as a new > top-level AND: > > (and > (ior > (eq_attr "pred_clobber" "no") > (match_test "!TARGET_...")) > (ior > ...existing arch tests...)) > > Thanks, > Richard
Re: [PATCH 2/4]AArch64: add new tuning param and attribute for enabling conditional early clobber
Tamar Christina writes: > Hi All, > > This adds a new tuning parameter EARLY_CLOBBER_SVE_PRED_DEST for AArch64 to > allow us to conditionally enable the early clobber alternatives based on the > tuning models. > > Bootstrapped Regtested on aarch64-none-linux-gnu and no issues. > > Ok for master? > > Thanks, > Tamar > > gcc/ChangeLog: > > * config/aarch64/aarch64-tuning-flags.def > (EARLY_CLOBBER_SVE_PRED_DEST): New. > * config/aarch64/aarch64.h (TARGET_SVE_PRED_CLOBBER): New. > * config/aarch64/aarch64.md (pred_clobber): New. > (arch_enabled): Use it. > > --- > diff --git a/gcc/config/aarch64/aarch64-tuning-flags.def > b/gcc/config/aarch64/aarch64-tuning-flags.def > index > d5bcaebce770f0b217aac783063d39135f754c77..49fbad3ff28bc82b25c61ac501ccf533ec4b4c3f > 100644 > --- a/gcc/config/aarch64/aarch64-tuning-flags.def > +++ b/gcc/config/aarch64/aarch64-tuning-flags.def > @@ -48,4 +48,8 @@ AARCH64_EXTRA_TUNING_OPTION ("avoid_cross_loop_fma", > AVOID_CROSS_LOOP_FMA) > > AARCH64_EXTRA_TUNING_OPTION ("fully_pipelined_fma", FULLY_PIPELINED_FMA) > > +/* Enable is the target prefers to use a fresh register for predicate outputs > + rather than re-use an input predicate register. */ > +AARCH64_EXTRA_TUNING_OPTION ("early_clobber_sve_pred_dest", > EARLY_CLOBBER_SVE_PRED_DEST) Sorry for the bike-shedding, but how about something like "avoid_pred_rmw"? (I'm open to other suggestions.) Just looking for something that describes either the architecture or the end result that we want to achieve. And preferably something fairly short :) avoid_* would be consistent with the existing "avoid_cross_loop_fma". 
> + > #undef AARCH64_EXTRA_TUNING_OPTION > diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h > index > bbf11faaf4b4340956094a983f8b0dc2649b2d27..76a18dd511f40ebb58ed12d56b46c74084ba7c3c > 100644 > --- a/gcc/config/aarch64/aarch64.h > +++ b/gcc/config/aarch64/aarch64.h > @@ -495,6 +495,11 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = > AARCH64_FL_SM_OFF; > enabled through +gcs. */ > #define TARGET_GCS (AARCH64_ISA_GCS) > > +/* Prefer different predicate registers for the output of a predicated > operation over > +re-using an existing input predicate. */ > +#define TARGET_SVE_PRED_CLOBBER (TARGET_SVE \ > + && (aarch64_tune_params.extra_tuning_flags \ > + & > AARCH64_EXTRA_TUNE_EARLY_CLOBBER_SVE_PRED_DEST)) > > /* Standard register usage. */ > > diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md > index > dbde066f7478bec51a8703b017ea553aa98be309..1ecd1a2812969504bd5114a53473b478c5ddba82 > 100644 > --- a/gcc/config/aarch64/aarch64.md > +++ b/gcc/config/aarch64/aarch64.md > @@ -445,6 +445,10 @@ (define_enum_attr "arch" "arches" (const_string "any")) > ;; target-independent code. > (define_attr "is_call" "no,yes" (const_string "no")) > > +;; Indicates whether we want to enable the pattern with an optional early > +;; clobber for SVE predicates. > +(define_attr "pred_clobber" "no,yes" (const_string "no")) > + > ;; [For compatibility with Arm in pipeline models] > ;; Attribute that specifies whether or not the instruction touches fp > ;; registers. 
> @@ -461,7 +465,8 @@ (define_attr "fp" "no,yes" > (define_attr "arch_enabled" "no,yes" >(if_then_else > (ior > - (eq_attr "arch" "any") > + (and (eq_attr "arch" "any") > + (eq_attr "pred_clobber" "no")) > > (and (eq_attr "arch" "rcpc8_4") >(match_test "AARCH64_ISA_RCPC8_4")) > @@ -488,7 +493,10 @@ (define_attr "arch_enabled" "no,yes" >(match_test "TARGET_SVE")) > > (and (eq_attr "arch" "sme") > - (match_test "TARGET_SME"))) > + (match_test "TARGET_SME")) > + > + (and (eq_attr "pred_clobber" "yes") > + (match_test "TARGET_SVE_PRED_CLOBBER"))) IMO it'd be better to handle pred_clobber separately from arch, as a new top-level AND: (and (ior (eq_attr "pred_clobber" "no") (match_test "!TARGET_...")) (ior ...existing arch tests...)) Thanks, Richard
[PATCH 2/4]AArch64: add new tuning param and attribute for enabling conditional early clobber
Hi All, This adds a new tuning parameter EARLY_CLOBBER_SVE_PRED_DEST for AArch64 to allow us to conditionally enable the early clobber alternatives based on the tuning models. Bootstrapped Regtested on aarch64-none-linux-gnu and no issues. Ok for master? Thanks, Tamar gcc/ChangeLog: * config/aarch64/aarch64-tuning-flags.def (EARLY_CLOBBER_SVE_PRED_DEST): New. * config/aarch64/aarch64.h (TARGET_SVE_PRED_CLOBBER): New. * config/aarch64/aarch64.md (pred_clobber): New. (arch_enabled): Use it. --- diff --git a/gcc/config/aarch64/aarch64-tuning-flags.def b/gcc/config/aarch64/aarch64-tuning-flags.def index d5bcaebce770f0b217aac783063d39135f754c77..49fbad3ff28bc82b25c61ac501ccf533ec4b4c3f 100644 --- a/gcc/config/aarch64/aarch64-tuning-flags.def +++ b/gcc/config/aarch64/aarch64-tuning-flags.def @@ -48,4 +48,8 @@ AARCH64_EXTRA_TUNING_OPTION ("avoid_cross_loop_fma", AVOID_CROSS_LOOP_FMA) AARCH64_EXTRA_TUNING_OPTION ("fully_pipelined_fma", FULLY_PIPELINED_FMA) +/* Enable is the target prefers to use a fresh register for predicate outputs + rather than re-use an input predicate register. */ +AARCH64_EXTRA_TUNING_OPTION ("early_clobber_sve_pred_dest", EARLY_CLOBBER_SVE_PRED_DEST) + #undef AARCH64_EXTRA_TUNING_OPTION diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h index bbf11faaf4b4340956094a983f8b0dc2649b2d27..76a18dd511f40ebb58ed12d56b46c74084ba7c3c 100644 --- a/gcc/config/aarch64/aarch64.h +++ b/gcc/config/aarch64/aarch64.h @@ -495,6 +495,11 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF; enabled through +gcs. */ #define TARGET_GCS (AARCH64_ISA_GCS) +/* Prefer different predicate registers for the output of a predicated operation over +re-using an existing input predicate. */ +#define TARGET_SVE_PRED_CLOBBER (TARGET_SVE \ +&& (aarch64_tune_params.extra_tuning_flags \ +& AARCH64_EXTRA_TUNE_EARLY_CLOBBER_SVE_PRED_DEST)) /* Standard register usage. 
*/ diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index dbde066f7478bec51a8703b017ea553aa98be309..1ecd1a2812969504bd5114a53473b478c5ddba82 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -445,6 +445,10 @@ (define_enum_attr "arch" "arches" (const_string "any")) ;; target-independent code. (define_attr "is_call" "no,yes" (const_string "no")) +;; Indicates whether we want to enable the pattern with an optional early +;; clobber for SVE predicates. +(define_attr "pred_clobber" "no,yes" (const_string "no")) + ;; [For compatibility with Arm in pipeline models] ;; Attribute that specifies whether or not the instruction touches fp ;; registers. @@ -461,7 +465,8 @@ (define_attr "fp" "no,yes" (define_attr "arch_enabled" "no,yes" (if_then_else (ior - (eq_attr "arch" "any") + (and (eq_attr "arch" "any") +(eq_attr "pred_clobber" "no")) (and (eq_attr "arch" "rcpc8_4") (match_test "AARCH64_ISA_RCPC8_4")) @@ -488,7 +493,10 @@ (define_attr "arch_enabled" "no,yes" (match_test "TARGET_SVE")) (and (eq_attr "arch" "sme") -(match_test "TARGET_SME"))) +(match_test "TARGET_SME")) + + (and (eq_attr "pred_clobber" "yes") +(match_test "TARGET_SVE_PRED_CLOBBER"))) (const_string "yes") (const_string "no"))) -- diff --git a/gcc/config/aarch64/aarch64-tuning-flags.def b/gcc/config/aarch64/aarch64-tuning-flags.def index d5bcaebce770f0b217aac783063d39135f754c77..49fbad3ff28bc82b25c61ac501ccf533ec4b4c3f 100644 --- a/gcc/config/aarch64/aarch64-tuning-flags.def +++ b/gcc/config/aarch64/aarch64-tuning-flags.def @@ -48,4 +48,8 @@ AARCH64_EXTRA_TUNING_OPTION ("avoid_cross_loop_fma", AVOID_CROSS_LOOP_FMA) AARCH64_EXTRA_TUNING_OPTION ("fully_pipelined_fma", FULLY_PIPELINED_FMA) +/* Enable is the target prefers to use a fresh register for predicate outputs + rather than re-use an input predicate register. 
*/ +AARCH64_EXTRA_TUNING_OPTION ("early_clobber_sve_pred_dest", EARLY_CLOBBER_SVE_PRED_DEST) + #undef AARCH64_EXTRA_TUNING_OPTION diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h index bbf11faaf4b4340956094a983f8b0dc2649b2d27..76a18dd511f40ebb58ed12d56b46c74084ba7c3c 100644 --- a/gcc/config/aarch64/aarch64.h +++ b/gcc/config/aarch64/aarch64.h @@ -495,6 +495,11 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF; enabled through +gcs. */ #define TARGET_GCS (AARCH64_ISA_GCS) +/* Prefer different predicate registers for the output of a predicated operation over +re-using an existing input predicate. */ +#define TARGET_SVE_PRED_CLOBBER (TARGET_SVE \ + && (aarch64_tune_params.extra_tuning_flags \ + & AARCH64_EXTRA_TUNE_EARLY_CLOBBER_SVE_PRED_DEST)) /* Standard register usage. */ diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/a