On Mon, Jan 26, 2026 at 11:02:24AM +0000, Richard Earnshaw wrote:
> On 22/01/2026 16:51, Artemiy Volkov wrote:
> > On Thu, Jan 22, 2026 at 04:05:36PM +0000, Richard Earnshaw wrote:
> > > On 14/01/2026 14:20, Artemiy Volkov wrote:
> > > > This is a v2 of
> > > > https://gcc.gnu.org/pipermail/gcc-patches/2026-January/705195.html with
> > > > feedback from Richard and Christophe addressed.
> > > > 
> > > > The check_effective_target_arm_v8_3a_fp16_complex_neon_ok_nocache
> > > > procedure in target-supports.exp should return a set of flags providing
> > > > all of AdvSIMD, complex numbers, and fp16 capabilities.  However, to
> > > > achieve this we have to be able to overwrite the current -mfpu setting.
> > > > This means that the current empty string alternative for $flags does not
> > > > work for us.  This patch splits sets of options to iterate over between
> > > > the arm and the aarch64 backend and prohibits an empty string for the
> > > > former.  (This split of option sets allows us to remove the redundant
> > > > "+simd" from -march on aarch64, as simd is present on all base
> > > > architectures all the way from armv8-a.)  Moreover, it changes
> > > > check_effective_target_arm_v8_3a_complex_neon_ok_nocache in a similar 
> > > > way,
> > > > except offering an empty string as the first alternative to try testing
> > > > with the flags that gcc was configured with.
> > > > 
> > > > This (together with patch 1/2) fixes the
> > > > advsimd-intrinsics/vector-complex_f16.c testcase, which was previously
> > > > being compiled with the -mfpu=neon-fp16 flag added by the .exp file
> > > > itself.
> > > > 
> > > > Re-tested on aarch64 by me and on arm by Christophe.
> > > > 
> > > > Changes from v1:
> > > > 
> > > > - Add "" and "-mfpu=auto" as the first alternative
> > > >     in check_effective_target_arm_v8_3a{,_fp16}_complex_neon_ok,
> > > >     respectively.
> > > > - Clarify the changes to -march values in the aarch64 case in the commit
> > > >     message.
> > > > 
> > > > Co-authored-by: Richard Earnshaw <[email protected]>
> > > > 
> > > > gcc/testsuite/ChangeLog:
> > > > 
> > > >         * lib/target-supports.exp:
> > > >         (check_effective_target_arm_v8_3a_complex_neon_ok_nocache):
> > > >         Split and fill in arm and aarch64 compile options.  Remove the
> > > >         cpu_unset variable.
> > > >         (check_effective_target_arm_v8_3a_fp16_complex_neon_ok_nocache):
> > > >         Likewise.
> > > > ---
> > > >    gcc/testsuite/lib/target-supports.exp | 33 
> > > > ++++++++++++++++++---------
> > > >    1 file changed, 22 insertions(+), 11 deletions(-)
> > > > 
> > > > diff --git a/gcc/testsuite/lib/target-supports.exp 
> > > > b/gcc/testsuite/lib/target-supports.exp
> > > > index dbcba42629f..77399a04bc5 100644
> > > > --- a/gcc/testsuite/lib/target-supports.exp
> > > > +++ b/gcc/testsuite/lib/target-supports.exp
> > > > @@ -13717,27 +13717,33 @@ proc check_effective_target_inff { } {
> > > >    proc check_effective_target_arm_v8_3a_complex_neon_ok_nocache { } {
> > > >        global et_arm_v8_3a_complex_neon_flags
> > > >        set et_arm_v8_3a_complex_neon_flags ""
> > > > -    set cpu_unset ""
> > > >        if { ![istarget arm*-*-*] && ![istarget aarch64*-*-*] } {
> > > >         return 0;
> > > >        }
> > > >        if { [istarget arm*-*-*] } {
> > > > -       set cpu_unset "-mcpu=unset"
> > > > +       set flag_opts {
> > > > +           ""
> > > 
> > > I don't see how it can be right that we don't need -mcpu=auto here...
> > > 
> > > > +           "-mcpu=unset -march=armv8.3-a+simd"
> > > > +           "-mfloat-abi=softfp -mfpu=auto -mcpu=unset 
> > > > -march=armv8.3-a+simd"
> > > > +           "-mfloat-abi=hard -mfpu=auto -mcpu=unset 
> > > > -march=armv8.3-a+simd"
> > > > +       }
> > > > +    } else {
> > > > +       set flag_opts { "" "-march=armv8.3-a" }
> > > >        }
> > > >        # Iterate through sets of options to find the compiler flags that
> > > >        # need to be added to the -march option.
> > > > -    foreach flags {"" "-mfloat-abi=softfp -mfpu=auto" 
> > > > "-mfloat-abi=hard -mfpu=auto"} {
> > > > +    foreach flags $flag_opts {
> > > >         if { [check_no_compiler_messages_nocache \
> > > >                   arm_v8_3a_complex_neon_ok assembly {
> > > >             #if !defined (__ARM_FEATURE_COMPLEX)
> > > >             #error "__ARM_FEATURE_COMPLEX not defined"
> > > >             #endif
> > > >             #include <complex.h>
> > > > -       } "$flags $cpu_unset -march=armv8.3-a+simd"] } {
> > > > -           set et_arm_v8_3a_complex_neon_flags "$flags $cpu_unset 
> > > > -march=armv8.3-a+simd"
> > > > +       } "$flags"] } {
> > > > +           set et_arm_v8_3a_complex_neon_flags "$flags"
> > > >             return 1;
> > > >         }
> > > >        }
> > > > @@ -13765,19 +13771,25 @@ proc add_options_for_arm_v8_3a_complex_neon { 
> > > > flags } {
> > > >    proc check_effective_target_arm_v8_3a_fp16_complex_neon_ok_nocache { 
> > > > } {
> > > >        global et_arm_v8_3a_fp16_complex_neon_flags
> > > >        set et_arm_v8_3a_fp16_complex_neon_flags ""
> > > > -    set cpu_unset ""
> > > >        if { ![istarget arm*-*-*] && ![istarget aarch64*-*-*] } {
> > > >         return 0;
> > > >        }
> > > >        if { [istarget arm*-*-*] } {
> > > > -       set cpu_unset "-mcpu=unset"
> > > > +       set flag_opts {
> > > > +           "-mfpu=auto"
> > > 
> > > ... but do here.  This implies something else is wrong.  FWIW, I think the
> > > first case is what we really want, though there may be other bugs in some
> > > tests that need fixing as well.
> > 
> > The issue that "-mfpu=auto" is working around occurs at the intersection of
> > vector, complex, and fp16 capabilities.  This combination can not be
> > achieved for just any value of -mfpu, but rather only for those that
> > satisfy TARGET_VFP5.  In particular, when advsimd-intrinsics.exp injects
> > -mfpu=neon-fp-armv8 via code on ll. 57-63, there is nothing an individual
> > test can do short of resetting the value by doing -mfpu=auto before
> > trying any other options.  This is not a concern for the first check as
> > things work out fine for non-fp16 complex vectorization, and an empty
> > string is sufficient.
> 
> No, the issue is that the dg- headers in vector-complex_f16.c are completely
> broken.  You can't ask the framework to calculate the flags needed for
> adding complex, then ask it to calculate the flags needed for adding fp16
> and then finally try to merge the two; it simply won't work that way - these
> are two separate calculations that both start from the base flags, and
> what's more the results are cached for use across other tests.  Even worse,
> you can't then start trying to override the flags that have been calculated
> with something else - especially when you want to make that work in both
> aarch32 and aarch64.
> 
> So, instead of
> 
> /* { dg-require-effective-target arm_v8_3a_complex_neon_ok } */
> /* { dg-require-effective-target arm_v8_2a_fp16_neon_ok } */
> /* { dg-add-options arm_v8_3a_complex_neon } */
> /* { dg-add-options arm_v8_2a_fp16_neon } */
> /* { dg-additional-options "-O2 -march=armv8.3-a+fp16 -save-temps" } */
> 
> that test should be doing:
> 
> /* { dg-require-effective-target arm_v8_3a_fp16_complex_neon_ok } */
> /* { dg-add-options arm_v8_3a_fp16_complex_neon } */
> /* { dg-additional-options "-O2 -save-temps" } */
> 
> I'd be inclined to push a change like that directly, but I'm not entirely
> sure if Tamar had something else in mind when he originally wrote the patch,
> so I'd like to give him a chance to comment first.

This is exactly what patch 1/2 of this series is doing:
https://gcc.gnu.org/pipermail/gcc-patches/2026-January/705194.html.
Coincidentally, it has already been approved by Tamar.  I'm just waiting
to resolve this one to push both patches at the same time.

Even with 1/2 though, we still run into the problem I described above, which
necessitates this 2/2 patch.

Thanks,
Artemiy

> > These things are bound to surface every once in a while until the pattern
> > "find a set of flags to satisfy a target requirement and cache it, then
> > add a bunch of other, sometimes conflicting, flags in the .exp file, then
> > append the cached result" persists in the testsuite.  This patch does not
> > try to address this situation, but rather minimize the workaround for one
> > specific test, and as such is IMO a net improvement.
> > 
> > Thanks,
> > Artemiy
> > 
> > > 
> > > > +           "-mfpu=auto -mcpu=unset -march=armv8.3-a+fp16+simd"
> > > > +           "-mfloat-abi=softfp -mfpu=auto -mcpu=unset 
> > > > -march=armv8.3-a+fp16+simd"
> > > > +           "-mfloat-abi=hard -mfpu=auto -mcpu=unset 
> > > > -march=armv8.3-a+fp16+simd"
> > > > +       }
> > > > +    } else {
> > > > +       set flag_opts { "" "-march=armv8.3-a+fp16" }
> > > >        }
> > > >        # Iterate through sets of options to find the compiler flags that
> > > >        # need to be added to the -march option.
> > > > -    foreach flags {"" "-mfloat-abi=softfp -mfpu=auto" 
> > > > "-mfloat-abi=hard -mfpu=auto"} {
> > > > +    foreach flags $flag_opts {
> > > >         if { [check_no_compiler_messages_nocache \
> > > >                   arm_v8_3a_fp16_complex_neon_ok assembly {
> > > >             #if !defined (__ARM_FEATURE_COMPLEX)
> > > > @@ -13787,9 +13799,8 @@ proc 
> > > > check_effective_target_arm_v8_3a_fp16_complex_neon_ok_nocache { } {
> > > >             #error "__ARM_FEATURE_FP16_VECTOR_ARITHMETIC not defined"
> > > >             #endif
> > > >             #include <complex.h>
> > > > -       } "$flags $cpu_unset -march=armv8.3-a+fp16+simd"] } {
> > > > -           set et_arm_v8_3a_fp16_complex_neon_flags \
> > > > -                       "$flags $cpu_unset -march=armv8.3-a+fp16+simd"
> > > > +       } "$flags"] } {
> > > > +           set et_arm_v8_3a_fp16_complex_neon_flags "$flags"
> > > >             return 1;
> > > >         }
> > > >        }
> > > 
> > > R.
> 
> R.

Reply via email to