> From: Dipesh Sharma <[email protected]>
> Sent: Friday, June 26, 2026 8:16 PM
> 
> ---
> gcc/ChangeLog:
> 
>         * common/config/i386/cpuinfo.h (get_available_features):
>         * common/config/i386/i386-common.cc
> (OPTION_MASK_ISA2_AVX10_V2_AUX_SET):
>         (OPTION_MASK_ISA2_AVX10_V2_AUX_UNSET):
>         (ix86_handle_option):
>         * common/config/i386/i386-cpuinfo.h (enum processor_features):
>         * common/config/i386/i386-isas.h:
>         * config.gcc:
>         * config/i386/cpuid.h (bit_AVX10_V2_AUX):
>         * config/i386/i386-builtin-types.def (V16QI):
>         * config/i386/i386-builtin.def (BDESC):
>         * config/i386/i386-c.cc (ix86_target_macros_internal):
>         * config/i386/i386-expand.cc (ix86_expand_args_builtin):
>         * config/i386/i386-isa.def (AVX10_V2_AUX):
>         * config/i386/i386-options.cc (ix86_valid_target_attribute_inner_p):
>         * config/i386/i386.opt:
>         * config/i386/immintrin.h:
>         * config/i386/sse.md (vcvt<convertps2fp8><mode>):
>         (vcvt<convertps2fp8><mode>_mask):
>         (*vcvt<convertps2fp8><mode>_mask):
>         * doc/invoke.texi:
>         * config/i386/avx10_v2_auxintrin.h: New file.
> 
> gcc/testsuite/ChangeLog:
> 
>         * lib/target-supports.exp:
>         * gcc.target/i386/avx10_2-v2-aux-convert-1.c: New test.

Please fill in the ChangeLog and describe what you are changing. You only
generated the ChangeLog skeleton but did not fill it in.

> 
> diff --git a/gcc/common/config/i386/cpuinfo.h
> b/gcc/common/config/i386/cpuinfo.h
> index f59090c8cde..6310e7294da 100644
> --- a/gcc/common/config/i386/cpuinfo.h
> +++ b/gcc/common/config/i386/cpuinfo.h
> @@ -1139,6 +1139,10 @@ get_available_features (struct __processor_model
> *cpu_model,
>         set_feature (FEATURE_AVX10_1);
>         break;
>       }
> +      /* CPUID.(EAX=24H, ECX=1) for AVX10_V2_AUX features.  */
> +     __cpuid_count (0x24, 1, eax, ebx, ecx, edx);
> +     if (ecx & bit_AVX10_V2_AUX)
> +     set_feature (FEATURE_AVX10_V2_AUX);
>      }
> 

Please get the max_subleaf_level first then guard with it to avoid
unexpected issues.

Also, in GCC, we use avx10v2aux/AVX10V2AUX directly, with no "_"
connecting the parts of feature names and options (except for amx).
Please change them.

>    /* Check cpuid level of extended features.  */
> diff --git a/gcc/common/config/i386/i386-common.cc
> b/gcc/common/config/i386/i386-common.cc
> index 607581b0f09..a57f7887b58 100644
> --- a/gcc/common/config/i386/i386-common.cc
> +++ b/gcc/common/config/i386/i386-common.cc
> @@ -334,6 +335,7 @@ along with GCC; see the file COPYING3.  If not see
>  #define OPTION_MASK_ISA2_MOVRS_UNSET OPTION_MASK_ISA2_MOVRS
>  #define OPTION_MASK_ISA2_AMX_MOVRS_UNSET
> OPTION_MASK_ISA2_AMX_MOVRS
>  #define OPTION_MASK_ISA2_AVX512BMM_UNSET
> OPTION_MASK_ISA2_AVX512BMM
> +#define OPTION_MASK_ISA2_AVX10_V2_AUX_UNSET
> OPTION_MASK_ISA2_AVX10_V2_AUX
> 

You should also disable AVX10V2AUX for AVX10_1_UNSET.

>  /* SSE4 includes both SSE4.1 and SSE4.2.  -mno-sse4 should the same
>     as -mno-sse4.1. */
> @@ -1417,6 +1419,21 @@ ix86_handle_option (struct gcc_options *opts,
>         opts->x_ix86_isa_flags2_explicit |=
> OPTION_MASK_ISA2_AVX10_2_UNSET;
>       }
>        return true;
> +
> +     case OPT_mavx10_v2_aux:

The indentation seems messed up to me.

> diff --git a/gcc/config/i386/avx10_v2_auxintrin.h
> b/gcc/config/i386/avx10_v2_auxintrin.h
> new file mode 100644
> index 00000000000..3ca4d0b1166
> --- /dev/null
> +++ b/gcc/config/i386/avx10_v2_auxintrin.h
> @@ -0,0 +1,409 @@
> +/* Copyright (C) 2024-2026 Free Software Foundation, Inc.

The copyright year should be 2026 only.

> diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
> index ff08188c761..6f6fc081448 100644
> --- a/gcc/config/i386/i386.opt
> +++ b/gcc/config/i386/i386.opt
> @@ -1389,3 +1389,7 @@ Support AMX-MOVRS built-in functions and code
> generation.
>  mavx512bmm
>  Target Mask(ISA2_AVX512BMM) Var(ix86_isa_flags2) Save
>  Support AVX512BMM built-in functions and code generation.
> +
> +mavx10-v2-aux
> +Target Mask(ISA2_AVX10_V2_AUX) Var(ix86_isa_flags2) Save
> +Support AVX10_V2_AUX built-in functions and code generation.

You forgot to regen i386.opt.urls in your patch.

> diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> index bb150f6b5f9..c5c3b48e63e 100644
> --- a/gcc/config/i386/sse.md
> +++ b/gcc/config/i386/sse.md
> @@ -33795,3 +33801,58 @@
>    "vbitrevb\t{%1, %0|%0, %1}"
>    [(set_attr "prefix" "evex")
>     (set_attr "mode" "<sseinsnmode>")])
> +
> +;; AVX10_V2_AUX instructions
> +
> +;; FP32 to FP8 single-source converts (VCVTPS2BF8, VCVTPS2BF8S,
> +;; VCVTPS2HF8, VCVTPS2HF8S)
> +
> +(define_int_iterator UNSPEC_CONVERTPS2FP8
> +  [UNSPEC_VCVTPS2BF8 UNSPEC_VCVTPS2BF8S
> +   UNSPEC_VCVTPS2HF8 UNSPEC_VCVTPS2HF8S])
> +
> +(define_int_attr convertps2fp8
> +  [(UNSPEC_VCVTPS2BF8 "ps2bf8")
> +   (UNSPEC_VCVTPS2BF8S "ps2bf8s")
> +   (UNSPEC_VCVTPS2HF8 "ps2hf8")
> +   (UNSPEC_VCVTPS2HF8S "ps2hf8s")])
> +
> +(define_insn "vcvt<convertps2fp8><mode>"
> +  [(set (match_operand:V16QI 0 "register_operand" "=v")
> +     (unspec:V16QI
> +       [(match_operand:VF1_AVX512VL 1 "nonimmediate_operand"
> "vm")]
> +       UNSPEC_CONVERTPS2FP8))]
> +  "TARGET_AVX10_V2_AUX"
> +  "vcvt<convertps2fp8>\t{%1, %0|%0, %1}"
> +  [(set_attr "prefix" "evex")
> +   (set_attr "mode" "<sseinsnmode>")])
> +
> +(define_expand "vcvt<convertps2fp8><mode>_mask"
> +  [(set (match_operand:V16QI 0 "register_operand")
> +     (vec_merge:V16QI
> +       (unspec:V16QI
> +         [(match_operand:VF1_AVX512VL 1 "nonimmediate_operand")]
> +         UNSPEC_CONVERTPS2FP8)
> +       (match_operand:V16QI 2 "nonimm_or_0_operand")
> +       (match_operand:<avx512fmaskmode> 3
> "register_or_constm1_operand")))]
> +  "TARGET_AVX10_V2_AUX"
> +{
> +  if (CONST_INT_P (operands[3]))
> +    {
> +      emit_insn (gen_vcvt<convertps2fp8><mode> (operands[0],
> operands[1]));
> +      DONE;
> +    }
> +})
> +
> +(define_insn "*vcvt<convertps2fp8><mode>_mask"
> +  [(set (match_operand:V16QI 0 "register_operand" "=v")
> +     (vec_merge:V16QI
> +       (unspec:V16QI
> +         [(match_operand:VF1_AVX512VL 1 "nonimmediate_operand"
> "vm")]
> +         UNSPEC_CONVERTPS2FP8)
> +       (match_operand:V16QI 2 "nonimm_or_0_operand" "0C")
> +       (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
> +  "TARGET_AVX10_V2_AUX"
> +  "vcvt<convertps2fp8>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
> +  [(set_attr "prefix" "evex")
> +   (set_attr "mode" "<sseinsnmode>")])

The mask handling is totally wrong here. When the masked size is smaller
than the actual size, you need to zero the upper part. There are also other
similar cases in sse.md. Please refer to them.

Also, please use the <mask_name> iterator for normal cases. We do not want
"%0%{%3%}%N2" everywhere unless we really need to separate the mask
and non-mask patterns (zeroing the upper part is one such case).

Nearly all the convert patterns in this patch series need to be redone, since
many of them handle the mask incorrectly: they need to zero the upper part.
Also, no pattern uses <mask_name> to simplify the pattern.

You did not notice this because there is no runtime test in the
whole patch series. It would have been found easily if there were one.

> diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
> index 8da5f03ccbd..a8685896897 100644
> --- a/gcc/doc/invoke.texi
> +++ b/gcc/doc/invoke.texi
> @@ -1551,7 +1551,7 @@ See RS/6000 and PowerPC Options.
>  -mavxneconvert  -mcmpccxadd  -mamx-fp16  -mprefetchi  -mraoint
>  -mamx-complex  -mavxvnniint16  -msm3  -msha512  -msm4  -mapxf
>  -musermsr  -mavx10.1  -mavx10.2  -mamx-avx512  -mamx-tf32  -mmovrs
> --mamx-movrs  -mavx512bmm  -mcldemote  -mms-bitfields
> +-mamx-movrs  -mavx512bmm -mavx10-v2-aux  -mcldemote  -mms-bitfields
>  -mno-align-stringops  -minline-all-stringops
>  -minline-stringops-dynamically  -mstringop-strategy=@var{alg}
>  -mkl  -mwidekl
> @@ -36192,6 +36192,11 @@ Support MOVRS built-in functions and code
> generation.
>  @opindex mno-amx-movrs
>  @item -mamx-movrs
>  Support AMX-MOVRS built-in functions and code generation.
> +
> +@opindex mavx10-v2-aux
> +@opindex mno-avx10-v2-aux
> +@item -mavx10-v2-aux
> +Support AVX10_V2_AUX built-in functions and code generation.
>  @end table
> 
>  These additional options are available for the x86 processor family.

The sourcebuild.texi and extend.texi changes are missing.

Reply via email to