ACEv1 is a new ISA documented here:
https://x86ecosystem.org/resource/ai-compute-extensions-ace-specification/
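This patch adds the initial support for ACEv1: the -macev1 option, the
"acev1" target attribute, the __ACEV1__ macro, CPUID detection and
documentation.  Support for the instructions will follow in later patches.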
gcc/ChangeLog:
* common/config/i386/cpuinfo.h
(get_available_features): Add ACEV1.
* common/config/i386/i386-common.cc
(OPTION_MASK_ISA2_ACEV1_SET): New.
(OPTION_MASK_ISA2_AVX10V2AUX_UNSET): Disable ACEV1.
(OPTION_MASK_ISA2_ACEV1_UNSET): New.
(ix86_handle_option): Handle ACEV1.
* common/config/i386/i386-cpuinfo.h
(enum processor_features): Add FEATURE_ACEV1.
* common/config/i386/i386-isas.h: Handle acev1.
* config/i386/cpuid.h (bit_ACE): New.
* config/i386/i386-c.cc (ix86_target_macros_internal):
Handle acev1.
* config/i386/i386-isa.def (ACEV1): Add DEF_PTA.
* config/i386/i386-options.cc (isa2_opts): Handle acev1.
(ix86_valid_target_attribute_inner_p): Ditto.
* config/i386/i386.opt: Add macev1.
* config/i386/i386.opt.urls: Regenerated.
* doc/extend.texi: Add acev1 documentation.
* doc/invoke.texi: Ditto.
* doc/sourcebuild.texi: Ditto.  Move the amx_* entries before avx and
sort them alphabetically.
Co-authored-by: Dipesh Sharma <[email protected]>
---
gcc/common/config/i386/cpuinfo.h | 41 +++++++++++++++++++--
gcc/common/config/i386/i386-common.cc | 21 ++++++++++-
gcc/common/config/i386/i386-cpuinfo.h | 1 +
gcc/common/config/i386/i386-isas.h | 1 +
gcc/config/i386/cpuid.h | 3 ++
gcc/config/i386/i386-c.cc | 2 ++
gcc/config/i386/i386-isa.def | 1 +
gcc/config/i386/i386-options.cc | 4 ++-
gcc/config/i386/i386.opt | 5 +++
gcc/config/i386/i386.opt.urls | 3 ++
gcc/doc/extend.texi | 5 +++
gcc/doc/invoke.texi | 8 ++++-
gcc/doc/sourcebuild.texi | 51 ++++++++++++++-------------
13 files changed, 117 insertions(+), 29 deletions(-)
diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h
index 99e92394abc..5d27ac5f18a 100644
--- a/gcc/common/config/i386/cpuinfo.h
+++ b/gcc/common/config/i386/cpuinfo.h
@@ -802,8 +802,9 @@ get_available_features (struct __processor_model *cpu_model,
#define XSTATE_ZMM 0x40
#define XSTATE_HI_ZMM 0x80
#define XSTATE_TILECFG 0x20000
-#define XSTATE_TILEDATA 0x40000
+#define XSTATE_TILEDATA 0x40000
#define XSTATE_APX_F 0x80000
+#define XSTATE_ACE 0x100000
#define XCR_AVX_ENABLED_MASK \
(XSTATE_SSE | XSTATE_YMM)
@@ -812,16 +813,20 @@ get_available_features (struct __processor_model *cpu_model,
#define XCR_AMX_ENABLED_MASK \
(XSTATE_TILECFG | XSTATE_TILEDATA)
#define XCR_APX_F_ENABLED_MASK XSTATE_APX_F
+#define XCR_ACE_ENABLED_MASK \
+ (XCR_AVX512F_ENABLED_MASK | XCR_AMX_ENABLED_MASK | XSTATE_ACE)
/* Check if AVX, AVX512 and APX are usable. */
int avx_usable = 0;
int avx512_usable = 0;
int amx_usable = 0;
int apx_usable = 0;
+ int ace_usable = 0;
/* Check if KL is usable. */
int has_kl = 0;
/* Record AVX10 version. */
int avx10_set = 0;
+ int ace_set = 0, avx10v2aux_set = 0;
int version = 0;
if ((ecx & bit_OSXSAVE))
{
@@ -842,6 +847,8 @@ get_available_features (struct __processor_model *cpu_model,
== XCR_AMX_ENABLED_MASK);
apx_usable = ((xcrlow & XCR_APX_F_ENABLED_MASK)
== XCR_APX_F_ENABLED_MASK);
+ ace_usable = ((xcrlow & XCR_ACE_ENABLED_MASK)
+ == XCR_ACE_ENABLED_MASK);
}
#define set_feature(f) \
@@ -1057,6 +1064,11 @@ get_available_features (struct __processor_model *cpu_model,
if (edx & bit_APX_F)
set_feature (FEATURE_APX_F);
}
+ if (ace_usable)
+ {
+ if (ecx & bit_ACE)
+ ace_set = 1;
+ }
}
}
@@ -1144,7 +1156,32 @@ get_available_features (struct __processor_model *cpu_model,
{
__cpuid_count (0x24, 1, eax, ebx, ecx, edx);
if (ecx & bit_AVX10V2AUX)
- set_feature (FEATURE_AVX10V2AUX);
+ {
+ set_feature (FEATURE_AVX10V2AUX);
+ avx10v2aux_set = 1;
+ }
+ }
+ }
+
+ /* Get Advanced Features at level 0x1d (eax = 0x1d).  This ACE check must
+ follow the AVX10 check because it uses the AVX10 version and AVX10V2AUX.
+ TODO: Change the condition after AVX10V1AUX is added. */
+ if (version >= 2 && avx10v2aux_set && ace_set && max_cpuid_level >= 0x1d)
+ {
+ __cpuid_count (0x1d, 0, eax, ebx, ecx, edx);
+ if (eax == 2)
+ {
+ __cpuid_count (0x1d, 2, eax, ebx, ecx, edx);
+ version = eax & 0xff;
+ switch (version)
+ {
+ case 1:
+ set_feature (FEATURE_ACEV1);
+ break;
+ default:
+ set_feature (FEATURE_ACEV1);
+ break;
+ }
}
}
diff --git a/gcc/common/config/i386/i386-common.cc b/gcc/common/config/i386/i386-common.cc
index ea2a47fccfc..b1835620118 100644
--- a/gcc/common/config/i386/i386-common.cc
+++ b/gcc/common/config/i386/i386-common.cc
@@ -140,6 +140,8 @@ along with GCC; see the file COPYING3. If not see
#define OPTION_MASK_ISA2_AVX512BMM_SET OPTION_MASK_ISA2_AVX512BMM
#define OPTION_MASK_ISA2_AVX10V2AUX_SET \
(OPTION_MASK_ISA2_AVX10_1_SET | OPTION_MASK_ISA2_AVX10V2AUX)
+#define OPTION_MASK_ISA2_ACEV1_SET \
+ (OPTION_MASK_ISA2_AVX10V2AUX_SET | OPTION_MASK_ISA2_ACEV1)
/* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
as -msse4.2. */
@@ -333,7 +335,9 @@ along with GCC; see the file COPYING3. If not see
#define OPTION_MASK_ISA2_MOVRS_UNSET OPTION_MASK_ISA2_MOVRS
#define OPTION_MASK_ISA2_AMX_MOVRS_UNSET OPTION_MASK_ISA2_AMX_MOVRS
#define OPTION_MASK_ISA2_AVX512BMM_UNSET OPTION_MASK_ISA2_AVX512BMM
-#define OPTION_MASK_ISA2_AVX10V2AUX_UNSET OPTION_MASK_ISA2_AVX10V2AUX
+#define OPTION_MASK_ISA2_AVX10V2AUX_UNSET \
+ (OPTION_MASK_ISA2_AVX10V2AUX | OPTION_MASK_ISA2_ACEV1_UNSET)
+#define OPTION_MASK_ISA2_ACEV1_UNSET OPTION_MASK_ISA2_ACEV1
/* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should the same
as -mno-sse4.1. */
@@ -1491,6 +1495,21 @@ ix86_handle_option (struct gcc_options *opts,
}
return true;
+ case OPT_macev1:
+ if (value)
+ {
+ opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_ACEV1_SET;
+ opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_ACEV1_SET;
+ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX10_1_SET;
+ opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX10_1_SET;
+ }
+ else
+ {
+ opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_ACEV1_UNSET;
+ opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_ACEV1_UNSET;
+ }
+ return true;
+
case OPT_mfma:
if (value)
{
diff --git a/gcc/common/config/i386/i386-cpuinfo.h b/gcc/common/config/i386/i386-cpuinfo.h
index 5b9bf5db68b..3b3d7c924de 100644
--- a/gcc/common/config/i386/i386-cpuinfo.h
+++ b/gcc/common/config/i386/i386-cpuinfo.h
@@ -284,6 +284,7 @@ enum processor_features
FEATURE_AMX_MOVRS,
FEATURE_AVX512BMM,
FEATURE_AVX10V2AUX,
+ FEATURE_ACEV1,
CPU_FEATURE_MAX
};
diff --git a/gcc/common/config/i386/i386-isas.h b/gcc/common/config/i386/i386-isas.h
index d280418c261..171226ab07b 100644
--- a/gcc/common/config/i386/i386-isas.h
+++ b/gcc/common/config/i386/i386-isas.h
@@ -193,4 +193,5 @@ ISA_NAMES_TABLE_START
ISA_NAMES_TABLE_ENTRY("avx512bmm", FEATURE_AVX512BMM, P_NONE, "-mavx512bmm")
ISA_NAMES_TABLE_ENTRY("avx10v2aux", FEATURE_AVX10V2AUX, P_NONE,
"-mavx10v2aux")
+ ISA_NAMES_TABLE_ENTRY("acev1", FEATURE_ACEV1, P_NONE, "-macev1")
ISA_NAMES_TABLE_END
diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h
index 68359e11929..a2c8f0ee175 100644
--- a/gcc/config/i386/cpuid.h
+++ b/gcc/config/i386/cpuid.h
@@ -144,6 +144,9 @@
#define bit_AVXIFMA (1 << 23)
#define bit_MOVRS (1 << 31)
+/* %ecx */
+#define bit_ACE (1 << 11)
+
/* %edx */
#define bit_AVXVNNIINT8 (1 << 4)
#define bit_AVXNECONVERT (1 << 5)
diff --git a/gcc/config/i386/i386-c.cc b/gcc/config/i386/i386-c.cc
index 4c56ff6dad9..f6a4093c277 100644
--- a/gcc/config/i386/i386-c.cc
+++ b/gcc/config/i386/i386-c.cc
@@ -792,6 +792,8 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
def_or_undef (parse_in, "__AVX512BMM__");
if (isa_flag2 & OPTION_MASK_ISA2_AVX10V2AUX)
def_or_undef (parse_in, "__AVX10V2AUX__");
+ if (isa_flag2 & OPTION_MASK_ISA2_ACEV1)
+ def_or_undef (parse_in, "__ACEV1__");
if (TARGET_IAMCU)
{
def_or_undef (parse_in, "__iamcu");
diff --git a/gcc/config/i386/i386-isa.def b/gcc/config/i386/i386-isa.def
index e50cb6768fa..28835b1d542 100644
--- a/gcc/config/i386/i386-isa.def
+++ b/gcc/config/i386/i386-isa.def
@@ -126,3 +126,4 @@ DEF_PTA(MOVRS)
DEF_PTA(AMX_MOVRS)
DEF_PTA(AVX512BMM)
DEF_PTA(AVX10V2AUX)
+DEF_PTA(ACEV1)
diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc
index bbdd684a2b2..631ed00a25c 100644
--- a/gcc/config/i386/i386-options.cc
+++ b/gcc/config/i386/i386-options.cc
@@ -275,7 +275,8 @@ static struct ix86_target_opts isa2_opts[] =
{ "-mmovrs", OPTION_MASK_ISA2_MOVRS },
{ "-mamx-movrs", OPTION_MASK_ISA2_AMX_MOVRS },
{ "-mavx512bmm", OPTION_MASK_ISA2_AVX512BMM },
- { "-mavx10v2aux", OPTION_MASK_ISA2_AVX10V2AUX }
+ { "-mavx10v2aux", OPTION_MASK_ISA2_AVX10V2AUX },
+ { "-macev1", OPTION_MASK_ISA2_ACEV1 }
};
static struct ix86_target_opts isa_opts[] =
{
@@ -1141,6 +1142,7 @@ ix86_valid_target_attribute_inner_p (tree fndecl, tree args, char *p_strings[],
IX86_ATTR_ISA ("amx-movrs", OPT_mamx_movrs),
IX86_ATTR_ISA ("avx512bmm", OPT_mavx512bmm),
IX86_ATTR_ISA ("avx10v2aux", OPT_mavx10v2aux),
+ IX86_ATTR_ISA ("acev1", OPT_macev1),
/* enum options */
IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
index bee7c4c5e13..6948556b27b 100644
--- a/gcc/config/i386/i386.opt
+++ b/gcc/config/i386/i386.opt
@@ -1390,3 +1390,8 @@ mavx10v2aux
Target Mask(ISA2_AVX10V2AUX) Var(ix86_isa_flags2) Save
Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2,
AVX10.1, AVX10.2 and AVX10V2AUX built-in functions and code generation.
+
+macev1
+Target Mask(ISA2_ACEV1) Var(ix86_isa_flags2) Save
+Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2,
+AVX10.1, AVX10V2AUX and ACEV1 built-in functions and code generation.
diff --git a/gcc/config/i386/i386.opt.urls b/gcc/config/i386/i386.opt.urls
index 47e0b046e83..a5235492841 100644
--- a/gcc/config/i386/i386.opt.urls
+++ b/gcc/config/i386/i386.opt.urls
@@ -635,3 +635,6 @@ UrlSuffix(gcc/x86-Options.html#index-mavx512bmm)
mavx10v2aux
UrlSuffix(gcc/x86-Options.html#index-mavx10v2aux)
+macev1
+UrlSuffix(gcc/x86-Options.html#index-macev1)
+
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 5009c2b559d..0d3ccdbc89f 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -9507,6 +9507,11 @@ Enable/disable the generation of the AMX-MOVRS instructions.
@itemx no-avx10v2aux
Enable/disable the generation of the AVX10V2AUX instructions.
+@atindex @code{target("acev1")}, x86
+@item acev1
+@itemx no-acev1
+Enable/disable the generation of the ACEV1 instructions.
+
@atindex @code{target("cld")}, x86
@item cld
@itemx no-cld
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 2f927603f38..32595a934a8 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -1551,7 +1551,7 @@ See RS/6000 and PowerPC Options.
-mavxneconvert -mcmpccxadd -mamx-fp16 -mprefetchi -mraoint
-mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4 -mapxf
-musermsr -mavx10.1 -mavx10.2 -mamx-avx512 -mmovrs -mamx-movrs
--mavx512bmm -mavx10v2aux
+-mavx512bmm -mavx10v2aux -macev1
-mcldemote -mms-bitfields -mno-align-stringops -minline-all-stringops
-minline-stringops-dynamically -mstringop-strategy=@var{alg}
-mkl -mwidekl
@@ -36196,6 +36196,12 @@ Support AMX-MOVRS built-in functions and code generation.
@item -mavx10v2aux
Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2,
AVX10.1, AVX10.2 and AVX10V2AUX built-in functions and code generation.
+
+@opindex macev1
+@opindex mno-acev1
+@item -macev1
+Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2,
+AVX10.1, AVX10V2AUX and ACEV1 built-in functions and code generation.
@end table
These additional options are available for the x86 processor family.
diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi
index b74fe4f7380..4e989855815 100644
--- a/gcc/doc/sourcebuild.texi
+++ b/gcc/doc/sourcebuild.texi
@@ -2668,6 +2668,33 @@ Test system has support for the CORE-V BI extension.
@item autoincdec
Target supports autoincrement/decrement addressing.
+@item acev1
+Target supports the execution of @code{acev1} instructions.
+
+@item amx_avx512
+Target supports the execution of @code{amx-avx512} instructions.
+
+@item amx_bf16
+Target supports the execution of @code{amx-bf16} instructions.
+
+@item amx_complex
+Target supports the execution of @code{amx-complex} instructions.
+
+@item amx_fp16
+Target supports the execution of @code{amx-fp16} instructions.
+
+@item amx_fp8
+Target supports the execution of @code{amx-fp8} instructions.
+
+@item amx_int8
+Target supports the execution of @code{amx-int8} instructions.
+
+@item amx_movrs
+Target supports the execution of @code{amx-movrs} instructions.
+
+@item amx_tile
+Target supports the execution of @code{amx-tile} instructions.
+
@item avx
Target supports compiling @code{avx} instructions.
@@ -2713,30 +2740,6 @@ Target supports the execution of @code{avxvnniint8} instructions.
@item avxvnniint16
Target supports the execution of @code{avxvnniint16} instructions.
-@item amx_tile
-Target supports the execution of @code{amx-tile} instructions.
-
-@item amx_int8
-Target supports the execution of @code{amx-int8} instructions.
-
-@item amx_bf16
-Target supports the execution of @code{amx-bf16} instructions.
-
-@item amx_avx512
-Target supports the execution of @code{amx-avx512} instructions.
-
-@item amx_complex
-Target supports the execution of @code{amx-complex} instructions.
-
-@item amx_fp16
-Target supports the execution of @code{amx-fp16} instructions.
-
-@item amx_movrs
-Target supports the execution of @code{amx-movrs} instructions.
-
-@item amx_fp8
-Target supports the execution of @code{amx-fp8} instructions.
-
@item cell_hw
Test system can execute AltiVec and Cell PPU instructions.
--
2.31.1