Hi all, This patch is part of a series adding support for Armv8.6-A features. It enables options including -march=armv8.6-a, +i8mm and +bf16. The +i8mm and +bf16 features are optional from Armv8.2-A onward. The architecture documentation is at https://developer.arm.com/docs/ddi0596/latest
Regression-tested on arm-none-linux-gnueabi-armv8-a. Please check whether this is ready for trunk. Many thanks! Dennis gcc/ChangeLog: 2019-11-15 Dennis Zhang <dennis.zh...@arm.com> * config/arm/arm-c.c (arm_cpu_builtins): Define __ARM_FEATURE_MATMUL_INT8, __ARM_FEATURE_BF16_VECTOR_ARITHMETIC, __ARM_FEATURE_BF16_SCALAR_ARITHMETIC, and __ARM_BF16_FORMAT_ALTERNATIVE when enabled. * config/arm/arm-cpus.in (armv8_6, i8mm, bf16): New features. * config/arm/arm-tables.opt: Regenerated. * config/arm/arm.c (arm_option_reconfigure_globals): Init arm_arch_i8mm and arm_arch_bf16 to enable features. * config/arm/arm.h (TARGET_I8MM): New macro. (TARGET_BF16_FP, TARGET_BF16_SIMD): Likewise. * config/arm/t-aprofile: Add matching rules for -march=armv8.6-a. * config/arm/t-arm-elf (all_v8_archs): Add armv8.6-a. * config/arm/t-multilib: Add matching rules for -march=armv8.6-a. (v8_6_a_simd_variants): New. (v8_*_a_simd_variants): Add i8mm and bf16. * doc/invoke.texi (armv8.6-a, i8mm, bf16): Document new options. gcc/testsuite/ChangeLog: 2019-11-15 Dennis Zhang <dennis.zh...@arm.com> * gcc.target/arm/multilib.exp: Add combination tests for armv8.6-a.
diff --git a/gcc/config/arm/arm-c.c b/gcc/config/arm/arm-c.c index c4485ce7af1..b47e64c2151 100644 --- a/gcc/config/arm/arm-c.c +++ b/gcc/config/arm/arm-c.c @@ -225,6 +225,14 @@ arm_cpu_builtins (struct cpp_reader* pfile) builtin_define_with_int_value ("__ARM_FEATURE_COPROC", coproc_level); } + + def_or_undef_macro (pfile, "__ARM_FEATURE_MATMUL_INT8", TARGET_I8MM); + def_or_undef_macro (pfile, "__ARM_FEATURE_BF16_SCALAR_ARITHMETIC", + TARGET_BF16_FP); + def_or_undef_macro (pfile, "__ARM_FEATURE_BF16_VECTOR_ARITHMETIC", + TARGET_BF16_SIMD); + def_or_undef_macro (pfile, "__ARM_BF16_FORMAT_ALTERNATIVE", + TARGET_BF16_FP || TARGET_BF16_SIMD); } void diff --git a/gcc/config/arm/arm-cpus.in b/gcc/config/arm/arm-cpus.in index 50379a0a10a..d373406649c 100644 --- a/gcc/config/arm/arm-cpus.in +++ b/gcc/config/arm/arm-cpus.in @@ -123,6 +123,9 @@ define feature armv8_4 # Architecture rel 8.5. define feature armv8_5 +# Architecture rel 8.6. +define feature armv8_6 + # M-Profile security extensions. define feature cmse @@ -191,6 +194,12 @@ define feature sb # v8-A architectures, added by default from v8.5-A define feature predres +# 8-bit Integer Matrix Multiply extension. Optional from v8.2-A. +define feature i8mm + +# Brain half-precision floating-point extension. Optional from v8.2-A. +define feature bf16 + # Feature groups. Conventionally all (or mostly) upper case. # ALL_FPU lists all the feature bits associated with the floating-point # unit; these will all be removed if the floating-point unit is disabled @@ -213,7 +222,7 @@ define fgroup ALL_CRYPTO crypto # strip off 32 D-registers, but does not remove support for # double-precision FP. define fgroup ALL_SIMD_INTERNAL fp_d32 neon ALL_CRYPTO -define fgroup ALL_SIMD ALL_SIMD_INTERNAL dotprod fp16fml +define fgroup ALL_SIMD ALL_SIMD_INTERNAL dotprod fp16fml i8mm # List of all FPU bits to strip out if -mfpu is used to override the # default. fp16 is deliberately missing from this list. 
@@ -253,6 +262,7 @@ define fgroup ARMv8_2a ARMv8_1a armv8_2 define fgroup ARMv8_3a ARMv8_2a armv8_3 define fgroup ARMv8_4a ARMv8_3a armv8_4 define fgroup ARMv8_5a ARMv8_4a armv8_5 sb predres +define fgroup ARMv8_6a ARMv8_5a armv8_6 define fgroup ARMv8m_base ARMv6m armv8 cmse tdiv define fgroup ARMv8m_main ARMv7m armv8 cmse define fgroup ARMv8r ARMv8a @@ -560,6 +570,8 @@ begin arch armv8.2-a option dotprod add FP_ARMv8 DOTPROD option sb add sb option predres add predres + option i8mm add i8mm FP_ARMv8 NEON + option bf16 add bf16 FP_ARMv8 NEON end arch armv8.2-a begin arch armv8.3-a @@ -577,6 +589,8 @@ begin arch armv8.3-a option dotprod add FP_ARMv8 DOTPROD option sb add sb option predres add predres + option i8mm add i8mm FP_ARMv8 NEON + option bf16 add bf16 FP_ARMv8 NEON end arch armv8.3-a begin arch armv8.4-a @@ -592,6 +606,8 @@ begin arch armv8.4-a option nofp remove ALL_FP option sb add sb option predres add predres + option i8mm add i8mm FP_ARMv8 NEON + option bf16 add bf16 FP_ARMv8 NEON end arch armv8.4-a begin arch armv8.5-a @@ -605,8 +621,25 @@ begin arch armv8.5-a option crypto add FP_ARMv8 CRYPTO DOTPROD option nocrypto remove ALL_CRYPTO option nofp remove ALL_FP + option i8mm add i8mm FP_ARMv8 NEON + option bf16 add bf16 FP_ARMv8 NEON end arch armv8.5-a +begin arch armv8.6-a + tune for cortex-a53 + tune flags CO_PROC + base 8A + profile A + isa ARMv8_6a + option simd add FP_ARMv8 DOTPROD + option fp16 add fp16 fp16fml FP_ARMv8 DOTPROD + option crypto add FP_ARMv8 CRYPTO DOTPROD + option nocrypto remove ALL_CRYPTO + option nofp remove ALL_FP + option i8mm add i8mm FP_ARMv8 NEON + option bf16 add bf16 FP_ARMv8 NEON +end arch armv8.6-a + begin arch armv8-m.base tune for cortex-m23 base 8M_BASE diff --git a/gcc/config/arm/arm-tables.opt b/gcc/config/arm/arm-tables.opt index aeb5b3fbf62..e509081678e 100644 --- a/gcc/config/arm/arm-tables.opt +++ b/gcc/config/arm/arm-tables.opt @@ -344,19 +344,22 @@ EnumValue Enum(arm_arch) String(armv8.5-a) Value(25) 
EnumValue -Enum(arm_arch) String(armv8-m.base) Value(26) +Enum(arm_arch) String(armv8.6-a) Value(26) EnumValue -Enum(arm_arch) String(armv8-m.main) Value(27) +Enum(arm_arch) String(armv8-m.base) Value(27) EnumValue -Enum(arm_arch) String(armv8-r) Value(28) +Enum(arm_arch) String(armv8-m.main) Value(28) EnumValue -Enum(arm_arch) String(iwmmxt) Value(29) +Enum(arm_arch) String(armv8-r) Value(29) EnumValue -Enum(arm_arch) String(iwmmxt2) Value(30) +Enum(arm_arch) String(iwmmxt) Value(30) + +EnumValue +Enum(arm_arch) String(iwmmxt2) Value(31) Enum Name(arm_fpu) Type(enum fpu_type) diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 1fd30c238cd..290db1129f2 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -996,6 +996,12 @@ int arm_arch_cmse = 0; /* Nonzero if the core has a very small, high-latency, multiply unit. */ int arm_m_profile_small_mul = 0; +/* Nonzero if chip supports the AdvSIMD I8MM instructions. */ +int arm_arch_i8mm = 0; + +/* Nonzero if chip supports the BFloat16 instructions. */ +int arm_arch_bf16 = 0; + /* The condition codes of the ARM, and the inverse function. 
*/ static const char * const arm_condition_codes[] = { @@ -3649,8 +3655,11 @@ arm_option_reconfigure_globals (void) arm_arch_arm_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_adiv); arm_arch_crc = bitmap_bit_p (arm_active_target.isa, isa_bit_crc32); arm_arch_cmse = bitmap_bit_p (arm_active_target.isa, isa_bit_cmse); - arm_fp16_inst = bitmap_bit_p (arm_active_target.isa, isa_bit_fp16); arm_arch_lpae = bitmap_bit_p (arm_active_target.isa, isa_bit_lpae); + arm_arch_i8mm = bitmap_bit_p (arm_active_target.isa, isa_bit_i8mm); + arm_arch_bf16 = bitmap_bit_p (arm_active_target.isa, isa_bit_bf16); + + arm_fp16_inst = bitmap_bit_p (arm_active_target.isa, isa_bit_fp16); if (arm_fp16_inst) { if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE) diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h index 3a1ba8b9a57..6c8ff6637d2 100644 --- a/gcc/config/arm/arm.h +++ b/gcc/config/arm/arm.h @@ -246,6 +246,15 @@ emission of floating point pcs attributes. */ /* FPU supports the AdvSIMD FP16 instructions for ARMv8.2 and later. */ #define TARGET_NEON_FP16INST (TARGET_VFP_FP16INST && TARGET_NEON_RDMA) +/* FPU supports 8-bit Integer Matrix Multiply (I8MM) AdvSIMD extensions. */ +#define TARGET_I8MM (TARGET_NEON && arm_arch8_2 && arm_arch_i8mm) + +/* FPU supports Brain half-precision floating-point (BFloat16) extension. */ +#define TARGET_BF16_FP (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP5 \ + && arm_arch8_2 && arm_arch_bf16) +#define TARGET_BF16_SIMD (TARGET_NEON && TARGET_VFP5 \ + && arm_arch8_2 && arm_arch_bf16) + /* Q-bit is present. */ #define TARGET_ARM_QBIT \ (TARGET_32BIT && arm_arch5te && (arm_arch_notm || arm_arch7)) @@ -517,6 +526,12 @@ extern int arm_arch_crc; /* Nonzero if chip supports the ARMv8-M Security Extensions. */ extern int arm_arch_cmse; +/* Nonzero if chip supports the ARMv8 I8MM instructions. */ +extern int arm_arch_i8mm; + +/* Nonzero if chip supports the BFloat16 instructions. 
*/ +extern int arm_arch_bf16; + #ifndef TARGET_DEFAULT #define TARGET_DEFAULT (MASK_APCS_FRAME) #endif diff --git a/gcc/config/arm/t-aprofile b/gcc/config/arm/t-aprofile index 1556f1b23e3..e5f3c3b42d6 100644 --- a/gcc/config/arm/t-aprofile +++ b/gcc/config/arm/t-aprofile @@ -122,6 +122,13 @@ MULTILIB_MATCHES += march?armv8-a=march?armv8.5-a MULTILIB_MATCHES += $(foreach ARCH, $(v8_5_a_simd_variants), \ march?armv8-a+simd=march?armv8.5-a$(ARCH)) +# Baseline v8.6-a: map down to baseline v8-a +MULTILIB_MATCHES += march?armv8-a=march?armv8.6-a + +# Map all v8.6-a SIMD variants to v8-a+simd +MULTILIB_MATCHES += $(foreach ARCH, $(v8_6_a_simd_variants), \ + march?armv8-a+simd=march?armv8.6-a$(ARCH)) + # Use Thumb libraries for everything. MULTILIB_REUSE += mthumb/march.armv7-a/mfloat-abi.soft=marm/march.armv7-a/mfloat-abi.soft diff --git a/gcc/config/arm/t-arm-elf b/gcc/config/arm/t-arm-elf index 8911d489f14..970cc43a9e4 100644 --- a/gcc/config/arm/t-arm-elf +++ b/gcc/config/arm/t-arm-elf @@ -47,7 +47,7 @@ all_early_arch := armv5tej armv6 armv6j armv6k armv6z armv6kz \ all_v7_a_r := armv7-a armv7ve armv7-r all_v8_archs := armv8-a armv8-a+crc armv8.1-a armv8.2-a armv8.3-a armv8.4-a \ - armv8.5-a + armv8.5-a armv8.6-a # No floating point variants, require thumb1 softfp all_nofp_t := armv6-m armv6s-m armv8-m.base diff --git a/gcc/config/arm/t-multilib b/gcc/config/arm/t-multilib index dc97c8f09fb..fcf3b0b46e3 100644 --- a/gcc/config/arm/t-multilib +++ b/gcc/config/arm/t-multilib @@ -73,9 +73,10 @@ v7ve_vfpv4_simd_variants := +simd v8_a_nosimd_variants := +crc v8_a_simd_variants := $(call all_feat_combs, simd crypto) v8_1_a_simd_variants := $(call all_feat_combs, simd crypto) -v8_2_a_simd_variants := $(call all_feat_combs, simd fp16 fp16fml crypto dotprod) -v8_4_a_simd_variants := $(call all_feat_combs, simd fp16 crypto) -v8_5_a_simd_variants := $(call all_feat_combs, simd fp16 crypto) +v8_2_a_simd_variants := $(call all_feat_combs, simd fp16 fp16fml crypto dotprod i8mm bf16) 
+v8_4_a_simd_variants := $(call all_feat_combs, simd fp16 crypto i8mm bf16) +v8_5_a_simd_variants := $(call all_feat_combs, simd fp16 crypto i8mm bf16) +v8_6_a_simd_variants := $(call all_feat_combs, simd fp16 crypto i8mm bf16) v8_r_nosimd_variants := +crc ifneq (,$(HAS_APROFILE)) @@ -185,6 +186,13 @@ MULTILIB_MATCHES += march?armv7=march?armv8.5-a MULTILIB_MATCHES += $(foreach ARCH, $(v8_5_a_simd_variants), \ march?armv7+fp=march?armv8.5-a$(ARCH)) +# Baseline v8.6-a: map down to baseline v8-a +MULTILIB_MATCHES += march?armv7=march?armv8.6-a + +# Map all v8.6-a SIMD variants +MULTILIB_MATCHES += $(foreach ARCH, $(v8_6_a_simd_variants), \ + march?armv7+fp=march?armv8.6-a$(ARCH)) + # Use Thumb libraries for everything. MULTILIB_REUSE += mthumb/march.armv7/mfloat-abi.soft=marm/march.armv7/mfloat-abi.soft diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 2897982705e..7a31bf0cf27 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -17423,6 +17423,7 @@ Permissible names are: @samp{armv8-a}, @samp{armv8.1-a}, @samp{armv8.2-a}, @samp{armv8.3-a}, @samp{armv8.4-a}, @samp{armv8.5-a}, +@samp{armv8.6-a}, @samp{armv7-r}, @samp{armv8-r}, @samp{armv6-m}, @samp{armv6s-m}, @@ -17666,6 +17667,14 @@ Speculation Barrier Instruction. @item +predres Execution and Data Prediction Restriction Instructions. + +@item +i8mm +8-bit Integer Matrix Multiply instructions. +This also enables Advanced SIMD instructions. + +@item +bf16 +Brain half-precision floating-point instructions. +This also enables Advanced SIMD and floating-point instructions. @end table @item armv8.4-a @@ -17695,6 +17704,14 @@ Speculation Barrier Instruction. @item +predres Execution and Data Prediction Restriction Instructions. + +@item +i8mm +8-bit Integer Matrix Multiply instructions. +This also enables Advanced SIMD instructions. + +@item +bf16 +Brain half-precision floating-point instructions. +This also enables Advanced SIMD and floating-point instructions. 
@end table @item armv8.5-a @@ -17718,6 +17735,45 @@ Disable the cryptographic extension. @item +nofp Disable the floating-point, Advanced SIMD and cryptographic instructions. + +@item +i8mm +8-bit Integer Matrix Multiply instructions. +This also enables Advanced SIMD instructions. + +@item +bf16 +Brain half-precision floating-point instructions. +This also enables Advanced SIMD and floating-point instructions. +@end table + +@item armv8.6-a +@table @samp +@item +fp16 +The half-precision floating-point data processing instructions. +This also enables the Advanced SIMD and floating-point instructions as well +as the Dot Product extension and the half-precision floating-point fmla +extension. + +@item +simd +The ARMv8.3-A Advanced SIMD and floating-point instructions as well as the +Dot Product extension. + +@item +crypto +The cryptographic instructions. This also enables the Advanced SIMD and +floating-point instructions as well as the Dot Product extension. + +@item +nocrypto +Disable the cryptographic extension. + +@item +nofp +Disable the floating-point, Advanced SIMD and cryptographic instructions. + +@item +i8mm +8-bit Integer Matrix Multiply instructions. +This also enables Advanced SIMD instructions. + +@item +bf16 +Brain half-precision floating-point instructions. +This also enables Advanced SIMD and floating-point instructions. 
@end table @item armv7-r diff --git a/gcc/testsuite/gcc.target/arm/multilib.exp b/gcc/testsuite/gcc.target/arm/multilib.exp index dcea829965e..7807485352f 100644 --- a/gcc/testsuite/gcc.target/arm/multilib.exp +++ b/gcc/testsuite/gcc.target/arm/multilib.exp @@ -126,6 +126,14 @@ if {[multilib_config "aprofile"] } { {-march=armv8.5-a+simd+fp16 -mfloat-abi=softfp} "thumb/v8-a+simd/softfp" {-march=armv8.5-a+simd+fp16+nofp -mfloat-abi=softfp} "thumb/v8-a/nofp" {-march=armv8.5-a+simd+nofp+fp16 -mfloat-abi=softfp} "thumb/v8-a+simd/softfp" + {-march=armv8.6-a+crypto -mfloat-abi=soft} "thumb/v8-a/nofp" + {-march=armv8.6-a+simd+crypto -mfloat-abi=softfp} "thumb/v8-a+simd/softfp" + {-march=armv8.6-a+simd+crypto+nofp -mfloat-abi=softfp} "thumb/v8-a/nofp" + {-march=armv8.6-a+simd+nofp+crypto -mfloat-abi=softfp} "thumb/v8-a+simd/softfp" + {-march=armv8.6-a+fp16 -mfloat-abi=soft} "thumb/v8-a/nofp" + {-march=armv8.6-a+simd+fp16 -mfloat-abi=softfp} "thumb/v8-a+simd/softfp" + {-march=armv8.6-a+simd+fp16+nofp -mfloat-abi=softfp} "thumb/v8-a/nofp" + {-march=armv8.6-a+simd+nofp+fp16 -mfloat-abi=softfp} "thumb/v8-a+simd/softfp" {-mcpu=cortex-a53+crypto -mfloat-abi=hard} "thumb/v8-a+simd/hard" {-mcpu=cortex-a53+nofp -mfloat-abi=softfp} "thumb/v8-a/nofp" {-march=armv8-a+crc -mfloat-abi=hard -mfpu=vfp} "thumb/v8-a+simd/hard"