[PATCH] x86: Add missing intrinsics [PR95483]
Tested on x86-64. gcc/ChangeLog: * config/i386/avx2intrin.h (_mm_broadcastsi128_si256): New intrinsics. (_mm_broadcastsd_pd): Ditto. * config/i386/avx512bwintrin.h (_mm512_loadu_epi16): New intrinsics. (_mm512_storeu_epi16): Ditto. (_mm512_loadu_epi8): Ditto. (_mm512_storeu_epi8): Ditto. * config/i386/avx512dqintrin.h (_mm_reduce_round_sd): New intrinsics. (_mm_mask_reduce_round_sd): Ditto. (_mm_maskz_reduce_round_sd): Ditto. (_mm_reduce_round_ss): Ditto. (_mm_mask_reduce_round_ss): Ditto. (_mm_maskz_reduce_round_ss): Ditto. (_mm512_reduce_round_pd): Ditto. (_mm512_mask_reduce_round_pd): Ditto. (_mm512_maskz_reduce_round_pd): Ditto. (_mm512_reduce_round_ps): Ditto. (_mm512_mask_reduce_round_ps): Ditto. (_mm512_maskz_reduce_round_ps): Ditto. * config/i386/avx512erintrin.h (_mm_mask_rcp28_round_sd): New intrinsics. (_mm_maskz_rcp28_round_sd): Ditto. (_mm_mask_rcp28_round_ss): Ditto. (_mm_maskz_rcp28_round_ss): Ditto. (_mm_mask_rsqrt28_round_sd): Ditto. (_mm_maskz_rsqrt28_round_sd): Ditto. (_mm_mask_rsqrt28_round_ss): Ditto. (_mm_maskz_rsqrt28_round_ss): Ditto. (_mm_mask_rcp28_sd): Ditto. (_mm_maskz_rcp28_sd): Ditto. (_mm_mask_rcp28_ss): Ditto. (_mm_maskz_rcp28_ss): Ditto. (_mm_mask_rsqrt28_sd): Ditto. (_mm_maskz_rsqrt28_sd): Ditto. (_mm_mask_rsqrt28_ss): Ditto. (_mm_maskz_rsqrt28_ss): Ditto. * config/i386/avx512fintrin.h (_mm_mask_sqrt_sd): New intrinsics. (_mm_maskz_sqrt_sd): Ditto. (_mm_mask_sqrt_ss): Ditto. (_mm_maskz_sqrt_ss): Ditto. (_mm_mask_scalef_sd): Ditto. (_mm_maskz_scalef_sd): Ditto. (_mm_mask_scalef_ss): Ditto. (_mm_maskz_scalef_ss): Ditto. (_mm_mask_cvt_roundsd_ss): Ditto. (_mm_maskz_cvt_roundsd_ss): Ditto. (_mm_mask_cvt_roundss_sd): Ditto. (_mm_maskz_cvt_roundss_sd): Ditto. (_mm_mask_cvtss_sd): Ditto. (_mm_maskz_cvtss_sd): Ditto. (_mm_mask_cvtsd_ss): Ditto. (_mm_maskz_cvtsd_ss): Ditto. (_mm512_cvtsi512_si32): Ditto. (_mm_cvtsd_i32): Ditto. (_mm_cvtss_i32): Ditto. (_mm_cvti32_sd): Ditto. (_mm_cvti32_ss): Ditto. (_mm_cvtsd_i64): Ditto. (_mm_cvtss_i64): Ditto. (_mm_cvti64_sd): Ditto. (_mm_cvti64_ss): Ditto. * config/i386/avx512vlbwintrin.h (_mm256_storeu_epi8): New intrinsics. (_mm_storeu_epi8): Ditto. (_mm256_loadu_epi16): Ditto. (_mm_loadu_epi16): Ditto. (_mm256_loadu_epi8): Ditto. (_mm_loadu_epi8): Ditto. (_mm256_storeu_epi16): Ditto. (_mm_storeu_epi16): Ditto. * config/i386/avx512vlintrin.h (_mm256_load_epi64): New intrinsics. (_mm_load_epi64): Ditto. (_mm256_load_epi32): Ditto. (_mm_load_epi32): Ditto. (_mm256_store_epi32): Ditto. (_mm_store_epi32): Ditto. (_mm256_loadu_epi64): Ditto. (_mm_loadu_epi64): Ditto. (_mm256_loadu_epi32): Ditto. (_mm_loadu_epi32): Ditto. (_mm256_mask_cvt_roundps_ph): Ditto. (_mm256_maskz_cvt_roundps_ph): Ditto. (_mm_mask_cvt_roundps_ph): Ditto. (_mm_maskz_cvt_roundps_ph): Ditto. * config/i386/avxintrin.h (_mm256_cvtsi256_si32): New intrinsics. * config/i386/emmintrin.h (_mm_loadu_si32): New intrinsics. (_mm_loadu_si16): Ditto. (_mm_storeu_si32): Ditto. (_mm_storeu_si16): Ditto. * config/i386/i386-builtin-types.def (V8DF_FTYPE_V8DF_INT_V8DF_UQI_INT): Add new type. (V16SF_FTYPE_V16SF_INT_V16SF_UHI_INT): Ditto. (V4SF_FTYPE_V4SF_V2DF_V4SF_UQI_INT): Ditto. (V2DF_FTYPE_V2DF_V4SF_V2DF_UQI_INT): Ditto. * config/i386/i386-builtin.def (__builtin_ia32_cvtsd2ss_mask_round): New builtin. (__builtin_ia32_cvtss2sd_mask_round): Ditto. (__builtin_ia32_rcp28sd_mask_round): Ditto. (__builtin_ia32_rcp28ss_mask_round): Ditto. (__builtin_ia32_rsqrt28sd_mask_round): Ditto. (__builtin_ia32_rsqrt28ss_mask_round): Ditto. (__builtin_ia32_reducepd512_mask_round): Ditto. (__builtin_ia32_reduceps512_mask_round): Ditto. (__builtin_ia32_reducesd_mask_round): Ditto. (__builtin_ia32_reducess_mask_round): Ditto. * config/i386/i386-expand.c (ix86_expand_round_builtin): Expand round builtin for new type. (V8DF_FTYPE_V8DF_INT_V8DF_UQI_INT) (V16SF_FTYPE_V16SF_INT_V16SF_UHI_INT) (V4SF_FTYPE_V4SF_V2DF_V4SF_UQI_INT) (V2DF_FTYPE_V2DF_V4SF_V2DF_UQI_INT) * config/i386/mmintrin.h () Define datatype __m32 and __m16. Define datatype __m32_u and __m16_u. * config/i386/sse.md: Adjust pattern. (reducep): Adjust.
[PATCH] Fix fma test case [PR97018]
These tests are written for 256 bit vector. For -march=cascadelake, vector size changed to 512 bit. It doubles the number of fma instruction and test fail. Fix is to explicitly disable 512 bit vector by passing additional option -mno-avx512f. Tested on x86-64. gcc/testsuite/ChangeLog: PR target/97018 * gcc.target/i386/l_fma_double_1.c: Add option -mno-avx512f. * gcc.target/i386/l_fma_double_2.c: Likewise. * gcc.target/i386/l_fma_double_3.c: Likewise. * gcc.target/i386/l_fma_double_4.c: Likewise. * gcc.target/i386/l_fma_double_5.c: Likewise. * gcc.target/i386/l_fma_double_6.c: Likewise. * gcc.target/i386/l_fma_float_1.c: Likewise. * gcc.target/i386/l_fma_float_2.c: Likewise. * gcc.target/i386/l_fma_float_3.c: Likewise. * gcc.target/i386/l_fma_float_4.c: Likewise. * gcc.target/i386/l_fma_float_5.c: Likewise. * gcc.target/i386/l_fma_float_6.c: Likewise. --- gcc/testsuite/gcc.target/i386/l_fma_double_1.c | 2 +- gcc/testsuite/gcc.target/i386/l_fma_double_2.c | 2 +- gcc/testsuite/gcc.target/i386/l_fma_double_3.c | 2 +- gcc/testsuite/gcc.target/i386/l_fma_double_4.c | 2 +- gcc/testsuite/gcc.target/i386/l_fma_double_5.c | 2 +- gcc/testsuite/gcc.target/i386/l_fma_double_6.c | 2 +- gcc/testsuite/gcc.target/i386/l_fma_float_1.c | 2 +- gcc/testsuite/gcc.target/i386/l_fma_float_2.c | 2 +- gcc/testsuite/gcc.target/i386/l_fma_float_3.c | 2 +- gcc/testsuite/gcc.target/i386/l_fma_float_4.c | 2 +- gcc/testsuite/gcc.target/i386/l_fma_float_5.c | 2 +- gcc/testsuite/gcc.target/i386/l_fma_float_6.c | 2 +- 12 files changed, 12 insertions(+), 12 deletions(-) diff --git a/gcc/testsuite/gcc.target/i386/l_fma_double_1.c b/gcc/testsuite/gcc.target/i386/l_fma_double_1.c index 5089874faa5..3413beba960 100644 --- a/gcc/testsuite/gcc.target/i386/l_fma_double_1.c +++ b/gcc/testsuite/gcc.target/i386/l_fma_double_1.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O3 -Wno-attributes -mfpmath=sse -mfma -mtune=generic -mno-fma4" } */ +/* { dg-options "-O3 -Wno-attributes -mfpmath=sse -mfma -mtune=generic -mno-fma4 -mno-avx512f" } */ /* Disabling epilogues until we find a better way to deal with scans. */ /* { dg-additional-options "--param vect-epilogues-nomask=0" } */ diff --git a/gcc/testsuite/gcc.target/i386/l_fma_double_2.c b/gcc/testsuite/gcc.target/i386/l_fma_double_2.c index e4696204299..1b9b7988850 100644 --- a/gcc/testsuite/gcc.target/i386/l_fma_double_2.c +++ b/gcc/testsuite/gcc.target/i386/l_fma_double_2.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O3 -Wno-attributes -mfpmath=sse -mfma -mtune=generic -mno-fma4" } */ +/* { dg-options "-O3 -Wno-attributes -mfpmath=sse -mfma -mtune=generic -mno-fma4 -mno-avx512f" } */ /* Disabling epilogues until we find a better way to deal with scans. */ /* { dg-additional-options "--param vect-epilogues-nomask=0" } */ diff --git a/gcc/testsuite/gcc.target/i386/l_fma_double_3.c b/gcc/testsuite/gcc.target/i386/l_fma_double_3.c index df986d0a633..0fbe9ab9569 100644 --- a/gcc/testsuite/gcc.target/i386/l_fma_double_3.c +++ b/gcc/testsuite/gcc.target/i386/l_fma_double_3.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O3 -Wno-attributes -mfpmath=sse -mfma -mtune=generic -mno-fma4" } */ +/* { dg-options "-O3 -Wno-attributes -mfpmath=sse -mfma -mtune=generic -mno-fma4 -mno-avx512f" } */ /* Disabling epilogues until we find a better way to deal with scans. */ /* { dg-additional-options "--param vect-epilogues-nomask=0" } */ diff --git a/gcc/testsuite/gcc.target/i386/l_fma_double_4.c b/gcc/testsuite/gcc.target/i386/l_fma_double_4.c index ae065590f62..c9eba09fea3 100644 --- a/gcc/testsuite/gcc.target/i386/l_fma_double_4.c +++ b/gcc/testsuite/gcc.target/i386/l_fma_double_4.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O3 -Wno-attributes -mfpmath=sse -mfma -mtune=generic -mno-fma4" } */ +/* { dg-options "-O3 -Wno-attributes -mfpmath=sse -mfma -mtune=generic -mno-fma4 -mno-avx512f" } */ /* Disabling epilogues until we find a better way to deal with scans. */ /* { dg-additional-options "--param vect-epilogues-nomask=0" } */ diff --git a/gcc/testsuite/gcc.target/i386/l_fma_double_5.c b/gcc/testsuite/gcc.target/i386/l_fma_double_5.c index 5d31abaa5a7..3217d2683f6 100644 --- a/gcc/testsuite/gcc.target/i386/l_fma_double_5.c +++ b/gcc/testsuite/gcc.target/i386/l_fma_double_5.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O3 -Wno-attributes -mfpmath=sse -mfma -mtune=generic -mno-fma4" } */ +/* { dg-options "-O3 -Wno-attributes -mfpmath=sse -mfma -mtune=generic -mno-fma4 -mno-avx512f" } */ /* Disabling epilogues until we find a better way to deal with scans. */ /* { dg-additional-options "--param vect-epilogues-nomask=0" } */ diff --git a/gcc/testsuite/gcc.target/i386/l_fma_double_6.c b/gcc/testsuite/gcc.target/i386/l_fma_double_6.c index ff857fb02f1..a22b4e2e37a 100644 ---
[PATCH] Preliminary work on support for 128bits integers
From: Arnaud Charlet * fe.h, opt.ads (Enable_128bit_Types): New. * stand.ads (Standard_Long_Long_Long_Integer, S_Long_Long_Long_Integer): New. --- gcc/ada/fe.h | 1 + gcc/ada/opt.ads | 7 +++ gcc/ada/stand.ads | 4 3 files changed, 12 insertions(+) diff --git a/gcc/ada/fe.h b/gcc/ada/fe.h index 8ad16c2b1c9..520301e4c3e 100644 --- a/gcc/ada/fe.h +++ b/gcc/ada/fe.h @@ -192,6 +192,7 @@ extern Boolean In_Extended_Main_Code_Unit (Entity_Id); #define Ada_Versionopt__ada_version #define Back_End_Inlining opt__back_end_inlining #define Debug_Generated_Code opt__debug_generated_code +#define Enable_128bit_Typesopt__enable_128bit_types #define Exception_Extra_Info opt__exception_extra_info #define Exception_Locations_Suppressed opt__exception_locations_suppressed #define Exception_Mechanismopt__exception_mechanism diff --git a/gcc/ada/opt.ads b/gcc/ada/opt.ads index c982f83b9e4..885a6fb9497 100644 --- a/gcc/ada/opt.ads +++ b/gcc/ada/opt.ads @@ -525,6 +525,13 @@ package Opt is -- dataflow analysis, which is not available. This behavior parallels that -- of the old ABE mechanism. + Enable_128bit_Types : Boolean := False; + -- GNAT + -- Set to True to enable the support for 128-bit types in the compiler. + -- The prerequisite is a 64-bit target that supports 128-bit computation. + + -- WARNING: There is a matching C declaration of this variable in fe.h + Error_Msg_Line_Length : Nat := 0; -- GNAT -- Records the error message line length limit. If this is set to zero, diff --git a/gcc/ada/stand.ads b/gcc/ada/stand.ads index f3f7eb512d5..57b4d55387e 100644 --- a/gcc/ada/stand.ads +++ b/gcc/ada/stand.ads @@ -61,6 +61,7 @@ package Stand is S_Integer, S_Long_Integer, S_Long_Long_Integer, + S_Long_Long_Long_Integer, S_Natural, S_Positive, @@ -283,6 +284,9 @@ package Stand is Standard_Long_Integer: Entity_Id renames SE (S_Long_Integer); Standard_Long_Long_Integer : Entity_Id renames SE (S_Long_Long_Integer); + Standard_Long_Long_Long_Integer : Entity_Id renames + SE (S_Long_Long_Long_Integer); + Standard_Op_Add : Entity_Id renames SE (S_Op_Add); Standard_Op_And : Entity_Id renames SE (S_Op_And); Standard_Op_Concat : Entity_Id renames SE (S_Op_Concat); -- 2.26.2
[PATCH] Add testcase to show kernel issue got fixed by pr95237 [PR96192]
This test case, extracted from PR 95645, was failing because alignment of local long long variable got lowered from 8 bytes to 4 bytes in adjust alignment pass, which triggered assert failure. This test case passes now because PR 95237 fix only allows lowering of alignment of local variables in the front end. As a result, alignment of local long long variable no longer gets lowered in adjust alignment pass. gcc/testsuite/ChangeLog: PR target/96192 * c-c++-common/pr96192-1.c: New test. --- gcc/testsuite/c-c++-common/pr96192-1.c | 16 1 file changed, 16 insertions(+) create mode 100644 gcc/testsuite/c-c++-common/pr96192-1.c diff --git a/gcc/testsuite/c-c++-common/pr96192-1.c b/gcc/testsuite/c-c++-common/pr96192-1.c new file mode 100644 index 000..4d9be06a045 --- /dev/null +++ b/gcc/testsuite/c-c++-common/pr96192-1.c @@ -0,0 +1,16 @@ +/* { dg-do compile { target ia32 } } */ +/* { dg-options "-mpreferred-stack-boundary=2 -Os -w" } */ + +int a; + +long long +b (void) +{ +} + +void +c (void) +{ + if (b()) +a = 1; +} -- 2.25.4
[PATCH] Add TARGET_UPDATE_DECL_ALIGNMENT [PR95237]
From: Sunil K Pandey Default for this hook is NOP. For x86, in 32 bit mode, this hook sets alignment of long long on stack to 32 bits if preferred stack boundary is 32 bits. - This patch fixes gcc.target/i386/pr69454-2.c gcc.target/i386/stackalign/longlong-1.c - Regression test on x86-64, no new fail introduced. Tested on x86-64. gcc/ChangeLog: PR target/95237 * config/i386/i386.c (ix86_update_decl_alignment): New function. (TARGET_UPDATE_DECL_ALIGNMENT): Define. * doc/tm.texi: Regenerate. * doc/tm.texi.in (TARGET_UPDATE_DECL_ALIGNMENT): New hook. * stor-layout.c (do_type_align): Call target hook to update decl alignment. * target.def (update_decl_alignment): New hook. gcc/testsuite/ChangeLog: PR target/95237 * gcc.target/i386/pr95237-1.c: New test. * gcc.target/i386/pr95237-2.c: New test. * gcc.target/i386/pr95237-3.c: New test. * gcc.target/i386/pr95237-4.c: New test. * gcc.target/i386/pr95237-5.c: New test. --- gcc/config/i386/i386.c| 22 ++ gcc/doc/tm.texi | 5 + gcc/doc/tm.texi.in| 2 ++ gcc/stor-layout.c | 2 ++ gcc/target.def| 7 +++ gcc/testsuite/gcc.target/i386/pr95237-1.c | 16 gcc/testsuite/gcc.target/i386/pr95237-2.c | 10 ++ gcc/testsuite/gcc.target/i386/pr95237-3.c | 10 ++ gcc/testsuite/gcc.target/i386/pr95237-4.c | 10 ++ gcc/testsuite/gcc.target/i386/pr95237-5.c | 16 10 files changed, 100 insertions(+) create mode 100644 gcc/testsuite/gcc.target/i386/pr95237-1.c create mode 100644 gcc/testsuite/gcc.target/i386/pr95237-2.c create mode 100644 gcc/testsuite/gcc.target/i386/pr95237-3.c create mode 100644 gcc/testsuite/gcc.target/i386/pr95237-4.c create mode 100644 gcc/testsuite/gcc.target/i386/pr95237-5.c diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 37aaa49996d..bcd9abd5303 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -16917,6 +16917,25 @@ ix86_minimum_alignment (tree exp, machine_mode mode, return align; } + +/* Implement TARGET_UPDATE_DECL_ALIGNMENT. */ + +static void +ix86_update_decl_alignment (tree decl) +{ + tree type = TREE_TYPE (decl); + + if (cfun != NULL + && !TARGET_64BIT + && DECL_ALIGN (decl) == 64 + && ix86_preferred_stack_boundary < 64 + && !is_global_var (decl) + && (DECL_MODE (decl) == E_DImode + || (type && TYPE_MODE (type) == E_DImode)) + && (!type || !TYPE_USER_ALIGN (type)) + && (!decl || !DECL_USER_ALIGN (decl))) +SET_DECL_ALIGN (decl, 32); +} /* Find a location for the static chain incoming to a nested function. This is a register, unless all free registers are used by arguments. */ @@ -23519,6 +23538,9 @@ ix86_run_selftests (void) #undef TARGET_CAN_CHANGE_MODE_CLASS #define TARGET_CAN_CHANGE_MODE_CLASS ix86_can_change_mode_class +#undef TARGET_UPDATE_DECL_ALIGNMENT +#define TARGET_UPDATE_DECL_ALIGNMENT ix86_update_decl_alignment + #undef TARGET_STATIC_RTX_ALIGNMENT #define TARGET_STATIC_RTX_ALIGNMENT ix86_static_rtx_alignment #undef TARGET_CONSTANT_ALIGNMENT diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi index 6e7d9dc54a9..c11ef5dca89 100644 --- a/gcc/doc/tm.texi +++ b/gcc/doc/tm.texi @@ -1086,6 +1086,11 @@ On 32-bit ELF the largest supported section alignment in bits is @samp{(0x8000 * 8)}, but this is not representable on 32-bit hosts. @end defmac +@deftypefn {Target Hook} void TARGET_UPDATE_DECL_ALIGNMENT (tree @var{decl}) +Define this hook to update alignment of decl +@samp{(@var{decl}}. +@end deftypefn + @deftypefn {Target Hook} HOST_WIDE_INT TARGET_STATIC_RTX_ALIGNMENT (machine_mode @var{mode}) This hook returns the preferred alignment in bits for a statically-allocated rtx, such as a constant pool entry. @var{mode} diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in index 3be984bbd5c..618acd73a1e 100644 --- a/gcc/doc/tm.texi.in +++ b/gcc/doc/tm.texi.in @@ -1036,6 +1036,8 @@ On 32-bit ELF the largest supported section alignment in bits is @samp{(0x8000 * 8)}, but this is not representable on 32-bit hosts. @end defmac +@hook TARGET_UPDATE_DECL_ALIGNMENT + @hook TARGET_STATIC_RTX_ALIGNMENT @defmac DATA_ALIGNMENT (@var{type}, @var{basic-align}) diff --git a/gcc/stor-layout.c b/gcc/stor-layout.c index bde6fa22b58..0687a68ba29 100644 --- a/gcc/stor-layout.c +++ b/gcc/stor-layout.c @@ -605,6 +605,8 @@ do_type_align (tree type, tree decl) if (TYPE_ALIGN (type) > DECL_ALIGN (decl)) { SET_DECL_ALIGN (decl, TYPE_ALIGN (type)); + /* Update decl alignment */ + targetm.update_decl_alignment (decl); if (TREE_CODE (decl) == FIELD_DECL) DECL_USER_ALIGN (decl) = TYPE_USER_ALIGN (type); } diff --git a/gcc/target.def b/gcc/target.def