Hello,
I'd like to have tighter control over the individual situations that
-mrecip handles, and I think the user might appreciate this too. Hence
I've introduced four new target options -mrecip-div, -mrecip-sqrt,
-mrecip-vec-div and -mrecip-vec-sqrt. I've redefined -mrecip to be
equivalent to using those four options together. In addition, one can
selectively disable individual parts, for instance via -mrecip -mno-recip-vec-div.
I was of two minds about the approach; I could also have done it like rs6000
(-mrecip=<csv list>), with the disadvantage of having to write our own
parser, as our opt framework can't deal with comma-separated lists of
masks. With the approach I chose, our opt framework gets most of the work
done.
I've decided not to use four new bits from target_flags, and instead
created a new mask (recip_mask). Four bits would have fit in target_flags
right now, but in the future we might want to add more specialization,
such as the modes for which the reciprocals are active.
What do you think?
Ciao,
Michael.
* i386/i386.opt (recip_mask_explicit, x_recip_mask_explicit):
New variable and cl_target member.
(mrecip-div, mrecip-sqrt, mrecip-vec-div, mrecip-vec-sqrt): New
options.
* common/config/i386/i386-common.c (ix86_handle_option): Handle
new options.
* i386/i386.md (divsf3): Check OPTION_RECIP_DIV.
(sqrt<mode>2): Check OPTION_RECIP_SQRT.
* i386/sse.md (div<mode>3): Check OPTION_RECIP_VEC_DIV.
(sqrt<mode>2): Check OPTION_RECIP_VEC_SQRT.
* i386/i386.c (ix86_option_override_internal): Set recip_mask
for -mrecip.
(ix86_function_specific_save): Save recip_mask_explicit.
(ix86_function_specific_restore): Restore recip_mask_explicit.
* doc/invoke.texi (ix86 Options): Document the new options.
Index: config/i386/i386.md
===================================================================
--- config/i386/i386.md (revision 178101)
+++ config/i386/i386.md (working copy)
@@ -7050,7 +7050,9 @@ (define_expand "divsf3"
"(TARGET_80387 && X87_ENABLE_ARITH (SFmode))
|| TARGET_SSE_MATH"
{
- if (TARGET_SSE_MATH && TARGET_RECIP && optimize_insn_for_speed_p ()
+ if (TARGET_SSE_MATH
+ && OPTION_RECIP_DIV
+ && optimize_insn_for_speed_p ()
&& flag_finite_math_only && !flag_trapping_math
&& flag_unsafe_math_optimizations)
{
@@ -13422,7 +13424,9 @@ (define_expand "sqrt<mode>2"
|| (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
{
if (<MODE>mode == SFmode
- && TARGET_SSE_MATH && TARGET_RECIP && !optimize_function_for_size_p
(cfun)
+ && TARGET_SSE_MATH
+ && OPTION_RECIP_SQRT
+ && !optimize_function_for_size_p (cfun)
&& flag_finite_math_only && !flag_trapping_math
&& flag_unsafe_math_optimizations)
{
Index: config/i386/sse.md
===================================================================
--- config/i386/sse.md (revision 178101)
+++ config/i386/sse.md (working copy)
@@ -772,7 +772,9 @@ (define_expand "div<mode>3"
{
ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);
- if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
+ if (TARGET_SSE_MATH
+ && OPTION_RECIP_VEC_DIV
+ && !optimize_insn_for_size_p ()
&& flag_finite_math_only && !flag_trapping_math
&& flag_unsafe_math_optimizations)
{
@@ -850,7 +852,9 @@ (define_expand "sqrt<mode>2"
(sqrt:VF1 (match_operand:VF1 1 "nonimmediate_operand" "")))]
"TARGET_SSE"
{
- if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
+ if (TARGET_SSE_MATH
+ && OPTION_RECIP_VEC_SQRT
+ && !optimize_insn_for_size_p ()
&& flag_finite_math_only && !flag_trapping_math
&& flag_unsafe_math_optimizations)
{
Index: config/i386/i386.opt
===================================================================
--- config/i386/i386.opt (revision 178101)
+++ config/i386/i386.opt (working copy)
@@ -31,6 +31,9 @@ HOST_WIDE_INT ix86_isa_flags = TARGET_64
Variable
HOST_WIDE_INT ix86_isa_flags_explicit
+Variable
+int recip_mask_explicit
+
;; Definitions to add to the cl_target_option structure
;; -march= processor
TargetSave
@@ -56,6 +59,9 @@ HOST_WIDE_INT x_ix86_isa_flags_explicit
TargetSave
int ix86_target_flags_explicit
+TargetSave
+int x_recip_mask_explicit
+
;; whether -mtune was not specified
TargetSave
unsigned char tune_defaulted
@@ -373,6 +379,22 @@ mrecip
Target Report Mask(RECIP) Save
Generate reciprocals instead of divss and sqrtss.
+mrecip-div
+Target Mask(RECIP_DIV) Var(recip_mask) Save
+Generate reciprocal estimations instead of scalar divisions.
+
+mrecip-sqrt
+Target Mask(RECIP_SQRT) Var(recip_mask) Save
+Generate reciprocal estimations instead of scalar sqrt.
+
+mrecip-vec-div
+Target Mask(RECIP_VEC_DIV) Var(recip_mask) Save
+Generate reciprocal estimations instead of vector divisions.
+
+mrecip-vec-sqrt
+Target Mask(RECIP_VEC_SQRT) Var(recip_mask) Save
+Generate reciprocal estimations instead of vector sqrt.
+
mcld
Target Report Mask(CLD) Save
Generate cld instruction in the function prologue.
Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c (revision 178101)
+++ config/i386/i386.c (working copy)
@@ -3806,6 +3806,19 @@ ix86_option_override_internal (bool main
target_flags &= ~MASK_VZEROUPPER;
}
+ if (TARGET_RECIP)
+ {
+ recip_mask |= (OPTION_MASK_RECIP_DIV | OPTION_MASK_RECIP_SQRT
+ | OPTION_MASK_RECIP_VEC_DIV
+ | OPTION_MASK_RECIP_VEC_SQRT) & ~recip_mask_explicit;
+ }
+ else if (target_flags_explicit & MASK_RECIP)
+ {
+ recip_mask &= ~((OPTION_MASK_RECIP_DIV | OPTION_MASK_RECIP_SQRT
+ | OPTION_MASK_RECIP_VEC_DIV
+ | OPTION_MASK_RECIP_VEC_SQRT) & ~recip_mask_explicit);
+ }
+
/* Save the initial options in case the user does function specific
options. */
if (main_args_p)
@@ -3938,6 +3951,7 @@ ix86_function_specific_save (struct cl_t
ptr->arch_specified = ix86_arch_specified;
ptr->x_ix86_isa_flags_explicit = ix86_isa_flags_explicit;
ptr->ix86_target_flags_explicit = target_flags_explicit;
+ ptr->x_recip_mask_explicit = recip_mask_explicit;
/* The fields are char but the variables are not; make sure the
values fit in the fields. */
@@ -3965,6 +3979,7 @@ ix86_function_specific_restore (struct c
ix86_arch_specified = ptr->arch_specified;
ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
target_flags_explicit = ptr->ix86_target_flags_explicit;
+ recip_mask_explicit = ptr->x_recip_mask_explicit;
/* Recreate the arch feature tests if the arch changed */
if (old_arch != ix86_arch)
Index: common/config/i386/i386-common.c
===================================================================
--- common/config/i386/i386-common.c (revision 178101)
+++ common/config/i386/i386-common.c (working copy)
@@ -553,6 +553,58 @@ ix86_handle_option (struct gcc_options *
}
return true;
+ case OPT_mrecip_div:
+ if (value)
+ {
+ opts->x_recip_mask |= OPTION_MASK_RECIP_DIV;
+ opts->x_recip_mask_explicit |= OPTION_MASK_RECIP_DIV;
+ }
+ else
+ {
+ opts->x_recip_mask &= ~OPTION_MASK_RECIP_DIV;
+ opts->x_recip_mask_explicit |= OPTION_MASK_RECIP_DIV;
+ }
+ return true;
+
+ case OPT_mrecip_sqrt:
+ if (value)
+ {
+ opts->x_recip_mask |= OPTION_MASK_RECIP_SQRT;
+ opts->x_recip_mask_explicit |= OPTION_MASK_RECIP_SQRT;
+ }
+ else
+ {
+ opts->x_recip_mask &= ~OPTION_MASK_RECIP_SQRT;
+ opts->x_recip_mask_explicit |= OPTION_MASK_RECIP_SQRT;
+ }
+ return true;
+
+ case OPT_mrecip_vec_div:
+ if (value)
+ {
+ opts->x_recip_mask |= OPTION_MASK_RECIP_VEC_DIV;
+ opts->x_recip_mask_explicit |= OPTION_MASK_RECIP_VEC_DIV;
+ }
+ else
+ {
+ opts->x_recip_mask &= ~OPTION_MASK_RECIP_VEC_DIV;
+ opts->x_recip_mask_explicit |= OPTION_MASK_RECIP_VEC_DIV;
+ }
+ return true;
+
+ case OPT_mrecip_vec_sqrt:
+ if (value)
+ {
+ opts->x_recip_mask |= OPTION_MASK_RECIP_VEC_SQRT;
+ opts->x_recip_mask_explicit |= OPTION_MASK_RECIP_VEC_SQRT;
+ }
+ else
+ {
+ opts->x_recip_mask &= ~OPTION_MASK_RECIP_VEC_SQRT;
+ opts->x_recip_mask_explicit |= OPTION_MASK_RECIP_VEC_SQRT;
+ }
+ return true;
+
/* Comes from final.c -- no real reason to change it. */
#define MAX_CODE_ALIGN 16
Index: doc/invoke.texi
===================================================================
--- doc/invoke.texi (revision 178101)
+++ doc/invoke.texi (working copy)
@@ -604,7 +604,9 @@ Objective-C and Objective-C++ Dialects}.
-mno-wide-multiply -mrtd -malign-double @gol
-mpreferred-stack-boundary=@var{num} @gol
-mincoming-stack-boundary=@var{num} @gol
--mcld -mcx16 -msahf -mmovbe -mcrc32 -mrecip -mvzeroupper @gol
+-mcld -mcx16 -msahf -mmovbe -mcrc32 @gol
+-mrecip -mrecip-div -mrecip-sqrt -mrecip-vec-div -mrecip-vec-sqrt @gol
+-mvzeroupper @gol
-mmmx -msse -msse2 -msse3 -mssse3 -msse4.1 -msse4.2 -msse4 -mavx @gol
-mavx2 -maes -mpclmul -mfsgsbase -mrdrnd -mf16c -mfma @gol
-msse4a -m3dnow -mpopcnt -mabm -mbmi -mtbm -mfma4 -mxop -mlzcnt @gol
@@ -12804,6 +12806,31 @@ Note that GCC implements 1.0f/sqrtf(x) i
already with @option{-ffast-math} (or the above option combination), and
doesn't need @option{-mrecip}.
+@item -mrecip-div
+@itemx -mrecip-sqrt
+@itemx -mrecip-vec-div
+@itemx -mrecip-vec-sqrt
+@itemx -mno-recip-div
+@itemx -mno-recip-sqrt
+@itemx -mno-recip-vec-div
+@itemx -mno-recip-vec-sqrt
+@opindex mrecip-div
+@opindex mrecip-sqrt
+@opindex mrecip-vec-div
+@opindex mrecip-vec-sqrt
+@opindex mno-recip-div
+@opindex mno-recip-sqrt
+@opindex mno-recip-vec-div
+@opindex mno-recip-vec-sqrt
+These options control the use of reciprocal estimate instructions
+in detail. @option{-mrecip-div} controls scalar division,
+@option{-mrecip-sqrt} scalar square root, @option{-mrecip-vec-div}
+vectorized division and @option{-mrecip-vec-sqrt} vectorized
+square root. @option{-mrecip} is equivalent to specifying all four
+of these options; see its description above. The corresponding
+negative options such as @option{-mno-recip-div} can be used to
+selectively disable individual variants.
+
@item -mveclibabi=@var{type}
@opindex mveclibabi
Specifies the ABI type to use for vectorizing intrinsics using an