Hello! With all the recent mode-handling cleanups to move patterns and SSE bitops, it is now possible to enable TFmode moves via XMM registers for 32-bit SSE targets. The compiler emits packed single operations in this case, so the following testcase:
--cut here-- __float128 test_abs (__float128 a) { return (__builtin_fabsq (a)); } __float128 test_copysign (__float128 a, __float128 b) { return (__builtin_copysignq (a, b)); } --cut here-- compiles with "-O2 -msse" to: test_abs: movl 4(%esp), %eax movaps 20(%esp), %xmm0 andps .LC0, %xmm0 movaps %xmm0, (%eax) ret $4 test_copysign: movaps 20(%esp), %xmm0 movaps 36(%esp), %xmm1 movl 4(%esp), %eax andps .LC1, %xmm1 andps .LC0, %xmm0 orps %xmm1, %xmm0 movaps %xmm0, (%eax) ret $4 For comparison, with -msse2 the compiler generates: test_abs: movl 4(%esp), %eax movdqa 20(%esp), %xmm0 pand .LC0, %xmm0 movdqa %xmm0, (%eax) ret $4 test_copysign: movl 4(%esp), %eax movdqa 20(%esp), %xmm0 movdqa 36(%esp), %xmm1 pand .LC0, %xmm0 pand .LC1, %xmm1 por %xmm1, %xmm0 movdqa %xmm0, (%eax) ret $4 With an unpatched 4.7 compiler, the same code compiles (-O2 -msse) to some 40 SImode moves, with calls to __fabstf2 and __copysigntf2. 2012-05-13 Uros Bizjak <ubiz...@gmail.com> * config/i386/i386.md (*pushtf): Enable for TARGET_SSE. (pushtf splitter): Ditto. (movtf): Ditto. (*movtf_internal): Ditto. Use V4SFmode for !TARGET_SSE2. (<code>tf2): Enable for TARGET_SSE. (*absnegtf2_sse): Ditto. (copysign<mode>3): Enable TFmode for TARGET_SSE. (copysign<mode>3_const): Ditto. (copysign<mode>3_var): Ditto. * config/i386/sse.md (<code>tf3): Enable for TARGET_SSE. (*andnottf3): Ditto. Use V4SFmode for !TARGET_SSE2. (*<code>tf3): Ditto. * config/i386/i386.c (struct builtin_description bdesc_args) <IX86_BUILTIN_FABSQ>: Enable for TARGET_SSE. <IX86_BUILTIN_COPYSIGNQ>: Ditto. (ix86_expand_builtin) <IX86_BUILTIN_FABSQ, IX86_BUILTIN_COPYSIGNQ>: Emit a normal call if SSE isn't available. Patch was bootstrapped and regression tested on x86_64-pc-linux-gnu {,-m32} and committed to mainline SVN. Uros.
Index: config/i386/i386.c =================================================================== --- config/i386/i386.c (revision 187435) +++ config/i386/i386.c (working copy) @@ -26327,6 +26327,9 @@ static const struct builtin_description bdesc_args { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE }, { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE }, + { OPTION_MASK_ISA_SSE, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 }, + { OPTION_MASK_ISA_SSE, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 }, + /* SSE MMX or 3Dnow!A */ { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI }, { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI }, @@ -26510,9 +26513,6 @@ static const struct builtin_description bdesc_args { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 }, - { OPTION_MASK_ISA_SSE, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI }, /* SSE2 MMX */ @@ -28081,7 +28081,7 @@ ix86_init_builtins (void) def_builtin_const (0, "__builtin_huge_valq", FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ); - /* We will expand them to normal call if SSE2 isn't available since + /* We will expand them to normal call if SSE isn't available since they are used by libgcc. 
*/ t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128); t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ, @@ -30215,8 +30215,8 @@ rdrand_step: { case IX86_BUILTIN_FABSQ: case IX86_BUILTIN_COPYSIGNQ: - if (!TARGET_SSE2) - /* Emit a normal call if SSE2 isn't available. */ + if (!TARGET_SSE) + /* Emit a normal call if SSE isn't available. */ return expand_call (exp, target, ignore); default: return ix86_expand_args_builtin (d, exp, target); Index: config/i386/i386.md =================================================================== --- config/i386/i386.md (revision 187435) +++ config/i386/i386.md (working copy) @@ -2708,7 +2708,7 @@ (define_insn "*pushtf" [(set (match_operand:TF 0 "push_operand" "=<,<,<") (match_operand:TF 1 "general_no_elim_operand" "x,Fo,*r"))] - "TARGET_SSE2" + "TARGET_SSE" { /* This insn should be already split before reg-stack. */ gcc_unreachable (); @@ -2721,7 +2721,7 @@ (define_split [(set (match_operand:TF 0 "push_operand") (match_operand:TF 1 "sse_reg_operand"))] - "TARGET_SSE2 && reload_completed" + "TARGET_SSE && reload_completed" [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (const_int -16))) (set (mem:TF (reg:P SP_REG)) (match_dup 1))]) @@ -2859,7 +2859,7 @@ (define_expand "movtf" [(set (match_operand:TF 0 "nonimmediate_operand") (match_operand:TF 1 "nonimmediate_operand"))] - "TARGET_SSE2" + "TARGET_SSE" { ix86_expand_move (TFmode, operands); DONE; @@ -2874,7 +2874,7 @@ (define_insn "*movtf_internal" [(set (match_operand:TF 0 "nonimmediate_operand" "=x,x ,m,?*r ,!o") (match_operand:TF 1 "general_operand" "C ,xm,x,*roF,F*r"))] - "TARGET_SSE2 + "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1])) && (!can_create_pseudo_p () || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE) @@ -2929,7 +2929,8 @@ (const_string "V4SF") (match_test "TARGET_AVX") (const_string "TI") - (match_test "optimize_function_for_size_p (cfun)") + (ior (not (match_test "TARGET_SSE2")) + (match_test "optimize_function_for_size_p 
(cfun)")) (const_string "V4SF") ] (const_string "TI")))]) @@ -8710,7 +8711,7 @@ (define_expand "<code>tf2" [(set (match_operand:TF 0 "register_operand") (absneg:TF (match_operand:TF 1 "register_operand")))] - "TARGET_SSE2" + "TARGET_SSE" "ix86_expand_fp_absneg_operator (<CODE>, TFmode, operands); DONE;") (define_insn "*absnegtf2_sse" @@ -8719,7 +8720,7 @@ [(match_operand:TF 1 "register_operand" "0,x")])) (use (match_operand:TF 2 "nonimmediate_operand" "xm,0")) (clobber (reg:CC FLAGS_REG))] - "TARGET_SSE2" + "TARGET_SSE" "#") ;; Splitters for fp abs and neg. @@ -8898,7 +8899,7 @@ (match_operand:CSGNMODE 1 "nonmemory_operand") (match_operand:CSGNMODE 2 "register_operand")] "(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH) - || (TARGET_SSE2 && (<MODE>mode == TFmode))" + || (TARGET_SSE && (<MODE>mode == TFmode))" "ix86_expand_copysign (operands); DONE;") (define_insn_and_split "copysign<mode>3_const" @@ -8909,7 +8910,7 @@ (match_operand:<CSGNVMODE> 3 "nonimmediate_operand" "xm")] UNSPEC_COPYSIGN))] "(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH) - || (TARGET_SSE2 && (<MODE>mode == TFmode))" + || (TARGET_SSE && (<MODE>mode == TFmode))" "#" "&& reload_completed" [(const_int 0)] @@ -8925,7 +8926,7 @@ UNSPEC_COPYSIGN)) (clobber (match_scratch:<CSGNVMODE> 1 "=x,x,x,x,x"))] "(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH) - || (TARGET_SSE2 && (<MODE>mode == TFmode))" + || (TARGET_SSE && (<MODE>mode == TFmode))" "#") (define_split @@ -8938,7 +8939,7 @@ UNSPEC_COPYSIGN)) (clobber (match_scratch:<CSGNVMODE> 1))] "((SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH) - || (TARGET_SSE2 && (<MODE>mode == TFmode))) + || (TARGET_SSE && (<MODE>mode == TFmode))) && reload_completed" [(const_int 0)] "ix86_split_copysign_var (operands); DONE;")