The patch moves splitting to after epilogue_completed, so that a subsequent regrename pass won't break the register matching requirements of the TARGET_AVOID_FALSE_DEP_FOR_BMI fix.
Also, remove unneeded expanders and merge a couple of patterns. 2016-12-17 Uros Bizjak <ubiz...@gmail.com> * config/i386/i386.md (*tzcnt<mode>_1): Merge *tzcnt<mode>_1_falsedep_1 and *tzcnt<mode>_1 to define_insn_and_split pattern. Adjust split condition to split after epilogue_completed. (ctz<mode>2): Remove expander. (ctz<mode>2): Merge *ctz<mode>2_falsedep_1 and *ctz<mode>2 to define_insn_and_split pattern. Adjust split condition to split after epilogue_completed. (clz<mode>2_lzcnt): Remove expander. (clz<mode>2_lzcnt): Merge *clz<mode>2_lzcnt_falsedep_1 and clz<mode>2_lzcnt to define_insn_and_split pattern. Adjust split condition to split after epilogue_completed. (<lt_zcnt>_<mode>): Remove expander. (<lt_zcnt>_<mode>): Merge *<lt_zcnt>_<mode>_falsedep_1 and *<lt_zcnt>_<mode> to define_insn_and_split pattern. Adjust split condition to split after epilogue_completed. (<lt_zcnt>_hi): New insn pattern. (popcount<mode>2): Remove expander. (popcount<mode>2): Merge *popcount<mode>2_falsedep_1 and *popcount<mode>2 to define_insn_and_split pattern. Adjust split condition to split after epilogue_completed. (popcounthi2): New insn pattern. The patch was bootstrapped and regression tested on x86_64-linux-gnu {,-m32}. Committed to mainline SVN. Uros.
Index: config/i386/i386.md =================================================================== --- config/i386/i386.md (revision 243746) +++ config/i386/i386.md (working copy) @@ -12569,19 +12566,17 @@ ix86_expand_clear (operands[2]); }) -; False dependency happens when destination is only updated by tzcnt, -; lzcnt or popcnt. There is no false dependency when destination is -; also used in source. -(define_insn_and_split "*tzcnt<mode>_1_falsedep_1" +(define_insn_and_split "*tzcnt<mode>_1" [(set (reg:CCC FLAGS_REG) (compare:CCC (match_operand:SWI48 1 "nonimmediate_operand" "rm") (const_int 0))) (set (match_operand:SWI48 0 "register_operand" "=r") (ctz:SWI48 (match_dup 1)))] - "TARGET_BMI - && TARGET_AVOID_FALSE_DEP_FOR_BMI && optimize_function_for_speed_p (cfun)" - "#" - "&& reload_completed" + "TARGET_BMI" + "tzcnt{<imodesuffix>}\t{%1, %0|%0, %1}"; + "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed + && optimize_function_for_speed_p (cfun) + && !reg_mentioned_p (operands[0], operands[1])" [(parallel [(set (reg:CCC FLAGS_REG) (compare:CCC (match_dup 1) (const_int 0))) @@ -12588,11 +12583,16 @@ (set (match_dup 0) (ctz:SWI48 (match_dup 1))) (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)])] -{ - if (!reg_mentioned_p (operands[0], operands[1])) - ix86_expand_clear (operands[0]); -}) + "ix86_expand_clear (operands[0]);" + [(set_attr "type" "alu1") + (set_attr "prefix_0f" "1") + (set_attr "prefix_rep" "1") + (set_attr "btver2_decode" "double") + (set_attr "mode" "<MODE>")]) +; False dependency happens when destination is only updated by tzcnt, +; lzcnt or popcnt. There is no false dependency when destination is +; also used in source. 
(define_insn "*tzcnt<mode>_1_falsedep" [(set (reg:CCC FLAGS_REG) (compare:CCC (match_operand:SWI48 1 "nonimmediate_operand" "rm") @@ -12609,20 +12609,6 @@ (set_attr "btver2_decode" "double") (set_attr "mode" "<MODE>")]) -(define_insn "*tzcnt<mode>_1" - [(set (reg:CCC FLAGS_REG) - (compare:CCC (match_operand:SWI48 1 "nonimmediate_operand" "rm") - (const_int 0))) - (set (match_operand:SWI48 0 "register_operand" "=r") - (ctz:SWI48 (match_dup 1)))] - "TARGET_BMI" - "tzcnt{<imodesuffix>}\t{%1, %0|%0, %1}" - [(set_attr "type" "alu1") - (set_attr "prefix_0f" "1") - (set_attr "prefix_rep" "1") - (set_attr "btver2_decode" "double") - (set_attr "mode" "<MODE>")]) - (define_insn "*bsf<mode>_1" [(set (reg:CCZ FLAGS_REG) (compare:CCZ (match_operand:SWI48 1 "nonimmediate_operand" "rm") @@ -12637,13 +12623,6 @@ (set_attr "znver1_decode" "vector") (set_attr "mode" "<MODE>")]) -(define_expand "ctz<mode>2" - [(parallel - [(set (match_operand:SWI48 0 "register_operand") - (ctz:SWI48 - (match_operand:SWI48 1 "nonimmediate_operand"))) - (clobber (reg:CC FLAGS_REG))])]) - (define_insn_and_split "*ctzhi2" [(set (match_operand:SI 0 "register_operand") (ctz:SI @@ -12662,28 +12641,47 @@ DONE; }) -; False dependency happens when destination is only updated by tzcnt, -; lzcnt or popcnt. There is no false dependency when destination is -; also used in source. -(define_insn_and_split "*ctz<mode>2_falsedep_1" +(define_insn_and_split "ctz<mode>2" [(set (match_operand:SWI48 0 "register_operand" "=r") (ctz:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "rm"))) (clobber (reg:CC FLAGS_REG))] + "" +{ + if (TARGET_BMI) + return "tzcnt{<imodesuffix>}\t{%1, %0|%0, %1}"; + else if (optimize_function_for_size_p (cfun)) + ; + else if (TARGET_GENERIC) + /* tzcnt expands to 'rep bsf' and we can use it even if !TARGET_BMI. 
*/ + return "rep%; bsf{<imodesuffix>}\t{%1, %0|%0, %1}"; + + return "bsf{<imodesuffix>}\t{%1, %0|%0, %1}"; +} "(TARGET_BMI || TARGET_GENERIC) - && TARGET_AVOID_FALSE_DEP_FOR_BMI && optimize_function_for_speed_p (cfun)" - "#" - "&& reload_completed" + && TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed + && optimize_function_for_speed_p (cfun) + && !reg_mentioned_p (operands[0], operands[1])" [(parallel [(set (match_dup 0) (ctz:SWI48 (match_dup 1))) (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP) (clobber (reg:CC FLAGS_REG))])] -{ - if (!reg_mentioned_p (operands[0], operands[1])) - ix86_expand_clear (operands[0]); -}) + "ix86_expand_clear (operands[0]);" + [(set_attr "type" "alu1") + (set_attr "prefix_0f" "1") + (set (attr "prefix_rep") + (if_then_else + (ior (match_test "TARGET_BMI") + (and (not (match_test "optimize_function_for_size_p (cfun)")) + (match_test "TARGET_GENERIC"))) + (const_string "1") + (const_string "0"))) + (set_attr "mode" "<MODE>")]) +; False dependency happens when destination is only updated by tzcnt, +; lzcnt or popcnt. There is no false dependency when destination is +; also used in source. (define_insn "*ctz<mode>2_falsedep" [(set (match_operand:SWI48 0 "register_operand" "=r") (ctz:SWI48 @@ -12706,33 +12704,6 @@ (set_attr "prefix_rep" "1") (set_attr "mode" "<MODE>")]) -(define_insn "*ctz<mode>2" - [(set (match_operand:SWI48 0 "register_operand" "=r") - (ctz:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "rm"))) - (clobber (reg:CC FLAGS_REG))] - "" -{ - if (TARGET_BMI) - return "tzcnt{<imodesuffix>}\t{%1, %0|%0, %1}"; - else if (optimize_function_for_size_p (cfun)) - ; - else if (TARGET_GENERIC) - /* tzcnt expands to 'rep bsf' and we can use it even if !TARGET_BMI. 
*/ - return "rep%; bsf{<imodesuffix>}\t{%1, %0|%0, %1}"; - - return "bsf{<imodesuffix>}\t{%1, %0|%0, %1}"; -} - [(set_attr "type" "alu1") - (set_attr "prefix_0f" "1") - (set (attr "prefix_rep") - (if_then_else - (ior (match_test "TARGET_BMI") - (and (not (match_test "optimize_function_for_size_p (cfun)")) - (match_test "TARGET_GENERIC"))) - (const_string "1") - (const_string "0"))) - (set_attr "mode" "<MODE>")]) - (define_insn "bsr_rex64" [(set (match_operand:DI 0 "register_operand" "=r") (minus:DI (const_int 63) @@ -12807,28 +12778,29 @@ DONE; }) -; False dependency happens when destination is only updated by tzcnt, -; lzcnt or popcnt. There is no false dependency when destination is -; also used in source. -(define_insn_and_split "*clz<mode>2_lzcnt_falsedep_1" +(define_insn_and_split "clz<mode>2_lzcnt" [(set (match_operand:SWI48 0 "register_operand" "=r") (clz:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "rm"))) (clobber (reg:CC FLAGS_REG))] - "TARGET_LZCNT - && TARGET_AVOID_FALSE_DEP_FOR_BMI && optimize_function_for_speed_p (cfun)" - "#" - "&& reload_completed" + "TARGET_LZCNT" + "lzcnt{<imodesuffix>}\t{%1, %0|%0, %1}" + "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed + && optimize_function_for_speed_p (cfun) + && !reg_mentioned_p (operands[0], operands[1])" [(parallel [(set (match_dup 0) (clz:SWI48 (match_dup 1))) (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP) (clobber (reg:CC FLAGS_REG))])] -{ - if (!reg_mentioned_p (operands[0], operands[1])) - ix86_expand_clear (operands[0]); -}) + "ix86_expand_clear (operands[0]);" + [(set_attr "prefix_rep" "1") + (set_attr "type" "bitmanip") + (set_attr "mode" "<MODE>")]) +; False dependency happens when destination is only updated by tzcnt, +; lzcnt or popcnt. There is no false dependency when destination is +; also used in source. 
(define_insn "*clz<mode>2_lzcnt_falsedep" [(set (match_operand:SWI48 0 "register_operand" "=r") (clz:SWI48 @@ -12842,17 +12814,6 @@ (set_attr "type" "bitmanip") (set_attr "mode" "<MODE>")]) -(define_insn "clz<mode>2_lzcnt" - [(set (match_operand:SWI48 0 "register_operand" "=r") - (clz:SWI48 - (match_operand:SWI48 1 "nonimmediate_operand" "rm"))) - (clobber (reg:CC FLAGS_REG))] - "TARGET_LZCNT" - "lzcnt{<imodesuffix>}\t{%1, %0|%0, %1}" - [(set_attr "prefix_rep" "1") - (set_attr "type" "bitmanip") - (set_attr "mode" "<MODE>")]) - (define_int_iterator LT_ZCNT [(UNSPEC_TZCNT "TARGET_BMI") (UNSPEC_LZCNT "TARGET_LZCNT")]) @@ -12868,34 +12829,30 @@ ;; Version of lzcnt/tzcnt that is expanded from intrinsics. This version ;; provides operand size as output when source operand is zero. -(define_expand "<lt_zcnt>_<mode>" - [(parallel - [(set (match_operand:SWI248 0 "register_operand") - (unspec:SWI248 - [(match_operand:SWI248 1 "nonimmediate_operand")] LT_ZCNT)) - (clobber (reg:CC FLAGS_REG))])]) - -; False dependency happens when destination is only updated by tzcnt, -; lzcnt or popcnt. There is no false dependency when destination is -; also used in source. 
-(define_insn_and_split "*<lt_zcnt>_<mode>_falsedep_1" +(define_insn_and_split "<lt_zcnt>_<mode>" [(set (match_operand:SWI48 0 "register_operand" "=r") (unspec:SWI48 [(match_operand:SWI48 1 "nonimmediate_operand" "rm")] LT_ZCNT)) (clobber (reg:CC FLAGS_REG))] - "TARGET_AVOID_FALSE_DEP_FOR_BMI && optimize_function_for_speed_p (cfun)" - "#" - "&& reload_completed" + "" + "<lt_zcnt>{<imodesuffix>}\t{%1, %0|%0, %1}" + "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed + && optimize_function_for_speed_p (cfun) + && !reg_mentioned_p (operands[0], operands[1])" [(parallel [(set (match_dup 0) (unspec:SWI48 [(match_dup 1)] LT_ZCNT)) (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP) (clobber (reg:CC FLAGS_REG))])] -{ - if (!reg_mentioned_p (operands[0], operands[1])) - ix86_expand_clear (operands[0]); -}) + "ix86_expand_clear (operands[0]);" + [(set_attr "type" "<lt_zcnt_type>") + (set_attr "prefix_0f" "1") + (set_attr "prefix_rep" "1") + (set_attr "mode" "<MODE>")]) +; False dependency happens when destination is only updated by tzcnt, +; lzcnt or popcnt. There is no false dependency when destination is +; also used in source. (define_insn "*<lt_zcnt>_<mode>_falsedep" [(set (match_operand:SWI48 0 "register_operand" "=r") (unspec:SWI48 @@ -12910,17 +12867,17 @@ (set_attr "prefix_rep" "1") (set_attr "mode" "<MODE>")]) -(define_insn "*<lt_zcnt>_<mode>" - [(set (match_operand:SWI248 0 "register_operand" "=r") - (unspec:SWI248 - [(match_operand:SWI248 1 "nonimmediate_operand" "rm")] LT_ZCNT)) +(define_insn "<lt_zcnt>_hi" + [(set (match_operand:HI 0 "register_operand" "=r") + (unspec:HI + [(match_operand:HI 1 "nonimmediate_operand" "rm")] LT_ZCNT)) (clobber (reg:CC FLAGS_REG))] "" - "<lt_zcnt>{<imodesuffix>}\t{%1, %0|%0, %1}" + "<lt_zcnt>{w}\t{%1, %0|%0, %1}" [(set_attr "type" "<lt_zcnt_type>") (set_attr "prefix_0f" "1") (set_attr "prefix_rep" "1") - (set_attr "mode" "<MODE>")]) + (set_attr "mode" "HI")]) ;; BMI instructions. 
@@ -13216,33 +13173,35 @@ [(set_attr "type" "bitmanip") (set_attr "mode" "<MODE>")]) -(define_expand "popcount<mode>2" - [(parallel - [(set (match_operand:SWI248 0 "register_operand") - (popcount:SWI248 - (match_operand:SWI248 1 "nonimmediate_operand"))) - (clobber (reg:CC FLAGS_REG))])] - "TARGET_POPCNT") - -(define_insn_and_split "*popcount<mode>2_falsedep_1" +(define_insn_and_split "popcount<mode>2" [(set (match_operand:SWI48 0 "register_operand" "=r") (popcount:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "rm"))) (clobber (reg:CC FLAGS_REG))] - "TARGET_POPCNT - && TARGET_AVOID_FALSE_DEP_FOR_BMI && optimize_function_for_speed_p (cfun)" - "#" - "&& reload_completed" + "TARGET_POPCNT" +{ +#if TARGET_MACHO + return "popcnt\t{%1, %0|%0, %1}"; +#else + return "popcnt{<imodesuffix>}\t{%1, %0|%0, %1}"; +#endif +} + "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed + && optimize_function_for_speed_p (cfun) + && !reg_mentioned_p (operands[0], operands[1])" [(parallel [(set (match_dup 0) (popcount:SWI48 (match_dup 1))) (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP) (clobber (reg:CC FLAGS_REG))])] -{ - if (!reg_mentioned_p (operands[0], operands[1])) - ix86_expand_clear (operands[0]); -}) + "ix86_expand_clear (operands[0]);" + [(set_attr "prefix_rep" "1") + (set_attr "type" "bitmanip") + (set_attr "mode" "<MODE>")]) +; False dependency happens when destination is only updated by tzcnt, +; lzcnt or popcnt. There is no false dependency when destination is +; also used in source. 
(define_insn "*popcount<mode>2_falsedep" [(set (match_operand:SWI48 0 "register_operand" "=r") (popcount:SWI48 @@ -13262,10 +13221,10 @@ (set_attr "type" "bitmanip") (set_attr "mode" "<MODE>")]) -(define_insn "*popcount<mode>2" - [(set (match_operand:SWI248 0 "register_operand" "=r") - (popcount:SWI248 - (match_operand:SWI248 1 "nonimmediate_operand" "rm"))) +(define_insn "popcounthi2" + [(set (match_operand:HI 0 "register_operand" "=r") + (popcount:HI + (match_operand:HI 1 "nonimmediate_operand" "rm"))) (clobber (reg:CC FLAGS_REG))] "TARGET_POPCNT" { @@ -13272,12 +13231,12 @@ #if TARGET_MACHO return "popcnt\t{%1, %0|%0, %1}"; #else - return "popcnt{<imodesuffix>}\t{%1, %0|%0, %1}"; + return "popcnt{w}\t{%1, %0|%0, %1}"; #endif } [(set_attr "prefix_rep" "1") (set_attr "type" "bitmanip") - (set_attr "mode" "<MODE>")]) + (set_attr "mode" "HI")]) (define_expand "bswapdi2" [(set (match_operand:DI 0 "register_operand")