On Fri, Aug 15, 2014 at 2:26 PM, Yuri Rumyantsev <ysrum...@gmail.com> wrote:
> Is it important to have correct value for length attribute for Big Cores? > As I new this attribute is used for code layout alignment. > > 2014-08-15 15:54 GMT+04:00 Jakub Jelinek <ja...@redhat.com>: >> On Fri, Aug 15, 2014 at 03:45:33PM +0400, Yuri Rumyantsev wrote: >>> gcc/ChangeLog >>> 2014-08-15 Yuri Rumyantsev <ysrum...@gmail.com> >>> >>> PR target/62011 >>> * config/i386/i386-protos.h (ix86_avoid_false_dep_for_bm): New function >>> prototype. >>> * config/i386/i386.c (ix86_avoid_false_dep_for_bm): New function. >>> * config/i386/i386.h (TARGET_AVOID_FALSE_DEP_FOR_BM) New macros. >>> * config/i386/i386.md (ctz<mode>2, clz<mode>2_lzcnt, popcount<mode>2, >>> *popcount<mode>2_cmp, *popcountsi2_cmp_zext): Output zeroing >>> destination register for unary bit-manipulation instructions >>> if required. >>> * config/i386/x86-tune.def (X86_TUNE_AVOID_FALSE_DEP_FOR_BM): New. I am testing a different approach, outlined in the attached patch. In the patch, the insn is split after reload into separate insns. As far as popcnt is concerned, we don't need the _cmp pattern; the generic code is clever enough to substitute "if (popcnt (a))" with "if (a)". Uros.
Index: i386.h =================================================================== --- i386.h (revision 214000) +++ i386.h (working copy) @@ -473,6 +473,8 @@ extern unsigned char ix86_tune_features[X86_TUNE_L ix86_tune_features[X86_TUNE_SPLIT_MEM_OPND_FOR_FP_CONVERTS] #define TARGET_ADJUST_UNROLL \ ix86_tune_features[X86_TUNE_ADJUST_UNROLL] +#define TARGET_AVOID_FALSE_DEP_FOR_BMI \ + ix86_tune_features[X86_TUNE_AVOID_FALSE_DEP_FOR_BMI] /* Feature tests against the various architecture variations. */ enum ix86_arch_indices { Index: i386.md =================================================================== --- i386.md (revision 214000) +++ i386.md (working copy) @@ -112,6 +112,7 @@ UNSPEC_XBEGIN_ABORT UNSPEC_STOS UNSPEC_PEEPSIB + UNSPEC_INSN_FALSE_DEP ;; For SSE/MMX support: UNSPEC_FIX_NOTRUNC @@ -12569,11 +12570,37 @@ (set_attr "prefix_0f" "1") (set_attr "mode" "HI")]) -(define_insn "popcount<mode>2" - [(set (match_operand:SWI248 0 "register_operand" "=r") - (popcount:SWI248 - (match_operand:SWI248 1 "nonimmediate_operand" "rm"))) +(define_expand "popcount<mode>2" + [(parallel + [(set (match_operand:SWI248 0 "register_operand") + (popcount:SWI248 + (match_operand:SWI248 1 "nonimmediate_operand"))) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_POPCNT") + +(define_insn_and_split "*popcount<mode>2_falsedep_1" + [(set (match_operand:SWI48 0 "register_operand" "=&r") + (popcount:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "rm"))) (clobber (reg:CC FLAGS_REG))] + "TARGET_POPCNT + && TARGET_AVOID_FALSE_DEP_FOR_BMI && optimize_insn_for_speed_p ()" + "#" + "&& reload_completed" + [(parallel + [(set (match_dup 0) + (popcount:SWI48 (match_dup 1))) + (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP) + (clobber (reg:CC FLAGS_REG))])] + "ix86_expand_clear (operands[0]);") + +(define_insn "*popcount<mode>2_falsedep" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (popcount:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "rm"))) + (unspec [(match_operand:SWI48 2 
"register_operand" "0")] + UNSPEC_INSN_FALSE_DEP) + (clobber (reg:CC FLAGS_REG))] "TARGET_POPCNT" { #if TARGET_MACHO @@ -12586,15 +12613,12 @@ (set_attr "type" "bitmanip") (set_attr "mode" "<MODE>")]) -(define_insn "*popcount<mode>2_cmp" - [(set (reg FLAGS_REG) - (compare - (popcount:SWI248 - (match_operand:SWI248 1 "nonimmediate_operand" "rm")) - (const_int 0))) - (set (match_operand:SWI248 0 "register_operand" "=r") - (popcount:SWI248 (match_dup 1)))] - "TARGET_POPCNT && ix86_match_ccmode (insn, CCZmode)" +(define_insn "*popcount<mode>2" + [(set (match_operand:SWI248 0 "register_operand" "=r") + (popcount:SWI248 + (match_operand:SWI248 1 "nonimmediate_operand" "rm"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_POPCNT" { #if TARGET_MACHO return "popcnt\t{%1, %0|%0, %1}"; @@ -12606,25 +12630,6 @@ (set_attr "type" "bitmanip") (set_attr "mode" "<MODE>")]) -(define_insn "*popcountsi2_cmp_zext" - [(set (reg FLAGS_REG) - (compare - (popcount:SI (match_operand:SI 1 "nonimmediate_operand" "rm")) - (const_int 0))) - (set (match_operand:DI 0 "register_operand" "=r") - (zero_extend:DI(popcount:SI (match_dup 1))))] - "TARGET_64BIT && TARGET_POPCNT && ix86_match_ccmode (insn, CCZmode)" -{ -#if TARGET_MACHO - return "popcnt\t{%1, %0|%0, %1}"; -#else - return "popcnt{l}\t{%1, %0|%0, %1}"; -#endif -} - [(set_attr "prefix_rep" "1") - (set_attr "type" "bitmanip") - (set_attr "mode" "SI")]) - (define_expand "bswapdi2" [(set (match_operand:DI 0 "register_operand") (bswap:DI (match_operand:DI 1 "nonimmediate_operand")))] Index: x86-tune.def =================================================================== --- x86-tune.def (revision 214000) +++ x86-tune.def (working copy) @@ -509,6 +509,11 @@ DEF_TUNE (X86_TUNE_NOT_VECTORMODE, "not_vectormode DEF_TUNE (X86_TUNE_AVOID_VECTOR_DECODE, "avoid_vector_decode", m_K8) +/* X86_TUNE_AVOID_FALSE_DEP_FOR_BMI: Avoid false dependency + for bit-manipulation instructions. 
*/ +DEF_TUNE (X86_TUNE_AVOID_FALSE_DEP_FOR_BMI, "avoid_false_dep_for_bmi", + m_SANDYBRIDGE | m_HASWELL | m_INTEL | m_GENERIC) + /*****************************************************************************/ /* This never worked well before. */ /*****************************************************************************/