On Fri, Aug 15, 2014 at 2:26 PM, Yuri Rumyantsev <ysrum...@gmail.com> wrote:

> Is it important to have correct value for length attribute for Big Cores?
> As I new this attribute is used for code layout alignment.
>
> 2014-08-15 15:54 GMT+04:00 Jakub Jelinek <ja...@redhat.com>:
>> On Fri, Aug 15, 2014 at 03:45:33PM +0400, Yuri Rumyantsev wrote:
>>> gcc/ChangeLog
>>> 2014-08-15  Yuri Rumyantsev  <ysrum...@gmail.com>
>>>
>>> PR target/62011
>>> * config/i386/i386-protos.h (ix86_avoid_false_dep_for_bm): New function
>>>  prototype.
>>> * config/i386/i386.c (ix86_avoid_false_dep_for_bm): New function.
>>> * config/i386/i386.h (TARGET_AVOID_FALSE_DEP_FOR_BM) New macros.
>>> * config/i386/i386.md (ctz<mode>2, clz<mode>2_lzcnt, popcount<mode>2,
>>>  *popcount<mode>2_cmp, *popcountsi2_cmp_zext): Output zeroing
>>>  destination register for unary bit-manipulation instructions
>>>  if required.
>>> * config/i386/x86-tune.def (X86_TUNE_AVOID_FALSE_DEP_FOR_BM): New.

I am testing a different approach, outlined in the attached patch. In
the patch, insn is split after reload to separate insns.

As far as popcnt is concerned, we don't need _cmp pattern, the generic
code is clever enough to substuitute "if (popcnt (a))" with "if (a)".

Uros.
Index: i386.h
===================================================================
--- i386.h      (revision 214000)
+++ i386.h      (working copy)
@@ -473,6 +473,8 @@ extern unsigned char ix86_tune_features[X86_TUNE_L
        ix86_tune_features[X86_TUNE_SPLIT_MEM_OPND_FOR_FP_CONVERTS]
 #define TARGET_ADJUST_UNROLL \
     ix86_tune_features[X86_TUNE_ADJUST_UNROLL]
+#define TARGET_AVOID_FALSE_DEP_FOR_BMI \
+       ix86_tune_features[X86_TUNE_AVOID_FALSE_DEP_FOR_BMI]
 
 /* Feature tests against the various architecture variations.  */
 enum ix86_arch_indices {
Index: i386.md
===================================================================
--- i386.md     (revision 214000)
+++ i386.md     (working copy)
@@ -112,6 +112,7 @@
   UNSPEC_XBEGIN_ABORT
   UNSPEC_STOS
   UNSPEC_PEEPSIB
+  UNSPEC_INSN_FALSE_DEP
 
   ;; For SSE/MMX support:
   UNSPEC_FIX_NOTRUNC
@@ -12569,11 +12570,37 @@
    (set_attr "prefix_0f" "1")
    (set_attr "mode" "HI")])
 
-(define_insn "popcount<mode>2"
-  [(set (match_operand:SWI248 0 "register_operand" "=r")
-       (popcount:SWI248
-         (match_operand:SWI248 1 "nonimmediate_operand" "rm")))
+(define_expand "popcount<mode>2"
+  [(parallel
+    [(set (match_operand:SWI248 0 "register_operand")
+         (popcount:SWI248
+           (match_operand:SWI248 1 "nonimmediate_operand")))
+     (clobber (reg:CC FLAGS_REG))])]
+  "TARGET_POPCNT")
+
+(define_insn_and_split "*popcount<mode>2_falsedep_1"
+  [(set (match_operand:SWI48 0 "register_operand" "=&r")
+       (popcount:SWI48
+         (match_operand:SWI48 1 "nonimmediate_operand" "rm")))
    (clobber (reg:CC FLAGS_REG))]
+  "TARGET_POPCNT
+   && TARGET_AVOID_FALSE_DEP_FOR_BMI && optimize_insn_for_speed_p ()"
+  "#"
+  "&& reload_completed"
+  [(parallel
+    [(set (match_dup 0)
+         (popcount:SWI48 (match_dup 1)))
+     (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)
+     (clobber (reg:CC FLAGS_REG))])]
+  "ix86_expand_clear (operands[0]);")
+
+(define_insn "*popcount<mode>2_falsedep"
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+       (popcount:SWI48
+         (match_operand:SWI48 1 "nonimmediate_operand" "rm")))
+   (unspec [(match_operand:SWI48 2 "register_operand" "0")]
+          UNSPEC_INSN_FALSE_DEP)
+   (clobber (reg:CC FLAGS_REG))]
   "TARGET_POPCNT"
 {
 #if TARGET_MACHO
@@ -12586,15 +12613,12 @@
    (set_attr "type" "bitmanip")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "*popcount<mode>2_cmp"
-  [(set (reg FLAGS_REG)
-       (compare
-         (popcount:SWI248
-           (match_operand:SWI248 1 "nonimmediate_operand" "rm"))
-         (const_int 0)))
-   (set (match_operand:SWI248 0 "register_operand" "=r")
-       (popcount:SWI248 (match_dup 1)))]
-  "TARGET_POPCNT && ix86_match_ccmode (insn, CCZmode)"
+(define_insn "*popcount<mode>2"
+  [(set (match_operand:SWI248 0 "register_operand" "=r")
+       (popcount:SWI248
+         (match_operand:SWI248 1 "nonimmediate_operand" "rm")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_POPCNT"
 {
 #if TARGET_MACHO
   return "popcnt\t{%1, %0|%0, %1}";
@@ -12606,25 +12630,6 @@
    (set_attr "type" "bitmanip")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "*popcountsi2_cmp_zext"
-  [(set (reg FLAGS_REG)
-        (compare
-          (popcount:SI (match_operand:SI 1 "nonimmediate_operand" "rm"))
-          (const_int 0)))
-   (set (match_operand:DI 0 "register_operand" "=r")
-        (zero_extend:DI(popcount:SI (match_dup 1))))]
-  "TARGET_64BIT && TARGET_POPCNT && ix86_match_ccmode (insn, CCZmode)"
-{
-#if TARGET_MACHO
-  return "popcnt\t{%1, %0|%0, %1}";
-#else
-  return "popcnt{l}\t{%1, %0|%0, %1}";
-#endif
-}
-  [(set_attr "prefix_rep" "1")
-   (set_attr "type" "bitmanip")
-   (set_attr "mode" "SI")])
-
 (define_expand "bswapdi2"
   [(set (match_operand:DI 0 "register_operand")
        (bswap:DI (match_operand:DI 1 "nonimmediate_operand")))]
Index: x86-tune.def
===================================================================
--- x86-tune.def        (revision 214000)
+++ x86-tune.def        (working copy)
@@ -509,6 +509,11 @@ DEF_TUNE (X86_TUNE_NOT_VECTORMODE, "not_vectormode
 DEF_TUNE (X86_TUNE_AVOID_VECTOR_DECODE, "avoid_vector_decode",
           m_K8)
 
+/* X86_TUNE_AVOID_FALSE_DEP_FOR_BMI: Avoid false dependency
+   for bit-manipulation instructions.  */
+DEF_TUNE (X86_TUNE_AVOID_FALSE_DEP_FOR_BMI, "avoid_false_dep_for_bmi",
+         m_SANDYBRIDGE | m_HASWELL | m_INTEL | m_GENERIC)
+
 /*****************************************************************************/
 /* This never worked well before.                                            */
 /*****************************************************************************/

Reply via email to