Hi,
  This patch passes the equality-only flag down from
emit_block_cmp_hints to the cmpmem optab so that the target-specific
expander can generate optimized insns for an equality-only compare.
Targets (e.g. rs6000) can generate a more efficient insn sequence if the
block compare is equality only.

  Bootstrapped and tested on x86 and powerpc64-linux BE and LE with
no regressions. Is this OK for trunk?

Thanks
Gui Haochen

ChangeLog
Expand: Pass down equality only flag to cmpmem expand

Targets (e.g. rs6000) can generate a more efficient insn sequence if the
block compare is equality only.  This patch passes the equality-only
flag down from emit_block_cmp_hints to the cmpmem optab so that the
target-specific expander can generate optimized insns for an
equality-only compare.

gcc/
        * expr.cc (expand_cmpstrn_or_cmpmem): Rename to...
        (expand_cmpstrn): ...this.
        (expand_cmpmem): New function.  Pass down equality only flag to
        cmpmem expand.
        (emit_block_cmp_via_cmpmem): Add an argument for equality only
        flag and call expand_cmpmem instead of expand_cmpstrn_or_cmpmem.
        (emit_block_cmp_hints): Call emit_block_cmp_via_cmpmem with
        equality only flag.
        * expr.h (expand_cmpstrn, expand_cmpmem): Declare.
        * builtins.cc (expand_builtin_strcmp, expand_builtin_strncmp):
        Call expand_cmpstrn instead of expand_cmpstrn_or_cmpmem.
        * config/i386/i386.md (cmpmemsi): Add the sixth operand for
        equality only flag.
        * config/rs6000/rs6000.md (cmpmemsi): Likewise.
        * config/s390/s390.md (cmpmemsi): Likewise.
        * doc/md.texi (cmpmem): Update the documentation and add an
        operand for the equality only flag.

patch.diff
diff --git a/gcc/builtins.cc b/gcc/builtins.cc
index 5ece0d23eb9..c2dbc25433d 100644
--- a/gcc/builtins.cc
+++ b/gcc/builtins.cc
@@ -4819,7 +4819,7 @@ expand_builtin_strcmp (tree exp, ATTRIBUTE_UNUSED rtx target)
       if (len && !TREE_SIDE_EFFECTS (len))
        {
          arg3_rtx = expand_normal (len);
-         result = expand_cmpstrn_or_cmpmem
+         result = expand_cmpstrn
            (cmpstrn_icode, target, arg1_rtx, arg2_rtx, TREE_TYPE (len),
             arg3_rtx, MIN (arg1_align, arg2_align));
        }
@@ -4929,9 +4929,9 @@ expand_builtin_strncmp (tree exp, ATTRIBUTE_UNUSED rtx target,
   rtx arg1_rtx = get_memory_rtx (arg1, len);
   rtx arg2_rtx = get_memory_rtx (arg2, len);
   rtx arg3_rtx = expand_normal (len);
-  result = expand_cmpstrn_or_cmpmem (cmpstrn_icode, target, arg1_rtx,
-                                    arg2_rtx, TREE_TYPE (len), arg3_rtx,
-                                    MIN (arg1_align, arg2_align));
+  result = expand_cmpstrn (cmpstrn_icode, target, arg1_rtx, arg2_rtx,
+                          TREE_TYPE (len), arg3_rtx,
+                          MIN (arg1_align, arg2_align));

   tree fndecl = get_callee_fndecl (exp);
   if (result)
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 1b5a794b9e5..775cba5d93d 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -23195,7 +23195,8 @@ (define_expand "cmpmemsi"
         (compare:SI (match_operand:BLK 1 "memory_operand" "")
                     (match_operand:BLK 2 "memory_operand" "") ) )
    (use (match_operand 3 "general_operand"))
-   (use (match_operand 4 "immediate_operand"))]
+   (use (match_operand 4 "immediate_operand"))
+   (use (match_operand 5 ""))]
   ""
 {
   if (ix86_expand_cmpstrn_or_cmpmem (operands[0], operands[1],
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 2a1b5ecfaee..e66330f876e 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -10097,7 +10097,8 @@ (define_expand "cmpmemsi"
                (compare:SI (match_operand:BLK 1)
                            (match_operand:BLK 2)))
              (use (match_operand:SI 3))
-             (use (match_operand:SI 4))])]
+             (use (match_operand:SI 4))
+             (use (match_operand:SI 5))])]
   "TARGET_POPCNTD"
 {
   if (expand_block_compare (operands))
diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
index 4bdb679daf2..506e79fb035 100644
--- a/gcc/config/s390/s390.md
+++ b/gcc/config/s390/s390.md
@@ -3790,7 +3790,8 @@ (define_expand "cmpmemsi"
         (compare:SI (match_operand:BLK 1 "memory_operand" "")
                     (match_operand:BLK 2 "memory_operand" "") ) )
    (use (match_operand:SI 3 "general_operand" ""))
-   (use (match_operand:SI 4 "" ""))]
+   (use (match_operand:SI 4 "" ""))
+   (use (match_operand:SI 5 "" ""))]
   ""
 {
   if (s390_expand_cmpmem (operands[0], operands[1],
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index e01cdcbe22c..06955cd7e78 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -6992,14 +6992,19 @@ result of the comparison.

 @cindex @code{cmpmem@var{m}} instruction pattern
 @item @samp{cmpmem@var{m}}
-Block compare instruction, with five operands like the operands
-of @samp{cmpstr@var{m}}.  The two memory blocks specified are compared
-byte by byte in lexicographic order starting at the beginning of each
-block.  Unlike @samp{cmpstr@var{m}} the instruction can prefetch
-any bytes in the two memory blocks.  Also unlike @samp{cmpstr@var{m}}
-the comparison will not stop if both bytes are zero.  The effect of
-the instruction is to store a value in operand 0 whose sign indicates
-the result of the comparison.
+Block compare instruction, with six operands.  The first five operands are
+like the operands of @samp{cmpstr@var{m}}.  The last operand indicates
+whether the result is only tested for equality.  One means an equality-only
+compare; zero means the sign of the result is also needed.
+
+The two memory blocks specified are compared byte by byte in lexicographic
+order starting at the beginning of each block.  Unlike @samp{cmpstr@var{m}}
+the instruction can prefetch any bytes in the two memory blocks.  Also
+unlike @samp{cmpstr@var{m}} the comparison will not stop if both bytes are
+zero.  When the last operand is zero, the effect of the instruction is to
+store a value in operand 0 whose sign indicates the result of the comparison.
+When the last operand is one, zero in operand 0 indicates that the two blocks
+are equal and any other value indicates that they are not equal.

 @cindex @code{strlen@var{m}} instruction pattern
 @item @samp{strlen@var{m}}
diff --git a/gcc/expr.cc b/gcc/expr.cc
index 6dd9b8f2ce6..3cdc5181bd3 100644
--- a/gcc/expr.cc
+++ b/gcc/expr.cc
@@ -2381,14 +2381,13 @@ emit_block_op_via_libcall (enum built_in_function fncode, rtx dst, rtx src,
   return expand_call (call_expr, NULL_RTX, false);
 }

-/* Try to expand cmpstrn or cmpmem operation ICODE with the given operands.
+/* Try to expand cmpstrn operation ICODE with the given operands.
    ARG3_TYPE is the type of ARG3_RTX.  Return the result rtx on success,
    otherwise return null.  */

 rtx
-expand_cmpstrn_or_cmpmem (insn_code icode, rtx target, rtx arg1_rtx,
-                         rtx arg2_rtx, tree arg3_type, rtx arg3_rtx,
-                         HOST_WIDE_INT align)
+expand_cmpstrn (insn_code icode, rtx target, rtx arg1_rtx, rtx arg2_rtx,
+               tree arg3_type, rtx arg3_rtx, HOST_WIDE_INT align)
 {
   machine_mode insn_mode = insn_data[icode].operand[0].mode;

@@ -2407,6 +2406,34 @@ expand_cmpstrn_or_cmpmem (insn_code icode, rtx target, rtx arg1_rtx,
   return NULL_RTX;
 }

+/* Similar to expand_cmpstrn, but for cmpmem.  EQUALITY_ONLY indicates
+   whether the result is only used for an equality comparison.  */
+rtx
+expand_cmpmem (insn_code icode, rtx target, rtx arg1_rtx, rtx arg2_rtx,
+              tree arg3_type, rtx arg3_rtx, HOST_WIDE_INT align,
+              bool equality_only)
+{
+  machine_mode insn_mode = insn_data[icode].operand[0].mode;
+
+  if (target && (!REG_P (target) || HARD_REGISTER_P (target)))
+    target = NULL_RTX;
+
+  class expand_operand ops[6];
+  create_output_operand (&ops[0], target, insn_mode);
+  create_fixed_operand (&ops[1], arg1_rtx);
+  create_fixed_operand (&ops[2], arg2_rtx);
+  create_convert_operand_from (&ops[3], arg3_rtx, TYPE_MODE (arg3_type),
+                              TYPE_UNSIGNED (arg3_type));
+  create_integer_operand (&ops[4], align);
+  if (equality_only)
+    create_integer_operand (&ops[5], 1);
+  else
+    create_integer_operand (&ops[5], 0);
+  if (maybe_expand_insn (icode, 6, ops))
+    return ops[0].value;
+  return NULL_RTX;
+}
+
 /* Expand a block compare between X and Y with length LEN using the
    cmpmem optab, placing the result in TARGET.  LEN_TYPE is the type
    of the expression that was used to calculate the length.  ALIGN
@@ -2414,7 +2441,7 @@ expand_cmpstrn_or_cmpmem (insn_code icode, rtx target, rtx arg1_rtx,

 static rtx
 emit_block_cmp_via_cmpmem (rtx x, rtx y, rtx len, tree len_type, rtx target,
-                          unsigned align)
+                          unsigned align, bool equality_only)
 {
   /* Note: The cmpstrnsi pattern, if it exists, is not suitable for
      implementing memcmp because it will stop if it encounters two
@@ -2424,7 +2451,8 @@ emit_block_cmp_via_cmpmem (rtx x, rtx y, rtx len, tree len_type, rtx target,
   if (icode == CODE_FOR_nothing)
     return NULL_RTX;

-  return expand_cmpstrn_or_cmpmem (icode, target, x, y, len_type, len, align);
+  return expand_cmpmem (icode, target, x, y, len_type, len, align,
+                       equality_only);
 }

 /* Emit code to compare a block Y to a block X.  This may be done with
@@ -2469,7 +2497,8 @@ emit_block_cmp_hints (rtx x, rtx y, rtx len, tree len_type, rtx target,
     result = compare_by_pieces (x, y, INTVAL (len), target, align,
                                y_cfn, y_cfndata);
   else
-    result = emit_block_cmp_via_cmpmem (x, y, len, len_type, target, align);
+    result = emit_block_cmp_via_cmpmem (x, y, len, len_type, target, align,
+                                       equality_only);

   return result;
 }
diff --git a/gcc/expr.h b/gcc/expr.h
index 2a172867fdb..64dbbcfcaad 100644
--- a/gcc/expr.h
+++ b/gcc/expr.h
@@ -199,8 +199,9 @@ extern void use_regs (rtx *, int, int);
 extern void use_group_regs (rtx *, rtx);

 #ifdef GCC_INSN_CODES_H
-extern rtx expand_cmpstrn_or_cmpmem (insn_code, rtx, rtx, rtx, tree, rtx,
-                                    HOST_WIDE_INT);
+extern rtx expand_cmpstrn (insn_code, rtx, rtx, rtx, tree, rtx, HOST_WIDE_INT);
+extern rtx expand_cmpmem (insn_code, rtx, rtx, rtx, tree, rtx, HOST_WIDE_INT,
+                         bool);
 #endif

 /* Write zeros through the storage of OBJECT.

Reply via email to