> Here is a patch wich introduces new pass 'ree' based on pass
> 'implicit_zee' as was discussed above.

Thanks.

> 2011-11-22  Enkovich Ilya  <ilya.enkov...@intel.com>
>
>       PR target/50038
>       * implicit-zee.c: Delete.
>       * ree.c: New file.
>       * Makefile.in: Replace implicit-zee.c with ree.c.
>       * i386.c (ix86_option_override_internal): Set flag_ree for
>       32 bit platform.

* config/i386/i386.c (ix86_option_override_internal): ...

>       * common.opt (fzee): Ignored.
>       (free): New.
>       * passes.c (init_optimization_passes): Replace pass_implicit_zee
>       with pass_ree.
>       * tree-pass.h (pass_implicit_zee): Delete.
>       (pass_ree): New.
>       * timevar.def (TV_ZEE): Delete.
>       (TV_REE): New.

It would be nice to add something to doc/invoke.texi about -free.


The patch is mostly OK, but a few changes are required:

+/* Problem Description :
+   --------------------
+   This pass is intended to remove redundant extension instructions.
+   Such instructions appeare for different reasons.  We expect some of

appear without terminal 'e'.

+   them due to implicit zero-extend in 64-bit registers after writing to

zero-extension

+   their lower 32-bit half (as in x86_64 arch).

(e.g. for the x86-64 architecture).

+ Another possible reason is a type cast which follows load (for instance
+ register restore) which can be combined into single instruction in the
+ most cases.

Another possible reason is a type cast which follows a load (for instance
a register restore) and which can be combined into a single instruction, and
for which earlier local passes, e.g. the combiner, weren't able to optimize.

+   extension instruction that could possibly be redundant. Such extension

double space after the period

+   For example, in x86_64, implicit zero-extensions are captured with

For example, for the x86-64 architecture, implicit...

+   Architectures like x86_64 support conditional moves whose semantics for

x86-64

+   Basic ZEE pass reported reduction of the dynamic instruction count of a

Let's use the wording "The original redundant zero-extension elimination pass"

+   The most performance gain from REE pass in addition to ZEE pass is expected

The additional performance gain with the enhanced pass is mostly expected...


+/* This structure is used to hold data about candidate for
+   elimination.  */
+
+typedef struct ext_cand
+{
+  rtx insn;
+  const_rtx ext_expr;

No need to repeat the ext_ prefix, "const_rtx expr;" is fine.

+  enum machine_mode src_mode;
+} *ext_cand_t;
+
+static alloc_pool ext_cand_pool;

+/* Carry information about extensions while walking the RTL.  */
+
+DEF_VEC_P(ext_cand_t);
+DEF_VEC_ALLOC_P(ext_cand_t, heap);

The combination of a pool with a heap-allocated vector of pointers looks a 
little convoluted.  Can't you use a vector of objects (DEF_VEC_O) directly?


+/* Returns the merge code status for INSN.  */
+
+static enum insn_merge_code
+get_insn_status (rtx insn)

I know this was in the original file, but in the head comment of functions, 
this should be

/* Return the merge code status for INSN.  */

+/* Sets the merge code status of INSN to CODE.  */

/* Set the merge code status of INSN to CODE.  */

and so on.


+/* Given a insn (CURR_INSN) and a pointer to the SET rtx (ORIG_SET)
+   that needs to be modified, this code modifies the SET rtx to a
+   new SET rtx that extends the right hand expression into a
+   register (NEWREG) on the left hand side.  Note that multiple
+   assumptions are made about the nature of the set that needs
+   to be true for this to work and is called from merge_def_and_ext.
+
+   Original :
+   (set (reg a) (expression))
+
+   Transform :
+   (set (reg a) (extend (expression)))
+
+   Special Cases :
+   If the expression is a constant or another extend directly
+   assign it to the register.  */
+
+static bool
+combine_set_extend (ext_cand_t cand, rtx curr_insn, rtx *orig_set)

The head comment is outdated: NEWREG is gone and CAND has appeared.


+  /* Merge constants by directly moving the constant into the
+     register under some conditions.  */
+
+  if (GET_CODE (orig_src) == CONST_INT
+      && HOST_BITS_PER_WIDE_INT >= GET_MODE_BITSIZE (dst_mode))
+    {
+      if (INTVAL (orig_src) >= 0)
+       new_set = gen_rtx_SET (VOIDmode, newreg, orig_src);
+      else if (GET_CODE (orig_src) == ZERO_EXTEND)
+       {
+         /* Zero-extending a negative SImode integer into DImode
+            makes it a positive integer.  Convert the given negative
+            integer into the appropriate integer when zero-extended.  */
+
+         delta_width = HOST_BITS_PER_WIDE_INT - GET_MODE_BITSIZE (SImode);
+         mask = (~(unsigned HOST_WIDE_INT) 0) >> delta_width;
+         val = INTVAL (orig_src);
+         val = val & mask;
+         new_const_int = gen_rtx_CONST_INT (VOIDmode, val);
+         new_set = gen_rtx_SET (VOIDmode, newreg, new_const_int);
+       }
+      else if (GET_CODE (orig_src) == SIGN_EXTEND)
+       new_set = gen_rtx_SET (VOIDmode, newreg, orig_src);
+      else
+       return false;
+    }

Watch out the pasto here: ORIG_SRC is a CONST_INT so it cannot be a ZERO_EXTEND 
or a SIGN_EXTEND within the block.

  /* Merge constants by directly moving the constant into the register under
     some conditions.  Recall that RTL constants are sign-extended.  */
  if (GET_CODE (orig_src) == CONST_INT
      && HOST_BITS_PER_WIDE_INT >= GET_MODE_BITSIZE (dst_mode))
    {
      if (INTVAL (orig_src) >= 0 || GET_CODE (cand_src) == SIGN_EXTEND)
        new_set = gen_rtx_SET (VOIDmode, newreg, orig_src);
      else 
        {
          /* Zero-extend the negative constant by masking out the bits outside
             the source mode.  */
          enum machine_mode src_mode = GET_MODE (SET_DEST (orig_src));
          new_const_int
            = GEN_INT (INTVAL (orig_src) & GET_MODE_MASK (src_mode));
          new_set = gen_rtx_SET (VOIDmode, newreg, new_const_int);
        }
    }

+      /* Here is a sequence of two extensions.  Trying to merge them into a
+        single one.  */

Try to merge them...

+      if (GET_CODE (orig_src) == ZERO_EXTEND)
+       temp_extension = gen_rtx_ZERO_EXTEND (dst_mode, XEXP (orig_src, 0));
+      else
+       temp_extension = gen_rtx_SIGN_EXTEND (dst_mode, XEXP (orig_src, 0));

  temp_extension
    = gen_rtx_fmt_e (GET_CODE (orig_src), dst_mode, XEXP (orig_src, 0));

+      if (GET_CODE (cand_src) == ZERO_EXTEND)
+       temp_extension = gen_rtx_ZERO_EXTEND (dst_mode, orig_src);
+      else
+       temp_extension = gen_rtx_SIGN_EXTEND (dst_mode, orig_src);

  temp_extension
    = gen_rtx_fmt_e (GET_CODE (cand_src), dst_mode, orig_src);


+  if (GET_CODE (SET_DEST (*sub_rtx)) == REG
+      && GET_MODE (SET_DEST (*sub_rtx)) == ext_src_mode)
+    {
+      return combine_set_extend (cand, def_insn, sub_rtx);
+    }
+  else
+    return false;
+  return true;

So, in the end, what do you return? ;-)


+/* Add an extension pattern that could be eliminated.  This is called via
+   note_stores from find_removable_extends.  */
+
+static void
+add_removable_extend (rtx x ATTRIBUTE_UNUSED, const_rtx expr, void *data)

Let's call it add_removable_extension


+/* Traverse the instruction stream looking for extends and return the
+   list of candidates.  */

...for extensions...

+static VEC (ext_cand_t,heap)*
+find_removable_extends (void)

find_removable_extensions


+      /* Try to combine the extend with the definition here.  */

...extensions...


Please make the modifications, retest and repost.  Thanks in advance.

-- 
Eric Botcazou

Reply via email to