I should add that I have tested performance of this on Core2, Corei7
(Nehalem) and AMD Opteron-based systems. It appears to be
performance-neutral on AMD (only minor perturbations, overall a wash).
For the test case that provoked the optimization, there were nice
improvements on Core2 and Corei7.

Thanks,
Teresa

On Fri, Mar 30, 2012 at 7:18 AM, Teresa Johnson <tejohn...@google.com> wrote:
> This patch addresses instructions that incur expensive length-changing prefix 
> (LCP) stalls
> on some x86-64 implementations, notably Core2 and Corei7. Specifically, a 
> move of
> a 16-bit constant into memory requires a length-changing prefix and can incur 
> significant
> penalties. The attached patch avoids this by forcing such instructions to be 
> split into
> two: a move of the corresponding 32-bit constant into a register, and a move 
> of the
> register's lower 16 bits into memory.
>
> Bootstrapped and tested on x86_64-unknown-linux-gnu. Is this ok for trunk?
>
> Thanks,
> Teresa
>
> 2012-03-29   Teresa Johnson  <tejohn...@google.com>
>
>        * config/i386/i386.h (ix86_tune_indices): Add
>        X86_TUNE_LCP_STALL.
>        * config/i386/i386.md (movhi_internal): Split to
>        movhi_internal and movhi_imm_internal.
>        * config/i386/i386.c (initial_ix86_tune_features): Initialize
>        X86_TUNE_LCP_STALL entry.
>
> Index: config/i386/i386.h
> ===================================================================
> --- config/i386/i386.h  (revision 185920)
> +++ config/i386/i386.h  (working copy)
> @@ -262,6 +262,7 @@ enum ix86_tune_indices {
>   X86_TUNE_MOVX,
>   X86_TUNE_PARTIAL_REG_STALL,
>   X86_TUNE_PARTIAL_FLAG_REG_STALL,
> +  X86_TUNE_LCP_STALL,
>   X86_TUNE_USE_HIMODE_FIOP,
>   X86_TUNE_USE_SIMODE_FIOP,
>   X86_TUNE_USE_MOV0,
> @@ -340,6 +341,8 @@ extern unsigned char ix86_tune_features[X86_TUNE_L
>  #define TARGET_PARTIAL_REG_STALL 
> ix86_tune_features[X86_TUNE_PARTIAL_REG_STALL]
>  #define TARGET_PARTIAL_FLAG_REG_STALL \
>        ix86_tune_features[X86_TUNE_PARTIAL_FLAG_REG_STALL]
> +#define TARGET_LCP_STALL \
> +       ix86_tune_features[X86_TUNE_LCP_STALL]
>  #define TARGET_USE_HIMODE_FIOP ix86_tune_features[X86_TUNE_USE_HIMODE_FIOP]
>  #define TARGET_USE_SIMODE_FIOP ix86_tune_features[X86_TUNE_USE_SIMODE_FIOP]
>  #define TARGET_USE_MOV0                ix86_tune_features[X86_TUNE_USE_MOV0]
> Index: config/i386/i386.md
> ===================================================================
> --- config/i386/i386.md (revision 185920)
> +++ config/i386/i386.md (working copy)
> @@ -2262,9 +2262,19 @@
>           ]
>           (const_string "SI")))])
>
> +(define_insn "*movhi_imm_internal"
> +  [(set (match_operand:HI 0 "memory_operand" "=m")
> +        (match_operand:HI 1 "immediate_operand" "n"))]
> +  "!TARGET_LCP_STALL && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
> +{
> +  return "mov{w}\t{%1, %0|%0, %1}";
> +}
> +  [(set (attr "type") (const_string "imov"))
> +   (set (attr "mode") (const_string "HI"))])
> +
>  (define_insn "*movhi_internal"
>   [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r,r,m")
> -       (match_operand:HI 1 "general_operand" "r,rn,rm,rn"))]
> +       (match_operand:HI 1 "general_operand" "r,rn,rm,r"))]
>   "!(MEM_P (operands[0]) && MEM_P (operands[1]))"
>  {
>   switch (get_attr_type (insn))
> Index: config/i386/i386.c
> ===================================================================
> --- config/i386/i386.c  (revision 185920)
> +++ config/i386/i386.c  (working copy)
> @@ -1964,6 +1964,11 @@ static unsigned int initial_ix86_tune_features[X86
>   /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
>   m_CORE2I7 | m_GENERIC,
>
> +  /* X86_TUNE_LCP_STALL: Avoid an expensive length-changing prefix stall
> +   * on 16-bit immediate moves into memory on Core2 and Corei7,
> +   * which may also affect AMD implementations.  */
> +  m_CORE2I7 | m_GENERIC | m_AMD_MULTIPLE,
> +
>   /* X86_TUNE_USE_HIMODE_FIOP */
>   m_386 | m_486 | m_K6_GEODE,
>
>
> --
> This patch is available for review at http://codereview.appspot.com/5975045



-- 
Teresa Johnson | Software Engineer | tejohn...@google.com | 408-460-2413

Reply via email to