Re: [Open64-devel] sub-optimal strength reduction bug?

Shin-Ming Liu Sun, 15 Jul 2012 19:46:02 -0700

It is an expression canonicalization issue.

The source is:
  c = b*60;
  d = c+44;


  for (i = 0; i< b; i++)
  {
    x = x+d;
    *a++=x;
  }
Before PRE runs, copy propagation and expression canonicalization have
already turned the loop into:
for (i = 0; i < b; i++) {
    x = (x + (b*60)) + 44;
}

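With PRE, only (b*60) gets moved out of the loop: because the expression is
associated as (x + (b*60)) + 44, the loop-invariant sum (b*60) + 44 never
appears as a subexpression that PRE could hoist.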

To solve the problem, someone needs to work out how expression
canonicalization could be changed to avoid this suboptimal form.

If the loop had the following form, PRE would be able to undo the copy
propagation:
for (i = 0; i < b; i++) {
   x = x + ((b*60) + 44);
}

- Shin
On Thu, Jul 12, 2012 at 3:28 PM, Yiran Wang <[email protected]> wrote:

> Hi All,
>
> It looks like strength reduction is not optimal for the following example?
>
> 7 instructions per iteration are used, but only 4 (or 5 without LFTR) are
> necessary.
>
> Best Regards,
> Yiran Wang
>
> bash-4.0$ cat x.c
> int foo(int x, int b, int *__restrict a)
> {
>   int i;
>   int c,d ;
>   c = b*60;
>   d = c+44;
>
>   for (i = 0; i< b; i++)
>   {
>     x = x+d;
>     *a++=x;
>   }
>   return x;
> }
> bash-4.0$ /opt/open64tr/bin/opencc -c -O3 -keep x.c
> -Wb,-trlow,-tt25:0xffffffff -OPT:unroll_times_max=1 -march=barcelona
> bash-4.0$ cat x.s
> #  /opt/open64tr/lib/gcc-lib/x86_64-open64-linux/5.0/be::5.0
>
> #-----------------------------------------------------------
>  # Compiling x.c (x.I)
> #-----------------------------------------------------------
>
> #-----------------------------------------------------------
> # Options:
>  #-----------------------------------------------------------
> #  Target:Barcelona, ISA:ISA_1, Endian:little, Pointer Size:32
>  #  -O3 (Optimization level)
> #  -g0 (Debug level)
>  #  -m2 (Report advisories)
> #-----------------------------------------------------------
>
> int foo(int x, int b, int *__restrict a)
> {
>   int i;
>   int c,d ;
>   c = b*60;
>   d = c+44;
>
>   for (i = 0; i< b; i++)
>   {
>     x = x+d;
>     *a++=x;
>   }
>   return x;
> }
> bash-4.0$ /opt/open64tr/bin/opencc -c -O3 -keep x.c
> -Wb,-trlow,-tt25:0xffffffff -OPT:unroll_times_max=1 -march=barcelona
> bash-4.0$ cat x.s
> #  /opt/open64tr/lib/gcc-lib/x86_64-open64-linux/5.0/be::5.0
>
> #-----------------------------------------------------------
>  # Compiling x.c (x.I)
> #-----------------------------------------------------------
>
> #-----------------------------------------------------------
> # Options:
>  #-----------------------------------------------------------
> #  Target:Barcelona, ISA:ISA_1, Endian:little, Pointer Size:32
>  #  -O3 (Optimization level)
> #  -g0 (Debug level)
>  #  -m2 (Report advisories)
> #-----------------------------------------------------------
>
> .text
> .align 2
>  .section .text
> .p2align 5,,
>
> # Program Unit: foo
> .globl foo
> .type foo, @function
> foo: # 0x0
> # .frame %esp, 16, %esp
>  # _temp_gra_spill0 = 0
> .loc 1 2 0
>  #   1  int foo(int x, int b, int *__restrict a)
>  #   2  {
> .LBB1_foo:
> pushl %ebp                     # [0]
>  pushl %ebx                     # [3]
> pushl %edi                     # [6]
>  addl $-16,%esp                 # [9]
> movl 36(%esp),%edi             # [10] b
>  leal -1(%edi),%eax             # [13]
> testl %eax,%eax               # [14]
>  jl .Lt_0_2818                 # [15]
> .LBB2_foo:
> movl %edi,%ebp                 # [0]
>  .loc 1 8 0
>  #   4    int c,d ;
>  #   5    c = b*60;
>  #   6    d = c+44;
>  #   7
>  #   8    for (i = 0; i< b; i++)
> movl %edi,%ecx                 # [0]
>  movl 32(%esp),%ebx             # [0] x
> movl %ecx,0(%esp)             # [1] _temp_gra_spill0
>  imull $60,%ebp                 # [1]
> movl 40(%esp),%eax             # [1] a
>  xorl %edx,%edx                 # [2]
> .p2align 5,,31
> .Lt_0_3586:
>  #<loop> Loop body line 8, nesting depth: 1, estimated iterations: 1000
> .loc 1 11 0
>  #   9    {
>  #  10      x = x+d;
>  #  11      *a++=x;
> addl $1,%edx                   # [0]
>  .loc 1 10 0
>  addl %ebp,%ebx                 # [0]
> .loc 1 11 0
>  addl $4,%eax                   # [0]
> .loc 1 10 0
>  addl $44,%ebx                 # [1]
> .loc 1 11 0
>  cmpl %edi,%edx                 # [1]
> movl %ebx,-4(%eax)             # [2] id:17
>  jl .Lt_0_3586                 # [2]
> .Lt_0_4098:
> .loc 1 13 0
>  #  12    }
>  #  13    return x;
> movl %ebx,%eax                 # [0]
>  addl $16,%esp                 # [0]
> popl %edi                     # [1]
>  popl %ebx                     # [4]
> popl %ebp                     # [7]
>  ret                           # [7]
> .p2align 5,,31
> .Lt_0_2818:
> .loc 1 11 0
>  movl 32(%esp),%eax             # [0] x
> .loc 1 13 0
>  addl $16,%esp                 # [0]
> popl %edi                     # [1]
>  popl %ebx                     # [4]
> popl %ebp                     # [7]
>  ret                           # [7]
> .LDWend_foo:
> .size foo, .LDWend_foo-foo
>  .section .text
> .align 4
>
> .section .eh_frame, "a",@progbits
> .LEHCIE:
> .4byte .LEHCIE_end - .LEHCIE_begin
> .LEHCIE_begin:
> .4byte 0x0
> .byte 0x01, 0x00, 0x01, 0x7c, 0x08, 0x0c, 0x04, 0x04
>  .byte 0x88, 0x01
> .align 4
> .LEHCIE_end:
>
> .section .debug_line, ""
> .section .note.GNU-stack,"",@progbits
>  .ident "#Open64 Compiler Version 5.0 : x.c compiled with : -O3
> -OPT:unroll_times_max=1 -march=barcelona -msse2 -msse3 -mno-3dnow
> -mno-sse4a -mno-ssse3 -mno-sse41 -mno-sse42 -mno-aes -mno-pclmul -mno-avx
> -mno-xop -mno-fma -mno-fma4 -m32"
>
>
>
> .text
> .align 2
>  .section .text
> .p2align 5,,
>
> # Program Unit: foo
> .globl foo
> .type foo, @function
> foo: # 0x0
> # .frame %esp, 16, %esp
>  # _temp_gra_spill0 = 0
> .loc 1 2 0
>  #   1  int foo(int x, int b, int *__restrict a)
>  #   2  {
> .LBB1_foo:
> pushl %ebp                     # [0]
>  pushl %ebx                     # [3]
> pushl %edi                     # [6]
>  addl $-16,%esp                 # [9]
> movl 36(%esp),%edi             # [10] b
>  leal -1(%edi),%eax             # [13]
> testl %eax,%eax               # [14]
>  jl .Lt_0_2818                 # [15]
> .LBB2_foo:
> movl %edi,%ebp                 # [0]
>  .loc 1 8 0
>  #   4    int c,d ;
>  #   5    c = b*60;
>  #   6    d = c+44;
>  #   7
>  #   8    for (i = 0; i< b; i++)
> movl %edi,%ecx                 # [0]
>  movl 32(%esp),%ebx             # [0] x
> movl %ecx,0(%esp)             # [1] _temp_gra_spill0
>  imull $60,%ebp                 # [1]
> movl 40(%esp),%eax             # [1] a
>  xorl %edx,%edx                 # [2]
> .p2align 5,,31
> .Lt_0_3586:
>  #<loop> Loop body line 8, nesting depth: 1, estimated iterations: 1000
> .loc 1 11 0
>  #   9    {
>  #  10      x = x+d;
>  #  11      *a++=x;
> addl $1,%edx                   # [0]
>  .loc 1 10 0
>  addl %ebp,%ebx                 # [0]
> .loc 1 11 0
>  addl $4,%eax                   # [0]
> .loc 1 10 0
>  addl $44,%ebx                 # [1]
> .loc 1 11 0
>  cmpl %edi,%edx                 # [1]
> movl %ebx,-4(%eax)             # [2] id:17
>  jl .Lt_0_3586                 # [2]
> .Lt_0_4098:
> .loc 1 13 0
>  #  12    }
>  #  13    return x;
> movl %ebx,%eax                 # [0]
>  addl $16,%esp                 # [0]
> popl %edi                     # [1]
>  popl %ebx                     # [4]
> popl %ebp                     # [7]
>  ret                           # [7]
> .p2align 5,,31
> .Lt_0_2818:
> .loc 1 11 0
>  movl 32(%esp),%eax             # [0] x
> .loc 1 13 0
>  addl $16,%esp                 # [0]
> popl %edi                     # [1]
>  popl %ebx                     # [4]
> popl %ebp                     # [7]
>  ret                           # [7]
> .LDWend_foo:
> .size foo, .LDWend_foo-foo
>  .section .text
> .align 4
>
> .section .eh_frame, "a",@progbits
> .LEHCIE:
> .4byte .LEHCIE_end - .LEHCIE_begin
> .LEHCIE_begin:
> .4byte 0x0
> .byte 0x01, 0x00, 0x01, 0x7c, 0x08, 0x0c, 0x04, 0x04
>  .byte 0x88, 0x01
> .align 4
> .LEHCIE_end:
>
> .section .debug_line, ""
> .section .note.GNU-stack,"",@progbits
>  .ident "#Open64 Compiler Version 5.0 : x.c compiled with : -O3
> -OPT:unroll_times_max=1 -march=barcelona -msse2 -msse3 -mno-3dnow
> -mno-sse4a -mno-ssse3 -mno-sse41 -mno-sse42 -mno-aes -mno-pclmul -mno-avx
> -mno-xop -mno-fma -mno-fma4 -m32"
>
>
>
> ------------------------------------------------------------------------------
> Live Security Virtual Conference
> Exclusive live event will cover all the ways today's security and
> threat landscape has changed and how IT managers can respond. Discussions
> will include endpoint security, mobile security and the latest in malware
> threats. http://www.accelacomm.com/jaw/sfrnl04242012/114/50122263/
> _______________________________________________
> Open64-devel mailing list
> [email protected]
> https://lists.sourceforge.net/lists/listinfo/open64-devel
>
>

------------------------------------------------------------------------------
Live Security Virtual Conference
Exclusive live event will cover all the ways today's security and 
threat landscape has changed and how IT managers can respond. Discussions 
will include endpoint security, mobile security and the latest in malware 
threats. http://www.accelacomm.com/jaw/sfrnl04242012/114/50122263/

_______________________________________________
Open64-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/open64-devel

Re: [Open64-devel] sub-optimal strength reduction bug?

Reply via email to