[Open64-devel] sub-optimal strength reduction bug?

Yiran Wang Thu, 12 Jul 2012 15:29:31 -0700

Hi All,

It looks like strength reduction is not optimal for the following example?


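The generated loop uses 7 instructions per iteration, but only 4 (or 5
without LFTR, i.e. linear function test replacement) are necessary. For
instance, the loop-invariant d = b*60 + 44 is added to x in two steps
(addl %ebp,%ebx followed by addl $44,%ebx) instead of one.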

Best Regards,
Yiran Wang

bash-4.0$ cat x.c
int foo(int x, int b, int *__restrict a)  /* adds d to x b times, storing each running sum through a; returns the final x */
{
  int i;
  int c,d ;
  c = b*60;
  d = c+44;   /* d = b*60 + 44: loop-invariant step added to x on every iteration */

  for (i = 0; i< b; i++)   /* body runs b times; not at all when b <= 0 */
  {
    x = x+d;
    *a++=x;   /* store the running sum, then advance a to the next int */
  }
  return x;
}
bash-4.0$ /opt/open64tr/bin/opencc -c -O3 -keep x.c -Wb,-trlow,-tt25:0xffffffff -OPT:unroll_times_max=1 -march=barcelona
bash-4.0$ cat x.s
#  /opt/open64tr/lib/gcc-lib/x86_64-open64-linux/5.0/be::5.0

#-----------------------------------------------------------
# Compiling x.c (x.I)
#-----------------------------------------------------------

#-----------------------------------------------------------
# Options:
#-----------------------------------------------------------
#  Target:Barcelona, ISA:ISA_1, Endian:little, Pointer Size:32
#  -O3 (Optimization level)
#  -g0 (Debug level)
#  -m2 (Report advisories)
#-----------------------------------------------------------

int foo(int x, int b, int *__restrict a)  /* adds d to x b times, storing each running sum through a; returns the final x */
{
  int i;
  int c,d ;
  c = b*60;
  d = c+44;   /* d = b*60 + 44: loop-invariant step added to x on every iteration */

  for (i = 0; i< b; i++)   /* body runs b times; not at all when b <= 0 */
  {
    x = x+d;
    *a++=x;   /* store the running sum, then advance a to the next int */
  }
  return x;
}
bash-4.0$ /opt/open64tr/bin/opencc -c -O3 -keep x.c -Wb,-trlow,-tt25:0xffffffff -OPT:unroll_times_max=1 -march=barcelona
bash-4.0$ cat x.s
#  /opt/open64tr/lib/gcc-lib/x86_64-open64-linux/5.0/be::5.0

#-----------------------------------------------------------
# Compiling x.c (x.I)
#-----------------------------------------------------------

#-----------------------------------------------------------
# Options:
#-----------------------------------------------------------
#  Target:Barcelona, ISA:ISA_1, Endian:little, Pointer Size:32
#  -O3 (Optimization level)
#  -g0 (Debug level)
#  -m2 (Report advisories)
#-----------------------------------------------------------

.text
.align 2
.section .text
.p2align 5,,

# Program Unit: foo
#
# int foo(int x, int b, int *__restrict a) -- 32-bit x86, AT&T syntax.
#
# Arguments are read from the stack. After the three pushes and the
# 16-byte frame adjustment below they sit at:
#   32(%esp) = x      36(%esp) = b      40(%esp) = a
# The result is returned in %eax.
#
# Register roles inside the loop:
#   %ebx = x (running sum)      %ebp = b*60
#   %eax = a (store pointer)    %edx = i (loop counter)    %edi = b
#
# %ebp, %ebx and %edi are saved on entry and restored on both exit paths.
# The bracketed number after each instruction is emitted by the compiler
# (presumably a scheduling cycle -- not confirmed from this listing).
.globl foo
.type foo, @function
foo: # 0x0
# .frame %esp, 16, %esp
# _temp_gra_spill0 = 0
.loc 1 2 0
 #   1  int foo(int x, int b, int *__restrict a)
 #   2  {
.LBB1_foo:
# Prologue: save the three registers used below and reserve 16 bytes.
pushl %ebp                     # [0]
pushl %ebx                     # [3]
pushl %edi                     # [6]
addl $-16,%esp                 # [9]
movl 36(%esp),%edi             # [10] b
# Loop guard: compute b-1 and skip the loop entirely when it is negative.
leal -1(%edi),%eax             # [13]
testl %eax,%eax               # [14]
jl .Lt_0_2818                 # [15]
.LBB2_foo:
# Loop setup: %ebp = b*60, %ebx = x, %eax = a, %edx = i = 0.
movl %edi,%ebp                 # [0]
.loc 1 8 0
 #   4    int c,d ;
 #   5    c = b*60;
 #   6    d = c+44;
 #   7
 #   8    for (i = 0; i< b; i++)
movl %edi,%ecx                 # [0]
movl 32(%esp),%ebx             # [0] x
# b is copied into the spill slot here; nothing in this listing reloads it.
movl %ecx,0(%esp)             # [1] _temp_gra_spill0
# Only c = b*60 is materialised; the "+44" of d is not folded in here and
# is instead re-applied inside the loop.
imull $60,%ebp                 # [1]
movl 40(%esp),%eax             # [1] a
xorl %edx,%edx                 # [2]
.p2align 5,,31
.Lt_0_3586:
 #<loop> Loop body line 8, nesting depth: 1, estimated iterations: 1000
# Loop body: 7 instructions per iteration. x is advanced by d in two
# steps (+ b*60 from %ebp, then + 44) rather than by one precomputed d,
# and both the counter %edx and the pointer %eax are incremented.
.loc 1 11 0
 #   9    {
 #  10      x = x+d;
 #  11      *a++=x;
# i++
addl $1,%edx                   # [0]
.loc 1 10 0
# x += b*60   (first half of x += d)
addl %ebp,%ebx                 # [0]
.loc 1 11 0
# a++ (pointer advanced before the store, hence the -4 offset below)
addl $4,%eax                   # [0]
.loc 1 10 0
# x += 44     (second half of x += d)
addl $44,%ebx                 # [1]
.loc 1 11 0
# i < b ?  The flags set here are consumed by the jl after the store;
# the movl in between does not modify flags.
cmpl %edi,%edx                 # [1]
# Store x into the slot a pointed at before the increment.
movl %ebx,-4(%eax)             # [2] id:17
jl .Lt_0_3586                 # [2]
.Lt_0_4098:
# Normal exit: return the accumulated x, release the frame and restore
# the saved registers in reverse push order.
.loc 1 13 0
 #  12    }
 #  13    return x;
movl %ebx,%eax                 # [0]
addl $16,%esp                 # [0]
popl %edi                     # [1]
popl %ebx                     # [4]
popl %ebp                     # [7]
ret                           # [7]
.p2align 5,,31
.Lt_0_2818:
# Loop-skipped exit (taken from the guard above): return the incoming x
# unchanged, then the same frame release and register restores.
.loc 1 11 0
movl 32(%esp),%eax             # [0] x
.loc 1 13 0
addl $16,%esp                 # [0]
popl %edi                     # [1]
popl %ebx                     # [4]
popl %ebp                     # [7]
ret                           # [7]
.LDWend_foo:
.size foo, .LDWend_foo-foo
.section .text
.align 4

.section .eh_frame, "a",@progbits
.LEHCIE:
.4byte .LEHCIE_end - .LEHCIE_begin
.LEHCIE_begin:
.4byte 0x0
.byte 0x01, 0x00, 0x01, 0x7c, 0x08, 0x0c, 0x04, 0x04
.byte 0x88, 0x01
.align 4
.LEHCIE_end:

.section .debug_line, ""
.section .note.GNU-stack,"",@progbits
.ident "#Open64 Compiler Version 5.0 : x.c compiled with : -O3
-OPT:unroll_times_max=1 -march=barcelona -msse2 -msse3 -mno-3dnow
-mno-sse4a -mno-ssse3 -mno-sse41 -mno-sse42 -mno-aes -mno-pclmul -mno-avx
-mno-xop -mno-fma -mno-fma4 -m32"



.text
.align 2
.section .text
.p2align 5,,

# Program Unit: foo
#
# int foo(int x, int b, int *__restrict a) -- 32-bit x86, AT&T syntax.
#
# Arguments are read from the stack. After the three pushes and the
# 16-byte frame adjustment below they sit at:
#   32(%esp) = x      36(%esp) = b      40(%esp) = a
# The result is returned in %eax.
#
# Register roles inside the loop:
#   %ebx = x (running sum)      %ebp = b*60
#   %eax = a (store pointer)    %edx = i (loop counter)    %edi = b
#
# %ebp, %ebx and %edi are saved on entry and restored on both exit paths.
# The bracketed number after each instruction is emitted by the compiler
# (presumably a scheduling cycle -- not confirmed from this listing).
.globl foo
.type foo, @function
foo: # 0x0
# .frame %esp, 16, %esp
# _temp_gra_spill0 = 0
.loc 1 2 0
 #   1  int foo(int x, int b, int *__restrict a)
 #   2  {
.LBB1_foo:
# Prologue: save the three registers used below and reserve 16 bytes.
pushl %ebp                     # [0]
pushl %ebx                     # [3]
pushl %edi                     # [6]
addl $-16,%esp                 # [9]
movl 36(%esp),%edi             # [10] b
# Loop guard: compute b-1 and skip the loop entirely when it is negative.
leal -1(%edi),%eax             # [13]
testl %eax,%eax               # [14]
jl .Lt_0_2818                 # [15]
.LBB2_foo:
# Loop setup: %ebp = b*60, %ebx = x, %eax = a, %edx = i = 0.
movl %edi,%ebp                 # [0]
.loc 1 8 0
 #   4    int c,d ;
 #   5    c = b*60;
 #   6    d = c+44;
 #   7
 #   8    for (i = 0; i< b; i++)
movl %edi,%ecx                 # [0]
movl 32(%esp),%ebx             # [0] x
# b is copied into the spill slot here; nothing in this listing reloads it.
movl %ecx,0(%esp)             # [1] _temp_gra_spill0
# Only c = b*60 is materialised; the "+44" of d is not folded in here and
# is instead re-applied inside the loop.
imull $60,%ebp                 # [1]
movl 40(%esp),%eax             # [1] a
xorl %edx,%edx                 # [2]
.p2align 5,,31
.Lt_0_3586:
 #<loop> Loop body line 8, nesting depth: 1, estimated iterations: 1000
# Loop body: 7 instructions per iteration. x is advanced by d in two
# steps (+ b*60 from %ebp, then + 44) rather than by one precomputed d,
# and both the counter %edx and the pointer %eax are incremented.
.loc 1 11 0
 #   9    {
 #  10      x = x+d;
 #  11      *a++=x;
# i++
addl $1,%edx                   # [0]
.loc 1 10 0
# x += b*60   (first half of x += d)
addl %ebp,%ebx                 # [0]
.loc 1 11 0
# a++ (pointer advanced before the store, hence the -4 offset below)
addl $4,%eax                   # [0]
.loc 1 10 0
# x += 44     (second half of x += d)
addl $44,%ebx                 # [1]
.loc 1 11 0
# i < b ?  The flags set here are consumed by the jl after the store;
# the movl in between does not modify flags.
cmpl %edi,%edx                 # [1]
# Store x into the slot a pointed at before the increment.
movl %ebx,-4(%eax)             # [2] id:17
jl .Lt_0_3586                 # [2]
.Lt_0_4098:
# Normal exit: return the accumulated x, release the frame and restore
# the saved registers in reverse push order.
.loc 1 13 0
 #  12    }
 #  13    return x;
movl %ebx,%eax                 # [0]
addl $16,%esp                 # [0]
popl %edi                     # [1]
popl %ebx                     # [4]
popl %ebp                     # [7]
ret                           # [7]
.p2align 5,,31
.Lt_0_2818:
# Loop-skipped exit (taken from the guard above): return the incoming x
# unchanged, then the same frame release and register restores.
.loc 1 11 0
movl 32(%esp),%eax             # [0] x
.loc 1 13 0
addl $16,%esp                 # [0]
popl %edi                     # [1]
popl %ebx                     # [4]
popl %ebp                     # [7]
ret                           # [7]
.LDWend_foo:
.size foo, .LDWend_foo-foo
.section .text
.align 4

.section .eh_frame, "a",@progbits
.LEHCIE:
.4byte .LEHCIE_end - .LEHCIE_begin
.LEHCIE_begin:
.4byte 0x0
.byte 0x01, 0x00, 0x01, 0x7c, 0x08, 0x0c, 0x04, 0x04
.byte 0x88, 0x01
.align 4
.LEHCIE_end:

.section .debug_line, ""
.section .note.GNU-stack,"",@progbits
.ident "#Open64 Compiler Version 5.0 : x.c compiled with : -O3
-OPT:unroll_times_max=1 -march=barcelona -msse2 -msse3 -mno-3dnow
-mno-sse4a -mno-ssse3 -mno-sse41 -mno-sse42 -mno-aes -mno-pclmul -mno-avx
-mno-xop -mno-fma -mno-fma4 -m32"

------------------------------------------------------------------------------
Live Security Virtual Conference
Exclusive live event will cover all the ways today's security and 
threat landscape has changed and how IT managers can respond. Discussions 
will include endpoint security, mobile security and the latest in malware 
threats. http://www.accelacomm.com/jaw/sfrnl04242012/114/50122263/

_______________________________________________
Open64-devel mailing list
Open64-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/open64-devel

[Open64-devel] sub-optimal strength reduction bug?

Reply via email to