Re: [Open64-devel] sub-optimal strength reduction bug?

shuxin yang Thu, 12 Jul 2012 15:43:19 -0700

Hi, this loop can be vectorized:

    v_init = <x, x+d, x+2d, x+3d>
    v_inc = <4d, 4d, 4d, 4d>


    The statement can be vectorized into:
    a[0:3] = v_init;
    v_init += v_inc;


On 07/12/2012 03:28 PM, Yiran Wang wrote:

Hi All,

It looks like strength reduction is not optimal for the following example?

7 instructions per iteration are used, but only 4 (or 5 without LFTR) are necessary.


Best Regards,
Yiran Wang

bash-4.0$ cat x.c
int foo(int x, int b, int *__restrict a)
{
  int i;
  int c,d ;
  c = b*60;
  d = c+44;
  for (i = 0; i< b; i++)
  {
    x = x+d;
    *a++=x;
  }
  return x;
}

bash-4.0$ /opt/open64tr/bin/opencc -c -O3 -keep x.c -Wb,-trlow,-tt25:0xffffffff -OPT:unroll_times_max=1 -march=barcelona

bash-4.0$ cat x.s
#  /opt/open64tr/lib/gcc-lib/x86_64-open64-linux/5.0/be::5.0

#-----------------------------------------------------------
# Compiling x.c (x.I)
#-----------------------------------------------------------

#-----------------------------------------------------------
# Options:
#-----------------------------------------------------------
#  Target:Barcelona, ISA:ISA_1, Endian:little, Pointer Size:32
#  -O3(Optimization level)
#  -g0(Debug level)
#  -m2(Report advisories)
#-----------------------------------------------------------

int foo(int x, int b, int *__restrict a)
{
  int i;
  int c,d ;
  c = b*60;
  d = c+44;
  for (i = 0; i< b; i++)
  {
    x = x+d;
    *a++=x;
  }
  return x;
}

bash-4.0$ /opt/open64tr/bin/opencc -c -O3 -keep x.c -Wb,-trlow,-tt25:0xffffffff -OPT:unroll_times_max=1 -march=barcelona

bash-4.0$ cat x.s
#  /opt/open64tr/lib/gcc-lib/x86_64-open64-linux/5.0/be::5.0

#-----------------------------------------------------------
# Compiling x.c (x.I)
#-----------------------------------------------------------

#-----------------------------------------------------------
# Options:
#-----------------------------------------------------------
#  Target:Barcelona, ISA:ISA_1, Endian:little, Pointer Size:32
#  -O3(Optimization level)
#  -g0(Debug level)
#  -m2(Report advisories)
#-----------------------------------------------------------

.text
.align2
.section .text
.p2align 5,,

# Program Unit: foo
.globlfoo
.typefoo, @function
foo:# 0x0
# .frame%esp, 16, %esp
# _temp_gra_spill0 = 0
.loc120
 #   1  int foo(int x, int b, int *__restrict a)
 #   2  {
.LBB1_foo:
pushl %ebp # [0]
pushl %ebx # [3]
pushl %edi # [6]
addl $-16,%esp # [9]
movl 36(%esp),%edi # [10] b
leal -1(%edi),%eax # [13]
testl %eax,%eax # [14]
jl .Lt_0_2818 # [15]
.LBB2_foo:
movl %edi,%ebp # [0]
.loc180
 #   4    int c,d ;
 #   5    c = b*60;
 #   6    d = c+44;
 #   7
 #   8    for (i = 0; i< b; i++)
movl %edi,%ecx # [0]
movl 32(%esp),%ebx # [0] x
movl %ecx,0(%esp) # [1] _temp_gra_spill0
imull $60,%ebp # [1]
movl 40(%esp),%eax # [1] a
xorl %edx,%edx # [2]
.p2align 5,,31
.Lt_0_3586:
 #<loop> Loop body line 8, nesting depth: 1, estimated iterations: 1000
.loc1110
 #   9    {
 #  10      x = x+d;
 #  11      *a++=x;
addl $1,%edx # [0]
.loc1100
addl %ebp,%ebx # [0]
.loc1110
addl $4,%eax # [0]
.loc1100
addl $44,%ebx # [1]
.loc1110
cmpl %edi,%edx # [1]
movl %ebx,-4(%eax) # [2] id:17
jl .Lt_0_3586 # [2]
.Lt_0_4098:
.loc1130
 #  12    }
 #  13    return x;
movl %ebx,%eax # [0]
addl $16,%esp # [0]
popl %edi # [1]
popl %ebx # [4]
popl %ebp # [7]
ret # [7]
.p2align 5,,31
.Lt_0_2818:
.loc1110
movl 32(%esp),%eax # [0] x
.loc1130
addl $16,%esp # [0]
popl %edi # [1]
popl %ebx # [4]
popl %ebp # [7]
ret # [7]
.LDWend_foo:
.size foo, .LDWend_foo-foo
.section .text
.align4

.section .eh_frame, "a",@progbits
.LEHCIE:
.4byte.LEHCIE_end - .LEHCIE_begin
.LEHCIE_begin:
.4byte 0x0
.byte0x01, 0x00, 0x01, 0x7c, 0x08, 0x0c, 0x04, 0x04
.byte0x88, 0x01
.align 4
.LEHCIE_end:

.section .debug_line, ""
.section.note.GNU-stack,"",@progbits

.ident"#Open64 Compiler Version 5.0 : x.c compiled with : -O3-OPT:unroll_times_max=1 -march=barcelona -msse2 -msse3 -mno-3dnow-mno-sse4a -mno-ssse3 -mno-sse41 -mno-sse42 -mno-aes -mno-pclmul-mno-avx -mno-xop -mno-fma -mno-fma4 -m32"




.text
.align2
.section .text
.p2align 5,,

# Program Unit: foo
.globlfoo
.typefoo, @function
foo:# 0x0
# .frame%esp, 16, %esp
# _temp_gra_spill0 = 0
.loc120
 #   1  int foo(int x, int b, int *__restrict a)
 #   2  {
.LBB1_foo:
pushl %ebp # [0]
pushl %ebx # [3]
pushl %edi # [6]
addl $-16,%esp # [9]
movl 36(%esp),%edi # [10] b
leal -1(%edi),%eax # [13]
testl %eax,%eax # [14]
jl .Lt_0_2818 # [15]
.LBB2_foo:
movl %edi,%ebp # [0]
.loc180
 #   4    int c,d ;
 #   5    c = b*60;
 #   6    d = c+44;
 #   7
 #   8    for (i = 0; i< b; i++)
movl %edi,%ecx # [0]
movl 32(%esp),%ebx # [0] x
movl %ecx,0(%esp) # [1] _temp_gra_spill0
imull $60,%ebp # [1]
movl 40(%esp),%eax # [1] a
xorl %edx,%edx # [2]
.p2align 5,,31
.Lt_0_3586:
 #<loop> Loop body line 8, nesting depth: 1, estimated iterations: 1000
.loc1110
 #   9    {
 #  10      x = x+d;
 #  11      *a++=x;
addl $1,%edx # [0]
.loc1100
addl %ebp,%ebx # [0]
.loc1110
addl $4,%eax # [0]
.loc1100
addl $44,%ebx # [1]
.loc1110
cmpl %edi,%edx # [1]
movl %ebx,-4(%eax) # [2] id:17
jl .Lt_0_3586 # [2]
.Lt_0_4098:
.loc1130
 #  12    }
 #  13    return x;
movl %ebx,%eax # [0]
addl $16,%esp # [0]
popl %edi # [1]
popl %ebx # [4]
popl %ebp # [7]
ret # [7]
.p2align 5,,31
.Lt_0_2818:
.loc1110
movl 32(%esp),%eax # [0] x
.loc1130
addl $16,%esp # [0]
popl %edi # [1]
popl %ebx # [4]
popl %ebp # [7]
ret # [7]
.LDWend_foo:
.size foo, .LDWend_foo-foo
.section .text
.align4

.section .eh_frame, "a",@progbits
.LEHCIE:
.4byte.LEHCIE_end - .LEHCIE_begin
.LEHCIE_begin:
.4byte 0x0
.byte0x01, 0x00, 0x01, 0x7c, 0x08, 0x0c, 0x04, 0x04
.byte0x88, 0x01
.align 4
.LEHCIE_end:

.section .debug_line, ""
.section.note.GNU-stack,"",@progbits




------------------------------------------------------------------------------
Live Security Virtual Conference
Exclusive live event will cover all the ways today's security and
threat landscape has changed and how IT managers can respond. Discussions
will include endpoint security, mobile security and the latest in malware
threats. http://www.accelacomm.com/jaw/sfrnl04242012/114/50122263/


_______________________________________________
Open64-devel mailing list
Open64-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/open64-devel

------------------------------------------------------------------------------
Live Security Virtual Conference
Exclusive live event will cover all the ways today's security and 
threat landscape has changed and how IT managers can respond. Discussions 
will include endpoint security, mobile security and the latest in malware 
threats. http://www.accelacomm.com/jaw/sfrnl04242012/114/50122263/

_______________________________________________
Open64-devel mailing list
Open64-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/open64-devel

Re: [Open64-devel] sub-optimal strength reduction bug?

Reply via email to