------- Comment #14 from ubizjak at gmail dot com  2009-06-25 08:25 -------
(In reply to comment #13)
> Predictive commoning does exactly what you want.

It is not effective for the testcase in Comment #9. The dumps for the innermost
loop are identical for -O2 -funroll-loops with and without -fpredictive-commoning:

# Innermost loop, unrolled 4x (AT&T syntax: op src, dst). Each pass handles
# four consecutive 4-byte single-precision elements, computing per element
#     (%rdx)[i] = ((%rdx)[i] - (%rsi)[i] * (%rcx)[i]) * (%rbp)[i]
# %r8d is the element counter (stepped by 4); the loop runs until it equals %r9d.
# Note that all four loads from (%rdx) are issued before the first store to it.
.L6:
        movss   (%rsi), %xmm9           # xmm9 = rsi[0]
        addl    $4, %r8d                # counter += 4 (four elements per pass)
        mulss   (%rcx), %xmm9           # xmm9 = rsi[0] * rcx[0]
        movss   (%rdx), %xmm8           # xmm8 = rdx[0]
        movss   4(%rdx), %xmm6          # xmm6 = rdx[1], loaded ahead of its use
        movss   8(%rdx), %xmm4          # xmm4 = rdx[2], loaded ahead of its use
        movss   12(%rdx), %xmm2         # xmm2 = rdx[3], loaded ahead of its use
        subss   %xmm9, %xmm8            # xmm8 = rdx[0] - rsi[0]*rcx[0]
        mulss   0(%rbp), %xmm8          # xmm8 *= rbp[0]
        movss   %xmm8, (%rdx)           # rdx[0] = result for element 0
        movss   4(%rsi), %xmm7          # xmm7 = rsi[1]
        mulss   4(%rcx), %xmm7          # xmm7 = rsi[1] * rcx[1]
        subss   %xmm7, %xmm6            # xmm6 = rdx[1] - rsi[1]*rcx[1]
        mulss   4(%rbp), %xmm6          # xmm6 *= rbp[1]
        movss   %xmm6, 4(%rdx)          # rdx[1] = result for element 1
        movss   8(%rsi), %xmm5          # xmm5 = rsi[2]
        mulss   8(%rcx), %xmm5          # xmm5 = rsi[2] * rcx[2]
        subss   %xmm5, %xmm4            # xmm4 = rdx[2] - rsi[2]*rcx[2]
        mulss   8(%rbp), %xmm4          # xmm4 *= rbp[2]
        movss   %xmm4, 8(%rdx)          # rdx[2] = result for element 2
        movss   12(%rsi), %xmm3         # xmm3 = rsi[3]
        addq    $16, %rsi               # advance rsi by 4 elements (16 bytes)
        mulss   12(%rcx), %xmm3         # xmm3 = rsi[3] * rcx[3] (rsi value already in xmm3)
        addq    $16, %rcx               # advance rcx by 4 elements
        subss   %xmm3, %xmm2            # xmm2 = rdx[3] - rsi[3]*rcx[3]
        mulss   12(%rbp), %xmm2         # xmm2 *= rbp[3]
        addq    $16, %rbp               # advance rbp by 4 elements
        movss   %xmm2, 12(%rdx)         # rdx[3] = result for element 3
        addq    $16, %rdx               # advance rdx by 4 elements
        cmpl    %r9d, %r8d              # compare counter against the bound in r9d
        jne     .L6                     # repeat until counter == r9d


-- 


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=34163

Reply via email to