------- Comment #9 from dominiq at lps dot ens dot fr  2009-02-01 10:58 -------
> Did you try enabling SSE3 btw?

No. How do I find out which SSE* extensions are enabled by default?

> Can you post the ifort assembly of the loop?

# ifort-generated inner loop (x86-64, AT&T operand order: op src, dst).
# Each iteration loads 32 bytes from mymatmul_$A.0.1 as two 16-byte
# packed-single vectors, combines them with %xmm0/%xmm1, accumulates the
# result into the matching 32 bytes of mymatmul_$C.0.1, and advances the
# index %r9 by 4 (scaled by 8 in the address computation) until it
# reaches 128.
# %xmm0 and %xmm1 are only read inside the loop. Presumably they hold the
# real and imaginary parts of a complex element of B: the
# mulps/shufps/mulps/addsubps sequence matches a packed single-precision
# complex multiply, but their setup is outside this excerpt -- confirm
# against the full listing.
# %rsi and %rdx are byte offsets added to the A and C bases; their values
# are also set outside this excerpt.
# Note: addsubps is an SSE3 instruction.
# The trailing "#line.col" annotations are emitted by the compiler.
L_B1.14:                        # Preds L_B1.14 L_B1.13
        # %r11 = %rsi + 8*%r9: byte offset of the current elements within A
        lea       (%rsi,%r9,8), %r11                            #
        lea       mymatmul_$A.0.1(%rip), %r10                   #27.33
        # Aligned loads of two consecutive 16-byte vectors of A
        movaps    (%r10,%r11), %xmm2                            #27.33
        movaps    16(%r10,%r11), %xmm4                          #27.33
        # First vector: %xmm3 = %xmm0 * A
        movaps    %xmm0, %xmm3                                  #27.40
        mulps     %xmm2, %xmm3                                  #27.40
        # $177 = 0xB1: swap the two floats inside each 64-bit pair of %xmm2
        shufps    $177, %xmm2, %xmm2                            #27.40
        # %r15 = %rdx + 8*%r9: byte offset of the current elements within C
        lea       (%rdx,%r9,8), %r15                            #
        lea       mymatmul_$C.0.1(%rip), %r14                   #27.24
        movaps    %xmm0, %xmm5                                  #27.40
        # Index update and compare are scheduled early; the packed SSE
        # instructions that follow do not modify EFLAGS, so the result of
        # cmpq is still valid for the jl at the bottom of the loop.
        addq      $4, %r9                                       #26.12
        mulps     %xmm1, %xmm2                                  #27.40
        cmpq      $128, %r9                                     #26.12
        # addsubps: subtract in even lanes, add in odd lanes
        addsubps  %xmm2, %xmm3                                  #27.40
        # C[first vector] += product
        addps     (%r14,%r15), %xmm3                            #27.15
        movaps    %xmm3, (%r14,%r15)                            #27.15
        # Second vector: same multiply/swap/multiply/addsub sequence on the
        # next 16 bytes of A, accumulated into the next 16 bytes of C
        mulps     %xmm4, %xmm5                                  #27.40
        shufps    $177, %xmm4, %xmm4                            #27.40
        mulps     %xmm1, %xmm4                                  #27.40
        addsubps  %xmm4, %xmm5                                  #27.40
        addps     16(%r14,%r15), %xmm5                          #27.15
        movaps    %xmm5, 16(%r14,%r15)                          #27.15
        # Loop while %r9 < 128 (signed compare)
        jl        L_B1.14       # Prob 99%                      #26.12


-- 


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=38968

Reply via email to