Does no one know why 0.3.0 generates more complex code?

The original post didn't contain the native code, but here it is for 0.3.0:

julia> function f{T}(x::T)
           result::T = zero(T)
           for n = one(T):x
               result += x
           end
           result
       end
f (generic function with 1 method)

julia> code_native(f, (Int,))
        .text
Filename: none
Source line: 3
        push    RBP
        mov     RBP, RSP
        push    R15
        push    R14
        push    R12
        push    RBX
        mov     R15, RDI
        xor     EBX, EBX
        test    R15, R15
Source line: 3
        cmovns  RBX, R15
Source line: 2
        mov     R14, QWORD PTR [12771720]
        mov     R12D, 1
Source line: 3
        movabs  RAX, 140272257151888
        mov     EDI, 1
        mov     RSI, RBX
        call    RAX
        inc     RBX
        cmp     RAX, RBX
        je      83
        lea     RCX, QWORD PTR [R15 + 1]
        test    R15, R15
        cmovg   R12, RCX
        mov     RCX, R12
        sub     RCX, RAX
        mov     RDX, RCX
        and     RDX, -4
        je      13
        add     RAX, RDX
        add     RDX, -4
        jne     -10
        cmp     R12, RAX
        je      24
        xor     EDX, EDX
        test    R15, R15
        cmovns  RDX, R15
        inc     RDX
        sub     RDX, RAX
Source line: 4
        dec     RDX
        jne     -9
Source line: 3
        imul    RCX, R15
        add     R14, RCX
Source line: 6
        mov     RAX, R14
        pop     RBX
        pop     R12
        pop     R14
        pop     R15
        pop     RBP
        ret




On Monday, 31 March 2014 23:34:59 UTC-3, Rak Rok wrote:
>
> Hello,
>
> Apologies in advance if these are silly questions or contain obvious 
> mistakes, I'm a Julia newbie.
>
> I wrote the following julia method and was curious about what it 
> translated to in llvm/assembly:
> function f{T}(x::T)
>     result::T = zero(T)
>     for n = one(T):x
>         result += x
>     end
>     result
> end
>
> I used a parametrized type to be careful about creating Unions out of thin 
> air etc.
>
> The generated native code is:
> julia> code_native(f, (Int,))
> Source line: 4
> push RBP
> mov RBP, RSP
> xor EAX, EAX
> test RDI, RDI
> jle 20
> mov ECX, 1
> Source line: 4
> add RAX, RDI
> inc RCX
> cmp RCX, RDI
> Source line: 3
> jle -15
> Source line: 6
> pop RBP
> ret
>
>
> Which isn't horrible but is clearly suboptimal. g++ and clang are both able 
> to recognize that the method is actually x^2 and generate:
>
>         xorl  %eax, %eax
>
>       testl   %edi, %edi
>       jle     LBB0_2
> ## BB#1:                                ## %.lr.ph
>       imull   %edi, %edi
>       movl    %edi, %eax
> LBB0_2:
>       popq    %rbp
>       ret
>
>
> So my question is - since Julia and clang both use LLVM internally, how come 
> they're coming to different answers?
>
> Is the C/C++ to LLVM code generation superior to the Julia to LLVM code gen?
>
>
> Beyond that, another issue I had was a bunch of weird code that gets 
> generated if, instead of writing `for n = one(T):x`, I write `for n = 1:x`:
>
> julia> code_native(f, (Uint,))
>       .section        __TEXT,__text,regular,pure_instructions
> Filename: none
> Source line: 4
>       push    RBP
>       mov     RBP, RSP
>       xor     EAX, EAX
>       test    RDI, RDI    
>       je      33
>       mov     ECX, 2      # n = 2
>       mov     RDX, RCX    # rdx = n # XXX
> Source line: 4
>       add     RAX, RDI    # result += x
> Source line: 3
>       lea     RCX, QWORD PTR [RDX + 1] # ++n
>       cmp     RDX, RDI                 # if rdx <= x
>       jbe     -19                      #     goto XXX above (i think?)
>       test    RDX, RDX                 # if rdx <= 0 (i think?)
>
>       jle     -28
> Source line: 6
>       pop     RBP
>       ret
>
>
> I'm not really sure why the generated code is keeping an extra copy of 'n' 
> around. I think it's somehow worried about the sign of n?
>
>
> One final question - is there an easy way to map the offsets of the 
> relative jumps to the instructions they target?
>
> What would be useful is addresses (fake or otherwise) on the left hand side 
> which would indicate the size of each asm instruction.
>
>
> Btw this is on julia 0.2.1
>
>
> If I try the same thing on julia version 0.3.0-prerelease+2262, I get crazy 
> results, so I think I must be doing something wrong:
>
> julia> code_typed(f, (Int,))
> 1-element Array{Any,1}:
>  :($(Expr(:lambda, {:x}, 
> {{:result,:#s38,:#s37,:#s36,:n,:_var0,:_var1},{{:x,Int64,0},{:result,Int64,2},{:#s38,Range1{Int64},18},{:#s37,Int64,2},{:#s36,(Int64,Int64),18},{:n,Int64,18},{:_var0,Int64,18},{:_var1,Int64,18}},{}},
>  :(begin  # none, line 2:
>         result = top(typeassert)(0,T)::Int64 # line 3:
>         #s38 = $(Expr(:new, Range1{Int64}, 1, 
> :(top(getfield)(Intrinsics,:select_value)(top(slt_int)(x::Int64,1)::Bool,0,top(box)(Int64,top(checked_sadd)(top(box)(Int64,top(checked_ssub)(x::Int64,1))::Int64,1))::Int64)::Int64)))::Range1{Int64}
>         #s37 = top(getfield)(#s38::Range1{Int64},:start)::Int64
>         unless top(box)(Bool,top(not_int)(#s37::Int64 === 
> top(box)(Int64,top(add_int)(top(getfield)(#s38::Range1{Int64},:start)::Int64,top(getfield)(#s38::Range1{Int64},:len)::Int64))::Int64::Bool))::Bool
>  goto 1
>         2: 
>         _var0 = #s37::Int64
>         _var1 = top(box)(Int64,top(add_int)(#s37::Int64,1))::Int64
>         n = _var0::Int64
>         #s37 = _var1::Int64 # line 4:
>         result = 
> top(typeassert)(top(box)(Int64,top(add_int)(result::Int64,x::Int64))::Int64,T)::Int64
>         3: 
>         unless 
> top(box)(Bool,top(not_int)(top(box)(Bool,top(not_int)(#s37::Int64 === 
> top(box)(Int64,top(add_int)(top(getfield)(#s38::Range1{Int64},:start)::Int64,top(getfield)(#s38::Range1{Int64},:len)::Int64))::Int64::Bool))::Bool))::Bool
>  goto 2
>         1: 
>         0:  # line 6:
>         return result::Int64
>     end::Int64))))
>
> julia> code_llvm(f, (Int,))
>
> define i64 @julia_f15840(i64) {
> top:
>   %1 = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 %0, i64 1), !dbg 
> !1363
>   %2 = extractvalue { i64, i1 } %1, 1, !dbg !1363
>   br i1 %2, label %fail, label %pass, !dbg !1363
>
> fail:                                             ; preds = %top
>   %3 = load %jl_value_t** @jl_overflow_exception, align 8, !dbg !1363
>   call void @jl_throw_with_superfluous_argument(%jl_value_t* %3, i32 3), !dbg 
> !1363
>   unreachable, !dbg !1363
>
> pass:                                             ; preds = %top
>   %4 = extractvalue { i64, i1 } %1, 0, !dbg !1363
>   %5 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %4, i64 1), !dbg 
> !1363
>   %6 = extractvalue { i64, i1 } %5, 1, !dbg !1363
>   br i1 %6, label %fail1, label %pass2, !dbg !1363
>
> fail1:                                            ; preds = %pass
>   %7 = load %jl_value_t** @jl_overflow_exception, align 8, !dbg !1363
>   call void @jl_throw_with_superfluous_argument(%jl_value_t* %7, i32 3), !dbg 
> !1363
>   unreachable, !dbg !1363
>
> pass2:                                            ; preds = %pass
>   %8 = load i64* inttoptr (i64 140237317881928 to i64*), align 8, !dbg !1364
>   %9 = icmp sgt i64 %0, 0, !dbg !1363
>   %10 = extractvalue { i64, i1 } %5, 0, !dbg !1363
>   %11 = select i1 %9, i64 %10, i64 0, !dbg !1363
>   %12 = icmp eq i64 %11, 0, !dbg !1363
>   br i1 %12, label %L5, label %L.preheader, !dbg !1363
>
> L.preheader:                                      ; preds = %pass2
>   %13 = mul i64 %11, %0, !dbg !1363
>   br label %L, !dbg !1363
>
> L:                                                ; preds = %L.preheader, %L
>   %"#s37.0" = phi i64 [ %14, %L ], [ 1, %L.preheader ]
>   %14 = add i64 %"#s37.0", 1, !dbg !1363
>   %15 = icmp eq i64 %"#s37.0", %11, !dbg !1365
>   br i1 %15, label %L5.loopexit, label %L, !dbg !1365
>
> L5.loopexit:                                      ; preds = %L
>   %16 = add i64 %8, %13, !dbg !1363
>   br label %L5
>
> L5:                                               ; preds = %L5.loopexit, 
> %pass2
>   %result.1 = phi i64 [ %8, %pass2 ], [ %16, %L5.loopexit ]
>   ret i64 %result.1, !dbg !1366
> }
>
>
> Thanks a lot!
>
> -rr-
>
>

Reply via email to