Hello,

Apologies in advance if these are silly questions or contain obvious 
mistakes; I'm a Julia newbie.

I wrote the following Julia method and was curious about what it translated 
to in LLVM IR and native assembly:
function f{T}(x::T)        # parametric method: T is the type of the argument x
    result::T = zero(T)    # accumulator, declared as T and initialised to zero(T)
    for n = one(T):x       # n runs over the range one(T):x; n itself is never used in the body
        result += x        # add x once per iteration
    end
    result                 # the accumulated sum is the return value
end

I used a parametrized type so that the accumulator and the loop bounds have 
the same type as x, to avoid accidentally creating Union types.

The generated native code is:
julia> code_native(f, (Int,))
Source line: 4
push RBP
mov RBP, RSP
xor EAX, EAX
test RDI, RDI
jle 20
mov ECX, 1
Source line: 4
add RAX, RDI
inc RCX
cmp RCX, RDI
Source line: 3
jle -15
Source line: 6
pop RBP
ret


This isn't horrible, but it is clearly suboptimal. g++ and clang are both able to 
recognize that the method is actually x^2 (for x > 0, and 0 otherwise) and generate:

        xorl    %eax, %eax

        testl   %edi, %edi
        jle     LBB0_2
## BB#1:                                ## %.lr.ph
        imull   %edi, %edi
        movl    %edi, %eax
LBB0_2:
        popq    %rbp
        ret


So my question is - since Julia and clang both use LLVM internally, how come 
they're coming to different answers?

Is the C/C++ to LLVM code generation superior to the Julia to LLVM code gen?


Beyond that, another issue I had was a bunch of weird code that would get 
generated if, instead of writing for n = one(T):x, I wrote for n = 1:x:

julia> code_native(f, (Uint,))
        .section        __TEXT,__text,regular,pure_instructions
Filename: none
Source line: 4
        push    RBP
        mov     RBP, RSP
        xor     EAX, EAX
        test    RDI, RDI    
        je      33
        mov     ECX, 2      # n = 2
        mov     RDX, RCX    # rdx = n # XXX
Source line: 4
        add     RAX, RDI    # result += x
Source line: 3
        lea     RCX, QWORD PTR [RDX + 1] # ++n
        cmp     RDX, RDI                 # if rdx <= x
        jbe     -19                      #     goto XXX above (i think?)
        test    RDX, RDX                 # if rdx <= 0 (i think?)

        jle     -28
Source line: 6
        pop     RBP
        ret


I'm not really sure why the generated code is keeping an extra copy of 'n' 
around. I think it's somehow worried about the sign of n?


One final question - is there an easy way to map the offsets of the 
relative jumps to the instructions they target?

What would be useful is addresses (fake or otherwise) on the left-hand side, 
from which the size of each asm instruction could be worked out.


By the way, this is on Julia 0.2.1.


If I try the same thing on Julia version 0.3.0-prerelease+2262, I get crazy 
results, so I think I must be doing something wrong:

julia> code_typed(f, (Int,))
1-element Array{Any,1}:
 :($(Expr(:lambda, {:x}, 
{{:result,:#s38,:#s37,:#s36,:n,:_var0,:_var1},{{:x,Int64,0},{:result,Int64,2},{:#s38,Range1{Int64},18},{:#s37,Int64,2},{:#s36,(Int64,Int64),18},{:n,Int64,18},{:_var0,Int64,18},{:_var1,Int64,18}},{}},
 :(begin  # none, line 2:
        result = top(typeassert)(0,T)::Int64 # line 3:
        #s38 = $(Expr(:new, Range1{Int64}, 1, 
:(top(getfield)(Intrinsics,:select_value)(top(slt_int)(x::Int64,1)::Bool,0,top(box)(Int64,top(checked_sadd)(top(box)(Int64,top(checked_ssub)(x::Int64,1))::Int64,1))::Int64)::Int64)))::Range1{Int64}
        #s37 = top(getfield)(#s38::Range1{Int64},:start)::Int64
        unless top(box)(Bool,top(not_int)(#s37::Int64 === 
top(box)(Int64,top(add_int)(top(getfield)(#s38::Range1{Int64},:start)::Int64,top(getfield)(#s38::Range1{Int64},:len)::Int64))::Int64::Bool))::Bool
 goto 1
        2: 
        _var0 = #s37::Int64
        _var1 = top(box)(Int64,top(add_int)(#s37::Int64,1))::Int64
        n = _var0::Int64
        #s37 = _var1::Int64 # line 4:
        result = 
top(typeassert)(top(box)(Int64,top(add_int)(result::Int64,x::Int64))::Int64,T)::Int64
        3: 
        unless 
top(box)(Bool,top(not_int)(top(box)(Bool,top(not_int)(#s37::Int64 === 
top(box)(Int64,top(add_int)(top(getfield)(#s38::Range1{Int64},:start)::Int64,top(getfield)(#s38::Range1{Int64},:len)::Int64))::Int64::Bool))::Bool))::Bool
 goto 2
        1: 
        0:  # line 6:
        return result::Int64
    end::Int64))))

julia> code_llvm(f, (Int,))

define i64 @julia_f15840(i64) {
top:
  %1 = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 %0, i64 1), !dbg !1363
  %2 = extractvalue { i64, i1 } %1, 1, !dbg !1363
  br i1 %2, label %fail, label %pass, !dbg !1363

fail:                                             ; preds = %top
  %3 = load %jl_value_t** @jl_overflow_exception, align 8, !dbg !1363
  call void @jl_throw_with_superfluous_argument(%jl_value_t* %3, i32 3), !dbg 
!1363
  unreachable, !dbg !1363

pass:                                             ; preds = %top
  %4 = extractvalue { i64, i1 } %1, 0, !dbg !1363
  %5 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %4, i64 1), !dbg !1363
  %6 = extractvalue { i64, i1 } %5, 1, !dbg !1363
  br i1 %6, label %fail1, label %pass2, !dbg !1363

fail1:                                            ; preds = %pass
  %7 = load %jl_value_t** @jl_overflow_exception, align 8, !dbg !1363
  call void @jl_throw_with_superfluous_argument(%jl_value_t* %7, i32 3), !dbg 
!1363
  unreachable, !dbg !1363

pass2:                                            ; preds = %pass
  %8 = load i64* inttoptr (i64 140237317881928 to i64*), align 8, !dbg !1364
  %9 = icmp sgt i64 %0, 0, !dbg !1363
  %10 = extractvalue { i64, i1 } %5, 0, !dbg !1363
  %11 = select i1 %9, i64 %10, i64 0, !dbg !1363
  %12 = icmp eq i64 %11, 0, !dbg !1363
  br i1 %12, label %L5, label %L.preheader, !dbg !1363

L.preheader:                                      ; preds = %pass2
  %13 = mul i64 %11, %0, !dbg !1363
  br label %L, !dbg !1363

L:                                                ; preds = %L.preheader, %L
  %"#s37.0" = phi i64 [ %14, %L ], [ 1, %L.preheader ]
  %14 = add i64 %"#s37.0", 1, !dbg !1363
  %15 = icmp eq i64 %"#s37.0", %11, !dbg !1365
  br i1 %15, label %L5.loopexit, label %L, !dbg !1365

L5.loopexit:                                      ; preds = %L
  %16 = add i64 %8, %13, !dbg !1363
  br label %L5

L5:                                               ; preds = %L5.loopexit, %pass2
  %result.1 = phi i64 [ %8, %pass2 ], [ %16, %L5.loopexit ]
  ret i64 %result.1, !dbg !1366
}


Thanks a lot!

-rr-

Reply via email to