Hello,
Apologies in advance if these are silly questions or contain obvious
mistakes; I'm a Julia newbie.
I wrote the following Julia method and was curious what it translates to
in LLVM IR and native assembly:
function f{T}(x::T)  # adds x to an accumulator once per value in one(T):x and returns the total
result::T = zero(T)  # accumulator, declared as T and initialised with zero(T)
for n = one(T):x  # n is never read in the body; the range only determines the iteration count
result += x
end
result  # the value of this last expression is what the function returns
end
I used a type parameter so that every value in the method has type T and no
Union types get introduced by accident.
The generated native code is:
julia> code_native(f, (Int,))
Source line: 4
push RBP
mov RBP, RSP
xor EAX, EAX
test RDI, RDI
jle 20
mov ECX, 1
Source line: 4
add RAX, RDI
inc RCX
cmp RCX, RDI
Source line: 3
jle -15
Source line: 6
pop RBP
ret
This isn't horrible, but it is clearly suboptimal. g++ and clang are both able
to recognize that the method actually computes x^2 (for positive x, and 0
otherwise) and generate:
xorl %eax, %eax
testl %edi, %edi
jle LBB0_2
## BB#1: ## %.lr.ph
imull %edi, %edi
movl %edi, %eax
LBB0_2:
popq %rbp
ret
So my question is: since Julia and clang both use LLVM internally, how come
they end up with different generated code?
Is the C/C++-to-LLVM code generation superior to the Julia-to-LLVM code
generation?
Beyond that, another issue I had was some odd-looking code that gets
generated if, instead of writing for n = one(T):x, I write for n = 1:x:
julia> code_native(f, (Uint,))
.section __TEXT,__text,regular,pure_instructions
Filename: none
Source line: 4
push RBP
mov RBP, RSP
xor EAX, EAX
test RDI, RDI
je 33
mov ECX, 2 # n = 2
mov RDX, RCX # rdx = n # XXX
Source line: 4
add RAX, RDI # result += x
Source line: 3
lea RCX, QWORD PTR [RDX + 1] # ++n
cmp RDX, RDI # if rdx <= x
jbe -19 # goto XXX above (i think?)
test RDX, RDX # if rdx <= 0 (i think?)
jle -28
Source line: 6
pop RBP
ret
I'm not really sure why the generated code is keeping an extra copy of 'n'
around. I think it's somehow worried about the sign of n?
One final question - is there an easy way to tell which instruction each
relative jump offset points to?
What would be useful is addresses (fake or otherwise) on the left-hand side,
which would indicate the size of each asm instruction.
By the way, this is on Julia 0.2.1.
If I try the same thing on Julia version 0.3.0-prerelease+2262, I get crazy
results, so I think I must be doing something wrong:
julia> code_typed(f, (Int,))
1-element Array{Any,1}:
:($(Expr(:lambda, {:x},
{{:result,:#s38,:#s37,:#s36,:n,:_var0,:_var1},{{:x,Int64,0},{:result,Int64,2},{:#s38,Range1{Int64},18},{:#s37,Int64,2},{:#s36,(Int64,Int64),18},{:n,Int64,18},{:_var0,Int64,18},{:_var1,Int64,18}},{}},
:(begin # none, line 2:
result = top(typeassert)(0,T)::Int64 # line 3:
#s38 = $(Expr(:new, Range1{Int64}, 1,
:(top(getfield)(Intrinsics,:select_value)(top(slt_int)(x::Int64,1)::Bool,0,top(box)(Int64,top(checked_sadd)(top(box)(Int64,top(checked_ssub)(x::Int64,1))::Int64,1))::Int64)::Int64)))::Range1{Int64}
#s37 = top(getfield)(#s38::Range1{Int64},:start)::Int64
unless top(box)(Bool,top(not_int)(#s37::Int64 ===
top(box)(Int64,top(add_int)(top(getfield)(#s38::Range1{Int64},:start)::Int64,top(getfield)(#s38::Range1{Int64},:len)::Int64))::Int64::Bool))::Bool
goto 1
2:
_var0 = #s37::Int64
_var1 = top(box)(Int64,top(add_int)(#s37::Int64,1))::Int64
n = _var0::Int64
#s37 = _var1::Int64 # line 4:
result =
top(typeassert)(top(box)(Int64,top(add_int)(result::Int64,x::Int64))::Int64,T)::Int64
3:
unless
top(box)(Bool,top(not_int)(top(box)(Bool,top(not_int)(#s37::Int64 ===
top(box)(Int64,top(add_int)(top(getfield)(#s38::Range1{Int64},:start)::Int64,top(getfield)(#s38::Range1{Int64},:len)::Int64))::Int64::Bool))::Bool))::Bool
goto 2
1:
0: # line 6:
return result::Int64
end::Int64))))
julia> code_llvm(f, (Int,))
; LLVM IR emitted for f specialised on a single i64 argument (%0, i.e. x).
; Overall shape: two overflow-checked arithmetic steps, a product computed
; ahead of the loop, a loop that only advances a counter, and a final add.
define i64 @julia_f15840(i64) {
top:
; Overflow-checked x - 1; field 1 of the returned pair is the overflow flag.
%1 = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 %0, i64 1), !dbg !1363
%2 = extractvalue { i64, i1 } %1, 1, !dbg !1363
br i1 %2, label %fail, label %pass, !dbg !1363
fail: ; preds = %top
; Subtraction overflowed: load @jl_overflow_exception and throw it.
%3 = load %jl_value_t** @jl_overflow_exception, align 8, !dbg !1363
call void @jl_throw_with_superfluous_argument(%jl_value_t* %3, i32 3), !dbg
!1363
unreachable, !dbg !1363
pass: ; preds = %top
; Overflow-checked (x - 1) + 1, same flag test as above.
%4 = extractvalue { i64, i1 } %1, 0, !dbg !1363
%5 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %4, i64 1), !dbg !1363
%6 = extractvalue { i64, i1 } %5, 1, !dbg !1363
br i1 %6, label %fail1, label %pass2, !dbg !1363
fail1: ; preds = %pass
; Addition overflowed: same throw sequence as %fail.
%7 = load %jl_value_t** @jl_overflow_exception, align 8, !dbg !1363
call void @jl_throw_with_superfluous_argument(%jl_value_t* %7, i32 3), !dbg
!1363
unreachable, !dbg !1363
pass2: ; preds = %pass
; %8 is read from a fixed absolute address and is the value returned when the
; loop is skipped (see the phi in %L5).
; NOTE(review): presumably this is the initial value of `result`; what lives
; at that address is not visible in this listing - confirm.
%8 = load i64* inttoptr (i64 140237317881928 to i64*), align 8, !dbg !1364
; %11 = (x > 0) ? (x - 1) + 1 : 0; a zero value bypasses the loop entirely.
%9 = icmp sgt i64 %0, 0, !dbg !1363
%10 = extractvalue { i64, i1 } %5, 0, !dbg !1363
%11 = select i1 %9, i64 %10, i64 0, !dbg !1363
%12 = icmp eq i64 %11, 0, !dbg !1363
br i1 %12, label %L5, label %L.preheader, !dbg !1363
L.preheader: ; preds = %pass2
; The product %11 * x is computed once here, before the loop is entered.
%13 = mul i64 %11, %0, !dbg !1363
br label %L, !dbg !1363
L: ; preds = %L.preheader, %L
; This block only advances a counter: it starts at 1, is incremented each
; pass, and the loop exits once the pre-increment value equals %11.
; No addition to the result happens inside the loop.
%"#s37.0" = phi i64 [ %14, %L ], [ 1, %L.preheader ]
%14 = add i64 %"#s37.0", 1, !dbg !1363
%15 = icmp eq i64 %"#s37.0", %11, !dbg !1365
br i1 %15, label %L5.loopexit, label %L, !dbg !1365
L5.loopexit: ; preds = %L
; After the loop: result = %8 + precomputed product.
%16 = add i64 %8, %13, !dbg !1363
br label %L5
L5: ; preds = %L5.loopexit, %pass2
; Return %8 if the loop was skipped, otherwise %8 + %13.
%result.1 = phi i64 [ %8, %pass2 ], [ %16, %L5.loopexit ]
ret i64 %result.1, !dbg !1366
}
Thanks a lot!
-rr-