When I run benchmarks with custom bitstypes, they seem to run very
quickly, but I wouldn't have guessed that from the generated code I can
preview at the REPL. Can anyone explain what I'm seeing here? (I'm dumping
the @code_llvm output because it's more instructive; the @code_native
output is very large.)
Converting a constant number to a Uint8 compiles to a function with a
single operation. Converting the same constant number to a custom bitstype
of the same size (8 bits) seems to do a lot more:
julia> @code_llvm convert(Uint8, 100)
define i8 @"julia_convert;19720"(%jl_value_t*, i64) {
top:
%2 = trunc i64 %1 to i8, !dbg !772, !julia_type !773
ret i8 %2, !dbg !772
}
julia> bitstype 8 Foo
julia> @code_llvm convert(Foo, 100)
; Function Attrs: noreturn
define void @"julia_convert;19725"(%jl_value_t*, i64) #0 {
top:
%2 = alloca [5 x %jl_value_t*], align 8
%.sub = getelementptr inbounds [5 x %jl_value_t*]* %2, i64 0, i64 0
%3 = getelementptr [5 x %jl_value_t*]* %2, i64 0, i64 2, !dbg !776
store %jl_value_t* inttoptr (i64 6 to %jl_value_t*), %jl_value_t** %.sub, align 8
%4 = load %jl_value_t*** @jl_pgcstack, align 8, !dbg !776
%5 = getelementptr [5 x %jl_value_t*]* %2, i64 0, i64 1, !dbg !776
%.c = bitcast %jl_value_t** %4 to %jl_value_t*, !dbg !776
store %jl_value_t* %.c, %jl_value_t** %5, align 8, !dbg !776
store %jl_value_t** %.sub, %jl_value_t*** @jl_pgcstack, align 8, !dbg !776
store %jl_value_t* null, %jl_value_t** %3, align 8
%6 = getelementptr [5 x %jl_value_t*]* %2, i64 0, i64 3
store %jl_value_t* null, %jl_value_t** %6, align 8
%7 = getelementptr [5 x %jl_value_t*]* %2, i64 0, i64 4
store %jl_value_t* null, %jl_value_t** %7, align 8
%8 = load %jl_value_t** inttoptr (i64 140384830597872 to %jl_value_t**), align 16, !dbg !777
%9 = getelementptr inbounds %jl_value_t* %8, i64 1, i32 0, !dbg !777
%10 = load %jl_value_t** %9, align 8, !dbg !777, !tbaa %jtbaa_func
%11 = bitcast %jl_value_t* %10 to %jl_value_t* (%jl_value_t*, %jl_value_t**, i32)*, !dbg !777
store %jl_value_t* %0, %jl_value_t** %3, align 8, !dbg !777
%12 = call %jl_value_t* @jl_box_int64(i64 %1), !dbg !777
store %jl_value_t* %12, %jl_value_t** %6, align 8, !dbg !777
%13 = load %jl_value_t** inttoptr (i64 140384825150528 to %jl_value_t**), align 64, !dbg !777
store %jl_value_t* %13, %jl_value_t** %7, align 8, !dbg !777
%14 = call %jl_value_t* %11(%jl_value_t* %8, %jl_value_t** %3, i32 3), !dbg !777
%15 = load %jl_value_t** %5, align 8, !dbg !777
%16 = getelementptr inbounds %jl_value_t* %15, i64 0, i32 0, !dbg !777
store %jl_value_t** %16, %jl_value_t*** @jl_pgcstack, align 8, !dbg !777
ret void, !dbg !777
}