[julia-users] Re: Tracking down type instability
A correction: the native Haswell build does use packed conversions for 32-bit integers, and valiantly mixes unpacked conversions with packed division for 64-bit. I didn't see any of this from the pre-built binaries. On Friday, September 16, 2016 at 10:22:16 PM UTC-4, Ralph Smith wrote: > > The integer vectors d and l have to be converted to floats before > division. Vectorization at this point means that the compiler writes code > to use wide registers to do more than one at a time (and lots of extra > logic to handle alignment and stragglers). > Something in the chain (LLVM front end, I suspect) seems ignorant or > unhappy about the packed conversion instructions on recent Intel chips > (they are not emitted on my build of v0.5rc4 from source, targeted for > native Haswell architecture). What you show should still compile to a > pretty fast loop. > > Surely this is not your bottleneck? > > On Friday, September 16, 2016 at 8:12:53 PM UTC-4, Ben Ward wrote: >> >> function >> distance{T<:MutationType,A<:NucleotideAlphabet}(::Type{Proportion{T}}, >> seqs::Vector{BioSequence{A}}) >> d, l = distance(Count{T}, seqs) >> D = Vector{Float64}(length(d)) >> @inbounds @simd for i in 1:length(D) >> D[i] = d[i] / l[i] >> end >> return D, l >> end >> >> *julia> **@code_llvm distance(Proportion{AnyMutation}, dnas2)* >> >> >> define %jl_value_t* @julia_distance_70450(%jl_value_t*, %jl_value_t*) #0 { >> >> top: >> >> %2 = call %jl_value_t*** @jl_get_ptls_states() #1 >> >> %3 = alloca [17 x %jl_value_t*], align 8 >> >> %.sub = getelementptr inbounds [17 x %jl_value_t*], [17 x >> %jl_value_t*]* %3, i64 0, i64 0 >> >> %4 = getelementptr [17 x %jl_value_t*], [17 x %jl_value_t*]* %3, i64 0, >> i64 2 >> >> %5 = getelementptr [17 x %jl_value_t*], [17 x %jl_value_t*]* %3, i64 0, >> i64 3 >> >> %6 = getelementptr [17 x %jl_value_t*], [17 x %jl_value_t*]* %3, i64 0, >> i64 4 >> >> %7 = getelementptr [17 x %jl_value_t*], [17 x %jl_value_t*]* %3, i64 0, >> i64 5 >> >> %8 = getelementptr [17 x %jl_value_t*], [17 x %jl_value_t*]* %3, i64 0, >> i64 6 >> >> %9 = getelementptr [17 x %jl_value_t*], [17 x %jl_value_t*]* %3, i64 0, >> i64 7 >> >> %10 = getelementptr [17 x %jl_value_t*], [17 x %jl_value_t*]* %3, i64 >> 0, i64 8 >> >> %11 = getelementptr [17 x %jl_value_t*], [17 x %jl_value_t*]* %3, i64 >> 0, i64 9 >> >> %12 = bitcast [17 x %jl_value_t*]* %3 to i64* >> >> %13 = bitcast %jl_value_t** %4 to i8* >> >> call void @llvm.memset.p0i8.i64(i8* %13, i8 0, i64 120, i32 8, i1 false) >> >> store i64 30, i64* %12, align 8 >> >> %14 = bitcast %jl_value_t*** %2 to i64* >> >> %15 = load i64, i64* %14, align 8 >> >> %16 = getelementptr [17 x %jl_value_t*], [17 x %jl_value_t*]* %3, i64 >> 0, i64 1 >> >> %17 = bitcast %jl_value_t** %16 to i64* >> >> store i64 %15, i64* %17, align 8 >> >> store %jl_value_t** %.sub, %jl_value_t*** %2, align 8 >> >> %18 = call %jl_value_t* @julia_seqmatrix_70452(%jl_value_t* %1, >> %jl_value_t* inttoptr (i64 13078874632 to %jl_value_t*)) #0 >> >> store %jl_value_t* %18, %jl_value_t** %4, align 8 >> >> store %jl_value_t* %18, %jl_value_t** %5, align 8 >> >> %19 = call %jl_value_t* @julia_count_mutations_70454(%jl_value_t* >> inttoptr (i64 4687660080 to %jl_value_t*), %jl_value_t* %18) #0 >> >> store %jl_value_t* %19, %jl_value_t** %6, align 8 >> >> %20 = getelementptr inbounds %jl_value_t, %jl_value_t* %19, i64 0, i32 0 >> >> %21 = load %jl_value_t*, %jl_value_t** %20, align 8 >> >> store %jl_value_t* %21, %jl_value_t** %7, align 8 >> >> %22 = getelementptr inbounds %jl_value_t, %jl_value_t* %19, i64 1, i32 0 >> >> %23 = load %jl_value_t*, %jl_value_t** %22, align 8 >> >> store %jl_value_t* %23, %jl_value_t** %8, align 8 >> >> store %jl_value_t* %21, %jl_value_t** %9, align 8 >> >> %24 = getelementptr inbounds %jl_value_t, %jl_value_t* %21, i64 1 >> >> %25 = bitcast %jl_value_t* %24 to i64* >> >> %26 = load i64, i64* %25, align 8 >> >> %27 = call %jl_value_t* inttoptr (i64 4388816272 to %jl_value_t* >> (%jl_value_t*, i64)*)(%jl_value_t* inttoptr (i64 4473018288 to >> %jl_value_t*), i64 %26) >> >> store %jl_value_t* %27, %jl_value_t** %10, align 8 >> >> store %jl_value_t* %27, %jl_value_t** %11, align 8 >> >> %28 = getelementptr inbounds %jl_value_t, %jl_value_t* %27, i64 1 >> >> %29 = bitcast %jl_value_t* %28 to i64* >> >> %30 = load i64, i64* %29, align 8 >> >> %31 = icmp sgt i64 %30, 0 >> >> %32 = select i1 %31, i64 %30, i64 0 >> >> %33 = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 %32, i64 1) >> >> %34 = extractvalue { i64, i1 } %33, 1 >> >> br i1 %34, label %fail.split, label %top.top.split_crit_edge >> >> >> top.top.split_crit_edge: ; preds = %top >> >> %35 = extractvalue { i64, i1 } %33, 0 >> >> %36 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %35, i64 1) >> >> %37 = extractvalue { i64, i1 } %36, 1 >> >> br i1 %37, label
[julia-users] Re: Tracking down type instability
The integer vectors d and l have to be converted to floats before division. Vectorization at this point means that the compiler writes code to use wide registers to do more than one at a time (and lots of extra logic to handle alignment and stragglers). Something in the chain (LLVM front end, I suspect) seems ignorant or unhappy about the packed conversion instructions on recent Intel chips (they are not emitted on my build of v0.5rc4 from source, targeted for native Haswell architecture). What you show should still compile to a pretty fast loop. Surely this is not your bottleneck? On Friday, September 16, 2016 at 8:12:53 PM UTC-4, Ben Ward wrote: > > function > distance{T<:MutationType,A<:NucleotideAlphabet}(::Type{Proportion{T}}, > seqs::Vector{BioSequence{A}}) > d, l = distance(Count{T}, seqs) > D = Vector{Float64}(length(d)) > @inbounds @simd for i in 1:length(D) > D[i] = d[i] / l[i] > end > return D, l > end > > *julia> **@code_llvm distance(Proportion{AnyMutation}, dnas2)* > > > define %jl_value_t* @julia_distance_70450(%jl_value_t*, %jl_value_t*) #0 { > > top: > > %2 = call %jl_value_t*** @jl_get_ptls_states() #1 > > %3 = alloca [17 x %jl_value_t*], align 8 > > %.sub = getelementptr inbounds [17 x %jl_value_t*], [17 x %jl_value_t*]* > %3, i64 0, i64 0 > > %4 = getelementptr [17 x %jl_value_t*], [17 x %jl_value_t*]* %3, i64 0, > i64 2 > > %5 = getelementptr [17 x %jl_value_t*], [17 x %jl_value_t*]* %3, i64 0, > i64 3 > > %6 = getelementptr [17 x %jl_value_t*], [17 x %jl_value_t*]* %3, i64 0, > i64 4 > > %7 = getelementptr [17 x %jl_value_t*], [17 x %jl_value_t*]* %3, i64 0, > i64 5 > > %8 = getelementptr [17 x %jl_value_t*], [17 x %jl_value_t*]* %3, i64 0, > i64 6 > > %9 = getelementptr [17 x %jl_value_t*], [17 x %jl_value_t*]* %3, i64 0, > i64 7 > > %10 = getelementptr [17 x %jl_value_t*], [17 x %jl_value_t*]* %3, i64 0, > i64 8 > > %11 = getelementptr [17 x %jl_value_t*], [17 x %jl_value_t*]* %3, i64 0, > i64 9 > > %12 = bitcast [17 x %jl_value_t*]* %3 to i64* > > %13 = bitcast %jl_value_t** %4 to i8* > > call void @llvm.memset.p0i8.i64(i8* %13, i8 0, i64 120, i32 8, i1 false) > > store i64 30, i64* %12, align 8 > > %14 = bitcast %jl_value_t*** %2 to i64* > > %15 = load i64, i64* %14, align 8 > > %16 = getelementptr [17 x %jl_value_t*], [17 x %jl_value_t*]* %3, i64 0, > i64 1 > > %17 = bitcast %jl_value_t** %16 to i64* > > store i64 %15, i64* %17, align 8 > > store %jl_value_t** %.sub, %jl_value_t*** %2, align 8 > > %18 = call %jl_value_t* @julia_seqmatrix_70452(%jl_value_t* %1, > %jl_value_t* inttoptr (i64 13078874632 to %jl_value_t*)) #0 > > store %jl_value_t* %18, %jl_value_t** %4, align 8 > > store %jl_value_t* %18, %jl_value_t** %5, align 8 > > %19 = call %jl_value_t* @julia_count_mutations_70454(%jl_value_t* > inttoptr (i64 4687660080 to %jl_value_t*), %jl_value_t* %18) #0 > > store %jl_value_t* %19, %jl_value_t** %6, align 8 > > %20 = getelementptr inbounds %jl_value_t, %jl_value_t* %19, i64 0, i32 0 > > %21 = load %jl_value_t*, %jl_value_t** %20, align 8 > > store %jl_value_t* %21, %jl_value_t** %7, align 8 > > %22 = getelementptr inbounds %jl_value_t, %jl_value_t* %19, i64 1, i32 0 > > %23 = load %jl_value_t*, %jl_value_t** %22, align 8 > > store %jl_value_t* %23, %jl_value_t** %8, align 8 > > store %jl_value_t* %21, %jl_value_t** %9, align 8 > > %24 = getelementptr inbounds %jl_value_t, %jl_value_t* %21, i64 1 > > %25 = bitcast %jl_value_t* %24 to i64* > > %26 = load i64, i64* %25, align 8 > > %27 = call %jl_value_t* inttoptr (i64 4388816272 to %jl_value_t* > (%jl_value_t*, i64)*)(%jl_value_t* inttoptr (i64 4473018288 to > %jl_value_t*), i64 %26) > > store %jl_value_t* %27, %jl_value_t** %10, align 8 > > store %jl_value_t* %27, %jl_value_t** %11, align 8 > > %28 = getelementptr inbounds %jl_value_t, %jl_value_t* %27, i64 1 > > %29 = bitcast %jl_value_t* %28 to i64* > > %30 = load i64, i64* %29, align 8 > > %31 = icmp sgt i64 %30, 0 > > %32 = select i1 %31, i64 %30, i64 0 > > %33 = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 %32, i64 1) > > %34 = extractvalue { i64, i1 } %33, 1 > > br i1 %34, label %fail.split, label %top.top.split_crit_edge > > > top.top.split_crit_edge: ; preds = %top > > %35 = extractvalue { i64, i1 } %33, 0 > > %36 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %35, i64 1) > > %37 = extractvalue { i64, i1 } %36, 1 > > br i1 %37, label %fail12, label %top.split.top.split.split_crit_edge > > > top.split.top.split.split_crit_edge: ; preds = > %top.top.split_crit_edge > > %38 = extractvalue { i64, i1 } %36, 0 > > %39 = icmp slt i64 %38, 1 > > br i1 %39, label %L11, label %if15.lr.ph > > > L11.loopexit: ; preds = %if15 > > br label %L11 > > > L11: ; preds = %L11.loopexit, > %top.spli
[julia-users] Re: Tracking down type instability
function distance{T<:MutationType,A<:NucleotideAlphabet}(::Type{Proportion{T}}, seqs::Vector{BioSequence{A}}) d, l = distance(Count{T}, seqs) D = Vector{Float64}(length(d)) @inbounds @simd for i in 1:length(D) D[i] = d[i] / l[i] end return D, l end *julia> **@code_llvm distance(Proportion{AnyMutation}, dnas2)* define %jl_value_t* @julia_distance_70450(%jl_value_t*, %jl_value_t*) #0 { top: %2 = call %jl_value_t*** @jl_get_ptls_states() #1 %3 = alloca [17 x %jl_value_t*], align 8 %.sub = getelementptr inbounds [17 x %jl_value_t*], [17 x %jl_value_t*]* %3, i64 0, i64 0 %4 = getelementptr [17 x %jl_value_t*], [17 x %jl_value_t*]* %3, i64 0, i64 2 %5 = getelementptr [17 x %jl_value_t*], [17 x %jl_value_t*]* %3, i64 0, i64 3 %6 = getelementptr [17 x %jl_value_t*], [17 x %jl_value_t*]* %3, i64 0, i64 4 %7 = getelementptr [17 x %jl_value_t*], [17 x %jl_value_t*]* %3, i64 0, i64 5 %8 = getelementptr [17 x %jl_value_t*], [17 x %jl_value_t*]* %3, i64 0, i64 6 %9 = getelementptr [17 x %jl_value_t*], [17 x %jl_value_t*]* %3, i64 0, i64 7 %10 = getelementptr [17 x %jl_value_t*], [17 x %jl_value_t*]* %3, i64 0, i64 8 %11 = getelementptr [17 x %jl_value_t*], [17 x %jl_value_t*]* %3, i64 0, i64 9 %12 = bitcast [17 x %jl_value_t*]* %3 to i64* %13 = bitcast %jl_value_t** %4 to i8* call void @llvm.memset.p0i8.i64(i8* %13, i8 0, i64 120, i32 8, i1 false) store i64 30, i64* %12, align 8 %14 = bitcast %jl_value_t*** %2 to i64* %15 = load i64, i64* %14, align 8 %16 = getelementptr [17 x %jl_value_t*], [17 x %jl_value_t*]* %3, i64 0, i64 1 %17 = bitcast %jl_value_t** %16 to i64* store i64 %15, i64* %17, align 8 store %jl_value_t** %.sub, %jl_value_t*** %2, align 8 %18 = call %jl_value_t* @julia_seqmatrix_70452(%jl_value_t* %1, %jl_value_t* inttoptr (i64 13078874632 to %jl_value_t*)) #0 store %jl_value_t* %18, %jl_value_t** %4, align 8 store %jl_value_t* %18, %jl_value_t** %5, align 8 %19 = call %jl_value_t* @julia_count_mutations_70454(%jl_value_t* inttoptr (i64 4687660080 to %jl_value_t*), %jl_value_t* %18) #0 store %jl_value_t* %19, %jl_value_t** %6, align 8 %20 = getelementptr inbounds %jl_value_t, %jl_value_t* %19, i64 0, i32 0 %21 = load %jl_value_t*, %jl_value_t** %20, align 8 store %jl_value_t* %21, %jl_value_t** %7, align 8 %22 = getelementptr inbounds %jl_value_t, %jl_value_t* %19, i64 1, i32 0 %23 = load %jl_value_t*, %jl_value_t** %22, align 8 store %jl_value_t* %23, %jl_value_t** %8, align 8 store %jl_value_t* %21, %jl_value_t** %9, align 8 %24 = getelementptr inbounds %jl_value_t, %jl_value_t* %21, i64 1 %25 = bitcast %jl_value_t* %24 to i64* %26 = load i64, i64* %25, align 8 %27 = call %jl_value_t* inttoptr (i64 4388816272 to %jl_value_t* (%jl_value_t*, i64)*)(%jl_value_t* inttoptr (i64 4473018288 to %jl_value_t*), i64 %26) store %jl_value_t* %27, %jl_value_t** %10, align 8 store %jl_value_t* %27, %jl_value_t** %11, align 8 %28 = getelementptr inbounds %jl_value_t, %jl_value_t* %27, i64 1 %29 = bitcast %jl_value_t* %28 to i64* %30 = load i64, i64* %29, align 8 %31 = icmp sgt i64 %30, 0 %32 = select i1 %31, i64 %30, i64 0 %33 = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 %32, i64 1) %34 = extractvalue { i64, i1 } %33, 1 br i1 %34, label %fail.split, label %top.top.split_crit_edge top.top.split_crit_edge: ; preds = %top %35 = extractvalue { i64, i1 } %33, 0 %36 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %35, i64 1) %37 = extractvalue { i64, i1 } %36, 1 br i1 %37, label %fail12, label %top.split.top.split.split_crit_edge top.split.top.split.split_crit_edge: ; preds = %top.top.split_crit_edge %38 = extractvalue { i64, i1 } %36, 0 %39 = icmp slt i64 %38, 1 br i1 %39, label %L11, label %if15.lr.ph L11.loopexit: ; preds = %if15 br label %L11 L11: ; preds = %L11.loopexit, %top.split.top.split.split_crit_edge %40 = getelementptr [17 x %jl_value_t*], [17 x %jl_value_t*]* %3, i64 0, i64 13 %41 = getelementptr [17 x %jl_value_t*], [17 x %jl_value_t*]* %3, i64 0, i64 14 %42 = getelementptr [17 x %jl_value_t*], [17 x %jl_value_t*]* %3, i64 0, i64 15 %43 = getelementptr [17 x %jl_value_t*], [17 x %jl_value_t*]* %3, i64 0, i64 16 store %jl_value_t* %27, %jl_value_t** %40, align 8 %44 = bitcast %jl_value_t*** %2 to i8* %45 = call %jl_value_t* @jl_gc_pool_alloc(i8* %44, i32 1408, i32 32) %46 = getelementptr inbounds %jl_value_t, %jl_value_t* %45, i64 -1, i32 0 store %jl_value_t* inttoptr (i64 4723475120 to %jl_value_t*), %jl_value_t** %46, align 8 store %jl_value_t* %45, %jl_value_t** %41, align 8 store %jl_value_t* %27, %jl_value_t** %42, align 8 %47 = getelementptr inbounds %jl_value_t, %jl_value_t* %45, i64 0, i32 0 store %jl_value_t* %27
[julia-users] Re: Tracking down type instability
What if you add @simd after @inbounds? On Friday, September 16, 2016 at 5:48:00 PM UTC+2, Ben Ward wrote: > > The code for that function not shown is rather similar to the version that > is shown: > > function > distance{T<:MutationType,A<:NucleotideAlphabet}(::Type{Proportion{T}}, > seqs::Vector{BioSequence{A}}) > d, l = distance(Count{T}, seqs) > D = Vector{Float64}(length(d)) > @inbounds for i in 1:length(D) > D[i] = d[i] / l[i] > end > return D, l > end > > Which has a @code_warntype which seems ok: > > *julia> **@code_warntype distance(Proportion{AnyMutation}, dnas2)* > > Variables: > > #self#::Bio.Var.#distance > > #unused#::Type{Bio.Var.Proportion{Bio.Var.AnyMutation}} > > seqs@_3::Array{Bio.Seq.BioSequence{Bio.Seq.DNAAlphabet{4}},1} > > d::Array{Int64,1} > > l::Array{Int64,1} > > #temp#@_6::Int64 > > D::Array{Float64,1} > > #temp#@_8::Int64 > > i::Int64 > > seqs@_10::Array{Bio.Seq.DNANucleotide,2} > > > Body: > > begin > > $(Expr(:inbounds, false)) > > # meta: location /Users/bward/.julia/v0.5/Bio/src/var/distances.jl > distance 133 > > # meta: location > /Users/bward/.julia/v0.5/Bio/src/var/mutation_counting.jl count_mutations > 263 > > seqs@_10::Array{Bio.Seq.DNANucleotide,2} = $(Expr(:invoke, > LambdaInfo for > seqmatrix(::Array{Bio.Seq.BioSequence{Bio.Seq.DNAAlphabet{4}},1}, > ::Symbol), :(Bio.Var.seqmatrix), :(seqs@_3), :(:seq))) > > # meta: pop location > > # meta: pop location > > $(Expr(:inbounds, :pop)) > > SSAValue(0) = $(Expr(:invoke, LambdaInfo for > count_mutations(::Type{Bio.Var.AnyMutation}, > ::Array{Bio.Seq.DNANucleotide,2}), :(Bio.Var.count_mutations), > Bio.Var.AnyMutation, :(seqs@_10))) > > #temp#@_6::Int64 = $(QuoteNode(1)) > > SSAValue(9) = (Base.getfield)(SSAValue(0),1)::Array{Int64,1} > > SSAValue(10) = (Base.box)(Int64,(Base.add_int)(1,1)) > > d::Array{Int64,1} = SSAValue(9) > > #temp#@_6::Int64 = SSAValue(10) > > SSAValue(11) = (Base.getfield)(SSAValue(0),2)::Array{Int64,1} > > SSAValue(12) = (Base.box)(Int64,(Base.add_int)(2,1)) > > l::Array{Int64,1} = SSAValue(11) > > #temp#@_6::Int64 = SSAValue(12) # line 256: > > SSAValue(6) = (Base.arraylen)(d::Array{Int64,1})::Int64 > > D::Array{Float64,1} = > (Core.ccall)(:jl_alloc_array_1d,(Core.apply_type)(Core.Array,Float64,1)::Type{Array{Float64,1}},(Core.svec)(Core.Any,Core.Int)::SimpleVector,Array{Float64,1},0,SSAValue(6),0)::Array{Float64,1} > > # line 257: > > $(Expr(:inbounds, true)) > > SSAValue(8) = (Base.arraylen)(D::Array{Float64,1})::Int64 > > SSAValue(13) = > (Base.select_value)((Base.sle_int)(1,SSAValue(8))::Bool,SSAValue(8),(Base.box)(Int64,(Base.sub_int)(1,1)))::Int64 > > #temp#@_8::Int64 = 1 > > 26: > > unless (Base.box)(Base.Bool,(Base.not_int)((#temp#@_8::Int64 === > (Base.box)(Int64,(Base.add_int)(SSAValue(13),1)))::Bool)) goto 37 > > SSAValue(14) = #temp#@_8::Int64 > > SSAValue(15) = (Base.box)(Int64,(Base.add_int)(#temp#@_8::Int64,1)) > > i::Int64 = SSAValue(14) > > #temp#@_8::Int64 = SSAValue(15) # line 258: > > SSAValue(5) = > (Base.box)(Base.Float64,(Base.div_float)((Base.box)(Float64,(Base.sitofp)(Float64,(Base.arrayref)(d::Array{Int64,1},i::Int64)::Int64)),(Base.box)(Float64,(Base.sitofp)(Float64,(Base.arrayref)(l::Array{Int64,1},i::Int64)::Int64 > > > (Base.arrayset)(D::Array{Float64,1},SSAValue(5),i::Int64)::Array{Float64,1} > > 35: > > goto 26 > > 37: > > $(Expr(:inbounds, :pop)) # line 260: > > return > (Core.tuple)(D::Array{Float64,1},l::Array{Int64,1})::Tuple{Array{Float64,1},Array{Int64,1}} > > end::Tuple{Array{Float64,1},Array{Int64,1}} > > But then again the loop in this function is also not vectorised which I > struggle to see why... unless division can't be. > > On Friday, September 16, 2016 at 3:27:47 AM UTC+1, Ralph Smith wrote: >> >> SSAValue(15) = (Base.getfield)(SSAValue(0),1) >> *::Union{Array{Float64,1},Array{Int64,1}}* >> >> >> indicates that the first element of SSAValue(0) is ambiguous. Earlier it >> shows that this means p from >> >> p, l = distance(Proportion{AnyMutation}, seqs) >> >> which we can't analyze from what you show here. >> >> On Thursday, September 15, 2016 at 10:08:16 AM UTC-4, Ben Ward wrote: >>> >>> Hi I have two functions and a function which calls them: >>> >>> @inline function expected_distance(::Type{JukesCantor69}, p::Float64) >>> return -0.75 * log(1 - 4 * p / 3) >>> end >>> >>> @inline function variance(::Type{JukesCantor69}, p::Float64, l::Int64) >>> return p * (1 - p) / (((1 - 4 * p / 3) ^ 2) * l) >>> end >>> >>> function distance{A<:NucleotideAlphabet}(::Type{JukesCantor69}, >>> seqs::Vector{BioSequence{A}}) >>> p, l = distance(Proportion{AnyMutation}, seqs) >>> D = Vector{Float64}(length(p)) >>> V = Vector{Float64}(length(p)) >>> @inbound
[julia-users] Re: Tracking down type instability
The code for that function not shown is rather similar to the version that is shown: function distance{T<:MutationType,A<:NucleotideAlphabet}(::Type{Proportion{T}}, seqs::Vector{BioSequence{A}}) d, l = distance(Count{T}, seqs) D = Vector{Float64}(length(d)) @inbounds for i in 1:length(D) D[i] = d[i] / l[i] end return D, l end Which has a @code_warntype which seems ok: *julia> **@code_warntype distance(Proportion{AnyMutation}, dnas2)* Variables: #self#::Bio.Var.#distance #unused#::Type{Bio.Var.Proportion{Bio.Var.AnyMutation}} seqs@_3::Array{Bio.Seq.BioSequence{Bio.Seq.DNAAlphabet{4}},1} d::Array{Int64,1} l::Array{Int64,1} #temp#@_6::Int64 D::Array{Float64,1} #temp#@_8::Int64 i::Int64 seqs@_10::Array{Bio.Seq.DNANucleotide,2} Body: begin $(Expr(:inbounds, false)) # meta: location /Users/bward/.julia/v0.5/Bio/src/var/distances.jl distance 133 # meta: location /Users/bward/.julia/v0.5/Bio/src/var/mutation_counting.jl count_mutations 263 seqs@_10::Array{Bio.Seq.DNANucleotide,2} = $(Expr(:invoke, LambdaInfo for seqmatrix(::Array{Bio.Seq.BioSequence{Bio.Seq.DNAAlphabet{4}},1}, ::Symbol), :(Bio.Var.seqmatrix), :(seqs@_3), :(:seq))) # meta: pop location # meta: pop location $(Expr(:inbounds, :pop)) SSAValue(0) = $(Expr(:invoke, LambdaInfo for count_mutations(::Type{Bio.Var.AnyMutation}, ::Array{Bio.Seq.DNANucleotide,2}), :(Bio.Var.count_mutations), Bio.Var.AnyMutation, :(seqs@_10))) #temp#@_6::Int64 = $(QuoteNode(1)) SSAValue(9) = (Base.getfield)(SSAValue(0),1)::Array{Int64,1} SSAValue(10) = (Base.box)(Int64,(Base.add_int)(1,1)) d::Array{Int64,1} = SSAValue(9) #temp#@_6::Int64 = SSAValue(10) SSAValue(11) = (Base.getfield)(SSAValue(0),2)::Array{Int64,1} SSAValue(12) = (Base.box)(Int64,(Base.add_int)(2,1)) l::Array{Int64,1} = SSAValue(11) #temp#@_6::Int64 = SSAValue(12) # line 256: SSAValue(6) = (Base.arraylen)(d::Array{Int64,1})::Int64 D::Array{Float64,1} = (Core.ccall)(:jl_alloc_array_1d,(Core.apply_type)(Core.Array,Float64,1)::Type{Array{Float64,1}},(Core.svec)(Core.Any,Core.Int)::SimpleVector,Array{Float64,1},0,SSAValue(6),0)::Array{Float64,1} # line 257: $(Expr(:inbounds, true)) SSAValue(8) = (Base.arraylen)(D::Array{Float64,1})::Int64 SSAValue(13) = (Base.select_value)((Base.sle_int)(1,SSAValue(8))::Bool,SSAValue(8),(Base.box)(Int64,(Base.sub_int)(1,1)))::Int64 #temp#@_8::Int64 = 1 26: unless (Base.box)(Base.Bool,(Base.not_int)((#temp#@_8::Int64 === (Base.box)(Int64,(Base.add_int)(SSAValue(13),1)))::Bool)) goto 37 SSAValue(14) = #temp#@_8::Int64 SSAValue(15) = (Base.box)(Int64,(Base.add_int)(#temp#@_8::Int64,1)) i::Int64 = SSAValue(14) #temp#@_8::Int64 = SSAValue(15) # line 258: SSAValue(5) = (Base.box)(Base.Float64,(Base.div_float)((Base.box)(Float64,(Base.sitofp)(Float64,(Base.arrayref)(d::Array{Int64,1},i::Int64)::Int64)),(Base.box)(Float64,(Base.sitofp)(Float64,(Base.arrayref)(l::Array{Int64,1},i::Int64)::Int64 (Base.arrayset)(D::Array{Float64,1},SSAValue(5),i::Int64)::Array{Float64,1} 35: goto 26 37: $(Expr(:inbounds, :pop)) # line 260: return (Core.tuple)(D::Array{Float64,1},l::Array{Int64,1})::Tuple{Array{Float64,1},Array{Int64,1}} end::Tuple{Array{Float64,1},Array{Int64,1}} But then again the loop in this function is also not vectorised which I struggle to see why... unless division can't be. On Friday, September 16, 2016 at 3:27:47 AM UTC+1, Ralph Smith wrote: > > SSAValue(15) = (Base.getfield)(SSAValue(0),1) > *::Union{Array{Float64,1},Array{Int64,1}}* > > > indicates that the first element of SSAValue(0) is ambiguous. Earlier it > shows that this means p from > > p, l = distance(Proportion{AnyMutation}, seqs) > > which we can't analyze from what you show here. > > On Thursday, September 15, 2016 at 10:08:16 AM UTC-4, Ben Ward wrote: >> >> Hi I have two functions and a function which calls them: >> >> @inline function expected_distance(::Type{JukesCantor69}, p::Float64) >> return -0.75 * log(1 - 4 * p / 3) >> end >> >> @inline function variance(::Type{JukesCantor69}, p::Float64, l::Int64) >> return p * (1 - p) / (((1 - 4 * p / 3) ^ 2) * l) >> end >> >> function distance{A<:NucleotideAlphabet}(::Type{JukesCantor69}, >> seqs::Vector{BioSequence{A}}) >> p, l = distance(Proportion{AnyMutation}, seqs) >> D = Vector{Float64}(length(p)) >> V = Vector{Float64}(length(p)) >> @inbounds for i in 1:length(p) >> D[i] = expected_distance(JukesCantor69, p[i]) >> V[i] = variance(JukesCantor69, p[i], l[i]) >> end >> return D, V >> end >> >> But I'm seeing type uncertainty: >> >> *@code_warntype distance(JukesCantor69, dnas)* >> >> Variables: >> >> #self#::Bio.Var.#distance >> >> #unused#::Type{Bio.Var.JukesCantor69} >> >
[julia-users] Re: Tracking down type instability
SSAValue(15) = (Base.getfield)(SSAValue(0),1) *::Union{Array{Float64,1},Array{Int64,1}}* indicates that the first element of SSAValue(0) is ambiguous. Earlier it shows that this means p from p, l = distance(Proportion{AnyMutation}, seqs) which we can't analyze from what you show here. On Thursday, September 15, 2016 at 10:08:16 AM UTC-4, Ben Ward wrote: > > Hi I have two functions and a function which calls them: > > @inline function expected_distance(::Type{JukesCantor69}, p::Float64) > return -0.75 * log(1 - 4 * p / 3) > end > > @inline function variance(::Type{JukesCantor69}, p::Float64, l::Int64) > return p * (1 - p) / (((1 - 4 * p / 3) ^ 2) * l) > end > > function distance{A<:NucleotideAlphabet}(::Type{JukesCantor69}, > seqs::Vector{BioSequence{A}}) > p, l = distance(Proportion{AnyMutation}, seqs) > D = Vector{Float64}(length(p)) > V = Vector{Float64}(length(p)) > @inbounds for i in 1:length(p) > D[i] = expected_distance(JukesCantor69, p[i]) > V[i] = variance(JukesCantor69, p[i], l[i]) > end > return D, V > end > > But I'm seeing type uncertainty: > > *@code_warntype distance(JukesCantor69, dnas)* > > Variables: > > #self#::Bio.Var.#distance > > #unused#::Type{Bio.Var.JukesCantor69} > > seqs::Array{Bio.Seq.BioSequence{Bio.Seq.DNAAlphabet{4}},1} > > p::Array{Float64,1} > > l::Array{Int64,1} > > #temp#@_6::Int64 > > D::Array{Float64,1} > > V::Array{Float64,1} > > #temp#@_9::Int64 > > i::Int64 > > > Body: > > begin > > SSAValue(0) = $(Expr(:invoke, LambdaInfo for > distance(::Type{Bio.Var.Proportion{Bio.Var.AnyMutation}}, > ::Array{Bio.Seq.BioSequence{Bio.Seq.DNAAlphabet{4}},1}), > :(Bio.Var.distance), Bio.Var.Proportion{Bio.Var.AnyMutation}, :(seqs))) > > #temp#@_6::Int64 = $(QuoteNode(1)) > > SSAValue(15) = (Base.getfield)(SSAValue(0),1) > *::Union{Array{Float64,1},Array{Int64,1}}* > > SSAValue(16) = (Base.box)(Int64,(Base.add_int)(1,1)) > > p::Array{Float64,1} = SSAValue(15) > > #temp#@_6::Int64 = SSAValue(16) > > SSAValue(17) = (Base.getfield)(SSAValue(0),2) > *::Union{Array{Float64,1},Array{Int64,1}}* > > SSAValue(18) = (Base.box)(Int64,(Base.add_int)(2,1)) > > l::Array{Int64,1} = SSAValue(17) > > #temp#@_6::Int64 = SSAValue(18) # line 314: > > SSAValue(7) = (Base.arraylen)(p::Array{Float64,1})::Int64 > > D::Array{Float64,1} = > (Core.ccall)(:jl_alloc_array_1d,(Core.apply_type)(Core.Array,Float64,1)::Type{Array{Float64,1}},(Core.svec)(Core.Any,Core.Int)::SimpleVector,Array{Float64,1},0,SSAValue(7),0)::Array{Float64,1} > > # line 315: > > SSAValue(9) = (Base.arraylen)(p::Array{Float64,1})::Int64 > > V::Array{Float64,1} = > (Core.ccall)(:jl_alloc_array_1d,(Core.apply_type)(Core.Array,Float64,1)::Type{Array{Float64,1}},(Core.svec)(Core.Any,Core.Int)::SimpleVector,Array{Float64,1},0,SSAValue(9),0)::Array{Float64,1} > > # line 316: > > $(Expr(:inbounds, true)) > > SSAValue(11) = (Base.arraylen)(p::Array{Float64,1})::Int64 > > SSAValue(19) = > (Base.select_value)((Base.sle_int)(1,SSAValue(11))::Bool,SSAValue(11),(Base.box)(Int64,(Base.sub_int)(1,1)))::Int64 > > #temp#@_9::Int64 = 1 > > 22: > > unless (Base.box)(Base.Bool,(Base.not_int)((#temp#@_9::Int64 === > (Base.box)(Int64,(Base.add_int)(SSAValue(19),1)))::Bool)) goto 43 > > SSAValue(20) = #temp#@_9::Int64 > > SSAValue(21) = (Base.box)(Int64,(Base.add_int)(#temp#@_9::Int64,1)) > > i::Int64 = SSAValue(20) > > #temp#@_9::Int64 = SSAValue(21) # line 317: > > SSAValue(12) = (Base.arrayref)(p::Array{Float64,1},i::Int64)::Float64 > > $(Expr(:inbounds, false)) > > # meta: location /Users/bward/.julia/v0.5/Bio/src/var/distances.jl > expected_distance 69 > > SSAValue(13) = $(Expr(:invoke, LambdaInfo for log(::Float64), > :(Bio.Var.log), > :((Base.box)(Base.Float64,(Base.sub_float)((Base.box)(Float64,(Base.sitofp)(Float64,1)),(Base.box)(Base.Float64,(Base.div_float)((Base.box)(Base.Float64,(Base.mul_float)((Base.box)(Float64,(Base.sitofp)(Float64,4)),SSAValue(12))),(Base.box)(Float64,(Base.sitofp)(Float64,3) > > # meta: pop location > > $(Expr(:inbounds, :pop)) > > SSAValue(5) = > (Base.box)(Base.Float64,(Base.mul_float)(-0.75,SSAValue(13))) > > > (Base.arrayset)(D::Array{Float64,1},SSAValue(5),i::Int64)::Array{Float64,1} > # line 318: > > SSAValue(14) = (Base.arrayref)(p::Array{Float64,1},i::Int64)::Float64 > > SSAValue(6) = > (Base.box)(Base.Float64,(Base.div_float)((Base.box)(Base.Float64,(Base.mul_float)(SSAValue(14),(Base.box)(Base.Float64,(Base.sub_float)((Base.box)(Float64,(Base.sitofp)(Float64,1)),SSAValue(14),(Base.box)(Base.Float64,(Base.mul_float)((Base.Math.box)(Base.Math.Float64,(Base.Math.powi_llvm)((Base.box)(Base.Float64,(Base.sub_float)((Base.box)(Float64,(Base.sitofp)(Float64,1)),(Base.box)(Base.Float64,(Base.div_float)((Base.box)(Base
[julia-users] Re: Tracking down type instability
Removing the @inlines may be useful while investigating. On Thursday, September 15, 2016 at 10:08:16 AM UTC-4, Ben Ward wrote: > > Hi I have two functions and a function which calls them: > > @inline function expected_distance(::Type{JukesCantor69}, p::Float64) > return -0.75 * log(1 - 4 * p / 3) > end > > @inline function variance(::Type{JukesCantor69}, p::Float64, l::Int64) > return p * (1 - p) / (((1 - 4 * p / 3) ^ 2) * l) > end > > function distance{A<:NucleotideAlphabet}(::Type{JukesCantor69}, > seqs::Vector{BioSequence{A}}) > p, l = distance(Proportion{AnyMutation}, seqs) > D = Vector{Float64}(length(p)) > V = Vector{Float64}(length(p)) > @inbounds for i in 1:length(p) > D[i] = expected_distance(JukesCantor69, p[i]) > V[i] = variance(JukesCantor69, p[i], l[i]) > end > return D, V > end > > But I'm seeing type uncertainty: > > *@code_warntype distance(JukesCantor69, dnas)* > > Variables: > > #self#::Bio.Var.#distance > > #unused#::Type{Bio.Var.JukesCantor69} > > seqs::Array{Bio.Seq.BioSequence{Bio.Seq.DNAAlphabet{4}},1} > > p::Array{Float64,1} > > l::Array{Int64,1} > > #temp#@_6::Int64 > > D::Array{Float64,1} > > V::Array{Float64,1} > > #temp#@_9::Int64 > > i::Int64 > > > Body: > > begin > > SSAValue(0) = $(Expr(:invoke, LambdaInfo for > distance(::Type{Bio.Var.Proportion{Bio.Var.AnyMutation}}, > ::Array{Bio.Seq.BioSequence{Bio.Seq.DNAAlphabet{4}},1}), > :(Bio.Var.distance), Bio.Var.Proportion{Bio.Var.AnyMutation}, :(seqs))) > > #temp#@_6::Int64 = $(QuoteNode(1)) > > SSAValue(15) = (Base.getfield)(SSAValue(0),1) > *::Union{Array{Float64,1},Array{Int64,1}}* > > SSAValue(16) = (Base.box)(Int64,(Base.add_int)(1,1)) > > p::Array{Float64,1} = SSAValue(15) > > #temp#@_6::Int64 = SSAValue(16) > > SSAValue(17) = (Base.getfield)(SSAValue(0),2) > *::Union{Array{Float64,1},Array{Int64,1}}* > > SSAValue(18) = (Base.box)(Int64,(Base.add_int)(2,1)) > > l::Array{Int64,1} = SSAValue(17) > > #temp#@_6::Int64 = SSAValue(18) # line 314: > > SSAValue(7) = (Base.arraylen)(p::Array{Float64,1})::Int64 > > D::Array{Float64,1} = > (Core.ccall)(:jl_alloc_array_1d,(Core.apply_type)(Core.Array,Float64,1)::Type{Array{Float64,1}},(Core.svec)(Core.Any,Core.Int)::SimpleVector,Array{Float64,1},0,SSAValue(7),0)::Array{Float64,1} > > # line 315: > > SSAValue(9) = (Base.arraylen)(p::Array{Float64,1})::Int64 > > V::Array{Float64,1} = > (Core.ccall)(:jl_alloc_array_1d,(Core.apply_type)(Core.Array,Float64,1)::Type{Array{Float64,1}},(Core.svec)(Core.Any,Core.Int)::SimpleVector,Array{Float64,1},0,SSAValue(9),0)::Array{Float64,1} > > # line 316: > > $(Expr(:inbounds, true)) > > SSAValue(11) = (Base.arraylen)(p::Array{Float64,1})::Int64 > > SSAValue(19) = > (Base.select_value)((Base.sle_int)(1,SSAValue(11))::Bool,SSAValue(11),(Base.box)(Int64,(Base.sub_int)(1,1)))::Int64 > > #temp#@_9::Int64 = 1 > > 22: > > unless (Base.box)(Base.Bool,(Base.not_int)((#temp#@_9::Int64 === > (Base.box)(Int64,(Base.add_int)(SSAValue(19),1)))::Bool)) goto 43 > > SSAValue(20) = #temp#@_9::Int64 > > SSAValue(21) = (Base.box)(Int64,(Base.add_int)(#temp#@_9::Int64,1)) > > i::Int64 = SSAValue(20) > > #temp#@_9::Int64 = SSAValue(21) # line 317: > > SSAValue(12) = (Base.arrayref)(p::Array{Float64,1},i::Int64)::Float64 > > $(Expr(:inbounds, false)) > > # meta: location /Users/bward/.julia/v0.5/Bio/src/var/distances.jl > expected_distance 69 > > SSAValue(13) = $(Expr(:invoke, LambdaInfo for log(::Float64), > :(Bio.Var.log), > :((Base.box)(Base.Float64,(Base.sub_float)((Base.box)(Float64,(Base.sitofp)(Float64,1)),(Base.box)(Base.Float64,(Base.div_float)((Base.box)(Base.Float64,(Base.mul_float)((Base.box)(Float64,(Base.sitofp)(Float64,4)),SSAValue(12))),(Base.box)(Float64,(Base.sitofp)(Float64,3) > > # meta: pop location > > $(Expr(:inbounds, :pop)) > > SSAValue(5) = > (Base.box)(Base.Float64,(Base.mul_float)(-0.75,SSAValue(13))) > > > (Base.arrayset)(D::Array{Float64,1},SSAValue(5),i::Int64)::Array{Float64,1} > # line 318: > > SSAValue(14) = (Base.arrayref)(p::Array{Float64,1},i::Int64)::Float64 > > SSAValue(6) = > (Base.box)(Base.Float64,(Base.div_float)((Base.box)(Base.Float64,(Base.mul_float)(SSAValue(14),(Base.box)(Base.Float64,(Base.sub_float)((Base.box)(Float64,(Base.sitofp)(Float64,1)),SSAValue(14),(Base.box)(Base.Float64,(Base.mul_float)((Base.Math.box)(Base.Math.Float64,(Base.Math.powi_llvm)((Base.box)(Base.Float64,(Base.sub_float)((Base.box)(Float64,(Base.sitofp)(Float64,1)),(Base.box)(Base.Float64,(Base.div_float)((Base.box)(Base.Float64,(Base.mul_float)((Base.box)(Float64,(Base.sitofp)(Float64,4)),SSAValue(14))),(Base.box)(Float64,(Base.sitofp)(Float64,3)),(Base.box)(Int32,(Base.checked_trunc_sint)(Int32,2::Float64,(Base.box)(Float64,(Base.sitofp)(Floa