[julia-users] Type stability (or not) in core stats functions

Michael Francis Mon, 31 Aug 2015 14:49:11 -0700

The following is taken from statistics.jl line 428 

    # Dispatches on the *value* of the `mean` keyword to pick a helper:
    #   mean == 0           -> corzm(x, y)
    #   mean == nothing     -> corm(x, Base.mean(x), y, Base.mean(y))  (the default)
    #   (Number, Number)    -> corm(x, mean[1], y, mean[2])
    #   anything else       -> error("Invalid value of mean.")
    # The keyword is itself called `mean`, which is why the mean function is
    # reached through the qualified name Base.mean inside the body.
    # Which branch runs is decided by run-time tests on `mean`, not by the
    # argument types of x and y.
    function cor(x::AbstractVector, y::AbstractVector; mean=nothing)
        mean == 0 ? corzm(x, y) :
        mean == nothing ? corm(x, Base.mean(x), y, Base.mean(y)) :
        isa(mean, (Number,Number)) ? corm(x, mean[1], y, mean[2]) :
        error("Invalid value of mean.")
    end


Because 'mean' defaults to `nothing` (type 'Nothing'), I am unable to 
infer the return type of the function - the following reports Any 
as the return type.

    # Gather, for each method of `f` that matches `types`, the second element
    # of what Base.typeinf returns (the post reads it as the return type).
    rt = {}
    for m in Base._methods(f, types, -1)
        # m[3].func.code is the code object handed to inference; m[1] and m[2]
        # are passed through to typeinf unchanged.
        (ast, inferred) = Base.typeinf(m[3].func.code, m[1], m[2])
        push!(rt, inferred)
    end

Each of the underlying functions is type stable when called directly. 

`code_lowered` doesn't give much of a pointer to what will actually happen 
here:

julia> code_lowered( cor, ( Vector{Float64}, Vector{Float64} ) )
1-element Array{Any,1}:
 :($(Expr(:lambda, {:x,:y}, {{},{{:x,:Any,0},{:y,:Any,0}},{}}, :(begin $(
Expr(:line, 429, symbol("statistics.jl"), symbol("")))
        return __cor#195__(nothing,x,y)
    end))))


If I rewrite it (as `cordf`) with a regular optional argument for the mean:

code_lowered( cordf, ( Vector{Float64}, Vector{Float64}, Nothing ) )
1-element Array{Any,1}:
 :($(Expr(:lambda, {:x,:y,:mean}, {{},{{:x,:Any,0},{:y,:Any,0},{:mean,:Any,0
}},{}}, :(begin  # none, line 2:
        unless mean == 0 goto 0
        return corzm(x,y)
        0: 
        unless mean == nothing goto 1
        return corm(x,((top(getfield))(Base,:mean))(x),y,((top(getfield))(
Base,:mean))(y))
        1: 
        unless isa(mean,(top(tuple))(Number,Number)) goto 2
        return corm(x,getindex(mean,1),y,getindex(mean,2))
        2: 
        return error("Invalid value of mean.")
    end))))

The LLVM code does not look very clean. If I use a concrete type for the mean 
(say Float64) it looks better: 88 lines vs. 140.

julia> code_llvm( cordf, ( Vector{Float64}, Vector{Float64}, Nothing ) )


define %jl_value_t* @julia_cordf_20322(%jl_value_t*, %jl_value_t*, %
jl_value_t*) {
top:
  %3 = alloca [7 x %jl_value_t*], align 8
  %.sub = getelementptr inbounds [7 x %jl_value_t*]* %3, i64 0, i64 0
  %4 = getelementptr [7 x %jl_value_t*]* %3, i64 0, i64 2, !dbg !949
  store %jl_value_t* inttoptr (i64 10 to %jl_value_t*), %jl_value_t** %.sub, 
align 8
  %5 = getelementptr [7 x %jl_value_t*]* %3, i64 0, i64 1, !dbg !949
  %6 = load %jl_value_t*** @jl_pgcstack, align 8, !dbg !949
  %.c = bitcast %jl_value_t** %6 to %jl_value_t*, !dbg !949
  store %jl_value_t* %.c, %jl_value_t** %5, align 8, !dbg !949
  store %jl_value_t** %.sub, %jl_value_t*** @jl_pgcstack, align 8, !dbg !949
  store %jl_value_t* null, %jl_value_t** %4, align 8
  %7 = getelementptr [7 x %jl_value_t*]* %3, i64 0, i64 3
  store %jl_value_t* null, %jl_value_t** %7, align 8
  %8 = getelementptr [7 x %jl_value_t*]* %3, i64 0, i64 4
  store %jl_value_t* null, %jl_value_t** %8, align 8
  %9 = getelementptr [7 x %jl_value_t*]* %3, i64 0, i64 5
  store %jl_value_t* null, %jl_value_t** %9, align 8
  %10 = getelementptr [7 x %jl_value_t*]* %3, i64 0, i64 6
  store %jl_value_t* null, %jl_value_t** %10, align 8
  %11 = load %jl_value_t** inttoptr (i64 52494032 to %jl_value_t**), align 
16, !dbg !950
  %12 = call i32 @jl_egal(%jl_value_t* %2, %jl_value_t* %11), !dbg !950
  %13 = and i32 %12, 1, !dbg !950
  %14 = icmp eq i32 %13, 0, !dbg !950
  br i1 %14, label %L7, label %if1, !dbg !950


if1:                                              ; preds = %top
  %15 = load %jl_value_t** inttoptr (i64 116157440 to %jl_value_t**), align 
1024, !dbg !950
  %16 = icmp eq %jl_value_t* %15, null, !dbg !950
  br i1 %16, label %err2, label %ok3, !dbg !950


err2:                                             ; preds = %if1
  call void @jl_undefined_var_error(%jl_value_t* inttoptr (i64 
140022028478128 to %jl_value_t*)), !dbg !950
  unreachable


ok3:                                              ; preds = %if1
  %17 = getelementptr inbounds %jl_value_t* %15, i64 0, i32 0, !dbg !950
  %18 = load %jl_value_t** %17, align 8, !dbg !950
  %magicptr = ptrtoint %jl_value_t* %18 to i64, !dbg !950
  switch i64 %magicptr, label %notf4 [
    i64 23428288, label %isf5
    i64 23296304, label %isf5
  ], !dbg !950
....

lots more removed


You can resolve this by writing something like the below, but this doesn't 
handle the case where the two vectors are of different types. Just making 
two versions of the function, one with mean and one without, would almost 
certainly be cleaner. 

# Proposed replacement for `cor` that branches only on whether `mean` was
# supplied.
#
# Arguments
#   x, y      - vectors sharing one element type T; vectors with different
#               element types do not match this method
#   corrected - forwarded unchanged to whichever helper is called
#   mean      - `nothing` (the default) selects covnomean; any other value is
#               converted to T and passed on to covmean
#
# NOTE(review): covnomean and covmean are not defined in this snippet, and
# they are named after covariance although this method is called `cor` -
# confirm that is what was intended.
function cor{T<:Number}(x::Vector{T}, y::Vector{T}; corrected::Bool=true, mean=nothing)
    # `===` is an identity test against the `nothing` singleton. Unlike `==`
    # it cannot be overloaded by the type of `mean`, so it always yields a Bool.
    if mean === nothing
        covnomean(x, y, corrected)
    else
        # The typed local pins the supplied mean to the element type T.
        mymean::T = convert(T, mean)
        covmean(x, y, corrected, mymean)
    end
end

Is this intended behavior, and how concerned should I be?

[julia-users] Type stability (or not) in core stats functions

Reply via email to