On Sat, Apr 2, 2016 at 10:53 PM, Cedric St-Jean <[email protected]> wrote:
> That's actually a compiler bug, nice!
>
> abstract Feature
>
> type A <: Feature end
> evaluate(f::A) = 1.0
>
> foo(features::Vector{Feature}) = isa(features[1], A) ?
> evaluate(features[1]::A) : evaluate(features[1])
>
> @show foo(Feature[A()])
>
> type C <: Feature end
> evaluate(f::C) = 100
>
> @show foo(Feature[C()])
>
> yields
>
> foo(Feature[A()]) = 1.0
>
> foo(Feature[C()]) = 4.94e-322
>
>
> That explains why performance was the same on your computer: the compiler
> was making an incorrect assumption about the return type of `evaluate`. Or
> maybe it's an intentional gamble by the Julia devs, for the sake of
> performance.
>
> I couldn't find any issue describing this. Yichao?

This is effectively #265. It's not always predictable what assumption
the compiler makes now...

>
>
> On Saturday, April 2, 2016 at 10:16:59 PM UTC-4, Greg Plowman wrote:
>>
>> Thanks Cedric and Yichao.
>>
>> This makes sense that there might be new subtypes and associated
>> specialised methods. I understand that now. Thanks.
>>
>> On my machine (v0.4.5 Windows), fast() and pretty_fast() seem to run in
>> similar time.
>> So I looked as @code_warntype as Yichao suggested and get the following.
>> I don't fully know how to interpret output, but return type from the final
>> "catchall" evaluate() seems to be inferred/asserted as Float64 (see
>> highlighted yellow line below)
>>
>> Would this explain why pretty_fast() seems to be as efficient as fast()?
>>
>> Why is the return type being inferred asserted as Float64?
>>
>>
>> julia> @code_warntype fast(features)
>> Variables:
>>   features::Array{Feature,1}
>>   retval::Float64
>>   #s1::Int64
>>   i::Int64
>>
>> Body:
>>   begin  # none, line 2:
>>       retval = 0.0 # none, line 3:
>>       GenSym(2) = (Base.arraylen)(features::Array{Feature,1})::Int64
>>       GenSym(0) = $(Expr(:new, UnitRange{Int64}, 1,
>> :(((top(getfield))(Base.Intrinsics,:select_value)::I)((Base.sle_int)(1,GenSym(2))::Bool,GenSym(2),(Base.box)
>> (Int64,(Base.sub_int)(1,1)))::Int64)))
>>       #s1 = (top(getfield))(GenSym(0),:start)::Int64
>>       unless (Base.box)(Base.Bool,(Base.not_int)(#s1::Int64 ===
>> (Base.box)(Base.Int,(Base.add_int)((top(getfield))(GenSym(0),:stop)::Int64,1))::Bool))
>> goto 1
>>       2:
>>       GenSym(3) = #s1::Int64
>>       GenSym(4) = (Base.box)(Base.Int,(Base.add_int)(#s1::Int64,1))
>>       i = GenSym(3)
>>       #s1 = GenSym(4) # none, line 4:
>>       unless
>> (Main.isa)((Base.arrayref)(features::Array{Feature,1},i::Int64)::Feature,Main.A)::Bool
>> goto 4 # none, line 5:
>>       retval =
>> (Base.box)(Base.Float64,(Base.add_float)(retval::Float64,1.0))
>>       goto 5
>>       4:  # none, line 7:
>>       retval =
>> (Base.box)(Base.Float64,(Base.add_float)(retval::Float64,0.0))
>>       5:
>>       3:
>>       unless
>> (Base.box)(Base.Bool,(Base.not_int)((Base.box)(Base.Bool,(Base.not_int)(#s1::Int64
>> === (Base.box)(Base.Int,(Base.add_int)((top(getfield))(GenSym(0)
>> ,:stop)::Int64,1))::Bool)))) goto 2
>>       1:
>>       0:  # none, line 10:
>>       return retval::Float64
>>   end::Float64
>>
>>
>> julia> @code_warntype pretty_fast(features)
>> Variables:
>>   features::Array{Feature,1}
>>   retval::Float64
>>   #s1::Int64
>>   i::Int64
>>
>> Body:
>>   begin  # none, line 2:
>>       retval = 0.0 # none, line 3:
>>       GenSym(2) = (Base.arraylen)(features::Array{Feature,1})::Int64
>>       GenSym(0) = $(Expr(:new, UnitRange{Int64}, 1,
>> :(((top(getfield))(Base.Intrinsics,:select_value)::I)((Base.sle_int)(1,GenSym(2))::Bool,GenSym(2),(Base.box)
>> (Int64,(Base.sub_int)(1,1)))::Int64)))
>>       #s1 = (top(getfield))(GenSym(0),:start)::Int64
>>       unless (Base.box)(Base.Bool,(Base.not_int)(#s1::Int64 ===
>> (Base.box)(Base.Int,(Base.add_int)((top(getfield))(GenSym(0),:stop)::Int64,1))::Bool))
>> goto 1
>>       2:
>>       GenSym(4) = #s1::Int64
>>       GenSym(5) = (Base.box)(Base.Int,(Base.add_int)(#s1::Int64,1))
>>       i = GenSym(4)
>>       #s1 = GenSym(5) # none, line 4:
>>       unless
>> (Main.isa)((Base.arrayref)(features::Array{Feature,1},i::Int64)::Feature,Main.A)::Bool
>> goto 4 # none, line 5:
>>       retval =
>> (Base.box)(Base.Float64,(Base.add_float)(retval::Float64,1.0))
>>       goto 6
>>       4:  # none, line 6:
>>       unless
>> (Main.isa)((Base.arrayref)(features::Array{Feature,1},i::Int64)::Feature,Main.B)::Bool
>> goto 5 # none, line 7:
>>       retval =
>> (Base.box)(Base.Float64,(Base.add_float)(retval::Float64,0.0))
>>       goto 6
>>       5:  # none, line 9:
>>       GenSym(3) =
>> (Main.evaluate)((Base.arrayref)(features::Array{Feature,1},i::Int64)::Feature)::Float64
>>       retval =
>> (Base.box)(Base.Float64,(Base.add_float)(retval::Float64,GenSym(3)))
>>       6:
>>       3:
>>       unless
>> (Base.box)(Base.Bool,(Base.not_int)((Base.box)(Base.Bool,(Base.not_int)(#s1::Int64
>> === (Base.box)(Base.Int,(Base.add_int)((top(getfield))(GenSym(0)
>> ,:stop)::Int64,1))::Bool)))) goto 2
>>       1:
>>       0:  # none, line 12:
>>       return retval::Float64
>>   end::Float64
>>
>> Julia>
>>
>>
>>
>> On Sunday, April 3, 2016 at 8:04:35 AM UTC+10, Cedric St-Jean wrote:
>>>
>>>
>>>
>>> On Saturday, April 2, 2016 at 5:39:45 PM UTC-4, Greg Plowman wrote:
>>>>
>>>> Cedric,
>>>> On my machine fast() and pretty_fast() run in the roughly the same time.
>>>> Are you sure pre-compiled first?
>>>
>>>
>>> Yep. I'm using @benchmark on Julia 0.4.5 on OSX, and the time difference
>>> is definitely > 2X.
>>>
>>>>
>>>>
>>>> 1. If you add a default fallback method, say, evaluate(f::Feature) =
>>>> -1.0
>>>> Theoretically, would that inform the compiler that evaluate() always
>>>> returns a Float64 and therefore is type stable?
>>>
>>>
>>> No, because I might write
>>>
>>> type C <: Feature
>>> end
>>> evaluate(f::C) = 100
>>>
>>> and a Vector{Feature} might contain a C. Abstract types are open-ended,
>>> new types can be created a runtime, so the compiler can't assume anything
>>> about them, which is why they're not useful for performance. See here.
>>>
>>>
>>>>
>>>> Does dynamic dispatch mean compiler has to look up method at run time
>>>> (because it can't know type ahead of time).
>>>> This is equivalent to explicitly coding "static dispatch" with if
>>>> statements?
>>>> If so then why is it so much slower?
>>>> Providing a fallback method as in 1. or explicitly returning Float64
>>>> doesn't seem to improve speed, which naively suggests that slowness is from
>>>> dynamic dispatch not from type instability???
>>>>
>>>> 3. Also, on my machine pretty_fast() is as fast as fast(). Why is this
>>>> so, if pretty_fast() is supposedly type unstable?
>>>>
>>>> As you can probably tell, I'm pretty confused on this.
>>>>
>>>>
>>>> On Sunday, April 3, 2016 at 6:34:09 AM UTC+10, Cedric St-Jean wrote:
>>>>>
>>>>> Thank you for the detailed explanation. I tried it out:
>>>>>
>>>>> function pretty_fast(features::Vector{Feature})
>>>>>     retval = 0.0
>>>>>     for i in 1 : length(features)
>>>>>         if isa(features[i], A)
>>>>>             x = evaluate(features[i]::A)
>>>>>         elseif isa(features[i], B)
>>>>>             x = evaluate(features[i]::B)
>>>>>         else
>>>>>             x = evaluate(features[i])
>>>>>         end
>>>>>         retval += x
>>>>>     end
>>>>>     retval
>>>>> end
>>>>>
>>>>> On my laptop, fast runs in 10 microseconds, pretty_fast in 30, and slow
>>>>> in 210.
>>>>>
>>>>> On Saturday, April 2, 2016 at 12:24:18 PM UTC-4, Yichao Yu wrote:
>>>>>>
>>>>>> On Sat, Apr 2, 2016 at 12:16 PM, Tim Wheeler <[email protected]>
>>>>>> wrote:
>>>>>> > Thank you for the comments. In my original code it means the
>>>>>> > difference
>>>>>> > between a 30 min execution with memory allocation in the Gigabytes
>>>>>> > and a few
>>>>>> > seconds of execution with only 800 bytes using the second version.
>>>>>> > I thought under-the-hood Julia basically runs those if statements
>>>>>> > anyway for
>>>>>> > its dispatch, and don't know why it needs to allocate any memory.
>>>>>> > Having the if-statement workaround will be fine though.
>>>>>>
>>>>>> Well, if you have a lot of these cheap functions being dynamically
>>>>>> dispatched I think it is not a good way to use the type. Depending on
>>>>>> your problem, you may be better off using a enum/flags/dict to
>>>>>> represent the type/get the values.
>>>>>>
>>>>>> The reason for the allocation is that the return type is unknown. It
>>>>>> should be obvious to see if you check your code with code_warntype.
>>>>>>
>>>>>> >
>>>>>> > On Saturday, April 2, 2016 at 7:26:11 AM UTC-7, Cedric St-Jean
>>>>>> > wrote:
>>>>>> >>
>>>>>> >>
>>>>>> >>> Therefore there's no way the compiler can rewrite the slow version
>>>>>> >>> to the
>>>>>> >>> fast version.
>>>>>> >>
>>>>>> >>
>>>>>> >> It knows that the element type is a Feature, so it could produce:
>>>>>> >>
>>>>>> >> if isa(features[i], A)
>>>>>> >>     retval += evaluate(features[i]::A)
>>>>>> >> elseif isa(features[i], B)
>>>>>> >>     retval += evaluate(features[i]::B)
>>>>>> >> else
>>>>>> >>     retval += evaluate(features[i])
>>>>>> >> end
>>>>>> >>
>>>>>> >> and it would make sense for abstract types that have few subtypes.
>>>>>> >> I
>>>>>> >> didn't realize that dispatch was an order of magnitude slower than
>>>>>> >> type
>>>>>> >> checking. It's easy enough to write a macro generating this
>>>>>> >> expansion, too.
>>>>>> >>
>>>>>> >> On Saturday, April 2, 2016 at 2:05:20 AM UTC-4, Yichao Yu wrote:
>>>>>> >>>
>>>>>> >>> On Fri, Apr 1, 2016 at 9:56 PM, Tim Wheeler
>>>>>> >>> <[email protected]>
>>>>>> >>> wrote:
>>>>>> >>> > Hello Julia Users.
>>>>>> >>> >
>>>>>> >>> > I ran into a weird slowdown issue and reproduced a minimal
>>>>>> >>> > working
>>>>>> >>> > example.
>>>>>> >>> > Maybe someone can help shed some light.
>>>>>> >>> >
>>>>>> >>> > abstract Feature
>>>>>> >>> >
>>>>>> >>> > type A <: Feature end
>>>>>> >>> > evaluate(f::A) = 1.0
>>>>>> >>> >
>>>>>> >>> > type B <: Feature end
>>>>>> >>> > evaluate(f::B) = 0.0
>>>>>> >>> >
>>>>>> >>> > function slow(features::Vector{Feature})
>>>>>> >>> >     retval = 0.0
>>>>>> >>> >     for i in 1 : length(features)
>>>>>> >>> >         retval += evaluate(features[i])
>>>>>> >>> >     end
>>>>>> >>> >     retval
>>>>>> >>> > end
>>>>>> >>> >
>>>>>> >>> > function fast(features::Vector{Feature})
>>>>>> >>> >     retval = 0.0
>>>>>> >>> >     for i in 1 : length(features)
>>>>>> >>> >         if isa(features[i], A)
>>>>>> >>> >             retval += evaluate(features[i]::A)
>>>>>> >>> >         else
>>>>>> >>> >             retval += evaluate(features[i]::B)
>>>>>> >>> >         end
>>>>>> >>> >     end
>>>>>> >>> >     retval
>>>>>> >>> > end
>>>>>> >>> >
>>>>>> >>> > using ProfileView
>>>>>> >>> >
>>>>>> >>> > features = Feature[]
>>>>>> >>> > for i in 1 : 10000
>>>>>> >>> >     push!(features, A())
>>>>>> >>> > end
>>>>>> >>> >
>>>>>> >>> > slow(features)
>>>>>> >>> > @time slow(features)
>>>>>> >>> > fast(features)
>>>>>> >>> > @time fast(features)
>>>>>> >>> >
>>>>>> >>> > The output is:
>>>>>> >>> >
>>>>>> >>> > 0.000136 seconds (10.15 k allocations: 166.417 KB)
>>>>>> >>> > 0.000012 seconds (5 allocations: 176 bytes)
>>>>>> >>> >
>>>>>> >>> >
>>>>>> >>> > This is a HUGE difference! Am I missing something big? Is there
>>>>>> >>> > a good
>>>>>> >>> > way
>>>>>> >>> > to inspect code to figure out where I am going wrong?
>>>>>> >>>
>>>>>> >>> This is because of type instability as you will find in the
>>>>>> >>> performance
>>>>>> >>> tips.
>>>>>> >>> Note that slow and fast are not equivalent since the fast version
>>>>>> >>> only
>>>>>> >>> accept `A` or `B` but the slow version accepts any subtype of
>>>>>> >>> feature
>>>>>> >>> that you may ever define. Therefore there's no way the compiler
>>>>>> >>> can
>>>>>> >>> rewrite the slow version to the fast version.
>>>>>> >>> There are optimizations that can be applied to bring down the gap
>>>>>> >>> but
>>>>>> >>> there'll always be a large difference between the two.
>>>>>> >>>
>>>>>> >>> >
>>>>>> >>> >
>>>>>> >>> > Thank you in advance for any guidance.
>>>>>> >>> >
>>>>>> >>> >
>>>>>> >>> > -Tim
>>>>>> >>> >
>>>>>> >>> >
>>>>>> >>> >
>>>>>> >>> >
>>>>>> >>> >

Reply via email to