On Wed, 2016-09-28 at 08:50, Gunnar Farnebäck <gun...@lysator.liu.se> wrote:
> It's normal that manually inlined code of this kind is faster than wrapped
> code unless the compiler manages to see the full inlining potential. In
> this case the huge memory allocations for the wrapped solutions indicate
> that it's nowhere near doing that at all. I doubt it will take you all the
> way, but start by modifying your inner M_CPS function to take only
> positional arguments, or by declaring the type of the keyword argument as
> suggested in the performance tips section of the manual.

Even annotated keywords are slower than normal, positional ones (except
when their default value is used, as far as I recall).

> Den onsdag 28 september 2016 kl. 06:29:37 UTC+2 skrev K leo:
>>
>> I tested a few different ways of wrapping functions.  It looks like different
>> ways of wrapping have slightly different costs.  But the most confusing thing to
>> me is that putting everything inline looks much faster than wrapping things
>> up.  I would understand this in other languages, but I thought Julia
>> advocates simple wrapping.  Can anyone help explain what is happening
>> below, and how I can do the most efficient wrapping in the demo code?
>>
>> Demo code is included below.
>>
>> julia> versioninfo()
>> Julia Version 0.5.0
>> Commit 3c9d753 (2016-09-19 18:14 UTC)
>> Platform Info:
>>   System: Linux (x86_64-pc-linux-gnu)
>>   CPU: Intel(R) Core(TM) i5-5257U CPU @ 2.70GHz
>>   WORD_SIZE: 64
>>   BLAS: libopenblas (USE64BITINT DYNAMIC_ARCH NO_AFFINITY Haswell)
>>   LAPACK: libopenblas64_
>>   LIBM: libopenlibm
>>   LLVM: libLLVM-3.7.1 (ORCJIT, broadwell)
>>
>> julia> testFunc()
>> calling LoopCP (everything inline)
>>   0.097556 seconds (2.10 k allocations: 290.625 KB)
>> elapsed time (ns): 97555896
>> bytes allocated:   297600
>> pool allocs:       2100
>> [0.0,4200.0,0.0,0.0,4200.0,4200.0,4200.0,4200.0,0.0,4200.0,4200.0]
>>
>> calling LoopCP0 (slightly wrapped)
>>   4.173830 seconds (49.78 M allocations: 2.232 GB, 5.83% gc time)
>> elapsed time (ns): 4173830495
>> gc time (ns):      243516584
>> bytes allocated:   2396838538
>> pool allocs:       49783357
>> GC pauses:         104
>> full collections:  1
>> [4200.0,0.0,4200.0,4200.0,0.0,0.0,0.0,0.0,4200.0,0.0,0.0]
>>
>> calling LoopCP1 (wrapped one way)
>>   5.274723 seconds (59.59 M allocations: 2.378 GB, 3.62% gc time)
>> elapsed time (ns): 5274722983
>> gc time (ns):      191036337
>> bytes allocated:   2553752638
>> pool allocs:       59585834
>> GC pauses:         112
>> [8400.0,0.0,8400.0,8400.0,0.0,0.0,0.0,0.0,8400.0,0.0,0.0]
>>
>> calling LoopCP2 (wrapped another way)
>>   5.212895 seconds (59.58 M allocations: 2.378 GB, 3.60% gc time)
>> elapsed time (ns): 5212894550
>> gc time (ns):      187696529
>> bytes allocated:   2553577600
>> pool allocs:       59582100
>> GC pauses:         111
>> [0.0,8400.0,0.0,0.0,8400.0,8400.0,8400.0,8400.0,0.0,8400.0,8400.0]
>>
>> const dim=1000
>>>
>>>
>>>> type Tech
>>>
>>>     a::Array{Float64,1}
>>>
>>>     c::Array{Int,1}
>>>
>>>
>>>>     function Tech()
>>>
>>>         this = new()
>>>
>>>         this.a = zeros(Float64, dim)
>>>
>>>         this.c = rand([0,1;], dim)
>>>
>>>         this
>>>
>>>     end
>>>
>>> end
>>>
>>>
>>>> function LoopCP(csign::Int, tech::Tech)
>>>
>>>     for j=1:10
>>>
>>>         for xRat in [1.:20.;]
>>>
>>>             @inbounds for i = 1:dim
>>>
>>>                 if csign == tech.c[i]
>>>
>>>                     tech.a[i] += 2.*xRat
>>>
>>>                 else
>>>
>>>                     tech.a[i] = 0.
>>>
>>>                 end
>>>
>>>             end
>>>
>>>         end #
>>>
>>>     end
>>>
>>>     nothing
>>>
>>> end
>>>
>>>
>>>> function M_CPS(i::Int, csign::Int, tech::Tech; xRat=0.)
>>>
>>>     if csign == tech.c[i]
>>>
>>>         tech.a[i] += 2.*xRat
>>>
>>>     else
>>>
>>>         tech.a[i] = 0.
>>>
>>>     end
>>>
>>>     nothing
>>>
>>> end
>>>
>>>
>>>> function LoopCP0(csign::Int, tech::Tech)
>>>
>>>     for j=1:10
>>>
>>>         for xRat in [1.:20.;]
>>>
>>>             @inbounds for i = 1:dim
>>>
>>>                 M_CPS(i, csign, tech, xRat=xRat)
>>>
>>>             end
>>>
>>>         end #
>>>
>>>     end
>>>
>>>     nothing
>>>
>>> end
>>>
>>>
>>>> function MoleculeWrapS(csign::Int, tech::Tech, molecule::Function,
>>>> xRat=0.)
>>>
>>>     @inbounds for i = 1:dim
>>>
>>>         molecule(i, csign, tech; xRat=xRat)
>>>
>>>     end
>>>
>>>     nothing
>>>
>>> end
>>>
>>>
>>>> function LoopRunnerM1(csign::Int, tech::Tech, molecule::Function)
>>>
>>>     for j=1:10
>>>
>>>         for xRat in [1.:20.;]
>>>
>>>             MoleculeWrapS(csign, tech, molecule, xRat)
>>>
>>>         end #
>>>
>>>     end
>>>
>>>     nothing
>>>
>>> end
>>>
>>>
>>>> LoopCP1(csign::Int, tech::Tech) = LoopRunnerM1(csign, tech, M_CPS)
>>>
>>>
>>>> WrapCPS(csign::Int, tech::Tech, xRat=0.) = MoleculeWrapS(csign, tech,
>>>> M_CPS, xRat)
>>>
>>>
>>>> function LoopRunnerM2(csign::Int, tech::Tech, loop::Function)
>>>
>>>     for j=1:10
>>>
>>>         for xRat in [1.:20.;]
>>>
>>>             loop(csign, tech, xRat)
>>>
>>>         end #
>>>
>>>     end
>>>
>>>     nothing
>>>
>>> end
>>>
>>>
>>>> LoopCP2(csign::Int, tech::Tech) = LoopRunnerM2(csign, tech, WrapCPS)
>>>
>>>
>>>> function testFunc()
>>>
>>>     tech = Tech()
>>>
>>>     nloops = 100
>>>
>>>
>>>>     println("calling LoopCP (everything inline)")
>>>
>>>     tech.a = zeros(tech.a)
>>>
>>>     @timev for i=1:nloops
>>>
>>>         LoopCP(rand([0,1]), tech)
>>>
>>>     end
>>>
>>>     println(tech.a[10:20], "\n")
>>>
>>>
>>>>     println("calling LoopCP0 (slightly wrapped)")
>>>
>>>     tech.a = zeros(tech.a)
>>>
>>>     @timev for i=1:nloops
>>>
>>>         LoopCP0(rand([0,1]), tech)
>>>
>>>     end
>>>
>>>     println(tech.a[10:20], "\n")
>>>
>>>
>>>>     println("calling LoopCP1 (wrapped one way)")
>>>
>>>     tech.a = zeros(tech.a)
>>>
>>>     @timev for i=1:nloops
>>>
>>>         LoopCP1(rand([0,1]), tech)
>>>
>>>     end
>>>
>>>     println(tech.a[10:20], "\n")
>>>
>>>
>>>>     println("calling LoopCP2 (wrapped another way)")
>>>
>>>     tech.a = zeros(tech.a)
>>>
>>>     @timev for i=1:nloops
>>>
>>>         LoopCP2(rand([0,1]), tech)
>>>
>>>     end
>>>
>>>     println(tech.a[10:20], "\n")
>>>
>>>
>>>
>>>     nothing
>>>
>>> end
>>>
>>>
>>>

Reply via email to