Thank you!
julia> include("test.jl")
test_all (generic function with 1 method)
julia> test_all(5);
test_stride
elapsed time: 1.835237919 seconds (0 bytes allocated)
test_view
elapsed time: 9.453440318 seconds (42 MB allocated, 0.10% gc time in 2
pauses with 1 full sweep)
test_unsafe
elapsed time: 1.821716706 seconds (320 bytes allocated)
test_unsafeview
elapsed time: 9.520118901 seconds (0 bytes allocated)
Can you explain why this fixes the issue? My understanding (which is
apparently wrong) is that this kind of parameterization wasn't necessary,
since the compiler determines the types when the function is called and
compiles it for that specific combination of types. I was trying to follow
the suggestions here:
http://docs.julialang.org/en/release-0.3/manual/style-guide/#avoid-writing-overly-specific-types
Are things different for constructors?
On Monday, April 20, 2015 at 12:08:12 PM UTC-6, Tim Holy wrote:
>
> I wonder if your color printing isn't working? Because for me
> @code_warntype
> raised some big red flags about setk_Unsafe(a). This fixed the problems:
>
> function UnsafeSlice{T,N}(a::Array{T,N}, slicedim::Int, start=1)
> p = pointer(a)
> str = stride(a, slicedim)
> UnsafeSlice{T, N, Ptr{T}}(start, str, size(a),p)
> end
>
> --Tim
>
> On Monday, April 20, 2015 10:47:04 AM Peter Brady wrote:
> > I tried making UnsafeSlice a subtype of AbstractArray{T,N} but that
> > didn't have an impact.
> >
> > The @code_warntype for update, size, and setindex! didn't raise any red
> > flags
> >
> > julia> @code_warntype setindex!(UnsafeSlice(zeros(Int, (10, 10, 10)),
> 3),
> > -10, 1)
> > Variables:
> > s::UnsafeSlice{Int64,3,Ptr{Int64}}
> > x::Int64
> > i::Int64
> >
> > Body:
> > begin # /usr/local/runs/compact-fd/symbolic/julia/test_alloc.jl, line
> 19:
> > GenSym(1) =
> > (top(getfield))(s::UnsafeSlice{Int64,3,Ptr{Int64}},:p)::Ptr{Int64}
> > GenSym(0) =
> >
> (top(box))(Int64,(top(add_int))((top(getfield))(s::UnsafeSlice{Int64,3,Ptr{I
>
> >
> nt64}},:start)::Int64,(top(box))(Int64,(top(mul_int))((top(box))(Int64,(top(
>
> >
> sub_int))(i::Int64,1)),(top(getfield))(s::UnsafeSlice{Int64,3,Ptr{Int64}},:s
>
> > tride)::Int64)))) return
> > (top(pointerset))(GenSym(1),x::Int64,GenSym(0))::Ptr{Int64}
> end::Ptr{Int64}
> >
> > julia> @code_warntype size(UnsafeSlice(zeros(Int, (10, 10, 10)), 3), 3)
> > Variables:
> > s::UnsafeSlice{Int64,3,Ptr{Int64}}
> > i::Int64
> >
> > Body:
> > begin # /usr/local/runs/compact-fd/symbolic/julia/test_alloc.jl, line
> 13:
> > return
> >
> getfield((top(getfield))(s::UnsafeSlice{Int64,3,Ptr{Int64}},:size)::Tuple{In
>
> > t64,Int64,Int64},i::Int64)::Int64 end::Int64
> >
> > julia> @code_warntype update(UnsafeSlice(zeros(Int, (10, 10, 10)), 3),
> 3,
> > 20)
> > Variables:
> > a::UnsafeSlice{Int64,3,Ptr{Int64}}
> > idx::Int64
> > off::Int64
> > #s3::Int64
> > i::Int64
> >
> > Body:
> > begin # /usr/local/runs/compact-fd/symbolic/julia/test_alloc.jl, line
> 31:
> > GenSym(3) =
> >
> getfield((top(getfield))(a::UnsafeSlice{Int64,3,Ptr{Int64}},:size)::Tuple{In
>
> > t64,Int64,Int64},idx::Int64)::Int64 GenSym(0) = $(Expr(:new,
> > UnitRange{Int64}, 1,
> >
> >
> :(((top(getfield))(Intrinsics,:select_value))((top(sle_int))(1,GenSym(3))::B
>
> > :ool,GenSym(3),(top(box))(Int64,(top(sub_int))(1,1)))::Int64)))
> > #s3 = (top(getfield))(GenSym(0),:start)::Int64
> > unless (top(box))(Bool,(top(not_int))(#s3::Int64 ===
> >
> (top(box))(Int64,(top(add_int))((top(getfield))(GenSym(0),:stop)::Int64,1)):
>
> > :Bool)) goto 1
> > 2:
> > GenSym(6) = #s3::Int64
> > GenSym(7) = (top(box))(Int64,(top(add_int))(#s3::Int64,1))
> > i = GenSym(6)
> > #s3 = GenSym(7) # line 33:
> > GenSym(2) =
> >
> (top(box))(Int64,(top(add_int))((top(box))(Int64,(top(mul_int))(-10,off::Int
>
> > 64)),i::Int64)) GenSym(5) =
> > (top(getfield))(a::UnsafeSlice{Int64,3,Ptr{Int64}},:p)::Ptr{Int64}
> > GenSym(4) =
> >
> (top(box))(Int64,(top(add_int))((top(getfield))(a::UnsafeSlice{Int64,3,Ptr{I
>
> >
> nt64}},:start)::Int64,(top(box))(Int64,(top(mul_int))((top(box))(Int64,(top(
>
> >
> sub_int))(i::Int64,1)),(top(getfield))(a::UnsafeSlice{Int64,3,Ptr{Int64}},:s
>
> > tride)::Int64))))
> > (top(pointerset))(GenSym(5),GenSym(2),GenSym(4))::Ptr{Int64} 3:
> > unless
> > (top(box))(Bool,(top(not_int))((top(box))(Bool,(top(not_int))(#s3::Int64
> > ===
> >
> (top(box))(Int64,(top(add_int))((top(getfield))(GenSym(0),:stop)::Int64,1)):
>
> > :Bool)))) goto 2
> > 1:
> > 0: # line 36:
> > return a::UnsafeSlice{Int64,3,Ptr{Int64}}
> > end::UnsafeSlice{Int64,3,Ptr{Int64}}
> > .
> >
> > However, running this on the setk_Unsafe function:
> >
> > julia> @code_warntype setk_UnSafe(zeros(Int, (10, 10, 10)))
> > Variables:
> > a::Array{Int64,3}
> > us::UnsafeSlice{T,N,P<:Ptr{T}}
> > #s1::Int64
> > j::Int64
> > #s3::Int64
> > i::Int64
> > ####p#3255#3279::Ptr{Int64}
> > ####str#3256#3280::Int64
> >
> >
> > Body:
> > begin # /usr/local/runs/compact-fd/symbolic/julia/test_alloc.jl, line
> 40:
> > ####p#3255#3279 = (top(ccall))(:jl_array_ptr,$(Expr(:call1,
> >
> > :(top(apply_type)), :Ptr, Int64)),$(Expr(:call1, :(top(svec)),
> > :Any)),a::Array{Int64,3},0)::Ptr{Int64}
> >
> > ####str#3256#3280 = stride(a::Array{Int64,3},3)::Int64
> > us =
> >
> ((top(apply_type))(UnsafeSlice,Int64,3,typeof(####p#3255#3279::Ptr{Int64}):
> >
> :Type{Ptr{Int64}})::Type{_<:UnsafeSlice{Int64,N,Ptr{Int64}}})(1,####str#3256
>
> >
> #3280::Int64,(top(tuple))((top(arraysize))(a::Array{Int64,3},1)::Int64,(top(
>
> >
> arraysize))(a::Array{Int64,3},2)::Int64,(top(arraysize))(a::Array{Int64,3},3
>
> >
> )::Int64)::Tuple{Int64,Int64,Int64},####p#3255#3279::Ptr{Int64})::UnsafeSlic
>
> > e{T,N,P<:Ptr{T}} # line 42:
> > GenSym(4) = (top(arraysize))(a::Array{Int64,3},2)::Int64
> > GenSym(0) = $(Expr(:new, UnitRange{Int64}, 1, :(((top(getfield))(
> >
> Intrinsics,:select_value))((top(sle_int))(1,GenSym(4))::Bool,GenSym(4),(top(
>
> > box))(Int64,(top(sub_int))(1,1)))::Int64)))
> > #s1 = (top(getfield))(GenSym(0),:start)::Int64
> > unless (top(box))(Bool,(top(not_int))(#s1::Int64 ===
> >
> (top(box))(Int64,(top(add_int))((top(getfield))(GenSym(0),:stop)::Int64,1)):
>
> > :Bool)) goto 1
> > 2:
> > GenSym(7) = #s1::Int64
> > GenSym(8) = (top(box))(Int64,(top(add_int))(#s1::Int64,1))
> > j = GenSym(7)
> > #s1 = GenSym(8)
> > GenSym(5) = (top(arraysize))(a::Array{Int64,3},1)::Int64
> > GenSym(2) = $(Expr(:new, UnitRange{Int64}, 1, :(((top(getfield))(
> >
> Intrinsics,:select_value))((top(sle_int))(1,GenSym(5))::Bool,GenSym(5),(top(
>
> > box))(Int64,(top(sub_int))(1,1)))::Int64)))
> > #s3 = (top(getfield))(GenSym(2),:start)::Int64
> > unless (top(box))(Bool,(top(not_int))(#s3::Int64 ===
> >
> (top(box))(Int64,(top(add_int))((top(getfield))(GenSym(2),:stop)::Int64,1)):
>
> > :Bool)) goto 4
> > 5:
> > GenSym(9) = #s3::Int64
> > GenSym(10) = (top(box))(Int64,(top(add_int))(#s3::Int64,1))
> > i = GenSym(9)
> > #s3 = GenSym(10) # line 44:
> > GenSym(11) = (top(arraysize))(a::Array{Int64,3},1)::Int64
> > GenSym(12) = (top(arraysize))(a::Array{Int64,3},2)::Int64
> > GenSym(13) = (top(arraysize))(a::Array{Int64,3},3)::Int64
> >
> (top(setfield!))(us::UnsafeSlice{T,N,P<:Ptr{T}},:start,(top(convert
> >
> ))((top(fieldtype))((top(typeof))(us::UnsafeSlice{T,N,P<:Ptr{T}})::Type{_<:
> >
> UnsafeSlice{T,N,P<:Ptr{T}}},:start)::Type{_},(top(box))(Int64,(top(add_int
> >
> ))(i::Int64,(top(box))(Int64,(top(mul_int))(GenSym(11),(top(box))(Int64,(top
>
> >
> (add_int))((top(box))(Int64,(top(sub_int))(j::Int64,1)),(top(box))(Int64,(
> >
> top(mul_int))(GenSym(12),(top(box))(Int64,(top(sub_int))(1,1)))))))))))::An
> > y )::Any # line 46:
> > update(us::UnsafeSlice{T,N,P<:Ptr{T}},3,(top(getfield))(us::
> > UnsafeSlice{T,N,P<:Ptr{T}},:start)::Int64)::UnsafeSlice{T,N,P<:Ptr{T}}
> > 6:
> > unless
> > (top(box))(Bool,(top(not_int))((top(box))(Bool,(top(not_int))(#s3::Int64
> > ===
> >
> (top(box))(Int64,(top(add_int))((top(getfield))(GenSym(2),:stop)::Int64,1)):
>
> > :Bool)))) goto 5
> > 4:
> > 3:
> > unless
> > (top(box))(Bool,(top(not_int))((top(box))(Bool,(top(not_int))(#s1::Int64
> > ===
> >
> (top(box))(Int64,(top(add_int))((top(getfield))(GenSym(0),:stop)::Int64,1)):
>
> > :Bool)))) goto 2
> > 1:
> > 0: # line 49:
> > return a::Array{Int64,3}
> > end::Array{Int64,3}
> >
> >
> > On my terminal the type ::UnsafeSlice{T,N,P<:Ptr{T}} is highlighted in
> > red in this function, as is the ::Any on what claims to be 'line 46'.
> > Is there something about my type that is tripping up the inferencer?
> >
> > Here's the test_alloc.jl script ( I don't normally double space
> everything
> > but it made the .mem file more readable):
> >
> > using ArrayViews
> > import Base: size, getindex, setindex!, ndims, start, stride, pointer
> >
> >
> > type UnsafeSlice{T,N, P<:Ptr} <: AbstractArray{T,N}
> > start::Int
> > stride::Int
> > size::NTuple{N,Int}
> > p::P
> > end
> >
> >
> > size(s::UnsafeSlice) = s.size
> >
> >
> > size(s::UnsafeSlice, i::Int) = s.size[i]
> >
> >
> > ndims{T,N}(s::UnsafeSlice{T,N}) = N
> >
> >
> > getindex(s::UnsafeSlice, i::Int) = unsafe_load(s.p,
> s.start+(i-1)*s.stride)
> >
> >
> > setindex!(s::UnsafeSlice, x, i::Int) = unsafe_store!(s.p, x,
> s.start+(i-1)*s
> > .stride)
> >
> >
> > function UnsafeSlice(a, slicedim::Int, start=1)
> > p = pointer(a)
> >
> >
> > str = stride(a, slicedim)
> >
> >
> > UnsafeSlice{eltype(a), ndims(a), typeof(p)}(start, str, size(a),p)
> > end
> >
> >
> > function update(a::UnsafeSlice, idx, off)
> >
> >
> > for i=1:size(a, idx)
> >
> >
> > a[i] = -10*off+i
> > end
> >
> >
> > a
> > end
> >
> >
> > function setk_UnSafe{T}(a::Array{T,3})
> > us = UnsafeSlice(a, 3)
> >
> >
> > for j=1:size(a,2),i=1:size(a,1)
> >
> >
> > us.start = sub2ind(size(a), i, j, 1)
> >
> >
> > update(us, 3, us.start)
> > end
> >
> >
> > a
> > end
> >
> >
> > function test_unsafe(n, time=true)
> > a = zeros(Int, (320, 320, 320))
> >
> >
> > # warmup
> > setk_UnSafe(a);
> >
> >
> > Profile.clear_malloc_data()
> >
> >
> > for i=1:n
> >
> >
> > setk_UnSafe(a)
> >
> >
> > end
> >
> >
> > a
> > end
> >
> >
> > Thanks for taking a look at this.
> >
> > On Monday, April 20, 2015 at 11:14:32 AM UTC-6, Tim Holy wrote:
> > > Sorry, I didn't notice you'd included the test function.
> > >
> > > What happens if you make
> > >
> > > UnsafeSlice{T,N, P<:Ptr} <: AbstractArray{T,N}
> > >
> > > rather than
> > >
> > > UnsafeSlice{T,N, P<:Ptr} <: AbstractArray
> > >
> > > ?
> > >
> > > Also try @code_warntype on those functions.
> > >
> > > If these don't work, can you paste in a version of your code that
> doesn't
> > > have
> > > all the malloc markup?
> > >
> > > --Tim
> > >
> > > On Monday, April 20, 2015 10:05:01 AM Peter Brady wrote:
> > > > The body of the test_unsafe function is:
> > > > - function test_unsafe(n, time=true)
> > > > 0 a = zeros(Int, (320, 320, 320))
> > > > -
> > > > - # warmup
> > > > 0 setk_UnSafe(a);
> > > > -
> > > > 0 Profile.clear_malloc_data()
> > > > -
> > > > 0 for i=1:n
> > > > -
> > > > 0 setk_UnSafe(a)
> > > > -
> > > > - end
> > > > -
> > > > 0 a
> > > > - end
> > > >
> > > > So I clear the malloc data after running my expensive function once
> and
> > > > then run it a second time. There is no difference in the .mem file
> if I
> > > > call Profile.clear_malloc_data and then call test_unsafe a second
> time.
> > > >
> > > > You are right about the bug but that line of code is never called in
> > >
> > > these
> > >
> > > > tests. Correcting that line did not change the allocation patterns.
> > > >
> > > > On Monday, April 20, 2015 at 10:54:49 AM UTC-6, Tim Holy wrote:
> > > > > First, you need to run it twice, see
> > >
> > >
> http://docs.julialang.org/en/latest/manual/profile/#memory-allocation-anal
> > >
> > > > > ysis and the part about clear_malloc_data.
> > > > >
> > > > > Second, I think you have a bug:
> > > > > size(s::UnsafeSlice) = size(s.size)
> > > > >
> > > > > should presumably be
> > > > >
> > > > > size(s::UnsafeSlice) = s.size
> > > > >
> > > > > --Tim
> > > > >
> > > > > On Monday, April 20, 2015 09:35:33 AM Peter Brady wrote:
> > > > > > Here's the results of running --track-allocation=user
> --inline=no on
> > > > >
> > > > > 0.4.
> > > > >
> > > > > > Note that I also deleted all the macros which were affecting
> the
> > > > >
> > > > > reported
> > > > >
> > > > > > line numbers.
> > > > > >
> > > > > > > I have three questions based on the data below:
> > > > > > > 1. Why is the call to size, which indexes into a tuple, so
> > > > > > > expensive?
> > > > > > > 2. Why is setindex! so expensive?
> > > > > > > 3. Why is it so expensive to update the 'start' attribute of my
> > > > > > > unsafeslice?
> > > > > > Does anyone have any answers or any suggestions on what tools to
> use
> > >
> > > to
> > >
> > > > > > find the answers?
> > > > > >
> > > > > > Here's my session:
> > > > > >
> > > > > >
> > > > > > $ ~/gitrepos/julia0.4/julia --track-allocation=user --inline=no
> > > > > >
> > > > > > _
> > > > > >
> > > > > > _ _ _(_)_ | A fresh approach to technical
> computing
> > > > > >
> > > > > > (_) | (_) (_) | Documentation:
> http://docs.julialang.org
> > > > > >
> > > > > > _ _ _| |_ __ _ | Type "help()" for help.
> > > > > >
> > > > > > | | | | | | |/ _` | |
> > > > > > | | |
> > > > > > | | |_| | | | (_| | | Version 0.4.0-dev+4385 (2015-04-20
> 14:52
> > >
> > > UTC)
> > >
> > > > > > _/ |\__'_|_|_|\__'_| | Commit 5499882 (0 days old master)
> > > > > >
> > > > > > |__/ | x86_64-redhat-linux
> > > > > >
> > > > > > julia> include("test_alloc.jl")
> > > > > > test_unsafe (generic function with 2 methods)
> > > > > >
> > > > > >
> > > > > > julia> test_unsafe(1);
> > > > > >
> > > > > >
> > > > > > julia>
> > > > > > [ptb@cyrus julia]$
> > > > > >
> > > > > >
> > > > > > And the output
> > > > > >
> > > > > > - using ArrayViews
> > > > > >
> > > > > > - import Base: size, getindex, setindex!, ndims, start,
> > >
> > > stride,
> > >
> > > > > > pointer
> > > > > >
> > > > > > -
> > > > > > - type UnsafeSlice{T,N, P<:Ptr} <: AbstractArray
> > > > > > - start::Int
> > > > > > - stride::Int
> > > > > > - size::NTuple{N,Int}
> > > > > > - p::P
> > > > > > - end
> > > > > > -
> > > > > > - size(s::UnsafeSlice) = size(s.size)
> > > > > > -
> > > > > >
> > > > > > 7356448 size(s::UnsafeSlice, i::Int) = s.size[i]
> > > > > >
> > > > > > -
> > > > > > - ndims{T,N}(s::UnsafeSlice{T,N}) = N
> > > > > > -
> > > > > > - getindex(s::UnsafeSlice, i::Int) = unsafe_load(s.p,
> > > > >
> > > > > s.start+(i-1)*
> > > > >
> > > > > > s.stride)
> > > > > >
> > > > > > -
> > > > > >
> > > > > > 1048559648 setindex!(s::UnsafeSlice, x, i::Int) =
> unsafe_store!(s.p,
> > >
> > > x,
> > >
> > > > > s.
> > > > >
> > > > > > start+(i-1)*s.stride)
> > > > > >
> > > > > > -
> > > > > > - function UnsafeSlice(a, slicedim::Int, start=1)
> > > > > > 0 p = pointer(a)
> > > > > > -
> > > > > > 0 str = stride(a, slicedim)
> > > > > > -
> > > > > >
> > > > > > 368 UnsafeSlice{eltype(a), ndims(a), typeof(p)}(start,
> > >
> > > str,
> > >
> > > > > size(a
> > > > >
> > > > > > ),p)
> > > > > >
> > > > > > - end
> > > > > > -
> > > > > > - function update(a::UnsafeSlice, idx, off)
> > > > > > -
> > > > > > 0 for i=1:size(a, idx)
> > > > > > -
> > > > > > 0 a[i] = -10*off+i
> > > > > > - end
> > > > > > -
> > > > > > 0 a
> > > > > > - end
> > > > > > -
> > > > > > - function setk_UnSafe{T}(a::Array{T,3})
> > > > > > 0 us = UnsafeSlice(a, 3)
> > > > > > -
> > > > > > 0 for j=1:size(a,2),i=1:size(a,1)
> > > > > > -
> > > > > >
> > > > > > 14712896 us.start = sub2ind(size(a), i, j, 1)
> > > > > >
> > > > > > -
> > > > > > 0 update(us, 3, us.start)
> > > > > > - end
> > > > > > -
> > > > > > 0 a
> > > > > > - end
> > > > > > -
> > > > > > - function test_unsafe(n, time=true)
> > > > > > 0 a = zeros(Int, (320, 320, 320))
> > > > > > -
> > > > > > - # warmup
> > > > > > 0 setk_UnSafe(a);
> > > > > > -
> > > > > > 0 Profile.clear_malloc_data()
> > > > > > -
> > > > > > 0 for i=1:n
> > > > > > -
> > > > > > 0 setk_UnSafe(a)
> > > > > > -
> > > > > > - end
> > > > > > -
> > > > > > 0 a
> > > > > > - end
> > > > > > -
> > > > > >
> > > > > > On Monday, April 20, 2015 at 9:04:41 AM UTC-6, Peter Brady
> wrote:
> > > > > > > Accidentally hit reply instead of reply-all. Sorry for the
> double
> > > > >
> > > > > post.
> > > > >
> > > > > > > Ran my script in 0.4 and got these results...
> > > > > > >
> > > > > > > julia> test_all(5)
> > > > > > > test_stride
> > > > > > > elapsed time: 2.008043041 seconds (0 bytes allocated)
> > > > > > > test_view
> > > > > > > elapsed time: 8.871387399 seconds (42 MB allocated, 0.23% gc
> time
> > >
> > > in 2
> > >
> > > > > > > pauses with 1 full sweep)
> > > > > > > test_unsafe
> > > > > > > elapsed time: 2.308598574 seconds (46 MB allocated, 0.68% gc
> time
> > >
> > > in 2
> > >
> > > > > > > pauses with 1 full sweep)
> > > > > > > test_unsafeview
> > > > > > > elapsed time: 9.106651158 seconds (0 bytes allocated)
> > > > > > >
> > > > > > > julia> test_all(10)
> > > > > > > test_stride
> > > > > > > elapsed time: 4.012240175 seconds (0 bytes allocated)
> > > > > > > test_view
> > > > > > > elapsed time: 18.085514211 seconds (85 MB allocated, 0.16% gc
> time
> > >
> > > in
> > >
> > > > > 4
> > > > >
> > > > > > > pauses with 1 full sweep)
> > > > > > > test_unsafe
> > > > > > > elapsed time: 4.477773618 seconds (93 MB allocated, 1.12% gc
> time
> > >
> > > in 4
> > >
> > > > > > > pauses with 1 full sweep)
> > > > > > > test_unsafeview
> > > > > > > elapsed time: 18.146163969 seconds (0 bytes allocated)
> > > > > > >
> > > > > > > So the allocation for the new unsafeview has been reduced to
> zero
> > >
> > > but
> > >
> > > > > it
> > > > >
> > > > > > > has become slower than the regular view.
> > > > > > >
> > > > > > > Perhaps the compiler optimizations that have been discussed
> here
> > >
> > > are
> > >
> > > > > > > > occurring since the only occurrence of 'unsafeview' is the
> argument
> > >
> > > to a
> > >
> > > > > > > function.
> > > > > > >
> > > > > > > On Mon, Apr 20, 2015 at 12:57 AM, René Donner <[email protected]
> > > > >
> > > > > <javascript:>> wrote:
> > > > > > >> What about something like unsafe_updateview!(view,
> indices...) ?
> > > > > > >>
> > > > > > >> It could be used like this (pseudocode):
> > > > > > >> view = unsafe_view(data, 1, 1, :) # to construct /
> allocate
> > > > > > >> for i in ..., j in ...
> > > > > > >>
> > > > > > >> unsafe_updateview!(view, i, j, :)
> > > > > > >> # use view
> > > > > > >>
> > > > > > >> end
> > > > > > >>
> > > > > > >> In the trivial case of unsafe_view(data, :, :, i) this would
> boil
> > > > >
> > > > > down to
> > > > >
> > > > > > >> a single pointer update. Of course passing around these views
> > >
> > > outside
> > >
> > > > > of
> > > > >
> > > > > > >> their scope is rather discouraged. I use this pattern a lot
> and
> > >
> > > it
> > >
> > > > > proved
> > > > >
> > > > > > >> to be very handy / fast.
> > > > > > >>
> > > > > > >> Am 20.04.2015 um 02:08 schrieb Dahua Lin <[email protected]
> > > > >
> > > > > <javascript:>>:
> > > > > > >> > My benchmark shows that element indexing has been as fast
> as it
> > >
> > > can
> > >
> > > > > be
> > > > >
> > > > > > >> for array views (or subarrays in Julia 0.4).
> > > > > > >>
> > > > > > >> > Now the problem is actually the construction of
> > >
> > > views/subarrays. To
> > >
> > > > > > >> optimize the overhead of this part, the compiler may need to
> > > > >
> > > > > introduce
> > > > >
> > > > > > >> additional optimization.
> > > > > > >>
> > > > > > >> > Dahua
> > > > > > >> >
> > > > > > >> >
> > > > > > >> > On Monday, April 20, 2015 at 6:39:35 AM UTC+8, Sebastian
> Good
> > > > >
> > > > > wrote:
> > > > > > > >> > --track-allocation still requires guesswork, as
> optimizations
> > >
> > > can
> > >
> > > > > move
> > > > >
> > > > > > >> the allocation to a different place than you would expect.
> > > > > > >>
> > > > > > >> > On April 19, 2015 at 4:36:19 PM, Peter Brady (
> > >
> > > [email protected])
> > >
> > > > > > >> wrote:
> > > > > > >> >> So I discovered the --track-allocation option and now I am
> > >
> > > really
> > >
> > > > > > >> confused:
> > > > > > >> >> Here's my session:
> > > > > > >> >>
> > > > > > >> >> $ julia --track-allocation=all
> > > > > > >> >>
> > > > > > >> >> _
> > > > > > >> >>
> > > > > > >> >> _ _ _(_)_ | A fresh approach to technical
> > >
> > > computing
> > >
> > > > > > >> >> (_) | (_) (_) | Documentation:
> > > http://docs.julialang.org
> > >
> > > > > > >> >> _ _ _| |_ __ _ | Type "help()" for help.
> > > > > > >> >>
> > > > > > >> >> | | | | | | |/ _` | |
> > > > > > >> >> | | |
> > > > > > >> >> | | |_| | | | (_| | | Version 0.3.8-pre+13 (2015-04-17
> > >
> > > 18:08
> > >
> > > > > UTC)
> > > > >
> > > > > > >> >> _/ |\__'_|_|_|\__'_| | Commit 0df962d* (2 days old
> > >
> > > release-0.3)
> > >
> > > > > > >> >> |__/ | x86_64-redhat-linux
> > > > > > >> >>
> > > > > > >> >> julia> include("test.jl")
> > > > > > >> >> test_all (generic function with 1 method)
> > > > > > >> >>
> > > > > > >> >> julia> test_unsafe(5)
> > > > > > >> >>
> > > > > > >> >> And here's the relevant part of the resulting test.jl.mem
> > >
> > > file.
> > >
> > > > > Note
> > > > >
> > > > > > >> that I commented out some calls to 'size' and replaced with
> the
> > > > > > >> appropriate
> > > > > > >> hard-coded values but the resulting allocation is the same...
> Can
> > > > >
> > > > > anyone
> > > > >
> > > > > > >> shed some light on this while I wait for 0.4 to compile?
> > > > > > >>
> > > > > > >> >> - function update(a::AbstractArray, idx, off)
> > > > > > >> >>
> > > > > > >> >> 8151120 for i=1:320 #size(a, idx)
> > > > > > >> >>
> > > > > > >> >> 0 a[i] = -10*off+i
> > > > > > >> >> - end
> > > > > > >> >> 0 a
> > > > > > >> >> - end
> > > > > > >> >> -
> > > > > > >> >>
> > > > > > >> >> - function setk_UnSafe{T}(a::Array{T,3})
> > > > > > >> >>
> > > > > > >> >> 760 us = UnsafeSlice(a, 3)
> > > > > > >> >>
> > > > > > >> >> 0 for j=1:size(a,2),i=1:size(a,1)
> > > > > > >> >>
> > > > > > >> >> 8151120 us.start = (j-1)*320+i #size(a,1)+i
> > > > > > >> >>
> > > > > > >> >> - #off = sub2ind(size(a), i, j, 1)
> > > > > > >> >> 0 update(us, 3, us.start)
> > > > > > >> >> - end
> > > > > > >> >> 0 a
> > > > > > >> >> - end
> > > > > > >> >> - function test_unsafe(n)
> > > > > > >> >> 0 a = zeros(Int, (320, 320, 320))
> > > > > > >> >> - # warmup
> > > > > > >> >> 0 setk_UnSafe(a);
> > > > > > >> >> 0 clear_malloc_data()
> > > > > > >> >> - #@time (
> > > > > > >> >> 0 for i=1:n; setk_UnSafe(a); end
> > > > > > >> >> - end
> > > > > > >> >>
> > > > > > >> >> On Sunday, April 19, 2015 at 2:21:56 PM UTC-6, Peter Brady
> > >
> > > wrote:
> > > > > > >> >> @Dahua, thanks for adding an unsafeview! I appreciate how
> > >
> > > quickly
> > >
> > > > > > >> this community responds.
> > > > > > >>
> > > > > > >> >> I've added the following function to my test.jl script
> > > > > > >> >> function setk_unsafeview{T}(a::Array{T,3})
> > > > > > >> >>
> > > > > > >> >> for j=1:size(a,2),i=1:size(a,1)
> > > > > > >> >>
> > > > > > >> >> off = sub2ind(size(a), i, j, 1)
> > > > > > >> >> update(unsafe_view(a, i, j, :), 3, off)
> > > > > > >> >>
> > > > > > >> >> end
> > > > > > >> >> a
> > > > > > >> >>
> > > > > > >> >> end
> > > > > > >> >>
> > > > > > >> >> But I'm not seeing the large increase in performance I
> was
> > > > > > >>
> > > > > > >> expecting. My timings are now
> > > > > > >>
> > > > > > >> >> julia> test_all(5);
> > > > > > >> >> test_stride
> > > > > > >> >> elapsed time: 2.156173128 seconds (0 bytes allocated)
> > > > > > >> >> test_view
> > > > > > >> >> elapsed time: 9.30964534 seconds (94208000 bytes
> allocated,
> > >
> > > 0.47%
> > >
> > > > > gc
> > > > >
> > > > > > >> time)
> > > > > > >>
> > > > > > >> >> test_unsafe
> > > > > > >> >> elapsed time: 2.169307471 seconds (16303000 bytes
> allocated)
> > > > > > >> >> test_unsafeview
> > > > > > >> >> elapsed time: 8.955876793 seconds (90112000 bytes
> allocated,
> > >
> > > 0.41%
> > >
> > > > > gc
> > > > >
> > > > > > >> time)
> > > > > > >>
> > > > > > >> >> To be fair, I am cheating a bit with my custom
> 'UnsafeSlice'
> > >
> > > since
> > >
> > > > > I
> > > > >
> > > > > > >> make only one instance and simply update the offset on each
> > > > >
> > > > > iteration.
> > > > >
> > > > > > >> If
> > > > > > >> I make it immutable and create a new instance on every
> iteration
> > >
> > > (as
> > >
> > > > > I do
> > > > >
> > > > > > >> for the view and unsafeview), things slow down a little and
> the
> > > > > > >> allocation
> > > > > > >>
> > > > > > >> goes south:
> > > > > > >> >> julia> test_all(5);
> > > > > > >> >> test_stride
> > > > > > >> >> elapsed time: 2.159909265 seconds (0 bytes allocated)
> > > > > > >> >> test_view
> > > > > > >> >> elapsed time: 9.029025282 seconds (94208000 bytes
> allocated,
> > >
> > > 0.43%
> > >
> > > > > gc
> > > > >
> > > > > > >> time)
> > > > > > >>
> > > > > > >> >> test_unsafe
> > > > > > >> >> elapsed time: 2.621667854 seconds (114606240 bytes
> allocated,
> > > > >
> > > > > 2.41% gc
> > > > >
> > > > > > >> time)
> > > > > > >>
> > > > > > >> >> test_unsafeview
> > > > > > >> >> elapsed time: 8.888434466 seconds (90112000 bytes
> allocated,
> > >
> > > 0.44%
> > >
> > > > > gc
> > > > >
> > > > > > >> time)
> > > > > > >>
> > > > > > >> >> These are all with 0.3.8-pre. I'll try compiling master
> and
> > >
> > > see
> > >
> > > > > what
> > > > >
> > > > > > >> happens. I'm still confused about why allocating a single
> type
> > >
> > > with
> > >
> > > > > a
> > > > >
> > > > > > >> pointer, 2 ints and a tuple costs so much memory though.
> > > > > > >>
> > > > > > >> >> On Sunday, April 19, 2015 at 11:38:17 AM UTC-6, Tim Holy
> > >
> > > wrote:
> > > > > > >> >> It's not just escape analysis, as this (new) issue
> > >
> > > demonstrates:
> > > > > > >> >> https://github.com/JuliaLang/julia/issues/10899
> > > > > > >> >>
> > > > > > >> >> --Tim
> > > > > > >> >>
> > > > > > >> >> On Sunday, April 19, 2015 12:33:51 PM Sebastian Good
> wrote:
> > > > > > >> >> > Their size seems much decreased. I’d imagine to totally
> > >
> > > avoid
> > >
> > > > > > >> allocation in
> > > > > > >>
> > > > > > >> >> > this benchmark requires an optimization that really has
> > >
> > > nothing
> > >
> > > > > to
> > > > >
> > > > > > >> do with
> > > > > > >>
> > > > > > >> >> > subarrays per se. You’d have to do an escape analysis
> and
> > >
> > > see
> > >
> > > > > that
> > > > >
> > > > > > >> Aj never
> > > > > > >>
> > > > > > >> >> > left sumcols. Not easy in practice, since it’s passed to
> > >
> > > slice
> > >
> > > > > and
> > > > >
> > > > > > >> length,
> > > > > > >>
> > > > > > >> >> > and you’d have to make sure they didn’t squirrel it away
> or
> > >
> > > pass
> > >
> > > > > it
> > > > >
> > > > > > >> on to
> > > > > > >>
> > > > > > >> >> > someone else. Then you could stack allocate it, or even
> > > > >
> > > > > destructure
> > > > >
> > > > > > >> it into
> > > > > > >>
> > > > > > >> >> > a bunch of scalar mutations on the stack. After
> eliminating
> > >
> > > dead
> > >
> > > > > > >> code,
> > > > > > >>
> > > > > > >> >> > you’d end up with a no-allocation loop much like you’d
> write
> > >
> > > by
> > >
> > > > > > >> hand. This
> > > > > > >>
> > > > > > >> >> > sort of optimization seems to be quite tricky for
> compilers
> > >
> > > to
> > >
> > > > > pull
> > > > >
> > > > > > >> off,
> > > > > > >>
> > > > > > >> >> > but it’s a common pattern in numerical code.
> > > > > > >> >> >
> > > > > > >> >> > In Julia is such cleverness left entirely to LLVM, or
> are
> > >
> > > there
> > >
> > > > > > >> optimization
> > > > > > >>
> > > > > > >> >> > passes in Julia itself? On April 19, 2015 at 6:49:21 AM,
> Tim
> > > > >
> > > > > Holy
> > > > >
> > > > > > >> >> > ([email protected]) wrote:
> > > > > > >> >> >
> > > > > > >> >> > Sorry to be slow to chime in here, but the tuple
> overhaul
> > >
> > > has
> > >
> > > > > landed
> > > > >
> > > > > > >> and
> > > > > > >>
> > > > > > >> >> > they are still not zero-cost:
> > > > > > >> >> >
> > > > > > >> >> > function sumcols(A)
> > > > > > >> >> > s = 0.0
> > > > > > >> >> > for j = 1:size(A,2)
> > > > > > >> >> > Aj = slice(A, :, j)
> > > > > > >> >> > for i = 1:length(Aj)
> > > > > > >> >> > s += Aj[i]
> > > > > > >> >> > end
> > > > > > >> >> > end
> > > > > > >> >> > s
> > > > > > >> >> > end
> > > > > > >> >> >
> > > > > > >> >> > Even in the latest 0.4, this still allocates memory. On
> the
> > > > >
> > > > > other
> > > > >
> > > > > > >> hand,
> > > > > > >>
> > > > > > >> >> > while SubArrays allocate nearly 2x more memory than
> > >
> > > ArrayViews,
> > >
> > > > > the
> > > > >
> > > > > > >> speed
> > > > > > >>
> > > > > > >> >> > of the two (replacing `slice` with `view` above) is, for
> me,
> > > > >
> > > > > nearly
> > > > >
> > > > > > >> >> > identical.
> > > > > > >> >> >
> > > > > > >> >> > --Tim
> > > > > > >> >> >
> > > > > > >> >> > On Friday, April 17, 2015 08:30:27 PM Sebastian Good
> wrote:
> > > > > > >> >> > > This was discussed a few weeks ago
> > >
> > > https://groups.google.com/d/msg/julia-users/IxrvV8ABZoQ/uWZu5-IB3McJ
> > >
> > > > > > >> >> > > I think the bottom line is that the current
> implementation
> > > > > > >>
> > > > > > >> *should* be
> > > > > > >>
> > > > > > >> >> > > 'zero-cost' once a set of planned improvements and
> > > > >
> > > > > optimizations
> > > > >
> > > > > > >> take
> > > > > > >>
> > > > > > >> >> > > place. One of the key ones is a tuple overhaul.
> > > > > > >> >> > >
> > > > > > >> >> > > Fair to say it can never be 'zero' cost since there is
> > > > >
> > > > > different
> > > > >
> > > > > > >> inherent
> > > > > > >>
> > > > > > >> >> > > overhead depending on the type of subarray, e.g.
> offset,
> > > > >
> > > > > slice,
> > > > >
> > > > > > >> >> > > re-dimension, etc. however the implementation is quite
> > >
> > > clever
> > >
> > > > > > >> >> > > about
> > > > > > >> >> > > allowing specialization of those.
> > > > > > >> >> > >
> > > > > > >> >> > > In a common case (e.g. a constant offset or simple
> stride)
> > >
> > > my
> > >
> > > > > > >> >> > > understanding
> > > > > > >> >> > > is that the structure will be type-specialized and
> likely
> > > > >
> > > > > stack
> > > > >
> > > > > > >> allocated
> > > > > > >>
> > > > > > >> >> > > in many cases, reducing to what you'd write by hand.
> At
> > >
> > > least
> > >
> > > > > this
> > > > >
> > > > > > >> is what
> > > > > > >>
> > > > > > >> >> > > they're after.
> > > > > > >> >> > >
> > > > > > >> >> > > On Friday, April 17, 2015 at 4:24:14 PM UTC-4, Peter
> Brady
> > > > >
> > > > > wrote:
> > > > > > >> >> > > > Thanks for the links. I'll check out ArrayViews as
> it
> > >
> > > looks
> > >
> > > > > like
> > > > >
> > > > > > >> what I
> > > > > > >>
> > > > > > >> >> > > > was going to do manually without wrapping it in a
> type.
> > > > > > >> >> > > >
> > > > > > >> >> > > > By semi-dim agnostic I meant that the differencing
> > >
> > > algorithm
> > >
> > > > > > >> itself only
> > > > > > >>
> > > > > > >> >> > > > cares about one dimension but that dimension is
> > >
> > > different
> > >
> > > > > for
> > > > >
> > > > > > >> different
> > > > > > >>
> > > > > > >> >> > > > directions. Only a few toplevel routines actually
> need
> > >
> > > to
> > >
> > > > > know
> > > > >
> > > > > > >> about the
> > > > > > >>
> > > > > > >> >> > > > dimensionality of the problem.
> > > > > > >> >> > > >
> > > > > > >> >> > > > On Friday, April 17, 2015 at 2:04:39 PM UTC-6, René
> > >
> > > Donner
> > >
> > > > > wrote:
> > > > > > >> >> > > >> As far as I have measured it sub in 0.4 is still
> not
> > >
> > > cheap,
> > >
> > > > > as
> > > > >
> > > > > > >> it
> > > > > > >>
> > > > > > >> >> > > >> provides the flexibility to deal with all kinds of
> > >
> > > strides
> > >
> > > > > and
> > > > >
> > > > > > >> offsets,
> > > > > > >>
> > > > > > >> >> > > >> and
> > > > > > >> >> > > >> the view object itself thus has a certain size. See
> > > > > > >> >> > > >>
> https://github.com/rened/FunctionalData.jl#efficiency
> > >
> > > for
> > >
> > > > > a
> > > > >
> > > > > > >> simple
> > > > > > >>
> > > > > > >> >> > > >> analysis, where the speed is mostly dominated by
> the
> > >
> > > speed
> > >
> > > > > of
> > > > >
> > > > > > >> the
> > > > > > >>
> > > > > > >> >> > > >> "sub-view" mechanism.
> > > > > > >> >> > > >>
> > > > > > >> >> > > >> To get faster views which require strides etc you
> can
> > >
> > > try
> > >
> > > > > > >> >> > > >> https://github.com/JuliaLang/ArrayViews.jl
> > > > > > >> >> > > >>
> > > > > > >> >> > > >> What do you mean by semi-dim agnostic? In case you
> only
> > > > >
> > > > > need
> > > > >
> > > > > > >> indexing
> > > > > > >>
> > > > > > >> >> > > >> along the last dimension (like a[:,:,i] and
> a[:,:,:,i])
> > >
> > > you
> > >
> > > > > can
> > > > >
> > > > > > >> use
> > >
> > > https://github.com/rened/FunctionalData.jl#efficient-views-details
> > >
> > > > > > >> >> > > >> which uses normal DenseArrays and simple pointer
> > >
> > > updates
> > >
> > > > > > >> internally. It
> > > > > > >>
> > > > > > >> >> > > >> can also update a view in-place, by just
> incrementing
> > >
> > > the
> > >
> > > > > > >> pointer.
> > > > > > >>
> > > > > > >> >> > > >> Am 17.04.2015 um 21:48 schrieb Peter Brady <
> > > > >
> > > > > [email protected]
> > > > >
> > > > > > > >> >> > > >> > In order to write some differencing algorithms in
> a
> > > > > > >>
> > > > > > >> semi-dimensional
> > > > > > >>
> > > > > > >> >> > > >> agnostic manner the code I've written makes heavy
> use
> > >
> > > of
> > >
> > > > > > >> subarrays
> > > > > > >>
> > > > > > >> >> > > >> which
> > > > > > >> >> > > >> turn out to be rather costly. I've noticed some
> posts
> > >
> > > on
> > >
> > > > > the
> > > > >
> > > > > > >> cost of
> > > > > > >>
> > > > > > >> >> > > >> subarrays here and that things will be better in
> 0.4.
> > >
> > > Can
> > >
> > > > > > >> someone
> > > > > > >>
> > > > > > >> >> > > >> comment
> > > > > > >> >> > > >> on how much better? Would subarray (or anything
> like
> > >
> > > it) be
> > >
> > > > > on
> > > > >
> > > > > > >> par with
> > > > > > >>
> > > > > > >> >> > > >> simply passing an offset and stride (constant) and
> > > > >
> > > > > computing
> > > > >
> > > > > > >> the index
> > > > > > >>
> > > > > > >> >> > > >> myself? I'm currently using the 0.3 release branch.
>
>