An addition/answer from my side:
I think pmap does indeed "send" the data to the worker process on every
iteration (the timings from my code below seem to indicate this). I suppose
pmap cannot know or assume that the data is constant across iterations.
The sendto functions help my use case!
# Start 3 additional worker processes; the trailing `;` suppresses the echoed result at the REPL.
addprocs(3);
# Return the final element (in linear-index order) of a numeric matrix.
# Throws a BoundsError when `arr` is empty.
# `@everywhere` defines the function on every process so workers can call it.
@everywhere function do_something{T<:Number}(arr::Array{T,2})
    return last(arr)
end
# Run `do_something` on `arr`, print a timestamped completion message for
# task `i`, and return the value `do_something` produced.
# `@everywhere` defines the function on every process so workers can call it.
@everywhere function do_work(i,arr)
    result = do_something(arr)
    # The message is printed only after the computation succeeded.
    println("done $(i). $(now())")
    return result
end
# Forward the given keyword arguments to the single-process `sendto` method
# once for every process id in `ps`, in order. Returns `nothing`.
function sendto(ps::Vector{Int}; args...)
    for pid in ps
        sendto(pid; args...)
    end
    return nothing
end
# Define global variables in module `Main` on process `p`.
#
# Every keyword argument `name=value` is turned into the assignment
# `name = value`, which is evaluated in `Main` on process `p`.
#
# The call blocks until all assignments have finished. Without `@sync` the
# `@spawnat` calls are fire-and-forget: the function could return before the
# variables exist on the target process, and the discarded remote references
# would hide any failure of the remote evaluation.
#
# Returns `nothing`.
function sendto(p::Int; args...)
    @sync for (nm, val) in args
        # Core.eval is the explicit two-argument (module, expression) form.
        @spawnat(p, Core.eval(Main, Expr(:(=), nm, val)))
    end
    return nothing
end
# Benchmark variant that ships the data to the workers up front.
#
# Seeds the RNG, builds an `nn` x `cols` random matrix, sends it to every
# worker as the global `local_data` (timed), then runs `do_work` `reps` times
# through `pmap` (timed). The closure refers to the global `local_data`, not
# to the local matrix. Returns the sum of the per-task results.
function something(nn,cols,reps)
    srand(2)
    matrix = rand(nn, cols)
    @time sendto(workers(), local_data=matrix)
    task = i -> do_work(i, local_data)
    results = @time pmap(task, 1:reps)
    return sum(results)
end
# Benchmark variant without an up-front transfer.
#
# Seeds the RNG, builds an `nn` x `cols` random matrix and runs `do_work`
# `reps` times through `pmap`, with the matrix captured by the closure passed
# to `pmap`. Returns the sum of the per-task results.
function something_wo_sendto(nn,cols,reps)
    srand(2)
    matrix = rand(nn, cols)
    results = pmap(1:reps) do i
        do_work(i, matrix)
    end
    return sum(results)
end
# Problem size: rows and columns of the random matrix, and the number of pmap tasks.
nn, cols, reps = 200000, 200, 30
# Time both variants end to end.
x1 = @time something(nn, cols, reps)
x2 = @time something_wo_sendto(nn, cols, reps)
# Both variants seed the RNG identically, so the results are compared for exact equality.
x1 == x2
Am Freitag, 13. November 2015 12:56:49 UTC+1 schrieb bernhard:
>
> Hi
>
> I was wondering how pmap handles data transfer between processes:
>
> pmap(i->foo(some_settings[i],bigDataFrameOrSo),1:800)
>
> Say I have a big array or DataFrame (or anything) which is used by each
> iteration. Will it be "sent" to each process only once?
> If not, it may make sense to approach this differently (by invoking a
> process which runs from 1:100 and another process going from 101:200,
> ....).
>
> From the timings I observed, I think that Julia realizes that
> bigDataFrameOrSo
> is the same for each iteration, but I thought asking would not hurt.
>
> Thanks
> Bernhard
>