An addition/answer from my side:

I think pmap does indeed "send" the data to the process on every call (my 
code below seems to indicate this). I suppose pmap cannot know or assume 
that the data is constant across iterations.

The sendto functions help my use case!

# Add three worker processes to the session; the `@everywhere` definitions
# and the `pmap` calls below are distributed across them.
addprocs(3);

# Stand-in for real work: return the last element of a numeric matrix.
# Defined on every process so that workers can call it.
@everywhere function do_something{T<:Number}(arr::Matrix{T})
    return last(arr)
end

# One unit of work for `pmap`: run `do_something` on `arr`, print a progress
# line containing the iteration index `i` and the current timestamp, and
# return the computed value. Defined on every process.
@everywhere function do_work(i, arr)
    result = do_something(arr)
    println("done $(i). $(now())")
    return result
end

"""
    sendto(ps::AbstractVector{Int}; args...)

Forward the keyword arguments `args` to the single-process method
`sendto(p::Int; args...)` once for every process id in `ps`, in order.

`ps` may be any integer vector (for example the result of `workers()` or a
range such as `2:4`). An empty `ps` is a no-op. Returns `nothing`.
"""
function sendto(ps::AbstractVector{Int}; args...)
    for p in ps
        sendto(p; args...)
    end
    return nothing
end

"""
    sendto(p::Int; args...)

For every keyword argument `name = value`, assign `value` to a global
variable called `name` in module `Main` of process `p`.

The call blocks until all assignments have finished on `p`, so the variables
are guaranteed to exist when it returns and timing the call includes the
transfer. Returns `nothing`.
"""
function sendto(p::Int; args...)
    # `@sync` waits for every `@spawnat` issued inside the loop; without it
    # the function would return while the assignments are still in flight.
    @sync for (nm, val) in args
        # Wrap the value in a QuoteNode so that a Symbol or Expr payload is
        # stored as data instead of being evaluated as code on the worker.
        @spawnat(p, Core.eval(Main, Expr(:(=), nm, QuoteNode(val))))
    end
    return nothing
end

"""
    something(nn, cols, reps)

Benchmark variant that ships the data up front: seed the random number
generator with 2, build an `nn` x `cols` random matrix, push it to all
workers as the global `local_data` with `sendto`, then run `do_work` for
`1:reps` through `pmap`, with each task reading the worker-side `local_data`.
Both the transfer and the `pmap` call are timed with `@time`.

Returns the sum of the `pmap` results.
"""
function something(nn, cols, reps)
    srand(2)
    payload = rand(nn, cols)
    @time sendto(workers(), local_data=payload)
    # `local_data` is deliberately not a local: it resolves to the global
    # that `sendto` created on whichever worker runs the task.
    task = i -> do_work(i, local_data)
    @time results = pmap(task, 1:reps)
    return sum(results)
end

"""
    something_wo_sendto(nn, cols, reps)

Benchmark variant without an up-front transfer: seed the random number
generator with 2, build an `nn` x `cols` random matrix, and run `do_work`
for `1:reps` through `pmap` with a closure that captures the matrix directly.

Returns the sum of the `pmap` results.
"""
function something_wo_sendto(nn, cols, reps)
    srand(2)
    payload = rand(nn, cols)
    return sum(pmap(i -> do_work(i, payload), 1:reps))
end

# Benchmark driver: a 200000 x 200 random matrix and 30 pmap iterations.
nn, cols, reps = 200000, 200, 30
# Time the variant that pushes the data to the workers first ...
@time x1 = something(nn, cols, reps)
# ... and the variant that lets the pmap closure carry the data.
@time x2 = something_wo_sendto(nn, cols, reps)
# Both variants use the same seed, so the two sums should be equal.
x1 == x2


Am Freitag, 13. November 2015 12:56:49 UTC+1 schrieb bernhard:
>
> Hi 
>
> I was wondering how pmap handles data transfer between processes:
>
> pmap(i->foo(some_settings[i],bigDataFrameOrSo),1:800)
>
> Say I have a big array or DataFrame (or anything) which is used by each 
> iteration. Will it be "sent" to each process only once?
> If not, it may make sense to approach this differently (by invoking a 
> process which runs from 1:100 and another  process going from 101:200, 
> ....).
>
> From the timings I observed, I think that Julia realizes that 
> bigDataFrameOrSo 
> is the same for each iteration, but I thought asking would not hurt.
>
> Thanks
> Bernhard
>

Reply via email to