When using SharedArrays with pmap, both memory usage and run time grow in 
proportion to the number of tasks. This doesn't happen when using 
@parallel. What's the right way to pass shared arrays to workers using 
functional syntax?

(code for file q3.jl pasted below and also attached; the first timing 
result refers to a @parallel implementation, the second to a pmap-style 
implementation)

ᐅ julia -p 10 q3.jl 100
elapsed time: 1.14932906 seconds (12402424 bytes allocated)
elapsed time: 0.097900614 seconds (2716048 bytes allocated)
ᐅ julia -p 10 q3.jl 1000
elapsed time: 1.140016584 seconds (12390724 bytes allocated)
elapsed time: 0.302179888 seconds (21641260 bytes allocated)
ᐅ julia -p 10 q3.jl 10000
elapsed time: 1.173121314 seconds (12402424 bytes allocated)
elapsed time: 2.429918636 seconds (197840960 bytes allocated)

# Problem size: the first command-line argument, converted to an integer.
n = int(first(ARGS))
# Process-local vector of n normally distributed samples.
arr = randn(n)
# Copy `a` into a newly allocated SharedArray with the same dimensions.
#
# Arguments:
#   a    -- source array; its contents are copied element by element.
#   pids -- passed as the `pids` keyword of the SharedArray constructor
#           (default: workers()).
#
# Returns the new SharedArray.
function make_shared(a::AbstractArray, pids=workers())
    # An empty array has no a[1] to inspect, so fall back to the declared
    # element type there; non-empty input keeps using the type of its first
    # element, exactly as before.
    T = isempty(a) ? eltype(a) : typeof(a[1])
    sh = SharedArray(T, size(a), pids=pids)
    sh[:] = a[:]
    return sh
end
# One task per element index of the data.
tasks = 1:n
# Rebind `arr` to the SharedArray copy produced by make_shared.
arr = make_shared(arr)

# First timing: sum the elements of `arr` with a @parallel (+) reduction
# over the index range `tasks`.
@time begin
@parallel (+) for i in tasks
arr[i]  # value of the loop body; combined across iterations by (+)
end
end

# Work function for one task: return the element of `arr` at index `task`.
# Declared under @everywhere so the definition exists on every process.
@everywhere function f(task, arr)
    return getindex(arr, task)
end
# Call `f(task, arr)` remotely for every element of `tasks` and return the
# sum of the results (0 when `tasks` is empty).
#
# Arguments:
#   f     -- function invoked as f(task, arr) through remotecall_fetch; it
#            must be defined on every process that may execute it.
#   tasks -- indexable collection of work items (tasks[idx] is sent with
#            each call).
#   arr   -- passed unchanged as the second argument of every call.
#
# One feeder task is started per process, skipping the calling process
# unless it is the only one. Each feeder repeatedly claims the next unclaimed
# index and waits for its remote call to finish before claiming another.
function mypmap(f::Function, tasks, arr)
    # NOTE(review): `arr` is an argument of every remotecall_fetch below, so
    # it is handed to the remote-call machinery once per task; ideally the
    # shared data would not be re-sent each time.
    np = nprocs()  # determine the number of processes available
    n = length(tasks)
    results = 0    # starts as an Int; f's results are added onto it
    i = 1
    # function to produce the next work item from the queue.
    # in this case it's just an index.
    nextidx() = (idx = i; i += 1; idx)
    @sync begin
        # Iterate over the actual process ids: they are not guaranteed to be
        # the contiguous range 1:np once workers have been removed or added.
        for p in procs()
            if p != myid() || np == 1
                @async begin
                    while true
                        idx = nextidx()
                        if idx > n
                            break
                        end
                        task = tasks[idx]
                        # Fetch into a local first so that the update of
                        # `results` below contains no point at which another
                        # feeder could run and have its addition overwritten.
                        r = remotecall_fetch(p, f, task, arr)
                        results += r
                    end
                end
            end
        end
    end
    return results
end

# Second timing: the hand-written pmap-style loop over the same tasks,
# with the shared array passed as an argument.
@time mypmap(f,tasks,arr)

Attachment: q3.jl
Description: Binary data

Reply via email to