Hi all, I'm trying to figure out how to best initialize a SharedArray,
using a C function to fill it up that computes a huge matrix in parts, and
all comments are appreciated. To summarise: Is A, making an empty shared
array, computing the matrix in parallel using pmap and then filling it up
serially, better than using B, computing in parallel and storing in one
step by using an init function in the SharedArray declaration?
The difference tends to be that B uses a lot more memory, each process
using the exact same amount of memory. However it is much faster than A, as
the copy step takes longer than the computation, but in A most of the
memory usage is in one process, using less memory overall.
Any tips on how to do this better? Also, this pmap is how I'm handling more
complex parallelizations in Julia. Any comments on that approach?
Thanks a lot!
Best,
Ben
CODE A:
Is this, making an empty shared array, computing the matrix in parallel and
then filling it up serially:
# Approach A: allocate an empty shared array, evaluate every (w,z,k) grid
# point in parallel with pmap, then copy each result block into the shared
# array one grid point at a time in a plain serial loop.
#
# `model` supplies the grids vW, vZ, vK and vQ; only their lengths are used here.
# Returns the filled SharedArray of Float64 with size (nW,nZ,nK,nQ,nQ,nQ).
function findZeroDividends(model::ModelPrivate)
    nW, nZ, nK, nQ = length(model.vW), length(model.vZ), length(model.vK), length(model.vQ)
    gridDims = (nW,nZ,nK)
    zeroMatrix = SharedArray(Float64,(nW,nZ,nK,nQ,nQ,nQ),pids=workers())
    # One work item per grid point, laid out as an nW x nZ x nK array.
    tasks = [stateFindZeroK(w,z,k,model) for w in 1:nW, z in 1:nZ, k in 1:nK]
    blocks = pmap(findZeroInC,tasks)
    # `blocks` is addressed with the column-major linear index of (w,z,k).
    for w in 1:nW, z in 1:nZ, k in 1:nK
        zeroMatrix[w,z,k,:,:,:] = blocks[sub2ind(gridDims,w,z,k)]
    end
    return zeroMatrix
end
_______________________
CODE B:
Better than these two:
# Fill routine used as the `init` callback of the shared array `x`.
#
# The (nW,nZ,nK) grid is walked by linear index; the calling process handles
# the indices offset, offset+nparts, offset+2*nparts, ... and stores the
# result of findZeroInC for each of them in x[w,z,k,:,:,:].
#
# Arguments:
#   x          - shared array being initialised, indexed as [w,z,k,:,:,:]
#   nW,nZ,nK   - extents of the first three dimensions of the grid
#   model      - model passed through to stateFindZeroK
# Returns `x`.
function start(x::SharedArray,nW::Int64,nZ::Int64,nK::Int64,model::ModelPrivate)
    # Partition by position in procs(x) instead of myid()-1 / nworkers():
    # process ids are not guaranteed to be 2:(nworkers()+1) (no workers at all,
    # or gaps left by rmprocs), which made the old ranges start at 0 or
    # skip/duplicate grid points.
    offset = indexpids(x)
    if offset == 0
        # This process does not map `x`, so it has no share of the work.
        return x
    end
    nparts = length(procs(x))
    gridDims = (nW,nZ,nK)
    for j in offset:nparts:(nW*nZ*nK)
        (w,z,k) = ind2sub(gridDims,j)
        x[w,z,k,:,:,:] = findZeroInC(stateFindZeroK(w,z,k,model))
    end
    return x
end
# Approach B: build the shared array and fill it in a single step by handing
# `start` to the SharedArray constructor as its `init` callback.
#
# `model` supplies the grids vW, vZ, vK and vQ; only their lengths are used here.
# Returns the SharedArray of Float64 with size (nW,nZ,nK,nQ,nQ,nQ).
function findZeroDividendsSmart(model::ModelPrivate)
    nW, nZ, nK, nQ = length(model.vW), length(model.vZ), length(model.vK), length(model.vQ)
    fullDims = (nW,nZ,nK,nQ,nQ,nQ)
    pidList = workers()
    # The callback receives the shared array and forwards the grid extents.
    fillShared = x -> start(x,nW,nZ,nK,model)
    return SharedArray(Float64,fullDims,pids=pidList,init=fillShared)
end
________________________
The C function being called is inside this wrapper and returns the pointer
to double *capitalChoices = (double *)malloc(sizeof(double)*nQ*nQ*nQ);
# Julia wrapper around the C routine `findZeroInC` exported by "findP.so".
#
# `state` carries the grid indices (wealth, z, k) and the model. The wealth
# index selects a value from model.vW; z and k are shifted by -1 before being
# passed to C.
#
# Returns an (nQ,nQ,nQ) Float64 array wrapping the buffer returned by C. The
# `true` flag asks Julia to take ownership of that buffer.
# Raises an error if the C routine returns a NULL pointer.
#
# C prototype:
#   double* findZeroInC(double wealth, int z, int k, double theta, double delta,
#                       double* vK, int nK, double* vQ, int nQ,
#                       double* transition, double betaGov)
function findZeroInC(state::stateFindZeroK)
    w = state.wealth
    z = state.z
    k = state.k
    model = state.model
    nQ = length(model.vQ)
    # C `int` parameters are declared as Cint (not Int64) so the argument
    # types match the prototype above; ccall converts the Julia integers.
    t = ccall((:findZeroInC,"findP.so"),
              Ptr{Float64},
              (Float64,Cint,Cint,Float64,Float64,Ptr{Float64},Cint,Ptr{Float64},Cint,Ptr{Float64},Float64),
              model.vW[w],z-1,k-1,model.theta,model.delta,model.vK,length(model.vK),model.vQ,nQ,model.transition,model.betaGov)
    if t == C_NULL
        error("NULL")
    end
    return pointer_to_array(t,(nQ,nQ,nQ),true)
end
<https://lh5.googleusercontent.com/-5rJqYh2oUqQ/VIIiFQUl2rI/AAAAAAAAAvM/gwAXG7N0Gxc/s1600/mem.png>