Hello: I recently started exploring the parallel capabilities of Julia and
I need some help in understanding and improving the performance of a very
elementary parallel code using DArrays (I use Julia
version 0.4.0-dev+2431). The code pasted below (based essentially on
plife.jl) solves u''(x) = 0, x \in [0,1] with u(0) and u(1) specified,
using the 2nd order central difference approximation. The parallel version
of the code runs significantly slower than the serial version. It would be
nice if someone could point out ways to improve this and/or suggest an
alternative efficient version.

# Jacobi-style averaging kernel for the 1D Laplace equation.
#
# Given the values `u`, returns a freshly allocated vector with
# `length(u) - 2` entries; entry `k` is the mean of `u[k]` and `u[k + 2]`,
# i.e. the average of the two neighbours of the point `u[k + 1]`.
# The input is not modified.
function laplace_1D_serial(u::Array{Float64})
   n_interior = length(u) - 2
   averaged = zeros(n_interior)

   for k in 1:n_interior
      left = u[k]
      right = u[k + 2]
      averaged[k] = 0.5 * (left + right)
   end

   return averaged
end

# Perform one in-place sweep on `u`: the values returned by
# `laplace_1D_serial(u)` are written back into `u[2]`, `u[3]`, ... in order.
# `u[1]` and `u[end]` are never written, so the boundary values are kept.
# Returns `nothing`.
function serial_iterate(u::Array{Float64})
   averaged = laplace_1D_serial(u)

   # Entry k of the kernel output belongs to grid point k + 1 of `u`.
   for (k, value) in enumerate(averaged)
      u[k + 1] = value
   end

   return nothing
end

# Perform one sweep on the distributed vector `u` and return the result as a
# new DArray with the same size, built on the same processes (`procs(u)`).
# `u` itself is not modified.
#
# Each participating process receives the index range `J` of the chunk it has
# to produce. It copies that range of `u`, extended by one ghost point on each
# side that has a neighbour, into a local array, applies `laplace_1D_serial`
# to it, and re-attaches the fixed end values `u[1]` / `u[end]` when its chunk
# touches the corresponding end of the domain.
#
# Boundary chunks are recognised from the index range itself rather than from
# `myid()`, so the result does not depend on the workers being numbered
# 2..np+1, and a single chunk that spans the whole vector is handled as well.
#
# NOTE(review): every `u[i]` below indexes the DArray element by element,
# which is presumably the dominant cost compared to the serial version; only
# the (at most two) ghost points should actually need to come from another
# process -- confirm and consider copying the local part in bulk.
function parallel_iterate(u::DArray)
   n = length(u)

   DArray(size(u), procs(u)) do I
      J = I[1]

      # Does this chunk contain the left / right end of the domain?
      owns_left = J[1] == 1
      owns_right = J[end] == n

      # Extend the chunk by one ghost point wherever a neighbour exists.
      lo = owns_left ? J[1] : J[1] - 1
      hi = owns_right ? J[end] : J[end] + 1

      halo = zeros(hi - lo + 1)
      for i = lo:hi
         halo[i - lo + 1] = u[i]
      end

      chunk = laplace_1D_serial(halo)

      # When the chunk touches an end, `halo` starts/ends exactly at that end,
      # so `halo[1]` / `halo[end]` hold the boundary values `u[1]` / `u[end]`.
      if owns_left
         chunk = vcat(halo[1], chunk)
      end
      if owns_right
         push!(chunk, halo[end])
      end

      chunk
   end
end

A sample run on my laptop with 4 processors:
julia> u = zeros(1000); u[end] = 1.0; u_distributed = distribute(u);

julia> @time for i = 1:1000
         serial_iterate(u)
       end
elapsed time: 0.011452192 seconds (8300112 bytes allocated)

julia> @time for i = 1:1000
         u_distributed = parallel_iterate(u_distributed)
       end
elapsed time: 4.461922218 seconds (190565036 bytes allocated, 10.17% gc time)

Thanks for your help!

Cheers,
Amuthan
 

Reply via email to