Below is part of my code, I wrote it with FFTW.set_num_threads() to get
parallel fft calculation. However, the for loops "for eachindex" are just
serial. Is it possible to rewrite the code in a parallel way? I don't know
how to implement it with @parallel, SharedArray or the simple @spawn
features. To me, the documentations is far from clear.
using JLD
function ground(ϕ0::Array{Complex{Float64},2}, dx::Float64
, dt::Float64)
FFTW.set_num_threads(CPU_CORES)
ns = size( ϕ0, 1)
x = get_x(ns, dx)
p = get_p(ns, dx)
##### FFT plan
p_fft! = plan_fft!( similar(ϕ0), flags=FFTW.MEASURE )
prop_x = Array( Float64, ns , ns)
prop_p = similar( prop_x )
@inbounds for j in 1:ns
@inbounds for i in 1:ns
prop_x[i, j] = exp( -get_potential(x[i], x[j]) * dt / 2 )
prop_p[i, j] = exp( -(p[i]^2 + p[j]^2)/2.0 * dt )
end
end
normϕ = √(sumabs2(ϕ0)) * dx
scale!( ϕ0, 1 / normϕ )
ϕ2 = similar( ϕ0 )
Δϕ = Array(Float64, 0)
push!(Δϕ, 1.0)
nn = 0
while Δϕ[1] > 1.e-15
for j in eachindex(ϕ2)
ϕ2[j] = ϕ0[j] * prop_x[j]
end
p_fft! * ϕ2
for j in eachindex(ϕ2)
ϕ2[j] *= prop_p[j]
end
p_fft! \ ϕ2
for j in eachindex(ϕ2)
ϕ2[j] *= prop_x[j]
end
normϕ = √(sumabs2(ϕ2)) * dx
scale!( ϕ2, 1 / normϕ )
nn += 1
empty!(Δϕ)
push!(Δϕ, maxabs( ϕ2 - ϕ0 ))
ϕ0, ϕ2 = ϕ2, ϕ0
end
save("data/gs.jld", "ϕ", ϕ0)
ϕ0
end