Below is part of my code, I wrote it with FFTW.set_num_threads() to get 
parallel fft calculation. However, the for loops "for eachindex" are just 
serial. Is it possible to rewrite the code in a parallel way? I don't know 
how to implement it with @parallel, SharedArray or the simple @spawn 
features. To me, the documentations is far from clear.


using JLD

function ground(ϕ0::Array{Complex{Float64},2}, dx::Float64
                , dt::Float64)

    FFTW.set_num_threads(CPU_CORES)

    ns = size( ϕ0, 1)
    x = get_x(ns, dx)
    p = get_p(ns, dx)

    ##### FFT plan
    p_fft! = plan_fft!( similar(ϕ0), flags=FFTW.MEASURE )

    prop_x = Array( Float64, ns , ns)
    prop_p = similar( prop_x )

    @inbounds for j in 1:ns
        @inbounds for i in 1:ns
            prop_x[i, j] = exp( -get_potential(x[i], x[j]) * dt / 2 )
            prop_p[i, j] = exp( -(p[i]^2 + p[j]^2)/2.0 * dt )
        end
    end

    normϕ = √(sumabs2(ϕ0)) * dx
    scale!( ϕ0, 1 / normϕ )

    ϕ2 = similar( ϕ0 )
    Δϕ = Array(Float64, 0)
    push!(Δϕ, 1.0)
    nn = 0
    while Δϕ[1] > 1.e-15
        for j in eachindex(ϕ2)
            ϕ2[j] = ϕ0[j] * prop_x[j]
        end

        p_fft! * ϕ2
        for j in eachindex(ϕ2)
            ϕ2[j] *= prop_p[j]
        end

        p_fft! \ ϕ2
        for j in eachindex(ϕ2)
            ϕ2[j] *= prop_x[j]
        end

        normϕ = √(sumabs2(ϕ2)) * dx
        scale!( ϕ2, 1 / normϕ )

        nn += 1
        
        empty!(Δϕ)
        push!(Δϕ, maxabs( ϕ2 - ϕ0 ))
        ϕ0, ϕ2 = ϕ2, ϕ0
    end

    save("data/gs.jld", "ϕ", ϕ0)

    ϕ0
end


Reply via email to