The code get great performance improvement. My original version cost 133 s 
and about 166 G memories, while your version cost 61 s and about 423M 
memories!



在 2015年12月15日星期二 UTC+8下午7:34:55,Pablo Zubieta写道:
>
> Stefan said that your code is well type (type stable). Have you tried 
> Yichao's suggestions?
>
> Here is the function ground() taking them into account:
>
> function ground()
>     R = 320.
>     Ks = 2^11 - 1
>     x = linspace(-R, R, Ks+1)
>     dx = 2R/Ks
>     dt = 0.1
>     pm = 2π/2dx
>     px = circshift( linspace(-pm, pm, Ks+1),  round(Int64, Ks/2) )
>     FFTW.set_num_threads(8)
>
>
>     V = Float64[Potential(ix, iy)  for ix in x, iy in x]
>     ko = Float64[e^(-dt/4* (Ko(ipx,ipy))) for ipx in px, ipy in px]
>     ko2 = ko.^2
>     vo = Float64[e^(-dt* (Potential(ix,iy))) for ix in x, iy in x]
>     ϕ = Array(Complex128,(Ks+1,Ks+1))
>     Pl = plan_fft!(ϕ; flags=FFTW.MEASURE)
>     for i in eachindex(ϕ); ϕ[i] = V[i]; end # Conversion is automatic here
>
>
>     invnormphi = 1. / (sqrt(sumabs2(ϕ))*dx)
>     scale!(ϕ, invnormphi)
>
>
>     Pl * ϕ # No need to assign to ϕ
>     for i in eachindex(ϕ); ϕ[i] *= ko[i]; end
>     Pl \ ϕ
>
>     ϕ′ = similar(ϕ)
>
>     Δϕ = 1.
>     nstep = 0
>     println("start loop")
>     while Δϕ > 1.e-15
>         copy!(ϕ′, ϕ)
>         for i in eachindex(ϕ), ϕ[i] *= vo[i]; end
>
>         invnormphi = 1. / (sqrt(sumabs2(ϕ))*dx)
>         scale!(ϕ, invnormphi)
>
>
>         Pl * ϕ
>         for i in eachindex(ϕ); ϕ[i] *= ko2[i]; end
>         Pl \ ϕ
> # if check  Δϕ for every step, 35s is needed.
>         if nstep>500
>             Δϕ = maxabs(ϕ-ϕ′)
>         end
>         nstep += 1
>         if mod(nstep,200)==0
>             print(nstep,"  ")
>             println("Δϕ=",Δϕ)
>         end
>
>     end
>     for i in eachindex(ϕ); ϕ[i] *= vo[i]; end
>
>     Pl * ϕ
>     for i in eachindex(ϕ); ϕ[i] *= ko[i]; end
>     Pl \ ϕ
>
>
>     invnormphi = 1. / (sqrt(sumabs2(ϕ))*dx)
>     scale!(ϕ, invnormphi)
> end
>
>
>
>

Reply via email to