I agree with John about the insane amount of copying going on. However, I
added some @time calls to your code and it looks like most of the time is
spent in conj. You probably want to precompute that once and reuse it in
both the B and C calculations; there's a rough sketch of that after the
timing run below.

function expensive_hat(S::Array{Complex{Float64},2}, mx::Array{Int64,2},
                       my::Array{Int64,2})
    samples = 64

    @time A = @parallel (+) for i = 1:samples
        abs2(S[:,i][my] .* S[:,i][mx])
    end

    # B unrolled for the first column only, so each step of the original
    # expression gets its own @time (which means B is not the full sum over
    # samples while profiling):
    #    @time B = @parallel (+) for i = 1:samples
    #        abs2( sqrt( conj(S[:,i][mx+my]) .* S[:,i][mx+my] ) )
    @time b0 = conj(S[:,1][mx+my])
    @time b1 = b0 .* S[:,1][mx+my]
    @time b2 = sqrt(b1)
    @time B = abs2(b2)
    #    end

    @time C = @parallel (+) for i = 1:samples
        conj(S[:,i][mx+my]) .* S[:,i][my] .* S[:,i][mx]
    end

    @time ans = (A .* B ./ samples ./ samples, C ./ samples)

    return ans
end

data = rand(24000,64);
limit = 2000;

ix = int64([1:limit/2]);
iy = ix[1:end/2];
mg = zeros(Int64,length(iy),length(ix));
mx = broadcast(+,ix',mg);
my = broadcast(+,iy,mg);
S = rfft(data,1)./24000;

@time (AB, C) = expensive_hat(S,mx,my)
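
To show what I mean by precomputing: conjugate S once up front and reuse
that inside B and C, so neither loop calls conj on the big mx+my-indexed
matrices every iteration. Rough, untested sketch below; expensive_hat2, Sc,
and sxy are just names I made up, and the three-loop structure is otherwise
your original function.

function expensive_hat2(S::Array{Complex{Float64},2}, mx::Array{Int64,2},
                        my::Array{Int64,2})
    samples = 64
    Sc = conj(S)    # conj over all of S, once, outside the parallel loops

    A = @parallel (+) for i = 1:samples
        abs2(S[:,i][my] .* S[:,i][mx])
    end

    B = @parallel (+) for i = 1:samples
        sxy = S[:,i][mx+my]    # index once, reuse below
        abs2(sqrt(Sc[:,i][mx+my] .* sxy))
    end

    C = @parallel (+) for i = 1:samples
        Sc[:,i][mx+my] .* S[:,i][my] .* S[:,i][mx]
    end

    # Note: every S[:,i], Sc[:,i] and the [mx+my] indexing still makes a
    # copy, which is John's point about copies vs. references.
    return (A .* B ./ samples ./ samples, C ./ samples)
end

@time (AB2, C2) = expensive_hat2(S, mx, my)

If conj really dominates, you could go further and merge the B and C loops
so the conjugated, indexed matrix is built literally once per sample, but
that means reducing over a combined array, so I kept the sketch simple.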


On Mon, Feb 3, 2014 at 6:59 AM, John Myles White <[email protected]> wrote:

> One potential performance issue here is that the array indexing steps like 
> S[:,i][my]
> currently produce copies, not references, which would slow things down.
> Someone with more expertise in parallel programming might have better
> suggestions than that.
>
> Have you tried profiling your code?
> http://docs.julialang.org/en/latest/stdlib/profile/
>
>  -- John
>
> On Feb 3, 2014, at 6:32 AM, Alex C <[email protected]> wrote:
>
> Hi,
> I am trying to port some Matlab code into Julia in order to improve
> performance. The Julia parallel code currently takes about 2-3x as long as
> my Matlab implementation. I am at wit's end as to how to improve the
> performance. Any suggestions?  I tried using pmap but couldn't figure out
> how to implement it in this case. FYI, I am using Julia on Windows 7 with
> nprocs() = 5.
>
> Thanks,
> Alex
>
>
> function expensive_hat(S::Array{Complex{Float64},2}, mx::Array{Int64,2},
>                        my::Array{Int64,2})
>
>     samples = 64
>
>     A = @parallel (+) for i = 1:samples
>         abs2(S[:,i][my].*S[:,i][mx]);
>     end
>
>     B = @parallel (+) for i = 1:samples
>         abs2(sqrt(conj(S[:,i][mx+my]).*S[:,i][mx+my]));
>     end
>
>     C = @parallel (+) for i = 1:samples
>         conj(S[:,i][mx+my]).*S[:,i][my].*S[:,i][mx];
>     end
>
>     return (A.*B./samples./samples, C./samples);
> end
>
> data = rand(24000,64);
> limit = 2000;
>
> ix = int64([1:limit/2]);
> iy = ix[1:end/2];
> mg = zeros(Int64,length(iy),length(ix));
> mx = broadcast(+,ix',mg);
> my = broadcast(+,iy,mg);
> S = rfft(data,1)./24000;
>
> @elapsed (AB, C) = expensive_hat(S,mx,my)
>
>
>
