Please consider this implementation of a naive nearest neighbors distance 
search:

function mindists_sq(pos, dists_min, Acp, t)
    for i in 1:size(pos, 2)
        dists_min[i] = Inf
        for j in 1:size(Acp, 2)
            sum!(t, (sub(pos, :, i) - sub(Acp, :, j)).^2)
            if t[1] < dists_min[i]
                dists_min[i] = t[1]
            end
        end
    end
    return dists_min
end


And this:

function test()
    const pos = rand(3, 64)
    const Acp = rand(3, 1200)
    const dists_min = zeros(64)
    const tmp = zeros(typeof(Acp[1]), 1)
    @time mindists_sq(pos, dists_min, Acp, tmp)
end
test()


I get:

elapsed time: 1.075699957 seconds (130182144 bytes allocated, 10.60% gc time
)


There are two very surprising things here:

   -  First I am not sure where the allocation comes from.  
   (sub(pos, :, i) - sub(Acp, :, j)).^2
   most certainly generate a new array in the process. t[1] and 
   dists_min[i] might too (but these are immutable bit types). I tried to 
   unroll the loop, but I got even more allocation and it's twice slower.
   - Second, the execution time is insane. I tried to put @inbounds 
   wherever possible but without success. More surprising, the following is 10 
   times faster and allocates 90MB less:
   function mindists_sq(pos, dists_min, Acp)
       for i in 1:size(pos, 2)
           dists_min[i] = Inf
           for j in 1:size(Acp, 2)
               t = sum((pos[:, i]-Acp[:, j]).^2)
               if t .< dists_min[i]
                   dists_min[i] = t
               end
           end
       end
       return dists_min
   end
   
elapsed time: 0.11141394 seconds (37555212 bytes allocated, 48.96% gc time)


Is there anything that can help me map new allocations with line numbers? 
Is there any way to get the allocation number to zero? Can you reproduce 
the described behavior?

Julia Version 0.3.1
Commit c03f413 (2014-09-21 21:30 UTC)
Platform Info:
  System: Linux (x86_64-unknown-linux-gnu)
  CPU: Intel(R) Core(TM)2 Duo CPU     P8700  @ 2.53GHz
  WORD_SIZE: 64
  BLAS: libopenblas (NO_AFFINITY PENRYN)
  LAPACK: liblapack
  LIBM: libm
  LLVM: libLLVM-3.3

Best regards

Reply via email to