Unfortunately I haven't been able to find a minimal reproducing example, so 
first the relevant snippet then the whole function. I have some code which 
contains the following inner loop:

for after in 1:(length(path) - 1) # can't insert after end of path
counter += 1
c = inscost(k, after)
if c < bestCost
bestCost = c
bestInsertion = (k, after)
end
end

where inscost is a one-line closure:

function inscost(k, after)
return distmat[path[after], k] + 
       distmat[k, path[after + 1]] -
       distmat[path[after], path[after + 1]]
end

Running on Julia 0.5.0-rc2 is about 4x slower than running on Julia 0.4.6, 
and it seems to be because 0.5.0 is doing way more allocations. In 
particular, using the @timev macro shows that Julia 0.5.0 does 6 pool 
allocs* per inner loop, whereas Julia 0.4.6 does only 1 pool alloc per 
inner loop. This is true whether I prefix function inscost with @inline, 
@noinline, or no decoration. On a typical function call, Julia 0.5.0 
allocates 18.6 MB, and Julia 0.4.6 allocates 3.0 MB.

If I move the body of inscost into the inner loop, Julia 0.5.0 is now 
lightning fast and allocates only 584 KB for the same function call:

for after in 1:(length(path) - 1) # can't insert after end of path
counter += 1
c = distmat[path[after], k] + 
    distmat[k, path[after + 1]] -
    distmat[path[after], path[after + 1]]
if c < bestCost
bestCost = c
bestInsertion = (k, after)
end
end

Any thought what's going on, how to avoid this performance penalty in 
general (manually inlining here is easy, but could be painful or hard to 
diagnose elsewhere), and whether this is worth a Github issue?

For context, here's the full function. It's a simple/naive implementation 
of the cheapest insertion heuristic for the traveling salesman problem.

function cheapest_insertion{T<:Real}(distmat::Matrix{T}, 
initpath::Vector{Int})
check_square(distmat, "Distance matrix passed to cheapest_insertion must be 
square.")
n = size(distmat, 1)
path = copy(initpath)
# collect cities to visited
visitus = setdiff(collect(1:n), initpath)
# helper for insertion cost
# tour cost change for inserting node k after the node at index after in 
the path
function inscost(k, after)
return distmat[path[after], k] + 
 distmat[k, path[after + 1]] -
 distmat[path[after], path[after + 1]]
end
while !isempty(visitus)
bestCost = Inf
bestInsertion = (-1, -1)
for k in visitus
for after in 1:(length(path) - 1) # can't insert after end of path
c = inscost(k, after)
if c < bestCost
bestCost = c
bestInsertion = (k, after)
end
end
end
# bestInsertion now holds (k, after)
# insert into path, remove from to-do list
k, after = bestInsertion
insert!(path, after + 1, k)
visitus = setdiff(visitus, k)
end
return (path, pathcost(distmat, path))
end

* confession time: I don't know what "pool allocs" is measuring or whether 
to focus on that vs. bytes allocated...

Reply via email to