Unfortunately I haven't been able to find a minimal reproducing example, so
first the relevant snippet then the whole function. I have some code which
contains the following inner loop:
for after in 1:(length(path) - 1) # can't insert after end of path
counter += 1
c = inscost(k, after)
if c < bestCost
bestCost = c
bestInsertion = (k, after)
end
end
where inscost is a one-line closure:
function inscost(k, after)
return distmat[path[after], k] +
distmat[k, path[after + 1]] -
distmat[path[after], path[after + 1]]
end
Running on Julia 0.5.0-rc2 is about 4x slower than running on Julia 0.4.6,
and it seems to be because 0.5.0 is doing way more allocations. In
particular, using the @timev macro shows that Julia 0.5.0 does 6 pool
allocs* per inner loop, whereas Julia 0.4.6 does only 1 pool alloc per
inner loop. This is true whether I prefix function inscost with @inline,
@noinline, or no decoration. On a typical function call, Julia 0.5.0
allocates 18.6 MB, and Julia 0.4.6 allocates 3.0 MB.
If I move the body of inscost into the inner loop, Julia 0.5.0 is now
lightning fast and allocates only 584 KB for the same function call:
for after in 1:(length(path) - 1) # can't insert after end of path
counter += 1
c = distmat[path[after], k] +
distmat[k, path[after + 1]] -
distmat[path[after], path[after + 1]]
if c < bestCost
bestCost = c
bestInsertion = (k, after)
end
end
Any thought what's going on, how to avoid this performance penalty in
general (manually inlining here is easy, but could be painful or hard to
diagnose elsewhere), and whether this is worth a Github issue?
For context, here's the full function. It's a simple/naive implementation
of the cheapest insertion heuristic for the traveling salesman problem.
function cheapest_insertion{T<:Real}(distmat::Matrix{T},
initpath::Vector{Int})
check_square(distmat, "Distance matrix passed to cheapest_insertion must be
square.")
n = size(distmat, 1)
path = copy(initpath)
# collect cities to visited
visitus = setdiff(collect(1:n), initpath)
# helper for insertion cost
# tour cost change for inserting node k after the node at index after in
the path
function inscost(k, after)
return distmat[path[after], k] +
distmat[k, path[after + 1]] -
distmat[path[after], path[after + 1]]
end
while !isempty(visitus)
bestCost = Inf
bestInsertion = (-1, -1)
for k in visitus
for after in 1:(length(path) - 1) # can't insert after end of path
c = inscost(k, after)
if c < bestCost
bestCost = c
bestInsertion = (k, after)
end
end
end
# bestInsertion now holds (k, after)
# insert into path, remove from to-do list
k, after = bestInsertion
insert!(path, after + 1, k)
visitus = setdiff(visitus, k)
end
return (path, pathcost(distmat, path))
end
* confession time: I don't know what "pool allocs" is measuring or whether
to focus on that vs. bytes allocated...