Is it possible to chain iterators in julia efficiently? By chaining I mean 
the following. I have a tuple of iterators and I would like to have a 
function "chain" such that

for itr in iters, i in itr
    ...
end

is equivalent to 

for i in chain(iters)
   ...
end

I found such a function in the Iterators package, however it is not type 
stable and hence dog slow. So I tried to cook up a fast version as follows:

immutable FastChain{n,T}
    xss::NTuple{n, T}
end

import Base.start
import Base.next
import Base.done

start(it::FastChain) = 1, start(it.xss[1])

function next{n, T}(it::FastChain{n,T}, state)
    i, xs_state = state
    v, xs_state = next(it.xss[i], xs_state)
    
    if done(it.xss[i], xs_state)
        i += 1
        if i <= n 
            xs_state = start(it.xss[i])
        end
    end
    return v, (i, xs_state)
end

done{n,T}(it::FastChain{n,T}, state) = state[1] > n

While this is indeed type stable and faster then Iterators.chain, it is 
still much slower then the manual version:

function perf_chain(ch)
    res = 0
    for i in ch
        res += 1
    end
    res
end

function perf_manual(iters)
    res = 0
    for itr in iters, i in itr
        res += 1
    end
    res 
end

importall Base.Cartesian
typealias Ci CartesianIndex
typealias Cr CartesianRange

# create some iterators e.g. cartesian ranges with 2_000_000 elements
iter1 = Cr(Ci((2, 3)), Ci((1000, 2000)))
iter2 = Cr(Ci((2, 3)), Ci((2000, 1000)))
iters = (iter1, iter2)
ch = FastChain(iters)

@assert perf_chain(ch) == perf_manual(iters)

@time perf_chain(ch)  # 0.035134 seconds (5 allocations: 176 bytes)
@time perf_manual(iters)  # 0.002136 seconds (5 allocations: 176 bytes)

I do not understand where this huge performance gap comes from? Any ideas 
for speedup?

Also I would love to chain iterators which yield the same type of elements, 
but have different state type. Is there a way to achieve this efficiently? 
What worries me is that I don't see how to make the `next` function type 
stable in this case.

Reply via email to