Hey guys,

I'm currently playing with some Eratosthenes sieving in Julia, and found a 
strange behavior of memory allocation.
My naive sieve is as follows:

#=
Naive version of Erato sieve.
    * bitarray to store primes
    * only eliminate multiples of primes
    * separately eliminate even non-primes
=#
function erato1(n::Int)
    # Create bitarray to store primes
    primes_mask = trues(n)
    primes_mask[1] = false

    # Eliminate even numbers first
    for i = 4:2:n
        primes_mask[i] = false
    end

    # Eliminate odd non-primes numbers
    for i = 3:2:n
        if primes_mask[i]
            for j = (i + i):i:n
                primes_mask[j] = false
            end
        end
    end

    # Collect every primes in an array
    n_primes = countnz(primes_mask)
    primes_arr = Array(Int64, n_primes)
    collect1!(primes_mask, primes_arr)
end


With the collect1! function that takes a BitArray as argument and return an 
Array containing the primes numbers.

function collect1!(primes_mask::BitArray{1}, primes_arr::Array{Int64, 1})
    prime_index = 1
    for i = 2:n
        if primes_mask[i]
            primes_arr[prime_index] = i
            prime_index += 1
        end
    end
    return primes_arr
end

The codes works, but is slow because of a lot of memory allocation at the 
line:
primes_arr[prime_index] = i

Here is an extract of the memory allocation profile 
(--track-allocation=user):

        - function collect1!(primes_mask::BitArray{1}, primes_arr::Array{
Int64, 1})
        0     prime_index = 1
-84934576     for i = 2:n
        0         if primes_mask[i]
*184350208*             primes_arr[prime_index] = i
        0             prime_index += 1
        -         end
        -     end
        0     return primes_arr
        - end



But, if I inline the definition of collect1! into the erato1, this is much 
faster and the allocation in the loop of collect disapears. Here is the 
code updated:

function erato1(n::Int)
    # Create bitarray to store primes
    primes_mask = trues(n)
    primes_mask[1] = false

    # Eliminate even numbers first
    for i = 4:2:n
        primes_mask[i] = false
    end

    # Eliminate odd non-primes numbers
    for i = 3:2:n
        if primes_mask[i]
            for j = (i + i):i:n
                primes_mask[j] = false
            end
        end
    end

    # Collect every primes in an array
    n_primes = countnz(primes_mask)
    primes_arr = Array(Int64, n_primes)
    prime_index = 1
    for i = 2:n
        if primes_mask[i]
            primes_arr[prime_index] = i
            prime_index += 1
        end
    end
    return primes_arr
end

And the memory profile seems more reasonable:

        0     n_primes = countnz(primes_mask)
 92183392     primes_arr = Array(Int64, n_primes)
        0     prime_index = 1
        0     for i = 2:n
        0         if primes_mask[i]
        0             primes_arr[prime_index] = i
        0             prime_index += 1
        -         end
        -     end


So I'm wondering why the simple fact of inlining the code would remove the 
massive memory allocation when assigning to the array.

Thank you for your help,
Rémis

Reply via email to