Hey guys,
I'm currently playing with some Eratosthenes sieving in Julia, and found a
strange behavior of memory allocation.
My naive sieve is as follows:
#=
Naive version of Erato sieve.
* bitarray to store primes
* only eliminate multiples of primes
* separately eliminate even non-primes
=#
function erato1(n::Int)
# Create bitarray to store primes
primes_mask = trues(n)
primes_mask[1] = false
# Eliminate even numbers first
for i = 4:2:n
primes_mask[i] = false
end
# Eliminate odd non-primes numbers
for i = 3:2:n
if primes_mask[i]
for j = (i + i):i:n
primes_mask[j] = false
end
end
end
# Collect every primes in an array
n_primes = countnz(primes_mask)
primes_arr = Array(Int64, n_primes)
collect1!(primes_mask, primes_arr)
end
With the collect1! function that takes a BitArray as argument and return an
Array containing the primes numbers.
function collect1!(primes_mask::BitArray{1}, primes_arr::Array{Int64, 1})
prime_index = 1
for i = 2:n
if primes_mask[i]
primes_arr[prime_index] = i
prime_index += 1
end
end
return primes_arr
end
The codes works, but is slow because of a lot of memory allocation at the
line:
primes_arr[prime_index] = i
Here is an extract of the memory allocation profile
(--track-allocation=user):
- function collect1!(primes_mask::BitArray{1}, primes_arr::Array{
Int64, 1})
0 prime_index = 1
-84934576 for i = 2:n
0 if primes_mask[i]
*184350208* primes_arr[prime_index] = i
0 prime_index += 1
- end
- end
0 return primes_arr
- end
But, if I inline the definition of collect1! into the erato1, this is much
faster and the allocation in the loop of collect disapears. Here is the
code updated:
function erato1(n::Int)
# Create bitarray to store primes
primes_mask = trues(n)
primes_mask[1] = false
# Eliminate even numbers first
for i = 4:2:n
primes_mask[i] = false
end
# Eliminate odd non-primes numbers
for i = 3:2:n
if primes_mask[i]
for j = (i + i):i:n
primes_mask[j] = false
end
end
end
# Collect every primes in an array
n_primes = countnz(primes_mask)
primes_arr = Array(Int64, n_primes)
prime_index = 1
for i = 2:n
if primes_mask[i]
primes_arr[prime_index] = i
prime_index += 1
end
end
return primes_arr
end
And the memory profile seems more reasonable:
0 n_primes = countnz(primes_mask)
92183392 primes_arr = Array(Int64, n_primes)
0 prime_index = 1
0 for i = 2:n
0 if primes_mask[i]
0 primes_arr[prime_index] = i
0 prime_index += 1
- end
- end
So I'm wondering why the simple fact of inlining the code would remove the
massive memory allocation when assigning to the array.
Thank you for your help,
Rémis