I need to repeatedly take subsets of large symmetric matrices
(symmetrically with the same subset indicies for rows and columns) and
first tried like so:
function extract_with_getindex(matrix, subset)
matrix[subset,:][:,subset]
end
which does not scale as well as
function extract_in_loop(matrix, subset)
m = length(subset)
res = zeros(m, m)
for i in 1:m
si = subset[i]
for j in 1:m
res[i, j] = matrix[si, subset[j]]
end
end
res
end
as can be seen in:
n = 100, extract_in_loop = 4.792700000000001e-6, extract_with_getindex =
8.1156e-6, factor faster = 1.6933252655079598
n = 1000, extract_in_loop = 1.75141e-5, extract_with_getindex =
0.0002920263, factor faster = 16.673782837827808
n = 10000, extract_in_loop = 9.493099999999999e-5, extract_with_getindex =
0.0091407535, factor faster = 96.28839367540635
from the benchmark code below. If there is some even faster way of
achieving this I'd appreciate pointers/help.
Thanks,
Robert Feldt
# Do some runs to ensure everything is compiled
m = random_large_matrix(1000)
ss = sort(shuffle(collect(1:1000))[1:10])
r1 = extract_in_loop(m, ss)
r2 = extract_with_getindex(m, ss)
num_features(n) = rand(int(floor(log(n))):int(floor(10*log(n))))
NumRepeats = 10
for n in [100, 1000, 10000]
times1 = zeros(NumRepeats)
times2 = zeros(NumRepeats)
for i in 1:NumRepeats
matrix = randn(n, n)
ss = sort(shuffle(collect(1:n))[1:num_features(n)])
tic()
res1 = extract_in_loop(matrix, ss)
times1[i] = toq()
tic()
res2 = extract_with_getindex(matrix, ss)
times2[i] = toq()
if res1 != res2
println("Results differ!")
end
end
println("n = $(n), extract_in_loop = $(mean(times1)),
extract_with_getindex = $(mean(times2)), factor faster =
$(mean(times2)/mean(times1))")
end