I need to repeatedly take subsets of large symmetric matrices 
(symmetrically with the same subset indicies for rows and columns) and 
first tried like so:

function extract_with_getindex(matrix, subset)
  matrix[subset,:][:,subset]
end

which does not scale as well as

function extract_in_loop(matrix, subset)
  m = length(subset)
  res = zeros(m, m)
  for i in 1:m
    si = subset[i]
    for j in 1:m
      res[i, j] = matrix[si, subset[j]]
    end
  end
  res
end

as can be seen in:

n = 100, extract_in_loop = 4.792700000000001e-6, extract_with_getindex = 
8.1156e-6, factor faster = 1.6933252655079598
n = 1000, extract_in_loop = 1.75141e-5, extract_with_getindex = 
0.0002920263, factor faster = 16.673782837827808
n = 10000, extract_in_loop = 9.493099999999999e-5, extract_with_getindex = 
0.0091407535, factor faster = 96.28839367540635

from the benchmark code below. If there is some even faster way of 
achieving this I'd appreciate pointers/help.

Thanks,

Robert Feldt


# Do some runs to ensure everything is compiled
m = random_large_matrix(1000)
ss = sort(shuffle(collect(1:1000))[1:10])
r1 = extract_in_loop(m, ss)
r2 = extract_with_getindex(m, ss)

num_features(n) = rand(int(floor(log(n))):int(floor(10*log(n))))

NumRepeats = 10
for n in [100, 1000, 10000]
  times1 = zeros(NumRepeats)
  times2 = zeros(NumRepeats)
  for i in 1:NumRepeats
    matrix = randn(n, n)
    ss = sort(shuffle(collect(1:n))[1:num_features(n)])
    tic()
      res1 = extract_in_loop(matrix, ss)
    times1[i] = toq()
    tic()
      res2 = extract_with_getindex(matrix, ss)
    times2[i] = toq()
    if res1 != res2
      println("Results differ!")
    end
  end
  println("n = $(n), extract_in_loop = $(mean(times1)), 
extract_with_getindex = $(mean(times2)), factor faster = 
$(mean(times2)/mean(times1))")
end

Reply via email to