I transformed it into a single-file testcase:
#########################################################
type NeuralLayer
    w::Matrix{Float32}      # weights
    cm::Matrix{Float32}     # connection matrix
    b::Vector{Float32}      # biases
    scale::Vector{Float32}  # output scaling factors
    a_func::Symbol          # activation function
    hx::Vector{Float32}     # input values
    pa::Vector{Float32}     # pre-activation values
    pr::Vector{Float32}     # predictions (activation values)
    frozen::Bool
end
function forward_propagate(nl::NeuralLayer, x::Vector{Float32})
    nl.hx = x
    wx = nl.w * nl.hx
    nl.pa = nl.b+wx
    nl.pr = tanh(nl.pa).*nl.scale
end
out_dim = 10
in_dim = 10
b = sqrt(6) / sqrt(in_dim + out_dim)  # Glorot/Xavier-style uniform init bound
nl = NeuralLayer(
    float32(2.0b * rand(Float32,out_dim,in_dim) - b),                  # random weights in [-b, b]
    ones(Float32,out_dim,in_dim),                                      # connection matrix
    float32(map(x->x*(randbool() ? -1 : 1), rand(out_dim)*rand(1:4))), # biases
    rand(Float32,out_dim),                                             # scale
    :tanh,
    rand(Float32,in_dim),
    rand(Float32,out_dim),
    rand(Float32,out_dim),
    false
)
x = ones(Float32,in_dim)
forward_propagate(nl,x)
clear_malloc_data()
for i in 1:(1920*1080)
forward_propagate(nl,x)
end
println("nl.pr is: $(nl.pr)")
#############################################################################
Now the interesting part of the .mem file looks like this:
- function forward_propagate(nl::NeuralLayer,x::Vector{Float32})
0 nl.hx = x
0 wx = nl.w * nl.hx
348368752 nl.pa = nl.b+wx
0 nl.pr = tanh(nl.pa).*nl.scale
- end
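(For reference: the .mem file above is produced by running the script with allocation tracking turned on, e.g.

julia --track-allocation=user testcase.jl

where testcase.jl is just a placeholder name for the file above. The clear_malloc_data() call before the loop resets the counters, so the numbers shown should only reflect allocations from the repeated forward_propagate calls, not from compilation or the first call.)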
I split the matrix multiply and the addition of the bias vector into two
separate lines, and it looks like it's the vector addition that's allocating
all of the memory (which seems surprising, but maybe I'm missing something).
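As a possible workaround (just a sketch, the name forward_propagate_inplace! is mine and I haven't profiled this version): nl.b+wx has to create a fresh result vector on every call, so writing the addition and the activation as an explicit loop into the preallocated pa and pr fields should avoid that per-iteration allocation:

function forward_propagate_inplace!(nl::NeuralLayer, x::Vector{Float32})
    nl.hx = x
    wx = nl.w * nl.hx                            # the multiply still creates a new vector wx
    for i in 1:length(nl.b)
        nl.pa[i] = nl.b[i] + wx[i]               # add the bias into the existing pa vector
        nl.pr[i] = tanh(nl.pa[i]) * nl.scale[i]  # activate and scale into the existing pr vector
    end
    nl.pr
end

Re-running with allocation tracking (or wrapping a single call in @allocated) should show whether the count on that line drops.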
Phil