Hi,
I have been trying to code a DRAW-like attention model for a project. The code
does not compile when floatX is set to float32, due to a dtype conflict: Theano
complains that some operation inside scan makes the result float64, while my
outputs_info are initialized as float32. I have checked everything multiple
times. All numpy arrays are in float32, and I could not find any place where an
upcast to float64 could occur. Can someone please help me figure out what
exactly is causing the error?
Note: I have reduced code from multiple files to the single file below, which
is about 135 lines of code.
Also, everything works fine when the floatX flag is set to float64.
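For reference, this is the kind of silent upcast I was looking for: an integer
tensor (e.g. the int64 result of a plain T.arange) combined with a float
promotes to float64 even when floatX is float32. A minimal sketch, not taken
from my model:

import theano
import theano.tensor as T

# T.arange with a Python int defaults to int64; mixing int64 with a
# float constant (or a float32 tensor) upcasts the result to float64,
# regardless of the floatX setting
idx = T.arange(5)        # dtype: int64
shifted = idx - 2.0      # dtype: float64
print(shifted.dtype)     # -> 'float64'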
Thanks and Regards,
Pranav
import numpy as np
import theano
import theano.tensor as T

def batched_dot(A, B):
    # batch of matrix products: (batch, i, j) x (batch, j, k) -> (batch, i, k)
    C = A.dimshuffle([0, 1, 2, 'x']) * B.dimshuffle([0, 'x', 1, 2])
    return C.sum(axis=-2)
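# (for reference: batched_dot above is a broadcasted batch of matrix
#  products, so on concrete arrays A of shape (b, i, j) and B of shape
#  (b, j, k) its output should match
#  np.array([a.dot(b) for a, b in zip(A, B)]) up to float precision)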

class Attender(object):
    def __init__(self, img_dim, N):
        self.img_dim = img_dim
        self.N = N

    def read(self, I, center_y, center_x, delta, sigma):
        # DRAW-style read: lay an N x N grid of Gaussian filters over the
        # image I and return the filtered N x N glimpse
        N = self.N
        batch_size = I.shape[0]
        rng = T.arange(N) - N / 2.0 + 0.5
        muX = center_x.dimshuffle([0, 'x']) + delta.dimshuffle([0, 'x']) * rng  # (batch_size, N)
        muY = center_y.dimshuffle([0, 'x']) + delta.dimshuffle([0, 'x']) * rng
        a = T.arange(self.img_dim)
        b = T.arange(self.img_dim)
        # Gaussian filterbanks, each of shape (batch_size, N, img_dim)
        FX = T.exp(-(a - muX.dimshuffle([0, 1, 'x'])) ** 2 / 2.0 / sigma.dimshuffle([0, 'x', 'x']) ** 2)
        FY = T.exp(-(b - muY.dimshuffle([0, 1, 'x'])) ** 2 / 2.0 / sigma.dimshuffle([0, 'x', 'x']) ** 2)
        # normalize each filter; the small epsilon avoids division by zero
        FX = FX / (FX.sum(axis=-1).dimshuffle(0, 1, 'x') + 1e-4)
        FY = FY / (FY.sum(axis=-1).dimshuffle(0, 1, 'x') + 1e-4)
        G = batched_dot(batched_dot(FY, I), FX.transpose([0, 2, 1]))  # (batch_size, N, N)
        return G

def orthogonal(shape):
    """
    Taken from: https://github.com/Lasagne/Lasagne/blob/master/lasagne/init.py#L327-L367
    """
    a = np.random.normal(0.0, 1.0, shape)
    u, _, v = np.linalg.svd(a, full_matrices=False)
    q = u if u.shape == shape else v  # pick the one with the correct shape
    return q.astype(theano.config.floatX)

def gp_from_hidden(H, W_att, img_dim, N):
    # map the hidden state to the attention (glimpse) parameters
    gp = T.dot(W_att, H.T).T
    center_y = gp[:, 0]
    center_x = gp[:, 1]
    delta = T.abs_(gp[:, 2])
    sigma = T.abs_(gp[:, 3])
    # rescale the centers from [-1, 1] to image coordinates and the
    # stride so that the N x N grid can span the image
    center_y = (img_dim + 1.0) * (center_y + 1.0) / 2.0
    center_x = (img_dim + 1.0) * (center_x + 1.0) / 2.0
    delta = (img_dim - 1.0) / (N - 1.0) * delta
    return center_y, center_x, delta, sigma
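# (worked example with the values used below, img_dim = 32 and N = 5:
#  a raw center of 0.0 maps to pixel (32 + 1) * (0.0 + 1.0) / 2 = 16.5,
#  i.e. the image midpoint, and the stride is scaled by
#  (32 - 1) / (5 - 1) = 7.75)
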
num_states = 8
img_dim = 32
N = 5
num_glimpses = 20
fg_bias_init = 1.0
batch_size = 1
num_input = 2 * (N ** 2)
W = np.empty((num_states * 4, num_input + num_states + 1), dtype=theano.config.floatX)
for i in range(4):
    W[i * num_states:(i + 1) * num_states, :num_input] = orthogonal((num_states, num_input))
    W[i * num_states:(i + 1) * num_states, num_input:-1] = orthogonal((num_states, num_states))
W[2 * num_states:3 * num_states, -1] = fg_bias_init  # forget gate bias
WgA = np.random.randn(4, num_states)
WgB = np.random.randn(4, num_states)
W = W.astype(theano.config.floatX)
WgA = WgA.astype(theano.config.floatX)
WgB = WgB.astype(theano.config.floatX)
W = theano.shared(W)
WgA = theano.shared(WgA)
WgB = theano.shared(WgB)
attender = Attender(img_dim, N)
input = T.tensor3('input')

def step(c_tm1, h_tm1, input, W, WgA, WgB):
    center_yA, center_xA, deltaA, sigmaA = gp_from_hidden(h_tm1, WgA, img_dim, N)
    center_yB, center_xB, deltaB, sigmaB = gp_from_hidden(h_tm1, WgB, img_dim, N)
    gA = attender.read(input[:, :, :img_dim], center_yA, center_xA, deltaA, sigmaA)  # (batch_size, N, N)
    gB = attender.read(input[:, :, img_dim:], center_yB, center_xB, deltaB, sigmaB)
    flat_gA = gA.reshape((batch_size, N * N))  # (batch_size, N * N)
    flat_gB = gB.reshape((batch_size, N * N))
    # concatenate gA, gB, h_tm1 and a bias column into a single matrix
    # of shape (batch_size, N * N + N * N + num_states + 1)
    lstm_inp = T.concatenate([flat_gA, flat_gB, h_tm1, T.ones((batch_size, 1))], axis=1)
    # multiply by the LSTM weights:
    # (num_states * 4, num_input + num_states + 1) dot (batch_size, N * N + N * N + num_states + 1).T
    pre_act = T.dot(W, lstm_inp.T)  # (4 * num_states, batch_size)
    # split up to get the individual gates
    z = T.tanh(pre_act[0 * num_states:1 * num_states])  # (num_states, batch_size)
    i = T.nnet.sigmoid(pre_act[1 * num_states:2 * num_states])
    f = T.nnet.sigmoid(pre_act[2 * num_states:3 * num_states])
    o = T.nnet.sigmoid(pre_act[3 * num_states:4 * num_states])
    # LSTM state update
    c_t = z * i + f * c_tm1.T
    h_t = o * T.tanh(c_t)
    return c_t.T, h_t.T  # (batch_size, num_states)

c0 = T.zeros((batch_size, num_states))
h0 = T.zeros((batch_size, num_states))
cells, hiddens = theano.scan(fn=step, non_sequences=[input, W, WgA, WgB],
                             outputs_info=[c0, h0], n_steps=num_glimpses,
                             strict=True)[0]
fn = theano.function([input], hiddens[-1])
_X = np.random.randn(batch_size, img_dim, img_dim * 2)
_X = _X.astype(theano.config.floatX)
print(fn(_X))
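
For reference, this is the kind of dtype spot-check I have been doing
(symbolic and shared variables both expose a .dtype attribute); every
variable below reports float32 in my setup:

print(W.dtype, WgA.dtype, WgB.dtype)  # shared weights
print(input.dtype)                    # symbolic input tensor
print(c0.dtype, h0.dtype)             # initial scan states (outputs_info)
print(_X.dtype)                       # concrete input array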