Hi,

I have been trying to code a DRAW-like attention model for a project. The 
code does not compile with floatX set to float32 because of a dtype 
conflict: Theano complains that some operation inside scan makes the 
result float64, while my outputs_info are initialized as float32. I have 
checked everything multiple times: all numpy arrays are float32, and I 
could not find any place where an upcast to float64 could happen. Can 
someone please help me figure out what exactly is causing the error?

Note: I have reduced the code from multiple files down to the single file 
below, which is about 135 lines.

Also, everything works fine when the floatX flag is set to float64.
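
In case it helps, here is roughly how I have been checking for stray
float64s (just a sketch of my debugging session; it assumes the
warn_float64 config flag is available in this Theano version):

import numpy as np
import theano
import theano.tensor as T

# fail loudly as soon as any op produces a float64 output, so the
# offending node shows up in the traceback
theano.config.warn_float64 = 'raise'

# numpy side: every array is explicitly cast and checked
arr = np.random.randn(1, 32, 64).astype(theano.config.floatX)
assert arr.dtype == np.float32

# symbolic side: .dtype can be inspected before compiling anything
x = T.tensor3('x')
print(x.dtype)  # 'float32' when floatX=float32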

Thanks and Regards,
Pranav

import numpy as np

import theano
import theano.tensor as T


def batched_dot(A, B):
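    """Batched matrix product via broadcasting: A is (batch, m, k), B is
    (batch, k, n); the elementwise product is summed over the shared k
    axis to give a (batch, m, n) result."""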
    C = A.dimshuffle([0, 1, 2, 'x']) * B.dimshuffle([0, 'x', 1, 2])      
    return C.sum(axis=-2)


class Attender(object):
    def __init__(self, img_dim, N):
        self.img_dim = img_dim
        self.N = N

    def read(self, I, center_y, center_x, delta, sigma):
        N = self.N
        batch_size = I.shape[0]

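        # offsets of the N filter centres from the glimpse centre,
        # symmetric around zero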
        rng = T.arange(N) - N / 2.0 + 0.5  

        muX = center_x.dimshuffle([0, 'x']) + delta.dimshuffle([0, 'x']) * rng
        muY = center_y.dimshuffle([0, 'x']) + delta.dimshuffle([0, 'x']) * rng

        a = T.arange(self.img_dim)
        b = T.arange(self.img_dim)
        
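        # FX, FY: (batch_size, N, img_dim) Gaussian filterbank matrices;
        # each row is normalized to sum to (approximately) one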
        FX = T.exp(-(a - muX.dimshuffle([0,1,'x'])) ** 2 / 2.0 / sigma.dimshuffle([0,'x','x']) ** 2)
        FY = T.exp(-(b - muY.dimshuffle([0,1,'x'])) ** 2 / 2.0 / sigma.dimshuffle([0,'x','x']) ** 2)
        FX = FX / (FX.sum(axis=-1).dimshuffle(0, 1, 'x') + 1e-4)
        FY = FY / (FY.sum(axis=-1).dimshuffle(0, 1, 'x') + 1e-4)

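        # apply the filterbanks: G = FY . I . FX^T, a (batch_size, N, N) glimpse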
        G = batched_dot(batched_dot(FY, I), FX.transpose([0, 2, 1]))

        return G


def orthogonal(shape):
	"""
	taken from: https://github.com/Lasagne/Lasagne/blob/master/lasagne/init.py#L327-L367
	"""
	a = np.random.normal(0.0, 1.0, shape)
	u, _, v = np.linalg.svd(a, full_matrices=False)
	q = u if u.shape == shape else v 	# pick the one with the correct shape
	return q.astype(theano.config.floatX)


def gp_from_hidden(H, W_att, img_dim, N):
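	# linear map from the hidden state to raw attention parameters,
	# then rescale to image coordinates (DRAW-style)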
	gp = T.dot(W_att, H.T).T

	center_y  = gp[:, 0]
	center_x  = gp[:, 1]
	delta = T.abs_(gp[:,2])
	sigma = T.abs_(gp[:,3])

	center_y = (img_dim + 1.0) * (center_y + 1.0) / 2.0
	center_x = (img_dim + 1.0) * (center_x + 1.0) / 2.0
	delta = (img_dim - 1.0) / (N - 1.0) * delta

	return center_y, center_x, delta, sigma


num_states = 8
img_dim = 32
N = 5
num_glimpses = 20
fg_bias_init = 1.0
batch_size = 1

num_input = 2 * (N ** 2)
# one weight matrix for all four LSTM gates stacked along the first axis;
# the last column holds the biases. np.zeros (rather than np.empty) makes
# sure the bias entries of the non-forget gates are initialized to zero.
W = np.zeros((num_states * 4, num_input + num_states + 1), dtype=theano.config.floatX)
for i in range(4):
	W[i*num_states:(i + 1) * num_states, :num_input] = orthogonal((num_states, num_input))
	W[i*num_states:(i + 1) * num_states, num_input:-1] = orthogonal((num_states, num_states))
W[2 * num_states:3 * num_states, -1] = fg_bias_init

WgA = np.random.randn(4, num_states)
WgB = np.random.randn(4, num_states)

W = W.astype(theano.config.floatX)
WgA = WgA.astype(theano.config.floatX)
WgB = WgB.astype(theano.config.floatX)

W = theano.shared(W)
WgA = theano.shared(WgA)
WgB = theano.shared(WgB)

attender = Attender(img_dim, N)

input = T.tensor3('input')

def step(c_tm1, h_tm1, input, W, WgA, WgB):
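	# one glimpse step: read an N x N window from each half of the input
	# with separate attention parameters, then do a single LSTM update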
	center_yA, center_xA, deltaA, sigmaA = gp_from_hidden(h_tm1, WgA, img_dim, N)
	center_yB, center_xB, deltaB, sigmaB = gp_from_hidden(h_tm1, WgB, img_dim, N)

	gA = attender.read(input[:, :, :img_dim], center_yA, center_xA, deltaA, sigmaA) # (batch_size, N, N)
	gB = attender.read(input[:, :, img_dim:], center_yB, center_xB, deltaB, sigmaB)

	flat_gA = gA.reshape((batch_size, N * N)) # (batch_size, N * N)
	flat_gB = gB.reshape((batch_size, N * N))

	# concatenate gA, gB and h_tm1 to form a single matrix # (batch_size, N * N + N * N + num_states + 1)
	lstm_inp = T.concatenate([flat_gA, flat_gB, h_tm1, T.ones((batch_size, 1))], axis=1)

	# multiply by LSTM weights
	# (num_states * 4, num_input + num_states + 1) dot (batch_size, N * N + N * N + num_states + 1).T
	pre_act = T.dot(W, lstm_inp.T) 	# (4 * num_states, batch_size)

	# split up to get individual gates
	z = T.tanh(pre_act[0*num_states:1*num_states]) # (num_states, batch_size)
	i = T.nnet.sigmoid(pre_act[1*num_states:2*num_states])
	f = T.nnet.sigmoid(pre_act[2*num_states:3*num_states])
	o = T.nnet.sigmoid(pre_act[3*num_states:4*num_states])

	# do LSTM update
	c_t = z * i + f * c_tm1.T
	h_t = o * T.tanh(c_t)

	return c_t.T, h_t.T 	# (batch_size, num_states)

c0 = T.zeros((batch_size, num_states))
h0 = T.zeros((batch_size, num_states))

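# unroll num_glimpses steps; theano.scan returns (outputs, updates), and
# outputs is the list [cells, hiddens]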
cells, hiddens = theano.scan(fn=step, outputs_info=[c0, h0],
                             non_sequences=[input, W, WgA, WgB],
                             n_steps=num_glimpses, strict=True)[0]


fn = theano.function([input], hiddens[-1])

_X = np.random.randn(batch_size, img_dim, img_dim * 2)
_X = _X.astype(theano.config.floatX)

print(fn(_X))
