I am trying to build a word generator on top of an LSTM model. The problem is
that every time it runs it gives me the same output words regardless of the
input text, and the generated sentences do not make much sense either.
Below are the relevant segments of the code (the full script is quite long).
Since the network parameters have a fixed shape, I had to put the vocab
indexes into the right input format for the theano function "p_dist", and I
wonder whether the problem stems from here (I have put a sketch of the format
I think it expects after the p_dist definition below).
I have also tried setting the timestep to 1 at each generation step and
changing numpy's random seed, but neither fixes the problem. Another function
of the model is used to train on the text files and works fine, so I would
rather not change the parameter shapes etc.
"""network parameters"""
x = T.tensor3(dtype=theano.config.floatX) #symbolic tensor of shape
(n_steps,n_samples,n_in) (theano.tensor.d/fmatrix)
y = T.matrix(dtype='int32') # symboic matrix of shape (n_samples,n_steps)
"""LSTM network"""
n_samples = x.shape[1]
def _slice(_x,n,dim):
return _x[:,n * dim:(n + 1) * dim]
def _step(x_t, h_tm1, c_tm1,
          W_x, W_h, W_hy,
          b_i, b_f, b_c, b_o, b_y):
    h_dim = h_tm1.shape[-1]  # hidden unit dimension
    # pre-activations for all four gates at once
    preact_x = T.dot(x_t, W_x)
    preact_h = T.dot(h_tm1, W_h)
    # input gate
    i_t = T.nnet.sigmoid(_slice(preact_x, 0, h_dim) + _slice(preact_h, 0, h_dim) + b_i)
    # forget gate
    f_t = T.nnet.sigmoid(_slice(preact_x, 1, h_dim) + _slice(preact_h, 1, h_dim) + b_f)
    # output gate
    o_t = T.nnet.sigmoid(_slice(preact_x, 2, h_dim) + _slice(preact_h, 2, h_dim) + b_o)
    # cell unit
    c_t = f_t * c_tm1 + i_t * T.tanh(_slice(preact_x, 3, h_dim) + _slice(preact_h, 3, h_dim) + b_c)
    # cell output
    h_t = o_t * T.tanh(c_t)
    # output layer pre-activation
    y_t = T.dot(h_t, W_hy) + b_y
    return [h_t, c_t, y_t]
"""
unrolled model
h_vals: symbolic tensor for hidden units
y_vals: symbolic tensor for output units' pre-activation
"""
[h_vals,_,y_vals],_ = theano.scan(fn = _step, sequences = [x],
outputs_info = [T.alloc(numpy_floatX(0.),n_samples,
n_hidden),
T.alloc(numpy_floatX(0,),n_samples,n_hidden),None],
non_sequences = [self.W_x,self.W_h,self.W_hy,
self.b_i,self.b_f,self.b_c,self.b_o,self.b_y],
n_steps=n_steps)
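For reference, I think the shapes coming out of scan can be checked with
something like this (a throwaway sketch; it assumes n_steps is a plain int,
5 here, and that vocab_size matches the input dimension n_in):

f_shapes = theano.function([x], [h_vals.shape, y_vals.shape])
x_test = np.zeros((5, 2, vocab_size), dtype=theano.config.floatX)  # 5 steps, 2 samples
print f_shapes(x_test)  # expecting [(5, 2, n_hidden), (5, 2, vocab_size)]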
"""
Define output and cost function
We take our n_steps x n_seq x n_classes output from the net
and reshape it into a (n_steps * n_seq) x n_classes matrix
apply softmax, then reshape back
"""
y_p_m = T.reshape(y_vals, (y_vals.shape[0] * y_vals.shape[1], -1))
y_p_s = T.nnet.softmax(y_p_m) # pred
y_f = y.flatten(ndim=1) # y is n_seq x n_steps
cost = -T.mean(T.log(y_p_s)[T.arange(y_p_s.shape[0]),y_f])
self.p_dist = theano.function([x],y_p_s, on_unused_input='ignore')
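Since I suspect the input encoding, here is a minimal sketch of the format I
believe p_dist expects; encode_one_hot is just an illustrative helper (it is
not in my script), assuming a one-hot scheme over the (n_steps, n_samples,
n_in) layout of x:

def encode_one_hot(idxs, vocab_size):
    # one-hot encode a list of vocab indexes for a single sample,
    # matching the (n_steps, n_samples, n_in) layout of the input x
    x_in = np.zeros((len(idxs), 1, vocab_size), dtype=theano.config.floatX)
    for t, idx in enumerate(idxs):
        x_in[t, 0, idx] = 1.
    return x_in

# e.g. probs = self.p_dist(encode_one_hot([3, 17, 5], vocab_size))
# probs has shape (len(idxs) * 1, vocab_size); probs[-1] would be the
# distribution over the word following the last input word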
"""next word generator"""
def random_generator(probs):
xk = xrange(10000)
custm = stats.rv_discrete(name='custm', values=(xk,probs))
return custm.rvs(size=1)
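For reference, I believe the rv_discrete call is equivalent to the simpler
numpy sampling below (a sketch; the renormalization is only there to guard
against float32 rounding in the softmax output):

def random_generator_np(probs):
    # sample one index; renormalize in float64 so the weights sum to 1
    p = np.asarray(probs, dtype='float64')
    return np.random.choice(len(p), size=1, p=p / p.sum())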
def next_word(text, vocab_map, index2word, num_steps, length, p_dist,
              vocab_size):
    words = text.split()
    for j in xrange(20):  # generate 20 sequences from the same seed
        idxs = [vocab_map[w] for w in words]  # vocab indexes of the seed text
        # input batch in the (n_steps, n_samples, n_in) layout of x
        vocab_id = np.zeros((len(idxs), num_steps, vocab_size),
                            dtype=theano.config.floatX)
        for i in xrange(length):
            # p_dist expects the floatX input declared for x above
            prob_dist = p_dist(vocab_id)
            # sample the next word from the distribution at the last row
            next_index = random_generator(prob_dist[-1, :])
            idxs.append(next_index[0])
        print [index2word[index] for index in idxs]

index2word = dict([(v, k) for k, v in train_dict.iteritems()])
text = "<eof> new york is"
output_generator = next_word(text, train_dict, index2word, 1, 10,
                             lstm_net.p_dist, vocab_size)