I am trying to create a word generator for a LSTM model. The problem is 
every time it runs it gives me the same output words regardless of what the 
input text is and the sentences generated does not make much sense either. 
Below are some relevant segments of the code (since the script is quite 
long). As the network parameters are in the certain shape, I had to make 
the right input format with the vocab indexes to work with the theano 
function "p_dist" but because of this I wonder if the problem stems from 
here. 


Other settings I have changed were the timestep at each time to 1 and the 
random seed of numpy but this does not seem to fix the problem. Another 
function of the model is being used to train on the text files which works 
so I do not want to make changes to the parameters shape etc. 



"""network parameters"""


x = T.tensor3(dtype=theano.config.floatX) #symbolic tensor of shape 
(n_steps,n_samples,n_in) (theano.tensor.d/fmatrix)

y = T.matrix(dtype='int32') # symboic matrix of shape (n_samples,n_steps)  


"""LSTM network"""                          

n_samples = x.shape[1]

def _slice(_x,n,dim):

 return _x[:,n * dim:(n + 1) * dim]

 
def _step(x_t,h_tm1,c_tm1, 
   W_x,W_h,W_hy,
   b_i,b_f,b_c,b_o,b_y):


 h_dim = h_tm1.shape[-1] # hidden unit dimension

 preact_x = T.dot(x_t,W_x)

 preact_h = T.dot(h_tm1,W_h)

 
 # input gate

 i_t = T.nnet.sigmoid(_slice(preact_x,0,h_dim) + _slice(preact_h,0,h_dim) + 
b_i)

 # forget gate

 f_t = T.nnet.sigmoid(_slice(preact_x,1,h_dim) + _slice(preact_h,1,h_dim) + 
b_f)

 # output gate

 o_t = T.nnet.sigmoid(_slice(preact_x,2,h_dim) + _slice(preact_h,2,h_dim) + 
b_o)

 # cell unit

 c_t = f_t * c_tm1 + i_t * T.tanh(_slice(preact_x,3,h_dim) + _slice(preact_h
,3,h_dim) + b_c)

 # cell output

 h_t = o_t * T.tanh(c_t)

 y_t = theano.dot(h_t,W_hy) + b_y 

 return [h_t,c_t,y_t]


 """ 

 unrolled model

 h_vals: symbolic tensor for hidden units

 y_vals: symbolic tensor for output units' pre-activation

 """ 

  [h_vals,_,y_vals],_ = theano.scan(fn = _step, sequences = [x],

                      outputs_info = [T.alloc(numpy_floatX(0.),n_samples,
n_hidden),

                      T.alloc(numpy_floatX(0,),n_samples,n_hidden),None],

                      non_sequences = [self.W_x,self.W_h,self.W_hy,

                     self.b_i,self.b_f,self.b_c,self.b_o,self.b_y],

                      n_steps=n_steps)


 """

 Define output and cost function


 We take our n_steps x n_seq x n_classes output from the net

 and reshape it into a (n_steps * n_seq) x n_classes matrix

 apply softmax, then reshape back

 """

y_p_m = T.reshape(y_vals, (y_vals.shape[0] * y_vals.shape[1], -1)) 

y_p_s = T.nnet.softmax(y_p_m) # pred 

y_f = y.flatten(ndim=1) # y is n_seq x n_steps 

cost = -T.mean(T.log(y_p_s)[T.arange(y_p_s.shape[0]),y_f]) 

self.p_dist = theano.function([x],y_p_s, on_unused_input='ignore')


"""next word generator"""


def random_generator(probs): 

       xk = xrange(10000) 

       custm = stats.rv_discrete(name='custm', values=(xk,probs)) 

       return custm.rvs(size=1) 

  
def next_word(text, vocab_map, index2word, num_steps, length, p_dist, 
vocab_size): 

        words = text.split() 

         for j in xrange(20): 

                idxs = [vocab_map[w] for w in words] 

                vocab_id = np.zeros((idxs[1],seq_length,vocab_size)).astype(
theano.config.floatX) 

                 for i in xrange(length): 

                      prob_dist = p_dist(np.asarray(vocab_id).astype('int32'
)) 

                      next_index = random_generator(prob_dist[-1,:]) 

                      idxs.append(next_index[0]) 

                print [index2word[index] for index in idxs]



index2word = dict([(v,k) for k,v in train_dict.iteritems()])

text = "<eof> new york is"

output_generator = next_word(text, train_dict, index2word, 1, 10,lstm_net.
p_dist,vocab_size)




-- 

--- 
You received this message because you are subscribed to the Google Groups 
"theano-users" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
For more options, visit https://groups.google.com/d/optout.

Reply via email to