The following is the code:
# coding: utf-8
# In[68]:
#Importing stuff
import theano
import theano.tensor as T
import numpy as np
# In[69]:
import nltk
import sys
import operator
import csv
import itertools
from utils import *
from datetime import datetime
# In[70]:
#Fixing vocabulary size for one hot vectors and some initialization stuff
v_size = 8000
unknown_token = "UNKNOWN_TOKEN"
start_token = "<s>"
end_token = "</s>"
# In[71]:
#Read data and start preprocessing
with open('reddit-comments-2015-08.csv','rb') as f:
reader = csv.reader(f, skipinitialspace=True)
reader.next()
sentences = list(itertools.chain(*[nltk.sent_tokenize(x[0].decode('utf-8'))
for x in reader]))
print len(sentences)
# In[72]:
#Tokenize the sentences and add start and end tokens
tokenized_sentences = [nltk.word_tokenize(s) for s in sentences]
tokenized_sentences = [[start_token] + s + [end_token] for s in
tokenized_sentences]
# In[73]:
#Get word frequencies and use only most frequent words in vocabulary
word_freq = nltk.FreqDist(itertools.chain(*tokenized_sentences))
vocab = word_freq.most_common(v_size-1)
# In[74]:
#Do mapping and reverse mapping
index_to_word = [x[0] for x in vocab]
index_to_word.append(unknown_token)
word_to_index = {w:i for i,w in enumerate(index_to_word)}
#Removing less frequent words
for i, s in enumerate(tokenized_sentences):
tokenized_sentences[i] = [w if w in word_to_index else unknown_token for w
in s]
#Got vectors but they are not one hot
X_train = np.asarray([[word_to_index[w] for w in s[:-1]] for s in
tokenized_sentences])
Y_train = np.asarray([[word_to_index[w] for w in s[1:]] for s in
tokenized_sentences])
#Preprocessing ends here
# In[75]:
#Take only one sentence for now
X_train = X_train[0]
Y_train = Y_train[0]
# In[76]:
#Make input and output as onehot vectors. This can easily be replaced with
vectors generated by word2vec.
X_train_onehot = np.eye(v_size)[X_train]
X = theano.shared(np.array(X_train_onehot).astype('float32'), name = 'X')
Y_train_onehot = np.eye(v_size)[Y_train]
Y = theano.shared(np.array(Y_train_onehot).astype('float32'), name = 'Y')
# In[77]:
#Initializing U, V and W
i_dim = v_size
h_dim = 100
o_dim = v_size
U = theano.shared(np.random.randn(i_dim, h_dim).astype('float32'), name = 'U')
W = theano.shared(np.random.randn(h_dim, h_dim).astype('float32'), name = 'W')
V = theano.shared(np.random.randn(h_dim, o_dim).astype('float32'), name = 'V')
# In[78]:
#forward propagation
s = T.vector('s')
results, updates = theano.scan(lambda x, sm1: T.tanh( T.dot(x, U) + T.dot(sm1,
W)),
sequences = X_train_onehot,
outputs_info = s
)
y_hat = T.dot(results, V)
forward_propagation = theano.function(inputs=[s], outputs = y_hat)
# In[80]:
#loss
loss = T.sum(T.nnet.categorical_crossentropy(y_hat, Y))
# In[81]:
#Gradients
dw = T.grad(loss, W)
du = T.grad(loss, U)
dv = T.grad(loss, V)
# In[82]:
#BPTT
learning_rate = T.scalar('learning_rate')
gradient_step = theano.function(inputs = [s, learning_rate],
updates = (
(U, U - learning_rate * du),
(V, V - learning_rate * dv),
(W, W - learning_rate * dw)
)
)
# In[ ]:
On Wednesday, October 12, 2016 at 2:43:37 AM UTC+5:30, Raghu Ram wrote:
>
> Hello all,
> I am new to theano.
> I have the following code(Attached the file). It keeps throwing the error
> at gradient step. I added screenshot of Jupyter notebook as well. Can
> someone tell me what is going wrong.
>
>
> <https://lh3.googleusercontent.com/-hwPVqLsX-S4/V_1VxALWkqI/AAAAAAAAAOg/LBVl-40z9f4WvjGynRX975rR0HPijDRVgCLcB/s1600/Screenshot_20161012_024046.png>
> Please look at attached file for full code.
>
--
---
You received this message because you are subscribed to the Google Groups
"theano-users" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
For more options, visit https://groups.google.com/d/optout.