Hi everyone!
As you can probably tell, I am new to the Theano world, and I have recently been
looking at the examples on the deep learning tutorials website.
I wanted to implement the multilayer perceptron, but with a few changes.
For instance, I wanted to make things a little more condensed, which is why
I wrote one class instead of the three in the example.
I also changed slightly how the data are passed to the compiled train, validate
and test functions. Instead of passing the minibatch index, I pass the training
data already sliced by the minibatch index, e.g.
trX[minibatch_index * batch_size:(minibatch_index + 1) * batch_size]
(a sketch of the difference is below).
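To make the change concrete, here is roughly the difference, from memory of the
tutorial. The names index, shared_x and shared_y are just placeholders (the
tutorial keeps the data in Theano shared variables); X, y, cost, updates,
batch_size, trX and trY are the names used in build_model / training in the
attached script:

    # Tutorial style: the data live in shared variables on the Theano side,
    # and the compiled function only receives the minibatch index.
    index = T.lscalar('index')  # placeholder name, not in my script
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            X: shared_x[index * batch_size: (index + 1) * batch_size],
            y: shared_y[index * batch_size: (index + 1) * batch_size],
        }
    )

    # My version: the compiled function takes the sliced numpy arrays directly.
    train_model = theano.function(inputs=[X, y], outputs=cost, updates=updates,
                                  allow_input_downcast=True)
    cost_value = train_model(
        trX[minibatch_index * batch_size:(minibatch_index + 1) * batch_size],
        trY[minibatch_index * batch_size:(minibatch_index + 1) * batch_size])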
Unfortunately, I have been getting an error that I cannot resolve. I have
looked everywhere, but without luck. The error messages are not at all helpful
in identifying the problem; on the contrary, in most cases they are very
cryptic and uninformative.
I have honestly been trying to solve this for the last two days without any
success. I would be more than grateful if someone could point out where the
problem lies, so I can finally understand it and move forward with my training.
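For what it's worth, the commented-out compute_test_value lines in build_model
are from trying Theano's test-value mechanism, which as far as I understand can
give more informative error messages. Something along these lines (with made-up
test data, not the arrays from the script):

    theano.config.compute_test_value = 'warn'
    X = T.matrix('X')
    X.tag.test_value = np.random.randn(20, 784).astype(theano.config.floatX)
    y = T.lvector('y')
    y.tag.test_value = np.random.randint(0, 10, size=20)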
I am attaching the script file with the code to make it easier for everyone
to have a look at it.
I would appreciate any help at all!
Thank you all in advance!
import os
import numpy as np
import theano
import theano.tensor as T
import timeit
import pdb

def load_dataset(rng, batch_size=20):
    print("...loading the dataset")
    # random stand-in data with MNIST-like shapes
    train_set_x = rng.randn(1000, 784)
    train_set_y = rng.randint(low=0, high=10, size=1000)  # high is exclusive: labels 0-9
    valid_set_x = rng.randn(500, 784)
    valid_set_y = rng.randint(low=0, high=10, size=500)
    test_set_x = rng.randn(500, 784)
    test_set_y = rng.randint(low=0, high=10, size=500)
    n_train_batches = train_set_x.shape[0] // batch_size
    n_valid_batches = valid_set_x.shape[0] // batch_size
    n_test_batches = test_set_x.shape[0] // batch_size
    dataset = [train_set_x, train_set_y, valid_set_x, valid_set_y,
               test_set_x, test_set_y, n_train_batches, n_valid_batches,
               n_test_batches]
    return dataset

class MLP(object):

    def init_hidden_weights(self, rng, n_in, n_hidden, activation=T.tanh):
        # uniform initialisation in +/- sqrt(6 / (n_in + n_hidden)) for tanh
        W_values = np.asarray(
            rng.uniform(
                low=-np.sqrt(6. / (n_in + n_hidden)),
                high=np.sqrt(6. / (n_in + n_hidden)),
                size=(n_in, n_hidden)
            ),
            dtype=theano.config.floatX
        )
        if activation == T.nnet.sigmoid:
            W_values *= 4
        b_values = np.zeros((n_hidden,), dtype=theano.config.floatX)
        return (
            theano.shared(value=W_values, name='W', borrow=True),
            theano.shared(value=b_values, name='b', borrow=True)
        )

    def init_logitstic_weights(self, n_hidden, n_out):
        # the output (logistic regression) layer starts from zeros
        return (
            theano.shared(value=np.zeros((n_hidden, n_out),
                                         dtype=theano.config.floatX),
                          name='W', borrow=True),
            theano.shared(value=np.zeros((n_out,),
                                         dtype=theano.config.floatX),
                          name='b', borrow=True)
        )

    def feed_forward(self, X, W, b):
        return T.dot(X, W) + b

    def activation_func(self, forward, activation=T.tanh):
        return (forward if activation is None
                else activation(forward))

    def logistic_layer(self, hidden):
        # softmax over the output units and the resulting class prediction
        p_y_given_x = T.exp(hidden) / T.sum(T.exp(hidden), axis=1, keepdims=True)
        ypred = T.argmax(p_y_given_x, axis=1)
        return p_y_given_x, ypred

    def neg_log_likelihood(self, p_y_given_x, y):
        return -T.mean(T.log(p_y_given_x)[T.arange(y.shape[0]), y])

    def errors(self, y, y_pred):
        if y.ndim != y_pred.ndim:
            raise TypeError('y should have the same shape as y_pred',
                            ('y', y.type, 'y_pred', y_pred.type))
        if y.dtype.startswith('int'):
            # T.neq returns a vector of 0s and 1s, where 1
            # represents a mistake in the prediction
            return T.mean(T.neq(y_pred, y))
        else:
            raise NotImplementedError()

    def L1(self, l1_reg, W1, W2):
        return l1_reg * (T.sum(abs(W1)) + T.sum(abs(W2)))

    def L2(self, l2_reg, W1, W2):
        return l2_reg * (T.sum(W1 ** 2) + T.sum(W2 ** 2))

def build_model(rng):
    print('... building the model')
    # define symbolic variables
    # theano.config.compute_test_value = "warn"
    X = T.matrix('X')   # the data as rasterized images
    # X.tag.test_value = train_set_x
    y = T.lvector('y')  # the labels as a 1D vector of [int] labels
    # y.tag.test_value = train_set_y

    # build the model
    classifier = MLP()
    h_W, h_b = classifier.init_hidden_weights(rng, n_in=784, n_hidden=500)
    l_W, l_b = classifier.init_logitstic_weights(n_hidden=500, n_out=10)
    layer0 = classifier.activation_func(
        classifier.feed_forward(X, h_W, h_b)
    )
    # note: the output layer is also passed through tanh before the softmax
    layer1 = classifier.activation_func(
        classifier.feed_forward(layer0, l_W, l_b)
    )
    p_y_given_x, y_pred = classifier.logistic_layer(layer1)

    # compute the negative log likelihood plus L1/L2 regularization symbolically
    cost = (classifier.neg_log_likelihood(p_y_given_x, y)
            + classifier.L1(0.001, h_W, l_W)
            + classifier.L2(0.0001, h_W, l_W))

    # compute the derivatives symbolically
    params = [h_W, h_b, l_W, l_b]
    gparams = [T.grad(cost, param) for param in params]
    learning_rate = 0.01
    # batch_size = 20

    # update the weights symbolically using gradient descent
    updates = [(param, param - learning_rate * gparam)
               for param, gparam in zip(params, gparams)]

    # pdb.set_trace()  # debugging breakpoint, disabled
    print("... compiling the model")
    # compile the train model
    # pdb.set_trace()  # debugging breakpoint, disabled
    train_model = theano.function(inputs=[X, y], outputs=cost,
                                  updates=updates,
                                  allow_input_downcast=True,
                                  mode='DebugMode')
    # compile the test model
    test_model = theano.function(inputs=[X, y],
                                 outputs=classifier.errors(y, y_pred),
                                 allow_input_downcast=True,
                                 mode='DebugMode')
    # compile the validation model
    validate_model = theano.function(inputs=[X, y],
                                     outputs=classifier.errors(y, y_pred),
                                     allow_input_downcast=True,
                                     mode='DebugMode')
    return [train_model, validate_model, test_model]

def training(batch_size=20):
    rng = np.random.RandomState(1234)
    datasets = load_dataset(rng)  # load the dataset
    trX = datasets[0]
    trY = datasets[1]
    valX = datasets[2]
    valY = datasets[3]
    teX = datasets[4]
    teY = datasets[5]
    n_train_batches = datasets[6]
    n_valid_batches = datasets[7]
    n_test_batches = datasets[8]

    models = build_model(rng)  # build the models
    train = models[0]
    validate = models[1]
    test = models[2]

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')
    # early-stopping parameters
    patience = 10000  # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    # go through this many minibatches before checking the network on the
    # validation set; in this case we check every epoch
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = np.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()
    n_epochs = 1000
    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in range(n_train_batches):
            minibatch_avg_cost = train(
                trX[minibatch_index * batch_size:
                    (minibatch_index + 1) * batch_size],
                trY[minibatch_index * batch_size:
                    (minibatch_index + 1) * batch_size])
            # iteration number
            iter = (epoch - 1) * n_train_batches + minibatch_index
            if (iter + 1) % validation_frequency == 0:
                # compute zero-one loss on the validation set
                validation_losses = [
                    validate(
                        valX[i * batch_size: (i + 1) * batch_size],
                        valY[i * batch_size: (i + 1) * batch_size]
                    ) for i in range(n_valid_batches)
                ]
                this_validation_loss = np.mean(validation_losses)
                print("epoch {}, minibatch {}/{}, validation error {}%".format(
                    epoch,
                    minibatch_index + 1,
                    n_train_batches,
                    this_validation_loss * 100.))
                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    # improve patience if the loss improvement is good enough
                    if (this_validation_loss < best_validation_loss *
                            improvement_threshold):
                        patience = max(patience, iter * patience_increase)
                    best_validation_loss = this_validation_loss
                    best_iter = iter
                    # test it on the test set
                    test_losses = [
                        test(
                            teX[i * batch_size: (i + 1) * batch_size],
                            teY[i * batch_size: (i + 1) * batch_size]
                        ) for i in range(n_test_batches)
                    ]
                    test_score = np.mean(test_losses)
                    print("epoch {}, minibatch {}/{}, test error of "
                          "best model {}%".format(
                              epoch, minibatch_index + 1, n_train_batches,
                              test_score * 100.))
            if patience <= iter:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print("Optimization complete. Best validation score of {}% "
          "obtained at iteration {}, with test performance {}%".format(
              best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print("The code for file {} ran for {:.2f}m".format(
        os.path.split(__file__)[1], (end_time - start_time) / 60.))


if __name__ == "__main__":
    training()