Hi everyone!

As you can probably tell, I am new to the Theano world, and I've recently been
looking at the examples on the deep learning tutorials website.
I wanted to implement the Multilayer Perceptron, but with a few changes.
For instance, I wanted to make things a bit more condensed, which is why I
wrote one class instead of the three used in the example.
I also changed slightly how the data is passed to the compiled train, validate
and test functions. Instead of passing the minibatch index, I pass the data
already sliced by the minibatch index, e.g.
trX[minibatch_index * batch_size:(minibatch_index + 1) * batch_size].
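
To make the difference concrete, here is a rough, self-contained sketch of the
two styles. The dummy shapes, the toy linear "cost" and the names
train_a/train_b are only there so the functions compile; they are not my actual
model (that is attached below):

import numpy as np
import theano
import theano.tensor as T

batch_size = 20
X = T.matrix('X')
y = T.lvector('y')
W = theano.shared(np.zeros((784, 10), dtype=theano.config.floatX), name='W')
# toy scalar cost, just so theano.function has something to compute
cost = T.mean(T.dot(X, W)[T.arange(y.shape[0]), y])

trX = np.random.randn(100, 784).astype(theano.config.floatX)
trY = np.random.randint(0, 10, size=100).astype('int64')

# (a) tutorial style: the data lives in shared variables and the compiled
#     function only receives the minibatch index; slicing happens via givens
shared_X = theano.shared(trX, borrow=True)
shared_y = theano.shared(trY, borrow=True)
index = T.lscalar('index')
train_a = theano.function(
    [index], cost,
    givens={X: shared_X[index * batch_size:(index + 1) * batch_size],
            y: shared_y[index * batch_size:(index + 1) * batch_size]})
train_a(0)

# (b) what I do instead: compile on [X, y] and pass the already-sliced
#     numpy arrays at call time
train_b = theano.function([X, y], cost, allow_input_downcast=True)
train_b(trX[0:batch_size], trY[0:batch_size])

My understanding is that the tutorial keeps the data in shared variables mainly
so it can live on the GPU, but for now I wanted the simpler call signature.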

Unfortunately I've been getting an error which I cannot resolve. I've looked
everywhere but without luck. The error messages are not at all helpful in
identifying the problem; on the contrary, in most cases they are very cryptic
and uninformative.

I have honestly been trying for the last two days to solve this, but without
any success. I would be more than grateful if someone could point out where
the problem lies so I can finally understand it and move forward with my
training.

I am attaching the script file with the code to make it easier for everyone 
to have a look at it.

I would appreciate any help at all!

Thank you all in advance!
 

import numpy as np
import theano
import theano.tensor as T
import timeit
import os
import pdb

def load_dataset(rng, batch_size=20):

    print("...loading the dataset")

    # random stand-in data; randint's upper bound is exclusive,
    # so high=10 gives labels in [0, 9] to match the 10 output units
    train_set_x = rng.randn(1000, 784)
    train_set_y = rng.randint(low=0, high=10, size=1000)

    valid_set_x = rng.randn(500, 784)
    valid_set_y = rng.randint(low=0, high=10, size=500)

    test_set_x  = rng.randn(500, 784)
    test_set_y  = rng.randint(low=0, high=10, size=500)

    n_train_batches = train_set_x.shape[0] // batch_size
    n_valid_batches = valid_set_x.shape[0] // batch_size
    n_test_batches  = test_set_x.shape[0]  // batch_size

    dataset = [train_set_x, train_set_y, valid_set_x, valid_set_y,
               test_set_x, test_set_y, n_train_batches, n_valid_batches,
               n_test_batches]

    return dataset

class MLP(object):
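    """Helper methods for a one-hidden-layer MLP: weight initialisation,
    feed-forward, softmax output, cost and error terms."""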
    def init_hidden_weights(self, rng, n_in, n_hidden, activation=T.tanh):
        W_values = np.asarray(
            rng.uniform(
                low=-np.sqrt(6. / (n_in + n_hidden)),
                high=np.sqrt(6. / (n_in + n_hidden)),
                size=(n_in, n_hidden)
            ),
            dtype=theano.config.floatX
        )
        if activation == T.nnet.sigmoid:
            W_values *= 4
        b_values = np.zeros((n_hidden,), dtype=theano.config.floatX)
        return (
            theano.shared(value=W_values, name='W', borrow=True),
            theano.shared(value=b_values, name='b', borrow=True)
        )

    def init_logistic_weights(self, n_hidden, n_out):
        return (
            theano.shared(value=np.zeros((n_hidden, n_out),
                                         dtype=theano.config.floatX),
                          name='W', borrow=True),
            theano.shared(value=np.zeros((n_out,),
                                         dtype=theano.config.floatX),
                          name='b', borrow=True)
        )

    def feed_forward(self, X, W, b):
        return T.dot(X, W) + b

    def activation_func(self, forward, activation=T.tanh):
        return (forward if activation is None
                else activation(forward))

    def logistic_layer(self, hidden):
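        # row-wise softmax over `hidden`, then the most probable class per row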
        p_y_given_x = T.exp(hidden) / T.sum(T.exp(hidden), axis=1, keepdims=True)
        ypred = T.argmax(p_y_given_x, axis=1)
        return p_y_given_x, ypred

    def neg_log_likelihood(self, p_y_given_x, y):
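        # mean negative log-probability of the correct class under p(y|x)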
        return -T.mean(T.log(p_y_given_x)[T.arange(y.shape[0]), y])

    def errors(self, y, y_pred):
        if y.ndim != y_pred.ndim:
            raise TypeError('y should have same shape as y_pred',
                ('y', y.type, 'y_pred', y_pred.type))
        if y.dtype.startswith('int'):
            # T.neq returns a vector of 0s and 1s, where a 1
            # marks a wrong prediction
            return T.mean(T.neq(y_pred, y))
        else:
            raise NotImplementedError()

    def L1(self, l1_reg, W1, W2):
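        # L1 penalty: sum of absolute weights, scaled by l1_reg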
        return l1_reg * (T.sum(abs(W1)) + T.sum(abs(W2)))

    def L2(self, l2_reg, W1, W2):
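        # L2 penalty: sum of squared weights, scaled by l2_reg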
        return l2_reg * (T.sum(W1 ** 2) + T.sum(W2 ** 2))

def build_model(rng):

    print('... building the model')

    # define symbolic variables
    # theano.config.compute_test_value = "warn"
    X     = T.matrix('X')  # the data as rasterized images
    # X.tag.test_value = train_set_x
    y     = T.lvector('y')  # the labels as 1D vector of [int] labels
    # y.tag.test_value = train_set_y

    # build the model
    classifier = MLP()
    h_W, h_b   = classifier.init_hidden_weights(rng, n_in=784,
                                                n_hidden=500)
    l_W, l_b   = classifier.init_logistic_weights(n_hidden=500,
                                                  n_out=10)
    layer0     = classifier.activation_func(
                                classifier.feed_forward(X, h_W, h_b)
    )
    layer1     = classifier.activation_func(
                            classifier.feed_forward(layer0, l_W, l_b)
    )
    p_y_given_x, y_pred = classifier.logistic_layer(layer1)

    # compute negative log likelihood symbolically
    cost = (classifier.neg_log_likelihood(p_y_given_x, y)
            + classifier.L1(0.001, h_W, l_W)
            + classifier.L2(0.0001, h_W, l_W))

    # compute derivatives symbolically
    params = [h_W, h_b, l_W, l_b]
    gparams = [T.grad(cost, param) for param in params]

    learning_rate = 0.01
    # batch_size = 20
    # update the weights symbolically using the following rules
    updates = [(param, param - learning_rate * gparam)
               for param, gparam in zip(params, gparams)]

    pdb.set_trace()
    print("... compiling the model")
    # compile the train model
    pdb.set_trace()
    train_model = theano.function(inputs=[X, y], outputs=cost,
                                  updates=updates,
                                  allow_input_downcast=True,
                                  mode='DebugMode')

    # compile the test model
    test_model = theano.function(inputs=[X, y],
                                 outputs=classifier.errors(y, y_pred),
                                 allow_input_downcast=True,
                                 mode='DebugMode')

    # compile the validation model
    validate_model = theano.function(inputs=[X, y],
                                     outputs=classifier.errors(y, y_pred),
                                     allow_input_downcast=True,
                                     mode='DebugMode')

    return [train_model, validate_model, test_model]

def training(batch_size=20):

    rng = np.random.RandomState(1234)
    datasets = load_dataset(rng) # load the dataset
    trX = datasets[0]
    trY = datasets[1]
    valX = datasets[2]
    valY = datasets[3]
    teX  = datasets[4]
    teY  = datasets[5]
    n_train_batches = datasets[6]
    n_valid_batches = datasets[7]
    n_test_batches  = datasets[8]
    models         = build_model(rng) # build the models
    train    = models[0]
    validate = models[1]
    test     = models[2]
    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    # early-stopping parameters
    patience = 10000       # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience // 2)
                                  # go through this many
                                  # minibatches before checking the network
                                  # on the validation set; in this case we
                                  # check every epoch

    best_validation_loss = np.inf
    best_iter    = 0
    test_score   = 0.
    start_time   = timeit.default_timer()

    n_epochs     = 1000
    epoch        = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in range(n_train_batches):

            minibatch_avg_cost = train(
                trX[minibatch_index * batch_size:
                    (minibatch_index + 1) * batch_size],
                trY[minibatch_index * batch_size:
                    (minibatch_index + 1) * batch_size])
            # iteration number
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:
                # compute zero-one loss on validation set
                validation_losses = [
                    validate(
                    valX[i * batch_size: (i + 1) * batch_size],
                    valY[i * batch_size: (i + 1) * batch_size]
                    ) for i in range(n_valid_batches)
                ]
                this_validation_loss = np.mean(validation_losses)

                print("epoch {}, minibatch {}/{}, validation error {}".format(
                    epoch,
                    minibatch_index + 1,
                    n_train_batches,
                    this_validation_loss * 100.
                ))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    #improve patience if loss improvement is good enough
                    if (
                        this_validation_loss < best_validation_loss *
                        improvement_threshold
                    ):
                        patience = max(patience, iter * patience_increase)

                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    test_losses = [
                        test(
                            teX[i * batch_size: (i + 1) * batch_size],
                            teY[i * batch_size: (i + 1) * batch_size]
                        ) for i in range(n_test_batches)
                    ]
                    test_score = np.mean(test_losses)

                    print("epoch {}, minibatch {}/{}, test error of "
                          "best model {}".format(
                              epoch, minibatch_index + 1, n_train_batches,
                              test_score * 100.))

            if patience <= iter:
                done_looping = True
                break

    end_time = timeit.default_timer()

    print("Optimization complete. Best validation score of {}, "
          "obtained at iteration {}, with test performance {}".format(
              best_validation_loss * 100., best_iter + 1, test_score * 100.))

    print("The code for file {} ran for {:.2f}m"
          .format(os.path.split(__file__)[1], (end_time - start_time)/60.))

if __name__ == "__main__":
    training()
