Hi all,

I'm quite new to Theano and am just trying to approximate a nonlinear function with a neural network. The code compiles and runs, but the network doesn't learn, and I was hoping someone could take a look at it. I don't think my parameters are actually being updated from the gradients, since the validation error never seems to decrease.
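As a sanity check, something like this sketch (using the names defined in the script below; not part of the script itself) should show whether the weights actually move after a single update:

    W_before = regression.params[0].get_value()
    train_model(0)
    W_after = regression.params[0].get_value()
    print('max weight change = ' + str(numpy.abs(W_after - W_before).max()))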
I took most of the code from the online Theano tutorial. My best guess is that the culprit is the MLP.error function, since all I'm trying to compute there is the squared error; I then add an L2 regularization term to that loss to form the cost.
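In other words, the cost I'm minimizing is meant to be (this is just the relevant expression pulled out of the code below):

    cost = T.mean(T.sqr(y - regression.output)) + L2_reg * regression.L2_sqr

i.e. the mean squared error plus an L2 penalty on the two weight matrices.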
The entire code is pasted below. Please excuse the hackiness; I just want to get the NN working:
import numpy
import math
import matplotlib.pyplot as plt
import theano
import theano.tensor as T
# Generate data from a crazy function
def get_data(query_list):
    data = list()
    for query in query_list:
        data.append((query, math.sqrt(2 * query) + math.sin(3 * query) +
                     math.cos(2 * query)))

    # Break up into training, validation, and testing
    last_idx = len(data) - 1
    training = data[0: int(math.floor(0.7 * last_idx))]
    validation = data[int(math.floor(0.7 * last_idx)) + 1:
                      int(math.floor(0.85 * last_idx)) - 1]
    testing = data[int(math.floor(0.85 * last_idx)) + 1: last_idx]

    # (xs, ys)
    training_set = ([pt[0] for pt in training], [pt[1] for pt in training])
    validation_set = ([pt[0] for pt in validation],
                      [pt[1] for pt in validation])
    testing_set = ([pt[0] for pt in testing], [pt[1] for pt in testing])

    train_set_x = theano.shared(numpy.asarray(training_set[0],
                                dtype=theano.config.floatX), borrow=True)
    train_set_y = theano.shared(numpy.asarray(training_set[1],
                                dtype=theano.config.floatX), borrow=True)
    valid_set_x = theano.shared(numpy.asarray(validation_set[0],
                                dtype=theano.config.floatX), borrow=True)
    valid_set_y = theano.shared(numpy.asarray(validation_set[1],
                                dtype=theano.config.floatX), borrow=True)
    testing_set_x = theano.shared(numpy.asarray(testing_set[0],
                                  dtype=theano.config.floatX), borrow=True)
    testing_set_y = theano.shared(numpy.asarray(testing_set[1],
                                  dtype=theano.config.floatX), borrow=True)

    return (train_set_x, train_set_y, valid_set_x, valid_set_y,
            testing_set_x, testing_set_y)
# Class to construct layers
class HiddenLayer(object):
    def __init__(self, rng, input, n_in, n_out, W=None, b=None,
                 activation=T.tanh):
        """
        Typical hidden layer of an MLP: units are fully connected and have
        a sigmoidal activation function. The weight matrix W is of shape
        (n_in, n_out) and the bias vector b is of shape (n_out,).

        NOTE: the nonlinearity used here is tanh.
        Hidden unit activation is given by: tanh(dot(input, W) + b)

        :type rng: numpy.random.RandomState
        :param rng: a random number generator used to initialize weights

        :type input: theano.tensor.dmatrix
        :param input: a symbolic tensor of shape (n_examples, n_in)

        :type n_in: int
        :param n_in: dimensionality of input

        :type n_out: int
        :param n_out: number of hidden units

        :type activation: theano.Op or function
        :param activation: nonlinearity to be applied in the hidden layer
        """
        self.input = input

        # `W` is initialized with `W_values`, which is uniformly sampled
        # from -sqrt(6./(n_in+n_hidden)) to sqrt(6./(n_in+n_hidden))
        # for the tanh activation function.
        # The output of uniform is converted with asarray to dtype
        # theano.config.floatX so that the code is runnable on a GPU.
        # Note: optimal initialization of the weights depends on the
        # activation function used (among other things). For example,
        # results presented in [Xavier10] suggest that you should use
        # initial weights 4 times larger for sigmoid than for tanh.
        # We have no info for other functions, so we use the same as tanh.
        if W is None:
            W_values = numpy.asarray(
                rng.uniform(
                    low=-numpy.sqrt(6. / (n_in + n_out)),
                    high=numpy.sqrt(6. / (n_in + n_out)),
                    size=(n_in, n_out)
                ),
                dtype=theano.config.floatX
            )
            if activation == theano.tensor.nnet.sigmoid:
                W_values *= 4

            W = theano.shared(value=W_values, name='W', borrow=True)

        if b is None:
            b_values = numpy.zeros((n_out,), dtype=theano.config.floatX)
            b = theano.shared(value=b_values, name='b', borrow=True)

        self.W = W
        self.b = b
        self.params = [self.W, self.b]

        lin_output = T.dot(input, self.W) + self.b
        self.output = activation(lin_output)
class MLP(object):
    def __init__(self, rng, input, n_in, n_hidden, n_out):
        """Initialize the parameters for the multilayer perceptron

        :type rng: numpy.random.RandomState
        :param rng: a random number generator used to initialize weights

        :type input: theano.tensor.TensorType
        :param input: symbolic variable that describes the input of the
                      architecture (one minibatch)

        :type n_in: int
        :param n_in: number of input units, the dimension of the space in
                     which the datapoints lie

        :type n_hidden: int
        :param n_hidden: number of hidden units

        :type n_out: int
        :param n_out: number of output units, the dimension of the space in
                      which the labels lie
        """
        # Since we are dealing with a one-hidden-layer MLP, this translates
        # into a HiddenLayer with a tanh activation function connected to a
        # second HiddenLayer that serves as the output layer; the activation
        # function can be replaced by sigmoid or any other nonlinearity.
        self.hiddenLayer = HiddenLayer(
            rng=rng,
            input=input,
            n_in=n_in,
            n_out=n_hidden,
            activation=T.tanh
        )
        self.outputLayer = HiddenLayer(
            rng=rng,
            input=self.hiddenLayer.output,
            n_in=n_hidden,
            n_out=n_out,
            activation=T.tanh
        )

        # L1 norm; one regularization option is to enforce the L1 norm to
        # be small
        self.L1 = (
            abs(self.hiddenLayer.W).sum()
            + abs(self.outputLayer.W).sum()
        )
        # square of the L2 norm; one regularization option is to enforce
        # the square of the L2 norm to be small
        self.L2_sqr = (
            T.sum(self.hiddenLayer.W ** 2)
            + T.sum(self.outputLayer.W ** 2)
        )

        # the output of the MLP is simply the output of the output layer
        self.output = self.outputLayer.output

        # the parameters of the model are the parameters of the two layers
        # it is made out of
        self.params = self.hiddenLayer.params + self.outputLayer.params

        # keep track of the model input
        self.input = input

    def error(self, y):
        # element-wise squared error between the target and the prediction
        return T.sqr(y - self.output)
def test_reg(learning_rate=0.1,
             L1_reg=0.00,
             L2_reg=0.01,
             n_epochs=80,
             data_generator=get_data,
             batch_size=20,
             n_hidden=100):
    query_list = numpy.linspace(0, 20, 120)
    query_list = numpy.random.permutation(query_list)
    (train_set_x, train_set_y, valid_set_x, valid_set_y,
     test_set_x, test_set_y) = data_generator(query_list)

    index = T.lscalar()
    x = T.dscalar('query')
    y = T.dscalar('out')

    rng = numpy.random.RandomState(1234)
    regression = MLP(
        rng=rng,
        input=x,
        n_in=1,
        n_hidden=n_hidden,
        n_out=1
    )

    cost = (
        T.mean(regression.error(y))
        + L2_reg * regression.L2_sqr
    )

    test_model = theano.function(
        inputs=[index],
        outputs=[regression.error(y), regression.output],
        givens={
            x: test_set_x[index],
            y: test_set_y[index]
        }
    )
    validate_model = theano.function(
        inputs=[index],
        outputs=regression.error(y),
        givens={
            x: valid_set_x[index],
            y: valid_set_y[index]
        }
    )

    gparams = [T.grad(cost, param) for param in regression.params]
    updates = [
        (param, param - learning_rate * gparam)
        for param, gparam in zip(regression.params, gparams)
    ]
    train_model = theano.function(
        inputs=[index],
        outputs=[cost, regression.output],
        updates=updates,
        givens={
            x: train_set_x[index],
            y: train_set_y[index]
        }
    )

    best_validation_error = numpy.inf
    improvement_threshold = 0.999
    epoch = 0
    test_score = 0
    done_looping = False
    validation_frequency = 30
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for index in range(len(train_set_x.container.data)):
            cost, output = train_model(index)

        validation_error = 0
        for valid in range(len(valid_set_x.container.data)):
            validation_error += validate_model(valid)
        print('epoch = ' + str(epoch) + ' validation error = ' +
              str(validation_error))
        if validation_error < best_validation_error:
            best_validation_error = validation_error
        # else:
        #     done_looping = True

    # Test on the test set
    test_error = list()
    output = list()
    for test in range(len(test_set_x.container.data)):
        error, val = test_model(test)
        test_error.append(error)
        output.append(val[0][0])
    print('test error = ' + str(sum(test_error)))
    # Plot the data
    plt.title("data")
    plt.scatter(train_set_x.container.data, train_set_y.container.data,
                c='b', hold=True)
    # plt.scatter([pt[0] for pt in validation], [pt[1] for pt in validation],
    #             c='g', hold=True)
    # plt.scatter([pt[0] for pt in testing], [pt[1] for pt in testing],
    #             c='r', hold=True)
    plt.scatter(test_set_x.container.data, test_set_y.container.data, c='r')
    plt.scatter(test_set_x.container.data, output, c='g')
    plt.show()


if __name__ == "__main__":
    test_reg()