Added: hama/trunk/ml/src/test/java/org/apache/hama/ml/ann/TestSmallLayeredNeuralNetwork.java URL: http://svn.apache.org/viewvc/hama/trunk/ml/src/test/java/org/apache/hama/ml/ann/TestSmallLayeredNeuralNetwork.java?rev=1513873&view=auto ============================================================================== --- hama/trunk/ml/src/test/java/org/apache/hama/ml/ann/TestSmallLayeredNeuralNetwork.java (added) +++ hama/trunk/ml/src/test/java/org/apache/hama/ml/ann/TestSmallLayeredNeuralNetwork.java Wed Aug 14 13:27:18 2013 @@ -0,0 +1,542 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hama.ml.ann; + +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; + +import java.io.BufferedReader; +import java.io.FileNotFoundException; +import java.io.FileReader; +import java.io.IOException; +import java.net.URI; +import java.net.URISyntaxException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.Date; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.SequenceFile; +import org.apache.hama.ml.ann.AbstractLayeredNeuralNetwork.TrainingMethod; +import org.apache.hama.ml.math.DenseDoubleMatrix; +import org.apache.hama.ml.math.DenseDoubleVector; +import org.apache.hama.ml.math.DoubleMatrix; +import org.apache.hama.ml.math.DoubleVector; +import org.apache.hama.ml.math.FunctionFactory; +import org.apache.hama.ml.writable.VectorWritable; +import org.junit.Test; +import org.mortbay.log.Log; + +/** + * Test the functionality of SmallLayeredNeuralNetwork. 
+ * + */ +public class TestSmallLayeredNeuralNetwork { + + @Test + public void testReadWrite() { + SmallLayeredNeuralNetwork ann = new SmallLayeredNeuralNetwork(); + ann.addLayer(2, false, + FunctionFactory.createDoubleFunction("IdentityFunction")); + ann.addLayer(5, false, + FunctionFactory.createDoubleFunction("IdentityFunction")); + ann.addLayer(1, true, + FunctionFactory.createDoubleFunction("IdentityFunction")); + ann.setCostFunction(FunctionFactory + .createDoubleDoubleFunction("SquaredError")); + double learningRate = 0.2; + ann.setLearningRate(learningRate); + double momentumWeight = 0.5; + ann.setMomemtumWeight(momentumWeight); + double regularizationWeight = 0.05; + ann.setRegularizationWeight(regularizationWeight); + // intentionally initialize the weight matrices to fixed values (0.2 and 0.8) + DoubleMatrix[] matrices = new DenseDoubleMatrix[2]; + matrices[0] = new DenseDoubleMatrix(5, 3, 0.2); + matrices[1] = new DenseDoubleMatrix(1, 6, 0.8); + ann.setWeightMatrices(matrices); + + // write to file + String modelPath = "/tmp/testSmallLayeredNeuralNetworkReadWrite"; + ann.setModelPath(modelPath); + try { + ann.writeModelToFile(); + } catch (IOException e) { + e.printStackTrace(); + } + + // read from file + SmallLayeredNeuralNetwork annCopy = new SmallLayeredNeuralNetwork(modelPath); + assertEquals(annCopy.getClass().getSimpleName(), annCopy.getModelType()); + assertEquals(modelPath, annCopy.getModelPath()); + assertEquals(learningRate, annCopy.getLearningRate(), 0.000001); + assertEquals(momentumWeight, annCopy.getMomemtumWeight(), 0.000001); + assertEquals(regularizationWeight, annCopy.getRegularizationWeight(), + 0.000001); + assertEquals(TrainingMethod.GRADIATE_DESCENT, annCopy.getTrainingMethod()); + + // compare weights + DoubleMatrix[] weightsMatrices = annCopy.getWeightMatrices(); + for (int i = 0; i < weightsMatrices.length; ++i) { + DoubleMatrix expectMat = matrices[i]; + DoubleMatrix actualMat = weightsMatrices[i]; + for (int j = 0; j < expectMat.getRowCount(); ++j) { + for (int k = 0; k < expectMat.getColumnCount(); ++k) { + assertEquals(expectMat.get(j, k), actualMat.get(j, k), 0.000001); + } + } + } + } + + @Test + /** + * Test the forward functionality. 
+ */ + public void testOutput() { + // first network + SmallLayeredNeuralNetwork ann = new SmallLayeredNeuralNetwork(); + ann.addLayer(2, false, + FunctionFactory.createDoubleFunction("IdentityFunction")); + ann.addLayer(5, false, + FunctionFactory.createDoubleFunction("IdentityFunction")); + ann.addLayer(1, true, + FunctionFactory.createDoubleFunction("IdentityFunction")); + ann.setCostFunction(FunctionFactory + .createDoubleDoubleFunction("SquaredError")); + ann.setLearningRate(0.1); + // intentionally initialize all weights to 0.5 + DoubleMatrix[] matrices = new DenseDoubleMatrix[2]; + matrices[0] = new DenseDoubleMatrix(5, 3, 0.5); + matrices[1] = new DenseDoubleMatrix(1, 6, 0.5); + ann.setWeightMatrices(matrices); + + double[] arr = new double[] { 0, 1 }; + DoubleVector training = new DenseDoubleVector(arr); + DoubleVector result = ann.getOutput(training); + assertEquals(1, result.getDimension()); + // assertEquals(3, result.get(0), 0.000001); + + // second network + SmallLayeredNeuralNetwork ann2 = new SmallLayeredNeuralNetwork(); + ann2.addLayer(2, false, FunctionFactory.createDoubleFunction("Sigmoid")); + ann2.addLayer(3, false, FunctionFactory.createDoubleFunction("Sigmoid")); + ann2.addLayer(1, true, FunctionFactory.createDoubleFunction("Sigmoid")); + ann2.setCostFunction(FunctionFactory + .createDoubleDoubleFunction("SquaredError")); + ann2.setLearningRate(0.3); + // intentionally initialize all weights to 0.5 + DoubleMatrix[] matrices2 = new DenseDoubleMatrix[2]; + matrices2[0] = new DenseDoubleMatrix(3, 3, 0.5); + matrices2[1] = new DenseDoubleMatrix(1, 4, 0.5); + ann2.setWeightMatrices(matrices2); + + double[] test = { 0, 0 }; + double[] result2 = { 0.807476 }; + + DoubleVector vec = ann2.getOutput(new DenseDoubleVector(test)); + assertArrayEquals(result2, vec.toArray(), 0.000001); + + SmallLayeredNeuralNetwork ann3 = new SmallLayeredNeuralNetwork(); + ann3.addLayer(2, false, FunctionFactory.createDoubleFunction("Sigmoid")); + ann3.addLayer(3, false, FunctionFactory.createDoubleFunction("Sigmoid")); + ann3.addLayer(1, true, FunctionFactory.createDoubleFunction("Sigmoid")); + ann3.setCostFunction(FunctionFactory + .createDoubleDoubleFunction("SquaredError")); + ann3.setLearningRate(0.3); + // intentionally initialize all weights to 0.5 + DoubleMatrix[] initMatrices = new DenseDoubleMatrix[2]; + initMatrices[0] = new DenseDoubleMatrix(3, 3, 0.5); + initMatrices[1] = new DenseDoubleMatrix(1, 4, 0.5); + ann3.setWeightMatrices(initMatrices); + + double[] instance = { 0, 1 }; + DoubleVector output = ann3.getOutput(new DenseDoubleVector(instance)); + assertEquals(0.8315410, output.get(0), 0.000001); + } + + @Test + public void testXORlocal() { + SmallLayeredNeuralNetwork ann = new SmallLayeredNeuralNetwork(); + ann.addLayer(2, false, FunctionFactory.createDoubleFunction("Sigmoid")); + ann.addLayer(3, false, FunctionFactory.createDoubleFunction("Sigmoid")); + ann.addLayer(1, true, FunctionFactory.createDoubleFunction("Sigmoid")); + ann.setCostFunction(FunctionFactory + .createDoubleDoubleFunction("SquaredError")); + ann.setLearningRate(0.5); + ann.setMomemtumWeight(0.0); + + int iterations = 50000; // iteration should be set to a very large number + double[][] instances = { { 0, 1, 1 }, { 0, 0, 0 }, { 1, 0, 1 }, { 1, 1, 0 } }; + for (int i = 0; i < iterations; ++i) { + DoubleMatrix[] matrices = null; + for (int j = 0; j < instances.length; ++j) { + matrices = ann.trainByInstance(new DenseDoubleVector(instances[j + % instances.length])); + ann.updateWeightMatrices(matrices); + } + } + 
+ for (int i = 0; i < instances.length; ++i) { + DoubleVector input = new DenseDoubleVector(instances[i]).slice(2); + // the expected output is the last element in array + double result = instances[i][2]; + assertEquals(result, ann.getOutput(input).get(0), 0.1); + } + + // write model into file and read out + String modelPath = "/tmp/testSmallLayeredNeuralNetworkXORLocal"; + ann.setModelPath(modelPath); + try { + ann.writeModelToFile(); + } catch (IOException e) { + e.printStackTrace(); + } + SmallLayeredNeuralNetwork annCopy = new SmallLayeredNeuralNetwork(modelPath); + // test on instances + for (int i = 0; i < instances.length; ++i) { + DoubleVector input = new DenseDoubleVector(instances[i]).slice(2); + // the expected output is the last element in array + double result = instances[i][2]; + assertEquals(result, annCopy.getOutput(input).get(0), 0.1); + } + } + + @Test + public void testXORWithMomentum() { + SmallLayeredNeuralNetwork ann = new SmallLayeredNeuralNetwork(); + ann.addLayer(2, false, FunctionFactory.createDoubleFunction("Sigmoid")); + ann.addLayer(3, false, FunctionFactory.createDoubleFunction("Sigmoid")); + ann.addLayer(1, true, FunctionFactory.createDoubleFunction("Sigmoid")); + ann.setCostFunction(FunctionFactory + .createDoubleDoubleFunction("SquaredError")); + ann.setLearningRate(0.6); + ann.setMomemtumWeight(0.3); + + int iterations = 2000; // iteration should be set to a very large number + double[][] instances = { { 0, 1, 1 }, { 0, 0, 0 }, { 1, 0, 1 }, { 1, 1, 0 } }; + for (int i = 0; i < iterations; ++i) { + for (int j = 0; j < instances.length; ++j) { + ann.trainOnline(new DenseDoubleVector(instances[j % instances.length])); + } + } + + for (int i = 0; i < instances.length; ++i) { + DoubleVector input = new DenseDoubleVector(instances[i]).slice(2); + // the expected output is the last element in array + double result = instances[i][2]; + assertEquals(result, ann.getOutput(input).get(0), 0.1); + } + + // write model into file and read out + String modelPath = "/tmp/testSmallLayeredNeuralNetworkXORLocalWithMomentum"; + ann.setModelPath(modelPath); + try { + ann.writeModelToFile(); + } catch (IOException e) { + e.printStackTrace(); + } + SmallLayeredNeuralNetwork annCopy = new SmallLayeredNeuralNetwork(modelPath); + // test on instances + for (int i = 0; i < instances.length; ++i) { + DoubleVector input = new DenseDoubleVector(instances[i]).slice(2); + // the expected output is the last element in array + double result = instances[i][2]; + assertEquals(result, annCopy.getOutput(input).get(0), 0.1); + } + } + + @Test + public void testXORLocalWithRegularization() { + SmallLayeredNeuralNetwork ann = new SmallLayeredNeuralNetwork(); + ann.addLayer(2, false, FunctionFactory.createDoubleFunction("Sigmoid")); + ann.addLayer(3, false, FunctionFactory.createDoubleFunction("Sigmoid")); + ann.addLayer(1, true, FunctionFactory.createDoubleFunction("Sigmoid")); + ann.setCostFunction(FunctionFactory + .createDoubleDoubleFunction("SquaredError")); + ann.setLearningRate(0.7); + ann.setMomemtumWeight(0.5); + ann.setRegularizationWeight(0.002); + + int iterations = 5000; // iteration should be set to a very large number + double[][] instances = { { 0, 1, 1 }, { 0, 0, 0 }, { 1, 0, 1 }, { 1, 1, 0 } }; + for (int i = 0; i < iterations; ++i) { + for (int j = 0; j < instances.length; ++j) { + ann.trainOnline(new DenseDoubleVector(instances[j % instances.length])); + } + } + + for (int i = 0; i < instances.length; ++i) { + DoubleVector input = new DenseDoubleVector(instances[i]).slice(2); + 
// the expected output is the last element in array + double result = instances[i][2]; + assertEquals(result, ann.getOutput(input).get(0), 0.05); + } + + // write model into file and read out + String modelPath = "/tmp/testSmallLayeredNeuralNetworkXORLocalWithRegularization"; + ann.setModelPath(modelPath); + try { + ann.writeModelToFile(); + } catch (IOException e) { + e.printStackTrace(); + } + SmallLayeredNeuralNetwork annCopy = new SmallLayeredNeuralNetwork(modelPath); + // test on instances + for (int i = 0; i < instances.length; ++i) { + DoubleVector input = new DenseDoubleVector(instances[i]).slice(2); + // the expected output is the last element in array + double result = instances[i][2]; + assertEquals(result, annCopy.getOutput(input).get(0), 0.05); + } + } + + @Test + public void testTwoClassClassification() { + // use logistic regression data + String filepath = "src/test/resources/logistic_regression_data.txt"; + List<double[]> instanceList = new ArrayList<double[]>(); + + try { + BufferedReader br = new BufferedReader(new FileReader(filepath)); + String line = null; + while ((line = br.readLine()) != null) { + String[] tokens = line.trim().split(","); + double[] instance = new double[tokens.length]; + for (int i = 0; i < tokens.length; ++i) { + instance[i] = Double.parseDouble(tokens[i]); + } + instanceList.add(instance); + } + br.close(); + } catch (FileNotFoundException e) { + e.printStackTrace(); + } catch (IOException e) { + e.printStackTrace(); + } + + int dimension = instanceList.get(0).length - 1; + + // min-max normalization + double[] mins = new double[dimension]; + double[] maxs = new double[dimension]; + Arrays.fill(mins, Double.MAX_VALUE); + Arrays.fill(maxs, Double.MIN_VALUE); + + for (double[] instance : instanceList) { + for (int i = 0; i < instance.length - 1; ++i) { + if (mins[i] > instance[i]) { + mins[i] = instance[i]; + } + if (maxs[i] < instance[i]) { + maxs[i] = instance[i]; + } + } + } + + for (double[] instance : instanceList) { + for (int i = 0; i < instance.length - 1; ++i) { + double range = maxs[i] - mins[i]; + if (range != 0) { + instance[i] = (instance[i] - mins[i]) / range; + } + } + } + + // divide dataset into training and testing + List<double[]> testInstances = new ArrayList<double[]>(); + testInstances.addAll(instanceList.subList(instanceList.size() - 100, + instanceList.size())); + List<double[]> trainingInstances = instanceList.subList(0, + instanceList.size() - 100); + + SmallLayeredNeuralNetwork ann = new SmallLayeredNeuralNetwork(); + ann.setLearningRate(0.001); + ann.setMomemtumWeight(0.1); + ann.setRegularizationWeight(0.01); + ann.addLayer(dimension, false, + FunctionFactory.createDoubleFunction("Sigmoid")); + ann.addLayer(dimension, false, + FunctionFactory.createDoubleFunction("Sigmoid")); + ann.addLayer(dimension, false, + FunctionFactory.createDoubleFunction("Sigmoid")); + ann.addLayer(1, true, FunctionFactory.createDoubleFunction("Sigmoid")); + ann.setCostFunction(FunctionFactory + .createDoubleDoubleFunction("CrossEntropy")); + + long start = new Date().getTime(); + int iterations = 1000; + for (int i = 0; i < iterations; ++i) { + for (double[] trainingInstance : trainingInstances) { + ann.trainOnline(new DenseDoubleVector(trainingInstance)); + } + } + long end = new Date().getTime(); + Log.info(String.format("Training time: %fs\n", + (double) (end - start) / 1000)); + + double errorRate = 0; + // calculate the error on test instance + for (double[] testInstance : testInstances) { + DoubleVector instance = new 
DenseDoubleVector(testInstance); + double expected = instance.get(instance.getDimension() - 1); + instance = instance.slice(instance.getDimension() - 1); + double actual = ann.getOutput(instance).get(0); + if (actual < 0.5 && expected >= 0.5 || actual >= 0.5 && expected < 0.5) { + ++errorRate; + } + } + errorRate /= testInstances.size(); + + Log.info(String.format("Relative error: %f%%\n", errorRate * 100)); + } + + @Test + public void testDistributedVersion() { + // write data into a sequence file + String tmpStrDatasetPath = "/tmp/logistic_regression_data"; + Path tmpDatasetPath = new Path(tmpStrDatasetPath); + String strDataPath = "src/test/resources/logistic_regression_data.txt"; + String modelPath = "/tmp/distributed-model"; + + Configuration conf = new Configuration(); + List<double[]> instanceList = new ArrayList<double[]>(); + List<double[]> trainingInstances = null; + List<double[]> testInstances = null; + + try { + FileSystem fs = FileSystem.get(new URI(tmpStrDatasetPath), conf); + fs.delete(tmpDatasetPath, true); + if (fs.exists(tmpDatasetPath)) { + fs.createNewFile(tmpDatasetPath); + } + + BufferedReader br = new BufferedReader(new FileReader(strDataPath)); + String line = null; + int count = 0; + while ((line = br.readLine()) != null) { + String[] tokens = line.trim().split(","); + double[] instance = new double[tokens.length]; + for (int i = 0; i < tokens.length; ++i) { + instance[i] = Double.parseDouble(tokens[i]); + } + instanceList.add(instance); + } + br.close(); + + int dimension = instanceList.get(0).length - 1; + // min-max normalization + double[] mins = new double[dimension]; + double[] maxs = new double[dimension]; + Arrays.fill(mins, Double.MAX_VALUE); + Arrays.fill(maxs, Double.MIN_VALUE); + + for (double[] instance : instanceList) { + for (int i = 0; i < instance.length - 1; ++i) { + mins[i] = Math.min(mins[i], instance[i]); + maxs[i] = Math.max(maxs[i], instance[i]); + } + } + + for (double[] instance : instanceList) { + for (int i = 0; i < instance.length - 1; ++i) { + double range = maxs[i] - mins[i]; + if (range != 0) { + instance[i] = (instance[i] - mins[i]) / range; + } + } + } + + // write training data to temporal sequence file + SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, + tmpDatasetPath, LongWritable.class, VectorWritable.class); + int testSize = 150; + + Collections.shuffle(instanceList); + testInstances = new ArrayList<double[]>(); + testInstances.addAll(instanceList.subList(instanceList.size() - testSize, + instanceList.size())); + trainingInstances = instanceList.subList(0, instanceList.size() + - testSize); + + for (double[] instance : trainingInstances) { + DoubleVector vec = new DenseDoubleVector(instance); + writer.append(new LongWritable(count++), new VectorWritable(vec)); + } + writer.close(); + } catch (FileNotFoundException e) { + e.printStackTrace(); + } catch (IOException e) { + e.printStackTrace(); + } catch (URISyntaxException e) { + e.printStackTrace(); + } + + // create model + int dimension = 8; + SmallLayeredNeuralNetwork ann = new SmallLayeredNeuralNetwork(); + ann.setLearningRate(0.7); + ann.setMomemtumWeight(0.5); + ann.setRegularizationWeight(0.1); + ann.addLayer(dimension, false, + FunctionFactory.createDoubleFunction("Sigmoid")); + ann.addLayer(dimension, false, + FunctionFactory.createDoubleFunction("Sigmoid")); + ann.addLayer(dimension, false, + FunctionFactory.createDoubleFunction("Sigmoid")); + ann.addLayer(1, true, FunctionFactory.createDoubleFunction("Sigmoid")); + ann.setCostFunction(FunctionFactory + 
.createDoubleDoubleFunction("CrossEntropy")); + ann.setModelPath(modelPath); + + long start = new Date().getTime(); + Map<String, String> trainingParameters = new HashMap<String, String>(); + trainingParameters.put("tasks", "5"); + trainingParameters.put("training.max.iterations", "2000"); + trainingParameters.put("training.batch.size", "300"); + trainingParameters.put("convergence.check.interval", "1000"); + ann.train(tmpDatasetPath, trainingParameters); + + long end = new Date().getTime(); + + // validate results + double errorRate = 0; + // calculate the error on test instance + for (double[] testInstance : testInstances) { + DoubleVector instance = new DenseDoubleVector(testInstance); + double expected = instance.get(instance.getDimension() - 1); + instance = instance.slice(instance.getDimension() - 1); + double actual = ann.getOutput(instance).get(0); + if (actual < 0.5 && expected >= 0.5 || actual >= 0.5 && expected < 0.5) { + ++errorRate; + } + } + errorRate /= testInstances.size(); + + Log.info(String.format("Training time: %fs\n", + (double) (end - start) / 1000)); + Log.info(String.format("Relative error: %f%%\n", errorRate * 100)); + } + +}
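The XOR tests above all follow the same pattern: stack sigmoid layers, train online over the four labeled instances for a large number of epochs, then check each output against the expected label within a tolerance. A minimal standalone sketch of that pattern follows; it is a sketch only, assuming nothing beyond the SmallLayeredNeuralNetwork API exercised in the diff (addLayer, setCostFunction, trainOnline, getOutput), and is not part of the committed tests.

import org.apache.hama.ml.ann.SmallLayeredNeuralNetwork;
import org.apache.hama.ml.math.DenseDoubleVector;
import org.apache.hama.ml.math.DoubleVector;
import org.apache.hama.ml.math.FunctionFactory;

public class XorSketch {
  public static void main(String[] args) {
    // 2 inputs -> 3 hidden sigmoid units -> 1 sigmoid output (final layer)
    SmallLayeredNeuralNetwork ann = new SmallLayeredNeuralNetwork();
    ann.addLayer(2, false, FunctionFactory.createDoubleFunction("Sigmoid"));
    ann.addLayer(3, false, FunctionFactory.createDoubleFunction("Sigmoid"));
    ann.addLayer(1, true, FunctionFactory.createDoubleFunction("Sigmoid"));
    ann.setCostFunction(FunctionFactory.createDoubleDoubleFunction("SquaredError"));
    ann.setLearningRate(0.5);

    // each row holds two features followed by the expected label
    double[][] instances = { { 0, 1, 1 }, { 0, 0, 0 }, { 1, 0, 1 }, { 1, 1, 0 } };
    for (int epoch = 0; epoch < 50000; ++epoch) {
      for (double[] instance : instances) {
        ann.trainOnline(new DenseDoubleVector(instance));
      }
    }

    for (double[] instance : instances) {
      // strip the label before querying the network
      DoubleVector input = new DenseDoubleVector(instance).slice(2);
      System.out.printf("(%.0f, %.0f) -> %f%n", instance[0], instance[1],
          ann.getOutput(input).get(0));
    }
  }
}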
Added: hama/trunk/ml/src/test/java/org/apache/hama/ml/ann/TestSmallLayeredNeuralNetworkMessage.java URL: http://svn.apache.org/viewvc/hama/trunk/ml/src/test/java/org/apache/hama/ml/ann/TestSmallLayeredNeuralNetworkMessage.java?rev=1513873&view=auto ============================================================================== --- hama/trunk/ml/src/test/java/org/apache/hama/ml/ann/TestSmallLayeredNeuralNetworkMessage.java (added) +++ hama/trunk/ml/src/test/java/org/apache/hama/ml/ann/TestSmallLayeredNeuralNetworkMessage.java Wed Aug 14 13:27:18 2013 @@ -0,0 +1,172 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hama.ml.ann; + +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; + +import java.io.IOException; +import java.net.URI; +import java.net.URISyntaxException; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hama.ml.math.DenseDoubleMatrix; +import org.apache.hama.ml.math.DoubleMatrix; +import org.junit.Test; + +/** + * Test the functionalities of SmallLayeredNeuralNetworkMessage. 
+ * + */ +public class TestSmallLayeredNeuralNetworkMessage { + + @Test + public void testReadWriteWithoutPrev() { + double error = 0.22; + double[][] matrix1 = new double[][] { { 0.1, 0.2, 0.8, 0.5 }, + { 0.3, 0.4, 0.6, 0.2 }, { 0.5, 0.6, 0.1, 0.5 } }; + double[][] matrix2 = new double[][] { { 0.8, 1.2, 0.5 } }; + DoubleMatrix[] matrices = new DoubleMatrix[2]; + matrices[0] = new DenseDoubleMatrix(matrix1); + matrices[1] = new DenseDoubleMatrix(matrix2); + + boolean isConverge = false; + + SmallLayeredNeuralNetworkMessage message = new SmallLayeredNeuralNetworkMessage( + error, isConverge, matrices, null); + Configuration conf = new Configuration(); + String strPath = "/tmp/testReadWriteSmallLayeredNeuralNetworkMessage"; + Path path = new Path(strPath); + try { + FileSystem fs = FileSystem.get(new URI(strPath), conf); + FSDataOutputStream out = fs.create(path); + message.write(out); + out.close(); + + FSDataInputStream in = fs.open(path); + SmallLayeredNeuralNetworkMessage readMessage = new SmallLayeredNeuralNetworkMessage( + 0, isConverge, null, null); + readMessage.readFields(in); + in.close(); + assertEquals(error, readMessage.getTrainingError(), 0.000001); + assertFalse(readMessage.isConverge()); + DoubleMatrix[] readMatrices = readMessage.getCurMatrices(); + assertEquals(2, readMatrices.length); + for (int i = 0; i < readMatrices.length; ++i) { + double[][] doubleMatrices = ((DenseDoubleMatrix) readMatrices[i]) + .getValues(); + double[][] doubleExpected = ((DenseDoubleMatrix) matrices[i]) + .getValues(); + for (int r = 0; r < doubleMatrices.length; ++r) { + assertArrayEquals(doubleExpected[r], doubleMatrices[r], 0.000001); + } + } + + DoubleMatrix[] readPrevMatrices = readMessage.getPrevMatrices(); + assertNull(readPrevMatrices); + + // delete + fs.delete(path, true); + } catch (IOException e) { + e.printStackTrace(); + } catch (URISyntaxException e) { + e.printStackTrace(); + } + } + + @Test + public void testReadWriteWithPrev() { + double error = 0.22; + boolean isConverge = true; + + double[][] matrix1 = new double[][] { { 0.1, 0.2, 0.8, 0.5 }, + { 0.3, 0.4, 0.6, 0.2 }, { 0.5, 0.6, 0.1, 0.5 } }; + double[][] matrix2 = new double[][] { { 0.8, 1.2, 0.5 } }; + DoubleMatrix[] matrices = new DoubleMatrix[2]; + matrices[0] = new DenseDoubleMatrix(matrix1); + matrices[1] = new DenseDoubleMatrix(matrix2); + + double[][] prevMatrix1 = new double[][] { { 0.1, 0.1, 0.2, 0.3 }, + { 0.2, 0.4, 0.1, 0.5 }, { 0.5, 0.1, 0.5, 0.2 } }; + double[][] prevMatrix2 = new double[][] { { 0.1, 0.2, 0.5, 0.9 }, + { 0.3, 0.5, 0.2, 0.6 }, { 0.6, 0.8, 0.7, 0.5 } }; + + DoubleMatrix[] prevMatrices = new DoubleMatrix[2]; + prevMatrices[0] = new DenseDoubleMatrix(prevMatrix1); + prevMatrices[1] = new DenseDoubleMatrix(prevMatrix2); + + SmallLayeredNeuralNetworkMessage message = new SmallLayeredNeuralNetworkMessage( + error, isConverge, matrices, prevMatrices); + Configuration conf = new Configuration(); + String strPath = "/tmp/testReadWriteSmallLayeredNeuralNetworkMessageWithPrev"; + Path path = new Path(strPath); + try { + FileSystem fs = FileSystem.get(new URI(strPath), conf); + FSDataOutputStream out = fs.create(path); + message.write(out); + out.close(); + + FSDataInputStream in = fs.open(path); + SmallLayeredNeuralNetworkMessage readMessage = new SmallLayeredNeuralNetworkMessage( + 0, isConverge, null, null); + readMessage.readFields(in); + in.close(); + + assertTrue(readMessage.isConverge()); + + DoubleMatrix[] readMatrices = readMessage.getCurMatrices(); + assertEquals(2, readMatrices.length); + for (int 
i = 0; i < readMatrices.length; ++i) { + double[][] doubleMatrices = ((DenseDoubleMatrix) readMatrices[i]) + .getValues(); + double[][] doubleExpected = ((DenseDoubleMatrix) matrices[i]) + .getValues(); + for (int r = 0; r < doubleMatrices.length; ++r) { + assertArrayEquals(doubleExpected[r], doubleMatrices[r], 0.000001); + } + } + + DoubleMatrix[] readPrevMatrices = readMessage.getPrevMatrices(); + assertEquals(2, readPrevMatrices.length); + for (int i = 0; i < readPrevMatrices.length; ++i) { + double[][] doubleMatrices = ((DenseDoubleMatrix) readPrevMatrices[i]) + .getValues(); + double[][] doubleExpected = ((DenseDoubleMatrix) prevMatrices[i]) + .getValues(); + for (int r = 0; r < doubleMatrices.length; ++r) { + assertArrayEquals(doubleExpected[r], doubleMatrices[r], 0.000001); + } + } + + // delete + fs.delete(path, true); + } catch (IOException e) { + e.printStackTrace(); + } catch (URISyntaxException e) { + e.printStackTrace(); + } + } + +} Modified: hama/trunk/ml/src/test/java/org/apache/hama/ml/perception/TestSmallMultiLayerPerceptron.java URL: http://svn.apache.org/viewvc/hama/trunk/ml/src/test/java/org/apache/hama/ml/perception/TestSmallMultiLayerPerceptron.java?rev=1513873&r1=1513872&r2=1513873&view=diff ============================================================================== --- hama/trunk/ml/src/test/java/org/apache/hama/ml/perception/TestSmallMultiLayerPerceptron.java (original) +++ hama/trunk/ml/src/test/java/org/apache/hama/ml/perception/TestSmallMultiLayerPerceptron.java Wed Aug 14 13:27:18 2013 @@ -40,7 +40,6 @@ import org.apache.hama.ml.math.DoubleMat import org.apache.hama.ml.math.DoubleVector; import org.apache.hama.ml.writable.MatrixWritable; import org.apache.hama.ml.writable.VectorWritable; -import org.junit.Ignore; import org.junit.Test; public class TestSmallMultiLayerPerceptron { @@ -305,10 +304,9 @@ public class TestSmallMultiLayerPerceptr e.printStackTrace(); } } - + /** - * Test training with momentum. - * The MLP can converge faster. + * Test training with momentum. The MLP can converge faster. */ @Test public void testWithMomentum() { @@ -359,7 +357,6 @@ public class TestSmallMultiLayerPerceptr * Test the XOR problem. */ @Test - @Ignore public void testTrainingByXOR() { // write in some training instances Configuration conf = new Configuration(); Added: hama/trunk/ml/src/test/java/org/apache/hama/ml/regression/TestLinearRegression.java URL: http://svn.apache.org/viewvc/hama/trunk/ml/src/test/java/org/apache/hama/ml/regression/TestLinearRegression.java?rev=1513873&view=auto ============================================================================== --- hama/trunk/ml/src/test/java/org/apache/hama/ml/regression/TestLinearRegression.java (added) +++ hama/trunk/ml/src/test/java/org/apache/hama/ml/regression/TestLinearRegression.java Wed Aug 14 13:27:18 2013 @@ -0,0 +1,133 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hama.ml.regression; + +import java.io.BufferedReader; +import java.io.FileNotFoundException; +import java.io.FileReader; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import org.apache.hama.ml.math.DenseDoubleVector; +import org.apache.hama.ml.math.DoubleVector; +import org.junit.Test; +import org.mortbay.log.Log; + +/** + * Test the functionalities of the linear regression model. + * + */ +public class TestLinearRegression { + + @Test + public void testLinearRegressionSimple() { + // y = 2.1 * x_1 + 0.7 * x_2 + 0.1 * x_3 + double[][] instances = { { 1, 1, 1, 2.9 }, { 5, 2, 3, 12.2 }, + { 2, 5, 8, 8.5 }, { 0.5, 0.1, 0.2, 1.14 }, { 10, 20, 30, 38 }, + { 0.6, 20, 5, 16.76 } }; + + LinearRegression regression = new LinearRegression(instances[0].length - 1); + regression.setLearningRate(0.001); + regression.setMomemtumWeight(0.1); + + int iterations = 100; + for (int i = 0; i < iterations; ++i) { + for (int j = 0; j < instances.length; ++j) { + regression.trainOnline(new DenseDoubleVector(instances[j])); + } + } + + double relativeError = 0; + for (int i = 0; i < instances.length; ++i) { + DoubleVector test = new DenseDoubleVector(instances[i]); + double expected = test.get(test.getDimension() - 1); + test = test.slice(test.getDimension() - 1); + double actual = regression.getOutput(test).get(0); + relativeError += Math.abs((expected - actual) / expected); + } + + relativeError /= instances.length; + Log.info(String.format("Relative error: %f%%\n", relativeError * 100)); + } + + @Test + public void testLinearRegressionOnlineTraining() { + // read linear regression data + String filepath = "src/test/resources/linear_regression_data.txt"; + List<double[]> instanceList = new ArrayList<double[]>(); + + try { + BufferedReader br = new BufferedReader(new FileReader(filepath)); + String line = null; + while ((line = br.readLine()) != null) { + if (line.startsWith("#")) { // ignore comments + continue; + } + String[] tokens = line.trim().split(" "); + double[] instance = new double[tokens.length]; + for (int i = 0; i < tokens.length; ++i) { + instance[i] = Double.parseDouble(tokens[i]); + } + instanceList.add(instance); + } + br.close(); + } catch (FileNotFoundException e) { + e.printStackTrace(); + } catch (IOException e) { + e.printStackTrace(); + } + // divide dataset into training and testing + List<double[]> testInstances = new ArrayList<double[]>(); + testInstances.addAll(instanceList.subList(instanceList.size() - 20, + instanceList.size())); + List<double[]> trainingInstances = instanceList.subList(0, + instanceList.size() - 20); + + int dimension = instanceList.get(0).length - 1; + + LinearRegression regression = new LinearRegression(dimension); + regression.setLearningRate(0.00000005); + regression.setMomemtumWeight(0.1); + regression.setRegularizationWeight(0.05); + int iterations = 2000; + for (int i = 0; i < iterations; ++i) { + for (double[] trainingInstance : trainingInstances) { + regression.trainOnline(new DenseDoubleVector(trainingInstance)); + } + } + + double relativeError = 0.0; + // calculate the error on test 
instance + for (double[] testInstance : testInstances) { + DoubleVector instance = new DenseDoubleVector(testInstance); + double expected = instance.get(instance.getDimension() - 1); + instance = instance.slice(instance.getDimension() - 1); + double actual = regression.getOutput(instance).get(0); + if (expected == 0) { + expected = 0.0000001; + } + relativeError += Math.abs((expected - actual) / expected); + } + relativeError /= testInstances.size(); + + Log.info(String.format("Relative error: %f%%\n", relativeError * 100)); + } + +} Added: hama/trunk/ml/src/test/java/org/apache/hama/ml/regression/TestLogisticRegression.java URL: http://svn.apache.org/viewvc/hama/trunk/ml/src/test/java/org/apache/hama/ml/regression/TestLogisticRegression.java?rev=1513873&view=auto ============================================================================== --- hama/trunk/ml/src/test/java/org/apache/hama/ml/regression/TestLogisticRegression.java (added) +++ hama/trunk/ml/src/test/java/org/apache/hama/ml/regression/TestLogisticRegression.java Wed Aug 14 13:27:18 2013 @@ -0,0 +1,130 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hama.ml.regression; + +import java.io.BufferedReader; +import java.io.FileNotFoundException; +import java.io.FileReader; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import org.apache.hama.ml.math.DenseDoubleVector; +import org.apache.hama.ml.math.DoubleVector; +import org.junit.Test; +import org.mortbay.log.Log; + +/** + * Test the functionalities of LogisticRegression. 
+ * + */ +public class TestLogisticRegression { + + @Test + public void testLogisticRegressionLocal() { + // read logistic regression data + String filepath = "src/test/resources/logistic_regression_data.txt"; + List<double[]> instanceList = new ArrayList<double[]>(); + + try { + BufferedReader br = new BufferedReader(new FileReader(filepath)); + String line = null; + while ((line = br.readLine()) != null) { + if (line.startsWith("#")) { // ignore comments + continue; + } + String[] tokens = line.trim().split(","); + double[] instance = new double[tokens.length]; + for (int i = 0; i < tokens.length; ++i) { + instance[i] = Double.parseDouble(tokens[i]); + } + instanceList.add(instance); + } + br.close(); + } catch (FileNotFoundException e) { + e.printStackTrace(); + } catch (IOException e) { + e.printStackTrace(); + } + + int dimension = instanceList.get(0).length - 1; + + // min-max normalization + double[] mins = new double[dimension]; + double[] maxs = new double[dimension]; + Arrays.fill(mins, Double.MAX_VALUE); + Arrays.fill(maxs, Double.MIN_VALUE); + + for (double[] instance : instanceList) { + for (int i = 0; i < instance.length - 1; ++i) { + if (mins[i] > instance[i]) { + mins[i] = instance[i]; + } + if (maxs[i] < instance[i]) { + maxs[i] = instance[i]; + } + } + } + + for (double[] instance : instanceList) { + for (int i = 0; i < instance.length - 1; ++i) { + double range = maxs[i] - mins[i]; + if (range != 0) { + instance[i] = (instance[i] - mins[i]) / range; + } + } + } + + // divide dataset into training and testing + List<double[]> testInstances = new ArrayList<double[]>(); + testInstances.addAll(instanceList.subList(instanceList.size() - 100, + instanceList.size())); + List<double[]> trainingInstances = instanceList.subList(0, + instanceList.size() - 100); + + LogisticRegression regression = new LogisticRegression(dimension); + regression.setLearningRate(0.2); + regression.setMomemtumWeight(0.1); + regression.setRegularizationWeight(0.1); + int iterations = 1000; + for (int i = 0; i < iterations; ++i) { + for (double[] trainingInstance : trainingInstances) { + regression.trainOnline(new DenseDoubleVector(trainingInstance)); + } + } + + double errorRate = 0; + // calculate the error on test instance + for (double[] testInstance : testInstances) { + DoubleVector instance = new DenseDoubleVector(testInstance); + double expected = instance.get(instance.getDimension() - 1); + DoubleVector features = instance.slice(instance.getDimension() - 1); + double actual = regression.getOutput(features).get(0); + if (actual < 0.5 && expected >= 0.5 || actual >= 0.5 && expected < 0.5) { + ++errorRate; + } + + } + errorRate /= testInstances.size(); + + Log.info(String.format("Relative error: %f%%\n", errorRate * 100)); + } + +}
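TestLogisticRegression above and testTwoClassClassification in TestSmallLayeredNeuralNetwork inline the same min-max normalization: every feature column is rescaled to [0, 1] while the label in the last column is left untouched. A standalone sketch of that step follows; note that where the tests seed the running maxima with Double.MIN_VALUE (the smallest positive double, which only behaves as intended when all features are non-negative), the sketch seeds with infinities.

import java.util.Arrays;
import java.util.List;

public final class MinMaxNormalizer {

  /** Rescales each feature of [f_0 .. f_{d-1}, label] rows to [0, 1] in place. */
  public static void normalize(List<double[]> instances) {
    int dimension = instances.get(0).length - 1;
    double[] mins = new double[dimension];
    double[] maxs = new double[dimension];
    Arrays.fill(mins, Double.POSITIVE_INFINITY);
    Arrays.fill(maxs, Double.NEGATIVE_INFINITY);

    // first pass: collect per-feature minima and maxima
    for (double[] instance : instances) {
      for (int i = 0; i < dimension; ++i) {
        mins[i] = Math.min(mins[i], instance[i]);
        maxs[i] = Math.max(maxs[i], instance[i]);
      }
    }
    // second pass: rescale, skipping constant columns to avoid division by zero
    for (double[] instance : instances) {
      for (int i = 0; i < dimension; ++i) {
        double range = maxs[i] - mins[i];
        if (range != 0) {
          instance[i] = (instance[i] - mins[i]) / range;
        }
      }
    }
  }
}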

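Finally, the two cases in TestSmallLayeredNeuralNetworkMessage above reduce to a Hadoop Writable round-trip: serialize the message with write(DataOutput), deserialize into a fresh instance with readFields(DataInput), and compare the fields. A minimal sketch of that round-trip, assuming only the constructor and accessors that appear in the diff (write, readFields, getTrainingError, getCurMatrices, getPrevMatrices):

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hama.ml.ann.SmallLayeredNeuralNetworkMessage;
import org.apache.hama.ml.math.DenseDoubleMatrix;
import org.apache.hama.ml.math.DoubleMatrix;

public class MessageRoundTripSketch {
  public static void main(String[] args) throws Exception {
    // current weight matrices; no previous matrices (null), not yet converged
    DoubleMatrix[] weights = { new DenseDoubleMatrix(3, 4, 0.5),
        new DenseDoubleMatrix(1, 4, 0.5) };
    SmallLayeredNeuralNetworkMessage message =
        new SmallLayeredNeuralNetworkMessage(0.22, false, weights, null);

    String strPath = "/tmp/messageRoundTripSketch";
    Path path = new Path(strPath);
    FileSystem fs = FileSystem.get(new URI(strPath), new Configuration());

    FSDataOutputStream out = fs.create(path);
    message.write(out);
    out.close();

    SmallLayeredNeuralNetworkMessage read =
        new SmallLayeredNeuralNetworkMessage(0, false, null, null);
    FSDataInputStream in = fs.open(path);
    read.readFields(in);
    in.close();
    fs.delete(path, true);

    System.out.println("training error: " + read.getTrainingError());
    System.out.println("current matrices: " + read.getCurMatrices().length);
    System.out.println("previous matrices: " + read.getPrevMatrices());
  }
}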