IGNITE-7214: performance measurement for FCM and KNN algorithms this closes #3314
Project: http://git-wip-us.apache.org/repos/asf/ignite/repo Commit: http://git-wip-us.apache.org/repos/asf/ignite/commit/a3b83246 Tree: http://git-wip-us.apache.org/repos/asf/ignite/tree/a3b83246 Diff: http://git-wip-us.apache.org/repos/asf/ignite/diff/a3b83246 Branch: refs/heads/master Commit: a3b83246714be990425337522c9fe03fcffbe1a2 Parents: 6efc4d9 Author: Oleg Ignatenko <[email protected]> Authored: Fri Dec 29 18:45:20 2017 +0300 Committer: Yury Babak <[email protected]> Committed: Fri Dec 29 18:45:20 2017 +0300 ---------------------------------------------------------------------- .../ml/clustering/FuzzyCMeansExample.java | 113 ++--- .../ml/clustering/FuzzyCMeansLocalExample.java | 95 ++++ .../KMeansDistributedClustererExample.java | 2 + .../KNNClassificationExample.java | 11 +- .../ignite/examples/ml/knn/datasets/README.md | 2 + .../ml/knn/datasets/cleared_machines.txt | 209 +++++++++ .../ignite/examples/ml/knn/datasets/iris.txt | 150 ++++++ .../ml/knn/regression/KNNRegressionExample.java | 18 +- .../src/main/resources/datasets/knn/README.md | 2 - .../resources/datasets/knn/cleared_machines.txt | 209 --------- .../src/main/resources/datasets/knn/iris.txt | 150 ------ .../ignite/ml/structures/LabeledDataset.java | 4 +- .../FuzzyCMeansDistributedClustererTest.java | 11 +- .../FuzzyCMeansLocalClustererTest.java | 25 +- .../yardstick/config/benchmark-ml.properties | 4 + ...uzzyCMeansDistributedClustererBenchmark.java | 130 ++++++ ...gniteFuzzyCMeansLocalClustererBenchmark.java | 93 ++++ .../ignite/yardstick/ml/knn/Datasets.java | 453 +++++++++++++++++++ .../knn/IgniteKNNClassificationBenchmark.java | 73 +++ .../ml/knn/IgniteKNNRegressionBenchmark.java | 82 ++++ .../ignite/yardstick/ml/knn/package-info.java | 22 + parent/pom.xml | 5 + 22 files changed, 1422 insertions(+), 441 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/ignite/blob/a3b83246/examples/src/main/ml/org/apache/ignite/examples/ml/clustering/FuzzyCMeansExample.java ---------------------------------------------------------------------- diff --git a/examples/src/main/ml/org/apache/ignite/examples/ml/clustering/FuzzyCMeansExample.java b/examples/src/main/ml/org/apache/ignite/examples/ml/clustering/FuzzyCMeansExample.java index 3fce624..23aeed7 100644 --- a/examples/src/main/ml/org/apache/ignite/examples/ml/clustering/FuzzyCMeansExample.java +++ b/examples/src/main/ml/org/apache/ignite/examples/ml/clustering/FuzzyCMeansExample.java @@ -19,6 +19,7 @@ package org.apache.ignite.examples.ml.clustering; import org.apache.ignite.Ignite; import org.apache.ignite.Ignition; +import org.apache.ignite.examples.ExampleNodeStartup; import org.apache.ignite.ml.clustering.BaseFuzzyCMeansClusterer; import org.apache.ignite.ml.clustering.FuzzyCMeansDistributedClusterer; import org.apache.ignite.ml.clustering.FuzzyCMeansModel; @@ -30,8 +31,14 @@ import org.apache.ignite.ml.math.impls.matrix.SparseDistributedMatrix; import org.apache.ignite.thread.IgniteThread; /** - * This example shows how to use Fuzzy C-Means clusterer - * ({@link org.apache.ignite.ml.clustering.FuzzyCMeansDistributedClusterer}). + * <p> + * This example shows how to use {@link FuzzyCMeansDistributedClusterer}.</p> + * <p> + * Remote nodes should always be started with special configuration file which + * enables P2P class loading: {@code 'ignite.{sh|bat} examples/config/example-ignite.xml'}.</p> + * <p> + * Alternatively you can run {@link ExampleNodeStartup} in another JVM which will start node + * with {@code examples/config/example-ignite.xml} configuration.</p> */ public final class FuzzyCMeansExample { /** @@ -40,83 +47,85 @@ public final class FuzzyCMeansExample { * @param args Command line arguments, none required. */ public static void main(String[] args) throws InterruptedException { - System.out.println(); System.out.println(">>> Fuzzy C-Means usage example started."); + // Start ignite grid. try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) { System.out.println(">>> Ignite grid started."); // Start new Ignite thread. IgniteThread igniteThread = new IgniteThread(ignite.configuration().getIgniteInstanceName(), - FuzzyCMeansExample.class.getSimpleName(), - () -> { - - // Distance measure that computes distance between two points. - DistanceMeasure distanceMeasure = new EuclideanDistance(); + FuzzyCMeansExample.class.getSimpleName(), + () -> { + // Distance measure that computes distance between two points. + DistanceMeasure distanceMeasure = new EuclideanDistance(); - // "Fuzziness" - specific constant that is used in membership calculation (1.0+-eps ~ K-Means). - double exponentialWeight = 2.0; + // "Fuzziness" - specific constant that is used in membership calculation (1.0+-eps ~ K-Means). + double exponentialWeight = 2.0; - // Condition that indicated when algorithm must stop. - // In this example algorithm stops if memberships have changed insignificantly. - BaseFuzzyCMeansClusterer.StopCondition stopCond = + // Condition that indicated when algorithm must stop. + // In this example algorithm stops if memberships have changed insignificantly. + BaseFuzzyCMeansClusterer.StopCondition stopCond = BaseFuzzyCMeansClusterer.StopCondition.STABLE_MEMBERSHIPS; - // Maximum difference between new and old membership values with which algorithm will continue to work. - double maxDelta = 0.01; + // Maximum difference between new and old membership values with which algorithm will continue to work. + double maxDelta = 0.01; - // The maximum number of FCM iterations. - int maxIterations = 50; + // The maximum number of FCM iterations. + int maxIterations = 50; - // Value that is used to initialize random numbers generator. You can choose it randomly. - Long seed = null; + // Value that is used to initialize random numbers generator. You can choose it randomly. + Long seed = null; - // Number of steps of primary centers selection (more steps more candidates). - int initializationSteps = 2; + // Number of steps of primary centers selection (more steps more candidates). + int initializationSteps = 2; - // Number of K-Means iteration that is used to choose required number of primary centers from candidates. - int kMeansMaxIterations = 50; + // Number of K-Means iteration that is used to choose required number of primary centers from candidates. + int kMeansMaxIterations = 50; - // Create new distributed clusterer with parameters described above. - System.out.println(">>> Create new Distributed Fuzzy C-Means clusterer."); - FuzzyCMeansDistributedClusterer clusterer = new FuzzyCMeansDistributedClusterer( + // Create new distributed clusterer with parameters described above. + System.out.println(">>> Create new Distributed Fuzzy C-Means clusterer."); + FuzzyCMeansDistributedClusterer clusterer = new FuzzyCMeansDistributedClusterer( distanceMeasure, exponentialWeight, stopCond, maxDelta, maxIterations, seed, initializationSteps, kMeansMaxIterations); - // Create sample data. - double[][] points = new double[][]{{-10, -10}, {-9, -11}, {-10, -9}, {-11, -9}, - {10, 10}, {9, 11}, {10, 9}, {11, 9}, - {-10, 10}, {-9, 11}, {-10, 9}, {-11, 9}, - {10, -10}, {9, -11}, {10, -9}, {11, -9}}; + // Create sample data. + double[][] points = new double[][] { + {-10, -10}, {-9, -11}, {-10, -9}, {-11, -9}, + {10, 10}, {9, 11}, {10, 9}, {11, 9}, + {-10, 10}, {-9, 11}, {-10, 9}, {-11, 9}, + {10, -10}, {9, -11}, {10, -9}, {11, -9}}; - // Initialize matrix of data points. Each row contains one point. - int rows = points.length; - int cols = points[0].length; + // Initialize matrix of data points. Each row contains one point. + int rows = points.length; + int cols = points[0].length; - System.out.println(">>> Create the matrix that contains sample points."); - SparseDistributedMatrix pntMatrix = new SparseDistributedMatrix(rows, cols, + System.out.println(">>> Create the matrix that contains sample points."); + SparseDistributedMatrix pntMatrix = new SparseDistributedMatrix(rows, cols, StorageConstants.ROW_STORAGE_MODE, StorageConstants.RANDOM_ACCESS_MODE); - // Store points into matrix. - pntMatrix.assign(points); + // Store points into matrix. + pntMatrix.assign(points); + + // Call clusterization method with some number of centers. + // It returns model that can predict results for new points. + System.out.println(">>> Perform clusterization."); + int numCenters = 4; + FuzzyCMeansModel mdl = clusterer.cluster(pntMatrix, numCenters); - // Call clusterization method with some number of centers. - // It returns model that can predict results for new points. - System.out.println(">>> Perform clusterization."); - int numCenters = 4; - FuzzyCMeansModel mdl = clusterer.cluster(pntMatrix, numCenters); + // You can also get centers of clusters that is computed by Fuzzy C-Means algorithm. + Vector[] centers = mdl.centers(); - // You can also get centers of clusters that is computed by Fuzzy C-Means algorithm. - Vector[] centers = mdl.centers(); + String res = ">>> Results:\n" + + ">>> 1st center: " + centers[0].get(0) + " " + centers[0].get(1) + "\n" + + ">>> 2nd center: " + centers[1].get(0) + " " + centers[1].get(1) + "\n" + + ">>> 3rd center: " + centers[2].get(0) + " " + centers[2].get(1) + "\n" + + ">>> 4th center: " + centers[3].get(0) + " " + centers[3].get(1) + "\n"; - StringBuilder results = new StringBuilder(">>> Results:\n"); - results.append(">>> 1st center: " + centers[0].get(0) + " " + centers[0].get(1) + "\n"); - results.append(">>> 2nd center: " + centers[1].get(0) + " " + centers[1].get(1) + "\n"); - results.append(">>> 3rd center: " + centers[2].get(0) + " " + centers[2].get(1) + "\n"); - results.append(">>> 4th center: " + centers[3].get(0) + " " + centers[3].get(1) + "\n"); + System.out.println(res); - System.out.println(results.toString()); - }); + pntMatrix.destroy(); + }); igniteThread.start(); igniteThread.join(); http://git-wip-us.apache.org/repos/asf/ignite/blob/a3b83246/examples/src/main/ml/org/apache/ignite/examples/ml/clustering/FuzzyCMeansLocalExample.java ---------------------------------------------------------------------- diff --git a/examples/src/main/ml/org/apache/ignite/examples/ml/clustering/FuzzyCMeansLocalExample.java b/examples/src/main/ml/org/apache/ignite/examples/ml/clustering/FuzzyCMeansLocalExample.java new file mode 100644 index 0000000..5c1753a --- /dev/null +++ b/examples/src/main/ml/org/apache/ignite/examples/ml/clustering/FuzzyCMeansLocalExample.java @@ -0,0 +1,95 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.ignite.examples.ml.clustering; + +import org.apache.ignite.ml.clustering.BaseFuzzyCMeansClusterer; +import org.apache.ignite.ml.clustering.FuzzyCMeansLocalClusterer; +import org.apache.ignite.ml.clustering.FuzzyCMeansModel; +import org.apache.ignite.ml.math.Vector; +import org.apache.ignite.ml.math.distances.DistanceMeasure; +import org.apache.ignite.ml.math.distances.EuclideanDistance; +import org.apache.ignite.ml.math.impls.matrix.DenseLocalOnHeapMatrix; + +/** + * This example shows how to use {@link FuzzyCMeansLocalClusterer}. + */ +public final class FuzzyCMeansLocalExample { + /** + * Executes example. + * + * @param args Command line arguments, none required. + */ + public static void main(String[] args) { + System.out.println(">>> Local Fuzzy C-Means usage example started."); + + // Distance measure that computes distance between two points. + DistanceMeasure distanceMeasure = new EuclideanDistance(); + + // "Fuzziness" - specific constant that is used in membership calculation (1.0+-eps ~ K-Means). + double exponentialWeight = 2.0; + + // Condition that indicated when algorithm must stop. + // In this example algorithm stops if memberships have changed insignificantly. + BaseFuzzyCMeansClusterer.StopCondition stopCond = + BaseFuzzyCMeansClusterer.StopCondition.STABLE_MEMBERSHIPS; + + // Maximum difference between new and old membership values with which algorithm will continue to work. + double maxDelta = 0.01; + + // The maximum number of FCM iterations. + int maxIterations = 50; + + // Value that is used to initialize random numbers generator. You can choose it randomly. + Long seed = null; + + // Create new distributed clusterer with parameters described above. + System.out.println(">>> Create new Local Fuzzy C-Means clusterer."); + FuzzyCMeansLocalClusterer clusterer = new FuzzyCMeansLocalClusterer(distanceMeasure, + exponentialWeight, stopCond, + maxDelta, maxIterations, seed); + + // Create sample data. + double[][] points = new double[][] { + {-10, -10}, {-9, -11}, {-10, -9}, {-11, -9}, + {10, 10}, {9, 11}, {10, 9}, {11, 9}, + {-10, 10}, {-9, 11}, {-10, 9}, {-11, 9}, + {10, -10}, {9, -11}, {10, -9}, {11, -9}}; + + // Initialize matrix of data points. Each row contains one point. + System.out.println(">>> Create the matrix that contains sample points."); + // Store points into matrix. + DenseLocalOnHeapMatrix pntMatrix = new DenseLocalOnHeapMatrix(points); + + // Call clusterization method with some number of centers. + // It returns model that can predict results for new points. + System.out.println(">>> Perform clusterization."); + int numCenters = 4; + FuzzyCMeansModel mdl = clusterer.cluster(pntMatrix, numCenters); + + // You can also get centers of clusters that is computed by Fuzzy C-Means algorithm. + Vector[] centers = mdl.centers(); + + String res = ">>> Results:\n" + + ">>> 1st center: " + centers[0].get(0) + " " + centers[0].get(1) + "\n" + + ">>> 2nd center: " + centers[1].get(0) + " " + centers[1].get(1) + "\n" + + ">>> 3rd center: " + centers[2].get(0) + " " + centers[2].get(1) + "\n" + + ">>> 4th center: " + centers[3].get(0) + " " + centers[3].get(1) + "\n"; + + System.out.println(res); + } +} http://git-wip-us.apache.org/repos/asf/ignite/blob/a3b83246/examples/src/main/ml/org/apache/ignite/examples/ml/clustering/KMeansDistributedClustererExample.java ---------------------------------------------------------------------- diff --git a/examples/src/main/ml/org/apache/ignite/examples/ml/clustering/KMeansDistributedClustererExample.java b/examples/src/main/ml/org/apache/ignite/examples/ml/clustering/KMeansDistributedClustererExample.java index 09f35d2..f8709e6 100644 --- a/examples/src/main/ml/org/apache/ignite/examples/ml/clustering/KMeansDistributedClustererExample.java +++ b/examples/src/main/ml/org/apache/ignite/examples/ml/clustering/KMeansDistributedClustererExample.java @@ -53,9 +53,11 @@ public class KMeansDistributedClustererExample { public static void main(String[] args) throws InterruptedException { // IMPL NOTE based on KMeansDistributedClustererTestSingleNode#testClusterizationOnDatasetWithObviousStructure System.out.println(">>> K-means distributed clusterer example started."); + // Start ignite grid. try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) { System.out.println(">>> Ignite grid started."); + // Create IgniteThread, we must work with SparseDistributedMatrix inside IgniteThread // because we create ignite cache internally. IgniteThread igniteThread = new IgniteThread(ignite.configuration().getIgniteInstanceName(), http://git-wip-us.apache.org/repos/asf/ignite/blob/a3b83246/examples/src/main/ml/org/apache/ignite/examples/ml/knn/classification/KNNClassificationExample.java ---------------------------------------------------------------------- diff --git a/examples/src/main/ml/org/apache/ignite/examples/ml/knn/classification/KNNClassificationExample.java b/examples/src/main/ml/org/apache/ignite/examples/ml/knn/classification/KNNClassificationExample.java index efdacd7..0e1a52f 100644 --- a/examples/src/main/ml/org/apache/ignite/examples/ml/knn/classification/KNNClassificationExample.java +++ b/examples/src/main/ml/org/apache/ignite/examples/ml/knn/classification/KNNClassificationExample.java @@ -19,6 +19,7 @@ package org.apache.ignite.examples.ml.knn.classification; import java.io.IOException; import java.net.URISyntaxException; +import java.net.URL; import java.nio.file.Path; import java.nio.file.Paths; import java.util.Arrays; @@ -52,7 +53,7 @@ public class KNNClassificationExample { private static final String SEPARATOR = "\t"; /** Path to the Iris dataset. */ - static final String KNN_IRIS_TXT = "datasets/knn/iris.txt"; + private static final String KNN_IRIS_TXT = "../datasets/iris.txt"; /** * Executes example. @@ -70,7 +71,11 @@ public class KNNClassificationExample { try { // Prepare path to read - Path path = Paths.get(KNNClassificationExample.class.getClassLoader().getResource(KNN_IRIS_TXT).toURI()); + URL url = KNNClassificationExample.class.getResource(KNN_IRIS_TXT); + if (url == null) + throw new RuntimeException("Can't get URL for: " + KNN_IRIS_TXT); + + Path path = Paths.get(url.toURI()); // Read dataset from file LabeledDataset dataset = LabeledDatasetLoader.loadFromTxtFile(path, SEPARATOR, true, false); @@ -135,7 +140,7 @@ public class KNNClassificationExample { } catch (URISyntaxException | IOException e) { e.printStackTrace(); - System.out.println("\n>>> Check resources"); + System.out.println("\n>>> Unexpected exception, check resources: " + e); } finally { System.out.println("\n>>> kNN classification example completed."); http://git-wip-us.apache.org/repos/asf/ignite/blob/a3b83246/examples/src/main/ml/org/apache/ignite/examples/ml/knn/datasets/README.md ---------------------------------------------------------------------- diff --git a/examples/src/main/ml/org/apache/ignite/examples/ml/knn/datasets/README.md b/examples/src/main/ml/org/apache/ignite/examples/ml/knn/datasets/README.md new file mode 100644 index 0000000..2f9c5ec --- /dev/null +++ b/examples/src/main/ml/org/apache/ignite/examples/ml/knn/datasets/README.md @@ -0,0 +1,2 @@ +iris.txt and cleared_machines are from Lichman, M. (2013). UCI Machine Learning Repository [http://archive.ics.uci.edu/ml]. Irvine, CA: University of California, School of Information and Computer Science. +Read more about machine dataset http://archive.ics.uci.edu/ml/machine-learning-databases/cpu-performance/machine.names \ No newline at end of file http://git-wip-us.apache.org/repos/asf/ignite/blob/a3b83246/examples/src/main/ml/org/apache/ignite/examples/ml/knn/datasets/cleared_machines.txt ---------------------------------------------------------------------- diff --git a/examples/src/main/ml/org/apache/ignite/examples/ml/knn/datasets/cleared_machines.txt b/examples/src/main/ml/org/apache/ignite/examples/ml/knn/datasets/cleared_machines.txt new file mode 100644 index 0000000..cf8b6b0 --- /dev/null +++ b/examples/src/main/ml/org/apache/ignite/examples/ml/knn/datasets/cleared_machines.txt @@ -0,0 +1,209 @@ +199,125,256,6000,256,16,128 +253,29,8000,32000,32,8,32 +253,29,8000,32000,32,8,32 +253,29,8000,32000,32,8,32 +132,29,8000,16000,32,8,16 +290,26,8000,32000,64,8,32 +381,23,16000,32000,64,16,32 +381,23,16000,32000,64,16,32 +749,23,16000,64000,64,16,32 +1238,23,32000,64000,128,32,64 +23,400,1000,3000,0,1,2 +24,400,512,3500,4,1,6 +70,60,2000,8000,65,1,8 +117,50,4000,16000,65,1,8 +15,350,64,64,0,1,4 +64,200,512,16000,0,4,32 +23,167,524,2000,8,4,15 +29,143,512,5000,0,7,32 +22,143,1000,2000,0,5,16 +124,110,5000,5000,142,8,64 +35,143,1500,6300,0,5,32 +39,143,3100,6200,0,5,20 +40,143,2300,6200,0,6,64 +45,110,3100,6200,0,6,64 +28,320,128,6000,0,1,12 +21,320,512,2000,4,1,3 +28,320,256,6000,0,1,6 +22,320,256,3000,4,1,3 +28,320,512,5000,4,1,5 +27,320,256,5000,4,1,6 +102,25,1310,2620,131,12,24 +102,25,1310,2620,131,12,24 +74,50,2620,10480,30,12,24 +74,50,2620,10480,30,12,24 +138,56,5240,20970,30,12,24 +136,64,5240,20970,30,12,24 +23,50,500,2000,8,1,4 +29,50,1000,4000,8,1,5 +44,50,2000,8000,8,1,5 +30,50,1000,4000,8,3,5 +41,50,1000,8000,8,3,5 +74,50,2000,16000,8,3,5 +74,50,2000,16000,8,3,6 +74,50,2000,16000,8,3,6 +54,133,1000,12000,9,3,12 +41,133,1000,8000,9,3,12 +18,810,512,512,8,1,1 +28,810,1000,5000,0,1,1 +36,320,512,8000,4,1,5 +38,200,512,8000,8,1,8 +34,700,384,8000,0,1,1 +19,700,256,2000,0,1,1 +72,140,1000,16000,16,1,3 +36,200,1000,8000,0,1,2 +30,110,1000,4000,16,1,2 +56,110,1000,12000,16,1,2 +42,220,1000,8000,16,1,2 +34,800,256,8000,0,1,4 +34,800,256,8000,0,1,4 +34,800,256,8000,0,1,4 +34,800,256,8000,0,1,4 +34,800,256,8000,0,1,4 +19,125,512,1000,0,8,20 +75,75,2000,8000,64,1,38 +113,75,2000,16000,64,1,38 +157,75,2000,16000,128,1,38 +18,90,256,1000,0,3,10 +20,105,256,2000,0,3,10 +28,105,1000,4000,0,3,24 +33,105,2000,4000,8,3,19 +47,75,2000,8000,8,3,24 +54,75,3000,8000,8,3,48 +20,175,256,2000,0,3,24 +23,300,768,3000,0,6,24 +25,300,768,3000,6,6,24 +52,300,768,12000,6,6,24 +27,300,768,4500,0,1,24 +50,300,384,12000,6,1,24 +18,300,192,768,6,6,24 +53,180,768,12000,6,1,31 +23,330,1000,3000,0,2,4 +30,300,1000,4000,8,3,64 +73,300,1000,16000,8,2,112 +20,330,1000,2000,0,1,2 +25,330,1000,4000,0,3,6 +28,140,2000,4000,0,3,6 +29,140,2000,4000,0,4,8 +32,140,2000,4000,8,1,20 +175,140,2000,32000,32,1,20 +57,140,2000,8000,32,1,54 +181,140,2000,32000,32,1,54 +181,140,2000,32000,32,1,54 +32,140,2000,4000,8,1,20 +82,57,4000,16000,1,6,12 +171,57,4000,24000,64,12,16 +361,26,16000,32000,64,16,24 +350,26,16000,32000,64,8,24 +220,26,8000,32000,0,8,24 +113,26,8000,16000,0,8,16 +15,480,96,512,0,1,1 +21,203,1000,2000,0,1,5 +35,115,512,6000,16,1,6 +18,1100,512,1500,0,1,1 +20,1100,768,2000,0,1,1 +20,600,768,2000,0,1,1 +28,400,2000,4000,0,1,1 +45,400,4000,8000,0,1,1 +18,900,1000,1000,0,1,2 +17,900,512,1000,0,1,2 +26,900,1000,4000,4,1,2 +28,900,1000,4000,8,1,2 +28,900,2000,4000,0,3,6 +31,225,2000,4000,8,3,6 +31,225,2000,4000,8,3,6 +42,180,2000,8000,8,1,6 +76,185,2000,16000,16,1,6 +76,180,2000,16000,16,1,6 +26,225,1000,4000,2,3,6 +59,25,2000,12000,8,1,4 +65,25,2000,12000,16,3,5 +101,17,4000,16000,8,6,12 +116,17,4000,16000,32,6,12 +18,1500,768,1000,0,0,0 +20,1500,768,2000,0,0,0 +20,800,768,2000,0,0,0 +30,50,2000,4000,0,3,6 +44,50,2000,8000,8,3,6 +44,50,2000,8000,8,1,6 +82,50,2000,16000,24,1,6 +82,50,2000,16000,24,1,6 +128,50,8000,16000,48,1,10 +37,100,1000,8000,0,2,6 +46,100,1000,8000,24,2,6 +46,100,1000,8000,24,3,6 +80,50,2000,16000,12,3,16 +88,50,2000,16000,24,6,16 +88,50,2000,16000,24,6,16 +33,150,512,4000,0,8,128 +46,115,2000,8000,16,1,3 +29,115,2000,4000,2,1,5 +53,92,2000,8000,32,1,6 +53,92,2000,8000,32,1,6 +41,92,2000,8000,4,1,6 +86,75,4000,16000,16,1,6 +95,60,4000,16000,32,1,6 +107,60,2000,16000,64,5,8 +117,60,4000,16000,64,5,8 +119,50,4000,16000,64,5,10 +120,72,4000,16000,64,8,16 +48,72,2000,8000,16,6,8 +126,40,8000,16000,32,8,16 +266,40,8000,32000,64,8,24 +270,35,8000,32000,64,8,24 +426,38,16000,32000,128,16,32 +151,48,4000,24000,32,8,24 +267,38,8000,32000,64,8,24 +603,30,16000,32000,256,16,24 +19,112,1000,1000,0,1,4 +21,84,1000,2000,0,1,6 +26,56,1000,4000,0,1,6 +35,56,2000,6000,0,1,8 +41,56,2000,8000,0,1,8 +47,56,4000,8000,0,1,8 +62,56,4000,12000,0,1,8 +78,56,4000,16000,0,1,8 +80,38,4000,8000,32,16,32 +80,38,4000,8000,32,16,32 +142,38,8000,16000,64,4,8 +281,38,8000,24000,160,4,8 +190,38,4000,16000,128,16,32 +21,200,1000,2000,0,1,2 +25,200,1000,4000,0,1,4 +67,200,2000,8000,64,1,5 +24,250,512,4000,0,1,7 +24,250,512,4000,0,4,7 +64,250,1000,16000,1,1,8 +25,160,512,4000,2,1,5 +20,160,512,2000,2,3,8 +29,160,1000,4000,8,1,14 +43,160,1000,8000,16,1,14 +53,160,2000,8000,32,1,13 +19,240,512,1000,8,1,3 +22,240,512,2000,8,1,5 +31,105,2000,4000,8,3,8 +41,105,2000,6000,16,6,16 +47,105,2000,8000,16,4,14 +99,52,4000,16000,32,4,12 +67,70,4000,12000,8,6,8 +81,59,4000,12000,32,6,12 +149,59,8000,16000,64,12,24 +183,26,8000,24000,32,8,16 +275,26,8000,32000,64,12,16 +382,26,8000,32000,128,24,32 +56,116,2000,8000,32,5,28 +182,50,2000,32000,24,6,26 +227,50,2000,32000,48,26,52 +341,50,2000,32000,112,52,104 +360,50,4000,32000,112,52,104 +919,30,8000,64000,96,12,176 +978,30,8000,64000,128,12,176 +24,180,262,4000,0,1,3 +24,180,512,4000,0,1,3 +24,180,262,4000,0,1,3 +24,180,512,4000,0,1,3 +37,124,1000,8000,0,1,8 +50,98,1000,8000,32,2,8 +41,125,2000,8000,0,2,14 +47,480,512,8000,32,0,0 +25,480,1000,4000,0,0,0 http://git-wip-us.apache.org/repos/asf/ignite/blob/a3b83246/examples/src/main/ml/org/apache/ignite/examples/ml/knn/datasets/iris.txt ---------------------------------------------------------------------- diff --git a/examples/src/main/ml/org/apache/ignite/examples/ml/knn/datasets/iris.txt b/examples/src/main/ml/org/apache/ignite/examples/ml/knn/datasets/iris.txt new file mode 100644 index 0000000..18f5f7c --- /dev/null +++ b/examples/src/main/ml/org/apache/ignite/examples/ml/knn/datasets/iris.txt @@ -0,0 +1,150 @@ +1.0 5.1 3.5 1.4 0.2 +1.0 4.9 3.0 1.4 0.2 +1.0 4.7 3.2 1.3 0.2 +1.0 4.6 3.1 1.5 0.2 +1.0 5.0 3.6 1.4 0.2 +1.0 5.4 3.9 1.7 0.4 +1.0 4.6 3.4 1.4 0.3 +1.0 5.0 3.4 1.5 0.2 +1.0 4.4 2.9 1.4 0.2 +1.0 4.9 3.1 1.5 0.1 +1.0 5.4 3.7 1.5 0.2 +1.0 4.8 3.4 1.6 0.2 +1.0 4.8 3.0 1.4 0.1 +1.0 4.3 3.0 1.1 0.1 +1.0 5.8 4.0 1.2 0.2 +1.0 5.7 4.4 1.5 0.4 +1.0 5.4 3.9 1.3 0.4 +1.0 5.1 3.5 1.4 0.3 +1.0 5.7 3.8 1.7 0.3 +1.0 5.1 3.8 1.5 0.3 +1.0 5.4 3.4 1.7 0.2 +1.0 5.1 3.7 1.5 0.4 +1.0 4.6 3.6 1.0 0.2 +1.0 5.1 3.3 1.7 0.5 +1.0 4.8 3.4 1.9 0.2 +1.0 5.0 3.0 1.6 0.2 +1.0 5.0 3.4 1.6 0.4 +1.0 5.2 3.5 1.5 0.2 +1.0 5.2 3.4 1.4 0.2 +1.0 4.7 3.2 1.6 0.2 +1.0 4.8 3.1 1.6 0.2 +1.0 5.4 3.4 1.5 0.4 +1.0 5.2 4.1 1.5 0.1 +1.0 5.5 4.2 1.4 0.2 +1.0 4.9 3.1 1.5 0.1 +1.0 5.0 3.2 1.2 0.2 +1.0 5.5 3.5 1.3 0.2 +1.0 4.9 3.1 1.5 0.1 +1.0 4.4 3.0 1.3 0.2 +1.0 5.1 3.4 1.5 0.2 +1.0 5.0 3.5 1.3 0.3 +1.0 4.5 2.3 1.3 0.3 +1.0 4.4 3.2 1.3 0.2 +1.0 5.0 3.5 1.6 0.6 +1.0 5.1 3.8 1.9 0.4 +1.0 4.8 3.0 1.4 0.3 +1.0 5.1 3.8 1.6 0.2 +1.0 4.6 3.2 1.4 0.2 +1.0 5.3 3.7 1.5 0.2 +1.0 5.0 3.3 1.4 0.2 +2.0 7.0 3.2 4.7 1.4 +2.0 6.4 3.2 4.5 1.5 +2.0 6.9 3.1 4.9 1.5 +2.0 5.5 2.3 4.0 1.3 +2.0 6.5 2.8 4.6 1.5 +2.0 5.7 2.8 4.5 1.3 +2.0 6.3 3.3 4.7 1.6 +2.0 4.9 2.4 3.3 1.0 +2.0 6.6 2.9 4.6 1.3 +2.0 5.2 2.7 3.9 1.4 +2.0 5.0 2.0 3.5 1.0 +2.0 5.9 3.0 4.2 1.5 +2.0 6.0 2.2 4.0 1.0 +2.0 6.1 2.9 4.7 1.4 +2.0 5.6 2.9 3.6 1.3 +2.0 6.7 3.1 4.4 1.4 +2.0 5.6 3.0 4.5 1.5 +2.0 5.8 2.7 4.1 1.0 +2.0 6.2 2.2 4.5 1.5 +2.0 5.6 2.5 3.9 1.1 +2.0 5.9 3.2 4.8 1.8 +2.0 6.1 2.8 4.0 1.3 +2.0 6.3 2.5 4.9 1.5 +2.0 6.1 2.8 4.7 1.2 +2.0 6.4 2.9 4.3 1.3 +2.0 6.6 3.0 4.4 1.4 +2.0 6.8 2.8 4.8 1.4 +2.0 6.7 3.0 5.0 1.7 +2.0 6.0 2.9 4.5 1.5 +2.0 5.7 2.6 3.5 1.0 +2.0 5.5 2.4 3.8 1.1 +2.0 5.5 2.4 3.7 1.0 +2.0 5.8 2.7 3.9 1.2 +2.0 6.0 2.7 5.1 1.6 +2.0 5.4 3.0 4.5 1.5 +2.0 6.0 3.4 4.5 1.6 +2.0 6.7 3.1 4.7 1.5 +2.0 6.3 2.3 4.4 1.3 +2.0 5.6 3.0 4.1 1.3 +2.0 5.5 2.5 4.0 1.3 +2.0 5.5 2.6 4.4 1.2 +2.0 6.1 3.0 4.6 1.4 +2.0 5.8 2.6 4.0 1.2 +2.0 5.0 2.3 3.3 1.0 +2.0 5.6 2.7 4.2 1.3 +2.0 5.7 3.0 4.2 1.2 +2.0 5.7 2.9 4.2 1.3 +2.0 6.2 2.9 4.3 1.3 +2.0 5.1 2.5 3.0 1.1 +2.0 5.7 2.8 4.1 1.3 +3.0 6.3 3.3 6.0 2.5 +3.0 5.8 2.7 5.1 1.9 +3.0 7.1 3.0 5.9 2.1 +3.0 6.3 2.9 5.6 1.8 +3.0 6.5 3.0 5.8 2.2 +3.0 7.6 3.0 6.6 2.1 +3.0 4.9 2.5 4.5 1.7 +3.0 7.3 2.9 6.3 1.8 +3.0 6.7 2.5 5.8 1.8 +3.0 7.2 3.6 6.1 2.5 +3.0 6.5 3.2 5.1 2.0 +3.0 6.4 2.7 5.3 1.9 +3.0 6.8 3.0 5.5 2.1 +3.0 5.7 2.5 5.0 2.0 +3.0 5.8 2.8 5.1 2.4 +3.0 6.4 3.2 5.3 2.3 +3.0 6.5 3.0 5.5 1.8 +3.0 7.7 3.8 6.7 2.2 +3.0 7.7 2.6 6.9 2.3 +3.0 6.0 2.2 5.0 1.5 +3.0 6.9 3.2 5.7 2.3 +3.0 5.6 2.8 4.9 2.0 +3.0 7.7 2.8 6.7 2.0 +3.0 6.3 2.7 4.9 1.8 +3.0 6.7 3.3 5.7 2.1 +3.0 7.2 3.2 6.0 1.8 +3.0 6.2 2.8 4.8 1.8 +3.0 6.1 3.0 4.9 1.8 +3.0 6.4 2.8 5.6 2.1 +3.0 7.2 3.0 5.8 1.6 +3.0 7.4 2.8 6.1 1.9 +3.0 7.9 3.8 6.4 2.0 +3.0 6.4 2.8 5.6 2.2 +3.0 6.3 2.8 5.1 1.5 +3.0 6.1 2.6 5.6 1.4 +3.0 7.7 3.0 6.1 2.3 +3.0 6.3 3.4 5.6 2.4 +3.0 6.4 3.1 5.5 1.8 +3.0 6.0 3.0 4.8 1.8 +3.0 6.9 3.1 5.4 2.1 +3.0 6.7 3.1 5.6 2.4 +3.0 6.9 3.1 5.1 2.3 +3.0 5.8 2.7 5.1 1.9 +3.0 6.8 3.2 5.9 2.3 +3.0 6.7 3.3 5.7 2.5 +3.0 6.7 3.0 5.2 2.3 +3.0 6.3 2.5 5.0 1.9 +3.0 6.5 3.0 5.2 2.0 +3.0 6.2 3.4 5.4 2.3 +3.0 5.9 3.0 5.1 1.8 http://git-wip-us.apache.org/repos/asf/ignite/blob/a3b83246/examples/src/main/ml/org/apache/ignite/examples/ml/knn/regression/KNNRegressionExample.java ---------------------------------------------------------------------- diff --git a/examples/src/main/ml/org/apache/ignite/examples/ml/knn/regression/KNNRegressionExample.java b/examples/src/main/ml/org/apache/ignite/examples/ml/knn/regression/KNNRegressionExample.java index 31f7191..b52613a 100644 --- a/examples/src/main/ml/org/apache/ignite/examples/ml/knn/regression/KNNRegressionExample.java +++ b/examples/src/main/ml/org/apache/ignite/examples/ml/knn/regression/KNNRegressionExample.java @@ -19,6 +19,7 @@ package org.apache.ignite.examples.ml.knn.regression; import java.io.IOException; import java.net.URISyntaxException; +import java.net.URL; import java.nio.file.Path; import java.nio.file.Paths; import org.apache.ignite.Ignite; @@ -53,7 +54,7 @@ public class KNNRegressionExample { private static final String SEPARATOR = ","; /** */ - public static final String KNN_CLEARED_MACHINES_TXT = "datasets/knn/cleared_machines.txt"; + private static final String KNN_CLEARED_MACHINES_TXT = "../datasets/cleared_machines.txt"; /** * Executes example. @@ -71,7 +72,11 @@ public class KNNRegressionExample { try { // Prepare path to read - Path path = Paths.get(KNNClassificationExample.class.getClassLoader().getResource(KNN_CLEARED_MACHINES_TXT).toURI()); + URL url = KNNClassificationExample.class.getResource(KNN_CLEARED_MACHINES_TXT); + if (url == null) + throw new RuntimeException("Can't get URL for: " + KNN_CLEARED_MACHINES_TXT); + + Path path = Paths.get(url.toURI()); // Read dataset from file LabeledDataset dataset = LabeledDatasetLoader.loadFromTxtFile(path, SEPARATOR, false, false); @@ -82,14 +87,15 @@ public class KNNRegressionExample { // Random splitting of iris data as 80% train and 20% test datasets LabeledDatasetTestTrainPair split = new LabeledDatasetTestTrainPair(dataset, 0.2); - System.out.println("\n>>> Amount of observations in train dataset " + split.train().rowSize()); - System.out.println("\n>>> Amount of observations in test dataset " + split.test().rowSize()); + System.out.println("\n>>> Amount of observations in train dataset: " + split.train().rowSize()); + System.out.println("\n>>> Amount of observations in test dataset: " + split.test().rowSize()); LabeledDataset test = split.test(); LabeledDataset train = split.train(); // Builds weighted kNN-regression with Manhattan Distance - KNNMultipleLinearRegression knnMdl = new KNNMultipleLinearRegression(7, new ManhattanDistance(), KNNStrategy.WEIGHTED, train); + KNNMultipleLinearRegression knnMdl = new KNNMultipleLinearRegression(7, new ManhattanDistance(), + KNNStrategy.WEIGHTED, train); // Clone labels final double[] labels = test.labels(); @@ -137,7 +143,7 @@ public class KNNRegressionExample { } catch (URISyntaxException | IOException e) { e.printStackTrace(); - System.out.println("\n>>> Check resources"); + System.out.println("\n>>> Unexpected exception, check resources: " + e); } finally { System.out.println("\n>>> kNN regression example completed."); http://git-wip-us.apache.org/repos/asf/ignite/blob/a3b83246/examples/src/main/resources/datasets/knn/README.md ---------------------------------------------------------------------- diff --git a/examples/src/main/resources/datasets/knn/README.md b/examples/src/main/resources/datasets/knn/README.md deleted file mode 100644 index 2f9c5ec..0000000 --- a/examples/src/main/resources/datasets/knn/README.md +++ /dev/null @@ -1,2 +0,0 @@ -iris.txt and cleared_machines are from Lichman, M. (2013). UCI Machine Learning Repository [http://archive.ics.uci.edu/ml]. Irvine, CA: University of California, School of Information and Computer Science. -Read more about machine dataset http://archive.ics.uci.edu/ml/machine-learning-databases/cpu-performance/machine.names \ No newline at end of file http://git-wip-us.apache.org/repos/asf/ignite/blob/a3b83246/examples/src/main/resources/datasets/knn/cleared_machines.txt ---------------------------------------------------------------------- diff --git a/examples/src/main/resources/datasets/knn/cleared_machines.txt b/examples/src/main/resources/datasets/knn/cleared_machines.txt deleted file mode 100644 index cf8b6b0..0000000 --- a/examples/src/main/resources/datasets/knn/cleared_machines.txt +++ /dev/null @@ -1,209 +0,0 @@ -199,125,256,6000,256,16,128 -253,29,8000,32000,32,8,32 -253,29,8000,32000,32,8,32 -253,29,8000,32000,32,8,32 -132,29,8000,16000,32,8,16 -290,26,8000,32000,64,8,32 -381,23,16000,32000,64,16,32 -381,23,16000,32000,64,16,32 -749,23,16000,64000,64,16,32 -1238,23,32000,64000,128,32,64 -23,400,1000,3000,0,1,2 -24,400,512,3500,4,1,6 -70,60,2000,8000,65,1,8 -117,50,4000,16000,65,1,8 -15,350,64,64,0,1,4 -64,200,512,16000,0,4,32 -23,167,524,2000,8,4,15 -29,143,512,5000,0,7,32 -22,143,1000,2000,0,5,16 -124,110,5000,5000,142,8,64 -35,143,1500,6300,0,5,32 -39,143,3100,6200,0,5,20 -40,143,2300,6200,0,6,64 -45,110,3100,6200,0,6,64 -28,320,128,6000,0,1,12 -21,320,512,2000,4,1,3 -28,320,256,6000,0,1,6 -22,320,256,3000,4,1,3 -28,320,512,5000,4,1,5 -27,320,256,5000,4,1,6 -102,25,1310,2620,131,12,24 -102,25,1310,2620,131,12,24 -74,50,2620,10480,30,12,24 -74,50,2620,10480,30,12,24 -138,56,5240,20970,30,12,24 -136,64,5240,20970,30,12,24 -23,50,500,2000,8,1,4 -29,50,1000,4000,8,1,5 -44,50,2000,8000,8,1,5 -30,50,1000,4000,8,3,5 -41,50,1000,8000,8,3,5 -74,50,2000,16000,8,3,5 -74,50,2000,16000,8,3,6 -74,50,2000,16000,8,3,6 -54,133,1000,12000,9,3,12 -41,133,1000,8000,9,3,12 -18,810,512,512,8,1,1 -28,810,1000,5000,0,1,1 -36,320,512,8000,4,1,5 -38,200,512,8000,8,1,8 -34,700,384,8000,0,1,1 -19,700,256,2000,0,1,1 -72,140,1000,16000,16,1,3 -36,200,1000,8000,0,1,2 -30,110,1000,4000,16,1,2 -56,110,1000,12000,16,1,2 -42,220,1000,8000,16,1,2 -34,800,256,8000,0,1,4 -34,800,256,8000,0,1,4 -34,800,256,8000,0,1,4 -34,800,256,8000,0,1,4 -34,800,256,8000,0,1,4 -19,125,512,1000,0,8,20 -75,75,2000,8000,64,1,38 -113,75,2000,16000,64,1,38 -157,75,2000,16000,128,1,38 -18,90,256,1000,0,3,10 -20,105,256,2000,0,3,10 -28,105,1000,4000,0,3,24 -33,105,2000,4000,8,3,19 -47,75,2000,8000,8,3,24 -54,75,3000,8000,8,3,48 -20,175,256,2000,0,3,24 -23,300,768,3000,0,6,24 -25,300,768,3000,6,6,24 -52,300,768,12000,6,6,24 -27,300,768,4500,0,1,24 -50,300,384,12000,6,1,24 -18,300,192,768,6,6,24 -53,180,768,12000,6,1,31 -23,330,1000,3000,0,2,4 -30,300,1000,4000,8,3,64 -73,300,1000,16000,8,2,112 -20,330,1000,2000,0,1,2 -25,330,1000,4000,0,3,6 -28,140,2000,4000,0,3,6 -29,140,2000,4000,0,4,8 -32,140,2000,4000,8,1,20 -175,140,2000,32000,32,1,20 -57,140,2000,8000,32,1,54 -181,140,2000,32000,32,1,54 -181,140,2000,32000,32,1,54 -32,140,2000,4000,8,1,20 -82,57,4000,16000,1,6,12 -171,57,4000,24000,64,12,16 -361,26,16000,32000,64,16,24 -350,26,16000,32000,64,8,24 -220,26,8000,32000,0,8,24 -113,26,8000,16000,0,8,16 -15,480,96,512,0,1,1 -21,203,1000,2000,0,1,5 -35,115,512,6000,16,1,6 -18,1100,512,1500,0,1,1 -20,1100,768,2000,0,1,1 -20,600,768,2000,0,1,1 -28,400,2000,4000,0,1,1 -45,400,4000,8000,0,1,1 -18,900,1000,1000,0,1,2 -17,900,512,1000,0,1,2 -26,900,1000,4000,4,1,2 -28,900,1000,4000,8,1,2 -28,900,2000,4000,0,3,6 -31,225,2000,4000,8,3,6 -31,225,2000,4000,8,3,6 -42,180,2000,8000,8,1,6 -76,185,2000,16000,16,1,6 -76,180,2000,16000,16,1,6 -26,225,1000,4000,2,3,6 -59,25,2000,12000,8,1,4 -65,25,2000,12000,16,3,5 -101,17,4000,16000,8,6,12 -116,17,4000,16000,32,6,12 -18,1500,768,1000,0,0,0 -20,1500,768,2000,0,0,0 -20,800,768,2000,0,0,0 -30,50,2000,4000,0,3,6 -44,50,2000,8000,8,3,6 -44,50,2000,8000,8,1,6 -82,50,2000,16000,24,1,6 -82,50,2000,16000,24,1,6 -128,50,8000,16000,48,1,10 -37,100,1000,8000,0,2,6 -46,100,1000,8000,24,2,6 -46,100,1000,8000,24,3,6 -80,50,2000,16000,12,3,16 -88,50,2000,16000,24,6,16 -88,50,2000,16000,24,6,16 -33,150,512,4000,0,8,128 -46,115,2000,8000,16,1,3 -29,115,2000,4000,2,1,5 -53,92,2000,8000,32,1,6 -53,92,2000,8000,32,1,6 -41,92,2000,8000,4,1,6 -86,75,4000,16000,16,1,6 -95,60,4000,16000,32,1,6 -107,60,2000,16000,64,5,8 -117,60,4000,16000,64,5,8 -119,50,4000,16000,64,5,10 -120,72,4000,16000,64,8,16 -48,72,2000,8000,16,6,8 -126,40,8000,16000,32,8,16 -266,40,8000,32000,64,8,24 -270,35,8000,32000,64,8,24 -426,38,16000,32000,128,16,32 -151,48,4000,24000,32,8,24 -267,38,8000,32000,64,8,24 -603,30,16000,32000,256,16,24 -19,112,1000,1000,0,1,4 -21,84,1000,2000,0,1,6 -26,56,1000,4000,0,1,6 -35,56,2000,6000,0,1,8 -41,56,2000,8000,0,1,8 -47,56,4000,8000,0,1,8 -62,56,4000,12000,0,1,8 -78,56,4000,16000,0,1,8 -80,38,4000,8000,32,16,32 -80,38,4000,8000,32,16,32 -142,38,8000,16000,64,4,8 -281,38,8000,24000,160,4,8 -190,38,4000,16000,128,16,32 -21,200,1000,2000,0,1,2 -25,200,1000,4000,0,1,4 -67,200,2000,8000,64,1,5 -24,250,512,4000,0,1,7 -24,250,512,4000,0,4,7 -64,250,1000,16000,1,1,8 -25,160,512,4000,2,1,5 -20,160,512,2000,2,3,8 -29,160,1000,4000,8,1,14 -43,160,1000,8000,16,1,14 -53,160,2000,8000,32,1,13 -19,240,512,1000,8,1,3 -22,240,512,2000,8,1,5 -31,105,2000,4000,8,3,8 -41,105,2000,6000,16,6,16 -47,105,2000,8000,16,4,14 -99,52,4000,16000,32,4,12 -67,70,4000,12000,8,6,8 -81,59,4000,12000,32,6,12 -149,59,8000,16000,64,12,24 -183,26,8000,24000,32,8,16 -275,26,8000,32000,64,12,16 -382,26,8000,32000,128,24,32 -56,116,2000,8000,32,5,28 -182,50,2000,32000,24,6,26 -227,50,2000,32000,48,26,52 -341,50,2000,32000,112,52,104 -360,50,4000,32000,112,52,104 -919,30,8000,64000,96,12,176 -978,30,8000,64000,128,12,176 -24,180,262,4000,0,1,3 -24,180,512,4000,0,1,3 -24,180,262,4000,0,1,3 -24,180,512,4000,0,1,3 -37,124,1000,8000,0,1,8 -50,98,1000,8000,32,2,8 -41,125,2000,8000,0,2,14 -47,480,512,8000,32,0,0 -25,480,1000,4000,0,0,0 http://git-wip-us.apache.org/repos/asf/ignite/blob/a3b83246/examples/src/main/resources/datasets/knn/iris.txt ---------------------------------------------------------------------- diff --git a/examples/src/main/resources/datasets/knn/iris.txt b/examples/src/main/resources/datasets/knn/iris.txt deleted file mode 100644 index 18f5f7c..0000000 --- a/examples/src/main/resources/datasets/knn/iris.txt +++ /dev/null @@ -1,150 +0,0 @@ -1.0 5.1 3.5 1.4 0.2 -1.0 4.9 3.0 1.4 0.2 -1.0 4.7 3.2 1.3 0.2 -1.0 4.6 3.1 1.5 0.2 -1.0 5.0 3.6 1.4 0.2 -1.0 5.4 3.9 1.7 0.4 -1.0 4.6 3.4 1.4 0.3 -1.0 5.0 3.4 1.5 0.2 -1.0 4.4 2.9 1.4 0.2 -1.0 4.9 3.1 1.5 0.1 -1.0 5.4 3.7 1.5 0.2 -1.0 4.8 3.4 1.6 0.2 -1.0 4.8 3.0 1.4 0.1 -1.0 4.3 3.0 1.1 0.1 -1.0 5.8 4.0 1.2 0.2 -1.0 5.7 4.4 1.5 0.4 -1.0 5.4 3.9 1.3 0.4 -1.0 5.1 3.5 1.4 0.3 -1.0 5.7 3.8 1.7 0.3 -1.0 5.1 3.8 1.5 0.3 -1.0 5.4 3.4 1.7 0.2 -1.0 5.1 3.7 1.5 0.4 -1.0 4.6 3.6 1.0 0.2 -1.0 5.1 3.3 1.7 0.5 -1.0 4.8 3.4 1.9 0.2 -1.0 5.0 3.0 1.6 0.2 -1.0 5.0 3.4 1.6 0.4 -1.0 5.2 3.5 1.5 0.2 -1.0 5.2 3.4 1.4 0.2 -1.0 4.7 3.2 1.6 0.2 -1.0 4.8 3.1 1.6 0.2 -1.0 5.4 3.4 1.5 0.4 -1.0 5.2 4.1 1.5 0.1 -1.0 5.5 4.2 1.4 0.2 -1.0 4.9 3.1 1.5 0.1 -1.0 5.0 3.2 1.2 0.2 -1.0 5.5 3.5 1.3 0.2 -1.0 4.9 3.1 1.5 0.1 -1.0 4.4 3.0 1.3 0.2 -1.0 5.1 3.4 1.5 0.2 -1.0 5.0 3.5 1.3 0.3 -1.0 4.5 2.3 1.3 0.3 -1.0 4.4 3.2 1.3 0.2 -1.0 5.0 3.5 1.6 0.6 -1.0 5.1 3.8 1.9 0.4 -1.0 4.8 3.0 1.4 0.3 -1.0 5.1 3.8 1.6 0.2 -1.0 4.6 3.2 1.4 0.2 -1.0 5.3 3.7 1.5 0.2 -1.0 5.0 3.3 1.4 0.2 -2.0 7.0 3.2 4.7 1.4 -2.0 6.4 3.2 4.5 1.5 -2.0 6.9 3.1 4.9 1.5 -2.0 5.5 2.3 4.0 1.3 -2.0 6.5 2.8 4.6 1.5 -2.0 5.7 2.8 4.5 1.3 -2.0 6.3 3.3 4.7 1.6 -2.0 4.9 2.4 3.3 1.0 -2.0 6.6 2.9 4.6 1.3 -2.0 5.2 2.7 3.9 1.4 -2.0 5.0 2.0 3.5 1.0 -2.0 5.9 3.0 4.2 1.5 -2.0 6.0 2.2 4.0 1.0 -2.0 6.1 2.9 4.7 1.4 -2.0 5.6 2.9 3.6 1.3 -2.0 6.7 3.1 4.4 1.4 -2.0 5.6 3.0 4.5 1.5 -2.0 5.8 2.7 4.1 1.0 -2.0 6.2 2.2 4.5 1.5 -2.0 5.6 2.5 3.9 1.1 -2.0 5.9 3.2 4.8 1.8 -2.0 6.1 2.8 4.0 1.3 -2.0 6.3 2.5 4.9 1.5 -2.0 6.1 2.8 4.7 1.2 -2.0 6.4 2.9 4.3 1.3 -2.0 6.6 3.0 4.4 1.4 -2.0 6.8 2.8 4.8 1.4 -2.0 6.7 3.0 5.0 1.7 -2.0 6.0 2.9 4.5 1.5 -2.0 5.7 2.6 3.5 1.0 -2.0 5.5 2.4 3.8 1.1 -2.0 5.5 2.4 3.7 1.0 -2.0 5.8 2.7 3.9 1.2 -2.0 6.0 2.7 5.1 1.6 -2.0 5.4 3.0 4.5 1.5 -2.0 6.0 3.4 4.5 1.6 -2.0 6.7 3.1 4.7 1.5 -2.0 6.3 2.3 4.4 1.3 -2.0 5.6 3.0 4.1 1.3 -2.0 5.5 2.5 4.0 1.3 -2.0 5.5 2.6 4.4 1.2 -2.0 6.1 3.0 4.6 1.4 -2.0 5.8 2.6 4.0 1.2 -2.0 5.0 2.3 3.3 1.0 -2.0 5.6 2.7 4.2 1.3 -2.0 5.7 3.0 4.2 1.2 -2.0 5.7 2.9 4.2 1.3 -2.0 6.2 2.9 4.3 1.3 -2.0 5.1 2.5 3.0 1.1 -2.0 5.7 2.8 4.1 1.3 -3.0 6.3 3.3 6.0 2.5 -3.0 5.8 2.7 5.1 1.9 -3.0 7.1 3.0 5.9 2.1 -3.0 6.3 2.9 5.6 1.8 -3.0 6.5 3.0 5.8 2.2 -3.0 7.6 3.0 6.6 2.1 -3.0 4.9 2.5 4.5 1.7 -3.0 7.3 2.9 6.3 1.8 -3.0 6.7 2.5 5.8 1.8 -3.0 7.2 3.6 6.1 2.5 -3.0 6.5 3.2 5.1 2.0 -3.0 6.4 2.7 5.3 1.9 -3.0 6.8 3.0 5.5 2.1 -3.0 5.7 2.5 5.0 2.0 -3.0 5.8 2.8 5.1 2.4 -3.0 6.4 3.2 5.3 2.3 -3.0 6.5 3.0 5.5 1.8 -3.0 7.7 3.8 6.7 2.2 -3.0 7.7 2.6 6.9 2.3 -3.0 6.0 2.2 5.0 1.5 -3.0 6.9 3.2 5.7 2.3 -3.0 5.6 2.8 4.9 2.0 -3.0 7.7 2.8 6.7 2.0 -3.0 6.3 2.7 4.9 1.8 -3.0 6.7 3.3 5.7 2.1 -3.0 7.2 3.2 6.0 1.8 -3.0 6.2 2.8 4.8 1.8 -3.0 6.1 3.0 4.9 1.8 -3.0 6.4 2.8 5.6 2.1 -3.0 7.2 3.0 5.8 1.6 -3.0 7.4 2.8 6.1 1.9 -3.0 7.9 3.8 6.4 2.0 -3.0 6.4 2.8 5.6 2.2 -3.0 6.3 2.8 5.1 1.5 -3.0 6.1 2.6 5.6 1.4 -3.0 7.7 3.0 6.1 2.3 -3.0 6.3 3.4 5.6 2.4 -3.0 6.4 3.1 5.5 1.8 -3.0 6.0 3.0 4.8 1.8 -3.0 6.9 3.1 5.4 2.1 -3.0 6.7 3.1 5.6 2.4 -3.0 6.9 3.1 5.1 2.3 -3.0 5.8 2.7 5.1 1.9 -3.0 6.8 3.2 5.9 2.3 -3.0 6.7 3.3 5.7 2.5 -3.0 6.7 3.0 5.2 2.3 -3.0 6.3 2.5 5.0 1.9 -3.0 6.5 3.0 5.2 2.0 -3.0 6.2 3.4 5.4 2.3 -3.0 5.9 3.0 5.1 1.8 http://git-wip-us.apache.org/repos/asf/ignite/blob/a3b83246/modules/ml/src/main/java/org/apache/ignite/ml/structures/LabeledDataset.java ---------------------------------------------------------------------- diff --git a/modules/ml/src/main/java/org/apache/ignite/ml/structures/LabeledDataset.java b/modules/ml/src/main/java/org/apache/ignite/ml/structures/LabeledDataset.java index 53f74f3..c5581cb 100644 --- a/modules/ml/src/main/java/org/apache/ignite/ml/structures/LabeledDataset.java +++ b/modules/ml/src/main/java/org/apache/ignite/ml/structures/LabeledDataset.java @@ -149,7 +149,7 @@ public class LabeledDataset<L, Row extends LabeledVector> extends Dataset<Row> { * @return Label. */ public double label(int idx) { - LabeledVector labeledVector = (LabeledVector)data[idx]; + LabeledVector labeledVector = data[idx]; if(labeledVector!=null) return (double)labeledVector.label(); @@ -182,7 +182,7 @@ public class LabeledDataset<L, Row extends LabeledVector> extends Dataset<Row> { * @param lb The given label. */ public void setLabel(int idx, double lb) { - LabeledVector labeledVector = data[idx]; + LabeledVector<Vector, Double> labeledVector = data[idx]; if(labeledVector != null) labeledVector.setLabel(lb); http://git-wip-us.apache.org/repos/asf/ignite/blob/a3b83246/modules/ml/src/test/java/org/apache/ignite/ml/clustering/FuzzyCMeansDistributedClustererTest.java ---------------------------------------------------------------------- diff --git a/modules/ml/src/test/java/org/apache/ignite/ml/clustering/FuzzyCMeansDistributedClustererTest.java b/modules/ml/src/test/java/org/apache/ignite/ml/clustering/FuzzyCMeansDistributedClustererTest.java index 0aa8f83..4b415bb 100644 --- a/modules/ml/src/test/java/org/apache/ignite/ml/clustering/FuzzyCMeansDistributedClustererTest.java +++ b/modules/ml/src/test/java/org/apache/ignite/ml/clustering/FuzzyCMeansDistributedClustererTest.java @@ -88,6 +88,8 @@ public class FuzzyCMeansDistributedClustererTest extends GridCommonAbstractTest assertEquals(0, measure.compute(centers[1], new DenseLocalOnHeapVector(new double[]{10, -10})), 1); assertEquals(0, measure.compute(centers[2], new DenseLocalOnHeapVector(new double[]{10, 10})), 1); assertEquals(0, measure.compute(centers[3], new DenseLocalOnHeapVector(new double[]{-10, 10})), 1); + + pntMatrix.destroy(); } /** Perform N tests each of which contains M random points placed around K centers on the plane. */ @@ -116,7 +118,7 @@ public class FuzzyCMeansDistributedClustererTest extends GridCommonAbstractTest * @param distributedClusterer Tested clusterer. * @param seed Seed for the random numbers generator. */ - public void performRandomTest(FuzzyCMeansDistributedClusterer distributedClusterer, long seed) { + private void performRandomTest(FuzzyCMeansDistributedClusterer distributedClusterer, long seed) { final int minNumCenters = 2; final int maxNumCenters = 5; final double maxRadius = 1000; @@ -130,11 +132,10 @@ public class FuzzyCMeansDistributedClustererTest extends GridCommonAbstractTest double[][] centers = new double[numCenters][2]; for (int i = 0; i < numCenters; i++) { - double radius = maxRadius; double angle = Math.PI * 2.0 * i / numCenters; - centers[i][0] = Math.cos(angle) * radius; - centers[i][1] = Math.sin(angle) * radius; + centers[i][0] = Math.cos(angle) * maxRadius; + centers[i][1] = Math.sin(angle) * maxRadius; } int numPoints = minPoints + random.nextInt(maxPoints - minPoints); @@ -173,5 +174,7 @@ public class FuzzyCMeansDistributedClustererTest extends GridCommonAbstractTest } assertEquals(0, cntr); + + pntMatrix.destroy(); } } http://git-wip-us.apache.org/repos/asf/ignite/blob/a3b83246/modules/ml/src/test/java/org/apache/ignite/ml/clustering/FuzzyCMeansLocalClustererTest.java ---------------------------------------------------------------------- diff --git a/modules/ml/src/test/java/org/apache/ignite/ml/clustering/FuzzyCMeansLocalClustererTest.java b/modules/ml/src/test/java/org/apache/ignite/ml/clustering/FuzzyCMeansLocalClustererTest.java index 2af94aa..4fe1eee 100644 --- a/modules/ml/src/test/java/org/apache/ignite/ml/clustering/FuzzyCMeansLocalClustererTest.java +++ b/modules/ml/src/test/java/org/apache/ignite/ml/clustering/FuzzyCMeansLocalClustererTest.java @@ -21,7 +21,6 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.Comparator; -import org.apache.ignite.ml.math.Matrix; import org.apache.ignite.ml.math.Vector; import org.apache.ignite.ml.math.distances.DistanceMeasure; import org.apache.ignite.ml.math.distances.EuclideanDistance; @@ -38,7 +37,7 @@ public class FuzzyCMeansLocalClustererTest { /** Test FCM on points that forms three clusters on the line. */ @Test public void equalWeightsOneDimension() { - BaseFuzzyCMeansClusterer clusterer = new FuzzyCMeansLocalClusterer(new EuclideanDistance(), + FuzzyCMeansLocalClusterer clusterer = new FuzzyCMeansLocalClusterer(new EuclideanDistance(), 2, BaseFuzzyCMeansClusterer.StopCondition.STABLE_CENTERS, 0.01, 10, null); @@ -46,7 +45,7 @@ public class FuzzyCMeansLocalClustererTest { {7}, {8}, {9}, {10}, {-1}, {0}, {1}}; - Matrix pntMatrix = new DenseLocalOnHeapMatrix(points); + DenseLocalOnHeapMatrix pntMatrix = new DenseLocalOnHeapMatrix(points); FuzzyCMeansModel mdl = clusterer.cluster(pntMatrix, 3); @@ -60,7 +59,7 @@ public class FuzzyCMeansLocalClustererTest { /** Test FCM on points that forms four clusters on the plane. */ @Test public void equalWeightsTwoDimensions() { - BaseFuzzyCMeansClusterer clusterer = new FuzzyCMeansLocalClusterer(new EuclideanDistance(), + FuzzyCMeansLocalClusterer clusterer = new FuzzyCMeansLocalClusterer(new EuclideanDistance(), 2, BaseFuzzyCMeansClusterer.StopCondition.STABLE_CENTERS, 0.01, 20, null); @@ -69,7 +68,7 @@ public class FuzzyCMeansLocalClustererTest { {-10, 10}, {-9, 11}, {-10, 9}, {-11, 9}, {10, -10}, {9, -11}, {10, -9}, {11, -9}}; - Matrix pntMatrix = new DenseLocalOnHeapMatrix(points); + DenseLocalOnHeapMatrix pntMatrix = new DenseLocalOnHeapMatrix(points); FuzzyCMeansModel mdl = clusterer.cluster(pntMatrix, 4); Vector[] centers = mdl.centers(); @@ -86,12 +85,12 @@ public class FuzzyCMeansLocalClustererTest { /** Test FCM on points which have the equal coordinates. */ @Test public void checkCentersOfTheSamePointsTwoDimensions() { - BaseFuzzyCMeansClusterer clusterer = new FuzzyCMeansLocalClusterer(new EuclideanDistance(), + FuzzyCMeansLocalClusterer clusterer = new FuzzyCMeansLocalClusterer(new EuclideanDistance(), 2, BaseFuzzyCMeansClusterer.StopCondition.STABLE_MEMBERSHIPS, 0.01, 10, null); double[][] points = new double[][] {{3.3, 10}, {3.3, 10}, {3.3, 10}, {3.3, 10}, {3.3, 10}}; - Matrix pntMatrix = new DenseLocalOnHeapMatrix(points); + DenseLocalOnHeapMatrix pntMatrix = new DenseLocalOnHeapMatrix(points); int k = 2; FuzzyCMeansModel mdl = clusterer.cluster(pntMatrix, k); @@ -107,7 +106,7 @@ public class FuzzyCMeansLocalClustererTest { /** Test FCM on points located on the circle. */ @Test public void checkCentersLocationOnSphere() { - BaseFuzzyCMeansClusterer clusterer = new FuzzyCMeansLocalClusterer(new EuclideanDistance(), + FuzzyCMeansLocalClusterer clusterer = new FuzzyCMeansLocalClusterer(new EuclideanDistance(), 2, BaseFuzzyCMeansClusterer.StopCondition.STABLE_CENTERS, 0.01, 100, null); int numOfPoints = 650; @@ -119,7 +118,7 @@ public class FuzzyCMeansLocalClustererTest { points[i][1] = Math.sin(Math.PI * 2 * i / numOfPoints) * radius; } - Matrix pntMatrix = new DenseLocalOnHeapMatrix(points); + DenseLocalOnHeapMatrix pntMatrix = new DenseLocalOnHeapMatrix(points); int k = 10; FuzzyCMeansModel mdl = clusterer.cluster(pntMatrix, k); @@ -134,12 +133,12 @@ public class FuzzyCMeansLocalClustererTest { /** Test FCM on points that forms the line located on the plane. */ @Test public void test2DLineClustering() { - BaseFuzzyCMeansClusterer clusterer = new FuzzyCMeansLocalClusterer(new EuclideanDistance(), + FuzzyCMeansLocalClusterer clusterer = new FuzzyCMeansLocalClusterer(new EuclideanDistance(), 2, BaseFuzzyCMeansClusterer.StopCondition.STABLE_CENTERS, 0.01, 50, null); double[][] points = new double[][]{{1, 2}, {3, 6}, {5, 10}}; - Matrix pntMatrix = new DenseLocalOnHeapMatrix(points); + DenseLocalOnHeapMatrix pntMatrix = new DenseLocalOnHeapMatrix(points); int k = 2; FuzzyCMeansModel mdl = clusterer.cluster(pntMatrix, k); @@ -185,7 +184,7 @@ public class FuzzyCMeansLocalClustererTest { 2, BaseFuzzyCMeansClusterer.StopCondition.STABLE_CENTERS, 0.01, 10, null); double[][] points = new double[][]{{1}, {2}, {3}, {4}}; - FuzzyCMeansModel cluster = clusterer.cluster(new DenseLocalOnHeapMatrix(points), 1); + clusterer.cluster(new DenseLocalOnHeapMatrix(points), 1); } /** Test FCM on different numbers of points and weights. */ @@ -198,6 +197,6 @@ public class FuzzyCMeansLocalClustererTest { ArrayList<Double> weights = new ArrayList<>(); Collections.addAll(weights, 1.0, 34.0, 2.5, 5.0, 0.5); - FuzzyCMeansModel cluster = clusterer.cluster(new DenseLocalOnHeapMatrix(points), 2, weights); + clusterer.cluster(new DenseLocalOnHeapMatrix(points), 2, weights); } } http://git-wip-us.apache.org/repos/asf/ignite/blob/a3b83246/modules/yardstick/config/benchmark-ml.properties ---------------------------------------------------------------------- diff --git a/modules/yardstick/config/benchmark-ml.properties b/modules/yardstick/config/benchmark-ml.properties index 5992665..acc9c5a 100644 --- a/modules/yardstick/config/benchmark-ml.properties +++ b/modules/yardstick/config/benchmark-ml.properties @@ -89,4 +89,8 @@ CONFIGS="\ -cfg ${SCRIPT_DIR}/../config/ignite-localhost-config.xml -nn ${nodesNum} -b ${b} -w ${w} -d ${d} -t ${t} -sm ${sm} -dn IgniteSparseDistributedMatrixMul2Benchmark -sn IgniteNode -ds ${ver}sparse-distributed-matrix-mul2-${b}-backup,\ -cfg ${SCRIPT_DIR}/../config/ignite-localhost-config.xml -nn ${nodesNum} -b ${b} -w ${w} -d ${d} -t ${t} -sm ${sm} -dn IgniteColumnDecisionTreeVarianceBenchmark -sn IgniteNode -ds ${ver}column-decision-tree-variance-${b}-backup,\ -cfg ${SCRIPT_DIR}/../config/ignite-localhost-config.xml -nn ${nodesNum} -b ${b} -w ${w} -d ${d} -t ${t} -sm ${sm} -dn IgniteColumnDecisionTreeGiniBenchmark -sn IgniteNode -ds ${ver}column-decision-tree-gini-${b}-backup,\ +-cfg ${SCRIPT_DIR}/../config/ignite-localhost-config.xml -nn ${nodesNum} -b ${b} -w ${w} -d ${d} -t ${t} -sm ${sm} -dn IgniteKNNClassificationBenchmark -sn IgniteNode -ds ${ver}knn-classification-${b}-backup,\ +-cfg ${SCRIPT_DIR}/../config/ignite-localhost-config.xml -nn ${nodesNum} -b ${b} -w ${w} -d ${d} -t ${t} -sm ${sm} -dn IgniteKNNRegressionBenchmark -sn IgniteNode -ds ${ver}knn-regression-${b}-backup,\ +-cfg ${SCRIPT_DIR}/../config/ignite-localhost-config.xml -nn ${nodesNum} -b ${b} -w ${w} -d ${d} -t ${t} -sm ${sm} -dn IgniteFuzzyCMeansLocalClustererBenchmark -sn IgniteNode -ds ${ver}fuzzy-cmeans-local-${b}-backup,\ +-cfg ${SCRIPT_DIR}/../config/ignite-localhost-config.xml -nn ${nodesNum} -b ${b} -w ${w} -d ${d} -t ${t} -sm ${sm} -dn IgniteFuzzyCMeansDistributedClustererBenchmark -sn IgniteNode -ds ${ver}fuzzy-cmeans-distributed-${b}-backup,\ " http://git-wip-us.apache.org/repos/asf/ignite/blob/a3b83246/modules/yardstick/src/main/ml/org/apache/ignite/yardstick/ml/clustering/IgniteFuzzyCMeansDistributedClustererBenchmark.java ---------------------------------------------------------------------- diff --git a/modules/yardstick/src/main/ml/org/apache/ignite/yardstick/ml/clustering/IgniteFuzzyCMeansDistributedClustererBenchmark.java b/modules/yardstick/src/main/ml/org/apache/ignite/yardstick/ml/clustering/IgniteFuzzyCMeansDistributedClustererBenchmark.java new file mode 100644 index 0000000..e356746 --- /dev/null +++ b/modules/yardstick/src/main/ml/org/apache/ignite/yardstick/ml/clustering/IgniteFuzzyCMeansDistributedClustererBenchmark.java @@ -0,0 +1,130 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.ignite.yardstick.ml.clustering; + +import java.util.Map; +import org.apache.ignite.Ignite; +import org.apache.ignite.ml.clustering.BaseFuzzyCMeansClusterer; +import org.apache.ignite.ml.clustering.FuzzyCMeansDistributedClusterer; +import org.apache.ignite.ml.clustering.FuzzyCMeansModel; +import org.apache.ignite.ml.math.StorageConstants; +import org.apache.ignite.ml.math.distances.DistanceMeasure; +import org.apache.ignite.ml.math.distances.EuclideanDistance; +import org.apache.ignite.ml.math.impls.matrix.SparseDistributedMatrix; +import org.apache.ignite.resources.IgniteInstanceResource; +import org.apache.ignite.thread.IgniteThread; +import org.apache.ignite.yardstick.IgniteAbstractBenchmark; +import org.apache.ignite.yardstick.ml.DataChanger; + +/** + * Ignite benchmark that performs ML Grid operations. + */ +@SuppressWarnings("unused") +public class IgniteFuzzyCMeansDistributedClustererBenchmark extends IgniteAbstractBenchmark { + /** */ + @IgniteInstanceResource + private Ignite ignite; + + /** {@inheritDoc} */ + @Override public boolean test(Map<Object, Object> ctx) throws Exception { + // Create IgniteThread, we must work with SparseDistributedMatrix inside IgniteThread + // because we create ignite cache internally. + IgniteThread igniteThread = new IgniteThread(ignite.configuration().getIgniteInstanceName(), + this.getClass().getSimpleName(), new Runnable() { + /** {@inheritDoc} */ + @Override public void run() { + // IMPL NOTE originally taken from FuzzyCMeansExample. + // Distance measure that computes distance between two points. + DistanceMeasure distanceMeasure = new EuclideanDistance(); + + // "Fuzziness" - specific constant that is used in membership calculation (1.0+-eps ~ K-Means). + double exponentialWeight = 2.0; + + // Condition that indicated when algorithm must stop. + // In this example algorithm stops if memberships have changed insignificantly. + BaseFuzzyCMeansClusterer.StopCondition stopCond = + BaseFuzzyCMeansClusterer.StopCondition.STABLE_MEMBERSHIPS; + + // Maximum difference between new and old membership values with which algorithm will continue to work. + double maxDelta = 0.01; + + // The maximum number of FCM iterations. + int maxIterations = 50; + + // Number of steps of primary centers selection (more steps more candidates). + int initializationSteps = 2; + + // Number of K-Means iteration that is used to choose required number of primary centers from candidates. + int kMeansMaxIterations = 50; + + // Create new distributed clusterer with parameters described above. + FuzzyCMeansDistributedClusterer clusterer = new FuzzyCMeansDistributedClusterer( + distanceMeasure, exponentialWeight, stopCond, maxDelta, maxIterations, + null, initializationSteps, kMeansMaxIterations); + + // Create sample data. + double[][] points = shuffle((int)(DataChanger.next())); + + // Initialize matrix of data points. Each row contains one point. + int rows = points.length; + int cols = points[0].length; + + // Create the matrix that contains sample points. + SparseDistributedMatrix pntMatrix = new SparseDistributedMatrix(rows, cols, + StorageConstants.ROW_STORAGE_MODE, StorageConstants.RANDOM_ACCESS_MODE); + + // Store points into matrix. + pntMatrix.assign(points); + + // Call clusterization method with some number of centers. + // It returns model that can predict results for new points. + int numCenters = 4; + FuzzyCMeansModel mdl = clusterer.cluster(pntMatrix, numCenters); + + // Get centers of clusters that is computed by Fuzzy C-Means algorithm. + mdl.centers(); + + pntMatrix.destroy(); + } + }); + + igniteThread.start(); + + igniteThread.join(); + + return true; + } + + /** */ + private double[][] shuffle(int off) { + final double[][] points = new double[][] { + {-10, -10}, {-9, -11}, {-10, -9}, {-11, -9}, + {10, 10}, {9, 11}, {10, 9}, {11, 9}, + {-10, 10}, {-9, 11}, {-10, 9}, {-11, 9}, + {10, -10}, {9, -11}, {10, -9}, {11, -9}}; + + final int size = points.length; + + final double[][] res = new double[size][]; + + for (int i = 0; i < size; i++) + res[i] = points[(i + off) % size]; + + return res; + } +} http://git-wip-us.apache.org/repos/asf/ignite/blob/a3b83246/modules/yardstick/src/main/ml/org/apache/ignite/yardstick/ml/clustering/IgniteFuzzyCMeansLocalClustererBenchmark.java ---------------------------------------------------------------------- diff --git a/modules/yardstick/src/main/ml/org/apache/ignite/yardstick/ml/clustering/IgniteFuzzyCMeansLocalClustererBenchmark.java b/modules/yardstick/src/main/ml/org/apache/ignite/yardstick/ml/clustering/IgniteFuzzyCMeansLocalClustererBenchmark.java new file mode 100644 index 0000000..8c4c9ce --- /dev/null +++ b/modules/yardstick/src/main/ml/org/apache/ignite/yardstick/ml/clustering/IgniteFuzzyCMeansLocalClustererBenchmark.java @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.ignite.yardstick.ml.clustering; + +import java.util.Map; +import org.apache.ignite.ml.clustering.BaseFuzzyCMeansClusterer; +import org.apache.ignite.ml.clustering.FuzzyCMeansLocalClusterer; +import org.apache.ignite.ml.clustering.FuzzyCMeansModel; +import org.apache.ignite.ml.math.distances.DistanceMeasure; +import org.apache.ignite.ml.math.distances.EuclideanDistance; +import org.apache.ignite.ml.math.impls.matrix.DenseLocalOnHeapMatrix; +import org.apache.ignite.yardstick.IgniteAbstractBenchmark; +import org.apache.ignite.yardstick.ml.DataChanger; + +/** + * Ignite benchmark that performs ML Grid operations. + */ +@SuppressWarnings("unused") +public class IgniteFuzzyCMeansLocalClustererBenchmark extends IgniteAbstractBenchmark { + /** {@inheritDoc} */ + @Override public boolean test(Map<Object, Object> ctx) throws Exception { + // IMPL NOTE originally taken from FuzzyLocalCMeansExample. + // Distance measure that computes distance between two points. + DistanceMeasure distanceMeasure = new EuclideanDistance(); + + // "Fuzziness" - specific constant that is used in membership calculation (1.0+-eps ~ K-Means). + double exponentialWeight = 2.0; + + // Condition that indicated when algorithm must stop. + // In this example algorithm stops if memberships have changed insignificantly. + BaseFuzzyCMeansClusterer.StopCondition stopCond = + BaseFuzzyCMeansClusterer.StopCondition.STABLE_MEMBERSHIPS; + + // Maximum difference between new and old membership values with which algorithm will continue to work. + double maxDelta = 0.01; + + // The maximum number of FCM iterations. + int maxIterations = 50; + + // Create new local clusterer with parameters described above. + FuzzyCMeansLocalClusterer clusterer = new FuzzyCMeansLocalClusterer(distanceMeasure, + exponentialWeight, stopCond, maxDelta, maxIterations, null); + + // Create sample data. + double[][] points = shuffle((int)(DataChanger.next())); + + // Create the matrix that contains sample points. + DenseLocalOnHeapMatrix pntMatrix = new DenseLocalOnHeapMatrix(points); + + // Call clusterization method with some number of centers. + // It returns model that can predict results for new points. + int numCenters = 4; + FuzzyCMeansModel mdl = clusterer.cluster(pntMatrix, numCenters); + + // Get centers of clusters that is computed by Fuzzy C-Means algorithm. + mdl.centers(); + + return true; + } + + /** */ + private double[][] shuffle(int off) { + final double[][] points = new double[][] { + {-10, -10}, {-9, -11}, {-10, -9}, {-11, -9}, + {10, 10}, {9, 11}, {10, 9}, {11, 9}, + {-10, 10}, {-9, 11}, {-10, 9}, {-11, 9}, + {10, -10}, {9, -11}, {10, -9}, {11, -9}}; + + final int size = points.length; + + final double[][] res = new double[size][]; + + for (int i = 0; i < size; i++) + res[i] = points[(i + off) % size]; + + return res; + } +}
