http://git-wip-us.apache.org/repos/asf/ignite/blob/fea694fe/examples/src/main/java/org/apache/ignite/examples/ml/genetic/movie/MovieFitnessFunction.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/genetic/movie/MovieFitnessFunction.java b/examples/src/main/java/org/apache/ignite/examples/ml/genetic/movie/MovieFitnessFunction.java index bfd50f0..154ce79 100644 --- a/examples/src/main/java/org/apache/ignite/examples/ml/genetic/movie/MovieFitnessFunction.java +++ b/examples/src/main/java/org/apache/ignite/examples/ml/genetic/movie/MovieFitnessFunction.java @@ -23,78 +23,68 @@ import org.apache.ignite.ml.genetic.Gene; import org.apache.ignite.ml.genetic.IFitnessFunction; /** - * This example demonstrates how to create a IFitnessFunction - * - * Your IFitness function will vary depending on your particular use case. - * - * For this fitness function, we simply want to calculate the value of - * - * an individual solution relative to other solutions. - * - * - * To do this, we simply increase fitness score by number of times - * - * genre is found in list of movies. - * - * In addition, we increase score by fictional IMDB rating. - * - * If there are duplicate movies in selection, we automatically apply a '0' - * - * fitness score. + * This example demonstrates how to create a {@link IFitnessFunction}. + * <p> + * Your fitness function will vary depending on your particular use case. For this fitness function, we want + * to calculate the value of an individual solution relative to other solutions.</p> + * <p> + * To do this, we increase fitness score by number of times genre is found in list of movies. 
In addition, + * we increase score by fictional IMDB rating.</p> + * <p> + * If there are duplicate movies in selection, we automatically apply a '0' fitness score.</p> */ public class MovieFitnessFunction implements IFitnessFunction { - /** genes */ - private List<String> genres = null; + /** Genres. */ + private List<String> genres; /** - * @param genres List of genres + * Create instance. + * + * @param genres List of genres. */ public MovieFitnessFunction(List<String> genres) { this.genres = genres; } /** - * Calculate fitness score + * Calculate fitness score. * - * @param genes List of Genes - * @return Fitness score + * @param genes List of Genes. + * @return Fitness score. */ public double evaluate(List<Gene> genes) { - double score = 0; - List<String> dups = new ArrayList(); + List<String> duplicates = new ArrayList<>(); int badSolution = 1; - for (int i = 0; i < genes.size(); i++) { - Movie movie = (Movie)genes.get(i).getVal(); - if (dups.contains(movie.getName())) { + for (Gene gene : genes) { + Movie movie = (Movie)gene.getVal(); + if (duplicates.contains(movie.getName())) badSolution = 0; - } - else { - dups.add(movie.getName()); - } + else + duplicates.add(movie.getName()); + double genreScore = getGenreScore(movie); - if (genreScore == 0) { + if (genreScore == 0) badSolution = 0; - } + score = (score + movie.getImdbRating()) + (genreScore); } return (score * badSolution); } /** - * helper to calculate genre score + * Helper to calculate genre score. * - * @param movie Move - * @return Genre score + * @param movie Movie. + * @return Genre score. */ private double getGenreScore(Movie movie) { double genreScore = 0; for (String genre : this.genres) { - if (movie.getGenre().contains(genre)) { + if (movie.getGenre().contains(genre)) genreScore = genreScore + 1; - } } return genreScore; }
http://git-wip-us.apache.org/repos/asf/ignite/blob/fea694fe/examples/src/main/java/org/apache/ignite/examples/ml/genetic/movie/MovieGAExample.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/genetic/movie/MovieGAExample.java b/examples/src/main/java/org/apache/ignite/examples/ml/genetic/movie/MovieGAExample.java index a0b368af..51e28be 100644 --- a/examples/src/main/java/org/apache/ignite/examples/ml/genetic/movie/MovieGAExample.java +++ b/examples/src/main/java/org/apache/ignite/examples/ml/genetic/movie/MovieGAExample.java @@ -29,56 +29,54 @@ import org.apache.ignite.ml.genetic.parameter.GAConfiguration; import org.apache.ignite.ml.genetic.parameter.GAGridConstants; /** - * This example demonstrates how to use the GAGrid framework. - * - * In this example, we utilize GA Grid to calculate an optimal set of movies based on our interests in various genres - * (ie: Action, Comedy, and Romance) - * - * - * How To Run: - * - * mvn exec:java -Dexec.mainClass="org.apache.ignite.examples.ml.genetic.movie.MovieGAExample" -DGENRES=Action,Comedy - * - * <p> Remote nodes should always be started with special configuration file which enables P2P class loading: {@code - * 'ignite.{sh|bat} examples/config/example-ignite.xml'}.</p> <p> Alternatively you can run ExampleNodeStartup in - * another JVM which will start node with {@code examples/config/example-ignite.xml} configuration.</p> + * In this example, we utilize {@link GAGrid} framework to calculate an optimal set of movies based on our interests + * in various genres (ie: Action, Comedy, and Romance). 
+ * <p> + * Code in this example launches Ignite grid, prepares simple test data (gene pool) and configures GA grid.</p> + * <p> + * After that it launches the process of evolution on GA grid and outputs the progress and results.</p> + * <p> + * You can change the test data and parameters of GA grid used in this example and re-run it to explore + * this functionality further.</p> + * <p> + * How to run from command line:</p> + * <p> + * {@code mvn exec:java -Dexec.mainClass="org.apache.ignite.examples.ml.genetic.movie.MovieGAExample" + * -DGENRES=Action,Comedy}</p> + * <p> + * Remote nodes should always be started with special configuration file which enables P2P class loading: {@code + * 'ignite.{sh|bat} examples/config/example-ignite.xml'}.</p> + * <p> + * Alternatively you can run ExampleNodeStartup in another JVM which will start node with + * {@code examples/config/example-ignite.xml} configuration.</p> */ public class MovieGAExample { - /** Ignite instance */ - private static Ignite ignite = null; - /** GAGrid */ - private static GAGrid gaGrid = null; - /** GAConfiguration */ - private static GAConfiguration gaConfig = null; - /** * Executes example. * - * Specify value for -DGENRES JVM system variable + * Specify value for {@code -DGENRES} JVM system variable. * * @param args Command line arguments, none required. */ - public static void main(String args[]) { + System.out.println(">>> Movie GA grid example started."); + System.setProperty("IGNITE_QUIET", "false"); - List genres = new ArrayList(); + List<String> genres = new ArrayList<>(); String sGenres = "Action,Comedy,Romance"; - StringBuffer sbErrorMessage = new StringBuffer(); - sbErrorMessage.append("GENRES System property not set. 
Please provide GENRES information."); - sbErrorMessage.append(" "); - sbErrorMessage.append("IE: -DGENRES=Action,Comedy,Romance"); - sbErrorMessage.append("\n"); - sbErrorMessage.append("Using default value: Action,Comedy,Romance"); - - if (System.getProperty("GENRES") == null) { - System.out.println(sbErrorMessage); + StringBuffer sbErrorMsg = new StringBuffer(); + sbErrorMsg.append("GENRES System property not set. Please provide GENRES information."); + sbErrorMsg.append(" "); + sbErrorMsg.append("IE: -DGENRES=Action,Comedy,Romance"); + sbErrorMsg.append("\n"); + sbErrorMsg.append("Using default value: Action,Comedy,Romance"); - } - else { + if (System.getProperty("GENRES") == null) + System.out.println(sbErrorMsg); + else sGenres = System.getProperty("GENRES"); - } StringTokenizer st = new StringTokenizer(sGenres, ","); @@ -87,58 +85,61 @@ public class MovieGAExample { genres.add(genre); } - // Create GAConfiguration - gaConfig = new GAConfiguration(); + // Create GAConfiguration. + GAConfiguration gaCfg = new GAConfiguration(); - // set Gene Pool + // Set Gene Pool. List<Gene> genes = getGenePool(); - // Define Chromosome - gaConfig.setChromosomeLen(3); - gaConfig.setPopulationSize(100); - gaConfig.setGenePool(genes); - gaConfig.setTruncateRate(.10); - gaConfig.setCrossOverRate(.50); - gaConfig.setMutationRate(.50); - gaConfig.setSelectionMtd(GAGridConstants.SELECTION_METHOD.SELECTION_METHOD_TRUNCATION); + // Define Chromosome. + gaCfg.setChromosomeLen(3); + gaCfg.setPopulationSize(100); + gaCfg.setGenePool(genes); + gaCfg.setTruncateRate(.10); + gaCfg.setCrossOverRate(.50); + gaCfg.setMutationRate(.50); + gaCfg.setSelectionMtd(GAGridConstants.SELECTION_METHOD.SELECTION_METHOD_TRUNCATION); - //Create fitness function + // Create fitness function. MovieFitnessFunction function = new MovieFitnessFunction(genres); - //set fitness function - gaConfig.setFitnessFunction(function); + // Set fitness function. 
+ gaCfg.setFitnessFunction(function); try { - - //Create an Ignite instance as you would in any other use case. - ignite = Ignition.start("examples/config/example-ignite.xml"); + // Create an Ignite instance as you would in any other use case. + Ignite ignite = Ignition.start("examples/config/example-ignite.xml"); MovieTerminateCriteria termCriteria = new MovieTerminateCriteria(ignite); - gaConfig.setTerminateCriteria(termCriteria); + gaCfg.setTerminateCriteria(termCriteria); - gaGrid = new GAGrid(gaConfig, ignite); + GAGrid gaGrid = new GAGrid(gaCfg, ignite); ignite.log(); - Chromosome fittestChromosome = gaGrid.evolve(); + + Chromosome chromosome = gaGrid.evolve(); + + System.out.println(">>> Evolution result: " + chromosome); Ignition.stop(true); - ignite = null; + System.out.println(">>> Movie GA grid example completed."); } catch (Exception e) { - System.out.println(e); + System.out.println(e.getMessage()); + e.printStackTrace(); } - } + /** */ private static List<Gene> getGenePool() { - List list = new ArrayList(); + List<Gene> list = new ArrayList<>(); Movie movie1 = new Movie(); movie1.setName("The Matrix"); movie1.setImdbRating(7); - List genre1 = new ArrayList(); + List<String> genre1 = new ArrayList<>(); genre1.add("SciFi"); genre1.add("Action"); movie1.setGenre(genre1); @@ -150,7 +151,7 @@ public class MovieGAExample { Movie movie2 = new Movie(); movie2.setName("The Dark Knight"); movie2.setImdbRating(9.6); - List genre2 = new ArrayList(); + List<String> genre2 = new ArrayList<>(); genre2.add("Action"); movie2.setGenre(genre2); movie2.setRating("PG-13"); @@ -163,7 +164,7 @@ public class MovieGAExample { movie3.setImdbRating(9.6); movie3.setYear("2012"); - List genre3 = new ArrayList(); + List<String> genre3 = new ArrayList<>(); genre3.add("Action"); movie3.setGenre(genre3); movie3.setRating("PG-13"); @@ -173,7 +174,7 @@ public class MovieGAExample { Movie movie4 = new Movie(); movie4.setName("The Hangover"); movie4.setImdbRating(7.6); - List genre4 = 
new ArrayList(); + List<String> genre4 = new ArrayList<>(); genre4.add("Comedy"); movie4.setGenre(genre4); movie4.setRating("R"); @@ -184,7 +185,7 @@ public class MovieGAExample { Movie movie5 = new Movie(); movie5.setName("The Hangover 2"); movie5.setImdbRating(9.6); - List genre5 = new ArrayList(); + List<String> genre5 = new ArrayList<>(); genre5.add("Comedy"); movie5.setGenre(genre5); movie5.setRating("R"); @@ -195,7 +196,7 @@ public class MovieGAExample { Movie movie6 = new Movie(); movie6.setName("This Means War"); movie6.setImdbRating(6.4); - List genre6 = new ArrayList(); + List<String> genre6 = new ArrayList<>(); genre6.add("Comedy"); genre6.add("Action"); genre6.add("Romance"); @@ -208,7 +209,7 @@ public class MovieGAExample { Movie movie7 = new Movie(); movie7.setName("Hitch"); movie7.setImdbRating(10); - List genre7 = new ArrayList(); + List<String> genre7 = new ArrayList<>(); genre7.add("Comedy"); genre7.add("Romance"); movie7.setGenre(genre7); @@ -220,7 +221,7 @@ public class MovieGAExample { Movie movie8 = new Movie(); movie8.setName("21 Jump Street"); movie8.setImdbRating(6.7); - List genre8 = new ArrayList(); + List<String> genre8 = new ArrayList<>(); genre8.add("Comedy"); genre8.add("Action"); movie8.setGenre(genre8); @@ -232,7 +233,7 @@ public class MovieGAExample { Movie movie9 = new Movie(); movie9.setName("Killers"); movie9.setImdbRating(5.1); - List genre9 = new ArrayList(); + List<String> genre9 = new ArrayList<>(); genre9.add("Comedy"); genre9.add("Action"); genre9.add("Romance"); @@ -245,7 +246,7 @@ public class MovieGAExample { Movie movie10 = new Movie(); movie10.setName("What to Expect When You're Expecting"); movie10.setImdbRating(5.1); - List genre10 = new ArrayList(); + List<String> genre10 = new ArrayList<>(); genre10.add("Comedy"); genre10.add("Romance"); movie10.setGenre(genre10); @@ -266,7 +267,5 @@ public class MovieGAExample { list.add(gene10); return list; - } - } 
http://git-wip-us.apache.org/repos/asf/ignite/blob/fea694fe/examples/src/main/java/org/apache/ignite/examples/ml/genetic/movie/MovieTerminateCriteria.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/genetic/movie/MovieTerminateCriteria.java b/examples/src/main/java/org/apache/ignite/examples/ml/genetic/movie/MovieTerminateCriteria.java index 43804b7..7110a7c 100644 --- a/examples/src/main/java/org/apache/ignite/examples/ml/genetic/movie/MovieTerminateCriteria.java +++ b/examples/src/main/java/org/apache/ignite/examples/ml/genetic/movie/MovieTerminateCriteria.java @@ -26,61 +26,62 @@ import org.apache.ignite.ml.genetic.parameter.ITerminateCriteria; import org.apache.ignite.ml.genetic.utils.GAGridUtils; /** - * Represents the terminate condition for Movie Genetic algorithm <br/> - * - * Class terminates Genetic algorithm when fitnessScore > 32 <br/> + * Represents the terminate condition for {@link MovieGAExample}. + * <p> + * Class terminates Genetic algorithm when fitness score is more than 32.</p> */ public class MovieTerminateCriteria implements ITerminateCriteria { - /** Ignite logger */ - private IgniteLogger igniteLogger = null; - /** Ignite instance */ - private Ignite ignite = null; + /** Ignite logger. */ + private IgniteLogger igniteLog; + /** Ignite instance. */ + private Ignite ignite; /** - * @param ignite + * Create class instance. + * + * @param ignite Ignite instance. */ public MovieTerminateCriteria(Ignite ignite) { this.ignite = ignite; - this.igniteLogger = ignite.log(); + this.igniteLog = ignite.log(); } /** - * @param fittestChromosome Most fit chromosome at for the nth generation - * @param averageFitnessScore Average fitness score as of the nth generation - * @param currentGeneration Current generation - * @return Boolean value + * Check whether termination condition is met. 
+ * + * @param fittestChromosome Most fit chromosome at for the nth generation. + * @param averageFitnessScore Average fitness score as of the nth generation. + * @param currGeneration Current generation. + * @return Status whether condition is met or not. */ public boolean isTerminationConditionMet(Chromosome fittestChromosome, double averageFitnessScore, - int currentGeneration) { + int currGeneration) { boolean isTerminate = true; - igniteLogger.info("##########################################################################################"); - igniteLogger.info("Generation: " + currentGeneration); - igniteLogger.info("Fittest is Chromosome Key: " + fittestChromosome); - igniteLogger.info("Chromsome: " + fittestChromosome); + igniteLog.info("##########################################################################################"); + igniteLog.info("Generation: " + currGeneration); + igniteLog.info("Fittest is Chromosome Key: " + fittestChromosome); + igniteLog.info("Chromosome: " + fittestChromosome); printMovies(GAGridUtils.getGenesInOrderForChromosome(ignite, fittestChromosome)); - igniteLogger.info("##########################################################################################"); + igniteLog.info("##########################################################################################"); - if (!(fittestChromosome.getFitnessScore() > 32)) { + if (!(fittestChromosome.getFitnessScore() > 32)) isTerminate = false; - } return isTerminate; } /** - * Helper to print change detail + * Helper to print change details. * - * @param genes List of Genes + * @param genes List of Genes. 
*/ private void printMovies(List<Gene> genes) { for (Gene gene : genes) { - igniteLogger.info("Name: " + ((Movie)gene.getVal()).getName().toString()); - igniteLogger.info("Genres: " + ((Movie)gene.getVal()).getGenre().toString()); - igniteLogger.info("IMDB Rating: " + ((Movie)gene.getVal()).getImdbRating()); + igniteLog.info("Name: " + ((Movie)gene.getVal()).getName()); + igniteLog.info("Genres: " + ((Movie)gene.getVal()).getGenre().toString()); + igniteLog.info("IMDB Rating: " + ((Movie)gene.getVal()).getImdbRating()); } - } - } http://git-wip-us.apache.org/repos/asf/ignite/blob/fea694fe/examples/src/main/java/org/apache/ignite/examples/ml/knn/ANNClassificationExample.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/knn/ANNClassificationExample.java b/examples/src/main/java/org/apache/ignite/examples/ml/knn/ANNClassificationExample.java index 9a68207..37cb231 100644 --- a/examples/src/main/java/org/apache/ignite/examples/ml/knn/ANNClassificationExample.java +++ b/examples/src/main/java/org/apache/ignite/examples/ml/knn/ANNClassificationExample.java @@ -29,7 +29,6 @@ import org.apache.ignite.cache.query.ScanQuery; import org.apache.ignite.configuration.CacheConfiguration; import org.apache.ignite.ml.knn.NNClassificationModel; import org.apache.ignite.ml.knn.ann.ANNClassificationTrainer; -import org.apache.ignite.ml.knn.classification.KNNClassificationTrainer; import org.apache.ignite.ml.knn.classification.NNStrategy; import org.apache.ignite.ml.math.distances.EuclideanDistance; import org.apache.ignite.ml.math.distances.ManhattanDistance; @@ -38,9 +37,17 @@ import org.apache.ignite.ml.math.primitives.vector.impl.DenseVector; import org.apache.ignite.thread.IgniteThread; /** - * Run ANN multi-class classification trainer over distributed dataset. 
- * - * @see KNNClassificationTrainer + * Run ANN multi-class classification trainer ({@link ANNClassificationTrainer}) over distributed dataset. + * <p> + * Code in this example launches Ignite grid and fills the cache with test data points (based on the + * <a href="https://en.wikipedia.org/wiki/Iris_flower_data_set"></a>Iris dataset</a>).</p> + * <p> + * After that it trains the model based on the specified data using kNN algorithm.</p> + * <p> + * Finally, this example loops over the test set of data points, applies the trained model to predict what cluster + * does this point belong to, and compares prediction to expected outcome (ground truth).</p> + * <p> + * You can change the test data used in this example and re-run it to explore this algorithm further.</p> */ public class ANNClassificationExample { /** Run example. */ @@ -111,6 +118,8 @@ public class ANNClassificationExample { System.out.println("\n>>> Absolute amount of errors " + amountOfErrors); System.out.println("\n>>> Accuracy " + (1 - amountOfErrors / (double) totalAmount)); System.out.println(totalAmount); + + System.out.println(">>> ANN multi-class classification algorithm over cached dataset usage example completed."); } }); http://git-wip-us.apache.org/repos/asf/ignite/blob/fea694fe/examples/src/main/java/org/apache/ignite/examples/ml/knn/KNNClassificationExample.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/knn/KNNClassificationExample.java b/examples/src/main/java/org/apache/ignite/examples/ml/knn/KNNClassificationExample.java index b4602cc..541e70c 100644 --- a/examples/src/main/java/org/apache/ignite/examples/ml/knn/KNNClassificationExample.java +++ b/examples/src/main/java/org/apache/ignite/examples/ml/knn/KNNClassificationExample.java @@ -18,15 +18,13 @@ package org.apache.ignite.examples.ml.knn; import java.util.Arrays; -import java.util.UUID; import javax.cache.Cache; import 
org.apache.ignite.Ignite; import org.apache.ignite.IgniteCache; import org.apache.ignite.Ignition; -import org.apache.ignite.cache.affinity.rendezvous.RendezvousAffinityFunction; import org.apache.ignite.cache.query.QueryCursor; import org.apache.ignite.cache.query.ScanQuery; -import org.apache.ignite.configuration.CacheConfiguration; +import org.apache.ignite.examples.ml.util.TestCache; import org.apache.ignite.ml.knn.NNClassificationModel; import org.apache.ignite.ml.knn.classification.KNNClassificationTrainer; import org.apache.ignite.ml.knn.classification.NNStrategy; @@ -36,9 +34,17 @@ import org.apache.ignite.ml.math.primitives.vector.impl.DenseVector; import org.apache.ignite.thread.IgniteThread; /** - * Run kNN multi-class classification trainer over distributed dataset. - * - * @see KNNClassificationTrainer + * Run kNN multi-class classification trainer ({@link KNNClassificationTrainer}) over distributed dataset. + * <p> + * Code in this example launches Ignite grid and fills the cache with test data points (based on the + * <a href="https://en.wikipedia.org/wiki/Iris_flower_data_set"></a>Iris dataset</a>).</p> + * <p> + * After that it trains the model based on the specified data using kNN algorithm.</p> + * <p> + * Finally, this example loops over the test set of data points, applies the trained model to predict what cluster + * does this point belong to, and compares prediction to expected outcome (ground truth).</p> + * <p> + * You can change the test data used in this example and re-run it to explore this algorithm further.</p> */ public class KNNClassificationExample { /** Run example. 
*/ @@ -51,7 +57,7 @@ public class KNNClassificationExample { IgniteThread igniteThread = new IgniteThread(ignite.configuration().getIgniteInstanceName(), KNNClassificationExample.class.getSimpleName(), () -> { - IgniteCache<Integer, double[]> dataCache = getTestCache(ignite); + IgniteCache<Integer, double[]> dataCache = new TestCache(ignite).get(data); KNNClassificationTrainer trainer = new KNNClassificationTrainer(); @@ -90,6 +96,8 @@ public class KNNClassificationExample { System.out.println("\n>>> Absolute amount of errors " + amountOfErrors); System.out.println("\n>>> Accuracy " + (1 - amountOfErrors / (double) totalAmount)); + + System.out.println(">>> kNN multi-class classification algorithm over cached dataset usage example completed."); } }); @@ -98,25 +106,6 @@ public class KNNClassificationExample { } } - /** - * Fills cache with data and returns it. - * - * @param ignite Ignite instance. - * @return Filled Ignite Cache. - */ - private static IgniteCache<Integer, double[]> getTestCache(Ignite ignite) { - CacheConfiguration<Integer, double[]> cacheConfiguration = new CacheConfiguration<>(); - cacheConfiguration.setName("TEST_" + UUID.randomUUID()); - cacheConfiguration.setAffinity(new RendezvousAffinityFunction(false, 10)); - - IgniteCache<Integer, double[]> cache = ignite.createCache(cacheConfiguration); - - for (int i = 0; i < data.length; i++) - cache.put(i, data[i]); - - return cache; - } - /** The Iris dataset. 
*/ private static final double[][] data = { {1, 5.1, 3.5, 1.4, 0.2}, http://git-wip-us.apache.org/repos/asf/ignite/blob/fea694fe/examples/src/main/java/org/apache/ignite/examples/ml/knn/KNNRegressionExample.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/knn/KNNRegressionExample.java b/examples/src/main/java/org/apache/ignite/examples/ml/knn/KNNRegressionExample.java index 7c84949..3803eea 100644 --- a/examples/src/main/java/org/apache/ignite/examples/ml/knn/KNNRegressionExample.java +++ b/examples/src/main/java/org/apache/ignite/examples/ml/knn/KNNRegressionExample.java @@ -18,16 +18,13 @@ package org.apache.ignite.examples.ml.knn; import java.util.Arrays; -import java.util.UUID; import javax.cache.Cache; import org.apache.ignite.Ignite; import org.apache.ignite.IgniteCache; import org.apache.ignite.Ignition; -import org.apache.ignite.cache.affinity.rendezvous.RendezvousAffinityFunction; import org.apache.ignite.cache.query.QueryCursor; import org.apache.ignite.cache.query.ScanQuery; -import org.apache.ignite.configuration.CacheConfiguration; -import org.apache.ignite.ml.knn.classification.KNNClassificationTrainer; +import org.apache.ignite.examples.ml.util.TestCache; import org.apache.ignite.ml.knn.classification.NNStrategy; import org.apache.ignite.ml.knn.regression.KNNRegressionModel; import org.apache.ignite.ml.knn.regression.KNNRegressionTrainer; @@ -37,9 +34,18 @@ import org.apache.ignite.ml.math.primitives.vector.impl.DenseVector; import org.apache.ignite.thread.IgniteThread; /** - * Run kNN regression trainer over distributed dataset. - * - * @see KNNClassificationTrainer + * Run kNN regression trainer ({@link KNNRegressionTrainer}) over distributed dataset. 
+ * <p> + * Code in this example launches Ignite grid and fills the cache with test data points (based on the + * <a href="https://en.wikipedia.org/wiki/Iris_flower_data_set"></a>Iris dataset</a>).</p> + * <p> + * After that it trains the model based on the specified data using kNN regression algorithm.</p> + * <p> + * Finally, this example loops over the test set of data points, applies the trained model to predict what cluster + * does this point belong to, and compares prediction to expected outcome (ground truth).</p> + * <p> + * You can change the test data used in this example or trainer object settings and re-run it to explore + * this algorithm further.</p> */ public class KNNRegressionExample { /** Run example. */ @@ -52,7 +58,7 @@ public class KNNRegressionExample { IgniteThread igniteThread = new IgniteThread(ignite.configuration().getIgniteInstanceName(), KNNRegressionExample.class.getSimpleName(), () -> { - IgniteCache<Integer, double[]> dataCache = getTestCache(ignite); + IgniteCache<Integer, double[]> dataCache = new TestCache(ignite).get(data); KNNRegressionTrainer trainer = new KNNRegressionTrainer(); @@ -65,6 +71,10 @@ public class KNNRegressionExample { .withDistanceMeasure(new ManhattanDistance()) .withStrategy(NNStrategy.WEIGHTED); + System.out.println(">>> ---------------------------------"); + System.out.println(">>> | Prediction\t| Ground Truth\t|"); + System.out.println(">>> ---------------------------------"); + int totalAmount = 0; // Calculate mean squared error (MSE) double mse = 0.0; @@ -83,13 +93,19 @@ public class KNNRegressionExample { mae += Math.abs(prediction - groundTruth); totalAmount++; + + System.out.printf(">>> | %.4f\t\t| %.4f\t\t|\n", prediction, groundTruth); } + System.out.println(">>> ---------------------------------"); + mse = mse / totalAmount; System.out.println("\n>>> Mean squared error (MSE) " + mse); mae = mae / totalAmount; System.out.println("\n>>> Mean absolute error (MAE) " + mae); + + System.out.println(">>> 
kNN regression over cached dataset usage example completed."); } }); @@ -98,25 +114,6 @@ public class KNNRegressionExample { } } - /** - * Fills cache with data and returns it. - * - * @param ignite Ignite instance. - * @return Filled Ignite Cache. - */ - private static IgniteCache<Integer, double[]> getTestCache(Ignite ignite) { - CacheConfiguration<Integer, double[]> cacheConfiguration = new CacheConfiguration<>(); - cacheConfiguration.setName("TEST_" + UUID.randomUUID()); - cacheConfiguration.setAffinity(new RendezvousAffinityFunction(false, 10)); - - IgniteCache<Integer, double[]> cache = ignite.createCache(cacheConfiguration); - - for (int i = 0; i < data.length; i++) - cache.put(i, data[i]); - - return cache; - } - /** The Iris dataset. */ private static final double[][] data = { {199, 125, 256, 6000, 256, 16, 128}, http://git-wip-us.apache.org/repos/asf/ignite/blob/fea694fe/examples/src/main/java/org/apache/ignite/examples/ml/nn/MLPTrainerExample.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/nn/MLPTrainerExample.java b/examples/src/main/java/org/apache/ignite/examples/ml/nn/MLPTrainerExample.java index 7873b12..30f8769 100644 --- a/examples/src/main/java/org/apache/ignite/examples/ml/nn/MLPTrainerExample.java +++ b/examples/src/main/java/org/apache/ignite/examples/ml/nn/MLPTrainerExample.java @@ -39,6 +39,16 @@ import org.apache.ignite.thread.IgniteThread; /** * Example of using distributed {@link MultilayerPerceptron}. 
* <p> + * Code in this example launches Ignite grid and fills the cache with simple test data.</p> + * <p> + * After that it defines a layered architecture and a neural network trainer, trains neural network + * and obtains multilayer perceptron model.</p> + * <p> + * Finally, this example loops over the test set, applies the trained model to predict the value and + * compares prediction to expected outcome.</p> + * <p> + * You can change the test data used in this example and re-run it to explore this functionality further.</p> + * <p> * Remote nodes should always be started with special configuration file which * enables P2P class loading: {@code 'ignite.{sh|bat} examples/config/example-ignite.xml'}.</p> * <p> @@ -112,7 +122,11 @@ public class MLPTrainerExample { for (int i = 0; i < 4; i++) { LabeledPoint pnt = trainingSet.get(i); Matrix predicted = mlp.apply(new DenseMatrix(new double[][] {{pnt.x, pnt.y}})); - failCnt += Math.abs(predicted.get(0, 0) - pnt.lb) < 0.5 ? 0 : 1; + + double predictedVal = predicted.get(0, 0); + double lbl = pnt.lb; + System.out.printf(">>> key: %d\t\t predicted: %.4f\t\tlabel: %.4f\n", i, predictedVal, lbl); + failCnt += Math.abs(predictedVal - lbl) < 0.5 ? 
0 : 1; } double failRatio = (double)failCnt / totalCnt; http://git-wip-us.apache.org/repos/asf/ignite/blob/fea694fe/examples/src/main/java/org/apache/ignite/examples/ml/preprocessing/BinarizationExample.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/preprocessing/BinarizationExample.java b/examples/src/main/java/org/apache/ignite/examples/ml/preprocessing/BinarizationExample.java index 4c873d9..a1e7672 100644 --- a/examples/src/main/java/org/apache/ignite/examples/ml/preprocessing/BinarizationExample.java +++ b/examples/src/main/java/org/apache/ignite/examples/ml/preprocessing/BinarizationExample.java @@ -17,13 +17,13 @@ package org.apache.ignite.examples.ml.preprocessing; -import java.util.Arrays; import org.apache.ignite.Ignite; import org.apache.ignite.IgniteCache; import org.apache.ignite.Ignition; import org.apache.ignite.cache.affinity.rendezvous.RendezvousAffinityFunction; import org.apache.ignite.configuration.CacheConfiguration; import org.apache.ignite.examples.ml.dataset.model.Person; +import org.apache.ignite.examples.ml.util.DatasetHelper; import org.apache.ignite.ml.dataset.DatasetFactory; import org.apache.ignite.ml.dataset.primitive.SimpleDataset; import org.apache.ignite.ml.math.functions.IgniteBiFunction; @@ -33,6 +33,15 @@ import org.apache.ignite.ml.preprocessing.binarization.BinarizationTrainer; /** * Example that shows how to use binarization preprocessor to binarize data. 
+ * <p> + * Code in this example launches Ignite grid and fills the cache with simple test data.</p> + * <p> + * After that it defines preprocessors that extract features from an upstream data and normalize features.</p> + * <p> + * Finally, it creates the dataset based on the processed data and uses Dataset API to find and output + * various statistical metrics of the data.</p> + * <p> + * You can change the test data used in this example and re-run it to explore this functionality further.</p> */ public class BinarizationExample { /** Run example. */ @@ -54,25 +63,7 @@ public class BinarizationExample { // Creates a cache based simple dataset containing features and providing standard dataset API. try (SimpleDataset<?> dataset = DatasetFactory.createSimpleDataset(ignite, persons, preprocessor)) { - // Calculation of the mean value. This calculation will be performed in map-reduce manner. - double[] mean = dataset.mean(); - System.out.println("Mean \n\t" + Arrays.toString(mean)); - - // Calculation of the standard deviation. This calculation will be performed in map-reduce manner. - double[] std = dataset.std(); - System.out.println("Standard deviation \n\t" + Arrays.toString(std)); - - // Calculation of the covariance matrix. This calculation will be performed in map-reduce manner. - double[][] cov = dataset.cov(); - System.out.println("Covariance matrix "); - for (double[] row : cov) - System.out.println("\t" + Arrays.toString(row)); - - // Calculation of the correlation matrix. This calculation will be performed in map-reduce manner. 
- double[][] corr = dataset.corr(); - System.out.println("Correlation matrix "); - for (double[] row : corr) - System.out.println("\t" + Arrays.toString(row)); + new DatasetHelper(dataset).describe(); } System.out.println(">>> Binarization example completed."); http://git-wip-us.apache.org/repos/asf/ignite/blob/fea694fe/examples/src/main/java/org/apache/ignite/examples/ml/preprocessing/ImputingExample.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/preprocessing/ImputingExample.java b/examples/src/main/java/org/apache/ignite/examples/ml/preprocessing/ImputingExample.java index 3ea52d8..eefe063 100644 --- a/examples/src/main/java/org/apache/ignite/examples/ml/preprocessing/ImputingExample.java +++ b/examples/src/main/java/org/apache/ignite/examples/ml/preprocessing/ImputingExample.java @@ -17,13 +17,13 @@ package org.apache.ignite.examples.ml.preprocessing; -import java.util.Arrays; import org.apache.ignite.Ignite; import org.apache.ignite.IgniteCache; import org.apache.ignite.Ignition; import org.apache.ignite.cache.affinity.rendezvous.RendezvousAffinityFunction; import org.apache.ignite.configuration.CacheConfiguration; import org.apache.ignite.examples.ml.dataset.model.Person; +import org.apache.ignite.examples.ml.util.DatasetHelper; import org.apache.ignite.ml.dataset.DatasetFactory; import org.apache.ignite.ml.dataset.primitive.SimpleDataset; import org.apache.ignite.ml.math.functions.IgniteBiFunction; @@ -32,7 +32,17 @@ import org.apache.ignite.ml.math.primitives.vector.VectorUtils; import org.apache.ignite.ml.preprocessing.imputing.ImputerTrainer; /** - * Example that shows how to use Imputing preprocessor to impute the missing value in the given data. + * Example that shows how to use <a href="https://en.wikipedia.org/wiki/Imputation_(statistics)">Imputing</a> + * preprocessor to impute the missing value in the given data. 
+ * <p> + * Code in this example launches Ignite grid and fills the cache with simple test data.</p> + * <p> + * After that it defines preprocessors that extract features from an upstream data and impute missing values.</p> + * <p> + * Finally, it creates the dataset based on the processed data and uses Dataset API to find and output + * various statistical metrics of the data.</p> + * <p> + * You can change the test data used in this example and re-run it to explore this functionality further.</p> */ public class ImputingExample { /** Run example. */ @@ -54,25 +64,7 @@ public class ImputingExample { // Creates a cache based simple dataset containing features and providing standard dataset API. try (SimpleDataset<?> dataset = DatasetFactory.createSimpleDataset(ignite, persons, preprocessor)) { - // Calculation of the mean value. This calculation will be performed in map-reduce manner. - double[] mean = dataset.mean(); - System.out.println("Mean \n\t" + Arrays.toString(mean)); - - // Calculation of the standard deviation. This calculation will be performed in map-reduce manner. - double[] std = dataset.std(); - System.out.println("Standard deviation \n\t" + Arrays.toString(std)); - - // Calculation of the covariance matrix. This calculation will be performed in map-reduce manner. - double[][] cov = dataset.cov(); - System.out.println("Covariance matrix "); - for (double[] row : cov) - System.out.println("\t" + Arrays.toString(row)); - - // Calculation of the correlation matrix. This calculation will be performed in map-reduce manner. 
- double[][] corr = dataset.corr(); - System.out.println("Correlation matrix "); - for (double[] row : corr) - System.out.println("\t" + Arrays.toString(row)); + new DatasetHelper(dataset).describe(); } System.out.println(">>> Imputing example completed."); http://git-wip-us.apache.org/repos/asf/ignite/blob/fea694fe/examples/src/main/java/org/apache/ignite/examples/ml/preprocessing/ImputingWithMostFrequentValuesExample.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/preprocessing/ImputingWithMostFrequentValuesExample.java b/examples/src/main/java/org/apache/ignite/examples/ml/preprocessing/ImputingWithMostFrequentValuesExample.java index 47a5728..8e39409 100644 --- a/examples/src/main/java/org/apache/ignite/examples/ml/preprocessing/ImputingWithMostFrequentValuesExample.java +++ b/examples/src/main/java/org/apache/ignite/examples/ml/preprocessing/ImputingWithMostFrequentValuesExample.java @@ -17,13 +17,13 @@ package org.apache.ignite.examples.ml.preprocessing; -import java.util.Arrays; import org.apache.ignite.Ignite; import org.apache.ignite.IgniteCache; import org.apache.ignite.Ignition; import org.apache.ignite.cache.affinity.rendezvous.RendezvousAffinityFunction; import org.apache.ignite.configuration.CacheConfiguration; import org.apache.ignite.examples.ml.dataset.model.Person; +import org.apache.ignite.examples.ml.util.DatasetHelper; import org.apache.ignite.ml.dataset.DatasetFactory; import org.apache.ignite.ml.dataset.primitive.SimpleDataset; import org.apache.ignite.ml.math.functions.IgniteBiFunction; @@ -33,13 +33,23 @@ import org.apache.ignite.ml.preprocessing.imputing.ImputerTrainer; import org.apache.ignite.ml.preprocessing.imputing.ImputingStrategy; /** - * Example that shows how to use Imputing preprocessor to impute the missing values in the given data. 
+ * Example that shows how to use <a href="https://en.wikipedia.org/wiki/Imputation_(statistics)">Imputing</a> + * preprocessor to impute the missing values in the given data with most frequent values. + * <p> + * Code in this example launches Ignite grid and fills the cache with simple test data.</p> + * <p> + * After that it defines preprocessors that extract features from an upstream data and impute missing values.</p> + * <p> + * Finally, it creates the dataset based on the processed data and uses Dataset API to find and output + * various statistical metrics of the data.</p> + * <p> + * You can change the test data used in this example and re-run it to explore this functionality further.</p> */ public class ImputingWithMostFrequentValuesExample { /** Run example. */ public static void main(String[] args) throws Exception { try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) { - System.out.println(">>> Imputing example started."); + System.out.println(">>> Imputing with most frequent values example started."); IgniteCache<Integer, Person> persons = createCache(ignite); @@ -56,28 +66,10 @@ public class ImputingWithMostFrequentValuesExample { // Creates a cache based simple dataset containing features and providing standard dataset API. try (SimpleDataset<?> dataset = DatasetFactory.createSimpleDataset(ignite, persons, preprocessor)) { - // Calculation of the mean value. This calculation will be performed in map-reduce manner. - double[] mean = dataset.mean(); - System.out.println("Mean \n\t" + Arrays.toString(mean)); - - // Calculation of the standard deviation. This calculation will be performed in map-reduce manner. - double[] std = dataset.std(); - System.out.println("Standard deviation \n\t" + Arrays.toString(std)); - - // Calculation of the covariance matrix. This calculation will be performed in map-reduce manner. 
- double[][] cov = dataset.cov(); - System.out.println("Covariance matrix "); - for (double[] row : cov) - System.out.println("\t" + Arrays.toString(row)); - - // Calculation of the correlation matrix. This calculation will be performed in map-reduce manner. - double[][] corr = dataset.corr(); - System.out.println("Correlation matrix "); - for (double[] row : corr) - System.out.println("\t" + Arrays.toString(row)); + new DatasetHelper(dataset).describe(); } - System.out.println(">>> Imputing example completed."); + System.out.println(">>> Imputing with most frequent values example completed."); } } http://git-wip-us.apache.org/repos/asf/ignite/blob/fea694fe/examples/src/main/java/org/apache/ignite/examples/ml/preprocessing/MinMaxScalerExample.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/preprocessing/MinMaxScalerExample.java b/examples/src/main/java/org/apache/ignite/examples/ml/preprocessing/MinMaxScalerExample.java index 3f3b0d6..c1b32ab 100644 --- a/examples/src/main/java/org/apache/ignite/examples/ml/preprocessing/MinMaxScalerExample.java +++ b/examples/src/main/java/org/apache/ignite/examples/ml/preprocessing/MinMaxScalerExample.java @@ -17,13 +17,13 @@ package org.apache.ignite.examples.ml.preprocessing; -import java.util.Arrays; import org.apache.ignite.Ignite; import org.apache.ignite.IgniteCache; import org.apache.ignite.Ignition; import org.apache.ignite.cache.affinity.rendezvous.RendezvousAffinityFunction; import org.apache.ignite.configuration.CacheConfiguration; import org.apache.ignite.examples.ml.dataset.model.Person; +import org.apache.ignite.examples.ml.util.DatasetHelper; import org.apache.ignite.ml.dataset.DatasetFactory; import org.apache.ignite.ml.dataset.primitive.SimpleDataset; import org.apache.ignite.ml.math.functions.IgniteBiFunction; @@ -33,10 +33,19 @@ import org.apache.ignite.ml.preprocessing.minmaxscaling.MinMaxScalerTrainer; /** * Example 
that shows how to use MinMaxScaler preprocessor to scale the given data. - * + * <p> * Machine learning preprocessors are built as a chain. Most often a first preprocessor is a feature extractor as shown * in this example. The second preprocessor here is a MinMaxScaler preprocessor which is built on top of the feature - * extractor and represents a chain of itself and the underlying feature extractor. + * extractor and represents a chain of itself and the underlying feature extractor.</p> + * <p> + * Code in this example launches Ignite grid and fills the cache with simple test data.</p> + * <p> + * After that it defines preprocessors that extract features from an upstream data and normalize their values.</p> + * <p> + * Finally, it creates the dataset based on the processed data and uses Dataset API to find and output + * various statistical metrics of the data.</p> + * <p> + * You can change the test data used in this example and re-run it to explore this functionality further.</p> */ public class MinMaxScalerExample { /** Run example. */ @@ -58,25 +67,7 @@ public class MinMaxScalerExample { // Creates a cache based simple dataset containing features and providing standard dataset API. try (SimpleDataset<?> dataset = DatasetFactory.createSimpleDataset(ignite, persons, preprocessor)) { - // Calculation of the mean value. This calculation will be performed in map-reduce manner. - double[] mean = dataset.mean(); - System.out.println("Mean \n\t" + Arrays.toString(mean)); - - // Calculation of the standard deviation. This calculation will be performed in map-reduce manner. - double[] std = dataset.std(); - System.out.println("Standard deviation \n\t" + Arrays.toString(std)); - - // Calculation of the covariance matrix. This calculation will be performed in map-reduce manner. 
- double[][] cov = dataset.cov(); - System.out.println("Covariance matrix "); - for (double[] row : cov) - System.out.println("\t" + Arrays.toString(row)); - - // Calculation of the correlation matrix. This calculation will be performed in map-reduce manner. - double[][] corr = dataset.corr(); - System.out.println("Correlation matrix "); - for (double[] row : corr) - System.out.println("\t" + Arrays.toString(row)); + new DatasetHelper(dataset).describe(); } System.out.println(">>> Normalization example completed."); http://git-wip-us.apache.org/repos/asf/ignite/blob/fea694fe/examples/src/main/java/org/apache/ignite/examples/ml/preprocessing/NormalizationExample.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/preprocessing/NormalizationExample.java b/examples/src/main/java/org/apache/ignite/examples/ml/preprocessing/NormalizationExample.java index b8581d0..3159845 100644 --- a/examples/src/main/java/org/apache/ignite/examples/ml/preprocessing/NormalizationExample.java +++ b/examples/src/main/java/org/apache/ignite/examples/ml/preprocessing/NormalizationExample.java @@ -17,13 +17,13 @@ package org.apache.ignite.examples.ml.preprocessing; -import java.util.Arrays; import org.apache.ignite.Ignite; import org.apache.ignite.IgniteCache; import org.apache.ignite.Ignition; import org.apache.ignite.cache.affinity.rendezvous.RendezvousAffinityFunction; import org.apache.ignite.configuration.CacheConfiguration; import org.apache.ignite.examples.ml.dataset.model.Person; +import org.apache.ignite.examples.ml.util.DatasetHelper; import org.apache.ignite.ml.dataset.DatasetFactory; import org.apache.ignite.ml.dataset.primitive.SimpleDataset; import org.apache.ignite.ml.math.functions.IgniteBiFunction; @@ -33,6 +33,15 @@ import org.apache.ignite.ml.preprocessing.normalization.NormalizationTrainer; /** * Example that shows how to use normalization preprocessor to normalize each vector in 
the given data. + * <p> + * Code in this example launches Ignite grid and fills the cache with simple test data.</p> + * <p> + * After that it defines preprocessors that extract features from an upstream data and normalize their values.</p> + * <p> + * Finally, it creates the dataset based on the processed data and uses Dataset API to find and output + * various statistical metrics of the data.</p> + * <p> + * You can change the test data used in this example and re-run it to explore this functionality further.</p> */ public class NormalizationExample { /** Run example. */ @@ -55,25 +64,7 @@ public class NormalizationExample { // Creates a cache based simple dataset containing features and providing standard dataset API. try (SimpleDataset<?> dataset = DatasetFactory.createSimpleDataset(ignite, persons, preprocessor)) { - // Calculation of the mean value. This calculation will be performed in map-reduce manner. - double[] mean = dataset.mean(); - System.out.println("Mean \n\t" + Arrays.toString(mean)); - - // Calculation of the standard deviation. This calculation will be performed in map-reduce manner. - double[] std = dataset.std(); - System.out.println("Standard deviation \n\t" + Arrays.toString(std)); - - // Calculation of the covariance matrix. This calculation will be performed in map-reduce manner. - double[][] cov = dataset.cov(); - System.out.println("Covariance matrix "); - for (double[] row : cov) - System.out.println("\t" + Arrays.toString(row)); - - // Calculation of the correlation matrix. This calculation will be performed in map-reduce manner. 
- double[][] corr = dataset.corr(); - System.out.println("Correlation matrix "); - for (double[] row : corr) - System.out.println("\t" + Arrays.toString(row)); + new DatasetHelper(dataset).describe(); } System.out.println(">>> Normalization example completed."); http://git-wip-us.apache.org/repos/asf/ignite/blob/fea694fe/examples/src/main/java/org/apache/ignite/examples/ml/regression/linear/LinearRegressionLSQRTrainerExample.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/regression/linear/LinearRegressionLSQRTrainerExample.java b/examples/src/main/java/org/apache/ignite/examples/ml/regression/linear/LinearRegressionLSQRTrainerExample.java index 47cbb76..efababf 100644 --- a/examples/src/main/java/org/apache/ignite/examples/ml/regression/linear/LinearRegressionLSQRTrainerExample.java +++ b/examples/src/main/java/org/apache/ignite/examples/ml/regression/linear/LinearRegressionLSQRTrainerExample.java @@ -18,15 +18,13 @@ package org.apache.ignite.examples.ml.regression.linear; import java.util.Arrays; -import java.util.UUID; import javax.cache.Cache; import org.apache.ignite.Ignite; import org.apache.ignite.IgniteCache; import org.apache.ignite.Ignition; -import org.apache.ignite.cache.affinity.rendezvous.RendezvousAffinityFunction; import org.apache.ignite.cache.query.QueryCursor; import org.apache.ignite.cache.query.ScanQuery; -import org.apache.ignite.configuration.CacheConfiguration; +import org.apache.ignite.examples.ml.util.TestCache; import org.apache.ignite.ml.math.primitives.vector.VectorUtils; import org.apache.ignite.ml.math.primitives.vector.impl.DenseVector; import org.apache.ignite.ml.regressions.linear.LinearRegressionLSQRTrainer; @@ -34,9 +32,16 @@ import org.apache.ignite.ml.regressions.linear.LinearRegressionModel; import org.apache.ignite.thread.IgniteThread; /** - * Run linear regression model over cached dataset. 
- * - * @see LinearRegressionLSQRTrainer + * Run linear regression model ({@link LinearRegressionLSQRTrainer}) over cached dataset. + * <p> + * Code in this example launches Ignite grid and fills the cache with simple test data.</p> + * <p> + * After that it trains the linear regression model based on the specified data.</p> + * <p> + * Finally, this example loops over the test set of data points, applies the trained model to predict the target value + * and compares prediction to expected outcome (ground truth).</p> + * <p> + * You can change the test data used in this example and re-run it to explore this algorithm further.</p> */ public class LinearRegressionLSQRTrainerExample { /** */ @@ -106,7 +111,7 @@ public class LinearRegressionLSQRTrainerExample { IgniteThread igniteThread = new IgniteThread(ignite.configuration().getIgniteInstanceName(), LinearRegressionLSQRTrainerExample.class.getSimpleName(), () -> { - IgniteCache<Integer, double[]> dataCache = getTestCache(ignite); + IgniteCache<Integer, double[]> dataCache = new TestCache(ignite).get(data); System.out.println(">>> Create new linear regression trainer object."); LinearRegressionLSQRTrainer trainer = new LinearRegressionLSQRTrainer(); @@ -138,6 +143,8 @@ public class LinearRegressionLSQRTrainerExample { } System.out.println(">>> ---------------------------------"); + + System.out.println(">>> Linear regression model over cache based dataset usage example completed."); }); igniteThread.start(); @@ -145,23 +152,4 @@ public class LinearRegressionLSQRTrainerExample { igniteThread.join(); } } - - /** - * Fills cache with data and returns it. - * - * @param ignite Ignite instance. - * @return Filled Ignite Cache. 
- */ - private static IgniteCache<Integer, double[]> getTestCache(Ignite ignite) { - CacheConfiguration<Integer, double[]> cacheConfiguration = new CacheConfiguration<>(); - cacheConfiguration.setName("TEST_" + UUID.randomUUID()); - cacheConfiguration.setAffinity(new RendezvousAffinityFunction(false, 10)); - - IgniteCache<Integer, double[]> cache = ignite.createCache(cacheConfiguration); - - for (int i = 0; i < data.length; i++) - cache.put(i, data[i]); - - return cache; - } } http://git-wip-us.apache.org/repos/asf/ignite/blob/fea694fe/examples/src/main/java/org/apache/ignite/examples/ml/regression/linear/LinearRegressionLSQRTrainerWithMinMaxScalerExample.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/regression/linear/LinearRegressionLSQRTrainerWithMinMaxScalerExample.java b/examples/src/main/java/org/apache/ignite/examples/ml/regression/linear/LinearRegressionLSQRTrainerWithMinMaxScalerExample.java index 03c82ef..39f3771 100644 --- a/examples/src/main/java/org/apache/ignite/examples/ml/regression/linear/LinearRegressionLSQRTrainerWithMinMaxScalerExample.java +++ b/examples/src/main/java/org/apache/ignite/examples/ml/regression/linear/LinearRegressionLSQRTrainerWithMinMaxScalerExample.java @@ -18,15 +18,13 @@ package org.apache.ignite.examples.ml.regression.linear; import java.util.Arrays; -import java.util.UUID; import javax.cache.Cache; import org.apache.ignite.Ignite; import org.apache.ignite.IgniteCache; import org.apache.ignite.Ignition; -import org.apache.ignite.cache.affinity.rendezvous.RendezvousAffinityFunction; import org.apache.ignite.cache.query.QueryCursor; import org.apache.ignite.cache.query.ScanQuery; -import org.apache.ignite.configuration.CacheConfiguration; +import org.apache.ignite.examples.ml.util.TestCache; import org.apache.ignite.ml.math.functions.IgniteBiFunction; import org.apache.ignite.ml.math.primitives.vector.Vector; import 
org.apache.ignite.ml.math.primitives.vector.VectorUtils; @@ -37,11 +35,19 @@ import org.apache.ignite.ml.regressions.linear.LinearRegressionModel; import org.apache.ignite.thread.IgniteThread; /** - * Run linear regression model over cached dataset. - * - * @see LinearRegressionLSQRTrainer - * @see MinMaxScalerTrainer - * @see MinMaxScalerPreprocessor + * Run linear regression model ({@link LinearRegressionLSQRTrainer}) over cached dataset that was created using + * a minmaxscaling preprocessor ({@link MinMaxScalerTrainer}, {@link MinMaxScalerPreprocessor}). + * <p> + * Code in this example launches Ignite grid, fills the cache with simple test data, and defines minmaxscaling + * trainer and preprocessor.</p> + * <p> + * After that it trains the linear regression model based on the specified data that has been processed + * using minmaxscaling.</p> + * <p> + * Finally, this example loops over the test set of data points, applies the trained model to predict the target + * value and compares prediction to expected outcome (ground truth).</p> + * <p> + * You can change the test data used in this example and re-run it to explore this algorithm further.</p> */ public class LinearRegressionLSQRTrainerWithMinMaxScalerExample { /** */ @@ -104,14 +110,14 @@ public class LinearRegressionLSQRTrainerWithMinMaxScalerExample { /** Run example. */ public static void main(String[] args) throws InterruptedException { System.out.println(); - System.out.println(">>> Linear regression model over cached dataset usage example started."); + System.out.println(">>> Linear regression model with minmaxscaling preprocessor over cached dataset usage example started."); // Start ignite grid. 
try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) { System.out.println(">>> Ignite grid started."); IgniteThread igniteThread = new IgniteThread(ignite.configuration().getIgniteInstanceName(), LinearRegressionLSQRTrainerWithMinMaxScalerExample.class.getSimpleName(), () -> { - IgniteCache<Integer, Vector> dataCache = getTestCache(ignite); + IgniteCache<Integer, Vector> dataCache = new TestCache(ignite).getVectors(data); System.out.println(">>> Create new minmaxscaling trainer object."); MinMaxScalerTrainer<Integer, Vector> normalizationTrainer = new MinMaxScalerTrainer<>(); @@ -151,6 +157,8 @@ public class LinearRegressionLSQRTrainerWithMinMaxScalerExample { } System.out.println(">>> ---------------------------------"); + + System.out.println(">>> Linear regression model with minmaxscaling preprocessor over cache based dataset usage example completed."); }); igniteThread.start(); @@ -158,23 +166,4 @@ public class LinearRegressionLSQRTrainerWithMinMaxScalerExample { igniteThread.join(); } } - - /** - * Fills cache with data and returns it. - * - * @param ignite Ignite instance. - * @return Filled Ignite Cache. 
- */ - private static IgniteCache<Integer, Vector> getTestCache(Ignite ignite) { - CacheConfiguration<Integer, Vector> cacheConfiguration = new CacheConfiguration<>(); - cacheConfiguration.setName("TEST_" + UUID.randomUUID()); - cacheConfiguration.setAffinity(new RendezvousAffinityFunction(false, 10)); - - IgniteCache<Integer, Vector> cache = ignite.createCache(cacheConfiguration); - - for (int i = 0; i < data.length; i++) - cache.put(i, VectorUtils.of(data[i])); - - return cache; - } } http://git-wip-us.apache.org/repos/asf/ignite/blob/fea694fe/examples/src/main/java/org/apache/ignite/examples/ml/regression/linear/LinearRegressionSGDTrainerExample.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/regression/linear/LinearRegressionSGDTrainerExample.java b/examples/src/main/java/org/apache/ignite/examples/ml/regression/linear/LinearRegressionSGDTrainerExample.java index a518c87..bc7cd6f 100644 --- a/examples/src/main/java/org/apache/ignite/examples/ml/regression/linear/LinearRegressionSGDTrainerExample.java +++ b/examples/src/main/java/org/apache/ignite/examples/ml/regression/linear/LinearRegressionSGDTrainerExample.java @@ -18,15 +18,13 @@ package org.apache.ignite.examples.ml.regression.linear; import java.util.Arrays; -import java.util.UUID; import javax.cache.Cache; import org.apache.ignite.Ignite; import org.apache.ignite.IgniteCache; import org.apache.ignite.Ignition; -import org.apache.ignite.cache.affinity.rendezvous.RendezvousAffinityFunction; import org.apache.ignite.cache.query.QueryCursor; import org.apache.ignite.cache.query.ScanQuery; -import org.apache.ignite.configuration.CacheConfiguration; +import org.apache.ignite.examples.ml.util.TestCache; import org.apache.ignite.ml.math.primitives.vector.VectorUtils; import org.apache.ignite.ml.math.primitives.vector.impl.DenseVector; import org.apache.ignite.ml.nn.UpdatesStrategy; @@ -37,9 +35,18 @@ import 
org.apache.ignite.ml.regressions.linear.LinearRegressionSGDTrainer; import org.apache.ignite.thread.IgniteThread; /** - * Run linear regression model over cached dataset. - * - * @see LinearRegressionSGDTrainer + * Run linear regression model based on stochastic gradient descent algorithm ({@link LinearRegressionSGDTrainer}) + * over cached dataset. + * <p> + * Code in this example launches Ignite grid and fills the cache with simple test data.</p> + * <p> + * After that it trains the linear regression model based on stochastic gradient descent algorithm using + * the specified data.</p> + * <p> + * Finally, this example loops over the test set of data points, applies the trained model to predict the target value + * and compares prediction to expected outcome (ground truth).</p> + * <p> + * You can change the test data used in this example and re-run it to explore this algorithm further.</p> */ public class LinearRegressionSGDTrainerExample { /** */ @@ -109,7 +116,7 @@ public class LinearRegressionSGDTrainerExample { IgniteThread igniteThread = new IgniteThread(ignite.configuration().getIgniteInstanceName(), LinearRegressionSGDTrainerExample.class.getSimpleName(), () -> { - IgniteCache<Integer, double[]> dataCache = getTestCache(ignite); + IgniteCache<Integer, double[]> dataCache = new TestCache(ignite).get(data); System.out.println(">>> Create new linear regression trainer object."); LinearRegressionSGDTrainer<?> trainer = new LinearRegressionSGDTrainer<>(new UpdatesStrategy<>( @@ -145,6 +152,8 @@ public class LinearRegressionSGDTrainerExample { } System.out.println(">>> ---------------------------------"); + + System.out.println(">>> Linear regression model over cache based dataset usage example completed."); }); igniteThread.start(); @@ -152,23 +161,4 @@ public class LinearRegressionSGDTrainerExample { igniteThread.join(); } } - - /** - * Fills cache with data and returns it. - * - * @param ignite Ignite instance. - * @return Filled Ignite Cache. 
- */ - private static IgniteCache<Integer, double[]> getTestCache(Ignite ignite) { - CacheConfiguration<Integer, double[]> cacheConfiguration = new CacheConfiguration<>(); - cacheConfiguration.setName("TEST_" + UUID.randomUUID()); - cacheConfiguration.setAffinity(new RendezvousAffinityFunction(false, 10)); - - IgniteCache<Integer, double[]> cache = ignite.createCache(cacheConfiguration); - - for (int i = 0; i < data.length; i++) - cache.put(i, data[i]); - - return cache; - } } http://git-wip-us.apache.org/repos/asf/ignite/blob/fea694fe/examples/src/main/java/org/apache/ignite/examples/ml/regression/logistic/binary/LogisticRegressionSGDTrainerExample.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/regression/logistic/binary/LogisticRegressionSGDTrainerExample.java b/examples/src/main/java/org/apache/ignite/examples/ml/regression/logistic/binary/LogisticRegressionSGDTrainerExample.java index 5f3350b..1fe38f3 100644 --- a/examples/src/main/java/org/apache/ignite/examples/ml/regression/logistic/binary/LogisticRegressionSGDTrainerExample.java +++ b/examples/src/main/java/org/apache/ignite/examples/ml/regression/logistic/binary/LogisticRegressionSGDTrainerExample.java @@ -18,15 +18,13 @@ package org.apache.ignite.examples.ml.regression.logistic.binary; import java.util.Arrays; -import java.util.UUID; import javax.cache.Cache; import org.apache.ignite.Ignite; import org.apache.ignite.IgniteCache; import org.apache.ignite.Ignition; -import org.apache.ignite.cache.affinity.rendezvous.RendezvousAffinityFunction; import org.apache.ignite.cache.query.QueryCursor; import org.apache.ignite.cache.query.ScanQuery; -import org.apache.ignite.configuration.CacheConfiguration; +import org.apache.ignite.examples.ml.util.TestCache; import org.apache.ignite.ml.math.primitives.vector.VectorUtils; import org.apache.ignite.ml.math.primitives.vector.impl.DenseVector; import 
org.apache.ignite.ml.nn.UpdatesStrategy; @@ -37,9 +35,19 @@ import org.apache.ignite.ml.regressions.logistic.binomial.LogisticRegressionSGDT import org.apache.ignite.thread.IgniteThread; /** - * Run logistic regression model over distributed cache. - * - * @see LogisticRegressionSGDTrainer + * Run logistic regression model based on stochastic gradient descent algorithm ({@link LogisticRegressionSGDTrainer}) + * over distributed cache. + * <p> + * Code in this example launches Ignite grid and fills the cache with test data points (based on the + * <a href="https://en.wikipedia.org/wiki/Iris_flower_data_set">Iris dataset</a>).</p> + * <p> + * After that it trains the logistic regression model based on the specified data.</p> + * <p> + * Finally, this example loops over the test set of data points, applies the trained model to predict the target value, + * compares prediction to expected outcome (ground truth), and builds + * <a href="https://en.wikipedia.org/wiki/Confusion_matrix">confusion matrix</a>.</p> + * <p> + * You can change the test data used in this example and re-run it to explore this algorithm further.</p> */ public class LogisticRegressionSGDTrainerExample { /** Run example. 
*/ @@ -52,7 +60,7 @@ public class LogisticRegressionSGDTrainerExample { IgniteThread igniteThread = new IgniteThread(ignite.configuration().getIgniteInstanceName(), LogisticRegressionSGDTrainerExample.class.getSimpleName(), () -> { - IgniteCache<Integer, double[]> dataCache = getTestCache(ignite); + IgniteCache<Integer, double[]> dataCache = new TestCache(ignite).get(data); System.out.println(">>> Create new logistic regression trainer object."); LogisticRegressionSGDTrainer<?> trainer = new LogisticRegressionSGDTrainer<>(new UpdatesStrategy<>( @@ -105,6 +113,8 @@ public class LogisticRegressionSGDTrainerExample { System.out.println("\n>>> Confusion matrix is " + Arrays.deepToString(confusionMtx)); System.out.println(">>> ---------------------------------"); + + System.out.println(">>> Logistic regression model over partitioned dataset usage example completed."); }); igniteThread.start(); @@ -112,25 +122,6 @@ public class LogisticRegressionSGDTrainerExample { igniteThread.join(); } } - /** - * Fills cache with data and returns it. - * - * @param ignite Ignite instance. - * @return Filled Ignite Cache. - */ - private static IgniteCache<Integer, double[]> getTestCache(Ignite ignite) { - CacheConfiguration<Integer, double[]> cacheConfiguration = new CacheConfiguration<>(); - cacheConfiguration.setName("TEST_" + UUID.randomUUID()); - cacheConfiguration.setAffinity(new RendezvousAffinityFunction(false, 10)); - - IgniteCache<Integer, double[]> cache = ignite.createCache(cacheConfiguration); - - for (int i = 0; i < data.length; i++) - cache.put(i, data[i]); - - return cache; - } - /** The 1st and 2nd classes from the Iris dataset. 
*/ private static final double[][] data = { http://git-wip-us.apache.org/repos/asf/ignite/blob/fea694fe/examples/src/main/java/org/apache/ignite/examples/ml/regression/logistic/multiclass/LogRegressionMultiClassClassificationExample.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/regression/logistic/multiclass/LogRegressionMultiClassClassificationExample.java b/examples/src/main/java/org/apache/ignite/examples/ml/regression/logistic/multiclass/LogRegressionMultiClassClassificationExample.java index 351f1c6..e670f01 100644 --- a/examples/src/main/java/org/apache/ignite/examples/ml/regression/logistic/multiclass/LogRegressionMultiClassClassificationExample.java +++ b/examples/src/main/java/org/apache/ignite/examples/ml/regression/logistic/multiclass/LogRegressionMultiClassClassificationExample.java @@ -18,15 +18,13 @@ package org.apache.ignite.examples.ml.regression.logistic.multiclass; import java.util.Arrays; -import java.util.UUID; import javax.cache.Cache; import org.apache.ignite.Ignite; import org.apache.ignite.IgniteCache; import org.apache.ignite.Ignition; -import org.apache.ignite.cache.affinity.rendezvous.RendezvousAffinityFunction; import org.apache.ignite.cache.query.QueryCursor; import org.apache.ignite.cache.query.ScanQuery; -import org.apache.ignite.configuration.CacheConfiguration; +import org.apache.ignite.examples.ml.util.TestCache; import org.apache.ignite.ml.math.functions.IgniteBiFunction; import org.apache.ignite.ml.math.primitives.vector.Vector; import org.apache.ignite.ml.math.primitives.vector.VectorUtils; @@ -37,14 +35,23 @@ import org.apache.ignite.ml.optimization.updatecalculators.SimpleGDUpdateCalcula import org.apache.ignite.ml.preprocessing.minmaxscaling.MinMaxScalerTrainer; import org.apache.ignite.ml.regressions.logistic.multiclass.LogRegressionMultiClassModel; import 
org.apache.ignite.ml.regressions.logistic.multiclass.LogRegressionMultiClassTrainer; -import org.apache.ignite.ml.svm.SVMLinearMultiClassClassificationModel; import org.apache.ignite.thread.IgniteThread; /** - * Run Logistic Regression multi-class classification trainer over distributed dataset to build two models: - * one with minmaxscaling and one without minmaxscaling. - * - * @see SVMLinearMultiClassClassificationModel + * Run Logistic Regression multi-class classification trainer ({@link LogRegressionMultiClassModel}) over distributed + * dataset to build two models: one with minmaxscaling and one without minmaxscaling. + * <p> + * Code in this example launches Ignite grid and fills the cache with test data points (preprocessed + * <a href="https://archive.ics.uci.edu/ml/datasets/Glass+Identification">Glass dataset</a>).</p> + * <p> + * After that it trains two logistic regression models based on the specified data - one model is with minmaxscaling + * and one without minmaxscaling.</p> + * <p> + * Finally, this example loops over the test set of data points, applies the trained models to predict the target value, + * compares prediction to expected outcome (ground truth), and builds + * <a href="https://en.wikipedia.org/wiki/Confusion_matrix">confusion matrices</a>.</p> + * <p> + * You can change the test data used in this example and re-run it to explore this algorithm further.</p> */ public class LogRegressionMultiClassClassificationExample { /** Run example. 
*/ @@ -57,7 +64,7 @@ public class LogRegressionMultiClassClassificationExample { IgniteThread igniteThread = new IgniteThread(ignite.configuration().getIgniteInstanceName(), LogRegressionMultiClassClassificationExample.class.getSimpleName(), () -> { - IgniteCache<Integer, Vector> dataCache = getTestCache(ignite); + IgniteCache<Integer, Vector> dataCache = new TestCache(ignite).getVectors(data); LogRegressionMultiClassTrainer<?> trainer = new LogRegressionMultiClassTrainer<>() .withUpdatesStgy(new UpdatesStrategy<>( @@ -157,6 +164,8 @@ public class LogRegressionMultiClassClassificationExample { System.out.println("\n>>> Absolute amount of errors " + amountOfErrorsWithNormalization); System.out.println("\n>>> Accuracy " + (1 - amountOfErrorsWithNormalization / (double)totalAmount)); System.out.println("\n>>> Confusion matrix is " + Arrays.deepToString(confusionMtxWithNormalization)); + + System.out.println(">>> Logistic Regression Multi-class classification model over cached dataset usage example completed."); } }); @@ -165,25 +174,6 @@ public class LogRegressionMultiClassClassificationExample { } } - /** - * Fills cache with data and returns it. - * - * @param ignite Ignite instance. - * @return Filled Ignite Cache. - */ - private static IgniteCache<Integer, Vector> getTestCache(Ignite ignite) { - CacheConfiguration<Integer, Vector> cacheConfiguration = new CacheConfiguration<>(); - cacheConfiguration.setName("TEST_" + UUID.randomUUID()); - cacheConfiguration.setAffinity(new RendezvousAffinityFunction(false, 10)); - - IgniteCache<Integer, Vector> cache = ignite.createCache(cacheConfiguration); - - for (int i = 0; i < data.length; i++) - cache.put(i, VectorUtils.of(data[i])); - - return cache; - } - /** The preprocessed Glass dataset from the Machine Learning Repository https://archive.ics.uci.edu/ml/datasets/Glass+Identification * There are 3 classes with labels: 1 {building_windows_float_processed}, 3 {vehicle_windows_float_processed}, 7 {headlamps}. 
* Feature names: 'Na-Sodium', 'Mg-Magnesium', 'Al-Aluminum', 'Ba-Barium', 'Fe-Iron'. http://git-wip-us.apache.org/repos/asf/ignite/blob/fea694fe/examples/src/main/java/org/apache/ignite/examples/ml/selection/cv/CrossValidationExample.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/selection/cv/CrossValidationExample.java b/examples/src/main/java/org/apache/ignite/examples/ml/selection/cv/CrossValidationExample.java index 83656c5..f1b5650 100644 --- a/examples/src/main/java/org/apache/ignite/examples/ml/selection/cv/CrossValidationExample.java +++ b/examples/src/main/java/org/apache/ignite/examples/ml/selection/cv/CrossValidationExample.java @@ -33,9 +33,11 @@ import org.apache.ignite.ml.tree.DecisionTreeNode; import org.apache.ignite.thread.IgniteThread; /** - * Run decision tree classification with cross validation. - * - * @see CrossValidation + * Run decision tree classification with cross validation ({@link CrossValidation}). 
+ * <p> + * Code in this example launches Ignite grid and fills the cache with pseudo random training data points.</p> + * <p> + * After that it creates classification trainer and computes cross-validated metrics based on the training set.</p> */ public class CrossValidationExample { /** http://git-wip-us.apache.org/repos/asf/ignite/blob/fea694fe/examples/src/main/java/org/apache/ignite/examples/ml/selection/split/TrainTestDatasetSplitterExample.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/selection/split/TrainTestDatasetSplitterExample.java b/examples/src/main/java/org/apache/ignite/examples/ml/selection/split/TrainTestDatasetSplitterExample.java index e310ded..53bd6b5 100644 --- a/examples/src/main/java/org/apache/ignite/examples/ml/selection/split/TrainTestDatasetSplitterExample.java +++ b/examples/src/main/java/org/apache/ignite/examples/ml/selection/split/TrainTestDatasetSplitterExample.java @@ -18,15 +18,13 @@ package org.apache.ignite.examples.ml.selection.split; import java.util.Arrays; -import java.util.UUID; import javax.cache.Cache; import org.apache.ignite.Ignite; import org.apache.ignite.IgniteCache; import org.apache.ignite.Ignition; -import org.apache.ignite.cache.affinity.rendezvous.RendezvousAffinityFunction; import org.apache.ignite.cache.query.QueryCursor; import org.apache.ignite.cache.query.ScanQuery; -import org.apache.ignite.configuration.CacheConfiguration; +import org.apache.ignite.examples.ml.util.TestCache; import org.apache.ignite.ml.math.primitives.vector.VectorUtils; import org.apache.ignite.ml.math.primitives.vector.impl.DenseVector; import org.apache.ignite.ml.regressions.linear.LinearRegressionLSQRTrainer; @@ -36,9 +34,18 @@ import org.apache.ignite.ml.selection.split.TrainTestSplit; import org.apache.ignite.thread.IgniteThread; /** - * Run linear regression model over dataset splitted on train and test subsets. 
- * - * @see TrainTestDatasetSplitter + * Run linear regression model over dataset split on train and test subsets ({@link TrainTestDatasetSplitter}). + * <p> + * Code in this example launches Ignite grid and fills the cache with simple test data.</p> + * <p> + * After that it creates dataset splitter and trains the linear regression model based on the specified data using + * this splitter.</p> + * <p> + * Finally, this example loops over the test set of data points, applies the trained model to predict the target value + * and compares prediction to expected outcome (ground truth).</p> + * <p> + * You can change the test data and split parameters used in this example and re-run it to explore this functionality + * further.</p> */ public class TrainTestDatasetSplitterExample { /** */ @@ -108,11 +115,12 @@ public class TrainTestDatasetSplitterExample { IgniteThread igniteThread = new IgniteThread(ignite.configuration().getIgniteInstanceName(), TrainTestDatasetSplitterExample.class.getSimpleName(), () -> { - IgniteCache<Integer, double[]> dataCache = getTestCache(ignite); + IgniteCache<Integer, double[]> dataCache = new TestCache(ignite).get(data); System.out.println(">>> Create new linear regression trainer object."); LinearRegressionLSQRTrainer trainer = new LinearRegressionLSQRTrainer(); + System.out.println(">>> Create new training dataset splitter object."); TrainTestSplit<Integer, double[]> split = new TrainTestDatasetSplitter<Integer, double[]>() .split(0.75); @@ -147,6 +155,8 @@ public class TrainTestDatasetSplitterExample { } System.out.println(">>> ---------------------------------"); + + System.out.println(">>> Linear regression model over cache based dataset usage example completed."); }); igniteThread.start(); @@ -154,23 +164,4 @@ public class TrainTestDatasetSplitterExample { igniteThread.join(); } } - - /** - * Fills cache with data and returns it. - * - * @param ignite Ignite instance. - * @return Filled Ignite Cache. 
- */ - private static IgniteCache<Integer, double[]> getTestCache(Ignite ignite) { - CacheConfiguration<Integer, double[]> cacheConfiguration = new CacheConfiguration<>(); - cacheConfiguration.setName("TEST_" + UUID.randomUUID()); - cacheConfiguration.setAffinity(new RendezvousAffinityFunction(false, 3)); - - IgniteCache<Integer, double[]> cache = ignite.createCache(cacheConfiguration); - - for (int i = 0; i < data.length; i++) - cache.put(i, data[i]); - - return cache; - } } http://git-wip-us.apache.org/repos/asf/ignite/blob/fea694fe/examples/src/main/java/org/apache/ignite/examples/ml/svm/binary/SVMBinaryClassificationExample.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/svm/binary/SVMBinaryClassificationExample.java b/examples/src/main/java/org/apache/ignite/examples/ml/svm/binary/SVMBinaryClassificationExample.java index 855517d..b923f4f 100644 --- a/examples/src/main/java/org/apache/ignite/examples/ml/svm/binary/SVMBinaryClassificationExample.java +++ b/examples/src/main/java/org/apache/ignite/examples/ml/svm/binary/SVMBinaryClassificationExample.java @@ -18,15 +18,13 @@ package org.apache.ignite.examples.ml.svm.binary; import java.util.Arrays; -import java.util.UUID; import javax.cache.Cache; import org.apache.ignite.Ignite; import org.apache.ignite.IgniteCache; import org.apache.ignite.Ignition; -import org.apache.ignite.cache.affinity.rendezvous.RendezvousAffinityFunction; import org.apache.ignite.cache.query.QueryCursor; import org.apache.ignite.cache.query.ScanQuery; -import org.apache.ignite.configuration.CacheConfiguration; +import org.apache.ignite.examples.ml.util.TestCache; import org.apache.ignite.ml.math.primitives.vector.VectorUtils; import org.apache.ignite.ml.math.primitives.vector.impl.DenseVector; import org.apache.ignite.ml.svm.SVMLinearBinaryClassificationModel; @@ -34,9 +32,18 @@ import 
org.apache.ignite.ml.svm.SVMLinearBinaryClassificationTrainer; import org.apache.ignite.thread.IgniteThread; /** - * Run SVM binary-class classification model over distributed dataset. - * - * @see SVMLinearBinaryClassificationModel + * Run SVM binary-class classification model ({@link SVMLinearBinaryClassificationModel}) over distributed dataset. + * <p> + * Code in this example launches Ignite grid and fills the cache with test data points (based on the + * <a href="https://en.wikipedia.org/wiki/Iris_flower_data_set">Iris dataset</a>).</p> + * <p> + * After that it trains the model based on the specified data using SVM algorithm.</p> + * <p> + * Finally, this example loops over the test set of data points, applies the trained model to predict the class + * of each point, compares prediction to expected outcome (ground truth), and builds + * <a href="https://en.wikipedia.org/wiki/Confusion_matrix">confusion matrix</a>.</p> + * <p> + * You can change the test data used in this example and re-run it to explore this algorithm further.</p> */ public class SVMBinaryClassificationExample { /** Run example. 
*/ @@ -49,7 +56,7 @@ public class SVMBinaryClassificationExample { IgniteThread igniteThread = new IgniteThread(ignite.configuration().getIgniteInstanceName(), SVMBinaryClassificationExample.class.getSimpleName(), () -> { - IgniteCache<Integer, double[]> dataCache = getTestCache(ignite); + IgniteCache<Integer, double[]> dataCache = new TestCache(ignite).get(data); SVMLinearBinaryClassificationTrainer trainer = new SVMLinearBinaryClassificationTrainer(); @@ -99,6 +106,8 @@ public class SVMBinaryClassificationExample { } System.out.println("\n>>> Confusion matrix is " + Arrays.deepToString(confusionMtx)); + + System.out.println(">>> Linear regression model over cache based dataset usage example completed."); }); igniteThread.start(); @@ -106,26 +115,6 @@ public class SVMBinaryClassificationExample { } } - /** - * Fills cache with data and returns it. - * - * @param ignite Ignite instance. - * @return Filled Ignite Cache. - */ - private static IgniteCache<Integer, double[]> getTestCache(Ignite ignite) { - CacheConfiguration<Integer, double[]> cacheConfiguration = new CacheConfiguration<>(); - cacheConfiguration.setName("TEST_" + UUID.randomUUID()); - cacheConfiguration.setAffinity(new RendezvousAffinityFunction(false, 10)); - - IgniteCache<Integer, double[]> cache = ignite.createCache(cacheConfiguration); - - for (int i = 0; i < data.length; i++) - cache.put(i, data[i]); - - return cache; - } - - /** The 1st and 2nd classes from the Iris dataset. */ private static final double[][] data = { {-1, 5.1, 3.5, 1.4, 0.2},