Repository: ignite Updated Branches: refs/heads/master 0a19d010f -> fea694fef
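A recurring change in the hunks below is that each example's private getTestCache(...) helper is removed in favor of the shared org.apache.ignite.examples.ml.util.TestCache utility (used as new TestCache(ignite).get(data) or new TestCache(ignite).getVectors(data)). The utility class itself is not part of this mail; a minimal sketch of what it presumably provides, reconstructed from the removed helpers, is given below (the committed class may differ in details):

    package org.apache.ignite.examples.ml.util;

    import java.util.UUID;
    import org.apache.ignite.Ignite;
    import org.apache.ignite.IgniteCache;
    import org.apache.ignite.cache.affinity.rendezvous.RendezvousAffinityFunction;
    import org.apache.ignite.configuration.CacheConfiguration;
    import org.apache.ignite.ml.math.primitives.vector.Vector;
    import org.apache.ignite.ml.math.primitives.vector.VectorUtils;

    /** Sketch of the shared test-cache helper, reconstructed from the removed getTestCache(...) methods. */
    public class TestCache {
        /** Ignite instance used to create the caches. */
        private final Ignite ignite;

        /** @param ignite Ignite instance. */
        public TestCache(Ignite ignite) {
            this.ignite = ignite;
        }

        /** Fills a cache with rows of raw double[] data and returns it. */
        public IgniteCache<Integer, double[]> get(double[][] data) {
            CacheConfiguration<Integer, double[]> cacheConfiguration = new CacheConfiguration<>();
            cacheConfiguration.setName("TEST_" + UUID.randomUUID());
            cacheConfiguration.setAffinity(new RendezvousAffinityFunction(false, 10));

            IgniteCache<Integer, double[]> cache = ignite.createCache(cacheConfiguration);

            for (int i = 0; i < data.length; i++)
                cache.put(i, data[i]);

            return cache;
        }

        /** Fills a cache with rows wrapped into Vector and returns it. */
        public IgniteCache<Integer, Vector> getVectors(double[][] data) {
            CacheConfiguration<Integer, Vector> cacheConfiguration = new CacheConfiguration<>();
            cacheConfiguration.setName("TEST_" + UUID.randomUUID());
            cacheConfiguration.setAffinity(new RendezvousAffinityFunction(false, 10));

            IgniteCache<Integer, Vector> cache = ignite.createCache(cacheConfiguration);

            for (int i = 0; i < data.length; i++)
                cache.put(i, VectorUtils.of(data[i]));

            return cache;
        }
    }

Call sites in the hunks below then reduce to a single line, for example: IgniteCache<Integer, Vector> dataCache = new TestCache(ignite).getVectors(data);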
http://git-wip-us.apache.org/repos/asf/ignite/blob/fea694fe/examples/src/main/java/org/apache/ignite/examples/ml/svm/multiclass/SVMMultiClassClassificationExample.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/svm/multiclass/SVMMultiClassClassificationExample.java b/examples/src/main/java/org/apache/ignite/examples/ml/svm/multiclass/SVMMultiClassClassificationExample.java index b9e24c0..b7ca448 100644 --- a/examples/src/main/java/org/apache/ignite/examples/ml/svm/multiclass/SVMMultiClassClassificationExample.java +++ b/examples/src/main/java/org/apache/ignite/examples/ml/svm/multiclass/SVMMultiClassClassificationExample.java @@ -18,15 +18,13 @@ package org.apache.ignite.examples.ml.svm.multiclass; import java.util.Arrays; -import java.util.UUID; import javax.cache.Cache; import org.apache.ignite.Ignite; import org.apache.ignite.IgniteCache; import org.apache.ignite.Ignition; -import org.apache.ignite.cache.affinity.rendezvous.RendezvousAffinityFunction; import org.apache.ignite.cache.query.QueryCursor; import org.apache.ignite.cache.query.ScanQuery; -import org.apache.ignite.configuration.CacheConfiguration; +import org.apache.ignite.examples.ml.util.TestCache; import org.apache.ignite.ml.math.functions.IgniteBiFunction; import org.apache.ignite.ml.math.primitives.vector.Vector; import org.apache.ignite.ml.math.primitives.vector.VectorUtils; @@ -37,10 +35,20 @@ import org.apache.ignite.ml.svm.SVMLinearMultiClassClassificationTrainer; import org.apache.ignite.thread.IgniteThread; /** - * Run SVM multi-class classification trainer over distributed dataset to build two models: - * one with minmaxscaling and one without minmaxscaling. - * - * @see SVMLinearMultiClassClassificationModel + * Run SVM multi-class classification trainer ({@link SVMLinearMultiClassClassificationModel}) over distributed dataset + * to build two models: one with minmaxscaling and one without minmaxscaling. + * <p> + * Code in this example launches Ignite grid and fills the cache with test data points (preprocessed + * <a href="https://archive.ics.uci.edu/ml/datasets/Glass+Identification">Glass dataset</a>).</p> + * <p> + * After that it trains two SVM multi-class models based on the specified data - one model with minmaxscaling + * and one without minmaxscaling.</p> + * <p> + * Finally, this example loops over the test set of data points, applies the trained models to predict which class + * each point belongs to, compares prediction to expected outcome (ground truth), and builds + * a <a href="https://en.wikipedia.org/wiki/Confusion_matrix">confusion matrix</a>.</p> + * <p> + * You can change the test data used in this example and re-run it to explore this algorithm further.</p> */ public class SVMMultiClassClassificationExample { /** Run example.
*/ @@ -53,7 +61,7 @@ public class SVMMultiClassClassificationExample { IgniteThread igniteThread = new IgniteThread(ignite.configuration().getIgniteInstanceName(), SVMMultiClassClassificationExample.class.getSimpleName(), () -> { - IgniteCache<Integer, Vector> dataCache = getTestCache(ignite); + IgniteCache<Integer, Vector> dataCache = new TestCache(ignite).getVectors(data); SVMLinearMultiClassClassificationTrainer trainer = new SVMLinearMultiClassClassificationTrainer(); @@ -144,6 +152,8 @@ public class SVMMultiClassClassificationExample { System.out.println("\n>>> Absolute amount of errors " + amountOfErrorsWithNormalization); System.out.println("\n>>> Accuracy " + (1 - amountOfErrorsWithNormalization / (double)totalAmount)); System.out.println("\n>>> Confusion matrix is " + Arrays.deepToString(confusionMtxWithNormalization)); + + System.out.println(">>> SVM multi-class classification model over cache based dataset usage example completed."); } }); @@ -152,25 +162,6 @@ public class SVMMultiClassClassificationExample { } } - /** - * Fills cache with data and returns it. - * - * @param ignite Ignite instance. - * @return Filled Ignite Cache. - */ - private static IgniteCache<Integer, Vector> getTestCache(Ignite ignite) { - CacheConfiguration<Integer, Vector> cacheConfiguration = new CacheConfiguration<>(); - cacheConfiguration.setName("TEST_" + UUID.randomUUID()); - cacheConfiguration.setAffinity(new RendezvousAffinityFunction(false, 10)); - - IgniteCache<Integer, Vector> cache = ignite.createCache(cacheConfiguration); - - for (int i = 0; i < data.length; i++) - cache.put(i, VectorUtils.of(data[i])); - - return cache; - } - /** The preprocessed Glass dataset from the Machine Learning Repository https://archive.ics.uci.edu/ml/datasets/Glass+Identification * There are 3 classes with labels: 1 {building_windows_float_processed}, 3 {vehicle_windows_float_processed}, 7 {headlamps}. * Feature names: 'Na-Sodium', 'Mg-Magnesium', 'Al-Aluminum', 'Ba-Barium', 'Fe-Iron'. http://git-wip-us.apache.org/repos/asf/ignite/blob/fea694fe/examples/src/main/java/org/apache/ignite/examples/ml/tree/DecisionTreeClassificationTrainerExample.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/tree/DecisionTreeClassificationTrainerExample.java b/examples/src/main/java/org/apache/ignite/examples/ml/tree/DecisionTreeClassificationTrainerExample.java index f5a804d..28a5fbc 100644 --- a/examples/src/main/java/org/apache/ignite/examples/ml/tree/DecisionTreeClassificationTrainerExample.java +++ b/examples/src/main/java/org/apache/ignite/examples/ml/tree/DecisionTreeClassificationTrainerExample.java @@ -30,6 +30,15 @@ import org.apache.ignite.thread.IgniteThread; /** * Example of using distributed {@link DecisionTreeClassificationTrainer}.
+ * <p> + * Code in this example launches Ignite grid and fills the cache with pseudo random training data points.</p> + * <p> + * After that it creates a classification trainer and uses it to train the model on the training set.</p> + * <p> + * Finally, this example loops over the pseudo randomly generated test set of data points, applies the trained model, + * and compares prediction to expected outcome.</p> + * <p> + * You can change the test data used in this example and re-run it to explore this algorithm further.</p> */ public class DecisionTreeClassificationTrainerExample { /** @@ -71,14 +80,20 @@ public class DecisionTreeClassificationTrainerExample { (k, v) -> v.lb ); + System.out.println(">>> Decision tree classification model: " + mdl); + // Calculate score. int correctPredictions = 0; for (int i = 0; i < 1000; i++) { LabeledPoint pnt = generatePoint(rnd); double prediction = mdl.apply(VectorUtils.of(pnt.x, pnt.y)); + double lbl = pnt.lb; + + if (i % 50 == 1) + System.out.printf(">>> test #: %d\t\t predicted: %.4f\t\tlabel: %.4f\n", i, prediction, lbl); - if (prediction == pnt.lb) + if (prediction == lbl) correctPredictions++; } http://git-wip-us.apache.org/repos/asf/ignite/blob/fea694fe/examples/src/main/java/org/apache/ignite/examples/ml/tree/DecisionTreeRegressionTrainerExample.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/tree/DecisionTreeRegressionTrainerExample.java b/examples/src/main/java/org/apache/ignite/examples/ml/tree/DecisionTreeRegressionTrainerExample.java index 3ebc56a..301df10 100644 --- a/examples/src/main/java/org/apache/ignite/examples/ml/tree/DecisionTreeRegressionTrainerExample.java +++ b/examples/src/main/java/org/apache/ignite/examples/ml/tree/DecisionTreeRegressionTrainerExample.java @@ -29,6 +29,16 @@ import org.apache.ignite.thread.IgniteThread; /** * Example of using distributed {@link DecisionTreeRegressionTrainer}. + * <p> + * Code in this example launches Ignite grid and fills the cache with generated test data points ({@code sin(x)} + * on interval {@code [0, 10)}).</p> + * <p> + * After that it creates a regression trainer and uses it to train the model on the training set.</p> + * <p> + * Finally, this example loops over the test data points, applies the trained model, and compares prediction + * to expected outcome (ground truth).</p> + * <p> + * You can change the test data used in this example and re-run it to explore this algorithm further.</p> */ public class DecisionTreeRegressionTrainerExample { /** @@ -92,7 +102,7 @@ public class DecisionTreeRegressionTrainerExample { } /** - * Generates {@code sin(x)} on interval [0, 10) and loads into the specified cache. + * Generates {@code sin(x)} on interval {@code [0, 10)} and loads into the specified cache.
*/ private static void generatePoints(IgniteCache<Integer, Point> trainingSet) { for (int i = 0; i < 1000; i++) { http://git-wip-us.apache.org/repos/asf/ignite/blob/fea694fe/examples/src/main/java/org/apache/ignite/examples/ml/tree/boosting/GDBOnTreesClassificationTrainerExample.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/tree/boosting/GDBOnTreesClassificationTrainerExample.java b/examples/src/main/java/org/apache/ignite/examples/ml/tree/boosting/GDBOnTreesClassificationTrainerExample.java index 384d2d9..130b91a 100644 --- a/examples/src/main/java/org/apache/ignite/examples/ml/tree/boosting/GDBOnTreesClassificationTrainerExample.java +++ b/examples/src/main/java/org/apache/ignite/examples/ml/tree/boosting/GDBOnTreesClassificationTrainerExample.java @@ -32,10 +32,10 @@ import org.jetbrains.annotations.NotNull; /** * Example represents a solution for the task of classification learning based on - * Gradient Boosting on trees implementation. It shows an initialization of {@link org.apache.ignite.ml.tree.boosting.GDBBinaryClassifierOnTreesTrainer}, + * Gradient Boosting on trees implementation. It shows an initialization of {@link GDBBinaryClassifierOnTreesTrainer}, * initialization of Ignite Cache, learning step and comparing of predicted and real values. - * - * In this example dataset is creating automatically by meander function f(x) = [sin(x) > 0]. + * <p> + * In this example dataset is created automatically by meander function {@code f(x) = [sin(x) > 0]}.</p> */ public class GDBOnTreesClassificationTrainerExample { /** @@ -44,6 +44,8 @@ public class GDBOnTreesClassificationTrainerExample { * @param args Command line arguments, none required. */ public static void main(String... args) throws InterruptedException { + System.out.println(); + System.out.println(">>> GDB classification trainer example started."); // Start ignite grid. try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) { System.out.println(">>> Ignite grid started."); @@ -100,7 +102,7 @@ public class GDBOnTreesClassificationTrainerExample { /** * Fill meander-like training data. * - * @param ignite Ignite. + * @param ignite Ignite instance. * @param trainingSetCfg Training set config. */ @NotNull private static IgniteCache<Integer, double[]> fillTrainingData(Ignite ignite, http://git-wip-us.apache.org/repos/asf/ignite/blob/fea694fe/examples/src/main/java/org/apache/ignite/examples/ml/tree/boosting/GDBOnTreesRegressionTrainerExample.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/tree/boosting/GDBOnTreesRegressionTrainerExample.java b/examples/src/main/java/org/apache/ignite/examples/ml/tree/boosting/GDBOnTreesRegressionTrainerExample.java index fa7a0d4..31dd2b0 100644 --- a/examples/src/main/java/org/apache/ignite/examples/ml/tree/boosting/GDBOnTreesRegressionTrainerExample.java +++ b/examples/src/main/java/org/apache/ignite/examples/ml/tree/boosting/GDBOnTreesRegressionTrainerExample.java @@ -32,10 +32,10 @@ import org.jetbrains.annotations.NotNull; /** * Example represents a solution for the task of regression learning based on - * Gradient Boosting on trees implementation. It shows an initialization of {@link org.apache.ignite.ml.tree.boosting.GDBRegressionOnTreesTrainer}, + * Gradient Boosting on trees implementation. 
It shows an initialization of {@link GDBRegressionOnTreesTrainer}, * initialization of Ignite Cache, learning step and comparing of predicted and real values. - * - * In this example dataset is creating automatically by parabolic function f(x) = x^2. + * <p> + * In this example dataset is created automatically by parabolic function {@code f(x) = x^2}.</p> */ public class GDBOnTreesRegressionTrainerExample { /** @@ -44,6 +44,8 @@ public class GDBOnTreesRegressionTrainerExample { * @param args Command line arguments, none required. */ public static void main(String... args) throws InterruptedException { + System.out.println(); + System.out.println(">>> GDB regression trainer example started."); // Start ignite grid. try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) { System.out.println(">>> Ignite grid started."); @@ -79,7 +81,7 @@ public class GDBOnTreesRegressionTrainerExample { System.out.println(">>> ---------------------------------"); - System.out.println(">>> GDB Regression trainer example completed."); + System.out.println(">>> GDB regression trainer example completed."); }); igniteThread.start(); @@ -98,9 +100,9 @@ public class GDBOnTreesRegressionTrainerExample { } /** - * Fill parabola training data. + * Fill parabolic training data. * - * @param ignite Ignite. + * @param ignite Ignite instance. * @param trainingSetCfg Training set config. */ @NotNull private static IgniteCache<Integer, double[]> fillTrainingData(Ignite ignite, http://git-wip-us.apache.org/repos/asf/ignite/blob/fea694fe/examples/src/main/java/org/apache/ignite/examples/ml/tree/randomforest/RandomForestClassificationExample.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/tree/randomforest/RandomForestClassificationExample.java b/examples/src/main/java/org/apache/ignite/examples/ml/tree/randomforest/RandomForestClassificationExample.java index 33c3a5f..528adc9 100644 --- a/examples/src/main/java/org/apache/ignite/examples/ml/tree/randomforest/RandomForestClassificationExample.java +++ b/examples/src/main/java/org/apache/ignite/examples/ml/tree/randomforest/RandomForestClassificationExample.java @@ -18,15 +18,13 @@ package org.apache.ignite.examples.ml.tree.randomforest; import java.util.Arrays; -import java.util.UUID; import javax.cache.Cache; import org.apache.ignite.Ignite; import org.apache.ignite.IgniteCache; import org.apache.ignite.Ignition; -import org.apache.ignite.cache.affinity.rendezvous.RendezvousAffinityFunction; import org.apache.ignite.cache.query.QueryCursor; import org.apache.ignite.cache.query.ScanQuery; -import org.apache.ignite.configuration.CacheConfiguration; +import org.apache.ignite.examples.ml.util.TestCache; import org.apache.ignite.ml.composition.ModelsComposition; import org.apache.ignite.ml.math.primitives.vector.VectorUtils; import org.apache.ignite.ml.tree.randomforest.RandomForestClassifierTrainer; @@ -55,7 +53,7 @@ public class RandomForestClassificationExample { IgniteThread igniteThread = new IgniteThread(ignite.configuration().getIgniteInstanceName(), RandomForestClassificationExample.class.getSimpleName(), () -> { - IgniteCache<Integer, double[]> dataCache = getTestCache(ignite); + IgniteCache<Integer, double[]> dataCache = new TestCache(ignite).get(data); RandomForestClassifierTrainer trainer = new RandomForestClassifierTrainer(13, 4, 101, 0.3, 2, 0); @@ -92,25 +90,6 @@ public class RandomForestClassificationExample { } /** - * Fills cache with data and returns 
it. - * - * @param ignite Ignite instance. - * @return Filled Ignite Cache. - */ - private static IgniteCache<Integer, double[]> getTestCache(Ignite ignite) { - CacheConfiguration<Integer, double[]> cacheConfiguration = new CacheConfiguration<>(); - cacheConfiguration.setName("TEST_" + UUID.randomUUID()); - cacheConfiguration.setAffinity(new RendezvousAffinityFunction(false, 10)); - - IgniteCache<Integer, double[]> cache = ignite.createCache(cacheConfiguration); - - for (int i = 0; i < data.length; i++) - cache.put(i, data[i]); - - return cache; - } - - /** * The Wine dataset. */ private static final double[][] data = { http://git-wip-us.apache.org/repos/asf/ignite/blob/fea694fe/examples/src/main/java/org/apache/ignite/examples/ml/tree/randomforest/RandomForestRegressionExample.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/tree/randomforest/RandomForestRegressionExample.java b/examples/src/main/java/org/apache/ignite/examples/ml/tree/randomforest/RandomForestRegressionExample.java index c803354..3cd4cd2 100644 --- a/examples/src/main/java/org/apache/ignite/examples/ml/tree/randomforest/RandomForestRegressionExample.java +++ b/examples/src/main/java/org/apache/ignite/examples/ml/tree/randomforest/RandomForestRegressionExample.java @@ -18,15 +18,13 @@ package org.apache.ignite.examples.ml.tree.randomforest; import java.util.Arrays; -import java.util.UUID; import javax.cache.Cache; import org.apache.ignite.Ignite; import org.apache.ignite.IgniteCache; import org.apache.ignite.Ignition; -import org.apache.ignite.cache.affinity.rendezvous.RendezvousAffinityFunction; import org.apache.ignite.cache.query.QueryCursor; import org.apache.ignite.cache.query.ScanQuery; -import org.apache.ignite.configuration.CacheConfiguration; +import org.apache.ignite.examples.ml.util.TestCache; import org.apache.ignite.ml.composition.ModelsComposition; import org.apache.ignite.ml.environment.LearningEnvironment; import org.apache.ignite.ml.environment.logging.ConsoleLogger; @@ -60,7 +58,7 @@ public class RandomForestRegressionExample { IgniteThread igniteThread = new IgniteThread(ignite.configuration().getIgniteInstanceName(), RandomForestRegressionExample.class.getSimpleName(), () -> { - IgniteCache<Integer, double[]> dataCache = getTestCache(ignite); + IgniteCache<Integer, double[]> dataCache = new TestCache(ignite).get(data); RandomForestRegressionTrainer trainer = new RandomForestRegressionTrainer(13, 4, 101, 0.3, 2, 0); trainer.setEnvironment(LearningEnvironment.builder() @@ -106,25 +104,6 @@ public class RandomForestRegressionExample { } /** - * Fills cache with data and returns it. - * - * @param ignite Ignite instance. - * @return Filled Ignite Cache. - */ - private static IgniteCache<Integer, double[]> getTestCache(Ignite ignite) { - CacheConfiguration<Integer, double[]> cacheConfiguration = new CacheConfiguration<>(); - cacheConfiguration.setName("TEST_" + UUID.randomUUID()); - cacheConfiguration.setAffinity(new RendezvousAffinityFunction(false, 10)); - - IgniteCache<Integer, double[]> cache = ignite.createCache(cacheConfiguration); - - for (int i = 0; i < data.length; i++) - cache.put(i, data[i]); - - return cache; - } - - /** * The Boston housing dataset. 
*/ private static final double[][] data = { http://git-wip-us.apache.org/repos/asf/ignite/blob/fea694fe/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_1_Read_and_Learn.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_1_Read_and_Learn.java b/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_1_Read_and_Learn.java index 460ca67..78ec9f5 100644 --- a/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_1_Read_and_Learn.java +++ b/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_1_Read_and_Learn.java @@ -31,21 +31,30 @@ import org.apache.ignite.ml.tree.DecisionTreeNode; import org.apache.ignite.thread.IgniteThread; /** - * Usage of DecisionTreeClassificationTrainer to predict death in the disaster. - * - * Extract 3 features "pclass", "sibsp", "parch" to use in prediction. + * Usage of {@link DecisionTreeClassificationTrainer} to predict death in the disaster. + * <p> + * Extract 3 features "pclass", "sibsp", "parch" to use in prediction.</p> + * <p> + * Code in this example launches Ignite grid and fills the cache with test data (based on Titanic passengers data).</p> + * <p> + * After that it trains the model based on the specified data using decision tree classification.</p> + * <p> + * Finally, this example uses {@link Evaluator} functionality to compute metrics from predictions.</p> */ public class Step_1_Read_and_Learn { /** Run example. */ public static void main(String[] args) throws InterruptedException { + System.out.println(); + System.out.println(">>> Tutorial step 1 (read and learn) example started."); + try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) { IgniteThread igniteThread = new IgniteThread(ignite.configuration().getIgniteInstanceName(), Step_1_Read_and_Learn.class.getSimpleName(), () -> { try { - IgniteCache<Integer, Object[]> dataCache = TitanicUtils.readPassengers(ignite); - IgniteBiFunction<Integer, Object[], Vector> featureExtractor = (k, v) -> VectorUtils.of((double) v[0], (double) v[5], (double) v[6]); + IgniteBiFunction<Integer, Object[], Vector> featureExtractor + = (k, v) -> VectorUtils.of((double) v[0], (double) v[5], (double) v[6]); IgniteBiFunction<Integer, Object[], Double> lbExtractor = (k, v) -> (double) v[1]; @@ -58,6 +67,8 @@ public class Step_1_Read_and_Learn { lbExtractor ); + System.out.println("\n>>> Trained model: " + mdl); + double accuracy = Evaluator.evaluate( dataCache, mdl, @@ -69,6 +80,7 @@ public class Step_1_Read_and_Learn { System.out.println("\n>>> Accuracy " + accuracy); System.out.println("\n>>> Test Error " + (1 - accuracy)); + System.out.println(">>> Tutorial step 1 (read and learn) example completed."); } catch (FileNotFoundException e) { e.printStackTrace(); http://git-wip-us.apache.org/repos/asf/ignite/blob/fea694fe/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_2_Imputing.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_2_Imputing.java b/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_2_Imputing.java index 8127a51..f86e1b6 100644 --- a/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_2_Imputing.java +++ b/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_2_Imputing.java @@ -32,18 +32,31 @@ import org.apache.ignite.ml.tree.DecisionTreeNode; import 
org.apache.ignite.thread.IgniteThread; /** - * Usage of imputer to fill missed data (Double.NaN) values in the chosen columns. + * Usage of {@link ImputerTrainer} to fill missed data ({@code Double.NaN}) values in the chosen columns. + * <p> + * Code in this example launches Ignite grid and fills the cache with test data (based on Titanic passengers data).</p> + * <p> + * After that it defines preprocessors that extract features from an upstream data and + * <a href="https://en.wikipedia.org/wiki/Imputation_(statistics)">impute</a> missing values.</p> + * <p> + * Then, it trains the model based on the processed data using decision tree classification.</p> + * <p> + * Finally, this example uses {@link Evaluator} functionality to compute metrics from predictions.</p> */ public class Step_2_Imputing { /** Run example. */ public static void main(String[] args) throws InterruptedException { + System.out.println(); + System.out.println(">>> Tutorial step 2 (imputing) example started."); + try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) { IgniteThread igniteThread = new IgniteThread(ignite.configuration().getIgniteInstanceName(), Step_2_Imputing.class.getSimpleName(), () -> { try { IgniteCache<Integer, Object[]> dataCache = TitanicUtils.readPassengers(ignite); - IgniteBiFunction<Integer, Object[], Vector> featureExtractor = (k, v) -> VectorUtils.of((double) v[0], (double) v[5], (double) v[6]); + IgniteBiFunction<Integer, Object[], Vector> featureExtractor + = (k, v) -> VectorUtils.of((double) v[0], (double) v[5], (double) v[6]); IgniteBiFunction<Integer, Object[], Double> lbExtractor = (k, v) -> (double) v[1]; @@ -63,6 +76,8 @@ public class Step_2_Imputing { lbExtractor ); + System.out.println("\n>>> Trained model: " + mdl); + double accuracy = Evaluator.evaluate( dataCache, mdl, @@ -73,6 +88,8 @@ public class Step_2_Imputing { System.out.println("\n>>> Accuracy " + accuracy); System.out.println("\n>>> Test Error " + (1 - accuracy)); + + System.out.println(">>> Tutorial step 2 (imputing) example completed."); } catch (FileNotFoundException e) { e.printStackTrace(); http://git-wip-us.apache.org/repos/asf/ignite/blob/fea694fe/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_3_Categorial.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_3_Categorial.java b/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_3_Categorial.java index e623083..54726ec 100644 --- a/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_3_Categorial.java +++ b/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_3_Categorial.java @@ -23,8 +23,8 @@ import org.apache.ignite.IgniteCache; import org.apache.ignite.Ignition; import org.apache.ignite.ml.math.functions.IgniteBiFunction; import org.apache.ignite.ml.math.primitives.vector.Vector; -import org.apache.ignite.ml.preprocessing.encoding.EncoderType; import org.apache.ignite.ml.preprocessing.encoding.EncoderTrainer; +import org.apache.ignite.ml.preprocessing.encoding.EncoderType; import org.apache.ignite.ml.preprocessing.imputing.ImputerTrainer; import org.apache.ignite.ml.selection.scoring.evaluator.Evaluator; import org.apache.ignite.ml.selection.scoring.metric.Accuracy; @@ -33,13 +33,25 @@ import org.apache.ignite.ml.tree.DecisionTreeNode; import org.apache.ignite.thread.IgniteThread; /** - * Let's add two categorial features "sex", "embarked" to predict more precisely. 
- * - * To encode categorial features the StringEncoderTrainer will be used. + * Let's add two categorial features "sex", "embarked" to predict more precisely than in {@link Step_1_Read_and_Learn}. + * <p> + * To encode categorial features the String kind type of {@link EncoderTrainer} will be used.</p> + * <p> + * Code in this example launches Ignite grid and fills the cache with test data (based on Titanic passengers data).</p> + * <p> + * After that it defines preprocessors that extract features from an upstream data and encode string values (categories) + * to double values in specified range.</p> + * <p> + * Then, it trains the model based on the processed data using decision tree classification.</p> + * <p> + * Finally, this example uses {@link Evaluator} functionality to compute metrics from predictions.</p> */ public class Step_3_Categorial { /** Run example. */ public static void main(String[] args) throws InterruptedException { + System.out.println(); + System.out.println(">>> Tutorial step 3 (categorial) example started."); + try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) { IgniteThread igniteThread = new IgniteThread(ignite.configuration().getIgniteInstanceName(), Step_3_Categorial.class.getSimpleName(), () -> { @@ -77,6 +89,8 @@ public class Step_3_Categorial { lbExtractor ); + System.out.println("\n>>> Trained model: " + mdl); + double accuracy = Evaluator.evaluate( dataCache, mdl, @@ -87,6 +101,8 @@ public class Step_3_Categorial { System.out.println("\n>>> Accuracy " + accuracy); System.out.println("\n>>> Test Error " + (1 - accuracy)); + + System.out.println(">>> Tutorial step 3 (categorial) example completed."); } catch (FileNotFoundException e) { e.printStackTrace(); http://git-wip-us.apache.org/repos/asf/ignite/blob/fea694fe/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_3_Categorial_with_One_Hot_Encoder.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_3_Categorial_with_One_Hot_Encoder.java b/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_3_Categorial_with_One_Hot_Encoder.java index d80f647..4a10c29 100644 --- a/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_3_Categorial_with_One_Hot_Encoder.java +++ b/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_3_Categorial_with_One_Hot_Encoder.java @@ -23,8 +23,8 @@ import org.apache.ignite.IgniteCache; import org.apache.ignite.Ignition; import org.apache.ignite.ml.math.functions.IgniteBiFunction; import org.apache.ignite.ml.math.primitives.vector.Vector; -import org.apache.ignite.ml.preprocessing.encoding.EncoderType; import org.apache.ignite.ml.preprocessing.encoding.EncoderTrainer; +import org.apache.ignite.ml.preprocessing.encoding.EncoderType; import org.apache.ignite.ml.preprocessing.imputing.ImputerTrainer; import org.apache.ignite.ml.selection.scoring.evaluator.Evaluator; import org.apache.ignite.ml.selection.scoring.metric.Accuracy; @@ -33,13 +33,26 @@ import org.apache.ignite.ml.tree.DecisionTreeNode; import org.apache.ignite.thread.IgniteThread; /** - * Let's add two categorial features "sex", "embarked" to predict more precisely. - * - * To encode categorial features the StringEncoderTrainer will be used. + * Let's add two categorial features "sex", "embarked" to predict more precisely than in {@link Step_1_Read_and_Learn}.. 
+ * <p> + * To encode categorial features the {@link EncoderTrainer} of the + * <a href="https://en.wikipedia.org/wiki/One-hot">One-hot</a> type will be used.</p> + * <p> + * Code in this example launches Ignite grid and fills the cache with test data (based on Titanic passengers data).</p> + * <p> + * After that it defines preprocessors that extract features from an upstream data and encode string values (categories) + * to double values in specified range.</p> + * <p> + * Then, it trains the model based on the processed data using decision tree classification.</p> + * <p> + * Finally, this example uses {@link Evaluator} functionality to compute metrics from predictions.</p> */ public class Step_3_Categorial_with_One_Hot_Encoder { /** Run example. */ public static void main(String[] args) throws InterruptedException { + System.out.println(); + System.out.println(">>> Tutorial step 3 (categorial with One-hot encoder) example started."); + try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) { IgniteThread igniteThread = new IgniteThread(ignite.configuration().getIgniteInstanceName(), Step_3_Categorial_with_One_Hot_Encoder.class.getSimpleName(), () -> { @@ -79,6 +92,8 @@ public class Step_3_Categorial_with_One_Hot_Encoder { lbExtractor ); + System.out.println("\n>>> Trained model: " + mdl); + double accuracy = Evaluator.evaluate( dataCache, mdl, @@ -89,6 +104,9 @@ public class Step_3_Categorial_with_One_Hot_Encoder { System.out.println("\n>>> Accuracy " + accuracy); System.out.println("\n>>> Test Error " + (1 - accuracy)); + + System.out.println(">>> Tutorial step 3 (categorial with One-hot encoder) example completed."); + } catch (FileNotFoundException e) { e.printStackTrace(); http://git-wip-us.apache.org/repos/asf/ignite/blob/fea694fe/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_4_Add_age_fare.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_4_Add_age_fare.java b/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_4_Add_age_fare.java index 2ea9860..166de44 100644 --- a/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_4_Add_age_fare.java +++ b/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_4_Add_age_fare.java @@ -33,11 +33,23 @@ import org.apache.ignite.ml.tree.DecisionTreeNode; import org.apache.ignite.thread.IgniteThread; /** - * Add yet two numerical features "age", "fare" to improve our model. + * Add two more numerical features "age", "fare" to improve our model over {@link Step_3_Categorial}. + * <p> + * Code in this example launches Ignite grid and fills the cache with test data (based on Titanic passengers data).</p> + * <p> + * After that it defines preprocessors that extract features from an upstream data and encode string values (categories) + * to double values in specified range.</p> + * <p> + * Then, it trains the model based on the processed data using decision tree classification.</p> + * <p> + * Finally, this example uses {@link Evaluator} functionality to compute metrics from predictions.</p> */ public class Step_4_Add_age_fare { /** Run example.
*/ public static void main(String[] args) throws InterruptedException { + System.out.println(); + System.out.println(">>> Tutorial step 4 (add age and fare) example started."); + try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) { IgniteThread igniteThread = new IgniteThread(ignite.configuration().getIgniteInstanceName(), Step_4_Add_age_fare.class.getSimpleName(), () -> { @@ -45,7 +57,7 @@ public class Step_4_Add_age_fare { IgniteCache<Integer, Object[]> dataCache = TitanicUtils.readPassengers(ignite); // Defines first preprocessor that extracts features from an upstream data. - // Extracts "pclass", "sibsp", "parch", "sex", "embarked", "age", "fare" + // Extracts "pclass", "sibsp", "parch", "sex", "embarked", "age", "fare". IgniteBiFunction<Integer, Object[], Object[]> featureExtractor = (k, v) -> new Object[]{v[0], v[3], v[4], v[5], v[6], v[8], v[10]}; @@ -54,7 +66,7 @@ public class Step_4_Add_age_fare { IgniteBiFunction<Integer, Object[], Vector> strEncoderPreprocessor = new EncoderTrainer<Integer, Object[]>() .withEncoderType(EncoderType.STRING_ENCODER) .encodeFeature(1) - .encodeFeature(6) // <--- Changed index here + .encodeFeature(6) // <--- Changed index here. .fit(ignite, dataCache, featureExtractor @@ -76,6 +88,8 @@ public class Step_4_Add_age_fare { lbExtractor ); + System.out.println("\n>>> Trained model: " + mdl); + double accuracy = Evaluator.evaluate( dataCache, mdl, @@ -86,6 +100,8 @@ public class Step_4_Add_age_fare { System.out.println("\n>>> Accuracy " + accuracy); System.out.println("\n>>> Test Error " + (1 - accuracy)); + + System.out.println(">>> Tutorial step 4 (add age and fare) example completed."); } catch (FileNotFoundException e) { e.printStackTrace(); http://git-wip-us.apache.org/repos/asf/ignite/blob/fea694fe/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_5_Scaling.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_5_Scaling.java b/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_5_Scaling.java index 01a4c3f..937a041 100644 --- a/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_5_Scaling.java +++ b/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_5_Scaling.java @@ -35,11 +35,24 @@ import org.apache.ignite.ml.tree.DecisionTreeNode; import org.apache.ignite.thread.IgniteThread; /** - * MinMaxScalerTrainer and NormalizationTrainer are used in this example due to different values distribution in columns and rows. + * {@link MinMaxScalerTrainer} and {@link NormalizationTrainer} are used in this example due to different values + * distribution in columns and rows. + * <p> + * Code in this example launches Ignite grid and fills the cache with test data (based on Titanic passengers data).</p> + * <p> + * After that it defines preprocessors that extract features from an upstream data and perform other desired changes + * over the extracted data, including the scaling.</p> + * <p> + * Then, it trains the model based on the processed data using decision tree classification.</p> + * <p> + * Finally, this example uses {@link Evaluator} functionality to compute metrics from predictions.</p> */ public class Step_5_Scaling { /** Run example. 
*/ public static void main(String[] args) throws InterruptedException { + System.out.println(); + System.out.println(">>> Tutorial step 5 (scaling) example started."); + try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) { IgniteThread igniteThread = new IgniteThread(ignite.configuration().getIgniteInstanceName(), Step_5_Scaling.class.getSimpleName(), () -> { @@ -47,7 +60,7 @@ public class Step_5_Scaling { IgniteCache<Integer, Object[]> dataCache = TitanicUtils.readPassengers(ignite); // Defines first preprocessor that extracts features from an upstream data. - // Extracts "pclass", "sibsp", "parch", "sex", "embarked", "age", "fare" + // Extracts "pclass", "sibsp", "parch", "sex", "embarked", "age", "fare". IgniteBiFunction<Integer, Object[], Object[]> featureExtractor = (k, v) -> new Object[]{v[0], v[3], v[4], v[5], v[6], v[8], v[10]}; @@ -56,7 +69,7 @@ public class Step_5_Scaling { IgniteBiFunction<Integer, Object[], Vector> strEncoderPreprocessor = new EncoderTrainer<Integer, Object[]>() .withEncoderType(EncoderType.STRING_ENCODER) .encodeFeature(1) - .encodeFeature(6) // <--- Changed index here + .encodeFeature(6) // <--- Changed index here. .fit(ignite, dataCache, featureExtractor @@ -68,21 +81,20 @@ public class Step_5_Scaling { strEncoderPreprocessor ); - IgniteBiFunction<Integer, Object[], Vector> minMaxScalerPreprocessor = new MinMaxScalerTrainer<Integer, Object[]>() .fit( - ignite, - dataCache, - imputingPreprocessor - ); + ignite, + dataCache, + imputingPreprocessor + ); IgniteBiFunction<Integer, Object[], Vector> normalizationPreprocessor = new NormalizationTrainer<Integer, Object[]>() .withP(1) .fit( - ignite, - dataCache, - minMaxScalerPreprocessor - ); + ignite, + dataCache, + minMaxScalerPreprocessor + ); DecisionTreeClassificationTrainer trainer = new DecisionTreeClassificationTrainer(5, 0); @@ -94,6 +106,8 @@ public class Step_5_Scaling { lbExtractor ); + System.out.println("\n>>> Trained model: " + mdl); + double accuracy = Evaluator.evaluate( dataCache, mdl, @@ -104,6 +118,8 @@ public class Step_5_Scaling { System.out.println("\n>>> Accuracy " + accuracy); System.out.println("\n>>> Test Error " + (1 - accuracy)); + + System.out.println(">>> Tutorial step 5 (scaling) example completed."); } catch (FileNotFoundException e) { e.printStackTrace(); http://git-wip-us.apache.org/repos/asf/ignite/blob/fea694fe/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_6_KNN.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_6_KNN.java b/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_6_KNN.java index 56e70f1..7d0986d 100644 --- a/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_6_KNN.java +++ b/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_6_KNN.java @@ -36,11 +36,24 @@ import org.apache.ignite.ml.selection.scoring.metric.Accuracy; import org.apache.ignite.thread.IgniteThread; /** - * Sometimes is better to change algorithm, let's say on kNN. + * Change classification algorithm that was used in {@link Step_5_Scaling} from decision tree to kNN + * ({@link KNNClassificationTrainer}) because sometimes this can be beneficial. 
+ * <p> + * Code in this example launches Ignite grid and fills the cache with test data (based on Titanic passengers data).</p> + * <p> + * After that it defines preprocessors that extract features from an upstream data and perform other desired changes + * over the extracted data.</p> + * <p> + * Then, it trains the model based on the processed data using kNN classification.</p> + * <p> + * Finally, this example uses {@link Evaluator} functionality to compute metrics from predictions.</p> */ public class Step_6_KNN { /** Run example. */ public static void main(String[] args) throws InterruptedException { + System.out.println(); + System.out.println(">>> Tutorial step 6 (kNN) example started."); + try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) { IgniteThread igniteThread = new IgniteThread(ignite.configuration().getIgniteInstanceName(), Step_6_KNN.class.getSimpleName(), () -> { @@ -48,7 +61,7 @@ public class Step_6_KNN { IgniteCache<Integer, Object[]> dataCache = TitanicUtils.readPassengers(ignite); // Defines first preprocessor that extracts features from an upstream data. - // Extracts "pclass", "sibsp", "parch", "sex", "embarked", "age", "fare" + // Extracts "pclass", "sibsp", "parch", "sex", "embarked", "age", "fare". IgniteBiFunction<Integer, Object[], Object[]> featureExtractor = (k, v) -> new Object[]{v[0], v[3], v[4], v[5], v[6], v[8], v[10]}; @@ -57,7 +70,7 @@ public class Step_6_KNN { IgniteBiFunction<Integer, Object[], Vector> strEncoderPreprocessor = new EncoderTrainer<Integer, Object[]>() .withEncoderType(EncoderType.STRING_ENCODER) .encodeFeature(1) - .encodeFeature(6) // <--- Changed index here + .encodeFeature(6) // <--- Changed index here. .fit(ignite, dataCache, featureExtractor @@ -69,21 +82,20 @@ public class Step_6_KNN { strEncoderPreprocessor ); - IgniteBiFunction<Integer, Object[], Vector> minMaxScalerPreprocessor = new MinMaxScalerTrainer<Integer, Object[]>() .fit( - ignite, - dataCache, - imputingPreprocessor - ); + ignite, + dataCache, + imputingPreprocessor + ); IgniteBiFunction<Integer, Object[], Vector> normalizationPreprocessor = new NormalizationTrainer<Integer, Object[]>() .withP(1) .fit( - ignite, - dataCache, - minMaxScalerPreprocessor - ); + ignite, + dataCache, + minMaxScalerPreprocessor + ); KNNClassificationTrainer trainer = new KNNClassificationTrainer(); @@ -95,6 +107,8 @@ public class Step_6_KNN { lbExtractor ).withK(1).withStrategy(NNStrategy.WEIGHTED); + System.out.println("\n>>> Trained model: " + mdl); + double accuracy = Evaluator.evaluate( dataCache, mdl, @@ -105,6 +119,8 @@ public class Step_6_KNN { System.out.println("\n>>> Accuracy " + accuracy); System.out.println("\n>>> Test Error " + (1 - accuracy)); + + System.out.println(">>> Tutorial step 6 (kNN) example completed."); } catch (FileNotFoundException e) { e.printStackTrace(); http://git-wip-us.apache.org/repos/asf/ignite/blob/fea694fe/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_7_Split_train_test.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_7_Split_train_test.java b/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_7_Split_train_test.java index f62054e..9f1a505 100644 --- a/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_7_Split_train_test.java +++ b/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_7_Split_train_test.java @@ -37,13 +37,25 @@ import 
org.apache.ignite.ml.tree.DecisionTreeNode; import org.apache.ignite.thread.IgniteThread; /** - * The highest accuracy in the previous example is the result of overfitting. - * - * For real model estimation is better to use test-train split via TrainTestDatasetSplitter. + * The highest accuracy in the previous example ({@link Step_6_KNN}) is the result of + * <a href="https://en.wikipedia.org/wiki/Overfitting">overfitting</a>. + * For real model estimation is better to use test-train split via {@link TrainTestDatasetSplitter}. + * <p> + * Code in this example launches Ignite grid and fills the cache with test data (based on Titanic passengers data).</p> + * <p> + * After that it defines how to split the data to train and test sets and configures preprocessors that extract + * features from an upstream data and perform other desired changes over the extracted data.</p> + * <p> + * Then, it trains the model based on the processed data using decision tree classification.</p> + * <p> + * Finally, this example uses {@link Evaluator} functionality to compute metrics from predictions.</p> */ public class Step_7_Split_train_test { /** Run example. */ public static void main(String[] args) throws InterruptedException { + System.out.println(); + System.out.println(">>> Tutorial step 7 (split to train and test) example started."); + try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) { IgniteThread igniteThread = new IgniteThread(ignite.configuration().getIgniteInstanceName(), Step_7_Split_train_test.class.getSimpleName(), () -> { @@ -51,7 +63,7 @@ public class Step_7_Split_train_test { IgniteCache<Integer, Object[]> dataCache = TitanicUtils.readPassengers(ignite); // Defines first preprocessor that extracts features from an upstream data. - // Extracts "pclass", "sibsp", "parch", "sex", "embarked", "age", "fare" + // Extracts "pclass", "sibsp", "parch", "sex", "embarked", "age", "fare". IgniteBiFunction<Integer, Object[], Object[]> featureExtractor = (k, v) -> new Object[]{v[0], v[3], v[4], v[5], v[6], v[8], v[10]}; @@ -63,7 +75,7 @@ public class Step_7_Split_train_test { IgniteBiFunction<Integer, Object[], Vector> strEncoderPreprocessor = new EncoderTrainer<Integer, Object[]>() .withEncoderType(EncoderType.STRING_ENCODER) .encodeFeature(1) - .encodeFeature(6) // <--- Changed index here + .encodeFeature(6) // <--- Changed index here. 
.fit(ignite, dataCache, featureExtractor @@ -75,21 +87,20 @@ public class Step_7_Split_train_test { strEncoderPreprocessor ); - IgniteBiFunction<Integer, Object[], Vector> minMaxScalerPreprocessor = new MinMaxScalerTrainer<Integer, Object[]>() .fit( - ignite, - dataCache, - imputingPreprocessor - ); + ignite, + dataCache, + imputingPreprocessor + ); IgniteBiFunction<Integer, Object[], Vector> normalizationPreprocessor = new NormalizationTrainer<Integer, Object[]>() .withP(1) .fit( - ignite, - dataCache, - minMaxScalerPreprocessor - ); + ignite, + dataCache, + minMaxScalerPreprocessor + ); DecisionTreeClassificationTrainer trainer = new DecisionTreeClassificationTrainer(5, 0); @@ -102,6 +113,8 @@ public class Step_7_Split_train_test { lbExtractor ); + System.out.println("\n>>> Trained model: " + mdl); + double accuracy = Evaluator.evaluate( dataCache, split.getTestFilter(), @@ -113,6 +126,8 @@ public class Step_7_Split_train_test { System.out.println("\n>>> Accuracy " + accuracy); System.out.println("\n>>> Test Error " + (1 - accuracy)); + + System.out.println(">>> Tutorial step 7 (split to train and test) example completed."); } catch (FileNotFoundException e) { e.printStackTrace(); http://git-wip-us.apache.org/repos/asf/ignite/blob/fea694fe/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_8_CV.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_8_CV.java b/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_8_CV.java index d7e6e27..12e5fe8 100644 --- a/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_8_CV.java +++ b/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_8_CV.java @@ -40,21 +40,34 @@ import org.apache.ignite.thread.IgniteThread; /** * To choose the best hyperparameters the cross-validation will be used in this example. - * - * The purpose of cross-validation is model checking, not model building. - * - * We train k different models. - * - * They differ in that 1/(k-1)th of the training data is exchanged against other cases. - * + * <p> + * Code in this example launches Ignite grid and fills the cache with test data (based on Titanic passengers data).</p> + * <p> + * After that it defines how to split the data to train and test sets and configures preprocessors that extract + * features from an upstream data and perform other desired changes over the extracted data.</p> + * <p> + * Then, it tunes hyperparams with K-fold Cross-Validation on the split training set and trains the model based on + * the processed data using decision tree classification and the obtained hyperparams.</p> + * <p> + * Finally, this example uses {@link Evaluator} functionality to compute metrics from predictions.</p> + * <p> + * The purpose of cross-validation is model checking, not model building.</p> + * <p> + * We train {@code k} different models.</p> + * <p> + * They differ in that {@code 1/(k-1)}th of the training data is exchanged against other cases.</p> + * <p> * These models are sometimes called surrogate models because the (average) performance measured for these models - * is taken as a surrogate of the performance of the model trained on all cases. 
- * - * All scenarios are described there: https://sebastianraschka.com/faq/docs/evaluate-a-model.html + * is taken as a surrogate of the performance of the model trained on all cases.</p> + * <p> + * All scenarios are described there: https://sebastianraschka.com/faq/docs/evaluate-a-model.html</p> */ public class Step_8_CV { /** Run example. */ public static void main(String[] args) throws InterruptedException { + System.out.println(); + System.out.println(">>> Tutorial step 8 (cross-validation) example started."); + try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) { IgniteThread igniteThread = new IgniteThread(ignite.configuration().getIgniteInstanceName(), Step_8_CV.class.getSimpleName(), () -> { @@ -62,7 +75,7 @@ public class Step_8_CV { IgniteCache<Integer, Object[]> dataCache = TitanicUtils.readPassengers(ignite); // Defines first preprocessor that extracts features from an upstream data. - // Extracts "pclass", "sibsp", "parch", "sex", "embarked", "age", "fare" + // Extracts "pclass", "sibsp", "parch", "sex", "embarked", "age", "fare". IgniteBiFunction<Integer, Object[], Object[]> featureExtractor = (k, v) -> new Object[]{v[0], v[3], v[4], v[5], v[6], v[8], v[10]}; @@ -74,7 +87,7 @@ public class Step_8_CV { IgniteBiFunction<Integer, Object[], Vector> strEncoderPreprocessor = new EncoderTrainer<Integer, Object[]>() .withEncoderType(EncoderType.STRING_ENCODER) .encodeFeature(1) - .encodeFeature(6) // <--- Changed index here + .encodeFeature(6) // <--- Changed index here. .fit(ignite, dataCache, featureExtractor @@ -88,12 +101,12 @@ public class Step_8_CV { IgniteBiFunction<Integer, Object[], Vector> minMaxScalerPreprocessor = new MinMaxScalerTrainer<Integer, Object[]>() .fit( - ignite, - dataCache, - imputingPreprocessor - ); + ignite, + dataCache, + imputingPreprocessor + ); - // Tune hyperparams with K-fold Cross-Validation on the splitted training set. + // Tune hyperparams with K-fold Cross-Validation on the split training set. 
int[] pSet = new int[]{1, 2}; int[] maxDeepSet = new int[]{1, 2, 3, 4, 5, 10, 20}; int bestP = 1; @@ -102,7 +115,8 @@ public class Step_8_CV { for(int p: pSet){ for(int maxDeep: maxDeepSet){ - IgniteBiFunction<Integer, Object[], Vector> normalizationPreprocessor = new NormalizationTrainer<Integer, Object[]>() + IgniteBiFunction<Integer, Object[], Vector> normalizationPreprocessor + = new NormalizationTrainer<Integer, Object[]>() .withP(p) .fit( ignite, @@ -110,7 +124,8 @@ public class Step_8_CV { minMaxScalerPreprocessor ); - DecisionTreeClassificationTrainer trainer = new DecisionTreeClassificationTrainer(maxDeep, 0); + DecisionTreeClassificationTrainer trainer + = new DecisionTreeClassificationTrainer(maxDeep, 0); CrossValidation<DecisionTreeNode, Double, Integer, Object[]> scoreCalculator = new CrossValidation<>(); @@ -161,6 +176,8 @@ public class Step_8_CV { lbExtractor ); + System.out.println("\n>>> Trained model: " + bestMdl); + double accuracy = Evaluator.evaluate( dataCache, split.getTestFilter(), @@ -172,6 +189,8 @@ public class Step_8_CV { System.out.println("\n>>> Accuracy " + accuracy); System.out.println("\n>>> Test Error " + (1 - accuracy)); + + System.out.println(">>> Tutorial step 8 (cross-validation) example completed."); } catch (FileNotFoundException e) { e.printStackTrace(); http://git-wip-us.apache.org/repos/asf/ignite/blob/fea694fe/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_8_CV_with_Param_Grid.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_8_CV_with_Param_Grid.java b/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_8_CV_with_Param_Grid.java index 9311cfb..785064c 100644 --- a/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_8_CV_with_Param_Grid.java +++ b/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_8_CV_with_Param_Grid.java @@ -41,22 +41,35 @@ import org.apache.ignite.ml.tree.DecisionTreeNode; import org.apache.ignite.thread.IgniteThread; /** - * To choose the best hyperparameters the cross-validation will be used in this example. - * - * The purpose of cross-validation is model checking, not model building. - * - * We train k different models. - * - * They differ in that 1/(k-1)th of the training data is exchanged against other cases. - * + * To choose the best hyperparameters the cross-validation with {@link ParamGrid} will be used in this example. 
+ * <p> + * Code in this example launches Ignite grid and fills the cache with test data (based on Titanic passengers data).</p> + * <p> + * After that it defines how to split the data to train and test sets and configures preprocessors that extract + * features from an upstream data and perform other desired changes over the extracted data.</p> + * <p> + * Then, it tunes hyperparams with K-fold Cross-Validation on the split training set and trains the model based on + * the processed data using decision tree classification and the obtained hyperparams.</p> + * <p> + * Finally, this example uses {@link Evaluator} functionality to compute metrics from predictions.</p> + * <p> + * The purpose of cross-validation is model checking, not model building.</p> + * <p> + * We train {@code k} different models.</p> + * <p> + * They differ in that {@code 1/(k-1)}th of the training data is exchanged against other cases.</p> + * <p> * These models are sometimes called surrogate models because the (average) performance measured for these models - * is taken as a surrogate of the performance of the model trained on all cases. - * - * All scenarios are described there: https://sebastianraschka.com/faq/docs/evaluate-a-model.html + * is taken as a surrogate of the performance of the model trained on all cases.</p> + * <p> + * All scenarios are described there: https://sebastianraschka.com/faq/docs/evaluate-a-model.html</p> */ public class Step_8_CV_with_Param_Grid { /** Run example. */ public static void main(String[] args) throws InterruptedException { + System.out.println(); + System.out.println(">>> Tutorial step 8 (cross-validation with param grid) example started."); + try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) { IgniteThread igniteThread = new IgniteThread(ignite.configuration().getIgniteInstanceName(), Step_8_CV_with_Param_Grid.class.getSimpleName(), () -> { @@ -64,7 +77,7 @@ public class Step_8_CV_with_Param_Grid { IgniteCache<Integer, Object[]> dataCache = TitanicUtils.readPassengers(ignite); // Defines first preprocessor that extracts features from an upstream data. - // Extracts "pclass", "sibsp", "parch", "sex", "embarked", "age", "fare" + // Extracts "pclass", "sibsp", "parch", "sex", "embarked", "age", "fare" . IgniteBiFunction<Integer, Object[], Object[]> featureExtractor = (k, v) -> new Object[]{v[0], v[3], v[4], v[5], v[6], v[8], v[10]}; @@ -76,7 +89,7 @@ public class Step_8_CV_with_Param_Grid { IgniteBiFunction<Integer, Object[], Vector> strEncoderPreprocessor = new EncoderTrainer<Integer, Object[]>() .withEncoderType(EncoderType.STRING_ENCODER) .encodeFeature(1) - .encodeFeature(6) // <--- Changed index here + .encodeFeature(6) // <--- Changed index here. .fit(ignite, dataCache, featureExtractor @@ -103,7 +116,7 @@ public class Step_8_CV_with_Param_Grid { minMaxScalerPreprocessor ); - // Tune hyperparams with K-fold Cross-Validation on the splitted training set. + // Tune hyperparams with K-fold Cross-Validation on the split training set. 
DecisionTreeClassificationTrainer trainerCV = new DecisionTreeClassificationTrainer(); @@ -126,7 +139,8 @@ public class Step_8_CV_with_Param_Grid { paramGrid ); - System.out.println("Train with maxDeep: " + crossValidationRes.getBest("maxDeep") + " and minImpurityDecrease: " + crossValidationRes.getBest("minImpurityDecrease")); + System.out.println("Train with maxDeep: " + crossValidationRes.getBest("maxDeep") + + " and minImpurityDecrease: " + crossValidationRes.getBest("minImpurityDecrease")); DecisionTreeClassificationTrainer trainer = new DecisionTreeClassificationTrainer() .withMaxDeep(crossValidationRes.getBest("maxDeep")) @@ -138,9 +152,8 @@ public class Step_8_CV_with_Param_Grid { System.out.println("Best hyper params: " + crossValidationRes.getBestHyperParams()); System.out.println("Best average score: " + crossValidationRes.getBestAvgScore()); - crossValidationRes.getScoringBoard().forEach((hyperParams, score) -> { - System.out.println("Score " + Arrays.toString(score) + " for hyper params " + hyperParams); - }); + crossValidationRes.getScoringBoard().forEach((hyperParams, score) + -> System.out.println("Score " + Arrays.toString(score) + " for hyper params " + hyperParams)); // Train decision tree model. DecisionTreeNode bestMdl = trainer.fit( @@ -151,6 +164,8 @@ public class Step_8_CV_with_Param_Grid { lbExtractor ); + System.out.println("\n>>> Trained model: " + bestMdl); + double accuracy = Evaluator.evaluate( dataCache, split.getTestFilter(), @@ -162,6 +177,8 @@ public class Step_8_CV_with_Param_Grid { System.out.println("\n>>> Accuracy " + accuracy); System.out.println("\n>>> Test Error " + (1 - accuracy)); + + System.out.println(">>> Tutorial step 8 (cross-validation with param grid) example completed."); } catch (FileNotFoundException e) { e.printStackTrace(); } http://git-wip-us.apache.org/repos/asf/ignite/blob/fea694fe/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_9_Go_to_LogReg.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_9_Go_to_LogReg.java b/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_9_Go_to_LogReg.java index 9fcc9ba..93dff53 100644 --- a/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_9_Go_to_LogReg.java +++ b/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_9_Go_to_LogReg.java @@ -42,13 +42,25 @@ import org.apache.ignite.ml.selection.split.TrainTestSplit; import org.apache.ignite.thread.IgniteThread; /** - * Maybe the another algorithm can give us the higher accuracy? - * - * Let's win with the LogisticRegressionSGDTrainer! + * Change classification algorithm that was used in {@link Step_8_CV_with_Param_Grid} from decision tree to logistic + * regression ({@link LogisticRegressionSGDTrainer}) because sometimes this can give higher accuracy.
+ * <p> + * Code in this example launches Ignite grid and fills the cache with test data (based on Titanic passengers data).</p> + * <p> + * After that it defines how to split the data to train and test sets and configures preprocessors that extract + * features from the upstream data and perform other desired changes over the extracted data.</p> + * <p> + * Then, it tunes hyperparams with K-fold Cross-Validation on the split training set and trains the model based on + * the processed data using logistic regression and the obtained hyperparams.</p> + * <p> + * Finally, this example uses {@link Evaluator} functionality to compute metrics from predictions.</p> */ public class Step_9_Go_to_LogReg { /** Run example. */ public static void main(String[] args) throws InterruptedException { + System.out.println(); + System.out.println(">>> Tutorial step 9 (logistic regression) example started."); + try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) { IgniteThread igniteThread = new IgniteThread(ignite.configuration().getIgniteInstanceName(), Step_9_Go_to_LogReg.class.getSimpleName(), () -> { @@ -82,19 +94,18 @@ public class Step_9_Go_to_LogReg { IgniteBiFunction<Integer, Object[], Vector> minMaxScalerPreprocessor = new MinMaxScalerTrainer<Integer, Object[]>() .fit( - ignite, - dataCache, - imputingPreprocessor - ); + ignite, + dataCache, + imputingPreprocessor + ); - // Tune hyperparams with K-fold Cross-Validation on the splitted training set. + // Tune hyperparams with K-fold Cross-Validation on the split training set. int[] pSet = new int[]{1, 2}; int[] maxIterationsSet = new int[]{ 100, 1000}; int[] batchSizeSet = new int[]{100, 10}; int[] locIterationsSet = new int[]{10, 100}; double[] learningRateSet = new double[]{0.1, 0.2, 0.5}; - int bestP = 1; int bestMaxIterations = 100; int bestBatchSize = 10; @@ -107,8 +118,8 @@ public class Step_9_Go_to_LogReg { for (int batchSize : batchSizeSet) { for (int locIterations : locIterationsSet) { for (double learningRate : learningRateSet) { - - IgniteBiFunction<Integer, Object[], Vector> normalizationPreprocessor = new NormalizationTrainer<Integer, Object[]>() + IgniteBiFunction<Integer, Object[], Vector> normalizationPreprocessor + = new NormalizationTrainer<Integer, Object[]>() .withP(p) .fit( ignite, @@ -116,14 +127,15 @@ public class Step_9_Go_to_LogReg { minMaxScalerPreprocessor ); - LogisticRegressionSGDTrainer<?> trainer = new LogisticRegressionSGDTrainer<>(new UpdatesStrategy<>( + LogisticRegressionSGDTrainer<?> trainer + = new LogisticRegressionSGDTrainer<>(new UpdatesStrategy<>( new SimpleGDUpdateCalculator(learningRate), SimpleGDParameterUpdate::sumLocal, SimpleGDParameterUpdate::avg ), maxIterations, batchSize, locIterations, 123L); - CrossValidation<LogisticRegressionModel, Double, Integer, Object[]> scoreCalculator - = new CrossValidation<>(); + CrossValidation<LogisticRegressionModel, Double, Integer, Object[]> + scoreCalculator = new CrossValidation<>(); double[] scores = scoreCalculator.score( trainer, @@ -193,6 +205,8 @@ public class Step_9_Go_to_LogReg { lbExtractor ); + System.out.println("\n>>> Trained model: " + bestMdl); + double accuracy = Evaluator.evaluate( dataCache, split.getTestFilter(), @@ -204,6 +218,8 @@ public class Step_9_Go_to_LogReg { System.out.println("\n>>> Accuracy " + accuracy); System.out.println("\n>>> Test Error " + (1 - accuracy)); + + System.out.println(">>> Tutorial step 9 (logistic regression) example completed."); } catch (FileNotFoundException e) { e.printStackTrace();
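The nested loops added in Step_9_Go_to_LogReg above implement a plain exhaustive grid search: every combination of p, maxIterations, batchSize, locIterations and learningRate is cross-validated, and the best-scoring combination is kept for the final fit. The following is a minimal, self-contained sketch of that pattern only, not code from the commit: the scoreFor function is a hypothetical stand-in for the CrossValidation score call shown in the hunk, and only two of the five hyperparameters are searched to keep the sketch short.

import java.util.Arrays;
import java.util.function.BiFunction;

/** Illustrative sketch (not in the commit) of the exhaustive grid search used in Step_9_Go_to_LogReg. */
public class GridSearchSketch {
    public static void main(String[] args) {
        // Grid values copied from the example above.
        double[] learningRateSet = {0.1, 0.2, 0.5};
        int[] batchSizeSet = {100, 10};

        // Hypothetical stand-in for the CrossValidation score call: returns per-fold scores for one combination.
        BiFunction<Double, Integer, double[]> scoreFor = (learningRate, batchSize) -> new double[] {0.7, 0.75, 0.72};

        double bestScore = Double.NEGATIVE_INFINITY;
        double bestLearningRate = learningRateSet[0];
        int bestBatchSize = batchSizeSet[0];

        // Exhaustive search: evaluate every combination and keep the one with the best average score.
        for (double learningRate : learningRateSet) {
            for (int batchSize : batchSizeSet) {
                double avg = Arrays.stream(scoreFor.apply(learningRate, batchSize)).average().orElse(0);

                if (avg > bestScore) {
                    bestScore = avg;
                    bestLearningRate = learningRate;
                    bestBatchSize = batchSize;
                }
            }
        }

        System.out.println("Best avg score: " + bestScore
            + " (learningRate=" + bestLearningRate + ", batchSize=" + bestBatchSize + ")");
    }
}

In the actual example the dummy scoreFor is replaced by cross-validation over the Titanic cache with the configured preprocessors and label extractor, and the winning hyperparameters are then used to train the final LogisticRegressionSGDTrainer model.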
http://git-wip-us.apache.org/repos/asf/ignite/blob/fea694fe/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/TitanicUtils.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/TitanicUtils.java b/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/TitanicUtils.java index a339638..3a68ecb 100644 --- a/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/TitanicUtils.java +++ b/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/TitanicUtils.java @@ -38,7 +38,7 @@ public class TitanicUtils { * * @param ignite The ignite. * @return The filled cache. - * @throws FileNotFoundException + * @throws FileNotFoundException If data file is not found. */ public static IgniteCache<Integer, Object[]> readPassengers(Ignite ignite) throws FileNotFoundException { http://git-wip-us.apache.org/repos/asf/ignite/blob/fea694fe/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/TutorialStepByStepExample.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/TutorialStepByStepExample.java b/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/TutorialStepByStepExample.java index ab2c746..67f4bf5 100644 --- a/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/TutorialStepByStepExample.java +++ b/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/TutorialStepByStepExample.java @@ -19,10 +19,10 @@ package org.apache.ignite.examples.ml.tutorial; /** * Run all the tutorial examples step by step with primary purpose to provide - * automatic execution from IgniteExamplesMLTestSuite. + * automatic execution from {@code IgniteExamplesMLTestSuite}. */ public class TutorialStepByStepExample { - /** Run example. */ + /** Run examples with default settings. */ public static void main(String[] args) throws InterruptedException { Step_1_Read_and_Learn.main(args); Step_2_Imputing.main(args); http://git-wip-us.apache.org/repos/asf/ignite/blob/fea694fe/examples/src/main/java/org/apache/ignite/examples/ml/util/DatasetHelper.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/util/DatasetHelper.java b/examples/src/main/java/org/apache/ignite/examples/ml/util/DatasetHelper.java new file mode 100644 index 0000000..96de9ad --- /dev/null +++ b/examples/src/main/java/org/apache/ignite/examples/ml/util/DatasetHelper.java @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.ignite.examples.ml.util; + +import java.util.Arrays; +import org.apache.ignite.ml.dataset.primitive.SimpleDataset; + +/** + * Common utility code used in some ML examples to report some statistic metrics of the dataset. + */ +public class DatasetHelper { + /** */ + private final SimpleDataset dataset; + + /** */ + public DatasetHelper(SimpleDataset dataset) { + this.dataset = dataset; + } + + /** */ + public void describe() { + // Calculation of the mean value. This calculation will be performed in map-reduce manner. + double[] mean = dataset.mean(); + System.out.println("Mean \n\t" + Arrays.toString(mean)); + + // Calculation of the standard deviation. This calculation will be performed in map-reduce manner. + double[] std = dataset.std(); + System.out.println("Standard deviation \n\t" + Arrays.toString(std)); + + // Calculation of the covariance matrix. This calculation will be performed in map-reduce manner. + double[][] cov = dataset.cov(); + System.out.println("Covariance matrix "); + for (double[] row : cov) + System.out.println("\t" + Arrays.toString(row)); + + // Calculation of the correlation matrix. This calculation will be performed in map-reduce manner. + double[][] corr = dataset.corr(); + System.out.println("Correlation matrix "); + for (double[] row : corr) + System.out.println("\t" + Arrays.toString(row)); + } +} http://git-wip-us.apache.org/repos/asf/ignite/blob/fea694fe/examples/src/main/java/org/apache/ignite/examples/ml/util/MLExamplesCommonArgs.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/util/MLExamplesCommonArgs.java b/examples/src/main/java/org/apache/ignite/examples/ml/util/MLExamplesCommonArgs.java new file mode 100644 index 0000000..23cbe27 --- /dev/null +++ b/examples/src/main/java/org/apache/ignite/examples/ml/util/MLExamplesCommonArgs.java @@ -0,0 +1,31 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.ignite.examples.ml.util; + +/** + * Some common arguments for examples in ML module. + */ +public class MLExamplesCommonArgs { + /** + * Unattended argument. + */ + public static String UNATTENDED = "unattended"; + + /** Empty args for ML examples. 
*/ + public static final String[] EMPTY_ARGS_ML = new String[] {"--" + UNATTENDED}; +} http://git-wip-us.apache.org/repos/asf/ignite/blob/fea694fe/examples/src/main/java/org/apache/ignite/examples/ml/util/TestCache.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/util/TestCache.java b/examples/src/main/java/org/apache/ignite/examples/ml/util/TestCache.java new file mode 100644 index 0000000..9de20d6 --- /dev/null +++ b/examples/src/main/java/org/apache/ignite/examples/ml/util/TestCache.java @@ -0,0 +1,77 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.ignite.examples.ml.util; + +import java.util.UUID; +import org.apache.ignite.Ignite; +import org.apache.ignite.IgniteCache; +import org.apache.ignite.cache.affinity.rendezvous.RendezvousAffinityFunction; +import org.apache.ignite.configuration.CacheConfiguration; +import org.apache.ignite.ml.math.primitives.vector.Vector; +import org.apache.ignite.ml.math.primitives.vector.VectorUtils; + +/** + * Common utility code used in some ML examples to set up test cache. + */ +public class TestCache { + /** */ + private final Ignite ignite; + + /** */ + public TestCache(Ignite ignite) { + this.ignite = ignite; + } + + /** + * Fills cache with data and returns it. + * + * @param data Data to fill the cache with. + * @return Filled Ignite Cache. + */ + public IgniteCache<Integer, double[]> get(double[][] data) { + CacheConfiguration<Integer, double[]> cacheConfiguration = new CacheConfiguration<>(); + cacheConfiguration.setName("TEST_" + UUID.randomUUID()); + cacheConfiguration.setAffinity(new RendezvousAffinityFunction(false, 10)); + + IgniteCache<Integer, double[]> cache = ignite.createCache(cacheConfiguration); + + for (int i = 0; i < data.length; i++) + cache.put(i, data[i]); + + return cache; + } + + /** + * Fills cache with data and returns it. + * + * @param data Data to fill the cache with. + * @return Filled Ignite Cache. 
+ */ + public IgniteCache<Integer, Vector> getVectors(double[][] data) { + CacheConfiguration<Integer, Vector> cacheConfiguration = new CacheConfiguration<>(); + cacheConfiguration.setName("TEST_" + UUID.randomUUID()); + cacheConfiguration.setAffinity(new RendezvousAffinityFunction(false, 10)); + + IgniteCache<Integer, Vector> cache = ignite.createCache(cacheConfiguration); + + for (int i = 0; i < data.length; i++) + cache.put(i, VectorUtils.of(data[i])); + + return cache; + } +} http://git-wip-us.apache.org/repos/asf/ignite/blob/fea694fe/examples/src/main/java/org/apache/ignite/examples/ml/util/package-info.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/util/package-info.java b/examples/src/main/java/org/apache/ignite/examples/ml/util/package-info.java new file mode 100644 index 0000000..5f912e1 --- /dev/null +++ b/examples/src/main/java/org/apache/ignite/examples/ml/util/package-info.java @@ -0,0 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * <!-- Package description. --> + * Utilities for machine learning examples. 
+ */ +package org.apache.ignite.examples.ml.util; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/ignite/blob/fea694fe/examples/src/test/java/org/apache/ignite/testsuites/IgniteExamplesMLTestSuite.java ---------------------------------------------------------------------- diff --git a/examples/src/test/java/org/apache/ignite/testsuites/IgniteExamplesMLTestSuite.java b/examples/src/test/java/org/apache/ignite/testsuites/IgniteExamplesMLTestSuite.java index df85f1a..6b41301 100644 --- a/examples/src/test/java/org/apache/ignite/testsuites/IgniteExamplesMLTestSuite.java +++ b/examples/src/test/java/org/apache/ignite/testsuites/IgniteExamplesMLTestSuite.java @@ -30,7 +30,7 @@ import javassist.CtClass; import javassist.CtNewMethod; import javassist.NotFoundException; import junit.framework.TestSuite; -import org.apache.ignite.examples.ml.MLExamplesCommonArgs; +import org.apache.ignite.examples.ml.util.MLExamplesCommonArgs; import org.apache.ignite.testframework.GridTestUtils; import org.apache.ignite.testframework.junits.common.GridAbstractExamplesTest; http://git-wip-us.apache.org/repos/asf/ignite/blob/fea694fe/modules/ml/src/main/java/org/apache/ignite/ml/knn/regression/KNNRegressionModel.java ---------------------------------------------------------------------- diff --git a/modules/ml/src/main/java/org/apache/ignite/ml/knn/regression/KNNRegressionModel.java b/modules/ml/src/main/java/org/apache/ignite/ml/knn/regression/KNNRegressionModel.java index c0d6680..0761ff5 100644 --- a/modules/ml/src/main/java/org/apache/ignite/ml/knn/regression/KNNRegressionModel.java +++ b/modules/ml/src/main/java/org/apache/ignite/ml/knn/regression/KNNRegressionModel.java @@ -95,7 +95,7 @@ public class KNNRegressionModel extends KNNClassificationModel { /** {@inheritDoc} */ @Override public String toString(boolean pretty) { - return ModelTrace.builder("KNNClassificationModel", pretty) + return ModelTrace.builder("KNNRegressionModel", pretty) .addField("k", String.valueOf(k)) .addField("measure", distanceMeasure.getClass().getSimpleName()) .addField("strategy", stgy.name())
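As a closing illustration of the utility classes introduced by this commit, the sketch below shows roughly how TestCache and DatasetHelper are meant to be combined. It is not part of the commit: the data values are made up, and the DatasetFactory.createSimpleDataset call is an assumption about the surrounding Ignite ML API (dataset construction is not shown in this diff); only new TestCache(ignite).getVectors(...) and new DatasetHelper(dataset).describe() come from the files added above.

package org.apache.ignite.examples.ml.util;

import org.apache.ignite.Ignite;
import org.apache.ignite.IgniteCache;
import org.apache.ignite.Ignition;
import org.apache.ignite.ml.dataset.DatasetFactory;
import org.apache.ignite.ml.dataset.primitive.SimpleDataset;
import org.apache.ignite.ml.math.primitives.vector.Vector;

/** Rough usage sketch for TestCache and DatasetHelper (illustration only, not in the commit). */
public class UtilUsageSketch {
    public static void main(String[] args) throws Exception {
        try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) {
            // Made-up data points; the real examples use the Glass and Titanic datasets.
            double[][] data = {
                {1.0, 2.0, 3.0},
                {4.0, 5.0, 6.0},
                {7.0, 8.0, 9.0}
            };

            // TestCache creates a randomly named cache and fills it with the points as Vectors.
            IgniteCache<Integer, Vector> dataCache = new TestCache(ignite).getVectors(data);

            // Assumption: a SimpleDataset is built over the cache via DatasetFactory;
            // the exact factory overload is not shown in this commit.
            try (SimpleDataset<?> dataset = DatasetFactory.createSimpleDataset(ignite, dataCache, (k, v) -> v)) {
                // DatasetHelper prints mean, standard deviation, covariance and correlation of the dataset.
                new DatasetHelper(dataset).describe();
            }
        }
    }
}

The third new utility, MLExamplesCommonArgs, simply carries the --unattended argument consumed by IgniteExamplesMLTestSuite, whose import was updated above to the new org.apache.ignite.examples.ml.util package.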