http://git-wip-us.apache.org/repos/asf/ignite/blob/609266fe/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_6_KNN.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_6_KNN.java b/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_6_KNN.java index e99494b..0c8b562 100644 --- a/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_6_KNN.java +++ b/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_6_KNN.java @@ -33,7 +33,6 @@ import org.apache.ignite.ml.preprocessing.minmaxscaling.MinMaxScalerTrainer; import org.apache.ignite.ml.preprocessing.normalization.NormalizationTrainer; import org.apache.ignite.ml.selection.scoring.evaluator.Evaluator; import org.apache.ignite.ml.selection.scoring.metric.Accuracy; -import org.apache.ignite.thread.IgniteThread; /** * Change classification algorithm that was used in {@link Step_5_Scaling} from decision tree to kNN @@ -55,80 +54,74 @@ public class Step_6_KNN { System.out.println(">>> Tutorial step 6 (kNN) example started."); try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) { - IgniteThread igniteThread = new IgniteThread(ignite.configuration().getIgniteInstanceName(), - Step_6_KNN.class.getSimpleName(), () -> { - try { - IgniteCache<Integer, Object[]> dataCache = TitanicUtils.readPassengers(ignite); - - // Defines first preprocessor that extracts features from an upstream data. - // Extracts "pclass", "sibsp", "parch", "sex", "embarked", "age", "fare". - IgniteBiFunction<Integer, Object[], Object[]> featureExtractor - = (k, v) -> new Object[]{v[0], v[3], v[4], v[5], v[6], v[8], v[10]}; - - IgniteBiFunction<Integer, Object[], Double> lbExtractor = (k, v) -> (double) v[1]; - - IgniteBiFunction<Integer, Object[], Vector> strEncoderPreprocessor = new EncoderTrainer<Integer, Object[]>() - .withEncoderType(EncoderType.STRING_ENCODER) - .withEncodedFeature(1) - .withEncodedFeature(6) // <--- Changed index here. - .fit(ignite, - dataCache, - featureExtractor - ); + try { + IgniteCache<Integer, Object[]> dataCache = TitanicUtils.readPassengers(ignite); - IgniteBiFunction<Integer, Object[], Vector> imputingPreprocessor = new ImputerTrainer<Integer, Object[]>() - .fit(ignite, - dataCache, - strEncoderPreprocessor - ); - - IgniteBiFunction<Integer, Object[], Vector> minMaxScalerPreprocessor = new MinMaxScalerTrainer<Integer, Object[]>() - .fit( - ignite, - dataCache, - imputingPreprocessor - ); - - IgniteBiFunction<Integer, Object[], Vector> normalizationPreprocessor = new NormalizationTrainer<Integer, Object[]>() - .withP(1) - .fit( - ignite, - dataCache, - minMaxScalerPreprocessor - ); - - KNNClassificationTrainer trainer = new KNNClassificationTrainer(); - - // Train decision tree model. - NNClassificationModel mdl = trainer.fit( - ignite, - dataCache, - normalizationPreprocessor, - lbExtractor - ).withK(1).withStrategy(NNStrategy.WEIGHTED); + // Defines first preprocessor that extracts features from an upstream data. + // Extracts "pclass", "sibsp", "parch", "sex", "embarked", "age", "fare". + IgniteBiFunction<Integer, Object[], Object[]> featureExtractor + = (k, v) -> new Object[]{v[0], v[3], v[4], v[5], v[6], v[8], v[10]}; - System.out.println("\n>>> Trained model: " + mdl); + IgniteBiFunction<Integer, Object[], Double> lbExtractor = (k, v) -> (double) v[1]; - double accuracy = Evaluator.evaluate( + IgniteBiFunction<Integer, Object[], Vector> strEncoderPreprocessor = new EncoderTrainer<Integer, Object[]>() + .withEncoderType(EncoderType.STRING_ENCODER) + .withEncodedFeature(1) + .withEncodedFeature(6) // <--- Changed index here. + .fit(ignite, dataCache, - mdl, - normalizationPreprocessor, - lbExtractor, - new Accuracy<>() + featureExtractor + ); + + IgniteBiFunction<Integer, Object[], Vector> imputingPreprocessor = new ImputerTrainer<Integer, Object[]>() + .fit(ignite, + dataCache, + strEncoderPreprocessor ); - System.out.println("\n>>> Accuracy " + accuracy); - System.out.println("\n>>> Test Error " + (1 - accuracy)); + IgniteBiFunction<Integer, Object[], Vector> minMaxScalerPreprocessor = new MinMaxScalerTrainer<Integer, Object[]>() + .fit( + ignite, + dataCache, + imputingPreprocessor + ); - System.out.println(">>> Tutorial step 6 (kNN) example completed."); - } - catch (FileNotFoundException e) { - e.printStackTrace(); - } - }); + IgniteBiFunction<Integer, Object[], Vector> normalizationPreprocessor = new NormalizationTrainer<Integer, Object[]>() + .withP(1) + .fit( + ignite, + dataCache, + minMaxScalerPreprocessor + ); - igniteThread.start(); - igniteThread.join(); + KNNClassificationTrainer trainer = new KNNClassificationTrainer(); + + // Train decision tree model. + NNClassificationModel mdl = trainer.fit( + ignite, + dataCache, + normalizationPreprocessor, + lbExtractor + ).withK(1).withStrategy(NNStrategy.WEIGHTED); + + System.out.println("\n>>> Trained model: " + mdl); + + double accuracy = Evaluator.evaluate( + dataCache, + mdl, + normalizationPreprocessor, + lbExtractor, + new Accuracy<>() + ); + + System.out.println("\n>>> Accuracy " + accuracy); + System.out.println("\n>>> Test Error " + (1 - accuracy)); + + System.out.println(">>> Tutorial step 6 (kNN) example completed."); + } + catch (FileNotFoundException e) { + e.printStackTrace(); + } } } }
http://git-wip-us.apache.org/repos/asf/ignite/blob/609266fe/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_7_Split_train_test.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_7_Split_train_test.java b/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_7_Split_train_test.java index 2ce2b27..c6d033c 100644 --- a/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_7_Split_train_test.java +++ b/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_7_Split_train_test.java @@ -34,7 +34,6 @@ import org.apache.ignite.ml.selection.split.TrainTestDatasetSplitter; import org.apache.ignite.ml.selection.split.TrainTestSplit; import org.apache.ignite.ml.tree.DecisionTreeClassificationTrainer; import org.apache.ignite.ml.tree.DecisionTreeNode; -import org.apache.ignite.thread.IgniteThread; /** * The highest accuracy in the previous example ({@link Step_6_KNN}) is the result of @@ -57,86 +56,79 @@ public class Step_7_Split_train_test { System.out.println(">>> Tutorial step 7 (split to train and test) example started."); try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) { - IgniteThread igniteThread = new IgniteThread(ignite.configuration().getIgniteInstanceName(), - Step_7_Split_train_test.class.getSimpleName(), () -> { - try { - IgniteCache<Integer, Object[]> dataCache = TitanicUtils.readPassengers(ignite); - - // Defines first preprocessor that extracts features from an upstream data. - // Extracts "pclass", "sibsp", "parch", "sex", "embarked", "age", "fare". - IgniteBiFunction<Integer, Object[], Object[]> featureExtractor - = (k, v) -> new Object[]{v[0], v[3], v[4], v[5], v[6], v[8], v[10]}; - - IgniteBiFunction<Integer, Object[], Double> lbExtractor = (k, v) -> (double) v[1]; - - TrainTestSplit<Integer, Object[]> split = new TrainTestDatasetSplitter<Integer, Object[]>() - .split(0.75); - - IgniteBiFunction<Integer, Object[], Vector> strEncoderPreprocessor = new EncoderTrainer<Integer, Object[]>() - .withEncoderType(EncoderType.STRING_ENCODER) - .withEncodedFeature(1) - .withEncodedFeature(6) // <--- Changed index here. - .fit(ignite, - dataCache, - featureExtractor + try { + IgniteCache<Integer, Object[]> dataCache = TitanicUtils.readPassengers(ignite); + + // Defines first preprocessor that extracts features from an upstream data. + // Extracts "pclass", "sibsp", "parch", "sex", "embarked", "age", "fare". + IgniteBiFunction<Integer, Object[], Object[]> featureExtractor + = (k, v) -> new Object[]{v[0], v[3], v[4], v[5], v[6], v[8], v[10]}; + + IgniteBiFunction<Integer, Object[], Double> lbExtractor = (k, v) -> (double) v[1]; + + TrainTestSplit<Integer, Object[]> split = new TrainTestDatasetSplitter<Integer, Object[]>() + .split(0.75); + + IgniteBiFunction<Integer, Object[], Vector> strEncoderPreprocessor = new EncoderTrainer<Integer, Object[]>() + .withEncoderType(EncoderType.STRING_ENCODER) + .withEncodedFeature(1) + .withEncodedFeature(6) // <--- Changed index here. + .fit(ignite, + dataCache, + featureExtractor + ); + + IgniteBiFunction<Integer, Object[], Vector> imputingPreprocessor = new ImputerTrainer<Integer, Object[]>() + .fit(ignite, + dataCache, + strEncoderPreprocessor ); - IgniteBiFunction<Integer, Object[], Vector> imputingPreprocessor = new ImputerTrainer<Integer, Object[]>() - .fit(ignite, - dataCache, - strEncoderPreprocessor - ); - - IgniteBiFunction<Integer, Object[], Vector> minMaxScalerPreprocessor = new MinMaxScalerTrainer<Integer, Object[]>() - .fit( - ignite, - dataCache, - imputingPreprocessor - ); - - IgniteBiFunction<Integer, Object[], Vector> normalizationPreprocessor = new NormalizationTrainer<Integer, Object[]>() - .withP(1) - .fit( - ignite, - dataCache, - minMaxScalerPreprocessor - ); - - DecisionTreeClassificationTrainer trainer = new DecisionTreeClassificationTrainer(5, 0); - - // Train decision tree model. - DecisionTreeNode mdl = trainer.fit( + IgniteBiFunction<Integer, Object[], Vector> minMaxScalerPreprocessor = new MinMaxScalerTrainer<Integer, Object[]>() + .fit( ignite, dataCache, - split.getTrainFilter(), - normalizationPreprocessor, - lbExtractor + imputingPreprocessor ); - System.out.println("\n>>> Trained model: " + mdl); - - double accuracy = Evaluator.evaluate( + IgniteBiFunction<Integer, Object[], Vector> normalizationPreprocessor = new NormalizationTrainer<Integer, Object[]>() + .withP(1) + .fit( + ignite, dataCache, - split.getTestFilter(), - mdl, - normalizationPreprocessor, - lbExtractor, - new Accuracy<>() + minMaxScalerPreprocessor ); - System.out.println("\n>>> Accuracy " + accuracy); - System.out.println("\n>>> Test Error " + (1 - accuracy)); - - System.out.println(">>> Tutorial step 7 (split to train and test) example completed."); - } - catch (FileNotFoundException e) { - e.printStackTrace(); - } - }); - - igniteThread.start(); - - igniteThread.join(); + DecisionTreeClassificationTrainer trainer = new DecisionTreeClassificationTrainer(5, 0); + + // Train decision tree model. + DecisionTreeNode mdl = trainer.fit( + ignite, + dataCache, + split.getTrainFilter(), + normalizationPreprocessor, + lbExtractor + ); + + System.out.println("\n>>> Trained model: " + mdl); + + double accuracy = Evaluator.evaluate( + dataCache, + split.getTestFilter(), + mdl, + normalizationPreprocessor, + lbExtractor, + new Accuracy<>() + ); + + System.out.println("\n>>> Accuracy " + accuracy); + System.out.println("\n>>> Test Error " + (1 - accuracy)); + + System.out.println(">>> Tutorial step 7 (split to train and test) example completed."); + } + catch (FileNotFoundException e) { + e.printStackTrace(); + } } } } http://git-wip-us.apache.org/repos/asf/ignite/blob/609266fe/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_8_CV.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_8_CV.java b/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_8_CV.java index 83c2cca..d83e14a 100644 --- a/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_8_CV.java +++ b/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_8_CV.java @@ -36,7 +36,6 @@ import org.apache.ignite.ml.selection.split.TrainTestDatasetSplitter; import org.apache.ignite.ml.selection.split.TrainTestSplit; import org.apache.ignite.ml.tree.DecisionTreeClassificationTrainer; import org.apache.ignite.ml.tree.DecisionTreeNode; -import org.apache.ignite.thread.IgniteThread; /** * To choose the best hyperparameters the cross-validation will be used in this example. @@ -69,137 +68,130 @@ public class Step_8_CV { System.out.println(">>> Tutorial step 8 (cross-validation) example started."); try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) { - IgniteThread igniteThread = new IgniteThread(ignite.configuration().getIgniteInstanceName(), - Step_8_CV.class.getSimpleName(), () -> { - try { - IgniteCache<Integer, Object[]> dataCache = TitanicUtils.readPassengers(ignite); - - // Defines first preprocessor that extracts features from an upstream data. - // Extracts "pclass", "sibsp", "parch", "sex", "embarked", "age", "fare". - IgniteBiFunction<Integer, Object[], Object[]> featureExtractor - = (k, v) -> new Object[]{v[0], v[3], v[4], v[5], v[6], v[8], v[10]}; - - IgniteBiFunction<Integer, Object[], Double> lbExtractor = (k, v) -> (double) v[1]; - - TrainTestSplit<Integer, Object[]> split = new TrainTestDatasetSplitter<Integer, Object[]>() - .split(0.75); - - IgniteBiFunction<Integer, Object[], Vector> strEncoderPreprocessor = new EncoderTrainer<Integer, Object[]>() - .withEncoderType(EncoderType.STRING_ENCODER) - .withEncodedFeature(1) - .withEncodedFeature(6) // <--- Changed index here. - .fit(ignite, - dataCache, - featureExtractor - ); + try { + IgniteCache<Integer, Object[]> dataCache = TitanicUtils.readPassengers(ignite); - IgniteBiFunction<Integer, Object[], Vector> imputingPreprocessor = new ImputerTrainer<Integer, Object[]>() - .fit(ignite, - dataCache, - strEncoderPreprocessor - ); + // Defines first preprocessor that extracts features from an upstream data. + // Extracts "pclass", "sibsp", "parch", "sex", "embarked", "age", "fare". + IgniteBiFunction<Integer, Object[], Object[]> featureExtractor + = (k, v) -> new Object[]{v[0], v[3], v[4], v[5], v[6], v[8], v[10]}; - IgniteBiFunction<Integer, Object[], Vector> minMaxScalerPreprocessor = new MinMaxScalerTrainer<Integer, Object[]>() - .fit( - ignite, - dataCache, - imputingPreprocessor - ); + IgniteBiFunction<Integer, Object[], Double> lbExtractor = (k, v) -> (double) v[1]; - // Tune hyperparams with K-fold Cross-Validation on the split training set. - int[] pSet = new int[]{1, 2}; - int[] maxDeepSet = new int[]{1, 2, 3, 4, 5, 10, 20}; - int bestP = 1; - int bestMaxDeep = 1; - double avg = Double.MIN_VALUE; - - for(int p: pSet){ - for(int maxDeep: maxDeepSet){ - IgniteBiFunction<Integer, Object[], Vector> normalizationPreprocessor - = new NormalizationTrainer<Integer, Object[]>() - .withP(p) - .fit( - ignite, - dataCache, - minMaxScalerPreprocessor - ); - - DecisionTreeClassificationTrainer trainer - = new DecisionTreeClassificationTrainer(maxDeep, 0); - - CrossValidation<DecisionTreeNode, Double, Integer, Object[]> scoreCalculator - = new CrossValidation<>(); - - double[] scores = scoreCalculator.score( - trainer, - new Accuracy<>(), - ignite, - dataCache, - split.getTrainFilter(), - normalizationPreprocessor, - lbExtractor, - 3 - ); + TrainTestSplit<Integer, Object[]> split = new TrainTestDatasetSplitter<Integer, Object[]>() + .split(0.75); - System.out.println("Scores are: " + Arrays.toString(scores)); + IgniteBiFunction<Integer, Object[], Vector> strEncoderPreprocessor = new EncoderTrainer<Integer, Object[]>() + .withEncoderType(EncoderType.STRING_ENCODER) + .withEncodedFeature(1) + .withEncodedFeature(6) // <--- Changed index here. + .fit(ignite, + dataCache, + featureExtractor + ); - final double currAvg = Arrays.stream(scores).average().orElse(Double.MIN_VALUE); + IgniteBiFunction<Integer, Object[], Vector> imputingPreprocessor = new ImputerTrainer<Integer, Object[]>() + .fit(ignite, + dataCache, + strEncoderPreprocessor + ); - if(currAvg > avg) { - avg = currAvg; - bestP = p; - bestMaxDeep = maxDeep; - } + IgniteBiFunction<Integer, Object[], Vector> minMaxScalerPreprocessor = new MinMaxScalerTrainer<Integer, Object[]>() + .fit( + ignite, + dataCache, + imputingPreprocessor + ); - System.out.println("Avg is: " + currAvg + " with p: " + p + " with maxDeep: " + maxDeep); - } - } + // Tune hyperparams with K-fold Cross-Validation on the split training set. + int[] pSet = new int[]{1, 2}; + int[] maxDeepSet = new int[]{1, 2, 3, 4, 5, 10, 20}; + int bestP = 1; + int bestMaxDeep = 1; + double avg = Double.MIN_VALUE; + + for(int p: pSet){ + for(int maxDeep: maxDeepSet){ + IgniteBiFunction<Integer, Object[], Vector> normalizationPreprocessor + = new NormalizationTrainer<Integer, Object[]>() + .withP(p) + .fit( + ignite, + dataCache, + minMaxScalerPreprocessor + ); - System.out.println("Train with p: " + bestP + " and maxDeep: " + bestMaxDeep); + DecisionTreeClassificationTrainer trainer + = new DecisionTreeClassificationTrainer(maxDeep, 0); - IgniteBiFunction<Integer, Object[], Vector> normalizationPreprocessor = new NormalizationTrainer<Integer, Object[]>() - .withP(bestP) - .fit( + CrossValidation<DecisionTreeNode, Double, Integer, Object[]> scoreCalculator + = new CrossValidation<>(); + + double[] scores = scoreCalculator.score( + trainer, + new Accuracy<>(), ignite, dataCache, - minMaxScalerPreprocessor + split.getTrainFilter(), + normalizationPreprocessor, + lbExtractor, + 3 ); - DecisionTreeClassificationTrainer trainer = new DecisionTreeClassificationTrainer(bestMaxDeep, 0); - - // Train decision tree model. - DecisionTreeNode bestMdl = trainer.fit( - ignite, - dataCache, - split.getTrainFilter(), - normalizationPreprocessor, - lbExtractor - ); - - System.out.println("\n>>> Trained model: " + bestMdl); + System.out.println("Scores are: " + Arrays.toString(scores)); - double accuracy = Evaluator.evaluate( - dataCache, - split.getTestFilter(), - bestMdl, - normalizationPreprocessor, - lbExtractor, - new Accuracy<>() - ); + final double currAvg = Arrays.stream(scores).average().orElse(Double.MIN_VALUE); - System.out.println("\n>>> Accuracy " + accuracy); - System.out.println("\n>>> Test Error " + (1 - accuracy)); + if(currAvg > avg) { + avg = currAvg; + bestP = p; + bestMaxDeep = maxDeep; + } - System.out.println(">>> Tutorial step 8 (cross-validation) example completed."); - } - catch (FileNotFoundException e) { - e.printStackTrace(); + System.out.println("Avg is: " + currAvg + " with p: " + p + " with maxDeep: " + maxDeep); + } } - }); - igniteThread.start(); + System.out.println("Train with p: " + bestP + " and maxDeep: " + bestMaxDeep); + + IgniteBiFunction<Integer, Object[], Vector> normalizationPreprocessor = new NormalizationTrainer<Integer, Object[]>() + .withP(bestP) + .fit( + ignite, + dataCache, + minMaxScalerPreprocessor + ); - igniteThread.join(); + DecisionTreeClassificationTrainer trainer = new DecisionTreeClassificationTrainer(bestMaxDeep, 0); + + // Train decision tree model. + DecisionTreeNode bestMdl = trainer.fit( + ignite, + dataCache, + split.getTrainFilter(), + normalizationPreprocessor, + lbExtractor + ); + + System.out.println("\n>>> Trained model: " + bestMdl); + + double accuracy = Evaluator.evaluate( + dataCache, + split.getTestFilter(), + bestMdl, + normalizationPreprocessor, + lbExtractor, + new Accuracy<>() + ); + + System.out.println("\n>>> Accuracy " + accuracy); + System.out.println("\n>>> Test Error " + (1 - accuracy)); + + System.out.println(">>> Tutorial step 8 (cross-validation) example completed."); + } + catch (FileNotFoundException e) { + e.printStackTrace(); + } } } } http://git-wip-us.apache.org/repos/asf/ignite/blob/609266fe/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_8_CV_with_Param_Grid.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_8_CV_with_Param_Grid.java b/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_8_CV_with_Param_Grid.java index 73a0303..594c0eb 100644 --- a/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_8_CV_with_Param_Grid.java +++ b/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_8_CV_with_Param_Grid.java @@ -38,7 +38,6 @@ import org.apache.ignite.ml.selection.split.TrainTestDatasetSplitter; import org.apache.ignite.ml.selection.split.TrainTestSplit; import org.apache.ignite.ml.tree.DecisionTreeClassificationTrainer; import org.apache.ignite.ml.tree.DecisionTreeNode; -import org.apache.ignite.thread.IgniteThread; /** * To choose the best hyperparameters the cross-validation with {@link ParamGrid} will be used in this example. @@ -71,122 +70,115 @@ public class Step_8_CV_with_Param_Grid { System.out.println(">>> Tutorial step 8 (cross-validation with param grid) example started."); try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) { - IgniteThread igniteThread = new IgniteThread(ignite.configuration().getIgniteInstanceName(), - Step_8_CV_with_Param_Grid.class.getSimpleName(), () -> { - try { - IgniteCache<Integer, Object[]> dataCache = TitanicUtils.readPassengers(ignite); - - // Defines first preprocessor that extracts features from an upstream data. - // Extracts "pclass", "sibsp", "parch", "sex", "embarked", "age", "fare" . - IgniteBiFunction<Integer, Object[], Object[]> featureExtractor - = (k, v) -> new Object[]{v[0], v[3], v[4], v[5], v[6], v[8], v[10]}; - - IgniteBiFunction<Integer, Object[], Double> lbExtractor = (k, v) -> (double) v[1]; - - TrainTestSplit<Integer, Object[]> split = new TrainTestDatasetSplitter<Integer, Object[]>() - .split(0.75); - - IgniteBiFunction<Integer, Object[], Vector> strEncoderPreprocessor = new EncoderTrainer<Integer, Object[]>() - .withEncoderType(EncoderType.STRING_ENCODER) - .withEncodedFeature(1) - .withEncodedFeature(6) // <--- Changed index here. - .fit(ignite, - dataCache, - featureExtractor - ); - - IgniteBiFunction<Integer, Object[], Vector> imputingPreprocessor = new ImputerTrainer<Integer, Object[]>() - .fit(ignite, - dataCache, - strEncoderPreprocessor - ); - - IgniteBiFunction<Integer, Object[], Vector> minMaxScalerPreprocessor = new MinMaxScalerTrainer<Integer, Object[]>() - .fit( - ignite, - dataCache, - imputingPreprocessor - ); - - IgniteBiFunction<Integer, Object[], Vector> normalizationPreprocessor = new NormalizationTrainer<Integer, Object[]>() - .withP(2) - .fit( - ignite, - dataCache, - minMaxScalerPreprocessor - ); - - // Tune hyperparams with K-fold Cross-Validation on the split training set. - - DecisionTreeClassificationTrainer trainerCV = new DecisionTreeClassificationTrainer(); - - CrossValidation<DecisionTreeNode, Double, Integer, Object[]> scoreCalculator - = new CrossValidation<>(); - - ParamGrid paramGrid = new ParamGrid() - .addHyperParam("maxDeep", new Double[]{1.0, 2.0, 3.0, 4.0, 5.0, 10.0, 10.0}) - .addHyperParam("minImpurityDecrease", new Double[]{0.0, 0.25, 0.5}); - - CrossValidationResult crossValidationRes = scoreCalculator.score( - trainerCV, - new Accuracy<>(), - ignite, - dataCache, - split.getTrainFilter(), - normalizationPreprocessor, - lbExtractor, - 3, - paramGrid - ); + try { + IgniteCache<Integer, Object[]> dataCache = TitanicUtils.readPassengers(ignite); - System.out.println("Train with maxDeep: " + crossValidationRes.getBest("maxDeep") - + " and minImpurityDecrease: " + crossValidationRes.getBest("minImpurityDecrease")); + // Defines first preprocessor that extracts features from an upstream data. + // Extracts "pclass", "sibsp", "parch", "sex", "embarked", "age", "fare" . + IgniteBiFunction<Integer, Object[], Object[]> featureExtractor + = (k, v) -> new Object[]{v[0], v[3], v[4], v[5], v[6], v[8], v[10]}; - DecisionTreeClassificationTrainer trainer = new DecisionTreeClassificationTrainer() - .withMaxDeep(crossValidationRes.getBest("maxDeep")) - .withMinImpurityDecrease(crossValidationRes.getBest("minImpurityDecrease")); + IgniteBiFunction<Integer, Object[], Double> lbExtractor = (k, v) -> (double) v[1]; - System.out.println(crossValidationRes); + TrainTestSplit<Integer, Object[]> split = new TrainTestDatasetSplitter<Integer, Object[]>() + .split(0.75); - System.out.println("Best score: " + Arrays.toString(crossValidationRes.getBestScore())); - System.out.println("Best hyper params: " + crossValidationRes.getBestHyperParams()); - System.out.println("Best average score: " + crossValidationRes.getBestAvgScore()); + IgniteBiFunction<Integer, Object[], Vector> strEncoderPreprocessor = new EncoderTrainer<Integer, Object[]>() + .withEncoderType(EncoderType.STRING_ENCODER) + .withEncodedFeature(1) + .withEncodedFeature(6) // <--- Changed index here. + .fit(ignite, + dataCache, + featureExtractor + ); - crossValidationRes.getScoringBoard().forEach((hyperParams, score) - -> System.out.println("Score " + Arrays.toString(score) + " for hyper params " + hyperParams)); + IgniteBiFunction<Integer, Object[], Vector> imputingPreprocessor = new ImputerTrainer<Integer, Object[]>() + .fit(ignite, + dataCache, + strEncoderPreprocessor + ); - // Train decision tree model. - DecisionTreeNode bestMdl = trainer.fit( + IgniteBiFunction<Integer, Object[], Vector> minMaxScalerPreprocessor = new MinMaxScalerTrainer<Integer, Object[]>() + .fit( ignite, dataCache, - split.getTrainFilter(), - normalizationPreprocessor, - lbExtractor + imputingPreprocessor ); - System.out.println("\n>>> Trained model: " + bestMdl); - - double accuracy = Evaluator.evaluate( + IgniteBiFunction<Integer, Object[], Vector> normalizationPreprocessor = new NormalizationTrainer<Integer, Object[]>() + .withP(2) + .fit( + ignite, dataCache, - split.getTestFilter(), - bestMdl, - normalizationPreprocessor, - lbExtractor, - new Accuracy<>() + minMaxScalerPreprocessor ); - System.out.println("\n>>> Accuracy " + accuracy); - System.out.println("\n>>> Test Error " + (1 - accuracy)); - - System.out.println(">>> Tutorial step 8 (cross-validation with param grid) example started."); - } catch (FileNotFoundException e) { - e.printStackTrace(); - } - }); - - igniteThread.start(); - - igniteThread.join(); + // Tune hyperparams with K-fold Cross-Validation on the split training set. + + DecisionTreeClassificationTrainer trainerCV = new DecisionTreeClassificationTrainer(); + + CrossValidation<DecisionTreeNode, Double, Integer, Object[]> scoreCalculator + = new CrossValidation<>(); + + ParamGrid paramGrid = new ParamGrid() + .addHyperParam("maxDeep", new Double[]{1.0, 2.0, 3.0, 4.0, 5.0, 10.0, 10.0}) + .addHyperParam("minImpurityDecrease", new Double[]{0.0, 0.25, 0.5}); + + CrossValidationResult crossValidationRes = scoreCalculator.score( + trainerCV, + new Accuracy<>(), + ignite, + dataCache, + split.getTrainFilter(), + normalizationPreprocessor, + lbExtractor, + 3, + paramGrid + ); + + System.out.println("Train with maxDeep: " + crossValidationRes.getBest("maxDeep") + + " and minImpurityDecrease: " + crossValidationRes.getBest("minImpurityDecrease")); + + DecisionTreeClassificationTrainer trainer = new DecisionTreeClassificationTrainer() + .withMaxDeep(crossValidationRes.getBest("maxDeep")) + .withMinImpurityDecrease(crossValidationRes.getBest("minImpurityDecrease")); + + System.out.println(crossValidationRes); + + System.out.println("Best score: " + Arrays.toString(crossValidationRes.getBestScore())); + System.out.println("Best hyper params: " + crossValidationRes.getBestHyperParams()); + System.out.println("Best average score: " + crossValidationRes.getBestAvgScore()); + + crossValidationRes.getScoringBoard().forEach((hyperParams, score) + -> System.out.println("Score " + Arrays.toString(score) + " for hyper params " + hyperParams)); + + // Train decision tree model. + DecisionTreeNode bestMdl = trainer.fit( + ignite, + dataCache, + split.getTrainFilter(), + normalizationPreprocessor, + lbExtractor + ); + + System.out.println("\n>>> Trained model: " + bestMdl); + + double accuracy = Evaluator.evaluate( + dataCache, + split.getTestFilter(), + bestMdl, + normalizationPreprocessor, + lbExtractor, + new Accuracy<>() + ); + + System.out.println("\n>>> Accuracy " + accuracy); + System.out.println("\n>>> Test Error " + (1 - accuracy)); + + System.out.println(">>> Tutorial step 8 (cross-validation with param grid) example started."); + } catch (FileNotFoundException e) { + e.printStackTrace(); + } } } } http://git-wip-us.apache.org/repos/asf/ignite/blob/609266fe/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_9_Go_to_LogReg.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_9_Go_to_LogReg.java b/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_9_Go_to_LogReg.java index 088caf7..4e1e005 100644 --- a/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_9_Go_to_LogReg.java +++ b/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_9_Go_to_LogReg.java @@ -39,7 +39,6 @@ import org.apache.ignite.ml.selection.scoring.evaluator.Evaluator; import org.apache.ignite.ml.selection.scoring.metric.Accuracy; import org.apache.ignite.ml.selection.split.TrainTestDatasetSplitter; import org.apache.ignite.ml.selection.split.TrainTestSplit; -import org.apache.ignite.thread.IgniteThread; /** * Change classification algorithm that was used in {@link Step_8_CV_with_Param_Grid} from decision tree to logistic @@ -62,173 +61,166 @@ public class Step_9_Go_to_LogReg { System.out.println(">>> Tutorial step 9 (logistic regression) example started."); try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) { - IgniteThread igniteThread = new IgniteThread(ignite.configuration().getIgniteInstanceName(), - Step_9_Go_to_LogReg.class.getSimpleName(), () -> { - try { - IgniteCache<Integer, Object[]> dataCache = TitanicUtils.readPassengers(ignite); - - // Defines first preprocessor that extracts features from an upstream data. - // Extracts "pclass", "sibsp", "parch", "sex", "embarked", "age", "fare" - IgniteBiFunction<Integer, Object[], Object[]> featureExtractor - = (k, v) -> new Object[]{v[0], v[3], v[4], v[5], v[6], v[8], v[10]}; - - IgniteBiFunction<Integer, Object[], Double> lbExtractor = (k, v) -> (double) v[1]; - - TrainTestSplit<Integer, Object[]> split = new TrainTestDatasetSplitter<Integer, Object[]>() - .split(0.75); - - IgniteBiFunction<Integer, Object[], Vector> strEncoderPreprocessor = new EncoderTrainer<Integer, Object[]>() - .withEncoderType(EncoderType.STRING_ENCODER) - .withEncodedFeature(1) - .withEncodedFeature(6) // <--- Changed index here - .fit(ignite, - dataCache, - featureExtractor + try { + IgniteCache<Integer, Object[]> dataCache = TitanicUtils.readPassengers(ignite); + + // Defines first preprocessor that extracts features from an upstream data. + // Extracts "pclass", "sibsp", "parch", "sex", "embarked", "age", "fare" + IgniteBiFunction<Integer, Object[], Object[]> featureExtractor + = (k, v) -> new Object[]{v[0], v[3], v[4], v[5], v[6], v[8], v[10]}; + + IgniteBiFunction<Integer, Object[], Double> lbExtractor = (k, v) -> (double) v[1]; + + TrainTestSplit<Integer, Object[]> split = new TrainTestDatasetSplitter<Integer, Object[]>() + .split(0.75); + + IgniteBiFunction<Integer, Object[], Vector> strEncoderPreprocessor = new EncoderTrainer<Integer, Object[]>() + .withEncoderType(EncoderType.STRING_ENCODER) + .withEncodedFeature(1) + .withEncodedFeature(6) // <--- Changed index here + .fit(ignite, + dataCache, + featureExtractor + ); + + IgniteBiFunction<Integer, Object[], Vector> imputingPreprocessor = new ImputerTrainer<Integer, Object[]>() + .fit(ignite, + dataCache, + strEncoderPreprocessor ); - IgniteBiFunction<Integer, Object[], Vector> imputingPreprocessor = new ImputerTrainer<Integer, Object[]>() - .fit(ignite, - dataCache, - strEncoderPreprocessor - ); - - IgniteBiFunction<Integer, Object[], Vector> minMaxScalerPreprocessor = new MinMaxScalerTrainer<Integer, Object[]>() - .fit( - ignite, - dataCache, - imputingPreprocessor - ); - - // Tune hyperparams with K-fold Cross-Validation on the split training set. - int[] pSet = new int[]{1, 2}; - int[] maxIterationsSet = new int[]{ 100, 1000}; - int[] batchSizeSet = new int[]{100, 10}; - int[] locIterationsSet = new int[]{10, 100}; - double[] learningRateSet = new double[]{0.1, 0.2, 0.5}; - - int bestP = 1; - int bestMaxIterations = 100; - int bestBatchSize = 10; - int bestLocIterations = 10; - double bestLearningRate = 0.0; - double avg = Double.MIN_VALUE; - - for(int p: pSet){ - for(int maxIterations: maxIterationsSet) { - for (int batchSize : batchSizeSet) { - for (int locIterations : locIterationsSet) { - for (double learningRate : learningRateSet) { - IgniteBiFunction<Integer, Object[], Vector> normalizationPreprocessor - = new NormalizationTrainer<Integer, Object[]>() - .withP(p) - .fit( - ignite, - dataCache, - minMaxScalerPreprocessor - ); - - LogisticRegressionSGDTrainer<?> trainer - = new LogisticRegressionSGDTrainer<>(new UpdatesStrategy<>( - new SimpleGDUpdateCalculator(learningRate), - SimpleGDParameterUpdate::sumLocal, - SimpleGDParameterUpdate::avg - ), maxIterations, batchSize, locIterations, 123L); - - CrossValidation<LogisticRegressionModel, Double, Integer, Object[]> - scoreCalculator = new CrossValidation<>(); - - double[] scores = scoreCalculator.score( - trainer, - new Accuracy<>(), + IgniteBiFunction<Integer, Object[], Vector> minMaxScalerPreprocessor = new MinMaxScalerTrainer<Integer, Object[]>() + .fit( + ignite, + dataCache, + imputingPreprocessor + ); + + // Tune hyperparams with K-fold Cross-Validation on the split training set. + int[] pSet = new int[]{1, 2}; + int[] maxIterationsSet = new int[]{ 100, 1000}; + int[] batchSizeSet = new int[]{100, 10}; + int[] locIterationsSet = new int[]{10, 100}; + double[] learningRateSet = new double[]{0.1, 0.2, 0.5}; + + int bestP = 1; + int bestMaxIterations = 100; + int bestBatchSize = 10; + int bestLocIterations = 10; + double bestLearningRate = 0.0; + double avg = Double.MIN_VALUE; + + for(int p: pSet){ + for(int maxIterations: maxIterationsSet) { + for (int batchSize : batchSizeSet) { + for (int locIterations : locIterationsSet) { + for (double learningRate : learningRateSet) { + IgniteBiFunction<Integer, Object[], Vector> normalizationPreprocessor + = new NormalizationTrainer<Integer, Object[]>() + .withP(p) + .fit( ignite, dataCache, - split.getTrainFilter(), - normalizationPreprocessor, - lbExtractor, - 3 + minMaxScalerPreprocessor ); - System.out.println("Scores are: " + Arrays.toString(scores)); - - final double currAvg = Arrays.stream(scores).average().orElse(Double.MIN_VALUE); - - if (currAvg > avg) { - avg = currAvg; - bestP = p; - bestMaxIterations = maxIterations; - bestBatchSize = batchSize; - bestLearningRate = learningRate; - bestLocIterations = locIterations; - } - - System.out.println("Avg is: " + currAvg - + " with p: " + p - + " with maxIterations: " + maxIterations - + " with batchSize: " + batchSize - + " with learningRate: " + learningRate - + " with locIterations: " + locIterations - ); + LogisticRegressionSGDTrainer<?> trainer + = new LogisticRegressionSGDTrainer<>(new UpdatesStrategy<>( + new SimpleGDUpdateCalculator(learningRate), + SimpleGDParameterUpdate::sumLocal, + SimpleGDParameterUpdate::avg + ), maxIterations, batchSize, locIterations, 123L); + + CrossValidation<LogisticRegressionModel, Double, Integer, Object[]> + scoreCalculator = new CrossValidation<>(); + + double[] scores = scoreCalculator.score( + trainer, + new Accuracy<>(), + ignite, + dataCache, + split.getTrainFilter(), + normalizationPreprocessor, + lbExtractor, + 3 + ); + + System.out.println("Scores are: " + Arrays.toString(scores)); + + final double currAvg = Arrays.stream(scores).average().orElse(Double.MIN_VALUE); + + if (currAvg > avg) { + avg = currAvg; + bestP = p; + bestMaxIterations = maxIterations; + bestBatchSize = batchSize; + bestLearningRate = learningRate; + bestLocIterations = locIterations; } + + System.out.println("Avg is: " + currAvg + + " with p: " + p + + " with maxIterations: " + maxIterations + + " with batchSize: " + batchSize + + " with learningRate: " + learningRate + + " with locIterations: " + locIterations + ); } } } } + } - System.out.println("Train " - + " with p: " + bestP - + " with maxIterations: " + bestMaxIterations - + " with batchSize: " + bestBatchSize - + " with learningRate: " + bestLearningRate - + " with locIterations: " + bestLocIterations - ); - - IgniteBiFunction<Integer, Object[], Vector> normalizationPreprocessor = new NormalizationTrainer<Integer, Object[]>() - .withP(bestP) - .fit( - ignite, - dataCache, - minMaxScalerPreprocessor - ); - - LogisticRegressionSGDTrainer<?> trainer = new LogisticRegressionSGDTrainer<>(new UpdatesStrategy<>( - new SimpleGDUpdateCalculator(bestLearningRate), - SimpleGDParameterUpdate::sumLocal, - SimpleGDParameterUpdate::avg - ), bestMaxIterations, bestBatchSize, bestLocIterations, 123L); - - System.out.println(">>> Perform the training to get the model."); - LogisticRegressionModel bestMdl = trainer.fit( + System.out.println("Train " + + " with p: " + bestP + + " with maxIterations: " + bestMaxIterations + + " with batchSize: " + bestBatchSize + + " with learningRate: " + bestLearningRate + + " with locIterations: " + bestLocIterations + ); + + IgniteBiFunction<Integer, Object[], Vector> normalizationPreprocessor = new NormalizationTrainer<Integer, Object[]>() + .withP(bestP) + .fit( ignite, dataCache, - split.getTrainFilter(), - normalizationPreprocessor, - lbExtractor - ); - - System.out.println("\n>>> Trained model: " + bestMdl); - - double accuracy = Evaluator.evaluate( - dataCache, - split.getTestFilter(), - bestMdl, - normalizationPreprocessor, - lbExtractor, - new Accuracy<>() + minMaxScalerPreprocessor ); - System.out.println("\n>>> Accuracy " + accuracy); - System.out.println("\n>>> Test Error " + (1 - accuracy)); - - System.out.println(">>> Tutorial step 9 (logistic regression) example completed."); - } - catch (FileNotFoundException e) { - e.printStackTrace(); - } - }); - - igniteThread.start(); - - igniteThread.join(); + LogisticRegressionSGDTrainer<?> trainer = new LogisticRegressionSGDTrainer<>(new UpdatesStrategy<>( + new SimpleGDUpdateCalculator(bestLearningRate), + SimpleGDParameterUpdate::sumLocal, + SimpleGDParameterUpdate::avg + ), bestMaxIterations, bestBatchSize, bestLocIterations, 123L); + + System.out.println(">>> Perform the training to get the model."); + LogisticRegressionModel bestMdl = trainer.fit( + ignite, + dataCache, + split.getTrainFilter(), + normalizationPreprocessor, + lbExtractor + ); + + System.out.println("\n>>> Trained model: " + bestMdl); + + double accuracy = Evaluator.evaluate( + dataCache, + split.getTestFilter(), + bestMdl, + normalizationPreprocessor, + lbExtractor, + new Accuracy<>() + ); + + System.out.println("\n>>> Accuracy " + accuracy); + System.out.println("\n>>> Test Error " + (1 - accuracy)); + + System.out.println(">>> Tutorial step 9 (logistic regression) example completed."); + } + catch (FileNotFoundException e) { + e.printStackTrace(); + } } } }