IGNITE-8907: [ML] Using vectors in featureExtractor this closes #4293
Project: http://git-wip-us.apache.org/repos/asf/ignite/repo Commit: http://git-wip-us.apache.org/repos/asf/ignite/commit/fa56a584 Tree: http://git-wip-us.apache.org/repos/asf/ignite/tree/fa56a584 Diff: http://git-wip-us.apache.org/repos/asf/ignite/diff/fa56a584 Branch: refs/heads/ignite-8446 Commit: fa56a584ca4ceec0480f8fce3acd5b60e23a2438 Parents: 45ea7eb Author: Alexey Platonov <[email protected]> Authored: Tue Jul 10 15:59:46 2018 +0300 Committer: Yury Babak <[email protected]> Committed: Tue Jul 10 15:59:47 2018 +0300 ---------------------------------------------------------------------- .../clustering/KMeansClusterizationExample.java | 3 +- .../AlgorithmSpecificDatasetExample.java | 3 +- .../ml/dataset/CacheBasedDatasetExample.java | 3 +- .../ml/dataset/LocalDatasetExample.java | 3 +- .../ml/knn/KNNClassificationExample.java | 10 ++--- .../examples/ml/knn/KNNRegressionExample.java | 3 +- .../examples/ml/nn/MLPTrainerExample.java | 5 ++- .../ml/preprocessing/BinarizationExample.java | 8 ++-- .../ml/preprocessing/ImputingExample.java | 8 ++-- .../ImputingExampleWithMostFrequentValues.java | 8 ++-- .../ml/preprocessing/MinMaxScalerExample.java | 11 ++--- .../ml/preprocessing/NormalizationExample.java | 9 ++-- .../LinearRegressionLSQRTrainerExample.java | 3 +- ...ssionLSQRTrainerWithMinMaxScalerExample.java | 41 +++++++++--------- .../LinearRegressionSGDTrainerExample.java | 10 ++--- .../LogisticRegressionSGDTrainerSample.java | 10 ++--- ...gressionMultiClassClassificationExample.java | 36 ++++++++++------ .../ml/selection/cv/CrossValidationExample.java | 3 +- .../split/TrainTestDatasetSplitterExample.java | 3 +- .../binary/SVMBinaryClassificationExample.java | 10 ++--- .../SVMMultiClassClassificationExample.java | 43 +++++++++++-------- ...ecisionTreeClassificationTrainerExample.java | 6 +-- .../DecisionTreeRegressionTrainerExample.java | 6 +-- .../GDBOnTreesClassificationTrainerExample.java | 45 ++++++++++++++------ .../GRBOnTreesRegressionTrainerExample.java | 45 ++++++++++++++------ .../RandomForestClassificationExample.java | 6 +-- .../RandomForestRegressionExample.java | 6 +-- .../ml/tutorial/Step_1_Read_and_Learn.java | 4 +- .../examples/ml/tutorial/Step_2_Imputing.java | 6 ++- .../examples/ml/tutorial/Step_3_Categorial.java | 5 ++- .../ml/tutorial/Step_4_Add_age_fare.java | 5 ++- .../examples/ml/tutorial/Step_5_Scaling.java | 9 ++-- .../ignite/examples/ml/tutorial/Step_6_KNN.java | 9 ++-- .../ml/tutorial/Step_7_Split_train_test.java | 9 ++-- .../ignite/examples/ml/tutorial/Step_8_CV.java | 11 ++--- .../ml/tutorial/Step_9_Go_to_LogReg.java | 11 ++--- .../ml/clustering/kmeans/KMeansTrainer.java | 2 +- .../ml/composition/BaggingModelTrainer.java | 15 ++++--- .../boosting/GDBBinaryClassifierTrainer.java | 3 +- .../boosting/GDBRegressionTrainer.java | 3 +- .../ml/composition/boosting/GDBTrainer.java | 9 ++-- .../ignite/ml/dataset/DatasetFactory.java | 25 +++++------ .../builder/data/SimpleDatasetDataBuilder.java | 13 +++--- .../data/SimpleLabeledDatasetDataBuilder.java | 13 +++--- .../java/org/apache/ignite/ml/knn/KNNUtils.java | 3 +- .../KNNClassificationTrainer.java | 3 +- .../ml/knn/regression/KNNRegressionTrainer.java | 3 +- .../java/org/apache/ignite/ml/math/Vector.java | 9 ++++ .../org/apache/ignite/ml/nn/MLPTrainer.java | 11 +++-- .../binarization/BinarizationPreprocessor.java | 17 ++++---- .../binarization/BinarizationTrainer.java | 5 ++- .../StringEncoderPreprocessor.java | 8 ++-- .../stringencoder/StringEncoderTrainer.java | 3 +- .../imputing/ImputerPreprocessor.java | 23 +++++----- .../preprocessing/imputing/ImputerTrainer.java | 40 ++++++++--------- .../minmaxscaling/MinMaxScalerPreprocessor.java | 19 +++++---- .../minmaxscaling/MinMaxScalerTrainer.java | 25 +++++------ .../NormalizationPreprocessor.java | 21 ++++----- .../normalization/NormalizationTrainer.java | 5 ++- .../linear/FeatureExtractorWrapper.java | 17 ++++---- .../linear/LinearRegressionLSQRTrainer.java | 2 +- .../linear/LinearRegressionSGDTrainer.java | 10 ++--- .../binomial/LogisticRegressionSGDTrainer.java | 8 ++-- .../LogRegressionMultiClassTrainer.java | 3 +- .../ignite/ml/selection/cv/CrossValidation.java | 18 ++++---- .../cursor/CacheBasedLabelPairCursor.java | 11 +++-- .../scoring/cursor/LocalLabelPairCursor.java | 9 ++-- .../selection/scoring/evaluator/Evaluator.java | 4 +- ...abeledDatasetPartitionDataBuilderOnHeap.java | 13 +++--- .../SVMLinearBinaryClassificationTrainer.java | 8 ++-- ...VMLinearMultiClassClassificationTrainer.java | 7 +-- .../ignite/ml/trainers/DatasetTrainer.java | 11 ++--- .../org/apache/ignite/ml/tree/DecisionTree.java | 3 +- .../ml/tree/data/DecisionTreeDataBuilder.java | 7 +-- .../org/apache/ignite/ml/LocalModelsTest.java | 3 +- .../ignite/ml/clustering/KMeansTrainerTest.java | 3 +- .../ml/composition/boosting/GDBTrainerTest.java | 4 +- .../ignite/ml/knn/KNNClassificationTest.java | 9 ++-- .../apache/ignite/ml/knn/KNNRegressionTest.java | 9 ++-- .../ml/math/isolve/lsqr/LSQROnHeapTest.java | 7 +-- .../ignite/ml/nn/MLPTrainerIntegrationTest.java | 13 ++++-- .../org/apache/ignite/ml/nn/MLPTrainerTest.java | 21 +++++---- .../MLPTrainerMnistIntegrationTest.java | 7 ++- .../ml/nn/performance/MLPTrainerMnistTest.java | 11 +++-- .../BinarizationPreprocessorTest.java | 5 ++- .../binarization/BinarizationTrainerTest.java | 5 ++- .../encoding/StringEncoderPreprocessorTest.java | 2 +- .../encoding/StringEncoderTrainerTest.java | 2 +- .../imputing/ImputerPreprocessorTest.java | 8 ++-- .../imputing/ImputerTrainerTest.java | 20 +++++---- .../MinMaxScalerPreprocessorTest.java | 6 ++- .../minmaxscaling/MinMaxScalerTrainerTest.java | 25 +++++------ .../NormalizationPreprocessorTest.java | 6 ++- .../normalization/NormalizationTrainerTest.java | 6 +-- .../linear/LinearRegressionLSQRTrainerTest.java | 5 ++- .../linear/LinearRegressionSGDTrainerTest.java | 12 +++--- .../logistic/LogRegMultiClassTrainerTest.java | 3 +- .../LogisticRegressionSGDTrainerTest.java | 12 +++--- .../ignite/ml/selection/SelectionTestSuite.java | 3 +- .../ml/selection/cv/CrossValidationTest.java | 5 ++- .../cursor/CacheBasedLabelPairCursorTest.java | 3 +- .../cursor/LocalLabelPairCursorTest.java | 3 +- .../selection/scoring/metric/AccuracyTest.java | 2 - .../ignite/ml/svm/SVMBinaryTrainerTest.java | 10 ++--- .../ignite/ml/svm/SVMMultiClassTrainerTest.java | 10 ++--- ...reeClassificationTrainerIntegrationTest.java | 8 ++-- .../DecisionTreeClassificationTrainerTest.java | 12 ++++-- ...ionTreeRegressionTrainerIntegrationTest.java | 8 ++-- .../tree/DecisionTreeRegressionTrainerTest.java | 11 +++-- .../DecisionTreeMNISTIntegrationTest.java | 3 +- .../tree/performance/DecisionTreeMNISTTest.java | 3 +- .../RandomForestClassifierTrainerTest.java | 3 +- .../RandomForestRegressionTrainerTest.java | 3 +- 113 files changed, 637 insertions(+), 479 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/ignite/blob/fa56a584/examples/src/main/java/org/apache/ignite/examples/ml/clustering/KMeansClusterizationExample.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/clustering/KMeansClusterizationExample.java b/examples/src/main/java/org/apache/ignite/examples/ml/clustering/KMeansClusterizationExample.java index cb140d4..44d3a23 100644 --- a/examples/src/main/java/org/apache/ignite/examples/ml/clustering/KMeansClusterizationExample.java +++ b/examples/src/main/java/org/apache/ignite/examples/ml/clustering/KMeansClusterizationExample.java @@ -31,6 +31,7 @@ import org.apache.ignite.ml.clustering.kmeans.KMeansModel; import org.apache.ignite.ml.clustering.kmeans.KMeansTrainer; import org.apache.ignite.ml.knn.classification.KNNClassificationTrainer; import org.apache.ignite.ml.math.Tracer; +import org.apache.ignite.ml.math.VectorUtils; import org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector; import org.apache.ignite.thread.IgniteThread; @@ -58,7 +59,7 @@ public class KMeansClusterizationExample { KMeansModel mdl = trainer.fit( ignite, dataCache, - (k, v) -> Arrays.copyOfRange(v, 1, v.length), + (k, v) -> VectorUtils.of(Arrays.copyOfRange(v, 1, v.length)), (k, v) -> v[0] ); http://git-wip-us.apache.org/repos/asf/ignite/blob/fa56a584/examples/src/main/java/org/apache/ignite/examples/ml/dataset/AlgorithmSpecificDatasetExample.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/dataset/AlgorithmSpecificDatasetExample.java b/examples/src/main/java/org/apache/ignite/examples/ml/dataset/AlgorithmSpecificDatasetExample.java index b73e5fb..a3b6abc 100644 --- a/examples/src/main/java/org/apache/ignite/examples/ml/dataset/AlgorithmSpecificDatasetExample.java +++ b/examples/src/main/java/org/apache/ignite/examples/ml/dataset/AlgorithmSpecificDatasetExample.java @@ -31,6 +31,7 @@ import org.apache.ignite.ml.dataset.DatasetFactory; import org.apache.ignite.ml.dataset.primitive.DatasetWrapper; import org.apache.ignite.ml.dataset.primitive.builder.data.SimpleLabeledDatasetDataBuilder; import org.apache.ignite.ml.dataset.primitive.data.SimpleLabeledDatasetData; +import org.apache.ignite.ml.math.VectorUtils; /** * Example that shows how to implement your own algorithm (gradient descent trainer for linear regression) which uses @@ -65,7 +66,7 @@ public class AlgorithmSpecificDatasetExample { persons, (upstream, upstreamSize) -> new AlgorithmSpecificPartitionContext(), new SimpleLabeledDatasetDataBuilder<Integer, Person, AlgorithmSpecificPartitionContext>( - (k, v) -> new double[] {v.getAge()}, + (k, v) -> VectorUtils.of(v.getAge()), (k, v) -> new double[] {v.getSalary()} ).andThen((data, ctx) -> { double[] features = data.getFeatures(); http://git-wip-us.apache.org/repos/asf/ignite/blob/fa56a584/examples/src/main/java/org/apache/ignite/examples/ml/dataset/CacheBasedDatasetExample.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/dataset/CacheBasedDatasetExample.java b/examples/src/main/java/org/apache/ignite/examples/ml/dataset/CacheBasedDatasetExample.java index 1ab9210..47b0c0c 100644 --- a/examples/src/main/java/org/apache/ignite/examples/ml/dataset/CacheBasedDatasetExample.java +++ b/examples/src/main/java/org/apache/ignite/examples/ml/dataset/CacheBasedDatasetExample.java @@ -26,6 +26,7 @@ import org.apache.ignite.configuration.CacheConfiguration; import org.apache.ignite.examples.ml.dataset.model.Person; import org.apache.ignite.ml.dataset.DatasetFactory; import org.apache.ignite.ml.dataset.primitive.SimpleDataset; +import org.apache.ignite.ml.math.VectorUtils; /** * Example that shows how to create dataset based on an existing Ignite Cache and then use it to calculate {@code mean} @@ -43,7 +44,7 @@ public class CacheBasedDatasetExample { try (SimpleDataset<?> dataset = DatasetFactory.createSimpleDataset( ignite, persons, - (k, v) -> new double[]{ v.getAge(), v.getSalary() } + (k, v) -> VectorUtils.of(v.getAge(), v.getSalary()) )) { // Calculation of the mean value. This calculation will be performed in map-reduce manner. double[] mean = dataset.mean(); http://git-wip-us.apache.org/repos/asf/ignite/blob/fa56a584/examples/src/main/java/org/apache/ignite/examples/ml/dataset/LocalDatasetExample.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/dataset/LocalDatasetExample.java b/examples/src/main/java/org/apache/ignite/examples/ml/dataset/LocalDatasetExample.java index 7ede803..af1ae67 100644 --- a/examples/src/main/java/org/apache/ignite/examples/ml/dataset/LocalDatasetExample.java +++ b/examples/src/main/java/org/apache/ignite/examples/ml/dataset/LocalDatasetExample.java @@ -25,6 +25,7 @@ import org.apache.ignite.Ignition; import org.apache.ignite.examples.ml.dataset.model.Person; import org.apache.ignite.ml.dataset.DatasetFactory; import org.apache.ignite.ml.dataset.primitive.SimpleDataset; +import org.apache.ignite.ml.math.VectorUtils; /** * Example that shows how to create dataset based on an existing local storage and then use it to calculate {@code mean} @@ -42,7 +43,7 @@ public class LocalDatasetExample { try (SimpleDataset<?> dataset = DatasetFactory.createSimpleDataset( persons, 2, - (k, v) -> new double[]{ v.getAge(), v.getSalary() } + (k, v) -> VectorUtils.of(v.getAge(), v.getSalary()) )) { // Calculation of the mean value. This calculation will be performed in map-reduce manner. double[] mean = dataset.mean(); http://git-wip-us.apache.org/repos/asf/ignite/blob/fa56a584/examples/src/main/java/org/apache/ignite/examples/ml/knn/KNNClassificationExample.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/knn/KNNClassificationExample.java b/examples/src/main/java/org/apache/ignite/examples/ml/knn/KNNClassificationExample.java index 15375a1..2e13cd2 100644 --- a/examples/src/main/java/org/apache/ignite/examples/ml/knn/KNNClassificationExample.java +++ b/examples/src/main/java/org/apache/ignite/examples/ml/knn/KNNClassificationExample.java @@ -17,6 +17,9 @@ package org.apache.ignite.examples.ml.knn; +import java.util.Arrays; +import java.util.UUID; +import javax.cache.Cache; import org.apache.ignite.Ignite; import org.apache.ignite.IgniteCache; import org.apache.ignite.Ignition; @@ -27,14 +30,11 @@ import org.apache.ignite.configuration.CacheConfiguration; import org.apache.ignite.ml.knn.classification.KNNClassificationModel; import org.apache.ignite.ml.knn.classification.KNNClassificationTrainer; import org.apache.ignite.ml.knn.classification.KNNStrategy; +import org.apache.ignite.ml.math.VectorUtils; import org.apache.ignite.ml.math.distances.EuclideanDistance; import org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector; import org.apache.ignite.thread.IgniteThread; -import javax.cache.Cache; -import java.util.Arrays; -import java.util.UUID; - /** * Run kNN multi-class classification trainer over distributed dataset. * @@ -58,7 +58,7 @@ public class KNNClassificationExample { KNNClassificationModel knnMdl = trainer.fit( ignite, dataCache, - (k, v) -> Arrays.copyOfRange(v, 1, v.length), + (k, v) -> VectorUtils.of(Arrays.copyOfRange(v, 1, v.length)), (k, v) -> v[0] ).withK(3) .withDistanceMeasure(new EuclideanDistance()) http://git-wip-us.apache.org/repos/asf/ignite/blob/fa56a584/examples/src/main/java/org/apache/ignite/examples/ml/knn/KNNRegressionExample.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/knn/KNNRegressionExample.java b/examples/src/main/java/org/apache/ignite/examples/ml/knn/KNNRegressionExample.java index 757c8e6..566146a 100644 --- a/examples/src/main/java/org/apache/ignite/examples/ml/knn/KNNRegressionExample.java +++ b/examples/src/main/java/org/apache/ignite/examples/ml/knn/KNNRegressionExample.java @@ -31,6 +31,7 @@ import org.apache.ignite.ml.knn.classification.KNNClassificationTrainer; import org.apache.ignite.ml.knn.classification.KNNStrategy; import org.apache.ignite.ml.knn.regression.KNNRegressionModel; import org.apache.ignite.ml.knn.regression.KNNRegressionTrainer; +import org.apache.ignite.ml.math.VectorUtils; import org.apache.ignite.ml.math.distances.ManhattanDistance; import org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector; import org.apache.ignite.thread.IgniteThread; @@ -58,7 +59,7 @@ public class KNNRegressionExample { KNNRegressionModel knnMdl = (KNNRegressionModel) trainer.fit( ignite, dataCache, - (k, v) -> Arrays.copyOfRange(v, 1, v.length), + (k, v) -> VectorUtils.of(Arrays.copyOfRange(v, 1, v.length)), (k, v) -> v[0] ).withK(5) .withDistanceMeasure(new ManhattanDistance()) http://git-wip-us.apache.org/repos/asf/ignite/blob/fa56a584/examples/src/main/java/org/apache/ignite/examples/ml/nn/MLPTrainerExample.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/nn/MLPTrainerExample.java b/examples/src/main/java/org/apache/ignite/examples/ml/nn/MLPTrainerExample.java index 5d1ac38..e7b7489 100644 --- a/examples/src/main/java/org/apache/ignite/examples/ml/nn/MLPTrainerExample.java +++ b/examples/src/main/java/org/apache/ignite/examples/ml/nn/MLPTrainerExample.java @@ -24,15 +24,16 @@ import org.apache.ignite.cache.affinity.rendezvous.RendezvousAffinityFunction; import org.apache.ignite.configuration.CacheConfiguration; import org.apache.ignite.examples.ExampleNodeStartup; import org.apache.ignite.ml.math.Matrix; +import org.apache.ignite.ml.math.VectorUtils; import org.apache.ignite.ml.math.impls.matrix.DenseLocalOnHeapMatrix; import org.apache.ignite.ml.nn.Activators; import org.apache.ignite.ml.nn.MLPTrainer; import org.apache.ignite.ml.nn.MultilayerPerceptron; +import org.apache.ignite.ml.nn.UpdatesStrategy; import org.apache.ignite.ml.nn.architecture.MLPArchitecture; import org.apache.ignite.ml.optimization.LossFunctions; import org.apache.ignite.ml.optimization.updatecalculators.SimpleGDParameterUpdate; import org.apache.ignite.ml.optimization.updatecalculators.SimpleGDUpdateCalculator; -import org.apache.ignite.ml.nn.UpdatesStrategy; import org.apache.ignite.thread.IgniteThread; /** @@ -100,7 +101,7 @@ public class MLPTrainerExample { MultilayerPerceptron mlp = trainer.fit( ignite, trainingSet, - (k, v) -> new double[] {v.x, v.y}, + (k, v) -> VectorUtils.of(v.x, v.y), (k, v) -> new double[] {v.lb} ); http://git-wip-us.apache.org/repos/asf/ignite/blob/fa56a584/examples/src/main/java/org/apache/ignite/examples/ml/preprocessing/BinarizationExample.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/preprocessing/BinarizationExample.java b/examples/src/main/java/org/apache/ignite/examples/ml/preprocessing/BinarizationExample.java index a8f2fa0..a54e5d3 100644 --- a/examples/src/main/java/org/apache/ignite/examples/ml/preprocessing/BinarizationExample.java +++ b/examples/src/main/java/org/apache/ignite/examples/ml/preprocessing/BinarizationExample.java @@ -26,6 +26,8 @@ import org.apache.ignite.configuration.CacheConfiguration; import org.apache.ignite.examples.ml.dataset.model.Person; import org.apache.ignite.ml.dataset.DatasetFactory; import org.apache.ignite.ml.dataset.primitive.SimpleDataset; +import org.apache.ignite.ml.math.Vector; +import org.apache.ignite.ml.math.VectorUtils; import org.apache.ignite.ml.math.functions.IgniteBiFunction; import org.apache.ignite.ml.preprocessing.binarization.BinarizationTrainer; @@ -41,12 +43,12 @@ public class BinarizationExample { IgniteCache<Integer, Person> persons = createCache(ignite); // Defines first preprocessor that extracts features from an upstream data. - IgniteBiFunction<Integer, Person, double[]> featureExtractor = (k, v) -> new double[] { + IgniteBiFunction<Integer, Person, Vector> featureExtractor = (k, v) -> VectorUtils.of( v.getAge() - }; + ); // Defines second preprocessor that normalizes features. - IgniteBiFunction<Integer, Person, double[]> preprocessor = new BinarizationTrainer<Integer, Person>() + IgniteBiFunction<Integer, Person, Vector> preprocessor = new BinarizationTrainer<Integer, Person>() .withThreshold(40) .fit(ignite, persons, featureExtractor); http://git-wip-us.apache.org/repos/asf/ignite/blob/fa56a584/examples/src/main/java/org/apache/ignite/examples/ml/preprocessing/ImputingExample.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/preprocessing/ImputingExample.java b/examples/src/main/java/org/apache/ignite/examples/ml/preprocessing/ImputingExample.java index 68483ad..582f420 100644 --- a/examples/src/main/java/org/apache/ignite/examples/ml/preprocessing/ImputingExample.java +++ b/examples/src/main/java/org/apache/ignite/examples/ml/preprocessing/ImputingExample.java @@ -26,6 +26,8 @@ import org.apache.ignite.configuration.CacheConfiguration; import org.apache.ignite.examples.ml.dataset.model.Person; import org.apache.ignite.ml.dataset.DatasetFactory; import org.apache.ignite.ml.dataset.primitive.SimpleDataset; +import org.apache.ignite.ml.math.Vector; +import org.apache.ignite.ml.math.VectorUtils; import org.apache.ignite.ml.math.functions.IgniteBiFunction; import org.apache.ignite.ml.preprocessing.imputing.ImputerTrainer; @@ -41,13 +43,13 @@ public class ImputingExample { IgniteCache<Integer, Person> persons = createCache(ignite); // Defines first preprocessor that extracts features from an upstream data. - IgniteBiFunction<Integer, Person, double[]> featureExtractor = (k, v) -> new double[] { + IgniteBiFunction<Integer, Person, Vector> featureExtractor = (k, v) -> VectorUtils.of( v.getAge(), v.getSalary() - }; + ); // Defines second preprocessor that imputing features. - IgniteBiFunction<Integer, Person, double[]> preprocessor = new ImputerTrainer<Integer, Person>() + IgniteBiFunction<Integer, Person, Vector> preprocessor = new ImputerTrainer<Integer, Person>() .fit(ignite, persons, featureExtractor); // Creates a cache based simple dataset containing features and providing standard dataset API. http://git-wip-us.apache.org/repos/asf/ignite/blob/fa56a584/examples/src/main/java/org/apache/ignite/examples/ml/preprocessing/ImputingExampleWithMostFrequentValues.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/preprocessing/ImputingExampleWithMostFrequentValues.java b/examples/src/main/java/org/apache/ignite/examples/ml/preprocessing/ImputingExampleWithMostFrequentValues.java index 2611c46..7b08c7a 100644 --- a/examples/src/main/java/org/apache/ignite/examples/ml/preprocessing/ImputingExampleWithMostFrequentValues.java +++ b/examples/src/main/java/org/apache/ignite/examples/ml/preprocessing/ImputingExampleWithMostFrequentValues.java @@ -26,6 +26,8 @@ import org.apache.ignite.configuration.CacheConfiguration; import org.apache.ignite.examples.ml.dataset.model.Person; import org.apache.ignite.ml.dataset.DatasetFactory; import org.apache.ignite.ml.dataset.primitive.SimpleDataset; +import org.apache.ignite.ml.math.Vector; +import org.apache.ignite.ml.math.VectorUtils; import org.apache.ignite.ml.math.functions.IgniteBiFunction; import org.apache.ignite.ml.preprocessing.imputing.ImputerTrainer; import org.apache.ignite.ml.preprocessing.imputing.ImputingStrategy; @@ -42,13 +44,13 @@ public class ImputingExampleWithMostFrequentValues { IgniteCache<Integer, Person> persons = createCache(ignite); // Defines first preprocessor that extracts features from an upstream data. - IgniteBiFunction<Integer, Person, double[]> featureExtractor = (k, v) -> new double[] { + IgniteBiFunction<Integer, Person, Vector> featureExtractor = (k, v) -> VectorUtils.of( v.getAge(), v.getSalary() - }; + ); // Defines second preprocessor that normalizes features. - IgniteBiFunction<Integer, Person, double[]> preprocessor = new ImputerTrainer<Integer, Person>() + IgniteBiFunction<Integer, Person, Vector> preprocessor = new ImputerTrainer<Integer, Person>() .withImputingStrategy(ImputingStrategy.MOST_FREQUENT) .fit(ignite, persons, featureExtractor); http://git-wip-us.apache.org/repos/asf/ignite/blob/fa56a584/examples/src/main/java/org/apache/ignite/examples/ml/preprocessing/MinMaxScalerExample.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/preprocessing/MinMaxScalerExample.java b/examples/src/main/java/org/apache/ignite/examples/ml/preprocessing/MinMaxScalerExample.java index e60b72b..ababa65 100644 --- a/examples/src/main/java/org/apache/ignite/examples/ml/preprocessing/MinMaxScalerExample.java +++ b/examples/src/main/java/org/apache/ignite/examples/ml/preprocessing/MinMaxScalerExample.java @@ -17,6 +17,7 @@ package org.apache.ignite.examples.ml.preprocessing; +import java.util.Arrays; import org.apache.ignite.Ignite; import org.apache.ignite.IgniteCache; import org.apache.ignite.Ignition; @@ -25,11 +26,11 @@ import org.apache.ignite.configuration.CacheConfiguration; import org.apache.ignite.examples.ml.dataset.model.Person; import org.apache.ignite.ml.dataset.DatasetFactory; import org.apache.ignite.ml.dataset.primitive.SimpleDataset; +import org.apache.ignite.ml.math.Vector; +import org.apache.ignite.ml.math.VectorUtils; import org.apache.ignite.ml.math.functions.IgniteBiFunction; import org.apache.ignite.ml.preprocessing.minmaxscaling.MinMaxScalerTrainer; -import java.util.Arrays; - /** * Example that shows how to use MinMaxScaler preprocessor to scale the given data. * @@ -46,13 +47,13 @@ public class MinMaxScalerExample { IgniteCache<Integer, Person> persons = createCache(ignite); // Defines first preprocessor that extracts features from an upstream data. - IgniteBiFunction<Integer, Person, double[]> featureExtractor = (k, v) -> new double[] { + IgniteBiFunction<Integer, Person, Vector> featureExtractor = (k, v) -> VectorUtils.of( v.getAge(), v.getSalary() - }; + ); // Defines second preprocessor that normalizes features. - IgniteBiFunction<Integer, Person, double[]> preprocessor = new MinMaxScalerTrainer<Integer, Person>() + IgniteBiFunction<Integer, Person, Vector> preprocessor = new MinMaxScalerTrainer<Integer, Person>() .fit(ignite, persons, featureExtractor); // Creates a cache based simple dataset containing features and providing standard dataset API. http://git-wip-us.apache.org/repos/asf/ignite/blob/fa56a584/examples/src/main/java/org/apache/ignite/examples/ml/preprocessing/NormalizationExample.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/preprocessing/NormalizationExample.java b/examples/src/main/java/org/apache/ignite/examples/ml/preprocessing/NormalizationExample.java index 16169ab..2df369e 100644 --- a/examples/src/main/java/org/apache/ignite/examples/ml/preprocessing/NormalizationExample.java +++ b/examples/src/main/java/org/apache/ignite/examples/ml/preprocessing/NormalizationExample.java @@ -26,8 +26,9 @@ import org.apache.ignite.configuration.CacheConfiguration; import org.apache.ignite.examples.ml.dataset.model.Person; import org.apache.ignite.ml.dataset.DatasetFactory; import org.apache.ignite.ml.dataset.primitive.SimpleDataset; +import org.apache.ignite.ml.math.Vector; +import org.apache.ignite.ml.math.VectorUtils; import org.apache.ignite.ml.math.functions.IgniteBiFunction; -import org.apache.ignite.ml.preprocessing.binarization.BinarizationTrainer; import org.apache.ignite.ml.preprocessing.normalization.NormalizationTrainer; /** @@ -42,13 +43,13 @@ public class NormalizationExample { IgniteCache<Integer, Person> persons = createCache(ignite); // Defines first preprocessor that extracts features from an upstream data. - IgniteBiFunction<Integer, Person, double[]> featureExtractor = (k, v) -> new double[] { + IgniteBiFunction<Integer, Person, Vector> featureExtractor = (k, v) -> VectorUtils.of( v.getAge(), v.getSalary() - }; + ); // Defines second preprocessor that normalizes features. - IgniteBiFunction<Integer, Person, double[]> preprocessor = new NormalizationTrainer<Integer, Person>() + IgniteBiFunction<Integer, Person, Vector> preprocessor = new NormalizationTrainer<Integer, Person>() .withP(1) .fit(ignite, persons, featureExtractor); http://git-wip-us.apache.org/repos/asf/ignite/blob/fa56a584/examples/src/main/java/org/apache/ignite/examples/ml/regression/linear/LinearRegressionLSQRTrainerExample.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/regression/linear/LinearRegressionLSQRTrainerExample.java b/examples/src/main/java/org/apache/ignite/examples/ml/regression/linear/LinearRegressionLSQRTrainerExample.java index bfb4e0a..e7a3daf 100644 --- a/examples/src/main/java/org/apache/ignite/examples/ml/regression/linear/LinearRegressionLSQRTrainerExample.java +++ b/examples/src/main/java/org/apache/ignite/examples/ml/regression/linear/LinearRegressionLSQRTrainerExample.java @@ -27,6 +27,7 @@ import org.apache.ignite.cache.affinity.rendezvous.RendezvousAffinityFunction; import org.apache.ignite.cache.query.QueryCursor; import org.apache.ignite.cache.query.ScanQuery; import org.apache.ignite.configuration.CacheConfiguration; +import org.apache.ignite.ml.math.VectorUtils; import org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector; import org.apache.ignite.ml.regressions.linear.LinearRegressionLSQRTrainer; import org.apache.ignite.ml.regressions.linear.LinearRegressionModel; @@ -114,7 +115,7 @@ public class LinearRegressionLSQRTrainerExample { LinearRegressionModel mdl = trainer.fit( ignite, dataCache, - (k, v) -> Arrays.copyOfRange(v, 1, v.length), + (k, v) -> VectorUtils.of(Arrays.copyOfRange(v, 1, v.length)), (k, v) -> v[0] ); http://git-wip-us.apache.org/repos/asf/ignite/blob/fa56a584/examples/src/main/java/org/apache/ignite/examples/ml/regression/linear/LinearRegressionLSQRTrainerWithMinMaxScalerExample.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/regression/linear/LinearRegressionLSQRTrainerWithMinMaxScalerExample.java b/examples/src/main/java/org/apache/ignite/examples/ml/regression/linear/LinearRegressionLSQRTrainerWithMinMaxScalerExample.java index d3ab681..a56db61 100644 --- a/examples/src/main/java/org/apache/ignite/examples/ml/regression/linear/LinearRegressionLSQRTrainerWithMinMaxScalerExample.java +++ b/examples/src/main/java/org/apache/ignite/examples/ml/regression/linear/LinearRegressionLSQRTrainerWithMinMaxScalerExample.java @@ -17,6 +17,9 @@ package org.apache.ignite.examples.ml.regression.linear; +import java.util.Arrays; +import java.util.UUID; +import javax.cache.Cache; import org.apache.ignite.Ignite; import org.apache.ignite.IgniteCache; import org.apache.ignite.Ignition; @@ -24,18 +27,15 @@ import org.apache.ignite.cache.affinity.rendezvous.RendezvousAffinityFunction; import org.apache.ignite.cache.query.QueryCursor; import org.apache.ignite.cache.query.ScanQuery; import org.apache.ignite.configuration.CacheConfiguration; +import org.apache.ignite.ml.math.Vector; +import org.apache.ignite.ml.math.VectorUtils; import org.apache.ignite.ml.math.functions.IgniteBiFunction; -import org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector; import org.apache.ignite.ml.preprocessing.minmaxscaling.MinMaxScalerPreprocessor; import org.apache.ignite.ml.preprocessing.minmaxscaling.MinMaxScalerTrainer; import org.apache.ignite.ml.regressions.linear.LinearRegressionLSQRTrainer; import org.apache.ignite.ml.regressions.linear.LinearRegressionModel; import org.apache.ignite.thread.IgniteThread; -import javax.cache.Cache; -import java.util.Arrays; -import java.util.UUID; - /** * Run linear regression model over cached dataset. * @@ -111,23 +111,26 @@ public class LinearRegressionLSQRTrainerWithMinMaxScalerExample { IgniteThread igniteThread = new IgniteThread(ignite.configuration().getIgniteInstanceName(), LinearRegressionLSQRTrainerWithMinMaxScalerExample.class.getSimpleName(), () -> { - IgniteCache<Integer, double[]> dataCache = getTestCache(ignite); + IgniteCache<Integer, Vector> dataCache = getTestCache(ignite); System.out.println(">>> Create new minmaxscaling trainer object."); - MinMaxScalerTrainer<Integer, double[]> normalizationTrainer = new MinMaxScalerTrainer<>(); + MinMaxScalerTrainer<Integer, Vector> normalizationTrainer = new MinMaxScalerTrainer<>(); System.out.println(">>> Perform the training to get the minmaxscaling preprocessor."); - IgniteBiFunction<Integer, double[], double[]> preprocessor = normalizationTrainer.fit( + IgniteBiFunction<Integer, Vector, Vector> preprocessor = normalizationTrainer.fit( ignite, dataCache, - (k, v) -> Arrays.copyOfRange(v, 1, v.length) + (k, v) -> { + double[] arr = v.asArray(); + return VectorUtils.of(Arrays.copyOfRange(arr, 1, arr.length)); + } ); System.out.println(">>> Create new linear regression trainer object."); LinearRegressionLSQRTrainer trainer = new LinearRegressionLSQRTrainer(); System.out.println(">>> Perform the training to get the model."); - LinearRegressionModel mdl = trainer.fit(ignite, dataCache, preprocessor, (k, v) -> v[0]); + LinearRegressionModel mdl = trainer.fit(ignite, dataCache, preprocessor, (k, v) -> v.get(0)); System.out.println(">>> Linear regression model: " + mdl); @@ -135,13 +138,13 @@ public class LinearRegressionLSQRTrainerWithMinMaxScalerExample { System.out.println(">>> | Prediction\t| Ground Truth\t|"); System.out.println(">>> ---------------------------------"); - try (QueryCursor<Cache.Entry<Integer, double[]>> observations = dataCache.query(new ScanQuery<>())) { - for (Cache.Entry<Integer, double[]> observation : observations) { + try (QueryCursor<Cache.Entry<Integer, Vector>> observations = dataCache.query(new ScanQuery<>())) { + for (Cache.Entry<Integer, Vector> observation : observations) { Integer key = observation.getKey(); - double[] val = observation.getValue(); - double groundTruth = val[0]; + Vector val = observation.getValue(); + double groundTruth = val.get(0); - double prediction = mdl.apply(new DenseLocalOnHeapVector(preprocessor.apply(key, val))); + double prediction = mdl.apply(preprocessor.apply(key, val)); System.out.printf(">>> | %.4f\t\t| %.4f\t\t|\n", prediction, groundTruth); } @@ -162,15 +165,15 @@ public class LinearRegressionLSQRTrainerWithMinMaxScalerExample { * @param ignite Ignite instance. * @return Filled Ignite Cache. */ - private static IgniteCache<Integer, double[]> getTestCache(Ignite ignite) { - CacheConfiguration<Integer, double[]> cacheConfiguration = new CacheConfiguration<>(); + private static IgniteCache<Integer, Vector> getTestCache(Ignite ignite) { + CacheConfiguration<Integer, Vector> cacheConfiguration = new CacheConfiguration<>(); cacheConfiguration.setName("TEST_" + UUID.randomUUID()); cacheConfiguration.setAffinity(new RendezvousAffinityFunction(false, 10)); - IgniteCache<Integer, double[]> cache = ignite.createCache(cacheConfiguration); + IgniteCache<Integer, Vector> cache = ignite.createCache(cacheConfiguration); for (int i = 0; i < data.length; i++) - cache.put(i, data[i]); + cache.put(i, VectorUtils.of(data[i])); return cache; } http://git-wip-us.apache.org/repos/asf/ignite/blob/fa56a584/examples/src/main/java/org/apache/ignite/examples/ml/regression/linear/LinearRegressionSGDTrainerExample.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/regression/linear/LinearRegressionSGDTrainerExample.java b/examples/src/main/java/org/apache/ignite/examples/ml/regression/linear/LinearRegressionSGDTrainerExample.java index da5f942..78874eb 100644 --- a/examples/src/main/java/org/apache/ignite/examples/ml/regression/linear/LinearRegressionSGDTrainerExample.java +++ b/examples/src/main/java/org/apache/ignite/examples/ml/regression/linear/LinearRegressionSGDTrainerExample.java @@ -17,6 +17,9 @@ package org.apache.ignite.examples.ml.regression.linear; +import java.util.Arrays; +import java.util.UUID; +import javax.cache.Cache; import org.apache.ignite.Ignite; import org.apache.ignite.IgniteCache; import org.apache.ignite.Ignition; @@ -24,6 +27,7 @@ import org.apache.ignite.cache.affinity.rendezvous.RendezvousAffinityFunction; import org.apache.ignite.cache.query.QueryCursor; import org.apache.ignite.cache.query.ScanQuery; import org.apache.ignite.configuration.CacheConfiguration; +import org.apache.ignite.ml.math.VectorUtils; import org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector; import org.apache.ignite.ml.nn.UpdatesStrategy; import org.apache.ignite.ml.optimization.updatecalculators.RPropParameterUpdate; @@ -32,10 +36,6 @@ import org.apache.ignite.ml.regressions.linear.LinearRegressionModel; import org.apache.ignite.ml.regressions.linear.LinearRegressionSGDTrainer; import org.apache.ignite.thread.IgniteThread; -import javax.cache.Cache; -import java.util.Arrays; -import java.util.UUID; - /** * Run linear regression model over cached dataset. * @@ -122,7 +122,7 @@ public class LinearRegressionSGDTrainerExample { LinearRegressionModel mdl = trainer.fit( ignite, dataCache, - (k, v) -> Arrays.copyOfRange(v, 1, v.length), + (k, v) -> VectorUtils.of(Arrays.copyOfRange(v, 1, v.length)), (k, v) -> v[0] ); http://git-wip-us.apache.org/repos/asf/ignite/blob/fa56a584/examples/src/main/java/org/apache/ignite/examples/ml/regression/logistic/binary/LogisticRegressionSGDTrainerSample.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/regression/logistic/binary/LogisticRegressionSGDTrainerSample.java b/examples/src/main/java/org/apache/ignite/examples/ml/regression/logistic/binary/LogisticRegressionSGDTrainerSample.java index 215d7a4..27ecead 100644 --- a/examples/src/main/java/org/apache/ignite/examples/ml/regression/logistic/binary/LogisticRegressionSGDTrainerSample.java +++ b/examples/src/main/java/org/apache/ignite/examples/ml/regression/logistic/binary/LogisticRegressionSGDTrainerSample.java @@ -17,6 +17,9 @@ package org.apache.ignite.examples.ml.regression.logistic.binary; +import java.util.Arrays; +import java.util.UUID; +import javax.cache.Cache; import org.apache.ignite.Ignite; import org.apache.ignite.IgniteCache; import org.apache.ignite.Ignition; @@ -24,6 +27,7 @@ import org.apache.ignite.cache.affinity.rendezvous.RendezvousAffinityFunction; import org.apache.ignite.cache.query.QueryCursor; import org.apache.ignite.cache.query.ScanQuery; import org.apache.ignite.configuration.CacheConfiguration; +import org.apache.ignite.ml.math.VectorUtils; import org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector; import org.apache.ignite.ml.nn.UpdatesStrategy; import org.apache.ignite.ml.optimization.updatecalculators.SimpleGDParameterUpdate; @@ -32,10 +36,6 @@ import org.apache.ignite.ml.regressions.logistic.binomial.LogisticRegressionMode import org.apache.ignite.ml.regressions.logistic.binomial.LogisticRegressionSGDTrainer; import org.apache.ignite.thread.IgniteThread; -import javax.cache.Cache; -import java.util.Arrays; -import java.util.UUID; - /** * Run logistic regression model over distributed cache. * @@ -65,7 +65,7 @@ public class LogisticRegressionSGDTrainerSample { LogisticRegressionModel mdl = trainer.fit( ignite, dataCache, - (k, v) -> Arrays.copyOfRange(v, 1, v.length), + (k, v) -> VectorUtils.of(Arrays.copyOfRange(v, 1, v.length)), (k, v) -> v[0] ).withRawLabels(true); http://git-wip-us.apache.org/repos/asf/ignite/blob/fa56a584/examples/src/main/java/org/apache/ignite/examples/ml/regression/logistic/multiclass/LogRegressionMultiClassClassificationExample.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/regression/logistic/multiclass/LogRegressionMultiClassClassificationExample.java b/examples/src/main/java/org/apache/ignite/examples/ml/regression/logistic/multiclass/LogRegressionMultiClassClassificationExample.java index 61a711e..40ab74d 100644 --- a/examples/src/main/java/org/apache/ignite/examples/ml/regression/logistic/multiclass/LogRegressionMultiClassClassificationExample.java +++ b/examples/src/main/java/org/apache/ignite/examples/ml/regression/logistic/multiclass/LogRegressionMultiClassClassificationExample.java @@ -27,6 +27,8 @@ import org.apache.ignite.cache.affinity.rendezvous.RendezvousAffinityFunction; import org.apache.ignite.cache.query.QueryCursor; import org.apache.ignite.cache.query.ScanQuery; import org.apache.ignite.configuration.CacheConfiguration; +import org.apache.ignite.ml.math.Vector; +import org.apache.ignite.ml.math.VectorUtils; import org.apache.ignite.ml.math.functions.IgniteBiFunction; import org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector; import org.apache.ignite.ml.nn.UpdatesStrategy; @@ -55,7 +57,7 @@ public class LogRegressionMultiClassClassificationExample { IgniteThread igniteThread = new IgniteThread(ignite.configuration().getIgniteInstanceName(), LogRegressionMultiClassClassificationExample.class.getSimpleName(), () -> { - IgniteCache<Integer, double[]> dataCache = getTestCache(ignite); + IgniteCache<Integer, Vector> dataCache = getTestCache(ignite); LogRegressionMultiClassTrainer<?> trainer = new LogRegressionMultiClassTrainer<>() .withUpdatesStgy(new UpdatesStrategy<>( @@ -71,26 +73,32 @@ public class LogRegressionMultiClassClassificationExample { LogRegressionMultiClassModel mdl = trainer.fit( ignite, dataCache, - (k, v) -> Arrays.copyOfRange(v, 1, v.length), - (k, v) -> v[0] + (k, v) -> { + double[] arr = v.asArray(); + return VectorUtils.of(Arrays.copyOfRange(arr, 1, arr.length)); + }, + (k, v) -> v.get(0) ); System.out.println(">>> SVM Multi-class model"); System.out.println(mdl.toString()); - MinMaxScalerTrainer<Integer, double[]> normalizationTrainer = new MinMaxScalerTrainer<>(); + MinMaxScalerTrainer<Integer, Vector> normalizationTrainer = new MinMaxScalerTrainer<>(); - IgniteBiFunction<Integer, double[], double[]> preprocessor = normalizationTrainer.fit( + IgniteBiFunction<Integer, Vector, Vector> preprocessor = normalizationTrainer.fit( ignite, dataCache, - (k, v) -> Arrays.copyOfRange(v, 1, v.length) + (k, v) -> { + double[] arr = v.asArray(); + return VectorUtils.of(Arrays.copyOfRange(arr, 1, arr.length)); + } ); LogRegressionMultiClassModel mdlWithNormalization = trainer.fit( ignite, dataCache, preprocessor, - (k, v) -> v[0] + (k, v) -> v.get(0) ); System.out.println(">>> Logistic Regression Multi-class model with minmaxscaling"); @@ -108,9 +116,9 @@ public class LogRegressionMultiClassClassificationExample { int[][] confusionMtx = {{0, 0, 0}, {0, 0, 0}, {0, 0, 0}}; int[][] confusionMtxWithNormalization = {{0, 0, 0}, {0, 0, 0}, {0, 0, 0}}; - try (QueryCursor<Cache.Entry<Integer, double[]>> observations = dataCache.query(new ScanQuery<>())) { - for (Cache.Entry<Integer, double[]> observation : observations) { - double[] val = observation.getValue(); + try (QueryCursor<Cache.Entry<Integer, Vector>> observations = dataCache.query(new ScanQuery<>())) { + for (Cache.Entry<Integer, Vector> observation : observations) { + double[] val = observation.getValue().asArray(); double[] inputs = Arrays.copyOfRange(val, 1, val.length); double groundTruth = val[0]; @@ -163,15 +171,15 @@ public class LogRegressionMultiClassClassificationExample { * @param ignite Ignite instance. * @return Filled Ignite Cache. */ - private static IgniteCache<Integer, double[]> getTestCache(Ignite ignite) { - CacheConfiguration<Integer, double[]> cacheConfiguration = new CacheConfiguration<>(); + private static IgniteCache<Integer, Vector> getTestCache(Ignite ignite) { + CacheConfiguration<Integer, Vector> cacheConfiguration = new CacheConfiguration<>(); cacheConfiguration.setName("TEST_" + UUID.randomUUID()); cacheConfiguration.setAffinity(new RendezvousAffinityFunction(false, 10)); - IgniteCache<Integer, double[]> cache = ignite.createCache(cacheConfiguration); + IgniteCache<Integer, Vector> cache = ignite.createCache(cacheConfiguration); for (int i = 0; i < data.length; i++) - cache.put(i, data[i]); + cache.put(i, VectorUtils.of(data[i])); return cache; } http://git-wip-us.apache.org/repos/asf/ignite/blob/fa56a584/examples/src/main/java/org/apache/ignite/examples/ml/selection/cv/CrossValidationExample.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/selection/cv/CrossValidationExample.java b/examples/src/main/java/org/apache/ignite/examples/ml/selection/cv/CrossValidationExample.java index 93c67ce..8086962 100644 --- a/examples/src/main/java/org/apache/ignite/examples/ml/selection/cv/CrossValidationExample.java +++ b/examples/src/main/java/org/apache/ignite/examples/ml/selection/cv/CrossValidationExample.java @@ -25,6 +25,7 @@ import org.apache.ignite.Ignition; import org.apache.ignite.cache.affinity.rendezvous.RendezvousAffinityFunction; import org.apache.ignite.configuration.CacheConfiguration; import org.apache.ignite.examples.ml.tree.DecisionTreeClassificationTrainerExample; +import org.apache.ignite.ml.math.VectorUtils; import org.apache.ignite.ml.selection.cv.CrossValidation; import org.apache.ignite.ml.selection.scoring.metric.Accuracy; import org.apache.ignite.ml.tree.DecisionTreeClassificationTrainer; @@ -76,7 +77,7 @@ public class CrossValidationExample { new Accuracy<>(), ignite, trainingSet, - (k, v) -> new double[]{v.x, v.y}, + (k, v) -> VectorUtils.of(v.x, v.y), (k, v) -> v.lb, 4 ); http://git-wip-us.apache.org/repos/asf/ignite/blob/fa56a584/examples/src/main/java/org/apache/ignite/examples/ml/selection/split/TrainTestDatasetSplitterExample.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/selection/split/TrainTestDatasetSplitterExample.java b/examples/src/main/java/org/apache/ignite/examples/ml/selection/split/TrainTestDatasetSplitterExample.java index ebd899c..275d835 100644 --- a/examples/src/main/java/org/apache/ignite/examples/ml/selection/split/TrainTestDatasetSplitterExample.java +++ b/examples/src/main/java/org/apache/ignite/examples/ml/selection/split/TrainTestDatasetSplitterExample.java @@ -27,6 +27,7 @@ import org.apache.ignite.cache.affinity.rendezvous.RendezvousAffinityFunction; import org.apache.ignite.cache.query.QueryCursor; import org.apache.ignite.cache.query.ScanQuery; import org.apache.ignite.configuration.CacheConfiguration; +import org.apache.ignite.ml.math.VectorUtils; import org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector; import org.apache.ignite.ml.regressions.linear.LinearRegressionLSQRTrainer; import org.apache.ignite.ml.regressions.linear.LinearRegressionModel; @@ -120,7 +121,7 @@ public class TrainTestDatasetSplitterExample { ignite, dataCache, split.getTrainFilter(), - (k, v) -> Arrays.copyOfRange(v, 1, v.length), + (k, v) -> VectorUtils.of(Arrays.copyOfRange(v, 1, v.length)), (k, v) -> v[0] ); http://git-wip-us.apache.org/repos/asf/ignite/blob/fa56a584/examples/src/main/java/org/apache/ignite/examples/ml/svm/binary/SVMBinaryClassificationExample.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/svm/binary/SVMBinaryClassificationExample.java b/examples/src/main/java/org/apache/ignite/examples/ml/svm/binary/SVMBinaryClassificationExample.java index ce37112..f275ffd 100644 --- a/examples/src/main/java/org/apache/ignite/examples/ml/svm/binary/SVMBinaryClassificationExample.java +++ b/examples/src/main/java/org/apache/ignite/examples/ml/svm/binary/SVMBinaryClassificationExample.java @@ -17,6 +17,9 @@ package org.apache.ignite.examples.ml.svm.binary; +import java.util.Arrays; +import java.util.UUID; +import javax.cache.Cache; import org.apache.ignite.Ignite; import org.apache.ignite.IgniteCache; import org.apache.ignite.Ignition; @@ -24,15 +27,12 @@ import org.apache.ignite.cache.affinity.rendezvous.RendezvousAffinityFunction; import org.apache.ignite.cache.query.QueryCursor; import org.apache.ignite.cache.query.ScanQuery; import org.apache.ignite.configuration.CacheConfiguration; +import org.apache.ignite.ml.math.VectorUtils; import org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector; import org.apache.ignite.ml.svm.SVMLinearBinaryClassificationModel; import org.apache.ignite.ml.svm.SVMLinearBinaryClassificationTrainer; import org.apache.ignite.thread.IgniteThread; -import javax.cache.Cache; -import java.util.Arrays; -import java.util.UUID; - /** * Run SVM binary-class classification model over distributed dataset. * @@ -56,7 +56,7 @@ public class SVMBinaryClassificationExample { SVMLinearBinaryClassificationModel mdl = trainer.fit( ignite, dataCache, - (k, v) -> Arrays.copyOfRange(v, 1, v.length), + (k, v) -> VectorUtils.of(Arrays.copyOfRange(v, 1, v.length)), (k, v) -> v[0] ); http://git-wip-us.apache.org/repos/asf/ignite/blob/fa56a584/examples/src/main/java/org/apache/ignite/examples/ml/svm/multiclass/SVMMultiClassClassificationExample.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/svm/multiclass/SVMMultiClassClassificationExample.java b/examples/src/main/java/org/apache/ignite/examples/ml/svm/multiclass/SVMMultiClassClassificationExample.java index c2be971..8455ced 100644 --- a/examples/src/main/java/org/apache/ignite/examples/ml/svm/multiclass/SVMMultiClassClassificationExample.java +++ b/examples/src/main/java/org/apache/ignite/examples/ml/svm/multiclass/SVMMultiClassClassificationExample.java @@ -17,6 +17,9 @@ package org.apache.ignite.examples.ml.svm.multiclass; +import java.util.Arrays; +import java.util.UUID; +import javax.cache.Cache; import org.apache.ignite.Ignite; import org.apache.ignite.IgniteCache; import org.apache.ignite.Ignition; @@ -24,6 +27,8 @@ import org.apache.ignite.cache.affinity.rendezvous.RendezvousAffinityFunction; import org.apache.ignite.cache.query.QueryCursor; import org.apache.ignite.cache.query.ScanQuery; import org.apache.ignite.configuration.CacheConfiguration; +import org.apache.ignite.ml.math.Vector; +import org.apache.ignite.ml.math.VectorUtils; import org.apache.ignite.ml.math.functions.IgniteBiFunction; import org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector; import org.apache.ignite.ml.preprocessing.minmaxscaling.MinMaxScalerTrainer; @@ -31,10 +36,6 @@ import org.apache.ignite.ml.svm.SVMLinearMultiClassClassificationModel; import org.apache.ignite.ml.svm.SVMLinearMultiClassClassificationTrainer; import org.apache.ignite.thread.IgniteThread; -import javax.cache.Cache; -import java.util.Arrays; -import java.util.UUID; - /** * Run SVM multi-class classification trainer over distributed dataset to build two models: * one with minmaxscaling and one without minmaxscaling. @@ -52,33 +53,39 @@ public class SVMMultiClassClassificationExample { IgniteThread igniteThread = new IgniteThread(ignite.configuration().getIgniteInstanceName(), SVMMultiClassClassificationExample.class.getSimpleName(), () -> { - IgniteCache<Integer, double[]> dataCache = getTestCache(ignite); + IgniteCache<Integer, Vector> dataCache = getTestCache(ignite); SVMLinearMultiClassClassificationTrainer trainer = new SVMLinearMultiClassClassificationTrainer(); SVMLinearMultiClassClassificationModel mdl = trainer.fit( ignite, dataCache, - (k, v) -> Arrays.copyOfRange(v, 1, v.length), - (k, v) -> v[0] + (k, v) -> { + double[] arr = v.asArray(); + return VectorUtils.of(Arrays.copyOfRange(arr, 1, arr.length)); + }, + (k, v) -> v.get(0) ); System.out.println(">>> SVM Multi-class model"); System.out.println(mdl.toString()); - MinMaxScalerTrainer<Integer, double[]> normalizationTrainer = new MinMaxScalerTrainer<>(); + MinMaxScalerTrainer<Integer, Vector> normalizationTrainer = new MinMaxScalerTrainer<>(); - IgniteBiFunction<Integer, double[], double[]> preprocessor = normalizationTrainer.fit( + IgniteBiFunction<Integer, Vector, Vector> preprocessor = normalizationTrainer.fit( ignite, dataCache, - (k, v) -> Arrays.copyOfRange(v, 1, v.length) + (k, v) -> { + double[] arr = v.asArray(); + return VectorUtils.of(Arrays.copyOfRange(arr, 1, arr.length)); + } ); SVMLinearMultiClassClassificationModel mdlWithNormalization = trainer.fit( ignite, dataCache, preprocessor, - (k, v) -> v[0] + (k, v) -> v.get(0) ); System.out.println(">>> SVM Multi-class model with minmaxscaling"); @@ -96,9 +103,9 @@ public class SVMMultiClassClassificationExample { int[][] confusionMtx = {{0, 0, 0}, {0, 0, 0}, {0, 0, 0}}; int[][] confusionMtxWithNormalization = {{0, 0, 0}, {0, 0, 0}, {0, 0, 0}}; - try (QueryCursor<Cache.Entry<Integer, double[]>> observations = dataCache.query(new ScanQuery<>())) { - for (Cache.Entry<Integer, double[]> observation : observations) { - double[] val = observation.getValue(); + try (QueryCursor<Cache.Entry<Integer, Vector>> observations = dataCache.query(new ScanQuery<>())) { + for (Cache.Entry<Integer, Vector> observation : observations) { + double[] val = observation.getValue().asArray(); double[] inputs = Arrays.copyOfRange(val, 1, val.length); double groundTruth = val[0]; @@ -151,15 +158,15 @@ public class SVMMultiClassClassificationExample { * @param ignite Ignite instance. * @return Filled Ignite Cache. */ - private static IgniteCache<Integer, double[]> getTestCache(Ignite ignite) { - CacheConfiguration<Integer, double[]> cacheConfiguration = new CacheConfiguration<>(); + private static IgniteCache<Integer, Vector> getTestCache(Ignite ignite) { + CacheConfiguration<Integer, Vector> cacheConfiguration = new CacheConfiguration<>(); cacheConfiguration.setName("TEST_" + UUID.randomUUID()); cacheConfiguration.setAffinity(new RendezvousAffinityFunction(false, 10)); - IgniteCache<Integer, double[]> cache = ignite.createCache(cacheConfiguration); + IgniteCache<Integer, Vector> cache = ignite.createCache(cacheConfiguration); for (int i = 0; i < data.length; i++) - cache.put(i, data[i]); + cache.put(i, VectorUtils.of(data[i])); return cache; } http://git-wip-us.apache.org/repos/asf/ignite/blob/fa56a584/examples/src/main/java/org/apache/ignite/examples/ml/tree/DecisionTreeClassificationTrainerExample.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/tree/DecisionTreeClassificationTrainerExample.java b/examples/src/main/java/org/apache/ignite/examples/ml/tree/DecisionTreeClassificationTrainerExample.java index ca70b29..744e0fc 100644 --- a/examples/src/main/java/org/apache/ignite/examples/ml/tree/DecisionTreeClassificationTrainerExample.java +++ b/examples/src/main/java/org/apache/ignite/examples/ml/tree/DecisionTreeClassificationTrainerExample.java @@ -23,7 +23,7 @@ import org.apache.ignite.IgniteCache; import org.apache.ignite.Ignition; import org.apache.ignite.cache.affinity.rendezvous.RendezvousAffinityFunction; import org.apache.ignite.configuration.CacheConfiguration; -import org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector; +import org.apache.ignite.ml.math.VectorUtils; import org.apache.ignite.ml.tree.DecisionTreeClassificationTrainer; import org.apache.ignite.ml.tree.DecisionTreeNode; import org.apache.ignite.thread.IgniteThread; @@ -67,7 +67,7 @@ public class DecisionTreeClassificationTrainerExample { DecisionTreeNode mdl = trainer.fit( ignite, trainingSet, - (k, v) -> new double[]{v.x, v.y}, + (k, v) -> VectorUtils.of(v.x, v.y), (k, v) -> v.lb ); @@ -76,7 +76,7 @@ public class DecisionTreeClassificationTrainerExample { for (int i = 0; i < 1000; i++) { LabeledPoint pnt = generatePoint(rnd); - double prediction = mdl.apply(new DenseLocalOnHeapVector(new double[]{pnt.x, pnt.y})); + double prediction = mdl.apply(VectorUtils.of(pnt.x, pnt.y)); if (prediction == pnt.lb) correctPredictions++; http://git-wip-us.apache.org/repos/asf/ignite/blob/fa56a584/examples/src/main/java/org/apache/ignite/examples/ml/tree/DecisionTreeRegressionTrainerExample.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/tree/DecisionTreeRegressionTrainerExample.java b/examples/src/main/java/org/apache/ignite/examples/ml/tree/DecisionTreeRegressionTrainerExample.java index 5443ded..63454c6 100644 --- a/examples/src/main/java/org/apache/ignite/examples/ml/tree/DecisionTreeRegressionTrainerExample.java +++ b/examples/src/main/java/org/apache/ignite/examples/ml/tree/DecisionTreeRegressionTrainerExample.java @@ -22,7 +22,7 @@ import org.apache.ignite.IgniteCache; import org.apache.ignite.Ignition; import org.apache.ignite.cache.affinity.rendezvous.RendezvousAffinityFunction; import org.apache.ignite.configuration.CacheConfiguration; -import org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector; +import org.apache.ignite.ml.math.VectorUtils; import org.apache.ignite.ml.tree.DecisionTreeNode; import org.apache.ignite.ml.tree.DecisionTreeRegressionTrainer; import org.apache.ignite.thread.IgniteThread; @@ -63,7 +63,7 @@ public class DecisionTreeRegressionTrainerExample { DecisionTreeNode mdl = trainer.fit( ignite, trainingSet, - (k, v) -> new double[] {v.x}, + (k, v) -> VectorUtils.of(v.x), (k, v) -> v.y ); @@ -75,7 +75,7 @@ public class DecisionTreeRegressionTrainerExample { // Calculate score. for (int x = 0; x < 10; x++) { - double predicted = mdl.apply(new DenseLocalOnHeapVector(new double[] {x})); + double predicted = mdl.apply(VectorUtils.of(x)); System.out.printf(">>> | %.4f\t\t| %.4f\t\t|\n", predicted, Math.sin(x)); } http://git-wip-us.apache.org/repos/asf/ignite/blob/fa56a584/examples/src/main/java/org/apache/ignite/examples/ml/tree/boosting/GDBOnTreesClassificationTrainerExample.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/tree/boosting/GDBOnTreesClassificationTrainerExample.java b/examples/src/main/java/org/apache/ignite/examples/ml/tree/boosting/GDBOnTreesClassificationTrainerExample.java index b3e89fc..f484ac0 100644 --- a/examples/src/main/java/org/apache/ignite/examples/ml/tree/boosting/GDBOnTreesClassificationTrainerExample.java +++ b/examples/src/main/java/org/apache/ignite/examples/ml/tree/boosting/GDBOnTreesClassificationTrainerExample.java @@ -28,6 +28,7 @@ import org.apache.ignite.ml.math.VectorUtils; import org.apache.ignite.ml.trainers.DatasetTrainer; import org.apache.ignite.ml.tree.boosting.GDBBinaryClassifierOnTreesTrainer; import org.apache.ignite.thread.IgniteThread; +import org.jetbrains.annotations.NotNull; /** * Example represents a solution for the task of classification learning based on @@ -38,7 +39,7 @@ import org.apache.ignite.thread.IgniteThread; */ public class GDBOnTreesClassificationTrainerExample { /** - * Executes example. + * Run example. * * @param args Command line arguments, none required. */ @@ -51,16 +52,8 @@ public class GDBOnTreesClassificationTrainerExample { GDBBinaryClassifierOnTreesTrainer.class.getSimpleName(), () -> { // Create cache with training data. - CacheConfiguration<Integer, double[]> trainingSetCfg = new CacheConfiguration<>(); - trainingSetCfg.setName("TRAINING_SET"); - trainingSetCfg.setAffinity(new RendezvousAffinityFunction(false, 10)); - - IgniteCache<Integer, double[]> trainingSet = ignite.createCache(trainingSetCfg); - for(int i = -50; i <= 50; i++) { - double x = ((double)i) / 10.0; - double y = Math.sin(x) < 0 ? 0.0 : 1.0; - trainingSet.put(i, new double[] {x, y}); - } + CacheConfiguration<Integer, double[]> trainingSetCfg = createCacheConfiguration(); + IgniteCache<Integer, double[]> trainingSet = fillTrainingData(ignite, trainingSetCfg); // Create regression trainer. DatasetTrainer<Model<Vector, Double>, Double> trainer = new GDBBinaryClassifierOnTreesTrainer(1.0, 300, 2, 0.); @@ -69,7 +62,7 @@ public class GDBOnTreesClassificationTrainerExample { Model<Vector, Double> mdl = trainer.fit( ignite, trainingSet, - (k, v) -> new double[] { v[0] }, + (k, v) -> VectorUtils.of(v[0]), (k, v) -> v[1] ); @@ -90,8 +83,34 @@ public class GDBOnTreesClassificationTrainerExample { }); igniteThread.start(); - igniteThread.join(); } } + + /** + * Create cache configuration. + */ + @NotNull private static CacheConfiguration<Integer, double[]> createCacheConfiguration() { + CacheConfiguration<Integer, double[]> trainingSetCfg = new CacheConfiguration<>(); + trainingSetCfg.setName("TRAINING_SET"); + trainingSetCfg.setAffinity(new RendezvousAffinityFunction(false, 10)); + return trainingSetCfg; + } + + /** + * Fill meander-like training data. + * + * @param ignite Ignite. + * @param trainingSetCfg Training set config. + */ + @NotNull private static IgniteCache<Integer, double[]> fillTrainingData(Ignite ignite, + CacheConfiguration<Integer, double[]> trainingSetCfg) { + IgniteCache<Integer, double[]> trainingSet = ignite.createCache(trainingSetCfg); + for(int i = -50; i <= 50; i++) { + double x = ((double)i) / 10.0; + double y = Math.sin(x) < 0 ? 0.0 : 1.0; + trainingSet.put(i, new double[] {x, y}); + } + return trainingSet; + } } http://git-wip-us.apache.org/repos/asf/ignite/blob/fa56a584/examples/src/main/java/org/apache/ignite/examples/ml/tree/boosting/GRBOnTreesRegressionTrainerExample.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/tree/boosting/GRBOnTreesRegressionTrainerExample.java b/examples/src/main/java/org/apache/ignite/examples/ml/tree/boosting/GRBOnTreesRegressionTrainerExample.java index 66b6869..062c446 100644 --- a/examples/src/main/java/org/apache/ignite/examples/ml/tree/boosting/GRBOnTreesRegressionTrainerExample.java +++ b/examples/src/main/java/org/apache/ignite/examples/ml/tree/boosting/GRBOnTreesRegressionTrainerExample.java @@ -28,6 +28,7 @@ import org.apache.ignite.ml.math.VectorUtils; import org.apache.ignite.ml.trainers.DatasetTrainer; import org.apache.ignite.ml.tree.boosting.GDBRegressionOnTreesTrainer; import org.apache.ignite.thread.IgniteThread; +import org.jetbrains.annotations.NotNull; /** * Example represents a solution for the task of regression learning based on @@ -38,7 +39,7 @@ import org.apache.ignite.thread.IgniteThread; */ public class GRBOnTreesRegressionTrainerExample { /** - * Executes example. + * Run example. * * @param args Command line arguments, none required. */ @@ -51,16 +52,8 @@ public class GRBOnTreesRegressionTrainerExample { GRBOnTreesRegressionTrainerExample.class.getSimpleName(), () -> { // Create cache with training data. - CacheConfiguration<Integer, double[]> trainingSetCfg = new CacheConfiguration<>(); - trainingSetCfg.setName("TRAINING_SET"); - trainingSetCfg.setAffinity(new RendezvousAffinityFunction(false, 10)); - - IgniteCache<Integer, double[]> trainingSet = ignite.createCache(trainingSetCfg); - for(int i = -50; i <= 50; i++) { - double x = ((double)i) / 10.0; - double y = Math.pow(x, 2); - trainingSet.put(i, new double[] {x, y}); - } + CacheConfiguration<Integer, double[]> trainingSetCfg = createCacheConfiguration(); + IgniteCache<Integer, double[]> trainingSet = fillTrainingData(ignite, trainingSetCfg); // Create regression trainer. DatasetTrainer<Model<Vector, Double>, Double> trainer = new GDBRegressionOnTreesTrainer(1.0, 2000, 1, 0.); @@ -69,7 +62,7 @@ public class GRBOnTreesRegressionTrainerExample { Model<Vector, Double> mdl = trainer.fit( ignite, trainingSet, - (k, v) -> new double[] { v[0] }, + (k, v) -> VectorUtils.of(v[0]), (k, v) -> v[1] ); @@ -90,8 +83,34 @@ public class GRBOnTreesRegressionTrainerExample { }); igniteThread.start(); - igniteThread.join(); } } + + /** + * Create cache configuration. + */ + @NotNull private static CacheConfiguration<Integer, double[]> createCacheConfiguration() { + CacheConfiguration<Integer, double[]> trainingSetCfg = new CacheConfiguration<>(); + trainingSetCfg.setName("TRAINING_SET"); + trainingSetCfg.setAffinity(new RendezvousAffinityFunction(false, 10)); + return trainingSetCfg; + } + + /** + * Fill parabola training data. + * + * @param ignite Ignite. + * @param trainingSetCfg Training set config. + */ + @NotNull private static IgniteCache<Integer, double[]> fillTrainingData(Ignite ignite, + CacheConfiguration<Integer, double[]> trainingSetCfg) { + IgniteCache<Integer, double[]> trainingSet = ignite.createCache(trainingSetCfg); + for(int i = -50; i <= 50; i++) { + double x = ((double)i) / 10.0; + double y = Math.pow(x, 2); + trainingSet.put(i, new double[] {x, y}); + } + return trainingSet; + } } http://git-wip-us.apache.org/repos/asf/ignite/blob/fa56a584/examples/src/main/java/org/apache/ignite/examples/ml/tree/randomforest/RandomForestClassificationExample.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/tree/randomforest/RandomForestClassificationExample.java b/examples/src/main/java/org/apache/ignite/examples/ml/tree/randomforest/RandomForestClassificationExample.java index aaf4fb9..a43c4e9 100644 --- a/examples/src/main/java/org/apache/ignite/examples/ml/tree/randomforest/RandomForestClassificationExample.java +++ b/examples/src/main/java/org/apache/ignite/examples/ml/tree/randomforest/RandomForestClassificationExample.java @@ -28,7 +28,7 @@ import org.apache.ignite.cache.query.QueryCursor; import org.apache.ignite.cache.query.ScanQuery; import org.apache.ignite.configuration.CacheConfiguration; import org.apache.ignite.ml.composition.ModelsComposition; -import org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector; +import org.apache.ignite.ml.math.VectorUtils; import org.apache.ignite.ml.tree.randomforest.RandomForestClassifierTrainer; import org.apache.ignite.ml.tree.randomforest.RandomForestTrainer; import org.apache.ignite.thread.IgniteThread; @@ -60,7 +60,7 @@ public class RandomForestClassificationExample { RandomForestClassifierTrainer trainer = new RandomForestClassifierTrainer(13, 4, 101, 0.3, 2, 0); ModelsComposition randomForest = trainer.fit(ignite, dataCache, - (k, v) -> Arrays.copyOfRange(v, 1, v.length), + (k, v) -> VectorUtils.of(Arrays.copyOfRange(v, 1, v.length)), (k, v) -> v[0] ); @@ -73,7 +73,7 @@ public class RandomForestClassificationExample { double[] inputs = Arrays.copyOfRange(val, 1, val.length); double groundTruth = val[0]; - double prediction = randomForest.apply(new DenseLocalOnHeapVector(inputs)); + double prediction = randomForest.apply(VectorUtils.of(inputs)); totalAmount++; if (groundTruth != prediction) http://git-wip-us.apache.org/repos/asf/ignite/blob/fa56a584/examples/src/main/java/org/apache/ignite/examples/ml/tree/randomforest/RandomForestRegressionExample.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/tree/randomforest/RandomForestRegressionExample.java b/examples/src/main/java/org/apache/ignite/examples/ml/tree/randomforest/RandomForestRegressionExample.java index 3ad60df..4ae775e 100644 --- a/examples/src/main/java/org/apache/ignite/examples/ml/tree/randomforest/RandomForestRegressionExample.java +++ b/examples/src/main/java/org/apache/ignite/examples/ml/tree/randomforest/RandomForestRegressionExample.java @@ -28,7 +28,7 @@ import org.apache.ignite.cache.query.QueryCursor; import org.apache.ignite.cache.query.ScanQuery; import org.apache.ignite.configuration.CacheConfiguration; import org.apache.ignite.ml.composition.ModelsComposition; -import org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector; +import org.apache.ignite.ml.math.VectorUtils; import org.apache.ignite.ml.tree.randomforest.RandomForestRegressionTrainer; import org.apache.ignite.ml.tree.randomforest.RandomForestTrainer; import org.apache.ignite.thread.IgniteThread; @@ -60,7 +60,7 @@ public class RandomForestRegressionExample { RandomForestRegressionTrainer trainer = new RandomForestRegressionTrainer(13, 4, 101, 0.3, 2, 0); ModelsComposition randomForest = trainer.fit(ignite, dataCache, - (k, v) -> Arrays.copyOfRange(v, 0, v.length - 1), + (k, v) -> VectorUtils.of(Arrays.copyOfRange(v, 0, v.length - 1)), (k, v) -> v[v.length - 1] ); @@ -74,7 +74,7 @@ public class RandomForestRegressionExample { double[] inputs = Arrays.copyOfRange(val, 0, val.length - 1); double groundTruth = val[val.length - 1]; - double prediction = randomForest.apply(new DenseLocalOnHeapVector(inputs)); + double prediction = randomForest.apply(VectorUtils.of(inputs)); mse += Math.pow(prediction - groundTruth, 2.0); mae += Math.abs(prediction - groundTruth); http://git-wip-us.apache.org/repos/asf/ignite/blob/fa56a584/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_1_Read_and_Learn.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_1_Read_and_Learn.java b/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_1_Read_and_Learn.java index cdea8a7..d8601f7 100644 --- a/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_1_Read_and_Learn.java +++ b/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_1_Read_and_Learn.java @@ -21,6 +21,8 @@ import java.io.FileNotFoundException; import org.apache.ignite.Ignite; import org.apache.ignite.IgniteCache; import org.apache.ignite.Ignition; +import org.apache.ignite.ml.math.Vector; +import org.apache.ignite.ml.math.VectorUtils; import org.apache.ignite.ml.math.functions.IgniteBiFunction; import org.apache.ignite.ml.selection.scoring.evaluator.Evaluator; import org.apache.ignite.ml.selection.scoring.metric.Accuracy; @@ -43,7 +45,7 @@ public class Step_1_Read_and_Learn { IgniteCache<Integer, Object[]> dataCache = TitanicUtils.readPassengers(ignite); - IgniteBiFunction<Integer, Object[], double[]> featureExtractor = (k, v) -> new double[]{(double) v[0], (double) v[5], (double) v[6]}; + IgniteBiFunction<Integer, Object[], Vector> featureExtractor = (k, v) -> VectorUtils.of((double) v[0], (double) v[5], (double) v[6]); IgniteBiFunction<Integer, Object[], Double> lbExtractor = (k, v) -> (double) v[1]; http://git-wip-us.apache.org/repos/asf/ignite/blob/fa56a584/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_2_Imputing.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_2_Imputing.java b/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_2_Imputing.java index 1adb3da..b63e3ac 100644 --- a/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_2_Imputing.java +++ b/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_2_Imputing.java @@ -21,6 +21,8 @@ import java.io.FileNotFoundException; import org.apache.ignite.Ignite; import org.apache.ignite.IgniteCache; import org.apache.ignite.Ignition; +import org.apache.ignite.ml.math.Vector; +import org.apache.ignite.ml.math.VectorUtils; import org.apache.ignite.ml.math.functions.IgniteBiFunction; import org.apache.ignite.ml.preprocessing.imputing.ImputerTrainer; import org.apache.ignite.ml.selection.scoring.evaluator.Evaluator; @@ -41,11 +43,11 @@ public class Step_2_Imputing { try { IgniteCache<Integer, Object[]> dataCache = TitanicUtils.readPassengers(ignite); - IgniteBiFunction<Integer, Object[], double[]> featureExtractor = (k, v) -> new double[]{(double) v[0], (double) v[5], (double) v[6]}; + IgniteBiFunction<Integer, Object[], Vector> featureExtractor = (k, v) -> VectorUtils.of((double) v[0], (double) v[5], (double) v[6]); IgniteBiFunction<Integer, Object[], Double> lbExtractor = (k, v) -> (double) v[1]; - IgniteBiFunction<Integer, Object[], double[]> imputingPreprocessor = new ImputerTrainer<Integer, Object[]>() + IgniteBiFunction<Integer, Object[], Vector> imputingPreprocessor = new ImputerTrainer<Integer, Object[]>() .fit(ignite, dataCache, featureExtractor // "pclass", "sibsp", "parch" http://git-wip-us.apache.org/repos/asf/ignite/blob/fa56a584/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_3_Categorial.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_3_Categorial.java b/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_3_Categorial.java index ddf92ed..1f6995c 100644 --- a/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_3_Categorial.java +++ b/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_3_Categorial.java @@ -21,6 +21,7 @@ import java.io.FileNotFoundException; import org.apache.ignite.Ignite; import org.apache.ignite.IgniteCache; import org.apache.ignite.Ignition; +import org.apache.ignite.ml.math.Vector; import org.apache.ignite.ml.math.functions.IgniteBiFunction; import org.apache.ignite.ml.preprocessing.encoding.stringencoder.StringEncoderTrainer; import org.apache.ignite.ml.preprocessing.imputing.ImputerTrainer; @@ -50,7 +51,7 @@ public class Step_3_Categorial { IgniteBiFunction<Integer, Object[], Double> lbExtractor = (k, v) -> (double) v[1]; - IgniteBiFunction<Integer, Object[], double[]> strEncoderPreprocessor = new StringEncoderTrainer<Integer, Object[]>() + IgniteBiFunction<Integer, Object[], Vector> strEncoderPreprocessor = new StringEncoderTrainer<Integer, Object[]>() .encodeFeature(1) .encodeFeature(4) .fit(ignite, @@ -58,7 +59,7 @@ public class Step_3_Categorial { featureExtractor ); - IgniteBiFunction<Integer, Object[], double[]> imputingPreprocessor = new ImputerTrainer<Integer, Object[]>() + IgniteBiFunction<Integer, Object[], Vector> imputingPreprocessor = new ImputerTrainer<Integer, Object[]>() .fit(ignite, dataCache, strEncoderPreprocessor http://git-wip-us.apache.org/repos/asf/ignite/blob/fa56a584/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_4_Add_age_fare.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_4_Add_age_fare.java b/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_4_Add_age_fare.java index 2464d24..26d5973 100644 --- a/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_4_Add_age_fare.java +++ b/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_4_Add_age_fare.java @@ -21,6 +21,7 @@ import java.io.FileNotFoundException; import org.apache.ignite.Ignite; import org.apache.ignite.IgniteCache; import org.apache.ignite.Ignition; +import org.apache.ignite.ml.math.Vector; import org.apache.ignite.ml.math.functions.IgniteBiFunction; import org.apache.ignite.ml.preprocessing.encoding.stringencoder.StringEncoderTrainer; import org.apache.ignite.ml.preprocessing.imputing.ImputerTrainer; @@ -49,7 +50,7 @@ public class Step_4_Add_age_fare { IgniteBiFunction<Integer, Object[], Double> lbExtractor = (k, v) -> (double) v[1]; - IgniteBiFunction<Integer, Object[], double[]> strEncoderPreprocessor = new StringEncoderTrainer<Integer, Object[]>() + IgniteBiFunction<Integer, Object[], Vector> strEncoderPreprocessor = new StringEncoderTrainer<Integer, Object[]>() .encodeFeature(1) .encodeFeature(6) // <--- Changed index here .fit(ignite, @@ -57,7 +58,7 @@ public class Step_4_Add_age_fare { featureExtractor ); - IgniteBiFunction<Integer, Object[], double[]> imputingPreprocessor = new ImputerTrainer<Integer, Object[]>() + IgniteBiFunction<Integer, Object[], Vector> imputingPreprocessor = new ImputerTrainer<Integer, Object[]>() .fit(ignite, dataCache, strEncoderPreprocessor http://git-wip-us.apache.org/repos/asf/ignite/blob/fa56a584/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_5_Scaling.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_5_Scaling.java b/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_5_Scaling.java index e3562ac..88da548 100644 --- a/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_5_Scaling.java +++ b/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_5_Scaling.java @@ -21,6 +21,7 @@ import java.io.FileNotFoundException; import org.apache.ignite.Ignite; import org.apache.ignite.IgniteCache; import org.apache.ignite.Ignition; +import org.apache.ignite.ml.math.Vector; import org.apache.ignite.ml.math.functions.IgniteBiFunction; import org.apache.ignite.ml.preprocessing.encoding.stringencoder.StringEncoderTrainer; import org.apache.ignite.ml.preprocessing.imputing.ImputerTrainer; @@ -51,7 +52,7 @@ public class Step_5_Scaling { IgniteBiFunction<Integer, Object[], Double> lbExtractor = (k, v) -> (double) v[1]; - IgniteBiFunction<Integer, Object[], double[]> strEncoderPreprocessor = new StringEncoderTrainer<Integer, Object[]>() + IgniteBiFunction<Integer, Object[], Vector> strEncoderPreprocessor = new StringEncoderTrainer<Integer, Object[]>() .encodeFeature(1) .encodeFeature(6) // <--- Changed index here .fit(ignite, @@ -59,21 +60,21 @@ public class Step_5_Scaling { featureExtractor ); - IgniteBiFunction<Integer, Object[], double[]> imputingPreprocessor = new ImputerTrainer<Integer, Object[]>() + IgniteBiFunction<Integer, Object[], Vector> imputingPreprocessor = new ImputerTrainer<Integer, Object[]>() .fit(ignite, dataCache, strEncoderPreprocessor ); - IgniteBiFunction<Integer, Object[], double[]> minMaxScalerPreprocessor = new MinMaxScalerTrainer<Integer, Object[]>() + IgniteBiFunction<Integer, Object[], Vector> minMaxScalerPreprocessor = new MinMaxScalerTrainer<Integer, Object[]>() .fit( ignite, dataCache, imputingPreprocessor ); - IgniteBiFunction<Integer, Object[], double[]> normalizationPreprocessor = new NormalizationTrainer<Integer, Object[]>() + IgniteBiFunction<Integer, Object[], Vector> normalizationPreprocessor = new NormalizationTrainer<Integer, Object[]>() .withP(1) .fit( ignite,
