http://git-wip-us.apache.org/repos/asf/ignite/blob/fa56a584/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_6_KNN.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_6_KNN.java b/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_6_KNN.java index 95d54d8..142baca 100644 --- a/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_6_KNN.java +++ b/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_6_KNN.java @@ -24,6 +24,7 @@ import org.apache.ignite.Ignition; import org.apache.ignite.ml.knn.classification.KNNClassificationModel; import org.apache.ignite.ml.knn.classification.KNNClassificationTrainer; import org.apache.ignite.ml.knn.classification.KNNStrategy; +import org.apache.ignite.ml.math.Vector; import org.apache.ignite.ml.math.functions.IgniteBiFunction; import org.apache.ignite.ml.preprocessing.encoding.stringencoder.StringEncoderTrainer; import org.apache.ignite.ml.preprocessing.imputing.ImputerTrainer; @@ -52,7 +53,7 @@ public class Step_6_KNN { IgniteBiFunction<Integer, Object[], Double> lbExtractor = (k, v) -> (double) v[1]; - IgniteBiFunction<Integer, Object[], double[]> strEncoderPreprocessor = new StringEncoderTrainer<Integer, Object[]>() + IgniteBiFunction<Integer, Object[], Vector> strEncoderPreprocessor = new StringEncoderTrainer<Integer, Object[]>() .encodeFeature(1) .encodeFeature(6) // <--- Changed index here .fit(ignite, @@ -60,21 +61,21 @@ public class Step_6_KNN { featureExtractor ); - IgniteBiFunction<Integer, Object[], double[]> imputingPreprocessor = new ImputerTrainer<Integer, Object[]>() + IgniteBiFunction<Integer, Object[], Vector> imputingPreprocessor = new ImputerTrainer<Integer, Object[]>() .fit(ignite, dataCache, strEncoderPreprocessor ); - IgniteBiFunction<Integer, Object[], double[]> minMaxScalerPreprocessor = new MinMaxScalerTrainer<Integer, Object[]>() + IgniteBiFunction<Integer, Object[], Vector> minMaxScalerPreprocessor = new MinMaxScalerTrainer<Integer, Object[]>() .fit( ignite, dataCache, imputingPreprocessor ); - IgniteBiFunction<Integer, Object[], double[]> normalizationPreprocessor = new NormalizationTrainer<Integer, Object[]>() + IgniteBiFunction<Integer, Object[], Vector> normalizationPreprocessor = new NormalizationTrainer<Integer, Object[]>() .withP(1) .fit( ignite,
http://git-wip-us.apache.org/repos/asf/ignite/blob/fa56a584/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_7_Split_train_test.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_7_Split_train_test.java b/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_7_Split_train_test.java index e8641c6..0d8fa67 100644 --- a/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_7_Split_train_test.java +++ b/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_7_Split_train_test.java @@ -21,6 +21,7 @@ import java.io.FileNotFoundException; import org.apache.ignite.Ignite; import org.apache.ignite.IgniteCache; import org.apache.ignite.Ignition; +import org.apache.ignite.ml.math.Vector; import org.apache.ignite.ml.math.functions.IgniteBiFunction; import org.apache.ignite.ml.preprocessing.encoding.stringencoder.StringEncoderTrainer; import org.apache.ignite.ml.preprocessing.imputing.ImputerTrainer; @@ -58,7 +59,7 @@ public class Step_7_Split_train_test { TrainTestSplit<Integer, Object[]> split = new TrainTestDatasetSplitter<Integer, Object[]>() .split(0.75); - IgniteBiFunction<Integer, Object[], double[]> strEncoderPreprocessor = new StringEncoderTrainer<Integer, Object[]>() + IgniteBiFunction<Integer, Object[], Vector> strEncoderPreprocessor = new StringEncoderTrainer<Integer, Object[]>() .encodeFeature(1) .encodeFeature(6) // <--- Changed index here .fit(ignite, @@ -66,21 +67,21 @@ public class Step_7_Split_train_test { featureExtractor ); - IgniteBiFunction<Integer, Object[], double[]> imputingPreprocessor = new ImputerTrainer<Integer, Object[]>() + IgniteBiFunction<Integer, Object[], Vector> imputingPreprocessor = new ImputerTrainer<Integer, Object[]>() .fit(ignite, dataCache, strEncoderPreprocessor ); - IgniteBiFunction<Integer, Object[], double[]> minMaxScalerPreprocessor = new MinMaxScalerTrainer<Integer, Object[]>() + IgniteBiFunction<Integer, Object[], Vector> minMaxScalerPreprocessor = new MinMaxScalerTrainer<Integer, Object[]>() .fit( ignite, dataCache, imputingPreprocessor ); - IgniteBiFunction<Integer, Object[], double[]> normalizationPreprocessor = new NormalizationTrainer<Integer, Object[]>() + IgniteBiFunction<Integer, Object[], Vector> normalizationPreprocessor = new NormalizationTrainer<Integer, Object[]>() .withP(1) .fit( ignite, http://git-wip-us.apache.org/repos/asf/ignite/blob/fa56a584/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_8_CV.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_8_CV.java b/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_8_CV.java index 1fe8dfe..89710df 100644 --- a/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_8_CV.java +++ b/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_8_CV.java @@ -22,6 +22,7 @@ import java.util.Arrays; import org.apache.ignite.Ignite; import org.apache.ignite.IgniteCache; import org.apache.ignite.Ignition; +import org.apache.ignite.ml.math.Vector; import org.apache.ignite.ml.math.functions.IgniteBiFunction; import org.apache.ignite.ml.preprocessing.encoding.stringencoder.StringEncoderTrainer; import org.apache.ignite.ml.preprocessing.imputing.ImputerTrainer; @@ -69,7 +70,7 @@ public class Step_8_CV { TrainTestSplit<Integer, Object[]> split = new TrainTestDatasetSplitter<Integer, Object[]>() .split(0.75); - IgniteBiFunction<Integer, Object[], double[]> strEncoderPreprocessor = new StringEncoderTrainer<Integer, Object[]>() + IgniteBiFunction<Integer, Object[], Vector> strEncoderPreprocessor = new StringEncoderTrainer<Integer, Object[]>() .encodeFeature(1) .encodeFeature(6) // <--- Changed index here .fit(ignite, @@ -77,13 +78,13 @@ public class Step_8_CV { featureExtractor ); - IgniteBiFunction<Integer, Object[], double[]> imputingPreprocessor = new ImputerTrainer<Integer, Object[]>() + IgniteBiFunction<Integer, Object[], Vector> imputingPreprocessor = new ImputerTrainer<Integer, Object[]>() .fit(ignite, dataCache, strEncoderPreprocessor ); - IgniteBiFunction<Integer, Object[], double[]> minMaxScalerPreprocessor = new MinMaxScalerTrainer<Integer, Object[]>() + IgniteBiFunction<Integer, Object[], Vector> minMaxScalerPreprocessor = new MinMaxScalerTrainer<Integer, Object[]>() .fit( ignite, dataCache, @@ -99,7 +100,7 @@ public class Step_8_CV { for(int p: pSet){ for(int maxDeep: maxDeepSet){ - IgniteBiFunction<Integer, Object[], double[]> normalizationPreprocessor = new NormalizationTrainer<Integer, Object[]>() + IgniteBiFunction<Integer, Object[], Vector> normalizationPreprocessor = new NormalizationTrainer<Integer, Object[]>() .withP(p) .fit( ignite, @@ -139,7 +140,7 @@ public class Step_8_CV { System.out.println("Train with p: " + bestP + " and maxDeep: " + bestMaxDeep); - IgniteBiFunction<Integer, Object[], double[]> normalizationPreprocessor = new NormalizationTrainer<Integer, Object[]>() + IgniteBiFunction<Integer, Object[], Vector> normalizationPreprocessor = new NormalizationTrainer<Integer, Object[]>() .withP(bestP) .fit( ignite, http://git-wip-us.apache.org/repos/asf/ignite/blob/fa56a584/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_9_Go_to_LogReg.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_9_Go_to_LogReg.java b/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_9_Go_to_LogReg.java index 113fe56..c06f089 100644 --- a/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_9_Go_to_LogReg.java +++ b/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_9_Go_to_LogReg.java @@ -22,6 +22,7 @@ import java.util.Arrays; import org.apache.ignite.Ignite; import org.apache.ignite.IgniteCache; import org.apache.ignite.Ignition; +import org.apache.ignite.ml.math.Vector; import org.apache.ignite.ml.math.functions.IgniteBiFunction; import org.apache.ignite.ml.nn.UpdatesStrategy; import org.apache.ignite.ml.optimization.updatecalculators.SimpleGDParameterUpdate; @@ -63,7 +64,7 @@ public class Step_9_Go_to_LogReg { TrainTestSplit<Integer, Object[]> split = new TrainTestDatasetSplitter<Integer, Object[]>() .split(0.75); - IgniteBiFunction<Integer, Object[], double[]> strEncoderPreprocessor = new StringEncoderTrainer<Integer, Object[]>() + IgniteBiFunction<Integer, Object[], Vector> strEncoderPreprocessor = new StringEncoderTrainer<Integer, Object[]>() .encodeFeature(1) .encodeFeature(6) // <--- Changed index here .fit(ignite, @@ -71,13 +72,13 @@ public class Step_9_Go_to_LogReg { featureExtractor ); - IgniteBiFunction<Integer, Object[], double[]> imputingPreprocessor = new ImputerTrainer<Integer, Object[]>() + IgniteBiFunction<Integer, Object[], Vector> imputingPreprocessor = new ImputerTrainer<Integer, Object[]>() .fit(ignite, dataCache, strEncoderPreprocessor ); - IgniteBiFunction<Integer, Object[], double[]> minMaxScalerPreprocessor = new MinMaxScalerTrainer<Integer, Object[]>() + IgniteBiFunction<Integer, Object[], Vector> minMaxScalerPreprocessor = new MinMaxScalerTrainer<Integer, Object[]>() .fit( ignite, dataCache, @@ -105,7 +106,7 @@ public class Step_9_Go_to_LogReg { for (int locIterations : locIterationsSet) { for (double learningRate : learningRateSet) { - IgniteBiFunction<Integer, Object[], double[]> normalizationPreprocessor = new NormalizationTrainer<Integer, Object[]>() + IgniteBiFunction<Integer, Object[], Vector> normalizationPreprocessor = new NormalizationTrainer<Integer, Object[]>() .withP(p) .fit( ignite, @@ -167,7 +168,7 @@ public class Step_9_Go_to_LogReg { + " with locIterations: " + bestLocIterations ); - IgniteBiFunction<Integer, Object[], double[]> normalizationPreprocessor = new NormalizationTrainer<Integer, Object[]>() + IgniteBiFunction<Integer, Object[], Vector> normalizationPreprocessor = new NormalizationTrainer<Integer, Object[]>() .withP(bestP) .fit( ignite, http://git-wip-us.apache.org/repos/asf/ignite/blob/fa56a584/modules/ml/src/main/java/org/apache/ignite/ml/clustering/kmeans/KMeansTrainer.java ---------------------------------------------------------------------- diff --git a/modules/ml/src/main/java/org/apache/ignite/ml/clustering/kmeans/KMeansTrainer.java b/modules/ml/src/main/java/org/apache/ignite/ml/clustering/kmeans/KMeansTrainer.java index f65a3fe..c189e1a 100644 --- a/modules/ml/src/main/java/org/apache/ignite/ml/clustering/kmeans/KMeansTrainer.java +++ b/modules/ml/src/main/java/org/apache/ignite/ml/clustering/kmeans/KMeansTrainer.java @@ -68,7 +68,7 @@ public class KMeansTrainer implements SingleLabelDatasetTrainer<KMeansModel> { * @return Model. */ @Override public <K, V> KMeansModel fit(DatasetBuilder<K, V> datasetBuilder, - IgniteBiFunction<K, V, double[]> featureExtractor, IgniteBiFunction<K, V, Double> lbExtractor) { + IgniteBiFunction<K, V, Vector> featureExtractor, IgniteBiFunction<K, V, Double> lbExtractor) { assert datasetBuilder != null; PartitionDataBuilder<K, V, EmptyContext, LabeledDataset<Double, LabeledVector>> partDataBuilder = new LabeledDatasetPartitionDataBuilderOnHeap<>( http://git-wip-us.apache.org/repos/asf/ignite/blob/fa56a584/modules/ml/src/main/java/org/apache/ignite/ml/composition/BaggingModelTrainer.java ---------------------------------------------------------------------- diff --git a/modules/ml/src/main/java/org/apache/ignite/ml/composition/BaggingModelTrainer.java b/modules/ml/src/main/java/org/apache/ignite/ml/composition/BaggingModelTrainer.java index 3d22cc8..514cd21 100644 --- a/modules/ml/src/main/java/org/apache/ignite/ml/composition/BaggingModelTrainer.java +++ b/modules/ml/src/main/java/org/apache/ignite/ml/composition/BaggingModelTrainer.java @@ -27,6 +27,7 @@ import org.apache.ignite.ml.Model; import org.apache.ignite.ml.composition.predictionsaggregator.PredictionsAggregator; import org.apache.ignite.ml.dataset.DatasetBuilder; import org.apache.ignite.ml.math.Vector; +import org.apache.ignite.ml.math.VectorUtils; import org.apache.ignite.ml.math.functions.IgniteBiFunction; import org.apache.ignite.ml.math.functions.IgniteFunction; import org.apache.ignite.ml.selection.split.mapper.SHA256UniformMapper; @@ -85,7 +86,7 @@ public abstract class BaggingModelTrainer implements DatasetTrainer<ModelsCompos /** {@inheritDoc} */ @Override public <K, V> ModelsComposition fit(DatasetBuilder<K, V> datasetBuilder, - IgniteBiFunction<K, V, double[]> featureExtractor, + IgniteBiFunction<K, V, Vector> featureExtractor, IgniteBiFunction<K, V, Double> lbExtractor) { List<ModelOnFeaturesSubspace> learnedModels = new ArrayList<>(); @@ -104,7 +105,7 @@ public abstract class BaggingModelTrainer implements DatasetTrainer<ModelsCompos */ @NotNull private <K, V> ModelOnFeaturesSubspace learnModel( DatasetBuilder<K, V> datasetBuilder, - IgniteBiFunction<K, V, double[]> featureExtractor, + IgniteBiFunction<K, V, Vector> featureExtractor, IgniteBiFunction<K, V, Double> lbExtractor) { Random rnd = new Random(); @@ -148,14 +149,14 @@ public abstract class BaggingModelTrainer implements DatasetTrainer<ModelsCompos * @param featureExtractor Feature extractor. * @param featureMapping Feature mapping. */ - private <K, V> IgniteBiFunction<K, V, double[]> wrapFeatureExtractor( - IgniteBiFunction<K, V, double[]> featureExtractor, + private <K, V> IgniteBiFunction<K, V, Vector> wrapFeatureExtractor( + IgniteBiFunction<K, V, Vector> featureExtractor, Map<Integer, Integer> featureMapping) { - return featureExtractor.andThen((IgniteFunction<double[], double[]>)featureValues -> { + return featureExtractor.andThen((IgniteFunction<Vector, Vector>)featureValues -> { double[] newFeaturesValues = new double[featureMapping.size()]; - featureMapping.forEach((localId, featureValueId) -> newFeaturesValues[localId] = featureValues[featureValueId]); - return newFeaturesValues; + featureMapping.forEach((localId, featureValueId) -> newFeaturesValues[localId] = featureValues.get(featureValueId)); + return VectorUtils.of(newFeaturesValues); }); } } http://git-wip-us.apache.org/repos/asf/ignite/blob/fa56a584/modules/ml/src/main/java/org/apache/ignite/ml/composition/boosting/GDBBinaryClassifierTrainer.java ---------------------------------------------------------------------- diff --git a/modules/ml/src/main/java/org/apache/ignite/ml/composition/boosting/GDBBinaryClassifierTrainer.java b/modules/ml/src/main/java/org/apache/ignite/ml/composition/boosting/GDBBinaryClassifierTrainer.java index b100881..dfd3c75 100644 --- a/modules/ml/src/main/java/org/apache/ignite/ml/composition/boosting/GDBBinaryClassifierTrainer.java +++ b/modules/ml/src/main/java/org/apache/ignite/ml/composition/boosting/GDBBinaryClassifierTrainer.java @@ -25,6 +25,7 @@ import java.util.stream.Collectors; import org.apache.ignite.internal.util.typedef.internal.A; import org.apache.ignite.ml.dataset.DatasetBuilder; import org.apache.ignite.ml.dataset.primitive.builder.context.EmptyContextBuilder; +import org.apache.ignite.ml.math.Vector; import org.apache.ignite.ml.math.functions.IgniteBiFunction; import org.apache.ignite.ml.math.functions.IgniteFunction; import org.apache.ignite.ml.math.functions.IgniteTriFunction; @@ -70,7 +71,7 @@ public abstract class GDBBinaryClassifierTrainer extends GDBTrainer { } /** {@inheritDoc} */ - @Override protected <V, K> void learnLabels(DatasetBuilder<K, V> builder, IgniteBiFunction<K, V, double[]> featureExtractor, + @Override protected <V, K> void learnLabels(DatasetBuilder<K, V> builder, IgniteBiFunction<K, V, Vector> featureExtractor, IgniteBiFunction<K, V, Double> lExtractor) { List<Double> uniqLabels = new ArrayList<Double>( http://git-wip-us.apache.org/repos/asf/ignite/blob/fa56a584/modules/ml/src/main/java/org/apache/ignite/ml/composition/boosting/GDBRegressionTrainer.java ---------------------------------------------------------------------- diff --git a/modules/ml/src/main/java/org/apache/ignite/ml/composition/boosting/GDBRegressionTrainer.java b/modules/ml/src/main/java/org/apache/ignite/ml/composition/boosting/GDBRegressionTrainer.java index 903d95a..76cefc5 100644 --- a/modules/ml/src/main/java/org/apache/ignite/ml/composition/boosting/GDBRegressionTrainer.java +++ b/modules/ml/src/main/java/org/apache/ignite/ml/composition/boosting/GDBRegressionTrainer.java @@ -18,6 +18,7 @@ package org.apache.ignite.ml.composition.boosting; import org.apache.ignite.ml.dataset.DatasetBuilder; +import org.apache.ignite.ml.math.Vector; import org.apache.ignite.ml.math.functions.IgniteBiFunction; /** @@ -38,7 +39,7 @@ public abstract class GDBRegressionTrainer extends GDBTrainer { } /** {@inheritDoc} */ - @Override protected <V, K> void learnLabels(DatasetBuilder<K, V> builder, IgniteBiFunction<K, V, double[]> featureExtractor, + @Override protected <V, K> void learnLabels(DatasetBuilder<K, V> builder, IgniteBiFunction<K, V, Vector> featureExtractor, IgniteBiFunction<K, V, Double> lExtractor) { } http://git-wip-us.apache.org/repos/asf/ignite/blob/fa56a584/modules/ml/src/main/java/org/apache/ignite/ml/composition/boosting/GDBTrainer.java ---------------------------------------------------------------------- diff --git a/modules/ml/src/main/java/org/apache/ignite/ml/composition/boosting/GDBTrainer.java b/modules/ml/src/main/java/org/apache/ignite/ml/composition/boosting/GDBTrainer.java index 41fc32d..dd4d0ad 100644 --- a/modules/ml/src/main/java/org/apache/ignite/ml/composition/boosting/GDBTrainer.java +++ b/modules/ml/src/main/java/org/apache/ignite/ml/composition/boosting/GDBTrainer.java @@ -30,7 +30,6 @@ import org.apache.ignite.ml.dataset.primitive.builder.context.EmptyContextBuilde import org.apache.ignite.ml.dataset.primitive.context.EmptyContext; import org.apache.ignite.ml.knn.regression.KNNRegressionTrainer; import org.apache.ignite.ml.math.Vector; -import org.apache.ignite.ml.math.VectorUtils; import org.apache.ignite.ml.math.functions.IgniteBiFunction; import org.apache.ignite.ml.math.functions.IgniteTriFunction; import org.apache.ignite.ml.regressions.linear.LinearRegressionLSQRTrainer; @@ -80,7 +79,7 @@ abstract class GDBTrainer implements DatasetTrainer<Model<Vector, Double>, Doubl /** {@inheritDoc} */ @Override public <K, V> Model<Vector, Double> fit(DatasetBuilder<K, V> datasetBuilder, - IgniteBiFunction<K, V, double[]> featureExtractor, + IgniteBiFunction<K, V, Vector> featureExtractor, IgniteBiFunction<K, V, Double> lbExtractor) { learnLabels(datasetBuilder, featureExtractor, lbExtractor); @@ -102,7 +101,7 @@ abstract class GDBTrainer implements DatasetTrainer<Model<Vector, Double>, Doubl IgniteBiFunction<K, V, Double> lbExtractorWrap = (k, v) -> { Double realAnswer = externalLabelToInternal(lbExtractor.apply(k, v)); - Double mdlAnswer = currComposition.apply(VectorUtils.of(featureExtractor.apply(k, v))); + Double mdlAnswer = currComposition.apply(featureExtractor.apply(k, v)); return -lossGradient.apply(sampleSize, realAnswer, mdlAnswer); }; @@ -124,7 +123,7 @@ abstract class GDBTrainer implements DatasetTrainer<Model<Vector, Double>, Doubl * @param lExtractor Labels extractor. */ protected abstract <V, K> void learnLabels(DatasetBuilder<K, V> builder, - IgniteBiFunction<K, V, double[]> featureExtractor, IgniteBiFunction<K, V, Double> lExtractor); + IgniteBiFunction<K, V, Vector> featureExtractor, IgniteBiFunction<K, V, Double> lExtractor); /** * Returns regressor model trainer for one step of GDB. @@ -153,7 +152,7 @@ abstract class GDBTrainer implements DatasetTrainer<Model<Vector, Double>, Doubl * @param lbExtractor Label extractor. */ protected <V, K> IgniteBiTuple<Double, Long> computeInitialValue(DatasetBuilder<K, V> builder, - IgniteBiFunction<K, V, double[]> featureExtractor, + IgniteBiFunction<K, V, Vector> featureExtractor, IgniteBiFunction<K, V, Double> lbExtractor) { try (Dataset<EmptyContext, DecisionTreeData> dataset = builder.build( http://git-wip-us.apache.org/repos/asf/ignite/blob/fa56a584/modules/ml/src/main/java/org/apache/ignite/ml/dataset/DatasetFactory.java ---------------------------------------------------------------------- diff --git a/modules/ml/src/main/java/org/apache/ignite/ml/dataset/DatasetFactory.java b/modules/ml/src/main/java/org/apache/ignite/ml/dataset/DatasetFactory.java index 9e580c4..76e1281 100644 --- a/modules/ml/src/main/java/org/apache/ignite/ml/dataset/DatasetFactory.java +++ b/modules/ml/src/main/java/org/apache/ignite/ml/dataset/DatasetFactory.java @@ -31,6 +31,7 @@ import org.apache.ignite.ml.dataset.primitive.builder.data.SimpleLabeledDatasetD import org.apache.ignite.ml.dataset.primitive.context.EmptyContext; import org.apache.ignite.ml.dataset.primitive.data.SimpleDatasetData; import org.apache.ignite.ml.dataset.primitive.data.SimpleLabeledDatasetData; +import org.apache.ignite.ml.math.Vector; import org.apache.ignite.ml.math.functions.IgniteBiFunction; /** @@ -132,7 +133,7 @@ public class DatasetFactory { */ public static <K, V, C extends Serializable> SimpleDataset<C> createSimpleDataset( DatasetBuilder<K, V> datasetBuilder, PartitionContextBuilder<K, V, C> partCtxBuilder, - IgniteBiFunction<K, V, double[]> featureExtractor) { + IgniteBiFunction<K, V, Vector> featureExtractor) { return create( datasetBuilder, partCtxBuilder, @@ -156,7 +157,7 @@ public class DatasetFactory { */ public static <K, V, C extends Serializable> SimpleDataset<C> createSimpleDataset(Ignite ignite, IgniteCache<K, V> upstreamCache, PartitionContextBuilder<K, V, C> partCtxBuilder, - IgniteBiFunction<K, V, double[]> featureExtractor) { + IgniteBiFunction<K, V, Vector> featureExtractor) { return createSimpleDataset( new CacheBasedDatasetBuilder<>(ignite, upstreamCache), partCtxBuilder, @@ -180,7 +181,7 @@ public class DatasetFactory { */ public static <K, V, C extends Serializable> SimpleLabeledDataset<C> createSimpleLabeledDataset( DatasetBuilder<K, V> datasetBuilder, PartitionContextBuilder<K, V, C> partCtxBuilder, - IgniteBiFunction<K, V, double[]> featureExtractor, IgniteBiFunction<K, V, double[]> lbExtractor) { + IgniteBiFunction<K, V, Vector> featureExtractor, IgniteBiFunction<K, V, double[]> lbExtractor) { return create( datasetBuilder, partCtxBuilder, @@ -205,7 +206,7 @@ public class DatasetFactory { */ public static <K, V, C extends Serializable> SimpleLabeledDataset<C> createSimpleLabeledDataset(Ignite ignite, IgniteCache<K, V> upstreamCache, PartitionContextBuilder<K, V, C> partCtxBuilder, - IgniteBiFunction<K, V, double[]> featureExtractor, IgniteBiFunction<K, V, double[]> lbExtractor) { + IgniteBiFunction<K, V, Vector> featureExtractor, IgniteBiFunction<K, V, double[]> lbExtractor) { return createSimpleLabeledDataset( new CacheBasedDatasetBuilder<>(ignite, upstreamCache), partCtxBuilder, @@ -226,7 +227,7 @@ public class DatasetFactory { * @return Dataset. */ public static <K, V> SimpleDataset<EmptyContext> createSimpleDataset(DatasetBuilder<K, V> datasetBuilder, - IgniteBiFunction<K, V, double[]> featureExtractor) { + IgniteBiFunction<K, V, Vector> featureExtractor) { return createSimpleDataset( datasetBuilder, new EmptyContextBuilder<>(), @@ -247,7 +248,7 @@ public class DatasetFactory { * @return Dataset. */ public static <K, V> SimpleDataset<EmptyContext> createSimpleDataset(Ignite ignite, IgniteCache<K, V> upstreamCache, - IgniteBiFunction<K, V, double[]> featureExtractor) { + IgniteBiFunction<K, V, Vector> featureExtractor) { return createSimpleDataset( new CacheBasedDatasetBuilder<>(ignite, upstreamCache), featureExtractor @@ -267,7 +268,7 @@ public class DatasetFactory { * @return Dataset. */ public static <K, V> SimpleLabeledDataset<EmptyContext> createSimpleLabeledDataset( - DatasetBuilder<K, V> datasetBuilder, IgniteBiFunction<K, V, double[]> featureExtractor, + DatasetBuilder<K, V> datasetBuilder, IgniteBiFunction<K, V, Vector> featureExtractor, IgniteBiFunction<K, V, double[]> lbExtractor) { return createSimpleLabeledDataset( datasetBuilder, @@ -291,7 +292,7 @@ public class DatasetFactory { * @return Dataset. */ public static <K, V> SimpleLabeledDataset<EmptyContext> createSimpleLabeledDataset(Ignite ignite, - IgniteCache<K, V> upstreamCache, IgniteBiFunction<K, V, double[]> featureExtractor, + IgniteCache<K, V> upstreamCache, IgniteBiFunction<K, V, Vector> featureExtractor, IgniteBiFunction<K, V, double[]> lbExtractor) { return createSimpleLabeledDataset( new CacheBasedDatasetBuilder<>(ignite, upstreamCache), @@ -341,7 +342,7 @@ public class DatasetFactory { */ public static <K, V, C extends Serializable> SimpleDataset<C> createSimpleDataset(Map<K, V> upstreamMap, int partitions, PartitionContextBuilder<K, V, C> partCtxBuilder, - IgniteBiFunction<K, V, double[]> featureExtractor) { + IgniteBiFunction<K, V, Vector> featureExtractor) { return createSimpleDataset( new LocalDatasetBuilder<>(upstreamMap, partitions), partCtxBuilder, @@ -366,7 +367,7 @@ public class DatasetFactory { */ public static <K, V, C extends Serializable> SimpleLabeledDataset<C> createSimpleLabeledDataset( Map<K, V> upstreamMap, int partitions, PartitionContextBuilder<K, V, C> partCtxBuilder, - IgniteBiFunction<K, V, double[]> featureExtractor, IgniteBiFunction<K, V, double[]> lbExtractor) { + IgniteBiFunction<K, V, Vector> featureExtractor, IgniteBiFunction<K, V, double[]> lbExtractor) { return createSimpleLabeledDataset( new LocalDatasetBuilder<>(upstreamMap, partitions), partCtxBuilder, @@ -387,7 +388,7 @@ public class DatasetFactory { * @return Dataset. */ public static <K, V> SimpleDataset<EmptyContext> createSimpleDataset(Map<K, V> upstreamMap, int partitions, - IgniteBiFunction<K, V, double[]> featureExtractor) { + IgniteBiFunction<K, V, Vector> featureExtractor) { return createSimpleDataset( new LocalDatasetBuilder<>(upstreamMap, partitions), featureExtractor @@ -408,7 +409,7 @@ public class DatasetFactory { * @return Dataset. */ public static <K, V> SimpleLabeledDataset<EmptyContext> createSimpleLabeledDataset(Map<K, V> upstreamMap, - int partitions, IgniteBiFunction<K, V, double[]> featureExtractor, + int partitions, IgniteBiFunction<K, V, Vector> featureExtractor, IgniteBiFunction<K, V, double[]> lbExtractor) { return createSimpleLabeledDataset( new LocalDatasetBuilder<>(upstreamMap, partitions), http://git-wip-us.apache.org/repos/asf/ignite/blob/fa56a584/modules/ml/src/main/java/org/apache/ignite/ml/dataset/primitive/builder/data/SimpleDatasetDataBuilder.java ---------------------------------------------------------------------- diff --git a/modules/ml/src/main/java/org/apache/ignite/ml/dataset/primitive/builder/data/SimpleDatasetDataBuilder.java b/modules/ml/src/main/java/org/apache/ignite/ml/dataset/primitive/builder/data/SimpleDatasetDataBuilder.java index dc7d8cb..f02a71a 100644 --- a/modules/ml/src/main/java/org/apache/ignite/ml/dataset/primitive/builder/data/SimpleDatasetDataBuilder.java +++ b/modules/ml/src/main/java/org/apache/ignite/ml/dataset/primitive/builder/data/SimpleDatasetDataBuilder.java @@ -22,6 +22,7 @@ import java.util.Iterator; import org.apache.ignite.ml.dataset.PartitionDataBuilder; import org.apache.ignite.ml.dataset.UpstreamEntry; import org.apache.ignite.ml.dataset.primitive.data.SimpleDatasetData; +import org.apache.ignite.ml.math.Vector; import org.apache.ignite.ml.math.functions.IgniteBiFunction; /** @@ -37,14 +38,14 @@ public class SimpleDatasetDataBuilder<K, V, C extends Serializable> private static final long serialVersionUID = 756800193212149975L; /** Function that extracts features from an {@code upstream} data. */ - private final IgniteBiFunction<K, V, double[]> featureExtractor; + private final IgniteBiFunction<K, V, Vector> featureExtractor; /** * Construct a new instance of partition {@code data} builder that makes {@link SimpleDatasetData}. * * @param featureExtractor Function that extracts features from an {@code upstream} data. */ - public SimpleDatasetDataBuilder(IgniteBiFunction<K, V, double[]> featureExtractor) { + public SimpleDatasetDataBuilder(IgniteBiFunction<K, V, Vector> featureExtractor) { this.featureExtractor = featureExtractor; } @@ -57,17 +58,17 @@ public class SimpleDatasetDataBuilder<K, V, C extends Serializable> int ptr = 0; while (upstreamData.hasNext()) { UpstreamEntry<K, V> entry = upstreamData.next(); - double[] row = featureExtractor.apply(entry.getKey(), entry.getValue()); + Vector row = featureExtractor.apply(entry.getKey(), entry.getValue()); if (cols < 0) { - cols = row.length; + cols = row.size(); features = new double[Math.toIntExact(upstreamDataSize * cols)]; } else - assert row.length == cols : "Feature extractor must return exactly " + cols + " features"; + assert row.size() == cols : "Feature extractor must return exactly " + cols + " features"; for (int i = 0; i < cols; i++) - features[Math.toIntExact(i * upstreamDataSize + ptr)] = row[i]; + features[Math.toIntExact(i * upstreamDataSize + ptr)] = row.get(i); ptr++; } http://git-wip-us.apache.org/repos/asf/ignite/blob/fa56a584/modules/ml/src/main/java/org/apache/ignite/ml/dataset/primitive/builder/data/SimpleLabeledDatasetDataBuilder.java ---------------------------------------------------------------------- diff --git a/modules/ml/src/main/java/org/apache/ignite/ml/dataset/primitive/builder/data/SimpleLabeledDatasetDataBuilder.java b/modules/ml/src/main/java/org/apache/ignite/ml/dataset/primitive/builder/data/SimpleLabeledDatasetDataBuilder.java index d301bbe..d110df8 100644 --- a/modules/ml/src/main/java/org/apache/ignite/ml/dataset/primitive/builder/data/SimpleLabeledDatasetDataBuilder.java +++ b/modules/ml/src/main/java/org/apache/ignite/ml/dataset/primitive/builder/data/SimpleLabeledDatasetDataBuilder.java @@ -22,6 +22,7 @@ import java.util.Iterator; import org.apache.ignite.ml.dataset.PartitionDataBuilder; import org.apache.ignite.ml.dataset.UpstreamEntry; import org.apache.ignite.ml.dataset.primitive.data.SimpleLabeledDatasetData; +import org.apache.ignite.ml.math.Vector; import org.apache.ignite.ml.math.functions.IgniteBiFunction; /** @@ -37,7 +38,7 @@ public class SimpleLabeledDatasetDataBuilder<K, V, C extends Serializable> private static final long serialVersionUID = 3678784980215216039L; /** Function that extracts features from an {@code upstream} data. */ - private final IgniteBiFunction<K, V, double[]> featureExtractor; + private final IgniteBiFunction<K, V, Vector> featureExtractor; /** Function that extracts labels from an {@code upstream} data. */ private final IgniteBiFunction<K, V, double[]> lbExtractor; @@ -48,7 +49,7 @@ public class SimpleLabeledDatasetDataBuilder<K, V, C extends Serializable> * @param featureExtractor Function that extracts features from an {@code upstream} data. * @param lbExtractor Function that extracts labels from an {@code upstream} data. */ - public SimpleLabeledDatasetDataBuilder(IgniteBiFunction<K, V, double[]> featureExtractor, + public SimpleLabeledDatasetDataBuilder(IgniteBiFunction<K, V, Vector> featureExtractor, IgniteBiFunction<K, V, double[]> lbExtractor) { this.featureExtractor = featureExtractor; this.lbExtractor = lbExtractor; @@ -67,18 +68,18 @@ public class SimpleLabeledDatasetDataBuilder<K, V, C extends Serializable> while (upstreamData.hasNext()) { UpstreamEntry<K, V> entry = upstreamData.next(); - double[] featureRow = featureExtractor.apply(entry.getKey(), entry.getValue()); + Vector featureRow = featureExtractor.apply(entry.getKey(), entry.getValue()); if (featureCols < 0) { - featureCols = featureRow.length; + featureCols = featureRow.size(); features = new double[Math.toIntExact(upstreamDataSize * featureCols)]; } else - assert featureRow.length == featureCols : "Feature extractor must return exactly " + featureCols + assert featureRow.size() == featureCols : "Feature extractor must return exactly " + featureCols + " features"; for (int i = 0; i < featureCols; i++) - features[Math.toIntExact(i * upstreamDataSize) + ptr] = featureRow[i]; + features[Math.toIntExact(i * upstreamDataSize) + ptr] = featureRow.get(i); double[] lbRow = lbExtractor.apply(entry.getKey(), entry.getValue()); http://git-wip-us.apache.org/repos/asf/ignite/blob/fa56a584/modules/ml/src/main/java/org/apache/ignite/ml/knn/KNNUtils.java ---------------------------------------------------------------------- diff --git a/modules/ml/src/main/java/org/apache/ignite/ml/knn/KNNUtils.java b/modules/ml/src/main/java/org/apache/ignite/ml/knn/KNNUtils.java index 716eb52..2ba919a 100644 --- a/modules/ml/src/main/java/org/apache/ignite/ml/knn/KNNUtils.java +++ b/modules/ml/src/main/java/org/apache/ignite/ml/knn/KNNUtils.java @@ -21,6 +21,7 @@ import org.apache.ignite.ml.dataset.Dataset; import org.apache.ignite.ml.dataset.DatasetBuilder; import org.apache.ignite.ml.dataset.PartitionDataBuilder; import org.apache.ignite.ml.dataset.primitive.context.EmptyContext; +import org.apache.ignite.ml.math.Vector; import org.apache.ignite.ml.math.functions.IgniteBiFunction; import org.apache.ignite.ml.structures.LabeledDataset; import org.apache.ignite.ml.structures.LabeledVector; @@ -39,7 +40,7 @@ public class KNNUtils { * @param lbExtractor Label extractor. * @return Dataset. */ - @Nullable public static <K, V> Dataset<EmptyContext, LabeledDataset<Double, LabeledVector>> buildDataset(DatasetBuilder<K, V> datasetBuilder, IgniteBiFunction<K, V, double[]> featureExtractor, IgniteBiFunction<K, V, Double> lbExtractor) { + @Nullable public static <K, V> Dataset<EmptyContext, LabeledDataset<Double, LabeledVector>> buildDataset(DatasetBuilder<K, V> datasetBuilder, IgniteBiFunction<K, V, Vector> featureExtractor, IgniteBiFunction<K, V, Double> lbExtractor) { PartitionDataBuilder<K, V, EmptyContext, LabeledDataset<Double, LabeledVector>> partDataBuilder = new LabeledDatasetPartitionDataBuilderOnHeap<>( featureExtractor, http://git-wip-us.apache.org/repos/asf/ignite/blob/fa56a584/modules/ml/src/main/java/org/apache/ignite/ml/knn/classification/KNNClassificationTrainer.java ---------------------------------------------------------------------- diff --git a/modules/ml/src/main/java/org/apache/ignite/ml/knn/classification/KNNClassificationTrainer.java b/modules/ml/src/main/java/org/apache/ignite/ml/knn/classification/KNNClassificationTrainer.java index 98507d8..c84bfd8 100644 --- a/modules/ml/src/main/java/org/apache/ignite/ml/knn/classification/KNNClassificationTrainer.java +++ b/modules/ml/src/main/java/org/apache/ignite/ml/knn/classification/KNNClassificationTrainer.java @@ -19,6 +19,7 @@ package org.apache.ignite.ml.knn.classification; import org.apache.ignite.ml.dataset.DatasetBuilder; import org.apache.ignite.ml.knn.KNNUtils; +import org.apache.ignite.ml.math.Vector; import org.apache.ignite.ml.math.functions.IgniteBiFunction; import org.apache.ignite.ml.trainers.SingleLabelDatasetTrainer; @@ -35,7 +36,7 @@ public class KNNClassificationTrainer implements SingleLabelDatasetTrainer<KNNCl * @return Model. */ @Override public <K, V> KNNClassificationModel fit(DatasetBuilder<K, V> datasetBuilder, - IgniteBiFunction<K, V, double[]> featureExtractor, IgniteBiFunction<K, V, Double> lbExtractor) { + IgniteBiFunction<K, V, Vector> featureExtractor, IgniteBiFunction<K, V, Double> lbExtractor) { return new KNNClassificationModel(KNNUtils.buildDataset(datasetBuilder, featureExtractor, lbExtractor)); } } http://git-wip-us.apache.org/repos/asf/ignite/blob/fa56a584/modules/ml/src/main/java/org/apache/ignite/ml/knn/regression/KNNRegressionTrainer.java ---------------------------------------------------------------------- diff --git a/modules/ml/src/main/java/org/apache/ignite/ml/knn/regression/KNNRegressionTrainer.java b/modules/ml/src/main/java/org/apache/ignite/ml/knn/regression/KNNRegressionTrainer.java index 84a217a..4960370 100644 --- a/modules/ml/src/main/java/org/apache/ignite/ml/knn/regression/KNNRegressionTrainer.java +++ b/modules/ml/src/main/java/org/apache/ignite/ml/knn/regression/KNNRegressionTrainer.java @@ -19,6 +19,7 @@ package org.apache.ignite.ml.knn.regression; import org.apache.ignite.ml.dataset.DatasetBuilder; import org.apache.ignite.ml.knn.KNNUtils; +import org.apache.ignite.ml.math.Vector; import org.apache.ignite.ml.math.functions.IgniteBiFunction; import org.apache.ignite.ml.trainers.SingleLabelDatasetTrainer; @@ -35,7 +36,7 @@ public class KNNRegressionTrainer implements SingleLabelDatasetTrainer<KNNRegres * @return Model. */ public <K, V> KNNRegressionModel fit(DatasetBuilder<K, V> datasetBuilder, - IgniteBiFunction<K, V, double[]> featureExtractor, IgniteBiFunction<K, V, Double> lbExtractor) { + IgniteBiFunction<K, V, Vector> featureExtractor, IgniteBiFunction<K, V, Double> lbExtractor) { return new KNNRegressionModel(KNNUtils.buildDataset(datasetBuilder, featureExtractor, lbExtractor)); } } http://git-wip-us.apache.org/repos/asf/ignite/blob/fa56a584/modules/ml/src/main/java/org/apache/ignite/ml/math/Vector.java ---------------------------------------------------------------------- diff --git a/modules/ml/src/main/java/org/apache/ignite/ml/math/Vector.java b/modules/ml/src/main/java/org/apache/ignite/ml/math/Vector.java index 391c5fe..d09cc65 100644 --- a/modules/ml/src/main/java/org/apache/ignite/ml/math/Vector.java +++ b/modules/ml/src/main/java/org/apache/ignite/ml/math/Vector.java @@ -505,4 +505,13 @@ public interface Vector extends MetaAttributes, Externalizable, StorageOpsMetric * @param f Function used for replacing. **/ public void compute(int i, IgniteIntDoubleToDoubleBiFunction f); + + + /** + * Returns array of doubles corresponds to vector components. + * @return Array of doubles. + */ + public default double[] asArray() { + return getStorage().data(); + } } http://git-wip-us.apache.org/repos/asf/ignite/blob/fa56a584/modules/ml/src/main/java/org/apache/ignite/ml/nn/MLPTrainer.java ---------------------------------------------------------------------- diff --git a/modules/ml/src/main/java/org/apache/ignite/ml/nn/MLPTrainer.java b/modules/ml/src/main/java/org/apache/ignite/ml/nn/MLPTrainer.java index d12a276..f78e6ea 100644 --- a/modules/ml/src/main/java/org/apache/ignite/ml/nn/MLPTrainer.java +++ b/modules/ml/src/main/java/org/apache/ignite/ml/nn/MLPTrainer.java @@ -17,6 +17,10 @@ package org.apache.ignite.ml.nn; +import java.io.Serializable; +import java.util.ArrayList; +import java.util.List; +import java.util.Random; import org.apache.ignite.ml.dataset.Dataset; import org.apache.ignite.ml.dataset.DatasetBuilder; import org.apache.ignite.ml.dataset.primitive.builder.context.EmptyContextBuilder; @@ -35,11 +39,6 @@ import org.apache.ignite.ml.optimization.updatecalculators.ParameterUpdateCalcul import org.apache.ignite.ml.trainers.MultiLabelDatasetTrainer; import org.apache.ignite.ml.util.Utils; -import java.io.Serializable; -import java.util.ArrayList; -import java.util.List; -import java.util.Random; - /** * Multilayer perceptron trainer based on partition based {@link Dataset}. * @@ -110,7 +109,7 @@ public class MLPTrainer<P extends Serializable> implements MultiLabelDatasetTrai /** {@inheritDoc} */ public <K, V> MultilayerPerceptron fit(DatasetBuilder<K, V> datasetBuilder, - IgniteBiFunction<K, V, double[]> featureExtractor, IgniteBiFunction<K, V, double[]> lbExtractor) { + IgniteBiFunction<K, V, Vector> featureExtractor, IgniteBiFunction<K, V, double[]> lbExtractor) { try (Dataset<EmptyContext, SimpleLabeledDatasetData> dataset = datasetBuilder.build( new EmptyContextBuilder<>(), http://git-wip-us.apache.org/repos/asf/ignite/blob/fa56a584/modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/binarization/BinarizationPreprocessor.java ---------------------------------------------------------------------- diff --git a/modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/binarization/BinarizationPreprocessor.java b/modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/binarization/BinarizationPreprocessor.java index 0d482d9..140511b 100644 --- a/modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/binarization/BinarizationPreprocessor.java +++ b/modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/binarization/BinarizationPreprocessor.java @@ -17,6 +17,7 @@ package org.apache.ignite.ml.preprocessing.binarization; +import org.apache.ignite.ml.math.Vector; import org.apache.ignite.ml.math.functions.IgniteBiFunction; /** @@ -28,7 +29,7 @@ import org.apache.ignite.ml.math.functions.IgniteBiFunction; * @param <K> Type of a key in {@code upstream} data. * @param <V> Type of a value in {@code upstream} data. */ -public class BinarizationPreprocessor<K, V> implements IgniteBiFunction<K, V, double[]> { +public class BinarizationPreprocessor<K, V> implements IgniteBiFunction<K, V, Vector> { /** */ private static final long serialVersionUID = 6877811577892621239L; @@ -36,7 +37,7 @@ public class BinarizationPreprocessor<K, V> implements IgniteBiFunction<K, V, do private final double threshold; /** Base preprocessor. */ - private final IgniteBiFunction<K, V, double[]> basePreprocessor; + private final IgniteBiFunction<K, V, Vector> basePreprocessor; /** * Constructs a new instance of Binarization preprocessor. @@ -44,7 +45,7 @@ public class BinarizationPreprocessor<K, V> implements IgniteBiFunction<K, V, do * @param threshold Threshold value. * @param basePreprocessor Base preprocessor. */ - public BinarizationPreprocessor(double threshold, IgniteBiFunction<K, V, double[]> basePreprocessor) { + public BinarizationPreprocessor(double threshold, IgniteBiFunction<K, V, Vector> basePreprocessor) { this.threshold = threshold; this.basePreprocessor = basePreprocessor; } @@ -56,12 +57,12 @@ public class BinarizationPreprocessor<K, V> implements IgniteBiFunction<K, V, do * @param v Value. * @return Preprocessed row. */ - @Override public double[] apply(K k, V v) { - double[] res = basePreprocessor.apply(k, v); + @Override public Vector apply(K k, V v) { + Vector res = basePreprocessor.apply(k, v); - for (int i = 0; i < res.length; i++) { - if(res[i] > threshold) res[i] = 1.0; - else res[i] = 0.0; + for (int i = 0; i < res.size(); i++) { + if(res.get(i) > threshold) res.set(i, 1.0); + else res.set(i, 0.0); } return res; http://git-wip-us.apache.org/repos/asf/ignite/blob/fa56a584/modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/binarization/BinarizationTrainer.java ---------------------------------------------------------------------- diff --git a/modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/binarization/BinarizationTrainer.java b/modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/binarization/BinarizationTrainer.java index abbf644..c9c0b90 100644 --- a/modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/binarization/BinarizationTrainer.java +++ b/modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/binarization/BinarizationTrainer.java @@ -18,6 +18,7 @@ package org.apache.ignite.ml.preprocessing.binarization; import org.apache.ignite.ml.dataset.DatasetBuilder; +import org.apache.ignite.ml.math.Vector; import org.apache.ignite.ml.math.functions.IgniteBiFunction; import org.apache.ignite.ml.preprocessing.PreprocessingTrainer; @@ -27,13 +28,13 @@ import org.apache.ignite.ml.preprocessing.PreprocessingTrainer; * @param <K> Type of a key in {@code upstream} data. * @param <V> Type of a value in {@code upstream} data. */ -public class BinarizationTrainer<K, V> implements PreprocessingTrainer<K, V, double[], double[]> { +public class BinarizationTrainer<K, V> implements PreprocessingTrainer<K, V, Vector, Vector> { /** Threshold. */ private double threshold; /** {@inheritDoc} */ @Override public BinarizationPreprocessor<K, V> fit(DatasetBuilder<K, V> datasetBuilder, - IgniteBiFunction<K, V, double[]> basePreprocessor) { + IgniteBiFunction<K, V, Vector> basePreprocessor) { return new BinarizationPreprocessor<>(threshold, basePreprocessor); } http://git-wip-us.apache.org/repos/asf/ignite/blob/fa56a584/modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/encoding/stringencoder/StringEncoderPreprocessor.java ---------------------------------------------------------------------- diff --git a/modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/encoding/stringencoder/StringEncoderPreprocessor.java b/modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/encoding/stringencoder/StringEncoderPreprocessor.java index 275de13..cdd980b 100644 --- a/modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/encoding/stringencoder/StringEncoderPreprocessor.java +++ b/modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/encoding/stringencoder/StringEncoderPreprocessor.java @@ -19,6 +19,8 @@ package org.apache.ignite.ml.preprocessing.encoding.stringencoder; import java.util.Map; import java.util.Set; +import org.apache.ignite.ml.math.Vector; +import org.apache.ignite.ml.math.VectorUtils; import org.apache.ignite.ml.math.exceptions.preprocessing.UnknownStringValue; import org.apache.ignite.ml.math.functions.IgniteBiFunction; @@ -28,7 +30,7 @@ import org.apache.ignite.ml.math.functions.IgniteBiFunction; * @param <K> Type of a key in {@code upstream} data. * @param <V> Type of a value in {@code upstream} data. */ -public class StringEncoderPreprocessor<K, V> implements IgniteBiFunction<K, V, double[]> { +public class StringEncoderPreprocessor<K, V> implements IgniteBiFunction<K, V, Vector> { /** */ private static final long serialVersionUID = 6237812226382623469L; /** */ @@ -63,7 +65,7 @@ public class StringEncoderPreprocessor<K, V> implements IgniteBiFunction<K, V, d * @param v Value. * @return Preprocessed row. */ - @Override public double[] apply(K k, V v) { + @Override public Vector apply(K k, V v) { Object[] tmp = basePreprocessor.apply(k, v); double[] res = new double[tmp.length]; @@ -79,6 +81,6 @@ public class StringEncoderPreprocessor<K, V> implements IgniteBiFunction<K, V, d } else res[i] = (double)tmpObj; } - return res; + return VectorUtils.of(res); } } http://git-wip-us.apache.org/repos/asf/ignite/blob/fa56a584/modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/encoding/stringencoder/StringEncoderTrainer.java ---------------------------------------------------------------------- diff --git a/modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/encoding/stringencoder/StringEncoderTrainer.java b/modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/encoding/stringencoder/StringEncoderTrainer.java index 8ed073c..f52f0bb 100644 --- a/modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/encoding/stringencoder/StringEncoderTrainer.java +++ b/modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/encoding/stringencoder/StringEncoderTrainer.java @@ -27,6 +27,7 @@ import org.apache.ignite.ml.dataset.Dataset; import org.apache.ignite.ml.dataset.DatasetBuilder; import org.apache.ignite.ml.dataset.UpstreamEntry; import org.apache.ignite.ml.dataset.primitive.context.EmptyContext; +import org.apache.ignite.ml.math.Vector; import org.apache.ignite.ml.math.functions.IgniteBiFunction; import org.apache.ignite.ml.preprocessing.PreprocessingTrainer; import org.jetbrains.annotations.NotNull; @@ -39,7 +40,7 @@ import org.jetbrains.annotations.NotNull; * @param <K> Type of a key in {@code upstream} data. * @param <V> Type of a value in {@code upstream} data. */ -public class StringEncoderTrainer<K, V> implements PreprocessingTrainer<K, V, Object[], double[]> { +public class StringEncoderTrainer<K, V> implements PreprocessingTrainer<K, V, Object[], Vector> { /** Indices of features which should be encoded. */ private Set<Integer> handledIndices = new HashSet<>(); http://git-wip-us.apache.org/repos/asf/ignite/blob/fa56a584/modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/imputing/ImputerPreprocessor.java ---------------------------------------------------------------------- diff --git a/modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/imputing/ImputerPreprocessor.java b/modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/imputing/ImputerPreprocessor.java index 95344ee..469163f 100644 --- a/modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/imputing/ImputerPreprocessor.java +++ b/modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/imputing/ImputerPreprocessor.java @@ -17,6 +17,7 @@ package org.apache.ignite.ml.preprocessing.imputing; +import org.apache.ignite.ml.math.Vector; import org.apache.ignite.ml.math.functions.IgniteBiFunction; /** @@ -25,23 +26,23 @@ import org.apache.ignite.ml.math.functions.IgniteBiFunction; * @param <K> Type of a key in {@code upstream} data. * @param <V> Type of a value in {@code upstream} data. */ -public class ImputerPreprocessor<K, V> implements IgniteBiFunction<K, V, double[]> { +public class ImputerPreprocessor<K, V> implements IgniteBiFunction<K, V, Vector> { /** */ private static final long serialVersionUID = 6887800576392623469L; /** Filling values. */ - private final double[] imputingValues; + private final Vector imputingValues; /** Base preprocessor. */ - private final IgniteBiFunction<K, V, double[]> basePreprocessor; + private final IgniteBiFunction<K, V, Vector> basePreprocessor; /** * Constructs a new instance of imputing preprocessor. * * @param basePreprocessor Base preprocessor. */ - public ImputerPreprocessor(double[] imputingValues, - IgniteBiFunction<K, V, double[]> basePreprocessor) { + public ImputerPreprocessor(Vector imputingValues, + IgniteBiFunction<K, V, Vector> basePreprocessor) { this.imputingValues = imputingValues; this.basePreprocessor = basePreprocessor; } @@ -53,14 +54,14 @@ public class ImputerPreprocessor<K, V> implements IgniteBiFunction<K, V, double[ * @param v Value. * @return Preprocessed row. */ - @Override public double[] apply(K k, V v) { - double[] res = basePreprocessor.apply(k, v); + @Override public Vector apply(K k, V v) { + Vector res = basePreprocessor.apply(k, v); - assert res.length == imputingValues.length; + assert res.size() == imputingValues.size(); - for (int i = 0; i < res.length; i++) { - if (Double.valueOf(res[i]).equals(Double.NaN)) - res[i] = imputingValues[i]; + for (int i = 0; i < res.size(); i++) { + if (Double.valueOf(res.get(i)).equals(Double.NaN)) + res.set(i, imputingValues.get(i)); } return res; } http://git-wip-us.apache.org/repos/asf/ignite/blob/fa56a584/modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/imputing/ImputerTrainer.java ---------------------------------------------------------------------- diff --git a/modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/imputing/ImputerTrainer.java b/modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/imputing/ImputerTrainer.java index 7d3a161..5ad6bd0 100644 --- a/modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/imputing/ImputerTrainer.java +++ b/modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/imputing/ImputerTrainer.java @@ -25,6 +25,8 @@ import org.apache.ignite.ml.dataset.Dataset; import org.apache.ignite.ml.dataset.DatasetBuilder; import org.apache.ignite.ml.dataset.UpstreamEntry; import org.apache.ignite.ml.dataset.primitive.context.EmptyContext; +import org.apache.ignite.ml.math.Vector; +import org.apache.ignite.ml.math.VectorUtils; import org.apache.ignite.ml.math.functions.IgniteBiFunction; import org.apache.ignite.ml.preprocessing.PreprocessingTrainer; @@ -36,13 +38,13 @@ import org.apache.ignite.ml.preprocessing.PreprocessingTrainer; * @param <K> Type of a key in {@code upstream} data. * @param <V> Type of a value in {@code upstream} data. */ -public class ImputerTrainer<K, V> implements PreprocessingTrainer<K, V, double[], double[]> { +public class ImputerTrainer<K, V> implements PreprocessingTrainer<K, V, Vector, Vector> { /** The imputing strategy. */ private ImputingStrategy imputingStgy = ImputingStrategy.MEAN; /** {@inheritDoc} */ @Override public ImputerPreprocessor<K, V> fit(DatasetBuilder<K, V> datasetBuilder, - IgniteBiFunction<K, V, double[]> basePreprocessor) { + IgniteBiFunction<K, V, Vector> basePreprocessor) { try (Dataset<EmptyContext, ImputerPartitionData> dataset = datasetBuilder.build( (upstream, upstreamSize) -> new EmptyContext(), (upstream, upstreamSize, ctx) -> { @@ -52,7 +54,7 @@ public class ImputerTrainer<K, V> implements PreprocessingTrainer<K, V, double[] while (upstream.hasNext()) { UpstreamEntry<K, V> entity = upstream.next(); - double[] row = basePreprocessor.apply(entity.getKey(), entity.getValue()); + Vector row = basePreprocessor.apply(entity.getKey(), entity.getValue()); switch (imputingStgy) { case MEAN: @@ -81,14 +83,14 @@ public class ImputerTrainer<K, V> implements PreprocessingTrainer<K, V, double[] } )) { - double[] imputingValues; + Vector imputingValues; switch (imputingStgy) { case MEAN: - imputingValues = calculateImputingValuesBySumsAndCounts(dataset); + imputingValues = VectorUtils.of(calculateImputingValuesBySumsAndCounts(dataset)); break; case MOST_FREQUENT: - imputingValues = calculateImputingValuesByFrequencies(dataset); + imputingValues = VectorUtils.of(calculateImputingValuesByFrequencies(dataset)); break; default: throw new UnsupportedOperationException("The chosen strategy is not supported"); } @@ -200,17 +202,17 @@ public class ImputerTrainer<K, V> implements PreprocessingTrainer<K, V, double[] * @param valuesByFreq Holds the sums by values and features. * @return Updated sums by values and features. */ - private Map<Double, Integer>[] calculateFrequencies(double[] row, Map<Double, Integer>[] valuesByFreq) { + private Map<Double, Integer>[] calculateFrequencies(Vector row, Map<Double, Integer>[] valuesByFreq) { if (valuesByFreq == null) { - valuesByFreq = new HashMap[row.length]; + valuesByFreq = new HashMap[row.size()]; for (int i = 0; i < valuesByFreq.length; i++) valuesByFreq[i] = new HashMap<>(); } else - assert valuesByFreq.length == row.length : "Base preprocessor must return exactly " + valuesByFreq.length + assert valuesByFreq.length == row.size() : "Base preprocessor must return exactly " + valuesByFreq.length + " features"; for (int i = 0; i < valuesByFreq.length; i++) { - double v = row[i]; + double v = row.get(i); if(!Double.valueOf(v).equals(Double.NaN)) { Map<Double, Integer> map = valuesByFreq[i]; @@ -231,16 +233,16 @@ public class ImputerTrainer<K, V> implements PreprocessingTrainer<K, V, double[] * @param sums Holds the sums by features. * @return Updated sums by features. */ - private double[] calculateTheSums(double[] row, double[] sums) { + private double[] calculateTheSums(Vector row, double[] sums) { if (sums == null) - sums = new double[row.length]; + sums = new double[row.size()]; else - assert sums.length == row.length : "Base preprocessor must return exactly " + sums.length + assert sums.length == row.size() : "Base preprocessor must return exactly " + sums.length + " features"; for (int i = 0; i < sums.length; i++){ - if(!Double.valueOf(row[i]).equals(Double.NaN)) - sums[i] += row[i]; + if(!Double.valueOf(row.get(i)).equals(Double.NaN)) + sums[i] += row.get(i); } return sums; @@ -253,15 +255,15 @@ public class ImputerTrainer<K, V> implements PreprocessingTrainer<K, V, double[] * @param counts Holds the counts by features. * @return Updated counts by features. */ - private int[] calculateTheCounts(double[] row, int[] counts) { + private int[] calculateTheCounts(Vector row, int[] counts) { if (counts == null) - counts = new int[row.length]; + counts = new int[row.size()]; else - assert counts.length == row.length : "Base preprocessor must return exactly " + counts.length + assert counts.length == row.size() : "Base preprocessor must return exactly " + counts.length + " features"; for (int i = 0; i < counts.length; i++){ - if(!Double.valueOf(row[i]).equals(Double.NaN)) + if(!Double.valueOf(row.get(i)).equals(Double.NaN)) counts[i]++; } http://git-wip-us.apache.org/repos/asf/ignite/blob/fa56a584/modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/minmaxscaling/MinMaxScalerPreprocessor.java ---------------------------------------------------------------------- diff --git a/modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/minmaxscaling/MinMaxScalerPreprocessor.java b/modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/minmaxscaling/MinMaxScalerPreprocessor.java index f75f927..92cb190 100644 --- a/modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/minmaxscaling/MinMaxScalerPreprocessor.java +++ b/modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/minmaxscaling/MinMaxScalerPreprocessor.java @@ -17,6 +17,7 @@ package org.apache.ignite.ml.preprocessing.minmaxscaling; +import org.apache.ignite.ml.math.Vector; import org.apache.ignite.ml.math.functions.IgniteBiFunction; /** @@ -31,7 +32,7 @@ import org.apache.ignite.ml.math.functions.IgniteBiFunction; * @param <K> Type of a key in {@code upstream} data. * @param <V> Type of a value in {@code upstream} data. */ -public class MinMaxScalerPreprocessor<K, V> implements IgniteBiFunction<K, V, double[]> { +public class MinMaxScalerPreprocessor<K, V> implements IgniteBiFunction<K, V, Vector> { /** */ private static final long serialVersionUID = 6997800576392623469L; @@ -42,7 +43,7 @@ public class MinMaxScalerPreprocessor<K, V> implements IgniteBiFunction<K, V, do private final double[] max; /** Base preprocessor. */ - private final IgniteBiFunction<K, V, double[]> basePreprocessor; + private final IgniteBiFunction<K, V, Vector> basePreprocessor; /** * Constructs a new instance of minmaxscaling preprocessor. @@ -51,7 +52,7 @@ public class MinMaxScalerPreprocessor<K, V> implements IgniteBiFunction<K, V, do * @param max Maximum values. * @param basePreprocessor Base preprocessor. */ - public MinMaxScalerPreprocessor(double[] min, double[] max, IgniteBiFunction<K, V, double[]> basePreprocessor) { + public MinMaxScalerPreprocessor(double[] min, double[] max, IgniteBiFunction<K, V, Vector> basePreprocessor) { this.min = min; this.max = max; this.basePreprocessor = basePreprocessor; @@ -64,14 +65,14 @@ public class MinMaxScalerPreprocessor<K, V> implements IgniteBiFunction<K, V, do * @param v Value. * @return Preprocessed row. */ - @Override public double[] apply(K k, V v) { - double[] res = basePreprocessor.apply(k, v); + @Override public Vector apply(K k, V v) { + Vector res = basePreprocessor.apply(k, v); - assert res.length == min.length; - assert res.length == max.length; + assert res.size() == min.length; + assert res.size() == max.length; - for (int i = 0; i < res.length; i++) - res[i] = (res[i] - min[i]) / (max[i] - min[i]); + for (int i = 0; i < res.size(); i++) + res.set(i, (res.get(i) - min[i]) / (max[i] - min[i])); return res; } http://git-wip-us.apache.org/repos/asf/ignite/blob/fa56a584/modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/minmaxscaling/MinMaxScalerTrainer.java ---------------------------------------------------------------------- diff --git a/modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/minmaxscaling/MinMaxScalerTrainer.java b/modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/minmaxscaling/MinMaxScalerTrainer.java index c8b547f..9b218fb 100644 --- a/modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/minmaxscaling/MinMaxScalerTrainer.java +++ b/modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/minmaxscaling/MinMaxScalerTrainer.java @@ -21,6 +21,7 @@ import org.apache.ignite.ml.dataset.Dataset; import org.apache.ignite.ml.dataset.DatasetBuilder; import org.apache.ignite.ml.dataset.UpstreamEntry; import org.apache.ignite.ml.dataset.primitive.context.EmptyContext; +import org.apache.ignite.ml.math.Vector; import org.apache.ignite.ml.math.functions.IgniteBiFunction; import org.apache.ignite.ml.preprocessing.PreprocessingTrainer; @@ -30,10 +31,10 @@ import org.apache.ignite.ml.preprocessing.PreprocessingTrainer; * @param <K> Type of a key in {@code upstream} data. * @param <V> Type of a value in {@code upstream} data. */ -public class MinMaxScalerTrainer<K, V> implements PreprocessingTrainer<K, V, double[], double[]> { +public class MinMaxScalerTrainer<K, V> implements PreprocessingTrainer<K, V, Vector, Vector> { /** {@inheritDoc} */ @Override public MinMaxScalerPreprocessor<K, V> fit(DatasetBuilder<K, V> datasetBuilder, - IgniteBiFunction<K, V, double[]> basePreprocessor) { + IgniteBiFunction<K, V, Vector> basePreprocessor) { try (Dataset<EmptyContext, MinMaxScalerPartitionData> dataset = datasetBuilder.build( (upstream, upstreamSize) -> new EmptyContext(), (upstream, upstreamSize, ctx) -> { @@ -42,31 +43,31 @@ public class MinMaxScalerTrainer<K, V> implements PreprocessingTrainer<K, V, dou while (upstream.hasNext()) { UpstreamEntry<K, V> entity = upstream.next(); - double[] row = basePreprocessor.apply(entity.getKey(), entity.getValue()); + Vector row = basePreprocessor.apply(entity.getKey(), entity.getValue()); if (min == null) { - min = new double[row.length]; + min = new double[row.size()]; for (int i = 0; i < min.length; i++) min[i] = Double.MAX_VALUE; } else - assert min.length == row.length : "Base preprocessor must return exactly " + min.length + assert min.length == row.size() : "Base preprocessor must return exactly " + min.length + " features"; if (max == null) { - max = new double[row.length]; + max = new double[row.size()]; for (int i = 0; i < max.length; i++) max[i] = -Double.MAX_VALUE; } else - assert max.length == row.length : "Base preprocessor must return exactly " + min.length + assert max.length == row.size() : "Base preprocessor must return exactly " + min.length + " features"; - for (int i = 0; i < row.length; i++) { - if (row[i] < min[i]) - min[i] = row[i]; - if (row[i] > max[i]) - max[i] = row[i]; + for (int i = 0; i < row.size(); i++) { + if (row.get(i) < min[i]) + min[i] = row.get(i); + if (row.get(i) > max[i]) + max[i] = row.get(i); } } http://git-wip-us.apache.org/repos/asf/ignite/blob/fa56a584/modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/normalization/NormalizationPreprocessor.java ---------------------------------------------------------------------- diff --git a/modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/normalization/NormalizationPreprocessor.java b/modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/normalization/NormalizationPreprocessor.java index 89186e0..65b4fff 100644 --- a/modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/normalization/NormalizationPreprocessor.java +++ b/modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/normalization/NormalizationPreprocessor.java @@ -17,6 +17,7 @@ package org.apache.ignite.ml.preprocessing.normalization; +import org.apache.ignite.ml.math.Vector; import org.apache.ignite.ml.math.functions.Functions; import org.apache.ignite.ml.math.functions.IgniteBiFunction; import org.apache.ignite.ml.math.functions.IgniteDoubleFunction; @@ -31,7 +32,7 @@ import org.apache.ignite.ml.math.functions.IgniteDoubleFunction; * @param <K> Type of a key in {@code upstream} data. * @param <V> Type of a value in {@code upstream} data. */ -public class NormalizationPreprocessor<K, V> implements IgniteBiFunction<K, V, double[]> { +public class NormalizationPreprocessor<K, V> implements IgniteBiFunction<K, V, Vector> { /** */ private static final long serialVersionUID = 6873438115778921295L; @@ -39,7 +40,7 @@ public class NormalizationPreprocessor<K, V> implements IgniteBiFunction<K, V, d private int p = 2; /** Base preprocessor. */ - private final IgniteBiFunction<K, V, double[]> basePreprocessor; + private final IgniteBiFunction<K, V, Vector> basePreprocessor; /** * Constructs a new instance of Normalization preprocessor. @@ -47,7 +48,7 @@ public class NormalizationPreprocessor<K, V> implements IgniteBiFunction<K, V, d * @param p Degree of L^p space value. * @param basePreprocessor Base preprocessor. */ - public NormalizationPreprocessor(int p, IgniteBiFunction<K, V, double[]> basePreprocessor) { + public NormalizationPreprocessor(int p, IgniteBiFunction<K, V, Vector> basePreprocessor) { this.p = p; this.basePreprocessor = basePreprocessor; } @@ -59,13 +60,13 @@ public class NormalizationPreprocessor<K, V> implements IgniteBiFunction<K, V, d * @param v Value. * @return Preprocessed row. */ - @Override public double[] apply(K k, V v) { - double[] res = basePreprocessor.apply(k, v); + @Override public Vector apply(K k, V v) { + Vector res = basePreprocessor.apply(k, v); double pNorm = Math.pow(foldMap(res, Functions.PLUS, Functions.pow(p), 0d), 1.0 / p); - for (int i = 0; i < res.length; i++) - res[i] /= pNorm; + for (int i = 0; i < res.size(); i++) + res.set(i, res.get(i) / pNorm); return res; } @@ -79,9 +80,9 @@ public class NormalizationPreprocessor<K, V> implements IgniteBiFunction<K, V, d * @param zero Zero value for fold operation. * @return Folded value of this vector. */ - private double foldMap(double[] vec, IgniteBiFunction<Double,Double,Double> foldFun, IgniteDoubleFunction<Double> mapFun, double zero) { - for (double feature : vec) - zero = foldFun.apply(zero, mapFun.apply(feature)); + private double foldMap(Vector vec, IgniteBiFunction<Double,Double,Double> foldFun, IgniteDoubleFunction<Double> mapFun, double zero) { + for (int i = 0; i< vec.size(); i++) + zero = foldFun.apply(zero, mapFun.apply(vec.get(i))); return zero; } http://git-wip-us.apache.org/repos/asf/ignite/blob/fa56a584/modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/normalization/NormalizationTrainer.java ---------------------------------------------------------------------- diff --git a/modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/normalization/NormalizationTrainer.java b/modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/normalization/NormalizationTrainer.java index 5db4218..5ffcb33 100644 --- a/modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/normalization/NormalizationTrainer.java +++ b/modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/normalization/NormalizationTrainer.java @@ -18,6 +18,7 @@ package org.apache.ignite.ml.preprocessing.normalization; import org.apache.ignite.ml.dataset.DatasetBuilder; +import org.apache.ignite.ml.math.Vector; import org.apache.ignite.ml.math.functions.IgniteBiFunction; import org.apache.ignite.ml.preprocessing.PreprocessingTrainer; @@ -27,13 +28,13 @@ import org.apache.ignite.ml.preprocessing.PreprocessingTrainer; * @param <K> Type of a key in {@code upstream} data. * @param <V> Type of a value in {@code upstream} data. */ -public class NormalizationTrainer<K, V> implements PreprocessingTrainer<K, V, double[], double[]> { +public class NormalizationTrainer<K, V> implements PreprocessingTrainer<K, V, Vector, Vector> { /** Normalization in L^p space. Must be greater than 0. Default value is 2. */ private int p = 2; /** {@inheritDoc} */ @Override public NormalizationPreprocessor<K, V> fit(DatasetBuilder<K, V> datasetBuilder, - IgniteBiFunction<K, V, double[]> basePreprocessor) { + IgniteBiFunction<K, V, Vector> basePreprocessor) { return new NormalizationPreprocessor<>(p, basePreprocessor); } http://git-wip-us.apache.org/repos/asf/ignite/blob/fa56a584/modules/ml/src/main/java/org/apache/ignite/ml/regressions/linear/FeatureExtractorWrapper.java ---------------------------------------------------------------------- diff --git a/modules/ml/src/main/java/org/apache/ignite/ml/regressions/linear/FeatureExtractorWrapper.java b/modules/ml/src/main/java/org/apache/ignite/ml/regressions/linear/FeatureExtractorWrapper.java index 8e8f467..dfb1282 100644 --- a/modules/ml/src/main/java/org/apache/ignite/ml/regressions/linear/FeatureExtractorWrapper.java +++ b/modules/ml/src/main/java/org/apache/ignite/ml/regressions/linear/FeatureExtractorWrapper.java @@ -17,9 +17,10 @@ package org.apache.ignite.ml.regressions.linear; -import org.apache.ignite.ml.math.functions.IgniteBiFunction; - import java.util.Arrays; +import org.apache.ignite.ml.math.Vector; +import org.apache.ignite.ml.math.VectorUtils; +import org.apache.ignite.ml.math.functions.IgniteBiFunction; /** * Feature extractor wrapper that adds additional column filled by 1. @@ -27,29 +28,29 @@ import java.util.Arrays; * @param <K> Type of a key in {@code upstream} data. * @param <V> Type of a value in {@code upstream} data. */ -public class FeatureExtractorWrapper<K, V> implements IgniteBiFunction<K, V, double[]> { +public class FeatureExtractorWrapper<K, V> implements IgniteBiFunction<K, V, Vector> { /** */ private static final long serialVersionUID = -2686524650955735635L; /** Underlying feature extractor. */ - private final IgniteBiFunction<K, V, double[]> featureExtractor; + private final IgniteBiFunction<K, V, Vector> featureExtractor; /** * Constructs a new instance of feature extractor wrapper. * * @param featureExtractor Underlying feature extractor. */ - FeatureExtractorWrapper(IgniteBiFunction<K, V, double[]> featureExtractor) { + FeatureExtractorWrapper(IgniteBiFunction<K, V, Vector> featureExtractor) { this.featureExtractor = featureExtractor; } /** {@inheritDoc} */ - @Override public double[] apply(K k, V v) { - double[] featureRow = featureExtractor.apply(k, v); + @Override public Vector apply(K k, V v) { + double[] featureRow = featureExtractor.apply(k, v).asArray(); double[] row = Arrays.copyOf(featureRow, featureRow.length + 1); row[featureRow.length] = 1.0; - return row; + return VectorUtils.of(row); } } http://git-wip-us.apache.org/repos/asf/ignite/blob/fa56a584/modules/ml/src/main/java/org/apache/ignite/ml/regressions/linear/LinearRegressionLSQRTrainer.java ---------------------------------------------------------------------- diff --git a/modules/ml/src/main/java/org/apache/ignite/ml/regressions/linear/LinearRegressionLSQRTrainer.java b/modules/ml/src/main/java/org/apache/ignite/ml/regressions/linear/LinearRegressionLSQRTrainer.java index 095aa31..36285e6 100644 --- a/modules/ml/src/main/java/org/apache/ignite/ml/regressions/linear/LinearRegressionLSQRTrainer.java +++ b/modules/ml/src/main/java/org/apache/ignite/ml/regressions/linear/LinearRegressionLSQRTrainer.java @@ -36,7 +36,7 @@ import org.apache.ignite.ml.trainers.SingleLabelDatasetTrainer; public class LinearRegressionLSQRTrainer implements SingleLabelDatasetTrainer<LinearRegressionModel> { /** {@inheritDoc} */ @Override public <K, V> LinearRegressionModel fit(DatasetBuilder<K, V> datasetBuilder, - IgniteBiFunction<K, V, double[]> featureExtractor, IgniteBiFunction<K, V, Double> lbExtractor) { + IgniteBiFunction<K, V, Vector> featureExtractor, IgniteBiFunction<K, V, Double> lbExtractor) { LSQRResult res; http://git-wip-us.apache.org/repos/asf/ignite/blob/fa56a584/modules/ml/src/main/java/org/apache/ignite/ml/regressions/linear/LinearRegressionSGDTrainer.java ---------------------------------------------------------------------- diff --git a/modules/ml/src/main/java/org/apache/ignite/ml/regressions/linear/LinearRegressionSGDTrainer.java b/modules/ml/src/main/java/org/apache/ignite/ml/regressions/linear/LinearRegressionSGDTrainer.java index 98b8885..674d208 100644 --- a/modules/ml/src/main/java/org/apache/ignite/ml/regressions/linear/LinearRegressionSGDTrainer.java +++ b/modules/ml/src/main/java/org/apache/ignite/ml/regressions/linear/LinearRegressionSGDTrainer.java @@ -17,23 +17,23 @@ package org.apache.ignite.ml.regressions.linear; +import java.io.Serializable; +import java.util.Arrays; import org.apache.ignite.ml.dataset.Dataset; import org.apache.ignite.ml.dataset.DatasetBuilder; import org.apache.ignite.ml.dataset.primitive.context.EmptyContext; import org.apache.ignite.ml.dataset.primitive.data.SimpleLabeledDatasetData; +import org.apache.ignite.ml.math.Vector; import org.apache.ignite.ml.math.functions.IgniteBiFunction; import org.apache.ignite.ml.math.functions.IgniteFunction; import org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector; import org.apache.ignite.ml.nn.Activators; import org.apache.ignite.ml.nn.MLPTrainer; import org.apache.ignite.ml.nn.MultilayerPerceptron; +import org.apache.ignite.ml.nn.UpdatesStrategy; import org.apache.ignite.ml.nn.architecture.MLPArchitecture; import org.apache.ignite.ml.optimization.LossFunctions; import org.apache.ignite.ml.trainers.SingleLabelDatasetTrainer; -import org.apache.ignite.ml.nn.UpdatesStrategy; - -import java.io.Serializable; -import java.util.Arrays; /** * Trainer of the linear regression model based on stochastic gradient descent algorithm. @@ -74,7 +74,7 @@ public class LinearRegressionSGDTrainer<P extends Serializable> implements Singl /** {@inheritDoc} */ @Override public <K, V> LinearRegressionModel fit(DatasetBuilder<K, V> datasetBuilder, - IgniteBiFunction<K, V, double[]> featureExtractor, IgniteBiFunction<K, V, Double> lbExtractor) { + IgniteBiFunction<K, V, Vector> featureExtractor, IgniteBiFunction<K, V, Double> lbExtractor) { IgniteFunction<Dataset<EmptyContext, SimpleLabeledDatasetData>, MLPArchitecture> archSupplier = dataset -> { http://git-wip-us.apache.org/repos/asf/ignite/blob/fa56a584/modules/ml/src/main/java/org/apache/ignite/ml/regressions/logistic/binomial/LogisticRegressionSGDTrainer.java ---------------------------------------------------------------------- diff --git a/modules/ml/src/main/java/org/apache/ignite/ml/regressions/logistic/binomial/LogisticRegressionSGDTrainer.java b/modules/ml/src/main/java/org/apache/ignite/ml/regressions/logistic/binomial/LogisticRegressionSGDTrainer.java index 8fe57cf..80773a6 100644 --- a/modules/ml/src/main/java/org/apache/ignite/ml/regressions/logistic/binomial/LogisticRegressionSGDTrainer.java +++ b/modules/ml/src/main/java/org/apache/ignite/ml/regressions/logistic/binomial/LogisticRegressionSGDTrainer.java @@ -17,10 +17,13 @@ package org.apache.ignite.ml.regressions.logistic.binomial; +import java.io.Serializable; +import java.util.Arrays; import org.apache.ignite.ml.dataset.Dataset; import org.apache.ignite.ml.dataset.DatasetBuilder; import org.apache.ignite.ml.dataset.primitive.context.EmptyContext; import org.apache.ignite.ml.dataset.primitive.data.SimpleLabeledDatasetData; +import org.apache.ignite.ml.math.Vector; import org.apache.ignite.ml.math.functions.IgniteBiFunction; import org.apache.ignite.ml.math.functions.IgniteFunction; import org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector; @@ -32,9 +35,6 @@ import org.apache.ignite.ml.nn.architecture.MLPArchitecture; import org.apache.ignite.ml.optimization.LossFunctions; import org.apache.ignite.ml.trainers.SingleLabelDatasetTrainer; -import java.io.Serializable; -import java.util.Arrays; - /** * Trainer of the logistic regression model based on stochastic gradient descent algorithm. */ @@ -74,7 +74,7 @@ public class LogisticRegressionSGDTrainer<P extends Serializable> implements Sin /** {@inheritDoc} */ @Override public <K, V> LogisticRegressionModel fit(DatasetBuilder<K, V> datasetBuilder, - IgniteBiFunction<K, V, double[]> featureExtractor, IgniteBiFunction<K, V, Double> lbExtractor) { + IgniteBiFunction<K, V, Vector> featureExtractor, IgniteBiFunction<K, V, Double> lbExtractor) { IgniteFunction<Dataset<EmptyContext, SimpleLabeledDatasetData>, MLPArchitecture> archSupplier = dataset -> { http://git-wip-us.apache.org/repos/asf/ignite/blob/fa56a584/modules/ml/src/main/java/org/apache/ignite/ml/regressions/logistic/multiclass/LogRegressionMultiClassTrainer.java ---------------------------------------------------------------------- diff --git a/modules/ml/src/main/java/org/apache/ignite/ml/regressions/logistic/multiclass/LogRegressionMultiClassTrainer.java b/modules/ml/src/main/java/org/apache/ignite/ml/regressions/logistic/multiclass/LogRegressionMultiClassTrainer.java index e8ed67b..b67ac27 100644 --- a/modules/ml/src/main/java/org/apache/ignite/ml/regressions/logistic/multiclass/LogRegressionMultiClassTrainer.java +++ b/modules/ml/src/main/java/org/apache/ignite/ml/regressions/logistic/multiclass/LogRegressionMultiClassTrainer.java @@ -29,6 +29,7 @@ import org.apache.ignite.ml.dataset.Dataset; import org.apache.ignite.ml.dataset.DatasetBuilder; import org.apache.ignite.ml.dataset.PartitionDataBuilder; import org.apache.ignite.ml.dataset.primitive.context.EmptyContext; +import org.apache.ignite.ml.math.Vector; import org.apache.ignite.ml.math.functions.IgniteBiFunction; import org.apache.ignite.ml.nn.MultilayerPerceptron; import org.apache.ignite.ml.nn.UpdatesStrategy; @@ -66,7 +67,7 @@ public class LogRegressionMultiClassTrainer<P extends Serializable> * @return Model. */ @Override public <K, V> LogRegressionMultiClassModel fit(DatasetBuilder<K, V> datasetBuilder, - IgniteBiFunction<K, V, double[]> featureExtractor, + IgniteBiFunction<K, V, Vector> featureExtractor, IgniteBiFunction<K, V, Double> lbExtractor) { List<Double> classes = extractClassLabels(datasetBuilder, lbExtractor);
