IGNITE-9146: Analyse and improve code coverage in ML module this closes #4489
Project: http://git-wip-us.apache.org/repos/asf/ignite/repo Commit: http://git-wip-us.apache.org/repos/asf/ignite/commit/25f83819 Tree: http://git-wip-us.apache.org/repos/asf/ignite/tree/25f83819 Diff: http://git-wip-us.apache.org/repos/asf/ignite/diff/25f83819 Branch: refs/heads/master Commit: 25f83819a94a6dd75f4cc7cd6b08f74d7c551ace Parents: 7ac2d4d Author: Oleg Ignatenko <[email protected]> Authored: Wed Aug 8 16:13:30 2018 +0300 Committer: Yury Babak <[email protected]> Committed: Wed Aug 8 16:13:30 2018 +0300 ---------------------------------------------------------------------- .../ImputingExampleWithMostFrequentValues.java | 102 ---- .../ImputingWithMostFrequentValuesExample.java | 102 ++++ .../LogisticRegressionSGDTrainerExample.java | 239 ++++++++ .../LogisticRegressionSGDTrainerSample.java | 239 -------- .../ml/tutorial/TutorialStepByStepExample.java | 39 ++ .../environment/LearningEnvironmentBuilder.java | 2 + .../org/apache/ignite/ml/math/Isomorphism.java | 69 --- .../org/apache/ignite/ml/math/MurmurHash.java | 247 -------- .../org/apache/ignite/ml/math/Precision.java | 588 ------------------- .../ml/math/distances/EuclideanDistance.java | 7 +- .../ml/math/distances/HammingDistance.java | 5 + .../ml/math/distances/ManhattanDistance.java | 5 + .../exceptions/MathArithmeticException.java | 45 -- .../knn/SmallTrainingDatasetSizeException.java | 38 -- .../ml/math/isolve/lsqr/AbstractLSQR.java | 32 +- .../ignite/ml/math/isolve/lsqr/LSQRResult.java | 16 +- .../ignite/ml/nn/MultilayerPerceptron.java | 2 +- .../apache/ignite/ml/structures/DatasetRow.java | 3 +- .../org/apache/ignite/ml/IgniteMLTestSuite.java | 15 +- .../java/org/apache/ignite/ml/TestUtils.java | 132 +++-- .../ignite/ml/clustering/KMeansModelTest.java | 2 + .../ignite/ml/clustering/KMeansTrainerTest.java | 12 +- .../ignite/ml/common/CollectionsTest.java | 136 +++++ .../ignite/ml/common/CommonTestSuite.java | 32 + .../ignite/ml/common/ExternalizeTest.java | 101 ++++ .../ml/composition/CompositionTestSuite.java | 38 ++ .../ml/composition/boosting/GDBTrainerTest.java | 12 +- .../WeightedPredictionsAggregatorTest.java | 5 + .../ignite/ml/dataset/DatasetTestSuite.java | 6 +- .../impl/cache/CacheBasedDatasetTest.java | 5 +- .../impl/local/LocalDatasetBuilderTest.java | 46 +- .../ml/dataset/primitive/SimpleDatasetTest.java | 105 ++++ .../primitive/SimpleLabeledDatasetTest.java | 112 ++++ .../ml/environment/EnvironmentTestSuite.java | 32 + .../LearningEnvironmentBuilderTest.java | 90 +++ .../ml/environment/LearningEnvironmentTest.java | 187 ++++++ .../ignite/ml/knn/KNNClassificationTest.java | 11 +- .../apache/ignite/ml/knn/KNNRegressionTest.java | 46 +- .../ignite/ml/knn/LabeledDatasetTest.java | 27 +- .../org/apache/ignite/ml/math/BlasTest.java | 10 + .../ignite/ml/math/distances/DistanceTest.java | 26 +- .../ml/math/isolve/lsqr/LSQROnHeapTest.java | 16 + .../vector/DelegatingVectorConstructorTest.java | 4 +- .../vector/SparseVectorConstructorTest.java | 21 + .../apache/ignite/ml/nn/LossFunctionsTest.java | 92 +++ .../java/org/apache/ignite/ml/nn/MLPTest.java | 71 ++- .../org/apache/ignite/ml/nn/MLPTestSuite.java | 3 +- .../binarization/BinarizationTrainerTest.java | 30 + .../normalization/NormalizationTrainerTest.java | 5 + .../linear/LinearRegressionModelTest.java | 6 + .../logistic/LogRegMultiClassTrainerTest.java | 4 + .../logistic/LogisticRegressionModelTest.java | 43 +- .../LogisticRegressionSGDTrainerTest.java | 4 +- .../ignite/ml/selection/SelectionTestSuite.java | 2 + .../scoring/evaluator/EvaluatorTest.java | 293 +++++++++ .../selection/scoring/metric/FmeasureTest.java | 2 +- .../selection/scoring/metric/PrecisionTest.java | 2 +- .../ml/selection/scoring/metric/RecallTest.java | 2 +- .../ml/structures/DatasetStructureTest.java | 53 ++ .../ml/structures/StructuresTestSuite.java | 31 + .../org/apache/ignite/ml/svm/SVMModelTest.java | 10 +- .../DecisionTreeClassificationTrainerTest.java | 15 +- .../util/SimpleStepFunctionCompressorTest.java | 33 ++ .../RandomForestClassifierTrainerTest.java | 21 +- .../RandomForestRegressionTrainerTest.java | 6 +- 65 files changed, 2243 insertions(+), 1494 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/ignite/blob/25f83819/examples/src/main/java/org/apache/ignite/examples/ml/preprocessing/ImputingExampleWithMostFrequentValues.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/preprocessing/ImputingExampleWithMostFrequentValues.java b/examples/src/main/java/org/apache/ignite/examples/ml/preprocessing/ImputingExampleWithMostFrequentValues.java deleted file mode 100644 index 10344bc..0000000 --- a/examples/src/main/java/org/apache/ignite/examples/ml/preprocessing/ImputingExampleWithMostFrequentValues.java +++ /dev/null @@ -1,102 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.ignite.examples.ml.preprocessing; - -import java.util.Arrays; -import org.apache.ignite.Ignite; -import org.apache.ignite.IgniteCache; -import org.apache.ignite.Ignition; -import org.apache.ignite.cache.affinity.rendezvous.RendezvousAffinityFunction; -import org.apache.ignite.configuration.CacheConfiguration; -import org.apache.ignite.examples.ml.dataset.model.Person; -import org.apache.ignite.ml.dataset.DatasetFactory; -import org.apache.ignite.ml.dataset.primitive.SimpleDataset; -import org.apache.ignite.ml.math.functions.IgniteBiFunction; -import org.apache.ignite.ml.math.primitives.vector.Vector; -import org.apache.ignite.ml.math.primitives.vector.VectorUtils; -import org.apache.ignite.ml.preprocessing.imputing.ImputerTrainer; -import org.apache.ignite.ml.preprocessing.imputing.ImputingStrategy; - -/** - * Example that shows how to use Imputing preprocessor to impute the missing values in the given data. - */ -public class ImputingExampleWithMostFrequentValues { - /** Run example. */ - public static void main(String[] args) throws Exception { - try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) { - System.out.println(">>> Imputing example started."); - - IgniteCache<Integer, Person> persons = createCache(ignite); - - // Defines first preprocessor that extracts features from an upstream data. - IgniteBiFunction<Integer, Person, Vector> featureExtractor = (k, v) -> VectorUtils.of( - v.getAge(), - v.getSalary() - ); - - // Defines second preprocessor that normalizes features. - IgniteBiFunction<Integer, Person, Vector> preprocessor = new ImputerTrainer<Integer, Person>() - .withImputingStrategy(ImputingStrategy.MOST_FREQUENT) - .fit(ignite, persons, featureExtractor); - - // Creates a cache based simple dataset containing features and providing standard dataset API. - try (SimpleDataset<?> dataset = DatasetFactory.createSimpleDataset(ignite, persons, preprocessor)) { - // Calculation of the mean value. This calculation will be performed in map-reduce manner. - double[] mean = dataset.mean(); - System.out.println("Mean \n\t" + Arrays.toString(mean)); - - // Calculation of the standard deviation. This calculation will be performed in map-reduce manner. - double[] std = dataset.std(); - System.out.println("Standard deviation \n\t" + Arrays.toString(std)); - - // Calculation of the covariance matrix. This calculation will be performed in map-reduce manner. - double[][] cov = dataset.cov(); - System.out.println("Covariance matrix "); - for (double[] row : cov) - System.out.println("\t" + Arrays.toString(row)); - - // Calculation of the correlation matrix. This calculation will be performed in map-reduce manner. - double[][] corr = dataset.corr(); - System.out.println("Correlation matrix "); - for (double[] row : corr) - System.out.println("\t" + Arrays.toString(row)); - } - - System.out.println(">>> Imputing example completed."); - } - } - - /** */ - private static IgniteCache<Integer, Person> createCache(Ignite ignite) { - CacheConfiguration<Integer, Person> cacheConfiguration = new CacheConfiguration<>(); - - cacheConfiguration.setName("PERSONS"); - cacheConfiguration.setAffinity(new RendezvousAffinityFunction(false, 2)); - - IgniteCache<Integer, Person> persons = ignite.createCache(cacheConfiguration); - - persons.put(1, new Person("Mike", 10, 1)); - persons.put(2, new Person("John", 20, 2)); - persons.put(3, new Person("George", 15, 1)); - persons.put(4, new Person("Piter", 25, Double.NaN)); - persons.put(5, new Person("Karl", Double.NaN, 1)); - persons.put(6, new Person("Gustaw", 20, 2)); - persons.put(7, new Person("Alex", 20, 3)); - return persons; - } -} http://git-wip-us.apache.org/repos/asf/ignite/blob/25f83819/examples/src/main/java/org/apache/ignite/examples/ml/preprocessing/ImputingWithMostFrequentValuesExample.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/preprocessing/ImputingWithMostFrequentValuesExample.java b/examples/src/main/java/org/apache/ignite/examples/ml/preprocessing/ImputingWithMostFrequentValuesExample.java new file mode 100644 index 0000000..47a5728 --- /dev/null +++ b/examples/src/main/java/org/apache/ignite/examples/ml/preprocessing/ImputingWithMostFrequentValuesExample.java @@ -0,0 +1,102 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.ignite.examples.ml.preprocessing; + +import java.util.Arrays; +import org.apache.ignite.Ignite; +import org.apache.ignite.IgniteCache; +import org.apache.ignite.Ignition; +import org.apache.ignite.cache.affinity.rendezvous.RendezvousAffinityFunction; +import org.apache.ignite.configuration.CacheConfiguration; +import org.apache.ignite.examples.ml.dataset.model.Person; +import org.apache.ignite.ml.dataset.DatasetFactory; +import org.apache.ignite.ml.dataset.primitive.SimpleDataset; +import org.apache.ignite.ml.math.functions.IgniteBiFunction; +import org.apache.ignite.ml.math.primitives.vector.Vector; +import org.apache.ignite.ml.math.primitives.vector.VectorUtils; +import org.apache.ignite.ml.preprocessing.imputing.ImputerTrainer; +import org.apache.ignite.ml.preprocessing.imputing.ImputingStrategy; + +/** + * Example that shows how to use Imputing preprocessor to impute the missing values in the given data. + */ +public class ImputingWithMostFrequentValuesExample { + /** Run example. */ + public static void main(String[] args) throws Exception { + try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) { + System.out.println(">>> Imputing example started."); + + IgniteCache<Integer, Person> persons = createCache(ignite); + + // Defines first preprocessor that extracts features from an upstream data. + IgniteBiFunction<Integer, Person, Vector> featureExtractor = (k, v) -> VectorUtils.of( + v.getAge(), + v.getSalary() + ); + + // Defines second preprocessor that normalizes features. + IgniteBiFunction<Integer, Person, Vector> preprocessor = new ImputerTrainer<Integer, Person>() + .withImputingStrategy(ImputingStrategy.MOST_FREQUENT) + .fit(ignite, persons, featureExtractor); + + // Creates a cache based simple dataset containing features and providing standard dataset API. + try (SimpleDataset<?> dataset = DatasetFactory.createSimpleDataset(ignite, persons, preprocessor)) { + // Calculation of the mean value. This calculation will be performed in map-reduce manner. + double[] mean = dataset.mean(); + System.out.println("Mean \n\t" + Arrays.toString(mean)); + + // Calculation of the standard deviation. This calculation will be performed in map-reduce manner. + double[] std = dataset.std(); + System.out.println("Standard deviation \n\t" + Arrays.toString(std)); + + // Calculation of the covariance matrix. This calculation will be performed in map-reduce manner. + double[][] cov = dataset.cov(); + System.out.println("Covariance matrix "); + for (double[] row : cov) + System.out.println("\t" + Arrays.toString(row)); + + // Calculation of the correlation matrix. This calculation will be performed in map-reduce manner. + double[][] corr = dataset.corr(); + System.out.println("Correlation matrix "); + for (double[] row : corr) + System.out.println("\t" + Arrays.toString(row)); + } + + System.out.println(">>> Imputing example completed."); + } + } + + /** */ + private static IgniteCache<Integer, Person> createCache(Ignite ignite) { + CacheConfiguration<Integer, Person> cacheConfiguration = new CacheConfiguration<>(); + + cacheConfiguration.setName("PERSONS"); + cacheConfiguration.setAffinity(new RendezvousAffinityFunction(false, 2)); + + IgniteCache<Integer, Person> persons = ignite.createCache(cacheConfiguration); + + persons.put(1, new Person("Mike", 10, 1)); + persons.put(2, new Person("John", 20, 2)); + persons.put(3, new Person("George", 15, 1)); + persons.put(4, new Person("Piter", 25, Double.NaN)); + persons.put(5, new Person("Karl", Double.NaN, 1)); + persons.put(6, new Person("Gustaw", 20, 2)); + persons.put(7, new Person("Alex", 20, 3)); + return persons; + } +} http://git-wip-us.apache.org/repos/asf/ignite/blob/25f83819/examples/src/main/java/org/apache/ignite/examples/ml/regression/logistic/binary/LogisticRegressionSGDTrainerExample.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/regression/logistic/binary/LogisticRegressionSGDTrainerExample.java b/examples/src/main/java/org/apache/ignite/examples/ml/regression/logistic/binary/LogisticRegressionSGDTrainerExample.java new file mode 100644 index 0000000..5f3350b --- /dev/null +++ b/examples/src/main/java/org/apache/ignite/examples/ml/regression/logistic/binary/LogisticRegressionSGDTrainerExample.java @@ -0,0 +1,239 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.ignite.examples.ml.regression.logistic.binary; + +import java.util.Arrays; +import java.util.UUID; +import javax.cache.Cache; +import org.apache.ignite.Ignite; +import org.apache.ignite.IgniteCache; +import org.apache.ignite.Ignition; +import org.apache.ignite.cache.affinity.rendezvous.RendezvousAffinityFunction; +import org.apache.ignite.cache.query.QueryCursor; +import org.apache.ignite.cache.query.ScanQuery; +import org.apache.ignite.configuration.CacheConfiguration; +import org.apache.ignite.ml.math.primitives.vector.VectorUtils; +import org.apache.ignite.ml.math.primitives.vector.impl.DenseVector; +import org.apache.ignite.ml.nn.UpdatesStrategy; +import org.apache.ignite.ml.optimization.updatecalculators.SimpleGDParameterUpdate; +import org.apache.ignite.ml.optimization.updatecalculators.SimpleGDUpdateCalculator; +import org.apache.ignite.ml.regressions.logistic.binomial.LogisticRegressionModel; +import org.apache.ignite.ml.regressions.logistic.binomial.LogisticRegressionSGDTrainer; +import org.apache.ignite.thread.IgniteThread; + +/** + * Run logistic regression model over distributed cache. + * + * @see LogisticRegressionSGDTrainer + */ +public class LogisticRegressionSGDTrainerExample { + /** Run example. */ + public static void main(String[] args) throws InterruptedException { + System.out.println(); + System.out.println(">>> Logistic regression model over partitioned dataset usage example started."); + // Start ignite grid. + try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) { + System.out.println(">>> Ignite grid started."); + IgniteThread igniteThread = new IgniteThread(ignite.configuration().getIgniteInstanceName(), + LogisticRegressionSGDTrainerExample.class.getSimpleName(), () -> { + + IgniteCache<Integer, double[]> dataCache = getTestCache(ignite); + + System.out.println(">>> Create new logistic regression trainer object."); + LogisticRegressionSGDTrainer<?> trainer = new LogisticRegressionSGDTrainer<>(new UpdatesStrategy<>( + new SimpleGDUpdateCalculator(0.2), + SimpleGDParameterUpdate::sumLocal, + SimpleGDParameterUpdate::avg + ), 100000, 10, 100, 123L); + + System.out.println(">>> Perform the training to get the model."); + LogisticRegressionModel mdl = trainer.fit( + ignite, + dataCache, + (k, v) -> VectorUtils.of(Arrays.copyOfRange(v, 1, v.length)), + (k, v) -> v[0] + ).withRawLabels(true); + + System.out.println(">>> Logistic regression model: " + mdl); + + int amountOfErrors = 0; + int totalAmount = 0; + + // Build confusion matrix. See https://en.wikipedia.org/wiki/Confusion_matrix + int[][] confusionMtx = {{0, 0}, {0, 0}}; + + try (QueryCursor<Cache.Entry<Integer, double[]>> observations = dataCache.query(new ScanQuery<>())) { + for (Cache.Entry<Integer, double[]> observation : observations) { + double[] val = observation.getValue(); + double[] inputs = Arrays.copyOfRange(val, 1, val.length); + double groundTruth = val[0]; + + double prediction = mdl.apply(new DenseVector(inputs)); + + totalAmount++; + if(groundTruth != prediction) + amountOfErrors++; + + int idx1 = (int)prediction; + int idx2 = (int)groundTruth; + + confusionMtx[idx1][idx2]++; + + System.out.printf(">>> | %.4f\t\t| %.4f\t\t|\n", prediction, groundTruth); + } + + System.out.println(">>> ---------------------------------"); + + System.out.println("\n>>> Absolute amount of errors " + amountOfErrors); + System.out.println("\n>>> Accuracy " + (1 - amountOfErrors / (double)totalAmount)); + } + + System.out.println("\n>>> Confusion matrix is " + Arrays.deepToString(confusionMtx)); + System.out.println(">>> ---------------------------------"); + }); + + igniteThread.start(); + + igniteThread.join(); + } + } + /** + * Fills cache with data and returns it. + * + * @param ignite Ignite instance. + * @return Filled Ignite Cache. + */ + private static IgniteCache<Integer, double[]> getTestCache(Ignite ignite) { + CacheConfiguration<Integer, double[]> cacheConfiguration = new CacheConfiguration<>(); + cacheConfiguration.setName("TEST_" + UUID.randomUUID()); + cacheConfiguration.setAffinity(new RendezvousAffinityFunction(false, 10)); + + IgniteCache<Integer, double[]> cache = ignite.createCache(cacheConfiguration); + + for (int i = 0; i < data.length; i++) + cache.put(i, data[i]); + + return cache; + } + + + /** The 1st and 2nd classes from the Iris dataset. */ + private static final double[][] data = { + {0, 5.1, 3.5, 1.4, 0.2}, + {0, 4.9, 3, 1.4, 0.2}, + {0, 4.7, 3.2, 1.3, 0.2}, + {0, 4.6, 3.1, 1.5, 0.2}, + {0, 5, 3.6, 1.4, 0.2}, + {0, 5.4, 3.9, 1.7, 0.4}, + {0, 4.6, 3.4, 1.4, 0.3}, + {0, 5, 3.4, 1.5, 0.2}, + {0, 4.4, 2.9, 1.4, 0.2}, + {0, 4.9, 3.1, 1.5, 0.1}, + {0, 5.4, 3.7, 1.5, 0.2}, + {0, 4.8, 3.4, 1.6, 0.2}, + {0, 4.8, 3, 1.4, 0.1}, + {0, 4.3, 3, 1.1, 0.1}, + {0, 5.8, 4, 1.2, 0.2}, + {0, 5.7, 4.4, 1.5, 0.4}, + {0, 5.4, 3.9, 1.3, 0.4}, + {0, 5.1, 3.5, 1.4, 0.3}, + {0, 5.7, 3.8, 1.7, 0.3}, + {0, 5.1, 3.8, 1.5, 0.3}, + {0, 5.4, 3.4, 1.7, 0.2}, + {0, 5.1, 3.7, 1.5, 0.4}, + {0, 4.6, 3.6, 1, 0.2}, + {0, 5.1, 3.3, 1.7, 0.5}, + {0, 4.8, 3.4, 1.9, 0.2}, + {0, 5, 3, 1.6, 0.2}, + {0, 5, 3.4, 1.6, 0.4}, + {0, 5.2, 3.5, 1.5, 0.2}, + {0, 5.2, 3.4, 1.4, 0.2}, + {0, 4.7, 3.2, 1.6, 0.2}, + {0, 4.8, 3.1, 1.6, 0.2}, + {0, 5.4, 3.4, 1.5, 0.4}, + {0, 5.2, 4.1, 1.5, 0.1}, + {0, 5.5, 4.2, 1.4, 0.2}, + {0, 4.9, 3.1, 1.5, 0.1}, + {0, 5, 3.2, 1.2, 0.2}, + {0, 5.5, 3.5, 1.3, 0.2}, + {0, 4.9, 3.1, 1.5, 0.1}, + {0, 4.4, 3, 1.3, 0.2}, + {0, 5.1, 3.4, 1.5, 0.2}, + {0, 5, 3.5, 1.3, 0.3}, + {0, 4.5, 2.3, 1.3, 0.3}, + {0, 4.4, 3.2, 1.3, 0.2}, + {0, 5, 3.5, 1.6, 0.6}, + {0, 5.1, 3.8, 1.9, 0.4}, + {0, 4.8, 3, 1.4, 0.3}, + {0, 5.1, 3.8, 1.6, 0.2}, + {0, 4.6, 3.2, 1.4, 0.2}, + {0, 5.3, 3.7, 1.5, 0.2}, + {0, 5, 3.3, 1.4, 0.2}, + {1, 7, 3.2, 4.7, 1.4}, + {1, 6.4, 3.2, 4.5, 1.5}, + {1, 6.9, 3.1, 4.9, 1.5}, + {1, 5.5, 2.3, 4, 1.3}, + {1, 6.5, 2.8, 4.6, 1.5}, + {1, 5.7, 2.8, 4.5, 1.3}, + {1, 6.3, 3.3, 4.7, 1.6}, + {1, 4.9, 2.4, 3.3, 1}, + {1, 6.6, 2.9, 4.6, 1.3}, + {1, 5.2, 2.7, 3.9, 1.4}, + {1, 5, 2, 3.5, 1}, + {1, 5.9, 3, 4.2, 1.5}, + {1, 6, 2.2, 4, 1}, + {1, 6.1, 2.9, 4.7, 1.4}, + {1, 5.6, 2.9, 3.6, 1.3}, + {1, 6.7, 3.1, 4.4, 1.4}, + {1, 5.6, 3, 4.5, 1.5}, + {1, 5.8, 2.7, 4.1, 1}, + {1, 6.2, 2.2, 4.5, 1.5}, + {1, 5.6, 2.5, 3.9, 1.1}, + {1, 5.9, 3.2, 4.8, 1.8}, + {1, 6.1, 2.8, 4, 1.3}, + {1, 6.3, 2.5, 4.9, 1.5}, + {1, 6.1, 2.8, 4.7, 1.2}, + {1, 6.4, 2.9, 4.3, 1.3}, + {1, 6.6, 3, 4.4, 1.4}, + {1, 6.8, 2.8, 4.8, 1.4}, + {1, 6.7, 3, 5, 1.7}, + {1, 6, 2.9, 4.5, 1.5}, + {1, 5.7, 2.6, 3.5, 1}, + {1, 5.5, 2.4, 3.8, 1.1}, + {1, 5.5, 2.4, 3.7, 1}, + {1, 5.8, 2.7, 3.9, 1.2}, + {1, 6, 2.7, 5.1, 1.6}, + {1, 5.4, 3, 4.5, 1.5}, + {1, 6, 3.4, 4.5, 1.6}, + {1, 6.7, 3.1, 4.7, 1.5}, + {1, 6.3, 2.3, 4.4, 1.3}, + {1, 5.6, 3, 4.1, 1.3}, + {1, 5.5, 2.5, 4, 1.3}, + {1, 5.5, 2.6, 4.4, 1.2}, + {1, 6.1, 3, 4.6, 1.4}, + {1, 5.8, 2.6, 4, 1.2}, + {1, 5, 2.3, 3.3, 1}, + {1, 5.6, 2.7, 4.2, 1.3}, + {1, 5.7, 3, 4.2, 1.2}, + {1, 5.7, 2.9, 4.2, 1.3}, + {1, 6.2, 2.9, 4.3, 1.3}, + {1, 5.1, 2.5, 3, 1.1}, + {1, 5.7, 2.8, 4.1, 1.3}, + }; + +} http://git-wip-us.apache.org/repos/asf/ignite/blob/25f83819/examples/src/main/java/org/apache/ignite/examples/ml/regression/logistic/binary/LogisticRegressionSGDTrainerSample.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/regression/logistic/binary/LogisticRegressionSGDTrainerSample.java b/examples/src/main/java/org/apache/ignite/examples/ml/regression/logistic/binary/LogisticRegressionSGDTrainerSample.java deleted file mode 100644 index 9648bbd..0000000 --- a/examples/src/main/java/org/apache/ignite/examples/ml/regression/logistic/binary/LogisticRegressionSGDTrainerSample.java +++ /dev/null @@ -1,239 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.ignite.examples.ml.regression.logistic.binary; - -import java.util.Arrays; -import java.util.UUID; -import javax.cache.Cache; -import org.apache.ignite.Ignite; -import org.apache.ignite.IgniteCache; -import org.apache.ignite.Ignition; -import org.apache.ignite.cache.affinity.rendezvous.RendezvousAffinityFunction; -import org.apache.ignite.cache.query.QueryCursor; -import org.apache.ignite.cache.query.ScanQuery; -import org.apache.ignite.configuration.CacheConfiguration; -import org.apache.ignite.ml.math.primitives.vector.VectorUtils; -import org.apache.ignite.ml.math.primitives.vector.impl.DenseVector; -import org.apache.ignite.ml.nn.UpdatesStrategy; -import org.apache.ignite.ml.optimization.updatecalculators.SimpleGDParameterUpdate; -import org.apache.ignite.ml.optimization.updatecalculators.SimpleGDUpdateCalculator; -import org.apache.ignite.ml.regressions.logistic.binomial.LogisticRegressionModel; -import org.apache.ignite.ml.regressions.logistic.binomial.LogisticRegressionSGDTrainer; -import org.apache.ignite.thread.IgniteThread; - -/** - * Run logistic regression model over distributed cache. - * - * @see LogisticRegressionSGDTrainer - */ -public class LogisticRegressionSGDTrainerSample { - /** Run example. */ - public static void main(String[] args) throws InterruptedException { - System.out.println(); - System.out.println(">>> Logistic regression model over partitioned dataset usage example started."); - // Start ignite grid. - try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) { - System.out.println(">>> Ignite grid started."); - IgniteThread igniteThread = new IgniteThread(ignite.configuration().getIgniteInstanceName(), - LogisticRegressionSGDTrainerSample.class.getSimpleName(), () -> { - - IgniteCache<Integer, double[]> dataCache = getTestCache(ignite); - - System.out.println(">>> Create new logistic regression trainer object."); - LogisticRegressionSGDTrainer<?> trainer = new LogisticRegressionSGDTrainer<>(new UpdatesStrategy<>( - new SimpleGDUpdateCalculator(0.2), - SimpleGDParameterUpdate::sumLocal, - SimpleGDParameterUpdate::avg - ), 100000, 10, 100, 123L); - - System.out.println(">>> Perform the training to get the model."); - LogisticRegressionModel mdl = trainer.fit( - ignite, - dataCache, - (k, v) -> VectorUtils.of(Arrays.copyOfRange(v, 1, v.length)), - (k, v) -> v[0] - ).withRawLabels(true); - - System.out.println(">>> Logistic regression model: " + mdl); - - int amountOfErrors = 0; - int totalAmount = 0; - - // Build confusion matrix. See https://en.wikipedia.org/wiki/Confusion_matrix - int[][] confusionMtx = {{0, 0}, {0, 0}}; - - try (QueryCursor<Cache.Entry<Integer, double[]>> observations = dataCache.query(new ScanQuery<>())) { - for (Cache.Entry<Integer, double[]> observation : observations) { - double[] val = observation.getValue(); - double[] inputs = Arrays.copyOfRange(val, 1, val.length); - double groundTruth = val[0]; - - double prediction = mdl.apply(new DenseVector(inputs)); - - totalAmount++; - if(groundTruth != prediction) - amountOfErrors++; - - int idx1 = (int)prediction; - int idx2 = (int)groundTruth; - - confusionMtx[idx1][idx2]++; - - System.out.printf(">>> | %.4f\t\t| %.4f\t\t|\n", prediction, groundTruth); - } - - System.out.println(">>> ---------------------------------"); - - System.out.println("\n>>> Absolute amount of errors " + amountOfErrors); - System.out.println("\n>>> Accuracy " + (1 - amountOfErrors / (double)totalAmount)); - } - - System.out.println("\n>>> Confusion matrix is " + Arrays.deepToString(confusionMtx)); - System.out.println(">>> ---------------------------------"); - }); - - igniteThread.start(); - - igniteThread.join(); - } - } - /** - * Fills cache with data and returns it. - * - * @param ignite Ignite instance. - * @return Filled Ignite Cache. - */ - private static IgniteCache<Integer, double[]> getTestCache(Ignite ignite) { - CacheConfiguration<Integer, double[]> cacheConfiguration = new CacheConfiguration<>(); - cacheConfiguration.setName("TEST_" + UUID.randomUUID()); - cacheConfiguration.setAffinity(new RendezvousAffinityFunction(false, 10)); - - IgniteCache<Integer, double[]> cache = ignite.createCache(cacheConfiguration); - - for (int i = 0; i < data.length; i++) - cache.put(i, data[i]); - - return cache; - } - - - /** The 1st and 2nd classes from the Iris dataset. */ - private static final double[][] data = { - {0, 5.1, 3.5, 1.4, 0.2}, - {0, 4.9, 3, 1.4, 0.2}, - {0, 4.7, 3.2, 1.3, 0.2}, - {0, 4.6, 3.1, 1.5, 0.2}, - {0, 5, 3.6, 1.4, 0.2}, - {0, 5.4, 3.9, 1.7, 0.4}, - {0, 4.6, 3.4, 1.4, 0.3}, - {0, 5, 3.4, 1.5, 0.2}, - {0, 4.4, 2.9, 1.4, 0.2}, - {0, 4.9, 3.1, 1.5, 0.1}, - {0, 5.4, 3.7, 1.5, 0.2}, - {0, 4.8, 3.4, 1.6, 0.2}, - {0, 4.8, 3, 1.4, 0.1}, - {0, 4.3, 3, 1.1, 0.1}, - {0, 5.8, 4, 1.2, 0.2}, - {0, 5.7, 4.4, 1.5, 0.4}, - {0, 5.4, 3.9, 1.3, 0.4}, - {0, 5.1, 3.5, 1.4, 0.3}, - {0, 5.7, 3.8, 1.7, 0.3}, - {0, 5.1, 3.8, 1.5, 0.3}, - {0, 5.4, 3.4, 1.7, 0.2}, - {0, 5.1, 3.7, 1.5, 0.4}, - {0, 4.6, 3.6, 1, 0.2}, - {0, 5.1, 3.3, 1.7, 0.5}, - {0, 4.8, 3.4, 1.9, 0.2}, - {0, 5, 3, 1.6, 0.2}, - {0, 5, 3.4, 1.6, 0.4}, - {0, 5.2, 3.5, 1.5, 0.2}, - {0, 5.2, 3.4, 1.4, 0.2}, - {0, 4.7, 3.2, 1.6, 0.2}, - {0, 4.8, 3.1, 1.6, 0.2}, - {0, 5.4, 3.4, 1.5, 0.4}, - {0, 5.2, 4.1, 1.5, 0.1}, - {0, 5.5, 4.2, 1.4, 0.2}, - {0, 4.9, 3.1, 1.5, 0.1}, - {0, 5, 3.2, 1.2, 0.2}, - {0, 5.5, 3.5, 1.3, 0.2}, - {0, 4.9, 3.1, 1.5, 0.1}, - {0, 4.4, 3, 1.3, 0.2}, - {0, 5.1, 3.4, 1.5, 0.2}, - {0, 5, 3.5, 1.3, 0.3}, - {0, 4.5, 2.3, 1.3, 0.3}, - {0, 4.4, 3.2, 1.3, 0.2}, - {0, 5, 3.5, 1.6, 0.6}, - {0, 5.1, 3.8, 1.9, 0.4}, - {0, 4.8, 3, 1.4, 0.3}, - {0, 5.1, 3.8, 1.6, 0.2}, - {0, 4.6, 3.2, 1.4, 0.2}, - {0, 5.3, 3.7, 1.5, 0.2}, - {0, 5, 3.3, 1.4, 0.2}, - {1, 7, 3.2, 4.7, 1.4}, - {1, 6.4, 3.2, 4.5, 1.5}, - {1, 6.9, 3.1, 4.9, 1.5}, - {1, 5.5, 2.3, 4, 1.3}, - {1, 6.5, 2.8, 4.6, 1.5}, - {1, 5.7, 2.8, 4.5, 1.3}, - {1, 6.3, 3.3, 4.7, 1.6}, - {1, 4.9, 2.4, 3.3, 1}, - {1, 6.6, 2.9, 4.6, 1.3}, - {1, 5.2, 2.7, 3.9, 1.4}, - {1, 5, 2, 3.5, 1}, - {1, 5.9, 3, 4.2, 1.5}, - {1, 6, 2.2, 4, 1}, - {1, 6.1, 2.9, 4.7, 1.4}, - {1, 5.6, 2.9, 3.6, 1.3}, - {1, 6.7, 3.1, 4.4, 1.4}, - {1, 5.6, 3, 4.5, 1.5}, - {1, 5.8, 2.7, 4.1, 1}, - {1, 6.2, 2.2, 4.5, 1.5}, - {1, 5.6, 2.5, 3.9, 1.1}, - {1, 5.9, 3.2, 4.8, 1.8}, - {1, 6.1, 2.8, 4, 1.3}, - {1, 6.3, 2.5, 4.9, 1.5}, - {1, 6.1, 2.8, 4.7, 1.2}, - {1, 6.4, 2.9, 4.3, 1.3}, - {1, 6.6, 3, 4.4, 1.4}, - {1, 6.8, 2.8, 4.8, 1.4}, - {1, 6.7, 3, 5, 1.7}, - {1, 6, 2.9, 4.5, 1.5}, - {1, 5.7, 2.6, 3.5, 1}, - {1, 5.5, 2.4, 3.8, 1.1}, - {1, 5.5, 2.4, 3.7, 1}, - {1, 5.8, 2.7, 3.9, 1.2}, - {1, 6, 2.7, 5.1, 1.6}, - {1, 5.4, 3, 4.5, 1.5}, - {1, 6, 3.4, 4.5, 1.6}, - {1, 6.7, 3.1, 4.7, 1.5}, - {1, 6.3, 2.3, 4.4, 1.3}, - {1, 5.6, 3, 4.1, 1.3}, - {1, 5.5, 2.5, 4, 1.3}, - {1, 5.5, 2.6, 4.4, 1.2}, - {1, 6.1, 3, 4.6, 1.4}, - {1, 5.8, 2.6, 4, 1.2}, - {1, 5, 2.3, 3.3, 1}, - {1, 5.6, 2.7, 4.2, 1.3}, - {1, 5.7, 3, 4.2, 1.2}, - {1, 5.7, 2.9, 4.2, 1.3}, - {1, 6.2, 2.9, 4.3, 1.3}, - {1, 5.1, 2.5, 3, 1.1}, - {1, 5.7, 2.8, 4.1, 1.3}, - }; - -} http://git-wip-us.apache.org/repos/asf/ignite/blob/25f83819/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/TutorialStepByStepExample.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/TutorialStepByStepExample.java b/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/TutorialStepByStepExample.java new file mode 100644 index 0000000..ab2c746 --- /dev/null +++ b/examples/src/main/java/org/apache/ignite/examples/ml/tutorial/TutorialStepByStepExample.java @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.ignite.examples.ml.tutorial; + +/** + * Run all the tutorial examples step by step with primary purpose to provide + * automatic execution from IgniteExamplesMLTestSuite. + */ +public class TutorialStepByStepExample { + /** Run example. */ + public static void main(String[] args) throws InterruptedException { + Step_1_Read_and_Learn.main(args); + Step_2_Imputing.main(args); + Step_3_Categorial.main(args); + Step_3_Categorial_with_One_Hot_Encoder.main(args); + Step_4_Add_age_fare.main(args); + Step_5_Scaling.main(args); + Step_6_KNN.main(args); + Step_7_Split_train_test.main(args); + Step_8_CV.main(args); + Step_8_CV_with_Param_Grid.main(args); + Step_9_Go_to_LogReg.main(args); + } +} http://git-wip-us.apache.org/repos/asf/ignite/blob/25f83819/modules/ml/src/main/java/org/apache/ignite/ml/environment/LearningEnvironmentBuilder.java ---------------------------------------------------------------------- diff --git a/modules/ml/src/main/java/org/apache/ignite/ml/environment/LearningEnvironmentBuilder.java b/modules/ml/src/main/java/org/apache/ignite/ml/environment/LearningEnvironmentBuilder.java index be56ccc..91e832d 100644 --- a/modules/ml/src/main/java/org/apache/ignite/ml/environment/LearningEnvironmentBuilder.java +++ b/modules/ml/src/main/java/org/apache/ignite/ml/environment/LearningEnvironmentBuilder.java @@ -60,8 +60,10 @@ public class LearningEnvironmentBuilder { switch (stgyType) { case NO_PARALLELISM: this.parallelismStgy = NoParallelismStrategy.INSTANCE; + break; case ON_DEFAULT_POOL: this.parallelismStgy = new DefaultParallelismStrategy(); + break; } return this; } http://git-wip-us.apache.org/repos/asf/ignite/blob/25f83819/modules/ml/src/main/java/org/apache/ignite/ml/math/Isomorphism.java ---------------------------------------------------------------------- diff --git a/modules/ml/src/main/java/org/apache/ignite/ml/math/Isomorphism.java b/modules/ml/src/main/java/org/apache/ignite/ml/math/Isomorphism.java deleted file mode 100644 index 6f17e3a..0000000 --- a/modules/ml/src/main/java/org/apache/ignite/ml/math/Isomorphism.java +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.ignite.ml.math; - -import org.apache.ignite.ml.math.functions.IgniteFunction; - -/** - * Function from {@code K} to {@code V} with defined inverse. - * - * @param <K> - * @param <V> - */ -public class Isomorphism<K, V> { - /** */ - private IgniteFunction<K, V> forward; - /** */ - private IgniteFunction<V, K> back; - - /** - * Identity isomorphism. - */ - public static <K> Isomorphism<K, K> id() { - return new Isomorphism<>(a -> a, a -> a); - } - - /** - * Build isomorphism with forward and backward functions. - * - * @param forward Forward. - * @param back Back. - */ - public Isomorphism(IgniteFunction<K, V> forward, IgniteFunction<V, K> back) { - this.forward = forward; - this.back = back; - } - - /** - * Forward function. - * - * @param k K. - */ - public V forward(K k) { - return forward.apply(k); - } - - /** - * Backward function. - * - * @param v V. - */ - public K back(V v) { - return back.apply(v); - } -} http://git-wip-us.apache.org/repos/asf/ignite/blob/25f83819/modules/ml/src/main/java/org/apache/ignite/ml/math/MurmurHash.java ---------------------------------------------------------------------- diff --git a/modules/ml/src/main/java/org/apache/ignite/ml/math/MurmurHash.java b/modules/ml/src/main/java/org/apache/ignite/ml/math/MurmurHash.java deleted file mode 100644 index d1ebf53..0000000 --- a/modules/ml/src/main/java/org/apache/ignite/ml/math/MurmurHash.java +++ /dev/null @@ -1,247 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.ignite.ml.math; - -import java.nio.ByteBuffer; -import java.nio.ByteOrder; - -/** - * This is a very fast, non-cryptographic hash suitable for general hash-based lookup. - * <p> - * See http://murmurhash.googlepages.com/ for mre details.</p> - */ -public class MurmurHash { - /** Hide it. */ - private MurmurHash() { - } - - /** - * This produces exactly the same hash values as the final C+ version of MurmurHash3 and is - * thus suitable for producing the same hash values across platforms. - * <p> - * The 32 bit x86 version of this hash should be the fastest variant for relatively short keys like IDs.</p> - * <p> - * Note - The x86 and x64 versions do _not_ produce the same results, as the algorithms are - * optimized for their respective platforms.</p> - * <p> - * See also http://github.com/yonik/java_util for future updates to this method.</p> - * - * @param data Data to hash. - * @param off Where to start munging. - * @param len How many bytes to process. - * @param seed The seed to start with. - * @return 32 bit hash platform compatible with C++ MurmurHash3 implementation on x86. - */ - public static int hash3X86(byte[] data, int off, int len, int seed) { - int c1 = 0xcc9e2d51; - int c2 = 0x1b873593; - - int h1 = seed; - int roundedEnd = off + (len & 0xfffffffc); // Round down to 4 byte block. - - for (int i = off; i < roundedEnd; i += 4) { - int k1 = (data[i] & 0xff) | ((data[i + 1] & 0xff) << 8) | ((data[i + 2] & 0xff) << 16) | (data[i + 3] << 24); - - k1 *= c1; - k1 = (k1 << 15) | (k1 >>> 17); - k1 *= c2; - - h1 ^= k1; - h1 = (h1 << 13) | (h1 >>> 19); - h1 = h1 * 5 + 0xe6546b64; - } - - // Tail. - int k1 = 0; - - switch (len & 0x03) { - case 3: - k1 = (data[roundedEnd + 2] & 0xff) << 16; - // Fallthrough - WTF? - case 2: - k1 |= (data[roundedEnd + 1] & 0xff) << 8; - // Fallthrough - WTF? - case 1: - k1 |= data[roundedEnd] & 0xff; - k1 *= c1; - k1 = (k1 << 15) | (k1 >>> 17); - k1 *= c2; - h1 ^= k1; - default: - } - - // Finalization. - h1 ^= len; - - h1 ^= h1 >>> 16; - h1 *= 0x85ebca6b; - h1 ^= h1 >>> 13; - h1 *= 0xc2b2ae35; - h1 ^= h1 >>> 16; - - return h1; - } - - /** - * Hashes an int. - * - * @param data The int to hash. - * @param seed The seed to start with. - * @return The 32 bit hash of the bytes in question. - */ - public static int hash(int data, int seed) { - byte[] arr = new byte[] { - (byte)(data >>> 24), - (byte)(data >>> 16), - (byte)(data >>> 8), - (byte)data - }; - - return hash(ByteBuffer.wrap(arr), seed); - } - - /** - * Hashes bytes in an array. - * - * @param data The bytes to hash. - * @param seed The seed to start with. - * @return The 32 bit hash of the bytes in question. - */ - public static int hash(byte[] data, int seed) { - return hash(ByteBuffer.wrap(data), seed); - } - - /** - * Hashes bytes in part of an array. - * - * @param data The data to hash. - * @param off Where to start munging. - * @param len How many bytes to process. - * @param seed The seed to start with. - * @return The 32-bit hash of the data in question. - */ - public static int hash(byte[] data, int off, int len, int seed) { - return hash(ByteBuffer.wrap(data, off, len), seed); - } - - /** - * Hashes the bytes in a buffer from the current position to the limit. - * - * @param buf The bytes to hash. - * @param seed The seed to start with. - * @return The 32 bit murmur hash of the bytes in the buffer. - */ - public static int hash(ByteBuffer buf, int seed) { - ByteOrder byteOrder = buf.order(); - buf.order(ByteOrder.LITTLE_ENDIAN); - - int m = 0x5bd1e995; - int r = 24; - - int h = seed ^ buf.remaining(); - - while (buf.remaining() >= 4) { - int k = buf.getInt(); - - k *= m; - k ^= k >>> r; - k *= m; - - h *= m; - h ^= k; - } - - if (buf.remaining() > 0) { - ByteBuffer finish = ByteBuffer.allocate(4).order(ByteOrder.LITTLE_ENDIAN); - - finish.put(buf).rewind(); - - h ^= finish.getInt(); - h *= m; - } - - h ^= h >>> 13; - h *= m; - h ^= h >>> 15; - - buf.order(byteOrder); - - return h; - } - - /** - * @param data The data to hash. - * @param seed The seed to start with. - * @return Hash value for given data and seed. - */ - public static long hash64A(byte[] data, int seed) { - return hash64A(ByteBuffer.wrap(data), seed); - } - - /** - * @param data The data to hash. - * @param off Where to start munging. - * @param len How many bytes to process. - * @param seed The seed to start with. - */ - public static long hash64A(byte[] data, int off, int len, int seed) { - return hash64A(ByteBuffer.wrap(data, off, len), seed); - } - - /** - * @param buf The data to hash. - * @param seed The seed to start with. - */ - public static long hash64A(ByteBuffer buf, int seed) { - ByteOrder byteOrder = buf.order(); - buf.order(ByteOrder.LITTLE_ENDIAN); - - long m = 0xc6a4a7935bd1e995L; - int r = 47; - - long h = seed ^ (buf.remaining() * m); - - while (buf.remaining() >= 8) { - long k = buf.getLong(); - - k *= m; - k ^= k >>> r; - k *= m; - - h ^= k; - h *= m; - } - - if (buf.remaining() > 0) { - ByteBuffer finish = ByteBuffer.allocate(8).order(ByteOrder.LITTLE_ENDIAN); - - finish.put(buf).rewind(); - - h ^= finish.getLong(); - h *= m; - } - - h ^= h >>> r; - h *= m; - h ^= h >>> r; - - buf.order(byteOrder); - - return h; - } -} http://git-wip-us.apache.org/repos/asf/ignite/blob/25f83819/modules/ml/src/main/java/org/apache/ignite/ml/math/Precision.java ---------------------------------------------------------------------- diff --git a/modules/ml/src/main/java/org/apache/ignite/ml/math/Precision.java b/modules/ml/src/main/java/org/apache/ignite/ml/math/Precision.java deleted file mode 100644 index 830644c..0000000 --- a/modules/ml/src/main/java/org/apache/ignite/ml/math/Precision.java +++ /dev/null @@ -1,588 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.ignite.ml.math; - -import java.math.BigDecimal; -import org.apache.ignite.ml.math.exceptions.MathArithmeticException; -import org.apache.ignite.ml.math.exceptions.MathIllegalArgumentException; - -/** - * This class is based on the corresponding class from Apache Common Math lib. - * Utilities for comparing numbers. * - */ -public class Precision { - /** - * <p> - * Largest double-precision floating-point number such that - * {@code 1 + EPSILON} is numerically equal to 1. This value is an upper - * bound on the relative error due to rounding real numbers to double - * precision floating-point numbers. - * </p> - * <p> - * In IEEE 754 arithmetic, this is 2<sup>-53</sup>. - * </p> - * - * @see <a href="http://en.wikipedia.org/wiki/Machine_epsilon">Machine epsilon</a> - */ - public static final double EPSILON; - - /** - * Safe minimum, such that {@code 1 / SAFE_MIN} does not overflow. - * <br/> - * In IEEE 754 arithmetic, this is also the smallest normalized - * number 2<sup>-1022</sup>. - */ - public static final double SAFE_MIN; - - /** Exponent offset in IEEE754 representation. */ - private static final long EXPONENT_OFFSET = 1023L; - - /** Offset to order signed double numbers lexicographically. */ - private static final long SGN_MASK = 0x8000000000000000L; - /** Offset to order signed double numbers lexicographically. */ - private static final int SGN_MASK_FLOAT = 0x80000000; - /** Positive zero. */ - private static final double POSITIVE_ZERO = 0d; - /** Positive zero bits. */ - private static final long POSITIVE_ZERO_DOUBLE_BITS = Double.doubleToRawLongBits(+0.0); - /** Negative zero bits. */ - private static final long NEGATIVE_ZERO_DOUBLE_BITS = Double.doubleToRawLongBits(-0.0); - /** Positive zero bits. */ - private static final int POSITIVE_ZERO_FLOAT_BITS = Float.floatToRawIntBits(+0.0f); - /** Negative zero bits. */ - private static final int NEGATIVE_ZERO_FLOAT_BITS = Float.floatToRawIntBits(-0.0f); - /** */ - private static final String INVALID_ROUNDING_METHOD = "invalid rounding method {0}, " + - "valid methods: {1} ({2}), {3} ({4}), {5} ({6}), {7} ({8}), {9} ({10}), {11} ({12}), {13} ({14}), {15} ({16})"; - - static { - /* - * This was previously expressed as = 0x1.0p-53; - * However, OpenJDK (Sparc Solaris) cannot handle such small - * constants: MATH-721 - */ - EPSILON = Double.longBitsToDouble((EXPONENT_OFFSET - 53L) << 52); - - /* - * This was previously expressed as = 0x1.0p-1022; - * However, OpenJDK (Sparc Solaris) cannot handle such small - * constants: MATH-721 - */ - SAFE_MIN = Double.longBitsToDouble((EXPONENT_OFFSET - 1022L) << 52); - } - - /** - * Private constructor. - */ - private Precision() { - } - - /** - * Compares two numbers given some amount of allowed error. - * - * @param x the first number - * @param y the second number - * @param eps the amount of error to allow when checking for equality - * @return <ul><li>0 if {@link #equals(double, double, double) equals(x, y, eps)}</li> <li>< 0 if !{@link - * #equals(double, double, double) equals(x, y, eps)} && x < y</li> <li>> 0 if !{@link #equals(double, - * double, double) equals(x, y, eps)} && x > y or either argument is NaN</li></ul> - */ - public static int compareTo(double x, double y, double eps) { - if (equals(x, y, eps)) - return 0; - else if (x < y) - return -1; - return 1; - } - - /** - * Compares two numbers given some amount of allowed error. - * Two float numbers are considered equal if there are {@code (maxUlps - 1)} - * (or fewer) floating point numbers between them, i.e. two adjacent floating - * point numbers are considered equal. - * Adapted from <a - * href="http://randomascii.wordpress.com/2012/02/25/comparing-floating-point-numbers-2012-edition/"> - * Bruce Dawson</a>. Returns {@code false} if either of the arguments is NaN. - * - * @param x first value - * @param y second value - * @param maxUlps {@code (maxUlps - 1)} is the number of floating point values between {@code x} and {@code y}. - * @return <ul><li>0 if {@link #equals(double, double, int) equals(x, y, maxUlps)}</li> <li>< 0 if !{@link - * #equals(double, double, int) equals(x, y, maxUlps)} && x < y</li> <li>> 0 if !{@link - * #equals(double, double, int) equals(x, y, maxUlps)} && x > y or either argument is NaN</li></ul> - */ - public static int compareTo(final double x, final double y, final int maxUlps) { - if (equals(x, y, maxUlps)) - return 0; - else if (x < y) - return -1; - return 1; - } - - /** - * Returns true iff they are equal as defined by - * {@link #equals(float, float, int) equals(x, y, 1)}. - * - * @param x first value - * @param y second value - * @return {@code true} if the values are equal. - */ - public static boolean equals(float x, float y) { - return equals(x, y, 1); - } - - /** - * Returns true if both arguments are NaN or they are - * equal as defined by {@link #equals(float, float) equals(x, y, 1)}. - * - * @param x first value - * @param y second value - * @return {@code true} if the values are equal or both are NaN. - * @since 2.2 - */ - public static boolean equalsIncludingNaN(float x, float y) { - return (x != x || y != y) ? !(x != x ^ y != y) : equals(x, y, 1); - } - - /** - * Returns true if the arguments are equal or within the range of allowed - * error (inclusive). Returns {@code false} if either of the arguments - * is NaN. - * - * @param x first value - * @param y second value - * @param eps the amount of absolute error to allow. - * @return {@code true} if the values are equal or within range of each other. - * @since 2.2 - */ - public static boolean equals(float x, float y, float eps) { - return equals(x, y, 1) || Math.abs(y - x) <= eps; - } - - /** - * Returns true if the arguments are both NaN, are equal, or are within the range - * of allowed error (inclusive). - * - * @param x first value - * @param y second value - * @param eps the amount of absolute error to allow. - * @return {@code true} if the values are equal or within range of each other, or both are NaN. - * @since 2.2 - */ - public static boolean equalsIncludingNaN(float x, float y, float eps) { - return equalsIncludingNaN(x, y) || (Math.abs(y - x) <= eps); - } - - /** - * Returns true if the arguments are equal or within the range of allowed - * error (inclusive). - * Two float numbers are considered equal if there are {@code (maxUlps - 1)} - * (or fewer) floating point numbers between them, i.e. two adjacent floating - * point numbers are considered equal. - * Adapted from <a - * href="http://randomascii.wordpress.com/2012/02/25/comparing-floating-point-numbers-2012-edition/"> - * Bruce Dawson</a>. Returns {@code false} if either of the arguments is NaN. - * - * @param x first value - * @param y second value - * @param maxUlps {@code (maxUlps - 1)} is the number of floating point values between {@code x} and {@code y}. - * @return {@code true} if there are fewer than {@code maxUlps} floating point values between {@code x} and {@code - * y}. - * @since 2.2 - */ - public static boolean equals(final float x, final float y, final int maxUlps) { - - final int xInt = Float.floatToRawIntBits(x); - final int yInt = Float.floatToRawIntBits(y); - - final boolean isEqual; - if (((xInt ^ yInt) & SGN_MASK_FLOAT) == 0) { - // number have same sign, there is no risk of overflow - isEqual = Math.abs(xInt - yInt) <= maxUlps; - } - else { - // number have opposite signs, take care of overflow - final int deltaPlus; - final int deltaMinus; - if (xInt < yInt) { - deltaPlus = yInt - POSITIVE_ZERO_FLOAT_BITS; - deltaMinus = xInt - NEGATIVE_ZERO_FLOAT_BITS; - } - else { - deltaPlus = xInt - POSITIVE_ZERO_FLOAT_BITS; - deltaMinus = yInt - NEGATIVE_ZERO_FLOAT_BITS; - } - - if (deltaPlus > maxUlps) - isEqual = false; - else - isEqual = deltaMinus <= (maxUlps - deltaPlus); - - } - - return isEqual && !Float.isNaN(x) && !Float.isNaN(y); - - } - - /** - * Returns true if the arguments are both NaN or if they are equal as defined - * by {@link #equals(float, float, int) equals(x, y, maxUlps)}. - * - * @param x first value - * @param y second value - * @param maxUlps {@code (maxUlps - 1)} is the number of floating point values between {@code x} and {@code y}. - * @return {@code true} if both arguments are NaN or if there are less than {@code maxUlps} floating point values - * between {@code x} and {@code y}. - * @since 2.2 - */ - public static boolean equalsIncludingNaN(float x, float y, int maxUlps) { - return (x != x || y != y) ? !(x != x ^ y != y) : equals(x, y, maxUlps); - } - - /** - * Returns true iff they are equal as defined by - * {@link #equals(double, double, int) equals(x, y, 1)}. - * - * @param x first value - * @param y second value - * @return {@code true} if the values are equal. - */ - public static boolean equals(double x, double y) { - return equals(x, y, 1); - } - - /** - * Returns true if the arguments are both NaN or they are - * equal as defined by {@link #equals(double, double) equals(x, y, 1)}. - * - * @param x first value - * @param y second value - * @return {@code true} if the values are equal or both are NaN. - * @since 2.2 - */ - public static boolean equalsIncludingNaN(double x, double y) { - return (x != x || y != y) ? !(x != x ^ y != y) : equals(x, y, 1); - } - - /** - * Returns {@code true} if there is no double value strictly between the - * arguments or the difference between them is within the range of allowed - * error (inclusive). Returns {@code false} if either of the arguments - * is NaN. - * - * @param x First value. - * @param y Second value. - * @param eps Amount of allowed absolute error. - * @return {@code true} if the values are two adjacent floating point numbers or they are within range of each - * other. - */ - public static boolean equals(double x, double y, double eps) { - return equals(x, y, 1) || Math.abs(y - x) <= eps; - } - - /** - * Returns {@code true} if there is no double value strictly between the - * arguments or the relative difference between them is less than or equal - * to the given tolerance. Returns {@code false} if either of the arguments - * is NaN. - * - * @param x First value. - * @param y Second value. - * @param eps Amount of allowed relative error. - * @return {@code true} if the values are two adjacent floating point numbers or they are within range of each - * other. - * @since 3.1 - */ - public static boolean equalsWithRelativeTolerance(double x, double y, double eps) { - if (equals(x, y, 1)) - return true; - - final double absMax = Math.max(Math.abs(x), Math.abs(y)); - final double relativeDifference = Math.abs((x - y) / absMax); - - return relativeDifference <= eps; - } - - /** - * Returns true if the arguments are both NaN, are equal or are within the range - * of allowed error (inclusive). - * - * @param x first value - * @param y second value - * @param eps the amount of absolute error to allow. - * @return {@code true} if the values are equal or within range of each other, or both are NaN. - * @since 2.2 - */ - public static boolean equalsIncludingNaN(double x, double y, double eps) { - return equalsIncludingNaN(x, y) || (Math.abs(y - x) <= eps); - } - - /** - * Returns true if the arguments are equal or within the range of allowed - * error (inclusive). - * <p> - * Two float numbers are considered equal if there are {@code (maxUlps - 1)} - * (or fewer) floating point numbers between them, i.e. two adjacent - * floating point numbers are considered equal. - * </p> - * <p> - * Adapted from <a - * href="http://randomascii.wordpress.com/2012/02/25/comparing-floating-point-numbers-2012-edition/"> - * Bruce Dawson</a>. Returns {@code false} if either of the arguments is NaN. - * </p> - * - * @param x first value - * @param y second value - * @param maxUlps {@code (maxUlps - 1)} is the number of floating point values between {@code x} and {@code y}. - * @return {@code true} if there are fewer than {@code maxUlps} floating point values between {@code x} and {@code - * y}. - */ - public static boolean equals(final double x, final double y, final int maxUlps) { - - final long xInt = Double.doubleToRawLongBits(x); - final long yInt = Double.doubleToRawLongBits(y); - - final boolean isEqual; - if (((xInt ^ yInt) & SGN_MASK) == 0L) { - // number have same sign, there is no risk of overflow - isEqual = Math.abs(xInt - yInt) <= maxUlps; - } - else { - // number have opposite signs, take care of overflow - final long deltaPlus; - final long deltaMinus; - if (xInt < yInt) { - deltaPlus = yInt - POSITIVE_ZERO_DOUBLE_BITS; - deltaMinus = xInt - NEGATIVE_ZERO_DOUBLE_BITS; - } - else { - deltaPlus = xInt - POSITIVE_ZERO_DOUBLE_BITS; - deltaMinus = yInt - NEGATIVE_ZERO_DOUBLE_BITS; - } - - if (deltaPlus > maxUlps) - isEqual = false; - else - isEqual = deltaMinus <= (maxUlps - deltaPlus); - - } - - return isEqual && !Double.isNaN(x) && !Double.isNaN(y); - - } - - /** - * Returns true if both arguments are NaN or if they are equal as defined - * by {@link #equals(double, double, int) equals(x, y, maxUlps)}. - * - * @param x first value - * @param y second value - * @param maxUlps {@code (maxUlps - 1)} is the number of floating point values between {@code x} and {@code y}. - * @return {@code true} if both arguments are NaN or if there are less than {@code maxUlps} floating point values - * between {@code x} and {@code y}. - * @since 2.2 - */ - public static boolean equalsIncludingNaN(double x, double y, int maxUlps) { - return (x != x || y != y) ? !(x != x ^ y != y) : equals(x, y, maxUlps); - } - - /** - * Rounds the given value to the specified number of decimal places. - * The value is rounded using the {@link BigDecimal#ROUND_HALF_UP} method. - * - * @param x Value to round. - * @param scale Number of digits to the right of the decimal point. - * @return the rounded value. - * @since 1.1 (previously in {@code MathUtils}, moved as of version 3.0) - */ - public static double round(double x, int scale) { - return round(x, scale, BigDecimal.ROUND_HALF_UP); - } - - /** - * Rounds the given value to the specified number of decimal places. - * The value is rounded using the given method which is any method defined - * in {@link BigDecimal}. - * If {@code x} is infinite or {@code NaN}, then the value of {@code x} is - * returned unchanged, regardless of the other parameters. - * - * @param x Value to round. - * @param scale Number of digits to the right of the decimal point. - * @param roundingMtd Rounding method as defined in {@link BigDecimal}. - * @return the rounded value. - * @throws ArithmeticException if {@code roundingMethod == ROUND_UNNECESSARY} and the specified scaling operation - * would require rounding. - * @throws IllegalArgumentException if {@code roundingMethod} does not represent a valid rounding mode. - * @since 1.1 (previously in {@code MathUtils}, moved as of version 3.0) - */ - public static double round(double x, int scale, int roundingMtd) { - try { - final double rounded = (new BigDecimal(Double.toString(x)) - .setScale(scale, roundingMtd)) - .doubleValue(); - // MATH-1089: negative values rounded to zero should result in negative zero - return rounded == POSITIVE_ZERO ? POSITIVE_ZERO * x : rounded; - } - catch (NumberFormatException ex) { - if (Double.isInfinite(x)) - return x; - else - return Double.NaN; - } - } - - /** - * Rounds the given value to the specified number of decimal places. - * The value is rounded using the {@link BigDecimal#ROUND_HALF_UP} method. - * - * @param x Value to round. - * @param scale Number of digits to the right of the decimal point. - * @return the rounded value. - * @since 1.1 (previously in {@code MathUtils}, moved as of version 3.0) - */ - public static float round(float x, int scale) { - return round(x, scale, BigDecimal.ROUND_HALF_UP); - } - - /** - * Rounds the given value to the specified number of decimal places. - * The value is rounded using the given method which is any method defined - * in {@link BigDecimal}. - * - * @param x Value to round. - * @param scale Number of digits to the right of the decimal point. - * @param roundingMtd Rounding method as defined in {@link BigDecimal}. - * @return the rounded value. - * @throws MathArithmeticException if an exact operation is required but result is not exact - * @throws MathIllegalArgumentException if {@code roundingMethod} is not a valid rounding method. - * @since 1.1 (previously in {@code MathUtils}, moved as of version 3.0) - */ - public static float round(float x, int scale, int roundingMtd) - throws MathArithmeticException, MathIllegalArgumentException { - final float sign = Math.copySign(1f, x); - final float factor = (float)Math.pow(10.0f, scale) * sign; - return (float)roundUnscaled(x * factor, sign, roundingMtd) / factor; - } - - /** - * Rounds the given non-negative value to the "nearest" integer. Nearest is - * determined by the rounding method specified. Rounding methods are defined - * in {@link BigDecimal}. - * - * @param unscaled Value to round. - * @param sign Sign of the original, scaled value. - * @param roundingMtd Rounding method, as defined in {@link BigDecimal}. - * @return the rounded value. - * @throws MathArithmeticException if an exact operation is required but result is not exact - * @throws MathIllegalArgumentException if {@code roundingMethod} is not a valid rounding method. - * @since 1.1 (previously in {@code MathUtils}, moved as of version 3.0) - */ - private static double roundUnscaled(double unscaled, double sign, int roundingMtd) - throws MathArithmeticException, MathIllegalArgumentException { - switch (roundingMtd) { - case BigDecimal.ROUND_CEILING: - if (sign == -1) - unscaled = Math.floor(Math.nextAfter(unscaled, Double.NEGATIVE_INFINITY)); - else - unscaled = Math.ceil(Math.nextAfter(unscaled, Double.POSITIVE_INFINITY)); - break; - case BigDecimal.ROUND_DOWN: - unscaled = Math.floor(Math.nextAfter(unscaled, Double.NEGATIVE_INFINITY)); - break; - case BigDecimal.ROUND_FLOOR: - if (sign == -1) - unscaled = Math.ceil(Math.nextAfter(unscaled, Double.POSITIVE_INFINITY)); - else - unscaled = Math.floor(Math.nextAfter(unscaled, Double.NEGATIVE_INFINITY)); - break; - case BigDecimal.ROUND_HALF_DOWN: { - unscaled = Math.nextAfter(unscaled, Double.NEGATIVE_INFINITY); - double fraction = unscaled - Math.floor(unscaled); - if (fraction > 0.5) - unscaled = Math.ceil(unscaled); - else - unscaled = Math.floor(unscaled); - break; - } - case BigDecimal.ROUND_HALF_EVEN: { - double fraction = unscaled - Math.floor(unscaled); - if (fraction > 0.5) - unscaled = Math.ceil(unscaled); - else if (fraction < 0.5) - unscaled = Math.floor(unscaled); - else { - // The following equality test is intentional and needed for rounding purposes - if (Math.floor(unscaled) / 2.0 == Math.floor(Math.floor(unscaled) / 2.0)) { // even - unscaled = Math.floor(unscaled); - } - else { // odd - unscaled = Math.ceil(unscaled); - } - } - break; - } - case BigDecimal.ROUND_HALF_UP: { - unscaled = Math.nextAfter(unscaled, Double.POSITIVE_INFINITY); - double fraction = unscaled - Math.floor(unscaled); - if (fraction >= 0.5) - unscaled = Math.ceil(unscaled); - else - unscaled = Math.floor(unscaled); - break; - } - case BigDecimal.ROUND_UNNECESSARY: - if (unscaled != Math.floor(unscaled)) - throw new MathArithmeticException(); - break; - case BigDecimal.ROUND_UP: - // do not round if the discarded fraction is equal to zero - if (unscaled != Math.floor(unscaled)) - unscaled = Math.ceil(Math.nextAfter(unscaled, Double.POSITIVE_INFINITY)); - break; - default: - throw new MathIllegalArgumentException(INVALID_ROUNDING_METHOD, - roundingMtd, - "ROUND_CEILING", BigDecimal.ROUND_CEILING, - "ROUND_DOWN", BigDecimal.ROUND_DOWN, - "ROUND_FLOOR", BigDecimal.ROUND_FLOOR, - "ROUND_HALF_DOWN", BigDecimal.ROUND_HALF_DOWN, - "ROUND_HALF_EVEN", BigDecimal.ROUND_HALF_EVEN, - "ROUND_HALF_UP", BigDecimal.ROUND_HALF_UP, - "ROUND_UNNECESSARY", BigDecimal.ROUND_UNNECESSARY, - "ROUND_UP", BigDecimal.ROUND_UP); - } - return unscaled; - } - - /** - * Computes a number {@code delta} close to {@code originalDelta} with - * the property that <pre><code> - * x + delta - x - * </code></pre> - * is exactly machine-representable. - * This is useful when computing numerical derivatives, in order to reduce - * roundoff errors. - * - * @param x Value. - * @param originalDelta Offset value. - * @return a number {@code delta} so that {@code x + delta} and {@code x} differ by a representable floating number. - */ - public static double representableDelta(double x, double originalDelta) { - return x + originalDelta - x; - } -} http://git-wip-us.apache.org/repos/asf/ignite/blob/25f83819/modules/ml/src/main/java/org/apache/ignite/ml/math/distances/EuclideanDistance.java ---------------------------------------------------------------------- diff --git a/modules/ml/src/main/java/org/apache/ignite/ml/math/distances/EuclideanDistance.java b/modules/ml/src/main/java/org/apache/ignite/ml/math/distances/EuclideanDistance.java index fa5c21c..98f994f 100644 --- a/modules/ml/src/main/java/org/apache/ignite/ml/math/distances/EuclideanDistance.java +++ b/modules/ml/src/main/java/org/apache/ignite/ml/math/distances/EuclideanDistance.java @@ -41,7 +41,7 @@ public class EuclideanDistance implements DistanceMeasure { double res = 0.0; for (int i = 0; i < b.length; i++) - res+= Math.abs(b[i] - a.get(i)); + res += Math.pow(Math.abs(b[i] - a.get(i)), 2.0); return Math.sqrt(res); } @@ -63,4 +63,9 @@ public class EuclideanDistance implements DistanceMeasure { return obj != null && getClass() == obj.getClass(); } + + /** {@inheritDoc} */ + @Override public int hashCode() { + return getClass().hashCode(); + } } http://git-wip-us.apache.org/repos/asf/ignite/blob/25f83819/modules/ml/src/main/java/org/apache/ignite/ml/math/distances/HammingDistance.java ---------------------------------------------------------------------- diff --git a/modules/ml/src/main/java/org/apache/ignite/ml/math/distances/HammingDistance.java b/modules/ml/src/main/java/org/apache/ignite/ml/math/distances/HammingDistance.java index ef50a69..c886f53 100644 --- a/modules/ml/src/main/java/org/apache/ignite/ml/math/distances/HammingDistance.java +++ b/modules/ml/src/main/java/org/apache/ignite/ml/math/distances/HammingDistance.java @@ -64,4 +64,9 @@ public class HammingDistance implements DistanceMeasure { return obj != null && getClass() == obj.getClass(); } + + /** {@inheritDoc} */ + @Override public int hashCode() { + return getClass().hashCode(); + } } http://git-wip-us.apache.org/repos/asf/ignite/blob/25f83819/modules/ml/src/main/java/org/apache/ignite/ml/math/distances/ManhattanDistance.java ---------------------------------------------------------------------- diff --git a/modules/ml/src/main/java/org/apache/ignite/ml/math/distances/ManhattanDistance.java b/modules/ml/src/main/java/org/apache/ignite/ml/math/distances/ManhattanDistance.java index bd3df14..fec3120 100644 --- a/modules/ml/src/main/java/org/apache/ignite/ml/math/distances/ManhattanDistance.java +++ b/modules/ml/src/main/java/org/apache/ignite/ml/math/distances/ManhattanDistance.java @@ -58,4 +58,9 @@ public class ManhattanDistance implements DistanceMeasure { return obj != null && getClass() == obj.getClass(); } + + /** {@inheritDoc} */ + @Override public int hashCode() { + return getClass().hashCode(); + } } http://git-wip-us.apache.org/repos/asf/ignite/blob/25f83819/modules/ml/src/main/java/org/apache/ignite/ml/math/exceptions/MathArithmeticException.java ---------------------------------------------------------------------- diff --git a/modules/ml/src/main/java/org/apache/ignite/ml/math/exceptions/MathArithmeticException.java b/modules/ml/src/main/java/org/apache/ignite/ml/math/exceptions/MathArithmeticException.java deleted file mode 100644 index ccd019c..0000000 --- a/modules/ml/src/main/java/org/apache/ignite/ml/math/exceptions/MathArithmeticException.java +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.ignite.ml.math.exceptions; - -/** - * This class is based on the corresponding class from Apache Common Math lib. - * Base class for arithmetic exceptions. - */ -public class MathArithmeticException extends MathRuntimeException { - /** Serializable version Id. */ - private static final long serialVersionUID = -6024911025449780478L; - - /** - * Default constructor. - */ - public MathArithmeticException() { - this("Arithmetic exception."); - } - - /** - * Constructor with a specific message. - * - * @param format Message pattern providing the specific context of the error. - * @param args Arguments. - */ - public MathArithmeticException(String format, Object... args) { - super(format, args); - } - -} http://git-wip-us.apache.org/repos/asf/ignite/blob/25f83819/modules/ml/src/main/java/org/apache/ignite/ml/math/exceptions/knn/SmallTrainingDatasetSizeException.java ---------------------------------------------------------------------- diff --git a/modules/ml/src/main/java/org/apache/ignite/ml/math/exceptions/knn/SmallTrainingDatasetSizeException.java b/modules/ml/src/main/java/org/apache/ignite/ml/math/exceptions/knn/SmallTrainingDatasetSizeException.java deleted file mode 100644 index 5eb3f7a..0000000 --- a/modules/ml/src/main/java/org/apache/ignite/ml/math/exceptions/knn/SmallTrainingDatasetSizeException.java +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.ignite.ml.math.exceptions.knn; - -import org.apache.ignite.ml.math.exceptions.MathIllegalArgumentException; - -/** - * Indicates a small training dataset size in ML algorithms. - */ -public class SmallTrainingDatasetSizeException extends MathIllegalArgumentException { - /** */ - private static final long serialVersionUID = 0L; - - /** - * Creates new small training dataset size exception. - * - * @param exp Expected dataset size. - * @param act Actual dataset size. - */ - public SmallTrainingDatasetSizeException(int exp, int act) { - super("Small training dataset size [expected=%d, actual=%d]", exp, act); - } -} http://git-wip-us.apache.org/repos/asf/ignite/blob/25f83819/modules/ml/src/main/java/org/apache/ignite/ml/math/isolve/lsqr/AbstractLSQR.java ---------------------------------------------------------------------- diff --git a/modules/ml/src/main/java/org/apache/ignite/ml/math/isolve/lsqr/AbstractLSQR.java b/modules/ml/src/main/java/org/apache/ignite/ml/math/isolve/lsqr/AbstractLSQR.java index d1d3219..7a362f7 100644 --- a/modules/ml/src/main/java/org/apache/ignite/ml/math/isolve/lsqr/AbstractLSQR.java +++ b/modules/ml/src/main/java/org/apache/ignite/ml/math/isolve/lsqr/AbstractLSQR.java @@ -19,7 +19,6 @@ package org.apache.ignite.ml.math.isolve.lsqr; import com.github.fommil.netlib.BLAS; import java.util.Arrays; -import org.apache.ignite.ml.math.Precision; /** * Basic implementation of the LSQR algorithm without assumptions about dataset storage format or data processing @@ -30,8 +29,35 @@ import org.apache.ignite.ml.math.Precision; */ // TODO: IGNITE-7660: Refactor LSQR algorithm public abstract class AbstractLSQR { + /** + * <p> + * Largest double-precision floating-point number such that + * {@code 1 + EPSILON} is numerically equal to 1. This value is an upper + * bound on the relative error due to rounding real numbers to double + * precision floating-point numbers. + * </p> + * <p> + * In IEEE 754 arithmetic, this is 2<sup>-53</sup>. + * </p> + * + * @see <a href="http://en.wikipedia.org/wiki/Machine_epsilon">Machine epsilon</a> + */ + private static final double EPSILON; + + /** Exponent offset in IEEE754 representation. */ + private static final long EXPONENT_OFFSET = 1023L; + + static { + /* + * This was previously expressed as = 0x1.0p-53; + * However, OpenJDK (Sparc Solaris) cannot handle such small + * constants: MATH-721 + */ + EPSILON = Double.longBitsToDouble((EXPONENT_OFFSET - 53L) << 52); + } + /** The smallest representable positive number such that 1.0 + eps != 1.0. */ - private static final double eps = Precision.EPSILON; + private static final double eps = EPSILON; /** BLAS (Basic Linear Algebra Subprograms) instance. */ private static BLAS blas = BLAS.getInstance(); @@ -331,4 +357,4 @@ public abstract class AbstractLSQR { return res; } -} \ No newline at end of file +} http://git-wip-us.apache.org/repos/asf/ignite/blob/25f83819/modules/ml/src/main/java/org/apache/ignite/ml/math/isolve/lsqr/LSQRResult.java ---------------------------------------------------------------------- diff --git a/modules/ml/src/main/java/org/apache/ignite/ml/math/isolve/lsqr/LSQRResult.java b/modules/ml/src/main/java/org/apache/ignite/ml/math/isolve/lsqr/LSQRResult.java index 47beddb..0d6681c 100644 --- a/modules/ml/src/main/java/org/apache/ignite/ml/math/isolve/lsqr/LSQRResult.java +++ b/modules/ml/src/main/java/org/apache/ignite/ml/math/isolve/lsqr/LSQRResult.java @@ -34,7 +34,7 @@ public class LSQRResult extends IterativeSolverResult { private final int isstop; /** Represents norm(r), where r = b - Ax. */ - private final double r1norn; + private final double r1norm; /**Represents sqrt( norm(r)^2 + damp^2 * norm(x)^2 ). Equal to r1norm if damp == 0. */ private final double r2norm; @@ -63,7 +63,7 @@ public class LSQRResult extends IterativeSolverResult { * @param x X value. * @param iterations Number of performed iterations. * @param isstop Stop reason. - * @param r1norn R1 norm value. + * @param r1norm R1 norm value. * @param r2norm R2 norm value. * @param anorm A norm value. * @param acond A cond value. @@ -71,11 +71,11 @@ public class LSQRResult extends IterativeSolverResult { * @param xnorm X norm value. * @param var Var value. */ - public LSQRResult(double[] x, int iterations, int isstop, double r1norn, double r2norm, double anorm, double acond, + public LSQRResult(double[] x, int iterations, int isstop, double r1norm, double r2norm, double anorm, double acond, double arnorm, double xnorm, double[] var) { super(x, iterations); this.isstop = isstop; - this.r1norn = r1norn; + this.r1norm = r1norm; this.r2norm = r2norm; this.anorm = anorm; this.acond = acond; @@ -90,8 +90,8 @@ public class LSQRResult extends IterativeSolverResult { } /** */ - public double getR1norn() { - return r1norn; + public double getR1norm() { + return r1norm; } /** */ @@ -128,7 +128,7 @@ public class LSQRResult extends IterativeSolverResult { @Override public String toString() { return "LSQRResult{" + "isstop=" + isstop + - ", r1norn=" + r1norn + + ", r1norm=" + r1norm + ", r2norm=" + r2norm + ", anorm=" + anorm + ", acond=" + acond + @@ -137,4 +137,4 @@ public class LSQRResult extends IterativeSolverResult { ", var=" + Arrays.toString(var) + '}'; } -} \ No newline at end of file +} http://git-wip-us.apache.org/repos/asf/ignite/blob/25f83819/modules/ml/src/main/java/org/apache/ignite/ml/nn/MultilayerPerceptron.java ---------------------------------------------------------------------- diff --git a/modules/ml/src/main/java/org/apache/ignite/ml/nn/MultilayerPerceptron.java b/modules/ml/src/main/java/org/apache/ignite/ml/nn/MultilayerPerceptron.java index 639bb44..b469603 100644 --- a/modules/ml/src/main/java/org/apache/ignite/ml/nn/MultilayerPerceptron.java +++ b/modules/ml/src/main/java/org/apache/ignite/ml/nn/MultilayerPerceptron.java @@ -349,7 +349,7 @@ public class MultilayerPerceptron implements Model<Matrix, Matrix>, SmoothParame */ public MLPArchitecture architecture() { if (below != null) - return below.architecture().add(architecture()); + return below.architecture().add(architecture); return architecture; } http://git-wip-us.apache.org/repos/asf/ignite/blob/25f83819/modules/ml/src/main/java/org/apache/ignite/ml/structures/DatasetRow.java ---------------------------------------------------------------------- diff --git a/modules/ml/src/main/java/org/apache/ignite/ml/structures/DatasetRow.java b/modules/ml/src/main/java/org/apache/ignite/ml/structures/DatasetRow.java index 1e3e12c..eda901e 100644 --- a/modules/ml/src/main/java/org/apache/ignite/ml/structures/DatasetRow.java +++ b/modules/ml/src/main/java/org/apache/ignite/ml/structures/DatasetRow.java @@ -58,7 +58,7 @@ public class DatasetRow<V extends Vector> implements Serializable, Externalizabl DatasetRow vector1 = (DatasetRow)o; - return vector != null ? !vector.equals(vector1.vector) : vector1.vector != null; + return vector != null ? vector.equals(vector1.vector) : vector1.vector == null; } /** {@inheritDoc} */ @@ -72,6 +72,7 @@ public class DatasetRow<V extends Vector> implements Serializable, Externalizabl } /** {@inheritDoc} */ + @SuppressWarnings("unchecked") @Override public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException { vector = (V)in.readObject(); }
