Repository: ignite
Updated Branches:
  refs/heads/master da6e7319f -> f5120b9b8


IGNITE-9285: [ML] Add MaxAbsScaler as a preprocessing stage

this closes #4655


Project: http://git-wip-us.apache.org/repos/asf/ignite/repo
Commit: http://git-wip-us.apache.org/repos/asf/ignite/commit/f5120b9b
Tree: http://git-wip-us.apache.org/repos/asf/ignite/tree/f5120b9b
Diff: http://git-wip-us.apache.org/repos/asf/ignite/diff/f5120b9b

Branch: refs/heads/master
Commit: f5120b9b8f822cc90fbd4b396bfb7ad1e09a5746
Parents: da6e731
Author: Ravil Galeyev <[email protected]>
Authored: Fri Sep 7 17:29:12 2018 +0300
Committer: Yury Babak <[email protected]>
Committed: Fri Sep 7 17:29:12 2018 +0300

----------------------------------------------------------------------
 .../ml/preprocessing/MaxAbsScalerExample.java   | 84 +++++++++++++++++++
 .../ml/preprocessing/MinMaxScalerExample.java   |  8 +-
 .../MaxAbsScalerPartitionData.java              | 48 +++++++++++
 .../maxabsscaling/MaxAbsScalerPreprocessor.java | 77 ++++++++++++++++++
 .../maxabsscaling/MaxAbsScalerTrainer.java      | 85 ++++++++++++++++++++
 .../maxabsscaling/package-info.java             | 22 +++++
 .../MaxAbsScalerPreprocessorTest.java           | 55 +++++++++++++
 .../maxabsscaling/MaxAbsScalerTrainerTest.java  | 76 +++++++++++++++++
 8 files changed, 451 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/ignite/blob/f5120b9b/examples/src/main/java/org/apache/ignite/examples/ml/preprocessing/MaxAbsScalerExample.java
----------------------------------------------------------------------
diff --git 
a/examples/src/main/java/org/apache/ignite/examples/ml/preprocessing/MaxAbsScalerExample.java
 
b/examples/src/main/java/org/apache/ignite/examples/ml/preprocessing/MaxAbsScalerExample.java
new file mode 100644
index 0000000..955702a
--- /dev/null
+++ 
b/examples/src/main/java/org/apache/ignite/examples/ml/preprocessing/MaxAbsScalerExample.java
@@ -0,0 +1,84 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.examples.ml.preprocessing;
+
+import org.apache.ignite.Ignite;
+import org.apache.ignite.IgniteCache;
+import org.apache.ignite.Ignition;
+import org.apache.ignite.cache.affinity.rendezvous.RendezvousAffinityFunction;
+import org.apache.ignite.configuration.CacheConfiguration;
+import org.apache.ignite.examples.ml.dataset.model.Person;
+import org.apache.ignite.examples.ml.util.DatasetHelper;
+import org.apache.ignite.ml.dataset.DatasetFactory;
+import org.apache.ignite.ml.dataset.primitive.SimpleDataset;
+import org.apache.ignite.ml.math.functions.IgniteBiFunction;
+import org.apache.ignite.ml.math.primitives.vector.Vector;
+import org.apache.ignite.ml.math.primitives.vector.VectorUtils;
+import org.apache.ignite.ml.preprocessing.maxabsscaling.MaxAbsScalerTrainer;
+
+/**
+ * Example that shows how to use MaxAbsScaler preprocessor to scale the given 
data.
+ *
+ * Machine learning preprocessors are built as a chain. Most often a first 
preprocessor is a feature extractor as shown
+ * in this example. The second preprocessor here is a MaxAbsScaler 
preprocessor which is built on top of the feature
+ * extractor and represents a chain of itself and the underlying feature 
extractor.
+ */
+public class MaxAbsScalerExample {
+    /**
+     * Runs the example: starts Ignite with the example configuration, fills the cache, trains the MaxAbsScaler
+     * preprocessor on top of a feature extractor and describes the resulting dataset via {@link DatasetHelper}.
+     *
+     * @param args Command line arguments (not used).
+     * @throws Exception If the example fails.
+     */
+    public static void main(String[] args) throws Exception {
+        try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) {
+            System.out.println(">>> Max abs example started.");
+
+            IgniteCache<Integer, Person> personCache = createCache(ignite);
+
+            // First stage of the chain: turns every person into a vector of (age, salary).
+            IgniteBiFunction<Integer, Person, Vector> ageAndSalary =
+                (key, person) -> VectorUtils.of(person.getAge(), person.getSalary());
+
+            // Second stage of the chain: the trained scaler, built on top of the feature extractor.
+            MaxAbsScalerTrainer<Integer, Person> trainer = new MaxAbsScalerTrainer<>();
+
+            IgniteBiFunction<Integer, Person, Vector> scaler = trainer.fit(ignite, personCache, ageAndSalary);
+
+            // Cache based simple dataset over the preprocessed features; closed right after it has been described.
+            try (SimpleDataset<?> ds = DatasetFactory.createSimpleDataset(ignite, personCache, scaler)) {
+                new DatasetHelper(ds).describe();
+            }
+
+            System.out.println(">>> Max abs example completed.");
+        }
+    }
+
+    /**
+     * Creates the {@code PERSONS} cache and puts four sample persons into it.
+     *
+     * @param ignite Ignite instance used to create the cache.
+     * @return The filled cache.
+     */
+    private static IgniteCache<Integer, Person> createCache(Ignite ignite) {
+        CacheConfiguration<Integer, Person> cfg = new CacheConfiguration<>();
+
+        cfg.setName("PERSONS");
+        cfg.setAffinity(new RendezvousAffinityFunction(false, 2));
+
+        IgniteCache<Integer, Person> cache = ignite.createCache(cfg);
+
+        cache.put(1, new Person("Mike", 42, 10000));
+        cache.put(2, new Person("John", 32, 64000));
+        cache.put(3, new Person("George", 53, 120000));
+        cache.put(4, new Person("Karl", 24, 70000));
+
+        return cache;
+    }
+}

http://git-wip-us.apache.org/repos/asf/ignite/blob/f5120b9b/examples/src/main/java/org/apache/ignite/examples/ml/preprocessing/MinMaxScalerExample.java
----------------------------------------------------------------------
diff --git 
a/examples/src/main/java/org/apache/ignite/examples/ml/preprocessing/MinMaxScalerExample.java
 
b/examples/src/main/java/org/apache/ignite/examples/ml/preprocessing/MinMaxScalerExample.java
index c1b32ab..f73228f 100644
--- 
a/examples/src/main/java/org/apache/ignite/examples/ml/preprocessing/MinMaxScalerExample.java
+++ 
b/examples/src/main/java/org/apache/ignite/examples/ml/preprocessing/MinMaxScalerExample.java
@@ -42,8 +42,8 @@ import 
org.apache.ignite.ml.preprocessing.minmaxscaling.MinMaxScalerTrainer;
  * <p>
  * After that it defines preprocessors that extract features from an upstream 
data and normalize their values.</p>
  * <p>
- * Finally, it creates the dataset based on the processed data and uses 
Dataset API to find and output
- * various statistical metrics of the data.</p>
+ * Finally, it creates the dataset based on the processed data and uses 
Dataset API to find and output various
+ * statistical metrics of the data.</p>
  * <p>
  * You can change the test data used in this example and re-run it to explore 
this functionality further.</p>
  */
@@ -51,7 +51,7 @@ public class MinMaxScalerExample {
     /** Run example. */
     public static void main(String[] args) throws Exception {
         try (Ignite ignite = 
Ignition.start("examples/config/example-ignite.xml")) {
-            System.out.println(">>> Normalization example started.");
+            System.out.println(">>> MinMax preprocessing example started.");
 
             IgniteCache<Integer, Person> persons = createCache(ignite);
 
@@ -70,7 +70,7 @@ public class MinMaxScalerExample {
                 new DatasetHelper(dataset).describe();
             }
 
-            System.out.println(">>> Normalization example completed.");
+            System.out.println(">>> MinMax preprocessing example completed.");
         }
     }
 

http://git-wip-us.apache.org/repos/asf/ignite/blob/f5120b9b/modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/maxabsscaling/MaxAbsScalerPartitionData.java
----------------------------------------------------------------------
diff --git 
a/modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/maxabsscaling/MaxAbsScalerPartitionData.java
 
b/modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/maxabsscaling/MaxAbsScalerPartitionData.java
new file mode 100644
index 0000000..e4658da
--- /dev/null
+++ 
b/modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/maxabsscaling/MaxAbsScalerPartitionData.java
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.ml.preprocessing.maxabsscaling;
+
+/**
+ * Per-partition state of the maxabsscaling preprocessor: a holder for the array of maximum absolute feature values
+ * computed by {@link MaxAbsScalerTrainer} for one partition.
+ *
+ * @see MaxAbsScalerTrainer
+ * @see MaxAbsScalerPreprocessor
+ */
+public class MaxAbsScalerPartitionData implements AutoCloseable {
+    /** Maximum absolute value per feature; the array is stored by reference, it is not copied. */
+    private final double[] maxAbsVals;
+
+    /**
+     * Creates partition data wrapping the given array.
+     *
+     * @param maxAbs Maximal absolute values. {@link MaxAbsScalerTrainer} passes {@code null} here for a partition
+     *      that contains no rows.
+     */
+    public MaxAbsScalerPartitionData(double[] maxAbs) {
+        maxAbsVals = maxAbs;
+    }
+
+    /**
+     * @return The very array that was passed to the constructor (possibly {@code null}).
+     */
+    public double[] getMaxAbs() {
+        return maxAbsVals;
+    }
+
+    /** No-op: this object owns nothing that requires an explicit release. */
+    @Override public void close() {
+        // Nothing to release here, the array is reclaimed by GC.
+    }
+}

http://git-wip-us.apache.org/repos/asf/ignite/blob/f5120b9b/modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/maxabsscaling/MaxAbsScalerPreprocessor.java
----------------------------------------------------------------------
diff --git 
a/modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/maxabsscaling/MaxAbsScalerPreprocessor.java
 
b/modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/maxabsscaling/MaxAbsScalerPreprocessor.java
new file mode 100644
index 0000000..4eb0e31
--- /dev/null
+++ 
b/modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/maxabsscaling/MaxAbsScalerPreprocessor.java
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.ml.preprocessing.maxabsscaling;
+
+import org.apache.ignite.ml.math.functions.IgniteBiFunction;
+import org.apache.ignite.ml.math.primitives.vector.Vector;
+
+/**
+ * The preprocessing function that makes maxabsscaling, transforms features to 
the scale {@code [-1,+1]}. From
+ * mathematical point of view it's the following function which is applied to 
every element in a dataset:
+ *
+ * {@code a_i = a_i / maxabs_i for all i},
+ *
+ * where {@code i} is a number of column, {@code maxabs_i} is the value of the 
absolute maximum element in this column.
+ *
+ * @param <K> Type of a key in {@code upstream} data.
+ * @param <V> Type of a value in {@code upstream} data.
+ */
+public class MaxAbsScalerPreprocessor<K, V> implements IgniteBiFunction<K, V, 
Vector> {
+    /** */
+    private static final long serialVersionUID = 1L;
+
+    /** Maximum absolute values. */
+    private final double[] maxAbs;
+
+    /** Base preprocessor. */
+    private final IgniteBiFunction<K, V, Vector> basePreprocessor;
+
+    /**
+     * Constructs a new instance of maxabsscaling preprocessor.
+     *
+     * @param maxAbs Maximal absolute values.
+     * @param basePreprocessor Base preprocessor.
+     */
+    public MaxAbsScalerPreprocessor(double[] maxAbs, IgniteBiFunction<K, V, 
Vector> basePreprocessor) {
+        this.maxAbs = maxAbs;
+        this.basePreprocessor = basePreprocessor;
+    }
+
+    /**
+     * Applies this preprocessor.
+     *
+     * @param k Key.
+     * @param v Value.
+     * @return Preprocessed row.
+     */
+    @Override public Vector apply(K k, V v) {
+        Vector res = basePreprocessor.apply(k, v);
+
+        assert res.size() == maxAbs.length;
+
+        for (int i = 0; i < res.size(); i++)
+            res.set(i, res.get(i) / maxAbs[i]);
+
+        return res;
+    }
+
+    /** */
+    public double[] getMaxAbs() {
+        return maxAbs;
+    }
+}

http://git-wip-us.apache.org/repos/asf/ignite/blob/f5120b9b/modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/maxabsscaling/MaxAbsScalerTrainer.java
----------------------------------------------------------------------
diff --git 
a/modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/maxabsscaling/MaxAbsScalerTrainer.java
 
b/modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/maxabsscaling/MaxAbsScalerTrainer.java
new file mode 100644
index 0000000..d3e5734
--- /dev/null
+++ 
b/modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/maxabsscaling/MaxAbsScalerTrainer.java
@@ -0,0 +1,85 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.ml.preprocessing.maxabsscaling;
+
+import org.apache.ignite.ml.dataset.Dataset;
+import org.apache.ignite.ml.dataset.DatasetBuilder;
+import org.apache.ignite.ml.dataset.UpstreamEntry;
+import org.apache.ignite.ml.dataset.primitive.context.EmptyContext;
+import org.apache.ignite.ml.math.functions.IgniteBiFunction;
+import org.apache.ignite.ml.math.primitives.vector.Vector;
+import org.apache.ignite.ml.preprocessing.PreprocessingTrainer;
+
+/**
+ * Trainer of the maxabsscaling preprocessor. It computes, for every feature, the maximum absolute value over all rows
+ * produced by the base preprocessor: first independently per partition, then by merging the per-partition results.
+ *
+ * @param <K> Type of a key in {@code upstream} data.
+ * @param <V> Type of a value in {@code upstream} data.
+ */
+public class MaxAbsScalerTrainer<K, V> implements PreprocessingTrainer<K, V, Vector, Vector> {
+    /**
+     * {@inheritDoc}
+     * <p>
+     * A partition without rows contributes {@code null} and is skipped while merging. If every partition is empty
+     * the merged array is {@code null} as well and is handed to the preprocessor as is.</p>
+     */
+    @Override public MaxAbsScalerPreprocessor<K, V> fit(DatasetBuilder<K, V> datasetBuilder,
+        IgniteBiFunction<K, V, Vector> basePreprocessor) {
+        try (Dataset<EmptyContext, MaxAbsScalerPartitionData> dataset = datasetBuilder.build(
+            (upstream, upstreamSize) -> new EmptyContext(),
+            (upstream, upstreamSize, ctx) -> {
+                double[] maxAbs = null;
+
+                while (upstream.hasNext()) {
+                    UpstreamEntry<K, V> entity = upstream.next();
+                    Vector row = basePreprocessor.apply(entity.getKey(), entity.getValue());
+
+                    // Allocated lazily because the number of features is known only once the first row is seen.
+                    // A fresh array is zero-filled, and zero is the neutral element for a maximum of absolute values.
+                    if (maxAbs == null)
+                        maxAbs = new double[row.size()];
+                    else
+                        assert maxAbs.length == row.size() : "Base preprocessor must return exactly " + maxAbs.length
+                            + " features";
+
+                    for (int i = 0; i < row.size(); i++) {
+                        double abs = Math.abs(row.get(i));
+
+                        // The stored maximum is never negative, so it can be compared directly.
+                        if (abs > maxAbs[i])
+                            maxAbs[i] = abs;
+                    }
+                }
+
+                return new MaxAbsScalerPartitionData(maxAbs);
+            }
+        )) {
+            double[] maxAbs = dataset.compute(MaxAbsScalerPartitionData::getMaxAbs,
+                (a, b) -> {
+                    // Null stands for a partition without rows: the other side wins.
+                    if (a == null)
+                        return b;
+
+                    if (b == null)
+                        return a;
+
+                    assert a.length == b.length : "Partitions must produce the same number of features: "
+                        + a.length + " != " + b.length;
+
+                    double[] result = new double[a.length];
+
+                    // Both arrays hold absolute values already, an element-wise maximum is sufficient.
+                    for (int i = 0; i < result.length; i++)
+                        result[i] = Math.max(a[i], b[i]);
+
+                    return result;
+                });
+
+            return new MaxAbsScalerPreprocessor<>(maxAbs, basePreprocessor);
+        }
+        catch (Exception e) {
+            throw new RuntimeException(e);
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/ignite/blob/f5120b9b/modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/maxabsscaling/package-info.java
----------------------------------------------------------------------
diff --git 
a/modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/maxabsscaling/package-info.java
 
b/modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/maxabsscaling/package-info.java
new file mode 100644
index 0000000..343ba30
--- /dev/null
+++ 
b/modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/maxabsscaling/package-info.java
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * <!-- Package description. -->
+ * Contains Max Abs Scaler preprocessor.
+ */
+package org.apache.ignite.ml.preprocessing.maxabsscaling;

http://git-wip-us.apache.org/repos/asf/ignite/blob/f5120b9b/modules/ml/src/test/java/org/apache/ignite/ml/preprocessing/maxabsscaling/MaxAbsScalerPreprocessorTest.java
----------------------------------------------------------------------
diff --git 
a/modules/ml/src/test/java/org/apache/ignite/ml/preprocessing/maxabsscaling/MaxAbsScalerPreprocessorTest.java
 
b/modules/ml/src/test/java/org/apache/ignite/ml/preprocessing/maxabsscaling/MaxAbsScalerPreprocessorTest.java
new file mode 100644
index 0000000..3c30f3e
--- /dev/null
+++ 
b/modules/ml/src/test/java/org/apache/ignite/ml/preprocessing/maxabsscaling/MaxAbsScalerPreprocessorTest.java
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.ml.preprocessing.maxabsscaling;
+
+import org.apache.ignite.ml.math.primitives.vector.Vector;
+import org.apache.ignite.ml.math.primitives.vector.VectorUtils;
+import org.junit.Test;
+
+import static org.junit.Assert.assertArrayEquals;
+
+/**
+ * Tests for {@link MaxAbsScalerPreprocessor}.
+ */
+public class MaxAbsScalerPreprocessorTest {
+    /** Checks that {@code apply()} divides every feature by the maximum absolute value of its column. */
+    @Test
+    public void testApply() {
+        double[][] rows = {
+            {2., 4., 1.},
+            {1., 8., 22.},
+            {-4., 10., 100.},
+            {0., 22., 300.}
+        };
+
+        // Expected output is spelled out up front: every source value divided by 4, 22 and 300 respectively.
+        double[][] scaledRows = {
+            {2. / 4, 4. / 22, 1. / 300},
+            {1. / 4, 8. / 22, 22. / 300},
+            {-4. / 4, 10. / 22, 100. / 300},
+            {0. / 4, 22. / 22, 300. / 300}
+        };
+
+        // The base preprocessor is the identity, so the scaler works on the passed vector directly.
+        MaxAbsScalerPreprocessor<Integer, Vector> scaler =
+            new MaxAbsScalerPreprocessor<>(new double[] {4, 22, 300}, (k, v) -> v);
+
+        for (int rowIdx = 0; rowIdx < rows.length; rowIdx++) {
+            Vector scaled = scaler.apply(rowIdx, VectorUtils.of(rows[rowIdx]));
+
+            assertArrayEquals(scaledRows[rowIdx], scaled.asArray(), 1e-8);
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/ignite/blob/f5120b9b/modules/ml/src/test/java/org/apache/ignite/ml/preprocessing/maxabsscaling/MaxAbsScalerTrainerTest.java
----------------------------------------------------------------------
diff --git 
a/modules/ml/src/test/java/org/apache/ignite/ml/preprocessing/maxabsscaling/MaxAbsScalerTrainerTest.java
 
b/modules/ml/src/test/java/org/apache/ignite/ml/preprocessing/maxabsscaling/MaxAbsScalerTrainerTest.java
new file mode 100644
index 0000000..5711660
--- /dev/null
+++ 
b/modules/ml/src/test/java/org/apache/ignite/ml/preprocessing/maxabsscaling/MaxAbsScalerTrainerTest.java
@@ -0,0 +1,76 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.ml.preprocessing.maxabsscaling;
+
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Map;
+import org.apache.ignite.ml.dataset.DatasetBuilder;
+import org.apache.ignite.ml.dataset.impl.local.LocalDatasetBuilder;
+import org.apache.ignite.ml.math.primitives.vector.Vector;
+import org.apache.ignite.ml.math.primitives.vector.VectorUtils;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+import static org.junit.Assert.assertArrayEquals;
+
+/**
+ * Tests for {@link MaxAbsScalerTrainer}.
+ */
+@RunWith(Parameterized.class)
+public class MaxAbsScalerTrainerTest {
+    /** Parameters. */
+    @Parameterized.Parameters(name = "Data divided on {0} partitions")
+    public static Iterable<Integer[]> data() {
+        return Arrays.asList(
+            new Integer[] {1},
+            new Integer[] {2},
+            new Integer[] {3},
+            new Integer[] {5},
+            new Integer[] {7},
+            new Integer[] {100},
+            new Integer[] {1000}
+        );
+    }
+
+    /** Number of partitions. */
+    @Parameterized.Parameter
+    public int parts;
+
+    /** Tests {@code fit()} method. */
+    @Test
+    public void testFit() {
+        Map<Integer, Vector> data = new HashMap<>();
+        data.put(1, VectorUtils.of(2, -4, 1));
+        data.put(2, VectorUtils.of(1, -8, 22));
+        data.put(3, VectorUtils.of(-4, 10, 100));
+        data.put(4, VectorUtils.of(0, 22, 300));
+
+        DatasetBuilder<Integer, Vector> datasetBuilder = new 
LocalDatasetBuilder<>(data, parts);
+
+        MaxAbsScalerTrainer<Integer, Vector> standardizationTrainer = new 
MaxAbsScalerTrainer<>();
+
+        MaxAbsScalerPreprocessor<Integer, Vector> preprocessor = 
standardizationTrainer.fit(
+            datasetBuilder,
+            (k, v) -> v
+        );
+
+        assertArrayEquals(new double[] {4, 22, 300}, preprocessor.getMaxAbs(), 
1e-8);
+    }
+}

Reply via email to