Repository: ignite Updated Branches: refs/heads/master d0bddfa52 -> b04b58005
http://git-wip-us.apache.org/repos/asf/ignite/blob/b04b5800/modules/ml/src/test/java/org/apache/ignite/ml/clustering/KMeansDistributedClustererTest.java ---------------------------------------------------------------------- diff --git a/modules/ml/src/test/java/org/apache/ignite/ml/clustering/KMeansDistributedClustererTest.java b/modules/ml/src/test/java/org/apache/ignite/ml/clustering/KMeansDistributedClustererTest.java new file mode 100644 index 0000000..7fb3534 --- /dev/null +++ b/modules/ml/src/test/java/org/apache/ignite/ml/clustering/KMeansDistributedClustererTest.java @@ -0,0 +1,184 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.ignite.ml.clustering; + +import java.io.IOException; +import java.util.*; +import java.util.stream.Collectors; +import java.util.stream.IntStream; +import org.apache.ignite.Ignite; +import org.apache.ignite.internal.util.IgniteUtils; +import org.apache.ignite.ml.math.*; +import org.apache.ignite.ml.math.Vector; +import org.apache.ignite.ml.math.functions.Functions; +import org.apache.ignite.ml.math.impls.matrix.SparseDistributedMatrix; +import org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector; +import org.apache.ignite.testframework.junits.common.GridCommonAbstractTest; +import org.junit.Assert; +import org.junit.Test; + +import static org.apache.ignite.ml.clustering.KMeansUtil.checkIsInEpsilonNeighbourhood; + +/** Tests for {@link KMeansDistributedClusterer} executed on a single-node grid. */ +public class KMeansDistributedClustererTest extends GridCommonAbstractTest { + /** Number of nodes in grid. We should use 1 in this test because otherwise algorithm will be unstable + * (We cannot guarantee the order in which results are returned from each node). */ + private static final int NODE_COUNT = 1; + + /** Grid instance. */ + private Ignite ignite; + + /** + * Default constructor.
+ */ + public KMeansDistributedClustererTest() { + super(false); + } + + /** + * {@inheritDoc} + */ + @Override protected void beforeTest() throws Exception { + ignite = grid(NODE_COUNT); + } + + /** {@inheritDoc} */ + @Override protected void beforeTestsStarted() throws Exception { + for (int i = 1; i <= NODE_COUNT; i++) + startGrid(i); + } + + /** {@inheritDoc} */ + @Override protected void afterTestsStopped() throws Exception { + stopAllGrids(); + } + + /** Degenerate case: two points clustered into one cluster; checks that exactly one center of size 2 is returned. */ + @Test + public void testPerformClusterAnalysisDegenerate() { + IgniteUtils.setCurrentIgniteName(ignite.configuration().getIgniteInstanceName()); + + KMeansDistributedClusterer clusterer = new KMeansDistributedClusterer(new EuclideanDistance(), 1, 1, 1L); + + double[] v1 = new double[] {1959, 325100}; + double[] v2 = new double[] {1960, 373200}; + + SparseDistributedMatrix points = new SparseDistributedMatrix(2, 2, StorageConstants.ROW_STORAGE_MODE, + StorageConstants.RANDOM_ACCESS_MODE); + + points.setRow(0, v1); + points.setRow(1, v2); + + KMeansModel mdl = clusterer.cluster(points, 1); + + Assert.assertEquals(1, mdl.centers().length); + Assert.assertEquals(2, mdl.centers()[0].size()); + } + + /** Generates four groups of points next to the vertices of a square, clusters them into four clusters and checks that every found center lies within the epsilon neighbourhood of the mass center of the corresponding generated group. */ + @Test + public void testClusterizationOnDatasetWithObviousStructure() throws IOException { + IgniteUtils.setCurrentIgniteName(ignite.configuration().getIgniteInstanceName()); + + int ptsCnt = 10000; + int squareSideLen = 10000; + + Random rnd = new Random(123456L); + + // Let centers be in the vertices of a square; map keys are the numbers of points generated around each center.
+ Map<Integer, Vector> centers = new HashMap<>(); + centers.put(100, new DenseLocalOnHeapVector(new double[] {0.0, 0.0})); + centers.put(900, new DenseLocalOnHeapVector(new double[] {squareSideLen, 0.0})); + centers.put(3000, new DenseLocalOnHeapVector(new double[] {0.0, squareSideLen})); + centers.put(6000, new DenseLocalOnHeapVector(new double[] {squareSideLen, squareSideLen})); + + int centersCnt = centers.size(); + + SparseDistributedMatrix points = new SparseDistributedMatrix(ptsCnt, 2, StorageConstants.ROW_STORAGE_MODE, + StorageConstants.RANDOM_ACCESS_MODE); + + List<Integer> permutation = IntStream.range(0, ptsCnt).boxed().collect(Collectors.toList()); + Collections.shuffle(permutation, rnd); + + Vector[] mc = new Vector[centersCnt]; + Arrays.fill(mc, VectorUtils.zeroes(2)); + + int centIndex = 0; + int totalCount = 0; + + List<Vector> massCenters = new ArrayList<>(); + + for (Integer count : centers.keySet()) { + for (int i = 0; i < count; i++) { + DenseLocalOnHeapVector pnt = (DenseLocalOnHeapVector)new DenseLocalOnHeapVector(2).assign(centers.get(count)); + // Perturb the point by a random value of at most one hundredth of the square side.
+ pnt.map(val -> val + rnd.nextDouble() * squareSideLen / 100); + mc[centIndex] = mc[centIndex].plus(pnt); + points.assignRow(permutation.get(totalCount), pnt); + totalCount++; + } + massCenters.add(mc[centIndex].times(1 / (double)count)); + centIndex++; + } + + EuclideanDistance dist = new EuclideanDistance(); + OrderedNodesComparator comp = new OrderedNodesComparator(centers.values().toArray(new Vector[] {}), dist); + + massCenters.sort(comp); + KMeansDistributedClusterer clusterer = new KMeansDistributedClusterer(dist, 3, 100, 1L); + + KMeansModel mdl = clusterer.cluster(points, 4); + Vector[] resCenters = mdl.centers(); + Arrays.sort(resCenters, comp); + + checkIsInEpsilonNeighbourhood(resCenters, massCenters.toArray(new Vector[]{}), 30.0); + } + + /** Comparator ordering vectors by the index of their closest node in {@code orderedNodes}; vectors sharing the closest node are ordered by {@code kNorm(2)} of their difference from that node. */ + private static class OrderedNodesComparator implements Comparator<Vector> { + /** Distance measure used to find the node closest to a vector. */ + private final DistanceMeasure measure; + + /** Reference nodes; the position of a node in this list defines the ordering. */ + List<Vector> orderedNodes; + + public OrderedNodesComparator(Vector[] orderedNodes, DistanceMeasure measure) { + this.orderedNodes = Arrays.asList(orderedNodes); + this.measure = measure; + } + + /** Returns the first component of the {@code Functions.argmin} result over {@code orderedNodes} for the measured distance to {@code v} (presumably the index of the closest node - confirm against {@code Functions.argmin}). */ + private int findClosestNodeIndex(Vector v) { + return Functions.argmin(orderedNodes, v1 -> measure.compute(v1, v)).get1(); + } + + /** Compares the closest-node indexes first; if they are equal, compares {@code kNorm(2)} of the differences between each vector and its closest node. */ + @Override public int compare(Vector v1, Vector v2) { + int ind1 = findClosestNodeIndex(v1); + int ind2 = findClosestNodeIndex(v2); + + int signum = (int)Math.signum(ind1 - ind2); + + if (signum != 0) + return signum; + + return (int)Math.signum(orderedNodes.get(ind1).minus(v1).kNorm(2) - + orderedNodes.get(ind2).minus(v2).kNorm(2)); + } + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/ignite/blob/b04b5800/modules/ml/src/test/java/org/apache/ignite/ml/clustering/KMeansLocalClustererTest.java ---------------------------------------------------------------------- diff --git a/modules/ml/src/test/java/org/apache/ignite/ml/clustering/KMeansLocalClustererTest.java
b/modules/ml/src/test/java/org/apache/ignite/ml/clustering/KMeansLocalClustererTest.java new file mode 100644 index 0000000..b396f5b --- /dev/null +++ b/modules/ml/src/test/java/org/apache/ignite/ml/clustering/KMeansLocalClustererTest.java @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.ignite.ml.clustering; + +import org.apache.ignite.ml.math.EuclideanDistance; +import org.apache.ignite.ml.math.impls.matrix.DenseLocalOnHeapMatrix; +import org.junit.Assert; +import org.junit.Test; + +/** */ +public class KMeansLocalClustererTest { + /** + * Two points, one cluster, one iteration + */ + @Test + public void testPerformClusterAnalysisDegenerate() { + KMeansLocalClusterer clusterer = new KMeansLocalClusterer(new EuclideanDistance(), 1, 1L); + + double[] v1 = new double[] {1959, 325100}; + double[] v2 = new double[] {1960, 373200}; + + DenseLocalOnHeapMatrix points = new DenseLocalOnHeapMatrix(new double[][] { + v1, + v2}); + + KMeansModel mdl = clusterer.cluster(points, 1); + + Assert.assertEquals(1, mdl.centers().length); + Assert.assertEquals(2, mdl.centers()[0].size()); + } +} http://git-wip-us.apache.org/repos/asf/ignite/blob/b04b5800/modules/ml/src/test/java/org/apache/ignite/ml/clustering/KMeansUtil.java ---------------------------------------------------------------------- diff --git a/modules/ml/src/test/java/org/apache/ignite/ml/clustering/KMeansUtil.java b/modules/ml/src/test/java/org/apache/ignite/ml/clustering/KMeansUtil.java new file mode 100644 index 0000000..0a39748 --- /dev/null +++ b/modules/ml/src/test/java/org/apache/ignite/ml/clustering/KMeansUtil.java @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.ignite.ml.clustering; + +import org.apache.ignite.ml.math.Vector; + +import static org.junit.Assert.assertTrue; + +/** Assertion helpers shared by k-means tests. */ +public class KMeansUtil { + /** + * Asserts that both arrays have the same length and that, for every index, {@code kNorm(2)} of the + * difference between the two vectors at that index is strictly less than {@code epsilon}. + * + * @param v1s First array of vectors. + * @param v2s Second array of vectors, compared with {@code v1s} index by index. + * @param epsilon Exclusive upper bound for the norm of each difference. + */ + public static void checkIsInEpsilonNeighbourhood(Vector[] v1s, Vector[] v2s, double epsilon) { + /* Fail with a clear message on a length mismatch: otherwise extra elements of v2s are silently ignored and missing ones surface as ArrayIndexOutOfBoundsException. */ + assertTrue("Arrays have different lengths: " + v1s.length + " and " + v2s.length, + v1s.length == v2s.length); + + for (int i = 0; i < v1s.length; i++) { + assertTrue("Not in epsilon neighbourhood (index " + i + ") ", + v1s[i].minus(v2s[i]).kNorm(2) < epsilon); + } + } +} http://git-wip-us.apache.org/repos/asf/ignite/blob/b04b5800/modules/ml/src/test/java/org/apache/ignite/ml/math/MathImplLocalTestSuite.java ---------------------------------------------------------------------- diff --git a/modules/ml/src/test/java/org/apache/ignite/ml/math/MathImplLocalTestSuite.java b/modules/ml/src/test/java/org/apache/ignite/ml/math/MathImplLocalTestSuite.java index 216fd7b..d164be9 100644 --- a/modules/ml/src/test/java/org/apache/ignite/ml/math/MathImplLocalTestSuite.java +++ b/modules/ml/src/test/java/org/apache/ignite/ml/math/MathImplLocalTestSuite.java @@ -59,6 +59,7 @@ import org.apache.ignite.ml.math.impls.vector.VectorIterableTest; import org.apache.ignite.ml.math.impls.vector.VectorNormTest; import org.apache.ignite.ml.math.impls.vector.VectorToMatrixTest; import org.apache.ignite.ml.math.impls.vector.VectorViewTest; +import org.apache.ignite.ml.regressions.OLSMultipleLinearRegressionTest; import org.junit.runner.RunWith; import org.junit.runners.Suite;
