This is an automated email from the ASF dual-hosted git repository.

mboehm7 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/systemml.git


The following commit(s) were added to refs/heads/master by this push:
     new 50fac03  [SYSTEMDS-393] Builtin function for connected components, 
tests
50fac03 is described below

commit 50fac03eed584c2019c95a4702367ddc442e7bf2
Author: Matthias Boehm <[email protected]>
AuthorDate: Sat May 23 22:09:04 2020 +0200

    [SYSTEMDS-393] Builtin function for connected components, tests
    
    This patch adds a new built-in function for finding the connected
    components in a undirected graph, represented as a symmetric 0/1 matrix.
    
    On a scenario of finding the connected components of the DBLP co-author
    graph (for selected DB venues and >=2011 -> 35632x35632, 310582
    non-zeros), the algorithm terminated in 12 iterations, found 2443
    connected components (w/ 837 single-author components), and took only
    4.1s including I/O, transform recoding, and graph construction.
---
 docs/Tasks.txt                                     |  1 +
 scripts/builtin/components.dml                     | 53 ++++++++++++
 .../java/org/apache/sysds/common/Builtins.java     |  1 +
 .../functions/builtin/BuiltinComponentsTest.java   | 94 ++++++++++++++++++++++
 .../functions/builtin/ConnectedComponents.dml      | 28 +++++++
 5 files changed, 177 insertions(+)

diff --git a/docs/Tasks.txt b/docs/Tasks.txt
index 66d0901..6539ff8 100644
--- a/docs/Tasks.txt
+++ b/docs/Tasks.txt
@@ -301,6 +301,7 @@ SYSTEMDS-380 Memory Footprint
 SYSTEMDS-390 New Builtin Functions IV
  * 391 New GLM builtin function (from algorithms)                     OK
  * 392 Builtin function for missing value imputation via FDs          OK
+ * 393 Builtin to find Connected Components of a graph                OK
 
 Others:
  * Break append instruction to cbind and rbind 
diff --git a/scripts/builtin/components.dml b/scripts/builtin/components.dml
new file mode 100644
index 0000000..f760a49
--- /dev/null
+++ b/scripts/builtin/components.dml
@@ -0,0 +1,53 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Computes the connected components of a graph and returns a
+# vector indicating the assignment of vertices to components,
+# where each component is identified by the maximum vertex ID
+# (i.e., row/column position of the input graph) 
+
+m_components = function(Matrix[Double] G, Integer maxi = 0, Boolean verbose = 
TRUE) 
+  return (Matrix[Double] C) 
+{
+  # ensure there are no self-edges in the graph
+  if( trace(G) != 0 ) {
+    G = G - diag(diag(G));
+    if(verbose)
+      print("Connected Components: warning - removed self-edges from input 
graph");
+  }
+
+  # initialize state with vertex ids
+  c = seq(1,nrow(G));
+  diff = Inf;
+  iter = 1;
+
+  # iterative computation of connected components
+  while( diff > 0 & (maxi==0 | maxi<=iter) ) {
+    u = max(rowMaxs(G * t(c)), c);
+    diff = sum(u != c)
+    c = u; # update assignment
+    if( verbose )
+      print("Connected components: iter = "+iter+", #diff = "+diff);
+    iter = iter + 1;
+  }
+
+  C = c;
+}
diff --git a/src/main/java/org/apache/sysds/common/Builtins.java 
b/src/main/java/org/apache/sysds/common/Builtins.java
index b738d40..6c53692 100644
--- a/src/main/java/org/apache/sysds/common/Builtins.java
+++ b/src/main/java/org/apache/sysds/common/Builtins.java
@@ -68,6 +68,7 @@ public enum Builtins {
        COLSD("colSds", false),
        COLSUM("colSums", false),
        COLVAR("colVars", false),
+       COMPONENTS("components", true),
        CONV2D("conv2d", false),
        CONV2D_BACKWARD_FILTER("conv2d_backward_filter", false),
        CONV2D_BACKWARD_DATA("conv2d_backward_data", false),
diff --git 
a/src/test/java/org/apache/sysds/test/functions/builtin/BuiltinComponentsTest.java
 
b/src/test/java/org/apache/sysds/test/functions/builtin/BuiltinComponentsTest.java
new file mode 100644
index 0000000..ca54528
--- /dev/null
+++ 
b/src/test/java/org/apache/sysds/test/functions/builtin/BuiltinComponentsTest.java
@@ -0,0 +1,94 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.test.functions.builtin;
+
+import java.util.HashMap;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import org.apache.sysds.common.Types;
+import org.apache.sysds.lops.LopProperties;
+import org.apache.sysds.lops.LopProperties.ExecType;
+import org.apache.sysds.runtime.matrix.data.MatrixValue;
+import org.apache.sysds.test.AutomatedTestBase;
+import org.apache.sysds.test.TestConfiguration;
+import org.apache.sysds.test.TestUtils;
+
+public class BuiltinComponentsTest extends AutomatedTestBase {
+       private final static String TEST_NAME = "ConnectedComponents";
+       private final static String TEST_DIR = "functions/builtin/";
+       private static final String TEST_CLASS_DIR = TEST_DIR + 
BuiltinComponentsTest.class.getSimpleName() + "/";
+       
+       @Override
+       public void setUp() {
+               TestUtils.clearAssertionInformation();
+               addTestConfiguration(TEST_NAME,new 
TestConfiguration(TEST_CLASS_DIR, TEST_NAME,new String[]{"r"}));
+       }
+
+       @Test
+       public void testConnectedComponents11CP() {
+               runConnectedComponentsTest(11, LopProperties.ExecType.CP);
+       }
+       
+       @Test
+       public void testConnectedComponents201CP() {
+               runConnectedComponentsTest(201, LopProperties.ExecType.CP);
+       }
+       
+       @Test
+       public void testConnectedComponents2001CP() {
+               runConnectedComponentsTest(2001, LopProperties.ExecType.CP);
+       }
+
+       private void runConnectedComponentsTest(int numVertices, ExecType 
instType)
+       {
+               Types.ExecMode platformOld = setExecMode(instType);
+
+               try
+               {
+                       loadTestConfiguration(getTestConfiguration(TEST_NAME));
+
+                       String HOME = SCRIPT_DIR + TEST_DIR;
+                       fullDMLScriptName = HOME + TEST_NAME + ".dml";
+                       programArgs = new String[]{ "-explain", "-stats", 
"-args", input("X"), output("R")};
+
+                       //generate actual dataset (3 components)
+                       double[][] X = new double[numVertices-3][2];
+                       for(int i=0; i<numVertices-3; i++) {
+                               X[i][0] = i<(numVertices/2-1) ? i+1 : i+3;
+                               X[i][1] = i<(numVertices/2-1) ? i+2 : i+4;
+                       }
+                       writeInputMatrixWithMTD("X", X, true);
+
+                       runTest(true, false, null, -1);
+
+                       HashMap<MatrixValue.CellIndex, Double> dmlfile = 
readDMLMatrixFromHDFS("R");
+                       for( int i=0; i<numVertices; i++ ) {
+                               int expected = i<(numVertices/2) ? 
(numVertices/2) :
+                                       i==(numVertices/2) ? i+1 : numVertices;
+                               Assert.assertEquals(new Double(expected), 
dmlfile.get(new MatrixValue.CellIndex(i+1,1)));
+                       }
+               }
+               finally {
+                       rtplatform = platformOld;
+               }
+       }
+}
diff --git a/src/test/scripts/functions/builtin/ConnectedComponents.dml 
b/src/test/scripts/functions/builtin/ConnectedComponents.dml
new file mode 100644
index 0000000..0c6fbe7
--- /dev/null
+++ b/src/test/scripts/functions/builtin/ConnectedComponents.dml
@@ -0,0 +1,28 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+X = read($1)
+n = max(X);
+G = table(X[,1], X[, 2], n, n)
+G = G + t(G); #symmetry
+C = components(G=G, verbose=FALSE)
+
+write(C, $2)

Reply via email to