Repository: incubator-madlib
Updated Branches:
  refs/heads/master 492fc7e41 -> 8c9b955cd


Graph: Add Breadth-first Search algorithm with grouping support.

JIRA: MADLIB-1102

This algorithm searches or traverses connected nodes in a graph in 
breadth-first order
starting at a user-specified origin node. Grouping support is also included.


Project: http://git-wip-us.apache.org/repos/asf/incubator-madlib/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-madlib/commit/8c9b955c
Tree: http://git-wip-us.apache.org/repos/asf/incubator-madlib/tree/8c9b955c
Diff: http://git-wip-us.apache.org/repos/asf/incubator-madlib/diff/8c9b955c

Branch: refs/heads/master
Commit: 8c9b955cd2e3150ad935ad1581e164670723184f
Parents: 492fc7e
Author: Rashmi Raghu <rra...@pivotal.io>
Authored: Wed Jul 12 15:15:21 2017 -0700
Committer: Rashmi Raghu <rra...@pivotal.io>
Committed: Wed Jul 12 15:24:16 2017 -0700

----------------------------------------------------------------------
 doc/mainpage.dox.in                             |   3 +
 src/ports/postgres/modules/graph/bfs.py_in      | 477 +++++++++++++++++++
 src/ports/postgres/modules/graph/bfs.sql_in     | 471 ++++++++++++++++++
 .../postgres/modules/graph/graph_utils.py_in    |  11 +
 .../postgres/modules/graph/test/bfs.sql_in      | 278 +++++++++++
 5 files changed, 1240 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-madlib/blob/8c9b955c/doc/mainpage.dox.in
----------------------------------------------------------------------
diff --git a/doc/mainpage.dox.in b/doc/mainpage.dox.in
index e16f9b2..52afd48 100644
--- a/doc/mainpage.dox.in
+++ b/doc/mainpage.dox.in
@@ -129,6 +129,9 @@ complete matrix stored as a distributed table.
     @defgroup grp_apsp All Pairs Shortest Path
     @ingroup grp_graph
 
+    @defgroup grp_bfs Breadth-first Search
+    @ingroup grp_graph
+    
     @defgroup grp_pagerank PageRank
     @ingroup grp_graph
 

http://git-wip-us.apache.org/repos/asf/incubator-madlib/blob/8c9b955c/src/ports/postgres/modules/graph/bfs.py_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/graph/bfs.py_in 
b/src/ports/postgres/modules/graph/bfs.py_in
new file mode 100644
index 0000000..a5631ea
--- /dev/null
+++ b/src/ports/postgres/modules/graph/bfs.py_in
@@ -0,0 +1,477 @@
+# coding=utf-8
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Breadth-First Search
+
+# Please refer to the bfs.sql_in file for the documentation
+
+"""
+@file bfs.py_in
+
+@namespace graph
+"""
+
+import plpy
+from graph_utils import validate_graph_coding
+from graph_utils import get_graph_usage
+from graph_utils import _grp_null_checks
+from utilities.control import MinWarning
+from utilities.utilities import _assert
+from utilities.utilities import extract_keyvalue_params
+from utilities.utilities import split_quoted_delimited_str
+from utilities.validate_args import table_exists
+from utilities.validate_args import columns_exist_in_table
+
+m4_changequote(`<!', `!>')
+
+def _validate_bfs(vertex_table, vertex_id, edge_table, edge_params,
+    source_vertex, out_table, max_distance, directed, grouping_cols_list, 
**kwargs):
+
+    validate_graph_coding(vertex_table, vertex_id, edge_table, edge_params,
+        out_table,'BFS')
+
+    _assert((max_distance >= 0) and isinstance(max_distance,int),
+        """Graph BFS: Invalid max_distance type or value ({0}), must be 
integer, 
+        be greater than or equal to 0 and be less than max allowable integer 
+        (2147483647).""".
+        format(max_distance))
+
+    _assert(isinstance(directed,bool),
+        """Graph BFS: Invalid value for directed ({0}), must be boolean.""".
+        format(directed))
+
+    _assert(isinstance(source_vertex,int),
+        """Graph BFS: Source vertex {source_vertex} has to be an integer.""".
+        format(**locals()))
+    src_exists = plpy.execute("""
+        SELECT * FROM {vertex_table} WHERE {vertex_id}={source_vertex}
+        """.format(**locals()))
+    if src_exists.nrows() == 0:
+        plpy.error(
+            """Graph BFS: Source vertex {source_vertex} is not present in the 
+            vertex table {vertex_table}.""".
+            format(**locals()))
+
+    vt_error = plpy.execute(
+        """ SELECT {vertex_id}
+            FROM {vertex_table}
+            WHERE {vertex_id} IS NOT NULL
+            GROUP BY {vertex_id}
+            HAVING count(*) > 1 """.format(**locals()))
+    if vt_error.nrows() != 0:
+        plpy.error(
+            """Graph BFS: Source vertex table {vertex_table} contains 
duplicate 
+            vertex id's.""".
+            format(**locals()))
+
+    _assert(not table_exists(out_table+"_summary"),
+        "Graph BFS: Output summary table already exists!")
+
+    if grouping_cols_list is not None:
+        _assert(columns_exist_in_table(edge_table, grouping_cols_list),
+            """Graph BFS: Not all columns from {grouping_cols_list} are 
present 
+            in edge table ({edge_table}).""".
+            format(**locals()))
+
+    return None
+
+
+def graph_bfs(schema_madlib, vertex_table, vertex_id, edge_table,
+        edge_args, source_vertex, out_table, max_distance, directed, 
grouping_cols,
+        **kwargs):
+
+    """
+    Breadth First Search algorithm for graphs [1].
+    Args:
+        @param vertex_table    Name of the table that contains the vertex data.
+        @param vertex_id       Name of the column containing the vertex ids.
+        @param edge_table      Name of the table that contains the edge data.
+        @param edge_args       A comma-delimited string containing multiple
+                               named arguments of the form "name=value".
+        @param source_vertex   The source vertex id for the algorithm to start.
+        @param out_table       Name of the table to store the result of BFS.
+        @param max_distance    Maximum distance from the source_vertex to 
search for.
+        @param directed        Graph will be treated as directed if this 
boolean flag 
+                               is set to TRUE. Graph is treated as undirected 
by default.
+        @param grouping_cols   The list of grouping columns.
+
+    [1] https://en.wikipedia.org/wiki/Breadth-first_search
+    """
+
+    with MinWarning("warning"):
+
+        INT_MAX = 2147483647
+
+        params_types = {'src': str, 'dest': str}
+        default_args = {'src': 'src', 'dest': 'dest'}
+        edge_params = extract_keyvalue_params(edge_args,
+                                            params_types,
+                                            default_args)
+
+        # Prepare the input for recording in the summary table
+        if vertex_id is None:
+            v_st= "NULL"
+            vertex_id = "id"
+        else:
+            v_st = vertex_id
+        if edge_args is None:
+            e_st = "NULL"
+        else:
+            e_st = edge_args
+        if max_distance is None:
+            d_st= "NULL"
+            max_distance = INT_MAX
+        else:
+            d_st = max_distance
+        if directed is None:
+            dir_st= "NULL"
+            directed = False
+        else:
+            dir_st = directed
+        if grouping_cols is None or grouping_cols is '':
+            g_st = "NULL"
+            glist = None
+        else:
+            g_st = grouping_cols
+            glist = split_quoted_delimited_str(grouping_cols)
+
+        src = edge_params["src"]
+        dest = edge_params["dest"]
+        
+        distribution = m4_ifdef(<!__POSTGRESQL__!>, <!''!>,
+            <!"DISTRIBUTED BY ({0})".format(vertex_id)!>)
+        local_distribution = m4_ifdef(<!__POSTGRESQL__!>, <!''!>,
+            <!"DISTRIBUTED RANDOMLY"!>)
+
+        _validate_bfs(vertex_table, vertex_id, edge_table,
+            edge_params, source_vertex, out_table, max_distance, directed, 
glist)
+
+        # Initialize grouping related variables
+        grp_comma = ""
+        and_grp_null_checks = ""
+
+        if grouping_cols is not None and grouping_cols is not '':
+            grp_comma = grouping_cols + ", "
+            and_grp_null_checks = " AND " + _grp_null_checks(glist)
+            
+        # We keep a table of every vertex, the distance to that vertex from 
source
+        # and the parent in the path to the vertex.
+        # This table will be updated throughout the execution.
+        dist_col = "dist"
+        parent_col = "parent"
+        curr_dist_val = 0
+
+        # Creating the output table with the appropriate columns and data types
+        plpy.execute("""
+            CREATE TABLE {out_table} AS ( 
+                SELECT
+                    {grp_comma} 
+                    {src} AS {vertex_id}, 
+                    {curr_dist_val}::INT AS {dist_col},
+                    {src} AS {parent_col}
+                FROM {edge_table} 
+                LIMIT 0
+            ) {distribution}""".format(**locals()))
+
+        # We keep a summary table to keep track of the parameters used for this
+        # BFS run
+        plpy.execute( """ 
+            CREATE TABLE {out_table}_summary  (
+                vertex_table            TEXT,
+                vertex_id               TEXT,
+                edge_table              TEXT,
+                edge_args               TEXT,
+                source_vertex           INTEGER,
+                out_table               TEXT,
+                max_distance            INTEGER,
+                directed                BOOLEAN,
+                grouping_cols           TEXT
+            )""".format(**locals()))
+
+        plpy.execute(""" 
+            INSERT INTO {out_table}_summary VALUES
+                ('{vertex_table}', '{v_st}', '{edge_table}', '{e_st}',
+                {source_vertex}, '{out_table}', {d_st}, {dir_st}, '{g_st}')
+            """.format(**locals()))
+
+        
+        # The queries for directed and undirected graphs share a common 
section.
+        # There are additional clauses added to the undirected graph queries.
+        # In the undirected case edges can be considered to go from {src} to
+        # {dest} and {dest} to {src}
+
+        insert_qry_undirected_init = ""
+        count_qry_undirected = ""
+        insert_qry_undirected_loop = ""
+
+        if not directed:
+            insert_qry_undirected_init = """ OR {dest} = {source_vertex}
+                """.format(**locals())
+
+            count_qry_undirected = """ OR (
+                    ({grp_comma} {dest}) IN ( 
+                        SELECT {grp_comma} {vertex_id} FROM {out_table} 
+                        WHERE {dist_col}={{curr_dist_val}}
+                    ) 
+                    AND 
+                    ({grp_comma} {src}) NOT IN (
+                        SELECT {grp_comma} {vertex_id} FROM {out_table}
+                    )
+                )
+            """.format(**locals())
+
+            insert_qry_undirected_loop = """ UNION
+                SELECT {grp_comma} 
+                    {src} AS {vertex_id}, 
+                    {{curr_dist_val}}+1 AS {dist_col}, 
+                    {dest} AS {parent_col}
+                FROM {edge_table} 
+                WHERE (
+                    ({grp_comma} {dest}) IN (
+                        SELECT {grp_comma} {vertex_id} FROM {out_table} 
+                        WHERE {dist_col}={{curr_dist_val}}
+                    ) 
+                    AND 
+                    ({grp_comma} {src}) NOT IN (
+                        SELECT {grp_comma} {vertex_id} FROM {out_table}
+                    )
+                )
+            """.format(**locals())
+
+        # This step inserts into the output table the source vertex for each 
+        # group in which it is present. Grouping behavior is not predictable
+        # when there are NULLs in any grouping column. Therefore those rows
+        # are explicitly removed from analysis
+        insert_qry_init = """
+            INSERT INTO {out_table}
+            SELECT {grp_comma} 
+                {source_vertex} AS {vertex_id},
+                {curr_dist_val} AS {dist_col},
+                NULL AS {parent_col}
+            FROM {edge_table}
+            WHERE ({src} = {source_vertex} {insert_qry_undirected_init})
+                {and_grp_null_checks}
+            GROUP BY {grp_comma} {vertex_id}, {dist_col}
+        """.format(**locals())
+        plpy.execute(insert_qry_init.format(**locals()))
+
+        # After initialization of the output table, number of nodes connected
+        # by edges to the source vertex in each group is counted. This is also 
used 
+        # below in the BFS iteration while-loop
+        count_qry = """
+            SELECT COUNT(*) 
+            FROM {edge_table} 
+            WHERE (
+                ({grp_comma} {src}) IN ( 
+                    SELECT {grp_comma} {vertex_id} FROM {out_table} 
+                    WHERE {dist_col}={{curr_dist_val}}
+                ) 
+                AND 
+                ({grp_comma} {dest}) NOT IN (
+                    SELECT {grp_comma} {vertex_id} FROM {out_table}
+                )
+            ) {count_qry_undirected}
+        """.format(**locals())
+
+        vct = plpy.execute(count_qry.format(**locals()))[0]['count']
+
+        # This insert statement is executed within the BFS iteration 
while-loop 
+        # below. It is used to discover and store all nodes (not already 
found) 
+        # connected to those found in the immediate previous iteration.
+        insert_qry_loop = """
+            INSERT INTO {out_table}
+            SELECT {grp_comma} {vertex_id}, {dist_col}, min({parent_col})
+            FROM (
+                SELECT {grp_comma} 
+                    {dest} AS {vertex_id}, 
+                    {{curr_dist_val}}+1 AS {dist_col}, 
+                    {src} AS {parent_col}
+                FROM {edge_table} 
+                WHERE (
+                    ({grp_comma} {src}) IN (
+                        SELECT {grp_comma} {vertex_id} FROM {out_table} 
+                        WHERE {dist_col}={{curr_dist_val}}
+                    ) 
+                    AND 
+                    ({grp_comma} {dest}) NOT IN (
+                        SELECT {grp_comma} {vertex_id} FROM {out_table}
+                    )
+                )
+                {insert_qry_undirected_loop}
+            ) t1
+            GROUP BY {grp_comma} {vertex_id}, {dist_col}
+        """.format(**locals())
+
+        # Main loop for traversing the graph
+        while vct > 0 and curr_dist_val < max_distance:
+            # The loop consists of two steps:
+            # 1) Disover and store all nodes that are linked to nodes found in 
+            #    the immediate previous iteration of the loop that have not 
already 
+            #    been found in all previous iterations  
+            # 2) Check for any nodes linked to those discovered in Step 1 above
+            #    that have not yet been discovered
+            # 
+            # If a node has multiple possible parents then the parent with the 
+            # smallest ID is chosen for output
+
+            # In the directed graph case only nodes in the {dest} column of 
+            # the edge table are searched to find new nodes reachable from 
+            # previously discovered nodes
+
+            # In the undirected graph case edges are treated as 
non-directional 
+            # (or bidirectional). Nodes in both the {src} and {dest} columns 
of 
+            # the edge table are searched to find new nodes reachable from 
+            # previously discovered nodes.
+            #
+            # This approach does NOT require the user to provide a forward edge
+            # and a reverse edge between the same two nodes to indicate the 
+            # graph's undirected nature. However, it will work in that scenario
+            # as well.
+
+            # Discover and store all nodes (not already found) connected to 
+            # those found in the immediate previous iteration
+            plpy.execute(insert_qry_loop.format(**locals()))
+
+            # Update distance value for next iteration
+            curr_dist_val = curr_dist_val + 1
+
+            # Count / find any nodes that are connected to those discovered 
and 
+            # stored in this iteration. This is used to check if the iterations
+            # need to continue.
+            vct = plpy.execute(count_qry.format(**locals()))[0]['count']
+    
+    return None
+
+def graph_bfs_help(schema_madlib, message, **kwargs):
+    """
+    Help function for graph_bfs
+
+    Args:
+        @param schema_madlib
+        @param message: string, Help message string
+        @param kwargs
+
+    Returns:
+        String. Help/usage information
+    """
+    if not message:
+        help_string = """
+-----------------------------------------------------------------------
+                            SUMMARY
+-----------------------------------------------------------------------
+
+Given a graph and a source vertex, the Breadth-first Search (BFS) algorithm
+finds all nodes reachable from the source vertex.
+
+For more details on function usage:
+    SELECT {schema_madlib}.graph_bfs('usage')
+            """
+    elif message.lower() in ['usage', 'help', '?']:
+        help_string = """
+Given a graph and a source vertex, the Breadth-first Search (BFS) algorithm
+finds all nodes reachable from the source vertex.
+
+{graph_usage}
+
+----------------------------------------------------------------------------
+                            OUTPUT
+----------------------------------------------------------------------------
+The output of BFS ('out_table' above) contains a row for every vertex of that 
is 
+reachable from the source_vertex. In the presence of grouping columns, only 
those 
+edges are used for which there are no NULL values in any grouping column.
+The output table will have the following columns (in addition to the 
+grouping columns):
+  - vertex_id : The id for any node reachable from source_vertex in addition 
to 
+                the source_vertex. Will use the input parameter 'vertex_id' 
for 
+                column naming.
+  - dist      : The distance in number of edges (or hops) from the 
source_vertex 
+                to where this vertex is located. 
+  - parent    : The parent of this vertex in BFS traversal of the graph from 
+                source_vertex. Will use 'parent' for column naming. For the 
+                case where vertex_id = source_vertex, the value for parent is 
NULL.
+"""
+    elif message.lower() in ("example", "examples"):
+        help_string = """
+----------------------------------------------------------------------------
+                                EXAMPLES
+----------------------------------------------------------------------------
+-- Create a graph, represented as vertex and edge tables.
+DROP TABLE IF EXISTS vertex, edge;
+CREATE TABLE vertex(
+        id INTEGER
+        );
+CREATE TABLE edge(
+        src INTEGER,
+        dest INTEGER
+        );
+INSERT INTO vertex VALUES
+(0),
+(1),
+(2),
+(3),
+(4),
+(5),
+(6),
+(7),
+(8),
+(9),
+(10),
+(11)
+;
+INSERT INTO edge VALUES
+(0, 5),
+(1, 0),
+(1, 3),
+(2, 6),
+(3, 4),
+(3, 5),
+(4, 2),
+(8, 9),
+(9, 10),
+(9, 11),
+(10, 8)
+;
+
+-- Traverse undirected graph from vertex 3:
+DROP TABLE IF EXISTS out, out_summary;
+SELECT madlib.graph_bfs(
+                         'vertex',      -- Vertex table
+                         NULL,          -- Vertix id column (NULL means use 
default naming)
+                         'edge',        -- Edge table
+                         NULL,          -- Edge arguments (NULL means use 
default naming)
+                         3,             -- Source vertex for BFS
+                         'out'          -- Output table of nodes reachable 
from source_vertex
+                        );        
+                        -- Default values used for the other arguments
+SELECT * FROM out ORDER BY dist,id;
+
+SELECT * FROM out_summary;
+
+"""
+    else:
+        help_string = "No such option. Use {schema_madlib}.graph_bfs()"
+
+    return help_string.format(schema_madlib=schema_madlib,
+        graph_usage=get_graph_usage(schema_madlib, 'graph_bfs',
+    """source_vertex INT,  -- The source vertex id for the algorithm to start.
+    out_table     TEXT, -- Name of the table to store the result of BFS.
+    max_distance  INT,  -- Maximum distance from source_vertex to search 
through in the graph.
+    directed      INT,  -- If TRUE the graph will be treated as directed.
+    grouping_cols TEXT  -- A comma-separated list of grouping columns."""))
+# ---------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/incubator-madlib/blob/8c9b955c/src/ports/postgres/modules/graph/bfs.sql_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/graph/bfs.sql_in 
b/src/ports/postgres/modules/graph/bfs.sql_in
new file mode 100644
index 0000000..fd7a396
--- /dev/null
+++ b/src/ports/postgres/modules/graph/bfs.sql_in
@@ -0,0 +1,471 @@
+/* ----------------------------------------------------------------------- 
*//**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ *
+ * @file bfs.sql_in
+ *
+ * @brief SQL functions for graph analytics
+ * @date Jun 2017
+ *
+ * @sa Provides Breadth First Search graph algorithm.
+ *
+ *//* ----------------------------------------------------------------------- 
*/
+m4_include(`SQLCommon.m4')
+/**
+@addtogroup grp_bfs
+
+<div class="toc"><b>Contents</b>
+<ul>
+<li><a href="#bfs">Breadth-first Search</a></li>
+<li><a href="#notes">Notes</a></li>
+<li><a href="#examples">Examples</a></li>
+<li><a href="#literature">Literature</a></li>
+</ul>
+</div>
+
+@brief Finds the nodes reachable from a given source vertex using a 
breadth-first approach.
+
+Given a graph and a source vertex, the Breadth-first Search (BFS) algorithm
+finds all nodes reachable from the source vertex by searching / traversing the 
graph 
+in a breadth-first manner.
+
+@anchor bfs
+@par BFS
+<pre class="syntax">
+graph_bfs( vertex_table,
+           vertex_id,
+           edge_table,
+           edge_args,
+           source_vertex,
+           out_table,
+           max_distance,
+           directed,
+           grouping_cols
+          )
+</pre>
+
+\b Arguments
+<dl class="arglist">
+<dt>vertex_table</dt>
+<dd>TEXT. Name of the table containing the vertex data for the graph. Must 
contain the
+column specified in the 'vertex_id' parameter below.</dd>
+
+<dt>vertex_id</dt>
+<dd>TEXT, default = 'id'. Name of the column in 'vertex_table' containing
+vertex ids.  The vertex ids are of type INTEGER with no duplicates.
+They do not need to be contiguous.</dd>
+
+<dt>edge_table</dt>
+<dd>TEXT. Name of the table containing the edge data. The edge table must 
contain 
+columns for source vertex and destination vertex. Column naming convention is 
+described below in the 'edge_args' parameter. 
+In addition to vertex columns, if grouping is used then the columns specified
+in the 'grouping_cols' parameter must be present. </dd>
+
+<dt>edge_args</dt>
+<dd>TEXT. A comma-delimited string containing multiple named arguments of
+the form "name=value". The following parameters are supported for
+this string argument:
+  - src (INTEGER): Name of the column containing the source vertex ids in the 
edge table. 
+  Default column name is 'src'.
+  (Not to be confused with the source_vertex argument passed to the BFS 
function)
+  - dest (INTEGER): Name of the column containing the destination vertex ids 
in 
+  the edge table. Default column name is 'dest'.
+  
+<dt>source_vertex</dt>
+<dd>INTEGER. The source vertex id for the algorithm to start. This vertex id 
must
+exist in the 'vertex_id' column of 'vertex_table'.</dd>
+
+<dt>out_table</dt>
+<dd>TEXT. Name of the table to store the result of BFS.
+It contains a row for every vertex that is reachable from the source_vertex.
+In the presence of grouping columns, only those edges are used for which there 
are no NULL values 
+in any grouping column.
+The output table will have the following columns (in addition to the grouping 
columns):
+  - vertex_id : The id for any node reachable from source_vertex in addition 
to 
+                the source_vertex. Will use the input parameter 'vertex_id' 
for 
+                column naming.
+  - dist      : The distance in number of edges (or hops) from the 
source_vertex 
+                to where this vertex is located. 
+  - parent    : The parent of this vertex in BFS traversal of the graph from 
source_vertex. 
+                Will use 'parent' for column naming. For the 
+                case where vertex_id = source_vertex, the value for parent is 
NULL.
+
+A summary table named <out_table>_summary is also created. This is an internal 
table that keeps a record of the input parameters.
+</dd>
+
+<dt>max_distance</dt>
+<dd>INT, default = NULL. Maximum distance (number of edges) from source_vertex 
to search through in the graph.</dd>
+
+<dt>directed</dt>
+<dd>BOOLEAN, default = FALSE. If TRUE the graph will be treated as directed, 
else it will be treated as an undirected graph.</dd>
+
+<dt>grouping_cols</dt>
+<dd>TEXT, default = NULL. A comma-separated list of columns used to group the 
+input into discrete subgraphs. 
+These columns must exist in the edge table. When this value is NULL, no 
grouping is used 
+and a single BFS result is generated. 
+@note Expressions are not currently supported for 'grouping_cols'.</dd>
+
+</dl>
+
+@anchor notes
+@par Notes
+
+The graph_bfs function is a SQL implementation of the well-known Breadth-first 
+Search algorithm [1] modified appropriately for a relational database. It will 
+find any node in the graph reachable from the source_vertex only once. If a 
node 
+is reachable by many different paths from the source_vertex (i.e. has more 
than 
+one parent), then only one of those parents is present in the output table. 
+The BFS result will, in general, be different for different choices of 
source_vertex.
+
+@anchor examples
+@examp
+
+-# Create vertex and edge tables to represent the graph:
+<pre class="syntax">
+DROP TABLE IF EXISTS vertex, edge;
+CREATE TABLE vertex(
+        id INTEGER
+        );
+CREATE TABLE edge(
+        src INTEGER,
+        dest INTEGER
+        );
+INSERT INTO vertex VALUES
+(0),
+(1),
+(2),
+(3),
+(4),
+(5),
+(6),
+(7),
+(8),
+(9),
+(10),
+(11)
+;
+INSERT INTO edge VALUES
+(0, 5),
+(1, 0),
+(1, 3),
+(2, 6),
+(3, 4),
+(3, 5),
+(4, 2),
+(8, 9),
+(9, 10),
+(9, 11),
+(10, 8)
+;
+</pre>
+
+-# Traverse undirected graph from vertex 3:
+<pre class="syntax">
+DROP TABLE IF EXISTS out, out_summary;
+SELECT madlib.graph_bfs(
+                         'vertex',      -- Vertex table
+                         NULL,          -- Vertix id column (NULL means use 
default naming)
+                         'edge',        -- Edge table
+                         NULL,          -- Edge arguments (NULL means use 
default naming)
+                         3,             -- Source vertex for BFS
+                         'out');        -- Output table of nodes reachable 
from source_vertex
+                         -- Default values used for the other arguments
+SELECT * FROM out ORDER BY dist,id;
+</pre>
+<pre class="result">
+ id | dist | parent 
+----+------+--------
+  3 |    0 |       
+  1 |    1 |      3
+  4 |    1 |      3
+  5 |    1 |      3
+  0 |    2 |      1
+  2 |    2 |      4
+  6 |    3 |      2
+(7 rows)
+</pre>
+<pre class="syntax">
+SELECT * FROM out_summary;
+</pre>
+<pre class="result">
+ vertex_table | vertex_id | edge_table | edge_args | source_vertex | out_table 
| max_distance | directed | grouping_cols 
+--------------+-----------+------------+-----------+---------------+-----------+--------------+----------+---------------
+ vertex       | NULL      | edge       | NULL      |             3 | out       
|              |          | NULL
+(1 row)
+</pre>
+
+-# In this example, we use max_distance to limit the search distance.
+<pre class="syntax">
+DROP TABLE IF EXISTS out_max, out_max_summary;
+SELECT madlib.graph_bfs(
+                         'vertex',      -- Vertex table
+                         NULL,          -- Vertix id column (NULL means use 
default naming)
+                         'edge',        -- Edge table
+                         NULL,          -- Edge arguments (NULL means use 
default naming)
+                         3,             -- Source vertex for BFS
+                         'out_max',     -- Output table of nodes reachable 
from source_vertex
+                         2);            -- Maximum distance to traverse from 
source_vertex        
+                         -- Default values used for the other arguments
+SELECT * FROM out_max ORDER BY dist,id;
+</pre>
+<pre class="result">
+ id | dist | parent 
+----+------+--------
+  3 |    0 |       
+  1 |    1 |      3
+  4 |    1 |      3
+  5 |    1 |      3
+  0 |    2 |      1
+  2 |    2 |      4
+(6 rows)
+</pre>
+
+-# Now let's do an example using
+different column names in the tables (i.e., not the defaults).
+Create the vertex and edge tables:
+<pre class="syntax">
+DROP TABLE IF EXISTS vertex_alt, edge_alt;
+CREATE TABLE vertex_alt AS SELECT id AS v_id FROM vertex;
+CREATE TABLE edge_alt AS SELECT src AS n1, dest AS n2 FROM edge;
+</pre>
+
+-# Run BFS from vertex 8:
+<pre class="syntax">
+DROP TABLE IF EXISTS out_alt, out_alt_summary;
+SELECT madlib.graph_bfs(
+                         'vertex_alt',                  -- Vertex table
+                         'v_id',                        -- Vertex id column 
(NULL means use default naming)
+                         'edge_alt',                    -- Edge table
+                         'src=n1, dest=n2',             -- Edge arguments 
(NULL means use default naming)
+                         8,                             -- Source vertex for 
BFS
+                         'out_alt');                    -- Output table of 
nodes reachable from source_vertex
+SELECT * FROM out_alt ORDER BY v_id;
+</pre>
+<pre class="result">
+ v_id | dist | parent 
+------+------+--------
+    8 |    0 |       
+    9 |    1 |      8
+   10 |    1 |      8
+   11 |    2 |      9
+</pre>
+
+-# Now we show an example where the graph is treated as a directed graph.
+<pre class="syntax">
+DROP TABLE IF EXISTS out_alt_dir, out_alt_dir_summary;
+SELECT madlib.graph_bfs(
+                         'vertex_alt',                  -- Vertex table
+                         'v_id',                        -- Vertex id column 
(NULL means use default naming)
+                         'edge_alt',                    -- Edge table
+                         'src=n1, dest=n2',             -- Edge arguments 
(NULL means use default naming)
+                         8,                             -- Source vertex for 
BFS
+                         'out_alt_dir',                 -- Output table of 
nodes reachable from source_vertex
+                         NULL,                          -- Maximum distance to 
traverse from source_vertex
+                         TRUE);                         -- Flag for specifying 
directed graph
+SELECT * FROM out_alt_dir ORDER BY v_id;
+</pre>
+<pre class="result">
+ v_id | dist | parent 
+------+------+--------
+    8 |    0 |       
+    9 |    1 |      8
+   10 |    2 |      9
+   11 |    2 |      9
+(4 rows)
+</pre>
+Notice that, with the graph being treated as directed, the parent of v_id=10 
+is now vertex 9 and not 8 as in the undirected case.
+
+-# Create a graph with 2 groups:
+<pre class="syntax">
+DROP TABLE IF EXISTS edge_gr;
+CREATE TABLE edge_gr(
+                  g1 INTEGER,
+                  g2 TEXT,
+                  src INTEGER,
+                  dest INTEGER
+                );
+INSERT INTO edge_gr VALUES
+(100, 'a', 0, 5),
+(100, 'a', 1, 0),
+(100, 'a', 1, 3),
+(100, 'a', 2, 6),
+(100, 'a', 3, 4),
+(100, 'a', 3, 5),
+(100, 'a', 4, 2),
+(100, 'a', 8, 9),
+(100, 'a', 9, 10),
+(100, 'a', 9, 11),
+(100, 'a', 10, 8),
+(202, 'c', 8, 9),
+(202, 'c', 9, 10),
+(202, 'c', 9, 11),
+(202, 'c', 10, 8)
+;
+</pre>
+
+-# Run BFS for all groups from a given source_vertex.
+<pre class="syntax">
+DROP TABLE IF EXISTS out_gr, out_gr_summary;
+SELECT madlib.graph_bfs(
+                         'vertex',      -- Vertex table
+                         NULL,          -- Vertex id column (NULL means use 
default naming)
+                         'edge_gr',     -- Edge table
+                         NULL,          -- Edge arguments (NULL means use 
default naming)
+                         8,             -- Source vertex for BFS
+                         'out_gr',      -- Output table of nodes reachable 
from source_vertex
+                         NULL,          -- Maximum distance to traverse from 
source_vertex
+                         NULL,          -- Flag for specifying directed graph
+                         'g1,g2'        -- Grouping columns
+);
+SELECT * FROM out_gr ORDER BY g1,g2,dist,id;
+</pre>
+<pre class="result">
+ g1  | g2 | id | dist | parent 
+-----+----+----+------+--------
+ 100 | a  |  8 |    0 |       
+ 100 | a  |  9 |    1 |      8
+ 100 | a  | 10 |    1 |      8
+ 100 | a  | 11 |    2 |      9
+ 202 | c  |  8 |    0 |       
+ 202 | c  |  9 |    1 |      8
+ 202 | c  | 10 |    1 |      8
+ 202 | c  | 11 |    2 |      9
+(8 rows)
+</pre>
+If source_vertex is not present in
+a group, then that group will not appear in the output table.
+<pre class="syntax">
+DROP TABLE IF EXISTS out_gr, out_gr_summary;
+SELECT madlib.graph_bfs(
+                         'vertex',      -- Vertex table
+                         NULL,          -- Vertex id column (NULL means use 
default naming)
+                         'edge_gr',     -- Edge table
+                         NULL,          -- Edge arguments (NULL means use 
default naming)
+                         3,             -- Source vertex for BFS
+                         'out_gr',      -- Output table of nodes reachable 
from source_vertex
+                         NULL,          -- Maximum distance to traverse from 
source_vertex
+                         NULL,          -- Flag for specifying directed graph
+                         'g1,g2'        -- Grouping columns
+);
+SELECT * FROM out_gr ORDER BY g1,g2,dist,id;
+</pre>
+<pre class="result">
+ g1  | g2 | id | dist | parent 
+-----+----+----+------+--------
+ 100 | a  |  3 |    0 |       
+ 100 | a  |  1 |    1 |      3
+ 100 | a  |  4 |    1 |      3
+ 100 | a  |  5 |    1 |      3
+ 100 | a  |  0 |    2 |      1
+ 100 | a  |  2 |    2 |      4
+ 100 | a  |  6 |    3 |      2
+(7 rows)
+</pre>
+
+@anchor literature
+@par Literature
+
+[1] Breadth-first Search algorithm. 
https://en.wikipedia.org/wiki/Breadth-first_search
+*/
+
+-------------------------------------------------------------------------
+
+CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.graph_bfs(
+    vertex_table            TEXT,
+    vertex_id               TEXT,
+    edge_table              TEXT,
+    edge_args               TEXT,
+    source_vertex           INT,
+    out_table               TEXT,
+    max_distance            INT,
+    directed                BOOLEAN,
+    grouping_cols           TEXT
+) RETURNS VOID AS $$
+    PythonFunction(graph, bfs, graph_bfs)
+$$ LANGUAGE plpythonu VOLATILE
+m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `MODIFIES SQL DATA', `');
+
+-------------------------------------------------------------------------
+
+CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.graph_bfs(
+    vertex_table            TEXT,
+    vertex_id               TEXT,
+    edge_table              TEXT,
+    edge_args               TEXT,
+    source_vertex           INT,
+    out_table               TEXT,
+    max_distance            INT,
+    directed                BOOLEAN
+) RETURNS VOID AS $$
+     SELECT MADLIB_SCHEMA.graph_bfs($1, $2, $3, $4, $5, $6, $7, $8, NULL);
+$$ LANGUAGE sql VOLATILE
+m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `MODIFIES SQL DATA', `');
+
+-------------------------------------------------------------------------
+
+CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.graph_bfs(
+    vertex_table            TEXT,
+    vertex_id               TEXT,
+    edge_table              TEXT,
+    edge_args               TEXT,
+    source_vertex           INT,
+    out_table               TEXT,
+    max_distance            INT
+) RETURNS VOID AS $$
+     SELECT MADLIB_SCHEMA.graph_bfs($1, $2, $3, $4, $5, $6, $7, NULL, NULL);
+$$ LANGUAGE sql VOLATILE
+m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `MODIFIES SQL DATA', `');
+
+-------------------------------------------------------------------------
+
+CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.graph_bfs(
+    vertex_table            TEXT,
+    vertex_id               TEXT,
+    edge_table              TEXT,
+    edge_args               TEXT,
+    source_vertex           INT,
+    out_table               TEXT
+) RETURNS VOID AS $$
+     SELECT MADLIB_SCHEMA.graph_bfs($1, $2, $3, $4, $5, $6, NULL, NULL, NULL);
+$$ LANGUAGE sql VOLATILE
+m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `MODIFIES SQL DATA', `');
+
+-------------------------------------------------------------------------
+
+-- Online help
+CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.graph_bfs(
+    message VARCHAR
+) RETURNS VARCHAR AS $$
+    PythonFunction(graph, bfs, graph_bfs_help)
+$$ LANGUAGE plpythonu IMMUTABLE
+m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `CONTAINS SQL', `');
+
+--------------------------------------------------------------------------------
+
+CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.graph_bfs()
+RETURNS VARCHAR AS $$
+    SELECT MADLIB_SCHEMA.graph_bfs('');
+$$ LANGUAGE sql IMMUTABLE
+m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `CONTAINS SQL', `');
+
+--------------------------------------------------------------------------------
+

http://git-wip-us.apache.org/repos/asf/incubator-madlib/blob/8c9b955c/src/ports/postgres/modules/graph/graph_utils.py_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/graph/graph_utils.py_in 
b/src/ports/postgres/modules/graph/graph_utils.py_in
index 77378d6..944c301 100644
--- a/src/ports/postgres/modules/graph/graph_utils.py_in
+++ b/src/ports/postgres/modules/graph/graph_utils.py_in
@@ -38,6 +38,17 @@ from utilities.validate_args import table_exists
 from utilities.validate_args import columns_exist_in_table
 from utilities.validate_args import table_is_empty
 
+def _grp_null_checks(grp_list):
+
+    """
+    Helper function for generating NULL checks for grouping columns 
+    to be used within a WHERE clause
+    Args:
+        @param grp_list   The list of grouping columns
+    """
+    return ' AND '.join([" {i} IS NOT NULL ".format(**locals())
+        for i in grp_list])
+
 def _check_groups(tbl1, tbl2, grp_list):
 
        """

http://git-wip-us.apache.org/repos/asf/incubator-madlib/blob/8c9b955c/src/ports/postgres/modules/graph/test/bfs.sql_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/graph/test/bfs.sql_in 
b/src/ports/postgres/modules/graph/test/bfs.sql_in
new file mode 100644
index 0000000..223ab84
--- /dev/null
+++ b/src/ports/postgres/modules/graph/test/bfs.sql_in
@@ -0,0 +1,278 @@
+/* ----------------------------------------------------------------------- 
*//**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ *//* ----------------------------------------------------------------------- 
*/
+
+
+DROP TABLE IF EXISTS vertex,edge,edge_grp;
+
+CREATE TABLE vertex(
+                  id INTEGER
+                );
+INSERT INTO vertex VALUES
+(0),
+(1),
+(2),
+(3),
+(4),
+(5),
+(6),
+(7),
+(8),
+(9),
+(10),
+(11)
+;
+
+CREATE TABLE edge(
+                  src INTEGER,
+                  dest INTEGER,
+                  weight DOUBLE PRECISION
+                );
+INSERT INTO edge VALUES
+(0, 5, 1),
+(1, 0, 1),
+(1, 3, 1),
+(2, 6, 1),
+(3, 4, 1),
+(3, 5, 1),
+(4, 2, 1),
+(8, 9, 1),
+(9, 10, 1),
+(9, 11, 1),
+(10, 8, 1)
+;
+
+CREATE TABLE edge_grp(
+                  g1 INTEGER,
+                  g2 TEXT,
+                  src INTEGER,
+                  dest INTEGER,
+                  weight DOUBLE PRECISION
+                );
+INSERT INTO edge_grp VALUES
+(100, 'a', 0, 5, 1),
+(100, 'a', 1, 0, 1),
+(100, 'a', 1, 3, 1),
+(100, 'a', 2, 6, 1),
+(100, 'a', 3, 4, 1),
+(100, 'a', 3, 5, 1),
+(100, 'a', 4, 2, 1),
+(100, 'a', 8, 9, 1),
+(100, 'a', 9, 10, 1),
+(100, 'a', 9, 11, 1),
+(100, 'a', 10, 8, 1),
+(100, 'b', 0, 5, 1),
+(100, 'b', 1, 0, 1),
+(100, 'b', 1, 3, 1),
+(100, 'b', 2, 6, 1),
+(100, 'b', 3, 4, 1),
+(100, 'b', 3, 5, 1),
+(100, 'b', 4, 2, 1),
+(100, 'b', 8, 9, 1),
+(100, 'b', 9, 10, 1),
+(100, 'b', 9, 11, 1),
+(100, 'b', 10, 8, 1),
+(202, 'c', 8, 9, 1),
+(202, 'c', 9, 10, 1),
+(202, 'c', 9, 11, 1),
+(202, 'c', 10, 8, 1),
+(NULL, 'b', 8, 9, 1),
+(NULL, 'b', 9, 10, 1),
+(NULL, 'b', 9, 11, 1),
+(NULL, 'b', 10, 8, 1)
+;
+;
+
+---------------------
+-- Undirected Graph
+---------------------
+-- Results to check against - uncomment if needed
+-- -- source_vertex = 3
+-- DROP TABLE IF EXISTS out_actual;
+-- CREATE TABLE out_actual (src INT, id INT, dist INT, parent INT);
+-- INSERT INTO out_actual VALUES
+-- (3,3,0,NULL),
+-- (3,1,1,3),
+-- (3,4,1,3),
+-- (3,5,1,3),
+-- (3,0,2,1),
+-- (3,2,2,4),
+-- (3,6,3,2)
+-- ;
+-- -- source_vertex = 7
+-- DROP TABLE IF EXISTS out_actual;
+-- CREATE TABLE out_actual (src INT, id INT, dist INT, parent INT);
+-- 
+-- -- source_vertex = 8
+-- DROP TABLE IF EXISTS out_actual;
+-- CREATE TABLE out_actual (src INT, id INT, dist INT, parent INT);
+-- INSERT INTO out_actual VALUES
+-- (8,8,0,NULL),
+-- (8,9,1,8),
+-- (8,10,1,8),
+-- (8,11,2,9)
+-- ;
+
+-- Undirected graph
+-- source_vertex = 3
+DROP TABLE IF EXISTS out_frombfs, out_frombfs_summary;
+SELECT graph_bfs('vertex',NULL,'edge',NULL,3,'out_frombfs');
+
+SELECT assert(
+       (array_agg(id ORDER BY id) = ARRAY[1,4,5]::INT[])
+       AND 
+       (array_agg(parent ORDER BY id) = ARRAY[3,3,3]::INT[]), 
+       'Wrong output in graph (BFS)') 
+       FROM out_frombfs WHERE dist = 1 GROUP BY dist;
+
+SELECT assert(max(dist) = 3, 'Wrong output in graph (BFS)') 
+       FROM out_frombfs;
+
+-- source_vertex = 7
+DROP TABLE IF EXISTS out_frombfs, out_frombfs_summary;
+SELECT graph_bfs('vertex',NULL,'edge',NULL,7,'out_frombfs');
+
+SELECT assert(COUNT(*) = 0, 'Wrong output in graph (BFS)') FROM out_frombfs;
+
+-- source_vertex = 8
+DROP TABLE IF EXISTS out_frombfs, out_frombfs_summary;
+SELECT graph_bfs('vertex',NULL,'edge',NULL,8,'out_frombfs',NULL,FALSE,NULL);
+
+SELECT assert(parent = 8, 'Wrong output in graph (BFS)') 
+       FROM out_frombfs WHERE dist = 2 AND id = 10;
+
+-------------------
+-- Directed graph
+-------------------
+-- Results to check against - uncomment if needed
+
+-- -- source_vertex = 3
+-- DROP TABLE IF EXISTS out_actual;
+-- CREATE TABLE out_actual (src INT, id INT, dist INT, parent INT);
+-- INSERT INTO out_actual VALUES
+-- (3,3,0,NULL),
+-- (3,4,1,3),
+-- (3,5,1,3),
+-- (3,2,2,4),
+-- (3,6,3,2)
+-- ;
+-- -- source_vertex = 7
+-- DROP TABLE IF EXISTS out_actual;
+-- CREATE TABLE out_actual (src INT, id INT, dist INT, parent INT);
+-- -- source_vertex = 8
+-- DROP TABLE IF EXISTS out_actual;
+-- CREATE TABLE out_actual (src INT, id INT, dist INT, parent INT);
+-- (8, 8, 0, NULL),
+-- (8, 9, 1, 8),
+-- (8, 10, 2, 9),
+-- (8, 11, 2, 9)
+-- ;
+
+-- source_vertex = 3
+DROP TABLE IF EXISTS out_frombfs, out_frombfs_summary;
+SELECT 
graph_bfs('vertex','id','edge','src,dest',3,'out_frombfs',NULL,TRUE,NULL);
+
+SELECT assert(
+       (array_agg(id ORDER BY id) = ARRAY[4,5]::INT[])
+       AND 
+       (array_agg(parent ORDER BY id) = ARRAY[3,3]::INT[]),
+       'Wrong output in graph (BFS)') 
+       FROM out_frombfs WHERE dist = 1 GROUP BY dist;
+SELECT assert(COUNT(*) = 5, 'Wrong output in graph (BFS)') FROM out_frombfs;
+
+-- source_vertex = 7
+DROP TABLE IF EXISTS out_frombfs, out_frombfs_summary;
+SELECT 
graph_bfs('vertex','id','edge','src,dest',7,'out_frombfs',NULL,TRUE,NULL);
+
+SELECT assert(COUNT(*) = 0, 'Wrong output in graph (BFS)') FROM out_frombfs;
+
+-- source_vertex = 8
+DROP TABLE IF EXISTS out_frombfs, out_frombfs_summary;
+SELECT graph_bfs('vertex',NULL,'edge',NULL,8,'out_frombfs',NULL,TRUE);
+
+SELECT assert(COUNT(*) = 2, 'Wrong output in graph (BFS)') 
+       FROM out_frombfs WHERE dist = 2;
+
+-----------------------
+-- max_distance check
+-----------------------
+-- source_vertex = 3
+-- Undirected graph
+DROP TABLE IF EXISTS out_frombfs, out_frombfs_summary;
+SELECT graph_bfs('vertex','id','edge','src,dest',3,'out_frombfs',2,FALSE);
+
+SELECT assert(MAX(dist) = 2 AND COUNT(*) = 6,
+       'Wrong output in graph (BFS)') FROM out_frombfs;
+
+-- Directed graph
+DROP TABLE IF EXISTS out_frombfs, out_frombfs_summary;
+SELECT graph_bfs('vertex',NULL,'edge',NULL,3,'out_frombfs',2,TRUE);
+
+SELECT assert(MAX(dist) = 2 AND COUNT(*) = 4,
+       'Wrong output in graph (BFS)') FROM out_frombfs;
+
+---------------------
+-- Grouping columns
+---------------------
+-- -- source_vertex = 3 
+-- -- Undirected graph
+-- DROP TABLE IF EXISTS out_actual;
+-- CREATE TABLE out_actual (src INT, id INT, dist INT, parent INT);
+-- INSERT INTO out_actual VALUES
+-- (3,100,'a',3,0,NULL),
+-- (3,100,'a',1,1,3),
+-- (3,100,'a',4,1,3),
+-- (3,100,'a',5,1,3),
+-- (3,100,'a',0,2,1),
+-- (3,100,'a',2,2,4),
+-- (3,100,'a',6,3,2),
+-- (3,100,'b',3,0,NULL),
+-- (3,100,'b',1,1,3),
+-- (3,100,'b',4,1,3),
+-- (3,100,'b',5,1,3),
+-- (3,100,'b',0,2,1),
+-- (3,100,'b',2,2,4),
+-- (3,100,'b',6,3,2)
+-- ;
+
+DROP TABLE IF EXISTS out_frombfs, out_frombfs_summary;
+SELECT 
graph_bfs('vertex',NULL,'edge_grp',NULL,3,'out_frombfs',NULL,NULL,'g1,g2');
+
+SELECT assert( COUNT(*) = 14,'Wrong output in graph (BFS)') FROM out_frombfs;
+SELECT assert(COUNT(*) = 7,
+       'Wrong output in graph (BFS)') FROM out_frombfs WHERE g2 = 'a'
+;
+SELECT assert(
+       array_agg(id ORDER BY id) = ARRAY[0,2]::INT[] 
+       AND 
+       array_agg(parent ORDER BY id) = ARRAY[1,4]::INT[],
+       'Wrong output in graph (BFS)')
+       FROM out_frombfs WHERE dist = 2 AND g1 = 100 AND g2 = 'a'
+;
+SELECT assert(
+       array_agg(id ORDER BY id) = ARRAY[0,2]::INT[] 
+       AND 
+       array_agg(parent ORDER BY id) = ARRAY[1,4]::INT[],
+       'Wrong output in graph (BFS)')
+       FROM out_frombfs WHERE dist = 2 AND g1 = 100 AND g2 = 'b'
+;
+SELECT assert(MIN(g1) = 100 AND MAX(g1) = 100,
+       'Wrong output in graph (BFS)') FROM out_frombfs GROUP BY g1,g2
+;


Reply via email to