incubator-madlib git commit: Graph: Fix multiple bugs

okislal Wed, 02 Aug 2017 11:36:23 -0700

Repository: incubator-madlib
Updated Branches:
  refs/heads/master 06788cc48 -> a1f980336



Graph: Fix multiple bugs

Fix quoted output table name bug
Fix empty string arguments bug

Closes #154


Project: http://git-wip-us.apache.org/repos/asf/incubator-madlib/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-madlib/commit/a1f98033
Tree: http://git-wip-us.apache.org/repos/asf/incubator-madlib/tree/a1f98033
Diff: http://git-wip-us.apache.org/repos/asf/incubator-madlib/diff/a1f98033

Branch: refs/heads/master
Commit: a1f9803360473bb671516ee6afa04216f834c04f
Parents: 06788cc
Author: Orhan Kislal <okis...@pivotal.io>
Authored: Tue Aug 1 13:31:20 2017 -0700
Committer: Orhan Kislal <okis...@pivotal.io>
Committed: Wed Aug 2 11:34:08 2017 -0700

----------------------------------------------------------------------
 src/ports/postgres/modules/graph/apsp.py_in     |  47 ++---
 src/ports/postgres/modules/graph/bfs.py_in      | 171 ++++++++++---------
 src/ports/postgres/modules/graph/pagerank.py_in |   6 +-
 src/ports/postgres/modules/graph/sssp.py_in     |  52 +++---
 .../postgres/modules/graph/test/sssp.sql_in     |  15 +-
 5 files changed, 161 insertions(+), 130 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-madlib/blob/a1f98033/src/ports/postgres/modules/graph/apsp.py_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/graph/apsp.py_in 
b/src/ports/postgres/modules/graph/apsp.py_in
index 42ee698..be18a61 100644
--- a/src/ports/postgres/modules/graph/apsp.py_in
+++ b/src/ports/postgres/modules/graph/apsp.py_in
@@ -35,6 +35,7 @@ from graph_utils import _grp_from_table
 from graph_utils import _check_groups
 from utilities.control import MinWarning
 from utilities.utilities import _assert
+from utilities.utilities import add_postfix
 from utilities.utilities import extract_keyvalue_params
 from utilities.utilities import unique_string
 from utilities.utilities import split_quoted_delimited_str
@@ -72,16 +73,19 @@ def graph_apsp(schema_madlib, vertex_table, vertex_id, 
edge_table,
         edge_params = extract_keyvalue_params(edge_args, params_types, 
default_args)
 
         # Prepare the input for recording in the summary table
-        if vertex_id is None:
+
+        if not vertex_id:
             v_st = ''
             vertex_id = "id"
         else:
             v_st = vertex_id
-        if edge_args is None:
+
+        if not edge_args:
             e_st = ''
         else:
             e_st = edge_args
-        if grouping_cols is None:
+
+        if not grouping_cols:
             g_st = ''
             glist = None
         else:
@@ -162,7 +166,8 @@ def graph_apsp(schema_madlib, vertex_table, vertex_id, 
edge_table,
         # We keep a summary table to keep track of the parameters used for this
         # APSP run. This table is used in the path finding function to 
eliminate
         # the need for repetition.
-        plpy.execute(""" CREATE TABLE {out_table}_summary  (
+        summary_table = add_postfix(out_table, "_summary")
+        plpy.execute(""" CREATE TABLE {summary_table} (
             vertex_table            TEXT,
             vertex_id               TEXT,
             edge_table              TEXT,
@@ -170,7 +175,7 @@ def graph_apsp(schema_madlib, vertex_table, vertex_id, 
edge_table,
             out_table               TEXT,
             grouping_cols           TEXT)
             """.format(**locals()))
-        plpy.execute(""" INSERT INTO {out_table}_summary VALUES
+        plpy.execute(""" INSERT INTO {summary_table} VALUES
             ('{vertex_table}', '{v_st}', '{edge_table}', '{e_st}',
             '{out_table}', '{g_st}') """.format(**locals()))
 
@@ -308,7 +313,7 @@ def graph_apsp(schema_madlib, vertex_table, vertex_id, 
edge_table,
         if v_cnt < 2:
             plpy.execute("DROP VIEW IF EXISTS {0}".format(tmp_view))
             plpy.execute("DROP TABLE IF EXISTS {0},{1}".
-                         format(out_table, out_table + "_summary"))
+                         format(out_table, summary_table))
             if is_hawq:
                 plpy.execute("DROP TABLE IF EXISTS {0},{1}".format(
                     out_table_1, out_table_2))
@@ -428,10 +433,10 @@ def graph_apsp(schema_madlib, vertex_table, vertex_id, 
edge_table,
         if i == v_cnt:
 
             # If there are no groups, clean up and give error.
-            if grouping_cols is None:
+            if not grouping_cols:
                 plpy.execute("DROP VIEW IF EXISTS {0}".format(tmp_view))
                 plpy.execute("DROP TABLE IF EXISTS {0},{1}".
-                             format(out_table, out_table + "_summary"))
+                             format(out_table, summary_table))
                 if is_hawq:
                     plpy.execute("DROP TABLE IF EXISTS {0},{1}".format(
                         out_table_1, out_table_2))
@@ -476,7 +481,7 @@ def graph_apsp(schema_madlib, vertex_table, vertex_id, 
edge_table,
                 # drop the output table as well.
                 if table_is_empty(out_table):
                     plpy.execute("DROP TABLE IF EXISTS {0},{1}".
-                                 format(out_table, out_table + "_summary"))
+                                 format(out_table, summary_table))
                 plpy.warning(
                     """Graph APSP: Detected a negative cycle in the """ +
                     """sub-graphs of following groups: {0}.""".
@@ -509,8 +514,8 @@ def graph_apsp_get_path(schema_madlib, apsp_table,
 
         temp1_name = unique_string(desp='temp1')
         temp2_name = unique_string(desp='temp2')
-
-        summary = plpy.execute("SELECT * FROM {0}_summary".format(apsp_table))
+        summary_table = add_postfix(apsp_table, "_summary")
+        summary = plpy.execute("SELECT * FROM {0}".format(summary_table))
         vertex_id = summary[0]['vertex_id']
         edge_args = summary[0]['edge_args']
         grouping_cols = summary[0]['grouping_cols']
@@ -525,10 +530,11 @@ def graph_apsp_get_path(schema_madlib, apsp_table,
         dest = edge_params["dest"]
         weight = edge_params["weight"]
 
-        if not vertex_id or vertex_id == "NULL":
+
+        if not vertex_id:
             vertex_id = "id"
 
-        if not grouping_cols or grouping_cols == "NULL":
+        if not grouping_cols:
             grouping_cols = None
 
         select_grps = ""
@@ -537,7 +543,7 @@ def graph_apsp_get_path(schema_madlib, apsp_table,
         grp_comma = ""
         tmp = ""
 
-        if grouping_cols is not None:
+        if grouping_cols:
             glist = split_quoted_delimited_str(grouping_cols)
             select_grps = _grp_from_table(apsp_table, glist) + " , "
             check_grps_t1 = " AND " + _check_groups(
@@ -660,7 +666,8 @@ def _validate_apsp(vertex_table, vertex_id, edge_table, 
edge_params,
             """Graph APSP: Source vertex table {vertex_table} contains 
duplicate vertex id's.""".
             format(**locals()))
 
-    _assert(not table_exists(out_table + "_summary"),
+    summary_table = add_postfix(out_table, "_summary")
+    _assert(not table_exists(summary_table),
             "Graph APSP: Output summary table already exists!")
 
     if glist is not None:
@@ -679,11 +686,11 @@ def _validate_get_path(apsp_table, source_vertex, 
dest_vertex,
     _assert(not table_is_empty(apsp_table),
             "Graph APSP: APSP table ({0}) is empty!".format(apsp_table))
 
-    summary = apsp_table + "_summary"
-    _assert(table_exists(summary),
-            "Graph APSP: APSP summary table ({0}) is missing!".format(summary))
-    _assert(not table_is_empty(summary),
-            "Graph APSP: APSP summary table ({0}) is empty!".format(summary))
+    summary_table = add_postfix(apsp_table, "_summary")
+    _assert(table_exists(summary_table),
+            "Graph APSP: APSP summary table ({0}) is 
missing!".format(summary_table))
+    _assert(not table_is_empty(summary_table),
+            "Graph APSP: APSP summary table ({0}) is 
empty!".format(summary_table))
 
     _assert(not table_exists(path_table),
             "Graph APSP: Output path table already exists!")

http://git-wip-us.apache.org/repos/asf/incubator-madlib/blob/a1f98033/src/ports/postgres/modules/graph/bfs.py_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/graph/bfs.py_in 
b/src/ports/postgres/modules/graph/bfs.py_in
index a5631ea..ea03cf1 100644
--- a/src/ports/postgres/modules/graph/bfs.py_in
+++ b/src/ports/postgres/modules/graph/bfs.py_in
@@ -33,6 +33,7 @@ from graph_utils import get_graph_usage
 from graph_utils import _grp_null_checks
 from utilities.control import MinWarning
 from utilities.utilities import _assert
+from utilities.utilities import add_postfix
 from utilities.utilities import extract_keyvalue_params
 from utilities.utilities import split_quoted_delimited_str
 from utilities.validate_args import table_exists
@@ -47,8 +48,8 @@ def _validate_bfs(vertex_table, vertex_id, edge_table, 
edge_params,
         out_table,'BFS')
 
     _assert((max_distance >= 0) and isinstance(max_distance,int),
-        """Graph BFS: Invalid max_distance type or value ({0}), must be 
integer, 
-        be greater than or equal to 0 and be less than max allowable integer 
+        """Graph BFS: Invalid max_distance type or value ({0}), must be 
integer,
+        be greater than or equal to 0 and be less than max allowable integer
         (2147483647).""".
         format(max_distance))
 
@@ -64,7 +65,7 @@ def _validate_bfs(vertex_table, vertex_id, edge_table, 
edge_params,
         """.format(**locals()))
     if src_exists.nrows() == 0:
         plpy.error(
-            """Graph BFS: Source vertex {source_vertex} is not present in the 
+            """Graph BFS: Source vertex {source_vertex} is not present in the
             vertex table {vertex_table}.""".
             format(**locals()))
 
@@ -76,16 +77,17 @@ def _validate_bfs(vertex_table, vertex_id, edge_table, 
edge_params,
             HAVING count(*) > 1 """.format(**locals()))
     if vt_error.nrows() != 0:
         plpy.error(
-            """Graph BFS: Source vertex table {vertex_table} contains 
duplicate 
+            """Graph BFS: Source vertex table {vertex_table} contains duplicate
             vertex id's.""".
             format(**locals()))
 
-    _assert(not table_exists(out_table+"_summary"),
+    summary_table = add_postfix(out_table, "_summary")
+    _assert(not table_exists(summary_table),
         "Graph BFS: Output summary table already exists!")
 
     if grouping_cols_list is not None:
         _assert(columns_exist_in_table(edge_table, grouping_cols_list),
-            """Graph BFS: Not all columns from {grouping_cols_list} are 
present 
+            """Graph BFS: Not all columns from {grouping_cols_list} are present
             in edge table ({edge_table}).""".
             format(**locals()))
 
@@ -107,7 +109,7 @@ def graph_bfs(schema_madlib, vertex_table, vertex_id, 
edge_table,
         @param source_vertex   The source vertex id for the algorithm to start.
         @param out_table       Name of the table to store the result of BFS.
         @param max_distance    Maximum distance from the source_vertex to 
search for.
-        @param directed        Graph will be treated as directed if this 
boolean flag 
+        @param directed        Graph will be treated as directed if this 
boolean flag
                                is set to TRUE. Graph is treated as undirected 
by default.
         @param grouping_cols   The list of grouping columns.
 
@@ -125,35 +127,39 @@ def graph_bfs(schema_madlib, vertex_table, vertex_id, 
edge_table,
                                             default_args)
 
         # Prepare the input for recording in the summary table
-        if vertex_id is None:
-            v_st= "NULL"
+        if not vertex_id:
+            v_st = ''
             vertex_id = "id"
         else:
             v_st = vertex_id
-        if edge_args is None:
-            e_st = "NULL"
+
+        if not edge_args:
+            e_st = ''
         else:
             e_st = edge_args
+
+        if not grouping_cols:
+            g_st = ''
+            glist = None
+        else:
+            g_st = grouping_cols
+            glist = split_quoted_delimited_str(grouping_cols)
+
         if max_distance is None:
             d_st= "NULL"
             max_distance = INT_MAX
         else:
             d_st = max_distance
+
         if directed is None:
             dir_st= "NULL"
             directed = False
         else:
             dir_st = directed
-        if grouping_cols is None or grouping_cols is '':
-            g_st = "NULL"
-            glist = None
-        else:
-            g_st = grouping_cols
-            glist = split_quoted_delimited_str(grouping_cols)
 
         src = edge_params["src"]
         dest = edge_params["dest"]
-        
+
         distribution = m4_ifdef(<!__POSTGRESQL__!>, <!''!>,
             <!"DISTRIBUTED BY ({0})".format(vertex_id)!>)
         local_distribution = m4_ifdef(<!__POSTGRESQL__!>, <!''!>,
@@ -166,10 +172,10 @@ def graph_bfs(schema_madlib, vertex_table, vertex_id, 
edge_table,
         grp_comma = ""
         and_grp_null_checks = ""
 
-        if grouping_cols is not None and grouping_cols is not '':
+        if grouping_cols and grouping_cols is not '':
             grp_comma = grouping_cols + ", "
             and_grp_null_checks = " AND " + _grp_null_checks(glist)
-            
+
         # We keep a table of every vertex, the distance to that vertex from 
source
         # and the parent in the path to the vertex.
         # This table will be updated throughout the execution.
@@ -179,20 +185,21 @@ def graph_bfs(schema_madlib, vertex_table, vertex_id, 
edge_table,
 
         # Creating the output table with the appropriate columns and data types
         plpy.execute("""
-            CREATE TABLE {out_table} AS ( 
+            CREATE TABLE {out_table} AS (
                 SELECT
-                    {grp_comma} 
-                    {src} AS {vertex_id}, 
+                    {grp_comma}
+                    {src} AS {vertex_id},
                     {curr_dist_val}::INT AS {dist_col},
                     {src} AS {parent_col}
-                FROM {edge_table} 
+                FROM {edge_table}
                 LIMIT 0
             ) {distribution}""".format(**locals()))
 
         # We keep a summary table to keep track of the parameters used for this
         # BFS run
-        plpy.execute( """ 
-            CREATE TABLE {out_table}_summary  (
+        summary_table = add_postfix(out_table, "_summary")
+        plpy.execute( """
+            CREATE TABLE {summary_table}  (
                 vertex_table            TEXT,
                 vertex_id               TEXT,
                 edge_table              TEXT,
@@ -204,13 +211,13 @@ def graph_bfs(schema_madlib, vertex_table, vertex_id, 
edge_table,
                 grouping_cols           TEXT
             )""".format(**locals()))
 
-        plpy.execute(""" 
-            INSERT INTO {out_table}_summary VALUES
+        plpy.execute("""
+            INSERT INTO {summary_table} VALUES
                 ('{vertex_table}', '{v_st}', '{edge_table}', '{e_st}',
                 {source_vertex}, '{out_table}', {d_st}, {dir_st}, '{g_st}')
             """.format(**locals()))
 
-        
+
         # The queries for directed and undirected graphs share a common 
section.
         # There are additional clauses added to the undirected graph queries.
         # In the undirected case edges can be considered to go from {src} to
@@ -225,11 +232,11 @@ def graph_bfs(schema_madlib, vertex_table, vertex_id, 
edge_table,
                 """.format(**locals())
 
             count_qry_undirected = """ OR (
-                    ({grp_comma} {dest}) IN ( 
-                        SELECT {grp_comma} {vertex_id} FROM {out_table} 
+                    ({grp_comma} {dest}) IN (
+                        SELECT {grp_comma} {vertex_id} FROM {out_table}
                         WHERE {dist_col}={{curr_dist_val}}
-                    ) 
-                    AND 
+                    )
+                    AND
                     ({grp_comma} {src}) NOT IN (
                         SELECT {grp_comma} {vertex_id} FROM {out_table}
                     )
@@ -237,30 +244,30 @@ def graph_bfs(schema_madlib, vertex_table, vertex_id, 
edge_table,
             """.format(**locals())
 
             insert_qry_undirected_loop = """ UNION
-                SELECT {grp_comma} 
-                    {src} AS {vertex_id}, 
-                    {{curr_dist_val}}+1 AS {dist_col}, 
+                SELECT {grp_comma}
+                    {src} AS {vertex_id},
+                    {{curr_dist_val}}+1 AS {dist_col},
                     {dest} AS {parent_col}
-                FROM {edge_table} 
+                FROM {edge_table}
                 WHERE (
                     ({grp_comma} {dest}) IN (
-                        SELECT {grp_comma} {vertex_id} FROM {out_table} 
+                        SELECT {grp_comma} {vertex_id} FROM {out_table}
                         WHERE {dist_col}={{curr_dist_val}}
-                    ) 
-                    AND 
+                    )
+                    AND
                     ({grp_comma} {src}) NOT IN (
                         SELECT {grp_comma} {vertex_id} FROM {out_table}
                     )
                 )
             """.format(**locals())
 
-        # This step inserts into the output table the source vertex for each 
+        # This step inserts into the output table the source vertex for each
         # group in which it is present. Grouping behavior is not predictable
         # when there are NULLs in any grouping column. Therefore those rows
         # are explicitly removed from analysis
         insert_qry_init = """
             INSERT INTO {out_table}
-            SELECT {grp_comma} 
+            SELECT {grp_comma}
                 {source_vertex} AS {vertex_id},
                 {curr_dist_val} AS {dist_col},
                 NULL AS {parent_col}
@@ -272,17 +279,17 @@ def graph_bfs(schema_madlib, vertex_table, vertex_id, 
edge_table,
         plpy.execute(insert_qry_init.format(**locals()))
 
         # After initialization of the output table, number of nodes connected
-        # by edges to the source vertex in each group is counted. This is also 
used 
+        # by edges to the source vertex in each group is counted. This is also 
used
         # below in the BFS iteration while-loop
         count_qry = """
-            SELECT COUNT(*) 
-            FROM {edge_table} 
+            SELECT COUNT(*)
+            FROM {edge_table}
             WHERE (
-                ({grp_comma} {src}) IN ( 
-                    SELECT {grp_comma} {vertex_id} FROM {out_table} 
+                ({grp_comma} {src}) IN (
+                    SELECT {grp_comma} {vertex_id} FROM {out_table}
                     WHERE {dist_col}={{curr_dist_val}}
-                ) 
-                AND 
+                )
+                AND
                 ({grp_comma} {dest}) NOT IN (
                     SELECT {grp_comma} {vertex_id} FROM {out_table}
                 )
@@ -291,24 +298,24 @@ def graph_bfs(schema_madlib, vertex_table, vertex_id, 
edge_table,
 
         vct = plpy.execute(count_qry.format(**locals()))[0]['count']
 
-        # This insert statement is executed within the BFS iteration 
while-loop 
-        # below. It is used to discover and store all nodes (not already 
found) 
+        # This insert statement is executed within the BFS iteration while-loop
+        # below. It is used to discover and store all nodes (not already found)
         # connected to those found in the immediate previous iteration.
         insert_qry_loop = """
             INSERT INTO {out_table}
             SELECT {grp_comma} {vertex_id}, {dist_col}, min({parent_col})
             FROM (
-                SELECT {grp_comma} 
-                    {dest} AS {vertex_id}, 
-                    {{curr_dist_val}}+1 AS {dist_col}, 
+                SELECT {grp_comma}
+                    {dest} AS {vertex_id},
+                    {{curr_dist_val}}+1 AS {dist_col},
                     {src} AS {parent_col}
-                FROM {edge_table} 
+                FROM {edge_table}
                 WHERE (
                     ({grp_comma} {src}) IN (
-                        SELECT {grp_comma} {vertex_id} FROM {out_table} 
+                        SELECT {grp_comma} {vertex_id} FROM {out_table}
                         WHERE {dist_col}={{curr_dist_val}}
-                    ) 
-                    AND 
+                    )
+                    AND
                     ({grp_comma} {dest}) NOT IN (
                         SELECT {grp_comma} {vertex_id} FROM {out_table}
                     )
@@ -321,41 +328,41 @@ def graph_bfs(schema_madlib, vertex_table, vertex_id, 
edge_table,
         # Main loop for traversing the graph
         while vct > 0 and curr_dist_val < max_distance:
             # The loop consists of two steps:
-            # 1) Disover and store all nodes that are linked to nodes found in 
-            #    the immediate previous iteration of the loop that have not 
already 
-            #    been found in all previous iterations  
+            # 1) Disover and store all nodes that are linked to nodes found in
+            #    the immediate previous iteration of the loop that have not 
already
+            #    been found in all previous iterations
             # 2) Check for any nodes linked to those discovered in Step 1 above
             #    that have not yet been discovered
-            # 
-            # If a node has multiple possible parents then the parent with the 
+            #
+            # If a node has multiple possible parents then the parent with the
             # smallest ID is chosen for output
 
-            # In the directed graph case only nodes in the {dest} column of 
-            # the edge table are searched to find new nodes reachable from 
+            # In the directed graph case only nodes in the {dest} column of
+            # the edge table are searched to find new nodes reachable from
             # previously discovered nodes
 
-            # In the undirected graph case edges are treated as 
non-directional 
-            # (or bidirectional). Nodes in both the {src} and {dest} columns 
of 
-            # the edge table are searched to find new nodes reachable from 
+            # In the undirected graph case edges are treated as non-directional
+            # (or bidirectional). Nodes in both the {src} and {dest} columns of
+            # the edge table are searched to find new nodes reachable from
             # previously discovered nodes.
             #
             # This approach does NOT require the user to provide a forward edge
-            # and a reverse edge between the same two nodes to indicate the 
+            # and a reverse edge between the same two nodes to indicate the
             # graph's undirected nature. However, it will work in that scenario
             # as well.
 
-            # Discover and store all nodes (not already found) connected to 
+            # Discover and store all nodes (not already found) connected to
             # those found in the immediate previous iteration
             plpy.execute(insert_qry_loop.format(**locals()))
 
             # Update distance value for next iteration
             curr_dist_val = curr_dist_val + 1
 
-            # Count / find any nodes that are connected to those discovered 
and 
+            # Count / find any nodes that are connected to those discovered and
             # stored in this iteration. This is used to check if the iterations
             # need to continue.
             vct = plpy.execute(count_qry.format(**locals()))[0]['count']
-    
+
     return None
 
 def graph_bfs_help(schema_madlib, message, **kwargs):
@@ -392,18 +399,18 @@ finds all nodes reachable from the source vertex.
 ----------------------------------------------------------------------------
                             OUTPUT
 ----------------------------------------------------------------------------
-The output of BFS ('out_table' above) contains a row for every vertex of that 
is 
-reachable from the source_vertex. In the presence of grouping columns, only 
those 
+The output of BFS ('out_table' above) contains a row for every vertex of that 
is
+reachable from the source_vertex. In the presence of grouping columns, only 
those
 edges are used for which there are no NULL values in any grouping column.
-The output table will have the following columns (in addition to the 
+The output table will have the following columns (in addition to the
 grouping columns):
-  - vertex_id : The id for any node reachable from source_vertex in addition 
to 
-                the source_vertex. Will use the input parameter 'vertex_id' 
for 
+  - vertex_id : The id for any node reachable from source_vertex in addition to
+                the source_vertex. Will use the input parameter 'vertex_id' for
                 column naming.
-  - dist      : The distance in number of edges (or hops) from the 
source_vertex 
-                to where this vertex is located. 
-  - parent    : The parent of this vertex in BFS traversal of the graph from 
-                source_vertex. Will use 'parent' for column naming. For the 
+  - dist      : The distance in number of edges (or hops) from the 
source_vertex
+                to where this vertex is located.
+  - parent    : The parent of this vertex in BFS traversal of the graph from
+                source_vertex. Will use 'parent' for column naming. For the
                 case where vertex_id = source_vertex, the value for parent is 
NULL.
 """
     elif message.lower() in ("example", "examples"):
@@ -457,7 +464,7 @@ SELECT madlib.graph_bfs(
                          NULL,          -- Edge arguments (NULL means use 
default naming)
                          3,             -- Source vertex for BFS
                          'out'          -- Output table of nodes reachable 
from source_vertex
-                        );        
+                        );
                         -- Default values used for the other arguments
 SELECT * FROM out ORDER BY dist,id;
 

http://git-wip-us.apache.org/repos/asf/incubator-madlib/blob/a1f98033/src/ports/postgres/modules/graph/pagerank.py_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/graph/pagerank.py_in 
b/src/ports/postgres/modules/graph/pagerank.py_in
index 95bc4b4..7768de1 100644
--- a/src/ports/postgres/modules/graph/pagerank.py_in
+++ b/src/ports/postgres/modules/graph/pagerank.py_in
@@ -30,6 +30,7 @@
 import plpy
 from utilities.control import MinWarning
 from utilities.utilities import _assert
+from utilities.utilities import add_postfix
 from utilities.utilities import extract_keyvalue_params
 from utilities.utilities import unique_string, split_quoted_delimited_str
 from utilities.utilities import is_platform_pg
@@ -88,7 +89,7 @@ def pagerank(schema_madlib, vertex_table, vertex_id, 
edge_table, edge_args,
             damping_factor = 0.85
         if max_iter is None:
             max_iter = 100
-        if vertex_id is None:
+        if not vertex_id:
             vertex_id = "id"
         if not grouping_cols:
             grouping_cols = ''
@@ -97,7 +98,8 @@ def pagerank(schema_madlib, vertex_table, vertex_id, 
edge_table, edge_args,
         validate_pagerank_args(schema_madlib, vertex_table, vertex_id, 
edge_table,
                                edge_params, out_table, damping_factor,
                                max_iter, threshold, grouping_cols_list)
-        summary_table = out_table + "_summary"
+
+        summary_table = add_postfix(out_table, "_summary")
         _assert(not table_exists(summary_table),
                 "Graph PageRank: Output summary table ({summary_table}) 
already exists."
                 .format(**locals()))

http://git-wip-us.apache.org/repos/asf/incubator-madlib/blob/a1f98033/src/ports/postgres/modules/graph/sssp.py_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/graph/sssp.py_in 
b/src/ports/postgres/modules/graph/sssp.py_in
index 93497c4..77ff1ff 100644
--- a/src/ports/postgres/modules/graph/sssp.py_in
+++ b/src/ports/postgres/modules/graph/sssp.py_in
@@ -35,6 +35,7 @@ from graph_utils import _check_groups
 from utilities.control import MinWarning
 
 from utilities.utilities import _assert
+from utilities.utilities import add_postfix
 from utilities.utilities import extract_keyvalue_params
 from utilities.utilities import unique_string
 from utilities.utilities import split_quoted_delimited_str
@@ -82,17 +83,19 @@ def graph_sssp(schema_madlib, vertex_table, vertex_id, 
edge_table,
                                               default_args)
 
         # Prepare the input for recording in the summary table
-        if vertex_id is None:
-            v_st = "NULL"
+        if not vertex_id:
+            v_st = ''
             vertex_id = "id"
         else:
             v_st = vertex_id
-        if edge_args is None:
-            e_st = "NULL"
+
+        if not edge_args:
+            e_st = ''
         else:
             e_st = edge_args
-        if grouping_cols is None:
-            g_st = "NULL"
+
+        if not grouping_cols:
+            g_st = ''
             glist = None
         else:
             g_st = grouping_cols
@@ -124,7 +127,7 @@ def graph_sssp(schema_madlib, vertex_table, vertex_id, 
edge_table,
         checkg_om = ""
         group_by = ""
 
-        if grouping_cols is not None:
+        if grouping_cols:
             comma_grp = " , " + grouping_cols
             group_by = " , " + _grp_from_table(edge_table, glist)
             comma_grp_e = " , " + _grp_from_table(edge_table, glist)
@@ -154,7 +157,8 @@ def graph_sssp(schema_madlib, vertex_table, vertex_id, 
edge_table,
         # We keep a summary table to keep track of the parameters used for this
         # SSSP run. This table is used in the path finding function to 
eliminate
         # the need for repetition.
-        plpy.execute(""" CREATE TABLE {out_table}_summary  (
+        summary_table = add_postfix(out_table, "_summary")
+        plpy.execute(""" CREATE TABLE {summary_table}  (
             vertex_table            TEXT,
             vertex_id               TEXT,
             edge_table              TEXT,
@@ -163,7 +167,7 @@ def graph_sssp(schema_madlib, vertex_table, vertex_id, 
edge_table,
             out_table               TEXT,
             grouping_cols           TEXT)
             """.format(**locals()))
-        plpy.execute(""" INSERT INTO {out_table}_summary VALUES
+        plpy.execute(""" INSERT INTO {summary_table} VALUES
             ('{vertex_table}', '{v_st}', '{edge_table}', '{e_st}',
             {source_vertex}, '{out_table}', '{g_st}')
             """.format(**locals()))
@@ -207,7 +211,7 @@ def graph_sssp(schema_madlib, vertex_table, vertex_id, 
edge_table,
         # Example:
         # Assume there are two grouping columns g1 and g2
         # g1 values are 0 and 1. g2 values are 5 and 6
-        if grouping_cols is not None:
+        if grouping_cols:
 
             distinct_grp_table = unique_string(desp='grp')
             plpy.execute("DROP TABLE IF EXISTS {distinct_grp_table}".
@@ -378,9 +382,9 @@ def graph_sssp(schema_madlib, vertex_table, vertex_id, 
edge_table,
         # The algorithm should converge in less than |V| iterations.
         # Otherwise there is a negative cycle in the graph.
         if i == v_cnt:
-            if grouping_cols is None:
+            if not grouping_cols:
                 plpy.execute("DROP TABLE IF EXISTS {0},{1},{2}".
-                             format(out_table, out_table + "_summary", 
oldupdate))
+                             format(out_table, summary_table, oldupdate))
                 if is_hawq:
                     plpy.execute("DROP TABLE IF EXISTS {0}".format(temp_table))
                 plpy.error("Graph SSSP: Detected a negative cycle in the 
graph.")
@@ -423,7 +427,7 @@ def graph_sssp(schema_madlib, vertex_table, vertex_id, 
edge_table,
                 # drop the output table as well.
                 if table_is_empty(out_table):
                     plpy.execute("DROP TABLE IF EXISTS {0},{1}".
-                                 format(out_table, out_table + "_summary"))
+                                 format(out_table, summary_table))
 
                 plpy.warning(
                     """Graph SSSP: Detected a negative cycle in the """ +
@@ -460,18 +464,19 @@ def graph_sssp_get_path(schema_madlib, sssp_table, 
dest_vertex, path_table,
         grp_comma = ""
         tmp = ""
 
-        summary = plpy.execute("SELECT * FROM {0}_summary".format(sssp_table))
+        summary_table = add_postfix(sssp_table, "_summary")
+        summary = plpy.execute("SELECT * FROM {0}".format(summary_table))
         vertex_id = summary[0]['vertex_id']
         source_vertex = summary[0]['source_vertex']
 
-        if vertex_id == "NULL":
+        if not vertex_id:
             vertex_id = "id"
 
         grouping_cols = summary[0]['grouping_cols']
-        if grouping_cols == "NULL":
+        if not grouping_cols:
             grouping_cols = None
 
-        if grouping_cols is not None:
+        if grouping_cols:
             glist = split_quoted_delimited_str(grouping_cols)
             select_grps = _grp_from_table(sssp_table, glist) + " , "
             check_grps_t1 = " AND " + _check_groups(
@@ -583,7 +588,8 @@ def _validate_sssp(vertex_table, vertex_id, edge_table, 
edge_params,
         plpy.error("Graph SSSP: Source vertex table {vertex_table} "
                    "contains duplicate vertex id's.".format(**locals()))
 
-    _assert(not table_exists(out_table + "_summary"),
+    summary_table = add_postfix(out_table, "_summary")
+    _assert(not table_exists(summary_table),
             "Graph SSSP: Output summary table already exists!")
 
     if glist is not None:
@@ -602,11 +608,11 @@ def _validate_get_path(sssp_table, dest_vertex, 
path_table, **kwargs):
     _assert(not table_is_empty(sssp_table),
             "Graph SSSP: SSSP table ({0}) is empty!".format(sssp_table))
 
-    summary = sssp_table + "_summary"
-    _assert(table_exists(summary),
-            "Graph SSSP: SSSP summary table ({0}) is missing!".format(summary))
-    _assert(not table_is_empty(summary),
-            "Graph SSSP: SSSP summary table ({0}) is empty!".format(summary))
+    summary_table = add_postfix(sssp_table, "_summary")
+    _assert(table_exists(summary_table),
+            "Graph SSSP: SSSP summary table ({0}) is 
missing!".format(summary_table))
+    _assert(not table_is_empty(summary_table),
+            "Graph SSSP: SSSP summary table ({0}) is 
empty!".format(summary_table))
 
     _assert(not table_exists(path_table),
             "Graph SSSP: Output path table already exists!")

http://git-wip-us.apache.org/repos/asf/incubator-madlib/blob/a1f98033/src/ports/postgres/modules/graph/test/sssp.sql_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/graph/test/sssp.sql_in 
b/src/ports/postgres/modules/graph/test/sssp.sql_in
index c3545c2..621d55d 100644
--- a/src/ports/postgres/modules/graph/test/sssp.sql_in
+++ b/src/ports/postgres/modules/graph/test/sssp.sql_in
@@ -23,8 +23,8 @@
 DROP TABLE IF EXISTS vertex,edge,out,out_summary,out_path,
        vertex_alt,edge_alt,out_alt,out_alot_summary,
        edge_gr,out_gr,out_gr_summary,out_gr_path,
-       edge_gr2, out_gr2, out_gr2_summary;
-
+       edge_gr2, out_gr2, out_gr2_summary,
+       "edge_Q", "out_Q", "out_Q_summary", "out_Q_path";
 
 CREATE TABLE vertex(
                   id INTEGER
@@ -126,8 +126,17 @@ CREATE TABLE edge_gr2 AS
 
 SELECT graph_sssp('vertex',NULL,'edge_gr2',NULL,0,'out_gr2','grp1,grp2');
 
-
 SELECT assert(weight = 3, 'Wrong output in graph (SSSP)')
        FROM out_gr2 WHERE id = 6 AND grp1 = 0 AND grp2 = 0;
 SELECT assert(parent = 5, 'Wrong parent in graph (SSSP)')
        FROM out_gr2 WHERE id = 6 AND grp1 = 0 AND grp2 = 0;
+
+CREATE TABLE "edge_Q" AS SELECT src, dest AS "dest_Q", weight FROM edge;
+
+SELECT graph_sssp('vertex','','"edge_Q"','dest="dest_Q"',0,'"out_Q"','');
+
+SELECT * FROM "out_Q";
+SELECT * FROM "out_Q_summary";
+SELECT graph_sssp_get_path('"out_Q"',5,'"out_Q_path"');
+
+SELECT * FROM "out_Q_path";

incubator-madlib git commit: Graph: Fix multiple bugs

Reply via email to