Repository: incubator-madlib Updated Branches: refs/heads/master 06788cc48 -> a1f980336
Graph: Fix multiple bugs Fix quoted output table name bug Fix empty string arguments bug Closes #154 Project: http://git-wip-us.apache.org/repos/asf/incubator-madlib/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-madlib/commit/a1f98033 Tree: http://git-wip-us.apache.org/repos/asf/incubator-madlib/tree/a1f98033 Diff: http://git-wip-us.apache.org/repos/asf/incubator-madlib/diff/a1f98033 Branch: refs/heads/master Commit: a1f9803360473bb671516ee6afa04216f834c04f Parents: 06788cc Author: Orhan Kislal <okis...@pivotal.io> Authored: Tue Aug 1 13:31:20 2017 -0700 Committer: Orhan Kislal <okis...@pivotal.io> Committed: Wed Aug 2 11:34:08 2017 -0700 ---------------------------------------------------------------------- src/ports/postgres/modules/graph/apsp.py_in | 47 ++--- src/ports/postgres/modules/graph/bfs.py_in | 171 ++++++++++--------- src/ports/postgres/modules/graph/pagerank.py_in | 6 +- src/ports/postgres/modules/graph/sssp.py_in | 52 +++--- .../postgres/modules/graph/test/sssp.sql_in | 15 +- 5 files changed, 161 insertions(+), 130 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-madlib/blob/a1f98033/src/ports/postgres/modules/graph/apsp.py_in ---------------------------------------------------------------------- diff --git a/src/ports/postgres/modules/graph/apsp.py_in b/src/ports/postgres/modules/graph/apsp.py_in index 42ee698..be18a61 100644 --- a/src/ports/postgres/modules/graph/apsp.py_in +++ b/src/ports/postgres/modules/graph/apsp.py_in @@ -35,6 +35,7 @@ from graph_utils import _grp_from_table from graph_utils import _check_groups from utilities.control import MinWarning from utilities.utilities import _assert +from utilities.utilities import add_postfix from utilities.utilities import extract_keyvalue_params from utilities.utilities import unique_string from utilities.utilities import split_quoted_delimited_str @@ -72,16 +73,19 @@ def graph_apsp(schema_madlib, vertex_table, vertex_id, edge_table, edge_params = extract_keyvalue_params(edge_args, params_types, default_args) # Prepare the input for recording in the summary table - if vertex_id is None: + + if not vertex_id: v_st = '' vertex_id = "id" else: v_st = vertex_id - if edge_args is None: + + if not edge_args: e_st = '' else: e_st = edge_args - if grouping_cols is None: + + if not grouping_cols: g_st = '' glist = None else: @@ -162,7 +166,8 @@ def graph_apsp(schema_madlib, vertex_table, vertex_id, edge_table, # We keep a summary table to keep track of the parameters used for this # APSP run. This table is used in the path finding function to eliminate # the need for repetition. - plpy.execute(""" CREATE TABLE {out_table}_summary ( + summary_table = add_postfix(out_table, "_summary") + plpy.execute(""" CREATE TABLE {summary_table} ( vertex_table TEXT, vertex_id TEXT, edge_table TEXT, @@ -170,7 +175,7 @@ def graph_apsp(schema_madlib, vertex_table, vertex_id, edge_table, out_table TEXT, grouping_cols TEXT) """.format(**locals())) - plpy.execute(""" INSERT INTO {out_table}_summary VALUES + plpy.execute(""" INSERT INTO {summary_table} VALUES ('{vertex_table}', '{v_st}', '{edge_table}', '{e_st}', '{out_table}', '{g_st}') """.format(**locals())) @@ -308,7 +313,7 @@ def graph_apsp(schema_madlib, vertex_table, vertex_id, edge_table, if v_cnt < 2: plpy.execute("DROP VIEW IF EXISTS {0}".format(tmp_view)) plpy.execute("DROP TABLE IF EXISTS {0},{1}". - format(out_table, out_table + "_summary")) + format(out_table, summary_table)) if is_hawq: plpy.execute("DROP TABLE IF EXISTS {0},{1}".format( out_table_1, out_table_2)) @@ -428,10 +433,10 @@ def graph_apsp(schema_madlib, vertex_table, vertex_id, edge_table, if i == v_cnt: # If there are no groups, clean up and give error. - if grouping_cols is None: + if not grouping_cols: plpy.execute("DROP VIEW IF EXISTS {0}".format(tmp_view)) plpy.execute("DROP TABLE IF EXISTS {0},{1}". - format(out_table, out_table + "_summary")) + format(out_table, summary_table)) if is_hawq: plpy.execute("DROP TABLE IF EXISTS {0},{1}".format( out_table_1, out_table_2)) @@ -476,7 +481,7 @@ def graph_apsp(schema_madlib, vertex_table, vertex_id, edge_table, # drop the output table as well. if table_is_empty(out_table): plpy.execute("DROP TABLE IF EXISTS {0},{1}". - format(out_table, out_table + "_summary")) + format(out_table, summary_table)) plpy.warning( """Graph APSP: Detected a negative cycle in the """ + """sub-graphs of following groups: {0}.""". @@ -509,8 +514,8 @@ def graph_apsp_get_path(schema_madlib, apsp_table, temp1_name = unique_string(desp='temp1') temp2_name = unique_string(desp='temp2') - - summary = plpy.execute("SELECT * FROM {0}_summary".format(apsp_table)) + summary_table = add_postfix(apsp_table, "_summary") + summary = plpy.execute("SELECT * FROM {0}".format(summary_table)) vertex_id = summary[0]['vertex_id'] edge_args = summary[0]['edge_args'] grouping_cols = summary[0]['grouping_cols'] @@ -525,10 +530,11 @@ def graph_apsp_get_path(schema_madlib, apsp_table, dest = edge_params["dest"] weight = edge_params["weight"] - if not vertex_id or vertex_id == "NULL": + + if not vertex_id: vertex_id = "id" - if not grouping_cols or grouping_cols == "NULL": + if not grouping_cols: grouping_cols = None select_grps = "" @@ -537,7 +543,7 @@ def graph_apsp_get_path(schema_madlib, apsp_table, grp_comma = "" tmp = "" - if grouping_cols is not None: + if grouping_cols: glist = split_quoted_delimited_str(grouping_cols) select_grps = _grp_from_table(apsp_table, glist) + " , " check_grps_t1 = " AND " + _check_groups( @@ -660,7 +666,8 @@ def _validate_apsp(vertex_table, vertex_id, edge_table, edge_params, """Graph APSP: Source vertex table {vertex_table} contains duplicate vertex id's.""". format(**locals())) - _assert(not table_exists(out_table + "_summary"), + summary_table = add_postfix(out_table, "_summary") + _assert(not table_exists(summary_table), "Graph APSP: Output summary table already exists!") if glist is not None: @@ -679,11 +686,11 @@ def _validate_get_path(apsp_table, source_vertex, dest_vertex, _assert(not table_is_empty(apsp_table), "Graph APSP: APSP table ({0}) is empty!".format(apsp_table)) - summary = apsp_table + "_summary" - _assert(table_exists(summary), - "Graph APSP: APSP summary table ({0}) is missing!".format(summary)) - _assert(not table_is_empty(summary), - "Graph APSP: APSP summary table ({0}) is empty!".format(summary)) + summary_table = add_postfix(apsp_table, "_summary") + _assert(table_exists(summary_table), + "Graph APSP: APSP summary table ({0}) is missing!".format(summary_table)) + _assert(not table_is_empty(summary_table), + "Graph APSP: APSP summary table ({0}) is empty!".format(summary_table)) _assert(not table_exists(path_table), "Graph APSP: Output path table already exists!") http://git-wip-us.apache.org/repos/asf/incubator-madlib/blob/a1f98033/src/ports/postgres/modules/graph/bfs.py_in ---------------------------------------------------------------------- diff --git a/src/ports/postgres/modules/graph/bfs.py_in b/src/ports/postgres/modules/graph/bfs.py_in index a5631ea..ea03cf1 100644 --- a/src/ports/postgres/modules/graph/bfs.py_in +++ b/src/ports/postgres/modules/graph/bfs.py_in @@ -33,6 +33,7 @@ from graph_utils import get_graph_usage from graph_utils import _grp_null_checks from utilities.control import MinWarning from utilities.utilities import _assert +from utilities.utilities import add_postfix from utilities.utilities import extract_keyvalue_params from utilities.utilities import split_quoted_delimited_str from utilities.validate_args import table_exists @@ -47,8 +48,8 @@ def _validate_bfs(vertex_table, vertex_id, edge_table, edge_params, out_table,'BFS') _assert((max_distance >= 0) and isinstance(max_distance,int), - """Graph BFS: Invalid max_distance type or value ({0}), must be integer, - be greater than or equal to 0 and be less than max allowable integer + """Graph BFS: Invalid max_distance type or value ({0}), must be integer, + be greater than or equal to 0 and be less than max allowable integer (2147483647).""". format(max_distance)) @@ -64,7 +65,7 @@ def _validate_bfs(vertex_table, vertex_id, edge_table, edge_params, """.format(**locals())) if src_exists.nrows() == 0: plpy.error( - """Graph BFS: Source vertex {source_vertex} is not present in the + """Graph BFS: Source vertex {source_vertex} is not present in the vertex table {vertex_table}.""". format(**locals())) @@ -76,16 +77,17 @@ def _validate_bfs(vertex_table, vertex_id, edge_table, edge_params, HAVING count(*) > 1 """.format(**locals())) if vt_error.nrows() != 0: plpy.error( - """Graph BFS: Source vertex table {vertex_table} contains duplicate + """Graph BFS: Source vertex table {vertex_table} contains duplicate vertex id's.""". format(**locals())) - _assert(not table_exists(out_table+"_summary"), + summary_table = add_postfix(out_table, "_summary") + _assert(not table_exists(summary_table), "Graph BFS: Output summary table already exists!") if grouping_cols_list is not None: _assert(columns_exist_in_table(edge_table, grouping_cols_list), - """Graph BFS: Not all columns from {grouping_cols_list} are present + """Graph BFS: Not all columns from {grouping_cols_list} are present in edge table ({edge_table}).""". format(**locals())) @@ -107,7 +109,7 @@ def graph_bfs(schema_madlib, vertex_table, vertex_id, edge_table, @param source_vertex The source vertex id for the algorithm to start. @param out_table Name of the table to store the result of BFS. @param max_distance Maximum distance from the source_vertex to search for. - @param directed Graph will be treated as directed if this boolean flag + @param directed Graph will be treated as directed if this boolean flag is set to TRUE. Graph is treated as undirected by default. @param grouping_cols The list of grouping columns. @@ -125,35 +127,39 @@ def graph_bfs(schema_madlib, vertex_table, vertex_id, edge_table, default_args) # Prepare the input for recording in the summary table - if vertex_id is None: - v_st= "NULL" + if not vertex_id: + v_st = '' vertex_id = "id" else: v_st = vertex_id - if edge_args is None: - e_st = "NULL" + + if not edge_args: + e_st = '' else: e_st = edge_args + + if not grouping_cols: + g_st = '' + glist = None + else: + g_st = grouping_cols + glist = split_quoted_delimited_str(grouping_cols) + if max_distance is None: d_st= "NULL" max_distance = INT_MAX else: d_st = max_distance + if directed is None: dir_st= "NULL" directed = False else: dir_st = directed - if grouping_cols is None or grouping_cols is '': - g_st = "NULL" - glist = None - else: - g_st = grouping_cols - glist = split_quoted_delimited_str(grouping_cols) src = edge_params["src"] dest = edge_params["dest"] - + distribution = m4_ifdef(<!__POSTGRESQL__!>, <!''!>, <!"DISTRIBUTED BY ({0})".format(vertex_id)!>) local_distribution = m4_ifdef(<!__POSTGRESQL__!>, <!''!>, @@ -166,10 +172,10 @@ def graph_bfs(schema_madlib, vertex_table, vertex_id, edge_table, grp_comma = "" and_grp_null_checks = "" - if grouping_cols is not None and grouping_cols is not '': + if grouping_cols and grouping_cols is not '': grp_comma = grouping_cols + ", " and_grp_null_checks = " AND " + _grp_null_checks(glist) - + # We keep a table of every vertex, the distance to that vertex from source # and the parent in the path to the vertex. # This table will be updated throughout the execution. @@ -179,20 +185,21 @@ def graph_bfs(schema_madlib, vertex_table, vertex_id, edge_table, # Creating the output table with the appropriate columns and data types plpy.execute(""" - CREATE TABLE {out_table} AS ( + CREATE TABLE {out_table} AS ( SELECT - {grp_comma} - {src} AS {vertex_id}, + {grp_comma} + {src} AS {vertex_id}, {curr_dist_val}::INT AS {dist_col}, {src} AS {parent_col} - FROM {edge_table} + FROM {edge_table} LIMIT 0 ) {distribution}""".format(**locals())) # We keep a summary table to keep track of the parameters used for this # BFS run - plpy.execute( """ - CREATE TABLE {out_table}_summary ( + summary_table = add_postfix(out_table, "_summary") + plpy.execute( """ + CREATE TABLE {summary_table} ( vertex_table TEXT, vertex_id TEXT, edge_table TEXT, @@ -204,13 +211,13 @@ def graph_bfs(schema_madlib, vertex_table, vertex_id, edge_table, grouping_cols TEXT )""".format(**locals())) - plpy.execute(""" - INSERT INTO {out_table}_summary VALUES + plpy.execute(""" + INSERT INTO {summary_table} VALUES ('{vertex_table}', '{v_st}', '{edge_table}', '{e_st}', {source_vertex}, '{out_table}', {d_st}, {dir_st}, '{g_st}') """.format(**locals())) - + # The queries for directed and undirected graphs share a common section. # There are additional clauses added to the undirected graph queries. # In the undirected case edges can be considered to go from {src} to @@ -225,11 +232,11 @@ def graph_bfs(schema_madlib, vertex_table, vertex_id, edge_table, """.format(**locals()) count_qry_undirected = """ OR ( - ({grp_comma} {dest}) IN ( - SELECT {grp_comma} {vertex_id} FROM {out_table} + ({grp_comma} {dest}) IN ( + SELECT {grp_comma} {vertex_id} FROM {out_table} WHERE {dist_col}={{curr_dist_val}} - ) - AND + ) + AND ({grp_comma} {src}) NOT IN ( SELECT {grp_comma} {vertex_id} FROM {out_table} ) @@ -237,30 +244,30 @@ def graph_bfs(schema_madlib, vertex_table, vertex_id, edge_table, """.format(**locals()) insert_qry_undirected_loop = """ UNION - SELECT {grp_comma} - {src} AS {vertex_id}, - {{curr_dist_val}}+1 AS {dist_col}, + SELECT {grp_comma} + {src} AS {vertex_id}, + {{curr_dist_val}}+1 AS {dist_col}, {dest} AS {parent_col} - FROM {edge_table} + FROM {edge_table} WHERE ( ({grp_comma} {dest}) IN ( - SELECT {grp_comma} {vertex_id} FROM {out_table} + SELECT {grp_comma} {vertex_id} FROM {out_table} WHERE {dist_col}={{curr_dist_val}} - ) - AND + ) + AND ({grp_comma} {src}) NOT IN ( SELECT {grp_comma} {vertex_id} FROM {out_table} ) ) """.format(**locals()) - # This step inserts into the output table the source vertex for each + # This step inserts into the output table the source vertex for each # group in which it is present. Grouping behavior is not predictable # when there are NULLs in any grouping column. Therefore those rows # are explicitly removed from analysis insert_qry_init = """ INSERT INTO {out_table} - SELECT {grp_comma} + SELECT {grp_comma} {source_vertex} AS {vertex_id}, {curr_dist_val} AS {dist_col}, NULL AS {parent_col} @@ -272,17 +279,17 @@ def graph_bfs(schema_madlib, vertex_table, vertex_id, edge_table, plpy.execute(insert_qry_init.format(**locals())) # After initialization of the output table, number of nodes connected - # by edges to the source vertex in each group is counted. This is also used + # by edges to the source vertex in each group is counted. This is also used # below in the BFS iteration while-loop count_qry = """ - SELECT COUNT(*) - FROM {edge_table} + SELECT COUNT(*) + FROM {edge_table} WHERE ( - ({grp_comma} {src}) IN ( - SELECT {grp_comma} {vertex_id} FROM {out_table} + ({grp_comma} {src}) IN ( + SELECT {grp_comma} {vertex_id} FROM {out_table} WHERE {dist_col}={{curr_dist_val}} - ) - AND + ) + AND ({grp_comma} {dest}) NOT IN ( SELECT {grp_comma} {vertex_id} FROM {out_table} ) @@ -291,24 +298,24 @@ def graph_bfs(schema_madlib, vertex_table, vertex_id, edge_table, vct = plpy.execute(count_qry.format(**locals()))[0]['count'] - # This insert statement is executed within the BFS iteration while-loop - # below. It is used to discover and store all nodes (not already found) + # This insert statement is executed within the BFS iteration while-loop + # below. It is used to discover and store all nodes (not already found) # connected to those found in the immediate previous iteration. insert_qry_loop = """ INSERT INTO {out_table} SELECT {grp_comma} {vertex_id}, {dist_col}, min({parent_col}) FROM ( - SELECT {grp_comma} - {dest} AS {vertex_id}, - {{curr_dist_val}}+1 AS {dist_col}, + SELECT {grp_comma} + {dest} AS {vertex_id}, + {{curr_dist_val}}+1 AS {dist_col}, {src} AS {parent_col} - FROM {edge_table} + FROM {edge_table} WHERE ( ({grp_comma} {src}) IN ( - SELECT {grp_comma} {vertex_id} FROM {out_table} + SELECT {grp_comma} {vertex_id} FROM {out_table} WHERE {dist_col}={{curr_dist_val}} - ) - AND + ) + AND ({grp_comma} {dest}) NOT IN ( SELECT {grp_comma} {vertex_id} FROM {out_table} ) @@ -321,41 +328,41 @@ def graph_bfs(schema_madlib, vertex_table, vertex_id, edge_table, # Main loop for traversing the graph while vct > 0 and curr_dist_val < max_distance: # The loop consists of two steps: - # 1) Disover and store all nodes that are linked to nodes found in - # the immediate previous iteration of the loop that have not already - # been found in all previous iterations + # 1) Disover and store all nodes that are linked to nodes found in + # the immediate previous iteration of the loop that have not already + # been found in all previous iterations # 2) Check for any nodes linked to those discovered in Step 1 above # that have not yet been discovered - # - # If a node has multiple possible parents then the parent with the + # + # If a node has multiple possible parents then the parent with the # smallest ID is chosen for output - # In the directed graph case only nodes in the {dest} column of - # the edge table are searched to find new nodes reachable from + # In the directed graph case only nodes in the {dest} column of + # the edge table are searched to find new nodes reachable from # previously discovered nodes - # In the undirected graph case edges are treated as non-directional - # (or bidirectional). Nodes in both the {src} and {dest} columns of - # the edge table are searched to find new nodes reachable from + # In the undirected graph case edges are treated as non-directional + # (or bidirectional). Nodes in both the {src} and {dest} columns of + # the edge table are searched to find new nodes reachable from # previously discovered nodes. # # This approach does NOT require the user to provide a forward edge - # and a reverse edge between the same two nodes to indicate the + # and a reverse edge between the same two nodes to indicate the # graph's undirected nature. However, it will work in that scenario # as well. - # Discover and store all nodes (not already found) connected to + # Discover and store all nodes (not already found) connected to # those found in the immediate previous iteration plpy.execute(insert_qry_loop.format(**locals())) # Update distance value for next iteration curr_dist_val = curr_dist_val + 1 - # Count / find any nodes that are connected to those discovered and + # Count / find any nodes that are connected to those discovered and # stored in this iteration. This is used to check if the iterations # need to continue. vct = plpy.execute(count_qry.format(**locals()))[0]['count'] - + return None def graph_bfs_help(schema_madlib, message, **kwargs): @@ -392,18 +399,18 @@ finds all nodes reachable from the source vertex. ---------------------------------------------------------------------------- OUTPUT ---------------------------------------------------------------------------- -The output of BFS ('out_table' above) contains a row for every vertex of that is -reachable from the source_vertex. In the presence of grouping columns, only those +The output of BFS ('out_table' above) contains a row for every vertex of that is +reachable from the source_vertex. In the presence of grouping columns, only those edges are used for which there are no NULL values in any grouping column. -The output table will have the following columns (in addition to the +The output table will have the following columns (in addition to the grouping columns): - - vertex_id : The id for any node reachable from source_vertex in addition to - the source_vertex. Will use the input parameter 'vertex_id' for + - vertex_id : The id for any node reachable from source_vertex in addition to + the source_vertex. Will use the input parameter 'vertex_id' for column naming. - - dist : The distance in number of edges (or hops) from the source_vertex - to where this vertex is located. - - parent : The parent of this vertex in BFS traversal of the graph from - source_vertex. Will use 'parent' for column naming. For the + - dist : The distance in number of edges (or hops) from the source_vertex + to where this vertex is located. + - parent : The parent of this vertex in BFS traversal of the graph from + source_vertex. Will use 'parent' for column naming. For the case where vertex_id = source_vertex, the value for parent is NULL. """ elif message.lower() in ("example", "examples"): @@ -457,7 +464,7 @@ SELECT madlib.graph_bfs( NULL, -- Edge arguments (NULL means use default naming) 3, -- Source vertex for BFS 'out' -- Output table of nodes reachable from source_vertex - ); + ); -- Default values used for the other arguments SELECT * FROM out ORDER BY dist,id; http://git-wip-us.apache.org/repos/asf/incubator-madlib/blob/a1f98033/src/ports/postgres/modules/graph/pagerank.py_in ---------------------------------------------------------------------- diff --git a/src/ports/postgres/modules/graph/pagerank.py_in b/src/ports/postgres/modules/graph/pagerank.py_in index 95bc4b4..7768de1 100644 --- a/src/ports/postgres/modules/graph/pagerank.py_in +++ b/src/ports/postgres/modules/graph/pagerank.py_in @@ -30,6 +30,7 @@ import plpy from utilities.control import MinWarning from utilities.utilities import _assert +from utilities.utilities import add_postfix from utilities.utilities import extract_keyvalue_params from utilities.utilities import unique_string, split_quoted_delimited_str from utilities.utilities import is_platform_pg @@ -88,7 +89,7 @@ def pagerank(schema_madlib, vertex_table, vertex_id, edge_table, edge_args, damping_factor = 0.85 if max_iter is None: max_iter = 100 - if vertex_id is None: + if not vertex_id: vertex_id = "id" if not grouping_cols: grouping_cols = '' @@ -97,7 +98,8 @@ def pagerank(schema_madlib, vertex_table, vertex_id, edge_table, edge_args, validate_pagerank_args(schema_madlib, vertex_table, vertex_id, edge_table, edge_params, out_table, damping_factor, max_iter, threshold, grouping_cols_list) - summary_table = out_table + "_summary" + + summary_table = add_postfix(out_table, "_summary") _assert(not table_exists(summary_table), "Graph PageRank: Output summary table ({summary_table}) already exists." .format(**locals())) http://git-wip-us.apache.org/repos/asf/incubator-madlib/blob/a1f98033/src/ports/postgres/modules/graph/sssp.py_in ---------------------------------------------------------------------- diff --git a/src/ports/postgres/modules/graph/sssp.py_in b/src/ports/postgres/modules/graph/sssp.py_in index 93497c4..77ff1ff 100644 --- a/src/ports/postgres/modules/graph/sssp.py_in +++ b/src/ports/postgres/modules/graph/sssp.py_in @@ -35,6 +35,7 @@ from graph_utils import _check_groups from utilities.control import MinWarning from utilities.utilities import _assert +from utilities.utilities import add_postfix from utilities.utilities import extract_keyvalue_params from utilities.utilities import unique_string from utilities.utilities import split_quoted_delimited_str @@ -82,17 +83,19 @@ def graph_sssp(schema_madlib, vertex_table, vertex_id, edge_table, default_args) # Prepare the input for recording in the summary table - if vertex_id is None: - v_st = "NULL" + if not vertex_id: + v_st = '' vertex_id = "id" else: v_st = vertex_id - if edge_args is None: - e_st = "NULL" + + if not edge_args: + e_st = '' else: e_st = edge_args - if grouping_cols is None: - g_st = "NULL" + + if not grouping_cols: + g_st = '' glist = None else: g_st = grouping_cols @@ -124,7 +127,7 @@ def graph_sssp(schema_madlib, vertex_table, vertex_id, edge_table, checkg_om = "" group_by = "" - if grouping_cols is not None: + if grouping_cols: comma_grp = " , " + grouping_cols group_by = " , " + _grp_from_table(edge_table, glist) comma_grp_e = " , " + _grp_from_table(edge_table, glist) @@ -154,7 +157,8 @@ def graph_sssp(schema_madlib, vertex_table, vertex_id, edge_table, # We keep a summary table to keep track of the parameters used for this # SSSP run. This table is used in the path finding function to eliminate # the need for repetition. - plpy.execute(""" CREATE TABLE {out_table}_summary ( + summary_table = add_postfix(out_table, "_summary") + plpy.execute(""" CREATE TABLE {summary_table} ( vertex_table TEXT, vertex_id TEXT, edge_table TEXT, @@ -163,7 +167,7 @@ def graph_sssp(schema_madlib, vertex_table, vertex_id, edge_table, out_table TEXT, grouping_cols TEXT) """.format(**locals())) - plpy.execute(""" INSERT INTO {out_table}_summary VALUES + plpy.execute(""" INSERT INTO {summary_table} VALUES ('{vertex_table}', '{v_st}', '{edge_table}', '{e_st}', {source_vertex}, '{out_table}', '{g_st}') """.format(**locals())) @@ -207,7 +211,7 @@ def graph_sssp(schema_madlib, vertex_table, vertex_id, edge_table, # Example: # Assume there are two grouping columns g1 and g2 # g1 values are 0 and 1. g2 values are 5 and 6 - if grouping_cols is not None: + if grouping_cols: distinct_grp_table = unique_string(desp='grp') plpy.execute("DROP TABLE IF EXISTS {distinct_grp_table}". @@ -378,9 +382,9 @@ def graph_sssp(schema_madlib, vertex_table, vertex_id, edge_table, # The algorithm should converge in less than |V| iterations. # Otherwise there is a negative cycle in the graph. if i == v_cnt: - if grouping_cols is None: + if not grouping_cols: plpy.execute("DROP TABLE IF EXISTS {0},{1},{2}". - format(out_table, out_table + "_summary", oldupdate)) + format(out_table, summary_table, oldupdate)) if is_hawq: plpy.execute("DROP TABLE IF EXISTS {0}".format(temp_table)) plpy.error("Graph SSSP: Detected a negative cycle in the graph.") @@ -423,7 +427,7 @@ def graph_sssp(schema_madlib, vertex_table, vertex_id, edge_table, # drop the output table as well. if table_is_empty(out_table): plpy.execute("DROP TABLE IF EXISTS {0},{1}". - format(out_table, out_table + "_summary")) + format(out_table, summary_table)) plpy.warning( """Graph SSSP: Detected a negative cycle in the """ + @@ -460,18 +464,19 @@ def graph_sssp_get_path(schema_madlib, sssp_table, dest_vertex, path_table, grp_comma = "" tmp = "" - summary = plpy.execute("SELECT * FROM {0}_summary".format(sssp_table)) + summary_table = add_postfix(sssp_table, "_summary") + summary = plpy.execute("SELECT * FROM {0}".format(summary_table)) vertex_id = summary[0]['vertex_id'] source_vertex = summary[0]['source_vertex'] - if vertex_id == "NULL": + if not vertex_id: vertex_id = "id" grouping_cols = summary[0]['grouping_cols'] - if grouping_cols == "NULL": + if not grouping_cols: grouping_cols = None - if grouping_cols is not None: + if grouping_cols: glist = split_quoted_delimited_str(grouping_cols) select_grps = _grp_from_table(sssp_table, glist) + " , " check_grps_t1 = " AND " + _check_groups( @@ -583,7 +588,8 @@ def _validate_sssp(vertex_table, vertex_id, edge_table, edge_params, plpy.error("Graph SSSP: Source vertex table {vertex_table} " "contains duplicate vertex id's.".format(**locals())) - _assert(not table_exists(out_table + "_summary"), + summary_table = add_postfix(out_table, "_summary") + _assert(not table_exists(summary_table), "Graph SSSP: Output summary table already exists!") if glist is not None: @@ -602,11 +608,11 @@ def _validate_get_path(sssp_table, dest_vertex, path_table, **kwargs): _assert(not table_is_empty(sssp_table), "Graph SSSP: SSSP table ({0}) is empty!".format(sssp_table)) - summary = sssp_table + "_summary" - _assert(table_exists(summary), - "Graph SSSP: SSSP summary table ({0}) is missing!".format(summary)) - _assert(not table_is_empty(summary), - "Graph SSSP: SSSP summary table ({0}) is empty!".format(summary)) + summary_table = add_postfix(sssp_table, "_summary") + _assert(table_exists(summary_table), + "Graph SSSP: SSSP summary table ({0}) is missing!".format(summary_table)) + _assert(not table_is_empty(summary_table), + "Graph SSSP: SSSP summary table ({0}) is empty!".format(summary_table)) _assert(not table_exists(path_table), "Graph SSSP: Output path table already exists!") http://git-wip-us.apache.org/repos/asf/incubator-madlib/blob/a1f98033/src/ports/postgres/modules/graph/test/sssp.sql_in ---------------------------------------------------------------------- diff --git a/src/ports/postgres/modules/graph/test/sssp.sql_in b/src/ports/postgres/modules/graph/test/sssp.sql_in index c3545c2..621d55d 100644 --- a/src/ports/postgres/modules/graph/test/sssp.sql_in +++ b/src/ports/postgres/modules/graph/test/sssp.sql_in @@ -23,8 +23,8 @@ DROP TABLE IF EXISTS vertex,edge,out,out_summary,out_path, vertex_alt,edge_alt,out_alt,out_alot_summary, edge_gr,out_gr,out_gr_summary,out_gr_path, - edge_gr2, out_gr2, out_gr2_summary; - + edge_gr2, out_gr2, out_gr2_summary, + "edge_Q", "out_Q", "out_Q_summary", "out_Q_path"; CREATE TABLE vertex( id INTEGER @@ -126,8 +126,17 @@ CREATE TABLE edge_gr2 AS SELECT graph_sssp('vertex',NULL,'edge_gr2',NULL,0,'out_gr2','grp1,grp2'); - SELECT assert(weight = 3, 'Wrong output in graph (SSSP)') FROM out_gr2 WHERE id = 6 AND grp1 = 0 AND grp2 = 0; SELECT assert(parent = 5, 'Wrong parent in graph (SSSP)') FROM out_gr2 WHERE id = 6 AND grp1 = 0 AND grp2 = 0; + +CREATE TABLE "edge_Q" AS SELECT src, dest AS "dest_Q", weight FROM edge; + +SELECT graph_sssp('vertex','','"edge_Q"','dest="dest_Q"',0,'"out_Q"',''); + +SELECT * FROM "out_Q"; +SELECT * FROM "out_Q_summary"; +SELECT graph_sssp_get_path('"out_Q"',5,'"out_Q_path"'); + +SELECT * FROM "out_Q_path";