This is an automated email from the ASF dual-hosted git repository. okislal pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/madlib.git
commit a4b3e9d8f5d667e558917fd16847f6a687edd64e Author: Orhan Kislal <[email protected]> AuthorDate: Tue Mar 3 18:42:39 2020 -0500 Graph: Fix output table schema parsing bug JIRA: MADLIB-1411 HITS and Pagerank functions failed if the output table had a schema attached to it. This error was caused by the final ALTER TABLE ... RENAME TO operation, since postgres does not allow a rename to change a table's schema. This commit changes this operation to create table and adds tests to every graph function to make sure we don't have similar bugs in the future. In addition, the new tests also test for the bigint-related changes from the previous commit. --- src/ports/postgres/modules/graph/hits.py_in | 11 +++---- src/ports/postgres/modules/graph/pagerank.py_in | 6 ++-- src/ports/postgres/modules/graph/test/apsp.sql_in | 10 ++++++ src/ports/postgres/modules/graph/test/bfs.sql_in | 9 ++++++ src/ports/postgres/modules/graph/test/hits.sql_in | 9 ++++++ .../postgres/modules/graph/test/measures.sql_in | 36 +++++++++++----------- .../postgres/modules/graph/test/pagerank.sql_in | 9 ++++++ src/ports/postgres/modules/graph/test/sssp.sql_in | 10 ++++++ src/ports/postgres/modules/graph/test/wcc.sql_in | 9 ++++++ src/ports/postgres/modules/graph/wcc.py_in | 4 ++- 10 files changed, 84 insertions(+), 29 deletions(-) diff --git a/src/ports/postgres/modules/graph/hits.py_in b/src/ports/postgres/modules/graph/hits.py_in index 4864160..f792f15 100644 --- a/src/ports/postgres/modules/graph/hits.py_in +++ b/src/ports/postgres/modules/graph/hits.py_in @@ -47,6 +47,7 @@ from utilities.utilities import is_platform_pg from utilities.validate_args import columns_exist_in_table, drop_tables from utilities.validate_args import get_cols_and_types, table_exists +from utilities.validate_args import rename_table def validate_hits_args(schema_madlib, vertex_table, vertex_id, edge_table, edge_params, out_table, max_iter, threshold, @@ -334,7 +335,7 @@ def hits(schema_madlib, vertex_table, vertex_id, edge_table, edge_args, if 
converged: break - update_final_results(converged, threshold, cur, temp_summary_table, + update_final_results(schema_madlib, converged, threshold, cur, temp_summary_table, iteration_num, summary_table, out_table, grouping_cols_list, cur_unconv, distinct_grp_table) @@ -345,7 +346,7 @@ def hits(schema_madlib, vertex_table, vertex_id, edge_table, edge_args, drop_tables([distinct_grp_table, temp_summary_table]) -def update_final_results(converged, threshold, cur, temp_summary_table, +def update_final_results(schema_madlib, converged, threshold, cur, temp_summary_table, iteration_num, summary_table, out_table, grouping_cols_list, cur_unconv, distinct_grp_table): """ @@ -374,10 +375,8 @@ def update_final_results(converged, threshold, cur, temp_summary_table, grouping_cols_list, distinct_grp_table) else: - plpy.execute(""" - ALTER TABLE {table_name} - RENAME TO {out_table} - """.format(table_name=cur, **locals())) + + rename_table(schema_madlib, cur, out_table) plpy.execute(""" INSERT INTO {summary_table} VALUES ({iteration_num}+1) diff --git a/src/ports/postgres/modules/graph/pagerank.py_in b/src/ports/postgres/modules/graph/pagerank.py_in index a0f7319..41f25de 100644 --- a/src/ports/postgres/modules/graph/pagerank.py_in +++ b/src/ports/postgres/modules/graph/pagerank.py_in @@ -49,6 +49,7 @@ from utilities.utilities import py_list_to_sql_string from utilities.validate_args import columns_exist_in_table, get_cols_and_types from utilities.validate_args import table_exists +from utilities.validate_args import rename_table def validate_pagerank_args(schema_madlib, vertex_table, vertex_id, edge_table, @@ -671,10 +672,7 @@ def pagerank(schema_madlib, vertex_table, vertex_id, edge_table, edge_args, out_ """.format(table_name=cur, total_ppr_nodes=total_ppr_nodes)) - plpy.execute(""" - ALTER TABLE {table_name} - RENAME TO {out_table} - """.format(table_name=cur, **locals())) + rename_table(schema_madlib, cur, out_table) plpy.execute(""" INSERT INTO {summary_table} VALUES 
({iteration_num}+1) diff --git a/src/ports/postgres/modules/graph/test/apsp.sql_in b/src/ports/postgres/modules/graph/test/apsp.sql_in index c7bb163..f98b76b 100644 --- a/src/ports/postgres/modules/graph/test/apsp.sql_in +++ b/src/ports/postgres/modules/graph/test/apsp.sql_in @@ -109,3 +109,13 @@ ALTER TABLE vertex RENAME COLUMN src TO "DEST"; SELECT graph_apsp('vertex','"DEST"','edge_gr','dest="DEST"','out','grp'); SELECT * FROM out ORDER BY src,"DEST"; + +ALTER TABLE vertex RENAME COLUMN "DEST" TO id; + +-- Test for bigint columns +CREATE TABLE v2 AS SELECT id::bigint FROM vertex; +CREATE TABLE e2 AS SELECT src::bigint, "DEST"::bigint, weight FROM "EDGE"; + +DROP TABLE IF EXISTS public.out2, public.out2_summary, public.out2_path; +SELECT graph_apsp('v2',NULL,'e2','dest="DEST"','public.out2'); +SELECT graph_apsp_get_path('public.out2',0,7,'public.out2_path'); diff --git a/src/ports/postgres/modules/graph/test/bfs.sql_in b/src/ports/postgres/modules/graph/test/bfs.sql_in index 052d6b2..5a32b96 100644 --- a/src/ports/postgres/modules/graph/test/bfs.sql_in +++ b/src/ports/postgres/modules/graph/test/bfs.sql_in @@ -287,3 +287,12 @@ ALTER TABLE vertex RENAME COLUMN "SRC" TO dest; SELECT graph_bfs('vertex','dest','edge_grp','src="SRC"',3,'out',NULL,NULL,'g1'); SELECT * FROM out; + +ALTER TABLE vertex RENAME COLUMN dest TO id; + +-- Test for bigint columns +CREATE TABLE v2 AS SELECT id::bigint FROM vertex; +CREATE TABLE e2 AS SELECT "SRC"::bigint, dest::bigint, weight FROM "EDGE"; + +DROP TABLE IF EXISTS public.out2, public.out2_summary; +SELECT graph_bfs('v2',NULL,'e2','src="SRC"',3,'public.out2'); diff --git a/src/ports/postgres/modules/graph/test/hits.sql_in b/src/ports/postgres/modules/graph/test/hits.sql_in index 32b56e2..3be0688 100644 --- a/src/ports/postgres/modules/graph/test/hits.sql_in +++ b/src/ports/postgres/modules/graph/test/hits.sql_in @@ -169,3 +169,12 @@ ALTER TABLE vertex RENAME COLUMN src TO dest; SELECT 
hits('vertex','dest','edge',NULL,'out',3,0.01,'user_id'); SELECT * FROM out; + +ALTER TABLE vertex RENAME COLUMN dest TO id; + +-- Test for bigint columns +CREATE TABLE v2 AS SELECT id::bigint FROM vertex; +CREATE TABLE e2 AS SELECT src::bigint, dest::bigint FROM edge; + +DROP TABLE IF EXISTS public.out2, public.out2_summary; +SELECT hits('v2',NULL,'e2',NULL,'public.out2'); diff --git a/src/ports/postgres/modules/graph/test/measures.sql_in b/src/ports/postgres/modules/graph/test/measures.sql_in index 3b3886a..38e2e72 100644 --- a/src/ports/postgres/modules/graph/test/measures.sql_in +++ b/src/ports/postgres/modules/graph/test/measures.sql_in @@ -64,51 +64,51 @@ SELECT graph_apsp('vertex', -- Vertex table 'out_apsp'); -- Output table of shortest paths -- Compute the closeness measure for all nodes: -DROP TABLE IF EXISTS out_closeness; -SELECT graph_closeness('out_apsp', 'out_closeness'); -SELECT * FROM out_closeness; +DROP TABLE IF EXISTS public.__madlib__out_closeness; +SELECT graph_closeness('out_apsp', 'public.__madlib__out_closeness'); +SELECT * FROM public.__madlib__out_closeness; SELECT assert(relative_error(inverse_sum_dist, 0.04347) < 1e-2 and relative_error(inverse_avg_dist, 0.3043) < 1e-2 and relative_error(sum_inverse_dist, 3.6833) < 1e-2 and k_degree = 7, 'Incorrect value for closeness') -FROM out_closeness +FROM public.__madlib__out_closeness WHERE src_id = 0; -- Compute the diameter measure for graph -DROP TABLE IF EXISTS out_diameter; -SELECT graph_diameter('out_apsp', 'out_diameter'); -SELECT * FROM out_diameter; -SELECT assert(diameter=14, 'Invalid value for diameter') FROM out_diameter; +DROP TABLE IF EXISTS public.__madlib__out_diameter; +SELECT graph_diameter('out_apsp', 'public.__madlib__out_diameter'); +SELECT * FROM public.__madlib__out_diameter; +SELECT assert(diameter=14, 'Invalid value for diameter') FROM public.__madlib__out_diameter; -- Compute the average path length measure for graph -DROP TABLE IF EXISTS out_avg_path_length; -SELECT 
graph_avg_path_length('out_apsp', 'out_avg_path_length'); -SELECT * FROM out_avg_path_length; +DROP TABLE IF EXISTS public.__madlib__out_avg_path_length; +SELECT graph_avg_path_length('out_apsp', 'public.__madlib__out_avg_path_length'); +SELECT * FROM public.__madlib__out_avg_path_length; SELECT assert(relative_error(avg_path_length, 2.0178) < 1e-2, - 'Invalid value for avg_path_length') FROM out_avg_path_length; + 'Invalid value for avg_path_length') FROM public.__madlib__out_avg_path_length; -- Compute the in and out degrees -DROP TABLE IF EXISTS out_degrees; +DROP TABLE IF EXISTS public.__madlib__out_degrees; SELECT graph_vertex_degrees('vertex', -- Vertex table 'id', -- Vertix id column (NULL means use default naming) '"EDGE"', -- "EDGE" table 'src=src_id, dest="DEST_ID", weight=edge_weight', -- "EDGE" arguments (NULL means use default naming) - 'out_degrees'); -SELECT * FROM out_degrees; + 'public.__madlib__out_degrees'); +SELECT * FROM public.__madlib__out_degrees; SELECT assert(indegree = 2 and outdegree = 3, 'Invalid value for degrees') -FROM out_degrees +FROM public.__madlib__out_degrees WHERE id = 0; SELECT assert(COUNT(*)=1, 'Invalid value for node with only one incoming edge.') -FROM out_degrees +FROM public.__madlib__out_degrees WHERE id = 7; DELETE FROM "EDGE" WHERE "DEST_ID"=7; INSERT INTO "EDGE" VALUES (7,6,1); -DROP TABLE IF EXISTS out_degrees; +DROP TABLE IF EXISTS public.out_degrees; SELECT graph_vertex_degrees('vertex', -- Vertex table 'id', -- Vertix id column (NULL means use default naming) '"EDGE"', -- "EDGE" table diff --git a/src/ports/postgres/modules/graph/test/pagerank.sql_in b/src/ports/postgres/modules/graph/test/pagerank.sql_in index ba45c30..7c886e1 100644 --- a/src/ports/postgres/modules/graph/test/pagerank.sql_in +++ b/src/ports/postgres/modules/graph/test/pagerank.sql_in @@ -205,3 +205,12 @@ ALTER TABLE vertex RENAME COLUMN src TO dest; SELECT pagerank('vertex','dest','"EDGE"',NULL,'out',NULL,NULL,NULL,'user_id'); SELECT * FROM 
out; + +ALTER TABLE vertex RENAME COLUMN dest TO id; + +-- Test for bigint columns +CREATE TABLE v2 AS SELECT id::bigint FROM vertex; +CREATE TABLE e2 AS SELECT src::bigint, dest::bigint FROM "EDGE"; + +DROP TABLE IF EXISTS public.out2, public.out2_summary; +SELECT pagerank('v2',NULL,'e2',NULL,'public.out2'); diff --git a/src/ports/postgres/modules/graph/test/sssp.sql_in b/src/ports/postgres/modules/graph/test/sssp.sql_in index a147976..9f1a913 100644 --- a/src/ports/postgres/modules/graph/test/sssp.sql_in +++ b/src/ports/postgres/modules/graph/test/sssp.sql_in @@ -154,3 +154,13 @@ ALTER TABLE vertex RENAME COLUMN src TO dest; SELECT graph_sssp('vertex','dest','edge_gr',NULL,0,'out','grp'); SELECT * FROM out; + +ALTER TABLE vertex RENAME COLUMN dest TO id; + +-- Test for bigint columns +CREATE TABLE v2 AS SELECT id::bigint FROM vertex; +CREATE TABLE e2 AS SELECT src::bigint, dest::bigint, weight FROM "EDGE"; + +DROP TABLE IF EXISTS public.out2, public.out2_summary, public.out2_path; +SELECT graph_sssp('v2',NULL,'e2',NULL,0,'public.out2'); +SELECT graph_sssp_get_path('public.out2',5,'public.out2_path'); diff --git a/src/ports/postgres/modules/graph/test/wcc.sql_in b/src/ports/postgres/modules/graph/test/wcc.sql_in index 1b9dcaa..2cdbf6d 100644 --- a/src/ports/postgres/modules/graph/test/wcc.sql_in +++ b/src/ports/postgres/modules/graph/test/wcc.sql_in @@ -172,3 +172,12 @@ ALTER TABLE vertex RENAME COLUMN src TO dest; SELECT weakly_connected_components('vertex','dest','"EDGE"', 'src=src_node,dest=dest_node','out','user_id'); SELECT * FROM out; + +ALTER TABLE vertex RENAME COLUMN dest TO id; + +-- Test for bigint columns +CREATE TABLE v2 AS SELECT id::bigint FROM vertex; +CREATE TABLE e2 AS SELECT src_node::bigint, dest_node::bigint FROM "EDGE"; + +DROP TABLE IF EXISTS public.out2, public.out2_summary; +SELECT weakly_connected_components('v2',NULL,'e2','src=src_node,dest=dest_node','public.out2'); diff --git a/src/ports/postgres/modules/graph/wcc.py_in 
b/src/ports/postgres/modules/graph/wcc.py_in index 4b4b05d..4adc52e 100644 --- a/src/ports/postgres/modules/graph/wcc.py_in +++ b/src/ports/postgres/modules/graph/wcc.py_in @@ -37,6 +37,7 @@ from utilities.validate_args import columns_exist_in_table, get_expr_type from utilities.utilities import is_platform_pg from utilities.utilities import add_postfix from utilities.validate_args import table_exists +from utilities.validate_args import rename_table from utilities.control import MinWarning from graph_utils import validate_graph_coding, get_graph_usage from graph_utils import validate_output_and_summary_tables @@ -294,7 +295,8 @@ def wcc(schema_madlib, vertex_table, vertex_id, edge_table, edge_args, if not is_platform_pg(): # Drop intermediate table created for Greenplum plpy.execute("DROP TABLE IF EXISTS {0}".format(edge_inverse)) - plpy.execute("ALTER TABLE {0} RENAME TO {1}".format(newupdate, out_table)) + + rename_table(schema_madlib, newupdate, out_table) # Create summary table. We only need the vertex_id and grouping columns # in it. plpy.execute("""
