This is an automated email from the ASF dual-hosted git repository.

okislal pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/madlib.git

commit a4b3e9d8f5d667e558917fd16847f6a687edd64e
Author: Orhan Kislal <[email protected]>
AuthorDate: Tue Mar 3 18:42:39 2020 -0500

    Graph: Fix output table schema parsing bug
    
    JIRA: MADLIB-1411
    
    HITS and Pagerank functions failed if the output table had a schema
    attached to it. This error was caused by the final ALTER TABLE ... RENAME TO
    operation, since Postgres does not allow a rename to change a table's schema.
    
    This commit changes this operation to a create table (through the
    rename_table helper) and adds tests to every graph function to make sure
    we don't have similar bugs in the future.
    
    In addition, the new tests also test for the bigint related changes from
    the previous commit.
---
 src/ports/postgres/modules/graph/hits.py_in        | 11 +++----
 src/ports/postgres/modules/graph/pagerank.py_in    |  6 ++--
 src/ports/postgres/modules/graph/test/apsp.sql_in  | 10 ++++++
 src/ports/postgres/modules/graph/test/bfs.sql_in   |  9 ++++++
 src/ports/postgres/modules/graph/test/hits.sql_in  |  9 ++++++
 .../postgres/modules/graph/test/measures.sql_in    | 36 +++++++++++-----------
 .../postgres/modules/graph/test/pagerank.sql_in    |  9 ++++++
 src/ports/postgres/modules/graph/test/sssp.sql_in  | 10 ++++++
 src/ports/postgres/modules/graph/test/wcc.sql_in   |  9 ++++++
 src/ports/postgres/modules/graph/wcc.py_in         |  4 ++-
 10 files changed, 84 insertions(+), 29 deletions(-)

diff --git a/src/ports/postgres/modules/graph/hits.py_in 
b/src/ports/postgres/modules/graph/hits.py_in
index 4864160..f792f15 100644
--- a/src/ports/postgres/modules/graph/hits.py_in
+++ b/src/ports/postgres/modules/graph/hits.py_in
@@ -47,6 +47,7 @@ from utilities.utilities import is_platform_pg
 
 from utilities.validate_args import columns_exist_in_table, drop_tables
 from utilities.validate_args import get_cols_and_types, table_exists
+from utilities.validate_args import rename_table
 
 def validate_hits_args(schema_madlib, vertex_table, vertex_id, edge_table,
                        edge_params, out_table, max_iter, threshold,
@@ -334,7 +335,7 @@ def hits(schema_madlib, vertex_table, vertex_id, 
edge_table, edge_args,
             if converged:
                 break
 
-        update_final_results(converged, threshold, cur, temp_summary_table,
+        update_final_results(schema_madlib, converged, threshold, cur, 
temp_summary_table,
                              iteration_num, summary_table, out_table,
                              grouping_cols_list, cur_unconv, 
distinct_grp_table)
 
@@ -345,7 +346,7 @@ def hits(schema_madlib, vertex_table, vertex_id, 
edge_table, edge_args,
             drop_tables([distinct_grp_table, temp_summary_table])
 
 
-def update_final_results(converged, threshold, cur, temp_summary_table,
+def update_final_results(schema_madlib, converged, threshold, cur, 
temp_summary_table,
                          iteration_num, summary_table, out_table,
                          grouping_cols_list, cur_unconv, distinct_grp_table):
     """
@@ -374,10 +375,8 @@ def update_final_results(converged, threshold, cur, 
temp_summary_table,
                                                                       
grouping_cols_list,
                                                                       
distinct_grp_table)
     else:
-        plpy.execute("""
-                ALTER TABLE {table_name}
-                RENAME TO {out_table}
-                """.format(table_name=cur, **locals()))
+
+        rename_table(schema_madlib, cur, out_table)
         plpy.execute("""
                 INSERT INTO {summary_table} VALUES
                 ({iteration_num}+1)
diff --git a/src/ports/postgres/modules/graph/pagerank.py_in 
b/src/ports/postgres/modules/graph/pagerank.py_in
index a0f7319..41f25de 100644
--- a/src/ports/postgres/modules/graph/pagerank.py_in
+++ b/src/ports/postgres/modules/graph/pagerank.py_in
@@ -49,6 +49,7 @@ from utilities.utilities import py_list_to_sql_string
 
 from utilities.validate_args import columns_exist_in_table, get_cols_and_types
 from utilities.validate_args import table_exists
+from utilities.validate_args import rename_table
 
 
 def validate_pagerank_args(schema_madlib, vertex_table, vertex_id, edge_table,
@@ -671,10 +672,7 @@ def pagerank(schema_madlib, vertex_table, vertex_id, 
edge_table, edge_args, out_
                                  """.format(table_name=cur,
                                             total_ppr_nodes=total_ppr_nodes))
 
-                plpy.execute("""
-                        ALTER TABLE {table_name}
-                        RENAME TO {out_table}
-                    """.format(table_name=cur, **locals()))
+                rename_table(schema_madlib, cur, out_table)
                 plpy.execute("""
                         INSERT INTO {summary_table} VALUES
                         ({iteration_num}+1)
diff --git a/src/ports/postgres/modules/graph/test/apsp.sql_in 
b/src/ports/postgres/modules/graph/test/apsp.sql_in
index c7bb163..f98b76b 100644
--- a/src/ports/postgres/modules/graph/test/apsp.sql_in
+++ b/src/ports/postgres/modules/graph/test/apsp.sql_in
@@ -109,3 +109,13 @@ ALTER TABLE vertex RENAME COLUMN src TO "DEST";
 
 SELECT graph_apsp('vertex','"DEST"','edge_gr','dest="DEST"','out','grp');
 SELECT * FROM out ORDER BY src,"DEST";
+
+ALTER TABLE vertex RENAME COLUMN "DEST" TO id;
+
+-- Test for bigint columns
+CREATE TABLE v2 AS SELECT id::bigint FROM vertex;
+CREATE TABLE e2 AS SELECT src::bigint, "DEST"::bigint, weight FROM "EDGE";
+
+DROP TABLE IF EXISTS public.out2, public.out2_summary, public.out2_path;
+SELECT graph_apsp('v2',NULL,'e2','dest="DEST"','public.out2');
+SELECT graph_apsp_get_path('public.out2',0,7,'public.out2_path');
diff --git a/src/ports/postgres/modules/graph/test/bfs.sql_in 
b/src/ports/postgres/modules/graph/test/bfs.sql_in
index 052d6b2..5a32b96 100644
--- a/src/ports/postgres/modules/graph/test/bfs.sql_in
+++ b/src/ports/postgres/modules/graph/test/bfs.sql_in
@@ -287,3 +287,12 @@ ALTER TABLE vertex RENAME COLUMN "SRC" TO dest;
 
 SELECT 
graph_bfs('vertex','dest','edge_grp','src="SRC"',3,'out',NULL,NULL,'g1');
 SELECT * FROM out;
+
+ALTER TABLE vertex RENAME COLUMN dest TO id;
+
+-- Test for bigint columns
+CREATE TABLE v2 AS SELECT id::bigint FROM vertex;
+CREATE TABLE e2 AS SELECT "SRC"::bigint, dest::bigint, weight FROM "EDGE";
+
+DROP TABLE IF EXISTS public.out2, public.out2_summary;
+SELECT graph_bfs('v2',NULL,'e2','src="SRC"',3,'public.out2');
diff --git a/src/ports/postgres/modules/graph/test/hits.sql_in 
b/src/ports/postgres/modules/graph/test/hits.sql_in
index 32b56e2..3be0688 100644
--- a/src/ports/postgres/modules/graph/test/hits.sql_in
+++ b/src/ports/postgres/modules/graph/test/hits.sql_in
@@ -169,3 +169,12 @@ ALTER TABLE vertex RENAME COLUMN src TO dest;
 
 SELECT hits('vertex','dest','edge',NULL,'out',3,0.01,'user_id');
 SELECT * FROM out;
+
+ALTER TABLE vertex RENAME COLUMN dest TO id;
+
+-- Test for bigint columns
+CREATE TABLE v2 AS SELECT id::bigint FROM vertex;
+CREATE TABLE e2 AS SELECT src::bigint, dest::bigint FROM edge;
+
+DROP TABLE IF EXISTS public.out2, public.out2_summary;
+SELECT hits('v2',NULL,'e2',NULL,'public.out2');
diff --git a/src/ports/postgres/modules/graph/test/measures.sql_in 
b/src/ports/postgres/modules/graph/test/measures.sql_in
index 3b3886a..38e2e72 100644
--- a/src/ports/postgres/modules/graph/test/measures.sql_in
+++ b/src/ports/postgres/modules/graph/test/measures.sql_in
@@ -64,51 +64,51 @@ SELECT graph_apsp('vertex',      -- Vertex table
                   'out_apsp');   -- Output table of shortest paths
 
 -- Compute the closeness measure for all nodes:
-DROP TABLE IF EXISTS out_closeness;
-SELECT graph_closeness('out_apsp', 'out_closeness');
-SELECT * FROM out_closeness;
+DROP TABLE IF EXISTS public.__madlib__out_closeness;
+SELECT graph_closeness('out_apsp', 'public.__madlib__out_closeness');
+SELECT * FROM public.__madlib__out_closeness;
 
 SELECT assert(relative_error(inverse_sum_dist, 0.04347) < 1e-2 and
               relative_error(inverse_avg_dist, 0.3043) < 1e-2 and
               relative_error(sum_inverse_dist, 3.6833) < 1e-2 and
               k_degree = 7,
               'Incorrect value for closeness')
-FROM out_closeness
+FROM public.__madlib__out_closeness
 WHERE src_id = 0;
 
 -- Compute the diameter measure for graph
-DROP TABLE IF EXISTS out_diameter;
-SELECT graph_diameter('out_apsp', 'out_diameter');
-SELECT * FROM out_diameter;
-SELECT assert(diameter=14, 'Invalid value for diameter') FROM out_diameter;
+DROP TABLE IF EXISTS public.__madlib__out_diameter;
+SELECT graph_diameter('out_apsp', 'public.__madlib__out_diameter');
+SELECT * FROM public.__madlib__out_diameter;
+SELECT assert(diameter=14, 'Invalid value for diameter') FROM 
public.__madlib__out_diameter;
 
 -- Compute the average path length measure for graph
-DROP TABLE IF EXISTS out_avg_path_length;
-SELECT graph_avg_path_length('out_apsp', 'out_avg_path_length');
-SELECT * FROM out_avg_path_length;
+DROP TABLE IF EXISTS public.__madlib__out_avg_path_length;
+SELECT graph_avg_path_length('out_apsp', 
'public.__madlib__out_avg_path_length');
+SELECT * FROM public.__madlib__out_avg_path_length;
 SELECT assert(relative_error(avg_path_length, 2.0178) < 1e-2,
-              'Invalid value for avg_path_length') FROM out_avg_path_length;
+              'Invalid value for avg_path_length') FROM 
public.__madlib__out_avg_path_length;
 
 -- Compute the in and out degrees
-DROP TABLE IF EXISTS out_degrees;
+DROP TABLE IF EXISTS public.__madlib__out_degrees;
 SELECT graph_vertex_degrees('vertex',      -- Vertex table
                      'id',          -- Vertix id column (NULL means use 
default naming)
                      '"EDGE"',        -- "EDGE" table
                      'src=src_id, dest="DEST_ID", weight=edge_weight',
                                  -- "EDGE" arguments (NULL means use default 
naming)
-                     'out_degrees');
-SELECT * FROM out_degrees;
+                     'public.__madlib__out_degrees');
+SELECT * FROM public.__madlib__out_degrees;
 SELECT assert(indegree = 2 and outdegree = 3, 'Invalid value for degrees')
-FROM out_degrees
+FROM public.__madlib__out_degrees
 WHERE id = 0;
 
 SELECT assert(COUNT(*)=1, 'Invalid value for node with only one incoming 
edge.')
-FROM out_degrees
+FROM public.__madlib__out_degrees
 WHERE id = 7;
 
 DELETE FROM "EDGE" WHERE "DEST_ID"=7;
 INSERT INTO "EDGE" VALUES (7,6,1);
-DROP TABLE IF EXISTS out_degrees;
+DROP TABLE IF EXISTS public.out_degrees;
 SELECT graph_vertex_degrees('vertex',      -- Vertex table
                      'id',          -- Vertix id column (NULL means use 
default naming)
                      '"EDGE"',        -- "EDGE" table
diff --git a/src/ports/postgres/modules/graph/test/pagerank.sql_in 
b/src/ports/postgres/modules/graph/test/pagerank.sql_in
index ba45c30..7c886e1 100644
--- a/src/ports/postgres/modules/graph/test/pagerank.sql_in
+++ b/src/ports/postgres/modules/graph/test/pagerank.sql_in
@@ -205,3 +205,12 @@ ALTER TABLE vertex RENAME COLUMN src TO dest;
 
 SELECT pagerank('vertex','dest','"EDGE"',NULL,'out',NULL,NULL,NULL,'user_id');
 SELECT * FROM out;
+
+ALTER TABLE vertex RENAME COLUMN dest TO id;
+
+-- Test for bigint columns
+CREATE TABLE v2 AS SELECT id::bigint FROM vertex;
+CREATE TABLE e2 AS SELECT src::bigint, dest::bigint FROM "EDGE";
+
+DROP TABLE IF EXISTS public.out2, public.out2_summary;
+SELECT pagerank('v2',NULL,'e2',NULL,'public.out2');
diff --git a/src/ports/postgres/modules/graph/test/sssp.sql_in 
b/src/ports/postgres/modules/graph/test/sssp.sql_in
index a147976..9f1a913 100644
--- a/src/ports/postgres/modules/graph/test/sssp.sql_in
+++ b/src/ports/postgres/modules/graph/test/sssp.sql_in
@@ -154,3 +154,13 @@ ALTER TABLE vertex RENAME COLUMN src TO dest;
 
 SELECT graph_sssp('vertex','dest','edge_gr',NULL,0,'out','grp');
 SELECT * FROM out;
+
+ALTER TABLE vertex RENAME COLUMN dest TO id;
+
+-- Test for bigint columns
+CREATE TABLE v2 AS SELECT id::bigint FROM vertex;
+CREATE TABLE e2 AS SELECT src::bigint, dest::bigint, weight FROM "EDGE";
+
+DROP TABLE IF EXISTS public.out2, public.out2_summary, public.out2_path;
+SELECT graph_sssp('v2',NULL,'e2',NULL,0,'public.out2');
+SELECT graph_sssp_get_path('public.out2',5,'public.out2_path');
diff --git a/src/ports/postgres/modules/graph/test/wcc.sql_in 
b/src/ports/postgres/modules/graph/test/wcc.sql_in
index 1b9dcaa..2cdbf6d 100644
--- a/src/ports/postgres/modules/graph/test/wcc.sql_in
+++ b/src/ports/postgres/modules/graph/test/wcc.sql_in
@@ -172,3 +172,12 @@ ALTER TABLE vertex RENAME COLUMN src TO dest;
 SELECT weakly_connected_components('vertex','dest','"EDGE"',
     'src=src_node,dest=dest_node','out','user_id');
 SELECT * FROM out;
+
+ALTER TABLE vertex RENAME COLUMN dest TO id;
+
+-- Test for bigint columns
+CREATE TABLE v2 AS SELECT id::bigint FROM vertex;
+CREATE TABLE e2 AS SELECT src_node::bigint, dest_node::bigint FROM "EDGE";
+
+DROP TABLE IF EXISTS public.out2, public.out2_summary;
+SELECT 
weakly_connected_components('v2',NULL,'e2','src=src_node,dest=dest_node','public.out2');
diff --git a/src/ports/postgres/modules/graph/wcc.py_in 
b/src/ports/postgres/modules/graph/wcc.py_in
index 4b4b05d..4adc52e 100644
--- a/src/ports/postgres/modules/graph/wcc.py_in
+++ b/src/ports/postgres/modules/graph/wcc.py_in
@@ -37,6 +37,7 @@ from utilities.validate_args import columns_exist_in_table, 
get_expr_type
 from utilities.utilities import is_platform_pg
 from utilities.utilities import add_postfix
 from utilities.validate_args import table_exists
+from utilities.validate_args import rename_table
 from utilities.control import MinWarning
 from graph_utils import validate_graph_coding, get_graph_usage
 from graph_utils import validate_output_and_summary_tables
@@ -294,7 +295,8 @@ def wcc(schema_madlib, vertex_table, vertex_id, edge_table, 
edge_args,
     if not is_platform_pg():
         # Drop intermediate table created for Greenplum
         plpy.execute("DROP TABLE IF EXISTS {0}".format(edge_inverse))
-    plpy.execute("ALTER TABLE {0} RENAME TO {1}".format(newupdate, out_table))
+
+    rename_table(schema_madlib, newupdate, out_table)
     # Create summary table. We only need the vertex_id and grouping columns
     # in it.
     plpy.execute("""

Reply via email to