This is an automated email from the ASF dual-hosted git repository.

okislal pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/madlib.git

commit acb9e50f280dc0efd96f1236712f6643824e2acd
Author: Orhan Kislal <[email protected]>
AuthorDate: Tue Mar 3 18:41:44 2020 -0500

    Graph: Fix column type concat bug
    
    JIRA: MADLIB-1408
    
    The apsp get path function fails if the the input columns are of bigint 
type.
    The issue stems from the concatenate operation we use to build the array 
that
    represents the path. Adding an integer to an integer array works as expected
    but adding a bigint to an int array (or vice versa) doesn't.
    
    This commit fixes the issue by detecting the appropriate type and adding the
    proper cast operations.
    
    Closes #483
---
 src/ports/postgres/modules/graph/apsp.py_in        | 18 +++++--------
 src/ports/postgres/modules/graph/apsp.sql_in       | 16 ++++++------
 src/ports/postgres/modules/graph/graph_utils.py_in | 24 +++++++++++++++++
 src/ports/postgres/modules/graph/sssp.py_in        | 16 ++++++++----
 src/ports/postgres/modules/graph/sssp.sql_in       | 16 ++++++------
 .../postgres/modules/graph/test/measures.sql_in    | 30 ++++++++++++++++++++++
 src/ports/postgres/modules/graph/test/sssp.sql_in  |  1 +
 7 files changed, 89 insertions(+), 32 deletions(-)

diff --git a/src/ports/postgres/modules/graph/apsp.py_in 
b/src/ports/postgres/modules/graph/apsp.py_in
index 2d50929..1d55696 100644
--- a/src/ports/postgres/modules/graph/apsp.py_in
+++ b/src/ports/postgres/modules/graph/apsp.py_in
@@ -31,6 +31,7 @@
 import plpy
 from graph_utils import validate_graph_coding
 from graph_utils import get_graph_usage
+from graph_utils import get_edge_params
 from utilities.control import MinWarning
 from utilities.utilities import _assert
 from utilities.utilities import _check_groups
@@ -412,17 +413,12 @@ def graph_apsp_get_path(schema_madlib, apsp_table,
         edge_args = summary[0]['edge_args']
         grouping_cols = summary[0]['grouping_cols']
 
-        params_types = {'src': str, 'dest': str, 'weight': str}
-        default_args = {'src': 'src', 'dest': 'dest', 'weight': 'weight'}
-        edge_params = extract_keyvalue_params(edge_args,
-                                              params_types,
-                                              default_args)
+        edge_params, final_type = get_edge_params(schema_madlib, apsp_table, 
edge_args)
 
         src = edge_params["src"]
         dest = edge_params["dest"]
         weight = edge_params["weight"]
 
-
         if not vertex_id:
             vertex_id = "id"
 
@@ -450,7 +446,7 @@ def graph_apsp_get_path(schema_madlib, apsp_table,
         if source_vertex == dest_vertex:
             plpy.execute("""
                 CREATE TABLE {path_table} AS
-                SELECT {grp_comma} '{{{dest_vertex}}}'::INT[] AS path
+                SELECT {grp_comma} '{{{dest_vertex}}}'::{final_type}[] AS path
                 FROM {apsp_table} WHERE {src} = {source_vertex} AND
                     {dest} = {dest_vertex}
                 """.format(**locals()))
@@ -462,7 +458,7 @@ def graph_apsp_get_path(schema_madlib, apsp_table,
         # Initialize the temporary tables
         out = plpy.execute(""" CREATE TEMP TABLE {temp1_name} AS
                 SELECT {grp_comma} {apsp_table}.parent AS {vertex_id},
-                    ARRAY[{dest_vertex}] AS path
+                    ARRAY[{dest_vertex}]::{final_type}[] AS path
                 FROM {apsp_table}
                 WHERE {src} = {source_vertex} AND {dest} = {dest_vertex}
                     AND {apsp_table}.parent IS NOT NULL
@@ -513,15 +509,15 @@ def graph_apsp_get_path(schema_madlib, apsp_table,
         # Path with 1 vertex (src == dest) has been handled before
         plpy.execute("""
             CREATE TABLE {path_table} AS
-            SELECT {grp_comma} {source_vertex} || ({vertex_id} || path) AS path
+            SELECT {grp_comma} {source_vertex}::{final_type} || ({vertex_id} 
|| path) AS path
             FROM {temp2_name}
             WHERE {vertex_id} <> {dest_vertex}
             UNION
-            SELECT {grp_comma} {source_vertex} || path AS path
+            SELECT {grp_comma} {source_vertex}::{final_type} || path AS path
             FROM {temp2_name}
             WHERE {vertex_id} = {dest_vertex}
             UNION
-            SELECT {grp_comma} '{{}}'::INT[] AS path
+            SELECT {grp_comma} '{{}}'::{final_type}[] AS path
             FROM {apsp_table}
             WHERE {src} = {source_vertex} AND {dest} = {dest_vertex}
                 AND {apsp_table}.parent IS NULL
diff --git a/src/ports/postgres/modules/graph/apsp.sql_in 
b/src/ports/postgres/modules/graph/apsp.sql_in
index 7cd77d3..66d1f74 100644
--- a/src/ports/postgres/modules/graph/apsp.sql_in
+++ b/src/ports/postgres/modules/graph/apsp.sql_in
@@ -78,7 +78,7 @@ contain the column specified in the 'vertex_id' parameter 
below.</dd>
 
 <dt>vertex_id</dt>
 <dd>TEXT, default = 'id'. Name of the column in 'vertex_table' containing
-vertex ids.  The vertex ids are of type INTEGER with no duplicates.
+vertex ids.  The vertex ids are of type BIGINT with no duplicates.
 They do not need to be contiguous.</dd>
 
 <dt>edge_table</dt>
@@ -90,9 +90,9 @@ Column naming convention is described below in the 
'edge_args' parameter.</dd>
 <dd>TEXT. A comma-delimited string containing multiple named arguments of
 the form "name=value". The following parameters are supported for
 this string argument:
-  - src (INTEGER): Name of the column containing the source vertex ids in the
+  - src (BIGINT): Name of the column containing the source vertex ids in the
   edge table. Default column name is 'src'.
-  - dest (INTEGER): Name of the column containing the destination vertex ids
+  - dest (BIGINT): Name of the column containing the destination vertex ids
   in the edge table. Default column name is 'dest'.
   - weight (FLOAT8): Name of the column containing the edge weights in the
   edge table. Default column name is 'weight'.</dd>
@@ -142,10 +142,10 @@ graph_apsp_get_path( apsp_table,
 <dd>TEXT. Name of the table that contains the APSP output.</dd>
 
 <dt>source_vertex</dt>
-<dd>INTEGER. The vertex that will be the source of the desired path.</dd>
+<dd>BIGINT. The vertex that will be the source of the desired path.</dd>
 
 <dt>dest_vertex</dt>
-<dd>INTEGER. The vertex that will be the destination of the desired path.</dd>
+<dd>BIGINT. The vertex that will be the destination of the desired path.</dd>
 
 <dt>path_table</dt>
 <dd>TEXT. Name of the output table that contains the path.
@@ -412,9 +412,9 @@ m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `MODIFIES SQL 
DATA', `');
 
 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.graph_apsp_get_path(
     apsp_table             TEXT,
-    source_vertex          INT,
-  dest_vertex            INT,
-  path_table             TEXT
+    source_vertex          BIGINT,
+    dest_vertex            BIGINT,
+    path_table             TEXT
 
 ) RETURNS VOID AS $$
     PythonFunction(graph, apsp, graph_apsp_get_path)
diff --git a/src/ports/postgres/modules/graph/graph_utils.py_in 
b/src/ports/postgres/modules/graph/graph_utils.py_in
index c104f8e..889ef88 100644
--- a/src/ports/postgres/modules/graph/graph_utils.py_in
+++ b/src/ports/postgres/modules/graph/graph_utils.py_in
@@ -29,11 +29,13 @@
 import plpy
 from utilities.utilities import _assert, add_postfix
 from utilities.utilities import get_filtered_cols_subquery_str
+from utilities.utilities import extract_keyvalue_params
 from utilities.validate_args import get_cols
 from utilities.validate_args import unquote_ident
 from utilities.validate_args import table_exists
 from utilities.validate_args import columns_exist_in_table
 from utilities.validate_args import table_is_empty
+from utilities.validate_args import get_cols_and_types
 
 def validate_output_and_summary_tables(model_out_table, module_name,
                                        out_table=None):
@@ -221,3 +223,25 @@ def get_graph_usage(schema_madlib, func_name, other_text):
                other_text=other_text,
                comma=',' if other_text is not None else ' ')
     return usage
+
+
+def get_edge_params(schema_madlib, table, edge_args):
+
+    params_types = {'src': str, 'dest': str, 'weight': str}
+    default_args = {'src': 'src', 'dest': 'dest', 'weight': 'weight'}
+    edge_params = extract_keyvalue_params(edge_args,
+                                          params_types,
+                                          default_args)
+    src = edge_params["src"]
+    dest = edge_params["dest"]
+    weight = edge_params["weight"]
+
+    # Find the appropriate column type for correct concat operations
+    names_and_types = get_cols_and_types(table)
+    final_type = 'integer'
+    for col_name,col_type in names_and_types:
+        # We want to check the column types except the weight column
+        if col_type == 'bigint'  and col_name != weight:
+            final_type = 'bigint'
+
+    return edge_params, final_type
diff --git a/src/ports/postgres/modules/graph/sssp.py_in 
b/src/ports/postgres/modules/graph/sssp.py_in
index d7f2c80..0819132 100644
--- a/src/ports/postgres/modules/graph/sssp.py_in
+++ b/src/ports/postgres/modules/graph/sssp.py_in
@@ -30,6 +30,7 @@
 import plpy
 from graph_utils import validate_graph_coding
 from graph_utils import get_graph_usage
+from graph_utils import get_edge_params
 from utilities.control import MinWarning
 
 from utilities.utilities import _assert
@@ -411,6 +412,11 @@ def graph_sssp_get_path(schema_madlib, sssp_table, 
dest_vertex, path_table,
         summary = plpy.execute("SELECT * FROM {0}".format(summary_table))
         vertex_id = summary[0]['vertex_id']
         source_vertex = summary[0]['source_vertex']
+        edge_args = summary[0]['edge_args']
+
+        edge_params, final_type = get_edge_params(schema_madlib, sssp_table, 
edge_args)
+
+        weight = edge_params["weight"]
 
         if not vertex_id:
             vertex_id = "id"
@@ -432,7 +438,7 @@ def graph_sssp_get_path(schema_madlib, sssp_table, 
dest_vertex, path_table,
         if source_vertex == dest_vertex:
             plpy.execute("""
                 CREATE TABLE {path_table} AS
-                SELECT {grp_comma} '{{{dest_vertex}}}'::INT[] AS path
+                SELECT {grp_comma} '{{{dest_vertex}}}'::{final_type}[] AS path
                 FROM {sssp_table} WHERE {vertex_id} = {dest_vertex}
                 """.format(**locals()))
             return
@@ -441,7 +447,7 @@ def graph_sssp_get_path(schema_madlib, sssp_table, 
dest_vertex, path_table,
                      format(temp1_name, temp2_name))
         out = plpy.execute(""" CREATE TEMP TABLE {temp1_name} AS
                 SELECT {grp_comma} {sssp_table}.parent AS {vertex_id},
-                    ARRAY[{dest_vertex}] AS path
+                    ARRAY[{dest_vertex}]::{final_type}[] AS path
                 FROM {sssp_table}
                 WHERE {vertex_id} = {dest_vertex}
                     AND {sssp_table}.parent IS NOT NULL
@@ -481,10 +487,10 @@ def graph_sssp_get_path(schema_madlib, sssp_table, 
dest_vertex, path_table,
         # the destination vertex
         plpy.execute("""
             CREATE TABLE {path_table} AS
-            SELECT {grp_comma} {source_vertex} || path AS path
+            SELECT {grp_comma} {source_vertex}::{final_type} || path AS path
             FROM {temp2_name}
             UNION
-            SELECT {grp_comma} '{{}}'::INT[] AS path
+            SELECT {grp_comma} '{{}}'::{final_type}[] AS path
             FROM {sssp_table}
             WHERE {vertex_id} = {dest_vertex}
                 AND {sssp_table}.parent IS NULL
@@ -508,7 +514,7 @@ def _validate_sssp(vertex_table, vertex_id, edge_table, 
edge_params,
     validate_graph_coding(vertex_table, vertex_id, edge_table, edge_params,
                           out_table, 'SSSP')
 
-    _assert(isinstance(source_vertex, int),
+    _assert(isinstance(source_vertex, int) or isinstance(source_vertex, long),
             """Graph SSSP: Source vertex {source_vertex} has to be an 
integer.""".
             format(**locals()))
     src_exists = plpy.execute("""
diff --git a/src/ports/postgres/modules/graph/sssp.sql_in 
b/src/ports/postgres/modules/graph/sssp.sql_in
index 8175624..fd05fcf 100644
--- a/src/ports/postgres/modules/graph/sssp.sql_in
+++ b/src/ports/postgres/modules/graph/sssp.sql_in
@@ -68,7 +68,7 @@ column specified in the 'vertex_id' parameter below.</dd>
 
 <dt>vertex_id</dt>
 <dd>TEXT, default = 'id'. Name of the column in 'vertex_table' containing
-vertex ids.  The vertex ids are of type INTEGER with no duplicates.
+vertex ids.  The vertex ids are of type BIGINT with no duplicates.
 They do not need to be contiguous.</dd>
 
 <dt>edge_table</dt>
@@ -80,12 +80,12 @@ Column naming convention is described below in the 
'edge_args' parameter.</dd>
 <dd>TEXT. A comma-delimited string containing multiple named arguments of
 the form "name=value". The following parameters are supported for
 this string argument:
-  - src (INTEGER): Name of the column containing the source vertex ids in the 
edge table. Default column name is 'src'.
-  - dest (INTEGER): Name of the column containing the destination vertex ids 
in the edge table. Default column name is 'dest'.
+  - src (BIGINT): Name of the column containing the source vertex ids in the 
edge table. Default column name is 'src'.
+  - dest (BIGINT): Name of the column containing the destination vertex ids in 
the edge table. Default column name is 'dest'.
   - weight (FLOAT8): Name of the column containing the edge weights in the 
edge table. Default column name is 'weight'.</dd>
 
 <dt>source_vertex</dt>
-<dd>INTEGER. The source vertex id for the algorithm to start. This vertex id 
must
+<dd>BIGINT. The source vertex id for the algorithm to start. This vertex id 
must
 exist in the 'vertex_id' column of 'vertex_table'.</dd>
 
 <dt>out_table</dt>
@@ -125,7 +125,7 @@ graph_sssp_get_path( sssp_table,
 <dd>TEXT. Name of the table that contains the SSSP output.</dd>
 
 <dt>dest_vertex</dt>
-<dd>INTEGER. The vertex that will be the destination of the desired path.</dd>
+<dd>BIGINT. The vertex that will be the destination of the desired path.</dd>
 
 <dt>path_table</dt>
 <dd>TEXT. Name of the output table that contains the path.
@@ -332,7 +332,7 @@ CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.graph_sssp(
     vertex_id               TEXT,
     edge_table              TEXT,
     edge_args               TEXT,
-    source_vertex           INT,
+    source_vertex           BIGINT,
     out_table               TEXT,
     grouping_cols           TEXT
 
@@ -346,7 +346,7 @@ CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.graph_sssp(
     vertex_id               TEXT,
     edge_table              TEXT,
     edge_args               TEXT,
-    source_vertex           INT,
+    source_vertex           BIGINT,
     out_table               TEXT
 
 ) RETURNS VOID AS $$
@@ -356,7 +356,7 @@ m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `MODIFIES SQL 
DATA', `');
 -------------------------------------------------------------------------
 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.graph_sssp_get_path(
     sssp_table             TEXT,
-    dest_vertex            INT,
+    dest_vertex            BIGINT,
     path_table             TEXT
 
 ) RETURNS VOID AS $$
diff --git a/src/ports/postgres/modules/graph/test/measures.sql_in 
b/src/ports/postgres/modules/graph/test/measures.sql_in
index f6e87bd..3b3886a 100644
--- a/src/ports/postgres/modules/graph/test/measures.sql_in
+++ b/src/ports/postgres/modules/graph/test/measures.sql_in
@@ -199,3 +199,33 @@ ALTER TABLE vertex RENAME COLUMN src_id TO "DEST_ID";
 SELECT graph_vertex_degrees('vertex','"DEST_ID"','"EDGE"',
     'src=src_id, dest="DEST_ID", weight=edge_weight','out');
 SELECT * FROM out;
+ALTER TABLE vertex RENAME COLUMN "DEST_ID" TO id;
+
+-- Test for bigint columns
+
+CREATE TABLE v2 AS SELECT id::bigint FROM vertex;
+CREATE TABLE e2 AS SELECT src_id::bigint, "DEST_ID"::bigint, edge_weight FROM 
"EDGE";
+
+DROP TABLE IF EXISTS out_apsp, out_apsp_summary;
+SELECT graph_apsp('v2',      -- Vertex table
+                  'id',          -- Vertix id column (NULL means use default 
naming)
+                  'e2',        -- "EDGE" table
+                  'src=src_id, dest="DEST_ID", weight=edge_weight',
+                                 -- "EDGE" arguments (NULL means use default 
naming)
+                  'out_apsp');   -- Output table of shortest paths
+
+DROP TABLE IF EXISTS out_closeness;
+SELECT graph_closeness('out_apsp', 'out_closeness');
+
+DROP TABLE IF EXISTS out_diameter;
+SELECT graph_diameter('out_apsp', 'out_diameter');
+
+DROP TABLE IF EXISTS out;
+SELECT graph_avg_path_length('out_apsp', 'out');
+
+DROP TABLE IF EXISTS out_degrees;
+SELECT graph_vertex_degrees('v2',      -- Vertex table
+                     'id',          -- Vertix id column (NULL means use 
default naming)
+                     'e2',        -- "EDGE" table
+                     'src=src_id, dest="DEST_ID", weight=edge_weight',
+                     'out_degrees');
diff --git a/src/ports/postgres/modules/graph/test/sssp.sql_in 
b/src/ports/postgres/modules/graph/test/sssp.sql_in
index eab2b38..a147976 100644
--- a/src/ports/postgres/modules/graph/test/sssp.sql_in
+++ b/src/ports/postgres/modules/graph/test/sssp.sql_in
@@ -142,6 +142,7 @@ SELECT graph_sssp_get_path('"out_Q"',5,'"out_Q_path"');
 SELECT * FROM "out_Q_path";
 
 -- Test for common column names in vertex and edge tables
+
 DROP TABLE IF EXISTS out, out_summary;
 ALTER TABLE vertex RENAME COLUMN id TO src;
 

Reply via email to