This is an automated email from the ASF dual-hosted git repository. okislal pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/madlib.git
commit eabaea93f833d442a8264696e2e8e1d151d46a74 Author: Orhan Kislal <[email protected]> AuthorDate: Tue Mar 10 19:42:45 2020 -0400 Graph: Filter out infinite paths JIRA: MADLIB-1415 SSSP, APSP and BFS output tables had entries for unconnected vertices, denoted by a NULL parent column. This commit filters these rows to clean up the output tables. The BFS output had a slightly different output format where vertices parent column was left NULL for both unreachable vertices as well as the source vertex itself. This is different than SSSP and APSP so BFS is changed to follow the same pattern. --- src/ports/postgres/modules/graph/apsp.py_in | 5 ++++- src/ports/postgres/modules/graph/bfs.py_in | 8 ++++++++ src/ports/postgres/modules/graph/bfs.sql_in | 14 +++++++------- src/ports/postgres/modules/graph/measures.py_in | 8 +------- src/ports/postgres/modules/graph/measures.sql_in | 13 ++++++++----- src/ports/postgres/modules/graph/sssp.py_in | 4 ++++ src/ports/postgres/modules/graph/test/apsp.sql_in | 11 +++++++++++ src/ports/postgres/modules/graph/test/bfs.sql_in | 9 +++++++++ src/ports/postgres/modules/graph/test/measures.sql_in | 2 +- src/ports/postgres/modules/graph/test/sssp.sql_in | 11 +++++++++++ 10 files changed, 64 insertions(+), 21 deletions(-) diff --git a/src/ports/postgres/modules/graph/apsp.py_in b/src/ports/postgres/modules/graph/apsp.py_in index 1d55696..e854ab9 100644 --- a/src/ports/postgres/modules/graph/apsp.py_in +++ b/src/ports/postgres/modules/graph/apsp.py_in @@ -381,10 +381,13 @@ def graph_apsp(schema_madlib, vertex_table, vertex_id, edge_table, """Graph APSP: Detected a negative cycle in the """ + """sub-graphs of following groups: {0}.""". format(str(negs)[1:-1])) - else: plpy.execute("DROP VIEW IF EXISTS {0}".format(tmp_view)) + + # Filter out the infinite paths (disconnected pairs) + plpy.execute(""" DELETE FROM {0} WHERE parent IS NULL + """.format(out_table)) return None diff --git a/src/ports/postgres/modules/graph/bfs.py_in b/src/ports/postgres/modules/graph/bfs.py_in index 6fc008e..e802aac 100644 --- a/src/ports/postgres/modules/graph/bfs.py_in +++ b/src/ports/postgres/modules/graph/bfs.py_in @@ -375,6 +375,14 @@ def graph_bfs(schema_madlib, vertex_table, vertex_id, edge_table, # stored in this iteration. This is used to check if the iterations # need to continue. vct = plpy.execute(count_qry.format(**locals()))[0]['count'] + + # Filter out the infinite paths (disconnected pairs) + plpy.execute(""" UPDATE {out_table} SET parent = {source_vertex} + WHERE {vertex_id} = {source_vertex} + """.format(**locals())) + + plpy.execute(""" DELETE FROM {0} WHERE parent IS NULL + """.format(out_table)) # Drop temp tables plpy.execute("DROP TABLE IF EXISTS {0},{1}".format(toupdate, message)) diff --git a/src/ports/postgres/modules/graph/bfs.sql_in b/src/ports/postgres/modules/graph/bfs.sql_in index ea991fa..ac945a1 100644 --- a/src/ports/postgres/modules/graph/bfs.sql_in +++ b/src/ports/postgres/modules/graph/bfs.sql_in @@ -200,7 +200,7 @@ SELECT * FROM out ORDER BY dist,id; <pre class="result"> id | dist | parent ----+------+-------- - 3 | 0 | + 3 | 0 | 3 1 | 1 | 3 4 | 1 | 3 5 | 1 | 3 @@ -236,7 +236,7 @@ SELECT * FROM out_max ORDER BY dist,id; <pre class="result"> id | dist | parent ----+------+-------- - 3 | 0 | + 3 | 0 | 3 1 | 1 | 3 4 | 1 | 3 5 | 1 | 3 @@ -269,7 +269,7 @@ SELECT * FROM out_alt ORDER BY v_id; <pre class="result"> v_id | dist | parent ------+------+-------- - 8 | 0 | + 8 | 0 | 8 9 | 1 | 8 10 | 1 | 8 11 | 2 | 9 @@ -292,7 +292,7 @@ SELECT * FROM out_alt_dir ORDER BY v_id; <pre class="result"> v_id | dist | parent ------+------+-------- - 8 | 0 | + 8 | 0 | 8 9 | 1 | 8 10 | 2 | 9 11 | 2 | 9 @@ -348,11 +348,11 @@ SELECT * FROM out_gr ORDER BY g1,g2,dist,id; <pre class="result"> g1 | g2 | id | dist | parent -----+----+----+------+-------- - 100 | a | 8 | 0 | + 100 | a | 8 | 0 | 8 100 | a | 9 | 1 | 8 100 | a | 10 | 1 | 8 100 | a | 11 | 2 | 9 - 202 | c | 8 | 0 | + 202 | c | 8 | 0 | 8 202 | c | 9 | 1 | 8 202 | c | 10 | 1 | 8 202 | c | 11 | 2 | 9 @@ -378,7 +378,7 @@ SELECT * FROM out_gr ORDER BY g1,g2,dist,id; <pre class="result"> g1 | g2 | id | dist | parent -----+----+----+------+-------- - 100 | a | 3 | 0 | + 100 | a | 3 | 0 | 3 100 | a | 1 | 1 | 3 100 | a | 4 | 1 | 3 100 | a | 5 | 1 | 3 diff --git a/src/ports/postgres/modules/graph/measures.py_in b/src/ports/postgres/modules/graph/measures.py_in index a3dd778..3ec07e9 100644 --- a/src/ports/postgres/modules/graph/measures.py_in +++ b/src/ports/postgres/modules/graph/measures.py_in @@ -181,13 +181,7 @@ class Graph(object): CREATE TABLE {out_table} AS SELECT {grouping_cols_comma} - -- Filtering 'Infinity' occurs in CASE instead of WHERE clause - -- so that the edge is part of the average value i.e. part of - -- the count of paths but zero addition to sum of distances. - AVG(CASE WHEN {e.weight} = 'Infinity'::double precision - THEN 0::double precision - ELSE {e.weight}::double precision - END) as avg_path_length + AVG({e.weight}::double precision) as avg_path_length FROM {apsp_table} WHERE {e.src} != {e.dest} {filter_clause} diff --git a/src/ports/postgres/modules/graph/measures.sql_in b/src/ports/postgres/modules/graph/measures.sql_in index 92680ba..0879832 100644 --- a/src/ports/postgres/modules/graph/measures.sql_in +++ b/src/ports/postgres/modules/graph/measures.sql_in @@ -443,7 +443,10 @@ m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `CONTAINS SQL', `'); This function computes the average of the shortest paths between each pair of vertices. Average path length is based on "reachable target vertices", so it -ignores infinite-length paths between vertices that are not connected. +ignores infinite-length paths between vertices that are not connected. This means +the function will average the path lengths in each connected component. If the +user requires the average path length of a particular component, the +weakly_connected_components function may be used to isolate the relevant vertices. @note This function assumes a valid output from a prior APSP run - both the APSP table and the associated output summary table. APSP is a computationally @@ -532,7 +535,7 @@ SELECT * FROM out_avg_path_length; <pre class="result"> avg_path_length \------------------ - 2.01785714285714 + 2.973684210526316 (1 row) </pre> @@ -570,9 +573,9 @@ SELECT * FROM out_gr_path ORDER BY grp; </pre> <pre class="result"> grp | avg_path_length -\-------+-------------------- - 0 | 2.01785714285714 - 1 | 0.466666666666667 +\-------+---------------------- + 0 | 2.973684210526316 + 1 | 0.56 (2 rows) </pre> */ diff --git a/src/ports/postgres/modules/graph/sssp.py_in b/src/ports/postgres/modules/graph/sssp.py_in index 0819132..26ddf88 100644 --- a/src/ports/postgres/modules/graph/sssp.py_in +++ b/src/ports/postgres/modules/graph/sssp.py_in @@ -381,6 +381,10 @@ def graph_sssp(schema_madlib, vertex_table, vertex_id, edge_table, """sub-graphs of following groups: {0}.""". format(str(negs)[1:-1])) + # Filter out the infinite paths (disconnected pairs) + plpy.execute(""" DELETE FROM {0} WHERE parent IS NULL + """.format(out_table)) + plpy.execute("DROP TABLE IF EXISTS {0}".format(oldupdate)) return None diff --git a/src/ports/postgres/modules/graph/test/apsp.sql_in b/src/ports/postgres/modules/graph/test/apsp.sql_in index f98b76b..bf46260 100644 --- a/src/ports/postgres/modules/graph/test/apsp.sql_in +++ b/src/ports/postgres/modules/graph/test/apsp.sql_in @@ -119,3 +119,14 @@ CREATE TABLE e2 AS SELECT src::bigint, "DEST"::bigint, weight FROM "EDGE"; DROP TABLE IF EXISTS public.out2, public.out2_summary, public.out2_path; SELECT graph_apsp('v2',NULL,'e2','dest="DEST"','public.out2'); SELECT graph_apsp_get_path('public.out2',0,7,'public.out2_path'); + +-- Test for infinite paths +DROP TABLE IF EXISTS out, out_summary, out_path; +DELETE FROM "EDGE" WHERE "DEST" = 7; + +SELECT graph_apsp('vertex',NULL,'"EDGE"','dest="DEST"','out'); + +SELECT assert(count(*) = 0, 'Null parent in the out table') FROM +(SELECT * FROM out WHERE parent IS NULL)q1; + +SELECT graph_apsp_get_path('out',0,5,'out_path'); diff --git a/src/ports/postgres/modules/graph/test/bfs.sql_in b/src/ports/postgres/modules/graph/test/bfs.sql_in index 5a32b96..f36eeb6 100644 --- a/src/ports/postgres/modules/graph/test/bfs.sql_in +++ b/src/ports/postgres/modules/graph/test/bfs.sql_in @@ -296,3 +296,12 @@ CREATE TABLE e2 AS SELECT "SRC"::bigint, dest::bigint, weight FROM "EDGE"; DROP TABLE IF EXISTS public.out2, public.out2_summary; SELECT graph_bfs('v2',NULL,'e2','src="SRC"',3,'public.out2'); + +-- Test for infinite paths +DROP TABLE IF EXISTS out, out_summary, out_path; +DELETE FROM "EDGE" WHERE dest = 7; + +SELECT graph_bfs('vertex',NULL,'"EDGE"','src="SRC"',3,'out'); + +SELECT assert(count(*) = 0, 'Null parent in the out table') + FROM (SELECT * FROM out WHERE parent IS NULL)q1; diff --git a/src/ports/postgres/modules/graph/test/measures.sql_in b/src/ports/postgres/modules/graph/test/measures.sql_in index 38e2e72..a98faff 100644 --- a/src/ports/postgres/modules/graph/test/measures.sql_in +++ b/src/ports/postgres/modules/graph/test/measures.sql_in @@ -86,7 +86,7 @@ SELECT assert(diameter=14, 'Invalid value for diameter') FROM public.__madlib__o DROP TABLE IF EXISTS public.__madlib__out_avg_path_length; SELECT graph_avg_path_length('out_apsp', 'public.__madlib__out_avg_path_length'); SELECT * FROM public.__madlib__out_avg_path_length; -SELECT assert(relative_error(avg_path_length, 2.0178) < 1e-2, +SELECT assert(relative_error(avg_path_length, 2.97) < 1e-2, 'Invalid value for avg_path_length') FROM public.__madlib__out_avg_path_length; -- Compute the in and out degrees diff --git a/src/ports/postgres/modules/graph/test/sssp.sql_in b/src/ports/postgres/modules/graph/test/sssp.sql_in index 9f1a913..6a2f881 100644 --- a/src/ports/postgres/modules/graph/test/sssp.sql_in +++ b/src/ports/postgres/modules/graph/test/sssp.sql_in @@ -164,3 +164,14 @@ CREATE TABLE e2 AS SELECT src::bigint, dest::bigint, weight FROM "EDGE"; DROP TABLE IF EXISTS public.out2, public.out2_summary, public.out2_path; SELECT graph_sssp('v2',NULL,'e2',NULL,0,'public.out2'); SELECT graph_sssp_get_path('public.out2',5,'public.out2_path'); + +-- Test for infinite paths +DROP TABLE IF EXISTS out, out_summary, out_path; +DELETE FROM "EDGE" WHERE dest = 7; + +SELECT graph_sssp('vertex',NULL,'"EDGE"',NULL,0,'out'); + +SELECT assert(count(*) = 0, 'Null parent in the out table') FROM +(SELECT * FROM out WHERE parent IS NULL)q1; + +SELECT graph_sssp_get_path('out',5,'out_path');
