Github user jingyimei commented on a diff in the pull request:
https://github.com/apache/madlib/pull/244#discussion_r177899442
--- Diff: src/ports/postgres/modules/graph/pagerank.py_in ---
@@ -211,19 +261,30 @@ def pagerank(schema_madlib, vertex_table, vertex_id,
edge_table, edge_args,
distinct_grp_table, grouping_cols_list)
# Find number of vertices in each group, this is the
normalizing factor
# for computing the random_prob
+ where_clause_ppr = ''
+ if nodes_of_interest > 0:
+ where_clause_ppr = """where __vertices__ =
ANY(ARRAY{nodes_of_interest})""".format(
+ **locals())
+ random_prob_grp = 1.0 - damping_factor
+ init_prob_grp = 1.0 / len(nodes_of_interest)
+ else:
+ random_prob_grp =
"""{rand_damp}/COUNT(__vertices__)::DOUBLE PRECISION
+ """.format(**locals())
+ init_prob_grp = """1/COUNT(__vertices__)::DOUBLE
PRECISION""".format(
+ **locals())
+
plpy.execute("DROP TABLE IF EXISTS
{0}".format(vertices_per_group))
plpy.execute("""CREATE TEMP TABLE {vertices_per_group} AS
SELECT {distinct_grp_table}.*,
- 1/COUNT(__vertices__)::DOUBLE PRECISION AS {init_pr},
- {rand_damp}/COUNT(__vertices__)::DOUBLE PRECISION
- AS {random_prob}
+ {init_prob_grp} AS {init_pr},
+ {random_prob_grp} as {random_prob}
FROM {distinct_grp_table} INNER JOIN (
SELECT {grouping_cols}, {src} AS __vertices__
FROM {edge_temp_table}
UNION
SELECT {grouping_cols}, {dest} FROM
{edge_temp_table}
){subq}
- ON {grouping_where_clause}
+ ON {grouping_where_clause} {where_clause_ppr}
--- End diff --
Put {where_clause_ppr} on a new line.
---