Github user jingyimei commented on a diff in the pull request:
https://github.com/apache/madlib/pull/244#discussion_r177897977
--- Diff: src/ports/postgres/modules/graph/pagerank.py_in ---
@@ -211,19 +261,30 @@ def pagerank(schema_madlib, vertex_table, vertex_id,
edge_table, edge_args,
distinct_grp_table, grouping_cols_list)
# Find number of vertices in each group, this is the normalizing factor
# for computing the random_prob
+ where_clause_ppr = ''
+ if nodes_of_interest > 0:
+ where_clause_ppr = """where __vertices__ =
ANY(ARRAY{nodes_of_interest})""".format(
+ **locals())
+ random_prob_grp = 1.0 - damping_factor
+ init_prob_grp = 1.0 / len(nodes_of_interest)
--- End diff --
Isn't len(nodes_of_interest) equal to total_ppr_nodes? If so, we could reuse
total_ppr_nodes here so that we don't need to run an O(n) operation again.
---