GitHub user jingyimei commented on a diff in the pull request: https://github.com/apache/madlib/pull/244#discussion_r177897977 --- Diff: src/ports/postgres/modules/graph/pagerank.py_in --- @@ -211,19 +261,30 @@ def pagerank(schema_madlib, vertex_table, vertex_id, edge_table, edge_args, distinct_grp_table, grouping_cols_list) # Find number of vertices in each group, this is the normalizing factor # for computing the random_prob + where_clause_ppr = '' + if nodes_of_interest > 0: + where_clause_ppr = """where __vertices__ = ANY(ARRAY{nodes_of_interest})""".format( + **locals()) + random_prob_grp = 1.0 - damping_factor + init_prob_grp = 1.0 / len(nodes_of_interest) --- End diff -- Isn't len(nodes_of_interest) == total_ppr_nodes? If so, we could reuse total_ppr_nodes here so that we don't need to run an O(n) operation again.
---