GitHub user jingyimei commented on a diff in the pull request:

    https://github.com/apache/madlib/pull/244#discussion_r177899442

    --- Diff: src/ports/postgres/modules/graph/pagerank.py_in ---
    @@ -211,19 +261,30 @@ def pagerank(schema_madlib, vertex_table, vertex_id, edge_table, edge_args,
                 distinct_grp_table, grouping_cols_list)
             # Find number of vertices in each group, this is the normalizing factor
             # for computing the random_prob
    +        where_clause_ppr = ''
    +        if nodes_of_interest > 0:
    +            where_clause_ppr = """where __vertices__ = ANY(ARRAY{nodes_of_interest})""".format(
    +                **locals())
    +            random_prob_grp = 1.0 - damping_factor
    +            init_prob_grp = 1.0 / len(nodes_of_interest)
    +        else:
    +            random_prob_grp = """{rand_damp}/COUNT(__vertices__)::DOUBLE PRECISION
    +                              """.format(**locals())
    +            init_prob_grp = """1/COUNT(__vertices__)::DOUBLE PRECISION""".format(
    +                **locals())
    +
             plpy.execute("DROP TABLE IF EXISTS {0}".format(vertices_per_group))
             plpy.execute("""CREATE TEMP TABLE {vertices_per_group} AS
                     SELECT {distinct_grp_table}.*,
    -                1/COUNT(__vertices__)::DOUBLE PRECISION AS {init_pr},
    -                {rand_damp}/COUNT(__vertices__)::DOUBLE PRECISION
    -                    AS {random_prob}
    +                {init_prob_grp} AS {init_pr},
    +                {random_prob_grp} as {random_prob}
                     FROM {distinct_grp_table} INNER JOIN (
                         SELECT {grouping_cols}, {src} AS __vertices__
                         FROM {edge_temp_table}
                         UNION
                         SELECT {grouping_cols}, {dest} FROM {edge_temp_table}
                     ){subq}
    -                ON {grouping_where_clause}
    +                ON {grouping_where_clause} {where_clause_ppr}
    --- End diff --

    Put {where_clause_ppr} on a new line.
---