Github user hpandeycodeit commented on a diff in the pull request:
https://github.com/apache/madlib/pull/244#discussion_r175953987
--- Diff: src/ports/postgres/modules/graph/pagerank.py_in ---
@@ -149,25 +164,39 @@ def pagerank(schema_madlib, vertex_table, vertex_id,
edge_table, edge_args,
out_cnts = unique_string(desp='out_cnts')
out_cnts_cnt = unique_string(desp='cnt')
v1 = unique_string(desp='v1')
+ personalized_nodes = unique_string(desp='personalized_nodes')
if is_platform_pg():
cur_distribution = cnts_distribution = ''
else:
- cur_distribution = cnts_distribution = \
- "DISTRIBUTED BY ({0}{1})".format(
- grouping_cols_comma, vertex_id)
+ cur_distribution = cnts_distribution = "DISTRIBUTED BY
({0}{1})".format(
+ grouping_cols_comma, vertex_id)
cur_join_clause = """{edge_temp_table}.{dest} = {cur}.{vertex_id}
""".format(**locals())
out_cnts_join_clause = """{out_cnts}.{vertex_id} =
{edge_temp_table}.{src} """.format(**locals())
v1_join_clause = """{v1}.{vertex_id} = {edge_temp_table}.{src}
""".format(**locals())
+ # Get query params for Personalized Page Rank.
+ ppr_params = get_query_params_for_ppr(nodes_of_interest,
damping_factor,
+ ppr_join_clause, vertex_id,
+ edge_temp_table,
vertex_table, cur_distribution,
+ personalized_nodes)
+ total_ppr_nodes = ppr_params[0]
+ random_jump_prob_ppr = ppr_params[1]
+ ppr_join_clause = ppr_params[2]
+
random_probability = (1.0 - damping_factor) / n_vertices
+ if total_ppr_nodes > 0:
+ random_jump_prob = random_jump_prob_ppr
+ else:
+ random_jump_prob = random_probability
--- End diff --
This is used when we create the nodes_with_no_incoming_edges table.
---