GitHub user jingyimei commented on a diff in the pull request: https://github.com/apache/madlib/pull/244#discussion_r175663431 --- Diff: src/ports/postgres/modules/graph/pagerank.py_in --- @@ -149,25 +164,39 @@ def pagerank(schema_madlib, vertex_table, vertex_id, edge_table, edge_args, out_cnts = unique_string(desp='out_cnts') out_cnts_cnt = unique_string(desp='cnt') v1 = unique_string(desp='v1') + personalized_nodes = unique_string(desp='personalized_nodes') if is_platform_pg(): cur_distribution = cnts_distribution = '' else: - cur_distribution = cnts_distribution = \ - "DISTRIBUTED BY ({0}{1})".format( - grouping_cols_comma, vertex_id) + cur_distribution = cnts_distribution = "DISTRIBUTED BY ({0}{1})".format( + grouping_cols_comma, vertex_id) cur_join_clause = """{edge_temp_table}.{dest} = {cur}.{vertex_id} """.format(**locals()) out_cnts_join_clause = """{out_cnts}.{vertex_id} = {edge_temp_table}.{src} """.format(**locals()) v1_join_clause = """{v1}.{vertex_id} = {edge_temp_table}.{src} """.format(**locals()) + # Get query params for Personalized Page Rank. + ppr_params = get_query_params_for_ppr(nodes_of_interest, damping_factor, + ppr_join_clause, vertex_id, + edge_temp_table, vertex_table, cur_distribution, + personalized_nodes) + total_ppr_nodes = ppr_params[0] + random_jump_prob_ppr = ppr_params[1] + ppr_join_clause = ppr_params[2] + random_probability = (1.0 - damping_factor) / n_vertices + if total_ppr_nodes > 0: + random_jump_prob = random_jump_prob_ppr + else: + random_jump_prob = random_probability --- End diff -- The expression `(1.0 - damping_factor) / n_vertices` can be moved here, directly into the `else` branch assignment, since `random_probability` is not used anywhere else.
---