Github user hpandeycodeit commented on a diff in the pull request: https://github.com/apache/madlib/pull/244#discussion_r175954793 --- Diff: src/ports/postgres/modules/graph/pagerank.py_in --- @@ -527,14 +562,63 @@ def pagerank(schema_madlib, vertex_table, vertex_id, edge_table, edge_args, """.format(**locals())) # Step 4: Cleanup - plpy.execute("""DROP TABLE IF EXISTS {0},{1},{2},{3},{4},{5},{6} + plpy.execute("""DROP TABLE IF EXISTS {0},{1},{2},{3},{4},{5},{6},{7} """.format(out_cnts, edge_temp_table, cur, message, cur_unconv, - message_unconv, nodes_with_no_incoming_edges)) + message_unconv, nodes_with_no_incoming_edges, personalized_nodes)) if grouping_cols: plpy.execute("""DROP TABLE IF EXISTS {0},{1},{2} """.format(vertices_per_group, temp_summary_table, distinct_grp_table)) + +def get_query_params_for_ppr(nodes_of_interest, damping_factor, + ppr_join_clause, vertex_id, edge_temp_table, vertex_table, + cur_distribution, personalized_nodes): + """ + This function will prepare the Join Clause and the condition to Calculate the Personalized Page Rank + and Returns Total number of user provided nodes of interest, A join Clause and a clause to be added + to existing formula to calculate pagerank. 
+ + Args: + @param nodes_of_interest + @param damping_factor + @param ppr_join_clause + @param vertex_id + @param edge_temp_table + @param vertex_table + @param cur_distribution + + Returns : + (Integer, String, String) + + """ + total_ppr_nodes = 0 + random_jump_prob_ppr = '' + + if nodes_of_interest: + total_ppr_nodes = len(nodes_of_interest) + init_value_ppr_nodes = 1.0 / total_ppr_nodes + # Create a Temp table that holds the Inital probabilities for the + # user provided nodes + plpy.execute(""" + CREATE TEMP TABLE {personalized_nodes} AS + SELECT {vertex_id}, {init_value_ppr_nodes}::DOUBLE PRECISION as pagerank + FROM {vertex_table} where {vertex_id} = ANY(ARRAY{nodes_of_interest}) + {cur_distribution} + """.format(**locals())) + ppr_join_clause = """ LEFT JOIN {personalized_nodes} on + {personalized_nodes}.{vertex_id} = {edge_temp_table}.dest""".format(**locals()) + prob_value = 1.0 - damping_factor + + # In case of PPR, Assign the Random jump probability to the nodes_of_interest only. + # For rest of the nodes, Random jump probability will be zero. + random_jump_prob_ppr = """ CASE when {edge_temp_table}.dest = ANY(ARRAY{nodes_of_interest}) + THEN {prob_value} + ELSE 0 + END """.format(**locals()) + return(total_ppr_nodes, random_jump_prob_ppr, ppr_join_clause) + + def pagerank_help(schema_madlib, message, **kwargs): --- End diff -- I also changed the DISTRIBUTED BY clause in get_query_params_for_ppr to fix the above error.
---