Github user hpandeycodeit commented on a diff in the pull request:
https://github.com/apache/madlib/pull/244#discussion_r175952633
--- Diff: src/ports/postgres/modules/graph/pagerank.py_in ---
@@ -527,14 +562,63 @@ def pagerank(schema_madlib, vertex_table, vertex_id,
edge_table, edge_args,
""".format(**locals()))
# Step 4: Cleanup
- plpy.execute("""DROP TABLE IF EXISTS {0},{1},{2},{3},{4},{5},{6}
+ plpy.execute("""DROP TABLE IF EXISTS
{0},{1},{2},{3},{4},{5},{6},{7}
""".format(out_cnts, edge_temp_table, cur, message, cur_unconv,
- message_unconv, nodes_with_no_incoming_edges))
+ message_unconv, nodes_with_no_incoming_edges,
personalized_nodes))
if grouping_cols:
plpy.execute("""DROP TABLE IF EXISTS {0},{1},{2}
""".format(vertices_per_group, temp_summary_table,
distinct_grp_table))
+
+def get_query_params_for_ppr(nodes_of_interest, damping_factor,
+ ppr_join_clause, vertex_id, edge_temp_table,
vertex_table,
+ cur_distribution, personalized_nodes):
+ """
+ This function prepares the join clause and the condition needed to
calculate the Personalized PageRank,
+ and returns the total number of user-provided nodes of interest, a
random-jump clause to be added
+ to the existing pagerank formula, and a join clause.
+
+ Args:
+ @param nodes_of_interest
+ @param damping_factor
+ @param ppr_join_clause
+ @param vertex_id
+ @param edge_temp_table
+ @param vertex_table
+ @param cur_distribution
+
+ Returns :
+ (Integer, String, String)
+
+ """
+ total_ppr_nodes = 0
+ random_jump_prob_ppr = ''
+
+ if nodes_of_interest:
+ total_ppr_nodes = len(nodes_of_interest)
+ init_value_ppr_nodes = 1.0 / total_ppr_nodes
+ # Create a Temp table that holds the Initial probabilities for the
+ # user provided nodes
+ plpy.execute("""
+ CREATE TEMP TABLE {personalized_nodes} AS
+ SELECT {vertex_id}, {init_value_ppr_nodes}::DOUBLE PRECISION
as pagerank
+ FROM {vertex_table} where {vertex_id} =
ANY(ARRAY{nodes_of_interest})
+ {cur_distribution}
+ """.format(**locals()))
+ ppr_join_clause = """ LEFT JOIN {personalized_nodes} on
+ {personalized_nodes}.{vertex_id} =
{edge_temp_table}.dest""".format(**locals())
+ prob_value = 1.0 - damping_factor
+
+ # In case of PPR, Assign the Random jump probability to the
nodes_of_interest only.
+ # For rest of the nodes, Random jump probability will be zero.
+ random_jump_prob_ppr = """ CASE when {edge_temp_table}.dest =
ANY(ARRAY{nodes_of_interest})
+ THEN {prob_value}
+ ELSE 0
+ END """.format(**locals())
+ return(total_ppr_nodes, random_jump_prob_ppr, ppr_join_clause)
+
+
def pagerank_help(schema_madlib, message, **kwargs):
--- End diff --
Added the explanation and example in the helper function
---