GitHub user hpandeycodeit commented on a diff in the pull request:

    https://github.com/apache/madlib/pull/244#discussion_r175952633
  
    --- Diff: src/ports/postgres/modules/graph/pagerank.py_in ---
    @@ -527,14 +562,63 @@ def pagerank(schema_madlib, vertex_table, vertex_id, edge_table, edge_args,
                     """.format(**locals()))
     
             # Step 4: Cleanup
    -        plpy.execute("""DROP TABLE IF EXISTS {0},{1},{2},{3},{4},{5},{6}
    +        plpy.execute("""DROP TABLE IF EXISTS {0},{1},{2},{3},{4},{5},{6},{7}
                 """.format(out_cnts, edge_temp_table, cur, message, cur_unconv,
    -                       message_unconv, nodes_with_no_incoming_edges))
    +                       message_unconv, nodes_with_no_incoming_edges, personalized_nodes))
             if grouping_cols:
                 plpy.execute("""DROP TABLE IF EXISTS {0},{1},{2}
                     """.format(vertices_per_group, temp_summary_table,
                                distinct_grp_table))
     
    +
    +def get_query_params_for_ppr(nodes_of_interest, damping_factor,
    +                             ppr_join_clause, vertex_id, edge_temp_table, vertex_table,
    +                             cur_distribution, personalized_nodes):
    +    """
    +     This function will prepare the Join Clause and the condition to Calculate the Personalized Page Rank
    +     and Returns Total number of user provided nodes of interest, A join Clause and a clause to be added
    +     to existing formula to calculate pagerank.
    +
    +     Args:
    +         @param nodes_of_interest
    +         @param damping_factor
    +         @param ppr_join_clause
    +         @param vertex_id
    +         @param edge_temp_table
    +         @param vertex_table
    +         @param cur_distribution
    +
    +     Returns :
    +             (Integer, String, String)
    +
    +    """
    +    total_ppr_nodes = 0
    +    random_jump_prob_ppr = ''
    +
    +    if nodes_of_interest:
    +        total_ppr_nodes = len(nodes_of_interest)
    +        init_value_ppr_nodes = 1.0 / total_ppr_nodes
    +        # Create a Temp table that holds the Initial probabilities for the
    +        # user provided nodes
    +        plpy.execute("""
    +            CREATE TEMP TABLE {personalized_nodes} AS
    +            SELECT {vertex_id}, {init_value_ppr_nodes}::DOUBLE PRECISION as pagerank
    +            FROM {vertex_table} where {vertex_id} =  ANY(ARRAY{nodes_of_interest})
    +            {cur_distribution}
    +        """.format(**locals()))
    +        ppr_join_clause = """ LEFT  JOIN {personalized_nodes} on
    +                {personalized_nodes}.{vertex_id} = {edge_temp_table}.dest""".format(**locals())
    +        prob_value = 1.0 - damping_factor
    +
    +        # In case of PPR, Assign the Random jump probability to the nodes_of_interest only.
    +        # For rest of the nodes, Random jump probability  will be zero.
    +        random_jump_prob_ppr = """ CASE when {edge_temp_table}.dest = ANY(ARRAY{nodes_of_interest})
    +                                THEN {prob_value}
    +                                ELSE 0
    +                                END """.format(**locals())
    +    return(total_ppr_nodes, random_jump_prob_ppr, ppr_join_clause)
    +
    +
     def pagerank_help(schema_madlib, message, **kwargs):
    --- End diff --
    
    Added the explanation and example in the helper function.


---

Reply via email to