Github user iyerr3 commented on a diff in the pull request:

    https://github.com/apache/madlib/pull/289#discussion_r201498820
  
    --- Diff: 
src/ports/postgres/modules/recursive_partitioning/random_forest.py_in ---
    @@ -1333,42 +1368,69 @@ def _create_group_table(
     # -------------------------------------------------------------------------
     
     
    -def _create_empty_result_table(schema_madlib, output_table_name):
    +def _create_empty_result_table(schema_madlib, output_table_name, 
importance):
         """Create the result table for all trees in the forest"""
    +    impurity_var_imp_str = """, impurity_var_importance double 
precision[]);""" if importance else ");"
    +
         sql_create_empty_result_table = """
                 CREATE TABLE {output_table_name} (
                     gid         integer,
                     sample_id   integer,
    -                tree        {schema_madlib}.bytea8);
    +                tree        {schema_madlib}.bytea8
    +                {impurity_var_imp_str}
                 """.format(**locals())
         plpy.notice("sql_create_empty_result_table:\n" + 
sql_create_empty_result_table)
         plpy.execute(sql_create_empty_result_table)
     # ------------------------------------------------------------
     
     
     def _insert_into_result_table(schema_madlib, tree_states, 
output_table_name,
    -                              grp_key_to_grp_cols, sample_id):
    +                              grp_key_to_grp_cols, sample_id, importance, 
grouping_cols):
         """Insert one tree to result table"""
    +
    +    impurity_var_imp_str = ''
    +    importance_query = ''
    +    importance_results = ''
    --- End diff --
    
    IMO these variables in else clause is easier to read. Also 
`importance_results` is limited to the `if` block and does not require to be 
defined outside that scope. 


---

Reply via email to