Github user iyerr3 commented on a diff in the pull request:

    https://github.com/apache/madlib/pull/295#discussion_r203811384
  
    --- Diff: src/ports/postgres/modules/recursive_partitioning/decision_tree.py_in ---
    @@ -2327,6 +2328,110 @@ def _tree_error(schema_madlib, source_table, 
dependent_varname,
             plpy.execute(sql)
     # ------------------------------------------------------------
     
    +def _validate_var_importance_input(model_table, summary_table, 
output_table):
    +    _assert(table_exists(model_table),
    +            "Recursive Partitioning: Model table does not exist.")
    +    _assert(table_exists(summary_table),
    +            "Recursive Partitioning: Model summary table does not exist.")
    +    _assert(not table_exists(output_table),
    +            "Recursive Partitioning: Output table already exists.")
    +
    +def _is_model_for_RF(summary_table):
    +    # Only an RF model (and not DT) would have num_trees column in summary
    +    return columns_exist_in_table(summary_table, ['num_trees'])
    +
    +def _is_RF_model_with_imp_pre_1_15(group_table, summary_table):
    +    """
    +        Check if the RF model is from MADlib < 1.15. The group table for
    +        >= 1.15 RF models should have a column named 
impurity_var_importance
    +        if it was learnt with importance param True.
    +    """
    +    _assert(table_exists(group_table),
    +        "Recursive Partitioning: Model group table does not exist.")
    +    # this flag has to be set to true for RF to report importance scores.
    +    isImportance = plpy.execute("SELECT importance FROM {summary_table}".
    --- End diff --
    
    Our convention is to use snake case for Python variable names (e.g. `is_importance` instead of `isImportance`). I also suggest renaming it to `is_importance_set` to make its meaning more explicit.


---

Reply via email to