GitHub user njayaram2 commented on a diff in the pull request:

    https://github.com/apache/madlib/pull/277#discussion_r196150565

    --- Diff: src/ports/postgres/modules/recursive_partitioning/decision_tree.py_in ---
    @@ -1097,28 +1121,21 @@ def _one_step(schema_madlib, training_table_name, cat_features,
                 "$3", "$2", null_proxy)

    -    # The arguments of the aggregate (in the same order):
    -    # 1. current tree state, madlib.bytea8
    -    # 2. categorical features (integer format) in a single array
    -    # 3. continuous features in a single array
    -    # 4. weight value
    -    # 5. categorical sorted levels (integer format) in a combined array
    -    # 6. continuous splits
    -    # 7. number of dependent levels
         train_sql = """
             SELECT (result).* from (
                 SELECT
    -                {schema_madlib}._dt_apply($1,
    +                {schema_madlib}._dt_apply(
    +                    $1,
                         {schema_madlib}._compute_leaf_stats(
    -                        $1,
    -                        {cat_features_str},
    -                        {con_features_str},
    +                        $1, -- current tree state, madlib.bytea8
    +                        {cat_features_str}, -- categorical features in an array
    +                        {con_features_str}, -- continuous features in an array
                             {dep_var},
    -                        {weights},
    -                        $2,
    -                        $4,
    -                        {dep_n_levels}::smallint,
    -                        {subsample}::boolean
    +                        {weights}, -- weight value
    +                        $2, -- categorical sorted levels in a combined array
    +                        $4, -- continuous splits
    +                        {dep_n_levels}::smallint, -- number of dependent levels
    +                        {subsample}::boolean -- should we use a subsample of data
    --- End diff --

    Oh okay, thank you. I think a comment will be useful.
---