[ 
https://issues.apache.org/jira/browse/MADLIB-1217?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Frank McQuillan updated MADLIB-1217:
------------------------------------
    Description: 
Load data: 

{code}
DROP TABLE IF EXISTS mt_cars;

CREATE TABLE mt_cars (
    id integer NOT NULL,
    mpg double precision,
    cyl integer,
    disp double precision,
    hp integer,
    drat double precision,
    wt double precision,
    qsec double precision,
    vs integer,
    am integer,
    gear integer,
    carb integer
);

INSERT INTO mt_cars VALUES
(1,18.7,8,360,175,3.15,3.44,17.02,0,0,3,2),
(2,21,6,160,110,3.9,2.62,16.46,0,1,4,4),
(3,24.4,4,146.7,62,3.69,3.19,20,1,0,4,2),
(4,21,6,160,110,3.9,2.875,17.02,0,1,4,4),
(5,17.8,6,167.6,123,3.92,3.44,18.9,1,0,4,4),
(6,16.4,8,275.8,180,3.078,4.07,17.4,0,0,3,3),
(7,22.8,4,108,93,3.85,2.32,18.61,1,1,4,1),
(8,17.3,8,275.8,180,3.078,3.73,17.6,0,0,3,3),
(9,21.4,null,258,110,3.08,3.215,19.44,1,0,3,1),
(10,15.2,8,275.8,180,3.078,3.78,18,0,0,3,3),
(11,18.1,6,225,105,2.768,3.46,20.22,1,0,3,1),
(12,32.4,4,78.7,66,4.08,2.20,19.47,1,1,4,1),
(13,14.3,8,360,245,3.21,3.578,15.84,0,0,3,4),
(14,22.8,4,140.8,95,3.92,3.15,22.9,1,0,4,2),
(15,30.4,4,75.7,52,4.93,1.615,18.52,1,1,4,2),
(16,19.2,6,167.6,123,3.92,3.44,18.3,1,0,4,4),
(17,33.9,4,71.14,65,4.22,1.835,19.9,1,1,4,1),
(18,15.2,null,304,150,3.15,3.435,17.3,0,0,3,2),
(19,10.4,8,472,205,2.93,5.25,17.98,0,0,3,4),
(20,27.3,4,79,66,4.08,1.935,18.9,1,1,4,1),
(21,10.4,8,460,215,3,5.424,17.82,0,0,3,4),
(22,26,4,120.3,91,4.43,2.14,16.7,0,1,5,2),
(23,14.7,8,440,230,3.23,5.345,17.42,0,0,3,4),
(24,30.4,4,95.14,113,3.77,1.513,16.9,1,1,5,2),
(25,21.5,4,120.1,97,3.70,2.465,20.01,1,0,3,1),
(26,15.8,8,351,264,4.22,3.17,14.5,0,1,5,4),
(27,15.5,8,318,150,2.768,3.52,16.87,0,0,3,2),
(28,15,8,301,335,3.54,3.578,14.6,0,1,5,8),
(29,13.3,8,350,245,3.73,3.84,15.41,0,0,3,4),
(30,19.2,8,400,175,3.08,3.845,17.05,0,0,3,2),
(31,19.7,6,145,175,3.62,2.77,15.5,0,1,5,6),
(32,21.4,4,121,109,4.11,2.78,18.6,1,1,4,2);
{code}

DT:

{code}
DROP TABLE IF EXISTS train_output, train_output_summary, train_output_cv;

SELECT madlib.tree_train('mt_cars',         -- source table
                         'train_output',    -- output model table
                         'id',              -- id column
                         'mpg',             -- dependent variable
                         '*',               -- features
                         'id, hp, drat, am, gear, carb',  -- exclude columns
                         'mse',             -- split criterion
                         'am',        -- grouping
                         NULL::text,        -- no weights, all observations 
treated equally
                         10,                -- max depth
                         8,                 -- min split
                         3,                 -- number of bins per continuous 
variable
                         10,                -- number of splits
                         NULL,              -- pruning parameters
                         'null_as_category=TRUE'
                         );
{code}
results in error
{code}
ERROR:  plpy.SPIError: column "__null__" does not exist
LINE 14:                             (COALESCE(vs, __NULL__))::text a...
                                                   ^
QUERY:  
                SELECT
                    colname::text,
                    levels::text[],
                    grp_key::text
                from (
                    SELECT
                        grp_key,
                        'vs' as colname,
                        array_agg(levels order by dep_avg) as levels
                    from (
                        SELECT
                            array_to_string(array[(am)::text]::text[], ',') as 
grp_key,
                            (COALESCE(vs, __NULL__))::text as levels,
                            COALESCE(vs, __NULL__) as dep_avg
                        FROM mt_cars
                        WHERE (am) is not NULL and (mpg) is not NULL AND 
COALESCE(vs, __NULL__) IS NOT NULL
                        GROUP BY COALESCE(vs, __NULL__), am
                    ) s
                    GROUP BY grp_key
                ) s1
                where array_upper(levels, 1) > 1
                 UNION ALL 
                SELECT
                    colname::text,
                    levels::text[],
                    grp_key::text
                from (
                    SELECT
                        grp_key,
                        'cyl' as colname,
                        array_agg(levels order by dep_avg) as levels
                    from (
                        SELECT
                            array_to_string(array[(am)::text]::text[], ',') as 
grp_key,
                            (COALESCE(cyl, __NULL__))::text as levels,
                            COALESCE(cyl, __NULL__) as dep_avg
                        FROM mt_cars
                        WHERE (am) is not NULL and (mpg) is not NULL AND 
COALESCE(cyl, __NULL__) IS NOT NULL
                        GROUP BY COALESCE(cyl, __NULL__), am
                    ) s
                    GROUP BY grp_key
                ) s1
                where array_upper(levels, 1) > 1
                
CONTEXT:  Traceback (most recent call last):
  PL/Python function "tree_train", line 28, in <module>
    null_handling_params, verbose_mode)
  PL/Python function "tree_train", line 489, in tree_train
  PL/Python function "tree_train", line 293, in _get_tree_states
  PL/Python function "tree_train", line 963, in _get_bins_grps
PL/Python function "tree_train"
{code}

RF:

{code}
DROP TABLE IF EXISTS mt_cars_output, mt_cars_output_group, 
mt_cars_output_summary;

SELECT madlib.forest_train('mt_cars',
                           'mt_cars_output',
                           'id',
                           'mpg',
                           '*',
                           'id, hp, drat, am, gear, carb',  -- exclude columns
                           'am', -- grouping
                           10::integer,
                           2::integer,
                           TRUE::boolean,
                           1,
                           10,
                           8,
                           3,
                           10,
                           'null_as_category=TRUE'
                           );
{code}
results in error:
{code}
ERROR:  plpy.SPIError: column "__null__" does not exist
LINE 14:                             (COALESCE(vs, __NULL__))::text a...
                                                   ^
QUERY:  
                SELECT
                    colname::text,
                    levels::text[],
                    grp_key::text
                from (
                    SELECT
                        grp_key,
                        'vs' as colname,
                        array_agg(levels order by dep_avg) as levels
                    from (
                        SELECT
                            array_to_string(array[(am)::text], ',') as grp_key,
                            (COALESCE(vs, __NULL__))::text as levels,
                            COALESCE(vs, __NULL__) as dep_avg
                        FROM mt_cars
                        WHERE (am) is not NULL and (mpg) is not NULL AND 
COALESCE(vs, __NULL__) IS NOT NULL
                        GROUP BY COALESCE(vs, __NULL__), am
                    ) s
                    GROUP BY grp_key
                ) s1
                where array_upper(levels, 1) > 1
                 UNION ALL 
                SELECT
                    colname::text,
                    levels::text[],
                    grp_key::text
                from (
                    SELECT
                        grp_key,
                        'cyl' as colname,
                        array_agg(levels order by dep_avg) as levels
                    from (
                        SELECT
                            array_to_string(array[(am)::text], ',') as grp_key,
                            (COALESCE(cyl, __NULL__))::text as levels,
                            COALESCE(cyl, __NULL__) as dep_avg
                        FROM mt_cars
                        WHERE (am) is not NULL and (mpg) is not NULL AND 
COALESCE(cyl, __NULL__) IS NOT NULL
                        GROUP BY COALESCE(cyl, __NULL__), am
                    ) s
                    GROUP BY grp_key
                ) s1
                where array_upper(levels, 1) > 1
                
CONTEXT:  Traceback (most recent call last):
  PL/Python function "forest_train", line 42, in <module>
    sample_ratio
  PL/Python function "forest_train", line 427, in forest_train
  PL/Python function "forest_train", line 963, in _get_bins_grps
PL/Python function "forest_train"
{code}




  was:
{code}
DROP TABLE IF EXISTS mt_cars;

CREATE TABLE mt_cars (
    id integer NOT NULL,
    mpg double precision,
    cyl integer,
    disp double precision,
    hp integer,
    drat double precision,
    wt double precision,
    qsec double precision,
    vs integer,
    am integer,
    gear integer,
    carb integer
);

INSERT INTO mt_cars VALUES
(1,18.7,8,360,175,3.15,3.44,17.02,0,0,3,2),
(2,21,6,160,110,3.9,2.62,16.46,0,1,4,4),
(3,24.4,4,146.7,62,3.69,3.19,20,1,0,4,2),
(4,21,6,160,110,3.9,2.875,17.02,0,1,4,4),
(5,17.8,6,167.6,123,3.92,3.44,18.9,1,0,4,4),
(6,16.4,8,275.8,180,3.078,4.07,17.4,0,0,3,3),
(7,22.8,4,108,93,3.85,2.32,18.61,1,1,4,1),
(8,17.3,8,275.8,180,3.078,3.73,17.6,0,0,3,3),
(9,21.4,null,258,110,3.08,3.215,19.44,1,0,3,1),
(10,15.2,8,275.8,180,3.078,3.78,18,0,0,3,3),
(11,18.1,6,225,105,2.768,3.46,20.22,1,0,3,1),
(12,32.4,4,78.7,66,4.08,2.20,19.47,1,1,4,1),
(13,14.3,8,360,245,3.21,3.578,15.84,0,0,3,4),
(14,22.8,4,140.8,95,3.92,3.15,22.9,1,0,4,2),
(15,30.4,4,75.7,52,4.93,1.615,18.52,1,1,4,2),
(16,19.2,6,167.6,123,3.92,3.44,18.3,1,0,4,4),
(17,33.9,4,71.14,65,4.22,1.835,19.9,1,1,4,1),
(18,15.2,null,304,150,3.15,3.435,17.3,0,0,3,2),
(19,10.4,8,472,205,2.93,5.25,17.98,0,0,3,4),
(20,27.3,4,79,66,4.08,1.935,18.9,1,1,4,1),
(21,10.4,8,460,215,3,5.424,17.82,0,0,3,4),
(22,26,4,120.3,91,4.43,2.14,16.7,0,1,5,2),
(23,14.7,8,440,230,3.23,5.345,17.42,0,0,3,4),
(24,30.4,4,95.14,113,3.77,1.513,16.9,1,1,5,2),
(25,21.5,4,120.1,97,3.70,2.465,20.01,1,0,3,1),
(26,15.8,8,351,264,4.22,3.17,14.5,0,1,5,4),
(27,15.5,8,318,150,2.768,3.52,16.87,0,0,3,2),
(28,15,8,301,335,3.54,3.578,14.6,0,1,5,8),
(29,13.3,8,350,245,3.73,3.84,15.41,0,0,3,4),
(30,19.2,8,400,175,3.08,3.845,17.05,0,0,3,2),
(31,19.7,6,145,175,3.62,2.77,15.5,0,1,5,6),
(32,21.4,4,121,109,4.11,2.78,18.6,1,1,4,2);
{code}

DT:

{code}
DROP TABLE IF EXISTS train_output, train_output_summary, train_output_cv;

SELECT madlib.tree_train('mt_cars',         -- source table
                         'train_output',    -- output model table
                         'id',              -- id column
                         'mpg',             -- dependent variable
                         '*',               -- features
                         'id, hp, drat, am, gear, carb',  -- exclude columns
                         'mse',             -- split criterion
                         'am',        -- grouping
                         NULL::text,        -- no weights, all observations 
treated equally
                         10,                -- max depth
                         8,                 -- min split
                         3,                 -- number of bins per continuous 
variable
                         10,                -- number of splits
                         NULL,              -- pruning parameters
                         'null_as_category=TRUE'
                         );
{code}
results in error
{code}
ERROR:  plpy.SPIError: column "__null__" does not exist
LINE 14:                             (COALESCE(vs, __NULL__))::text a...
                                                   ^
QUERY:  
                SELECT
                    colname::text,
                    levels::text[],
                    grp_key::text
                from (
                    SELECT
                        grp_key,
                        'vs' as colname,
                        array_agg(levels order by dep_avg) as levels
                    from (
                        SELECT
                            array_to_string(array[(am)::text]::text[], ',') as 
grp_key,
                            (COALESCE(vs, __NULL__))::text as levels,
                            COALESCE(vs, __NULL__) as dep_avg
                        FROM mt_cars
                        WHERE (am) is not NULL and (mpg) is not NULL AND 
COALESCE(vs, __NULL__) IS NOT NULL
                        GROUP BY COALESCE(vs, __NULL__), am
                    ) s
                    GROUP BY grp_key
                ) s1
                where array_upper(levels, 1) > 1
                 UNION ALL 
                SELECT
                    colname::text,
                    levels::text[],
                    grp_key::text
                from (
                    SELECT
                        grp_key,
                        'cyl' as colname,
                        array_agg(levels order by dep_avg) as levels
                    from (
                        SELECT
                            array_to_string(array[(am)::text]::text[], ',') as 
grp_key,
                            (COALESCE(cyl, __NULL__))::text as levels,
                            COALESCE(cyl, __NULL__) as dep_avg
                        FROM mt_cars
                        WHERE (am) is not NULL and (mpg) is not NULL AND 
COALESCE(cyl, __NULL__) IS NOT NULL
                        GROUP BY COALESCE(cyl, __NULL__), am
                    ) s
                    GROUP BY grp_key
                ) s1
                where array_upper(levels, 1) > 1
                
CONTEXT:  Traceback (most recent call last):
  PL/Python function "tree_train", line 28, in <module>
    null_handling_params, verbose_mode)
  PL/Python function "tree_train", line 489, in tree_train
  PL/Python function "tree_train", line 293, in _get_tree_states
  PL/Python function "tree_train", line 963, in _get_bins_grps
PL/Python function "tree_train"
{code}

RF:

{code}
DROP TABLE IF EXISTS mt_cars_output, mt_cars_output_group, 
mt_cars_output_summary;

SELECT madlib.forest_train('mt_cars',
                           'mt_cars_output',
                           'id',
                           'mpg',
                           '*',
                           'id, hp, drat, am, gear, carb',  -- exclude columns
                           'am',
                           10::integer,
                           2::integer,
                           TRUE::boolean,
                           1,
                           10,
                           8,
                           3,
                           10,
                           'null_as_category=TRUE'
                           );
{code}
results in error:
{code}
ERROR:  plpy.SPIError: column "__null__" does not exist
LINE 14:                             (COALESCE(vs, __NULL__))::text a...
                                                   ^
QUERY:  
                SELECT
                    colname::text,
                    levels::text[],
                    grp_key::text
                from (
                    SELECT
                        grp_key,
                        'vs' as colname,
                        array_agg(levels order by dep_avg) as levels
                    from (
                        SELECT
                            array_to_string(array[(am)::text], ',') as grp_key,
                            (COALESCE(vs, __NULL__))::text as levels,
                            COALESCE(vs, __NULL__) as dep_avg
                        FROM mt_cars
                        WHERE (am) is not NULL and (mpg) is not NULL AND 
COALESCE(vs, __NULL__) IS NOT NULL
                        GROUP BY COALESCE(vs, __NULL__), am
                    ) s
                    GROUP BY grp_key
                ) s1
                where array_upper(levels, 1) > 1
                 UNION ALL 
                SELECT
                    colname::text,
                    levels::text[],
                    grp_key::text
                from (
                    SELECT
                        grp_key,
                        'cyl' as colname,
                        array_agg(levels order by dep_avg) as levels
                    from (
                        SELECT
                            array_to_string(array[(am)::text], ',') as grp_key,
                            (COALESCE(cyl, __NULL__))::text as levels,
                            COALESCE(cyl, __NULL__) as dep_avg
                        FROM mt_cars
                        WHERE (am) is not NULL and (mpg) is not NULL AND 
COALESCE(cyl, __NULL__) IS NOT NULL
                        GROUP BY COALESCE(cyl, __NULL__), am
                    ) s
                    GROUP BY grp_key
                ) s1
                where array_upper(levels, 1) > 1
                
CONTEXT:  Traceback (most recent call last):
  PL/Python function "forest_train", line 42, in <module>
    sample_ratio
  PL/Python function "forest_train", line 427, in forest_train
  PL/Python function "forest_train", line 963, in _get_bins_grps
PL/Python function "forest_train"
{code}





> DT and RF:  null_as_category null handling does not work with grouping
> ----------------------------------------------------------------------
>
>                 Key: MADLIB-1217
>                 URL: https://issues.apache.org/jira/browse/MADLIB-1217
>             Project: Apache MADlib
>          Issue Type: Bug
>          Components: Module: Decision Tree, Module: Random Forest
>            Reporter: Frank McQuillan
>            Priority: Minor
>             Fix For: v1.14
>
>
> Load data: 
> {code}
> DROP TABLE IF EXISTS mt_cars;
> CREATE TABLE mt_cars (
>     id integer NOT NULL,
>     mpg double precision,
>     cyl integer,
>     disp double precision,
>     hp integer,
>     drat double precision,
>     wt double precision,
>     qsec double precision,
>     vs integer,
>     am integer,
>     gear integer,
>     carb integer
> );
> INSERT INTO mt_cars VALUES
> (1,18.7,8,360,175,3.15,3.44,17.02,0,0,3,2),
> (2,21,6,160,110,3.9,2.62,16.46,0,1,4,4),
> (3,24.4,4,146.7,62,3.69,3.19,20,1,0,4,2),
> (4,21,6,160,110,3.9,2.875,17.02,0,1,4,4),
> (5,17.8,6,167.6,123,3.92,3.44,18.9,1,0,4,4),
> (6,16.4,8,275.8,180,3.078,4.07,17.4,0,0,3,3),
> (7,22.8,4,108,93,3.85,2.32,18.61,1,1,4,1),
> (8,17.3,8,275.8,180,3.078,3.73,17.6,0,0,3,3),
> (9,21.4,null,258,110,3.08,3.215,19.44,1,0,3,1),
> (10,15.2,8,275.8,180,3.078,3.78,18,0,0,3,3),
> (11,18.1,6,225,105,2.768,3.46,20.22,1,0,3,1),
> (12,32.4,4,78.7,66,4.08,2.20,19.47,1,1,4,1),
> (13,14.3,8,360,245,3.21,3.578,15.84,0,0,3,4),
> (14,22.8,4,140.8,95,3.92,3.15,22.9,1,0,4,2),
> (15,30.4,4,75.7,52,4.93,1.615,18.52,1,1,4,2),
> (16,19.2,6,167.6,123,3.92,3.44,18.3,1,0,4,4),
> (17,33.9,4,71.14,65,4.22,1.835,19.9,1,1,4,1),
> (18,15.2,null,304,150,3.15,3.435,17.3,0,0,3,2),
> (19,10.4,8,472,205,2.93,5.25,17.98,0,0,3,4),
> (20,27.3,4,79,66,4.08,1.935,18.9,1,1,4,1),
> (21,10.4,8,460,215,3,5.424,17.82,0,0,3,4),
> (22,26,4,120.3,91,4.43,2.14,16.7,0,1,5,2),
> (23,14.7,8,440,230,3.23,5.345,17.42,0,0,3,4),
> (24,30.4,4,95.14,113,3.77,1.513,16.9,1,1,5,2),
> (25,21.5,4,120.1,97,3.70,2.465,20.01,1,0,3,1),
> (26,15.8,8,351,264,4.22,3.17,14.5,0,1,5,4),
> (27,15.5,8,318,150,2.768,3.52,16.87,0,0,3,2),
> (28,15,8,301,335,3.54,3.578,14.6,0,1,5,8),
> (29,13.3,8,350,245,3.73,3.84,15.41,0,0,3,4),
> (30,19.2,8,400,175,3.08,3.845,17.05,0,0,3,2),
> (31,19.7,6,145,175,3.62,2.77,15.5,0,1,5,6),
> (32,21.4,4,121,109,4.11,2.78,18.6,1,1,4,2);
> {code}
> DT:
> {code}
> DROP TABLE IF EXISTS train_output, train_output_summary, train_output_cv;
> SELECT madlib.tree_train('mt_cars',         -- source table
>                          'train_output',    -- output model table
>                          'id',              -- id column
>                          'mpg',             -- dependent variable
>                          '*',               -- features
>                          'id, hp, drat, am, gear, carb',  -- exclude columns
>                          'mse',             -- split criterion
>                          'am',        -- grouping
>                          NULL::text,        -- no weights, all observations 
> treated equally
>                          10,                -- max depth
>                          8,                 -- min split
>                          3,                 -- number of bins per continuous 
> variable
>                          10,                -- number of splits
>                          NULL,              -- pruning parameters
>                          'null_as_category=TRUE'
>                          );
> {code}
> results in error
> {code}
> ERROR:  plpy.SPIError: column "__null__" does not exist
> LINE 14:                             (COALESCE(vs, __NULL__))::text a...
>                                                    ^
> QUERY:  
>                 SELECT
>                     colname::text,
>                     levels::text[],
>                     grp_key::text
>                 from (
>                     SELECT
>                         grp_key,
>                         'vs' as colname,
>                         array_agg(levels order by dep_avg) as levels
>                     from (
>                         SELECT
>                             array_to_string(array[(am)::text]::text[], ',') 
> as grp_key,
>                             (COALESCE(vs, __NULL__))::text as levels,
>                             COALESCE(vs, __NULL__) as dep_avg
>                         FROM mt_cars
>                         WHERE (am) is not NULL and (mpg) is not NULL AND 
> COALESCE(vs, __NULL__) IS NOT NULL
>                         GROUP BY COALESCE(vs, __NULL__), am
>                     ) s
>                     GROUP BY grp_key
>                 ) s1
>                 where array_upper(levels, 1) > 1
>                  UNION ALL 
>                 SELECT
>                     colname::text,
>                     levels::text[],
>                     grp_key::text
>                 from (
>                     SELECT
>                         grp_key,
>                         'cyl' as colname,
>                         array_agg(levels order by dep_avg) as levels
>                     from (
>                         SELECT
>                             array_to_string(array[(am)::text]::text[], ',') 
> as grp_key,
>                             (COALESCE(cyl, __NULL__))::text as levels,
>                             COALESCE(cyl, __NULL__) as dep_avg
>                         FROM mt_cars
>                         WHERE (am) is not NULL and (mpg) is not NULL AND 
> COALESCE(cyl, __NULL__) IS NOT NULL
>                         GROUP BY COALESCE(cyl, __NULL__), am
>                     ) s
>                     GROUP BY grp_key
>                 ) s1
>                 where array_upper(levels, 1) > 1
>                 
> CONTEXT:  Traceback (most recent call last):
>   PL/Python function "tree_train", line 28, in <module>
>     null_handling_params, verbose_mode)
>   PL/Python function "tree_train", line 489, in tree_train
>   PL/Python function "tree_train", line 293, in _get_tree_states
>   PL/Python function "tree_train", line 963, in _get_bins_grps
> PL/Python function "tree_train"
> {code}
> RF:
> {code}
> DROP TABLE IF EXISTS mt_cars_output, mt_cars_output_group, 
> mt_cars_output_summary;
> SELECT madlib.forest_train('mt_cars',
>                            'mt_cars_output',
>                            'id',
>                            'mpg',
>                            '*',
>                            'id, hp, drat, am, gear, carb',  -- exclude columns
>                            'am', -- grouping
>                            10::integer,
>                            2::integer,
>                            TRUE::boolean,
>                            1,
>                            10,
>                            8,
>                            3,
>                            10,
>                            'null_as_category=TRUE'
>                            );
> {code}
> results in error:
> {code}
> ERROR:  plpy.SPIError: column "__null__" does not exist
> LINE 14:                             (COALESCE(vs, __NULL__))::text a...
>                                                    ^
> QUERY:  
>                 SELECT
>                     colname::text,
>                     levels::text[],
>                     grp_key::text
>                 from (
>                     SELECT
>                         grp_key,
>                         'vs' as colname,
>                         array_agg(levels order by dep_avg) as levels
>                     from (
>                         SELECT
>                             array_to_string(array[(am)::text], ',') as 
> grp_key,
>                             (COALESCE(vs, __NULL__))::text as levels,
>                             COALESCE(vs, __NULL__) as dep_avg
>                         FROM mt_cars
>                         WHERE (am) is not NULL and (mpg) is not NULL AND 
> COALESCE(vs, __NULL__) IS NOT NULL
>                         GROUP BY COALESCE(vs, __NULL__), am
>                     ) s
>                     GROUP BY grp_key
>                 ) s1
>                 where array_upper(levels, 1) > 1
>                  UNION ALL 
>                 SELECT
>                     colname::text,
>                     levels::text[],
>                     grp_key::text
>                 from (
>                     SELECT
>                         grp_key,
>                         'cyl' as colname,
>                         array_agg(levels order by dep_avg) as levels
>                     from (
>                         SELECT
>                             array_to_string(array[(am)::text], ',') as 
> grp_key,
>                             (COALESCE(cyl, __NULL__))::text as levels,
>                             COALESCE(cyl, __NULL__) as dep_avg
>                         FROM mt_cars
>                         WHERE (am) is not NULL and (mpg) is not NULL AND 
> COALESCE(cyl, __NULL__) IS NOT NULL
>                         GROUP BY COALESCE(cyl, __NULL__), am
>                     ) s
>                     GROUP BY grp_key
>                 ) s1
>                 where array_upper(levels, 1) > 1
>                 
> CONTEXT:  Traceback (most recent call last):
>   PL/Python function "forest_train", line 42, in <module>
>     sample_ratio
>   PL/Python function "forest_train", line 427, in forest_train
>   PL/Python function "forest_train", line 963, in _get_bins_grps
> PL/Python function "forest_train"
> {code}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

Reply via email to