Github user fmcquillan99 commented on the issue:

    https://github.com/apache/madlib/pull/256
  
    Is this expected behavior? The last group for NJ gets only 1 observation.
    
    ```
    -- Repro fixture: drop any earlier copy so the script can be re-run as-is.
    DROP TABLE IF EXISTS iris_data;
    -- One row per observation: a numeric feature array, the class as both
    -- text and integer code, and the state the row belongs to.
    CREATE TABLE iris_data(
        id serial,
        attributes numeric[],
        class_text text,
        class integer,
        state text
    );
    -- ids are supplied explicitly below, so the serial default is not used.
    INSERT INTO iris_data(id, attributes, class_text, class, state) VALUES
    -- Alaska: ids 1-20, 20 rows (12 Iris_setosa, 8 Iris_versicolor).
    (1,ARRAY[5.0,3.2,1.2,0.2],'Iris_setosa',1,'Alaska'),
    (2,ARRAY[5.5,3.5,1.3,0.2],'Iris_setosa',1,'Alaska'),
    (3,ARRAY[4.9,3.1,1.5,0.1],'Iris_setosa',1,'Alaska'),
    (4,ARRAY[4.4,3.0,1.3,0.2],'Iris_setosa',1,'Alaska'),
    (5,ARRAY[5.1,3.4,1.5,0.2],'Iris_setosa',1,'Alaska'),
    (6,ARRAY[5.0,3.5,1.3,0.3],'Iris_setosa',1,'Alaska'),
    (7,ARRAY[4.5,2.3,1.3,0.3],'Iris_setosa',1,'Alaska'),
    (8,ARRAY[4.4,3.2,1.3,0.2],'Iris_setosa',1,'Alaska'),
    (9,ARRAY[5.0,3.5,1.6,0.6],'Iris_setosa',1,'Alaska'),
    (10,ARRAY[5.1,3.8,1.9,0.4],'Iris_setosa',1,'Alaska'),
    (11,ARRAY[4.8,3.0,1.4,0.3],'Iris_setosa',1,'Alaska'),
    (12,ARRAY[5.1,3.8,1.6,0.2],'Iris_setosa',1,'Alaska'),
    (13,ARRAY[5.7,2.8,4.5,1.3],'Iris_versicolor',2,'Alaska'),
    (14,ARRAY[6.3,3.3,4.7,1.6],'Iris_versicolor',2,'Alaska'),
    (15,ARRAY[4.9,2.4,3.3,1.0],'Iris_versicolor',2,'Alaska'),
    (16,ARRAY[6.6,2.9,4.6,1.3],'Iris_versicolor',2,'Alaska'),
    (17,ARRAY[5.2,2.7,3.9,1.4],'Iris_versicolor',2,'Alaska'),
    (18,ARRAY[5.0,2.0,3.5,1.0],'Iris_versicolor',2,'Alaska'),
    (19,ARRAY[5.9,3.0,4.2,1.5],'Iris_versicolor',2,'Alaska'),
    (20,ARRAY[6.0,2.2,4.0,1.0],'Iris_versicolor',2,'Alaska'),
    -- NJ: ids 21-43, 23 rows (12 Iris_setosa, 11 Iris_versicolor).
    (21,ARRAY[5.0,3.2,1.2,0.2],'Iris_setosa',1,'NJ'),
    (22,ARRAY[5.5,3.5,1.3,0.2],'Iris_setosa',1,'NJ'),
    (23,ARRAY[4.9,3.1,1.5,0.1],'Iris_setosa',1,'NJ'),
    (24,ARRAY[4.4,3.0,1.3,0.2],'Iris_setosa',1,'NJ'),
    (25,ARRAY[5.1,3.4,1.5,0.2],'Iris_setosa',1,'NJ'),
    (26,ARRAY[5.0,3.5,1.3,0.3],'Iris_setosa',1,'NJ'),
    (27,ARRAY[4.5,2.3,1.3,0.3],'Iris_setosa',1,'NJ'),
    (28,ARRAY[4.4,3.2,1.3,0.2],'Iris_setosa',1,'NJ'),
    (29,ARRAY[5.0,3.5,1.6,0.6],'Iris_setosa',1,'NJ'),
    (30,ARRAY[5.1,3.8,1.9,0.4],'Iris_setosa',1,'NJ'),
    (31,ARRAY[4.8,3.0,1.4,0.3],'Iris_setosa',1,'NJ'),
    (32,ARRAY[5.1,3.8,1.6,0.2],'Iris_setosa',1,'NJ'),
    (33,ARRAY[5.7,2.8,4.5,1.3],'Iris_versicolor',2,'NJ'),
    (34,ARRAY[6.3,3.3,4.7,1.6],'Iris_versicolor',2,'NJ'),
    (35,ARRAY[4.9,2.4,3.3,1.0],'Iris_versicolor',2,'NJ'),
    (36,ARRAY[6.6,2.9,4.6,1.3],'Iris_versicolor',2,'NJ'),
    (37,ARRAY[5.2,2.7,3.9,1.4],'Iris_versicolor',2,'NJ'),
    (38,ARRAY[5.0,2.0,3.5,1.0],'Iris_versicolor',2,'NJ'),
    (39,ARRAY[5.9,3.0,4.2,1.5],'Iris_versicolor',2,'NJ'),
    (40,ARRAY[6.0,2.2,4.0,1.0],'Iris_versicolor',2,'NJ'),
    (41,ARRAY[6.0,2.2,4.0,1.0],'Iris_versicolor',2,'NJ'),
    (42,ARRAY[6.0,2.2,4.0,1.0],'Iris_versicolor',2,'NJ'),
    (43,ARRAY[6.0,2.2,4.0,1.0],'Iris_versicolor',2,'NJ');
    ```
    
    ```
    -- Drop the packed output table and its companion summary and
    -- standardization tables left over from any previous run.
    DROP TABLE IF EXISTS iris_data_packed, iris_data_packed_summary,
                         iris_data_packed_standardization;
    -- Pack iris_data into mini-batches, one set of batches per state.
    -- Each trailing comment is kept on the same line as its argument: if a
    -- comment wraps, the overflow word is parsed as SQL and the call fails.
    SELECT madlib.minibatch_preprocessor('iris_data',         -- Source table
                                         'iris_data_packed',  -- Output table
                                         'class_text',        -- Dependent variable
                                         'attributes',        -- Independent variables
                                         'state'              -- Grouping
                                         );
    -- Inspect the packed rows, ordered by state and then by __id__.
    SELECT * FROM iris_data_packed ORDER BY state, __id__;
    ```
    
    ```
    __id__      state   dependent_varname       independent_varname
    
    0   Alaska  [[1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], 
[0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0]]    
[[-0.71340313126425, -0.0601082924775639, -0.815378124905574, 
-0.706014621503565], [1.1550336410945, -0.0601082924775639, 1.26960703467032, 
1.61512933960405], [1.8344651946795, 0.540974632298078, 1.64192581316602, 
1.80855800302968], [-0.373687354471751, -2.06371804172971, 0.748360744776349, 
0.647986022475874], [-0.203829466075501, 1.54277950692415, -0.666450613507296, 
-0.899443284929199], [1.32489152949075, -1.66299609187928, 1.12067952327204, 
0.647986022475874], [0.475602087509498, 0.941696582148507, -0.889841880604713, 
-0.899443284929199], [-0.203829466075501, 0.741335607223293, 
-0.740914369206435, -0.899443284929199], [-0.373687354471751, 
0.941696582148507, -0.889841880604713, -0.706014621503565], 
[-0.373687354471751, 0.941696582148507, -0.666450613507296, 
-0.125728631226662], [-0.543545242868, 0.14025268244765, -0.740914369206435, 
-1.09287194835483]]
    
    1   Alaska  [[1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], 
[1.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0]]    [[-1.222976796453, 
-1.46263511695406, -0.889841880604713, -0.706014621503565], 
[-0.373687354471751, 0.340613657372865, -0.964305636303853, 
-0.899443284929199], [-0.543545242868, -1.26227414202885, 0.59943323337807, 
0.647986022475874], [0.815317864301998, -0.460830242327993, 1.49299830176774, 
1.22827201275278], [-0.0339715776792507, -0.661191217253206, 1.04621576757291, 
1.42170067617841], [-1.39283468484925, -0.0601082924775639, -0.889841880604713, 
-0.899443284929199], [2.34403885986824, -0.260469267402778, 1.56746205746688, 
1.22827201275278], [-1.39283468484925, 0.340613657372865, -0.889841880604713, 
-0.899443284929199], [-0.203829466075501, 1.54277950692415, -0.443059346409878, 
-0.512585958077931]]
    
    0   NJ      [[1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0], 
[1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0]]    
[[-1.51451754593938, 0.144152305294091, -1.02984968208236, -1.02742318255518], 
[-0.528652350941107, 1.06512536689522, -0.80778834438335, -0.219534868067347], 
[-0.528652350941107, -1.69779381790817, 0.598600127710372, 0.588353446420489], 
[-0.528652350941107, 0.512541529934543, -1.10387012798203, -1.02742318255518], 
[0.621523709890219, -0.224236919346362, 1.33880458670707, 1.19426968228637], 
[-0.364341485108061, 1.6177092038559, -0.585727006684342, -0.623479025311265], 
[-0.692963216774152, 0.328346917614317, -0.88180879028302, -1.22939526117714], 
[-0.692963216774152, -0.961015368627265, 0.450559235911032, 0.588353446420489], 
[-0.364341485108061, 1.6177092038559, -0.80778834438335, -1.02742318255518], 
[-0.364341485108061, 0.880930754574994, -0.88180879028302, -1.02742318255518], 
[-1.51451754593938, 0.512541529934543, -1.02984968208236, -1.02742318255518]]
    
    1   NJ      [[0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0], 
[0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0]]    
[[2.10032150238764, -0.0400423070261354, 1.41282503260674, 1.19426968228637], 
[1.11445630738936, -1.32940459326772, 0.96870235720872, 0.588353446420489], 
[-0.8572740826072, 0.144152305294091, -0.95582923618269, -0.825451103933224], 
[0.292901978224126, 1.06512536689522, -1.02984968208236, -1.02742318255518], 
[-1.35020668010634, -1.14520998094749, -1.02984968208236, -0.825451103933224], 
[-0.200030619275013, -0.408431531666587, 0.89468191130905, 1.39624176090833], 
[0.950145441556312, 0.144152305294091, 1.11674324900806, 1.59821383953028], 
[1.11445630738936, -1.32940459326772, 0.96870235720872, 0.588353446420489], 
[1.6073889048885, 0.696736142254768, 1.48684547850641, 1.80018591815224], 
[1.11445630738936, -1.32940459326772, 0.96870235720872, 0.588353446420489], 
[1.11445630738936, -1.32940459326772, 0.96870235720872, 0.588353446420489]]
    
    2   NJ      [[1.0, 0.0]]    [[-0.528652350941107, 1.06512536689522, 
-1.02984968208236, -0.825451103933224]]
    ```



---

Reply via email to