Github user fmcquillan99 commented on the issue: https://github.com/apache/madlib/pull/256 Is this expected behavior? last group for NJ gets only 1 observation ``` DROP TABLE IF EXISTS iris_data; CREATE TABLE iris_data( id serial, attributes numeric[], class_text text, class integer, state text ); INSERT INTO iris_data(id, attributes, class_text, class, state) VALUES (1,ARRAY[5.0,3.2,1.2,0.2],'Iris_setosa',1,'Alaska'), (2,ARRAY[5.5,3.5,1.3,0.2],'Iris_setosa',1,'Alaska'), (3,ARRAY[4.9,3.1,1.5,0.1],'Iris_setosa',1,'Alaska'), (4,ARRAY[4.4,3.0,1.3,0.2],'Iris_setosa',1,'Alaska'), (5,ARRAY[5.1,3.4,1.5,0.2],'Iris_setosa',1,'Alaska'), (6,ARRAY[5.0,3.5,1.3,0.3],'Iris_setosa',1,'Alaska'), (7,ARRAY[4.5,2.3,1.3,0.3],'Iris_setosa',1,'Alaska'), (8,ARRAY[4.4,3.2,1.3,0.2],'Iris_setosa',1,'Alaska'), (9,ARRAY[5.0,3.5,1.6,0.6],'Iris_setosa',1,'Alaska'), (10,ARRAY[5.1,3.8,1.9,0.4],'Iris_setosa',1,'Alaska'), (11,ARRAY[4.8,3.0,1.4,0.3],'Iris_setosa',1,'Alaska'), (12,ARRAY[5.1,3.8,1.6,0.2],'Iris_setosa',1,'Alaska'), (13,ARRAY[5.7,2.8,4.5,1.3],'Iris_versicolor',2,'Alaska'), (14,ARRAY[6.3,3.3,4.7,1.6],'Iris_versicolor',2,'Alaska'), (15,ARRAY[4.9,2.4,3.3,1.0],'Iris_versicolor',2,'Alaska'), (16,ARRAY[6.6,2.9,4.6,1.3],'Iris_versicolor',2,'Alaska'), (17,ARRAY[5.2,2.7,3.9,1.4],'Iris_versicolor',2,'Alaska'), (18,ARRAY[5.0,2.0,3.5,1.0],'Iris_versicolor',2,'Alaska'), (19,ARRAY[5.9,3.0,4.2,1.5],'Iris_versicolor',2,'Alaska'), (20,ARRAY[6.0,2.2,4.0,1.0],'Iris_versicolor',2,'Alaska'), (21,ARRAY[5.0,3.2,1.2,0.2],'Iris_setosa',1,'NJ'), (22,ARRAY[5.5,3.5,1.3,0.2],'Iris_setosa',1,'NJ'), (23,ARRAY[4.9,3.1,1.5,0.1],'Iris_setosa',1,'NJ'), (24,ARRAY[4.4,3.0,1.3,0.2],'Iris_setosa',1,'NJ'), (25,ARRAY[5.1,3.4,1.5,0.2],'Iris_setosa',1,'NJ'), (26,ARRAY[5.0,3.5,1.3,0.3],'Iris_setosa',1,'NJ'), (27,ARRAY[4.5,2.3,1.3,0.3],'Iris_setosa',1,'NJ'), (28,ARRAY[4.4,3.2,1.3,0.2],'Iris_setosa',1,'NJ'), (29,ARRAY[5.0,3.5,1.6,0.6],'Iris_setosa',1,'NJ'), (30,ARRAY[5.1,3.8,1.9,0.4],'Iris_setosa',1,'NJ'), 
(31,ARRAY[4.8,3.0,1.4,0.3],'Iris_setosa',1,'NJ'), (32,ARRAY[5.1,3.8,1.6,0.2],'Iris_setosa',1,'NJ'), (33,ARRAY[5.7,2.8,4.5,1.3],'Iris_versicolor',2,'NJ'), (34,ARRAY[6.3,3.3,4.7,1.6],'Iris_versicolor',2,'NJ'), (35,ARRAY[4.9,2.4,3.3,1.0],'Iris_versicolor',2,'NJ'), (36,ARRAY[6.6,2.9,4.6,1.3],'Iris_versicolor',2,'NJ'), (37,ARRAY[5.2,2.7,3.9,1.4],'Iris_versicolor',2,'NJ'), (38,ARRAY[5.0,2.0,3.5,1.0],'Iris_versicolor',2,'NJ'), (39,ARRAY[5.9,3.0,4.2,1.5],'Iris_versicolor',2,'NJ'), (40,ARRAY[6.0,2.2,4.0,1.0],'Iris_versicolor',2,'NJ'), (41,ARRAY[6.0,2.2,4.0,1.0],'Iris_versicolor',2,'NJ'), (42,ARRAY[6.0,2.2,4.0,1.0],'Iris_versicolor',2,'NJ'), (43,ARRAY[6.0,2.2,4.0,1.0],'Iris_versicolor',2,'NJ'); ``` ``` DROP TABLE IF EXISTS iris_data_packed, iris_data_packed_summary, iris_data_packed_standardization; SELECT madlib.minibatch_preprocessor('iris_data', -- Source table 'iris_data_packed', -- Output table 'class_text', -- Dependent variable 'attributes', -- Independent variables 'state' -- Grouping ); SELECT * FROM iris_data_packed ORDER BY state, __id__; ``` ``` __id__ state dependent_varname independent_varname 0 Alaska [[1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0]] [[-0.71340313126425, -0.0601082924775639, -0.815378124905574, -0.706014621503565], [1.1550336410945, -0.0601082924775639, 1.26960703467032, 1.61512933960405], [1.8344651946795, 0.540974632298078, 1.64192581316602, 1.80855800302968], [-0.373687354471751, -2.06371804172971, 0.748360744776349, 0.647986022475874], [-0.203829466075501, 1.54277950692415, -0.666450613507296, -0.899443284929199], [1.32489152949075, -1.66299609187928, 1.12067952327204, 0.647986022475874], [0.475602087509498, 0.941696582148507, -0.889841880604713, -0.899443284929199], [-0.203829466075501, 0.741335607223293, -0.740914369206435, -0.899443284929199], [-0.373687354471751, 0.941696582148507, -0.889841880604713, -0.706014621503565], [-0.373687354471751, 
0.941696582148507, -0.666450613507296, -0.125728631226662], [-0.543545242868, 0.14025268244765, -0.740914369206435, -1.09287194835483]] 1 Alaska [[1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0]] [[-1.222976796453, -1.46263511695406, -0.889841880604713, -0.706014621503565], [-0.373687354471751, 0.340613657372865, -0.964305636303853, -0.899443284929199], [-0.543545242868, -1.26227414202885, 0.59943323337807, 0.647986022475874], [0.815317864301998, -0.460830242327993, 1.49299830176774, 1.22827201275278], [-0.0339715776792507, -0.661191217253206, 1.04621576757291, 1.42170067617841], [-1.39283468484925, -0.0601082924775639, -0.889841880604713, -0.899443284929199], [2.34403885986824, -0.260469267402778, 1.56746205746688, 1.22827201275278], [-1.39283468484925, 0.340613657372865, -0.889841880604713, -0.899443284929199], [-0.203829466075501, 1.54277950692415, -0.443059346409878, -0.512585958077931]] 0 NJ [[1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0]] [[-1.51451754593938, 0.144152305294091, -1.02984968208236, -1.02742318255518], [-0.528652350941107, 1.06512536689522, -0.80778834438335, -0.219534868067347], [-0.528652350941107, -1.69779381790817, 0.598600127710372, 0.588353446420489], [-0.528652350941107, 0.512541529934543, -1.10387012798203, -1.02742318255518], [0.621523709890219, -0.224236919346362, 1.33880458670707, 1.19426968228637], [-0.364341485108061, 1.6177092038559, -0.585727006684342, -0.623479025311265], [-0.692963216774152, 0.328346917614317, -0.88180879028302, -1.22939526117714], [-0.692963216774152, -0.961015368627265, 0.450559235911032, 0.588353446420489], [-0.364341485108061, 1.6177092038559, -0.80778834438335, -1.02742318255518], [-0.364341485108061, 0.880930754574994, -0.88180879028302, -1.02742318255518], [-1.51451754593938, 0.512541529934543, -1.02984968208236, -1.02742318255518]] 1 NJ [[0.0, 1.0], [0.0, 1.0], 
[1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0]] [[2.10032150238764, -0.0400423070261354, 1.41282503260674, 1.19426968228637], [1.11445630738936, -1.32940459326772, 0.96870235720872, 0.588353446420489], [-0.8572740826072, 0.144152305294091, -0.95582923618269, -0.825451103933224], [0.292901978224126, 1.06512536689522, -1.02984968208236, -1.02742318255518], [-1.35020668010634, -1.14520998094749, -1.02984968208236, -0.825451103933224], [-0.200030619275013, -0.408431531666587, 0.89468191130905, 1.39624176090833], [0.950145441556312, 0.144152305294091, 1.11674324900806, 1.59821383953028], [1.11445630738936, -1.32940459326772, 0.96870235720872, 0.588353446420489], [1.6073889048885, 0.696736142254768, 1.48684547850641, 1.80018591815224], [1.11445630738936, -1.32940459326772, 0.96870235720872, 0.588353446420489], [1.11445630738936, -1.32940459326772, 0.96870235720872, 0.588353446420489]] 2 NJ [[1.0, 0.0]] [[-0.528652350941107, 1.06512536689522, -1.02984968208236, -0.825451103933224]] ```
---