This is an automated email from the ASF dual-hosted git repository. jingyimei pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/madlib.git
commit 6819e694b6a68452e8f5f896631889f7bd238061 Author: Jingyi Mei <[email protected]> AuthorDate: Mon Mar 18 17:29:20 2019 -0700 DL: Update user doc Co-authored-by: Ekta Khanna <[email protected]> Co-authored-by: Frank Mcquillan <[email protected]> --- .../utilities/minibatch_preprocessing_dl.sql_in | 332 +++++++++------------ 1 file changed, 145 insertions(+), 187 deletions(-) diff --git a/src/ports/postgres/modules/utilities/minibatch_preprocessing_dl.sql_in b/src/ports/postgres/modules/utilities/minibatch_preprocessing_dl.sql_in index 7a45db2..3be1ec0 100644 --- a/src/ports/postgres/modules/utilities/minibatch_preprocessing_dl.sql_in +++ b/src/ports/postgres/modules/utilities/minibatch_preprocessing_dl.sql_in @@ -52,8 +52,7 @@ minibatch_preprocessor_dl( source_table, dependent_varname, independent_varname, buffer_size, - normalizing_const, - one_hot_encode_int_dep_var + normalizing_const ) </pre> @@ -68,12 +67,16 @@ minibatch_preprocessor_dl( source_table, will be used as input to algorithms that support mini-batching. Note that the arrays packed into the output table are shuffled and normalized (by dividing each element in the independent variable array - by the "normalizing_const"), so they will not match up in an obvious way with - the rows in the source table. + by the optional "normalizing_const" parameter), so they will not match + up in an obvious way with the rows in the source table. </dd> <dt>dependent_varname</dt> <dd>TEXT. Name of the dependent variable column. + @note The mini-batch preprocessor automatically encodes + dependent variables of all types. The exception is numeric array types + (integer and float), where we assume these are already 1-hot encoded, + so these will just be passed through as is. </dd> <dt>independent_varname</dt> @@ -88,30 +91,17 @@ minibatch_preprocessor_dl( source_table, output table. 
The default value is computed considering size of the source table, number of independent variables, and number of segments in the database cluster. + @note minibatch_preprocessor_dl tries to pack data and distribute it + evenly based on the number of input rows, so a buffer will + not necessarily contain exactly the number of rows that you specified + in buffer_size. </dd> <dt>normalizing_const (optional)</dt> <dd>DOUBLE PRECISION, default: 1.0. The normalizing constant to divide - each value in the independent_varname array by. For example, in some cases - you may need to use 255 for this value if the image data is 0-255. + each value in the independent_varname array by. For example, + you may need to use 255 for this value if the image data values range from 0 to 255. </dd> - - <dt>one_hot_encode_int_dep_var (optional)</dt> - <dd> BOOLEAN. default: FALSE. - Flag to one-hot encode dependent variables that are - scalar integers. This parameter is ignored if the - dependent variable is not a scalar integer. - - @note The mini-batch preprocessor automatically encodes - dependent variables that are boolean and character types such as text, char and - varchar. However, scalar integers are a special case because they can be used - in both classification and regression problems, so you must tell the mini-batch - preprocessor whether you want to encode them or not. In the case that you have - already encoded the dependent variable yourself, you can ignore this parameter. - Also, if you want to encode float values for some reason, cast them to text - first. 
- </dd> - </dl> <b>Output tables</b> @@ -195,42 +185,60 @@ CREATE TABLE image_data AS ( SELECT * FROM image_data; </pre> <pre class="result"> - rgb | species --------------------------------------------------------------+--------- - {{{46,137,5},{208,71,90}},{{148,61,186},{8,109,10}}} | dog - {{{94,133,111},{41,211,179}},{{11,81,114},{26,182,105}}} | dog - {{{9,198,217},{84,224,7}},{{221,230,216},{36,64,107}}} | dog - {{{250,116,206},{4,249,43}},{{136,104,85},{91,27,96}}} | bird - {{{9,226,50},{223,238,158}},{{245,69,45},{206,35,139}}} | bird - {{{230,76,170},{97,38,256}},{{95,79,53},{153,17,188}}} | bird - {{{234,240,201},{63,210,211}},{{33,3,177},{16,161,166}}} | cat - {{{207,116,120},{90,46,94}},{{166,216,190},{204,216,29}}} | cat - {{{13,182,44},{201,174,22}},{{186,119,85},{139,73,118}}} | dog - {{{86,236,135},{98,229,56}},{{150,26,76},{235,115,142}}} | bird - {{{16,128,19},{82,2,21}},{{182,146,111},{44,27,251}}} | dog - {{{155,55,178},{135,61,127}},{{199,201,127},{146,211,0}}} | bird - {{{140,56,91},{37,205,186}},{{180,139,83},{212,94,163}}} | dog - {{{35,72,197},{64,98,167}},{{176,120,13},{209,199,55}}} | cat - {{{145,159,176},{36,127,176}},{{222,114,143},{214,56,142}}} | dog - {{{152,248,249},{26,46,172}},{{65,203,229},{21,32,147}}} | dog - {{{211,57,188},{23,18,187}},{{69,60,112},{41,131,209}}} | bird - {{{190,51,66},{218,220,218}},{{210,213,244},{256,129,53}}} | bird - {{{40,0,124},{213,201,190}},{{80,68,77},{24,240,39}}} | dog - {{{105,121,39},{119,75,103}},{{48,228,8},{43,6,16}}} | dog - {{{214,143,134},{74,251,204}},{{49,226,171},{145,27,160}}} | bird - {{{71,224,194},{216,149,3}},{{80,52,97},{211,115,129}}} | bird - {{{66,131,251},{67,228,209}},{{210,106,27},{205,54,76}}} | bird - {{{193,43,21},{163,215,79}},{{211,130,254},{113,36,213}}} | bird - {{{183,29,86},{229,41,166}},{{73,97,155},{207,178,174}}} | cat - {{{253,235,211},{38,79,175}},{{51,176,42},{201,27,47}}} | bird - {{{107,217,255},{122,72,221}},{{23,244,58},{66,26,148}}} | bird - 
{{{221,95,164},{185,251,42}},{{94,58,58},{14,222,88}}} | dog - {{{105,188,149},{109,226,140}},{{80,31,105},{74,64,36}}} | cat - {{{215,40,134},{71,156,50}},{{160,226,179},{255,169,185}}} | cat - {{{146,235,249},{181,128,163}},{{161,132,14},{249,4,72}}} | dog - {{{195,223,197},{49,149,156}},{{89,26,227},{245,76,131}}} | bird - {{{255,131,128},{184,179,19}},{{163,171,200},{35,78,105}}} | dog - {{{79,128,8},{211,197,199}},{{22,160,79},{97,53,137}}} | dog + rgb | species +--------------------------------------------------------------+--------- + {{{124,198,44},{91,47,130}},{{24,175,69},{196,189,166}}} | dog + {{{111,202,129},{198,249,254}},{{141,37,88},{187,167,113}}} | dog + {{{235,53,39},{145,167,209}},{{197,147,222},{55,218,53}}} | dog + {{{231,48,125},{248,233,151}},{{63,125,230},{33,24,70}}} | dog + {{{92,146,121},{163,241,110}},{{75,88,72},{218,90,12}}} | bird + {{{88,114,59},{202,211,152}},{{92,76,58},{77,186,134}}} | dog + {{{2,96,255},{14,48,19}},{{240,55,115},{137,255,245}}} | dog + {{{165,122,98},{16,115,240}},{{4,106,116},{108,242,210}}} | dog + {{{155,207,101},{214,167,24}},{{118,240,228},{199,230,21}}} | dog + {{{94,212,15},{48,66,170}},{{255,167,128},{166,191,246}}} | dog + {{{169,69,131},{16,98,225}},{{228,113,17},{38,27,17}}} | bird + {{{156,183,139},{146,77,46}},{{80,202,230},{146,84,239}}} | dog + {{{190,210,147},{227,31,66}},{{229,251,84},{51,118,240}}} | bird + {{{253,175,200},{237,151,107}},{{207,56,162},{133,39,35}}} | cat + {{{146,185,108},{14,10,105}},{{188,210,86},{83,61,36}}} | dog + {{{223,169,177},{3,200,250}},{{112,91,16},{193,32,151}}} | cat + {{{249,145,240},{144,153,58}},{{131,156,230},{56,50,75}}} | dog + {{{212,186,229},{52,251,197}},{{230,121,201},{35,215,119}}} | cat + {{{234,94,23},{114,196,94}},{{242,249,90},{223,24,109}}} | bird + {{{111,36,145},{77,135,123}},{{171,158,237},{111,252,222}}} | dog + {{{90,74,240},{231,133,95}},{{11,21,173},{146,144,88}}} | cat + {{{170,52,237},{13,114,71}},{{87,99,46},{220,194,56}}} | bird + 
{{{8,17,92},{64,2,203}},{{10,131,145},{4,129,30}}} | cat + {{{217,218,207},{74,68,186}},{{127,107,76},{38,60,16}}} | bird + {{{193,34,83},{203,99,58}},{{251,224,50},{228,118,113}}} | dog + {{{146,218,155},{32,159,243}},{{146,218,189},{101,114,25}}} | bird + {{{179,160,74},{204,81,246}},{{50,189,39},{60,42,185}}} | cat + {{{13,82,174},{198,151,84}},{{65,249,100},{179,234,104}}} | cat + {{{162,190,124},{184,66,138}},{{10,240,80},{161,68,145}}} | dog + {{{164,144,199},{53,42,111}},{{122,174,128},{220,143,100}}} | cat + {{{160,138,104},{177,86,3}},{{104,226,149},{181,16,229}}} | dog + {{{246,119,211},{229,249,119}},{{117,192,172},{159,47,38}}} | cat + {{{175,1,220},{18,78,124}},{{156,181,45},{242,185,148}}} | bird + {{{50,113,246},{101,213,180}},{{56,103,151},{87,169,124}}} | cat + {{{73,109,147},{22,81,197}},{{135,71,42},{91,251,98}}} | bird + {{{206,61,255},{25,151,211}},{{211,124,7},{206,64,237}}} | cat + {{{201,71,34},{182,142,43}},{{198,172,171},{230,1,23}}} | bird + {{{142,158,2},{223,45,205}},{{118,177,223},{232,178,141}}} | cat + {{{86,190,128},{195,172,14}},{{97,173,237},{142,123,99}}} | cat + {{{26,72,148},{79,226,156}},{{96,62,220},{99,9,230}}} | bird + {{{154,234,103},{184,18,65}},{{146,225,139},{214,156,10}}} | cat + {{{244,169,103},{218,143,2}},{{196,246,186},{214,55,76}}} | bird + {{{20,226,7},{96,153,200}},{{130,236,147},{229,38,142}}} | bird + {{{172,102,107},{50,11,109}},{{145,9,123},{193,28,107}}} | bird + {{{143,243,247},{132,104,137}},{{94,3,169},{253,246,59}}} | bird + {{{78,74,228},{51,200,218}},{{170,155,190},{164,18,51}}} | dog + {{{163,226,161},{56,182,239}},{{129,154,35},{73,116,205}}} | bird + {{{74,243,3},{172,182,149}},{{101,34,163},{111,138,95}}} | cat + {{{224,178,126},{4,61,93}},{{174,238,96},{118,232,208}}} | bird + {{{55,236,249},{7,189,242}},{{151,173,130},{49,232,5}}} | bird + {{{9,16,30},{128,32,85}},{{108,25,91},{41,11,243}}} | bird + {{{141,35,191},{146,240,141}},{{207,239,166},{102,194,121}}} | bird (52 rows) </pre> -# Run the 
preprocessor for image data: @@ -246,12 +254,15 @@ SELECT madlib.minibatch_preprocessor_dl('image_data', -- Source table </pre> For small datasets like in this example, buffer size is mainly determined by the number of segments in the database. -This example is run on a Greenplum database with 2 segments, -so there are 2 rows with a buffer size of 26. +This example is run on a Greenplum database with 3 segments, +so there are 3 rows with a buffer size of 18 (in this case +two segments will get 18 rows and one segment will get 16 rows). For PostgreSQL, there would be only one row with a buffer size of 52 since it is a single-node database. For larger data sets, other factors go into computing buffer size besides the number of segments. +Note that the dependent variable is a text type, and it is one-hot encoded +after preprocessing. Here is a sample of the packed output table: <pre class="example"> \\x on @@ -259,13 +270,17 @@ SELECT * FROM image_data_packed ORDER BY buffer_id; </pre> <pre class="result"> -[ RECORD 1 ]---+--------------------------------------------------------------------------------------------------------------------- -independent_var | {{{{0.607843,0.215686,0.698039},{0.529412,0.239216,0.498039}},{{0.780392,0.788235,0.498039},{0.572549,0.827451,0}}},...} -dependent_var | {bird,dog,dog,cat,bird,dog,bird,dog,cat,cat,bird,dog,dog,cat,bird,dog,bird,dog,bird,bird,dog,bird,dog,dog,bird,cat} +independent_var | {{{{0.921569,0.207843,0.152941},{0.568627,0.654902,0.819608}},{{0.772549,0.576471,0.870588},{0.215686,0.854902,0.207843}}},...} +dependent_var | {{0,0,1},{0,0,1},{1,0,0},{0,1,0},...} buffer_id | 0 -[ RECORD 2 ]---+--------------------------------------------------------------------------------------------------------------------- -independent_var | {{{{0.184314,0.380392,0.556863},{0.133333,0.764706,0.6}},{{0.470588,0.85098,0.32549},{0.666667,0.196078,0.129412}}},...} -dependent_var | 
{bird,bird,bird,cat,dog,bird,cat,cat,bird,dog,dog,cat,dog,bird,cat,dog,bird,bird,dog,dog,dog,bird,dog,bird,bird,cat} +independent_var | {{{{0.639216,0.886275,0.631373},{0.219608,0.713726,0.937255}},{{0.505882,0.603922,0.137255},{0.286275,0.454902,0.803922}}},...} +dependent_var | {{1,0,0},{0,1,0},{1,0,0},{0,0,1},...} buffer_id | 1 +-[ RECORD 3 ]---+--------------------------------------------------------------------------------------------------------------------- +independent_var | {{{{0.635294,0.745098,0.486275},{0.721569,0.258824,0.541176}},{{0.0392157,0.941177,0.313726},{0.631373,0.266667,0.568627}}},...} +dependent_var | {{0,0,1},{0,0,1},{0,1,0},{1,0,0},...} +buffer_id | 2 </pre> Review the output summary table: <pre class="example"> @@ -279,7 +294,8 @@ output_table | image_data_packed dependent_varname | species independent_varname | rgb dependent_vartype | text -buffer_size | 26 +class_values | {bird,cat,dog} +buffer_size | 18 </pre> -# Load data in another format. Create an artificial 2x2 resolution color image @@ -308,58 +324,58 @@ SELECT * FROM image_data; <pre class="result"> rgb | species --------------------------------------------------+--------- - {177,194,185,175,43,16,205,92,164,130,204,100} | cat - {128,51,73,226,104,194,73,190,98,23,98,101} | cat - {237,14,81,38,116,2,232,45,177,19,61,126} | dog - {225,0,59,69,29,187,120,102,157,224,40,230} | dog - {138,253,256,239,51,237,253,132,19,113,134,251} | dog - {55,14,219,182,125,189,182,184,2,211,115,122} | cat - {16,90,96,246,248,234,243,248,217,39,229,215} | dog - {247,72,49,242,230,104,256,193,30,125,126,212} | cat - {128,167,168,250,223,184,84,63,174,76,42,161} | cat - {3,200,41,218,114,32,33,163,18,8,11,18} | bird - {41,143,71,253,196,200,163,108,194,130,35,22} | bird - {209,98,235,114,167,238,58,207,200,172,240,233} | cat - {2,241,90,21,186,130,164,1,127,104,201,34} | bird - {139,164,247,161,102,200,3,82,58,170,64,115} | dog - {8,31,105,241,110,107,226,200,128,156,74,36} | dog - 
{200,140,103,234,95,241,143,86,146,245,30,149} | cat - {87,63,134,203,185,142,234,34,127,88,141,97} | cat - {13,254,105,49,155,50,189,2,28,28,243,171} | dog - {133,160,144,26,231,231,90,109,178,19,252,156} | cat - {123,244,194,221,20,208,218,125,1,117,175,190} | dog - {204,218,106,119,75,239,23,219,9,254,194,99} | dog - {116,118,103,16,171,226,5,109,191,25,61,153} | dog - {62,14,69,252,133,17,213,239,136,32,222,159} | bird - {231,157,189,74,9,49,192,112,66,107,83,70} | bird - {18,95,22,171,245,84,185,58,79,62,75,36} | cat - {211,69,11,114,64,242,15,254,60,24,47,252} | dog - {31,171,230,197,167,228,220,138,123,199,79,75} | cat - {83,238,149,1,213,101,150,168,179,155,255,49} | dog - {123,157,77,177,69,108,92,43,49,3,15,13} | dog - {138,212,221,212,219,48,194,113,49,152,213,199} | cat - {136,98,62,185,256,185,86,77,106,155,185,198} | bird - {235,202,213,247,87,95,203,52,51,167,100,245} | cat - {149,141,236,93,204,117,191,10,46,190,195,133} | cat - {45,32,196,244,230,175,189,187,166,21,26,113} | cat - {77,24,173,66,47,66,207,28,159,155,145,94} | dog - {191,28,104,67,39,150,99,235,137,73,154,71} | cat - {65,92,30,178,164,107,202,81,173,249,147,124} | cat - {50,23,166,144,188,101,172,36,168,211,186,11} | bird - {67,84,88,138,88,153,229,117,75,138,224,21} | bird - {140,14,110,8,36,160,31,201,48,219,46,219} | bird - {214,174,184,224,108,251,52,196,133,139,93,106} | cat - {168,244,225,189,207,109,203,61,117,239,221,148} | bird - {13,111,230,232,109,188,150,37,157,2,32,209} | bird - {165,92,36,15,93,203,4,61,136,210,171,84} | cat - {32,66,236,180,251,249,34,225,225,144,157,119} | bird - {58,121,213,11,64,122,103,99,137,195,47,141} | cat - {183,95,171,11,111,203,77,91,127,72,83,162} | cat - {52,49,199,172,230,1,37,187,11,101,52,114} | bird - {190,53,247,75,54,174,170,226,185,24,173,6} | dog - {44,78,198,206,119,251,255,62,166,229,63,204} | dog - {75,49,212,189,249,146,242,240,221,40,159,134} | cat - {88,159,183,94,18,228,172,216,178,35,211,177} | dog + 
{26,150,191,113,235,57,145,143,44,145,85,25} | dog + {240,43,225,15,220,136,186,209,49,130,55,111} | bird + {25,191,37,77,193,62,249,228,97,33,81,7} | cat + {141,223,46,195,201,19,207,78,160,130,157,89} | cat + {39,249,168,164,223,193,99,4,14,37,66,7} | cat + {159,250,127,44,151,254,11,211,247,137,79,233} | cat + {19,230,76,253,42,175,230,143,184,133,27,215} | cat + {199,224,144,5,64,19,200,186,109,218,108,70} | bird + {148,136,4,41,185,104,203,253,113,151,166,76} | bird + {230,132,114,213,210,139,91,199,240,142,203,75} | bird + {166,188,96,217,135,70,93,249,27,47,132,118} | bird + {118,120,222,236,110,83,240,47,19,206,222,51} | bird + {230,3,26,47,93,144,167,59,123,21,142,107} | cat + {250,224,62,136,112,142,88,187,24,1,168,216} | bird + {52,144,231,12,76,1,162,11,114,141,69,3} | cat + {166,172,246,169,200,102,62,57,239,75,165,88} | dog + {151,50,112,227,199,97,47,4,43,123,116,133} | bird + {39,185,96,127,80,248,177,191,218,120,32,9} | dog + {25,172,34,34,40,109,166,23,60,216,246,54} | bird + {163,39,89,170,95,230,137,141,169,82,159,121} | dog + {131,143,183,138,151,90,177,240,4,16,214,141} | dog + {99,233,100,9,159,140,30,202,29,169,120,62} | bird + {99,162,69,10,204,169,219,20,106,170,111,16} | bird + {16,246,27,32,187,226,0,75,231,64,94,175} | bird + {25,135,244,101,50,4,91,77,36,22,47,37} | dog + {22,101,191,197,96,138,78,198,155,138,193,51} | bird + {236,22,110,30,181,20,218,21,236,97,91,73} | dog + {160,57,34,212,239,197,233,174,164,97,88,153} | cat + {226,170,192,123,242,224,190,51,163,192,91,105} | bird + {149,174,12,72,112,1,37,153,118,201,79,121} | bird + {34,250,232,222,218,221,234,201,138,66,186,58} | bird + {162,55,85,159,247,234,77,3,50,189,4,87} | dog + {122,32,164,243,0,198,237,232,164,199,197,142} | dog + {80,209,75,138,169,236,193,254,140,184,232,217} | bird + {112,148,114,137,13,107,105,75,243,218,218,75} | dog + {241,76,61,202,76,112,90,51,125,166,52,30} | bird + {75,132,239,207,49,224,250,19,238,214,154,169} | dog + 
{203,43,222,58,231,5,243,71,131,67,63,52} | cat + {229,12,133,142,179,80,185,145,138,160,149,125} | bird + {64,251,61,153,13,100,145,181,8,112,118,107} | dog + {128,223,60,248,126,124,243,188,20,0,31,166} | bird + {39,22,43,146,138,174,33,65,56,184,155,234} | dog + {177,247,133,154,159,37,148,30,81,43,29,92} | bird + {56,127,199,118,105,120,109,239,18,12,20,166} | cat + {101,209,72,193,207,91,166,27,88,209,203,62} | dog + {131,195,122,90,18,178,217,217,40,66,81,149} | cat + {203,137,103,17,60,251,152,64,36,81,168,239} | cat + {239,97,10,20,194,32,121,129,228,217,11,50} | dog + {117,4,193,192,223,176,33,232,196,226,8,61} | dog + {162,21,190,223,120,170,245,230,200,170,250,163} | bird + {32,67,65,195,2,39,198,28,86,35,172,254} | dog + {39,19,236,146,87,140,203,121,96,187,62,73} | dog (52 rows) </pre> @@ -380,14 +396,18 @@ Here is a sample of the packed output table: SELECT * FROM image_data_packed ORDER BY buffer_id; </pre> <pre class="result"> --[ RECORD 1 ]-------+------------------------------------- -independent_var | {{0.501961,0.2,0.286275,0.886275,0.407843,0.760784,0.286275,0.745098,0.384314,0.0901961,0.384314,0.396078},...}} -dependent_var | {cat,dog,cat,cat,bird,cat,dog,cat,dog,dog,dog,bird,dog,dog,cat,cat,cat,cat,bird,bird,bird,cat,cat,bird,dog,dog} +-[ RECORD 1 ]---+--------------------------------------------------------------------------------------------------------------------- +independent_var | {{0.203922,0.564706,0.905882,0.0470588,0.298039,0.00392157,0.635294,0.0431373,0.447059,0.552941,0.270588,0.0117647},...} +dependent_var | {{0,1,0},{1,0,0},{1,0,0},{1,0,0},{0,0,1},...} buffer_id | 0 --[ RECORD 2 ]-------+------------------------------------- -independent_var | {{0.694118,0.760784,0.72549,0.686275,0.168627,0.0627451,0.803922,0.360784,0.643137,0.509804,0.8,0.392157},...}} -dependent_var | {cat,dog,dog,dog,cat,bird,bird,dog,cat,cat,dog,bird,cat,cat,dog,bird,cat,dog,cat,bird,cat,bird,bird,cat,dog,cat} +-[ RECORD 2 
]---+--------------------------------------------------------------------------------------------------------------------- +independent_var | {{0.25098,0.984314,0.239216,0.6,0.0509804,0.392157,0.568627,0.709804,0.0313726,0.439216,0.462745,0.419608},...} +dependent_var | {{0,0,1},{0,0,1},{0,1,0},{0,0,1},{1,0,0},...} buffer_id | 1 +-[ RECORD 3 ]---+--------------------------------------------------------------------------------------------------------------------- +independent_var | {{0.796079,0.537255,0.403922,0.0666667,0.235294,0.984314,0.596078,0.25098,0.141176,0.317647,0.658824,0.937255},...} +dependent_var | {{0,1,0},{0,1,0},{0,1,0},{0,0,1},{0,0,1},...} +buffer_id | 2 </pre> -# Generally the default buffer size will work well, @@ -420,72 +440,10 @@ output_table | image_data_packed dependent_varname | species independent_varname | rgb dependent_vartype | text +class_values | {bird,cat,dog} buffer_size | 10 </pre> --# Shift integer dependent variable. Let's say our input data has class levels -starting from 1, but we want them to start from 0 after preprocesssing. 
-Create a new image data set with class values as 1, 2 or 3: -<pre class="example"> -DROP TABLE IF EXISTS image_data_2; -CREATE TABLE image_data_2 AS ( -SELECT ARRAY[ - (random() * 256)::integer, -- R values - (random() * 256)::integer, - (random() * 256)::integer, - (random() * 256)::integer, - (random() * 256)::integer, -- G values - (random() * 256)::integer, - (random() * 256)::integer, - (random() * 256)::integer, - (random() * 256)::integer, -- B values - (random() * 256)::integer, - (random() * 256)::integer, - (random() * 256)::integer - ] as rgb, ('{1,2,3}'::integer[])[ceil(random()*3)] as species -FROM generate_series(1, 52) -); -SELECT * FROM image_data_2 LIMIT 10; -</pre> -<pre class="result"> - rgb | species -------------------------------------------------+--------- - {103,220,183,64,237,22,86,152,10,170,81,68} | 3 - {144,143,30,189,98,99,200,212,163,185,227,161} | 3 - {252,208,241,132,65,89,96,247,153,77,13,239} | 3 - {23,153,55,92,146,199,235,176,131,77,19,75} | 1 - {182,5,4,86,250,0,38,235,132,103,67,228} | 2 - {220,50,107,203,23,131,100,78,222,246,21,201} | 2 - {152,22,185,227,55,110,232,59,197,226,59,235} | 3 - {192,82,16,164,176,236,214,27,183,237,158,27} | 1 - {124,17,80,69,183,233,91,111,204,146,221,180} | 3 - {162,150,8,141,98,200,222,114,108,142,95,66} | 2 -... 
-</pre> -Now run the preprocessor with a dependent variable offset of -1: -<pre class="example"> -DROP TABLE IF EXISTS image_data_packed_2, image_data_packed_2_summary; -SELECT madlib.minibatch_preprocessor_dl('image_data_2', -- Source table - 'image_data_packed_2', -- Output table - 'species', -- Dependent variable - 'rgb', -- Independent variable - NULL, -- Buffer size - 255, -- Normalizing constant - -1 -- Dependent variable offset - ); -SELECT * FROM image_data_packed_2 ORDER BY buffer_id; -</pre> -<pre class="result"> --[ RECORD 1 ]-------+------------------------------------- -independent_var | {{0.564706,0.560784,0.117647,0.741176,0.384314,0.388235,0.784314,0.831373,0.639216,0.72549,0.890196,0.631373},...}} -dependent_var | {2,0,1,0,1,1,1,0,1,0,2,0,1,0,1,0,2,0,2,0,2,1,0,1,0,0} -buffer_id | 0 --[ RECORD 2 ]-------+------------------------------------- -independent_var | {{0.403922,0.862745,0.717647,0.25098,0.929412,0.0862745,0.337255,0.596078,0.0392157,0.666667,0.317647,0.266667},...}} -dependent_var | {2,2,1,2,2,1,1,2,0,0,0,1,1,1,0,2,2,1,2,1,1,0,2,1,2,2} -buffer_id | 1 -</pre> - @anchor related @par Related Topics
