This is an automated email from the ASF dual-hosted git repository.

jingyimei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/madlib.git

commit 6819e694b6a68452e8f5f896631889f7bd238061
Author: Jingyi Mei <[email protected]>
AuthorDate: Mon Mar 18 17:29:20 2019 -0700

    DL: Update user doc
    
    Co-authored-by: Ekta Khanna <[email protected]>
    Co-authored-by: Frank Mcquillan <[email protected]>
---
 .../utilities/minibatch_preprocessing_dl.sql_in    | 332 +++++++++------------
 1 file changed, 145 insertions(+), 187 deletions(-)

diff --git a/src/ports/postgres/modules/utilities/minibatch_preprocessing_dl.sql_in b/src/ports/postgres/modules/utilities/minibatch_preprocessing_dl.sql_in
index 7a45db2..3be1ec0 100644
--- a/src/ports/postgres/modules/utilities/minibatch_preprocessing_dl.sql_in
+++ b/src/ports/postgres/modules/utilities/minibatch_preprocessing_dl.sql_in
@@ -52,8 +52,7 @@ minibatch_preprocessor_dl( source_table,
                            dependent_varname,
                            independent_varname,
                            buffer_size,
-                           normalizing_const,
-                           one_hot_encode_int_dep_var
+                           normalizing_const
                         )
 </pre>
 
@@ -68,12 +67,16 @@ minibatch_preprocessor_dl( source_table,
   will be used as input to algorithms that support mini-batching.
   Note that the arrays packed into the output table are shuffled
   and normalized (by dividing each element in the independent variable array
-  by the "normalizing_const"), so they will not match up in an obvious way with
-  the rows in the source table.
+  by the optional "normalizing_const" parameter), so they will not match
+  up in an obvious way with the rows in the source table.
   </dd>
 
   <dt>dependent_varname</dt>
   <dd>TEXT. Name of the dependent variable column.
+  @note The mini-batch preprocessor automatically encodes
+  dependent variables of all types.  The exception is numeric array types
+  (integer and float), which are assumed to be already one-hot encoded
+  and are therefore passed through as is.
   </dd>
 
   <dt>independent_varname</dt>
@@ -88,30 +91,17 @@ minibatch_preprocessor_dl( source_table,
   output table.  The default value is computed considering size of
   the source table, number of independent variables,
   and number of segments in the database cluster.
+  @note minibatch_preprocessor_dl tries to pack data and distribute it
+  evenly based on the number of input rows, so the number of rows
+  packed into each buffer may not exactly match the value you specified
+  in buffer_size.
   </dd>
 
   <dt>normalizing_const (optional)</dt>
   <dd>DOUBLE PRECISION, default: 1.0. The normalizing constant to divide
-  each value in the independent_varname array by.  For example, in some cases
-  you may need to use 255 for this value if the image data is 0-255.
+  each value in the independent_varname array by.  For example,
+  you may need to use 255 for this value if the image data is in the form 0-255.
   </dd>
-
-  <dt>one_hot_encode_int_dep_var (optional)</dt>
-    <dd> BOOLEAN. default: FALSE.
-    Flag to one-hot encode dependent variables that are
-    scalar integers. This parameter is ignored if the
-    dependent variable is not a scalar integer.
-
-  @note The mini-batch preprocessor automatically encodes
-  dependent variables that are boolean and character types such as text, char 
and
-  varchar.  However, scalar integers are a special case because they can be 
used
-  in both classification and regression problems, so you must tell the 
mini-batch
-  preprocessor whether you want to encode them or not. In the case that you 
have
-  already encoded the dependent variable yourself,  you can ignore this 
parameter.
-  Also, if you want to encode float values for some reason, cast them to text
-  first.
-    </dd>
-
 </dl>
 
 <b>Output tables</b>
@@ -195,42 +185,60 @@ CREATE TABLE image_data AS (
 SELECT * FROM image_data;
 </pre>
 <pre class="result">
-                             rgb                             | species
--------------------------------------------------------------+---------
- {{{46,137,5},{208,71,90}},{{148,61,186},{8,109,10}}}        | dog
- {{{94,133,111},{41,211,179}},{{11,81,114},{26,182,105}}}    | dog
- {{{9,198,217},{84,224,7}},{{221,230,216},{36,64,107}}}      | dog
- {{{250,116,206},{4,249,43}},{{136,104,85},{91,27,96}}}      | bird
- {{{9,226,50},{223,238,158}},{{245,69,45},{206,35,139}}}     | bird
- {{{230,76,170},{97,38,256}},{{95,79,53},{153,17,188}}}      | bird
- {{{234,240,201},{63,210,211}},{{33,3,177},{16,161,166}}}    | cat
- {{{207,116,120},{90,46,94}},{{166,216,190},{204,216,29}}}   | cat
- {{{13,182,44},{201,174,22}},{{186,119,85},{139,73,118}}}    | dog
- {{{86,236,135},{98,229,56}},{{150,26,76},{235,115,142}}}    | bird
- {{{16,128,19},{82,2,21}},{{182,146,111},{44,27,251}}}       | dog
- {{{155,55,178},{135,61,127}},{{199,201,127},{146,211,0}}}   | bird
- {{{140,56,91},{37,205,186}},{{180,139,83},{212,94,163}}}    | dog
- {{{35,72,197},{64,98,167}},{{176,120,13},{209,199,55}}}     | cat
- {{{145,159,176},{36,127,176}},{{222,114,143},{214,56,142}}} | dog
- {{{152,248,249},{26,46,172}},{{65,203,229},{21,32,147}}}    | dog
- {{{211,57,188},{23,18,187}},{{69,60,112},{41,131,209}}}     | bird
- {{{190,51,66},{218,220,218}},{{210,213,244},{256,129,53}}}  | bird
- {{{40,0,124},{213,201,190}},{{80,68,77},{24,240,39}}}       | dog
- {{{105,121,39},{119,75,103}},{{48,228,8},{43,6,16}}}        | dog
- {{{214,143,134},{74,251,204}},{{49,226,171},{145,27,160}}}  | bird
- {{{71,224,194},{216,149,3}},{{80,52,97},{211,115,129}}}     | bird
- {{{66,131,251},{67,228,209}},{{210,106,27},{205,54,76}}}    | bird
- {{{193,43,21},{163,215,79}},{{211,130,254},{113,36,213}}}   | bird
- {{{183,29,86},{229,41,166}},{{73,97,155},{207,178,174}}}    | cat
- {{{253,235,211},{38,79,175}},{{51,176,42},{201,27,47}}}     | bird
- {{{107,217,255},{122,72,221}},{{23,244,58},{66,26,148}}}    | bird
- {{{221,95,164},{185,251,42}},{{94,58,58},{14,222,88}}}      | dog
- {{{105,188,149},{109,226,140}},{{80,31,105},{74,64,36}}}    | cat
- {{{215,40,134},{71,156,50}},{{160,226,179},{255,169,185}}}  | cat
- {{{146,235,249},{181,128,163}},{{161,132,14},{249,4,72}}}   | dog
- {{{195,223,197},{49,149,156}},{{89,26,227},{245,76,131}}}   | bird
- {{{255,131,128},{184,179,19}},{{163,171,200},{35,78,105}}}  | dog
- {{{79,128,8},{211,197,199}},{{22,160,79},{97,53,137}}}      | dog
+                             rgb                              | species
+--------------------------------------------------------------+---------
+ {{{124,198,44},{91,47,130}},{{24,175,69},{196,189,166}}}     | dog
+ {{{111,202,129},{198,249,254}},{{141,37,88},{187,167,113}}}  | dog
+ {{{235,53,39},{145,167,209}},{{197,147,222},{55,218,53}}}    | dog
+ {{{231,48,125},{248,233,151}},{{63,125,230},{33,24,70}}}     | dog
+ {{{92,146,121},{163,241,110}},{{75,88,72},{218,90,12}}}      | bird
+ {{{88,114,59},{202,211,152}},{{92,76,58},{77,186,134}}}      | dog
+ {{{2,96,255},{14,48,19}},{{240,55,115},{137,255,245}}}       | dog
+ {{{165,122,98},{16,115,240}},{{4,106,116},{108,242,210}}}    | dog
+ {{{155,207,101},{214,167,24}},{{118,240,228},{199,230,21}}}  | dog
+ {{{94,212,15},{48,66,170}},{{255,167,128},{166,191,246}}}    | dog
+ {{{169,69,131},{16,98,225}},{{228,113,17},{38,27,17}}}       | bird
+ {{{156,183,139},{146,77,46}},{{80,202,230},{146,84,239}}}    | dog
+ {{{190,210,147},{227,31,66}},{{229,251,84},{51,118,240}}}    | bird
+ {{{253,175,200},{237,151,107}},{{207,56,162},{133,39,35}}}   | cat
+ {{{146,185,108},{14,10,105}},{{188,210,86},{83,61,36}}}      | dog
+ {{{223,169,177},{3,200,250}},{{112,91,16},{193,32,151}}}     | cat
+ {{{249,145,240},{144,153,58}},{{131,156,230},{56,50,75}}}    | dog
+ {{{212,186,229},{52,251,197}},{{230,121,201},{35,215,119}}}  | cat
+ {{{234,94,23},{114,196,94}},{{242,249,90},{223,24,109}}}     | bird
+ {{{111,36,145},{77,135,123}},{{171,158,237},{111,252,222}}}  | dog
+ {{{90,74,240},{231,133,95}},{{11,21,173},{146,144,88}}}      | cat
+ {{{170,52,237},{13,114,71}},{{87,99,46},{220,194,56}}}       | bird
+ {{{8,17,92},{64,2,203}},{{10,131,145},{4,129,30}}}           | cat
+ {{{217,218,207},{74,68,186}},{{127,107,76},{38,60,16}}}      | bird
+ {{{193,34,83},{203,99,58}},{{251,224,50},{228,118,113}}}     | dog
+ {{{146,218,155},{32,159,243}},{{146,218,189},{101,114,25}}}  | bird
+ {{{179,160,74},{204,81,246}},{{50,189,39},{60,42,185}}}      | cat
+ {{{13,82,174},{198,151,84}},{{65,249,100},{179,234,104}}}    | cat
+ {{{162,190,124},{184,66,138}},{{10,240,80},{161,68,145}}}    | dog
+ {{{164,144,199},{53,42,111}},{{122,174,128},{220,143,100}}}  | cat
+ {{{160,138,104},{177,86,3}},{{104,226,149},{181,16,229}}}    | dog
+ {{{246,119,211},{229,249,119}},{{117,192,172},{159,47,38}}}  | cat
+ {{{175,1,220},{18,78,124}},{{156,181,45},{242,185,148}}}     | bird
+ {{{50,113,246},{101,213,180}},{{56,103,151},{87,169,124}}}   | cat
+ {{{73,109,147},{22,81,197}},{{135,71,42},{91,251,98}}}       | bird
+ {{{206,61,255},{25,151,211}},{{211,124,7},{206,64,237}}}     | cat
+ {{{201,71,34},{182,142,43}},{{198,172,171},{230,1,23}}}      | bird
+ {{{142,158,2},{223,45,205}},{{118,177,223},{232,178,141}}}   | cat
+ {{{86,190,128},{195,172,14}},{{97,173,237},{142,123,99}}}    | cat
+ {{{26,72,148},{79,226,156}},{{96,62,220},{99,9,230}}}        | bird
+ {{{154,234,103},{184,18,65}},{{146,225,139},{214,156,10}}}   | cat
+ {{{244,169,103},{218,143,2}},{{196,246,186},{214,55,76}}}    | bird
+ {{{20,226,7},{96,153,200}},{{130,236,147},{229,38,142}}}     | bird
+ {{{172,102,107},{50,11,109}},{{145,9,123},{193,28,107}}}     | bird
+ {{{143,243,247},{132,104,137}},{{94,3,169},{253,246,59}}}    | bird
+ {{{78,74,228},{51,200,218}},{{170,155,190},{164,18,51}}}     | dog
+ {{{163,226,161},{56,182,239}},{{129,154,35},{73,116,205}}}   | bird
+ {{{74,243,3},{172,182,149}},{{101,34,163},{111,138,95}}}     | cat
+ {{{224,178,126},{4,61,93}},{{174,238,96},{118,232,208}}}     | bird
+ {{{55,236,249},{7,189,242}},{{151,173,130},{49,232,5}}}      | bird
+ {{{9,16,30},{128,32,85}},{{108,25,91},{41,11,243}}}          | bird
+ {{{141,35,191},{146,240,141}},{{207,239,166},{102,194,121}}} | bird
 (52 rows)
 </pre>
 -#  Run the preprocessor for image data:
@@ -246,12 +254,15 @@ SELECT madlib.minibatch_preprocessor_dl('image_data',     
    -- Source table
 </pre>
 For small datasets like in this example, buffer size is mainly
 determined by the number of segments in the database.
-This example is run on a Greenplum database with 2 segments,
-so there are 2 rows with a buffer size of 26.
+This example is run on a Greenplum database with 3 segments,
+so there are 3 rows with a buffer size of 18 (in this case
+two segments will get 18 rows and one segment will get 16 rows).
 For PostgreSQL, there would be only one row with a buffer
 size of 52 since it is a single node database.
 For larger data sets, other factors go into
 computing buffer size besides the number of segments.
+Note that the dependent variable is a text type, and it is one-hot encoded
+after preprocessing.
 Here is a sample of the packed output table:
 <pre class="example">
 \\x on
@@ -259,13 +270,17 @@ SELECT * FROM image_data_packed ORDER BY buffer_id;
 </pre>
 <pre class="result">
 -[ RECORD 1 
]---+---------------------------------------------------------------------------------------------------------------------
-independent_var | 
{{{{0.607843,0.215686,0.698039},{0.529412,0.239216,0.498039}},{{0.780392,0.788235,0.498039},{0.572549,0.827451,0}}},...}
-dependent_var   | 
{bird,dog,dog,cat,bird,dog,bird,dog,cat,cat,bird,dog,dog,cat,bird,dog,bird,dog,bird,bird,dog,bird,dog,dog,bird,cat}
+independent_var | 
{{{{0.921569,0.207843,0.152941},{0.568627,0.654902,0.819608}},{{0.772549,0.576471,0.870588},{0.215686,0.854902,0.207843}}},...}
+dependent_var   | {{0,0,1},{0,0,1},{1,0,0},{0,1,0},...}
 buffer_id       | 0
 -[ RECORD 2 
]---+---------------------------------------------------------------------------------------------------------------------
-independent_var | 
{{{{0.184314,0.380392,0.556863},{0.133333,0.764706,0.6}},{{0.470588,0.85098,0.32549},{0.666667,0.196078,0.129412}}},...}
-dependent_var   | 
{bird,bird,bird,cat,dog,bird,cat,cat,bird,dog,dog,cat,dog,bird,cat,dog,bird,bird,dog,dog,dog,bird,dog,bird,bird,cat}
+independent_var | 
{{{{0.639216,0.886275,0.631373},{0.219608,0.713726,0.937255}},{{0.505882,0.603922,0.137255},{0.286275,0.454902,0.803922}}},...}
+dependent_var   | {{1,0,0},{0,1,0},{1,0,0},{0,0,1},...}
 buffer_id       | 1
+-[ RECORD 3 
]---+---------------------------------------------------------------------------------------------------------------------
+independent_var | 
{{{{0.635294,0.745098,0.486275},{0.721569,0.258824,0.541176}},{{0.0392157,0.941177,0.313726},{0.631373,0.266667,0.568627}}},...}
+dependent_var   | {{0,0,1},{0,0,1},{0,1,0},{1,0,0},...}
+buffer_id       | 2
 </pre>
 Review the output summary table:
 <pre class="example">
@@ -279,7 +294,8 @@ output_table        | image_data_packed
 dependent_varname   | species
 independent_varname | rgb
 dependent_vartype   | text
-buffer_size         | 26
+class_values        | {bird,cat,dog}
+buffer_size         | 18
 </pre>
 
 -#  Load data in another format.  Create an artificial 2x2 resolution color image
@@ -308,58 +324,58 @@ SELECT * FROM image_data;
 <pre class="result">
                        rgb                        | species
 --------------------------------------------------+---------
- {177,194,185,175,43,16,205,92,164,130,204,100}   | cat
- {128,51,73,226,104,194,73,190,98,23,98,101}      | cat
- {237,14,81,38,116,2,232,45,177,19,61,126}        | dog
- {225,0,59,69,29,187,120,102,157,224,40,230}      | dog
- {138,253,256,239,51,237,253,132,19,113,134,251}  | dog
- {55,14,219,182,125,189,182,184,2,211,115,122}    | cat
- {16,90,96,246,248,234,243,248,217,39,229,215}    | dog
- {247,72,49,242,230,104,256,193,30,125,126,212}   | cat
- {128,167,168,250,223,184,84,63,174,76,42,161}    | cat
- {3,200,41,218,114,32,33,163,18,8,11,18}          | bird
- {41,143,71,253,196,200,163,108,194,130,35,22}    | bird
- {209,98,235,114,167,238,58,207,200,172,240,233}  | cat
- {2,241,90,21,186,130,164,1,127,104,201,34}       | bird
- {139,164,247,161,102,200,3,82,58,170,64,115}     | dog
- {8,31,105,241,110,107,226,200,128,156,74,36}     | dog
- {200,140,103,234,95,241,143,86,146,245,30,149}   | cat
- {87,63,134,203,185,142,234,34,127,88,141,97}     | cat
- {13,254,105,49,155,50,189,2,28,28,243,171}       | dog
- {133,160,144,26,231,231,90,109,178,19,252,156}   | cat
- {123,244,194,221,20,208,218,125,1,117,175,190}   | dog
- {204,218,106,119,75,239,23,219,9,254,194,99}     | dog
- {116,118,103,16,171,226,5,109,191,25,61,153}     | dog
- {62,14,69,252,133,17,213,239,136,32,222,159}     | bird
- {231,157,189,74,9,49,192,112,66,107,83,70}       | bird
- {18,95,22,171,245,84,185,58,79,62,75,36}         | cat
- {211,69,11,114,64,242,15,254,60,24,47,252}       | dog
- {31,171,230,197,167,228,220,138,123,199,79,75}   | cat
- {83,238,149,1,213,101,150,168,179,155,255,49}    | dog
- {123,157,77,177,69,108,92,43,49,3,15,13}         | dog
- {138,212,221,212,219,48,194,113,49,152,213,199}  | cat
- {136,98,62,185,256,185,86,77,106,155,185,198}    | bird
- {235,202,213,247,87,95,203,52,51,167,100,245}    | cat
- {149,141,236,93,204,117,191,10,46,190,195,133}   | cat
- {45,32,196,244,230,175,189,187,166,21,26,113}    | cat
- {77,24,173,66,47,66,207,28,159,155,145,94}       | dog
- {191,28,104,67,39,150,99,235,137,73,154,71}      | cat
- {65,92,30,178,164,107,202,81,173,249,147,124}    | cat
- {50,23,166,144,188,101,172,36,168,211,186,11}    | bird
- {67,84,88,138,88,153,229,117,75,138,224,21}      | bird
- {140,14,110,8,36,160,31,201,48,219,46,219}       | bird
- {214,174,184,224,108,251,52,196,133,139,93,106}  | cat
- {168,244,225,189,207,109,203,61,117,239,221,148} | bird
- {13,111,230,232,109,188,150,37,157,2,32,209}     | bird
- {165,92,36,15,93,203,4,61,136,210,171,84}        | cat
- {32,66,236,180,251,249,34,225,225,144,157,119}   | bird
- {58,121,213,11,64,122,103,99,137,195,47,141}     | cat
- {183,95,171,11,111,203,77,91,127,72,83,162}      | cat
- {52,49,199,172,230,1,37,187,11,101,52,114}       | bird
- {190,53,247,75,54,174,170,226,185,24,173,6}      | dog
- {44,78,198,206,119,251,255,62,166,229,63,204}    | dog
- {75,49,212,189,249,146,242,240,221,40,159,134}   | cat
- {88,159,183,94,18,228,172,216,178,35,211,177}    | dog
+ {26,150,191,113,235,57,145,143,44,145,85,25}     | dog
+ {240,43,225,15,220,136,186,209,49,130,55,111}    | bird
+ {25,191,37,77,193,62,249,228,97,33,81,7}         | cat
+ {141,223,46,195,201,19,207,78,160,130,157,89}    | cat
+ {39,249,168,164,223,193,99,4,14,37,66,7}         | cat
+ {159,250,127,44,151,254,11,211,247,137,79,233}   | cat
+ {19,230,76,253,42,175,230,143,184,133,27,215}    | cat
+ {199,224,144,5,64,19,200,186,109,218,108,70}     | bird
+ {148,136,4,41,185,104,203,253,113,151,166,76}    | bird
+ {230,132,114,213,210,139,91,199,240,142,203,75}  | bird
+ {166,188,96,217,135,70,93,249,27,47,132,118}     | bird
+ {118,120,222,236,110,83,240,47,19,206,222,51}    | bird
+ {230,3,26,47,93,144,167,59,123,21,142,107}       | cat
+ {250,224,62,136,112,142,88,187,24,1,168,216}     | bird
+ {52,144,231,12,76,1,162,11,114,141,69,3}         | cat
+ {166,172,246,169,200,102,62,57,239,75,165,88}    | dog
+ {151,50,112,227,199,97,47,4,43,123,116,133}      | bird
+ {39,185,96,127,80,248,177,191,218,120,32,9}      | dog
+ {25,172,34,34,40,109,166,23,60,216,246,54}       | bird
+ {163,39,89,170,95,230,137,141,169,82,159,121}    | dog
+ {131,143,183,138,151,90,177,240,4,16,214,141}    | dog
+ {99,233,100,9,159,140,30,202,29,169,120,62}      | bird
+ {99,162,69,10,204,169,219,20,106,170,111,16}     | bird
+ {16,246,27,32,187,226,0,75,231,64,94,175}        | bird
+ {25,135,244,101,50,4,91,77,36,22,47,37}          | dog
+ {22,101,191,197,96,138,78,198,155,138,193,51}    | bird
+ {236,22,110,30,181,20,218,21,236,97,91,73}       | dog
+ {160,57,34,212,239,197,233,174,164,97,88,153}    | cat
+ {226,170,192,123,242,224,190,51,163,192,91,105}  | bird
+ {149,174,12,72,112,1,37,153,118,201,79,121}      | bird
+ {34,250,232,222,218,221,234,201,138,66,186,58}   | bird
+ {162,55,85,159,247,234,77,3,50,189,4,87}         | dog
+ {122,32,164,243,0,198,237,232,164,199,197,142}   | dog
+ {80,209,75,138,169,236,193,254,140,184,232,217}  | bird
+ {112,148,114,137,13,107,105,75,243,218,218,75}   | dog
+ {241,76,61,202,76,112,90,51,125,166,52,30}       | bird
+ {75,132,239,207,49,224,250,19,238,214,154,169}   | dog
+ {203,43,222,58,231,5,243,71,131,67,63,52}        | cat
+ {229,12,133,142,179,80,185,145,138,160,149,125}  | bird
+ {64,251,61,153,13,100,145,181,8,112,118,107}     | dog
+ {128,223,60,248,126,124,243,188,20,0,31,166}     | bird
+ {39,22,43,146,138,174,33,65,56,184,155,234}      | dog
+ {177,247,133,154,159,37,148,30,81,43,29,92}      | bird
+ {56,127,199,118,105,120,109,239,18,12,20,166}    | cat
+ {101,209,72,193,207,91,166,27,88,209,203,62}     | dog
+ {131,195,122,90,18,178,217,217,40,66,81,149}     | cat
+ {203,137,103,17,60,251,152,64,36,81,168,239}     | cat
+ {239,97,10,20,194,32,121,129,228,217,11,50}      | dog
+ {117,4,193,192,223,176,33,232,196,226,8,61}      | dog
+ {162,21,190,223,120,170,245,230,200,170,250,163} | bird
+ {32,67,65,195,2,39,198,28,86,35,172,254}         | dog
+ {39,19,236,146,87,140,203,121,96,187,62,73}      | dog
 (52 rows)
 </pre>
 
@@ -380,14 +396,18 @@ Here is a sample of the packed output table:
 SELECT * FROM image_data_packed ORDER BY buffer_id;
 </pre>
 <pre class="result">
--[ RECORD 1 ]-------+-------------------------------------
-independent_var | 
{{0.501961,0.2,0.286275,0.886275,0.407843,0.760784,0.286275,0.745098,0.384314,0.0901961,0.384314,0.396078},...}}
-dependent_var   | 
{cat,dog,cat,cat,bird,cat,dog,cat,dog,dog,dog,bird,dog,dog,cat,cat,cat,cat,bird,bird,bird,cat,cat,bird,dog,dog}
+-[ RECORD 1 
]---+---------------------------------------------------------------------------------------------------------------------
+independent_var | 
{{0.203922,0.564706,0.905882,0.0470588,0.298039,0.00392157,0.635294,0.0431373,0.447059,0.552941,0.270588,0.0117647},...}
+dependent_var   | {{0,1,0},{1,0,0},{1,0,0},{1,0,0},{0,0,1},...}
 buffer_id       | 0
--[ RECORD 2 ]-------+-------------------------------------
-independent_var | 
{{0.694118,0.760784,0.72549,0.686275,0.168627,0.0627451,0.803922,0.360784,0.643137,0.509804,0.8,0.392157},...}}
-dependent_var   | 
{cat,dog,dog,dog,cat,bird,bird,dog,cat,cat,dog,bird,cat,cat,dog,bird,cat,dog,cat,bird,cat,bird,bird,cat,dog,cat}
+-[ RECORD 2 
]---+---------------------------------------------------------------------------------------------------------------------
+independent_var | 
{{0.25098,0.984314,0.239216,0.6,0.0509804,0.392157,0.568627,0.709804,0.0313726,0.439216,0.462745,0.419608},...}
+dependent_var   | {{0,0,1},{0,0,1},{0,1,0},{0,0,1},{1,0,0},...}
 buffer_id       | 1
+-[ RECORD 3 
]---+---------------------------------------------------------------------------------------------------------------------
+independent_var | 
{{0.796079,0.537255,0.403922,0.0666667,0.235294,0.984314,0.596078,0.25098,0.141176,0.317647,0.658824,0.937255},...}
+dependent_var   | {{0,1,0},{0,1,0},{0,1,0},{0,0,1},{0,0,1},...}
+buffer_id       | 2
 </pre>
 
 -# Generally the default buffer size will work well,
@@ -420,72 +440,10 @@ output_table        | image_data_packed
 dependent_varname   | species
 independent_varname | rgb
 dependent_vartype   | text
+class_values        | {bird,cat,dog}
 buffer_size         | 10
 </pre>
 
--# Shift integer dependent variable.  Let's say our input data has class levels
-starting from 1, but we want them to start from 0 after preprocesssing.
-Create a new image data set with class values as 1, 2 or 3:
-<pre class="example">
-DROP TABLE IF EXISTS image_data_2;
-CREATE TABLE image_data_2 AS (
-SELECT ARRAY[
-        (random() * 256)::integer, -- R values
-        (random() * 256)::integer,
-        (random() * 256)::integer,
-        (random() * 256)::integer,
-        (random() * 256)::integer, -- G values
-        (random() * 256)::integer,
-        (random() * 256)::integer,
-        (random() * 256)::integer,
-        (random() * 256)::integer, -- B values
-        (random() * 256)::integer,
-        (random() * 256)::integer,
-        (random() * 256)::integer
-    ] as rgb, ('{1,2,3}'::integer[])[ceil(random()*3)] as species
-FROM generate_series(1, 52)
-);
-SELECT * FROM image_data_2 LIMIT 10;
-</pre>
-<pre class="result">
-                      rgb                       | species
-------------------------------------------------+---------
- {103,220,183,64,237,22,86,152,10,170,81,68}    |       3
- {144,143,30,189,98,99,200,212,163,185,227,161} |       3
- {252,208,241,132,65,89,96,247,153,77,13,239}   |       3
- {23,153,55,92,146,199,235,176,131,77,19,75}    |       1
- {182,5,4,86,250,0,38,235,132,103,67,228}       |       2
- {220,50,107,203,23,131,100,78,222,246,21,201}  |       2
- {152,22,185,227,55,110,232,59,197,226,59,235}  |       3
- {192,82,16,164,176,236,214,27,183,237,158,27}  |       1
- {124,17,80,69,183,233,91,111,204,146,221,180}  |       3
- {162,150,8,141,98,200,222,114,108,142,95,66}   |       2
-...
-</pre>
-Now run the preprocessor with a dependent variable offset of -1:
-<pre class="example">
-DROP TABLE IF EXISTS image_data_packed_2, image_data_packed_2_summary;
-SELECT madlib.minibatch_preprocessor_dl('image_data_2',       -- Source table
-                                        'image_data_packed_2', -- Output table
-                                        'species',            -- Dependent 
variable
-                                        'rgb',                -- Independent 
variable
-                                        NULL,                 -- Buffer size
-                                        255,                  -- Normalizing 
constant
-                                        -1                    -- Dependent 
variable offset
-                                        );
-SELECT * FROM image_data_packed_2 ORDER BY buffer_id;
-</pre>
-<pre class="result">
--[ RECORD 1 ]-------+-------------------------------------
-independent_var | 
{{0.564706,0.560784,0.117647,0.741176,0.384314,0.388235,0.784314,0.831373,0.639216,0.72549,0.890196,0.631373},...}}
-dependent_var   | {2,0,1,0,1,1,1,0,1,0,2,0,1,0,1,0,2,0,2,0,2,1,0,1,0,0}
-buffer_id       | 0
--[ RECORD 2 ]-------+-------------------------------------
-independent_var | 
{{0.403922,0.862745,0.717647,0.25098,0.929412,0.0862745,0.337255,0.596078,0.0392157,0.666667,0.317647,0.266667},...}}
-dependent_var   | {2,2,1,2,2,1,1,2,0,0,0,1,1,1,0,2,2,1,2,1,1,0,2,1,2,2}
-buffer_id       | 1
-</pre>
-
 @anchor related
 @par Related Topics
 

Reply via email to