This is an automated email from the ASF dual-hosted git repository.

njayaram pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/madlib.git

commit b9453b4c8ab04a47ca5e25f341e7682a82d5bf8e
Author: Nandish Jayaram <[email protected]>
AuthorDate: Tue May 14 15:10:00 2019 -0700

    DL: Change how we create tables in dev-check
    
    Due to the issues mentioned in MADLIB-1326, running
    training_preprocessor_dl in deep learning dev-check results in flaky
    errors. Instead of calling it, this commit creates the text-class
    mini-batched table and its summary table by hand.
    
    Co-authored-by: Jingyi Mei <[email protected]>
---
 .../modules/deep_learning/test/madlib_keras.sql_in | 71 ++++++++++++++--------
 1 file changed, 46 insertions(+), 25 deletions(-)

diff --git a/src/ports/postgres/modules/deep_learning/test/madlib_keras.sql_in b/src/ports/postgres/modules/deep_learning/test/madlib_keras.sql_in
index 5ed3014..b5aaa6d 100644
--- a/src/ports/postgres/modules/deep_learning/test/madlib_keras.sql_in
+++ b/src/ports/postgres/modules/deep_learning/test/madlib_keras.sql_in
@@ -19,10 +19,10 @@
  *
  *//* ---------------------------------------------------------------------*/
 drop table if exists cifar_10_sample;
-create table cifar_10_sample(id INTEGER, y SMALLINT, imgpath TEXT, x  REAL[]);
+create table cifar_10_sample(id INTEGER, y SMALLINT, y_text TEXT, imgpath TEXT, x  REAL[]);
 copy cifar_10_sample from stdin delimiter '|';
-1|0|'0/img0.jpg'|{{{202,204,199},{202,204,199},{204,206,201},{206,208,203},{208,210,205},{209,211,206},{210,212,207},{212,214,210},{213,215,212},{215,217,214},{216,218,215},{216,218,215},{215,217,214},{216,218,215},{216,218,215},{216,218,214},{217,219,214},{217,219,214},{218,220,215},{218,219,214},{216,217,212},{217,218,213},{218,219,214},{214,215,209},{213,214,207},{212,213,206},{211,212,205},{209,210,203},{208,209,202},{207,208,200},{205,206,199},{203,204,198}},{{206,208,203},{206,208,
 [...]
-2|1|'0/img2.jpg'|{{{126,118,110},{122,115,108},{126,119,111},{127,119,109},{130,122,111},{130,122,111},{132,124,113},{133,125,114},{130,122,111},{132,124,113},{134,126,115},{131,123,112},{131,123,112},{134,126,115},{133,125,114},{136,128,117},{137,129,118},{137,129,118},{136,128,117},{131,123,112},{130,122,111},{132,124,113},{132,124,113},{132,124,113},{129,122,110},{127,121,109},{127,121,109},{125,119,107},{124,118,106},{124,118,106},{120,114,102},{117,111,99}},{{122,115,107},{119,112,1
 [...]
+1|0|'cat'|'0/img0.jpg'|{{{202,204,199},{202,204,199},{204,206,201},{206,208,203},{208,210,205},{209,211,206},{210,212,207},{212,214,210},{213,215,212},{215,217,214},{216,218,215},{216,218,215},{215,217,214},{216,218,215},{216,218,215},{216,218,214},{217,219,214},{217,219,214},{218,220,215},{218,219,214},{216,217,212},{217,218,213},{218,219,214},{214,215,209},{213,214,207},{212,213,206},{211,212,205},{209,210,203},{208,209,202},{207,208,200},{205,206,199},{203,204,198}},{{206,208,203},{20
 [...]
+2|1|'dog'|'0/img2.jpg'|{{{126,118,110},{122,115,108},{126,119,111},{127,119,109},{130,122,111},{130,122,111},{132,124,113},{133,125,114},{130,122,111},{132,124,113},{134,126,115},{131,123,112},{131,123,112},{134,126,115},{133,125,114},{136,128,117},{137,129,118},{137,129,118},{136,128,117},{131,123,112},{130,122,111},{132,124,113},{132,124,113},{132,124,113},{129,122,110},{127,121,109},{127,121,109},{125,119,107},{124,118,106},{124,118,106},{120,114,102},{117,111,99}},{{122,115,107},{119
 [...]
 \.
 
 drop table if exists cifar_10_sample_val;
@@ -43,12 +43,13 @@ copy cifar_10_sample_val from stdin delimiter '|';
 
 DROP TABLE IF EXISTS cifar_10_sample_batched;
 CREATE TABLE cifar_10_sample_batched(
-    independent_var real[],
+    buffer_id smallint,
     dependent_var integer[],
-    buffer_id smallint);
+    dependent_var_text_with_null integer[],
+    independent_var real[]);
 copy cifar_10_sample_batched from stdin delimiter '|';
-{{{{0.494118,0.462745,0.431373},{0.478431,0.45098,0.423529},{0.494118,0.466667,0.435294},{0.498039,0.466667,0.427451},{0.509804,0.478431,0.435294},{0.509804,0.478431,0.435294},{0.517647,0.486275,0.443137},{0.521569,0.490196,0.447059},{0.509804,0.478431,0.435294},{0.517647,0.486275,0.443137},{0.52549,0.494118,0.45098},{0.513726,0.482353,0.439216},{0.513726,0.482353,0.439216},{0.52549,0.494118,0.45098},{0.521569,0.490196,0.447059},{0.533333,0.501961,0.458824},{0.537255,0.505882,0.462745},{
 [...]
-{{{{0.792157,0.8,0.780392},{0.792157,0.8,0.780392},{0.8,0.807843,0.788235},{0.807843,0.815686,0.796079},{0.815686,0.823529,0.803922},{0.819608,0.827451,0.807843},{0.823529,0.831373,0.811765},{0.831373,0.839216,0.823529},{0.835294,0.843137,0.831373},{0.843137,0.85098,0.839216},{0.847059,0.854902,0.843137},{0.847059,0.854902,0.843137},{0.843137,0.85098,0.839216},{0.847059,0.854902,0.843137},{0.847059,0.854902,0.843137},{0.847059,0.854902,0.839216},{0.85098,0.858824,0.839216},{0.85098,0.858
 [...]
+0|{{0,1}}|{{0,0,1,0,0}}|{{{{0.494118,0.462745,0.431373},{0.478431,0.45098,0.423529},{0.494118,0.466667,0.435294},{0.498039,0.466667,0.427451},{0.509804,0.478431,0.435294},{0.509804,0.478431,0.435294},{0.517647,0.486275,0.443137},{0.521569,0.490196,0.447059},{0.509804,0.478431,0.435294},{0.517647,0.486275,0.443137},{0.52549,0.494118,0.45098},{0.513726,0.482353,0.439216},{0.513726,0.482353,0.439216},{0.52549,0.494118,0.45098},{0.521569,0.490196,0.447059},{0.533333,0.501961,0.458824},{0.537
 [...]
+1|{{1,0}}|{{0,1,0,0,0}}|{{{{0.792157,0.8,0.780392},{0.792157,0.8,0.780392},{0.8,0.807843,0.788235},{0.807843,0.815686,0.796079},{0.815686,0.823529,0.803922},{0.819608,0.827451,0.807843},{0.823529,0.831373,0.811765},{0.831373,0.839216,0.823529},{0.835294,0.843137,0.831373},{0.843137,0.85098,0.839216},{0.847059,0.854902,0.843137},{0.847059,0.854902,0.843137},{0.843137,0.85098,0.839216},{0.847059,0.854902,0.843137},{0.847059,0.854902,0.843137},{0.847059,0.854902,0.839216},{0.85098,0.858824,
 [...]
 \.
 
 DROP TABLE IF EXISTS cifar_10_sample_batched_summary;
@@ -346,7 +347,6 @@ SELECT madlib_keras_fit(
     1,
     $$ optimizer=Adam(epsilon=None), loss='categorical_crossentropy', metrics=['accuracy']$$::text,
     $$ batch_size=2, epochs=1, verbose=0 $$::text,
-
     1,
     0,
     NULL,
@@ -378,15 +378,17 @@ SELECT madlib_keras_fit(
     1,
     NULL,
     NULL,
+    NULL,
     'model name', 'model desc');
 
 -- -- negative test case for passing non numeric y to fit
 -- induce failure by passing a non numeric column
-create table cifar_10_sample_val_failure as select * from cifar_10_sample_val;
-alter table cifar_10_sample_val_failure rename dependent_var to dependent_var_original;
-alter table cifar_10_sample_val_failure rename buffer_id to dependent_var;
+DROP TABLE IF EXISTS cifar_10_sample_val_failure;
+CREATE TABLE cifar_10_sample_val_failure AS SELECT * FROM cifar_10_sample_val;
+ALTER TABLE cifar_10_sample_val_failure rename dependent_var to dependent_var_original;
+ALTER TABLE cifar_10_sample_val_failure rename buffer_id to dependent_var;
 DROP TABLE IF EXISTS keras_out, keras_out_summary;
-select assert(trap_error($TRAP$madlib_keras_fit(
+SELECT assert(trap_error($TRAP$madlib_keras_fit(
            'cifar_10_sample_batched',
            'keras_out',
            'model_arch',
@@ -425,21 +427,36 @@ WHERE attrelid='cifar10_predict'::regclass AND attnum>0;
 
 -- Tests with text class values:
 -- Modify input data to have text classes, and mini-batch it.
-CREATE TABLE cifar_10_sample_text AS
-SELECT * FROM cifar_10_sample;
-
-ALTER TABLE cifar_10_sample_text ALTER COLUMN y type TEXT;
-UPDATE cifar_10_sample_text SET y='cat' where y='0';
-UPDATE cifar_10_sample_text SET y='dog' where y='1';
--- Add a new image with NULL class value
-INSERT INTO cifar_10_sample_text(id, x, y, imgpath)
-SELECT 3, x, NULL, '0/img3.jpg' FROM cifar_10_sample_text
-WHERE y='cat';
-
-
 DROP TABLE IF EXISTS cifar_10_sample_text_batched;
+-- Create a new table using the text based column for dep var.
+CREATE TABLE cifar_10_sample_text_batched AS
+    SELECT buffer_id, independent_var, dependent_var_text_with_null AS dependent_var
+    FROM cifar_10_sample_batched;
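+-- Insert a new row with NULL as the dependent var (one-hot encoded). NOTE(review): [0,1,0,0,0] is also the 'cat' encoding per class_values below -- confirm the intended index.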
+INSERT INTO cifar_10_sample_text_batched(buffer_id, independent_var, dependent_var)
+    SELECT 2, independent_var, ARRAY[[0,1,0,0,0]]
+    FROM cifar_10_sample_batched
+    WHERE cifar_10_sample_batched.buffer_id=0;
+-- Create the necessary summary table for the batched input.
 DROP TABLE IF EXISTS cifar_10_sample_text_batched_summary;
-SELECT training_preprocessor_dl('cifar_10_sample_text','cifar_10_sample_text_batched','y','x', 2, 255, 5);
+CREATE TABLE cifar_10_sample_text_batched_summary(
+    source_table text,
+    output_table text,
+    dependent_varname text,
+    independent_varname text,
+    dependent_vartype text,
+    class_values text[],
+    buffer_size integer,
+    normalizing_const numeric);
+INSERT INTO cifar_10_sample_text_batched_summary values (
+    'cifar_10_sample',
+    'cifar_10_sample_text_batched',
+    'y_text',
+    'x',
+    'text',
+    ARRAY[NULL,'cat','dog',NULL,NULL],
+    1,
+    255.0);
 
 -- Change model_arch to reflect 5 num_classes
 DROP TABLE IF EXISTS model_arch;
@@ -481,6 +498,10 @@ SELECT assert(
 FROM (SELECT * FROM keras_saved_out_summary) summary;
 
 -- Predict with pred_type=prob
+DROP TABLE IF EXISTS cifar_10_sample_text;
+CREATE TABLE cifar_10_sample_text AS
+    SELECT id, x, y_text
+    FROM cifar_10_sample;
 DROP TABLE IF EXISTS cifar10_predict;
 SELECT madlib_keras_predict(
     'keras_saved_out',

Reply via email to