This is an automated email from the ASF dual-hosted git repository.

njayaram pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/madlib.git
commit b9453b4c8ab04a47ca5e25f341e7682a82d5bf8e Author: Nandish Jayaram <[email protected]> AuthorDate: Tue May 14 15:10:00 2019 -0700 DL: Change how we create tables in dev-check Due to the issues mentioned in MADLIB-1326, running training_preprocessor_dl in deep learning dev-check results in flaky errors. This commit manually creates one such table. Co-authored-by: Jingyi Mei <[email protected]> --- .../modules/deep_learning/test/madlib_keras.sql_in | 71 ++++++++++++++-------- 1 file changed, 46 insertions(+), 25 deletions(-) diff --git a/src/ports/postgres/modules/deep_learning/test/madlib_keras.sql_in b/src/ports/postgres/modules/deep_learning/test/madlib_keras.sql_in index 5ed3014..b5aaa6d 100644 --- a/src/ports/postgres/modules/deep_learning/test/madlib_keras.sql_in +++ b/src/ports/postgres/modules/deep_learning/test/madlib_keras.sql_in @@ -19,10 +19,10 @@ * *//* ---------------------------------------------------------------------*/ drop table if exists cifar_10_sample; -create table cifar_10_sample(id INTEGER, y SMALLINT, imgpath TEXT, x REAL[]); +create table cifar_10_sample(id INTEGER, y SMALLINT, y_text TEXT, imgpath TEXT, x REAL[]); copy cifar_10_sample from stdin delimiter '|'; -1|0|'0/img0.jpg'|{{{202,204,199},{202,204,199},{204,206,201},{206,208,203},{208,210,205},{209,211,206},{210,212,207},{212,214,210},{213,215,212},{215,217,214},{216,218,215},{216,218,215},{215,217,214},{216,218,215},{216,218,215},{216,218,214},{217,219,214},{217,219,214},{218,220,215},{218,219,214},{216,217,212},{217,218,213},{218,219,214},{214,215,209},{213,214,207},{212,213,206},{211,212,205},{209,210,203},{208,209,202},{207,208,200},{205,206,199},{203,204,198}},{{206,208,203},{206,208, [...] 
-2|1|'0/img2.jpg'|{{{126,118,110},{122,115,108},{126,119,111},{127,119,109},{130,122,111},{130,122,111},{132,124,113},{133,125,114},{130,122,111},{132,124,113},{134,126,115},{131,123,112},{131,123,112},{134,126,115},{133,125,114},{136,128,117},{137,129,118},{137,129,118},{136,128,117},{131,123,112},{130,122,111},{132,124,113},{132,124,113},{132,124,113},{129,122,110},{127,121,109},{127,121,109},{125,119,107},{124,118,106},{124,118,106},{120,114,102},{117,111,99}},{{122,115,107},{119,112,1 [...] +1|0|'cat'|'0/img0.jpg'|{{{202,204,199},{202,204,199},{204,206,201},{206,208,203},{208,210,205},{209,211,206},{210,212,207},{212,214,210},{213,215,212},{215,217,214},{216,218,215},{216,218,215},{215,217,214},{216,218,215},{216,218,215},{216,218,214},{217,219,214},{217,219,214},{218,220,215},{218,219,214},{216,217,212},{217,218,213},{218,219,214},{214,215,209},{213,214,207},{212,213,206},{211,212,205},{209,210,203},{208,209,202},{207,208,200},{205,206,199},{203,204,198}},{{206,208,203},{20 [...] +2|1|'dog'|'0/img2.jpg'|{{{126,118,110},{122,115,108},{126,119,111},{127,119,109},{130,122,111},{130,122,111},{132,124,113},{133,125,114},{130,122,111},{132,124,113},{134,126,115},{131,123,112},{131,123,112},{134,126,115},{133,125,114},{136,128,117},{137,129,118},{137,129,118},{136,128,117},{131,123,112},{130,122,111},{132,124,113},{132,124,113},{132,124,113},{129,122,110},{127,121,109},{127,121,109},{125,119,107},{124,118,106},{124,118,106},{120,114,102},{117,111,99}},{{122,115,107},{119 [...] \. 
drop table if exists cifar_10_sample_val; @@ -43,12 +43,13 @@ copy cifar_10_sample_val from stdin delimiter '|'; DROP TABLE IF EXISTS cifar_10_sample_batched; CREATE TABLE cifar_10_sample_batched( - independent_var real[], + buffer_id smallint, dependent_var integer[], - buffer_id smallint); + dependent_var_text_with_null integer[], + independent_var real[]); copy cifar_10_sample_batched from stdin delimiter '|'; -{{{{0.494118,0.462745,0.431373},{0.478431,0.45098,0.423529},{0.494118,0.466667,0.435294},{0.498039,0.466667,0.427451},{0.509804,0.478431,0.435294},{0.509804,0.478431,0.435294},{0.517647,0.486275,0.443137},{0.521569,0.490196,0.447059},{0.509804,0.478431,0.435294},{0.517647,0.486275,0.443137},{0.52549,0.494118,0.45098},{0.513726,0.482353,0.439216},{0.513726,0.482353,0.439216},{0.52549,0.494118,0.45098},{0.521569,0.490196,0.447059},{0.533333,0.501961,0.458824},{0.537255,0.505882,0.462745},{ [...] -{{{{0.792157,0.8,0.780392},{0.792157,0.8,0.780392},{0.8,0.807843,0.788235},{0.807843,0.815686,0.796079},{0.815686,0.823529,0.803922},{0.819608,0.827451,0.807843},{0.823529,0.831373,0.811765},{0.831373,0.839216,0.823529},{0.835294,0.843137,0.831373},{0.843137,0.85098,0.839216},{0.847059,0.854902,0.843137},{0.847059,0.854902,0.843137},{0.843137,0.85098,0.839216},{0.847059,0.854902,0.843137},{0.847059,0.854902,0.843137},{0.847059,0.854902,0.839216},{0.85098,0.858824,0.839216},{0.85098,0.858 [...] +0|{{0,1}}|{{0,0,1,0,0}}|{{{{0.494118,0.462745,0.431373},{0.478431,0.45098,0.423529},{0.494118,0.466667,0.435294},{0.498039,0.466667,0.427451},{0.509804,0.478431,0.435294},{0.509804,0.478431,0.435294},{0.517647,0.486275,0.443137},{0.521569,0.490196,0.447059},{0.509804,0.478431,0.435294},{0.517647,0.486275,0.443137},{0.52549,0.494118,0.45098},{0.513726,0.482353,0.439216},{0.513726,0.482353,0.439216},{0.52549,0.494118,0.45098},{0.521569,0.490196,0.447059},{0.533333,0.501961,0.458824},{0.537 [...] 
+1|{{1,0}}|{{0,1,0,0,0}}|{{{{0.792157,0.8,0.780392},{0.792157,0.8,0.780392},{0.8,0.807843,0.788235},{0.807843,0.815686,0.796079},{0.815686,0.823529,0.803922},{0.819608,0.827451,0.807843},{0.823529,0.831373,0.811765},{0.831373,0.839216,0.823529},{0.835294,0.843137,0.831373},{0.843137,0.85098,0.839216},{0.847059,0.854902,0.843137},{0.847059,0.854902,0.843137},{0.843137,0.85098,0.839216},{0.847059,0.854902,0.843137},{0.847059,0.854902,0.843137},{0.847059,0.854902,0.839216},{0.85098,0.858824, [...] \. DROP TABLE IF EXISTS cifar_10_sample_batched_summary; @@ -346,7 +347,6 @@ SELECT madlib_keras_fit( 1, $$ optimizer=Adam(epsilon=None), loss='categorical_crossentropy', metrics=['accuracy']$$::text, $$ batch_size=2, epochs=1, verbose=0 $$::text, - 1, 0, NULL, @@ -378,15 +378,17 @@ SELECT madlib_keras_fit( 1, NULL, NULL, + NULL, 'model name', 'model desc'); -- -- negative test case for passing non numeric y to fit -- induce failure by passing a non numeric column -create table cifar_10_sample_val_failure as select * from cifar_10_sample_val; -alter table cifar_10_sample_val_failure rename dependent_var to dependent_var_original; -alter table cifar_10_sample_val_failure rename buffer_id to dependent_var; +DROP TABLE IF EXISTS cifar_10_sample_val_failure; +CREATE TABLE cifar_10_sample_val_failure AS SELECT * FROM cifar_10_sample_val; +ALTER TABLE cifar_10_sample_val_failure rename dependent_var to dependent_var_original; +ALTER TABLE cifar_10_sample_val_failure rename buffer_id to dependent_var; DROP TABLE IF EXISTS keras_out, keras_out_summary; -select assert(trap_error($TRAP$madlib_keras_fit( +SELECT assert(trap_error($TRAP$madlib_keras_fit( 'cifar_10_sample_batched', 'keras_out', 'model_arch', @@ -425,21 +427,36 @@ WHERE attrelid='cifar10_predict'::regclass AND attnum>0; -- Tests with text class values: -- Modify input data to have text classes, and mini-batch it. 
-CREATE TABLE cifar_10_sample_text AS -SELECT * FROM cifar_10_sample; - -ALTER TABLE cifar_10_sample_text ALTER COLUMN y type TEXT; -UPDATE cifar_10_sample_text SET y='cat' where y='0'; -UPDATE cifar_10_sample_text SET y='dog' where y='1'; --- Add a new image with NULL class value -INSERT INTO cifar_10_sample_text(id, x, y, imgpath) -SELECT 3, x, NULL, '0/img3.jpg' FROM cifar_10_sample_text -WHERE y='cat'; - - DROP TABLE IF EXISTS cifar_10_sample_text_batched; +-- Create a new table using the text based column for dep var. +CREATE TABLE cifar_10_sample_text_batched AS + SELECT buffer_id, independent_var, dependent_var_text_with_null AS dependent_var + FROM cifar_10_sample_batched; +-- Insert a new row with NULL as the dependent var (one-hot encoded) +INSERT INTO cifar_10_sample_text_batched(buffer_id, independent_var, dependent_var) + SELECT 2, independent_var, ARRAY[[0,1,0,0,0]] + FROM cifar_10_sample_batched + WHERE cifar_10_sample_batched.buffer_id=0; +-- Create the necessary summary table for the batched input. 
DROP TABLE IF EXISTS cifar_10_sample_text_batched_summary; -SELECT training_preprocessor_dl('cifar_10_sample_text','cifar_10_sample_text_batched','y','x', 2, 255, 5); +CREATE TABLE cifar_10_sample_text_batched_summary( + source_table text, + output_table text, + dependent_varname text, + independent_varname text, + dependent_vartype text, + class_values text[], + buffer_size integer, + normalizing_const numeric); +INSERT INTO cifar_10_sample_text_batched_summary values ( + 'cifar_10_sample', + 'cifar_10_sample_text_batched', + 'y_text', + 'x', + 'text', + ARRAY[NULL,'cat','dog',NULL,NULL], + 1, + 255.0); -- Change model_arch to reflect 5 num_classes DROP TABLE IF EXISTS model_arch; @@ -481,6 +498,10 @@ SELECT assert( FROM (SELECT * FROM keras_saved_out_summary) summary; -- Predict with pred_type=prob +DROP TABLE IF EXISTS cifar_10_sample_text; +CREATE TABLE cifar_10_sample_text AS + SELECT id, x, y_text + FROM cifar_10_sample; DROP TABLE IF EXISTS cifar10_predict; SELECT madlib_keras_predict( 'keras_saved_out',
