Github user njayaram2 commented on a diff in the pull request:

    https://github.com/apache/madlib/pull/241#discussion_r175548591
  
    --- Diff: 
src/ports/postgres/modules/utilities/test/minibatch_preprocessing.sql_in ---
    @@ -0,0 +1,223 @@
    +/* ----------------------------------------------------------------------- 
*//**
    + *
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements.  See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership.  The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License.  You may obtain a copy of the License at
    + *
    + *   http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing,
    + * software distributed under the License is distributed on an
    + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
    + * KIND, either express or implied.  See the License for the
    + * specific language governing permissions and limitations
    + * under the License.
    + *
    + *//* 
----------------------------------------------------------------------- */
    +DROP TABLE IF EXISTS minibatch_preprocessing_input;
    +CREATE TABLE minibatch_preprocessing_input(
    +    sex TEXT,
    +    id SERIAL NOT NULL,
    +    length DOUBLE PRECISION,
    +    diameter DOUBLE PRECISION,
    +    height DOUBLE PRECISION,
    +    whole DOUBLE PRECISION,
    +    shucked DOUBLE PRECISION,
    +    viscera DOUBLE PRECISION,
    +    shell DOUBLE PRECISION,
    +    rings INTEGER);
    +
    +-- Test empty input table
    +-- Error out if out table already exists
    +
    +INSERT INTO 
minibatch_preprocessing_input(id,sex,length,diameter,height,whole,shucked,viscera,shell,rings)
 VALUES
    +(1040,'F',0.66,0.475,0.18,1.3695,0.641,0.294,0.335,6),
    +(3160,'F',0.34,0.255,0.085,0.204,0.097,0.021,0.05,6),
    +(3984,'F',0.585,0.45,0.125,0.874,0.3545,0.2075,0.225,6),
    +(2551,'I',0.28,0.22,0.08,0.1315,0.066,0.024,0.03,5),
    +(1246,'I',0.385,0.28,0.09,0.228,0.1025,0.042,0.0655,5),
    +(519,'M',0.325,0.23,0.09,0.147,0.06,0.034,0.045,4),
    +(2382,'M',0.155,0.115,0.025,0.024,0.009,0.005,0.0075,5),
    +(698,'M',0.28,0.205,0.1,0.1165,0.0545,0.0285,0.03,5),
    +(2381,'M',0.175,0.135,0.04,0.0305,0.011,0.0075,0.01,5),
    +(516,'M',0.27,0.195,0.08,0.1,0.0385,0.0195,0.03,6);
    +
    +-- no of rows = 10, buffer_size = 4, so assert that count =  10/4 = 3
    +\set expected_row_count 3
    +DROP TABLE IF EXISTS minibatch_preprocessing_out, 
minibatch_preprocessing_out_standardization, 
minibatch_preprocessing_out_summary;
    +SELECT minibatch_preprocessor('minibatch_preprocessing_input', 
'minibatch_preprocessing_out',  'length>0.2',  
'ARRAY[diameter,height,whole,shucked,viscera,shell]', 4);
    +SELECT assert
    +        (
    +        row_count = :expected_row_count, 'Row count validation failed for 
minibatch_preprocessing_out.
    +        Expected:' || :expected_row_count || ' Actual: ' || row_count
    +        ) from (select count(*) as row_count from 
minibatch_preprocessing_out) s;
    +
    +\set expected_dep_row_count '\'' 2,4,4 '\''
    +\set expected_dep_col_count '\'' 2,2,2 '\''
    +\set expected_indep_row_count '\'' 2,4,4 '\''
    +\set expected_indep_col_count '\'' 6,6,6 '\''
    +
    +-- assert dimensions for both dependent and independent variable
    +SELECT assert
    +        (
    +        str_dep_row_count = :expected_dep_row_count, 'Dependent variable 
row count failed. Actual: ' || str_dep_row_count || ' Expected:' || 
:expected_dep_row_count
    +        ) from
    +        (
    +        select array_to_string(array_agg(row_count order by row_count 
asc), ',') as str_dep_row_count from (select array_upper(dependent_varname,1) 
as row_count from minibatch_preprocessing_out order by row_count asc) s
    +        ) s;
    +
    +SELECT assert
    +        (
    +        str_dep_col_count = :expected_dep_col_count, 'Dependent variable 
col count failed. Actual: ' || str_dep_col_count || ' Expected:' || 
:expected_dep_col_count
    +        ) from
    +        (
    +        select array_to_string(array_agg(col_count order by col_count 
asc), ',') as str_dep_col_count from (select array_upper(dependent_varname,2) 
as col_count from minibatch_preprocessing_out order by col_count asc) s
    +        ) s;
    +
    +SELECT assert
    +        (
    +        str_indep_row_count = :expected_indep_row_count, 'Independent 
variable row count failed. Actual: ' || str_indep_row_count || ' Expected:' || 
:expected_indep_row_count
    +        ) from
    +        (
    +        select array_to_string(array_agg(row_count order by row_count 
asc), ',') as str_indep_row_count from (select array_upper(independent_varname, 
1) as row_count from minibatch_preprocessing_out order by row_count asc) s
    +        ) s;
    +
    +SELECT assert
    +        (
    +        str_indep_col_count = :expected_indep_col_count, 'Independent 
variable col count failed. Actual: ' || str_indep_col_count || ' Expected:' || 
:expected_indep_col_count
    +        ) from
    +        (
    +        select array_to_string(array_agg(col_count order by col_count 
asc), ',') as str_indep_col_count from (select 
array_upper(independent_varname,2) as col_count from 
minibatch_preprocessing_out order by col_count asc) s
    +        ) s;
    +
    +SELECT assert
    +        (
    +        source_table        = 'minibatch_preprocessing_input' AND
    +        output_table        = 'minibatch_preprocessing_out' AND
    +        dependent_varname   = 'length>0.2' AND
    +        independent_varname = 
'ARRAY[diameter,height,whole,shucked,viscera,shell]' AND
    +        buffer_size         = 4 AND
    +        class_values        = '{f,t}' AND -- we sort the class values in 
python
    +        num_rows_processed  = 10 AND
    +        num_missing_rows_skipped    = 0 AND
    +        grouping_cols       is NULL,
    +        'Summary Validation failed. Expected:' || __to_char(summary)
    +        ) from (select * from minibatch_preprocessing_out_summary) summary;
    +
    +
    +-- Test null values in x and y
    +\set expected_row_count 1
    +DROP TABLE IF EXISTS minibatch_preprocessing_out, 
minibatch_preprocessing_out_standardization, 
minibatch_preprocessing_out_summary;
    +
    +TRUNCATE TABLE minibatch_preprocessing_input;
    +INSERT INTO 
minibatch_preprocessing_input(id,sex,length,diameter,height,whole,shucked,viscera,shell,rings)
 VALUES
    +(1040,'F',0.66,0.475,0.18,NULL,0.641,0.294,0.335,6),
    +(3160,'F',0.34,0.35,0.085,0.204,0.097,0.021,0.05,6),
    +(3984,NULL,0.585,0.45,0.25,0.874,0.3545,0.2075,0.225,5),
    +(861,'M',0.595,0.475,NULL,1.1405,0.547,0.231,0.271,6),
    +(932,NULL,0.445,0.335,0.11,0.4355,0.2025,0.1095,0.1195,6),
    +(698,'F',0.445,0.335,0.11,0.4355,0.2025,0.1095,0.1195,6),
    +(922,NULL,0.445,0.335,0.11,NULL,0.2025,0.1095,0.1195,6);
    +SELECT minibatch_preprocessor('minibatch_preprocessing_input', 
'minibatch_preprocessing_out', 'sex', 
'ARRAY[length,diameter,height,whole,shucked,viscera,shell]', 2);
    +SELECT assert
    +        (
    +        row_count = :expected_row_count, 'Row count validation failed for 
minibatch_preprocessing_out.
    +        Expected:' || :expected_row_count || ' Actual: ' || row_count
    +        ) from (select count(*) as row_count from 
minibatch_preprocessing_out) s;
    +SELECT assert
    +        (num_rows_processed = 2 AND num_missing_rows_skipped = 5,
    +         'Rows processed/skipped validation failed for 
minibatch_preprocessing_out_summary.
    +         Actual num_rows_processed:' || num_rows_processed || ', Actual 
num_missing_rows_skipped: ' || num_missing_rows_skipped
    +        ) from (select * from minibatch_preprocessing_out_summary) s;
    +
    +-- Test standardization
    +DROP TABLE IF EXISTS minibatch_preprocessing_input;
    +DROP TABLE IF EXISTS minibatch_preprocessing_out, 
minibatch_preprocessing_out_standardization, 
minibatch_preprocessing_out_summary;
    +CREATE TABLE minibatch_preprocessing_input(x1 INTEGER ,x2 INTEGER ,y TEXT);
    +INSERT INTO minibatch_preprocessing_input(x1,x2,y) VALUES
    +(2,10,'y1'),
    +(4,30,'y2');
    +SELECT minibatch_preprocessor('minibatch_preprocessing_input', 
'minibatch_preprocessing_out', 'y', 'ARRAY[x1,x2]', 2);
    +
    +-- since the order is not deterministic, we assert for all possible orders
    +\set expected_normalized_independent_var1 '\'' {{-1, -1},{1, 1}} '\''
    +\set expected_normalized_independent_var2 '\'' {{1, 1},{-1, -1}} '\''
    +
    +SELECT assert
    +(
    +    independent_varname = :expected_normalized_independent_var1 OR
    +    independent_varname = :expected_normalized_independent_var2,
    +    'Standardization check failed. Actual: ' || independent_varname
    +) from
    +(
    +    select __to_char(independent_varname) as independent_varname from 
minibatch_preprocessing_out
    +) s;
    +
    +
    +-- Test that the standardization table gets created.
    +\set expected_row_count 1
    +SELECT assert
    +(
    +  row_count = :expected_row_count, 'Row count validation failed for 
minibatch_preprocessing_out_standardization.
    +        Expected:' || :expected_row_count || ' Actual: ' || row_count
    +) from
    +(
    +  select count(*) as row_count from 
minibatch_preprocessing_out_standardization
    +) s;
    +
    +-- Test that the summary table gets created.
    +\set expected_row_count 1
    +SELECT assert
    +(
    +  row_count = :expected_row_count, 'Row count validation failed for 
minibatch_preprocessing_out_summary.
    +        Expected:' || :expected_row_count || ' Actual: ' || row_count
    +) from
    +(
    +  select count(*) as row_count from minibatch_preprocessing_out_summary
    +) s;
    +
    +-- Test for array values in indep column
    +DROP TABLE IF EXISTS minibatch_preprocessing_input;
    +DROP TABLE IF EXISTS minibatch_preprocessing_out, 
minibatch_preprocessing_out_standardization, 
minibatch_preprocessing_out_summary;
    +CREATE TABLE minibatch_preprocessing_input(
    +    id INTEGER,
    +    sex TEXT,
    +    attributes double precision[],
    +    rings INTEGER);
    +TRUNCATE TABLE minibatch_preprocessing_input;
    +INSERT INTO minibatch_preprocessing_input(id,sex,attributes) VALUES
    +(1040,'F',ARRAY[0.66,0.475,0.18,NULL,0.641,0.294,0.335]),
    +(3160,'F',ARRAY[0.34,0.35,0.085,0.204,0.097,0.021,0.05]),
    +(3984,NULL,ARRAY[0.585,0.45,0.25,0.874,0.3545,0.2075,0.225]),
    +(861,'M',ARRAY[0.595,0.475,NULL,1.1405,0.547,0.231,0.271]),
    +(932,NULL,ARRAY[0.445,0.335,0.11,0.4355,0.2025,0.1095,0.1195]),
    +(NULL,'F',ARRAY[0.445,0.335,0.11,0.4355,0.2025,0.1095,0.1195]),
    +(922,NULL,ARRAY[0.445,0.335,0.11,NULL,0.2025,0.1095,0.1195]);
    +SELECT minibatch_preprocessor('minibatch_preprocessing_input', 
'minibatch_preprocessing_out', 'sex', 'attributes', 1);
    +SELECT assert
    +        (
    +        row_count = 2, 'Row count validation failed for 
minibatch_preprocessing_out.
    +        Expected:' || 2 || ' Actual: ' || row_count
    +        ) from (select count(*) as row_count from 
minibatch_preprocessing_out) s;
    +
    +-- Test for array values in dep column
    +DROP TABLE IF EXISTS minibatch_preprocessing_out, 
minibatch_preprocessing_out_standardization, 
minibatch_preprocessing_out_summary;
    +SELECT minibatch_preprocessor('minibatch_preprocessing_input', 
'minibatch_preprocessing_out', 'attributes', 'ARRAY[id]', 1);
    +SELECT assert
    +        (
    +        row_count = 3, 'Row count validation failed array values in 
dependent variable.
    +        Expected:' || 3 || ' Actual: ' || row_count
    +        ) from (select count(*) as row_count from 
minibatch_preprocessing_out) s;
    +
    +-- Test for null buffer size
    +DROP TABLE IF EXISTS minibatch_preprocessing_out, 
minibatch_preprocessing_out_standardization, 
minibatch_preprocessing_out_summary;
    +SELECT minibatch_preprocessor('minibatch_preprocessing_input', 
'minibatch_preprocessing_out', 'attributes', 'ARRAY[id]');
    +SELECT assert
    +        (
    +        row_count = 3, 'Row count validation failed for null buffer size.
    +        Expected:' || 3 || ' Actual: ' || row_count
    +        ) from (select count(*) as row_count from 
minibatch_preprocessing_out) s;
    --- End diff --
    
    Please end this and other files with a new line.


---

Reply via email to