Repository: madlib Updated Branches: refs/heads/master c73cf8507 -> f28c5a07e
K-NN: Fix overloaded functions for defaults Some of the overloaded functions (meant for default arguments) were incorrectly defined. This commit fixes that and adds test for those function calls in the install-check. Project: http://git-wip-us.apache.org/repos/asf/madlib/repo Commit: http://git-wip-us.apache.org/repos/asf/madlib/commit/f28c5a07 Tree: http://git-wip-us.apache.org/repos/asf/madlib/tree/f28c5a07 Diff: http://git-wip-us.apache.org/repos/asf/madlib/diff/f28c5a07 Branch: refs/heads/master Commit: f28c5a07e4bf6178330df936d0338de1e0d20a49 Parents: c73cf85 Author: Rahul Iyer <[email protected]> Authored: Tue Dec 5 00:17:21 2017 -0800 Committer: Rahul Iyer <[email protected]> Committed: Tue Dec 5 00:17:21 2017 -0800 ---------------------------------------------------------------------- src/ports/postgres/modules/knn/knn.py_in | 8 +++++--- src/ports/postgres/modules/knn/knn.sql_in | 15 +++++++++------ src/ports/postgres/modules/knn/test/knn.sql_in | 13 ++----------- 3 files changed, 16 insertions(+), 20 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/madlib/blob/f28c5a07/src/ports/postgres/modules/knn/knn.py_in ---------------------------------------------------------------------- diff --git a/src/ports/postgres/modules/knn/knn.py_in b/src/ports/postgres/modules/knn/knn.py_in index caa89e0..7729d2f 100644 --- a/src/ports/postgres/modules/knn/knn.py_in +++ b/src/ports/postgres/modules/knn/knn.py_in @@ -38,7 +38,8 @@ from utilities.control import MinWarning def knn_validate_src(schema_madlib, point_source, point_column_name, point_id, label_column_name, test_source, test_column_name, - test_id, output_table, k, output_neighbors, fn_dist, **kwargs): + test_id, output_table, k, output_neighbors, fn_dist, + **kwargs): input_tbl_valid(point_source, 'kNN') input_tbl_valid(test_source, 'kNN') output_tbl_valid(output_table, 'kNN') @@ -108,8 +109,9 @@ def knn_validate_src(schema_madlib, point_source, point_column_name, point_id, # ------------------------------------------------------------------------------ -def knn(schema_madlib, point_source, point_column_name, point_id, label_column_name, - test_source, test_column_name, test_id, output_table, k, output_neighbors, fn_dist): +def knn(schema_madlib, point_source, point_column_name, point_id, + label_column_name, test_source, test_column_name, test_id, output_table, + k, output_neighbors, fn_dist): """ KNN function to find the K Nearest neighbours Args: http://git-wip-us.apache.org/repos/asf/madlib/blob/f28c5a07/src/ports/postgres/modules/knn/knn.sql_in ---------------------------------------------------------------------- diff --git a/src/ports/postgres/modules/knn/knn.sql_in b/src/ports/postgres/modules/knn/knn.sql_in index cdc9704..d45f0f4 100644 --- a/src/ports/postgres/modules/knn/knn.sql_in +++ b/src/ports/postgres/modules/knn/knn.sql_in @@ -456,7 +456,7 @@ CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.knn( test_id VARCHAR, output_table VARCHAR, k INTEGER, - output_neighbors Boolean, + output_neighbors BOOLEAN, fn_dist TEXT ) RETURNS VARCHAR AS $$ PythonFunctionBodyOnly(`knn', `knn') @@ -473,7 +473,6 @@ CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.knn( k, output_neighbors, fn_dist - ) $$ LANGUAGE plpythonu VOLATILE m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); @@ -488,12 +487,14 @@ CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.knn( test_column_name VARCHAR, test_id VARCHAR, output_table VARCHAR, - output_neighbors Boolean + k INTEGER, + output_neighbors BOOLEAN ) RETURNS VARCHAR AS $$ DECLARE returnstring VARCHAR; BEGIN - returnstring = MADLIB_SCHEMA.knn($1,$2,$3,$4,$5,$6,$7,$8,1,$9, 'MADLIB_SCHEMA.squared_dist_norm2'); + returnstring = MADLIB_SCHEMA.knn($1,$2,$3,$4,$5,$6,$7,$8,$9,$10, + 'MADLIB_SCHEMA.squared_dist_norm2'); RETURN returnstring; END; $$ LANGUAGE plpgsql VOLATILE @@ -513,7 +514,8 @@ CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.knn( DECLARE returnstring VARCHAR; BEGIN - returnstring = MADLIB_SCHEMA.knn($1,$2,$3,$4,$5,$6,$7,$8,$9,TRUE,'MADLIB_SCHEMA.squared_dist_norm2'); + returnstring = MADLIB_SCHEMA.knn($1,$2,$3,$4,$5,$6,$7,$8,$9,TRUE, + 'MADLIB_SCHEMA.squared_dist_norm2'); RETURN returnstring; END; $$ LANGUAGE plpgsql VOLATILE @@ -532,7 +534,8 @@ CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.knn( DECLARE returnstring VARCHAR; BEGIN - returnstring = MADLIB_SCHEMA.knn($1,$2,$3,$4,$5,$6,$7,$8,1,TRUE,'MADLIB_SCHEMA.squared_dist_norm2'); + returnstring = MADLIB_SCHEMA.knn($1,$2,$3,$4,$5,$6,$7,$8,1,TRUE, + 'MADLIB_SCHEMA.squared_dist_norm2'); RETURN returnstring; END; $$ LANGUAGE plpgsql VOLATILE http://git-wip-us.apache.org/repos/asf/madlib/blob/f28c5a07/src/ports/postgres/modules/knn/test/knn.sql_in ---------------------------------------------------------------------- diff --git a/src/ports/postgres/modules/knn/test/knn.sql_in b/src/ports/postgres/modules/knn/test/knn.sql_in index 8bd1644..8c62dad 100644 --- a/src/ports/postgres/modules/knn/test/knn.sql_in +++ b/src/ports/postgres/modules/knn/test/knn.sql_in @@ -76,29 +76,20 @@ select knn('knn_train_data','data','id','label','knn_test_data','data','id','mad select assert(array_agg(prediction order by id)='{1,1,0,1,0,0}', 'Wrong output in classification with k=3') from madlib_knn_result_classification; drop table if exists madlib_knn_result_classification; -select knn('knn_train_data','data','id','label','knn_test_data','data','id','madlib_knn_result_classification',3,True,'MADLIB_SCHEMA.squared_dist_norm2'); +select knn('knn_train_data','data','id','label','knn_test_data','data','id','madlib_knn_result_classification',3); select assert(array_agg(x)= '{1,2,3}','Wrong output in classification with k=3') from (select unnest(k_nearest_neighbours) as x from madlib_knn_result_classification where id = 1 order by x asc) y; drop table if exists madlib_knn_result_regression; select knn('knn_train_data_reg','data','id','label','knn_test_data','data','id','madlib_knn_result_regression',4,False,'MADLIB_SCHEMA.squared_dist_norm2'); select assert(array_agg(prediction order by id)='{1,1,0.5,1,0.25,0.25}', 'Wrong output in regression') from madlib_knn_result_regression; drop table if exists madlib_knn_result_regression; -select knn('knn_train_data_reg','data','id','label','knn_test_data','data','id','madlib_knn_result_regression',3,True,'MADLIB_SCHEMA.squared_dist_norm2'); +select knn('knn_train_data_reg','data','id','label','knn_test_data','data','id','madlib_knn_result_regression',3,True); select assert(array_agg(x)= '{1,2,3}' , 'Wrong output in regression with k=3') from (select unnest(k_nearest_neighbours) as x from madlib_knn_result_regression where id = 1 order by x asc) y; drop table if exists madlib_knn_result_classification; select knn('knn_train_data','data','id','label','knn_test_data','data','id','madlib_knn_result_classification',3,False,NULL); select assert(array_agg(prediction order by id)='{1,1,0,1,0,0}', 'Wrong output in classification with k=3') from madlib_knn_result_classification; - -drop table if exists madlib_knn_result_regression; -select knn('knn_train_data_reg','data','id','label','knn_test_data','data','id','madlib_knn_result_regression',3,True, NULL ); -select assert(array_agg(x)= '{1,2,3}' , 'Wrong output in regression with k=3') from (select unnest(k_nearest_neighbours) as x from madlib_knn_result_regression where id = 1 order by x asc) y; - -drop table if exists madlib_knn_result_regression; -select knn('knn_train_data_reg','data','id','label','knn_test_data','data','id','madlib_knn_result_regression',4,False,NULL); -select assert(array_agg(prediction order by id)='{1,1,0.5,1,0.25,0.25}', 'Wrong output in regression') from madlib_knn_result_regression; - drop table if exists madlib_knn_result_classification; select knn('knn_train_data','data','id','label','knn_test_data','data','id','madlib_knn_result_classification',3,False,'MADLIB_SCHEMA.dist_norm1'); select assert(array_agg(prediction order by id)='{1,1,0,1,0,0}', 'Wrong output in classification with k=3') from madlib_knn_result_classification;
