khannaekta commented on a change in pull request #571: URL: https://github.com/apache/madlib/pull/571#discussion_r723776016
########## File path: src/ports/postgres/modules/dbscan/test/dbscan.sql_in ########## @@ -118,27 +118,68 @@ SELECT assert(count(DISTINCT cluster_id) = 3, 'Incorrect cluster count') FROM db DROP TABLE IF EXISTS dbscan_train_data3; CREATE TABLE dbscan_train_data3 ( -id_in integer, -data0 integer, -data1 integer); -copy dbscan_train_data3 (id_in, data0, data1) FROM stdin delimiter '|'; -1|1|1 -2|2|2 -3|3|3 -4|4|4 -5|4|8 -6|17|8 -7|19|8 -8|19|9 -9|19|10 -10|3|111 -11|3|112 -12|3|113 -13|8|113 + city TEXT, + country TEXT, + lat INTEGER, + lon INTEGER, + PRIMARY KEY (city, country) +); +COPY dbscan_train_data3 (city, country, lon, lat) FROM stdin csv header; +city,country,lat,lon +Bangkok,Thailand,13,100 +Athens,Greece,37,23 +Beijing,China,39,116 +St. Petersburg,Russia,59,30 +Barcelona,Spain,23,9 +Tehran,Iran,35,51 +Bristol,England,51,-2 +São Paulo,Brazil,-23,-46 +Berlin,Germany,30,25 +New Dehli,India,28,77 +Sydney,Australia,-34,151 +Bogotá,Colombia,4,-74 +Kingston,Jamaica,17,-76 +Rome,Italy,41,12 +Bombay,India,19,72 +Cape Town,South Africa,-33,22 +Cairo,Egypt,30,31 +Guayaquil,Ecuador,-2,-79 +Jakarta,Indonesia,-6,106 +Hamburg,Germany,53,2 +Hong Kong,China,22,114 +Bordeux,France,44,0 +Nairobi,Kenya,-1,36 +Osaka,Japan,34,135 \. +-- Test that id and point params may be passed in as expressions DROP TABLE IF EXISTS out1, out1_summary; -SELECT dbscan('dbscan_train_data3','out1','id_in','ARRAY[data0,data1]',20,4,'squared_dist_norm2','brute'); +SELECT dbscan('dbscan_train_data3', + 'out1', + $$('x' || left(md5(city || ',' || country),16))::BIT(64)::BIGINT$$ + ,'ARRAY[lat,lon]', + 50, 1, 'squared_dist_norm2', 'optimized' +); + +SELECT assert( + id_column = $$('x' || left(md5(city || ',' || country),16))::BIT(64)::BIGINT$$, + 'id expression passed in does not match expression saved in summary table: ' || id_column +) FROM out1_summary; +SELECT assert( + expr_point = $$ARRAY[lat,lon]$$, + 'point expression passed in does not match expression saved in summary table: ' || expr_point +) FROM out1_summary; -- verify point expression saved in summary +SELECT assert(COUNT(*) = 20, 'Wrong number of rows returned') FROM out1; -- Should return 20 non-noise points + +-- Results in Graph WCC error - TODO: fix Review comment: Do we still want to validate this or we can remove it? ########## File path: tool/jenkins/jenkins_build.sh ########## @@ -60,9 +60,9 @@ sleep 15 echo "---------- Install pip, and mock -----------" # cmake, make, make install, and make package cat <<EOF -docker exec madlib bash -c 'apt-get update; apt-get install -y python-pip; pip install mock' | tee $workdir/logs/madlib_compile.log +docker exec madlib bash -c 'apt-get update; apt-get install -y python-pip; pip install mock scipy==1.2.1' | tee $workdir/logs/madlib_compile.log EOF -docker exec madlib bash -c 'apt-get update; apt-get install -y python-pip; pip install mock' | tee $workdir/logs/madlib_compile.log +docker exec madlib bash -c 'apt-get update; apt-get install -y python-pip; pip install mock scipy==1.2.1' | tee $workdir/logs/madlib_compile.log Review comment: Actually knn doesn't use `scipy` it either. The import that you saw in [knn.sql_in](https://github.com/apache/madlib/blob/master/src/ports/postgres/modules/knn/knn.sql_in#L528) is actually part of an example in docs. That's why it was working fine (I got confused for a moment). Let's remove it from here then. ########## File path: tool/jenkins/jenkins_build.sh ########## @@ -60,9 +60,9 @@ sleep 15 echo "---------- Install pip, and mock -----------" # cmake, make, make install, and make package cat <<EOF -docker exec madlib bash -c 'apt-get update; apt-get install -y python-pip; pip install mock' | tee $workdir/logs/madlib_compile.log +docker exec madlib bash -c 'apt-get update; apt-get install -y python-pip; pip install mock scipy==1.2.1' | tee $workdir/logs/madlib_compile.log EOF -docker exec madlib bash -c 'apt-get update; apt-get install -y python-pip; pip install mock' | tee $workdir/logs/madlib_compile.log +docker exec madlib bash -c 'apt-get update; apt-get install -y python-pip; pip install mock scipy==1.2.1' | tee $workdir/logs/madlib_compile.log Review comment: Actually knn doesn't use `scipy` either. The import that you saw in [knn.sql_in](https://github.com/apache/madlib/blob/master/src/ports/postgres/modules/knn/knn.sql_in#L528) is actually part of an example in docs. That's why it was working fine (I got confused for a moment). Let's remove it from here then. ########## File path: src/ports/postgres/modules/dbscan/test/dbscan.sql_in ########## @@ -118,27 +118,68 @@ SELECT assert(count(DISTINCT cluster_id) = 3, 'Incorrect cluster count') FROM db DROP TABLE IF EXISTS dbscan_train_data3; CREATE TABLE dbscan_train_data3 ( -id_in integer, -data0 integer, -data1 integer); -copy dbscan_train_data3 (id_in, data0, data1) FROM stdin delimiter '|'; -1|1|1 -2|2|2 -3|3|3 -4|4|4 -5|4|8 -6|17|8 -7|19|8 -8|19|9 -9|19|10 -10|3|111 -11|3|112 -12|3|113 -13|8|113 + city TEXT, + country TEXT, + lat INTEGER, + lon INTEGER, + PRIMARY KEY (city, country) +); +COPY dbscan_train_data3 (city, country, lon, lat) FROM stdin csv header; +city,country,lat,lon +Bangkok,Thailand,13,100 +Athens,Greece,37,23 +Beijing,China,39,116 +St. Petersburg,Russia,59,30 +Barcelona,Spain,23,9 +Tehran,Iran,35,51 +Bristol,England,51,-2 +São Paulo,Brazil,-23,-46 +Berlin,Germany,30,25 +New Dehli,India,28,77 +Sydney,Australia,-34,151 +Bogotá,Colombia,4,-74 +Kingston,Jamaica,17,-76 +Rome,Italy,41,12 +Bombay,India,19,72 +Cape Town,South Africa,-33,22 +Cairo,Egypt,30,31 +Guayaquil,Ecuador,-2,-79 +Jakarta,Indonesia,-6,106 +Hamburg,Germany,53,2 +Hong Kong,China,22,114 +Bordeux,France,44,0 +Nairobi,Kenya,-1,36 +Osaka,Japan,34,135 \. +-- Test that id and point params may be passed in as expressions DROP TABLE IF EXISTS out1, out1_summary; -SELECT dbscan('dbscan_train_data3','out1','id_in','ARRAY[data0,data1]',20,4,'squared_dist_norm2','brute'); +SELECT dbscan('dbscan_train_data3', + 'out1', + $$('x' || left(md5(city || ',' || country),16))::BIT(64)::BIGINT$$ + ,'ARRAY[lat,lon]', + 50, 1, 'squared_dist_norm2', 'optimized' +); + +SELECT assert( + id_column = $$('x' || left(md5(city || ',' || country),16))::BIT(64)::BIGINT$$, + 'id expression passed in does not match expression saved in summary table: ' || id_column +) FROM out1_summary; +SELECT assert( + expr_point = $$ARRAY[lat,lon]$$, + 'point expression passed in does not match expression saved in summary table: ' || expr_point +) FROM out1_summary; -- verify point expression saved in summary +SELECT assert(COUNT(*) = 20, 'Wrong number of rows returned') FROM out1; -- Should return 20 non-noise points + +-- Results in Graph WCC error - TODO: fix Review comment: Do we still want to validate this or we can remove it? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: dev-unsubscr...@madlib.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org