khannaekta commented on a change in pull request #571:
URL: https://github.com/apache/madlib/pull/571#discussion_r723776016



##########
File path: src/ports/postgres/modules/dbscan/test/dbscan.sql_in
##########
@@ -118,27 +118,68 @@ SELECT assert(count(DISTINCT cluster_id) = 3, 'Incorrect 
cluster count') FROM db
 
 DROP TABLE IF EXISTS dbscan_train_data3;
 CREATE TABLE dbscan_train_data3 (
-id_in integer,
-data0 integer,
-data1 integer);
-copy dbscan_train_data3 (id_in, data0, data1) FROM stdin delimiter '|';
-1|1|1
-2|2|2
-3|3|3
-4|4|4
-5|4|8
-6|17|8
-7|19|8
-8|19|9
-9|19|10
-10|3|111
-11|3|112
-12|3|113
-13|8|113
+       city TEXT,
+       country TEXT,
+       lat INTEGER,
+       lon INTEGER,
+       PRIMARY KEY (city, country)
+);
+COPY dbscan_train_data3 (city, country, lon, lat) FROM stdin csv header;
+city,country,lat,lon
+Bangkok,Thailand,13,100
+Athens,Greece,37,23
+Beijing,China,39,116
+St. Petersburg,Russia,59,30
+Barcelona,Spain,23,9
+Tehran,Iran,35,51
+Bristol,England,51,-2
+São Paulo,Brazil,-23,-46
+Berlin,Germany,30,25
+New Dehli,India,28,77
+Sydney,Australia,-34,151
+Bogotá,Colombia,4,-74
+Kingston,Jamaica,17,-76
+Rome,Italy,41,12
+Bombay,India,19,72
+Cape Town,South Africa,-33,22
+Cairo,Egypt,30,31
+Guayaquil,Ecuador,-2,-79
+Jakarta,Indonesia,-6,106
+Hamburg,Germany,53,2
+Hong Kong,China,22,114
+Bordeux,France,44,0
+Nairobi,Kenya,-1,36
+Osaka,Japan,34,135
 \.
 
+-- Test that id and point params may be passed in as expressions
 DROP TABLE IF EXISTS out1, out1_summary;
-SELECT 
dbscan('dbscan_train_data3','out1','id_in','ARRAY[data0,data1]',20,4,'squared_dist_norm2','brute');
+SELECT dbscan('dbscan_train_data3',
+       'out1',
+       $$('x' || left(md5(city || ',' || country),16))::BIT(64)::BIGINT$$
+       ,'ARRAY[lat,lon]',
+       50, 1, 'squared_dist_norm2', 'optimized'
+);
+
+SELECT assert(
+       id_column = $$('x' || left(md5(city || ',' || 
country),16))::BIT(64)::BIGINT$$,
+       'id expression passed in does not match expression saved in summary 
table: ' || id_column
+) FROM out1_summary;
+SELECT assert(
+       expr_point = $$ARRAY[lat,lon]$$,
+       'point expression passed in does not match expression saved in summary 
table: ' || expr_point
+) FROM out1_summary; -- verify point expression saved in summary
+SELECT assert(COUNT(*) = 20, 'Wrong number of rows returned') FROM out1;  -- 
Should return 20 non-noise points
+
+-- Results in Graph WCC error - TODO: fix

Review comment:
       Do we still want to validate this or we can remove it? 

##########
File path: tool/jenkins/jenkins_build.sh
##########
@@ -60,9 +60,9 @@ sleep 15
 echo "---------- Install pip, and mock -----------"
 # cmake, make, make install, and make package
 cat <<EOF
-docker exec madlib bash -c 'apt-get update; apt-get install -y python-pip; pip 
install mock' | tee $workdir/logs/madlib_compile.log
+docker exec madlib bash -c 'apt-get update; apt-get install -y python-pip; pip 
install mock scipy==1.2.1' | tee $workdir/logs/madlib_compile.log
 EOF
-docker exec madlib bash -c 'apt-get update; apt-get install -y python-pip; pip 
install mock' | tee $workdir/logs/madlib_compile.log
+docker exec madlib bash -c 'apt-get update; apt-get install -y python-pip; pip 
install mock scipy==1.2.1' | tee $workdir/logs/madlib_compile.log
 

Review comment:
       Actually knn doesn't use `scipy` it either. The import that you saw in 
[knn.sql_in](https://github.com/apache/madlib/blob/master/src/ports/postgres/modules/knn/knn.sql_in#L528)
 is actually part of an example in docs. That's why it was working fine (I got 
confused for a moment).
   Let's remove it from here then.

##########
File path: tool/jenkins/jenkins_build.sh
##########
@@ -60,9 +60,9 @@ sleep 15
 echo "---------- Install pip, and mock -----------"
 # cmake, make, make install, and make package
 cat <<EOF
-docker exec madlib bash -c 'apt-get update; apt-get install -y python-pip; pip 
install mock' | tee $workdir/logs/madlib_compile.log
+docker exec madlib bash -c 'apt-get update; apt-get install -y python-pip; pip 
install mock scipy==1.2.1' | tee $workdir/logs/madlib_compile.log
 EOF
-docker exec madlib bash -c 'apt-get update; apt-get install -y python-pip; pip 
install mock' | tee $workdir/logs/madlib_compile.log
+docker exec madlib bash -c 'apt-get update; apt-get install -y python-pip; pip 
install mock scipy==1.2.1' | tee $workdir/logs/madlib_compile.log
 

Review comment:
       Actually knn doesn't use `scipy` either. The import that you saw in 
[knn.sql_in](https://github.com/apache/madlib/blob/master/src/ports/postgres/modules/knn/knn.sql_in#L528)
 is actually part of an example in docs. That's why it was working fine (I got 
confused for a moment).
   Let's remove it from here then.

##########
File path: src/ports/postgres/modules/dbscan/test/dbscan.sql_in
##########
@@ -118,27 +118,68 @@ SELECT assert(count(DISTINCT cluster_id) = 3, 'Incorrect 
cluster count') FROM db
 
 DROP TABLE IF EXISTS dbscan_train_data3;
 CREATE TABLE dbscan_train_data3 (
-id_in integer,
-data0 integer,
-data1 integer);
-copy dbscan_train_data3 (id_in, data0, data1) FROM stdin delimiter '|';
-1|1|1
-2|2|2
-3|3|3
-4|4|4
-5|4|8
-6|17|8
-7|19|8
-8|19|9
-9|19|10
-10|3|111
-11|3|112
-12|3|113
-13|8|113
+       city TEXT,
+       country TEXT,
+       lat INTEGER,
+       lon INTEGER,
+       PRIMARY KEY (city, country)
+);
+COPY dbscan_train_data3 (city, country, lon, lat) FROM stdin csv header;
+city,country,lat,lon
+Bangkok,Thailand,13,100
+Athens,Greece,37,23
+Beijing,China,39,116
+St. Petersburg,Russia,59,30
+Barcelona,Spain,23,9
+Tehran,Iran,35,51
+Bristol,England,51,-2
+São Paulo,Brazil,-23,-46
+Berlin,Germany,30,25
+New Dehli,India,28,77
+Sydney,Australia,-34,151
+Bogotá,Colombia,4,-74
+Kingston,Jamaica,17,-76
+Rome,Italy,41,12
+Bombay,India,19,72
+Cape Town,South Africa,-33,22
+Cairo,Egypt,30,31
+Guayaquil,Ecuador,-2,-79
+Jakarta,Indonesia,-6,106
+Hamburg,Germany,53,2
+Hong Kong,China,22,114
+Bordeux,France,44,0
+Nairobi,Kenya,-1,36
+Osaka,Japan,34,135
 \.
 
+-- Test that id and point params may be passed in as expressions
 DROP TABLE IF EXISTS out1, out1_summary;
-SELECT 
dbscan('dbscan_train_data3','out1','id_in','ARRAY[data0,data1]',20,4,'squared_dist_norm2','brute');
+SELECT dbscan('dbscan_train_data3',
+       'out1',
+       $$('x' || left(md5(city || ',' || country),16))::BIT(64)::BIGINT$$
+       ,'ARRAY[lat,lon]',
+       50, 1, 'squared_dist_norm2', 'optimized'
+);
+
+SELECT assert(
+       id_column = $$('x' || left(md5(city || ',' || 
country),16))::BIT(64)::BIGINT$$,
+       'id expression passed in does not match expression saved in summary 
table: ' || id_column
+) FROM out1_summary;
+SELECT assert(
+       expr_point = $$ARRAY[lat,lon]$$,
+       'point expression passed in does not match expression saved in summary 
table: ' || expr_point
+) FROM out1_summary; -- verify point expression saved in summary
+SELECT assert(COUNT(*) = 20, 'Wrong number of rows returned') FROM out1;  -- 
Should return 20 non-noise points
+
+-- Results in Graph WCC error - TODO: fix

Review comment:
       Do we still want to validate this or we can remove it? 




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: dev-unsubscr...@madlib.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to