This is an automated email from the ASF dual-hosted git repository.

ssiddiqi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/systemds.git


The following commit(s) were added to refs/heads/master by this push:
     new 88f1063  [MINOR] Passing quantiles as function parameters in winsorize builtin. This commit also removes the parfor from logical pipelines' enumerator to stabilize the results
88f1063 is described below

commit 88f1063f0197d7197f90ec1cb1113bfc8173b12b
Author: Shafaq Siddiqi <[email protected]>
AuthorDate: Wed Sep 8 16:29:35 2021 +0200

    [MINOR] Passing quantiles as function parameters in winsorize builtin
      This commit also removes the parfor from logical pipelines' enumerator to
      stabilize the results
    
    Closes #1387.
---
 scripts/builtin/winsorize.dml                           | 17 +++++++++--------
 scripts/pipelines/properties/param.csv                  |  2 +-
 scripts/pipelines/scripts/enumerateLogical.dml          |  2 +-
 src/test/scripts/functions/builtin/multipleBuiltins.dml |  2 +-
 src/test/scripts/functions/builtin/winsorize.dml        |  2 +-
 src/test/scripts/functions/builtin/winsorizeFoo.dml     |  2 +-
 .../scripts/functions/misc/FunPotpourriMultiEval.dml    |  2 +-
 src/test/scripts/functions/misc/Functions15b.dml        |  2 +-
 8 files changed, 16 insertions(+), 15 deletions(-)

diff --git a/scripts/builtin/winsorize.dml b/scripts/builtin/winsorize.dml
index 5e7eb61..614630d 100644
--- a/scripts/builtin/winsorize.dml
+++ b/scripts/builtin/winsorize.dml
@@ -19,18 +19,19 @@
 #
 #-------------------------------------------------------------
 
-m_winsorize = function(Matrix[Double] X, Boolean verbose) return (Matrix[Double] Y) {
+m_winsorize = function(Matrix[Double] X, Double ql = 0.05, Double qu = 0.95, Boolean verbose)
+return (Matrix[Double] Y) {
+
   Y = matrix(0, nrow(X), ncol(X))
-  parfor(i in 1:ncol(X))
-    Y[,i] = fixOutliersWinsorize(X[,i])
+  parfor(i in 1:ncol(X), check=0) {
+    q1 = quantile(X[,i], ql)
+    q2 = quantile(X[,i], qu)
+    Y[, i] = fixOutliersWinsorize(X[,i], q1, q2)
+  }
 }
 
-fixOutliersWinsorize = function(Matrix[Double] X) return (Matrix[Double] Y)
+fixOutliersWinsorize = function(Matrix[Double] X,  Double ql, Double qu) return (Matrix[Double] Y)
 {
-  # compute quantiles for lower and upper probs
-  q = quantile(X, matrix("0.05 0.95", rows=2, cols=1));
-  ql = as.scalar(q[1,1]);
-  qu = as.scalar(q[2,1]);
   # replace values outside [ql,qu] w/ ql and qu respectively
   Y = ifelse(X < ql, ql, X);
   Y = ifelse(Y > qu, qu, Y);
diff --git a/scripts/pipelines/properties/param.csv b/scripts/pipelines/properties/param.csv
index fc454c4..e2068b1 100644
--- a/scripts/pipelines/properties/param.csv
+++ b/scripts/pipelines/properties/param.csv
@@ -1,7 +1,7 @@
 name,param_no,maskFlag,FDFlag,yFlag,verboseFlag,dataFlag,dt1,dt2,dt3,dt4,st1,en1,st2,en2,st3,en3,st4,en4
 outlierByIQR,3,0,0,0,1,0,FP,INT,INT,1,7,2,2,1,1,,,
 outlierBySd,3,0,0,0,1,0,INT,INT,INT,1,7,1,2,2,1,,,
-winsorize,0,0,0,0,1,0,,,,,,,,,,,,
+winsorize,2,0,0,0,1,0,FP,FP,0.01,0.05,0.95,1,,,,,,
 normalize,0,0,0,0,0,0,,,,,,,,,,,,
 imputeByMean,0,1,0,0,0,2,,,,,,,,,,,,
 imputeByMedian,0,1,0,0,0,2,,,,,,,,,,,,
diff --git a/scripts/pipelines/scripts/enumerateLogical.dml b/scripts/pipelines/scripts/enumerateLogical.dml
index 29ac78c..977c0a2 100644
--- a/scripts/pipelines/scripts/enumerateLogical.dml
+++ b/scripts/pipelines/scripts/enumerateLogical.dml
@@ -85,7 +85,7 @@ return (Frame[Unknown] bestLg, Double pre_best)
     # # # execute the physical pipelines
     scores = matrix(0, nrow(physicalPipList), 1)
     # TODO better parfor-dep handling of multi-assignments to avoid check=0 
-    parfor(i in 1:length(physicalPipList), check=0) {
+    for(i in 1:length(physicalPipList), check=0) {
       lp2 = as.frame(logicalPipList[((i-1)%/%num_inst)+1,])
       pp2 = as.frame(physicalPipList[i,])
       # # append configuration keys for extracting the pipeline later on
diff --git a/src/test/scripts/functions/builtin/multipleBuiltins.dml b/src/test/scripts/functions/builtin/multipleBuiltins.dml
index 7a8315b..a771f59 100644
--- a/src/test/scripts/functions/builtin/multipleBuiltins.dml
+++ b/src/test/scripts/functions/builtin/multipleBuiltins.dml
@@ -20,6 +20,6 @@
 #-------------------------------------------------------------
 
 X = read($1);
-Y = winsorize(X, FALSE);
+Y = winsorize(X=X, verbose=FALSE);
 Z = outlier(Y, FALSE);
 write(Z, $2);
diff --git a/src/test/scripts/functions/builtin/winsorize.dml b/src/test/scripts/functions/builtin/winsorize.dml
index eeba09d..00725e4 100644
--- a/src/test/scripts/functions/builtin/winsorize.dml
+++ b/src/test/scripts/functions/builtin/winsorize.dml
@@ -20,5 +20,5 @@
 #-------------------------------------------------------------
 
 X = read($1);
-Y = winsorize(X, FALSE);
+Y = winsorize(X=X, ql=0.05, qu= 0.95, verbose=FALSE);
 write(Y, $2)
diff --git a/src/test/scripts/functions/builtin/winsorizeFoo.dml b/src/test/scripts/functions/builtin/winsorizeFoo.dml
index 78472bd..9c1c53b 100644
--- a/src/test/scripts/functions/builtin/winsorizeFoo.dml
+++ b/src/test/scripts/functions/builtin/winsorizeFoo.dml
@@ -25,5 +25,5 @@ foo = function(Matrix[Double] X, Boolean verbose)
    while(FALSE){} #no inlining
    if( verbose )
      print( min(X)+" "+max(X) )
-   R = winsorize(X, verbose);
+   R = winsorize(X=X, verbose=verbose);
 }
diff --git a/src/test/scripts/functions/misc/FunPotpourriMultiEval.dml b/src/test/scripts/functions/misc/FunPotpourriMultiEval.dml
index 72b7b7b..3d5fe37 100644
--- a/src/test/scripts/functions/misc/FunPotpourriMultiEval.dml
+++ b/src/test/scripts/functions/misc/FunPotpourriMultiEval.dml
@@ -23,7 +23,7 @@ X = rand(rows=10, cols= 10)
 t1 = interQuartileMean(X[,7]);
 
 for(i in 1:5)
-  X = eval("winsorize", list(X, FALSE))
+  X = eval("winsorize", list(X=X, ql = 0.05, qu=0.95, verbose=FALSE))
 
 t2 = interQuartileMean(X[,7]);
 print("expected=TRUE, actual="+(t2 < t1))
diff --git a/src/test/scripts/functions/misc/Functions15b.dml b/src/test/scripts/functions/misc/Functions15b.dml
index 6834f49..074c7e4 100644
--- a/src/test/scripts/functions/misc/Functions15b.dml
+++ b/src/test/scripts/functions/misc/Functions15b.dml
@@ -22,5 +22,5 @@
 foo = function(Matrix[Double] X)
   return (Matrix[Double] Y)
 {
-  Y = winsorize(X, FALSE)
+  Y = winsorize(X=X, verbose=FALSE)
 }

Reply via email to