This is an automated email from the ASF dual-hosted git repository.

ssiddiqi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/systemds.git


The following commit(s) were added to refs/heads/master by this push:
     new 36eaaeb  [SYSTEMDS-2659] imputeByFD now accepts the matrix input
36eaaeb is described below

commit 36eaaeb961130471c7d8f19456a7848312ff25b5
Author: Shafaq Siddiqi <[email protected]>
AuthorDate: Tue Sep 1 22:47:36 2020 +0200

    [SYSTEMDS-2659] imputeByFD now accepts the matrix input
    
    The initial version of imputeByFD accepted a frame input, recoded the
    frame internally, and then performed the imputations. Now the method
    accepts a matrix input (a recoded matrix for non-numeric data) and
    performs the imputations directly on the matrix values.
---
 scripts/builtin/imputeByFD.dml                  | 30 +++++--------------------
 src/test/scripts/functions/builtin/imputeFD.dml | 22 ++++++++++++++++--
 2 files changed, 26 insertions(+), 26 deletions(-)

diff --git a/scripts/builtin/imputeByFD.dml b/scripts/builtin/imputeByFD.dml
index 8ad523a..01281d2 100644
--- a/scripts/builtin/imputeByFD.dml
+++ b/scripts/builtin/imputeByFD.dml
@@ -25,7 +25,7 @@
 # 
---------------------------------------------------------------------------------------------
 # NAME            TYPE    DEFAULT     MEANING
 # 
---------------------------------------------------------------------------------------------
-# F               String    --       Data frame
+# X               Double    --       Matrix X 
 # source          Integer   --       source attribute to use for imputation 
and error correction
 # target          Integer   --       attribute to be fixed
 # threshold       Double    --       threshold value in interval [0, 1] for 
robust FDs 
@@ -36,39 +36,21 @@
 # 
---------------------------------------------------------------------------------------------
 # NAME            TYPE    DEFAULT     MEANING
 # 
---------------------------------------------------------------------------------------------
-# imputed_F      String   ---        Frame with possible imputations 
+# X               Double   ---        Matrix with possible imputations 
 
 
-s_imputeByFD = function(Frame[String] F, Integer sourceAttribute, Integer 
targetAttribute, Double threshold)
-  return(Frame[String] imputed_F)
+m_imputeByFD = function(Matrix[Double] X, Integer sourceAttribute, Integer 
targetAttribute, Double threshold)
+  return(Matrix[Double] X)
 {
-
   # sanity checks
   if( threshold < 0 | threshold > 1 )
     stop("Stopping due to invalid input, threshold required in interval [0, 1] 
found "+threshold)
 
-  if(sourceAttribute < 0 | sourceAttribute > ncol(F) | targetAttribute < 0 | 
targetAttribute > ncol(F))
+  if(sourceAttribute < 0 | sourceAttribute > ncol(X) | targetAttribute < 0 | 
targetAttribute > ncol(X))
     stop("Stopping due to invalid source and target")
-
-
-  # detect schema for transformation
-  schema = detectSchema(F)
-  s=""
-  for(i in 1: ncol(F)) {
-    if(as.scalar(schema[1,i]) == "STRING" | as.scalar(schema[1,i]) == 
"BOOLEAN" )
-      s = s+as.integer(i)+","; 
-  }
-  
-  # recode data frame
-  jspecR = "{ids:true, recode:["+s+"]}";
-  [X, M] = transformencode(target=F, spec=jspecR);
-
+ 
   # impute missing values and fix errors
   X[,targetAttribute] = imputeAndCorrect(X[,sourceAttribute], 
X[,targetAttribute], threshold) 
-
-  # getting the actual data back
-  dF = transformdecode(target=X, spec=jspecR, meta=M);
-  imputed_F = dF;
 }
 
 imputeAndCorrect = function(Matrix[Double] X, Matrix[Double] Y, Double 
threshold)
diff --git a/src/test/scripts/functions/builtin/imputeFD.dml 
b/src/test/scripts/functions/builtin/imputeFD.dml
index 9325562..4782921 100644
--- a/src/test/scripts/functions/builtin/imputeFD.dml
+++ b/src/test/scripts/functions/builtin/imputeFD.dml
@@ -16,6 +16,24 @@
 #
 #-------------------------------------------------------------
 
-X = read($1, data_type="frame", format="csv", header=FALSE);
+F = read($1, data_type="frame", format="csv", header=FALSE);
+# the method accepts a matrix, so recode the non-numeric columns of the frame into a matrix
+
+# detect schema for transformation
+schema = detectSchema(F)
+s=""
+for(i in 1: ncol(F)) {
+  if(as.scalar(schema[1,i]) == "STRING" | as.scalar(schema[1,i]) == "BOOLEAN" )
+    s = s+as.integer(i)+","; 
+}
+  
+# recode data frame
+jspecR = "{ids:true, recode:["+s+"]}";
+[X, M] = transformencode(target=F, spec=jspecR);
+# call the method
 Y = imputeByFD(X, $2, $3, $4);
-write(Y, $5, format="binary")
+
+# getting the actual data back
+dF = transformdecode(target=Y, spec=jspecR, meta=M);
+
+write(dF, $5, format="binary")

Reply via email to