This is an automated email from the ASF dual-hosted git repository.
baunsgaard pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git
The following commit(s) were added to refs/heads/main by this push:
new 09417e1975 [SYSTEMDS-3378] Python API auto generator default values
09417e1975 is described below
commit 09417e1975cd31d26bd67771716e06a98570315e
Author: baunsgaard <[email protected]>
AuthorDate: Mon May 16 12:19:23 2022 +0200
[SYSTEMDS-3378] Python API auto generator default values
This commit fixes the auto generator of Python code to work with all
of our builtin functions.
Also contained in this commit is the renaming of the keyword lambda to
reg for all builtin functions. This allows Python to use the argument
names without error, since lambda is a reserved keyword in Python.
Closes #1615
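For illustration, a minimal sketch of calling a generated builtin with the
renamed argument from the Python API (the context setup and data values
below are hypothetical and not part of this commit):

    import numpy as np
    from systemds.context import SystemDSContext
    from systemds.operator.algorithm import l2svm

    with SystemDSContext() as sds:
        X = sds.from_numpy(np.random.rand(100, 10))
        Y = sds.from_numpy((np.random.rand(100, 1) > 0.5) * 2.0 - 1)
        # 'reg' replaces the former DML argument name 'lambda', which
        # cannot be passed as a keyword argument in Python.
        model = l2svm(X, Y, reg=1.0, maxIterations=100).compute()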
---
scripts/algorithms/l2-svm.dml | 4 +-
scripts/builtin/als.dml | 20 ++--
scripts/builtin/alsCG.dml | 42 ++++-----
scripts/builtin/alsDS.dml | 12 +--
scripts/builtin/bandit.dml | 1 -
scripts/builtin/cox.dml | 2 +-
scripts/builtin/l2svm.dml | 14 +--
scripts/builtin/lenetTrain.dml | 16 ++--
scripts/builtin/mcc.dml | 2 +-
scripts/builtin/msvm.dml | 6 +-
scripts/builtin/setdiff.dml | 2 +-
scripts/builtin/sherlock.dml | 2 +-
scripts/builtin/sherlockPredict.dml | 2 +-
scripts/builtin/symmetricDifference.dml | 2 +-
scripts/builtin/union.dml | 2 +-
scripts/builtin/unique.dml | 2 +-
scripts/{builtin => nn/examples}/sherlockNet.dml | 0
scripts/perftest/scripts/alsDS.dml | 4 +-
scripts/perftest/scripts/m-svm.dml | 2 +-
src/main/python/generator/generator.py | 101 +++++++++++++++------
src/main/python/generator/parser.py | 98 +++++++++++++++-----
.../python/generator/resources/type_mapping.json | 4 +-
.../python/systemds/operator/algorithm/__init__.py | 60 ++++++++++++
.../systemds/operator/algorithm/builtin/als.py | 4 +-
.../systemds/operator/algorithm/builtin/alsCG.py | 4 +-
.../systemds/operator/algorithm/builtin/alsDS.py | 2 +-
.../builtin/{alsDS.py => applyAndEvaluate.py} | 35 ++++---
.../builtin/{lmPredict.py => apply_pipeline.py} | 19 ++--
.../builtin/{gmm.py => autoencoder_2layer.py} | 42 +++++----
.../systemds/operator/algorithm/builtin/bandit.py | 23 ++++-
.../builtin/{hyperband.py => correctTypos.py} | 32 ++++---
.../builtin/{lmPredict.py => correctTyposApply.py} | 22 +++--
.../operator/algorithm/builtin/decisionTree.py | 3 +-
.../algorithm/builtin/denialConstraints.py | 65 +++++++++++++
.../algorithm/builtin/{lmPredict.py => dmv.py} | 18 ++--
.../operator/algorithm/builtin/{msvm.py => ema.py} | 31 ++++---
.../operator/algorithm/builtin/executePipeline.py | 38 ++++++--
.../builtin/{lmPredict.py => ffPredict.py} | 18 ++--
.../builtin/{alsDS.py => fit_pipeline.py} | 39 ++++----
.../builtin/{lmPredict.py => fixInvalidLengths.py} | 29 ++++--
.../{lmPredict.py => fixInvalidLengthsApply.py} | 15 ++-
.../builtin/{lmPredict.py => frameSort.py} | 13 ++-
.../builtin/{lenetTrain.py => glmPredict.py} | 34 +++----
.../systemds/operator/algorithm/builtin/gmm.py | 3 +-
.../operator/algorithm/builtin/gridSearch.py | 40 ++++++--
.../operator/algorithm/builtin/hyperband.py | 2 +-
.../systemds/operator/algorithm/builtin/knn.py | 66 ++++++++++++++
.../systemds/operator/algorithm/builtin/l2svm.py | 2 +-
.../builtin/{lmPredict.py => lenetPredict.py} | 26 ++++--
.../operator/algorithm/builtin/lenetTrain.py | 2 +-
.../operator/algorithm/builtin/lmPredict.py | 10 +-
.../algorithm/builtin/{lmPredict.py => mcc.py} | 15 ++-
.../algorithm/builtin/{lmPredict.py => mdedup.py} | 23 +++--
.../algorithm/builtin/{miceApply.py => mice.py} | 34 +++++--
.../operator/algorithm/builtin/miceApply.py | 2 +-
.../systemds/operator/algorithm/builtin/msvm.py | 2 +-
.../builtin/{lmPredict.py => pcaInverse.py} | 17 ++--
.../builtin/{lmPredict.py => pcaTransform.py} | 15 ++-
.../algorithm/builtin/{lmPredict.py => setdiff.py} | 13 +--
.../builtin/{lmPredict.py => stratstats.py} | 21 +++--
.../{lmPredict.py => symmetricDifference.py} | 13 +--
.../builtin/{alsDS.py => topk_cleaning.py} | 39 ++++----
.../algorithm/builtin/{lmPredict.py => union.py} | 19 ++--
.../algorithm/builtin/{lmPredict.py => unique.py} | 12 +--
.../builtin/{decisionTree.py => xgboost.py} | 28 +++---
src/main/python/systemds/operator/nodes/scalar.py | 2 +-
src/main/python/tests/algorithms/test_gmm.py | 2 +-
.../tests/algorithms/test_gmm_train_predict.py | 2 +-
src/test/scripts/functions/builtin/l2svm.dml | 2 +-
src/test/scripts/functions/builtin/multisvm.dml | 2 +-
.../functions/federated/FederatedAlsCGTest.dml | 6 +-
.../federated/FederatedAlsCGTestReference.dml | 6 +-
.../functions/federated/FederatedL2SVMTest.dml | 2 +-
.../federated/FederatedL2SVMTestReference.dml | 2 +-
.../functions/federated/FederatedMSVMTest.dml | 2 +-
.../federated/FederatedMSVMTestReference.dml | 2 +-
.../functions/federated/FederatedYL2SVMTest.dml | 2 +-
.../functions/federated/FederatedYL2SVMTest2.dml | 2 +-
.../federated/FederatedYL2SVMTest2Reference.dml | 2 +-
.../federated/FederatedYL2SVMTestReference.dml | 2 +-
.../scripts/functions/lineage/LineageReuseAlg5.dml | 8 +-
.../functions/lineage/LineageTraceParforMSVM.dml | 4 +-
.../pipelines/topkcleaningClassificationTest.dml | 6 +-
83 files changed, 871 insertions(+), 475 deletions(-)
diff --git a/scripts/algorithms/l2-svm.dml b/scripts/algorithms/l2-svm.dml
index 1c2fb9d177..634503dcf3 100644
--- a/scripts/algorithms/l2-svm.dml
+++ b/scripts/algorithms/l2-svm.dml
@@ -50,7 +50,7 @@
fmt = ifdef($fmt, "text")
intercept = ifdef($icpt, FALSE)
epsilon = ifdef($tol, 0.001)
-lambda = ifdef($reg, 1.0)
+reg = ifdef($reg, 1.0)
maxIterations = ifdef($maxiter, 100)
verbose = ifdef($verbose, FALSE)
@@ -62,7 +62,7 @@ negative_label = min(Y)
dimensions = ncol(X)
w = l2svm(X=X, Y=Y, intercept=intercept,
- epsilon=epsilon, lambda=lambda,
+ epsilon=epsilon, reg=reg,
maxIterations=maxIterations,
verbose=verbose)
diff --git a/scripts/builtin/als.dml b/scripts/builtin/als.dml
index 8048f2f41d..5fa18ff6da 100644
--- a/scripts/builtin/als.dml
+++ b/scripts/builtin/als.dml
@@ -29,18 +29,18 @@
# ----------------------------------------------------------------------------------------------------------------------
# X Matrix[Double] --- Location to read the input matrix X to be factorized
# rank Integer 10 Rank of the factorization
-# reg String "L2" Regularization:
+# regType String "L2" Regularization:
# "L2" = L2 regularization;
# f (U, V) = 0.5 * sum (W * (U %*% V - X) ^ 2)
-# + 0.5 * lambda * (sum (U ^ 2) + sum (V ^ 2))
+# + 0.5 * reg * (sum (U ^ 2) + sum (V ^ 2))
# "wL2" = weighted L2 regularization
# f (U, V) = 0.5 * sum (W * (U %*% V - X) ^ 2)
-# + 0.5 * lambda * (sum (U ^ 2 * row_nonzeros)
+# + 0.5 * reg * (sum (U ^ 2 * row_nonzeros)
# + sum (V ^ 2 * col_nonzeros))
-# lambda Double 0.000001 Regularization parameter, no regularization if 0.0
-# maxi Integer 50 Maximum number of iterations
-# check Boolean TRUE Check for convergence after every iteration, i.e., updating U and V once
-# thr Double 0.0001 Assuming check is set to TRUE, the algorithm stops and convergence is declared
+# reg Double 0.000001 Regularization parameter, no regularization if 0.0
+# maxi Integer 50 Maximum number of iterations
+# check Boolean TRUE Check for convergence after every iteration, i.e., updating U and V once
+# thr Double 0.0001 Assuming check is set to TRUE, the algorithm stops and convergence is declared
# if the decrease in loss in any two consecutive iterations falls below this threshold;
# if check is FALSE thr is ignored
# ----------------------------------------------------------------------------------------------------------------------
@@ -53,15 +53,15 @@
# V Matrix An m x r matrix where r is the factorization rank
# ----------------------------------------------------------------------------------------------------------------------
-m_als = function(Matrix[Double] X, Integer rank = 10, String reg = "L2", Double lambda = 0.000001,
+m_als = function(Matrix[Double] X, Integer rank = 10, String regType = "L2", Double reg = 0.000001,
Integer maxi = 50, Boolean check = TRUE, Double thr = 0.0001, Boolean verbose = TRUE)
return (Matrix[Double] U, Matrix[Double] V)
{
N = 10000; # for large problems, use scalable alsCG
if( reg != "L2" | nrow(X) > N | ncol(X) > N )
- [U, V] = alsCG(X=X, rank=rank, reg=reg, lambda=lambda,
+ [U, V] = alsCG(X=X, rank=rank, regType=regType, reg=reg,
maxi=maxi, check=check, thr=thr, verbose=verbose);
else
- [U, V] = alsDS(X=X, rank=rank, lambda=lambda, maxi=maxi,
+ [U, V] = alsDS(X=X, rank=rank, reg=reg, maxi=maxi,
check=check, thr=thr, verbose=verbose);
}
diff --git a/scripts/builtin/alsCG.dml b/scripts/builtin/alsCG.dml
index d001c41a68..2d0be0dd80 100644
--- a/scripts/builtin/alsCG.dml
+++ b/scripts/builtin/alsCG.dml
@@ -29,18 +29,18 @@
# ----------------------------------------------------------------------------------------------------------------------
# X Matrix[Double] --- Location to read the input matrix X to be factorized
# rank Integer 10 Rank of the factorization
-# reg String "L2" Regularization:
+# regType String "L2" Regularization:
# "L2" = L2 regularization;
# f (U, V) = 0.5 * sum (W * (U %*% V - X) ^ 2)
-# + 0.5 * lambda * (sum (U ^ 2) + sum (V ^ 2))
+# + 0.5 * reg * (sum (U ^ 2) + sum (V ^ 2))
# "wL2" = weighted L2 regularization
# f (U, V) = 0.5 * sum (W * (U %*% V - X) ^ 2)
-# + 0.5 * lambda * (sum (U ^ 2 * row_nonzeros)
+# + 0.5 * reg * (sum (U ^ 2 * row_nonzeros)
# + sum (V ^ 2 * col_nonzeros))
-# lambda Double 0.000001 Regularization parameter, no regularization if 0.0
-# maxi Integer 50 Maximum number of iterations
-# check Boolean TRUE Check for convergence after every iteration, i.e., updating U and V once
-# thr Double 0.0001 Assuming check is set to TRUE, the algorithm stops and convergence is declared
+# reg Double 0.000001 Regularization parameter, no regularization if 0.0
+# maxi Integer 50 Maximum number of iterations
+# check Boolean TRUE Check for convergence after every iteration, i.e., updating U and V once
+# thr Double 0.0001 Assuming check is set to TRUE, the algorithm stops and convergence is declared
# if the decrease in loss in any two consecutive iterations falls below this threshold;
# if check is FALSE thr is ignored
# ----------------------------------------------------------------------------------------------------------------------
@@ -53,7 +53,7 @@
# V Matrix[Double] An m x r matrix where r is the factorization rank
# ----------------------------------------------------------------------------------------------------------------------
-m_alsCG = function(Matrix[Double] X, Integer rank = 10, String reg = "L2", Double lambda = 0.000001, Integer maxi = 50,
+m_alsCG = function(Matrix[Double] X, Integer rank = 10, String regType = "L2", Double reg = 0.000001, Integer maxi = 50,
Boolean check = TRUE, Double thr = 0.0001, Boolean verbose = TRUE)
return (Matrix[Double] U, Matrix[Double] V)
{
@@ -73,26 +73,26 @@ m_alsCG = function(Matrix[Double] X, Integer rank = 10, String reg = "L2", Doubl
# check for regularization
row_nonzeros = matrix(0,rows=1,cols=1);
col_nonzeros = matrix(0,rows=1,cols=1);
- if( reg == "L2" ) {
+ if( regType == "L2" ) {
# Loss Function with L2:
# f (U, V) = 0.5 * sum (W * (U %*% V - X) ^ 2)
- # + 0.5 * lambda * (sum (U ^ 2) + sum (V ^ 2))
+ # + 0.5 * reg * (sum (U ^ 2) + sum (V ^ 2))
if( verbose )
- print ("BEGIN ALS-CG SCRIPT WITH NONZERO SQUARED LOSS + L2 WITH LAMBDA - " + lambda);
+ print ("BEGIN ALS-CG SCRIPT WITH NONZERO SQUARED LOSS + L2 WITH REG - " + reg);
row_nonzeros = matrix(1, nrow(W), 1);
col_nonzeros = matrix(1, ncol(W), 1);
}
- else if( reg == "wL2" ) {
+ else if( regType == "wL2" ) {
# Loss Function with weighted L2:
# f (U, V) = 0.5 * sum (W * (U %*% V - X) ^ 2)
- # + 0.5 * lambda * (sum (U ^ 2 * row_nonzeros) + sum (V ^ 2 * col_nonzeros))
+ # + 0.5 * reg * (sum (U ^ 2 * row_nonzeros) + sum (V ^ 2 * col_nonzeros))
if( verbose )
- print ("BEGIN ALS-CG SCRIPT WITH NONZERO SQUARED LOSS + WEIGHTED L2 WITH LAMBDA - " + lambda);
+ print ("BEGIN ALS-CG SCRIPT WITH NONZERO SQUARED LOSS + WEIGHTED L2 WITH REG - " + reg);
row_nonzeros = rowSums(W);
col_nonzeros = t(colSums(W));
}
else {
- stop ("wrong regularization! " + reg);
+ stop ("wrong regularization! " + regType);
}
is_U = TRUE; # start optimizing U, alternated
@@ -101,7 +101,7 @@ m_alsCG = function(Matrix[Double] X, Integer rank = 10, String reg = "L2", Doubl
loss_init = 0.0; # only used if check is TRUE
if( check ) {
loss_init = 0.5 * sum( (X != 0) * (U %*% t(V) - X) ^ 2);
- loss_init = loss_init + 0.5 * lambda * (sum (U ^ 2 * row_nonzeros) + sum (V ^ 2 * col_nonzeros));
+ loss_init = loss_init + 0.5 * reg * (sum (U ^ 2 * row_nonzeros) + sum (V ^ 2 * col_nonzeros));
if( verbose )
print ("----- Initial train loss: " + loss_init + " -----");
}
@@ -111,9 +111,9 @@ m_alsCG = function(Matrix[Double] X, Integer rank = 10, String reg = "L2", Doubl
while( as.integer(it/2) < max_iter & ! converged ) {
it = it + 1;
if( is_U )
- G = ((X != 0) * (U %*% t(V) - X)) %*% V + lambda * U * row_nonzeros;
+ G = ((X != 0) * (U %*% t(V) - X)) %*% V + reg * U * row_nonzeros;
else
- G = t(t(U) %*% ((X != 0) * (U %*% t(V) - X))) + lambda * V * col_nonzeros;
+ G = t(t(U) %*% ((X != 0) * (U %*% t(V) - X))) + reg * V * col_nonzeros;
R = -G;
S = R;
@@ -124,12 +124,12 @@ m_alsCG = function(Matrix[Double] X, Integer rank = 10, String reg = "L2", Doubl
tt = 0.000000001;
while( norm_R2 > tt * norm_G2 & inneriter <= maxinneriter ) {
if( is_U ) {
- HS = (W * (S %*% t(V))) %*% V + lambda * S * row_nonzeros;
+ HS = (W * (S %*% t(V))) %*% V + reg * S * row_nonzeros;
alpha = norm_R2 / sum (S * HS);
U = U + alpha * S; # OK since U is not used in HS
}
else {
- HS = t(t(U) %*% (W * (U %*% t(S)))) + lambda * S * col_nonzeros;
+ HS = t(t(U) %*% (W * (U %*% t(S)))) + reg * S * col_nonzeros;
alpha = norm_R2 / sum (S * HS);
V = V + alpha * S; # OK since V is not used in HS
}
@@ -146,7 +146,7 @@ m_alsCG = function(Matrix[Double] X, Integer rank = 10, String reg = "L2", Doubl
# check for convergence
if( check & (it%%2 == 0) ) {
loss_cur = 0.5 * sum( (X != 0) * (U %*% t(V) - X) ^ 2);
- loss_cur = loss_cur + 0.5 * lambda * (sum (U ^ 2 * row_nonzeros) + sum (V ^ 2 * col_nonzeros));
+ loss_cur = loss_cur + 0.5 * reg * (sum (U ^ 2 * row_nonzeros) + sum (V ^ 2 * col_nonzeros));
loss_dec = (loss_init - loss_cur) / loss_init;
if( verbose )
diff --git a/scripts/builtin/alsDS.dml b/scripts/builtin/alsDS.dml
index 0b5797894f..4f7a5cffe6 100644
--- a/scripts/builtin/alsDS.dml
+++ b/scripts/builtin/alsDS.dml
@@ -30,7 +30,7 @@
# ----------------------------------------------------------------------------------------------------------------------
# X Matrix[Double] --- Location to read the input matrix V to be factorized
# rank Integer 10 Rank of the factorization
-# lambda Double 0.000001 Regularization parameter, no regularization if 0.0
+# reg Double 0.000001 Regularization parameter, no regularization if 0.0
# maxi Integer 50 Maximum number of iterations
# check Boolean FALSE Check for convergence after every iteration, i.e., updating L and R once
# thr Double 0.0001 Assuming check is set to TRUE, the algorithm stops and convergence is declared
@@ -46,7 +46,7 @@
# V Matrix[Double] An m x r matrix where r is the factorization rank
# ----------------------------------------------------------------------------------------------------------------------
-m_alsDS = function(Matrix[Double] X, Integer rank = 10, Double lambda = 0.000001,
+m_alsDS = function(Matrix[Double] X, Integer rank = 10, Double reg = 0.000001,
Integer maxi = 50, Boolean check = FALSE, Double thr = 0.0001, Boolean verbose = TRUE)
return (Matrix[Double] U, Matrix[Double] V)
{
@@ -92,17 +92,17 @@ m_alsDS = function(Matrix[Double] X, Integer rank = 10, Double lambda = 0.000001
# check for regularization
if ( verbose )
- print ("BEGIN ALS SCRIPT WITH NONZERO SQUARED LOSS + L2 WITH LAMBDA - " + lambda);
+ print ("BEGIN ALS SCRIPT WITH NONZERO SQUARED LOSS + L2 WITH REG - " + reg);
loss_init = 0.0; # only used if check is TRUE
if (check) {
loss_init = sum (X_nonzero_ind * (X - (U %*% t(V)))^2)
- + lambda * (sum ((U^2) * row_nonzeros) + sum ((V^2) * col_nonzeros));
+ + reg * (sum ((U^2) * row_nonzeros) + sum ((V^2) * col_nonzeros));
if( verbose )
print ("----- Initial train loss: " + loss_init + " -----");
}
- lambda_I = diag (matrix (lambda, rows = r, cols = 1));
+ lambda_I = diag (matrix (reg, rows = r, cols = 1));
it = 0;
converged = FALSE;
while ((it < max_iter) & (!converged)) {
@@ -126,7 +126,7 @@ m_alsDS = function(Matrix[Double] X, Integer rank = 10, Double lambda = 0.000001
# check for convergence
if (check) {
loss_cur = sum (X_nonzero_ind * (X - (U %*% t(V)))^2)
- + lambda * (sum ((U^2) * row_nonzeros) + sum ((V^2) * col_nonzeros));
+ + reg * (sum ((U^2) * row_nonzeros) + sum ((V^2) * col_nonzeros));
loss_dec = (loss_init - loss_cur) / loss_init;
if( verbose )
print ("Train loss at iteration (X) " + it + ": " + loss_cur + "
loss-dec " + loss_dec);
diff --git a/scripts/builtin/bandit.dml b/scripts/builtin/bandit.dml
index fa1ff1137d..04e28517d8 100644
--- a/scripts/builtin/bandit.dml
+++ b/scripts/builtin/bandit.dml
@@ -54,7 +54,6 @@
m_bandit = function(Matrix[Double] X_train, Matrix[Double] Y_train,
Matrix[Double] X_test, Matrix[Double] Y_test, List[Unknown] metaList,
String evaluationFunc, Matrix[Double] evalFunHp, Frame[Unknown] lp,
Matrix[Double] lpHp, Frame[Unknown] primitives, Frame[Unknown] param, Integer k = 3,
Integer R=50, Double baseLineScore, Boolean cv, Integer cvk = 2, Double ref = 0, Integer seed = -1, Boolean enablePruning = FALSE, Boolean verbose = TRUE)
- # return(Boolean perf)
return (Frame[Unknown] bestPipeline, Matrix[Double] bestHyperparams,
Matrix[Double] bestAccuracy, Frame[String] applyFunc)
{
print("Starting optimizer")
diff --git a/scripts/builtin/cox.dml b/scripts/builtin/cox.dml
index f6c9363b0c..68672d6339 100644
--- a/scripts/builtin/cox.dml
+++ b/scripts/builtin/cox.dml
@@ -84,7 +84,7 @@
# ----------------------------------------------------------------------------------------------------------------------
m_cox = function(Matrix[Double] X, Matrix[Double] TE, Matrix[Double] F, Matrix[Double] R,
- Double alpha = 0.05, Double tol = 0.000001, Int moi = 100, Int mii = 0)
+ Double alpha = 0.05, Double tol = 0.000001, Integer moi = 100, Integer mii = 0)
return (Matrix[Double] M, Matrix[Double] S, Matrix[Double] T, Matrix[Double] COV, Matrix[Double] RT, Matrix[Double] XO) {
X_orig = X;
diff --git a/scripts/builtin/l2svm.dml b/scripts/builtin/l2svm.dml
index 9fa6a71c13..9d3fca7d15 100644
--- a/scripts/builtin/l2svm.dml
+++ b/scripts/builtin/l2svm.dml
@@ -30,7 +30,7 @@
# intercept Boolean False No Intercept ( If set to TRUE then a constant bias column is added to X)
# epsilon Double 0.001 Procedure terminates early if the reduction in objective function value is less
# than epsilon (tolerance) times the initial objective function value.
-# lambda Double 1.0 Regularization parameter (lambda) for L2 regularization
+# reg Double 1.0 Regularization parameter (reg) for L2 regularization
# maxIterations Int 100 Maximum number of conjugate gradient iterations
# maxii Int 20 -
# verbose Boolean FALSE Set to true if one wants print statements updating on loss.
@@ -46,7 +46,7 @@
# ----------------------------------------------------------------------------------------------------------------------
m_l2svm = function(Matrix[Double] X, Matrix[Double] Y, Boolean intercept = FALSE,
- Double epsilon = 0.001, Double lambda = 1, Integer maxIterations = 100,
+ Double epsilon = 0.001, Double reg = 1, Integer maxIterations = 100,
Integer maxii = 20, Boolean verbose = FALSE, Integer columnId = -1)
return(Matrix[Double] model)
{
@@ -55,7 +55,7 @@ m_l2svm = function(Matrix[Double] X, Matrix[Double] Y, Boolean intercept = FALSE
stop("L2SVM: Stopping due to invalid inputs: Not possible to learn a binary class classifier without at least 2 rows")
if(epsilon < 0)
stop("L2SVM: Stopping due to invalid argument: Tolerance (tol) must be non-negative")
- if(lambda < 0)
+ if(reg < 0)
stop("L2SVM: Stopping due to invalid argument: Regularization constant (reg) must be non-negative")
if(maxIterations < 1)
stop("L2SVM: Stopping due to invalid argument: Maximum iterations should be a positive integer")
@@ -106,8 +106,8 @@ m_l2svm = function(Matrix[Double] X, Matrix[Double] Y, Boolean intercept = FALSE
# minimizing primal obj along direction s
step_sz = 0
Xd = X %*% s
- wd = lambda * sum(w * s)
- dd = lambda * sum(s * s)
+ wd = reg * sum(w * s)
+ dd = reg * sum(s * s)
continue1 = TRUE
iiter = 0
while(continue1 & iiter < maxii){
@@ -129,8 +129,8 @@ m_l2svm = function(Matrix[Double] X, Matrix[Double] Y, Boolean intercept = FALSE
out = 1 - Y * Xw
sv = (out > 0)
out = sv * out
- obj = 0.5 * sum(out * out) + lambda/2 * sum(w * w)
- g_new = t(X) %*% (out * Y) - lambda * w
+ obj = 0.5 * sum(out * out) + reg/2 * sum(w * w)
+ g_new = t(X) %*% (out * Y) - reg * w
if(verbose) {
colstr = ifelse(columnId!=-1, ", Col:"+columnId + " ,", " ,")
diff --git a/scripts/builtin/lenetTrain.dml b/scripts/builtin/lenetTrain.dml
index 4f02c2dd48..b82acf3d06 100644
--- a/scripts/builtin/lenetTrain.dml
+++ b/scripts/builtin/lenetTrain.dml
@@ -39,7 +39,7 @@
# lr Double 0.01 Learning rate
# mu Double 0.9 Momentum value
# decay Double 0.95 Learning rate decay
-# lambda Double 5e-04 Regularization strength
+# reg Double 5e-04 Regularization strength
# seed Integer -1 Seed for model initialization
# verbose Boolean FALSE Flag indicates if function should print to stdout
# ----------------------------------------------------------------------------------------------------------------------
@@ -64,7 +64,7 @@ source("nn/layers/lenetForwardPass.dml") as lenet_fw
m_lenetTrain = function(Matrix[Double] X, Matrix[Double] Y, Matrix[Double] X_val,
Matrix[Double] Y_val, Integer C, Integer Hin, Integer Win, Integer batch_size=64,
- Integer epochs=20, Double lr=0.01, Double mu=0.9, Double decay=0.95, Double lambda=5e-04,
+ Integer epochs=20, Double lr=0.01, Double mu=0.9, Double decay=0.95, Double reg=5e-04,
Boolean verbose=FALSE, Integer seed=-1)
return (List[unknown] model)
{
@@ -126,7 +126,7 @@ m_lenetTrain = function(Matrix[Double] X, Matrix[Double] Y, Matrix[Double] X_val
# Compute data backward pass
[dW1, db1, dW2, db2, dW3, db3, dW4, db4] = feed_backward(
- X_batch, C, Hin, Win, lambda, model, dprobs, cache)
+ X_batch, C, Hin, Win, reg, model, dprobs, cache)
# Optimize with SGD w/ Nesterov momentum
[W1, vW1] = sgd_nesterov::update(W1, dW1, lr, mu, vW1)
@@ -153,7 +153,7 @@ m_lenetTrain = function(Matrix[Double] X, Matrix[Double] Y, Matrix[Double] X_val
}
feed_backward = function(Matrix[Double] X, Integer C, Integer Hin, Integer Win,
- Double lambda,list[unknown] model, matrix[Double] dprobs, list[unknown] cache)
+ Double reg,list[unknown] model, matrix[Double] dprobs, list[unknown] cache)
return (Matrix[Double] dW1, Matrix[Double] db1,
Matrix[Double] dW2, Matrix[Double] db2,
Matrix[Double] dW3, Matrix[Double] db3,
@@ -193,10 +193,10 @@ feed_backward = function(Matrix[Double] X, Integer C, Integer Hin, Integer Win,
X, as.matrix(model["W1"]),
as.matrix(model["b1"]), C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
# Compute regularization backward pass
- dW1_reg = l2_reg::backward(as.matrix(model["W1"]), lambda)
- dW2_reg = l2_reg::backward(as.matrix(model["W2"]), lambda)
- dW3_reg = l2_reg::backward(as.matrix(model["W3"]), lambda)
- dW4_reg = l2_reg::backward(as.matrix(model["W4"]), lambda)
+ dW1_reg = l2_reg::backward(as.matrix(model["W1"]), reg)
+ dW2_reg = l2_reg::backward(as.matrix(model["W2"]), reg)
+ dW3_reg = l2_reg::backward(as.matrix(model["W3"]), reg)
+ dW4_reg = l2_reg::backward(as.matrix(model["W4"]), reg)
dW1 = dW1 + dW1_reg
dW2 = dW2 + dW2_reg
dW3 = dW3 + dW3_reg
diff --git a/scripts/builtin/mcc.dml b/scripts/builtin/mcc.dml
index 60456c7491..644ec37a00 100644
--- a/scripts/builtin/mcc.dml
+++ b/scripts/builtin/mcc.dml
@@ -37,7 +37,7 @@
# mattCC Double --- Matthews' Correlation Coefficient
# ---------------------------------------------------------------------------------------------
-m_mcc = function(Matrix[Double] predictions = matrix(0,0,0), Matrix[Double] labels = matrix(0,0,0))
+m_mcc = function(Matrix[Double] predictions, Matrix[Double] labels)
return (Double mattCC)
{
# # validation checks
diff --git a/scripts/builtin/msvm.dml b/scripts/builtin/msvm.dml
index 921d694e44..3f2dbde51e 100644
--- a/scripts/builtin/msvm.dml
+++ b/scripts/builtin/msvm.dml
@@ -32,7 +32,7 @@
# num_classes integer 10 Number of classes
# epsilon Double 0.001 Procedure terminates early if the reduction in objective function
# value is less than epsilon (tolerance) times the initial objective function value.
-# lambda Double 1.0 Regularization parameter (lambda) for L2 regularization
+# reg Double 1.0 Regularization parameter (lambda) for L2 regularization
# maxIterations Int 100 Maximum number of conjugate gradient iterations
# verbose Boolean False Set to true to print while training.
# ----------------------------------------------------------------------------------------------------------------------
@@ -45,7 +45,7 @@
#-----------------------------------------------------------------------------------------------------------------------
m_msvm = function(Matrix[Double] X, Matrix[Double] Y, Boolean intercept = FALSE,
- Double epsilon = 0.001, Double lambda = 1.0, Integer maxIterations = 100,
+ Double epsilon = 0.001, Double reg = 1.0, Integer maxIterations = 100,
Boolean verbose = FALSE)
return(Matrix[Double] model)
{
@@ -76,7 +76,7 @@ m_msvm = function(Matrix[Double] X, Matrix[Double] Y, Boolean intercept = FALSE,
parfor(class in 1:max(Y)) {
Y_local = 2 * (Y == class) - 1
w[,class] = l2svm(X=X, Y=Y_local, intercept=FALSE,
- epsilon=epsilon, lambda=lambda, maxIterations=maxIterations,
+ epsilon=epsilon, reg=reg, maxIterations=maxIterations,
verbose=verbose, columnId=class)
}
diff --git a/scripts/builtin/setdiff.dml b/scripts/builtin/setdiff.dml
index 3c7bdc8073..27721de79c 100644
--- a/scripts/builtin/setdiff.dml
+++ b/scripts/builtin/setdiff.dml
@@ -36,7 +36,7 @@
# R Matrix[Double] vector with all elements that are present in X but not in Y
# ---------------------------------------------------------------------------------------------
-setdiff = function(Matrix[double] X, Matrix[double] Y)
+m_setdiff = function(Matrix[double] X, Matrix[double] Y)
return (matrix[double] R)
{
common = intersect(X, Y);
diff --git a/scripts/builtin/sherlock.dml b/scripts/builtin/sherlock.dml
index f82b02ab23..86b989075f 100644
--- a/scripts/builtin/sherlock.dml
+++ b/scripts/builtin/sherlock.dml
@@ -52,7 +52,7 @@
# fb Matrix[Double] biases vectors for combining all trained features (final)
# ----------------------------------------------------------------------------------------------------------------------
-source("scripts/builtin/sherlockNet.dml") as sherlockNet
+source("scripts/nn/examples/sherlockNet.dml") as sherlockNet
m_sherlock = function(Matrix[Double] X_train, Matrix[Double] y_train)
return (Matrix[Double] cW1, Matrix[Double] cb1,
diff --git a/scripts/builtin/sherlockPredict.dml b/scripts/builtin/sherlockPredict.dml
index 8765c98989..e69d0f2615 100644
--- a/scripts/builtin/sherlockPredict.dml
+++ b/scripts/builtin/sherlockPredict.dml
@@ -51,7 +51,7 @@
# probs Matrix[Double] class probabilities of shape (N, K)
# ----------------------------------------------------------------------------------------------------------------------
-source("scripts/builtin/sherlockNet.dml") as sherlockNet
+source("scripts/nn/examples/sherlockNet.dml") as sherlockNet
m_sherlockPredict = function(Matrix[Double] X,
Matrix[Double] cW1, Matrix[Double] cb1,
diff --git a/scripts/builtin/symmetricDifference.dml b/scripts/builtin/symmetricDifference.dml
index dc18386fa1..7d875eaafd 100644
--- a/scripts/builtin/symmetricDifference.dml
+++ b/scripts/builtin/symmetricDifference.dml
@@ -36,7 +36,7 @@
# R Matrix[Double] vector with all elements in X and Y but not in both
# ---------------------------------------------------------------------------------------------
-symmetricDifference = function(Matrix[Double] X, Matrix[Double] Y)
+m_symmetricDifference = function(Matrix[Double] X, Matrix[Double] Y)
return (matrix[double] R)
{
R = setdiff(union(X,Y), intersect(X,Y))
diff --git a/scripts/builtin/union.dml b/scripts/builtin/union.dml
index 73ce3c4202..ff191c2cf6 100644
--- a/scripts/builtin/union.dml
+++ b/scripts/builtin/union.dml
@@ -36,7 +36,7 @@
# R Matrix matrix with all unique rows existing in X and Y
# ---------------------------------------------------------------------------------------------
-union = function(Matrix[Double] X, Matrix[Double] Y)
+m_union = function(Matrix[Double] X, Matrix[Double] Y)
return (matrix[double] R)
{
R = unique(rbind(X, Y));
diff --git a/scripts/builtin/unique.dml b/scripts/builtin/unique.dml
index ac403753f9..491ac20d3a 100644
--- a/scripts/builtin/unique.dml
+++ b/scripts/builtin/unique.dml
@@ -35,7 +35,7 @@
# R Matrix[Double] matrix with only unique rows
# ---------------------------------------------------------------------------------------------
-unique = function(matrix[double] X)
+m_unique = function(matrix[double] X)
return (matrix[double] R)
{
R = X
diff --git a/scripts/builtin/sherlockNet.dml b/scripts/nn/examples/sherlockNet.dml
similarity index 100%
rename from scripts/builtin/sherlockNet.dml
rename to scripts/nn/examples/sherlockNet.dml
diff --git a/scripts/perftest/scripts/alsDS.dml b/scripts/perftest/scripts/alsDS.dml
index 2c3380c428..6334cc1af4 100755
--- a/scripts/perftest/scripts/alsDS.dml
+++ b/scripts/perftest/scripts/alsDS.dml
@@ -20,7 +20,7 @@
#-------------------------------------------------------------
rank = ifdef($rank, 10);
-lambda = ifdef($lambda, 0.000001);
+reg = ifdef($lambda, 0.000001);
maxiter = ifdef($maxiter, 50);
thr = ifdef($thr, 0.0001);
verbose = ifdef($verbose, TRUE);
@@ -31,7 +31,7 @@ check = ifdef($check, TRUE);
X = read($X);
-[U, V] = alsDS(X=X, rank=rank, lambda=lambda, maxi=maxiter, check=check, thr=thr, verbose=verbose);
+[U, V] = alsDS(X=X, rank=rank, reg=reg, maxi=maxiter, check=check, thr=thr, verbose=verbose);
write(U, $modelU, format=fmt);
write(V, $modelV, format=fmt);
diff --git a/scripts/perftest/scripts/m-svm.dml b/scripts/perftest/scripts/m-svm.dml
index 028356e8f4..e0fd125f15 100755
--- a/scripts/perftest/scripts/m-svm.dml
+++ b/scripts/perftest/scripts/m-svm.dml
@@ -29,7 +29,7 @@ tol = as.double ($tol);
X = read($X)
Y = read($Y)
-model = msvm(X = X, Y = Y, intercept = icpt, epsilon = tol, lambda = reg, maxIterations = maxiter, verbose = FALSE)
+model = msvm(X = X, Y = Y, intercept = icpt, epsilon = tol, reg = reg, maxIterations = maxiter, verbose = FALSE)
extra_model_params = matrix(0, rows=2, cols=ncol(model))
extra_model_params[1, 1] = icpt
diff --git a/src/main/python/generator/generator.py b/src/main/python/generator/generator.py
index f9a7a1917c..2a441c5cbb 100644
--- a/src/main/python/generator/generator.py
+++ b/src/main/python/generator/generator.py
@@ -19,12 +19,13 @@
#
# -------------------------------------------------------------
-from typing import Tuple, List
import json
import os
import re
-from parser import FunctionParser
+import sys
import traceback
+from parser import FunctionParser
+from typing import List, Tuple
class PythonAPIFileGenerator(object):
@@ -89,7 +90,7 @@ class PythonAPIFileGenerator(object):
init_file.write(self.init_import.format(function=f))
init_file.write("\n")
init_file.write(self.init_all.format(
- functions=self.function_names).replace(",",",\n"))
+ functions=self.function_names).replace(",", ",\n"))
class PythonAPIFunctionGenerator(object):
@@ -141,30 +142,57 @@ class PythonAPIFunctionGenerator(object):
function_name=function_name, parameters=parameters, header=header,
params_dict=params_dict, api_call=api_call)
+ def replace_types(self, item: str):
+ pattern = self.__class__.type_mapping_pattern
+ return self.__class__.type_mapping["type"].get(re.search(pattern, str(
+ item).lower()).group() if item else item.lower(), item)
+
def format_param_string(self, parameters: List[Tuple[str]], nameLength: int) -> str:
- result = []
- has_optional = False
- path = os.path.dirname(__file__)
- newline_spacing = "\n" + " " * (nameLength + 5)
- for param in parameters:
- # map data types
- pattern = self.__class__.type_mapping_pattern
- param = [self.__class__.type_mapping["type"].get(re.search(pattern, str(
- item).lower()).group() if item else str(item).lower(), item) for item in param]
- if param[2] is not None:
- has_optional = True
+ try:
+ result = []
+ has_optional = False
+ path = os.path.dirname(__file__)
+ newline_spacing = "\n" + " " * (nameLength + 5)
+
+ for param in parameters:
+ # map data types
+ # pattern = self.__class__.type_mapping_pattern
+ # print(param)
+ param[1] = self.replace_types(param[1])
+ # print(param)
+ if "[" in param[1] or "[" in param[0]:
+ raise AttributeError(
+ "Failed parsing param" + str(param) + "\n" +
str(parameters))
+ if param[2] is not None:
+ has_optional = True
+ # result.append("{nl}{name}: {typ},".format(
+ # result=result, name=param[0], typ=param[1],
+ # nl=newline_spacing))
+ else:
+ # has_optional = False
+ result.append("{nl}{name}: {typ},".format(
+ result=result, name=param[0], typ=param[1],
+ nl=newline_spacing))
+ if len(result) == 0:
+ result = ""
+ # if has_optional:
+ # result = u"{kwargs}".format(
+ # result=result, kwargs=self.__class__.kwargs_parameter_string,
+ # nl=newline_spacing)
else:
- result.append("{nl}{name}: {typ},".format(
- result=result, name=param[0], typ=param[1],
- nl=newline_spacing))
- result[0] = result[0][len(newline_spacing):]
- result[-1] = result[-1][:-1]
- result = "".join(result)
- if has_optional:
- result = u"{result},{nl}{kwargs}".format(
- result=result, kwargs=self.__class__.kwargs_parameter_string,
- nl=newline_spacing)
- return result
+ result[0] = result[0][len(newline_spacing):]
+ result[-1] = result[-1][:-1]
+ result = "".join(result)
+ if has_optional:
+ result = u"{result},{nl}{kwargs}".format(
+ result=result, kwargs=self.__class__.kwargs_parameter_string,
+ nl=newline_spacing)
+
+ # print("\n\n" +str(parameters) + "\n\n " +result)
+ return result
+ except Exception as e:
+ raise AttributeError("Failed Formatting parameter strings: " +
+ str(parameters) + " " + format_exception(e))
def format_params_dict_string(self, parameters: List[Tuple[str]]) -> str:
if not len(parameters):
@@ -219,7 +247,8 @@ class PythonAPIFunctionGenerator(object):
if(output_type):
output_type = output_type[0].upper()
else:
- raise AttributeError("Error in pattern match")
+ raise AttributeError("Error in pattern match: " + str(value) +
"\n" +
+ function_name + "\n" + str(parameters) +
"\n" + str(return_values))
result = ("{sds_context}," +
"\n \'{function_name}\'," +
"\n named_input_nodes=params_dict").format(
@@ -317,6 +346,21 @@ class PythonAPIDocumentationGenerator(object):
return meaning_str
+def format_exception(e):
+ exception_list = traceback.format_stack()
+ exception_list = exception_list[:-2]
+ exception_list.extend(traceback.format_tb(sys.exc_info()[2]))
+ exception_list.extend(traceback.format_exception_only(
+ sys.exc_info()[0], sys.exc_info()[1]))
+
+ exception_str = "Traceback (most recent call last):\n"
+ exception_str += "".join(exception_list)
+ # Removing the last \n
+ exception_str = exception_str[:-1]
+
+ return exception_str
+
+
if __name__ == "__main__":
if "python" in os.getcwd():
source_path = os.path.join("../../../", 'scripts', 'builtin')
@@ -337,9 +381,8 @@ if __name__ == "__main__":
header_data)
script_content = fun_generator.generate_function(data)
except Exception as e:
- traceback.print_exc()
- print("[ERROR] error in : \'{file_name}\'.".format(
- file_name=dml_file))
+ print("[ERROR] error in : \'{file_name}\' \n{err}
\n{trace}.".format(
+ file_name=dml_file, err=e, trace=format_exception(e)))
continue
file_generator.generate_file(
data["function_name"], script_content, dml_file)
diff --git a/src/main/python/generator/parser.py b/src/main/python/generator/parser.py
index 135d85811c..72b5b73671 100644
--- a/src/main/python/generator/parser.py
+++ b/src/main/python/generator/parser.py
@@ -20,16 +20,16 @@
# -------------------------------------------------------------
+import json
import os
import re
-import json
class FunctionParser(object):
header_input_pattern = r"^[ \t\n]*[#]+[ \t\n]*input[ \t\n\w:;.,#]*[\s#\-]*[#]+[\w\s\d:,.()\" \t\n\-]*[\s#\-]*$"
header_output_pattern = r"[\s#\-]*[#]+[ \t]*(return|output)[ \t\w:;.,#]*[\s#\-]*[#]+[\w\s\d:,.()\" \t\-]*[\s#\-]*$"
- function_pattern = r"^m_[\w]+[ \t\n]+=[ \t\n]+function[^#{]*"
- parameter_pattern = r"^m_[\w]+[\s]+=[\s]+function[\s]*\([\s]*([\w\[\]\s,\d=.\-'\"_\.]*)[\s]*\)[\s]*return[\s]*\([\s]*([\w\[\]\s,\d=.\-_]*)[\s]*\)[\s]*"
+ function_pattern = r"^[ms]_[\w]+[ \t\n]*=[ \t\n]+function[^#{]*"
+ # parameter_pattern = r"^m_[\w]+[\s]+=[\s]+function[\s]*\([\s]*(?=return)[\s]*\)[\s]*return[\s]*\([\s]*([\w\[\]\s,\d=.\-_]*)[\s]*\)[\s]*"
header_parameter_pattern = r"[\s#\-]*[#]+[ \t]*([\w|-]+)[\s]+([\w]+)[\s]+([\w,\d.\"\-]+)[\s]+([\w|\W]+)"
divider_pattern = r"[\s#\-]*"
@@ -57,31 +57,71 @@ class FunctionParser(object):
"""
file_name = os.path.basename(path)
function_name, extension = os.path.splitext(file_name)
+ # try:
function_definition = self.find_function_definition(path)
- pattern = re.compile(
- self.__class__.parameter_pattern, flags=re.I | re.M)
- match = pattern.match(function_definition)
- if match:
- param_str, retval_str = match.group(1, 2)
+ # pattern = re.compile(
+ # self.__class__.parameter_pattern, flags=re.I | re.M)
+ # match = pattern.match(function_definition)
+
+ # if match:
+
+ func_split = function_definition.split("function")[1].split("return")
+ param_str, retval_str = self.extract_param_str(
+ func_split[0]), self.extract_param_str(func_split[1])
+ if param_str:
parameters = self.get_parameters(param_str)
return_values = self.get_parameters(retval_str)
- else:
- # TODO handle default matrix variables.
- raise AttributeError("Unable to match to function definition:\n" +
function_definition)
- data = {'function_name': function_name,
+ data = {'function_name': function_name,
'parameters': parameters, 'return_values': return_values}
- return data
+ if parameters and return_values:
+ return data
+ else:
+ raise AttributeError("Unable to match to function
definition:\n" + function_definition +
+ "\n parameter_str: " + param_str + "\n
retVal: " + retval_str)
+ else:
+ raise AttributeError("Unable to match to function definition:\n" +
function_definition +
+ "\n parameter_str: " + param_str + "\n
retVal: " + retval_str)
+ # else:
+ # # TODO handle default matrix variables.
+ # raise AttributeError("Unable to match to function definition:\n"
+ function_definition)
+ # except Exception as e:
+ # import generator
+ # raise AttributeError("Unable to parse " + path + " " +
generator.format_exception(e))
+
+ def extract_param_str(self, a: str):
+ try:
+ return a[a.index("(") + 1: a.rindex(")")]
+ except:
+ raise AttributeError("failed extracting from: " + a)
def get_parameters(self, param_str: str):
-
+
params = re.split(r",[\s]*", param_str)
-
- parameters = []
+
+ paramsCombined = []
+ inside = 0
+
for param in params:
- parameters.append(self.parse_single_parameter(param))
+ before = inside
+ start = param.count("(")
+ end = param.count(")")
+ inside += start - end
+ if before > 0:
+ if inside > 0:
+ paramsCombined[-1] += param + ","
+ else:
+ paramsCombined[-1] += param + ","
+ else:
+ paramsCombined.append(param)
+
+ parameters = []
+
+ for param in paramsCombined:
+ parameters.append(self.parse_single_parameter(param.strip()))
return parameters
def parse_single_parameter(self, param: str):
+ # try:
splitted = re.split(r"[\s]+", param)
dml_type = splitted[0]
name = splitted[1]
@@ -94,7 +134,16 @@ class FunctionParser(object):
default_split = name.split("=")
name = default_split[0]
default_value = default_split[1]
- return (name, dml_type, default_value)
+ if default_value is None:
+ raise AttributeError("Failed parsing " + param)
+
+ if "(" in name or "=" in name or "]" in name or "=" in dml_type:
+ raise AttributeError("failed Parsing " +
+ param + " " + str(splitted))
+ return [name, dml_type, default_value]
+ # except Exception as e:
+ # import generator
+ # raise AttributeError("Failed parsing " + param + " " +
generator.format_exception(e))
def get_header_parameters(self, param_str: str):
parameters = list()
@@ -109,7 +158,6 @@ class FunctionParser(object):
except Exception as e:
if re.search(pattern=self.__class__.divider_pattern,
string=param_line, flags=re.I | re.M) is not None:
continue
- print(e)
return parameters
return parameters
@@ -196,9 +244,9 @@ class FunctionParser(object):
header_param_names = [p[0].lower() for p in header["parameters"]]
data_param_names = [p[0].lower() for p in data["parameters"]]
- if header_param_names != data_param_names:
- print("[WARNING] The parameter names of the function does not
match with the documentation "
- "for file
\'{file_name}\'.".format(file_name=data["function_name"]))
+ # if header_param_names != data_param_names:
+ # print("[WARNING] The parameter names of the function does not match
with the documentation "
+ # "for file
\'{file_name}\'.".format(file_name=data["function_name"]))
header_param_type = [p[1].lower() for p in header["parameters"]]
header_param_type = [type_mapping["type"].get(
@@ -209,6 +257,6 @@ class FunctionParser(object):
re.search(type_mapping_pattern, str(item).lower()).group() if item else str(item).lower(), item)
for item in data_param_type]
- if header_param_type != data_param_type:
- print("[WARNING] The parameter type of the function does not match
with the documentation "
- "for file
\'{file_name}\'.".format(file_name=data["function_name"]))
+ # if header_param_type != data_param_type:
+ # print("[WARNING] The parameter type of the function does not
match with the documentation "
+ # "for file
\'{file_name}\'.".format(file_name=data["function_name"]))
diff --git a/src/main/python/generator/resources/type_mapping.json b/src/main/python/generator/resources/type_mapping.json
index 7d2041a030..c45eaec5ad 100644
--- a/src/main/python/generator/resources/type_mapping.json
+++ b/src/main/python/generator/resources/type_mapping.json
@@ -5,9 +5,11 @@
"frame": "Frame",
"boolean": "bool",
"integer": "int",
+ "int": "int",
+ "scalar":"float",
"double": "float",
"string": "str",
- "list": "Iterable"
+ "list": "List"
},
"default": {
"---": "None"
diff --git a/src/main/python/systemds/operator/algorithm/__init__.py b/src/main/python/systemds/operator/algorithm/__init__.py
index 5e79d071ea..feb5342ecc 100644
--- a/src/main/python/systemds/operator/algorithm/__init__.py
+++ b/src/main/python/systemds/operator/algorithm/__init__.py
@@ -29,11 +29,16 @@ from .builtin.alsCG import alsCG
from .builtin.alsDS import alsDS
from .builtin.alsPredict import alsPredict
from .builtin.alsTopkPredict import alsTopkPredict
+from .builtin.apply_pipeline import apply_pipeline
from .builtin.arima import arima
+from .builtin.autoencoder_2layer import autoencoder_2layer
+from .builtin.bandit import bandit
from .builtin.bivar import bivar
from .builtin.components import components
from .builtin.confusionMatrix import confusionMatrix
from .builtin.cor import cor
+from .builtin.correctTypos import correctTypos
+from .builtin.correctTyposApply import correctTyposApply
from .builtin.cox import cox
from .builtin.cspline import cspline
from .builtin.csplineCG import csplineCG
@@ -44,19 +49,29 @@ from .builtin.dbscanApply import dbscanApply
from .builtin.decisionTree import decisionTree
from .builtin.decisionTreePredict import decisionTreePredict
from .builtin.deepWalk import deepWalk
+from .builtin.denialConstraints import denialConstraints
from .builtin.discoverFD import discoverFD
from .builtin.dist import dist
+from .builtin.dmv import dmv
+from .builtin.ema import ema
from .builtin.executePipeline import executePipeline
+from .builtin.ffPredict import ffPredict
from .builtin.ffTrain import ffTrain
+from .builtin.fit_pipeline import fit_pipeline
+from .builtin.fixInvalidLengths import fixInvalidLengths
+from .builtin.fixInvalidLengthsApply import fixInvalidLengthsApply
+from .builtin.frameSort import frameSort
from .builtin.frequencyEncode import frequencyEncode
from .builtin.frequencyEncodeApply import frequencyEncodeApply
from .builtin.garch import garch
from .builtin.gaussianClassifier import gaussianClassifier
from .builtin.getAccuracy import getAccuracy
from .builtin.glm import glm
+from .builtin.glmPredict import glmPredict
from .builtin.gmm import gmm
from .builtin.gmmPredict import gmmPredict
from .builtin.gnmf import gnmf
+from .builtin.gridSearch import gridSearch
from .builtin.hospitalResidencyMatch import hospitalResidencyMatch
from .builtin.hyperband import hyperband
from .builtin.img_brightness import img_brightness
@@ -83,17 +98,23 @@ from .builtin.intersect import intersect
from .builtin.km import km
from .builtin.kmeans import kmeans
from .builtin.kmeansPredict import kmeansPredict
+from .builtin.knn import knn
from .builtin.knnGraph import knnGraph
from .builtin.knnbf import knnbf
from .builtin.l2svm import l2svm
from .builtin.l2svmPredict import l2svmPredict
from .builtin.lasso import lasso
+from .builtin.lenetPredict import lenetPredict
from .builtin.lenetTrain import lenetTrain
from .builtin.lm import lm
from .builtin.lmCG import lmCG
from .builtin.lmDS import lmDS
+from .builtin.lmPredict import lmPredict
from .builtin.logSumExp import logSumExp
from .builtin.matrixProfile import matrixProfile
+from .builtin.mcc import mcc
+from .builtin.mdedup import mdedup
+from .builtin.mice import mice
from .builtin.miceApply import miceApply
from .builtin.msvm import msvm
from .builtin.msvmPredict import msvmPredict
@@ -111,6 +132,8 @@ from .builtin.outlierByIQRApply import outlierByIQRApply
from .builtin.outlierBySd import outlierBySd
from .builtin.outlierBySdApply import outlierBySdApply
from .builtin.pca import pca
+from .builtin.pcaInverse import pcaInverse
+from .builtin.pcaTransform import pcaTransform
from .builtin.pnmf import pnmf
from .builtin.ppca import ppca
from .builtin.randomForest import randomForest
@@ -118,6 +141,7 @@ from .builtin.scale import scale
from .builtin.scaleApply import scaleApply
from .builtin.scaleMinMax import scaleMinMax
from .builtin.selectByVarThresh import selectByVarThresh
+from .builtin.setdiff import setdiff
from .builtin.sherlock import sherlock
from .builtin.sherlockPredict import sherlockPredict
from .builtin.shortestPath import shortestPath
@@ -130,16 +154,22 @@ from .builtin.splitBalanced import splitBalanced
from .builtin.stableMarriage import stableMarriage
from .builtin.statsNA import statsNA
from .builtin.steplm import steplm
+from .builtin.stratstats import stratstats
+from .builtin.symmetricDifference import symmetricDifference
from .builtin.tSNE import tSNE
from .builtin.toOneHot import toOneHot
from .builtin.tomeklink import tomeklink
+from .builtin.topk_cleaning import topk_cleaning
from .builtin.underSampling import underSampling
+from .builtin.union import union
+from .builtin.unique import unique
from .builtin.univar import univar
from .builtin.vectorToCsv import vectorToCsv
from .builtin.winsorize import winsorize
from .builtin.winsorizeApply import winsorizeApply
from .builtin.xdummy1 import xdummy1
from .builtin.xdummy2 import xdummy2
+from .builtin.xgboost import xgboost
from .builtin.xgboostPredictClassification import xgboostPredictClassification
from .builtin.xgboostPredictRegression import xgboostPredictRegression
@@ -151,11 +181,16 @@ __all__ = ['WoE',
'alsDS',
'alsPredict',
'alsTopkPredict',
+ 'apply_pipeline',
'arima',
+ 'autoencoder_2layer',
+ 'bandit',
'bivar',
'components',
'confusionMatrix',
'cor',
+ 'correctTypos',
+ 'correctTyposApply',
'cox',
'cspline',
'csplineCG',
@@ -166,19 +201,29 @@ __all__ = ['WoE',
'decisionTree',
'decisionTreePredict',
'deepWalk',
+ 'denialConstraints',
'discoverFD',
'dist',
+ 'dmv',
+ 'ema',
'executePipeline',
+ 'ffPredict',
'ffTrain',
+ 'fit_pipeline',
+ 'fixInvalidLengths',
+ 'fixInvalidLengthsApply',
+ 'frameSort',
'frequencyEncode',
'frequencyEncodeApply',
'garch',
'gaussianClassifier',
'getAccuracy',
'glm',
+ 'glmPredict',
'gmm',
'gmmPredict',
'gnmf',
+ 'gridSearch',
'hospitalResidencyMatch',
'hyperband',
'img_brightness',
@@ -205,17 +250,23 @@ __all__ = ['WoE',
'km',
'kmeans',
'kmeansPredict',
+ 'knn',
'knnGraph',
'knnbf',
'l2svm',
'l2svmPredict',
'lasso',
+ 'lenetPredict',
'lenetTrain',
'lm',
'lmCG',
'lmDS',
+ 'lmPredict',
'logSumExp',
'matrixProfile',
+ 'mcc',
+ 'mdedup',
+ 'mice',
'miceApply',
'msvm',
'msvmPredict',
@@ -233,6 +284,8 @@ __all__ = ['WoE',
'outlierBySd',
'outlierBySdApply',
'pca',
+ 'pcaInverse',
+ 'pcaTransform',
'pnmf',
'ppca',
'randomForest',
@@ -240,6 +293,7 @@ __all__ = ['WoE',
'scaleApply',
'scaleMinMax',
'selectByVarThresh',
+ 'setdiff',
'sherlock',
'sherlockPredict',
'shortestPath',
@@ -252,15 +306,21 @@ __all__ = ['WoE',
'stableMarriage',
'statsNA',
'steplm',
+ 'stratstats',
+ 'symmetricDifference',
'tSNE',
'toOneHot',
'tomeklink',
+ 'topk_cleaning',
'underSampling',
+ 'union',
+ 'unique',
'univar',
'vectorToCsv',
'winsorize',
'winsorizeApply',
'xdummy1',
'xdummy2',
+ 'xgboost',
'xgboostPredictClassification',
'xgboostPredictRegression']
diff --git a/src/main/python/systemds/operator/algorithm/builtin/als.py b/src/main/python/systemds/operator/algorithm/builtin/als.py
index d532c68b45..5357ea6a81 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/als.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/als.py
@@ -33,8 +33,8 @@ def als(X: Matrix,
**kwargs: Dict[str, VALID_INPUT_TYPES]):
"""
:param rank: Rank of the factorization
- :param reg: Regularization:
- :param lambda: Regularization parameter, no regularization if 0.0
+ :param regType: Regularization:
+ :param reg: Regularization parameter, no regularization if 0.0
:param maxi: Maximum number of iterations
:param check: Check for convergence after every iteration, i.e., updating U and V once
:param thr: Assuming check is set to TRUE, the algorithm stops and convergence is declared
diff --git a/src/main/python/systemds/operator/algorithm/builtin/alsCG.py b/src/main/python/systemds/operator/algorithm/builtin/alsCG.py
index a54874a105..bde4133bc6 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/alsCG.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/alsCG.py
@@ -33,8 +33,8 @@ def alsCG(X: Matrix,
**kwargs: Dict[str, VALID_INPUT_TYPES]):
"""
:param rank: Rank of the factorization
- :param reg: Regularization:
- :param lambda: Regularization parameter, no regularization if 0.0
+ :param regType: Regularization:
+ :param reg: Regularization parameter, no regularization if 0.0
:param maxi: Maximum number of iterations
:param check: Check for convergence after every iteration, i.e., updating U and V once
:param thr: Assuming check is set to TRUE, the algorithm stops and convergence is declared
diff --git a/src/main/python/systemds/operator/algorithm/builtin/alsDS.py b/src/main/python/systemds/operator/algorithm/builtin/alsDS.py
index f1b03c8394..cba1d29f88 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/alsDS.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/alsDS.py
@@ -33,7 +33,7 @@ def alsDS(X: Matrix,
**kwargs: Dict[str, VALID_INPUT_TYPES]):
"""
:param rank: Rank of the factorization
- :param lambda: Regularization parameter, no regularization if 0.0
+ :param reg: Regularization parameter, no regularization if 0.0
:param maxi: Maximum number of iterations
:param check: Check for convergence after every iteration, i.e., updating L and R once
:param thr: Assuming check is set to TRUE, the algorithm stops and convergence is declared
diff --git a/src/main/python/systemds/operator/algorithm/builtin/alsDS.py b/src/main/python/systemds/operator/algorithm/builtin/applyAndEvaluate.py
similarity index 60%
copy from src/main/python/systemds/operator/algorithm/builtin/alsDS.py
copy to src/main/python/systemds/operator/algorithm/builtin/applyAndEvaluate.py
index f1b03c8394..3a968bb267 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/alsDS.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/applyAndEvaluate.py
@@ -20,7 +20,7 @@
# -------------------------------------------------------------
# Autogenerated By : src/main/python/generator/generator.py
-# Autogenerated From : scripts/builtin/alsDS.dml
+# Autogenerated From : scripts/builtin/applyAndEvaluate.dml
from typing import Dict, Iterable
@@ -29,28 +29,27 @@ from systemds.script_building.dag import OutputType
from systemds.utils.consts import VALID_INPUT_TYPES
-def alsDS(X: Matrix,
- **kwargs: Dict[str, VALID_INPUT_TYPES]):
- """
- :param rank: Rank of the factorization
- :param lambda: Regularization parameter, no regularization if 0.0
- :param maxi: Maximum number of iterations
- :param check: Check for convergence after every iteration, i.e., updating L and R once
- :param thr: Assuming check is set to TRUE, the algorithm stops and convergence is declared
- :param if: in loss in any two consecutive iterations falls below this threshold;
- :param if: FALSE thr is ignored
- :return: 'OperationNode' containing
- """
- params_dict = {'X': X}
+def applyAndEvaluate(trainData: Frame,
+ testData: Frame,
+ pip: Frame,
+ applyFunc: Frame,
+ hp: Matrix,
+ evaluationFunc: str,
+ evalFunHp: Matrix,
+ **kwargs: Dict[str, VALID_INPUT_TYPES]):
+
+ params_dict = {'trainData': trainData, 'testData': testData, 'pip': pip, 'applyFunc': applyFunc, 'hp': hp, 'evaluationFunc': evaluationFunc, 'evalFunHp': evalFunHp}
params_dict.update(kwargs)
- vX_0 = Matrix(X.sds_context, '')
- vX_1 = Matrix(X.sds_context, '')
- output_nodes = [vX_0, vX_1, ]
+ vX_0 = Matrix(trainData.sds_context, '')
+ vX_1 = Matrix(trainData.sds_context, '')
+ vX_2 = Matrix(trainData.sds_context, '')
+ output_nodes = [vX_0, vX_1, vX_2, ]
- op = MultiReturn(X.sds_context, 'alsDS', output_nodes, named_input_nodes=params_dict)
+ op = MultiReturn(trainData.sds_context, 'applyAndEvaluate', output_nodes, named_input_nodes=params_dict)
vX_0._unnamed_input_nodes = [op]
vX_1._unnamed_input_nodes = [op]
+ vX_2._unnamed_input_nodes = [op]
return op
diff --git a/src/main/python/systemds/operator/algorithm/builtin/lmPredict.py b/src/main/python/systemds/operator/algorithm/builtin/apply_pipeline.py
similarity index 71%
copy from src/main/python/systemds/operator/algorithm/builtin/lmPredict.py
copy to src/main/python/systemds/operator/algorithm/builtin/apply_pipeline.py
index 612887be37..fa52482eae 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/lmPredict.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/apply_pipeline.py
@@ -20,7 +20,7 @@
# -------------------------------------------------------------
# Autogenerated By : src/main/python/generator/generator.py
-# Autogenerated From : scripts/builtin/lmPredict.dml
+# Autogenerated From : scripts/builtin/apply_pipeline.dml
from typing import Dict, Iterable
@@ -29,13 +29,16 @@ from systemds.script_building.dag import OutputType
from systemds.utils.consts import VALID_INPUT_TYPES
-def lmPredict(X: Matrix,
- B: Matrix,
- ytest: Matrix,
- **kwargs: Dict[str, VALID_INPUT_TYPES]):
+def apply_pipeline(testData: Frame,
+ pip: Frame,
+ applyFunc: Frame,
+ hp: Matrix,
+ exState: List,
+ iState: List,
+ **kwargs: Dict[str, VALID_INPUT_TYPES]):
- params_dict = {'X': X, 'B': B, 'ytest': ytest}
+ params_dict = {'testData': testData, 'pip': pip, 'applyFunc': applyFunc, 'hp': hp, 'exState': exState, 'iState': iState}
params_dict.update(kwargs)
- return Matrix(X.sds_context,
- 'lmPredict',
+ return Matrix(testData.sds_context,
+ 'apply_pipeline',
named_input_nodes=params_dict)
diff --git a/src/main/python/systemds/operator/algorithm/builtin/gmm.py
b/src/main/python/systemds/operator/algorithm/builtin/autoencoder_2layer.py
similarity index 54%
copy from src/main/python/systemds/operator/algorithm/builtin/gmm.py
copy to
src/main/python/systemds/operator/algorithm/builtin/autoencoder_2layer.py
index 350ee0a835..3f3a061170 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/gmm.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/autoencoder_2layer.py
@@ -20,7 +20,7 @@
# -------------------------------------------------------------
# Autogenerated By : src/main/python/generator/generator.py
-# Autogenerated From : scripts/builtin/gmm.dml
+# Autogenerated From : scripts/builtin/autoencoder_2layer.dml
from typing import Dict, Iterable
@@ -29,31 +29,39 @@ from systemds.script_building.dag import OutputType
from systemds.utils.consts import VALID_INPUT_TYPES
-def gmm(X: Matrix,
- verbose: bool,
- **kwargs: Dict[str, VALID_INPUT_TYPES]):
+def autoencoder_2layer(X: Matrix,
+ num_hidden1: int,
+ num_hidden2: int,
+ max_epochs: int,
+ **kwargs: Dict[str, VALID_INPUT_TYPES]):
"""
- :param n_components: Number of n_components in the Gaussian mixture model
- :param model: "VVV": unequal variance (full),each component has its own
general covariance matrix
- :param init_param: initialize weights with "kmeans" or "random"
- :param iterations: Number of iterations
- :param reg_covar: regularization parameter for covariance matrix
- :param tol: tolerance value for convergence
- :return: 'OperationNode' containing of estimated parameters & information
criterion for best iteration & kth class
+ :param num_hidden1: Number of neurons in the 1st hidden layer
+ :param num_hidden2: Number of neurons in the 2nd hidden layer
+ :param max_epochs: Number of epochs to train for
+ :param full_obj: If TRUE, Computes objective function value (squared-loss)
+ :param at: of each epoch. Note that, computing the full
+ :param objective: a lot of time.
+ :param batch_size: Mini-batch size (training parameter)
+ :param step: Initial step size (training parameter)
+ :param decay: Decays step size after each epoch (training parameter)
+ :param mu: Momentum parameter (training parameter)
+ :return: 'OperationNode' containing
"""
- params_dict = {'X': X, 'verbose': verbose}
+ params_dict = {'X': X, 'num_hidden1': num_hidden1, 'num_hidden2': num_hidden2, 'max_epochs': max_epochs}
params_dict.update(kwargs)
vX_0 = Matrix(X.sds_context, '')
vX_1 = Matrix(X.sds_context, '')
- vX_2 = Scalar(X.sds_context, '')
- vX_3 = Scalar(X.sds_context, '')
+ vX_2 = Matrix(X.sds_context, '')
+ vX_3 = Matrix(X.sds_context, '')
vX_4 = Matrix(X.sds_context, '')
vX_5 = Matrix(X.sds_context, '')
vX_6 = Matrix(X.sds_context, '')
- output_nodes = [vX_0, vX_1, vX_2, vX_3, vX_4, vX_5, vX_6, ]
+ vX_7 = Matrix(X.sds_context, '')
+ vX_8 = Matrix(X.sds_context, '')
+ output_nodes = [vX_0, vX_1, vX_2, vX_3, vX_4, vX_5, vX_6, vX_7, vX_8, ]
- op = MultiReturn(X.sds_context, 'gmm', output_nodes, named_input_nodes=params_dict)
+ op = MultiReturn(X.sds_context, 'autoencoder_2layer', output_nodes, named_input_nodes=params_dict)
vX_0._unnamed_input_nodes = [op]
vX_1._unnamed_input_nodes = [op]
@@ -62,5 +70,7 @@ def gmm(X: Matrix,
vX_4._unnamed_input_nodes = [op]
vX_5._unnamed_input_nodes = [op]
vX_6._unnamed_input_nodes = [op]
+ vX_7._unnamed_input_nodes = [op]
+ vX_8._unnamed_input_nodes = [op]
return op
diff --git a/src/main/python/systemds/operator/algorithm/builtin/bandit.py
b/src/main/python/systemds/operator/algorithm/builtin/bandit.py
index 5cb87b5497..4adf73c760 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/bandit.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/bandit.py
@@ -33,18 +33,31 @@ def bandit(X_train: Matrix,
Y_train: Matrix,
X_test: Matrix,
Y_test: Matrix,
- metaList: Iterable,
+ metaList: List,
evaluationFunc: str,
evalFunHp: Matrix,
lp: Frame,
+ lpHp: Matrix,
primitives: Frame,
param: Frame,
baseLineScore: float,
cv: bool,
**kwargs: Dict[str, VALID_INPUT_TYPES]):
- params_dict = {'X_train': X_train, 'Y_train': Y_train, 'X_test': X_test, 'Y_test': Y_test, 'metaList': metaList, 'evaluationFunc': evaluationFunc, 'evalFunHp': evalFunHp, 'lp': lp, 'primitives': primitives, 'param': param, 'baseLineScore': baseLineScore, 'cv': cv}
+ params_dict = {'X_train': X_train, 'Y_train': Y_train, 'X_test': X_test, 'Y_test': Y_test, 'metaList': metaList, 'evaluationFunc': evaluationFunc, 'evalFunHp': evalFunHp, 'lp': lp, 'lpHp': lpHp, 'primitives': primitives, 'param': param, 'baseLineScore': baseLineScore, 'cv': cv}
params_dict.update(kwargs)
- return Matrix(X_train.sds_context,
- 'bandit',
- named_input_nodes=params_dict)
+
+ vX_0 = Frame(X_train.sds_context, '')
+ vX_1 = Matrix(X_train.sds_context, '')
+ vX_2 = Matrix(X_train.sds_context, '')
+ vX_3 = Frame(X_train.sds_context, '')
+ output_nodes = [vX_0, vX_1, vX_2, vX_3, ]
+
+ op = MultiReturn(X_train.sds_context, 'bandit', output_nodes, named_input_nodes=params_dict)
+
+ vX_0._unnamed_input_nodes = [op]
+ vX_1._unnamed_input_nodes = [op]
+ vX_2._unnamed_input_nodes = [op]
+ vX_3._unnamed_input_nodes = [op]
+
+ return op
diff --git a/src/main/python/systemds/operator/algorithm/builtin/hyperband.py
b/src/main/python/systemds/operator/algorithm/builtin/correctTypos.py
similarity index 60%
copy from src/main/python/systemds/operator/algorithm/builtin/hyperband.py
copy to src/main/python/systemds/operator/algorithm/builtin/correctTypos.py
index 4a0065dc06..acbd0f9448 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/hyperband.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/correctTypos.py
@@ -20,7 +20,7 @@
# -------------------------------------------------------------
# Autogenerated By : src/main/python/generator/generator.py
-# Autogenerated From : scripts/builtin/hyperband.dml
+# Autogenerated From : scripts/builtin/correctTypos.dml
from typing import Dict, Iterable
@@ -29,28 +29,30 @@ from systemds.script_building.dag import OutputType
from systemds.utils.consts import VALID_INPUT_TYPES
-def hyperband(X_train: Matrix,
- y_train: Matrix,
- X_val: Matrix,
- y_val: Matrix,
- params: Iterable,
- paramRanges: Matrix,
- **kwargs: Dict[str, VALID_INPUT_TYPES]):
+def correctTypos(strings: Frame,
+ **kwargs: Dict[str, VALID_INPUT_TYPES]):
"""
- :param One: hyper parameter, first column specifies min, second column max
value.
- :param verbose: If TRUE print messages are activated
+ :param frequency_threshold: Strings that occur above this frequency level
will not be corrected
+ :param distance_threshold: Max distance at which strings are considered
similar
+ :param is_verbose: Print debug information
:return: 'OperationNode' containing
"""
- params_dict = {'X_train': X_train, 'y_train': y_train, 'X_val': X_val, 'y_val': y_val, 'params': params, 'paramRanges': paramRanges}
+ params_dict = {'strings': strings}
params_dict.update(kwargs)
- vX_0 = Matrix(X_train.sds_context, '')
- vX_1 = Frame(X_train.sds_context, '')
- output_nodes = [vX_0, vX_1, ]
+ vX_0 = Frame(strings.sds_context, '')
+ vX_1 = Scalar(strings.sds_context, '')
+ vX_2 = Scalar(strings.sds_context, '')
+ vX_3 = Matrix(strings.sds_context, '')
+ vX_4 = Frame(strings.sds_context, '')
+ output_nodes = [vX_0, vX_1, vX_2, vX_3, vX_4, ]
- op = MultiReturn(X_train.sds_context, 'hyperband', output_nodes, named_input_nodes=params_dict)
+ op = MultiReturn(strings.sds_context, 'correctTypos', output_nodes, named_input_nodes=params_dict)
vX_0._unnamed_input_nodes = [op]
vX_1._unnamed_input_nodes = [op]
+ vX_2._unnamed_input_nodes = [op]
+ vX_3._unnamed_input_nodes = [op]
+ vX_4._unnamed_input_nodes = [op]
return op
diff --git a/src/main/python/systemds/operator/algorithm/builtin/lmPredict.py
b/src/main/python/systemds/operator/algorithm/builtin/correctTyposApply.py
similarity index 66%
copy from src/main/python/systemds/operator/algorithm/builtin/lmPredict.py
copy to src/main/python/systemds/operator/algorithm/builtin/correctTyposApply.py
index 612887be37..3aa4c0e2d4 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/lmPredict.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/correctTyposApply.py
@@ -20,7 +20,7 @@
# -------------------------------------------------------------
# Autogenerated By : src/main/python/generator/generator.py
-# Autogenerated From : scripts/builtin/lmPredict.dml
+# Autogenerated From : scripts/builtin/correctTyposApply.dml
from typing import Dict, Iterable
@@ -29,13 +29,17 @@ from systemds.script_building.dag import OutputType
from systemds.utils.consts import VALID_INPUT_TYPES
-def lmPredict(X: Matrix,
- B: Matrix,
- ytest: Matrix,
- **kwargs: Dict[str, VALID_INPUT_TYPES]):
-
- params_dict = {'X': X, 'B': B, 'ytest': ytest}
+def correctTyposApply(strings: Frame,
+ distance_matrix: Matrix,
+ dict: Frame,
+ **kwargs: Dict[str, VALID_INPUT_TYPES]):
+ """
+ :param frequency_threshold: Strings that occur above this frequency level
will not be corrected
+ :param distance_threshold: Max distance at which strings are considered
similar
+ :return: 'OperationNode' containing
+ """
+ params_dict = {'strings': strings, 'distance_matrix': distance_matrix, 'dict': dict}
params_dict.update(kwargs)
- return Matrix(X.sds_context,
- 'lmPredict',
+ return Matrix(strings.sds_context,
+ 'correctTyposApply',
named_input_nodes=params_dict)
diff --git
a/src/main/python/systemds/operator/algorithm/builtin/decisionTree.py
b/src/main/python/systemds/operator/algorithm/builtin/decisionTree.py
index 6d73186122..94da3da45c 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/decisionTree.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/decisionTree.py
@@ -32,7 +32,6 @@ from systemds.utils.consts import VALID_INPUT_TYPES
def decisionTree(X: Matrix,
Y: Matrix,
R: Matrix,
- verbose: bool,
**kwargs: Dict[str, VALID_INPUT_TYPES]):
"""
:param a: vector, other positive Integers indicate the number of categories
@@ -42,7 +41,7 @@ def decisionTree(X: Matrix,
:param verbose: boolean specifying if the algorithm should print
information while executing
:return: 'OperationNode' containing information: & if the feature is
categorical) & looks at if j is an internal node, otherwise 0 & as r input
vector & of the subset of values & 6,7,... if j is categorical & a leaf node:
number of misclassified samples reaching at node j & at m[6,j] if the feature
chosen for j is scale, & feature chosen for j is categorical rows 6,7,...
depict the value subset chosen for j & a leaf node 1 if j is impure and the
number of samples at j > threshold, ot [...]
"""
- params_dict = {'X': X, 'Y': Y, 'R': R, 'verbose': verbose}
+ params_dict = {'X': X, 'Y': Y, 'R': R}
params_dict.update(kwargs)
return Matrix(X.sds_context,
'decisionTree',
diff --git
a/src/main/python/systemds/operator/algorithm/builtin/denialConstraints.py
b/src/main/python/systemds/operator/algorithm/builtin/denialConstraints.py
new file mode 100644
index 0000000000..b2bb53c59b
--- /dev/null
+++ b/src/main/python/systemds/operator/algorithm/builtin/denialConstraints.py
@@ -0,0 +1,65 @@
+# -------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# -------------------------------------------------------------
+
+# Autogenerated By : src/main/python/generator/generator.py
+# Autogenerated From : scripts/builtin/denialConstraints.dml
+
+from typing import Dict, Iterable
+
+from systemds.operator import OperationNode, Matrix, Frame, List, MultiReturn, Scalar
+from systemds.script_building.dag import OutputType
+from systemds.utils.consts import VALID_INPUT_TYPES
+
+
+def denialConstraints(dataFrame: Frame,
+ constraintsFrame: Frame):
+ """
+ :param dataFrame: frame which columns represent the variables of the data
and the rows correspond
+ :param to: or instances.
+ :param Recommended: a column indexing the instances from 1 to N (N=number
of instances).
+ :param constraintsFrame: frame with fixed columns and each row
representing one constraint.
+ :param ie: value of the variable 1 in instance 1 is lower/higher than the
value of variable 1 in instance 2,
+ :param then: of of variable 2 in instance 2 can't be lower/higher than the
value of variable 2 in instance 2.
+ :param in: of instanceCompare
+ :param rank: yrs.service sex salary
+ :param 1: 19 18 Male 139750
+ :param 2: 20 16 Male 173200
+ :param 3: 3 3 Male 79750.56
+ :param 4: 45 39 Male 115000
+ :param 5: 40 40 Male 141500
+ :param 6: 6 6 Male 97000
+ :param 7: 30 23 Male 175000
+ :param 8: 45 45 Male 147765
+ :param 9: 21 20 Male 119250
+ :param 10: 18 18 Female 129000
+ :param 11: 12 8 Male 119800
+ :param 12: 7 2 Male 79800
+ :param 13: 1 1 Male 77700
+ :param 1: yrs.since.phd < yrs.service
+ :param 2: rank Prof yrs.service ><
salary
+ :param 3: salary = 78182
+ :param 4: discipline B yrs.service >
yrs.since.phd
+ :return: 'OperationNode' containing shows the indexes of dataframe that
are wrong. & shows the index of the denial constraint that is fulfilled & no
wrong instances to show (0 constrains fulfilled) -->
wronginstances=matrix(0,1,2)
+ """
+ params_dict = {'dataFrame': dataFrame, 'constraintsFrame': constraintsFrame}
+ return Matrix(dataFrame.sds_context,
+ 'denialConstraints',
+ named_input_nodes=params_dict)
diff --git a/src/main/python/systemds/operator/algorithm/builtin/lmPredict.py
b/src/main/python/systemds/operator/algorithm/builtin/dmv.py
similarity index 76%
copy from src/main/python/systemds/operator/algorithm/builtin/lmPredict.py
copy to src/main/python/systemds/operator/algorithm/builtin/dmv.py
index 612887be37..2a6eaa4952 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/lmPredict.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/dmv.py
@@ -20,7 +20,7 @@
# -------------------------------------------------------------
# Autogenerated By : src/main/python/generator/generator.py
-# Autogenerated From : scripts/builtin/lmPredict.dml
+# Autogenerated From : scripts/builtin/dmv.dml
from typing import Dict, Iterable
@@ -29,13 +29,15 @@ from systemds.script_building.dag import OutputType
from systemds.utils.consts import VALID_INPUT_TYPES
-def lmPredict(X: Matrix,
- B: Matrix,
- ytest: Matrix,
- **kwargs: Dict[str, VALID_INPUT_TYPES]):
-
- params_dict = {'X': X, 'B': B, 'ytest': ytest}
+def dmv(X: Frame,
+ **kwargs: Dict[str, VALID_INPUT_TYPES]):
+ """
+ :param threshold: Threshold value in interval [0, 1] for dominant pattern
per column (e.g., 0.8 means
+ :param replace: The string disguised missing values are replaced with
+ :return: 'OperationNode' containing
+ """
+ params_dict = {'X': X}
params_dict.update(kwargs)
return Matrix(X.sds_context,
- 'lmPredict',
+ 'dmv',
named_input_nodes=params_dict)
diff --git a/src/main/python/systemds/operator/algorithm/builtin/msvm.py
b/src/main/python/systemds/operator/algorithm/builtin/ema.py
similarity index 64%
copy from src/main/python/systemds/operator/algorithm/builtin/msvm.py
copy to src/main/python/systemds/operator/algorithm/builtin/ema.py
index d9a2791ccc..ab4ba1a675 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/msvm.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/ema.py
@@ -20,7 +20,7 @@
# -------------------------------------------------------------
# Autogenerated By : src/main/python/generator/generator.py
-# Autogenerated From : scripts/builtin/msvm.dml
+# Autogenerated From : scripts/builtin/ema.dml
from typing import Dict, Iterable
@@ -29,21 +29,24 @@ from systemds.script_building.dag import OutputType
from systemds.utils.consts import VALID_INPUT_TYPES
-def msvm(X: Matrix,
- Y: Matrix,
- **kwargs: Dict[str, VALID_INPUT_TYPES]):
+def ema(X: Frame,
+ search_iterations: int,
+ mode: str,
+ freq: int,
+ alpha: float,
+ beta: float,
+ gamma: float):
"""
- :param intercept: No Intercept ( If set to TRUE then a constant bias
column is added to X)
- :param num_classes: Number of classes
- :param epsilon: Procedure terminates early if the reduction in objective
function
- :param value: than epsilon (tolerance) times the initial objective
function value.
- :param lambda: Regularization parameter (lambda) for L2 regularization
- :param maxIterations: Maximum number of conjugate gradient iterations
- :param verbose: Set to true to print while training.
+ :param search_iterations: Budget iterations for parameter optimisation,
+ :param used: weren't set
+ :param mode: Type of EMA method. Either "single", "double" or "triple"
+ :param freq: Seasonality when using triple EMA.
+ :param alpha: alpha- value for EMA
+ :param beta: beta- value for EMA
+ :param gamma: gamma- value for EMA
:return: 'OperationNode' containing
"""
- params_dict = {'X': X, 'Y': Y}
- params_dict.update(kwargs)
+ params_dict = {'X': X, 'search_iterations': search_iterations, 'mode': mode, 'freq': freq, 'alpha': alpha, 'beta': beta, 'gamma': gamma}
return Matrix(X.sds_context,
- 'msvm',
+ 'ema',
named_input_nodes=params_dict)
diff --git
a/src/main/python/systemds/operator/algorithm/builtin/executePipeline.py
b/src/main/python/systemds/operator/algorithm/builtin/executePipeline.py
index 470d152ad3..31235e5910 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/executePipeline.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/executePipeline.py
@@ -29,23 +29,45 @@ from systemds.script_building.dag import OutputType
from systemds.utils.consts import VALID_INPUT_TYPES
-def executePipeline(X: Matrix):
+def executePipeline(pipeline: Frame,
+ Xtrain: Matrix,
+ Ytrain: Matrix,
+ Xtest: Matrix,
+ Ytest: Matrix,
+ metaList: List,
+ hyperParameters: Matrix,
+ flagsCount: int,
+ verbose: bool,
+ **kwargs: Dict[str, VALID_INPUT_TYPES]):
"""
:param flagsCount: ---
:param test: ---
- :return: 'OperationNode' containing validation check & convert the matrix
row-vector into list & flag & append flag & append flag & append flag & of
hyper-parameters and loop till that & flag & and remove categorical & and
remove numerics & + 1 for nan replacement & matrix & matrix & ohe call, to call
inside eval as a function & encoding of categorical features & features & ohe
call, to call inside eval as a function & to call inside eval as a function &
doing relative over-samplin [...]
+ :return: 'OperationNode' containing
"""
- params_dict = {'X': X}
+ params_dict = {'pipeline': pipeline, 'Xtrain': Xtrain, 'Ytrain': Ytrain, 'Xtest': Xtest, 'Ytest': Ytest, 'metaList': metaList, 'hyperParameters': hyperParameters, 'flagsCount': flagsCount, 'verbose': verbose}
+ params_dict.update(kwargs)
- vX_0 = Matrix(X.sds_context, '')
- vX_1 = Matrix(X.sds_context, '')
- vX_2 = Matrix(X.sds_context, '')
- output_nodes = [vX_0, vX_1, vX_2, ]
+ vX_0 = Matrix(pipeline.sds_context, '')
+ vX_1 = Matrix(pipeline.sds_context, '')
+ vX_2 = Matrix(pipeline.sds_context, '')
+ vX_3 = Matrix(pipeline.sds_context, '')
+ vX_4 = Scalar(pipeline.sds_context, '')
+ vX_5 = Matrix(pipeline.sds_context, '')
+ vX_6 = Matrix(pipeline.sds_context, '')
+ vX_7 = Scalar(pipeline.sds_context, '')
+ vX_8 = List(pipeline.sds_context, '')
+ output_nodes = [vX_0, vX_1, vX_2, vX_3, vX_4, vX_5, vX_6, vX_7, vX_8, ]
- op = MultiReturn(X.sds_context, 'executePipeline', output_nodes, named_input_nodes=params_dict)
+ op = MultiReturn(pipeline.sds_context, 'executePipeline', output_nodes, named_input_nodes=params_dict)
vX_0._unnamed_input_nodes = [op]
vX_1._unnamed_input_nodes = [op]
vX_2._unnamed_input_nodes = [op]
+ vX_3._unnamed_input_nodes = [op]
+ vX_4._unnamed_input_nodes = [op]
+ vX_5._unnamed_input_nodes = [op]
+ vX_6._unnamed_input_nodes = [op]
+ vX_7._unnamed_input_nodes = [op]
+ vX_8._unnamed_input_nodes = [op]
return op
diff --git a/src/main/python/systemds/operator/algorithm/builtin/lmPredict.py
b/src/main/python/systemds/operator/algorithm/builtin/ffPredict.py
similarity index 81%
copy from src/main/python/systemds/operator/algorithm/builtin/lmPredict.py
copy to src/main/python/systemds/operator/algorithm/builtin/ffPredict.py
index 612887be37..3c6244dc1e 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/lmPredict.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/ffPredict.py
@@ -20,7 +20,7 @@
# -------------------------------------------------------------
# Autogenerated By : src/main/python/generator/generator.py
-# Autogenerated From : scripts/builtin/lmPredict.dml
+# Autogenerated From : scripts/builtin/ffPredict.dml
from typing import Dict, Iterable
@@ -29,13 +29,15 @@ from systemds.script_building.dag import OutputType
from systemds.utils.consts import VALID_INPUT_TYPES
-def lmPredict(X: Matrix,
- B: Matrix,
- ytest: Matrix,
+def ffPredict(model: List,
+ X: Matrix,
**kwargs: Dict[str, VALID_INPUT_TYPES]):
-
- params_dict = {'X': X, 'B': B, 'ytest': ytest}
+ """
+ :param batch_size: Batch size
+ :return: 'OperationNode' containing value
+ """
+ params_dict = {'model': model, 'X': X}
params_dict.update(kwargs)
- return Matrix(X.sds_context,
- 'lmPredict',
+ return Matrix(model.sds_context,
+ 'ffPredict',
named_input_nodes=params_dict)
diff --git a/src/main/python/systemds/operator/algorithm/builtin/alsDS.py
b/src/main/python/systemds/operator/algorithm/builtin/fit_pipeline.py
similarity index 56%
copy from src/main/python/systemds/operator/algorithm/builtin/alsDS.py
copy to src/main/python/systemds/operator/algorithm/builtin/fit_pipeline.py
index f1b03c8394..34bee247be 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/alsDS.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/fit_pipeline.py
@@ -20,7 +20,7 @@
# -------------------------------------------------------------
# Autogenerated By : src/main/python/generator/generator.py
-# Autogenerated From : scripts/builtin/alsDS.dml
+# Autogenerated From : scripts/builtin/fit_pipeline.dml
from typing import Dict, Iterable
@@ -29,28 +29,31 @@ from systemds.script_building.dag import OutputType
from systemds.utils.consts import VALID_INPUT_TYPES
-def alsDS(X: Matrix,
- **kwargs: Dict[str, VALID_INPUT_TYPES]):
- """
- :param rank: Rank of the factorization
- :param lambda: Regularization parameter, no regularization if 0.0
- :param maxi: Maximum number of iterations
- :param check: Check for convergence after every iteration, i.e., updating
L and R once
- :param thr: Assuming check is set to TRUE, the algorithm stops and
convergence is declared
- :param if: in loss in any two consecutive iterations falls below this
threshold;
- :param if: FALSE thr is ignored
- :return: 'OperationNode' containing
- """
- params_dict = {'X': X}
+def fit_pipeline(trainData: Frame,
+ testData: Frame,
+ pip: Frame,
+ applyFunc: Frame,
+ hp: Matrix,
+ evaluationFunc: str,
+ evalFunHp: Matrix,
+ **kwargs: Dict[str, VALID_INPUT_TYPES]):
+
+ params_dict = {'trainData': trainData, 'testData': testData, 'pip': pip, 'applyFunc': applyFunc, 'hp': hp, 'evaluationFunc': evaluationFunc, 'evalFunHp': evalFunHp}
params_dict.update(kwargs)
- vX_0 = Matrix(X.sds_context, '')
- vX_1 = Matrix(X.sds_context, '')
- output_nodes = [vX_0, vX_1, ]
+ vX_0 = Matrix(trainData.sds_context, '')
+ vX_1 = Matrix(trainData.sds_context, '')
+ vX_2 = Matrix(trainData.sds_context, '')
+ vX_3 = List(trainData.sds_context, '')
+ vX_4 = List(trainData.sds_context, '')
+ output_nodes = [vX_0, vX_1, vX_2, vX_3, vX_4, ]
- op = MultiReturn(X.sds_context, 'alsDS', output_nodes, named_input_nodes=params_dict)
+ op = MultiReturn(trainData.sds_context, 'fit_pipeline', output_nodes, named_input_nodes=params_dict)
vX_0._unnamed_input_nodes = [op]
vX_1._unnamed_input_nodes = [op]
+ vX_2._unnamed_input_nodes = [op]
+ vX_3._unnamed_input_nodes = [op]
+ vX_4._unnamed_input_nodes = [op]
return op
diff --git a/src/main/python/systemds/operator/algorithm/builtin/lmPredict.py
b/src/main/python/systemds/operator/algorithm/builtin/fixInvalidLengths.py
similarity index 63%
copy from src/main/python/systemds/operator/algorithm/builtin/lmPredict.py
copy to src/main/python/systemds/operator/algorithm/builtin/fixInvalidLengths.py
index 612887be37..d70ca4a42c 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/lmPredict.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/fixInvalidLengths.py
@@ -20,7 +20,7 @@
# -------------------------------------------------------------
# Autogenerated By : src/main/python/generator/generator.py
-# Autogenerated From : scripts/builtin/lmPredict.dml
+# Autogenerated From : scripts/builtin/fixInvalidLengths.dml
from typing import Dict, Iterable
@@ -29,13 +29,24 @@ from systemds.script_building.dag import OutputType
from systemds.utils.consts import VALID_INPUT_TYPES
-def lmPredict(X: Matrix,
- B: Matrix,
- ytest: Matrix,
- **kwargs: Dict[str, VALID_INPUT_TYPES]):
+def fixInvalidLengths(F1: Frame,
+ mask: Matrix,
+ **kwargs: Dict[str, VALID_INPUT_TYPES]):
- params_dict = {'X': X, 'B': B, 'ytest': ytest}
+ params_dict = {'F1': F1, 'mask': mask}
params_dict.update(kwargs)
- return Matrix(X.sds_context,
- 'lmPredict',
- named_input_nodes=params_dict)
+
+ vX_0 = Frame(F1.sds_context, '')
+ vX_1 = Matrix(F1.sds_context, '')
+ vX_2 = Matrix(F1.sds_context, '')
+ vX_3 = Matrix(F1.sds_context, '')
+ output_nodes = [vX_0, vX_1, vX_2, vX_3, ]
+
+ op = MultiReturn(F1.sds_context, 'fixInvalidLengths', output_nodes, named_input_nodes=params_dict)
+
+ vX_0._unnamed_input_nodes = [op]
+ vX_1._unnamed_input_nodes = [op]
+ vX_2._unnamed_input_nodes = [op]
+ vX_3._unnamed_input_nodes = [op]
+
+ return op
diff --git a/src/main/python/systemds/operator/algorithm/builtin/lmPredict.py
b/src/main/python/systemds/operator/algorithm/builtin/fixInvalidLengthsApply.py
similarity index 79%
copy from src/main/python/systemds/operator/algorithm/builtin/lmPredict.py
copy to
src/main/python/systemds/operator/algorithm/builtin/fixInvalidLengthsApply.py
index 612887be37..2fa9c5f748 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/lmPredict.py
+++
b/src/main/python/systemds/operator/algorithm/builtin/fixInvalidLengthsApply.py
@@ -20,7 +20,7 @@
# -------------------------------------------------------------
# Autogenerated By : src/main/python/generator/generator.py
-# Autogenerated From : scripts/builtin/lmPredict.dml
+# Autogenerated From : scripts/builtin/fixInvalidLengthsApply.dml
from typing import Dict, Iterable
@@ -29,13 +29,12 @@ from systemds.script_building.dag import OutputType
from systemds.utils.consts import VALID_INPUT_TYPES
-def lmPredict(X: Matrix,
- B: Matrix,
- ytest: Matrix,
- **kwargs: Dict[str, VALID_INPUT_TYPES]):
+def fixInvalidLengthsApply(X: Frame,
+ mask: Matrix,
+ qLow: Matrix,
+ qUp: Matrix):
- params_dict = {'X': X, 'B': B, 'ytest': ytest}
- params_dict.update(kwargs)
+ params_dict = {'X': X, 'mask': mask, 'qLow': qLow, 'qUp': qUp}
return Matrix(X.sds_context,
- 'lmPredict',
+ 'fixInvalidLengthsApply',
named_input_nodes=params_dict)
diff --git a/src/main/python/systemds/operator/algorithm/builtin/lmPredict.py
b/src/main/python/systemds/operator/algorithm/builtin/frameSort.py
similarity index 84%
copy from src/main/python/systemds/operator/algorithm/builtin/lmPredict.py
copy to src/main/python/systemds/operator/algorithm/builtin/frameSort.py
index 612887be37..1199c8529c 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/lmPredict.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/frameSort.py
@@ -20,7 +20,7 @@
# -------------------------------------------------------------
# Autogenerated By : src/main/python/generator/generator.py
-# Autogenerated From : scripts/builtin/lmPredict.dml
+# Autogenerated From : scripts/builtin/frameSort.dml
from typing import Dict, Iterable
@@ -29,13 +29,12 @@ from systemds.script_building.dag import OutputType
from systemds.utils.consts import VALID_INPUT_TYPES
-def lmPredict(X: Matrix,
- B: Matrix,
- ytest: Matrix,
+def frameSort(F: Frame,
+ mask: Matrix,
**kwargs: Dict[str, VALID_INPUT_TYPES]):
- params_dict = {'X': X, 'B': B, 'ytest': ytest}
+ params_dict = {'F': F, 'mask': mask}
params_dict.update(kwargs)
- return Matrix(X.sds_context,
- 'lmPredict',
+ return Matrix(F.sds_context,
+ 'frameSort',
named_input_nodes=params_dict)
diff --git a/src/main/python/systemds/operator/algorithm/builtin/lenetTrain.py
b/src/main/python/systemds/operator/algorithm/builtin/glmPredict.py
similarity index 57%
copy from src/main/python/systemds/operator/algorithm/builtin/lenetTrain.py
copy to src/main/python/systemds/operator/algorithm/builtin/glmPredict.py
index 66bc460dec..38d7ef570f 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/lenetTrain.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/glmPredict.py
@@ -20,7 +20,7 @@
# -------------------------------------------------------------
# Autogenerated By : src/main/python/generator/generator.py
-# Autogenerated From : scripts/builtin/lenetTrain.dml
+# Autogenerated From : scripts/builtin/glmPredict.dml
from typing import Dict, Iterable
@@ -29,30 +29,20 @@ from systemds.script_building.dag import OutputType
from systemds.utils.consts import VALID_INPUT_TYPES
-def lenetTrain(X: Matrix,
- Y: Matrix,
- X_val: Matrix,
- Y_val: Matrix,
- C: int,
- Hin: int,
- Win: int,
+def glmPredict(X: Matrix,
+ B: Matrix,
**kwargs: Dict[str, VALID_INPUT_TYPES]):
"""
- :param C: Number of input channels (dimensionality of input depth)
- :param Hin: Input width
- :param Win: Input height
- :param batch_size: Batch size
- :param epochs: Number of epochs
- :param lr: Learning rate
- :param mu: Momentum value
- :param decay: Learning rate decay
- :param lambda: Regularization strength
- :param seed: Seed for model initialization
- :param verbose: Flag indicates if function should print to stdout
- :return: 'OperationNode' containing
+ :param dfam: GLM distribution family: 1 = Power, 2 = Binomial, 3 =
Multinomial Logit
+ :param vpow: Power for Variance defined as (mean)^power (ignored if dfam
!= 1):
+ :param link: Link function code: 0 = canonical (depends on distribution),
1 = Power,
+ :param lpow: Power for Link function defined as (mean)^power (ignored if
link != 1):
+ :param disp: Dispersion value, when available
+ :param verbose: Print statistics to stdout
+ :return: 'OperationNode' containing printed one per each line, in the
following & string identifier for the statistic, see the table below. &
optional integer value that specifies the y-column for per-column statistics; &
binomial/multinomial one-column y input is converted into multi-column. &
optional boolean value (true or false) that tells us whether or not the input &
value of the statistic. & meaning
"""
- params_dict = {'X': X, 'Y': Y, 'X_val': X_val, 'Y_val': Y_val, 'C': C, 'Hin': Hin, 'Win': Win}
+ params_dict = {'X': X, 'B': B}
params_dict.update(kwargs)
return Matrix(X.sds_context,
- 'lenetTrain',
+ 'glmPredict',
named_input_nodes=params_dict)
diff --git a/src/main/python/systemds/operator/algorithm/builtin/gmm.py
b/src/main/python/systemds/operator/algorithm/builtin/gmm.py
index 350ee0a835..e2f74fab8f 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/gmm.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/gmm.py
@@ -30,7 +30,6 @@ from systemds.utils.consts import VALID_INPUT_TYPES
def gmm(X: Matrix,
- verbose: bool,
**kwargs: Dict[str, VALID_INPUT_TYPES]):
"""
:param n_components: Number of n_components in the Gaussian mixture model
@@ -41,7 +40,7 @@ def gmm(X: Matrix,
:param tol: tolerance value for convergence
:return: 'OperationNode' containing of estimated parameters & information
criterion for best iteration & kth class
"""
- params_dict = {'X': X, 'verbose': verbose}
+ params_dict = {'X': X}
params_dict.update(kwargs)
vX_0 = Matrix(X.sds_context, '')
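Editor's note: with verbose no longer a required positional argument, it moves into **kwargs like the other optional DML parameters. Below is a minimal, hypothetical sketch of calling the regenerated gmm wrapper from Python; the data, n_components value, and the assumption that the regenerated algorithm/__init__.py exports gmm are illustrative only and not part of this commit.

    import numpy as np
    from systemds.context import SystemDSContext
    from systemds.operator.algorithm import gmm

    with SystemDSContext() as sds:
        # toy 2-dimensional data, purely for illustration
        X = sds.from_numpy(np.random.rand(200, 2))
        # optional DML parameters are now passed as keywords via **kwargs
        res = gmm(X, n_components=3, verbose=False)
        out = res.compute()  # materializes all outputs of the MultiReturn node
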
diff --git a/src/main/python/systemds/operator/algorithm/builtin/gridSearch.py
b/src/main/python/systemds/operator/algorithm/builtin/gridSearch.py
index 0ddae9ca4e..42304818b9 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/gridSearch.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/gridSearch.py
@@ -24,15 +24,43 @@
from typing import Dict, Iterable
-from systemds.operator import OperationNode, Matrix
+from systemds.operator import OperationNode, Matrix, Frame, List, MultiReturn, Scalar
from systemds.script_building.dag import OutputType
from systemds.utils.consts import VALID_INPUT_TYPES
-def gridSearch(X: OperationNode, y: OperationNode, train: str, predict: str, params: Iterable, paramValues: Iterable, **kwargs: Dict[str, VALID_INPUT_TYPES]):
-
- params_dict = {'X':X, 'y':y, 'train':train, 'predict':predict, 'params':params, 'paramValues':paramValues}
+
+def gridSearch(X: Matrix,
+ y: Matrix,
+ train: str,
+ predict: str,
+ params: List,
+ paramValues: List,
+ **kwargs: Dict[str, VALID_INPUT_TYPES]):
+ """
+ :param train: Name ft of the train function to call via ft(trainArgs)
+ :param predict: Name fp of the loss function to call via
fp((predictArgs,B))
+ :param numB: Maximum number of parameters in model B (pass the max because
the size
+ :param may: parameters like icpt or multi-class classification)
+ :param columnvectors: hyper-parameters in 'params'
+ :param gridSearch: hyper-parameter by name, if
+ :param not: an empty list, the lm parameters are used
+ :param gridSearch: trained models at the end, if
+ :param not: an empty list, list(X, y) is used instead
+ :param cv: flag enabling k-fold cross validation, otherwise training loss
+ :param cvk: if cv=TRUE, specifies the the number of folds, otherwise
ignored
+ :param verbose: flag for verbose debug output
+ :return: 'OperationNode' containing returned as a column-major linearized
column vector
+ """
+ params_dict = {'X': X, 'y': y, 'train': train, 'predict': predict, 'params': params, 'paramValues': paramValues}
params_dict.update(kwargs)
- return OperationNode(X.sds_context, 'gridSearch', named_input_nodes=params_dict, output_type=OutputType.LIST, number_of_outputs=2, output_types=[OutputType.MATRIX, OutputType.FRAME])
+
+ vX_0 = Matrix(X.sds_context, '')
+ vX_1 = Frame(X.sds_context, '')
+ output_nodes = [vX_0, vX_1, ]
+
+ op = MultiReturn(X.sds_context, 'gridSearch', output_nodes, named_input_nodes=params_dict)
+ vX_0._unnamed_input_nodes = [op]
+ vX_1._unnamed_input_nodes = [op]
-
\ No newline at end of file
+ return op
diff --git a/src/main/python/systemds/operator/algorithm/builtin/hyperband.py
b/src/main/python/systemds/operator/algorithm/builtin/hyperband.py
index 4a0065dc06..727bb51bec 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/hyperband.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/hyperband.py
@@ -33,7 +33,7 @@ def hyperband(X_train: Matrix,
y_train: Matrix,
X_val: Matrix,
y_val: Matrix,
- params: Iterable,
+ params: List,
paramRanges: Matrix,
**kwargs: Dict[str, VALID_INPUT_TYPES]):
"""
diff --git a/src/main/python/systemds/operator/algorithm/builtin/knn.py
b/src/main/python/systemds/operator/algorithm/builtin/knn.py
new file mode 100644
index 0000000000..33c871c427
--- /dev/null
+++ b/src/main/python/systemds/operator/algorithm/builtin/knn.py
@@ -0,0 +1,66 @@
+# -------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# -------------------------------------------------------------
+
+# Autogenerated By : src/main/python/generator/generator.py
+# Autogenerated From : scripts/builtin/knn.dml
+
+from typing import Dict, Iterable
+
+from systemds.operator import OperationNode, Matrix, Frame, List, MultiReturn, Scalar
+from systemds.script_building.dag import OutputType
+from systemds.utils.consts import VALID_INPUT_TYPES
+
+
+def knn(Train: Matrix,
+ Test: Matrix,
+ CL: Matrix,
+ START_SELECTED: Matrix,
+ **kwargs: Dict[str, VALID_INPUT_TYPES]):
+ """
+ :param CL_T: Y The target type of matrix CL whether
+ :param columns: are continuous ( =1 ) or
+ :param trans_continuous: Y Option flag for continuous feature
transformed to [-1,1]:
+ :param k_value: Y k value for KNN, ignore if select_k enable
+ :param select_k: Y Use k selection algorithm to estimate k (TRUE
means yes)
+ :param k_min: Y Min k value( available if select_k = 1 )
+ :param k_max: Y Max k value( available if select_k = 1 )
+ :param select_feature: Y Use feature selection algorithm to
select feature (TRUE means yes)
+ :param feature_max: Y Max feature selection
+ :param interval: Y Interval value for K selecting ( available
if select_k = 1 )
+ :param feature_importance: Y Use feature importance algorithm to
estimate each feature
+ :param predict_con_tg: Y Continuous target predict function:
mean(=0) or median(=1)
+ :return: 'OperationNode' containing
+ """
+ params_dict = {'Train': Train, 'Test': Test, 'CL': CL, 'START_SELECTED': START_SELECTED}
+ params_dict.update(kwargs)
+
+ vX_0 = Matrix(Train.sds_context, '')
+ vX_1 = Matrix(Train.sds_context, '')
+ vX_2 = Matrix(Train.sds_context, '')
+ output_nodes = [vX_0, vX_1, vX_2, ]
+
+ op = MultiReturn(Train.sds_context, 'knn', output_nodes, named_input_nodes=params_dict)
+
+ vX_0._unnamed_input_nodes = [op]
+ vX_1._unnamed_input_nodes = [op]
+ vX_2._unnamed_input_nodes = [op]
+
+ return op
diff --git a/src/main/python/systemds/operator/algorithm/builtin/l2svm.py
b/src/main/python/systemds/operator/algorithm/builtin/l2svm.py
index 5b62279980..1a8cfbab8a 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/l2svm.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/l2svm.py
@@ -35,7 +35,7 @@ def l2svm(X: Matrix,
"""
:param intercept: No Intercept ( If set to TRUE then a constant bias
column is added to X)
:param epsilon: Procedure terminates early if the reduction in objective
function value is less
- :param lambda: Regularization parameter (lambda) for L2 regularization
+ :param reg: Regularization parameter (reg) for L2 regularization
:param maxIterations: Maximum number of conjugate gradient iterations
:param maxii: -
:param verbose: Set to true if one wants print statements updating on loss.
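Editor's note: the lambda-to-reg rename above is exactly what makes this argument usable from Python, since lambda is a reserved keyword there. A minimal sketch of calling the regenerated l2svm wrapper; the toy data and the reg value are assumptions for illustration, not part of the commit.

    import numpy as np
    from systemds.context import SystemDSContext
    from systemds.operator.algorithm import l2svm

    with SystemDSContext() as sds:
        X = sds.from_numpy(np.random.rand(100, 10))
        Y = sds.from_numpy(np.random.choice([-1.0, 1.0], size=(100, 1)))
        # 'reg' was previously 'lambda', which cannot be used as a Python keyword argument
        model = l2svm(X, Y, reg=0.01, verbose=False).compute()
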
diff --git a/src/main/python/systemds/operator/algorithm/builtin/lmPredict.py
b/src/main/python/systemds/operator/algorithm/builtin/lenetPredict.py
similarity index 68%
copy from src/main/python/systemds/operator/algorithm/builtin/lmPredict.py
copy to src/main/python/systemds/operator/algorithm/builtin/lenetPredict.py
index 612887be37..073b8eda24 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/lmPredict.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/lenetPredict.py
@@ -20,7 +20,7 @@
# -------------------------------------------------------------
# Autogenerated By : src/main/python/generator/generator.py
-# Autogenerated From : scripts/builtin/lmPredict.dml
+# Autogenerated From : scripts/builtin/lenetPredict.dml
from typing import Dict, Iterable
@@ -29,13 +29,21 @@ from systemds.script_building.dag import OutputType
from systemds.utils.consts import VALID_INPUT_TYPES
-def lmPredict(X: Matrix,
- B: Matrix,
- ytest: Matrix,
- **kwargs: Dict[str, VALID_INPUT_TYPES]):
-
- params_dict = {'X': X, 'B': B, 'ytest': ytest}
+def lenetPredict(model: List,
+ X: Matrix,
+ C: int,
+ Hin: int,
+ Win: int,
+ **kwargs: Dict[str, VALID_INPUT_TYPES]):
+ """
+ :param C: Number of input channels
+ :param Hin: Input height
+ :param Win: Input width
+ :param batch_size: Batch size
+ :return: 'OperationNode' containing
+ """
+ params_dict = {'model': model, 'X': X, 'C': C, 'Hin': Hin, 'Win': Win}
params_dict.update(kwargs)
- return Matrix(X.sds_context,
- 'lmPredict',
+ return Matrix(model.sds_context,
+ 'lenetPredict',
named_input_nodes=params_dict)
diff --git a/src/main/python/systemds/operator/algorithm/builtin/lenetTrain.py
b/src/main/python/systemds/operator/algorithm/builtin/lenetTrain.py
index 66bc460dec..ac51610564 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/lenetTrain.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/lenetTrain.py
@@ -46,7 +46,7 @@ def lenetTrain(X: Matrix,
:param lr: Learning rate
:param mu: Momentum value
:param decay: Learning rate decay
- :param lambda: Regularization strength
+ :param reg: Regularization strength
:param seed: Seed for model initialization
:param verbose: Flag indicates if function should print to stdout
:return: 'OperationNode' containing
diff --git a/src/main/python/systemds/operator/algorithm/builtin/lmPredict.py
b/src/main/python/systemds/operator/algorithm/builtin/lmPredict.py
index 612887be37..784eea8e48 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/lmPredict.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/lmPredict.py
@@ -31,10 +31,14 @@ from systemds.utils.consts import VALID_INPUT_TYPES
def lmPredict(X: Matrix,
B: Matrix,
- ytest: Matrix,
**kwargs: Dict[str, VALID_INPUT_TYPES]):
-
- params_dict = {'X': X, 'B': B, 'ytest': ytest}
+ """
+ :param if: is not wanted
+ :param icpt: Intercept presence, shifting and rescaling the columns of X
+ :param verbose: If TRUE print messages are activated
+ :return: 'OperationNode' containing
+ """
+ params_dict = {'X': X, 'B': B}
params_dict.update(kwargs)
return Matrix(X.sds_context,
'lmPredict',
diff --git a/src/main/python/systemds/operator/algorithm/builtin/lmPredict.py
b/src/main/python/systemds/operator/algorithm/builtin/mcc.py
similarity index 79%
copy from src/main/python/systemds/operator/algorithm/builtin/lmPredict.py
copy to src/main/python/systemds/operator/algorithm/builtin/mcc.py
index 612887be37..194ed45c98 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/lmPredict.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/mcc.py
@@ -20,7 +20,7 @@
# -------------------------------------------------------------
# Autogenerated By : src/main/python/generator/generator.py
-# Autogenerated From : scripts/builtin/lmPredict.dml
+# Autogenerated From : scripts/builtin/mcc.dml
from typing import Dict, Iterable
@@ -29,13 +29,10 @@ from systemds.script_building.dag import OutputType
from systemds.utils.consts import VALID_INPUT_TYPES
-def lmPredict(X: Matrix,
- B: Matrix,
- ytest: Matrix,
- **kwargs: Dict[str, VALID_INPUT_TYPES]):
+def mcc(predictions: Matrix,
+ labels: Matrix):
- params_dict = {'X': X, 'B': B, 'ytest': ytest}
- params_dict.update(kwargs)
- return Matrix(X.sds_context,
- 'lmPredict',
+ params_dict = {'predictions': predictions, 'labels': labels}
+ return Matrix(predictions.sds_context,
+ 'mcc',
named_input_nodes=params_dict)
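Editor's note: as a quick illustration of the newly generated single-output wrappers, a hypothetical call of mcc; the toy predictions/labels are made up, and it is assumed the regenerated algorithm/__init__.py exposes the function.

    import numpy as np
    from systemds.context import SystemDSContext
    from systemds.operator.algorithm import mcc

    with SystemDSContext() as sds:
        predictions = sds.from_numpy(np.array([[1.0], [0.0], [1.0], [1.0]]))
        labels = sds.from_numpy(np.array([[1.0], [0.0], [0.0], [1.0]]))
        # Matthews correlation coefficient of the predictions against the labels
        score = mcc(predictions, labels).compute()
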
diff --git a/src/main/python/systemds/operator/algorithm/builtin/lmPredict.py
b/src/main/python/systemds/operator/algorithm/builtin/mdedup.py
similarity index 70%
copy from src/main/python/systemds/operator/algorithm/builtin/lmPredict.py
copy to src/main/python/systemds/operator/algorithm/builtin/mdedup.py
index 612887be37..6e9bacd9a5 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/lmPredict.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/mdedup.py
@@ -20,7 +20,7 @@
# -------------------------------------------------------------
# Autogenerated By : src/main/python/generator/generator.py
-# Autogenerated From : scripts/builtin/lmPredict.dml
+# Autogenerated From : scripts/builtin/mdedup.dml
from typing import Dict, Iterable
@@ -29,13 +29,18 @@ from systemds.script_building.dag import OutputType
from systemds.utils.consts import VALID_INPUT_TYPES
-def lmPredict(X: Matrix,
- B: Matrix,
- ytest: Matrix,
- **kwargs: Dict[str, VALID_INPUT_TYPES]):
-
- params_dict = {'X': X, 'B': B, 'ytest': ytest}
- params_dict.update(kwargs)
+def mdedup(X: Frame,
+ LHSfeatures: Matrix,
+ LHSthreshold: Matrix,
+ RHSfeatures: Matrix,
+ RHSthreshold: Matrix,
+ verbose: bool):
+ """
+ :param X: Input Frame X
+ :param verbose: To print the output
+ :return: 'OperationNode' containing
+ """
+ params_dict = {'X': X, 'LHSfeatures': LHSfeatures, 'LHSthreshold': LHSthreshold, 'RHSfeatures': RHSfeatures, 'RHSthreshold': RHSthreshold, 'verbose': verbose}
return Matrix(X.sds_context,
- 'lmPredict',
+ 'mdedup',
named_input_nodes=params_dict)
diff --git a/src/main/python/systemds/operator/algorithm/builtin/miceApply.py
b/src/main/python/systemds/operator/algorithm/builtin/mice.py
similarity index 66%
copy from src/main/python/systemds/operator/algorithm/builtin/miceApply.py
copy to src/main/python/systemds/operator/algorithm/builtin/mice.py
index ab14dfe398..38e957d037 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/miceApply.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/mice.py
@@ -20,7 +20,7 @@
# -------------------------------------------------------------
# Autogenerated By : src/main/python/generator/generator.py
-# Autogenerated From : scripts/builtin/miceApply.dml
+# Autogenerated From : scripts/builtin/mice.dml
from typing import Dict, Iterable
@@ -29,19 +29,33 @@ from systemds.script_building.dag import OutputType
from systemds.utils.consts import VALID_INPUT_TYPES
-def miceApply(X: Matrix,
- meta: Matrix,
- threshold: float,
- dM: Frame,
- betaList: Iterable):
+def mice(X: Matrix,
+ cMask: Matrix,
+ **kwargs: Dict[str, VALID_INPUT_TYPES]):
"""
+ :param iter: Number of iteration for multiple imputations
:param threshold: confidence value [0, 1] for robust imputation, values
will only be imputed
:param if: value has probability greater than threshold,
:param only: categorical data
:param verbose: Boolean value.
:return: 'OperationNode' containing are represented with empty string i.e
",," in csv file & n are storing continuos/numeric data and variables with &
storing categorical data
"""
- params_dict = {'X': X, 'meta': meta, 'threshold': threshold, 'dM': dM, 'betaList': betaList}
- return Matrix(X.sds_context,
- 'miceApply',
- named_input_nodes=params_dict)
+ params_dict = {'X': X, 'cMask': cMask}
+ params_dict.update(kwargs)
+
+ vX_0 = Matrix(X.sds_context, '')
+ vX_1 = Matrix(X.sds_context, '')
+ vX_2 = Scalar(X.sds_context, '')
+ vX_3 = Frame(X.sds_context, '')
+ vX_4 = List(X.sds_context, '')
+ output_nodes = [vX_0, vX_1, vX_2, vX_3, vX_4, ]
+
+ op = MultiReturn(X.sds_context, 'mice', output_nodes, named_input_nodes=params_dict)
+
+ vX_0._unnamed_input_nodes = [op]
+ vX_1._unnamed_input_nodes = [op]
+ vX_2._unnamed_input_nodes = [op]
+ vX_3._unnamed_input_nodes = [op]
+ vX_4._unnamed_input_nodes = [op]
+
+ return op
diff --git a/src/main/python/systemds/operator/algorithm/builtin/miceApply.py
b/src/main/python/systemds/operator/algorithm/builtin/miceApply.py
index ab14dfe398..ea484df0fa 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/miceApply.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/miceApply.py
@@ -33,7 +33,7 @@ def miceApply(X: Matrix,
meta: Matrix,
threshold: float,
dM: Frame,
- betaList: Iterable):
+ betaList: List):
"""
:param threshold: confidence value [0, 1] for robust imputation, values
will only be imputed
:param if: value has probability greater than threshold,
diff --git a/src/main/python/systemds/operator/algorithm/builtin/msvm.py
b/src/main/python/systemds/operator/algorithm/builtin/msvm.py
index d9a2791ccc..53db201cba 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/msvm.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/msvm.py
@@ -37,7 +37,7 @@ def msvm(X: Matrix,
:param num_classes: Number of classes
:param epsilon: Procedure terminates early if the reduction in objective
function
:param value: than epsilon (tolerance) times the initial objective
function value.
- :param lambda: Regularization parameter (lambda) for L2 regularization
+ :param reg: Regularization parameter (lambda) for L2 regularization
:param maxIterations: Maximum number of conjugate gradient iterations
:param verbose: Set to true to print while training.
:return: 'OperationNode' containing
diff --git a/src/main/python/systemds/operator/algorithm/builtin/lmPredict.py
b/src/main/python/systemds/operator/algorithm/builtin/pcaInverse.py
similarity index 78%
copy from src/main/python/systemds/operator/algorithm/builtin/lmPredict.py
copy to src/main/python/systemds/operator/algorithm/builtin/pcaInverse.py
index 612887be37..14ce313c54 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/lmPredict.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/pcaInverse.py
@@ -20,7 +20,7 @@
# -------------------------------------------------------------
# Autogenerated By : src/main/python/generator/generator.py
-# Autogenerated From : scripts/builtin/lmPredict.dml
+# Autogenerated From : scripts/builtin/pcaInverse.dml
from typing import Dict, Iterable
@@ -29,13 +29,12 @@ from systemds.script_building.dag import OutputType
from systemds.utils.consts import VALID_INPUT_TYPES
-def lmPredict(X: Matrix,
- B: Matrix,
- ytest: Matrix,
- **kwargs: Dict[str, VALID_INPUT_TYPES]):
+def pcaInverse(Y: Matrix,
+ Clusters: Matrix,
+ Centering: Matrix,
+ ScaleFactor: Matrix):
- params_dict = {'X': X, 'B': B, 'ytest': ytest}
- params_dict.update(kwargs)
- return Matrix(X.sds_context,
- 'lmPredict',
+ params_dict = {'Y': Y, 'Clusters': Clusters, 'Centering': Centering, 'ScaleFactor': ScaleFactor}
+ return Matrix(Y.sds_context,
+ 'pcaInverse',
named_input_nodes=params_dict)
diff --git a/src/main/python/systemds/operator/algorithm/builtin/lmPredict.py
b/src/main/python/systemds/operator/algorithm/builtin/pcaTransform.py
similarity index 80%
copy from src/main/python/systemds/operator/algorithm/builtin/lmPredict.py
copy to src/main/python/systemds/operator/algorithm/builtin/pcaTransform.py
index 612887be37..53bf5e8a2a 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/lmPredict.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/pcaTransform.py
@@ -20,7 +20,7 @@
# -------------------------------------------------------------
# Autogenerated By : src/main/python/generator/generator.py
-# Autogenerated From : scripts/builtin/lmPredict.dml
+# Autogenerated From : scripts/builtin/pcaTransform.dml
from typing import Dict, Iterable
@@ -29,13 +29,12 @@ from systemds.script_building.dag import OutputType
from systemds.utils.consts import VALID_INPUT_TYPES
-def lmPredict(X: Matrix,
- B: Matrix,
- ytest: Matrix,
- **kwargs: Dict[str, VALID_INPUT_TYPES]):
+def pcaTransform(X: Matrix,
+ Clusters: Matrix,
+ Centering: Matrix,
+ ScaleFactor: Matrix):
- params_dict = {'X': X, 'B': B, 'ytest': ytest}
- params_dict.update(kwargs)
+ params_dict = {'X': X, 'Clusters': Clusters, 'Centering': Centering, 'ScaleFactor': ScaleFactor}
return Matrix(X.sds_context,
- 'lmPredict',
+ 'pcaTransform',
named_input_nodes=params_dict)
diff --git a/src/main/python/systemds/operator/algorithm/builtin/lmPredict.py
b/src/main/python/systemds/operator/algorithm/builtin/setdiff.py
similarity index 81%
copy from src/main/python/systemds/operator/algorithm/builtin/lmPredict.py
copy to src/main/python/systemds/operator/algorithm/builtin/setdiff.py
index 612887be37..7559935e32 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/lmPredict.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/setdiff.py
@@ -20,7 +20,7 @@
# -------------------------------------------------------------
# Autogenerated By : src/main/python/generator/generator.py
-# Autogenerated From : scripts/builtin/lmPredict.dml
+# Autogenerated From : scripts/builtin/setdiff.dml
from typing import Dict, Iterable
@@ -29,13 +29,10 @@ from systemds.script_building.dag import OutputType
from systemds.utils.consts import VALID_INPUT_TYPES
-def lmPredict(X: Matrix,
- B: Matrix,
- ytest: Matrix,
- **kwargs: Dict[str, VALID_INPUT_TYPES]):
+def setdiff(X: Matrix,
+ Y: Matrix):
- params_dict = {'X': X, 'B': B, 'ytest': ytest}
- params_dict.update(kwargs)
+ params_dict = {'X': X, 'Y': Y}
return Matrix(X.sds_context,
- 'lmPredict',
+ 'setdiff',
named_input_nodes=params_dict)
diff --git a/src/main/python/systemds/operator/algorithm/builtin/lmPredict.py
b/src/main/python/systemds/operator/algorithm/builtin/stratstats.py
similarity index 71%
copy from src/main/python/systemds/operator/algorithm/builtin/lmPredict.py
copy to src/main/python/systemds/operator/algorithm/builtin/stratstats.py
index 612887be37..64c5679cb8 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/lmPredict.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/stratstats.py
@@ -20,7 +20,7 @@
# -------------------------------------------------------------
# Autogenerated By : src/main/python/generator/generator.py
-# Autogenerated From : scripts/builtin/lmPredict.dml
+# Autogenerated From : scripts/builtin/stratstats.dml
from typing import Dict, Iterable
@@ -29,13 +29,18 @@ from systemds.script_building.dag import OutputType
from systemds.utils.consts import VALID_INPUT_TYPES
-def lmPredict(X: Matrix,
- B: Matrix,
- ytest: Matrix,
- **kwargs: Dict[str, VALID_INPUT_TYPES]):
-
- params_dict = {'X': X, 'B': B, 'ytest': ytest}
+def stratstats(X: Matrix,
+ **kwargs: Dict[str, VALID_INPUT_TYPES]):
+ """
+ :param the: empty means "use X in place of Y"
+ :param the: empty means "use X in place of S"
+ :param the: empty means "use columns 1 : ncol(X)"
+ :param the: empty means "use columns 1 : ncol(Y)"
+ :param Scid: Column index of the stratum column in S
+ :return: 'OperationNode' containing the following information:
+ """
+ params_dict = {'X': X}
params_dict.update(kwargs)
return Matrix(X.sds_context,
- 'lmPredict',
+ 'stratstats',
named_input_nodes=params_dict)
diff --git a/src/main/python/systemds/operator/algorithm/builtin/lmPredict.py
b/src/main/python/systemds/operator/algorithm/builtin/symmetricDifference.py
similarity index 81%
copy from src/main/python/systemds/operator/algorithm/builtin/lmPredict.py
copy to
src/main/python/systemds/operator/algorithm/builtin/symmetricDifference.py
index 612887be37..33a8209667 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/lmPredict.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/symmetricDifference.py
@@ -20,7 +20,7 @@
# -------------------------------------------------------------
# Autogenerated By : src/main/python/generator/generator.py
-# Autogenerated From : scripts/builtin/lmPredict.dml
+# Autogenerated From : scripts/builtin/symmetricDifference.dml
from typing import Dict, Iterable
@@ -29,13 +29,10 @@ from systemds.script_building.dag import OutputType
from systemds.utils.consts import VALID_INPUT_TYPES
-def lmPredict(X: Matrix,
- B: Matrix,
- ytest: Matrix,
- **kwargs: Dict[str, VALID_INPUT_TYPES]):
+def symmetricDifference(X: Matrix,
+ Y: Matrix):
- params_dict = {'X': X, 'B': B, 'ytest': ytest}
- params_dict.update(kwargs)
+ params_dict = {'X': X, 'Y': Y}
return Matrix(X.sds_context,
- 'lmPredict',
+ 'symmetricDifference',
named_input_nodes=params_dict)
diff --git a/src/main/python/systemds/operator/algorithm/builtin/alsDS.py
b/src/main/python/systemds/operator/algorithm/builtin/topk_cleaning.py
similarity index 56%
copy from src/main/python/systemds/operator/algorithm/builtin/alsDS.py
copy to src/main/python/systemds/operator/algorithm/builtin/topk_cleaning.py
index f1b03c8394..e9165510b4 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/alsDS.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/topk_cleaning.py
@@ -20,7 +20,7 @@
# -------------------------------------------------------------
# Autogenerated By : src/main/python/generator/generator.py
-# Autogenerated From : scripts/builtin/alsDS.dml
+# Autogenerated From : scripts/builtin/topk_cleaning.dml
from typing import Dict, Iterable
@@ -29,28 +29,31 @@ from systemds.script_building.dag import OutputType
from systemds.utils.consts import VALID_INPUT_TYPES
-def alsDS(X: Matrix,
- **kwargs: Dict[str, VALID_INPUT_TYPES]):
- """
- :param rank: Rank of the factorization
- :param lambda: Regularization parameter, no regularization if 0.0
- :param maxi: Maximum number of iterations
- :param check: Check for convergence after every iteration, i.e., updating
L and R once
- :param thr: Assuming check is set to TRUE, the algorithm stops and
convergence is declared
- :param if: in loss in any two consecutive iterations falls below this
threshold;
- :param if: FALSE thr is ignored
- :return: 'OperationNode' containing
- """
- params_dict = {'X': X}
+def topk_cleaning(dataTrain: Frame,
+ primitives: Frame,
+ parameters: Frame,
+ evaluationFunc: str,
+ evalFunHp: Matrix,
+ **kwargs: Dict[str, VALID_INPUT_TYPES]):
+
+ params_dict = {'dataTrain': dataTrain, 'primitives': primitives, 'parameters': parameters, 'evaluationFunc': evaluationFunc, 'evalFunHp': evalFunHp}
params_dict.update(kwargs)
- vX_0 = Matrix(X.sds_context, '')
- vX_1 = Matrix(X.sds_context, '')
- output_nodes = [vX_0, vX_1, ]
+ vX_0 = Frame(dataTrain.sds_context, '')
+ vX_1 = Matrix(dataTrain.sds_context, '')
+ vX_2 = Matrix(dataTrain.sds_context, '')
+ vX_3 = Scalar(dataTrain.sds_context, '')
+ vX_4 = Matrix(dataTrain.sds_context, '')
+ vX_5 = Frame(dataTrain.sds_context, '')
+ output_nodes = [vX_0, vX_1, vX_2, vX_3, vX_4, vX_5, ]
- op = MultiReturn(X.sds_context, 'alsDS', output_nodes, named_input_nodes=params_dict)
+ op = MultiReturn(dataTrain.sds_context, 'topk_cleaning', output_nodes, named_input_nodes=params_dict)
vX_0._unnamed_input_nodes = [op]
vX_1._unnamed_input_nodes = [op]
+ vX_2._unnamed_input_nodes = [op]
+ vX_3._unnamed_input_nodes = [op]
+ vX_4._unnamed_input_nodes = [op]
+ vX_5._unnamed_input_nodes = [op]
return op
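As a usage note for multi-return wrappers like the one generated above: the outputs are unpacked positionally, in the order the output nodes are appended to output_nodes. A minimal sketch, assuming topk_cleaning is exported from systemds.operator.algorithm and that frame/matrix nodes for the inputs already exist in the DAG (the variable names and the evaluation function name are illustrative only):

    from systemds.operator.algorithm import topk_cleaning

    # six outputs, matching output_nodes above: [Frame, Matrix, Matrix, Scalar, Matrix, Frame]
    [topk_frame, m1, m2, s3, m4, f5] = topk_cleaning(dataTrain=data_train,
        primitives=primitives, parameters=parameters,
        evaluationFunc="evalClassification", evalFunHp=eval_fun_hp)
    result = topk_frame.compute()  # evaluates the DAG up to this output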
diff --git a/src/main/python/systemds/operator/algorithm/builtin/lmPredict.py
b/src/main/python/systemds/operator/algorithm/builtin/union.py
similarity index 81%
copy from src/main/python/systemds/operator/algorithm/builtin/lmPredict.py
copy to src/main/python/systemds/operator/algorithm/builtin/union.py
index 612887be37..013de4bc3c 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/lmPredict.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/union.py
@@ -20,7 +20,7 @@
# -------------------------------------------------------------
# Autogenerated By : src/main/python/generator/generator.py
-# Autogenerated From : scripts/builtin/lmPredict.dml
+# Autogenerated From : scripts/builtin/union.dml
from typing import Dict, Iterable
@@ -29,13 +29,14 @@ from systemds.script_building.dag import OutputType
from systemds.utils.consts import VALID_INPUT_TYPES
-def lmPredict(X: Matrix,
- B: Matrix,
- ytest: Matrix,
- **kwargs: Dict[str, VALID_INPUT_TYPES]):
-
- params_dict = {'X': X, 'B': B, 'ytest': ytest}
- params_dict.update(kwargs)
+def union(X: Matrix,
+ Y: Matrix):
+ """
+ :param X: input vector
+ :param Y: input vector
+ :return: 'OperationNode' containing with all unique rows existing in x and y
+ """
+ params_dict = {'X': X, 'Y': Y}
return Matrix(X.sds_context,
- 'lmPredict',
+ 'union',
named_input_nodes=params_dict)
diff --git a/src/main/python/systemds/operator/algorithm/builtin/lmPredict.py
b/src/main/python/systemds/operator/algorithm/builtin/unique.py
similarity index 81%
copy from src/main/python/systemds/operator/algorithm/builtin/lmPredict.py
copy to src/main/python/systemds/operator/algorithm/builtin/unique.py
index 612887be37..cceb3fadea 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/lmPredict.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/unique.py
@@ -20,7 +20,7 @@
# -------------------------------------------------------------
# Autogenerated By : src/main/python/generator/generator.py
-# Autogenerated From : scripts/builtin/lmPredict.dml
+# Autogenerated From : scripts/builtin/unique.dml
from typing import Dict, Iterable
@@ -29,13 +29,9 @@ from systemds.script_building.dag import OutputType
from systemds.utils.consts import VALID_INPUT_TYPES
-def lmPredict(X: Matrix,
- B: Matrix,
- ytest: Matrix,
- **kwargs: Dict[str, VALID_INPUT_TYPES]):
+def unique(X: Matrix):
- params_dict = {'X': X, 'B': B, 'ytest': ytest}
- params_dict.update(kwargs)
+ params_dict = {'X': X}
return Matrix(X.sds_context,
- 'lmPredict',
+ 'unique',
named_input_nodes=params_dict)
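For reference, the simple set-operation wrappers generated above are plain single-output calls. A usage sketch (illustrative only; it assumes numpy inputs and that the updated algorithm package exports these functions):

    import numpy as np
    from systemds.context import SystemDSContext
    from systemds.operator.algorithm import union, unique, symmetricDifference

    with SystemDSContext() as sds:
        X = sds.from_numpy(np.array([[1.0], [2.0], [3.0]]))
        Y = sds.from_numpy(np.array([[2.0], [3.0], [4.0]]))
        print(union(X, Y).compute())                # unique rows occurring in X or Y
        print(unique(X).compute())                  # distinct rows of X
        print(symmetricDifference(X, Y).compute())  # rows in exactly one of X and Y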
diff --git
a/src/main/python/systemds/operator/algorithm/builtin/decisionTree.py
b/src/main/python/systemds/operator/algorithm/builtin/xgboost.py
similarity index 53%
copy from src/main/python/systemds/operator/algorithm/builtin/decisionTree.py
copy to src/main/python/systemds/operator/algorithm/builtin/xgboost.py
index 6d73186122..80db85bf0e 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/decisionTree.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/xgboost.py
@@ -20,7 +20,7 @@
# -------------------------------------------------------------
# Autogenerated By : src/main/python/generator/generator.py
-# Autogenerated From : scripts/builtin/decisionTree.dml
+# Autogenerated From : scripts/builtin/xgboost.dml
from typing import Dict, Iterable
@@ -29,21 +29,21 @@ from systemds.script_building.dag import OutputType
from systemds.utils.consts import VALID_INPUT_TYPES
-def decisionTree(X: Matrix,
- Y: Matrix,
- R: Matrix,
- verbose: bool,
- **kwargs: Dict[str, VALID_INPUT_TYPES]):
+def xgboost(X: Matrix,
+ y: Matrix,
+ **kwargs: Dict[str, VALID_INPUT_TYPES]):
"""
- :param a: vector, other positive Integers indicate the number of categories
- :param If: not provided by default all variables are assumed to be scale
- :param bins: Number of equiheight bins per scale feature to choose thresholds
- :param depth: Maximum depth of the learned tree
- :param verbose: boolean specifying if the algorithm should print information while executing
- :return: 'OperationNode' containing information: & if the feature is categorical) & looks at if j is an internal node, otherwise 0 & as r input vector & of the subset of values & 6,7,... if j is categorical & a leaf node: number of misclassified samples reaching at node j & at m[6,j] if the feature chosen for j is scale, & feature chosen for j is categorical rows 6,7,... depict the value subset chosen for j & a leaf node 1 if j is impure and the number of samples at j > threshold, ot [...]
+ :param Feature: a scalar feature and features 2 is a categorical feature
+ :param If: not provided by default all variables are assumed to be scale (1)
+ :param sml_type: Supervised machine learning type: 1 = Regression(default), 2 = Classification
+ :param num_trees: Number of trees to be created in the xgboost model
+ :param learning_rate: Alias: eta. After each boosting step the learning rate controls the weights of the new predictions
+ :param max_depth: Maximum depth of a tree. Increasing this value will make the model more complex and more likely to overfit
+ :param lambda: L2 regularization term on weights. Increasing this value will make model more conservative and reduce amount of leaves of a tree
+ :return: 'OperationNode' containing feature id if the feature is categorical) & looks at if j is an internal node, otherwise 0 & stored at m[6,j] if the feature chosen for j is scale, & feature chosen for j is categorical rows 6,7,... depict & chosen for j & a leaf node 1 if j is impure and the number of samples at j > threshold, otherwise 0
"""
- params_dict = {'X': X, 'Y': Y, 'R': R, 'verbose': verbose}
+ params_dict = {'X': X, 'y': y}
params_dict.update(kwargs)
return Matrix(X.sds_context,
- 'decisionTree',
+ 'xgboost',
named_input_nodes=params_dict)
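A hedged usage sketch for the xgboost wrapper generated above (the keyword names are taken from its docstring; the values are illustrative, not recommendations, and X/y are assumed to be existing matrix nodes):

    from systemds.operator.algorithm import xgboost

    # X: n x m feature matrix node, y: n x 1 label node already in the DAG
    model = xgboost(X, y, sml_type=1, num_trees=10, learning_rate=0.3, max_depth=6)
    M = model.compute()

Note that the lambda entry in this docstring is still the DML-side name and, unlike reg in the other builtins, cannot be forwarded as a Python keyword.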
diff --git a/src/main/python/systemds/operator/nodes/scalar.py
b/src/main/python/systemds/operator/nodes/scalar.py
index a4d62928ea..815858fec7 100644
--- a/src/main/python/systemds/operator/nodes/scalar.py
+++ b/src/main/python/systemds/operator/nodes/scalar.py
@@ -37,7 +37,7 @@ from systemds.utils.converters import numpy_to_matrix_block
class Scalar(OperationNode):
__assign: bool
- def __init__(self, sds_context: 'SystemDSContext', operation: str,
+ def __init__(self, sds_context, operation: str,
unnamed_input_nodes: Iterable[VALID_INPUT_TYPES] = None,
named_input_nodes: Dict[str, VALID_INPUT_TYPES] = None,
output_type: OutputType = OutputType.DOUBLE,
diff --git a/src/main/python/tests/algorithms/test_gmm.py
b/src/main/python/tests/algorithms/test_gmm.py
index 10a561fe0e..37475570da 100644
--- a/src/main/python/tests/algorithms/test_gmm.py
+++ b/src/main/python/tests/algorithms/test_gmm.py
@@ -49,7 +49,7 @@ class TestGMM(unittest.TestCase):
n_gaussian = 4
[_, _, _, _, mu, precision_cholesky, weight] = gmm(
- features, False, n_components=n_gaussian, seed=10)
+ features, n_components=n_gaussian, seed=10)
[_, pp] = gmmPredict(
test, weight, mu, precision_cholesky, model=self.sds.scalar("VVV"))
diff --git a/src/main/python/tests/algorithms/test_gmm_train_predict.py
b/src/main/python/tests/algorithms/test_gmm_train_predict.py
index 26c3128cfa..151b531549 100644
--- a/src/main/python/tests/algorithms/test_gmm_train_predict.py
+++ b/src/main/python/tests/algorithms/test_gmm_train_predict.py
@@ -47,7 +47,7 @@ class TestGMM(unittest.TestCase):
n_gaussian = 4
[_, _, _, _, mu, precision_cholesky, weight] = gmm(
- features, False, n_components=n_gaussian, seed=10)
+ features, n_components=n_gaussian, seed=10)
model = sds_train.list(mu, precision_cholesky, weight)
model.write(self.model_path).compute()
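The two test updates above reflect the generator change: arguments with DML defaults are no longer positional, so an optional flag such as verbose is passed, if at all, as a keyword. A sketch of the adjusted call (assuming a feature matrix node features):

    from systemds.operator.algorithm import gmm

    # verbose now takes its default from the DML builtin; pass it only when overriding
    [_, _, _, _, mu, prec_chol, weight] = gmm(features, n_components=4, seed=10, verbose=False)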
diff --git a/src/test/scripts/functions/builtin/l2svm.dml
b/src/test/scripts/functions/builtin/l2svm.dml
index 9b9502dbfa..20438ee457 100644
--- a/src/test/scripts/functions/builtin/l2svm.dml
+++ b/src/test/scripts/functions/builtin/l2svm.dml
@@ -21,5 +21,5 @@
X = read($X)
Y = read($Y)
-model= l2svm(X=X, Y=Y, intercept = $inc, epsilon = $eps, lambda = $lam, maxIterations = $max )
+model= l2svm(X=X, Y=Y, intercept = $inc, epsilon = $eps, reg = $lam, maxIterations = $max )
write(model, $model)
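The rename from lambda to reg in the DML builtins is what lets the Python wrappers forward the regularization parameter as an ordinary keyword, since lambda is reserved in Python. A minimal sketch, assuming numpy feature/label arrays with labels in {-1, +1}:

    import numpy as np
    from systemds.context import SystemDSContext
    from systemds.operator.algorithm import l2svm

    with SystemDSContext() as sds:
        X = sds.from_numpy(np.random.rand(100, 10))
        Y = sds.from_numpy(np.where(np.random.rand(100, 1) > 0.5, 1.0, -1.0))
        # reg replaces the old DML keyword lambda; remaining defaults come from the builtin
        model = l2svm(X, Y, reg=1.0, maxIterations=100).compute()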
diff --git a/src/test/scripts/functions/builtin/multisvm.dml
b/src/test/scripts/functions/builtin/multisvm.dml
index b95b56f938..b1fb5fd9e7 100644
--- a/src/test/scripts/functions/builtin/multisvm.dml
+++ b/src/test/scripts/functions/builtin/multisvm.dml
@@ -22,5 +22,5 @@
X = read($X)
Y = read($Y)
model = msvm(X=X, Y=Y, intercept = $inc,
- epsilon = $eps, lambda = $lam, maxIterations = $max )
+ epsilon = $eps, reg = $lam, maxIterations = $max )
write(model, $model)
diff --git a/src/test/scripts/functions/federated/FederatedAlsCGTest.dml
b/src/test/scripts/functions/federated/FederatedAlsCGTest.dml
index 05258f4ac6..1b38603468 100644
--- a/src/test/scripts/functions/federated/FederatedAlsCGTest.dml
+++ b/src/test/scripts/functions/federated/FederatedAlsCGTest.dml
@@ -23,12 +23,12 @@ X = federated(addresses=list($in_X1, $in_X2),
ranges=list(list(0, 0), list($rows, $cols), list($rows, 0), list($rows * 2,
$cols)));
rank = $in_rank;
-reg = $in_reg;
-lambda = $in_lambda;
+regType = $in_reg;
+reg = $in_lambda;
maxi = $in_maxi;
thr = $in_thr;
-[U, V] = alsCG(X = X, rank = rank, reg = reg, lambda = lambda, maxi = maxi, check = TRUE, thr = thr);
+[U, V] = alsCG(X = X, rank = rank, regType = regType, reg = reg, maxi = maxi, check = TRUE, thr = thr);
Z = U %*% V;
diff --git
a/src/test/scripts/functions/federated/FederatedAlsCGTestReference.dml
b/src/test/scripts/functions/federated/FederatedAlsCGTestReference.dml
index a73efba1a6..64ef02469e 100644
--- a/src/test/scripts/functions/federated/FederatedAlsCGTestReference.dml
+++ b/src/test/scripts/functions/federated/FederatedAlsCGTestReference.dml
@@ -22,12 +22,12 @@
X = rbind(read($in_X1), read($in_X2));
rank = $in_rank;
-reg = $in_reg;
-lambda = $in_lambda;
+regType = $in_reg;
+reg = $in_lambda;
maxi = $in_maxi;
thr = $in_thr;
-[U, V] = alsCG(X = X, rank = rank, reg = reg, lambda = lambda, maxi = maxi, check = TRUE, thr = thr);
+[U, V] = alsCG(X = X, rank = rank, regType = regType, reg = reg, maxi = maxi, check = TRUE, thr = thr);
Z = U %*% V;
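In alsCG the old reg argument (the regularization method) becomes regType and the old lambda becomes reg, as the two federated tests above show. From the Python side that reads roughly as follows (a sketch with illustrative values, assuming a matrix node X and that "L2" is a valid regularization type):

    from systemds.operator.algorithm import alsCG

    # formerly: reg -> regType (regularization method), lambda -> reg (regularization weight)
    [U, V] = alsCG(X, rank=10, regType="L2", reg=0.000001, maxi=50, check=True, thr=0.0001)
    U_vals = U.compute()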
diff --git a/src/test/scripts/functions/federated/FederatedL2SVMTest.dml
b/src/test/scripts/functions/federated/FederatedL2SVMTest.dml
index 7ae1a57fcb..e31a08d29f 100644
--- a/src/test/scripts/functions/federated/FederatedL2SVMTest.dml
+++ b/src/test/scripts/functions/federated/FederatedL2SVMTest.dml
@@ -30,6 +30,6 @@ else {
ranges=list(list(0, 0), list($rows / 2, $cols), list($rows / 2, 0),
list($rows, $cols)))
}
-model = l2svm(X=X, Y=Y, intercept = FALSE, epsilon = 1e-12, lambda = 1, maxIterations = 100)
+model = l2svm(X=X, Y=Y, intercept = FALSE, epsilon = 1e-12, reg = 1, maxIterations = 100)
write(model, $out)
diff --git
a/src/test/scripts/functions/federated/FederatedL2SVMTestReference.dml
b/src/test/scripts/functions/federated/FederatedL2SVMTestReference.dml
index b5439d425e..e5e428abe6 100644
--- a/src/test/scripts/functions/federated/FederatedL2SVMTestReference.dml
+++ b/src/test/scripts/functions/federated/FederatedL2SVMTestReference.dml
@@ -27,6 +27,6 @@ if( $4 ) {
else
X = rbind(read($1), read($2))
-model = l2svm(X=X, Y=Y, intercept = FALSE, epsilon = 1e-12, lambda = 1, maxIterations = 100)
+model = l2svm(X=X, Y=Y, intercept = FALSE, epsilon = 1e-12, reg = 1, maxIterations = 100)
write(model, $5)
diff --git a/src/test/scripts/functions/federated/FederatedMSVMTest.dml
b/src/test/scripts/functions/federated/FederatedMSVMTest.dml
index 3d9cc8c910..b44dd727e1 100644
--- a/src/test/scripts/functions/federated/FederatedMSVMTest.dml
+++ b/src/test/scripts/functions/federated/FederatedMSVMTest.dml
@@ -30,6 +30,6 @@ else {
ranges=list(list(0, 0), list($rows / 2, $cols), list($rows / 2, 0),
list($rows, $cols)))
}
-model = msvm(X=X, Y=Y, intercept = FALSE, epsilon = 1e-12, lambda = 1, maxIterations = 100, verbose = FALSE)
+model = msvm(X=X, Y=Y, intercept = FALSE, epsilon = 1e-12, reg = 1, maxIterations = 100, verbose = FALSE)
write(model, $out)
diff --git
a/src/test/scripts/functions/federated/FederatedMSVMTestReference.dml
b/src/test/scripts/functions/federated/FederatedMSVMTestReference.dml
index 19fad3a09c..e4da0f346a 100644
--- a/src/test/scripts/functions/federated/FederatedMSVMTestReference.dml
+++ b/src/test/scripts/functions/federated/FederatedMSVMTestReference.dml
@@ -27,6 +27,6 @@ if( $4 ) {
else
X = rbind(read($1), read($2))
-model = msvm(X=X, Y=Y, intercept = FALSE, epsilon = 1e-12, lambda = 1, maxIterations = 100, verbose = FALSE)
+model = msvm(X=X, Y=Y, intercept = FALSE, epsilon = 1e-12, reg = 1, maxIterations = 100, verbose = FALSE)
write(model, $5)
diff --git a/src/test/scripts/functions/federated/FederatedYL2SVMTest.dml
b/src/test/scripts/functions/federated/FederatedYL2SVMTest.dml
index b1f5fdc6d7..a56619f1aa 100644
--- a/src/test/scripts/functions/federated/FederatedYL2SVMTest.dml
+++ b/src/test/scripts/functions/federated/FederatedYL2SVMTest.dml
@@ -23,5 +23,5 @@ X = federated(addresses=list($in_X1, $in_X2),
ranges=list(list(0, 0), list($rows / 2, $cols), list($rows / 2, 0),
list($rows, $cols)))
Y = federated(addresses=list($in_Y1, $in_Y2),
ranges=list(list(0, 0), list($rows / 2, 1), list($rows / 2, 0),
list($rows, 1)))
-model = l2svm(X=X, Y=Y, intercept = FALSE, epsilon = 1e-12, lambda = 1, maxIterations = 100)
+model = l2svm(X=X, Y=Y, intercept = FALSE, epsilon = 1e-12, reg = 1, maxIterations = 100)
write(model, $out)
diff --git a/src/test/scripts/functions/federated/FederatedYL2SVMTest2.dml
b/src/test/scripts/functions/federated/FederatedYL2SVMTest2.dml
index 4e72b49266..aa73e3a463 100644
--- a/src/test/scripts/functions/federated/FederatedYL2SVMTest2.dml
+++ b/src/test/scripts/functions/federated/FederatedYL2SVMTest2.dml
@@ -23,5 +23,5 @@ X = federated(addresses=list($in_X1),
ranges=list(list(0, 0), list($rows / 2, $cols)))
Y = federated(addresses=list($in_Y1),
ranges=list(list(0, 0), list($rows / 2, 1)))
-model = l2svm(X=X, Y=Y, intercept = FALSE, epsilon = 1e-12, lambda = 1, maxIterations = 100)
+model = l2svm(X=X, Y=Y, intercept = FALSE, epsilon = 1e-12, reg = 1, maxIterations = 100)
write(model, $out)
diff --git
a/src/test/scripts/functions/federated/FederatedYL2SVMTest2Reference.dml
b/src/test/scripts/functions/federated/FederatedYL2SVMTest2Reference.dml
index 486e856f71..03c2b5f0bf 100644
--- a/src/test/scripts/functions/federated/FederatedYL2SVMTest2Reference.dml
+++ b/src/test/scripts/functions/federated/FederatedYL2SVMTest2Reference.dml
@@ -21,5 +21,5 @@
X = read($1)
Y = read($3)
-model = l2svm(X=X, Y=Y, intercept = FALSE, epsilon = 1e-12, lambda = 1, maxIterations = 100)
+model = l2svm(X=X, Y=Y, intercept = FALSE, epsilon = 1e-12, reg = 1, maxIterations = 100)
write(model, $5)
diff --git
a/src/test/scripts/functions/federated/FederatedYL2SVMTestReference.dml
b/src/test/scripts/functions/federated/FederatedYL2SVMTestReference.dml
index c17a6559c1..26233a3ab4 100644
--- a/src/test/scripts/functions/federated/FederatedYL2SVMTestReference.dml
+++ b/src/test/scripts/functions/federated/FederatedYL2SVMTestReference.dml
@@ -21,5 +21,5 @@
X = rbind(read($1), read($2))
Y = rbind(read($3), read($4))
-model = l2svm(X=X, Y=Y, intercept = FALSE, epsilon = 1e-12, lambda = 1, maxIterations = 100)
+model = l2svm(X=X, Y=Y, intercept = FALSE, epsilon = 1e-12, reg = 1, maxIterations = 100)
write(model, $5)
diff --git a/src/test/scripts/functions/lineage/LineageReuseAlg5.dml
b/src/test/scripts/functions/lineage/LineageReuseAlg5.dml
index 57af57542e..660c705f28 100644
--- a/src/test/scripts/functions/lineage/LineageReuseAlg5.dml
+++ b/src/test/scripts/functions/lineage/LineageReuseAlg5.dml
@@ -31,7 +31,7 @@ return (Matrix[Double] loss) {
N = 1000;
no_lamda = 10;
stp = (0.1 - 0.0001)/no_lamda;
-lamda = 0.0001;
+reg = 0.0001;
Rbeta = matrix(0, rows=N+1, cols=no_lamda*2);
Rloss = matrix(0, rows=no_lamda*2, cols=1);
i = 1;
@@ -43,18 +43,18 @@ y = ceil(y);
for (l in 1:no_lamda)
{
beta = l2svm(X=X, Y=y, intercept=FALSE, epsilon=1e-12,
- lambda = lamda, verbose=FALSE);
+ reg = reg, verbose=FALSE);
Rbeta[1:nrow(beta),i] = beta;
Rloss[i,] = l2norm(X, y, beta, FALSE);
i = i + 1;
beta = l2svm(X=X, Y=y, intercept=TRUE, epsilon=1e-12,
- lambda = lamda, verbose=FALSE);
+ reg = reg, verbose=FALSE);
Rbeta[1:nrow(beta),i] = beta;
Rloss[i,] = l2norm(X, y, beta, TRUE);
i = i + 1;
- lamda = lamda + stp;
+ reg = reg + stp;
}
leastLoss = rowIndexMin(t(Rloss));
diff --git a/src/test/scripts/functions/lineage/LineageTraceParforMSVM.dml
b/src/test/scripts/functions/lineage/LineageTraceParforMSVM.dml
index 23f39b0814..ebd360b731 100644
--- a/src/test/scripts/functions/lineage/LineageTraceParforMSVM.dml
+++ b/src/test/scripts/functions/lineage/LineageTraceParforMSVM.dml
@@ -20,7 +20,7 @@
#-------------------------------------------------------------
msvm2 = function(Matrix[Double] X, Matrix[Double] Y, Boolean intercept = FALSE,
- Double epsilon = 0.001, Double lambda = 1.0, Integer maxIterations = 100, Boolean verbose = FALSE)
+ Double epsilon = 0.001, Double reg = 1.0, Integer maxIterations = 100, Boolean verbose = FALSE)
return(Matrix[Double] model)
{
if(min(Y) < 0)
@@ -43,7 +43,7 @@ msvm2 = function(Matrix[Double] X, Matrix[Double] Y, Boolean intercept = FALSE,
parfor(class in 1:max(Y), opt=CONSTRAINED, par=4, mode=REMOTE_SPARK) {
Y_local = 2 * (Y == class) - 1
w[,class] = l2svm(X=X, Y=Y_local, intercept=intercept,
- epsilon=epsilon, lambda=lambda, maxIterations=maxIterations,
+ epsilon=epsilon, reg=reg, maxIterations=maxIterations,
verbose= verbose, columnId=class)
}
diff --git
a/src/test/scripts/functions/pipelines/topkcleaningClassificationTest.dml
b/src/test/scripts/functions/pipelines/topkcleaningClassificationTest.dml
index 296165c029..1b035b88f3 100644
--- a/src/test/scripts/functions/pipelines/topkcleaningClassificationTest.dml
+++ b/src/test/scripts/functions/pipelines/topkcleaningClassificationTest.dml
@@ -118,9 +118,9 @@ return(Matrix[Double] output, Matrix[Double] error)
if(is.na(as.scalar(evalFunHp[1,1])))
{
nc = max(Y);
- params = list("intercept", "lambda", "epsilon")
+ params = list("intercept", "reg", "epsilon")
paramRanges = list(seq(0, 1), 10^seq(1,-3), 10^seq(1,-5));
- trainArgs = list(X=X, Y=Y, intercept=-1, lambda=-1, epsilon=-1, maxIterations=1000, verbose=FALSE);
+ trainArgs = list(X=X, Y=Y, intercept=-1, reg=-1, epsilon=-1, maxIterations=1000, verbose=FALSE);
dataArgs = list("X", "Y");
[B1,opt] = gridSearch(X=X, y=Y, train="msvm", predict="accuracyMSVM",
numB=(ncol(X)+1)*(nc),
params=params, paramValues=paramRanges, dataArgs=dataArgs,
trainArgs=trainArgs, cv=TRUE, cvk=3, verbose=TRUE);
@@ -134,7 +134,7 @@ return(Matrix[Double] output, Matrix[Double] error)
a = 0
}
else {
- beta = msvm(X=X, Y=Y, intercept=as.scalar(evalFunHp[1,1]), lambda=as.scalar(evalFunHp[1,2]), epsilon=as.scalar(evalFunHp[1,3]),
+ beta = msvm(X=X, Y=Y, intercept=as.scalar(evalFunHp[1,1]), reg=as.scalar(evalFunHp[1,2]), epsilon=as.scalar(evalFunHp[1,3]),
maxIterations=1000, verbose=FALSE);
yhat = msvmPredict(X=Xtest, W=beta);
yhat = rowIndexMax(yhat)