This is an automated email from the ASF dual-hosted git repository.
baunsgaard pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/systemds.git
The following commit(s) were added to refs/heads/master by this push:
new 00d60e0 [MINOR] Add New Python Builtins and Fix Python Build
00d60e0 is described below
commit 00d60e04d607402c832be5e9643be4effe8662eb
Author: baunsgaard <[email protected]>
AuthorDate: Sun Apr 11 18:41:39 2021 +0200
[MINOR] Add New Python Builtins and Fix Python Build
---
src/main/python/generator/generator.py | 2 +-
.../python/systemds/operator/algorithm/__init__.py | 152 +++++++++++----------
.../builtin/{imputeByMean.py => __init__.py} | 19 ---
.../systemds/operator/algorithm/builtin/bandit.py | 9 +-
.../systemds/operator/algorithm/builtin/cox.py | 59 ++++++++
.../builtin/{smote.py => gaussianClassifier.py} | 17 ++-
.../operator/algorithm/builtin/imputeByMean.py | 5 +-
.../operator/algorithm/builtin/imputeByMedian.py | 5 +-
.../builtin/{imputeByMean.py => imputeByMode.py} | 6 +-
.../systemds/operator/algorithm/builtin/km.py | 58 ++++++++
.../builtin/{smote.py => l2svmPredict.py} | 16 ++-
.../operator/algorithm/builtin/randomForest.py | 59 ++++++++
.../algorithm/builtin/{bandit.py => sherlock.py} | 14 +-
.../operator/algorithm/builtin/sherlockPredict.py | 69 ++++++++++
.../systemds/operator/algorithm/builtin/smote.py | 5 +-
.../{imputeByMedian.py => splitBalanced.py} | 9 +-
.../builtin/{imputeByMedian.py => tomeklink.py} | 15 +-
.../operator/algorithm/builtin/vectorToCsv.py | 8 +-
18 files changed, 388 insertions(+), 139 deletions(-)
diff --git a/src/main/python/generator/generator.py b/src/main/python/generator/generator.py
index 2aeda04..6965d46 100644
--- a/src/main/python/generator/generator.py
+++ b/src/main/python/generator/generator.py
@@ -42,7 +42,7 @@ class PythonAPIFileGenerator(object):
init_path = os.path.join(os.path.dirname(os.path.dirname(
__file__)), 'systemds', 'operator', 'algorithm', '__init__.py')
- init_import = u"from .builtin.{function} import {function} \n"
+ init_import = u"from .builtin import {function} \n"
init_all = u"__all__ = {functions} \n"
def __init__(self, source_path: str, extension: str = 'py'):
diff --git a/src/main/python/systemds/operator/algorithm/__init__.py b/src/main/python/systemds/operator/algorithm/__init__.py
index 90e9de9..51d1f8e 100644
--- a/src/main/python/systemds/operator/algorithm/__init__.py
+++ b/src/main/python/systemds/operator/algorithm/__init__.py
@@ -21,75 +21,85 @@
# Autogenerated By : src/main/python/generator/generator.py
-from .builtin.abstain import abstain
-from .builtin.als import als
-from .builtin.alsCG import alsCG
-from .builtin.alsDS import alsDS
-from .builtin.alsPredict import alsPredict
-from .builtin.alsTopkPredict import alsTopkPredict
-from .builtin.arima import arima
-from .builtin.bandit import bandit
-from .builtin.bivar import bivar
-from .builtin.components import components
-from .builtin.confusionMatrix import confusionMatrix
-from .builtin.cor import cor
-from .builtin.cspline import cspline
-from .builtin.csplineDS import csplineDS
-from .builtin.cvlm import cvlm
-from .builtin.dbscan import dbscan
-from .builtin.decisionTree import decisionTree
-from .builtin.discoverFD import discoverFD
-from .builtin.dist import dist
-from .builtin.getAccuracy import getAccuracy
-from .builtin.glm import glm
-from .builtin.gmm import gmm
-from .builtin.gmmPredict import gmmPredict
-from .builtin.gnmf import gnmf
-from .builtin.gridSearch import gridSearch
-from .builtin.hyperband import hyperband
-from .builtin.img_brightness import img_brightness
-from .builtin.img_crop import img_crop
-from .builtin.img_mirror import img_mirror
-from .builtin.imputeByFD import imputeByFD
-from .builtin.imputeByMean import imputeByMean
-from .builtin.imputeByMedian import imputeByMedian
-from .builtin.intersect import intersect
-from .builtin.kmeans import kmeans
-from .builtin.kmeansPredict import kmeansPredict
-from .builtin.knnbf import knnbf
-from .builtin.l2svm import l2svm
-from .builtin.lasso import lasso
-from .builtin.lm import lm
-from .builtin.lmCG import lmCG
-from .builtin.lmDS import lmDS
-from .builtin.logSumExp import logSumExp
-from .builtin.msvm import msvm
-from .builtin.msvmPredict import msvmPredict
-from .builtin.multiLogReg import multiLogReg
-from .builtin.multiLogRegPredict import multiLogRegPredict
-from .builtin.na_locf import na_locf
-from .builtin.naivebayes import naivebayes
-from .builtin.normalize import normalize
-from .builtin.outlier import outlier
-from .builtin.outlierByArima import outlierByArima
-from .builtin.outlierByIQR import outlierByIQR
-from .builtin.outlierBySd import outlierBySd
-from .builtin.pca import pca
-from .builtin.pnmf import pnmf
-from .builtin.ppca import ppca
-from .builtin.scale import scale
-from .builtin.scaleApply import scaleApply
-from .builtin.sigmoid import sigmoid
-from .builtin.slicefinder import slicefinder
-from .builtin.smote import smote
-from .builtin.split import split
-from .builtin.statsNA import statsNA
-from .builtin.steplm import steplm
-from .builtin.toOneHot import toOneHot
-from .builtin.univar import univar
-from .builtin.vectorToCsv import vectorToCsv
-from .builtin.winsorize import winsorize
-from .builtin.xdummy1 import xdummy1
-from .builtin.xdummy2 import xdummy2
+from .builtin import abstain
+from .builtin import als
+from .builtin import alsCG
+from .builtin import alsDS
+from .builtin import alsPredict
+from .builtin import alsTopkPredict
+from .builtin import arima
+from .builtin import bandit
+from .builtin import bivar
+from .builtin import components
+from .builtin import confusionMatrix
+from .builtin import cor
+from .builtin import cox
+from .builtin import cspline
+from .builtin import csplineDS
+from .builtin import cvlm
+from .builtin import dbscan
+from .builtin import decisionTree
+from .builtin import discoverFD
+from .builtin import dist
+from .builtin import gaussianClassifier
+from .builtin import getAccuracy
+from .builtin import glm
+from .builtin import gmm
+from .builtin import gmmPredict
+from .builtin import gnmf
+from .builtin import gridSearch
+from .builtin import hyperband
+from .builtin import img_brightness
+from .builtin import img_crop
+from .builtin import img_mirror
+from .builtin import imputeByFD
+from .builtin import imputeByMean
+from .builtin import imputeByMedian
+from .builtin import imputeByMode
+from .builtin import intersect
+from .builtin import km
+from .builtin import kmeans
+from .builtin import kmeansPredict
+from .builtin import knnbf
+from .builtin import l2svm
+from .builtin import l2svmPredict
+from .builtin import lasso
+from .builtin import lm
+from .builtin import lmCG
+from .builtin import lmDS
+from .builtin import logSumExp
+from .builtin import msvm
+from .builtin import msvmPredict
+from .builtin import multiLogReg
+from .builtin import multiLogRegPredict
+from .builtin import na_locf
+from .builtin import naivebayes
+from .builtin import normalize
+from .builtin import outlier
+from .builtin import outlierByArima
+from .builtin import outlierByIQR
+from .builtin import outlierBySd
+from .builtin import pca
+from .builtin import pnmf
+from .builtin import ppca
+from .builtin import randomForest
+from .builtin import scale
+from .builtin import scaleApply
+from .builtin import sherlock
+from .builtin import sherlockPredict
+from .builtin import sigmoid
+from .builtin import slicefinder
+from .builtin import smote
+from .builtin import split
+from .builtin import splitBalanced
+from .builtin import statsNA
+from .builtin import steplm
+from .builtin import toOneHot
+from .builtin import tomeklink
+from .builtin import univar
+from .builtin import vectorToCsv
+from .builtin import winsorize
+from .builtin import xdummy1
+from .builtin import xdummy2
-__all__ = [abstain, als, alsCG, alsDS, alsPredict, alsTopkPredict, arima,
bandit, bivar, components, confusionMatrix, cor, cspline, csplineDS, cvlm,
dbscan, decisionTree, discoverFD, dist, getAccuracy, glm, gmm, gmmPredict,
gnmf, gridSearch, hyperband, img_brightness, img_crop, img_mirror, imputeByFD,
imputeByMean, imputeByMedian, intersect, kmeans, kmeansPredict, knnbf, l2svm,
lasso, lm, lmCG, lmDS, logSumExp, msvm, msvmPredict, multiLogReg,
multiLogRegPredict, na_locf, naivebayes, norm [...]
+__all__ = [abstain, als, alsCG, alsDS, alsPredict, alsTopkPredict, arima,
bandit, bivar, components, confusionMatrix, cor, cox, cspline, csplineDS, cvlm,
dbscan, decisionTree, discoverFD, dist, gaussianClassifier, getAccuracy, glm,
gmm, gmmPredict, gnmf, gridSearch, hyperband, img_brightness, img_crop,
img_mirror, imputeByFD, imputeByMean, imputeByMedian, imputeByMode, intersect,
km, kmeans, kmeansPredict, knnbf, l2svm, l2svmPredict, lasso, lm, lmCG, lmDS,
logSumExp, msvm, msvmPredict, m [...]
diff --git a/src/main/python/systemds/operator/algorithm/builtin/imputeByMean.py b/src/main/python/systemds/operator/algorithm/builtin/__init__.py
similarity index 62%
copy from src/main/python/systemds/operator/algorithm/builtin/imputeByMean.py
copy to src/main/python/systemds/operator/algorithm/builtin/__init__.py
index 393bbc8..e66abb4 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/imputeByMean.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/__init__.py
@@ -18,22 +18,3 @@
# under the License.
#
# -------------------------------------------------------------
-
-# Autogenerated By : src/main/python/generator/generator.py
-# Autogenerated From : scripts/builtin/imputeByMean.dml
-
-from typing import Dict, Iterable
-
-from systemds.operator import OperationNode
-from systemds.script_building.dag import OutputType
-from systemds.utils.consts import VALID_INPUT_TYPES
-
-def imputeByMean(X: OperationNode) -> OperationNode:
-
-
- X._check_matrix_op()
- params_dict = {'X':X}
- return OperationNode(X.sds_context, 'imputeByMean',
named_input_nodes=params_dict, output_type=OutputType.MATRIX)
-
-
-
\ No newline at end of file
diff --git a/src/main/python/systemds/operator/algorithm/builtin/bandit.py b/src/main/python/systemds/operator/algorithm/builtin/bandit.py
index b2a4d0b..8c7a475 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/bandit.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/bandit.py
@@ -28,17 +28,14 @@ from systemds.operator import OperationNode
from systemds.script_building.dag import OutputType
from systemds.utils.consts import VALID_INPUT_TYPES
-def bandit(X_train: OperationNode, Y_train: OperationNode, X_val:
OperationNode, Y_val: OperationNode, mask: OperationNode, schema:
OperationNode, lp: OperationNode, primitives: OperationNode, param:
OperationNode, isWeighted: bool, **kwargs: Dict[str, VALID_INPUT_TYPES]) ->
OperationNode:
+def bandit(X_train: OperationNode, Y_train: OperationNode, metaList: Iterable,
targetList: Iterable, lp: OperationNode, primitives: OperationNode, param:
OperationNode, **kwargs: Dict[str, VALID_INPUT_TYPES]) -> OperationNode:
X_train._check_matrix_op()
Y_train._check_matrix_op()
- X_val._check_matrix_op()
- Y_val._check_matrix_op()
- mask._check_matrix_op()
- params_dict = {'X_train':X_train, 'Y_train':Y_train, 'X_val':X_val,
'Y_val':Y_val, 'mask':mask, 'schema':schema, 'lp':lp, 'primitives':primitives,
'param':param, 'isWeighted':isWeighted}
+ params_dict = {'X_train':X_train, 'Y_train':Y_train, 'metaList':metaList,
'targetList':targetList, 'lp':lp, 'primitives':primitives, 'param':param}
params_dict.update(kwargs)
- return OperationNode(X_train.sds_context, 'bandit',
named_input_nodes=params_dict, output_type=OutputType.LIST,
number_of_outputs=3, output_types=[OutputType.FRAME, OutputType.MATRIX,
OutputType.MATRIX])
+ return OperationNode(X_train.sds_context, 'bandit',
named_input_nodes=params_dict, output_type=OutputType.LIST,
number_of_outputs=4, output_types=[OutputType.FRAME, OutputType.MATRIX,
OutputType.MATRIX, OutputType.FRAME])
\ No newline at end of file
diff --git a/src/main/python/systemds/operator/algorithm/builtin/cox.py b/src/main/python/systemds/operator/algorithm/builtin/cox.py
new file mode 100644
index 0000000..fa2b2aa
--- /dev/null
+++ b/src/main/python/systemds/operator/algorithm/builtin/cox.py
@@ -0,0 +1,59 @@
+# -------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# -------------------------------------------------------------
+
+# Autogenerated By : src/main/python/generator/generator.py
+# Autogenerated From : scripts/builtin/cox.dml
+
+from typing import Dict, Iterable
+
+from systemds.operator import OperationNode
+from systemds.script_building.dag import OutputType
+from systemds.utils.consts import VALID_INPUT_TYPES
+
+def cox(X: OperationNode, TE: OperationNode, F: OperationNode, R:
OperationNode, **kwargs: Dict[str, VALID_INPUT_TYPES]) -> OperationNode:
+ """
+ :param X: Location to read the input matrix X containing the survival data
+ :param containing: information
+ :param TE: Column indices of X as a column vector which contain timestamp
+ :param F: Column indices of X as a column vector which are to be used for
+ :param fitting: model
+ :param R: If factors (categorical variables) are available in the input
matrix
+ :param the: X
+ :param each: needs to be removed from X; in this case the start
+ :param and: corresponding to the baseline level need to be the same;
+ :param if: not provided by default all variables are considered to be
continuous
+ :param alpha: Parameter to compute a 100*(1-alpha)% confidence interval
for the betas
+ :param tol: Tolerance ("epsilon")
+ :param moi: Max. number of outer (Newton) iterations
+ :param mii: Max. number of inner (conjugate gradient) iterations, 0 = no
max
+ :return: 'OperationNode' containing a summary of some statistics of the
fitted model: & matrix rt that contains the order-preserving recoded timestamps
from x & which is matrix x with sorted timestamps & matrix mf that contains the
column indices of x with the baseline factors removed (if available)
+ """
+
+ X._check_matrix_op()
+ TE._check_matrix_op()
+ F._check_matrix_op()
+ R._check_matrix_op()
+ params_dict = {'X':X, 'TE':TE, 'F':F, 'R':R}
+ params_dict.update(kwargs)
+ return OperationNode(X.sds_context, 'cox', named_input_nodes=params_dict,
output_type=OutputType.LIST, number_of_outputs=6,
output_types=[OutputType.MATRIX, OutputType.MATRIX, OutputType.MATRIX,
OutputType.MATRIX, OutputType.MATRIX, OutputType.MATRIX])
+
+
+
\ No newline at end of file
diff --git a/src/main/python/systemds/operator/algorithm/builtin/smote.py b/src/main/python/systemds/operator/algorithm/builtin/gaussianClassifier.py
similarity index 64%
copy from src/main/python/systemds/operator/algorithm/builtin/smote.py
copy to src/main/python/systemds/operator/algorithm/builtin/gaussianClassifier.py
index 7b1f343..f371f8c 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/smote.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/gaussianClassifier.py
@@ -20,7 +20,7 @@
# -------------------------------------------------------------
# Autogenerated By : src/main/python/generator/generator.py
-# Autogenerated From : scripts/builtin/smote.dml
+# Autogenerated From : scripts/builtin/gaussianClassifier.dml
from typing import Dict, Iterable
@@ -28,13 +28,18 @@ from systemds.operator import OperationNode
from systemds.script_building.dag import OutputType
from systemds.utils.consts import VALID_INPUT_TYPES
-def smote(X: OperationNode, **kwargs: Dict[str, VALID_INPUT_TYPES]) ->
OperationNode:
+def gaussianClassifier(D: OperationNode, C: OperationNode, **kwargs: Dict[str,
VALID_INPUT_TYPES]) -> OperationNode:
+ """
+ :param varSmoothing: Smoothing factor for variances
+ :param verbose: Print accuracy of the training set
+ :return: 'OperationNode' containing
+ """
-
- X._check_matrix_op()
- params_dict = {'X':X}
+ D._check_matrix_op()
+ C._check_matrix_op()
+ params_dict = {'D':D, 'C':C}
params_dict.update(kwargs)
- return OperationNode(X.sds_context, 'smote',
named_input_nodes=params_dict, output_type=OutputType.MATRIX)
+ return OperationNode(D.sds_context, 'gaussianClassifier',
named_input_nodes=params_dict, output_type=OutputType.LIST,
number_of_outputs=4, output_types=[OutputType.MATRIX, OutputType.MATRIX,
OutputType.LIST, OutputType.MATRIX])
\ No newline at end of file
diff --git a/src/main/python/systemds/operator/algorithm/builtin/imputeByMean.py b/src/main/python/systemds/operator/algorithm/builtin/imputeByMean.py
index 393bbc8..e7ce9c0 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/imputeByMean.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/imputeByMean.py
@@ -28,11 +28,12 @@ from systemds.operator import OperationNode
from systemds.script_building.dag import OutputType
from systemds.utils.consts import VALID_INPUT_TYPES
-def imputeByMean(X: OperationNode) -> OperationNode:
+def imputeByMean(X: OperationNode, mask: OperationNode) -> OperationNode:
X._check_matrix_op()
- params_dict = {'X':X}
+ mask._check_matrix_op()
+ params_dict = {'X':X, 'mask':mask}
return OperationNode(X.sds_context, 'imputeByMean',
named_input_nodes=params_dict, output_type=OutputType.MATRIX)
diff --git a/src/main/python/systemds/operator/algorithm/builtin/imputeByMedian.py b/src/main/python/systemds/operator/algorithm/builtin/imputeByMedian.py
index cdefcee..ca049e7 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/imputeByMedian.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/imputeByMedian.py
@@ -28,11 +28,12 @@ from systemds.operator import OperationNode
from systemds.script_building.dag import OutputType
from systemds.utils.consts import VALID_INPUT_TYPES
-def imputeByMedian(X: OperationNode) -> OperationNode:
+def imputeByMedian(X: OperationNode, mask: OperationNode) -> OperationNode:
X._check_matrix_op()
- params_dict = {'X':X}
+ mask._check_matrix_op()
+ params_dict = {'X':X, 'mask':mask}
return OperationNode(X.sds_context, 'imputeByMedian',
named_input_nodes=params_dict, output_type=OutputType.MATRIX)
diff --git a/src/main/python/systemds/operator/algorithm/builtin/imputeByMean.py b/src/main/python/systemds/operator/algorithm/builtin/imputeByMode.py
similarity index 87%
copy from src/main/python/systemds/operator/algorithm/builtin/imputeByMean.py
copy to src/main/python/systemds/operator/algorithm/builtin/imputeByMode.py
index 393bbc8..748e017 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/imputeByMean.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/imputeByMode.py
@@ -20,7 +20,7 @@
# -------------------------------------------------------------
# Autogenerated By : src/main/python/generator/generator.py
-# Autogenerated From : scripts/builtin/imputeByMean.dml
+# Autogenerated From : scripts/builtin/imputeByMode.dml
from typing import Dict, Iterable
@@ -28,12 +28,12 @@ from systemds.operator import OperationNode
from systemds.script_building.dag import OutputType
from systemds.utils.consts import VALID_INPUT_TYPES
-def imputeByMean(X: OperationNode) -> OperationNode:
+def imputeByMode(X: OperationNode) -> OperationNode:
X._check_matrix_op()
params_dict = {'X':X}
- return OperationNode(X.sds_context, 'imputeByMean',
named_input_nodes=params_dict, output_type=OutputType.MATRIX)
+ return OperationNode(X.sds_context, 'imputeByMode',
named_input_nodes=params_dict, output_type=OutputType.MATRIX)
\ No newline at end of file
diff --git a/src/main/python/systemds/operator/algorithm/builtin/km.py b/src/main/python/systemds/operator/algorithm/builtin/km.py
new file mode 100644
index 0000000..bdace08
--- /dev/null
+++ b/src/main/python/systemds/operator/algorithm/builtin/km.py
@@ -0,0 +1,58 @@
+# -------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# -------------------------------------------------------------
+
+# Autogenerated By : src/main/python/generator/generator.py
+# Autogenerated From : scripts/builtin/km.dml
+
+from typing import Dict, Iterable
+
+from systemds.operator import OperationNode
+from systemds.script_building.dag import OutputType
+from systemds.utils.consts import VALID_INPUT_TYPES
+
+def km(X: OperationNode, TE: OperationNode, GI: OperationNode, SI:
OperationNode, **kwargs: Dict[str, VALID_INPUT_TYPES]) -> OperationNode:
+ """
+ :param X: Input matrix X containing the survival data:
+ :param number: (categorical features) for grouping and/or stratifying
+ :param TE: Column indices of X which contain timestamps (first entry) and
event
+ :param GI: Column indices of X corresponding to the factors to be used for
grouping
+ :param SI: Column indices of X corresponding to the factors to be used for
stratifying
+ :param alpha: Parameter to compute 100*(1-alpha)% confidence intervals for
the survivor
+ :param function: median
+ :param err_type: Parameter to specify the error type according to
"greenwood" (the default) or "peto"
+ :param conf_type: Parameter to modify the confidence interval; "plain"
keeps the lower and
+ :param upper: the confidence interval unmodified, "log" (the default)
+ :param corresponds: transformation and "log-log" corresponds to the
+ :param test_type: If survival data for multiple groups is available
specifies which test to
+ :param perform: survival data across multiple groups: "none" (the default)
+ :return: 'OperationNode' containing 7 consecutive columns in km
corresponds to a unique combination of groups and strata in the data & schema
& whose dimension depends on the number of groups (g) and strata (s) in the
data (k denotes the number & for grouping ,i.e., ncol(gi) and l denotes the
number of factors used for stratifying, i.e., ncol(si)) & of groups and strata
is equal to 1, m will have 4 columns with & data from multiple groups
available and ttype=log-rank or wilcoxon, [...]
+ """
+
+ X._check_matrix_op()
+ TE._check_matrix_op()
+ GI._check_matrix_op()
+ SI._check_matrix_op()
+ params_dict = {'X':X, 'TE':TE, 'GI':GI, 'SI':SI}
+ params_dict.update(kwargs)
+ return OperationNode(X.sds_context, 'km', named_input_nodes=params_dict,
output_type=OutputType.LIST, number_of_outputs=4,
output_types=[OutputType.MATRIX, OutputType.MATRIX, OutputType.MATRIX,
OutputType.MATRIX])
+
+
+
\ No newline at end of file
diff --git a/src/main/python/systemds/operator/algorithm/builtin/smote.py b/src/main/python/systemds/operator/algorithm/builtin/l2svmPredict.py
similarity index 64%
copy from src/main/python/systemds/operator/algorithm/builtin/smote.py
copy to src/main/python/systemds/operator/algorithm/builtin/l2svmPredict.py
index 7b1f343..fb8ef7d 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/smote.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/l2svmPredict.py
@@ -20,7 +20,7 @@
# -------------------------------------------------------------
# Autogenerated By : src/main/python/generator/generator.py
-# Autogenerated From : scripts/builtin/smote.dml
+# Autogenerated From : scripts/builtin/l2svmPredict.dml
from typing import Dict, Iterable
@@ -28,13 +28,19 @@ from systemds.operator import OperationNode
from systemds.script_building.dag import OutputType
from systemds.utils.consts import VALID_INPUT_TYPES
-def smote(X: OperationNode, **kwargs: Dict[str, VALID_INPUT_TYPES]) ->
OperationNode:
-
+def l2svmPredict(X: OperationNode, W: OperationNode, **kwargs: Dict[str,
VALID_INPUT_TYPES]) -> OperationNode:
+ """
+ :param X: matrix X of feature vectors to classify
+ :param W: matrix of the trained variables
+ :param verbose: Set to true if one wants print statements.
+ :return: 'OperationNode' containing classification labels maxed to ones
and zeros.
+ """
X._check_matrix_op()
- params_dict = {'X':X}
+ W._check_matrix_op()
+ params_dict = {'X':X, 'W':W}
params_dict.update(kwargs)
- return OperationNode(X.sds_context, 'smote',
named_input_nodes=params_dict, output_type=OutputType.MATRIX)
+ return OperationNode(X.sds_context, 'l2svmPredict',
named_input_nodes=params_dict, output_type=OutputType.LIST,
number_of_outputs=2, output_types=[OutputType.MATRIX, OutputType.MATRIX])
\ No newline at end of file
diff --git a/src/main/python/systemds/operator/algorithm/builtin/randomForest.py b/src/main/python/systemds/operator/algorithm/builtin/randomForest.py
new file mode 100644
index 0000000..41e2b81
--- /dev/null
+++ b/src/main/python/systemds/operator/algorithm/builtin/randomForest.py
@@ -0,0 +1,59 @@
+# -------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# -------------------------------------------------------------
+
+# Autogenerated By : src/main/python/generator/generator.py
+# Autogenerated From : scripts/builtin/randomForest.dml
+
+from typing import Dict, Iterable
+
+from systemds.operator import OperationNode
+from systemds.script_building.dag import OutputType
+from systemds.utils.consts import VALID_INPUT_TYPES
+
+def randomForest(X: OperationNode, Y: OperationNode, R: OperationNode,
**kwargs: Dict[str, VALID_INPUT_TYPES]) -> OperationNode:
+ """
+ :param X: Feature matrix X; note that X needs to be both recoded and dummy
coded
+ :param Y: Label matrix Y; note that Y needs to be both recoded and dummy
coded
+ :param R: " Matrix which for each feature in X contains the
following information
+ :param If: not provided by default all variables are assumed to be scale
+ :param bins: Number of equiheight bins per scale feature to choose
thresholds
+ :param depth: Maximum depth of the learned tree
+ :param num_leaf: Number of samples when splitting stops and a leaf node is
added
+ :param num_samples: Number of samples at which point we switch to
in-memory subtree building
+ :param num_trees: Number of trees to be learned in the random forest model
+ :param subsamp_rate: Parameter controlling the size of each tree in the
forest; samples are selected from a
+ :param Poisson: parameter subsamp_rate (the default value is 1.0)
+ :param feature_subset: Parameter that controls the number of feature used
as candidates for splitting at each tree node
+ :param as: of number of features in the dataset;
+ :param by: root of features (i.e., feature_subset = 0.5) are used at each
tree node
+ :param impurity: Impurity measure: entropy or Gini (the default)
+ :return: 'OperationNode' containing tree and each row contains the
following information: & that leaf node j is supposed to predict & 7,8,... if j
is categorical & chosen for j is categorical rows 7,8,... depict the value
subset chosen for j & c containing the number of times samples are chosen in
each tree of the random forest & from scale feature ids to global feature ids &
from categorical feature ids to global feature ids
+ """
+
+ X._check_matrix_op()
+ Y._check_matrix_op()
+ R._check_matrix_op()
+ params_dict = {'X':X, 'Y':Y, 'R':R}
+ params_dict.update(kwargs)
+ return OperationNode(X.sds_context, 'randomForest',
named_input_nodes=params_dict, output_type=OutputType.LIST,
number_of_outputs=4, output_types=[OutputType.MATRIX, OutputType.MATRIX,
OutputType.MATRIX, OutputType.MATRIX])
+
+
+
\ No newline at end of file
diff --git a/src/main/python/systemds/operator/algorithm/builtin/bandit.py b/src/main/python/systemds/operator/algorithm/builtin/sherlock.py
similarity index 56%
copy from src/main/python/systemds/operator/algorithm/builtin/bandit.py
copy to src/main/python/systemds/operator/algorithm/builtin/sherlock.py
index b2a4d0b..687aac1 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/bandit.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/sherlock.py
@@ -20,7 +20,7 @@
# -------------------------------------------------------------
# Autogenerated By : src/main/python/generator/generator.py
-# Autogenerated From : scripts/builtin/bandit.dml
+# Autogenerated From : scripts/builtin/sherlock.dml
from typing import Dict, Iterable
@@ -28,17 +28,13 @@ from systemds.operator import OperationNode
from systemds.script_building.dag import OutputType
from systemds.utils.consts import VALID_INPUT_TYPES
-def bandit(X_train: OperationNode, Y_train: OperationNode, X_val:
OperationNode, Y_val: OperationNode, mask: OperationNode, schema:
OperationNode, lp: OperationNode, primitives: OperationNode, param:
OperationNode, isWeighted: bool, **kwargs: Dict[str, VALID_INPUT_TYPES]) ->
OperationNode:
+def sherlock(X_train: OperationNode, y_train: OperationNode) -> OperationNode:
X_train._check_matrix_op()
- Y_train._check_matrix_op()
- X_val._check_matrix_op()
- Y_val._check_matrix_op()
- mask._check_matrix_op()
- params_dict = {'X_train':X_train, 'Y_train':Y_train, 'X_val':X_val,
'Y_val':Y_val, 'mask':mask, 'schema':schema, 'lp':lp, 'primitives':primitives,
'param':param, 'isWeighted':isWeighted}
- params_dict.update(kwargs)
- return OperationNode(X_train.sds_context, 'bandit',
named_input_nodes=params_dict, output_type=OutputType.LIST,
number_of_outputs=3, output_types=[OutputType.FRAME, OutputType.MATRIX,
OutputType.MATRIX])
+ y_train._check_matrix_op()
+ params_dict = {'X_train':X_train, 'y_train':y_train}
+ return OperationNode(X_train.sds_context, 'sherlock',
named_input_nodes=params_dict, output_type=OutputType.LIST,
number_of_outputs=30, output_types=[OutputType.MATRIX, OutputType.MATRIX,
OutputType.MATRIX, OutputType.MATRIX, OutputType.MATRIX, OutputType.MATRIX,
OutputType.MATRIX, OutputType.MATRIX, OutputType.MATRIX, OutputType.MATRIX,
OutputType.MATRIX, OutputType.MATRIX, OutputType.MATRIX, OutputType.MATRIX,
OutputType.MATRIX, OutputType.MATRIX, OutputType.MATRIX, OutputType.MATR [...]
\ No newline at end of file
diff --git a/src/main/python/systemds/operator/algorithm/builtin/sherlockPredict.py b/src/main/python/systemds/operator/algorithm/builtin/sherlockPredict.py
new file mode 100644
index 0000000..6a61000
--- /dev/null
+++ b/src/main/python/systemds/operator/algorithm/builtin/sherlockPredict.py
@@ -0,0 +1,69 @@
+# -------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# -------------------------------------------------------------
+
+# Autogenerated By : src/main/python/generator/generator.py
+# Autogenerated From : scripts/builtin/sherlockPredict.dml
+
+from typing import Dict, Iterable
+
+from systemds.operator import OperationNode
+from systemds.script_building.dag import OutputType
+from systemds.utils.consts import VALID_INPUT_TYPES
+
+def sherlockPredict(X: OperationNode, cW1: OperationNode, cb1: OperationNode,
cW2: OperationNode, cb2: OperationNode, cW3: OperationNode, cb3: OperationNode,
wW1: OperationNode, wb1: OperationNode, wW2: OperationNode, wb2: OperationNode,
wW3: OperationNode, wb3: OperationNode, pW1: OperationNode, pb1: OperationNode,
pW2: OperationNode, pb2: OperationNode, pW3: OperationNode, pb3: OperationNode,
sW1: OperationNode, sb1: OperationNode, sW2: OperationNode, sb2: OperationNode,
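sW3: OperationNode, sb3: OperationNode, fW1: OperationNode, fb1: OperationNode,
fW2: OperationNode, fb2: OperationNode, fW3: OperationNode, fb3: OperationNode) ->
OperationNode: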
+
+
+ X._check_matrix_op()
+ cW1._check_matrix_op()
+ cb1._check_matrix_op()
+ cW2._check_matrix_op()
+ cb2._check_matrix_op()
+ cW3._check_matrix_op()
+ cb3._check_matrix_op()
+ wW1._check_matrix_op()
+ wb1._check_matrix_op()
+ wW2._check_matrix_op()
+ wb2._check_matrix_op()
+ wW3._check_matrix_op()
+ wb3._check_matrix_op()
+ pW1._check_matrix_op()
+ pb1._check_matrix_op()
+ pW2._check_matrix_op()
+ pb2._check_matrix_op()
+ pW3._check_matrix_op()
+ pb3._check_matrix_op()
+ sW1._check_matrix_op()
+ sb1._check_matrix_op()
+ sW2._check_matrix_op()
+ sb2._check_matrix_op()
+ sW3._check_matrix_op()
+ sb3._check_matrix_op()
+ fW1._check_matrix_op()
+ fb1._check_matrix_op()
+ fW2._check_matrix_op()
+ fb2._check_matrix_op()
+ fW3._check_matrix_op()
+ fb3._check_matrix_op()
+ params_dict = {'X':X, 'cW1':cW1, 'cb1':cb1, 'cW2':cW2, 'cb2':cb2,
'cW3':cW3, 'cb3':cb3, 'wW1':wW1, 'wb1':wb1, 'wW2':wW2, 'wb2':wb2, 'wW3':wW3,
'wb3':wb3, 'pW1':pW1, 'pb1':pb1, 'pW2':pW2, 'pb2':pb2, 'pW3':pW3, 'pb3':pb3,
'sW1':sW1, 'sb1':sb1, 'sW2':sW2, 'sb2':sb2, 'sW3':sW3, 'sb3':sb3, 'fW1':fW1,
'fb1':fb1, 'fW2':fW2, 'fb2':fb2, 'fW3':fW3, 'fb3':fb3}
+ return OperationNode(X.sds_context, 'sherlockPredict',
named_input_nodes=params_dict, output_type=OutputType.MATRIX)
+
+
+
\ No newline at end of file
diff --git a/src/main/python/systemds/operator/algorithm/builtin/smote.py
b/src/main/python/systemds/operator/algorithm/builtin/smote.py
index 7b1f343..189dc0a 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/smote.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/smote.py
@@ -28,11 +28,12 @@ from systemds.operator import OperationNode
from systemds.script_building.dag import OutputType
from systemds.utils.consts import VALID_INPUT_TYPES
-def smote(X: OperationNode, **kwargs: Dict[str, VALID_INPUT_TYPES]) ->
OperationNode:
+def smote(X: OperationNode, mask: OperationNode, **kwargs: Dict[str,
VALID_INPUT_TYPES]) -> OperationNode:
X._check_matrix_op()
- params_dict = {'X':X}
+ mask._check_matrix_op()
+ params_dict = {'X':X, 'mask':mask}
params_dict.update(kwargs)
return OperationNode(X.sds_context, 'smote',
named_input_nodes=params_dict, output_type=OutputType.MATRIX)
diff --git
a/src/main/python/systemds/operator/algorithm/builtin/imputeByMedian.py
b/src/main/python/systemds/operator/algorithm/builtin/splitBalanced.py
similarity index 70%
copy from src/main/python/systemds/operator/algorithm/builtin/imputeByMedian.py
copy to src/main/python/systemds/operator/algorithm/builtin/splitBalanced.py
index cdefcee..6c45061 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/imputeByMedian.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/splitBalanced.py
@@ -20,7 +20,7 @@
# -------------------------------------------------------------
# Autogenerated By : src/main/python/generator/generator.py
-# Autogenerated From : scripts/builtin/imputeByMedian.dml
+# Autogenerated From : scripts/builtin/splitBalanced.dml
from typing import Dict, Iterable
@@ -28,12 +28,13 @@ from systemds.operator import OperationNode
from systemds.script_building.dag import OutputType
from systemds.utils.consts import VALID_INPUT_TYPES
-def imputeByMedian(X: OperationNode) -> OperationNode:
+def splitBalanced(X: OperationNode, Y: OperationNode, splitRatio: float,
verbose: bool) -> OperationNode:
X._check_matrix_op()
- params_dict = {'X':X}
- return OperationNode(X.sds_context, 'imputeByMedian',
named_input_nodes=params_dict, output_type=OutputType.MATRIX)
+ Y._check_matrix_op()
+ params_dict = {'X':X, 'Y':Y, 'splitRatio':splitRatio, 'verbose':verbose}
+ return OperationNode(X.sds_context, 'splitBalanced',
named_input_nodes=params_dict, output_type=OutputType.LIST,
number_of_outputs=4, output_types=[OutputType.MATRIX, OutputType.MATRIX,
OutputType.MATRIX, OutputType.MATRIX])
\ No newline at end of file
diff --git
a/src/main/python/systemds/operator/algorithm/builtin/imputeByMedian.py
b/src/main/python/systemds/operator/algorithm/builtin/tomeklink.py
similarity index 70%
copy from src/main/python/systemds/operator/algorithm/builtin/imputeByMedian.py
copy to src/main/python/systemds/operator/algorithm/builtin/tomeklink.py
index cdefcee..9ad0aee 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/imputeByMedian.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/tomeklink.py
@@ -20,7 +20,7 @@
# -------------------------------------------------------------
# Autogenerated By : src/main/python/generator/generator.py
-# Autogenerated From : scripts/builtin/imputeByMedian.dml
+# Autogenerated From : scripts/builtin/tomeklink.dml
from typing import Dict, Iterable
@@ -28,12 +28,17 @@ from systemds.operator import OperationNode
from systemds.script_building.dag import OutputType
from systemds.utils.consts import VALID_INPUT_TYPES
-def imputeByMedian(X: OperationNode) -> OperationNode:
-
+def tomeklink(X: OperationNode, y: OperationNode) -> OperationNode:
+ """
+ :param X: Data Matrix (nxm)
+ :param y: Label Matrix (nx1)
+ :return: 'OperationNode' containing
+ """
X._check_matrix_op()
- params_dict = {'X':X}
- return OperationNode(X.sds_context, 'imputeByMedian',
named_input_nodes=params_dict, output_type=OutputType.MATRIX)
+ y._check_matrix_op()
+ params_dict = {'X':X, 'y':y}
+ return OperationNode(X.sds_context, 'tomeklink',
named_input_nodes=params_dict, output_type=OutputType.LIST,
number_of_outputs=3, output_types=[OutputType.MATRIX, OutputType.MATRIX,
OutputType.MATRIX])
\ No newline at end of file
diff --git a/src/main/python/systemds/operator/algorithm/builtin/vectorToCsv.py
b/src/main/python/systemds/operator/algorithm/builtin/vectorToCsv.py
index 140787f..7e2f702 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/vectorToCsv.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/vectorToCsv.py
@@ -28,12 +28,12 @@ from systemds.operator import OperationNode
from systemds.script_building.dag import OutputType
from systemds.utils.consts import VALID_INPUT_TYPES
-def vectorToCsv(vector: OperationNode) -> OperationNode:
+def vectorToCsv(mask: OperationNode) -> OperationNode:
- vector._check_matrix_op()
- params_dict = {'vector':vector}
- return OperationNode(vector.sds_context, 'vectorToCsv',
named_input_nodes=params_dict, output_type=OutputType.STRING)
+ mask._check_matrix_op()
+ params_dict = {'mask':mask}
+ return OperationNode(mask.sds_context, 'vectorToCsv',
named_input_nodes=params_dict, output_type=OutputType.STRING)
\ No newline at end of file