This is an automated email from the ASF dual-hosted git repository. baunsgaard pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/systemds.git
commit 7d354ac0f63b2156b099c14f6bf99658c690b650 Author: baunsgaard <[email protected]> AuthorDate: Tue Sep 14 14:37:49 2021 +0200 [SYSTEMDS-3130] Remove old algorithm definitions in PythonAPI --- src/main/python/systemds/operator/algorithm.py | 198 ------------------------- 1 file changed, 198 deletions(-) diff --git a/src/main/python/systemds/operator/algorithm.py b/src/main/python/systemds/operator/algorithm.py deleted file mode 100644 index 7833030..0000000 --- a/src/main/python/systemds/operator/algorithm.py +++ /dev/null @@ -1,198 +0,0 @@ -# ------------------------------------------------------------- -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# -# ------------------------------------------------------------- - -from typing import Dict - -from systemds.operator import OperationNode -from systemds.script_building.dag import OutputType -from systemds.utils.consts import VALID_INPUT_TYPES - -__all__ = ['l2svm', 'lm', 'kmeans', 'pca', 'multiLogReg', 'multiLogRegPredict'] - - -def l2svm(x: OperationNode, y: OperationNode, **kwargs: Dict[str, VALID_INPUT_TYPES]) -> OperationNode: - """ - Perform L2SVM on matrix with labels given. - - :param x: Input dataset - :param y: Input labels in shape of one column - :param kwargs: Dictionary of extra arguments - :return: `OperationNode` containing the model fit. - """ - x._check_matrix_op() - params_dict = {'X': x, 'Y': y} - params_dict.update(kwargs) - return OperationNode(x.sds_context, 'l2svm', named_input_nodes=params_dict) - - -def lm(x: OperationNode, y: OperationNode, **kwargs: Dict[str, VALID_INPUT_TYPES]) -> OperationNode: - """ - Performs LM on matrix with labels given. - - :param x: Input dataset - :param y: Input labels in shape of one column - :param kwargs: Dictionary of extra arguments - :return: `OperationNode` containing the model fit. - """ - - if x.shape[0] == 0: - raise ValueError("Found array with 0 feature(s) (shape={s}) while a minimum of 1 is required." - .format(s=x.shape)) - if y.shape[0] == 0: - raise ValueError("Found array with 0 feature(s) (shape={s}) while a minimum of 1 is required." - .format(s=y.shape)) - - params_dict = {'X': x, 'y': y} - params_dict.update(kwargs) - return OperationNode(x.sds_context, 'lm', named_input_nodes=params_dict) - - -def kmeans(x: OperationNode, **kwargs: Dict[str, VALID_INPUT_TYPES]) -> OperationNode: - """ - Performs KMeans on matrix input. - - :param x: Input dataset to perform K-Means on. - :param k: The number of centroids to use for the algorithm. - :param runs: The number of concurrent instances of K-Means to run (with different initial centroids). - :param max_iter: The maximum number of iterations to run the K-Means algorithm for. - :param eps: Tolerance for the algorithm to declare convergence using WCSS change ratio. - :param is_verbose: Boolean flag if the algorithm should be run in a verbose manner. - :param avg_sample_size_per_centroid: The average number of records per centroid in the data samples. - :return: `OperationNode` List containing two outputs 1. the clusters, 2 the cluster ID associated with each row in x. - """ - - x._check_matrix_op() - if x.shape[0] == 0: - raise ValueError("Found array with 0 feature(s) (shape={s}) while a minimum of 1 is required." - .format(s=x.shape)) - - if 'k' in kwargs.keys() and kwargs.get('k') < 1: - raise ValueError( - "Invalid number of clusters in K-Means, number must be integer above 0") - - params_dict = {'X': x} - params_dict.update(kwargs) - return OperationNode(x.sds_context, 'kmeans', named_input_nodes=params_dict, output_type=OutputType.LIST, number_of_outputs=2) - -def kmeansPredict(X: OperationNode, C: OperationNode) -> OperationNode: - """ - Perform Kmeans Predict, note that the Ids returned are 1 indexed. - - :param X: The matrix to classify. - :param Y: The Clusters to use for classification into. - :return: `OperationNode` containing a matrix of classifications of Id's of specific clusters in C. - """ - X._check_matrix_op() - C._check_matrix_op() - - params_dict = {'X' : X, 'C' : C} - return OperationNode(X.sds_context, 'kmeansPredict', named_input_nodes=params_dict, output_type=OutputType.MATRIX, shape=(1, X.shape[0])) - - - -def pca(x: OperationNode, **kwargs: Dict[str, VALID_INPUT_TYPES]) -> OperationNode: - """ - Performs PCA on the matrix input - - :param x: Input dataset to perform Principal Componenet Analysis (PCA) on. - :param K: The number of reduced dimensions. - :param center: Boolean specifying if the input values should be centered. - :param scale: Boolean specifying if the input values should be scaled. - :return: `OperationNode` List containing two outputs 1. The dimensionality reduced X input, 2. A matrix to reduce dimensionality similarly on unseen data. - """ - - x._check_matrix_op() - if x.shape[0] == 0: - raise ValueError("Found array with 0 feature(s) (shape={s}) while a minimum of 1 is required." - .format(s=x.shape)) - - if 'K' in kwargs.keys() and kwargs.get('K') < 1: - raise ValueError( - "Invalid number of dimensions in PCA, number must be integer above 0") - - params_dict = {'X': x} - params_dict.update(kwargs) - return OperationNode(x.sds_context, 'pca', named_input_nodes=params_dict, output_type=OutputType.LIST, number_of_outputs=2) - - -def multiLogReg(x: OperationNode, y: OperationNode, **kwargs: Dict[str, VALID_INPUT_TYPES]) -> OperationNode: - """ - Performs Multiclass Logistic Regression on the matrix input - using Trust Region method. - - See: Trust Region Newton Method for Logistic Regression, Lin, Weng and Keerthi, JMLR 9 (2008) 627-650) - - :param x: Input dataset to perform logstic regression on - :param y: Labels rowaligned with the input dataset - :param icpt: Intercept, default 2, Intercept presence, shifting and rescaling X columns: - 0 = no intercept, no shifting, no rescaling; - 1 = add intercept, but neither shift nor rescale X; - 2 = add intercept, shift & rescale X columns to mean = 0, variance = 1 - :param tol: float tolerance for the algorithm. - :param reg: Regularization parameter (lambda = 1/C); intercept settings are not regularized. - :param maxi: Maximum outer iterations of the algorithm - :param maxii: Maximum inner iterations of the algorithm - :return: `OperationNode` of a matrix containing the regression parameters trained. - """ - - if x.shape[0] == 0: - raise ValueError("Found array with 0 feature(s) (shape={s}) while a minimum of 1 is required." - .format(s=x.shape)) - if y.shape[0] == 0: - raise ValueError("Found array with 0 feature(s) (shape={s}) while a minimum of 1 is required." - .format(s=y.shape)) - if -1 in x.shape: - output_shape = (-1,) - else: - output_shape = (x.shape[1],) - - params_dict = {'X': x, 'Y': y} - params_dict.update(kwargs) - return OperationNode(x.sds_context, 'multiLogReg', named_input_nodes=params_dict, shape = output_shape) - - -def multiLogRegPredict(x: OperationNode, b: OperationNode, y: OperationNode, **kwargs: Dict[str, VALID_INPUT_TYPES]) -> OperationNode: - """ - Performs prediction on input data x using the model trained, b. - - :param x: The data to perform classification on. - :param b: The regression parameters trained from multiLogReg. - :param y: The Labels expected to be contained in the X dataset, to calculate accuracy. - :param verbose: Boolean specifying if the prediction should be verbose. - :return: `OperationNode` List containing three outputs. - 1. The predicted means / probabilities - 2. The predicted response vector - 3. The scalar value of accuracy - """ - - if x.shape[0] == 0: - raise ValueError("Found array with 0 feature(s) (shape={s}) while a minimum of 1 is required." - .format(s=x.shape)) - if b.shape[0] == 0: - raise ValueError("Found array with 0 feature(s) (shape={s}) while a minimum of 1 is required." - .format(s=y.shape)) - if y.shape[0] == 0: - raise ValueError("Found array with 0 feature(s) (shape={s}) while a minimum of 1 is required." - .format(s=y.shape)) - - params_dict = {'X': x, 'B': b, 'Y': y} - params_dict.update(kwargs) - return OperationNode(x.sds_context, 'multiLogRegPredict', named_input_nodes=params_dict, output_type=OutputType.LIST, number_of_outputs=3, output_types=[OutputType.MATRIX,OutputType.MATRIX,OutputType.DOUBLE])
