This is an automated email from the ASF dual-hosted git repository. baunsgaard pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/systemds.git
commit 113aecc7a6bb8b59e5fffa296bf60dae686a2e43 Author: Sebastian Baunsgaard <[email protected]> AuthorDate: Thu Oct 19 12:06:25 2023 +0200 [MINOR] Python generate API This commit generates the Python API and fixes an edge case where a method has no return values, such as differenceStatistics. Such a method now returns an operation node that can be used just like the operation node of a print statement. --- src/main/python/create_python_dist.py | 6 +-- .../source/code/guide/algorithms/FullScript.py | 2 +- .../docs/source/code/guide/end_to_end/part1.py | 2 +- .../python/docs/source/guide/algorithms_basics.rst | 4 +- src/main/python/generator/dml_parser.py | 28 +++++----- src/main/python/generator/generator.py | 14 ++++- .../python/systemds/operator/algorithm/__init__.py | 20 ++++++- .../operator/algorithm/builtin/csplineCG.py | 2 +- .../systemds/operator/algorithm/builtin/dbscan.py | 16 +++--- .../{scaleMinMax.py => differenceStatistics.py} | 22 ++++---- ...malizeApply.py => img_brightness_linearized.py} | 27 +++++----- .../{scaleMinMax.py => img_crop_linearized.py} | 28 +++++++--- ...{lmPredictStats.py => img_cutout_linearized.py} | 35 +++++++----- .../{scaleMinMax.py => img_invert_linearized.py} | 18 ++++--- ...{lmPredictStats.py => img_mirror_linearized.py} | 30 ++++++----- ...{scaleMinMax.py => img_posterize_linearized.py} | 20 ++++--- .../algorithm/builtin/img_transform_linearized.py | 62 ++++++++++++++++++++++ .../algorithm/builtin/img_translate_linearized.py | 60 +++++++++++++++++++++ .../systemds/operator/algorithm/builtin/lm.py | 7 +-- .../systemds/operator/algorithm/builtin/lmCG.py | 4 +- .../systemds/operator/algorithm/builtin/lmDS.py | 4 +- .../operator/algorithm/builtin/lmPredictStats.py | 8 +-- .../algorithm/builtin/multiLogRegPredict.py | 3 +- .../operator/algorithm/builtin/normalizeApply.py | 4 +- .../operator/algorithm/builtin/scaleMinMax.py | 2 + .../python/tests/algorithms/test_multiLogReg.py | 2 +- .../python/tests/examples/tutorials/test_adult.py | 
2 +- .../python/tests/examples/tutorials/test_mnist.py | 4 +- .../python/tests/federated/test_federated_mnist.py | 2 +- .../tests/manual_tests/multi_log_reg_mnist.py | 2 +- 30 files changed, 310 insertions(+), 130 deletions(-) diff --git a/src/main/python/create_python_dist.py b/src/main/python/create_python_dist.py index 4718881a36..f02578fa3a 100755 --- a/src/main/python/create_python_dist.py +++ b/src/main/python/create_python_dist.py @@ -23,6 +23,6 @@ import subprocess f = open("generator.log","w") -subprocess.run("python generator/generator.py",shell=True, check=True, stdout =f, stderr=f) -subprocess.run("python pre_setup.py",shell=True, check=True) -subprocess.run("python setup.py sdist bdist_wheel",shell=True, check=True) +subprocess.run("python3 generator/generator.py",shell=True, check=True, stdout =f, stderr=f) +subprocess.run("python3 pre_setup.py",shell=True, check=True) +subprocess.run("python3 setup.py sdist bdist_wheel",shell=True, check=True) diff --git a/src/main/python/docs/source/code/guide/algorithms/FullScript.py b/src/main/python/docs/source/code/guide/algorithms/FullScript.py index 0340886175..e8cd82cc1f 100644 --- a/src/main/python/docs/source/code/guide/algorithms/FullScript.py +++ b/src/main/python/docs/source/code/guide/algorithms/FullScript.py @@ -39,6 +39,6 @@ with SystemDSContext() as sds: # Test data Xt_ds = sds.from_numpy(Xt) Yt_ds = sds.from_numpy(Yt) + 1.0 - [m, y_pred, acc] = multiLogRegPredict(Xt_ds, bias, Yt_ds, verbose=False).compute() + [m, y_pred, acc] = multiLogRegPredict(Xt_ds, bias, Y=Yt_ds, verbose=False).compute() logging.info(acc) diff --git a/src/main/python/docs/source/code/guide/end_to_end/part1.py b/src/main/python/docs/source/code/guide/end_to_end/part1.py index 4b45679049..55ce7eca13 100644 --- a/src/main/python/docs/source/code/guide/end_to_end/part1.py +++ b/src/main/python/docs/source/code/guide/end_to_end/part1.py @@ -54,7 +54,7 @@ with SystemDSContext() as sds: betas = multiLogReg(X, Y, verbose=False) # Apply 
model - [_, y_pred, acc] = multiLogRegPredict(Xt, betas, Yt) + [_, y_pred, acc] = multiLogRegPredict(Xt, betas, Y=Yt) # Confusion Matrix confusion_matrix_abs, _ = confusionMatrix(y_pred, Yt).compute() diff --git a/src/main/python/docs/source/guide/algorithms_basics.rst b/src/main/python/docs/source/guide/algorithms_basics.rst index 6c25b8b39d..7206605222 100644 --- a/src/main/python/docs/source/guide/algorithms_basics.rst +++ b/src/main/python/docs/source/guide/algorithms_basics.rst @@ -134,7 +134,7 @@ Finally we verify the accuracy by calling .. code-block:: python from systemds.operator.algorithm import multiLogRegPredict - [m, y_pred, acc] = multiLogRegPredict(Xt, bias, Yt).compute() + [m, y_pred, acc] = multiLogRegPredict(Xt, bias, Y=Yt).compute() print(acc) There are three outputs from the multiLogRegPredict call. @@ -158,7 +158,7 @@ To see how our accuracy is on the training data we use the Predict function agai .. code-block:: python - [m, y_pred, acc] = multiLogRegPredict(X_ds, bias, Y_ds).compute() + [m, y_pred, acc] = multiLogRegPredict(X_ds, bias, Y=Y_ds).compute() print(acc) In this specific case we achieve 100% accuracy on the training data, indicating that we have fit the training data, diff --git a/src/main/python/generator/dml_parser.py b/src/main/python/generator/dml_parser.py index 017abf78b3..2abffb021f 100644 --- a/src/main/python/generator/dml_parser.py +++ b/src/main/python/generator/dml_parser.py @@ -66,28 +66,26 @@ class FunctionParser(object): # if match: func_split = function_definition.split("function")[1].split("return") - param_str, retval_str = self.extract_param_str( - func_split[0]), self.extract_param_str(func_split[1]) + + param_str = self.extract_param_str(func_split[0]) + retval_str = None + if(len(func_split)> 1): + retval_str = self.extract_param_str(func_split[1]) + if param_str: parameters = self.get_parameters(param_str) return_values = self.get_parameters(retval_str) data = {'function_name': function_name, 'parameters': 
parameters, 'return_values': return_values} - if parameters and return_values: + if parameters: return data else: - raise AttributeError("Unable to match to function definition:\n" + function_definition + - "\n parameter_str: " + param_str + "\n retVal: " + retval_str) + raise AttributeError("Unable to match to function definition:\n" + function_definition + + "\n parameter_str: " + param_str + "\n retVal: " + retval_str) else: - raise AttributeError("Unable to match to function definition:\n" + function_definition + - "\n parameter_str: " + param_str + "\n retVal: " + retval_str) - # else: - # # TODO handle default matrix variables. - # raise AttributeError("Unable to match to function definition:\n" + function_definition) - # except Exception as e: - # import generator - # raise AttributeError("Unable to parse " + path + " " + generator.format_exception(e)) - + raise AttributeError("Unable to match to function definition:\n" + function_definition + + "\n parameter_str: " + param_str + "\n retVal: " + retval_str) + def extract_param_str(self, a: str): try: return a[a.index("(") + 1: a.rindex(")")] @@ -95,6 +93,8 @@ class FunctionParser(object): raise AttributeError("failed extracting from: " + a) def get_parameters(self, param_str: str): + if(param_str == None): + return None params = re.split(r",[\s]*", param_str) diff --git a/src/main/python/generator/generator.py b/src/main/python/generator/generator.py index 12aa185310..eeed4f4aed 100644 --- a/src/main/python/generator/generator.py +++ b/src/main/python/generator/generator.py @@ -215,7 +215,10 @@ class PythonAPIFunctionGenerator(object): return_values: List[Tuple[str]], function_name: str ) -> str: - length = len(return_values) + if( return_values == None): + length = 1 + else: + length = len(return_values) param_string = "" param = parameters[0] sds_context = "{param}.sds_context".format(param=param[0]) @@ -231,6 +234,15 @@ class PythonAPIFunctionGenerator(object): op_assign=op_assignments ) return result + 
elif return_values == None: + result = ("return OperationNode({sds_context}," + + "\n \'{function_name}\'," + + "\n named_input_nodes=params_dict," + + "\n output_type=OutputType.NONE)").format( + sds_context=sds_context, + function_name=function_name + ) + return result else: value = return_values[0] output_type = re.search(pattern, value[1]) diff --git a/src/main/python/systemds/operator/algorithm/__init__.py b/src/main/python/systemds/operator/algorithm/__init__.py index a5227c3461..52c470d201 100644 --- a/src/main/python/systemds/operator/algorithm/__init__.py +++ b/src/main/python/systemds/operator/algorithm/__init__.py @@ -51,6 +51,7 @@ from .builtin.decisionTree import decisionTree from .builtin.decisionTreePredict import decisionTreePredict from .builtin.deepWalk import deepWalk from .builtin.denialConstraints import denialConstraints +from .builtin.differenceStatistics import differenceStatistics from .builtin.discoverFD import discoverFD from .builtin.dist import dist from .builtin.dmv import dmv @@ -77,20 +78,27 @@ from .builtin.gridSearch import gridSearch from .builtin.hospitalResidencyMatch import hospitalResidencyMatch from .builtin.hyperband import hyperband from .builtin.img_brightness import img_brightness +from .builtin.img_brightness_linearized import img_brightness_linearized from .builtin.img_crop import img_crop +from .builtin.img_crop_linearized import img_crop_linearized from .builtin.img_cutout import img_cutout +from .builtin.img_cutout_linearized import img_cutout_linearized from .builtin.img_invert import img_invert +from .builtin.img_invert_linearized import img_invert_linearized from .builtin.img_mirror import img_mirror +from .builtin.img_mirror_linearized import img_mirror_linearized from .builtin.img_posterize import img_posterize +from .builtin.img_posterize_linearized import img_posterize_linearized from .builtin.img_rotate import img_rotate from .builtin.img_sample_pairing import img_sample_pairing from .builtin.img_shear import 
img_shear from .builtin.img_transform import img_transform +from .builtin.img_transform_linearized import img_transform_linearized from .builtin.img_translate import img_translate +from .builtin.img_translate_linearized import img_translate_linearized from .builtin.impurityMeasures import impurityMeasures from .builtin.imputeByFD import imputeByFD from .builtin.imputeByFDApply import imputeByFDApply -from .builtin.imputeByKNN import imputeByKNN from .builtin.imputeByMean import imputeByMean from .builtin.imputeByMeanApply import imputeByMeanApply from .builtin.imputeByMedian import imputeByMedian @@ -216,6 +224,7 @@ __all__ = ['WoE', 'decisionTreePredict', 'deepWalk', 'denialConstraints', + 'differenceStatistics', 'discoverFD', 'dist', 'dmv', @@ -242,20 +251,27 @@ __all__ = ['WoE', 'hospitalResidencyMatch', 'hyperband', 'img_brightness', + 'img_brightness_linearized', 'img_crop', + 'img_crop_linearized', 'img_cutout', + 'img_cutout_linearized', 'img_invert', + 'img_invert_linearized', 'img_mirror', + 'img_mirror_linearized', 'img_posterize', + 'img_posterize_linearized', 'img_rotate', 'img_sample_pairing', 'img_shear', 'img_transform', + 'img_transform_linearized', 'img_translate', + 'img_translate_linearized', 'impurityMeasures', 'imputeByFD', 'imputeByFDApply', - 'imputeByKNN', 'imputeByMean', 'imputeByMeanApply', 'imputeByMedian', diff --git a/src/main/python/systemds/operator/algorithm/builtin/csplineCG.py b/src/main/python/systemds/operator/algorithm/builtin/csplineCG.py index fa6dcb04f9..4ec9cb7091 100644 --- a/src/main/python/systemds/operator/algorithm/builtin/csplineCG.py +++ b/src/main/python/systemds/operator/algorithm/builtin/csplineCG.py @@ -42,7 +42,7 @@ def csplineCG(X: Matrix, monotonically increasing and there is no duplicates points in X :param Y: 1-column matrix of corresponding y values knots :param inp_x: the given input x, for which the cspline will find predicted y. 
- :param tol: Tolerance (epsilon); conjugate graduent procedure terminates early if + :param tol: Tolerance (epsilon); conjugate gradient procedure terminates early if L2 norm of the beta-residual is less than tolerance * its initial norm :param maxi: Maximum number of conjugate gradient iterations, 0 = no maximum :return: Predicted value diff --git a/src/main/python/systemds/operator/algorithm/builtin/dbscan.py b/src/main/python/systemds/operator/algorithm/builtin/dbscan.py index 35fb0dda50..7b49e465de 100644 --- a/src/main/python/systemds/operator/algorithm/builtin/dbscan.py +++ b/src/main/python/systemds/operator/algorithm/builtin/dbscan.py @@ -32,15 +32,19 @@ from systemds.utils.consts import VALID_INPUT_TYPES def dbscan(X: Matrix, **kwargs: Dict[str, VALID_INPUT_TYPES]): """ - Implements the DBSCAN clustering algorithm using Euclidian distance matrix + Implements the DBSCAN clustering algorithm using an Euclidean + distance matrix. :param X: The input Matrix to do DBSCAN on. - :param eps: Maximum distance between two points for one to be considered reachable for the other. - :param minPts: Number of points in a neighborhood for a point to be considered as a core point + :param eps: Maximum distance between two points for one to + be considered reachable for the other. + :param minPts: Number of points in a neighborhood for a point to + be considered as a core point (includes the point itself). 
- :return: clustering Matrix + :return: The clustering matrix + :return: The cluster model """ params_dict = {'X': X} @@ -48,13 +52,11 @@ def dbscan(X: Matrix, vX_0 = Matrix(X.sds_context, '') vX_1 = Matrix(X.sds_context, '') - vX_2 = Scalar(X.sds_context, '') - output_nodes = [vX_0, vX_1, vX_2, ] + output_nodes = [vX_0, vX_1, ] op = MultiReturn(X.sds_context, 'dbscan', output_nodes, named_input_nodes=params_dict) vX_0._unnamed_input_nodes = [op] vX_1._unnamed_input_nodes = [op] - vX_2._unnamed_input_nodes = [op] return op diff --git a/src/main/python/systemds/operator/algorithm/builtin/scaleMinMax.py b/src/main/python/systemds/operator/algorithm/builtin/differenceStatistics.py similarity index 69% copy from src/main/python/systemds/operator/algorithm/builtin/scaleMinMax.py copy to src/main/python/systemds/operator/algorithm/builtin/differenceStatistics.py index b2a59859f6..ccef2d3cdb 100644 --- a/src/main/python/systemds/operator/algorithm/builtin/scaleMinMax.py +++ b/src/main/python/systemds/operator/algorithm/builtin/differenceStatistics.py @@ -20,7 +20,7 @@ # ------------------------------------------------------------- # Autogenerated By : src/main/python/generator/generator.py -# Autogenerated From : scripts/builtin/scaleMinMax.dml +# Autogenerated From : scripts/builtin/differenceStatistics.dml from typing import Dict, Iterable @@ -29,17 +29,17 @@ from systemds.script_building.dag import OutputType from systemds.utils.consts import VALID_INPUT_TYPES -def scaleMinMax(X: Matrix): +def differenceStatistics(X: Matrix, + Y: Matrix): """ - This function performs min-max normalization (rescaling to [0,1]). + Prints the difference statistics of two matrices given, to indicate how + they are different. This can be used for instance in comparison of lossy + compression techniques, that reduce the fidelity of the data. 
- - - :param X: Input feature matrix - :return: Scaled output matrix """ - params_dict = {'X': X} - return Matrix(X.sds_context, - 'scaleMinMax', - named_input_nodes=params_dict) + params_dict = {'X': X, 'Y': Y} + return OperationNode(X.sds_context, + 'differenceStatistics', + named_input_nodes=params_dict, + output_type=OutputType.NONE) diff --git a/src/main/python/systemds/operator/algorithm/builtin/normalizeApply.py b/src/main/python/systemds/operator/algorithm/builtin/img_brightness_linearized.py similarity index 59% copy from src/main/python/systemds/operator/algorithm/builtin/normalizeApply.py copy to src/main/python/systemds/operator/algorithm/builtin/img_brightness_linearized.py index 063e457f25..d61fcf6354 100644 --- a/src/main/python/systemds/operator/algorithm/builtin/normalizeApply.py +++ b/src/main/python/systemds/operator/algorithm/builtin/img_brightness_linearized.py @@ -20,7 +20,7 @@ # ------------------------------------------------------------- # Autogenerated By : src/main/python/generator/generator.py -# Autogenerated From : scripts/builtin/normalizeApply.dml +# Autogenerated From : scripts/builtin/img_brightness_linearized.dml from typing import Dict, Iterable @@ -29,24 +29,21 @@ from systemds.script_building.dag import OutputType from systemds.utils.consts import VALID_INPUT_TYPES -def normalizeApply(X: Matrix, - cmin: Matrix, - cmax: Matrix): +def img_brightness_linearized(img_in: Matrix, + value: float, + channel_max: int): """ - Min-max normalization (a.k.a. min-max scaling) to range [0,1], given - existing min-max ranges. For matrices of positive values, this normalization - preserves the input sparsity. The validity of the provided min-max range - and post-processing is under control of the caller. + The img_brightness_linearized-function is an image data augmentation function. It changes the brightness of one or multiple images. 
- :param X: Input feature matrix of shape n-by-m - :param cmin: Colunm minima of shape 1-by-m - :param cmax: Column maxima of shape 1-by-m - :return: Modified output feature matrix of shape n-by-m + :param img_in: Input matrix/image (can represent multiple images every row of the matrix represents a linearized image) + :param value: The amount of brightness to be changed for the image + :param channel_max: Maximum value of the brightness of the image + :return: Output matrix/images (every row of the matrix represents a linearized image) """ - params_dict = {'X': X, 'cmin': cmin, 'cmax': cmax} - return Matrix(X.sds_context, - 'normalizeApply', + params_dict = {'img_in': img_in, 'value': value, 'channel_max': channel_max} + return Matrix(img_in.sds_context, + 'img_brightness_linearized', named_input_nodes=params_dict) diff --git a/src/main/python/systemds/operator/algorithm/builtin/scaleMinMax.py b/src/main/python/systemds/operator/algorithm/builtin/img_crop_linearized.py similarity index 55% copy from src/main/python/systemds/operator/algorithm/builtin/scaleMinMax.py copy to src/main/python/systemds/operator/algorithm/builtin/img_crop_linearized.py index b2a59859f6..be2907b15d 100644 --- a/src/main/python/systemds/operator/algorithm/builtin/scaleMinMax.py +++ b/src/main/python/systemds/operator/algorithm/builtin/img_crop_linearized.py @@ -20,7 +20,7 @@ # ------------------------------------------------------------- # Autogenerated By : src/main/python/generator/generator.py -# Autogenerated From : scripts/builtin/scaleMinMax.dml +# Autogenerated From : scripts/builtin/img_crop_linearized.dml from typing import Dict, Iterable @@ -29,17 +29,29 @@ from systemds.script_building.dag import OutputType from systemds.utils.consts import VALID_INPUT_TYPES -def scaleMinMax(X: Matrix): +def img_crop_linearized(img_in: Matrix, + w: int, + h: int, + x_offset: int, + y_offset: int, + s_cols: int, + s_rows: int): """ - This function performs min-max normalization (rescaling to 
[0,1]). + The img_crop_linearized cuts out a rectangular section of multiple linearized images. - :param X: Input feature matrix - :return: Scaled output matrix + :param img_in: Linearized input images as 2D matrix + :param w: The width of the subregion required + :param h: The height of the subregion required + :param x_offset: The horizontal offset for the center of the crop region + :param y_offset: The vertical offset for the center of the crop region + :param s_cols: Width of a single image + :param s_rows: Height of a single image + :return: Cropped images as linearized 2D matrix """ - params_dict = {'X': X} - return Matrix(X.sds_context, - 'scaleMinMax', + params_dict = {'img_in': img_in, 'w': w, 'h': h, 'x_offset': x_offset, 'y_offset': y_offset, 's_cols': s_cols, 's_rows': s_rows} + return Matrix(img_in.sds_context, + 'img_crop_linearized', named_input_nodes=params_dict) diff --git a/src/main/python/systemds/operator/algorithm/builtin/lmPredictStats.py b/src/main/python/systemds/operator/algorithm/builtin/img_cutout_linearized.py similarity index 52% copy from src/main/python/systemds/operator/algorithm/builtin/lmPredictStats.py copy to src/main/python/systemds/operator/algorithm/builtin/img_cutout_linearized.py index 731d6d232c..f42752e561 100644 --- a/src/main/python/systemds/operator/algorithm/builtin/lmPredictStats.py +++ b/src/main/python/systemds/operator/algorithm/builtin/img_cutout_linearized.py @@ -20,7 +20,7 @@ # ------------------------------------------------------------- # Autogenerated By : src/main/python/generator/generator.py -# Autogenerated From : scripts/builtin/lmPredictStats.dml +# Autogenerated From : scripts/builtin/img_cutout_linearized.dml from typing import Dict, Iterable @@ -29,22 +29,31 @@ from systemds.script_building.dag import OutputType from systemds.utils.consts import VALID_INPUT_TYPES -def lmPredictStats(yhat: Matrix, - ytest: Matrix, - lm: bool): +def img_cutout_linearized(img_in: Matrix, + x: int, + y: int, + width: 
int, + height: int, + fill_value: float, + s_cols: int, + s_rows: int): """ - This builtin function computes and prints a summary of accuracy - measures for regression problems. + Image Cutout function replaces a rectangular section of an image with a constant value. - :param yhat: column vector of predicted response values y - :param ytest: column vector of actual response values y - :param lm: indicator if used for linear regression model - :return: column vector holding avg_res, ss_avg_res, and R2 + :param img_in: Input images as linearized 2D matrix with top left corner at [1, 1] + :param x: Column index of the top left corner of the rectangle (starting at 1) + :param y: Row index of the top left corner of the rectangle (starting at 1) + :param width: Width of the rectangle (must be positive) + :param height: Height of the rectangle (must be positive) + :param fill_value: The value to set for the rectangle + :param s_cols: Width of a single image + :param s_rows: Height of a single image + :return: Output images as linearized 2D matrix with top left corner at [1, 1] """ - params_dict = {'yhat': yhat, 'ytest': ytest, 'lm': lm} - return Matrix(yhat.sds_context, - 'lmPredictStats', + params_dict = {'img_in': img_in, 'x': x, 'y': y, 'width': width, 'height': height, 'fill_value': fill_value, 's_cols': s_cols, 's_rows': s_rows} + return Matrix(img_in.sds_context, + 'img_cutout_linearized', named_input_nodes=params_dict) diff --git a/src/main/python/systemds/operator/algorithm/builtin/scaleMinMax.py b/src/main/python/systemds/operator/algorithm/builtin/img_invert_linearized.py similarity index 67% copy from src/main/python/systemds/operator/algorithm/builtin/scaleMinMax.py copy to src/main/python/systemds/operator/algorithm/builtin/img_invert_linearized.py index b2a59859f6..145da81c70 100644 --- a/src/main/python/systemds/operator/algorithm/builtin/scaleMinMax.py +++ b/src/main/python/systemds/operator/algorithm/builtin/img_invert_linearized.py @@ -20,7 +20,7 @@ # 
------------------------------------------------------------- # Autogenerated By : src/main/python/generator/generator.py -# Autogenerated From : scripts/builtin/scaleMinMax.dml +# Autogenerated From : scripts/builtin/img_invert_linearized.dml from typing import Dict, Iterable @@ -29,17 +29,19 @@ from systemds.script_building.dag import OutputType from systemds.utils.consts import VALID_INPUT_TYPES -def scaleMinMax(X: Matrix): +def img_invert_linearized(img_in: Matrix, + max_value: float): """ - This function performs min-max normalization (rescaling to [0,1]). + This is an image data augmentation function. It inverts an image.It can handle one or multiple images - :param X: Input feature matrix - :return: Scaled output matrix + :param img_in: Input matrix/image (every row of the matrix represents a linearized image) + :param max_value: The maximum value pixels can have + :return: Output images (every row of the matrix represents a linearized image) """ - params_dict = {'X': X} - return Matrix(X.sds_context, - 'scaleMinMax', + params_dict = {'img_in': img_in, 'max_value': max_value} + return Matrix(img_in.sds_context, + 'img_invert_linearized', named_input_nodes=params_dict) diff --git a/src/main/python/systemds/operator/algorithm/builtin/lmPredictStats.py b/src/main/python/systemds/operator/algorithm/builtin/img_mirror_linearized.py similarity index 53% copy from src/main/python/systemds/operator/algorithm/builtin/lmPredictStats.py copy to src/main/python/systemds/operator/algorithm/builtin/img_mirror_linearized.py index 731d6d232c..25129e6e5e 100644 --- a/src/main/python/systemds/operator/algorithm/builtin/lmPredictStats.py +++ b/src/main/python/systemds/operator/algorithm/builtin/img_mirror_linearized.py @@ -20,7 +20,7 @@ # ------------------------------------------------------------- # Autogenerated By : src/main/python/generator/generator.py -# Autogenerated From : scripts/builtin/lmPredictStats.dml +# Autogenerated From : 
scripts/builtin/img_mirror_linearized.dml from typing import Dict, Iterable @@ -29,22 +29,24 @@ from systemds.script_building.dag import OutputType from systemds.utils.consts import VALID_INPUT_TYPES -def lmPredictStats(yhat: Matrix, - ytest: Matrix, - lm: bool): +def img_mirror_linearized(img_matrix: Matrix, + horizontal_axis: bool, + original_rows: int, + original_cols: int): """ - This builtin function computes and prints a summary of accuracy - measures for regression problems. + This function has the same functionality with img_mirror but it handles multiple images at + the same time. Each row of the input and output matrix represents a linearized image/matrix + It flips an image on the X (horizontal) or Y (vertical) axis. - - :param yhat: column vector of predicted response values y - :param ytest: column vector of actual response values y - :param lm: indicator if used for linear regression model - :return: column vector holding avg_res, ss_avg_res, and R2 + :param img_matrix: Input matrix/image (every row represents a linearized matrix/image) + :param horizontal_axis: flip either in X or Y axis + :param original_rows: number of rows in the original 2-D images + :param original_cols: number of cols in the original 2-D images + :return: Output matrix/image (every row represents a linearized matrix/image) """ - params_dict = {'yhat': yhat, 'ytest': ytest, 'lm': lm} - return Matrix(yhat.sds_context, - 'lmPredictStats', + params_dict = {'img_matrix': img_matrix, 'horizontal_axis': horizontal_axis, 'original_rows': original_rows, 'original_cols': original_cols} + return Matrix(img_matrix.sds_context, + 'img_mirror_linearized', named_input_nodes=params_dict) diff --git a/src/main/python/systemds/operator/algorithm/builtin/scaleMinMax.py b/src/main/python/systemds/operator/algorithm/builtin/img_posterize_linearized.py similarity index 64% copy from src/main/python/systemds/operator/algorithm/builtin/scaleMinMax.py copy to 
src/main/python/systemds/operator/algorithm/builtin/img_posterize_linearized.py index b2a59859f6..a40c84e73b 100644 --- a/src/main/python/systemds/operator/algorithm/builtin/scaleMinMax.py +++ b/src/main/python/systemds/operator/algorithm/builtin/img_posterize_linearized.py @@ -20,7 +20,7 @@ # ------------------------------------------------------------- # Autogenerated By : src/main/python/generator/generator.py -# Autogenerated From : scripts/builtin/scaleMinMax.dml +# Autogenerated From : scripts/builtin/img_posterize_linearized.dml from typing import Dict, Iterable @@ -29,17 +29,21 @@ from systemds.script_building.dag import OutputType from systemds.utils.consts import VALID_INPUT_TYPES -def scaleMinMax(X: Matrix): +def img_posterize_linearized(img_in: Matrix, + bits: int): """ - This function performs min-max normalization (rescaling to [0,1]). + The Linearized Image Posterize function limits pixel values to 2^bits different values in the range [0, 255]. + Assumes the input image can attain values in the range [0, 255]. - :param X: Input feature matrix - :return: Scaled output matrix + :param img_in: Row linearized input images as 2D matrix + :param bits: The number of bits keep for the values. + 1 means black and white, 8 means every integer between 0 and 255. 
+ :return: Row linearized output images as 2D matrix """ - params_dict = {'X': X} - return Matrix(X.sds_context, - 'scaleMinMax', + params_dict = {'img_in': img_in, 'bits': bits} + return Matrix(img_in.sds_context, + 'img_posterize_linearized', named_input_nodes=params_dict) diff --git a/src/main/python/systemds/operator/algorithm/builtin/img_transform_linearized.py b/src/main/python/systemds/operator/algorithm/builtin/img_transform_linearized.py new file mode 100644 index 0000000000..68fc7bdf1e --- /dev/null +++ b/src/main/python/systemds/operator/algorithm/builtin/img_transform_linearized.py @@ -0,0 +1,62 @@ +# ------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# +# ------------------------------------------------------------- + +# Autogenerated By : src/main/python/generator/generator.py +# Autogenerated From : scripts/builtin/img_transform_linearized.dml + +from typing import Dict, Iterable + +from systemds.operator import OperationNode, Matrix, Frame, List, MultiReturn, Scalar +from systemds.script_building.dag import OutputType +from systemds.utils.consts import VALID_INPUT_TYPES + + +def img_transform_linearized(img_in: Matrix, + out_w: int, + out_h: int, + a: float, + b: float, + c: float, + d: float, + e: float, + f: float, + fill_value: float, + s_cols: int, + s_rows: int): + """ + The Linearized Image Transform function applies an affine transformation to linearized images. + Optionally resizes the image (without scaling). + Uses nearest neighbor sampling. + + + + :param img_in: Linearized input images as 2D matrix with top left corner at [1, 1] + :param out_w: Width of the output matrix + :param out_h: Height of the output matrix + :param a,b,c,d,e,f: The first two rows of the affine matrix in row-major order + :param fill_value: The background of an image + :return: Output images in linearized form as 2D matrix with top left corner at [1, 1] + """ + + params_dict = {'img_in': img_in, 'out_w': out_w, 'out_h': out_h, 'a': a, 'b': b, 'c': c, 'd': d, 'e': e, 'f': f, 'fill_value': fill_value, 's_cols': s_cols, 's_rows': s_rows} + return Matrix(img_in.sds_context, + 'img_transform_linearized', + named_input_nodes=params_dict) diff --git a/src/main/python/systemds/operator/algorithm/builtin/img_translate_linearized.py b/src/main/python/systemds/operator/algorithm/builtin/img_translate_linearized.py new file mode 100644 index 0000000000..d6c63b66c3 --- /dev/null +++ b/src/main/python/systemds/operator/algorithm/builtin/img_translate_linearized.py @@ -0,0 +1,60 @@ +# ------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor 
license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +# ------------------------------------------------------------- + +# Autogenerated By : src/main/python/generator/generator.py +# Autogenerated From : scripts/builtin/img_translate_linearized.dml + +from typing import Dict, Iterable + +from systemds.operator import OperationNode, Matrix, Frame, List, MultiReturn, Scalar +from systemds.script_building.dag import OutputType +from systemds.utils.consts import VALID_INPUT_TYPES + + +def img_translate_linearized(img_in: Matrix, + offset_x: float, + offset_y: float, + out_w: int, + out_h: int, + fill_value: float, + o_w: int, + o_h: int): + """ + This function has the same functionality with img_translate but it handles multiple images at + the same time. Each row of the input and output matrix represents a linearized image/matrix + It translates the image and Optionally resizes the image (without scaling). 
+ + + :param img_in: Input matrix/image (every row represents a linearized matrix/image) + :param offset_x: The distance to move the image in x direction + :param offset_y: The distance to move the image in y direction + :param out_w: Width of the output image + :param out_h: Height of the output image + :param fill_value: The background of the image + :param o_w: Width of the original 2D images + :param o_h: Height of the original 2D images + :return: Output matrix/image (every row represents a linearized matrix/image) + """ + + params_dict = {'img_in': img_in, 'offset_x': offset_x, 'offset_y': offset_y, 'out_w': out_w, 'out_h': out_h, 'fill_value': fill_value, 'o_w': o_w, 'o_h': o_h} + return Matrix(img_in.sds_context, + 'img_translate_linearized', + named_input_nodes=params_dict) diff --git a/src/main/python/systemds/operator/algorithm/builtin/lm.py b/src/main/python/systemds/operator/algorithm/builtin/lm.py index 06f5f6e7c5..3aae122a63 100644 --- a/src/main/python/systemds/operator/algorithm/builtin/lm.py +++ b/src/main/python/systemds/operator/algorithm/builtin/lm.py @@ -33,8 +33,9 @@ def lm(X: Matrix, y: Matrix, **kwargs: Dict[str, VALID_INPUT_TYPES]): """ - The lm-function solves linear regression using either the direct solve method or the conjugate gradient - algorithm depending on the input size of the matrices (See lmDS-function and lmCG-function respectively). + The lm-function solves linear regression using either the direct solve + method or the conjugate gradient algorithm depending on the input size + of the matrices (See lmDS-function and lmCG-function respectively). @@ -47,7 +48,7 @@ def lm(X: Matrix, norm of the beta-residual is less than tolerance * its initial norm :param maxi: Maximum number of conjugate gradient iterations. 
0 = no maximum :param verbose: If TRUE print messages are activated - :return: The model fit + :return: The model fit beta that can be used as input in lmPredict """ params_dict = {'X': X, 'y': y} diff --git a/src/main/python/systemds/operator/algorithm/builtin/lmCG.py b/src/main/python/systemds/operator/algorithm/builtin/lmCG.py index 607579f869..ed4e3cbacb 100644 --- a/src/main/python/systemds/operator/algorithm/builtin/lmCG.py +++ b/src/main/python/systemds/operator/algorithm/builtin/lmCG.py @@ -42,11 +42,11 @@ def lmCG(X: Matrix, :param icpt: Intercept presence, shifting and rescaling the columns of X :param reg: Regularization constant (lambda) for L2-regularization. set to nonzero for highly dependant/sparse/numerous features - :param tol: Tolerance (epsilon); conjugate gradient procedure terminates early if L2 + :param tol: Tolerance (epsilon) conjugate gradient procedure terminates early if L2 norm of the beta-residual is less than tolerance * its initial norm :param maxi: Maximum number of conjugate gradient iterations. 0 = no maximum :param verbose: If TRUE print messages are activated - :return: The model fit + :return: The model fit beta that can be used as input in lmPredict """ params_dict = {'X': X, 'y': y} diff --git a/src/main/python/systemds/operator/algorithm/builtin/lmDS.py b/src/main/python/systemds/operator/algorithm/builtin/lmDS.py index 8fcdbd5b46..2acec88267 100644 --- a/src/main/python/systemds/operator/algorithm/builtin/lmDS.py +++ b/src/main/python/systemds/operator/algorithm/builtin/lmDS.py @@ -42,11 +42,11 @@ def lmDS(X: Matrix, :param icpt: Intercept presence, shifting and rescaling the columns of X :param reg: Regularization constant (lambda) for L2-regularization. 
set to nonzero for highly dependant/sparse/numerous features - :param tol: Tolerance (epsilon); conjugate gradient procedure terminates early if L2 + :param tol: Tolerance (epsilon) conjugate gradient procedure terminates early if L2 norm of the beta-residual is less than tolerance * its initial norm :param maxi: Maximum number of conjugate gradient iterations. 0 = no maximum :param verbose: If TRUE print messages are activated - :return: The model fit + :return: The model fit beta that can be used as input in lmPredict """ params_dict = {'X': X, 'y': y} diff --git a/src/main/python/systemds/operator/algorithm/builtin/lmPredictStats.py b/src/main/python/systemds/operator/algorithm/builtin/lmPredictStats.py index 731d6d232c..f90416078f 100644 --- a/src/main/python/systemds/operator/algorithm/builtin/lmPredictStats.py +++ b/src/main/python/systemds/operator/algorithm/builtin/lmPredictStats.py @@ -38,10 +38,10 @@ def lmPredictStats(yhat: Matrix, - :param yhat: column vector of predicted response values y - :param ytest: column vector of actual response values y - :param lm: indicator if used for linear regression model - :return: column vector holding avg_res, ss_avg_res, and R2 + :param yhat: A column vector of predicted response values y + :param ytest: A column vector of actual response values y + :param lm: An indicator if used for linear regression model + :return: A column vector holding avg_res, ss_avg_res, and R2 """ params_dict = {'yhat': yhat, 'ytest': ytest, 'lm': lm} diff --git a/src/main/python/systemds/operator/algorithm/builtin/multiLogRegPredict.py b/src/main/python/systemds/operator/algorithm/builtin/multiLogRegPredict.py index 3fcd41c25c..b489912e6c 100644 --- a/src/main/python/systemds/operator/algorithm/builtin/multiLogRegPredict.py +++ b/src/main/python/systemds/operator/algorithm/builtin/multiLogRegPredict.py @@ -31,7 +31,6 @@ from systemds.utils.consts import VALID_INPUT_TYPES def multiLogRegPredict(X: Matrix, B: Matrix, - Y: Matrix, **kwargs: 
Dict[str, VALID_INPUT_TYPES]): """ THIS SCRIPT APPLIES THE ESTIMATED PARAMETERS OF MULTINOMIAL LOGISTIC REGRESSION TO A NEW (TEST) DATASET @@ -48,7 +47,7 @@ def multiLogRegPredict(X: Matrix, :return: scalar value of accuracy """ - params_dict = {'X': X, 'B': B, 'Y': Y} + params_dict = {'X': X, 'B': B} params_dict.update(kwargs) vX_0 = Matrix(X.sds_context, '') diff --git a/src/main/python/systemds/operator/algorithm/builtin/normalizeApply.py b/src/main/python/systemds/operator/algorithm/builtin/normalizeApply.py index 063e457f25..8ff4dd2bc6 100644 --- a/src/main/python/systemds/operator/algorithm/builtin/normalizeApply.py +++ b/src/main/python/systemds/operator/algorithm/builtin/normalizeApply.py @@ -41,8 +41,8 @@ def normalizeApply(X: Matrix, :param X: Input feature matrix of shape n-by-m - :param cmin: Colunm minima of shape 1-by-m - :param cmax: Column maxima of shape 1-by-m + :param cmin: Column min of shape 1-by-m + :param cmax: Column max of shape 1-by-m :return: Modified output feature matrix of shape n-by-m """ diff --git a/src/main/python/systemds/operator/algorithm/builtin/scaleMinMax.py b/src/main/python/systemds/operator/algorithm/builtin/scaleMinMax.py index b2a59859f6..da92b53ea9 100644 --- a/src/main/python/systemds/operator/algorithm/builtin/scaleMinMax.py +++ b/src/main/python/systemds/operator/algorithm/builtin/scaleMinMax.py @@ -32,6 +32,8 @@ from systemds.utils.consts import VALID_INPUT_TYPES def scaleMinMax(X: Matrix): """ This function performs min-max normalization (rescaling to [0,1]). + + This function is deprecated, use normalize instead. 
diff --git a/src/main/python/tests/algorithms/test_multiLogReg.py b/src/main/python/tests/algorithms/test_multiLogReg.py index 89e48c32f9..10887ea54b 100644 --- a/src/main/python/tests/algorithms/test_multiLogReg.py +++ b/src/main/python/tests/algorithms/test_multiLogReg.py @@ -67,7 +67,7 @@ class TestMultiLogReg(unittest.TestCase): X), self.sds.from_numpy(Y), verbose=False).compute() [m, y_pred, acc] = multiLogRegPredict(self.sds.from_numpy( - X), self.sds.from_numpy(bias), self.sds.from_numpy(Y), verbose=False).compute() + X), self.sds.from_numpy(bias), Y=self.sds.from_numpy(Y), verbose=False).compute() self.assertTrue(acc > 98) diff --git a/src/main/python/tests/examples/tutorials/test_adult.py b/src/main/python/tests/examples/tutorials/test_adult.py index 8aae4b6aee..edf1449269 100644 --- a/src/main/python/tests/examples/tutorials/test_adult.py +++ b/src/main/python/tests/examples/tutorials/test_adult.py @@ -136,7 +136,7 @@ class TestAdultStandardML(unittest.TestCase): test_y = test_y_frame.transform_apply(spec=jspec_labels, meta=M2) betas = multiLogReg(train_x, train_y, verbose=False) - [_, y_pred, acc] = multiLogRegPredict(test_x, betas, test_y, verbose=False) + [_, y_pred, acc] = multiLogRegPredict(test_x, betas, Y=test_y, verbose=False) [_, conf_avg] = confusionMatrix(y_pred, test_y) confusion_numpy = conf_avg.compute() diff --git a/src/main/python/tests/examples/tutorials/test_mnist.py b/src/main/python/tests/examples/tutorials/test_mnist.py index 3fe297fb4c..89fffe9c4f 100644 --- a/src/main/python/tests/examples/tutorials/test_mnist.py +++ b/src/main/python/tests/examples/tutorials/test_mnist.py @@ -77,7 +77,7 @@ class Test_DMLScript(unittest.TestCase): Yt = Yt + 1.0 bias = multiLogReg(X, Y, verbose = False) - [_, _, acc] = multiLogRegPredict(Xt, bias, Yt, verbose=False).compute() + [_, _, acc] = multiLogRegPredict(Xt, bias, Y=Yt, verbose=False).compute() self.assertGreater(acc, 80) @@ -100,7 +100,7 @@ class Test_DMLScript(unittest.TestCase): Yt = 
self.sds.from_numpy( self.d.get_test_labels()[:test_count]) Yt = Yt + 1.0 - [_, _, acc] = multiLogRegPredict(Xt, bias, Yt).compute(verbose=False) + [_, _, acc] = multiLogRegPredict(Xt, bias, Y=Yt).compute(verbose=False) self.assertGreater(acc, 70) diff --git a/src/main/python/tests/federated/test_federated_mnist.py b/src/main/python/tests/federated/test_federated_mnist.py index c49f64897c..d7cb640157 100644 --- a/src/main/python/tests/federated/test_federated_mnist.py +++ b/src/main/python/tests/federated/test_federated_mnist.py @@ -112,7 +112,7 @@ class TestFederatedMnist(unittest.TestCase): bias = multiLogReg(X, Y) with self.sds.capture_stats_context(): - [_, _, acc] = multiLogRegPredict(Xt, bias, Yt).compute() + [_, _, acc] = multiLogRegPredict(Xt, bias, Y=Yt).compute() stats = self.sds.take_stats() for fed_instr in ["fed_contains", "fed_*", "fed_-", "fed_uark+", "fed_r'", "fed_rightIndex"]: self.assertIn(fed_instr, stats) diff --git a/src/main/python/tests/manual_tests/multi_log_reg_mnist.py b/src/main/python/tests/manual_tests/multi_log_reg_mnist.py index 79cda6d818..080403ea72 100644 --- a/src/main/python/tests/manual_tests/multi_log_reg_mnist.py +++ b/src/main/python/tests/manual_tests/multi_log_reg_mnist.py @@ -35,6 +35,6 @@ with SystemDSContext() as sds: # Test data Xt = sds.from_numpy(d.get_test_data().reshape((10000, 28*28))) Yt = sds.from_numpy(d.get_test_labels()) + 1.0 - [_, _, acc] = multiLogRegPredict(Xt, bias, Yt).compute() + [_, _, acc] = multiLogRegPredict(Xt, bias, Y=Yt).compute() logging.info(acc)
