(systemds) branch main updated: [MINOR] Update Python API

baunsgaard Mon, 30 Dec 2024 05:11:34 -0800

This is an automated email from the ASF dual-hosted git repository.

baunsgaard pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git



The following commit(s) were added to refs/heads/main by this push:
     new c50454930d [MINOR] Update Python API
c50454930d is described below

commit c50454930d7f43d6d46ca1e658efb6064d9e3f4e
Author: Sebastian Baunsgaard <[email protected]>
AuthorDate: Mon Dec 30 14:08:49 2024 +0100

    [MINOR] Update Python API
---
 .../python/systemds/operator/algorithm/__init__.py |  2 +
 .../systemds/operator/algorithm/builtin/adasyn.py  | 62 ++++++++++++++++++++++
 .../operator/algorithm/builtin/incSliceLine.py     |  5 +-
 3 files changed, 67 insertions(+), 2 deletions(-)

diff --git a/src/main/python/systemds/operator/algorithm/__init__.py b/src/main/python/systemds/operator/algorithm/__init__.py
index fe3d40b34c..95eb5dd207 100644
--- a/src/main/python/systemds/operator/algorithm/__init__.py
+++ b/src/main/python/systemds/operator/algorithm/__init__.py
@@ -24,6 +24,7 @@
 from .builtin.WoE import WoE 
 from .builtin.WoEApply import WoEApply 
 from .builtin.abstain import abstain 
+from .builtin.adasyn import adasyn 
 from .builtin.als import als 
 from .builtin.alsCG import alsCG 
 from .builtin.alsDS import alsDS 
@@ -199,6 +200,7 @@ from .builtin.xgboostPredictRegression import xgboostPredictRegression
 __all__ = ['WoE',
  'WoEApply',
  'abstain',
+ 'adasyn',
  'als',
  'alsCG',
  'alsDS',
diff --git a/src/main/python/systemds/operator/algorithm/builtin/adasyn.py b/src/main/python/systemds/operator/algorithm/builtin/adasyn.py
new file mode 100644
index 0000000000..a45c3c9625
--- /dev/null
+++ b/src/main/python/systemds/operator/algorithm/builtin/adasyn.py
@@ -0,0 +1,62 @@
+# -------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# -------------------------------------------------------------
+
+# Autogenerated By   : src/main/python/generator/generator.py
+# Autogenerated From : scripts/builtin/adasyn.dml
+
+from typing import Dict, Iterable
+
+from systemds.operator import OperationNode, Matrix, Frame, List, MultiReturn, Scalar
+from systemds.utils.consts import VALID_INPUT_TYPES
+
+
+def adasyn(X: Matrix,
+           Y: Matrix,
+           **kwargs: Dict[str, VALID_INPUT_TYPES]):
+    """
+     Builtin function for handling class imbalance using Adaptive Synthetic Sampling (ADASYN)
+     by Haibo He et al. In International Joint Conference on Neural Networks (2008). 1322-1328
+    
+    
+    
+    :param X: Feature matrix [shape: n-by-m]
+    :param Y: Class labels [shape: n-by-1]
+    :param k: Number of nearest neighbors
+    :param beta: Desired balance level after generation of synthetic data [0, 1]
+    :param dth: Distribution threshold
+    :param seed: Seed for randomized data point selection
+    :return: Feature matrix of n original rows followed by G = (ml-ms)*beta synthetic rows
+    :return: Class labels aligned with output X
+    """
+
+    params_dict = {'X': X, 'Y': Y}
+    params_dict.update(kwargs)
+    
+    vX_0 = Matrix(X.sds_context, '')
+    vX_1 = Matrix(X.sds_context, '')
+    output_nodes = [vX_0, vX_1, ]
+
+    op = MultiReturn(X.sds_context, 'adasyn', output_nodes, named_input_nodes=params_dict)
+
+    vX_0._unnamed_input_nodes = [op]
+    vX_1._unnamed_input_nodes = [op]
+
+    return op
diff --git a/src/main/python/systemds/operator/algorithm/builtin/incSliceLine.py b/src/main/python/systemds/operator/algorithm/builtin/incSliceLine.py
index e3bb1889f4..f49dbcda41 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/incSliceLine.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/incSliceLine.py
@@ -65,8 +65,9 @@ def incSliceLine(addedX: Matrix,
     :param prevTK: previous top-k slices (for incremental updates)
     :param prevTKC: previous top-k scores (for incremental updates)
     :param encodeLat: flag for encoding output lattice for less memory consumption
-    :param pruningStrat: flag for disabling certain pruning strategies
-        (0 all, 1 all exact (score and size), 2 no score, 3 no size, 4 none)
+    :param pruningStrat: pruning strategy: 0 all pruning, 1 all exact pruning,
+        2 only score pruning, 3 only max score pruning,
+        4 only size pruning, 5 no pruning
     :return: top-k slices (k x ncol(totalX) if successful)
     :return: score, size, error of slices (k x 3)
     :return: debug matrix, populated with enumeration stats if verbose

(systemds) branch main updated: [MINOR] Update Python API

Reply via email to