This is an automated email from the ASF dual-hosted git repository.
baunsgaard pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git
The following commit(s) were added to refs/heads/main by this push:
new c34ea60da2 [SYSTEMDS-3434] Python Startup optimizations
c34ea60da2 is described below
commit c34ea60da241ad0d11b37565265b7824e5012d81
Author: baunsgaard <[email protected]>
AuthorDate: Tue Sep 6 10:00:21 2022 +0200
[SYSTEMDS-3434] Python Startup optimizations
This commit addresses the Python API's slow performance when executing
scripts by reducing the constant startup overhead of the Python API,
bringing the previous overhead of up to ~0.5 sec down to ~0.1 sec.
Also included is a better approach to printing in Python: output is no
longer piped into Python arrays but is processed directly. This design change
means we no longer have to look at the captured output to determine when the
JVM is ready for requests, and it allows live printing of output from SystemDS.
Logging has also been introduced in the API to allow custom logging
levels for the execution, reducing clutter in the output, and the tests
have been fixed so that they do not print to the output during test execution.
Closes #1693
---
pom.xml | 3 +
src/main/java/org/apache/sysds/api/DMLOptions.java | 5 +-
.../java/org/apache/sysds/api/PythonDMLScript.java | 49 ++--
src/main/python/.gitignore | 1 +
.../code/getting_started/simpleExamples/l2svm.py | 2 +-
.../simpleExamples/l2svm_internal.py | 2 +-
.../source/code/guide/algorithms/FullScript.py | 4 +-
.../python/systemds/context/systemds_context.py | 260 +++++++++++----------
src/main/python/tests/algorithms/test_lm.py | 2 +-
.../python/tests/algorithms/test_multiLogReg.py | 21 +-
.../python/tests/basics/test_context_creation.py | 11 +
.../tests/examples/tutorials/neural_net_source.dml | 12 +-
.../python/tests/examples/tutorials/test_adult.py | 41 ++--
.../tests/examples/tutorials/test_adult_neural.py | 2 +-
.../python/tests/examples/tutorials/test_mnist.py | 13 +-
src/main/python/tests/frame/test_hyperband.py | 1 +
src/main/python/tests/lineage/test_lineagetrace.py | 29 +--
.../multi_log_reg_adult.py => lineage/trace1.dml} | 9 +-
.../tests/manual_tests/multi_log_reg_adult.py | 5 +-
.../tests/manual_tests/multi_log_reg_mnist.py | 4 +-
.../tests/manual_tests/save_log_reg_mnist_sysds.py | 9 +-
.../for_loop_time_run.dml} | 11 +-
.../for_loop_time_run.py} | 11 +-
.../manual_tests/time/run.sh} | 36 ++-
.../startup_time_run.dml} | 9 +-
.../startup_time_run.py} | 10 +-
src/main/python/tests/matrix/test_print.py | 2 +-
src/main/python/tests/script/test_dml_script.py | 2 +-
.../python/tests/source/test_source_no_return.py | 2 +-
.../sysds/test/usertest/pythonapi/StartupTest.java | 7 -
30 files changed, 303 insertions(+), 272 deletions(-)
diff --git a/pom.xml b/pom.xml
index 1ace5488d7..7b8116522a 100644
--- a/pom.xml
+++ b/pom.xml
@@ -577,6 +577,7 @@
<excludes>
<exclude>scripts/perftest/results/**</exclude>
<exclude>scripts/perftest/temp/**</exclude>
+
<exclude>scripts/perftest/logs/**</exclude>
<exclude>.gitignore</exclude>
<exclude>src/main/python/.gitignore</exclude>
<exclude>.gitmodules</exclude>
@@ -590,6 +591,8 @@
<exclude>src/main/python/docs/build/**/*</exclude>
<exclude>src/main/python/docs/source/_build/**</exclude>
<exclude>src/main/python/generator/resources/**</exclude>
+
<exclude>src/main/python/generator.log</exclude>
+
<exclude>**/systemds.egg-info/**</exclude>
<exclude>docs/api/**/*</exclude>
<exclude>docs/_site/**/*</exclude>
<exclude>docs/site/run_issues.md</exclude>
diff --git a/src/main/java/org/apache/sysds/api/DMLOptions.java
b/src/main/java/org/apache/sysds/api/DMLOptions.java
index d9bbe260bc..7686d84d52 100644
--- a/src/main/java/org/apache/sysds/api/DMLOptions.java
+++ b/src/main/java/org/apache/sysds/api/DMLOptions.java
@@ -259,9 +259,8 @@ public class DMLOptions {
}
}
- if (line.hasOption("python")){
+ if (line.hasOption("python"))
dmlOptions.pythonPort =
Integer.parseInt(line.getOptionValue("python"));
- }
// Named arguments map is created as ("$K, 123), ("$X",
"X.csv"), etc
if (line.hasOption("nvargs")){
@@ -347,7 +346,7 @@ public class DMLOptions {
Option debugOpt = OptionBuilder.withDescription("runs in debug
mode; default off")
.create("debug");
Option pythonOpt = OptionBuilder
- .withDescription("Python Context start with port
argument for communication to python")
+ .withDescription("Python Context start with port
argument for communication to from python to java")
.isRequired().hasArg().create("python");
Option monitorIdOpt = OptionBuilder
.withDescription("Coordinator context start
with monitorId argument for monitoring registration")
diff --git a/src/main/java/org/apache/sysds/api/PythonDMLScript.java
b/src/main/java/org/apache/sysds/api/PythonDMLScript.java
index 61a2ca823f..c4957d4e9f 100644
--- a/src/main/java/org/apache/sysds/api/PythonDMLScript.java
+++ b/src/main/java/org/apache/sysds/api/PythonDMLScript.java
@@ -30,7 +30,8 @@ import py4j.Py4JServerConnection;
public class PythonDMLScript {
- private Connection _connection;
+ private static final Log LOG =
LogFactory.getLog(PythonDMLScript.class.getName());
+ final private Connection _connection;
/**
* Entry point for Python API.
@@ -41,13 +42,22 @@ public class PythonDMLScript {
public static void main(String[] args) throws Exception {
final DMLOptions dmlOptions = DMLOptions.parseCLArguments(args);
DMLScript.loadConfiguration(dmlOptions.configFile);
- start(dmlOptions.pythonPort);
- }
-
- private static void start(int port) throws Py4JNetworkException {
- GatewayServer GwS = new GatewayServer(new PythonDMLScript(),
port);
+ final GatewayServer GwS = new GatewayServer(new
PythonDMLScript(), dmlOptions.pythonPort);
GwS.addListener(new DMLGateWayListener());
- GwS.start();
+ try {
+ GwS.start();
+ }
+ catch(Py4JNetworkException p4e) {
+ /**
+ * This sometimes happens when the startup is using a
port already in use. In this case we handle it in python
+ * therefore use logging framework. and terminate
program.
+ */
+ LOG.info("failed startup", p4e);
+ System.exit(-1);
+ }
+ catch(Exception e) {
+ throw new DMLException("Failed startup and maintaining
Python gateway", e);
+ }
}
private PythonDMLScript() {
@@ -60,53 +70,52 @@ public class PythonDMLScript {
public Connection getConnection() {
return _connection;
}
-
+
protected static class DMLGateWayListener implements
GatewayServerListener {
private static final Log LOG =
LogFactory.getLog(DMLGateWayListener.class.getName());
-
+
@Override
public void connectionError(Exception e) {
LOG.warn("Connection error: " + e.getMessage());
System.exit(1);
}
-
+
@Override
public void connectionStarted(Py4JServerConnection
gatewayConnection) {
LOG.debug("Connection Started: " +
gatewayConnection.toString());
}
-
+
@Override
public void connectionStopped(Py4JServerConnection
gatewayConnection) {
LOG.debug("Connection stopped: " +
gatewayConnection.toString());
}
-
+
@Override
public void serverError(Exception e) {
LOG.error("Server Error " + e.getMessage());
}
-
+
@Override
public void serverPostShutdown() {
LOG.info("Shutdown done");
System.exit(0);
}
-
+
@Override
public void serverPreShutdown() {
LOG.info("Starting JVM shutdown");
}
-
+
@Override
public void serverStarted() {
- // message the python interface that the JVM is ready.
- System.out.println("GatewayServer Started");
+ LOG.info("GatewayServer Started");
}
-
+
@Override
public void serverStopped() {
- System.out.println("GatewayServer Stopped");
+ LOG.info("GatewayServer Stopped");
System.exit(0);
}
}
-}
+}
diff --git a/src/main/python/.gitignore b/src/main/python/.gitignore
index ad5ef4dc44..1df6248167 100644
--- a/src/main/python/.gitignore
+++ b/src/main/python/.gitignore
@@ -21,3 +21,4 @@ tests/federated/tmp
tests/list/tmp
tests/algorithms/readwrite/
tests/examples/tutorials/model
+tests/lineage/temp
diff --git
a/src/main/python/docs/source/code/getting_started/simpleExamples/l2svm.py
b/src/main/python/docs/source/code/getting_started/simpleExamples/l2svm.py
index fdc0030924..eee6a07f39 100644
--- a/src/main/python/docs/source/code/getting_started/simpleExamples/l2svm.py
+++ b/src/main/python/docs/source/code/getting_started/simpleExamples/l2svm.py
@@ -42,5 +42,5 @@ for i in range(10):
# compute our model
with SystemDSContext() as sds:
model = l2svm(sds.from_numpy(features),
- sds.from_numpy(labels)).compute()
+ sds.from_numpy(labels), verbose=False).compute()
logging.info(model)
diff --git
a/src/main/python/docs/source/code/getting_started/simpleExamples/l2svm_internal.py
b/src/main/python/docs/source/code/getting_started/simpleExamples/l2svm_internal.py
index 2f9fc62574..b955160b10 100644
---
a/src/main/python/docs/source/code/getting_started/simpleExamples/l2svm_internal.py
+++
b/src/main/python/docs/source/code/getting_started/simpleExamples/l2svm_internal.py
@@ -31,5 +31,5 @@ with SystemDSContext() as sds:
# Generate labels of all ones and zeros
labels = sds.rand(10, 1, 1, 1, sparsity=0.5)
- model = l2svm(features, labels).compute()
+ model = l2svm(features, labels, verbose=False).compute()
logging.info(model)
diff --git a/src/main/python/docs/source/code/guide/algorithms/FullScript.py
b/src/main/python/docs/source/code/guide/algorithms/FullScript.py
index 9c213d0129..0340886175 100644
--- a/src/main/python/docs/source/code/guide/algorithms/FullScript.py
+++ b/src/main/python/docs/source/code/guide/algorithms/FullScript.py
@@ -35,10 +35,10 @@ with SystemDSContext() as sds:
# Train Data
X_ds = sds.from_numpy(X)
Y_ds = sds.from_numpy(Y) + 1.0
- bias = multiLogReg(X_ds, Y_ds, maxi=30)
+ bias = multiLogReg(X_ds, Y_ds, maxi=30, verbose=False)
# Test data
Xt_ds = sds.from_numpy(Xt)
Yt_ds = sds.from_numpy(Yt) + 1.0
- [m, y_pred, acc] = multiLogRegPredict(Xt_ds, bias, Yt_ds).compute()
+ [m, y_pred, acc] = multiLogRegPredict(Xt_ds, bias, Yt_ds,
verbose=False).compute()
logging.info(acc)
diff --git a/src/main/python/systemds/context/systemds_context.py
b/src/main/python/systemds/context/systemds_context.py
index 32f0f90029..21d0915e63 100644
--- a/src/main/python/systemds/context/systemds_context.py
+++ b/src/main/python/systemds/context/systemds_context.py
@@ -36,7 +36,7 @@ from typing import Dict, Iterable, Sequence, Tuple, Union
import numpy as np
import pandas as pd
-from py4j.java_gateway import GatewayParameters, JavaGateway
+from py4j.java_gateway import GatewayParameters, JavaGateway, Py4JNetworkError
from systemds.operator import (Frame, List, Matrix, OperationNode, Scalar,
Source)
from systemds.script_building import DMLScript, OutputType
@@ -49,15 +49,21 @@ class SystemDSContext(object):
The java process is started and is running using a random tcp port for
instruction parsing.
This class is used as the starting point for all SystemDS execution. It
gives the ability to create
- all the different objects and adding them to the exectution.
+ all the different objects and adding them to the execution.
"""
java_gateway: JavaGateway
- _capture_statistics: bool
- _statistics: str
+ _capture_statistics: bool = False
+ _statistics: str = ""
_log: logging.Logger
-
- def __init__(self, port: int = -1, capture_statistics: bool = False,
logging_level: int = 20):
+ __stdout: Queue = None
+ __stderr: Queue = None
+
+ def __init__(self, port: int = -1,
+ capture_statistics: bool = False,
+ capture_stdout: bool = False,
+ logging_level: int = 20,
+ py4j_logging_level: int = 50):
"""Starts a new instance of SystemDSContext, in which the connection
to a JVM systemds instance is handled
Any new instance of this SystemDS Context, would start a separate new
JVM.
@@ -66,23 +72,15 @@ class SystemDSContext(object):
:param port: default -1, giving a random port for communication with
JVM
:param capture_statistics: If the statistics of the execution in
SystemDS should be captured
+ :param capture_stdout: If the standard out should be captured in Java
SystemDS and maintained in ques
:param logging_level: Specify the logging level used for informative
messages, default 20 indicating INFO.
The logging levels are as follows: 10 DEBUG, 20 INFO, 30 WARNING, 40
ERROR, 50 CRITICAL.
+ :param py4j_logging_level: The logging level for Py4j to use, since
all communication to the JVM is done through this,
+ it can be verbose if not set high.
"""
- actual_port = self.__start(port)
- process = self.__process
- self._statistics = ""
- self._capture_statistics = capture_statistics
-
- self._log = logging.Logger(self.__class__.__name__)
- self._log.setLevel(logging_level)
-
- if process.poll() is None:
- self.__start_gateway(actual_port)
- else:
- self.exception_and_close(
- "Java process stopped before gateway could connect")
-
+ self.__setup_logging(logging_level, py4j_logging_level)
+ self.__start(port, capture_stdout)
+ self.capture_stats(capture_statistics)
self._log.debug("Started JVM and SystemDS python context manager")
def get_stdout(self, lines: int = -1):
@@ -91,10 +89,13 @@ class SystemDSContext(object):
:param lines: The number of lines to try to read from the stdout queue.
default -1 prints all current lines in the queue.
"""
- if lines == -1 or self.__stdout.qsize() < lines:
- return [self.__stdout.get() for x in range(self.__stdout.qsize())]
+ if self.__stdout:
+ if (lines == -1 or self.__stdout.qsize() < lines):
+ return [self.__stdout.get() for x in
range(self.__stdout.qsize())]
+ else:
+ return [self.__stdout.get() for x in range(lines)]
else:
- return [self.__stdout.get() for x in range(lines)]
+ return []
def get_stderr(self, lines: int = -1):
"""Getter for the stderr of the java subprocess
@@ -102,10 +103,13 @@ class SystemDSContext(object):
:param lines: The number of lines to try to read from the stderr queue.
default -1 prints all current lines in the queue.
"""
- if lines == -1 or self.__stderr.qsize() < lines:
- return [self.__stderr.get() for x in range(self.__stderr.qsize())]
+ if self.__stderr:
+ if lines == -1 or self.__stderr.qsize() < lines:
+ return [self.__stderr.get() for x in
range(self.__stderr.qsize())]
+ else:
+ return [self.__stderr.get() for x in range(lines)]
else:
- return [self.__stderr.get() for x in range(lines)]
+ return []
def exception_and_close(self, exception, trace_back_limit: int = None):
"""
@@ -127,51 +131,29 @@ class SystemDSContext(object):
self.close()
raise RuntimeError(message)
- def __try_startup(self, command) -> bool:
-
- self.__process = Popen(command, stdout=PIPE, stdin=PIPE, stderr=PIPE)
-
- # Handle Std out from the subprocess.
- self.__stdout = Queue()
- self.__stderr = Queue()
-
- self.__stdout_thread = Thread(target=self.__enqueue_output, args=(
- self.__process.stdout, self.__stdout), daemon=True)
+ def __try_startup(self, command: str, capture_stdout: bool) -> Popen:
+ if(capture_stdout):
+ process = Popen(command, stdout=PIPE, stdin=PIPE, stderr=PIPE)
- self.__stderr_thread = Thread(target=self.__enqueue_output, args=(
- self.__process.stderr, self.__stderr), daemon=True)
+ # Handle Std out from the subprocess.
+ self.__stdout = Queue()
+ self.__stderr = Queue()
- self.__stdout_thread.start()
- self.__stderr_thread.start()
+ self.__stdout_thread = Thread(target=self.__enqueue_output, args=(
+ process.stdout, self.__stdout), daemon=True)
- return self.__verify_startup(command)
+ self.__stderr_thread = Thread(target=self.__enqueue_output, args=(
+ process.stderr, self.__stderr), daemon=True)
- def __verify_startup(self, command) -> bool:
- first_stdout = self.get_stdout()
- if(not "GatewayServer Started" in first_stdout):
- return self.__verify_startup_retry(command)
+ self.__stdout_thread.start()
+ self.__stderr_thread.start()
+ return process
else:
- return True
-
- def __verify_startup_retry(self, command, retry: int = 1) -> bool:
- sleep(0.8 * retry)
- stdout = self.get_stdout()
- if "GatewayServer Started" in stdout:
- return True, ""
- elif retry < 3: # retry 3 times
- return self.__verify_startup_retry(command, retry + 1)
- else:
- error_message = "Error in startup of systemDS gateway process:"
- error_message += "\n" + " ".join(command)
- stderr = self.get_stderr()
- if len(stderr) > 0:
- error_message += "\n" + "\n".join(stderr)
- if len(stdout) > 0:
- error_message += "\n\n" + "\n".join(stdout)
- self.__error_message = error_message
- return False
+ return Popen(command)
def __build_startup_command(self, port: int):
+ """Build the command line argument for the startup of the JVM
+ :param port: The port address to use if -1 chose random port."""
command = ["java", "-cp"]
root = os.environ.get("SYSTEMDS_ROOT")
@@ -236,48 +218,59 @@ class SystemDSContext(object):
return command, actual_port
- def __start(self, port: int):
+ def __start(self, port: int, capture_stdout: bool, retry: int = 0):
+ """Starts the JVM and establishes connection to it via calls to:
+ jvm.System.currentTimeMillis()
+ If the connection is not established the connection is
+ tried to be established multiple times,
+ and if this fails new JVMs are allocated up to a total of 3 times.
+
+ :param port: The port to try, if -1 chose random.
+ :param capture_stdout: If the output of the JVM should be captured.
+ :param retry: The Retry number of the current startup.
+ """
+ if retry > 3:
+ raise Exception(
+ "Failed startup of SystemDS Context with 3 repeats")
+
+ if port != -1 and self.__is_port_in_use(port):
+ port = -1
command, actual_port = self.__build_startup_command(port)
- success = self.__try_startup(command)
-
- if not success:
- retry = 1
- while not success and retry < 3:
- self.__kill_Popen(self.__process)
- # retry after waiting a bit.
- sleep(3 * retry)
- self.close()
- self.__error_message = None
- success, command, actual_port = self.__retry_start(retry)
- retry = retry + 1
- if not success:
- self.exception_and_close(self.__error_message)
- return actual_port
-
- def __retry_start(self, ret):
- command, actual_port = self.__build_startup_command(-1)
- success = self.__try_startup(command)
- return success, command, actual_port
-
- def __start_gateway(self, actual_port: int):
- process = self.__process
- gwp = GatewayParameters(port=actual_port, eager_load=True)
- self.__retry_start_gateway(process, gwp)
- def __retry_start_gateway(self, process: Popen, gwp: GatewayParameters,
retry: int = 0):
+ # Verify the port intended is available.
+ while self.__is_port_in_use(actual_port):
+ command, actual_port, = self.__build_startup_command(actual_port)
+
+ process = self.__try_startup(command, capture_stdout)
+
+ # Eager load to test connection to the JVM via calls to
currentTimeMillis
+ gwp = GatewayParameters(port=actual_port, eager_load=True)
try:
- self.java_gateway = JavaGateway(
- gateway_parameters=gwp, java_process=process)
- self.__process = None # On success clear process variable
- return
- except:
- sleep(3 * retry)
- if retry < 3:
- self.__retry_start_gateway(process, gwp, retry + 1)
- return
- else:
- e = "Error in startup of Java Gateway"
- self.exception_and_close(e)
+ connect_retry = 1
+ max_retry = 25
+ while connect_retry < max_retry:
+ # Increasing sleep time to establish connection.
+ sleep_time = 0.04 * connect_retry
+ sleep(sleep_time)
+ try:
+ self.java_gateway = JavaGateway(
+ gateway_parameters=gwp, java_process=process)
+ # Successful startup.
+ return
+ except Py4JNetworkError as pe:
+ m = str(pe)
+ if "An error occurred while trying to connect to the Java
server" in m:
+ # Here the startup failed because the java process is
not ready.
+ connect_retry += 1
+ else: # unknown new error or java process crashed
+ raise pe
+ except Exception as e:
+ raise Exception(
+ "Exception hit when connecting to JavaGateway, perhaps
the JVM terminated because of port", e)
+ raise Exception("Failed to connect to process, making a new JVM")
+ except Exception:
+ self.__kill_Popen(process)
+ self.__start(-1, capture_stdout, retry + 1)
def __enter__(self):
return self
@@ -301,12 +294,14 @@ class SystemDSContext(object):
self.__stderr_thread.join(0)
def __kill_Popen(self, process: Popen):
+ """Stop the process at the Popen.
+ :param process: The process to stop"""
process.kill()
process.__exit__(None, None, None)
def __enqueue_output(self, out, queue):
"""Method for handling the output from java.
- It is locating the string handeling inside a different thread, since
the 'out.readline' is a blocking command.
+ It is locating the string handling inside a different thread, since
the 'out.readline' is a blocking command.
"""
for line in iter(out.readline, b""):
line_string = line.decode("utf-8")
@@ -316,14 +311,17 @@ class SystemDSContext(object):
"""Get a random available port.
and hope that no other process steals it while we wait for the JVM to
startup
"""
- #
https://stackoverflow.com/questions/2838244/get-open-tcp-port-in-python
-
- s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
- s.bind(("", 0))
- s.listen(1)
- port = s.getsockname()[1]
- s.close()
- return port
+ with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
+ s.bind(("", 0))
+ s.listen(1)
+ return s.getsockname()[1]
+
+ def __is_port_in_use(self, port: int) -> bool:
+ """Get if the given port is in use.
+ :param port: The port to analyze"""
+ #
https://stackoverflow.com/questions/2470971/fast-way-to-test-if-a-port-is-in-use-using-python
+ with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
+ return s.connect_ex(('localhost', port)) == 0
def _execution_completed(self, script: DMLScript):
"""
@@ -365,8 +363,7 @@ class SystemDSContext(object):
self.capture_stats(was_enabled)
def get_stats(self):
- """
- Get the captured statistics. Will not clear the captured statistics.
+ """Get the captured statistics. Will not clear the captured statistics.
See `take_stats()` for an option that also clears the captured
statistics.
:return: The captured statistics
@@ -374,8 +371,7 @@ class SystemDSContext(object):
return self._statistics
def take_stats(self):
- """
- Get the captured statistics and clear the captured statistics.
+ """Get the captured statistics and clear the captured statistics.
See `get_stats()` for an option that does not clear the captured
statistics.
:return: The captured statistics
@@ -385,8 +381,7 @@ class SystemDSContext(object):
return stats
def clear_stats(self):
- """
- Clears the captured statistics.
+ """Clears the captured statistics.
"""
self._statistics = ""
@@ -467,7 +462,7 @@ class SystemDSContext(object):
return f'"{nf}"'
def read(self, path: os.PathLike, **kwargs: Dict[str, VALID_INPUT_TYPES])
-> OperationNode:
- """ Read an file from disk. Supportted types include:
+ """ Read an file from disk. Supported types include:
CSV, Matrix Market(coordinate), Text(i,j,v), SystemDS Binary, etc.
See:
http://apache.github.io/systemds/site/dml-language-reference#readwrite-built-in-functions
for more details
:return: an Operation Node, containing the read data the operationNode
read can be of types, Matrix, Frame or Scalar.
@@ -608,7 +603,7 @@ class SystemDSContext(object):
def source(self, path: str, name: str) -> Source:
"""Import methods from a given dml file.
- The importing is done thorugh the DML command source, and adds all
defined methods from
+ The importing is done through the DML command source, and adds all
defined methods from
the script to the Source object returned in python. This gives the
flexibility to call the methods
directly on the object returned.
@@ -652,3 +647,28 @@ class SystemDSContext(object):
:return: A List
"""
return List(self, named_input_nodes=kwargs)
+
+ def __setup_logging(self, level: int, py4j_level: int):
+ """Setup the logging infrastructure of the Python API, note this does
not effect the JVM part.
+ This method also reset the loggers to only have one handler.
+
+ :param level: The SystemDS logging part logging level.
+ :param py4j_level: The Py4J logging level.
+ """
+ logging.basicConfig()
+ py4j = logging.getLogger("py4j.java_gateway")
+ py4j.setLevel(py4j_level)
+ py4j.propagate = False
+
+ self._log = logging.getLogger(self.__class__.__name__)
+ f_handler = logging.StreamHandler()
+ f_handler.setLevel(level)
+ f_format = logging.Formatter(
+ '%(asctime)s - SystemDS- %(levelname)s - %(message)s')
+ f_handler.setFormatter(f_format)
+ self._log.addHandler
+ # avoid the logger to call loggers above.
+ self._log.propagate = False
+ # Reset all handlers to only this new handler.
+ self._log.handlers = [f_handler]
+ self._log.debug("Logging setup done")
diff --git a/src/main/python/tests/algorithms/test_lm.py
b/src/main/python/tests/algorithms/test_lm.py
index 779c543775..19833eda49 100644
--- a/src/main/python/tests/algorithms/test_lm.py
+++ b/src/main/python/tests/algorithms/test_lm.py
@@ -51,7 +51,7 @@ class TestLm(unittest.TestCase):
X_sds = self.sds.from_numpy(X)
Y_sds = self.sds.from_numpy(Y)
- sds_model_weights = lm(X_sds, Y_sds).compute()
+ sds_model_weights = lm(X_sds, Y_sds, verbose=False).compute()
model = model.reshape(sds_model_weights.shape)
eps = 1e-03
diff --git a/src/main/python/tests/algorithms/test_multiLogReg.py
b/src/main/python/tests/algorithms/test_multiLogReg.py
index e326f3cb74..89e48c32f9 100644
--- a/src/main/python/tests/algorithms/test_multiLogReg.py
+++ b/src/main/python/tests/algorithms/test_multiLogReg.py
@@ -46,13 +46,14 @@ class TestMultiLogReg(unittest.TestCase):
[X, labels, Y] = self.gen_data()
# Call algorithm
- bias =
multiLogReg(self.sds.from_numpy(X),self.sds.from_numpy(Y)).compute()
-
+ bias = multiLogReg(self.sds.from_numpy(
+ X), self.sds.from_numpy(Y), verbose=False).compute()
+
# Calculate result.
res = np.reshape(np.dot(X, bias[:len(X[0])]) + bias[len(X[0])], (250))
- f2 = lambda x: (x < 0) + 1
+ def f2(x): return (x < 0) + 1
accuracy = np.sum(labels == f2(res)) / 250 * 100
-
+
self.assertTrue(accuracy > 98)
def test_using_predict(self):
@@ -62,20 +63,21 @@ class TestMultiLogReg(unittest.TestCase):
"""
[X, labels, Y] = self.gen_data()
# Call algorithm
- bias =
multiLogReg(self.sds.from_numpy(X),self.sds.from_numpy(Y)).compute()
+ bias = multiLogReg(self.sds.from_numpy(
+ X), self.sds.from_numpy(Y), verbose=False).compute()
- [m, y_pred, acc] =
multiLogRegPredict(self.sds.from_numpy(X),self.sds.from_numpy(bias),
self.sds.from_numpy(Y)).compute()
+ [m, y_pred, acc] = multiLogRegPredict(self.sds.from_numpy(
+ X), self.sds.from_numpy(bias), self.sds.from_numpy(Y),
verbose=False).compute()
self.assertTrue(acc > 98)
-
def gen_data(self):
np.random.seed(13241)
# Generate data
mu, sigma = 1, 0.1
- X = np.reshape(np.random.normal(mu, sigma, 500), (2,250))
+ X = np.reshape(np.random.normal(mu, sigma, 500), (2, 250))
# All over 1 is true
- f = lambda x: (x[0] > 1) + 1
+ def f(x): return (x[0] > 1) + 1
labels = f(X)
# Y labels as double
Y = np.array(labels, dtype=np.double)
@@ -83,5 +85,6 @@ class TestMultiLogReg(unittest.TestCase):
X = X.transpose()
return X, labels, Y
+
if __name__ == "__main__":
unittest.main(exit=False)
diff --git a/src/main/python/tests/basics/test_context_creation.py
b/src/main/python/tests/basics/test_context_creation.py
index 702a4a6859..920dace8be 100644
--- a/src/main/python/tests/basics/test_context_creation.py
+++ b/src/main/python/tests/basics/test_context_creation.py
@@ -20,12 +20,23 @@
# -------------------------------------------------------------
import unittest
+import logging
from systemds.context import SystemDSContext
class TestContextCreation(unittest.TestCase):
+ def test_random_port(self):
+ sds1 = SystemDSContext()
+ sds1.close()
+
+ def test_two_random_port(self):
+ sds1 = SystemDSContext(logging_level=20)
+ sds2 = SystemDSContext(logging_level=20)
+ sds1.close()
+ sds2.close()
+
def test_same_port(self):
# Same port should graciously change port
sds1 = SystemDSContext(port=9415)
diff --git a/src/main/python/tests/examples/tutorials/neural_net_source.dml
b/src/main/python/tests/examples/tutorials/neural_net_source.dml
index 8615f04991..6c2bcb3955 100644
--- a/src/main/python/tests/examples/tutorials/neural_net_source.dml
+++ b/src/main/python/tests/examples/tutorials/neural_net_source.dml
@@ -30,10 +30,10 @@ source("nn/optim/sgd.dml") as sgd
init_model = function(Integer inputDimension, Integer outputDimension, int
seed = -1)
return(list[unknown] model){
[W1, b1] = affine::init(inputDimension, 200, seed = seed)
- lseed = ifelse(seed==-1, -1, seed + 1);
- [W2, b2] = affine::init(200, 200, seed = lseed)
- lseed = ifelse(seed==-1, -1, seed + 2);
- [W3, b3] = affine::init(200, outputDimension, seed = lseed)
+ l_seed = ifelse(seed==-1, -1, seed + 1);
+ [W2, b2] = affine::init(200, 200, seed = l_seed)
+ l_seed = ifelse(seed==-1, -1, seed + 2);
+ [W3, b3] = affine::init(200, outputDimension, seed = l_seed)
model = list(W1, W2, W3, b1, b2, b3)
}
@@ -75,8 +75,8 @@ gradients = function(list[unknown] model,
probs = softmax::forward(out3)
# Compute loss & accuracy for training data
- loss = cross_entropy_loss::forward(probs, labels)
- print("Batch loss: " + loss)
+ # loss = cross_entropy_loss::forward(probs, labels)
+ # print("Batch loss: " + loss)
# Compute data backward pass
# Note it is same arguments as forward with one extra argument in front
diff --git a/src/main/python/tests/examples/tutorials/test_adult.py
b/src/main/python/tests/examples/tutorials/test_adult.py
index c6b1018658..8aae4b6aee 100644
--- a/src/main/python/tests/examples/tutorials/test_adult.py
+++ b/src/main/python/tests/examples/tutorials/test_adult.py
@@ -69,60 +69,57 @@ class TestAdultStandardML(unittest.TestCase):
self.assertEqual((16281, 1), y_l.shape)
def test_train_data_pandas_vs_systemds(self):
- pandas = self.d.get_train_data_pandas()
- systemds = self.d.get_train_data(self.sds).compute()
+ pandas = self.d.get_train_data_pandas()[0:2000]
+ systemds = self.d.get_train_data(self.sds)[0:2000].compute()
self.assertTrue(len(pandas.columns.difference(systemds.columns)) == 0)
self.assertEqual(pandas.shape, systemds.shape)
def test_train_labels_pandas_vs_systemds(self):
# Pandas does not strip the parsed values.. so i have to do it here.
- pandas = np.array(
- [x.strip() for x in
self.d.get_train_labels_pandas().to_numpy().flatten()])
- systemds = self.d.get_train_labels(
- self.sds).compute().to_numpy().flatten()
+ pandas = np.array([x.strip() for x in
self.d.get_train_labels_pandas()[0:2000].to_numpy().flatten()])
+ systemds =
self.d.get_train_labels(self.sds)[0:2000].compute().to_numpy().flatten()
comp = pandas == systemds
self.assertTrue(comp.all())
def test_test_labels_pandas_vs_systemds(self):
# Pandas does not strip the parsed values.. so i have to do it here.
pandas = np.array(
- [x.strip() for x in
self.d.get_test_labels_pandas().to_numpy().flatten()])
- systemds = self.d.get_test_labels(
- self.sds).compute().to_numpy().flatten()
+ [x.strip() for x in
self.d.get_test_labels_pandas()[0:2000].to_numpy().flatten()])
+ systemds =
self.d.get_test_labels(self.sds)[0:2000].compute().to_numpy().flatten()
comp = pandas == systemds
self.assertTrue(comp.all())
def test_transform_encode_train_data(self):
jspec = self.d.get_jspec(self.sds)
- train_x, M1 =
self.d.get_train_data(self.sds).transform_encode(spec=jspec)
+ train_x, M1 =
self.d.get_train_data(self.sds)[0:2000].transform_encode(spec=jspec)
train_x_numpy = train_x.compute()
- self.assertEqual((32561, 107), train_x_numpy.shape)
+ self.assertEqual((2000, 101), train_x_numpy.shape)
def test_transform_encode_apply_test_data(self):
jspec = self.d.get_jspec(self.sds)
- train_x, M1 =
self.d.get_train_data(self.sds).transform_encode(spec=jspec)
- test_x = self.d.get_test_data(self.sds).transform_apply(spec=jspec,
meta=M1)
+ train_x, M1 =
self.d.get_train_data(self.sds)[0:2000].transform_encode(spec=jspec)
+ test_x =
self.d.get_test_data(self.sds)[0:2000].transform_apply(spec=jspec, meta=M1)
test_x_numpy = test_x.compute()
- self.assertEqual((16281, 107), test_x_numpy.shape)
+ self.assertEqual((2000, 101), test_x_numpy.shape)
def test_transform_encode_train_labels(self):
jspec_dict = {"recode":["income"]}
jspec = self.sds.scalar(f'"{jspec_dict}"')
- train_y, M1 =
self.d.get_train_labels(self.sds).transform_encode(spec=jspec)
+ train_y, M1 =
self.d.get_train_labels(self.sds)[0:2000].transform_encode(spec=jspec)
train_y_numpy = train_y.compute()
- self.assertEqual((32561, 1), train_y_numpy.shape)
+ self.assertEqual((2000, 1), train_y_numpy.shape)
def test_transform_encode_test_labels(self):
jspec_dict = {"recode":["income"]}
jspec = self.sds.scalar(f'"{jspec_dict}"')
- train_y, M1 =
self.d.get_train_labels(self.sds).transform_encode(spec=jspec)
- test_y = self.d.get_test_labels(self.sds).transform_apply(spec=jspec,
meta=M1)
+ train_y, M1 =
self.d.get_train_labels(self.sds)[0:2000].transform_encode(spec=jspec)
+ test_y =
self.d.get_test_labels(self.sds)[0:2000].transform_apply(spec=jspec, meta=M1)
test_y_numpy = test_y.compute()
- self.assertEqual((16281, 1), test_y_numpy.shape)
+ self.assertEqual((2000, 1), test_y_numpy.shape)
def test_multi_log_reg(self):
# Reduced because we want the tests to finish a bit faster.
- train_count = 10000
+ train_count = 2000
test_count = 500
jspec_data = self.d.get_jspec(self.sds)
@@ -138,8 +135,8 @@ class TestAdultStandardML(unittest.TestCase):
test_y_frame = self.d.get_test_labels(self.sds)[0:test_count]
test_y = test_y_frame.transform_apply(spec=jspec_labels, meta=M2)
- betas = multiLogReg(train_x, train_y)
- [_, y_pred, acc] = multiLogRegPredict(test_x, betas, test_y)
+ betas = multiLogReg(train_x, train_y, verbose=False)
+ [_, y_pred, acc] = multiLogRegPredict(test_x, betas, test_y,
verbose=False)
[_, conf_avg] = confusionMatrix(y_pred, test_y)
confusion_numpy = conf_avg.compute()
diff --git a/src/main/python/tests/examples/tutorials/test_adult_neural.py
b/src/main/python/tests/examples/tutorials/test_adult_neural.py
index 1323ff22cf..3b60922ae7 100644
--- a/src/main/python/tests/examples/tutorials/test_adult_neural.py
+++ b/src/main/python/tests/examples/tutorials/test_adult_neural.py
@@ -43,7 +43,7 @@ class TestAdultNeural(unittest.TestCase):
dataset_path_test_mtd: str =
"../../test/resources/datasets/adult/test_data.csv.mtd"
dataset_jspec: str = "../../test/resources/datasets/adult/jspec.json"
- train_count: int = 15000
+ train_count: int = 5000
test_count: int = 300
network_dir: str = "tests/examples/tutorials/model"
diff --git a/src/main/python/tests/examples/tutorials/test_mnist.py
b/src/main/python/tests/examples/tutorials/test_mnist.py
index 17d43582cb..3fe297fb4c 100644
--- a/src/main/python/tests/examples/tutorials/test_mnist.py
+++ b/src/main/python/tests/examples/tutorials/test_mnist.py
@@ -21,11 +21,9 @@
import unittest
-import numpy as np
from systemds.context import SystemDSContext
from systemds.examples.tutorials.mnist import DataManager
-from systemds.operator.algorithm import kmeans, multiLogReg, multiLogRegPredict
-from systemds.script_building import DMLScript
+from systemds.operator.algorithm import multiLogReg, multiLogRegPredict
class Test_DMLScript(unittest.TestCase):
@@ -64,8 +62,8 @@ class Test_DMLScript(unittest.TestCase):
def test_multi_log_reg(self):
# Reduced because we want the tests to finish a bit faster.
- train_count = 15000
- test_count = 5000
+ train_count = 5000
+ test_count = 2000
# Train data
X = self.sds.from_numpy( self.d.get_train_data().reshape(
(60000, 28*28))[:train_count])
@@ -78,9 +76,8 @@ class Test_DMLScript(unittest.TestCase):
Yt = self.sds.from_numpy( self.d.get_test_labels()[:test_count])
Yt = Yt + 1.0
- bias = multiLogReg(X, Y)
-
- [_, _, acc] = multiLogRegPredict(Xt, bias, Yt).compute()
+ bias = multiLogReg(X, Y, verbose = False)
+ [_, _, acc] = multiLogRegPredict(Xt, bias, Yt, verbose=False).compute()
self.assertGreater(acc, 80)
diff --git a/src/main/python/tests/frame/test_hyperband.py
b/src/main/python/tests/frame/test_hyperband.py
index d8cd958689..12dc1c68bf 100644
--- a/src/main/python/tests/frame/test_hyperband.py
+++ b/src/main/python/tests/frame/test_hyperband.py
@@ -67,6 +67,7 @@ class TestHyperband(unittest.TestCase):
y_val=y_val,
params=params,
paramRanges=paramRanges,
+ verbose=False
).compute()
self.assertTrue(isinstance(best_weights_mat, np.ndarray))
self.assertTrue(best_weights_mat.shape[0] == self.X_train.shape[1])
diff --git a/src/main/python/tests/lineage/test_lineagetrace.py
b/src/main/python/tests/lineage/test_lineagetrace.py
index a223797963..06b5b39297 100644
--- a/src/main/python/tests/lineage/test_lineagetrace.py
+++ b/src/main/python/tests/lineage/test_lineagetrace.py
@@ -29,7 +29,7 @@ os.environ['SYSDS_QUIET'] = "1"
test_dir = os.path.join("tests", "lineage")
temp_dir = os.path.join(test_dir, "temp")
-
+trace_test_1 = os.path.join(test_dir,"trace1.dml")
class TestLineageTrace(unittest.TestCase):
@@ -37,7 +37,7 @@ class TestLineageTrace(unittest.TestCase):
@classmethod
def setUpClass(cls):
- cls.sds = SystemDSContext()
+ cls.sds = SystemDSContext(capture_stdout=True)
@classmethod
def tearDownClass(cls):
@@ -54,43 +54,30 @@ class TestLineageTrace(unittest.TestCase):
python_trace = [x.strip().split("°")
for x in m_res.get_lineage_trace().split("\n")]
- dml_script = (
- "x = matrix(1, rows=10, cols=10);\n"
- "y = x + x;\n"
- "print(lineage(y));\n"
- )
-
- sysds_trace = create_execute_and_trace_dml(dml_script, "trace1")
+
+ sysds_trace = create_execute_and_trace_dml(trace_test_1)
- # It is not garantied, that the two lists 100% align to be the same.
+ # It is not guaranteed that the two lists 100% align to be the same.
# Therefore for now, we only compare if the command is the same, in
same order.
python_trace_commands = [x[:1] for x in python_trace]
dml_script_commands = [x[:1] for x in sysds_trace]
self.assertEqual(python_trace_commands[0], dml_script_commands[0])
-# TODO add more tests cases.
-
-
-def create_execute_and_trace_dml(script: str, name: str):
- script_file_name = temp_dir + "/" + name + ".dml"
+def create_execute_and_trace_dml(script: str):
if not os.path.exists(temp_dir):
os.makedirs(temp_dir)
- with open(script_file_name, "w") as dml_file:
- dml_file.write(script)
-
# Call SYSDS!
- result_file_name = temp_dir + "/" + name + ".txt"
+ result_file_name = temp_dir + "/tmp_res.txt"
- command = "systemds " + script_file_name + \
+ command = "systemds " + script + \
" > " + result_file_name + " 2> /dev/null"
os.system(command)
return parse_trace(result_file_name)
def parse_trace(path: str):
- pointer = 0
data = []
with open(path, "r") as log:
for line in log:
diff --git a/src/main/python/tests/manual_tests/multi_log_reg_adult.py
b/src/main/python/tests/lineage/trace1.dml
similarity index 82%
copy from src/main/python/tests/manual_tests/multi_log_reg_adult.py
copy to src/main/python/tests/lineage/trace1.dml
index dbda0bd839..704b5ab49b 100644
--- a/src/main/python/tests/manual_tests/multi_log_reg_adult.py
+++ b/src/main/python/tests/lineage/trace1.dml
@@ -19,9 +19,6 @@
#
# -------------------------------------------------------------
-from systemds.context import SystemDSContext
-from systemds.operator.algorithm import multiLogReg, multiLogRegPredict
-from systemds.examples.tutorials.adult import DataManager
-
-d = DataManager()
-
+x = matrix(1, rows=10, cols=10);
+y = x + x;
+print(lineage(y));
diff --git a/src/main/python/tests/manual_tests/multi_log_reg_adult.py
b/src/main/python/tests/manual_tests/multi_log_reg_adult.py
index dbda0bd839..5b7f29d6c1 100644
--- a/src/main/python/tests/manual_tests/multi_log_reg_adult.py
+++ b/src/main/python/tests/manual_tests/multi_log_reg_adult.py
@@ -19,9 +19,6 @@
#
# -------------------------------------------------------------
-from systemds.context import SystemDSContext
-from systemds.operator.algorithm import multiLogReg, multiLogRegPredict
from systemds.examples.tutorials.adult import DataManager
-d = DataManager()
-
+DataManager()
diff --git a/src/main/python/tests/manual_tests/multi_log_reg_mnist.py
b/src/main/python/tests/manual_tests/multi_log_reg_mnist.py
index 8e8690bb2c..79cda6d818 100644
--- a/src/main/python/tests/manual_tests/multi_log_reg_mnist.py
+++ b/src/main/python/tests/manual_tests/multi_log_reg_mnist.py
@@ -22,8 +22,8 @@
import logging
from systemds.context import SystemDSContext
-from systemds.operator.algorithm import multiLogReg, multiLogRegPredict
from systemds.examples.tutorials.mnist import DataManager
+from systemds.operator.algorithm import multiLogReg, multiLogRegPredict
d = DataManager()
@@ -31,7 +31,7 @@ with SystemDSContext() as sds:
# Train Data
X = sds.from_numpy(d.get_train_data().reshape((60000, 28*28)))
Y = sds.from_numpy(d.get_train_labels()) + 1.0
- bias = multiLogReg(X, Y, tol= 0.0001, verbose= False)
+ bias = multiLogReg(X, Y, tol=0.0001, verbose=False)
# Test data
Xt = sds.from_numpy(d.get_test_data().reshape((10000, 28*28)))
Yt = sds.from_numpy(d.get_test_labels()) + 1.0
diff --git a/src/main/python/tests/manual_tests/save_log_reg_mnist_sysds.py
b/src/main/python/tests/manual_tests/save_log_reg_mnist_sysds.py
index df6ca0d65d..d59b837ec6 100644
--- a/src/main/python/tests/manual_tests/save_log_reg_mnist_sysds.py
+++ b/src/main/python/tests/manual_tests/save_log_reg_mnist_sysds.py
@@ -27,12 +27,11 @@ d = DataManager()
base_path = "systemds/examples/tutorials/mnist/"
with SystemDSContext() as sds:
# Train Data
- X = sds.from_numpy( d.get_train_data().reshape((60000, 28*28)))
+ X = sds.from_numpy(d.get_train_data().reshape((60000, 28*28)))
X.write(base_path + "train_data").compute()
- Y = sds.from_numpy( d.get_train_labels()) + 1.0
+ Y = sds.from_numpy(d.get_train_labels()) + 1.0
Y.write(base_path + "train_labels").compute()
- Xt = sds.from_numpy( d.get_test_data().reshape((10000, 28*28)))
+ Xt = sds.from_numpy(d.get_test_data().reshape((10000, 28*28)))
Xt.write(base_path + "test_data").compute()
- Yt = sds.from_numpy( d.get_test_labels()) + 1.0
+ Yt = sds.from_numpy(d.get_test_labels()) + 1.0
Yt.write(base_path + "test_labels").compute()
-
diff --git a/src/main/python/tests/manual_tests/multi_log_reg_adult.py
b/src/main/python/tests/manual_tests/time/for_loop_time_run.dml
similarity index 82%
copy from src/main/python/tests/manual_tests/multi_log_reg_adult.py
copy to src/main/python/tests/manual_tests/time/for_loop_time_run.dml
index dbda0bd839..df965f77b2 100644
--- a/src/main/python/tests/manual_tests/multi_log_reg_adult.py
+++ b/src/main/python/tests/manual_tests/time/for_loop_time_run.dml
@@ -19,9 +19,12 @@
#
# -------------------------------------------------------------
-from systemds.context import SystemDSContext
-from systemds.operator.algorithm import multiLogReg, multiLogRegPredict
-from systemds.examples.tutorials.adult import DataManager
-d = DataManager()
+x = rand(rows= 10,cols = 10,min = 0,max = 10,seed=42)
+y = rand(rows= 10,cols = 10,min = 0,max = 10,seed=32)
+for (k in 1:10){
+ x = x %*% y
+}
+
+print(sum(x))
diff --git a/src/main/python/tests/manual_tests/multi_log_reg_adult.py
b/src/main/python/tests/manual_tests/time/for_loop_time_run.py
similarity index 84%
copy from src/main/python/tests/manual_tests/multi_log_reg_adult.py
copy to src/main/python/tests/manual_tests/time/for_loop_time_run.py
index dbda0bd839..62d55eef14 100644
--- a/src/main/python/tests/manual_tests/multi_log_reg_adult.py
+++ b/src/main/python/tests/manual_tests/time/for_loop_time_run.py
@@ -18,10 +18,11 @@
# under the License.
#
# -------------------------------------------------------------
-
from systemds.context import SystemDSContext
-from systemds.operator.algorithm import multiLogReg, multiLogRegPredict
-from systemds.examples.tutorials.adult import DataManager
-
-d = DataManager()
+with SystemDSContext() as sds:
+ x = sds.rand(10, 10, 0, 10, seed=42)
+ y = sds.rand(10, 10, 0, 10, seed=32)
+ for b in range(10):
+ x = x @ y
+ z_c = x.sum().compute()
diff --git
a/src/main/python/docs/source/code/getting_started/simpleExamples/l2svm_internal.py
b/src/main/python/tests/manual_tests/time/run.sh
old mode 100644
new mode 100755
similarity index 62%
copy from
src/main/python/docs/source/code/getting_started/simpleExamples/l2svm_internal.py
copy to src/main/python/tests/manual_tests/time/run.sh
index 2f9fc62574..33f0950dc0
---
a/src/main/python/docs/source/code/getting_started/simpleExamples/l2svm_internal.py
+++ b/src/main/python/tests/manual_tests/time/run.sh
@@ -18,18 +18,30 @@
# under the License.
#
# -------------------------------------------------------------
-import logging
-from systemds.context import SystemDSContext
-from systemds.operator.algorithm import l2svm
+export SYSDS_QUIET=1
-with SystemDSContext() as sds:
- # Generate 10 by 10 matrix with values in range 0 to 100.
- features = sds.rand(10, 10, 0, 100)
- # Add value to all cells in features
- features += 1.1
- # Generate labels of all ones and zeros
- labels = sds.rand(10, 1, 1, 1, sparsity=0.5)
+tests=("startup_time_run for_loop_time_run")
+tests=("for_loop_time_run")
+base="tests/manual_tests/time/"
+gr="elapsed"
+gr="elapsed|task-clock|cycles|instructions"
+rep=50
- model = l2svm(features, labels).compute()
- logging.info(model)
+for t in $tests; do
+
+ # Verbose runs, to verify that it works.
+ systemds $base$t.dml
+ python $base$t.py
+
+ # Timed runs
+ # Systemds
+ perf stat -d -d -d -r $rep \
+ systemds $base$t.dml \
+ 2>&1 | grep -E $gr
+
+ # PythonAPI SystemDS
+ perf stat -d -d -d -r $rep \
+ python $base$t.py \
+ 2>&1 | grep -E $gr
+done
diff --git a/src/main/python/tests/manual_tests/multi_log_reg_adult.py
b/src/main/python/tests/manual_tests/time/startup_time_run.dml
similarity index 82%
copy from src/main/python/tests/manual_tests/multi_log_reg_adult.py
copy to src/main/python/tests/manual_tests/time/startup_time_run.dml
index dbda0bd839..8a66a3ce05 100644
--- a/src/main/python/tests/manual_tests/multi_log_reg_adult.py
+++ b/src/main/python/tests/manual_tests/time/startup_time_run.dml
@@ -19,9 +19,10 @@
#
# -------------------------------------------------------------
-from systemds.context import SystemDSContext
-from systemds.operator.algorithm import multiLogReg, multiLogRegPredict
-from systemds.examples.tutorials.adult import DataManager
-d = DataManager()
+x = rand(rows= 10,cols = 10,min = 0,max = 10,seed=42)
+y = rand(rows= 10,cols = 10,min = 0,max = 10,seed=32)
+z = x %*% y
+
+print(sum(z))
diff --git a/src/main/python/tests/manual_tests/multi_log_reg_adult.py
b/src/main/python/tests/manual_tests/time/startup_time_run.py
similarity index 86%
copy from src/main/python/tests/manual_tests/multi_log_reg_adult.py
copy to src/main/python/tests/manual_tests/time/startup_time_run.py
index dbda0bd839..659837f658 100644
--- a/src/main/python/tests/manual_tests/multi_log_reg_adult.py
+++ b/src/main/python/tests/manual_tests/time/startup_time_run.py
@@ -18,10 +18,10 @@
# under the License.
#
# -------------------------------------------------------------
-
from systemds.context import SystemDSContext
-from systemds.operator.algorithm import multiLogReg, multiLogRegPredict
-from systemds.examples.tutorials.adult import DataManager
-
-d = DataManager()
+with SystemDSContext() as sds:
+ x = sds.rand(10, 10, 0, 10, seed=42)
+ y = sds.rand(10, 10, 0, 10, seed=32)
+ z = x @ y
+ z_c = z.sum().compute()
diff --git a/src/main/python/tests/matrix/test_print.py
b/src/main/python/tests/matrix/test_print.py
index 397d200c4b..5a345a18d7 100644
--- a/src/main/python/tests/matrix/test_print.py
+++ b/src/main/python/tests/matrix/test_print.py
@@ -32,7 +32,7 @@ class TestPrint(unittest.TestCase):
@classmethod
def setUpClass(cls):
- cls.sds = SystemDSContext()
+ cls.sds = SystemDSContext(capture_stdout=True)
sleep(2.0)
# Clear stdout ...
cls.sds.get_stdout()
diff --git a/src/main/python/tests/script/test_dml_script.py
b/src/main/python/tests/script/test_dml_script.py
index 3c307e1cd5..809a20cb87 100644
--- a/src/main/python/tests/script/test_dml_script.py
+++ b/src/main/python/tests/script/test_dml_script.py
@@ -34,7 +34,7 @@ class Test_DMLScript(unittest.TestCase):
@classmethod
def setUpClass(cls):
- cls.sds = SystemDSContext()
+ cls.sds = SystemDSContext(capture_stdout=True)
sleep(1)
cls.sds.get_stdout()
cls.sds.get_stdout()
diff --git a/src/main/python/tests/source/test_source_no_return.py
b/src/main/python/tests/source/test_source_no_return.py
index 86e039823c..f38273dd61 100644
--- a/src/main/python/tests/source/test_source_no_return.py
+++ b/src/main/python/tests/source/test_source_no_return.py
@@ -31,7 +31,7 @@ class TestSource_NoReturn(unittest.TestCase):
@classmethod
def setUpClass(cls):
- cls.sds = SystemDSContext()
+ cls.sds = SystemDSContext(capture_stdout=True)
@classmethod
def tearDownClass(cls):
diff --git
a/src/test/java/org/apache/sysds/test/usertest/pythonapi/StartupTest.java
b/src/test/java/org/apache/sysds/test/usertest/pythonapi/StartupTest.java
index 585c6ef25f..4b8395107f 100644
--- a/src/test/java/org/apache/sysds/test/usertest/pythonapi/StartupTest.java
+++ b/src/test/java/org/apache/sysds/test/usertest/pythonapi/StartupTest.java
@@ -50,11 +50,4 @@ public class StartupTest {
// Number out of range
PythonDMLScript.main(new String[] {"-python", "918757"});
}
-
- @Test(expected = Exception.class)
- public void testStartupCorrectButTwice() throws Exception {
- // crash if you start two instances on same port.
- PythonDMLScript.main(new String[] {"-python", "8142"});
- PythonDMLScript.main(new String[] {"-python", "8142"});
- }
}