This is an automated email from the ASF dual-hosted git repository.
baunsgaard pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git
The following commit(s) were added to refs/heads/main by this push:
new dd0ca2675c [SYSTEMDS-3351] Python Test Docs
dd0ca2675c is described below
commit dd0ca2675c90d818727a2b6eadad9ce1a3ff930e
Author: baunsgaard <[email protected]>
AuthorDate: Mon Apr 18 17:18:11 2022 +0200
[SYSTEMDS-3351] Python Test Docs
This commit is the start of adding tests for the python documentation.
The tests are simply run via imports of the source code inserted into
the docs.
Closes #1585
---
.../source/code/federatedTutorial_part3_old2.py | 55 -------------
.../simpleExamples/l2svm.py} | 30 ++++---
.../simpleExamples/l2svm_internal.py} | 29 +++----
.../simpleExamples/multiply.py} | 28 +++----
.../simpleExamples/multiplyMatrix.py} | 29 +++----
.../federated}/federatedTutorial_part1.py | 0
.../federated}/federatedTutorial_part2.py | 0
.../federated}/federatedTutorial_part3.py | 0
.../source/getting_started/simple_examples.rst | 94 ++++++----------------
src/main/python/docs/source/guide/federated.rst | 6 +-
src/main/python/tests/README.md | 2 +-
.../docs/__init__.py} | 9 ---
.../test_simple_example.py} | 21 +++--
.../tests/federated/test_federated_tutorial.py | 6 +-
14 files changed, 97 insertions(+), 212 deletions(-)
diff --git a/src/main/python/docs/source/code/federatedTutorial_part3_old2.py
b/src/main/python/docs/source/code/federatedTutorial_part3_old2.py
deleted file mode 100644
index a6bf94f8cd..0000000000
--- a/src/main/python/docs/source/code/federatedTutorial_part3_old2.py
+++ /dev/null
@@ -1,55 +0,0 @@
-# -------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-# -------------------------------------------------------------
-# Python
-import numpy as np
-from systemds.context import SystemDSContext
-
-addr1 = "localhost:8001/temp/test.csv"
-addr2 = "localhost:8002/temp/test.csv"
-addr3 = "localhost:8003/temp/test.csv"
-
-# Create a federated matrix using two federated environments
-# Note that the two federated matrices are stacked on top of each other
-
-with SystemDSContext() as sds:
-
- fed_a = sds.federated([addr1],[([0, 0], [3, 3])])
- fed_b = sds.federated([addr2],[([0, 0], [3, 3])])
- # fed_c = sds.federated([addr3],[([0, 0], [3, 3])])
-
- np_array = np.array([[1,2,3],[4,5,6],[7,8,9]])
-
- loc_a = sds.from_numpy(np_array)
- loc_b = sds.from_numpy(np_array)
-
- fed_res = fed_a @ fed_b
- loc_res = loc_a @ loc_b
-
- hybrid_res_1 = fed_a @ loc_b
- hybrid_res_2 = loc_a @ fed_b
-
- # compute and print
- print(fed_a.compute())
- print(fed_b.compute())
- print(fed_res.compute(verbose=True))
- print(loc_res.compute(verbose=True))
- print(hybrid_res_1.compute())
- print(hybrid_res_1.compute())
diff --git a/src/main/python/docs/source/code/federatedTutorial_part2.py
b/src/main/python/docs/source/code/getting_started/simpleExamples/l2svm.py
similarity index 63%
copy from src/main/python/docs/source/code/federatedTutorial_part2.py
copy to src/main/python/docs/source/code/getting_started/simpleExamples/l2svm.py
index ac9c0bfc83..1403b68443 100644
--- a/src/main/python/docs/source/code/federatedTutorial_part2.py
+++ b/src/main/python/docs/source/code/getting_started/simpleExamples/l2svm.py
@@ -19,21 +19,27 @@
#
# -------------------------------------------------------------
# Python
+# Import numpy and SystemDS
import numpy as np
from systemds.context import SystemDSContext
+from systemds.operator.algorithm import l2svm
-# Create a federated matrix
-# Indicate the dimensions of the data:
-# Here the first list in the tuple is the top left Coordinate,
-# and the second the bottom left coordinate.
-# It is ordered as [col,row].
-dims = ([0, 0], [3, 3])
+# Set a seed
+np.random.seed(0)
+# Generate random features and labels in numpy
+# This can easily be exchanged with a data set.
+features = np.array(np.random.randint(
+ 100, size=10 * 10) + 1.01, dtype=np.double)
+features.shape = (10, 10)
+labels = np.zeros((10, 1))
-# Specify the address + file path from worker:
-address = "localhost:8001/temp/test.csv"
+# l2svm labels can only be 0 or 1
+for i in range(10):
+ if np.random.random() > 0.5:
+ labels[i][0] = 1
+# compute our model
with SystemDSContext() as sds:
- fed_a = sds.federated([address], [dims])
- # Sum the federated matrix and call compute to execute
- print(fed_a.sum().compute())
- # Result should be 45.
+ model = l2svm(sds.from_numpy(features),
+ sds.from_numpy(labels)).compute()
+ print(model)
diff --git a/src/main/python/docs/source/code/federatedTutorial_part3_old.py
b/src/main/python/docs/source/code/getting_started/simpleExamples/l2svm_internal.py
similarity index 64%
rename from src/main/python/docs/source/code/federatedTutorial_part3_old.py
rename to
src/main/python/docs/source/code/getting_started/simpleExamples/l2svm_internal.py
index eaa3f1d3c2..5a4f24cfd6 100644
--- a/src/main/python/docs/source/code/federatedTutorial_part3_old.py
+++
b/src/main/python/docs/source/code/getting_started/simpleExamples/l2svm_internal.py
@@ -19,26 +19,17 @@
#
# -------------------------------------------------------------
# Python
-import numpy as np
+# Import SystemDS
from systemds.context import SystemDSContext
-
-addr1 = "localhost:8001/temp/test.csv"
-addr2 = "localhost:8002/temp/test.csv"
-addr3 = "localhost:8003/temp/test.csv"
-
-# Create a federated matrix using two federated environments
-# Note that the two federated matrices are stacked on top of each other
+from systemds.operator.algorithm import l2svm
with SystemDSContext() as sds:
- fed_a = sds.federated(
- [addr1, addr2],
- [([0, 0], [3, 3]), ([0, 3], [3, 6])])
-
- fed_b = sds.federated(
- [addr1, addr3],
- [([0, 0], [3, 3]), ([0, 3], [3, 6])])
-
- # Multiply, compute and print.
- res = (fed_a * fed_b).compute()
+ # Generate 10 by 10 matrix with values in range 0 to 100.
+ features = sds.rand(10, 10, 0, 100)
+ # Add value to all cells in features
+ features += 1.1
+ # Generate labels of all ones and zeros
+ labels = sds.rand(10, 1, 1, 1, sparsity = 0.5)
-print(res)
+ model = l2svm(features, labels).compute()
+ print(model)
diff --git a/src/main/python/docs/source/code/federatedTutorial_part2.py
b/src/main/python/docs/source/code/getting_started/simpleExamples/multiply.py
similarity index 62%
copy from src/main/python/docs/source/code/federatedTutorial_part2.py
copy to
src/main/python/docs/source/code/getting_started/simpleExamples/multiply.py
index ac9c0bfc83..b53a35e6e3 100644
--- a/src/main/python/docs/source/code/federatedTutorial_part2.py
+++
b/src/main/python/docs/source/code/getting_started/simpleExamples/multiply.py
@@ -19,21 +19,17 @@
#
# -------------------------------------------------------------
# Python
-import numpy as np
+# Import SystemDSContext
from systemds.context import SystemDSContext
-
-# Create a federated matrix
-# Indicate the dimensions of the data:
-# Here the first list in the tuple is the top left Coordinate,
-# and the second the bottom left coordinate.
-# It is ordered as [col,row].
-dims = ([0, 0], [3, 3])
-
-# Specify the address + file path from worker:
-address = "localhost:8001/temp/test.csv"
-
+# Create a context and if necessary (no SystemDS py4j instance running)
+# it starts a subprocess which does the execution in SystemDS
with SystemDSContext() as sds:
- fed_a = sds.federated([address], [dims])
- # Sum the federated matrix and call compute to execute
- print(fed_a.sum().compute())
- # Result should be 45.
+ # Full generates a matrix completely filled with one number.
+ # Generate a 5x10 matrix filled with 4.2
+ m = sds.full((5, 10), 4.20)
+ # multiply with scalar. Nothing is executed yet!
+ m_res = m * 3.1
+ # Do the calculation in SystemDS by calling compute().
+ # The returned value is a numpy array that can be directly printed.
+ print(m_res.compute())
+ # context will automatically be closed and process stopped
diff --git a/src/main/python/docs/source/code/federatedTutorial_part2.py
b/src/main/python/docs/source/code/getting_started/simpleExamples/multiplyMatrix.py
similarity index 63%
copy from src/main/python/docs/source/code/federatedTutorial_part2.py
copy to
src/main/python/docs/source/code/getting_started/simpleExamples/multiplyMatrix.py
index ac9c0bfc83..7bf7ef99f8 100644
--- a/src/main/python/docs/source/code/federatedTutorial_part2.py
+++
b/src/main/python/docs/source/code/getting_started/simpleExamples/multiplyMatrix.py
@@ -19,21 +19,22 @@
#
# -------------------------------------------------------------
# Python
-import numpy as np
-from systemds.context import SystemDSContext
+import numpy as np # import numpy
-# Create a federated matrix
-# Indicate the dimensions of the data:
-# Here the first list in the tuple is the top left Coordinate,
-# and the second the bottom left coordinate.
-# It is ordered as [col,row].
-dims = ([0, 0], [3, 3])
+# Import SystemDSContext
+from systemds.context import SystemDSContext
-# Specify the address + file path from worker:
-address = "localhost:8001/temp/test.csv"
+# create a random array
+m1 = np.array(np.random.randint(100, size=5 * 5) + 1.01, dtype=np.double)
+m1.shape = (5, 5)
+# create another random array
+m2 = np.array(np.random.randint(5, size=5 * 5) + 1, dtype=np.double)
+m2.shape = (5, 5)
+# Create a context
with SystemDSContext() as sds:
- fed_a = sds.federated([address], [dims])
- # Sum the federated matrix and call compute to execute
- print(fed_a.sum().compute())
- # Result should be 45.
+ # element-wise matrix multiplication, note that nothing is executed yet!
+ m_res = sds.from_numpy(m1) * sds.from_numpy(m2)
+ # let's do the actual computation in SystemDS! The result is a numpy array
+ m_res_np = m_res.compute()
+ print(m_res_np)
diff --git a/src/main/python/docs/source/code/federatedTutorial_part1.py
b/src/main/python/docs/source/code/guide/federated/federatedTutorial_part1.py
similarity index 100%
copy from src/main/python/docs/source/code/federatedTutorial_part1.py
copy to
src/main/python/docs/source/code/guide/federated/federatedTutorial_part1.py
diff --git a/src/main/python/docs/source/code/federatedTutorial_part2.py
b/src/main/python/docs/source/code/guide/federated/federatedTutorial_part2.py
similarity index 100%
rename from src/main/python/docs/source/code/federatedTutorial_part2.py
rename to
src/main/python/docs/source/code/guide/federated/federatedTutorial_part2.py
diff --git a/src/main/python/docs/source/code/federatedTutorial_part3.py
b/src/main/python/docs/source/code/guide/federated/federatedTutorial_part3.py
similarity index 100%
rename from src/main/python/docs/source/code/federatedTutorial_part3.py
rename to
src/main/python/docs/source/code/guide/federated/federatedTutorial_part3.py
diff --git a/src/main/python/docs/source/getting_started/simple_examples.rst
b/src/main/python/docs/source/getting_started/simple_examples.rst
index e1d3bfec04..dd20c89fd0 100644
--- a/src/main/python/docs/source/getting_started/simple_examples.rst
+++ b/src/main/python/docs/source/getting_started/simple_examples.rst
@@ -29,58 +29,27 @@ Matrix Operations
Making use of SystemDS, let us multiply an Matrix with an scalar:
-.. code-block:: python
-
- # Import SystemDSContext
- from systemds.context import SystemDSContext
- # Create a context and if necessary (no SystemDS py4j instance running)
- # it starts a subprocess which does the execution in SystemDS
- with SystemDSContext() as sds:
- # Full generates a matrix completely filled with one number.
- # Generate a 5x10 matrix filled with 4.2
- m = sds.full((5, 10), 4.20)
- # multiply with scalar. Nothing is executed yet!
- m_res = m * 3.1
- # Do the calculation in SystemDS by calling compute().
- # The returned value is an numpy array that can be directly printed.
- print(m_res.compute())
- # context will automatically be closed and process stopped
+.. include:: ../code/getting_started/simpleExamples/multiply.py
+ :start-line: 20
+ :code: python
As output we get
-.. code-block:: python
+.. code-block::
- [[ 13.02 13.02 13.02 13.02 13.02 13.02 13.02 13.02 13.02 13.02]
- [ 13.02 13.02 13.02 13.02 13.02 13.02 13.02 13.02 13.02 13.02]
- [ 13.02 13.02 13.02 13.02 13.02 13.02 13.02 13.02 13.02 13.02]
- [ 13.02 13.02 13.02 13.02 13.02 13.02 13.02 13.02 13.02 13.02]
- [ 13.02 13.02 13.02 13.02 13.02 13.02 13.02 13.02 13.02 13.02]]
+ [[13.02 13.02 13.02 13.02 13.02 13.02 13.02 13.02 13.02 13.02]
+ [13.02 13.02 13.02 13.02 13.02 13.02 13.02 13.02 13.02 13.02]
+ [13.02 13.02 13.02 13.02 13.02 13.02 13.02 13.02 13.02 13.02]
+ [13.02 13.02 13.02 13.02 13.02 13.02 13.02 13.02 13.02 13.02]
+ [13.02 13.02 13.02 13.02 13.02 13.02 13.02 13.02 13.02 13.02]]
The Python SystemDS package is compatible with numpy arrays.
Let us do a quick element-wise matrix multiplication of numpy arrays with
SystemDS.
Remember to first start up a new terminal:
-.. code-block:: python
-
- import numpy as np # import numpy
-
- # Import SystemDSContext
- from systemds.context import SystemDSContext
-
- # create a random array
- m1 = np.array(np.random.randint(100, size=5 * 5) + 1.01, dtype=np.double)
- m1.shape = (5, 5)
- # create another random array
- m2 = np.array(np.random.randint(5, size=5 * 5) + 1, dtype=np.double)
- m2.shape = (5, 5)
-
- # Create a context
- with SystemDSContext() as sds:
- # element-wise matrix multiplication, note that nothing is executed yet!
- m_res = sds.from_numpy(m1) * sds.from_numpy(m2)
- # lets do the actual computation in SystemDS! The result is an numpy
array
- m_res_np = m_res.compute()
- print(m_res_np)
+.. include:: ../code/getting_started/simpleExamples/multiplyMatrix.py
+ :start-line: 20
+ :code: python
More complex operations
-----------------------
@@ -88,34 +57,13 @@ More complex operations
SystemDS provides algorithm level functions as built-in functions to simplify
development.
One example of this is l2SVM, a high level functions for Data-Scientists.
Let's take a look at l2svm:
-.. code-block:: python
-
- # Import numpy and SystemDS
- import numpy as np
- from systemds.context import SystemDSContext
- from systemds.operator.algorithm import l2svm
-
- # Set a seed
- np.random.seed(0)
- # Generate random features and labels in numpy
- # This can easily be exchanged with a data set.
- features = np.array(np.random.randint(100, size=10 * 10) + 1.01,
dtype=np.double)
- features.shape = (10, 10)
- labels = np.zeros((10, 1))
-
- # l2svm labels can only be 0 or 1
- for i in range(10):
- if np.random.random() > 0.5:
- labels[i][0] = 1
-
- # compute our model
- with SystemDSContext() as sds:
- model = l2svm(sds.from_numpy(features), sds.from_numpy(labels)).compute()
- print(model)
+.. include:: ../code/getting_started/simpleExamples/l2svm.py
+ :start-line: 20
+ :code: python
The output should be similar to
-.. code-block:: python
+.. code-block::
[[ 0.02033445]
[-0.00324092]
@@ -128,3 +76,13 @@ The output should be similar to
[-0.01686351]
[-0.03839821]]
+To get the full performance of SystemDS one can modify the script to only use
internal functionality,
+instead of using numpy arrays that have to be transferred into SystemDS.
+The above script transformed goes like this:
+
+.. include:: ../code/getting_started/simpleExamples/l2svm_internal.py
+ :start-line: 20
+ :code: python
+
+When reading in datasets for processing it is highly recommended that you read
from inside systemds using
+sds.read("file"), since this avoids transferring numpy arrays.
diff --git a/src/main/python/docs/source/guide/federated.rst
b/src/main/python/docs/source/guide/federated.rst
index 1e6eec7f9b..6afadf2393 100644
--- a/src/main/python/docs/source/guide/federated.rst
+++ b/src/main/python/docs/source/guide/federated.rst
@@ -53,14 +53,14 @@ Currently we also require a metadata file for the federated
worker.
This should be located next to the ``test.csv`` file called ``test.csv.mtd``.
To make both the data and metadata simply execute the following
-.. include:: ../code/federatedTutorial_part1.py
+.. include:: ../code/guide/federated/federatedTutorial_part1.py
:start-line: 20
:code: python
After creating our data the federated worker becomes able to execute federated
instructions.
The aggregated sum using federated instructions in python SystemDS is done as
follows
-.. include:: ../code/federatedTutorial_part2.py
+.. include:: ../code/guide/federated/federatedTutorial_part2.py
:start-line: 20
:code: python
@@ -81,7 +81,7 @@ Start with 3 different terminals, and run one federated
environment in each.
Once all three workers are up and running we can leverage all three in the
following example
-.. include:: ../code/federatedTutorial_part3.py
+.. include:: ../code/guide/federated/federatedTutorial_part3.py
:start-line: 20
:code: python
diff --git a/src/main/python/tests/README.md b/src/main/python/tests/README.md
index feef8bf96c..bf6b6e35f0 100644
--- a/src/main/python/tests/README.md
+++ b/src/main/python/tests/README.md
@@ -41,4 +41,4 @@ To execute the Federated Tests, use:
Federated experiments are a little different from the rest, since they require
some setup in form of federated workers.
-See more details in the [script](federated/runFedTest.sh)
\ No newline at end of file
+See more details in the [script](federated/runFedTest.sh)
diff --git a/src/main/python/docs/source/code/federatedTutorial_part1.py
b/src/main/python/tests/docs/__init__.py
similarity index 75%
rename from src/main/python/docs/source/code/federatedTutorial_part1.py
rename to src/main/python/tests/docs/__init__.py
index 7795c4b1eb..e66abb4646 100644
--- a/src/main/python/docs/source/code/federatedTutorial_part1.py
+++ b/src/main/python/tests/docs/__init__.py
@@ -18,12 +18,3 @@
# under the License.
#
# -------------------------------------------------------------
-# Python
-import numpy as np
-import os
-if not os.path.isdir("temp"):
- os.mkdir("temp")
-a = np.asarray([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
-np.savetxt("temp/test.csv", a, delimiter=",")
-with open("temp/test.csv.mtd", "w") as mtd:
- mtd.write('{ "format":"csv", "header":false, "rows":3, "cols":3 }')
diff --git a/src/main/python/tests/federated/test_federated_tutorial.py
b/src/main/python/tests/docs/test_simple_example.py
similarity index 66%
copy from src/main/python/tests/federated/test_federated_tutorial.py
copy to src/main/python/tests/docs/test_simple_example.py
index d9efdd6813..37118d5583 100644
--- a/src/main/python/tests/federated/test_federated_tutorial.py
+++ b/src/main/python/tests/docs/test_simple_example.py
@@ -19,21 +19,18 @@
#
# -------------------------------------------------------------
-import shutil
import unittest
-class TestFederatedAggFn(unittest.TestCase):
+class TestSimpleExample(unittest.TestCase):
+ def test_multiply(self):
+ import docs.source.code.getting_started.simpleExamples.multiply
- @classmethod
- def tearDownClass(cls):
- shutil.rmtree("temp")
+ def test_multiplyMatrix(self):
+ import docs.source.code.getting_started.simpleExamples.multiplyMatrix
- def test_part1(self):
- import docs.source.code.federatedTutorial_part1
+ def test_l2svm(self):
+ import docs.source.code.getting_started.simpleExamples.l2svm
- def test_part2(self):
- import docs.source.code.federatedTutorial_part2
-
- def test_part3(self):
- import docs.source.code.federatedTutorial_part3
+ def test_l2svm_internal(self):
+ import docs.source.code.getting_started.simpleExamples.l2svm_internal
diff --git a/src/main/python/tests/federated/test_federated_tutorial.py
b/src/main/python/tests/federated/test_federated_tutorial.py
index d9efdd6813..6ef7caab05 100644
--- a/src/main/python/tests/federated/test_federated_tutorial.py
+++ b/src/main/python/tests/federated/test_federated_tutorial.py
@@ -30,10 +30,10 @@ class TestFederatedAggFn(unittest.TestCase):
shutil.rmtree("temp")
def test_part1(self):
- import docs.source.code.federatedTutorial_part1
+ import docs.source.code.guide.federated.federatedTutorial_part1
def test_part2(self):
- import docs.source.code.federatedTutorial_part2
+ import docs.source.code.guide.federated.federatedTutorial_part2
def test_part3(self):
- import docs.source.code.federatedTutorial_part3
+ import docs.source.code.guide.federated.federatedTutorial_part3