This is an automated email from the ASF dual-hosted git repository. baunsgaard pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/systemds.git
The following commit(s) were added to refs/heads/main by this push: new bc1e438 [SYSTEMDS-3195] Python federated tutorial test bc1e438 is described below commit bc1e4388cb0100a26d15b9d56a86dcba7b3d0454 Author: baunsgaard <baunsga...@tugraz.at> AuthorDate: Tue Nov 2 13:22:54 2021 +0100 [SYSTEMDS-3195] Python federated tutorial test --- .../docs/source/code/federatedTutorial_part1.py | 29 +++++++++ .../docs/source/code/federatedTutorial_part2.py | 39 +++++++++++ .../docs/source/code/federatedTutorial_part3.py | 47 ++++++++++++++ .../source/code/federatedTutorial_part3_old.py | 44 +++++++++++++ .../source/code/federatedTutorial_part3_old2.py | 55 ++++++++++++++++ src/main/python/docs/source/guide/federated.rst | 75 +++++----------------- src/main/python/tests/federated/runFedTest.sh | 6 ++ .../tests/federated/test_federated_tutorial.py | 39 +++++++++++ 8 files changed, 274 insertions(+), 60 deletions(-) diff --git a/src/main/python/docs/source/code/federatedTutorial_part1.py b/src/main/python/docs/source/code/federatedTutorial_part1.py new file mode 100644 index 0000000..7795c4b --- /dev/null +++ b/src/main/python/docs/source/code/federatedTutorial_part1.py @@ -0,0 +1,29 @@ +# ------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. +# +# ------------------------------------------------------------- +# Python +import numpy as np +import os +if not os.path.isdir("temp"): + os.mkdir("temp") +a = np.asarray([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) +np.savetxt("temp/test.csv", a, delimiter=",") +with open("temp/test.csv.mtd", "w") as mtd: + mtd.write('{ "format":"csv", "header":false, "rows":3, "cols":3 }') diff --git a/src/main/python/docs/source/code/federatedTutorial_part2.py b/src/main/python/docs/source/code/federatedTutorial_part2.py new file mode 100644 index 0000000..ac9c0bf --- /dev/null +++ b/src/main/python/docs/source/code/federatedTutorial_part2.py @@ -0,0 +1,39 @@ +# ------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +# ------------------------------------------------------------- +# Python +import numpy as np +from systemds.context import SystemDSContext + +# Create a federated matrix +# Indicate the dimensions of the data: +# Here the first list in the tuple is the top left coordinate, +# and the second the bottom right coordinate. +# It is ordered as [row,col]. 
+dims = ([0, 0], [3, 3]) + +# Specify the address + file path from worker: +address = "localhost:8001/temp/test.csv" + +with SystemDSContext() as sds: + fed_a = sds.federated([address], [dims]) + # Sum the federated matrix and call compute to execute + print(fed_a.sum().compute()) + # Result should be 45. diff --git a/src/main/python/docs/source/code/federatedTutorial_part3.py b/src/main/python/docs/source/code/federatedTutorial_part3.py new file mode 100644 index 0000000..1d1125a --- /dev/null +++ b/src/main/python/docs/source/code/federatedTutorial_part3.py @@ -0,0 +1,47 @@ +# ------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# +# ------------------------------------------------------------- +# Python +import numpy as np +from systemds.context import SystemDSContext + +addr1 = "localhost:8001/temp/test.csv" +addr2 = "localhost:8002/temp/test.csv" +addr3 = "localhost:8003/temp/test.csv" + +# Create a federated matrix using three federated environments +# Note that the three federated partitions are stacked on top of each other + +with SystemDSContext() as sds: + # federated data on three locations + fed = sds.federated([addr1, addr2, addr3], [ + ([0, 0], [3, 3]), + ([3, 0], [6, 3]), + ([6, 0], [9, 3])]) + # local matrix to multiply with + loc = sds.from_numpy(np.array([ + [1,2,3,4,5,6,7,8,9], + [1,2,3,4,5,6,7,8,9], + [1,2,3,4,5,6,7,8,9] + ])) + # Multiply local and federated + ret = loc @ fed + # execute the lazy script and print + print(ret.compute()) diff --git a/src/main/python/docs/source/code/federatedTutorial_part3_old.py b/src/main/python/docs/source/code/federatedTutorial_part3_old.py new file mode 100644 index 0000000..eaa3f1d --- /dev/null +++ b/src/main/python/docs/source/code/federatedTutorial_part3_old.py @@ -0,0 +1,44 @@ +# ------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# +# ------------------------------------------------------------- +# Python +import numpy as np +from systemds.context import SystemDSContext + +addr1 = "localhost:8001/temp/test.csv" +addr2 = "localhost:8002/temp/test.csv" +addr3 = "localhost:8003/temp/test.csv" + +# Create a federated matrix using two federated environments +# Note that the two federated matrices are stacked on top of each other + +with SystemDSContext() as sds: + fed_a = sds.federated( + [addr1, addr2], + [([0, 0], [3, 3]), ([0, 3], [3, 6])]) + + fed_b = sds.federated( + [addr1, addr3], + [([0, 0], [3, 3]), ([0, 3], [3, 6])]) + + # Multiply, compute and print. + res = (fed_a * fed_b).compute() + +print(res) diff --git a/src/main/python/docs/source/code/federatedTutorial_part3_old2.py b/src/main/python/docs/source/code/federatedTutorial_part3_old2.py new file mode 100644 index 0000000..a6bf94f --- /dev/null +++ b/src/main/python/docs/source/code/federatedTutorial_part3_old2.py @@ -0,0 +1,55 @@ +# ------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# +# ------------------------------------------------------------- +# Python +import numpy as np +from systemds.context import SystemDSContext + +addr1 = "localhost:8001/temp/test.csv" +addr2 = "localhost:8002/temp/test.csv" +addr3 = "localhost:8003/temp/test.csv" + +# Create a federated matrix using two federated environments +# Note that the two federated matrices are stacked on top of each other + +with SystemDSContext() as sds: + + fed_a = sds.federated([addr1],[([0, 0], [3, 3])]) + fed_b = sds.federated([addr2],[([0, 0], [3, 3])]) + # fed_c = sds.federated([addr3],[([0, 0], [3, 3])]) + + np_array = np.array([[1,2,3],[4,5,6],[7,8,9]]) + + loc_a = sds.from_numpy(np_array) + loc_b = sds.from_numpy(np_array) + + fed_res = fed_a @ fed_b + loc_res = loc_a @ loc_b + + hybrid_res_1 = fed_a @ loc_b + hybrid_res_2 = loc_a @ fed_b + + # compute and print + print(fed_a.compute()) + print(fed_b.compute()) + print(fed_res.compute(verbose=True)) + print(loc_res.compute(verbose=True)) + print(hybrid_res_1.compute()) + print(hybrid_res_1.compute()) diff --git a/src/main/python/docs/source/guide/federated.rst b/src/main/python/docs/source/guide/federated.rst index 051ad03..fefea54 100644 --- a/src/main/python/docs/source/guide/federated.rst +++ b/src/main/python/docs/source/guide/federated.rst @@ -37,7 +37,7 @@ A simple guide to do this is in the SystemDS Repository_. If that is setup correctly simply start a worker using the following command. Here the ``8001`` refer to the port used by the worker. -.. code-block:: python +.. code-block:: systemds WORKER 8001 @@ -47,45 +47,22 @@ Simple Aggregation Example In this example we use a single federated worker, and aggregate the sum of its data. First we need to create some data for our federated worker to use. -In this example we simply use Numpy to create a ``test.csv`` file - -.. 
code-block:: python - - # Import numpy - import numpy as np - a = np.asarray([[1,2,3], [4,5,6], [7,8,9]]) - np.savetxt("temp/test.csv", a, delimiter=",") +In this example we simply use Numpy to create a ``test.csv`` file. Currently we also require a metadata file for the federated worker. This should be located next to the ``test.csv`` file called ``test.csv.mtd``. -To make this simply execute the following:: +To make both the data and metadata simply execute the following - echo '{ "format":"csv", "header":false, "rows":3, "cols":3 }' > temp/test.csv.mtd +.. include:: ../code/federatedTutorial_part1.py + :start-line: 20 + :code: python -After creating our data we the federated worker becomes able to execute federated instructions. +After creating our data the federated worker becomes able to execute federated instructions. The aggregated sum using federated instructions in python SystemDS is done as follows -.. code-block:: python - - # Import numpy and SystemDS - import numpy as np - from systemds.context import SystemDSContext - - # Create a federated matrix - ## Indicate the dimensions of the data: - ### Here the first list in the tuple is the top left Coordinate, - ### and the second the bottom left coordinate. - ### It is ordered as [col,row]. - dims = ([0,0], [3,3]) - - ## Specify the address + file path from worker: - address = "localhost:8001/temp/test.csv" - - with SystemDSContext() as sds: - fed_a = sds.federated([address], [dims]) - # Sum the federated matrix and call compute to execute - print(fed_a.sum().compute()) - # Result should be 45. +.. include:: ../code/federatedTutorial_part2.py + :start-line: 20 + :code: python Multiple Federated Environments ------------------------------- @@ -96,7 +73,7 @@ Using the data created from the last example we can simulate multiple federated workers by starting multiple ones on different ports. Start with 3 different terminals, and run one federated environment in each. -.. code-block:: python +.. 
code-block:: systemds WORKER 8001 systemds WORKER 8002 @@ -104,35 +81,13 @@ Start with 3 different terminals, and run one federated environment in each. Once all three workers are up and running we can leverage all three in the following example -.. code-block:: python - - import numpy as np - from systemds.context import SystemDSContext - - addr1 = "localhost:8001/temp/test.csv" - addr2 = "localhost:8002/temp/test.csv" - addr3 = "localhost:8003/temp/test.csv" - - # Create a federated matrix using two federated environments - # Note that the two federated matrices are stacked on top of each other - - with SystemDSContext() as sds: - fed_a = sds.federated( - [addr1, addr2], - [([0,0], [3,3]), ([0,3], [3,6])]) - - fed_b = sds.federated( - [addr1, addr3], - [([0,0], [3,3]), ([0,3], [3,6])]) - - # Multiply, compute and print. - res = (fed_a * fed_b).compute() - - print(res) +.. include:: ../code/federatedTutorial_part3.py + :start-line: 20 + :code: python The print should look like -.. code-block:: python +.. code-block:: [[ 1. 4. 9. 1. 4. 9.] [16. 25. 36. 16. 25. 36.] diff --git a/src/main/python/tests/federated/runFedTest.sh b/src/main/python/tests/federated/runFedTest.sh index b34ca99..90e24a5 100755 --- a/src/main/python/tests/federated/runFedTest.sh +++ b/src/main/python/tests/federated/runFedTest.sh @@ -34,6 +34,7 @@ mkdir -p $workerdir mkdir -p $outputdir w1_Output="$workerdir/w1" w2_Output="$workerdir/w2" +w3_Output="$workerdir/w3" log="$outputdir/out.log" # Make the workers start quietly and pipe their output to a file to print later @@ -42,12 +43,15 @@ systemds WORKER 8001 >$w1_Output 2>&1 & Fed1=$! systemds WORKER 8002 >$w2_Output 2>&1 & Fed2=$! +systemds WORKER 8003 >$w3_Output 2>&1 & +Fed3=$! 
echo "Starting workers" && sleep 3 && echo "Starting tests" # Run test python -m unittest discover -s tests/federated -p 'test_*.py' $1 >$log 2>&1 pkill -P $Fed1 pkill -P $Fed2 +pkill -P $Fed3 # Print output echo -e "\n---------------\nWorkers Output:\n---------------" @@ -55,6 +59,8 @@ echo -e "\nWorker 1:" cat $w1_Output echo -e "\nWorker 2:" cat $w2_Output +echo -e "\nWorker 3:" +cat $w3_Output echo -e "\n------------\nTest output:\n------------" cat $log grepvals="$(tail -n 10 $log | grep OK)" diff --git a/src/main/python/tests/federated/test_federated_tutorial.py b/src/main/python/tests/federated/test_federated_tutorial.py new file mode 100644 index 0000000..d9efdd6 --- /dev/null +++ b/src/main/python/tests/federated/test_federated_tutorial.py @@ -0,0 +1,39 @@ +# ------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# +# ------------------------------------------------------------- + +import shutil +import unittest + + +class TestFederatedAggFn(unittest.TestCase): + + @classmethod + def tearDownClass(cls): + shutil.rmtree("temp") + + def test_part1(self): + import docs.source.code.federatedTutorial_part1 + + def test_part2(self): + import docs.source.code.federatedTutorial_part2 + + def test_part3(self): + import docs.source.code.federatedTutorial_part3