This is an automated email from the ASF dual-hosted git repository.

mboehm7 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git


The following commit(s) were added to refs/heads/main by this push:
     new 942a3a2a34 [SYSTEMDS-3185] Docs and cleanup multi-tenant federated 
learning
942a3a2a34 is described below

commit 942a3a2a349cee2fcf3591e7850051538cc41fef
Author: ywcb00 <[email protected]>
AuthorDate: Mon Jun 6 00:33:44 2022 +0200

    [SYSTEMDS-3185] Docs and cleanup multi-tenant federated learning
    
    Closes #1627.
---
 docs/api/python/sources/guide/federated.rst.txt    | 26 ++++++++++++++++++++++
 .../runtime/controlprogram/LocalVariableMap.java   |  7 +++---
 src/main/python/docs/source/guide/federated.rst    | 26 ++++++++++++++++++++++
 .../multitenant/FederatedReuseSlicesTest.java      |  4 +++-
 4 files changed, 58 insertions(+), 5 deletions(-)

diff --git a/docs/api/python/sources/guide/federated.rst.txt 
b/docs/api/python/sources/guide/federated.rst.txt
index 6afadf2393..4afafa070d 100644
--- a/docs/api/python/sources/guide/federated.rst.txt
+++ b/docs/api/python/sources/guide/federated.rst.txt
@@ -99,3 +99,29 @@ The print should look like
   that you have:
   
   a csv file, mtd file, and SystemDS Environment is set correctly.
+
+Multi-tenant Federated Learning
+-------------------------------
+
+SystemDS supports Multi-tenant Federated Learning, meaning that multiple
+coordinators learn on shared federated workers. From another perspective,
+the federated worker allows multiple coordinators to perform model training
+simultaneously using the data from the respective federated site. This
+approach enables the worker to operate in a server-like mode, providing
+multiple tenants with the ability to learn on the federated data at the same
+time. Tenant isolation ensures that tenant-specific intermediate results are
+only accessible by the respective tenant.
+
+Limitations
+~~~~~~~~~~~
+
+Since the coordinators are differentiated by their IP address in combination
+with their process ID, the worker is not able to isolate coordinators which
+share the same IP address and the same process ID. This occurs, for example,
+when two coordinators are running behind a proxy (same IP address), where
+both coordinators coincidentally have the same process ID.
+
+A second limitation shows up in networks using the Dynamic Host Configuration
+Protocol (DHCP). Since the federated worker identifies the coordinator based on
+the coordinator's IP address, the worker does not re-identify the coordinator
+once that address has changed, i.e., when DHCP assigns it a different IP address.
diff --git 
a/src/main/java/org/apache/sysds/runtime/controlprogram/LocalVariableMap.java 
b/src/main/java/org/apache/sysds/runtime/controlprogram/LocalVariableMap.java
index 3f92c9e096..bac6759ca5 100644
--- 
a/src/main/java/org/apache/sysds/runtime/controlprogram/LocalVariableMap.java
+++ 
b/src/main/java/org/apache/sysds/runtime/controlprogram/LocalVariableMap.java
@@ -19,7 +19,6 @@
 
 package org.apache.sysds.runtime.controlprogram;
 
-import java.util.concurrent.ConcurrentHashMap;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Map;
@@ -45,19 +44,19 @@ public class LocalVariableMap implements Cloneable
        private static final IDSequence _seq = new IDSequence();
        
        //variable map data and id
-       private final ConcurrentHashMap<String, Data> localMap;
+       private final HashMap<String, Data> localMap;
        private final long localID;
        
        //optional set of registered outputs
        private HashSet<String> outputs = null;
        
        public LocalVariableMap() {
-               localMap = new ConcurrentHashMap<>();
+               localMap = new HashMap<>();
                localID = _seq.getNextID();
        }
        
        public LocalVariableMap(LocalVariableMap vars) {
-               localMap = new ConcurrentHashMap<>(vars.localMap);
+               localMap = new HashMap<>(vars.localMap);
                localID = _seq.getNextID();
        }
 
diff --git a/src/main/python/docs/source/guide/federated.rst 
b/src/main/python/docs/source/guide/federated.rst
index 6afadf2393..4afafa070d 100644
--- a/src/main/python/docs/source/guide/federated.rst
+++ b/src/main/python/docs/source/guide/federated.rst
@@ -99,3 +99,29 @@ The print should look like
   that you have:
   
   a csv file, mtd file, and SystemDS Environment is set correctly.
+
+Multi-tenant Federated Learning
+-------------------------------
+
+SystemDS supports Multi-tenant Federated Learning, meaning that multiple
+coordinators learn on shared federated workers. From another perspective,
+the federated worker allows multiple coordinators to perform model training
+simultaneously using the data from the respective federated site. This
+approach enables the worker to operate in a server-like mode, providing
+multiple tenants with the ability to learn on the federated data at the same
+time. Tenant isolation ensures that tenant-specific intermediate results are
+only accessible by the respective tenant.
+
+Limitations
+~~~~~~~~~~~
+
+Since the coordinators are differentiated by their IP address in combination
+with their process ID, the worker is not able to isolate coordinators which
+share the same IP address and the same process ID. This occurs, for example,
+when two coordinators are running behind a proxy (same IP address), where
+both coordinators coincidentally have the same process ID.
+
+A second limitation shows up in networks using the Dynamic Host Configuration
+Protocol (DHCP). Since the federated worker identifies the coordinator based on
+the coordinator's IP address, the worker does not re-identify the coordinator
+once that address has changed, i.e., when DHCP assigns it a different IP address.
diff --git 
a/src/test/java/org/apache/sysds/test/functions/federated/multitenant/FederatedReuseSlicesTest.java
 
b/src/test/java/org/apache/sysds/test/functions/federated/multitenant/FederatedReuseSlicesTest.java
index 9f15f49d22..cdae6ac42e 100644
--- 
a/src/test/java/org/apache/sysds/test/functions/federated/multitenant/FederatedReuseSlicesTest.java
+++ 
b/src/test/java/org/apache/sysds/test/functions/federated/multitenant/FederatedReuseSlicesTest.java
@@ -247,9 +247,11 @@ public class FederatedReuseSlicesTest extends 
MultiTenantTestBase {
                boolean retVal = false;
                int multiplier = 1;
                int numInst = -1;
+               int resSerial = 0; // serialized responses written to lineage 
cache
                switch(opType) {
                        case EW_MULT:
                                numInst = 1;
+                               resSerial = 1;
                                break;
                        case RM_EMPTY:
                                numInst = 1;
@@ -262,7 +264,7 @@ public class FederatedReuseSlicesTest extends 
MultiTenantTestBase {
                if(coordIX <= 1) {
                        retVal = outputLog.contains(LINCACHE_MULTILVL + "0/");
                        retVal &= outputLog.contains(LINCACHE_WRITES + 
Integer.toString(
-                               (((coordIX == 0) ? 1 : 0) + numInst) // read + 
instructions
+                               (((coordIX == 0) ? 1 : 0) + numInst + 
resSerial) // read + instructions + serialization
                                * workerProcesses.size()) + "/");
                }
                else {

Reply via email to