This is an automated email from the ASF dual-hosted git repository.
mboehm7 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git
The following commit(s) were added to refs/heads/main by this push:
new 942a3a2a34 [SYSTEMDS-3185] Docs and cleanup multi-tenant federated
learning
942a3a2a34 is described below
commit 942a3a2a349cee2fcf3591e7850051538cc41fef
Author: ywcb00 <[email protected]>
AuthorDate: Mon Jun 6 00:33:44 2022 +0200
[SYSTEMDS-3185] Docs and cleanup multi-tenant federated learning
Closes #1627.
---
docs/api/python/sources/guide/federated.rst.txt | 26 ++++++++++++++++++++++
.../runtime/controlprogram/LocalVariableMap.java | 7 +++---
src/main/python/docs/source/guide/federated.rst | 26 ++++++++++++++++++++++
.../multitenant/FederatedReuseSlicesTest.java | 4 +++-
4 files changed, 58 insertions(+), 5 deletions(-)
diff --git a/docs/api/python/sources/guide/federated.rst.txt
b/docs/api/python/sources/guide/federated.rst.txt
index 6afadf2393..4afafa070d 100644
--- a/docs/api/python/sources/guide/federated.rst.txt
+++ b/docs/api/python/sources/guide/federated.rst.txt
@@ -99,3 +99,29 @@ The print should look like
that you have:
a csv file, mtd file, and SystemDS Environment is set correctly.
+
+Multi-tenant Federated Learning
+-------------------------------
+
+SystemDS supports Multi-tenant Federated Learning, meaning that multiple
+coordinators learn on shared federated workers. From another perspective,
+the federated worker allows multiple coordinators to perform model training
+simultaneously using the data from the respective federated site. This
+approach enables the worker to operate in a server-like mode, providing
+multiple tenants with the ability to learn on the federated data at the same
+time. Tenant isolation ensures that tenant-specific intermediate results are
+only accessible by the respective tenant.
+
+Limitations
+~~~~~~~~~~~
+
+Since the coordinators are differentiated by their IP address in combination
+with their process ID, the worker is not able to isolate coordinators which
+share the same IP address and the same process ID. This occurs, for example,
+when two coordinators are running behind a proxy (same IP address), where
+both coordinators coincidentally have the same process ID.
+
+A second limitation shows up in networks using the Dynamic Host Configuration
+Protocol (DHCP). Since the federated worker identifies the coordinator based on
+the IP address, the worker does not re-identify the coordinator when its IP
+address has changed, i.e., when DHCP assigns it a different IP address.
diff --git
a/src/main/java/org/apache/sysds/runtime/controlprogram/LocalVariableMap.java
b/src/main/java/org/apache/sysds/runtime/controlprogram/LocalVariableMap.java
index 3f92c9e096..bac6759ca5 100644
---
a/src/main/java/org/apache/sysds/runtime/controlprogram/LocalVariableMap.java
+++
b/src/main/java/org/apache/sysds/runtime/controlprogram/LocalVariableMap.java
@@ -19,7 +19,6 @@
package org.apache.sysds.runtime.controlprogram;
-import java.util.concurrent.ConcurrentHashMap;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
@@ -45,19 +44,19 @@ public class LocalVariableMap implements Cloneable
private static final IDSequence _seq = new IDSequence();
//variable map data and id
- private final ConcurrentHashMap<String, Data> localMap;
+ private final HashMap<String, Data> localMap;
private final long localID;
//optional set of registered outputs
private HashSet<String> outputs = null;
public LocalVariableMap() {
- localMap = new ConcurrentHashMap<>();
+ localMap = new HashMap<>();
localID = _seq.getNextID();
}
public LocalVariableMap(LocalVariableMap vars) {
- localMap = new ConcurrentHashMap<>(vars.localMap);
+ localMap = new HashMap<>(vars.localMap);
localID = _seq.getNextID();
}
diff --git a/src/main/python/docs/source/guide/federated.rst
b/src/main/python/docs/source/guide/federated.rst
index 6afadf2393..4afafa070d 100644
--- a/src/main/python/docs/source/guide/federated.rst
+++ b/src/main/python/docs/source/guide/federated.rst
@@ -99,3 +99,29 @@ The print should look like
that you have:
a csv file, mtd file, and SystemDS Environment is set correctly.
+
+Multi-tenant Federated Learning
+-------------------------------
+
+SystemDS supports Multi-tenant Federated Learning, meaning that multiple
+coordinators learn on shared federated workers. From another perspective,
+the federated worker allows multiple coordinators to perform model training
+simultaneously using the data from the respective federated site. This
+approach enables the worker to operate in a server-like mode, providing
+multiple tenants with the ability to learn on the federated data at the same
+time. Tenant isolation ensures that tenant-specific intermediate results are
+only accessible by the respective tenant.
+
+Limitations
+~~~~~~~~~~~
+
+Since the coordinators are differentiated by their IP address in combination
+with their process ID, the worker is not able to isolate coordinators which
+share the same IP address and the same process ID. This occurs, for example,
+when two coordinators are running behind a proxy (same IP address), where
+both coordinators coincidentally have the same process ID.
+
+A second limitation shows up in networks using the Dynamic Host Configuration
+Protocol (DHCP). Since the federated worker identifies the coordinator based on
+the IP address, the worker does not re-identify the coordinator when its IP
+address has changed, i.e., when DHCP assigns it a different IP address.
diff --git
a/src/test/java/org/apache/sysds/test/functions/federated/multitenant/FederatedReuseSlicesTest.java
b/src/test/java/org/apache/sysds/test/functions/federated/multitenant/FederatedReuseSlicesTest.java
index 9f15f49d22..cdae6ac42e 100644
---
a/src/test/java/org/apache/sysds/test/functions/federated/multitenant/FederatedReuseSlicesTest.java
+++
b/src/test/java/org/apache/sysds/test/functions/federated/multitenant/FederatedReuseSlicesTest.java
@@ -247,9 +247,11 @@ public class FederatedReuseSlicesTest extends
MultiTenantTestBase {
boolean retVal = false;
int multiplier = 1;
int numInst = -1;
+ int resSerial = 0; // serialized responses written to lineage
cache
switch(opType) {
case EW_MULT:
numInst = 1;
+ resSerial = 1;
break;
case RM_EMPTY:
numInst = 1;
@@ -262,7 +264,7 @@ public class FederatedReuseSlicesTest extends
MultiTenantTestBase {
if(coordIX <= 1) {
retVal = outputLog.contains(LINCACHE_MULTILVL + "0/");
retVal &= outputLog.contains(LINCACHE_WRITES +
Integer.toString(
- (((coordIX == 0) ? 1 : 0) + numInst) // read +
instructions
+ (((coordIX == 0) ? 1 : 0) + numInst +
resSerial) // read + instructions + serialization
* workerProcesses.size()) + "/");
}
else {