This is an automated email from the ASF dual-hosted git repository.

mboehm7 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git

commit 543508af428899816d0dcb3c7db5dca24eb358fa
Author: Matthias Boehm <[email protected]>
AuthorDate: Fri Jun 30 16:47:47 2023 +0200

    [SYSTEMDS-3583] Fix eager broadcast block cleanup (spurious NPE)
    
    This patch fixes the cleanup logic during broadcasting, where the blocks
    were cleared right after the in-memory objects had been serialized, in
    order to reduce the memory requirements at the driver. However, Spark
    only lazily ships the serialized broadcasts to the executors, and
    locally they are only cached via weak references, which requires the
    ability to re-serialize them under memory pressure. The eager clear is
    therefore disabled, so the original blocks remain available.
---
 .../sysds/runtime/instructions/spark/data/PartitionedBlock.java   | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/main/java/org/apache/sysds/runtime/instructions/spark/data/PartitionedBlock.java b/src/main/java/org/apache/sysds/runtime/instructions/spark/data/PartitionedBlock.java
index b6e573f154..26c6a7845d 100644
--- a/src/main/java/org/apache/sysds/runtime/instructions/spark/data/PartitionedBlock.java
+++ b/src/main/java/org/apache/sysds/runtime/instructions/spark/data/PartitionedBlock.java
@@ -224,7 +224,13 @@ public class PartitionedBlock<T extends CacheBlock> implements Externalizable
        }
        
        public void clearBlocks() {
-               _partBlocks = null;
+               //note: a clear of blocks is invalid here because although Spark
+               //serializes the blocks on broadcast(), they are lazily shipped
+               //to the executors. Since the serialized version is only stored
+               //on a weak reference, the original blocks are still necessary
+               //in case a garbage collection happens before the actual broadcast.
+               
+               //_partBlocks = null;
        }
 
        /**

Reply via email to