This is an automated email from the ASF dual-hosted git repository.

mboehm7 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git

commit 469d15856878a3de56b5da98dfe8513a9461ccd1
Author: Matthias Boehm <[email protected]>
AuthorDate: Thu Aug 4 23:33:12 2022 +0200

    [SYSTEMDS-3414] Fix allocation of large, multi-array dense matrices
    
    This patch fixes the allocation logic for large dense matrix blocks,
    which internally use multiple arrays of size <= int_max in a row-aligned
    manner. This works well if the row alignment leaves enough head room
    below int_max, but the actual maximum array size is platform-specific.
    For example, in the following scenario the allocation failed with
    OutOfMemoryError: Requested array size exceeds VM limit:
    
    INT_MAX:   2147483647
    ALLOCATED: 2147483646 --> error despite <= int_max
    
    We now leave explicit head room of 8 cells for platform-specific padding
    to prevent such errors. The same bound is also applied to the byte
    buffers in the lazy write cache (buffer pool).
---
 .../sysds/runtime/controlprogram/caching/ByteBuffer.java       |  5 +++--
 .../java/org/apache/sysds/runtime/data/DenseBlockLBool.java    |  2 +-
 .../java/org/apache/sysds/runtime/data/DenseBlockLDRB.java     | 10 ++++++++--
 3 files changed, 12 insertions(+), 5 deletions(-)

diff --git 
a/src/main/java/org/apache/sysds/runtime/controlprogram/caching/ByteBuffer.java 
b/src/main/java/org/apache/sysds/runtime/controlprogram/caching/ByteBuffer.java
index a0c699c268..0466035998 100644
--- 
a/src/main/java/org/apache/sysds/runtime/controlprogram/caching/ByteBuffer.java
+++ 
b/src/main/java/org/apache/sysds/runtime/controlprogram/caching/ByteBuffer.java
@@ -25,6 +25,7 @@ import java.io.DataInputStream;
 import java.io.DataOutput;
 import java.io.IOException;
 
+import org.apache.sysds.runtime.data.DenseBlockLDRB;
 import org.apache.sysds.runtime.matrix.data.FrameBlock;
 import org.apache.sysds.runtime.matrix.data.MatrixBlock;
 import org.apache.sysds.runtime.util.LocalFileUtils;
@@ -36,7 +37,7 @@ import org.apache.sysds.runtime.util.LocalFileUtils;
  */
 public class ByteBuffer
 {
-       private volatile boolean _serialized;   
+       private volatile boolean _serialized;
        private volatile boolean _shallow;
        private volatile boolean _matrix;
        private final long _size;
@@ -167,7 +168,7 @@ public class ByteBuffer
                if( !cb.isShallowSerialize(true) ) { //SPARSE matrix blocks
                        // since cache blocks are serialized into a byte 
representation
                        // the buffer buffer can hold at most 2GB in size 
-                       return ( size <= Integer.MAX_VALUE );
+                       return ( size <= DenseBlockLDRB.MAX_ALLOC );
                }
                else {//DENSE/SPARSE matrix / frame blocks
                        // for dense and under special conditions also sparse 
matrix blocks 
diff --git a/src/main/java/org/apache/sysds/runtime/data/DenseBlockLBool.java 
b/src/main/java/org/apache/sysds/runtime/data/DenseBlockLBool.java
index 705f894241..1282d98bf2 100644
--- a/src/main/java/org/apache/sysds/runtime/data/DenseBlockLBool.java
+++ b/src/main/java/org/apache/sysds/runtime/data/DenseBlockLBool.java
@@ -74,7 +74,7 @@ public class DenseBlockLBool extends DenseBlockLDRB
                // Special implementation to make computeNnz fast if complete 
block is read
                boolean bv = v != 0;
                long dataLength = (long) rlen * odims[0];
-               int newBlockSize = Math.min(rlen, Integer.MAX_VALUE / odims[0]);
+               int newBlockSize = Math.min(rlen, MAX_ALLOC / odims[0]);
                int numBlocks = UtilFunctions.toInt(Math.ceil((double) rlen / 
newBlockSize));
                if (_blen == newBlockSize && dataLength <= capacity()) {
                        for (int i = 0; i < numBlocks; i++) {
diff --git a/src/main/java/org/apache/sysds/runtime/data/DenseBlockLDRB.java 
b/src/main/java/org/apache/sysds/runtime/data/DenseBlockLDRB.java
index 339dbb5069..7b23c4c4a9 100644
--- a/src/main/java/org/apache/sysds/runtime/data/DenseBlockLDRB.java
+++ b/src/main/java/org/apache/sysds/runtime/data/DenseBlockLDRB.java
@@ -33,6 +33,12 @@ public abstract class DenseBlockLDRB extends DenseBlock
 {
        private static final long serialVersionUID = -7519435549328146356L;
 
+       // On some platforms, allocating arrays very close to INT_MAX runs into
+       // a "java.lang.OutOfMemoryError: Requested array size exceeds VM limit".
+       // Although this normally does not happen because we allocate row-aligned
+       // chunks, to be on the safe side, we keep a margin of 8 for padding.
+       public static int MAX_ALLOC = Integer.MAX_VALUE - 8;
+       
        protected int _blen;
 
        protected DenseBlockLDRB(int[] dims) {
@@ -49,7 +55,7 @@ public abstract class DenseBlockLDRB extends DenseBlock
 
        @Override
        public int blockSize() {
-           return _blen;
+               return _blen;
        }
 
        @Override
@@ -60,7 +66,7 @@ public abstract class DenseBlockLDRB extends DenseBlock
        @Override
        public void reset(int rlen, int[] odims, double v) {
                long dataLength = (long) rlen * odims[0];
-               int newBlockSize = Math.min(rlen, Integer.MAX_VALUE / odims[0]);
+               int newBlockSize = Math.min(rlen, MAX_ALLOC / odims[0]);
                int numBlocks = UtilFunctions.toInt(Math.ceil((double) rlen / 
newBlockSize));
                if (_blen == newBlockSize && dataLength <= capacity()) {
                        IntStream.range(0, numBlocks)

Reply via email to