This is an automated email from the ASF dual-hosted git repository.

baunsgaard pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git


The following commit(s) were added to refs/heads/main by this push:
     new 5ac091a20b [MINOR] Increase Memory Estimate for Frames
5ac091a20b is described below

commit 5ac091a20bc9e8af7886ab2c45219e6c01e1d2ea
Author: Sebastian Baunsgaard <[email protected]>
AuthorDate: Sat Dec 28 14:05:36 2024 +0100

    [MINOR] Increase Memory Estimate for Frames
    
    This commit increases the default estimate of frame size.
    Previously, frames were estimated similarly to Matrices.
    The wrong estimate leads to problems on frames with more than
    Integer.MAX_VALUE rows. To improve it, this commit defaults to
    8-character strings in all cells of an unread frame.
    
    Since there is no way of knowing if the input Frame contains longer
    strings, it is still a subpar estimate. However, it is an
    improvement over estimating everything as a dense double matrix.
    
    (The change happened because I encountered very incorrect estimates in 
BEWARE)
    
    Closes #2158
    
    Signed-off-by: Sebastian Baunsgaard <[email protected]>
---
 src/main/java/org/apache/sysds/hops/DataOp.java    |  9 ++++-
 .../java/org/apache/sysds/hops/OptimizerUtils.java | 13 +++++++
 .../test/component/misc/MemoryEstimateTest.java    |  1 -
 .../test/component/misc/OptimizerUtilsTest.java    | 43 ++++++++++++++++++++++
 4 files changed, 63 insertions(+), 3 deletions(-)

diff --git a/src/main/java/org/apache/sysds/hops/DataOp.java 
b/src/main/java/org/apache/sysds/hops/DataOp.java
index 82e5ecbbad..7be61f4129 100644
--- a/src/main/java/org/apache/sysds/hops/DataOp.java
+++ b/src/main/java/org/apache/sysds/hops/DataOp.java
@@ -359,8 +359,8 @@ public class DataOp extends Hop {
        protected double computeOutputMemEstimate( long dim1, long dim2, long 
nnz )
        {               
                double ret = 0;
-               
-               if ( getDataType() == DataType.SCALAR ) 
+               final DataType dt = getDataType();
+               if ( dt == DataType.SCALAR ) 
                {
                        switch( getValueType() ) 
                        {
@@ -379,6 +379,11 @@ public class DataOp extends Hop {
                                        ret = 0;
                        }
                }
+               else if(dt == DataType.FRAME) {
+                       if(_op == OpOpData.PERSISTENTREAD || _op == 
OpOpData.TRANSIENTREAD) {
+                               ret = 
OptimizerUtils.estimateSizeExactFrame(dim1, dim2);
+                       }
+               }
                else //MATRIX / FRAME
                {
                        if(   _op == OpOpData.PERSISTENTREAD 
diff --git a/src/main/java/org/apache/sysds/hops/OptimizerUtils.java 
b/src/main/java/org/apache/sysds/hops/OptimizerUtils.java
index 6338ff7a70..a3161c5723 100644
--- a/src/main/java/org/apache/sysds/hops/OptimizerUtils.java
+++ b/src/main/java/org/apache/sysds/hops/OptimizerUtils.java
@@ -64,6 +64,7 @@ import org.apache.sysds.runtime.meta.MatrixCharacteristics;
 import org.apache.sysds.runtime.util.IndexRange;
 import org.apache.sysds.runtime.util.UtilFunctions;
 import org.apache.sysds.utils.stats.InfrastructureAnalyzer;
+import org.apache.sysds.utils.MemoryEstimates;
 
 public class OptimizerUtils 
 {
@@ -788,6 +789,18 @@ public class OptimizerUtils
                double sp = getSparsity(nrows, ncols, nnz);
                return estimateSizeExactSparsity(nrows, ncols, sp);
        }
+
+
+       public static long estimateSizeExactFrame(long nRows, long nCols){
+               // Currently we do not support frames larger than INT. 
+               // Therefore, we estimate their size to be extremely large.
+               // The large size force spark operations.
+               if(nRows > Integer.MAX_VALUE)
+                       return Long.MAX_VALUE;
+               
+               // assuming String arrays and on average 8 characters per value.
+               return (long)MemoryEstimates.stringArrayCost((int)nRows, 8) * 
nCols;
+       }
        
        /**
         * Estimates the footprint (in bytes) for an in-memory representation 
of a
diff --git 
a/src/test/java/org/apache/sysds/test/component/misc/MemoryEstimateTest.java 
b/src/test/java/org/apache/sysds/test/component/misc/MemoryEstimateTest.java
index 8c8e31535b..d68c30f836 100644
--- a/src/test/java/org/apache/sysds/test/component/misc/MemoryEstimateTest.java
+++ b/src/test/java/org/apache/sysds/test/component/misc/MemoryEstimateTest.java
@@ -87,7 +87,6 @@ public class MemoryEstimateTest {
                                
assertEquals(MemoryEstimates.doubleArrayCost(length), measure(arrayDouble), 
0.2);
                                break;
                        default:
-                               
System.out.println(arrayToMeasure.getClass().getSimpleName());
                                throw new 
NotImplementedException(arrayToMeasure + " not implemented");
                }
        }
diff --git 
a/src/test/java/org/apache/sysds/test/component/misc/OptimizerUtilsTest.java 
b/src/test/java/org/apache/sysds/test/component/misc/OptimizerUtilsTest.java
new file mode 100644
index 0000000000..16e9b2c27b
--- /dev/null
+++ b/src/test/java/org/apache/sysds/test/component/misc/OptimizerUtilsTest.java
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.test.component.misc;
+
+import static org.junit.Assert.assertTrue;
+
+import org.apache.sysds.hops.OptimizerUtils;
+import org.junit.Test;
+
+public class OptimizerUtilsTest {
+
+       @Test
+       public void estimateFrameSize() {
+               Long size = OptimizerUtils.estimateSizeExactFrame(10, 10);
+               assertTrue(size > 10 * 10);
+       }
+
+       @Test
+       public void estimateFrameSizeMoreRowsThanInt() {
+               // Currently we do not support frames larger than INT. 
Therefore we estimate their size to be extremely large.
+               // The large size force spark operations
+               Long size = 
OptimizerUtils.estimateSizeExactFrame(Integer.MAX_VALUE + 1L, 10);
+
+               assertTrue(size == Long.MAX_VALUE);
+       }
+}

Reply via email to