This is an automated email from the ASF dual-hosted git repository.

baunsgaard pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git


The following commit(s) were added to refs/heads/main by this push:
     new 1fa2ebc7ba [MINOR] Frame Shallow Update
1fa2ebc7ba is described below

commit 1fa2ebc7bad9e6bb8006f70c8ae01a00cde74d5d
Author: Sebastian Baunsgaard <baunsga...@apache.org>
AuthorDate: Fri Apr 5 17:01:47 2024 +0200

    [MINOR] Frame Shallow Update
    
    This commit make minor modifications to the
    shallow handling of Frames.
    One instance is fast abort of isShallowSerialize.
    
    Closes #2013
---
 .../sysds/runtime/frame/data/FrameBlock.java       | 61 ++++++++++++----------
 1 file changed, 32 insertions(+), 29 deletions(-)

diff --git a/src/main/java/org/apache/sysds/runtime/frame/data/FrameBlock.java 
b/src/main/java/org/apache/sysds/runtime/frame/data/FrameBlock.java
index 3efafbb30b..312f88ca7d 100644
--- a/src/main/java/org/apache/sysds/runtime/frame/data/FrameBlock.java
+++ b/src/main/java/org/apache/sysds/runtime/frame/data/FrameBlock.java
@@ -106,6 +106,7 @@ public class FrameBlock implements CacheBlock<FrameBlock>, 
Externalizable {
        /** Locks on the columns not tied to the columns objects. */
        private SoftReference<Object[]> _columnLocks = null;
 
+       /** Materialized number of rows in this FrameBlock */
        private int _nRow = 0;
 
        /** Cached size in memory to avoid repeated scans of string columns */
@@ -756,7 +757,8 @@ public class FrameBlock implements CacheBlock<FrameBlock>, 
Externalizable {
        public void write(DataOutput out) throws IOException {
                final boolean isDefaultMeta = isColNamesDefault() && 
isColumnMetadataDefault();
                // write header (rows, cols, default)
-               out.writeInt(getNumRows());
+               final int nRow = getNumRows();
+               out.writeInt(nRow);
                out.writeInt(getNumColumns());
                out.writeBoolean(isDefaultMeta);
                // write columns (value type, data)
@@ -767,7 +769,7 @@ public class FrameBlock implements CacheBlock<FrameBlock>, 
Externalizable {
                                out.writeUTF(getColumnName(j));
                                _colmeta[j].write(out);
                        }
-                       if(type >= 0) // if allocated write column data
+                       if(type >= 0 && nRow > 0) // if allocated write column 
data
                                _coldata[j].write(out);
                }
        }
@@ -796,6 +798,8 @@ public class FrameBlock implements CacheBlock<FrameBlock>, 
Externalizable {
                        isDefaultMeta ? null : new String[numCols]; // if meta 
is default allocate on demand
                _colmeta = (_colmeta != null && _colmeta.length == numCols) ? 
_colmeta : new ColumnMetadata[numCols];
                _coldata = (_coldata != null && _coldata.length == numCols) ? 
_coldata : new Array[numCols];
+               if(_nRow == 0)
+                       _coldata = null;
                // read columns (value type, meta, data)
                for(int j = 0; j < numCols; j++) {
                        byte type = in.readByte();
@@ -807,7 +811,7 @@ public class FrameBlock implements CacheBlock<FrameBlock>, 
Externalizable {
                        else
                                _colmeta[j] = new ColumnMetadata(); // must be 
allocated.
 
-                       if(type >= 0) // if in allocated column data then read 
it
+                       if(type >= 0 && _nRow > 0) // if in allocated column 
data then read it
                                _coldata[j] = ArrayFactory.read(in, _nRow);
                }
                _msize = -1;
@@ -815,30 +819,12 @@ public class FrameBlock implements 
CacheBlock<FrameBlock>, Externalizable {
 
        @Override
        public void writeExternal(ObjectOutput out) throws IOException {
-               
-               // if((out instanceof ObjectOutputStream)){
-               //      ObjectOutputStream oos = (ObjectOutputStream)out;
-               //      FastBufferedDataOutputStream fos = new 
FastBufferedDataOutputStream(oos);
-               //      write(fos); //note: cannot close fos as this would 
close oos
-               //      fos.flush();
-               // }
-               // else{
-                       write(out);
-               // }
+               write(out);
        }
 
        @Override
        public void readExternal(ObjectInput in) throws IOException {
-               // if(in instanceof ObjectInputStream) {
-               //      // fast deserialize of dense/sparse blocks
-               //      ObjectInputStream ois = (ObjectInputStream) in;
-               //      FastBufferedDataInputStream fis = new 
FastBufferedDataInputStream(ois);
-               //      readFields(fis); // note: cannot close fos as this 
would close oos
-               // }
-               // else {
-                       // redirect deserialization to writable impl
-                       readFields(in);
-               // }
+               readFields(in);
        }
 
        @Override
@@ -878,7 +864,7 @@ public class FrameBlock implements CacheBlock<FrameBlock>, 
Externalizable {
                        for(int j = 0; j < clen; j++)
                                size += 
ArrayFactory.getInMemorySize(_schema[j], rlen, true);
                else {// allocated
-                       if(rlen > 1000 && clen > 10 && 
ConfigurationManager.isParallelIOEnabled()) {
+                       if((rlen > 1000 || clen > 10 )&& 
ConfigurationManager.isParallelIOEnabled()) {
                                final ExecutorService pool = 
CommonThreadPool.get();
                                try {
                                        List<Future<Long>> f = new 
ArrayList<>(clen);
@@ -893,6 +879,7 @@ public class FrameBlock implements CacheBlock<FrameBlock>, 
Externalizable {
                                }
                                catch(InterruptedException | ExecutionException 
e) {
                                        LOG.error(e);
+                                       size = 0;
                                        for(Array<?> aa : _coldata)
                                                size += aa.getInMemorySize();
                                }
@@ -937,10 +924,10 @@ public class FrameBlock implements 
CacheBlock<FrameBlock>, Externalizable {
        public boolean isShallowSerialize(boolean inclConvert) {
                // shallow serialize if non-string schema because a frame block
                // is always dense but strings have large array overhead per 
cell
-               boolean ret = true;
-               for(int j = 0; j < _schema.length && ret; j++)
-                       ret &= _coldata[j].isShallowSerialize();
-               return ret;
+               for(int j = 0; j < _schema.length; j++)
+                       if(!_coldata[j].isShallowSerialize())
+                               return false;
+               return true;
        }
 
        @Override
@@ -1217,6 +1204,22 @@ public class FrameBlock implements 
CacheBlock<FrameBlock>, Externalizable {
                _msize = -1;
        }
 
+       public FrameBlock copyShallow(){
+               FrameBlock ret = new FrameBlock();
+               ret._nRow = _nRow;
+               ret._msize = _msize; 
+               final int nCol = getNumColumns();
+               if(_coldata != null)
+                       ret._coldata = Arrays.copyOf(_coldata, nCol);
+               if(_colnames != null)
+                       ret._colnames = Arrays.copyOf(_colnames, nCol);
+               if(_colmeta != null)
+                       ret._colmeta = Arrays.copyOf(_colmeta, nCol);
+               if(_schema != null)
+                       ret._schema = Arrays.copyOf(_schema, nCol);
+               return ret;
+       }
+
        /**
         * Copy src matrix into the index range of the existing current matrix.
         *
@@ -1358,7 +1361,7 @@ public class FrameBlock implements 
CacheBlock<FrameBlock>, Externalizable {
        }
 
        public final FrameBlock detectSchema(int k) {
-               return FrameLibDetectSchema.detectSchema(this, k);
+               return FrameLibDetectSchema.detectSchema(this, 0.01, k);
        }
 
        public final FrameBlock detectSchema(double sampleFraction, int k) {

Reply via email to