Repository: incubator-systemml Updated Branches: refs/heads/master 29d3f8366 -> d6990dccd
[SYSTEMML-445] Cleanup buffer pool (gpu integration, critical path) Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/23de8e88 Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/23de8e88 Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/23de8e88 Branch: refs/heads/master Commit: 23de8e88151e9b249f36910740c1c29ca10a2deb Parents: 29d3f83 Author: Matthias Boehm <[email protected]> Authored: Fri Aug 5 17:25:07 2016 -0700 Committer: Matthias Boehm <[email protected]> Committed: Sun Aug 7 12:30:36 2016 -0700 ---------------------------------------------------------------------- .../controlprogram/caching/ByteBuffer.java | 3 +-- .../controlprogram/caching/CacheableData.java | 12 ++++++---- .../controlprogram/caching/LazyWriteBuffer.java | 20 ++++++++--------- .../controlprogram/caching/MatrixObject.java | 23 +++----------------- .../context/ExecutionContext.java | 9 +++++++- .../gpu/AggregateBinaryGPUInstruction.java | 1 + .../runtime/matrix/data/LibMatrixCUDA.java | 18 +++++++-------- 7 files changed, 39 insertions(+), 47 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/23de8e88/src/main/java/org/apache/sysml/runtime/controlprogram/caching/ByteBuffer.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/caching/ByteBuffer.java b/src/main/java/org/apache/sysml/runtime/controlprogram/caching/ByteBuffer.java index 31f7a00..9402392 100644 --- a/src/main/java/org/apache/sysml/runtime/controlprogram/caching/ByteBuffer.java +++ b/src/main/java/org/apache/sysml/runtime/controlprogram/caching/ByteBuffer.java @@ -39,8 +39,7 @@ public class ByteBuffer protected byte[] _bdata = null; //sparse matrix protected CacheBlock _cdata = null; //dense matrix/frame - public ByteBuffer( long size ) - { + public ByteBuffer( long size ) { _size = size; _serialized = false; } http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/23de8e88/src/main/java/org/apache/sysml/runtime/controlprogram/caching/CacheableData.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/caching/CacheableData.java b/src/main/java/org/apache/sysml/runtime/controlprogram/caching/CacheableData.java index 2b45ddd..c7425c1 100644 --- a/src/main/java/org/apache/sysml/runtime/controlprogram/caching/CacheableData.java +++ b/src/main/java/org/apache/sysml/runtime/controlprogram/caching/CacheableData.java @@ -179,7 +179,7 @@ public abstract class CacheableData<T extends CacheBlock> extends Data //for lazily evaluated RDDs, and (2) as abstraction for environments that do not necessarily have spark libraries available private RDDObject _rddHandle = null; //RDD handle private BroadcastObject<T> _bcHandle = null; //Broadcast handle - public GPUObject _gpuHandle = null; + protected GPUObject _gpuHandle = null; /** * Basic constructor for any cacheable data. @@ -390,6 +390,10 @@ public abstract class CacheableData<T extends CacheBlock> extends Data return _gpuHandle; } + public void setGPUObject(GPUObject handle) { + _gpuHandle = handle; + } + // ********************************************* // *** *** @@ -753,8 +757,6 @@ public abstract class CacheableData<T extends CacheBlock> extends Data exportData(fName, outputFormat, -1, formatProperties); } - protected void exportGPUData() throws CacheException { } - /** * Synchronized because there might be parallel threads (parfor local) that * access the same object (in case it was created before the loop). @@ -784,7 +786,9 @@ public abstract class CacheableData<T extends CacheBlock> extends Data LOG.trace("Exporting " + this.getDebugName() + " to " + fName + " in format " + outputFormat); //TODO remove - exportGPUData(); + if( getGPUObject() != null ) { + getGPUObject().acquireHostRead(); + } boolean pWrite = false; // !fName.equals(_hdfsFileName); //persistent write flag if ( fName.equals(_hdfsFileName) ) { http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/23de8e88/src/main/java/org/apache/sysml/runtime/controlprogram/caching/LazyWriteBuffer.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/caching/LazyWriteBuffer.java b/src/main/java/org/apache/sysml/runtime/controlprogram/caching/LazyWriteBuffer.java index 23cc620..61742bb 100644 --- a/src/main/java/org/apache/sysml/runtime/controlprogram/caching/LazyWriteBuffer.java +++ b/src/main/java/org/apache/sysml/runtime/controlprogram/caching/LazyWriteBuffer.java @@ -77,7 +77,9 @@ public class LazyWriteBuffer //handle caching/eviction if it fits in writebuffer if( !requiresWrite ) { - ByteBuffer bbuff = null; + //create byte buffer handle (no block allocation yet) + ByteBuffer bbuff = new ByteBuffer( lSize ); + int numEvicted = 0; //modify buffer pool synchronized( _mQueue ) @@ -90,8 +92,7 @@ public class LazyWriteBuffer String ftmp = entry.getKey(); ByteBuffer tmp = entry.getValue(); - if( tmp != null ) - { + if( tmp != null ) { //wait for pending serialization tmp.checkSerialized(); @@ -99,16 +100,11 @@ public class LazyWriteBuffer tmp.evictBuffer(ftmp); tmp.freeMemory(); _size-=tmp.getSize(); - - if( DMLScript.STATISTICS ) - CacheStatistics.incrementFSWrites(); + numEvicted++; } } - //create buffer (reserve mem), and lock - bbuff = new ByteBuffer( lSize ); - - //put placeholder into buffer pool + //put placeholder into buffer pool (reserve mem) _mQueue.addLast(fname, bbuff); _size += lSize; } @@ -116,8 +112,10 @@ public class LazyWriteBuffer //serialize matrix (outside synchronized critical path) bbuff.serializeBlock(cb); - if( DMLScript.STATISTICS ) + if( DMLScript.STATISTICS ) { CacheStatistics.incrementFSBuffWrites(); + CacheStatistics.incrementFSWrites(numEvicted); + } } else { http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/23de8e88/src/main/java/org/apache/sysml/runtime/controlprogram/caching/MatrixObject.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/caching/MatrixObject.java b/src/main/java/org/apache/sysml/runtime/controlprogram/caching/MatrixObject.java index 4148545..b18b9ee 100644 --- a/src/main/java/org/apache/sysml/runtime/controlprogram/caching/MatrixObject.java +++ b/src/main/java/org/apache/sysml/runtime/controlprogram/caching/MatrixObject.java @@ -33,7 +33,6 @@ import org.apache.sysml.parser.Expression.ValueType; import org.apache.sysml.runtime.DMLRuntimeException; import org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PDataPartitionFormat; import org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext; -import org.apache.sysml.runtime.instructions.gpu.context.GPUContext; import org.apache.sysml.runtime.instructions.spark.data.RDDObject; import org.apache.sysml.runtime.matrix.MatrixCharacteristics; import org.apache.sysml.runtime.matrix.MatrixDimensionsMetaData; @@ -88,8 +87,6 @@ public class MatrixObject extends CacheableData<MatrixBlock> */ public MatrixObject (ValueType vt, String file) { this (vt, file, null); //HDFS file path - if(DMLScript.USE_ACCELERATOR) - _gpuHandle = GPUContext.createGPUObject(this); } /** @@ -101,8 +98,6 @@ public class MatrixObject extends CacheableData<MatrixBlock> _hdfsFileName = file; _cache = null; _data = null; - if(DMLScript.USE_ACCELERATOR) - _gpuHandle = GPUContext.createGPUObject(this); } /** @@ -223,22 +218,10 @@ public class MatrixObject extends CacheableData<MatrixBlock> @Override protected void clearReusableData() { if(DMLScript.REUSE_NONZEROED_OUTPUT) { - if(_data == null) { + if(_data == null) getCache(); - } - if(_data != null && - // Not a column vector - _data.getNumRows() != 1 && _data.getNumColumns() != 1) { - double[] arr = _data.getDenseBlock(); - LibMatrixDNN.cacheReuseableData(arr); - } - } - } - - @Override - protected void exportGPUData() throws CacheException { - if(DMLScript.USE_ACCELERATOR && getGPUObject() != null) { - getGPUObject().acquireHostRead(); + if( _data != null && !_data.isVector() ) + LibMatrixDNN.cacheReuseableData(_data.getDenseBlock()); } } http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/23de8e88/src/main/java/org/apache/sysml/runtime/controlprogram/context/ExecutionContext.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/context/ExecutionContext.java b/src/main/java/org/apache/sysml/runtime/controlprogram/context/ExecutionContext.java index 7e089c8..3e85a76 100644 --- a/src/main/java/org/apache/sysml/runtime/controlprogram/context/ExecutionContext.java +++ b/src/main/java/org/apache/sysml/runtime/controlprogram/context/ExecutionContext.java @@ -265,6 +265,9 @@ public class ExecutionContext throw new DMLRuntimeException("Sparse matrix block is not supported for GPU instruction"); } MatrixObject mo = getMatrixObject(varName); + if( mo.getGPUObject() == null ) { + mo.setGPUObject(GPUContext.createGPUObject(mo)); + } mo.getGPUObject().acquireDenseDeviceModify((int)(mo.getNumRows()*mo.getNumColumns())); mo.getMatrixCharacteristics().setNonZeros(-1); return mo; @@ -277,9 +280,13 @@ public class ExecutionContext if(mo == null) { throw new DMLRuntimeException("No matrix object available for variable:" + varName); } - if(mo.getGPUObject() == null || !mo.getGPUObject().isAllocated) { + if( mo.getGPUObject() == null ) { + mo.setGPUObject(GPUContext.createGPUObject(mo)); + } + if( !mo.getGPUObject().isAllocated ) { mo.acquireRead(); mo.release(); + //FIXME: after release the matrix block might get evicted } mo.getGPUObject().acquireDeviceRead(); return mo; http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/23de8e88/src/main/java/org/apache/sysml/runtime/instructions/gpu/AggregateBinaryGPUInstruction.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/instructions/gpu/AggregateBinaryGPUInstruction.java b/src/main/java/org/apache/sysml/runtime/instructions/gpu/AggregateBinaryGPUInstruction.java index 67b8d30..3dc98ba 100644 --- a/src/main/java/org/apache/sysml/runtime/instructions/gpu/AggregateBinaryGPUInstruction.java +++ b/src/main/java/org/apache/sysml/runtime/instructions/gpu/AggregateBinaryGPUInstruction.java @@ -89,6 +89,7 @@ public class AggregateBinaryGPUInstruction extends GPUInstruction { // -------------------------------------- // This code will be removed when the JIRA SYSTEMML-702 is complete + // FIXME this code does not adhere to compiler memory budgets if( isSparse(ec, _input1.getName()) || isSparse(ec, _input2.getName())) { //get inputs MatrixBlock matBlock1 = ec.getMatrixInput(_input1.getName()); http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/23de8e88/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java index fc1f657..4a94f6a 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java @@ -80,9 +80,9 @@ public class LibMatrixCUDA { // (Pointer) gpuCtx.prepare(image, true, true); // (Pointer) gpuCtx.prepare(filter, true, true); - Pointer imagePointer = ((JCudaObject)image._gpuHandle).jcudaPointer; - Pointer filterPointer = ((JCudaObject)filter._gpuHandle).jcudaPointer; - Pointer dstPointer = ((JCudaObject)outputBlock._gpuHandle).jcudaPointer; + Pointer imagePointer = ((JCudaObject)image.getGPUObject()).jcudaPointer; + Pointer filterPointer = ((JCudaObject)filter.getGPUObject()).jcudaPointer; + Pointer dstPointer = ((JCudaObject)outputBlock.getGPUObject()).jcudaPointer; int padding [] = { pad_h, pad_w }; int strides [] = { stride_h, stride_w }; @@ -195,9 +195,9 @@ public class LibMatrixCUDA { dwDesc = allocateFilterDescriptor(K, C, R, S); // Allocate data - Pointer imagePointer = ((JCudaObject)image._gpuHandle).jcudaPointer; - Pointer doutPointer = ((JCudaObject)dout._gpuHandle).jcudaPointer; - Pointer dwPointer = ((JCudaObject)outputBlock._gpuHandle).jcudaPointer; + Pointer imagePointer = ((JCudaObject)image.getGPUObject()).jcudaPointer; + Pointer doutPointer = ((JCudaObject)dout.getGPUObject()).jcudaPointer; + Pointer dwPointer = ((JCudaObject)outputBlock.getGPUObject()).jcudaPointer; alpha = pointerTo(1.0); // TODO beta = pointerTo(0.0f); @@ -305,9 +305,9 @@ public class LibMatrixCUDA { dxDesc = allocateTensorDescriptor(N, C, H, W); // Allocate data - Pointer w = ((JCudaObject)filter._gpuHandle).jcudaPointer; - Pointer dy = ((JCudaObject)dout._gpuHandle).jcudaPointer; - Pointer dx = ((JCudaObject)output._gpuHandle).jcudaPointer; + Pointer w = ((JCudaObject)filter.getGPUObject()).jcudaPointer; + Pointer dy = ((JCudaObject)dout.getGPUObject()).jcudaPointer; + Pointer dx = ((JCudaObject)output.getGPUObject()).jcudaPointer; alpha = pointerTo(1.0); // TODO beta = pointerTo(0.0f);
