Repository: systemml Updated Branches: refs/heads/master 3702df7c1 -> 97fd7d1aa
[SYSTEMML-445] Avoid unnecessary transfer to the GPU for size estimation - Compute memory estimates (exact and worst-case) using metadata rather than requiring pointer transfer. Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/97fd7d1a Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/97fd7d1a Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/97fd7d1a Branch: refs/heads/master Commit: 97fd7d1aa3ce7a152066d4d4b713fb0a9aee4092 Parents: 3702df7 Author: Niketan Pansare <[email protected]> Authored: Tue Oct 9 16:41:18 2018 -0700 Committer: Niketan Pansare <[email protected]> Committed: Tue Oct 9 16:41:18 2018 -0700 ---------------------------------------------------------------------- .../gpu/context/GPUMatrixMemoryManager.java | 26 --------------- .../gpu/context/GPUMemoryManager.java | 16 +++++----- .../instructions/gpu/context/GPUObject.java | 33 +++++++++++++++----- 3 files changed, 34 insertions(+), 41 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/systemml/blob/97fd7d1a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUMatrixMemoryManager.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUMatrixMemoryManager.java b/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUMatrixMemoryManager.java index 457968b..47a8391 100644 --- a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUMatrixMemoryManager.java +++ b/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUMatrixMemoryManager.java @@ -44,32 +44,6 @@ public class GPUMatrixMemoryManager { gpuObjects.add(gpuObj); } - /** - * Returns worst-case contiguous memory size - * @param gpuObj gpu object - * @return memory size in bytes - */ - long getWorstCaseContiguousMemorySize(GPUObject gpuObj) { - long ret = 0; - if(!gpuObj.isDensePointerNull()) { - if(!gpuObj.shadowBuffer.isBuffered()) - ret = gpuManager.allPointers.get(gpuObj.getDensePointer()).getSizeInBytes(); - else - ret = 0; // evicted hence no contiguous memory on GPU - } - else if(gpuObj.getJcudaSparseMatrixPtr() != null) { - CSRPointer sparsePtr = gpuObj.getJcudaSparseMatrixPtr(); - if(sparsePtr.nnz > 0) { - if(sparsePtr.rowPtr != null) - ret = Math.max(ret, gpuManager.allPointers.get(sparsePtr.rowPtr).getSizeInBytes()); - if(sparsePtr.colInd != null) - ret = Math.max(ret, gpuManager.allPointers.get(sparsePtr.colInd).getSizeInBytes()); - if(sparsePtr.val != null) - ret = Math.max(ret, gpuManager.allPointers.get(sparsePtr.val).getSizeInBytes()); - } - } - return ret; - } /** * Get list of all Pointers in a GPUObject http://git-wip-us.apache.org/repos/asf/systemml/blob/97fd7d1a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUMemoryManager.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUMemoryManager.java b/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUMemoryManager.java index 57b76f6..6772b4a 100644 --- a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUMemoryManager.java +++ b/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUMemoryManager.java @@ -292,7 +292,7 @@ public class GPUMemoryManager { if(A == null) { long t0 = ConfigurationManager.isStatistics() ? System.nanoTime() : 0; Optional<GPUObject> sizeBasedUnlockedGPUObjects = matrixMemoryManager.gpuObjects.stream() - .filter(gpuObj -> !gpuObj.isLocked() && matrixMemoryManager.getWorstCaseContiguousMemorySize(gpuObj) >= size) + .filter(gpuObj -> !gpuObj.isLocked() && gpuObj.getWorstCaseContiguousMemorySize() >= size) .min((o1, o2) -> worstCaseContiguousMemorySizeCompare(o1, o2)); if(sizeBasedUnlockedGPUObjects.isPresent()) { evictOrClear(sizeBasedUnlockedGPUObjects.get(), opcode); @@ -363,7 +363,7 @@ public class GPUMemoryManager { } private int worstCaseContiguousMemorySizeCompare(GPUObject o1, GPUObject o2) { - long ret = matrixMemoryManager.getWorstCaseContiguousMemorySize(o1) - matrixMemoryManager.getWorstCaseContiguousMemorySize(o2); + long ret = o1.getWorstCaseContiguousMemorySize() - o2.getWorstCaseContiguousMemorySize(); return ret < 0 ? -1 : (ret == 0 ? 0 : 1); } @@ -423,7 +423,7 @@ public class GPUMemoryManager { jcuda.runtime.JCuda.cudaDeviceSynchronize(); // Force a device synchronize after free-ing the pointer for debugging } else { - throw new RuntimeException("Attempting to free an unaccounted pointer:" + toFree); + throw new RuntimeException("ERROR : Internal state corrupted, attempting to free an unaccounted pointer:" + toFree); } } @@ -439,6 +439,12 @@ public class GPUMemoryManager { public void free(String opcode, Pointer toFree, boolean eager) throws DMLRuntimeException { if(LOG.isTraceEnabled()) LOG.trace("Free-ing the pointer with eager=" + eager); + if(toFree == null) + throw new DMLRuntimeException("Attempting to free a null pointer"); + else if (!allPointers.containsKey(toFree)) { + LOG.info("GPU memory info before failure:" + toString()); + throw new RuntimeException("ERROR : Internal state corrupted, attempting to free an unaccounted pointer:" + toFree); + } long size = allPointers.get(toFree).getSizeInBytes(); if(ConfigurationManager.isStatistics()) { currentSize -= size; @@ -449,10 +455,6 @@ public class GPUMemoryManager { addMiscTime(opcode, GPUStatistics.cudaDeAllocTime, GPUStatistics.cudaDeAllocCount, GPUInstruction.MISC_TIMER_CUDA_FREE, t0); } else { - if (!allPointers.containsKey(toFree)) { - LOG.info("GPU memory info before failure:" + toString()); - throw new RuntimeException("ERROR : Internal state corrupted, cache block size map is not aware of a block it trying to free up"); - } lazyCudaFreeMemoryManager.add(size, toFree); } } http://git-wip-us.apache.org/repos/asf/systemml/blob/97fd7d1a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUObject.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUObject.java b/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUObject.java index 552ee3b..6d7d73b 100644 --- a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUObject.java +++ b/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUObject.java @@ -774,17 +774,34 @@ public class GPUObject { } protected long getSizeOnDevice() { - long GPUSize = 0; long rlen = mat.getNumRows(); long clen = mat.getNumColumns(); long nnz = mat.getNnz(); - - if (LibMatrixCUDA.isInSparseFormat(getGPUContext(), mat)) { - GPUSize = CSRPointer.estimateSize(nnz, rlen); - } else { - GPUSize = getDatatypeSizeOf(rlen * clen); - } - return GPUSize; + + if(jcudaDenseMatrixPtr != null) + return getDatatypeSizeOf(rlen * clen); // allocated in dense format + else if(jcudaSparseMatrixPtr != null || LibMatrixCUDA.isInSparseFormat(getGPUContext(), mat)) + return CSRPointer.estimateSize(nnz, rlen); // either allocated in sparse format or matrix object is in sparse format + else + return getDatatypeSizeOf(rlen * clen); // not allocated and matrix object is in dense format + } + + /** + * Returns worst-case contiguous memory size + * + * @return memory size in bytes + */ + long getWorstCaseContiguousMemorySize() { + long rlen = mat.getNumRows(); + long clen = mat.getNumColumns(); + long nnz = mat.getNnz(); + + if(jcudaDenseMatrixPtr != null) + return getDatatypeSizeOf(rlen * clen); // allocated in dense format + else if(jcudaSparseMatrixPtr != null || LibMatrixCUDA.isInSparseFormat(getGPUContext(), mat)) + return Math.max(getDatatypeSizeOf(nnz), getIntSizeOf(Math.max(Math.max(rlen+1, clen), 4))); // either allocated in sparse format or matrix object is in sparse format + else + return getDatatypeSizeOf(rlen * clen); // not allocated and matrix object is in dense format } void copyFromHostToDevice(String opcode) {
