Repository: systemml
Updated Branches:
  refs/heads/master 3702df7c1 -> 97fd7d1aa


[SYSTEMML-445] Avoid unnecessary transfer to the GPU for size estimation

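- Compute memory estimates (exact and worst-case contiguous size) from the
  matrix metadata (rows, columns, nnz) rather than looking up the allocated
  GPU pointers, which could require transferring the data to the GPU.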

Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/97fd7d1a
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/97fd7d1a
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/97fd7d1a

Branch: refs/heads/master
Commit: 97fd7d1aa3ce7a152066d4d4b713fb0a9aee4092
Parents: 3702df7
Author: Niketan Pansare <[email protected]>
Authored: Tue Oct 9 16:41:18 2018 -0700
Committer: Niketan Pansare <[email protected]>
Committed: Tue Oct 9 16:41:18 2018 -0700

----------------------------------------------------------------------
 .../gpu/context/GPUMatrixMemoryManager.java     | 26 ---------------
 .../gpu/context/GPUMemoryManager.java           | 16 +++++-----
 .../instructions/gpu/context/GPUObject.java     | 33 +++++++++++++++-----
 3 files changed, 34 insertions(+), 41 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/97fd7d1a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUMatrixMemoryManager.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUMatrixMemoryManager.java
 
b/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUMatrixMemoryManager.java
index 457968b..47a8391 100644
--- 
a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUMatrixMemoryManager.java
+++ 
b/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUMatrixMemoryManager.java
@@ -44,32 +44,6 @@ public class GPUMatrixMemoryManager {
                gpuObjects.add(gpuObj);
        }
        
-       /**
-        * Returns worst-case contiguous memory size
-        * @param gpuObj gpu object
-        * @return memory size in bytes
-        */
-       long getWorstCaseContiguousMemorySize(GPUObject gpuObj) {
-               long ret = 0;
-               if(!gpuObj.isDensePointerNull()) {
-                       if(!gpuObj.shadowBuffer.isBuffered())
-                               ret = 
gpuManager.allPointers.get(gpuObj.getDensePointer()).getSizeInBytes();
-                       else
-                               ret = 0; // evicted hence no contiguous memory 
on GPU
-               }
-               else if(gpuObj.getJcudaSparseMatrixPtr() != null) {
-                       CSRPointer sparsePtr = gpuObj.getJcudaSparseMatrixPtr();
-                       if(sparsePtr.nnz > 0) {
-                               if(sparsePtr.rowPtr != null)
-                                       ret = Math.max(ret, 
gpuManager.allPointers.get(sparsePtr.rowPtr).getSizeInBytes());
-                               if(sparsePtr.colInd != null)
-                                       ret = Math.max(ret, 
gpuManager.allPointers.get(sparsePtr.colInd).getSizeInBytes());
-                               if(sparsePtr.val != null)
-                                       ret = Math.max(ret, 
gpuManager.allPointers.get(sparsePtr.val).getSizeInBytes());
-                       }
-               }
-               return ret;
-       }
        
        /**
         * Get list of all Pointers in a GPUObject 

http://git-wip-us.apache.org/repos/asf/systemml/blob/97fd7d1a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUMemoryManager.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUMemoryManager.java
 
b/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUMemoryManager.java
index 57b76f6..6772b4a 100644
--- 
a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUMemoryManager.java
+++ 
b/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUMemoryManager.java
@@ -292,7 +292,7 @@ public class GPUMemoryManager {
                if(A == null) {
                        long t0 =  ConfigurationManager.isStatistics() ? 
System.nanoTime() : 0;
                        Optional<GPUObject> sizeBasedUnlockedGPUObjects = 
matrixMemoryManager.gpuObjects.stream()
-                                               .filter(gpuObj -> 
!gpuObj.isLocked() && 
matrixMemoryManager.getWorstCaseContiguousMemorySize(gpuObj) >= size)
+                                               .filter(gpuObj -> 
!gpuObj.isLocked() && gpuObj.getWorstCaseContiguousMemorySize() >= size)
                                                .min((o1, o2) -> 
worstCaseContiguousMemorySizeCompare(o1, o2));
                        if(sizeBasedUnlockedGPUObjects.isPresent()) {
                                evictOrClear(sizeBasedUnlockedGPUObjects.get(), 
opcode);
@@ -363,7 +363,7 @@ public class GPUMemoryManager {
        }
        
        private int worstCaseContiguousMemorySizeCompare(GPUObject o1, 
GPUObject o2) {
-               long ret = 
matrixMemoryManager.getWorstCaseContiguousMemorySize(o1) - 
matrixMemoryManager.getWorstCaseContiguousMemorySize(o2);
+               long ret = o1.getWorstCaseContiguousMemorySize() - 
o2.getWorstCaseContiguousMemorySize();
                return ret < 0 ? -1 : (ret == 0 ? 0 : 1);
        }
        
@@ -423,7 +423,7 @@ public class GPUMemoryManager {
                                jcuda.runtime.JCuda.cudaDeviceSynchronize(); // 
Force a device synchronize after free-ing the pointer for debugging
                }
                else {
-                       throw new RuntimeException("Attempting to free an 
unaccounted pointer:" + toFree);
+                       throw new RuntimeException("ERROR : Internal state 
corrupted, attempting to free an unaccounted pointer:" + toFree);
                }
 
        }
@@ -439,6 +439,12 @@ public class GPUMemoryManager {
        public void free(String opcode, Pointer toFree, boolean eager) throws 
DMLRuntimeException {
                if(LOG.isTraceEnabled())
                        LOG.trace("Free-ing the pointer with eager=" + eager);
+               if(toFree == null)
+                       throw new DMLRuntimeException("Attempting to free a 
null pointer");
+               else if (!allPointers.containsKey(toFree)) {
+                       LOG.info("GPU memory info before failure:" + 
toString());
+                       throw new RuntimeException("ERROR : Internal state 
corrupted, attempting to free an unaccounted pointer:" + toFree);
+               }
                long size = allPointers.get(toFree).getSizeInBytes();
                if(ConfigurationManager.isStatistics()) {
                        currentSize -= size;
@@ -449,10 +455,6 @@ public class GPUMemoryManager {
                        addMiscTime(opcode, GPUStatistics.cudaDeAllocTime, 
GPUStatistics.cudaDeAllocCount, GPUInstruction.MISC_TIMER_CUDA_FREE, t0);
                }
                else {
-                       if (!allPointers.containsKey(toFree)) {
-                               LOG.info("GPU memory info before failure:" + 
toString());
-                               throw new RuntimeException("ERROR : Internal 
state corrupted, cache block size map is not aware of a block it trying to free 
up");
-                       }
                        lazyCudaFreeMemoryManager.add(size, toFree);
                }
        }

http://git-wip-us.apache.org/repos/asf/systemml/blob/97fd7d1a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUObject.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUObject.java
 
b/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUObject.java
index 552ee3b..6d7d73b 100644
--- 
a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUObject.java
+++ 
b/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUObject.java
@@ -774,17 +774,34 @@ public class GPUObject {
        }
 
        protected long getSizeOnDevice() {
-               long GPUSize = 0;
                long rlen = mat.getNumRows();
                long clen = mat.getNumColumns();
                long nnz = mat.getNnz();
-
-               if (LibMatrixCUDA.isInSparseFormat(getGPUContext(), mat)) {
-                       GPUSize = CSRPointer.estimateSize(nnz, rlen);
-               } else {
-                       GPUSize = getDatatypeSizeOf(rlen * clen);
-               }
-               return GPUSize;
+               
+               if(jcudaDenseMatrixPtr != null)
+                       return getDatatypeSizeOf(rlen * clen); // allocated in 
dense format
+               else if(jcudaSparseMatrixPtr != null || 
LibMatrixCUDA.isInSparseFormat(getGPUContext(), mat))
+                       return CSRPointer.estimateSize(nnz, rlen); // either 
allocated in sparse format or matrix object is in sparse format
+               else 
+                       return getDatatypeSizeOf(rlen * clen); // not allocated 
and matrix object is in dense format
+       }
+       
+       /**
+        * Returns worst-case contiguous memory size
+        * 
+        * @return memory size in bytes
+        */
+       long getWorstCaseContiguousMemorySize() {
+               long rlen = mat.getNumRows();
+               long clen = mat.getNumColumns();
+               long nnz = mat.getNnz();
+               
+               if(jcudaDenseMatrixPtr != null)
+                       return getDatatypeSizeOf(rlen * clen); // allocated in 
dense format
+               else if(jcudaSparseMatrixPtr != null || 
LibMatrixCUDA.isInSparseFormat(getGPUContext(), mat))
+                       return Math.max(getDatatypeSizeOf(nnz), 
getIntSizeOf(Math.max(Math.max(rlen+1, clen), 4))); // either allocated in 
sparse format or matrix object is in sparse format
+               else 
+                       return getDatatypeSizeOf(rlen * clen); // not allocated 
and matrix object is in dense format
        }
 
        void copyFromHostToDevice(String opcode) {

Reply via email to