Repository: systemml
Updated Branches:
  refs/heads/master be2b3e220 -> cc7ec7625


[SYSTEMML-1325] Bugfix for GPU memory manager clear temporary memory. Fixes bug 
in GPU cleanup with JMLC.

In GPUMemoryManager.clearTemporaryMemory() we deallocate pointers but do not 
set the corresponding Pointer slots to null in the associated GPUObject 
instances. This can lead to attempted double-freeing of Pointers which results 
in an exception. This commit fixes this issue by creating a list of GPU objects 
associated with Pointers that have been freed as part of clearTemporaryMemory() 
and setting the corresponding pointer slots to null. This commit also addresses 
a minor issue with cleanup in JMLC which was causing Pointers for pinned data 
to be improperly cleared. Note this commit will reduce performance of 
GPUMemoryManager.clearTemporaryMemory() because it is now necessary to search 
through the list of managed GPUObjects to find the ones corresponding to 
pointers being freed. However, this method is only called once at the end of 
script invocation and so the performance cost will be small.

Closes #839.


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/cc7ec762
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/cc7ec762
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/cc7ec762

Branch: refs/heads/master
Commit: cc7ec762583aec137ed0bd10a8754e29140b2e35
Parents: be2b3e2
Author: Anthony Thomas <ahtho...@eng.ucsd.edu>
Authored: Sat Nov 3 05:41:42 2018 +0530
Committer: Niketan Pansare <npan...@us.ibm.com>
Committed: Sat Nov 3 05:41:42 2018 +0530

----------------------------------------------------------------------
 .../gpu/context/GPUMatrixMemoryManager.java     | 27 +++++++++++++++++---
 .../gpu/context/GPUMemoryManager.java           | 13 +++++++---
 .../instructions/gpu/context/GPUObject.java     |  2 +-
 3 files changed, 34 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/cc7ec762/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUMatrixMemoryManager.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUMatrixMemoryManager.java
 
b/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUMatrixMemoryManager.java
index 47a8391..f69d340 100644
--- 
a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUMatrixMemoryManager.java
+++ 
b/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUMatrixMemoryManager.java
@@ -18,7 +18,9 @@
  */
 package org.apache.sysml.runtime.instructions.gpu.context;
 
+import java.util.Collections;
 import java.util.HashSet;
+import java.util.List;
 import java.util.Set;
 import java.util.stream.Collectors;
 
@@ -43,8 +45,7 @@ public class GPUMatrixMemoryManager {
        void addGPUObject(GPUObject gpuObj) {
                gpuObjects.add(gpuObj);
        }
-       
-       
+
        /**
         * Get list of all Pointers in a GPUObject 
         * @param gObj gpu object 
@@ -81,6 +82,20 @@ public class GPUMatrixMemoryManager {
         * so that an extraneous host to dev transfer can be avoided
         */
        HashSet<GPUObject> gpuObjects = new HashSet<>();
+
+       /**
+        * Return a set of GPU Objects associated with a list of pointers
+        * @param pointers A list of pointers
+        * @return A set of GPU objects corresponding to any of these pointers
+        */
+       Set<GPUObject> getGpuObjects(Set<Pointer> pointers) {
+               Set<GPUObject> gObjs = new HashSet<>();
+               for (GPUObject g : gpuObjects) {
+                       if (!Collections.disjoint(getPointers(g), pointers))
+                               gObjs.add(g);
+               }
+               return gObjs;
+       }
        
        /**
         * Return all pointers in the first section
@@ -94,10 +109,14 @@ public class GPUMatrixMemoryManager {
         * Get pointers from the first memory sections "Matrix Memory"
         * @param locked return locked pointers if true
         * @param dirty return dirty pointers if true
+        * @param isCleanupEnabled return pointers marked for cleanup if true
         * @return set of pointers
         */
-       Set<Pointer> getPointers(boolean locked, boolean dirty) {
-               return gpuObjects.stream().filter(gObj -> gObj.isLocked() == 
locked && gObj.isDirty() == dirty).flatMap(gObj -> 
getPointers(gObj).stream()).collect(Collectors.toSet());
+       Set<Pointer> getPointers(boolean locked, boolean dirty, boolean 
isCleanupEnabled) {
+               return gpuObjects.stream().filter(
+                               gObj -> (gObj.isLocked() == locked && 
gObj.isDirty() == dirty) ||
+                                               (gObj.mat.isCleanupEnabled() == 
isCleanupEnabled)).flatMap(
+                                               gObj -> 
getPointers(gObj).stream()).collect(Collectors.toSet());
        }
        
        /**

http://git-wip-us.apache.org/repos/asf/systemml/blob/cc7ec762/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUMemoryManager.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUMemoryManager.java
 
b/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUMemoryManager.java
index 6a04d97..d403ca7 100644
--- 
a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUMemoryManager.java
+++ 
b/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUMemoryManager.java
@@ -429,7 +429,6 @@ public class GPUMemoryManager {
                else {
                        throw new RuntimeException("ERROR : Internal state 
corrupted, attempting to free an unaccounted pointer:" + toFree);
                }
-
        }
        
        /**
@@ -521,11 +520,19 @@ public class GPUMemoryManager {
         */
        public void clearTemporaryMemory() {
                // To record the cuda block sizes needed by 
allocatedGPUObjects, others are cleared up.
-               Set<Pointer> unlockedDirtyPointers = 
matrixMemoryManager.getPointers(false, true);
+               Set<Pointer> unlockedDirtyPointers = 
matrixMemoryManager.getPointers(false, true, false);
                Set<Pointer> temporaryPointers = nonIn(allPointers.keySet(), 
unlockedDirtyPointers);
-               for(Pointer tmpPtr : temporaryPointers) {
+               for (Pointer tmpPtr : temporaryPointers) {
                        guardedCudaFree(tmpPtr);
                }
+
+               // Also set the pointer(s) to null in the corresponding GPU 
objects to avoid double freeing pointers
+               Set<GPUObject> gObjs = 
matrixMemoryManager.getGpuObjects(temporaryPointers);
+               for (GPUObject g : gObjs) {
+                       g.jcudaDenseMatrixPtr = null;
+                       g.jcudaSparseMatrixPtr = null;
+                       removeGPUObject(g);
+               }
        }
        
        /**

http://git-wip-us.apache.org/repos/asf/systemml/blob/cc7ec762/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUObject.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUObject.java
 
b/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUObject.java
index b95d471..edcd683 100644
--- 
a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUObject.java
+++ 
b/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUObject.java
@@ -68,7 +68,7 @@ public class GPUObject {
        /**
         * Pointer to the underlying sparse matrix block on GPU
         */
-       private CSRPointer jcudaSparseMatrixPtr = null;
+       CSRPointer jcudaSparseMatrixPtr = null;
 
        /**
         * whether the block attached to this {@link GPUContext} is dirty on 
the device and needs to be copied back to host

Reply via email to