[systemds] branch master updated: [SYSTEMDS-2947] Fix synchronization issues with GPU evictions

arnabp20 Thu, 22 Jul 2021 04:24:11 -0700

This is an automated email from the ASF dual-hosted git repository.

arnabp20 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/systemds.git



The following commit(s) were added to refs/heads/master by this push:
     new e5a3665  [SYSTEMDS-2947] Fix synchronization issues with GPU evictions
e5a3665 is described below

commit e5a366560cda832b38a82b7cb2631e002f49cc22
Author: arnabp <[email protected]>
AuthorDate: Thu Jul 22 13:23:23 2021 +0200

    [SYSTEMDS-2947] Fix synchronization issues with GPU evictions
    
    This patch adds new logic to GPU background eviction so that it
    starts evicting only when the GPU is 80% full. In addition, this patch
    fixes a couple of synchronization bugs in async eviction and
    a bug in sparsity handling in the H2D copy.
    TODO: Fix remaining synchronization bugs (gpuobj list, rmvar cache).
---
 .../runtime/instructions/cp/CPInstruction.java     | 17 ++++++---
 .../gpu/context/GPUMemoryEviction.java             | 43 +++++++++++++---------
 .../instructions/gpu/context/GPUObject.java        | 24 +++++++++++-
 3 files changed, 60 insertions(+), 24 deletions(-)

diff --git 
a/src/main/java/org/apache/sysds/runtime/instructions/cp/CPInstruction.java 
b/src/main/java/org/apache/sysds/runtime/instructions/cp/CPInstruction.java
index 84b2332..e048add 100644
--- a/src/main/java/org/apache/sysds/runtime/instructions/cp/CPInstruction.java
+++ b/src/main/java/org/apache/sysds/runtime/instructions/cp/CPInstruction.java
@@ -31,6 +31,7 @@ import 
org.apache.sysds.runtime.controlprogram.context.ExecutionContext;
 import org.apache.sysds.runtime.instructions.CPInstructionParser;
 import org.apache.sysds.runtime.instructions.Instruction;
 import org.apache.sysds.runtime.instructions.fed.FEDInstructionUtils;
+import org.apache.sysds.runtime.instructions.gpu.context.GPUContextPool;
 import org.apache.sysds.runtime.instructions.gpu.context.GPUMemoryEviction;
 import org.apache.sysds.runtime.lineage.LineageCacheConfig;
 import org.apache.sysds.runtime.lineage.LineageGPUCacheEviction;
@@ -109,11 +110,17 @@ public abstract class CPInstruction extends Instruction
                //eviction count and STOPBACKGROUNDEVICTION flag. 
STOPBACKGROUNDEVICTION flag
                //is set to true in the post processing of CPU instruction to 
stop eviction.
                if (!LineageCacheConfig.ReuseCacheType.isNone() && 
DMLScript.USE_ACCELERATOR
-                       && LineageCacheConfig.CONCURRENTGPUEVICTION && !(tmp 
instanceof VariableCPInstruction)) {
-                       if (LineageGPUCacheEviction.gpuEvictionThread == null)
-                               LineageGPUCacheEviction.gpuEvictionThread = 
Executors.newSingleThreadExecutor();
-                       LineageCacheConfig.STOPBACKGROUNDEVICTION = false;
-                       LineageGPUCacheEviction.gpuEvictionThread.submit(new 
GPUMemoryEviction(1));
+                       && LineageCacheConfig.CONCURRENTGPUEVICTION && 
ec.getNumGPUContexts()>0 
+                       && !(tmp instanceof VariableCPInstruction) && !(tmp 
instanceof FunctionCallCPInstruction)) {
+                       long availableMem = 
ec.getGPUContext(0).getAvailableMemory(); //TODO: multi-gpu
+                       long almostFull = (long) (0.2 * 
GPUContextPool.initialGPUMemBudget());
+
+                       if (availableMem < almostFull) { //80% full
+                               if (LineageGPUCacheEviction.gpuEvictionThread 
== null)
+                                       
LineageGPUCacheEviction.gpuEvictionThread = Executors.newSingleThreadExecutor();
+                               LineageCacheConfig.STOPBACKGROUNDEVICTION = 
false;
+                               
LineageGPUCacheEviction.gpuEvictionThread.submit(new GPUMemoryEviction());
+                       }
                }
                
                return tmp;
diff --git 
a/src/main/java/org/apache/sysds/runtime/instructions/gpu/context/GPUMemoryEviction.java
 
b/src/main/java/org/apache/sysds/runtime/instructions/gpu/context/GPUMemoryEviction.java
index cb7787c..0264497 100644
--- 
a/src/main/java/org/apache/sysds/runtime/instructions/gpu/context/GPUMemoryEviction.java
+++ 
b/src/main/java/org/apache/sysds/runtime/instructions/gpu/context/GPUMemoryEviction.java
@@ -31,11 +31,15 @@ import org.apache.sysds.utils.GPUStatistics;
 
 public class GPUMemoryEviction implements Runnable 
 {
-       int numEvicts = 0;
+       int numEvicts;
        
        public GPUMemoryEviction(int num) {
                numEvicts = num;
        }
+       
+       public GPUMemoryEviction() {
+               numEvicts = 0;
+       }
 
        @Override
        public void run() {
@@ -46,9 +50,15 @@ public class GPUMemoryEviction implements Runnable
                // Stop if 1) Evicted the request number of entries, 2) The 
parallel
                // CPU instruction is ended, and 3) No non-live entries left in 
the cache.
                long t0 =  DMLScript.STATISTICS ? System.nanoTime() : 0;
-               while (!LineageGPUCacheEviction.isGPUCacheEmpty() && count < 
numEvicts) 
+               while (!LineageGPUCacheEviction.isGPUCacheEmpty()) 
                {
                        if (LineageCacheConfig.STOPBACKGROUNDEVICTION)
+                               // This logic reduces #evictions if the cpu 
instructions is so small
+                               // that it ends before the background thread 
reaches this condition.
+                               // However, this check decreases race 
conditions.
+                               break;
+                       
+                       if (numEvicts > 0 && count > numEvicts)
                                break;
                        
                        LineageCacheEntry le = 
LineageGPUCacheEviction.pollFirstEntry();
@@ -91,23 +101,22 @@ public class GPUMemoryEviction implements Runnable
                        nextgpuObj = headGpuObj;
                        boolean freed = false;
                        synchronized 
(nextgpuObj.getGPUContext().getMemoryManager().getGPUMatrixMemoryManager().gpuObjects)
 {
-
-                       while (nextgpuObj!= null) {
-                               // If not live or live but not dirty
-                               if (nextgpuObj.isrmVarPending() || 
!nextgpuObj.isDirty()) {
-                                       if (!freed) {
-                                               nextgpuObj.clearData(null, 
true);
-                                               //FIXME: adding to rmVar cache 
causes multiple failures due to concurrent
-                                               //access to the rmVar cache and 
other data structures. VariableCP instruction
-                                               //and other instruction free 
memory and add to rmVar cache in parallel to
-                                               //the background eviction task, 
which needs to be synchronized.
-                                               freed = true;
+                               while (nextgpuObj!= null) {
+                                       // If not live or live but not dirty
+                                       if (nextgpuObj.isrmVarPending() || 
!nextgpuObj.isDirty()) {
+                                               if (!freed) {
+                                                       
nextgpuObj.clearData(null, true);
+                                                       //FIXME: adding to 
rmVar cache causes multiple failures due to concurrent
+                                                       //access to the rmVar 
cache and other data structures. VariableCP instruction
+                                                       //and other instruction 
free memory and add to rmVar cache in parallel to
+                                                       //the background 
eviction task, which needs to be synchronized.
+                                                       freed = true;
+                                               }
+                                               else
+                                                       
nextgpuObj.clearGPUObject();
                                        }
-                                       else
-                                               nextgpuObj.clearGPUObject();
+                                       nextgpuObj = 
nextgpuObj.nextLineageCachedEntry;
                                }
-                               nextgpuObj = nextgpuObj.nextLineageCachedEntry;
-                       }
                        }
                        // Clear the GPUOjects chain
                        GPUObject currgpuObj = headGpuObj;
diff --git 
a/src/main/java/org/apache/sysds/runtime/instructions/gpu/context/GPUObject.java
 
b/src/main/java/org/apache/sysds/runtime/instructions/gpu/context/GPUObject.java
index 291cb07..1c1cb2b 100644
--- 
a/src/main/java/org/apache/sysds/runtime/instructions/gpu/context/GPUObject.java
+++ 
b/src/main/java/org/apache/sysds/runtime/instructions/gpu/context/GPUObject.java
@@ -786,6 +786,23 @@ public class GPUObject {
                setSparseMatrixCudaPointer(tmp);
        }
 
+       void allocateSparseMatrixOnDevice(long numVals) {
+               // This method is called when #values > nnz
+               if(LOG.isTraceEnabled()) {
+                       LOG.trace("GPU : allocateSparseMatrixOnDevice, on " + 
this + ", GPUContext=" + getGPUContext());
+               }
+               if(isAllocated()) 
+                       throw new DMLRuntimeException("Internal error - trying 
to allocated sparse matrix to a GPUObject that is already allocated");
+               long rows = mat.getNumRows();
+               long nnz = mat.getNnz();
+               if(rows <= 0)
+                       throw new DMLRuntimeException("Internal error - invalid 
number of rows when allocating sparse matrix");
+               if(nnz < 0)
+                       throw new DMLRuntimeException("Internal error - invalid 
number of non zeroes when allocating a sparse matrix");
+               CSRPointer tmp = CSRPointer.allocateEmpty(getGPUContext(), 
numVals, rows);
+               setSparseMatrixCudaPointer(tmp);
+       }
+
        public long getSizeOnDevice() {
                long GPUSize = 0;
                long rlen = mat.getNumRows();
@@ -863,7 +880,10 @@ public class GPUObject {
                                values = csrBlock.values();
                        }
 
-                       allocateSparseMatrixOnDevice();
+                       if (values.length > tmp.getNonZeros())
+                               allocateSparseMatrixOnDevice(values.length);
+                       else
+                               allocateSparseMatrixOnDevice();
 
                        if (copyToDevice) {
                                CSRPointer.copyToDevice(getGPUContext(), 
getJcudaSparseMatrixPtr(),
@@ -1037,7 +1057,7 @@ public class GPUObject {
         * @param eager whether to be done synchronously or asynchronously
         * @throws DMLRuntimeException if error occurs
         */
-       public void clearData(String opcode, boolean eager) throws 
DMLRuntimeException {
+       synchronized public void clearData(String opcode, boolean eager) throws 
DMLRuntimeException {
                if (isLineageCached)
                        return;

[systemds] branch master updated: [SYSTEMDS-2947] Fix synchronization issues with GPU evictions

Reply via email to