Repository: incubator-systemml
Updated Branches:
  refs/heads/master 6f4d8762d -> 6b1572e4b

[SYSTEMML-942] added gpu option to MLContext API

Additionally,
- Changed initialization of CUDA libraries from static to per instance
- Added documentation to mlcontext programming guide

Closes #420


Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/6b1572e4
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/6b1572e4
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/6b1572e4

Branch: refs/heads/master
Commit: 6b1572e4bba31619c5bed19fd0c106d2e759f159
Parents: 6f4d876
Author: Nakul Jindal <[email protected]>
Authored: Tue Mar 7 13:41:03 2017 -0800
Committer: Nakul Jindal <[email protected]>
Committed: Tue Mar 7 13:41:03 2017 -0800

----------------------------------------------------------------------
 docs/spark-mlcontext-programming-guide.md      | 90 ++++++++++++++++++++
 .../apache/sysml/api/mlcontext/MLContext.java  | 25 ++++++
 .../sysml/api/mlcontext/ScriptExecutor.java    | 26 +++++-
 .../instructions/gpu/context/GPUContext.java   |  2 +-
 .../instructions/gpu/context/JCudaContext.java | 42 ++++-----
 5 files changed, 163 insertions(+), 22 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/6b1572e4/docs/spark-mlcontext-programming-guide.md
----------------------------------------------------------------------
diff --git a/docs/spark-mlcontext-programming-guide.md b/docs/spark-mlcontext-programming-guide.md
index c15c27f..c28eaf5 100644
--- a/docs/spark-mlcontext-programming-guide.md
+++ b/docs/spark-mlcontext-programming-guide.md
@@ -1086,6 +1086,96 @@ mean: Double = 0.5002109404821844
 </div>
 
+## GPU
+
+If the driver node has a GPU, SystemML may be able to utilize it, subject to memory constraints and the instructions used in the DML script.
+
+<div class="codetabs">
+
+<div data-lang="Scala" markdown="1">
+{% highlight scala %}
+ml.setGPU(true)
+ml.setStatistics(true)
+val matMultScript = dml("""
+A = rand(rows=10, cols=1000)
+B = rand(rows=1000, cols=10)
+C = A %*% B
+print(toString(C))
+""")
+ml.execute(matMultScript)
+{% endhighlight %}
+</div>
+
+<div data-lang="Spark Shell" markdown="1">
+{% highlight scala %}
+scala> ml.setGPU(true)
+
+scala> ml.setStatistics(true)
+
+scala> val matMultScript = dml("""
+     | A = rand(rows=10, cols=1000)
+     | B = rand(rows=1000, cols=10)
+     | C = A %*% B
+     | print(toString(C))
+     | """)
+matMultScript: org.apache.sysml.api.mlcontext.Script =
+Inputs:
+None
+
+Outputs:
+None
+
+scala> ml.execute(matMultScript)
+249.977 238.545 233.700 234.489 248.556 244.423 249.051 255.043 249.117 251.605
+249.226 248.680 245.532 238.258 254.451 249.827 260.957 251.273 250.577 257.571
+258.703 246.969 243.463 246.547 250.784 251.758 251.654 258.318 251.817 254.097
+248.788 242.960 230.920 244.026 249.159 247.998 251.330 254.718 248.013 255.706
+253.251 248.788 235.785 242.941 252.096 248.675 256.865 251.677 252.872 250.490
+256.087 245.035 234.124 238.307 248.630 252.522 251.122 251.577 249.171 247.974
+245.419 243.114 232.262 239.776 249.583 242.351 250.972 249.244 246.729 251.807
+250.081 242.367 230.334 240.955 248.332 240.730 246.940 250.396 244.107 249.729
+247.368 239.882 234.353 237.087 252.337 248.801 246.627 249.077 244.305 245.621
+252.827 257.352 239.546 246.529 258.916 255.612 260.480 254.805 252.695 257.531
+
+SystemML Statistics:
+Total elapsed time: 0.000 sec.
+Total compilation time: 0.000 sec.
+Total execution time: 0.000 sec.
+Number of compiled Spark inst: 0.
+Number of executed Spark inst: 0.
+CUDA/CuLibraries init time: 0.000/0.003 sec.
+Number of executed GPU inst: 8.
+GPU mem tx time (alloc/dealloc/toDev/fromDev): 0.003/0.002/0.010/0.002 sec.
+GPU mem tx count (alloc/dealloc/toDev/fromDev/evict): 24/24/0/16/8/0.
+GPU conversion time (sparseConv/sp2dense/dense2sp): 0.000/0.000/0.000 sec.
+GPU conversion count (sparseConv/sp2dense/dense2sp): 0/0/0.
+Cache hits (Mem, WB, FS, HDFS): 40/0/0/0.
+Cache writes (WB, FS, HDFS): 21/0/0.
+Cache times (ACQr/m, RLS, EXP): 0.002/0.002/0.003/0.000 sec.
+HOP DAGs recompiled (PRED, SB): 0/0.
+HOP DAGs recompile time: 0.000 sec.
+Spark ctx create time (lazy): 0.000 sec.
+Spark trans counts (par,bc,col):0/0/0.
+Spark trans times (par,bc,col): 0.000/0.000/0.000 secs.
+Total JIT compile time: 11.426 sec.
+Total JVM GC count: 20.
+Total JVM GC time: 1.078 sec.
+Heavy hitter instructions (name, time, count):
+-- 1) toString 0.085 sec 8
+-- 2) rand 0.027 sec 16
+-- 3) gpu_ba+* 0.018 sec 8
+-- 4) print 0.006 sec 8
+-- 5) createvar 0.003 sec 24
+-- 6) rmvar 0.003 sec 40
+
+res20: org.apache.sysml.api.mlcontext.MLResults =
+None
+{% endhighlight %}
+</div>
+
+</div>
+
+Note that GPU instructions appear with a "gpu" prefix in the statistics output.
 
 ## Explain
 

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/6b1572e4/src/main/java/org/apache/sysml/api/mlcontext/MLContext.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/api/mlcontext/MLContext.java b/src/main/java/org/apache/sysml/api/mlcontext/MLContext.java
index 3fe4dd0..4ef31c5 100644
--- a/src/main/java/org/apache/sysml/api/mlcontext/MLContext.java
+++ b/src/main/java/org/apache/sysml/api/mlcontext/MLContext.java
@@ -99,6 +99,11 @@ public class MLContext {
     private boolean statistics = false;
 
     /**
+     * Whether or not GPU mode should be enabled
+     */
+    private boolean gpu = false;
+
+    /**
      * The number of heavy hitters that are printed as part of the statistics
      * option
      */
@@ -274,6 +279,7 @@ public class MLContext {
         ScriptExecutor scriptExecutor = new ScriptExecutor();
         scriptExecutor.setExplain(explain);
         scriptExecutor.setExplainLevel(explainLevel);
+        scriptExecutor.setGPU(gpu);
         scriptExecutor.setStatistics(statistics);
         scriptExecutor.setStatisticsMaxHeavyHitters(statisticsMaxHeavyHitters);
         scriptExecutor.setInit(scriptHistoryStrings.isEmpty());
@@ -412,6 +418,25 @@ public class MLContext {
     }
 
     /**
+     * Whether or not to use an available GPU on the driver node.
+     * If GPU mode is set but a GPU is not available, SystemML will crash when the program is run.
+     * @param enable
+     *            true to enable GPU mode, false otherwise
+     */
+    public void setGPU(boolean enable) {
+        this.gpu = enable;
+    }
+
+    /**
+     * Whether or not the GPU mode is enabled.
+     * @return true if enabled, false otherwise
+     */
+    public boolean isGPU() {
+        return this.gpu;
+    }
+
+
+    /**
      * Used internally by MLContextProxy.
      * 
      */

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/6b1572e4/src/main/java/org/apache/sysml/api/mlcontext/ScriptExecutor.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/api/mlcontext/ScriptExecutor.java b/src/main/java/org/apache/sysml/api/mlcontext/ScriptExecutor.java
index 5ee8622..069abd3 100644
--- a/src/main/java/org/apache/sysml/api/mlcontext/ScriptExecutor.java
+++ b/src/main/java/org/apache/sysml/api/mlcontext/ScriptExecutor.java
@@ -46,6 +46,7 @@ import org.apache.sysml.runtime.controlprogram.LocalVariableMap;
 import org.apache.sysml.runtime.controlprogram.Program;
 import org.apache.sysml.runtime.controlprogram.context.ExecutionContext;
 import org.apache.sysml.runtime.controlprogram.context.ExecutionContextFactory;
+import org.apache.sysml.runtime.instructions.gpu.context.GPUContext;
 import org.apache.sysml.utils.Explain;
 import org.apache.sysml.utils.Explain.ExplainCounts;
 import org.apache.sysml.utils.Explain.ExplainType;
@@ -114,6 +115,7 @@ public class ScriptExecutor {
     protected Script script;
     protected boolean init = false;
     protected boolean explain = false;
+    protected boolean gpu = false;
     protected boolean statistics = false;
     protected ExplainLevel explainLevel;
     protected int statisticsMaxHeavyHitters = 10;
@@ -307,7 +309,7 @@ public class ScriptExecutor {
      * Sets the script in the ScriptExecutor, checks that the script has a type
      * and string, sets the ScriptExecutor in the script, sets the script string
      * in the Spark Monitor, and globally sets the script type.
-     * 
+     * Also performs GPU initialization if GPU mode is enabled.
      * @param script
      *            the DML or PYDML script to execute
      */
@@ -317,6 +319,12 @@
         script.setScriptExecutor(this);
         // Set global variable indicating the script type
         DMLScript.SCRIPT_TYPE = script.getScriptType();
+        try {
+            if (gpu)
+                GPUContext.getGPUContext();
+        } catch (DMLRuntimeException e) {
+            throw new MLContextException("Exception occurred during initialization of GPU", e);
+        }
     }
 
     /**
@@ -324,6 +332,12 @@
      */
     protected void cleanupAfterExecution() {
         restoreInputsInSymbolTable();
+        try {
+            if (gpu)
+                executionContext.destroyGPUContext();
+        } catch (DMLRuntimeException e) {
+            throw new MLContextException("Exception occurred during cleanup of GPU related resources", e);
+        }
     }
 
     /**
@@ -632,4 +646,14 @@
         }
     }
 
+    /**
+     * Whether or not to enable GPU usage
+     * @param enabled
+     *            true if enabled, false otherwise
+     */
+    public void setGPU(boolean enabled) {
+        this.gpu = enabled;
+        DMLScript.USE_ACCELERATOR = enabled;
+    }
+
 }

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/6b1572e4/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUContext.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUContext.java b/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUContext.java
index 86a891e..a7c0ab8 100644
--- a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUContext.java
+++ b/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUContext.java
@@ -41,7 +41,7 @@ public abstract class GPUContext {
     public static ConcurrentLinkedQueue<Future> pendingDeallocates = new ConcurrentLinkedQueue<Future>();
 
     /** All asynchronous cudaFree calls will be done on this executor service */
-    public static ExecutorService deallocExecutorService = Executors.newSingleThreadExecutor();
+    public static ExecutorService deallocExecutorService;
 
     /** Synchronization object to make sure no allocations happen when something is being evicted from memory */
     public static final Object syncObj = new Object();

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/6b1572e4/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/JCudaContext.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/JCudaContext.java b/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/JCudaContext.java
index 6911cca..b743a3d 100644
--- a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/JCudaContext.java
+++ b/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/JCudaContext.java
@@ -18,6 +18,7 @@
  */
 package org.apache.sysml.runtime.instructions.gpu.context;
 
+import java.util.concurrent.Executors;
 import java.util.concurrent.atomic.AtomicLong;
 
 import org.apache.commons.logging.Log;
@@ -104,27 +105,8 @@ public class JCudaContext extends GPUContext {
         LOG.info("Active CUDA device number : " + device[0]);
         LOG.info("Max Blocks/Threads/SharedMem : " + maxBlocks + "/" + maxThreadsPerBlock + "/" + sharedMemPerBlock);
 
-        GPUStatistics.cudaInitTime = System.nanoTime() - start;
-
-        start = System.nanoTime();
-        LibMatrixCUDA.cudnnHandle = new cudnnHandle();
-        cudnnCreate(LibMatrixCUDA.cudnnHandle);
-        LibMatrixCUDA.cublasHandle = new cublasHandle();
-        cublasCreate(LibMatrixCUDA.cublasHandle);
-        // For cublas v2, cublasSetPointerMode tells Cublas whether to expect scalar arguments on device or on host
-        // This applies to arguments like "alpha" in Dgemm, and "y" in Ddot.
-        // cublasSetPointerMode(LibMatrixCUDA.cublasHandle, cublasPointerMode.CUBLAS_POINTER_MODE_DEVICE);
-        LibMatrixCUDA.cusparseHandle = new cusparseHandle();
-        cusparseCreate(LibMatrixCUDA.cusparseHandle);
-        GPUStatistics.cudaLibrariesInitTime = System.nanoTime() - start;
-
-        try {
-            LibMatrixCUDA.kernels = new JCudaKernels();
-        } catch (DMLRuntimeException e) {
-            System.err.println("ERROR - Unable to initialize JCudaKernels. System in an inconsistent state");
-            LibMatrixCUDA.kernels = null;
-        }
+        GPUStatistics.cudaInitTime = System.nanoTime() - start;
     }
 
     @Override
@@ -268,6 +250,26 @@ public class JCudaContext extends GPUContext {
         LOG.info("Total GPU memory: " + (totalNumBytes*(1e-6)) + " MB");
         LOG.info("Available GPU memory: " + (deviceMemBytes.get()*(1e-6)) + " MB");
 
+        long start = System.nanoTime();
+        LibMatrixCUDA.cudnnHandle = new cudnnHandle();
+        cudnnCreate(LibMatrixCUDA.cudnnHandle);
+        LibMatrixCUDA.cublasHandle = new cublasHandle();
+        cublasCreate(LibMatrixCUDA.cublasHandle);
+        // For cublas v2, cublasSetPointerMode tells Cublas whether to expect scalar arguments on device or on host
+        // This applies to arguments like "alpha" in Dgemm, and "y" in Ddot.
+        // cublasSetPointerMode(LibMatrixCUDA.cublasHandle, cublasPointerMode.CUBLAS_POINTER_MODE_DEVICE);
+        LibMatrixCUDA.cusparseHandle = new cusparseHandle();
+        cusparseCreate(LibMatrixCUDA.cusparseHandle);
+        try {
+            LibMatrixCUDA.kernels = new JCudaKernels();
+        } catch (DMLRuntimeException e) {
+            System.err.println("ERROR - Unable to initialize JCudaKernels. System in an inconsistent state");
+            LibMatrixCUDA.kernels = null;
+        }
+        GPUStatistics.cudaLibrariesInitTime = System.nanoTime() - start;
+
+        GPUContext.deallocExecutorService = Executors.newSingleThreadExecutor();
+
     }
 
     @Override
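
For reference, below is a minimal sketch of how the GPU option introduced by this commit can be used from the Spark shell. It is not part of the commit itself: it assumes `sc` is the shell's SparkContext, that the SystemML jar on the classpath was built with the JCuda dependencies, and that a CUDA-capable GPU is present on the driver node. The `systemml.gpu.enabled` system property is a hypothetical application-level switch used here for illustration, not a SystemML configuration property.

{% highlight scala %}
import org.apache.sysml.api.mlcontext.MLContext
import org.apache.sysml.api.mlcontext.ScriptFactory.dml

// `sc` is the SparkContext provided by spark-shell.
val ml = new MLContext(sc)

// Only turn GPU mode on when explicitly requested, because setGPU(true)
// causes the run to fail if no GPU is available on the driver node.
// "systemml.gpu.enabled" is a hypothetical application-level property.
val useGPU = sys.props.getOrElse("systemml.gpu.enabled", "false").toBoolean
if (useGPU) ml.setGPU(true)
ml.setStatistics(true)

val matMultScript = dml("""
A = rand(rows=10, cols=1000)
B = rand(rows=1000, cols=10)
C = A %*% B
print(toString(C))
""")

// With GPU mode and statistics enabled, GPU instructions such as gpu_ba+*
// appear in the printed statistics when they are executed on the GPU.
ml.execute(matMultScript)
{% endhighlight %}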
