Repository: systemml Updated Branches: refs/heads/master 1f5b14dda -> fec209306
[SYSTEMML-1795] Specify a set of GPUs to use for a given machine Can specify: a) -1 for all GPUs b) a specific number of GPU c) a comma separated list of GPUs d) a range of GPUs Closes #587 Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/fec20930 Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/fec20930 Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/fec20930 Branch: refs/heads/master Commit: fec209306d3c7e55673872f431d43ceda53b7a6c Parents: 1f5b14d Author: Nakul Jindal <[email protected]> Authored: Fri Jul 21 13:55:03 2017 -0700 Committer: Nakul Jindal <[email protected]> Committed: Fri Jul 21 13:55:04 2017 -0700 ---------------------------------------------------------------------- conf/SystemML-config.xml.template | 4 +- .../apache/sysml/api/ScriptExecutorUtils.java | 5 +- .../java/org/apache/sysml/conf/DMLConfig.java | 4 +- .../gpu/context/GPUContextPool.java | 90 +++++++++++++++++--- .../org/apache/sysml/test/unit/UtilsTest.java | 78 +++++++++++++++++ 5 files changed, 160 insertions(+), 21 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/systemml/blob/fec20930/conf/SystemML-config.xml.template ---------------------------------------------------------------------- diff --git a/conf/SystemML-config.xml.template b/conf/SystemML-config.xml.template index 11e86ed..8608a9c 100644 --- a/conf/SystemML-config.xml.template +++ b/conf/SystemML-config.xml.template @@ -78,6 +78,6 @@ <!-- prints extra statistics information for Deep Neural Networks done in CP mode --> <systemml.stats.extraDNN>false</systemml.stats.extraDNN> - <!-- sets the maximum number of GPUs per process, -1 for all GPUs --> - <systemml.gpu.perProcessMax>-1</systemml.gpu.perProcessMax> + <!-- sets the GPUs to use per process, -1 for all GPUs, a specific GPU number (5), a range (eg: 0-2) or a comma separated list (eg: 0,2,4)--> + <systemml.gpu.availableGPUs>-1</systemml.gpu.availableGPUs> </root> http://git-wip-us.apache.org/repos/asf/systemml/blob/fec20930/src/main/java/org/apache/sysml/api/ScriptExecutorUtils.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/api/ScriptExecutorUtils.java b/src/main/java/org/apache/sysml/api/ScriptExecutorUtils.java index 0e0950e..b094c91 100644 --- a/src/main/java/org/apache/sysml/api/ScriptExecutorUtils.java +++ b/src/main/java/org/apache/sysml/api/ScriptExecutorUtils.java @@ -78,9 +78,8 @@ public class ScriptExecutorUtils { LibMatrixDNN.DISPLAY_STATISTICS = dmlconf.getBooleanValue(DMLConfig.EXTRA_DNN_STATS); DMLScript.FINEGRAINED_STATISTICS = dmlconf.getBooleanValue(DMLConfig.EXTRA_FINEGRAINED_STATS); - // Sets the maximum number of GPUs per process, -1 for all available - // GPUs - GPUContextPool.PER_PROCESS_MAX_GPUS = dmlconf.getIntValue(DMLConfig.MAX_GPUS_PER_PROCESS); + // Sets the GPUs to use for this process (a range, all GPUs, comma separated list or a specific GPU) + GPUContextPool.AVAILABLE_GPUS = dmlconf.getTextValue(DMLConfig.AVAILABLE_GPUS); Statistics.startRunTimer(); try { // run execute (w/ exception handling to ensure proper shutdown) http://git-wip-us.apache.org/repos/asf/systemml/blob/fec20930/src/main/java/org/apache/sysml/conf/DMLConfig.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/conf/DMLConfig.java b/src/main/java/org/apache/sysml/conf/DMLConfig.java index c248098..a6a4b5e 100644 --- a/src/main/java/org/apache/sysml/conf/DMLConfig.java +++ b/src/main/java/org/apache/sysml/conf/DMLConfig.java @@ -78,7 +78,7 @@ public class DMLConfig public static final String EXTRA_FINEGRAINED_STATS = "systemml.stats.finegrained"; //boolean public static final String EXTRA_GPU_STATS = "systemml.stats.extraGPU"; //boolean public static final String EXTRA_DNN_STATS = "systemml.stats.extraDNN"; //boolean - public static final String MAX_GPUS_PER_PROCESS = "systemml.gpu.perProcessMax"; // boolean, maximum number of gpus to use, -1 for all + public static final String AVAILABLE_GPUS = "systemml.gpu.availableGPUs"; // String to specify which GPUs to use (a range, all GPUs, comma separated list or a specific GPU) // Fraction of available memory to use. The available memory is computer when the GPUContext is created // to handle the tradeoff on calling cudaMemGetInfo too often. @@ -123,7 +123,7 @@ public class DMLConfig _defaultVals.put(EXTRA_DNN_STATS, "false" ); _defaultVals.put(GPU_MEMORY_UTILIZATION_FACTOR, "0.9" ); - _defaultVals.put(MAX_GPUS_PER_PROCESS, "-1"); + _defaultVals.put(AVAILABLE_GPUS, "-1"); } public DMLConfig() http://git-wip-us.apache.org/repos/asf/systemml/blob/fec20930/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUContextPool.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUContextPool.java b/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUContextPool.java index ef38da8..88bf403 100644 --- a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUContextPool.java +++ b/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUContextPool.java @@ -22,6 +22,7 @@ import static jcuda.driver.JCudaDriver.cuDeviceGetCount; import static jcuda.driver.JCudaDriver.cuInit; import static jcuda.runtime.JCuda.cudaGetDeviceProperties; +import java.util.ArrayList; import java.util.LinkedList; import java.util.List; @@ -42,9 +43,9 @@ public class GPUContextPool { protected static final Log LOG = LogFactory.getLog(GPUContextPool.class.getName()); /** - * Maximum number of gpus to use, -1 for all + * GPUs to use, can specify -1 to use all, comma separated list of GPU numbers, a specific GPU or a range */ - public static int PER_PROCESS_MAX_GPUS = -1; + public static String AVAILABLE_GPUS; private static long INITIAL_GPU_MEMORY_BUDGET = -1; @@ -98,22 +99,32 @@ public class GPUContextPool { deviceCount = deviceCountArray[0]; deviceProperties = new cudaDeviceProp[deviceCount]; - if (PER_PROCESS_MAX_GPUS > 0) - deviceCount = Math.min(PER_PROCESS_MAX_GPUS, deviceCount); + try { + ArrayList<Integer> listOfGPUs = parseListString(AVAILABLE_GPUS, deviceCount); - // Initialize the list of devices - for (int i = 0; i < deviceCount; i++) { - cudaDeviceProp properties = new cudaDeviceProp(); - cudaGetDeviceProperties(properties, i); - deviceProperties[i] = properties; - } + // Initialize the list of devices & the pool of GPUContexts + for (int i : listOfGPUs) { + cudaDeviceProp properties = new cudaDeviceProp(); + cudaGetDeviceProperties(properties, i); + deviceProperties[i] = properties; + GPUContext gCtx = new GPUContext(i); + pool.add(gCtx); + } + + } catch (IllegalArgumentException e) { + LOG.warn("Invalid setting for setting systemml.gpu.availableGPUs, defaulting to use ALL GPUs"); - // Initialize the pool of GPUContexts - for (int i = 0; i < deviceCount; i++) { - GPUContext gCtx = new GPUContext(i); - pool.add(gCtx); + // Initialize the list of devices & the pool of GPUContexts + for (int i = 0; i < deviceCount; i++) { + cudaDeviceProp properties = new cudaDeviceProp(); + cudaGetDeviceProperties(properties, i); + deviceProperties[i] = properties; + GPUContext gCtx = new GPUContext(i); + pool.add(gCtx); + } } + // Initialize the initial memory budget // If there are heterogeneous GPUs on the machine (different memory sizes) // initially available memory is set to the GPU with the lowest memory @@ -128,6 +139,7 @@ public class GPUContextPool { GPUContext.LOG.info("Total number of GPUs on the machine: " + deviceCount); + GPUContext.LOG.info("GPUs being used: " + AVAILABLE_GPUS); GPUContext.LOG.info("Initial GPU memory: " + initialGPUMemBudget()); //int[] device = {-1}; @@ -142,6 +154,56 @@ public class GPUContextPool { } /** + * Parses a string into a list. The string can be of these forms: + * 1. "-1" : all integers from range 0 to max - [0,1,2,3....max] + * 2. "2,3,0" : comma separated list of integers - [0,2,3] + * 3. "4" : a specific integer - [4] + * 4. "0-4" : a range of integers - [0,1,2,3,4] + * In ranges and comma separated lists, all values must be positive. Anything else is invalid. + * @param str input string + * @param max maximum range of integers + * @return the list of integers in the parsed string + */ + public static ArrayList<Integer> parseListString(String str, int max) { + ArrayList<Integer> result = new ArrayList<>(); + str = str.trim(); + if (str == "-1") { // all + for (int i=0; i<max; i++){ + result.add(i); + } + } else if (str.contains("-")){ // range + String[] numbersStr = str.split("-"); + if (numbersStr.length != 2) { + throw new IllegalArgumentException("Invalid string to parse to a list of numbers : " + str); + } + String beginStr = numbersStr[0]; + String endStr = numbersStr[1]; + int begin = Integer.parseInt(beginStr); + int end = Integer.parseInt(endStr); + + for (int i=begin; i<=end; i++){ + result.add(i); + } + } else if (str.contains(",")) { // comma separated list + String[] numbers = str.split(","); + for (int i = 0; i < numbers.length; i++) { + int n = Integer.parseInt(numbers[i].trim()); + result.add(n); + } + } else { // single number + int number = Integer.parseInt(str); + result.add(number); + } + // Check if all numbers between 0 and max + for (int n : result){ + if (n < 0 || n >= max) { + throw new IllegalArgumentException("Invalid string (" + str + ") parsed to a list of numbers (" + result + ") which exceeds the maximum range : "); + } + } + return result; + } + + /** * Reserves and gets an initialized list of GPUContexts * * @return null if no GPUContexts in pool, otherwise a valid list of GPUContext http://git-wip-us.apache.org/repos/asf/systemml/blob/fec20930/src/test/java/org/apache/sysml/test/unit/UtilsTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/sysml/test/unit/UtilsTest.java b/src/test/java/org/apache/sysml/test/unit/UtilsTest.java new file mode 100644 index 0000000..b5dccd0 --- /dev/null +++ b/src/test/java/org/apache/sysml/test/unit/UtilsTest.java @@ -0,0 +1,78 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.sysml.test.unit; + + +import java.util.Arrays; + +import org.apache.sysml.runtime.instructions.gpu.context.GPUContextPool; +import org.junit.Assert; +import org.junit.Test; + +/** + * To test utility functions scattered throughout the codebase + */ +public class UtilsTest { + + @Test + public void testParseListString0() { + Assert.assertEquals(Arrays.asList(0), GPUContextPool.parseListString("0", 10)); + } + + @Test + public void testParseListString1() { + Assert.assertEquals(Arrays.asList(7), GPUContextPool.parseListString("7", 10)); + } + + @Test + public void testParseListString2() { + Assert.assertEquals(Arrays.asList(0,1,2,3), GPUContextPool.parseListString("-1", 4)); + } + + @Test + public void testParseListString3() { + Assert.assertEquals(Arrays.asList(0,1,2,3), GPUContextPool.parseListString("0,1,2,3", 6)); + } + + @Test + public void testParseListString4() { + Assert.assertEquals(Arrays.asList(0,1,2,3), GPUContextPool.parseListString("0-3", 6)); + } + + @Test(expected=IllegalArgumentException.class) + public void testParseListStringFail0() { + GPUContextPool.parseListString("7", 4); + } + + @Test(expected=IllegalArgumentException.class) + public void testParseListStringFail1() { + GPUContextPool.parseListString("0,1,2,3", 2); + } + + @Test(expected=IllegalArgumentException.class) + public void testParseListStringFail2() { + GPUContextPool.parseListString("0,1,2,3-4", 2); + } + + @Test(expected=IllegalArgumentException.class) + public void testParseListStringFail4() { + GPUContextPool.parseListString("-1-4", 6); + } +}
