incubator-systemml git commit: [HOTFIX] Disabling GPU fused relu & maxpooling operator because of bug
Repository: incubator-systemml Updated Branches: refs/heads/master 0490fec93 -> 5baac2d62 [HOTFIX] Disabling GPU fused relu & maxpooling operator because of bug - Fixed the timer that counts the number of times memory chunks are zero-ed out - Some minor code refactoring Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/5baac2d6 Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/5baac2d6 Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/5baac2d6 Branch: refs/heads/master Commit: 5baac2d62f64026ff82b9d674b909bc4b80800b0 Parents: 0490fec Author: Nakul Jindal Authored: Mon Mar 13 15:40:08 2017 -0700 Committer: Nakul Jindal Committed: Wed Mar 15 15:31:17 2017 -0700 -- .../org/apache/sysml/hops/ConvolutionOp.java| 12 +++ .../gpu/ConvolutionGPUInstruction.java | 9 +++-- .../instructions/gpu/context/JCudaObject.java | 2 +- .../runtime/matrix/data/LibMatrixCUDA.java | 21 ++-- 4 files changed, 26 insertions(+), 18 deletions(-) -- http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/5baac2d6/src/main/java/org/apache/sysml/hops/ConvolutionOp.java -- diff --git a/src/main/java/org/apache/sysml/hops/ConvolutionOp.java b/src/main/java/org/apache/sysml/hops/ConvolutionOp.java index 943ff96..9483b2c 100644 --- a/src/main/java/org/apache/sysml/hops/ConvolutionOp.java +++ b/src/main/java/org/apache/sysml/hops/ConvolutionOp.java @@ -19,15 +19,13 @@ package org.apache.sysml.hops; -import java.util.ArrayList; - import org.apache.sysml.conf.ConfigurationManager; import org.apache.sysml.hops.Hop.MultiThreadedHop; import org.apache.sysml.lops.ConvolutionTransform; import org.apache.sysml.lops.ConvolutionTransform.OperationTypes; import org.apache.sysml.lops.Lop; -import org.apache.sysml.lops.LopsException; import org.apache.sysml.lops.LopProperties.ExecType; +import org.apache.sysml.lops.LopsException; import org.apache.sysml.lops.ReBlock; import 
org.apache.sysml.parser.Expression.DataType; import org.apache.sysml.parser.Expression.ValueType; @@ -35,6 +33,8 @@ import org.apache.sysml.runtime.DMLRuntimeException; import org.apache.sysml.runtime.matrix.MatrixCharacteristics; import org.apache.sysml.runtime.matrix.data.ConvolutionParameters; +import java.util.ArrayList; + public class ConvolutionOp extends Hop implements MultiThreadedHop { private Hop.ConvOp op; @@ -179,7 +179,11 @@ public class ConvolutionOp extends Hop implements MultiThreadedHop ArrayList inputs1 = inputs; int k = OptimizerUtils.getConstrainedNumThreads(_maxNumThreads); OperationTypes lopOp = HopsConv2Lops.get(op); - if(op == ConvOp.MAX_POOLING && isInputReLU(inputs.get(0))) { + + // The fused relu_maxpooling is being disabled for now on the GPU + // There is a bug in LibMatrixCUDA#reluMaxpooling + // which we need to understand before enabling this by removing the "et != ExecType.GPU" guard. + if(op == ConvOp.MAX_POOLING && isInputReLU(inputs.get(0)) && et != ExecType.GPU) { in = inputs.get(0).getInput().get(0).constructLops(); lopOp = OperationTypes.RELU_MAX_POOLING; } http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/5baac2d6/src/main/java/org/apache/sysml/runtime/instructions/gpu/ConvolutionGPUInstruction.java -- diff --git a/src/main/java/org/apache/sysml/runtime/instructions/gpu/ConvolutionGPUInstruction.java b/src/main/java/org/apache/sysml/runtime/instructions/gpu/ConvolutionGPUInstruction.java index daf3c58..7460d6b 100644 --- a/src/main/java/org/apache/sysml/runtime/instructions/gpu/ConvolutionGPUInstruction.java +++ b/src/main/java/org/apache/sysml/runtime/instructions/gpu/ConvolutionGPUInstruction.java @@ -18,8 +18,6 @@ */ package org.apache.sysml.runtime.instructions.gpu; -import java.util.ArrayList; - import org.apache.sysml.runtime.DMLRuntimeException; import org.apache.sysml.runtime.controlprogram.caching.MatrixObject; import org.apache.sysml.runtime.controlprogram.context.ExecutionContext; @@ -31,6 +29,8 @@ 
import org.apache.sysml.runtime.matrix.operators.ReorgOperator; import org.apache.sysml.runtime.util.ConvolutionUtils; import org.apache.sysml.utils.GPUStatistics; +import java.util.ArrayList; + public class ConvolutionGPUInstruction extends GPUInstruction { private CPOperand _input1; @@ -337,8 +337,13 @@ public class ConvolutionGPUInstruction extends GPUInstruction // rele
incubator-systemml git commit: [HOTFIX] Changed unit test LRUCacheMapTest to run only with mvn verify
Repository: incubator-systemml Updated Branches: refs/heads/master 95be80c5b -> 97da0004f [HOTFIX] Changed unit test LRUCacheMapTest to run only with mvn verify Closes #436 Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/97da0004 Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/97da0004 Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/97da0004 Branch: refs/heads/master Commit: 97da0004f1423d40d63372e59c0424d28793ef92 Parents: 95be80c Author: Nakul Jindal Authored: Wed Mar 22 12:08:21 2017 -0700 Committer: Nakul Jindal Committed: Wed Mar 22 12:08:21 2017 -0700 -- pom.xml | 1 + .../apache/sysml/test/unit/LRUCacheMapTest.java | 120 +++ .../sysml/test/utils/LRUCacheMapTest.java | 120 --- 3 files changed, 121 insertions(+), 120 deletions(-) -- http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/97da0004/pom.xml -- diff --git a/pom.xml b/pom.xml index 86efe21..656d0a1 100644 --- a/pom.xml +++ b/pom.xml @@ -351,6 +351,7 @@ **/slowtest/** **/integration/** + **/test/unit/** http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/97da0004/src/test/java/org/apache/sysml/test/unit/LRUCacheMapTest.java -- diff --git a/src/test/java/org/apache/sysml/test/unit/LRUCacheMapTest.java b/src/test/java/org/apache/sysml/test/unit/LRUCacheMapTest.java new file mode 100644 index 000..09df5a0 --- /dev/null +++ b/src/test/java/org/apache/sysml/test/unit/LRUCacheMapTest.java @@ -0,0 +1,120 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.sysml.test.unit; + +import org.apache.sysml.utils.LRUCacheMap; +import org.junit.Assert; +import org.junit.Test; + +import java.util.Map; + +public class LRUCacheMapTest { + + @Test + public void test1() throws Exception { +LRUCacheMap m = new LRUCacheMap(); +m.put("k1", 10l); +m.put("k2", 20l); +m.put("k3", 30l); +m.put("k4", 40l); + +Map.Entry e = m.removeAndGetLRUEntry(); +Assert.assertEquals("k1", e.getKey()); + } + + @Test + public void test2() throws Exception { +LRUCacheMap m = new LRUCacheMap(); +m.put("k1", 10l); +m.put("k2", 20l); +m.put("k3", 30l); +m.put("k4", 40l); +m.get("k1"); + +Map.Entry e = m.removeAndGetLRUEntry(); +Assert.assertEquals("k2", e.getKey()); + } + + @Test(expected = IllegalArgumentException.class) + public void test3() { +LRUCacheMap m = new LRUCacheMap(); +m.put(null, 10l); + } + + @Test + public void test4() throws Exception { +LRUCacheMap m = new LRUCacheMap(); +m.put("k1", 10l); +m.put("k2", 20l); +m.put("k3", 30l); +m.put("k4", 40l); +m.remove("k1"); +m.remove("k2"); + +Map.Entry e = m.removeAndGetLRUEntry(); +Assert.assertEquals("k3", e.getKey()); + } + + @Test + public void test5() throws Exception { +LRUCacheMap m = new LRUCacheMap(); +m.put("k1", 10l); +m.put("k2", 20l); +m.put("k1", 30l); + +Map.Entry e = m.removeAndGetLRUEntry(); +Assert.assertEquals("k2", e.getKey()); + } + + @Test + public void test6() throws Exception { +LRUCacheMap m = new LRUCacheMap(); +m.put("k1", 10l); +m.put("k2", 20l); +m.put("k3", 30l); +m.put("k4", 40l); +m.put("k5", 50l); +m.put("k6", 60l); 
+m.put("k7", 70l); +m.put("k8", 80l); +m.get("k4"); + + +Map.Entry e; +e = m.removeAndGetLRUEntry(); +Assert.assertEquals("k1", e.getKey()); +e = m.removeAndGetLRUEntry(); +Assert.assertEquals("k2", e.getKey()); +e = m.remo
[1/3] incubator-systemml git commit: Use Apache Commons CLI to parse command line arguments in DMLScript
Repository: incubator-systemml Updated Branches: refs/heads/master ee6bc8ce2 -> 346d1c01a http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/346d1c01/src/test/java/org/apache/sysml/test/unit/CLIOptionsParserTest.java -- diff --git a/src/test/java/org/apache/sysml/test/unit/CLIOptionsParserTest.java b/src/test/java/org/apache/sysml/test/unit/CLIOptionsParserTest.java new file mode 100644 index 000..d780db4 --- /dev/null +++ b/src/test/java/org/apache/sysml/test/unit/CLIOptionsParserTest.java @@ -0,0 +1,419 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.sysml.test.unit; + +import org.apache.commons.cli.AlreadySelectedException; +import org.apache.commons.cli.MissingOptionException; +import org.apache.commons.cli.Options; +import org.apache.commons.cli.ParseException; +import org.apache.sysml.api.DMLScript; +import org.apache.sysml.api.ScriptType; +import org.apache.sysml.utils.Explain; +import org.junit.Assert; +import org.junit.Test; + +import java.util.Map; + + +public class CLIOptionsParserTest { + + @Test(expected = MissingOptionException.class) + public void testNoOptions() throws Exception { +String cl = "systemml"; +String[] args = cl.split(" "); +Options options = DMLScript.createCLIOptions(); +DMLScript.DMLOptions o = DMLScript.parseCLArguments(args, options); + } + + @Test + public void testFile() throws Exception { +String cl = "systemml -f test.dml"; +String[] args = cl.split(" "); +Options options = DMLScript.createCLIOptions(); +DMLScript.DMLOptions o = DMLScript.parseCLArguments(args, options); +Assert.assertEquals("test.dml", o.filePath); +Assert.assertEquals(ScriptType.DML, o.scriptType); + + } + + @Test + public void testScript() throws Exception { +String cl = "systemml -s \"print('hello')\""; +String[] args = cl.split(" "); +Options options = DMLScript.createCLIOptions(); +DMLScript.DMLOptions o = DMLScript.parseCLArguments(args, options); +Assert.assertEquals("print('hello')", o.script); + } + + @Test + public void testConfig() throws Exception { +String cl = "systemml -s \"print('hello')\" -config SystemML-config.xml"; +String[] args = cl.split(" "); +Options options = DMLScript.createCLIOptions(); +DMLScript.DMLOptions o = DMLScript.parseCLArguments(args, options); +Assert.assertEquals("print('hello')", o.script); +Assert.assertEquals("SystemML-config.xml", o.configFile); + } + + @Test + public void testDebug() throws Exception { +String cl = "systemml -s \"print('hello')\" -debug"; +String[] args = cl.split(" "); +Options options = DMLScript.createCLIOptions(); 
+DMLScript.DMLOptions o = DMLScript.parseCLArguments(args, options); +Assert.assertEquals("print('hello')", o.script); +Assert.assertEquals(true, o.debug); + } + + @Test + public void testClean() throws Exception { +String cl = "systemml -clean"; +String[] args = cl.split(" "); +Options options = DMLScript.createCLIOptions(); +DMLScript.DMLOptions o = DMLScript.parseCLArguments(args, options); +Assert.assertEquals(true, o.clean); + } + + @Test(expected = AlreadySelectedException.class) + public void testBadClean() throws Exception { +String cl = "systemml -clean -f test.dml"; +String[] args = cl.split(" "); +Options options = DMLScript.createCLIOptions(); +DMLScript.DMLOptions o = DMLScript.parseCLArguments(args, options); + } + + @Test(expected = AlreadySelectedException.class) + public void testBadScript() throws Exception { +String cl = "systemml -f test.dml -s \"print('hello')\""; +String[] args = cl.split(" "); +Options options = DMLScript.createCLIOptions(); +DMLScript.DMLOptions o = DMLScript.parseCLArguments(args, options); + } + + @Test + public void testStats() throws Exception { +String cl = "systemml -f test.dml -stats"; +String[] args = cl.split(" "); +Options options = DMLScript.createCLIOptions(); +DMLScript.DMLOptions o = DMLScript.parseCLArguments(args, options); +Assert.assertEquals(true, o.stats); +Assert.assertEquals(10, o.sta
[2/3] incubator-systemml git commit: Use Apache Commons CLI to parse command line arguments in DMLScript
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/346d1c01/src/main/java/org/apache/sysml/api/DMLScript.java -- diff --git a/src/main/java/org/apache/sysml/api/DMLScript.java b/src/main/java/org/apache/sysml/api/DMLScript.java index c04c321..175688a 100644 --- a/src/main/java/org/apache/sysml/api/DMLScript.java +++ b/src/main/java/org/apache/sysml/api/DMLScript.java @@ -19,22 +19,15 @@ package org.apache.sysml.api; -import java.io.BufferedReader; -import java.io.ByteArrayInputStream; -import java.io.FileReader; -import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.net.URI; -import java.text.DateFormat; -import java.text.SimpleDateFormat; -import java.util.Collections; -import java.util.Date; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Map; -import java.util.Scanner; - +import org.apache.commons.cli.AlreadySelectedException; +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.CommandLineParser; +import org.apache.commons.cli.HelpFormatter; +import org.apache.commons.cli.Option; +import org.apache.commons.cli.OptionBuilder; +import org.apache.commons.cli.OptionGroup; +import org.apache.commons.cli.Options; +import org.apache.commons.cli.PosixParser; import org.apache.commons.lang.StringUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -46,7 +39,6 @@ import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.util.GenericOptionsParser; import org.apache.log4j.Level; import org.apache.log4j.Logger; -import org.apache.sysml.api.mlcontext.ScriptType; import org.apache.sysml.conf.CompilerConfig; import org.apache.sysml.conf.ConfigurationManager; import org.apache.sysml.conf.DMLConfig; @@ -73,12 +65,12 @@ import org.apache.sysml.runtime.controlprogram.caching.CacheStatistics; import org.apache.sysml.runtime.controlprogram.caching.CacheableData; import 
org.apache.sysml.runtime.controlprogram.context.ExecutionContext; import org.apache.sysml.runtime.controlprogram.context.ExecutionContextFactory; -import org.apache.sysml.runtime.instructions.gpu.context.GPUContext; -import org.apache.sysml.runtime.io.IOUtilFunctions; import org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext; import org.apache.sysml.runtime.controlprogram.parfor.ProgramConverter; import org.apache.sysml.runtime.controlprogram.parfor.stat.InfrastructureAnalyzer; import org.apache.sysml.runtime.controlprogram.parfor.util.IDHandler; +import org.apache.sysml.runtime.instructions.gpu.context.GPUContext; +import org.apache.sysml.runtime.io.IOUtilFunctions; import org.apache.sysml.runtime.matrix.CleanupMR; import org.apache.sysml.runtime.matrix.data.LibMatrixDNN; import org.apache.sysml.runtime.matrix.mapred.MRConfigurationNames; @@ -93,6 +85,22 @@ import org.apache.sysml.utils.Statistics; import org.apache.sysml.yarn.DMLAppMasterUtils; import org.apache.sysml.yarn.DMLYarnClientProxy; +import java.io.BufferedReader; +import java.io.ByteArrayInputStream; +import java.io.FileReader; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.net.URI; +import java.text.DateFormat; +import java.text.SimpleDateFormat; +import java.util.Collections; +import java.util.Date; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Scanner; + public class DMLScript { @@ -103,66 +111,77 @@ public class DMLScript HYBRID_SPARK, // execute matrix operations in CP or Spark SPARK // execute matrix operations in Spark } - - public static RUNTIME_PLATFORM rtplatform = OptimizerUtils.getDefaultExecutionMode(); - public static boolean STATISTICS = false; //default statistics - public static int STATISTICS_COUNT = 10;//default statistics maximum heavy hitter count - public static boolean ENABLE_DEBUG_MODE = false; //default debug mode - public static boolean 
USE_LOCAL_SPARK_CONFIG = false; //set default local spark configuration - used for local testing - public static String DML_FILE_PATH_ANTLR_PARSER = null; - public static ExplainType EXPLAIN = ExplainType.NONE; //default explain + + /** +* Set of DMLOptions that can be set through the command line +* and {@link org.apache.sysml.api.mlcontext.MLContext} +* The values have been initialized with the default values +* Despite there being a DML and PyDML, this class is named DMLOptions +* to keep it consistent with {@link DMLScript} and {@link DMLOptions} +*/ + public static class DMLOptions { + public Map argVals = new HashMap<>(); // Arguments map containing either named arguments or arguments by position for a DML program +
[3/3] incubator-systemml git commit: Use Apache Commons CLI to parse command line arguments in DMLScript
Use Apache Commons CLI to parse command line arguments in DMLScript - Uses Apache Commons CLI 1.2 to parse command line options - Known limitation - strips arguments of leading and trailing double quotes - Changed scripts to accept "-config " instead of "-config=" - Instead of "-gpu force=true", accepts "-gpu force" - Concise description of usage options - Updated bin/systemml script to print usage options when passed the "-help" option - Removed DMLScriptTest{1,2}, lots of test cases added as unit tests as they were testing the previous hand-rolled command line parsing - Added unit tests Closes #435 Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/346d1c01 Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/346d1c01 Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/346d1c01 Branch: refs/heads/master Commit: 346d1c01ad94c5b8178b8c9baf7d38e0867805da Parents: ee6bc8c Author: Nakul Jindal Authored: Mon Mar 27 13:35:12 2017 -0700 Committer: Nakul Jindal Committed: Mon Mar 27 13:35:12 2017 -0700 -- bin/systemml| 84 ++- bin/systemml.bat| 4 +- docs/algorithms-classification.md | 44 +- docs/algorithms-clustering.md | 14 +- docs/algorithms-descriptive-statistics.md | 14 +- docs/algorithms-matrix-factorization.md | 18 +- docs/algorithms-regression.md | 36 +- docs/algorithms-survival-analysis.md| 16 +- docs/hadoop-batch-mode.md | 16 +- docs/spark-batch-mode.md| 4 +- docs/standalone-guide.md| 2 +- docs/troubleshooting-guide.md | 2 +- scripts/sparkDML.sh | 2 +- .../java/org/apache/sysml/api/DMLScript.java| 613 --- .../java/org/apache/sysml/api/MLContext.java| 66 +- .../java/org/apache/sysml/api/ScriptType.java | 65 ++ .../org/apache/sysml/api/jmlc/Connection.java | 22 +- .../sysml/api/mlcontext/MLContextUtil.java | 1 + .../org/apache/sysml/api/mlcontext/Script.java | 1 + .../sysml/api/mlcontext/ScriptExecutor.java | 10 +- 
.../sysml/api/mlcontext/ScriptFactory.java | 1 + .../apache/sysml/api/mlcontext/ScriptType.java | 65 -- .../org/apache/sysml/parser/AParserWrapper.java | 28 +- .../runtime/instructions/cp/BooleanObject.java | 2 +- .../org/apache/sysml/yarn/DMLYarnClient.java| 19 +- src/main/resources/scripts/sparkDML.sh | 2 +- src/main/standalone/runStandaloneSystemML.bat | 4 +- src/main/standalone/runStandaloneSystemML.sh| 2 +- .../test/integration/AutomatedTestBase.java | 3 +- .../functions/dmlscript/DMLScriptTest1.java | 125 .../functions/dmlscript/DMLScriptTest2.java | 151 - .../functions/misc/DataTypeChangeTest.java | 27 +- .../parfor/ParForDependencyAnalysisTest.java| 15 +- .../TransformFrameEncodeDecodeTest.java | 11 +- .../integration/mlcontext/MLContextTest.java| 12 +- .../sysml/test/unit/CLIOptionsParserTest.java | 419 + .../functions/dmlscript/ZPackageSuite.java | 37 -- 37 files changed, 1121 insertions(+), 836 deletions(-) -- http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/346d1c01/bin/systemml -- diff --git a/bin/systemml b/bin/systemml index 0ccee2d..44ab45e 100755 --- a/bin/systemml +++ b/bin/systemml @@ -20,32 +20,22 @@ # #- + # error help print -printUsageExit() +printSimpleUsage() { cat << EOF Usage: $0 [arguments] [-help] --help - Print this usage message and exit +-help - Print detailed help message EOF exit 1 } -#Script internally invokes 'java -Xmx4g -Xms4g -Xmn400m [Custom-Java-Options] -jar StandaloneSystemML.jar -f -exec singlenode -config=SystemML-config.xml [Optional-Arguments]' -while getopts "h:" options; do - case $options in -h ) echo Warning: Help requested. Will exit after usage message -printUsageExit -;; -\? ) echo Warning: Help requested. 
Will exit after usage message -printUsageExit -;; -* ) echo Error: Unexpected error while processing options - esac -done +#Script internally invokes 'java -Xmx4g -Xms4g -Xmn400m [Custom-Java-Options] -jar StandaloneSystemML.jar -f -exec singlenode -config=SystemML-config.xml [Optional-Arguments]' if [ -z "$1" ] ; then echo "Wrong Usage."; -printUsageExit; +printSimpleUsage fi @@ -98,24 +88,6 @@ then fi -# Peel off first argument so that $@ contains argume
[1/2] incubator-systemml git commit: [SYSTEMML-1431] Throw controlled error when one-dimensional numpy array is passed to SystemML
Repository: incubator-systemml Updated Branches: refs/heads/gh-pages 7407b7001 -> 8a125c75c [SYSTEMML-1431] Throw controlled error when one-dimensional numpy array is passed to SystemML Here is an example pyspark session demonstrating this PR: >>> from mlxtend.data import mnist_data >>> import numpy as np >>> from sklearn.utils import shuffle X, y = mnist_data() from systemml import MLContext, dml ml = MLContext(sc) script = dml('print(sum(X))').input(X=X) ml.execute(script) script = dml('print(sum(X))').input(X=y) ml.execute(script) script = dml('print(sum(X))').input(X=y.reshape(-1, 1)) ml.execute(script)>>> X, y = mnist_data() >>> from systemml import MLContext, dml >>> ml = MLContext(sc) Welcome to Apache SystemML! >>> script = dml('print(sum(X))').input(X=X) >>> ml.execute(script) 1.31267102E8 MLResults >>> script = dml('print(sum(X))').input(X=y) >>> ml.execute(script) ... TypeError: Expected 2-dimensional ndarray, instead passed 1-dimensional ndarray. Hint: If you intend to pass the 1-dimensional ndarray as a column-vector, please reshape it: input_ndarray.reshape(-1, 1) >>> script = dml('print(sum(X))').input(X=y.reshape(-1, 1)) >>> ml.execute(script) 22500.0 Closes #438. 
Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/a1d73f80 Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/a1d73f80 Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/a1d73f80 Branch: refs/heads/gh-pages Commit: a1d73f805bc6a94e953c0b999269b79fcbb07a16 Parents: 7407b70 Author: Niketan Pansare Authored: Thu Mar 23 11:41:16 2017 -0700 Committer: Niketan Pansare Committed: Thu Mar 23 11:44:33 2017 -0700 -- beginners-guide-python.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/a1d73f80/beginners-guide-python.md -- diff --git a/beginners-guide-python.md b/beginners-guide-python.md index ffab09e..24f7151 100644 --- a/beginners-guide-python.md +++ b/beginners-guide-python.md @@ -183,7 +183,7 @@ y_train = diabetes.target[:-20] y_test = diabetes.target[-20:] # Train Linear Regression model X = sml.matrix(X_train) -y = sml.matrix(y_train) +y = sml.matrix(np.matrix(y_train).T) A = X.transpose().dot(X) b = X.transpose().dot(y) beta = sml.solve(A, b).toNumPy()
[2/2] incubator-systemml git commit: Use Apache Commons CLI to parse command line arguments in DMLScript
Use Apache Commons CLI to parse command line arguments in DMLScript - Uses Apache Commons CLI 1.2 to parse command line options - Known limitation - strips arguments of leading and trailing double quotes - Changed scripts to accept "-config " instead of "-config=" - Instead of "-gpu force=true", accepts "-gpu force" - Concise description of usage options - Updated bin/systemml script to print usage options when passed the "-help" option - Removed DMLScriptTest{1,2}, lots of test cases added as unit tests as they were testing the previous hand-rolled command line parsing - Added unit tests Closes #435 Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/8a125c75 Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/8a125c75 Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/8a125c75 Branch: refs/heads/gh-pages Commit: 8a125c75c5f96ca65288806f36498483c413b04d Parents: a1d73f8 Author: Nakul Jindal Authored: Mon Mar 27 13:35:12 2017 -0700 Committer: Nakul Jindal Committed: Mon Mar 27 13:35:12 2017 -0700 -- algorithms-classification.md | 44 +++ algorithms-clustering.md | 14 +- algorithms-descriptive-statistics.md | 14 +- algorithms-matrix-factorization.md | 18 ++--- algorithms-regression.md | 36 - algorithms-survival-analysis.md | 16 +-- hadoop-batch-mode.md | 16 +-- spark-batch-mode.md | 4 +-- standalone-guide.md | 2 +- troubleshooting-guide.md | 2 +- 10 files changed, 83 insertions(+), 83 deletions(-) -- http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/8a125c75/algorithms-classification.md -- diff --git a/algorithms-classification.md b/algorithms-classification.md index 0ee43bf..11bd1da 100644 --- a/algorithms-classification.md +++ b/algorithms-classification.md @@ -165,7 +165,7 @@ val prediction = model.transform(X_test_df) --conf spark.akka.frameSize=128 SystemML.jar -f MultiLogReg.dml - -config=SystemML-config.xml + -config 
SystemML-config.xml -exec hybrid_spark -nvargs X= Y= @@ -336,7 +336,7 @@ prediction.show() --conf spark.akka.frameSize=128 SystemML.jar -f MultiLogReg.dml - -config=SystemML-config.xml + -config SystemML-config.xml -exec hybrid_spark -nvargs X=/user/ml/X.mtx Y=/user/ml/Y.mtx @@ -532,7 +532,7 @@ val model = svm.fit(X_train_df) --conf spark.akka.frameSize=128 SystemML.jar -f l2-svm.dml - -config=SystemML-config.xml + -config SystemML-config.xml -exec hybrid_spark -nvargs X= Y= @@ -579,7 +579,7 @@ val prediction = model.transform(X_test_df) --conf spark.akka.frameSize=128 SystemML.jar -f l2-svm-predict.dml - -config=SystemML-config.xml + -config SystemML-config.xml -exec hybrid_spark -nvargs X= Y=[file] @@ -661,7 +661,7 @@ using a held-out test set. Note that this is an optional argument. --conf spark.akka.frameSize=128 SystemML.jar -f l2-svm.dml - -config=SystemML-config.xml + -config SystemML-config.xml -exec hybrid_spark -nvargs X=/user/ml/X.mtx Y=/user/ml/y.mtx @@ -695,7 +695,7 @@ using a held-out test set. Note that this is an optional argument. --conf spark.akka.frameSize=128 SystemML.jar
[incubator-systemml] Git Push Summary [forced push!] [Forced Update!]
Repository: incubator-systemml Updated Branches: refs/heads/master 346d1c01a -> ee6bc8ce2 (forced update)
incubator-systemml git commit: [MINOR] Cleanup of some comments
Repository: incubator-systemml Updated Branches: refs/heads/master ee6bc8ce2 -> af93ca8a4 [MINOR] Cleanup of some comments Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/af93ca8a Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/af93ca8a Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/af93ca8a Branch: refs/heads/master Commit: af93ca8a40befaaba79b0f96b4dbf8b8db85be13 Parents: ee6bc8c Author: Nakul Jindal Authored: Mon Mar 27 14:39:31 2017 -0700 Committer: Nakul Jindal Committed: Mon Mar 27 14:39:31 2017 -0700 -- .../runtime/matrix/data/LibMatrixCUDA.java | 25 1 file changed, 5 insertions(+), 20 deletions(-) -- http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/af93ca8a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java -- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java index 8074e3a..23790c4 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java @@ -26,15 +26,16 @@ import jcuda.jcublas.cublasFillMode; import jcuda.jcublas.cublasHandle; import jcuda.jcublas.cublasOperation; import jcuda.jcudnn.cudnnActivationDescriptor; +import jcuda.jcudnn.cudnnBatchNormMode; import jcuda.jcudnn.cudnnConvolutionDescriptor; import jcuda.jcudnn.cudnnConvolutionFwdPreference; import jcuda.jcudnn.cudnnFilterDescriptor; import jcuda.jcudnn.cudnnHandle; import jcuda.jcudnn.cudnnPoolingDescriptor; +import jcuda.jcudnn.cudnnStatus; import jcuda.jcudnn.cudnnTensorDescriptor; import jcuda.jcusparse.JCusparse; import jcuda.jcusparse.cusparseHandle; - import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.sysml.api.DMLScript; @@ -88,6 +89,9 @@ import 
org.apache.sysml.utils.Statistics; import static jcuda.jcublas.cublasOperation.CUBLAS_OP_N; import static jcuda.jcublas.cublasOperation.CUBLAS_OP_T; import static jcuda.jcudnn.JCudnn.cudnnActivationForward; +import static jcuda.jcudnn.JCudnn.cudnnBatchNormalizationBackward; +import static jcuda.jcudnn.JCudnn.cudnnBatchNormalizationForwardInference; +import static jcuda.jcudnn.JCudnn.cudnnBatchNormalizationForwardTraining; import static jcuda.jcudnn.JCudnn.cudnnConvolutionBackwardData; import static jcuda.jcudnn.JCudnn.cudnnConvolutionBackwardFilter; import static jcuda.jcudnn.JCudnn.cudnnConvolutionForward; @@ -126,11 +130,6 @@ import static jcuda.runtime.cudaMemcpyKind.cudaMemcpyDeviceToHost; import static jcuda.runtime.cudaMemcpyKind.cudaMemcpyHostToDevice; import static org.apache.sysml.runtime.instructions.gpu.context.JCudaObject.allocate; import static org.apache.sysml.runtime.instructions.gpu.context.JCudaObject.cudaFreeHelper; -import jcuda.jcudnn.cudnnBatchNormMode; -import jcuda.jcudnn.cudnnStatus; -import static jcuda.jcudnn.JCudnn.cudnnBatchNormalizationForwardInference; -import static jcuda.jcudnn.JCudnn.cudnnBatchNormalizationForwardTraining; -import static jcuda.jcudnn.JCudnn.cudnnBatchNormalizationBackward; //FIXME move could to respective instructions, this is not a block library public class LibMatrixCUDA { @@ -2518,22 +2517,11 @@ public class LibMatrixCUDA { Pointer A = getDensePointer(out, instName); int rlen = (int) out.getNumRows(); int clen = (int) out.getNumColumns(); -// if(constant == 0) { -// out.getMatrixCharacteristics().setNonZeros(0); -// } -// else { -// out.getMatrixCharacteristics().setNonZeros(rlen*clen); -// } - // dense_matrix_set(double* A, double scalar, int rlen, int clen) - long t0=0; if (GPUStatistics.DISPLAY_STATISTICS) t0 = System.nanoTime(); int size = rlen * clen; kernels.launchKernel("fill", ExecutionConfig.getConfigForSimpleVectorOperations(size), A, constant, size); - // kernels.launchKernel("dense_matrix_set", - // 
ExecutionConfig.getConfigForSimpleMatrixOperations(rlen, clen), - // A, constant, rlen, clen); if (GPUStatistics.DISPLAY_STATISTICS) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_FILL_KERNEL, System.nanoTime() - t0); } @@ -2549,9 +2537,6 @@ public class LibMatrixCUDA { private static void deviceCopy(String
incubator-systemml git commit: Added python script to launch systemml in standalone mode
Repository: incubator-systemml Updated Branches: refs/heads/master ea6e2fe39 -> f73673d59 Added python script to launch systemml in standalone mode Closes #461 Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/f73673d5 Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/f73673d5 Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/f73673d5 Branch: refs/heads/master Commit: f73673d59383ac947111cb84787cfa4df3ca7344 Parents: ea6e2fe Author: Nakul Jindal Authored: Fri Apr 21 14:25:50 2017 -0700 Committer: Nakul Jindal Committed: Fri Apr 21 14:25:50 2017 -0700 -- bin/systemml-standalone.py | 199 1 file changed, 199 insertions(+) -- http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/f73673d5/bin/systemml-standalone.py -- diff --git a/bin/systemml-standalone.py b/bin/systemml-standalone.py new file mode 100755 index 000..367bcdf --- /dev/null +++ b/bin/systemml-standalone.py @@ -0,0 +1,199 @@ +#!/usr/bin/env python +#- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# +#- + +import os +import shutil +import sys +from os.path import join, exists + + +# error help print +def print_usage_and_exit(): +this_script = sys.argv[0] +print('Usage: ' + this_script + ' [arguments]') +sys.exit(1) + + +# from http://stackoverflow.com/questions/1724693/find-a-file-in-python +def find_file(name, path): +for root, dirs, files in os.walk(path): +if name in files: +return join(root, name) +return None + + +if len(sys.argv) < 2: +print('Wrong usage') +print_usage_and_exit() + + +# find the systemML root path which contains the bin folder, the script folder and the target folder +# tolerate path with spaces +script_dir = os.path.dirname(os.path.realpath(__file__)) +project_root_dir = os.path.dirname(script_dir) +user_dir = os.getcwd() + +scripts_dir = join(project_root_dir, 'scripts') +build_dir = join(project_root_dir, 'target') +lib_dir = join(build_dir, 'lib') +dml_script_class = join(build_dir, 'classes', 'org', 'apache', 'sysml', 'api', 'DMLScript.class') +hadoop_home = join(lib_dir, 'hadoop') + + +build_err_msg = 'You must build the project before running this script.' +build_dir_err_msg = 'Could not find target directory ' + build_dir + '. ' + build_err_msg + +lib_dir_err_msg = 'Could not find required libraries.' + build_err_msg +dml_script_err_msg = 'Could not find ' + dml_script_class + '. 
' + build_err_msg + +# check if the project had been built and the jar files exist +if not(exists(build_dir)): +print(build_dir_err_msg) +sys.exit(1) +if not(exists(lib_dir)): +print(lib_dir_err_msg) +sys.exit(1) +if not(exists(dml_script_class)): +print(dml_script_err_msg) +sys.exit(1) + +print('') + + +# if the present working directory is the project root or bin folder, then use the temp folder as user.dir +if user_dir == project_root_dir or user_dir == join(project_root_dir, 'bin'): +user_dir = join(project_root_dir, 'temp') +print('Output dir: ' + user_dir) + +# if the SystemML-config.xml does not exist, create it from the template +systemml_config_path = join(project_root_dir, 'conf', 'SystemML-config.xml') +systemml_template_config_path = join(project_root_dir, 'conf', 'SystemML-config.xml.template') +if not(exists(systemml_config_path)): +shutil.copyfile(systemml_template_config_path, systemml_config_path) +print('... created ' + systemml_config_path) + +# if the log4j.properties do not exist, create them from the template +log4j_properties_path = join(project_root_dir, 'conf', 'log4j.properties') +log4j_template_properties_path = join(project_root_dir, 'conf', 'log4j.properties.template') +if not(exi
[1/2] incubator-systemml git commit: Use Apache Commons CLI to parse command line arguments in DMLScript
Repository: incubator-systemml Updated Branches: refs/heads/master f73673d59 -> 32924dc60 http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/32924dc6/src/main/java/org/apache/sysml/api/MLContext.java -- diff --git a/src/main/java/org/apache/sysml/api/MLContext.java b/src/main/java/org/apache/sysml/api/MLContext.java index 809776a..b3102e9 100644 --- a/src/main/java/org/apache/sysml/api/MLContext.java +++ b/src/main/java/org/apache/sysml/api/MLContext.java @@ -771,7 +771,7 @@ public class MLContext { args[i] = entry.getKey() + "=" + entry.getValue(); i++; } - return compileAndExecuteScript(dmlScriptFilePath, args, true, parsePyDML, configFilePath); + return compileAndExecuteScript(dmlScriptFilePath, args, true, parsePyDML ? ScriptType.PYDML : ScriptType.DML, configFilePath); } /** @@ -785,17 +785,7 @@ public class MLContext { * @throws ParseException if ParseException occurs */ public MLOutput execute(String dmlScriptFilePath, Map namedArgs, String configFilePath) throws IOException, DMLException, ParseException { - String [] args = new String[namedArgs.size()]; - int i = 0; - for(Entry entry : namedArgs.entrySet()) { - if(entry.getValue().trim().isEmpty()) - args[i] = entry.getKey() + "=\"" + entry.getValue() + "\""; - else - args[i] = entry.getKey() + "=" + entry.getValue(); - i++; - } - - return compileAndExecuteScript(dmlScriptFilePath, args, true, false, configFilePath); + return execute(dmlScriptFilePath, namedArgs, false, configFilePath); } /** @@ -1014,7 +1004,7 @@ public class MLContext { * @throws ParseException if ParseException occurs */ public MLOutput execute(String dmlScriptFilePath, String [] args, boolean parsePyDML, String configFilePath) throws IOException, DMLException, ParseException { - return compileAndExecuteScript(dmlScriptFilePath, args, false, parsePyDML, configFilePath); + return compileAndExecuteScript(dmlScriptFilePath, args, false, parsePyDML ? 
ScriptType.PYDML : ScriptType.DML, configFilePath); } /** @@ -1067,7 +1057,7 @@ public class MLContext { * @throws ParseException if ParseException occurs */ public MLOutput execute(String dmlScriptFilePath, boolean parsePyDML, String configFilePath) throws IOException, DMLException, ParseException { - return compileAndExecuteScript(dmlScriptFilePath, null, false, parsePyDML, configFilePath); + return compileAndExecuteScript(dmlScriptFilePath, null, false, parsePyDML ? ScriptType.PYDML : ScriptType.DML, configFilePath); } /** @@ -1314,7 +1304,7 @@ public class MLContext { public MLOutput executeScript(String dmlScript, boolean isPyDML, String configFilePath) throws IOException, DMLException { - return compileAndExecuteScript(dmlScript, null, false, false, isPyDML, configFilePath); + return compileAndExecuteScript(dmlScript, null, false, false, isPyDML ? ScriptType.PYDML : ScriptType.DML, configFilePath); } /* @@ -1391,7 +1381,7 @@ public class MLContext { args[i] = entry.getKey() + "=" + entry.getValue(); i++; } - return compileAndExecuteScript(dmlScript, args, false, true, isPyDML, configFilePath); + return compileAndExecuteScript(dmlScript, args, false, true, isPyDML ? 
ScriptType.PYDML : ScriptType.DML, configFilePath); } private void checkIfRegisteringInputAllowed() throws DMLRuntimeException { @@ -1400,26 +1390,29 @@ public class MLContext { } } - private MLOutput compileAndExecuteScript(String dmlScriptFilePath, String [] args, boolean isNamedArgument, boolean isPyDML, String configFilePath) throws IOException, DMLException { - return compileAndExecuteScript(dmlScriptFilePath, args, true, isNamedArgument, isPyDML, configFilePath); + private MLOutput compileAndExecuteScript(String dmlScriptFilePath, String [] args, boolean isNamedArgument, ScriptType scriptType, String configFilePath) throws IOException, DMLException { + return compileAndExecuteScript(dmlScriptFilePath, args, true, isNamedArgument, scriptType, configFilePath); } - + /** * All the execute() methods call this, which after setting appropriate input/output variables * calls _compileAndExecuteScript
[2/2] incubator-systemml git commit: Use Apache Commons CLI to parse command line arguments in DMLScript
Use Apache Commons CLI to parse command line arguments in DMLScript - Added unit tests - changed scripts to accept "-config <config file>" instead of "-config=<config file>" - Removed DMLScriptTest{1,2} - Modified bin/systemml script to print a better help message - Removed extraneous ZPackageSuite for DMLScriptTest{1,2} Closes #440 Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/32924dc6 Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/32924dc6 Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/32924dc6 Branch: refs/heads/master Commit: 32924dc6027df1973b37a8688b7d0cacbdefd4bf Parents: f73673d Author: Nakul Jindal Authored: Fri Apr 21 14:50:56 2017 -0700 Committer: Nakul Jindal Committed: Fri Apr 21 14:50:57 2017 -0700 -- bin/systemml| 84 +-- bin/systemml.bat| 4 +- docs/algorithms-classification.md | 44 +- docs/algorithms-clustering.md | 14 +- docs/algorithms-descriptive-statistics.md | 14 +- docs/algorithms-matrix-factorization.md | 18 +- docs/algorithms-regression.md | 36 +- docs/algorithms-survival-analysis.md| 16 +- docs/hadoop-batch-mode.md | 16 +- docs/spark-batch-mode.md| 4 +- docs/standalone-guide.md| 2 +- docs/troubleshooting-guide.md | 2 +- scripts/sparkDML.sh | 2 +- .../java/org/apache/sysml/api/DMLScript.java| 576 --- .../java/org/apache/sysml/api/MLContext.java| 49 +- .../org/apache/sysml/api/jmlc/Connection.java | 2 +- .../sysml/api/mlcontext/ScriptExecutor.java | 2 +- .../org/apache/sysml/parser/ParserFactory.java | 16 +- .../java/org/apache/sysml/utils/Explain.java| 32 +- .../org/apache/sysml/yarn/DMLYarnClient.java| 3 +- src/main/resources/scripts/sparkDML.sh | 2 +- src/main/standalone/runStandaloneSystemML.bat | 4 +- src/main/standalone/runStandaloneSystemML.sh| 2 +- .../test/integration/AutomatedTestBase.java | 19 +- .../functions/dmlscript/DMLScriptTest1.java | 125 .../functions/dmlscript/DMLScriptTest2.java | 151 - 
.../functions/misc/DataTypeChangeTest.java | 14 +- .../parfor/ParForDependencyAnalysisTest.java| 2 +- .../TransformFrameEncodeDecodeTest.java | 15 +- .../sysml/test/unit/CLIOptionsParserTest.java | 415 + .../functions/dmlscript/ZPackageSuite.java | 37 -- 31 files changed, 992 insertions(+), 730 deletions(-) -- http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/32924dc6/bin/systemml -- diff --git a/bin/systemml b/bin/systemml index 0ccee2d..44ab45e 100755 --- a/bin/systemml +++ b/bin/systemml @@ -20,32 +20,22 @@ # #- + # error help print -printUsageExit() +printSimpleUsage() { cat << EOF Usage: $0 [arguments] [-help] --help - Print this usage message and exit +-help - Print detailed help message EOF exit 1 } -#Script internally invokes 'java -Xmx4g -Xms4g -Xmn400m [Custom-Java-Options] -jar StandaloneSystemML.jar -f -exec singlenode -config=SystemML-config.xml [Optional-Arguments]' -while getopts "h:" options; do - case $options in -h ) echo Warning: Help requested. Will exit after usage message -printUsageExit -;; -\? ) echo Warning: Help requested. Will exit after usage message -printUsageExit -;; -* ) echo Error: Unexpected error while processing options - esac -done +#Script internally invokes 'java -Xmx4g -Xms4g -Xmn400m [Custom-Java-Options] -jar StandaloneSystemML.jar -f -exec singlenode -config=SystemML-config.xml [Optional-Arguments]' if [ -z "$1" ] ; then echo "Wrong Usage."; -printUsageExit; +printSimpleUsage fi @@ -98,24 +88,6 @@ then fi -# Peel off first argument so that $@ contains arguments to DML script -SCRIPT_FILE=$1 -shift - -# if the script file path was omitted, try to complete the script path -if [ ! -f "$SCRIPT_FILE" ] -then - SCRIPT_FILE_NAME=$(basename $SCRIPT_FILE) - SCRIPT_FILE_FOUND=$(find "$PROJECT_ROOT_DIR/scripts" -name "$SCRIPT_FILE_NAME") - if [ ! 
"$SCRIPT_FILE_FOUND" ] - then -echo "Could not find DML script: $SCRIPT_FILE" -printUsageExit; - else -SCRIPT_FILE=$SCRIPT_FILE_FOUND -echo "DML script: $SCRIPT_FILE" - fi -fi # add hadoop libraries which were generated by the build to the classpath @@ -149,13 +121,57 @@ if [ -f "${PROJECT_ROOT_DIR}/conf/systemml-env.sh" ]; then
incubator-systemml git commit: Use Apache Commons CLI to parse command line arguments in DMLScript
Repository: incubator-systemml Updated Branches: refs/heads/gh-pages c4918f5b6 -> c5ff65305 Use Apache Commons CLI to parse command line arguments in DMLScript - Added unit tests - changed scripts to accept "-config <config file>" instead of "-config=<config file>" - Removed DMLScriptTest{1,2} - Modified bin/systemml script to print a better help message - Removed extraneous ZPackageSuite for DMLScriptTest{1,2} Closes #440 Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/c5ff6530 Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/c5ff6530 Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/c5ff6530 Branch: refs/heads/gh-pages Commit: c5ff65305a5baeec20aec25bd6fa9ab73b1e1990 Parents: c4918f5 Author: Nakul Jindal Authored: Fri Apr 21 14:50:56 2017 -0700 Committer: Nakul Jindal Committed: Fri Apr 21 14:50:57 2017 -0700 -- algorithms-classification.md | 44 +++ algorithms-clustering.md | 14 +- algorithms-descriptive-statistics.md | 14 +- algorithms-matrix-factorization.md | 18 ++--- algorithms-regression.md | 36 - algorithms-survival-analysis.md | 16 +-- hadoop-batch-mode.md | 16 +-- spark-batch-mode.md | 4 +-- standalone-guide.md | 2 +- troubleshooting-guide.md | 2 +- 10 files changed, 83 insertions(+), 83 deletions(-) -- http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/c5ff6530/algorithms-classification.md -- diff --git a/algorithms-classification.md b/algorithms-classification.md index b029e0a..ed56c34 100644 --- a/algorithms-classification.md +++ b/algorithms-classification.md @@ -165,7 +165,7 @@ val prediction = model.transform(X_test_df) --conf spark.akka.frameSize=128 SystemML.jar -f MultiLogReg.dml - -config=SystemML-config.xml + -config SystemML-config.xml -exec hybrid_spark -nvargs X= Y= @@ -336,7 +336,7 @@ prediction.show() --conf spark.akka.frameSize=128 SystemML.jar -f MultiLogReg.dml - -config=SystemML-config.xml + -config 
SystemML-config.xml -exec hybrid_spark -nvargs X=/user/ml/X.mtx Y=/user/ml/Y.mtx @@ -532,7 +532,7 @@ val model = svm.fit(X_train_df) --conf spark.akka.frameSize=128 SystemML.jar -f l2-svm.dml - -config=SystemML-config.xml + -config SystemML-config.xml -exec hybrid_spark -nvargs X= Y= @@ -579,7 +579,7 @@ val prediction = model.transform(X_test_df) --conf spark.akka.frameSize=128 SystemML.jar -f l2-svm-predict.dml - -config=SystemML-config.xml + -config SystemML-config.xml -exec hybrid_spark -nvargs X= Y=[file] @@ -661,7 +661,7 @@ using a held-out test set. Note that this is an optional argument. --conf spark.akka.frameSize=128 SystemML.jar -f l2-svm.dml - -config=SystemML-config.xml + -config SystemML-config.xml -exec hybrid_spark -nvargs X=/user/ml/X.mtx Y=/user/ml/y.mtx @@ -695,7 +695,7 @@ using a held-out test set. Note that this is an optional argument. --conf spark.akka.frameSize=128 SystemML.jar -f l2-svm-predict.dml - -config=SystemML-config.xml + -config SystemML-config.xml -exec hybrid
[1/5] incubator-systemml git commit: Refactored GPU{Context, Object} to make it friendlier for parfor
Repository: incubator-systemml Updated Branches: refs/heads/master 9ed27ad60 -> 129f0f6b0 http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/129f0f6b/src/test/java/org/apache/sysml/test/integration/AutomatedTestBase.java -- diff --git a/src/test/java/org/apache/sysml/test/integration/AutomatedTestBase.java b/src/test/java/org/apache/sysml/test/integration/AutomatedTestBase.java index f7071ba..b20f66a 100644 --- a/src/test/java/org/apache/sysml/test/integration/AutomatedTestBase.java +++ b/src/test/java/org/apache/sysml/test/integration/AutomatedTestBase.java @@ -1515,7 +1515,7 @@ public abstract class AutomatedTestBase /** * Enables expection of a line in standard output stream. * -* @param expected +* @param expectedLine */ public void setExpectedStdOut(String expectedLine) { this.expectedStdOut = expectedLine;
[2/5] incubator-systemml git commit: Refactored GPU{Context, Object} to make it friendlier for parfor
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/129f0f6b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java -- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java index c363ab1..3c32137 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java @@ -19,23 +19,49 @@ package org.apache.sysml.runtime.matrix.data; -import jcuda.Pointer; -import jcuda.Sizeof; -import jcuda.jcublas.JCublas2; -import jcuda.jcublas.cublasFillMode; -import jcuda.jcublas.cublasHandle; -import jcuda.jcublas.cublasOperation; -import jcuda.jcudnn.cudnnActivationDescriptor; -import jcuda.jcudnn.cudnnBatchNormMode; -import jcuda.jcudnn.cudnnConvolutionDescriptor; -import jcuda.jcudnn.cudnnConvolutionFwdPreference; -import jcuda.jcudnn.cudnnFilterDescriptor; -import jcuda.jcudnn.cudnnHandle; -import jcuda.jcudnn.cudnnPoolingDescriptor; -import jcuda.jcudnn.cudnnStatus; -import jcuda.jcudnn.cudnnTensorDescriptor; -import jcuda.jcusparse.JCusparse; -import jcuda.jcusparse.cusparseHandle; +import static jcuda.jcublas.cublasOperation.CUBLAS_OP_N; +import static jcuda.jcublas.cublasOperation.CUBLAS_OP_T; +import static jcuda.jcudnn.JCudnn.cudnnActivationForward; +import static jcuda.jcudnn.JCudnn.cudnnBatchNormalizationBackward; +import static jcuda.jcudnn.JCudnn.cudnnBatchNormalizationForwardInference; +import static jcuda.jcudnn.JCudnn.cudnnBatchNormalizationForwardTraining; +import static jcuda.jcudnn.JCudnn.cudnnConvolutionBackwardData; +import static jcuda.jcudnn.JCudnn.cudnnConvolutionBackwardFilter; +import static jcuda.jcudnn.JCudnn.cudnnConvolutionForward; +import static jcuda.jcudnn.JCudnn.cudnnCreateActivationDescriptor; +import static jcuda.jcudnn.JCudnn.cudnnCreateConvolutionDescriptor; +import static jcuda.jcudnn.JCudnn.cudnnCreateFilterDescriptor; +import 
static jcuda.jcudnn.JCudnn.cudnnCreatePoolingDescriptor; +import static jcuda.jcudnn.JCudnn.cudnnCreateTensorDescriptor; +import static jcuda.jcudnn.JCudnn.cudnnDestroyConvolutionDescriptor; +import static jcuda.jcudnn.JCudnn.cudnnDestroyFilterDescriptor; +import static jcuda.jcudnn.JCudnn.cudnnDestroyPoolingDescriptor; +import static jcuda.jcudnn.JCudnn.cudnnGetConvolutionBackwardDataWorkspaceSize; +import static jcuda.jcudnn.JCudnn.cudnnGetConvolutionBackwardFilterWorkspaceSize; +import static jcuda.jcudnn.JCudnn.cudnnGetConvolutionForwardWorkspaceSize; +import static jcuda.jcudnn.JCudnn.cudnnPoolingBackward; +import static jcuda.jcudnn.JCudnn.cudnnPoolingForward; +import static jcuda.jcudnn.JCudnn.cudnnSetActivationDescriptor; +import static jcuda.jcudnn.JCudnn.cudnnSetConvolution2dDescriptor; +import static jcuda.jcudnn.JCudnn.cudnnSetFilter4dDescriptor; +import static jcuda.jcudnn.JCudnn.cudnnSetPooling2dDescriptor; +import static jcuda.jcudnn.JCudnn.cudnnSetTensor4dDescriptor; +import static jcuda.jcudnn.cudnnActivationMode.CUDNN_ACTIVATION_RELU; +import static jcuda.jcudnn.cudnnConvolutionMode.CUDNN_CROSS_CORRELATION; +import static jcuda.jcudnn.cudnnDataType.CUDNN_DATA_DOUBLE; +import static jcuda.jcudnn.cudnnNanPropagation.CUDNN_PROPAGATE_NAN; +import static jcuda.jcudnn.cudnnPoolingMode.CUDNN_POOLING_MAX; +import static jcuda.jcudnn.cudnnTensorFormat.CUDNN_TENSOR_NCHW; +import static jcuda.jcusparse.JCusparse.cusparseDcsrgemm; +import static jcuda.jcusparse.JCusparse.cusparseDcsrmv; +import static jcuda.jcusparse.cusparseOperation.CUSPARSE_OPERATION_NON_TRANSPOSE; +import static jcuda.jcusparse.cusparseOperation.CUSPARSE_OPERATION_TRANSPOSE; +import static jcuda.runtime.JCuda.cudaDeviceSynchronize; +import static jcuda.runtime.JCuda.cudaMemcpy; +import static jcuda.runtime.cudaMemcpyKind.cudaMemcpyDeviceToDevice; +import static jcuda.runtime.cudaMemcpyKind.cudaMemcpyDeviceToHost; +import static jcuda.runtime.cudaMemcpyKind.cudaMemcpyHostToDevice; + import 
org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.sysml.api.DMLScript; @@ -72,10 +98,9 @@ import org.apache.sysml.runtime.instructions.cp.DoubleObject; import org.apache.sysml.runtime.instructions.gpu.GPUInstruction; import org.apache.sysml.runtime.instructions.gpu.context.ExecutionConfig; import org.apache.sysml.runtime.instructions.gpu.context.GPUContext; -import org.apache.sysml.runtime.instructions.gpu.context.JCudaContext; +import org.apache.sysml.runtime.instructions.gpu.context.GPUObject; +import org.apache.sysml.runtime.instructions.gpu.context.CSRPointer; import org.apache.sysml.runtime.instructions.gpu.context.JCudaKernels; -import org.apache.sysml.runtime.instructions.gpu.context.JCudaObject; -import org.apache.sysml.runtime.instructions.gpu.context.JCudaObject.CSRPointer; import org.apache.sysml.runtime.matrix.operators.AggregateOperator; impo
[4/5] incubator-systemml git commit: Refactored GPU{Context, Object} to make it friendlier for parfor
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/129f0f6b/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUContext.java -- diff --git a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUContext.java b/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUContext.java index d2309b0..708f291 100644 --- a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUContext.java +++ b/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUContext.java @@ -18,65 +18,584 @@ */ package org.apache.sysml.runtime.instructions.gpu.context; +import static jcuda.jcublas.JCublas2.cublasCreate; +import static jcuda.jcublas.JCublas2.cublasDestroy; +import static jcuda.jcudnn.JCudnn.cudnnCreate; +import static jcuda.jcudnn.JCudnn.cudnnDestroy; +import static jcuda.jcusparse.JCusparse.cusparseCreate; +import static jcuda.jcusparse.JCusparse.cusparseDestroy; +import static jcuda.runtime.JCuda.cudaDeviceScheduleBlockingSync; +import static jcuda.runtime.JCuda.cudaFree; +import static jcuda.runtime.JCuda.cudaGetDeviceCount; +import static jcuda.runtime.JCuda.cudaMalloc; +import static jcuda.runtime.JCuda.cudaMemGetInfo; +import static jcuda.runtime.JCuda.cudaMemset; +import static jcuda.runtime.JCuda.cudaSetDevice; +import static jcuda.runtime.JCuda.cudaSetDeviceFlags; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashMap; +import java.util.LinkedList; +import java.util.Map; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.sysml.api.DMLScript; -import org.apache.sysml.hops.OptimizerUtils; +import org.apache.sysml.conf.ConfigurationManager; +import org.apache.sysml.conf.DMLConfig; import org.apache.sysml.runtime.DMLRuntimeException; import org.apache.sysml.runtime.controlprogram.caching.MatrixObject; +import org.apache.sysml.runtime.instructions.gpu.GPUInstruction; +import 
org.apache.sysml.utils.GPUStatistics; +import org.apache.sysml.utils.LRUCacheMap; -//FIXME merge JCudaContext into GPUContext as this context is anyway CUDA specific +import jcuda.Pointer; +import jcuda.jcublas.cublasHandle; +import jcuda.jcudnn.cudnnHandle; +import jcuda.jcusparse.cusparseHandle; +import jcuda.runtime.JCuda; +import jcuda.runtime.cudaDeviceProp; -public abstract class GPUContext { +/** + * Represents a context per GPU accessible through the same JVM + * Each context holds cublas, cusparse, cudnn... handles which are separate for each GPU + */ +public class GPUContext { - protected static GPUContext currContext; - public static volatile Boolean isGPUContextCreated = false; + protected static final Log LOG = LogFactory.getLog(GPUContext.class.getName()); - protected GPUContext() {} + /** Eviction policies for {@link GPUContext#evict(long)} */ + public enum EvictionPolicy { + LRU, LFU, MIN_EVICT + } - /** -* Gets device memory available for SystemML operations -* -* @return available memory -*/ - public abstract long getAvailableMemory(); + /** currently employed eviction policy */ + public final EvictionPolicy evictionPolicy = EvictionPolicy.LRU; + + /** Map of free blocks allocate on GPU. maps size_of_block -> pointer on GPU */ + private LRUCacheMap> freeCUDASpaceMap = new LRUCacheMap<>(); + + /** To record size of allocated blocks */ + private HashMap cudaBlockSizeMap = new HashMap<>(); + + /** active device assigned to this GPUContext instance */ + private final int deviceNum; + + /** list of allocated {@link GPUObject} instances allocated on {@link GPUContext#deviceNum} GPU + * These are matrices allocated on the GPU on which rmvar hasn't been called yet. 
+ * If a {@link GPUObject} has more than one lock on it, it cannot be freed + * If it has zero locks on it, it can be freed, but it is preferrable to keep it around + * so that an extraneous host to dev transfer can be avoided */ + private ArrayList allocatedGPUObjects = new ArrayList<>(); + + /** cudnnHandle specific to the active GPU for this GPUContext */ + private cudnnHandle cudnnHandle; + + /** cublasHandle specific to the active GPU for this GPUContext */ + private cublasHandle cublasHandle; + + /** cusparseHandle specific to the active GPU for this GPUContext */ + private cusparseHandle cusparseHandle; + + /** to launch custom CUDA kernel, specific to the active GPU for this GPUContext */ + private JCudaKernels kernels; /** -* Ensures that all the CUDA cards on the current system are -* of the minimum required compute capability. -* (The minimum required compute capability is hard coded in {@link JCudaContext}. -* -* @throws DMLRuntimeException if DMLRuntimeException occurs -*
[3/5] incubator-systemml git commit: Refactored GPU{Context, Object} to make it friendlier for parfor
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/129f0f6b/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/JCudaKernels.java -- diff --git a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/JCudaKernels.java b/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/JCudaKernels.java index d858b0b..b4cb87d 100644 --- a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/JCudaKernels.java +++ b/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/JCudaKernels.java @@ -18,34 +18,25 @@ */ package org.apache.sysml.runtime.instructions.gpu.context; -import static jcuda.driver.JCudaDriver.cuCtxCreate; -import static jcuda.driver.JCudaDriver.cuCtxGetCurrent; -import static jcuda.driver.JCudaDriver.cuDeviceGet; -import static jcuda.driver.JCudaDriver.cuInit; import static jcuda.driver.JCudaDriver.cuLaunchKernel; import static jcuda.driver.JCudaDriver.cuModuleGetFunction; import static jcuda.driver.JCudaDriver.cuModuleLoadDataEx; -import static jcuda.driver.JCudaDriver.cuModuleUnload; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStream; import java.util.HashMap; -import jcuda.runtime.JCuda; import org.apache.sysml.runtime.DMLRuntimeException; import org.apache.sysml.runtime.io.IOUtilFunctions; -import jcuda.CudaException; import jcuda.Pointer; -import jcuda.driver.CUcontext; -import jcuda.driver.CUdevice; import jcuda.driver.CUfunction; import jcuda.driver.CUmodule; import jcuda.driver.CUresult; /** - * Utility class that allows LibMatrixCUDA as well as JCudaObject to invoke custom CUDA kernels. + * Utility class that allows LibMatrixCUDA as well as GPUObject to invoke custom CUDA kernels. * * The utility org.apache.sysml.runtime.instructions.gpu.context.JCudaKernels simplifies the launching of the kernels. 
* For example: to launch a kernel @@ -54,70 +45,23 @@ import jcuda.driver.CUresult; */ public class JCudaKernels { - private static String ptxFileName = "/kernels/SystemML.ptx"; + private final static String ptxFileName = "/kernels/SystemML.ptx"; private HashMap kernels = new HashMap(); private CUmodule module; + private final int deviceNum; /** * Loads the kernels in the file ptxFileName. Though cubin files are also supported, we will stick with * ptx file as they are target-independent similar to Java's .class files. -* +* @param deviceNum the device number for which to initiate the driver API * @throws DMLRuntimeException if DMLRuntimeException occurs */ - public JCudaKernels() throws DMLRuntimeException { - shutdown(); - initCUDA(); + JCudaKernels(int deviceNum) throws DMLRuntimeException { + this.deviceNum = deviceNum; module = new CUmodule(); // Load the kernels specified in the ptxFileName file checkResult(cuModuleLoadDataEx(module, initKernels(ptxFileName), 0, new int[0], Pointer.to(new int[0]))); } - - /** - * Initializes the JCuda driver API. Then it will try to attach to the - * current CUDA context. If no active CUDA context exists, then it will - * try to create one, for the device which is specified by the current - * deviceNumber. - * -* @throws DMLRuntimeException If it is neither possible to attach to an - * existing context, nor to create a new context. - */ -private static void initCUDA() throws DMLRuntimeException { -checkResult(cuInit(0)); - -// Try to obtain the current context -CUcontext context = new CUcontext(); -checkResult(cuCtxGetCurrent(context)); - -// If the context is 'null', then a new context -// has to be created. -CUcontext nullContext = new CUcontext(); -if (context.equals(nullContext)) { -createContext(); -} -} - -/** - * Tries to create a context for device 'deviceNumber'. 
- * @throws DMLRuntimeException - * - * @throws CudaException If the device can not be - * accessed or the context can not be created - */ -private static void createContext() throws DMLRuntimeException { - int deviceNumber = 0; -CUdevice device = new CUdevice(); -checkResult(cuDeviceGet(device, deviceNumber)); -CUcontext context = new CUcontext(); -checkResult(cuCtxCreate(context, 0, device)); -} - - /** -* Performs cleanup actions such as unloading the module -*/ - public void shutdown() { - if(module != null) - cuModuleUnload(module); - } /** * Setups the kernel parameters and launches the kernel using cuLaunchKernel API. @@ -167,7 +1
[5/5] incubator-systemml git commit: Refactored GPU{Context, Object} to make it friendlier for parfor
Refactored GPU{Contex,Object} to make it friendlier for parfor - Folded JCuda{Context,Object} to GPU{Context,Object} - Removed "deviceMemBytes", it was redundant - Removed all synchronized in GPU{Object,Contex} - print GPUContext from everywhere in log.trace - LibMatrixCUDA functions expect a GPUContext instead of getting it statically - Restructured GPUContext to use a pool of already initialized GPUContexts - Call cudaSetDevice when on different thread - TODO FIXME Disabled cublasDgeam for scalarMatrixArithmetic - TODO FIXME revisit the need to always force gpu to be used, mem est broken - Ability to restrict parfor from picking up all GPUs on the machine, from a system property Closes #462 Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/129f0f6b Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/129f0f6b Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/129f0f6b Branch: refs/heads/master Commit: 129f0f6b0e0f6167e4137c6d47374ab96501b888 Parents: 9ed27ad Author: Nakul Jindal Authored: Fri Apr 21 16:22:19 2017 -0700 Committer: Nakul Jindal Committed: Fri Apr 21 16:22:19 2017 -0700 -- conf/SystemML-config.xml.template |3 + .../java/org/apache/sysml/api/DMLScript.java| 27 +- .../api/mlcontext/MLContextConversionUtil.java |4 +- .../sysml/api/mlcontext/ScriptExecutor.java | 25 +- .../java/org/apache/sysml/conf/DMLConfig.java | 17 +- .../controlprogram/ParForProgramBlock.java | 25 + .../controlprogram/caching/CacheableData.java | 80 +- .../context/ExecutionContext.java | 56 +- .../controlprogram/parfor/LocalParWorker.java |5 + .../controlprogram/parfor/ParWorker.java|5 + .../parfor/opt/OptTreeConverter.java|2 +- .../cp/FunctionCallCPInstruction.java | 13 +- .../gpu/AggregateBinaryGPUInstruction.java |4 +- .../gpu/AggregateUnaryGPUInstruction.java |3 +- .../gpu/ConvolutionGPUInstruction.java | 25 +- 
.../instructions/gpu/GPUInstruction.java|3 +- .../instructions/gpu/MMTSJGPUInstruction.java |3 +- .../gpu/MatrixBuiltinGPUInstruction.java|5 +- .../MatrixMatrixArithmeticGPUInstruction.java |5 +- .../gpu/MatrixMatrixAxpyGPUInstruction.java |5 +- .../instructions/gpu/ReorgGPUInstruction.java |5 +- .../ScalarMatrixArithmeticGPUInstruction.java |4 +- .../instructions/gpu/context/CSRPointer.java| 457 ++ .../instructions/gpu/context/GPUContext.java| 619 ++- .../gpu/context/GPUContextPool.java | 158 ++ .../instructions/gpu/context/GPUObject.java | 957 --- .../instructions/gpu/context/JCudaContext.java | 286 .../instructions/gpu/context/JCudaKernels.java | 70 +- .../instructions/gpu/context/JCudaObject.java | 1330 --- .../runtime/matrix/data/LibMatrixCUDA.java | 1509 ++ .../test/integration/AutomatedTestBase.java |2 +- 31 files changed, 3035 insertions(+), 2677 deletions(-) -- http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/129f0f6b/conf/SystemML-config.xml.template -- diff --git a/conf/SystemML-config.xml.template b/conf/SystemML-config.xml.template index a4c7b2f..fe4437f 100644 --- a/conf/SystemML-config.xml.template +++ b/conf/SystemML-config.xml.template @@ -71,4 +71,7 @@ false + + + -1 http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/129f0f6b/src/main/java/org/apache/sysml/api/DMLScript.java -- diff --git a/src/main/java/org/apache/sysml/api/DMLScript.java b/src/main/java/org/apache/sysml/api/DMLScript.java index ce60d55..febbf13 100644 --- a/src/main/java/org/apache/sysml/api/DMLScript.java +++ b/src/main/java/org/apache/sysml/api/DMLScript.java @@ -89,6 +89,7 @@ import org.apache.sysml.runtime.controlprogram.parfor.ProgramConverter; import org.apache.sysml.runtime.controlprogram.parfor.stat.InfrastructureAnalyzer; import org.apache.sysml.runtime.controlprogram.parfor.util.IDHandler; import org.apache.sysml.runtime.instructions.gpu.context.GPUContext; +import org.apache.sysml.runtime.instructions.gpu.context.GPUContextPool; import 
org.apache.sysml.runtime.io.IOUtilFunctions; import org.apache.sysml.runtime.matrix.CleanupMR; import org.apache.sysml.runtime.matrix.data.LibMatrixDNN; @@ -111,7 +112,7 @@ public class DMLScript HADOOP, // execute all matrix operations in MR SINGLE_NODE,// execute all matrix operations in CP HYBRID, // execute matrix operations i
incubator-systemml git commit: [HOTFIX] for missing apache license in CSRPointer
Repository: incubator-systemml Updated Branches: refs/heads/master 63e28a37b -> b481324d0 [HOTFIX] for missing apache license in CSRPointer Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/b481324d Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/b481324d Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/b481324d Branch: refs/heads/master Commit: b481324d06429d3435fcd25a78aef971e5498b6c Parents: 63e28a3 Author: Nakul Jindal Authored: Sat Apr 22 00:55:50 2017 -0700 Committer: Nakul Jindal Committed: Sat Apr 22 00:55:50 2017 -0700 -- .../instructions/gpu/context/CSRPointer.java | 19 +++ 1 file changed, 19 insertions(+) -- http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/b481324d/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/CSRPointer.java -- diff --git a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/CSRPointer.java b/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/CSRPointer.java index 5e202a9..c25bd22 100644 --- a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/CSRPointer.java +++ b/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/CSRPointer.java @@ -1,3 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + package org.apache.sysml.runtime.instructions.gpu.context; import static jcuda.jcusparse.JCusparse.cusparseCreateMatDescr;
incubator-systemml git commit: [SYSTEMML-1034] Initial implementation of "solve" for GPU
Repository: incubator-systemml Updated Branches: refs/heads/master f2a927f87 -> e8fbc7539 [SYSTEMML-1034] Initial implementation of "solve" for GPU Closes #476 Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/e8fbc753 Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/e8fbc753 Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/e8fbc753 Branch: refs/heads/master Commit: e8fbc753988dc94e97a8e8b723e22e89483a1fc6 Parents: f2a927f Author: Nakul Jindal Authored: Sun Apr 30 21:45:21 2017 -0700 Committer: Nakul Jindal Committed: Sun Apr 30 21:45:21 2017 -0700 -- .../java/org/apache/sysml/hops/BinaryOp.java| 2 +- .../instructions/GPUInstructionParser.java | 17 ++- .../gpu/BuiltinBinaryGPUInstruction.java| 78 +++ .../gpu/BuiltinUnaryGPUInstruction.java | 2 +- .../instructions/gpu/GPUInstruction.java| 2 +- .../gpu/MatrixMatrixBuiltinGPUInstruction.java | 58 .../instructions/gpu/context/CSRPointer.java| 29 +++- .../instructions/gpu/context/GPUContext.java| 35 - .../instructions/gpu/context/GPUObject.java | 72 +++--- .../runtime/matrix/data/LibMatrixCUDA.java | 133 ++- 10 files changed, 391 insertions(+), 37 deletions(-) -- http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/e8fbc753/src/main/java/org/apache/sysml/hops/BinaryOp.java -- diff --git a/src/main/java/org/apache/sysml/hops/BinaryOp.java b/src/main/java/org/apache/sysml/hops/BinaryOp.java index 7ddc656..17a099f 100644 --- a/src/main/java/org/apache/sysml/hops/BinaryOp.java +++ b/src/main/java/org/apache/sysml/hops/BinaryOp.java @@ -592,7 +592,7 @@ public class BinaryOp extends Hop if ( et == ExecType.CP ) { if(DMLScript.USE_ACCELERATOR && (DMLScript.FORCE_ACCELERATOR || getMemEstimate() < OptimizerUtils.GPU_MEMORY_BUDGET) - && (op == OpOp2.MULT || op == OpOp2.PLUS || op == OpOp2.MINUS || op == OpOp2.DIV || op == OpOp2.POW)) { + && (op == OpOp2.MULT || op == OpOp2.PLUS || op == 
OpOp2.MINUS || op == OpOp2.DIV || op == OpOp2.POW || op == OpOp2.SOLVE)) { et = ExecType.GPU; } http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/e8fbc753/src/main/java/org/apache/sysml/runtime/instructions/GPUInstructionParser.java -- diff --git a/src/main/java/org/apache/sysml/runtime/instructions/GPUInstructionParser.java b/src/main/java/org/apache/sysml/runtime/instructions/GPUInstructionParser.java index e5b3326..ef0412c 100644 --- a/src/main/java/org/apache/sysml/runtime/instructions/GPUInstructionParser.java +++ b/src/main/java/org/apache/sysml/runtime/instructions/GPUInstructionParser.java @@ -23,6 +23,7 @@ import java.util.HashMap; import org.apache.sysml.runtime.DMLRuntimeException; import org.apache.sysml.runtime.instructions.gpu.AggregateBinaryGPUInstruction; import org.apache.sysml.runtime.instructions.gpu.ArithmeticBinaryGPUInstruction; +import org.apache.sysml.runtime.instructions.gpu.BuiltinBinaryGPUInstruction; import org.apache.sysml.runtime.instructions.gpu.BuiltinUnaryGPUInstruction; import org.apache.sysml.runtime.instructions.gpu.ConvolutionGPUInstruction; import org.apache.sysml.runtime.instructions.gpu.GPUInstruction; @@ -68,12 +69,15 @@ public class GPUInstructionParser extends InstructionParser String2GPUInstructionType.put( "^2" , GPUINSTRUCTION_TYPE.ArithmeticBinary); //special ^ case String2GPUInstructionType.put( "*2" , GPUINSTRUCTION_TYPE.ArithmeticBinary); //special * case String2GPUInstructionType.put( "-nz" , GPUINSTRUCTION_TYPE.ArithmeticBinary); //special - case - String2GPUInstructionType.put( "+*" , GPUINSTRUCTION_TYPE.ArithmeticBinary); - String2GPUInstructionType.put( "-*" , GPUINSTRUCTION_TYPE.ArithmeticBinary); + String2GPUInstructionType.put( "+*" , GPUINSTRUCTION_TYPE.ArithmeticBinary); + String2GPUInstructionType.put( "-*" , GPUINSTRUCTION_TYPE.ArithmeticBinary); // Builtin functions - String2GPUInstructionType.put( "sel+" , GPUINSTRUCTION_TYPE.BuiltinUnary); - String2GPUInstructionType.put( "exp" , 
GPUINSTRUCTION_TYPE.BuiltinUnary); + String2GPUInstructionType.put( "sel+" , GPUINSTRUCTION_TYPE.Bui
incubator-systemml git commit: [HOTFIX] changes setGPU and setForceGPU to do the right thing in mlctx
Repository: incubator-systemml Updated Branches: refs/heads/master 1cc219527 -> 7989ab4f3 [HOTFIX] changes setGPU and setForceGPU to do the right thing in mlctx Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/7989ab4f Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/7989ab4f Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/7989ab4f Branch: refs/heads/master Commit: 7989ab4f39802d0706618d495d06cb8126f98300 Parents: 1cc2195 Author: Nakul Jindal Authored: Mon May 1 17:50:44 2017 -0700 Committer: Nakul Jindal Committed: Mon May 1 17:50:44 2017 -0700 -- .../sysml/api/mlcontext/ScriptExecutor.java | 24 ++-- 1 file changed, 12 insertions(+), 12 deletions(-) -- http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/7989ab4f/src/main/java/org/apache/sysml/api/mlcontext/ScriptExecutor.java -- diff --git a/src/main/java/org/apache/sysml/api/mlcontext/ScriptExecutor.java b/src/main/java/org/apache/sysml/api/mlcontext/ScriptExecutor.java index 2044875..ee710b6 100644 --- a/src/main/java/org/apache/sysml/api/mlcontext/ScriptExecutor.java +++ b/src/main/java/org/apache/sysml/api/mlcontext/ScriptExecutor.java @@ -248,12 +248,8 @@ public class ScriptExecutor { if (symbolTable != null) { executionContext.setVariables(symbolTable); } - oldGPU = DMLScript.USE_ACCELERATOR; - oldStatistics = DMLScript.STATISTICS; - oldForceGPU = DMLScript.FORCE_ACCELERATOR; - DMLScript.USE_ACCELERATOR = gpu; - DMLScript.FORCE_ACCELERATOR = forceGPU; - DMLScript.STATISTICS = statistics; +oldStatistics = DMLScript.STATISTICS; +DMLScript.STATISTICS = statistics; } /** @@ -654,17 +650,21 @@ public class ScriptExecutor { * @param enabled * true if enabled, false otherwise */ - public void setGPU(boolean enabled) { - this.gpu = enabled; - } +public void setGPU(boolean enabled) { +this.gpu = enabled; +oldGPU = DMLScript.USE_ACCELERATOR; +DMLScript.USE_ACCELERATOR = 
gpu; +} /** * Whether or not to force GPU usage * @param enabled * true if enabled, false otherwise */ - public void setForceGPU(boolean enabled) { - this.forceGPU = enabled; - } +public void setForceGPU(boolean enabled) { +this.forceGPU = enabled; +oldForceGPU = DMLScript.FORCE_ACCELERATOR; +DMLScript.FORCE_ACCELERATOR = forceGPU; +} }
incubator-systemml git commit: [HOTFIX] Bug fix for solve, removed warnings and added instrumentation
Repository: incubator-systemml Updated Branches: refs/heads/master 76f3ca5d3 -> 2c5c3b14e [HOTFIX] Bug fix for solve, removed warnings and added instrumentation Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/2c5c3b14 Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/2c5c3b14 Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/2c5c3b14 Branch: refs/heads/master Commit: 2c5c3b14e1906cda70ae1581b19a5e908b3ab329 Parents: 76f3ca5 Author: Nakul Jindal Authored: Thu May 4 16:26:47 2017 -0700 Committer: Nakul Jindal Committed: Thu May 4 16:26:47 2017 -0700 -- .../instructions/GPUInstructionParser.java | 4 +- .../gpu/BuiltinBinaryGPUInstruction.java| 2 + .../instructions/gpu/GPUInstruction.java| 28 --- .../gpu/MatrixMatrixBuiltinGPUInstruction.java | 1 + .../instructions/gpu/context/GPUContext.java| 2 + .../instructions/gpu/context/GPUObject.java | 3 +- .../runtime/matrix/data/LibMatrixCUDA.java | 77 +++- 7 files changed, 86 insertions(+), 31 deletions(-) -- http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/2c5c3b14/src/main/java/org/apache/sysml/runtime/instructions/GPUInstructionParser.java -- diff --git a/src/main/java/org/apache/sysml/runtime/instructions/GPUInstructionParser.java b/src/main/java/org/apache/sysml/runtime/instructions/GPUInstructionParser.java index ef0412c..4a45521 100644 --- a/src/main/java/org/apache/sysml/runtime/instructions/GPUInstructionParser.java +++ b/src/main/java/org/apache/sysml/runtime/instructions/GPUInstructionParser.java @@ -35,9 +35,9 @@ import org.apache.sysml.runtime.instructions.gpu.AggregateUnaryGPUInstruction; public class GPUInstructionParser extends InstructionParser { - public static final HashMap String2GPUInstructionType; + static final HashMap String2GPUInstructionType; static { - String2GPUInstructionType = new HashMap(); + String2GPUInstructionType = new HashMap<>(); // Neural 
Network Operators String2GPUInstructionType.put( "relu_backward", GPUINSTRUCTION_TYPE.Convolution); http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/2c5c3b14/src/main/java/org/apache/sysml/runtime/instructions/gpu/BuiltinBinaryGPUInstruction.java -- diff --git a/src/main/java/org/apache/sysml/runtime/instructions/gpu/BuiltinBinaryGPUInstruction.java b/src/main/java/org/apache/sysml/runtime/instructions/gpu/BuiltinBinaryGPUInstruction.java index 372f883..24e9e79 100644 --- a/src/main/java/org/apache/sysml/runtime/instructions/gpu/BuiltinBinaryGPUInstruction.java +++ b/src/main/java/org/apache/sysml/runtime/instructions/gpu/BuiltinBinaryGPUInstruction.java @@ -30,7 +30,9 @@ import org.apache.sysml.runtime.matrix.operators.Operator; public abstract class BuiltinBinaryGPUInstruction extends GPUInstruction { + @SuppressWarnings("unused") private int _arity; + CPOperand output; CPOperand input1, input2; http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/2c5c3b14/src/main/java/org/apache/sysml/runtime/instructions/gpu/GPUInstruction.java -- diff --git a/src/main/java/org/apache/sysml/runtime/instructions/gpu/GPUInstruction.java b/src/main/java/org/apache/sysml/runtime/instructions/gpu/GPUInstruction.java index 9eef072..f4c523b 100644 --- a/src/main/java/org/apache/sysml/runtime/instructions/gpu/GPUInstruction.java +++ b/src/main/java/org/apache/sysml/runtime/instructions/gpu/GPUInstruction.java @@ -35,16 +35,20 @@ public abstract class GPUInstruction extends Instruction public enum GPUINSTRUCTION_TYPE { AggregateUnary, AggregateBinary, Convolution, MMTSJ, Reorg, ArithmeticBinary, BuiltinUnary, BuiltinBinary, Builtin }; // Memory/conversions - public final static String MISC_TIMER_HOST_TO_DEVICE = "H2D"; // time spent in bringing data to gpu (from host) - public final static String MISC_TIMER_DEVICE_TO_HOST = "D2H"; // time spent in bringing data from gpu (to host) - public final static String MISC_TIMER_DEVICE_TO_DEVICE = "D2D"; // time spent in 
copying data from one region on the device to another - public final static String MISC_TIMER_SPARSE_TO_DENSE = "s2d"; // time spent in converting data from sparse to dense - public final static String MISC_TIMER_DENSE_TO_SPARSE = "d2s"; // time spent in converting d
[1/2] incubator-systemml git commit: [SYSTEMML-1344] sqrt, round, abs, log, floor, ceil, trig funcs & sign for GPU
Repository: incubator-systemml Updated Branches: refs/heads/master 0d553e384 -> 1fc764b9b http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/1fc764b9/src/main/java/org/apache/sysml/runtime/instructions/gpu/GPUInstruction.java -- diff --git a/src/main/java/org/apache/sysml/runtime/instructions/gpu/GPUInstruction.java b/src/main/java/org/apache/sysml/runtime/instructions/gpu/GPUInstruction.java index f4c523b..48b7da6 100644 --- a/src/main/java/org/apache/sysml/runtime/instructions/gpu/GPUInstruction.java +++ b/src/main/java/org/apache/sysml/runtime/instructions/gpu/GPUInstruction.java @@ -61,41 +61,55 @@ public abstract class GPUInstruction extends Instruction public final static String MISC_TIMER_SYRK_LIB = "Msyrk";// time spent in symmetric rank-k update // Other BLAS instructions - public final static String MISC_TIMER_DAXPY_LIB = "daxpy"; // time spent in daxpy - public final static String MISC_TIMER_QR_BUFFER = "qr_buffer"; // time spent in calculating buffer needed to perform QR - public final static String MISC_TIMER_QR = "qr";// time spent in doing QR - public final static String MISC_TIMER_ORMQR = "ormqr"; // time spent in ormqr - public final static String MISC_TIMER_TRSM = "trsm"; // time spent in cublas Dtrsm + public final static String MISC_TIMER_DAXPY_LIB = "daxpy";// time spent in daxpy + public final static String MISC_TIMER_QR_BUFFER = "qr_buffer";// time spent in calculating buffer needed to perform QR + public final static String MISC_TIMER_QR = "qr"; // time spent in doing QR + public final static String MISC_TIMER_ORMQR = "ormqr";// time spent in ormqr + public final static String MISC_TIMER_TRSM ="trsm"; // time spent in cublas Dtrsm // Transpose - public final static String MISC_TIMER_SPARSE_DGEAM_LIB = "sdgeaml"; // time spent in sparse transpose (and other ops of type a*op(A) + b*op(B)) - public final static String MISC_TIMER_DENSE_DGEAM_LIB = "ddgeaml"; // time spent in dense transpose (and other ops of type a*op(A) + b*op(B)) - 
public final static String MISC_TIMER_TRANSPOSE_LIB = "dtl"; // time spent on dense transpose, this includes allocation of output + public final static String MISC_TIMER_SPARSE_DGEAM_LIB ="sdgeaml"; // time spent in sparse transpose (and other ops of type a*op(A) + b*op(B)) + public final static String MISC_TIMER_DENSE_DGEAM_LIB = "ddgeaml"; // time spent in dense transpose (and other ops of type a*op(A) + b*op(B)) + public final static String MISC_TIMER_TRANSPOSE_LIB = "dtl"; // time spent on dense transpose, this includes allocation of output // Custom kernels - public final static String MISC_TIMER_MATRIX_MATRIX_CELLWISE_OP_KERNEL ="mmck"; // time spent in matrix-matrix cellwise operations - public final static String MISC_TIMER_COMPARE_AND_SET_KERNEL = "cask"; // time spent in compareAndSet kernel - public final static String MISC_TIMER_EXP_KERNEL = "expk"; // time spent in the exp kernel - public final static String MISC_TIMER_DAXPY_MV_KERNEL = "daxpymv"; // time spent in the daxpy_matrix_vector kernel - public final static String MISC_TIMER_UPPER_TO_LOWER_TRIANGLE_KERNEL = "u2lk"; // time spent in the copy_u2l_dense kernel - public final static String MISC_TIMER_FILL_KERNEL = "fillk"; // time spent in the "fill" kernel - public final static String MISC_TIMER_MATRIX_SCALAR_OP_KERNEL = "msk"; // time spent in the matrix scalar kernel - public final static String MISC_TIMER_REDUCE_ALL_KERNEL = "rallk"; // time spent in reduce all kernel - public final static String MISC_TIMER_REDUCE_ROW_KERNEL = "rrowk"; // time spent in reduce row kernel - public final static String MISC_TIMER_REDUCE_COL_KERNEL = "rcolk";// time spent in reduce column kernel + public final static String MISC_TIMER_MATRIX_MATRIX_CELLWISE_OP_KERNEL = "mmck"; // time spent in matrix-matrix cellwise operations + public final static String MISC_TIMER_COMPARE_AND_SET_KERNEL =
[2/2] incubator-systemml git commit: [SYSTEMML-1344] sqrt, round, abs, log, floor, ceil, trig funcs & sign for GPU
[SYSTEMML-1344] sqrt,round,abs,log,floor,ceil,trig funcs & sign for GPU Closes #503 Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/1fc764b9 Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/1fc764b9 Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/1fc764b9 Branch: refs/heads/master Commit: 1fc764b9b099271822056a82e248acdbb785dc63 Parents: 0d553e3 Author: Nakul Jindal Authored: Wed May 17 10:55:51 2017 -0700 Committer: Nakul Jindal Committed: Wed May 17 10:55:51 2017 -0700 -- src/main/cpp/kernels/Makefile | 28 + src/main/cpp/kernels/SystemML.cu| 187 ++ src/main/cpp/kernels/SystemML.ptx | 2506 ++ .../java/org/apache/sysml/hops/UnaryOp.java | 10 +- .../instructions/GPUInstructionParser.java | 19 +- .../instructions/gpu/GPUInstruction.java| 72 +- .../gpu/MatrixBuiltinGPUInstruction.java| 41 +- .../instructions/gpu/context/CSRPointer.java|2 +- .../instructions/gpu/context/GPUObject.java |4 +- .../runtime/matrix/data/LibMatrixCUDA.java | 226 +- 10 files changed, 2577 insertions(+), 518 deletions(-) -- http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/1fc764b9/src/main/cpp/kernels/Makefile -- diff --git a/src/main/cpp/kernels/Makefile b/src/main/cpp/kernels/Makefile new file mode 100644 index 000..0b003f3 --- /dev/null +++ b/src/main/cpp/kernels/Makefile @@ -0,0 +1,28 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +NVCC=nvcc +CUDAFLAGS= -ptx -c -arch=sm_30 + +SystemML.o: SystemML.cu + $(NVCC) $(CUDAFLAGS) SystemML.cu + +all: SystemML.o + ; + +clean: + rm -rf SystemML.ptx http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/1fc764b9/src/main/cpp/kernels/SystemML.cu -- diff --git a/src/main/cpp/kernels/SystemML.cu b/src/main/cpp/kernels/SystemML.cu index 2651e4a..5b4574e 100644 --- a/src/main/cpp/kernels/SystemML.cu +++ b/src/main/cpp/kernels/SystemML.cu @@ -656,3 +656,190 @@ __global__ void matrix_exp(double *A, double *C, unsigned int size) { C[index] = exp(A[index]); } } + +/** + * Do an sqrt over all the elements of a matrix + * @param A the input matrix (of length = size) + * @param C the pre-allocated output matrix (of length = size) + * @param siz the length of the input and output matrices + */ +extern "C" +__global__ void matrix_sqrt(double *A, double *C, unsigned int size) { +int index = blockIdx.x * blockDim.x + threadIdx.x; +if (index < size){ +C[index] = sqrt(A[index]); +} +} + +/** + * Do an round over all the elements of a matrix + * @param A the input matrix (of length = size) + * @param C the pre-allocated output matrix (of length = size) + * @param siz the length of the input and output matrices + */ +extern "C" +__global__ void matrix_round(double *A, double *C, unsigned int size) { +int index = blockIdx.x * blockDim.x + threadIdx.x; +if (index < size){ +C[index] = (double)llround(A[index]); +} +} + +/** + * Do an abs over all the elements of a matrix + * @param A the input matrix (of length = size) + * @param C the pre-allocated output 
matrix (of length = size) + * @param siz the length of the input and output matrices + */ +extern "C" +__global__ void matrix_abs(double *A, double *C, unsigned int size) { +int index = blockIdx.x * blockDim.x + threadIdx.x; +if (index < size){ +C[index] = (double)fabs(A[index]); +} +} + +/** + * Do an log over all the elements of a matrix + * @param A the input matrix (of length = size) + * @param C the pre-allocated output matrix (of length = size) + * @param siz the length of the input and output matrices + */ +extern "C" +__global__ void matrix_log(double *A, double *C, unsigned int size) { +int index = blockIdx.x * blockDim.x + threadIdx.x; +if
incubator-systemml git commit: [HOTFIX] for sparse GPU transpose
Repository: incubator-systemml Updated Branches: refs/heads/master 1fc764b9b -> c3aeb48bf [HOTFIX] for sparse GPU transpose Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/c3aeb48b Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/c3aeb48b Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/c3aeb48b Branch: refs/heads/master Commit: c3aeb48bf6b54febb861b7b4381c3d7af450a8e8 Parents: 1fc764b Author: Nakul Jindal Authored: Wed May 17 18:46:21 2017 -0700 Committer: Nakul Jindal Committed: Wed May 17 18:46:21 2017 -0700 -- .../runtime/matrix/data/LibMatrixCUDA.java | 118 +++ 1 file changed, 68 insertions(+), 50 deletions(-) -- http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/c3aeb48b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java -- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java index 074119b..b023159 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java @@ -52,6 +52,7 @@ import static jcuda.jcudnn.cudnnDataType.CUDNN_DATA_DOUBLE; import static jcuda.jcudnn.cudnnNanPropagation.CUDNN_PROPAGATE_NAN; import static jcuda.jcudnn.cudnnPoolingMode.CUDNN_POOLING_MAX; import static jcuda.jcudnn.cudnnTensorFormat.CUDNN_TENSOR_NCHW; +import static jcuda.jcusparse.JCusparse.cusparseDcsr2csc; import static jcuda.jcusparse.JCusparse.cusparseDcsrgemm; import static jcuda.jcusparse.JCusparse.cusparseDcsrmv; import static jcuda.jcusparse.cusparseOperation.CUSPARSE_OPERATION_NON_TRANSPOSE; @@ -61,6 +62,8 @@ import static jcuda.runtime.cudaMemcpyKind.cudaMemcpyDeviceToDevice; import static jcuda.runtime.cudaMemcpyKind.cudaMemcpyDeviceToHost; import static jcuda.runtime.cudaMemcpyKind.cudaMemcpyHostToDevice; 
+import jcuda.jcusparse.cusparseAction; +import jcuda.jcusparse.cusparseIndexBase; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.sysml.api.DMLScript; @@ -2732,7 +2735,7 @@ public class LibMatrixCUDA { * Performs sparse and dense dgeam given two input matrices * C = alpha* op( A ) + beta* op ( B ) * where op = transpose or not (specified by isLeftTransposed and isRightTransposed). -* +* To indicate a transpose operation, make sure in1 == in2 and isLeftTransposed == isRightTransposed == true * @param ec execution context * @param gCtx a valid {@link GPUContext} * @param instName the invoking instruction's name for record {@link Statistics}. @@ -2756,35 +2759,6 @@ public class LibMatrixCUDA { int transa = isLeftTransposed ? CUBLAS_OP_T : CUBLAS_OP_N; int transb = isRightTransposed ? CUBLAS_OP_T : CUBLAS_OP_N; - int lda = (int) in1.getNumColumns(); - int ldb = (int) in2.getNumColumns(); - int m = (int) in1.getNumColumns(); - int n = (int) in2.getNumRows(); - if (isLeftTransposed && isRightTransposed) { - m = (int) in1.getNumRows(); - n = (int) in2.getNumColumns(); - } - else if (isLeftTransposed) { - m = (int) in1.getNumRows(); - } else if (isRightTransposed) { - n = (int) in2.getNumColumns(); - } - int ldc = m; - - - - /** - int m = (int) in1.getNumRows(); - int n = (int) in1.getNumColumns(); - if(!isLeftTransposed && isRightTransposed) { - m = (int) in1.getNumColumns(); - n = (int) in1.getNumRows(); - } - int lda = isLeftTransposed ? n : m; - int ldb = isRightTransposed ? 
n : m; - int ldc = m; - **/ - MatrixObject out = ec.getMatrixObject(outputName); boolean isSparse1 = isInSparseFormat(gCtx, in1); boolean isSparse2 = isInSparseFormat(gCtx, in2); @@ -2792,39 +2766,83 @@ public class LibMatrixCUDA { long t0=0,t1=0; // TODO: Implement sparse-dense matrix cublasDgeam kernel if(isSparse1 || isSparse2) { + int m = (int)in1.getNumRows(); + int n = (int)in1.getNumColumns(); // Invoke cuSparse when either are in sparse format // Perform sparse-sparse dgeam -
[1/2] incubator-systemml git commit: [SYSTEMML-1625] GPU Unit Tests (and GPU row/col variance bug fix)
Repository: incubator-systemml Updated Branches: refs/heads/master ceeec4bbf -> 772fb5883 http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/772fb588/src/test/java/org/apache/sysml/test/gpu/MatrixMatrixElementWiseOpTests.java -- diff --git a/src/test/java/org/apache/sysml/test/gpu/MatrixMatrixElementWiseOpTests.java b/src/test/java/org/apache/sysml/test/gpu/MatrixMatrixElementWiseOpTests.java new file mode 100644 index 000..4052fef --- /dev/null +++ b/src/test/java/org/apache/sysml/test/gpu/MatrixMatrixElementWiseOpTests.java @@ -0,0 +1,271 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.sysml.test.gpu; + +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; + +import org.apache.sysml.api.mlcontext.Matrix; +import org.apache.sysml.test.utils.TestUtils; +import org.junit.Ignore; +import org.junit.Test; + +/** + * Test Elementwise operations on the GPU + */ +public class MatrixMatrixElementWiseOpTests extends GPUTests { + private final static String TEST_NAME = "MatrixMatrixElementWiseOpTests"; + + private final int[] rowSizes = new int[] { 1, 64, 130, 1024, 2049 }; + private final int[] columnSizes = new int[] { 1, 64, 130, 1024, 2049 }; + private final double[] sparsities = new double[] { 0.0, 0.03, 0.3, 0.9 }; + private final double[] scalars = new double[] { 0.0, 0.5, 2.0 }; + private final int seed = 42; + + @Override + public void setUp() { + TestUtils.clearAssertionInformation(); + addTestConfiguration(TEST_DIR, TEST_NAME); + getAndLoadTestConfiguration(TEST_NAME); + } + + @Test + public void testAxpy() { + runAxpyTest("O = a*X + Y", "X", "Y", "a", "O", "gpu_-*"); + } + + @Test + public void testAxmy() { + runAxpyTest("O = X - a*Y", "X", "Y", "a", "O", "gpu_+*"); + } + + @Test + public void testAdd() { + runMatrixMatrixElementwiseTest("O = X + Y", "X", "Y", "O", "gpu_+"); + } + + @Test + public void testMatrixColumnVectorAdd() { + runMatrixColumnVectorTest("O = X + Y", "X", "Y", "O", "gpu_+"); + } + + @Test + public void testMatrixRowVectorAdd() { + runMatrixRowVectorTest("O = X + Y", "X", "Y", "O", "gpu_+"); + } + + @Test + public void testSubtract() { + runMatrixMatrixElementwiseTest("O = X - Y", "X", "Y", "O", "gpu_-"); + } + + @Test + public void testMatrixColumnVectorSubtract() { + runMatrixColumnVectorTest("O = X - Y", "X", "Y", "O", "gpu_-"); + } + + @Test + public void testMatrixRowVectorSubtract() { + runMatrixRowVectorTest("O = X - Y", "X", "Y", "O", "gpu_-"); + } + + @Test + public void testMultiply() { + runMatrixMatrixElementwiseTest("O = X * Y", "X", "Y", "O", "gpu_*"); + } + + 
@Test + public void testMatrixColumnVectorMultiply() { + runMatrixColumnVectorTest("O = X * Y", "X", "Y", "O", "gpu_*"); + } + + @Test + public void testMatrixRowVectorMultiply() { + runMatrixRowVectorTest("O = X * Y", "X", "Y", "O", "gpu_*"); + } + + @Test + public void testDivide() { + runMatrixMatrixElementwiseTest("O = X / Y", "X", "Y", "O", "gpu_/"); + } + + @Test + public void testMatrixColumnVectorDivide() { + runMatrixColumnVectorTest("O = X / Y", "X", "Y", "O", "gpu_/"); + } + + @Test + public void testMatrixRowVectorDivide() { + runMatrixRowVectorTest("O = X / Y", "X", "Y", "O", "gpu_/"); + } + + // + // IGNORED TEST ** + // FIXME : There is a bug in CPU "^" when a A ^ B is executed where A & B are all zeroes + @Ignore + @Test + public void testPower() { + runMatrixMatrixElementwiseTest("O = X ^ Y", "X",
[2/2] incubator-systemml git commit: [SYSTEMML-1625] GPU Unit Tests (and GPU row/col variance bug fix)
[SYSTEMML-1625] GPU Unit Tests (and GPU row/col variance bug fix) - Documented random matrix generation - GPU unit test using MLContext. Compares CPU output to GPU - Pseudo-unit tests for GPU implementations of unary ops, unary aggregate ops, transpose, elementwise ops, matrix multiplication ops, builtin ops & NN ops - Fixed crucial bug in col/row var - gpuTests profile for GPU tests (mvn verify -PgpuTests) - Updated intellij style for import order Closes #513 Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/772fb588 Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/772fb588 Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/772fb588 Branch: refs/heads/master Commit: 772fb588324916e4225bb6e1970ca6a8f87eb414 Parents: ceeec4b Author: Nakul Jindal Authored: Wed May 31 21:54:13 2017 -0700 Committer: Nakul Jindal Committed: Wed May 31 21:54:13 2017 -0700 -- dev/code-style/systemml-style-intellij.xml | 18 + pom.xml | 10 + .../apache/sysml/api/ScriptExecutorUtils.java | 1 + .../context/ExecutionContext.java | 3 + .../instructions/GPUInstructionParser.java | 120 +++-- .../instructions/gpu/context/GPUContext.java| 118 +++-- .../instructions/gpu/context/GPUObject.java | 55 +- .../instructions/gpu/context/JCudaKernels.java | 3 +- .../runtime/matrix/data/LibMatrixCUDA.java | 112 ++-- .../runtime/matrix/data/LibMatrixDatagen.java | 78 +-- .../matrix/data/RandomMatrixGenerator.java | 123 - .../sysml/test/gpu/AggregateUnaryOpTests.java | 133 + .../apache/sysml/test/gpu/BinaryOpTests.java| 85 .../org/apache/sysml/test/gpu/GPUTests.java | 250 + .../gpu/MatrixMatrixElementWiseOpTests.java | 271 ++ .../test/gpu/MatrixMultiplicationOpTest.java| 190 +++ .../sysml/test/gpu/NeuralNetworkOpTests.java| 508 +++ .../org/apache/sysml/test/gpu/ReorgOpTests.java | 70 +++ .../gpu/ScalarMatrixElementwiseOpTests.java | 131 + 
.../org/apache/sysml/test/gpu/UnaryOpTests.java | 113 + .../apache/sysml/test/gpu/UnaryOpTestsBase.java | 106 .../test/integration/gpu/ZPackageSuite.java | 46 ++ 22 files changed, 2308 insertions(+), 236 deletions(-) -- http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/772fb588/dev/code-style/systemml-style-intellij.xml -- diff --git a/dev/code-style/systemml-style-intellij.xml b/dev/code-style/systemml-style-intellij.xml index 248c600..1ad3209 100644 --- a/dev/code-style/systemml-style-intellij.xml +++ b/dev/code-style/systemml-style-intellij.xml @@ -16,7 +16,25 @@ * specific language governing permissions and limitations * under the License. --> + + + + + + + + + + + + + + + + + + http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/772fb588/pom.xml -- diff --git a/pom.xml b/pom.xml index 5ce5576..99e2dec 100644 --- a/pom.xml +++ b/pom.xml @@ -436,6 +436,7 @@ **/slowtest/** **/integration/** **/test/unit/** + **/test/gpu/** @@ -478,6 +479,7 @@ -Djava.awt.headless=true + ${gpuTestsPath} **/integration/applications/**/*Suite.java **/integration/conversion/*Suite.java **/integration/functions/data/*Suite.java @@ -896,6 +898,14 @@ + + + gpuTests + + **/integration/gpu/**/*Suite.java + + + ignore-doclint http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/772fb588/src/main/java/org/apache/sysml/api/ScriptExecutorUtils.java -- diff --git a/src/main/java/org/apache/sysml/api/ScriptExecutorUtils.java b/src/main/java/org/apache/sysml/api/ScriptExecutorUtils.java index f582c36..674a011 100644 --- a/sr
incubator-systemml git commit: [Doc] Change PCA scale value in documentation
Repository: incubator-systemml Updated Branches: refs/heads/master 7bba47409 -> 6b377319e [Doc] Change PCA scale value in documentation Closes #530 Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/6b377319 Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/6b377319 Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/6b377319 Branch: refs/heads/master Commit: 6b377319e205734c8f26ba28bd18d9e720151d7e Parents: 7bba474 Author: krishnakalyan3 Authored: Tue Jun 6 22:18:51 2017 -0700 Committer: Nakul Jindal Committed: Tue Jun 6 22:18:51 2017 -0700 -- docs/algorithms-matrix-factorization.md | 8 1 file changed, 4 insertions(+), 4 deletions(-) -- http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/6b377319/docs/algorithms-matrix-factorization.md -- diff --git a/docs/algorithms-matrix-factorization.md b/docs/algorithms-matrix-factorization.md index 9af8c19..8777130 100644 --- a/docs/algorithms-matrix-factorization.md +++ b/docs/algorithms-matrix-factorization.md @@ -113,7 +113,7 @@ SystemML Language Reference for details. -nvargs INPUT=/user/ml/input.mtx K=10 CENTER=1 -SCALE=1O +SCALE=1 FMT=csv PROJDATA=1 OUTPUT=/user/ml/pca_output/ @@ -129,7 +129,7 @@ SystemML Language Reference for details. -nvargs INPUT=/user/ml/input.mtx K=10 CENTER=1 - SCALE=1O + SCALE=1 FMT=csv PROJDATA=1 OUTPUT=/user/ml/pca_output/ @@ -142,7 +142,7 @@ SystemML Language Reference for details. -nvargs INPUT=/user/ml/test_input.mtx K=10 CENTER=1 -SCALE=1O +SCALE=1 FMT=csv PROJDATA=1 MODEL=/user/ml/pca_output/ @@ -159,7 +159,7 @@ SystemML Language Reference for details. -nvargs INPUT=/user/ml/test_input.mtx K=10 CENTER=1 - SCALE=1O + SCALE=1 FMT=csv PROJDATA=1 MODEL=/user/ml/pca_output/
[1/3] systemml git commit: [FIX] Fixed nested parfor for GPUs
Repository: systemml Updated Branches: refs/heads/master 0bcae49ff -> f58717564 http://git-wip-us.apache.org/repos/asf/systemml/blob/f5871756/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUObject.java -- diff --git a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUObject.java b/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUObject.java index 0ed34c5..366eee5 100644 --- a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUObject.java +++ b/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUObject.java @@ -66,73 +66,91 @@ public class GPUObject { private static final Log LOG = LogFactory.getLog(GPUObject.class.getName()); - /** GPUContext that owns this GPUObject */ + /** +* GPUContext that owns this GPUObject +*/ private final GPUContext gpuContext; - /** Pointer to the underlying dense matrix block on GPU */ + /** +* Pointer to the underlying dense matrix block on GPU +*/ private Pointer jcudaDenseMatrixPtr = null; -/** Pointer to the underlying sparse matrix block on GPU */ + /** +* Pointer to the underlying sparse matrix block on GPU +*/ private CSRPointer jcudaSparseMatrixPtr = null; - /** An optional tensor descriptor (and shape) that can be set by a tensor instruction such as convolution, + /** +* An optional tensor descriptor (and shape) that can be set by a tensor instruction such as convolution, * maxpooling and exploited by a subsequent non-tensor instruction such as relu */ private cudnnTensorDescriptor tensorDescriptor = null; - /** the shape of this tensor, if in fact this is a tensor */ - private int [] tensorShape = null; + /** +* the shape of this tensor, if in fact this is a tensor +*/ + private int[] tensorShape = null; - /** whether the block attached to this {@link GPUContext} is dirty on the device and needs to be copied back to host */ + /** +* whether the block attached to this {@link GPUContext} is dirty on the device and needs to be copied back to host 
+*/ protected boolean dirty = false; - /** number of read/write locks on this object (this GPUObject is being used in a current instruction) */ + /** +* number of read/write locks on this object (this GPUObject is being used in a current instruction) +*/ protected AtomicInteger locks = new AtomicInteger(0); - /** Timestamp, needed by {@link GPUContext#evict(long)} */ + /** +* Timestamp, needed by {@link GPUContext#evict(long)} +*/ AtomicLong timestamp = new AtomicLong(0); - /** Whether this block is in sparse format */ + /** +* Whether this block is in sparse format +*/ protected boolean isSparse = false; - /** Enclosing {@link MatrixObject} instance */ + /** +* Enclosing {@link MatrixObject} instance +*/ protected MatrixObject mat = null; -// private Pointer allocate(String instName, long size) throws DMLRuntimeException { -// return getGPUContext().allocate(instName, size); -// } + // private Pointer allocate(String instName, long size) throws DMLRuntimeException { + // return getGPUContext().allocate(instName, size); + // } @Override public Object clone() { GPUObject me = this; GPUObject that = new GPUObject(me.gpuContext, me.mat); if (me.tensorShape != null) { -that.tensorShape = new int[me.tensorShape.length]; -System.arraycopy(me.tensorShape, 0, that.tensorShape, 0, me.tensorShape.length); -that.allocateTensorDescriptor(me.tensorShape[0], me.tensorShape[1], me.tensorShape[2], me.tensorShape[3]); -} + that.tensorShape = new int[me.tensorShape.length]; + System.arraycopy(me.tensorShape, 0, that.tensorShape, 0, me.tensorShape.length); + that.allocateTensorDescriptor(me.tensorShape[0], me.tensorShape[1], me.tensorShape[2], me.tensorShape[3]); + } that.dirty = me.dirty; that.locks = new AtomicInteger(me.locks.get()); that.timestamp = new AtomicLong(me.timestamp.get()); that.isSparse = me.isSparse; try { - if (me.jcudaDenseMatrixPtr != null) { - long rows = me.mat.getNumRows(); - long cols = me.mat.getNumColumns(); - long size = rows * cols * Sizeof.DOUBLE; - 
me.gpuContext.ensureFreeSpace((int)size); - that.jcud
[2/3] systemml git commit: [FIX] Fixed nested parfor for GPUs
http://git-wip-us.apache.org/repos/asf/systemml/blob/f5871756/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUContext.java -- diff --git a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUContext.java b/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUContext.java index 8da67ea..b3c19ef 100644 --- a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUContext.java +++ b/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUContext.java @@ -18,14 +18,24 @@ */ package org.apache.sysml.runtime.instructions.gpu.context; -import jcuda.Pointer; -import jcuda.jcublas.cublasHandle; -import jcuda.jcudnn.cudnnHandle; -import jcuda.jcusolver.cusolverDnHandle; -import jcuda.jcusolver.cusolverSpHandle; -import jcuda.jcusparse.cusparseHandle; -import jcuda.runtime.JCuda; -import jcuda.runtime.cudaDeviceProp; +import static jcuda.jcublas.JCublas2.cublasCreate; +import static jcuda.jcublas.JCublas2.cublasDestroy; +import static jcuda.jcudnn.JCudnn.cudnnCreate; +import static jcuda.jcudnn.JCudnn.cudnnDestroy; +import static jcuda.jcusolver.JCusolverDn.cusolverDnCreate; +import static jcuda.jcusolver.JCusolverDn.cusolverDnDestroy; +import static jcuda.jcusolver.JCusolverSp.cusolverSpCreate; +import static jcuda.jcusolver.JCusolverSp.cusolverSpDestroy; +import static jcuda.jcusparse.JCusparse.cusparseCreate; +import static jcuda.jcusparse.JCusparse.cusparseDestroy; +import static jcuda.runtime.JCuda.cudaDeviceScheduleBlockingSync; +import static jcuda.runtime.JCuda.cudaFree; +import static jcuda.runtime.JCuda.cudaGetDeviceCount; +import static jcuda.runtime.JCuda.cudaMalloc; +import static jcuda.runtime.JCuda.cudaMemGetInfo; +import static jcuda.runtime.JCuda.cudaMemset; +import static jcuda.runtime.JCuda.cudaSetDevice; +import static jcuda.runtime.JCuda.cudaSetDeviceFlags; import java.util.ArrayList; import java.util.Collections; @@ -45,24 +55,14 @@ import 
org.apache.sysml.runtime.instructions.gpu.GPUInstruction; import org.apache.sysml.utils.GPUStatistics; import org.apache.sysml.utils.LRUCacheMap; -import static jcuda.jcublas.JCublas2.cublasCreate; -import static jcuda.jcublas.JCublas2.cublasDestroy; -import static jcuda.jcudnn.JCudnn.cudnnCreate; -import static jcuda.jcudnn.JCudnn.cudnnDestroy; -import static jcuda.jcusolver.JCusolverDn.cusolverDnCreate; -import static jcuda.jcusolver.JCusolverDn.cusolverDnDestroy; -import static jcuda.jcusolver.JCusolverSp.cusolverSpCreate; -import static jcuda.jcusolver.JCusolverSp.cusolverSpDestroy; -import static jcuda.jcusparse.JCusparse.cusparseCreate; -import static jcuda.jcusparse.JCusparse.cusparseDestroy; -import static jcuda.runtime.JCuda.cudaDeviceScheduleBlockingSync; -import static jcuda.runtime.JCuda.cudaFree; -import static jcuda.runtime.JCuda.cudaGetDeviceCount; -import static jcuda.runtime.JCuda.cudaMalloc; -import static jcuda.runtime.JCuda.cudaMemGetInfo; -import static jcuda.runtime.JCuda.cudaMemset; -import static jcuda.runtime.JCuda.cudaSetDevice; -import static jcuda.runtime.JCuda.cudaSetDeviceFlags; +import jcuda.Pointer; +import jcuda.jcublas.cublasHandle; +import jcuda.jcudnn.cudnnHandle; +import jcuda.jcusolver.cusolverDnHandle; +import jcuda.jcusolver.cusolverSpHandle; +import jcuda.jcusparse.cusparseHandle; +import jcuda.runtime.JCuda; +import jcuda.runtime.cudaDeviceProp; /** * Represents a context per GPU accessible through the same JVM @@ -71,606 +71,643 @@ import static jcuda.runtime.JCuda.cudaSetDeviceFlags; public class GPUContext { protected static final Log LOG = LogFactory.getLog(GPUContext.class.getName()); + /** +* currently employed eviction policy +*/ + public final EvictionPolicy evictionPolicy = EvictionPolicy.LRU; + /** +* The minimum CUDA Compute capability needed for SystemML. +* After compute capability 3.0, 2^31 - 1 blocks and 1024 threads per block are supported. 
+* If SystemML needs to run on an older card, this logic can be revisited. +*/ + final int MAJOR_REQUIRED = 3; + final int MINOR_REQUIRED = 0; + /** +* active device assigned to this GPUContext instance +*/ + private final int deviceNum; + // Invoke cudaMemGetInfo to get available memory information. Useful if GPU is shared among multiple application. + public double GPU_MEMORY_UTILIZATION_FACTOR = ConfigurationManager.getDMLConfig() + .getDoubleValue(DMLConfig.GPU_MEMORY_UTILIZATION_FACTOR); + /** +* Map of free blocks allocate on GPU. maps size_of_block -> pointer on GPU +*/ + private LRUCacheMap> freeCUDASpaceMap = new LRUCacheMap<>(); + /** +* To record size of allocated blocks +*/ + private HashMap cudaBlockSizeMap = new HashMap<>(); + /** +* list of allocated {@link GPUObject}
[3/3] systemml git commit: [FIX] Fixed nested parfor for GPUs
[FIX] Fixed nested parfor for GPUs Additionally - Fixed intellij codestyle accordingly - Fixed formatting of some GPU related source files Closes #532 Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/f5871756 Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/f5871756 Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/f5871756 Branch: refs/heads/master Commit: f587175647a84a3825b174b4d29c0398be17331f Parents: 0bcae49 Author: Nakul Jindal Authored: Sat Jun 10 12:06:47 2017 -0700 Committer: Nakul Jindal Committed: Sat Jun 10 12:06:47 2017 -0700 -- dev/code-style/systemml-style-intellij.xml | 37 +- .../apache/sysml/api/ScriptExecutorUtils.java | 17 +- .../controlprogram/ParForProgramBlock.java | 19 +- .../context/ExecutionContext.java | 64 +- .../controlprogram/parfor/LocalParWorker.java |2 +- .../cp/FunctionCallCPInstruction.java | 12 +- .../gpu/AggregateBinaryGPUInstruction.java |4 +- .../gpu/AggregateUnaryGPUInstruction.java |2 +- .../gpu/ConvolutionGPUInstruction.java | 18 +- .../instructions/gpu/MMTSJGPUInstruction.java |2 +- .../gpu/MatrixBuiltinGPUInstruction.java| 30 +- .../MatrixMatrixArithmeticGPUInstruction.java |2 +- .../gpu/MatrixMatrixAxpyGPUInstruction.java |2 +- .../gpu/MatrixMatrixBuiltinGPUInstruction.java |2 +- .../instructions/gpu/ReorgGPUInstruction.java |2 +- .../ScalarMatrixArithmeticGPUInstruction.java |2 +- .../instructions/gpu/context/CSRPointer.java| 922 ++--- .../gpu/context/ExecutionConfig.java| 85 +- .../instructions/gpu/context/GPUContext.java| 1257 +- .../gpu/context/GPUContextPool.java | 266 ++-- .../instructions/gpu/context/GPUObject.java | 454 --- .../instructions/gpu/context/JCudaKernels.java | 141 +- .../runtime/matrix/data/LibMatrixCUDA.java | 42 +- .../runtime/matrix/data/LibMatrixDNNHelper.java |1 + .../org/apache/sysml/test/gpu/GPUTests.java | 47 +- .../sysml/test/gpu/NeuralNetworkOpTests.java| 106 +- 26 files changed, 1917 
insertions(+), 1621 deletions(-) -- http://git-wip-us.apache.org/repos/asf/systemml/blob/f5871756/dev/code-style/systemml-style-intellij.xml -- diff --git a/dev/code-style/systemml-style-intellij.xml b/dev/code-style/systemml-style-intellij.xml index 1ad3209..b4a53b4 100644 --- a/dev/code-style/systemml-style-intellij.xml +++ b/dev/code-style/systemml-style-intellij.xml @@ -1,28 +1,27 @@ - - + @@ -32,7 +31,7 @@ - + http://git-wip-us.apache.org/repos/asf/systemml/blob/f5871756/src/main/java/org/apache/sysml/api/ScriptExecutorUtils.java -- diff --git a/src/main/java/org/apache/sysml/api/ScriptExecutorUtils.java b/src/main/java/org/apache/sysml/api/ScriptExecutorUtils.java index 674a011..2895aa4 100644 --- a/src/main/java/org/apache/sysml/api/ScriptExecutorUtils.java +++ b/src/main/java/org/apache/sysml/api/ScriptExecutorUtils.java @@ -19,6 +19,8 @@ package org.apache.sysml.api; +import java.util.List; + import org.apache.sysml.api.mlcontext.ScriptExecutor; import org.apache.sysml.conf.DMLConfig; import org.apache.sysml.hops.codegen.SpoofCompiler; @@ -79,23 +81,22 @@ public class ScriptExecutorUtils { // GPUs GPUContextPool.PER_PROCESS_MAX_GPUS = dmlconf.getIntValue(DMLConfig.MAX_GPUS_PER_PROCESS); Statistics.startRunTimer(); - GPUContext gCtx = null; try { // run execute (w/ exception handling to ensure proper shutdown) if (DMLScript.USE_ACCELERATOR && ec != null) { - gCtx = GPUContextPool.getFromPool(); - if (gCtx == null) { + List gCtxs = GPUContextPool.reserveAllGPUContexts(); + if (gCtxs == null) { throw new DMLRuntimeException( "GPU : Could not create GPUContext, either no GPU or all GPUs currently in use"); } - gCtx.initializeThread(); - ec.setGPUContext(gCtx); + gCtxs.get(0).initializeThread(); + ec.setGPUContexts(gCtxs); } rtprog.execute(ec); } finally { // ensure
systemml git commit: [SYSTEMML-1532] python launch script for spark-submit
Repository: systemml Updated Branches: refs/heads/master 8544f6960 -> 3cde999c0 [SYSTEMML-1532] python launch script for spark-submit Closes #501 Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/3cde999c Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/3cde999c Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/3cde999c Branch: refs/heads/master Commit: 3cde999c09941d7fc9b4b03f733071b85ec8a343 Parents: 8544f69 Author: krishnakalyan3 Authored: Mon Jun 19 11:39:00 2017 -0700 Committer: Nakul Jindal Committed: Mon Jun 19 11:39:00 2017 -0700 -- bin/systemml-spark-submit.py | 187 ++ 1 file changed, 187 insertions(+) -- http://git-wip-us.apache.org/repos/asf/systemml/blob/3cde999c/bin/systemml-spark-submit.py -- diff --git a/bin/systemml-spark-submit.py b/bin/systemml-spark-submit.py new file mode 100755 index 000..30974ec --- /dev/null +++ b/bin/systemml-spark-submit.py @@ -0,0 +1,187 @@ +#!/usr/bin/env python +# - +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# +# - + +import os +import sys +from os.path import join, exists, abspath +from os import environ +import glob +import argparse +import shutil +import platform + +if environ.get('SPARK_HOME') is None: +print('SPARK_HOME not set') +sys.exit(1) +else: +spark_home = environ.get('SPARK_HOME') +spark_path = join(spark_home, 'bin', 'spark-submit') + + +# error help print +def print_usage_and_exit(): +print('Usage: ./systemml-spark-submit.py -f [arguments]') +sys.exit(1) + +cparser = argparse.ArgumentParser(description='System-ML Spark Submit Script') + +# SPARK-SUBMIT Options +cparser.add_argument('--master', default='local[*]', help='local, yarn-client, yarn-cluster', metavar='') +cparser.add_argument('--driver-memory', default='5G', help='Memory for driver (e.g. 512M)', metavar='') +cparser.add_argument('--num-executors', default='2', help='Number of executors to launch', metavar='') +cparser.add_argument('--executor-memory', default='2G', help='Memory per executor', metavar='') +cparser.add_argument('--executor-cores', default='1', help='Number of cores', metavar='') +cparser.add_argument('--conf', help='Spark configuration file', nargs='+', metavar='') + +# SYSTEM-ML Options +cparser.add_argument('-nvargs', help='List of attributeName-attributeValue pairs', nargs='+', metavar='') +cparser.add_argument('-args', help='List of positional argument values', metavar='', nargs='+') +cparser.add_argument('-config', help='System-ML configuration file (e.g SystemML-config.xml)', metavar='') +cparser.add_argument('-exec', default='hybrid_spark', help='System-ML backend (e.g spark, spark-hybrid)', metavar='') +cparser.add_argument('-explain', help='explains plan levels can be hops, runtime, ' + 'recompile_hops, recompile_runtime', nargs='?', const='runtime', metavar='') +cparser.add_argument('-debug', help='runs in debug mode', action='store_true') +cparser.add_argument('-stats', help='Monitor and report caching/recompilation statistics, ' +'heavy hitter is 10 unless 
overridden', nargs='?', const='10', metavar='') +cparser.add_argument('-gpu', help='uses CUDA instructions when reasonable, ' + 'set option to skip conservative memory estimates ' + 'and use GPU wherever possible', nargs='?') +cparser.add_argument('-f', required=True, help='specifies dml/pydml file to execute; ' + 'path can be local/hdfs/gpfs', metavar='') + +args = cparser.parse_args() + +# Optional arguments +ml_options = [] +if args.nvargs is not None: +ml_options.append('-nvargs') +ml_options.append(' '.join(args.nvargs)) +if args.args is not None: +ml_opti
systemml git commit: [SYSTEMML-1701] fix need to use -force for gpu
Repository: systemml Updated Branches: refs/heads/master 3cde999c0 -> df8d4a63d [SYSTEMML-1701] fix need to use -force for gpu Closes #546 Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/df8d4a63 Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/df8d4a63 Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/df8d4a63 Branch: refs/heads/master Commit: df8d4a63d8d09cae94b6ca2634e31da554302c72 Parents: 3cde999 Author: Nakul Jindal Authored: Mon Jun 19 11:44:22 2017 -0700 Committer: Nakul Jindal Committed: Mon Jun 19 11:47:27 2017 -0700 -- .../java/org/apache/sysml/hops/AggBinaryOp.java | 9 +++-- .../java/org/apache/sysml/hops/AggUnaryOp.java | 18 +- .../java/org/apache/sysml/hops/BinaryOp.java| 7 ++-- src/main/java/org/apache/sysml/hops/Hop.java| 4 ++- .../org/apache/sysml/hops/OptimizerUtils.java | 5 +-- .../java/org/apache/sysml/hops/ReorgOp.java | 4 ++- .../java/org/apache/sysml/hops/TernaryOp.java | 4 ++- .../gpu/context/GPUContextPool.java | 35 +++- .../runtime/matrix/data/LibMatrixCUDA.java | 2 +- 9 files changed, 66 insertions(+), 22 deletions(-) -- http://git-wip-us.apache.org/repos/asf/systemml/blob/df8d4a63/src/main/java/org/apache/sysml/hops/AggBinaryOp.java -- diff --git a/src/main/java/org/apache/sysml/hops/AggBinaryOp.java b/src/main/java/org/apache/sysml/hops/AggBinaryOp.java index 21dbbf1..c721efe 100644 --- a/src/main/java/org/apache/sysml/hops/AggBinaryOp.java +++ b/src/main/java/org/apache/sysml/hops/AggBinaryOp.java @@ -49,6 +49,7 @@ import org.apache.sysml.parser.Expression.DataType; import org.apache.sysml.parser.Expression.ValueType; import org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PDataPartitionFormat; import org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext; +import org.apache.sysml.runtime.instructions.gpu.context.GPUContextPool; import org.apache.sysml.runtime.matrix.MatrixCharacteristics; import 
org.apache.sysml.runtime.matrix.data.MatrixBlock; import org.apache.sysml.runtime.matrix.mapred.DistributedCacheInput; @@ -150,7 +151,7 @@ public class AggBinaryOp extends Hop implements MultiThreadedHop */ @Override public Lop constructLops() - throws HopsException, LopsException + throws HopsException, LopsException { //return already created lops if( getLops() != null ) @@ -546,7 +547,8 @@ public class AggBinaryOp extends Hop implements MultiThreadedHop int k = OptimizerUtils.getConstrainedNumThreads(_maxNumThreads); ExecType et = ExecType.CP; - if(DMLScript.USE_ACCELERATOR && (DMLScript.FORCE_ACCELERATOR || getMemEstimate() < OptimizerUtils.GPU_MEMORY_BUDGET)) { + if(DMLScript.USE_ACCELERATOR && (DMLScript.FORCE_ACCELERATOR || getMemEstimate() < GPUContextPool + .initialGPUMemBudget())) { et = ExecType.GPU; } @@ -625,7 +627,8 @@ public class AggBinaryOp extends Hop implements MultiThreadedHop { Lop matmultCP = null; - if(DMLScript.USE_ACCELERATOR && (DMLScript.FORCE_ACCELERATOR || getMemEstimate() < OptimizerUtils.GPU_MEMORY_BUDGET)) { + if(DMLScript.USE_ACCELERATOR && (DMLScript.FORCE_ACCELERATOR || getMemEstimate() < GPUContextPool + .initialGPUMemBudget())) { Hop h1 = getInput().get(0); Hop h2 = getInput().get(1); Lop left; Lop right; http://git-wip-us.apache.org/repos/asf/systemml/blob/df8d4a63/src/main/java/org/apache/sysml/hops/AggUnaryOp.java -- diff --git a/src/main/java/org/apache/sysml/hops/AggUnaryOp.java b/src/main/java/org/apache/sysml/hops/AggUnaryOp.java index 8e681c1..eb469ab 100644 --- a/src/main/java/org/apache/sysml/hops/AggUnaryOp.java +++ b/src/main/java/org/apache/sysml/hops/AggUnaryOp.java @@ -39,6 +39,7 @@ import org.apache.sysml.lops.LopProperties.ExecType; import org.apache.sysml.parser.Expression.DataType; import org.apache.sysml.parser.Expression.ValueType; import org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext; +import org.apache.sysml.runtime.instructions.gpu.context.GPUContextPool; import 
org.apache.sysml.runtime.matrix.MatrixCharacteristics; @@ -149,15 +150,16 @@ public class AggUnaryOp extends Hop implements MultiThreadedHop }
systemml git commit: [SYSTEMML-703] Prepare for GPU on Jenkins, disable expensive NN tests
Repository: systemml Updated Branches: refs/heads/master 345682404 -> 57e11e99c [SYSTEMML-703] Prepare for GPU on Jenkins, disable expensive NN tests Closes #550 Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/57e11e99 Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/57e11e99 Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/57e11e99 Branch: refs/heads/master Commit: 57e11e99c3f110b68ad5e3397f10b30533ab9b79 Parents: 3456824 Author: Nakul Jindal Authored: Thu Jun 22 17:04:49 2017 -0700 Committer: Nakul Jindal Committed: Thu Jun 22 17:04:49 2017 -0700 -- docs/release-process.md | 12 + .../sysml/test/gpu/NeuralNetworkOpTests.java| 28 +++- 2 files changed, 33 insertions(+), 7 deletions(-) -- http://git-wip-us.apache.org/repos/asf/systemml/blob/57e11e99/docs/release-process.md -- diff --git a/docs/release-process.md b/docs/release-process.md index f41c7c8..36528bd 100644 --- a/docs/release-process.md +++ b/docs/release-process.md @@ -259,6 +259,18 @@ For examples, see the [Spark MLContext Programming Guide](http://apache.github.i Verify that the performance suite located at scripts/perftest/ executes on Spark and Hadoop. Testing should include 80MB, 800MB, 8GB, and 80GB data sizes. +# Run NN Unit Tests for GPU + +Up to Checklist + +The unit tests for NN operators for GPU take a long time to run and are therefor not run as part of the Jenkins build. +They must be run before a release. To run them, edit the +[NeuralNetworkOpTests.java|https://github.com/apache/systemml/blob/master/src/test/java/org/apache/sysml/test/gpu/NeuralNetworkOpTests.java] +file and remove all the `@Ignore` annotations from all the tests. 
Then run the NN unit tests using mvn verify: +``` +mvn -Dit.test=org.apache.sysml.test.gpu.NeuralNetworkOpTests verify -PgpuTests +``` + # Voting http://git-wip-us.apache.org/repos/asf/systemml/blob/57e11e99/src/test/java/org/apache/sysml/test/gpu/NeuralNetworkOpTests.java -- diff --git a/src/test/java/org/apache/sysml/test/gpu/NeuralNetworkOpTests.java b/src/test/java/org/apache/sysml/test/gpu/NeuralNetworkOpTests.java index f4e931b..c53e803 100644 --- a/src/test/java/org/apache/sysml/test/gpu/NeuralNetworkOpTests.java +++ b/src/test/java/org/apache/sysml/test/gpu/NeuralNetworkOpTests.java @@ -35,6 +35,15 @@ import org.junit.Test; /** * Test neural network operations on the GPU + * Because of the large number of cases that each test deals with, this class takes + * very long to run. (It took about 9 hours to run the testMaxPoolBackward() to completion. + * The recommended course of action before a release is + * 1. Remove the @Ignore annotations + * 2. Run just these test on a machine with CUDA 8 installed. 
+ * Only this class can be run like so: + * + * mvn -Dit.test=org.apache.sysml.test.gpu.NeuralNetworkOpTests verify -PgpuTests + * */ public class NeuralNetworkOpTests extends GPUTests { @@ -100,6 +109,7 @@ public class NeuralNetworkOpTests extends GPUTests { return 1e-5; } + @Ignore @Test public void testConv2d() { String scriptStr = "O = conv2d(image, filter, padding=[padH, padW], stride=[strideH, strideW], input_shape=[N,C,H,W], filter_shape=[K,C,R,S])"; @@ -253,6 +263,7 @@ public class NeuralNetworkOpTests extends GPUTests { clearGPUMemory(); } + @Ignore @Test public void testConv2dBackwardFilter() { String scriptStr = "O = conv2d_backward_filter(image, dout, padding=[padH, padW], stride=[strideH, strideW], input_shape=[N,C,H,W], filter_shape=[K,C,R,S])"; @@ -298,9 +309,9 @@ public class NeuralNetworkOpTests extends GPUTests { filterSizeInMB, N, K, P, Q, doutSizeInMB, strideH, strideW, padH, padW); Matrix image = generateInputMatrix(spark, (int) N, - (int) (C * H * W), 0.-127.0, 127, sparsity, seed); + (int) (C * H * W), -127.0, 127, sparsity, seed);
systemml git commit: [SYSTEMML-703] Prepare for GPU on Jenkins, disable expensive NN tests
Repository: systemml Updated Branches: refs/heads/gh-pages 7c4907095 -> 05792e0e9 [SYSTEMML-703] Prepare for GPU on Jenkins, disable expensive NN tests Closes #550 Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/05792e0e Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/05792e0e Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/05792e0e Branch: refs/heads/gh-pages Commit: 05792e0e947d5d1b9f4c0adbcfec7d0ec4d45bdf Parents: 7c49070 Author: Nakul Jindal Authored: Thu Jun 22 17:04:49 2017 -0700 Committer: Nakul Jindal Committed: Thu Jun 22 17:04:49 2017 -0700 -- release-process.md | 12 1 file changed, 12 insertions(+) -- http://git-wip-us.apache.org/repos/asf/systemml/blob/05792e0e/release-process.md -- diff --git a/release-process.md b/release-process.md index f41c7c8..36528bd 100644 --- a/release-process.md +++ b/release-process.md @@ -259,6 +259,18 @@ For examples, see the [Spark MLContext Programming Guide](http://apache.github.i Verify that the performance suite located at scripts/perftest/ executes on Spark and Hadoop. Testing should include 80MB, 800MB, 8GB, and 80GB data sizes. +# Run NN Unit Tests for GPU + +Up to Checklist + +The unit tests for NN operators for GPU take a long time to run and are therefor not run as part of the Jenkins build. +They must be run before a release. To run them, edit the +[NeuralNetworkOpTests.java|https://github.com/apache/systemml/blob/master/src/test/java/org/apache/sysml/test/gpu/NeuralNetworkOpTests.java] +file and remove all the `@Ignore` annotations from all the tests. Then run the NN unit tests using mvn verify: +``` +mvn -Dit.test=org.apache.sysml.test.gpu.NeuralNetworkOpTests verify -PgpuTests +``` + # Voting
systemml git commit: [SYSTEMML-1731] Added GPU instruction 1-*, -nz, %%, %/%
Repository: systemml Updated Branches: refs/heads/master 50dafa038 -> 2dc441f52 [SYSTEMML-1731] Added GPU instruction 1-*, -nz, %%, %/% Closes #554 Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/2dc441f5 Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/2dc441f5 Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/2dc441f5 Branch: refs/heads/master Commit: 2dc441f52e4966d4c160588be6c850d778475a5f Parents: 50dafa0 Author: Nakul Jindal Authored: Tue Jun 27 16:02:38 2017 -0700 Committer: Nakul Jindal Committed: Tue Jun 27 16:02:38 2017 -0700 -- src/main/cpp/kernels/Makefile |6 +- src/main/cpp/kernels/SystemML.cu| 29 +- src/main/cpp/kernels/SystemML.ptx | 1597 +++--- .../java/org/apache/sysml/hops/BinaryOp.java|6 +- .../instructions/GPUInstructionParser.java |4 +- .../runtime/matrix/data/LibMatrixCUDA.java | 15 +- .../gpu/MatrixMatrixElementWiseOpTests.java | 55 +- .../gpu/ScalarMatrixElementwiseOpTests.java | 79 +- 8 files changed, 1175 insertions(+), 616 deletions(-) -- http://git-wip-us.apache.org/repos/asf/systemml/blob/2dc441f5/src/main/cpp/kernels/Makefile -- diff --git a/src/main/cpp/kernels/Makefile b/src/main/cpp/kernels/Makefile index 0b003f3..5feae69 100644 --- a/src/main/cpp/kernels/Makefile +++ b/src/main/cpp/kernels/Makefile @@ -16,7 +16,11 @@ # under the License. 
NVCC=nvcc -CUDAFLAGS= -ptx -c -arch=sm_30 +CUDAFLAGS= -ptx -c -arch=sm_30 + +# Use these flags for precise math +#CUDAFLAGS= -ptx -c -arch=sm_30 -ftz=false -prec-div=true -prec-sqrt=true + SystemML.o: SystemML.cu $(NVCC) $(CUDAFLAGS) SystemML.cu http://git-wip-us.apache.org/repos/asf/systemml/blob/2dc441f5/src/main/cpp/kernels/SystemML.cu -- diff --git a/src/main/cpp/kernels/SystemML.cu b/src/main/cpp/kernels/SystemML.cu index 5b4574e..3098282 100644 --- a/src/main/cpp/kernels/SystemML.cu +++ b/src/main/cpp/kernels/SystemML.cu @@ -24,6 +24,7 @@ nvcc -ptx -arch=sm_30 SystemML.cu ***/ #include +#include /** @@ -54,7 +55,8 @@ __forceinline__ __device__ double getBoolean(int val) { // op = {0=plus, 1=minus, 2=multiply, 3=divide, 4=power, // 5=less, 6=lessequal, 7=greater, 8=greaterequal, 9=equal, 10=notequal, -// 11=min, 12=max, 13=and, 14=or, 15=log} +// 11=min, 12=max, 13=and, 14=or, 15=minus1multiply, 16=minusnz, +// 17=modulus, 18=integer division} extern "C" __forceinline__ __device__ double binaryOp(double x, double y, int op) { switch(op) { @@ -71,6 +73,31 @@ __forceinline__ __device__ double binaryOp(double x, double y, int op) { case 10 : return getBoolean(x != y); case 11 : return min(x, y); case 12 : return max(x, y); +case 13 : return getBoolean((int)llrint(x) & (int)llrint(y)); +case 14 : return getBoolean((int)llrint(x) | (int)llrint(y)); +case 15 : return 1 - x * y; +case 16 : return (x != 0.0 ? 
x - y : 0.0); +case 17 : { +if (y == 0.0 || y == -0.0){ +return nan(""); +} +double v = x / y; +// Check for v being NaN (v != v) or if it is infinity +if (isnan(v) || isinf(v)){ +return v; +} else { +v = floor(v); +} +return x - v * y; +} +case 18:{ +double v = x / y; +if (isnan(v) || isinf(v)){ +return v; +} else { +return floor(v); +} +} default : return DBL_MAX; } } http://git-wip-us.apache.org/repos/asf/systemml/blob/2dc441f5/src/main/cpp/kernels/SystemML.ptx -- diff --git a/src/main/cpp/kernels/SystemML.ptx b/src/main/cpp/kernels/SystemML.ptx index 3229581..ab43758 100644 --- a/src/main/cpp/kernels/SystemML.ptx +++ b/src/main/cpp/kernels/SystemML.ptx @@ -450,10 +450,10 @@ BB6_6: .param .u32 matrix_matrix_cellwise_op_param_7 ) { - .reg .pred %p<52>; - .reg .b32 %r<56>; - .reg .f64 %fd<40>; - .reg .b64 %rd<15>; + .reg .pred %p<73>; + .reg .b32 %r<68>; + .reg .f64 %fd<56>; + .reg .b64 %rd<19>; ld.param.u64%rd2, [matrix_matrix_cellwise_op_param_0]; @@ -475,40 +475,40 @@ BB6_6: setp.lt.s32 %p2, %r1, %r14; setp.lt.s32 %p3, %r2, %r10; and.pred%p4, %p2, %p3; - @!%p4 bra BB7_55; + @!%p4 bra BB7_77; bra.
systemml git commit: [HOTFIX] for SYSTEMML-1731
Repository: systemml Updated Branches: refs/heads/master 2dc441f52 -> 9f808c43e [HOTFIX] for SYSTEMML-1731 Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/9f808c43 Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/9f808c43 Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/9f808c43 Branch: refs/heads/master Commit: 9f808c43e380a90f814f2e5b7a78397edd1bbb90 Parents: 2dc441f Author: Nakul Jindal Authored: Tue Jun 27 17:14:41 2017 -0700 Committer: Nakul Jindal Committed: Tue Jun 27 17:14:41 2017 -0700 -- .../org/apache/sysml/test/gpu/ScalarMatrixElementwiseOpTests.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/systemml/blob/9f808c43/src/test/java/org/apache/sysml/test/gpu/ScalarMatrixElementwiseOpTests.java -- diff --git a/src/test/java/org/apache/sysml/test/gpu/ScalarMatrixElementwiseOpTests.java b/src/test/java/org/apache/sysml/test/gpu/ScalarMatrixElementwiseOpTests.java index 58293d6..c58365a 100644 --- a/src/test/java/org/apache/sysml/test/gpu/ScalarMatrixElementwiseOpTests.java +++ b/src/test/java/org/apache/sysml/test/gpu/ScalarMatrixElementwiseOpTests.java @@ -136,7 +136,7 @@ public class ScalarMatrixElementwiseOpTests extends GPUTests { List cpuOut = runOnCPU(spark, scriptStr, inputs, Arrays.asList(output)); List gpuOut = runOnGPU(spark, scriptStr, inputs, Arrays.asList(output)); //assertHeavyHitterPresent(heavyHitterOpCode); - assertEqualMatrices ((Matrix)cpuOut.get(0), (Matrix)gpuOut.get(0)); + assertEqualObjects (cpuOut.get(0), gpuOut.get(0)); } @Test
systemml git commit: [SYSTEMML-1451][GSoC Phase 1] Single script to run perf tests
Repository: systemml Updated Branches: refs/heads/master 31952e47d -> e7cfcadc9 [SYSTEMML-1451][GSoC Phase 1] Single script to run perf tests - Single entry point to run perf tests in any combination of algorithms, families, matrix shapes & densities - Reports time taken by a single perf test by parsing the output and grep-ing for the time - Detects tests that did not run and reports in the generated log - Robust error handling and reporting, informative help message Closes #537 Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/e7cfcadc Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/e7cfcadc Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/e7cfcadc Branch: refs/heads/master Commit: e7cfcadc9b0e72637c67c8d6a6dcc62f62ba5177 Parents: 31952e4 Author: krishnakalyan3 Authored: Sun Jul 2 00:00:49 2017 -0700 Committer: Nakul Jindal Committed: Sun Jul 2 00:00:49 2017 -0700 -- scripts/perftest/python/datagen.py | 252 scripts/perftest/python/predict.py | 285 +++ scripts/perftest/python/run_perftest.py | 339 ++ scripts/perftest/python/train.py| 411 +++ scripts/perftest/python/utils.py| 296 +++ 5 files changed, 1583 insertions(+) -- http://git-wip-us.apache.org/repos/asf/systemml/blob/e7cfcadc/scripts/perftest/python/datagen.py -- diff --git a/scripts/perftest/python/datagen.py b/scripts/perftest/python/datagen.py new file mode 100755 index 000..d9c49e9 --- /dev/null +++ b/scripts/perftest/python/datagen.py @@ -0,0 +1,252 @@ +#!/usr/bin/env python3 +#- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +#- + +import itertools +from os.path import join +from utils import split_rowcol, config_writer + +# This file contains configuration settings for data generation +DATA_FORMAT = 'csv' + +MATRIX_TYPE_DICT = {'dense': '0.9', +'sparse': '0.01'} + +FAMILY_NO_MATRIX_TYPE = ['clustering', 'stats1', 'stats2'] + + +def multinomial_datagen(matrix_dim, matrix_type, datagen_dir): + +row, col = split_rowcol(matrix_dim) +path_name = '.'.join(['multinomial', matrix_type, str(matrix_dim)]) +full_path = join(datagen_dir, path_name) + +numSamples = row +numFeatures = col +sparsity = MATRIX_TYPE_DICT[matrix_type] +num_categories = '150' +intercept = '0' +X = join(full_path, 'X.data') +Y = join(full_path, 'Y.data') +fmt = DATA_FORMAT + +config = [numSamples, numFeatures, sparsity, num_categories, intercept, + X, Y, fmt, '1'] + +config_writer(full_path + '.json', config) + +return full_path + + +def binomial_datagen(matrix_dim, matrix_type, datagen_dir): + +row, col = split_rowcol(matrix_dim) +path_name = '.'.join(['binomial', matrix_type, str(matrix_dim)]) +full_path = join(datagen_dir, path_name) + +numSamples = row +numFeatures = col +maxFeatureValue = '5' +maxWeight = '5' +loc_weights = join(full_path, 'weight.data') +loc_data = join(full_path, 'X.data') +loc_labels = join(full_path, 'Y.data') +noise = '1' +intercept = '0' +sparsity = MATRIX_TYPE_DICT[matrix_type] +tranform_labels = '1' +fmt = DATA_FORMAT + +config = [numSamples, numFeatures, maxFeatureValue, maxWeight, loc_weights, loc_data, + loc_labels, noise, intercept, sparsity, fmt, tranform_labels] 
+config_writer(full_path + '.json', config) + +return full_path + + +def regression1_datagen(matrix_dim, matrix_type, datagen_dir): + +row, col = split_rowcol(matrix_dim) +path_name = '.'.join(['regression1', matrix_type, str(matrix_dim)]) +full_path = join(datagen_dir, path_name) + +numSamples = row +numFeatures = col +maxFeatureValue = '5' +maxWeight = '5' +loc_weights = join(full_
systemml git commit: [SYSTEMML-1735] relational operators for GPU
Repository: systemml Updated Branches: refs/heads/master 978d4de47 -> a7364746a [SYSTEMML-1735] relational operators for GPU Closes #557 Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/a7364746 Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/a7364746 Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/a7364746 Branch: refs/heads/master Commit: a7364746a462069853421d59db1093ab145253c9 Parents: 978d4de Author: Nakul Jindal Authored: Wed Jul 5 11:33:41 2017 -0700 Committer: Nakul Jindal Committed: Wed Jul 5 11:33:41 2017 -0700 -- relational.dml | 6 + .../java/org/apache/sysml/hops/BinaryOp.java| 8 +- .../instructions/GPUInstructionParser.java | 13 +- .../instructions/gpu/GPUInstruction.java| 34 ++-- .../MatrixMatrixArithmeticGPUInstruction.java | 2 +- ...rixMatrixRelationalBinaryGPUInstruction.java | 69 .../gpu/RelationalBinaryGPUInstruction.java | 68 +++ ...larMatrixRelationalBinaryGPUInstruction.java | 61 +++ .../instructions/gpu/context/CSRPointer.java| 6 +- .../instructions/gpu/context/GPUObject.java | 2 +- .../runtime/matrix/data/LibMatrixCUDA.java | 177 ++- .../gpu/MatrixMatrixElementWiseOpTests.java | 32 +++- .../gpu/ScalarMatrixElementwiseOpTests.java | 64 ++- 13 files changed, 477 insertions(+), 65 deletions(-) -- http://git-wip-us.apache.org/repos/asf/systemml/blob/a7364746/relational.dml -- diff --git a/relational.dml b/relational.dml new file mode 100644 index 000..3f492a1 --- /dev/null +++ b/relational.dml @@ -0,0 +1,6 @@ +A = rand(rows=10, cols=10) +B = rand(rows=10, cols=10) + +C = A >= B + +print(toString(C)) \ No newline at end of file http://git-wip-us.apache.org/repos/asf/systemml/blob/a7364746/src/main/java/org/apache/sysml/hops/BinaryOp.java -- diff --git a/src/main/java/org/apache/sysml/hops/BinaryOp.java b/src/main/java/org/apache/sysml/hops/BinaryOp.java index 83209ef..36f573c 100644 --- a/src/main/java/org/apache/sysml/hops/BinaryOp.java +++ 
b/src/main/java/org/apache/sysml/hops/BinaryOp.java @@ -582,7 +582,9 @@ public class BinaryOp extends Hop if(DMLScript.USE_ACCELERATOR && (DMLScript.FORCE_ACCELERATOR || getMemEstimate() < GPUContextPool .initialGPUMemBudget()) && (op == OpOp2.MULT || op == OpOp2.PLUS || op == OpOp2.MINUS || op == OpOp2.DIV || op == OpOp2.POW - || op == OpOp2.MINUS_NZ || op == OpOp2.MINUS1_MULT || op == OpOp2.MODULUS || op == OpOp2.INTDIV) ) { + || op == OpOp2.MINUS_NZ || op == OpOp2.MINUS1_MULT || op == OpOp2.MODULUS || op == OpOp2.INTDIV + || op == OpOp2.LESS || op == OpOp2.LESSEQUAL || op == OpOp2.EQUAL || op == OpOp2.NOTEQUAL + || op == OpOp2.GREATER || op == OpOp2.GREATEREQUAL)) { et = ExecType.GPU; } Unary unary1 = new Unary(getInput().get(0).constructLops(), @@ -602,7 +604,9 @@ public class BinaryOp extends Hop if(DMLScript.USE_ACCELERATOR && (DMLScript.FORCE_ACCELERATOR || getMemEstimate() < GPUContextPool .initialGPUMemBudget()) && (op == OpOp2.MULT || op == OpOp2.PLUS || op == OpOp2.MINUS || op == OpOp2.DIV || op == OpOp2.POW - || op == OpOp2.SOLVE || op == OpOp2.MINUS1_MULT || op == OpOp2.MODULUS || op == OpOp2.INTDIV)) { + || op == OpOp2.SOLVE || op == OpOp2.MINUS1_MULT || op == OpOp2.MODULUS || op == OpOp2.INTDIV + || op == OpOp2.LESS || op == OpOp2.LESSEQUAL || op == OpOp2.EQUAL || op == OpOp2.NOTEQUAL + || op == OpOp2.GREATER || op == OpOp2.GREATEREQUAL)) { et = ExecType.GPU; } http://git-wip-us.apache.org/repos/asf/systemml/blob/a7364746/src/main/java/org/apache/sysml/runtime/instructions/GPUInstructionParser.java -- diff --git a/src/main/java/org/apache/sysml/runtime/instructions/GPUInstructionParser.java b/src/main/java/org/apache/sysml/runtime/instructions/GPUInstructionParser.java index 5fd6fa0..17b1578
systemml git commit: [HOTFIX] Removed extraneous file that got committed with SYSTEMML-1735
Repository: systemml Updated Branches: refs/heads/master a7364746a -> 82ca13d23 [HOTFIX] Removed extraneous file that got committed with SYSTEMML-1735 Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/82ca13d2 Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/82ca13d2 Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/82ca13d2 Branch: refs/heads/master Commit: 82ca13d23f44152befcf15fd2eae09729b1f618a Parents: a736474 Author: Nakul Jindal Authored: Wed Jul 5 13:29:09 2017 -0700 Committer: Nakul Jindal Committed: Wed Jul 5 13:29:09 2017 -0700 -- relational.dml | 6 -- 1 file changed, 6 deletions(-) -- http://git-wip-us.apache.org/repos/asf/systemml/blob/82ca13d2/relational.dml -- diff --git a/relational.dml b/relational.dml deleted file mode 100644 index 3f492a1..000 --- a/relational.dml +++ /dev/null @@ -1,6 +0,0 @@ -A = rand(rows=10, cols=10) -B = rand(rows=10, cols=10) - -C = A >= B - -print(toString(C)) \ No newline at end of file
systemml git commit: [MINOR] Available families and algorithms printed from perftest script
Repository: systemml Updated Branches: refs/heads/master 82ca13d23 -> 33cb26ded [MINOR] Available families and algorithms printed from perftest script Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/33cb26de Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/33cb26de Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/33cb26de Branch: refs/heads/master Commit: 33cb26ded9d28786159aba0d235db7ec25a442a5 Parents: 82ca13d Author: Nakul Jindal Authored: Wed Jul 5 13:59:35 2017 -0700 Committer: Nakul Jindal Committed: Wed Jul 5 14:12:15 2017 -0700 -- scripts/perftest/python/run_perftest.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/systemml/blob/33cb26de/scripts/perftest/python/run_perftest.py -- diff --git a/scripts/perftest/python/run_perftest.py b/scripts/perftest/python/run_perftest.py index 1421c2c..6b8b4bc 100755 --- a/scripts/perftest/python/run_perftest.py +++ b/scripts/perftest/python/run_perftest.py @@ -264,9 +264,10 @@ if __name__ == '__main__': # Argparse Module cparser = argparse.ArgumentParser(description='SystemML Performance Test Script') -cparser.add_argument('--family', help='specify class of algorithms (e.g regression, binomial)', +cparser.add_argument('--family', help='specify class of algorithms (available : ' + ', '.join(ML_ALGO.keys()) + ')', metavar='', choices=ML_ALGO.keys(), nargs='+') -cparser.add_argument('--algo', help='specify the type of algorithm to run (Overrides --family)', metavar='', +cparser.add_argument('--algo', help='specify the type of algorithm to run ' + '(Overrides --family, available : ' + ', '.join(all_algos) + ')', metavar='', choices=all_algos, nargs='+') cparser.add_argument('--exec-type', default='singlenode', help='System-ML backend '
systemml git commit: [MINOR] More updates to the perftest help message
Repository: systemml Updated Branches: refs/heads/master 33cb26ded -> 1e1d3727f [MINOR] More updates to the perftest help message Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/1e1d3727 Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/1e1d3727 Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/1e1d3727 Branch: refs/heads/master Commit: 1e1d3727f4d88c9ef053d56da7aec640e0b88424 Parents: 33cb26d Author: Nakul Jindal Authored: Wed Jul 5 14:50:38 2017 -0700 Committer: Nakul Jindal Committed: Wed Jul 5 14:50:38 2017 -0700 -- scripts/perftest/python/run_perftest.py | 32 1 file changed, 18 insertions(+), 14 deletions(-) -- http://git-wip-us.apache.org/repos/asf/systemml/blob/1e1d3727/scripts/perftest/python/run_perftest.py -- diff --git a/scripts/perftest/python/run_perftest.py b/scripts/perftest/python/run_perftest.py index 6b8b4bc..dcc52c8 100755 --- a/scripts/perftest/python/run_perftest.py +++ b/scripts/perftest/python/run_perftest.py @@ -262,28 +262,32 @@ if __name__ == '__main__': # Remove duplicates algorithms and used as default inputs all_algos = set(reduce(lambda x, y: x + y, ML_ALGO.values())) +# Families +all_families = ML_ALGO.keys() + # Argparse Module cparser = argparse.ArgumentParser(description='SystemML Performance Test Script') -cparser.add_argument('--family', help='specify class of algorithms (available : ' + ', '.join(ML_ALGO.keys()) + ')', - metavar='', choices=ML_ALGO.keys(), nargs='+') -cparser.add_argument('--algo', help='specify the type of algorithm to run ' - '(Overrides --family, available : ' + ', '.join(all_algos) + ')', metavar='', +cparser.add_argument('--family', help='space separated list of classes of algorithms ' + '(available : ' + ', '.join(sorted(all_families)) + ')', + metavar='', choices=all_families, nargs='+') +cparser.add_argument('--algo', help='space separated list of algorithm to run ' + '(Overrides --family, available : ' + ', 
'.join(sorted(all_algos)) + ')', metavar='', choices=all_algos, nargs='+') cparser.add_argument('--exec-type', default='singlenode', help='System-ML backend ' - '(e.g singlenode, spark-hybrid)', metavar='', + '(available : singlenode, spark-hybrid)', metavar='', choices=default_execution_mode) -cparser.add_argument('--mat-type', default=default_mat_type, help='type of matrix to generate ' - '(e.g dense or sparse)', metavar='', choices=default_mat_type, +cparser.add_argument('--mat-type', default=default_mat_type, help='space separated list of types of matrix to generate ' + '(available : dense, sparse)', metavar='', choices=default_mat_type, nargs='+') -cparser.add_argument('--mat-shape', default=default_mat_shape, help='shape of matrix ' - 'to generate (e.g 10k_1k)', metavar='', nargs='+') -cparser.add_argument('--temp-dir', default=default_temp_dir, help='specify temporary directory', - metavar='') -cparser.add_argument('--filename', default='perf_test', help='specify output file for the perf' - ' metics', metavar='') +cparser.add_argument('--mat-shape', default=default_mat_shape, help='space separated list of shapes of matrices ' + 'to generate (e.g 10k_1k, 20M_4k)', metavar='', nargs='+') +cparser.add_argument('--temp-dir', default=default_temp_dir, help='temporary directory ' +'where generated, training and prediction data is put', metavar='') +cparser.add_argument('--filename', default='perf_test', help='name of the output file for the perf' + ' metrics', metavar='') cparser.add_argument('--mode', default=default_workload, - help='specify type of workload to run (e.g data-gen, train, predict)', + help='space separated list of types of workloads to run (available: data-gen, train, predict)', metavar='', choices=default_workload, nargs='+') # Args is a namespace
systemml git commit: [SYSTEMML-1744] JCuda jars in extra assembly jar
Repository: systemml Updated Branches: refs/heads/master 988366de0 -> 66b28c6e3 [SYSTEMML-1744] JCuda jars in extra assembly jar Closes #559 Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/66b28c6e Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/66b28c6e Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/66b28c6e Branch: refs/heads/master Commit: 66b28c6e356e894c7e6c21655dab85484bf4840a Parents: 988366d Author: Nakul Jindal Authored: Thu Jul 6 15:18:35 2017 -0700 Committer: Nakul Jindal Committed: Thu Jul 6 15:18:35 2017 -0700 -- src/assembly/extra.xml | 12 +++ src/assembly/extra/LICENSE | 48 + 2 files changed, 60 insertions(+) -- http://git-wip-us.apache.org/repos/asf/systemml/blob/66b28c6e/src/assembly/extra.xml -- diff --git a/src/assembly/extra.xml b/src/assembly/extra.xml index 75ca4d7..24c2d87 100644 --- a/src/assembly/extra.xml +++ b/src/assembly/extra.xml @@ -50,4 +50,16 @@ . + + + + + + org.jcuda:* + + true + compile + + + http://git-wip-us.apache.org/repos/asf/systemml/blob/66b28c6e/src/assembly/extra/LICENSE -- diff --git a/src/assembly/extra/LICENSE b/src/assembly/extra/LICENSE index bc42b2d..c495849 100644 --- a/src/assembly/extra/LICENSE +++ b/src/assembly/extra/LICENSE @@ -460,3 +460,51 @@ Copyright 2017 The TensorFlow Authors. All rights reserved. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
+ +=== + +The following compile-scope dependencies come under the MIT License + +JCuda (jcuda.org) + +org.jcuda:jcuda:0.8.0 +org.jcuda:jcublas:0.8.0 +org.jcuda:jcufft:0.8.0 +org.jcuda:jcusparse:0.8.0 +org.jcuda:jcusolver:0.8.0 +org.jcuda:jcurand:0.8.0 +org.jcuda:jnvgraph:0.8.0 +org.jcuda:jcudnn:0.8.0 +org.jcuda:jcuda-natives:0.8.0 +org.jcuda:jcublas-natives:0.8.0 +org.jcuda:jcufft-natives:0.8.0 +org.jcuda:jcusparse-natives:0.8.0 +org.jcuda:jcusolver-natives:0.8.0 +org.jcuda:jcurand-natives:0.8.0 +org.jcuda:jnvgraph-natives:0.8.0 +org.jcuda:jcudnn-natives:0.8.0 + + +The MIT License (MIT) + +Copyright (c) 2008-2016 Marco Hutter - http://www.jcuda.org + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +===
systemml git commit: Write output of systemml run from perf test scripts
Repository: systemml Updated Branches: refs/heads/master c5a330d7d -> 152eba1a7 Write output of systemml run from perf test scripts Closes #561 Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/152eba1a Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/152eba1a Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/152eba1a Branch: refs/heads/master Commit: 152eba1a7d5de2d34ab97db7d49596b41569aeb5 Parents: c5a330d Author: Nakul Jindal Authored: Fri Jul 7 11:23:17 2017 -0700 Committer: Nakul Jindal Committed: Fri Jul 7 11:23:18 2017 -0700 -- scripts/perftest/python/run_perftest.py | 10 +- scripts/perftest/python/utils.py| 18 +- 2 files changed, 18 insertions(+), 10 deletions(-) -- http://git-wip-us.apache.org/repos/asf/systemml/blob/152eba1a/scripts/perftest/python/run_perftest.py -- diff --git a/scripts/perftest/python/run_perftest.py b/scripts/perftest/python/run_perftest.py index dcc52c8..b0257d4 100755 --- a/scripts/perftest/python/run_perftest.py +++ b/scripts/perftest/python/run_perftest.py @@ -82,7 +82,7 @@ ML_PREDICT = {'Kmeans': 'Kmeans-predict', # Responsible for execution and metric logging -def algorithm_workflow(algo, exec_type, config_path, file_name, action_mode): +def algorithm_workflow(algo, exec_type, config_path, dml_file_name, action_mode): """ This function is responsible for overall workflow. 
This does the following actions Check if the input is key value argument or list of positional args @@ -99,7 +99,7 @@ def algorithm_workflow(algo, exec_type, config_path, file_name, action_mode): config_path : String Path to read the json file from -file_name : String +dml_file_name : String DML file name to be used while processing the arguments give action_mode : String @@ -116,8 +116,8 @@ def algorithm_workflow(algo, exec_type, config_path, file_name, action_mode): list_args = ' '.join(config_data) args = {'-args': list_args} -folder_name = config_path.split('/')[-1] -mat_type, mat_shape, intercept = get_folder_metrics(folder_name, action_mode) +config_file_name = config_path.split('/')[-1] +mat_type, mat_shape, intercept = get_folder_metrics(config_file_name, action_mode) exit_flag_success = get_existence(config_path, action_mode) @@ -125,7 +125,7 @@ def algorithm_workflow(algo, exec_type, config_path, file_name, action_mode): print('data already exists {}'.format(config_path)) time = 'data_exists' else: -time = exec_dml_and_parse_time(exec_type, file_name, args) +time = exec_dml_and_parse_time(exec_type, dml_file_name, config_file_name, args) # Write a _SUCCESS file only if time is found and in data-gen action_mode if len(time.split('.')) == 2 and action_mode == 'data-gen': http://git-wip-us.apache.org/repos/asf/systemml/blob/152eba1a/scripts/perftest/python/utils.py -- diff --git a/scripts/perftest/python/utils.py b/scripts/perftest/python/utils.py index 7ff3b54..464d7f6 100755 --- a/scripts/perftest/python/utils.py +++ b/scripts/perftest/python/utils.py @@ -138,7 +138,7 @@ def get_existence(path, action_mode): return exist -def exec_dml_and_parse_time(exec_type, file_name, args, Time=True): +def exec_dml_and_parse_time(exec_type, dml_file_name, execution_output_file, args, Time=True): """ This function is responsible of execution of input arguments via python sub process, We also extract time obtained from the output of this subprocess @@ -146,9 +146,12 @@ 
def exec_dml_and_parse_time(exec_type, file_name, args, Time=True): exec_type: String Contains the execution type singlenode / hybrid_spark -file_name: String +dml_file_name: String DML file name to be used while processing the arguments give +execution_output_file: String +Name of the file where the output of the DML run is written out + args: Dictionary Key values pairs depending on the arg type @@ -156,7 +159,7 @@ def exec_dml_and_parse_time(exec_type, file_name, args, Time=True): Boolean argument used to extract time from raw output logs. """ -algorithm = file_name + '.dml' +algorithm = dml_file_name + '.dml' if exec_type == 'singlenode': exec_script = join(os.environ.get('SYSTEMML_HOME'), 'bin', 'systemml-standalone.py') @@ -189,11 +192,15 @@ def exec_dml_and_parse_time(exec_type, file_name, args, Time=True): out1, err1 = proc1.communicate() if "Error" in str(err1): -print('Error Found in {}'.format(file_name)) +print('Erro
systemml git commit: [MINOR] Performance test bug fixes
Repository: systemml Updated Branches: refs/heads/master f046051d4 -> cd1ae5b42 [MINOR] Performance test bug fixes Closes #565 Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/cd1ae5b4 Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/cd1ae5b4 Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/cd1ae5b4 Branch: refs/heads/master Commit: cd1ae5b42499b3b97731de8b28a6d1db9cc9e7f3 Parents: f046051 Author: krishnakalyan3 Authored: Thu Jul 13 14:28:56 2017 -0700 Committer: Nakul Jindal Committed: Thu Jul 13 14:28:56 2017 -0700 -- scripts/perftest/python/datagen.py | 27 --- scripts/perftest/python/predict.py | 48 ++-- scripts/perftest/python/run_perftest.py | 53 - scripts/perftest/python/train.py| 40 +- scripts/perftest/python/utils.py| 112 +++ 5 files changed, 192 insertions(+), 88 deletions(-) -- http://git-wip-us.apache.org/repos/asf/systemml/blob/cd1ae5b4/scripts/perftest/python/datagen.py -- diff --git a/scripts/perftest/python/datagen.py b/scripts/perftest/python/datagen.py index d9c49e9..88a71f0 100755 --- a/scripts/perftest/python/datagen.py +++ b/scripts/perftest/python/datagen.py @@ -22,7 +22,7 @@ import itertools from os.path import join -from utils import split_rowcol, config_writer +from utils import split_rowcol, config_writer, mat_type_check # This file contains configuration settings for data generation DATA_FORMAT = 'csv' @@ -181,8 +181,8 @@ def stats1_datagen(matrix_dim, matrix_type, datagen_dir): NC = int(int(col)/2) config = dict(R=row, C=col, NC=NC, MAXDOMAIN=MAXDOMAIN, DATA=DATA, TYPES=TYPES, SETSIZE=SETSIZE, - LABELSETSIZE=LABELSETSIZE, TYPES1=TYPES1, TYPES2=TYPES2, INDEX1=INDEX1, INDEX2=INDEX2, - fmt=DATA_FORMAT) + LABELSETSIZE=LABELSETSIZE, TYPES1=TYPES1, TYPES2=TYPES2, INDEX1=INDEX1, + INDEX2=INDEX2, fmt=DATA_FORMAT) config_writer(full_path + '.json', config) @@ -207,7 +207,7 @@ def stats2_datagen(matrix_dim, matrix_type, datagen_dir): return full_path 
-def config_packets_datagen(algo_payload, matrix_type, matrix_shape, datagen_dir): +def config_packets_datagen(algo_payload, matrix_type, matrix_shape, datagen_dir, dense_algos): """ This function has two responsibilities. Generate the configuration files for datagen algorithms and return a dictionary that will be used for execution. @@ -217,11 +217,17 @@ def config_packets_datagen(algo_payload, matrix_type, matrix_shape, datagen_dir) family type. matrix_type: String -Type of matrix to generate e.g dense or sparse +Type of matrix to generate e.g dense, sparse, all matrix_shape: String Shape of matrix to generate e.g 100k_10 +datagen_dir: String +Path of the data generation directory + +dense_algos: List +Algorithms that support only dense matrix type + return: Dictionary {string: list} This dictionary contains algorithms to be executed as keys and the path of configuration json files to be executed list of values. @@ -233,13 +239,10 @@ def config_packets_datagen(algo_payload, matrix_type, matrix_shape, datagen_dir) # Cross Product of all configurations for current_family in distinct_families: -if current_family in FAMILY_NO_MATRIX_TYPE: -config = list(itertools.product(matrix_shape, ['dense'])) -config_bundle[current_family] = config -else: -config = list(itertools.product(matrix_shape, matrix_type)) -# clustering : [[10k_1, dense], [10k_2, dense], ...] -config_bundle[current_family] = config +current_matrix_type = mat_type_check(current_family, matrix_type, dense_algos) +config = list(itertools.product(matrix_shape, current_matrix_type)) +# clustering : [[10k_1, dense], [10k_2, dense], ...] 
+config_bundle[current_family] = config config_packets = {} for current_family, configs in config_bundle.items(): http://git-wip-us.apache.org/repos/asf/systemml/blob/cd1ae5b4/scripts/perftest/python/predict.py -- diff --git a/scripts/perftest/python/predict.py b/scripts/perftest/python/predict.py index bc034da..92d3af4 100755 --- a/scripts/perftest/python/predict.py +++ b/scripts/perftest/python/predict.py @@ -21,10 +21,8 @@ #- import sys -import os from os.path import join -import glob -from utils import create_dir, config_writer +from utils import config_writer, relevant_folders, mat_type_check # C
[1/2] systemml git commit: [SYSTEML-1758] added cbind and rbind for GPU
Repository: systemml Updated Branches: refs/heads/master cd1ae5b42 -> 4e47b5e10 http://git-wip-us.apache.org/repos/asf/systemml/blob/4e47b5e1/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java -- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java index f47c15c..17f6b22 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java @@ -2466,15 +2466,15 @@ public class LibMatrixCUDA { /** * Performs elementwise arithmetic operation specified by op of two input matrices in1 and in2 * -* @param ec execution context -* @param gCtx a valid {@link GPUContext} -* @param instName the invoking instruction's name for record {@link Statistics}. -* @param in1 input matrix 1 -* @param in2 input matrix 2 -* @param outputName output matrix name -* @param isLeftTransposed true if left-transposed +* @param ecexecution context +* @param gCtx a valid {@link GPUContext} +* @param instName the invoking instruction's name for record {@link Statistics}. +* @param in1 input matrix 1 +* @param in2 input matrix 2 +* @param outputNameoutput matrix name +* @param isLeftTransposed true if left-transposed * @param isRightTransposed true if right-transposed -* @param op binary operator +* @param opbinary operator * @throws DMLRuntimeException if DMLRuntimeException occurs */ public static void matrixMatrixArithmetic(ExecutionContext ec, GPUContext gCtx, String instName, MatrixObject in1, MatrixObject in2, @@ -2506,13 +2506,14 @@ public class LibMatrixCUDA { /** * Utility to do matrix-scalar operation kernel -* @param gCtx a valid {@link GPUContext} -* @param instName the invoking instruction's name for record {@link Statistics}. 
-* @param ec execution context -* @param in input matrix -* @param outputName output variable name +* +* @param gCtx a valid {@link GPUContext} +* @param instName the invoking instruction's name for record {@link Statistics}. +* @param ecexecution context +* @param ininput matrix +* @param outputNameoutput variable name * @param isInputTransposed true if input is transposed -* @param op operator +* @param opoperator * @throws DMLRuntimeException if DMLRuntimeException occurs */ private static void matrixScalarOp(ExecutionContext ec, GPUContext gCtx, String instName, MatrixObject in, String outputName, boolean isInputTransposed, @@ -2703,9 +2704,9 @@ public class LibMatrixCUDA { /** * Performs a deep device copy of a matrix on the GPU * -* @param ec execution context -* @param instName the invoking instruction's name for record {@link Statistics}. -* @param src source matrix +* @param ec execution context +* @param instName the invoking instruction's name for record {@link Statistics}. +* @param srcsource matrix * @param outputName destination variable name * @throws DMLRuntimeException if DMLRuntimeException occurs */ @@ -2974,6 +2975,80 @@ public class LibMatrixCUDA { /// + /// + // Matrix Manipulation Functions */ + /// + + + public static void cbind(ExecutionContext ec, GPUContext gCtx, String instName, MatrixObject in1, MatrixObject in2, String outputName) throws DMLRuntimeException { + if (ec.getGPUContext(0) != gCtx) + throw new DMLRuntimeException("GPU : Invalid internal state, the GPUContext set with the ExecutionContext is not the same used to run this LibMatrixCUDA function"); + LOG.trace("GPU : cbind" + ", GPUContext=" + gCtx); + + long t1 = 0; + + // only Dense supported + MatrixObject out = getDenseMatrixOutputForGPUInstruction(ec, instName, outputName); + Pointer C = getDensePointer(gCtx, out, instName); + Pointer A = getDensePointer(gCtx, in1, instName); + Pointer B = getDensePointer(gCtx, in2, instName); + + int rowsA = (int) in1.getNumRows(); +
[2/2] systemml git commit: [SYSTEML-1758] added cbind and rbind for GPU
[SYSTEML-1758] added cbind and rbind for GPU Closes #570 Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/4e47b5e1 Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/4e47b5e1 Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/4e47b5e1 Branch: refs/heads/master Commit: 4e47b5e10ff1abdf1ef53c2b1b0d80614ec8e416 Parents: cd1ae5b Author: Nakul Jindal Authored: Thu Jul 13 14:31:47 2017 -0700 Committer: Nakul Jindal Committed: Thu Jul 13 14:31:47 2017 -0700 -- src/main/cpp/kernels/SystemML.cu| 78 +- src/main/cpp/kernels/SystemML.ptx | 1043 ++ .../java/org/apache/sysml/hops/BinaryOp.java| 21 +- src/main/java/org/apache/sysml/lops/Append.java | 95 ++ .../java/org/apache/sysml/lops/AppendCP.java| 93 -- .../instructions/CPInstructionParser.java |4 +- .../instructions/GPUInstructionParser.java | 17 +- .../gpu/BuiltinUnaryGPUInstruction.java |2 +- .../instructions/gpu/GPUInstruction.java|3 + .../gpu/MatrixAppendGPUInstruction.java | 102 ++ .../runtime/matrix/data/LibMatrixCUDA.java | 109 +- .../org/apache/sysml/test/gpu/AppendTest.java | 108 ++ .../test/integration/gpu/ZPackageSuite.java |2 + 13 files changed, 1099 insertions(+), 578 deletions(-) -- http://git-wip-us.apache.org/repos/asf/systemml/blob/4e47b5e1/src/main/cpp/kernels/SystemML.cu -- diff --git a/src/main/cpp/kernels/SystemML.cu b/src/main/cpp/kernels/SystemML.cu index 3098282..297269f 100644 --- a/src/main/cpp/kernels/SystemML.cu +++ b/src/main/cpp/kernels/SystemML.cu @@ -216,7 +216,7 @@ __global__ void matrix_matrix_cellwise_op(double* A, double* B, double* C, bIndex = iy; // rlen == 1 C[outIndex] = binaryOp(A[aIndex], B[bIndex], op); //printf("C[%d] = A[%d](%f) B[%d](%f) (%d %d)\n", outIndex, aIndex, A[aIndex], bIndex, B[bIndex], (ix+1), (iy+1)); -__syncthreads(); + __syncthreads(); } } @@ -238,9 +238,9 @@ __global__ void matrix_scalar_op(double* A, double scalar, double* C, int size, C[index] = binaryOp(scalar, A[index], 
op); } else { C[index] = binaryOp(A[index], scalar, op); -} + } } - __syncthreads(); + __syncthreads(); } @@ -259,6 +259,78 @@ __global__ void fill(double* A, double scalar, int lenA) { } /** + * Appends Matrix B to the right side of Matrix A into a new matrix C + * | 1 2 3 4 | | 8 8 8 | | 1 2 3 4 8 8 8 | + * cbind ( | 9 8 7 6 | , | 7 7 7 | ) = | 9 8 7 6 7 7 7 | + * | 4 3 2 1 | | 9 9 9 | | 4 3 2 1 9 9 9 | + * @param A input matrix A allocated on the GPU + * @param B input matrix B allocated on the GPU + * @param C input matrix C allocated on the GPU + * @param rowsA rows in A + * @param colsA columns in A + * @param rowsB rows in B + * @param colsB columns in B + */ +extern "C" +__global__ void cbind(double *A, double *B, double *C, int rowsA, int colsA, int rowsB, int colsB) { + int ix = blockIdx.x * blockDim.x + threadIdx.x; + int iy = blockIdx.y * blockDim.y + threadIdx.y; + + int colsC = colsA + colsB; + int rowsC = rowsA; + + // Copy an element of A into C into the appropriate location + if (ix < rowsA && iy < colsA) { + double elemA = A[ix * colsA + iy]; + C[ix * colsC + iy] = elemA; + } + + // Copy an element of B into C into the appropriate location + if (ix < rowsB && iy < colsB) { + double elemB = B[ix * colsB + iy]; + C[ix * colsC + (iy + colsA)] = elemB; + } +} + + +/** + * Appends Matrix B to the bottom of Matrix A into a new matrix C + * | 2 3 4 | | 8 8 8 | | 2 3 4 | + * rbind ( | 8 7 6 | , | 7 7 7 | ) = | 8 7 6 | + * | 3 2 1 | | 3 2 1 | + | 8 8 8 | + | 7 7 7 | + * @param A input matrix A allocated on the GPU + * @param B input matrix B allocated on the GPU + * @param C input matrix C allocated on the GPU + * @param rowsA rows in A + * @param colsA columns in A + * @param rowsB rows in B + * @param colsB columns in B + */ +extern "C" +__global__ void rbind(double *A, double *B, double *C, int rowsA, int colsA, int rowsB, int colsB) { + int ix = blockIdx.x * blockDim.x + threadIdx.x; + int iy = blockIdx.y * blockDim.y + threadIdx.y; + + int rowsC = 
rowsA + rowsB; + int colsC = colsA; + + // Copy an element of A into C into the appropriate location + if (ix < rowsA && iy < colsA
systemml git commit: [SYSTEMML-1713] Added mem estimates for various GPU ops
Repository: systemml Updated Branches: refs/heads/master 4e47b5e10 -> 32ba9cf9f [SYSTEMML-1713] Added mem estimates for various GPU ops Closes #553 Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/32ba9cf9 Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/32ba9cf9 Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/32ba9cf9 Branch: refs/heads/master Commit: 32ba9cf9fdff2aba7432c7a4e51317b6e5bf1a18 Parents: 4e47b5e Author: Nakul Jindal Authored: Thu Jul 13 15:01:11 2017 -0700 Committer: Nakul Jindal Committed: Thu Jul 13 15:01:11 2017 -0700 -- .../java/org/apache/sysml/hops/AggBinaryOp.java | 57 - .../java/org/apache/sysml/hops/AggUnaryOp.java | 44 +++- .../java/org/apache/sysml/hops/BinaryOp.java| 32 ++- src/main/java/org/apache/sysml/hops/Hop.java| 4 +- .../java/org/apache/sysml/hops/ReorgOp.java | 4 +- .../java/org/apache/sysml/hops/TernaryOp.java | 17 +- .../java/org/apache/sysml/hops/UnaryOp.java | 16 +- .../instructions/gpu/context/CSRPointer.java| 6 +- .../runtime/matrix/data/LibMatrixCUDA.java | 214 --- 9 files changed, 279 insertions(+), 115 deletions(-) -- http://git-wip-us.apache.org/repos/asf/systemml/blob/32ba9cf9/src/main/java/org/apache/sysml/hops/AggBinaryOp.java -- diff --git a/src/main/java/org/apache/sysml/hops/AggBinaryOp.java b/src/main/java/org/apache/sysml/hops/AggBinaryOp.java index eb83549..9077976 100644 --- a/src/main/java/org/apache/sysml/hops/AggBinaryOp.java +++ b/src/main/java/org/apache/sysml/hops/AggBinaryOp.java @@ -21,19 +21,19 @@ package org.apache.sysml.hops; import org.apache.sysml.api.DMLScript; import org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM; +import org.apache.sysml.hops.Hop.MultiThreadedHop; import org.apache.sysml.hops.rewrite.HopRewriteUtils; import org.apache.sysml.lops.Aggregate; import org.apache.sysml.lops.Binary; import org.apache.sysml.lops.DataPartition; import org.apache.sysml.lops.Group; -import 
org.apache.sysml.hops.Hop.MultiThreadedHop; import org.apache.sysml.lops.Lop; import org.apache.sysml.lops.LopProperties.ExecType; import org.apache.sysml.lops.LopsException; import org.apache.sysml.lops.MMCJ; +import org.apache.sysml.lops.MMCJ.MMCJType; import org.apache.sysml.lops.MMRJ; import org.apache.sysml.lops.MMTSJ; -import org.apache.sysml.lops.MMCJ.MMCJType; import org.apache.sysml.lops.MMTSJ.MMTSJType; import org.apache.sysml.lops.MMZip; import org.apache.sysml.lops.MapMult; @@ -343,11 +343,48 @@ public class AggBinaryOp extends Hop implements MultiThreadedHop protected double computeIntermediateMemEstimate( long dim1, long dim2, long nnz ) { double ret = 0; - + + if (DMLScript.USE_ACCELERATOR) { + // In GPU Mode, intermediate memory is only needed in case of one of the matrix blocks is sparse + // When sparse block is converted to dense and a dense MM takes place, we need (dim1 * dim2) + // When dense block is converted to sparse and a sparse MM takes place, we need (dim1 * dim2 * 2) + + Hop in1 = _input.get(0); + Hop in2 = _input.get(1); + double in1Sparsity = OptimizerUtils.getSparsity(in1.getDim1(), in1.getDim2(), in1.getNnz()); + double in2Sparsity = OptimizerUtils.getSparsity(in2.getDim1(), in2.getDim2(), in2.getNnz()); + + boolean in1Sparse = in1Sparsity < MatrixBlock.SPARSITY_TURN_POINT; + boolean in2Sparse = in2Sparsity < MatrixBlock.SPARSITY_TURN_POINT; + + boolean in1UltraSparse = in1Sparsity < MatrixBlock.ULTRA_SPARSITY_TURN_POINT; + boolean in2UltraSparse = in2Sparsity < MatrixBlock.ULTRA_SPARSITY_TURN_POINT; + + // For Matmult X * Y, if X is sparse, Y is dense, X is converted to dense + // If X is ultrasparse, Y is converted to sparse + if (in1Sparse ^ in2Sparse) { // one sparse, one dense + if (in1Sparse) { + if (in1UltraSparse) { + ret += 2 * OptimizerUtils.estimateSizeExactSparsity(in2.getDim1(), in2.getDim2(), in2.getNnz()); + } else { + ret += OptimizerUtils.estimateSizeExactSparsity(in1.getDim1(), in1.getDim2(), in1.getNnz()); + } + } 
else if (in2Sparse) { + if
systemml git commit: [MINOR][DOC] Performance Test Documentation
Repository: systemml Updated Branches: refs/heads/master 32ba9cf9f -> 61467dab8 [MINOR][DOC] Performance Test Documentation Closes #563 Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/61467dab Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/61467dab Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/61467dab Branch: refs/heads/master Commit: 61467dab86fad98e15d0cf529aaea7ba0cd6083f Parents: 32ba9cf Author: krishnakalyan3 Authored: Thu Jul 13 15:04:28 2017 -0700 Committer: Nakul Jindal Committed: Thu Jul 13 15:04:28 2017 -0700 -- docs/img/performance-test/perf_test_arch.png | Bin 0 -> 25831 bytes docs/python-performance-test.md | 129 ++ 2 files changed, 129 insertions(+) -- http://git-wip-us.apache.org/repos/asf/systemml/blob/61467dab/docs/img/performance-test/perf_test_arch.png -- diff --git a/docs/img/performance-test/perf_test_arch.png b/docs/img/performance-test/perf_test_arch.png new file mode 100644 index 000..4763c8b Binary files /dev/null and b/docs/img/performance-test/perf_test_arch.png differ http://git-wip-us.apache.org/repos/asf/systemml/blob/61467dab/docs/python-performance-test.md -- diff --git a/docs/python-performance-test.md b/docs/python-performance-test.md new file mode 100644 index 000..c265bc6 --- /dev/null +++ b/docs/python-performance-test.md @@ -0,0 +1,129 @@ +# Performance Testing Algorithms User Manual + +This user manual contains details on how to conduct automated performance tests. Work was mostly done in this [PR](https://github.com/apache/systemml/pull/537) and part of [SYSTEMML-1451](https://issues.apache.org/jira/browse/SYSTEMML-1451). Our aim was to move from existing `bash` based performance tests to automatic `python` based automatic performance tests. + +### Architecture +Our performance tests suit contains `7` families namely `binomial`, `multinomial`, `stats1`, `stats2`, `regression1`, `regression2`, `clustering`. 
Within these families we have algorithms grouped under it. Typically a family is a set of algorithms that require the same data generation script. + +- Exceptions: `regression1`, `regression2` and `binomial`. We decide to include these algorithms in separate families to keep the architecture simple. + +![System ML Architecture](img/performance-test/perf_test_arch.png) + +On a very high level use construct a string with arguments required to run each operation. Once this string is constructed we use the subprocess module to execute this string and extract time from the standard out. + +We also use `json` module write our configurations to a json file. This ensure that our current operation is easy to debug. + + +We have `5` files in performance test suit `run_perftest.py`, `datagen.py`, `train.py`, `predict.py` and `utils.py`. + +`datagen.py`, `train.py` and `predict.py` generate a dictionary. Our key is the name of algorithm being processed and values is a list with path(s) where all the data required is present. We define this dictionary as a configuration packet. + +We will describe each of them in detail the following sections below. + +`run_perftest.py` at a high level creates `algos_to_run` list. This list is tuple with key as algorithm and value as family to be executed in our performance test. + +In `datagen.py` script we have all functions required to generate data. We return the required configuration packet as a result of this script, that contains key as the `data-gen` script to run and values with location to read data-gen json files from. + +In `train.py` script we have functions required to generate training output. We return the required configuration packet as a result of this script, that contains key as the algorithm to run and values with location to read training json files from. + +The file `predict.py` contains all functions for all algorithms in the performance test that contain predict script. 
We return the required configuration packet as a result of this script, that contains key as the algorithm to run and values with location to read predict json files from. + +In the file `utils.py` we have all the helper functions required in our performance test. These functions do operations like write `json` files, extract time from std out etc. + +### Adding New Algorithms +While adding a new algorithm we need know if it has to be part of the any pre existing family. If this algorithm depends on a new data generation script we would need to create a new family. Steps below to take below to add a new algorithm. + +Following changes to `run_perftest.py`: + +- Add the algorithm to `ML_ALGO`
systemml git commit: [SYSTEMML-1795] Specify a set of GPUs to use for a given machine
Repository: systemml Updated Branches: refs/heads/master 1f5b14dda -> fec209306 [SYSTEMML-1795] Specify a set of GPUs to use for a given machine Can specify: a) -1 for all GPUs b) a specific number of GPU c) a comma separated list of GPUs d) a range of GPUs Closes #587 Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/fec20930 Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/fec20930 Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/fec20930 Branch: refs/heads/master Commit: fec209306d3c7e55673872f431d43ceda53b7a6c Parents: 1f5b14d Author: Nakul Jindal Authored: Fri Jul 21 13:55:03 2017 -0700 Committer: Nakul Jindal Committed: Fri Jul 21 13:55:04 2017 -0700 -- conf/SystemML-config.xml.template | 4 +- .../apache/sysml/api/ScriptExecutorUtils.java | 5 +- .../java/org/apache/sysml/conf/DMLConfig.java | 4 +- .../gpu/context/GPUContextPool.java | 90 +--- .../org/apache/sysml/test/unit/UtilsTest.java | 78 + 5 files changed, 160 insertions(+), 21 deletions(-) -- http://git-wip-us.apache.org/repos/asf/systemml/blob/fec20930/conf/SystemML-config.xml.template -- diff --git a/conf/SystemML-config.xml.template b/conf/SystemML-config.xml.template index 11e86ed..8608a9c 100644 --- a/conf/SystemML-config.xml.template +++ b/conf/SystemML-config.xml.template @@ -78,6 +78,6 @@ false - - -1 + +-1 http://git-wip-us.apache.org/repos/asf/systemml/blob/fec20930/src/main/java/org/apache/sysml/api/ScriptExecutorUtils.java -- diff --git a/src/main/java/org/apache/sysml/api/ScriptExecutorUtils.java b/src/main/java/org/apache/sysml/api/ScriptExecutorUtils.java index 0e0950e..b094c91 100644 --- a/src/main/java/org/apache/sysml/api/ScriptExecutorUtils.java +++ b/src/main/java/org/apache/sysml/api/ScriptExecutorUtils.java @@ -78,9 +78,8 @@ public class ScriptExecutorUtils { LibMatrixDNN.DISPLAY_STATISTICS = dmlconf.getBooleanValue(DMLConfig.EXTRA_DNN_STATS); DMLScript.FINEGRAINED_STATISTICS = 
dmlconf.getBooleanValue(DMLConfig.EXTRA_FINEGRAINED_STATS); - // Sets the maximum number of GPUs per process, -1 for all available - // GPUs - GPUContextPool.PER_PROCESS_MAX_GPUS = dmlconf.getIntValue(DMLConfig.MAX_GPUS_PER_PROCESS); + // Sets the GPUs to use for this process (a range, all GPUs, comma separated list or a specific GPU) + GPUContextPool.AVAILABLE_GPUS = dmlconf.getTextValue(DMLConfig.AVAILABLE_GPUS); Statistics.startRunTimer(); try { // run execute (w/ exception handling to ensure proper shutdown) http://git-wip-us.apache.org/repos/asf/systemml/blob/fec20930/src/main/java/org/apache/sysml/conf/DMLConfig.java -- diff --git a/src/main/java/org/apache/sysml/conf/DMLConfig.java b/src/main/java/org/apache/sysml/conf/DMLConfig.java index c248098..a6a4b5e 100644 --- a/src/main/java/org/apache/sysml/conf/DMLConfig.java +++ b/src/main/java/org/apache/sysml/conf/DMLConfig.java @@ -78,7 +78,7 @@ public class DMLConfig public static final String EXTRA_FINEGRAINED_STATS = "systemml.stats.finegrained"; //boolean public static final String EXTRA_GPU_STATS = "systemml.stats.extraGPU"; //boolean public static final String EXTRA_DNN_STATS = "systemml.stats.extraDNN"; //boolean - public static final String MAX_GPUS_PER_PROCESS = "systemml.gpu.perProcessMax"; // boolean, maximum number of gpus to use, -1 for all + public static final String AVAILABLE_GPUS = "systemml.gpu.availableGPUs"; // String to specify which GPUs to use (a range, all GPUs, comma separated list or a specific GPU) // Fraction of available memory to use. The available memory is computer when the GPUContext is created // to handle the tradeoff on calling cudaMemGetInfo too often. 
@@ -123,7 +123,7 @@ public class DMLConfig _defaultVals.put(EXTRA_DNN_STATS,"false" ); _defaultVals.put(GPU_MEMORY_UTILIZATION_FACTOR, "0.9" ); - _defaultVals.put(MAX_GPUS_PER_PROCESS, "-1"); + _defaultVals.put(AVAILABLE_GPUS, "-1"); } public DMLConfig() http://git-wip-us.apache.org/repos/asf/systemml/blob/fec20930/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUContextPool.java -- diff --git a/src/main/java/org/apache/sysml/runtime/instructions
systemml git commit: [SYSTEMML-1806] fix for DMLConfig#setText
Repository: systemml Updated Branches: refs/heads/master 7ae1b1c4c -> 3fd8e495e [SYSTEMML-1806] fix for DMLConfig#setText Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/3fd8e495 Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/3fd8e495 Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/3fd8e495 Branch: refs/heads/master Commit: 3fd8e495e26ba70eed22bf16c51a7bf69474c1c3 Parents: 7ae1b1c Author: Nakul Jindal Authored: Wed Jul 26 14:24:53 2017 -0700 Committer: Nakul Jindal Committed: Wed Jul 26 14:24:53 2017 -0700 -- .../java/org/apache/sysml/conf/DMLConfig.java | 71 ++-- .../org/apache/sysml/test/unit/UtilsTest.java | 60 + 2 files changed, 97 insertions(+), 34 deletions(-) -- http://git-wip-us.apache.org/repos/asf/systemml/blob/3fd8e495/src/main/java/org/apache/sysml/conf/DMLConfig.java -- diff --git a/src/main/java/org/apache/sysml/conf/DMLConfig.java b/src/main/java/org/apache/sysml/conf/DMLConfig.java index a6a4b5e..415bb57 100644 --- a/src/main/java/org/apache/sysml/conf/DMLConfig.java +++ b/src/main/java/org/apache/sysml/conf/DMLConfig.java @@ -22,6 +22,7 @@ package org.apache.sysml.conf; import java.io.ByteArrayInputStream; import java.io.FileNotFoundException; import java.io.IOException; +import java.io.InputStream; import java.io.StringWriter; import java.util.HashMap; import java.util.Map; @@ -45,6 +46,7 @@ import org.apache.sysml.runtime.DMLRuntimeException; import org.apache.sysml.runtime.io.IOUtilFunctions; import org.w3c.dom.Document; import org.w3c.dom.Element; +import org.w3c.dom.Node; import org.w3c.dom.NodeList; import org.xml.sax.SAXException; @@ -97,6 +99,8 @@ public class DMLConfig private String _fileName = null; private Element _xmlRoot = null; + private DocumentBuilder _documentBuilder = null; + private Document _document = null; static { @@ -130,7 +134,7 @@ public class DMLConfig { } - + public DMLConfig(String fileName) throws ParseException, 
FileNotFoundException { @@ -169,25 +173,32 @@ public class DMLConfig */ private void parseConfig () throws ParserConfigurationException, SAXException, IOException { - DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); - factory.setIgnoringComments(true); //ignore XML comments - DocumentBuilder builder = factory.newDocumentBuilder(); - Document domTree = null; + DocumentBuilder builder = getDocumentBuilder(); + _document = null; if( _fileName.startsWith("hdfs:") || _fileName.startsWith("gpfs:") || IOUtilFunctions.isObjectStoreFileScheme(new Path(_fileName)) ) { Path configFilePath = new Path(_fileName); FileSystem DFS = IOUtilFunctions.getFileSystem(configFilePath); -domTree = builder.parse(DFS.open(configFilePath)); + _document = builder.parse(DFS.open(configFilePath)); } else // config from local file system { - domTree = builder.parse(_fileName); + _document = builder.parse(_fileName); } - - _xmlRoot = domTree.getDocumentElement(); + + _xmlRoot = _document.getDocumentElement(); } - + + private DocumentBuilder getDocumentBuilder() throws ParserConfigurationException { + if (_documentBuilder == null) { + DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); + factory.setIgnoringComments(true); //ignore XML comments + _documentBuilder = factory.newDocumentBuilder(); + } + return _documentBuilder; + } + /** * Method to get string value of a configuration parameter * Handles processing of configuration parameters @@ -242,21 +253,7 @@ public class DMLConfig return textVal; } - /** -* Method to update the string value of an element identified by a tag name -* @param element the DOM element -* @param tagName the tag name -* @param newTextValue the new string value -*/ - private static void setTextValue(Element element, String tagName, String newTextValue) { - - NodeList list = element.getElementsByTagName(tagName); - if (list != nul
systemml git commit: [MINOR] fix for SYSTEMML-1795
Repository: systemml Updated Branches: refs/heads/master 3fd8e495e -> 2663ccd41 [MINOR] fix for SYSTEMML_1795 The GPUContextPool.AVAILABLE_GPUS is read after the lops are constructed, but the value needs to be read before. This patch is a fix that problem. Closes #592 Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/2663ccd4 Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/2663ccd4 Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/2663ccd4 Branch: refs/heads/master Commit: 2663ccd417e59908c3a461adfd217b667b58ea2d Parents: 3fd8e49 Author: Nakul Jindal Authored: Wed Jul 26 15:37:06 2017 -0700 Committer: Nakul Jindal Committed: Wed Jul 26 15:37:06 2017 -0700 -- bin/systemml-standalone.py | 3 ++- src/main/java/org/apache/sysml/api/DMLScript.java | 6 +- src/main/java/org/apache/sysml/api/ScriptExecutorUtils.java| 2 -- .../java/org/apache/sysml/api/mlcontext/ScriptExecutor.java| 4 src/main/java/org/apache/sysml/conf/DMLConfig.java | 5 +++-- .../sysml/runtime/instructions/gpu/context/GPUContextPool.java | 2 +- 6 files changed, 15 insertions(+), 7 deletions(-) -- http://git-wip-us.apache.org/repos/asf/systemml/blob/2663ccd4/bin/systemml-standalone.py -- diff --git a/bin/systemml-standalone.py b/bin/systemml-standalone.py index 367bcdf..a0ee8db 100755 --- a/bin/systemml-standalone.py +++ b/bin/systemml-standalone.py @@ -151,7 +151,8 @@ systemml_default_java_opts = \ '-Xmx8g -Xms4g -Xmn1g ' + \ '-cp ' + classpath + ' ' + \ '-Dlog4j.configuration=file:' + log4j_properties_path + ' ' \ -'-Duser.dir=' + user_dir +'-Duser.dir=' + user_dir +#'-agentlib:jdwp=transport=dt_socket,server=y,suspend=y,address=8111' # Reads in key-value pairs from the conf/systemml-env.sh file http://git-wip-us.apache.org/repos/asf/systemml/blob/2663ccd4/src/main/java/org/apache/sysml/api/DMLScript.java -- diff --git a/src/main/java/org/apache/sysml/api/DMLScript.java 
b/src/main/java/org/apache/sysml/api/DMLScript.java index f428aa2..9cb5ebe 100644 --- a/src/main/java/org/apache/sysml/api/DMLScript.java +++ b/src/main/java/org/apache/sysml/api/DMLScript.java @@ -85,6 +85,7 @@ import org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext; import org.apache.sysml.runtime.controlprogram.parfor.ProgramConverter; import org.apache.sysml.runtime.controlprogram.parfor.stat.InfrastructureAnalyzer; import org.apache.sysml.runtime.controlprogram.parfor.util.IDHandler; +import org.apache.sysml.runtime.instructions.gpu.context.GPUContextPool; import org.apache.sysml.runtime.io.IOUtilFunctions; import org.apache.sysml.runtime.matrix.CleanupMR; import org.apache.sysml.runtime.matrix.mapred.MRConfigurationNames; @@ -659,13 +660,16 @@ public class DMLScript //print basic time and environment info printStartExecInfo( dmlScriptStr ); - //Step 1: parse configuration files + //Step 1: parse configuration files & write any configuration specific global variables DMLConfig dmlconf = DMLConfig.readConfigurationFile(fnameOptConfig); ConfigurationManager.setGlobalConfig(dmlconf); CompilerConfig cconf = OptimizerUtils.constructCompilerConfig(dmlconf); ConfigurationManager.setGlobalConfig(cconf); LOG.debug("\nDML config: \n" + dmlconf.getConfigInfo()); + // Sets the GPUs to use for this process (a range, all GPUs, comma separated list or a specific GPU) + GPUContextPool.AVAILABLE_GPUS = dmlconf.getTextValue(DMLConfig.AVAILABLE_GPUS); + //Step 2: set local/remote memory if requested (for compile in AM context) if( dmlconf.getBooleanValue(DMLConfig.YARN_APPMASTER) ){ DMLAppMasterUtils.setupConfigRemoteMaxMemory(dmlconf); http://git-wip-us.apache.org/repos/asf/systemml/blob/2663ccd4/src/main/java/org/apache/sysml/api/ScriptExecutorUtils.java -- diff --git a/src/main/java/org/apache/sysml/api/ScriptExecutorUtils.java b/src/main/java/org/apache/sysml/api/ScriptExecutorUtils.java index ebbcc21..389f661 100644 --- 
a/src/main/java/org/apache/sysml/api/ScriptExecutorUtils.java +++ b/src/main/java/org/apache/sysml/api/ScriptExecutorUtils.java @@ -79,8 +79,6 @@ public class ScriptExecutorUtils { LibMatrixDNN.DISPLAY_STATISTICS = dmlconf.getBooleanValue(DMLConfig.EXTRA_DNN_STATS); DMLScript.FINEGRAINED_STATISTIC
[2/2] systemml git commit: [SYSTEMML-1451] phase 2 work
[SYSTEMML-1451] phase 2 work Completed these tasks as part for Phase 2 for Google Summer of Code '17 - Decouple systemml-spark-submit.py - Decouple systemml-standalone.py - Refractor perf test suit to accept args like debug, stats, config etc... - Add HDFS support - Google Docs support - Compare SystemML with previous versions - Pylint, Comment - Extra arguments configuration Test - Windows Test - Doc update - systemml standalone comments - systemml spark submit comments Closes #575 Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/e94374af Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/e94374af Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/e94374af Branch: refs/heads/master Commit: e94374afb2e6be5dc81524f9c7a5de09b9f4ba26 Parents: a2db1ad Author: krishnakalyan3 Authored: Tue Aug 1 13:46:30 2017 -0700 Committer: Nakul Jindal Committed: Tue Aug 1 13:46:30 2017 -0700 -- bin/systemml-spark-submit.py | 278 +++ bin/systemml-standalone.py| 256 +- bin/utils.py | 113 ++ docs/python-performance-test.md | 35 +- scripts/perftest/python/datagen.py| 141 scripts/perftest/python/google_docs/stats.py | 113 ++ scripts/perftest/python/google_docs/update.py | 110 ++ scripts/perftest/python/predict.py| 156 - scripts/perftest/python/run_perftest.py | 135 --- scripts/perftest/python/train.py | 257 +++--- scripts/perftest/python/utils.py | 390 - scripts/perftest/python/utils_exec.py | 137 scripts/perftest/python/utils_fs.py | 162 + scripts/perftest/python/utils_misc.py | 347 ++ 14 files changed, 1580 insertions(+), 1050 deletions(-) -- http://git-wip-us.apache.org/repos/asf/systemml/blob/e94374af/bin/systemml-spark-submit.py -- diff --git a/bin/systemml-spark-submit.py b/bin/systemml-spark-submit.py index 30974ec..b6426b3 100755 --- a/bin/systemml-spark-submit.py +++ b/bin/systemml-spark-submit.py @@ -21,167 +21,131 @@ # - import os -import sys -from os.path import join, exists, abspath 
-from os import environ import glob -import argparse -import shutil +from os.path import join import platform - -if environ.get('SPARK_HOME') is None: -print('SPARK_HOME not set') -sys.exit(1) -else: -spark_home = environ.get('SPARK_HOME') +import argparse +from utils import get_env_systemml_home, get_env_spark_home, find_dml_file, log4j_path, config_path + + +def default_jars(systemml_home): +""" +return: String +Location of systemml and jcuda jars +""" +build_dir = join(systemml_home, 'target') +lib_dir = join(build_dir, 'lib') +systemml_jar = build_dir + os.sep + "SystemML.jar" +jcuda_jars = glob.glob(lib_dir + os.sep + "jcu*.jar") +target_jars = ','.join(jcuda_jars) +return target_jars, systemml_jar + + +def spark_submit_entry(master, driver_memory, num_executors, executor_memory, + executor_cores, conf, + nvargs, args, config, explain, debug, stats, gpu, f): +""" +This function is responsible for the execution of arguments via +subprocess call in hybrid_spark mode +""" + +spark_home = get_env_spark_home() +systemml_home = get_env_systemml_home() spark_path = join(spark_home, 'bin', 'spark-submit') +script_file = find_dml_file(systemml_home, f) +# Jars +cuda_jars, systemml_jars = default_jars(systemml_home) -# error help print -def print_usage_and_exit(): -print('Usage: ./systemml-spark-submit.py -f [arguments]') -sys.exit(1) - -cparser = argparse.ArgumentParser(description='System-ML Spark Submit Script') - -# SPARK-SUBMIT Options -cparser.add_argument('--master', default='local[*]', help='local, yarn-client, yarn-cluster', metavar='') -cparser.add_argument('--driver-memory', default='5G', help='Memory for driver (e.g. 
512M)', metavar='') -cparser.add_argument('--num-executors', default='2', help='Number of executors to launch', metavar='') -cparser.add_argument('--executor-memory', default='2G', help='Memory per executor', metavar='') -cparser.add_argument('--executor-cores', default='1', help='Number of cores', metavar='') -cparser.add_argument('--conf', help='Spark configuration file', nargs='+', metavar='') - -# SYSTEM-ML Options -cparser.add_argument('-nvargs', help='List of attributeName-attributeValue pairs', nargs='+', metavar='') -cparser.add_argument('-args', help='List of positional argument values', metava
[1/2] systemml git commit: [SYSTEMML-1451] phase 2 work
Repository: systemml Updated Branches: refs/heads/master a2db1ad89 -> e94374afb http://git-wip-us.apache.org/repos/asf/systemml/blob/e94374af/scripts/perftest/python/train.py -- diff --git a/scripts/perftest/python/train.py b/scripts/perftest/python/train.py index 627ba03..ec784d7 100755 --- a/scripts/perftest/python/train.py +++ b/scripts/perftest/python/train.py @@ -22,14 +22,18 @@ import sys from os.path import join -from utils import config_writer, relevant_folders, mat_type_check +from utils_misc import config_writer, mat_type_check from functools import reduce +from utils_fs import relevant_folders # Contains configuration setting for training DATA_FORMAT = 'csv' -def binomial_m_svm_train(save_folder_name, datagen_dir, train_dir): +def binomial_m_svm_train(save_folder_name, datagen_dir, train_dir, config_dir): + +save_path = join(config_dir, save_folder_name) +train_write = join(train_dir, save_folder_name) data_folders = [] for i in [0, 1]: @@ -39,21 +43,19 @@ def binomial_m_svm_train(save_folder_name, datagen_dir, train_dir): maxiter = 20 X = join(datagen_dir, 'X.data') Y = join(datagen_dir, 'Y.data') - -full_path_train = join(train_dir, save_folder_name + '.' + str(i)) -data_folders.append(full_path_train) - -model = join(full_path_train, 'model.data') -Log = join(full_path_train, 'Log.data') - +model = join(train_write + '.' + str(i), 'model.data') +Log = join(train_write + '.' + str(i), 'Log.data') config = dict(X=X, Y=Y, icpt=icpt, classes=2, reg=reg, tol=tol, maxiter=maxiter, model=model, Log=Log, fmt=DATA_FORMAT) -config_writer(full_path_train + '.json', config) +config_writer(save_path + '.' + str(i) + '.json', config) +data_folders.append(save_path + '.' 
+ str(i)) return data_folders -def binomial_l2_svm_train(save_folder_name, datagen_dir, train_dir): +def binomial_l2_svm_train(save_folder_name, datagen_dir, train_dir, config_dir): +save_path = join(config_dir, save_folder_name) +train_write = join(train_dir, save_folder_name) data_folders = [] for i in [0, 1]: @@ -63,23 +65,21 @@ def binomial_l2_svm_train(save_folder_name, datagen_dir, train_dir): maxiter = '100' X = join(datagen_dir, 'X.data') Y = join(datagen_dir, 'Y.data') - -full_path_train = join(train_dir, save_folder_name + '.' + str(i)) -data_folders.append(full_path_train) - -model = join(full_path_train, 'model.data') -Log = join(full_path_train, 'Log.data') - +model = join(train_write + '.' + str(i), 'model.data') +Log = join(train_write + '.' + str(i), 'Log.data') config = dict(X=X, Y=Y, icpt=icpt, reg=reg, tol=tol, maxiter=maxiter, model=model, Log=Log, fmt=DATA_FORMAT) -config_writer(full_path_train + '.json', config) +config_writer(save_path + '.' + str(i) + '.json', config) +data_folders.append(save_path + '.' + str(i)) return data_folders -def binomial_multilogreg_train(save_folder_name, datagen_dir, train_dir): -data_folders = [] +def binomial_multilogreg_train(save_folder_name, datagen_dir, train_dir, config_dir): +save_path = join(config_dir, save_folder_name) +train_write = join(train_dir, save_folder_name) +data_folders = [] for i in [0, 1, 2]: icpt = str(i) reg = '0.01' @@ -88,125 +88,117 @@ def binomial_multilogreg_train(save_folder_name, datagen_dir, train_dir): mii = '5' X = join(datagen_dir, 'X.data') Y = join(datagen_dir, 'Y.data') - -full_path_train = join(train_dir, save_folder_name + '.' + str(i)) -data_folders.append(full_path_train) - -B = join(full_path_train, 'B.data') - +B = join(train_write + '.' 
+ str(i), 'B.data') config = dict(X=X, Y=Y, icpt=icpt, reg=reg, tol=tol, moi=moi, mii=mii, B=B) -config_writer(full_path_train + '.json', config) -return data_folders - - -def multinomial_m_svm_train(save_folder_name, datagen_dir, train_dir): - -data_folders = [] -for i in [0, 1]: -icpt = str(i) -reg = '0.01' -tol = '0.0001' -maxiter = '20' -X = join(datagen_dir, 'X.data') -Y = join(datagen_dir, 'Y.data') - -full_path_train = join(train_dir, save_folder_name + '.' + str(i)) -model = join(full_path_train, 'model.data') -Log = join(full_path_train, 'Log.data') - -config = dict(X=X, Y=Y, icpt=icpt, classes=150, reg=reg, tol=tol, maxiter=maxiter, - model=model, Log=Log, fmt=DATA_FORMAT) -config_writer(full_path_train + '.json', config) -data_folders.append(full_path_train) +config_writer(save_path + '.' + str(i) + '.json', config) +
systemml git commit: [MINOR] bug fixes in the GPU backend
Repository: systemml Updated Branches: refs/heads/master 98a9d653d -> 815ca4f2a [MINOR] bug fixes in the GPU backend - Each thread is assigned a cuda library handle - JCudaKernels is also made thread safe - Removed setting GPUContext to null - Bug fix in initial gpu budget estimate - Cuda Kernels use blockId.x and threadId.x only Closes #607 Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/815ca4f2 Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/815ca4f2 Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/815ca4f2 Branch: refs/heads/master Commit: 815ca4f2aedcbe491d10a873db99a9b5e6f29226 Parents: 98a9d65 Author: Nakul Jindal Authored: Tue Aug 8 13:29:11 2017 -0700 Committer: Nakul Jindal Committed: Tue Aug 8 13:29:11 2017 -0700 -- src/main/cpp/kernels/SystemML.cu| 54 +-- src/main/cpp/kernels/SystemML.ptx | 333 +-- .../controlprogram/ParForProgramBlock.java | 3 - .../controlprogram/parfor/LocalParWorker.java | 12 +- .../cp/FunctionCallCPInstruction.java | 7 - .../gpu/context/ExecutionConfig.java| 26 +- .../instructions/gpu/context/GPUContext.java| 94 +++--- .../gpu/context/GPUContextPool.java | 2 +- .../instructions/gpu/context/JCudaKernels.java | 5 +- .../org/apache/sysml/test/gpu/GPUTests.java | 18 + .../test/gpu/MatrixMultiplicationOpTest.java| 1 + 11 files changed, 303 insertions(+), 252 deletions(-) -- http://git-wip-us.apache.org/repos/asf/systemml/blob/815ca4f2/src/main/cpp/kernels/SystemML.cu -- diff --git a/src/main/cpp/kernels/SystemML.cu b/src/main/cpp/kernels/SystemML.cu index 297269f..dcd64b2 100644 --- a/src/main/cpp/kernels/SystemML.cu +++ b/src/main/cpp/kernels/SystemML.cu @@ -35,12 +35,13 @@ nvcc -ptx -arch=sm_30 SystemML.cu */ extern "C" __global__ void copy_u2l_dense(double* ret, int dim, int N) { - int ix = blockIdx.x * blockDim.x + threadIdx.x; - int iy = blockIdx.y * blockDim.y + threadIdx.y; + int tid = blockIdx.x * blockDim.x + threadIdx.x; + int ix 
= tid / dim; + int iy = tid % dim; int id_dest = iy * dim + ix; if(iy > ix && id_dest < N) { // TODO: Potential to reduce the number of threads by half - int id_src = ix * dim + iy; + int id_src = tid; ret[id_dest] = ret[id_src]; } } @@ -104,8 +105,9 @@ __forceinline__ __device__ double binaryOp(double x, double y, int op) { extern "C" __global__ void relu(double* A, double* ret, int rlen, int clen) { - int ix = blockIdx.x * blockDim.x + threadIdx.x; - int iy = blockIdx.y * blockDim.y + threadIdx.y; + int tid = blockIdx.x * blockDim.x + threadIdx.x; + int ix = tid / clen; + int iy = tid % clen; if(ix < rlen && iy < clen) { int index = ix * clen + iy; ret[index] = max(0.0, A[index]); @@ -115,8 +117,9 @@ __global__ void relu(double* A, double* ret, int rlen, int clen) { // This method computes the backpropagation errors for previous layer of relu operation extern "C" __global__ void relu_backward(double* X, double* dout, double* ret, int rlen, int clen) { - int ix = blockIdx.x * blockDim.x + threadIdx.x; - int iy = blockIdx.y * blockDim.y + threadIdx.y; + int tid = blockIdx.x * blockDim.x + threadIdx.x; + int ix = tid / clen; + int iy = tid % clen; if(ix < rlen && iy < clen) { int index = ix * clen + iy; ret[index] = X[index] > 0 ? 
dout[index] : 0; @@ -129,8 +132,9 @@ __global__ void relu_backward(double* X, double* dout, double* ret, int rlen, i // This operation is often followed by conv2d and hence we have introduced bias_add(input, bias) built-in function extern "C" __global__ void bias_add(double* input, double* bias, double* ret, int rlen, int clen, int PQ) { - int ix = blockIdx.x * blockDim.x + threadIdx.x; - int iy = blockIdx.y * blockDim.y + threadIdx.y; + int tid = blockIdx.x * blockDim.x + threadIdx.x; + int ix = tid / clen; + int iy = tid % clen; if(ix < rlen && iy < clen) { int index = ix * clen + iy; int biasIndex = iy / PQ; @@ -141,8 +145,9 @@ __global__ void bias_add(double* input, double* bias, double* ret, int rlen, in // Performs the operation "ret <- A + alpha*B", where B is a vector extern "C" __global__ void daxpy_matrix_vector(double* A, double* B, double alpha, double* ret, int rlenA, int clenA, int rlenB, int clenB) { - int ix = blockIdx.x * blockDim.x + threadIdx.x; - int iy = blockIdx.y * blockDim.y + threadIdx.y; +
systemml git commit: [SYSTEMML-1816] toString does not print negative 0s anymore
Repository: systemml Updated Branches: refs/heads/master e1a762f65 -> 5906682b0 [SYSTEMML-1816] toString does not print negative 0s anymore Closes #599 Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/5906682b Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/5906682b Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/5906682b Branch: refs/heads/master Commit: 5906682b0f328a8179c66f960cedb6e68fb8a0e1 Parents: e1a762f Author: Nakul Jindal Authored: Fri Jul 28 17:07:46 2017 -0700 Committer: Nakul Jindal Committed: Tue Aug 8 22:06:51 2017 -0700 -- .../java/org/apache/sysml/runtime/util/DataConverter.java| 8 ++-- 1 file changed, 6 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/systemml/blob/5906682b/src/main/java/org/apache/sysml/runtime/util/DataConverter.java -- diff --git a/src/main/java/org/apache/sysml/runtime/util/DataConverter.java b/src/main/java/org/apache/sysml/runtime/util/DataConverter.java index 10f043b..a758b4d 100644 --- a/src/main/java/org/apache/sysml/runtime/util/DataConverter.java +++ b/src/main/java/org/apache/sysml/runtime/util/DataConverter.java @@ -862,11 +862,15 @@ public class DataConverter else { // Dense Print Format for (int i=0; i
systemml git commit: [HOTFIX] write stdout and stderr for perftests
Repository: systemml Updated Branches: refs/heads/master 95de23586 -> dc4bfd95e [HOTFIX] write stdout and stderr for perftests Closes #615 Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/dc4bfd95 Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/dc4bfd95 Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/dc4bfd95 Branch: refs/heads/master Commit: dc4bfd95e893f924d80ca3af25c101495134fa77 Parents: 95de235 Author: krishnakalyan3 Authored: Sat Aug 12 12:31:26 2017 -0700 Committer: Nakul Jindal Committed: Sat Aug 12 12:32:16 2017 -0700 -- scripts/perftest/python/run_perftest.py | 2 +- scripts/perftest/python/utils_exec.py | 19 +-- scripts/perftest/python/utils_misc.py | 10 +- 3 files changed, 23 insertions(+), 8 deletions(-) -- http://git-wip-us.apache.org/repos/asf/systemml/blob/dc4bfd95/scripts/perftest/python/run_perftest.py -- diff --git a/scripts/perftest/python/run_perftest.py b/scripts/perftest/python/run_perftest.py index a15d7e6..d430569 100755 --- a/scripts/perftest/python/run_perftest.py +++ b/scripts/perftest/python/run_perftest.py @@ -134,7 +134,7 @@ def algorithm_workflow(algo, exec_type, config_path, dml_file_name, action_mode, if exit_flag_success: time = 'data_exists' else: -time = exec_dml_and_parse_time(exec_type, dml_file_name, args, spark_args_dict, sup_args_dict) +time = exec_dml_and_parse_time(exec_type, dml_file_name, args, spark_args_dict, sup_args_dict, config_path) write_success(time, temp_cwd) print('{},{},{},{},{},{}'.format(algo, action_mode, intercept, mat_type, mat_shape, time)) http://git-wip-us.apache.org/repos/asf/systemml/blob/dc4bfd95/scripts/perftest/python/utils_exec.py -- diff --git a/scripts/perftest/python/utils_exec.py b/scripts/perftest/python/utils_exec.py index 0eb2873..87ae3cd 100755 --- a/scripts/perftest/python/utils_exec.py +++ b/scripts/perftest/python/utils_exec.py @@ -27,7 +27,7 @@ import re # Subprocess and log parsing 
related functions -def subprocess_exec(cmd_string, extract=None): +def subprocess_exec(cmd_string, log_file_path=None, extract=None): """ Execute the input string as subprocess @@ -38,18 +38,25 @@ def subprocess_exec(cmd_string, extract=None): Based on extract as time/dir we extract this information from the logs accordingly +log_file_path: String +Path to write the log file + return: String Based on extract we return the relevant string """ # Debug # print(cmd_string) -proc1 = subprocess.Popen(shlex.split(cmd_string), stdout=subprocess.PIPE, +exec_command = shlex.split(cmd_string) +proc1 = subprocess.Popen(exec_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE) error_arr, out_arr = get_all_logs(proc1) std_outs = out_arr + error_arr return_code = proc1.returncode +if log_file_path is not None: +write_logs(std_outs, log_file_path + '.log') + if return_code == 0: if extract == 'time': return_data = parse_time(std_outs) @@ -65,6 +72,14 @@ def subprocess_exec(cmd_string, extract=None): return return_data +def write_logs(std_outs, log_file_path): +""" +Write all logs to the specified location +""" +with open(log_file_path, 'w')as log: +log.write("\n".join(std_outs)) + + def get_all_logs(process): """ Based on the subprocess capture logs http://git-wip-us.apache.org/repos/asf/systemml/blob/dc4bfd95/scripts/perftest/python/utils_misc.py -- diff --git a/scripts/perftest/python/utils_misc.py b/scripts/perftest/python/utils_misc.py index 0a765f6..e247ce8 100755 --- a/scripts/perftest/python/utils_misc.py +++ b/scripts/perftest/python/utils_misc.py @@ -166,7 +166,7 @@ def config_reader(read_path): return conf_file -def exec_dml_and_parse_time(exec_type, dml_file_name, args, spark_args_dict, sup_args_dict): +def exec_dml_and_parse_time(exec_type, dml_file_name, args, spark_args_dict, sup_args_dict, log_file_name=None): """ This function is responsible of execution of input arguments via python sub process, We also extract time obtained from the output of this subprocess 
@@ -186,6 +186,9 @@ def exec_dml_and_parse_time(exec_type, dml_file_name, args, spark_args_dict, sup sup_args_dict: Dictionary Supplementary arguments required by the script +log_file_name: String +Path to write the logfile + return: String The value of time parsed from the logs / error """
systemml git commit: [DOC][HOTFIX] updates to the performance test scripts
Repository: systemml Updated Branches: refs/heads/master 54e809898 -> 667aeb2b7 [DOC][HOTFIX] updatest to the performance test scripts Closes #616 Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/667aeb2b Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/667aeb2b Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/667aeb2b Branch: refs/heads/master Commit: 667aeb2b7f38b76b1ff85138426f215a03a4dfc4 Parents: 54e8098 Author: krishnakalyan3 Authored: Mon Aug 14 15:18:50 2017 -0700 Committer: Nakul Jindal Committed: Mon Aug 14 15:18:50 2017 -0700 -- docs/python-performance-test.md | 15 ++- scripts/perftest/python/utils_fs.py | 4 ++-- 2 files changed, 16 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/systemml/blob/667aeb2b/docs/python-performance-test.md -- diff --git a/docs/python-performance-test.md b/docs/python-performance-test.md index 3d29f01..ce36c2d 100644 --- a/docs/python-performance-test.md +++ b/docs/python-performance-test.md @@ -177,7 +177,20 @@ In the example above `--tag` can be a major/minor systemml version and `--auth` Currently we only support time difference between algorithms in different versions. This can be obtained by running the script below `./stats.py --auth client_json.json --exec-mode singlenode --tags 1.0 2.0` -Note: Please pip install `https://github.com/burnash/gspread` to use google docs client. +We pass different `matrix shapes` using `--mat-shape` argument. 
+ +Matrix Shape | Approximate Data Size +--- | --- | +10k_1k|80MB +100k_1k|800MB +1M_1k|8GB +10M_1k|80GB +100M_1k|800GB + +For example the command below runs performance test for all data sizes described above +`run_perftest.py --family binomial clustering multinomial regression1 regression2 stats1 stats2 --mat-shape 10k_1k 100k_1k 1M_1k 10M_1k 100M_1k --master yarn-client --temp-dir hdfs://localhost:9000/user/systemml` + +Note: Please use this command `pip3 install -r requirements.txt` before using the perftest scripts. ## Troubleshooting http://git-wip-us.apache.org/repos/asf/systemml/blob/667aeb2b/scripts/perftest/python/utils_fs.py -- diff --git a/scripts/perftest/python/utils_fs.py b/scripts/perftest/python/utils_fs.py index 977c4f4..7e04907 100755 --- a/scripts/perftest/python/utils_fs.py +++ b/scripts/perftest/python/utils_fs.py @@ -134,12 +134,12 @@ def relevant_folders(path, algo, family, matrix_type, matrix_shape, mode): if mode == 'data-gen': sub_folder_name = '.'.join([family, current_matrix_type, current_matrix_shape]) cmd = ['hdfs', 'dfs', '-ls', path] -path_subdir = subprocess_exec(' '.join(cmd), 'dir') +path_subdir = subprocess_exec(' '.join(cmd), extract='dir') if mode == 'train': sub_folder_name = '.'.join([algo, family, current_matrix_type, current_matrix_shape]) cmd = ['hdfs', 'dfs', '-ls', path] -path_subdir = subprocess_exec(' '.join(cmd), 'dir') +path_subdir = subprocess_exec(' '.join(cmd), extract='dir') path_folders = list(filter(lambda x: contains_dir(x, sub_folder_name), path_subdir))
systemml git commit: [MINOR] updates to performance scripts
Repository: systemml Updated Branches: refs/heads/master a2bf0006f -> ce240af57 [MINOR] updates to performance scripts Closes #618 Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/ce240af5 Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/ce240af5 Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/ce240af5 Branch: refs/heads/master Commit: ce240af57fb68caa3a978a8bad62701cb55a139d Parents: a2bf000 Author: Nakul Jindal Authored: Wed Aug 16 11:14:43 2017 -0700 Committer: Nakul Jindal Committed: Wed Aug 16 11:14:43 2017 -0700 -- bin/systemml-standalone.py | 10 ++- scripts/perftest/python/run_perftest.py | 26 +++ scripts/perftest/python/utils_exec.py | 1 + scripts/perftest/python/utils_misc.py | 104 +++ 4 files changed, 82 insertions(+), 59 deletions(-) -- http://git-wip-us.apache.org/repos/asf/systemml/blob/ce240af5/bin/systemml-standalone.py -- diff --git a/bin/systemml-standalone.py b/bin/systemml-standalone.py index 4000e75..02aefcf 100755 --- a/bin/systemml-standalone.py +++ b/bin/systemml-standalone.py @@ -43,7 +43,7 @@ def default_classpath(systemml_home): #TODO # User dir, fix for SYSTEMML_1795 -def standalone_execution_entry(nvargs, args, config, explain, debug, stats, gpu, f): +def standalone_execution_entry(nvargs, args, config, explain, debug, stats, gpu, heapmem, f): """ This function is responsible for the execution of arguments via subprocess call in singlenode mode @@ -57,7 +57,7 @@ def standalone_execution_entry(nvargs, args, config, explain, debug, stats, gpu, else: default_cp = ':'.join(default_classpath(systemml_home)) -java_memory = '-Xmx8g -Xms4g -Xmn1g' +java_memory = '-Xmx' + heapmem + ' -Xms4g -Xmn1g' # Log4j log4j = log4j_path(systemml_home) @@ -93,7 +93,10 @@ def standalone_execution_entry(nvargs, args, config, explain, debug, stats, gpu, '-f', script_file, '-exec', 'singlenode', '-config', default_config, ' '.join(ml_options)] -return_code = 
os.system(' '.join(cmd)) +cmd = ' '.join(cmd) +print(cmd) + +return_code = os.system(cmd) return return_code @@ -115,6 +118,7 @@ if __name__ == '__main__': cparser.add_argument('-gpu', help='uses CUDA instructions when reasonable, ' 'set option to skip conservative memory estimates ' 'and use GPU wherever possible', nargs='?') +cparser.add_argument('-heapmem', help='maximum JVM heap memory', metavar='', default='8g') cparser.add_argument('-f', required=True, help='specifies dml/pydml file to execute; ' 'path can be local/hdfs/gpfs', metavar='') http://git-wip-us.apache.org/repos/asf/systemml/blob/ce240af5/scripts/perftest/python/run_perftest.py -- diff --git a/scripts/perftest/python/run_perftest.py b/scripts/perftest/python/run_perftest.py index d430569..8c3d1fa 100755 --- a/scripts/perftest/python/run_perftest.py +++ b/scripts/perftest/python/run_perftest.py @@ -32,8 +32,7 @@ from datagen import config_packets_datagen from train import config_packets_train from predict import config_packets_predict from utils_misc import get_families, config_reader, \ -exec_dml_and_parse_time, exec_test_data, check_predict, get_folder_metrics, args_dict_split, \ -get_config_args +exec_dml_and_parse_time, exec_test_data, check_predict, get_folder_metrics, split_config_args from utils_fs import create_dir_local, write_success, check_SUCCESS_file_exists # A packet is a dictionary @@ -84,8 +83,6 @@ ML_PREDICT = {'Kmeans': 'Kmeans-predict', DENSE_TYPE_ALGOS = ['clustering', 'stats1', 'stats2'] -sup_args_dict = {} - # Responsible for execution and metric logging def algorithm_workflow(algo, exec_type, config_path, dml_file_name, action_mode, current_dir): @@ -134,7 +131,7 @@ def algorithm_workflow(algo, exec_type, config_path, dml_file_name, action_mode, if exit_flag_success: time = 'data_exists' else: -time = exec_dml_and_parse_time(exec_type, dml_file_name, args, spark_args_dict, sup_args_dict, config_path) +time = exec_dml_and_parse_time(exec_type, dml_file_name, args, 
backend_args_dict, systemml_args_dict, config_path) write_success(time, temp_cwd) print('{},{},{},{},{},{}'.format(algo, action_mode, intercept, mat_type, mat_shape, time)) @@ -222,7 +219,7 @@ def perf_test_entry(family, algo, exec_type, mat_type, mat_shape, config_dir, mo # Statistic family do not require to be s
systemml git commit: [MINOR] fixes for HDFS path
Repository: systemml Updated Branches: refs/heads/master 4384ebbda -> 114200724 [MINOR] fixes for HDFS path Closes #624 Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/11420072 Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/11420072 Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/11420072 Branch: refs/heads/master Commit: 11420072412c0c873b72267d1e9764c87abc57b4 Parents: 4384ebb Author: krishnakalyan3 Authored: Thu Aug 17 11:43:49 2017 -0700 Committer: Nakul Jindal Committed: Thu Aug 17 11:43:49 2017 -0700 -- bin/utils.py| 9 +++ scripts/perftest/python/run_perftest.py | 17 +++--- scripts/perftest/python/utils_exec.py | 19 ++- scripts/perftest/python/utils_fs.py | 11 + scripts/perftest/python/utils_misc.py | 35 5 files changed, 78 insertions(+), 13 deletions(-) -- http://git-wip-us.apache.org/repos/asf/systemml/blob/11420072/bin/utils.py -- diff --git a/bin/utils.py b/bin/utils.py index 6f40881..cf17960 100644 --- a/bin/utils.py +++ b/bin/utils.py @@ -74,15 +74,16 @@ def find_dml_file(systemml_home, script_file): Location of the dml script """ scripts_dir = join(systemml_home, 'scripts') -if not (exists(script_file)): -script_file = find_file(script_file, scripts_dir) -if script_file is None: +if not exists(script_file): +script_file_path = find_file(script_file, scripts_dir) +if script_file_path is not None: +return script_file_path +else: print('Could not find DML script: ' + script_file) sys.exit() return script_file - def log4j_path(systemml_home): """ Create log4j.properties from the template if not exist http://git-wip-us.apache.org/repos/asf/systemml/blob/11420072/scripts/perftest/python/run_perftest.py -- diff --git a/scripts/perftest/python/run_perftest.py b/scripts/perftest/python/run_perftest.py index 8c3d1fa..20f5380 100755 --- a/scripts/perftest/python/run_perftest.py +++ b/scripts/perftest/python/run_perftest.py @@ -32,7 +32,8 @@ from datagen import 
config_packets_datagen from train import config_packets_train from predict import config_packets_predict from utils_misc import get_families, config_reader, \ -exec_dml_and_parse_time, exec_test_data, check_predict, get_folder_metrics, split_config_args +exec_dml_and_parse_time, exec_test_data, check_predict, get_folder_metrics, split_config_args, \ +get_default_dir from utils_fs import create_dir_local, write_success, check_SUCCESS_file_exists # A packet is a dictionary @@ -275,7 +276,7 @@ if __name__ == '__main__': default_mat_shape = ['10k_100'] # Default temp directory, contains everything generated in perftest -default_temp_dir = join(systemml_home, 'scripts', 'perftest', 'temp') +default_config_dir = join(systemml_home, 'scripts', 'perftest', 'temp') # Initialize time start_time = time.time() @@ -308,7 +309,7 @@ if __name__ == '__main__': cparser.add_argument('--mat-shape', default=default_mat_shape, help='space separated list of shapes of matrices ' 'to generate (e.g 10k_1k, 20M_4k)', metavar='', nargs='+') -cparser.add_argument('--config-dir', default=default_temp_dir, help='temporary directory ' +cparser.add_argument('--config-dir', default=default_config_dir, help='temporary directory ' 'where generated, training and prediction data is put', metavar='') cparser.add_argument('--filename', default='perf_test', help='name of the output file for the perf' ' metrics', metavar='') @@ -316,8 +317,7 @@ if __name__ == '__main__': help='space separated list of types of workloads to run (available: data-gen, train, predict)', metavar='', choices=workload, nargs='+') # Change this to temp-dir -cparser.add_argument('--temp-dir', default=default_temp_dir, - help='define the file system to work on', metavar='') +cparser.add_argument('--temp-dir', help='define the file system to work on', metavar='') # Configuration Options cparser.add_argument('-stats', help='Monitor and report caching/recompilation statistics, ' @@ -350,8 +350,8 @@ if __name__ == '__main__': # Global 
variables perftest_args_dict, systemml_args_dict, backend_args_dict = split_config_args(all_arg_dict) -# Debug arguments -# print(arg_dict) +# temp_dir hdfs / local path check +perftest_args_dict['temp_di
systemml git commit: [MINOR] Print statistics to stderr if an error has occurred
Repository: systemml Updated Branches: refs/heads/master 1d83cedb7 -> ac0416883 [MINOR] Print statistics to stderr if an error has occured Closes #631 Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/ac041688 Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/ac041688 Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/ac041688 Branch: refs/heads/master Commit: ac04168836cc68f9af940c08baccab575c7e2cb3 Parents: 1d83ced Author: Nakul Jindal Authored: Fri Aug 25 17:08:44 2017 -0700 Committer: Nakul Jindal Committed: Fri Aug 25 17:08:44 2017 -0700 -- .../apache/sysml/api/ScriptExecutorUtils.java | 20 1 file changed, 16 insertions(+), 4 deletions(-) -- http://git-wip-us.apache.org/repos/asf/systemml/blob/ac041688/src/main/java/org/apache/sysml/api/ScriptExecutorUtils.java -- diff --git a/src/main/java/org/apache/sysml/api/ScriptExecutorUtils.java b/src/main/java/org/apache/sysml/api/ScriptExecutorUtils.java index 5f9c0a2..09897a5 100644 --- a/src/main/java/org/apache/sysml/api/ScriptExecutorUtils.java +++ b/src/main/java/org/apache/sysml/api/ScriptExecutorUtils.java @@ -80,6 +80,8 @@ public class ScriptExecutorUtils { DMLScript.FINEGRAINED_STATISTICS = dmlconf.getBooleanValue(DMLConfig.EXTRA_FINEGRAINED_STATS); DMLScript.STATISTICS_MAX_WRAP_LEN = dmlconf.getIntValue(DMLConfig.STATS_MAX_WRAP_LEN); + boolean exceptionThrown = false; + Statistics.startRunTimer(); try { // run execute (w/ exception handling to ensure proper shutdown) @@ -93,6 +95,9 @@ public class ScriptExecutorUtils { ec.setGPUContexts(gCtxs); } rtprog.execute(ec); + } catch (Throwable e) { + exceptionThrown = true; + throw e; } finally { // ensure cleanup/shutdown if (DMLScript.USE_ACCELERATOR && !ec.getGPUContexts().isEmpty()) { ec.getGPUContexts().forEach(gCtx -> gCtx.clearTemporaryMemory()); @@ -104,10 +109,17 @@ public class ScriptExecutorUtils { // display statistics (incl caching stats if enabled) 
Statistics.stopRunTimer(); - if(statisticsMaxHeavyHitters > 0) - System.out.println(Statistics.display(statisticsMaxHeavyHitters)); - else - System.out.println(Statistics.display()); + if (!exceptionThrown) { + if (statisticsMaxHeavyHitters > 0) + System.out.println(Statistics.display(statisticsMaxHeavyHitters)); + else + System.out.println(Statistics.display()); + } else { + if (statisticsMaxHeavyHitters > 0) + System.err.println(Statistics.display(statisticsMaxHeavyHitters)); + else + System.err.println(Statistics.display()); + } } }
systemml git commit: [SYSTEMML-1451][Phase3] phase 3 work
Repository: systemml Updated Branches: refs/heads/master aedceb611 -> d2efa65c8 [SYSTEMML-1451][Phase3] phase 3 work - Offline CSV support - Family bug fix - Plots - Doc Update - Stats update - Bug train, predict append family name Closes #604 Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/d2efa65c Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/d2efa65c Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/d2efa65c Branch: refs/heads/master Commit: d2efa65c89e3f6eaf7073c314eb56a033c8c8d5f Parents: aedceb6 Author: krishnakalyan3 Authored: Sat Aug 26 11:52:59 2017 -0700 Committer: Nakul Jindal Committed: Sat Aug 26 11:52:59 2017 -0700 -- docs/python-performance-test.md | 16 ++- pom.xml | 2 + scripts/perftest/python/datagen.py | 1 + .../perftest/python/google_docs/gdocs_utils.py | 35 + scripts/perftest/python/google_docs/stats.py| 134 +-- scripts/perftest/python/google_docs/update.py | 55 ++-- scripts/perftest/python/predict.py | 9 +- scripts/perftest/python/requirements.txt| 4 + scripts/perftest/python/run_perftest.py | 51 --- scripts/perftest/python/train.py| 9 +- scripts/perftest/python/utils_exec.py | 6 +- scripts/perftest/python/utils_fs.py | 6 + scripts/perftest/python/utils_misc.py | 6 +- 13 files changed, 221 insertions(+), 113 deletions(-) -- http://git-wip-us.apache.org/repos/asf/systemml/blob/d2efa65c/docs/python-performance-test.md -- diff --git a/docs/python-performance-test.md b/docs/python-performance-test.md index ce36c2d..25e1f35 100644 --- a/docs/python-performance-test.md +++ b/docs/python-performance-test.md @@ -148,6 +148,17 @@ Run performance test for all algorithms under the family `regression2` and log w Run performance test for all algorithms using HDFS. +## Result Consolidation and Plotting +We have two scripts, `stats.py` forpulling results from google docs and `update.py` to updating results to google docs or local file system. 
+ +Example of `update.py` would be below +`./scripts/perftest/python/google_docs/update.py --file ../../temp/perf_test_singlenode.out --exec-type singlenode --tag 2 --append test.csv` +The arguments being `--file` path of the perf-test output, `--exec-type` execution mode used to generate the perf-test output, `--tag` being the realease version or a unique name, `--append` being an optional argument that would append the a local csv file. If instead of `--append` the `--auth` argument needs the location of the `google api key` file. + +Example of `stats.py` below +` ./stats.py --auth ../key/client_json.json --exec-type singlenode --plot stats1_data-gen_none_dense_10k_100` +`--plot` argument needs the name of the composite key that you would like to compare results over. If this argument is not specified the results would be grouped by keys. + ## Operational Notes All performance test depend mainly on two scripts for execution `systemml-standalone.py` and `systemml-spark-submit.py`. Incase we need to change standalone or spark parameters we need to manually change these parameters in their respective scripts. @@ -158,7 +169,7 @@ The logs contain the following information below comma separated. algorithm | run_type | intercept | matrix_type | data_shape | time_sec --- | --- | --- | --- | --- | --- | -multinomial|data-gen|0|dense|10k_100| 0.33 +multinomial|data-gen|0|10k_100|dense| 0.33 MultiLogReg|train|0|10k_100|dense|6.956 MultiLogReg|predict|0|10k_100|dense|4.780 @@ -187,9 +198,12 @@ Matrix Shape | Approximate Data Size 10M_1k|80GB 100M_1k|800GB + For example the command below runs performance test for all data sizes described above `run_perftest.py --family binomial clustering multinomial regression1 regression2 stats1 stats2 --mat-shape 10k_1k 100k_1k 1M_1k 10M_1k 100M_1k --master yarn-client --temp-dir hdfs://localhost:9000/user/systemml` +By default data generated in `hybrid_spark` execution mode is in the current users `hdfs` home directory. 
+ Note: Please use this command `pip3 install -r requirements.txt` before using the perftest scripts. http://git-wip-us.apache.org/repos/asf/systemml/blob/d2efa65c/pom.xml -- diff --git a/pom.xml b/pom.xml index 2ed9374..0ee382c 100644 --- a/pom.xml +++ b/pom.xml @@ -897,6 +897,8 @@ src/test/scripts/functions/jmlc/tfmtd_example/dummycoded.column.names
systemml git commit: [SYSTEMML-1847] bug fixes for gpu from ml algos
Repository: systemml Updated Branches: refs/heads/master 428f3aa21 -> de0513415 [SYSTEMML-1847] bug fixes for gpu from ml algos - Fixed errors in -gpu force arguments - Fix to GPU solve - converts sparse matrices to dense - Bug fix in GPUContext::clearTemporaryMemory - Fix for removing recorded GPUObjects - Estimate memory for each parfor body and set degree of parallelism - Setting cuda pointers to null after freeing - Fix after rebase with master for SOLVE on GPU Closes #626 Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/de051341 Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/de051341 Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/de051341 Branch: refs/heads/master Commit: de0513415e8fb6e9b9f289bc261612091bd4e664 Parents: 428f3aa Author: Nakul Jindal Authored: Mon Aug 28 13:38:49 2017 -0700 Committer: Nakul Jindal Committed: Mon Aug 28 13:38:49 2017 -0700 -- scripts/perftest/python/utils_misc.py | 12 +- .../java/org/apache/sysml/hops/BinaryOp.java| 5 +- .../controlprogram/ParForProgramBlock.java | 3 - .../parfor/opt/OptimizerRuleBased.java | 59 ++- .../instructions/gpu/context/CSRPointer.java| 3 + .../instructions/gpu/context/GPUContext.java| 36 +++-- .../instructions/gpu/context/GPUObject.java | 8 + .../runtime/matrix/data/LibMatrixCUDA.java | 162 +-- 8 files changed, 176 insertions(+), 112 deletions(-) -- http://git-wip-us.apache.org/repos/asf/systemml/blob/de051341/scripts/perftest/python/utils_misc.py -- diff --git a/scripts/perftest/python/utils_misc.py b/scripts/perftest/python/utils_misc.py index f9904c5..da9dbcb 100755 --- a/scripts/perftest/python/utils_misc.py +++ b/scripts/perftest/python/utils_misc.py @@ -211,20 +211,18 @@ def exec_dml_and_parse_time(exec_type, dml_file_name, args, backend_args_dict, s """ algorithm = dml_file_name + '.dml' - -sup_args = ''.join(['{} {}'.format(k, v) for k, v in systemml_args_dict.items()]) - +sup_args = ' '.join(['{} 
{}'.format(k, v) for k, v in systemml_args_dict.items()]) if exec_type == 'singlenode': exec_script = join(os.environ.get('SYSTEMML_HOME'), 'bin', 'systemml-standalone.py') -singlenode_pre_args = ''.join([' {} {} '.format(k, v) for k, v in backend_args_dict.items()]) -args = ''.join(['{} {}'.format(k, v) for k, v in args.items()]) +singlenode_pre_args = ' '.join(['{} {}'.format(k, v) for k, v in backend_args_dict.items()]) +args = ' '.join(['{} {}'.format(k, v) for k, v in args.items()]) cmd = [exec_script, singlenode_pre_args, '-f', algorithm, args, sup_args] cmd_string = ' '.join(cmd) if exec_type == 'hybrid_spark': exec_script = join(os.environ.get('SYSTEMML_HOME'), 'bin', 'systemml-spark-submit.py') -spark_pre_args = ''.join([' {} {} '.format(k, v) for k, v in backend_args_dict.items()]) -args = ''.join(['{} {}'.format(k, v) for k, v in args.items()]) +spark_pre_args = ' '.join([' {} {} '.format(k, v) for k, v in backend_args_dict.items()]) +args = ' '.join(['{} {}'.format(k, v) for k, v in args.items()]) cmd = [exec_script, spark_pre_args, '-f', algorithm, args, sup_args] cmd_string = ' '.join(cmd) http://git-wip-us.apache.org/repos/asf/systemml/blob/de051341/src/main/java/org/apache/sysml/hops/BinaryOp.java -- diff --git a/src/main/java/org/apache/sysml/hops/BinaryOp.java b/src/main/java/org/apache/sysml/hops/BinaryOp.java index ad9f0ad..cd1f715 100644 --- a/src/main/java/org/apache/sysml/hops/BinaryOp.java +++ b/src/main/java/org/apache/sysml/hops/BinaryOp.java @@ -1058,7 +1058,10 @@ public class BinaryOp extends Hop //ensure cp exec type for single-node operations if ( op == OpOp2.SOLVE ) { - _etype = ExecType.CP; + if (isGPUEnabled()) + _etype = ExecType.GPU; + else + _etype = ExecType.CP; } return _etype; http://git-wip-us.apache.org/repos/asf/systemml/blob/de051341/src/main/java/org/apache/sysml/runtime/controlprogram/ParForProgramBlock.java -- diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/ParForProgramBlock.java 
b/src/main/java/org/apache/sysml/runtime/controlprogram/ParForProgramBlock.java index 1968c26..3a9bf51 100644 --- a/src/main/java/org/apache/sysml/runtime/controlprogram/ParForProgramBlock
systemml git commit: [SYSTEMML-1895] jcuda for windows & linux (x86_64, ppc64le) are included in extra jar
Repository: systemml Updated Branches: refs/heads/master 137fbf18a -> c00029a7b [SYSTEMML-1895] jcuda for windows & linux (x86_64, ppc64le) are included in extra jar Closes #656 Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/c00029a7 Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/c00029a7 Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/c00029a7 Branch: refs/heads/master Commit: c00029a7be735dcaba533c50ba69169b18ef1675 Parents: 137fbf1 Author: Nakul Jindal Authored: Thu Sep 7 14:42:12 2017 -0700 Committer: Nakul Jindal Committed: Thu Sep 7 14:42:12 2017 -0700 -- pom.xml| 106 src/assembly/extra.xml | 29 +++- 2 files changed, 115 insertions(+), 20 deletions(-) -- http://git-wip-us.apache.org/repos/asf/systemml/blob/c00029a7/pom.xml -- diff --git a/pom.xml b/pom.xml index 4bbe714..eb4337d 100644 --- a/pom.xml +++ b/pom.xml @@ -1220,94 +1220,164 @@ org.jcuda - jcufft + jcusparse ${jcuda.version} ${jcuda.scope} org.jcuda - jcusparse + jcusolver ${jcuda.version} ${jcuda.scope} org.jcuda - jcusolver + jcudnn ${jcuda.version} ${jcuda.scope} + org.jcuda - jcurand + jcuda-natives + ${jcuda.os}-${jcuda.arch} ${jcuda.version} ${jcuda.scope} org.jcuda - jnvgraph + jcublas-natives + ${jcuda.os}-${jcuda.arch} ${jcuda.version} ${jcuda.scope} org.jcuda - jcudnn + jcusparse-natives + ${jcuda.os}-${jcuda.arch} + ${jcuda.version} + ${jcuda.scope} + + + org.jcuda + jcusolver-natives + ${jcuda.os}-${jcuda.arch} + ${jcuda.version} + ${jcuda.scope} + + + org.jcuda + jcudnn-natives + ${jcuda.os}-${jcuda.arch} ${jcuda.version} ${jcuda.scope} + org.jcuda jcuda-natives - ${jcuda.os}-${jcuda.arch} + windows-x86_64 ${jcuda.version} ${jcuda.scope} org.jcuda jcublas-natives - ${jcuda.os}-${jcuda.arch} + windows-x86_64 ${jcuda.version} ${jcuda.scope} org.jcuda - jcufft-natives - ${jcuda.os}-${jcuda.arch} + jcusparse-natives + windows-x86_64 + ${jcuda.version} + ${jcuda.scope} + + + org.jcuda + 
jcusolver-natives + windows-x86_64 + ${jcuda.version} + ${jcuda.scope} + + + org.jcuda + jcudnn-natives + windows-x86_64 + ${jcuda.version} + ${jcuda.scope} + + + + org.jcuda + jcuda-natives + linux-x86_64 + ${jcuda.version} + ${jcuda.scope} + + + org.jcuda + jcublas-natives + linux-x86_64 ${jcuda.version} ${jcuda.scope}
systemml git commit: [SYSTEMML-1895] jcuda for win & linux (x86_64, ppc64le) included in extra jar
Repository: systemml Updated Branches: refs/heads/branch-0.15 467de1cb1 -> ea8e951bc [SYSTEMML-1895] jcuda for win & linux (x86_64, ppc64le) included in extra jar Closes #660 Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/ea8e951b Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/ea8e951b Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/ea8e951b Branch: refs/heads/branch-0.15 Commit: ea8e951bc3360860d9931be5ede9850bec6088bc Parents: 467de1c Author: Nakul Jindal Authored: Thu Sep 7 15:12:04 2017 -0700 Committer: Nakul Jindal Committed: Thu Sep 7 15:12:04 2017 -0700 -- pom.xml| 106 src/assembly/extra.xml | 29 +++- 2 files changed, 115 insertions(+), 20 deletions(-) -- http://git-wip-us.apache.org/repos/asf/systemml/blob/ea8e951b/pom.xml -- diff --git a/pom.xml b/pom.xml index 9b57ba8..abffc0b 100644 --- a/pom.xml +++ b/pom.xml @@ -1220,94 +1220,164 @@ org.jcuda - jcufft + jcusparse ${jcuda.version} ${jcuda.scope} org.jcuda - jcusparse + jcusolver ${jcuda.version} ${jcuda.scope} org.jcuda - jcusolver + jcudnn ${jcuda.version} ${jcuda.scope} + org.jcuda - jcurand + jcuda-natives + ${jcuda.os}-${jcuda.arch} ${jcuda.version} ${jcuda.scope} org.jcuda - jnvgraph + jcublas-natives + ${jcuda.os}-${jcuda.arch} ${jcuda.version} ${jcuda.scope} org.jcuda - jcudnn + jcusparse-natives + ${jcuda.os}-${jcuda.arch} + ${jcuda.version} + ${jcuda.scope} + + + org.jcuda + jcusolver-natives + ${jcuda.os}-${jcuda.arch} + ${jcuda.version} + ${jcuda.scope} + + + org.jcuda + jcudnn-natives + ${jcuda.os}-${jcuda.arch} ${jcuda.version} ${jcuda.scope} + org.jcuda jcuda-natives - ${jcuda.os}-${jcuda.arch} + windows-x86_64 ${jcuda.version} ${jcuda.scope} org.jcuda jcublas-natives - ${jcuda.os}-${jcuda.arch} + windows-x86_64 ${jcuda.version} ${jcuda.scope} org.jcuda - jcufft-natives - ${jcuda.os}-${jcuda.arch} + jcusparse-natives + windows-x86_64 + ${jcuda.version} + ${jcuda.scope} + + + org.jcuda + 
jcusolver-natives + windows-x86_64 + ${jcuda.version} + ${jcuda.scope} + + + org.jcuda + jcudnn-natives + windows-x86_64 + ${jcuda.version} + ${jcuda.scope} + + + + org.jcuda + jcuda-natives + linux-x86_64 + ${jcuda.version} + ${jcuda.scope} + + + org.jcuda + jcublas-natives + linux-x86_64 ${jcuda.version} ${jcuda.scope}
systemml git commit: [MINOR] gpu memory leak fix
Repository: systemml Updated Branches: refs/heads/master f86879bd0 -> ec5dfda57 [MINOR] gpu memory leak fix - Changed list of free pointers to set of free pointers for GPU - Changed threadlocal cuda handles to non threadlocal. This is assuming there will be one thread per GPU. Closes #665 Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/ec5dfda5 Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/ec5dfda5 Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/ec5dfda5 Branch: refs/heads/master Commit: ec5dfda57a42b172886dd5d42bfe3b034b30c7b7 Parents: f86879b Author: Nakul Jindal Authored: Tue Sep 19 14:57:16 2017 -0700 Committer: Nakul Jindal Committed: Tue Sep 19 14:57:16 2017 -0700 -- src/main/cpp/CMakeLists.txt | 4 +- .../instructions/gpu/context/GPUContext.java| 313 +++ 2 files changed, 179 insertions(+), 138 deletions(-) -- http://git-wip-us.apache.org/repos/asf/systemml/blob/ec5dfda5/src/main/cpp/CMakeLists.txt -- diff --git a/src/main/cpp/CMakeLists.txt b/src/main/cpp/CMakeLists.txt index 47555bf..04e12b4 100644 --- a/src/main/cpp/CMakeLists.txt +++ b/src/main/cpp/CMakeLists.txt @@ -29,6 +29,7 @@ option(USE_INTEL_MKL "Whether to use Intel MKL (Defaults to compiling with Intel # Build a shared libraray add_library(systemml SHARED libmatrixdnn.cpp libmatrixmult.cpp systemml.cpp) +set_target_properties(systemml PROPERTIES MACOSX_RPATH 1) set(MATH_LIBRARIES "") @@ -72,5 +73,6 @@ if (USE_OPEN_BLAS) find_package(OpenMP REQUIRED) set_target_properties(systemml PROPERTIES LINK_FLAGS "${OpenMP_CXX_FLAGS} ${MATH_LIBRARIES}") elseif(USE_INTEL_MKL) - set_target_properties(systemml PROPERTIES LINK_FLAGS ${MATH_LIBRARIES}") + set_target_properties(systemml PROPERTIES LINK_FLAGS "${MATH_LIBRARIES}") endif() + http://git-wip-us.apache.org/repos/asf/systemml/blob/ec5dfda5/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUContext.java -- diff --git 
a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUContext.java b/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUContext.java index 271109d..8a823cc 100644 --- a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUContext.java +++ b/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUContext.java @@ -41,9 +41,11 @@ import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; import java.util.HashMap; -import java.util.LinkedList; +import java.util.HashSet; +import java.util.Iterator; import java.util.Map; import java.util.Map.Entry; +import java.util.Set; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -88,83 +90,50 @@ public class GPUContext { * active device assigned to this GPUContext instance */ private final int deviceNum; - // Invoke cudaMemGetInfo to get available memory information. Useful if GPU is shared among multiple application. - public double GPU_MEMORY_UTILIZATION_FACTOR = ConfigurationManager.getDMLConfig() - .getDoubleValue(DMLConfig.GPU_MEMORY_UTILIZATION_FACTOR); - /** -* Map of free blocks allocate on GPU. maps size_of_block -> pointer on GPU -*/ - private LRUCacheMap> freeCUDASpaceMap = new LRUCacheMap<>(); - /** -* To record size of allocated blocks -*/ - private HashMap cudaBlockSizeMap = new HashMap<>(); - /** -* list of allocated {@link GPUObject} instances allocated on {@link GPUContext#deviceNum} GPU -* These are matrices allocated on the GPU on which rmvar hasn't been called yet. 
-* If a {@link GPUObject} has more than one lock on it, it cannot be freed -* If it has zero locks on it, it can be freed, but it is preferrable to keep it around -* so that an extraneous host to dev transfer can be avoided -*/ - private ArrayList allocatedGPUObjects = new ArrayList<>(); /** * cudnnHandle for Deep Neural Network operations on the GPU */ - private final ThreadLocal cudnnHandle = new ThreadLocal<>(); + private cudnnHandle cudnnHandle; /** * cublasHandle for BLAS operations on the GPU */ - private final ThreadLocal cublasHandle = new ThreadLocal<>(); + private cublasHandle cublasHandle; /** * cusparseHandle for certain sparse BLAS operations on the GPU */ - private final ThreadLocal cusparseHandle = new ThreadLocal<>(); + private cusparseHandle cusparseHandle;
systemml git commit: [MINOR] bug fix in perf test script
Repository: systemml Updated Branches: refs/heads/master f6ea240ca -> a134997e6 [MINOR] bug fix in perf test script Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/a134997e Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/a134997e Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/a134997e Branch: refs/heads/master Commit: a134997e682de07a49bbf5052a46fba8f7b9ba30 Parents: f6ea240 Author: Nakul Jindal Authored: Wed Sep 20 12:34:02 2017 -0700 Committer: Nakul Jindal Committed: Wed Sep 20 12:34:02 2017 -0700 -- pom.xml | 4 ++-- scripts/perftest/python/utils_misc.py | 34 -- 2 files changed, 25 insertions(+), 13 deletions(-) -- http://git-wip-us.apache.org/repos/asf/systemml/blob/a134997e/pom.xml -- diff --git a/pom.xml b/pom.xml index 7e04af5..1ee4399 100644 --- a/pom.xml +++ b/pom.xml @@ -348,7 +348,7 @@ com.github.os72 protoc-jar-maven-plugin - 3.0.0-b2.1 + 3.4.0.1-SNAPSHOT caffe-sources @@ -373,7 +373,7 @@ run - 3.0.0 + 3.4.0 src/main/proto/tensorflow http://git-wip-us.apache.org/repos/asf/systemml/blob/a134997e/scripts/perftest/python/utils_misc.py -- diff --git a/scripts/perftest/python/utils_misc.py b/scripts/perftest/python/utils_misc.py index da9dbcb..15e2a00 100755 --- a/scripts/perftest/python/utils_misc.py +++ b/scripts/perftest/python/utils_misc.py @@ -57,17 +57,29 @@ def split_config_args(args): systemml_args_dict = {} -if args['stats'] is not None: -systemml_args_dict['-stats'] = args['stats'] - -if args['explain'] is not None: -systemml_args_dict['-explain'] = args['explain'] - -if args['config'] is not None: -systemml_args_dict['-config'] = args['config'] - -if args['gpu'] is not None: -systemml_args_dict['-gpu'] = args['gpu'] +if 'stats' in args.keys(): +if args['stats'] is not None: +systemml_args_dict['-stats'] = args['stats'] +else: +systemml_args_dict['-stats'] = '' + +if 'explain' in args.keys(): +if args['explain'] is not None: 
+systemml_args_dict['-explain'] = args['explain'] +else: +systemml_args_dict['-explain'] = '' + +if 'config' in args.keys(): +if args['config'] is not None: +systemml_args_dict['-config'] = args['config'] +else: +systemml_args_dict['-config'] = '' + +if 'gpu' in args.keys(): +if args['gpu'] is not None: +systemml_args_dict['-gpu'] = args['gpu'] +else: +systemml_args_dict['-gpu'] = '' backend_args_dict = {} exec_type = args['exec_type']
systemml git commit: [MINOR] Refer to correct version of protoc-jar maven plugin in pom
Repository: systemml Updated Branches: refs/heads/master 3acf786d4 -> c14682b9c [MINOR] Refer to correct version of protoc-jar maven plugin in pom Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/c14682b9 Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/c14682b9 Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/c14682b9 Branch: refs/heads/master Commit: c14682b9cb2d02e959ed35a87288f54b7b51da3c Parents: 3acf786 Author: Nakul Jindal Authored: Wed Sep 20 14:10:15 2017 -0700 Committer: Nakul Jindal Committed: Wed Sep 20 14:10:15 2017 -0700 -- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/systemml/blob/c14682b9/pom.xml -- diff --git a/pom.xml b/pom.xml index 1ee4399..7e04af5 100644 --- a/pom.xml +++ b/pom.xml @@ -348,7 +348,7 @@ com.github.os72 protoc-jar-maven-plugin - 3.4.0.1-SNAPSHOT + 3.0.0-b2.1 caffe-sources @@ -373,7 +373,7 @@ run - 3.4.0 + 3.0.0 src/main/proto/tensorflow
systemml git commit: [MINOR] minor tweaks to the performance test script
Repository: systemml Updated Branches: refs/heads/master 34bb3ca82 -> aa15197ec [MINOR] minor tweaks to the performance test script Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/aa15197e Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/aa15197e Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/aa15197e Branch: refs/heads/master Commit: aa15197ec2e1a1e81c9031a91ec0791284978f27 Parents: 34bb3ca Author: Nakul Jindal Authored: Wed Sep 20 15:04:11 2017 -0700 Committer: Nakul Jindal Committed: Wed Sep 20 15:04:11 2017 -0700 -- scripts/perftest/python/utils_misc.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/systemml/blob/aa15197e/scripts/perftest/python/utils_misc.py -- diff --git a/scripts/perftest/python/utils_misc.py b/scripts/perftest/python/utils_misc.py index 15e2a00..92dbc73 100755 --- a/scripts/perftest/python/utils_misc.py +++ b/scripts/perftest/python/utils_misc.py @@ -44,7 +44,7 @@ def split_config_args(args): """ perftest_args_dict = {} - + perftest_args_dict['family'] = args['family'] perftest_args_dict['algo'] = args['algo'] perftest_args_dict['exec_type'] = args['exec_type'] @@ -72,8 +72,6 @@ def split_config_args(args): if 'config' in args.keys(): if args['config'] is not None: systemml_args_dict['-config'] = args['config'] -else: -systemml_args_dict['-config'] = '' if 'gpu' in args.keys(): if args['gpu'] is not None:
systemml git commit: [MINOR] bug fixes & feature added in perf test & spark-submit python scripts
Repository: systemml Updated Branches: refs/heads/master 0cb2f7f68 -> a725b2d2e [MINOR]bug fixes & feature added in perf test & spark-submit python scripts Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/a725b2d2 Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/a725b2d2 Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/a725b2d2 Branch: refs/heads/master Commit: a725b2d2ebf6dcb56f4edb68376c3849c8991b27 Parents: 0cb2f7f Author: Nakul Jindal Authored: Thu Sep 28 15:28:17 2017 -0700 Committer: Nakul Jindal Committed: Thu Sep 28 15:28:17 2017 -0700 -- bin/systemml-spark-submit.py| 30 - scripts/perftest/python/datagen.py | 2 +- scripts/perftest/python/predict.py | 2 +- scripts/perftest/python/run_perftest.py | 19 ++--- scripts/perftest/python/train.py| 2 +- scripts/perftest/python/utils_misc.py | 63 +++- 6 files changed, 70 insertions(+), 48 deletions(-) -- http://git-wip-us.apache.org/repos/asf/systemml/blob/a725b2d2/bin/systemml-spark-submit.py -- diff --git a/bin/systemml-spark-submit.py b/bin/systemml-spark-submit.py index b6426b3..b4da801 100755 --- a/bin/systemml-spark-submit.py +++ b/bin/systemml-spark-submit.py @@ -92,25 +92,35 @@ def spark_submit_entry(master, driver_memory, num_executors, executor_memory, ml_options.append(stats) if gpu is not None: ml_options.append('-gpu') -ml_options.append(gpu) +if gpu is not 'no_option': +ml_options.append(gpu) if len(ml_options) < 1: ml_options = '' # stats, explain, target_jars cmd_spark = [spark_path, '--class', 'org.apache.sysml.api.DMLScript', - '--master', master, '--driver-memory', driver_memory, - '--num-executors', num_executors, '--executor-memory', executor_memory, - '--executor-cores', executor_cores, '--conf', default_conf, + '--master', master, + '--driver-memory', driver_memory, + '--conf', default_conf, '--jars', cuda_jars, systemml_jars] +if num_executors is not None: +cmd_spark = cmd_spark + ['--num-executors', 
num_executors] + +if executor_memory is not None: +cmd_spark = cmd_spark + ['--executor-memory', executor_memory] + +if executor_cores is not None: +cmd_spark = cmd_spark + ['--executor-cores', executor_cores] + cmd_system_ml = ['-config', default_config, '-exec', 'hybrid_spark', '-f', script_file, ' '.join(ml_options)] cmd = cmd_spark + cmd_system_ml # Debug -# print(' '.join(cmd)) +print(' '.join(cmd)) return_code = os.system(' '.join(cmd)) return return_code @@ -120,10 +130,10 @@ if __name__ == '__main__': description='System-ML Spark Submit Script') # SPARK-SUBMIT Options cparser.add_argument('--master', default='local[*]', help='local, yarn-client, yarn-cluster', metavar='') -cparser.add_argument('--driver-memory', default='5G', help='Memory for driver (e.g. 512M)', metavar='') -cparser.add_argument('--num-executors', default='2', help='Number of executors to launch', metavar='') -cparser.add_argument('--executor-memory', default='2G', help='Memory per executor', metavar='') -cparser.add_argument('--executor-cores', default='1', help='Number of cores', metavar='') +cparser.add_argument('--driver-memory', default='8G', help='Memory for driver (e.g. 
512M, 1G)', metavar='') +cparser.add_argument('--num-executors', nargs=1, help='Number of executors to launch', metavar='') +cparser.add_argument('--executor-memory', nargs=1, help='Memory per executor', metavar='') +cparser.add_argument('--executor-cores', nargs=1, help='Number of executor cores', metavar='') cparser.add_argument('--conf', help='Spark configuration file', nargs='+', metavar='') # SYSTEM-ML Options @@ -138,7 +148,7 @@ if __name__ == '__main__': metavar='') cparser.add_argument('-gpu', help='uses CUDA instructions when reasonable, ' 'set option to skip conservative memory estimates ' - 'and use GPU wherever possible', nargs='?') + 'and use GPU wherever possible', nargs='?', const='no_option') cparser.add_argument('-f', required=True, help='specifies dml/pydml file to execute; ' 'path can be local/hdfs/gpfs', metavar='') http://git-wip-us.apache.org/repos/asf/systemml/blob/a725b2d2/
systemml git commit: [HOTFIX] minor bug fix in perf test suite
Repository: systemml Updated Branches: refs/heads/master a725b2d2e -> e4c74eda6 [HOTFIX] minor bug fix in perf test suite Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/e4c74eda Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/e4c74eda Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/e4c74eda Branch: refs/heads/master Commit: e4c74eda67ca4596fcdbae77603514259c1b6e10 Parents: a725b2d Author: Nakul Jindal Authored: Thu Sep 28 15:43:44 2017 -0700 Committer: Nakul Jindal Committed: Thu Sep 28 15:43:44 2017 -0700 -- scripts/perftest/python/run_perftest.py | 6 ++ 1 file changed, 6 insertions(+) -- http://git-wip-us.apache.org/repos/asf/systemml/blob/e4c74eda/scripts/perftest/python/run_perftest.py -- diff --git a/scripts/perftest/python/run_perftest.py b/scripts/perftest/python/run_perftest.py index f0b272a..4eeb0da 100755 --- a/scripts/perftest/python/run_perftest.py +++ b/scripts/perftest/python/run_perftest.py @@ -378,6 +378,12 @@ if __name__ == '__main__': perftest_args_dict, systemml_args_dict, backend_args_dict = split_config_args(all_arg_dict) # temp_dir hdfs / local path check +if args.file_system_type is None: +if args.exec_type == 'hybrid_spark': +args.file_system_type = 'hdfs' +else: +args.file_system_type = 'local' + perftest_args_dict['temp_dir'] = get_default_dir(args.file_system_type, args.temp_dir, args.exec_type, default_config_dir) # default_mat_type validity
systemml git commit: [SYSTEMML-1875] Changed pom.xml to use the latest protoc-jar
Repository: systemml Updated Branches: refs/heads/master e4c2f9e57 -> 5dce90b3b [SYSTEMML-1875] Changed pom.xml to use the latest protoc-jar - This uses the latest protoc-jar-maven-plugin to get the protoc compiler for ppc Closes #676 Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/5dce90b3 Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/5dce90b3 Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/5dce90b3 Branch: refs/heads/master Commit: 5dce90b3bbf120a053233f0b49eb6fa7c0ddfebf Parents: e4c2f9e Author: Nakul Jindal Authored: Fri Sep 29 14:47:49 2017 -0700 Committer: Nakul Jindal Committed: Fri Sep 29 14:47:49 2017 -0700 -- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/systemml/blob/5dce90b3/pom.xml -- diff --git a/pom.xml b/pom.xml index c19e11c..28b166f 100644 --- a/pom.xml +++ b/pom.xml @@ -345,7 +345,7 @@ com.github.os72 protoc-jar-maven-plugin - 3.0.0-b2.1 + 3.4.0.1 caffe-sources @@ -354,7 +354,7 @@ run - 2.5.0 + 2.6.1 src/main/proto/caffe
systemml git commit: [MINOR] bug fix in perf test script
Repository: systemml Updated Branches: refs/heads/master c27c488be -> 50b3c80c6 [MINOR] bug fix in perf test script Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/50b3c80c Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/50b3c80c Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/50b3c80c Branch: refs/heads/master Commit: 50b3c80c61b9b6ab6cdb8b28a1544952fb8dd19d Parents: c27c488 Author: Nakul Jindal Authored: Mon Oct 2 17:54:17 2017 -0700 Committer: Nakul Jindal Committed: Mon Oct 2 17:54:17 2017 -0700 -- scripts/perftest/python/datagen.py| 4 +-- scripts/perftest/python/predict.py| 4 +-- scripts/perftest/python/train.py | 4 +-- scripts/perftest/python/utils_exec.py | 41 -- scripts/perftest/python/utils_fs.py | 2 +- scripts/perftest/python/utils_misc.py | 4 +-- 6 files changed, 37 insertions(+), 22 deletions(-) -- http://git-wip-us.apache.org/repos/asf/systemml/blob/50b3c80c/scripts/perftest/python/datagen.py -- diff --git a/scripts/perftest/python/datagen.py b/scripts/perftest/python/datagen.py index 6794187..54f2eff 100755 --- a/scripts/perftest/python/datagen.py +++ b/scripts/perftest/python/datagen.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -#- +# - # # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file @@ -18,7 +18,7 @@ # specific language governing permissions and limitations # under the License. # -#- +# - import itertools from os.path import join http://git-wip-us.apache.org/repos/asf/systemml/blob/50b3c80c/scripts/perftest/python/predict.py -- diff --git a/scripts/perftest/python/predict.py b/scripts/perftest/python/predict.py index 67467b1..56ba999 100755 --- a/scripts/perftest/python/predict.py +++ b/scripts/perftest/python/predict.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -#- +# - # # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. 
See the NOTICE file @@ -18,7 +18,7 @@ # specific language governing permissions and limitations # under the License. # -#- +# - import sys from os.path import join http://git-wip-us.apache.org/repos/asf/systemml/blob/50b3c80c/scripts/perftest/python/train.py -- diff --git a/scripts/perftest/python/train.py b/scripts/perftest/python/train.py index a95950d..4428e8f 100755 --- a/scripts/perftest/python/train.py +++ b/scripts/perftest/python/train.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -#- +# - # # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file @@ -18,7 +18,7 @@ # specific language governing permissions and limitations # under the License. # -#- +# - import sys from os.path import join http://git-wip-us.apache.org/repos/asf/systemml/blob/50b3c80c/scripts/perftest/python/utils_exec.py -- diff --git a/scripts/perftest/python/utils_exec.py b/scripts/perftest/python/utils_exec.py index 27f75f2..d51cf2d 100755 --- a/scripts/perftest/python/utils_exec.py +++ b/scripts/perftest/python/utils_exec.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -#- +# - # # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file @@ -18,12 +18,14 @@ # specific language governing permissions and limitations # under the License. # -#- +# - import sys import subprocess import shlex import re +import tempfile +import os # Subprocess and log parsing related functions @@ -49,24 +51,34 @@ de
systemml git commit: [HOTFIX] fix for Caffe2DML
Repository: systemml Updated Branches: refs/heads/master 355373990 -> 8ea38a1b1 [HOTFIX] fix for Caffe2DML - switching back to using protoc v2.5.0 to compile the proto file Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/8ea38a1b Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/8ea38a1b Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/8ea38a1b Branch: refs/heads/master Commit: 8ea38a1b14a17d81299063cd281631369574a067 Parents: 3553739 Author: Nakul Jindal Authored: Fri Oct 6 11:59:59 2017 -0700 Committer: Nakul Jindal Committed: Fri Oct 6 11:59:59 2017 -0700 -- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/systemml/blob/8ea38a1b/pom.xml -- diff --git a/pom.xml b/pom.xml index 28b166f..ff90986 100644 --- a/pom.xml +++ b/pom.xml @@ -354,7 +354,7 @@ run - 2.6.1 + 2.5.0 src/main/proto/caffe
systemml git commit: [SYSTEMML-1945] added --deploy-mode param to python scripts
Repository: systemml Updated Branches: refs/heads/master 13a017583 -> 8f786aa22 [SYSTEMML-1945] added --deploy-mode param to python scripts Closes #681 Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/8f786aa2 Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/8f786aa2 Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/8f786aa2 Branch: refs/heads/master Commit: 8f786aa227d536558ed684060cef0e628bf3247f Parents: 13a0175 Author: Krishna Kalyan Authored: Wed Oct 11 09:20:40 2017 -0700 Committer: Nakul Jindal Committed: Wed Oct 11 09:20:40 2017 -0700 -- bin/systemml-spark-submit.py| 9 + scripts/perftest/python/run_perftest.py | 3 ++- scripts/perftest/python/utils_misc.py | 3 +++ 3 files changed, 10 insertions(+), 5 deletions(-) -- http://git-wip-us.apache.org/repos/asf/systemml/blob/8f786aa2/bin/systemml-spark-submit.py -- diff --git a/bin/systemml-spark-submit.py b/bin/systemml-spark-submit.py index b4da801..2546236 100755 --- a/bin/systemml-spark-submit.py +++ b/bin/systemml-spark-submit.py @@ -41,8 +41,8 @@ def default_jars(systemml_home): return target_jars, systemml_jar -def spark_submit_entry(master, driver_memory, num_executors, executor_memory, - executor_cores, conf, +def spark_submit_entry(master, deploy_mode, driver_memory, num_executors, + executor_memory, executor_cores, conf, nvargs, args, config, explain, debug, stats, gpu, f): """ This function is responsible for the execution of arguments via @@ -100,7 +100,7 @@ def spark_submit_entry(master, driver_memory, num_executors, executor_memory, # stats, explain, target_jars cmd_spark = [spark_path, '--class', 'org.apache.sysml.api.DMLScript', - '--master', master, + '--master', master, '--deploy-mode', deploy_mode, '--driver-memory', driver_memory, '--conf', default_conf, '--jars', cuda_jars, systemml_jars] @@ -129,7 +129,8 @@ if __name__ == '__main__': cparser = 
argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, description='System-ML Spark Submit Script') # SPARK-SUBMIT Options -cparser.add_argument('--master', default='local[*]', help='local, yarn-client, yarn-cluster', metavar='') +cparser.add_argument('--master', default='local[*]', help='local, yarn', metavar='') +cparser.add_argument('--deploy-mode', help='client, cluster', default='client', metavar='') cparser.add_argument('--driver-memory', default='8G', help='Memory for driver (e.g. 512M, 1G)', metavar='') cparser.add_argument('--num-executors', nargs=1, help='Number of executors to launch', metavar='') cparser.add_argument('--executor-memory', nargs=1, help='Memory per executor', metavar='') http://git-wip-us.apache.org/repos/asf/systemml/blob/8f786aa2/scripts/perftest/python/run_perftest.py -- diff --git a/scripts/perftest/python/run_perftest.py b/scripts/perftest/python/run_perftest.py index 6c016a8..6e87261 100755 --- a/scripts/perftest/python/run_perftest.py +++ b/scripts/perftest/python/run_perftest.py @@ -355,7 +355,8 @@ if __name__ == '__main__': 'set option to skip conservative memory estimates ' 'and use GPU wherever possible', nargs='?', const='no_option') # Spark Configuration Option -cparser.add_argument('--master', help='local, yarn-client, yarn-cluster', metavar='') +cparser.add_argument('--master', help='local, yarn', metavar='') +cparser.add_argument('--deploy-mode', help='client, cluster', metavar='') cparser.add_argument('--driver-memory', help='Memory for driver (e.g. 
512M)', metavar='') cparser.add_argument('--num-executors', help='Number of executors to launch', metavar='') cparser.add_argument('--executor-memory', help='Memory per executor', metavar='') http://git-wip-us.apache.org/repos/asf/systemml/blob/8f786aa2/scripts/perftest/python/utils_misc.py -- diff --git a/scripts/perftest/python/utils_misc.py b/scripts/perftest/python/utils_misc.py index ebc4376..63d8c1b 100755 --- a/scripts/perftest/python/utils_misc.py +++ b/scripts/perftest/python/utils_misc.py @@ -84,6 +84,9 @@ def split_config_args(args): if args['master'] is not None: backend_args_dict['--master'] = args['master'] +if args['deploy_mode'] is not None: +backend_args_dict['--deploy-mode
incubator-systemml git commit: Updated document to correspond to the currently released artifacts
Repository: incubator-systemml Updated Branches: refs/heads/master 8eed1ec94 -> e1f713aae Updated document to correspond to the currently released artifacts Closes #403 Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/e1f713aa Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/e1f713aa Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/e1f713aa Branch: refs/heads/master Commit: e1f713aaedb472842f448dabd7063978373836c8 Parents: 8eed1ec Author: Nakul Jindal Authored: Tue Feb 21 14:56:58 2017 -0800 Committer: Nakul Jindal Committed: Thu Feb 23 13:20:27 2017 -0800 -- docs/release-process.md | 146 ++- 1 file changed, 62 insertions(+), 84 deletions(-) -- http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/e1f713aa/docs/release-process.md -- diff --git a/docs/release-process.md b/docs/release-process.md index 1cc5c9f..a75a281 100644 --- a/docs/release-process.md +++ b/docs/release-process.md @@ -102,86 +102,64 @@ The build artifacts should be downloaded from [https://dist.apache.org/repos/dis this OS X example. # download artifacts - wget -r -nH -nd -np -R index.html* https://dist.apache.org/repos/dist/dev/incubator/systemml/0.11.0-incubating-rc1/ + wget -r -nH -nd -np -R 'index.html*' https://dist.apache.org/repos/dist/dev/incubator/systemml/0.13.0-incubating-rc1/ # verify standalone tgz works - tar -xvzf systemml-0.11.0-incubating-standalone.tgz - cd systemml-0.11.0-incubating-standalone + tar -xvzf systemml-0.13.0-incubating-bin.tgz + cd systemml-0.13.0-incubating-bin echo "print('hello world');" > hello.dml ./runStandaloneSystemML.sh hello.dml cd .. 
- # verify main jar works - mkdir lib - cp -R systemml-0.11.0-incubating-standalone/lib/* lib/ - rm lib/systemml-0.11.0-incubating.jar - java -cp ./lib/*:systemml-0.11.0-incubating.jar org.apache.sysml.api.DMLScript -s "print('hello world');" - - # verify src works - tar -xvzf systemml-0.11.0-incubating-src.tgz - cd systemml-0.11.0-incubating-src - mvn clean package -P distribution - cd target/ - java -cp ./lib/*:systemml-0.11.0-incubating.jar org.apache.sysml.api.DMLScript -s "print('hello world');" - java -cp ./lib/*:SystemML.jar org.apache.sysml.api.DMLScript -s "print('hello world');" - cd .. + # verify standalon zip works + rm -rf systemml-0.13.0-incubating-bin + unzip systemml-0.13.0-incubating-bin.zip + cd systemml-0.13.0-incubating-bin + echo "print('hello world');" > hello.dml + ./runStandaloneSystemML.sh hello.dml cd .. - # verify distrib tgz works - tar -xvzf systemml-0.11.0-incubating.tgz - cd systemml-0.11.0-incubating - java -cp ../lib/*:SystemML.jar org.apache.sysml.api.DMLScript -s "print('hello world');" - - # verify spark batch mode - export SPARK_HOME=/Users/deroneriksson/spark-1.5.1-bin-hadoop2.6 - $SPARK_HOME/bin/spark-submit SystemML.jar -s "print('hello world');" -exec hybrid_spark - - # verify hadoop batch mode - hadoop jar SystemML.jar -s "print('hello world');" - - -Here is an example of doing a basic -sanity check on OS X after building the artifacts manually. 
- - # build distribution artifacts - mvn clean package -P distribution - - cd target - - # verify main jar works - java -cp ./lib/*:systemml-0.11.0-incubating.jar org.apache.sysml.api.DMLScript -s "print('hello world');" - - # verify SystemML.jar works - java -cp ./lib/*:SystemML.jar org.apache.sysml.api.DMLScript -s "print('hello world');" - # verify src works - tar -xvzf systemml-0.11.0-incubating-src.tgz - cd systemml-0.11.0-incubating-src + tar -xvzf systemml-0.13.0-incubating-src.tgz + cd systemml-0.13.0-incubating-src mvn clean package -P distribution cd target/ - java -cp ./lib/*:systemml-0.11.0-incubating.jar org.apache.sysml.api.DMLScript -s "print('hello world');" - java -cp ./lib/*:SystemML.jar org.apache.sysml.api.DMLScript -s "print('hello world');" - cd .. - cd .. - - # verify standalone tgz works - tar -xvzf systemml-0.11.0-incubating-standalone.tgz - cd systemml-0.11.0-incubating-standalone - echo "print('hello world');" > hello.dml - ./runStandaloneSystemML.sh hello.dml - cd .. - - # verify distrib tgz works - tar -xvzf systemml-0.11.0-incubating.tgz - cd systemml-0.11.0-incubating - java -cp ../lib/*:SystemML.jar org.apache.sysml.
[2/2] incubator-systemml git commit: Updated document to correspond to the currently released artifacts
Updated document to correspond to the currently released artifacts Closes #403 Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/5c4e27c7 Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/5c4e27c7 Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/5c4e27c7 Branch: refs/heads/gh-pages Commit: 5c4e27c701da1084d1e47d7ad049f9570033e7ae Parents: 0fb74b9 Author: Nakul Jindal Authored: Tue Feb 21 14:56:58 2017 -0800 Committer: Nakul Jindal Committed: Thu Feb 23 13:20:27 2017 -0800 -- release-process.md | 146 1 file changed, 62 insertions(+), 84 deletions(-) -- http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/5c4e27c7/release-process.md -- diff --git a/release-process.md b/release-process.md index 1cc5c9f..a75a281 100644 --- a/release-process.md +++ b/release-process.md @@ -102,86 +102,64 @@ The build artifacts should be downloaded from [https://dist.apache.org/repos/dis this OS X example. # download artifacts - wget -r -nH -nd -np -R index.html* https://dist.apache.org/repos/dist/dev/incubator/systemml/0.11.0-incubating-rc1/ + wget -r -nH -nd -np -R 'index.html*' https://dist.apache.org/repos/dist/dev/incubator/systemml/0.13.0-incubating-rc1/ # verify standalone tgz works - tar -xvzf systemml-0.11.0-incubating-standalone.tgz - cd systemml-0.11.0-incubating-standalone + tar -xvzf systemml-0.13.0-incubating-bin.tgz + cd systemml-0.13.0-incubating-bin echo "print('hello world');" > hello.dml ./runStandaloneSystemML.sh hello.dml cd .. 
- # verify main jar works - mkdir lib - cp -R systemml-0.11.0-incubating-standalone/lib/* lib/ - rm lib/systemml-0.11.0-incubating.jar - java -cp ./lib/*:systemml-0.11.0-incubating.jar org.apache.sysml.api.DMLScript -s "print('hello world');" - - # verify src works - tar -xvzf systemml-0.11.0-incubating-src.tgz - cd systemml-0.11.0-incubating-src - mvn clean package -P distribution - cd target/ - java -cp ./lib/*:systemml-0.11.0-incubating.jar org.apache.sysml.api.DMLScript -s "print('hello world');" - java -cp ./lib/*:SystemML.jar org.apache.sysml.api.DMLScript -s "print('hello world');" - cd .. + # verify standalon zip works + rm -rf systemml-0.13.0-incubating-bin + unzip systemml-0.13.0-incubating-bin.zip + cd systemml-0.13.0-incubating-bin + echo "print('hello world');" > hello.dml + ./runStandaloneSystemML.sh hello.dml cd .. - # verify distrib tgz works - tar -xvzf systemml-0.11.0-incubating.tgz - cd systemml-0.11.0-incubating - java -cp ../lib/*:SystemML.jar org.apache.sysml.api.DMLScript -s "print('hello world');" - - # verify spark batch mode - export SPARK_HOME=/Users/deroneriksson/spark-1.5.1-bin-hadoop2.6 - $SPARK_HOME/bin/spark-submit SystemML.jar -s "print('hello world');" -exec hybrid_spark - - # verify hadoop batch mode - hadoop jar SystemML.jar -s "print('hello world');" - - -Here is an example of doing a basic -sanity check on OS X after building the artifacts manually. 
- - # build distribution artifacts - mvn clean package -P distribution - - cd target - - # verify main jar works - java -cp ./lib/*:systemml-0.11.0-incubating.jar org.apache.sysml.api.DMLScript -s "print('hello world');" - - # verify SystemML.jar works - java -cp ./lib/*:SystemML.jar org.apache.sysml.api.DMLScript -s "print('hello world');" - # verify src works - tar -xvzf systemml-0.11.0-incubating-src.tgz - cd systemml-0.11.0-incubating-src + tar -xvzf systemml-0.13.0-incubating-src.tgz + cd systemml-0.13.0-incubating-src mvn clean package -P distribution cd target/ - java -cp ./lib/*:systemml-0.11.0-incubating.jar org.apache.sysml.api.DMLScript -s "print('hello world');" - java -cp ./lib/*:SystemML.jar org.apache.sysml.api.DMLScript -s "print('hello world');" - cd .. - cd .. - - # verify standalone tgz works - tar -xvzf systemml-0.11.0-incubating-standalone.tgz - cd systemml-0.11.0-incubating-standalone - echo "print('hello world');" > hello.dml - ./runStandaloneSystemML.sh hello.dml - cd .. - - # verify distrib tgz works - tar -xvzf systemml-0.11.0-incubating.tgz - cd systemml-0.11.0-incubating - java -cp ../lib/*:SystemML.jar org.apache.sysml.api.DMLScript -s "print('hello world');" + java -cp "./lib/*:systemml-0.13.0-incubating.jar" org.apache.sysml
[1/2] incubator-systemml git commit: [SYSTEMML-1238] Updated the default parameters of mllearn to match that of scikit learn.
Repository: incubator-systemml Updated Branches: refs/heads/gh-pages bb97a4bc6 -> 5c4e27c70 [SYSTEMML-1238] Updated the default parameters of mllearn to match that of scikit learn. - Also updated the test to compare our algorithm to scikit-learn. Closes #398. Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/0fb74b94 Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/0fb74b94 Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/0fb74b94 Branch: refs/heads/gh-pages Commit: 0fb74b94af9e244b5695745ac7b3651b485b812f Parents: bb97a4b Author: Niketan Pansare Authored: Fri Feb 17 14:54:23 2017 -0800 Committer: Niketan Pansare Committed: Fri Feb 17 14:59:49 2017 -0800 -- algorithms-regression.md | 8 beginners-guide-python.md | 2 +- python-reference.md | 6 +++--- 3 files changed, 8 insertions(+), 8 deletions(-) -- http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/0fb74b94/algorithms-regression.md -- diff --git a/algorithms-regression.md b/algorithms-regression.md index 992862e..80b38a3 100644 --- a/algorithms-regression.md +++ b/algorithms-regression.md @@ -83,8 +83,8 @@ efficient when the number of features $m$ is relatively small {% highlight python %} from systemml.mllearn import LinearRegression -# C = 1/reg -lr = LinearRegression(sqlCtx, fit_intercept=True, C=1.0, solver='direct-solve') +# C = 1/reg (to disable regularization, use float("inf")) +lr = LinearRegression(sqlCtx, fit_intercept=True, normalize=False, C=float("inf"), solver='direct-solve') # X_train, y_train and X_test can be NumPy matrices or Pandas DataFrame or SciPy Sparse Matrix y_test = lr.fit(X_train, y_train) # df_train is DataFrame that contains two columns: "features" (of type Vector) and "label". 
df_test is a DataFrame that contains the column "features" @@ -125,8 +125,8 @@ y_test = lr.fit(df_train) {% highlight python %} from systemml.mllearn import LinearRegression -# C = 1/reg -lr = LinearRegression(sqlCtx, fit_intercept=True, max_iter=100, tol=0.01, C=1.0, solver='newton-cg') +# C = 1/reg (to disable regularization, use float("inf")) +lr = LinearRegression(sqlCtx, fit_intercept=True, normalize=False, max_iter=100, tol=0.01, C=float("inf"), solver='newton-cg') # X_train, y_train and X_test can be NumPy matrices or Pandas DataFrames or SciPy Sparse matrices y_test = lr.fit(X_train, y_train) # df_train is DataFrame that contains two columns: "features" (of type Vector) and "label". df_test is a DataFrame that contains the column "features" http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/0fb74b94/beginners-guide-python.md -- diff --git a/beginners-guide-python.md b/beginners-guide-python.md index 4d1b098..ffab09e 100644 --- a/beginners-guide-python.md +++ b/beginners-guide-python.md @@ -228,7 +228,7 @@ X_test = diabetes_X[-20:] y_train = diabetes.target[:-20] y_test = diabetes.target[-20:] # Create linear regression object -regr = LinearRegression(sqlCtx, fit_intercept=True, C=1, solver='direct-solve') +regr = LinearRegression(sqlCtx, fit_intercept=True, C=float("inf"), solver='direct-solve') # Train the model using the training sets regr.fit(X_train, y_train) y_predicted = regr.predict(X_test) http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/0fb74b94/python-reference.md -- diff --git a/python-reference.md b/python-reference.md index 65dcb5c..8d38598 100644 --- a/python-reference.md +++ b/python-reference.md @@ -731,7 +731,7 @@ LogisticRegression score: 0.92 ### Reference documentation - *class*`systemml.mllearn.estimators.LinearRegression`(*sqlCtx*, *fit\_intercept=True*, *max\_iter=100*, *tol=1e-06*, *C=1.0*, *solver='newton-cg'*, *transferUsingDF=False*)(#systemml.mllearn.estimators.LinearRegression "Permalink to this 
definition") + *class*`systemml.mllearn.estimators.LinearRegression`(*sqlCtx*, *fit\_intercept=True*, *normalize=False*, *max\_iter=100*, *tol=1e-06*, *C=float("inf")*, *solver='newton-cg'*, *transferUsingDF=False*)(#systemml.mllearn.estimators.LinearRegression "Permalink to this definition") : Bases: `systemml.mllearn.estimators.BaseSystemMLRegressor`{.xref .py .py-class .docutils .literal} @@ -760,7 +760,7 @@ LogisticRegression score: 0.92 >>> # The mean square error >>> print("Residual sum of squares: %.2f" % np.mean((regr.predict(diabetes_X_test) - diabetes_y_test) ** 2)) - *class*`systemml.mllearn.estimators.LogisticRegression`(*sqlCtx*, *penalty='l2'*, *fit\_intercept=True*, *max\_iter=100*, *
incubator-systemml git commit: Bug fixes, instruction added, async cudaFree
Repository: incubator-systemml Updated Branches: refs/heads/master 0daae6cf0 -> 4316efeba Bug fixes, instruction added, async cudaFree - Fixes for GPU mem mgmt and related integration tests - Added "exp" function for GPU - Do cudaFree asynchronously Closes #404 Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/4316efeb Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/4316efeb Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/4316efeb Branch: refs/heads/master Commit: 4316efebaf065d7a3de067354275d1b991e38bb4 Parents: 0daae6c Author: Nakul Jindal Authored: Fri Feb 24 11:27:44 2017 -0800 Committer: Nakul Jindal Committed: Fri Feb 24 11:27:44 2017 -0800 -- src/main/cpp/kernels/SystemML.cu| 15 ++ src/main/cpp/kernels/SystemML.ptx | 136 -- .../java/org/apache/sysml/hops/UnaryOp.java | 2 +- .../context/ExecutionContext.java | 8 +- .../instructions/GPUInstructionParser.java | 3 +- .../gpu/BuiltinUnaryGPUInstruction.java | 2 +- .../gpu/ConvolutionGPUInstruction.java | 16 +- .../instructions/gpu/GPUInstruction.java| 10 +- .../gpu/MatrixBuiltinGPUInstruction.java| 14 +- .../instructions/gpu/context/GPUContext.java| 17 +- .../instructions/gpu/context/GPUObject.java | 177 +++ .../instructions/gpu/context/JCudaContext.java | 1 + .../instructions/gpu/context/JCudaObject.java | 161 ++--- .../runtime/matrix/data/LibMatrixCUDA.java | 103 +++ .../java/org/apache/sysml/utils/Statistics.java | 23 ++- .../functions/misc/RewritePushdownUaggTest.java | 15 +- .../RewriteSimplifyRowColSumMVMultTest.java | 5 +- 17 files changed, 483 insertions(+), 225 deletions(-) -- http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/4316efeb/src/main/cpp/kernels/SystemML.cu -- diff --git a/src/main/cpp/kernels/SystemML.cu b/src/main/cpp/kernels/SystemML.cu index cda28ba..40a1046 100644 --- a/src/main/cpp/kernels/SystemML.cu +++ b/src/main/cpp/kernels/SystemML.cu 
@@ -628,3 +628,18 @@ __global__ void reduce_col_mean(double *g_idata, double *g_odata, unsigned int r MeanOp aop(rows); reduce_col(g_idata, g_odata, rows, cols, op, aop, 0.0); } + + +/** + * Do an exp over all the elements of a matrix + * @param A the input matrix (of length = size) + * @param C the pre-allocated output matrix (of length = size) + * @param siz the length of the input and output matrices + */ +extern "C" +__global__ void matrix_exp(double *A, double *C, unsigned int size) { +int index = blockIdx.x * blockDim.x + threadIdx.x; +if (index < size){ +C[index] = exp(A[index]); +} +} http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/4316efeb/src/main/cpp/kernels/SystemML.ptx -- diff --git a/src/main/cpp/kernels/SystemML.ptx b/src/main/cpp/kernels/SystemML.ptx index 93f3879..b9efd9b 100644 --- a/src/main/cpp/kernels/SystemML.ptx +++ b/src/main/cpp/kernels/SystemML.ptx @@ -4810,6 +4810,120 @@ BB33_5: ret; } + // .globl matrix_exp +.visible .entry matrix_exp( + .param .u64 matrix_exp_param_0, + .param .u64 matrix_exp_param_1, + .param .u32 matrix_exp_param_2 +) +{ + .reg .pred %p<5>; + .reg .f32 %f<3>; + .reg .b32 %r<21>; + .reg .f64 %fd<42>; + .reg .b64 %rd<10>; + + + ld.param.u64%rd2, [matrix_exp_param_0]; + ld.param.u64%rd3, [matrix_exp_param_1]; + ld.param.u32%r5, [matrix_exp_param_2]; + mov.u32 %r6, %ctaid.x; + mov.u32 %r7, %ntid.x; + mov.u32 %r8, %tid.x; + mad.lo.s32 %r1, %r7, %r6, %r8; + setp.ge.u32 %p1, %r1, %r5; + @%p1 braBB34_5; + + cvta.to.global.u64 %rd4, %rd2; + cvt.s64.s32 %rd1, %r1; + mul.wide.s32%rd5, %r1, 8; + add.s64 %rd6, %rd4, %rd5; + ld.global.f64 %fd1, [%rd6]; + mov.f64 %fd6, 0d3FF71547652B82FE; + mul.rn.f64 %fd7, %fd1, %fd6; + mov.f64 %fd8, 0d4338; + add.rn.f64 %fd9, %fd7, %fd8; + { + .reg .b32 %temp; + mov.b64 {%r2, %temp}, %fd9; + } + mov.f64 %fd10, 0dC338; + add.rn.f64 %fd11, %fd9, %fd10; + mov.f64 %fd12, 0dBFE62E42FEFA39EF; + fma.rn.f64 %fd13, %fd11, %fd12, %fd1; + mov.f64 %fd14, 0dBC7ABC9E3B39803F; + fma.rn.f64 %fd15, 
%fd11, %fd14, %fd13; + mov.f64 %fd
incubator-systemml git commit: Upgraded to use jcuda8 (from the maven repo)
Repository: incubator-systemml Updated Branches: refs/heads/master ed3a15882 -> 3757995b5 Upgraded to use jcuda8 (from the maven repo) Closes #291 Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/3757995b Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/3757995b Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/3757995b Branch: refs/heads/master Commit: 3757995b50aef019b0ce22d9ae93eae42aed02b4 Parents: ed3a158 Author: Nakul Jindal Authored: Fri Mar 3 18:11:45 2017 -0800 Committer: Nakul Jindal Committed: Fri Mar 3 18:11:46 2017 -0800 -- docs/devdocs/gpu-backend.md | 61 +++--- pom.xml | 195 +++ .../runtime/matrix/data/LibMatrixCUDA.java | 19 +- 3 files changed, 195 insertions(+), 80 deletions(-) -- http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/3757995b/docs/devdocs/gpu-backend.md -- diff --git a/docs/devdocs/gpu-backend.md b/docs/devdocs/gpu-backend.md index c6f66d6..40311c7 100644 --- a/docs/devdocs/gpu-backend.md +++ b/docs/devdocs/gpu-backend.md @@ -19,52 +19,43 @@ limitations under the License. # Initial prototype for GPU backend -A GPU backend implements two important abstract classes: +The GPU backend implements two important abstract classes: 1. `org.apache.sysml.runtime.controlprogram.context.GPUContext` 2. `org.apache.sysml.runtime.controlprogram.context.GPUObject` -The GPUContext is responsible for GPU memory management and initialization/destruction of Cuda handles. +The `GPUContext` is responsible for GPU memory management and initialization/destruction of Cuda handles. +Currently, an active instance of the `GPUContext` class is made available globally and is used to store handles +of the allocated blocks on the GPU. A count is kept per block for the number of instructions that need it. +When the count is 0, the block may be evicted on a call to `GPUObject.evict()`. 
-A GPUObject (like RDDObject and BroadcastObject) is stored in CacheableData object. It gets call-backs from SystemML's bufferpool on following methods +A `GPUObject` (like RDDObject and BroadcastObject) is stored in CacheableData object. It gets call-backs from SystemML's bufferpool on following methods 1. void acquireDeviceRead() -2. void acquireDenseDeviceModify(int numElemsToAllocate) -3. void acquireHostRead() -4. void acquireHostModify() -5. void release(boolean isGPUCopyModified) +2. void acquireDeviceModifyDense() +3. void acquireDeviceModifySparse +4. void acquireHostRead() +5. void acquireHostModify() +6. void releaseInput() +7. void releaseOutput() -## JCudaContext: -The current prototype supports Nvidia's CUDA libraries using JCuda wrapper. The implementation for the above classes can be found in: -1. `org.apache.sysml.runtime.controlprogram.context.JCudaContext` -2. `org.apache.sysml.runtime.controlprogram.context.JCudaObject` +Sparse matrices on GPU are represented in `CSR` format. In the SystemML runtime, they are represented in `MCSR` or modified `CSR` format. +A conversion cost is incurred when sparse matrices are sent back and forth between host and device memory. -### Setup instructions for JCudaContext: +Concrete classes `JCudaContext` and `JCudaObject` (which extend `GPUContext` & `GPUObject` respectively) contain references to `org.jcuda.*`. -1. Follow the instructions from `https://developer.nvidia.com/cuda-downloads` and install CUDA 7.5. -2. Follow the instructions from `https://developer.nvidia.com/cudnn` and install CuDNN v4. -3. Download install JCuda binaries version 0.7.5b and JCudnn version 0.7.5. Easiest option would be to use mavenized jcuda: -```python -git clone https://github.com/MysterionRise/mavenized-jcuda.git -mvn -Djcuda.version=0.7.5b -Djcudnn.version=0.7.5 clean package -CURR_DIR=`pwd` -JCUDA_PATH=$CURR_DIR"/target/lib/" -JAR_PATH="." 
-for j in `ls $JCUDA_PATH/*.jar` -do -JAR_PATH=$JAR_PATH":"$j -done -export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$JCUDA_PATH -``` +The `LibMatrixCUDA` class contains methods to invoke CUDA libraries (where available) and invoke custom kernels. +Runtime classes (that extend `GPUInstruction`) redirect calls to functions in this class. +Some functions in `LibMatrixCUDA` need finer control over GPU memory management primitives. These are provided by `JCudaObject`. + +### Setup instructions: -Note for Windows users: -* CuDNN v4 is available to download: `http://developer.download.nvidia.com/compute/redist/cudnn/v4/cudnn-7.0-win-x64-v4.0-prod.zip` -* If above steps doesn't work for JCuda, copy the DLLs into C:\lib (or /lib) directory. +1. Follow the instructions from `https:/
incubator-systemml git commit: toString now prints NaN & Infinity like how as.scalar prints them
Repository: incubator-systemml Updated Branches: refs/heads/master 8936e4f8a -> c7eebddb1 toString now prints NaN & Infinity like how as.scalar prints them Closes #415 Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/c7eebddb Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/c7eebddb Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/c7eebddb Branch: refs/heads/master Commit: c7eebddb17820398ac5e8ee740c6944d893ec95a Parents: 8936e4f Author: Nakul Jindal Authored: Mon Mar 6 14:22:22 2017 -0800 Committer: Nakul Jindal Committed: Mon Mar 6 14:22:22 2017 -0800 -- .../sysml/runtime/util/DataConverter.java | 26 1 file changed, 21 insertions(+), 5 deletions(-) -- http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/c7eebddb/src/main/java/org/apache/sysml/runtime/util/DataConverter.java -- diff --git a/src/main/java/org/apache/sysml/runtime/util/DataConverter.java b/src/main/java/org/apache/sysml/runtime/util/DataConverter.java index d4348b7..699a602 100644 --- a/src/main/java/org/apache/sysml/runtime/util/DataConverter.java +++ b/src/main/java/org/apache/sysml/runtime/util/DataConverter.java @@ -779,6 +779,22 @@ public class DataConverter System.arraycopy(mb.getDenseBlock(), 0, dest, destPos, rows*cols); } } + + /** +* Convenience method to print NaN & Infinity compliant with how as.scalar prints them. 
+* {@link DecimalFormat} prints NaN as \uFFFD and Infinity as \u221E +* http://docs.oracle.com/javase/6/docs/api/java/text/DecimalFormat.html +* @param dfThe {@link DecimalFormat} instance, constructed with the appropriate options +* @param value The double value to print +* @return a string formatted with the {@link DecimalFormat} instance or "NaN" or "Infinity" or "-Infinity" +*/ + private static String dfFormat(DecimalFormat df, double value) { + if (Double.isNaN(value) || Double.isInfinite(value)){ + return Double.toString(value); + } else { + return df.format(value); + } + } public static String toString(MatrixBlock mb) { return toString(mb, false, " ", "\n", mb.getNumRows(), mb.getNumColumns(), 3); @@ -826,7 +842,7 @@ public class DataConverter if (row < rowLength && col < colLength) { // Print (row+1) and (col+1) since for a DML user, everything is 1-indexed sb.append(row+1).append(separator).append(col+1).append(separator); - sb.append(df.format(value)).append(lineseparator); + sb.append(dfFormat(df, value)).append(lineseparator); } } } else {// Block is in dense format @@ -835,7 +851,7 @@ public class DataConverter double value = mb.getValue(i, j); if (value != 0.0){ sb.append(i+1).append(separator).append(j+1).append(separator); - sb.append(df.format(value)).append(lineseparator); + sb.append(dfFormat(df, value)).append(lineseparator); } } } @@ -845,11 +861,11 @@ public class DataConverter for (int i=0; i
[5/5] incubator-systemml git commit: [SYSTEMML-1359] Added extra instrumentation for CUDA lib calls
[SYSTEMML-1359] Added extra instrumentation for CUDA lib calls - Added instrumentation around input copies & output allocations - A config property is available to enable/disable advanced stats for DNN and GPU - Minor refactoring and change of SystemML.cu function names Closes #412 Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/4f9dcf9a Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/4f9dcf9a Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/4f9dcf9a Branch: refs/heads/master Commit: 4f9dcf9add6b9bdbc190d97efef9781e32772dd9 Parents: ee33ec6 Author: Nakul Jindal Authored: Tue Mar 7 10:50:47 2017 -0800 Committer: Nakul Jindal Committed: Tue Mar 7 10:50:47 2017 -0800 -- conf/SystemML-config.xml.template |6 + src/main/cpp/kernels/SystemML.cu| 37 +- src/main/cpp/kernels/SystemML.ptx | 3923 +- .../java/org/apache/sysml/api/DMLScript.java|9 +- .../java/org/apache/sysml/conf/DMLConfig.java |8 +- .../context/ExecutionContext.java | 32 +- .../instructions/GPUInstructionParser.java |2 +- .../gpu/AggregateBinaryGPUInstruction.java | 13 +- .../gpu/AggregateUnaryGPUInstruction.java | 109 + .../gpu/ConvolutionGPUInstruction.java | 66 +- .../instructions/gpu/GPUInstruction.java| 91 +- .../instructions/gpu/MMTSJGPUInstruction.java |8 +- .../gpu/MatrixBuiltinGPUInstruction.java| 12 +- .../MatrixMatrixArithmeticGPUInstruction.java | 10 +- .../gpu/MatrixMatrixAxpyGPUInstruction.java | 10 +- .../instructions/gpu/ReorgGPUInstruction.java |8 +- .../ScalarMatrixArithmeticGPUInstruction.java |8 +- .../context/AggregateUnaryGPUInstruction.java | 110 - .../instructions/gpu/context/GPUObject.java | 29 +- .../instructions/gpu/context/JCudaContext.java |5 +- .../instructions/gpu/context/JCudaKernels.java |3 +- .../instructions/gpu/context/JCudaObject.java | 164 +- .../runtime/matrix/data/LibMatrixCUDA.java | 1681 +--- 
.../sysml/runtime/matrix/data/LibMatrixDNN.java |4 +- .../org/apache/sysml/utils/GPUStatistics.java | 209 + .../java/org/apache/sysml/utils/Statistics.java | 116 +- 26 files changed, 2782 insertions(+), 3891 deletions(-) -- http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/4f9dcf9a/conf/SystemML-config.xml.template -- diff --git a/conf/SystemML-config.xml.template b/conf/SystemML-config.xml.template index da80039..a4c7b2f 100644 --- a/conf/SystemML-config.xml.template +++ b/conf/SystemML-config.xml.template @@ -65,4 +65,10 @@ 1 + + + false + + + false http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/4f9dcf9a/src/main/cpp/kernels/SystemML.cu -- diff --git a/src/main/cpp/kernels/SystemML.cu b/src/main/cpp/kernels/SystemML.cu index 40a1046..7bb2c34 100644 --- a/src/main/cpp/kernels/SystemML.cu +++ b/src/main/cpp/kernels/SystemML.cu @@ -25,10 +25,15 @@ nvcc -ptx -arch=sm_30 SystemML.cu #include -// dim => rlen (Assumption: rlen == clen) -// N = length of dense array + +/** + * Does a copy of upper to lower triangle of the given matrix + * @param ret the input and output array allocated on the GPU + * @param dim the number of rows of the square matrix ret + * @param N total number of elements of the matrix + */ extern "C" -__global__ void copyUpperToLowerTriangleDense(double* ret, int dim, int N) { +__global__ void copy_u2l_dense(double* ret, int dim, int N) { int ix = blockIdx.x * blockDim.x + threadIdx.x; int iy = blockIdx.y * blockDim.y + threadIdx.y; int id_dest = iy * dim + ix; @@ -71,26 +76,6 @@ __forceinline__ __device__ double binaryOp(double x, double y, int op) { } extern "C" -__global__ void dense_matrix_set(double* A, double scalar, int rlen, int clen) { - int ix = blockIdx.x * blockDim.x + threadIdx.x; - int iy = blockIdx.y * blockDim.y + threadIdx.y; - int index = ix * clen + iy; - if(index < rlen*clen) { - A[index] = scalar; - } -} - -extern "C" -__global__ void dense_matrix_copy(double* A, double* ret, int rlen, int clen) { - 
int ix = blockIdx.x * blockDim.x + threadIdx.x; - int iy = blockIdx.y * blockDim.y + threadIdx.y; - int index = ix * clen + iy; - if(ix < rlen && iy < clen) { - ret[index] = A[index]; - } -} - -extern "C" __global__ void relu(double* A, double* ret, int rlen, int clen) { int ix = blockIdx.x * blockDim.x + threadIdx.x; int iy = blockIdx.y
[4/5] incubator-systemml git commit: [SYSTEMML-1359] Added extra instrumentation for CUDA lib calls
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/4f9dcf9a/src/main/cpp/kernels/SystemML.ptx -- diff --git a/src/main/cpp/kernels/SystemML.ptx b/src/main/cpp/kernels/SystemML.ptx index b9efd9b..efaf29b 100644 --- a/src/main/cpp/kernels/SystemML.ptx +++ b/src/main/cpp/kernels/SystemML.ptx @@ -1,16 +1,16 @@ // // Generated by NVIDIA NVVM Compiler // -// Compiler Build ID: CL-19856038 -// Cuda compilation tools, release 7.5, V7.5.17 +// Compiler Build ID: CL-21124049 +// Cuda compilation tools, release 8.0, V8.0.44 // Based on LLVM 3.4svn // -.version 4.3 +.version 5.0 .target sm_30 .address_size 64 - // .globl _Z6reduceI5SumOpEvPdS1_jT_d + // .globl copy_u2l_dense .func (.param .b64 func_retval0) __internal_accurate_pow ( .param .b64 __internal_accurate_pow_param_0, @@ -19,1692 +19,10 @@ ; .extern .shared .align 8 .b8 sdata[]; -.visible .func _Z6reduceI5SumOpEvPdS1_jT_d( - .param .b64 _Z6reduceI5SumOpEvPdS1_jT_d_param_0, - .param .b64 _Z6reduceI5SumOpEvPdS1_jT_d_param_1, - .param .b32 _Z6reduceI5SumOpEvPdS1_jT_d_param_2, - .param .align 1 .b8 _Z6reduceI5SumOpEvPdS1_jT_d_param_3[1], - .param .b64 _Z6reduceI5SumOpEvPdS1_jT_d_param_4 -) -{ - .reg .pred %p<20>; - .reg .b32 %r<33>; - .reg .f64 %fd<79>; - .reg .b64 %rd<12>; - - - ld.param.u64%rd2, [_Z6reduceI5SumOpEvPdS1_jT_d_param_0]; - ld.param.u64%rd3, [_Z6reduceI5SumOpEvPdS1_jT_d_param_1]; - ld.param.u32%r5, [_Z6reduceI5SumOpEvPdS1_jT_d_param_2]; - ld.param.f64%fd76, [_Z6reduceI5SumOpEvPdS1_jT_d_param_4]; - mov.u32 %r6, %tid.x; - mov.u32 %r7, %ctaid.x; - shl.b32 %r8, %r7, 1; - mov.u32 %r9, %ntid.x; - mad.lo.s32 %r32, %r8, %r9, %r6; - setp.ge.u32 %p1, %r32, %r5; - @%p1 braBB0_5; - - mov.f64 %fd77, %fd76; - -BB0_2: - mov.f64 %fd1, %fd77; - mul.wide.u32%rd4, %r32, 8; - add.s64 %rd5, %rd2, %rd4; - ld.f64 %fd29, [%rd5]; - add.f64 %fd78, %fd1, %fd29; - add.s32 %r3, %r32, %r9; - setp.ge.u32 %p2, %r3, %r5; - @%p2 braBB0_4; - - mul.wide.u32%rd6, %r3, 8; - add.s64 %rd7, %rd2, %rd6; - ld.f64 %fd30, [%rd7]; - add.f64 
%fd78, %fd78, %fd30; - -BB0_4: - mov.f64 %fd77, %fd78; - shl.b32 %r12, %r9, 1; - mov.u32 %r13, %nctaid.x; - mad.lo.s32 %r32, %r12, %r13, %r32; - setp.lt.u32 %p3, %r32, %r5; - mov.f64 %fd76, %fd77; - @%p3 braBB0_2; - -BB0_5: - mov.f64 %fd74, %fd76; - mul.wide.u32%rd8, %r6, 8; - mov.u64 %rd9, sdata; - add.s64 %rd1, %rd9, %rd8; - st.shared.f64 [%rd1], %fd74; - bar.sync0; - setp.lt.u32 %p4, %r9, 1024; - @%p4 braBB0_9; - - setp.gt.u32 %p5, %r6, 511; - mov.f64 %fd75, %fd74; - @%p5 braBB0_8; - - ld.shared.f64 %fd31, [%rd1+4096]; - add.f64 %fd75, %fd74, %fd31; - st.shared.f64 [%rd1], %fd75; - -BB0_8: - mov.f64 %fd74, %fd75; - bar.sync0; - -BB0_9: - mov.f64 %fd72, %fd74; - setp.lt.u32 %p6, %r9, 512; - @%p6 braBB0_13; - - setp.gt.u32 %p7, %r6, 255; - mov.f64 %fd73, %fd72; - @%p7 braBB0_12; - - ld.shared.f64 %fd32, [%rd1+2048]; - add.f64 %fd73, %fd72, %fd32; - st.shared.f64 [%rd1], %fd73; - -BB0_12: - mov.f64 %fd72, %fd73; - bar.sync0; - -BB0_13: - mov.f64 %fd70, %fd72; - setp.lt.u32 %p8, %r9, 256; - @%p8 braBB0_17; - - setp.gt.u32 %p9, %r6, 127; - mov.f64 %fd71, %fd70; - @%p9 braBB0_16; - - ld.shared.f64 %fd33, [%rd1+1024]; - add.f64 %fd71, %fd70, %fd33; - st.shared.f64 [%rd1], %fd71; - -BB0_16: - mov.f64 %fd70, %fd71; - bar.sync0; - -BB0_17: - mov.f64 %fd68, %fd70; - setp.lt.u32 %p10, %r9, 128; - @%p10 bra BB0_21; - - setp.gt.u32 %p11, %r6, 63; - mov.f64 %fd69, %fd68; - @%p11 bra BB0_20; - - ld.shared.f64 %fd34, [%rd1+512]; - add.f64 %fd69, %fd68, %fd34; - st.shared.f64 [%rd1], %fd69; - -BB0_20: - mov.f64 %fd68, %fd69; - bar.sync0; - -BB0_21: - mov.f64 %fd67, %fd68; - setp.gt.u32 %p12, %r6, 31; - @%p12 bra BB0_34; - - setp.lt.u32 %p13, %r9, 64; - @%p13 bra BB0_24; - - ld.volatile.shared.f64 %fd35, [%rd1+256]; - add.f64 %fd67, %fd67, %
[2/5] incubator-systemml git commit: [SYSTEMML-1359] Added extra instrumentation for CUDA lib calls
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/4f9dcf9a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java -- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java index 51a0f6b..1511afc 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java @@ -44,6 +44,7 @@ import static jcuda.jcudnn.JCudnn.cudnnSetConvolution2dDescriptor; import static jcuda.jcudnn.JCudnn.cudnnSetFilter4dDescriptor; import static jcuda.jcudnn.JCudnn.cudnnSetPooling2dDescriptor; import static jcuda.jcudnn.JCudnn.cudnnSetTensor4dDescriptor; +import static jcuda.jcudnn.cudnnActivationMode.CUDNN_ACTIVATION_RELU; import static jcuda.jcudnn.cudnnConvolutionMode.CUDNN_CROSS_CORRELATION; import static jcuda.jcudnn.cudnnDataType.CUDNN_DATA_DOUBLE; import static jcuda.jcudnn.cudnnNanPropagation.CUDNN_PROPAGATE_NAN; @@ -55,23 +56,61 @@ import static jcuda.jcusparse.cusparseOperation.CUSPARSE_OPERATION_NON_TRANSPOSE import static jcuda.jcusparse.cusparseOperation.CUSPARSE_OPERATION_TRANSPOSE; import static jcuda.runtime.JCuda.cudaDeviceSynchronize; import static jcuda.runtime.JCuda.cudaMemcpy; +import static jcuda.runtime.cudaMemcpyKind.cudaMemcpyDeviceToDevice; import static jcuda.runtime.cudaMemcpyKind.cudaMemcpyDeviceToHost; import static jcuda.runtime.cudaMemcpyKind.cudaMemcpyHostToDevice; -import static jcuda.runtime.cudaMemcpyKind.cudaMemcpyDeviceToDevice; -import static jcuda.jcudnn.cudnnActivationMode.CUDNN_ACTIVATION_RELU; import static org.apache.sysml.runtime.instructions.gpu.context.JCudaObject.allocate; import static org.apache.sysml.runtime.instructions.gpu.context.JCudaObject.cudaFreeHelper; + import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.sysml.runtime.DMLRuntimeException; import 
org.apache.sysml.runtime.controlprogram.caching.MatrixObject; import org.apache.sysml.runtime.controlprogram.context.ExecutionContext; -import org.apache.sysml.runtime.functionobjects.*; +import org.apache.sysml.runtime.functionobjects.And; +import org.apache.sysml.runtime.functionobjects.Builtin; +import org.apache.sysml.runtime.functionobjects.CM; +import org.apache.sysml.runtime.functionobjects.Divide; +import org.apache.sysml.runtime.functionobjects.Equals; +import org.apache.sysml.runtime.functionobjects.GreaterThan; +import org.apache.sysml.runtime.functionobjects.GreaterThanEquals; +import org.apache.sysml.runtime.functionobjects.IndexFunction; +import org.apache.sysml.runtime.functionobjects.KahanPlus; +import org.apache.sysml.runtime.functionobjects.KahanPlusSq; +import org.apache.sysml.runtime.functionobjects.LessThan; +import org.apache.sysml.runtime.functionobjects.LessThanEquals; +import org.apache.sysml.runtime.functionobjects.Mean; +import org.apache.sysml.runtime.functionobjects.Minus; +import org.apache.sysml.runtime.functionobjects.Multiply; +import org.apache.sysml.runtime.functionobjects.Multiply2; +import org.apache.sysml.runtime.functionobjects.NotEquals; +import org.apache.sysml.runtime.functionobjects.Or; +import org.apache.sysml.runtime.functionobjects.Plus; +import org.apache.sysml.runtime.functionobjects.Power; +import org.apache.sysml.runtime.functionobjects.Power2; +import org.apache.sysml.runtime.functionobjects.ReduceAll; +import org.apache.sysml.runtime.functionobjects.ReduceCol; +import org.apache.sysml.runtime.functionobjects.ReduceDiag; +import org.apache.sysml.runtime.functionobjects.ReduceRow; +import org.apache.sysml.runtime.functionobjects.ValueFunction; import org.apache.sysml.runtime.instructions.cp.DoubleObject; -import org.apache.sysml.runtime.instructions.gpu.context.*; +import org.apache.sysml.runtime.instructions.gpu.GPUInstruction; +import org.apache.sysml.runtime.instructions.gpu.context.ExecutionConfig; +import 
org.apache.sysml.runtime.instructions.gpu.context.GPUContext; +import org.apache.sysml.runtime.instructions.gpu.context.JCudaContext; +import org.apache.sysml.runtime.instructions.gpu.context.JCudaKernels; +import org.apache.sysml.runtime.instructions.gpu.context.JCudaObject; import org.apache.sysml.runtime.instructions.gpu.context.JCudaObject.CSRPointer; -import org.apache.sysml.runtime.matrix.operators.*; +import org.apache.sysml.runtime.matrix.operators.AggregateOperator; +import org.apache.sysml.runtime.matrix.operators.AggregateUnaryOperator; +import org.apache.sysml.runtime.matrix.operators.BinaryOperator; +import org.apache.sysml.runtime.matrix.operators.CMOperator; +import org.apache.sysml.runtime.matrix.operators.LeftScalarOperator; +import org.apache.sysml.runtime.matrix.operators.RightScalarOperator; +import org.apache.sysml.runtime.matrix.operators.ScalarOperator; +import org.apache.sysml.utils.GPUStatistic
[3/5] incubator-systemml git commit: [SYSTEMML-1359] Added extra instrumentation for CUDA lib calls
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/4f9dcf9a/src/main/java/org/apache/sysml/api/DMLScript.java -- diff --git a/src/main/java/org/apache/sysml/api/DMLScript.java b/src/main/java/org/apache/sysml/api/DMLScript.java index 97597e0..798e74e 100644 --- a/src/main/java/org/apache/sysml/api/DMLScript.java +++ b/src/main/java/org/apache/sysml/api/DMLScript.java @@ -78,6 +78,8 @@ import org.apache.sysml.runtime.controlprogram.parfor.ProgramConverter; import org.apache.sysml.runtime.controlprogram.parfor.stat.InfrastructureAnalyzer; import org.apache.sysml.runtime.controlprogram.parfor.util.IDHandler; import org.apache.sysml.runtime.matrix.CleanupMR; +import org.apache.sysml.runtime.matrix.data.LibMatrixCUDA; +import org.apache.sysml.runtime.matrix.data.LibMatrixDNN; import org.apache.sysml.runtime.matrix.mapred.MRConfigurationNames; import org.apache.sysml.runtime.matrix.mapred.MRJobConfiguration; import org.apache.sysml.runtime.util.LocalFileUtils; @@ -85,6 +87,7 @@ import org.apache.sysml.runtime.util.MapReduceTool; import org.apache.sysml.utils.Explain; import org.apache.sysml.utils.Explain.ExplainCounts; import org.apache.sysml.utils.Explain.ExplainType; +import org.apache.sysml.utils.GPUStatistics; import org.apache.sysml.utils.Statistics; import org.apache.sysml.yarn.DMLAppMasterUtils; import org.apache.sysml.yarn.DMLYarnClientProxy; @@ -646,7 +649,11 @@ public class DMLScript //double costs = CostEstimationWrapper.getTimeEstimate(rtprog, ExecutionContextFactory.createContext()); //System.out.println("Estimated costs: "+costs); - + + // Whether extra statistics useful for developers and others interested in digging + // into performance problems are recorded and displayed + GPUStatistics.DISPLAY_STATISTICS = dmlconf.getBooleanValue(DMLConfig.EXTRA_GPU_STATS); + LibMatrixDNN.DISPLAY_STATISTICS = dmlconf.getBooleanValue(DMLConfig.EXTRA_DNN_STATS); //Step 10: execute runtime program Statistics.startRunTimer(); 
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/4f9dcf9a/src/main/java/org/apache/sysml/conf/DMLConfig.java -- diff --git a/src/main/java/org/apache/sysml/conf/DMLConfig.java b/src/main/java/org/apache/sysml/conf/DMLConfig.java index 3d0fb28..a42b1ca 100644 --- a/src/main/java/org/apache/sysml/conf/DMLConfig.java +++ b/src/main/java/org/apache/sysml/conf/DMLConfig.java @@ -74,6 +74,8 @@ public class DMLConfig public static final String CODEGEN = "codegen.enabled"; //boolean public static final String CODEGEN_PLANCACHE= "codegen.plancache"; //boolean public static final String CODEGEN_LITERALS = "codegen.literals"; //1..heuristic, 2..always + public static final String EXTRA_GPU_STATS = "systemml.stats.extraGPU"; //boolean + public static final String EXTRA_DNN_STATS = "systemml.stats.extraDNN"; //boolean // Fraction of available memory to use. The available memory is computer when the JCudaContext is created // to handle the tradeoff on calling cudaMemGetInfo too often. @@ -114,7 +116,10 @@ public class DMLConfig _defaultVals.put(CODEGEN,"false" ); _defaultVals.put(CODEGEN_PLANCACHE, "true" ); _defaultVals.put(CODEGEN_LITERALS, "1" ); - + + _defaultVals.put(EXTRA_GPU_STATS, "false" ); + _defaultVals.put(EXTRA_DNN_STATS, "false" ); + _defaultVals.put(GPU_MEMORY_UTILIZATION_FACTOR, "0.9" ); _defaultVals.put(REFRESH_AVAILABLE_MEMORY_EVERY_TIME, "true" ); } @@ -402,6 +407,7 @@ public class DMLConfig YARN_APPMASTER, YARN_APPMASTERMEM, YARN_MAPREDUCEMEM, CP_PARALLEL_MATRIXMULT, CP_PARALLEL_TEXTIO, COMPRESSED_LINALG, CODEGEN, CODEGEN_LITERALS, CODEGEN_PLANCACHE, + EXTRA_GPU_STATS, EXTRA_DNN_STATS }; StringBuilder sb = new StringBuilder(); http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/4f9dcf9a/src/main/java/org/apache/sysml/runtime/controlprogram/context/ExecutionContext.java -- diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/context/ExecutionContext.java 
b/src/main/java/org/apache/sysml/runtime/controlprogram/context/ExecutionContext.java index f14123e..6455add 100644 --- a/src/main/java/org/apache/sysml/runtime/controlprogram/context/ExecutionContext.java +++ b/src/main/
[1/5] incubator-systemml git commit: [SYSTEMML-1359] Added extra instrumentation for CUDA lib calls
Repository: incubator-systemml Updated Branches: refs/heads/master ee33ec62d -> 4f9dcf9ad http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/4f9dcf9a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java -- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java index 0c0410c..b46985f 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java @@ -50,10 +50,10 @@ public class LibMatrixDNN { //library configurations and external contracts public static final boolean SUPPORTS_SPARSE_OUTPUTS = false; //operations able to handle sparse outputs - private static final boolean DISPLAY_STATISTICS = false; //conv2d summaries in stats output private static final boolean ALLOW_MULTI_THREADED_OPS = true; //enable multi-threading in cp private static final int NUM_TASK_FACTOR = 2; //number of tasks is vcores scaled by this factor - + public static boolean DISPLAY_STATISTICS = false; //conv2d summaries in stats output + private enum TaskType { MaxPooling_Forward, MaxPooling_Backward, // Alternate approaches that we tried but the performance was unsatisfactory be included: direct, non-looped im2col http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/4f9dcf9a/src/main/java/org/apache/sysml/utils/GPUStatistics.java -- diff --git a/src/main/java/org/apache/sysml/utils/GPUStatistics.java b/src/main/java/org/apache/sysml/utils/GPUStatistics.java new file mode 100644 index 000..044901b --- /dev/null +++ b/src/main/java/org/apache/sysml/utils/GPUStatistics.java @@ -0,0 +1,209 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.sysml.utils; + +import org.apache.sysml.api.DMLScript; + +import java.util.*; +import java.util.concurrent.atomic.AtomicLong; + +/** + * Measures performance numbers when GPU mode is enabled + * Printed as part of {@link Statistics}. + */ +public class GPUStatistics { + + // Whether or not extra per-instruction statistics will be recorded and shown for the GPU + public static boolean DISPLAY_STATISTICS = false; + + private static int iNoOfExecutedGPUInst = 0; + + public static long cudaInitTime = 0; + public static long cudaLibrariesInitTime = 0; + public static AtomicLong cudaSparseToDenseTime = new AtomicLong(0); // time spent in converting sparse matrix block to dense + public static AtomicLong cudaDenseToSparseTime = new AtomicLong(0); // time spent in converting dense matrix block to sparse + public static AtomicLong cudaSparseConversionTime = new AtomicLong(0); // time spent in converting between sparse block types + public static AtomicLong cudaSparseToDenseCount = new AtomicLong(0); + public static AtomicLong cudaDenseToSparseCount = new AtomicLong(0); + public static AtomicLong cudaSparseConversionCount = new AtomicLong(0); + + public static AtomicLong cudaAllocTime = new AtomicLong(0); // time spent in allocating memory on the GPU + public static AtomicLong cudaDeAllocTime = new AtomicLong(0); // time spent in deallocating memory on the GPU + public 
static AtomicLong cudaToDevTime = new AtomicLong(0); // time spent in copying data from host (CPU) to device (GPU) memory + public static AtomicLong cudaFromDevTime = new AtomicLong(0); // time spent in copying data from device to host + public static AtomicLong cudaAllocCount = new AtomicLong(0); + public static AtomicLong cudaDeAllocCount = new AtomicLong(0); + public static AtomicLong cudaToDevCount = new AtomicLong(0); + public static AtomicLong cudaFromDevCount = new AtomicLong(0); + public static AtomicLong cudaEvictionCount = new AtomicLong(0); + + // Per instruction miscellaneous timers. + // Used to record events in a CP Heavy Hitter instruction and + // provide a breakdown of how t
incubator-systemml git commit: [SYSTEMML-942] added gpu option to MLContext API
Repository: incubator-systemml Updated Branches: refs/heads/master 6f4d8762d -> 6b1572e4b [SYSTEMML-942] added gpu option to MLContext API Additionally, - Changed initialization of CUDA libraries from static to per instance - Added documentation to mlcontext programming guide Closes #420 Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/6b1572e4 Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/6b1572e4 Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/6b1572e4 Branch: refs/heads/master Commit: 6b1572e4bba31619c5bed19fd0c106d2e759f159 Parents: 6f4d876 Author: Nakul Jindal Authored: Tue Mar 7 13:41:03 2017 -0800 Committer: Nakul Jindal Committed: Tue Mar 7 13:41:03 2017 -0800 -- docs/spark-mlcontext-programming-guide.md | 90 .../apache/sysml/api/mlcontext/MLContext.java | 25 ++ .../sysml/api/mlcontext/ScriptExecutor.java | 26 +- .../instructions/gpu/context/GPUContext.java| 2 +- .../instructions/gpu/context/JCudaContext.java | 42 - 5 files changed, 163 insertions(+), 22 deletions(-) -- http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/6b1572e4/docs/spark-mlcontext-programming-guide.md -- diff --git a/docs/spark-mlcontext-programming-guide.md b/docs/spark-mlcontext-programming-guide.md index c15c27f..c28eaf5 100644 --- a/docs/spark-mlcontext-programming-guide.md +++ b/docs/spark-mlcontext-programming-guide.md @@ -1086,6 +1086,96 @@ mean: Double = 0.5002109404821844 +## GPU + +If the driver node has a GPU, SystemML may be able to utilize it, subject to memory constraints and what instructions are used in the dml script + + + + +{% highlight scala %} +ml.setGPU(true) +ml.setStatistics(true) +val matMultScript = dml(""" +A = rand(rows=10, cols=1000) +B = rand(rows=1000, cols=10) +C = A %*% B +print(toString(C)) +""") +ml.execute(matMultScript) +{% endhighlight %} + + + +{% highlight scala %} +scala> ml.setGPU(true) + +scala> 
ml.setStatistics(true) + +scala> val matMultScript = dml(""" + | A = rand(rows=10, cols=1000) + | B = rand(rows=1000, cols=10) + | C = A %*% B + | print(toString(C)) + | """) +matMultScript: org.apache.sysml.api.mlcontext.Script = +Inputs: +None + +Outputs: +None + +scala> ml.execute(matMultScript) +249.977 238.545 233.700 234.489 248.556 244.423 249.051 255.043 249.117 251.605 +249.226 248.680 245.532 238.258 254.451 249.827 260.957 251.273 250.577 257.571 +258.703 246.969 243.463 246.547 250.784 251.758 251.654 258.318 251.817 254.097 +248.788 242.960 230.920 244.026 249.159 247.998 251.330 254.718 248.013 255.706 +253.251 248.788 235.785 242.941 252.096 248.675 256.865 251.677 252.872 250.490 +256.087 245.035 234.124 238.307 248.630 252.522 251.122 251.577 249.171 247.974 +245.419 243.114 232.262 239.776 249.583 242.351 250.972 249.244 246.729 251.807 +250.081 242.367 230.334 240.955 248.332 240.730 246.940 250.396 244.107 249.729 +247.368 239.882 234.353 237.087 252.337 248.801 246.627 249.077 244.305 245.621 +252.827 257.352 239.546 246.529 258.916 255.612 260.480 254.805 252.695 257.531 + +SystemML Statistics: +Total elapsed time:0.000 sec. +Total compilation time:0.000 sec. +Total execution time: 0.000 sec. +Number of compiled Spark inst: 0. +Number of executed Spark inst: 0. +CUDA/CuLibraries init time:0.000/0.003 sec. +Number of executed GPU inst: 8. +GPU mem tx time (alloc/dealloc/toDev/fromDev):0.003/0.002/0.010/0.002 sec. +GPU mem tx count (alloc/dealloc/toDev/fromDev/evict): 24/24/0/16/8/0. +GPU conversion time (sparseConv/sp2dense/dense2sp): 0.000/0.000/0.000 sec. +GPU conversion count (sparseConv/sp2dense/dense2sp): 0/0/0. +Cache hits (Mem, WB, FS, HDFS):40/0/0/0. +Cache writes (WB, FS, HDFS): 21/0/0. +Cache times (ACQr/m, RLS, EXP):0.002/0.002/0.003/0.000 sec. +HOP DAGs recompiled (PRED, SB):0/0. +HOP DAGs recompile time: 0.000 sec. +Spark ctx create time (lazy): 0.000 sec. +Spark trans counts (par,bc,col):0/0/0. 
+Spark trans times (par,bc,col):0.000/0.000/0.000 secs. +Total JIT compile time:11.426 sec. +Total JVM GC count:20. +Total JVM GC time: 1.078 sec. +Heavy hitter instructions (name, time, count): +-- 1) toString0.085 sec 8 +-- 2) rand0.027 sec 16 +-- 3) gpu_ba+*0.018 sec 8 +-- 4) print 0.006 sec 8 +-- 5) createvar 0.003 sec 24 +-- 6) rmvar 0.003 sec 40 + +res20: org.apache.sysml.api.mlcontext.MLResults = +None +{% endhighlight %} + + + + +Note that GPU instructions show up prepended with a "gpu" in the statistics.
incubator-systemml git commit: [SYSTEMML-942] added gpu option to MLContext API
Repository: incubator-systemml Updated Branches: refs/heads/gh-pages 4ec1b9f40 -> 42e86e76c [SYSTEMML-942] added gpu option to MLContext API Additionally, - Changed initialization of CUDA libraries from static to per instance - Added documentation to mlcontext programming guide Closes #420 Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/42e86e76 Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/42e86e76 Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/42e86e76 Branch: refs/heads/gh-pages Commit: 42e86e76c1e324f53351fe5866ce5675482df15a Parents: 4ec1b9f Author: Nakul Jindal Authored: Tue Mar 7 13:41:03 2017 -0800 Committer: Nakul Jindal Committed: Tue Mar 7 13:41:03 2017 -0800 -- spark-mlcontext-programming-guide.md | 90 +++ 1 file changed, 90 insertions(+) -- http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/42e86e76/spark-mlcontext-programming-guide.md -- diff --git a/spark-mlcontext-programming-guide.md b/spark-mlcontext-programming-guide.md index c15c27f..c28eaf5 100644 --- a/spark-mlcontext-programming-guide.md +++ b/spark-mlcontext-programming-guide.md @@ -1086,6 +1086,96 @@ mean: Double = 0.5002109404821844 +## GPU + +If the driver node has a GPU, SystemML may be able to utilize it, subject to memory constraints and what instructions are used in the dml script + + + + +{% highlight scala %} +ml.setGPU(true) +ml.setStatistics(true) +val matMultScript = dml(""" +A = rand(rows=10, cols=1000) +B = rand(rows=1000, cols=10) +C = A %*% B +print(toString(C)) +""") +ml.execute(matMultScript) +{% endhighlight %} + + + +{% highlight scala %} +scala> ml.setGPU(true) + +scala> ml.setStatistics(true) + +scala> val matMultScript = dml(""" + | A = rand(rows=10, cols=1000) + | B = rand(rows=1000, cols=10) + | C = A %*% B + | print(toString(C)) + | """) +matMultScript: org.apache.sysml.api.mlcontext.Script = +Inputs: +None + 
+Outputs: +None + +scala> ml.execute(matMultScript) +249.977 238.545 233.700 234.489 248.556 244.423 249.051 255.043 249.117 251.605 +249.226 248.680 245.532 238.258 254.451 249.827 260.957 251.273 250.577 257.571 +258.703 246.969 243.463 246.547 250.784 251.758 251.654 258.318 251.817 254.097 +248.788 242.960 230.920 244.026 249.159 247.998 251.330 254.718 248.013 255.706 +253.251 248.788 235.785 242.941 252.096 248.675 256.865 251.677 252.872 250.490 +256.087 245.035 234.124 238.307 248.630 252.522 251.122 251.577 249.171 247.974 +245.419 243.114 232.262 239.776 249.583 242.351 250.972 249.244 246.729 251.807 +250.081 242.367 230.334 240.955 248.332 240.730 246.940 250.396 244.107 249.729 +247.368 239.882 234.353 237.087 252.337 248.801 246.627 249.077 244.305 245.621 +252.827 257.352 239.546 246.529 258.916 255.612 260.480 254.805 252.695 257.531 + +SystemML Statistics: +Total elapsed time:0.000 sec. +Total compilation time:0.000 sec. +Total execution time: 0.000 sec. +Number of compiled Spark inst: 0. +Number of executed Spark inst: 0. +CUDA/CuLibraries init time:0.000/0.003 sec. +Number of executed GPU inst: 8. +GPU mem tx time (alloc/dealloc/toDev/fromDev):0.003/0.002/0.010/0.002 sec. +GPU mem tx count (alloc/dealloc/toDev/fromDev/evict): 24/24/0/16/8/0. +GPU conversion time (sparseConv/sp2dense/dense2sp): 0.000/0.000/0.000 sec. +GPU conversion count (sparseConv/sp2dense/dense2sp): 0/0/0. +Cache hits (Mem, WB, FS, HDFS):40/0/0/0. +Cache writes (WB, FS, HDFS): 21/0/0. +Cache times (ACQr/m, RLS, EXP):0.002/0.002/0.003/0.000 sec. +HOP DAGs recompiled (PRED, SB):0/0. +HOP DAGs recompile time: 0.000 sec. +Spark ctx create time (lazy): 0.000 sec. +Spark trans counts (par,bc,col):0/0/0. +Spark trans times (par,bc,col):0.000/0.000/0.000 secs. +Total JIT compile time:11.426 sec. +Total JVM GC count:20. +Total JVM GC time: 1.078 sec. 
+Heavy hitter instructions (name, time, count): +-- 1) toString0.085 sec 8 +-- 2) rand0.027 sec 16 +-- 3) gpu_ba+*0.018 sec 8 +-- 4) print 0.006 sec 8 +-- 5) createvar 0.003 sec 24 +-- 6) rmvar 0.003 sec 40 + +res20: org.apache.sysml.api.mlcontext.MLResults = +None +{% endhighlight %} + + + + +Note that GPU instructions show up prepended with a "gpu" in the statistics. ## Explain
incubator-systemml git commit: [SYSTEMML-1396] cudaFree is called lazily instead of asynchronously
Repository: incubator-systemml Updated Branches: refs/heads/master 9137f7b02 -> 80225f014 [SYSTEMML-1396] cudaFree is called lazily instead of asynchronously - If a block is available in a free list of the exact matching size, it is memset to 0 and allocated, otherwise, the LRU blocks are cudaFree-ed until enough memory is available on the GPU - Fixed timers - bug fix for SYSTEMML-1340,1341 related to redundantly releasing an input block on the GPU Closes #426 Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/80225f01 Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/80225f01 Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/80225f01 Branch: refs/heads/master Commit: 80225f014338d9671aaf187186f117feb5d0c093 Parents: 9137f7b Author: Nakul Jindal Authored: Mon Mar 13 13:55:39 2017 -0700 Committer: Nakul Jindal Committed: Mon Mar 13 13:55:39 2017 -0700 -- .../instructions/gpu/GPUInstruction.java| 2 + .../instructions/gpu/context/GPUContext.java| 19 +- .../instructions/gpu/context/GPUObject.java | 76 ++-- .../instructions/gpu/context/JCudaContext.java | 52 ++- .../instructions/gpu/context/JCudaObject.java | 353 +++ .../runtime/matrix/data/LibMatrixCUDA.java | 186 +- .../org/apache/sysml/utils/GPUStatistics.java | 10 +- .../org/apache/sysml/utils/LRUCacheMap.java | 71 .../sysml/test/utils/LRUCacheMapTest.java | 102 ++ 9 files changed, 537 insertions(+), 334 deletions(-) -- http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/80225f01/src/main/java/org/apache/sysml/runtime/instructions/gpu/GPUInstruction.java -- diff --git a/src/main/java/org/apache/sysml/runtime/instructions/gpu/GPUInstruction.java b/src/main/java/org/apache/sysml/runtime/instructions/gpu/GPUInstruction.java index dcb2edc..04a2f1a 100644 --- a/src/main/java/org/apache/sysml/runtime/instructions/gpu/GPUInstruction.java +++ 
b/src/main/java/org/apache/sysml/runtime/instructions/gpu/GPUInstruction.java @@ -44,6 +44,8 @@ public abstract class GPUInstruction extends Instruction public final static String MISC_TIMER_CUDA_FREE = "f";// time spent in calling cudaFree public final static String MISC_TIMER_ALLOCATE = "a";// time spent to allocate memory on gpu public final static String MISC_TIMER_ALLOCATE_DENSE_OUTPUT = "ao"; // time spent to allocate dense output (recorded differently than MISC_TIMER_ALLOCATE) + public final static String MISC_TIMER_SET_ZERO = "az"; // time spent to allocate + public final static String MISC_TIMER_REUSE = "r";// time spent in reusing already allocated memory on GPU (mainly for the count) // Matmult instructions public final static String MISC_TIMER_SPARSE_ALLOCATE_LIB = "Msao"; // time spend in allocating for sparse matrix output http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/80225f01/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUContext.java -- diff --git a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUContext.java b/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUContext.java index b792882..f076350 100644 --- a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUContext.java +++ b/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUContext.java @@ -18,11 +18,6 @@ */ package org.apache.sysml.runtime.instructions.gpu.context; -import java.util.ArrayList; -import java.util.concurrent.ConcurrentLinkedQueue; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Future; - import org.apache.sysml.api.DMLScript; import org.apache.sysml.hops.OptimizerUtils; import org.apache.sysml.runtime.DMLRuntimeException; @@ -33,18 +28,6 @@ import org.apache.sysml.runtime.controlprogram.caching.MatrixObject; @SuppressWarnings("rawtypes") public abstract class GPUContext { - public static ArrayList allocatedPointers = new ArrayList(); - - /** cudaFree 
calls are done asynchronously on a separate thread, -* this list preserve the list of currently happening cudaFree calls */ - public static ConcurrentLinkedQueue pendingDeallocates = new ConcurrentLinkedQueue(); - - /*
incubator-systemml git commit: [HOTFIX] added missing license, removed missing exception in javadoc
Repository: incubator-systemml Updated Branches: refs/heads/master 80225f014 -> 95f300d9d [HOTFIX] added missing license, removed missing exception in javadoc Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/95f300d9 Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/95f300d9 Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/95f300d9 Branch: refs/heads/master Commit: 95f300d9d18801f585e579227c4123c475eb5c9c Parents: 80225f0 Author: Nakul Jindal Authored: Mon Mar 13 14:21:49 2017 -0700 Committer: Nakul Jindal Committed: Mon Mar 13 14:21:49 2017 -0700 -- .../instructions/gpu/context/JCudaObject.java | 1 - .../java/org/apache/sysml/utils/LRUCacheMap.java | 18 ++ .../apache/sysml/test/utils/LRUCacheMapTest.java | 18 ++ 3 files changed, 36 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/95f300d9/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/JCudaObject.java -- diff --git a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/JCudaObject.java b/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/JCudaObject.java index 31664f6..92284d0 100644 --- a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/JCudaObject.java +++ b/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/JCudaObject.java @@ -1209,7 +1209,6 @@ public class JCudaObject extends GPUObject { * does lazy/eager cudaFree calls * @param toFree {@link Pointer} instance to be freed * @param eager true if to be done eagerly -* @throws DMLRuntimeException */ public static void cudaFreeHelper(final Pointer toFree, boolean eager) { cudaFreeHelper(null, toFree, eager); http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/95f300d9/src/main/java/org/apache/sysml/utils/LRUCacheMap.java -- diff --git a/src/main/java/org/apache/sysml/utils/LRUCacheMap.java
b/src/main/java/org/apache/sysml/utils/LRUCacheMap.java index 0c8449a..64560af 100644 --- a/src/main/java/org/apache/sysml/utils/LRUCacheMap.java +++ b/src/main/java/org/apache/sysml/utils/LRUCacheMap.java @@ -1,3 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ package org.apache.sysml.utils; http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/95f300d9/src/test/java/org/apache/sysml/test/utils/LRUCacheMapTest.java -- diff --git a/src/test/java/org/apache/sysml/test/utils/LRUCacheMapTest.java b/src/test/java/org/apache/sysml/test/utils/LRUCacheMapTest.java index d076266..3cf7e76 100644 --- a/src/test/java/org/apache/sysml/test/utils/LRUCacheMapTest.java +++ b/src/test/java/org/apache/sysml/test/utils/LRUCacheMapTest.java @@ -1,3 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ package org.apache.sysml.test.utils; import org.apache.sysml.utils.LRUCacheMap;