from:"nakul02"

incubator-systemml git commit: [HOTFIX] Disabling GPU fused relu & maxpooling operator because of bug

2017-03-15 Thread nakul02

Repository: incubator-systemml
Updated Branches:
  refs/heads/master 0490fec93 -> 5baac2d62


[HOTFIX] Disabling GPU fused relu & maxpooling operator because of bug

- Fixed the timer that counts the number of times memory chunks are
  zeroed out
- Some minor code refactoring


Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/5baac2d6
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/5baac2d6
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/5baac2d6

Branch: refs/heads/master
Commit: 5baac2d62f64026ff82b9d674b909bc4b80800b0
Parents: 0490fec
Author: Nakul Jindal 
Authored: Mon Mar 13 15:40:08 2017 -0700
Committer: Nakul Jindal 
Committed: Wed Mar 15 15:31:17 2017 -0700

--
 .../org/apache/sysml/hops/ConvolutionOp.java| 12 +++
 .../gpu/ConvolutionGPUInstruction.java  |  9 +++--
 .../instructions/gpu/context/JCudaObject.java   |  2 +-
 .../runtime/matrix/data/LibMatrixCUDA.java  | 21 ++--
 4 files changed, 26 insertions(+), 18 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/5baac2d6/src/main/java/org/apache/sysml/hops/ConvolutionOp.java
--
diff --git a/src/main/java/org/apache/sysml/hops/ConvolutionOp.java 
b/src/main/java/org/apache/sysml/hops/ConvolutionOp.java
index 943ff96..9483b2c 100644
--- a/src/main/java/org/apache/sysml/hops/ConvolutionOp.java
+++ b/src/main/java/org/apache/sysml/hops/ConvolutionOp.java
@@ -19,15 +19,13 @@
 
 package org.apache.sysml.hops;
 
-import java.util.ArrayList;
-
 import org.apache.sysml.conf.ConfigurationManager;
 import org.apache.sysml.hops.Hop.MultiThreadedHop;
 import org.apache.sysml.lops.ConvolutionTransform;
 import org.apache.sysml.lops.ConvolutionTransform.OperationTypes;
 import org.apache.sysml.lops.Lop;
-import org.apache.sysml.lops.LopsException;
 import org.apache.sysml.lops.LopProperties.ExecType;
+import org.apache.sysml.lops.LopsException;
 import org.apache.sysml.lops.ReBlock;
 import org.apache.sysml.parser.Expression.DataType;
 import org.apache.sysml.parser.Expression.ValueType;
@@ -35,6 +33,8 @@ import org.apache.sysml.runtime.DMLRuntimeException;
 import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
 import org.apache.sysml.runtime.matrix.data.ConvolutionParameters;
 
+import java.util.ArrayList;
+
 public class ConvolutionOp extends Hop  implements MultiThreadedHop
 {  
private Hop.ConvOp op;
@@ -179,7 +179,11 @@ public class ConvolutionOp extends Hop  implements 
MultiThreadedHop
ArrayList inputs1 = inputs;
int k = OptimizerUtils.getConstrainedNumThreads(_maxNumThreads);
OperationTypes lopOp = HopsConv2Lops.get(op);
-   if(op == ConvOp.MAX_POOLING && isInputReLU(inputs.get(0))) {
+
+   // The fused relu_maxpooling is being disabled for now on the 
GPU
+   // There is a bug in LibMatrixCUDA#reluMaxpooling
+   // which we need to understand before enabling this by removing 
the "et != ExecType.GPU" guard.
+   if(op == ConvOp.MAX_POOLING && isInputReLU(inputs.get(0)) && et 
!= ExecType.GPU) {
in = inputs.get(0).getInput().get(0).constructLops();
lopOp = OperationTypes.RELU_MAX_POOLING;
}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/5baac2d6/src/main/java/org/apache/sysml/runtime/instructions/gpu/ConvolutionGPUInstruction.java
--
diff --git 
a/src/main/java/org/apache/sysml/runtime/instructions/gpu/ConvolutionGPUInstruction.java
 
b/src/main/java/org/apache/sysml/runtime/instructions/gpu/ConvolutionGPUInstruction.java
index daf3c58..7460d6b 100644
--- 
a/src/main/java/org/apache/sysml/runtime/instructions/gpu/ConvolutionGPUInstruction.java
+++ 
b/src/main/java/org/apache/sysml/runtime/instructions/gpu/ConvolutionGPUInstruction.java
@@ -18,8 +18,6 @@
  */
 package org.apache.sysml.runtime.instructions.gpu;
 
-import java.util.ArrayList;
-
 import org.apache.sysml.runtime.DMLRuntimeException;
 import org.apache.sysml.runtime.controlprogram.caching.MatrixObject;
 import org.apache.sysml.runtime.controlprogram.context.ExecutionContext;
@@ -31,6 +29,8 @@ import 
org.apache.sysml.runtime.matrix.operators.ReorgOperator;
 import org.apache.sysml.runtime.util.ConvolutionUtils;
 import org.apache.sysml.utils.GPUStatistics;
 
+import java.util.ArrayList;
+
 public class ConvolutionGPUInstruction extends GPUInstruction 
 {
private CPOperand _input1; 
@@ -337,8 +337,13 @@ public class ConvolutionGPUInstruction extends 
GPUInstruction

// rele

incubator-systemml git commit: [HOTFIX] Changed unit test LRUCacheMapTest to run only with mvn verify

2017-03-22 Thread nakul02

Repository: incubator-systemml
Updated Branches:
  refs/heads/master 95be80c5b -> 97da0004f


[HOTFIX] Changed unit test LRUCacheMapTest to run only with mvn verify

Closes #436


Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/97da0004
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/97da0004
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/97da0004

Branch: refs/heads/master
Commit: 97da0004f1423d40d63372e59c0424d28793ef92
Parents: 95be80c
Author: Nakul Jindal 
Authored: Wed Mar 22 12:08:21 2017 -0700
Committer: Nakul Jindal 
Committed: Wed Mar 22 12:08:21 2017 -0700

--
 pom.xml |   1 +
 .../apache/sysml/test/unit/LRUCacheMapTest.java | 120 +++
 .../sysml/test/utils/LRUCacheMapTest.java   | 120 ---
 3 files changed, 121 insertions(+), 120 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/97da0004/pom.xml
--
diff --git a/pom.xml b/pom.xml
index 86efe21..656d0a1 100644
--- a/pom.xml
+++ b/pom.xml
@@ -351,6 +351,7 @@


**/slowtest/**

**/integration/**
+   
**/test/unit/**

 


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/97da0004/src/test/java/org/apache/sysml/test/unit/LRUCacheMapTest.java
--
diff --git a/src/test/java/org/apache/sysml/test/unit/LRUCacheMapTest.java 
b/src/test/java/org/apache/sysml/test/unit/LRUCacheMapTest.java
new file mode 100644
index 000..09df5a0
--- /dev/null
+++ b/src/test/java/org/apache/sysml/test/unit/LRUCacheMapTest.java
@@ -0,0 +1,120 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.sysml.test.unit;
+
+import org.apache.sysml.utils.LRUCacheMap;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.Map;
+
+public class LRUCacheMapTest {
+
+  @Test
+  public void test1() throws Exception {
+LRUCacheMap m = new LRUCacheMap();
+m.put("k1", 10l);
+m.put("k2", 20l);
+m.put("k3", 30l);
+m.put("k4", 40l);
+
+Map.Entry e = m.removeAndGetLRUEntry();
+Assert.assertEquals("k1", e.getKey());
+  }
+
+  @Test
+  public void test2() throws Exception {
+LRUCacheMap m = new LRUCacheMap();
+m.put("k1", 10l);
+m.put("k2", 20l);
+m.put("k3", 30l);
+m.put("k4", 40l);
+m.get("k1");
+
+Map.Entry e = m.removeAndGetLRUEntry();
+Assert.assertEquals("k2", e.getKey());
+  }
+
+  @Test(expected = IllegalArgumentException.class)
+  public void test3() {
+LRUCacheMap m = new LRUCacheMap();
+m.put(null, 10l);
+  }
+
+  @Test
+  public void test4() throws Exception {
+LRUCacheMap m = new LRUCacheMap();
+m.put("k1", 10l);
+m.put("k2", 20l);
+m.put("k3", 30l);
+m.put("k4", 40l);
+m.remove("k1");
+m.remove("k2");
+
+Map.Entry e = m.removeAndGetLRUEntry();
+Assert.assertEquals("k3", e.getKey());
+  }
+
+  @Test
+  public void test5() throws Exception {
+LRUCacheMap m = new LRUCacheMap();
+m.put("k1", 10l);
+m.put("k2", 20l);
+m.put("k1", 30l);
+
+Map.Entry e = m.removeAndGetLRUEntry();
+Assert.assertEquals("k2", e.getKey());
+  }
+
+  @Test
+  public void test6() throws Exception {
+LRUCacheMap m = new LRUCacheMap();
+m.put("k1", 10l);
+m.put("k2", 20l);
+m.put("k3", 30l);
+m.put("k4", 40l);
+m.put("k5", 50l);
+m.put("k6", 60l);
+m.put("k7", 70l);
+m.put("k8", 80l);
+m.get("k4");
+
+
+Map.Entry e;
+e = m.removeAndGetLRUEntry();
+Assert.assertEquals("k1", e.getKey());
+e = m.removeAndGetLRUEntry();
+Assert.assertEquals("k2", e.getKey());
+e = m.remo

[1/3] incubator-systemml git commit: Use Apache Commons CLI to parse command line arguments in DMLScript

2017-03-27 Thread nakul02

Repository: incubator-systemml
Updated Branches:
  refs/heads/master ee6bc8ce2 -> 346d1c01a


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/346d1c01/src/test/java/org/apache/sysml/test/unit/CLIOptionsParserTest.java
--
diff --git a/src/test/java/org/apache/sysml/test/unit/CLIOptionsParserTest.java 
b/src/test/java/org/apache/sysml/test/unit/CLIOptionsParserTest.java
new file mode 100644
index 000..d780db4
--- /dev/null
+++ b/src/test/java/org/apache/sysml/test/unit/CLIOptionsParserTest.java
@@ -0,0 +1,419 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.test.unit;
+
+import org.apache.commons.cli.AlreadySelectedException;
+import org.apache.commons.cli.MissingOptionException;
+import org.apache.commons.cli.Options;
+import org.apache.commons.cli.ParseException;
+import org.apache.sysml.api.DMLScript;
+import org.apache.sysml.api.ScriptType;
+import org.apache.sysml.utils.Explain;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.Map;
+
+
+public class CLIOptionsParserTest {
+
+  @Test(expected = MissingOptionException.class)
+  public void testNoOptions() throws Exception {
+String cl = "systemml";
+String[] args = cl.split(" ");
+Options options = DMLScript.createCLIOptions();
+DMLScript.DMLOptions o = DMLScript.parseCLArguments(args, options);
+  }
+
+  @Test
+  public void testFile() throws Exception {
+String cl = "systemml -f test.dml";
+String[] args = cl.split(" ");
+Options options = DMLScript.createCLIOptions();
+DMLScript.DMLOptions o = DMLScript.parseCLArguments(args, options);
+Assert.assertEquals("test.dml", o.filePath);
+Assert.assertEquals(ScriptType.DML, o.scriptType);
+
+  }
+
+  @Test
+  public void testScript() throws Exception {
+String cl = "systemml -s \"print('hello')\"";
+String[] args = cl.split(" ");
+Options options = DMLScript.createCLIOptions();
+DMLScript.DMLOptions o = DMLScript.parseCLArguments(args, options);
+Assert.assertEquals("print('hello')", o.script);
+  }
+
+  @Test
+  public void testConfig() throws Exception {
+String cl = "systemml -s \"print('hello')\" -config SystemML-config.xml";
+String[] args = cl.split(" ");
+Options options = DMLScript.createCLIOptions();
+DMLScript.DMLOptions o = DMLScript.parseCLArguments(args, options);
+Assert.assertEquals("print('hello')", o.script);
+Assert.assertEquals("SystemML-config.xml", o.configFile);
+  }
+
+  @Test
+  public void testDebug() throws Exception {
+String cl = "systemml -s \"print('hello')\" -debug";
+String[] args = cl.split(" ");
+Options options = DMLScript.createCLIOptions();
+DMLScript.DMLOptions o = DMLScript.parseCLArguments(args, options);
+Assert.assertEquals("print('hello')", o.script);
+Assert.assertEquals(true, o.debug);
+  }
+
+  @Test
+  public void testClean() throws Exception {
+String cl = "systemml -clean";
+String[] args = cl.split(" ");
+Options options = DMLScript.createCLIOptions();
+DMLScript.DMLOptions o = DMLScript.parseCLArguments(args, options);
+Assert.assertEquals(true, o.clean);
+  }
+
+  @Test(expected = AlreadySelectedException.class)
+  public void testBadClean() throws Exception {
+String cl = "systemml -clean -f test.dml";
+String[] args = cl.split(" ");
+Options options = DMLScript.createCLIOptions();
+DMLScript.DMLOptions o = DMLScript.parseCLArguments(args, options);
+  }
+
+  @Test(expected = AlreadySelectedException.class)
+  public void testBadScript() throws Exception {
+String cl = "systemml -f test.dml -s \"print('hello')\"";
+String[] args = cl.split(" ");
+Options options = DMLScript.createCLIOptions();
+DMLScript.DMLOptions o = DMLScript.parseCLArguments(args, options);
+  }
+
+  @Test
+  public void testStats() throws Exception {
+String cl = "systemml -f test.dml -stats";
+String[] args = cl.split(" ");
+Options options = DMLScript.createCLIOptions();
+DMLScript.DMLOptions o = DMLScript.parseCLArguments(args, options);
+Assert.assertEquals(true, o.stats);
+Assert.assertEquals(10, o.sta

[2/3] incubator-systemml git commit: Use Apache Commons CLI to parse command line arguments in DMLScript

2017-03-27 Thread nakul02

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/346d1c01/src/main/java/org/apache/sysml/api/DMLScript.java
--
diff --git a/src/main/java/org/apache/sysml/api/DMLScript.java 
b/src/main/java/org/apache/sysml/api/DMLScript.java
index c04c321..175688a 100644
--- a/src/main/java/org/apache/sysml/api/DMLScript.java
+++ b/src/main/java/org/apache/sysml/api/DMLScript.java
@@ -19,22 +19,15 @@
 
 package org.apache.sysml.api;
 
-import java.io.BufferedReader;
-import java.io.ByteArrayInputStream;
-import java.io.FileReader;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.InputStreamReader;
-import java.net.URI;
-import java.text.DateFormat;
-import java.text.SimpleDateFormat;
-import java.util.Collections;
-import java.util.Date;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Map;
-import java.util.Scanner;
-
+import org.apache.commons.cli.AlreadySelectedException;
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.CommandLineParser;
+import org.apache.commons.cli.HelpFormatter;
+import org.apache.commons.cli.Option;
+import org.apache.commons.cli.OptionBuilder;
+import org.apache.commons.cli.OptionGroup;
+import org.apache.commons.cli.Options;
+import org.apache.commons.cli.PosixParser;
 import org.apache.commons.lang.StringUtils;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -46,7 +39,6 @@ import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.util.GenericOptionsParser;
 import org.apache.log4j.Level;
 import org.apache.log4j.Logger;
-import org.apache.sysml.api.mlcontext.ScriptType;
 import org.apache.sysml.conf.CompilerConfig;
 import org.apache.sysml.conf.ConfigurationManager;
 import org.apache.sysml.conf.DMLConfig;
@@ -73,12 +65,12 @@ import 
org.apache.sysml.runtime.controlprogram.caching.CacheStatistics;
 import org.apache.sysml.runtime.controlprogram.caching.CacheableData;
 import org.apache.sysml.runtime.controlprogram.context.ExecutionContext;
 import org.apache.sysml.runtime.controlprogram.context.ExecutionContextFactory;
-import org.apache.sysml.runtime.instructions.gpu.context.GPUContext;
-import org.apache.sysml.runtime.io.IOUtilFunctions;
 import org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext;
 import org.apache.sysml.runtime.controlprogram.parfor.ProgramConverter;
 import 
org.apache.sysml.runtime.controlprogram.parfor.stat.InfrastructureAnalyzer;
 import org.apache.sysml.runtime.controlprogram.parfor.util.IDHandler;
+import org.apache.sysml.runtime.instructions.gpu.context.GPUContext;
+import org.apache.sysml.runtime.io.IOUtilFunctions;
 import org.apache.sysml.runtime.matrix.CleanupMR;
 import org.apache.sysml.runtime.matrix.data.LibMatrixDNN;
 import org.apache.sysml.runtime.matrix.mapred.MRConfigurationNames;
@@ -93,6 +85,22 @@ import org.apache.sysml.utils.Statistics;
 import org.apache.sysml.yarn.DMLAppMasterUtils;
 import org.apache.sysml.yarn.DMLYarnClientProxy;
 
+import java.io.BufferedReader;
+import java.io.ByteArrayInputStream;
+import java.io.FileReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.net.URI;
+import java.text.DateFormat;
+import java.text.SimpleDateFormat;
+import java.util.Collections;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Scanner;
+
 
 public class DMLScript 
 {  
@@ -103,66 +111,77 @@ public class DMLScript
HYBRID_SPARK,   // execute matrix operations in CP or Spark   
SPARK   // execute matrix operations in Spark
}
-   
-   public static RUNTIME_PLATFORM rtplatform = 
OptimizerUtils.getDefaultExecutionMode();
-   public static boolean STATISTICS = false; //default statistics
-   public static int STATISTICS_COUNT = 10;//default statistics 
maximum heavy hitter count
-   public static boolean ENABLE_DEBUG_MODE = false; //default debug mode
-   public static boolean USE_LOCAL_SPARK_CONFIG = false; //set default 
local spark configuration - used for local testing
-   public static String DML_FILE_PATH_ANTLR_PARSER = null;
-   public static ExplainType EXPLAIN = ExplainType.NONE; //default explain
+
+   /**
+* Set of DMLOptions that can be set through the command line
+* and {@link org.apache.sysml.api.mlcontext.MLContext}
+* The values have been initialized with the default values
+* Despite there being a DML and PyDML, this class is named DMLOptions
+* to keep it consistent with {@link DMLScript} and {@link DMLOptions}
+*/
+   public static class DMLOptions {
+   public Map  argVals   = new HashMap<>();  
// Arguments map containing either named arguments or arguments by position for 
a DML program
+

[3/3] incubator-systemml git commit: Use Apache Commons CLI to parse command line arguments in DMLScript

2017-03-27 Thread nakul02

Use Apache Commons CLI to parse command line arguments in DMLScript

- Uses Apache Commons CLI 1.2 to parse command line options
- Known limitation - strips arguments of leading and trailing double
  quotes
- Changed scripts to accept "-config " instead of "-config="
- Instead of "-gpu force=true", accepts "-gpu force"
- Concise description of usage options
- Updated bin/systemml script to print usage options when passed the
  "-help" option
- Removed DMLScriptTest{1,2}, lots of test cases added as unit tests as
  they were testing the previous hand-rolled command line parsing
- Added unit tests

Closes #435


Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/346d1c01
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/346d1c01
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/346d1c01

Branch: refs/heads/master
Commit: 346d1c01ad94c5b8178b8c9baf7d38e0867805da
Parents: ee6bc8c
Author: Nakul Jindal 
Authored: Mon Mar 27 13:35:12 2017 -0700
Committer: Nakul Jindal 
Committed: Mon Mar 27 13:35:12 2017 -0700

--
 bin/systemml|  84 ++-
 bin/systemml.bat|   4 +-
 docs/algorithms-classification.md   |  44 +-
 docs/algorithms-clustering.md   |  14 +-
 docs/algorithms-descriptive-statistics.md   |  14 +-
 docs/algorithms-matrix-factorization.md |  18 +-
 docs/algorithms-regression.md   |  36 +-
 docs/algorithms-survival-analysis.md|  16 +-
 docs/hadoop-batch-mode.md   |  16 +-
 docs/spark-batch-mode.md|   4 +-
 docs/standalone-guide.md|   2 +-
 docs/troubleshooting-guide.md   |   2 +-
 scripts/sparkDML.sh |   2 +-
 .../java/org/apache/sysml/api/DMLScript.java| 613 ---
 .../java/org/apache/sysml/api/MLContext.java|  66 +-
 .../java/org/apache/sysml/api/ScriptType.java   |  65 ++
 .../org/apache/sysml/api/jmlc/Connection.java   |  22 +-
 .../sysml/api/mlcontext/MLContextUtil.java  |   1 +
 .../org/apache/sysml/api/mlcontext/Script.java  |   1 +
 .../sysml/api/mlcontext/ScriptExecutor.java |  10 +-
 .../sysml/api/mlcontext/ScriptFactory.java  |   1 +
 .../apache/sysml/api/mlcontext/ScriptType.java  |  65 --
 .../org/apache/sysml/parser/AParserWrapper.java |  28 +-
 .../runtime/instructions/cp/BooleanObject.java  |   2 +-
 .../org/apache/sysml/yarn/DMLYarnClient.java|  19 +-
 src/main/resources/scripts/sparkDML.sh  |   2 +-
 src/main/standalone/runStandaloneSystemML.bat   |   4 +-
 src/main/standalone/runStandaloneSystemML.sh|   2 +-
 .../test/integration/AutomatedTestBase.java |   3 +-
 .../functions/dmlscript/DMLScriptTest1.java | 125 
 .../functions/dmlscript/DMLScriptTest2.java | 151 -
 .../functions/misc/DataTypeChangeTest.java  |  27 +-
 .../parfor/ParForDependencyAnalysisTest.java|  15 +-
 .../TransformFrameEncodeDecodeTest.java |  11 +-
 .../integration/mlcontext/MLContextTest.java|  12 +-
 .../sysml/test/unit/CLIOptionsParserTest.java   | 419 +
 .../functions/dmlscript/ZPackageSuite.java  |  37 --
 37 files changed, 1121 insertions(+), 836 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/346d1c01/bin/systemml
--
diff --git a/bin/systemml b/bin/systemml
index 0ccee2d..44ab45e 100755
--- a/bin/systemml
+++ b/bin/systemml
@@ -20,32 +20,22 @@
 #
 #-
 
+
 # error help print
-printUsageExit()
+printSimpleUsage()
 {
 cat << EOF
 Usage: $0  [arguments] [-help]
--help - Print this usage message and exit
+-help - Print detailed help message
 EOF
   exit 1
 }
-#Script internally invokes 'java -Xmx4g -Xms4g -Xmn400m 
[Custom-Java-Options] -jar StandaloneSystemML.jar -f  -exec 
singlenode -config=SystemML-config.xml [Optional-Arguments]'
 
-while getopts "h:" options; do
-  case $options in
-h ) echo Warning: Help requested. Will exit after usage message
-printUsageExit
-;;
-\? ) echo Warning: Help requested. Will exit after usage message
-printUsageExit
-;;
-* ) echo Error: Unexpected error while processing options
-  esac
-done
+#Script internally invokes 'java -Xmx4g -Xms4g -Xmn400m 
[Custom-Java-Options] -jar StandaloneSystemML.jar -f  -exec 
singlenode -config=SystemML-config.xml [Optional-Arguments]'
 
 if [ -z "$1" ] ; then
 echo "Wrong Usage.";
-printUsageExit;
+printSimpleUsage
 fi
 
 
@@ -98,24 +88,6 @@ then
 fi
 
 
-# Peel off first argument so that $@ contains argume

[1/2] incubator-systemml git commit: [SYSTEMML-1431] Throw controlled error when one-dimensional numpy array is passed to SystemML

2017-03-27 Thread nakul02

Repository: incubator-systemml
Updated Branches:
  refs/heads/gh-pages 7407b7001 -> 8a125c75c


[SYSTEMML-1431] Throw controlled error when one-dimensional numpy array is 
passed to SystemML

Here is an example pyspark session demonstrating this PR:
>>> from mlxtend.data import mnist_data
>>> import numpy as np
>>> from sklearn.utils import shuffle
X, y = mnist_data()
from systemml import MLContext, dml
ml = MLContext(sc)
script = dml('print(sum(X))').input(X=X)
ml.execute(script)
script = dml('print(sum(X))').input(X=y)
ml.execute(script)
script = dml('print(sum(X))').input(X=y.reshape(-1, 1))
ml.execute(script)
>>> X, y = mnist_data()
>>> from systemml import MLContext, dml
>>> ml = MLContext(sc)

Welcome to Apache SystemML!

>>> script = dml('print(sum(X))').input(X=X)
>>> ml.execute(script)
1.31267102E8
MLResults
>>> script = dml('print(sum(X))').input(X=y)
>>> ml.execute(script)
...
TypeError: Expected 2-dimensional ndarray, instead passed 1-dimensional
ndarray. Hint: If you intend to pass the 1-dimensional ndarray as a
column-vector, please reshape it: input_ndarray.reshape(-1, 1)
>>> script = dml('print(sum(X))').input(X=y.reshape(-1, 1))
>>> ml.execute(script)
22500.0

Closes #438.


Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/a1d73f80
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/a1d73f80
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/a1d73f80

Branch: refs/heads/gh-pages
Commit: a1d73f805bc6a94e953c0b999269b79fcbb07a16
Parents: 7407b70
Author: Niketan Pansare 
Authored: Thu Mar 23 11:41:16 2017 -0700
Committer: Niketan Pansare 
Committed: Thu Mar 23 11:44:33 2017 -0700

--
 beginners-guide-python.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/a1d73f80/beginners-guide-python.md
--
diff --git a/beginners-guide-python.md b/beginners-guide-python.md
index ffab09e..24f7151 100644
--- a/beginners-guide-python.md
+++ b/beginners-guide-python.md
@@ -183,7 +183,7 @@ y_train = diabetes.target[:-20]
 y_test = diabetes.target[-20:]
 # Train Linear Regression model
 X = sml.matrix(X_train)
-y = sml.matrix(y_train)
+y = sml.matrix(np.matrix(y_train).T)
 A = X.transpose().dot(X)
 b = X.transpose().dot(y)
 beta = sml.solve(A, b).toNumPy()

[2/2] incubator-systemml git commit: Use Apache Commons CLI to parse command line arguments in DMLScript

2017-03-27 Thread nakul02

Use Apache Commons CLI to parse command line arguments in DMLScript

- Uses Apache Commons CLI 1.2 to parse command line options
- Known limitation - strips arguments of leading and trailing double
  quotes
- Changed scripts to accept "-config " instead of "-config="
- Instead of "-gpu force=true", accepts "-gpu force"
- Concise description of usage options
- Updated bin/systemml script to print usage options when passed the
  "-help" option
- Removed DMLScriptTest{1,2}, lots of test cases added as unit tests as
  they were testing the previous hand-rolled command line parsing
- Added unit tests

Closes #435


Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/8a125c75
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/8a125c75
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/8a125c75

Branch: refs/heads/gh-pages
Commit: 8a125c75c5f96ca65288806f36498483c413b04d
Parents: a1d73f8
Author: Nakul Jindal 
Authored: Mon Mar 27 13:35:12 2017 -0700
Committer: Nakul Jindal 
Committed: Mon Mar 27 13:35:12 2017 -0700

--
 algorithms-classification.md | 44 +++
 algorithms-clustering.md | 14 +-
 algorithms-descriptive-statistics.md | 14 +-
 algorithms-matrix-factorization.md   | 18 ++---
 algorithms-regression.md | 36 -
 algorithms-survival-analysis.md  | 16 +--
 hadoop-batch-mode.md | 16 +--
 spark-batch-mode.md  |  4 +--
 standalone-guide.md  |  2 +-
 troubleshooting-guide.md |  2 +-
 10 files changed, 83 insertions(+), 83 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/8a125c75/algorithms-classification.md
--
diff --git a/algorithms-classification.md b/algorithms-classification.md
index 0ee43bf..11bd1da 100644
--- a/algorithms-classification.md
+++ b/algorithms-classification.md
@@ -165,7 +165,7 @@ val prediction = model.transform(X_test_df)
  --conf spark.akka.frameSize=128
  SystemML.jar
  -f MultiLogReg.dml
- -config=SystemML-config.xml
+ -config SystemML-config.xml
  -exec hybrid_spark
  -nvargs X=
  Y=
@@ -336,7 +336,7 @@ prediction.show()
  --conf spark.akka.frameSize=128
  SystemML.jar
  -f MultiLogReg.dml
- -config=SystemML-config.xml
+ -config SystemML-config.xml
  -exec hybrid_spark
  -nvargs X=/user/ml/X.mtx
  Y=/user/ml/Y.mtx
@@ -532,7 +532,7 @@ val model = svm.fit(X_train_df)
  --conf spark.akka.frameSize=128
  SystemML.jar
  -f l2-svm.dml
- -config=SystemML-config.xml
+ -config SystemML-config.xml
  -exec hybrid_spark
  -nvargs X=
  Y=
@@ -579,7 +579,7 @@ val prediction = model.transform(X_test_df)
  --conf spark.akka.frameSize=128
  SystemML.jar
  -f l2-svm-predict.dml
- -config=SystemML-config.xml
+ -config SystemML-config.xml
  -exec hybrid_spark
  -nvargs X=
  Y=[file]
@@ -661,7 +661,7 @@ using a held-out test set. Note that this is an optional 
argument.
  --conf spark.akka.frameSize=128
  SystemML.jar
  -f l2-svm.dml
- -config=SystemML-config.xml
+ -config SystemML-config.xml
  -exec hybrid_spark
  -nvargs X=/user/ml/X.mtx
  Y=/user/ml/y.mtx
@@ -695,7 +695,7 @@ using a held-out test set. Note that this is an optional 
argument.
  --conf spark.akka.frameSize=128
  SystemML.jar

[incubator-systemml] Git Push Summary [forced push!] [Forced Update!]

2017-03-27 Thread nakul02

Repository: incubator-systemml
Updated Branches:
  refs/heads/master 346d1c01a -> ee6bc8ce2 (forced update)

incubator-systemml git commit: [MINOR] Cleanup of some comments

2017-03-27 Thread nakul02

Repository: incubator-systemml
Updated Branches:
  refs/heads/master ee6bc8ce2 -> af93ca8a4


[MINOR] Cleanup of some comments


Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/af93ca8a
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/af93ca8a
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/af93ca8a

Branch: refs/heads/master
Commit: af93ca8a40befaaba79b0f96b4dbf8b8db85be13
Parents: ee6bc8c
Author: Nakul Jindal 
Authored: Mon Mar 27 14:39:31 2017 -0700
Committer: Nakul Jindal 
Committed: Mon Mar 27 14:39:31 2017 -0700

--
 .../runtime/matrix/data/LibMatrixCUDA.java  | 25 
 1 file changed, 5 insertions(+), 20 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/af93ca8a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java
--
diff --git 
a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java 
b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java
index 8074e3a..23790c4 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java
@@ -26,15 +26,16 @@ import jcuda.jcublas.cublasFillMode;
 import jcuda.jcublas.cublasHandle;
 import jcuda.jcublas.cublasOperation;
 import jcuda.jcudnn.cudnnActivationDescriptor;
+import jcuda.jcudnn.cudnnBatchNormMode;
 import jcuda.jcudnn.cudnnConvolutionDescriptor;
 import jcuda.jcudnn.cudnnConvolutionFwdPreference;
 import jcuda.jcudnn.cudnnFilterDescriptor;
 import jcuda.jcudnn.cudnnHandle;
 import jcuda.jcudnn.cudnnPoolingDescriptor;
+import jcuda.jcudnn.cudnnStatus;
 import jcuda.jcudnn.cudnnTensorDescriptor;
 import jcuda.jcusparse.JCusparse;
 import jcuda.jcusparse.cusparseHandle;
-
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.sysml.api.DMLScript;
@@ -88,6 +89,9 @@ import org.apache.sysml.utils.Statistics;
 import static jcuda.jcublas.cublasOperation.CUBLAS_OP_N;
 import static jcuda.jcublas.cublasOperation.CUBLAS_OP_T;
 import static jcuda.jcudnn.JCudnn.cudnnActivationForward;
+import static jcuda.jcudnn.JCudnn.cudnnBatchNormalizationBackward;
+import static jcuda.jcudnn.JCudnn.cudnnBatchNormalizationForwardInference;
+import static jcuda.jcudnn.JCudnn.cudnnBatchNormalizationForwardTraining;
 import static jcuda.jcudnn.JCudnn.cudnnConvolutionBackwardData;
 import static jcuda.jcudnn.JCudnn.cudnnConvolutionBackwardFilter;
 import static jcuda.jcudnn.JCudnn.cudnnConvolutionForward;
@@ -126,11 +130,6 @@ import static 
jcuda.runtime.cudaMemcpyKind.cudaMemcpyDeviceToHost;
 import static jcuda.runtime.cudaMemcpyKind.cudaMemcpyHostToDevice;
 import static 
org.apache.sysml.runtime.instructions.gpu.context.JCudaObject.allocate;
 import static 
org.apache.sysml.runtime.instructions.gpu.context.JCudaObject.cudaFreeHelper;
-import jcuda.jcudnn.cudnnBatchNormMode;
-import jcuda.jcudnn.cudnnStatus;
-import static jcuda.jcudnn.JCudnn.cudnnBatchNormalizationForwardInference;
-import static jcuda.jcudnn.JCudnn.cudnnBatchNormalizationForwardTraining;
-import static jcuda.jcudnn.JCudnn.cudnnBatchNormalizationBackward;
 
 //FIXME move could to respective instructions, this is not a block library
 public class LibMatrixCUDA {
@@ -2518,22 +2517,11 @@ public class LibMatrixCUDA {
Pointer A = getDensePointer(out, instName);
int rlen = (int) out.getNumRows();
int clen = (int) out.getNumColumns();
-// if(constant == 0) {
-// out.getMatrixCharacteristics().setNonZeros(0);
-// }
-// else {
-// out.getMatrixCharacteristics().setNonZeros(rlen*clen);
-// }
-   // dense_matrix_set(double* A,  double scalar, int rlen, int 
clen)
-
long t0=0;
if (GPUStatistics.DISPLAY_STATISTICS) t0 = System.nanoTime();
int size = rlen * clen;
kernels.launchKernel("fill", 
ExecutionConfig.getConfigForSimpleVectorOperations(size),
A, constant, size);
-   //  kernels.launchKernel("dense_matrix_set",
-   //  
ExecutionConfig.getConfigForSimpleMatrixOperations(rlen, clen),
-   //  A, constant, 
rlen, clen);
if (GPUStatistics.DISPLAY_STATISTICS) 
GPUStatistics.maintainCPMiscTimes(instName, 
GPUInstruction.MISC_TIMER_FILL_KERNEL, System.nanoTime() - t0);
}
 
@@ -2549,9 +2537,6 @@ public class LibMatrixCUDA {
private static void deviceCopy(String

incubator-systemml git commit: Added python script to launch systemml in standalone mode

2017-04-21 Thread nakul02

Repository: incubator-systemml
Updated Branches:
  refs/heads/master ea6e2fe39 -> f73673d59


Added python script to launch systemml in standalone mode

Closes #461


Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/f73673d5
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/f73673d5
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/f73673d5

Branch: refs/heads/master
Commit: f73673d59383ac947111cb84787cfa4df3ca7344
Parents: ea6e2fe
Author: Nakul Jindal 
Authored: Fri Apr 21 14:25:50 2017 -0700
Committer: Nakul Jindal 
Committed: Fri Apr 21 14:25:50 2017 -0700

--
 bin/systemml-standalone.py | 199 
 1 file changed, 199 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/f73673d5/bin/systemml-standalone.py
--
diff --git a/bin/systemml-standalone.py b/bin/systemml-standalone.py
new file mode 100755
index 000..367bcdf
--- /dev/null
+++ b/bin/systemml-standalone.py
@@ -0,0 +1,199 @@
+#!/usr/bin/env python
+#-
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-
+
+import os
+import shutil
+import sys
+from os.path import join, exists
+
+
+# error help print
+def print_usage_and_exit():
+this_script = sys.argv[0]
+print('Usage: ' + this_script + '  [arguments]')
+sys.exit(1)
+
+
+# from http://stackoverflow.com/questions/1724693/find-a-file-in-python
+def find_file(name, path):
+for root, dirs, files in os.walk(path):
+if name in files:
+return join(root, name)
+return None
+
+
+if len(sys.argv) < 2:
+print('Wrong usage')
+print_usage_and_exit()
+
+
+# find the systemML root path which contains the bin folder, the script folder 
and the target folder
+# tolerate path with spaces
+script_dir = os.path.dirname(os.path.realpath(__file__))
+project_root_dir = os.path.dirname(script_dir)
+user_dir = os.getcwd()
+
+scripts_dir = join(project_root_dir, 'scripts')
+build_dir = join(project_root_dir, 'target')
+lib_dir = join(build_dir, 'lib')
+dml_script_class = join(build_dir, 'classes', 'org', 'apache', 'sysml', 'api', 
'DMLScript.class')
+hadoop_home = join(lib_dir, 'hadoop')
+
+
+build_err_msg = 'You must build the project before running this script.'
+build_dir_err_msg = 'Could not find target directory ' + build_dir + '. ' + 
build_err_msg
+
+lib_dir_err_msg = 'Could not find required libraries.' + build_err_msg
+dml_script_err_msg = 'Could not find ' + dml_script_class + '. ' + 
build_err_msg
+
+# check if the project had been built and the jar files exist
+if not(exists(build_dir)):
+print(build_dir_err_msg)
+sys.exit(1)
+if not(exists(lib_dir)):
+print(lib_dir_err_msg)
+sys.exit(1)
+if not(exists(dml_script_class)):
+print(dml_script_err_msg)
+sys.exit(1)
+
+print('')
+
+
+# if the present working directory is the project root or bin folder, then use 
the temp folder as user.dir
+if user_dir == project_root_dir or user_dir == join(project_root_dir, 'bin'):
+user_dir = join(project_root_dir, 'temp')
+print('Output dir: ' + user_dir)
+
+# if the SystemML-config.xml does not exist, create it from the template
+systemml_config_path = join(project_root_dir, 'conf', 'SystemML-config.xml')
+systemml_template_config_path = join(project_root_dir, 'conf', 
'SystemML-config.xml.template')
+if not(exists(systemml_config_path)):
+shutil.copyfile(systemml_template_config_path, systemml_config_path)
+print('... created ' + systemml_config_path)
+
+# if the log4j.properties do not exist, create them from the template
+log4j_properties_path = join(project_root_dir, 'conf', 'log4j.properties')
+log4j_template_properties_path = join(project_root_dir, 'conf', 
'log4j.properties.template')
+if not(exi

[1/2] incubator-systemml git commit: Use Apache Commons CLI to parse command line arguments in DMLScript

2017-04-21 Thread nakul02

Repository: incubator-systemml
Updated Branches:
  refs/heads/master f73673d59 -> 32924dc60


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/32924dc6/src/main/java/org/apache/sysml/api/MLContext.java
--
diff --git a/src/main/java/org/apache/sysml/api/MLContext.java 
b/src/main/java/org/apache/sysml/api/MLContext.java
index 809776a..b3102e9 100644
--- a/src/main/java/org/apache/sysml/api/MLContext.java
+++ b/src/main/java/org/apache/sysml/api/MLContext.java
@@ -771,7 +771,7 @@ public class MLContext {
args[i] = entry.getKey() + "=" + 
entry.getValue();
i++;
}
-   return compileAndExecuteScript(dmlScriptFilePath, args, true, 
parsePyDML, configFilePath);
+   return compileAndExecuteScript(dmlScriptFilePath, args, true, 
parsePyDML ? ScriptType.PYDML : ScriptType.DML, configFilePath);
}

/**
@@ -785,17 +785,7 @@ public class MLContext {
 * @throws ParseException if ParseException occurs
 */
public MLOutput execute(String dmlScriptFilePath, Map 
namedArgs, String configFilePath) throws IOException, DMLException, 
ParseException {
-   String [] args = new String[namedArgs.size()];
-   int i = 0;
-   for(Entry entry : namedArgs.entrySet()) {
-   if(entry.getValue().trim().isEmpty())
-   args[i] = entry.getKey() + "=\"" + 
entry.getValue() + "\"";
-   else
-   args[i] = entry.getKey() + "=" + 
entry.getValue();
-   i++;
-   }
-   
-   return compileAndExecuteScript(dmlScriptFilePath, args, true, 
false, configFilePath);
+   return execute(dmlScriptFilePath, namedArgs, false, 
configFilePath);
}

/**
@@ -1014,7 +1004,7 @@ public class MLContext {
 * @throws ParseException if ParseException occurs
 */
public MLOutput execute(String dmlScriptFilePath, String [] args, 
boolean parsePyDML, String configFilePath) throws IOException, DMLException, 
ParseException {
-   return compileAndExecuteScript(dmlScriptFilePath, args, false, 
parsePyDML, configFilePath);
+   return compileAndExecuteScript(dmlScriptFilePath, args, false, 
parsePyDML ? ScriptType.PYDML : ScriptType.DML, configFilePath);
}

/**
@@ -1067,7 +1057,7 @@ public class MLContext {
 * @throws ParseException if ParseException occurs
 */
public MLOutput execute(String dmlScriptFilePath, boolean parsePyDML, 
String configFilePath) throws IOException, DMLException, ParseException {
-   return compileAndExecuteScript(dmlScriptFilePath, null, false, 
parsePyDML, configFilePath);
+   return compileAndExecuteScript(dmlScriptFilePath, null, false, 
parsePyDML ? ScriptType.PYDML : ScriptType.DML, configFilePath);
}

/**
@@ -1314,7 +1304,7 @@ public class MLContext {
 
public MLOutput executeScript(String dmlScript, boolean isPyDML, String 
configFilePath)
throws IOException, DMLException {
-   return compileAndExecuteScript(dmlScript, null, false, false, 
isPyDML, configFilePath);
+   return compileAndExecuteScript(dmlScript, null, false, false, 
isPyDML ? ScriptType.PYDML : ScriptType.DML, configFilePath);
}
 
/*
@@ -1391,7 +1381,7 @@ public class MLContext {
args[i] = entry.getKey() + "=" + 
entry.getValue();
i++;
}
-   return compileAndExecuteScript(dmlScript, args, false, true, 
isPyDML, configFilePath);
+   return compileAndExecuteScript(dmlScript, args, false, true, 
isPyDML ? ScriptType.PYDML : ScriptType.DML, configFilePath);
}
 
private void checkIfRegisteringInputAllowed() throws 
DMLRuntimeException {
@@ -1400,26 +1390,29 @@ public class MLContext {
}
}

-   private MLOutput compileAndExecuteScript(String dmlScriptFilePath, 
String [] args, boolean isNamedArgument, boolean isPyDML, String 
configFilePath) throws IOException, DMLException {
-   return compileAndExecuteScript(dmlScriptFilePath, args, true, 
isNamedArgument, isPyDML, configFilePath);
+   private MLOutput compileAndExecuteScript(String dmlScriptFilePath, 
String [] args, boolean isNamedArgument, ScriptType scriptType, String 
configFilePath) throws IOException, DMLException {
+   return compileAndExecuteScript(dmlScriptFilePath, args, true, 
isNamedArgument, scriptType, configFilePath);
}
-   
+
/**
 * All the execute() methods call this, which  after setting 
appropriate input/output variables
 * calls _compileAndExecuteScript

[2/2] incubator-systemml git commit: Use Apache Commons CLI to parse command line arguments in DMLScript

2017-04-21 Thread nakul02

Use Apache Commons CLI to parse command line arguments in DMLScript

- Added unit tests
- changed scripts to accept "-config " instead of "-config="
- Removed DMLScriptTest{1,2}
- Modified bin/systemml script to print a better help message
- Removed extraneous ZPackageSuite for DMLScriptTest{1,2}

Closes #440


Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/32924dc6
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/32924dc6
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/32924dc6

Branch: refs/heads/master
Commit: 32924dc6027df1973b37a8688b7d0cacbdefd4bf
Parents: f73673d
Author: Nakul Jindal 
Authored: Fri Apr 21 14:50:56 2017 -0700
Committer: Nakul Jindal 
Committed: Fri Apr 21 14:50:57 2017 -0700

--
 bin/systemml|  84 +--
 bin/systemml.bat|   4 +-
 docs/algorithms-classification.md   |  44 +-
 docs/algorithms-clustering.md   |  14 +-
 docs/algorithms-descriptive-statistics.md   |  14 +-
 docs/algorithms-matrix-factorization.md |  18 +-
 docs/algorithms-regression.md   |  36 +-
 docs/algorithms-survival-analysis.md|  16 +-
 docs/hadoop-batch-mode.md   |  16 +-
 docs/spark-batch-mode.md|   4 +-
 docs/standalone-guide.md|   2 +-
 docs/troubleshooting-guide.md   |   2 +-
 scripts/sparkDML.sh |   2 +-
 .../java/org/apache/sysml/api/DMLScript.java| 576 ---
 .../java/org/apache/sysml/api/MLContext.java|  49 +-
 .../org/apache/sysml/api/jmlc/Connection.java   |   2 +-
 .../sysml/api/mlcontext/ScriptExecutor.java |   2 +-
 .../org/apache/sysml/parser/ParserFactory.java  |  16 +-
 .../java/org/apache/sysml/utils/Explain.java|  32 +-
 .../org/apache/sysml/yarn/DMLYarnClient.java|   3 +-
 src/main/resources/scripts/sparkDML.sh  |   2 +-
 src/main/standalone/runStandaloneSystemML.bat   |   4 +-
 src/main/standalone/runStandaloneSystemML.sh|   2 +-
 .../test/integration/AutomatedTestBase.java |  19 +-
 .../functions/dmlscript/DMLScriptTest1.java | 125 
 .../functions/dmlscript/DMLScriptTest2.java | 151 -
 .../functions/misc/DataTypeChangeTest.java  |  14 +-
 .../parfor/ParForDependencyAnalysisTest.java|   2 +-
 .../TransformFrameEncodeDecodeTest.java |  15 +-
 .../sysml/test/unit/CLIOptionsParserTest.java   | 415 +
 .../functions/dmlscript/ZPackageSuite.java  |  37 --
 31 files changed, 992 insertions(+), 730 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/32924dc6/bin/systemml
--
diff --git a/bin/systemml b/bin/systemml
index 0ccee2d..44ab45e 100755
--- a/bin/systemml
+++ b/bin/systemml
@@ -20,32 +20,22 @@
 #
 #-
 
+
 # error help print
-printUsageExit()
+printSimpleUsage()
 {
 cat << EOF
 Usage: $0  [arguments] [-help]
--help - Print this usage message and exit
+-help - Print detailed help message
 EOF
   exit 1
 }
-#Script internally invokes 'java -Xmx4g -Xms4g -Xmn400m 
[Custom-Java-Options] -jar StandaloneSystemML.jar -f  -exec 
singlenode -config=SystemML-config.xml [Optional-Arguments]'
 
-while getopts "h:" options; do
-  case $options in
-h ) echo Warning: Help requested. Will exit after usage message
-printUsageExit
-;;
-\? ) echo Warning: Help requested. Will exit after usage message
-printUsageExit
-;;
-* ) echo Error: Unexpected error while processing options
-  esac
-done
+#Script internally invokes 'java -Xmx4g -Xms4g -Xmn400m 
[Custom-Java-Options] -jar StandaloneSystemML.jar -f  -exec 
singlenode -config=SystemML-config.xml [Optional-Arguments]'
 
 if [ -z "$1" ] ; then
 echo "Wrong Usage.";
-printUsageExit;
+printSimpleUsage
 fi
 
 
@@ -98,24 +88,6 @@ then
 fi
 
 
-# Peel off first argument so that $@ contains arguments to DML script
-SCRIPT_FILE=$1
-shift
-
-# if the script file path was omitted, try to complete the script path
-if [ ! -f "$SCRIPT_FILE" ]
-then
-  SCRIPT_FILE_NAME=$(basename $SCRIPT_FILE)
-  SCRIPT_FILE_FOUND=$(find "$PROJECT_ROOT_DIR/scripts" -name 
"$SCRIPT_FILE_NAME")
-  if [ ! "$SCRIPT_FILE_FOUND" ]
-  then
-echo "Could not find DML script: $SCRIPT_FILE"
-printUsageExit;
-  else
-SCRIPT_FILE=$SCRIPT_FILE_FOUND
-echo "DML script: $SCRIPT_FILE"
-  fi
-fi
 
 
 # add hadoop libraries which were generated by the build to the classpath
@@ -149,13 +121,57 @@ if [ -f "${PROJECT_ROOT_DIR}/conf/systemml-env.sh" ]; then

incubator-systemml git commit: Use Apache Commons CLI to parse command line arguments in DMLScript

2017-04-21 Thread nakul02

Repository: incubator-systemml
Updated Branches:
  refs/heads/gh-pages c4918f5b6 -> c5ff65305


Use Apache Commons CLI to parse command line arguments in DMLScript

- Added unit tests
- changed scripts to accept "-config " instead of "-config="
- Removed DMLScriptTest{1,2}
- Modified bin/systemml script to print a better help message
- Removed extraneous ZPackageSuite for DMLScriptTest{1,2}

Closes #440


Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/c5ff6530
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/c5ff6530
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/c5ff6530

Branch: refs/heads/gh-pages
Commit: c5ff65305a5baeec20aec25bd6fa9ab73b1e1990
Parents: c4918f5
Author: Nakul Jindal 
Authored: Fri Apr 21 14:50:56 2017 -0700
Committer: Nakul Jindal 
Committed: Fri Apr 21 14:50:57 2017 -0700

--
 algorithms-classification.md | 44 +++
 algorithms-clustering.md | 14 +-
 algorithms-descriptive-statistics.md | 14 +-
 algorithms-matrix-factorization.md   | 18 ++---
 algorithms-regression.md | 36 -
 algorithms-survival-analysis.md  | 16 +--
 hadoop-batch-mode.md | 16 +--
 spark-batch-mode.md  |  4 +--
 standalone-guide.md  |  2 +-
 troubleshooting-guide.md |  2 +-
 10 files changed, 83 insertions(+), 83 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/c5ff6530/algorithms-classification.md
--
diff --git a/algorithms-classification.md b/algorithms-classification.md
index b029e0a..ed56c34 100644
--- a/algorithms-classification.md
+++ b/algorithms-classification.md
@@ -165,7 +165,7 @@ val prediction = model.transform(X_test_df)
  --conf spark.akka.frameSize=128
  SystemML.jar
  -f MultiLogReg.dml
- -config=SystemML-config.xml
+ -config SystemML-config.xml
  -exec hybrid_spark
  -nvargs X=
  Y=
@@ -336,7 +336,7 @@ prediction.show()
  --conf spark.akka.frameSize=128
  SystemML.jar
  -f MultiLogReg.dml
- -config=SystemML-config.xml
+ -config SystemML-config.xml
  -exec hybrid_spark
  -nvargs X=/user/ml/X.mtx
  Y=/user/ml/Y.mtx
@@ -532,7 +532,7 @@ val model = svm.fit(X_train_df)
  --conf spark.akka.frameSize=128
  SystemML.jar
  -f l2-svm.dml
- -config=SystemML-config.xml
+ -config SystemML-config.xml
  -exec hybrid_spark
  -nvargs X=
  Y=
@@ -579,7 +579,7 @@ val prediction = model.transform(X_test_df)
  --conf spark.akka.frameSize=128
  SystemML.jar
  -f l2-svm-predict.dml
- -config=SystemML-config.xml
+ -config SystemML-config.xml
  -exec hybrid_spark
  -nvargs X=
  Y=[file]
@@ -661,7 +661,7 @@ using a held-out test set. Note that this is an optional 
argument.
  --conf spark.akka.frameSize=128
  SystemML.jar
  -f l2-svm.dml
- -config=SystemML-config.xml
+ -config SystemML-config.xml
  -exec hybrid_spark
  -nvargs X=/user/ml/X.mtx
  Y=/user/ml/y.mtx
@@ -695,7 +695,7 @@ using a held-out test set. Note that this is an optional 
argument.
  --conf spark.akka.frameSize=128
  SystemML.jar
  -f l2-svm-predict.dml
- -config=SystemML-config.xml
+ -config SystemML-config.xml
  -exec hybrid

[1/5] incubator-systemml git commit: Refactored GPU{Contex, Object} to make it friendlier for parfor

2017-04-21 Thread nakul02

Repository: incubator-systemml
Updated Branches:
  refs/heads/master 9ed27ad60 -> 129f0f6b0


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/129f0f6b/src/test/java/org/apache/sysml/test/integration/AutomatedTestBase.java
--
diff --git 
a/src/test/java/org/apache/sysml/test/integration/AutomatedTestBase.java 
b/src/test/java/org/apache/sysml/test/integration/AutomatedTestBase.java
index f7071ba..b20f66a 100644
--- a/src/test/java/org/apache/sysml/test/integration/AutomatedTestBase.java
+++ b/src/test/java/org/apache/sysml/test/integration/AutomatedTestBase.java
@@ -1515,7 +1515,7 @@ public abstract class AutomatedTestBase
/**
 * Enables expection of a line in standard output stream.
 * 
-* @param expected
+* @param expectedLine
 */
public void setExpectedStdOut(String expectedLine) {
this.expectedStdOut = expectedLine;

[2/5] incubator-systemml git commit: Refactored GPU{Contex, Object} to make it friendlier for parfor

2017-04-21 Thread nakul02

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/129f0f6b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java
--
diff --git 
a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java 
b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java
index c363ab1..3c32137 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java
@@ -19,23 +19,49 @@
 
 package org.apache.sysml.runtime.matrix.data;
 
-import jcuda.Pointer;
-import jcuda.Sizeof;
-import jcuda.jcublas.JCublas2;
-import jcuda.jcublas.cublasFillMode;
-import jcuda.jcublas.cublasHandle;
-import jcuda.jcublas.cublasOperation;
-import jcuda.jcudnn.cudnnActivationDescriptor;
-import jcuda.jcudnn.cudnnBatchNormMode;
-import jcuda.jcudnn.cudnnConvolutionDescriptor;
-import jcuda.jcudnn.cudnnConvolutionFwdPreference;
-import jcuda.jcudnn.cudnnFilterDescriptor;
-import jcuda.jcudnn.cudnnHandle;
-import jcuda.jcudnn.cudnnPoolingDescriptor;
-import jcuda.jcudnn.cudnnStatus;
-import jcuda.jcudnn.cudnnTensorDescriptor;
-import jcuda.jcusparse.JCusparse;
-import jcuda.jcusparse.cusparseHandle;
+import static jcuda.jcublas.cublasOperation.CUBLAS_OP_N;
+import static jcuda.jcublas.cublasOperation.CUBLAS_OP_T;
+import static jcuda.jcudnn.JCudnn.cudnnActivationForward;
+import static jcuda.jcudnn.JCudnn.cudnnBatchNormalizationBackward;
+import static jcuda.jcudnn.JCudnn.cudnnBatchNormalizationForwardInference;
+import static jcuda.jcudnn.JCudnn.cudnnBatchNormalizationForwardTraining;
+import static jcuda.jcudnn.JCudnn.cudnnConvolutionBackwardData;
+import static jcuda.jcudnn.JCudnn.cudnnConvolutionBackwardFilter;
+import static jcuda.jcudnn.JCudnn.cudnnConvolutionForward;
+import static jcuda.jcudnn.JCudnn.cudnnCreateActivationDescriptor;
+import static jcuda.jcudnn.JCudnn.cudnnCreateConvolutionDescriptor;
+import static jcuda.jcudnn.JCudnn.cudnnCreateFilterDescriptor;
+import static jcuda.jcudnn.JCudnn.cudnnCreatePoolingDescriptor;
+import static jcuda.jcudnn.JCudnn.cudnnCreateTensorDescriptor;
+import static jcuda.jcudnn.JCudnn.cudnnDestroyConvolutionDescriptor;
+import static jcuda.jcudnn.JCudnn.cudnnDestroyFilterDescriptor;
+import static jcuda.jcudnn.JCudnn.cudnnDestroyPoolingDescriptor;
+import static jcuda.jcudnn.JCudnn.cudnnGetConvolutionBackwardDataWorkspaceSize;
+import static 
jcuda.jcudnn.JCudnn.cudnnGetConvolutionBackwardFilterWorkspaceSize;
+import static jcuda.jcudnn.JCudnn.cudnnGetConvolutionForwardWorkspaceSize;
+import static jcuda.jcudnn.JCudnn.cudnnPoolingBackward;
+import static jcuda.jcudnn.JCudnn.cudnnPoolingForward;
+import static jcuda.jcudnn.JCudnn.cudnnSetActivationDescriptor;
+import static jcuda.jcudnn.JCudnn.cudnnSetConvolution2dDescriptor;
+import static jcuda.jcudnn.JCudnn.cudnnSetFilter4dDescriptor;
+import static jcuda.jcudnn.JCudnn.cudnnSetPooling2dDescriptor;
+import static jcuda.jcudnn.JCudnn.cudnnSetTensor4dDescriptor;
+import static jcuda.jcudnn.cudnnActivationMode.CUDNN_ACTIVATION_RELU;
+import static jcuda.jcudnn.cudnnConvolutionMode.CUDNN_CROSS_CORRELATION;
+import static jcuda.jcudnn.cudnnDataType.CUDNN_DATA_DOUBLE;
+import static jcuda.jcudnn.cudnnNanPropagation.CUDNN_PROPAGATE_NAN;
+import static jcuda.jcudnn.cudnnPoolingMode.CUDNN_POOLING_MAX;
+import static jcuda.jcudnn.cudnnTensorFormat.CUDNN_TENSOR_NCHW;
+import static jcuda.jcusparse.JCusparse.cusparseDcsrgemm;
+import static jcuda.jcusparse.JCusparse.cusparseDcsrmv;
+import static 
jcuda.jcusparse.cusparseOperation.CUSPARSE_OPERATION_NON_TRANSPOSE;
+import static jcuda.jcusparse.cusparseOperation.CUSPARSE_OPERATION_TRANSPOSE;
+import static jcuda.runtime.JCuda.cudaDeviceSynchronize;
+import static jcuda.runtime.JCuda.cudaMemcpy;
+import static jcuda.runtime.cudaMemcpyKind.cudaMemcpyDeviceToDevice;
+import static jcuda.runtime.cudaMemcpyKind.cudaMemcpyDeviceToHost;
+import static jcuda.runtime.cudaMemcpyKind.cudaMemcpyHostToDevice;
+
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.sysml.api.DMLScript;
@@ -72,10 +98,9 @@ import org.apache.sysml.runtime.instructions.cp.DoubleObject;
 import org.apache.sysml.runtime.instructions.gpu.GPUInstruction;
 import org.apache.sysml.runtime.instructions.gpu.context.ExecutionConfig;
 import org.apache.sysml.runtime.instructions.gpu.context.GPUContext;
-import org.apache.sysml.runtime.instructions.gpu.context.JCudaContext;
+import org.apache.sysml.runtime.instructions.gpu.context.GPUObject;
+import org.apache.sysml.runtime.instructions.gpu.context.CSRPointer;
 import org.apache.sysml.runtime.instructions.gpu.context.JCudaKernels;
-import org.apache.sysml.runtime.instructions.gpu.context.JCudaObject;
-import 
org.apache.sysml.runtime.instructions.gpu.context.JCudaObject.CSRPointer;
 import org.apache.sysml.runtime.matrix.operators.AggregateOperator;
 impo

[4/5] incubator-systemml git commit: Refactored GPU{Contex, Object} to make it friendlier for parfor

2017-04-21 Thread nakul02

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/129f0f6b/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUContext.java
--
diff --git 
a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUContext.java
 
b/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUContext.java
index d2309b0..708f291 100644
--- 
a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUContext.java
+++ 
b/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUContext.java
@@ -18,65 +18,584 @@
  */
 package org.apache.sysml.runtime.instructions.gpu.context;
 
+import static jcuda.jcublas.JCublas2.cublasCreate;
+import static jcuda.jcublas.JCublas2.cublasDestroy;
+import static jcuda.jcudnn.JCudnn.cudnnCreate;
+import static jcuda.jcudnn.JCudnn.cudnnDestroy;
+import static jcuda.jcusparse.JCusparse.cusparseCreate;
+import static jcuda.jcusparse.JCusparse.cusparseDestroy;
+import static jcuda.runtime.JCuda.cudaDeviceScheduleBlockingSync;
+import static jcuda.runtime.JCuda.cudaFree;
+import static jcuda.runtime.JCuda.cudaGetDeviceCount;
+import static jcuda.runtime.JCuda.cudaMalloc;
+import static jcuda.runtime.JCuda.cudaMemGetInfo;
+import static jcuda.runtime.JCuda.cudaMemset;
+import static jcuda.runtime.JCuda.cudaSetDevice;
+import static jcuda.runtime.JCuda.cudaSetDeviceFlags;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.LinkedList;
+import java.util.Map;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 import org.apache.sysml.api.DMLScript;
-import org.apache.sysml.hops.OptimizerUtils;
+import org.apache.sysml.conf.ConfigurationManager;
+import org.apache.sysml.conf.DMLConfig;
 import org.apache.sysml.runtime.DMLRuntimeException;
 import org.apache.sysml.runtime.controlprogram.caching.MatrixObject;
+import org.apache.sysml.runtime.instructions.gpu.GPUInstruction;
+import org.apache.sysml.utils.GPUStatistics;
+import org.apache.sysml.utils.LRUCacheMap;
 
-//FIXME merge JCudaContext into GPUContext as this context is anyway CUDA 
specific
+import jcuda.Pointer;
+import jcuda.jcublas.cublasHandle;
+import jcuda.jcudnn.cudnnHandle;
+import jcuda.jcusparse.cusparseHandle;
+import jcuda.runtime.JCuda;
+import jcuda.runtime.cudaDeviceProp;
 
-public abstract class GPUContext {
+/**
+ * Represents a context per GPU accessible through the same JVM
+ * Each context holds cublas, cusparse, cudnn... handles which are separate 
for each GPU
+ */
+public class GPUContext {
 
-   protected static GPUContext currContext;
-   public static volatile Boolean isGPUContextCreated = false;
+   protected static final Log LOG = 
LogFactory.getLog(GPUContext.class.getName());
 
-   protected GPUContext() {}
+  /** Eviction policies for {@link GPUContext#evict(long)} */
+   public enum EvictionPolicy {
+   LRU, LFU, MIN_EVICT
+   }
 
-   /**
-* Gets device memory available for SystemML operations
-* 
-* @return available memory
-*/
-   public abstract long getAvailableMemory();
+   /** currently employed eviction policy */
+   public final EvictionPolicy evictionPolicy = EvictionPolicy.LRU;
+
+   /** Map of free blocks allocate on GPU. maps size_of_block -> pointer 
on GPU */
+   private LRUCacheMap> freeCUDASpaceMap = new 
LRUCacheMap<>();
+
+   /** To record size of allocated blocks */
+   private HashMap cudaBlockSizeMap = new HashMap<>();
+
+  /** active device assigned to this GPUContext instance */
+  private final int deviceNum;
+
+  /** list of allocated {@link GPUObject} instances allocated on {@link 
GPUContext#deviceNum} GPU
+   * These are matrices allocated on the GPU on which rmvar hasn't been called 
yet.
+   * If a {@link GPUObject} has more than one lock on it, it cannot be freed
+   * If it has zero locks on it, it can be freed, but it is preferrable to 
keep it around
+   * so that an extraneous host to dev transfer can be avoided */
+  private ArrayList allocatedGPUObjects = new ArrayList<>();
+
+  /** cudnnHandle specific to the active GPU for this GPUContext */
+  private cudnnHandle cudnnHandle;
+
+  /** cublasHandle specific to the active GPU for this GPUContext */
+  private cublasHandle cublasHandle;
+
+  /** cusparseHandle specific to the active GPU for this GPUContext */
+  private cusparseHandle cusparseHandle;
+
+  /** to launch custom CUDA kernel, specific to the active GPU for this 
GPUContext */
+  private JCudaKernels kernels;
 
/**
-* Ensures that all the CUDA cards on the current system are
-* of the minimum required compute capability.
-* (The minimum required compute capability is hard coded in {@link 
JCudaContext}.
-* 
-* @throws DMLRuntimeException if DMLRuntimeException occurs
-*

[3/5] incubator-systemml git commit: Refactored GPU{Contex, Object} to make it friendlier for parfor

2017-04-21 Thread nakul02

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/129f0f6b/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/JCudaKernels.java
--
diff --git 
a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/JCudaKernels.java
 
b/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/JCudaKernels.java
index d858b0b..b4cb87d 100644
--- 
a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/JCudaKernels.java
+++ 
b/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/JCudaKernels.java
@@ -18,34 +18,25 @@
  */
 package org.apache.sysml.runtime.instructions.gpu.context;
 
-import static jcuda.driver.JCudaDriver.cuCtxCreate;
-import static jcuda.driver.JCudaDriver.cuCtxGetCurrent;
-import static jcuda.driver.JCudaDriver.cuDeviceGet;
-import static jcuda.driver.JCudaDriver.cuInit;
 import static jcuda.driver.JCudaDriver.cuLaunchKernel;
 import static jcuda.driver.JCudaDriver.cuModuleGetFunction;
 import static jcuda.driver.JCudaDriver.cuModuleLoadDataEx;
-import static jcuda.driver.JCudaDriver.cuModuleUnload;
 
 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.util.HashMap;
 
-import jcuda.runtime.JCuda;
 import org.apache.sysml.runtime.DMLRuntimeException;
 import org.apache.sysml.runtime.io.IOUtilFunctions;
 
-import jcuda.CudaException;
 import jcuda.Pointer;
-import jcuda.driver.CUcontext;
-import jcuda.driver.CUdevice;
 import jcuda.driver.CUfunction;
 import jcuda.driver.CUmodule;
 import jcuda.driver.CUresult;
 
 /**
- * Utility class that allows LibMatrixCUDA as well as JCudaObject to invoke 
custom CUDA kernels.
+ * Utility class that allows LibMatrixCUDA as well as GPUObject to invoke 
custom CUDA kernels.
  * 
  * The utility org.apache.sysml.runtime.instructions.gpu.context.JCudaKernels 
simplifies the launching of the kernels. 
  * For example: to launch a kernel 
@@ -54,70 +45,23 @@ import jcuda.driver.CUresult;
  */
 public class JCudaKernels {
 
-   private static String ptxFileName = "/kernels/SystemML.ptx";
+   private final static String ptxFileName = "/kernels/SystemML.ptx";
private HashMap kernels = new HashMap();
private CUmodule module;
+   private final int deviceNum;

/**
 * Loads the kernels in the file ptxFileName. Though cubin files are 
also supported, we will stick with
 * ptx file as they are target-independent similar to Java's .class 
files.
-* 
+* @param deviceNum  the device number for which to initiate the driver 
API
 * @throws DMLRuntimeException if DMLRuntimeException occurs
 */
-   public JCudaKernels() throws DMLRuntimeException {
-   shutdown();
-   initCUDA();
+   JCudaKernels(int deviceNum) throws DMLRuntimeException {
+   this.deviceNum = deviceNum;
module = new CUmodule();
// Load the kernels specified in the ptxFileName file
checkResult(cuModuleLoadDataEx(module, 
initKernels(ptxFileName), 0, new int[0], Pointer.to(new int[0])));
}
-   
-   /**
- * Initializes the JCuda driver API. Then it will try to attach to the 
- * current CUDA context. If no active CUDA context exists, then it will 
- * try to create one, for the device which is specified by the current 
- * deviceNumber.
- * 
-* @throws DMLRuntimeException If it is neither possible to attach to 
an 
- * existing context, nor to create a new context.
- */
-private static void initCUDA() throws DMLRuntimeException {
-checkResult(cuInit(0));
-
-// Try to obtain the current context
-CUcontext context = new CUcontext();
-checkResult(cuCtxGetCurrent(context));
-
-// If the context is 'null', then a new context
-// has to be created.
-CUcontext nullContext = new CUcontext(); 
-if (context.equals(nullContext)) {
-createContext();
-}
-}
-
-/**
- * Tries to create a context for device 'deviceNumber'.
- * @throws DMLRuntimeException 
- * 
- * @throws CudaException If the device can not be 
- * accessed or the context can not be created
- */
-private static void createContext() throws DMLRuntimeException {
-   int deviceNumber = 0;
-CUdevice device = new CUdevice();
-checkResult(cuDeviceGet(device, deviceNumber));
-CUcontext context = new CUcontext();
-checkResult(cuCtxCreate(context, 0, device));
-}
-
-   /**
-* Performs cleanup actions such as unloading the module
-*/
-   public void shutdown() {
-   if(module != null)
-   cuModuleUnload(module);
-   }
 
/**
 * Sets up the kernel parameters and launches the kernel using 
cuLaunchKernel API. 
@@ -167,7 +1

[5/5] incubator-systemml git commit: Refactored GPU{Contex, Object} to make it friendlier for parfor

2017-04-21 Thread nakul02

Refactored GPU{Context,Object} to make it friendlier for parfor

- Folded JCuda{Context,Object} to GPU{Context,Object}
- Removed "deviceMemBytes", it was redundant
- Removed all synchronized in GPU{Object,Context}
- print GPUContext from everywhere in log.trace
- LibMatrixCUDA functions expect a GPUContext instead of getting it statically
- Restructured GPUContext to use a pool of already initialized GPUContexts
- Call cudaSetDevice when on different thread
- TODO FIXME Disabled cublasDgeam for scalarMatrixArithmetic
- TODO FIXME revisit the need to always force gpu to be used, mem est
  broken
- Ability to restrict parfor from picking up all GPUs on the machine,
  from a system property

Closes #462


Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/129f0f6b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/129f0f6b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/129f0f6b

Branch: refs/heads/master
Commit: 129f0f6b0e0f6167e4137c6d47374ab96501b888
Parents: 9ed27ad
Author: Nakul Jindal 
Authored: Fri Apr 21 16:22:19 2017 -0700
Committer: Nakul Jindal 
Committed: Fri Apr 21 16:22:19 2017 -0700

--
 conf/SystemML-config.xml.template   |3 +
 .../java/org/apache/sysml/api/DMLScript.java|   27 +-
 .../api/mlcontext/MLContextConversionUtil.java  |4 +-
 .../sysml/api/mlcontext/ScriptExecutor.java |   25 +-
 .../java/org/apache/sysml/conf/DMLConfig.java   |   17 +-
 .../controlprogram/ParForProgramBlock.java  |   25 +
 .../controlprogram/caching/CacheableData.java   |   80 +-
 .../context/ExecutionContext.java   |   56 +-
 .../controlprogram/parfor/LocalParWorker.java   |5 +
 .../controlprogram/parfor/ParWorker.java|5 +
 .../parfor/opt/OptTreeConverter.java|2 +-
 .../cp/FunctionCallCPInstruction.java   |   13 +-
 .../gpu/AggregateBinaryGPUInstruction.java  |4 +-
 .../gpu/AggregateUnaryGPUInstruction.java   |3 +-
 .../gpu/ConvolutionGPUInstruction.java  |   25 +-
 .../instructions/gpu/GPUInstruction.java|3 +-
 .../instructions/gpu/MMTSJGPUInstruction.java   |3 +-
 .../gpu/MatrixBuiltinGPUInstruction.java|5 +-
 .../MatrixMatrixArithmeticGPUInstruction.java   |5 +-
 .../gpu/MatrixMatrixAxpyGPUInstruction.java |5 +-
 .../instructions/gpu/ReorgGPUInstruction.java   |5 +-
 .../ScalarMatrixArithmeticGPUInstruction.java   |4 +-
 .../instructions/gpu/context/CSRPointer.java|  457 ++
 .../instructions/gpu/context/GPUContext.java|  619 ++-
 .../gpu/context/GPUContextPool.java |  158 ++
 .../instructions/gpu/context/GPUObject.java |  957 ---
 .../instructions/gpu/context/JCudaContext.java  |  286 
 .../instructions/gpu/context/JCudaKernels.java  |   70 +-
 .../instructions/gpu/context/JCudaObject.java   | 1330 ---
 .../runtime/matrix/data/LibMatrixCUDA.java  | 1509 ++
 .../test/integration/AutomatedTestBase.java |2 +-
 31 files changed, 3035 insertions(+), 2677 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/129f0f6b/conf/SystemML-config.xml.template
--
diff --git a/conf/SystemML-config.xml.template 
b/conf/SystemML-config.xml.template
index a4c7b2f..fe4437f 100644
--- a/conf/SystemML-config.xml.template
+++ b/conf/SystemML-config.xml.template
@@ -71,4 +71,7 @@
 

false
+
+   
+   -1
 

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/129f0f6b/src/main/java/org/apache/sysml/api/DMLScript.java
--
diff --git a/src/main/java/org/apache/sysml/api/DMLScript.java 
b/src/main/java/org/apache/sysml/api/DMLScript.java
index ce60d55..febbf13 100644
--- a/src/main/java/org/apache/sysml/api/DMLScript.java
+++ b/src/main/java/org/apache/sysml/api/DMLScript.java
@@ -89,6 +89,7 @@ import 
org.apache.sysml.runtime.controlprogram.parfor.ProgramConverter;
 import 
org.apache.sysml.runtime.controlprogram.parfor.stat.InfrastructureAnalyzer;
 import org.apache.sysml.runtime.controlprogram.parfor.util.IDHandler;
 import org.apache.sysml.runtime.instructions.gpu.context.GPUContext;
+import org.apache.sysml.runtime.instructions.gpu.context.GPUContextPool;
 import org.apache.sysml.runtime.io.IOUtilFunctions;
 import org.apache.sysml.runtime.matrix.CleanupMR;
 import org.apache.sysml.runtime.matrix.data.LibMatrixDNN;
@@ -111,7 +112,7 @@ public class DMLScript
HADOOP, // execute all matrix operations in MR
SINGLE_NODE,// execute all matrix operations in CP
HYBRID, // execute matrix operations i

incubator-systemml git commit: [HOTFIX] for missing apache license in CSRPointer

2017-04-22 Thread nakul02

Repository: incubator-systemml
Updated Branches:
  refs/heads/master 63e28a37b -> b481324d0


[HOTFIX] for missing apache license in CSRPointer


Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/b481324d
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/b481324d
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/b481324d

Branch: refs/heads/master
Commit: b481324d06429d3435fcd25a78aef971e5498b6c
Parents: 63e28a3
Author: Nakul Jindal 
Authored: Sat Apr 22 00:55:50 2017 -0700
Committer: Nakul Jindal 
Committed: Sat Apr 22 00:55:50 2017 -0700

--
 .../instructions/gpu/context/CSRPointer.java | 19 +++
 1 file changed, 19 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/b481324d/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/CSRPointer.java
--
diff --git 
a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/CSRPointer.java
 
b/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/CSRPointer.java
index 5e202a9..c25bd22 100644
--- 
a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/CSRPointer.java
+++ 
b/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/CSRPointer.java
@@ -1,3 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 package org.apache.sysml.runtime.instructions.gpu.context;
 
 import static jcuda.jcusparse.JCusparse.cusparseCreateMatDescr;

incubator-systemml git commit: [SYSTEMML-1034] Initial implementation of "solve" for GPU

2017-04-30 Thread nakul02

Repository: incubator-systemml
Updated Branches:
  refs/heads/master f2a927f87 -> e8fbc7539


[SYSTEMML-1034] Initial implementation of "solve" for GPU

Closes #476


Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/e8fbc753
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/e8fbc753
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/e8fbc753

Branch: refs/heads/master
Commit: e8fbc753988dc94e97a8e8b723e22e89483a1fc6
Parents: f2a927f
Author: Nakul Jindal 
Authored: Sun Apr 30 21:45:21 2017 -0700
Committer: Nakul Jindal 
Committed: Sun Apr 30 21:45:21 2017 -0700

--
 .../java/org/apache/sysml/hops/BinaryOp.java|   2 +-
 .../instructions/GPUInstructionParser.java  |  17 ++-
 .../gpu/BuiltinBinaryGPUInstruction.java|  78 +++
 .../gpu/BuiltinUnaryGPUInstruction.java |   2 +-
 .../instructions/gpu/GPUInstruction.java|   2 +-
 .../gpu/MatrixMatrixBuiltinGPUInstruction.java  |  58 
 .../instructions/gpu/context/CSRPointer.java|  29 +++-
 .../instructions/gpu/context/GPUContext.java|  35 -
 .../instructions/gpu/context/GPUObject.java |  72 +++---
 .../runtime/matrix/data/LibMatrixCUDA.java  | 133 ++-
 10 files changed, 391 insertions(+), 37 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/e8fbc753/src/main/java/org/apache/sysml/hops/BinaryOp.java
--
diff --git a/src/main/java/org/apache/sysml/hops/BinaryOp.java 
b/src/main/java/org/apache/sysml/hops/BinaryOp.java
index 7ddc656..17a099f 100644
--- a/src/main/java/org/apache/sysml/hops/BinaryOp.java
+++ b/src/main/java/org/apache/sysml/hops/BinaryOp.java
@@ -592,7 +592,7 @@ public class BinaryOp extends Hop
if ( et == ExecType.CP ) 
{
if(DMLScript.USE_ACCELERATOR && 
(DMLScript.FORCE_ACCELERATOR || getMemEstimate() < 
OptimizerUtils.GPU_MEMORY_BUDGET) 
-   && (op == OpOp2.MULT || op == 
OpOp2.PLUS || op == OpOp2.MINUS || op == OpOp2.DIV || op == OpOp2.POW)) {
+   && (op == OpOp2.MULT || op == 
OpOp2.PLUS || op == OpOp2.MINUS || op == OpOp2.DIV || op == OpOp2.POW || op == 
OpOp2.SOLVE)) {
et = ExecType.GPU;
}


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/e8fbc753/src/main/java/org/apache/sysml/runtime/instructions/GPUInstructionParser.java
--
diff --git 
a/src/main/java/org/apache/sysml/runtime/instructions/GPUInstructionParser.java 
b/src/main/java/org/apache/sysml/runtime/instructions/GPUInstructionParser.java
index e5b3326..ef0412c 100644
--- 
a/src/main/java/org/apache/sysml/runtime/instructions/GPUInstructionParser.java
+++ 
b/src/main/java/org/apache/sysml/runtime/instructions/GPUInstructionParser.java
@@ -23,6 +23,7 @@ import java.util.HashMap;
 import org.apache.sysml.runtime.DMLRuntimeException;
 import org.apache.sysml.runtime.instructions.gpu.AggregateBinaryGPUInstruction;
 import 
org.apache.sysml.runtime.instructions.gpu.ArithmeticBinaryGPUInstruction;
+import org.apache.sysml.runtime.instructions.gpu.BuiltinBinaryGPUInstruction;
 import org.apache.sysml.runtime.instructions.gpu.BuiltinUnaryGPUInstruction;
 import org.apache.sysml.runtime.instructions.gpu.ConvolutionGPUInstruction;
 import org.apache.sysml.runtime.instructions.gpu.GPUInstruction;
@@ -68,12 +69,15 @@ public class GPUInstructionParser  extends InstructionParser
String2GPUInstructionType.put( "^2"   , 
GPUINSTRUCTION_TYPE.ArithmeticBinary); //special ^ case
String2GPUInstructionType.put( "*2"   , 
GPUINSTRUCTION_TYPE.ArithmeticBinary); //special * case
String2GPUInstructionType.put( "-nz"  , 
GPUINSTRUCTION_TYPE.ArithmeticBinary); //special - case
-   String2GPUInstructionType.put( "+*"  , 
GPUINSTRUCTION_TYPE.ArithmeticBinary); 
-   String2GPUInstructionType.put( "-*"  , 
GPUINSTRUCTION_TYPE.ArithmeticBinary); 
+   String2GPUInstructionType.put( "+*" , 
GPUINSTRUCTION_TYPE.ArithmeticBinary);
+   String2GPUInstructionType.put( "-*" , 
GPUINSTRUCTION_TYPE.ArithmeticBinary);

// Builtin functions
-   String2GPUInstructionType.put( "sel+"  , 
GPUINSTRUCTION_TYPE.BuiltinUnary);
-   String2GPUInstructionType.put( "exp"  , 
GPUINSTRUCTION_TYPE.BuiltinUnary);
+   String2GPUInstructionType.put( "sel+"   , 
GPUINSTRUCTION_TYPE.Bui

incubator-systemml git commit: [HOTFIX] changes setGPU and setForceGPU to do the right thing in mlctx

2017-05-01 Thread nakul02

Repository: incubator-systemml
Updated Branches:
  refs/heads/master 1cc219527 -> 7989ab4f3


[HOTFIX] changes setGPU and setForceGPU to do the right thing in mlctx


Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/7989ab4f
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/7989ab4f
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/7989ab4f

Branch: refs/heads/master
Commit: 7989ab4f39802d0706618d495d06cb8126f98300
Parents: 1cc2195
Author: Nakul Jindal 
Authored: Mon May 1 17:50:44 2017 -0700
Committer: Nakul Jindal 
Committed: Mon May 1 17:50:44 2017 -0700

--
 .../sysml/api/mlcontext/ScriptExecutor.java | 24 ++--
 1 file changed, 12 insertions(+), 12 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/7989ab4f/src/main/java/org/apache/sysml/api/mlcontext/ScriptExecutor.java
--
diff --git a/src/main/java/org/apache/sysml/api/mlcontext/ScriptExecutor.java 
b/src/main/java/org/apache/sysml/api/mlcontext/ScriptExecutor.java
index 2044875..ee710b6 100644
--- a/src/main/java/org/apache/sysml/api/mlcontext/ScriptExecutor.java
+++ b/src/main/java/org/apache/sysml/api/mlcontext/ScriptExecutor.java
@@ -248,12 +248,8 @@ public class ScriptExecutor {
if (symbolTable != null) {
executionContext.setVariables(symbolTable);
}
-   oldGPU = DMLScript.USE_ACCELERATOR; 
-   oldStatistics = DMLScript.STATISTICS;
-   oldForceGPU = DMLScript.FORCE_ACCELERATOR;
-   DMLScript.USE_ACCELERATOR = gpu;
-   DMLScript.FORCE_ACCELERATOR = forceGPU;
-   DMLScript.STATISTICS = statistics;
+oldStatistics = DMLScript.STATISTICS;
+DMLScript.STATISTICS = statistics;
}
 
/**
@@ -654,17 +650,21 @@ public class ScriptExecutor {
 * @param enabled
 *  true if enabled, false otherwise
 */
-   public void setGPU(boolean enabled) {
-   this.gpu = enabled;
-   }
+public void setGPU(boolean enabled) {
+this.gpu = enabled;
+oldGPU = DMLScript.USE_ACCELERATOR;
+DMLScript.USE_ACCELERATOR = gpu;
+}

/**
 * Whether or not to force GPU usage
 * @param enabled
 *  true if enabled, false otherwise
 */
-   public void setForceGPU(boolean enabled) {
-   this.forceGPU = enabled;
-   }
+public void setForceGPU(boolean enabled) {
+this.forceGPU = enabled;
+oldForceGPU = DMLScript.FORCE_ACCELERATOR;
+DMLScript.FORCE_ACCELERATOR = forceGPU;
+}
 
 }

incubator-systemml git commit: [HOTFIX] Bug fix for solve, removed warnings and added instrumentation

2017-05-04 Thread nakul02

Repository: incubator-systemml
Updated Branches:
  refs/heads/master 76f3ca5d3 -> 2c5c3b14e


[HOTFIX] Bug fix for solve, removed warnings and added instrumentation


Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/2c5c3b14
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/2c5c3b14
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/2c5c3b14

Branch: refs/heads/master
Commit: 2c5c3b14e1906cda70ae1581b19a5e908b3ab329
Parents: 76f3ca5
Author: Nakul Jindal 
Authored: Thu May 4 16:26:47 2017 -0700
Committer: Nakul Jindal 
Committed: Thu May 4 16:26:47 2017 -0700

--
 .../instructions/GPUInstructionParser.java  |  4 +-
 .../gpu/BuiltinBinaryGPUInstruction.java|  2 +
 .../instructions/gpu/GPUInstruction.java| 28 ---
 .../gpu/MatrixMatrixBuiltinGPUInstruction.java  |  1 +
 .../instructions/gpu/context/GPUContext.java|  2 +
 .../instructions/gpu/context/GPUObject.java |  3 +-
 .../runtime/matrix/data/LibMatrixCUDA.java  | 77 +++-
 7 files changed, 86 insertions(+), 31 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/2c5c3b14/src/main/java/org/apache/sysml/runtime/instructions/GPUInstructionParser.java
--
diff --git 
a/src/main/java/org/apache/sysml/runtime/instructions/GPUInstructionParser.java 
b/src/main/java/org/apache/sysml/runtime/instructions/GPUInstructionParser.java
index ef0412c..4a45521 100644
--- 
a/src/main/java/org/apache/sysml/runtime/instructions/GPUInstructionParser.java
+++ 
b/src/main/java/org/apache/sysml/runtime/instructions/GPUInstructionParser.java
@@ -35,9 +35,9 @@ import 
org.apache.sysml.runtime.instructions.gpu.AggregateUnaryGPUInstruction;
 
 public class GPUInstructionParser  extends InstructionParser 
 {
-   public static final HashMap 
String2GPUInstructionType;
+   static final HashMap 
String2GPUInstructionType;
static {
-   String2GPUInstructionType = new HashMap();
+   String2GPUInstructionType = new HashMap<>();
 
// Neural Network Operators
String2GPUInstructionType.put( "relu_backward",  
GPUINSTRUCTION_TYPE.Convolution);

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/2c5c3b14/src/main/java/org/apache/sysml/runtime/instructions/gpu/BuiltinBinaryGPUInstruction.java
--
diff --git 
a/src/main/java/org/apache/sysml/runtime/instructions/gpu/BuiltinBinaryGPUInstruction.java
 
b/src/main/java/org/apache/sysml/runtime/instructions/gpu/BuiltinBinaryGPUInstruction.java
index 372f883..24e9e79 100644
--- 
a/src/main/java/org/apache/sysml/runtime/instructions/gpu/BuiltinBinaryGPUInstruction.java
+++ 
b/src/main/java/org/apache/sysml/runtime/instructions/gpu/BuiltinBinaryGPUInstruction.java
@@ -30,7 +30,9 @@ import org.apache.sysml.runtime.matrix.operators.Operator;
 
 public abstract class BuiltinBinaryGPUInstruction extends GPUInstruction {
 
+  @SuppressWarnings("unused")
   private int _arity;
+
   CPOperand output;
   CPOperand input1, input2;
 

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/2c5c3b14/src/main/java/org/apache/sysml/runtime/instructions/gpu/GPUInstruction.java
--
diff --git 
a/src/main/java/org/apache/sysml/runtime/instructions/gpu/GPUInstruction.java 
b/src/main/java/org/apache/sysml/runtime/instructions/gpu/GPUInstruction.java
index 9eef072..f4c523b 100644
--- 
a/src/main/java/org/apache/sysml/runtime/instructions/gpu/GPUInstruction.java
+++ 
b/src/main/java/org/apache/sysml/runtime/instructions/gpu/GPUInstruction.java
@@ -35,16 +35,20 @@ public abstract class GPUInstruction extends Instruction
public enum GPUINSTRUCTION_TYPE { AggregateUnary, AggregateBinary, 
Convolution, MMTSJ, Reorg, ArithmeticBinary, BuiltinUnary, BuiltinBinary, 
Builtin };
 
// Memory/conversions
-   public final static String MISC_TIMER_HOST_TO_DEVICE =  
"H2D";  // time spent in bringing data to gpu (from host)
-   public final static String MISC_TIMER_DEVICE_TO_HOST =  
"D2H";  // time spent in bringing data from gpu (to host)
-   public final static String MISC_TIMER_DEVICE_TO_DEVICE =
"D2D";  // time spent in copying data from one region on the device to 
another
-   public final static String MISC_TIMER_SPARSE_TO_DENSE = 
"s2d";  // time spent in converting data from sparse to dense
-   public final static String MISC_TIMER_DENSE_TO_SPARSE = 
"d2s";  // time spent in converting d

[1/2] incubator-systemml git commit: [SYSTEMML-1344] sqrt, round, abs, log, floor, ceil, trig funcs & sign for GPU

2017-05-17 Thread nakul02

Repository: incubator-systemml
Updated Branches:
  refs/heads/master 0d553e384 -> 1fc764b9b


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/1fc764b9/src/main/java/org/apache/sysml/runtime/instructions/gpu/GPUInstruction.java
--
diff --git 
a/src/main/java/org/apache/sysml/runtime/instructions/gpu/GPUInstruction.java 
b/src/main/java/org/apache/sysml/runtime/instructions/gpu/GPUInstruction.java
index f4c523b..48b7da6 100644
--- 
a/src/main/java/org/apache/sysml/runtime/instructions/gpu/GPUInstruction.java
+++ 
b/src/main/java/org/apache/sysml/runtime/instructions/gpu/GPUInstruction.java
@@ -61,41 +61,55 @@ public abstract class GPUInstruction extends Instruction
public final static String MISC_TIMER_SYRK_LIB =

"Msyrk";// time spent in symmetric rank-k update
 
// Other BLAS instructions
-   public final static String MISC_TIMER_DAXPY_LIB = "daxpy";  // time 
spent in daxpy
-   public final static String MISC_TIMER_QR_BUFFER = "qr_buffer";  // time 
spent in calculating buffer needed to perform QR
-   public final static String MISC_TIMER_QR = "qr";// time spent 
in doing QR
-   public final static String MISC_TIMER_ORMQR = "ormqr"; // time spent in 
ormqr
-   public final static String MISC_TIMER_TRSM = "trsm"; // time spent in 
cublas Dtrsm
+   public final static String MISC_TIMER_DAXPY_LIB =   "daxpy";// time 
spent in daxpy
+   public final static String MISC_TIMER_QR_BUFFER =   "qr_buffer";// time 
spent in calculating buffer needed to perform QR
+   public final static String MISC_TIMER_QR =  "qr";   // time 
spent in doing QR
+   public final static String MISC_TIMER_ORMQR =   "ormqr";// time 
spent in ormqr
+   public final static String MISC_TIMER_TRSM ="trsm"; // time 
spent in cublas Dtrsm
 
// Transpose
-   public final static String MISC_TIMER_SPARSE_DGEAM_LIB =
"sdgeaml";  // time spent in sparse transpose (and other ops of type 
a*op(A) + b*op(B))
-   public final static String MISC_TIMER_DENSE_DGEAM_LIB = 
"ddgeaml";  // time spent in dense transpose (and other ops of type a*op(A) 
+ b*op(B))
-   public final static String MISC_TIMER_TRANSPOSE_LIB =   "dtl";  
// time spent on dense transpose, this includes allocation of 
output
+   public final static String MISC_TIMER_SPARSE_DGEAM_LIB ="sdgeaml";  
// time spent in sparse transpose (and other ops of type a*op(A) + b*op(B))
+   public final static String MISC_TIMER_DENSE_DGEAM_LIB = "ddgeaml";  
// time spent in dense transpose (and other ops of type a*op(A) + b*op(B))
+   public final static String MISC_TIMER_TRANSPOSE_LIB =   "dtl";  
// time spent on dense transpose, this includes allocation of output
 
// Custom kernels
-   public final static String MISC_TIMER_MATRIX_MATRIX_CELLWISE_OP_KERNEL 
="mmck"; // time spent in matrix-matrix cellwise operations
-   public final static String MISC_TIMER_COMPARE_AND_SET_KERNEL =  
"cask"; // time spent in compareAndSet kernel
-   public final static String MISC_TIMER_EXP_KERNEL =  
"expk"; 
// time spent in the exp kernel
-   public final static String MISC_TIMER_DAXPY_MV_KERNEL = 
"daxpymv";  // time 
spent in the daxpy_matrix_vector kernel
-   public final static String MISC_TIMER_UPPER_TO_LOWER_TRIANGLE_KERNEL =  
"u2lk"; // time spent in the copy_u2l_dense kernel
-   public final static String MISC_TIMER_FILL_KERNEL   =   

"fillk"; // time spent in the "fill" kernel
-   public final static String MISC_TIMER_MATRIX_SCALAR_OP_KERNEL = 
"msk";  // time spent in the matrix scalar 
kernel
-   public final static String MISC_TIMER_REDUCE_ALL_KERNEL =   
"rallk"; // time spent in 
reduce all kernel
-   public final static String MISC_TIMER_REDUCE_ROW_KERNEL =   
"rrowk"; // time spent in 
reduce row kernel
-   public final static String MISC_TIMER_REDUCE_COL_KERNEL =   
"rcolk";// time spent 
in reduce column kernel
+   public final static String MISC_TIMER_MATRIX_MATRIX_CELLWISE_OP_KERNEL 
= "mmck";   // time spent in matrix-matrix cellwise operations
+   public final static String MISC_TIMER_COMPARE_AND_SET_KERNEL =

[2/2] incubator-systemml git commit: [SYSTEMML-1344] sqrt, round, abs, log, floor, ceil, trig funcs & sign for GPU

2017-05-17 Thread nakul02

[SYSTEMML-1344] sqrt,round,abs,log,floor,ceil,trig funcs & sign for GPU

Closes #503


Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/1fc764b9
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/1fc764b9
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/1fc764b9

Branch: refs/heads/master
Commit: 1fc764b9b099271822056a82e248acdbb785dc63
Parents: 0d553e3
Author: Nakul Jindal 
Authored: Wed May 17 10:55:51 2017 -0700
Committer: Nakul Jindal 
Committed: Wed May 17 10:55:51 2017 -0700

--
 src/main/cpp/kernels/Makefile   |   28 +
 src/main/cpp/kernels/SystemML.cu|  187 ++
 src/main/cpp/kernels/SystemML.ptx   | 2506 ++
 .../java/org/apache/sysml/hops/UnaryOp.java |   10 +-
 .../instructions/GPUInstructionParser.java  |   19 +-
 .../instructions/gpu/GPUInstruction.java|   72 +-
 .../gpu/MatrixBuiltinGPUInstruction.java|   41 +-
 .../instructions/gpu/context/CSRPointer.java|2 +-
 .../instructions/gpu/context/GPUObject.java |4 +-
 .../runtime/matrix/data/LibMatrixCUDA.java  |  226 +-
 10 files changed, 2577 insertions(+), 518 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/1fc764b9/src/main/cpp/kernels/Makefile
--
diff --git a/src/main/cpp/kernels/Makefile b/src/main/cpp/kernels/Makefile
new file mode 100644
index 000..0b003f3
--- /dev/null
+++ b/src/main/cpp/kernels/Makefile
@@ -0,0 +1,28 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+NVCC=nvcc
+CUDAFLAGS= -ptx -c -arch=sm_30
+
+SystemML.o: SystemML.cu
+   $(NVCC) $(CUDAFLAGS)  SystemML.cu
+
+all: SystemML.o
+   ;
+
+clean:
+   rm -rf SystemML.ptx

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/1fc764b9/src/main/cpp/kernels/SystemML.cu
--
diff --git a/src/main/cpp/kernels/SystemML.cu b/src/main/cpp/kernels/SystemML.cu
index 2651e4a..5b4574e 100644
--- a/src/main/cpp/kernels/SystemML.cu
+++ b/src/main/cpp/kernels/SystemML.cu
@@ -656,3 +656,190 @@ __global__ void matrix_exp(double *A, double *C, unsigned 
int size) {
 C[index] = exp(A[index]);
 }
 }
+
+/**
+ * Do an sqrt over all the elements of a matrix
+ * @param A the input matrix (of length = size)
+ * @param C the pre-allocated output matrix (of length = size)
+ * @param size the length of the input and output matrices
+ */
+extern "C"
+__global__ void matrix_sqrt(double *A, double *C, unsigned int size) {
+int index = blockIdx.x * blockDim.x + threadIdx.x;
+if (index < size){
+C[index] = sqrt(A[index]);
+}
+}
+
+/**
+ * Do a round over all the elements of a matrix
+ * @param A the input matrix (of length = size)
+ * @param C the pre-allocated output matrix (of length = size)
+ * @param size the length of the input and output matrices
+ */
+extern "C"
+__global__ void matrix_round(double *A, double *C, unsigned int size) {
+int index = blockIdx.x * blockDim.x + threadIdx.x;
+if (index < size){
+C[index] = (double)llround(A[index]);
+}
+}
+
+/**
+ * Do an abs over all the elements of a matrix
+ * @param A the input matrix (of length = size)
+ * @param C the pre-allocated output matrix (of length = size)
+ * @param size the length of the input and output matrices
+ */
+extern "C"
+__global__ void matrix_abs(double *A, double *C, unsigned int size) {
+int index = blockIdx.x * blockDim.x + threadIdx.x;
+if (index < size){
+C[index] = (double)fabs(A[index]);
+}
+}
+
+/**
+ * Do a log over all the elements of a matrix
+ * @param A the input matrix (of length = size)
+ * @param C the pre-allocated output matrix (of length = size)
+ * @param size the length of the input and output matrices
+ */
+extern "C"
+__global__ void matrix_log(double *A, double *C, unsigned int size) {
+int index = blockIdx.x * blockDim.x + threadIdx.x;
+if

incubator-systemml git commit: [HOTFIX] for sparse GPU transpose

2017-05-17 Thread nakul02

Repository: incubator-systemml
Updated Branches:
  refs/heads/master 1fc764b9b -> c3aeb48bf


[HOTFIX] for sparse GPU transpose


Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/c3aeb48b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/c3aeb48b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/c3aeb48b

Branch: refs/heads/master
Commit: c3aeb48bf6b54febb861b7b4381c3d7af450a8e8
Parents: 1fc764b
Author: Nakul Jindal 
Authored: Wed May 17 18:46:21 2017 -0700
Committer: Nakul Jindal 
Committed: Wed May 17 18:46:21 2017 -0700

--
 .../runtime/matrix/data/LibMatrixCUDA.java  | 118 +++
 1 file changed, 68 insertions(+), 50 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/c3aeb48b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java
--
diff --git 
a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java 
b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java
index 074119b..b023159 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java
@@ -52,6 +52,7 @@ import static jcuda.jcudnn.cudnnDataType.CUDNN_DATA_DOUBLE;
 import static jcuda.jcudnn.cudnnNanPropagation.CUDNN_PROPAGATE_NAN;
 import static jcuda.jcudnn.cudnnPoolingMode.CUDNN_POOLING_MAX;
 import static jcuda.jcudnn.cudnnTensorFormat.CUDNN_TENSOR_NCHW;
+import static jcuda.jcusparse.JCusparse.cusparseDcsr2csc;
 import static jcuda.jcusparse.JCusparse.cusparseDcsrgemm;
 import static jcuda.jcusparse.JCusparse.cusparseDcsrmv;
 import static 
jcuda.jcusparse.cusparseOperation.CUSPARSE_OPERATION_NON_TRANSPOSE;
@@ -61,6 +62,8 @@ import static 
jcuda.runtime.cudaMemcpyKind.cudaMemcpyDeviceToDevice;
 import static jcuda.runtime.cudaMemcpyKind.cudaMemcpyDeviceToHost;
 import static jcuda.runtime.cudaMemcpyKind.cudaMemcpyHostToDevice;
 
+import jcuda.jcusparse.cusparseAction;
+import jcuda.jcusparse.cusparseIndexBase;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.sysml.api.DMLScript;
@@ -2732,7 +2735,7 @@ public class LibMatrixCUDA {
 * Performs sparse and dense dgeam given two input matrices
 * C = alpha* op( A ) + beta* op ( B )
 * where op = transpose or not (specified by isLeftTransposed and 
isRightTransposed).
-*
+* To indicate a transpose operation, make sure in1 == in2 and 
isLeftTransposed == isRightTransposed == true
 * @param ec execution context
 * @param gCtx a valid {@link GPUContext}
 * @param instName the invoking instruction's name for record {@link 
Statistics}.
@@ -2756,35 +2759,6 @@ public class LibMatrixCUDA {
int transa = isLeftTransposed ? CUBLAS_OP_T : CUBLAS_OP_N;
int transb = isRightTransposed ? CUBLAS_OP_T : CUBLAS_OP_N;
 
-   int lda = (int) in1.getNumColumns();
-   int ldb = (int) in2.getNumColumns();
-   int m = (int) in1.getNumColumns();
-   int n = (int) in2.getNumRows();
-   if (isLeftTransposed && isRightTransposed) {
-   m = (int) in1.getNumRows();
-   n = (int) in2.getNumColumns();
-   }
-   else if (isLeftTransposed) {
-   m = (int) in1.getNumRows();
-   } else if (isRightTransposed) {
-   n = (int) in2.getNumColumns();
-   }
-   int ldc = m;
-
-
-
-   /**
-   int m = (int) in1.getNumRows();
-   int n = (int) in1.getNumColumns();
-   if(!isLeftTransposed && isRightTransposed) {
-   m = (int) in1.getNumColumns();
-   n = (int) in1.getNumRows();
-   }
-   int lda = isLeftTransposed ? n : m;
-   int ldb = isRightTransposed ? n : m;
-   int ldc = m;
-   **/
-
MatrixObject out = ec.getMatrixObject(outputName);
boolean isSparse1 = isInSparseFormat(gCtx, in1);
boolean isSparse2 = isInSparseFormat(gCtx, in2);
@@ -2792,39 +2766,83 @@ public class LibMatrixCUDA {
long t0=0,t1=0;
// TODO: Implement sparse-dense matrix cublasDgeam kernel
if(isSparse1 || isSparse2) {
+   int m = (int)in1.getNumRows();
+   int n = (int)in1.getNumColumns();
// Invoke cuSparse when either are in sparse format
// Perform sparse-sparse dgeam
-

[1/2] incubator-systemml git commit: [SYSTEMML-1625] GPU Unit Tests (and GPU row/col variance bug fix)

2017-05-31 Thread nakul02

Repository: incubator-systemml
Updated Branches:
  refs/heads/master ceeec4bbf -> 772fb5883


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/772fb588/src/test/java/org/apache/sysml/test/gpu/MatrixMatrixElementWiseOpTests.java
--
diff --git 
a/src/test/java/org/apache/sysml/test/gpu/MatrixMatrixElementWiseOpTests.java 
b/src/test/java/org/apache/sysml/test/gpu/MatrixMatrixElementWiseOpTests.java
new file mode 100644
index 000..4052fef
--- /dev/null
+++ 
b/src/test/java/org/apache/sysml/test/gpu/MatrixMatrixElementWiseOpTests.java
@@ -0,0 +1,271 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.test.gpu;
+
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+
+import org.apache.sysml.api.mlcontext.Matrix;
+import org.apache.sysml.test.utils.TestUtils;
+import org.junit.Ignore;
+import org.junit.Test;
+
+/**
+ * Test Elementwise operations on the GPU
+ */
+public class MatrixMatrixElementWiseOpTests extends GPUTests {
+   private final static String TEST_NAME = 
"MatrixMatrixElementWiseOpTests";
+
+   private final int[] rowSizes = new int[] { 1, 64, 130, 1024, 2049 };
+   private final int[] columnSizes = new int[] { 1, 64, 130, 1024, 2049 };
+   private final double[] sparsities = new double[] { 0.0, 0.03, 0.3, 0.9 
};
+   private final double[] scalars = new double[] { 0.0, 0.5, 2.0 };
+   private final int seed = 42;
+
+   @Override
+   public void setUp() {
+   TestUtils.clearAssertionInformation();
+   addTestConfiguration(TEST_DIR, TEST_NAME);
+   getAndLoadTestConfiguration(TEST_NAME);
+   }
+
+   @Test
+   public void testAxpy() {
+   runAxpyTest("O = a*X + Y", "X", "Y", "a", "O", "gpu_-*");
+   }
+
+   @Test
+   public void testAxmy() {
+   runAxpyTest("O = X - a*Y", "X", "Y", "a", "O", "gpu_+*");
+   }
+
+   @Test
+   public void testAdd() {
+   runMatrixMatrixElementwiseTest("O = X + Y", "X", "Y", "O", 
"gpu_+");
+   }
+
+   @Test
+   public void testMatrixColumnVectorAdd() {
+   runMatrixColumnVectorTest("O = X + Y", "X", "Y", "O", "gpu_+");
+   }
+
+   @Test
+   public void testMatrixRowVectorAdd() {
+   runMatrixRowVectorTest("O = X + Y", "X", "Y", "O", "gpu_+");
+   }
+
+   @Test
+   public void testSubtract() {
+   runMatrixMatrixElementwiseTest("O = X - Y", "X", "Y", "O", 
"gpu_-");
+   }
+
+   @Test
+   public void testMatrixColumnVectorSubtract() {
+   runMatrixColumnVectorTest("O = X - Y", "X", "Y", "O", "gpu_-");
+   }
+
+   @Test
+   public void testMatrixRowVectorSubtract() {
+   runMatrixRowVectorTest("O = X - Y", "X", "Y", "O", "gpu_-");
+   }
+
+   @Test
+   public void testMultiply() {
+   runMatrixMatrixElementwiseTest("O = X * Y", "X", "Y", "O", 
"gpu_*");
+   }
+
+   @Test
+   public void testMatrixColumnVectorMultiply() {
+   runMatrixColumnVectorTest("O = X * Y", "X", "Y", "O", "gpu_*");
+   }
+
+   @Test
+   public void testMatrixRowVectorMultiply() {
+   runMatrixRowVectorTest("O = X * Y", "X", "Y", "O", "gpu_*");
+   }
+
+   @Test
+   public void testDivide() {
+   runMatrixMatrixElementwiseTest("O = X / Y", "X", "Y", "O", 
"gpu_/");
+   }
+
+   @Test
+   public void testMatrixColumnVectorDivide() {
+   runMatrixColumnVectorTest("O = X / Y", "X", "Y", "O", "gpu_/");
+   }
+
+   @Test
+   public void testMatrixRowVectorDivide() {
+   runMatrixRowVectorTest("O = X / Y", "X", "Y", "O", "gpu_/");
+   }
+
+   // 
+   //  IGNORED TEST **
+   // FIXME : There is a bug in CPU "^" when A ^ B is executed where A & 
B are all zeroes
+   @Ignore
+   @Test
+   public void testPower() {
+   runMatrixMatrixElementwiseTest("O = X ^ Y", "X",

[2/2] incubator-systemml git commit: [SYSTEMML-1625] GPU Unit Tests (and GPU row/col variance bug fix)

2017-05-31 Thread nakul02

[SYSTEMML-1625] GPU Unit Tests (and GPU row/col variance bug fix)

- Documented random matrix generation
- GPU unit test using MLContext. Compares CPU output to GPU
- Pseudo-unit tests for GPU implementations of
  unary ops, unary aggregate ops, transpose, elementwise ops,
  matrix multiplication ops, builtin ops & NN ops
- Fixed crucial bug in col/row var
- gpuTests profile for GPU tests (mvn verify -PgpuTests)
- Updated intellij style for import order

Closes #513


Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/772fb588
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/772fb588
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/772fb588

Branch: refs/heads/master
Commit: 772fb588324916e4225bb6e1970ca6a8f87eb414
Parents: ceeec4b
Author: Nakul Jindal 
Authored: Wed May 31 21:54:13 2017 -0700
Committer: Nakul Jindal 
Committed: Wed May 31 21:54:13 2017 -0700

--
 dev/code-style/systemml-style-intellij.xml  |  18 +
 pom.xml |  10 +
 .../apache/sysml/api/ScriptExecutorUtils.java   |   1 +
 .../context/ExecutionContext.java   |   3 +
 .../instructions/GPUInstructionParser.java  | 120 +++--
 .../instructions/gpu/context/GPUContext.java| 118 +++--
 .../instructions/gpu/context/GPUObject.java |  55 +-
 .../instructions/gpu/context/JCudaKernels.java  |   3 +-
 .../runtime/matrix/data/LibMatrixCUDA.java  | 112 ++--
 .../runtime/matrix/data/LibMatrixDatagen.java   |  78 +--
 .../matrix/data/RandomMatrixGenerator.java  | 123 -
 .../sysml/test/gpu/AggregateUnaryOpTests.java   | 133 +
 .../apache/sysml/test/gpu/BinaryOpTests.java|  85 
 .../org/apache/sysml/test/gpu/GPUTests.java | 250 +
 .../gpu/MatrixMatrixElementWiseOpTests.java | 271 ++
 .../test/gpu/MatrixMultiplicationOpTest.java| 190 +++
 .../sysml/test/gpu/NeuralNetworkOpTests.java| 508 +++
 .../org/apache/sysml/test/gpu/ReorgOpTests.java |  70 +++
 .../gpu/ScalarMatrixElementwiseOpTests.java | 131 +
 .../org/apache/sysml/test/gpu/UnaryOpTests.java | 113 +
 .../apache/sysml/test/gpu/UnaryOpTestsBase.java | 106 
 .../test/integration/gpu/ZPackageSuite.java |  46 ++
 22 files changed, 2308 insertions(+), 236 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/772fb588/dev/code-style/systemml-style-intellij.xml
--
diff --git a/dev/code-style/systemml-style-intellij.xml 
b/dev/code-style/systemml-style-intellij.xml
index 248c600..1ad3209 100644
--- a/dev/code-style/systemml-style-intellij.xml
+++ b/dev/code-style/systemml-style-intellij.xml
@@ -16,7 +16,25 @@
  * specific language governing permissions and limitations
  * under the License.
 -->
+
 
+  
+  
+  
+
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+
+  
   
 
 

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/772fb588/pom.xml
--
diff --git a/pom.xml b/pom.xml
index 5ce5576..99e2dec 100644
--- a/pom.xml
+++ b/pom.xml
@@ -436,6 +436,7 @@

**/slowtest/**

**/integration/**

**/test/unit/**
+   
**/test/gpu/**

 

@@ -478,6 +479,7 @@

-Djava.awt.headless=true
 

+   
${gpuTestsPath} 

**/integration/applications/**/*Suite.java

**/integration/conversion/*Suite.java

**/integration/functions/data/*Suite.java
@@ -896,6 +898,14 @@


 
+   
+   
+   gpuTests
+   
+   
**/integration/gpu/**/*Suite.java
+   
+   
+


ignore-doclint

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/772fb588/src/main/java/org/apache/sysml/api/ScriptExecutorUtils.java
--
diff --git a/src/main/java/org/apache/sysml/api/ScriptExecutorUtils.java 
b/src/main/java/org/apache/sysml/api/ScriptExecutorUtils.java
index f582c36..674a011 100644
--- a/sr

incubator-systemml git commit: [Doc] Change PCA scale value in documentation

2017-06-06 Thread nakul02

Repository: incubator-systemml
Updated Branches:
  refs/heads/master 7bba47409 -> 6b377319e


[Doc] Change PCA scale value in documentation

Closes #530


Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/6b377319
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/6b377319
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/6b377319

Branch: refs/heads/master
Commit: 6b377319e205734c8f26ba28bd18d9e720151d7e
Parents: 7bba474
Author: krishnakalyan3 
Authored: Tue Jun 6 22:18:51 2017 -0700
Committer: Nakul Jindal 
Committed: Tue Jun 6 22:18:51 2017 -0700

--
 docs/algorithms-matrix-factorization.md | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/6b377319/docs/algorithms-matrix-factorization.md
--
diff --git a/docs/algorithms-matrix-factorization.md 
b/docs/algorithms-matrix-factorization.md
index 9af8c19..8777130 100644
--- a/docs/algorithms-matrix-factorization.md
+++ b/docs/algorithms-matrix-factorization.md
@@ -113,7 +113,7 @@ SystemML Language Reference for details.
 -nvargs INPUT=/user/ml/input.mtx
 K=10
 CENTER=1
-SCALE=1O
+SCALE=1
 FMT=csv
 PROJDATA=1
 OUTPUT=/user/ml/pca_output/
@@ -129,7 +129,7 @@ SystemML Language Reference for details.
  -nvargs INPUT=/user/ml/input.mtx
  K=10
  CENTER=1
- SCALE=1O
+ SCALE=1
  FMT=csv
  PROJDATA=1
  OUTPUT=/user/ml/pca_output/
@@ -142,7 +142,7 @@ SystemML Language Reference for details.
 -nvargs INPUT=/user/ml/test_input.mtx
 K=10
 CENTER=1
-SCALE=1O
+SCALE=1
 FMT=csv
 PROJDATA=1
 MODEL=/user/ml/pca_output/
@@ -159,7 +159,7 @@ SystemML Language Reference for details.
  -nvargs INPUT=/user/ml/test_input.mtx
  K=10
  CENTER=1
- SCALE=1O
+ SCALE=1
  FMT=csv
  PROJDATA=1
  MODEL=/user/ml/pca_output/

[1/3] systemml git commit: [FIX] Fixed nested parfor for GPUs

2017-06-10 Thread nakul02

Repository: systemml
Updated Branches:
  refs/heads/master 0bcae49ff -> f58717564


http://git-wip-us.apache.org/repos/asf/systemml/blob/f5871756/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUObject.java
--
diff --git 
a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUObject.java
 
b/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUObject.java
index 0ed34c5..366eee5 100644
--- 
a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUObject.java
+++ 
b/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUObject.java
@@ -66,73 +66,91 @@ public class GPUObject {
 
private static final Log LOG = 
LogFactory.getLog(GPUObject.class.getName());
 
-   /** GPUContext that owns this GPUObject */
+   /**
+* GPUContext that owns this GPUObject
+*/
private final GPUContext gpuContext;
 
-   /** Pointer to the underlying dense matrix block on GPU */
+   /**
+* Pointer to the underlying dense matrix block on GPU
+*/
private Pointer jcudaDenseMatrixPtr = null;
 
-/** Pointer to the underlying sparse matrix block on GPU */
+   /**
+* Pointer to the underlying sparse matrix block on GPU
+*/
private CSRPointer jcudaSparseMatrixPtr = null;
 
-   /** An optional tensor descriptor (and shape) that can be set by a 
tensor instruction such as convolution,
+   /**
+* An optional tensor descriptor (and shape) that can be set by a 
tensor instruction such as convolution,
 * maxpooling and exploited by a subsequent non-tensor instruction such 
as relu
 */
private cudnnTensorDescriptor tensorDescriptor = null;
 
-   /** the shape of this tensor, if in fact this is a tensor */
-   private int [] tensorShape = null;
+   /**
+* the shape of this tensor, if in fact this is a tensor
+*/
+   private int[] tensorShape = null;
 
-   /** whether the block attached to this {@link GPUContext} is dirty on 
the device and needs to be copied back to host */
+   /**
+* whether the block attached to this {@link GPUContext} is dirty on 
the device and needs to be copied back to host
+*/
protected boolean dirty = false;
 
-   /** number of read/write locks on this object (this GPUObject is being 
used in a current instruction) */
+   /**
+* number of read/write locks on this object (this GPUObject is being 
used in a current instruction)
+*/
protected AtomicInteger locks = new AtomicInteger(0);
 
-   /** Timestamp, needed by {@link GPUContext#evict(long)} */
+   /**
+* Timestamp, needed by {@link GPUContext#evict(long)}
+*/
AtomicLong timestamp = new AtomicLong(0);
 
-   /** Whether this block is in sparse format */
+   /**
+* Whether this block is in sparse format
+*/
protected boolean isSparse = false;
 
-   /** Enclosing {@link MatrixObject} instance */
+   /**
+* Enclosing {@link MatrixObject} instance
+*/
protected MatrixObject mat = null;
 
-// private Pointer allocate(String instName, long size) throws 
DMLRuntimeException {
-// return getGPUContext().allocate(instName, size);
-// }
+   //  private Pointer allocate(String instName, long size) throws 
DMLRuntimeException {
+   //  return getGPUContext().allocate(instName, size);
+   //  }
 
@Override
public Object clone() {
GPUObject me = this;
GPUObject that = new GPUObject(me.gpuContext, me.mat);
if (me.tensorShape != null) {
-that.tensorShape = new int[me.tensorShape.length];
-System.arraycopy(me.tensorShape, 0, that.tensorShape, 0, 
me.tensorShape.length);
-that.allocateTensorDescriptor(me.tensorShape[0], 
me.tensorShape[1], me.tensorShape[2], me.tensorShape[3]);
-}
+   that.tensorShape = new int[me.tensorShape.length];
+   System.arraycopy(me.tensorShape, 0, that.tensorShape, 
0, me.tensorShape.length);
+   that.allocateTensorDescriptor(me.tensorShape[0], 
me.tensorShape[1], me.tensorShape[2], me.tensorShape[3]);
+   }
that.dirty = me.dirty;
that.locks = new AtomicInteger(me.locks.get());
that.timestamp = new AtomicLong(me.timestamp.get());
that.isSparse = me.isSparse;
 
try {
-   if (me.jcudaDenseMatrixPtr != null) {
-   long rows = me.mat.getNumRows();
-   long cols = me.mat.getNumColumns();
-   long size = rows * cols * Sizeof.DOUBLE;
-   me.gpuContext.ensureFreeSpace((int)size);
-   that.jcud

[2/3] systemml git commit: [FIX] Fixed nested parfor for GPUs

2017-06-10 Thread nakul02

http://git-wip-us.apache.org/repos/asf/systemml/blob/f5871756/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUContext.java
--
diff --git 
a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUContext.java
 
b/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUContext.java
index 8da67ea..b3c19ef 100644
--- 
a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUContext.java
+++ 
b/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUContext.java
@@ -18,14 +18,24 @@
  */
 package org.apache.sysml.runtime.instructions.gpu.context;
 
-import jcuda.Pointer;
-import jcuda.jcublas.cublasHandle;
-import jcuda.jcudnn.cudnnHandle;
-import jcuda.jcusolver.cusolverDnHandle;
-import jcuda.jcusolver.cusolverSpHandle;
-import jcuda.jcusparse.cusparseHandle;
-import jcuda.runtime.JCuda;
-import jcuda.runtime.cudaDeviceProp;
+import static jcuda.jcublas.JCublas2.cublasCreate;
+import static jcuda.jcublas.JCublas2.cublasDestroy;
+import static jcuda.jcudnn.JCudnn.cudnnCreate;
+import static jcuda.jcudnn.JCudnn.cudnnDestroy;
+import static jcuda.jcusolver.JCusolverDn.cusolverDnCreate;
+import static jcuda.jcusolver.JCusolverDn.cusolverDnDestroy;
+import static jcuda.jcusolver.JCusolverSp.cusolverSpCreate;
+import static jcuda.jcusolver.JCusolverSp.cusolverSpDestroy;
+import static jcuda.jcusparse.JCusparse.cusparseCreate;
+import static jcuda.jcusparse.JCusparse.cusparseDestroy;
+import static jcuda.runtime.JCuda.cudaDeviceScheduleBlockingSync;
+import static jcuda.runtime.JCuda.cudaFree;
+import static jcuda.runtime.JCuda.cudaGetDeviceCount;
+import static jcuda.runtime.JCuda.cudaMalloc;
+import static jcuda.runtime.JCuda.cudaMemGetInfo;
+import static jcuda.runtime.JCuda.cudaMemset;
+import static jcuda.runtime.JCuda.cudaSetDevice;
+import static jcuda.runtime.JCuda.cudaSetDeviceFlags;
 
 import java.util.ArrayList;
 import java.util.Collections;
@@ -45,24 +55,14 @@ import 
org.apache.sysml.runtime.instructions.gpu.GPUInstruction;
 import org.apache.sysml.utils.GPUStatistics;
 import org.apache.sysml.utils.LRUCacheMap;
 
-import static jcuda.jcublas.JCublas2.cublasCreate;
-import static jcuda.jcublas.JCublas2.cublasDestroy;
-import static jcuda.jcudnn.JCudnn.cudnnCreate;
-import static jcuda.jcudnn.JCudnn.cudnnDestroy;
-import static jcuda.jcusolver.JCusolverDn.cusolverDnCreate;
-import static jcuda.jcusolver.JCusolverDn.cusolverDnDestroy;
-import static jcuda.jcusolver.JCusolverSp.cusolverSpCreate;
-import static jcuda.jcusolver.JCusolverSp.cusolverSpDestroy;
-import static jcuda.jcusparse.JCusparse.cusparseCreate;
-import static jcuda.jcusparse.JCusparse.cusparseDestroy;
-import static jcuda.runtime.JCuda.cudaDeviceScheduleBlockingSync;
-import static jcuda.runtime.JCuda.cudaFree;
-import static jcuda.runtime.JCuda.cudaGetDeviceCount;
-import static jcuda.runtime.JCuda.cudaMalloc;
-import static jcuda.runtime.JCuda.cudaMemGetInfo;
-import static jcuda.runtime.JCuda.cudaMemset;
-import static jcuda.runtime.JCuda.cudaSetDevice;
-import static jcuda.runtime.JCuda.cudaSetDeviceFlags;
+import jcuda.Pointer;
+import jcuda.jcublas.cublasHandle;
+import jcuda.jcudnn.cudnnHandle;
+import jcuda.jcusolver.cusolverDnHandle;
+import jcuda.jcusolver.cusolverSpHandle;
+import jcuda.jcusparse.cusparseHandle;
+import jcuda.runtime.JCuda;
+import jcuda.runtime.cudaDeviceProp;
 
 /**
  * Represents a context per GPU accessible through the same JVM
@@ -71,606 +71,643 @@ import static jcuda.runtime.JCuda.cudaSetDeviceFlags;
 public class GPUContext {
 
protected static final Log LOG = 
LogFactory.getLog(GPUContext.class.getName());
+   /**
+* currently employed eviction policy
+*/
+   public final EvictionPolicy evictionPolicy = EvictionPolicy.LRU;
+   /**
+* The minimum CUDA Compute capability needed for SystemML.
+* After compute capability 3.0, 2^31 - 1 blocks and 1024 threads per 
block are supported.
+* If SystemML needs to run on an older card, this logic can be 
revisited.
+*/
+   final int MAJOR_REQUIRED = 3;
+   final int MINOR_REQUIRED = 0;
+   /**
+* active device assigned to this GPUContext instance
+*/
+   private final int deviceNum;
+   // Invoke cudaMemGetInfo to get available memory information. Useful if 
GPU is shared among multiple applications.
+   public double GPU_MEMORY_UTILIZATION_FACTOR = 
ConfigurationManager.getDMLConfig()
+   
.getDoubleValue(DMLConfig.GPU_MEMORY_UTILIZATION_FACTOR);
+   /**
+* Map of free blocks allocated on GPU. Maps size_of_block -> pointer on 
GPU
+*/
+   private LRUCacheMap> freeCUDASpaceMap = new 
LRUCacheMap<>();
+   /**
+* To record size of allocated blocks
+*/
+   private HashMap cudaBlockSizeMap = new HashMap<>();
+   /**
+* list of allocated {@link GPUObject}

[3/3] systemml git commit: [FIX] Fixed nested parfor for GPUs

2017-06-10 Thread nakul02

[FIX] Fixed nested parfor for GPUs

Additionally
- Fixed intellij codestyle accordingly
- Fixed formatting of some GPU related source files

Closes #532


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/f5871756
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/f5871756
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/f5871756

Branch: refs/heads/master
Commit: f587175647a84a3825b174b4d29c0398be17331f
Parents: 0bcae49
Author: Nakul Jindal 
Authored: Sat Jun 10 12:06:47 2017 -0700
Committer: Nakul Jindal 
Committed: Sat Jun 10 12:06:47 2017 -0700

--
 dev/code-style/systemml-style-intellij.xml  |   37 +-
 .../apache/sysml/api/ScriptExecutorUtils.java   |   17 +-
 .../controlprogram/ParForProgramBlock.java  |   19 +-
 .../context/ExecutionContext.java   |   64 +-
 .../controlprogram/parfor/LocalParWorker.java   |2 +-
 .../cp/FunctionCallCPInstruction.java   |   12 +-
 .../gpu/AggregateBinaryGPUInstruction.java  |4 +-
 .../gpu/AggregateUnaryGPUInstruction.java   |2 +-
 .../gpu/ConvolutionGPUInstruction.java  |   18 +-
 .../instructions/gpu/MMTSJGPUInstruction.java   |2 +-
 .../gpu/MatrixBuiltinGPUInstruction.java|   30 +-
 .../MatrixMatrixArithmeticGPUInstruction.java   |2 +-
 .../gpu/MatrixMatrixAxpyGPUInstruction.java |2 +-
 .../gpu/MatrixMatrixBuiltinGPUInstruction.java  |2 +-
 .../instructions/gpu/ReorgGPUInstruction.java   |2 +-
 .../ScalarMatrixArithmeticGPUInstruction.java   |2 +-
 .../instructions/gpu/context/CSRPointer.java|  922 ++---
 .../gpu/context/ExecutionConfig.java|   85 +-
 .../instructions/gpu/context/GPUContext.java| 1257 +-
 .../gpu/context/GPUContextPool.java |  266 ++--
 .../instructions/gpu/context/GPUObject.java |  454 ---
 .../instructions/gpu/context/JCudaKernels.java  |  141 +-
 .../runtime/matrix/data/LibMatrixCUDA.java  |   42 +-
 .../runtime/matrix/data/LibMatrixDNNHelper.java |1 +
 .../org/apache/sysml/test/gpu/GPUTests.java |   47 +-
 .../sysml/test/gpu/NeuralNetworkOpTests.java|  106 +-
 26 files changed, 1917 insertions(+), 1621 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/systemml/blob/f5871756/dev/code-style/systemml-style-intellij.xml
--
diff --git a/dev/code-style/systemml-style-intellij.xml 
b/dev/code-style/systemml-style-intellij.xml
index 1ad3209..b4a53b4 100644
--- a/dev/code-style/systemml-style-intellij.xml
+++ b/dev/code-style/systemml-style-intellij.xml
@@ -1,28 +1,27 @@
 
-
 
   
   
   
 
-  
+  
   
   
   
@@ -32,7 +31,7 @@
   
   
   
-  
+  
 
   
   

http://git-wip-us.apache.org/repos/asf/systemml/blob/f5871756/src/main/java/org/apache/sysml/api/ScriptExecutorUtils.java
--
diff --git a/src/main/java/org/apache/sysml/api/ScriptExecutorUtils.java 
b/src/main/java/org/apache/sysml/api/ScriptExecutorUtils.java
index 674a011..2895aa4 100644
--- a/src/main/java/org/apache/sysml/api/ScriptExecutorUtils.java
+++ b/src/main/java/org/apache/sysml/api/ScriptExecutorUtils.java
@@ -19,6 +19,8 @@
 
 package org.apache.sysml.api;
 
+import java.util.List;
+
 import org.apache.sysml.api.mlcontext.ScriptExecutor;
 import org.apache.sysml.conf.DMLConfig;
 import org.apache.sysml.hops.codegen.SpoofCompiler;
@@ -79,23 +81,22 @@ public class ScriptExecutorUtils {
// GPUs
GPUContextPool.PER_PROCESS_MAX_GPUS = 
dmlconf.getIntValue(DMLConfig.MAX_GPUS_PER_PROCESS);
Statistics.startRunTimer();
-   GPUContext gCtx = null;
try {
// run execute (w/ exception handling to ensure proper 
shutdown)
if (DMLScript.USE_ACCELERATOR && ec != null) {
-   gCtx = GPUContextPool.getFromPool();
-   if (gCtx == null) {
+   List gCtxs = 
GPUContextPool.reserveAllGPUContexts();
+   if (gCtxs == null) {
throw new DMLRuntimeException(
"GPU : Could not create 
GPUContext, either no GPU or all GPUs currently in use");
}
-   gCtx.initializeThread();
-   ec.setGPUContext(gCtx);
+   gCtxs.get(0).initializeThread();
+   ec.setGPUContexts(gCtxs);
}
rtprog.execute(ec);
} finally { // ensure

systemml git commit: [SYSTEMML-1532] python launch script for spark-submit

2017-06-19 Thread nakul02

Repository: systemml
Updated Branches:
  refs/heads/master 8544f6960 -> 3cde999c0


[SYSTEMML-1532] python launch script for spark-submit

Closes #501


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/3cde999c
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/3cde999c
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/3cde999c

Branch: refs/heads/master
Commit: 3cde999c09941d7fc9b4b03f733071b85ec8a343
Parents: 8544f69
Author: krishnakalyan3 
Authored: Mon Jun 19 11:39:00 2017 -0700
Committer: Nakul Jindal 
Committed: Mon Jun 19 11:39:00 2017 -0700

--
 bin/systemml-spark-submit.py | 187 ++
 1 file changed, 187 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/systemml/blob/3cde999c/bin/systemml-spark-submit.py
--
diff --git a/bin/systemml-spark-submit.py b/bin/systemml-spark-submit.py
new file mode 100755
index 000..30974ec
--- /dev/null
+++ b/bin/systemml-spark-submit.py
@@ -0,0 +1,187 @@
+#!/usr/bin/env python
+# -
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# -
+
+import os
+import sys
+from os.path import join, exists, abspath
+from os import environ
+import glob
+import argparse
+import shutil
+import platform
+
+if environ.get('SPARK_HOME') is None:
+print('SPARK_HOME not set')
+sys.exit(1)
+else:
+spark_home = environ.get('SPARK_HOME')
+spark_path = join(spark_home, 'bin', 'spark-submit')
+
+
+# error help print
+def print_usage_and_exit():
+print('Usage: ./systemml-spark-submit.py -f  [arguments]')
+sys.exit(1)
+
+cparser = argparse.ArgumentParser(description='System-ML Spark Submit Script')
+
+# SPARK-SUBMIT Options
+cparser.add_argument('--master', default='local[*]', help='local, yarn-client, 
yarn-cluster', metavar='')
+cparser.add_argument('--driver-memory', default='5G', help='Memory for driver 
(e.g. 512M)', metavar='')
+cparser.add_argument('--num-executors', default='2', help='Number of executors 
to launch', metavar='')
+cparser.add_argument('--executor-memory', default='2G', help='Memory per 
executor', metavar='')
+cparser.add_argument('--executor-cores', default='1', help='Number of cores', 
metavar='')
+cparser.add_argument('--conf', help='Spark configuration file', nargs='+', 
metavar='')
+
+# SYSTEM-ML Options
+cparser.add_argument('-nvargs', help='List of attributeName-attributeValue 
pairs', nargs='+', metavar='')
+cparser.add_argument('-args', help='List of positional argument values', 
metavar='', nargs='+')
+cparser.add_argument('-config', help='System-ML configuration file (e.g 
SystemML-config.xml)', metavar='')
+cparser.add_argument('-exec', default='hybrid_spark', help='System-ML backend 
(e.g spark, spark-hybrid)', metavar='')
+cparser.add_argument('-explain', help='explains plan levels can be hops, 
runtime, '
+  'recompile_hops, recompile_runtime', 
nargs='?', const='runtime', metavar='')
+cparser.add_argument('-debug', help='runs in debug mode', action='store_true')
+cparser.add_argument('-stats', help='Monitor and report caching/recompilation 
statistics, '
+'heavy hitter  is 10 unless 
overridden', nargs='?', const='10', metavar='')
+cparser.add_argument('-gpu', help='uses CUDA instructions when reasonable, '
+  'set  option to skip conservative 
memory estimates '
+  'and use GPU wherever possible', nargs='?')
+cparser.add_argument('-f', required=True, help='specifies dml/pydml file to 
execute; '
+   'path can be local/hdfs/gpfs', 
metavar='')
+
+args = cparser.parse_args()
+
+# Optional arguments
+ml_options = []
+if args.nvargs is not None:
+ml_options.append('-nvargs')
+ml_options.append(' '.join(args.nvargs))
+if args.args is not None:
+ml_opti

systemml git commit: [SYSTEMML-1701] fix need to use -force for gpu

2017-06-19 Thread nakul02

Repository: systemml
Updated Branches:
  refs/heads/master 3cde999c0 -> df8d4a63d


[SYSTEMML-1701] fix need to use -force for gpu

Closes #546


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/df8d4a63
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/df8d4a63
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/df8d4a63

Branch: refs/heads/master
Commit: df8d4a63d8d09cae94b6ca2634e31da554302c72
Parents: 3cde999
Author: Nakul Jindal 
Authored: Mon Jun 19 11:44:22 2017 -0700
Committer: Nakul Jindal 
Committed: Mon Jun 19 11:47:27 2017 -0700

--
 .../java/org/apache/sysml/hops/AggBinaryOp.java |  9 +++--
 .../java/org/apache/sysml/hops/AggUnaryOp.java  | 18 +-
 .../java/org/apache/sysml/hops/BinaryOp.java|  7 ++--
 src/main/java/org/apache/sysml/hops/Hop.java|  4 ++-
 .../org/apache/sysml/hops/OptimizerUtils.java   |  5 +--
 .../java/org/apache/sysml/hops/ReorgOp.java |  4 ++-
 .../java/org/apache/sysml/hops/TernaryOp.java   |  4 ++-
 .../gpu/context/GPUContextPool.java | 35 +++-
 .../runtime/matrix/data/LibMatrixCUDA.java  |  2 +-
 9 files changed, 66 insertions(+), 22 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/systemml/blob/df8d4a63/src/main/java/org/apache/sysml/hops/AggBinaryOp.java
--
diff --git a/src/main/java/org/apache/sysml/hops/AggBinaryOp.java 
b/src/main/java/org/apache/sysml/hops/AggBinaryOp.java
index 21dbbf1..c721efe 100644
--- a/src/main/java/org/apache/sysml/hops/AggBinaryOp.java
+++ b/src/main/java/org/apache/sysml/hops/AggBinaryOp.java
@@ -49,6 +49,7 @@ import org.apache.sysml.parser.Expression.DataType;
 import org.apache.sysml.parser.Expression.ValueType;
 import 
org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PDataPartitionFormat;
 import org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext;
+import org.apache.sysml.runtime.instructions.gpu.context.GPUContextPool;
 import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
 import org.apache.sysml.runtime.matrix.data.MatrixBlock;
 import org.apache.sysml.runtime.matrix.mapred.DistributedCacheInput;
@@ -150,7 +151,7 @@ public class AggBinaryOp extends Hop implements 
MultiThreadedHop
 */
@Override
public Lop constructLops() 
-   throws HopsException, LopsException 
+   throws HopsException, LopsException
{
//return already created lops
if( getLops() != null )
@@ -546,7 +547,8 @@ public class AggBinaryOp extends Hop implements 
MultiThreadedHop
int k = OptimizerUtils.getConstrainedNumThreads(_maxNumThreads);

ExecType et = ExecType.CP;
-   if(DMLScript.USE_ACCELERATOR && (DMLScript.FORCE_ACCELERATOR || 
getMemEstimate() < OptimizerUtils.GPU_MEMORY_BUDGET)) {
+   if(DMLScript.USE_ACCELERATOR && (DMLScript.FORCE_ACCELERATOR || 
getMemEstimate() < GPUContextPool
+   .initialGPUMemBudget())) {
et = ExecType.GPU;
}

@@ -625,7 +627,8 @@ public class AggBinaryOp extends Hop implements 
MultiThreadedHop
{   
Lop matmultCP = null;

-   if(DMLScript.USE_ACCELERATOR && (DMLScript.FORCE_ACCELERATOR || 
getMemEstimate() < OptimizerUtils.GPU_MEMORY_BUDGET)) {
+   if(DMLScript.USE_ACCELERATOR && (DMLScript.FORCE_ACCELERATOR || 
getMemEstimate() < GPUContextPool
+   .initialGPUMemBudget())) {
Hop h1 = getInput().get(0);
Hop h2 = getInput().get(1);
Lop left; Lop right;

http://git-wip-us.apache.org/repos/asf/systemml/blob/df8d4a63/src/main/java/org/apache/sysml/hops/AggUnaryOp.java
--
diff --git a/src/main/java/org/apache/sysml/hops/AggUnaryOp.java 
b/src/main/java/org/apache/sysml/hops/AggUnaryOp.java
index 8e681c1..eb469ab 100644
--- a/src/main/java/org/apache/sysml/hops/AggUnaryOp.java
+++ b/src/main/java/org/apache/sysml/hops/AggUnaryOp.java
@@ -39,6 +39,7 @@ import org.apache.sysml.lops.LopProperties.ExecType;
 import org.apache.sysml.parser.Expression.DataType;
 import org.apache.sysml.parser.Expression.ValueType;
 import org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext;
+import org.apache.sysml.runtime.instructions.gpu.context.GPUContextPool;
 import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
 
 
@@ -149,15 +150,16 @@ public class AggUnaryOp extends Hop implements 
MultiThreadedHop
}

systemml git commit: [SYSTEMML-703] Prepare for GPU on Jenkins, disable expensive NN tests

2017-06-22 Thread nakul02

Repository: systemml
Updated Branches:
  refs/heads/master 345682404 -> 57e11e99c


[SYSTEMML-703] Prepare for GPU on Jenkins, disable expensive NN tests

Closes #550


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/57e11e99
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/57e11e99
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/57e11e99

Branch: refs/heads/master
Commit: 57e11e99c3f110b68ad5e3397f10b30533ab9b79
Parents: 3456824
Author: Nakul Jindal 
Authored: Thu Jun 22 17:04:49 2017 -0700
Committer: Nakul Jindal 
Committed: Thu Jun 22 17:04:49 2017 -0700

--
 docs/release-process.md | 12 +
 .../sysml/test/gpu/NeuralNetworkOpTests.java| 28 +++-
 2 files changed, 33 insertions(+), 7 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/systemml/blob/57e11e99/docs/release-process.md
--
diff --git a/docs/release-process.md b/docs/release-process.md
index f41c7c8..36528bd 100644
--- a/docs/release-process.md
+++ b/docs/release-process.md
@@ -259,6 +259,18 @@ For examples, see the [Spark MLContext Programming 
Guide](http://apache.github.i
 Verify that the performance suite located at scripts/perftest/ executes on 
Spark and Hadoop. Testing should
 include 80MB, 800MB, 8GB, and 80GB data sizes.
 
+# Run NN Unit Tests for GPU
+
+Up to Checklist
+
+The unit tests for NN operators for GPU take a long time to run and are 
therefore not run as part of the Jenkins build.
+They must be run before a release. To run them, edit the 
+[NeuralNetworkOpTests.java](https://github.com/apache/systemml/blob/master/src/test/java/org/apache/sysml/test/gpu/NeuralNetworkOpTests.java)
+file and remove all the `@Ignore` annotations from all the tests. Then run the 
NN unit tests using mvn verify:
+```
+mvn -Dit.test=org.apache.sysml.test.gpu.NeuralNetworkOpTests verify -PgpuTests
+```
+
 
 # Voting
 

http://git-wip-us.apache.org/repos/asf/systemml/blob/57e11e99/src/test/java/org/apache/sysml/test/gpu/NeuralNetworkOpTests.java
--
diff --git a/src/test/java/org/apache/sysml/test/gpu/NeuralNetworkOpTests.java 
b/src/test/java/org/apache/sysml/test/gpu/NeuralNetworkOpTests.java
index f4e931b..c53e803 100644
--- a/src/test/java/org/apache/sysml/test/gpu/NeuralNetworkOpTests.java
+++ b/src/test/java/org/apache/sysml/test/gpu/NeuralNetworkOpTests.java
@@ -35,6 +35,15 @@ import org.junit.Test;
 
 /**
  * Test neural network operations on the GPU
+ * Because of the large number of cases that each test deals with, this class 
takes
+ * very long to run. (It took about 9 hours to run the testMaxPoolBackward() 
to completion.)
+ * The recommended course of action before a release is
+ * 1. Remove the @Ignore annotations
+ * 2. Run just these tests on a machine with CUDA 8 installed.
+ * Only this class can be run like so:
+ * 
+ * mvn -Dit.test=org.apache.sysml.test.gpu.NeuralNetworkOpTests verify 
-PgpuTests
+ * 
  */
 public class NeuralNetworkOpTests extends GPUTests {
 
@@ -100,6 +109,7 @@ public class NeuralNetworkOpTests extends GPUTests {
return 1e-5;
}
 
+   @Ignore
@Test
public void testConv2d() {
String scriptStr = "O = conv2d(image, filter, padding=[padH, 
padW], stride=[strideH, strideW], input_shape=[N,C,H,W], 
filter_shape=[K,C,R,S])";
@@ -253,6 +263,7 @@ public class NeuralNetworkOpTests extends GPUTests {
clearGPUMemory();
}
 
+   @Ignore
@Test
public void testConv2dBackwardFilter() {
String scriptStr = "O = conv2d_backward_filter(image, dout, 
padding=[padH, padW], stride=[strideH, strideW], input_shape=[N,C,H,W], 
filter_shape=[K,C,R,S])";
@@ -298,9 +309,9 @@ public class NeuralNetworkOpTests extends GPUTests {

filterSizeInMB, 
N, K, P, Q, doutSizeInMB,

strideH, 
strideW, padH, padW);

Matrix image = generateInputMatrix(spark, (int) 
N,
-   
(int) (C * H * W), 0.-127.0, 
127, sparsity, seed);
+   
(int) (C * H * W), -127.0, 127, 
sparsity, seed);

systemml git commit: [SYSTEMML-703] Prepare for GPU on Jenkins, disable expensive NN tests

2017-06-22 Thread nakul02

Repository: systemml
Updated Branches:
  refs/heads/gh-pages 7c4907095 -> 05792e0e9


[SYSTEMML-703] Prepare for GPU on Jenkins, disable expensive NN tests

Closes #550


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/05792e0e
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/05792e0e
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/05792e0e

Branch: refs/heads/gh-pages
Commit: 05792e0e947d5d1b9f4c0adbcfec7d0ec4d45bdf
Parents: 7c49070
Author: Nakul Jindal 
Authored: Thu Jun 22 17:04:49 2017 -0700
Committer: Nakul Jindal 
Committed: Thu Jun 22 17:04:49 2017 -0700

--
 release-process.md | 12 
 1 file changed, 12 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/systemml/blob/05792e0e/release-process.md
--
diff --git a/release-process.md b/release-process.md
index f41c7c8..36528bd 100644
--- a/release-process.md
+++ b/release-process.md
@@ -259,6 +259,18 @@ For examples, see the [Spark MLContext Programming 
Guide](http://apache.github.i
 Verify that the performance suite located at scripts/perftest/ executes on 
Spark and Hadoop. Testing should
 include 80MB, 800MB, 8GB, and 80GB data sizes.
 
+# Run NN Unit Tests for GPU
+
+Up to Checklist
+
+The unit tests for NN operators for GPU take a long time to run and are 
therefore not run as part of the Jenkins build.
+They must be run before a release. To run them, edit the 
+[NeuralNetworkOpTests.java](https://github.com/apache/systemml/blob/master/src/test/java/org/apache/sysml/test/gpu/NeuralNetworkOpTests.java)
+file and remove all the `@Ignore` annotations from all the tests. Then run the 
NN unit tests using mvn verify:
+```
+mvn -Dit.test=org.apache.sysml.test.gpu.NeuralNetworkOpTests verify -PgpuTests
+```
+
 
 # Voting

systemml git commit: [SYSTEMML-1731] Added GPU instruction 1-*, -nz, %%, %/%

2017-06-27 Thread nakul02

Repository: systemml
Updated Branches:
  refs/heads/master 50dafa038 -> 2dc441f52


[SYSTEMML-1731] Added GPU instruction 1-*, -nz, %%, %/%

Closes #554


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/2dc441f5
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/2dc441f5
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/2dc441f5

Branch: refs/heads/master
Commit: 2dc441f52e4966d4c160588be6c850d778475a5f
Parents: 50dafa0
Author: Nakul Jindal 
Authored: Tue Jun 27 16:02:38 2017 -0700
Committer: Nakul Jindal 
Committed: Tue Jun 27 16:02:38 2017 -0700

--
 src/main/cpp/kernels/Makefile   |6 +-
 src/main/cpp/kernels/SystemML.cu|   29 +-
 src/main/cpp/kernels/SystemML.ptx   | 1597 +++---
 .../java/org/apache/sysml/hops/BinaryOp.java|6 +-
 .../instructions/GPUInstructionParser.java  |4 +-
 .../runtime/matrix/data/LibMatrixCUDA.java  |   15 +-
 .../gpu/MatrixMatrixElementWiseOpTests.java |   55 +-
 .../gpu/ScalarMatrixElementwiseOpTests.java |   79 +-
 8 files changed, 1175 insertions(+), 616 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/systemml/blob/2dc441f5/src/main/cpp/kernels/Makefile
--
diff --git a/src/main/cpp/kernels/Makefile b/src/main/cpp/kernels/Makefile
index 0b003f3..5feae69 100644
--- a/src/main/cpp/kernels/Makefile
+++ b/src/main/cpp/kernels/Makefile
@@ -16,7 +16,11 @@
 # under the License.
 
 NVCC=nvcc
-CUDAFLAGS= -ptx -c -arch=sm_30
+CUDAFLAGS= -ptx -c -arch=sm_30 
+
+# Use these flags for precise math
+#CUDAFLAGS= -ptx -c -arch=sm_30 -ftz=false -prec-div=true -prec-sqrt=true
+
 
 SystemML.o: SystemML.cu
$(NVCC) $(CUDAFLAGS)  SystemML.cu

http://git-wip-us.apache.org/repos/asf/systemml/blob/2dc441f5/src/main/cpp/kernels/SystemML.cu
--
diff --git a/src/main/cpp/kernels/SystemML.cu b/src/main/cpp/kernels/SystemML.cu
index 5b4574e..3098282 100644
--- a/src/main/cpp/kernels/SystemML.cu
+++ b/src/main/cpp/kernels/SystemML.cu
@@ -24,6 +24,7 @@ nvcc -ptx -arch=sm_30 SystemML.cu
 ***/
 
 #include 
+#include 
 
 
 /**
@@ -54,7 +55,8 @@ __forceinline__ __device__ double getBoolean(int val) {
 
 // op = {0=plus, 1=minus, 2=multiply, 3=divide, 4=power,
 // 5=less, 6=lessequal, 7=greater, 8=greaterequal, 9=equal, 10=notequal,
-// 11=min, 12=max, 13=and, 14=or, 15=log}
+// 11=min, 12=max, 13=and, 14=or, 15=minus1multiply, 16=minusnz,
+// 17=modulus, 18=integer division}
 extern "C"
 __forceinline__ __device__ double binaryOp(double x, double y, int op) {
switch(op) {
@@ -71,6 +73,31 @@ __forceinline__ __device__ double binaryOp(double x, double 
y, int op) {
 case 10 : return getBoolean(x != y);
 case 11 : return min(x, y);
 case 12 : return max(x, y);
+case 13 : return getBoolean((int)llrint(x) & (int)llrint(y));
+case 14 : return getBoolean((int)llrint(x) | (int)llrint(y));
+case 15 : return 1 - x * y;
+case 16 : return (x != 0.0 ? x - y : 0.0);
+case 17 : {
+if (y == 0.0 || y == -0.0){
+return nan("");
+}
+double v = x / y;
+// Check for v being NaN (v != v) or if it is infinity
+if (isnan(v) || isinf(v)){
+return v;
+} else {
+v = floor(v);
+}
+return x - v * y;
+}
+case 18:{
+double v = x / y;
+if (isnan(v) || isinf(v)){
+return v;
+} else {
+return floor(v);
+}
+}
 default : return DBL_MAX;
 }
 }

http://git-wip-us.apache.org/repos/asf/systemml/blob/2dc441f5/src/main/cpp/kernels/SystemML.ptx
--
diff --git a/src/main/cpp/kernels/SystemML.ptx 
b/src/main/cpp/kernels/SystemML.ptx
index 3229581..ab43758 100644
--- a/src/main/cpp/kernels/SystemML.ptx
+++ b/src/main/cpp/kernels/SystemML.ptx
@@ -450,10 +450,10 @@ BB6_6:
.param .u32 matrix_matrix_cellwise_op_param_7
 )
 {
-   .reg .pred  %p<52>;
-   .reg .b32   %r<56>;
-   .reg .f64   %fd<40>;
-   .reg .b64   %rd<15>;
+   .reg .pred  %p<73>;
+   .reg .b32   %r<68>;
+   .reg .f64   %fd<56>;
+   .reg .b64   %rd<19>;
 
 
ld.param.u64%rd2, [matrix_matrix_cellwise_op_param_0];
@@ -475,40 +475,40 @@ BB6_6:
setp.lt.s32 %p2, %r1, %r14;
setp.lt.s32 %p3, %r2, %r10;
and.pred%p4, %p2, %p3;
-   @!%p4 bra   BB7_55;
+   @!%p4 bra   BB7_77;
bra.

systemml git commit: [HOTFIX] for SYSTEMML-1731

2017-06-27 Thread nakul02

Repository: systemml
Updated Branches:
  refs/heads/master 2dc441f52 -> 9f808c43e


[HOTFIX] for SYSTEMML-1731


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/9f808c43
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/9f808c43
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/9f808c43

Branch: refs/heads/master
Commit: 9f808c43e380a90f814f2e5b7a78397edd1bbb90
Parents: 2dc441f
Author: Nakul Jindal 
Authored: Tue Jun 27 17:14:41 2017 -0700
Committer: Nakul Jindal 
Committed: Tue Jun 27 17:14:41 2017 -0700

--
 .../org/apache/sysml/test/gpu/ScalarMatrixElementwiseOpTests.java  | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/systemml/blob/9f808c43/src/test/java/org/apache/sysml/test/gpu/ScalarMatrixElementwiseOpTests.java
--
diff --git 
a/src/test/java/org/apache/sysml/test/gpu/ScalarMatrixElementwiseOpTests.java 
b/src/test/java/org/apache/sysml/test/gpu/ScalarMatrixElementwiseOpTests.java
index 58293d6..c58365a 100644
--- 
a/src/test/java/org/apache/sysml/test/gpu/ScalarMatrixElementwiseOpTests.java
+++ 
b/src/test/java/org/apache/sysml/test/gpu/ScalarMatrixElementwiseOpTests.java
@@ -136,7 +136,7 @@ public class ScalarMatrixElementwiseOpTests extends 
GPUTests {
List cpuOut = runOnCPU(spark, scriptStr, inputs, 
Arrays.asList(output));
List gpuOut = runOnGPU(spark, scriptStr, inputs, 
Arrays.asList(output));
//assertHeavyHitterPresent(heavyHitterOpCode);
-   assertEqualMatrices ((Matrix)cpuOut.get(0), 
(Matrix)gpuOut.get(0));
+   assertEqualObjects (cpuOut.get(0), gpuOut.get(0));
}
 
@Test

systemml git commit: [SYSTEMML-1451][GSoC Phase 1] Single script to run perf tests

2017-07-02 Thread nakul02

Repository: systemml
Updated Branches:
  refs/heads/master 31952e47d -> e7cfcadc9


[SYSTEMML-1451][GSoC Phase 1] Single script to run perf tests

- Single entry point to run perf tests in any combination of algorithms,
  families, matrix shapes & densities
- Reports time taken by a single perf test by parsing the output and
  grep-ing for the time
- Detects tests that did not run and reports in the generated log
- Robust error handling and reporting, informative help message

Closes #537


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/e7cfcadc
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/e7cfcadc
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/e7cfcadc

Branch: refs/heads/master
Commit: e7cfcadc9b0e72637c67c8d6a6dcc62f62ba5177
Parents: 31952e4
Author: krishnakalyan3 
Authored: Sun Jul 2 00:00:49 2017 -0700
Committer: Nakul Jindal 
Committed: Sun Jul 2 00:00:49 2017 -0700

--
 scripts/perftest/python/datagen.py  | 252 
 scripts/perftest/python/predict.py  | 285 +++
 scripts/perftest/python/run_perftest.py | 339 ++
 scripts/perftest/python/train.py| 411 +++
 scripts/perftest/python/utils.py| 296 +++
 5 files changed, 1583 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/systemml/blob/e7cfcadc/scripts/perftest/python/datagen.py
--
diff --git a/scripts/perftest/python/datagen.py 
b/scripts/perftest/python/datagen.py
new file mode 100755
index 000..d9c49e9
--- /dev/null
+++ b/scripts/perftest/python/datagen.py
@@ -0,0 +1,252 @@
+#!/usr/bin/env python3
+#-
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-
+
+import itertools
+from os.path import join
+from utils import split_rowcol, config_writer
+
+# This file contains configuration settings for data generation
+DATA_FORMAT = 'csv'
+
+MATRIX_TYPE_DICT = {'dense': '0.9',
+'sparse': '0.01'}
+
+FAMILY_NO_MATRIX_TYPE = ['clustering', 'stats1', 'stats2']
+
+
+def multinomial_datagen(matrix_dim, matrix_type, datagen_dir):
+
+row, col = split_rowcol(matrix_dim)
+path_name = '.'.join(['multinomial', matrix_type, str(matrix_dim)])
+full_path = join(datagen_dir, path_name)
+
+numSamples = row
+numFeatures = col
+sparsity = MATRIX_TYPE_DICT[matrix_type]
+num_categories = '150'
+intercept = '0'
+X = join(full_path, 'X.data')
+Y = join(full_path, 'Y.data')
+fmt = DATA_FORMAT
+
+config = [numSamples, numFeatures, sparsity, num_categories, intercept,
+  X, Y, fmt, '1']
+
+config_writer(full_path + '.json', config)
+
+return full_path
+
+
+def binomial_datagen(matrix_dim, matrix_type, datagen_dir):
+
+row, col = split_rowcol(matrix_dim)
+path_name = '.'.join(['binomial', matrix_type, str(matrix_dim)])
+full_path = join(datagen_dir, path_name)
+
+numSamples = row
+numFeatures = col
+maxFeatureValue = '5'
+maxWeight = '5'
+loc_weights = join(full_path, 'weight.data')
+loc_data = join(full_path, 'X.data')
+loc_labels = join(full_path, 'Y.data')
+noise = '1'
+intercept = '0'
+sparsity = MATRIX_TYPE_DICT[matrix_type]
+tranform_labels = '1'
+fmt = DATA_FORMAT
+
+config = [numSamples, numFeatures, maxFeatureValue, maxWeight, 
loc_weights, loc_data,
+  loc_labels, noise, intercept, sparsity, fmt, tranform_labels]
+config_writer(full_path + '.json', config)
+
+return full_path
+
+
+def regression1_datagen(matrix_dim, matrix_type, datagen_dir):
+
+row, col = split_rowcol(matrix_dim)
+path_name = '.'.join(['regression1', matrix_type, str(matrix_dim)])
+full_path = join(datagen_dir, path_name)
+
+numSamples = row
+numFeatures = col
+maxFeatureValue = '5'
+maxWeight = '5'
+loc_weights = join(full_

systemml git commit: [SYSTEMML-1735] relational operators for GPU

2017-07-05 Thread nakul02

Repository: systemml
Updated Branches:
  refs/heads/master 978d4de47 -> a7364746a


[SYSTEMML-1735] relational operators for GPU

Closes #557


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/a7364746
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/a7364746
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/a7364746

Branch: refs/heads/master
Commit: a7364746a462069853421d59db1093ab145253c9
Parents: 978d4de
Author: Nakul Jindal 
Authored: Wed Jul 5 11:33:41 2017 -0700
Committer: Nakul Jindal 
Committed: Wed Jul 5 11:33:41 2017 -0700

--
 relational.dml  |   6 +
 .../java/org/apache/sysml/hops/BinaryOp.java|   8 +-
 .../instructions/GPUInstructionParser.java  |  13 +-
 .../instructions/gpu/GPUInstruction.java|  34 ++--
 .../MatrixMatrixArithmeticGPUInstruction.java   |   2 +-
 ...rixMatrixRelationalBinaryGPUInstruction.java |  69 
 .../gpu/RelationalBinaryGPUInstruction.java |  68 +++
 ...larMatrixRelationalBinaryGPUInstruction.java |  61 +++
 .../instructions/gpu/context/CSRPointer.java|   6 +-
 .../instructions/gpu/context/GPUObject.java |   2 +-
 .../runtime/matrix/data/LibMatrixCUDA.java  | 177 ++-
 .../gpu/MatrixMatrixElementWiseOpTests.java |  32 +++-
 .../gpu/ScalarMatrixElementwiseOpTests.java |  64 ++-
 13 files changed, 477 insertions(+), 65 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/systemml/blob/a7364746/relational.dml
--
diff --git a/relational.dml b/relational.dml
new file mode 100644
index 000..3f492a1
--- /dev/null
+++ b/relational.dml
@@ -0,0 +1,6 @@
+A = rand(rows=10, cols=10)
+B = rand(rows=10, cols=10)
+
+C = A >= B
+
+print(toString(C))
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/systemml/blob/a7364746/src/main/java/org/apache/sysml/hops/BinaryOp.java
--
diff --git a/src/main/java/org/apache/sysml/hops/BinaryOp.java 
b/src/main/java/org/apache/sysml/hops/BinaryOp.java
index 83209ef..36f573c 100644
--- a/src/main/java/org/apache/sysml/hops/BinaryOp.java
+++ b/src/main/java/org/apache/sysml/hops/BinaryOp.java
@@ -582,7 +582,9 @@ public class BinaryOp extends Hop
if(DMLScript.USE_ACCELERATOR && 
(DMLScript.FORCE_ACCELERATOR || getMemEstimate() < GPUContextPool
.initialGPUMemBudget())
&& (op == OpOp2.MULT || op == 
OpOp2.PLUS || op == OpOp2.MINUS || op == OpOp2.DIV || op == OpOp2.POW
-   || op == OpOp2.MINUS_NZ || op == 
OpOp2.MINUS1_MULT || op == OpOp2.MODULUS || op == OpOp2.INTDIV) ) {
+   || op == OpOp2.MINUS_NZ || op == 
OpOp2.MINUS1_MULT || op == OpOp2.MODULUS || op == OpOp2.INTDIV
+   || op == OpOp2.LESS || op == 
OpOp2.LESSEQUAL || op == OpOp2.EQUAL || op == OpOp2.NOTEQUAL
+   || op == OpOp2.GREATER || op == 
OpOp2.GREATEREQUAL)) {
et = ExecType.GPU;
}
Unary unary1 = new 
Unary(getInput().get(0).constructLops(),
@@ -602,7 +604,9 @@ public class BinaryOp extends Hop
if(DMLScript.USE_ACCELERATOR && 
(DMLScript.FORCE_ACCELERATOR || getMemEstimate() < GPUContextPool
.initialGPUMemBudget())
&& (op == OpOp2.MULT || op == 
OpOp2.PLUS || op == OpOp2.MINUS || op == OpOp2.DIV || op == OpOp2.POW
-   || op == OpOp2.SOLVE || op == 
OpOp2.MINUS1_MULT || op == OpOp2.MODULUS || op == OpOp2.INTDIV)) {
+   || op == OpOp2.SOLVE || op == 
OpOp2.MINUS1_MULT || op == OpOp2.MODULUS || op == OpOp2.INTDIV
+   || op == OpOp2.LESS || op == 
OpOp2.LESSEQUAL || op == OpOp2.EQUAL || op == OpOp2.NOTEQUAL
+   || op == OpOp2.GREATER || op == 
OpOp2.GREATEREQUAL)) {
et = ExecType.GPU;
}


http://git-wip-us.apache.org/repos/asf/systemml/blob/a7364746/src/main/java/org/apache/sysml/runtime/instructions/GPUInstructionParser.java
--
diff --git 
a/src/main/java/org/apache/sysml/runtime/instructions/GPUInstructionParser.java 
b/src/main/java/org/apache/sysml/runtime/instructions/GPUInstructionParser.java
index 5fd6fa0..17b1578

systemml git commit: [HOTFIX] Removed extraneous file that got committed with SYSTEMML-1735

2017-07-05 Thread nakul02

Repository: systemml
Updated Branches:
  refs/heads/master a7364746a -> 82ca13d23


[HOTFIX] Removed extraneous file that got committed with SYSTEMML-1735


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/82ca13d2
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/82ca13d2
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/82ca13d2

Branch: refs/heads/master
Commit: 82ca13d23f44152befcf15fd2eae09729b1f618a
Parents: a736474
Author: Nakul Jindal 
Authored: Wed Jul 5 13:29:09 2017 -0700
Committer: Nakul Jindal 
Committed: Wed Jul 5 13:29:09 2017 -0700

--
 relational.dml | 6 --
 1 file changed, 6 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/systemml/blob/82ca13d2/relational.dml
--
diff --git a/relational.dml b/relational.dml
deleted file mode 100644
index 3f492a1..000
--- a/relational.dml
+++ /dev/null
@@ -1,6 +0,0 @@
-A = rand(rows=10, cols=10)
-B = rand(rows=10, cols=10)
-
-C = A >= B
-
-print(toString(C))
\ No newline at end of file

systemml git commit: [MINOR] Available families and algorithms printed from perftest script

2017-07-05 Thread nakul02

Repository: systemml
Updated Branches:
  refs/heads/master 82ca13d23 -> 33cb26ded


[MINOR] Available families and algorithms printed from perftest script


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/33cb26de
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/33cb26de
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/33cb26de

Branch: refs/heads/master
Commit: 33cb26ded9d28786159aba0d235db7ec25a442a5
Parents: 82ca13d
Author: Nakul Jindal 
Authored: Wed Jul 5 13:59:35 2017 -0700
Committer: Nakul Jindal 
Committed: Wed Jul 5 14:12:15 2017 -0700

--
 scripts/perftest/python/run_perftest.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/systemml/blob/33cb26de/scripts/perftest/python/run_perftest.py
--
diff --git a/scripts/perftest/python/run_perftest.py 
b/scripts/perftest/python/run_perftest.py
index 1421c2c..6b8b4bc 100755
--- a/scripts/perftest/python/run_perftest.py
+++ b/scripts/perftest/python/run_perftest.py
@@ -264,9 +264,10 @@ if __name__ == '__main__':
 
 # Argparse Module
 cparser = argparse.ArgumentParser(description='SystemML Performance Test 
Script')
-cparser.add_argument('--family', help='specify class of algorithms (e.g 
regression, binomial)',
+cparser.add_argument('--family', help='specify class of algorithms 
(available : ' + ', '.join(ML_ALGO.keys()) + ')',
  metavar='', choices=ML_ALGO.keys(), nargs='+')
-cparser.add_argument('--algo', help='specify the type of algorithm to run 
(Overrides --family)', metavar='',
+cparser.add_argument('--algo', help='specify the type of algorithm to run '
+ '(Overrides --family, available : ' + ', 
'.join(all_algos) + ')', metavar='',
  choices=all_algos, nargs='+')
 
 cparser.add_argument('--exec-type', default='singlenode', help='System-ML 
backend '

systemml git commit: [MINOR] More updates to the perftest help message

2017-07-05 Thread nakul02

Repository: systemml
Updated Branches:
  refs/heads/master 33cb26ded -> 1e1d3727f


[MINOR] More updates to the perftest help message


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/1e1d3727
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/1e1d3727
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/1e1d3727

Branch: refs/heads/master
Commit: 1e1d3727f4d88c9ef053d56da7aec640e0b88424
Parents: 33cb26d
Author: Nakul Jindal 
Authored: Wed Jul 5 14:50:38 2017 -0700
Committer: Nakul Jindal 
Committed: Wed Jul 5 14:50:38 2017 -0700

--
 scripts/perftest/python/run_perftest.py | 32 
 1 file changed, 18 insertions(+), 14 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/systemml/blob/1e1d3727/scripts/perftest/python/run_perftest.py
--
diff --git a/scripts/perftest/python/run_perftest.py 
b/scripts/perftest/python/run_perftest.py
index 6b8b4bc..dcc52c8 100755
--- a/scripts/perftest/python/run_perftest.py
+++ b/scripts/perftest/python/run_perftest.py
@@ -262,28 +262,32 @@ if __name__ == '__main__':
 # Remove duplicates algorithms and used as default inputs
 all_algos = set(reduce(lambda x, y: x + y, ML_ALGO.values()))
 
+# Families
+all_families = ML_ALGO.keys()
+
 # Argparse Module
 cparser = argparse.ArgumentParser(description='SystemML Performance Test 
Script')
-cparser.add_argument('--family', help='specify class of algorithms 
(available : ' + ', '.join(ML_ALGO.keys()) + ')',
- metavar='', choices=ML_ALGO.keys(), nargs='+')
-cparser.add_argument('--algo', help='specify the type of algorithm to run '
- '(Overrides --family, available : ' + ', 
'.join(all_algos) + ')', metavar='',
+cparser.add_argument('--family', help='space separated list of classes of 
algorithms '
+ '(available : ' + ', '.join(sorted(all_families)) + 
')',
+ metavar='', choices=all_families, nargs='+')
+cparser.add_argument('--algo', help='space separated list of algorithm to 
run '
+ '(Overrides --family, available : ' + ', 
'.join(sorted(all_algos)) + ')', metavar='',
  choices=all_algos, nargs='+')
 
 cparser.add_argument('--exec-type', default='singlenode', help='System-ML 
backend '
- '(e.g singlenode, spark-hybrid)', metavar='',
+ '(available : singlenode, spark-hybrid)', metavar='',
  choices=default_execution_mode)
-cparser.add_argument('--mat-type', default=default_mat_type, help='type of 
matrix to generate '
- '(e.g dense or sparse)', metavar='', 
choices=default_mat_type,
+cparser.add_argument('--mat-type', default=default_mat_type, help='space 
separated list of types of matrix to generate '
+ '(available : dense, sparse)', metavar='', 
choices=default_mat_type,
  nargs='+')
-cparser.add_argument('--mat-shape', default=default_mat_shape, help='shape 
of matrix '
- 'to generate (e.g 10k_1k)', metavar='', nargs='+')
-cparser.add_argument('--temp-dir', default=default_temp_dir, help='specify 
temporary directory',
- metavar='')
-cparser.add_argument('--filename', default='perf_test', help='specify 
output file for the perf'
- ' metics', metavar='')
+cparser.add_argument('--mat-shape', default=default_mat_shape, help='space 
separated list of shapes of matrices '
+ 'to generate (e.g 10k_1k, 20M_4k)', metavar='', 
nargs='+')
+cparser.add_argument('--temp-dir', default=default_temp_dir, 
help='temporary directory '
+'where generated, training and prediction data is 
put', metavar='')
+cparser.add_argument('--filename', default='perf_test', help='name of the 
output file for the perf'
+ ' metrics', metavar='')
 cparser.add_argument('--mode', default=default_workload,
- help='specify type of workload to run (e.g data-gen, 
train, predict)',
+ help='space separated list of types of workloads to 
run (available: data-gen, train, predict)',
  metavar='', choices=default_workload, nargs='+')
 
 # Args is a namespace

systemml git commit: [SYSTEMML-1744] JCuda jars in extra assembly jar

2017-07-06 Thread nakul02

Repository: systemml
Updated Branches:
  refs/heads/master 988366de0 -> 66b28c6e3


[SYSTEMML-1744] JCuda jars in extra assembly jar

Closes #559


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/66b28c6e
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/66b28c6e
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/66b28c6e

Branch: refs/heads/master
Commit: 66b28c6e356e894c7e6c21655dab85484bf4840a
Parents: 988366d
Author: Nakul Jindal 
Authored: Thu Jul 6 15:18:35 2017 -0700
Committer: Nakul Jindal 
Committed: Thu Jul 6 15:18:35 2017 -0700

--
 src/assembly/extra.xml | 12 +++
 src/assembly/extra/LICENSE | 48 +
 2 files changed, 60 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/systemml/blob/66b28c6e/src/assembly/extra.xml
--
diff --git a/src/assembly/extra.xml b/src/assembly/extra.xml
index 75ca4d7..24c2d87 100644
--- a/src/assembly/extra.xml
+++ b/src/assembly/extra.xml
@@ -50,4 +50,16 @@
.


+
+   
+   
+   
+   
+   org.jcuda:*
+   
+   true
+   compile
+   
+   
+
 

http://git-wip-us.apache.org/repos/asf/systemml/blob/66b28c6e/src/assembly/extra/LICENSE
--
diff --git a/src/assembly/extra/LICENSE b/src/assembly/extra/LICENSE
index bc42b2d..c495849 100644
--- a/src/assembly/extra/LICENSE
+++ b/src/assembly/extra/LICENSE
@@ -460,3 +460,51 @@ Copyright 2017 The TensorFlow Authors.  All rights 
reserved.
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
+
+===
+
+The following compile-scope dependencies come under the MIT License
+
+JCuda (jcuda.org)
+
+org.jcuda:jcuda:0.8.0
+org.jcuda:jcublas:0.8.0
+org.jcuda:jcufft:0.8.0
+org.jcuda:jcusparse:0.8.0
+org.jcuda:jcusolver:0.8.0
+org.jcuda:jcurand:0.8.0
+org.jcuda:jnvgraph:0.8.0
+org.jcuda:jcudnn:0.8.0
+org.jcuda:jcuda-natives:0.8.0
+org.jcuda:jcublas-natives:0.8.0
+org.jcuda:jcufft-natives:0.8.0
+org.jcuda:jcusparse-natives:0.8.0
+org.jcuda:jcusolver-natives:0.8.0
+org.jcuda:jcurand-natives:0.8.0
+org.jcuda:jnvgraph-natives:0.8.0
+org.jcuda:jcudnn-natives:0.8.0
+
+
+The MIT License (MIT)
+
+Copyright (c) 2008-2016 Marco Hutter - http://www.jcuda.org
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+
+===

systemml git commit: Write output of systemml run from perf test scripts

2017-07-07 Thread nakul02

Repository: systemml
Updated Branches:
  refs/heads/master c5a330d7d -> 152eba1a7


Write output of systemml run from perf test scripts

Closes #561


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/152eba1a
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/152eba1a
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/152eba1a

Branch: refs/heads/master
Commit: 152eba1a7d5de2d34ab97db7d49596b41569aeb5
Parents: c5a330d
Author: Nakul Jindal 
Authored: Fri Jul 7 11:23:17 2017 -0700
Committer: Nakul Jindal 
Committed: Fri Jul 7 11:23:18 2017 -0700

--
 scripts/perftest/python/run_perftest.py | 10 +-
 scripts/perftest/python/utils.py| 18 +-
 2 files changed, 18 insertions(+), 10 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/systemml/blob/152eba1a/scripts/perftest/python/run_perftest.py
--
diff --git a/scripts/perftest/python/run_perftest.py 
b/scripts/perftest/python/run_perftest.py
index dcc52c8..b0257d4 100755
--- a/scripts/perftest/python/run_perftest.py
+++ b/scripts/perftest/python/run_perftest.py
@@ -82,7 +82,7 @@ ML_PREDICT = {'Kmeans': 'Kmeans-predict',
 
 
 # Responsible for execution and metric logging
-def algorithm_workflow(algo, exec_type, config_path, file_name, action_mode):
+def algorithm_workflow(algo, exec_type, config_path, dml_file_name, 
action_mode):
 """
 This function is responsible for overall workflow. This does the following 
actions
 Check if the input is key value argument or list of positional args
@@ -99,7 +99,7 @@ def algorithm_workflow(algo, exec_type, config_path, 
file_name, action_mode):
 config_path : String
 Path to read the json file from
 
-file_name : String
+dml_file_name : String
 DML file name to be used while processing the arguments give
 
 action_mode : String
@@ -116,8 +116,8 @@ def algorithm_workflow(algo, exec_type, config_path, 
file_name, action_mode):
 list_args = ' '.join(config_data)
 args = {'-args': list_args}
 
-folder_name = config_path.split('/')[-1]
-mat_type, mat_shape, intercept = get_folder_metrics(folder_name, 
action_mode)
+config_file_name = config_path.split('/')[-1]
+mat_type, mat_shape, intercept = get_folder_metrics(config_file_name, 
action_mode)
 
 exit_flag_success = get_existence(config_path, action_mode)
 
@@ -125,7 +125,7 @@ def algorithm_workflow(algo, exec_type, config_path, 
file_name, action_mode):
 print('data already exists {}'.format(config_path))
 time = 'data_exists'
 else:
-time = exec_dml_and_parse_time(exec_type, file_name, args)
+time = exec_dml_and_parse_time(exec_type, dml_file_name, 
config_file_name,  args)
 
 # Write a _SUCCESS file only if time is found and in data-gen action_mode
 if len(time.split('.')) == 2 and action_mode == 'data-gen':

http://git-wip-us.apache.org/repos/asf/systemml/blob/152eba1a/scripts/perftest/python/utils.py
--
diff --git a/scripts/perftest/python/utils.py b/scripts/perftest/python/utils.py
index 7ff3b54..464d7f6 100755
--- a/scripts/perftest/python/utils.py
+++ b/scripts/perftest/python/utils.py
@@ -138,7 +138,7 @@ def get_existence(path, action_mode):
 return exist
 
 
-def exec_dml_and_parse_time(exec_type, file_name, args, Time=True):
+def exec_dml_and_parse_time(exec_type, dml_file_name, execution_output_file, 
args, Time=True):
 """
 This function is responsible of execution of input arguments via python 
sub process,
 We also extract time obtained from the output of this subprocess
@@ -146,9 +146,12 @@ def exec_dml_and_parse_time(exec_type, file_name, args, 
Time=True):
 exec_type: String
 Contains the execution type singlenode / hybrid_spark
 
-file_name: String
+dml_file_name: String
 DML file name to be used while processing the arguments give
 
+execution_output_file: String
+Name of the file where the output of the DML run is written out
+
 args: Dictionary
 Key values pairs depending on the arg type
 
@@ -156,7 +159,7 @@ def exec_dml_and_parse_time(exec_type, file_name, args, 
Time=True):
 Boolean argument used to extract time from raw output logs.
 """
 
-algorithm = file_name + '.dml'
+algorithm = dml_file_name + '.dml'
 if exec_type == 'singlenode':
 exec_script = join(os.environ.get('SYSTEMML_HOME'), 'bin', 
'systemml-standalone.py')
 
@@ -189,11 +192,15 @@ def exec_dml_and_parse_time(exec_type, file_name, args, 
Time=True):
 out1, err1 = proc1.communicate()
 
 if "Error" in str(err1):
-print('Error Found in {}'.format(file_name))
+print('Erro

systemml git commit: [MINOR] Performance test bug fixes

2017-07-13 Thread nakul02

Repository: systemml
Updated Branches:
  refs/heads/master f046051d4 -> cd1ae5b42


[MINOR] Performance test bug fixes

Closes #565


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/cd1ae5b4
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/cd1ae5b4
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/cd1ae5b4

Branch: refs/heads/master
Commit: cd1ae5b42499b3b97731de8b28a6d1db9cc9e7f3
Parents: f046051
Author: krishnakalyan3 
Authored: Thu Jul 13 14:28:56 2017 -0700
Committer: Nakul Jindal 
Committed: Thu Jul 13 14:28:56 2017 -0700

--
 scripts/perftest/python/datagen.py  |  27 ---
 scripts/perftest/python/predict.py  |  48 ++--
 scripts/perftest/python/run_perftest.py |  53 -
 scripts/perftest/python/train.py|  40 +-
 scripts/perftest/python/utils.py| 112 +++
 5 files changed, 192 insertions(+), 88 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/systemml/blob/cd1ae5b4/scripts/perftest/python/datagen.py
--
diff --git a/scripts/perftest/python/datagen.py 
b/scripts/perftest/python/datagen.py
index d9c49e9..88a71f0 100755
--- a/scripts/perftest/python/datagen.py
+++ b/scripts/perftest/python/datagen.py
@@ -22,7 +22,7 @@
 
 import itertools
 from os.path import join
-from utils import split_rowcol, config_writer
+from utils import split_rowcol, config_writer, mat_type_check
 
 # This file contains configuration settings for data generation
 DATA_FORMAT = 'csv'
@@ -181,8 +181,8 @@ def stats1_datagen(matrix_dim, matrix_type, datagen_dir):
 NC = int(int(col)/2)
 
 config = dict(R=row, C=col, NC=NC, MAXDOMAIN=MAXDOMAIN, DATA=DATA, 
TYPES=TYPES, SETSIZE=SETSIZE,
-  LABELSETSIZE=LABELSETSIZE, TYPES1=TYPES1, TYPES2=TYPES2, 
INDEX1=INDEX1, INDEX2=INDEX2,
-  fmt=DATA_FORMAT)
+  LABELSETSIZE=LABELSETSIZE, TYPES1=TYPES1, TYPES2=TYPES2, 
INDEX1=INDEX1,
+  INDEX2=INDEX2, fmt=DATA_FORMAT)
 
 config_writer(full_path + '.json', config)
 
@@ -207,7 +207,7 @@ def stats2_datagen(matrix_dim, matrix_type, datagen_dir):
 return full_path
 
 
-def config_packets_datagen(algo_payload, matrix_type, matrix_shape, 
datagen_dir):
+def config_packets_datagen(algo_payload, matrix_type, matrix_shape, 
datagen_dir, dense_algos):
 """
 This function has two responsibilities. Generate the configuration files 
for
 datagen algorithms and return a dictionary that will be used for execution.
@@ -217,11 +217,17 @@ def config_packets_datagen(algo_payload, matrix_type, 
matrix_shape, datagen_dir)
 family type.
 
 matrix_type: String
-Type of matrix to generate e.g dense or sparse
+Type of matrix to generate e.g dense, sparse, all
 
 matrix_shape: String
 Shape of matrix to generate e.g 100k_10
 
+datagen_dir: String
+Path of the data generation directory
+
+dense_algos: List
+Algorithms that support only dense matrix type
+
 return: Dictionary {string: list}
 This dictionary contains algorithms to be executed as keys and the path of 
configuration
 json files to be executed list of values.
@@ -233,13 +239,10 @@ def config_packets_datagen(algo_payload, matrix_type, 
matrix_shape, datagen_dir)
 
 # Cross Product of all configurations
 for current_family in distinct_families:
-if current_family in FAMILY_NO_MATRIX_TYPE:
-config = list(itertools.product(matrix_shape, ['dense']))
-config_bundle[current_family] = config
-else:
-config = list(itertools.product(matrix_shape, matrix_type))
-# clustering : [[10k_1, dense], [10k_2, dense], ...]
-config_bundle[current_family] = config
+current_matrix_type = mat_type_check(current_family, matrix_type, 
dense_algos)
+config = list(itertools.product(matrix_shape, current_matrix_type))
+# clustering : [[10k_1, dense], [10k_2, dense], ...]
+config_bundle[current_family] = config
 
 config_packets = {}
 for current_family, configs in config_bundle.items():

http://git-wip-us.apache.org/repos/asf/systemml/blob/cd1ae5b4/scripts/perftest/python/predict.py
--
diff --git a/scripts/perftest/python/predict.py 
b/scripts/perftest/python/predict.py
index bc034da..92d3af4 100755
--- a/scripts/perftest/python/predict.py
+++ b/scripts/perftest/python/predict.py
@@ -21,10 +21,8 @@
 #-
 
 import sys
-import os
 from os.path import join
-import glob
-from utils import create_dir, config_writer
+from utils import config_writer, relevant_folders, mat_type_check
 
 # C

[1/2] systemml git commit: [SYSTEML-1758] added cbind and rbind for GPU

2017-07-13 Thread nakul02

Repository: systemml
Updated Branches:
  refs/heads/master cd1ae5b42 -> 4e47b5e10


http://git-wip-us.apache.org/repos/asf/systemml/blob/4e47b5e1/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java
--
diff --git 
a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java 
b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java
index f47c15c..17f6b22 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java
@@ -2466,15 +2466,15 @@ public class LibMatrixCUDA {
/**
 * Performs elementwise arithmetic operation specified by op of two 
input matrices in1 and in2
 *
-* @param ec execution context
-* @param gCtx a valid {@link GPUContext}
-* @param instName the invoking instruction's name for record {@link 
Statistics}.
-* @param in1 input matrix 1
-* @param in2 input matrix 2
-* @param outputName output matrix name
-* @param isLeftTransposed true if left-transposed
+* @param ecexecution context
+* @param gCtx  a valid {@link GPUContext}
+* @param instName  the invoking instruction's name for record 
{@link Statistics}.
+* @param in1   input matrix 1
+* @param in2   input matrix 2
+* @param outputNameoutput matrix name
+* @param isLeftTransposed  true if left-transposed
 * @param isRightTransposed true if right-transposed
-* @param op binary operator
+* @param opbinary operator
 * @throws DMLRuntimeException if DMLRuntimeException occurs
 */
public static void matrixMatrixArithmetic(ExecutionContext ec, 
GPUContext gCtx, String instName, MatrixObject in1, MatrixObject in2,
@@ -2506,13 +2506,14 @@ public class LibMatrixCUDA {
 
/**
 * Utility to do matrix-scalar operation kernel
-* @param gCtx a valid {@link GPUContext}
-* @param instName the invoking instruction's name for record {@link 
Statistics}.
-* @param ec execution context
-* @param in input matrix
-* @param outputName output variable name
+*
+* @param gCtx  a valid {@link GPUContext}
+* @param instName  the invoking instruction's name for record 
{@link Statistics}.
+* @param ecexecution context
+* @param ininput matrix
+* @param outputNameoutput variable name
 * @param isInputTransposed true if input is transposed
-* @param op operator
+* @param opoperator
 * @throws DMLRuntimeException if DMLRuntimeException occurs
 */
private static void matrixScalarOp(ExecutionContext ec, GPUContext 
gCtx, String instName, MatrixObject in, String outputName, boolean 
isInputTransposed,
@@ -2703,9 +2704,9 @@ public class LibMatrixCUDA {
/**
 * Performs a deep device copy of a matrix on the GPU
 *
-* @param ec execution context
-* @param instName the invoking instruction's name for record {@link 
Statistics}.
-* @param src source matrix
+* @param ec execution context
+* @param instName   the invoking instruction's name for record {@link 
Statistics}.
+* @param srcsource matrix
 * @param outputName destination variable name
 * @throws DMLRuntimeException if DMLRuntimeException occurs
 */
@@ -2974,6 +2975,80 @@ public class LibMatrixCUDA {
///
 
 
+   ///
+   // Matrix Manipulation Functions */
+   ///
+
+
+   public static void cbind(ExecutionContext ec, GPUContext gCtx, String 
instName, MatrixObject in1, MatrixObject in2, String outputName) throws 
DMLRuntimeException {
+   if (ec.getGPUContext(0) != gCtx)
+   throw new DMLRuntimeException("GPU : Invalid internal 
state, the GPUContext set with the ExecutionContext is not the same used to run 
this LibMatrixCUDA function");
+   LOG.trace("GPU : cbind" + ", GPUContext=" + gCtx);
+
+   long t1 = 0;
+
+   // only Dense supported
+   MatrixObject out = getDenseMatrixOutputForGPUInstruction(ec, 
instName, outputName);
+   Pointer C = getDensePointer(gCtx, out, instName);
+   Pointer A = getDensePointer(gCtx, in1, instName);
+   Pointer B = getDensePointer(gCtx, in2, instName);
+
+   int rowsA = (int) in1.getNumRows();
+

[2/2] systemml git commit: [SYSTEML-1758] added cbind and rbind for GPU

2017-07-13 Thread nakul02

[SYSTEML-1758] added cbind and rbind for GPU

Closes #570


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/4e47b5e1
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/4e47b5e1
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/4e47b5e1

Branch: refs/heads/master
Commit: 4e47b5e10ff1abdf1ef53c2b1b0d80614ec8e416
Parents: cd1ae5b
Author: Nakul Jindal 
Authored: Thu Jul 13 14:31:47 2017 -0700
Committer: Nakul Jindal 
Committed: Thu Jul 13 14:31:47 2017 -0700

--
 src/main/cpp/kernels/SystemML.cu|   78 +-
 src/main/cpp/kernels/SystemML.ptx   | 1043 ++
 .../java/org/apache/sysml/hops/BinaryOp.java|   21 +-
 src/main/java/org/apache/sysml/lops/Append.java |   95 ++
 .../java/org/apache/sysml/lops/AppendCP.java|   93 --
 .../instructions/CPInstructionParser.java   |4 +-
 .../instructions/GPUInstructionParser.java  |   17 +-
 .../gpu/BuiltinUnaryGPUInstruction.java |2 +-
 .../instructions/gpu/GPUInstruction.java|3 +
 .../gpu/MatrixAppendGPUInstruction.java |  102 ++
 .../runtime/matrix/data/LibMatrixCUDA.java  |  109 +-
 .../org/apache/sysml/test/gpu/AppendTest.java   |  108 ++
 .../test/integration/gpu/ZPackageSuite.java |2 +
 13 files changed, 1099 insertions(+), 578 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/systemml/blob/4e47b5e1/src/main/cpp/kernels/SystemML.cu
--
diff --git a/src/main/cpp/kernels/SystemML.cu b/src/main/cpp/kernels/SystemML.cu
index 3098282..297269f 100644
--- a/src/main/cpp/kernels/SystemML.cu
+++ b/src/main/cpp/kernels/SystemML.cu
@@ -216,7 +216,7 @@ __global__ void matrix_matrix_cellwise_op(double* A, 
double* B, double* C,
bIndex = iy; // rlen == 1
C[outIndex] = binaryOp(A[aIndex], B[bIndex], op);
//printf("C[%d] = A[%d](%f) B[%d](%f) (%d %d)\n", outIndex, 
aIndex, A[aIndex], bIndex,  B[bIndex], (ix+1), (iy+1));
-__syncthreads();
+   __syncthreads();
}
 }
 
@@ -238,9 +238,9 @@ __global__ void matrix_scalar_op(double* A, double scalar, 
double* C, int size,
C[index] = binaryOp(scalar, A[index], op);
} else {
C[index] = binaryOp(A[index], scalar, op);
-}
+   }
}
-  __syncthreads();
+   __syncthreads();
 }
 
 
@@ -259,6 +259,78 @@ __global__ void fill(double* A, double scalar, int lenA) {
 }
 
 /**
+ * Appends Matrix B to the right side of Matrix A into a new matrix C
+ * | 1 2 3 4 |   | 8 8 8 | | 1 2 3 4 8 8 8 |
+ * cbind ( | 9 8 7 6 | , | 7 7 7 | ) = | 9 8 7 6 7 7 7 |
+ * | 4 3 2 1 |   | 9 9 9 | | 4 3 2 1 9 9 9 |
+ * @param A  input matrix A allocated on the GPU
+ * @param B  input matrix B allocated on the GPU
+ * @param C  output matrix C allocated on the GPU
+ * @param rowsA  rows in A
+ * @param colsA  columns in A
+ * @param rowsB  rows in B
+ * @param colsB  columns in B
+ */
+extern "C"
+__global__ void cbind(double *A, double *B, double *C, int rowsA, int colsA, 
int rowsB, int colsB) {
+   int ix = blockIdx.x * blockDim.x + threadIdx.x;
+   int iy = blockIdx.y * blockDim.y + threadIdx.y;
+
+   int colsC = colsA + colsB;
+   int rowsC = rowsA;
+
+   // Copy an element of A into C into the appropriate location
+   if (ix < rowsA && iy < colsA) {
+   double elemA = A[ix * colsA + iy];
+   C[ix * colsC + iy] = elemA;
+   }
+
+   // Copy an element of B into C into the appropriate location
+   if (ix < rowsB && iy < colsB) {
+   double elemB = B[ix * colsB + iy];
+   C[ix * colsC + (iy + colsA)] = elemB;
+   }
+}
+
+
+/**
+ * Appends Matrix B to the bottom of Matrix A into a new matrix C
+ * | 2 3 4 |   | 8 8 8 | | 2 3 4 |
+ * rbind ( | 8 7 6 | , | 7 7 7 | ) = | 8 7 6 |
+ * | 3 2 1 | | 3 2 1 |
+ | 8 8 8 |
+ | 7 7 7 |
+ * @param A  input matrix A allocated on the GPU
+ * @param B  input matrix B allocated on the GPU
+ * @param C  output matrix C allocated on the GPU
+ * @param rowsA  rows in A
+ * @param colsA  columns in A
+ * @param rowsB  rows in B
+ * @param colsB  columns in B
+ */
+extern "C"
+__global__ void rbind(double *A, double *B, double *C, int rowsA, int colsA, 
int rowsB, int colsB) {
+   int ix = blockIdx.x * blockDim.x + threadIdx.x;
+   int iy = blockIdx.y * blockDim.y + threadIdx.y;
+
+   int rowsC = rowsA + rowsB;
+   int colsC = colsA;
+
+   // Copy an element of A into C into the appropriate location
+   if (ix < rowsA && iy < colsA

systemml git commit: [SYSTEMML-1713] Added mem estimates for various GPU ops

2017-07-13 Thread nakul02

Repository: systemml
Updated Branches:
  refs/heads/master 4e47b5e10 -> 32ba9cf9f


[SYSTEMML-1713] Added mem estimates for various GPU ops

Closes #553


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/32ba9cf9
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/32ba9cf9
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/32ba9cf9

Branch: refs/heads/master
Commit: 32ba9cf9fdff2aba7432c7a4e51317b6e5bf1a18
Parents: 4e47b5e
Author: Nakul Jindal 
Authored: Thu Jul 13 15:01:11 2017 -0700
Committer: Nakul Jindal 
Committed: Thu Jul 13 15:01:11 2017 -0700

--
 .../java/org/apache/sysml/hops/AggBinaryOp.java |  57 -
 .../java/org/apache/sysml/hops/AggUnaryOp.java  |  44 +++-
 .../java/org/apache/sysml/hops/BinaryOp.java|  32 ++-
 src/main/java/org/apache/sysml/hops/Hop.java|   4 +-
 .../java/org/apache/sysml/hops/ReorgOp.java |   4 +-
 .../java/org/apache/sysml/hops/TernaryOp.java   |  17 +-
 .../java/org/apache/sysml/hops/UnaryOp.java |  16 +-
 .../instructions/gpu/context/CSRPointer.java|   6 +-
 .../runtime/matrix/data/LibMatrixCUDA.java  | 214 ---
 9 files changed, 279 insertions(+), 115 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/systemml/blob/32ba9cf9/src/main/java/org/apache/sysml/hops/AggBinaryOp.java
--
diff --git a/src/main/java/org/apache/sysml/hops/AggBinaryOp.java 
b/src/main/java/org/apache/sysml/hops/AggBinaryOp.java
index eb83549..9077976 100644
--- a/src/main/java/org/apache/sysml/hops/AggBinaryOp.java
+++ b/src/main/java/org/apache/sysml/hops/AggBinaryOp.java
@@ -21,19 +21,19 @@ package org.apache.sysml.hops;
 
 import org.apache.sysml.api.DMLScript;
 import org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM;
+import org.apache.sysml.hops.Hop.MultiThreadedHop;
 import org.apache.sysml.hops.rewrite.HopRewriteUtils;
 import org.apache.sysml.lops.Aggregate;
 import org.apache.sysml.lops.Binary;
 import org.apache.sysml.lops.DataPartition;
 import org.apache.sysml.lops.Group;
-import org.apache.sysml.hops.Hop.MultiThreadedHop;
 import org.apache.sysml.lops.Lop;
 import org.apache.sysml.lops.LopProperties.ExecType;
 import org.apache.sysml.lops.LopsException;
 import org.apache.sysml.lops.MMCJ;
+import org.apache.sysml.lops.MMCJ.MMCJType;
 import org.apache.sysml.lops.MMRJ;
 import org.apache.sysml.lops.MMTSJ;
-import org.apache.sysml.lops.MMCJ.MMCJType;
 import org.apache.sysml.lops.MMTSJ.MMTSJType;
 import org.apache.sysml.lops.MMZip;
 import org.apache.sysml.lops.MapMult;
@@ -343,11 +343,48 @@ public class AggBinaryOp extends Hop implements 
MultiThreadedHop
protected double computeIntermediateMemEstimate( long dim1, long dim2, 
long nnz )
{
double ret = 0;
-   
+
+   if (DMLScript.USE_ACCELERATOR) {
+   // In GPU Mode, intermediate memory is only needed in 
case of one of the matrix blocks is sparse
+   // When sparse block is converted to dense and a dense 
MM takes place, we need (dim1 * dim2)
+   // When dense block is converted to sparse and a sparse 
MM takes place, we need (dim1 * dim2 * 2)
+
+   Hop in1 = _input.get(0);
+   Hop in2 = _input.get(1);
+   double in1Sparsity = 
OptimizerUtils.getSparsity(in1.getDim1(), in1.getDim2(), in1.getNnz());
+   double in2Sparsity = 
OptimizerUtils.getSparsity(in2.getDim1(), in2.getDim2(), in2.getNnz());
+
+   boolean in1Sparse = in1Sparsity < 
MatrixBlock.SPARSITY_TURN_POINT;
+   boolean in2Sparse = in2Sparsity < 
MatrixBlock.SPARSITY_TURN_POINT;
+
+   boolean in1UltraSparse = in1Sparsity < 
MatrixBlock.ULTRA_SPARSITY_TURN_POINT;
+   boolean in2UltraSparse = in2Sparsity < 
MatrixBlock.ULTRA_SPARSITY_TURN_POINT;
+
+   // For Matmult X * Y, if X is sparse, Y is dense, X is 
converted to dense
+   // If X is ultrasparse, Y is converted to sparse
+   if (in1Sparse ^ in2Sparse) { // one sparse, one dense
+   if (in1Sparse) {
+   if (in1UltraSparse) {
+   ret += 2 * 
OptimizerUtils.estimateSizeExactSparsity(in2.getDim1(), in2.getDim2(), 
in2.getNnz());
+   } else {
+   ret += 
OptimizerUtils.estimateSizeExactSparsity(in1.getDim1(), in1.getDim2(), 
in1.getNnz());
+   }
+   } else if (in2Sparse) {
+   if

systemml git commit: [MINOR][DOC] Performance Test Documentation

2017-07-13 Thread nakul02

Repository: systemml
Updated Branches:
  refs/heads/master 32ba9cf9f -> 61467dab8


[MINOR][DOC] Performance Test Documentation

Closes #563


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/61467dab
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/61467dab
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/61467dab

Branch: refs/heads/master
Commit: 61467dab86fad98e15d0cf529aaea7ba0cd6083f
Parents: 32ba9cf
Author: krishnakalyan3 
Authored: Thu Jul 13 15:04:28 2017 -0700
Committer: Nakul Jindal 
Committed: Thu Jul 13 15:04:28 2017 -0700

--
 docs/img/performance-test/perf_test_arch.png | Bin 0 -> 25831 bytes
 docs/python-performance-test.md  | 129 ++
 2 files changed, 129 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/systemml/blob/61467dab/docs/img/performance-test/perf_test_arch.png
--
diff --git a/docs/img/performance-test/perf_test_arch.png 
b/docs/img/performance-test/perf_test_arch.png
new file mode 100644
index 000..4763c8b
Binary files /dev/null and b/docs/img/performance-test/perf_test_arch.png differ

http://git-wip-us.apache.org/repos/asf/systemml/blob/61467dab/docs/python-performance-test.md
--
diff --git a/docs/python-performance-test.md b/docs/python-performance-test.md
new file mode 100644
index 000..c265bc6
--- /dev/null
+++ b/docs/python-performance-test.md
@@ -0,0 +1,129 @@
+# Performance Testing Algorithms User Manual
+
+This user manual contains details on how to conduct automated performance 
tests. Work was mostly done in this 
[PR](https://github.com/apache/systemml/pull/537) and is part of 
[SYSTEMML-1451](https://issues.apache.org/jira/browse/SYSTEMML-1451). Our aim 
was to move from the existing `bash` based performance tests to automated 
`python` based performance tests.
+
+### Architecture
+Our performance test suite contains `7` families, namely `binomial`, 
`multinomial`, `stats1`, `stats2`, `regression1`, `regression2`, `clustering`. 
Within each of these families we have algorithms grouped under it. Typically a 
family is a set of algorithms that require the same data generation script. 
+
+- Exceptions: `regression1`, `regression2` and `binomial`. We decided to 
include these algorithms in separate families to keep the architecture simple.
+
+![System ML Architecture](img/performance-test/perf_test_arch.png)
+
+At a very high level, we construct a string with the arguments required to run 
each operation. Once this string is constructed we use the subprocess module to 
execute this string and extract the time from the standard output. 
+
+We also use the `json` module to write our configurations to a json file. This 
ensures that our current operation is easy to debug.
+
+
+We have `5` files in the performance test suite: `run_perftest.py`, 
`datagen.py`, `train.py`, `predict.py` and `utils.py`. 
+
+`datagen.py`, `train.py` and `predict.py` generate a dictionary. Our key is 
the name of algorithm being processed and values is a list with path(s) where 
all the data required is present. We define this dictionary as a configuration 
packet.
+
+We will describe each of them in detail in the following sections.
+
+`run_perftest.py` at a high level creates the `algos_to_run` list. This is a 
list of tuples, each with the algorithm as key and the family as value, to be 
executed in our performance test.
+
+In `datagen.py` script we have all functions required to generate data. We 
return the required configuration packet as a result of this script, that 
contains key as the `data-gen` script to run and values with location to read 
data-gen json files from.
+
+In `train.py` script we have functions required to generate training output. 
We return the required configuration packet as a result of this script, that 
contains key as the algorithm to run and values with location to read training 
json files from.
+
+The file `predict.py` contains all functions for all algorithms in the 
performance test that contain predict script. We return the required 
configuration packet as a result of this script, that contains key as the 
algorithm to run and values with location to read predict json files from.
+
+In the file `utils.py` we have all the helper functions required in our 
performance test. These functions do operations like write `json` files, 
extract time from std out etc.
+ 
+### Adding New Algorithms
+While adding a new algorithm we need to know if it has to be part of any 
pre-existing family. If this algorithm depends on a new data generation script 
we would need to create a new family. The steps to take to add a new algorithm 
are given below.
+
+Following changes to `run_perftest.py`:
+
+- Add the algorithm to `ML_ALGO`

systemml git commit: [SYSTEMML-1795] Specify a set of GPUs to use for a given machine

2017-07-21 Thread nakul02

Repository: systemml
Updated Branches:
  refs/heads/master 1f5b14dda -> fec209306


[SYSTEMML-1795] Specify a set of GPUs to use for a given machine

Can specify:
a) -1 for all GPUs
b) a specific number of GPU
c) a comma separated list of GPUs
d) a range of GPUs

Closes #587


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/fec20930
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/fec20930
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/fec20930

Branch: refs/heads/master
Commit: fec209306d3c7e55673872f431d43ceda53b7a6c
Parents: 1f5b14d
Author: Nakul Jindal 
Authored: Fri Jul 21 13:55:03 2017 -0700
Committer: Nakul Jindal 
Committed: Fri Jul 21 13:55:04 2017 -0700

--
 conf/SystemML-config.xml.template   |  4 +-
 .../apache/sysml/api/ScriptExecutorUtils.java   |  5 +-
 .../java/org/apache/sysml/conf/DMLConfig.java   |  4 +-
 .../gpu/context/GPUContextPool.java | 90 +---
 .../org/apache/sysml/test/unit/UtilsTest.java   | 78 +
 5 files changed, 160 insertions(+), 21 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/systemml/blob/fec20930/conf/SystemML-config.xml.template
--
diff --git a/conf/SystemML-config.xml.template 
b/conf/SystemML-config.xml.template
index 11e86ed..8608a9c 100644
--- a/conf/SystemML-config.xml.template
+++ b/conf/SystemML-config.xml.template
@@ -78,6 +78,6 @@

false
 
-   
-   -1
+
+-1
 

http://git-wip-us.apache.org/repos/asf/systemml/blob/fec20930/src/main/java/org/apache/sysml/api/ScriptExecutorUtils.java
--
diff --git a/src/main/java/org/apache/sysml/api/ScriptExecutorUtils.java 
b/src/main/java/org/apache/sysml/api/ScriptExecutorUtils.java
index 0e0950e..b094c91 100644
--- a/src/main/java/org/apache/sysml/api/ScriptExecutorUtils.java
+++ b/src/main/java/org/apache/sysml/api/ScriptExecutorUtils.java
@@ -78,9 +78,8 @@ public class ScriptExecutorUtils {
LibMatrixDNN.DISPLAY_STATISTICS = 
dmlconf.getBooleanValue(DMLConfig.EXTRA_DNN_STATS);
DMLScript.FINEGRAINED_STATISTICS = 
dmlconf.getBooleanValue(DMLConfig.EXTRA_FINEGRAINED_STATS);
 
-   // Sets the maximum number of GPUs per process, -1 for all 
available
-   // GPUs
-   GPUContextPool.PER_PROCESS_MAX_GPUS = 
dmlconf.getIntValue(DMLConfig.MAX_GPUS_PER_PROCESS);
+   // Sets the GPUs to use for this process (a range, all GPUs, 
comma separated list or a specific GPU)
+   GPUContextPool.AVAILABLE_GPUS = 
dmlconf.getTextValue(DMLConfig.AVAILABLE_GPUS);
Statistics.startRunTimer();
try {
// run execute (w/ exception handling to ensure proper 
shutdown)

http://git-wip-us.apache.org/repos/asf/systemml/blob/fec20930/src/main/java/org/apache/sysml/conf/DMLConfig.java
--
diff --git a/src/main/java/org/apache/sysml/conf/DMLConfig.java 
b/src/main/java/org/apache/sysml/conf/DMLConfig.java
index c248098..a6a4b5e 100644
--- a/src/main/java/org/apache/sysml/conf/DMLConfig.java
+++ b/src/main/java/org/apache/sysml/conf/DMLConfig.java
@@ -78,7 +78,7 @@ public class DMLConfig
public static final String EXTRA_FINEGRAINED_STATS = 
"systemml.stats.finegrained"; //boolean
public static final String EXTRA_GPU_STATS  = 
"systemml.stats.extraGPU"; //boolean
public static final String EXTRA_DNN_STATS  = 
"systemml.stats.extraDNN"; //boolean
-   public static final String MAX_GPUS_PER_PROCESS = 
"systemml.gpu.perProcessMax"; // boolean, maximum number of gpus to use, -1 for 
all
+   public static final String AVAILABLE_GPUS = 
"systemml.gpu.availableGPUs"; // String to specify which GPUs to use (a range, 
all GPUs, comma separated list or a specific GPU)
 
// Fraction of available memory to use. The available memory is 
computer when the GPUContext is created
// to handle the tradeoff on calling cudaMemGetInfo too often.
@@ -123,7 +123,7 @@ public class DMLConfig
_defaultVals.put(EXTRA_DNN_STATS,"false" );
 
_defaultVals.put(GPU_MEMORY_UTILIZATION_FACTOR,  "0.9" );
-   _defaultVals.put(MAX_GPUS_PER_PROCESS,   "-1");
+   _defaultVals.put(AVAILABLE_GPUS,   "-1");
}

public DMLConfig()

http://git-wip-us.apache.org/repos/asf/systemml/blob/fec20930/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUContextPool.java
--
diff --git 
a/src/main/java/org/apache/sysml/runtime/instructions

systemml git commit: [SYSTEMML-1806] fix for DMLConfig#setText

2017-07-26 Thread nakul02

Repository: systemml
Updated Branches:
  refs/heads/master 7ae1b1c4c -> 3fd8e495e


[SYSTEMML-1806] fix for DMLConfig#setText


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/3fd8e495
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/3fd8e495
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/3fd8e495

Branch: refs/heads/master
Commit: 3fd8e495e26ba70eed22bf16c51a7bf69474c1c3
Parents: 7ae1b1c
Author: Nakul Jindal 
Authored: Wed Jul 26 14:24:53 2017 -0700
Committer: Nakul Jindal 
Committed: Wed Jul 26 14:24:53 2017 -0700

--
 .../java/org/apache/sysml/conf/DMLConfig.java   | 71 ++--
 .../org/apache/sysml/test/unit/UtilsTest.java   | 60 +
 2 files changed, 97 insertions(+), 34 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/systemml/blob/3fd8e495/src/main/java/org/apache/sysml/conf/DMLConfig.java
--
diff --git a/src/main/java/org/apache/sysml/conf/DMLConfig.java 
b/src/main/java/org/apache/sysml/conf/DMLConfig.java
index a6a4b5e..415bb57 100644
--- a/src/main/java/org/apache/sysml/conf/DMLConfig.java
+++ b/src/main/java/org/apache/sysml/conf/DMLConfig.java
@@ -22,6 +22,7 @@ package org.apache.sysml.conf;
 import java.io.ByteArrayInputStream;
 import java.io.FileNotFoundException;
 import java.io.IOException;
+import java.io.InputStream;
 import java.io.StringWriter;
 import java.util.HashMap;
 import java.util.Map;
@@ -45,6 +46,7 @@ import org.apache.sysml.runtime.DMLRuntimeException;
 import org.apache.sysml.runtime.io.IOUtilFunctions;
 import org.w3c.dom.Document;
 import org.w3c.dom.Element;
+import org.w3c.dom.Node;
 import org.w3c.dom.NodeList;
 import org.xml.sax.SAXException;
 
@@ -97,6 +99,8 @@ public class DMLConfig
 
 private String _fileName = null;
private Element _xmlRoot = null;
+   private DocumentBuilder _documentBuilder = null;
+   private Document _document = null;

static
{
@@ -130,7 +134,7 @@ public class DMLConfig
{

}
-   
+
public DMLConfig(String fileName) 
throws ParseException, FileNotFoundException
{
@@ -169,25 +173,32 @@ public class DMLConfig
 */
private void parseConfig () throws ParserConfigurationException, 
SAXException, IOException 
{
-   DocumentBuilderFactory factory = 
DocumentBuilderFactory.newInstance();
-   factory.setIgnoringComments(true); //ignore XML comments
-   DocumentBuilder builder = factory.newDocumentBuilder();
-   Document domTree = null;
+   DocumentBuilder builder = getDocumentBuilder();
+   _document = null;
if( _fileName.startsWith("hdfs:") || 
_fileName.startsWith("gpfs:")
|| IOUtilFunctions.isObjectStoreFileScheme(new 
Path(_fileName)) )
{
Path configFilePath = new Path(_fileName);
FileSystem DFS = 
IOUtilFunctions.getFileSystem(configFilePath);
-domTree = builder.parse(DFS.open(configFilePath));  
+   _document = builder.parse(DFS.open(configFilePath));
}
else  // config from local file system
{
-   domTree = builder.parse(_fileName);
+   _document = builder.parse(_fileName);
}
-   
-   _xmlRoot = domTree.getDocumentElement();
+
+   _xmlRoot = _document.getDocumentElement();
}
-   
+
+   private DocumentBuilder getDocumentBuilder() throws 
ParserConfigurationException {
+   if (_documentBuilder == null) {
+   DocumentBuilderFactory factory = 
DocumentBuilderFactory.newInstance();
+   factory.setIgnoringComments(true); //ignore XML comments
+   _documentBuilder = factory.newDocumentBuilder();
+   }
+   return _documentBuilder;
+   }
+
/**
 * Method to get string value of a configuration parameter
 * Handles processing of configuration parameters 
@@ -242,21 +253,7 @@ public class DMLConfig
return textVal;
}

-   /**
-* Method to update the string value of an element identified by a tag 
name
-* @param element the DOM element
-* @param tagName the tag name
-* @param newTextValue the new string value
-*/
-   private static void setTextValue(Element element, String tagName, 
String newTextValue) {
-   
-   NodeList list = element.getElementsByTagName(tagName);
-   if (list != nul

systemml git commit: [MINOR] fix for SYSTEMML_1795

2017-07-26 Thread nakul02

Repository: systemml
Updated Branches:
  refs/heads/master 3fd8e495e -> 2663ccd41


[MINOR] fix for SYSTEMML_1795

The GPUContextPool.AVAILABLE_GPUS is read after the lops are
constructed, but the value needs to be read before. This patch is a fix
for that problem.

Closes #592


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/2663ccd4
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/2663ccd4
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/2663ccd4

Branch: refs/heads/master
Commit: 2663ccd417e59908c3a461adfd217b667b58ea2d
Parents: 3fd8e49
Author: Nakul Jindal 
Authored: Wed Jul 26 15:37:06 2017 -0700
Committer: Nakul Jindal 
Committed: Wed Jul 26 15:37:06 2017 -0700

--
 bin/systemml-standalone.py | 3 ++-
 src/main/java/org/apache/sysml/api/DMLScript.java  | 6 +-
 src/main/java/org/apache/sysml/api/ScriptExecutorUtils.java| 2 --
 .../java/org/apache/sysml/api/mlcontext/ScriptExecutor.java| 4 
 src/main/java/org/apache/sysml/conf/DMLConfig.java | 5 +++--
 .../sysml/runtime/instructions/gpu/context/GPUContextPool.java | 2 +-
 6 files changed, 15 insertions(+), 7 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/systemml/blob/2663ccd4/bin/systemml-standalone.py
--
diff --git a/bin/systemml-standalone.py b/bin/systemml-standalone.py
index 367bcdf..a0ee8db 100755
--- a/bin/systemml-standalone.py
+++ b/bin/systemml-standalone.py
@@ -151,7 +151,8 @@ systemml_default_java_opts = \
 '-Xmx8g -Xms4g -Xmn1g ' + \
 '-cp ' + classpath + ' ' + \
 '-Dlog4j.configuration=file:' + log4j_properties_path + ' ' \
-'-Duser.dir=' + user_dir
+'-Duser.dir=' + user_dir 
+#'-agentlib:jdwp=transport=dt_socket,server=y,suspend=y,address=8111'
 
 
 # Reads in key-value pairs from the conf/systemml-env.sh file

http://git-wip-us.apache.org/repos/asf/systemml/blob/2663ccd4/src/main/java/org/apache/sysml/api/DMLScript.java
--
diff --git a/src/main/java/org/apache/sysml/api/DMLScript.java 
b/src/main/java/org/apache/sysml/api/DMLScript.java
index f428aa2..9cb5ebe 100644
--- a/src/main/java/org/apache/sysml/api/DMLScript.java
+++ b/src/main/java/org/apache/sysml/api/DMLScript.java
@@ -85,6 +85,7 @@ import 
org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext;
 import org.apache.sysml.runtime.controlprogram.parfor.ProgramConverter;
 import 
org.apache.sysml.runtime.controlprogram.parfor.stat.InfrastructureAnalyzer;
 import org.apache.sysml.runtime.controlprogram.parfor.util.IDHandler;
+import org.apache.sysml.runtime.instructions.gpu.context.GPUContextPool;
 import org.apache.sysml.runtime.io.IOUtilFunctions;
 import org.apache.sysml.runtime.matrix.CleanupMR;
 import org.apache.sysml.runtime.matrix.mapred.MRConfigurationNames;
@@ -659,13 +660,16 @@ public class DMLScript
//print basic time and environment info
printStartExecInfo( dmlScriptStr );

-   //Step 1: parse configuration files
+   //Step 1: parse configuration files & write any configuration 
specific global variables
DMLConfig dmlconf = 
DMLConfig.readConfigurationFile(fnameOptConfig);
ConfigurationManager.setGlobalConfig(dmlconf);  
CompilerConfig cconf = 
OptimizerUtils.constructCompilerConfig(dmlconf);
ConfigurationManager.setGlobalConfig(cconf);
LOG.debug("\nDML config: \n" + dmlconf.getConfigInfo());
 
+   // Sets the GPUs to use for this process (a range, all GPUs, 
comma separated list or a specific GPU)
+   GPUContextPool.AVAILABLE_GPUS = 
dmlconf.getTextValue(DMLConfig.AVAILABLE_GPUS);
+
//Step 2: set local/remote memory if requested (for compile in 
AM context) 
if( dmlconf.getBooleanValue(DMLConfig.YARN_APPMASTER) ){
DMLAppMasterUtils.setupConfigRemoteMaxMemory(dmlconf); 

http://git-wip-us.apache.org/repos/asf/systemml/blob/2663ccd4/src/main/java/org/apache/sysml/api/ScriptExecutorUtils.java
--
diff --git a/src/main/java/org/apache/sysml/api/ScriptExecutorUtils.java 
b/src/main/java/org/apache/sysml/api/ScriptExecutorUtils.java
index ebbcc21..389f661 100644
--- a/src/main/java/org/apache/sysml/api/ScriptExecutorUtils.java
+++ b/src/main/java/org/apache/sysml/api/ScriptExecutorUtils.java
@@ -79,8 +79,6 @@ public class ScriptExecutorUtils {
LibMatrixDNN.DISPLAY_STATISTICS = 
dmlconf.getBooleanValue(DMLConfig.EXTRA_DNN_STATS);
DMLScript.FINEGRAINED_STATISTIC

[2/2] systemml git commit: [SYSTEMML-1451] phase 2 work

2017-08-01 Thread nakul02

[SYSTEMML-1451] phase 2 work

Completed these tasks as part of Phase 2 of Google Summer of Code '17
- Decouple systemml-spark-submit.py
- Decouple systemml-standalone.py
- Refactor perf test suite to accept args like debug, stats, config, etc.
- Add HDFS support
- Google Docs support
- Compare SystemML with previous versions
- Pylint, Comment
- Extra arguments configuration Test
- Windows Test
- Doc update
- systemml standalone comments
- systemml spark submit comments

Closes #575


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/e94374af
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/e94374af
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/e94374af

Branch: refs/heads/master
Commit: e94374afb2e6be5dc81524f9c7a5de09b9f4ba26
Parents: a2db1ad
Author: krishnakalyan3 
Authored: Tue Aug 1 13:46:30 2017 -0700
Committer: Nakul Jindal 
Committed: Tue Aug 1 13:46:30 2017 -0700

--
 bin/systemml-spark-submit.py  | 278 +++
 bin/systemml-standalone.py| 256 +-
 bin/utils.py  | 113 ++
 docs/python-performance-test.md   |  35 +-
 scripts/perftest/python/datagen.py| 141 
 scripts/perftest/python/google_docs/stats.py  | 113 ++
 scripts/perftest/python/google_docs/update.py | 110 ++
 scripts/perftest/python/predict.py| 156 -
 scripts/perftest/python/run_perftest.py   | 135 ---
 scripts/perftest/python/train.py  | 257 +++---
 scripts/perftest/python/utils.py  | 390 -
 scripts/perftest/python/utils_exec.py | 137 
 scripts/perftest/python/utils_fs.py   | 162 +
 scripts/perftest/python/utils_misc.py | 347 ++
 14 files changed, 1580 insertions(+), 1050 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/systemml/blob/e94374af/bin/systemml-spark-submit.py
--
diff --git a/bin/systemml-spark-submit.py b/bin/systemml-spark-submit.py
index 30974ec..b6426b3 100755
--- a/bin/systemml-spark-submit.py
+++ b/bin/systemml-spark-submit.py
@@ -21,167 +21,131 @@
 # -
 
 import os
-import sys
-from os.path import join, exists, abspath
-from os import environ
 import glob
-import argparse
-import shutil
+from os.path import join
 import platform
-
-if environ.get('SPARK_HOME') is None:
-print('SPARK_HOME not set')
-sys.exit(1)
-else:
-spark_home = environ.get('SPARK_HOME')
+import argparse
+from utils import get_env_systemml_home, get_env_spark_home, find_dml_file, 
log4j_path, config_path
+
+
+def default_jars(systemml_home):
+"""
+return: String
+Location of systemml and jcuda jars
+"""
+build_dir = join(systemml_home, 'target')
+lib_dir = join(build_dir, 'lib')
+systemml_jar = build_dir + os.sep + "SystemML.jar"
+jcuda_jars = glob.glob(lib_dir + os.sep + "jcu*.jar")
+target_jars = ','.join(jcuda_jars)
+return target_jars, systemml_jar
+
+
+def spark_submit_entry(master, driver_memory, num_executors, executor_memory,
+   executor_cores, conf,
+   nvargs, args, config, explain, debug, stats, gpu, f):
+"""
+This function is responsible for the execution of arguments via
+subprocess call in hybrid_spark mode
+"""
+
+spark_home = get_env_spark_home()
+systemml_home = get_env_systemml_home()
 spark_path = join(spark_home, 'bin', 'spark-submit')
+script_file = find_dml_file(systemml_home, f)
 
+# Jars
+cuda_jars, systemml_jars = default_jars(systemml_home)
 
-# error help print
-def print_usage_and_exit():
-print('Usage: ./systemml-spark-submit.py -f  [arguments]')
-sys.exit(1)
-
-cparser = argparse.ArgumentParser(description='System-ML Spark Submit Script')
-
-# SPARK-SUBMIT Options
-cparser.add_argument('--master', default='local[*]', help='local, yarn-client, 
yarn-cluster', metavar='')
-cparser.add_argument('--driver-memory', default='5G', help='Memory for driver 
(e.g. 512M)', metavar='')
-cparser.add_argument('--num-executors', default='2', help='Number of executors 
to launch', metavar='')
-cparser.add_argument('--executor-memory', default='2G', help='Memory per 
executor', metavar='')
-cparser.add_argument('--executor-cores', default='1', help='Number of cores', 
metavar='')
-cparser.add_argument('--conf', help='Spark configuration file', nargs='+', 
metavar='')
-
-# SYSTEM-ML Options
-cparser.add_argument('-nvargs', help='List of attributeName-attributeValue 
pairs', nargs='+', metavar='')
-cparser.add_argument('-args', help='List of positional argument values', 
metava

[1/2] systemml git commit: [SYSTEMML-1451] phase 2 work

2017-08-01 Thread nakul02

Repository: systemml
Updated Branches:
  refs/heads/master a2db1ad89 -> e94374afb


http://git-wip-us.apache.org/repos/asf/systemml/blob/e94374af/scripts/perftest/python/train.py
--
diff --git a/scripts/perftest/python/train.py b/scripts/perftest/python/train.py
index 627ba03..ec784d7 100755
--- a/scripts/perftest/python/train.py
+++ b/scripts/perftest/python/train.py
@@ -22,14 +22,18 @@
 
 import sys
 from os.path import join
-from utils import config_writer, relevant_folders, mat_type_check
+from utils_misc import config_writer, mat_type_check
 from functools import reduce
+from utils_fs import relevant_folders
 
 # Contains configuration setting for training
 DATA_FORMAT = 'csv'
 
 
-def binomial_m_svm_train(save_folder_name, datagen_dir, train_dir):
+def binomial_m_svm_train(save_folder_name, datagen_dir, train_dir, config_dir):
+
+save_path = join(config_dir, save_folder_name)
+train_write = join(train_dir, save_folder_name)
 
 data_folders = []
 for i in [0, 1]:
@@ -39,21 +43,19 @@ def binomial_m_svm_train(save_folder_name, datagen_dir, 
train_dir):
 maxiter = 20
 X = join(datagen_dir, 'X.data')
 Y = join(datagen_dir, 'Y.data')
-
-full_path_train = join(train_dir, save_folder_name + '.' + str(i))
-data_folders.append(full_path_train)
-
-model = join(full_path_train, 'model.data')
-Log = join(full_path_train, 'Log.data')
-
+model = join(train_write + '.' + str(i), 'model.data')
+Log = join(train_write + '.' + str(i), 'Log.data')
 config = dict(X=X, Y=Y, icpt=icpt, classes=2, reg=reg, tol=tol, 
maxiter=maxiter,
   model=model, Log=Log, fmt=DATA_FORMAT)
-config_writer(full_path_train + '.json', config)
+config_writer(save_path + '.' + str(i) + '.json', config)
+data_folders.append(save_path + '.' + str(i))
 
 return data_folders
 
 
-def binomial_l2_svm_train(save_folder_name, datagen_dir, train_dir):
+def binomial_l2_svm_train(save_folder_name, datagen_dir, train_dir, 
config_dir):
+save_path = join(config_dir, save_folder_name)
+train_write = join(train_dir, save_folder_name)
 
 data_folders = []
 for i in [0, 1]:
@@ -63,23 +65,21 @@ def binomial_l2_svm_train(save_folder_name, datagen_dir, 
train_dir):
 maxiter = '100'
 X = join(datagen_dir, 'X.data')
 Y = join(datagen_dir, 'Y.data')
-
-full_path_train = join(train_dir, save_folder_name + '.' + str(i))
-data_folders.append(full_path_train)
-
-model = join(full_path_train, 'model.data')
-Log = join(full_path_train, 'Log.data')
-
+model = join(train_write + '.' + str(i), 'model.data')
+Log = join(train_write + '.' + str(i), 'Log.data')
 config = dict(X=X, Y=Y, icpt=icpt, reg=reg, tol=tol, maxiter=maxiter, 
model=model,
   Log=Log, fmt=DATA_FORMAT)
-config_writer(full_path_train + '.json', config)
+config_writer(save_path + '.' + str(i) + '.json', config)
+data_folders.append(save_path + '.' + str(i))
 
 return data_folders
 
 
-def binomial_multilogreg_train(save_folder_name, datagen_dir, train_dir):
-data_folders = []
+def binomial_multilogreg_train(save_folder_name, datagen_dir, train_dir, 
config_dir):
+save_path = join(config_dir, save_folder_name)
+train_write = join(train_dir, save_folder_name)
 
+data_folders = []
 for i in [0, 1, 2]:
 icpt = str(i)
 reg = '0.01'
@@ -88,125 +88,117 @@ def binomial_multilogreg_train(save_folder_name, 
datagen_dir, train_dir):
 mii = '5'
 X = join(datagen_dir, 'X.data')
 Y = join(datagen_dir, 'Y.data')
-
-full_path_train = join(train_dir, save_folder_name + '.' + str(i))
-data_folders.append(full_path_train)
-
-B = join(full_path_train, 'B.data')
-
+B = join(train_write + '.' + str(i), 'B.data')
 config = dict(X=X, Y=Y, icpt=icpt, reg=reg, tol=tol, moi=moi, mii=mii,
   B=B)
-config_writer(full_path_train + '.json', config)
-return data_folders
-
-
-def multinomial_m_svm_train(save_folder_name, datagen_dir, train_dir):
-
-data_folders = []
-for i in [0, 1]:
-icpt = str(i)
-reg = '0.01'
-tol = '0.0001'
-maxiter = '20'
-X = join(datagen_dir, 'X.data')
-Y = join(datagen_dir, 'Y.data')
-
-full_path_train = join(train_dir, save_folder_name + '.' + str(i))
-model = join(full_path_train, 'model.data')
-Log = join(full_path_train, 'Log.data')
-
-config = dict(X=X, Y=Y, icpt=icpt, classes=150, reg=reg, tol=tol, 
maxiter=maxiter,
-  model=model, Log=Log, fmt=DATA_FORMAT)
-config_writer(full_path_train + '.json', config)
-data_folders.append(full_path_train)
+config_writer(save_path + '.' + str(i) + '.json', config)
+

systemml git commit: [MINOR] bug fixes in the GPU backend

2017-08-08 Thread nakul02

Repository: systemml
Updated Branches:
  refs/heads/master 98a9d653d -> 815ca4f2a


[MINOR] bug fixes in the GPU backend

- Each thread is assigned a cuda library handle
- JCudaKernels is also made thread safe
- Removed setting GPUContext to null
- Bug fix in initial gpu budget estimate
- Cuda Kernels use blockId.x and threadId.x only

Closes #607


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/815ca4f2
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/815ca4f2
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/815ca4f2

Branch: refs/heads/master
Commit: 815ca4f2aedcbe491d10a873db99a9b5e6f29226
Parents: 98a9d65
Author: Nakul Jindal 
Authored: Tue Aug 8 13:29:11 2017 -0700
Committer: Nakul Jindal 
Committed: Tue Aug 8 13:29:11 2017 -0700

--
 src/main/cpp/kernels/SystemML.cu|  54 +--
 src/main/cpp/kernels/SystemML.ptx   | 333 +--
 .../controlprogram/ParForProgramBlock.java  |   3 -
 .../controlprogram/parfor/LocalParWorker.java   |  12 +-
 .../cp/FunctionCallCPInstruction.java   |   7 -
 .../gpu/context/ExecutionConfig.java|  26 +-
 .../instructions/gpu/context/GPUContext.java|  94 +++---
 .../gpu/context/GPUContextPool.java |   2 +-
 .../instructions/gpu/context/JCudaKernels.java  |   5 +-
 .../org/apache/sysml/test/gpu/GPUTests.java |  18 +
 .../test/gpu/MatrixMultiplicationOpTest.java|   1 +
 11 files changed, 303 insertions(+), 252 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/systemml/blob/815ca4f2/src/main/cpp/kernels/SystemML.cu
--
diff --git a/src/main/cpp/kernels/SystemML.cu b/src/main/cpp/kernels/SystemML.cu
index 297269f..dcd64b2 100644
--- a/src/main/cpp/kernels/SystemML.cu
+++ b/src/main/cpp/kernels/SystemML.cu
@@ -35,12 +35,13 @@ nvcc -ptx -arch=sm_30 SystemML.cu
  */
 extern "C"
 __global__ void copy_u2l_dense(double* ret, int dim, int N) {
-   int ix = blockIdx.x * blockDim.x + threadIdx.x;
-   int iy = blockIdx.y * blockDim.y + threadIdx.y;
+   int tid = blockIdx.x * blockDim.x + threadIdx.x;
+   int ix = tid / dim;
+   int iy = tid % dim;
int id_dest = iy * dim + ix;
if(iy > ix && id_dest < N) {
// TODO: Potential to reduce the number of threads by half
-   int id_src = ix * dim + iy;
+   int id_src = tid;
ret[id_dest] = ret[id_src];
}
 }
@@ -104,8 +105,9 @@ __forceinline__ __device__ double binaryOp(double x, double 
y, int op) {
 
 extern "C"
 __global__ void relu(double* A,  double* ret, int rlen, int clen) {
-   int ix = blockIdx.x * blockDim.x + threadIdx.x;
-   int iy = blockIdx.y * blockDim.y + threadIdx.y;
+   int tid = blockIdx.x * blockDim.x + threadIdx.x;
+   int ix = tid / clen;
+   int iy = tid % clen;
if(ix < rlen && iy < clen) {
int index = ix * clen + iy;
ret[index] = max(0.0, A[index]);
@@ -115,8 +117,9 @@ __global__ void relu(double* A,  double* ret, int rlen, int 
clen) {
 // This method computes the backpropagation errors for previous layer of relu 
operation
 extern "C"
 __global__ void relu_backward(double* X,  double* dout, double* ret, int rlen, 
int clen) {
-   int ix = blockIdx.x * blockDim.x + threadIdx.x;
-   int iy = blockIdx.y * blockDim.y + threadIdx.y;
+   int tid = blockIdx.x * blockDim.x + threadIdx.x;
+   int ix = tid / clen;
+   int iy = tid % clen;
if(ix < rlen && iy < clen) {
int index = ix * clen + iy;
ret[index] = X[index] > 0 ?  dout[index] : 0;
@@ -129,8 +132,9 @@ __global__ void relu_backward(double* X,  double* dout, 
double* ret, int rlen, i
 // This operation is often followed by conv2d and hence we have introduced 
bias_add(input, bias) built-in function
 extern "C"
 __global__ void bias_add(double* input,  double* bias, double* ret, int rlen, 
int clen, int PQ) {
-   int ix = blockIdx.x * blockDim.x + threadIdx.x;
-   int iy = blockIdx.y * blockDim.y + threadIdx.y;
+   int tid = blockIdx.x * blockDim.x + threadIdx.x;
+   int ix = tid / clen;
+   int iy = tid % clen;
if(ix < rlen && iy < clen) {
int index = ix * clen + iy;
int biasIndex = iy / PQ;
@@ -141,8 +145,9 @@ __global__ void bias_add(double* input,  double* bias, 
double* ret, int rlen, in
 // Performs the operation "ret <- A + alpha*B", where B is a vector
 extern "C"
 __global__ void daxpy_matrix_vector(double* A,  double* B, double alpha, 
double* ret, int rlenA, int clenA, int rlenB, int clenB) {
-   int ix = blockIdx.x * blockDim.x + threadIdx.x;
-   int iy = blockIdx.y * blockDim.y + threadIdx.y;
+

systemml git commit: [SYSTEMML-1816] toString does not print negative 0s anymore

2017-08-08 Thread nakul02

Repository: systemml
Updated Branches:
  refs/heads/master e1a762f65 -> 5906682b0


[SYSTEMML-1816] toString does not print negative 0s anymore

Closes #599


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/5906682b
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/5906682b
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/5906682b

Branch: refs/heads/master
Commit: 5906682b0f328a8179c66f960cedb6e68fb8a0e1
Parents: e1a762f
Author: Nakul Jindal 
Authored: Fri Jul 28 17:07:46 2017 -0700
Committer: Nakul Jindal 
Committed: Tue Aug 8 22:06:51 2017 -0700

--
 .../java/org/apache/sysml/runtime/util/DataConverter.java| 8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/systemml/blob/5906682b/src/main/java/org/apache/sysml/runtime/util/DataConverter.java
--
diff --git a/src/main/java/org/apache/sysml/runtime/util/DataConverter.java 
b/src/main/java/org/apache/sysml/runtime/util/DataConverter.java
index 10f043b..a758b4d 100644
--- a/src/main/java/org/apache/sysml/runtime/util/DataConverter.java
+++ b/src/main/java/org/apache/sysml/runtime/util/DataConverter.java
@@ -862,11 +862,15 @@ public class DataConverter
else {  // Dense Print Format
for (int i=0; i

systemml git commit: [HOTFIX] write stdout and stderr for perftests

2017-08-12 Thread nakul02

Repository: systemml
Updated Branches:
  refs/heads/master 95de23586 -> dc4bfd95e


[HOTFIX] write stdout and stderr for perftests

Closes #615


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/dc4bfd95
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/dc4bfd95
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/dc4bfd95

Branch: refs/heads/master
Commit: dc4bfd95e893f924d80ca3af25c101495134fa77
Parents: 95de235
Author: krishnakalyan3 
Authored: Sat Aug 12 12:31:26 2017 -0700
Committer: Nakul Jindal 
Committed: Sat Aug 12 12:32:16 2017 -0700

--
 scripts/perftest/python/run_perftest.py |  2 +-
 scripts/perftest/python/utils_exec.py   | 19 +--
 scripts/perftest/python/utils_misc.py   | 10 +-
 3 files changed, 23 insertions(+), 8 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/systemml/blob/dc4bfd95/scripts/perftest/python/run_perftest.py
--
diff --git a/scripts/perftest/python/run_perftest.py 
b/scripts/perftest/python/run_perftest.py
index a15d7e6..d430569 100755
--- a/scripts/perftest/python/run_perftest.py
+++ b/scripts/perftest/python/run_perftest.py
@@ -134,7 +134,7 @@ def algorithm_workflow(algo, exec_type, config_path, 
dml_file_name, action_mode,
 if exit_flag_success:
 time = 'data_exists'
 else:
-time = exec_dml_and_parse_time(exec_type, dml_file_name, args, 
spark_args_dict, sup_args_dict)
+time = exec_dml_and_parse_time(exec_type, dml_file_name, args, 
spark_args_dict, sup_args_dict, config_path)
 write_success(time, temp_cwd)
 
 print('{},{},{},{},{},{}'.format(algo, action_mode, intercept, mat_type, 
mat_shape, time))

http://git-wip-us.apache.org/repos/asf/systemml/blob/dc4bfd95/scripts/perftest/python/utils_exec.py
--
diff --git a/scripts/perftest/python/utils_exec.py 
b/scripts/perftest/python/utils_exec.py
index 0eb2873..87ae3cd 100755
--- a/scripts/perftest/python/utils_exec.py
+++ b/scripts/perftest/python/utils_exec.py
@@ -27,7 +27,7 @@ import re
 # Subprocess and log parsing related functions
 
 
-def subprocess_exec(cmd_string, extract=None):
+def subprocess_exec(cmd_string, log_file_path=None, extract=None):
 """
 Execute the input string as subprocess
 
@@ -38,18 +38,25 @@ def subprocess_exec(cmd_string, extract=None):
 Based on extract as time/dir we extract this information from
 the logs accordingly
 
+log_file_path: String
+Path to write the log file
+
 return: String
 Based on extract we return the relevant string
 """
 # Debug
 # print(cmd_string)
-proc1 = subprocess.Popen(shlex.split(cmd_string), stdout=subprocess.PIPE,
+exec_command = shlex.split(cmd_string)
+proc1 = subprocess.Popen(exec_command, stdout=subprocess.PIPE,
  stderr=subprocess.PIPE)
 
 error_arr, out_arr = get_all_logs(proc1)
 std_outs = out_arr + error_arr
 return_code = proc1.returncode
 
+if log_file_path is not None:
+write_logs(std_outs, log_file_path + '.log')
+
 if return_code == 0:
 if extract == 'time':
 return_data = parse_time(std_outs)
@@ -65,6 +72,14 @@ def subprocess_exec(cmd_string, extract=None):
 return return_data
 
 
+def write_logs(std_outs, log_file_path):
+"""
+Write all logs to the specified location
+"""
+with open(log_file_path, 'w')as log:
+log.write("\n".join(std_outs))
+
+
 def get_all_logs(process):
 """
 Based on the subprocess capture logs

http://git-wip-us.apache.org/repos/asf/systemml/blob/dc4bfd95/scripts/perftest/python/utils_misc.py
--
diff --git a/scripts/perftest/python/utils_misc.py 
b/scripts/perftest/python/utils_misc.py
index 0a765f6..e247ce8 100755
--- a/scripts/perftest/python/utils_misc.py
+++ b/scripts/perftest/python/utils_misc.py
@@ -166,7 +166,7 @@ def config_reader(read_path):
 return conf_file
 
 
-def exec_dml_and_parse_time(exec_type, dml_file_name, args, spark_args_dict, 
sup_args_dict):
+def exec_dml_and_parse_time(exec_type, dml_file_name, args, spark_args_dict, 
sup_args_dict, log_file_name=None):
 """
 This function is responsible of execution of input arguments via python 
sub process,
 We also extract time obtained from the output of this subprocess
@@ -186,6 +186,9 @@ def exec_dml_and_parse_time(exec_type, dml_file_name, args, 
spark_args_dict, sup
 sup_args_dict: Dictionary
 Supplementary arguments required by the script
 
+log_file_name: String
+Path to write the logfile
+
 return: String
 The value of time parsed from the logs / error
 """

systemml git commit: [DOC][HOTFIX] updates to the performance test scripts

2017-08-14 Thread nakul02

Repository: systemml
Updated Branches:
  refs/heads/master 54e809898 -> 667aeb2b7


[DOC][HOTFIX] updates to the performance test scripts

Closes #616


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/667aeb2b
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/667aeb2b
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/667aeb2b

Branch: refs/heads/master
Commit: 667aeb2b7f38b76b1ff85138426f215a03a4dfc4
Parents: 54e8098
Author: krishnakalyan3 
Authored: Mon Aug 14 15:18:50 2017 -0700
Committer: Nakul Jindal 
Committed: Mon Aug 14 15:18:50 2017 -0700

--
 docs/python-performance-test.md | 15 ++-
 scripts/perftest/python/utils_fs.py |  4 ++--
 2 files changed, 16 insertions(+), 3 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/systemml/blob/667aeb2b/docs/python-performance-test.md
--
diff --git a/docs/python-performance-test.md b/docs/python-performance-test.md
index 3d29f01..ce36c2d 100644
--- a/docs/python-performance-test.md
+++ b/docs/python-performance-test.md
@@ -177,7 +177,20 @@ In the example above `--tag` can be a major/minor systemml 
version and `--auth`
 Currently we only support time difference between algorithms in different 
versions. This can be obtained by running the script below
 `./stats.py --auth client_json.json --exec-mode singlenode --tags 1.0 2.0`
 
-Note: Please pip install `https://github.com/burnash/gspread` to use google 
docs client.
+We pass different `matrix shapes` using `--mat-shape` argument.
+
+Matrix Shape | Approximate Data Size 
+--- | --- |
+10k_1k|80MB
+100k_1k|800MB
+1M_1k|8GB
+10M_1k|80GB
+100M_1k|800GB
+
+For example the command below runs performance test for all data sizes 
described above
+`run_perftest.py --family binomial clustering multinomial regression1 
regression2 stats1 stats2 --mat-shape 10k_1k 100k_1k 1M_1k 10M_1k 100M_1k 
--master yarn-client  --temp-dir hdfs://localhost:9000/user/systemml`
+
+Note: Please use this command `pip3 install -r requirements.txt` before using 
the perftest scripts.
 
 
 ## Troubleshooting

http://git-wip-us.apache.org/repos/asf/systemml/blob/667aeb2b/scripts/perftest/python/utils_fs.py
--
diff --git a/scripts/perftest/python/utils_fs.py 
b/scripts/perftest/python/utils_fs.py
index 977c4f4..7e04907 100755
--- a/scripts/perftest/python/utils_fs.py
+++ b/scripts/perftest/python/utils_fs.py
@@ -134,12 +134,12 @@ def relevant_folders(path, algo, family, matrix_type, 
matrix_shape, mode):
 if mode == 'data-gen':
 sub_folder_name = '.'.join([family, current_matrix_type, 
current_matrix_shape])
 cmd = ['hdfs', 'dfs', '-ls', path]
-path_subdir = subprocess_exec(' '.join(cmd), 'dir')
+path_subdir = subprocess_exec(' '.join(cmd), extract='dir')
 
 if mode == 'train':
 sub_folder_name = '.'.join([algo, family, 
current_matrix_type, current_matrix_shape])
 cmd = ['hdfs', 'dfs', '-ls', path]
-path_subdir = subprocess_exec(' '.join(cmd), 'dir')
+path_subdir = subprocess_exec(' '.join(cmd), extract='dir')
 
 path_folders = list(filter(lambda x: contains_dir(x, 
sub_folder_name), path_subdir))

systemml git commit: [MINOR] updates to performance scripts

2017-08-16 Thread nakul02

Repository: systemml
Updated Branches:
  refs/heads/master a2bf0006f -> ce240af57


[MINOR] updates to performance scripts

Closes #618


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/ce240af5
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/ce240af5
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/ce240af5

Branch: refs/heads/master
Commit: ce240af57fb68caa3a978a8bad62701cb55a139d
Parents: a2bf000
Author: Nakul Jindal 
Authored: Wed Aug 16 11:14:43 2017 -0700
Committer: Nakul Jindal 
Committed: Wed Aug 16 11:14:43 2017 -0700

--
 bin/systemml-standalone.py  |  10 ++-
 scripts/perftest/python/run_perftest.py |  26 +++
 scripts/perftest/python/utils_exec.py   |   1 +
 scripts/perftest/python/utils_misc.py   | 104 +++
 4 files changed, 82 insertions(+), 59 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/systemml/blob/ce240af5/bin/systemml-standalone.py
--
diff --git a/bin/systemml-standalone.py b/bin/systemml-standalone.py
index 4000e75..02aefcf 100755
--- a/bin/systemml-standalone.py
+++ b/bin/systemml-standalone.py
@@ -43,7 +43,7 @@ def default_classpath(systemml_home):
 
 #TODO
 # User dir, fix for SYSTEMML_1795
-def standalone_execution_entry(nvargs, args, config, explain, debug, stats, 
gpu, f):
+def standalone_execution_entry(nvargs, args, config, explain, debug, stats, 
gpu, heapmem, f):
 """
 This function is responsible for the execution of arguments via
 subprocess call in singlenode mode
@@ -57,7 +57,7 @@ def standalone_execution_entry(nvargs, args, config, explain, 
debug, stats, gpu,
 else:
 default_cp = ':'.join(default_classpath(systemml_home))
 
-java_memory = '-Xmx8g -Xms4g -Xmn1g'
+java_memory = '-Xmx' + heapmem + ' -Xms4g -Xmn1g'
 
 # Log4j
 log4j = log4j_path(systemml_home)
@@ -93,7 +93,10 @@ def standalone_execution_entry(nvargs, args, config, 
explain, debug, stats, gpu,
'-f', script_file, '-exec', 'singlenode', '-config', default_config,
' '.join(ml_options)]
 
-return_code = os.system(' '.join(cmd))
+cmd = ' '.join(cmd)
+print(cmd)
+
+return_code = os.system(cmd)
 return return_code
 
 
@@ -115,6 +118,7 @@ if __name__ == '__main__':
 cparser.add_argument('-gpu', help='uses CUDA instructions when reasonable, 
'
   'set  option to skip conservative 
memory estimates '
   'and use GPU wherever possible', 
nargs='?')
+cparser.add_argument('-heapmem', help='maximum JVM heap memory', 
metavar='', default='8g')
 cparser.add_argument('-f', required=True, help='specifies dml/pydml file 
to execute; '
'path can be 
local/hdfs/gpfs', metavar='')
 

http://git-wip-us.apache.org/repos/asf/systemml/blob/ce240af5/scripts/perftest/python/run_perftest.py
--
diff --git a/scripts/perftest/python/run_perftest.py 
b/scripts/perftest/python/run_perftest.py
index d430569..8c3d1fa 100755
--- a/scripts/perftest/python/run_perftest.py
+++ b/scripts/perftest/python/run_perftest.py
@@ -32,8 +32,7 @@ from datagen import config_packets_datagen
 from train import config_packets_train
 from predict import config_packets_predict
 from utils_misc import get_families, config_reader, \
-exec_dml_and_parse_time, exec_test_data, check_predict, 
get_folder_metrics, args_dict_split, \
-get_config_args
+exec_dml_and_parse_time, exec_test_data, check_predict, 
get_folder_metrics, split_config_args
 from utils_fs import create_dir_local, write_success, check_SUCCESS_file_exists
 
 # A packet is a dictionary
@@ -84,8 +83,6 @@ ML_PREDICT = {'Kmeans': 'Kmeans-predict',
 
 DENSE_TYPE_ALGOS = ['clustering', 'stats1', 'stats2']
 
-sup_args_dict = {}
-
 
 # Responsible for execution and metric logging
 def algorithm_workflow(algo, exec_type, config_path, dml_file_name, 
action_mode, current_dir):
@@ -134,7 +131,7 @@ def algorithm_workflow(algo, exec_type, config_path, 
dml_file_name, action_mode,
 if exit_flag_success:
 time = 'data_exists'
 else:
-time = exec_dml_and_parse_time(exec_type, dml_file_name, args, 
spark_args_dict, sup_args_dict, config_path)
+time = exec_dml_and_parse_time(exec_type, dml_file_name, args, 
backend_args_dict, systemml_args_dict, config_path)
 write_success(time, temp_cwd)
 
 print('{},{},{},{},{},{}'.format(algo, action_mode, intercept, mat_type, 
mat_shape, time))
@@ -222,7 +219,7 @@ def perf_test_entry(family, algo, exec_type, mat_type, 
mat_shape, config_dir, mo
 # Statistic family do not require to be s

systemml git commit: [MINOR] fixes for HDFS path

2017-08-17 Thread nakul02

Repository: systemml
Updated Branches:
  refs/heads/master 4384ebbda -> 114200724


[MINOR] fixes for HDFS path

Closes #624


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/11420072
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/11420072
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/11420072

Branch: refs/heads/master
Commit: 11420072412c0c873b72267d1e9764c87abc57b4
Parents: 4384ebb
Author: krishnakalyan3 
Authored: Thu Aug 17 11:43:49 2017 -0700
Committer: Nakul Jindal 
Committed: Thu Aug 17 11:43:49 2017 -0700

--
 bin/utils.py|  9 +++
 scripts/perftest/python/run_perftest.py | 17 +++---
 scripts/perftest/python/utils_exec.py   | 19 ++-
 scripts/perftest/python/utils_fs.py | 11 +
 scripts/perftest/python/utils_misc.py   | 35 
 5 files changed, 78 insertions(+), 13 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/systemml/blob/11420072/bin/utils.py
--
diff --git a/bin/utils.py b/bin/utils.py
index 6f40881..cf17960 100644
--- a/bin/utils.py
+++ b/bin/utils.py
@@ -74,15 +74,16 @@ def find_dml_file(systemml_home, script_file):
 Location of the dml script
 """
 scripts_dir = join(systemml_home, 'scripts')
-if not (exists(script_file)):
-script_file = find_file(script_file, scripts_dir)
-if script_file is None:
+if not exists(script_file):
+script_file_path = find_file(script_file, scripts_dir)
+if script_file_path is not None:
+return script_file_path
+else:
 print('Could not find DML script: ' + script_file)
 sys.exit()
 
 return script_file
 
-
 def log4j_path(systemml_home):
 """
 Create log4j.properties from the template if not exist

http://git-wip-us.apache.org/repos/asf/systemml/blob/11420072/scripts/perftest/python/run_perftest.py
--
diff --git a/scripts/perftest/python/run_perftest.py 
b/scripts/perftest/python/run_perftest.py
index 8c3d1fa..20f5380 100755
--- a/scripts/perftest/python/run_perftest.py
+++ b/scripts/perftest/python/run_perftest.py
@@ -32,7 +32,8 @@ from datagen import config_packets_datagen
 from train import config_packets_train
 from predict import config_packets_predict
 from utils_misc import get_families, config_reader, \
-exec_dml_and_parse_time, exec_test_data, check_predict, 
get_folder_metrics, split_config_args
+exec_dml_and_parse_time, exec_test_data, check_predict, 
get_folder_metrics, split_config_args, \
+get_default_dir
 from utils_fs import create_dir_local, write_success, check_SUCCESS_file_exists
 
 # A packet is a dictionary
@@ -275,7 +276,7 @@ if __name__ == '__main__':
 default_mat_shape = ['10k_100']
 
 # Default temp directory, contains everything generated in perftest
-default_temp_dir = join(systemml_home, 'scripts', 'perftest', 'temp')
+default_config_dir = join(systemml_home, 'scripts', 'perftest', 'temp')
 
 # Initialize time
 start_time = time.time()
@@ -308,7 +309,7 @@ if __name__ == '__main__':
 cparser.add_argument('--mat-shape', default=default_mat_shape, help='space 
separated list of shapes of matrices '
  'to generate (e.g 10k_1k, 20M_4k)', metavar='', 
nargs='+')
 
-cparser.add_argument('--config-dir', default=default_temp_dir, 
help='temporary directory '
+cparser.add_argument('--config-dir', default=default_config_dir, 
help='temporary directory '
  'where generated, training and prediction data is 
put', metavar='')
 cparser.add_argument('--filename', default='perf_test', help='name of the 
output file for the perf'
  ' metrics', metavar='')
@@ -316,8 +317,7 @@ if __name__ == '__main__':
  help='space separated list of types of workloads to 
run (available: data-gen, train, predict)',
  metavar='', choices=workload, nargs='+')
 # Change this to temp-dir
-cparser.add_argument('--temp-dir', default=default_temp_dir,
- help='define the file system to work on', metavar='')
+cparser.add_argument('--temp-dir', help='define the file system to work 
on', metavar='')
 
 # Configuration Options
 cparser.add_argument('-stats', help='Monitor and report 
caching/recompilation statistics, '
@@ -350,8 +350,8 @@ if __name__ == '__main__':
 # Global variables
 perftest_args_dict, systemml_args_dict, backend_args_dict = 
split_config_args(all_arg_dict)
 
-# Debug arguments
-# print(arg_dict)
+# temp_dir hdfs / local path check
+perftest_args_dict['temp_di

systemml git commit: [MINOR] Print statistics to stderr if an error has occurred

2017-08-25 Thread nakul02

Repository: systemml
Updated Branches:
  refs/heads/master 1d83cedb7 -> ac0416883


[MINOR] Print statistics to stderr if an error has occurred

Closes #631


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/ac041688
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/ac041688
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/ac041688

Branch: refs/heads/master
Commit: ac04168836cc68f9af940c08baccab575c7e2cb3
Parents: 1d83ced
Author: Nakul Jindal 
Authored: Fri Aug 25 17:08:44 2017 -0700
Committer: Nakul Jindal 
Committed: Fri Aug 25 17:08:44 2017 -0700

--
 .../apache/sysml/api/ScriptExecutorUtils.java   | 20 
 1 file changed, 16 insertions(+), 4 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/systemml/blob/ac041688/src/main/java/org/apache/sysml/api/ScriptExecutorUtils.java
--
diff --git a/src/main/java/org/apache/sysml/api/ScriptExecutorUtils.java 
b/src/main/java/org/apache/sysml/api/ScriptExecutorUtils.java
index 5f9c0a2..09897a5 100644
--- a/src/main/java/org/apache/sysml/api/ScriptExecutorUtils.java
+++ b/src/main/java/org/apache/sysml/api/ScriptExecutorUtils.java
@@ -80,6 +80,8 @@ public class ScriptExecutorUtils {
DMLScript.FINEGRAINED_STATISTICS = 
dmlconf.getBooleanValue(DMLConfig.EXTRA_FINEGRAINED_STATS);
DMLScript.STATISTICS_MAX_WRAP_LEN = 
dmlconf.getIntValue(DMLConfig.STATS_MAX_WRAP_LEN);
 
+   boolean exceptionThrown = false;
+
Statistics.startRunTimer();
try {
// run execute (w/ exception handling to ensure proper 
shutdown)
@@ -93,6 +95,9 @@ public class ScriptExecutorUtils {
ec.setGPUContexts(gCtxs);
}
rtprog.execute(ec);
+   } catch (Throwable e) {
+   exceptionThrown = true;
+   throw e;
} finally { // ensure cleanup/shutdown
if (DMLScript.USE_ACCELERATOR && 
!ec.getGPUContexts().isEmpty()) {
ec.getGPUContexts().forEach(gCtx -> 
gCtx.clearTemporaryMemory());
@@ -104,10 +109,17 @@ public class ScriptExecutorUtils {
// display statistics (incl caching stats if enabled)
Statistics.stopRunTimer();
 
-   if(statisticsMaxHeavyHitters > 0)
-   
System.out.println(Statistics.display(statisticsMaxHeavyHitters));
-   else
-   System.out.println(Statistics.display());
+   if (!exceptionThrown) {
+   if (statisticsMaxHeavyHitters > 0)
+   
System.out.println(Statistics.display(statisticsMaxHeavyHitters));
+   else
+   
System.out.println(Statistics.display());
+   } else {
+   if (statisticsMaxHeavyHitters > 0)
+   
System.err.println(Statistics.display(statisticsMaxHeavyHitters));
+   else
+   
System.err.println(Statistics.display());
+   }
}
}

systemml git commit: [SYSTEMML-1451][Phase3] phase 3 work

2017-08-26 Thread nakul02

Repository: systemml
Updated Branches:
  refs/heads/master aedceb611 -> d2efa65c8


[SYSTEMML-1451][Phase3] phase 3 work

- Offline CSV support
- Family bug fix
- Plots
- Doc Update
- Stats update
- Bug train, predict append family name

Closes #604


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/d2efa65c
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/d2efa65c
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/d2efa65c

Branch: refs/heads/master
Commit: d2efa65c89e3f6eaf7073c314eb56a033c8c8d5f
Parents: aedceb6
Author: krishnakalyan3 
Authored: Sat Aug 26 11:52:59 2017 -0700
Committer: Nakul Jindal 
Committed: Sat Aug 26 11:52:59 2017 -0700

--
 docs/python-performance-test.md |  16 ++-
 pom.xml |   2 +
 scripts/perftest/python/datagen.py  |   1 +
 .../perftest/python/google_docs/gdocs_utils.py  |  35 +
 scripts/perftest/python/google_docs/stats.py| 134 +--
 scripts/perftest/python/google_docs/update.py   |  55 ++--
 scripts/perftest/python/predict.py  |   9 +-
 scripts/perftest/python/requirements.txt|   4 +
 scripts/perftest/python/run_perftest.py |  51 ---
 scripts/perftest/python/train.py|   9 +-
 scripts/perftest/python/utils_exec.py   |   6 +-
 scripts/perftest/python/utils_fs.py |   6 +
 scripts/perftest/python/utils_misc.py   |   6 +-
 13 files changed, 221 insertions(+), 113 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/systemml/blob/d2efa65c/docs/python-performance-test.md
--
diff --git a/docs/python-performance-test.md b/docs/python-performance-test.md
index ce36c2d..25e1f35 100644
--- a/docs/python-performance-test.md
+++ b/docs/python-performance-test.md
@@ -148,6 +148,17 @@ Run performance test for all algorithms under the family 
`regression2` and log w
 Run performance test for all algorithms using HDFS.
 
 
+## Result Consolidation and Plotting
+We have two scripts: `stats.py` for pulling results from google docs and 
`update.py` for updating results to google docs or the local file system.
+
+Example of `update.py` would be below
+`./scripts/perftest/python/google_docs/update.py --file  
../../temp/perf_test_singlenode.out --exec-type singlenode --tag 2 --append 
test.csv` 
+The arguments are: `--file`, the path of the perf-test output; `--exec-type`, the 
execution mode used to generate the perf-test output; `--tag`, the 
release version or a unique name; and `--append`, an optional argument that 
appends the results to a local csv file. If `--auth` is used instead of `--append`, 
it needs the location of the `google api key` file.
+
+Example of `stats.py` below 
+`  ./stats.py --auth ../key/client_json.json --exec-type singlenode --plot 
stats1_data-gen_none_dense_10k_100`
+`--plot` argument needs the name of the composite key that you would like to 
compare results over. If this argument is not specified the results would be 
grouped by keys.
+
 ## Operational Notes
 
 All performance test depend mainly on two scripts for execution 
`systemml-standalone.py` and `systemml-spark-submit.py`. Incase we need to 
change standalone or spark parameters we need to manually change these 
parameters in their respective scripts.
@@ -158,7 +169,7 @@ The logs contain the following information below comma 
separated.
 
 algorithm | run_type | intercept | matrix_type | data_shape | time_sec
 --- | --- | --- | --- | --- | --- |
-multinomial|data-gen|0|dense|10k_100| 0.33
+multinomial|data-gen|0|10k_100|dense| 0.33
 MultiLogReg|train|0|10k_100|dense|6.956
 MultiLogReg|predict|0|10k_100|dense|4.780
 
@@ -187,9 +198,12 @@ Matrix Shape | Approximate Data Size
 10M_1k|80GB
 100M_1k|800GB
 
+
 For example the command below runs performance test for all data sizes 
described above
 `run_perftest.py --family binomial clustering multinomial regression1 
regression2 stats1 stats2 --mat-shape 10k_1k 100k_1k 1M_1k 10M_1k 100M_1k 
--master yarn-client  --temp-dir hdfs://localhost:9000/user/systemml`
 
+By default, data generated in `hybrid_spark` execution mode is in the current 
user's `hdfs` home directory.
+
 Note: Please use this command `pip3 install -r requirements.txt` before using 
the perftest scripts.
 
 

http://git-wip-us.apache.org/repos/asf/systemml/blob/d2efa65c/pom.xml
--
diff --git a/pom.xml b/pom.xml
index 2ed9374..0ee382c 100644
--- a/pom.xml
+++ b/pom.xml
@@ -897,6 +897,8 @@

src/test/scripts/functions/jmlc/tfmtd_example/dummycoded.column.names

systemml git commit: [SYSTEMML-1847] bug fixes for gpu from ml algos

2017-08-28 Thread nakul02

Repository: systemml
Updated Branches:
  refs/heads/master 428f3aa21 -> de0513415


[SYSTEMML-1847] bug fixes for gpu from ml algos

- Fixed errors in -gpu force arguments
- Fix to GPU solve - converts sparse matrices to dense
- Bug fix in GPUContext::clearTemporaryMemory
- Fix for removing recorded GPUObjects
- Estimate memory for each parfor body and set degree of parallelism
- Setting cuda pointers to null after freeing
- Fix after rebase with master for SOLVE on GPU

Closes #626


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/de051341
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/de051341
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/de051341

Branch: refs/heads/master
Commit: de0513415e8fb6e9b9f289bc261612091bd4e664
Parents: 428f3aa
Author: Nakul Jindal 
Authored: Mon Aug 28 13:38:49 2017 -0700
Committer: Nakul Jindal 
Committed: Mon Aug 28 13:38:49 2017 -0700

--
 scripts/perftest/python/utils_misc.py   |  12 +-
 .../java/org/apache/sysml/hops/BinaryOp.java|   5 +-
 .../controlprogram/ParForProgramBlock.java  |   3 -
 .../parfor/opt/OptimizerRuleBased.java  |  59 ++-
 .../instructions/gpu/context/CSRPointer.java|   3 +
 .../instructions/gpu/context/GPUContext.java|  36 +++--
 .../instructions/gpu/context/GPUObject.java |   8 +
 .../runtime/matrix/data/LibMatrixCUDA.java  | 162 +--
 8 files changed, 176 insertions(+), 112 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/systemml/blob/de051341/scripts/perftest/python/utils_misc.py
--
diff --git a/scripts/perftest/python/utils_misc.py 
b/scripts/perftest/python/utils_misc.py
index f9904c5..da9dbcb 100755
--- a/scripts/perftest/python/utils_misc.py
+++ b/scripts/perftest/python/utils_misc.py
@@ -211,20 +211,18 @@ def exec_dml_and_parse_time(exec_type, dml_file_name, 
args, backend_args_dict, s
 """
 
 algorithm = dml_file_name + '.dml'
-
-sup_args = ''.join(['{} {}'.format(k, v) for k, v in 
systemml_args_dict.items()])
-
+sup_args = ' '.join(['{} {}'.format(k, v) for k, v in 
systemml_args_dict.items()])
 if exec_type == 'singlenode':
 exec_script = join(os.environ.get('SYSTEMML_HOME'), 'bin', 
'systemml-standalone.py')
-singlenode_pre_args = ''.join([' {} {} '.format(k, v) for k, v in 
backend_args_dict.items()])
-args = ''.join(['{} {}'.format(k, v) for k, v in args.items()])
+singlenode_pre_args = ' '.join(['{} {}'.format(k, v) for k, v in 
backend_args_dict.items()])
+args = ' '.join(['{} {}'.format(k, v) for k, v in args.items()])
 cmd = [exec_script, singlenode_pre_args, '-f', algorithm, args, 
sup_args]
 cmd_string = ' '.join(cmd)
 
 if exec_type == 'hybrid_spark':
 exec_script = join(os.environ.get('SYSTEMML_HOME'), 'bin', 
'systemml-spark-submit.py')
-spark_pre_args = ''.join([' {} {} '.format(k, v) for k, v in 
backend_args_dict.items()])
-args = ''.join(['{} {}'.format(k, v) for k, v in args.items()])
+spark_pre_args = ' '.join([' {} {} '.format(k, v) for k, v in 
backend_args_dict.items()])
+args = ' '.join(['{} {}'.format(k, v) for k, v in args.items()])
 cmd = [exec_script, spark_pre_args, '-f', algorithm, args, sup_args]
 cmd_string = ' '.join(cmd)
 

http://git-wip-us.apache.org/repos/asf/systemml/blob/de051341/src/main/java/org/apache/sysml/hops/BinaryOp.java
--
diff --git a/src/main/java/org/apache/sysml/hops/BinaryOp.java 
b/src/main/java/org/apache/sysml/hops/BinaryOp.java
index ad9f0ad..cd1f715 100644
--- a/src/main/java/org/apache/sysml/hops/BinaryOp.java
+++ b/src/main/java/org/apache/sysml/hops/BinaryOp.java
@@ -1058,7 +1058,10 @@ public class BinaryOp extends Hop

//ensure cp exec type for single-node operations
if ( op == OpOp2.SOLVE ) {
-   _etype = ExecType.CP;
+   if (isGPUEnabled())
+   _etype = ExecType.GPU;
+   else
+   _etype = ExecType.CP;
}

return _etype;

http://git-wip-us.apache.org/repos/asf/systemml/blob/de051341/src/main/java/org/apache/sysml/runtime/controlprogram/ParForProgramBlock.java
--
diff --git 
a/src/main/java/org/apache/sysml/runtime/controlprogram/ParForProgramBlock.java 
b/src/main/java/org/apache/sysml/runtime/controlprogram/ParForProgramBlock.java
index 1968c26..3a9bf51 100644
--- 
a/src/main/java/org/apache/sysml/runtime/controlprogram/ParForProgramBlock

systemml git commit: [SYSTEMML-1895] jcuda for windows & linux (x86_64, ppc64le) are included in extra jar

2017-09-07 Thread nakul02

Repository: systemml
Updated Branches:
  refs/heads/master 137fbf18a -> c00029a7b


[SYSTEMML-1895] jcuda for windows & linux (x86_64, ppc64le) are included in 
extra jar

Closes #656


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/c00029a7
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/c00029a7
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/c00029a7

Branch: refs/heads/master
Commit: c00029a7be735dcaba533c50ba69169b18ef1675
Parents: 137fbf1
Author: Nakul Jindal 
Authored: Thu Sep 7 14:42:12 2017 -0700
Committer: Nakul Jindal 
Committed: Thu Sep 7 14:42:12 2017 -0700

--
 pom.xml| 106 
 src/assembly/extra.xml |  29 +++-
 2 files changed, 115 insertions(+), 20 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/systemml/blob/c00029a7/pom.xml
--
diff --git a/pom.xml b/pom.xml
index 4bbe714..eb4337d 100644
--- a/pom.xml
+++ b/pom.xml
@@ -1220,94 +1220,164 @@


org.jcuda
-   jcufft
+   jcusparse
${jcuda.version}
${jcuda.scope}


org.jcuda
-   jcusparse
+   jcusolver
${jcuda.version}
${jcuda.scope}


org.jcuda
-   jcusolver
+   jcudnn
${jcuda.version}
${jcuda.scope}

+

org.jcuda
-   jcurand
+   jcuda-natives
+   ${jcuda.os}-${jcuda.arch}
${jcuda.version}
${jcuda.scope}


org.jcuda
-   jnvgraph
+   jcublas-natives
+   ${jcuda.os}-${jcuda.arch}
${jcuda.version}
${jcuda.scope}


org.jcuda
-   jcudnn
+   jcusparse-natives
+   ${jcuda.os}-${jcuda.arch}
+   ${jcuda.version}
+   ${jcuda.scope}
+   
+   
+   org.jcuda
+   jcusolver-natives
+   ${jcuda.os}-${jcuda.arch}
+   ${jcuda.version}
+   ${jcuda.scope}
+   
+   
+   org.jcuda
+   jcudnn-natives
+   ${jcuda.os}-${jcuda.arch}
${jcuda.version}
${jcuda.scope}

 
+   

org.jcuda
jcuda-natives
-   ${jcuda.os}-${jcuda.arch}
+   windows-x86_64
${jcuda.version}
${jcuda.scope}


org.jcuda
jcublas-natives
-   ${jcuda.os}-${jcuda.arch}
+   windows-x86_64
${jcuda.version}
${jcuda.scope}


org.jcuda
-   jcufft-natives
-   ${jcuda.os}-${jcuda.arch}
+   jcusparse-natives
+   windows-x86_64
+   ${jcuda.version}
+   ${jcuda.scope}
+   
+   
+   org.jcuda
+   jcusolver-natives
+   windows-x86_64
+   ${jcuda.version}
+   ${jcuda.scope}
+   
+   
+   org.jcuda
+   jcudnn-natives
+   windows-x86_64
+   ${jcuda.version}
+   ${jcuda.scope}
+   
+
+   
+   org.jcuda
+   jcuda-natives
+   linux-x86_64
+   ${jcuda.version}
+   ${jcuda.scope}
+   
+   
+   org.jcuda
+   jcublas-natives
+   linux-x86_64
${jcuda.version}
${jcuda.scope}

systemml git commit: [SYSTEMML-1895] jcuda for win & linux (x86_64, ppc64le) included in extra jar

2017-09-07 Thread nakul02

Repository: systemml
Updated Branches:
  refs/heads/branch-0.15 467de1cb1 -> ea8e951bc


[SYSTEMML-1895] jcuda for win & linux (x86_64, ppc64le) included in extra jar

Closes #660


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/ea8e951b
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/ea8e951b
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/ea8e951b

Branch: refs/heads/branch-0.15
Commit: ea8e951bc3360860d9931be5ede9850bec6088bc
Parents: 467de1c
Author: Nakul Jindal 
Authored: Thu Sep 7 15:12:04 2017 -0700
Committer: Nakul Jindal 
Committed: Thu Sep 7 15:12:04 2017 -0700

--
 pom.xml| 106 
 src/assembly/extra.xml |  29 +++-
 2 files changed, 115 insertions(+), 20 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/systemml/blob/ea8e951b/pom.xml
--
diff --git a/pom.xml b/pom.xml
index 9b57ba8..abffc0b 100644
--- a/pom.xml
+++ b/pom.xml
@@ -1220,94 +1220,164 @@


org.jcuda
-   jcufft
+   jcusparse
${jcuda.version}
${jcuda.scope}


org.jcuda
-   jcusparse
+   jcusolver
${jcuda.version}
${jcuda.scope}


org.jcuda
-   jcusolver
+   jcudnn
${jcuda.version}
${jcuda.scope}

+

org.jcuda
-   jcurand
+   jcuda-natives
+   ${jcuda.os}-${jcuda.arch}
${jcuda.version}
${jcuda.scope}


org.jcuda
-   jnvgraph
+   jcublas-natives
+   ${jcuda.os}-${jcuda.arch}
${jcuda.version}
${jcuda.scope}


org.jcuda
-   jcudnn
+   jcusparse-natives
+   ${jcuda.os}-${jcuda.arch}
+   ${jcuda.version}
+   ${jcuda.scope}
+   
+   
+   org.jcuda
+   jcusolver-natives
+   ${jcuda.os}-${jcuda.arch}
+   ${jcuda.version}
+   ${jcuda.scope}
+   
+   
+   org.jcuda
+   jcudnn-natives
+   ${jcuda.os}-${jcuda.arch}
${jcuda.version}
${jcuda.scope}

 
+   

org.jcuda
jcuda-natives
-   ${jcuda.os}-${jcuda.arch}
+   windows-x86_64
${jcuda.version}
${jcuda.scope}


org.jcuda
jcublas-natives
-   ${jcuda.os}-${jcuda.arch}
+   windows-x86_64
${jcuda.version}
${jcuda.scope}


org.jcuda
-   jcufft-natives
-   ${jcuda.os}-${jcuda.arch}
+   jcusparse-natives
+   windows-x86_64
+   ${jcuda.version}
+   ${jcuda.scope}
+   
+   
+   org.jcuda
+   jcusolver-natives
+   windows-x86_64
+   ${jcuda.version}
+   ${jcuda.scope}
+   
+   
+   org.jcuda
+   jcudnn-natives
+   windows-x86_64
+   ${jcuda.version}
+   ${jcuda.scope}
+   
+
+   
+   org.jcuda
+   jcuda-natives
+   linux-x86_64
+   ${jcuda.version}
+   ${jcuda.scope}
+   
+   
+   org.jcuda
+   jcublas-natives
+   linux-x86_64
${jcuda.version}
${jcuda.scope}

systemml git commit: [MINOR] gpu memory leak fix

2017-09-19 Thread nakul02

Repository: systemml
Updated Branches:
  refs/heads/master f86879bd0 -> ec5dfda57


[MINOR] gpu memory leak fix

- Changed list of free pointers to set of free pointers for GPU
- Changed threadlocal cuda handles to non threadlocal. This is assuming there 
will be one thread per GPU.

Closes #665


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/ec5dfda5
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/ec5dfda5
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/ec5dfda5

Branch: refs/heads/master
Commit: ec5dfda57a42b172886dd5d42bfe3b034b30c7b7
Parents: f86879b
Author: Nakul Jindal 
Authored: Tue Sep 19 14:57:16 2017 -0700
Committer: Nakul Jindal 
Committed: Tue Sep 19 14:57:16 2017 -0700

--
 src/main/cpp/CMakeLists.txt |   4 +-
 .../instructions/gpu/context/GPUContext.java| 313 +++
 2 files changed, 179 insertions(+), 138 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/systemml/blob/ec5dfda5/src/main/cpp/CMakeLists.txt
--
diff --git a/src/main/cpp/CMakeLists.txt b/src/main/cpp/CMakeLists.txt
index 47555bf..04e12b4 100644
--- a/src/main/cpp/CMakeLists.txt
+++ b/src/main/cpp/CMakeLists.txt
@@ -29,6 +29,7 @@ option(USE_INTEL_MKL "Whether to use Intel MKL (Defaults to 
compiling with Intel
 
 # Build a shared libraray
 add_library(systemml SHARED libmatrixdnn.cpp  libmatrixmult.cpp  systemml.cpp)
+set_target_properties(systemml PROPERTIES MACOSX_RPATH 1)
 
 set(MATH_LIBRARIES "")
 
@@ -72,5 +73,6 @@ if (USE_OPEN_BLAS)
   find_package(OpenMP REQUIRED)
   set_target_properties(systemml PROPERTIES LINK_FLAGS "${OpenMP_CXX_FLAGS} 
${MATH_LIBRARIES}")
 elseif(USE_INTEL_MKL)
-  set_target_properties(systemml PROPERTIES LINK_FLAGS ${MATH_LIBRARIES}")
+  set_target_properties(systemml PROPERTIES LINK_FLAGS "${MATH_LIBRARIES}")
 endif()
+

http://git-wip-us.apache.org/repos/asf/systemml/blob/ec5dfda5/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUContext.java
--
diff --git 
a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUContext.java
 
b/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUContext.java
index 271109d..8a823cc 100644
--- 
a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUContext.java
+++ 
b/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUContext.java
@@ -41,9 +41,11 @@ import java.util.ArrayList;
 import java.util.Collections;
 import java.util.Comparator;
 import java.util.HashMap;
-import java.util.LinkedList;
+import java.util.HashSet;
+import java.util.Iterator;
 import java.util.Map;
 import java.util.Map.Entry;
+import java.util.Set;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -88,83 +90,50 @@ public class GPUContext {
 * active device assigned to this GPUContext instance
 */
private final int deviceNum;
-   // Invoke cudaMemGetInfo to get available memory information. Useful if 
GPU is shared among multiple application.
-   public double GPU_MEMORY_UTILIZATION_FACTOR = 
ConfigurationManager.getDMLConfig()
-   
.getDoubleValue(DMLConfig.GPU_MEMORY_UTILIZATION_FACTOR);
-   /**
-* Map of free blocks allocate on GPU. maps size_of_block -> pointer on 
GPU
-*/
-   private LRUCacheMap> freeCUDASpaceMap = new 
LRUCacheMap<>();
-   /**
-* To record size of allocated blocks
-*/
-   private HashMap cudaBlockSizeMap = new HashMap<>();
-   /**
-* list of allocated {@link GPUObject} instances allocated on {@link 
GPUContext#deviceNum} GPU
-* These are matrices allocated on the GPU on which rmvar hasn't been 
called yet.
-* If a {@link GPUObject} has more than one lock on it, it cannot be 
freed
-* If it has zero locks on it, it can be freed, but it is preferrable 
to keep it around
-* so that an extraneous host to dev transfer can be avoided
-*/
-   private ArrayList allocatedGPUObjects = new ArrayList<>();
/**
 * cudnnHandle for Deep Neural Network operations on the GPU
 */
-   private final ThreadLocal cudnnHandle = new 
ThreadLocal<>();
+   private cudnnHandle cudnnHandle;
/**
 * cublasHandle for BLAS operations on the GPU
 */
-   private final ThreadLocal cublasHandle = new 
ThreadLocal<>();
+   private cublasHandle cublasHandle;
/**
 * cusparseHandle for certain sparse BLAS operations on the GPU
 */
-   private final ThreadLocal cusparseHandle = new 
ThreadLocal<>();
+   private cusparseHandle cusparseHandle;

systemml git commit: [MINOR] bug fix in perf test script

2017-09-20 Thread nakul02

Repository: systemml
Updated Branches:
  refs/heads/master f6ea240ca -> a134997e6


[MINOR] bug fix in perf test script


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/a134997e
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/a134997e
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/a134997e

Branch: refs/heads/master
Commit: a134997e682de07a49bbf5052a46fba8f7b9ba30
Parents: f6ea240
Author: Nakul Jindal 
Authored: Wed Sep 20 12:34:02 2017 -0700
Committer: Nakul Jindal 
Committed: Wed Sep 20 12:34:02 2017 -0700

--
 pom.xml   |  4 ++--
 scripts/perftest/python/utils_misc.py | 34 --
 2 files changed, 25 insertions(+), 13 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/systemml/blob/a134997e/pom.xml
--
diff --git a/pom.xml b/pom.xml
index 7e04af5..1ee4399 100644
--- a/pom.xml
+++ b/pom.xml
@@ -348,7 +348,7 @@

com.github.os72
protoc-jar-maven-plugin
-   3.0.0-b2.1
+   3.4.0.1-SNAPSHOT


caffe-sources
@@ -373,7 +373,7 @@
run


-   3.0.0 

+   3.4.0 



src/main/proto/tensorflow


http://git-wip-us.apache.org/repos/asf/systemml/blob/a134997e/scripts/perftest/python/utils_misc.py
--
diff --git a/scripts/perftest/python/utils_misc.py 
b/scripts/perftest/python/utils_misc.py
index da9dbcb..15e2a00 100755
--- a/scripts/perftest/python/utils_misc.py
+++ b/scripts/perftest/python/utils_misc.py
@@ -57,17 +57,29 @@ def split_config_args(args):
 
 systemml_args_dict = {}
 
-if args['stats'] is not None:
-systemml_args_dict['-stats'] = args['stats']
-
-if args['explain'] is not None:
-systemml_args_dict['-explain'] = args['explain']
-
-if args['config'] is not None:
-systemml_args_dict['-config'] = args['config']
-
-if args['gpu'] is not None:
-systemml_args_dict['-gpu'] = args['gpu']
+if 'stats' in args.keys():
+if args['stats'] is not None:
+systemml_args_dict['-stats'] = args['stats']
+else:
+systemml_args_dict['-stats'] = ''
+
+if 'explain' in args.keys():
+if args['explain'] is not None:
+systemml_args_dict['-explain'] = args['explain']
+else:
+systemml_args_dict['-explain'] = ''
+
+if 'config' in args.keys():
+if args['config'] is not None:
+systemml_args_dict['-config'] = args['config']
+else:
+systemml_args_dict['-config'] = ''
+
+if 'gpu' in args.keys():
+if args['gpu'] is not None:
+systemml_args_dict['-gpu'] = args['gpu']
+else:
+systemml_args_dict['-gpu'] = ''
 
 backend_args_dict = {}
 exec_type = args['exec_type']

systemml git commit: [MINOR] Refer to correct version of protoc-jar maven plugin in pom

2017-09-20 Thread nakul02

Repository: systemml
Updated Branches:
  refs/heads/master 3acf786d4 -> c14682b9c


[MINOR] Refer to correct version of protoc-jar maven plugin in pom


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/c14682b9
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/c14682b9
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/c14682b9

Branch: refs/heads/master
Commit: c14682b9cb2d02e959ed35a87288f54b7b51da3c
Parents: 3acf786
Author: Nakul Jindal 
Authored: Wed Sep 20 14:10:15 2017 -0700
Committer: Nakul Jindal 
Committed: Wed Sep 20 14:10:15 2017 -0700

--
 pom.xml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/systemml/blob/c14682b9/pom.xml
--
diff --git a/pom.xml b/pom.xml
index 1ee4399..7e04af5 100644
--- a/pom.xml
+++ b/pom.xml
@@ -348,7 +348,7 @@

com.github.os72
protoc-jar-maven-plugin
-   3.4.0.1-SNAPSHOT
+   3.0.0-b2.1


caffe-sources
@@ -373,7 +373,7 @@
run


-   3.4.0 

+   3.0.0 



src/main/proto/tensorflow

systemml git commit: [MINOR] minor tweaks to the performance test script

2017-09-20 Thread nakul02

Repository: systemml
Updated Branches:
  refs/heads/master 34bb3ca82 -> aa15197ec


[MINOR] minor tweaks to the performance test script


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/aa15197e
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/aa15197e
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/aa15197e

Branch: refs/heads/master
Commit: aa15197ec2e1a1e81c9031a91ec0791284978f27
Parents: 34bb3ca
Author: Nakul Jindal 
Authored: Wed Sep 20 15:04:11 2017 -0700
Committer: Nakul Jindal 
Committed: Wed Sep 20 15:04:11 2017 -0700

--
 scripts/perftest/python/utils_misc.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/systemml/blob/aa15197e/scripts/perftest/python/utils_misc.py
--
diff --git a/scripts/perftest/python/utils_misc.py 
b/scripts/perftest/python/utils_misc.py
index 15e2a00..92dbc73 100755
--- a/scripts/perftest/python/utils_misc.py
+++ b/scripts/perftest/python/utils_misc.py
@@ -44,7 +44,7 @@ def split_config_args(args):
 """
 
 perftest_args_dict = {}
-
+
 perftest_args_dict['family'] = args['family']
 perftest_args_dict['algo'] = args['algo']
 perftest_args_dict['exec_type'] = args['exec_type']
@@ -72,8 +72,6 @@ def split_config_args(args):
 if 'config' in args.keys():
 if args['config'] is not None:
 systemml_args_dict['-config'] = args['config']
-else:
-systemml_args_dict['-config'] = ''
 
 if 'gpu' in args.keys():
 if args['gpu'] is not None:

systemml git commit: [MINOR]bug fixes & feature added in perf test & spark-submit python scripts

2017-09-28 Thread nakul02

Repository: systemml
Updated Branches:
  refs/heads/master 0cb2f7f68 -> a725b2d2e


[MINOR]bug fixes & feature added in perf test & spark-submit python scripts


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/a725b2d2
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/a725b2d2
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/a725b2d2

Branch: refs/heads/master
Commit: a725b2d2ebf6dcb56f4edb68376c3849c8991b27
Parents: 0cb2f7f
Author: Nakul Jindal 
Authored: Thu Sep 28 15:28:17 2017 -0700
Committer: Nakul Jindal 
Committed: Thu Sep 28 15:28:17 2017 -0700

--
 bin/systemml-spark-submit.py| 30 -
 scripts/perftest/python/datagen.py  |  2 +-
 scripts/perftest/python/predict.py  |  2 +-
 scripts/perftest/python/run_perftest.py | 19 ++---
 scripts/perftest/python/train.py|  2 +-
 scripts/perftest/python/utils_misc.py   | 63 +++-
 6 files changed, 70 insertions(+), 48 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/systemml/blob/a725b2d2/bin/systemml-spark-submit.py
--
diff --git a/bin/systemml-spark-submit.py b/bin/systemml-spark-submit.py
index b6426b3..b4da801 100755
--- a/bin/systemml-spark-submit.py
+++ b/bin/systemml-spark-submit.py
@@ -92,25 +92,35 @@ def spark_submit_entry(master, driver_memory, 
num_executors, executor_memory,
 ml_options.append(stats)
 if gpu is not None:
 ml_options.append('-gpu')
-ml_options.append(gpu)
+if gpu is not 'no_option':
+ml_options.append(gpu)
 
 if len(ml_options) < 1:
 ml_options = ''
 
 # stats, explain, target_jars
 cmd_spark = [spark_path, '--class', 'org.apache.sysml.api.DMLScript',
- '--master', master, '--driver-memory', driver_memory,
- '--num-executors', num_executors, '--executor-memory', 
executor_memory,
- '--executor-cores', executor_cores, '--conf', default_conf,
+ '--master', master,
+ '--driver-memory', driver_memory,
+ '--conf', default_conf,
  '--jars', cuda_jars, systemml_jars]
 
+if num_executors is not None:
+cmd_spark = cmd_spark + ['--num-executors', num_executors]
+
+if executor_memory is not None:
+cmd_spark = cmd_spark + ['--executor-memory', executor_memory]
+
+if executor_cores is not None:
+cmd_spark = cmd_spark + ['--executor-cores', executor_cores]
+
 cmd_system_ml = ['-config', default_config,
  '-exec', 'hybrid_spark', '-f', script_file, ' 
'.join(ml_options)]
 
 cmd = cmd_spark + cmd_system_ml
 
 # Debug
-# print(' '.join(cmd))
+print(' '.join(cmd))
 return_code = os.system(' '.join(cmd))
 return return_code
 
@@ -120,10 +130,10 @@ if __name__ == '__main__':
   description='System-ML Spark Submit 
Script')
 # SPARK-SUBMIT Options
 cparser.add_argument('--master', default='local[*]', help='local, 
yarn-client, yarn-cluster', metavar='')
-cparser.add_argument('--driver-memory', default='5G', help='Memory for 
driver (e.g. 512M)', metavar='')
-cparser.add_argument('--num-executors', default='2', help='Number of 
executors to launch', metavar='')
-cparser.add_argument('--executor-memory', default='2G', help='Memory per 
executor', metavar='')
-cparser.add_argument('--executor-cores', default='1', help='Number of 
cores', metavar='')
+cparser.add_argument('--driver-memory', default='8G', help='Memory for 
driver (e.g. 512M, 1G)', metavar='')
+cparser.add_argument('--num-executors', nargs=1, help='Number of executors 
to launch', metavar='')
+cparser.add_argument('--executor-memory', nargs=1, help='Memory per 
executor', metavar='')
+cparser.add_argument('--executor-cores', nargs=1, help='Number of executor 
cores', metavar='')
 cparser.add_argument('--conf', help='Spark configuration file', nargs='+', 
metavar='')
 
 # SYSTEM-ML Options
@@ -138,7 +148,7 @@ if __name__ == '__main__':
metavar='')
 cparser.add_argument('-gpu', help='uses CUDA instructions when reasonable, 
'
   'set  option to skip conservative 
memory estimates '
-  'and use GPU wherever possible', 
nargs='?')
+  'and use GPU wherever possible', 
nargs='?', const='no_option')
 cparser.add_argument('-f', required=True, help='specifies dml/pydml file 
to execute; '
'path can be 
local/hdfs/gpfs', metavar='')
 

http://git-wip-us.apache.org/repos/asf/systemml/blob/a725b2d2/

systemml git commit: [HOTFIX] minor bug fix in perf test suite

2017-09-28 Thread nakul02

Repository: systemml
Updated Branches:
  refs/heads/master a725b2d2e -> e4c74eda6


[HOTFIX] minor bug fix in perf test suite


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/e4c74eda
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/e4c74eda
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/e4c74eda

Branch: refs/heads/master
Commit: e4c74eda67ca4596fcdbae77603514259c1b6e10
Parents: a725b2d
Author: Nakul Jindal 
Authored: Thu Sep 28 15:43:44 2017 -0700
Committer: Nakul Jindal 
Committed: Thu Sep 28 15:43:44 2017 -0700

--
 scripts/perftest/python/run_perftest.py | 6 ++
 1 file changed, 6 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/systemml/blob/e4c74eda/scripts/perftest/python/run_perftest.py
--
diff --git a/scripts/perftest/python/run_perftest.py 
b/scripts/perftest/python/run_perftest.py
index f0b272a..4eeb0da 100755
--- a/scripts/perftest/python/run_perftest.py
+++ b/scripts/perftest/python/run_perftest.py
@@ -378,6 +378,12 @@ if __name__ == '__main__':
 perftest_args_dict, systemml_args_dict, backend_args_dict = 
split_config_args(all_arg_dict)
 
 # temp_dir hdfs / local path check
+if args.file_system_type is None:
+if args.exec_type == 'hybrid_spark':
+args.file_system_type = 'hdfs'
+else:
+args.file_system_type = 'local'
+
 perftest_args_dict['temp_dir'] = get_default_dir(args.file_system_type, 
args.temp_dir, args.exec_type, default_config_dir)
 
 # default_mat_type validity

systemml git commit: [SYSTEMML-1875] Changed pom.xml to use the latest protoc-jar

2017-09-29 Thread nakul02

Repository: systemml
Updated Branches:
  refs/heads/master e4c2f9e57 -> 5dce90b3b


[SYSTEMML-1875] Changed pom.xml to use the latest protoc-jar

- This uses the latest protoc-jar-maven-plugin to get the protoc
compiler for ppc

Closes #676


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/5dce90b3
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/5dce90b3
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/5dce90b3

Branch: refs/heads/master
Commit: 5dce90b3bbf120a053233f0b49eb6fa7c0ddfebf
Parents: e4c2f9e
Author: Nakul Jindal 
Authored: Fri Sep 29 14:47:49 2017 -0700
Committer: Nakul Jindal 
Committed: Fri Sep 29 14:47:49 2017 -0700

--
 pom.xml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/systemml/blob/5dce90b3/pom.xml
--
diff --git a/pom.xml b/pom.xml
index c19e11c..28b166f 100644
--- a/pom.xml
+++ b/pom.xml
@@ -345,7 +345,7 @@

com.github.os72
protoc-jar-maven-plugin
-   3.0.0-b2.1
+   3.4.0.1


caffe-sources
@@ -354,7 +354,7 @@
run


-   2.5.0 

+   2.6.1 



src/main/proto/caffe

systemml git commit: [MINOR] bug fix in perf test script

2017-10-02 Thread nakul02

Repository: systemml
Updated Branches:
  refs/heads/master c27c488be -> 50b3c80c6


[MINOR] bug fix in perf test script


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/50b3c80c
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/50b3c80c
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/50b3c80c

Branch: refs/heads/master
Commit: 50b3c80c61b9b6ab6cdb8b28a1544952fb8dd19d
Parents: c27c488
Author: Nakul Jindal 
Authored: Mon Oct 2 17:54:17 2017 -0700
Committer: Nakul Jindal 
Committed: Mon Oct 2 17:54:17 2017 -0700

--
 scripts/perftest/python/datagen.py|  4 +--
 scripts/perftest/python/predict.py|  4 +--
 scripts/perftest/python/train.py  |  4 +--
 scripts/perftest/python/utils_exec.py | 41 --
 scripts/perftest/python/utils_fs.py   |  2 +-
 scripts/perftest/python/utils_misc.py |  4 +--
 6 files changed, 37 insertions(+), 22 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/systemml/blob/50b3c80c/scripts/perftest/python/datagen.py
--
diff --git a/scripts/perftest/python/datagen.py 
b/scripts/perftest/python/datagen.py
index 6794187..54f2eff 100755
--- a/scripts/perftest/python/datagen.py
+++ b/scripts/perftest/python/datagen.py
@@ -1,5 +1,5 @@
 #!/usr/bin/env python3
-#-
+# -
 #
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
@@ -18,7 +18,7 @@
 # specific language governing permissions and limitations
 # under the License.
 #
-#-
+# -
 
 import itertools
 from os.path import join

http://git-wip-us.apache.org/repos/asf/systemml/blob/50b3c80c/scripts/perftest/python/predict.py
--
diff --git a/scripts/perftest/python/predict.py 
b/scripts/perftest/python/predict.py
index 67467b1..56ba999 100755
--- a/scripts/perftest/python/predict.py
+++ b/scripts/perftest/python/predict.py
@@ -1,5 +1,5 @@
 #!/usr/bin/env python3
-#-
+# -
 #
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
@@ -18,7 +18,7 @@
 # specific language governing permissions and limitations
 # under the License.
 #
-#-
+# -
 
 import sys
 from os.path import join

http://git-wip-us.apache.org/repos/asf/systemml/blob/50b3c80c/scripts/perftest/python/train.py
--
diff --git a/scripts/perftest/python/train.py b/scripts/perftest/python/train.py
index a95950d..4428e8f 100755
--- a/scripts/perftest/python/train.py
+++ b/scripts/perftest/python/train.py
@@ -1,5 +1,5 @@
 #!/usr/bin/env python3
-#-
+# -
 #
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
@@ -18,7 +18,7 @@
 # specific language governing permissions and limitations
 # under the License.
 #
-#-
+# -
 
 import sys
 from os.path import join

http://git-wip-us.apache.org/repos/asf/systemml/blob/50b3c80c/scripts/perftest/python/utils_exec.py
--
diff --git a/scripts/perftest/python/utils_exec.py 
b/scripts/perftest/python/utils_exec.py
index 27f75f2..d51cf2d 100755
--- a/scripts/perftest/python/utils_exec.py
+++ b/scripts/perftest/python/utils_exec.py
@@ -1,5 +1,5 @@
 #!/usr/bin/env python3
-#-
+# -
 #
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
@@ -18,12 +18,14 @@
 # specific language governing permissions and limitations
 # under the License.
 #
-#-
+# -
 
 import sys
 import subprocess
 import shlex
 import re
+import tempfile
+import os
 
 # Subprocess and log parsing related functions
 
@@ -49,24 +51,34 @@ de

systemml git commit: [HOTFIX] fix for Caffe2DML

2017-10-06 Thread nakul02

Repository: systemml
Updated Branches:
  refs/heads/master 355373990 -> 8ea38a1b1


[HOTFIX] fix for Caffe2DML

- switching back to using protoc v2.5.0 to compile the proto file


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/8ea38a1b
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/8ea38a1b
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/8ea38a1b

Branch: refs/heads/master
Commit: 8ea38a1b14a17d81299063cd281631369574a067
Parents: 3553739
Author: Nakul Jindal 
Authored: Fri Oct 6 11:59:59 2017 -0700
Committer: Nakul Jindal 
Committed: Fri Oct 6 11:59:59 2017 -0700

--
 pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/systemml/blob/8ea38a1b/pom.xml
--
diff --git a/pom.xml b/pom.xml
index 28b166f..ff90986 100644
--- a/pom.xml
+++ b/pom.xml
@@ -354,7 +354,7 @@
run


-   2.6.1 

+   2.5.0 



src/main/proto/caffe

systemml git commit: [SYSTEMML-1945] added --deploy-mode param to python scripts

2017-10-11 Thread nakul02

Repository: systemml
Updated Branches:
  refs/heads/master 13a017583 -> 8f786aa22


[SYSTEMML-1945] added --deploy-mode param to python scripts

Closes #681


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/8f786aa2
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/8f786aa2
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/8f786aa2

Branch: refs/heads/master
Commit: 8f786aa227d536558ed684060cef0e628bf3247f
Parents: 13a0175
Author: Krishna Kalyan 
Authored: Wed Oct 11 09:20:40 2017 -0700
Committer: Nakul Jindal 
Committed: Wed Oct 11 09:20:40 2017 -0700

--
 bin/systemml-spark-submit.py| 9 +
 scripts/perftest/python/run_perftest.py | 3 ++-
 scripts/perftest/python/utils_misc.py   | 3 +++
 3 files changed, 10 insertions(+), 5 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/systemml/blob/8f786aa2/bin/systemml-spark-submit.py
--
diff --git a/bin/systemml-spark-submit.py b/bin/systemml-spark-submit.py
index b4da801..2546236 100755
--- a/bin/systemml-spark-submit.py
+++ b/bin/systemml-spark-submit.py
@@ -41,8 +41,8 @@ def default_jars(systemml_home):
 return target_jars, systemml_jar
 
 
-def spark_submit_entry(master, driver_memory, num_executors, executor_memory,
-   executor_cores, conf,
+def spark_submit_entry(master, deploy_mode, driver_memory, num_executors,
+   executor_memory, executor_cores, conf,
nvargs, args, config, explain, debug, stats, gpu, f):
 """
 This function is responsible for the execution of arguments via
@@ -100,7 +100,7 @@ def spark_submit_entry(master, driver_memory, 
num_executors, executor_memory,
 
 # stats, explain, target_jars
 cmd_spark = [spark_path, '--class', 'org.apache.sysml.api.DMLScript',
- '--master', master,
+ '--master', master, '--deploy-mode', deploy_mode,
  '--driver-memory', driver_memory,
  '--conf', default_conf,
  '--jars', cuda_jars, systemml_jars]
@@ -129,7 +129,8 @@ if __name__ == '__main__':
 cparser = 
argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter,
   description='System-ML Spark Submit 
Script')
 # SPARK-SUBMIT Options
-cparser.add_argument('--master', default='local[*]', help='local, 
yarn-client, yarn-cluster', metavar='')
+cparser.add_argument('--master', default='local[*]', help='local, yarn', 
metavar='')
+cparser.add_argument('--deploy-mode', help='client, cluster', 
default='client', metavar='')
 cparser.add_argument('--driver-memory', default='8G', help='Memory for 
driver (e.g. 512M, 1G)', metavar='')
 cparser.add_argument('--num-executors', nargs=1, help='Number of executors 
to launch', metavar='')
 cparser.add_argument('--executor-memory', nargs=1, help='Memory per 
executor', metavar='')

http://git-wip-us.apache.org/repos/asf/systemml/blob/8f786aa2/scripts/perftest/python/run_perftest.py
--
diff --git a/scripts/perftest/python/run_perftest.py 
b/scripts/perftest/python/run_perftest.py
index 6c016a8..6e87261 100755
--- a/scripts/perftest/python/run_perftest.py
+++ b/scripts/perftest/python/run_perftest.py
@@ -355,7 +355,8 @@ if __name__ == '__main__':
   'set  option to skip conservative 
memory estimates '
   'and use GPU wherever possible', 
nargs='?', const='no_option')
 # Spark Configuration Option
-cparser.add_argument('--master', help='local, yarn-client, yarn-cluster', 
metavar='')
+cparser.add_argument('--master', help='local, yarn', metavar='')
+cparser.add_argument('--deploy-mode', help='client, cluster', metavar='')
 cparser.add_argument('--driver-memory', help='Memory for driver (e.g. 
512M)', metavar='')
 cparser.add_argument('--num-executors', help='Number of executors to 
launch', metavar='')
 cparser.add_argument('--executor-memory', help='Memory per executor', 
metavar='')

http://git-wip-us.apache.org/repos/asf/systemml/blob/8f786aa2/scripts/perftest/python/utils_misc.py
--
diff --git a/scripts/perftest/python/utils_misc.py 
b/scripts/perftest/python/utils_misc.py
index ebc4376..63d8c1b 100755
--- a/scripts/perftest/python/utils_misc.py
+++ b/scripts/perftest/python/utils_misc.py
@@ -84,6 +84,9 @@ def split_config_args(args):
 if args['master'] is not None:
 backend_args_dict['--master'] = args['master']
 
+if args['deploy_mode'] is not None:
+backend_args_dict['--deploy-mode

incubator-systemml git commit: Updated document to correspond to the currently released artifacts

2017-02-23 Thread nakul02

Repository: incubator-systemml
Updated Branches:
  refs/heads/master 8eed1ec94 -> e1f713aae


Updated document to correspond to the currently released artifacts

Closes #403


Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/e1f713aa
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/e1f713aa
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/e1f713aa

Branch: refs/heads/master
Commit: e1f713aaedb472842f448dabd7063978373836c8
Parents: 8eed1ec
Author: Nakul Jindal 
Authored: Tue Feb 21 14:56:58 2017 -0800
Committer: Nakul Jindal 
Committed: Thu Feb 23 13:20:27 2017 -0800

--
 docs/release-process.md | 146 ++-
 1 file changed, 62 insertions(+), 84 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/e1f713aa/docs/release-process.md
--
diff --git a/docs/release-process.md b/docs/release-process.md
index 1cc5c9f..a75a281 100644
--- a/docs/release-process.md
+++ b/docs/release-process.md
@@ -102,86 +102,64 @@ The build artifacts should be downloaded from 
[https://dist.apache.org/repos/dis
 this OS X example.
 
# download artifacts
-   wget -r -nH -nd -np -R index.html* 
https://dist.apache.org/repos/dist/dev/incubator/systemml/0.11.0-incubating-rc1/
+   wget -r -nH -nd -np -R 'index.html*' 
https://dist.apache.org/repos/dist/dev/incubator/systemml/0.13.0-incubating-rc1/
 
# verify standalone tgz works
-   tar -xvzf systemml-0.11.0-incubating-standalone.tgz
-   cd systemml-0.11.0-incubating-standalone
+   tar -xvzf systemml-0.13.0-incubating-bin.tgz
+   cd systemml-0.13.0-incubating-bin
echo "print('hello world');" > hello.dml
./runStandaloneSystemML.sh hello.dml
cd ..
 
-   # verify main jar works
-   mkdir lib
-   cp -R systemml-0.11.0-incubating-standalone/lib/* lib/
-   rm lib/systemml-0.11.0-incubating.jar
-   java -cp ./lib/*:systemml-0.11.0-incubating.jar 
org.apache.sysml.api.DMLScript -s "print('hello world');"
-
-   # verify src works
-   tar -xvzf systemml-0.11.0-incubating-src.tgz
-   cd systemml-0.11.0-incubating-src
-   mvn clean package -P distribution
-   cd target/
-   java -cp ./lib/*:systemml-0.11.0-incubating.jar 
org.apache.sysml.api.DMLScript -s "print('hello world');"
-   java -cp ./lib/*:SystemML.jar org.apache.sysml.api.DMLScript -s 
"print('hello world');"
-   cd ..
+   # verify standalone zip works
+   rm -rf systemml-0.13.0-incubating-bin
+   unzip systemml-0.13.0-incubating-bin.zip
+   cd systemml-0.13.0-incubating-bin
+   echo "print('hello world');" > hello.dml
+   ./runStandaloneSystemML.sh hello.dml
cd ..
 
-   # verify distrib tgz works
-   tar -xvzf systemml-0.11.0-incubating.tgz
-   cd systemml-0.11.0-incubating
-   java -cp ../lib/*:SystemML.jar org.apache.sysml.api.DMLScript -s 
"print('hello world');"
-
-   # verify spark batch mode
-   export SPARK_HOME=/Users/deroneriksson/spark-1.5.1-bin-hadoop2.6
-   $SPARK_HOME/bin/spark-submit SystemML.jar -s "print('hello world');" 
-exec hybrid_spark
-
-   # verify hadoop batch mode
-   hadoop jar SystemML.jar -s "print('hello world');"
-
-
-Here is an example of doing a basic
-sanity check on OS X after building the artifacts manually.
-
-   # build distribution artifacts
-   mvn clean package -P distribution
-
-   cd target
-
-   # verify main jar works
-   java -cp ./lib/*:systemml-0.11.0-incubating.jar 
org.apache.sysml.api.DMLScript -s "print('hello world');"
-
-   # verify SystemML.jar works
-   java -cp ./lib/*:SystemML.jar org.apache.sysml.api.DMLScript -s 
"print('hello world');"
-
# verify src works
-   tar -xvzf systemml-0.11.0-incubating-src.tgz
-   cd systemml-0.11.0-incubating-src
+   tar -xvzf systemml-0.13.0-incubating-src.tgz
+   cd systemml-0.13.0-incubating-src
mvn clean package -P distribution
cd target/
-   java -cp ./lib/*:systemml-0.11.0-incubating.jar 
org.apache.sysml.api.DMLScript -s "print('hello world');"
-   java -cp ./lib/*:SystemML.jar org.apache.sysml.api.DMLScript -s 
"print('hello world');"
-   cd ..
-   cd ..
-
-   # verify standalone tgz works
-   tar -xvzf systemml-0.11.0-incubating-standalone.tgz
-   cd systemml-0.11.0-incubating-standalone
-   echo "print('hello world');" > hello.dml
-   ./runStandaloneSystemML.sh hello.dml
-   cd ..
-
-   # verify distrib tgz works
-   tar -xvzf systemml-0.11.0-incubating.tgz
-   cd systemml-0.11.0-incubating
-   java -cp ../lib/*:SystemML.jar org.apache.sysml.

[2/2] incubator-systemml git commit: Updated document to correspond to the currently released artifacts

2017-02-23 Thread nakul02

Updated document to correspond to the currently released artifacts

Closes #403


Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/5c4e27c7
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/5c4e27c7
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/5c4e27c7

Branch: refs/heads/gh-pages
Commit: 5c4e27c701da1084d1e47d7ad049f9570033e7ae
Parents: 0fb74b9
Author: Nakul Jindal 
Authored: Tue Feb 21 14:56:58 2017 -0800
Committer: Nakul Jindal 
Committed: Thu Feb 23 13:20:27 2017 -0800

--
 release-process.md | 146 
 1 file changed, 62 insertions(+), 84 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/5c4e27c7/release-process.md
--
diff --git a/release-process.md b/release-process.md
index 1cc5c9f..a75a281 100644
--- a/release-process.md
+++ b/release-process.md
@@ -102,86 +102,64 @@ The build artifacts should be downloaded from 
[https://dist.apache.org/repos/dis
 this OS X example.
 
# download artifacts
-   wget -r -nH -nd -np -R index.html* 
https://dist.apache.org/repos/dist/dev/incubator/systemml/0.11.0-incubating-rc1/
+   wget -r -nH -nd -np -R 'index.html*' 
https://dist.apache.org/repos/dist/dev/incubator/systemml/0.13.0-incubating-rc1/
 
# verify standalone tgz works
-   tar -xvzf systemml-0.11.0-incubating-standalone.tgz
-   cd systemml-0.11.0-incubating-standalone
+   tar -xvzf systemml-0.13.0-incubating-bin.tgz
+   cd systemml-0.13.0-incubating-bin
echo "print('hello world');" > hello.dml
./runStandaloneSystemML.sh hello.dml
cd ..
 
-   # verify main jar works
-   mkdir lib
-   cp -R systemml-0.11.0-incubating-standalone/lib/* lib/
-   rm lib/systemml-0.11.0-incubating.jar
-   java -cp ./lib/*:systemml-0.11.0-incubating.jar 
org.apache.sysml.api.DMLScript -s "print('hello world');"
-
-   # verify src works
-   tar -xvzf systemml-0.11.0-incubating-src.tgz
-   cd systemml-0.11.0-incubating-src
-   mvn clean package -P distribution
-   cd target/
-   java -cp ./lib/*:systemml-0.11.0-incubating.jar 
org.apache.sysml.api.DMLScript -s "print('hello world');"
-   java -cp ./lib/*:SystemML.jar org.apache.sysml.api.DMLScript -s 
"print('hello world');"
-   cd ..
+   # verify standalone zip works
+   rm -rf systemml-0.13.0-incubating-bin
+   unzip systemml-0.13.0-incubating-bin.zip
+   cd systemml-0.13.0-incubating-bin
+   echo "print('hello world');" > hello.dml
+   ./runStandaloneSystemML.sh hello.dml
cd ..
 
-   # verify distrib tgz works
-   tar -xvzf systemml-0.11.0-incubating.tgz
-   cd systemml-0.11.0-incubating
-   java -cp ../lib/*:SystemML.jar org.apache.sysml.api.DMLScript -s 
"print('hello world');"
-
-   # verify spark batch mode
-   export SPARK_HOME=/Users/deroneriksson/spark-1.5.1-bin-hadoop2.6
-   $SPARK_HOME/bin/spark-submit SystemML.jar -s "print('hello world');" 
-exec hybrid_spark
-
-   # verify hadoop batch mode
-   hadoop jar SystemML.jar -s "print('hello world');"
-
-
-Here is an example of doing a basic
-sanity check on OS X after building the artifacts manually.
-
-   # build distribution artifacts
-   mvn clean package -P distribution
-
-   cd target
-
-   # verify main jar works
-   java -cp ./lib/*:systemml-0.11.0-incubating.jar 
org.apache.sysml.api.DMLScript -s "print('hello world');"
-
-   # verify SystemML.jar works
-   java -cp ./lib/*:SystemML.jar org.apache.sysml.api.DMLScript -s 
"print('hello world');"
-
# verify src works
-   tar -xvzf systemml-0.11.0-incubating-src.tgz
-   cd systemml-0.11.0-incubating-src
+   tar -xvzf systemml-0.13.0-incubating-src.tgz
+   cd systemml-0.13.0-incubating-src
mvn clean package -P distribution
cd target/
-   java -cp ./lib/*:systemml-0.11.0-incubating.jar 
org.apache.sysml.api.DMLScript -s "print('hello world');"
-   java -cp ./lib/*:SystemML.jar org.apache.sysml.api.DMLScript -s 
"print('hello world');"
-   cd ..
-   cd ..
-
-   # verify standalone tgz works
-   tar -xvzf systemml-0.11.0-incubating-standalone.tgz
-   cd systemml-0.11.0-incubating-standalone
-   echo "print('hello world');" > hello.dml
-   ./runStandaloneSystemML.sh hello.dml
-   cd ..
-
-   # verify distrib tgz works
-   tar -xvzf systemml-0.11.0-incubating.tgz
-   cd systemml-0.11.0-incubating
-   java -cp ../lib/*:SystemML.jar org.apache.sysml.api.DMLScript -s 
"print('hello world');"
+   java -cp "./lib/*:systemml-0.13.0-incubating.jar" 
org.apache.sysml

[1/2] incubator-systemml git commit: [SYSTEMML-1238] Updated the default parameters of mllearn to match that of scikit learn.

2017-02-23 Thread nakul02

Repository: incubator-systemml
Updated Branches:
  refs/heads/gh-pages bb97a4bc6 -> 5c4e27c70


[SYSTEMML-1238] Updated the default parameters of mllearn to match that of
scikit learn.

- Also updated the test to compare our algorithm to scikit-learn.

Closes #398.


Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/0fb74b94
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/0fb74b94
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/0fb74b94

Branch: refs/heads/gh-pages
Commit: 0fb74b94af9e244b5695745ac7b3651b485b812f
Parents: bb97a4b
Author: Niketan Pansare 
Authored: Fri Feb 17 14:54:23 2017 -0800
Committer: Niketan Pansare 
Committed: Fri Feb 17 14:59:49 2017 -0800

--
 algorithms-regression.md  | 8 
 beginners-guide-python.md | 2 +-
 python-reference.md   | 6 +++---
 3 files changed, 8 insertions(+), 8 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/0fb74b94/algorithms-regression.md
--
diff --git a/algorithms-regression.md b/algorithms-regression.md
index 992862e..80b38a3 100644
--- a/algorithms-regression.md
+++ b/algorithms-regression.md
@@ -83,8 +83,8 @@ efficient when the number of features $m$ is relatively small
 
 {% highlight python %}
 from systemml.mllearn import LinearRegression
-# C = 1/reg
-lr = LinearRegression(sqlCtx, fit_intercept=True, C=1.0, solver='direct-solve')
+# C = 1/reg (to disable regularization, use float("inf"))
+lr = LinearRegression(sqlCtx, fit_intercept=True, normalize=False, 
C=float("inf"), solver='direct-solve')
 # X_train, y_train and X_test can be NumPy matrices or Pandas DataFrame or 
SciPy Sparse Matrix
 y_test = lr.fit(X_train, y_train)
 # df_train is DataFrame that contains two columns: "features" (of type Vector) 
and "label". df_test is a DataFrame that contains the column "features"
@@ -125,8 +125,8 @@ y_test = lr.fit(df_train)
 
 {% highlight python %}
 from systemml.mllearn import LinearRegression
-# C = 1/reg
-lr = LinearRegression(sqlCtx, fit_intercept=True, max_iter=100, tol=0.01, 
C=1.0, solver='newton-cg')
+# C = 1/reg (to disable regularization, use float("inf"))
+lr = LinearRegression(sqlCtx, fit_intercept=True, normalize=False, 
max_iter=100, tol=0.01, C=float("inf"), solver='newton-cg')
 # X_train, y_train and X_test can be NumPy matrices or Pandas DataFrames or 
SciPy Sparse matrices
 y_test = lr.fit(X_train, y_train)
 # df_train is DataFrame that contains two columns: "features" (of type Vector) 
and "label". df_test is a DataFrame that contains the column "features"

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/0fb74b94/beginners-guide-python.md
--
diff --git a/beginners-guide-python.md b/beginners-guide-python.md
index 4d1b098..ffab09e 100644
--- a/beginners-guide-python.md
+++ b/beginners-guide-python.md
@@ -228,7 +228,7 @@ X_test = diabetes_X[-20:]
 y_train = diabetes.target[:-20]
 y_test = diabetes.target[-20:]
 # Create linear regression object
-regr = LinearRegression(sqlCtx, fit_intercept=True, C=1, solver='direct-solve')
+regr = LinearRegression(sqlCtx, fit_intercept=True, C=float("inf"), 
solver='direct-solve')
 # Train the model using the training sets
 regr.fit(X_train, y_train)
 y_predicted = regr.predict(X_test)

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/0fb74b94/python-reference.md
--
diff --git a/python-reference.md b/python-reference.md
index 65dcb5c..8d38598 100644
--- a/python-reference.md
+++ b/python-reference.md
@@ -731,7 +731,7 @@ LogisticRegression score: 0.92
 
 ### Reference documentation
 
- *class*`systemml.mllearn.estimators.LinearRegression`(*sqlCtx*, 
*fit\_intercept=True*, *max\_iter=100*, *tol=1e-06*, *C=1.0*, 
*solver='newton-cg'*, 
*transferUsingDF=False*)(#systemml.mllearn.estimators.LinearRegression 
"Permalink to this definition")
+ *class*`systemml.mllearn.estimators.LinearRegression`(*sqlCtx*, 
*fit\_intercept=True*, *normalize=False*, *max\_iter=100*, *tol=1e-06*, 
*C=float("inf")*, *solver='newton-cg'*, 
*transferUsingDF=False*)(#systemml.mllearn.estimators.LinearRegression 
"Permalink to this definition")
 :   Bases: `systemml.mllearn.estimators.BaseSystemMLRegressor`{.xref .py
 .py-class .docutils .literal}
 
@@ -760,7 +760,7 @@ LogisticRegression score: 0.92
 >>> # The mean square error
 >>> print("Residual sum of squares: %.2f" % 
np.mean((regr.predict(diabetes_X_test) - diabetes_y_test) ** 2))
 
- *class*`systemml.mllearn.estimators.LogisticRegression`(*sqlCtx*, 
*penalty='l2'*, *fit\_intercept=True*, *max\_iter=100*, *

incubator-systemml git commit: Bug fixes, instruction added, async cudaFree

2017-02-24 Thread nakul02

Repository: incubator-systemml
Updated Branches:
  refs/heads/master 0daae6cf0 -> 4316efeba


Bug fixes, instruction added, async cudaFree

- Fixes for GPU mem mgmt and related integration tests
- Added "exp" function for GPU
- Do cudaFree asynchronously

Closes #404


Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/4316efeb
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/4316efeb
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/4316efeb

Branch: refs/heads/master
Commit: 4316efebaf065d7a3de067354275d1b991e38bb4
Parents: 0daae6c
Author: Nakul Jindal 
Authored: Fri Feb 24 11:27:44 2017 -0800
Committer: Nakul Jindal 
Committed: Fri Feb 24 11:27:44 2017 -0800

--
 src/main/cpp/kernels/SystemML.cu|  15 ++
 src/main/cpp/kernels/SystemML.ptx   | 136 --
 .../java/org/apache/sysml/hops/UnaryOp.java |   2 +-
 .../context/ExecutionContext.java   |   8 +-
 .../instructions/GPUInstructionParser.java  |   3 +-
 .../gpu/BuiltinUnaryGPUInstruction.java |   2 +-
 .../gpu/ConvolutionGPUInstruction.java  |  16 +-
 .../instructions/gpu/GPUInstruction.java|  10 +-
 .../gpu/MatrixBuiltinGPUInstruction.java|  14 +-
 .../instructions/gpu/context/GPUContext.java|  17 +-
 .../instructions/gpu/context/GPUObject.java | 177 +++
 .../instructions/gpu/context/JCudaContext.java  |   1 +
 .../instructions/gpu/context/JCudaObject.java   | 161 ++---
 .../runtime/matrix/data/LibMatrixCUDA.java  | 103 +++
 .../java/org/apache/sysml/utils/Statistics.java |  23 ++-
 .../functions/misc/RewritePushdownUaggTest.java |  15 +-
 .../RewriteSimplifyRowColSumMVMultTest.java |   5 +-
 17 files changed, 483 insertions(+), 225 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/4316efeb/src/main/cpp/kernels/SystemML.cu
--
diff --git a/src/main/cpp/kernels/SystemML.cu b/src/main/cpp/kernels/SystemML.cu
index cda28ba..40a1046 100644
--- a/src/main/cpp/kernels/SystemML.cu
+++ b/src/main/cpp/kernels/SystemML.cu
@@ -628,3 +628,18 @@ __global__ void reduce_col_mean(double *g_idata, double 
*g_odata, unsigned int r
 MeanOp aop(rows);
 reduce_col(g_idata, g_odata, rows, cols, op, aop, 0.0);
 }
+
+
+/**
+ * Do an exp over all the elements of a matrix
+ * @param A the input matrix (of length = size)
+ * @param C the pre-allocated output matrix (of length = size)
+ * @param size the length of the input and output matrices
+ */
+extern "C"
+__global__ void matrix_exp(double *A, double *C, unsigned int size) {
+int index = blockIdx.x * blockDim.x + threadIdx.x;
+if (index < size){
+C[index] = exp(A[index]);
+}
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/4316efeb/src/main/cpp/kernels/SystemML.ptx
--
diff --git a/src/main/cpp/kernels/SystemML.ptx 
b/src/main/cpp/kernels/SystemML.ptx
index 93f3879..b9efd9b 100644
--- a/src/main/cpp/kernels/SystemML.ptx
+++ b/src/main/cpp/kernels/SystemML.ptx
@@ -4810,6 +4810,120 @@ BB33_5:
ret;
 }
 
+   // .globl   matrix_exp
+.visible .entry matrix_exp(
+   .param .u64 matrix_exp_param_0,
+   .param .u64 matrix_exp_param_1,
+   .param .u32 matrix_exp_param_2
+)
+{
+   .reg .pred  %p<5>;
+   .reg .f32   %f<3>;
+   .reg .b32   %r<21>;
+   .reg .f64   %fd<42>;
+   .reg .b64   %rd<10>;
+
+
+   ld.param.u64%rd2, [matrix_exp_param_0];
+   ld.param.u64%rd3, [matrix_exp_param_1];
+   ld.param.u32%r5, [matrix_exp_param_2];
+   mov.u32 %r6, %ctaid.x;
+   mov.u32 %r7, %ntid.x;
+   mov.u32 %r8, %tid.x;
+   mad.lo.s32  %r1, %r7, %r6, %r8;
+   setp.ge.u32 %p1, %r1, %r5;
+   @%p1 braBB34_5;
+
+   cvta.to.global.u64  %rd4, %rd2;
+   cvt.s64.s32 %rd1, %r1;
+   mul.wide.s32%rd5, %r1, 8;
+   add.s64 %rd6, %rd4, %rd5;
+   ld.global.f64   %fd1, [%rd6];
+   mov.f64 %fd6, 0d3FF71547652B82FE;
+   mul.rn.f64  %fd7, %fd1, %fd6;
+   mov.f64 %fd8, 0d4338000000000000;
+   add.rn.f64  %fd9, %fd7, %fd8;
+   {
+   .reg .b32 %temp; 
+   mov.b64 {%r2, %temp}, %fd9;
+   }
+   mov.f64 %fd10, 0dC338000000000000;
+   add.rn.f64  %fd11, %fd9, %fd10;
+   mov.f64 %fd12, 0dBFE62E42FEFA39EF;
+   fma.rn.f64  %fd13, %fd11, %fd12, %fd1;
+   mov.f64 %fd14, 0dBC7ABC9E3B39803F;
+   fma.rn.f64  %fd15, %fd11, %fd14, %fd13;
+   mov.f64 %fd

incubator-systemml git commit: Upgraded to use jcuda8 (from the maven repo)

2017-03-03 Thread nakul02

Repository: incubator-systemml
Updated Branches:
  refs/heads/master ed3a15882 -> 3757995b5


Upgraded to use jcuda8 (from the maven repo)

Closes #291


Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/3757995b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/3757995b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/3757995b

Branch: refs/heads/master
Commit: 3757995b50aef019b0ce22d9ae93eae42aed02b4
Parents: ed3a158
Author: Nakul Jindal 
Authored: Fri Mar 3 18:11:45 2017 -0800
Committer: Nakul Jindal 
Committed: Fri Mar 3 18:11:46 2017 -0800

--
 docs/devdocs/gpu-backend.md |  61 +++---
 pom.xml | 195 +++
 .../runtime/matrix/data/LibMatrixCUDA.java  |  19 +-
 3 files changed, 195 insertions(+), 80 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/3757995b/docs/devdocs/gpu-backend.md
--
diff --git a/docs/devdocs/gpu-backend.md b/docs/devdocs/gpu-backend.md
index c6f66d6..40311c7 100644
--- a/docs/devdocs/gpu-backend.md
+++ b/docs/devdocs/gpu-backend.md
@@ -19,52 +19,43 @@ limitations under the License.
 
 # Initial prototype for GPU backend
 
-A GPU backend implements two important abstract classes:
+The GPU backend implements two important abstract classes:
 1. `org.apache.sysml.runtime.controlprogram.context.GPUContext`
 2. `org.apache.sysml.runtime.controlprogram.context.GPUObject`
 
-The GPUContext is responsible for GPU memory management and 
initialization/destruction of Cuda handles.
+The `GPUContext` is responsible for GPU memory management and 
initialization/destruction of Cuda handles.
+Currently, an active instance of the `GPUContext` class is made available 
globally and is used to store handles
+of the allocated blocks on the GPU. A count is kept per block for the number 
of instructions that need it.
+When the count is 0, the block may be evicted on a call to `GPUObject.evict()`.
 
-A GPUObject (like RDDObject and BroadcastObject) is stored in CacheableData 
object. It gets call-backs from SystemML's bufferpool on following methods
+A `GPUObject` (like RDDObject and BroadcastObject) is stored in CacheableData 
object. It gets call-backs from SystemML's bufferpool on the following methods
 1. void acquireDeviceRead()
-2. void acquireDenseDeviceModify(int numElemsToAllocate)
-3. void acquireHostRead()
-4. void acquireHostModify()
-5. void release(boolean isGPUCopyModified)
+2. void acquireDeviceModifyDense()
+3. void acquireDeviceModifySparse()
+4. void acquireHostRead()
+5. void acquireHostModify()
+6. void releaseInput()
+7. void releaseOutput()
 
-## JCudaContext:
-The current prototype supports Nvidia's CUDA libraries using JCuda wrapper. 
The implementation for the above classes can be found in:
-1. `org.apache.sysml.runtime.controlprogram.context.JCudaContext`
-2. `org.apache.sysml.runtime.controlprogram.context.JCudaObject`
+Sparse matrices on GPU are represented in `CSR` format. In the SystemML 
runtime, they are represented in `MCSR` or modified `CSR` format.
+A conversion cost is incurred when sparse matrices are sent back and forth 
between host and device memory.
 
-### Setup instructions for JCudaContext:
+Concrete classes `JCudaContext` and `JCudaObject` (which extend `GPUContext` & 
`GPUObject` respectively) contain references to `org.jcuda.*`.
 
-1. Follow the instructions from `https://developer.nvidia.com/cuda-downloads` 
and install CUDA 7.5.
-2. Follow the instructions from `https://developer.nvidia.com/cudnn` and 
install CuDNN v4.
-3. Download install JCuda binaries version 0.7.5b and JCudnn version 0.7.5. 
Easiest option would be to use mavenized jcuda: 
-```python
-git clone https://github.com/MysterionRise/mavenized-jcuda.git
-mvn -Djcuda.version=0.7.5b -Djcudnn.version=0.7.5 clean package
-CURR_DIR=`pwd`
-JCUDA_PATH=$CURR_DIR"/target/lib/"
-JAR_PATH="."
-for j in `ls $JCUDA_PATH/*.jar`
-do
-JAR_PATH=$JAR_PATH":"$j
-done
-export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$JCUDA_PATH
-```
+The `LibMatrixCUDA` class contains methods to invoke CUDA libraries (where 
available) and invoke custom kernels. 
+Runtime classes (that extend `GPUInstruction`) redirect calls to functions in 
this class.
+Some functions in `LibMatrixCUDA` need finer control over GPU memory 
management primitives. These are provided by `JCudaObject`.
+
+### Setup instructions:
 
-Note for Windows users:
-* CuDNN v4 is available to download: 
`http://developer.download.nvidia.com/compute/redist/cudnn/v4/cudnn-7.0-win-x64-v4.0-prod.zip`
-* If above steps doesn't work for JCuda, copy the DLLs into C:\lib (or /lib) 
directory.
+1. Follow the instructions from `https:/

incubator-systemml git commit: toString now prints NaN & Infinity like how as.scalar prints them

2017-03-06 Thread nakul02

Repository: incubator-systemml
Updated Branches:
  refs/heads/master 8936e4f8a -> c7eebddb1


toString now prints NaN & Infinity like how as.scalar prints them

Closes #415


Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/c7eebddb
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/c7eebddb
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/c7eebddb

Branch: refs/heads/master
Commit: c7eebddb17820398ac5e8ee740c6944d893ec95a
Parents: 8936e4f
Author: Nakul Jindal 
Authored: Mon Mar 6 14:22:22 2017 -0800
Committer: Nakul Jindal 
Committed: Mon Mar 6 14:22:22 2017 -0800

--
 .../sysml/runtime/util/DataConverter.java   | 26 
 1 file changed, 21 insertions(+), 5 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/c7eebddb/src/main/java/org/apache/sysml/runtime/util/DataConverter.java
--
diff --git a/src/main/java/org/apache/sysml/runtime/util/DataConverter.java 
b/src/main/java/org/apache/sysml/runtime/util/DataConverter.java
index d4348b7..699a602 100644
--- a/src/main/java/org/apache/sysml/runtime/util/DataConverter.java
+++ b/src/main/java/org/apache/sysml/runtime/util/DataConverter.java
@@ -779,6 +779,22 @@ public class DataConverter
System.arraycopy(mb.getDenseBlock(), 0, dest, destPos, 
rows*cols);
}
}
+   
+   /**
+* Convenience method to print NaN & Infinity compliant with how 
as.scalar prints them.
+* {@link DecimalFormat} prints NaN as \uFFFD and Infinity as \u221E
+* http://docs.oracle.com/javase/6/docs/api/java/text/DecimalFormat.html
+* @param df The {@link DecimalFormat} instance, constructed with 
the appropriate options
+* @param value The double value to print
+* @return  a string formatted with the {@link DecimalFormat} 
instance or "NaN" or "Infinity" or "-Infinity"
+*/
+   private static String dfFormat(DecimalFormat df, double value) {
+   if (Double.isNaN(value) || Double.isInfinite(value)){
+   return Double.toString(value);
+   } else {
+   return df.format(value);
+   }
+   }
 
public static String toString(MatrixBlock mb) {
return toString(mb, false, " ", "\n", mb.getNumRows(), 
mb.getNumColumns(), 3);
@@ -826,7 +842,7 @@ public class DataConverter
if (row < rowLength && col < colLength) 
{
// Print (row+1) and (col+1) 
since for a DML user, everything is 1-indexed

sb.append(row+1).append(separator).append(col+1).append(separator);
-   
sb.append(df.format(value)).append(lineseparator);
+   sb.append(dfFormat(df, 
value)).append(lineseparator);
}
}
} else {// Block is in dense format
@@ -835,7 +851,7 @@ public class DataConverter
double value = mb.getValue(i, 
j);
if (value != 0.0){

sb.append(i+1).append(separator).append(j+1).append(separator);
-   
sb.append(df.format(value)).append(lineseparator);
+   sb.append(dfFormat(df, 
value)).append(lineseparator);
}
}
}
@@ -845,11 +861,11 @@ public class DataConverter
for (int i=0; i

[5/5] incubator-systemml git commit: [SYSTEMML-1359] Added extra instrumentation for CUDA lib calls

2017-03-07 Thread nakul02

[SYSTEMML-1359] Added extra instrumentation for CUDA lib calls

- Added instrumentation around input copies & output allocations
- A config property is available to enable/disable advanced stats for
  DNN and GPU
- Minor refactoring and change of SystemML.cu function names

Closes #412


Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/4f9dcf9a
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/4f9dcf9a
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/4f9dcf9a

Branch: refs/heads/master
Commit: 4f9dcf9add6b9bdbc190d97efef9781e32772dd9
Parents: ee33ec6
Author: Nakul Jindal 
Authored: Tue Mar 7 10:50:47 2017 -0800
Committer: Nakul Jindal 
Committed: Tue Mar 7 10:50:47 2017 -0800

--
 conf/SystemML-config.xml.template   |6 +
 src/main/cpp/kernels/SystemML.cu|   37 +-
 src/main/cpp/kernels/SystemML.ptx   | 3923 +-
 .../java/org/apache/sysml/api/DMLScript.java|9 +-
 .../java/org/apache/sysml/conf/DMLConfig.java   |8 +-
 .../context/ExecutionContext.java   |   32 +-
 .../instructions/GPUInstructionParser.java  |2 +-
 .../gpu/AggregateBinaryGPUInstruction.java  |   13 +-
 .../gpu/AggregateUnaryGPUInstruction.java   |  109 +
 .../gpu/ConvolutionGPUInstruction.java  |   66 +-
 .../instructions/gpu/GPUInstruction.java|   91 +-
 .../instructions/gpu/MMTSJGPUInstruction.java   |8 +-
 .../gpu/MatrixBuiltinGPUInstruction.java|   12 +-
 .../MatrixMatrixArithmeticGPUInstruction.java   |   10 +-
 .../gpu/MatrixMatrixAxpyGPUInstruction.java |   10 +-
 .../instructions/gpu/ReorgGPUInstruction.java   |8 +-
 .../ScalarMatrixArithmeticGPUInstruction.java   |8 +-
 .../context/AggregateUnaryGPUInstruction.java   |  110 -
 .../instructions/gpu/context/GPUObject.java |   29 +-
 .../instructions/gpu/context/JCudaContext.java  |5 +-
 .../instructions/gpu/context/JCudaKernels.java  |3 +-
 .../instructions/gpu/context/JCudaObject.java   |  164 +-
 .../runtime/matrix/data/LibMatrixCUDA.java  | 1681 +---
 .../sysml/runtime/matrix/data/LibMatrixDNN.java |4 +-
 .../org/apache/sysml/utils/GPUStatistics.java   |  209 +
 .../java/org/apache/sysml/utils/Statistics.java |  116 +-
 26 files changed, 2782 insertions(+), 3891 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/4f9dcf9a/conf/SystemML-config.xml.template
--
diff --git a/conf/SystemML-config.xml.template 
b/conf/SystemML-config.xml.template
index da80039..a4c7b2f 100644
--- a/conf/SystemML-config.xml.template
+++ b/conf/SystemML-config.xml.template
@@ -65,4 +65,10 @@


1
+
+   
+   false
+
+   
+   false
 

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/4f9dcf9a/src/main/cpp/kernels/SystemML.cu
--
diff --git a/src/main/cpp/kernels/SystemML.cu b/src/main/cpp/kernels/SystemML.cu
index 40a1046..7bb2c34 100644
--- a/src/main/cpp/kernels/SystemML.cu
+++ b/src/main/cpp/kernels/SystemML.cu
@@ -25,10 +25,15 @@ nvcc -ptx -arch=sm_30 SystemML.cu
 
 #include 
 
-// dim => rlen (Assumption: rlen == clen)
-// N = length of dense array
+
+/**
+ * Does a copy of upper to lower triangle of the given matrix
+ * @param ret the input and output array allocated on the GPU
+ * @param dim the number of rows of the square matrix ret
+ * @param N total number of elements of the matrix
+ */
 extern "C"
-__global__ void copyUpperToLowerTriangleDense(double* ret, int dim, int N) {
+__global__ void copy_u2l_dense(double* ret, int dim, int N) {
int ix = blockIdx.x * blockDim.x + threadIdx.x;
int iy = blockIdx.y * blockDim.y + threadIdx.y;
int id_dest = iy * dim + ix;
@@ -71,26 +76,6 @@ __forceinline__ __device__ double binaryOp(double x, double 
y, int op) {
 }
 
 extern "C"
-__global__ void dense_matrix_set(double* A,  double scalar, int rlen, int 
clen) {
-   int ix = blockIdx.x * blockDim.x + threadIdx.x;
-   int iy = blockIdx.y * blockDim.y + threadIdx.y;
-   int index = ix * clen + iy;
-   if(index < rlen*clen) {
-   A[index] = scalar;
-   }
-}
-
-extern "C"
-__global__ void dense_matrix_copy(double* A,  double* ret, int rlen, int clen) 
{
-   int ix = blockIdx.x * blockDim.x + threadIdx.x;
-   int iy = blockIdx.y * blockDim.y + threadIdx.y;
-   int index = ix * clen + iy;
-   if(ix < rlen && iy < clen) {
-   ret[index] = A[index];
-   }
-}
-
-extern "C"
 __global__ void relu(double* A,  double* ret, int rlen, int clen) {
int ix = blockIdx.x * blockDim.x + threadIdx.x;
int iy = blockIdx.y

[4/5] incubator-systemml git commit: [SYSTEMML-1359] Added extra instrumentation for CUDA lib calls

2017-03-07 Thread nakul02

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/4f9dcf9a/src/main/cpp/kernels/SystemML.ptx
--
diff --git a/src/main/cpp/kernels/SystemML.ptx 
b/src/main/cpp/kernels/SystemML.ptx
index b9efd9b..efaf29b 100644
--- a/src/main/cpp/kernels/SystemML.ptx
+++ b/src/main/cpp/kernels/SystemML.ptx
@@ -1,16 +1,16 @@
 //
 // Generated by NVIDIA NVVM Compiler
 //
-// Compiler Build ID: CL-19856038
-// Cuda compilation tools, release 7.5, V7.5.17
+// Compiler Build ID: CL-21124049
+// Cuda compilation tools, release 8.0, V8.0.44
 // Based on LLVM 3.4svn
 //
 
-.version 4.3
+.version 5.0
 .target sm_30
 .address_size 64
 
-   // .globl   _Z6reduceI5SumOpEvPdS1_jT_d
+   // .globl   copy_u2l_dense
 .func  (.param .b64 func_retval0) __internal_accurate_pow
 (
.param .b64 __internal_accurate_pow_param_0,
@@ -19,1692 +19,10 @@
 ;
 .extern .shared .align 8 .b8 sdata[];
 
-.visible .func _Z6reduceI5SumOpEvPdS1_jT_d(
-   .param .b64 _Z6reduceI5SumOpEvPdS1_jT_d_param_0,
-   .param .b64 _Z6reduceI5SumOpEvPdS1_jT_d_param_1,
-   .param .b32 _Z6reduceI5SumOpEvPdS1_jT_d_param_2,
-   .param .align 1 .b8 _Z6reduceI5SumOpEvPdS1_jT_d_param_3[1],
-   .param .b64 _Z6reduceI5SumOpEvPdS1_jT_d_param_4
-)
-{
-   .reg .pred  %p<20>;
-   .reg .b32   %r<33>;
-   .reg .f64   %fd<79>;
-   .reg .b64   %rd<12>;
-
-
-   ld.param.u64%rd2, [_Z6reduceI5SumOpEvPdS1_jT_d_param_0];
-   ld.param.u64%rd3, [_Z6reduceI5SumOpEvPdS1_jT_d_param_1];
-   ld.param.u32%r5, [_Z6reduceI5SumOpEvPdS1_jT_d_param_2];
-   ld.param.f64%fd76, [_Z6reduceI5SumOpEvPdS1_jT_d_param_4];
-   mov.u32 %r6, %tid.x;
-   mov.u32 %r7, %ctaid.x;
-   shl.b32 %r8, %r7, 1;
-   mov.u32 %r9, %ntid.x;
-   mad.lo.s32  %r32, %r8, %r9, %r6;
-   setp.ge.u32 %p1, %r32, %r5;
-   @%p1 braBB0_5;
-
-   mov.f64 %fd77, %fd76;
-
-BB0_2:
-   mov.f64 %fd1, %fd77;
-   mul.wide.u32%rd4, %r32, 8;
-   add.s64 %rd5, %rd2, %rd4;
-   ld.f64  %fd29, [%rd5];
-   add.f64 %fd78, %fd1, %fd29;
-   add.s32 %r3, %r32, %r9;
-   setp.ge.u32 %p2, %r3, %r5;
-   @%p2 braBB0_4;
-
-   mul.wide.u32%rd6, %r3, 8;
-   add.s64 %rd7, %rd2, %rd6;
-   ld.f64  %fd30, [%rd7];
-   add.f64 %fd78, %fd78, %fd30;
-
-BB0_4:
-   mov.f64 %fd77, %fd78;
-   shl.b32 %r12, %r9, 1;
-   mov.u32 %r13, %nctaid.x;
-   mad.lo.s32  %r32, %r12, %r13, %r32;
-   setp.lt.u32 %p3, %r32, %r5;
-   mov.f64 %fd76, %fd77;
-   @%p3 braBB0_2;
-
-BB0_5:
-   mov.f64 %fd74, %fd76;
-   mul.wide.u32%rd8, %r6, 8;
-   mov.u64 %rd9, sdata;
-   add.s64 %rd1, %rd9, %rd8;
-   st.shared.f64   [%rd1], %fd74;
-   bar.sync0;
-   setp.lt.u32 %p4, %r9, 1024;
-   @%p4 braBB0_9;
-
-   setp.gt.u32 %p5, %r6, 511;
-   mov.f64 %fd75, %fd74;
-   @%p5 braBB0_8;
-
-   ld.shared.f64   %fd31, [%rd1+4096];
-   add.f64 %fd75, %fd74, %fd31;
-   st.shared.f64   [%rd1], %fd75;
-
-BB0_8:
-   mov.f64 %fd74, %fd75;
-   bar.sync0;
-
-BB0_9:
-   mov.f64 %fd72, %fd74;
-   setp.lt.u32 %p6, %r9, 512;
-   @%p6 braBB0_13;
-
-   setp.gt.u32 %p7, %r6, 255;
-   mov.f64 %fd73, %fd72;
-   @%p7 braBB0_12;
-
-   ld.shared.f64   %fd32, [%rd1+2048];
-   add.f64 %fd73, %fd72, %fd32;
-   st.shared.f64   [%rd1], %fd73;
-
-BB0_12:
-   mov.f64 %fd72, %fd73;
-   bar.sync0;
-
-BB0_13:
-   mov.f64 %fd70, %fd72;
-   setp.lt.u32 %p8, %r9, 256;
-   @%p8 braBB0_17;
-
-   setp.gt.u32 %p9, %r6, 127;
-   mov.f64 %fd71, %fd70;
-   @%p9 braBB0_16;
-
-   ld.shared.f64   %fd33, [%rd1+1024];
-   add.f64 %fd71, %fd70, %fd33;
-   st.shared.f64   [%rd1], %fd71;
-
-BB0_16:
-   mov.f64 %fd70, %fd71;
-   bar.sync0;
-
-BB0_17:
-   mov.f64 %fd68, %fd70;
-   setp.lt.u32 %p10, %r9, 128;
-   @%p10 bra   BB0_21;
-
-   setp.gt.u32 %p11, %r6, 63;
-   mov.f64 %fd69, %fd68;
-   @%p11 bra   BB0_20;
-
-   ld.shared.f64   %fd34, [%rd1+512];
-   add.f64 %fd69, %fd68, %fd34;
-   st.shared.f64   [%rd1], %fd69;
-
-BB0_20:
-   mov.f64 %fd68, %fd69;
-   bar.sync0;
-
-BB0_21:
-   mov.f64 %fd67, %fd68;
-   setp.gt.u32 %p12, %r6, 31;
-   @%p12 bra   BB0_34;
-
-   setp.lt.u32 %p13, %r9, 64;
-   @%p13 bra   BB0_24;
-
-   ld.volatile.shared.f64  %fd35, [%rd1+256];
-   add.f64 %fd67, %fd67, %

[2/5] incubator-systemml git commit: [SYSTEMML-1359] Added extra instrumentation for CUDA lib calls

2017-03-07 Thread nakul02

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/4f9dcf9a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java
--
diff --git 
a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java 
b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java
index 51a0f6b..1511afc 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java
@@ -44,6 +44,7 @@ import static 
jcuda.jcudnn.JCudnn.cudnnSetConvolution2dDescriptor;
 import static jcuda.jcudnn.JCudnn.cudnnSetFilter4dDescriptor;
 import static jcuda.jcudnn.JCudnn.cudnnSetPooling2dDescriptor;
 import static jcuda.jcudnn.JCudnn.cudnnSetTensor4dDescriptor;
+import static jcuda.jcudnn.cudnnActivationMode.CUDNN_ACTIVATION_RELU;
 import static jcuda.jcudnn.cudnnConvolutionMode.CUDNN_CROSS_CORRELATION;
 import static jcuda.jcudnn.cudnnDataType.CUDNN_DATA_DOUBLE;
 import static jcuda.jcudnn.cudnnNanPropagation.CUDNN_PROPAGATE_NAN;
@@ -55,23 +56,61 @@ import static 
jcuda.jcusparse.cusparseOperation.CUSPARSE_OPERATION_NON_TRANSPOSE
 import static jcuda.jcusparse.cusparseOperation.CUSPARSE_OPERATION_TRANSPOSE;
 import static jcuda.runtime.JCuda.cudaDeviceSynchronize;
 import static jcuda.runtime.JCuda.cudaMemcpy;
+import static jcuda.runtime.cudaMemcpyKind.cudaMemcpyDeviceToDevice;
 import static jcuda.runtime.cudaMemcpyKind.cudaMemcpyDeviceToHost;
 import static jcuda.runtime.cudaMemcpyKind.cudaMemcpyHostToDevice;
-import static jcuda.runtime.cudaMemcpyKind.cudaMemcpyDeviceToDevice;
-import static jcuda.jcudnn.cudnnActivationMode.CUDNN_ACTIVATION_RELU;
 import static 
org.apache.sysml.runtime.instructions.gpu.context.JCudaObject.allocate;
 import static 
org.apache.sysml.runtime.instructions.gpu.context.JCudaObject.cudaFreeHelper;
+
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.sysml.runtime.DMLRuntimeException;
 import org.apache.sysml.runtime.controlprogram.caching.MatrixObject;
 import org.apache.sysml.runtime.controlprogram.context.ExecutionContext;
-import org.apache.sysml.runtime.functionobjects.*;
+import org.apache.sysml.runtime.functionobjects.And;
+import org.apache.sysml.runtime.functionobjects.Builtin;
+import org.apache.sysml.runtime.functionobjects.CM;
+import org.apache.sysml.runtime.functionobjects.Divide;
+import org.apache.sysml.runtime.functionobjects.Equals;
+import org.apache.sysml.runtime.functionobjects.GreaterThan;
+import org.apache.sysml.runtime.functionobjects.GreaterThanEquals;
+import org.apache.sysml.runtime.functionobjects.IndexFunction;
+import org.apache.sysml.runtime.functionobjects.KahanPlus;
+import org.apache.sysml.runtime.functionobjects.KahanPlusSq;
+import org.apache.sysml.runtime.functionobjects.LessThan;
+import org.apache.sysml.runtime.functionobjects.LessThanEquals;
+import org.apache.sysml.runtime.functionobjects.Mean;
+import org.apache.sysml.runtime.functionobjects.Minus;
+import org.apache.sysml.runtime.functionobjects.Multiply;
+import org.apache.sysml.runtime.functionobjects.Multiply2;
+import org.apache.sysml.runtime.functionobjects.NotEquals;
+import org.apache.sysml.runtime.functionobjects.Or;
+import org.apache.sysml.runtime.functionobjects.Plus;
+import org.apache.sysml.runtime.functionobjects.Power;
+import org.apache.sysml.runtime.functionobjects.Power2;
+import org.apache.sysml.runtime.functionobjects.ReduceAll;
+import org.apache.sysml.runtime.functionobjects.ReduceCol;
+import org.apache.sysml.runtime.functionobjects.ReduceDiag;
+import org.apache.sysml.runtime.functionobjects.ReduceRow;
+import org.apache.sysml.runtime.functionobjects.ValueFunction;
 import org.apache.sysml.runtime.instructions.cp.DoubleObject;
-import org.apache.sysml.runtime.instructions.gpu.context.*;
+import org.apache.sysml.runtime.instructions.gpu.GPUInstruction;
+import org.apache.sysml.runtime.instructions.gpu.context.ExecutionConfig;
+import org.apache.sysml.runtime.instructions.gpu.context.GPUContext;
+import org.apache.sysml.runtime.instructions.gpu.context.JCudaContext;
+import org.apache.sysml.runtime.instructions.gpu.context.JCudaKernels;
+import org.apache.sysml.runtime.instructions.gpu.context.JCudaObject;
 import 
org.apache.sysml.runtime.instructions.gpu.context.JCudaObject.CSRPointer;
-import org.apache.sysml.runtime.matrix.operators.*;
+import org.apache.sysml.runtime.matrix.operators.AggregateOperator;
+import org.apache.sysml.runtime.matrix.operators.AggregateUnaryOperator;
+import org.apache.sysml.runtime.matrix.operators.BinaryOperator;
+import org.apache.sysml.runtime.matrix.operators.CMOperator;
+import org.apache.sysml.runtime.matrix.operators.LeftScalarOperator;
+import org.apache.sysml.runtime.matrix.operators.RightScalarOperator;
+import org.apache.sysml.runtime.matrix.operators.ScalarOperator;
+import org.apache.sysml.utils.GPUStatistic

[3/5] incubator-systemml git commit: [SYSTEMML-1359] Added extra instrumentation for CUDA lib calls

2017-03-07 Thread nakul02

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/4f9dcf9a/src/main/java/org/apache/sysml/api/DMLScript.java
--
diff --git a/src/main/java/org/apache/sysml/api/DMLScript.java 
b/src/main/java/org/apache/sysml/api/DMLScript.java
index 97597e0..798e74e 100644
--- a/src/main/java/org/apache/sysml/api/DMLScript.java
+++ b/src/main/java/org/apache/sysml/api/DMLScript.java
@@ -78,6 +78,8 @@ import 
org.apache.sysml.runtime.controlprogram.parfor.ProgramConverter;
 import 
org.apache.sysml.runtime.controlprogram.parfor.stat.InfrastructureAnalyzer;
 import org.apache.sysml.runtime.controlprogram.parfor.util.IDHandler;
 import org.apache.sysml.runtime.matrix.CleanupMR;
+import org.apache.sysml.runtime.matrix.data.LibMatrixCUDA;
+import org.apache.sysml.runtime.matrix.data.LibMatrixDNN;
 import org.apache.sysml.runtime.matrix.mapred.MRConfigurationNames;
 import org.apache.sysml.runtime.matrix.mapred.MRJobConfiguration;
 import org.apache.sysml.runtime.util.LocalFileUtils;
@@ -85,6 +87,7 @@ import org.apache.sysml.runtime.util.MapReduceTool;
 import org.apache.sysml.utils.Explain;
 import org.apache.sysml.utils.Explain.ExplainCounts;
 import org.apache.sysml.utils.Explain.ExplainType;
+import org.apache.sysml.utils.GPUStatistics;
 import org.apache.sysml.utils.Statistics;
 import org.apache.sysml.yarn.DMLAppMasterUtils;
 import org.apache.sysml.yarn.DMLYarnClientProxy;
@@ -646,7 +649,11 @@ public class DMLScript

//double costs = CostEstimationWrapper.getTimeEstimate(rtprog, 
ExecutionContextFactory.createContext());
//System.out.println("Estimated costs: "+costs);
-   
+
+   // Whether extra statistics useful for developers and others 
interested in digging
+   // into performance problems are recorded and displayed
+   GPUStatistics.DISPLAY_STATISTICS = 
dmlconf.getBooleanValue(DMLConfig.EXTRA_GPU_STATS);
+   LibMatrixDNN.DISPLAY_STATISTICS = 
dmlconf.getBooleanValue(DMLConfig.EXTRA_DNN_STATS);

//Step 10: execute runtime program
Statistics.startRunTimer();

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/4f9dcf9a/src/main/java/org/apache/sysml/conf/DMLConfig.java
--
diff --git a/src/main/java/org/apache/sysml/conf/DMLConfig.java 
b/src/main/java/org/apache/sysml/conf/DMLConfig.java
index 3d0fb28..a42b1ca 100644
--- a/src/main/java/org/apache/sysml/conf/DMLConfig.java
+++ b/src/main/java/org/apache/sysml/conf/DMLConfig.java
@@ -74,6 +74,8 @@ public class DMLConfig
public static final String CODEGEN  = "codegen.enabled"; 
//boolean
public static final String CODEGEN_PLANCACHE= "codegen.plancache"; 
//boolean
public static final String CODEGEN_LITERALS = "codegen.literals"; 
//1..heuristic, 2..always
+   public static final String EXTRA_GPU_STATS  = 
"systemml.stats.extraGPU"; //boolean
+   public static final String EXTRA_DNN_STATS  = 
"systemml.stats.extraDNN"; //boolean
 
// Fraction of available memory to use. The available memory is 
computed when the JCudaContext is created
// to handle the tradeoff on calling cudaMemGetInfo too often.
@@ -114,7 +116,10 @@ public class DMLConfig
_defaultVals.put(CODEGEN,"false" );
_defaultVals.put(CODEGEN_PLANCACHE,  "true" );
_defaultVals.put(CODEGEN_LITERALS,   "1" );
-   
+
+   _defaultVals.put(EXTRA_GPU_STATS,   "false" );
+   _defaultVals.put(EXTRA_DNN_STATS,   "false" );
+
_defaultVals.put(GPU_MEMORY_UTILIZATION_FACTOR,  "0.9" );
_defaultVals.put(REFRESH_AVAILABLE_MEMORY_EVERY_TIME,  
"true" );
}
@@ -402,6 +407,7 @@ public class DMLConfig
YARN_APPMASTER, YARN_APPMASTERMEM, 
YARN_MAPREDUCEMEM, 
CP_PARALLEL_MATRIXMULT, CP_PARALLEL_TEXTIO,
COMPRESSED_LINALG, CODEGEN, CODEGEN_LITERALS, 
CODEGEN_PLANCACHE,
+   EXTRA_GPU_STATS, EXTRA_DNN_STATS
}; 

StringBuilder sb = new StringBuilder();

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/4f9dcf9a/src/main/java/org/apache/sysml/runtime/controlprogram/context/ExecutionContext.java
--
diff --git 
a/src/main/java/org/apache/sysml/runtime/controlprogram/context/ExecutionContext.java
 
b/src/main/java/org/apache/sysml/runtime/controlprogram/context/ExecutionContext.java
index f14123e..6455add 100644
--- 
a/src/main/java/org/apache/sysml/runtime/controlprogram/context/ExecutionContext.java
+++ 
b/src/main/

[1/5] incubator-systemml git commit: [SYSTEMML-1359] Added extra instrumentation for CUDA lib calls

2017-03-07 Thread nakul02

Repository: incubator-systemml
Updated Branches:
  refs/heads/master ee33ec62d -> 4f9dcf9ad


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/4f9dcf9a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java
--
diff --git 
a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java 
b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java
index 0c0410c..b46985f 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java
@@ -50,10 +50,10 @@ public class LibMatrixDNN {

//library configurations and external contracts
public static final boolean SUPPORTS_SPARSE_OUTPUTS = false; 
//operations able to handle sparse outputs 
-   private static final boolean DISPLAY_STATISTICS = false; //conv2d 
summaries in stats output
private static final boolean ALLOW_MULTI_THREADED_OPS = true; //enable 
multi-threading in cp
private static final int NUM_TASK_FACTOR = 2; //number of tasks is 
vcores scaled by this factor
-   
+   public static boolean DISPLAY_STATISTICS = false; //conv2d summaries in 
stats output
+
private enum TaskType {
MaxPooling_Forward, MaxPooling_Backward, 
// Alternate approaches that we tried but whose performance was 
unsatisfactory: direct, non-looped im2col

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/4f9dcf9a/src/main/java/org/apache/sysml/utils/GPUStatistics.java
--
diff --git a/src/main/java/org/apache/sysml/utils/GPUStatistics.java 
b/src/main/java/org/apache/sysml/utils/GPUStatistics.java
new file mode 100644
index 000..044901b
--- /dev/null
+++ b/src/main/java/org/apache/sysml/utils/GPUStatistics.java
@@ -0,0 +1,209 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.utils;
+
+import org.apache.sysml.api.DMLScript;
+
+import java.util.*;
+import java.util.concurrent.atomic.AtomicLong;
+
+/**
+ * Measures performance numbers when GPU mode is enabled
+ * Printed as part of {@link Statistics}.
+ */
+public class GPUStatistics {
+
+  // Whether or not extra per-instruction statistics will be recorded and 
shown for the GPU
+  public static boolean DISPLAY_STATISTICS = false;
+
+  private static int iNoOfExecutedGPUInst = 0;
+
+  public static long cudaInitTime = 0;
+  public static long cudaLibrariesInitTime = 0;
+  public static AtomicLong cudaSparseToDenseTime = new AtomicLong(0);  
// time spent in converting sparse matrix block to dense
+  public static AtomicLong cudaDenseToSparseTime = new AtomicLong(0);  
// time spent in converting dense matrix block to sparse
+  public static AtomicLong cudaSparseConversionTime = new AtomicLong(0);   
// time spent in converting between sparse block types
+  public static AtomicLong cudaSparseToDenseCount = new AtomicLong(0);
+  public static AtomicLong cudaDenseToSparseCount = new AtomicLong(0);
+  public static AtomicLong cudaSparseConversionCount = new AtomicLong(0);
+
+  public static AtomicLong cudaAllocTime = new AtomicLong(0); // 
time spent in allocating memory on the GPU
+  public static AtomicLong cudaDeAllocTime = new AtomicLong(0);   // 
time spent in deallocating memory on the GPU
+  public static AtomicLong cudaToDevTime = new AtomicLong(0); // 
time spent in copying data from host (CPU) to device (GPU) memory
+  public static AtomicLong cudaFromDevTime = new AtomicLong(0);   // 
time spent in copying data from device to host
+  public static AtomicLong cudaAllocCount = new AtomicLong(0);
+  public static AtomicLong cudaDeAllocCount = new AtomicLong(0);
+  public static AtomicLong cudaToDevCount = new AtomicLong(0);
+  public static AtomicLong cudaFromDevCount = new AtomicLong(0);
+  public static AtomicLong cudaEvictionCount = new AtomicLong(0);
+
+  // Per instruction miscellaneous timers.
+  // Used to record events in a CP Heavy Hitter instruction and
+  // provide a breakdown of how t

incubator-systemml git commit: [SYSTEMML-942] added gpu option to MLContext API

2017-03-07 Thread nakul02

Repository: incubator-systemml
Updated Branches:
  refs/heads/master 6f4d8762d -> 6b1572e4b


[SYSTEMML-942] added gpu option to MLContext API

Additionally,
- Changed initialization of CUDA libraries from static to per instance
- Added documentation to mlcontext programming guide

Closes #420


Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/6b1572e4
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/6b1572e4
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/6b1572e4

Branch: refs/heads/master
Commit: 6b1572e4bba31619c5bed19fd0c106d2e759f159
Parents: 6f4d876
Author: Nakul Jindal 
Authored: Tue Mar 7 13:41:03 2017 -0800
Committer: Nakul Jindal 
Committed: Tue Mar 7 13:41:03 2017 -0800

--
 docs/spark-mlcontext-programming-guide.md   | 90 
 .../apache/sysml/api/mlcontext/MLContext.java   | 25 ++
 .../sysml/api/mlcontext/ScriptExecutor.java | 26 +-
 .../instructions/gpu/context/GPUContext.java|  2 +-
 .../instructions/gpu/context/JCudaContext.java  | 42 -
 5 files changed, 163 insertions(+), 22 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/6b1572e4/docs/spark-mlcontext-programming-guide.md
--
diff --git a/docs/spark-mlcontext-programming-guide.md 
b/docs/spark-mlcontext-programming-guide.md
index c15c27f..c28eaf5 100644
--- a/docs/spark-mlcontext-programming-guide.md
+++ b/docs/spark-mlcontext-programming-guide.md
@@ -1086,6 +1086,96 @@ mean: Double = 0.5002109404821844
 
 
 
+## GPU
+
+If the driver node has a GPU, SystemML may be able to utilize it, subject to 
memory constraints and which instructions are used in the DML script.
+
+
+
+
+{% highlight scala %}
+ml.setGPU(true)
+ml.setStatistics(true)
+val matMultScript = dml("""
+A = rand(rows=10, cols=1000)
+B = rand(rows=1000, cols=10)
+C = A %*% B
+print(toString(C))
+""")
+ml.execute(matMultScript)
+{% endhighlight %}
+
+
+
+{% highlight scala %}
+scala> ml.setGPU(true)
+
+scala> ml.setStatistics(true)
+
+scala> val matMultScript = dml("""
+ | A = rand(rows=10, cols=1000)
+ | B = rand(rows=1000, cols=10)
+ | C = A %*% B
+ | print(toString(C))
+ | """)
+matMultScript: org.apache.sysml.api.mlcontext.Script =
+Inputs:
+None
+
+Outputs:
+None
+
+scala> ml.execute(matMultScript)
+249.977 238.545 233.700 234.489 248.556 244.423 249.051 255.043 249.117 251.605
+249.226 248.680 245.532 238.258 254.451 249.827 260.957 251.273 250.577 257.571
+258.703 246.969 243.463 246.547 250.784 251.758 251.654 258.318 251.817 254.097
+248.788 242.960 230.920 244.026 249.159 247.998 251.330 254.718 248.013 255.706
+253.251 248.788 235.785 242.941 252.096 248.675 256.865 251.677 252.872 250.490
+256.087 245.035 234.124 238.307 248.630 252.522 251.122 251.577 249.171 247.974
+245.419 243.114 232.262 239.776 249.583 242.351 250.972 249.244 246.729 251.807
+250.081 242.367 230.334 240.955 248.332 240.730 246.940 250.396 244.107 249.729
+247.368 239.882 234.353 237.087 252.337 248.801 246.627 249.077 244.305 245.621
+252.827 257.352 239.546 246.529 258.916 255.612 260.480 254.805 252.695 257.531
+
+SystemML Statistics:
+Total elapsed time:0.000 sec.
+Total compilation time:0.000 sec.
+Total execution time:  0.000 sec.
+Number of compiled Spark inst: 0.
+Number of executed Spark inst: 0.
+CUDA/CuLibraries init time:0.000/0.003 sec.
+Number of executed GPU inst:   8.
+GPU mem tx time  (alloc/dealloc/toDev/fromDev):0.003/0.002/0.010/0.002 
sec.
+GPU mem tx count (alloc/dealloc/toDev/fromDev/evict):  24/24/0/16/8/0.
+GPU conversion time  (sparseConv/sp2dense/dense2sp):   0.000/0.000/0.000 sec.
+GPU conversion count (sparseConv/sp2dense/dense2sp):   0/0/0.
+Cache hits (Mem, WB, FS, HDFS):40/0/0/0.
+Cache writes (WB, FS, HDFS):   21/0/0.
+Cache times (ACQr/m, RLS, EXP):0.002/0.002/0.003/0.000 sec.
+HOP DAGs recompiled (PRED, SB):0/0.
+HOP DAGs recompile time:   0.000 sec.
+Spark ctx create time (lazy):  0.000 sec.
+Spark trans counts (par,bc,col):0/0/0.
+Spark trans times (par,bc,col):0.000/0.000/0.000 secs.
+Total JIT compile time:11.426 sec.
+Total JVM GC count:20.
+Total JVM GC time: 1.078 sec.
+Heavy hitter instructions (name, time, count):
+-- 1)  toString0.085 sec   8
+-- 2)  rand0.027 sec   16
+-- 3)  gpu_ba+*0.018 sec   8
+-- 4)  print   0.006 sec   8
+-- 5)  createvar   0.003 sec   24
+-- 6)  rmvar   0.003 sec   40
+
+res20: org.apache.sysml.api.mlcontext.MLResults =
+None
+{% endhighlight %}
+
+
+
+
+Note that GPU instructions are prefixed with "gpu" in the statistics (e.g. gpu_ba+* above).

incubator-systemml git commit: [SYSTEMML-942] added gpu option to MLContext API

2017-03-07 Thread nakul02

Repository: incubator-systemml
Updated Branches:
  refs/heads/gh-pages 4ec1b9f40 -> 42e86e76c


[SYSTEMML-942] added gpu option to MLContext API

Additionally,
- Changed initialization of CUDA libraries from static to per instance
- Added documentation to mlcontext programming guide

Closes #420


Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/42e86e76
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/42e86e76
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/42e86e76

Branch: refs/heads/gh-pages
Commit: 42e86e76c1e324f53351fe5866ce5675482df15a
Parents: 4ec1b9f
Author: Nakul Jindal 
Authored: Tue Mar 7 13:41:03 2017 -0800
Committer: Nakul Jindal 
Committed: Tue Mar 7 13:41:03 2017 -0800

--
 spark-mlcontext-programming-guide.md | 90 +++
 1 file changed, 90 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/42e86e76/spark-mlcontext-programming-guide.md
--
diff --git a/spark-mlcontext-programming-guide.md 
b/spark-mlcontext-programming-guide.md
index c15c27f..c28eaf5 100644
--- a/spark-mlcontext-programming-guide.md
+++ b/spark-mlcontext-programming-guide.md
@@ -1086,6 +1086,96 @@ mean: Double = 0.5002109404821844
 
 
 
+## GPU
+
+If the driver node has a GPU, SystemML may be able to utilize it, subject to 
memory constraints and which instructions are used in the DML script.
+
+
+
+
+{% highlight scala %}
+ml.setGPU(true)
+ml.setStatistics(true)
+val matMultScript = dml("""
+A = rand(rows=10, cols=1000)
+B = rand(rows=1000, cols=10)
+C = A %*% B
+print(toString(C))
+""")
+ml.execute(matMultScript)
+{% endhighlight %}
+
+
+
+{% highlight scala %}
+scala> ml.setGPU(true)
+
+scala> ml.setStatistics(true)
+
+scala> val matMultScript = dml("""
+ | A = rand(rows=10, cols=1000)
+ | B = rand(rows=1000, cols=10)
+ | C = A %*% B
+ | print(toString(C))
+ | """)
+matMultScript: org.apache.sysml.api.mlcontext.Script =
+Inputs:
+None
+
+Outputs:
+None
+
+scala> ml.execute(matMultScript)
+249.977 238.545 233.700 234.489 248.556 244.423 249.051 255.043 249.117 251.605
+249.226 248.680 245.532 238.258 254.451 249.827 260.957 251.273 250.577 257.571
+258.703 246.969 243.463 246.547 250.784 251.758 251.654 258.318 251.817 254.097
+248.788 242.960 230.920 244.026 249.159 247.998 251.330 254.718 248.013 255.706
+253.251 248.788 235.785 242.941 252.096 248.675 256.865 251.677 252.872 250.490
+256.087 245.035 234.124 238.307 248.630 252.522 251.122 251.577 249.171 247.974
+245.419 243.114 232.262 239.776 249.583 242.351 250.972 249.244 246.729 251.807
+250.081 242.367 230.334 240.955 248.332 240.730 246.940 250.396 244.107 249.729
+247.368 239.882 234.353 237.087 252.337 248.801 246.627 249.077 244.305 245.621
+252.827 257.352 239.546 246.529 258.916 255.612 260.480 254.805 252.695 257.531
+
+SystemML Statistics:
+Total elapsed time:0.000 sec.
+Total compilation time:0.000 sec.
+Total execution time:  0.000 sec.
+Number of compiled Spark inst: 0.
+Number of executed Spark inst: 0.
+CUDA/CuLibraries init time:0.000/0.003 sec.
+Number of executed GPU inst:   8.
+GPU mem tx time  (alloc/dealloc/toDev/fromDev):0.003/0.002/0.010/0.002 
sec.
+GPU mem tx count (alloc/dealloc/toDev/fromDev/evict):  24/24/0/16/8/0.
+GPU conversion time  (sparseConv/sp2dense/dense2sp):   0.000/0.000/0.000 sec.
+GPU conversion count (sparseConv/sp2dense/dense2sp):   0/0/0.
+Cache hits (Mem, WB, FS, HDFS):40/0/0/0.
+Cache writes (WB, FS, HDFS):   21/0/0.
+Cache times (ACQr/m, RLS, EXP):0.002/0.002/0.003/0.000 sec.
+HOP DAGs recompiled (PRED, SB):0/0.
+HOP DAGs recompile time:   0.000 sec.
+Spark ctx create time (lazy):  0.000 sec.
+Spark trans counts (par,bc,col):0/0/0.
+Spark trans times (par,bc,col):0.000/0.000/0.000 secs.
+Total JIT compile time:11.426 sec.
+Total JVM GC count:20.
+Total JVM GC time: 1.078 sec.
+Heavy hitter instructions (name, time, count):
+-- 1)  toString0.085 sec   8
+-- 2)  rand0.027 sec   16
+-- 3)  gpu_ba+*0.018 sec   8
+-- 4)  print   0.006 sec   8
+-- 5)  createvar   0.003 sec   24
+-- 6)  rmvar   0.003 sec   40
+
+res20: org.apache.sysml.api.mlcontext.MLResults =
+None
+{% endhighlight %}
+
+
+
+
+Note that GPU instructions are prefixed with "gpu" in the statistics (e.g. gpu_ba+* above).
 
 ## Explain

incubator-systemml git commit: [SYSTEMML-1396] cudaFree is called lazily instead of asynchronously

2017-03-13 Thread nakul02

Repository: incubator-systemml
Updated Branches:
  refs/heads/master 9137f7b02 -> 80225f014


[SYSTEMML-1396] cudaFree is called lazily instead of asynchronously

- If a block is available in a free list of the exact matching size, it
  is memset to 0 and allocated, otherwise, the LRU blocks are cudaFree-ed
  until enough memory is available on the GPU
- Fixed timers
- bug fix for SYSTEMML-1340,1341 related to redundantly releasing an
  input block on the GPU

Closes #426


Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/80225f01
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/80225f01
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/80225f01

Branch: refs/heads/master
Commit: 80225f014338d9671aaf187186f117feb5d0c093
Parents: 9137f7b
Author: Nakul Jindal 
Authored: Mon Mar 13 13:55:39 2017 -0700
Committer: Nakul Jindal 
Committed: Mon Mar 13 13:55:39 2017 -0700

--
 .../instructions/gpu/GPUInstruction.java|   2 +
 .../instructions/gpu/context/GPUContext.java|  19 +-
 .../instructions/gpu/context/GPUObject.java |  76 ++--
 .../instructions/gpu/context/JCudaContext.java  |  52 ++-
 .../instructions/gpu/context/JCudaObject.java   | 353 +++
 .../runtime/matrix/data/LibMatrixCUDA.java  | 186 +-
 .../org/apache/sysml/utils/GPUStatistics.java   |  10 +-
 .../org/apache/sysml/utils/LRUCacheMap.java |  71 
 .../sysml/test/utils/LRUCacheMapTest.java   | 102 ++
 9 files changed, 537 insertions(+), 334 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/80225f01/src/main/java/org/apache/sysml/runtime/instructions/gpu/GPUInstruction.java
--
diff --git 
a/src/main/java/org/apache/sysml/runtime/instructions/gpu/GPUInstruction.java 
b/src/main/java/org/apache/sysml/runtime/instructions/gpu/GPUInstruction.java
index dcb2edc..04a2f1a 100644
--- 
a/src/main/java/org/apache/sysml/runtime/instructions/gpu/GPUInstruction.java
+++ 
b/src/main/java/org/apache/sysml/runtime/instructions/gpu/GPUInstruction.java
@@ -44,6 +44,8 @@ public abstract class GPUInstruction extends Instruction
public final static String MISC_TIMER_CUDA_FREE =   
"f";// time spent in calling cudaFree
public final static String MISC_TIMER_ALLOCATE =
"a";// time spent to allocate 
memory on gpu
public final static String MISC_TIMER_ALLOCATE_DENSE_OUTPUT = "ao"; 
// time spent to allocate dense output (recorded differently than 
MISC_TIMER_ALLOCATE)
+   public final static String MISC_TIMER_SET_ZERO =
"az";   // time spent to zero out allocated memory
+   public final static String MISC_TIMER_REUSE =   
"r";// time spent in reusing 
already allocated memory on GPU (mainly for the count)
 
// Matmult instructions
public final static String MISC_TIMER_SPARSE_ALLOCATE_LIB = 
"Msao"; // time spent in allocating for 
sparse matrix output

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/80225f01/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUContext.java
--
diff --git 
a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUContext.java
 
b/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUContext.java
index b792882..f076350 100644
--- 
a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUContext.java
+++ 
b/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUContext.java
@@ -18,11 +18,6 @@
  */
 package org.apache.sysml.runtime.instructions.gpu.context;
 
-import java.util.ArrayList;
-import java.util.concurrent.ConcurrentLinkedQueue;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Future;
-
 import org.apache.sysml.api.DMLScript;
 import org.apache.sysml.hops.OptimizerUtils;
 import org.apache.sysml.runtime.DMLRuntimeException;
@@ -33,18 +28,6 @@ import 
org.apache.sysml.runtime.controlprogram.caching.MatrixObject;
 @SuppressWarnings("rawtypes")
 public abstract class GPUContext {
 
-   public static ArrayList allocatedPointers = new 
ArrayList();
-
-   /** cudaFree calls are done asynchronously on a separate thread,
-*  this list preserve the list of currently happening cudaFree calls */
-   public static ConcurrentLinkedQueue pendingDeallocates = new 
ConcurrentLinkedQueue();
-
-   /*

incubator-systemml git commit: [HOTFIX] added missing license, removed missing exception in javadoc

2017-03-13 Thread nakul02

Repository: incubator-systemml
Updated Branches:
  refs/heads/master 80225f014 -> 95f300d9d


[HOTFIX] added missing license, removed missing exception in javadoc


Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/95f300d9
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/95f300d9
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/95f300d9

Branch: refs/heads/master
Commit: 95f300d9d18801f585e579227c4123c475eb5c9c
Parents: 80225f0
Author: Nakul Jindal 
Authored: Mon Mar 13 14:21:49 2017 -0700
Committer: Nakul Jindal 
Committed: Mon Mar 13 14:21:49 2017 -0700

--
 .../instructions/gpu/context/JCudaObject.java |  1 -
 .../java/org/apache/sysml/utils/LRUCacheMap.java  | 18 ++
 .../apache/sysml/test/utils/LRUCacheMapTest.java  | 18 ++
 3 files changed, 36 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/95f300d9/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/JCudaObject.java
--
diff --git 
a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/JCudaObject.java
 
b/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/JCudaObject.java
index 31664f6..92284d0 100644
--- 
a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/JCudaObject.java
+++ 
b/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/JCudaObject.java
@@ -1209,7 +1209,6 @@ public class JCudaObject extends GPUObject {
 * does lazy/eager cudaFree calls
 * @param toFree {@link Pointer} instance to be freed
 * @param eager true if to be done eagerly
-* @throws DMLRuntimeException
 */
public static void cudaFreeHelper(final Pointer toFree, boolean eager) {
cudaFreeHelper(null, toFree, eager);

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/95f300d9/src/main/java/org/apache/sysml/utils/LRUCacheMap.java
--
diff --git a/src/main/java/org/apache/sysml/utils/LRUCacheMap.java 
b/src/main/java/org/apache/sysml/utils/LRUCacheMap.java
index 0c8449a..64560af 100644
--- a/src/main/java/org/apache/sysml/utils/LRUCacheMap.java
+++ b/src/main/java/org/apache/sysml/utils/LRUCacheMap.java
@@ -1,3 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
 package org.apache.sysml.utils;
 
 

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/95f300d9/src/test/java/org/apache/sysml/test/utils/LRUCacheMapTest.java
--
diff --git a/src/test/java/org/apache/sysml/test/utils/LRUCacheMapTest.java 
b/src/test/java/org/apache/sysml/test/utils/LRUCacheMapTest.java
index d076266..3cf7e76 100644
--- a/src/test/java/org/apache/sysml/test/utils/LRUCacheMapTest.java
+++ b/src/test/java/org/apache/sysml/test/utils/LRUCacheMapTest.java
@@ -1,3 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
 package org.apache.sysml.test.utils;
 
 import org.apache.sysml.utils.LRUCacheMap;

90 matches

Mail list logo