[2/2] incubator-systemml git commit: [SYSTEMML-1632] Support loading and saving models via mllearn

2017-05-25 Thread niketanpansare
[SYSTEMML-1632] Support loading and saving models via mllearn

- Also, updated documentation and fixed bugs.


Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/c067a585
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/c067a585
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/c067a585

Branch: refs/heads/gh-pages
Commit: c067a5858f0e9360513c4c6be9e3a1940eb3b87a
Parents: b31ebbf
Author: Niketan Pansare 
Authored: Thu May 25 22:32:02 2017 -0700
Committer: Niketan Pansare 
Committed: Thu May 25 22:32:02 2017 -0700

--
 algorithms-classification.md |  52 +--
 algorithms-regression.md |   8 +-
 beginners-guide-caffe2dml.md | 264 +++
 beginners-guide-python.md|  33 +-
 native-backend.md|   4 +-
 python-reference.md  | 929 +-
 6 files changed, 455 insertions(+), 835 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/c067a585/algorithms-classification.md
--
diff --git a/algorithms-classification.md b/algorithms-classification.md
index ed56c34..04c5eb8 100644
--- a/algorithms-classification.md
+++ b/algorithms-classification.md
@@ -131,7 +131,7 @@ Eqs. (1) and (2).
 {% highlight python %}
 from systemml.mllearn import LogisticRegression
 # C = 1/reg
-logistic = LogisticRegression(sqlCtx, fit_intercept=True, max_iter=100, 
max_inner_iter=0, tol=0.01, C=1.0)
+logistic = LogisticRegression(spark, fit_intercept=True, max_iter=100, 
max_inner_iter=0, tol=0.01, C=1.0)
 # X_train, y_train and X_test can be NumPy matrices or Pandas DataFrame or 
SciPy Sparse Matrix
 y_test = logistic.fit(X_train, y_train).predict(X_test)
 # df_train is DataFrame that contains two columns: "features" (of type Vector) 
and "label". df_test is a DataFrame that contains the column "features"
@@ -229,6 +229,8 @@ if no maximum limit provided
 `mm`, or `csv`; see read/write functions in
 SystemML Language Reference for details.
 
+Please see [mllearn documentation](https://apache.github.io/incubator-systemml/python-reference#mllearn-api) for
+more details on the Python API.
 
 ### Examples
 
@@ -255,9 +257,7 @@ print('LogisticRegression score: %f' % 
logistic.fit(X_train, y_train).score(X_te
 from pyspark.ml import Pipeline
 from systemml.mllearn import LogisticRegression
 from pyspark.ml.feature import HashingTF, Tokenizer
-from pyspark.sql import SQLContext
-sqlCtx = SQLContext(sc)
-training = sqlCtx.createDataFrame([
+training = spark.createDataFrame([
 (0L, "a b c d e spark", 1.0),
 (1L, "b d", 2.0),
 (2L, "spark f g h", 1.0),
@@ -273,10 +273,10 @@ training = sqlCtx.createDataFrame([
 ], ["id", "text", "label"])
 tokenizer = Tokenizer(inputCol="text", outputCol="words")
 hashingTF = HashingTF(inputCol="words", outputCol="features", numFeatures=20)
-lr = LogisticRegression(sqlCtx)
+lr = LogisticRegression(spark)
 pipeline = Pipeline(stages=[tokenizer, hashingTF, lr])
 model = pipeline.fit(training)
-test = sqlCtx.createDataFrame([
+test = spark.createDataFrame([
 (12L, "spark i j k"),
 (13L, "l m n"),
 (14L, "mapreduce spark"),
@@ -290,7 +290,7 @@ prediction.show()
 import org.apache.spark.ml.feature.{HashingTF, Tokenizer}
 import org.apache.sysml.api.ml.LogisticRegression
 import org.apache.spark.ml.Pipeline
-val training = sqlContext.createDataFrame(Seq(
+val training = spark.createDataFrame(Seq(
 ("a b c d e spark", 1.0),
 ("b d", 2.0),
 ("spark f g h", 1.0),
@@ -308,7 +308,7 @@ val hashingTF = new 
HashingTF().setNumFeatures(20).setInputCol(tokenizer.getOutp
 val lr = new LogisticRegression("logReg", sc)
 val pipeline = new Pipeline().setStages(Array(tokenizer, hashingTF, lr))
 val model = pipeline.fit(training)
-val test = sqlContext.createDataFrame(Seq(
+val test = spark.createDataFrame(Seq(
 ("spark i j k", 1.0),
 ("l m n", 2.0),
 ("mapreduce spark", 1.0),
@@ -500,7 +500,7 @@ support vector machine (`y` with domain size `2`).
 {% highlight python %}
 from systemml.mllearn import SVM
 # C = 1/reg
-svm = SVM(sqlCtx, fit_intercept=True, max_iter=100, tol=0.01, C=1.0, 
is_multi_class=False)
+svm = SVM(spark, fit_intercept=True, max_iter=100, tol=0.01, C=1.0, 
is_multi_class=False)
 # X_train, y_train and X_test can be NumPy matrices or Pandas DataFrame or 
SciPy Sparse Matrix
 y_test = svm.fit(X_train, y_train)
 # df_train is DataFrame that contains two columns: "features" (of type Vector) 
and "label". df_test is a DataFrame that contains the column "features"
@@ -637,6 +637,8 @@ held-out test set. Note that this is an optional argument.
 **confusion**: Location (on HDFS) to store the confusion 

[2/2] incubator-systemml git commit: [SYSTEMML-1632] Support loading and saving models via mllearn

2017-05-25 Thread niketanpansare
[SYSTEMML-1632] Support loading and saving models via mllearn

- Also, updated documentation and fixed bugs.


Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/d69f3441
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/d69f3441
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/d69f3441

Branch: refs/heads/master
Commit: d69f3441c8243ddd13dd3da6aab9c2d5701c6e50
Parents: d36a0c1
Author: Niketan Pansare 
Authored: Thu May 25 22:32:02 2017 -0700
Committer: Niketan Pansare 
Committed: Thu May 25 22:32:02 2017 -0700

--
 docs/algorithms-classification.md   |  52 +-
 docs/algorithms-regression.md   |   8 +-
 docs/beginners-guide-caffe2dml.md   | 264 --
 docs/beginners-guide-python.md  |  33 +-
 docs/native-backend.md  |   4 +-
 docs/python-reference.md| 929 +--
 pom.xml |   3 +
 .../caffe2dml/models/mnist_lenet/lenet.proto| 195 
 .../models/mnist_lenet/lenet_solver.proto   |  19 +
 src/main/python/systemml/converters.py  |  96 +-
 src/main/python/systemml/mllearn/estimators.py  | 179 ++--
 .../org/apache/sysml/api/dl/Caffe2DML.scala |  64 +-
 .../org/apache/sysml/api/dl/CaffeLayer.scala| 108 ++-
 .../org/apache/sysml/api/dl/CaffeNetwork.scala  |  55 +-
 .../scala/org/apache/sysml/api/dl/Utils.scala   | 145 +++
 .../sysml/api/ml/BaseSystemMLClassifier.scala   |  43 +-
 .../sysml/api/ml/BaseSystemMLRegressor.scala|   8 +-
 .../apache/sysml/api/ml/LinearRegression.scala  |  33 +-
 .../sysml/api/ml/LogisticRegression.scala   |  28 +-
 .../org/apache/sysml/api/ml/NaiveBayes.scala|  28 +-
 .../scala/org/apache/sysml/api/ml/SVM.scala |  27 +-
 .../scala/org/apache/sysml/api/ml/Utils.scala   |  25 +
 22 files changed, 1365 insertions(+), 981 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/d69f3441/docs/algorithms-classification.md
--
diff --git a/docs/algorithms-classification.md 
b/docs/algorithms-classification.md
index ed56c34..04c5eb8 100644
--- a/docs/algorithms-classification.md
+++ b/docs/algorithms-classification.md
@@ -131,7 +131,7 @@ Eqs. (1) and (2).
 {% highlight python %}
 from systemml.mllearn import LogisticRegression
 # C = 1/reg
-logistic = LogisticRegression(sqlCtx, fit_intercept=True, max_iter=100, 
max_inner_iter=0, tol=0.01, C=1.0)
+logistic = LogisticRegression(spark, fit_intercept=True, max_iter=100, 
max_inner_iter=0, tol=0.01, C=1.0)
 # X_train, y_train and X_test can be NumPy matrices or Pandas DataFrame or 
SciPy Sparse Matrix
 y_test = logistic.fit(X_train, y_train).predict(X_test)
 # df_train is DataFrame that contains two columns: "features" (of type Vector) 
and "label". df_test is a DataFrame that contains the column "features"
@@ -229,6 +229,8 @@ if no maximum limit provided
 `mm`, or `csv`; see read/write functions in
 SystemML Language Reference for details.
 
+Please see [mllearn documentation](https://apache.github.io/incubator-systemml/python-reference#mllearn-api) for
+more details on the Python API.
 
 ### Examples
 
@@ -255,9 +257,7 @@ print('LogisticRegression score: %f' % 
logistic.fit(X_train, y_train).score(X_te
 from pyspark.ml import Pipeline
 from systemml.mllearn import LogisticRegression
 from pyspark.ml.feature import HashingTF, Tokenizer
-from pyspark.sql import SQLContext
-sqlCtx = SQLContext(sc)
-training = sqlCtx.createDataFrame([
+training = spark.createDataFrame([
 (0L, "a b c d e spark", 1.0),
 (1L, "b d", 2.0),
 (2L, "spark f g h", 1.0),
@@ -273,10 +273,10 @@ training = sqlCtx.createDataFrame([
 ], ["id", "text", "label"])
 tokenizer = Tokenizer(inputCol="text", outputCol="words")
 hashingTF = HashingTF(inputCol="words", outputCol="features", numFeatures=20)
-lr = LogisticRegression(sqlCtx)
+lr = LogisticRegression(spark)
 pipeline = Pipeline(stages=[tokenizer, hashingTF, lr])
 model = pipeline.fit(training)
-test = sqlCtx.createDataFrame([
+test = spark.createDataFrame([
 (12L, "spark i j k"),
 (13L, "l m n"),
 (14L, "mapreduce spark"),
@@ -290,7 +290,7 @@ prediction.show()
 import org.apache.spark.ml.feature.{HashingTF, Tokenizer}
 import org.apache.sysml.api.ml.LogisticRegression
 import org.apache.spark.ml.Pipeline
-val training = sqlContext.createDataFrame(Seq(
+val training = spark.createDataFrame(Seq(
 ("a b c d e spark", 1.0),
 ("b d", 2.0),
 ("spark f g h", 1.0),
@@ -308,7 +308,7 @@ val hashingTF = new 
HashingTF().setNumFeatures(20).setInputCol(tokenizer.getOutp
 val lr = new