svn commit: r1828046 [8/20] - /systemml/site/docs/1.1.0/

reinwald Thu, 29 Mar 2018 21:31:53 -0700

Added: systemml/site/docs/1.1.0/beginners-guide-keras2dml.html
URL: 
http://svn.apache.org/viewvc/systemml/site/docs/1.1.0/beginners-guide-keras2dml.html?rev=1828046&view=auto
==============================================================================
--- systemml/site/docs/1.1.0/beginners-guide-keras2dml.html (added)
+++ systemml/site/docs/1.1.0/beginners-guide-keras2dml.html Fri Mar 30 04:31:05 
2018
@@ -0,0 +1,354 @@
+<!DOCTYPE html>
+<!--[if lt IE 7]>      <html class="no-js lt-ie9 lt-ie8 lt-ie7"> <![endif]-->
+<!--[if IE 7]>         <html class="no-js lt-ie9 lt-ie8"> <![endif]-->
+<!--[if IE 8]>         <html class="no-js lt-ie9"> <![endif]-->
+<!--[if gt IE 8]><!--> <html class="no-js"> <!--<![endif]-->
+    <head>
+        <title>Beginner's Guide for Keras2DML users - SystemML 1.1.0</title>
+        <meta charset="utf-8">
+        <meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1">
+        
+        <meta name="description" content="Beginner's Guide for Keras2DML 
users">
+        
+        <meta name="viewport" content="width=device-width">
+        <link rel="stylesheet" href="css/bootstrap.min.css">
+        <link rel="stylesheet" href="css/main.css">
+        <link rel="stylesheet" href="css/pygments-default.css">
+        <link rel="shortcut icon" href="img/favicon.png">
+    </head>
+    <body>
+        <!--[if lt IE 7]>
+            <p class="chromeframe">You are using an outdated browser. <a 
href="http://browsehappy.com/">Upgrade your browser today</a> or <a 
href="http://www.google.com/chromeframe/?redirect=true">install Google Chrome 
Frame</a> to better experience this site.</p>
+        <![endif]-->
+
+        <header class="navbar navbar-default navbar-fixed-top" id="topbar">
+            <div class="container">
+                <div class="navbar-header">
+                    <div class="navbar-brand brand projectlogo">
+                        <a href="http://systemml.apache.org/"><img 
class="logo" src="img/systemml-logo.png" alt="Apache SystemML" title="Apache 
SystemML"/></a>
+                    </div>
+                    <div class="navbar-brand brand projecttitle">
+                        <a href="http://systemml.apache.org/">Apache 
SystemML<sup id="trademark">&trade;</sup></a><br/>
+                        <span class="version">1.1.0</span>
+                    </div>
+                    <button type="button" class="navbar-toggle collapsed" 
data-toggle="collapse" data-target=".navbar-collapse">
+                        <span class="sr-only">Toggle navigation</span>
+                        <span class="icon-bar"></span>
+                        <span class="icon-bar"></span>
+                        <span class="icon-bar"></span>
+                    </button>
+                </div>
+                <nav class="navbar-collapse collapse">
+                    <ul class="nav navbar-nav navbar-right">
+                        <li><a href="index.html">Overview</a></li>
+                        <li><a 
href="https://github.com/apache/systemml">GitHub</a></li>
+                        <li class="dropdown">
+                            <a href="#" class="dropdown-toggle" 
data-toggle="dropdown">Documentation<b class="caret"></b></a>
+                            <ul class="dropdown-menu" role="menu">
+                                <li><b>Running SystemML:</b></li>
+                                <li><a 
href="https://github.com/apache/systemml">SystemML GitHub README</a></li>
+                                <li><a 
href="spark-mlcontext-programming-guide.html">Spark MLContext</a></li>
+                                <li><a href="spark-batch-mode.html">Spark 
Batch Mode</a>
+                                <li><a href="hadoop-batch-mode.html">Hadoop 
Batch Mode</a>
+                                <li><a href="standalone-guide.html">Standalone 
Guide</a></li>
+                                <li><a href="jmlc.html">Java Machine Learning 
Connector (JMLC)</a>
+                                <li class="divider"></li>
+                                <li><b>Language Guides:</b></li>
+                                <li><a href="dml-language-reference.html">DML 
Language Reference</a></li>
+                                <li><a 
href="beginners-guide-to-dml-and-pydml.html">Beginner's Guide to DML and 
PyDML</a></li>
+                                <li><a 
href="beginners-guide-python.html">Beginner's Guide for Python Users</a></li>
+                                <li><a href="python-reference.html">Reference 
Guide for Python Users</a></li>
+                                <li class="divider"></li>
+                                <li><b>ML Algorithms:</b></li>
+                                <li><a 
href="algorithms-reference.html">Algorithms Reference</a></li>
+                                <li class="divider"></li>
+                                <li><b>Tools:</b></li>
+                                <li><a href="debugger-guide.html">Debugger 
Guide</a></li>
+                                <li><a 
href="developer-tools-systemml.html">IDE Guide</a></li>
+                                <li class="divider"></li>
+                                <li><b>Other:</b></li>
+                                <li><a 
href="contributing-to-systemml.html">Contributing to SystemML</a></li>
+                                <li><a href="engine-dev-guide.html">Engine 
Developer Guide</a></li>
+                                <li><a 
href="troubleshooting-guide.html">Troubleshooting Guide</a></li>
+                                <li><a href="release-process.html">Release 
Process</a></li>
+                            </ul>
+                        </li>
+                        
+                        <li class="dropdown">
+                            <a href="#" class="dropdown-toggle" 
data-toggle="dropdown">API Docs<b class="caret"></b></a>
+                            <ul class="dropdown-menu" role="menu">
+                                <li><a 
href="./api/java/index.html">Java</a></li>
+                                <li><a 
href="./api/python/index.html">Python</a></li>
+                            </ul>
+                        </li>
+                        
+                        <li class="dropdown">
+                            <a href="#" class="dropdown-toggle" 
data-toggle="dropdown">Issues<b class="caret"></b></a>
+                            <ul class="dropdown-menu" role="menu">
+                                <li><b>JIRA:</b></li>
+                                <li><a 
href="https://issues.apache.org/jira/browse/SYSTEMML">SystemML JIRA</a></li>
+                                
+                            </ul>
+                        </li>
+                    </ul>
+                </nav>
+            </div>
+        </header>
+
+        <div class="container" id="content">
+          
+            <h1 class="title">Beginner's Guide for Keras2DML users</h1>
+          
+
+          <!--
+
+-->
+
+<ul id="markdown-toc">
+  <li><a href="#introduction" id="markdown-toc-introduction">Introduction</a>  
  <ul>
+      <li><a href="#getting-started" id="markdown-toc-getting-started">Getting 
Started</a></li>
+      <li><a href="#model-conversion" id="markdown-toc-model-conversion">Model 
Conversion</a></li>
+    </ul>
+  </li>
+  <li><a href="#frequently-asked-questions" 
id="markdown-toc-frequently-asked-questions">Frequently asked questions</a>    
<ul>
+      <li><a 
href="#what-is-the-mapping-between-keras-parameters-and-caffes-solver-specification-"
 
id="markdown-toc-what-is-the-mapping-between-keras-parameters-and-caffes-solver-specification-">What
 is the mapping between Keras&#8217; parameters and Caffe&#8217;s solver 
specification ?</a></li>
+      <li><a href="#how-do-i-specify-the-batch-size-and-the-number-of-epochs-" 
id="markdown-toc-how-do-i-specify-the-batch-size-and-the-number-of-epochs-">How 
do I specify the batch size and the number of epochs ?</a></li>
+      <li><a 
href="#what-optimizer-and-loss-does-keras2dml-use-by-default-if-kerasmodel-is-not-compiled-"
 
id="markdown-toc-what-optimizer-and-loss-does-keras2dml-use-by-default-if-kerasmodel-is-not-compiled-">What
 optimizer and loss does Keras2DML use by default if <code>keras_model</code> 
is not compiled ?</a></li>
+      <li><a href="#what-is-the-learning-rate-schedule-used-" 
id="markdown-toc-what-is-the-learning-rate-schedule-used-">What is the learning 
rate schedule used ?</a></li>
+      <li><a href="#how-to-set-the-size-of-the-validation-dataset-" 
id="markdown-toc-how-to-set-the-size-of-the-validation-dataset-">How to set the 
size of the validation dataset ?</a></li>
+      <li><a href="#how-to-monitor-loss-via-command-line-" 
id="markdown-toc-how-to-monitor-loss-via-command-line-">How to monitor loss via 
command-line ?</a></li>
+    </ul>
+  </li>
+</ul>
+
+<p><br /></p>
+
+<h2 id="introduction">Introduction</h2>
+
+<p>Keras2DML is an <strong>experimental API</strong> that converts a Keras 
specification to DML through the intermediate Caffe2DML module. 
+It is designed to fit well into the mllearn framework and hence supports 
NumPy, Pandas as well as PySpark DataFrame.</p>
+
+<h3 id="getting-started">Getting Started</h3>
+
+<p>To create a Keras2DML object, one needs to create a Keras model through the 
Functional API. Please see the <a 
href="https://keras.io/models/model/">Functional API</a>.
+This module utilizes the existing <a 
href="beginners-guide-caffe2dml.html">Caffe2DML</a> backend to convert Keras models 
into DML. Keras models are 
+parsed and translated into Caffe prototxt and caffemodel files which are then 
piped into Caffe2DML. Thus one can follow the Caffe2DML
+documentation for further information.</p>
+
+<h3 id="model-conversion">Model Conversion</h3>
+
+<p>Keras models are parsed based on their layer structure and corresponding 
weights and translated into the relative Caffe layer and weight
+configuration. Be aware that currently this is a translation into Caffe and 
there will be loss of information from Keras models such as 
+initializer information, and other layers which do not exist in Caffe.</p>
+
+<p>To create a Keras2DML object, simply pass the keras object to the Keras2DML 
constructor. It&#8217;s also important to note that your models
+should be compiled so that the loss can be accessed for Caffe2DML.</p>
+
+<pre><code class="language-python">from systemml.mllearn import Keras2DML
+import keras
+from keras.applications.resnet50 import preprocess_input, decode_predictions, ResNet50
+
+keras_model = ResNet50(weights='imagenet',include_top=True,pooling='None',input_shape=(224,224,3))
+keras_model.compile(optimizer='sgd', loss= 'categorical_crossentropy')
+
+sysml_model = Keras2DML(spark, keras_model,input_shape=(3,224,224))
+sysml_model.summary()
+</code></pre>
+
+<h2 id="frequently-asked-questions">Frequently asked questions</h2>
+
+<h4 
id="what-is-the-mapping-between-keras-parameters-and-caffes-solver-specification-">What
 is the mapping between Keras&#8217; parameters and Caffe&#8217;s solver 
specification ?</h4>
+
+<table>
+  <thead>
+    <tr>
+      <th>&#160;</th>
+      <th>Specified via the given parameter in the Keras2DML constructor</th>
+      <th>From input Keras&#8217; model</th>
+      <th>Corresponding parameter in the Caffe solver file</th>
+    </tr>
+  </thead>
+  <tbody>
+    <tr>
+      <td>Solver type</td>
+      <td>&#160;</td>
+      <td><code>type(keras_model.optimizer)</code>. Supported types: 
<code>keras.optimizers.{SGD, Adagrad, Adam}</code></td>
+      <td><code>type</code></td>
+    </tr>
+    <tr>
+      <td>Maximum number of iterations</td>
+      <td><code>max_iter</code></td>
+      <td>The <code>epochs</code> parameter in the <code>fit</code> method is 
not supported.</td>
+      <td><code>max_iter</code></td>
+    </tr>
+    <tr>
+      <td>Validation dataset</td>
+      <td><code>test_iter</code> (explained in the below section)</td>
+      <td>The <code>validation_data</code> parameter in the <code>fit</code> 
method is not supported.</td>
+      <td><code>test_iter</code></td>
+    </tr>
+    <tr>
+      <td>Monitoring the loss</td>
+      <td><code>display, test_interval</code> (explained in the below 
section)</td>
+      <td>The <code>LossHistory</code> callback in the <code>fit</code> method 
is not supported.</td>
+      <td><code>display, test_interval</code></td>
+    </tr>
+    <tr>
+      <td>Learning rate schedule</td>
+      <td><code>lr_policy</code></td>
+      <td>The <code>LearningRateScheduler</code> callback in the 
<code>fit</code> method is not supported.</td>
+      <td><code>lr_policy</code> (default: step)</td>
+    </tr>
+    <tr>
+      <td>Base learning rate</td>
+      <td>&#160;</td>
+      <td><code>keras_model.optimizer.lr</code></td>
+      <td><code>base_lr</code></td>
+    </tr>
+    <tr>
+      <td>Learning rate decay over each update</td>
+      <td>&#160;</td>
+      <td><code>keras_model.optimizer.decay</code></td>
+      <td><code>gamma</code></td>
+    </tr>
+    <tr>
+      <td>Global regularizer to use for all layers</td>
+      <td><code>regularization_type,weight_decay</code></td>
+      <td>The current version of Keras2DML does not support custom regularizers 
per layer.</td>
+      <td><code>regularization_type,weight_decay</code></td>
+    </tr>
+    <tr>
+      <td>If type of the optimizer is <code>keras.optimizers.SGD</code></td>
+      <td>&#160;</td>
+      <td><code>momentum, nesterov</code></td>
+      <td><code>momentum, type</code></td>
+    </tr>
+    <tr>
+      <td>If type of the optimizer is <code>keras.optimizers.Adam</code></td>
+      <td>&#160;</td>
+      <td><code>beta_1, beta_2, epsilon</code>. The parameter 
<code>amsgrad</code> is not supported.</td>
+      <td><code>momentum, momentum2, delta</code></td>
+    </tr>
+    <tr>
+      <td>If type of the optimizer is 
<code>keras.optimizers.Adagrad</code></td>
+      <td>&#160;</td>
+      <td><code>epsilon</code></td>
+      <td><code>delta</code></td>
+    </tr>
+  </tbody>
+</table>
+
+<h4 id="how-do-i-specify-the-batch-size-and-the-number-of-epochs-">How do I 
specify the batch size and the number of epochs ?</h4>
+
+<p>Since Keras2DML is an mllearn API, it does not accept the batch size and 
number of epochs as parameters in the <code>fit</code> method.
+Instead, these parameters are passed via the <code>batch_size</code> and 
<code>max_iter</code> parameters in the Keras2DML constructor.
+For example, the equivalent Python code for <code>keras_model.fit(features, 
labels, epochs=10, batch_size=64)</code> is as follows:</p>
+
+<pre><code class="language-python">import math
+from systemml.mllearn import Keras2DML
+epochs = 10
+batch_size = 64
+num_samples = features.shape[0]
+max_iter = int(epochs*math.ceil(num_samples/batch_size))
+sysml_model = Keras2DML(spark, keras_model, batch_size=batch_size, max_iter=max_iter, ...)
+sysml_model.fit(features, labels)
+</code></pre>
+
+<h4 
id="what-optimizer-and-loss-does-keras2dml-use-by-default-if-kerasmodel-is-not-compiled-">What
 optimizer and loss does Keras2DML use by default if <code>keras_model</code> 
is not compiled ?</h4>
+
+<p>If the user does not <code>compile</code> the keras model, then we use 
cross entropy loss and SGD optimizer with nesterov momentum:</p>
+
+<pre><code class="language-python">keras_model.compile(loss='categorical_crossentropy', optimizer=keras.optimizers.SGD(lr=0.01, momentum=0.95, decay=5e-4, nesterov=True))
+</code></pre>
+
+<h4 id="what-is-the-learning-rate-schedule-used-">What is the learning rate 
schedule used ?</h4>
+
+<p>Keras2DML does not support the <code>LearningRateScheduler</code> callback. 
+Instead one can set the custom learning rate schedule to one of the following 
schedules by using the <code>lr_policy</code> parameter of the constructor:</p>
+<ul>
+  <li><code>step</code>: return <code>base_lr * gamma ^ (floor(iter / step))</code> (default schedule)</li>
+  <li><code>fixed</code>: always return <code>base_lr</code>.</li>
+  <li><code>exp</code>: return <code>base_lr * gamma ^ iter</code></li>
+  <li><code>inv</code>: return <code>base_lr * (1 + gamma * iter) ^ (- power)</code></li>
+  <li><code>poly</code>: the effective learning rate follows a polynomial decay, to be zero by the max_iter. return <code>base_lr * (1 - iter/max_iter) ^ (power)</code></li>
+  <li><code>sigmoid</code>: the effective learning rate follows a sigmoid decay. return <code>base_lr * (1/(1 + exp(-gamma * (iter - stepsize))))</code></li>
+</ul>
+
+<h4 id="how-to-set-the-size-of-the-validation-dataset-">How to set the size of 
the validation dataset ?</h4>
+
+<p>The size of the validation dataset is determined by the parameters 
<code>test_iter</code> and the batch size. For example: If the batch size is 64 
and 
+<code>test_iter</code> is set to 10 in the <code>Keras2DML</code>&#8217;s 
constructor, then the validation size is 640. This setting generates the following 
DML code internally:</p>
+
+<pre><code>num_images = nrow(y_full)
+BATCH_SIZE = 64
+num_validation = 10 * BATCH_SIZE
+X = X_full[(num_validation+1):num_images,]; y = y_full[(num_validation+1):num_images,]
+X_val = X_full[1:num_validation,]; y_val = y_full[1:num_validation,]
+num_images = nrow(y)
+</code></pre>
+
+<h4 id="how-to-monitor-loss-via-command-line-">How to monitor loss via 
command-line ?</h4>
+
+<p>To monitor loss, please set the parameters <code>display</code>, 
<code>test_iter</code> and <code>test_interval</code> in the 
<code>Keras2DML</code>&#8217;s constructor.<br />
+For example: for the expression <code>Keras2DML(..., display=100, 
test_iter=10, test_interval=500)</code>, we</p>
+<ul>
+  <li>display the training loss and accuracy every 100 iterations and</li>
+  <li>carry out validation every 500 training iterations and display validation loss and accuracy.</li>
+</ul>
+
+
+
+        </div> <!-- /container -->
+
+        
+
+        <script src="js/vendor/jquery-1.12.0.min.js"></script>
+        <script src="js/vendor/bootstrap.min.js"></script>
+        <script src="js/vendor/anchor.min.js"></script>
+        <script src="js/main.js"></script>
+        
+
+
+
+
+        <!-- Analytics -->
+        <script>
+            
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
+            (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new 
Date();a=s.createElement(o),
+            
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
+            
})(window,document,'script','//www.google-analytics.com/analytics.js','ga');
+            ga('create', 'UA-71553733-1', 'auto');
+            ga('send', 'pageview');
+        </script>
+
+
+
+        <!-- MathJax Section -->
+        <script type="text/x-mathjax-config">
+            MathJax.Hub.Config({
+                TeX: { equationNumbers: { autoNumber: "AMS" } }
+            });
+        </script>
+        <script>
+            // Note that we load MathJax this way to work with local file 
(file://), HTTP and HTTPS.
+            // We could use "//cdn.mathjax...", but that won't support 
"file://".
+            (function(d, script) {
+                script = d.createElement('script');
+                script.type = 'text/javascript';
+                script.async = true;
+                script.onload = function(){
+                    MathJax.Hub.Config({
+                        tex2jax: {
+                            inlineMath: [ ["$", "$"], ["\\\\(","\\\\)"] ],
+                            displayMath: [ ["$$","$$"], ["\\[", "\\]"] ],
+                            processEscapes: true,
+                            skipTags: ['script', 'noscript', 'style', 
'textarea', 'pre']
+                        }
+                    });
+                };
+                script.src = ('https:' == document.location.protocol ? 
'https://' : 'http://') +
+                    
'cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML';
+                d.getElementsByTagName('head')[0].appendChild(script);
+            }(document));
+        </script>
+    </body>
+</html>


Added: systemml/site/docs/1.1.0/beginners-guide-python.html
URL: 
http://svn.apache.org/viewvc/systemml/site/docs/1.1.0/beginners-guide-python.html?rev=1828046&view=auto
==============================================================================
--- systemml/site/docs/1.1.0/beginners-guide-python.html (added)
+++ systemml/site/docs/1.1.0/beginners-guide-python.html Fri Mar 30 04:31:05 
2018
@@ -0,0 +1,540 @@
+<!DOCTYPE html>
+<!--[if lt IE 7]>      <html class="no-js lt-ie9 lt-ie8 lt-ie7"> <![endif]-->
+<!--[if IE 7]>         <html class="no-js lt-ie9 lt-ie8"> <![endif]-->
+<!--[if IE 8]>         <html class="no-js lt-ie9"> <![endif]-->
+<!--[if gt IE 8]><!--> <html class="no-js"> <!--<![endif]-->
+    <head>
+        <title>Beginner's Guide for Python Users - SystemML 1.1.0</title>
+        <meta charset="utf-8">
+        <meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1">
+        
+        <meta name="description" content="Beginner's Guide for Python Users">
+        
+        <meta name="viewport" content="width=device-width">
+        <link rel="stylesheet" href="css/bootstrap.min.css">
+        <link rel="stylesheet" href="css/main.css">
+        <link rel="stylesheet" href="css/pygments-default.css">
+        <link rel="shortcut icon" href="img/favicon.png">
+    </head>
+    <body>
+        <!--[if lt IE 7]>
+            <p class="chromeframe">You are using an outdated browser. <a 
href="http://browsehappy.com/">Upgrade your browser today</a> or <a 
href="http://www.google.com/chromeframe/?redirect=true">install Google Chrome 
Frame</a> to better experience this site.</p>
+        <![endif]-->
+
+        <header class="navbar navbar-default navbar-fixed-top" id="topbar">
+            <div class="container">
+                <div class="navbar-header">
+                    <div class="navbar-brand brand projectlogo">
+                        <a href="http://systemml.apache.org/"><img 
class="logo" src="img/systemml-logo.png" alt="Apache SystemML" title="Apache 
SystemML"/></a>
+                    </div>
+                    <div class="navbar-brand brand projecttitle">
+                        <a href="http://systemml.apache.org/">Apache 
SystemML<sup id="trademark">&trade;</sup></a><br/>
+                        <span class="version">1.1.0</span>
+                    </div>
+                    <button type="button" class="navbar-toggle collapsed" 
data-toggle="collapse" data-target=".navbar-collapse">
+                        <span class="sr-only">Toggle navigation</span>
+                        <span class="icon-bar"></span>
+                        <span class="icon-bar"></span>
+                        <span class="icon-bar"></span>
+                    </button>
+                </div>
+                <nav class="navbar-collapse collapse">
+                    <ul class="nav navbar-nav navbar-right">
+                        <li><a href="index.html">Overview</a></li>
+                        <li><a 
href="https://github.com/apache/systemml">GitHub</a></li>
+                        <li class="dropdown">
+                            <a href="#" class="dropdown-toggle" 
data-toggle="dropdown">Documentation<b class="caret"></b></a>
+                            <ul class="dropdown-menu" role="menu">
+                                <li><b>Running SystemML:</b></li>
+                                <li><a 
href="https://github.com/apache/systemml">SystemML GitHub README</a></li>
+                                <li><a 
href="spark-mlcontext-programming-guide.html">Spark MLContext</a></li>
+                                <li><a href="spark-batch-mode.html">Spark 
Batch Mode</a>
+                                <li><a href="hadoop-batch-mode.html">Hadoop 
Batch Mode</a>
+                                <li><a href="standalone-guide.html">Standalone 
Guide</a></li>
+                                <li><a href="jmlc.html">Java Machine Learning 
Connector (JMLC)</a>
+                                <li class="divider"></li>
+                                <li><b>Language Guides:</b></li>
+                                <li><a href="dml-language-reference.html">DML 
Language Reference</a></li>
+                                <li><a 
href="beginners-guide-to-dml-and-pydml.html">Beginner's Guide to DML and 
PyDML</a></li>
+                                <li><a 
href="beginners-guide-python.html">Beginner's Guide for Python Users</a></li>
+                                <li><a href="python-reference.html">Reference 
Guide for Python Users</a></li>
+                                <li class="divider"></li>
+                                <li><b>ML Algorithms:</b></li>
+                                <li><a 
href="algorithms-reference.html">Algorithms Reference</a></li>
+                                <li class="divider"></li>
+                                <li><b>Tools:</b></li>
+                                <li><a href="debugger-guide.html">Debugger 
Guide</a></li>
+                                <li><a 
href="developer-tools-systemml.html">IDE Guide</a></li>
+                                <li class="divider"></li>
+                                <li><b>Other:</b></li>
+                                <li><a 
href="contributing-to-systemml.html">Contributing to SystemML</a></li>
+                                <li><a href="engine-dev-guide.html">Engine 
Developer Guide</a></li>
+                                <li><a 
href="troubleshooting-guide.html">Troubleshooting Guide</a></li>
+                                <li><a href="release-process.html">Release 
Process</a></li>
+                            </ul>
+                        </li>
+                        
+                        <li class="dropdown">
+                            <a href="#" class="dropdown-toggle" 
data-toggle="dropdown">API Docs<b class="caret"></b></a>
+                            <ul class="dropdown-menu" role="menu">
+                                <li><a 
href="./api/java/index.html">Java</a></li>
+                                <li><a 
href="./api/python/index.html">Python</a></li>
+                            </ul>
+                        </li>
+                        
+                        <li class="dropdown">
+                            <a href="#" class="dropdown-toggle" 
data-toggle="dropdown">Issues<b class="caret"></b></a>
+                            <ul class="dropdown-menu" role="menu">
+                                <li><b>JIRA:</b></li>
+                                <li><a 
href="https://issues.apache.org/jira/browse/SYSTEMML">SystemML JIRA</a></li>
+                                
+                            </ul>
+                        </li>
+                    </ul>
+                </nav>
+            </div>
+        </header>
+
+        <div class="container" id="content">
+          
+            <h1 class="title">Beginner's Guide for Python Users</h1>
+          
+
+          <!--
+
+-->
+
+<ul id="markdown-toc">
+  <li><a href="#introduction" 
id="markdown-toc-introduction">Introduction</a></li>
+  <li><a href="#download--setup" id="markdown-toc-download--setup">Download 
&amp; Setup</a>    <ul>
+      <li><a href="#install-java-need-java-8-and-apache-spark" 
id="markdown-toc-install-java-need-java-8-and-apache-spark">Install Java (need 
Java 8) and Apache Spark</a></li>
+      <li><a href="#install-systemml" 
id="markdown-toc-install-systemml">Install SystemML</a></li>
+      <li><a href="#uninstall-systemml" 
id="markdown-toc-uninstall-systemml">Uninstall SystemML</a></li>
+      <li><a href="#start-pyspark-shell" 
id="markdown-toc-start-pyspark-shell">Start Pyspark shell</a></li>
+    </ul>
+  </li>
+  <li><a href="#matrix-operations" id="markdown-toc-matrix-operations">Matrix 
operations</a></li>
+  <li><a href="#invoke-systemmls-algorithms" 
id="markdown-toc-invoke-systemmls-algorithms">Invoke SystemML&#8217;s 
algorithms</a>    <ul>
+      <li><a href="#scikit-learn-interface" 
id="markdown-toc-scikit-learn-interface">Scikit-learn interface</a></li>
+      <li><a href="#passing-pyspark-dataframe" 
id="markdown-toc-passing-pyspark-dataframe">Passing PySpark DataFrame</a></li>
+      <li><a href="#mlpipeline-interface" 
id="markdown-toc-mlpipeline-interface">MLPipeline interface</a></li>
+    </ul>
+  </li>
+  <li><a href="#invoking-dmlpydml-scripts-using-mlcontext" 
id="markdown-toc-invoking-dmlpydml-scripts-using-mlcontext">Invoking DML/PyDML 
scripts using MLContext</a></li>
+</ul>
+
+<p><br /></p>
+
+<h2 id="introduction">Introduction</h2>
+
+<p>SystemML enables flexible, scalable machine learning. This flexibility is 
achieved through the specification of a high-level declarative machine learning 
language that comes in two flavors, 
+one with an R-like syntax (DML) and one with a Python-like syntax (PyDML).</p>
+
+<p>Algorithm scripts written in DML and PyDML can be run on Hadoop, on Spark, 
or in Standalone mode. 
+No script modifications are required to change between modes. SystemML 
automatically performs advanced optimizations 
+based on data and cluster characteristics, so much of the need to manually 
tweak algorithms is largely reduced or eliminated.
+To understand more about DML and PyDML, we recommend that you read <a 
href="https://apache.github.io/systemml/beginners-guide-to-dml-and-pydml.html">Beginner&#8217;s
 Guide to DML and PyDML</a>.</p>
+
+<p>For convenience of Python users, SystemML exposes several language-level 
APIs that allow Python users to use SystemML
+and its algorithms without the need to know DML or PyDML. We explain these 
APIs in the sections below with example use cases.</p>
+
+<h2 id="download--setup">Download &amp; Setup</h2>
+
+<p>Before you get started on SystemML, make sure that your environment is set 
up and ready to go.</p>
+
+<h3 id="install-java-need-java-8-and-apache-spark">Install Java (need Java 8) 
and Apache Spark</h3>
+
+<p>If you already have an Apache Spark installation, you can skip this 
step.</p>
+
+<div class="codetabs">
+<div data-lang="OSX">
+    <pre><code class="language-bash">/usr/bin/ruby -e "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install)"
+brew tap caskroom/cask
+brew install Caskroom/cask/java
+brew tap homebrew/versions
+brew install apache-spark16
+</code></pre>
+  </div>
+<div data-lang="Linux">
+    <pre><code class="language-bash">ruby -e "$(curl -fsSL https://raw.githubusercontent.com/Linuxbrew/install/master/install)"
+brew tap caskroom/cask
+brew install Caskroom/cask/java
+brew tap homebrew/versions
+brew install apache-spark16
+</code></pre>
+  </div>
+</div>
+
+<h3 id="install-systemml">Install SystemML</h3>
+
+<p>To install released SystemML, please use the following commands:</p>
+
+<div class="codetabs">
+<div data-lang="Python 2">
+    <pre><code class="language-bash">pip install systemml
+</code></pre>
+  </div>
+<div data-lang="Python 3">
+    <pre><code class="language-bash">pip3 install systemml
+</code></pre>
+  </div>
+</div>
+
+<p>If you want to try out the bleeding edge version, please use the following 
commands:</p>
+
+<div class="codetabs">
+<div data-lang="Python 2">
+    <pre><code class="language-bash">git clone https://github.com/apache/systemml.git
+cd systemml
+mvn clean package -P distribution
+pip install target/systemml-1.0.0-SNAPSHOT-python.tar.gz
+</code></pre>
+  </div>
+<div data-lang="Python 3">
+    <pre><code class="language-bash">git clone https://github.com/apache/systemml.git
+cd systemml
+mvn clean package -P distribution
+pip3 install target/systemml-1.0.0-SNAPSHOT-python.tar.gz
+</code></pre>
+  </div>
+</div>
+
+<h3 id="uninstall-systemml">Uninstall SystemML</h3>
+<p>To uninstall SystemML, please use the following command:</p>
+
+<div class="codetabs">
+<div data-lang="Python 2">
+    <pre><code class="language-bash">pip uninstall systemml
+</code></pre>
+  </div>
+<div data-lang="Python 3">
+    <pre><code class="language-bash">pip3 uninstall systemml
+</code></pre>
+  </div>
+</div>
+
+<h3 id="start-pyspark-shell">Start Pyspark shell</h3>
+
+<div class="codetabs">
+<div data-lang="Python 2">
+    <p><code>bash
+pyspark
+</code></p>
+  </div>
+<div data-lang="Python 3">
+    <p><code>bash
+PYSPARK_PYTHON=python3 pyspark
+</code></p>
+  </div>
+</div>
+
+<hr />
+
+<h2 id="matrix-operations">Matrix operations</h2>
+
+<p>To get started with SystemML, let&#8217;s try a few elementary matrix 
multiplication operations:</p>
+
+<p><code>python
+import systemml as sml
+import numpy as np
+m1 = sml.matrix(np.ones((3,3)) + 2)
+m2 = sml.matrix(np.ones((3,3)) + 3)
+m2 = m1 * (m2 + m1)
+m4 = 1.0 - m2
+m4.sum(axis=1).toNumPy()
+</code></p>
+
+<p>Output:</p>
+
+<p><code>python
+array([[-60.],
+       [-60.],
+       [-60.]])
+</code></p>
+
+<p>Let us now write a simple script to train a <a 
href="https://apache.github.io/systemml/algorithms-regression.html#linear-regression">linear
 regression</a> 
+model: $ \beta = solve(X^T X, X^T y) $. For simplicity, we will use the 
direct-solve method and ignore
+the regularization parameter as well as the intercept.</p>
+
+<p><code>python
+import numpy as np
+from sklearn import datasets
+import systemml as sml
+# Load the diabetes dataset
+diabetes = datasets.load_diabetes()
+# Use only one feature
+diabetes_X = diabetes.data[:, np.newaxis, 2]
+# Split the data into training/testing sets
+X_train = diabetes_X[:-20]
+X_test = diabetes_X[-20:]
+# Split the targets into training/testing sets
+y_train = diabetes.target[:-20]
+y_test = diabetes.target[-20:]
+# Train Linear Regression model
+X = sml.matrix(X_train)
+y = sml.matrix(np.matrix(y_train).T)
+A = X.transpose().dot(X)
+b = X.transpose().dot(y)
+beta = sml.solve(A, b).toNumPy()
+y_predicted = X_test.dot(beta)
+print('Residual sum of squares: %.2f' % np.mean((y_predicted - y_test) ** 2)) 
+</code></p>
+
+<p>Output:</p>
+
+<p><code>bash
+Residual sum of squares: 25282.12
+</code></p>
+
+<p>We can improve the residual error by adding an intercept and a regularization 
parameter. To do so, we
+will use the <code>mllearn</code> API described in the next section.</p>
+
+<hr />
+
+<h2 id="invoke-systemmls-algorithms">Invoke SystemML&#8217;s algorithms</h2>
+
+<p>SystemML also exposes a subpackage <a 
href="https://apache.github.io/systemml/python-reference#mllearn-api">mllearn</a>.
 This subpackage allows Python users to invoke SystemML algorithms
+using the Scikit-learn or MLPipeline API.</p>
+
+<h3 id="scikit-learn-interface">Scikit-learn interface</h3>
+
+<p>In the below example, we invoke SystemML&#8217;s <a 
href="https://apache.github.io/systemml/algorithms-regression.html#linear-regression";>Linear
 Regression</a>
+algorithm.</p>
+
+<p><code>python
+import numpy as np
+from sklearn import datasets
+from systemml.mllearn import LinearRegression
+# Load the diabetes dataset
+diabetes = datasets.load_diabetes()
+# Use only one feature
+diabetes_X = diabetes.data[:, np.newaxis, 2]
+# Split the data into training/testing sets
+X_train = diabetes_X[:-20]
+X_test = diabetes_X[-20:]
+# Split the targets into training/testing sets
+y_train = diabetes.target[:-20]
+y_test = diabetes.target[-20:]
+# Create linear regression object
+regr = LinearRegression(spark, fit_intercept=True, C=float("inf"), 
solver='direct-solve')
+# Train the model using the training sets
+regr.fit(X_train, y_train)
+y_predicted = regr.predict(X_test)
+print('Residual sum of squares: %.2f' % np.mean((y_predicted - y_test) ** 2)) 
+</code></p>
+
+<p>Output:</p>
+
+<p><code>bash
+Residual sum of squares: 6991.17
+</code></p>
+
+<p>As expected, by adding an intercept and a regularizer, the residual error drops 
significantly.</p>
+
+<p>Here is another example where we invoke SystemML&#8217;s <a 
href="https://apache.github.io/systemml/algorithms-classification.html#multinomial-logistic-regression">Logistic
 Regression</a>
+algorithm on the digits dataset.</p>
+
+<p><code>python
+# Scikit-learn way
+from sklearn import datasets, neighbors
+from systemml.mllearn import LogisticRegression
+digits = datasets.load_digits()
+X_digits = digits.data
+y_digits = digits.target
+n_samples = len(X_digits)
+X_train = X_digits[:int(.9 * n_samples)]
+y_train = y_digits[:int(.9 * n_samples)]
+X_test = X_digits[int(.9 * n_samples):]
+y_test = y_digits[int(.9 * n_samples):]
+logistic = LogisticRegression(spark)
+print('LogisticRegression score: %f' % logistic.fit(X_train, 
y_train).score(X_test, y_test))
+</code></p>
+
+<p>Output:</p>
+
+<p><code>bash
+LogisticRegression score: 0.927778
+</code></p>
+
+<p>You can also save the trained model and load it later for prediction:</p>
+
+<p><code>python
+# Assuming logistic.fit(X_train, y_train) is already invoked
+logistic.save('logistic_model')
+new_logistic = LogisticRegression(spark)
+new_logistic.load('logistic_model')
+print('LogisticRegression score: %f' % new_logistic.score(X_test, y_test))
+</code></p>
+
+<h3 id="passing-pyspark-dataframe">Passing PySpark DataFrame</h3>
+
+<p>To train the above algorithm on a larger dataset, we can load the dataset 
into a DataFrame and pass it to the <code>fit</code> method:</p>
+
+<p><code>python
+from sklearn import datasets
+from systemml.mllearn import LogisticRegression
+import pandas as pd
+from sklearn.metrics import accuracy_score
+import systemml as sml
+digits = datasets.load_digits()
+X_digits = digits.data
+y_digits = digits.target
+n_samples = len(X_digits)
+# Split the data into training/testing sets and convert to PySpark DataFrame
+df_train = sml.convertToLabeledDF(sqlCtx, X_digits[:int(.9 * n_samples)], 
y_digits[:int(.9 * n_samples)])
+X_test = spark.createDataFrame(pd.DataFrame(X_digits[int(.9 * n_samples):]))
+logistic = LogisticRegression(spark)
+logistic.fit(df_train)
+y_predicted = logistic.predict(X_test)
+y_predicted = y_predicted.select('prediction').toPandas().as_matrix().flatten()
+y_test = y_digits[int(.9 * n_samples):]
+print('LogisticRegression score: %f' % accuracy_score(y_test, y_predicted))
+</code></p>
+
+<p>Output:</p>
+
+<p><code>bash
+LogisticRegression score: 0.922222
+</code></p>
+
+<h3 id="mlpipeline-interface">MLPipeline interface</h3>
+
+<p>In the below example, we demonstrate how the same 
<code>LogisticRegression</code> class can allow SystemML to fit seamlessly into 
+large data pipelines.</p>
+
+<p><code>python
+# MLPipeline way
+from pyspark.ml import Pipeline
+from systemml.mllearn import LogisticRegression
+from pyspark.ml.feature import HashingTF, Tokenizer
+training = spark.createDataFrame([
+    (0, "a b c d e spark", 1.0),
+    (1, "b d", 2.0),
+    (2, "spark f g h", 1.0),
+    (3, "hadoop mapreduce", 2.0),
+    (4, "b spark who", 1.0),
+    (5, "g d a y", 2.0),
+    (6, "spark fly", 1.0),
+    (7, "was mapreduce", 2.0),
+    (8, "e spark program", 1.0),
+    (9, "a e c l", 2.0),
+    (10, "spark compile", 1.0),
+    (11, "hadoop software", 2.0)
+], ["id", "text", "label"])
+tokenizer = Tokenizer(inputCol="text", outputCol="words")
+hashingTF = HashingTF(inputCol="words", outputCol="features", numFeatures=20)
+lr = LogisticRegression(sqlCtx)
+pipeline = Pipeline(stages=[tokenizer, hashingTF, lr])
+model = pipeline.fit(training)
+test = spark.createDataFrame([
+    (12, "spark i j k"),
+    (13, "l m n"),
+    (14, "mapreduce spark"),
+    (15, "apache hadoop")], ["id", "text"])
+prediction = model.transform(test)
+prediction.show()
+</code></p>
+
+<p>Output:</p>
+
+<p><code>bash
++-------+---+---------------+------------------+--------------------+--------------------+----------+
+|__INDEX| id|           text|             words|            features|         
probability|prediction|
++-------+---+---------------+------------------+--------------------+--------------------+----------+
+|    1.0| 12|    spark i j k|  [spark, i, j, 
k]|(20,[5,6,7],[2.0,...|[0.99999999999975...|       1.0|
+|    2.0| 13|          l m n|         [l, m, 
n]|(20,[8,9,10],[1.0...|[1.37552128844736...|       2.0|
+|    3.0| 14|mapreduce spark|[mapreduce, 
spark]|(20,[5,10],[1.0,1...|[0.99860290938153...|       1.0|
+|    4.0| 15|  apache hadoop|  [apache, 
hadoop]|(20,[9,14],[1.0,1...|[5.41688748236143...|       2.0|
++-------+---+---------------+------------------+--------------------+--------------------+----------+
+</code></p>
+
+<hr />
+
+<h2 id="invoking-dmlpydml-scripts-using-mlcontext">Invoking DML/PyDML scripts 
using MLContext</h2>
+
+<p>The example below demonstrates how to invoke the algorithm <a 
href="https://github.com/apache/systemml/blob/master/scripts/algorithms/MultiLogReg.dml">scripts/algorithms/MultiLogReg.dml</a>
+using the Python <a 
href="https://apache.github.io/systemml/spark-mlcontext-programming-guide">MLContext
 API</a>.</p>
+
+<p><code>python
+from sklearn import datasets
+from pyspark.sql import SQLContext
+import systemml as sml
+import pandas as pd
+digits = datasets.load_digits()
+X_digits = digits.data
+y_digits = digits.target + 1
+n_samples = len(X_digits)
+# Split the data into training/testing sets and convert to PySpark DataFrame
+X_df = sqlCtx.createDataFrame(pd.DataFrame(X_digits[:int(.9 * n_samples)]))
+y_df = sqlCtx.createDataFrame(pd.DataFrame(y_digits[:int(.9 * n_samples)]))
+ml = sml.MLContext(sc)
+# Run the MultiLogReg.dml script at the given URL
+scriptUrl = 
"https://raw.githubusercontent.com/apache/systemml/master/scripts/algorithms/MultiLogReg.dml";
+script = sml.dml(scriptUrl).input(X=X_df, Y_vec=y_df).output("B_out")
+beta = ml.execute(script).get('B_out').toNumPy()
+</code></p>
+
+
+        </div> <!-- /container -->
+
+        
+
+        <script src="js/vendor/jquery-1.12.0.min.js"></script>
+        <script src="js/vendor/bootstrap.min.js"></script>
+        <script src="js/vendor/anchor.min.js"></script>
+        <script src="js/main.js"></script>
+        
+
+
+
+
+        <!-- Analytics -->
+        <script>
+            
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
+            (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new 
Date();a=s.createElement(o),
+            
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
+            
})(window,document,'script','//www.google-analytics.com/analytics.js','ga');
+            ga('create', 'UA-71553733-1', 'auto');
+            ga('send', 'pageview');
+        </script>
+
+
+
+        <!-- MathJax Section -->
+        <script type="text/x-mathjax-config">
+            MathJax.Hub.Config({
+                TeX: { equationNumbers: { autoNumber: "AMS" } }
+            });
+        </script>
+        <script>
+            // Note that we load MathJax this way to work with local file 
(file://), HTTP and HTTPS.
+            // We could use "//cdn.mathjax...", but that won't support 
"file://".
+            (function(d, script) {
+                script = d.createElement('script');
+                script.type = 'text/javascript';
+                script.async = true;
+                script.onload = function(){
+                    MathJax.Hub.Config({
+                        tex2jax: {
+                            inlineMath: [ ["$", "$"], ["\\\\(","\\\\)"] ],
+                            displayMath: [ ["$$","$$"], ["\\[", "\\]"] ],
+                            processEscapes: true,
+                            skipTags: ['script', 'noscript', 'style', 
'textarea', 'pre']
+                        }
+                    });
+                };
+                script.src = ('https:' == document.location.protocol ? 
'https://' : 'http://') +
+                    
'cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML';
+                d.getElementsByTagName('head')[0].appendChild(script);
+            }(document));
+        </script>
+    </body>
+</html>

svn commit: r1828046 [8/20] - /systemml/site/docs/1.1.0/

Reply via email to