Added: systemml/site/docs/1.1.0/spark-mlcontext-programming-guide.html URL: http://svn.apache.org/viewvc/systemml/site/docs/1.1.0/spark-mlcontext-programming-guide.html?rev=1828046&view=auto ============================================================================== --- systemml/site/docs/1.1.0/spark-mlcontext-programming-guide.html (added) +++ systemml/site/docs/1.1.0/spark-mlcontext-programming-guide.html Fri Mar 30 04:31:05 2018 @@ -0,0 +1,2888 @@ +<!DOCTYPE html> +<!--[if lt IE 7]> <html class="no-js lt-ie9 lt-ie8 lt-ie7"> <![endif]--> +<!--[if IE 7]> <html class="no-js lt-ie9 lt-ie8"> <![endif]--> +<!--[if IE 8]> <html class="no-js lt-ie9"> <![endif]--> +<!--[if gt IE 8]><!--> <html class="no-js"> <!--<![endif]--> + <head> + <title>Spark MLContext Programming Guide - SystemML 1.1.0</title> + <meta charset="utf-8"> + <meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1"> + + <meta name="description" content="Spark MLContext Programming Guide"> + + <meta name="viewport" content="width=device-width"> + <link rel="stylesheet" href="css/bootstrap.min.css"> + <link rel="stylesheet" href="css/main.css"> + <link rel="stylesheet" href="css/pygments-default.css"> + <link rel="shortcut icon" href="img/favicon.png"> + </head> + <body> + <!--[if lt IE 7]> + <p class="chromeframe">You are using an outdated browser. 
<a href="http://browsehappy.com/">Upgrade your browser today</a> or <a href="http://www.google.com/chromeframe/?redirect=true">install Google Chrome Frame</a> to better experience this site.</p> + <![endif]--> + + <header class="navbar navbar-default navbar-fixed-top" id="topbar"> + <div class="container"> + <div class="navbar-header"> + <div class="navbar-brand brand projectlogo"> + <a href="http://systemml.apache.org/"><img class="logo" src="img/systemml-logo.png" alt="Apache SystemML" title="Apache SystemML"/></a> + </div> + <div class="navbar-brand brand projecttitle"> + <a href="http://systemml.apache.org/">Apache SystemML<sup id="trademark">™</sup></a><br/> + <span class="version">1.1.0</span> + </div> + <button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target=".navbar-collapse"> + <span class="sr-only">Toggle navigation</span> + <span class="icon-bar"></span> + <span class="icon-bar"></span> + <span class="icon-bar"></span> + </button> + </div> + <nav class="navbar-collapse collapse"> + <ul class="nav navbar-nav navbar-right"> + <li><a href="index.html">Overview</a></li> + <li><a href="https://github.com/apache/systemml">GitHub</a></li> + <li class="dropdown"> + <a href="#" class="dropdown-toggle" data-toggle="dropdown">Documentation<b class="caret"></b></a> + <ul class="dropdown-menu" role="menu"> + <li><b>Running SystemML:</b></li> + <li><a href="https://github.com/apache/systemml">SystemML GitHub README</a></li> + <li><a href="spark-mlcontext-programming-guide.html">Spark MLContext</a></li> + <li><a href="spark-batch-mode.html">Spark Batch Mode</a> + <li><a href="hadoop-batch-mode.html">Hadoop Batch Mode</a> + <li><a href="standalone-guide.html">Standalone Guide</a></li> + <li><a href="jmlc.html">Java Machine Learning Connector (JMLC)</a> + <li class="divider"></li> + <li><b>Language Guides:</b></li> + <li><a href="dml-language-reference.html">DML Language Reference</a></li> + <li><a 
href="beginners-guide-to-dml-and-pydml.html">Beginner's Guide to DML and PyDML</a></li> + <li><a href="beginners-guide-python.html">Beginner's Guide for Python Users</a></li> + <li><a href="python-reference.html">Reference Guide for Python Users</a></li> + <li class="divider"></li> + <li><b>ML Algorithms:</b></li> + <li><a href="algorithms-reference.html">Algorithms Reference</a></li> + <li class="divider"></li> + <li><b>Tools:</b></li> + <li><a href="debugger-guide.html">Debugger Guide</a></li> + <li><a href="developer-tools-systemml.html">IDE Guide</a></li> + <li class="divider"></li> + <li><b>Other:</b></li> + <li><a href="contributing-to-systemml.html">Contributing to SystemML</a></li> + <li><a href="engine-dev-guide.html">Engine Developer Guide</a></li> + <li><a href="troubleshooting-guide.html">Troubleshooting Guide</a></li> + <li><a href="release-process.html">Release Process</a></li> + </ul> + </li> + + <li class="dropdown"> + <a href="#" class="dropdown-toggle" data-toggle="dropdown">API Docs<b class="caret"></b></a> + <ul class="dropdown-menu" role="menu"> + <li><a href="./api/java/index.html">Java</a></li> + <li><a href="./api/python/index.html">Python</a></li> + </ul> + </li> + + <li class="dropdown"> + <a href="#" class="dropdown-toggle" data-toggle="dropdown">Issues<b class="caret"></b></a> + <ul class="dropdown-menu" role="menu"> + <li><b>JIRA:</b></li> + <li><a href="https://issues.apache.org/jira/browse/SYSTEMML">SystemML JIRA</a></li> + + </ul> + </li> + </ul> + </nav> + </div> + </header> + + <div class="container" id="content"> + + <h1 class="title">Spark MLContext Programming Guide</h1> + + + <!-- + +--> + +<ul id="markdown-toc"> + <li><a href="#overview" id="markdown-toc-overview">Overview</a></li> + <li><a href="#spark-shell-example" id="markdown-toc-spark-shell-example">Spark Shell Example</a> <ul> + <li><a href="#start-spark-shell-with-systemml" id="markdown-toc-start-spark-shell-with-systemml">Start Spark Shell with SystemML</a></li> + 
<li><a href="#create-mlcontext" id="markdown-toc-create-mlcontext">Create MLContext</a></li> + <li><a href="#hello-world" id="markdown-toc-hello-world">Hello World</a></li> + <li><a href="#lenet-on-mnist-example" id="markdown-toc-lenet-on-mnist-example">LeNet on MNIST Example</a></li> + <li><a href="#dataframe-example" id="markdown-toc-dataframe-example">DataFrame Example</a></li> + <li><a href="#rdd-example" id="markdown-toc-rdd-example">RDD Example</a></li> + <li><a href="#matrix-output" id="markdown-toc-matrix-output">Matrix Output</a></li> + <li><a href="#univariate-statistics-on-haberman-data" id="markdown-toc-univariate-statistics-on-haberman-data">Univariate Statistics on Haberman Data</a> <ul> + <li><a href="#input-variables-vs-input-parameters" id="markdown-toc-input-variables-vs-input-parameters">Input Variables vs Input Parameters</a></li> + </ul> + </li> + <li><a href="#script-information" id="markdown-toc-script-information">Script Information</a></li> + <li><a href="#clearing-scripts-and-mlcontext" id="markdown-toc-clearing-scripts-and-mlcontext">Clearing Scripts and MLContext</a></li> + <li><a href="#statistics" id="markdown-toc-statistics">Statistics</a></li> + <li><a href="#gpu" id="markdown-toc-gpu">GPU</a></li> + <li><a href="#explain" id="markdown-toc-explain">Explain</a></li> + <li><a href="#script-creation-and-scriptfactory" id="markdown-toc-script-creation-and-scriptfactory">Script Creation and ScriptFactory</a></li> + <li><a href="#scriptexecutor" id="markdown-toc-scriptexecutor">ScriptExecutor</a></li> + <li><a href="#matrixmetadata" id="markdown-toc-matrixmetadata">MatrixMetadata</a></li> + <li><a href="#matrix-data-conversions-and-performance" id="markdown-toc-matrix-data-conversions-and-performance">Matrix Data Conversions and Performance</a></li> + <li><a href="#project-information" id="markdown-toc-project-information">Project Information</a></li> + </ul> + </li> + <li><a 
href="#jupyter-pyspark-notebook-example---poisson-nonnegative-matrix-factorization" id="markdown-toc-jupyter-pyspark-notebook-example---poisson-nonnegative-matrix-factorization">Jupyter (PySpark) Notebook Example - Poisson Nonnegative Matrix Factorization</a> <ul> + <li><a href="#set-up-the-notebook-and-download-the-data" id="markdown-toc-set-up-the-notebook-and-download-the-data">Set up the notebook and download the data</a></li> + <li><a href="#use-pyspark-to-load-the-data-in-as-a-spark-dataframe" id="markdown-toc-use-pyspark-to-load-the-data-in-as-a-spark-dataframe">Use PySpark to load the data in as a Spark DataFrame</a></li> + <li><a href="#create-a-systemml-mlcontext-object" id="markdown-toc-create-a-systemml-mlcontext-object">Create a SystemML MLContext object</a></li> + <li><a href="#define-a-kernel-for-poisson-nonnegative-matrix-factorization-pnmf-in-dml" id="markdown-toc-define-a-kernel-for-poisson-nonnegative-matrix-factorization-pnmf-in-dml">Define a kernel for Poisson nonnegative matrix factorization (PNMF) in DML</a></li> + <li><a href="#execute-the-algorithm" id="markdown-toc-execute-the-algorithm">Execute the algorithm</a></li> + <li><a href="#retrieve-the-losses-during-training-and-plot-them" id="markdown-toc-retrieve-the-losses-during-training-and-plot-them">Retrieve the losses during training and plot them</a></li> + </ul> + </li> + <li><a href="#recommended-spark-configuration-settings" id="markdown-toc-recommended-spark-configuration-settings">Recommended Spark Configuration Settings</a></li> +</ul> + +<p><br /></p> + +<h1 id="overview">Overview</h1> + +<p>The Spark <code>MLContext</code> API offers a programmatic interface for interacting with SystemML from Spark using languages +such as Scala, Java, and Python. 
As a result, it offers a convenient way to interact with SystemML from the Spark +Shell and from Notebooks such as Jupyter and Zeppelin.</p> + +<h1 id="spark-shell-example">Spark Shell Example</h1> + +<h2 id="start-spark-shell-with-systemml">Start Spark Shell with SystemML</h2> + +<p>To use SystemML with Spark Shell, the SystemML jar can be referenced using Spark Shell’s <code>--jars</code> option.</p> + +<div class="codetabs"> + +<div data-lang="Spark Shell"> + + <div class="highlight"><pre><code class="language-bash" data-lang="bash">spark-shell <span class="nt">--executor-memory</span> 4G <span class="nt">--driver-memory</span> 4G <span class="nt">--jars</span> SystemML.jar</code></pre></div> + + </div> + +<div data-lang="PySpark Shell"> + + <div class="highlight"><pre><code class="language-bash" data-lang="bash">pyspark <span class="nt">--executor-memory</span> 4G <span class="nt">--driver-memory</span> 4G <span class="nt">--jars</span> SystemML.jar <span class="nt">--driver-class-path</span> SystemML.jar</code></pre></div> + + </div> + +</div> + +<h2 id="create-mlcontext">Create MLContext</h2> + +<p>All primary classes that a user interacts with are located in the <code>org.apache.sysml.api.mlcontext</code> package. +For convenience, we can additionally add a static import of <code>ScriptFactory</code> to shorten the syntax for creating <code>Script</code> objects. +An <code>MLContext</code> object can be created by passing its constructor a reference to the <code>SparkSession</code> (<code>spark</code>) or <code>SparkContext</code> (<code>sc</code>). 
+If successful, you should see a “<code>Welcome to Apache SystemML!</code>” message.</p> + +<div class="codetabs"> + +<div data-lang="Scala"> + + <div class="highlight"><pre><code class="language-scala" data-lang="scala"><span class="k">import</span> <span class="nn">org.apache.sysml.api.mlcontext._</span> +<span class="k">import</span> <span class="nn">org.apache.sysml.api.mlcontext.ScriptFactory._</span> +<span class="k">val</span> <span class="n">ml</span> <span class="k">=</span> <span class="k">new</span> <span class="nc">MLContext</span><span class="o">(</span><span class="n">spark</span><span class="o">)</span></code></pre></div> + + </div> + +<div data-lang="Spark Shell"> + + <div class="highlight"><pre><code class="language-scala" data-lang="scala"><span class="n">scala</span><span class="o">></span> <span class="k">import</span> <span class="nn">org.apache.sysml.api.mlcontext._</span> +<span class="k">import</span> <span class="nn">org.apache.sysml.api.mlcontext._</span> + +<span class="n">scala</span><span class="o">></span> <span class="k">import</span> <span class="nn">org.apache.sysml.api.mlcontext.ScriptFactory._</span> +<span class="k">import</span> <span class="nn">org.apache.sysml.api.mlcontext.ScriptFactory._</span> + +<span class="n">scala</span><span class="o">></span> <span class="k">val</span> <span class="n">ml</span> <span class="k">=</span> <span class="k">new</span> <span class="nc">MLContext</span><span class="o">(</span><span class="n">spark</span><span class="o">)</span> + +<span class="nc">Welcome</span> <span class="n">to</span> <span class="nc">Apache</span> <span class="nc">SystemML</span><span class="o">!</span> + +<span class="n">ml</span><span class="k">:</span> <span class="kt">org.apache.sysml.api.mlcontext.MLContext</span> <span class="o">=</span> <span class="n">org</span><span class="o">.</span><span class="n">apache</span><span class="o">.</span><span class="n">sysml</span><span class="o">.</span><span 
class="n">api</span><span class="o">.</span><span class="n">mlcontext</span><span class="o">.</span><span class="nc">MLContext</span><span class="k">@</span><span class="mf">12139d</span><span class="n">b0</span></code></pre></div> + + </div> + + +<div data-lang="Python"> + + <div class="highlight"><pre><code class="language-python" data-lang="python"><span class="kn">from</span> <span class="nn">systemml</span> <span class="kn">import</span> <span class="n">MLContext</span><span class="p">,</span> <span class="n">dml</span><span class="p">,</span> <span class="n">dmlFromResource</span><span class="p">,</span> <span class="n">dmlFromFile</span><span class="p">,</span> <span class="n">dmlFromUrl</span> +<span class="n">ml</span> <span class="o">=</span> <span class="n">MLContext</span><span class="p">(</span><span class="n">spark</span><span class="p">)</span></code></pre></div> + + </div> + +<div data-lang="PySpark Shell"> + + <div class="highlight"><pre><code class="language-python" data-lang="python"><span class="o">>>></span> <span class="kn">from</span> <span class="nn">systemml</span> <span class="kn">import</span> <span class="n">MLContext</span><span class="p">,</span> <span class="n">dml</span><span class="p">,</span> <span class="n">dmlFromResource</span><span class="p">,</span> <span class="n">dmlFromFile</span><span class="p">,</span> <span class="n">dmlFromUrl</span> +<span class="o">>>></span> <span class="n">ml</span> <span class="o">=</span> <span class="n">MLContext</span><span class="p">(</span><span class="n">spark</span><span class="p">)</span> + +<span class="n">Welcome</span> <span class="n">to</span> <span class="n">Apache</span> <span class="n">SystemML</span><span class="err">!</span> +<span class="n">Version</span> <span class="mf">1.0</span><span class="o">.</span><span class="mi">0</span><span class="o">-</span><span class="n">SNAPSHOT</span></code></pre></div> + + </div> + +</div> + +<h2 id="hello-world">Hello World</h2> + +<p>The 
ScriptFactory class allows DML and PYDML scripts to be created from Strings, Files, URLs, and InputStreams. +Here, we’ll use the <code>dml</code> method to create a DML “hello world” script based on a String. Notice that the script +reports that it has no inputs or outputs.</p> + +<p>We execute the script using MLContext’s <code>execute</code> method, which displays “<code>hello world</code>” to the console. +The <code>execute</code> method returns an MLResults object, which contains no results since the script has +no outputs.</p> + +<div class="codetabs"> + +<div data-lang="Scala"> + + <div class="highlight"><pre><code class="language-scala" data-lang="scala"><span class="k">val</span> <span class="n">helloScript</span> <span class="k">=</span> <span class="n">dml</span><span class="o">(</span><span class="s">"print('hello world')"</span><span class="o">)</span> +<span class="n">ml</span><span class="o">.</span><span class="n">execute</span><span class="o">(</span><span class="n">helloScript</span><span class="o">)</span></code></pre></div> + + </div> + +<div data-lang="Spark Shell"> + + <div class="highlight"><pre><code class="language-scala" data-lang="scala"><span class="n">scala</span><span class="o">></span> <span class="k">val</span> <span class="n">helloScript</span> <span class="k">=</span> <span class="n">dml</span><span class="o">(</span><span class="s">"print('hello world')"</span><span class="o">)</span> +<span class="n">helloScript</span><span class="k">:</span> <span class="kt">org.apache.sysml.api.mlcontext.Script</span> <span class="o">=</span> +<span class="nc">Inputs</span><span class="k">:</span> +<span class="kt">None</span> + +<span class="nc">Outputs</span><span class="k">:</span> +<span class="kt">None</span> + +<span class="n">scala</span><span class="o">></span> <span class="n">ml</span><span class="o">.</span><span class="n">execute</span><span class="o">(</span><span class="n">helloScript</span><span class="o">)</span> +<span 
class="n">hello</span> <span class="n">world</span> +<span class="n">res0</span><span class="k">:</span> <span class="kt">org.apache.sysml.api.mlcontext.MLResults</span> <span class="o">=</span> +<span class="nc">None</span></code></pre></div> + + </div> + +<div data-lang="Python"> + + <div class="highlight"><pre><code class="language-python" data-lang="python"><span class="n">helloScript</span> <span class="o">=</span> <span class="n">dml</span><span class="p">(</span><span class="s">"print('hello world')"</span><span class="p">)</span> +<span class="n">ml</span><span class="o">.</span><span class="n">execute</span><span class="p">(</span><span class="n">helloScript</span><span class="p">)</span></code></pre></div> + + </div> + +<div data-lang="PySpark Shell"> + + <div class="highlight"><pre><code class="language-python" data-lang="python"><span class="o">>>></span> <span class="n">helloScript</span> <span class="o">=</span> <span class="n">dml</span><span class="p">(</span><span class="s">"print('hello world')"</span><span class="p">)</span> +<span class="o">>>></span> <span class="n">ml</span><span class="o">.</span><span class="n">execute</span><span class="p">(</span><span class="n">helloScript</span><span class="p">)</span> +<span class="n">hello</span> <span class="n">world</span> +<span class="n">SystemML</span> <span class="n">Statistics</span><span class="p">:</span> +<span class="n">Total</span> <span class="n">execution</span> <span class="n">time</span><span class="p">:</span> <span class="mf">0.001</span> <span class="n">sec</span><span class="o">.</span> +<span class="n">Number</span> <span class="n">of</span> <span class="n">executed</span> <span class="n">Spark</span> <span class="n">inst</span><span class="p">:</span> <span class="mf">0.</span> + +<span class="n">MLResults</span></code></pre></div> + + </div> + + +</div> + +<h2 id="lenet-on-mnist-example">LeNet on MNIST Example</h2> + +<p>SystemML features the DML-based <a 
href="https://github.com/apache/systemml/tree/master/scripts/nn"><code>nn</code> library for deep learning</a>.</p> + +<p>At project build time, SystemML automatically generates wrapper classes for DML scripts +to enable convenient access to scripts and execution of functions. +In the example below, we obtain a reference (<code>clf</code>) to the LeNet on MNIST example. +We generate dummy data, train a convolutional net using the LeNet architecture, +compute the class probability predictions, and then evaluate the convolutional net.</p> + +<p>Note that these automatic script wrappers are currently not available in Python but will be made available in the near future.</p> + +<div class="codetabs"> + +<div data-lang="Scala"> + + <div class="highlight"><pre><code class="language-scala" data-lang="scala"><span class="k">val</span> <span class="n">clf</span> <span class="k">=</span> <span class="n">ml</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">examples</span><span class="o">.</span><span class="nc">Mnist_lenet</span> +<span class="k">val</span> <span class="n">dummy</span> <span class="k">=</span> <span class="n">clf</span><span class="o">.</span><span class="n">generate_dummy_data</span> +<span class="k">val</span> <span class="n">dummyVal</span> <span class="k">=</span> <span class="n">clf</span><span class="o">.</span><span class="n">generate_dummy_data</span> +<span class="k">val</span> <span class="n">params</span> <span class="k">=</span> <span class="n">clf</span><span class="o">.</span><span class="n">train</span><span class="o">(</span><span class="n">dummy</span><span class="o">.</span><span class="n">X</span><span class="o">,</span> <span class="n">dummy</span><span class="o">.</span><span class="n">Y</span><span class="o">,</span> <span class="n">dummyVal</span><span class="o">.</span><span class="n">X</span><span class="o">,</span> <span class="n">dummyVal</span><span class="o">.</span><span 
class="n">Y</span><span class="o">,</span> <span class="n">dummy</span><span class="o">.</span><span class="n">C</span><span class="o">,</span> <span class="n">dummy</span><span class="o">.</span><span class="nc">Hin</span><span class="o">,</span> <span class="n">dummy</span><span class="o">.</span><span class="nc">Win</span><span class="o">,</span> <span class="mi">1</span><span class="o">)</span> +<span class="k">val</span> <span class="n">probs</span> <span class="k">=</span> <span class="n">clf</span><span class="o">.</span><span class="n">predict</span><span class="o">(</span><span class="n">dummy</span><span class="o">.</span><span class="n">X</span><span class="o">,</span> <span class="n">dummy</span><span class="o">.</span><span class="n">C</span><span class="o">,</span> <span class="n">dummy</span><span class="o">.</span><span class="nc">Hin</span><span class="o">,</span> <span class="n">dummy</span><span class="o">.</span><span class="nc">Win</span><span class="o">,</span> <span class="n">params</span><span class="o">.</span><span class="n">W1</span><span class="o">,</span> <span class="n">params</span><span class="o">.</span><span class="n">b1</span><span class="o">,</span> <span class="n">params</span><span class="o">.</span><span class="n">W2</span><span class="o">,</span> <span class="n">params</span><span class="o">.</span><span class="n">b2</span><span class="o">,</span> <span class="n">params</span><span class="o">.</span><span class="n">W3</span><span class="o">,</span> <span class="n">params</span><span class="o">.</span><span class="n">b3</span><span class="o">,</span> <span class="n">params</span><span class="o">.</span><span class="n">W4</span><span class="o">,</span> <span class="n">params</span><span class="o">.</span><span class="n">b4</span><span class="o">)</span> +<span class="k">val</span> <span class="n">perf</span> <span class="k">=</span> <span class="n">clf</span><span class="o">.</span><span class="n">eval</span><span 
class="o">(</span><span class="n">probs</span><span class="o">,</span> <span class="n">dummy</span><span class="o">.</span><span class="n">Y</span><span class="o">)</span></code></pre></div> + + </div> + +<div data-lang="Spark Shell"> + + <div class="highlight"><pre><code class="language-scala" data-lang="scala"><span class="n">scala</span><span class="o">></span> <span class="k">val</span> <span class="n">clf</span> <span class="k">=</span> <span class="n">ml</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">examples</span><span class="o">.</span><span class="nc">Mnist_lenet</span> +<span class="n">clf</span><span class="k">:</span> <span class="kt">org.apache.sysml.scripts.nn.examples.Mnist_lenet</span> <span class="o">=</span> +<span class="nc">Inputs</span><span class="k">:</span> +<span class="kt">None</span> + +<span class="nc">Outputs</span><span class="k">:</span> +<span class="kt">None</span> + +<span class="n">scala</span><span class="o">></span> <span class="k">val</span> <span class="n">dummy</span> <span class="k">=</span> <span class="n">clf</span><span class="o">.</span><span class="n">generate_dummy_data</span> +<span class="nc">SystemML</span> <span class="nc">Statistics</span><span class="k">:</span> +<span class="kt">Total</span> <span class="kt">execution</span> <span class="kt">time:</span> <span class="err">0</span><span class="kt">.</span><span class="err">144</span> <span class="kt">sec.</span> +<span class="kt">Number</span> <span class="kt">of</span> <span class="kt">executed</span> <span class="kt">Spark</span> <span class="kt">inst:</span> <span class="err">0</span><span class="kt">.</span> + +<span class="kt">dummy:</span> <span class="kt">org.apache.sysml.scripts.nn.examples.mnist_lenet.Generate_dummy_data_output</span> <span class="o">=</span> +<span class="n">X</span> <span class="o">(</span><span class="nc">Matrix</span><span class="o">)</span><span class="k">:</span> <span 
class="kt">MatrixObject:</span> <span class="kt">scratch_space</span><span class="c1">//_p64701_192.168.1.103//_t0/temp0_0, [1024 x 784, nnz=802816, blocks (1000 x 1000)], binaryblock, dirty +</span><span class="n">Y</span> <span class="o">(</span><span class="nc">Matrix</span><span class="o">)</span><span class="k">:</span> <span class="kt">MatrixObject:</span> <span class="kt">scratch_space</span><span class="c1">//_p64701_192.168.1.103//_t0/temp4_4, [1024 x 10, nnz=1024, blocks (1000 x 1000)], binaryblock, dirty +</span><span class="n">C</span> <span class="o">(</span><span class="n">long</span><span class="o">)</span><span class="k">:</span> <span class="err">1</span> +<span class="kt">Hin</span> <span class="o">(</span><span class="kt">long</span><span class="o">)</span><span class="kt">:</span> <span class="err">28</span> +<span class="kt">Win</span> <span class="o">(</span><span class="kt">long</span><span class="o">)</span><span class="kt">:</span> <span class="err">28</span> + +<span class="kt">scala></span> <span class="kt">val</span> <span class="kt">dummyVal</span> <span class="o">=</span> <span class="n">clf</span><span class="o">.</span><span class="n">generate_dummy_data</span> +<span class="nc">SystemML</span> <span class="nc">Statistics</span><span class="k">:</span> +<span class="kt">Total</span> <span class="kt">execution</span> <span class="kt">time:</span> <span class="err">0</span><span class="kt">.</span><span class="err">147</span> <span class="kt">sec.</span> +<span class="kt">Number</span> <span class="kt">of</span> <span class="kt">executed</span> <span class="kt">Spark</span> <span class="kt">inst:</span> <span class="err">0</span><span class="kt">.</span> + +<span class="kt">dummyVal:</span> <span class="kt">org.apache.sysml.scripts.nn.examples.mnist_lenet.Generate_dummy_data_output</span> <span class="o">=</span> +<span class="n">X</span> <span class="o">(</span><span class="nc">Matrix</span><span class="o">)</span><span 
class="k">:</span> <span class="kt">MatrixObject:</span> <span class="kt">scratch_space</span><span class="c1">//_p64701_192.168.1.103//_t0/temp5_5, [1024 x 784, nnz=802816, blocks (1000 x 1000)], binaryblock, dirty +</span><span class="n">Y</span> <span class="o">(</span><span class="nc">Matrix</span><span class="o">)</span><span class="k">:</span> <span class="kt">MatrixObject:</span> <span class="kt">scratch_space</span><span class="c1">//_p64701_192.168.1.103//_t0/temp9_9, [1024 x 10, nnz=1024, blocks (1000 x 1000)], binaryblock, dirty +</span><span class="n">C</span> <span class="o">(</span><span class="n">long</span><span class="o">)</span><span class="k">:</span> <span class="err">1</span> +<span class="kt">Hin</span> <span class="o">(</span><span class="kt">long</span><span class="o">)</span><span class="kt">:</span> <span class="err">28</span> +<span class="kt">Win</span> <span class="o">(</span><span class="kt">long</span><span class="o">)</span><span class="kt">:</span> <span class="err">28</span> + +<span class="kt">scala></span> <span class="kt">val</span> <span class="kt">params</span> <span class="o">=</span> <span class="n">clf</span><span class="o">.</span><span class="n">train</span><span class="o">(</span><span class="n">dummy</span><span class="o">.</span><span class="n">X</span><span class="o">,</span> <span class="n">dummy</span><span class="o">.</span><span class="n">Y</span><span class="o">,</span> <span class="n">dummyVal</span><span class="o">.</span><span class="n">X</span><span class="o">,</span> <span class="n">dummyVal</span><span class="o">.</span><span class="n">Y</span><span class="o">,</span> <span class="n">dummy</span><span class="o">.</span><span class="n">C</span><span class="o">,</span> <span class="n">dummy</span><span class="o">.</span><span class="nc">Hin</span><span class="o">,</span> <span class="n">dummy</span><span class="o">.</span><span class="nc">Win</span><span class="o">,</span> <span class="mi">1</span><span 
class="o">)</span> +<span class="mi">17</span><span class="o">/</span><span class="mi">06</span><span class="o">/</span><span class="mi">05</span> <span class="mi">15</span><span class="k">:</span><span class="err">52</span><span class="kt">:</span><span class="err">09</span> <span class="kt">WARN</span> <span class="kt">SparkExecutionContext:</span> <span class="kt">Configuration</span> <span class="kt">parameter</span> <span class="kt">spark.driver.maxResultSize</span> <span class="kt">set</span> <span class="kt">to</span> <span class="err">1</span> <span class="kt">GB.</span> <span class="kt">You</span> <span class="kt">can</span> <span class="kt">set</span> <span class="kt">it</span> <span class="kt">through</span> <span class="kt">Spark</span> <span class="kt">default</span> <span class="kt">configuration</span> <span class="kt">setting</span> <span class="kt">either</span> <span class="kt">to</span> <span class="err">0</span> <span class="o">(</span><span class="kt">unlimited</span><span class="o ">)</span> <span class="kt">or</span> <span class="kt">to</span> <span class="kt">available</span> <span class="kt">memory</span> <span class="kt">budget</span> <span class="kt">of</span> <span class="kt">size</span> <span class="err">2</span> <span class="kt">GB.</span> +<span class="kt">Starting</span> <span class="kt">optimization</span> +<span class="mi">17</span><span class="o">/</span><span class="mi">06</span><span class="o">/</span><span class="mi">05</span> <span class="mi">15</span><span class="k">:</span><span class="err">52</span><span class="kt">:</span><span class="err">10</span> <span class="kt">WARN</span> <span class="kt">TaskSetManager:</span> <span class="kt">Stage</span> <span class="err">0</span> <span class="kt">contains</span> <span class="kt">a</span> <span class="kt">task</span> <span class="kt">of</span> <span class="kt">very</span> <span class="kt">large</span> <span class="kt">size</span> <span class="o">(</span><span 
class="err">508</span> <span class="kt">KB</span><span class="o">)</span><span class="kt">.</span> <span class="kt">The</span> <span class="kt">maximum</span> <span class="kt">recommended</span> <span class="kt">task</span> <span class="kt">size</span> <span class="kt">is</span> <span class="err">100</span> <span class="kt">KB.</span> +<span class="err">17</span><span class="kt">/</span><span class="err">06</span><span class="kt">/</span><span class="err">05</span> <span class="err">15</span><span class="kt">:</span><span class="err">52</span><span class="kt">:</span><span class="err">11</span> <span class="kt">WARN</span> <span class="kt">TaskSetManager:</span> <span class="kt">Stage</span> <span class="err">1</span> <span class="kt">contains</span> <span class="kt">a</span> <span class="kt">task</span> <span class="kt">of</span> <span class="kt">very</span> <span class="kt">large</span> <span class="kt">size</span> <span class="o">(</span><span class="err">508</span> <span class="kt">KB</span><span class="o">)</span><span class="kt">.</span> <span class="kt">The</span> <span class="kt">maximum</span> <span class="kt">recommended</span> <span class="kt">task</span> <span class="kt">size</span> <span class="kt">is</span> <span class="err">100</span> <span class="kt">KB.</span> +<span class="err">17</span><span class="kt">/</span><span class="err">06</span><span class="kt">/</span><span class="err">05</span> <span class="err">15</span><span class="kt">:</span><span class="err">52</span><span class="kt">:</span><span class="err">11</span> <span class="kt">WARN</span> <span class="kt">TaskSetManager:</span> <span class="kt">Stage</span> <span class="err">2</span> <span class="kt">contains</span> <span class="kt">a</span> <span class="kt">task</span> <span class="kt">of</span> <span class="kt">very</span> <span class="kt">large</span> <span class="kt">size</span> <span class="o">(</span><span class="err">508</span> <span class="kt">KB</span><span 
class="o">)</span><span class="kt">.</span> <span class="kt">The</span> <span class="kt">maximum</span> <span class="kt">recommended</span> <span class="kt">task</span> <span class="kt">size</span> <span class="kt">is</span> <span class="err">100</span> <span class="kt">KB.</span> +<span class="err">17</span><span class="kt">/</span><span class="err">06</span><span class="kt">/</span><span class="err">05</span> <span class="err">15</span><span class="kt">:</span><span class="err">52</span><span class="kt">:</span><span class="err">11</span> <span class="kt">WARN</span> <span class="kt">TaskSetManager:</span> <span class="kt">Stage</span> <span class="err">3</span> <span class="kt">contains</span> <span class="kt">a</span> <span class="kt">task</span> <span class="kt">of</span> <span class="kt">very</span> <span class="kt">large</span> <span class="kt">size</span> <span class="o">(</span><span class="err">508</span> <span class="kt">KB</span><span class="o">)</span><span class="kt">.</span> <span class="kt">The</span> <span class="kt">maximum</span> <span class="kt">recommended</span> <span class="kt">task</span> <span class="kt">size</span> <span class="kt">is</span> <span class="err">100</span> <span class="kt">KB.</span> +<span class="err">17</span><span class="kt">/</span><span class="err">06</span><span class="kt">/</span><span class="err">05</span> <span class="err">15</span><span class="kt">:</span><span class="err">52</span><span class="kt">:</span><span class="err">13</span> <span class="kt">WARN</span> <span class="kt">TaskSetManager:</span> <span class="kt">Stage</span> <span class="err">4</span> <span class="kt">contains</span> <span class="kt">a</span> <span class="kt">task</span> <span class="kt">of</span> <span class="kt">very</span> <span class="kt">large</span> <span class="kt">size</span> <span class="o">(</span><span class="err">296</span> <span class="kt">KB</span><span class="o">)</span><span class="kt">.</span> <span class="kt">The</span> <span 
class="kt">maximum</span> <span class="kt">recommended</span> <span class="kt">task</span> <span class="kt">size</span> <span class="kt">is</span> <span class="err">100</span> <span class="kt">KB.</span> +<span class="err">17</span><span class="kt">/</span><span class="err">06</span><span class="kt">/</span><span class="err">05</span> <span class="err">15</span><span class="kt">:</span><span class="err">52</span><span class="kt">:</span><span class="err">13</span> <span class="kt">WARN</span> <span class="kt">TaskSetManager:</span> <span class="kt">Stage</span> <span class="err">5</span> <span class="kt">contains</span> <span class="kt">a</span> <span class="kt">task</span> <span class="kt">of</span> <span class="kt">very</span> <span class="kt">large</span> <span class="kt">size</span> <span class="o">(</span><span class="err">508</span> <span class="kt">KB</span><span class="o">)</span><span class="kt">.</span> <span class="kt">The</span> <span class="kt">maximum</span> <span class="kt">recommended</span> <span class="kt">task</span> <span class="kt">size</span> <span class="kt">is</span> <span class="err">100</span> <span class="kt">KB.</span> +<span class="err">17</span><span class="kt">/</span><span class="err">06</span><span class="kt">/</span><span class="err">05</span> <span class="err">15</span><span class="kt">:</span><span class="err">52</span><span class="kt">:</span><span class="err">14</span> <span class="kt">WARN</span> <span class="kt">TaskSetManager:</span> <span class="kt">Stage</span> <span class="err">6</span> <span class="kt">contains</span> <span class="kt">a</span> <span class="kt">task</span> <span class="kt">of</span> <span class="kt">very</span> <span class="kt">large</span> <span class="kt">size</span> <span class="o">(</span><span class="err">118</span> <span class="kt">KB</span><span class="o">)</span><span class="kt">.</span> <span class="kt">The</span> <span class="kt">maximum</span> <span class="kt">recommended</span> <span 
class="kt">task</span> <span class="kt">size</span> <span class="kt">is</span> <span class="err">100</span> <span class="kt">KB.</span> +<span class="err">17</span><span class="kt">/</span><span class="err">06</span><span class="kt">/</span><span class="err">05</span> <span class="err">15</span><span class="kt">:</span><span class="err">52</span><span class="kt">:</span><span class="err">14</span> <span class="kt">WARN</span> <span class="kt">TaskSetManager:</span> <span class="kt">Stage</span> <span class="err">7</span> <span class="kt">contains</span> <span class="kt">a</span> <span class="kt">task</span> <span class="kt">of</span> <span class="kt">very</span> <span class="kt">large</span> <span class="kt">size</span> <span class="o">(</span><span class="err">508</span> <span class="kt">KB</span><span class="o">)</span><span class="kt">.</span> <span class="kt">The</span> <span class="kt">maximum</span> <span class="kt">recommended</span> <span class="kt">task</span> <span class="kt">size</span> <span class="kt">is</span> <span class="err">100</span> <span class="kt">KB.</span> +<span class="err">17</span><span class="kt">/</span><span class="err">06</span><span class="kt">/</span><span class="err">05</span> <span class="err">15</span><span class="kt">:</span><span class="err">52</span><span class="kt">:</span><span class="err">14</span> <span class="kt">WARN</span> <span class="kt">TaskSetManager:</span> <span class="kt">Stage</span> <span class="err">8</span> <span class="kt">contains</span> <span class="kt">a</span> <span class="kt">task</span> <span class="kt">of</span> <span class="kt">very</span> <span class="kt">large</span> <span class="kt">size</span> <span class="o">(</span><span class="err">115</span> <span class="kt">KB</span><span class="o">)</span><span class="kt">.</span> <span class="kt">The</span> <span class="kt">maximum</span> <span class="kt">recommended</span> <span class="kt">task</span> <span class="kt">size</span> <span 
class="kt">is</span> <span class="err">100</span> <span class="kt">KB.</span> +<span class="err">17</span><span class="kt">/</span><span class="err">06</span><span class="kt">/</span><span class="err">05</span> <span class="err">15</span><span class="kt">:</span><span class="err">52</span><span class="kt">:</span><span class="err">14</span> <span class="kt">WARN</span> <span class="kt">TaskSetManager:</span> <span class="kt">Stage</span> <span class="err">9</span> <span class="kt">contains</span> <span class="kt">a</span> <span class="kt">task</span> <span class="kt">of</span> <span class="kt">very</span> <span class="kt">large</span> <span class="kt">size</span> <span class="o">(</span><span class="err">508</span> <span class="kt">KB</span><span class="o">)</span><span class="kt">.</span> <span class="kt">The</span> <span class="kt">maximum</span> <span class="kt">recommended</span> <span class="kt">task</span> <span class="kt">size</span> <span class="kt">is</span> <span class="err">100</span> <span class="kt">KB.</span> +<span class="err">17</span><span class="kt">/</span><span class="err">06</span><span class="kt">/</span><span class="err">05</span> <span class="err">15</span><span class="kt">:</span><span class="err">52</span><span class="kt">:</span><span class="err">15</span> <span class="kt">WARN</span> <span class="kt">TaskSetManager:</span> <span class="kt">Stage</span> <span class="err">11</span> <span class="kt">contains</span> <span class="kt">a</span> <span class="kt">task</span> <span class="kt">of</span> <span class="kt">very</span> <span class="kt">large</span> <span class="kt">size</span> <span class="o">(</span><span class="err">508</span> <span class="kt">KB</span><span class="o">)</span><span class="kt">.</span> <span class="kt">The</span> <span class="kt">maximum</span> <span class="kt">recommended</span> <span class="kt">task</span> <span class="kt">size</span> <span class="kt">is</span> <span class="err">100</span> <span 
class="kt">KB.</span> +<span class="err">17</span><span class="kt">/</span><span class="err">06</span><span class="kt">/</span><span class="err">05</span> <span class="err">15</span><span class="kt">:</span><span class="err">52</span><span class="kt">:</span><span class="err">15</span> <span class="kt">WARN</span> <span class="kt">TaskSetManager:</span> <span class="kt">Stage</span> <span class="err">13</span> <span class="kt">contains</span> <span class="kt">a</span> <span class="kt">task</span> <span class="kt">of</span> <span class="kt">very</span> <span class="kt">large</span> <span class="kt">size</span> <span class="o">(</span><span class="err">508</span> <span class="kt">KB</span><span class="o">)</span><span class="kt">.</span> <span class="kt">The</span> <span class="kt">maximum</span> <span class="kt">recommended</span> <span class="kt">task</span> <span class="kt">size</span> <span class="kt">is</span> <span class="err">100</span> <span class="kt">KB.</span> +<span class="err">17</span><span class="kt">/</span><span class="err">06</span><span class="kt">/</span><span class="err">05</span> <span class="err">15</span><span class="kt">:</span><span class="err">52</span><span class="kt">:</span><span class="err">16</span> <span class="kt">WARN</span> <span class="kt">TaskSetManager:</span> <span class="kt">Stage</span> <span class="err">15</span> <span class="kt">contains</span> <span class="kt">a</span> <span class="kt">task</span> <span class="kt">of</span> <span class="kt">very</span> <span class="kt">large</span> <span class="kt">size</span> <span class="o">(</span><span class="err">508</span> <span class="kt">KB</span><span class="o">)</span><span class="kt">.</span> <span class="kt">The</span> <span class="kt">maximum</span> <span class="kt">recommended</span> <span class="kt">task</span> <span class="kt">size</span> <span class="kt">is</span> <span class="err">100</span> <span class="kt">KB.</span> +<span class="err">17</span><span 
class="kt">/</span><span class="err">06</span><span class="kt">/</span><span class="err">05</span> <span class="err">15</span><span class="kt">:</span><span class="err">52</span><span class="kt">:</span><span class="err">16</span> <span class="kt">WARN</span> <span class="kt">TaskSetManager:</span> <span class="kt">Stage</span> <span class="err">17</span> <span class="kt">contains</span> <span class="kt">a</span> <span class="kt">task</span> <span class="kt">of</span> <span class="kt">very</span> <span class="kt">large</span> <span class="kt">size</span> <span class="o">(</span><span class="err">508</span> <span class="kt">KB</span><span class="o">)</span><span class="kt">.</span> <span class="kt">The</span> <span class="kt">maximum</span> <span class="kt">recommended</span> <span class="kt">task</span> <span class="kt">size</span> <span class="kt">is</span> <span class="err">100</span> <span class="kt">KB.</span> +<span class="err">17</span><span class="kt">/</span><span class="err">06</span><span class="kt">/</span><span class="err">05</span> <span class="err">15</span><span class="kt">:</span><span class="err">52</span><span class="kt">:</span><span class="err">17</span> <span class="kt">WARN</span> <span class="kt">TaskSetManager:</span> <span class="kt">Stage</span> <span class="err">19</span> <span class="kt">contains</span> <span class="kt">a</span> <span class="kt">task</span> <span class="kt">of</span> <span class="kt">very</span> <span class="kt">large</span> <span class="kt">size</span> <span class="o">(</span><span class="err">508</span> <span class="kt">KB</span><span class="o">)</span><span class="kt">.</span> <span class="kt">The</span> <span class="kt">maximum</span> <span class="kt">recommended</span> <span class="kt">task</span> <span class="kt">size</span> <span class="kt">is</span> <span class="err">100</span> <span class="kt">KB.</span> +<span class="err">17</span><span class="kt">/</span><span class="err">06</span><span 
class="kt">/</span><span class="err">05</span> <span class="err">15</span><span class="kt">:</span><span class="err">52</span><span class="kt">:</span><span class="err">17</span> <span class="kt">WARN</span> <span class="kt">TaskSetManager:</span> <span class="kt">Stage</span> <span class="err">21</span> <span class="kt">contains</span> <span class="kt">a</span> <span class="kt">task</span> <span class="kt">of</span> <span class="kt">very</span> <span class="kt">large</span> <span class="kt">size</span> <span class="o">(</span><span class="err">508</span> <span class="kt">KB</span><span class="o">)</span><span class="kt">.</span> <span class="kt">The</span> <span class="kt">maximum</span> <span class="kt">recommended</span> <span class="kt">task</span> <span class="kt">size</span> <span class="kt">is</span> <span class="err">100</span> <span class="kt">KB.</span> +<span class="err">17</span><span class="kt">/</span><span class="err">06</span><span class="kt">/</span><span class="err">05</span> <span class="err">15</span><span class="kt">:</span><span class="err">52</span><span class="kt">:</span><span class="err">18</span> <span class="kt">WARN</span> <span class="kt">TaskSetManager:</span> <span class="kt">Stage</span> <span class="err">23</span> <span class="kt">contains</span> <span class="kt">a</span> <span class="kt">task</span> <span class="kt">of</span> <span class="kt">very</span> <span class="kt">large</span> <span class="kt">size</span> <span class="o">(</span><span class="err">508</span> <span class="kt">KB</span><span class="o">)</span><span class="kt">.</span> <span class="kt">The</span> <span class="kt">maximum</span> <span class="kt">recommended</span> <span class="kt">task</span> <span class="kt">size</span> <span class="kt">is</span> <span class="err">100</span> <span class="kt">KB.</span> +<span class="err">17</span><span class="kt">/</span><span class="err">06</span><span class="kt">/</span><span class="err">05</span> <span 
class="err">15</span><span class="kt">:</span><span class="err">52</span><span class="kt">:</span><span class="err">18</span> <span class="kt">WARN</span> <span class="kt">TaskSetManager:</span> <span class="kt">Stage</span> <span class="err">25</span> <span class="kt">contains</span> <span class="kt">a</span> <span class="kt">task</span> <span class="kt">of</span> <span class="kt">very</span> <span class="kt">large</span> <span class="kt">size</span> <span class="o">(</span><span class="err">508</span> <span class="kt">KB</span><span class="o">)</span><span class="kt">.</span> <span class="kt">The</span> <span class="kt">maximum</span> <span class="kt">recommended</span> <span class="kt">task</span> <span class="kt">size</span> <span class="kt">is</span> <span class="err">100</span> <span class="kt">KB.</span> +<span class="err">17</span><span class="kt">/</span><span class="err">06</span><span class="kt">/</span><span class="err">05</span> <span class="err">15</span><span class="kt">:</span><span class="err">52</span><span class="kt">:</span><span class="err">19</span> <span class="kt">WARN</span> <span class="kt">TaskSetManager:</span> <span class="kt">Stage</span> <span class="err">27</span> <span class="kt">contains</span> <span class="kt">a</span> <span class="kt">task</span> <span class="kt">of</span> <span class="kt">very</span> <span class="kt">large</span> <span class="kt">size</span> <span class="o">(</span><span class="err">508</span> <span class="kt">KB</span><span class="o">)</span><span class="kt">.</span> <span class="kt">The</span> <span class="kt">maximum</span> <span class="kt">recommended</span> <span class="kt">task</span> <span class="kt">size</span> <span class="kt">is</span> <span class="err">100</span> <span class="kt">KB.</span> +<span class="err">17</span><span class="kt">/</span><span class="err">06</span><span class="kt">/</span><span class="err">05</span> <span class="err">15</span><span class="kt">:</span><span 
class="err">52</span><span class="kt">:</span><span class="err">19</span> <span class="kt">WARN</span> <span class="kt">TaskSetManager:</span> <span class="kt">Stage</span> <span class="err">29</span> <span class="kt">contains</span> <span class="kt">a</span> <span class="kt">task</span> <span class="kt">of</span> <span class="kt">very</span> <span class="kt">large</span> <span class="kt">size</span> <span class="o">(</span><span class="err">508</span> <span class="kt">KB</span><span class="o">)</span><span class="kt">.</span> <span class="kt">The</span> <span class="kt">maximum</span> <span class="kt">recommended</span> <span class="kt">task</span> <span class="kt">size</span> <span class="kt">is</span> <span class="err">100</span> <span class="kt">KB.</span> +<span class="err">17</span><span class="kt">/</span><span class="err">06</span><span class="kt">/</span><span class="err">05</span> <span class="err">15</span><span class="kt">:</span><span class="err">52</span><span class="kt">:</span><span class="err">20</span> <span class="kt">WARN</span> <span class="kt">TaskSetManager:</span> <span class="kt">Stage</span> <span class="err">31</span> <span class="kt">contains</span> <span class="kt">a</span> <span class="kt">task</span> <span class="kt">of</span> <span class="kt">very</span> <span class="kt">large</span> <span class="kt">size</span> <span class="o">(</span><span class="err">508</span> <span class="kt">KB</span><span class="o">)</span><span class="kt">.</span> <span class="kt">The</span> <span class="kt">maximum</span> <span class="kt">recommended</span> <span class="kt">task</span> <span class="kt">size</span> <span class="kt">is</span> <span class="err">100</span> <span class="kt">KB.</span> +<span class="kt">SystemML</span> <span class="kt">Statistics:</span> +<span class="kt">Total</span> <span class="kt">execution</span> <span class="kt">time:</span> <span class="err">11</span><span class="kt">.</span><span class="err">261</span> <span 
class="kt">sec.</span> +<span class="kt">Number</span> <span class="kt">of</span> <span class="kt">executed</span> <span class="kt">Spark</span> <span class="kt">inst:</span> <span class="err">32</span><span class="kt">.</span> + +<span class="kt">params:</span> <span class="kt">org.apache.sysml.scripts.nn.examples.mnist_lenet.Train_output</span> <span class="o">=</span> +<span class="n">W1</span> <span class="o">(</span><span class="nc">Matrix</span><span class="o">)</span><span class="k">:</span> <span class="kt">MatrixObject:</span> <span class="kt">scratch_space</span><span class="c1">//_p64701_192.168.1.103//_t0/temp2203_1606, [32 x 25, nnz=800, blocks (1000 x 1000)], binaryblock, dirty +</span><span class="n">b1</span> <span class="o">(</span><span class="nc">Matrix</span><span class="o">)</span><span class="k">:</span> <span class="kt">MatrixObject:</span> <span class="kt">scratch_space</span><span class="c1">//_p64701_192.168.1.103//_t0/temp2205_1608, [32 x 1, nnz=32, blocks (1000 x 1000)], binaryblock, dirty +</span><span class="n">W2</span> <span class="o">(</span><span class="nc">Matrix</span><span class="o">)</span><span class="k">:</span> <span class="kt">MatrixObject:</span> <span class="kt">scratch_space</span><span class="c1">//_p64701_192.168.1.103//_t0/temp2196_1599, [64 x 800, nnz=51200, blocks (1000 x 1000)], binaryblock, dirty +</span><span class="n">b2</span> <span class="o">(</span><span class="nc">Matrix</span><span class="o">)</span><span class="k">:</span> <span class="kt">MatrixObject:</span> <span class="kt">scratch_space</span><span class="c1">//_p64701_192.168.1.103//_t0/temp2200_1603, [64 x 1, nnz=64, blocks (1000 x 1000)], binaryblock, dirty +</span><span class="n">W3</span> <span class="o">(</span><span class="nc">Matrix</span><span class="o">)</span><span class="k">:</span> <span class="kt">MatrixObject:</span> <span class="kt">scratch_space</span><span class="c1">//_p64701_192.168.1.103//_t0/temp2186_1589, [3136 x 512, 
nnz=1605632, blocks (1000 x 1000)], binaryblock, ... +</span><span class="n">scala</span><span class="o">></span> <span class="k">val</span> <span class="n">probs</span> <span class="k">=</span> <span class="n">clf</span><span class="o">.</span><span class="n">predict</span><span class="o">(</span><span class="n">dummy</span><span class="o">.</span><span class="n">X</span><span class="o">,</span> <span class="n">dummy</span><span class="o">.</span><span class="n">C</span><span class="o">,</span> <span class="n">dummy</span><span class="o">.</span><span class="nc">Hin</span><span class="o">,</span> <span class="n">dummy</span><span class="o">.</span><span class="nc">Win</span><span class="o">,</span> <span class="n">params</span><span class="o">.</span><span class="n">W1</span><span class="o">,</span> <span class="n">params</span><span class="o">.</span><span class="n">b1</span><span class="o">,</span> <span class="n">params</span><span class="o">.</span><span class="n">W2</span><span class="o">,</span> <span class="n">params</span><span class="o">.</span><span class="n">b2</span><span class="o">,</span> <span class="n">params</span><span class="o">.</span><span class="n">W3</span><span class="o">,</span> <span class="n">params</span><span class="o">.</span><span class="n">b3</span><span class="o">,</span> <span class="n">params</span><span class="o">.</span><span class="n">W4</span><span class="o">,</span> <span class="n">params</span><span class="o">.</span><span class="n">b4</span><span class="o">)</span> +<span class="nc">SystemML</span> <span class="nc">Statistics</span><span class="k">:</span> +<span class="kt">Total</span> <span class="kt">execution</span> <span class="kt">time:</span> <span class="err">2</span><span class="kt">.</span><span class="err">148</span> <span class="kt">sec.</span> +<span class="kt">Number</span> <span class="kt">of</span> <span class="kt">executed</span> <span class="kt">Spark</span> <span class="kt">inst:</span> <span 
class="err">48</span><span class="kt">.</span> + +<span class="kt">probs:</span> <span class="kt">org.apache.sysml.api.mlcontext.Matrix</span> <span class="o">=</span> <span class="nc">MatrixObject</span><span class="k">:</span> <span class="kt">scratch_space</span><span class="c1">//_p64701_192.168.1.103//_t0/temp2505_1865, [1024 x 10, nnz=10240, blocks (1000 x 1000)], binaryblock, dirty +</span> +<span class="n">scala</span><span class="o">></span> <span class="k">val</span> <span class="n">perf</span> <span class="k">=</span> <span class="n">clf</span><span class="o">.</span><span class="n">eval</span><span class="o">(</span><span class="n">probs</span><span class="o">,</span> <span class="n">dummy</span><span class="o">.</span><span class="n">Y</span><span class="o">)</span> +<span class="nc">SystemML</span> <span class="nc">Statistics</span><span class="k">:</span> +<span class="kt">Total</span> <span class="kt">execution</span> <span class="kt">time:</span> <span class="err">0</span><span class="kt">.</span><span class="err">007</span> <span class="kt">sec.</span> +<span class="kt">Number</span> <span class="kt">of</span> <span class="kt">executed</span> <span class="kt">Spark</span> <span class="kt">inst:</span> <span class="err">48</span><span class="kt">.</span> + +<span class="kt">perf:</span> <span class="kt">org.apache.sysml.scripts.nn.examples.mnist_lenet.Eval_output</span> <span class="o">=</span> +<span class="n">loss</span> <span class="o">(</span><span class="n">double</span><span class="o">)</span><span class="k">:</span> <span class="err">2</span><span class="kt">.</span><span class="err">2681513307168797</span> +<span class="kt">accuracy</span> <span class="o">(</span><span class="kt">double</span><span class="o">)</span><span class="kt">:</span> <span class="err">0</span><span class="kt">.</span><span class="err">1435546875</span></code></pre></div> + + </div> + +</div> + +<h2 id="dataframe-example">DataFrame Example</h2> + +<p>For 
demonstration purposes, we’ll use Spark to create a <code>DataFrame</code> called <code>df</code> of random <code>double</code>s from 0 to 1 consisting of 10,000 rows and 100 columns.</p> + +<div class="codetabs"> + +<div data-lang="Scala"> + + <div class="highlight"><pre><code class="language-scala" data-lang="scala"><span class="k">import</span> <span class="nn">org.apache.spark.sql._</span> +<span class="k">import</span> <span class="nn">org.apache.spark.sql.types.</span><span class="o">{</span><span class="nc">StructType</span><span class="o">,</span><span class="nc">StructField</span><span class="o">,</span><span class="nc">DoubleType</span><span class="o">}</span> +<span class="k">import</span> <span class="nn">scala.util.Random</span> +<span class="k">val</span> <span class="n">numRows</span> <span class="k">=</span> <span class="mi">10000</span> +<span class="k">val</span> <span class="n">numCols</span> <span class="k">=</span> <span class="mi">100</span> +<span class="k">val</span> <span class="n">data</span> <span class="k">=</span> <span class="n">sc</span><span class="o">.</span><span class="n">parallelize</span><span class="o">(</span><span class="mi">0</span> <span class="n">to</span> <span class="n">numRows</span><span class="o">-</span><span class="mi">1</span><span class="o">).</span><span class="n">map</span> <span class="o">{</span> <span class="k">_</span> <span class="k">=></span> <span class="nc">Row</span><span class="o">.</span><span class="n">fromSeq</span><span class="o">(</span><span class="nc">Seq</span><span class="o">.</span><span class="n">fill</span><span class="o">(</span><span class="n">numCols</span><span class="o">)(</span><span class="nc">Random</span><span class="o">.</span><span class="n">nextDouble</span><span class="o">))</span> <span class="o">}</span> +<span class="k">val</span> <span class="n">schema</span> <span class="k">=</span> <span class="nc">StructType</span><span class="o">((</span><span class="mi">0</span> <span 
class="n">to</span> <span class="n">numCols</span><span class="o">-</span><span class="mi">1</span><span class="o">).</span><span class="n">map</span> <span class="o">{</span> <span class="n">i</span> <span class="k">=></span> <span class="nc">StructField</span><span class="o">(</span><span class="s">"C"</span> <span class="o">+</span> <span class="n">i</span><span class="o">,</span> <span class="nc">DoubleType</span><span class="o">,</span> <span class="kc">true</span><span class="o">)</span> <span class="o">}</span> <span class="o">)</span> +<span class="k">val</span> <span class="n">df</span> <span class="k">=</span> <span class="n">spark</span><span class="o">.</span><span class="n">createDataFrame</span><span class="o">(</span><span class="n">data</span><span class="o">,</span> <span class="n">schema</span><span class="o">)</span></code></pre></div> + + </div> + +<div data-lang="Spark Shell"> + + <div class="highlight"><pre><code class="language-scala" data-lang="scala"><span class="n">scala</span><span class="o">></span> <span class="k">import</span> <span class="nn">org.apache.spark.sql._</span> +<span class="k">import</span> <span class="nn">org.apache.spark.sql._</span> + +<span class="n">scala</span><span class="o">></span> <span class="k">import</span> <span class="nn">org.apache.spark.sql.types.</span><span class="o">{</span><span class="nc">StructType</span><span class="o">,</span><span class="nc">StructField</span><span class="o">,</span><span class="nc">DoubleType</span><span class="o">}</span> +<span class="k">import</span> <span class="nn">org.apache.spark.sql.types.</span><span class="o">{</span><span class="nc">StructType</span><span class="o">,</span> <span class="nc">StructField</span><span class="o">,</span> <span class="nc">DoubleType</span><span class="o">}</span> + +<span class="n">scala</span><span class="o">></span> <span class="k">import</span> <span class="nn">scala.util.Random</span> +<span class="k">import</span> <span 
class="nn">scala.util.Random</span> + +<span class="n">scala</span><span class="o">></span> <span class="k">val</span> <span class="n">numRows</span> <span class="k">=</span> <span class="mi">10000</span> +<span class="n">numRows</span><span class="k">:</span> <span class="kt">Int</span> <span class="o">=</span> <span class="mi">10000</span> + +<span class="n">scala</span><span class="o">></span> <span class="k">val</span> <span class="n">numCols</span> <span class="k">=</span> <span class="mi">100</span> +<span class="n">numCols</span><span class="k">:</span> <span class="kt">Int</span> <span class="o">=</span> <span class="mi">100</span> + +<span class="n">scala</span><span class="o">></span> <span class="k">val</span> <span class="n">data</span> <span class="k">=</span> <span class="n">sc</span><span class="o">.</span><span class="n">parallelize</span><span class="o">(</span><span class="mi">0</span> <span class="n">to</span> <span class="n">numRows</span><span class="o">-</span><span class="mi">1</span><span class="o">).</span><span class="n">map</span> <span class="o">{</span> <span class="k">_</span> <span class="k">=></span> <span class="nc">Row</span><span class="o">.</span><span class="n">fromSeq</span><span class="o">(</span><span class="nc">Seq</span><span class="o">.</span><span class="n">fill</span><span class="o">(</span><span class="n">numCols</span><span class="o">)(</span><span class="nc">Random</span><span class="o">.</span><span class="n">nextDouble</span><span class="o">))</span> <span class="o">}</span> +<span class="n">data</span><span class="k">:</span> <span class="kt">org.apache.spark.rdd.RDD</span><span class="o">[</span><span class="kt">org.apache.spark.sql.Row</span><span class="o">]</span> <span class="k">=</span> <span class="nc">MapPartitionsRDD</span><span class="o">[</span><span class="err">1</span><span class="o">]</span> <span class="n">at</span> <span class="n">map</span> <span class="n">at</span> <span class="o"><</span><span 
class="n">console</span><span class="k">>:</span><span class="mi">42</span> + +<span class="n">scala</span><span class="o">></span> <span class="k">val</span> <span class="n">schema</span> <span class="k">=</span> <span class="nc">StructType</span><span class="o">((</span><span class="mi">0</span> <span class="n">to</span> <span class="n">numCols</span><span class="o">-</span><span class="mi">1</span><span class="o">).</span><span class="n">map</span> <span class="o">{</span> <span class="n">i</span> <span class="k">=></span> <span class="nc">StructField</span><span class="o">(</span><span class="s">"C"</span> <span class="o">+</span> <span class="n">i</span><span class="o">,</span> <span class="nc">DoubleType</span><span class="o">,</span> <span class="kc">true</span><span class="o">)</span> <span class="o">}</span> <span class="o">)</span> +<span class="n">schema</span><span class="k">:</span> <span class="kt">org.apache.spark.sql.</span><span class="k">type</span><span class="kt">s.StructType</span> <span class="o">=</span> <span class="nc">StructType</span><span class="o">(</span><span class="nc">StructField</span><span class="o">(</span><span class="n">C0</span><span class="o">,</span><span class="nc">DoubleType</span><span class="o">,</span><span class="kc">true</span><span class="o">),</span> <span class="nc">StructField</span><span class="o">(</span><span class="n">C1</span><span class="o">,</span><span class="nc">DoubleType</span><span class="o">,</span><span class="kc">true</span><span class="o">),</span> <span class="nc">StructField</span><span class="o">(</span><span class="n">C2</span><span class="o">,</span><span class="nc">DoubleType</span><span class="o">,</span><span class="kc">true</span><span class="o">),</span> <span class="nc">StructField</span><span class="o">(</span><span class="n">C3</span><span class="o">,</span><span class="nc">DoubleType</span><span class="o">,</span><span class="kc">true</span><span class="o">),</span> <span 
class="nc">StructField</span><span class="o">(</span><span class="n">C4</span><span class="o">,</span><span class="nc">DoubleType</span><span class="o">,</span><span class="kc">true</span><span class="o">),</span> <span class="nc">StructField</span><span class="o">(</span><span class="n">C5</span><span class="o">,</span><span class="nc">DoubleType</span><span class="o">,</span><span class="kc">true</span><span class="o">),</span> <span class="nc">StructField</span><span class="o">(</span><span class="n">C6</span><span class="o">,</span><span class="nc">DoubleType</span><span class="o">,</span><span class="kc">true</span><span class="o">),</span> <span class="nc">StructField</span><span class="o">(</span><span class="n">C7</span><span class="o">,</span><span class="nc">DoubleType</span><span class="o">,</span><span class="kc">true</span><span class="o">),</span> <span class="nc">StructField</span><span class="o">(</span><span class="n">C8</span><span class="o">,</span><span class="nc">DoubleType</span><span class="o">,</span><span class="kc">true</span><span class="o">),</span> <span class="nc">StructField</span><span class="o">(</span><span class="n">C9</span><span class="o">,</span><span class="nc">DoubleType</span><span class="o">,</span><span class="kc">true</span><span class="o">),</span> <span class="nc">StructField</span><span class="o">(</span><span class="n">C10</span><span class="o">,</span><span class="nc">DoubleType</span><span class="o">,</span><span class="kc">true</span><span class="o">),</span> <span class="nc">StructField</span><span class="o">(</span><span class="n">C11</span><span class="o">,</span><span class="nc">DoubleType</span><span class="o">,</span><span class="kc">true</span><span class="o">),</span> <span class="nc">StructField</span><span class="o">(</span><span class="n">C12</span><span class="o">,</span><span class="nc">DoubleType</span><span class="o">,</span><span class="kc">true</span><span class="o">),</span> <span 
class="nc">StructField</span><span class="o">(</span><span class="n">C13</span><span class="o">,</span><span class="nc">DoubleType</span><span class="o">,</span><span class="kc">true</span><span class="o">),</span> <span class="nc">StructField</span><span class="o">(</span><span class="n">C14</span><span class="o">,</span><span class="nc">DoubleType</span><span class="o">,</span><span class="kc">true</span><span class="o">),</span> <span class="nc">StructField</span><span class="o">(</span><span class="n">C15</span><span class="o">,</span><span class="nc">DoubleType</span><span class="o">,</span><span class="kc">true</span><span class="o">),</span> <span class="nc">StructField</span><span class="o">(</span><span class="n">C16</span><span class="o">,</span><span class="nc">DoubleType</span><span class="o">,</span><span class="kc">true</span><span class="o">),</span> <span class="nc">StructField</span><span class="o">(</span><span class="n">C17</span><span class="o">,</span><span class="nc">DoubleType</span><span class="o">,</span><span class="kc">true</span><span class="o">),</span> <span class="nc">StructField</span><span class="o">(</span><span class="n">C18</span><span class="o">,</span><span class="nc">DoubleType</span><span class="o">,</span><span class="kc">true</span><span class="o">),</span> <span class="nc">StructField</span><span class="o">(</span><span class="n">C19</span><span class="o">,</span><span class="nc">DoubleType</span><span class="o">,</span><span class="kc">true</span><span class="o">),</span> <span class="nc">StructField</span><span class="o">(</span><span class="n">C20</span><span class="o">,</span><span class="nc">DoubleType</span><span class="o">,</span><span class="kc">true</span><span class="o">),</span> <span class="nc">StructField</span><span class="o">(</span><span class="n">C21</span><span class="o">,</span><span class="nc">DoubleType</span><span class="o">,</span><span class="kc">true</span><span class="o">),</span> <span 
class="o">...</span> +<span class="n">scala</span><span class="o">></span> <span class="k">val</span> <span class="n">df</span> <span class="k">=</span> <span class="n">spark</span><span class="o">.</span><span class="n">createDataFrame</span><span class="o">(</span><span class="n">data</span><span class="o">,</span> <span class="n">schema</span><span class="o">)</span> +<span class="n">df</span><span class="k">:</span> <span class="kt">org.apache.spark.sql.DataFrame</span> <span class="o">=</span> <span class="o">[</span><span class="kt">C0:</span> <span class="kt">double</span>, <span class="kt">C1:</span> <span class="kt">double</span>, <span class="kt">C2:</span> <span class="kt">double</span>, <span class="kt">C3:</span> <span class="kt">double</span>, <span class="kt">C4:</span> <span class="kt">double</span>, <span class="kt">C5:</span> <span class="kt">double</span>, <span class="kt">C6:</span> <span class="kt">double</span>, <span class="kt">C7:</span> <span class="kt">double</span>, <span class="kt">C8:</span> <span class="kt">double</span>, <span class="kt">C9:</span> <span class="kt">double</span>, <span class="kt">C10:</span> <span class="kt">double</span>, <span class="kt">C11:</span> <span class="kt">double</span>, <span class="kt">C12:</span> <span class="kt">double</span>, <span class="kt">C13:</span> <span class="kt">double</span>, <span class="kt">C14:</span> <span class="kt">double</span>, <span class="kt">C15:</span> <span class="kt">double</span>, <span class="kt">C16:</span> <span class="kt">double</span>, <span class="kt">C17:</span> <span class="kt">double</span>, <span class="kt">C18:</span> <span class="kt">double</span>, <span class="kt">C19:</span> <span class="kt">double</span>, <span class="kt">C20:</span> <span class="kt">double</span>, <span class="kt">C21:</span> <span class="kt">double</span>, <span class="kt">C22:</span> <span class="kt">double</span>, <span class="kt">C23:</span> <span class="kt">double</span>, <span 
class="kt">C24:</span> <span class="kt">double</span>, <span class="kt">C25:</span> <span class="kt">double</span>, <span class="kt">C26:</span> <span class="kt">double</span>, <span class="kt">C27:</span> <span class="kt">double</span>, <span class="kt">C28:</span> <span class="kt">double</span>, <span class="kt">C29:</span> <span class="kt">double</span>, <span class="kt">C30:</span> <span class="kt">double</span>, <span class="kt">C31:</span> <span class="kt">double</span>, <span class="kt">C32:</span> <span class="kt">double</span>, <span class="kt">C33:</span> <span class="kt">double</span>, <span class="kt">C34:</span> <span class="kt">double</span>, <span class="kt">C35:</span> <span class="kt">double</span>, <span class="kt">C36:</span> <span class="kt">double</span>, <span class="kt">C37:</span> <span class="kt">double</span>, <span class="kt">C38:</span> <span class="kt">double</span>, <span class="kt">C39:</span> <span class="kt">double</span>, <span class="kt">C40:</span> <span class="kt">double</span>, <span class="kt">C41:</span> <span class="kt">double</span>, <span class="kt">C42:</span> <span class="kt">double</span>, <span class="kt">C43:</span> <span class="kt">double</span>, <span class="kt">C44:</span> <span class="kt">double</span>, <span class="kt">C45:</span> <span class="kt">double</span>, <span class="kt">C46:</span> <span class="kt">double</span>, <span class="kt">C47:</span> <span class="kt">double</span>, <span class="kt">C48:</span> <span class="kt">double</span>, <span class="kt">C49:</span> <span class="kt">double</span>, <span class="kt">C50:</span> <span class="kt">double</span>, <span class="kt">C51:</span> <span class="kt">double</span>, <span class="kt">C52:</span> <span class="kt">double</span>, <span class="kt">C53:</span> <span class="kt">double</span>, <span class="kt">C54:</span> <span class="kt">double</span>, <span class="kt">C55:</span> <span class="kt">double</span>, <span class="kt">C56:</span> <span 
class="kt">double</span>, <span class="kt">C57:</span> <span class="kt">double</span>, <span class="kt">C58:</span> <span class="kt">double</span>, <span class="kt">C5...</span></code></pre></div> + + </div> + +<div data-lang="Python"> + + <div class="highlight"><pre><code class="language-python" data-lang="python"><span class="n">numRows</span> <span class="o">=</span> <span class="mi">10000</span> +<span class="n">numCols</span> <span class="o">=</span> <span class="mi">100</span> +<span class="kn">from</span> <span class="nn">random</span> <span class="kn">import</span> <span class="n">random</span> +<span class="kn">from</span> <span class="nn">pyspark.sql.types</span> <span class="kn">import</span> <span class="o">*</span> +<span class="n">data</span> <span class="o">=</span> <span class="n">sc</span><span class="o">.</span><span class="n">parallelize</span><span class="p">(</span><span class="nb">range</span><span class="p">(</span><span class="n">numRows</span><span class="p">))</span><span class="o">.</span><span class="nb">map</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span> <span class="p">:</span> <span class="p">[</span> <span class="n">random</span><span class="p">()</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">numCols</span><span class="p">)</span> <span class="p">])</span> +<span class="n">schema</span> <span class="o">=</span> <span class="n">StructType</span><span class="p">([</span> <span class="n">StructField</span><span class="p">(</span><span class="s">"C"</span> <span class="o">+</span> <span class="nb">str</span><span class="p">(</span><span class="n">i</span><span class="p">),</span> <span class="n">DoubleType</span><span class="p">(),</span> <span class="bp">True</span><span class="p">)</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span 
class="nb">range</span><span class="p">(</span><span class="n">numCols</span><span class="p">)</span> <span class="p">])</span> +<span class="n">df</span> <span class="o">=</span> <span class="n">spark</span><span class="o">.</span><span class="n">createDataFrame</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">schema</span><span class="p">)</span></code></pre></div> + + </div> + +<div data-lang="PySpark Shell"> + + <div class="highlight"><pre><code class="language-python" data-lang="python"><span class="o">>>></span> <span class="n">numRows</span> <span class="o">=</span> <span class="mi">10000</span> +<span class="o">>>></span> <span class="n">numCols</span> <span class="o">=</span> <span class="mi">100</span> +<span class="o">>>></span> <span class="kn">from</span> <span class="nn">random</span> <span class="kn">import</span> <span class="n">random</span> +<span class="o">>>></span> <span class="kn">from</span> <span class="nn">pyspark.sql.types</span> <span class="kn">import</span> <span class="o">*</span> +<span class="o">>>></span> <span class="n">data</span> <span class="o">=</span> <span class="n">sc</span><span class="o">.</span><span class="n">parallelize</span><span class="p">(</span><span class="nb">range</span><span class="p">(</span><span class="n">numRows</span><span class="p">))</span><span class="o">.</span><span class="nb">map</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span> <span class="p">:</span> <span class="p">[</span> <span class="n">random</span><span class="p">()</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">numCols</span><span class="p">)</span> <span class="p">])</span> +<span class="o">>>></span> <span class="n">schema</span> <span class="o">=</span> <span class="n">StructType</span><span class="p">([</span> <span 
class="n">StructField</span><span class="p">(</span><span class="s">"C"</span> <span class="o">+</span> <span class="nb">str</span><span class="p">(</span><span class="n">i</span><span class="p">),</span> <span class="n">DoubleType</span><span class="p">(),</span> <span class="bp">True</span><span class="p">)</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">numCols</span><span class="p">)</span> <span class="p">])</span> +<span class="o">>>></span> <span class="n">df</span> <span class="o">=</span> <span class="n">spark</span><span class="o">.</span><span class="n">createDataFrame</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">schema</span><span class="p">)</span></code></pre></div> + + </div> + +</div> + +<p>We’ll create a DML script to find the minimum, maximum, and mean values in a matrix. This +script has one input variable, matrix <code>Xin</code>, and three output variables, <code>minOut</code>, <code>maxOut</code>, and <code>meanOut</code>.</p> + +<p>For performance, we’ll specify metadata indicating that the matrix has 10,000 rows and 100 columns.</p> + +<p>We’ll create a DML script using the ScriptFactory <code>dml</code> method with the <code>minMaxMean</code> script String. The +input variable is specified to be our <code>DataFrame</code> <code>df</code> with <code>MatrixMetadata</code> <code>mm</code>. The output +variables are specified to be <code>minOut</code>, <code>maxOut</code>, and <code>meanOut</code>. 
Notice that inputs are supplied by the +<code>in</code> method, and outputs are supplied by the <code>out</code> method.</p> + +<p>We execute the script and obtain the results as a Tuple by calling <code>getTuple</code> on the results, specifying +the types and names of the output variables.</p> + +<div class="codetabs"> + +<div data-lang="Scala"> + + <div class="highlight"><pre><code class="language-scala" data-lang="scala"><span class="k">val</span> <span class="n">minMaxMean</span> <span class="k">=</span> +<span class="s">""" +minOut = min(Xin) +maxOut = max(Xin) +meanOut = mean(Xin) +"""</span> +<span class="k">val</span> <span class="n">mm</span> <span class="k">=</span> <span class="k">new</span> <span class="nc">MatrixMetadata</span><span class="o">(</span><span class="n">numRows</span><span class="o">,</span> <span class="n">numCols</span><span class="o">)</span> +<span class="k">val</span> <span class="n">minMaxMeanScript</span> <span class="k">=</span> <span class="n">dml</span><span class="o">(</span><span class="n">minMaxMean</span><span class="o">).</span><span class="n">in</span><span class="o">(</span><span class="s">"Xin"</span><span class="o">,</span> <span class="n">df</span><span class="o">,</span> <span class="n">mm</span><span class="o">).</span><span class="n">out</span><span class="o">(</span><span class="s">"minOut"</span><span class="o">,</span> <span class="s">"maxOut"</span><span class="o">,</span> <span class="s">"meanOut"</span><span class="o">)</span> +<span class="k">val</span> <span class="o">(</span><span class="n">min</span><span class="o">,</span> <span class="n">max</span><span class="o">,</span> <span class="n">mean</span><span class="o">)</span> <span class="k">=</span> <span class="n">ml</span><span class="o">.</span><span class="n">execute</span><span class="o">(</span><span class="n">minMaxMeanScript</span><span class="o">).</span><span class="n">getTuple</span><span class="o">[</span><span class="kt">Double</span>, <span 
class="kt">Double</span>, <span class="kt">Double</span><span class="o">](</span><span class="s">"minOut"</span><span class="o">,</span> <span class="s">"maxOut"</span><span class="o">,</span> <span class="s">"meanOut"</span><span class="o">)</span></code></pre></div> + + </div> + +<div data-lang="Spark Shell"> + + <div class="highlight"><pre><code class="language-scala" data-lang="scala"><span class="n">scala</span><span class="o">></span> <span class="k">val</span> <span class="n">minMaxMean</span> <span class="k">=</span> + <span class="o">|</span> <span class="s">""" + | minOut = min(Xin) + | maxOut = max(Xin) + | meanOut = mean(Xin) + | """</span> +<span class="n">minMaxMean</span><span class="k">:</span> <span class="kt">String</span> <span class="o">=</span> +<span class="s">" +minOut = min(Xin) +maxOut = max(Xin) +meanOut = mean(Xin) +"</span> + +<span class="n">scala</span><span class="o">></span> <span class="k">val</span> <span class="n">mm</span> <span class="k">=</span> <span class="k">new</span> <span class="nc">MatrixMetadata</span><span class="o">(</span><span class="n">numRows</span><span class="o">,</span> <span class="n">numCols</span><span class="o">)</span> +<span class="n">mm</span><span class="k">:</span> <span class="kt">org.apache.sysml.api.mlcontext.MatrixMetadata</span> <span class="o">=</span> <span class="n">rows</span><span class="k">:</span> <span class="err">10000</span><span class="o">,</span> <span class="n">columns</span><span class="k">:</span> <span class="err">100</span><span class="o">,</span> <span class="n">non</span><span class="o">-</span><span class="n">zeros</span><span class="k">:</span> <span class="kt">None</span><span class="o">,</span> <span class="n">rows</span> <span class="n">per</span> <span class="n">block</span><span class="k">:</span> <span class="kt">None</span><span class="o">,</span> <span class="n">columns</span> <span class="n">per</span> <span class="n">block</span><span class="k">:</span> <span 
class="kt">None</span> + +<span class="n">scala</span><span class="o">></span> <span class="k">val</span> <span class="n">minMaxMeanScript</span> <span class="k">=</span> <span class="n">dml</span><span class="o">(</span><span class="n">minMaxMean</span><span class="o">).</span><span class="n">in</span><span class="o">(</span><span class="s">"Xin"</span><span class="o">,</span> <span class="n">df</span><span class="o">,</span> <span class="n">mm</span><span class="o">).</span><span class="n">out</span><span class="o">(</span><span class="s">"minOut"</span><span class="o">,</span> <span class="s">"maxOut"</span><span class="o">,</span> <span class="s">"meanOut"</span><span class="o">)</span> +<span class="n">minMaxMeanScript</span><span class="k">:</span> <span class="kt">org.apache.sysml.api.mlcontext.Script</span> <span class="o">=</span> +<span class="nc">Inputs</span><span class="k">:</span> + <span class="err">[1]</span> <span class="o">(</span><span class="kt">DataFrame</span><span class="o">)</span> <span class="kt">Xin:</span> <span class="err">[</span><span class="kt">C0:</span> <span class="kt">double</span><span class="o">,</span> <span class="n">C1</span><span class="k">:</span> <span class="kt">double</span><span class="o">,</span> <span class="n">C2</span><span class="k">:</span> <span class="kt">double</span><span class="o">,</span> <span class="n">C3</span><span class="k">:</span> <span class="kt">double</span><span class="o">,</span> <span class="n">C4</span><span class="k">:</span> <span class="kt">double</span><span class="o">,</span> <span class="n">C5</span><span class="k">:</span> <span class="kt">double</span><span class="o">,</span> <span class="n">C6</span><span class="k">:</span> <span class="kt">double</span><span class="o">,</span> <span class="n">C7</span><span class="k">:</span> <span class="kt">double</span><span class="o">,</span> <span class="o">...</span> + +<span class="nc">Outputs</span><span class="k">:</span> + <span 
class="err">[1]</span> <span class="kt">minOut</span> + <span class="o">[</span><span class="err">2</span><span class="o">]</span> <span class="n">maxOut</span> + <span class="o">[</span><span class="err">3</span><span class="o">]</span> <span class="n">meanOut</span> + + +<span class="n">scala</span><span class="o">></span> <span class="k">val</span> <span class="o">(</span><span class="n">min</span><span class="o">,</span> <span class="n">max</span><span class="o">,</span> <span class="n">mean</span><span class="o">)</span> <span class="k">=</span> <span class="n">ml</span><span class="o">.</span><span class="n">execute</span><span class="o">(</span><span class="n">minMaxMeanScript</span><span class="o">).</span><span class="n">getTuple</span><span class="o">[</span><span class="kt">Double</span>, <span class="kt">Double</span>, <span class="kt">Double</span><span class="o">](</span><span class="s">"minOut"</span><span class="o">,</span> <span class="s">"maxOut"</span><span class="o">,</span> <span class="s">"meanOut"</span><span class="o">)</span> +<span class="n">min</span><span class="k">:</span> <span class="kt">Double</span> <span class="o">=</span> <span class="mf">2.6257349849956313E-8</span> +<span class="n">max</span><span class="k">:</span> <span class="kt">Double</span> <span class="o">=</span> <span class="mf">0.9999999686609718</span> +<span class="n">mean</span><span class="k">:</span> <span class="kt">Double</span> <span class="o">=</span> <span class="mf">0.49996223966662934</span></code></pre></div> + + </div> + +<div data-lang="Python"> + + <div class="highlight"><pre><code class="language-python" data-lang="python"><span class="n">minMaxMean</span> <span class="o">=</span> <span class="s">""" +minOut = min(Xin) +maxOut = max(Xin) +meanOut = mean(Xin) +"""</span> +<span class="n">minMaxMeanScript</span> <span class="o">=</span> <span class="n">dml</span><span class="p">(</span><span class="n">minMaxMean</span><span class="p">)</span><span 
class="o">.</span><span class="nb">input</span><span class="p">(</span><span class="s">"Xin"</span><span class="p">,</span> <span class="n">df</span><span class="p">)</span><span class="o">.</span><span class="n">output</span><span class="p">(</span><span class="s">"minOut"</span><span class="p">,</span> <span class="s">"maxOut"</span><span class="p">,</span> <span class="s">"meanOut"</span><span class="p">)</span> +<span class="nb">min</span><span class="p">,</span> <span class="nb">max</span><span class="p">,</span> <span class="n">mean</span> <span class="o">=</span> <span class="n">ml</span><span class="o">.</span><span class="n">execute</span><span class="p">(</span><span class="n">minMaxMeanScript</span><span class="p">)</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s">"minOut"</span><span class="p">,</span> <span class="s">"maxOut"</span><span class="p">,</span> <span class="s">"meanOut"</span><span class="p">)</span></code></pre></div> + + </div> + +<div data-lang="PySpark Shell"> + + <div class="highlight"><pre><code class="language-python" data-lang="python"><span class="o">>>></span> <span class="n">minMaxMean</span> <span class="o">=</span> <span class="s">""" +... minOut = min(Xin) +... maxOut = max(Xin) +... meanOut = mean(Xin) +... """</span>
[... 2334 lines stripped ...]
