Added: systemml/site/docs/1.1.0/algorithms-matrix-factorization.html URL: http://svn.apache.org/viewvc/systemml/site/docs/1.1.0/algorithms-matrix-factorization.html?rev=1828046&view=auto ============================================================================== --- systemml/site/docs/1.1.0/algorithms-matrix-factorization.html (added) +++ systemml/site/docs/1.1.0/algorithms-matrix-factorization.html Fri Mar 30 04:31:05 2018 @@ -0,0 +1,793 @@ +<!DOCTYPE html> +<!--[if lt IE 7]> <html class="no-js lt-ie9 lt-ie8 lt-ie7"> <![endif]--> +<!--[if IE 7]> <html class="no-js lt-ie9 lt-ie8"> <![endif]--> +<!--[if IE 8]> <html class="no-js lt-ie9"> <![endif]--> +<!--[if gt IE 8]><!--> <html class="no-js"> <!--<![endif]--> + <head> + <title>SystemML Algorithms Reference - Matrix Factorization - SystemML 1.1.0</title> + <meta charset="utf-8"> + <meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1"> + + <meta name="viewport" content="width=device-width"> + <link rel="stylesheet" href="css/bootstrap.min.css"> + <link rel="stylesheet" href="css/main.css"> + <link rel="stylesheet" href="css/pygments-default.css"> + <link rel="shortcut icon" href="img/favicon.png"> + </head> + <body> + <!--[if lt IE 7]> + <p class="chromeframe">You are using an outdated browser. 
<a href="http://browsehappy.com/">Upgrade your browser today</a> or <a href="http://www.google.com/chromeframe/?redirect=true">install Google Chrome Frame</a> to better experience this site.</p> + <![endif]--> + + <header class="navbar navbar-default navbar-fixed-top" id="topbar"> + <div class="container"> + <div class="navbar-header"> + <div class="navbar-brand brand projectlogo"> + <a href="http://systemml.apache.org/"><img class="logo" src="img/systemml-logo.png" alt="Apache SystemML" title="Apache SystemML"/></a> + </div> + <div class="navbar-brand brand projecttitle"> + <a href="http://systemml.apache.org/">Apache SystemML<sup id="trademark">&trade;</sup></a><br/> + <span class="version">1.1.0</span> + </div> + <button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target=".navbar-collapse"> + <span class="sr-only">Toggle navigation</span> + <span class="icon-bar"></span> + <span class="icon-bar"></span> + <span class="icon-bar"></span> + </button> + </div> + <nav class="navbar-collapse collapse"> + <ul class="nav navbar-nav navbar-right"> + <li><a href="index.html">Overview</a></li> + <li><a href="https://github.com/apache/systemml">GitHub</a></li> + <li class="dropdown"> + <a href="#" class="dropdown-toggle" data-toggle="dropdown">Documentation<b class="caret"></b></a> + <ul class="dropdown-menu" role="menu"> + <li><b>Running SystemML:</b></li> + <li><a href="https://github.com/apache/systemml">SystemML GitHub README</a></li> + <li><a href="spark-mlcontext-programming-guide.html">Spark MLContext</a></li> + <li><a href="spark-batch-mode.html">Spark Batch Mode</a> + <li><a href="hadoop-batch-mode.html">Hadoop Batch Mode</a> + <li><a href="standalone-guide.html">Standalone Guide</a></li> + <li><a href="jmlc.html">Java Machine Learning Connector (JMLC)</a> + <li class="divider"></li> + <li><b>Language Guides:</b></li> + <li><a href="dml-language-reference.html">DML Language Reference</a></li> + <li><a 
href="beginners-guide-to-dml-and-pydml.html">Beginner's Guide to DML and PyDML</a></li> + <li><a href="beginners-guide-python.html">Beginner's Guide for Python Users</a></li> + <li><a href="python-reference.html">Reference Guide for Python Users</a></li> + <li class="divider"></li> + <li><b>ML Algorithms:</b></li> + <li><a href="algorithms-reference.html">Algorithms Reference</a></li> + <li class="divider"></li> + <li><b>Tools:</b></li> + <li><a href="debugger-guide.html">Debugger Guide</a></li> + <li><a href="developer-tools-systemml.html">IDE Guide</a></li> + <li class="divider"></li> + <li><b>Other:</b></li> + <li><a href="contributing-to-systemml.html">Contributing to SystemML</a></li> + <li><a href="engine-dev-guide.html">Engine Developer Guide</a></li> + <li><a href="troubleshooting-guide.html">Troubleshooting Guide</a></li> + <li><a href="release-process.html">Release Process</a></li> + </ul> + </li> + + <li class="dropdown"> + <a href="#" class="dropdown-toggle" data-toggle="dropdown">API Docs<b class="caret"></b></a> + <ul class="dropdown-menu" role="menu"> + <li><a href="./api/java/index.html">Java</a></li> + <li><a href="./api/python/index.html">Python</a></li> + </ul> + </li> + + <li class="dropdown"> + <a href="#" class="dropdown-toggle" data-toggle="dropdown">Issues<b class="caret"></b></a> + <ul class="dropdown-menu" role="menu"> + <li><b>JIRA:</b></li> + <li><a href="https://issues.apache.org/jira/browse/SYSTEMML">SystemML JIRA</a></li> + + </ul> + </li> + </ul> + </nav> + </div> + </header> + + <div class="container" id="content"> + + <h1 class="title"><a href="algorithms-reference.html">SystemML Algorithms Reference</a></h1> + + + <!-- + +--> + +<h1 id="matrix-factorization">5 Matrix Factorization</h1> + +<h2 id="principal-component-analysis">5.1 Principal Component Analysis</h2> + +<h3 id="description">Description</h3> + +<p>Principal Component Analysis (PCA) is a simple, non-parametric method to +transform the given data set with possibly 
correlated columns into a set +of linearly uncorrelated or orthogonal columns, called <em>principal +components</em>. The principal components are ordered in such a way +that the first component accounts for the largest possible variance, +followed by the remaining principal components in decreasing order of +the amount of variance they capture from the data. PCA is often used as a +dimensionality reduction technique, where the original data is projected +or rotated onto a low-dimensional space with basis vectors defined by +the top-$K$ (for a given value of $K$) principal components.</p> + +<h3 id="usage">Usage</h3> + +<div class="codetabs"> +<div data-lang="Hadoop"> + <pre><code>hadoop jar SystemML.jar -f PCA.dml + -nvargs INPUT=<file> + K=<int> + CENTER=[int] + SCALE=[int] + PROJDATA=<int> + OFMT=[format] + MODEL=<file> + OUTPUT=<file> +</code></pre> + </div> +<div data-lang="Spark"> + <pre><code>$SPARK_HOME/bin/spark-submit --master yarn + --deploy-mode cluster + --conf spark.driver.maxResultSize=0 + SystemML.jar + -f PCA.dml + -config SystemML-config.xml + -exec hybrid_spark + -nvargs INPUT=<file> + K=<int> + CENTER=[int] + SCALE=[int] + PROJDATA=<int> + OFMT=[format] + MODEL=<file> + OUTPUT=<file> +</code></pre> + </div> +</div> + +<h4 id="arguments">Arguments</h4> + +<p><strong>INPUT</strong>: Location (on HDFS) to read the input matrix.</p> + +<p><strong>K</strong>: Indicates the dimension of the new vector space constructed from $K$ + principal components. It must be a value between <code>1</code> and the number + of columns in the input data.</p> + +<p><strong>CENTER</strong>: (default: <code>0</code>) <code>0</code> or <code>1</code>. Indicates whether or not to + <em>center</em> the input data prior to the computation of + principal components.</p> + +<p><strong>SCALE</strong>: (default: <code>0</code>) <code>0</code> or <code>1</code>. 
Indicates whether or not to + <em>scale</em> the input data prior to the computation of + principal components.</p> + +<p><strong>PROJDATA</strong>: <code>0</code> or <code>1</code>. Indicates whether or not the input data must be projected + onto the new vector space defined by the principal components.</p> + +<p><strong>OFMT</strong>: (default: <code>"csv"</code>) Matrix file output format, such as <code>text</code>, +<code>mm</code>, or <code>csv</code>; see read/write functions in +SystemML Language Reference for details.</p> + +<p><strong>MODEL</strong>: Either the location (on HDFS) where the computed model is + stored, or the location of an existing model.</p> + +<p><strong>OUTPUT</strong>: Location (on HDFS) to store the data rotated onto the new + vector space.</p> + +<h4 id="examples">Examples</h4> + +<div class="codetabs"> +<div data-lang="Hadoop"> + <pre><code>hadoop jar SystemML.jar -f PCA.dml + -nvargs INPUT=/user/ml/input.mtx + K=10 + CENTER=1 + SCALE=1 + OFMT=csv + PROJDATA=1 + OUTPUT=/user/ml/pca_output/ +</code></pre> + </div> +<div data-lang="Spark"> + <pre><code>$SPARK_HOME/bin/spark-submit --master yarn + --deploy-mode cluster + --conf spark.driver.maxResultSize=0 + SystemML.jar + -f PCA.dml + -config SystemML-config.xml + -exec hybrid_spark + -nvargs INPUT=/user/ml/input.mtx + K=10 + CENTER=1 + SCALE=1 + OFMT=csv + PROJDATA=1 + OUTPUT=/user/ml/pca_output/ +</code></pre> + </div> +</div> + +<div class="codetabs"> +<div data-lang="Hadoop"> + <pre><code>hadoop jar SystemML.jar -f PCA.dml + -nvargs INPUT=/user/ml/test_input.mtx + K=10 + CENTER=1 + SCALE=1 + OFMT=csv + PROJDATA=1 + MODEL=/user/ml/pca_output/ + OUTPUT=/user/ml/test_output.mtx +</code></pre> + </div> +<div data-lang="Spark"> + <pre><code>$SPARK_HOME/bin/spark-submit --master yarn + --deploy-mode cluster + --conf spark.driver.maxResultSize=0 + SystemML.jar + -f PCA.dml + -config SystemML-config.xml + -exec hybrid_spark + -nvargs INPUT=/user/ml/test_input.mtx + K=10 + CENTER=1 + SCALE=1 + OFMT=csv 
+ PROJDATA=1 + MODEL=/user/ml/pca_output/ + OUTPUT=/user/ml/test_output.mtx +</code></pre> + </div> +</div> + +<h4 id="details">Details</h4> + +<p>Principal Component Analysis (PCA) is a non-parametric procedure for +orthogonal linear transformation of the input data to a new coordinate +system, such that the greatest variance by some projection of the data +comes to lie on the first coordinate (called the first principal +component), the second greatest variance on the second coordinate, and +so on. In other words, PCA first selects a normalized direction in +$m$-dimensional space ($m$ is the number of columns in the input data) +along which the variance in the input data is maximized: this is referred +to as the first principal component. It then repeatedly finds other +directions (principal components) in which the variance is maximized. At +every step, PCA restricts the search to only those directions that are +perpendicular to all previously selected directions. By doing so, PCA +aims to reduce the redundancy among input variables. To understand the +notion of redundancy, consider an extreme scenario with a data set +comprising two variables, where the first one denotes some quantity +expressed in meters, and the other variable represents the same quantity +but in inches. Both variables evidently capture redundant +information, and hence one of them can be removed. In a general +scenario, keeping only a few linear combinations of the input variables would +both express the data more concisely and reduce the number of variables. 
+This is why PCA is often used as a dimensionality reduction technique.</p> + +<p>The specific method to compute such a new coordinate system is as +follows: compute a covariance matrix $C$ that measures the strength of +correlation among all pairs of variables in the input data; factorize +$C$ via eigendecomposition to calculate its eigenvalues and +eigenvectors; and finally, order the eigenvectors in decreasing order of +their corresponding eigenvalues. The computed eigenvectors (also known as +<em>loadings</em>) define the new coordinate system; the eigenvalues give +the amount of variance in the input data explained by each coordinate or +eigenvector, and their square roots give the corresponding standard +deviations.</p> + +<h4 id="returns">Returns</h4> + +<p>When MODEL is not provided, the PCA procedure is +applied to the INPUT data to generate the MODEL as well as the rotated data +OUTPUT (if PROJDATA is set to $1$) in the new coordinate system. The +produced model consists of the basis vectors MODEL$/dominant.eigen.vectors$ +for the new coordinate system; the eigenvalues +MODEL$/dominant.eigen.values$; and the standard deviations +MODEL$/dominant.eigen.standard.deviations$ of the principal components. When +MODEL is provided, the INPUT data is rotated according to the coordinate +system defined by MODEL$/dominant.eigen.vectors$. The resulting data is +stored at location OUTPUT.</p> + +<hr /> + +<h2 id="matrix-completion-via-alternating-minimizations">5.2 Matrix Completion via Alternating Minimizations</h2> + +<h3 id="description-1">Description</h3> + +<p>Low-rank matrix completion is an effective technique for statistical +data analysis widely used in data mining and machine learning +applications. Matrix completion is a variant of low-rank matrix +factorization with the goal of recovering a partially observed and +potentially noisy matrix from a subset of its revealed entries. 
Perhaps +the most popular application of matrix completion is collaborative filtering in +recommender systems. In this setting, the rows in the data matrix +correspond to users, the columns to items such as movies, and entries to +feedback provided by users for items. The goal is to predict missing +entries of the rating matrix. This implementation uses the alternating +least-squares (ALS) technique for solving large-scale matrix completion +problems.</p> + +<h3 id="usage-1">Usage</h3> + +<p><strong>ALS</strong>:</p> + +<div class="codetabs"> +<div data-lang="Hadoop"> + <pre><code>hadoop jar SystemML.jar -f ALS.dml + -nvargs V=<file> + L=<file> + R=<file> + rank=[int] + reg=[L2|wL2] + lambda=[double] + maxi=[int] + check=[boolean] + thr=[double] + fmt=[format] +</code></pre> + </div> +<div data-lang="Spark"> + <pre><code>$SPARK_HOME/bin/spark-submit --master yarn + --deploy-mode cluster + --conf spark.driver.maxResultSize=0 + SystemML.jar + -f ALS.dml + -config SystemML-config.xml + -exec hybrid_spark + -nvargs V=<file> + L=<file> + R=<file> + rank=[int] + reg=[L2|wL2] + lambda=[double] + maxi=[int] + check=[boolean] + thr=[double] + fmt=[format] +</code></pre> + </div> +</div> + +<p><strong>ALS Prediction</strong>:</p> + +<div class="codetabs"> +<div data-lang="Hadoop"> + <pre><code>hadoop jar SystemML.jar -f ALS_predict.dml + -nvargs X=<file> + Y=<file> + L=<file> + R=<file> + Vrows=<int> + Vcols=<int> + fmt=[format] +</code></pre> + </div> +<div data-lang="Spark"> + <pre><code>$SPARK_HOME/bin/spark-submit --master yarn + --deploy-mode cluster + --conf spark.driver.maxResultSize=0 + SystemML.jar + -f ALS_predict.dml + -config SystemML-config.xml + -exec hybrid_spark + -nvargs X=<file> + Y=<file> + L=<file> + R=<file> + Vrows=<int> + Vcols=<int> + fmt=[format] +</code></pre> + </div> +</div> + +<p><strong>ALS Top-K Prediction</strong>:</p> + +<div class="codetabs"> +<div data-lang="Hadoop"> + 
<pre><code>hadoop jar SystemML.jar -f ALS_topk_predict.dml + -nvargs X=<file> + Y=<file> + L=<file> + R=<file> + V=<file> + K=[int] + fmt=[format] +</code></pre> + </div> +<div data-lang="Spark"> + <pre><code>$SPARK_HOME/bin/spark-submit --master yarn + --deploy-mode cluster + --conf spark.driver.maxResultSize=0 + SystemML.jar + -f ALS_topk_predict.dml + -config SystemML-config.xml + -exec hybrid_spark + -nvargs X=<file> + Y=<file> + L=<file> + R=<file> + V=<file> + K=[int] + fmt=[format] +</code></pre> + </div> +</div> + +<h3 id="arguments---als">Arguments - ALS</h3> + +<p><strong>V</strong>: Location (on HDFS) to read the input (user-item) matrix $V$ to be +factorized</p> + +<p><strong>L</strong>: Location (on HDFS) to write the left (user) factor matrix $L$</p> + +<p><strong>R</strong>: Location (on HDFS) to write the right (item) factor matrix $R$</p> + +<p><strong>rank</strong>: (default: <code>10</code>) Rank of the factorization</p> + +<p><strong>reg</strong>: (default: <code>L2</code>) Regularization:</p> + +<ul> + <li><code>L2</code> = <code>L2</code> regularization</li> + <li><code>wL2</code> = weighted <code>L2</code> regularization</li> +</ul> + +<p><strong>lambda</strong>: (default: <code>0.000001</code>) Regularization parameter</p> + +<p><strong>maxi</strong>: (default: <code>50</code>) Maximum number of iterations</p> + +<p><strong>check</strong>: (default: <code>FALSE</code>) Check for convergence after every iteration, i.e., updating +$L$ and $R$ once</p> + +<p><strong>thr</strong>: (default: <code>0.0001</code>) Assuming <code>check=TRUE</code>, the algorithm stops and +convergence is declared if the decrease in loss in any two consecutive +iterations falls below threshold <code>thr</code>; if +<code>check=FALSE</code>, parameter <code>thr</code> is ignored.</p> + +<p><strong>fmt</strong>: (default: <code>"text"</code>) Matrix file output format, such as <code>text</code>, +<code>mm</code>, or <code>csv</code>; see read/write functions in 
+SystemML Language Reference for details.</p> + +<h3 id="arguments---als-predictiontop-k-prediction">Arguments - ALS Prediction/Top-K Prediction</h3> + +<p><strong>X</strong>: Location (on HDFS) to read the input matrix $X$ with following format:</p> + +<ul> + <li>for <code>ALS_predict.dml</code>: a 2-column matrix that contains +the user-ids (first column) and the item-ids (second column)</li> + <li>for <code>ALS_topk_predict.dml</code>: a 1-column matrix that +contains the user-ids</li> +</ul> + +<p><strong>Y</strong>: Location (on HDFS) to write the output of prediction with the following +format:</p> + +<ul> + <li>for <code>ALS_predict.dml</code>: a 3-column matrix that contains +the user-ids (first column), the item-ids (second column) and the +predicted ratings (third column)</li> + <li>for <code>ALS_topk_predict.dml</code>: a (<code>K+1</code>)-column matrix +that contains the user-ids in the first column and the top-K +item-ids in the remaining <code>K</code> columns will be stored at +<code>Y</code>. 
Additionally, a matrix with the same dimensions that +contains the corresponding actual top-K ratings will be stored at +<code>Y.ratings</code>; see below for details</li> +</ul> + +<p><strong>L</strong>: Location (on HDFS) to read the left (user) factor matrix $L$</p> + +<p><strong>R</strong>: Location (on HDFS) to read the right (item) factor matrix $R$</p> + +<p><strong>V</strong>: Location (on HDFS) to read the user-item matrix $V$</p> + +<p><strong>Vrows</strong>: Number of rows of $V$ (i.e., number of users)</p> + +<p><strong>Vcols</strong>: Number of columns of $V$ (i.e., number of items)</p> + +<p><strong>K</strong>: (default: <code>5</code>) Number of top-K items for top-K prediction</p> + +<p><strong>fmt</strong>: (default: <code>"text"</code>) Matrix file output format, such as <code>text</code>, +<code>mm</code>, or <code>csv</code>; see read/write functions in +SystemML Language Reference for details.</p> + +<h3 id="examples-1">Examples</h3> + +<p><strong>ALS</strong>:</p> + +<div class="codetabs"> +<div data-lang="Hadoop"> + <pre><code>hadoop jar SystemML.jar -f ALS.dml + -nvargs V=/user/ml/V + L=/user/ml/L + R=/user/ml/R + rank=10 + reg="wL2" + lambda=0.0001 + maxi=50 + check=TRUE + thr=0.001 + fmt=csv +</code></pre> + </div> +<div data-lang="Spark"> + <pre><code>$SPARK_HOME/bin/spark-submit --master yarn + --deploy-mode cluster + --conf spark.driver.maxResultSize=0 + SystemML.jar + -f ALS.dml + -config SystemML-config.xml + -exec hybrid_spark + -nvargs V=/user/ml/V + L=/user/ml/L + R=/user/ml/R + rank=10 + reg="wL2" + lambda=0.0001 + maxi=50 + check=TRUE + thr=0.001 + fmt=csv +</code></pre> + </div> +</div> + +<p><strong>ALS Prediction</strong>:</p> + +<p>To compute predicted ratings for a given list of users and items:</p> + +<div class="codetabs"> +<div data-lang="Hadoop"> + <pre><code>hadoop jar SystemML.jar -f ALS_predict.dml + -nvargs X=/user/ml/X + Y=/user/ml/Y + L=/user/ml/L + R=/user/ml/R + Vrows=100000 + Vcols=10000 + fmt=csv +</code></pre> 
+ </div> +<div data-lang="Spark"> + <pre><code>$SPARK_HOME/bin/spark-submit --master yarn + --deploy-mode cluster + --conf spark.driver.maxResultSize=0 + SystemML.jar + -f ALS_predict.dml + -config SystemML-config.xml + -exec hybrid_spark + -nvargs X=/user/ml/X + Y=/user/ml/Y + L=/user/ml/L + R=/user/ml/R + Vrows=100000 + Vcols=10000 + fmt=csv +</code></pre> + </div> +</div> + +<p><strong>ALS Top-K Prediction</strong>:</p> + +<p>To compute top-K items with highest predicted ratings together with the +predicted ratings for a given list of users:</p> + +<div class="codetabs"> +<div data-lang="Hadoop"> + <pre><code>hadoop jar SystemML.jar -f ALS_topk_predict.dml + -nvargs X=/user/ml/X + Y=/user/ml/Y + L=/user/ml/L + R=/user/ml/R + V=/user/ml/V + K=10 + fmt=csv +</code></pre> + </div> +<div data-lang="Spark"> + <pre><code>$SPARK_HOME/bin/spark-submit --master yarn + --deploy-mode cluster + --conf spark.driver.maxResultSize=0 + SystemML.jar + -f ALS_topk_predict.dml + -config SystemML-config.xml + -exec hybrid_spark + -nvargs X=/user/ml/X + Y=/user/ml/Y + L=/user/ml/L + R=/user/ml/R + V=/user/ml/V + K=10 + fmt=csv +</code></pre> + </div> +</div> + +<h3 id="details-1">Details</h3> + +<p>Given an $m \times n$ input matrix $V$ and a rank parameter +$r \ll \min{(m,n)}$, low-rank matrix factorization seeks to find an +$m \times r$ matrix $L$ and an $r \times n$ matrix $R$ such that +$V \approx LR$, i.e., we aim to approximate $V$ by the low-rank matrix +$LR$. The quality of the approximation is determined by an +application-dependent loss function $\mathcal{L}$. We aim at finding the +loss-minimizing factor matrices, i.e.,</p> + +<script type="math/tex; mode=display">\begin{equation} +(L^*, R^*) = \textrm{argmin}_{L,R}{\mathcal{L}(V,L,R)} +\end{equation}</script> + +<p>In the +context of collaborative filtering in the recommender systems it is +often the case that the input matrix $V$ contains several missing +entries. 
Such entries are encoded with the value 0, and the loss function +is computed based only on the nonzero entries in $V$, i.e.,</p> + +<script type="math/tex; mode=display">%\label{eq:loss} + \mathcal{L}=\sum_{(i,j)\in\Omega}l(V_{ij},L_{i*},R_{*j})</script> + +<p>where +<script type="math/tex">L_{i*}</script> and <script type="math/tex">R_{*j}</script>, respectively, denote the $i$th row of $L$ and the +$j$th column of $R$, <script type="math/tex">\Omega=\{\omega_1,\dots,\omega_N\}</script> denotes the +training set containing the observed (nonzero) entries in $V$, and $l$ +is some local loss function.</p> + +<p>ALS is an optimization technique that can +be used to solve quadratic problems. For matrix completion, the +algorithm repeatedly keeps one of the unknown matrices ($L$ or $R$) +fixed and optimizes the other one. In particular, ALS alternates between +recomputing the rows of $L$ in one step and the columns of $R$ in the +subsequent step. Our implementation of the ALS algorithm supports the +loss functions summarized in <a href="algorithms-matrix-factorization.html#table16"><strong>Table 16</strong></a> +commonly used for matrix completion <a href="algorithms-bibliography.html">[ZhouWSP08]</a>.</p> + +<hr /> + +<p><a name="table16"></a> +<strong>Table 16</strong>: Popular loss functions supported by our ALS implementation; <script type="math/tex">N_{i*}</script> + and <script type="math/tex">N_{*j}</script>, respectively, denote the number of nonzero entries in + row $i$ and column $j$ of $V$.</p> + +<table> + <thead> + <tr> + <th>Loss</th> + <th>Definition</th> + </tr> + </thead> + <tbody> + <tr> + <td><script type="math/tex">\mathcal{L}_\text{Nzsl}</script></td> + <td><script type="math/tex">\sum_{i,j:V_{ij}\neq 0} (V_{ij} - [LR]_{ij})^2</script></td> + </tr> + <tr> + <td><script type="math/tex">\mathcal{L}_\text{Nzsl+L2}</script></td> + <td><script type="math/tex">\mathcal{L}_\text{Nzsl} + \lambda \Bigl( \sum_{ik} L_{ik}^2 + \sum_{kj} R_{kj}^2 
\Bigr)</script></td> + </tr> + <tr> + <td><script type="math/tex">\mathcal{L}_\text{Nzsl+wL2}</script></td> + <td><script type="math/tex">\mathcal{L}_\text{Nzsl} + \lambda \Bigl(\sum_{ik}N_{i*} L_{ik}^2 + \sum_{kj}N_{*j} R_{kj}^2 \Bigr)</script></td> + </tr> + </tbody> +</table> + +<hr /> + +<p>Note that the matrix completion problem as defined in (1) +is a non-convex problem for all loss functions from +<a href="algorithms-matrix-factorization.html#table16"><strong>Table 16</strong></a>. However, when fixing one of the matrices +$L$ or $R$, we get a least-squares problem with a globally optimal +solution. For example, for the case of <script type="math/tex">\mathcal{L}_\text{Nzsl+wL2}</script> we +have the following closed form solutions</p> + +<script type="math/tex; mode=display">% <![CDATA[ +\begin{aligned} + L^\top_{n+1,i*} &\leftarrow (R^{(i)}_n {[R^{(i)}_n]}^\top + \lambda N_2 I)^{-1} R_n V^\top_{i*}, \\ + R_{n+1,*j} &\leftarrow ({[L^{(j)}_{n+1}]}^\top L^{(j)}_{n+1} + \lambda N_1 I)^{-1} L^\top_{n+1} V_{*j}, + \end{aligned} %]]></script> + +<p>where <script type="math/tex">L_{n+1,i*}</script> (resp. <script type="math/tex">R_{n+1,*j}</script>) denotes the +$i$th row of <script type="math/tex">L_{n+1}</script> (resp. $j$th column of <script type="math/tex">R_{n+1}</script>), $\lambda$ +denotes the regularization parameter, $I$ is the identity matrix of +appropriate dimensionality, <script type="math/tex">V_{i*}</script> (resp. <script type="math/tex">V_{*j}</script>) denotes the +revealed entries in row $i$ (column $j$), <script type="math/tex">R^{(i)}_n</script> (resp. +<script type="math/tex">L^{(j)}_{n+1}</script>) refers to the corresponding columns of $R_n$ (rows of +<script type="math/tex">L_{n+1}</script>), and $N_1$ (resp. 
$N_2$) denotes a diagonal matrix that +contains the number of nonzero entries in row $i$ (column $j$) of $V$.</p> + +<p><strong>Prediction.</strong> Based on the factor matrices computed by ALS, we provide +two prediction scripts:</p> + +<ol> + <li><code>ALS_predict.dml</code> computes the predicted ratings for a given +list of users and items.</li> + <li><code>ALS_topk_predict.dml</code> computes the top-K items (where $K$ is +given as input) with the highest predicted ratings, together with the +corresponding predicted ratings, for a given list of users.</li> +</ol> + +<h3 id="returns-1">Returns</h3> + +<p>We output the factor matrices $L$ and $R$ after the algorithm has +converged. The algorithm is declared converged if one of two +criteria is met: (1) the decrease in the value of the loss function falls +below the threshold <code>thr</code> given as an input parameter (if parameter +<code>check=TRUE</code>), or (2) the maximum number of iterations +(defined by parameter <code>maxi</code>) is reached. Note that for a +given user $i$, prediction is possible only if user $i$ has rated at +least one item, i.e., row $i$ in matrix $V$ has at least one nonzero +entry. If some users have not rated any items, the corresponding +rows of $L$ will contain only 0s; similarly, if some items have not been +rated at all, the corresponding columns of $R$ will contain only 0s. Our +prediction scripts output the predicted ratings for a given list of +users and items, as well as the top-K items with the highest predicted +ratings, together with the predicted ratings, for a given list of users. 
+Note that the predictions will only be provided for the users who have +rated at least one item, i.e., the corresponding rows contain at least +one nonzero entry.</p> + + + + </div> <!-- /container --> + + + + <script src="js/vendor/jquery-1.12.0.min.js"></script> + <script src="js/vendor/bootstrap.min.js"></script> + <script src="js/vendor/anchor.min.js"></script> + <script src="js/main.js"></script> + + + + + + <!-- Analytics --> + <script> + (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){ + (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o), + m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m) + })(window,document,'script','//www.google-analytics.com/analytics.js','ga'); + ga('create', 'UA-71553733-1', 'auto'); + ga('send', 'pageview'); + </script> + + + + <!-- MathJax Section --> + <script type="text/x-mathjax-config"> + MathJax.Hub.Config({ + TeX: { equationNumbers: { autoNumber: "AMS" } } + }); + </script> + <script> + // Note that we load MathJax this way to work with local file (file://), HTTP and HTTPS. + // We could use "//cdn.mathjax...", but that won't support "file://". + (function(d, script) { + script = d.createElement('script'); + script.type = 'text/javascript'; + script.async = true; + script.onload = function(){ + MathJax.Hub.Config({ + tex2jax: { + inlineMath: [ ["$", "$"], ["\\\\(","\\\\)"] ], + displayMath: [ ["$$","$$"], ["\\[", "\\]"] ], + processEscapes: true, + skipTags: ['script', 'noscript', 'style', 'textarea', 'pre'] + } + }); + }; + script.src = ('https:' == document.location.protocol ? 'https://' : 'http://') + + 'cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML'; + d.getElementsByTagName('head')[0].appendChild(script); + }(document)); + </script> + </body> +</html>
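As an aside to the ALS details in the page above: the closed-form alternating updates for the Nzsl+wL2 loss in Table 16 can be sketched outside SystemML as well. The following is a minimal NumPy sketch, not part of ALS.dml (which remains the reference implementation); the function name `als`, the dense 0-coded input `V`, and the random initialization are illustrative assumptions. It alternately fixes one factor and solves the per-row/per-column least-squares problems, weighting the regularizer by the number of observed entries as wL2 prescribes.

```python
import numpy as np

def als(V, rank=10, lam=1e-6, maxi=50):
    """Matrix completion via ALS with weighted-L2 (wL2) regularization.

    V is a dense m x n array in which 0 marks an unobserved entry.
    Returns L (m x rank) and R (rank x n) with V ~= L @ R on the
    observed entries.
    """
    m, n = V.shape
    rng = np.random.default_rng(0)
    L = rng.standard_normal((m, rank))
    R = rng.standard_normal((rank, n))
    observed = V != 0
    I = np.eye(rank)
    for _ in range(maxi):
        # Fix R; each row of L has a closed-form least-squares solution.
        for i in range(m):
            obs = observed[i]
            if not obs.any():
                L[i] = 0.0  # user with no ratings: factor stays all zeros
                continue
            Ro = R[:, obs]  # columns of R for the items user i rated
            L[i] = np.linalg.solve(Ro @ Ro.T + lam * obs.sum() * I,
                                   Ro @ V[i, obs])
        # Fix L; each column of R has a closed-form least-squares solution.
        for j in range(n):
            obs = observed[:, j]
            if not obs.any():
                R[:, j] = 0.0  # item nobody rated: factor stays all zeros
                continue
            Lo = L[obs]  # rows of L for the users who rated item j
            R[:, j] = np.linalg.solve(Lo.T @ Lo + lam * obs.sum() * I,
                                      Lo.T @ V[obs, j])
    return L, R
```

For plain `reg=L2` one would drop the `obs.sum()` weight in the regularization term; convergence checking via `check`/`thr` (present in the real script) is omitted here for brevity.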
Added: systemml/site/docs/1.1.0/algorithms-reference.html URL: http://svn.apache.org/viewvc/systemml/site/docs/1.1.0/algorithms-reference.html?rev=1828046&view=auto ============================================================================== --- systemml/site/docs/1.1.0/algorithms-reference.html (added) +++ systemml/site/docs/1.1.0/algorithms-reference.html Fri Mar 30 04:31:05 2018 @@ -0,0 +1,214 @@ +<!DOCTYPE html> +<!--[if lt IE 7]> <html class="no-js lt-ie9 lt-ie8 lt-ie7"> <![endif]--> +<!--[if IE 7]> <html class="no-js lt-ie9 lt-ie8"> <![endif]--> +<!--[if IE 8]> <html class="no-js lt-ie9"> <![endif]--> +<!--[if gt IE 8]><!--> <html class="no-js"> <!--<![endif]--> + <head> + <title>SystemML Algorithms Reference - SystemML 1.1.0</title> + <meta charset="utf-8"> + <meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1"> + + <meta name="description" content="SystemML Algorithms Reference"> + + <meta name="viewport" content="width=device-width"> + <link rel="stylesheet" href="css/bootstrap.min.css"> + <link rel="stylesheet" href="css/main.css"> + <link rel="stylesheet" href="css/pygments-default.css"> + <link rel="shortcut icon" href="img/favicon.png"> + </head> + <body> + <!--[if lt IE 7]> + <p class="chromeframe">You are using an outdated browser. 
<a href="http://browsehappy.com/">Upgrade your browser today</a> or <a href="http://www.google.com/chromeframe/?redirect=true">install Google Chrome Frame</a> to better experience this site.</p> + <![endif]--> + + <header class="navbar navbar-default navbar-fixed-top" id="topbar"> + <div class="container"> + <div class="navbar-header"> + <div class="navbar-brand brand projectlogo"> + <a href="http://systemml.apache.org/"><img class="logo" src="img/systemml-logo.png" alt="Apache SystemML" title="Apache SystemML"/></a> + </div> + <div class="navbar-brand brand projecttitle"> + <a href="http://systemml.apache.org/">Apache SystemML<sup id="trademark">&trade;</sup></a><br/> + <span class="version">1.1.0</span> + </div> + <button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target=".navbar-collapse"> + <span class="sr-only">Toggle navigation</span> + <span class="icon-bar"></span> + <span class="icon-bar"></span> + <span class="icon-bar"></span> + </button> + </div> + <nav class="navbar-collapse collapse"> + <ul class="nav navbar-nav navbar-right"> + <li><a href="index.html">Overview</a></li> + <li><a href="https://github.com/apache/systemml">GitHub</a></li> + <li class="dropdown"> + <a href="#" class="dropdown-toggle" data-toggle="dropdown">Documentation<b class="caret"></b></a> + <ul class="dropdown-menu" role="menu"> + <li><b>Running SystemML:</b></li> + <li><a href="https://github.com/apache/systemml">SystemML GitHub README</a></li> + <li><a href="spark-mlcontext-programming-guide.html">Spark MLContext</a></li> + <li><a href="spark-batch-mode.html">Spark Batch Mode</a> + <li><a href="hadoop-batch-mode.html">Hadoop Batch Mode</a> + <li><a href="standalone-guide.html">Standalone Guide</a></li> + <li><a href="jmlc.html">Java Machine Learning Connector (JMLC)</a> + <li class="divider"></li> + <li><b>Language Guides:</b></li> + <li><a href="dml-language-reference.html">DML Language Reference</a></li> + <li><a 
href="beginners-guide-to-dml-and-pydml.html">Beginner's Guide to DML and PyDML</a></li> + <li><a href="beginners-guide-python.html">Beginner's Guide for Python Users</a></li> + <li><a href="python-reference.html">Reference Guide for Python Users</a></li> + <li class="divider"></li> + <li><b>ML Algorithms:</b></li> + <li><a href="algorithms-reference.html">Algorithms Reference</a></li> + <li class="divider"></li> + <li><b>Tools:</b></li> + <li><a href="debugger-guide.html">Debugger Guide</a></li> + <li><a href="developer-tools-systemml.html">IDE Guide</a></li> + <li class="divider"></li> + <li><b>Other:</b></li> + <li><a href="contributing-to-systemml.html">Contributing to SystemML</a></li> + <li><a href="engine-dev-guide.html">Engine Developer Guide</a></li> + <li><a href="troubleshooting-guide.html">Troubleshooting Guide</a></li> + <li><a href="release-process.html">Release Process</a></li> + </ul> + </li> + + <li class="dropdown"> + <a href="#" class="dropdown-toggle" data-toggle="dropdown">API Docs<b class="caret"></b></a> + <ul class="dropdown-menu" role="menu"> + <li><a href="./api/java/index.html">Java</a></li> + <li><a href="./api/python/index.html">Python</a></li> + </ul> + </li> + + <li class="dropdown"> + <a href="#" class="dropdown-toggle" data-toggle="dropdown">Issues<b class="caret"></b></a> + <ul class="dropdown-menu" role="menu"> + <li><b>JIRA:</b></li> + <li><a href="https://issues.apache.org/jira/browse/SYSTEMML">SystemML JIRA</a></li> + + </ul> + </li> + </ul> + </nav> + </div> + </header> + + <div class="container" id="content"> + + <h1 class="title">SystemML Algorithms Reference</h1> + + + <!-- + +--> + +<ul> + <li><a href="algorithms-descriptive-statistics.html">Descriptive Statistics</a> + <ul> + <li><a href="algorithms-descriptive-statistics.html#univariate-statistics">Univariate Statistics</a></li> + <li><a href="algorithms-descriptive-statistics.html#bivariate-statistics">Bivariate Statistics</a></li> + <li><a 
href="algorithms-descriptive-statistics.html#stratified-bivariate-statistics">Stratified Bivariate Statistics</a></li> + </ul> + </li> + <li><a href="algorithms-classification.html">Classification</a> + <ul> + <li><a href="algorithms-classification.html#multinomial-logistic-regression">Multinomial Logistic Regression</a></li> + <li><a href="algorithms-classification.html#support-vector-machines">Support Vector Machines</a> + <ul> + <li><a href="algorithms-classification.html#binary-class-support-vector-machines">Binary-Class Support Vector Machines</a></li> + <li><a href="algorithms-classification.html#multi-class-support-vector-machines">Multi-Class Support Vector Machines</a></li> + </ul> + </li> + <li><a href="algorithms-classification.html#naive-bayes">Naive Bayes</a></li> + <li><a href="algorithms-classification.html#decision-trees">Decision Trees</a></li> + <li><a href="algorithms-classification.html#random-forests">Random Forests</a></li> + </ul> + </li> + <li><a href="algorithms-clustering.html">Clustering</a> + <ul> + <li><a href="algorithms-clustering.html#k-means-clustering">K-Means Clustering</a></li> + </ul> + </li> + <li><a href="algorithms-regression.html">Regression</a> + <ul> + <li><a href="algorithms-regression.html#linear-regression">Linear Regression</a></li> + <li><a href="algorithms-regression.html#stepwise-linear-regression">Stepwise Linear Regression</a></li> + <li><a href="algorithms-regression.html#generalized-linear-models">Generalized Linear Models</a></li> + <li><a href="algorithms-regression.html#stepwise-generalized-linear-regression">Stepwise Generalized Linear Regression</a></li> + <li><a href="algorithms-regression.html#regression-scoring-and-prediction">Regression Scoring and Prediction</a></li> + </ul> + </li> + <li><a href="algorithms-matrix-factorization.html">Matrix Factorization</a> + <ul> + <li><a href="algorithms-matrix-factorization.html#principal-component-analysis">Principal Component Analysis</a></li> + <li><a 
href="algorithms-matrix-factorization.html#matrix-completion-via-alternating-minimizations">Matrix Completion via Alternating Minimizations</a></li> + </ul> + </li> + <li><a href="algorithms-survival-analysis.html">Survival Analysis</a> + <ul> + <li><a href="algorithms-survival-analysis.html#kaplan-meier-survival-analysis">Kaplan-Meier Survival Analysis</a></li> + <li><a href="algorithms-survival-analysis.html#cox-proportional-hazard-regression-model">Cox Proportional Hazard Regression Model</a></li> + </ul> + </li> + <li><a href="algorithms-bibliography.html">Bibliography</a></li> +</ul> + + + + </div> <!-- /container --> + + + + <script src="js/vendor/jquery-1.12.0.min.js"></script> + <script src="js/vendor/bootstrap.min.js"></script> + <script src="js/vendor/anchor.min.js"></script> + <script src="js/main.js"></script> + + + + + + <!-- Analytics --> + <script> + (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){ + (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o), + m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m) + })(window,document,'script','//www.google-analytics.com/analytics.js','ga'); + ga('create', 'UA-71553733-1', 'auto'); + ga('send', 'pageview'); + </script> + + + + <!-- MathJax Section --> + <script type="text/x-mathjax-config"> + MathJax.Hub.Config({ + TeX: { equationNumbers: { autoNumber: "AMS" } } + }); + </script> + <script> + // Note that we load MathJax this way to work with local file (file://), HTTP and HTTPS. + // We could use "//cdn.mathjax...", but that won't support "file://". 
+ (function(d, script) { + script = d.createElement('script'); + script.type = 'text/javascript'; + script.async = true; + script.onload = function(){ + MathJax.Hub.Config({ + tex2jax: { + inlineMath: [ ["$", "$"], ["\\\\(","\\\\)"] ], + displayMath: [ ["$$","$$"], ["\\[", "\\]"] ], + processEscapes: true, + skipTags: ['script', 'noscript', 'style', 'textarea', 'pre'] + } + }); + }; + script.src = ('https:' == document.location.protocol ? 'https://' : 'http://') + + 'cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML'; + d.getElementsByTagName('head')[0].appendChild(script); + }(document)); + </script> + </body> +</html>
