http://git-wip-us.apache.org/repos/asf/mahout/blob/a9262d54/docs/latest/algorithms/map-reduce/clustering/spectral-clustering.html
----------------------------------------------------------------------
diff --git 
a/docs/latest/algorithms/map-reduce/clustering/spectral-clustering.html 
b/docs/latest/algorithms/map-reduce/clustering/spectral-clustering.html
index 78cb515..d414825 100644
--- a/docs/latest/algorithms/map-reduce/clustering/spectral-clustering.html
+++ b/docs/latest/algorithms/map-reduce/clustering/spectral-clustering.html
@@ -78,7 +78,7 @@
                     <a class="nav-link dropdown-toggle" href="" 
id="navbarDropdownMenuLink" data-toggle="dropdown" aria-haspopup="true" 
aria-expanded="false">Tutorial</a>
                     <div class="dropdown-menu" 
aria-labelledby="navbarDropdownMenuLink">
                         <div class="dropdown-divider"></div>
-                        <h6 class="dropdown-header">Reccomenders</h6>
+                        <h6 class="dropdown-header">Recommenders</h6>
                         <a class="dropdown-item"  
href="/docs/latest/tutorials/cco-lastfm">CCO Example with Last.FM Data</a>
                         <a class="dropdown-item"  
href="/docs/latest/tutorials/intro-cooccurrence-spark">Introduction to 
Cooccurrence in Spark</a>
                         <div class="dropdown-divider"></div>
@@ -107,14 +107,14 @@
                         <a class="dropdown-item"  
href="/docs/latest/algorithms/preprocessors">Preprocessors</a>
                         <a class="dropdown-item"  
href="/docs/latest/algorithms/regression">Regression</a>
                         <a class="dropdown-item"  
href="/docs/latest/algorithms/clustering">Clustering</a>
-                        <a class="dropdown-item"  
href="/docs/latest/algorithms/reccomenders">Reccomenders</a>
+                        <a class="dropdown-item"  
href="/docs/latest/algorithms/recommenders">Recommenders</a>
                         <div class="dropdown-divider"></div>
                         <h6 class="dropdown-header">Deprecated</h6>
                         <a class="dropdown-item"  
href="/docs/latest/algorithms/map-reduce">MapReduce <i>(deprecated)</i></a>
                     </div>
-                        <!--<a class="dropdown-item"  
href="/docs/latest/algorithms/reccomenders/recommender-overview.html">Reccomender
 Overview</a></li> Do we still need? seems like short version of next post-->
+                        <!--<a class="dropdown-item"  
href="/docs/latest/algorithms/recommenders/recommender-overview.html">Reccomender
 Overview</a></li> Do we still need? seems like short version of next post-->
                         <!--
-                        <a class="dropdown-item"  
href="/docs/latest/algorithms/reccomenders/intro-cooccurrence-spark.html">Intro 
to Coocurrence With Spark</a></li>
+                        <a class="dropdown-item"  
href="/docs/latest/algorithms/recommenders/intro-cooccurrence-spark.html">Intro 
to Coocurrence With Spark</a></li>
                         <li role="separator" class="divider"></li>
                         <li><span>&nbsp;&nbsp;<a 
href="/docs/latest/algorithms/map-reduce"><b>MapReduce</b> 
(deprecated)</a><span></li>
 

http://git-wip-us.apache.org/repos/asf/mahout/blob/a9262d54/docs/latest/algorithms/map-reduce/clustering/streaming-k-means.html
----------------------------------------------------------------------
diff --git 
a/docs/latest/algorithms/map-reduce/clustering/streaming-k-means.html 
b/docs/latest/algorithms/map-reduce/clustering/streaming-k-means.html
index ce276f1..270b4b3 100644
--- a/docs/latest/algorithms/map-reduce/clustering/streaming-k-means.html
+++ b/docs/latest/algorithms/map-reduce/clustering/streaming-k-means.html
@@ -78,7 +78,7 @@
                     <a class="nav-link dropdown-toggle" href="" 
id="navbarDropdownMenuLink" data-toggle="dropdown" aria-haspopup="true" 
aria-expanded="false">Tutorial</a>
                     <div class="dropdown-menu" 
aria-labelledby="navbarDropdownMenuLink">
                         <div class="dropdown-divider"></div>
-                        <h6 class="dropdown-header">Reccomenders</h6>
+                        <h6 class="dropdown-header">Recommenders</h6>
                         <a class="dropdown-item"  
href="/docs/latest/tutorials/cco-lastfm">CCO Example with Last.FM Data</a>
                         <a class="dropdown-item"  
href="/docs/latest/tutorials/intro-cooccurrence-spark">Introduction to 
Cooccurrence in Spark</a>
                         <div class="dropdown-divider"></div>
@@ -107,14 +107,14 @@
                         <a class="dropdown-item"  
href="/docs/latest/algorithms/preprocessors">Preprocessors</a>
                         <a class="dropdown-item"  
href="/docs/latest/algorithms/regression">Regression</a>
                         <a class="dropdown-item"  
href="/docs/latest/algorithms/clustering">Clustering</a>
-                        <a class="dropdown-item"  
href="/docs/latest/algorithms/reccomenders">Reccomenders</a>
+                        <a class="dropdown-item"  
href="/docs/latest/algorithms/recommenders">Recommenders</a>
                         <div class="dropdown-divider"></div>
                         <h6 class="dropdown-header">Deprecated</h6>
                         <a class="dropdown-item"  
href="/docs/latest/algorithms/map-reduce">MapReduce <i>(deprecated)</i></a>
                     </div>
-                        <!--<a class="dropdown-item"  
href="/docs/latest/algorithms/reccomenders/recommender-overview.html">Reccomender
 Overview</a></li> Do we still need? seems like short version of next post-->
+                        <!--<a class="dropdown-item"  
href="/docs/latest/algorithms/recommenders/recommender-overview.html">Reccomender
 Overview</a></li> Do we still need? seems like short version of next post-->
                         <!--
-                        <a class="dropdown-item"  
href="/docs/latest/algorithms/reccomenders/intro-cooccurrence-spark.html">Intro 
to Coocurrence With Spark</a></li>
+                        <a class="dropdown-item"  
href="/docs/latest/algorithms/recommenders/intro-cooccurrence-spark.html">Intro 
to Coocurrence With Spark</a></li>
                         <li role="separator" class="divider"></li>
                         <li><span>&nbsp;&nbsp;<a 
href="/docs/latest/algorithms/map-reduce"><b>MapReduce</b> 
(deprecated)</a><span></li>
 

http://git-wip-us.apache.org/repos/asf/mahout/blob/a9262d54/docs/latest/algorithms/map-reduce/index.html
----------------------------------------------------------------------
diff --git a/docs/latest/algorithms/map-reduce/index.html 
b/docs/latest/algorithms/map-reduce/index.html
index 663b6a2..249118f 100644
--- a/docs/latest/algorithms/map-reduce/index.html
+++ b/docs/latest/algorithms/map-reduce/index.html
@@ -78,7 +78,7 @@
                     <a class="nav-link dropdown-toggle" href="" 
id="navbarDropdownMenuLink" data-toggle="dropdown" aria-haspopup="true" 
aria-expanded="false">Tutorial</a>
                     <div class="dropdown-menu" 
aria-labelledby="navbarDropdownMenuLink">
                         <div class="dropdown-divider"></div>
-                        <h6 class="dropdown-header">Reccomenders</h6>
+                        <h6 class="dropdown-header">Recommenders</h6>
                         <a class="dropdown-item"  
href="/docs/latest/tutorials/cco-lastfm">CCO Example with Last.FM Data</a>
                         <a class="dropdown-item"  
href="/docs/latest/tutorials/intro-cooccurrence-spark">Introduction to 
Cooccurrence in Spark</a>
                         <div class="dropdown-divider"></div>
@@ -107,14 +107,14 @@
                         <a class="dropdown-item"  
href="/docs/latest/algorithms/preprocessors">Preprocessors</a>
                         <a class="dropdown-item"  
href="/docs/latest/algorithms/regression">Regression</a>
                         <a class="dropdown-item"  
href="/docs/latest/algorithms/clustering">Clustering</a>
-                        <a class="dropdown-item"  
href="/docs/latest/algorithms/reccomenders">Reccomenders</a>
+                        <a class="dropdown-item"  
href="/docs/latest/algorithms/recommenders">Recommenders</a>
                         <div class="dropdown-divider"></div>
                         <h6 class="dropdown-header">Deprecated</h6>
                         <a class="dropdown-item"  
href="/docs/latest/algorithms/map-reduce">MapReduce <i>(deprecated)</i></a>
                     </div>
-                        <!--<a class="dropdown-item"  
href="/docs/latest/algorithms/reccomenders/recommender-overview.html">Reccomender
 Overview</a></li> Do we still need? seems like short version of next post-->
+                        <!--<a class="dropdown-item"  
href="/docs/latest/algorithms/recommenders/recommender-overview.html">Reccomender
 Overview</a></li> Do we still need? seems like short version of next post-->
                         <!--
-                        <a class="dropdown-item"  
href="/docs/latest/algorithms/reccomenders/intro-cooccurrence-spark.html">Intro 
to Coocurrence With Spark</a></li>
+                        <a class="dropdown-item"  
href="/docs/latest/algorithms/recommenders/intro-cooccurrence-spark.html">Intro 
to Coocurrence With Spark</a></li>
                         <li role="separator" class="divider"></li>
                         <li><span>&nbsp;&nbsp;<a 
href="/docs/latest/algorithms/map-reduce"><b>MapReduce</b> 
(deprecated)</a><span></li>
 

http://git-wip-us.apache.org/repos/asf/mahout/blob/a9262d54/docs/latest/algorithms/preprocessors/AsFactor.html
----------------------------------------------------------------------
diff --git a/docs/latest/algorithms/preprocessors/AsFactor.html 
b/docs/latest/algorithms/preprocessors/AsFactor.html
index ca9b664..f118a64 100644
--- a/docs/latest/algorithms/preprocessors/AsFactor.html
+++ b/docs/latest/algorithms/preprocessors/AsFactor.html
@@ -78,7 +78,7 @@
                     <a class="nav-link dropdown-toggle" href="" 
id="navbarDropdownMenuLink" data-toggle="dropdown" aria-haspopup="true" 
aria-expanded="false">Tutorial</a>
                     <div class="dropdown-menu" 
aria-labelledby="navbarDropdownMenuLink">
                         <div class="dropdown-divider"></div>
-                        <h6 class="dropdown-header">Reccomenders</h6>
+                        <h6 class="dropdown-header">Recommenders</h6>
                         <a class="dropdown-item"  
href="/docs/latest/tutorials/cco-lastfm">CCO Example with Last.FM Data</a>
                         <a class="dropdown-item"  
href="/docs/latest/tutorials/intro-cooccurrence-spark">Introduction to 
Cooccurrence in Spark</a>
                         <div class="dropdown-divider"></div>
@@ -107,14 +107,14 @@
                         <a class="dropdown-item"  
href="/docs/latest/algorithms/preprocessors">Preprocessors</a>
                         <a class="dropdown-item"  
href="/docs/latest/algorithms/regression">Regression</a>
                         <a class="dropdown-item"  
href="/docs/latest/algorithms/clustering">Clustering</a>
-                        <a class="dropdown-item"  
href="/docs/latest/algorithms/reccomenders">Reccomenders</a>
+                        <a class="dropdown-item"  
href="/docs/latest/algorithms/recommenders">Recommenders</a>
                         <div class="dropdown-divider"></div>
                         <h6 class="dropdown-header">Deprecated</h6>
                         <a class="dropdown-item"  
href="/docs/latest/algorithms/map-reduce">MapReduce <i>(deprecated)</i></a>
                     </div>
-                        <!--<a class="dropdown-item"  
href="/docs/latest/algorithms/reccomenders/recommender-overview.html">Reccomender
 Overview</a></li> Do we still need? seems like short version of next post-->
+                        <!--<a class="dropdown-item"  
href="/docs/latest/algorithms/recommenders/recommender-overview.html">Reccomender
 Overview</a></li> Do we still need? seems like short version of next post-->
                         <!--
-                        <a class="dropdown-item"  
href="/docs/latest/algorithms/reccomenders/intro-cooccurrence-spark.html">Intro 
to Coocurrence With Spark</a></li>
+                        <a class="dropdown-item"  
href="/docs/latest/algorithms/recommenders/intro-cooccurrence-spark.html">Intro 
to Coocurrence With Spark</a></li>
                         <li role="separator" class="divider"></li>
                         <li><span>&nbsp;&nbsp;<a 
href="/docs/latest/algorithms/map-reduce"><b>MapReduce</b> 
(deprecated)</a><span></li>
 

http://git-wip-us.apache.org/repos/asf/mahout/blob/a9262d54/docs/latest/algorithms/preprocessors/MeanCenter.html
----------------------------------------------------------------------
diff --git a/docs/latest/algorithms/preprocessors/MeanCenter.html 
b/docs/latest/algorithms/preprocessors/MeanCenter.html
index 4c4d57b..8d30bb1 100644
--- a/docs/latest/algorithms/preprocessors/MeanCenter.html
+++ b/docs/latest/algorithms/preprocessors/MeanCenter.html
@@ -78,7 +78,7 @@
                     <a class="nav-link dropdown-toggle" href="" 
id="navbarDropdownMenuLink" data-toggle="dropdown" aria-haspopup="true" 
aria-expanded="false">Tutorial</a>
                     <div class="dropdown-menu" 
aria-labelledby="navbarDropdownMenuLink">
                         <div class="dropdown-divider"></div>
-                        <h6 class="dropdown-header">Reccomenders</h6>
+                        <h6 class="dropdown-header">Recommenders</h6>
                         <a class="dropdown-item"  
href="/docs/latest/tutorials/cco-lastfm">CCO Example with Last.FM Data</a>
                         <a class="dropdown-item"  
href="/docs/latest/tutorials/intro-cooccurrence-spark">Introduction to 
Cooccurrence in Spark</a>
                         <div class="dropdown-divider"></div>
@@ -107,14 +107,14 @@
                         <a class="dropdown-item"  
href="/docs/latest/algorithms/preprocessors">Preprocessors</a>
                         <a class="dropdown-item"  
href="/docs/latest/algorithms/regression">Regression</a>
                         <a class="dropdown-item"  
href="/docs/latest/algorithms/clustering">Clustering</a>
-                        <a class="dropdown-item"  
href="/docs/latest/algorithms/reccomenders">Reccomenders</a>
+                        <a class="dropdown-item"  
href="/docs/latest/algorithms/recommenders">Recommenders</a>
                         <div class="dropdown-divider"></div>
                         <h6 class="dropdown-header">Deprecated</h6>
                         <a class="dropdown-item"  
href="/docs/latest/algorithms/map-reduce">MapReduce <i>(deprecated)</i></a>
                     </div>
-                        <!--<a class="dropdown-item"  
href="/docs/latest/algorithms/reccomenders/recommender-overview.html">Reccomender
 Overview</a></li> Do we still need? seems like short version of next post-->
+                        <!--<a class="dropdown-item"  
href="/docs/latest/algorithms/recommenders/recommender-overview.html">Reccomender
 Overview</a></li> Do we still need? seems like short version of next post-->
                         <!--
-                        <a class="dropdown-item"  
href="/docs/latest/algorithms/reccomenders/intro-cooccurrence-spark.html">Intro 
to Coocurrence With Spark</a></li>
+                        <a class="dropdown-item"  
href="/docs/latest/algorithms/recommenders/intro-cooccurrence-spark.html">Intro 
to Coocurrence With Spark</a></li>
                         <li role="separator" class="divider"></li>
                         <li><span>&nbsp;&nbsp;<a 
href="/docs/latest/algorithms/map-reduce"><b>MapReduce</b> 
(deprecated)</a><span></li>
 

http://git-wip-us.apache.org/repos/asf/mahout/blob/a9262d54/docs/latest/algorithms/preprocessors/StandardScaler.html
----------------------------------------------------------------------
diff --git a/docs/latest/algorithms/preprocessors/StandardScaler.html 
b/docs/latest/algorithms/preprocessors/StandardScaler.html
index bf63666..65f26f6 100644
--- a/docs/latest/algorithms/preprocessors/StandardScaler.html
+++ b/docs/latest/algorithms/preprocessors/StandardScaler.html
@@ -78,7 +78,7 @@
                     <a class="nav-link dropdown-toggle" href="" 
id="navbarDropdownMenuLink" data-toggle="dropdown" aria-haspopup="true" 
aria-expanded="false">Tutorial</a>
                     <div class="dropdown-menu" 
aria-labelledby="navbarDropdownMenuLink">
                         <div class="dropdown-divider"></div>
-                        <h6 class="dropdown-header">Reccomenders</h6>
+                        <h6 class="dropdown-header">Recommenders</h6>
                         <a class="dropdown-item"  
href="/docs/latest/tutorials/cco-lastfm">CCO Example with Last.FM Data</a>
                         <a class="dropdown-item"  
href="/docs/latest/tutorials/intro-cooccurrence-spark">Introduction to 
Cooccurrence in Spark</a>
                         <div class="dropdown-divider"></div>
@@ -107,14 +107,14 @@
                         <a class="dropdown-item"  
href="/docs/latest/algorithms/preprocessors">Preprocessors</a>
                         <a class="dropdown-item"  
href="/docs/latest/algorithms/regression">Regression</a>
                         <a class="dropdown-item"  
href="/docs/latest/algorithms/clustering">Clustering</a>
-                        <a class="dropdown-item"  
href="/docs/latest/algorithms/reccomenders">Reccomenders</a>
+                        <a class="dropdown-item"  
href="/docs/latest/algorithms/recommenders">Recommenders</a>
                         <div class="dropdown-divider"></div>
                         <h6 class="dropdown-header">Deprecated</h6>
                         <a class="dropdown-item"  
href="/docs/latest/algorithms/map-reduce">MapReduce <i>(deprecated)</i></a>
                     </div>
-                        <!--<a class="dropdown-item"  
href="/docs/latest/algorithms/reccomenders/recommender-overview.html">Reccomender
 Overview</a></li> Do we still need? seems like short version of next post-->
+                        <!--<a class="dropdown-item"  
href="/docs/latest/algorithms/recommenders/recommender-overview.html">Reccomender
 Overview</a></li> Do we still need? seems like short version of next post-->
                         <!--
-                        <a class="dropdown-item"  
href="/docs/latest/algorithms/reccomenders/intro-cooccurrence-spark.html">Intro 
to Coocurrence With Spark</a></li>
+                        <a class="dropdown-item"  
href="/docs/latest/algorithms/recommenders/intro-cooccurrence-spark.html">Intro 
to Coocurrence With Spark</a></li>
                         <li role="separator" class="divider"></li>
                         <li><span>&nbsp;&nbsp;<a 
href="/docs/latest/algorithms/map-reduce"><b>MapReduce</b> 
(deprecated)</a><span></li>
 

http://git-wip-us.apache.org/repos/asf/mahout/blob/a9262d54/docs/latest/algorithms/preprocessors/index.html
----------------------------------------------------------------------
diff --git a/docs/latest/algorithms/preprocessors/index.html 
b/docs/latest/algorithms/preprocessors/index.html
index 6152d00..9ae5013 100644
--- a/docs/latest/algorithms/preprocessors/index.html
+++ b/docs/latest/algorithms/preprocessors/index.html
@@ -78,7 +78,7 @@
                     <a class="nav-link dropdown-toggle" href="" 
id="navbarDropdownMenuLink" data-toggle="dropdown" aria-haspopup="true" 
aria-expanded="false">Tutorial</a>
                     <div class="dropdown-menu" 
aria-labelledby="navbarDropdownMenuLink">
                         <div class="dropdown-divider"></div>
-                        <h6 class="dropdown-header">Reccomenders</h6>
+                        <h6 class="dropdown-header">Recommenders</h6>
                         <a class="dropdown-item"  
href="/docs/latest/tutorials/cco-lastfm">CCO Example with Last.FM Data</a>
                         <a class="dropdown-item"  
href="/docs/latest/tutorials/intro-cooccurrence-spark">Introduction to 
Cooccurrence in Spark</a>
                         <div class="dropdown-divider"></div>
@@ -107,14 +107,14 @@
                         <a class="dropdown-item"  
href="/docs/latest/algorithms/preprocessors">Preprocessors</a>
                         <a class="dropdown-item"  
href="/docs/latest/algorithms/regression">Regression</a>
                         <a class="dropdown-item"  
href="/docs/latest/algorithms/clustering">Clustering</a>
-                        <a class="dropdown-item"  
href="/docs/latest/algorithms/reccomenders">Reccomenders</a>
+                        <a class="dropdown-item"  
href="/docs/latest/algorithms/recommenders">Recommenders</a>
                         <div class="dropdown-divider"></div>
                         <h6 class="dropdown-header">Deprecated</h6>
                         <a class="dropdown-item"  
href="/docs/latest/algorithms/map-reduce">MapReduce <i>(deprecated)</i></a>
                     </div>
-                        <!--<a class="dropdown-item"  
href="/docs/latest/algorithms/reccomenders/recommender-overview.html">Reccomender
 Overview</a></li> Do we still need? seems like short version of next post-->
+                        <!--<a class="dropdown-item"  
href="/docs/latest/algorithms/recommenders/recommender-overview.html">Reccomender
 Overview</a></li> Do we still need? seems like short version of next post-->
                         <!--
-                        <a class="dropdown-item"  
href="/docs/latest/algorithms/reccomenders/intro-cooccurrence-spark.html">Intro 
to Coocurrence With Spark</a></li>
+                        <a class="dropdown-item"  
href="/docs/latest/algorithms/recommenders/intro-cooccurrence-spark.html">Intro 
to Coocurrence With Spark</a></li>
                         <li role="separator" class="divider"></li>
                         <li><span>&nbsp;&nbsp;<a 
href="/docs/latest/algorithms/map-reduce"><b>MapReduce</b> 
(deprecated)</a><span></li>
 

http://git-wip-us.apache.org/repos/asf/mahout/blob/a9262d54/docs/latest/algorithms/recommenders/cco.html
----------------------------------------------------------------------
diff --git a/docs/latest/algorithms/recommenders/cco.html 
b/docs/latest/algorithms/recommenders/cco.html
new file mode 100644
index 0000000..a4cd25f
--- /dev/null
+++ b/docs/latest/algorithms/recommenders/cco.html
@@ -0,0 +1,672 @@
+<!DOCTYPE html>
+<html lang=" en ">
+
+<head>
+  <meta charset="utf-8">
+  <meta http-equiv="X-UA-Compatible" content="IE=edge">
+  <meta name="viewport" content="width=device-width, initial-scale=1">
+
+  <title>
+    Building a Mahout Recommender
+    
+  </title>
+
+  <meta name="description" content="Distributed Linear Algebra">
+
+  <link rel="stylesheet" href="/assets/css/main.css">
+
+  <!-- Font Awesome -->
+  <link 
href="https://maxcdn.bootstrapcdn.com/font-awesome/4.7.0/css/font-awesome.min.css"
 rel="stylesheet" 
integrity="sha384-wvfXpqpZZVQGK6TAh5PVlGOfQNHSoD2xbE+QkPxCAFlNEevoEH3Sl0sibVcOQVnN"
 crossorigin="anonymous">
+
+  <!-- Google Fonts -->
+  <link href="https://fonts.googleapis.com/css?family=Maven+Pro:400,500" 
rel="stylesheet">
+  <link href="https://fonts.googleapis.com/css?family=Muli:400,400i,700,700i" 
rel="stylesheet">
+
+  <link rel="canonical" 
href="http://mahout.apache.org//docs/latest/algorithms/recommenders/cco.html">
+  <link rel="alternate" type="application/rss+xml" title="Apache Mahout" 
href="/%20/feed.xml">
+
+
+</head>
+
+<script>
+  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
+  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new 
Date();a=s.createElement(o),
+  
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
+  })(window,document,'script','//www.google-analytics.com/analytics.js','ga');
+
+  ga('create', 'UA-98314020-1', 'auto');
+  ga('send', 'pageview');
+</script>
+<body>
+
+  <nav class="navbar navbar-expand-lg navbar-light bg-light navbar-mahout">
+
+    <div class="container">
+
+        <a class="navbar-brand" href="/">
+          <img src="/assets/mahout-logo-blue.svg" alt="">
+        </a>
+
+        <button class="navbar-toggler" type="button" data-toggle="collapse" 
data-target="#navbarSupportedContent" aria-controls="navbarSupportedContent" 
aria-expanded="false" aria-label="Toggle navigation">
+            <span class="navbar-toggler-icon"></span>
+        </button>
+
+        <div class="collapse navbar-collapse" id="navbarSupportedContent">
+
+            <ul class="navbar-nav ml-auto">
+
+                <!-- Download -->
+                <li class="nav-item">
+                    <a class="nav-link" href="/general/downloads">Download</a>
+                </li>
+
+                <li class="nav-item">
+                    <a class="nav-link" 
href="/docs/latest/index.html">Overview</a>
+                </li>
+
+                <!-- Algorithms (Samsara / MR) -->
+                <li class="nav-item dropdown">
+                    <a class="nav-link dropdown-toggle" href="" 
id="navbarDropdownMenuLink" data-toggle="dropdown" aria-haspopup="true" 
aria-expanded="false">Algorithms</a>
+                    <div class="dropdown-menu" 
aria-labelledby="navbarDropdownMenuLink">
+                        <a class="dropdown-item"  
href="/docs/latest/algorithms/linear-algebra">Distributed Linear Algebra</a>
+                        <a class="dropdown-item"  
href="/docs/latest/algorithms/preprocessors">Preprocessors</a>
+                        <a class="dropdown-item"  
href="/docs/latest/algorithms/regression">Regression</a>
+                        <a class="dropdown-item"  
href="/docs/latest/algorithms/clustering">Clustering</a>
+                        <a class="dropdown-item"  
href="/docs/latest/algorithms/recommenders">Recommenders</a>
+                        <div class="dropdown-divider"></div>
+                        <h6 class="dropdown-header">Deprecated</h6>
+                        <a class="dropdown-item"  
href="/docs/latest/algorithms/map-reduce">MapReduce <i>(deprecated)</i></a>
+                    </div>
+                    <!--<a class="dropdown-item"  
href="/docs/latest/algorithms/recommenders/recommender-overview.html">Reccomender
 Overview</a></li> Do we still need? seems like short version of next post-->
+                    <!--
+                    <a class="dropdown-item"  
href="/docs/latest/algorithms/recommenders/intro-cooccurrence-spark.html">Intro 
to Coocurrence With Spark</a></li>
+                    <li role="separator" class="divider"></li>
+                    <li><span>&nbsp;&nbsp;<a 
href="/docs/latest/algorithms/map-reduce"><b>MapReduce</b> 
(deprecated)</a><span></li>
+
+                 -->
+                </li>
+                <!-- Algorithms (Samsara / MR) -->
+                <li class="nav-item dropdown">
+                    <a class="nav-link dropdown-toggle" href="" 
id="navbarDropdownMenuLink" data-toggle="dropdown" aria-haspopup="true" 
aria-expanded="false">Algorithms</a>
+                    <div class="dropdown-menu" 
aria-labelledby="navbarDropdownMenuLink">
+                        <a class="dropdown-item"  
href="/docs/latest/algorithms/linear-algebra">Distributed Linear Algebra</a>
+                        <a class="dropdown-item"  
href="/docs/latest/algorithms/preprocessors">Preprocessors</a>
+                        <a class="dropdown-item"  
href="/docs/latest/algorithms/regression">Regression</a>
+                        <a class="dropdown-item"  
href="/docs/latest/algorithms/clustering">Clustering</a>
+                        <a class="dropdown-item"  
href="/docs/latest/algorithms/recommenders">Recommenders</a>
+                        <div class="dropdown-divider"></div>
+                        <h6 class="dropdown-header">Deprecated</h6>
+                        <a class="dropdown-item"  
href="/docs/latest/algorithms/map-reduce">MapReduce <i>(deprecated)</i></a>
+                    </div>
+                    <!--<a class="dropdown-item"  
href="/docs/latest/algorithms/recommenders/recommender-overview.html">Reccomender
 Overview</a></li> Do we still need? seems like short version of next post-->
+                    <!--
+                    <a class="dropdown-item"  
href="/docs/latest/algorithms/recommenders/intro-cooccurrence-spark.html">Intro 
to Coocurrence With Spark</a></li>
+                    <li role="separator" class="divider"></li>
+                    <li><span>&nbsp;&nbsp;<a 
href="/docs/latest/algorithms/map-reduce"><b>MapReduce</b> 
(deprecated)</a><span></li>
+                 -->
+                </li>
+
+                <!-- Developers -->
+                <li class="nav-item dropdown">
+                    <a class="nav-link dropdown-toggle" href="" 
id="navbarDropdownMenuLink" data-toggle="dropdown" aria-haspopup="true" 
aria-expanded="false">Developers</a>
+                    <div class="dropdown-menu" 
aria-labelledby="navbarDropdownMenuLink">
+                        <a class="dropdown-item" 
href="/developers/developer-resources.html">Developer Resources</a>
+                        <a class="dropdown-item" 
href="/developers/buildingmahout">Building Mahout from Source</a>
+                        <a class="dropdown-item" 
href="/developers/issue-tracker">Issues Tracking (JIRA)</a>
+                        <!-- <a class="dropdown-item" 
href="/developers/patch-check-list/">Patch Check List</a> going to github 
template -->
+                        <!-- <a class="dropdown-item" 
href="/developers/reference/">References</a> a lot of overlap with books, 
talks, etc. page -->
+                        <a class="dropdown-item" 
href="/developers/release-notes/">Release Notes</a>
+                        <!-- <a class="dropdown-item" 
href="/developers/thirdparty-dependencies/">Third Party Dependencies</a> is our 
site the reasonable place for this? -->
+                        <!-- <a class="dropdown-item" 
href="/developers/version-control/">Version Control</a> -->
+                        <div class="dropdown-divider"></div>
+                        <h6 class="dropdown-header">How To's</h6>
+                        <a class="dropdown-item" 
href="/developers/how-to-contribute">How to Contribute</a>
+                        <a class="dropdown-item" 
href="/developers/githubPRs">Github PRs</a>
+                        <a class="dropdown-item" 
href="/developers/how-to-become-a-committer">How to Become a Committer</a>
+                        <a class="dropdown-item" 
href="/developers/how-to-release">How to Release</a>
+                        <a class="dropdown-item" 
href="/developers/how-to-update-the-website">How to Update the Website</a>
+                    </div>
+                </li>
+
+                <!-- Docs -->
+                <li class="nav-item dropdown">
+                    <a class="nav-link dropdown-toggle" href="" 
id="navbarDropdownMenuLink" data-toggle="dropdown" aria-haspopup="true" 
aria-expanded="false">Docs</a>
+                    <div class="dropdown-menu" 
aria-labelledby="navbarDropdownMenuLink">
+                        <h6 class="dropdown-header">Release</h6>
+                        <a class="dropdown-item" href="/docs/0.13.0">0.13.0</a>
+                        <div class="dropdown-divider"></div>
+                        <h6 class="dropdown-header">Latest Snapshot 
(Development)</h6>
+                        <a class="dropdown-item" 
href="/docs/latest">0.13.1-SNAPSHOT</a>
+                    </div>
+                </li>
+
+                <!-- Community -->
+                <li class="nav-item dropdown">
+                    <a class="nav-link dropdown-toggle" href="" 
id="navbarDropdownMenuLink" data-toggle="dropdown" aria-haspopup="true" 
aria-expanded="false">Community</a>
+                    <div class="dropdown-menu dropdown-menu-right">
+                        <!--<a class="dropdown-item" 
href="/community/history/">History of the Apache Mahout Project</a>-->
+                        <!--<a class="dropdown-item" 
href="/community/blogs/">Blog Posts About Mahout</a>-->
+                        <!--<a class="dropdown-item" 
href="/community/recent-upcoming-talks/">Recent and Upcoming Talks</a>-->
+                        <!-- <a class="dropdown-item" 
href="/community/books-tutorials-and-talks/">Books Tutorials and Talks</a> -->
+                        <!-- <a class="dropdown-item" 
href="/community/faq/">FAQ</a> needs a lot of updating -->
+                        <a class="dropdown-item" 
href="/developers/gsoc">GSoC</a>
+                        <!-- Is OK- updated Map/Reduce verbiage to reflect 
Samsara -->
+                        <!-- <a class="dropdown-item" 
href="/community/mahout-benchmarks/">Mahout Benchmarks</a> These are old, Keep 
them? -->
+                        <!-- <a class="dropdown-item" 
href="/community/mahout-wiki/">Mahout Wiki</a> at very least needs links 
cleaned up - do we still want this even?-->
+                        <a class="dropdown-item" 
href="/general/mailing-lists">Mailing Lists</a>
+                        <!-- Clean and pretty -->
+                        <!-- <a class="dropdown-item" 
href="/community/powered-by-mahout/">Powered By Mahout</a> needs update -->
+                        <a class="dropdown-item" 
href="/general/privacy-policy">Privacy Policy</a>
+                        <!-- <a class="dropdown-item" 
href="/community/professional-support/">Professional Support</a> update if we 
even want to keep -->
+                        <a class="dropdown-item" 
href="/general/who-we-are">Who We Are</a>
+                        <!-- nikolai needs to add himself -->
+                    </div>
+                </li>
+
+                <!-- GitHub -->
+                <li class="nav-item">
+                    <a class="nav-link" 
href="http://github.com/apache/mahout"><i class="fa fa-github"></i></a>
+                </li>
+
+            </ul>
+
+            <!-- <form class="form-inline my-2 my-lg-0">
+            <input class="form-control mr-sm-2" type="text" 
placeholder="Search" aria-label="Search">
+            <button class="btn btn-outline-success my-2 my-sm-0" 
type="submit">Search</button>
+        </form> -->
+
+        </div>
+
+    </div>
+
+</nav>
+
+
+  <h1 
id="building-a-correlated-cross-occurrence-cco-recommenders-with-the-mahout-cli">Building
 a Correlated Cross-Occurrence (CCO) Recommender with the Mahout CLI</h1>
+
+<p>Mahout’s CCO algorithm is one of a new breed of “Multimodal” 
recommenders that can use input of many types in very flexible ways.</p>
+
+<p>Mahout provides several important building blocks for creating 
recommendations using Spark. <em>spark-itemsimilarity</em> can be used to 
create “other people also liked these things” type recommendations and 
paired with a search engine can personalize recommendations for individual 
users. <em>spark-rowsimilarity</em> can provide non-personalized content based 
recommendations and when paired with a search engine can be used to personalize 
content based recommendations.</p>
+
+<p><img src="http://s6.postimg.org/r0m8bpjw1/recommender_architecture.png" 
alt="Recommender architecture diagram" /></p>
+
+<p>This is a simplified Lambda architecture with Mahout’s 
<em>spark-itemsimilarity</em> playing the batch model building role and a 
search engine playing the realtime serving role.</p>
+
+<p>You will create two collections, one for user history and one for item 
“indicators”. Indicators are user interactions that lead to the wished for 
interaction. So for example if you wish a user to purchase something and you 
collect all users’ purchase interactions, <em>spark-itemsimilarity</em> will 
create a purchase indicator from them. But you can also use other user 
interactions in a cross-cooccurrence calculation, to create purchase 
indicators.</p>
+
+<p>User history is used as a query on the item collection with its 
cooccurrence and cross-cooccurrence indicators (there may be several 
indicators). The primary interaction or indicator is picked to be the thing you 
want to recommend, other action / indicators are believed to be correlated but 
may not indicate exactly the same user intent. For instance in an ecom 
recommender a purchase is a very good primary action / indicator, but you may 
also know product detail-views, or additions-to-wishlists. These can be 
considered secondary actions / indicators which may all be used to calculate 
cross-cooccurrence indicators. The user history that forms the recommendations 
query will contain recorded primary and secondary indicators all targeted 
towards the correct indicator fields.</p>
+
+<h2 id="references">References</h2>
+
+<ol>
+  <li>A free ebook, which talks about the general idea: <a 
href="https://www.mapr.com/practical-machine-learning">Practical Machine 
Learning</a></li>
+  <li>A slide deck, which talks about mixing indicators or other indicators: 
<a 
href="http://occamsmachete.com/ml/2014/10/07/creating-a-unified-recommender-with-mahout-and-a-search-engine/">Creating
 a Unified Recommender</a></li>
+  <li>Two blog posts: <a 
href="http://occamsmachete.com/ml/2014/08/11/mahout-on-spark-whats-new-in-recommenders/">What’s
 New in Recommenders: part #1</a>
+and  <a 
href="http://occamsmachete.com/ml/2014/09/09/mahout-on-spark-whats-new-in-recommenders-part-2/">What’s
 New in Recommenders: part #2</a></li>
+  <li>A post describing the loglikelihood ratio:  <a 
href="http://tdunning.blogspot.com/2008/03/surprise-and-coincidence.html">Surprise
 and Coincidence</a>. LLR is used to reduce noise in the data while keeping the 
calculations O(n) complexity.</li>
+</ol>
+
+<p>Below are the command line jobs but the drivers and associated code can 
also be customized and accessed from the Scala APIs.</p>
+
+<h2 id="1-spark-itemsimilarity">1. spark-itemsimilarity</h2>
+<p><em>spark-itemsimilarity</em> is the Spark counterpart of the Mahout 
mapreduce job called <em>itemsimilarity</em>. It takes in elements of 
interactions, which have userID, itemID, and optionally a value. It will 
produce one or more indicator matrices created by comparing every user’s 
interactions with every other user. The indicator matrix is an item x item 
matrix where the values are log-likelihood ratio strengths. For the legacy 
mapreduce version, there were several possible similarity measures but these 
are being deprecated in favor of LLR because in practice it performs the 
best.</p>
+
+<p>Mahout’s mapreduce version of itemsimilarity takes a text file that is 
expected to have user and item IDs that conform to 
+Mahout’s ID requirements–they are non-negative integers that can be viewed 
as row and column numbers in a matrix.</p>
+
+<p><em>spark-itemsimilarity</em> also extends the notion of cooccurrence to 
cross-cooccurrence, in other words the Spark version will 
+account for multi-modal interactions and create cross-cooccurrence indicator 
matrices allowing the use of much more data in 
+creating recommendations or similar item lists. People try to do this by 
mixing different indicators and giving them weights. 
+For instance they might say an item-view is 0.2 of an item purchase. In 
practice this is often not helpful. Spark-itemsimilarity’s
+cross-cooccurrence is a more principled way to handle this case. In effect it 
scrubs secondary indicators with the indicator you want
+to recommend.</p>
+
+<div class="highlighter-rouge"><pre 
class="highlight"><code>spark-itemsimilarity Mahout 1.0
+Usage: spark-itemsimilarity [options]
+
+Disconnected from the target VM, address: '127.0.0.1:64676', transport: 
'socket'
+Input, output options
+  -i &lt;value&gt; | --input &lt;value&gt;
+        Input path, may be a filename, directory name, or comma delimited list 
of HDFS supported URIs (required)
+  -i2 &lt;value&gt; | --input2 &lt;value&gt;
+        Secondary input path for cross-similarity calculation, same 
restrictions as "--input" (optional). Default: empty.
+  -o &lt;value&gt; | --output &lt;value&gt;
+        Path for output, any local or HDFS supported URI (required)
+
+Algorithm control options:
+  -mppu &lt;value&gt; | --maxPrefs &lt;value&gt;
+        Max number of preferences to consider per user (optional). Default: 500
+  -m &lt;value&gt; | --maxSimilaritiesPerItem &lt;value&gt;
+        Limit the number of similarities per item to this number (optional). 
Default: 100
+
+Note: Only the Log Likelihood Ratio (LLR) is supported as a similarity measure.
+
+Input text file schema options:
+  -id &lt;value&gt; | --inDelim &lt;value&gt;
+        Input delimiter character (optional). Default: "[,\t]"
+  -f1 &lt;value&gt; | --filter1 &lt;value&gt;
+        String (or regex) whose presence indicates a datum for the primary 
item set (optional). Default: no filter, all data is used
+  -f2 &lt;value&gt; | --filter2 &lt;value&gt;
+        String (or regex) whose presence indicates a datum for the secondary 
item set (optional). If not present no secondary dataset is collected
+  -rc &lt;value&gt; | --rowIDColumn &lt;value&gt;
+        Column number (0 based Int) containing the row ID string (optional). 
Default: 0
+  -ic &lt;value&gt; | --itemIDColumn &lt;value&gt;
+        Column number (0 based Int) containing the item ID string (optional). 
Default: 1
+  -fc &lt;value&gt; | --filterColumn &lt;value&gt;
+        Column number (0 based Int) containing the filter string (optional). 
Default: -1 for no filter
+
+Using all defaults the input is expected of the form: 
"userID&lt;tab&gt;itemId" or "userID&lt;tab&gt;itemID&lt;tab&gt;any-text..." 
and all rows will be used
+
+File discovery options:
+  -r | --recursive
+        Searched the -i path recursively for files that match 
--filenamePattern (optional), Default: false
+  -fp &lt;value&gt; | --filenamePattern &lt;value&gt;
+        Regex to match in determining input files (optional). Default: 
filename in the --input option or "^part-.*" if --input is a directory
+
+Output text file schema options:
+  -rd &lt;value&gt; | --rowKeyDelim &lt;value&gt;
+        Separates the rowID key from the vector values list (optional). 
Default: "\t"
+  -cd &lt;value&gt; | --columnIdStrengthDelim &lt;value&gt;
+        Separates column IDs from their values in the vector values list 
(optional). Default: ":"
+  -td &lt;value&gt; | --elementDelim &lt;value&gt;
+        Separates vector element values in the values list (optional). 
Default: " "
+  -os | --omitStrength
+        Do not write the strength to the output files (optional), Default: 
false.
+This option is used to output indexable data for creating a search engine 
recommender.
+
+Default delimiters will produce output of the form: 
"itemID1&lt;tab&gt;itemID2:value2&lt;space&gt;itemID10:value10..."
+
+Spark config options:
+  -ma &lt;value&gt; | --master &lt;value&gt;
+        Spark Master URL (optional). Default: "local". Note that you can 
specify the number of cores to get a performance improvement, for example 
"local[4]"
+  -sem &lt;value&gt; | --sparkExecutorMem &lt;value&gt;
+        Max Java heap available as "executor memory" on each node (optional). 
Default: 4g
+  -rs &lt;value&gt; | --randomSeed &lt;value&gt;
+        
+  -h | --help
+        prints this usage text
+</code></pre>
+</div>
+
+<p>This looks daunting but defaults to simple fairly sane values to take 
exactly the same input as legacy code and is pretty flexible. It allows the 
user to point to a single text file, a directory full of files, or a tree of 
directories to be traversed recursively. The files included can be specified 
with either a regex-style pattern or filename. The schema for the file is 
defined by column numbers, which map to the important bits of data including 
IDs and values. The files can even contain filters, which allow unneeded rows 
to be discarded or used for cross-cooccurrence calculations.</p>
+
+<p>See <code class="highlighter-rouge">ItemSimilarityDriver.scala</code> in 
Mahout’s spark module if you want to customize the code.</p>
+
+<h3 id="defaults-in-the-spark-itemsimilarity-cli">Defaults in the 
<em><strong>spark-itemsimilarity</strong></em> CLI</h3>
+
+<p>If all defaults are used the input can be as simple as:</p>
+
+<div class="highlighter-rouge"><pre class="highlight"><code>userID1,itemID1
+userID2,itemID2
+...
+</code></pre>
+</div>
+
+<p>With the command line:</p>
+
+<div class="highlighter-rouge"><pre class="highlight"><code>bash$ mahout 
spark-itemsimilarity --input in-file --output out-dir
+</code></pre>
+</div>
+
+<p>This will use the “local” Spark context and will output the standard 
text version of a DRM</p>
+
+<div class="highlighter-rouge"><pre 
class="highlight"><code>itemID1&lt;tab&gt;itemID2:value2&lt;space&gt;itemID10:value10...
+</code></pre>
+</div>
+
+<h3 id="how-to-use-multiple-user-indicators"><a name="multiple-actions">How To 
Use Multiple User Indicators</a></h3>
+
+<p>Often we record various indicators the user takes for later analytics. 
These can now be used to make recommendations. 
+The idea of a recommender is to recommend the action you want the user to 
make. For an ecom app this might be a purchase action recorded in a 
“purchase” indicator. It is usually not a good idea to just treat other 
indicators the same as the indicator you want to recommend. For example, if you 
have user purchase and view data, never treat a view as a purchase; it will 
never increase the quality of recommendations. Instead, use the view data as a 
secondary indicator so the CCO algorithm will find meaningful correlated 
cross-occurrences. Without this the views will be so noisy they will almost 
surely reduce the performance of the recommender. Too many people have fallen 
into this mistake. With <em>spark-itemsimilarity</em>
+we can now use both indicators. Mahout will use cross-occurrence analysis to 
limit the views to ones that do predict purchases.
+We do this by treating the primary indicator (purchase) as data for the 
indicator matrix and use the secondary indicator (view) 
+to calculate the cross-cooccurrence indicator matrix.</p>
+
+<p><em>spark-itemsimilarity</em> can read separate indicators from separate 
files or from a mixed indicator log by filtering certain lines. For a mixed 
+indicator log of the form:</p>
+
+<div class="highlighter-rouge"><pre class="highlight"><code>u1,purchase,iphone
+u1,purchase,ipad
+u2,purchase,nexus
+u2,purchase,galaxy
+u3,purchase,surface
+u4,purchase,iphone
+u4,purchase,galaxy
+u1,view,iphone
+u1,view,ipad
+u1,view,nexus
+u1,view,galaxy
+u2,view,iphone
+u2,view,ipad
+u2,view,nexus
+u2,view,galaxy
+u3,view,surface
+u3,view,nexus
+u4,view,iphone
+u4,view,ipad
+u4,view,galaxy
+</code></pre>
+</div>
+
+<h3 id="command-line">Command Line</h3>
+
+<p>Use the following options:</p>
+
+<div class="highlighter-rouge"><pre class="highlight"><code>bash$ mahout 
spark-itemsimilarity \
+    --input in-file \     # where to look for data
+    --output out-path \   # root dir for output
+    --master masterUrl \  # URL of the Spark master server
+    --filter1 purchase \  # word that flags input for the primary indicator
+    --filter2 view \      # word that flags input for the secondary indicator
+    --itemIDColumn 2 \    # column that has the item ID
+    --rowIDColumn 0 \     # column that has the user ID
+    --filterColumn 1      # column that has the filter word
+</code></pre>
+</div>
+
+<h3 id="output">Output</h3>
+
+<p>The output of the job will be the standard text version of two Mahout DRMs. 
This is a case where we are calculating 
+cross-cooccurrence so a primary indicator matrix and cross-cooccurrence 
indicator matrix will be created</p>
+
+<div class="highlighter-rouge"><pre class="highlight"><code>out-path
+  |-- similarity-matrix - TDF part files
+  \-- cross-similarity-matrix - TDF part-files
+</code></pre>
+</div>
+
+<p>The similarity-matrix will contain the lines:</p>
+
+<div class="highlighter-rouge"><pre 
class="highlight"><code>galaxy&lt;tab&gt;nexus:1.7260924347106847
+ipad&lt;tab&gt;iphone:1.7260924347106847
+nexus&lt;tab&gt;galaxy:1.7260924347106847
+iphone&lt;tab&gt;ipad:1.7260924347106847
+surface
+</code></pre>
+</div>
+
+<p>The cross-similarity-matrix will contain:</p>
+
+<div class="highlighter-rouge"><pre 
class="highlight"><code>iphone&lt;tab&gt;nexus:1.7260924347106847 
iphone:1.7260924347106847 ipad:1.7260924347106847 galaxy:1.7260924347106847
+ipad&lt;tab&gt;nexus:0.6795961471815897 iphone:0.6795961471815897 
ipad:0.6795961471815897 galaxy:0.6795961471815897
+nexus&lt;tab&gt;nexus:0.6795961471815897 iphone:0.6795961471815897 
ipad:0.6795961471815897 galaxy:0.6795961471815897
+galaxy&lt;tab&gt;nexus:1.7260924347106847 iphone:1.7260924347106847 
ipad:1.7260924347106847 galaxy:1.7260924347106847
+surface&lt;tab&gt;surface:4.498681156950466 nexus:0.6795961471815897
+</code></pre>
+</div>
+
+<p><strong>Note:</strong> You can run this multiple times to use more than two 
indicators or you can use the underlying SimilarityAnalysis.cooccurrence API in 
your own application as a library, which will more efficiently calculate any 
number of cross-cooccurrence indicators.</p>
+
+<h3 id="log-file-input">Log File Input</h3>
+
+<p>A common method of storing data is in log files. If they are written using 
some delimiter they can be consumed directly by spark-itemsimilarity. For 
instance input of the form:</p>
+
+<div class="highlighter-rouge"><pre class="highlight"><code>2014-06-23 
14:46:53.115&lt;tab&gt;u1&lt;tab&gt;purchase&lt;tab&gt;random 
text&lt;tab&gt;iphone
+2014-06-23 14:46:53.115&lt;tab&gt;u1&lt;tab&gt;purchase&lt;tab&gt;random 
text&lt;tab&gt;ipad
+2014-06-23 14:46:53.115&lt;tab&gt;u2&lt;tab&gt;purchase&lt;tab&gt;random 
text&lt;tab&gt;nexus
+2014-06-23 14:46:53.115&lt;tab&gt;u2&lt;tab&gt;purchase&lt;tab&gt;random 
text&lt;tab&gt;galaxy
+2014-06-23 14:46:53.115&lt;tab&gt;u3&lt;tab&gt;purchase&lt;tab&gt;random 
text&lt;tab&gt;surface
+2014-06-23 14:46:53.115&lt;tab&gt;u4&lt;tab&gt;purchase&lt;tab&gt;random 
text&lt;tab&gt;iphone
+2014-06-23 14:46:53.115&lt;tab&gt;u4&lt;tab&gt;purchase&lt;tab&gt;random 
text&lt;tab&gt;galaxy
+2014-06-23 14:46:53.115&lt;tab&gt;u1&lt;tab&gt;view&lt;tab&gt;random 
text&lt;tab&gt;iphone
+2014-06-23 14:46:53.115&lt;tab&gt;u1&lt;tab&gt;view&lt;tab&gt;random 
text&lt;tab&gt;ipad
+2014-06-23 14:46:53.115&lt;tab&gt;u1&lt;tab&gt;view&lt;tab&gt;random 
text&lt;tab&gt;nexus
+2014-06-23 14:46:53.115&lt;tab&gt;u1&lt;tab&gt;view&lt;tab&gt;random 
text&lt;tab&gt;galaxy
+2014-06-23 14:46:53.115&lt;tab&gt;u2&lt;tab&gt;view&lt;tab&gt;random 
text&lt;tab&gt;iphone
+2014-06-23 14:46:53.115&lt;tab&gt;u2&lt;tab&gt;view&lt;tab&gt;random 
text&lt;tab&gt;ipad
+2014-06-23 14:46:53.115&lt;tab&gt;u2&lt;tab&gt;view&lt;tab&gt;random 
text&lt;tab&gt;nexus
+2014-06-23 14:46:53.115&lt;tab&gt;u2&lt;tab&gt;view&lt;tab&gt;random 
text&lt;tab&gt;galaxy
+2014-06-23 14:46:53.115&lt;tab&gt;u3&lt;tab&gt;view&lt;tab&gt;random 
text&lt;tab&gt;surface
+2014-06-23 14:46:53.115&lt;tab&gt;u3&lt;tab&gt;view&lt;tab&gt;random 
text&lt;tab&gt;nexus
+2014-06-23 14:46:53.115&lt;tab&gt;u4&lt;tab&gt;view&lt;tab&gt;random 
text&lt;tab&gt;iphone
+2014-06-23 14:46:53.115&lt;tab&gt;u4&lt;tab&gt;view&lt;tab&gt;random 
text&lt;tab&gt;ipad
+2014-06-23 14:46:53.115&lt;tab&gt;u4&lt;tab&gt;view&lt;tab&gt;random 
text&lt;tab&gt;galaxy    
+</code></pre>
+</div>
+
+<p>Can be parsed with the following CLI and run on the cluster producing the 
same output as the above example. The important bits of information in the 
example tab delimited file are user-id, indicator-name, and item-id. The rest 
is ignored.</p>
+
+<div class="highlighter-rouge"><pre class="highlight"><code>bash$ mahout 
spark-itemsimilarity \
+    --input in-file \
+    --output out-path \
+    --master spark://sparkmaster:4044 \
+    --filter1 purchase \
+    --filter2 view \
+    --inDelim "\t" \
+    --itemIDColumn 4 \
+    --rowIDColumn 1 \
+    --filterColumn 2
+</code></pre>
+</div>
+
+<h2 id="2-spark-rowsimilarity">2. spark-rowsimilarity</h2>
+
+<p><em>spark-rowsimilarity</em> is the companion to 
<em>spark-itemsimilarity</em> the primary difference is that it takes a text 
file version of 
+a matrix of sparse vectors with optional application specific IDs and it finds 
similar rows rather than items (columns). Its use is
+not limited to collaborative filtering. The input is in text-delimited form 
where there are three delimiters used. By 
+default it reads <code 
class="highlighter-rouge">(rowID&lt;tab&gt;columnID1:strength1&lt;space&gt;columnID2:strength2...)</code>
 Since this job only supports LLR similarity,
+ which does not use the input strengths, they may be omitted in the input. It 
writes 
+<code 
class="highlighter-rouge">(rowID&lt;tab&gt;rowID1:strength1&lt;space&gt;rowID2:strength2...)</code>
 
+The output is sorted by strength descending. The output can be interpreted as 
a row ID from the primary input followed 
+by a list of the most similar rows.</p>
+
+<p>The command line interface is:</p>
+
+<div class="highlighter-rouge"><pre 
class="highlight"><code>spark-rowsimilarity Mahout 0.x
+Usage: spark-rowsimilarity [options]
+
+Input, output options
+  -i &lt;value&gt; | --input &lt;value&gt;
+        Input path, may be a filename, directory name, or comma delimited list 
of HDFS supported URIs (required)
+  -o &lt;value&gt; | --output &lt;value&gt;
+        Path for output, any local or HDFS supported URI (required)
+
+Algorithm control options:
+  -mo &lt;value&gt; | --maxObservations &lt;value&gt;
+        Max number of observations to consider per row (optional). Default: 500
+  -m &lt;value&gt; | --maxSimilaritiesPerRow &lt;value&gt;
+        Limit the number of similarities per item to this number (optional). 
Default: 100
+
+Note: Only the Log Likelihood Ratio (LLR) is supported as a similarity measure.
+Disconnected from the target VM, address: '127.0.0.1:49162', transport: 
'socket'
+
+Output text file schema options:
+  -rd &lt;value&gt; | --rowKeyDelim &lt;value&gt;
+        Separates the rowID key from the vector values list (optional). 
Default: "\t"
+  -cd &lt;value&gt; | --columnIdStrengthDelim &lt;value&gt;
+        Separates column IDs from their values in the vector values list 
(optional). Default: ":"
+  -td &lt;value&gt; | --elementDelim &lt;value&gt;
+        Separates vector element values in the values list (optional). 
Default: " "
+  -os | --omitStrength
+        Do not write the strength to the output files (optional), Default: 
false.
+This option is used to output indexable data for creating a search engine 
recommender.
+
+Default delimiters will produce output of the form: 
"itemID1&lt;tab&gt;itemID2:value2&lt;space&gt;itemID10:value10..."
+
+File discovery options:
+  -r | --recursive
+        Searched the -i path recursively for files that match 
--filenamePattern (optional), Default: false
+  -fp &lt;value&gt; | --filenamePattern &lt;value&gt;
+        Regex to match in determining input files (optional). Default: 
filename in the --input option or "^part-.*" if --input is a directory
+
+Spark config options:
+  -ma &lt;value&gt; | --master &lt;value&gt;
+        Spark Master URL (optional). Default: "local". Note that you can 
specify the number of cores to get a performance improvement, for example 
"local[4]"
+  -sem &lt;value&gt; | --sparkExecutorMem &lt;value&gt;
+        Max Java heap available as "executor memory" on each node (optional). 
Default: 4g
+  -rs &lt;value&gt; | --randomSeed &lt;value&gt;
+        
+  -h | --help
+        prints this usage text
+</code></pre>
+</div>
+
+<p>See RowSimilarityDriver.scala in Mahout’s spark module if you want to 
customize the code.</p>
+
+<h2 id="3-using-spark-rowsimilarity-with-text-data">3. Using <em>spark-rowsimilarity</em> with Text Data</h2>
+
+<p>Another use case for <em>spark-rowsimilarity</em> is in finding similar 
textual content. For instance given the tags associated with 
+a blog post, which other posts have similar tags. In this case the columns are 
tags and the rows are posts. Since LLR is 
+the only similarity method supported this is not the optimal way to determine 
general “bag-of-words” document similarity. 
+LLR is used more as a quality filter than as a similarity measure. However 
<em>spark-rowsimilarity</em> will produce 
+lists of similar docs for every doc if input is docs with lists of terms. The 
Apache <a href="http://lucene.apache.org">Lucene</a> project provides several 
methods of analyzing and tokenizing documents.</p>
+
+<h1 id="4-creating-a-multimodal-recommender"><a name="unified-recommender">4. 
Creating a Multimodal Recommender</a></h1>
+
+<p>Using the output of <em>spark-itemsimilarity</em> and 
<em>spark-rowsimilarity</em> you can build a multimodal cooccurrence and 
content based
+ recommender that can be used in both or either mode depending on indicators 
available and the history available at 
+runtime for a user. Some slides describing this method can be found <a 
href="http://occamsmachete.com/ml/2014/10/07/creating-a-unified-recommender-with-mahout-and-a-search-engine/">here</a>.</p>
+
+<h2 id="requirements">Requirements</h2>
+
+<ol>
+  <li>Mahout 0.13.0 or later</li>
+  <li>Hadoop</li>
+  <li>Spark, the correct version for your version of Mahout and Hadoop</li>
+  <li>A search engine like Solr or Elasticsearch</li>
+</ol>
+
+<h2 id="indicators">Indicators</h2>
+
+<p>Indicators come in 3 types</p>
+
+<ol>
+  <li><strong>Correlated Cross-Occurrence</strong>: calculated with 
<em>spark-itemsimilarity</em> from user indicators</li>
+  <li><strong>Content</strong>: calculated from item metadata or content using 
<em>spark-rowsimilarity</em></li>
+  <li><strong>Intrinsic</strong>: assigned to items as metadata. Can be 
anything that describes the item. These will be used in search engine queries 
to implement business rules.</li>
+</ol>
+
+<p>The query for recommendations will be a mix of values meant to match one of 
your indicators. The query can be constructed 
+from user history and values derived from context (category being viewed for 
instance) or special pre-calculated data 
+(popularity rank for instance). This blending of indicators allows for 
creating many flavors of recommendations to fit 
+a very wide variety of circumstances.</p>
+
+<p>With the right mix of indicators developers can construct a single query 
that works for completely new items and new users 
+while working well for items with lots of interactions and users with many 
recorded indicators. In other words by adding in content and intrinsic 
indicators developers can create a solution for the “cold-start” problem 
that gracefully improves with more user history
+and as items have more interactions. It is also possible to create a 
completely content-based recommender that personalizes 
+recommendations.</p>
+
+<h2 id="example-with-3-indicators">Example with 3 Indicators</h2>
+
+<p>You will need to decide how you store user indicator data so they can be 
processed by the item and row similarity jobs and 
+this is most easily done by using text files as described above. The data that 
is processed by these jobs is considered the 
+training data. You will need some amount of user history in your recs query. 
It is typical to use the most recent user history 
+but need not be exactly what is in the training set, which may include a 
greater volume of historical data. Keeping the user 
+history for query purposes could be done with a database by storing it in a 
users table. In the example above the two 
+collaborative filtering indicators are “purchase” and “view”, but 
let’s also add tags (taken from catalog categories or other 
+descriptive metadata).</p>
+
+<p>We will need to create 1 cooccurrence indicator from the primary indicator 
(purchase) 1 cross-occurrence indicator 
+from the secondary indicator (view) 
+and 1 content indicator (tags). We’ll have to run 
<em>spark-itemsimilarity</em> once and <em>spark-rowsimilarity</em> once.</p>
+
+<p>We have described how to create the collaborative filtering indicators for 
purchase and view (the <a href="#multiple-actions">How to use Multiple User 
+Indicators</a> section) but tags will be a slightly different process. We want 
to use the fact that 
+certain items have tags similar to the ones associated with a user’s 
purchases. This is not a collaborative filtering indicator 
+but rather a “content” or “metadata” type indicator since you are not 
using other users’ history, only the 
+individual that you are making recs for. This means that this method will make 
recommendations for items that have 
+no collaborative filtering data, as happens with new items in a catalog. New 
items may have tags assigned but no one
+ has purchased or viewed them yet. In the final query we will mix all 3 
indicators.</p>
+
+<h2 id="content-indicator">Content Indicator</h2>
+
+<p>To create a content-indicator we’ll make use of the fact that the user 
has purchased items with certain tags. We want to find 
+items with the most similar tags. Notice that other users’ behavior is not 
considered—only other items’ tags. This defines a 
+content or metadata indicator. They are used when you want to find items that 
are similar to other items by using their 
+content or metadata, not by which users interacted with them.</p>
+
+<p><strong>Note</strong>: It may be advisable to treat tags as 
cross-cooccurrence indicators but for the sake of an example they are treated 
here as content only.</p>
+
+<p>For this we need input of the form:</p>
+
+<div class="highlighter-rouge"><pre 
class="highlight"><code>itemID&lt;tab&gt;list-of-tags
+...
+</code></pre>
+</div>
+
+<p>The full collection will look like the tags column from a catalog DB. For 
our ecom example it might be:</p>
+
+<div class="highlighter-rouge"><pre 
class="highlight"><code>3459860b&lt;tab&gt;men long-sleeve chambray clothing 
casual
+9446577d&lt;tab&gt;women tops chambray clothing casual
+...
+</code></pre>
+</div>
+
+<p>We’ll use <em>spark-rowsimilarity</em> because we are looking for similar 
rows, which encode items in this case. As with the 
+collaborative filtering indicators we use the --omitStrength option. The 
strengths created are 
+probabilistic log-likelihood ratios and so are used to filter unimportant 
similarities. Once the filtering or downsampling 
+is finished we no longer need the strengths. We will get an indicator matrix 
of the form:</p>
+
+<div class="highlighter-rouge"><pre 
class="highlight"><code>itemID&lt;tab&gt;list-of-item IDs
+...
+</code></pre>
+</div>
+
+<p>This is a content indicator since it has found other items with similar 
content or metadata.</p>
+
+<div class="highlighter-rouge"><pre 
class="highlight"><code>3459860b&lt;tab&gt;3459860b 3459860b 6749860c 5959860a 
3434860a 3477860a
+9446577d&lt;tab&gt;9446577d 9496577d 0943577d 8346577d 9442277d 9446577e
+...  
+</code></pre>
+</div>
+
+<p>We now have three indicators, two collaborative filtering type and one 
content type.</p>
+
+<h2 id="multimodal-recommender-query">Multimodal Recommender Query</h2>
+
+<p>The actual form of the query for recommendations will vary depending on 
your search engine but the intent is the same. For a given user, map their 
history of an indicator or content to the correct indicator field and perform 
an OR’d query.</p>
+
+<p>We have 3 indicators, these are indexed by the search engine into 3 fields, 
we’ll call them “purchase”, “view”, and “tags”. 
+We take the user’s history that corresponds to each indicator and create a 
query of the form:</p>
+
+<div class="highlighter-rouge"><pre class="highlight"><code>Query:
+  field: purchase; q:user's-purchase-history
+  field: view; q:user's-view-history
+  field: tags; q:user's-tags-associated-with-purchases
+</code></pre>
+</div>
+
+<p>The query will result in an ordered list of items recommended for purchase 
but skewed towards items with similar tags to 
+the ones the user has already purchased.</p>
+
+<p>This is only an example and not necessarily the optimal way to create recs. 
It illustrates how business rules can be 
+translated into recommendations. This technique can be used to skew 
recommendations towards intrinsic indicators also. 
+For instance you may want to put personalized popular item recs in a special 
place in the UI. Create a popularity indicator 
+by tagging items with some category of popularity (hot, warm, cold for 
instance) then
+index that as a new indicator field and include the corresponding value in a 
query 
+on the popularity field. If we use the ecom example but use the query to get 
“hot” recommendations it might look like this:</p>
+
+<div class="highlighter-rouge"><pre class="highlight"><code>Query:
+  field: purchase; q:user's-purchase-history
+  field: view; q:user's-view-history
+  field: popularity; q:"hot"
+</code></pre>
+</div>
+
+<p>This will return recommendations favoring ones that have the intrinsic 
indicator “hot”.</p>
+
+<h2 id="notes">Notes</h2>
+
+<ol>
+  <li>Use as much user indicator history as you can gather. Choose a primary 
indicator that is closest to what you want to recommend and the others will be 
used to create cross-cooccurrence indicators. Using more data in this fashion 
will almost always produce better recommendations.</li>
+  <li>Content indicators can be used where there is no recorded user behavior or when 
items change too quickly to get much interaction history. They can be used 
alone or mixed with other indicators.</li>
+  <li>Most search engines support “boost” factors so you can favor one or 
more indicators. In the example query, if you want tags to only have a small 
effect you could boost the CF indicators.</li>
+  <li>In the examples we have used space delimited strings for lists of IDs in 
indicators and in queries. It may be better to use arrays of strings if your 
storage system and search engine support them. For instance Solr allows 
multi-valued fields, which correspond to arrays.</li>
+</ol>
+
+
+  <footer class="footer bg-light">
+    <div class="container text-center small">
+        Copyright &copy; 2014-2018 The Apache Software Foundation, Licensed 
under the Apache License, Version 2.0.
+    </div>
+</footer>
+
+  <script src="/assets/vendor/jquery/jquery-slim.min.js"></script>
+  <script src="/assets/vendor/popper/popper.min.js"></script>
+  <script src="/assets/vendor/bootstrap/js/bootstrap.min.js"></script>
+  <script src="/assets/header.js"></script>
+  <script 
src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.0/MathJax.js?config=TeX-AMS-MML_HTMLorMML"
 type="text/javascript"></script>
+
+</body>
+
+</html>

http://git-wip-us.apache.org/repos/asf/mahout/blob/a9262d54/docs/latest/algorithms/recommenders/d-als.html
----------------------------------------------------------------------
diff --git a/docs/latest/algorithms/recommenders/d-als.html 
b/docs/latest/algorithms/recommenders/d-als.html
new file mode 100644
index 0000000..775d900
--- /dev/null
+++ b/docs/latest/algorithms/recommenders/d-als.html
@@ -0,0 +1,239 @@
+<!DOCTYPE html>
+<html lang="en">
+
+<head>
+  <meta charset="utf-8">
+  <meta http-equiv="X-UA-Compatible" content="IE=edge">
+  <meta name="viewport" content="width=device-width, initial-scale=1">
+
+  <title>
+    Mahout Samsara Distributed ALS
+    
+  </title>
+
+  <meta name="description" content="Distributed Linear Algebra">
+
+  <link rel="stylesheet" href="/assets/css/main.css">
+
+  <!-- Font Awesome -->
+  <link 
href="https://maxcdn.bootstrapcdn.com/font-awesome/4.7.0/css/font-awesome.min.css"
 rel="stylesheet" 
integrity="sha384-wvfXpqpZZVQGK6TAh5PVlGOfQNHSoD2xbE+QkPxCAFlNEevoEH3Sl0sibVcOQVnN"
 crossorigin="anonymous">
+
+  <!-- Google Fonts -->
+  <link href="https://fonts.googleapis.com/css?family=Maven+Pro:400,500" 
rel="stylesheet">
+  <link href="https://fonts.googleapis.com/css?family=Muli:400,400i,700,700i" 
rel="stylesheet">
+
+  <link rel="canonical" 
href="http://mahout.apache.org/docs/latest/algorithms/recommenders/d-als.html">
+  <link rel="alternate" type="application/rss+xml" title="Apache Mahout" 
href="/%20/feed.xml">
+
+
+</head>
+
+
+<body>
+
+  <nav class="navbar navbar-expand-lg navbar-light bg-light navbar-mahout">
+
+    <div class="container">
+
+        <a class="navbar-brand" href="/">
+            <img src="/assets/mahout-logo-blue.svg" alt="Apache Mahout">
+        </a>
+
+        <button class="navbar-toggler" type="button" data-toggle="collapse" 
data-target="#navbarSupportedContent" aria-controls="navbarSupportedContent" 
aria-expanded="false" aria-label="Toggle navigation">
+            <span class="navbar-toggler-icon"></span>
+        </button>
+
+        <div class="collapse navbar-collapse" id="navbarSupportedContent">
+
+            <ul class="navbar-nav ml-auto">
+
+                <!-- Quick Start -->
+                <li class="nav-item">
+                    <a class="nav-link" href="/docs/latest" >Overview</a>
+                </li>
+
+                <li class="nav-item dropdown">
+                    <a class="nav-link dropdown-toggle" href="" 
id="navbarDropdownMenuLink" data-toggle="dropdown" aria-haspopup="true" 
aria-expanded="false">Key Concepts</a>
+                    <div class="dropdown-menu" 
aria-labelledby="navbarDropdownMenuLink">
+                        <a class="dropdown-item"  
href="/docs/latest/index.html">Mahout Overview</a>
+                        <div class="dropdown-divider"></div>
+                        <h6 class="dropdown-header">Scala DSL</h6>
+                        <a class="dropdown-item"  
href="/docs/latest/mahout-samsara/in-core-reference.html">In-core Reference</a>
+                        <a class="dropdown-item"  
href="/docs/latest/mahout-samsara/out-of-core-reference.html">Out-of-core 
Reference</a>
+                        <a class="dropdown-item"  
href="/docs/latest/mahout-samsara/faq.html">Samsara FAQ</a>
+                        <div class="dropdown-divider"></div>
+                        <h6 class="dropdown-header">Distributed Engine 
Bindings</h6>
+                        <a class="dropdown-item"  
href="/docs/latest/distributed/spark-bindings/">Spark Bindings</a>
+                        <a class="dropdown-item"  
href="/docs/latest/distributed/flink-bindings.html">Flink Bindings</a>
+                        <a class="dropdown-item"  
href="/docs/latest/distributed/flink-bindings.html">H2O Bindings</a>
+                        <!--<div class="dropdown-divider"></div>
+                        <h6 class="dropdown-header">Native Solvers</h6>
+                        <a class="dropdown-item"  
href="/docs/latest/native-solvers/viennacl.html">ViennaCL</a></li>
+                        <a class="dropdown-item"  
href="/docs/latest/native-solvers/viennacl-omp.html">ViennaCL-OMP</a></li>
+                        <a class="dropdown-item"  
href="/docs/latest/native-solvers/cuda.html">CUDA</a></li>-->
+                    </div>
+                </li>
+
+                <li class="nav-item dropdown">
+                    <a class="nav-link dropdown-toggle" href="" 
id="navbarDropdownMenuLink" data-toggle="dropdown" aria-haspopup="true" 
aria-expanded="false">Tutorial</a>
+                    <div class="dropdown-menu" 
aria-labelledby="navbarDropdownMenuLink">
+                        <div class="dropdown-divider"></div>
+                        <h6 class="dropdown-header">Recommenders</h6>
+                        <a class="dropdown-item"  
href="/docs/latest/tutorials/cco-lastfm">CCO Example with Last.FM Data</a>
+                        <a class="dropdown-item"  
href="/docs/latest/tutorials/intro-cooccurrence-spark">Introduction to 
Cooccurrence in Spark</a>
+                        <div class="dropdown-divider"></div>
+                        <h6 class="dropdown-header">Mahout Samsara</h6>
+                        <a class="dropdown-item"  
href="/docs/latest/tutorials/samsara/play-with-shell.html">Playing with Samsara 
in Spark Shell</a>
+                        <a class="dropdown-item"  
href="/docs/latest/tutorials/samsara/playing-with-samsara-flink-batch.html">Playing
 with Samsara in Flink Batch</a>
+                        <a class="dropdown-item"  
href="/docs/latest/tutorials/samsara/classify-a-doc-from-the-shell.html">Text 
Classification (Shell)</a>
+                        <a class="dropdown-item"  
href="/docs/latest/tutorials/samsara/spark-naive-bayes.html">Spark Naive 
Bayes</a>
+                        <div class="dropdown-divider"></div>
+                        <h6 class="dropdown-header">Misc</h6>
+                        <a class="dropdown-item"  
href="/docs/latest/tutorials/misc/mahout-in-zeppelin">Mahout in Apache 
Zeppelin</a>
+                        <a class="dropdown-item"  
href="/docs/latest/tutorials/misc/contributing-algos">How To Contribute a New 
Algorithm</a>
+                        <a class="dropdown-item"  
href="/docs/latest/tutorials/misc/how-to-build-an-app.html">How To Build An 
App</a>
+                        <div class="dropdown-divider"></div>
+                        <h6 class="dropdown-header">Deprecated</h6>
+                        <a class="dropdown-item"  
href="/docs/latest/tutorials/map-reduce">MapReduce</a>
+                    </div>
+                </li>
+
+
+                <!-- Algorithms (Samsara / MR) -->
+                <li class="nav-item dropdown">
+                    <a class="nav-link dropdown-toggle" href="" 
id="navbarDropdownMenuLink" data-toggle="dropdown" aria-haspopup="true" 
aria-expanded="false">Algorithms</a>
+                    <div class="dropdown-menu" 
aria-labelledby="navbarDropdownMenuLink">
+                        <a class="dropdown-item"  
href="/docs/latest/algorithms/linear-algebra">Distributed Linear Algebra</a>
+                        <a class="dropdown-item"  
href="/docs/latest/algorithms/preprocessors">Preprocessors</a>
+                        <a class="dropdown-item"  
href="/docs/latest/algorithms/regression">Regression</a>
+                        <a class="dropdown-item"  
href="/docs/latest/algorithms/clustering">Clustering</a>
+                        <a class="dropdown-item"  
href="/docs/latest/algorithms/recommenders">Recommenders</a>
+                        <div class="dropdown-divider"></div>
+                        <h6 class="dropdown-header">Deprecated</h6>
+                        <a class="dropdown-item"  
href="/docs/latest/algorithms/map-reduce">MapReduce <i>(deprecated)</i></a>
+                    </div>
+                        <!--<a class="dropdown-item"  
href="/docs/latest/algorithms/recommenders/recommender-overview.html">Reccomender
 Overview</a></li> Do we still need? seems like short version of next post-->
+                        <!--
+                        <a class="dropdown-item"  
href="/docs/latest/algorithms/recommenders/intro-cooccurrence-spark.html">Intro 
to Coocurrence With Spark</a></li>
+                        <li role="separator" class="divider"></li>
+                        <li><span>&nbsp;&nbsp;<a 
href="/docs/latest/algorithms/map-reduce"><b>MapReduce</b> 
(deprecated)</a><span></li>
+
+
+                     -->
+                </li>
+
+                <!-- Scala /docs -->
+                <li class="nav-item dropdown">
+                    <a class="nav-link dropdown-toggle" href="" 
id="navbarDropdownMenuLink" data-toggle="dropdown" aria-haspopup="true" 
aria-expanded="false">API /docs</a>
+                    <div class="dropdown-menu" 
aria-labelledby="navbarDropdownMenuLink">
+                        <a class="dropdown-item"  
href="/docs/0.13.0/api/docs/">0.13.0</a>
+                    </div>
+                </li>
+
+                <!-- Apache -->
+                <li class="nav-item dropdown">
+                    <a class="nav-link dropdown-toggle" href="" 
id="navbarDropdownMenuLink" data-toggle="dropdown" aria-haspopup="true" 
aria-expanded="false">Apache</a>
+                    <div class="dropdown-menu" 
aria-labelledby="navbarDropdownMenuLink">
+                        <a class="dropdown-item"  
href="http://www.apache.org/foundation/how-it-works.html">Apache Software 
Foundation</a>
+                        <a class="dropdown-item"  
href="http://www.apache.org/licenses/">Apache License</a>
+                        <a class="dropdown-item"  
href="http://www.apache.org/foundation/sponsorship.html">Sponsorship</a>
+                        <a class="dropdown-item"  
href="http://www.apache.org/foundation/thanks.html">Thanks</a>
+                    </div>
+                </li>
+
+            </ul>
+
+                <!--<form class="navbar-form navbar-left">-->
+                    <!--<div class="form-group">-->
+                        <!--<input type="text" class="form-control" 
placeholder="Search">-->
+                    <!--</div>-->
+                    <!--<button type="submit" class="btn 
btn-default">Submit</button>-->
+                <!--</form>-->
+                <!--<ul class="nav navbar-nav navbar-right">-->
+                    <!--<a class="dropdown-item"  
href="http://github.com/apache/mahout";>Github</a></li>-->
+
+
+
+                <!--</ul>-->
+        </div><!-- /.navbar-collapse -->
+    </div>
+</nav>
+
+
+  <div class="container mt-5 pb-4">
+
+  <div class="row">
+
+    <div class="col-lg-8">
+      <p>Seems like someone has jacked up this page? 
+TODO: Find the ALS Page</p>
+
+<h2 id="intro">Intro</h2>
+
+<p>Mahout has a distributed implementation of QR decomposition for tall thin 
matrices<a href="http://mahout.apache.org/users/sparkbindings/ScalaSparkBindings.pdf" 
title="Mahout Scala and Mahout Spark Bindings for Linear Algebra Subroutines">1</a>.</p>
+
+<h2 id="algorithm">Algorithm</h2>
+
+<p>For the classic QR decomposition of the form <code 
class="highlighter-rouge">\(\mathbf{A}=\mathbf{QR},\mathbf{A}\in\mathbb{R}^{m\times
 n}\)</code> a distributed version is fairly easily achieved if <code 
class="highlighter-rouge">\(\mathbf{A}\)</code> is tall and thin such that 
<code class="highlighter-rouge">\(\mathbf{A}^{\top}\mathbf{A}\)</code> fits in 
memory, i.e. <em>m</em> is large but <em>n</em> &lt; ~5000. Under such 
circumstances, only <code class="highlighter-rouge">\(\mathbf{A}\)</code> and 
<code class="highlighter-rouge">\(\mathbf{Q}\)</code> are distributed matrices 
and <code class="highlighter-rouge">\(\mathbf{A}^{\top}\mathbf{A}\)</code> and <code 
class="highlighter-rouge">\(\mathbf{R}\)</code> are in-core products. We just 
compute the in-core version of the Cholesky decomposition in the form of <code 
class="highlighter-rouge">\(\mathbf{LL}^{\top}= 
\mathbf{A}^{\top}\mathbf{A}\)</code>.  After that we take <code 
class="highlighter-rouge">\(\mathbf{R}= \mathbf{L}^{\top}\)</code>
 and <code 
class="highlighter-rouge">\(\mathbf{Q}=\mathbf{A}\left(\mathbf{L}^{\top}\right)^{-1}\)</code>.
  The latter is easily achieved by multiplying each vertical block of <code 
class="highlighter-rouge">\(\mathbf{A}\)</code> by <code 
class="highlighter-rouge">\(\left(\mathbf{L}^{\top}\right)^{-1}\)</code>.  
(There is no actual matrix inversion happening).</p>
+
+<h2 id="implementation">Implementation</h2>
+
+<p>Mahout <code class="highlighter-rouge">dqrThin(...)</code> is implemented 
in the mahout <code class="highlighter-rouge">math-scala</code> algebraic 
optimizer which translates Mahout’s R-like linear algebra operators into a 
physical plan for both Spark and H2O distributed engines.</p>
+
+<div class="highlighter-rouge"><pre class="highlight"><code>def dqrThin[K: 
ClassTag](A: DrmLike[K], checkRankDeficiency: Boolean = true): (DrmLike[K], 
Matrix) = {        
+    if (drmA.ncol &gt; 5000)
+        log.warn("A is too fat. A'A must fit in memory and easily 
broadcasted.")
+    implicit val ctx = drmA.context
+    val AtA = (drmA.t %*% drmA).checkpoint()
+    val inCoreAtA = AtA.collect
+    val ch = chol(inCoreAtA)
+    val inCoreR = (ch.getL cloned) t
+    if (checkRankDeficiency &amp;&amp; !ch.isPositiveDefinite)
+        throw new IllegalArgumentException("R is rank-deficient.")
+    val bcastAtA = sc.broadcast(inCoreAtA)
+    val Q = A.mapBlock() {
+        case (keys, block) =&gt; keys -&gt; chol(bcastAtA).solveRight(block)
+    }
+    Q -&gt; inCoreR
+}
+</code></pre>
+</div>
+
+<h2 id="usage">Usage</h2>
+
+<p>The Scala <code class="highlighter-rouge">dqrThin(...)</code> method can 
easily be called in any Spark or H2O application built with the <code 
class="highlighter-rouge">math-scala</code> library and the corresponding <code 
class="highlighter-rouge">Spark</code> or <code 
class="highlighter-rouge">H2O</code> engine module as follows:</p>
+
+<div class="highlighter-rouge"><pre class="highlight"><code>import 
org.apache.mahout.math._
+import decompositions._
+import drm._
+
+val(drmQ, inCoreR) = dqrThin(drma)
+</code></pre>
+</div>
+
+<h2 id="references">References</h2>
+
+
+    </div>
+
+
+  </div>
+
+</div>
+
+
+  <footer class="footer bg-light">
+    <div class="container text-center small">
+        Copyright &copy; 2014-2018 The Apache Software Foundation, Licensed 
under the Apache License, Version 2.0.
+    </div>
+</footer>
+
+  <script src="/assets/vendor/jquery/jquery-slim.min.js"></script>
+  <script src="/assets/vendor/popper/popper.min.js"></script>
+  <script src="/assets/vendor/bootstrap/js/bootstrap.min.js"></script>
+  <script src="/assets/header.js"></script>
+  <script 
src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.0/MathJax.js?config=TeX-AMS-MML_HTMLorMML"
 type="text/javascript"></script>
+
+</body>
+
+</html>

Reply via email to