Modified: websites/staging/mahout/trunk/content/overview.html
==============================================================================
--- websites/staging/mahout/trunk/content/overview.html (original)
+++ websites/staging/mahout/trunk/content/overview.html Fri Mar 20 03:46:10 2015
@@ -155,61 +155,63 @@
<li><a
href="/users/clustering/latent-dirichlet-allocation.html">Latent Dirichlet
Allocation</a></li>
</ul>
</li>
- <li class="dropdown"> <a href="#" class="dropdown-toggle"
data-toggle="dropdown">Spark<b class="caret"></b></a>
+ <li class="dropdown"> <a href="#" class="dropdown-toggle"
data-toggle="dropdown">Mahout Environment<b class="caret"></b></a>
<ul class="dropdown-menu">
<li><a href="/users/sparkbindings/home.html">Scala &
Spark Bindings Overview</a></li>
- <li><a
href="/users/sparkbindings/play-with-shell.html">Playing with Mahout's Spark
Shell</a></li>
- <li><a href="/users/algorithms/d-qr.html">Distributed
QR</a></li>
- <li class="divider"></li>
+ <li><a
href="/users/sparkbindings/play-with-shell.html">Playing with Mahout's Spark
Shell</a></li>
+ <li class="divider"></li>
<li><a href="/users/sparkbindings/faq.html">FAQ</a></li>
</ul>
</li>
- <li class="dropdown"> <a href="#" class="dropdown-toggle"
data-toggle="dropdown">Classification<b class="caret"></b></a>
+ <li class="dropdown"> <a href="#" class="dropdown-toggle"
data-toggle="dropdown">Algorithms<b class="caret"></b></a>
+ <ul class="dropdown-menu">
+ <li class="nav-header">Matrix Decomposition</li>
+ <li><a href="/users/algorithms/d-qr.html">Distributed
QR</a></li>
+ <li class="nav-header">Recommendations</li>
+ <li><a
href="/users/algorithms/intro-cooccurrence-spark.html">Intro to
cooccurrence-based<br/> recommendations with Spark</a></li>
+ <li class="nav-header">Classification</li>
+ <li><a href="/users/algorithms/spark-naive-bayes.html">Spark
Naive Bayes</a></li>
+ </ul>
+ </li>
+ <li class="dropdown"> <a href="#" class="dropdown-toggle"
data-toggle="dropdown">Mahout MapReduce<b class="caret"></b></a>
<ul class="dropdown-menu">
+ <li class="nav-header">Classification</li>
<li><a
href="/users/mapreduce/classification/bayesian.html">Naive Bayes</a></li>
<li><a
href="/users/mapreduce/classification/hidden-markov-models.html">Hidden Markov
Models</a></li>
<li><a
href="/users/mapreduce/classification/logistic-regression.html">Logistic
Regression</a></li>
<li><a
href="/users/mapreduce/classification/partial-implementation.html">Random
Forest</a></li>
-
- <li class="divider"></li>
- <li class="nav-header">Examples</li>
+ <li class="nav-header">Classification Examples</li>
<li><a
href="/users/mapreduce/classification/breiman-example.html">Breiman
example</a></li>
<li><a
href="/users/mapreduce/classification/twenty-newsgroups.html">20 newsgroups
example</a></li>
- </ul></li>
- <li class="dropdown"> <a href="#" class="dropdown-toggle"
data-toggle="dropdown">Clustering<b class="caret"></b></a>
- <ul class="dropdown-menu">
- <li><a
href="/users/mapreduce/clustering/k-means-clustering.html">k-Means</a></li>
- <li><a
href="/users/mapreduce/clustering/canopy-clustering.html">Canopy</a></li>
- <li><a
href="/users/mapreduce/clustering/fuzzy-k-means.html">Fuzzy k-Means</a></li>
- <li><a
href="/users/mapreduce/clustering/streaming-k-means.html">Streaming
KMeans</a></li>
- <li><a
href="/users/mapreduce/clustering/spectral-clustering.html">Spectral
Clustering</a></li>
- <li class="divider"></li>
- <li class="nav-header">Commandline usage</li>
- <li><a
href="/users/mapreduce/clustering/k-means-commandline.html">Options for
k-Means</a></li>
- <li><a
href="/users/mapreduce/clustering/canopy-commandline.html">Options for
Canopy</a></li>
- <li><a
href="/users/mapreduce/clustering/fuzzy-k-means-commandline.html">Options for
Fuzzy k-Means</a></li>
- <li class="divider"></li>
- <li class="nav-header">Examples</li>
- <li><a
href="/users/mapreduce/clustering/clustering-of-synthetic-control-data.html">Synthetic
data</a></li>
- <li class="divider"></li>
- <li class="nav-header">Post processing</li>
- <li><a
href="/users/mapreduce/clustering/cluster-dumper.html">Cluster Dumper
tool</a></li>
- <li><a
href="/users/mapreduce/clustering/visualizing-sample-clusters.html">Cluster
visualisation</a></li>
- </ul></li>
- <li class="dropdown"> <a href="#" class="dropdown-toggle"
data-toggle="dropdown">Recommendations<b class="caret"></b></a>
+ <li class="nav-header">Clustering</li>
+ <li><a
href="/users/mapreduce/clustering/k-means-clustering.html">k-Means</a></li>
+ <li><a
href="/users/mapreduce/clustering/canopy-clustering.html">Canopy</a></li>
+ <li><a
href="/users/mapreduce/clustering/fuzzy-k-means.html">Fuzzy k-Means</a></li>
+ <li><a
href="/users/mapreduce/clustering/streaming-k-means.html">Streaming
KMeans</a></li>
+ <li><a
href="/users/mapreduce/clustering/spectral-clustering.html">Spectral
Clustering</a></li>
+ <li class="nav-header">Clustering Commandline usage</li>
+ <li><a
href="/users/mapreduce/clustering/k-means-commandline.html">Options for
k-Means</a></li>
+ <li><a
href="/users/mapreduce/clustering/canopy-commandline.html">Options for
Canopy</a></li>
+ <li><a
href="/users/mapreduce/clustering/fuzzy-k-means-commandline.html">Options for
Fuzzy k-Means</a></li>
+ <li class="nav-header">Clustering Examples</li>
+ <li><a
href="/users/mapreduce/clustering/clustering-of-synthetic-control-data.html">Synthetic
data</a></li>
+ <li class="nav-header">Cluster Post processing</li>
+ <li><a
href="/users/mapreduce/clustering/cluster-dumper.html">Cluster Dumper
tool</a></li>
+ <li><a
href="/users/mapreduce/clustering/visualizing-sample-clusters.html">Cluster
visualisation</a></li>
+ <li class="nav-header">Recommendations</li>
+ <li><a
href="/users/mapreduce/recommender/quickstart.html">Quickstart</a></li>
+ <li><a
href="/users/mapreduce/recommender/recommender-first-timer-faq.html">First
Timer FAQ</a></li>
+ <li><a
href="/users/mapreduce/recommender/userbased-5-minutes.html">A user-based
recommender <br/>in 5 minutes</a></li>
+ <li><a
href="/users/mapreduce/recommender/matrix-factorization.html">Matrix
factorization-based<br/> recommenders</a></li>
+ <li><a
href="/users/mapreduce/recommender/recommender-documentation.html">Overview</a></li>
+ <li><a
href="/users/mapreduce/recommender/intro-itembased-hadoop.html">Intro to
item-based recommendations<br/> with Hadoop</a></li>
+ <li><a
href="/users/mapreduce/recommender/intro-als-hadoop.html">Intro to ALS
recommendations<br/> with Hadoop</a></li>
+ </ul>
+ </li>
+ <!-- <li class="dropdown"> <a href="#" class="dropdown-toggle"
data-toggle="dropdown">Recommendations<b class="caret"></b></a>
<ul class="dropdown-menu">
- <li><a
href="/users/mapreduce/recommender/quickstart.html">Quickstart</a></li>
- <li><a
href="/users/mapreduce/recommender/recommender-first-timer-faq.html">First
Timer FAQ</a></li>
- <li><a
href="/users/mapreduce/recommender/userbased-5-minutes.html">A user-based
recommender <br/>in 5 minutes</a></li>
- <li><a
href="/users/mapreduce/recommender/matrix-factorization.html">Matrix
factorization-based<br/> recommenders</a></li>
- <li><a
href="/users/mapreduce/recommender/recommender-documentation.html">Overview</a></li>
- <li class="divider"></li>
- <li class="nav-header">Hadoop</li>
- <li><a
href="/users/mapreduce/recommender/intro-itembased-hadoop.html">Intro to
item-based recommendations<br/> with Hadoop</a></li>
- <li><a
href="/users/mapreduce/recommender/intro-als-hadoop.html">Intro to ALS
recommendations<br/> with Hadoop</a></li>
- <li class="nav-header">Spark</li>
- <li><a
href="/users/mapreduce/recommender/intro-cooccurrence-spark.html">Intro to
cooccurrence-based<br/> recommendations with Spark</a></li>
- </ul>
+
+ </ul> -->
</li>
</ul>
</div><!--/.nav-collapse -->
Modified: websites/staging/mahout/trunk/content/users/algorithms/d-qr.html
==============================================================================
--- websites/staging/mahout/trunk/content/users/algorithms/d-qr.html (original)
+++ websites/staging/mahout/trunk/content/users/algorithms/d-qr.html Fri Mar 20
03:46:10 2015
@@ -155,61 +155,63 @@
<li><a
href="/users/clustering/latent-dirichlet-allocation.html">Latent Dirichlet
Allocation</a></li>
</ul>
</li>
- <li class="dropdown"> <a href="#" class="dropdown-toggle"
data-toggle="dropdown">Spark<b class="caret"></b></a>
+ <li class="dropdown"> <a href="#" class="dropdown-toggle"
data-toggle="dropdown">Mahout Environment<b class="caret"></b></a>
<ul class="dropdown-menu">
<li><a href="/users/sparkbindings/home.html">Scala &
Spark Bindings Overview</a></li>
- <li><a
href="/users/sparkbindings/play-with-shell.html">Playing with Mahout's Spark
Shell</a></li>
- <li><a href="/users/algorithms/d-qr.html">Distributed
QR</a></li>
- <li class="divider"></li>
+ <li><a
href="/users/sparkbindings/play-with-shell.html">Playing with Mahout's Spark
Shell</a></li>
+ <li class="divider"></li>
<li><a href="/users/sparkbindings/faq.html">FAQ</a></li>
</ul>
</li>
- <li class="dropdown"> <a href="#" class="dropdown-toggle"
data-toggle="dropdown">Classification<b class="caret"></b></a>
+ <li class="dropdown"> <a href="#" class="dropdown-toggle"
data-toggle="dropdown">Algorithms<b class="caret"></b></a>
+ <ul class="dropdown-menu">
+ <li class="nav-header">Matrix Decomposition</li>
+ <li><a href="/users/algorithms/d-qr.html">Distributed
QR</a></li>
+ <li class="nav-header">Recommendations</li>
+ <li><a
href="/users/algorithms/intro-cooccurrence-spark.html">Intro to
cooccurrence-based<br/> recommendations with Spark</a></li>
+ <li class="nav-header">Classification</li>
+ <li><a href="/users/algorithms/spark-naive-bayes.html">Spark
Naive Bayes</a></li>
+ </ul>
+ </li>
+ <li class="dropdown"> <a href="#" class="dropdown-toggle"
data-toggle="dropdown">Mahout MapReduce<b class="caret"></b></a>
<ul class="dropdown-menu">
+ <li class="nav-header">Classification</li>
<li><a
href="/users/mapreduce/classification/bayesian.html">Naive Bayes</a></li>
<li><a
href="/users/mapreduce/classification/hidden-markov-models.html">Hidden Markov
Models</a></li>
<li><a
href="/users/mapreduce/classification/logistic-regression.html">Logistic
Regression</a></li>
<li><a
href="/users/mapreduce/classification/partial-implementation.html">Random
Forest</a></li>
-
- <li class="divider"></li>
- <li class="nav-header">Examples</li>
+ <li class="nav-header">Classification Examples</li>
<li><a
href="/users/mapreduce/classification/breiman-example.html">Breiman
example</a></li>
<li><a
href="/users/mapreduce/classification/twenty-newsgroups.html">20 newsgroups
example</a></li>
- </ul></li>
- <li class="dropdown"> <a href="#" class="dropdown-toggle"
data-toggle="dropdown">Clustering<b class="caret"></b></a>
- <ul class="dropdown-menu">
- <li><a
href="/users/mapreduce/clustering/k-means-clustering.html">k-Means</a></li>
- <li><a
href="/users/mapreduce/clustering/canopy-clustering.html">Canopy</a></li>
- <li><a
href="/users/mapreduce/clustering/fuzzy-k-means.html">Fuzzy k-Means</a></li>
- <li><a
href="/users/mapreduce/clustering/streaming-k-means.html">Streaming
KMeans</a></li>
- <li><a
href="/users/mapreduce/clustering/spectral-clustering.html">Spectral
Clustering</a></li>
- <li class="divider"></li>
- <li class="nav-header">Commandline usage</li>
- <li><a
href="/users/mapreduce/clustering/k-means-commandline.html">Options for
k-Means</a></li>
- <li><a
href="/users/mapreduce/clustering/canopy-commandline.html">Options for
Canopy</a></li>
- <li><a
href="/users/mapreduce/clustering/fuzzy-k-means-commandline.html">Options for
Fuzzy k-Means</a></li>
- <li class="divider"></li>
- <li class="nav-header">Examples</li>
- <li><a
href="/users/mapreduce/clustering/clustering-of-synthetic-control-data.html">Synthetic
data</a></li>
- <li class="divider"></li>
- <li class="nav-header">Post processing</li>
- <li><a
href="/users/mapreduce/clustering/cluster-dumper.html">Cluster Dumper
tool</a></li>
- <li><a
href="/users/mapreduce/clustering/visualizing-sample-clusters.html">Cluster
visualisation</a></li>
- </ul></li>
- <li class="dropdown"> <a href="#" class="dropdown-toggle"
data-toggle="dropdown">Recommendations<b class="caret"></b></a>
+ <li class="nav-header">Clustering</li>
+ <li><a
href="/users/mapreduce/clustering/k-means-clustering.html">k-Means</a></li>
+ <li><a
href="/users/mapreduce/clustering/canopy-clustering.html">Canopy</a></li>
+ <li><a
href="/users/mapreduce/clustering/fuzzy-k-means.html">Fuzzy k-Means</a></li>
+ <li><a
href="/users/mapreduce/clustering/streaming-k-means.html">Streaming
KMeans</a></li>
+ <li><a
href="/users/mapreduce/clustering/spectral-clustering.html">Spectral
Clustering</a></li>
+ <li class="nav-header">Clustering Commandline usage</li>
+ <li><a
href="/users/mapreduce/clustering/k-means-commandline.html">Options for
k-Means</a></li>
+ <li><a
href="/users/mapreduce/clustering/canopy-commandline.html">Options for
Canopy</a></li>
+ <li><a
href="/users/mapreduce/clustering/fuzzy-k-means-commandline.html">Options for
Fuzzy k-Means</a></li>
+ <li class="nav-header">Clustering Examples</li>
+ <li><a
href="/users/mapreduce/clustering/clustering-of-synthetic-control-data.html">Synthetic
data</a></li>
+ <li class="nav-header">Cluster Post processing</li>
+ <li><a
href="/users/mapreduce/clustering/cluster-dumper.html">Cluster Dumper
tool</a></li>
+ <li><a
href="/users/mapreduce/clustering/visualizing-sample-clusters.html">Cluster
visualisation</a></li>
+ <li class="nav-header">Recommendations</li>
+ <li><a
href="/users/mapreduce/recommender/quickstart.html">Quickstart</a></li>
+ <li><a
href="/users/mapreduce/recommender/recommender-first-timer-faq.html">First
Timer FAQ</a></li>
+ <li><a
href="/users/mapreduce/recommender/userbased-5-minutes.html">A user-based
recommender <br/>in 5 minutes</a></li>
+ <li><a
href="/users/mapreduce/recommender/matrix-factorization.html">Matrix
factorization-based<br/> recommenders</a></li>
+ <li><a
href="/users/mapreduce/recommender/recommender-documentation.html">Overview</a></li>
+ <li><a
href="/users/mapreduce/recommender/intro-itembased-hadoop.html">Intro to
item-based recommendations<br/> with Hadoop</a></li>
+ <li><a
href="/users/mapreduce/recommender/intro-als-hadoop.html">Intro to ALS
recommendations<br/> with Hadoop</a></li>
+ </ul>
+ </li>
+ <!-- <li class="dropdown"> <a href="#" class="dropdown-toggle"
data-toggle="dropdown">Recommendations<b class="caret"></b></a>
<ul class="dropdown-menu">
- <li><a
href="/users/mapreduce/recommender/quickstart.html">Quickstart</a></li>
- <li><a
href="/users/mapreduce/recommender/recommender-first-timer-faq.html">First
Timer FAQ</a></li>
- <li><a
href="/users/mapreduce/recommender/userbased-5-minutes.html">A user-based
recommender <br/>in 5 minutes</a></li>
- <li><a
href="/users/mapreduce/recommender/matrix-factorization.html">Matrix
factorization-based<br/> recommenders</a></li>
- <li><a
href="/users/mapreduce/recommender/recommender-documentation.html">Overview</a></li>
- <li class="divider"></li>
- <li class="nav-header">Hadoop</li>
- <li><a
href="/users/mapreduce/recommender/intro-itembased-hadoop.html">Intro to
item-based recommendations<br/> with Hadoop</a></li>
- <li><a
href="/users/mapreduce/recommender/intro-als-hadoop.html">Intro to ALS
recommendations<br/> with Hadoop</a></li>
- <li class="nav-header">Spark</li>
- <li><a
href="/users/mapreduce/recommender/intro-cooccurrence-spark.html">Intro to
cooccurrence-based<br/> recommendations with Spark</a></li>
- </ul>
+
+ </ul> -->
</li>
</ul>
</div><!--/.nav-collapse -->
@@ -251,7 +253,7 @@
<p>Mahout has a distributed implementation of QR decomposition for tall thin
matrices[1].</p>
<h2 id="algorithm">Algorithm</h2>
<p>For the classic QR decomposition of the form
<code>\(\mathbf{A}=\mathbf{QR},\mathbf{A}\in\mathbb{R}^{m\times n}\)</code> a
distributed version is fairly easily achieved if <code>\(\mathbf{A}\)</code> is
tall and thin such that <code>\(\mathbf{A}^{\top}\mathbf{A}\)</code> fits in
memory, i.e. <em>m</em> is large but <em>n</em> &lt; ~5000. Under such
circumstances, only <code>\(\mathbf{A}\)</code> and <code>\(\mathbf{Q}\)</code>
are distributed matrices and <code>\(\mathbf{A^{\top}A}\)</code> and
<code>\(\mathbf{R}\)</code> are in-core products. We just compute the in-core
version of the Cholesky decomposition in the form of
<code>\(\mathbf{LL}^{\top}= \mathbf{A}^{\top}\mathbf{A}\)</code>. After that
we take <code>\(\mathbf{R}= \mathbf{L}^{\top}\)</code> and
<code>\(\mathbf{Q}=\mathbf{A}\left(\mathbf{L}^{\top}\right)^{-1}\)</code>. The
latter is easily achieved by multiplying each vertical block of
<code>\(\mathbf{A}\)</code> by
<code>\(\left(\mathbf{L}^{\top}\right)^{-1}\)</code
>. (There is no actual matrix inversion happening). </p>
-<h2 id="implementations">Implementations</h2>
+<h2 id="implementation">Implementation</h2>
<p>Mahout <code>dqrThin(...)</code> is implemented in the mahout
<code>math-scala</code> algebraic optimizer which translates Mahout's R-like
linear algebra operators into a physical plan for both Spark and H2O
distributed engines.</p>
<div class="codehilite"><pre><span class="n">def</span> <span
class="n">dqrThin</span><span class="p">[</span><span class="n">K</span><span
class="p">:</span> <span class="n">ClassTag</span><span
class="p">](</span><span class="n">A</span><span class="p">:</span> <span
class="n">DrmLike</span><span class="p">[</span><span class="n">K</span><span
class="p">],</span> <span class="n">checkRankDeficiency</span><span
class="p">:</span> <span class="n">Boolean</span> <span class="p">=</span>
<span class="n">true</span><span class="p">):</span> <span
class="p">(</span><span class="n">DrmLike</span><span class="p">[</span><span
class="n">K</span><span class="p">],</span> <span class="n">Matrix</span><span
class="p">)</span> <span class="p">=</span> <span class="p">{</span>
<span class="k">if</span> <span class="p">(</span><span
class="n">drmA</span><span class="p">.</span><span class="n">ncol</span> <span
class="o">></span> 5000<span class="p">)</span>
Modified:
websites/staging/mahout/trunk/content/users/algorithms/intro-cooccurrence-spark.html
==============================================================================
---
websites/staging/mahout/trunk/content/users/algorithms/intro-cooccurrence-spark.html
(original)
+++
websites/staging/mahout/trunk/content/users/algorithms/intro-cooccurrence-spark.html
Fri Mar 20 03:46:10 2015
@@ -155,61 +155,63 @@
<li><a
href="/users/clustering/latent-dirichlet-allocation.html">Latent Dirichlet
Allocation</a></li>
</ul>
</li>
- <li class="dropdown"> <a href="#" class="dropdown-toggle"
data-toggle="dropdown">Spark<b class="caret"></b></a>
+ <li class="dropdown"> <a href="#" class="dropdown-toggle"
data-toggle="dropdown">Mahout Environment<b class="caret"></b></a>
<ul class="dropdown-menu">
<li><a href="/users/sparkbindings/home.html">Scala &
Spark Bindings Overview</a></li>
- <li><a
href="/users/sparkbindings/play-with-shell.html">Playing with Mahout's Spark
Shell</a></li>
- <li><a href="/users/algorithms/d-qr.html">Distributed
QR</a></li>
- <li class="divider"></li>
+ <li><a
href="/users/sparkbindings/play-with-shell.html">Playing with Mahout's Spark
Shell</a></li>
+ <li class="divider"></li>
<li><a href="/users/sparkbindings/faq.html">FAQ</a></li>
</ul>
</li>
- <li class="dropdown"> <a href="#" class="dropdown-toggle"
data-toggle="dropdown">Classification<b class="caret"></b></a>
+ <li class="dropdown"> <a href="#" class="dropdown-toggle"
data-toggle="dropdown">Algorithms<b class="caret"></b></a>
+ <ul class="dropdown-menu">
+ <li class="nav-header">Matrix Decomposition</li>
+ <li><a href="/users/algorithms/d-qr.html">Distributed
QR</a></li>
+ <li class="nav-header">Recommendations</li>
+ <li><a
href="/users/algorithms/intro-cooccurrence-spark.html">Intro to
cooccurrence-based<br/> recommendations with Spark</a></li>
+ <li class="nav-header">Classification</li>
+ <li><a href="/users/algorithms/spark-naive-bayes.html">Spark
Naive Bayes</a></li>
+ </ul>
+ </li>
+ <li class="dropdown"> <a href="#" class="dropdown-toggle"
data-toggle="dropdown">Mahout MapReduce<b class="caret"></b></a>
<ul class="dropdown-menu">
+ <li class="nav-header">Classification</li>
<li><a
href="/users/mapreduce/classification/bayesian.html">Naive Bayes</a></li>
<li><a
href="/users/mapreduce/classification/hidden-markov-models.html">Hidden Markov
Models</a></li>
<li><a
href="/users/mapreduce/classification/logistic-regression.html">Logistic
Regression</a></li>
<li><a
href="/users/mapreduce/classification/partial-implementation.html">Random
Forest</a></li>
-
- <li class="divider"></li>
- <li class="nav-header">Examples</li>
+ <li class="nav-header">Classification Examples</li>
<li><a
href="/users/mapreduce/classification/breiman-example.html">Breiman
example</a></li>
<li><a
href="/users/mapreduce/classification/twenty-newsgroups.html">20 newsgroups
example</a></li>
- </ul></li>
- <li class="dropdown"> <a href="#" class="dropdown-toggle"
data-toggle="dropdown">Clustering<b class="caret"></b></a>
- <ul class="dropdown-menu">
- <li><a
href="/users/mapreduce/clustering/k-means-clustering.html">k-Means</a></li>
- <li><a
href="/users/mapreduce/clustering/canopy-clustering.html">Canopy</a></li>
- <li><a
href="/users/mapreduce/clustering/fuzzy-k-means.html">Fuzzy k-Means</a></li>
- <li><a
href="/users/mapreduce/clustering/streaming-k-means.html">Streaming
KMeans</a></li>
- <li><a
href="/users/mapreduce/clustering/spectral-clustering.html">Spectral
Clustering</a></li>
- <li class="divider"></li>
- <li class="nav-header">Commandline usage</li>
- <li><a
href="/users/mapreduce/clustering/k-means-commandline.html">Options for
k-Means</a></li>
- <li><a
href="/users/mapreduce/clustering/canopy-commandline.html">Options for
Canopy</a></li>
- <li><a
href="/users/mapreduce/clustering/fuzzy-k-means-commandline.html">Options for
Fuzzy k-Means</a></li>
- <li class="divider"></li>
- <li class="nav-header">Examples</li>
- <li><a
href="/users/mapreduce/clustering/clustering-of-synthetic-control-data.html">Synthetic
data</a></li>
- <li class="divider"></li>
- <li class="nav-header">Post processing</li>
- <li><a
href="/users/mapreduce/clustering/cluster-dumper.html">Cluster Dumper
tool</a></li>
- <li><a
href="/users/mapreduce/clustering/visualizing-sample-clusters.html">Cluster
visualisation</a></li>
- </ul></li>
- <li class="dropdown"> <a href="#" class="dropdown-toggle"
data-toggle="dropdown">Recommendations<b class="caret"></b></a>
+ <li class="nav-header">Clustering</li>
+ <li><a
href="/users/mapreduce/clustering/k-means-clustering.html">k-Means</a></li>
+ <li><a
href="/users/mapreduce/clustering/canopy-clustering.html">Canopy</a></li>
+ <li><a
href="/users/mapreduce/clustering/fuzzy-k-means.html">Fuzzy k-Means</a></li>
+ <li><a
href="/users/mapreduce/clustering/streaming-k-means.html">Streaming
KMeans</a></li>
+ <li><a
href="/users/mapreduce/clustering/spectral-clustering.html">Spectral
Clustering</a></li>
+ <li class="nav-header">Clustering Commandline usage</li>
+ <li><a
href="/users/mapreduce/clustering/k-means-commandline.html">Options for
k-Means</a></li>
+ <li><a
href="/users/mapreduce/clustering/canopy-commandline.html">Options for
Canopy</a></li>
+ <li><a
href="/users/mapreduce/clustering/fuzzy-k-means-commandline.html">Options for
Fuzzy k-Means</a></li>
+ <li class="nav-header">Clustering Examples</li>
+ <li><a
href="/users/mapreduce/clustering/clustering-of-synthetic-control-data.html">Synthetic
data</a></li>
+ <li class="nav-header">Cluster Post processing</li>
+ <li><a
href="/users/mapreduce/clustering/cluster-dumper.html">Cluster Dumper
tool</a></li>
+ <li><a
href="/users/mapreduce/clustering/visualizing-sample-clusters.html">Cluster
visualisation</a></li>
+ <li class="nav-header">Recommendations</li>
+ <li><a
href="/users/mapreduce/recommender/quickstart.html">Quickstart</a></li>
+ <li><a
href="/users/mapreduce/recommender/recommender-first-timer-faq.html">First
Timer FAQ</a></li>
+ <li><a
href="/users/mapreduce/recommender/userbased-5-minutes.html">A user-based
recommender <br/>in 5 minutes</a></li>
+ <li><a
href="/users/mapreduce/recommender/matrix-factorization.html">Matrix
factorization-based<br/> recommenders</a></li>
+ <li><a
href="/users/mapreduce/recommender/recommender-documentation.html">Overview</a></li>
+ <li><a
href="/users/mapreduce/recommender/intro-itembased-hadoop.html">Intro to
item-based recommendations<br/> with Hadoop</a></li>
+ <li><a
href="/users/mapreduce/recommender/intro-als-hadoop.html">Intro to ALS
recommendations<br/> with Hadoop</a></li>
+ </ul>
+ </li>
+ <!-- <li class="dropdown"> <a href="#" class="dropdown-toggle"
data-toggle="dropdown">Recommendations<b class="caret"></b></a>
<ul class="dropdown-menu">
- <li><a
href="/users/mapreduce/recommender/quickstart.html">Quickstart</a></li>
- <li><a
href="/users/mapreduce/recommender/recommender-first-timer-faq.html">First
Timer FAQ</a></li>
- <li><a
href="/users/mapreduce/recommender/userbased-5-minutes.html">A user-based
recommender <br/>in 5 minutes</a></li>
- <li><a
href="/users/mapreduce/recommender/matrix-factorization.html">Matrix
factorization-based<br/> recommenders</a></li>
- <li><a
href="/users/mapreduce/recommender/recommender-documentation.html">Overview</a></li>
- <li class="divider"></li>
- <li class="nav-header">Hadoop</li>
- <li><a
href="/users/mapreduce/recommender/intro-itembased-hadoop.html">Intro to
item-based recommendations<br/> with Hadoop</a></li>
- <li><a
href="/users/mapreduce/recommender/intro-als-hadoop.html">Intro to ALS
recommendations<br/> with Hadoop</a></li>
- <li class="nav-header">Spark</li>
- <li><a
href="/users/mapreduce/recommender/intro-cooccurrence-spark.html">Intro to
cooccurrence-based<br/> recommendations with Spark</a></li>
- </ul>
+
+ </ul> -->
</li>
</ul>
</div><!--/.nav-collapse -->
Modified:
websites/staging/mahout/trunk/content/users/algorithms/spark-naive-bayes.html
==============================================================================
---
websites/staging/mahout/trunk/content/users/algorithms/spark-naive-bayes.html
(original)
+++
websites/staging/mahout/trunk/content/users/algorithms/spark-naive-bayes.html
Fri Mar 20 03:46:10 2015
@@ -155,61 +155,63 @@
<li><a
href="/users/clustering/latent-dirichlet-allocation.html">Latent Dirichlet
Allocation</a></li>
</ul>
</li>
- <li class="dropdown"> <a href="#" class="dropdown-toggle"
data-toggle="dropdown">Spark<b class="caret"></b></a>
+ <li class="dropdown"> <a href="#" class="dropdown-toggle"
data-toggle="dropdown">Mahout Environment<b class="caret"></b></a>
<ul class="dropdown-menu">
<li><a href="/users/sparkbindings/home.html">Scala &
Spark Bindings Overview</a></li>
- <li><a
href="/users/sparkbindings/play-with-shell.html">Playing with Mahout's Spark
Shell</a></li>
- <li><a href="/users/algorithms/d-qr.html">Distributed
QR</a></li>
- <li class="divider"></li>
+ <li><a
href="/users/sparkbindings/play-with-shell.html">Playing with Mahout's Spark
Shell</a></li>
+ <li class="divider"></li>
<li><a href="/users/sparkbindings/faq.html">FAQ</a></li>
</ul>
</li>
- <li class="dropdown"> <a href="#" class="dropdown-toggle"
data-toggle="dropdown">Classification<b class="caret"></b></a>
+ <li class="dropdown"> <a href="#" class="dropdown-toggle"
data-toggle="dropdown">Algorithms<b class="caret"></b></a>
+ <ul class="dropdown-menu">
+ <li class="nav-header">Matrix Decomposition</li>
+ <li><a href="/users/algorithms/d-qr.html">Distributed
QR</a></li>
+ <li class="nav-header">Recommendations</li>
+ <li><a
href="/users/algorithms/intro-cooccurrence-spark.html">Intro to
cooccurrence-based<br/> recommendations with Spark</a></li>
+ <li class="nav-header">Classification</li>
+ <li><a href="/users/algorithms/spark-naive-bayes.html">Spark
Naive Bayes</a></li>
+ </ul>
+ </li>
+ <li class="dropdown"> <a href="#" class="dropdown-toggle"
data-toggle="dropdown">Mahout MapReduce<b class="caret"></b></a>
<ul class="dropdown-menu">
+ <li class="nav-header">Classification</li>
<li><a
href="/users/mapreduce/classification/bayesian.html">Naive Bayes</a></li>
<li><a
href="/users/mapreduce/classification/hidden-markov-models.html">Hidden Markov
Models</a></li>
<li><a
href="/users/mapreduce/classification/logistic-regression.html">Logistic
Regression</a></li>
<li><a
href="/users/mapreduce/classification/partial-implementation.html">Random
Forest</a></li>
-
- <li class="divider"></li>
- <li class="nav-header">Examples</li>
+ <li class="nav-header">Classification Examples</li>
<li><a
href="/users/mapreduce/classification/breiman-example.html">Breiman
example</a></li>
<li><a
href="/users/mapreduce/classification/twenty-newsgroups.html">20 newsgroups
example</a></li>
- </ul></li>
- <li class="dropdown"> <a href="#" class="dropdown-toggle"
data-toggle="dropdown">Clustering<b class="caret"></b></a>
- <ul class="dropdown-menu">
- <li><a
href="/users/mapreduce/clustering/k-means-clustering.html">k-Means</a></li>
- <li><a
href="/users/mapreduce/clustering/canopy-clustering.html">Canopy</a></li>
- <li><a
href="/users/mapreduce/clustering/fuzzy-k-means.html">Fuzzy k-Means</a></li>
- <li><a
href="/users/mapreduce/clustering/streaming-k-means.html">Streaming
KMeans</a></li>
- <li><a
href="/users/mapreduce/clustering/spectral-clustering.html">Spectral
Clustering</a></li>
- <li class="divider"></li>
- <li class="nav-header">Commandline usage</li>
- <li><a
href="/users/mapreduce/clustering/k-means-commandline.html">Options for
k-Means</a></li>
- <li><a
href="/users/mapreduce/clustering/canopy-commandline.html">Options for
Canopy</a></li>
- <li><a
href="/users/mapreduce/clustering/fuzzy-k-means-commandline.html">Options for
Fuzzy k-Means</a></li>
- <li class="divider"></li>
- <li class="nav-header">Examples</li>
- <li><a
href="/users/mapreduce/clustering/clustering-of-synthetic-control-data.html">Synthetic
data</a></li>
- <li class="divider"></li>
- <li class="nav-header">Post processing</li>
- <li><a
href="/users/mapreduce/clustering/cluster-dumper.html">Cluster Dumper
tool</a></li>
- <li><a
href="/users/mapreduce/clustering/visualizing-sample-clusters.html">Cluster
visualisation</a></li>
- </ul></li>
- <li class="dropdown"> <a href="#" class="dropdown-toggle"
data-toggle="dropdown">Recommendations<b class="caret"></b></a>
+ <li class="nav-header">Clustering</li>
+ <li><a
href="/users/mapreduce/clustering/k-means-clustering.html">k-Means</a></li>
+ <li><a
href="/users/mapreduce/clustering/canopy-clustering.html">Canopy</a></li>
+ <li><a
href="/users/mapreduce/clustering/fuzzy-k-means.html">Fuzzy k-Means</a></li>
+ <li><a
href="/users/mapreduce/clustering/streaming-k-means.html">Streaming
KMeans</a></li>
+ <li><a
href="/users/mapreduce/clustering/spectral-clustering.html">Spectral
Clustering</a></li>
+ <li class="nav-header">Clustering Commandline usage</li>
+ <li><a
href="/users/mapreduce/clustering/k-means-commandline.html">Options for
k-Means</a></li>
+ <li><a
href="/users/mapreduce/clustering/canopy-commandline.html">Options for
Canopy</a></li>
+ <li><a
href="/users/mapreduce/clustering/fuzzy-k-means-commandline.html">Options for
Fuzzy k-Means</a></li>
+ <li class="nav-header">Clustering Examples</li>
+ <li><a
href="/users/mapreduce/clustering/clustering-of-synthetic-control-data.html">Synthetic
data</a></li>
+ <li class="nav-header">Cluster Post processing</li>
+ <li><a
href="/users/mapreduce/clustering/cluster-dumper.html">Cluster Dumper
tool</a></li>
+ <li><a
href="/users/mapreduce/clustering/visualizing-sample-clusters.html">Cluster
visualisation</a></li>
+ <li class="nav-header">Recommendations</li>
+ <li><a
href="/users/mapreduce/recommender/quickstart.html">Quickstart</a></li>
+ <li><a
href="/users/mapreduce/recommender/recommender-first-timer-faq.html">First
Timer FAQ</a></li>
+ <li><a
href="/users/mapreduce/recommender/userbased-5-minutes.html">A user-based
recommender <br/>in 5 minutes</a></li>
+ <li><a
href="/users/mapreduce/recommender/matrix-factorization.html">Matrix
factorization-based<br/> recommenders</a></li>
+ <li><a
href="/users/mapreduce/recommender/recommender-documentation.html">Overview</a></li>
+ <li><a
href="/users/mapreduce/recommender/intro-itembased-hadoop.html">Intro to
item-based recommendations<br/> with Hadoop</a></li>
+ <li><a
href="/users/mapreduce/recommender/intro-als-hadoop.html">Intro to ALS
recommendations<br/> with Hadoop</a></li>
+ </ul>
+ </li>
+ <!-- <li class="dropdown"> <a href="#" class="dropdown-toggle"
data-toggle="dropdown">Recommendations<b class="caret"></b></a>
<ul class="dropdown-menu">
- <li><a
href="/users/mapreduce/recommender/quickstart.html">Quickstart</a></li>
- <li><a
href="/users/mapreduce/recommender/recommender-first-timer-faq.html">First
Timer FAQ</a></li>
- <li><a
href="/users/mapreduce/recommender/userbased-5-minutes.html">A user-based
recommender <br/>in 5 minutes</a></li>
- <li><a
href="/users/mapreduce/recommender/matrix-factorization.html">Matrix
factorization-based<br/> recommenders</a></li>
- <li><a
href="/users/mapreduce/recommender/recommender-documentation.html">Overview</a></li>
- <li class="divider"></li>
- <li class="nav-header">Hadoop</li>
- <li><a
href="/users/mapreduce/recommender/intro-itembased-hadoop.html">Intro to
item-based recommendations<br/> with Hadoop</a></li>
- <li><a
href="/users/mapreduce/recommender/intro-als-hadoop.html">Intro to ALS
recommendations<br/> with Hadoop</a></li>
- <li class="nav-header">Spark</li>
- <li><a
href="/users/mapreduce/recommender/intro-cooccurrence-spark.html">Intro to
cooccurrence-based<br/> recommendations with Spark</a></li>
- </ul>
+
+ </ul> -->
</li>
</ul>
</div><!--/.nav-collapse -->
@@ -246,12 +248,12 @@
<div id="content-wrap" class="clearfix">
<div id="main">
- <h1 id="naive-bayes">Naive Bayes</h1>
+ <h1 id="spark-naive-bayes">Spark Naive Bayes</h1>
<h2 id="intro">Intro</h2>
-<p>Mahout currently has two Naive Bayes implementations. The first is
standard Multinomial Naive Bayes. The second is an implementation of
Transformed Weight-normalized Complement Naive Bayes as introduced by Rennie et
al. <a href="http://people.csail.mit.edu/jrennie/papers/icml03-nb.pdf">[1]</a>.
We refer to the former as Bayes and the latter as CBayes.</p>
+<p>Mahout currently has two flavors of Naive Bayes. The first is standard
Multinomial Naive Bayes. The second is an implementation of Transformed
Weight-normalized Complement Naive Bayes as introduced by Rennie et al. <a
href="http://people.csail.mit.edu/jrennie/papers/icml03-nb.pdf">[1]</a>. We
refer to the former as Bayes and the latter as CBayes.</p>
<p>Where Bayes has long been a standard in text classification, CBayes is an
extension of Bayes that performs particularly well on datasets with skewed
classes and has been shown to be competitive with algorithms of higher
complexity such as Support Vector Machines. </p>
<h2 id="implementations">Implementations</h2>
-<p>Both Bayes and CBayes are currently trained via MapReduce Jobs. Testing and
classification can be done via a MapReduce Job or sequentially. Mahout
provides CLI drivers for preprocessing, training and testing. A Spark
implementation is currently in the works (<a
href="https://issues.apache.org/jira/browse/MAHOUT-1493">MAHOUT-1493</a>).</p>
+<p>The Mahout <code>math-scala</code> library has an implementation of both
Bayes and CBayes which is further optimized in the <code>spark</code> module.
Currently the Spark optimized version provides CLI drivers for training and
testing. Mahout Spark-Naive-Bayes models can also be trained, tested and saved
to the filesystem from the Mahout Spark Shell. </p>
<h2 id="preprocessing-and-algorithm">Preprocessing and Algorithm</h2>
<p>As described in <a
href="http://people.csail.mit.edu/jrennie/papers/icml03-nb.pdf">[1]</a> Mahout
Naive Bayes is broken down into the following steps (assignments are over all
possible index values): </p>
<ul>
@@ -288,7 +290,7 @@
<li>
<p><strong>Preprocessing:</strong>
For a set of Sequence File Formatted documents in PATH_TO_SEQUENCE_FILES the
<a
href="https://mahout.apache.org/users/basics/creating-vectors-from-text.html">mahout
seq2sparse</a> command performs the TF-IDF transformations (-wt tfidf option)
and L2 length normalization (-n 2 option) as follows:</p>
-<div class="codehilite"><pre>mahout seq2sparse
+<div class="codehilite"><pre>$ mahout seq2sparse
-i <span class="cp">${</span><span
class="n">PATH_TO_SEQUENCE_FILES</span><span class="cp">}</span>
-o <span class="cp">${</span><span
class="n">PATH_TO_TFIDF_VECTORS</span><span class="cp">}</span>
-nv
@@ -300,12 +302,10 @@ For a set of Sequence File Formatted doc
</li>
<li>
<p><strong>Training:</strong>
-The model is then trained using <code>mahout spark-trainnb</code> . The
default is to train a Bayes model. The -c option is given to train a CBayes
model:</p>
-<div class="codehilite"><pre>mahout spark-trainnb
+The model is then trained using <code>mahout spark-trainnb</code>. The
default is to train a Bayes model. The -c option is given to train a CBayes
model:</p>
+<div class="codehilite"><pre>$ mahout spark-trainnb
-i <span class="cp">${</span><span
class="n">PATH_TO_TFIDF_VECTORS</span><span class="cp">}</span>
- -el
- -o <span class="cp">${</span><span class="n">PATH_TO_MODEL</span><span
class="cp">}</span>/model
- -li <span class="cp">${</span><span class="n">PATH_TO_MODEL</span><span
class="cp">}</span>/labelindex
+ -o <span class="cp">${</span><span class="n">PATH_TO_MODEL</span><span
class="cp">}</span>
-ow
-c
</pre></div>
@@ -314,10 +314,10 @@ The model is then trained using <code>ma
</li>
<li>
<p><strong>Label Assignment/Testing:</strong>
-Classification and testing on a holdout set can then be performed via
<code>mahout testnb</code>. Again, the -c option indicates that the model is
CBayes. The -seq option tells <code>mahout testnb</code> to run
sequentially:</p>
-<div class="codehilite"><pre>mahout spark-testnb
+Classification and testing on a holdout set can then be performed via
<code>mahout spark-testnb</code>. Again, the -c option indicates that the model
is CBayes:</p>
+<div class="codehilite"><pre>$ mahout spark-testnb
-i <span class="cp">${</span><span
class="n">PATH_TO_TFIDF_TEST_VECTORS</span><span class="cp">}</span>
- -m <span class="cp">${</span><span class="n">PATH_TO_MODEL</span><span
class="cp">}</span>/model
+ -m <span class="cp">${</span><span class="n">PATH_TO_MODEL</span><span
class="cp">}</span>
-ow
-c
</pre></div>
@@ -327,10 +327,10 @@ Classification and testing on a holdout
</ul>
<h2 id="command-line-options">Command line options</h2>
<ul>
-<li><strong>Preprocessing:</strong></li>
+<li><strong>Preprocessing:</strong> <em>note: still reliant on MapReduce
seq2sparse</em> </li>
</ul>
<p>Only relevant parameters used for Bayes/CBayes as detailed above are shown.
Several other transformations can be performed by <code>mahout
seq2sparse</code> and used as input to Bayes/CBayes. For a full list of
<code>mahout seq2sparse</code> options see the <a
href="https://mahout.apache.org/users/basics/creating-vectors-from-text.html">Creating
vectors from text</a> page.</p>
-<div class="codehilite"><pre> <span class="n">mahout</span> <span
class="n">seq2sparse</span>
+<div class="codehilite"><pre> $ <span class="n">mahout</span> <span
class="n">seq2sparse</span>
<span class="o">--</span><span class="n">output</span> <span
class="p">(</span><span class="o">-</span><span class="n">o</span><span
class="p">)</span> <span class="n">output</span> <span
class="n">The</span> <span class="n">directory</span> <span
class="n">pathname</span> <span class="k">for</span> <span
class="n">output</span><span class="p">.</span>
<span class="o">--</span><span class="n">input</span> <span
class="p">(</span><span class="o">-</span><span class="nb">i</span><span
class="p">)</span> <span class="n">input</span> <span
class="n">Path</span> <span class="n">to</span> <span class="n">job</span>
<span class="n">input</span> <span class="n">directory</span><span
class="p">.</span>
<span class="o">--</span><span class="n">weight</span> <span
class="p">(</span><span class="o">-</span><span class="n">wt</span><span
class="p">)</span> <span class="n">weight</span> <span
class="n">The</span> <span class="n">kind</span> <span class="n">of</span>
<span class="n">weight</span> <span class="n">to</span> <span
class="n">use</span><span class="p">.</span> <span class="n">Currently</span>
<span class="n">TF</span>
@@ -351,51 +351,31 @@ Classification and testing on a holdout
<ul>
<li>
<p><strong>Training:</strong></p>
-<div class="codehilite"><pre><span class="n">mahout</span> <span
class="n">trainnb</span>
+<div class="codehilite"><pre>$ <span class="n">mahout</span> <span
class="n">spark</span><span class="o">-</span><span class="n">trainnb</span>
<span class="o">--</span><span class="n">input</span> <span
class="p">(</span><span class="o">-</span><span class="nb">i</span><span
class="p">)</span> <span class="n">input</span> <span
class="n">Path</span> <span class="n">to</span> <span class="n">job</span>
<span class="n">input</span> <span class="n">directory</span><span
class="p">.</span>
<span class="o">--</span><span class="n">output</span> <span
class="p">(</span><span class="o">-</span><span class="n">o</span><span
class="p">)</span> <span class="n">output</span> <span
class="n">The</span> <span class="n">directory</span> <span
class="n">pathname</span> <span class="k">for</span> <span
class="n">output</span><span class="p">.</span>
- <span class="o">--</span><span class="n">labels</span> <span
class="p">(</span><span class="o">-</span><span class="n">l</span><span
class="p">)</span> <span class="n">labels</span> <span
class="n">Comma</span><span class="o">-</span><span class="n">separated</span>
<span class="n">list</span> <span class="n">of</span> <span
class="n">labels</span> <span class="n">to</span> <span
class="n">include</span> <span class="n">in</span>
- <span class="n">training</span>
- <span class="o">--</span><span class="n">extractLabels</span> <span
class="p">(</span><span class="o">-</span><span class="n">el</span><span
class="p">)</span> <span class="n">Extract</span> <span
class="n">the</span> <span class="n">labels</span> <span class="n">from</span>
<span class="n">the</span> <span class="n">input</span>
<span class="o">--</span><span class="n">alphaI</span> <span
class="p">(</span><span class="o">-</span><span class="n">a</span><span
class="p">)</span> <span class="n">alphaI</span> <span
class="n">Smoothing</span> <span class="n">parameter</span><span
class="p">.</span> <span class="n">Default</span> <span class="n">is</span>
1<span class="p">.</span>0
+ <span class="o">--</span><span class="n">overwrite</span> <span
class="p">(</span><span class="o">-</span><span class="n">ow</span><span
class="p">)</span> <span class="n">If</span> <span
class="n">present</span><span class="p">,</span> <span
class="n">overwrite</span> <span class="n">the</span> <span
class="n">output</span> <span class="n">directory</span><span
class="p">.</span> <span class="n">Default</span> <span class="n">is</span>
<span class="n">false</span><span class="p">.</span>
<span class="o">--</span><span class="n">trainComplementary</span> <span
class="p">(</span><span class="o">-</span><span class="n">c</span><span
class="p">)</span> <span class="n">Train</span> <span
class="n">complementary</span>? <span class="n">Default</span> <span
class="n">is</span> <span class="n">false</span><span class="p">.</span>
- <span class="o">--</span><span class="n">labelIndex</span> <span
class="p">(</span><span class="o">-</span><span class="n">li</span><span
class="p">)</span> <span class="n">labelIndex</span> <span
class="n">The</span> <span class="n">path</span> <span class="n">to</span>
<span class="n">store</span> <span class="n">the</span> <span
class="n">label</span> <span class="n">index</span> <span class="n">in</span>
- <span class="o">--</span><span class="n">overwrite</span> <span
class="p">(</span><span class="o">-</span><span class="n">ow</span><span
class="p">)</span> <span class="n">If</span> <span
class="n">present</span><span class="p">,</span> <span
class="n">overwrite</span> <span class="n">the</span> <span
class="n">output</span> <span class="n">directory</span>
- <span class="n">before</span> <span
class="n">running</span> <span class="n">job</span>
- <span class="o">--</span><span class="n">help</span> <span
class="p">(</span><span class="o">-</span><span class="n">h</span><span
class="p">)</span> <span class="n">Print</span> <span
class="n">out</span> <span class="n">help</span>
- <span class="o">--</span><span class="n">tempDir</span> <span
class="n">tempDir</span> <span class="n">Intermediate</span>
<span class="n">output</span> <span class="n">directory</span>
- <span class="o">--</span><span class="n">startPhase</span> <span
class="n">startPhase</span> <span class="n">First</span> <span
class="n">phase</span> <span class="n">to</span> <span class="n">run</span>
- <span class="o">--</span><span class="n">endPhase</span> <span
class="n">endPhase</span> <span class="n">Last</span> <span
class="n">phase</span> <span class="n">to</span> <span class="n">run</span>
+ <span class="o">--</span><span class="n">help</span> <span
class="p">(</span><span class="o">-</span><span class="n">h</span><span
class="p">)</span> <span class="n">Print</span> <span
class="n">out</span> <span class="n">help</span>
</pre></div>
</li>
<li>
<p><strong>Testing:</strong></p>
-<div class="codehilite"><pre><span class="n">mahout</span> <span
class="n">testnb</span>
+<div class="codehilite"><pre>$ <span class="n">mahout</span> <span
class="n">spark</span><span class="o">-</span><span class="n">testnb</span>
<span class="o">--</span><span class="n">input</span> <span
class="p">(</span><span class="o">-</span><span class="nb">i</span><span
class="p">)</span> <span class="n">input</span> <span
class="n">Path</span> <span class="n">to</span> <span class="n">job</span>
<span class="n">input</span> <span class="n">directory</span><span
class="p">.</span>
<span class="o">--</span><span class="n">output</span> <span
class="p">(</span><span class="o">-</span><span class="n">o</span><span
class="p">)</span> <span class="n">output</span> <span
class="n">The</span> <span class="n">directory</span> <span
class="n">pathname</span> <span class="k">for</span> <span
class="n">output</span><span class="p">.</span>
- <span class="o">--</span><span class="n">overwrite</span> <span
class="p">(</span><span class="o">-</span><span class="n">ow</span><span
class="p">)</span> <span class="n">If</span> <span
class="n">present</span><span class="p">,</span> <span
class="n">overwrite</span> <span class="n">the</span> <span
class="n">output</span> <span class="n">directory</span>
- <span class="n">before</span> <span
class="n">running</span> <span class="n">job</span>
-
- <span class="o">--</span><span class="n">model</span> <span
class="p">(</span><span class="o">-</span><span class="n">m</span><span
class="p">)</span> <span class="n">model</span> <span
class="n">The</span> <span class="n">path</span> <span class="n">to</span>
<span class="n">the</span> <span class="n">model</span> <span
class="n">built</span> <span class="n">during</span> <span
class="n">training</span>
+ <span class="o">--</span><span class="n">model</span> <span
class="p">(</span><span class="o">-</span><span class="n">m</span><span
class="p">)</span> <span class="n">model</span> <span
class="n">The</span> <span class="n">path</span> <span class="n">to</span>
<span class="n">the</span> <span class="n">model</span> <span
class="n">built</span> <span class="n">during</span> <span
class="n">training</span><span class="p">.</span>
+ <span class="o">--</span><span class="n">overwrite</span> <span
class="p">(</span><span class="o">-</span><span class="n">ow</span><span
class="p">)</span> <span class="n">If</span> <span
class="n">present</span><span class="p">,</span> <span
class="n">overwrite</span> <span class="n">the</span> <span
class="n">output</span> <span class="n">directory</span>
<span class="o">--</span><span class="n">testComplementary</span> <span
class="p">(</span><span class="o">-</span><span class="n">c</span><span
class="p">)</span> <span class="n">Test</span> <span
class="n">complementary</span>? <span class="n">Default</span> <span
class="n">is</span> <span class="n">false</span><span class="p">.</span>
- <span class="o">--</span><span class="n">runSequential</span> <span
class="p">(</span><span class="o">-</span><span class="n">seq</span><span
class="p">)</span> <span class="n">Run</span> <span
class="n">sequential</span>?
- <span class="o">--</span><span class="n">labelIndex</span> <span
class="p">(</span><span class="o">-</span><span class="n">l</span><span
class="p">)</span> <span class="n">labelIndex</span> <span
class="n">The</span> <span class="n">path</span> <span class="n">to</span>
<span class="n">the</span> <span class="n">location</span> <span
class="n">of</span> <span class="n">the</span> <span class="n">label</span>
<span class="n">index</span>
- <span class="o">--</span><span class="n">help</span> <span
class="p">(</span><span class="o">-</span><span class="n">h</span><span
class="p">)</span> <span class="n">Print</span> <span
class="n">out</span> <span class="n">help</span>
- <span class="o">--</span><span class="n">tempDir</span> <span
class="n">tempDir</span> <span class="n">Intermediate</span>
<span class="n">output</span> <span class="n">directory</span>
- <span class="o">--</span><span class="n">startPhase</span> <span
class="n">startPhase</span> <span class="n">First</span> <span
class="n">phase</span> <span class="n">to</span> <span class="n">run</span>
- <span class="o">--</span><span class="n">endPhase</span> <span
class="n">endPhase</span> <span class="n">Last</span> <span
class="n">phase</span> <span class="n">to</span> <span class="n">run</span>
+ <span class="o">--</span><span class="n">help</span> <span
class="p">(</span><span class="o">-</span><span class="n">h</span><span
class="p">)</span> <span class="n">Print</span> <span
class="n">out</span> <span class="n">help</span>
</pre></div>
</li>
</ul>
-<h2 id="examples">Examples</h2>
-<p>Mahout provides an example for Naive Bayes classification:</p>
-<ol>
-<li><a href="twenty-newsgroups.html">Classify 20 Newsgroups</a></li>
-</ol>
<h2 id="references">References</h2>
<p>[1]: Jason D. M. Rennie, Lawrence Shih, Jaime Teevan, David Karger (2003).
<a href="http://people.csail.mit.edu/jrennie/papers/icml03-nb.pdf">Tackling the
Poor Assumptions of Naive Bayes Text Classifiers</a>. Proceedings of the
Twentieth International Conference on Machine Learning (ICML-2003).</p>
</div>
Modified: websites/staging/mahout/trunk/content/users/basics/algorithms.html
==============================================================================
--- websites/staging/mahout/trunk/content/users/basics/algorithms.html
(original)
+++ websites/staging/mahout/trunk/content/users/basics/algorithms.html Fri Mar
20 03:46:10 2015
@@ -155,61 +155,63 @@
<li><a
href="/users/clustering/latent-dirichlet-allocation.html">Latent Dirichlet
Allocation</a></li>
</ul>
</li>
- <li class="dropdown"> <a href="#" class="dropdown-toggle"
data-toggle="dropdown">Spark<b class="caret"></b></a>
+ <li class="dropdown"> <a href="#" class="dropdown-toggle"
data-toggle="dropdown">Mahout Environment<b class="caret"></b></a>
<ul class="dropdown-menu">
<li><a href="/users/sparkbindings/home.html">Scala &
Spark Bindings Overview</a></li>
- <li><a
href="/users/sparkbindings/play-with-shell.html">Playing with Mahout's Spark
Shell</a></li>
- <li><a href="/users/algorithms/d-qr.html">Distributed
QR</a></li>
- <li class="divider"></li>
+ <li><a
href="/users/sparkbindings/play-with-shell.html">Playing with Mahout's Spark
Shell</a></li>
+ <li class="divider"></li>
<li><a href="/users/sparkbindings/faq.html">FAQ</a></li>
</ul>
</li>
- <li class="dropdown"> <a href="#" class="dropdown-toggle"
data-toggle="dropdown">Classification<b class="caret"></b></a>
+ <li class="dropdown"> <a href="#" class="dropdown-toggle"
data-toggle="dropdown">Algorithms<b class="caret"></b></a>
+ <ul class="dropdown-menu">
+ <li class="nav-header">Matrix Decomposition</li>
+ <li><a href="/users/algorithms/d-qr.html">Distributed
QR</a></li>
+ <li class="nav-header">Recommendations</li>
+ <li><a
href="/users/algorithms/intro-cooccurrence-spark.html">Intro to
cooccurrence-based<br/> recommendations with Spark</a></li>
+ <li class="nav-header">Classification</li>
+ <li><a href="/users/algorithms/spark-naive-bayes.html">Spark
Naive Bayes</a></li>
+ </ul>
+ </li>
+ <li class="dropdown"> <a href="#" class="dropdown-toggle"
data-toggle="dropdown">Mahout MapReduce<b class="caret"></b></a>
<ul class="dropdown-menu">
+ <li class="nav-header">Classification</li>
<li><a
href="/users/mapreduce/classification/bayesian.html">Naive Bayes</a></li>
<li><a
href="/users/mapreduce/classification/hidden-markov-models.html">Hidden Markov
Models</a></li>
<li><a
href="/users/mapreduce/classification/logistic-regression.html">Logistic
Regression</a></li>
<li><a
href="/users/mapreduce/classification/partial-implementation.html">Random
Forest</a></li>
-
- <li class="divider"></li>
- <li class="nav-header">Examples</li>
+ <li class="nav-header">Classification Examples</li>
<li><a
href="/users/mapreduce/classification/breiman-example.html">Breiman
example</a></li>
<li><a
href="/users/mapreduce/classification/twenty-newsgroups.html">20 newsgroups
example</a></li>
- </ul></li>
- <li class="dropdown"> <a href="#" class="dropdown-toggle"
data-toggle="dropdown">Clustering<b class="caret"></b></a>
- <ul class="dropdown-menu">
- <li><a
href="/users/mapreduce/clustering/k-means-clustering.html">k-Means</a></li>
- <li><a
href="/users/mapreduce/clustering/canopy-clustering.html">Canopy</a></li>
- <li><a
href="/users/mapreduce/clustering/fuzzy-k-means.html">Fuzzy k-Means</a></li>
- <li><a
href="/users/mapreduce/clustering/streaming-k-means.html">Streaming
KMeans</a></li>
- <li><a
href="/users/mapreduce/clustering/spectral-clustering.html">Spectral
Clustering</a></li>
- <li class="divider"></li>
- <li class="nav-header">Commandline usage</li>
- <li><a
href="/users/mapreduce/clustering/k-means-commandline.html">Options for
k-Means</a></li>
- <li><a
href="/users/mapreduce/clustering/canopy-commandline.html">Options for
Canopy</a></li>
- <li><a
href="/users/mapreduce/clustering/fuzzy-k-means-commandline.html">Options for
Fuzzy k-Means</a></li>
- <li class="divider"></li>
- <li class="nav-header">Examples</li>
- <li><a
href="/users/mapreduce/clustering/clustering-of-synthetic-control-data.html">Synthetic
data</a></li>
- <li class="divider"></li>
- <li class="nav-header">Post processing</li>
- <li><a
href="/users/mapreduce/clustering/cluster-dumper.html">Cluster Dumper
tool</a></li>
- <li><a
href="/users/mapreduce/clustering/visualizing-sample-clusters.html">Cluster
visualisation</a></li>
- </ul></li>
- <li class="dropdown"> <a href="#" class="dropdown-toggle"
data-toggle="dropdown">Recommendations<b class="caret"></b></a>
+ <li class="nav-header">Clustering</li>
+ <li><a
href="/users/mapreduce/clustering/k-means-clustering.html">k-Means</a></li>
+ <li><a
href="/users/mapreduce/clustering/canopy-clustering.html">Canopy</a></li>
+ <li><a
href="/users/mapreduce/clustering/fuzzy-k-means.html">Fuzzy k-Means</a></li>
+ <li><a
href="/users/mapreduce/clustering/streaming-k-means.html">Streaming
KMeans</a></li>
+ <li><a
href="/users/mapreduce/clustering/spectral-clustering.html">Spectral
Clustering</a></li>
+ <li class="nav-header">Clustering Commandline usage</li>
+ <li><a
href="/users/mapreduce/clustering/k-means-commandline.html">Options for
k-Means</a></li>
+ <li><a
href="/users/mapreduce/clustering/canopy-commandline.html">Options for
Canopy</a></li>
+ <li><a
href="/users/mapreduce/clustering/fuzzy-k-means-commandline.html">Options for
Fuzzy k-Means</a></li>
+ <li class="nav-header">Clustering Examples</li>
+ <li><a
href="/users/mapreduce/clustering/clustering-of-synthetic-control-data.html">Synthetic
data</a></li>
+ <li class="nav-header">Cluster Post processing</li>
+ <li><a
href="/users/mapreduce/clustering/cluster-dumper.html">Cluster Dumper
tool</a></li>
+ <li><a
href="/users/mapreduce/clustering/visualizing-sample-clusters.html">Cluster
visualisation</a></li>
+ <li class="nav-header">Recommendations</li>
+ <li><a
href="/users/mapreduce/recommender/quickstart.html">Quickstart</a></li>
+ <li><a
href="/users/mapreduce/recommender/recommender-first-timer-faq.html">First
Timer FAQ</a></li>
+ <li><a
href="/users/mapreduce/recommender/userbased-5-minutes.html">A user-based
recommender <br/>in 5 minutes</a></li>
+ <li><a
href="/users/mapreduce/recommender/matrix-factorization.html">Matrix
factorization-based<br/> recommenders</a></li>
+ <li><a
href="/users/mapreduce/recommender/recommender-documentation.html">Overview</a></li>
+ <li><a
href="/users/mapreduce/recommender/intro-itembased-hadoop.html">Intro to
item-based recommendations<br/> with Hadoop</a></li>
+ <li><a
href="/users/mapreduce/recommender/intro-als-hadoop.html">Intro to ALS
recommendations<br/> with Hadoop</a></li>
+ </ul>
+ </li>
+ <!-- <li class="dropdown"> <a href="#" class="dropdown-toggle"
data-toggle="dropdown">Recommendations<b class="caret"></b></a>
<ul class="dropdown-menu">
- <li><a
href="/users/mapreduce/recommender/quickstart.html">Quickstart</a></li>
- <li><a
href="/users/mapreduce/recommender/recommender-first-timer-faq.html">First
Timer FAQ</a></li>
- <li><a
href="/users/mapreduce/recommender/userbased-5-minutes.html">A user-based
recommender <br/>in 5 minutes</a></li>
- <li><a
href="/users/mapreduce/recommender/matrix-factorization.html">Matrix
factorization-based<br/> recommenders</a></li>
- <li><a
href="/users/mapreduce/recommender/recommender-documentation.html">Overview</a></li>
- <li class="divider"></li>
- <li class="nav-header">Hadoop</li>
- <li><a
href="/users/mapreduce/recommender/intro-itembased-hadoop.html">Intro to
item-based recommendations<br/> with Hadoop</a></li>
- <li><a
href="/users/mapreduce/recommender/intro-als-hadoop.html">Intro to ALS
recommendations<br/> with Hadoop</a></li>
- <li class="nav-header">Spark</li>
- <li><a
href="/users/mapreduce/recommender/intro-cooccurrence-spark.html">Intro to
cooccurrence-based<br/> recommendations with Spark</a></li>
- </ul>
+
+ </ul> -->
</li>
</ul>
</div><!--/.nav-collapse -->
@@ -275,7 +277,7 @@
<td align="center"></td>
<td align="center"><a
href="https://mahout.apache.org/users/sparkbindings/ScalaSparkBindings.pdf">x</a></td>
<td align="center"><a
href="https://github.com/apache/mahout/tree/master/h2o">x</a></td>
-<td align="center"><a
href="https://github.com/tillrohrmann/mahout/tree/flink-bindings/flink"><em>in
development</em></a></td>
+<td align="center"><a
href="https://issues.apache.org/jira/browse/MAHOUT-1570"><em>in
development</em></a></td>
</tr>
<tr>
<td></td>
@@ -321,22 +323,22 @@
<td>User-Based Collaborative Filtering</td>
<td align="center">x</td>
<td align="center"></td>
-<td align="center"><a
href="https://github.com/apache/mahout/blob/master/spark/src/test/scala/org/apache/mahout/drivers/RowSimilarityDriverSuite.scala">x</a></td>
+<td align="center"><a
href="https://mahout.apache.org/users/algorithms/intro-cooccurrence-spark.html">x</a></td>
<td align="center"></td>
<td align="center"></td>
</tr>
<tr>
<td>Item-Based Collaborative Filtering</td>
<td align="center">x</td>
-<td align="center"><a
href="https://mahout.apache.org/users/recommender/intro-itembased-hadoop.html">x</a></td>
-<td align="center"><a
href="https://mahout.apache.org/users/recommender/intro-cooccurrence-spark.html">x</a></td>
+<td align="center"><a
href="https://mahout.apache.org/users/mapreduce/recommender/intro-itembased-hadoop.html">x</a></td>
+<td align="center"><a
href="https://mahout.apache.org/users/algorithms/intro-cooccurrence-spark.html">x</a></td>
<td align="center"></td>
<td align="center"></td>
</tr>
<tr>
<td>Matrix Factorization with ALS</td>
<td align="center">x</td>
-<td align="center"><a
href="https://mahout.apache.org/users/recommender/intro-als-hadoop.html">x</a></td>
+<td align="center"><a
href="https://mahout.apache.org/users/mapreduce/recommender/intro-als-hadoop.html">x</a></td>
<td align="center"></td>
<td align="center"></td>
<td align="center"></td>
@@ -344,7 +346,7 @@
<tr>
<td>Matrix Factorization with ALS on Implicit Feedback</td>
<td align="center">x</td>
-<td align="center"><a
href="https://mahout.apache.org/users/recommender/intro-als-hadoop.html">x</a></td>
+<td align="center"><a
href="https://mahout.apache.org/users/mapreduce/recommender/intro-als-hadoop.html">x</a></td>
<td align="center"></td>
<td align="center"></td>
<td align="center"></td>
@@ -375,7 +377,7 @@
</tr>
<tr>
<td>Logistic Regression - trained via SGD</td>
-<td align="center"><a
href="http://mahout.apache.org/users/classification/logistic-regression.html">x</a></td>
+<td align="center"><a
href="http://mahout.apache.org/users/mapreduce/classification/logistic-regression.html">x</a></td>
<td align="center"></td>
<td align="center"></td>
<td align="center"></td>
@@ -384,22 +386,22 @@
<tr>
<td>Naive Bayes / Complementary Naive Bayes</td>
<td align="center"></td>
-<td align="center"><a
href="https://mahout.apache.org/users/classification/bayesian.html">x</a></td>
-<td align="center"><a
href="https://issues.apache.org/jira/browse/MAHOUT-1493">x</a></td>
-<td align="center"><em>in development</em></td>
+<td align="center"><a
href="https://mahout.apache.org/users/mapreduce/classification/bayesian.html">x</a></td>
+<td align="center"><a
href="https://mahout.apache.org/users/algorithms/spark-naive-bayes.html">x</a></td>
+<td align="center"></td>
<td align="center"></td>
</tr>
<tr>
<td>Random Forest</td>
<td align="center"></td>
-<td align="center"><a
href="https://mahout.apache.org/users/classification/partial-implementation.html">x</a></td>
+<td align="center"><a
href="https://mahout.apache.org/users/mapreduce/classification/partial-implementation.html">x</a></td>
<td align="center"></td>
<td align="center"></td>
<td align="center"></td>
</tr>
<tr>
<td>Hidden Markov Models</td>
-<td align="center"><a
href="https://mahout.apache.org/users/classification/hidden-markov-models.html">x</a></td>
+<td align="center"><a
href="https://mahout.apache.org/users/mapreduce/classification/hidden-markov-models.html">x</a></td>
<td align="center"></td>
<td align="center"></td>
<td align="center"></td>
@@ -431,32 +433,32 @@
</tr>
<tr>
<td>Canopy Clustering</td>
-<td align="center"><a
href="https://mahout.apache.org/users/clustering/canopy-clustering.html"><em>deprecated</em></a></td>
-<td align="center"><a
href="https://mahout.apache.org/users/clustering/canopy-clustering.html"><em>deprecated</em></a></td>
+<td align="center"><a
href="https://mahout.apache.org/users/mapreduce/clustering/canopy-clustering.html"><em>deprecated</em></a></td>
+<td align="center"><a
href="https://mahout.apache.org/users/mapreduce/clustering/canopy-clustering.html"><em>deprecated</em></a></td>
<td align="center"></td>
<td align="center"></td>
<td align="center"></td>
</tr>
<tr>
<td>k-Means Clustering</td>
-<td align="center"><a
href="https://mahout.apache.org/users/clustering/k-means-clustering.html">x</a></td>
-<td align="center"><a
href="https://mahout.apache.org/users/clustering/k-means-clustering.html">x</a></td>
+<td align="center"><a
href="https://mahout.apache.org/users/mapreduce/clustering/k-means-clustering.html">x</a></td>
+<td align="center"><a
href="https://mahout.apache.org/users/mapreduce/clustering/k-means-clustering.html">x</a></td>
<td align="center"></td>
<td align="center"></td>
<td align="center"></td>
</tr>
<tr>
<td>Fuzzy k-Means</td>
-<td align="center"><a
href="https://mahout.apache.org/users/clustering/fuzzy-k-means.html">x</a></td>
-<td align="center"><a
href="https://mahout.apache.org/users/clustering/fuzzy-k-means.html">x</a></td>
+<td align="center"><a
href="https://mahout.apache.org/users/mapreduce/clustering/fuzzy-k-means.html">x</a></td>
+<td align="center"><a
href="https://mahout.apache.org/users/mapreduce/clustering/fuzzy-k-means.html">x</a></td>
<td align="center"></td>
<td align="center"></td>
<td align="center"></td>
</tr>
<tr>
<td>Streaming k-Means</td>
-<td align="center"><a
href="https://mahout.apache.org/users/clustering/streaming-k-means.html">x</a></td>
-<td align="center"><a
href="https://mahout.apache.org/users/clustering/streaming-k-means.html">x</a></td>
+<td align="center"><a
href="https://mahout.apache.org/users/mapreduce/clustering/streaming-k-means.html">x</a></td>
+<td align="center"><a
href="https://mahout.apache.org/users/mapreduce/clustering/streaming-k-means.html">x</a></td>
<td align="center"></td>
<td align="center"></td>
<td align="center"></td>
@@ -464,7 +466,7 @@
<tr>
<td>Spectral Clustering</td>
<td align="center"></td>
-<td align="center"><a
href="https://mahout.apache.org/users/clustering/spectral-clustering.html">x</a></td>
+<td align="center"><a
href="https://mahout.apache.org/users/mapreduce/clustering/spectral-clustering.html">x</a></td>
<td align="center"></td>
<td align="center"></td>
<td align="center"></td>
Modified: websites/staging/mahout/trunk/content/users/basics/collections.html
==============================================================================
--- websites/staging/mahout/trunk/content/users/basics/collections.html
(original)
+++ websites/staging/mahout/trunk/content/users/basics/collections.html Fri Mar
20 03:46:10 2015
@@ -155,61 +155,63 @@
<li><a
href="/users/clustering/latent-dirichlet-allocation.html">Latent Dirichlet
Allocation</a></li>
</ul>
</li>
- <li class="dropdown"> <a href="#" class="dropdown-toggle"
data-toggle="dropdown">Spark<b class="caret"></b></a>
+ <li class="dropdown"> <a href="#" class="dropdown-toggle"
data-toggle="dropdown">Mahout Environment<b class="caret"></b></a>
<ul class="dropdown-menu">
<li><a href="/users/sparkbindings/home.html">Scala &
Spark Bindings Overview</a></li>
- <li><a
href="/users/sparkbindings/play-with-shell.html">Playing with Mahout's Spark
Shell</a></li>
- <li><a href="/users/algorithms/d-qr.html">Distributed
QR</a></li>
- <li class="divider"></li>
+ <li><a
href="/users/sparkbindings/play-with-shell.html">Playing with Mahout's Spark
Shell</a></li>
+ <li class="divider"></li>
<li><a href="/users/sparkbindings/faq.html">FAQ</a></li>
</ul>
</li>
- <li class="dropdown"> <a href="#" class="dropdown-toggle"
data-toggle="dropdown">Classification<b class="caret"></b></a>
+ <li class="dropdown"> <a href="#" class="dropdown-toggle"
data-toggle="dropdown">Algorithms<b class="caret"></b></a>
+ <ul class="dropdown-menu">
+ <li class="nav-header">Matrix Decomposition</li>
+ <li><a href="/users/algorithms/d-qr.html">Distributed
QR</a></li>
+ <li class="nav-header">Recommendations</li>
+ <li><a
href="/users/algorithms/intro-cooccurrence-spark.html">Intro to
cooccurrence-based<br/> recommendations with Spark</a></li>
+ <li class="nav-header">Classification</li>
+ <li><a href="/users/algorithms/spark-naive-bayes.html">Spark
Naive Bayes</a></li>
+ </ul>
+ </li>
+ <li class="dropdown"> <a href="#" class="dropdown-toggle"
data-toggle="dropdown">Mahout MapReduce<b class="caret"></b></a>
<ul class="dropdown-menu">
+ <li class="nav-header">Classification</li>
<li><a
href="/users/mapreduce/classification/bayesian.html">Naive Bayes</a></li>
<li><a
href="/users/mapreduce/classification/hidden-markov-models.html">Hidden Markov
Models</a></li>
<li><a
href="/users/mapreduce/classification/logistic-regression.html">Logistic
Regression</a></li>
<li><a
href="/users/mapreduce/classification/partial-implementation.html">Random
Forest</a></li>
-
- <li class="divider"></li>
- <li class="nav-header">Examples</li>
+ <li class="nav-header">Classification Examples</li>
<li><a
href="/users/mapreduce/classification/breiman-example.html">Breiman
example</a></li>
<li><a
href="/users/mapreduce/classification/twenty-newsgroups.html">20 newsgroups
example</a></li>
- </ul></li>
- <li class="dropdown"> <a href="#" class="dropdown-toggle"
data-toggle="dropdown">Clustering<b class="caret"></b></a>
- <ul class="dropdown-menu">
- <li><a
href="/users/mapreduce/clustering/k-means-clustering.html">k-Means</a></li>
- <li><a
href="/users/mapreduce/clustering/canopy-clustering.html">Canopy</a></li>
- <li><a
href="/users/mapreduce/clustering/fuzzy-k-means.html">Fuzzy k-Means</a></li>
- <li><a
href="/users/mapreduce/clustering/streaming-k-means.html">Streaming
KMeans</a></li>
- <li><a
href="/users/mapreduce/clustering/spectral-clustering.html">Spectral
Clustering</a></li>
- <li class="divider"></li>
- <li class="nav-header">Commandline usage</li>
- <li><a
href="/users/mapreduce/clustering/k-means-commandline.html">Options for
k-Means</a></li>
- <li><a
href="/users/mapreduce/clustering/canopy-commandline.html">Options for
Canopy</a></li>
- <li><a
href="/users/mapreduce/clustering/fuzzy-k-means-commandline.html">Options for
Fuzzy k-Means</a></li>
- <li class="divider"></li>
- <li class="nav-header">Examples</li>
- <li><a
href="/users/mapreduce/clustering/clustering-of-synthetic-control-data.html">Synthetic
data</a></li>
- <li class="divider"></li>
- <li class="nav-header">Post processing</li>
- <li><a
href="/users/mapreduce/clustering/cluster-dumper.html">Cluster Dumper
tool</a></li>
- <li><a
href="/users/mapreduce/clustering/visualizing-sample-clusters.html">Cluster
visualisation</a></li>
- </ul></li>
- <li class="dropdown"> <a href="#" class="dropdown-toggle"
data-toggle="dropdown">Recommendations<b class="caret"></b></a>
+ <li class="nav-header">Clustering</li>
+ <li><a
href="/users/mapreduce/clustering/k-means-clustering.html">k-Means</a></li>
+ <li><a
href="/users/mapreduce/clustering/canopy-clustering.html">Canopy</a></li>
+ <li><a
href="/users/mapreduce/clustering/fuzzy-k-means.html">Fuzzy k-Means</a></li>
+ <li><a
href="/users/mapreduce/clustering/streaming-k-means.html">Streaming
KMeans</a></li>
+ <li><a
href="/users/mapreduce/clustering/spectral-clustering.html">Spectral
Clustering</a></li>
+ <li class="nav-header">Clustering Commandline usage</li>
+ <li><a
href="/users/mapreduce/clustering/k-means-commandline.html">Options for
k-Means</a></li>
+ <li><a
href="/users/mapreduce/clustering/canopy-commandline.html">Options for
Canopy</a></li>
+ <li><a
href="/users/mapreduce/clustering/fuzzy-k-means-commandline.html">Options for
Fuzzy k-Means</a></li>
+ <li class="nav-header">Clustering Examples</li>
+ <li><a
href="/users/mapreduce/clustering/clustering-of-synthetic-control-data.html">Synthetic
data</a></li>
+ <li class="nav-header">Cluster Post processing</li>
+ <li><a
href="/users/mapreduce/clustering/cluster-dumper.html">Cluster Dumper
tool</a></li>
+ <li><a
href="/users/mapreduce/clustering/visualizing-sample-clusters.html">Cluster
visualisation</a></li>
+ <li class="nav-header">Recommendations</li>
+ <li><a
href="/users/mapreduce/recommender/quickstart.html">Quickstart</a></li>
+ <li><a
href="/users/mapreduce/recommender/recommender-first-timer-faq.html">First
Timer FAQ</a></li>
+ <li><a
href="/users/mapreduce/recommender/userbased-5-minutes.html">A user-based
recommender <br/>in 5 minutes</a></li>
+ <li><a
href="/users/mapreduce/recommender/matrix-factorization.html">Matrix
factorization-based<br/> recommenders</a></li>
+ <li><a
href="/users/mapreduce/recommender/recommender-documentation.html">Overview</a></li>
+ <li><a
href="/users/mapreduce/recommender/intro-itembased-hadoop.html">Intro to
item-based recommendations<br/> with Hadoop</a></li>
+ <li><a
href="/users/mapreduce/recommender/intro-als-hadoop.html">Intro to ALS
recommendations<br/> with Hadoop</a></li>
+ </ul>
+ </li>
+ <!-- <li class="dropdown"> <a href="#" class="dropdown-toggle"
data-toggle="dropdown">Recommendations<b class="caret"></b></a>
<ul class="dropdown-menu">
- <li><a
href="/users/mapreduce/recommender/quickstart.html">Quickstart</a></li>
- <li><a
href="/users/mapreduce/recommender/recommender-first-timer-faq.html">First
Timer FAQ</a></li>
- <li><a
href="/users/mapreduce/recommender/userbased-5-minutes.html">A user-based
recommender <br/>in 5 minutes</a></li>
- <li><a
href="/users/mapreduce/recommender/matrix-factorization.html">Matrix
factorization-based<br/> recommenders</a></li>
- <li><a
href="/users/mapreduce/recommender/recommender-documentation.html">Overview</a></li>
- <li class="divider"></li>
- <li class="nav-header">Hadoop</li>
- <li><a
href="/users/mapreduce/recommender/intro-itembased-hadoop.html">Intro to
item-based recommendations<br/> with Hadoop</a></li>
- <li><a
href="/users/mapreduce/recommender/intro-als-hadoop.html">Intro to ALS
recommendations<br/> with Hadoop</a></li>
- <li class="nav-header">Spark</li>
- <li><a
href="/users/mapreduce/recommender/intro-cooccurrence-spark.html">Intro to
cooccurrence-based<br/> recommendations with Spark</a></li>
- </ul>
+
+ </ul> -->
</li>
</ul>
</div><!--/.nav-collapse -->