http://git-wip-us.apache.org/repos/asf/incubator-predictionio-site/blob/921d6861/customize/troubleshooting/index.html
----------------------------------------------------------------------
diff --git a/customize/troubleshooting/index.html 
b/customize/troubleshooting/index.html
new file mode 100644
index 0000000..65baf73
--- /dev/null
+++ b/customize/troubleshooting/index.html
@@ -0,0 +1,85 @@
+<!DOCTYPE html><html><head><title>Engine Development - 
Troubleshoot</title><meta charset="utf-8"/><meta content="IE=edge,chrome=1" 
http-equiv="X-UA-Compatible"/><meta name="viewport" 
content="width=device-width, initial-scale=1.0"/><meta class="swiftype" 
name="title" data-type="string" content="Engine Development - 
Troubleshoot"/><link rel="canonical" 
href="https://docs.prediction.io/customize/troubleshooting/"/><link 
href="/images/favicon/normal-b330020a.png" rel="shortcut icon"/><link 
href="/images/favicon/apple-c0febcf2.png" rel="apple-touch-icon"/><link 
href="//fonts.googleapis.com/css?family=Open+Sans:300italic,400italic,600italic,700italic,800italic,400,300,600,700,800"
 rel="stylesheet"/><link 
href="//maxcdn.bootstrapcdn.com/font-awesome/4.2.0/css/font-awesome.min.css" 
rel="stylesheet"/><link href="/stylesheets/application-3a3867f7.css" 
rel="stylesheet" type="text/css"/><script 
src="//cdnjs.cloudflare.com/ajax/libs/html5shiv/3.7.2/html5shiv.min.js"></script><script
 src="//cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script><script
 src="//use.typekit.net/pqo0itb.js"></script><script>try{Typekit.load({ async: 
true });}catch(e){}</script></head><body><div id="global"><header><div 
class="container" id="header-wrapper"><div class="row"><div 
class="col-sm-12"><div id="logo-wrapper"><span id="drawer-toggle"></span><a 
href="#"></a><a href="http://predictionio.incubator.apache.org/"><img 
alt="PredictionIO" id="logo" 
src="/images/logos/logo-ee2b9bb3.png"/></a></div><div id="menu-wrapper"><div 
id="pill-wrapper"><a class="pill left" 
href="/gallery/template-gallery">TEMPLATES</a> <a class="pill right" 
href="//github.com/apache/incubator-predictionio/">OPEN 
SOURCE</a></div></div><img class="mobile-search-bar-toggler hidden-md 
hidden-lg" 
src="/images/icons/search-glass-704bd4ff.png"/></div></div></div></header><div 
id="search-bar-row-wrapper"><div class="container-fluid" 
id="search-bar-row"><div class="row"><div class="col-md-9 col-sm-11 col-xs-11"><div class="hidden-md hidden-lg" 
id="mobile-page-heading-wrapper"><p>PredictionIO Docs</p><h4>Troubleshooting 
Engine Development</h4></div><h4 class="hidden-sm hidden-xs">PredictionIO 
Docs</h4></div><div class="col-md-3 col-sm-1 col-xs-1 hidden-md hidden-lg"><img 
id="left-menu-indicator" 
src="/images/icons/down-arrow-dfe9f7fe.png"/></div><div class="col-md-3 
col-sm-12 col-xs-12 swiftype-wrapper"><div class="swiftype"><form 
class="search-form"><img class="search-box-toggler hidden-xs hidden-sm" 
src="/images/icons/search-glass-704bd4ff.png"/><div class="search-box"><img 
src="/images/icons/search-glass-704bd4ff.png"/><input type="text" 
id="st-search-input" class="st-search-input" placeholder="Search 
Doc..."/></div><img class="swiftype-row-hider hidden-md hidden-lg" 
src="/images/icons/drawer-toggle-active-fcbef12a.png"/></form></div></div><div 
class="mobile-left-menu-toggler hidden-md 
hidden-lg"></div></div></div></div><div id="page" class="container-fluid"><div 
class="row"><div id="left-menu-wrapper" class="col-md-3"><nav id="nav-main"><ul><li 
class="level-1"><a class="expandible" href="/"><span>Apache PredictionIO 
(incubating) Documentation</span></a><ul><li class="level-2"><a class="final" 
href="/"><span>Welcome to Apache PredictionIO 
(incubating)</span></a></li></ul></li><li class="level-1"><a class="expandible" 
href="#"><span>Getting Started</span></a><ul><li class="level-2"><a 
class="final" href="/start/"><span>A Quick Intro</span></a></li><li 
class="level-2"><a class="final" href="/install/"><span>Installing Apache 
PredictionIO (incubating)</span></a></li><li class="level-2"><a class="final" 
href="/start/download/"><span>Downloading an Engine Template</span></a></li><li 
class="level-2"><a class="final" href="/start/deploy/"><span>Deploying Your 
First Engine</span></a></li><li class="level-2"><a class="final" 
href="/start/customize/"><span>Customizing the 
Engine</span></a></li></ul></li><li class="level-1"><a class="expandible" 
href="#"><span>Integrating with Your App</span></a><ul><li class="level-2"><a class="final" 
href="/appintegration/"><span>App Integration Overview</span></a></li><li 
class="level-2"><a class="expandible" href="/sdk/"><span>List of 
SDKs</span></a><ul><li class="level-3"><a class="final" 
href="/sdk/java/"><span>Java & Android SDK</span></a></li><li 
class="level-3"><a class="final" href="/sdk/php/"><span>PHP 
SDK</span></a></li><li class="level-3"><a class="final" 
href="/sdk/python/"><span>Python SDK</span></a></li><li class="level-3"><a 
class="final" href="/sdk/ruby/"><span>Ruby SDK</span></a></li><li 
class="level-3"><a class="final" href="/sdk/community/"><span>Community Powered 
SDKs</span></a></li></ul></li></ul></li><li class="level-1"><a 
class="expandible" href="#"><span>Deploying an Engine</span></a><ul><li 
class="level-2"><a class="final" href="/deploy/"><span>Deploying as a Web 
Service</span></a></li><li class="level-2"><a class="final" 
href="/cli/#engine-commands"><span>Engine Command-line Interface</span></a></li><li class="level-2"><a class="final" 
href="/deploy/monitoring/"><span>Monitoring Engine</span></a></li><li 
class="level-2"><a class="final" href="/deploy/engineparams/"><span>Setting 
Engine Parameters</span></a></li><li class="level-2"><a class="final" 
href="/deploy/enginevariants/"><span>Deploying Multiple Engine 
Variants</span></a></li></ul></li><li class="level-1"><a class="expandible" 
href="#"><span>Customizing an Engine</span></a><ul><li class="level-2"><a 
class="final" href="/customize/"><span>Learning DASE</span></a></li><li 
class="level-2"><a class="final" href="/customize/dase/"><span>Implement 
DASE</span></a></li><li class="level-2"><a class="final active" 
href="/customize/troubleshooting/"><span>Troubleshooting Engine 
Development</span></a></li><li class="level-2"><a class="final" 
href="/api/current/#package"><span>Engine Scala 
APIs</span></a></li></ul></li><li class="level-1"><a class="expandible" 
href="#"><span>Collecting and Analyzing Data</span></a><ul><li class="level-2"><a class="final" href="/datacollection/"><span>Event 
Server Overview</span></a></li><li class="level-2"><a class="final" 
href="/cli/#event-server-commands"><span>Event Server Command-line 
Interface</span></a></li><li class="level-2"><a class="final" 
href="/datacollection/eventapi/"><span>Collecting Data with 
REST/SDKs</span></a></li><li class="level-2"><a class="final" 
href="/datacollection/eventmodel/"><span>Events Modeling</span></a></li><li 
class="level-2"><a class="final" 
href="/datacollection/webhooks/"><span>Unifying Multichannel Data with 
Webhooks</span></a></li><li class="level-2"><a class="final" 
href="/datacollection/channel/"><span>Channel</span></a></li><li 
class="level-2"><a class="final" 
href="/datacollection/batchimport/"><span>Importing Data in 
Batch</span></a></li><li class="level-2"><a class="final" 
href="/datacollection/analytics/"><span>Using Analytics 
Tools</span></a></li></ul></li><li class="level-1"><a class="expandible" 
href="#"><span>Choosing an Algorithm(s)</span></a><ul><li class="level-2"><a class="final" 
href="/algorithm/"><span>Built-in Algorithm Libraries</span></a></li><li 
class="level-2"><a class="final" href="/algorithm/switch/"><span>Switching to 
Another Algorithm</span></a></li><li class="level-2"><a class="final" 
href="/algorithm/multiple/"><span>Combining Multiple 
Algorithms</span></a></li><li class="level-2"><a class="final" 
href="/algorithm/custom/"><span>Adding Your Own 
Algorithms</span></a></li></ul></li><li class="level-1"><a class="expandible" 
href="#"><span>ML Tuning and Evaluation</span></a><ul><li class="level-2"><a 
class="final" href="/evaluation/"><span>Overview</span></a></li><li 
class="level-2"><a class="final" 
href="/evaluation/paramtuning/"><span>Hyperparameter Tuning</span></a></li><li 
class="level-2"><a class="final" 
href="/evaluation/evaluationdashboard/"><span>Evaluation 
Dashboard</span></a></li><li class="level-2"><a class="final" 
href="/evaluation/metricchoose/"><span>Choosing Evaluation Metrics</span></a></li><li class="level-2"><a class="final" 
href="/evaluation/metricbuild/"><span>Building Evaluation 
Metrics</span></a></li></ul></li><li class="level-1"><a class="expandible" 
href="#"><span>System Architecture</span></a><ul><li class="level-2"><a 
class="final" href="/system/"><span>Architecture Overview</span></a></li><li 
class="level-2"><a class="final" href="/system/anotherdatastore/"><span>Using 
Another Data Store</span></a></li></ul></li><li class="level-1"><a 
class="expandible" href="#"><span>Engine Template Gallery</span></a><ul><li 
class="level-2"><a class="final" 
href="/gallery/template-gallery/"><span>Browse</span></a></li><li 
class="level-2"><a class="final" 
href="/community/submit-template/"><span>Submit your Engine as a 
Template</span></a></li></ul></li><li class="level-1"><a class="expandible" 
href="#"><span>Demo Tutorials</span></a><ul><li class="level-2"><a 
class="final" href="/demo/tapster/"><span>Comics Recommendation 
Demo</span></a></li><li class="level-2"><a class="final" href="/demo/community/"><span>Community 
Contributed Demo</span></a></li><li class="level-2"><a class="final" 
href="/demo/textclassification/"><span>Text Classification Engine 
Tutorial</span></a></li></ul></li><li class="level-1"><a class="expandible" 
href="/community/"><span>Getting Involved</span></a><ul><li class="level-2"><a 
class="final" href="/community/contribute-code/"><span>Contribute 
Code</span></a></li><li class="level-2"><a class="final" 
href="/community/contribute-documentation/"><span>Contribute 
Documentation</span></a></li><li class="level-2"><a class="final" 
href="/community/contribute-sdk/"><span>Contribute a SDK</span></a></li><li 
class="level-2"><a class="final" 
href="/community/contribute-webhook/"><span>Contribute a 
Webhook</span></a></li><li class="level-2"><a class="final" 
href="/community/projects/"><span>Community 
Projects</span></a></li></ul></li><li class="level-1"><a class="expandible" 
href="#"><span>Getting Help</span></a><ul><li class="level-2"><a class="final" 
href="/resources/faq/"><span>FAQs</span></a></li><li class="level-2"><a 
class="final" href="/support/"><span>Support</span></a></li></ul></li><li 
class="level-1"><a class="expandible" 
href="#"><span>Resources</span></a><ul><li class="level-2"><a class="final" 
href="/resources/intellij/"><span>Developing Engines with IntelliJ 
IDEA</span></a></li><li class="level-2"><a class="final" 
href="/resources/upgrade/"><span>Upgrade Instructions</span></a></li><li 
class="level-2"><a class="final" 
href="/resources/glossary/"><span>Glossary</span></a></li></ul></li></ul></nav></div><div
 class="col-md-9 col-sm-12"><div class="content-header hidden-md 
hidden-lg"><div id="breadcrumbs" class="hidden-sm hidden xs"><ul><li><a 
href="#">Customizing an Engine</a><span 
class="spacer">&gt;</span></li><li><span class="last">Troubleshooting Engine 
Development</span></li></ul></div><div id="page-title"><h1>Engine Development - 
Troubleshoot</h1></div></div><div id="table-of-content-wrapper"><h5>On this page</h5><aside id="table-of-contents"><ul> 
<li> <a href="#stop-training-between-stages">Stop Training between Stages</a> 
</li> <li> <a href="#sanity-check">Sanity Check</a> </li> <li> <a 
href="#engine-status-page">Engine Status Page</a> </li> <li> <a 
href="#pio-shell">pio-shell</a> </li> </ul> </aside><hr/><a id="edit-page-link" 
href="https://github.com/apache/incubator-predictionio/tree/livedoc/docs/manual/source/customize/troubleshooting.html.md"><img
 src="/images/icons/edit-pencil-d6c1bb3d.png"/>Edit this page</a></div><div 
class="content-header hidden-sm hidden-xs"><div id="breadcrumbs" 
class="hidden-sm hidden xs"><ul><li><a href="#">Customizing an Engine</a><span 
class="spacer">&gt;</span></li><li><span class="last">Troubleshooting Engine 
Development</span></li></ul></div><div id="page-title"><h1>Engine Development - 
Troubleshoot</h1></div></div><div class="content"><p>Apache PredictionIO 
(incubating) provides the following features to help you debug engines during 
the development cycle.</p><h2 
id='stop-training-between-stages' class='header-anchors'>Stop Training between 
Stages</h2><p>By default <code>pio train</code> runs through the whole training 
process including <a href="/templates/recommendation/dase/">DataSource, 
Preparator and Algorithm</a>. To speed up the development and debug cycle, you 
can stop the process after each stage to verify it has completed 
correctly.</p><p>If you have modified DataSource and want to confirm the 
TrainingData is generated as expected, you can run <code>pio train</code> with 
<code>--stop-after-read</code> option:</p><div class="highlight shell"><table 
style="border-spacing: 0"><tbody><tr><td class="gutter gl" style="text-align: 
right"><pre class="lineno">1</pre></td><td class="code"><pre>pio train 
--stop-after-read
+</pre></td></tr></tbody></table> </div> <p>This would stop the training 
process after the TrainingData is generated.</p><p>For example, if you are 
running <a href="/templates/recommendation/quickstart/">Recommendation 
Template</a>, you should see the training process stop after the 
TrainingData is printed.</p><div class="highlight shell"><table 
style="border-spacing: 0"><tbody><tr><td class="gutter gl" style="text-align: 
right"><pre class="lineno">1
+2
+3
+4</pre></td><td class="code"><pre><span class="o">[</span>INFO] <span 
class="o">[</span>CoreWorkflow<span class="nv">$]</span> TrainingData:
+<span class="o">[</span>INFO] <span class="o">[</span>CoreWorkflow<span 
class="nv">$]</span> ratings: <span class="o">[</span>1501] <span 
class="o">(</span>List<span class="o">(</span>Rating<span 
class="o">(</span>3,0,4.0<span class="o">)</span>, Rating<span 
class="o">(</span>3,1,4.0<span class="o">))</span>...<span class="o">)</span>
+...
+<span class="o">[</span>INFO] <span class="o">[</span>CoreWorkflow<span 
class="nv">$]</span> Training interrupted by 
org.apache.predictionio.workflow.StopAfterReadInterruption.
+</pre></td></tr></tbody></table> </div> <p>Similarly, you can stop the 
training after the Preparator phase by using --stop-after-prepare option and it 
would stop after PreparedData is generated:</p><div class="highlight 
shell"><table style="border-spacing: 0"><tbody><tr><td class="gutter gl" 
style="text-align: right"><pre class="lineno">1</pre></td><td 
class="code"><pre>pio train --stop-after-prepare
+</pre></td></tr></tbody></table> </div> <h2 id='sanity-check' 
class='header-anchors'>Sanity Check</h2><p>You can extend a trait 
<code>SanityCheck</code> and implement the method <code>sanityCheck()</code> 
with your error checking code. The <code>sanityCheck()</code> is called when 
the data is generated. This can be applied to <code>TrainingData</code>, 
<code>PreparedData</code> and the <code>Model</code> classes, which are outputs 
of DataSource&#39;s <code>readTraining()</code>, Preparator&#39;s 
<code>prepare()</code> and Algorithm&#39;s <code>train()</code> methods, 
respectively.</p><p>For example, one frequent error with the Recommendation 
Template is that the TrainingData is empty because the DataSource is not 
reading data correctly. You can add the check of empty data inside the 
<code>sanityCheck()</code> function. You can easily add other checking logic 
into the <code>sanityCheck()</code> function based on your own needs. Also, if 
you implement a <code>toString()</code> method in your TrainingData, you can 
call <code>toString()</code> inside 
<code>sanityCheck()</code> to print out some data for visual 
checking.</p><p>For example, to print TrainingData to console and check if the 
<code>ratings</code> is empty, you can do the following:</p><div 
class="highlight scala"><table style="border-spacing: 0"><tbody><tr><td 
class="gutter gl" style="text-align: right"><pre class="lineno">1
+2
+3
+4
+5
+6
+7
+8
+9
+10
+11
+12
+13
+14
+15
+16</pre></td><td class="code"><pre><span class="k">import</span> <span 
class="nn">org.apache.predictionio.controller.SanityCheck</span> <span 
class="c1">// ADDED
+</span>
+<span class="k">class</span> <span class="nc">TrainingData</span><span 
class="o">(</span>
+  <span class="k">val</span> <span class="n">ratings</span><span 
class="k">:</span> <span class="kt">RDD</span><span class="o">[</span><span 
class="kt">Rating</span><span class="o">]</span>
+<span class="o">)</span> <span class="k">extends</span> <span 
class="nc">Serializable</span> <span class="k">with</span> <span 
class="nc">SanityCheck</span> <span class="o">{</span> <span class="c1">// 
EXTEND SanityCheck
+</span>  <span class="k">override</span> <span class="k">def</span> <span 
class="n">toString</span> <span class="k">=</span> <span class="o">{</span>
+    <span class="n">s</span><span class="s">"ratings: [${ratings.count()}] 
(${ratings.take(2).toList}...)"</span>
+  <span class="o">}</span>
+
+  <span class="c1">// IMPLEMENT sanityCheck()
+</span>  <span class="k">override</span> <span class="k">def</span> <span 
class="n">sanityCheck</span><span class="o">()</span><span class="k">:</span> 
<span class="kt">Unit</span> <span class="o">=</span> <span class="o">{</span>
+    <span class="n">println</span><span class="o">(</span><span 
class="n">toString</span><span class="o">())</span>
+    <span class="c1">// add your other checking here
+</span>    <span class="n">require</span><span class="o">(!</span><span 
class="n">ratings</span><span class="o">.</span><span 
class="n">take</span><span class="o">(</span><span class="mi">1</span><span 
class="o">).</span><span class="n">isEmpty</span><span class="o">,</span> <span 
class="n">s</span><span class="s">"ratings cannot be empty!"</span><span 
class="o">)</span>
+  <span class="o">}</span>
+<span class="o">}</span>
+</pre></td></tr></tbody></table> </div> <p>You may also use it together with 
the --stop-after-read flag to debug the DataSource:</p><div class="highlight 
shell"><table style="border-spacing: 0"><tbody><tr><td class="gutter gl" 
style="text-align: right"><pre class="lineno">1
+2</pre></td><td class="code"><pre>pio build
+pio train --stop-after-read
+</pre></td></tr></tbody></table> </div> <p>If your data is empty, you should 
see the following error thrown by the <code>sanityCheck()</code> 
function:</p><div class="highlight shell"><table style="border-spacing: 
0"><tbody><tr><td class="gutter gl" style="text-align: right"><pre 
class="lineno">1
+2
+3
+4
+5
+6
+7
+8
+9</pre></td><td class="code"><pre><span class="o">[</span>INFO] <span 
class="o">[</span>CoreWorkflow<span class="nv">$]</span> Performing data sanity 
check on training data.
+<span class="o">[</span>INFO] <span class="o">[</span>CoreWorkflow<span 
class="nv">$]</span> org.template.recommendation.TrainingData supports data 
sanity check. Performing check.
+Exception <span class="k">in </span>thread <span class="s2">"main"</span> 
java.lang.IllegalArgumentException: requirement failed: ratings cannot be empty!
+    at scala.Predef<span class="nv">$.</span>require<span 
class="o">(</span>Predef.scala:233<span class="o">)</span>
+    at org.template.recommendation.TrainingData.sanityCheck<span 
class="o">(</span>DataSource.scala:73<span class="o">)</span>
+    at org.apache.predictionio.workflow.CoreWorkflow<span 
class="nv">$$</span>anonfun<span 
class="nv">$runTypelessContext$7</span>.apply<span 
class="o">(</span>Workflow.scala:474<span class="o">)</span>
+    at org.apache.predictionio.workflow.CoreWorkflow<span 
class="nv">$$</span>anonfun<span 
class="nv">$runTypelessContext$7</span>.apply<span 
class="o">(</span>Workflow.scala:465<span class="o">)</span>
+    at scala.collection.immutable.Map<span 
class="nv">$Map1</span>.foreach<span class="o">(</span>Map.scala:109<span 
class="o">)</span>
+  ...
+</pre></td></tr></tbody></table> </div> <p>You can specify the 
<code>--skip-sanity-check</code> option to turn off sanityCheck:</p><div 
class="highlight shell"><table style="border-spacing: 0"><tbody><tr><td 
class="gutter gl" style="text-align: right"><pre class="lineno">1</pre></td><td 
class="code"><pre>pio train --stop-after-read --skip-sanity-check
+</pre></td></tr></tbody></table> </div> <p>You should see that the check is 
skipped, as in the following output:</p><div class="highlight shell"><table 
style="border-spacing: 0"><tbody><tr><td class="gutter gl" style="text-align: 
right"><pre class="lineno">1
+2
+3
+4</pre></td><td class="code"><pre><span class="o">[</span>INFO] <span 
class="o">[</span>CoreWorkflow<span class="nv">$]</span> Data sanity checking 
is off.
+<span class="o">[</span>INFO] <span class="o">[</span>CoreWorkflow<span 
class="nv">$]</span> Data Source
+...
+<span class="o">[</span>INFO] <span class="o">[</span>CoreWorkflow<span 
class="nv">$]</span> Training interrupted by 
org.apache.predictionio.workflow.StopAfterReadInterruption.
+</pre></td></tr></tbody></table> </div> <h2 id='engine-status-page' 
class='header-anchors'>Engine Status Page</h2><p>After running <code>pio 
deploy</code>, you can access the engine status page by going to the same URL 
and port as the deployed engine with your browser, which is &quot;<a 
href="http://localhost:8000">http://localhost:8000</a>&quot; by default. On the 
engine status page, you can find the Engine information and the parameters of 
each DASE component. In particular, you can also see the &quot;Model&quot; trained 
by the algorithm based on how <code>toString()</code> method is implemented in 
the Algorithm&#39;s Model class.</p><h2 id='pio-shell' 
class='header-anchors'>pio-shell</h2><p>Apache PredictionIO (incubating) also 
provides <code>pio-shell</code> in which you can easily access Apache 
PredictionIO (incubating) API, Spark context and Spark API for quickly testing 
code or debugging purposes.</p><p>To bring up the shell, simply run:</p><div 
class="highlight shell"><table style="border-spacing: 0"><tbody><tr><td class="gutter gl" style="text-align: right"><pre 
class="lineno">1</pre></td><td class="code"><pre><span class="gp">$ 
</span>pio-shell --with-spark
+</pre></td></tr></tbody></table> </div> <p>(<code>pio-shell</code> is 
available inside the <code>bin/</code> directory of the installed Apache 
PredictionIO (incubating) directory; you should be able to access it if you have 
added PredictionIO/bin to your <code>PATH</code> environment variable.)</p><p>Note 
that the Spark context is available as variable <code>sc</code> inside the 
shell.</p><p>For example, to get the events of <code>MyApp1</code> using 
PEventStore API inside the pio-shell and collect them into an array 
<code>c</code>, run the following in the shell:</p><div class="highlight 
shell"><table style="border-spacing: 0"><tbody><tr><td class="gutter gl" 
style="text-align: right"><pre class="lineno">1
+2
+3</pre></td><td class="code"><pre><span class="gp">&gt; </span>import 
org.apache.predictionio.data.store.PEventStore
+<span class="gp">&gt; </span>val eventsRDD <span class="o">=</span> 
PEventStore.find<span class="o">(</span><span class="nv">appName</span><span 
class="o">=</span><span class="s2">"MyApp1"</span><span 
class="o">)(</span>sc<span class="o">)</span>
+<span class="gp">&gt; </span>val c <span class="o">=</span> 
eventsRDD.collect<span class="o">()</span>
+</pre></td></tr></tbody></table> </div> <p>Then you should see the following 
returned in the shell:</p><div class="highlight shell"><table 
style="border-spacing: 0"><tbody><tr><td class="gutter gl" style="text-align: 
right"><pre class="lineno">1
+2
+3</pre></td><td class="code"><pre>...
+15/05/18 14:24:42 INFO DAGScheduler: Job 0 finished: collect at 
&lt;console&gt;:24, took 1.850779 s
+c: Array[org.apache.predictionio.data.storage.Event] <span class="o">=</span> 
Array<span class="o">(</span>Event<span class="o">(</span><span 
class="nv">id</span><span class="o">=</span>Some<span 
class="o">(</span>AaQUUBsFZxteRpDV_7fDGQAAAU1ZfRW1tX9LSWdZSb0<span 
class="o">)</span>,event<span class="o">=</span><span 
class="nv">$set</span>,eType<span class="o">=</span>item,eId<span 
class="o">=</span>i42,tType<span class="o">=</span>None,tId<span 
class="o">=</span>None,p<span class="o">=</span>DataMap<span 
class="o">(</span>Map<span class="o">(</span>categories -&gt; JArray<span 
class="o">(</span>List<span class="o">(</span>JString<span 
class="o">(</span>c2<span class="o">)</span>, JString<span 
class="o">(</span>c1<span class="o">)</span>, JString<span 
class="o">(</span>c6<span class="o">)</span>, JString<span 
class="o">(</span>c3<span class="o">)))))</span>,t<span 
class="o">=</span>2015-05-15T21:31:19.349Z,tags<span 
class="o">=</span>List<span class="o">()</span>,pKey<span class="o">=</span>None,ct<span class="o">=</span>2015-05-15T21:31:19.354Z<span 
class="o">)</span>, Event<span class="o">(</span><span 
class="nv">id</span><span class="o">=</span>Some<span 
class="o">(</span>DjvP3Dnci9F4CWmiqoLabQAAAU1ZfROaqdRYO-pZ_no<span 
class="o">)</span>,event<span class="o">=</span><span 
class="nv">$set</span>,eType<span class="o">=</span>user,eId<span 
class="o">=</span>u9,tType<span class="o">=</span>None,tId<span 
class="o">=</span>None,p<span class="o">=</span>DataMap<span 
class="o">(</span>Map<span class="o">())</span>,t<span 
class="o">=</span>2015-05-15T21:31:18.810Z,tags<span 
class="o">=</span>List<span class="o">()</span>,pKey<span 
class="o">=</span>None,ct<span class="o">=</span>2015-05-15T21:31:18.817Z<span 
class="o">)</span>, Event<span class="o">(</span><span 
class="nv">id</span><span class="o">=</span>Some<span 
class="o">(</span>DjvP3Dnci9F4CWmiqoLabQAAAU1ZfRq7tsanlemwmZQ<span 
class="o">)</span>,event<span class="o">=</span>view,eType<span 
class="o">=</span>user,eId<span class="o">=</span>u9,tType<span class="o">=</span>Some<span 
class="o">(</span>item<span class="o">)</span>,tId<span 
class="o">=</span>Some<span class="o">(</span>i25<span 
class="o">)</span>,p<span class="o">=</span>DataMap<span 
class="o">(</span>Map<span class="o">())</span>,t<span 
class="o">=</span>2015-05-15T21:31:20.635Z,tags<span 
class="o">=</span>List<span class="o">()</span>,pKey<span 
class="o">=</span>None,ct<span class="o">=</span>2015-05-15T21:31:20.639Z<span 
class="o">)</span>, Event<span class="o">(</span><span 
class="nv">id</span><span class="o">=</span>Some<span 
class="o">(</span>DjvP3Dnci9F4CWmiqoLabQAAAU1ZfR...
+</pre></td></tr></tbody></table> </div> </div></div></div></div><footer><div 
class="container"><div class="seperator"></div><div class="row"><div 
class="col-md-6 col-xs-6 footer-link-column"><div 
class="footer-link-column-row"><h4>Community</h4><ul><li><a 
href="//docs.prediction.io/install/" target="blank">Download</a></li><li><a 
href="//docs.prediction.io/" target="blank">Docs</a></li><li><a 
href="//github.com/apache/incubator-predictionio" 
target="blank">GitHub</a></li><li><a 
href="mailto:[email protected]" 
target="blank">Subscribe to User Mailing List</a></li><li><a 
href="//stackoverflow.com/questions/tagged/predictionio" 
target="blank">Stackoverflow</a></li></ul></div></div><div class="col-md-6 
col-xs-6 footer-link-column"><div 
class="footer-link-column-row"><h4>Contribute</h4><ul><li><a 
href="//predictionio.incubator.apache.org/community/contribute-code/" 
target="blank">Contribute</a></li><li><a 
href="//github.com/apache/incubator-predictionio" target="blank">Source Code</a></li><li><a 
href="//issues.apache.org/jira/browse/PIO" target="blank">Bug 
Tracker</a></li><li><a 
href="mailto:[email protected]" 
target="blank">Subscribe to Development Mailing 
List</a></li></ul></div></div></div></div><div id="footer-bottom"><div 
class="container"><div class="row"><div class="col-md-12"><div 
id="footer-logo-wrapper"><img alt="PredictionIO" 
src="/images/logos/logo-white-d1e9c6e6.png"/></div><div 
id="social-icons-wrapper"><a class="github-button" 
href="https://github.com/apache/incubator-predictionio" data-style="mega" 
data-count-href="/apache/incubator-predictionio/stargazers" 
data-count-api="/repos/apache/incubator-predictionio#stargazers_count" 
data-count-aria-label="# stargazers on GitHub" aria-label="Star 
apache/incubator-predictionio on GitHub">Star</a> <a class="github-button" 
href="https://github.com/apache/incubator-predictionio/fork" 
data-icon="octicon-git-branch" data-style="mega" 
data-count-href="/apache/incubator-predictionio/network" 
data-count-api="/repos/apache/incubator-predictionio#forks_count" 
data-count-aria-label="# forks on GitHub" aria-label="Fork 
apache/incubator-predictionio on GitHub">Fork</a> <script id="github-bjs" 
async="" defer="" src="https://buttons.github.io/buttons.js"></script><a 
href="//twitter.com/predictionio" target="blank"><img alt="PredictionIO on 
Twitter" src="/images/icons/twitter-ea9dc152.png"/></a> <a 
href="//www.facebook.com/predictionio" target="blank"><img alt="PredictionIO on 
Facebook" src="/images/icons/facebook-5c57939c.png"/></a> 
</div></div></div></div></div></footer></div><script>(function(w,d,t,u,n,s,e){w['SwiftypeObject']=n;w[n]=w[n]||function(){
+(w[n].q=w[n].q||[]).push(arguments);};s=d.createElement(t);
+e=d.getElementsByTagName(t)[0];s.async=1;s.src=u;e.parentNode.insertBefore(s,e);
+})(window,document,'script','//s.swiftypecdn.com/install/v1/st.js','_st');
+
+_st('install','HaUfpXXV87xoB_zzCQ45');</script><script 
src="/javascripts/application-f819cf19.js"></script></body></html>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-predictionio-site/blob/921d6861/customize/troubleshooting/index.html.gz
----------------------------------------------------------------------
diff --git a/customize/troubleshooting/index.html.gz 
b/customize/troubleshooting/index.html.gz
new file mode 100644
index 0000000..c1a2777
Binary files /dev/null and b/customize/troubleshooting/index.html.gz differ

http://git-wip-us.apache.org/repos/asf/incubator-predictionio-site/blob/921d6861/datacollection/analytics-ipynb/index.html
----------------------------------------------------------------------
diff --git a/datacollection/analytics-ipynb/index.html 
b/datacollection/analytics-ipynb/index.html
new file mode 100644
index 0000000..3adfffa
--- /dev/null
+++ b/datacollection/analytics-ipynb/index.html
@@ -0,0 +1,87 @@
+<!DOCTYPE html><html><head><title>Machine Learning Analytics with IPython 
Notebook</title><meta charset="utf-8"/><meta content="IE=edge,chrome=1" 
http-equiv="X-UA-Compatible"/><meta name="viewport" 
content="width=device-width, initial-scale=1.0"/><meta class="swiftype" 
name="title" data-type="string" content="Machine Learning Analytics with 
IPython Notebook"/><link rel="canonical" 
href="https://docs.prediction.io/datacollection/analytics-ipynb/"/><link 
href="/images/favicon/normal-b330020a.png" rel="shortcut icon"/><link 
href="/images/favicon/apple-c0febcf2.png" rel="apple-touch-icon"/><link 
href="//fonts.googleapis.com/css?family=Open+Sans:300italic,400italic,600italic,700italic,800italic,400,300,600,700,800"
 rel="stylesheet"/><link 
href="//maxcdn.bootstrapcdn.com/font-awesome/4.2.0/css/font-awesome.min.css" 
rel="stylesheet"/><link href="/stylesheets/application-3a3867f7.css" 
rel="stylesheet" type="text/css"/><script 
src="//cdnjs.cloudflare.com/ajax/libs/html5shiv/3.7.2/html5shiv.min.js"></script><script 
src="//cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script><script
 src="//use.typekit.net/pqo0itb.js"></script><script>try{Typekit.load({ async: 
true });}catch(e){}</script></head><body><div id="global"><header><div 
class="container" id="header-wrapper"><div class="row"><div 
class="col-sm-12"><div id="logo-wrapper"><span id="drawer-toggle"></span><a 
href="#"></a><a href="http://predictionio.incubator.apache.org/"><img 
alt="PredictionIO" id="logo" 
src="/images/logos/logo-ee2b9bb3.png"/></a></div><div id="menu-wrapper"><div 
id="pill-wrapper"><a class="pill left" 
href="/gallery/template-gallery">TEMPLATES</a> <a class="pill right" 
href="//github.com/apache/incubator-predictionio/">OPEN 
SOURCE</a></div></div><img class="mobile-search-bar-toggler hidden-md 
hidden-lg" 
src="/images/icons/search-glass-704bd4ff.png"/></div></div></div></header><div 
id="search-bar-row-wrapper"><div class="container-fluid" 
id="search-bar-row"><div class="row
 "><div class="col-md-9 col-sm-11 col-xs-11"><div class="hidden-md hidden-lg" 
id="mobile-page-heading-wrapper"><p>PredictionIO Docs</p><h4>Machine Learning 
Analytics with IPython Notebook</h4></div><h4 class="hidden-sm 
hidden-xs">PredictionIO Docs</h4></div><div class="col-md-3 col-sm-1 col-xs-1 
hidden-md hidden-lg"><img id="left-menu-indicator" 
src="/images/icons/down-arrow-dfe9f7fe.png"/></div><div class="col-md-3 
col-sm-12 col-xs-12 swiftype-wrapper"><div class="swiftype"><form 
class="search-form"><img class="search-box-toggler hidden-xs hidden-sm" 
src="/images/icons/search-glass-704bd4ff.png"/><div class="search-box"><img 
src="/images/icons/search-glass-704bd4ff.png"/><input type="text" 
id="st-search-input" class="st-search-input" placeholder="Search 
Doc..."/></div><img class="swiftype-row-hider hidden-md hidden-lg" 
src="/images/icons/drawer-toggle-active-fcbef12a.png"/></form></div></div><div 
class="mobile-left-menu-toggler hidden-md 
hidden-lg"></div></div></div></div><div id="page" class="container-fluid"><div class="row"><div id="left-menu-wrapper" 
class="col-md-3"><nav id="nav-main"><ul><li class="level-1"><a 
class="expandible" href="/"><span>Apache PredictionIO (incubating) 
Documentation</span></a><ul><li class="level-2"><a class="final" 
href="/"><span>Welcome to Apache PredictionIO 
(incubating)</span></a></li></ul></li><li class="level-1"><a class="expandible" 
href="#"><span>Getting Started</span></a><ul><li class="level-2"><a 
class="final" href="/start/"><span>A Quick Intro</span></a></li><li 
class="level-2"><a class="final" href="/install/"><span>Installing Apache 
PredictionIO (incubating)</span></a></li><li class="level-2"><a class="final" 
href="/start/download/"><span>Downloading an Engine Template</span></a></li><li 
class="level-2"><a class="final" href="/start/deploy/"><span>Deploying Your 
First Engine</span></a></li><li class="level-2"><a class="final" 
href="/start/customize/"><span>Customizing the 
Engine</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Integrating with Your 
App</span></a><ul><li class="level-2"><a class="final" 
href="/appintegration/"><span>App Integration Overview</span></a></li><li 
class="level-2"><a class="expandible" href="/sdk/"><span>List of 
SDKs</span></a><ul><li class="level-3"><a class="final" 
href="/sdk/java/"><span>Java & Android SDK</span></a></li><li 
class="level-3"><a class="final" href="/sdk/php/"><span>PHP 
SDK</span></a></li><li class="level-3"><a class="final" 
href="/sdk/python/"><span>Python SDK</span></a></li><li class="level-3"><a 
class="final" href="/sdk/ruby/"><span>Ruby SDK</span></a></li><li 
class="level-3"><a class="final" href="/sdk/community/"><span>Community Powered 
SDKs</span></a></li></ul></li></ul></li><li class="level-1"><a 
class="expandible" href="#"><span>Deploying an Engine</span></a><ul><li 
class="level-2"><a class="final" href="/deploy/"><span>Deploying as a Web 
Service</span></a></li><li class="level-2"><a class="final" href="/cli/#engine-commands"><span>Engine Command-line Interface</span></a></li><li 
class="level-2"><a class="final" href="/deploy/monitoring/"><span>Monitoring 
Engine</span></a></li><li class="level-2"><a class="final" 
href="/deploy/engineparams/"><span>Setting Engine Parameters</span></a></li><li 
class="level-2"><a class="final" href="/deploy/enginevariants/"><span>Deploying 
Multiple Engine Variants</span></a></li></ul></li><li class="level-1"><a 
class="expandible" href="#"><span>Customizing an Engine</span></a><ul><li 
class="level-2"><a class="final" href="/customize/"><span>Learning 
DASE</span></a></li><li class="level-2"><a class="final" 
href="/customize/dase/"><span>Implement DASE</span></a></li><li 
class="level-2"><a class="final" 
href="/customize/troubleshooting/"><span>Troubleshooting Engine 
Development</span></a></li><li class="level-2"><a class="final" 
href="/api/current/#package"><span>Engine Scala 
APIs</span></a></li></ul></li><li class="level-1"><a class="expandible" 
href="#"><span>
 Collecting and Analyzing Data</span></a><ul><li class="level-2"><a 
class="final" href="/datacollection/"><span>Event Server 
Overview</span></a></li><li class="level-2"><a class="final" 
href="/cli/#event-server-commands"><span>Event Server Command-line 
Interface</span></a></li><li class="level-2"><a class="final" 
href="/datacollection/eventapi/"><span>Collecting Data with 
REST/SDKs</span></a></li><li class="level-2"><a class="final" 
href="/datacollection/eventmodel/"><span>Events Modeling</span></a></li><li 
class="level-2"><a class="final" 
href="/datacollection/webhooks/"><span>Unifying Multichannel Data with 
Webhooks</span></a></li><li class="level-2"><a class="final" 
href="/datacollection/channel/"><span>Channel</span></a></li><li 
class="level-2"><a class="final" 
href="/datacollection/batchimport/"><span>Importing Data in 
Batch</span></a></li><li class="level-2"><a class="final" 
href="/datacollection/analytics/"><span>Using Analytics 
Tools</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Choosing an 
Algorithm(s)</span></a><ul><li class="level-2"><a class="final" 
href="/algorithm/"><span>Built-in Algorithm Libraries</span></a></li><li 
class="level-2"><a class="final" href="/algorithm/switch/"><span>Switching to 
Another Algorithm</span></a></li><li class="level-2"><a class="final" 
href="/algorithm/multiple/"><span>Combining Multiple 
Algorithms</span></a></li><li class="level-2"><a class="final" 
href="/algorithm/custom/"><span>Adding Your Own 
Algorithms</span></a></li></ul></li><li class="level-1"><a class="expandible" 
href="#"><span>ML Tuning and Evaluation</span></a><ul><li class="level-2"><a 
class="final" href="/evaluation/"><span>Overview</span></a></li><li 
class="level-2"><a class="final" 
href="/evaluation/paramtuning/"><span>Hyperparameter Tuning</span></a></li><li 
class="level-2"><a class="final" 
href="/evaluation/evaluationdashboard/"><span>Evaluation 
Dashboard</span></a></li><li class="level-2"><a class="final" href="/evaluation/metricchoose/"><span>Choosing Evaluation Metrics</span></a></li><li 
class="level-2"><a class="final" href="/evaluation/metricbuild/"><span>Building 
Evaluation Metrics</span></a></li></ul></li><li class="level-1"><a 
class="expandible" href="#"><span>System Architecture</span></a><ul><li 
class="level-2"><a class="final" href="/system/"><span>Architecture 
Overview</span></a></li><li class="level-2"><a class="final" 
href="/system/anotherdatastore/"><span>Using Another Data 
Store</span></a></li></ul></li><li class="level-1"><a class="expandible" 
href="#"><span>Engine Template Gallery</span></a><ul><li class="level-2"><a 
class="final" href="/gallery/template-gallery/"><span>Browse</span></a></li><li 
class="level-2"><a class="final" 
href="/community/submit-template/"><span>Submit your Engine as a 
Template</span></a></li></ul></li><li class="level-1"><a class="expandible" 
href="#"><span>Demo Tutorials</span></a><ul><li class="level-2"><a 
class="final" href="/demo/tapster/"><span>Comics Recommendation Demo</span></a></li><li class="level-2"><a class="final" 
href="/demo/community/"><span>Community Contributed Demo</span></a></li><li 
class="level-2"><a class="final" href="/demo/textclassification/"><span>Text 
Classification Engine Tutorial</span></a></li></ul></li><li class="level-1"><a 
class="expandible" href="/community/"><span>Getting Involved</span></a><ul><li 
class="level-2"><a class="final" 
href="/community/contribute-code/"><span>Contribute Code</span></a></li><li 
class="level-2"><a class="final" 
href="/community/contribute-documentation/"><span>Contribute 
Documentation</span></a></li><li class="level-2"><a class="final" 
href="/community/contribute-sdk/"><span>Contribute an SDK</span></a></li><li 
class="level-2"><a class="final" 
href="/community/contribute-webhook/"><span>Contribute a 
Webhook</span></a></li><li class="level-2"><a class="final" 
href="/community/projects/"><span>Community 
Projects</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Getting Help</span></a><ul><li class="level-2"><a 
class="final" href="/resources/faq/"><span>FAQs</span></a></li><li 
class="level-2"><a class="final" 
href="/support/"><span>Support</span></a></li></ul></li><li class="level-1"><a 
class="expandible" href="#"><span>Resources</span></a><ul><li 
class="level-2"><a class="final" href="/resources/intellij/"><span>Developing 
Engines with IntelliJ IDEA</span></a></li><li class="level-2"><a class="final" 
href="/resources/upgrade/"><span>Upgrade Instructions</span></a></li><li 
class="level-2"><a class="final" 
href="/resources/glossary/"><span>Glossary</span></a></li></ul></li></ul></nav></div><div
 class="col-md-9 col-sm-12"><div class="content-header hidden-md 
hidden-lg"><div id="page-title"><h1>Machine Learning Analytics with IPython 
Notebook</h1></div></div><div id="table-of-content-wrapper"><h5>On this 
page</h5><aside id="table-of-contents"><ul> <li> <a 
href="#prerequisites">Prerequisites</a> </li> <li> <a href="#preparing-ipython-notebook">Preparing IPython Notebook</a> </li> <li> <a 
href="#performing-analysis-with-spark-sql">Performing Analysis with Spark 
SQL</a> </li> </ul> </aside><hr/><a id="edit-page-link" 
href="https://github.com/apache/incubator-predictionio/tree/livedoc/docs/manual/source/datacollection/analytics-ipynb.html.md.erb"><img
 src="/images/icons/edit-pencil-d6c1bb3d.png"/>Edit this page</a></div><div 
class="content-header hidden-sm hidden-xs"><div id="page-title"><h1>Machine 
Learning Analytics with IPython Notebook</h1></div></div><div 
class="content"><p><a href="http://ipython.org/notebook.html">IPython 
Notebook</a> is a very powerful interactive computational environment, and with 
<a href="http://predictionio.incubator.apache.org">Apache PredictionIO 
(incubating)</a>, <a 
href="http://spark.apache.org/docs/latest/api/python/">PySpark</a> and <a 
href="https://spark.apache.org/sql/">Spark SQL</a>, you can easily analyze your 
collected events when you are developing or tuning your 
 engine.</p><h2 id='prerequisites' 
class='header-anchors'>Prerequisites</h2><p>Before you begin, please make sure 
you have the latest stable IPython installed, and that the command 
<code>ipython</code> can be accessed from your shell&#39;s search 
path.</p><h2 id='export-events-to-apache-parquet' 
class='header-anchors'>Export Events to Apache Parquet</h2><p>PredictionIO 
supports exporting your events to <a 
href="http://parquet.incubator.apache.org/">Apache Parquet</a>, a columnar 
storage format that allows you to query quickly.</p><p>Let&#39;s export the 
data we imported in <a 
href="/templates/recommendation/quickstart/#import-sample-data">Recommendation 
Engine Template Quick Start</a>, and assume the App ID is 1.</p><div 
class="highlight shell"><table style="border-spacing: 0"><tbody><tr><td 
class="gutter gl" style="text-align: right"><pre class="lineno">1</pre></td><td 
class="code"><pre><span class="gp">$ </span><span 
class="nv">$PIO_HOME</span>/bin/pio <span class="nb">export</span> --appid 1 --output /tmp/movies --format parquet
+</pre></td></tr></tbody></table> </div> <p>After the command has finished 
successfully, you should see something similar to the following.</p><div 
class="highlight shell"><table style="border-spacing: 0"><tbody><tr><td 
class="gutter gl" style="text-align: right"><pre class="lineno">1
+2
+3
+4
+5
+6
+7
+8
+9
+10
+11</pre></td><td class="code"><pre>root
+ |-- creationTime: string <span class="o">(</span>nullable <span 
class="o">=</span> <span class="nb">true</span><span class="o">)</span>
+ |-- entityId: string <span class="o">(</span>nullable <span 
class="o">=</span> <span class="nb">true</span><span class="o">)</span>
+ |-- entityType: string <span class="o">(</span>nullable <span 
class="o">=</span> <span class="nb">true</span><span class="o">)</span>
+ |-- event: string <span class="o">(</span>nullable <span class="o">=</span> 
<span class="nb">true</span><span class="o">)</span>
+ |-- eventId: string <span class="o">(</span>nullable <span class="o">=</span> 
<span class="nb">true</span><span class="o">)</span>
+ |-- eventTime: string <span class="o">(</span>nullable <span 
class="o">=</span> <span class="nb">true</span><span class="o">)</span>
+ |-- properties: struct <span class="o">(</span>nullable <span 
class="o">=</span> <span class="nb">true</span><span class="o">)</span>
+ |    |-- rating: double <span class="o">(</span>nullable <span 
class="o">=</span> <span class="nb">true</span><span class="o">)</span>
+ |-- targetEntityId: string <span class="o">(</span>nullable <span 
class="o">=</span> <span class="nb">true</span><span class="o">)</span>
+ |-- targetEntityType: string <span class="o">(</span>nullable <span 
class="o">=</span> <span class="nb">true</span><span class="o">)</span>
+</pre></td></tr></tbody></table> </div><h2 id='preparing-ipython-notebook' 
class='header-anchors'>Preparing IPython Notebook</h2><p>Launch IPython 
Notebook with PySpark using the following command, with 
<code>$SPARK_HOME</code> replaced by the location of Apache Spark.</p><div 
class="highlight shell"><table style="border-spacing: 0"><tbody><tr><td 
class="gutter gl" style="text-align: right"><pre class="lineno">1</pre></td><td 
class="code"><pre><span class="gp">$ </span><span 
class="nv">PYSPARK_DRIVER_PYTHON</span><span class="o">=</span>ipython <span 
class="nv">PYSPARK_DRIVER_PYTHON_OPTS</span><span class="o">=</span><span 
class="s2">"notebook --pylab inline"</span> <span 
class="nv">$SPARK_HOME</span>/bin/pyspark
+</pre></td></tr></tbody></table> </div> <p>By default, you should be able to 
access your IPython Notebook via web browser at <a 
href="http://localhost:8888">http://localhost:8888</a>.</p><p>Let&#39;s 
initialize our notebook with the following code in the first cell.</p><div 
class="highlight python"><table style="border-spacing: 0"><tbody><tr><td 
class="gutter gl" style="text-align: right"><pre class="lineno">1
+2
+3
+4
+5
+6
+7</pre></td><td class="code"><pre><span class="kn">import</span> <span 
class="nn">pandas</span> <span class="kn">as</span> <span class="nn">pd</span>
+<span class="k">def</span> <span class="nf">rows_to_df</span><span 
class="p">(</span><span class="n">rows</span><span class="p">):</span>
+    <span class="k">return</span> <span class="n">pd</span><span 
class="o">.</span><span class="n">DataFrame</span><span class="p">(</span><span 
class="nb">map</span><span class="p">(</span><span class="k">lambda</span> 
<span class="n">e</span><span class="p">:</span> <span class="n">e</span><span 
class="o">.</span><span class="n">asDict</span><span class="p">(),</span> <span 
class="n">rows</span><span class="p">))</span>
+<span class="kn">from</span> <span class="nn">pyspark.sql</span> <span 
class="kn">import</span> <span class="n">SQLContext</span>
+<span class="n">sqlc</span> <span class="o">=</span> <span 
class="n">SQLContext</span><span class="p">(</span><span 
class="n">sc</span><span class="p">)</span>
+<span class="n">rdd</span> <span class="o">=</span> <span 
class="n">sqlc</span><span class="o">.</span><span 
class="n">parquetFile</span><span class="p">(</span><span 
class="s">"/tmp/movies"</span><span class="p">)</span>
+<span class="n">rdd</span><span class="o">.</span><span 
class="n">registerTempTable</span><span class="p">(</span><span 
class="s">"events"</span><span class="p">)</span>
+</pre></td></tr></tbody></table> </div> <p><img alt="Initialization for 
IPython Notebook" 
src="/images/datacollection/ipynb-01-004d791e.png"/></p><p><code>rows_to_df(rows)</code>
 will come in handy when we want to dump the results from Spark SQL using 
IPython Notebook&#39;s native table rendering.</p><h2 
id='performing-analysis-with-spark-sql' class='header-anchors'>Performing 
Analysis with Spark SQL</h2><p>If all steps above ran successfully, you should 
have a ready-to-use analytics environment by now. Let&#39;s try a few examples 
to see if everything is functional.</p><p>In the second cell, put in this piece 
of code and run it.</p><div class="highlight python"><table 
style="border-spacing: 0"><tbody><tr><td class="gutter gl" style="text-align: 
right"><pre class="lineno">1
+2
+3
+4
+5</pre></td><td class="code"><pre><span class="n">summary</span> <span 
class="o">=</span> <span class="n">sqlc</span><span class="o">.</span><span 
class="n">sql</span><span class="p">(</span><span class="s">"SELECT "</span>
+                   <span class="s">"entityType, event, targetEntityType, 
COUNT(*) AS c "</span>
+                   <span class="s">"FROM events "</span>
+                   <span class="s">"GROUP BY entityType, event, 
targetEntityType"</span><span class="p">)</span><span class="o">.</span><span 
class="n">collect</span><span class="p">()</span>
+<span class="n">rows_to_df</span><span class="p">(</span><span 
class="n">summary</span><span class="p">)</span>
+</pre></td></tr></tbody></table> </div> <p>You should see the following 
screen.</p><p><img alt="Summary of Events" 
src="/images/datacollection/ipynb-02-cd8b12e4.png"/></p><p>We can also plot our 
data, in the next two cells.</p><div class="highlight python"><table 
style="border-spacing: 0"><tbody><tr><td class="gutter gl" style="text-align: 
right"><pre class="lineno">1
+2
+3
+4
+5
+6
+7</pre></td><td class="code"><pre><span class="kn">import</span> <span 
class="nn">matplotlib.pyplot</span> <span class="kn">as</span> <span 
class="nn">plt</span>
+<span class="n">count</span> <span class="o">=</span> <span 
class="nb">map</span><span class="p">(</span><span class="k">lambda</span> 
<span class="n">e</span><span class="p">:</span> <span class="n">e</span><span 
class="o">.</span><span class="n">c</span><span class="p">,</span> <span 
class="n">summary</span><span class="p">)</span>
+<span class="n">event</span> <span class="o">=</span> <span 
class="nb">map</span><span class="p">(</span><span class="k">lambda</span> 
<span class="n">e</span><span class="p">:</span> <span class="s">"</span><span 
class="si">%</span><span class="s">s (</span><span class="si">%</span><span 
class="s">d)"</span> <span class="o">%</span> <span class="p">(</span><span 
class="n">e</span><span class="o">.</span><span class="n">event</span><span 
class="p">,</span> <span class="n">e</span><span class="o">.</span><span 
class="n">c</span><span class="p">),</span> <span class="n">summary</span><span 
class="p">)</span>
+<span class="n">colors</span> <span class="o">=</span> <span 
class="p">[</span><span class="s">'gold'</span><span class="p">,</span> <span 
class="s">'lightskyblue'</span><span class="p">]</span>
+<span class="n">plt</span><span class="o">.</span><span 
class="n">pie</span><span class="p">(</span><span class="n">count</span><span 
class="p">,</span> <span class="n">labels</span><span class="o">=</span><span 
class="n">event</span><span class="p">,</span> <span 
class="n">colors</span><span class="o">=</span><span 
class="n">colors</span><span class="p">,</span> <span 
class="n">startangle</span><span class="o">=</span><span 
class="mi">90</span><span class="p">,</span> <span 
class="n">autopct</span><span class="o">=</span><span class="s">"</span><span 
class="si">%1.1</span><span class="s">f</span><span class="si">%%</span><span 
class="s">"</span><span class="p">)</span>
+<span class="n">plt</span><span class="o">.</span><span 
class="n">axis</span><span class="p">(</span><span 
class="s">'equal'</span><span class="p">)</span>
+<span class="n">plt</span><span class="o">.</span><span 
class="n">show</span><span class="p">()</span>
+</pre></td></tr></tbody></table> </div> <p><img alt="Summary in Pie Chart" 
src="/images/datacollection/ipynb-03-28f3aa3d.png"/></p><div class="highlight 
python"><table style="border-spacing: 0"><tbody><tr><td class="gutter gl" 
style="text-align: right"><pre class="lineno">1
+2
+3
+4
+5
+6
+7
+8
+9
+10
+11
+12</pre></td><td class="code"><pre><span class="n">ratings</span> <span 
class="o">=</span> <span class="n">sqlc</span><span class="o">.</span><span 
class="n">sql</span><span class="p">(</span><span class="s">"SELECT 
properties.rating AS r, COUNT(*) AS c "</span>
+                   <span class="s">"FROM events "</span>
+                   <span class="s">"WHERE properties.rating IS NOT NULL 
"</span>
+                   <span class="s">"GROUP BY properties.rating "</span>
+                   <span class="s">"ORDER BY r"</span><span 
class="p">)</span><span class="o">.</span><span class="n">collect</span><span 
class="p">()</span>
+<span class="n">count</span> <span class="o">=</span> <span 
class="nb">map</span><span class="p">(</span><span class="k">lambda</span> 
<span class="n">e</span><span class="p">:</span> <span class="n">e</span><span 
class="o">.</span><span class="n">c</span><span class="p">,</span> <span 
class="n">ratings</span><span class="p">)</span>
+<span class="n">rating</span> <span class="o">=</span> <span 
class="nb">map</span><span class="p">(</span><span class="k">lambda</span> 
<span class="n">e</span><span class="p">:</span> <span class="s">"</span><span 
class="si">%</span><span class="s">s (</span><span class="si">%</span><span 
class="s">d)"</span> <span class="o">%</span> <span class="p">(</span><span 
class="n">e</span><span class="o">.</span><span class="n">r</span><span 
class="p">,</span> <span class="n">e</span><span class="o">.</span><span 
class="n">c</span><span class="p">),</span> <span class="n">ratings</span><span 
class="p">)</span>
+<span class="n">colors</span> <span class="o">=</span> <span 
class="p">[</span><span class="s">'yellowgreen'</span><span class="p">,</span> 
<span class="s">'plum'</span><span class="p">,</span> <span 
class="s">'gold'</span><span class="p">,</span> <span 
class="s">'lightskyblue'</span><span class="p">,</span> <span 
class="s">'lightcoral'</span><span class="p">]</span>
+<span class="n">plt</span><span class="o">.</span><span 
class="n">pie</span><span class="p">(</span><span class="n">count</span><span 
class="p">,</span> <span class="n">labels</span><span class="o">=</span><span 
class="n">rating</span><span class="p">,</span> <span 
class="n">colors</span><span class="o">=</span><span 
class="n">colors</span><span class="p">,</span> <span 
class="n">startangle</span><span class="o">=</span><span 
class="mi">90</span><span class="p">,</span>
+        <span class="n">autopct</span><span class="o">=</span><span 
class="s">"</span><span class="si">%1.1</span><span class="s">f</span><span 
class="si">%%</span><span class="s">"</span><span class="p">)</span>
+<span class="n">plt</span><span class="o">.</span><span 
class="n">axis</span><span class="p">(</span><span 
class="s">'equal'</span><span class="p">)</span>
+<span class="n">plt</span><span class="o">.</span><span 
class="n">show</span><span class="p">()</span>
+</pre></td></tr></tbody></table> </div> <p><img alt="Breakdown of Ratings" 
src="/images/datacollection/ipynb-04-797d73f1.png"/></p><p>Happy 
analyzing!</p></div></div></div></div><footer><div class="container"><div 
class="seperator"></div><div class="row"><div class="col-md-6 col-xs-6 
footer-link-column"><div 
class="footer-link-column-row"><h4>Community</h4><ul><li><a 
href="//docs.prediction.io/install/" target="blank">Download</a></li><li><a 
href="//docs.prediction.io/" target="blank">Docs</a></li><li><a 
href="//github.com/apache/incubator-predictionio" 
target="blank">GitHub</a></li><li><a 
href="mailto:[email protected]"; 
target="blank">Subscribe to User Mailing List</a></li><li><a 
href="//stackoverflow.com/questions/tagged/predictionio" 
target="blank">Stackoverflow</a></li></ul></div></div><div class="col-md-6 
col-xs-6 footer-link-column"><div 
class="footer-link-column-row"><h4>Contribute</h4><ul><li><a 
href="//predictionio.incubator.apache.org/community/contribute-code/" target="blank">Contribute</a></li><li><a 
href="//github.com/apache/incubator-predictionio" target="blank">Source 
Code</a></li><li><a href="//issues.apache.org/jira/browse/PIO" 
target="blank">Bug Tracker</a></li><li><a 
href="mailto:[email protected]"; 
target="blank">Subscribe to Development Mailing 
List</a></li></ul></div></div></div></div><div id="footer-bottom"><div 
class="container"><div class="row"><div class="col-md-12"><div 
id="footer-logo-wrapper"><img alt="PredictionIO" 
src="/images/logos/logo-white-d1e9c6e6.png"/></div><div 
id="social-icons-wrapper"><a class="github-button" 
href="https://github.com/apache/incubator-predictionio" data-style="mega" 
data-count-href="/apache/incubator-predictionio/stargazers" 
data-count-api="/repos/apache/incubator-predictionio#stargazers_count" 
data-count-aria-label="# stargazers on GitHub" aria-label="Star 
apache/incubator-predictionio on GitHub">Star</a> <a class="github-button" 
href="https://github.com/apache/incubator-predictionio/fork" data-icon="octicon-git-branch" 
data-style="mega" data-count-href="/apache/incubator-predictionio/network" 
data-count-api="/repos/apache/incubator-predictionio#forks_count" 
data-count-aria-label="# forks on GitHub" aria-label="Fork 
apache/incubator-predictionio on GitHub">Fork</a> <script id="github-bjs" 
async="" defer="" src="https://buttons.github.io/buttons.js"></script><a 
href="//twitter.com/predictionio" target="blank"><img alt="PredictionIO on 
Twitter" src="/images/icons/twitter-ea9dc152.png"/></a> <a 
href="//www.facebook.com/predictionio" target="blank"><img alt="PredictionIO on 
Facebook" src="/images/icons/facebook-5c57939c.png"/></a> 
</div></div></div></div></div></footer></div><script>(function(w,d,t,u,n,s,e){w['SwiftypeObject']=n;w[n]=w[n]||function(){
+(w[n].q=w[n].q||[]).push(arguments);};s=d.createElement(t);
+e=d.getElementsByTagName(t)[0];s.async=1;s.src=u;e.parentNode.insertBefore(s,e);
+})(window,document,'script','//s.swiftypecdn.com/install/v1/st.js','_st');
+
+_st('install','HaUfpXXV87xoB_zzCQ45');</script><script 
src="/javascripts/application-f819cf19.js"></script></body></html>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-predictionio-site/blob/921d6861/datacollection/analytics-ipynb/index.html.gz
----------------------------------------------------------------------
diff --git a/datacollection/analytics-ipynb/index.html.gz 
b/datacollection/analytics-ipynb/index.html.gz
new file mode 100644
index 0000000..110f3d6
Binary files /dev/null and b/datacollection/analytics-ipynb/index.html.gz differ

http://git-wip-us.apache.org/repos/asf/incubator-predictionio-site/blob/921d6861/datacollection/analytics-tableau/index.html
----------------------------------------------------------------------
diff --git a/datacollection/analytics-tableau/index.html 
b/datacollection/analytics-tableau/index.html
new file mode 100644
index 0000000..dd0adea
--- /dev/null
+++ b/datacollection/analytics-tableau/index.html
@@ -0,0 +1,91 @@
+<!DOCTYPE html><html><head><title>Machine Learning Analytics with 
Tableau</title><meta charset="utf-8"/><meta content="IE=edge,chrome=1" 
http-equiv="X-UA-Compatible"/><meta name="viewport" 
content="width=device-width, initial-scale=1.0"/><meta class="swiftype" 
name="title" data-type="string" content="Machine Learning Analytics with 
Tableau"/><link rel="canonical" 
href="https://docs.prediction.io/datacollection/analytics-tableau/"/><link 
href="/images/favicon/normal-b330020a.png" rel="shortcut icon"/><link 
href="/images/favicon/apple-c0febcf2.png" rel="apple-touch-icon"/><link 
href="//fonts.googleapis.com/css?family=Open+Sans:300italic,400italic,600italic,700italic,800italic,400,300,600,700,800"
 rel="stylesheet"/><link 
href="//maxcdn.bootstrapcdn.com/font-awesome/4.2.0/css/font-awesome.min.css" 
rel="stylesheet"/><link href="/stylesheets/application-3a3867f7.css" 
rel="stylesheet" type="text/css"/><script 
src="//cdnjs.cloudflare.com/ajax/libs/html5shiv/3.7.2/html5shiv.min.js"></script>
 <script 
src="//cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script><script
 src="//use.typekit.net/pqo0itb.js"></script><script>try{Typekit.load({ async: 
true });}catch(e){}</script></head><body><div id="global"><header><div 
class="container" id="header-wrapper"><div class="row"><div 
class="col-sm-12"><div id="logo-wrapper"><span id="drawer-toggle"></span><a 
href="#"></a><a href="http://predictionio.incubator.apache.org/"><img 
alt="PredictionIO" id="logo" 
src="/images/logos/logo-ee2b9bb3.png"/></a></div><div id="menu-wrapper"><div 
id="pill-wrapper"><a class="pill left" 
href="/gallery/template-gallery">TEMPLATES</a> <a class="pill right" 
href="//github.com/apache/incubator-predictionio/">OPEN 
SOURCE</a></div></div><img class="mobile-search-bar-toggler hidden-md 
hidden-lg" 
src="/images/icons/search-glass-704bd4ff.png"/></div></div></div></header><div 
id="search-bar-row-wrapper"><div class="container-fluid" 
id="search-bar-row"><div class="row"><div 
class="col-md-9 col-sm-11 col-xs-11"><div class="hidden-md hidden-lg" 
id="mobile-page-heading-wrapper"><p>PredictionIO Docs</p><h4>Machine Learning 
Analytics with Tableau</h4></div><h4 class="hidden-sm hidden-xs">PredictionIO 
Docs</h4></div><div class="col-md-3 col-sm-1 col-xs-1 hidden-md hidden-lg"><img 
id="left-menu-indicator" 
src="/images/icons/down-arrow-dfe9f7fe.png"/></div><div class="col-md-3 
col-sm-12 col-xs-12 swiftype-wrapper"><div class="swiftype"><form 
class="search-form"><img class="search-box-toggler hidden-xs hidden-sm" 
src="/images/icons/search-glass-704bd4ff.png"/><div class="search-box"><img 
src="/images/icons/search-glass-704bd4ff.png"/><input type="text" 
id="st-search-input" class="st-search-input" placeholder="Search 
Doc..."/></div><img class="swiftype-row-hider hidden-md hidden-lg" 
src="/images/icons/drawer-toggle-active-fcbef12a.png"/></form></div></div><div 
class="mobile-left-menu-toggler hidden-md 
hidden-lg"></div></div></div></div><div id="page" 
class="container-fluid"><div class="row"><div id="left-menu-wrapper" class="col-md-3"><nav 
id="nav-main"><ul><li class="level-1"><a class="expandible" 
href="/"><span>Apache PredictionIO (incubating) Documentation</span></a><ul><li 
class="level-2"><a class="final" href="/"><span>Welcome to Apache PredictionIO 
(incubating)</span></a></li></ul></li><li class="level-1"><a class="expandible" 
href="#"><span>Getting Started</span></a><ul><li class="level-2"><a 
class="final" href="/start/"><span>A Quick Intro</span></a></li><li 
class="level-2"><a class="final" href="/install/"><span>Installing Apache 
PredictionIO (incubating)</span></a></li><li class="level-2"><a class="final" 
href="/start/download/"><span>Downloading an Engine Template</span></a></li><li 
class="level-2"><a class="final" href="/start/deploy/"><span>Deploying Your 
First Engine</span></a></li><li class="level-2"><a class="final" 
href="/start/customize/"><span>Customizing the 
Engine</span></a></li></ul></li><li class="level-1"><a 
class="expandible" href="#"><span>Integrating with Your App</span></a><ul><li 
class="level-2"><a class="final" href="/appintegration/"><span>App Integration 
Overview</span></a></li><li class="level-2"><a class="expandible" 
href="/sdk/"><span>List of SDKs</span></a><ul><li class="level-3"><a 
class="final" href="/sdk/java/"><span>Java & Android SDK</span></a></li><li 
class="level-3"><a class="final" href="/sdk/php/"><span>PHP 
SDK</span></a></li><li class="level-3"><a class="final" 
href="/sdk/python/"><span>Python SDK</span></a></li><li class="level-3"><a 
class="final" href="/sdk/ruby/"><span>Ruby SDK</span></a></li><li 
class="level-3"><a class="final" href="/sdk/community/"><span>Community Powered 
SDKs</span></a></li></ul></li></ul></li><li class="level-1"><a 
class="expandible" href="#"><span>Deploying an Engine</span></a><ul><li 
class="level-2"><a class="final" href="/deploy/"><span>Deploying as a Web 
Service</span></a></li><li class="level-2"><a class="final" 
href="/cli/#engine-commands"><span>Engine 
Command-line Interface</span></a></li><li class="level-2"><a class="final" 
href="/deploy/monitoring/"><span>Monitoring Engine</span></a></li><li 
class="level-2"><a class="final" href="/deploy/engineparams/"><span>Setting 
Engine Parameters</span></a></li><li class="level-2"><a class="final" 
href="/deploy/enginevariants/"><span>Deploying Multiple Engine 
Variants</span></a></li></ul></li><li class="level-1"><a class="expandible" 
href="#"><span>Customizing an Engine</span></a><ul><li class="level-2"><a 
class="final" href="/customize/"><span>Learning DASE</span></a></li><li 
class="level-2"><a class="final" href="/customize/dase/"><span>Implement 
DASE</span></a></li><li class="level-2"><a class="final" 
href="/customize/troubleshooting/"><span>Troubleshooting Engine 
Development</span></a></li><li class="level-2"><a class="final" 
href="/api/current/#package"><span>Engine Scala 
APIs</span></a></li></ul></li><li class="level-1"><a class="expandible" 
href="#"><span>Collecting and Analyzing 
 Data</span></a><ul><li class="level-2"><a class="final" 
href="/datacollection/"><span>Event Server Overview</span></a></li><li 
class="level-2"><a class="final" href="/cli/#event-server-commands"><span>Event 
Server Command-line Interface</span></a></li><li class="level-2"><a 
class="final" href="/datacollection/eventapi/"><span>Collecting Data with 
REST/SDKs</span></a></li><li class="level-2"><a class="final" 
href="/datacollection/eventmodel/"><span>Events Modeling</span></a></li><li 
class="level-2"><a class="final" 
href="/datacollection/webhooks/"><span>Unifying Multichannel Data with 
Webhooks</span></a></li><li class="level-2"><a class="final" 
href="/datacollection/channel/"><span>Channel</span></a></li><li 
class="level-2"><a class="final" 
href="/datacollection/batchimport/"><span>Importing Data in 
Batch</span></a></li><li class="level-2"><a class="final" 
href="/datacollection/analytics/"><span>Using Analytics 
Tools</span></a></li></ul></li><li class="level-1"><a class="expandible" 
 href="#"><span>Choosing an Algorithm(s)</span></a><ul><li class="level-2"><a 
class="final" href="/algorithm/"><span>Built-in Algorithm 
Libraries</span></a></li><li class="level-2"><a class="final" 
href="/algorithm/switch/"><span>Switching to Another 
Algorithm</span></a></li><li class="level-2"><a class="final" 
href="/algorithm/multiple/"><span>Combining Multiple 
Algorithms</span></a></li><li class="level-2"><a class="final" 
href="/algorithm/custom/"><span>Adding Your Own 
Algorithms</span></a></li></ul></li><li class="level-1"><a class="expandible" 
href="#"><span>ML Tuning and Evaluation</span></a><ul><li class="level-2"><a 
class="final" href="/evaluation/"><span>Overview</span></a></li><li 
class="level-2"><a class="final" 
href="/evaluation/paramtuning/"><span>Hyperparameter Tuning</span></a></li><li 
class="level-2"><a class="final" 
href="/evaluation/evaluationdashboard/"><span>Evaluation 
Dashboard</span></a></li><li class="level-2"><a class="final" 
href="/evaluation/metricchoose/"><span>Choosing Evaluation 
Metrics</span></a></li><li class="level-2"><a 
class="final" href="/evaluation/metricbuild/"><span>Building Evaluation 
Metrics</span></a></li></ul></li><li class="level-1"><a class="expandible" 
href="#"><span>System Architecture</span></a><ul><li class="level-2"><a 
class="final" href="/system/"><span>Architecture Overview</span></a></li><li 
class="level-2"><a class="final" href="/system/anotherdatastore/"><span>Using 
Another Data Store</span></a></li></ul></li><li class="level-1"><a 
class="expandible" href="#"><span>Engine Template Gallery</span></a><ul><li 
class="level-2"><a class="final" 
href="/gallery/template-gallery/"><span>Browse</span></a></li><li 
class="level-2"><a class="final" 
href="/community/submit-template/"><span>Submit your Engine as a 
Template</span></a></li></ul></li><li class="level-1"><a class="expandible" 
href="#"><span>Demo Tutorials</span></a><ul><li class="level-2"><a 
class="final" href="/demo/tapster/"><span>Comics Recommendation 
Demo</span></a></li><li class="level-2"><a class="final" 
href="/demo/community/"><span>Community Contributed Demo</span></a></li><li 
class="level-2"><a class="final" href="/demo/textclassification/"><span>Text 
Classification Engine Tutorial</span></a></li></ul></li><li class="level-1"><a 
class="expandible" href="/community/"><span>Getting Involved</span></a><ul><li 
class="level-2"><a class="final" 
href="/community/contribute-code/"><span>Contribute Code</span></a></li><li 
class="level-2"><a class="final" 
href="/community/contribute-documentation/"><span>Contribute 
Documentation</span></a></li><li class="level-2"><a class="final" 
href="/community/contribute-sdk/"><span>Contribute a SDK</span></a></li><li 
class="level-2"><a class="final" 
href="/community/contribute-webhook/"><span>Contribute a 
Webhook</span></a></li><li class="level-2"><a class="final" 
href="/community/projects/"><span>Community 
Projects</span></a></li></ul></li><li class="level-1"><a class="expandible" 
href="#"><span>Getting 
Help</span></a><ul><li class="level-2"><a class="final" 
href="/resources/faq/"><span>FAQs</span></a></li><li class="level-2"><a 
class="final" href="/support/"><span>Support</span></a></li></ul></li><li 
class="level-1"><a class="expandible" 
href="#"><span>Resources</span></a><ul><li class="level-2"><a class="final" 
href="/resources/intellij/"><span>Developing Engines with IntelliJ 
IDEA</span></a></li><li class="level-2"><a class="final" 
href="/resources/upgrade/"><span>Upgrade Instructions</span></a></li><li 
class="level-2"><a class="final" 
href="/resources/glossary/"><span>Glossary</span></a></li></ul></li></ul></nav></div><div
 class="col-md-9 col-sm-12"><div class="content-header hidden-md 
hidden-lg"><div id="page-title"><h1>Machine Learning Analytics with 
Tableau</h1></div></div><div id="table-of-content-wrapper"><h5>On this 
page</h5><aside id="table-of-contents"><ul> <li> <a 
href="#prerequisites">Prerequisites</a> </li> <li> <a 
href="#creating-hive-tables">Creating Hive 
Tables</a> </li> <li> <a href="#launch-spark-sql-s-thrift-jdbc-odbc-server">Launch 
Spark SQL's Thrift JDBC/ODBC Server</a> </li> <li> <a 
href="#performing-analysis-with-tableau">Performing Analysis with Tableau</a> 
</li> </ul> </aside><hr/><a id="edit-page-link" 
href="https://github.com/apache/incubator-predictionio/tree/livedoc/docs/manual/source/datacollection/analytics-tableau.html.md.erb"><img
 src="/images/icons/edit-pencil-d6c1bb3d.png"/>Edit this page</a></div><div 
class="content-header hidden-sm hidden-xs"><div id="page-title"><h1>Machine 
Learning Analytics with Tableau</h1></div></div><div class="content"><p>With 
Spark SQL, it is possible to connect Tableau to Apache PredictionIO 
(incubating) Event Server for interactive analysis of event data.</p><h2 
id='prerequisites' class='header-anchors'>Prerequisites</h2> <ul> <li>Tableau 
Desktop 8.3+ with a proper license key that supports Spark SQL;</li> <li>Spark 
ODBC Driver from Databricks (<a 
href="https://databricks.com/spark/odbc-driver-download">https://databricks.com/spark/odbc-driver-download</a>);</li> 
<li>Apache Hadoop 2.4+</li> <li>Apache Hive 0.13.1+</li> </ul> <div 
class="alert-message info"><p>In this article, we will assume that you have a 
working HDFS, and that your environmental variable <code>HADOOP_HOME</code> has 
been properly set. This is essential for Apache Hive to function properly. In 
addition, <code>HADOOP_CONF_DIR</code> in 
<code>$PIO_HOME/conf/pio-env.sh</code> must also be properly set for the 
<code>pio export</code> command to write to HDFS instead of the local 
filesystem.</p></div><p><h2 id='export-events-to-apache-parquet' 
class='header-anchors'>Export Events to Apache Parquet</h2><p>PredictionIO 
supports exporting your events to <a 
href="http://parquet.incubator.apache.org/">Apache Parquet</a>, a columnar 
storage format that allows you to query quickly.</p><p>Let&#39;s export the 
data we imported in <a 
href="/templates/recommendation/quickstart/#import-sample-data">Recommendation 
Engine Template Quick Start</a>, and assume the App ID is 1.</p><div 
class="highlight shell"><table style="border-spacing: 0"><tbody><tr><td 
class="gutter gl" style="text-align: right"><pre class="lineno">1</pre></td><td 
class="code"><pre><span class="gp">$ </span><span 
class="nv">$PIO_HOME</span>/bin/pio <span class="nb">export</span> --appid 1 
--output /tmp/movies --format parquet
+</pre></td></tr></tbody></table> </div> <p>After the command has finished 
successfully, you should see something similar to the following.</p><div 
class="highlight shell"><table style="border-spacing: 0"><tbody><tr><td 
class="gutter gl" style="text-align: right"><pre class="lineno">1
+2
+3
+4
+5
+6
+7
+8
+9
+10
+11</pre></td><td class="code"><pre>root
+ |-- creationTime: string <span class="o">(</span>nullable <span 
class="o">=</span> <span class="nb">true</span><span class="o">)</span>
+ |-- entityId: string <span class="o">(</span>nullable <span 
class="o">=</span> <span class="nb">true</span><span class="o">)</span>
+ |-- entityType: string <span class="o">(</span>nullable <span 
class="o">=</span> <span class="nb">true</span><span class="o">)</span>
+ |-- event: string <span class="o">(</span>nullable <span class="o">=</span> 
<span class="nb">true</span><span class="o">)</span>
+ |-- eventId: string <span class="o">(</span>nullable <span class="o">=</span> 
<span class="nb">true</span><span class="o">)</span>
+ |-- eventTime: string <span class="o">(</span>nullable <span 
class="o">=</span> <span class="nb">true</span><span class="o">)</span>
+ |-- properties: struct <span class="o">(</span>nullable <span 
class="o">=</span> <span class="nb">true</span><span class="o">)</span>
+ |    |-- rating: double <span class="o">(</span>nullable <span 
class="o">=</span> <span class="nb">true</span><span class="o">)</span>
+ |-- targetEntityId: string <span class="o">(</span>nullable <span 
class="o">=</span> <span class="nb">true</span><span class="o">)</span>
+ |-- targetEntityType: string <span class="o">(</span>nullable <span 
class="o">=</span> <span class="nb">true</span><span class="o">)</span>
+</pre></td></tr></tbody></table> </div></p><h2 id='creating-hive-tables' 
class='header-anchors'>Creating Hive Tables</h2><p>Before you can use Spark 
SQL&#39;s Thrift JDBC/ODBC Server, you will need to create the table schema in 
Hive first. Please make sure to replace <code>path_of_hive</code> with the real 
path.</p><div class="highlight shell"><table style="border-spacing: 
0"><tbody><tr><td class="gutter gl" style="text-align: right"><pre 
class="lineno">1
+2
+3
+4</pre></td><td class="code"><pre><span class="gp">$ </span><span 
class="nb">cd </span>path_of_hive
+<span class="gp">$ </span>bin/hive
+<span class="gp">hive&gt; </span>CREATE EXTERNAL TABLE events <span 
class="o">(</span>event STRING, entityType STRING, entityId STRING, 
targetEntityType STRING, targetEntityId STRING, properties 
STRUCT&lt;rating:DOUBLE&gt;<span class="o">)</span> STORED AS parquet LOCATION 
<span class="s1">'/tmp/movies'</span>;
+<span class="gp">hive&gt; </span><span class="nb">exit</span>;
+</pre></td></tr></tbody></table> </div> <h2 
id='launch-spark-sql-s-thrift-jdbc-odbc-server' 
class='header-anchors'>Launch Spark SQL&#39;s Thrift JDBC/ODBC 
Server</h2><p>Once you have created your Hive tables, create a Hive 
configuration in your Spark installation. If you have a custom 
<code>hive-site.xml</code>, simply copy or link it to 
<code>$SPARK_HOME/conf</code>. Otherwise, Hive would have created a local Derby 
database, and you will need to let Spark know about it. Create 
<code>$SPARK_HOME/conf/hive-site.xml</code> from scratch with the following 
template.</p><div class="alert-message warning"><p>You must change 
<code>/opt/apache-hive-0.13.1-bin</code> below to a real Hive 
path.</p></div><div class="highlight xml"><table style="border-spacing: 
0"><tbody><tr><td class="gutter gl" style="text-align: right"><pre 
class="lineno">1
+2
+3
+4
+5
+6
+7
+8</pre></td><td class="code"><pre><span class="cp">&lt;?xml version="1.0" 
encoding="UTF-8" standalone="no"?&gt;</span>
+<span class="cp">&lt;?xml-stylesheet type="text/xsl" 
href="configuration.xsl"?&gt;</span>
+<span class="nt">&lt;configuration&gt;</span>
+  <span class="nt">&lt;property&gt;</span>
+    <span class="nt">&lt;name&gt;</span>javax.jdo.option.ConnectionURL<span 
class="nt">&lt;/name&gt;</span>
+    <span 
class="nt">&lt;value&gt;</span>jdbc:derby:;databaseName=/opt/apache-hive-0.13.1-bin/metastore_db;create=true<span
 class="nt">&lt;/value&gt;</span>
+  <span class="nt">&lt;/property&gt;</span>
+<span class="nt">&lt;/configuration&gt;</span>
+</pre></td></tr></tbody></table> </div> <p>Launch Spark SQL&#39;s Thrift 
JDBC/ODBC Server by</p><div class="highlight shell"><table 
style="border-spacing: 0"><tbody><tr><td class="gutter gl" style="text-align: 
right"><pre class="lineno">1</pre></td><td class="code"><pre><span class="gp">$ 
</span><span class="nv">$SPARK_HOME</span>/sbin/start-thriftserver.sh
+</pre></td></tr></tbody></table> </div> <p>You can test the server using the 
included Beeline client.</p><div class="highlight shell"><table 
style="border-spacing: 0"><tbody><tr><td class="gutter gl" style="text-align: 
right"><pre class="lineno">1
+2
+3
+4
+5
+6
+7
+8
+9
+10
+11
+12
+13
+14
+15
+16
+17
+18
+19
+20</pre></td><td class="code"><pre><span class="gp">$ </span><span 
class="nv">$SPARK_HOME</span>/bin/beeline
+<span class="gp">beeline&gt; </span>!connect jdbc:hive2://localhost:10000
+<span class="o">(</span>Use empty username and password when prompted<span 
class="o">)</span>
+0: jdbc:hive2://localhost:10000&gt; <span class="k">select</span> <span 
class="k">*</span> from events limit 10;
++--------+-------------+-----------+-------------------+-----------------+------------------+
+| event  | entitytype  | entityid  | targetentitytype  | targetentityid  |    
properties    |
++--------+-------------+-----------+-------------------+-----------------+------------------+
+| buy    | user        | 3         | item              | 0               | 
<span class="o">{</span><span class="s2">"rating"</span>:null<span 
class="o">}</span>  |
+| buy    | user        | 3         | item              | 1               | 
<span class="o">{</span><span class="s2">"rating"</span>:null<span 
class="o">}</span>  |
+| rate   | user        | 3         | item              | 2               | 
<span class="o">{</span><span class="s2">"rating"</span>:1.0<span 
class="o">}</span>   |
+| buy    | user        | 3         | item              | 7               | 
<span class="o">{</span><span class="s2">"rating"</span>:null<span 
class="o">}</span>  |
+| buy    | user        | 3         | item              | 8               | 
<span class="o">{</span><span class="s2">"rating"</span>:null<span 
class="o">}</span>  |
+| buy    | user        | 3         | item              | 9               | 
<span class="o">{</span><span class="s2">"rating"</span>:null<span 
class="o">}</span>  |
+| rate   | user        | 3         | item              | 14              | 
<span class="o">{</span><span class="s2">"rating"</span>:1.0<span 
class="o">}</span>   |
+| buy    | user        | 3         | item              | 15              | 
<span class="o">{</span><span class="s2">"rating"</span>:null<span 
class="o">}</span>  |
+| buy    | user        | 3         | item              | 16              | 
<span class="o">{</span><span class="s2">"rating"</span>:null<span 
class="o">}</span>  |
+| buy    | user        | 3         | item              | 18              | 
<span class="o">{</span><span class="s2">"rating"</span>:null<span 
class="o">}</span>  |
++--------+-------------+-----------+-------------------+-----------------+------------------+
+10 rows selected <span class="o">(</span>0.515 seconds<span class="o">)</span>
+0: jdbc:hive2://localhost:10000&gt;
+</pre></td></tr></tbody></table> </div> <p>Now you are ready to use 
Tableau!</p><h2 id='performing-analysis-with-tableau' 
class='header-anchors'>Performing Analysis with Tableau</h2><p>Launch Tableau 
and Connect to Data. Click on <strong>Spark SQL (Beta)</strong> and enter Spark 
SQL&#39;s Thrift JDBC/ODBC Server information. Make sure to pick <strong>User 
Name</strong> as <strong>Authentication</strong>. Click 
<strong>Connect</strong>.</p><p><img alt="Tableau and Spark SQL" 
src="/images/datacollection/tableau-01-b5a23839.png"/></p><p>On the next page, 
pick <strong>default</strong> under <strong>Schema</strong>.</p><div 
class="alert-message info"><p>You may not see any choices when you click on 
Schema. Simply press Enter and Tableau will try to list all 
schemas.</p></div><p>Once you see a list of tables that includes 
<strong>events</strong>, click <strong>New Custom SQL</strong>, then enter the 
following.</p><div class="highlight sql"><table style="border-spacing: 
0"><tbody><tr><td 
class="gutter gl" style="text-align: right"><pre class="lineno">1</pre></td><td 
class="code"><pre><span class="k">SELECT</span> <span 
class="n">event</span><span class="p">,</span> <span 
class="n">entityType</span><span class="p">,</span> <span 
class="n">entityId</span><span class="p">,</span> <span 
class="n">targetEntityType</span><span class="p">,</span> <span 
class="n">targetEntityId</span><span class="p">,</span> <span 
class="n">properties</span><span class="p">.</span><span 
class="n">rating</span> <span class="k">FROM</span> <span 
class="n">events</span>
+</pre></td></tr></tbody></table> </div> <p>Click <strong>Update Now</strong>. 
You should see the following screen by now, indicating success in loading data. 
Using a custom SQL allows you to extract arbitrary fields from within 
properties.</p><p><img alt="Setting up Tableau" 
src="/images/datacollection/tableau-02-76e93443.png"/></p><p>Click <strong>Go 
to Worksheet</strong> and start analyzing. The following shows an example of 
breaking down different rating values.</p><p><img alt="Rating Values Breakdown" 
src="/images/datacollection/tableau-03-e389351e.png"/></p><p>The following 
shows a summary of interactions.</p><p><img alt="Interactions" 
src="/images/datacollection/tableau-04-c8c31bb7.png"/></p><p>Happy 
analyzing!</p></div></div></div></div><footer><div class="container"><div 
class="seperator"></div><div class="row"><div class="col-md-6 col-xs-6 
footer-link-column"><div 
class="footer-link-column-row"><h4>Community</h4><ul><li><a 
href="//docs.prediction.io/install/" target="blank"
 >Download</a></li><li><a href="//docs.prediction.io/" 
target="blank">Docs</a></li><li><a 
href="//github.com/apache/incubator-predictionio" 
target="blank">GitHub</a></li><li><a 
href="mailto:[email protected]" 
target="blank">Subscribe to User Mailing List</a></li><li><a 
href="//stackoverflow.com/questions/tagged/predictionio" 
target="blank">Stackoverflow</a></li></ul></div></div><div class="col-md-6 
col-xs-6 footer-link-column"><div 
class="footer-link-column-row"><h4>Contribute</h4><ul><li><a 
href="//predictionio.incubator.apache.org/community/contribute-code/" 
target="blank">Contribute</a></li><li><a 
href="//github.com/apache/incubator-predictionio" target="blank">Source 
Code</a></li><li><a href="//issues.apache.org/jira/browse/PIO" 
target="blank">Bug Tracker</a></li><li><a 
href="mailto:[email protected]" 
target="blank">Subscribe to Development Mailing 
List</a></li></ul></div></div></div></div><div id="footer-bottom"><div 
class="container"><div class="row"><div class="col-md-12"><div 
id="footer-logo-wrapper"><img alt="PredictionIO" 
src="/images/logos/logo-white-d1e9c6e6.png"/></div><div 
id="social-icons-wrapper"><a class="github-button" 
href="https://github.com/apache/incubator-predictionio" data-style="mega" 
data-count-href="/apache/incubator-predictionio/stargazers" 
data-count-api="/repos/apache/incubator-predictionio#stargazers_count" 
data-count-aria-label="# stargazers on GitHub" aria-label="Star 
apache/incubator-predictionio on GitHub">Star</a> <a class="github-button" 
href="https://github.com/apache/incubator-predictionio/fork" 
data-icon="octicon-git-branch" data-style="mega" 
data-count-href="/apache/incubator-predictionio/network" 
data-count-api="/repos/apache/incubator-predictionio#forks_count" 
data-count-aria-label="# forks on GitHub" aria-label="Fork 
apache/incubator-predictionio on GitHub">Fork</a> <script id="github-bjs" 
async="" defer="" src="https://buttons.github.io/buttons.js"></script>
<a href="//twitter.com/predictionio" target="blank"><img 
alt="PredictionIO on Twitter" src="/images/icons/twitter-ea9dc152.png"/></a> 
<a href="//www.facebook.com/predictionio" target="blank"><img alt="PredictionIO on 
Facebook" src="/images/icons/facebook-5c57939c.png"/></a> 
</div></div></div></div></div></footer></div><script>(function(w,d,t,u,n,s,e){w['SwiftypeObject']=n;w[n]=w[n]||function(){
+(w[n].q=w[n].q||[]).push(arguments);};s=d.createElement(t);
+e=d.getElementsByTagName(t)[0];s.async=1;s.src=u;e.parentNode.insertBefore(s,e);
+})(window,document,'script','//s.swiftypecdn.com/install/v1/st.js','_st');
+
+_st('install','HaUfpXXV87xoB_zzCQ45');</script><script 
src="/javascripts/application-f819cf19.js"></script></body></html>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-predictionio-site/blob/921d6861/datacollection/analytics-tableau/index.html.gz
----------------------------------------------------------------------
diff --git a/datacollection/analytics-tableau/index.html.gz 
b/datacollection/analytics-tableau/index.html.gz
new file mode 100644
index 0000000..28aea5e
Binary files /dev/null and b/datacollection/analytics-tableau/index.html.gz 
differ

Reply via email to