http://git-wip-us.apache.org/repos/asf/predictionio-site/blob/b1f44c15/datacollection/analytics-tableau/index.html ---------------------------------------------------------------------- diff --git a/datacollection/analytics-tableau/index.html b/datacollection/analytics-tableau/index.html index 424b7ef..41122a4 100644 --- a/datacollection/analytics-tableau/index.html +++ b/datacollection/analytics-tableau/index.html @@ -1,4 +1,4 @@ -<!DOCTYPE html><html><head><title>Machine Learning Analytics with Tableau</title><meta charset="utf-8"/><meta content="IE=edge,chrome=1" http-equiv="X-UA-Compatible"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><meta class="swiftype" name="title" data-type="string" content="Machine Learning Analytics with Tableau"/><link rel="canonical" href="https://predictionio.apache.org/datacollection/analytics-tableau/"/><link href="/images/favicon/normal-b330020a.png" rel="shortcut icon"/><link href="/images/favicon/apple-c0febcf2.png" rel="apple-touch-icon"/><link href="//fonts.googleapis.com/css?family=Open+Sans:300italic,400italic,600italic,700italic,800italic,400,300,600,700,800" rel="stylesheet"/><link href="//maxcdn.bootstrapcdn.com/font-awesome/4.2.0/css/font-awesome.min.css" rel="stylesheet"/><link href="/stylesheets/application-eccfc6cb.css" rel="stylesheet" type="text/css"/><script src="//cdnjs.cloudflare.com/ajax/libs/html5shiv/3.7.2/html5shiv.min.js"></sc ript><script src="//cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script><script src="//use.typekit.net/pqo0itb.js"></script><script>try{Typekit.load({ async: true });}catch(e){}</script></head><body><div id="global"><header><div class="container" id="header-wrapper"><div class="row"><div class="col-sm-12"><div id="logo-wrapper"><span id="drawer-toggle"></span><a href="#"></a><a href="http://predictionio.apache.org/"><img alt="Apache PredictionIO" id="logo" src="/images/logos/logo-ee2b9bb3.png"/></a><span>®</span></div><div id="menu-wrapper"><div id="pill-wrapper"><a class="pill left" href="/gallery/template-gallery">TEMPLATES</a> <a class="pill right" href="//github.com/apache/predictionio/">OPEN SOURCE</a></div></div><img class="mobile-search-bar-toggler hidden-md hidden-lg" src="/images/icons/search-glass-704bd4ff.png"/></div></div></div></header><div id="search-bar-row-wrapper"><div class="container-fluid" id="search-bar-row"><div class="row"><div cl ass="col-md-9 col-sm-11 col-xs-11"><div class="hidden-md hidden-lg" id="mobile-page-heading-wrapper"><p>PredictionIO Docs</p><h4>Machine Learning Analytics with Tableau</h4></div><h4 class="hidden-sm hidden-xs">PredictionIO Docs</h4></div><div class="col-md-3 col-sm-1 col-xs-1 hidden-md hidden-lg"><img id="left-menu-indicator" src="/images/icons/down-arrow-dfe9f7fe.png"/></div><div class="col-md-3 col-sm-12 col-xs-12 swiftype-wrapper"><div class="swiftype"><form class="search-form"><img class="search-box-toggler hidden-xs hidden-sm" src="/images/icons/search-glass-704bd4ff.png"/><div class="search-box"><img src="/images/icons/search-glass-704bd4ff.png"/><input type="text" id="st-search-input" class="st-search-input" placeholder="Search Doc..."/></div><img class="swiftype-row-hider hidden-md hidden-lg" src="/images/icons/drawer-toggle-active-fcbef12a.png"/></form></div></div><div class="mobile-left-menu-toggler hidden-md hidden-lg"></div></div></div></div><div id="page" class="contai ner-fluid"><div class="row"><div id="left-menu-wrapper" class="col-md-3"><nav id="nav-main"><ul><li class="level-1"><a class="expandible" href="/"><span>Apache PredictionIO® Documentation</span></a><ul><li class="level-2"><a class="final" href="/"><span>Welcome to Apache PredictionIO®</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Getting Started</span></a><ul><li class="level-2"><a class="final" href="/start/"><span>A Quick Intro</span></a></li><li class="level-2"><a class="final" href="/install/"><span>Installing Apache PredictionIO</span></a></li><li class="level-2"><a class="final" href="/start/download/"><span>Downloading an Engine Template</span></a></li><li class="level-2"><a class="final" href="/start/deploy/"><span>Deploying Your First Engine</span></a></li><li class="level-2"><a class="final" href="/start/customize/"><span>Customizing the Engine</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Integratin g with Your App</span></a><ul><li class="level-2"><a class="final" href="/appintegration/"><span>App Integration Overview</span></a></li><li class="level-2"><a class="expandible" href="/sdk/"><span>List of SDKs</span></a><ul><li class="level-3"><a class="final" href="/sdk/java/"><span>Java & Android SDK</span></a></li><li class="level-3"><a class="final" href="/sdk/php/"><span>PHP SDK</span></a></li><li class="level-3"><a class="final" href="/sdk/python/"><span>Python SDK</span></a></li><li class="level-3"><a class="final" href="/sdk/ruby/"><span>Ruby SDK</span></a></li><li class="level-3"><a class="final" href="/sdk/community/"><span>Community Powered SDKs</span></a></li></ul></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Deploying an Engine</span></a><ul><li class="level-2"><a class="final" href="/deploy/"><span>Deploying as a Web Service</span></a></li><li class="level-2"><a class="final" href="/batchpredict/"><span>Batch Predictions</span></a></li><li cl ass="level-2"><a class="final" href="/deploy/monitoring/"><span>Monitoring Engine</span></a></li><li class="level-2"><a class="final" href="/deploy/engineparams/"><span>Setting Engine Parameters</span></a></li><li class="level-2"><a class="final" href="/deploy/enginevariants/"><span>Deploying Multiple Engine Variants</span></a></li><li class="level-2"><a class="final" href="/deploy/plugin/"><span>Engine Server Plugin</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Customizing an Engine</span></a><ul><li class="level-2"><a class="final" href="/customize/"><span>Learning DASE</span></a></li><li class="level-2"><a class="final" href="/customize/dase/"><span>Implement DASE</span></a></li><li class="level-2"><a class="final" href="/customize/troubleshooting/"><span>Troubleshooting Engine Development</span></a></li><li class="level-2"><a class="final" href="/api/current/#package"><span>Engine Scala APIs</span></a></li></ul></li><li class="level-1"><a clas s="expandible" href="#"><span>Collecting and Analyzing Data</span></a><ul><li class="level-2"><a class="final" href="/datacollection/"><span>Event Server Overview</span></a></li><li class="level-2"><a class="final" href="/datacollection/eventapi/"><span>Collecting Data with REST/SDKs</span></a></li><li class="level-2"><a class="final" href="/datacollection/eventmodel/"><span>Events Modeling</span></a></li><li class="level-2"><a class="final" href="/datacollection/webhooks/"><span>Unifying Multichannel Data with Webhooks</span></a></li><li class="level-2"><a class="final" href="/datacollection/channel/"><span>Channel</span></a></li><li class="level-2"><a class="final" href="/datacollection/batchimport/"><span>Importing Data in Batch</span></a></li><li class="level-2"><a class="final" href="/datacollection/analytics/"><span>Using Analytics Tools</span></a></li><li class="level-2"><a class="final" href="/datacollection/plugin/"><span>Event Server Plugin</span></a></li></ul></li><li cla ss="level-1"><a class="expandible" href="#"><span>Choosing an Algorithm(s)</span></a><ul><li class="level-2"><a class="final" href="/algorithm/"><span>Built-in Algorithm Libraries</span></a></li><li class="level-2"><a class="final" href="/algorithm/switch/"><span>Switching to Another Algorithm</span></a></li><li class="level-2"><a class="final" href="/algorithm/multiple/"><span>Combining Multiple Algorithms</span></a></li><li class="level-2"><a class="final" href="/algorithm/custom/"><span>Adding Your Own Algorithms</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>ML Tuning and Evaluation</span></a><ul><li class="level-2"><a class="final" href="/evaluation/"><span>Overview</span></a></li><li class="level-2"><a class="final" href="/evaluation/paramtuning/"><span>Hyperparameter Tuning</span></a></li><li class="level-2"><a class="final" href="/evaluation/evaluationdashboard/"><span>Evaluation Dashboard</span></a></li><li class="level-2"><a class="final" href="/evaluation/metricchoose/"><span>Choosing Evaluation Metrics</span></a></li><li class="level-2"><a class="final" href="/evaluation/metricbuild/"><span>Building Evaluation Metrics</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>System Architecture</span></a><ul><li class="level-2"><a class="final" href="/system/"><span>Architecture Overview</span></a></li><li class="level-2"><a class="final" href="/system/anotherdatastore/"><span>Using Another Data Store</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>PredictionIO® Official Templates</span></a><ul><li class="level-2"><a class="final" href="/templates/"><span>Intro</span></a></li><li class="level-2"><a class="expandible" href="#"><span>Recommendation</span></a><ul><li class="level-3"><a class="final" href="/templates/recommendation/quickstart/"><span>Quick Start</span></a></li><li class="level-3"><a class="final" href="/templates/recommendation/dase/"><span>DA SE</span></a></li><li class="level-3"><a class="final" href="/templates/recommendation/evaluation/"><span>Evaluation Explained</span></a></li><li class="level-3"><a class="final" href="/templates/recommendation/how-to/"><span>How-To</span></a></li><li class="level-3"><a class="final" href="/templates/recommendation/reading-custom-events/"><span>Read Custom Events</span></a></li><li class="level-3"><a class="final" href="/templates/recommendation/customize-data-prep/"><span>Customize Data Preparator</span></a></li><li class="level-3"><a class="final" href="/templates/recommendation/customize-serving/"><span>Customize Serving</span></a></li><li class="level-3"><a class="final" href="/templates/recommendation/training-with-implicit-preference/"><span>Train with Implicit Preference</span></a></li><li class="level-3"><a class="final" href="/templates/recommendation/blacklist-items/"><span>Filter Recommended Items by Blacklist in Query</span></a></li><li class="level-3"><a class="final" h ref="/templates/recommendation/batch-evaluator/"><span>Batch Persistable Evaluator</span></a></li></ul></li><li class="level-2"><a class="expandible" href="#"><span>E-Commerce Recommendation</span></a><ul><li class="level-3"><a class="final" href="/templates/ecommercerecommendation/quickstart/"><span>Quick Start</span></a></li><li class="level-3"><a class="final" href="/templates/ecommercerecommendation/dase/"><span>DASE</span></a></li><li class="level-3"><a class="final" href="/templates/ecommercerecommendation/how-to/"><span>How-To</span></a></li><li class="level-3"><a class="final" href="/templates/ecommercerecommendation/train-with-rate-event/"><span>Train with Rate Event</span></a></li><li class="level-3"><a class="final" href="/templates/ecommercerecommendation/adjust-score/"><span>Adjust Score</span></a></li></ul></li><li class="level-2"><a class="expandible" href="#"><span>Similar Product</span></a><ul><li class="level-3"><a class="final" href="/templates/similarproduct/quic kstart/"><span>Quick Start</span></a></li><li class="level-3"><a class="final" href="/templates/similarproduct/dase/"><span>DASE</span></a></li><li class="level-3"><a class="final" href="/templates/similarproduct/how-to/"><span>How-To</span></a></li><li class="level-3"><a class="final" href="/templates/similarproduct/multi-events-multi-algos/"><span>Multiple Events and Multiple Algorithms</span></a></li><li class="level-3"><a class="final" href="/templates/similarproduct/return-item-properties/"><span>Returns Item Properties</span></a></li><li class="level-3"><a class="final" href="/templates/similarproduct/train-with-rate-event/"><span>Train with Rate Event</span></a></li><li class="level-3"><a class="final" href="/templates/similarproduct/rid-user-set-event/"><span>Get Rid of Events for Users</span></a></li><li class="level-3"><a class="final" href="/templates/similarproduct/recommended-user/"><span>Recommend Users</span></a></li></ul></li><li class="level-2"><a class="expandible" href="#"><span>Classification</span></a><ul><li class="level-3"><a class="final" href="/templates/classification/quickstart/"><span>Quick Start</span></a></li><li class="level-3"><a class="final" href="/templates/classification/dase/"><span>DASE</span></a></li><li class="level-3"><a class="final" href="/templates/classification/how-to/"><span>How-To</span></a></li><li class="level-3"><a class="final" href="/templates/classification/add-algorithm/"><span>Use Alternative Algorithm</span></a></li><li class="level-3"><a class="final" href="/templates/classification/reading-custom-properties/"><span>Read Custom Properties</span></a></li></ul></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Engine Template Gallery</span></a><ul><li class="level-2"><a class="final" href="/gallery/template-gallery/"><span>Browse</span></a></li><li class="level-2"><a class="final" href="/community/submit-template/"><span>Submit your Engine as a Template</span></a></li></ul></li><li cl ass="level-1"><a class="expandible" href="#"><span>Demo Tutorials</span></a><ul><li class="level-2"><a class="final" href="/demo/tapster/"><span>Comics Recommendation Demo</span></a></li><li class="level-2"><a class="final" href="/demo/community/"><span>Community Contributed Demo</span></a></li><li class="level-2"><a class="final" href="/demo/textclassification/"><span>Text Classification Engine Tutorial</span></a></li></ul></li><li class="level-1"><a class="expandible" href="/community/"><span>Getting Involved</span></a><ul><li class="level-2"><a class="final" href="/community/contribute-code/"><span>Contribute Code</span></a></li><li class="level-2"><a class="final" href="/community/contribute-documentation/"><span>Contribute Documentation</span></a></li><li class="level-2"><a class="final" href="/community/contribute-sdk/"><span>Contribute a SDK</span></a></li><li class="level-2"><a class="final" href="/community/contribute-webhook/"><span>Contribute a Webhook</span></a></li><li class="level-2"><a class="final" href="/community/projects/"><span>Community Projects</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Getting Help</span></a><ul><li class="level-2"><a class="final" href="/resources/faq/"><span>FAQs</span></a></li><li class="level-2"><a class="final" href="/support/"><span>Support</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Resources</span></a><ul><li class="level-2"><a class="final" href="/cli/"><span>Command-line Interface</span></a></li><li class="level-2"><a class="final" href="/resources/release/"><span>Release Cadence</span></a></li><li class="level-2"><a class="final" href="/resources/intellij/"><span>Developing Engines with IntelliJ IDEA</span></a></li><li class="level-2"><a class="final" href="/resources/upgrade/"><span>Upgrade Instructions</span></a></li><li class="level-2"><a class="final" href="/resources/glossary/"><span>Glossary</span></a></li></ul></li><li class="l evel-1"><a class="expandible" href="#"><span>Apache Software Foundation</span></a><ul><li class="level-2"><a class="final" href="https://www.apache.org/"><span>Apache Homepage</span></a></li><li class="level-2"><a class="final" href="https://www.apache.org/licenses/"><span>License</span></a></li><li class="level-2"><a class="final" href="https://www.apache.org/foundation/sponsorship.html"><span>Sponsorship</span></a></li><li class="level-2"><a class="final" href="https://www.apache.org/foundation/thanks.html"><span>Thanks</span></a></li><li class="level-2"><a class="final" href="https://www.apache.org/security/"><span>Security</span></a></li></ul></li></ul></nav></div><div class="col-md-9 col-sm-12"><div class="content-header hidden-md hidden-lg"><div id="page-title"><h1>Machine Learning Analytics with Tableau</h1></div></div><div id="table-of-content-wrapper"><h5>On this page</h5><aside id="table-of-contents"><ul> <li> <a href="#prerequisites">Prerequisites</a> </li> <li> <a href=" #creating-hive-tables">Creating Hive Tables</a> </li> <li> <a href="#launch-spark-sql-s-thrift-jdbc-odbc-server">Launch Spark SQL's Thrift JDBC/ODBC Server</a> </li> <li> <a href="#performing-analysis-with-tableau">Performing Analysis with Tableau</a> </li> </ul> </aside><hr/><a id="edit-page-link" href="https://github.com/apache/predictionio/tree/livedoc/docs/manual/source/datacollection/analytics-tableau.html.md.erb"><img src="/images/icons/edit-pencil-d6c1bb3d.png"/>Edit this page</a></div><div class="content-header hidden-sm hidden-xs"><div id="page-title"><h1>Machine Learning Analytics with Tableau</h1></div></div><div class="content"> <p>With Spark SQL, it is possible to connect Tableau to Apache PredictionIO Event Server for interactive analysis of event data.</p><h2 id='prerequisites' class='header-anchors'>Prerequisites</h2> <ul> <li>Tableau Desktop 8.3+ with a proper license key that supports Spark SQL;</li> <li>Spark ODBC Driver from Databricks (<a href="https://databrick s.com/spark/odbc-driver-download">https://databricks.com/spark/odbc-driver-download</a>);</li> <li>Apache Hadoop 2.4+</li> <li>Apache Hive 0.3.1+</li> </ul> <div class="alert-message info"><p>In this article, we will assume that you have a working HDFS, and that your environmental variable <code>HADOOP_HOME</code> has been properly set. This is essential for Apache Hive to function properly. In addition, <code>HADOOP_CONF_DIR</code> in <code>$PIO_HOME/conf/pio-env.sh</code> must also be properly set for the <code>pio export</code> command to write to HDFS instead of the local filesystem.</p></div> <p><h2 id='export-events-to-apache-parquet' class='header-anchors'>Export Events to Apache Parquet</h2><p>PredictionIO supports exporting your events to <a href="http://parquet.apache.org/">Apache Parquet</a>, a columnar storage format that allows you to query quickly.</p><p>Let's export the data we imported in <a href="/templates/recommendation/quickstart/#import-sample-data">Recommen dation Engine Template Quick Start</a>, and assume the App ID is 1.</p><div class="highlight shell"><table style="border-spacing: 0"><tbody><tr><td class="gutter gl" style="text-align: right"><pre class="lineno">1</pre></td><td class="code"><pre><span class="gp">$ </span><span class="nv">$PIO_HOME</span>/bin/pio <span class="nb">export</span> --appid 1 --output /tmp/movies --format parquet +<!DOCTYPE html><html><head><title>Machine Learning Analytics with Tableau</title><meta charset="utf-8"/><meta content="IE=edge,chrome=1" http-equiv="X-UA-Compatible"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><meta class="swiftype" name="title" data-type="string" content="Machine Learning Analytics with Tableau"/><link rel="canonical" href="https://predictionio.apache.org/datacollection/analytics-tableau/"/><link href="/images/favicon/normal-b330020a.png" rel="shortcut icon"/><link href="/images/favicon/apple-c0febcf2.png" rel="apple-touch-icon"/><link href="//fonts.googleapis.com/css?family=Open+Sans:300italic,400italic,600italic,700italic,800italic,400,300,600,700,800" rel="stylesheet"/><link href="//maxcdn.bootstrapcdn.com/font-awesome/4.2.0/css/font-awesome.min.css" rel="stylesheet"/><link href="/stylesheets/application-eccfc6cb.css" rel="stylesheet" type="text/css"/><script src="//cdnjs.cloudflare.com/ajax/libs/html5shiv/3.7.2/html5shiv.min.js"></sc ript><script src="//cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script><script src="//use.typekit.net/pqo0itb.js"></script><script>try{Typekit.load({ async: true });}catch(e){}</script></head><body><div id="global"><header><div class="container" id="header-wrapper"><div class="row"><div class="col-sm-12"><div id="logo-wrapper"><span id="drawer-toggle"></span><a href="#"></a><a href="http://predictionio.apache.org/"><img alt="Apache PredictionIO" id="logo" src="/images/logos/logo-ee2b9bb3.png"/></a><span>®</span></div><div id="menu-wrapper"><div id="pill-wrapper"><a class="pill left" href="/gallery/template-gallery">TEMPLATES</a> <a class="pill right" href="//github.com/apache/predictionio/">OPEN SOURCE</a></div></div><img class="mobile-search-bar-toggler hidden-md hidden-lg" src="/images/icons/search-glass-704bd4ff.png"/></div></div></div></header><div id="search-bar-row-wrapper"><div class="container-fluid" id="search-bar-row"><div class="row"><div cl ass="col-md-9 col-sm-11 col-xs-11"><div class="hidden-md hidden-lg" id="mobile-page-heading-wrapper"><p>PredictionIO Docs</p><h4>Machine Learning Analytics with Tableau</h4></div><h4 class="hidden-sm hidden-xs">PredictionIO Docs</h4></div><div class="col-md-3 col-sm-1 col-xs-1 hidden-md hidden-lg"><img id="left-menu-indicator" src="/images/icons/down-arrow-dfe9f7fe.png"/></div><div class="col-md-3 col-sm-12 col-xs-12 swiftype-wrapper"><div class="swiftype"><form class="search-form"><img class="search-box-toggler hidden-xs hidden-sm" src="/images/icons/search-glass-704bd4ff.png"/><div class="search-box"><img src="/images/icons/search-glass-704bd4ff.png"/><input type="text" id="st-search-input" class="st-search-input" placeholder="Search Doc..."/></div><img class="swiftype-row-hider hidden-md hidden-lg" src="/images/icons/drawer-toggle-active-fcbef12a.png"/></form></div></div><div class="mobile-left-menu-toggler hidden-md hidden-lg"></div></div></div></div><div id="page" class="contai ner-fluid"><div class="row"><div id="left-menu-wrapper" class="col-md-3"><nav id="nav-main"><ul><li class="level-1"><a class="expandible" href="/"><span>Apache PredictionIO® Documentation</span></a><ul><li class="level-2"><a class="final" href="/"><span>Welcome to Apache PredictionIO®</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Getting Started</span></a><ul><li class="level-2"><a class="final" href="/start/"><span>A Quick Intro</span></a></li><li class="level-2"><a class="final" href="/install/"><span>Installing Apache PredictionIO</span></a></li><li class="level-2"><a class="final" href="/start/download/"><span>Downloading an Engine Template</span></a></li><li class="level-2"><a class="final" href="/start/deploy/"><span>Deploying Your First Engine</span></a></li><li class="level-2"><a class="final" href="/start/customize/"><span>Customizing the Engine</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Integratin g with Your App</span></a><ul><li class="level-2"><a class="final" href="/appintegration/"><span>App Integration Overview</span></a></li><li class="level-2"><a class="expandible" href="/sdk/"><span>List of SDKs</span></a><ul><li class="level-3"><a class="final" href="/sdk/java/"><span>Java & Android SDK</span></a></li><li class="level-3"><a class="final" href="/sdk/php/"><span>PHP SDK</span></a></li><li class="level-3"><a class="final" href="/sdk/python/"><span>Python SDK</span></a></li><li class="level-3"><a class="final" href="/sdk/ruby/"><span>Ruby SDK</span></a></li><li class="level-3"><a class="final" href="/sdk/community/"><span>Community Powered SDKs</span></a></li></ul></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Deploying an Engine</span></a><ul><li class="level-2"><a class="final" href="/deploy/"><span>Deploying as a Web Service</span></a></li><li class="level-2"><a class="final" href="/batchpredict/"><span>Batch Predictions</span></a></li><li cl ass="level-2"><a class="final" href="/deploy/monitoring/"><span>Monitoring Engine</span></a></li><li class="level-2"><a class="final" href="/deploy/engineparams/"><span>Setting Engine Parameters</span></a></li><li class="level-2"><a class="final" href="/deploy/enginevariants/"><span>Deploying Multiple Engine Variants</span></a></li><li class="level-2"><a class="final" href="/deploy/plugin/"><span>Engine Server Plugin</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Customizing an Engine</span></a><ul><li class="level-2"><a class="final" href="/customize/"><span>Learning DASE</span></a></li><li class="level-2"><a class="final" href="/customize/dase/"><span>Implement DASE</span></a></li><li class="level-2"><a class="final" href="/customize/troubleshooting/"><span>Troubleshooting Engine Development</span></a></li><li class="level-2"><a class="final" href="/api/current/#package"><span>Engine Scala APIs</span></a></li></ul></li><li class="level-1"><a clas s="expandible" href="#"><span>Collecting and Analyzing Data</span></a><ul><li class="level-2"><a class="final" href="/datacollection/"><span>Event Server Overview</span></a></li><li class="level-2"><a class="final" href="/datacollection/eventapi/"><span>Collecting Data with REST/SDKs</span></a></li><li class="level-2"><a class="final" href="/datacollection/eventmodel/"><span>Events Modeling</span></a></li><li class="level-2"><a class="final" href="/datacollection/webhooks/"><span>Unifying Multichannel Data with Webhooks</span></a></li><li class="level-2"><a class="final" href="/datacollection/channel/"><span>Channel</span></a></li><li class="level-2"><a class="final" href="/datacollection/batchimport/"><span>Importing Data in Batch</span></a></li><li class="level-2"><a class="final" href="/datacollection/analytics/"><span>Using Analytics Tools</span></a></li><li class="level-2"><a class="final" href="/datacollection/plugin/"><span>Event Server Plugin</span></a></li></ul></li><li cla ss="level-1"><a class="expandible" href="#"><span>Choosing an Algorithm</span></a><ul><li class="level-2"><a class="final" href="/algorithm/"><span>Built-in Algorithm Libraries</span></a></li><li class="level-2"><a class="final" href="/algorithm/switch/"><span>Switching to Another Algorithm</span></a></li><li class="level-2"><a class="final" href="/algorithm/multiple/"><span>Combining Multiple Algorithms</span></a></li><li class="level-2"><a class="final" href="/algorithm/custom/"><span>Adding Your Own Algorithms</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>ML Tuning and Evaluation</span></a><ul><li class="level-2"><a class="final" href="/evaluation/"><span>Overview</span></a></li><li class="level-2"><a class="final" href="/evaluation/paramtuning/"><span>Hyperparameter Tuning</span></a></li><li class="level-2"><a class="final" href="/evaluation/evaluationdashboard/"><span>Evaluation Dashboard</span></a></li><li class="level-2"><a class="final" hr ef="/evaluation/metricchoose/"><span>Choosing Evaluation Metrics</span></a></li><li class="level-2"><a class="final" href="/evaluation/metricbuild/"><span>Building Evaluation Metrics</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>System Architecture</span></a><ul><li class="level-2"><a class="final" href="/system/"><span>Architecture Overview</span></a></li><li class="level-2"><a class="final" href="/system/anotherdatastore/"><span>Using Another Data Store</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>PredictionIO® Official Templates</span></a><ul><li class="level-2"><a class="final" href="/templates/"><span>Intro</span></a></li><li class="level-2"><a class="expandible" href="#"><span>Recommendation</span></a><ul><li class="level-3"><a class="final" href="/templates/recommendation/quickstart/"><span>Quick Start</span></a></li><li class="level-3"><a class="final" href="/templates/recommendation/dase/"><span>DASE< /span></a></li><li class="level-3"><a class="final" href="/templates/recommendation/evaluation/"><span>Evaluation Explained</span></a></li><li class="level-3"><a class="final" href="/templates/recommendation/how-to/"><span>How-To</span></a></li><li class="level-3"><a class="final" href="/templates/recommendation/reading-custom-events/"><span>Read Custom Events</span></a></li><li class="level-3"><a class="final" href="/templates/recommendation/customize-data-prep/"><span>Customize Data Preparator</span></a></li><li class="level-3"><a class="final" href="/templates/recommendation/customize-serving/"><span>Customize Serving</span></a></li><li class="level-3"><a class="final" href="/templates/recommendation/training-with-implicit-preference/"><span>Train with Implicit Preference</span></a></li><li class="level-3"><a class="final" href="/templates/recommendation/blacklist-items/"><span>Filter Recommended Items by Blacklist in Query</span></a></li><li class="level-3"><a class="final" href ="/templates/recommendation/batch-evaluator/"><span>Batch Persistable Evaluator</span></a></li></ul></li><li class="level-2"><a class="expandible" href="#"><span>E-Commerce Recommendation</span></a><ul><li class="level-3"><a class="final" href="/templates/ecommercerecommendation/quickstart/"><span>Quick Start</span></a></li><li class="level-3"><a class="final" href="/templates/ecommercerecommendation/dase/"><span>DASE</span></a></li><li class="level-3"><a class="final" href="/templates/ecommercerecommendation/how-to/"><span>How-To</span></a></li><li class="level-3"><a class="final" href="/templates/ecommercerecommendation/train-with-rate-event/"><span>Train with Rate Event</span></a></li><li class="level-3"><a class="final" href="/templates/ecommercerecommendation/adjust-score/"><span>Adjust Score</span></a></li></ul></li><li class="level-2"><a class="expandible" href="#"><span>Similar Product</span></a><ul><li class="level-3"><a class="final" href="/templates/similarproduct/quickst art/"><span>Quick Start</span></a></li><li class="level-3"><a class="final" href="/templates/similarproduct/dase/"><span>DASE</span></a></li><li class="level-3"><a class="final" href="/templates/similarproduct/how-to/"><span>How-To</span></a></li><li class="level-3"><a class="final" href="/templates/similarproduct/multi-events-multi-algos/"><span>Multiple Events and Multiple Algorithms</span></a></li><li class="level-3"><a class="final" href="/templates/similarproduct/return-item-properties/"><span>Returns Item Properties</span></a></li><li class="level-3"><a class="final" href="/templates/similarproduct/train-with-rate-event/"><span>Train with Rate Event</span></a></li><li class="level-3"><a class="final" href="/templates/similarproduct/rid-user-set-event/"><span>Get Rid of Events for Users</span></a></li><li class="level-3"><a class="final" href="/templates/similarproduct/recommended-user/"><span>Recommend Users</span></a></li></ul></li><li class="level-2"><a class="expandible" hr ef="#"><span>Classification</span></a><ul><li class="level-3"><a class="final" href="/templates/classification/quickstart/"><span>Quick Start</span></a></li><li class="level-3"><a class="final" href="/templates/classification/dase/"><span>DASE</span></a></li><li class="level-3"><a class="final" href="/templates/classification/how-to/"><span>How-To</span></a></li><li class="level-3"><a class="final" href="/templates/classification/add-algorithm/"><span>Use Alternative Algorithm</span></a></li><li class="level-3"><a class="final" href="/templates/classification/reading-custom-properties/"><span>Read Custom Properties</span></a></li></ul></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Engine Template Gallery</span></a><ul><li class="level-2"><a class="final" href="/gallery/template-gallery/"><span>Browse</span></a></li><li class="level-2"><a class="final" href="/community/submit-template/"><span>Submit your Engine as a Template</span></a></li></ul></li><li class ="level-1"><a class="expandible" href="#"><span>Demo Tutorials</span></a><ul><li class="level-2"><a class="final" href="/demo/tapster/"><span>Comics Recommendation Demo</span></a></li><li class="level-2"><a class="final" href="/demo/community/"><span>Community Contributed Demo</span></a></li><li class="level-2"><a class="final" href="/demo/textclassification/"><span>Text Classification Engine Tutorial</span></a></li></ul></li><li class="level-1"><a class="expandible" href="/community/"><span>Getting Involved</span></a><ul><li class="level-2"><a class="final" href="/community/contribute-code/"><span>Contribute Code</span></a></li><li class="level-2"><a class="final" href="/community/contribute-documentation/"><span>Contribute Documentation</span></a></li><li class="level-2"><a class="final" href="/community/contribute-sdk/"><span>Contribute a SDK</span></a></li><li class="level-2"><a class="final" href="/community/contribute-webhook/"><span>Contribute a Webhook</span></a></li><li cla ss="level-2"><a class="final" href="/community/projects/"><span>Community Projects</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Getting Help</span></a><ul><li class="level-2"><a class="final" href="/resources/faq/"><span>FAQs</span></a></li><li class="level-2"><a class="final" href="/support/"><span>Support</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Resources</span></a><ul><li class="level-2"><a class="final" href="/cli/"><span>Command-line Interface</span></a></li><li class="level-2"><a class="final" href="/resources/release/"><span>Release Cadence</span></a></li><li class="level-2"><a class="final" href="/resources/intellij/"><span>Developing Engines with IntelliJ IDEA</span></a></li><li class="level-2"><a class="final" href="/resources/upgrade/"><span>Upgrade Instructions</span></a></li><li class="level-2"><a class="final" href="/resources/glossary/"><span>Glossary</span></a></li></ul></li><li class="leve l-1"><a class="expandible" href="#"><span>Apache Software Foundation</span></a><ul><li class="level-2"><a class="final" href="https://www.apache.org/"><span>Apache Homepage</span></a></li><li class="level-2"><a class="final" href="https://www.apache.org/licenses/"><span>License</span></a></li><li class="level-2"><a class="final" href="https://www.apache.org/foundation/sponsorship.html"><span>Sponsorship</span></a></li><li class="level-2"><a class="final" href="https://www.apache.org/foundation/thanks.html"><span>Thanks</span></a></li><li class="level-2"><a class="final" href="https://www.apache.org/security/"><span>Security</span></a></li></ul></li></ul></nav></div><div class="col-md-9 col-sm-12"><div class="content-header hidden-md hidden-lg"><div id="page-title"><h1>Machine Learning Analytics with Tableau</h1></div></div><div id="table-of-content-wrapper"><h5>On this page</h5><aside id="table-of-contents"><ul> <li> <a href="#prerequisites">Prerequisites</a> </li> <li> <a href="#cr eating-hive-tables">Creating Hive Tables</a> </li> <li> <a href="#launch-spark-sql-s-thrift-jdbc-odbc-server">Launch Spark SQL's Thrift JDBC/ODBC Server</a> </li> <li> <a href="#performing-analysis-with-tableau">Performing Analysis with Tableau</a> </li> </ul> </aside><hr/><a id="edit-page-link" href="https://github.com/apache/predictionio/tree/livedoc/docs/manual/source/datacollection/analytics-tableau.html.md.erb"><img src="/images/icons/edit-pencil-d6c1bb3d.png"/>Edit this page</a></div><div class="content-header hidden-sm hidden-xs"><div id="page-title"><h1>Machine Learning Analytics with Tableau</h1></div></div><div class="content"> <p>With Spark SQL, it is possible to connect Tableau to Apache PredictionIO Event Server for interactive analysis of event data.</p><h2 id='prerequisites' class='header-anchors'>Prerequisites</h2> <ul> <li>Tableau Desktop 8.3+ with a proper license key that supports Spark SQL;</li> <li>Spark ODBC Driver from Databricks (<a href="https://databricks.c om/spark/odbc-driver-download">https://databricks.com/spark/odbc-driver-download</a>);</li> <li>Apache Hadoop 2.4+</li> <li>Apache Hive 0.3.1+</li> </ul> <div class="alert-message info"><p>In this article, we will assume that you have a working HDFS, and that your environmental variable <code>HADOOP_HOME</code> has been properly set. This is essential for Apache Hive to function properly. In addition, <code>HADOOP_CONF_DIR</code> in <code>$PIO_HOME/conf/pio-env.sh</code> must also be properly set for the <code>pio export</code> command to write to HDFS instead of the local filesystem.</p></div> <p><h2 id='export-events-to-apache-parquet' class='header-anchors'>Export Events to Apache Parquet</h2><p>PredictionIO supports exporting your events to <a href="http://parquet.apache.org/">Apache Parquet</a>, a columnar storage format that allows you to query quickly.</p><p>Let's export the data we imported in <a href="/templates/recommendation/quickstart/#import-sample-data">Recommendat ion Engine Template Quick Start</a>, and assume the App ID is 1.</p><div class="highlight shell"><table style="border-spacing: 0"><tbody><tr><td class="gutter gl" style="text-align: right"><pre class="lineno">1</pre></td><td class="code"><pre><span class="gp">$ </span><span class="nv">$PIO_HOME</span>/bin/pio <span class="nb">export</span> --appid 1 --output /tmp/movies --format parquet </pre></td></tr></tbody></table> </div> <p>After the command has finished successfully, you should see something similar to the following.</p><div class="highlight shell"><table style="border-spacing: 0"><tbody><tr><td class="gutter gl" style="text-align: right"><pre class="lineno">1 2 3 @@ -83,7 +83,7 @@ 10 rows selected <span class="o">(</span>0.515 seconds<span class="o">)</span> 0: jdbc:hive2://localhost:10000> </pre></td></tr></tbody></table> </div> <p>Now you are ready to use Tableau!</p><h2 id='performing-analysis-with-tableau' class='header-anchors'>Performing Analysis with Tableau</h2><p>Launch Tableau and Connect to Data. Click on <strong>Spark SQL (Beta)</strong> and enter Spark SQL's Thrift JDBC/ODBC Server information. Make sure to pick <strong>User Name</strong> as <strong>Authentication</strong>. Click <strong>Connect</strong>.</p><p><img alt="Tableau and Spark SQL" src="/images/datacollection/tableau-01-b5a23839.png"/></p><p>On the next page, pick <strong>default</strong> under <strong>Schema</strong>.</p><div class="alert-message info"><p>You may not see any choices when you click on Schema. Simply press Enter and Tableau will try to list all schemas.</p></div><p>Once you see a list of tables that includes <strong>events</strong>, click <strong>New Custom SQL</strong>, then enter the following.</p><div class="highlight sql"><table style="border-spacing: 0"><tbody><tr><td c lass="gutter gl" style="text-align: right"><pre class="lineno">1</pre></td><td class="code"><pre><span class="k">SELECT</span> <span class="n">event</span><span class="p">,</span> <span class="n">entityType</span><span class="p">,</span> <span class="n">entityId</span><span class="p">,</span> <span class="n">targetEntityType</span><span class="p">,</span> <span class="n">targetEntityId</span><span class="p">,</span> <span class="n">properties</span><span class="p">.</span><span class="n">rating</span> <span class="k">FROM</span> <span class="n">events</span> -</pre></td></tr></tbody></table> </div> <p>Click <strong>Update Now</strong>. You should see the following screen by now, indicating success in loading data. Using a custom SQL allows you to extract arbitrary fields from within properties.</p><p><img alt="Setting up Tableau" src="/images/datacollection/tableau-02-76e93443.png"/></p><p>Click <strong>Go to Worksheet</strong> and start analyzing. The following shows an example of breaking down different rating values.</p><p><img alt="Rating Values Breakdown" src="/images/datacollection/tableau-03-e389351e.png"/></p><p>The following shows a summary of interactions.</p><p><img alt="Interactions" src="/images/datacollection/tableau-04-c8c31bb7.png"/></p><p>Happy analyzing!</p></div></div></div></div><footer><div class="container"><div class="seperator"></div><div class="row"><div class="col-md-6 footer-link-column"><div class="footer-link-column-row"><h4>Community</h4><ul><li><a href="//predictionio.apache.org/install/" target="blank">Dow nload</a></li><li><a href="//predictionio.apache.org/" target="blank">Docs</a></li><li><a href="//github.com/apache/predictionio" target="blank">GitHub</a></li><li><a href="mailto:[email protected]" target="blank">Subscribe to User Mailing List</a></li><li><a href="//stackoverflow.com/questions/tagged/predictionio" target="blank">Stackoverflow</a></li></ul></div></div><div class="col-md-6 footer-link-column"><div class="footer-link-column-row"><h4>Contribute</h4><ul><li><a href="//predictionio.apache.org/community/contribute-code/" target="blank">Contribute</a></li><li><a href="//github.com/apache/predictionio" target="blank">Source Code</a></li><li><a href="//issues.apache.org/jira/browse/PIO" target="blank">Bug Tracker</a></li><li><a href="mailto:[email protected]" target="blank">Subscribe to Development Mailing List</a></li></ul></div></div></div><div class="row"><div class="col-md-12 footer-link-column"><p>Apache PredictionIO, PredictionI O, Apache, the Apache feather logo, and the Apache PredictionIO project logo are either registered trademarks or trademarks of The Apache Software Foundation in the United States and other countries.</p><p>All other marks mentioned may be trademarks or registered trademarks of their respective owners.</p></div></div></div><div id="footer-bottom"><div class="container"><div class="row"><div class="col-md-12"><div id="footer-logo-wrapper"><img alt="PredictionIO" src="/images/logos/logo-white-d1e9c6e6.png"/><span>®</span></div><div id="social-icons-wrapper"><a class="github-button" href="https://github.com/apache/predictionio" data-style="mega" data-count-href="/apache/predictionio/stargazers" data-count-api="/repos/apache/predictionio#stargazers_count" data-count-aria-label="# stargazers on GitHub" aria-label="Star apache/predictionio on GitHub">Star</a> <a class="github-button" href="https://github.com/apache/predictionio/fork" data-icon="octicon-git-branch" data-style="mega" data-c ount-href="/apache/predictionio/network" data-count-api="/repos/apache/predictionio#forks_count" data-count-aria-label="# forks on GitHub" aria-label="Fork apache/predictionio on GitHub">Fork</a> <script id="github-bjs" async="" defer="" src="https://buttons.github.io/buttons.js"></script><a href="https://twitter.com/predictionio" target="blank"><img alt="PredictionIO on Twitter" src="/images/icons/twitter-ea9dc152.png"/></a> <a href="https://www.facebook.com/predictionio" target="blank"><img alt="PredictionIO on Facebook" src="/images/icons/facebook-5c57939c.png"/></a> </div></div></div></div></div></footer></div><script>(function(w,d,t,u,n,s,e){w['SwiftypeObject']=n;w[n]=w[n]||function(){ +</pre></td></tr></tbody></table> </div> <p>Click <strong>Update Now</strong>. You should see the following screen by now, indicating success in loading data. Using a custom SQL allows you to extract arbitrary fields from within properties.</p><p><img alt="Setting up Tableau" src="/images/datacollection/tableau-02-76e93443.png"/></p><p>Click <strong>Go to Worksheet</strong> and start analyzing. The following shows an example of breaking down different rating values.</p><p><img alt="Rating Values Breakdown" src="/images/datacollection/tableau-03-e389351e.png"/></p><p>The following shows a summary of interactions.</p><p><img alt="Interactions" src="/images/datacollection/tableau-04-c8c31bb7.png"/></p><p>Happy analyzing!</p></div></div></div></div><footer><div class="container"><div class="seperator"></div><div class="row"><div class="col-md-6 footer-link-column"><div class="footer-link-column-row"><h4>Community</h4><ul><li><a href="//predictionio.apache.org/install/" target="blank">Dow nload</a></li><li><a href="//predictionio.apache.org/" target="blank">Docs</a></li><li><a href="//github.com/apache/predictionio" target="blank">GitHub</a></li><li><a href="mailto:[email protected]" target="blank">Subscribe to User Mailing List</a></li><li><a href="//stackoverflow.com/questions/tagged/predictionio" target="blank">Stackoverflow</a></li></ul></div></div><div class="col-md-6 footer-link-column"><div class="footer-link-column-row"><h4>Contribute</h4><ul><li><a href="//predictionio.apache.org/community/contribute-code/" target="blank">Contribute</a></li><li><a href="//github.com/apache/predictionio" target="blank">Source Code</a></li><li><a href="//issues.apache.org/jira/browse/PIO" target="blank">Bug Tracker</a></li><li><a href="mailto:[email protected]" target="blank">Subscribe to Development Mailing List</a></li></ul></div></div></div><div class="row"><div class="col-md-12 footer-link-column"><p>Apache PredictionIO, PredictionI O, Apache, the Apache feather logo, and the Apache PredictionIO project logo are either registered trademarks or trademarks of The Apache Software Foundation in the United States and other countries.</p><p>All other marks mentioned may be trademarks or registered trademarks of their respective owners.</p></div></div></div><div id="footer-bottom"><div class="container"><div class="row"><div class="col-md-12"><div id="footer-logo-wrapper"><img alt="PredictionIO" src="/images/logos/logo-white-d1e9c6e6.png"/><span>®</span></div><div id="social-icons-wrapper"><a class="github-button" href="https://github.com/apache/predictionio" data-icon="octicon-star" data-show-count="true" aria-label="Star apache/predictionio on GitHub">Star</a> <a class="github-button" href="https://github.com/apache/predictionio/fork" data-icon="octicon-repo-forked" data-show-count="true" aria-label="Fork apache/predictionio on GitHub">Fork</a> <script id="github-bjs" async="" defer="" src="https://buttons.github.i o/buttons.js"></script><a href="https://twitter.com/predictionio" target="blank"><img alt="PredictionIO on Twitter" src="/images/icons/twitter-ea9dc152.png"/></a> <a href="https://www.facebook.com/predictionio" target="blank"><img alt="PredictionIO on Facebook" src="/images/icons/facebook-5c57939c.png"/></a> </div></div></div></div></div></footer></div><script>(function(w,d,t,u,n,s,e){w['SwiftypeObject']=n;w[n]=w[n]||function(){ (w[n].q=w[n].q||[]).push(arguments);};s=d.createElement(t); e=d.getElementsByTagName(t)[0];s.async=1;s.src=u;e.parentNode.insertBefore(s,e); })(window,document,'script','//s.swiftypecdn.com/install/v1/st.js','_st');
http://git-wip-us.apache.org/repos/asf/predictionio-site/blob/b1f44c15/datacollection/analytics-zeppelin/index.html ---------------------------------------------------------------------- diff --git a/datacollection/analytics-zeppelin/index.html b/datacollection/analytics-zeppelin/index.html index c1d6385..dd93f4b 100644 --- a/datacollection/analytics-zeppelin/index.html +++ b/datacollection/analytics-zeppelin/index.html @@ -1,4 +1,4 @@ -<!DOCTYPE html><html><head><title>Machine Learning Analytics with Zeppelin</title><meta charset="utf-8"/><meta content="IE=edge,chrome=1" http-equiv="X-UA-Compatible"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><meta class="swiftype" name="title" data-type="string" content="Machine Learning Analytics with Zeppelin"/><link rel="canonical" href="https://predictionio.apache.org/datacollection/analytics-zeppelin/"/><link href="/images/favicon/normal-b330020a.png" rel="shortcut icon"/><link href="/images/favicon/apple-c0febcf2.png" rel="apple-touch-icon"/><link href="//fonts.googleapis.com/css?family=Open+Sans:300italic,400italic,600italic,700italic,800italic,400,300,600,700,800" rel="stylesheet"/><link href="//maxcdn.bootstrapcdn.com/font-awesome/4.2.0/css/font-awesome.min.css" rel="stylesheet"/><link href="/stylesheets/application-eccfc6cb.css" rel="stylesheet" type="text/css"/><script src="//cdnjs.cloudflare.com/ajax/libs/html5shiv/3.7.2/html5shiv.min.js">< /script><script src="//cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script><script src="//use.typekit.net/pqo0itb.js"></script><script>try{Typekit.load({ async: true });}catch(e){}</script></head><body><div id="global"><header><div class="container" id="header-wrapper"><div class="row"><div class="col-sm-12"><div id="logo-wrapper"><span id="drawer-toggle"></span><a href="#"></a><a href="http://predictionio.apache.org/"><img alt="Apache PredictionIO" id="logo" src="/images/logos/logo-ee2b9bb3.png"/></a><span>®</span></div><div id="menu-wrapper"><div id="pill-wrapper"><a class="pill left" href="/gallery/template-gallery">TEMPLATES</a> <a class="pill right" href="//github.com/apache/predictionio/">OPEN SOURCE</a></div></div><img class="mobile-search-bar-toggler hidden-md hidden-lg" src="/images/icons/search-glass-704bd4ff.png"/></div></div></div></header><div id="search-bar-row-wrapper"><div class="container-fluid" id="search-bar-row"><div class="row"><div class="col-md-9 col-sm-11 col-xs-11"><div class="hidden-md hidden-lg" id="mobile-page-heading-wrapper"><p>PredictionIO Docs</p><h4>Machine Learning Analytics with Zeppelin</h4></div><h4 class="hidden-sm hidden-xs">PredictionIO Docs</h4></div><div class="col-md-3 col-sm-1 col-xs-1 hidden-md hidden-lg"><img id="left-menu-indicator" src="/images/icons/down-arrow-dfe9f7fe.png"/></div><div class="col-md-3 col-sm-12 col-xs-12 swiftype-wrapper"><div class="swiftype"><form class="search-form"><img class="search-box-toggler hidden-xs hidden-sm" src="/images/icons/search-glass-704bd4ff.png"/><div class="search-box"><img src="/images/icons/search-glass-704bd4ff.png"/><input type="text" id="st-search-input" class="st-search-input" placeholder="Search Doc..."/></div><img class="swiftype-row-hider hidden-md hidden-lg" src="/images/icons/drawer-toggle-active-fcbef12a.png"/></form></div></div><div class="mobile-left-menu-toggler hidden-md hidden-lg"></div></div></div></div><div id="page" class="co ntainer-fluid"><div class="row"><div id="left-menu-wrapper" class="col-md-3"><nav id="nav-main"><ul><li class="level-1"><a class="expandible" href="/"><span>Apache PredictionIO® Documentation</span></a><ul><li class="level-2"><a class="final" href="/"><span>Welcome to Apache PredictionIO®</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Getting Started</span></a><ul><li class="level-2"><a class="final" href="/start/"><span>A Quick Intro</span></a></li><li class="level-2"><a class="final" href="/install/"><span>Installing Apache PredictionIO</span></a></li><li class="level-2"><a class="final" href="/start/download/"><span>Downloading an Engine Template</span></a></li><li class="level-2"><a class="final" href="/start/deploy/"><span>Deploying Your First Engine</span></a></li><li class="level-2"><a class="final" href="/start/customize/"><span>Customizing the Engine</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Integr ating with Your App</span></a><ul><li class="level-2"><a class="final" href="/appintegration/"><span>App Integration Overview</span></a></li><li class="level-2"><a class="expandible" href="/sdk/"><span>List of SDKs</span></a><ul><li class="level-3"><a class="final" href="/sdk/java/"><span>Java & Android SDK</span></a></li><li class="level-3"><a class="final" href="/sdk/php/"><span>PHP SDK</span></a></li><li class="level-3"><a class="final" href="/sdk/python/"><span>Python SDK</span></a></li><li class="level-3"><a class="final" href="/sdk/ruby/"><span>Ruby SDK</span></a></li><li class="level-3"><a class="final" href="/sdk/community/"><span>Community Powered SDKs</span></a></li></ul></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Deploying an Engine</span></a><ul><li class="level-2"><a class="final" href="/deploy/"><span>Deploying as a Web Service</span></a></li><li class="level-2"><a class="final" href="/batchpredict/"><span>Batch Predictions</span></a></li><l i class="level-2"><a class="final" href="/deploy/monitoring/"><span>Monitoring Engine</span></a></li><li class="level-2"><a class="final" href="/deploy/engineparams/"><span>Setting Engine Parameters</span></a></li><li class="level-2"><a class="final" href="/deploy/enginevariants/"><span>Deploying Multiple Engine Variants</span></a></li><li class="level-2"><a class="final" href="/deploy/plugin/"><span>Engine Server Plugin</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Customizing an Engine</span></a><ul><li class="level-2"><a class="final" href="/customize/"><span>Learning DASE</span></a></li><li class="level-2"><a class="final" href="/customize/dase/"><span>Implement DASE</span></a></li><li class="level-2"><a class="final" href="/customize/troubleshooting/"><span>Troubleshooting Engine Development</span></a></li><li class="level-2"><a class="final" href="/api/current/#package"><span>Engine Scala APIs</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Collecting and Analyzing Data</span></a><ul><li class="level-2"><a class="final" href="/datacollection/"><span>Event Server Overview</span></a></li><li class="level-2"><a class="final" href="/datacollection/eventapi/"><span>Collecting Data with REST/SDKs</span></a></li><li class="level-2"><a class="final" href="/datacollection/eventmodel/"><span>Events Modeling</span></a></li><li class="level-2"><a class="final" href="/datacollection/webhooks/"><span>Unifying Multichannel Data with Webhooks</span></a></li><li class="level-2"><a class="final" href="/datacollection/channel/"><span>Channel</span></a></li><li class="level-2"><a class="final" href="/datacollection/batchimport/"><span>Importing Data in Batch</span></a></li><li class="level-2"><a class="final" href="/datacollection/analytics/"><span>Using Analytics Tools</span></a></li><li class="level-2"><a class="final" href="/datacollection/plugin/"><span>Event Server Plugin</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Choosing an Algorithm(s)</span></a><ul><li class="level-2"><a class="final" href="/algorithm/"><span>Built-in Algorithm Libraries</span></a></li><li class="level-2"><a class="final" href="/algorithm/switch/"><span>Switching to Another Algorithm</span></a></li><li class="level-2"><a class="final" href="/algorithm/multiple/"><span>Combining Multiple Algorithms</span></a></li><li class="level-2"><a class="final" href="/algorithm/custom/"><span>Adding Your Own Algorithms</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>ML Tuning and Evaluation</span></a><ul><li class="level-2"><a class="final" href="/evaluation/"><span>Overview</span></a></li><li class="level-2"><a class="final" href="/evaluation/paramtuning/"><span>Hyperparameter Tuning</span></a></li><li class="level-2"><a class="final" href="/evaluation/evaluationdashboard/"><span>Evaluation Dashboard</span></a></li><li class="level-2"><a class="fi nal" href="/evaluation/metricchoose/"><span>Choosing Evaluation Metrics</span></a></li><li class="level-2"><a class="final" href="/evaluation/metricbuild/"><span>Building Evaluation Metrics</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>System Architecture</span></a><ul><li class="level-2"><a class="final" href="/system/"><span>Architecture Overview</span></a></li><li class="level-2"><a class="final" href="/system/anotherdatastore/"><span>Using Another Data Store</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>PredictionIO® Official Templates</span></a><ul><li class="level-2"><a class="final" href="/templates/"><span>Intro</span></a></li><li class="level-2"><a class="expandible" href="#"><span>Recommendation</span></a><ul><li class="level-3"><a class="final" href="/templates/recommendation/quickstart/"><span>Quick Start</span></a></li><li class="level-3"><a class="final" href="/templates/recommendation/dase/"><spa n>DASE</span></a></li><li class="level-3"><a class="final" href="/templates/recommendation/evaluation/"><span>Evaluation Explained</span></a></li><li class="level-3"><a class="final" href="/templates/recommendation/how-to/"><span>How-To</span></a></li><li class="level-3"><a class="final" href="/templates/recommendation/reading-custom-events/"><span>Read Custom Events</span></a></li><li class="level-3"><a class="final" href="/templates/recommendation/customize-data-prep/"><span>Customize Data Preparator</span></a></li><li class="level-3"><a class="final" href="/templates/recommendation/customize-serving/"><span>Customize Serving</span></a></li><li class="level-3"><a class="final" href="/templates/recommendation/training-with-implicit-preference/"><span>Train with Implicit Preference</span></a></li><li class="level-3"><a class="final" href="/templates/recommendation/blacklist-items/"><span>Filter Recommended Items by Blacklist in Query</span></a></li><li class="level-3"><a class="fina l" href="/templates/recommendation/batch-evaluator/"><span>Batch Persistable Evaluator</span></a></li></ul></li><li class="level-2"><a class="expandible" href="#"><span>E-Commerce Recommendation</span></a><ul><li class="level-3"><a class="final" href="/templates/ecommercerecommendation/quickstart/"><span>Quick Start</span></a></li><li class="level-3"><a class="final" href="/templates/ecommercerecommendation/dase/"><span>DASE</span></a></li><li class="level-3"><a class="final" href="/templates/ecommercerecommendation/how-to/"><span>How-To</span></a></li><li class="level-3"><a class="final" href="/templates/ecommercerecommendation/train-with-rate-event/"><span>Train with Rate Event</span></a></li><li class="level-3"><a class="final" href="/templates/ecommercerecommendation/adjust-score/"><span>Adjust Score</span></a></li></ul></li><li class="level-2"><a class="expandible" href="#"><span>Similar Product</span></a><ul><li class="level-3"><a class="final" href="/templates/similarproduct/ quickstart/"><span>Quick Start</span></a></li><li class="level-3"><a class="final" href="/templates/similarproduct/dase/"><span>DASE</span></a></li><li class="level-3"><a class="final" href="/templates/similarproduct/how-to/"><span>How-To</span></a></li><li class="level-3"><a class="final" href="/templates/similarproduct/multi-events-multi-algos/"><span>Multiple Events and Multiple Algorithms</span></a></li><li class="level-3"><a class="final" href="/templates/similarproduct/return-item-properties/"><span>Returns Item Properties</span></a></li><li class="level-3"><a class="final" href="/templates/similarproduct/train-with-rate-event/"><span>Train with Rate Event</span></a></li><li class="level-3"><a class="final" href="/templates/similarproduct/rid-user-set-event/"><span>Get Rid of Events for Users</span></a></li><li class="level-3"><a class="final" href="/templates/similarproduct/recommended-user/"><span>Recommend Users</span></a></li></ul></li><li class="level-2"><a class="expandi ble" href="#"><span>Classification</span></a><ul><li class="level-3"><a class="final" href="/templates/classification/quickstart/"><span>Quick Start</span></a></li><li class="level-3"><a class="final" href="/templates/classification/dase/"><span>DASE</span></a></li><li class="level-3"><a class="final" href="/templates/classification/how-to/"><span>How-To</span></a></li><li class="level-3"><a class="final" href="/templates/classification/add-algorithm/"><span>Use Alternative Algorithm</span></a></li><li class="level-3"><a class="final" href="/templates/classification/reading-custom-properties/"><span>Read Custom Properties</span></a></li></ul></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Engine Template Gallery</span></a><ul><li class="level-2"><a class="final" href="/gallery/template-gallery/"><span>Browse</span></a></li><li class="level-2"><a class="final" href="/community/submit-template/"><span>Submit your Engine as a Template</span></a></li></ul></li><l i class="level-1"><a class="expandible" href="#"><span>Demo Tutorials</span></a><ul><li class="level-2"><a class="final" href="/demo/tapster/"><span>Comics Recommendation Demo</span></a></li><li class="level-2"><a class="final" href="/demo/community/"><span>Community Contributed Demo</span></a></li><li class="level-2"><a class="final" href="/demo/textclassification/"><span>Text Classification Engine Tutorial</span></a></li></ul></li><li class="level-1"><a class="expandible" href="/community/"><span>Getting Involved</span></a><ul><li class="level-2"><a class="final" href="/community/contribute-code/"><span>Contribute Code</span></a></li><li class="level-2"><a class="final" href="/community/contribute-documentation/"><span>Contribute Documentation</span></a></li><li class="level-2"><a class="final" href="/community/contribute-sdk/"><span>Contribute a SDK</span></a></li><li class="level-2"><a class="final" href="/community/contribute-webhook/"><span>Contribute a Webhook</span></a></li> <li class="level-2"><a class="final" href="/community/projects/"><span>Community Projects</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Getting Help</span></a><ul><li class="level-2"><a class="final" href="/resources/faq/"><span>FAQs</span></a></li><li class="level-2"><a class="final" href="/support/"><span>Support</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Resources</span></a><ul><li class="level-2"><a class="final" href="/cli/"><span>Command-line Interface</span></a></li><li class="level-2"><a class="final" href="/resources/release/"><span>Release Cadence</span></a></li><li class="level-2"><a class="final" href="/resources/intellij/"><span>Developing Engines with IntelliJ IDEA</span></a></li><li class="level-2"><a class="final" href="/resources/upgrade/"><span>Upgrade Instructions</span></a></li><li class="level-2"><a class="final" href="/resources/glossary/"><span>Glossary</span></a></li></ul></li><li clas s="level-1"><a class="expandible" href="#"><span>Apache Software Foundation</span></a><ul><li class="level-2"><a class="final" href="https://www.apache.org/"><span>Apache Homepage</span></a></li><li class="level-2"><a class="final" href="https://www.apache.org/licenses/"><span>License</span></a></li><li class="level-2"><a class="final" href="https://www.apache.org/foundation/sponsorship.html"><span>Sponsorship</span></a></li><li class="level-2"><a class="final" href="https://www.apache.org/foundation/thanks.html"><span>Thanks</span></a></li><li class="level-2"><a class="final" href="https://www.apache.org/security/"><span>Security</span></a></li></ul></li></ul></nav></div><div class="col-md-9 col-sm-12"><div class="content-header hidden-md hidden-lg"><div id="page-title"><h1>Machine Learning Analytics with Zeppelin</h1></div></div><div id="table-of-content-wrapper"><h5>On this page</h5><aside id="table-of-contents"><ul> <li> <a href="#prerequisites">Prerequisites</a> </li> <li> <a h ref="#building-zeppelin-for-apache-spark-1-2">Building Zeppelin for Apache Spark 1.2+</a> </li> <li> <a href="#preparing-zeppelin">Preparing Zeppelin</a> </li> <li> <a href="#performing-analysis-with-zeppelin">Performing Analysis with Zeppelin</a> </li> </ul> </aside><hr/><a id="edit-page-link" href="https://github.com/apache/predictionio/tree/livedoc/docs/manual/source/datacollection/analytics-zeppelin.html.md.erb"><img src="/images/icons/edit-pencil-d6c1bb3d.png"/>Edit this page</a></div><div class="content-header hidden-sm hidden-xs"><div id="page-title"><h1>Machine Learning Analytics with Zeppelin</h1></div></div><div class="content"> <p><a href="http://zeppelin-project.org/">Apache Zeppelin</a> is an interactive computational environment built on Apache Spark like the IPython Notebook. With <a href="http://predictionio.apache.org">Apache PredictionIO</a> and <a href="https://spark.apache.org/sql/">Spark SQL</a>, you can easily analyze your collected events when you are developi ng or tuning your engine.</p><h2 id='prerequisites' class='header-anchors'>Prerequisites</h2><p>The following instructions assume that you have the command <code>sbt</code> accessible in your shell's search path. Alternatively, you can use the <code>sbt</code> command that comes with Apache PredictionIO at <code>$PIO_HOME/sbt/sbt</code>.</p> <p><h2 id='export-events-to-apache-parquet' class='header-anchors'>Export Events to Apache Parquet</h2><p>PredictionIO supports exporting your events to <a href="http://parquet.apache.org/">Apache Parquet</a>, a columnar storage format that allows you to query quickly.</p><p>Let's export the data we imported in <a href="/templates/recommendation/quickstart/#import-sample-data">Recommendation Engine Template Quick Start</a>, and assume the App ID is 1.</p><div class="highlight shell"><table style="border-spacing: 0"><tbody><tr><td class="gutter gl" style="text-align: right"><pre class="lineno">1</pre></td><td class="code"><pre><span class ="gp">$ </span><span class="nv">$PIO_HOME</span>/bin/pio <span class="nb">export</span> --appid 1 --output /tmp/movies --format parquet +<!DOCTYPE html><html><head><title>Machine Learning Analytics with Zeppelin</title><meta charset="utf-8"/><meta content="IE=edge,chrome=1" http-equiv="X-UA-Compatible"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><meta class="swiftype" name="title" data-type="string" content="Machine Learning Analytics with Zeppelin"/><link rel="canonical" href="https://predictionio.apache.org/datacollection/analytics-zeppelin/"/><link href="/images/favicon/normal-b330020a.png" rel="shortcut icon"/><link href="/images/favicon/apple-c0febcf2.png" rel="apple-touch-icon"/><link href="//fonts.googleapis.com/css?family=Open+Sans:300italic,400italic,600italic,700italic,800italic,400,300,600,700,800" rel="stylesheet"/><link href="//maxcdn.bootstrapcdn.com/font-awesome/4.2.0/css/font-awesome.min.css" rel="stylesheet"/><link href="/stylesheets/application-eccfc6cb.css" rel="stylesheet" type="text/css"/><script src="//cdnjs.cloudflare.com/ajax/libs/html5shiv/3.7.2/html5shiv.min.js">< /script><script src="//cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script><script src="//use.typekit.net/pqo0itb.js"></script><script>try{Typekit.load({ async: true });}catch(e){}</script></head><body><div id="global"><header><div class="container" id="header-wrapper"><div class="row"><div class="col-sm-12"><div id="logo-wrapper"><span id="drawer-toggle"></span><a href="#"></a><a href="http://predictionio.apache.org/"><img alt="Apache PredictionIO" id="logo" src="/images/logos/logo-ee2b9bb3.png"/></a><span>®</span></div><div id="menu-wrapper"><div id="pill-wrapper"><a class="pill left" href="/gallery/template-gallery">TEMPLATES</a> <a class="pill right" href="//github.com/apache/predictionio/">OPEN SOURCE</a></div></div><img class="mobile-search-bar-toggler hidden-md hidden-lg" src="/images/icons/search-glass-704bd4ff.png"/></div></div></div></header><div id="search-bar-row-wrapper"><div class="container-fluid" id="search-bar-row"><div class="row"><div class="col-md-9 col-sm-11 col-xs-11"><div class="hidden-md hidden-lg" id="mobile-page-heading-wrapper"><p>PredictionIO Docs</p><h4>Machine Learning Analytics with Zeppelin</h4></div><h4 class="hidden-sm hidden-xs">PredictionIO Docs</h4></div><div class="col-md-3 col-sm-1 col-xs-1 hidden-md hidden-lg"><img id="left-menu-indicator" src="/images/icons/down-arrow-dfe9f7fe.png"/></div><div class="col-md-3 col-sm-12 col-xs-12 swiftype-wrapper"><div class="swiftype"><form class="search-form"><img class="search-box-toggler hidden-xs hidden-sm" src="/images/icons/search-glass-704bd4ff.png"/><div class="search-box"><img src="/images/icons/search-glass-704bd4ff.png"/><input type="text" id="st-search-input" class="st-search-input" placeholder="Search Doc..."/></div><img class="swiftype-row-hider hidden-md hidden-lg" src="/images/icons/drawer-toggle-active-fcbef12a.png"/></form></div></div><div class="mobile-left-menu-toggler hidden-md hidden-lg"></div></div></div></div><div id="page" class="co ntainer-fluid"><div class="row"><div id="left-menu-wrapper" class="col-md-3"><nav id="nav-main"><ul><li class="level-1"><a class="expandible" href="/"><span>Apache PredictionIO® Documentation</span></a><ul><li class="level-2"><a class="final" href="/"><span>Welcome to Apache PredictionIO®</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Getting Started</span></a><ul><li class="level-2"><a class="final" href="/start/"><span>A Quick Intro</span></a></li><li class="level-2"><a class="final" href="/install/"><span>Installing Apache PredictionIO</span></a></li><li class="level-2"><a class="final" href="/start/download/"><span>Downloading an Engine Template</span></a></li><li class="level-2"><a class="final" href="/start/deploy/"><span>Deploying Your First Engine</span></a></li><li class="level-2"><a class="final" href="/start/customize/"><span>Customizing the Engine</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Integr ating with Your App</span></a><ul><li class="level-2"><a class="final" href="/appintegration/"><span>App Integration Overview</span></a></li><li class="level-2"><a class="expandible" href="/sdk/"><span>List of SDKs</span></a><ul><li class="level-3"><a class="final" href="/sdk/java/"><span>Java & Android SDK</span></a></li><li class="level-3"><a class="final" href="/sdk/php/"><span>PHP SDK</span></a></li><li class="level-3"><a class="final" href="/sdk/python/"><span>Python SDK</span></a></li><li class="level-3"><a class="final" href="/sdk/ruby/"><span>Ruby SDK</span></a></li><li class="level-3"><a class="final" href="/sdk/community/"><span>Community Powered SDKs</span></a></li></ul></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Deploying an Engine</span></a><ul><li class="level-2"><a class="final" href="/deploy/"><span>Deploying as a Web Service</span></a></li><li class="level-2"><a class="final" href="/batchpredict/"><span>Batch Predictions</span></a></li><l i class="level-2"><a class="final" href="/deploy/monitoring/"><span>Monitoring Engine</span></a></li><li class="level-2"><a class="final" href="/deploy/engineparams/"><span>Setting Engine Parameters</span></a></li><li class="level-2"><a class="final" href="/deploy/enginevariants/"><span>Deploying Multiple Engine Variants</span></a></li><li class="level-2"><a class="final" href="/deploy/plugin/"><span>Engine Server Plugin</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Customizing an Engine</span></a><ul><li class="level-2"><a class="final" href="/customize/"><span>Learning DASE</span></a></li><li class="level-2"><a class="final" href="/customize/dase/"><span>Implement DASE</span></a></li><li class="level-2"><a class="final" href="/customize/troubleshooting/"><span>Troubleshooting Engine Development</span></a></li><li class="level-2"><a class="final" href="/api/current/#package"><span>Engine Scala APIs</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Collecting and Analyzing Data</span></a><ul><li class="level-2"><a class="final" href="/datacollection/"><span>Event Server Overview</span></a></li><li class="level-2"><a class="final" href="/datacollection/eventapi/"><span>Collecting Data with REST/SDKs</span></a></li><li class="level-2"><a class="final" href="/datacollection/eventmodel/"><span>Events Modeling</span></a></li><li class="level-2"><a class="final" href="/datacollection/webhooks/"><span>Unifying Multichannel Data with Webhooks</span></a></li><li class="level-2"><a class="final" href="/datacollection/channel/"><span>Channel</span></a></li><li class="level-2"><a class="final" href="/datacollection/batchimport/"><span>Importing Data in Batch</span></a></li><li class="level-2"><a class="final" href="/datacollection/analytics/"><span>Using Analytics Tools</span></a></li><li class="level-2"><a class="final" href="/datacollection/plugin/"><span>Event Server Plugin</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Choosing an Algorithm</span></a><ul><li class="level-2"><a class="final" href="/algorithm/"><span>Built-in Algorithm Libraries</span></a></li><li class="level-2"><a class="final" href="/algorithm/switch/"><span>Switching to Another Algorithm</span></a></li><li class="level-2"><a class="final" href="/algorithm/multiple/"><span>Combining Multiple Algorithms</span></a></li><li class="level-2"><a class="final" href="/algorithm/custom/"><span>Adding Your Own Algorithms</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>ML Tuning and Evaluation</span></a><ul><li class="level-2"><a class="final" href="/evaluation/"><span>Overview</span></a></li><li class="level-2"><a class="final" href="/evaluation/paramtuning/"><span>Hyperparameter Tuning</span></a></li><li class="level-2"><a class="final" href="/evaluation/evaluationdashboard/"><span>Evaluation Dashboard</span></a></li><li class="level-2"><a class="final " href="/evaluation/metricchoose/"><span>Choosing Evaluation Metrics</span></a></li><li class="level-2"><a class="final" href="/evaluation/metricbuild/"><span>Building Evaluation Metrics</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>System Architecture</span></a><ul><li class="level-2"><a class="final" href="/system/"><span>Architecture Overview</span></a></li><li class="level-2"><a class="final" href="/system/anotherdatastore/"><span>Using Another Data Store</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>PredictionIO® Official Templates</span></a><ul><li class="level-2"><a class="final" href="/templates/"><span>Intro</span></a></li><li class="level-2"><a class="expandible" href="#"><span>Recommendation</span></a><ul><li class="level-3"><a class="final" href="/templates/recommendation/quickstart/"><span>Quick Start</span></a></li><li class="level-3"><a class="final" href="/templates/recommendation/dase/"><span>D ASE</span></a></li><li class="level-3"><a class="final" href="/templates/recommendation/evaluation/"><span>Evaluation Explained</span></a></li><li class="level-3"><a class="final" href="/templates/recommendation/how-to/"><span>How-To</span></a></li><li class="level-3"><a class="final" href="/templates/recommendation/reading-custom-events/"><span>Read Custom Events</span></a></li><li class="level-3"><a class="final" href="/templates/recommendation/customize-data-prep/"><span>Customize Data Preparator</span></a></li><li class="level-3"><a class="final" href="/templates/recommendation/customize-serving/"><span>Customize Serving</span></a></li><li class="level-3"><a class="final" href="/templates/recommendation/training-with-implicit-preference/"><span>Train with Implicit Preference</span></a></li><li class="level-3"><a class="final" href="/templates/recommendation/blacklist-items/"><span>Filter Recommended Items by Blacklist in Query</span></a></li><li class="level-3"><a class="final" href="/templates/recommendation/batch-evaluator/"><span>Batch Persistable Evaluator</span></a></li></ul></li><li class="level-2"><a class="expandible" href="#"><span>E-Commerce Recommendation</span></a><ul><li class="level-3"><a class="final" href="/templates/ecommercerecommendation/quickstart/"><span>Quick Start</span></a></li><li class="level-3"><a class="final" href="/templates/ecommercerecommendation/dase/"><span>DASE</span></a></li><li class="level-3"><a class="final" href="/templates/ecommercerecommendation/how-to/"><span>How-To</span></a></li><li class="level-3"><a class="final" href="/templates/ecommercerecommendation/train-with-rate-event/"><span>Train with Rate Event</span></a></li><li class="level-3"><a class="final" href="/templates/ecommercerecommendation/adjust-score/"><span>Adjust Score</span></a></li></ul></li><li class="level-2"><a class="expandible" href="#"><span>Similar Product</span></a><ul><li class="level-3"><a class="final" href="/templates/similarproduct/qui ckstart/"><span>Quick Start</span></a></li><li class="level-3"><a class="final" href="/templates/similarproduct/dase/"><span>DASE</span></a></li><li class="level-3"><a class="final" href="/templates/similarproduct/how-to/"><span>How-To</span></a></li><li class="level-3"><a class="final" href="/templates/similarproduct/multi-events-multi-algos/"><span>Multiple Events and Multiple Algorithms</span></a></li><li class="level-3"><a class="final" href="/templates/similarproduct/return-item-properties/"><span>Returns Item Properties</span></a></li><li class="level-3"><a class="final" href="/templates/similarproduct/train-with-rate-event/"><span>Train with Rate Event</span></a></li><li class="level-3"><a class="final" href="/templates/similarproduct/rid-user-set-event/"><span>Get Rid of Events for Users</span></a></li><li class="level-3"><a class="final" href="/templates/similarproduct/recommended-user/"><span>Recommend Users</span></a></li></ul></li><li class="level-2"><a class="expandible " href="#"><span>Classification</span></a><ul><li class="level-3"><a class="final" href="/templates/classification/quickstart/"><span>Quick Start</span></a></li><li class="level-3"><a class="final" href="/templates/classification/dase/"><span>DASE</span></a></li><li class="level-3"><a class="final" href="/templates/classification/how-to/"><span>How-To</span></a></li><li class="level-3"><a class="final" href="/templates/classification/add-algorithm/"><span>Use Alternative Algorithm</span></a></li><li class="level-3"><a class="final" href="/templates/classification/reading-custom-properties/"><span>Read Custom Properties</span></a></li></ul></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Engine Template Gallery</span></a><ul><li class="level-2"><a class="final" href="/gallery/template-gallery/"><span>Browse</span></a></li><li class="level-2"><a class="final" href="/community/submit-template/"><span>Submit your Engine as a Template</span></a></li></ul></li><li c lass="level-1"><a class="expandible" href="#"><span>Demo Tutorials</span></a><ul><li class="level-2"><a class="final" href="/demo/tapster/"><span>Comics Recommendation Demo</span></a></li><li class="level-2"><a class="final" href="/demo/community/"><span>Community Contributed Demo</span></a></li><li class="level-2"><a class="final" href="/demo/textclassification/"><span>Text Classification Engine Tutorial</span></a></li></ul></li><li class="level-1"><a class="expandible" href="/community/"><span>Getting Involved</span></a><ul><li class="level-2"><a class="final" href="/community/contribute-code/"><span>Contribute Code</span></a></li><li class="level-2"><a class="final" href="/community/contribute-documentation/"><span>Contribute Documentation</span></a></li><li class="level-2"><a class="final" href="/community/contribute-sdk/"><span>Contribute a SDK</span></a></li><li class="level-2"><a class="final" href="/community/contribute-webhook/"><span>Contribute a Webhook</span></a></li><li class="level-2"><a class="final" href="/community/projects/"><span>Community Projects</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Getting Help</span></a><ul><li class="level-2"><a class="final" href="/resources/faq/"><span>FAQs</span></a></li><li class="level-2"><a class="final" href="/support/"><span>Support</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Resources</span></a><ul><li class="level-2"><a class="final" href="/cli/"><span>Command-line Interface</span></a></li><li class="level-2"><a class="final" href="/resources/release/"><span>Release Cadence</span></a></li><li class="level-2"><a class="final" href="/resources/intellij/"><span>Developing Engines with IntelliJ IDEA</span></a></li><li class="level-2"><a class="final" href="/resources/upgrade/"><span>Upgrade Instructions</span></a></li><li class="level-2"><a class="final" href="/resources/glossary/"><span>Glossary</span></a></li></ul></li><li class=" level-1"><a class="expandible" href="#"><span>Apache Software Foundation</span></a><ul><li class="level-2"><a class="final" href="https://www.apache.org/"><span>Apache Homepage</span></a></li><li class="level-2"><a class="final" href="https://www.apache.org/licenses/"><span>License</span></a></li><li class="level-2"><a class="final" href="https://www.apache.org/foundation/sponsorship.html"><span>Sponsorship</span></a></li><li class="level-2"><a class="final" href="https://www.apache.org/foundation/thanks.html"><span>Thanks</span></a></li><li class="level-2"><a class="final" href="https://www.apache.org/security/"><span>Security</span></a></li></ul></li></ul></nav></div><div class="col-md-9 col-sm-12"><div class="content-header hidden-md hidden-lg"><div id="page-title"><h1>Machine Learning Analytics with Zeppelin</h1></div></div><div id="table-of-content-wrapper"><h5>On this page</h5><aside id="table-of-contents"><ul> <li> <a href="#prerequisites">Prerequisites</a> </li> <li> <a href ="#building-zeppelin-for-apache-spark-1-2">Building Zeppelin for Apache Spark 1.2+</a> </li> <li> <a href="#preparing-zeppelin">Preparing Zeppelin</a> </li> <li> <a href="#performing-analysis-with-zeppelin">Performing Analysis with Zeppelin</a> </li> </ul> </aside><hr/><a id="edit-page-link" href="https://github.com/apache/predictionio/tree/livedoc/docs/manual/source/datacollection/analytics-zeppelin.html.md.erb"><img src="/images/icons/edit-pencil-d6c1bb3d.png"/>Edit this page</a></div><div class="content-header hidden-sm hidden-xs"><div id="page-title"><h1>Machine Learning Analytics with Zeppelin</h1></div></div><div class="content"> <p><a href="http://zeppelin-project.org/">Apache Zeppelin</a> is an interactive computational environment built on Apache Spark like the IPython Notebook. With <a href="http://predictionio.apache.org">Apache PredictionIO</a> and <a href="https://spark.apache.org/sql/">Spark SQL</a>, you can easily analyze your collected events when you are developing or tuning your engine.</p><h2 id='prerequisites' class='header-anchors'>Prerequisites</h2><p>The following instructions assume that you have the command <code>sbt</code> accessible in your shell's search path. Alternatively, you can use the <code>sbt</code> command that comes with Apache PredictionIO at <code>$PIO_HOME/sbt/sbt</code>.</p> <p><h2 id='export-events-to-apache-parquet' class='header-anchors'>Export Events to Apache Parquet</h2><p>PredictionIO supports exporting your events to <a href="http://parquet.apache.org/">Apache Parquet</a>, a columnar storage format that allows you to query quickly.</p><p>Let's export the data we imported in <a href="/templates/recommendation/quickstart/#import-sample-data">Recommendation Engine Template Quick Start</a>, and assume the App ID is 1.</p><div class="highlight shell"><table style="border-spacing: 0"><tbody><tr><td class="gutter gl" style="text-align: right"><pre class="lineno">1</pre></td><td class="code"><pre><span class="g p">$ </span><span class="nv">$PIO_HOME</span>/bin/pio <span class="nb">export</span> --appid 1 --output /tmp/movies --format parquet </pre></td></tr></tbody></table> </div> <p>After the command has finished successfully, you should see something similar to the following.</p><div class="highlight shell"><table style="border-spacing: 0"><tbody><tr><td class="gutter gl" style="text-align: right"><pre class="lineno">1 2 3 @@ -39,7 +39,7 @@ SELECT event, COUNT<span class="o">(</span><span class="k">*</span><span class=" 3</pre></td><td class="code"><pre>%sql SELECT properties.rating AS r, COUNT<span class="o">(</span><span class="k">*</span><span class="o">)</span> AS c FROM events WHERE properties.rating IS NOT NULL GROUP BY properties.rating ORDER BY r -</pre></td></tr></tbody></table> </div> <p><img alt="Breakdown of Rating Values" src="/images/datacollection/zeppelin-04-d646c299.png"/></p><p>Happy analyzing!</p></div></div></div></div><footer><div class="container"><div class="seperator"></div><div class="row"><div class="col-md-6 footer-link-column"><div class="footer-link-column-row"><h4>Community</h4><ul><li><a href="//predictionio.apache.org/install/" target="blank">Download</a></li><li><a href="//predictionio.apache.org/" target="blank">Docs</a></li><li><a href="//github.com/apache/predictionio" target="blank">GitHub</a></li><li><a href="mailto:[email protected]" target="blank">Subscribe to User Mailing List</a></li><li><a href="//stackoverflow.com/questions/tagged/predictionio" target="blank">Stackoverflow</a></li></ul></div></div><div class="col-md-6 footer-link-column"><div class="footer-link-column-row"><h4>Contribute</h4><ul><li><a href="//predictionio.apache.org/community/contribute-code/" target=" blank">Contribute</a></li><li><a href="//github.com/apache/predictionio" target="blank">Source Code</a></li><li><a href="//issues.apache.org/jira/browse/PIO" target="blank">Bug Tracker</a></li><li><a href="mailto:[email protected]" target="blank">Subscribe to Development Mailing List</a></li></ul></div></div></div><div class="row"><div class="col-md-12 footer-link-column"><p>Apache PredictionIO, PredictionIO, Apache, the Apache feather logo, and the Apache PredictionIO project logo are either registered trademarks or trademarks of The Apache Software Foundation in the United States and other countries.</p><p>All other marks mentioned may be trademarks or registered trademarks of their respective owners.</p></div></div></div><div id="footer-bottom"><div class="container"><div class="row"><div class="col-md-12"><div id="footer-logo-wrapper"><img alt="PredictionIO" src="/images/logos/logo-white-d1e9c6e6.png"/><span>®</span></div><div id="social-icons-wrapper"><a cl ass="github-button" href="https://github.com/apache/predictionio" data-style="mega" data-count-href="/apache/predictionio/stargazers" data-count-api="/repos/apache/predictionio#stargazers_count" data-count-aria-label="# stargazers on GitHub" aria-label="Star apache/predictionio on GitHub">Star</a> <a class="github-button" href="https://github.com/apache/predictionio/fork" data-icon="octicon-git-branch" data-style="mega" data-count-href="/apache/predictionio/network" data-count-api="/repos/apache/predictionio#forks_count" data-count-aria-label="# forks on GitHub" aria-label="Fork apache/predictionio on GitHub">Fork</a> <script id="github-bjs" async="" defer="" src="https://buttons.github.io/buttons.js"></script><a href="https://twitter.com/predictionio" target="blank"><img alt="PredictionIO on Twitter" src="/images/icons/twitter-ea9dc152.png"/></a> <a href="https://www.facebook.com/predictionio" target="blank"><img alt="PredictionIO on Facebook" src="/images/icons/facebook-5c57939c.p ng"/></a> </div></div></div></div></div></footer></div><script>(function(w,d,t,u,n,s,e){w['SwiftypeObject']=n;w[n]=w[n]||function(){ +</pre></td></tr></tbody></table> </div> <p><img alt="Breakdown of Rating Values" src="/images/datacollection/zeppelin-04-d646c299.png"/></p><p>Happy analyzing!</p></div></div></div></div><footer><div class="container"><div class="seperator"></div><div class="row"><div class="col-md-6 footer-link-column"><div class="footer-link-column-row"><h4>Community</h4><ul><li><a href="//predictionio.apache.org/install/" target="blank">Download</a></li><li><a href="//predictionio.apache.org/" target="blank">Docs</a></li><li><a href="//github.com/apache/predictionio" target="blank">GitHub</a></li><li><a href="mailto:[email protected]" target="blank">Subscribe to User Mailing List</a></li><li><a href="//stackoverflow.com/questions/tagged/predictionio" target="blank">Stackoverflow</a></li></ul></div></div><div class="col-md-6 footer-link-column"><div class="footer-link-column-row"><h4>Contribute</h4><ul><li><a href="//predictionio.apache.org/community/contribute-code/" target=" blank">Contribute</a></li><li><a href="//github.com/apache/predictionio" target="blank">Source Code</a></li><li><a href="//issues.apache.org/jira/browse/PIO" target="blank">Bug Tracker</a></li><li><a href="mailto:[email protected]" target="blank">Subscribe to Development Mailing List</a></li></ul></div></div></div><div class="row"><div class="col-md-12 footer-link-column"><p>Apache PredictionIO, PredictionIO, Apache, the Apache feather logo, and the Apache PredictionIO project logo are either registered trademarks or trademarks of The Apache Software Foundation in the United States and other countries.</p><p>All other marks mentioned may be trademarks or registered trademarks of their respective owners.</p></div></div></div><div id="footer-bottom"><div class="container"><div class="row"><div class="col-md-12"><div id="footer-logo-wrapper"><img alt="PredictionIO" src="/images/logos/logo-white-d1e9c6e6.png"/><span>®</span></div><div id="social-icons-wrapper"><a cl ass="github-button" href="https://github.com/apache/predictionio" data-icon="octicon-star" data-show-count="true" aria-label="Star apache/predictionio on GitHub">Star</a> <a class="github-button" href="https://github.com/apache/predictionio/fork" data-icon="octicon-repo-forked" data-show-count="true" aria-label="Fork apache/predictionio on GitHub">Fork</a> <script id="github-bjs" async="" defer="" src="https://buttons.github.io/buttons.js"></script><a href="https://twitter.com/predictionio" target="blank"><img alt="PredictionIO on Twitter" src="/images/icons/twitter-ea9dc152.png"/></a> <a href="https://www.facebook.com/predictionio" target="blank"><img alt="PredictionIO on Facebook" src="/images/icons/facebook-5c57939c.png"/></a> </div></div></div></div></div></footer></div><script>(function(w,d,t,u,n,s,e){w['SwiftypeObject']=n;w[n]=w[n]||function(){ (w[n].q=w[n].q||[]).push(arguments);};s=d.createElement(t); e=d.getElementsByTagName(t)[0];s.async=1;s.src=u;e.parentNode.insertBefore(s,e); })(window,document,'script','//s.swiftypecdn.com/install/v1/st.js','_st');
