http://git-wip-us.apache.org/repos/asf/incubator-predictionio-site/blob/452034c1/datacollection/analytics-ipynb/index.html ---------------------------------------------------------------------- diff --git a/datacollection/analytics-ipynb/index.html b/datacollection/analytics-ipynb/index.html index 9d84b61..84b9c43 100644 --- a/datacollection/analytics-ipynb/index.html +++ b/datacollection/analytics-ipynb/index.html @@ -1,4 +1,4 @@ -<!DOCTYPE html><html><head><title>Machine Learning Analytics with IPython Notebook</title><meta charset="utf-8"/><meta content="IE=edge,chrome=1" http-equiv="X-UA-Compatible"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><meta class="swiftype" name="title" data-type="string" content="Machine Learning Analytics with IPython Notebook"/><link rel="canonical" href="https://predictionio.incubator.apache.org/datacollection/analytics-ipynb/"/><link href="/images/favicon/normal-b330020a.png" rel="shortcut icon"/><link href="/images/favicon/apple-c0febcf2.png" rel="apple-touch-icon"/><link href="//fonts.googleapis.com/css?family=Open+Sans:300italic,400italic,600italic,700italic,800italic,400,300,600,700,800" rel="stylesheet"/><link href="//maxcdn.bootstrapcdn.com/font-awesome/4.2.0/css/font-awesome.min.css" rel="stylesheet"/><link href="/stylesheets/application-eccfc6cb.css" rel="stylesheet" type="text/css"/><script src="//cdnjs.cloudflare.com/ajax/libs/html5shiv/3. 7.2/html5shiv.min.js"></script><script src="//cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script><script src="//use.typekit.net/pqo0itb.js"></script><script>try{Typekit.load({ async: true });}catch(e){}</script></head><body><div id="global"><header><div class="container" id="header-wrapper"><div class="row"><div class="col-sm-12"><div id="logo-wrapper"><span id="drawer-toggle"></span><a href="#"></a><a href="http://predictionio.incubator.apache.org/"><img alt="PredictionIO" id="logo" src="/images/logos/logo-ee2b9bb3.png"/></a><span>â¢</span></div><div id="menu-wrapper"><div id="pill-wrapper"><a class="pill left" href="/gallery/template-gallery">TEMPLATES</a> <a class="pill right" href="//github.com/apache/incubator-predictionio/">OPEN SOURCE</a></div></div><img class="mobile-search-bar-toggler hidden-md hidden-lg" src="/images/icons/search-glass-704bd4ff.png"/></div></div></div></header><div id="search-bar-row-wrapper"><div class="container-fluid" id=" search-bar-row"><div class="row"><div class="col-md-9 col-sm-11 col-xs-11"><div class="hidden-md hidden-lg" id="mobile-page-heading-wrapper"><p>PredictionIO Docs</p><h4>Machine Learning Analytics with IPython Notebook</h4></div><h4 class="hidden-sm hidden-xs">PredictionIO Docs</h4></div><div class="col-md-3 col-sm-1 col-xs-1 hidden-md hidden-lg"><img id="left-menu-indicator" src="/images/icons/down-arrow-dfe9f7fe.png"/></div><div class="col-md-3 col-sm-12 col-xs-12 swiftype-wrapper"><div class="swiftype"><form class="search-form"><img class="search-box-toggler hidden-xs hidden-sm" src="/images/icons/search-glass-704bd4ff.png"/><div class="search-box"><img src="/images/icons/search-glass-704bd4ff.png"/><input type="text" id="st-search-input" class="st-search-input" placeholder="Search Doc..."/></div><img class="swiftype-row-hider hidden-md hidden-lg" src="/images/icons/drawer-toggle-active-fcbef12a.png"/></form></div></div><div class="mobile-left-menu-toggler hidden-md hidden-lg"></d iv></div></div></div><div id="page" class="container-fluid"><div class="row"><div id="left-menu-wrapper" class="col-md-3"><nav id="nav-main"><ul><li class="level-1"><a class="expandible" href="/"><span>Apache PredictionIO⢠(incubating) Documentation</span></a><ul><li class="level-2"><a class="final" href="/"><span>Welcome to Apache PredictionIO (incubating)</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Getting Started</span></a><ul><li class="level-2"><a class="final" href="/start/"><span>A Quick Intro</span></a></li><li class="level-2"><a class="final" href="/install/"><span>Installing Apache PredictionIO (incubating)</span></a></li><li class="level-2"><a class="final" href="/start/download/"><span>Downloading an Engine Template</span></a></li><li class="level-2"><a class="final" href="/start/deploy/"><span>Deploying Your First Engine</span></a></li><li class="level-2"><a class="final" href="/start/customize/"><span>Customizing the Engine</span ></a></li></ul></li><li class="level-1"><a class="expandible" >href="#"><span>Integrating with Your App</span></a><ul><li class="level-2"><a >class="final" href="/appintegration/"><span>App Integration >Overview</span></a></li><li class="level-2"><a class="expandible" >href="/sdk/"><span>List of SDKs</span></a><ul><li class="level-3"><a >class="final" href="/sdk/java/"><span>Java & Android SDK</span></a></li><li >class="level-3"><a class="final" href="/sdk/php/"><span>PHP >SDK</span></a></li><li class="level-3"><a class="final" >href="/sdk/python/"><span>Python SDK</span></a></li><li class="level-3"><a >class="final" href="/sdk/ruby/"><span>Ruby SDK</span></a></li><li >class="level-3"><a class="final" href="/sdk/community/"><span>Community >Powered SDKs</span></a></li></ul></li></ul></li><li class="level-1"><a >class="expandible" href="#"><span>Deploying an Engine</span></a><ul><li >class="level-2"><a class="final" href="/deploy/"><span>Deploying as a Web >Service</span></a></li><li class="level- 2"><a class="final" href="/batchpredict/"><span>Batch Predictions</span></a></li><li class="level-2"><a class="final" href="/deploy/monitoring/"><span>Monitoring Engine</span></a></li><li class="level-2"><a class="final" href="/deploy/engineparams/"><span>Setting Engine Parameters</span></a></li><li class="level-2"><a class="final" href="/deploy/enginevariants/"><span>Deploying Multiple Engine Variants</span></a></li><li class="level-2"><a class="final" href="/deploy/plugin/"><span>Engine Server Plugin</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Customizing an Engine</span></a><ul><li class="level-2"><a class="final" href="/customize/"><span>Learning DASE</span></a></li><li class="level-2"><a class="final" href="/customize/dase/"><span>Implement DASE</span></a></li><li class="level-2"><a class="final" href="/customize/troubleshooting/"><span>Troubleshooting Engine Development</span></a></li><li class="level-2"><a class="final" href="/api/current /#package"><span>Engine Scala APIs</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Collecting and Analyzing Data</span></a><ul><li class="level-2"><a class="final" href="/datacollection/"><span>Event Server Overview</span></a></li><li class="level-2"><a class="final" href="/datacollection/eventapi/"><span>Collecting Data with REST/SDKs</span></a></li><li class="level-2"><a class="final" href="/datacollection/eventmodel/"><span>Events Modeling</span></a></li><li class="level-2"><a class="final" href="/datacollection/webhooks/"><span>Unifying Multichannel Data with Webhooks</span></a></li><li class="level-2"><a class="final" href="/datacollection/channel/"><span>Channel</span></a></li><li class="level-2"><a class="final" href="/datacollection/batchimport/"><span>Importing Data in Batch</span></a></li><li class="level-2"><a class="final" href="/datacollection/analytics/"><span>Using Analytics Tools</span></a></li><li class="level-2"><a class="final" hr ef="/datacollection/plugin/"><span>Event Server Plugin</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Choosing an Algorithm(s)</span></a><ul><li class="level-2"><a class="final" href="/algorithm/"><span>Built-in Algorithm Libraries</span></a></li><li class="level-2"><a class="final" href="/algorithm/switch/"><span>Switching to Another Algorithm</span></a></li><li class="level-2"><a class="final" href="/algorithm/multiple/"><span>Combining Multiple Algorithms</span></a></li><li class="level-2"><a class="final" href="/algorithm/custom/"><span>Adding Your Own Algorithms</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>ML Tuning and Evaluation</span></a><ul><li class="level-2"><a class="final" href="/evaluation/"><span>Overview</span></a></li><li class="level-2"><a class="final" href="/evaluation/paramtuning/"><span>Hyperparameter Tuning</span></a></li><li class="level-2"><a class="final" href="/evaluation/evaluationdas hboard/"><span>Evaluation Dashboard</span></a></li><li class="level-2"><a class="final" href="/evaluation/metricchoose/"><span>Choosing Evaluation Metrics</span></a></li><li class="level-2"><a class="final" href="/evaluation/metricbuild/"><span>Building Evaluation Metrics</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>System Architecture</span></a><ul><li class="level-2"><a class="final" href="/system/"><span>Architecture Overview</span></a></li><li class="level-2"><a class="final" href="/system/anotherdatastore/"><span>Using Another Data Store</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>PredictionIO Official Templates</span></a><ul><li class="level-2"><a class="final" href="/templates/"><span>Intro</span></a></li><li class="level-2"><a class="expandible" href="#"><span>Recommendation</span></a><ul><li class="level-3"><a class="final" href="/templates/recommendation/quickstart/"><span>Quick Start</span></a></li ><li class="level-3"><a class="final" >href="/templates/recommendation/dase/"><span>DASE</span></a></li><li >class="level-3"><a class="final" >href="/templates/recommendation/evaluation/"><span>Evaluation >Explained</span></a></li><li class="level-3"><a class="final" >href="/templates/recommendation/how-to/"><span>How-To</span></a></li><li >class="level-3"><a class="final" >href="/templates/recommendation/reading-custom-events/"><span>Read Custom >Events</span></a></li><li class="level-3"><a class="final" >href="/templates/recommendation/customize-data-prep/"><span>Customize Data >Preparator</span></a></li><li class="level-3"><a class="final" >href="/templates/recommendation/customize-serving/"><span>Customize >Serving</span></a></li><li class="level-3"><a class="final" >href="/templates/recommendation/training-with-implicit-preference/"><span>Train > with Implicit Preference</span></a></li><li class="level-3"><a class="final" >href="/templates/recommendation/blacklist-items/"><span>Filter Recommen ded Items by Blacklist in Query</span></a></li><li class="level-3"><a class="final" href="/templates/recommendation/batch-evaluator/"><span>Batch Persistable Evaluator</span></a></li></ul></li><li class="level-2"><a class="expandible" href="#"><span>E-Commerce Recommendation</span></a><ul><li class="level-3"><a class="final" href="/templates/ecommercerecommendation/quickstart/"><span>Quick Start</span></a></li><li class="level-3"><a class="final" href="/templates/ecommercerecommendation/dase/"><span>DASE</span></a></li><li class="level-3"><a class="final" href="/templates/ecommercerecommendation/how-to/"><span>How-To</span></a></li><li class="level-3"><a class="final" href="/templates/ecommercerecommendation/train-with-rate-event/"><span>Train with Rate Event</span></a></li><li class="level-3"><a class="final" href="/templates/ecommercerecommendation/adjust-score/"><span>Adjust Score</span></a></li></ul></li><li class="level-2"><a class="expandible" href="#"><span>Similar Product</s pan></a><ul><li class="level-3"><a class="final" href="/templates/similarproduct/quickstart/"><span>Quick Start</span></a></li><li class="level-3"><a class="final" href="/templates/similarproduct/dase/"><span>DASE</span></a></li><li class="level-3"><a class="final" href="/templates/similarproduct/how-to/"><span>How-To</span></a></li><li class="level-3"><a class="final" href="/templates/similarproduct/multi-events-multi-algos/"><span>Multiple Events and Multiple Algorithms</span></a></li><li class="level-3"><a class="final" href="/templates/similarproduct/return-item-properties/"><span>Returns Item Properties</span></a></li><li class="level-3"><a class="final" href="/templates/similarproduct/train-with-rate-event/"><span>Train with Rate Event</span></a></li><li class="level-3"><a class="final" href="/templates/similarproduct/rid-user-set-event/"><span>Get Rid of Events for Users</span></a></li><li class="level-3"><a class="final" href="/templates/similarproduct/recommended-user/"><sp an>Recommend Users</span></a></li></ul></li><li class="level-2"><a class="expandible" href="#"><span>Classification</span></a><ul><li class="level-3"><a class="final" href="/templates/classification/quickstart/"><span>Quick Start</span></a></li><li class="level-3"><a class="final" href="/templates/classification/dase/"><span>DASE</span></a></li><li class="level-3"><a class="final" href="/templates/classification/how-to/"><span>How-To</span></a></li><li class="level-3"><a class="final" href="/templates/classification/add-algorithm/"><span>Use Alternative Algorithm</span></a></li><li class="level-3"><a class="final" href="/templates/classification/reading-custom-properties/"><span>Read Custom Properties</span></a></li></ul></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Engine Template Gallery</span></a><ul><li class="level-2"><a class="final" href="/gallery/template-gallery/"><span>Browse</span></a></li><li class="level-2"><a class="final" href="/community/sub mit-template/"><span>Submit your Engine as a Template</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Demo Tutorials</span></a><ul><li class="level-2"><a class="final" href="/demo/tapster/"><span>Comics Recommendation Demo</span></a></li><li class="level-2"><a class="final" href="/demo/community/"><span>Community Contributed Demo</span></a></li><li class="level-2"><a class="final" href="/demo/textclassification/"><span>Text Classification Engine Tutorial</span></a></li></ul></li><li class="level-1"><a class="expandible" href="/community/"><span>Getting Involved</span></a><ul><li class="level-2"><a class="final" href="/community/contribute-code/"><span>Contribute Code</span></a></li><li class="level-2"><a class="final" href="/community/contribute-documentation/"><span>Contribute Documentation</span></a></li><li class="level-2"><a class="final" href="/community/contribute-sdk/"><span>Contribute a SDK</span></a></li><li class="level-2"><a class="final" href="/community/contribute-webhook/"><span>Contribute a Webhook</span></a></li><li class="level-2"><a class="final" href="/community/projects/"><span>Community Projects</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Getting Help</span></a><ul><li class="level-2"><a class="final" href="/resources/faq/"><span>FAQs</span></a></li><li class="level-2"><a class="final" href="/support/"><span>Support</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Resources</span></a><ul><li class="level-2"><a class="final" href="/cli/"><span>Command-line Interface</span></a></li><li class="level-2"><a class="final" href="/resources/release/"><span>Release Cadence</span></a></li><li class="level-2"><a class="final" href="/resources/intellij/"><span>Developing Engines with IntelliJ IDEA</span></a></li><li class="level-2"><a class="final" href="/resources/upgrade/"><span>Upgrade Instructions</span></a></li><li class="level-2"><a class="fi nal" href="/resources/glossary/"><span>Glossary</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Apache Software Foundation</span></a><ul><li class="level-2"><a class="final" href="https://www.apache.org/"><span>Apache Homepage</span></a></li><li class="level-2"><a class="final" href="https://www.apache.org/licenses/"><span>License</span></a></li><li class="level-2"><a class="final" href="https://www.apache.org/foundation/sponsorship.html"><span>Sponsorship</span></a></li><li class="level-2"><a class="final" href="https://www.apache.org/foundation/thanks.html"><span>Thanks</span></a></li><li class="level-2"><a class="final" href="https://www.apache.org/security/"><span>Security</span></a></li></ul></li></ul></nav></div><div class="col-md-9 col-sm-12"><div class="content-header hidden-md hidden-lg"><div id="page-title"><h1>Machine Learning Analytics with IPython Notebook</h1></div></div><div id="table-of-content-wrapper"><h5>On this page</h5><aside id ="table-of-contents"><ul> <li> <a href="#prerequisites">Prerequisites</a> </li> <li> <a href="#preparing-ipython-notebook">Preparing IPython Notebook</a> </li> <li> <a href="#performing-analysis-with-spark-sql">Performing Analysis with Spark SQL</a> </li> </ul> </aside><hr/><a id="edit-page-link" href="https://github.com/apache/incubator-predictionio/tree/livedoc/docs/manual/source/datacollection/analytics-ipynb.html.md.erb"><img src="/images/icons/edit-pencil-d6c1bb3d.png"/>Edit this page</a></div><div class="content-header hidden-sm hidden-xs"><div id="page-title"><h1>Machine Learning Analytics with IPython Notebook</h1></div></div><div class="content"> <p><a href="http://ipython.org/notebook.html">IPython Notebook</a> is a very powerful interactive computational environment, and with <a href="http://predictionio.incubator.apache.org">Apache PredictionIO (incubating)</a>, <a href="http://spark.apache.org/docs/latest/api/python/">PySpark</a> and <a href="https://spark.apache.org/sq l/">Spark SQL</a>, you can easily analyze your collected events when you are developing or tuning your engine.</p><h2 id='prerequisites' class='header-anchors'>Prerequisites</h2><p>Before you begin, please make sure you have the latest stable IPython installed, and that the command <code>ipython</code> can be accessed from your shell's search path.</p> <p><h2 id='export-events-to-apache-parquet' class='header-anchors'>Export Events to Apache Parquet</h2><p>PredictionIO supports exporting your events to <a href="http://parquet.incubator.apache.org/">Apache Parquet</a>, a columnar storage format that allows you to query quickly.</p><p>Let's export the data we imported in <a href="/templates/recommendation/quickstart/#import-sample-data">Recommendation Engine Template Quick Start</a>, and assume the App ID is 1.</p><div class="highlight shell"><table style="border-spacing: 0"><tbody><tr><td class="gutter gl" style="text-align: right"><pre class="lineno">1</pre></td><td class="c ode"><pre><span class="gp">$ </span><span class="nv">$PIO_HOME</span>/bin/pio <span class="nb">export</span> --appid 1 --output /tmp/movies --format parquet +<!DOCTYPE html><html><head><title>Machine Learning Analytics with IPython Notebook</title><meta charset="utf-8"/><meta content="IE=edge,chrome=1" http-equiv="X-UA-Compatible"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><meta class="swiftype" name="title" data-type="string" content="Machine Learning Analytics with IPython Notebook"/><link rel="canonical" href="https://predictionio.apache.org/datacollection/analytics-ipynb/"/><link href="/images/favicon/normal-b330020a.png" rel="shortcut icon"/><link href="/images/favicon/apple-c0febcf2.png" rel="apple-touch-icon"/><link href="//fonts.googleapis.com/css?family=Open+Sans:300italic,400italic,600italic,700italic,800italic,400,300,600,700,800" rel="stylesheet"/><link href="//maxcdn.bootstrapcdn.com/font-awesome/4.2.0/css/font-awesome.min.css" rel="stylesheet"/><link href="/stylesheets/application-eccfc6cb.css" rel="stylesheet" type="text/css"/><script src="//cdnjs.cloudflare.com/ajax/libs/html5shiv/3.7.2/html5s hiv.min.js"></script><script src="//cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script><script src="//use.typekit.net/pqo0itb.js"></script><script>try{Typekit.load({ async: true });}catch(e){}</script></head><body><div id="global"><header><div class="container" id="header-wrapper"><div class="row"><div class="col-sm-12"><div id="logo-wrapper"><span id="drawer-toggle"></span><a href="#"></a><a href="http://predictionio.apache.org/"><img alt="Apache PredictionIO" id="logo" src="/images/logos/logo-ee2b9bb3.png"/></a><span>â¢</span></div><div id="menu-wrapper"><div id="pill-wrapper"><a class="pill left" href="/gallery/template-gallery">TEMPLATES</a> <a class="pill right" href="//github.com/apache/incubator-predictionio/">OPEN SOURCE</a></div></div><img class="mobile-search-bar-toggler hidden-md hidden-lg" src="/images/icons/search-glass-704bd4ff.png"/></div></div></div></header><div id="search-bar-row-wrapper"><div class="container-fluid" id="search-bar-ro w"><div class="row"><div class="col-md-9 col-sm-11 col-xs-11"><div class="hidden-md hidden-lg" id="mobile-page-heading-wrapper"><p>PredictionIO Docs</p><h4>Machine Learning Analytics with IPython Notebook</h4></div><h4 class="hidden-sm hidden-xs">PredictionIO Docs</h4></div><div class="col-md-3 col-sm-1 col-xs-1 hidden-md hidden-lg"><img id="left-menu-indicator" src="/images/icons/down-arrow-dfe9f7fe.png"/></div><div class="col-md-3 col-sm-12 col-xs-12 swiftype-wrapper"><div class="swiftype"><form class="search-form"><img class="search-box-toggler hidden-xs hidden-sm" src="/images/icons/search-glass-704bd4ff.png"/><div class="search-box"><img src="/images/icons/search-glass-704bd4ff.png"/><input type="text" id="st-search-input" class="st-search-input" placeholder="Search Doc..."/></div><img class="swiftype-row-hider hidden-md hidden-lg" src="/images/icons/drawer-toggle-active-fcbef12a.png"/></form></div></div><div class="mobile-left-menu-toggler hidden-md hidden-lg"></div></div></di v></div><div id="page" class="container-fluid"><div class="row"><div id="left-menu-wrapper" class="col-md-3"><nav id="nav-main"><ul><li class="level-1"><a class="expandible" href="/"><span>Apache PredictionIO⢠Documentation</span></a><ul><li class="level-2"><a class="final" href="/"><span>Welcome to Apache PredictionIO</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Getting Started</span></a><ul><li class="level-2"><a class="final" href="/start/"><span>A Quick Intro</span></a></li><li class="level-2"><a class="final" href="/install/"><span>Installing Apache PredictionIO</span></a></li><li class="level-2"><a class="final" href="/start/download/"><span>Downloading an Engine Template</span></a></li><li class="level-2"><a class="final" href="/start/deploy/"><span>Deploying Your First Engine</span></a></li><li class="level-2"><a class="final" href="/start/customize/"><span>Customizing the Engine</span></a></li></ul></li><li class="level-1"><a class="ex pandible" href="#"><span>Integrating with Your App</span></a><ul><li class="level-2"><a class="final" href="/appintegration/"><span>App Integration Overview</span></a></li><li class="level-2"><a class="expandible" href="/sdk/"><span>List of SDKs</span></a><ul><li class="level-3"><a class="final" href="/sdk/java/"><span>Java & Android SDK</span></a></li><li class="level-3"><a class="final" href="/sdk/php/"><span>PHP SDK</span></a></li><li class="level-3"><a class="final" href="/sdk/python/"><span>Python SDK</span></a></li><li class="level-3"><a class="final" href="/sdk/ruby/"><span>Ruby SDK</span></a></li><li class="level-3"><a class="final" href="/sdk/community/"><span>Community Powered SDKs</span></a></li></ul></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Deploying an Engine</span></a><ul><li class="level-2"><a class="final" href="/deploy/"><span>Deploying as a Web Service</span></a></li><li class="level-2"><a class="final" href="/batchpredict/"><span>Batc h Predictions</span></a></li><li class="level-2"><a class="final" href="/deploy/monitoring/"><span>Monitoring Engine</span></a></li><li class="level-2"><a class="final" href="/deploy/engineparams/"><span>Setting Engine Parameters</span></a></li><li class="level-2"><a class="final" href="/deploy/enginevariants/"><span>Deploying Multiple Engine Variants</span></a></li><li class="level-2"><a class="final" href="/deploy/plugin/"><span>Engine Server Plugin</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Customizing an Engine</span></a><ul><li class="level-2"><a class="final" href="/customize/"><span>Learning DASE</span></a></li><li class="level-2"><a class="final" href="/customize/dase/"><span>Implement DASE</span></a></li><li class="level-2"><a class="final" href="/customize/troubleshooting/"><span>Troubleshooting Engine Development</span></a></li><li class="level-2"><a class="final" href="/api/current/#package"><span>Engine Scala APIs</span></a></li></ ul></li><li class="level-1"><a class="expandible" href="#"><span>Collecting and Analyzing Data</span></a><ul><li class="level-2"><a class="final" href="/datacollection/"><span>Event Server Overview</span></a></li><li class="level-2"><a class="final" href="/datacollection/eventapi/"><span>Collecting Data with REST/SDKs</span></a></li><li class="level-2"><a class="final" href="/datacollection/eventmodel/"><span>Events Modeling</span></a></li><li class="level-2"><a class="final" href="/datacollection/webhooks/"><span>Unifying Multichannel Data with Webhooks</span></a></li><li class="level-2"><a class="final" href="/datacollection/channel/"><span>Channel</span></a></li><li class="level-2"><a class="final" href="/datacollection/batchimport/"><span>Importing Data in Batch</span></a></li><li class="level-2"><a class="final" href="/datacollection/analytics/"><span>Using Analytics Tools</span></a></li><li class="level-2"><a class="final" href="/datacollection/plugin/"><span>Event Server Plug in</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Choosing an Algorithm(s)</span></a><ul><li class="level-2"><a class="final" href="/algorithm/"><span>Built-in Algorithm Libraries</span></a></li><li class="level-2"><a class="final" href="/algorithm/switch/"><span>Switching to Another Algorithm</span></a></li><li class="level-2"><a class="final" href="/algorithm/multiple/"><span>Combining Multiple Algorithms</span></a></li><li class="level-2"><a class="final" href="/algorithm/custom/"><span>Adding Your Own Algorithms</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>ML Tuning and Evaluation</span></a><ul><li class="level-2"><a class="final" href="/evaluation/"><span>Overview</span></a></li><li class="level-2"><a class="final" href="/evaluation/paramtuning/"><span>Hyperparameter Tuning</span></a></li><li class="level-2"><a class="final" href="/evaluation/evaluationdashboard/"><span>Evaluation Dashboard</span></a></li>< li class="level-2"><a class="final" href="/evaluation/metricchoose/"><span>Choosing Evaluation Metrics</span></a></li><li class="level-2"><a class="final" href="/evaluation/metricbuild/"><span>Building Evaluation Metrics</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>System Architecture</span></a><ul><li class="level-2"><a class="final" href="/system/"><span>Architecture Overview</span></a></li><li class="level-2"><a class="final" href="/system/anotherdatastore/"><span>Using Another Data Store</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>PredictionIO Official Templates</span></a><ul><li class="level-2"><a class="final" href="/templates/"><span>Intro</span></a></li><li class="level-2"><a class="expandible" href="#"><span>Recommendation</span></a><ul><li class="level-3"><a class="final" href="/templates/recommendation/quickstart/"><span>Quick Start</span></a></li><li class="level-3"><a class="final" href="/templat es/recommendation/dase/"><span>DASE</span></a></li><li class="level-3"><a class="final" href="/templates/recommendation/evaluation/"><span>Evaluation Explained</span></a></li><li class="level-3"><a class="final" href="/templates/recommendation/how-to/"><span>How-To</span></a></li><li class="level-3"><a class="final" href="/templates/recommendation/reading-custom-events/"><span>Read Custom Events</span></a></li><li class="level-3"><a class="final" href="/templates/recommendation/customize-data-prep/"><span>Customize Data Preparator</span></a></li><li class="level-3"><a class="final" href="/templates/recommendation/customize-serving/"><span>Customize Serving</span></a></li><li class="level-3"><a class="final" href="/templates/recommendation/training-with-implicit-preference/"><span>Train with Implicit Preference</span></a></li><li class="level-3"><a class="final" href="/templates/recommendation/blacklist-items/"><span>Filter Recommended Items by Blacklist in Query</span></a></li><li c lass="level-3"><a class="final" href="/templates/recommendation/batch-evaluator/"><span>Batch Persistable Evaluator</span></a></li></ul></li><li class="level-2"><a class="expandible" href="#"><span>E-Commerce Recommendation</span></a><ul><li class="level-3"><a class="final" href="/templates/ecommercerecommendation/quickstart/"><span>Quick Start</span></a></li><li class="level-3"><a class="final" href="/templates/ecommercerecommendation/dase/"><span>DASE</span></a></li><li class="level-3"><a class="final" href="/templates/ecommercerecommendation/how-to/"><span>How-To</span></a></li><li class="level-3"><a class="final" href="/templates/ecommercerecommendation/train-with-rate-event/"><span>Train with Rate Event</span></a></li><li class="level-3"><a class="final" href="/templates/ecommercerecommendation/adjust-score/"><span>Adjust Score</span></a></li></ul></li><li class="level-2"><a class="expandible" href="#"><span>Similar Product</span></a><ul><li class="level-3"><a class="final" hre f="/templates/similarproduct/quickstart/"><span>Quick Start</span></a></li><li class="level-3"><a class="final" href="/templates/similarproduct/dase/"><span>DASE</span></a></li><li class="level-3"><a class="final" href="/templates/similarproduct/how-to/"><span>How-To</span></a></li><li class="level-3"><a class="final" href="/templates/similarproduct/multi-events-multi-algos/"><span>Multiple Events and Multiple Algorithms</span></a></li><li class="level-3"><a class="final" href="/templates/similarproduct/return-item-properties/"><span>Returns Item Properties</span></a></li><li class="level-3"><a class="final" href="/templates/similarproduct/train-with-rate-event/"><span>Train with Rate Event</span></a></li><li class="level-3"><a class="final" href="/templates/similarproduct/rid-user-set-event/"><span>Get Rid of Events for Users</span></a></li><li class="level-3"><a class="final" href="/templates/similarproduct/recommended-user/"><span>Recommend Users</span></a></li></ul></li><li clas s="level-2"><a class="expandible" href="#"><span>Classification</span></a><ul><li class="level-3"><a class="final" href="/templates/classification/quickstart/"><span>Quick Start</span></a></li><li class="level-3"><a class="final" href="/templates/classification/dase/"><span>DASE</span></a></li><li class="level-3"><a class="final" href="/templates/classification/how-to/"><span>How-To</span></a></li><li class="level-3"><a class="final" href="/templates/classification/add-algorithm/"><span>Use Alternative Algorithm</span></a></li><li class="level-3"><a class="final" href="/templates/classification/reading-custom-properties/"><span>Read Custom Properties</span></a></li></ul></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Engine Template Gallery</span></a><ul><li class="level-2"><a class="final" href="/gallery/template-gallery/"><span>Browse</span></a></li><li class="level-2"><a class="final" href="/community/submit-template/"><span>Submit your Engine as a Templat e</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Demo Tutorials</span></a><ul><li class="level-2"><a class="final" href="/demo/tapster/"><span>Comics Recommendation Demo</span></a></li><li class="level-2"><a class="final" href="/demo/community/"><span>Community Contributed Demo</span></a></li><li class="level-2"><a class="final" href="/demo/textclassification/"><span>Text Classification Engine Tutorial</span></a></li></ul></li><li class="level-1"><a class="expandible" href="/community/"><span>Getting Involved</span></a><ul><li class="level-2"><a class="final" href="/community/contribute-code/"><span>Contribute Code</span></a></li><li class="level-2"><a class="final" href="/community/contribute-documentation/"><span>Contribute Documentation</span></a></li><li class="level-2"><a class="final" href="/community/contribute-sdk/"><span>Contribute a SDK</span></a></li><li class="level-2"><a class="final" href="/community/contribute-webhook/"><span>Contrib ute a Webhook</span></a></li><li class="level-2"><a class="final" href="/community/projects/"><span>Community Projects</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Getting Help</span></a><ul><li class="level-2"><a class="final" href="/resources/faq/"><span>FAQs</span></a></li><li class="level-2"><a class="final" href="/support/"><span>Support</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Resources</span></a><ul><li class="level-2"><a class="final" href="/cli/"><span>Command-line Interface</span></a></li><li class="level-2"><a class="final" href="/resources/release/"><span>Release Cadence</span></a></li><li class="level-2"><a class="final" href="/resources/intellij/"><span>Developing Engines with IntelliJ IDEA</span></a></li><li class="level-2"><a class="final" href="/resources/upgrade/"><span>Upgrade Instructions</span></a></li><li class="level-2"><a class="final" href="/resources/glossary/"><span>Glossary</spa n></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Apache Software Foundation</span></a><ul><li class="level-2"><a class="final" href="https://www.apache.org/"><span>Apache Homepage</span></a></li><li class="level-2"><a class="final" href="https://www.apache.org/licenses/"><span>License</span></a></li><li class="level-2"><a class="final" href="https://www.apache.org/foundation/sponsorship.html"><span>Sponsorship</span></a></li><li class="level-2"><a class="final" href="https://www.apache.org/foundation/thanks.html"><span>Thanks</span></a></li><li class="level-2"><a class="final" href="https://www.apache.org/security/"><span>Security</span></a></li></ul></li></ul></nav></div><div class="col-md-9 col-sm-12"><div class="content-header hidden-md hidden-lg"><div id="page-title"><h1>Machine Learning Analytics with IPython Notebook</h1></div></div><div id="table-of-content-wrapper"><h5>On this page</h5><aside id="table-of-contents"><ul> <li> <a href="#prerequisit es">Prerequisites</a> </li> <li> <a href="#preparing-ipython-notebook">Preparing IPython Notebook</a> </li> <li> <a href="#performing-analysis-with-spark-sql">Performing Analysis with Spark SQL</a> </li> </ul> </aside><hr/><a id="edit-page-link" href="https://github.com/apache/incubator-predictionio/tree/livedoc/docs/manual/source/datacollection/analytics-ipynb.html.md.erb"><img src="/images/icons/edit-pencil-d6c1bb3d.png"/>Edit this page</a></div><div class="content-header hidden-sm hidden-xs"><div id="page-title"><h1>Machine Learning Analytics with IPython Notebook</h1></div></div><div class="content"> <p><a href="http://ipython.org/notebook.html">IPython Notebook</a> is a very powerful interactive computational environment, and with <a href="http://predictionio.apache.org">Apache PredictionIO</a>, <a href="http://spark.apache.org/docs/latest/api/python/">PySpark</a> and <a href="https://spark.apache.org/sql/">Spark SQL</a>, you can easily analyze your collected events when you ar e developing or tuning your engine.</p><h2 id='prerequisites' class='header-anchors'>Prerequisites</h2><p>Before you begin, please make sure you have the latest stable IPython installed, and that the command <code>ipython</code> can be accessed from your shell's search path.</p> <p><h2 id='export-events-to-apache-parquet' class='header-anchors'>Export Events to Apache Parquet</h2><p>PredictionIO supports exporting your events to <a href="http://parquet.apache.org/">Apache Parquet</a>, a columnar storage format that allows you to query quickly.</p><p>Let's export the data we imported in <a href="/templates/recommendation/quickstart/#import-sample-data">Recommendation Engine Template Quick Start</a>, and assume the App ID is 1.</p><div class="highlight shell"><table style="border-spacing: 0"><tbody><tr><td class="gutter gl" style="text-align: right"><pre class="lineno">1</pre></td><td class="code"><pre><span class="gp">$ </span><span class="nv">$PIO_HOME</span>/bin/pio <span c lass="nb">export</span> --appid 1 --output /tmp/movies --format parquet </pre></td></tr></tbody></table> </div> <p>After the command has finished successfully, you should see something similar to the following.</p><div class="highlight shell"><table style="border-spacing: 0"><tbody><tr><td class="gutter gl" style="text-align: right"><pre class="lineno">1 2 3 @@ -24,7 +24,7 @@ </pre></td></tr></tbody></table> </div> <p>If you see a error appearing in the console like this:</p><div class="highlight shell"><table style="border-spacing: 0"><tbody><tr><td class="gutter gl" style="text-align: right"><pre class="lineno">1 2</pre></td><td class="code"><pre><span class="o">[</span>E 10:07:53.900 NotebookApp] Support <span class="k">for </span>specifying --pylab on the <span class="nb">command </span>line has been removed. <span class="o">[</span>E 10:07:53.901 NotebookApp] Please use <span class="sb">`</span>%pylab inline<span class="sb">`</span> or <span class="sb">`</span>%matplotlib inline<span class="sb">`</span> <span class="k">in </span>the notebook itself. -</pre></td></tr></tbody></table> </div> <p>Then you can use the following command. </p><div class="highlight shell"><table style="border-spacing: 0"><tbody><tr><td class="gutter gl" style="text-align: right"><pre class="lineno">1</pre></td><td class="code"><pre><span class="nv">PYSPARK_DRIVER_PYTHON</span><span class="o">=</span>ipython <span class="nv">PYSPARK_DRIVER_PYTHON_OPTS</span><span class="o">=</span><span class="s2">"notebook --</span><span class="sb">`</span>%pylab inline<span class="sb">`</span><span class="s2">"</span> <span class="nv">$SPARK_HOME</span>/bin/pyspark +</pre></td></tr></tbody></table> </div> <p>Then you can use the following command.</p><div class="highlight shell"><table style="border-spacing: 0"><tbody><tr><td class="gutter gl" style="text-align: right"><pre class="lineno">1</pre></td><td class="code"><pre><span class="nv">PYSPARK_DRIVER_PYTHON</span><span class="o">=</span>ipython <span class="nv">PYSPARK_DRIVER_PYTHON_OPTS</span><span class="o">=</span><span class="s2">"notebook --</span><span class="sb">`</span>%pylab inline<span class="sb">`</span><span class="s2">"</span> <span class="nv">$SPARK_HOME</span>/bin/pyspark </pre></td></tr></tbody></table> </div> <p>By default, you should be able to access your IPython Notebook via web browser at <a href="http://localhost:8888">http://localhost:8888</a>.</p><p>Let's initialize our notebook for the following code in the first cell.</p><div class="highlight python"><table style="border-spacing: 0"><tbody><tr><td class="gutter gl" style="text-align: right"><pre class="lineno">1 2 3 @@ -83,7 +83,7 @@ <span class="n">autopct</span><span class="o">=</span><span class="s">"</span><span class="si">%1.1</span><span class="s">f</span><span class="si">%%</span><span class="s">"</span><span class="p">)</span> <span class="n">plt</span><span class="o">.</span><span class="n">axis</span><span class="p">(</span><span class="s">'equal'</span><span class="p">)</span> <span class="n">plt</span><span class="o">.</span><span class="n">show</span><span class="p">()</span> -</pre></td></tr></tbody></table> </div> <p><img alt="Breakdown of Ratings" src="/images/datacollection/ipynb-04-797d73f1.png"/></p><p>Happy analyzing!</p></div></div></div></div><footer><div class="container"><div class="seperator"></div><div class="row"><div class="col-md-6 footer-link-column"><div class="footer-link-column-row"><h4>Community</h4><ul><li><a href="//predictionio.incubator.apache.org/install/" target="blank">Download</a></li><li><a href="//predictionio.incubator.apache.org/" target="blank">Docs</a></li><li><a href="//github.com/apache/incubator-predictionio" target="blank">GitHub</a></li><li><a href="mailto:[email protected]" target="blank">Subscribe to User Mailing List</a></li><li><a href="//stackoverflow.com/questions/tagged/predictionio" target="blank">Stackoverflow</a></li></ul></div></div><div class="col-md-6 footer-link-column"><div class="footer-link-column-row"><h4>Contribute</h4><ul><li><a href="//predictionio.incubator.apache .org/community/contribute-code/" target="blank">Contribute</a></li><li><a href="//github.com/apache/incubator-predictionio" target="blank">Source Code</a></li><li><a href="//issues.apache.org/jira/browse/PIO" target="blank">Bug Tracker</a></li><li><a href="mailto:[email protected]" target="blank">Subscribe to Development Mailing List</a></li></ul></div></div></div><div class="row"><div class="col-md-12 footer-link-column"><p>Apache PredictionIO, PredictionIO, Apache, the Apache feather logo, and the Apache PredictionIO project logo are either registered trademarks or trademarks of The Apache Software Foundation in the United States and other countries.</p><p>All other marks mentioned may be trademarks or registered trademarks of their respective owners.</p></div></div><div class="row"><div class="col-md-12 footer-link-column"><a class="pull-right" href="http://incubator.apache.org/projects/predictionio.html"><img alt="Apache Incubator" src="/images/logo s/apache_incubator-6954bd16.png"/></a><span>Apache PredictionIO is an effort undergoing incubation at The Apache Software Foundation (ASF), sponsored by the Apache Incubator. Incubation is required of all newly accepted projects until a further review indicates that the infrastructure, communications, and decision making process have stabilized in a manner consistent with other successful ASF projects. While incubation status is not necessarily a reflection of the completeness or stability of the code, it does indicate that the project has yet to be fully endorsed by the ASF.</span></div></div></div><div id="footer-bottom"><div class="container"><div class="row"><div class="col-md-12"><div id="footer-logo-wrapper"><img alt="PredictionIO" src="/images/logos/logo-white-d1e9c6e6.png"/><span>â¢</span></div><div id="social-icons-wrapper"><a class="github-button" href="https://github.com/apache/incubator-predictionio" data-style="mega" data-count-href="/apache/incubator-predictionio/star gazers" data-count-api="/repos/apache/incubator-predictionio#stargazers_count" data-count-aria-label="# stargazers on GitHub" aria-label="Star apache/incubator-predictionio on GitHub">Star</a> <a class="github-button" href="https://github.com/apache/incubator-predictionio/fork" data-icon="octicon-git-branch" data-style="mega" data-count-href="/apache/incubator-predictionio/network" data-count-api="/repos/apache/incubator-predictionio#forks_count" data-count-aria-label="# forks on GitHub" aria-label="Fork apache/incubator-predictionio on GitHub">Fork</a> <script id="github-bjs" async="" defer="" src="https://buttons.github.io/buttons.js"></script><a href="https://twitter.com/predictionio" target="blank"><img alt="PredictionIO on Twitter" src="/images/icons/twitter-ea9dc152.png"/></a> <a href="https://www.facebook.com/predictionio" target="blank"><img alt="PredictionIO on Facebook" src="/images/icons/facebook-5c57939c.png"/></a> </div></div></div></div></div></footer></div><script>(fu nction(w,d,t,u,n,s,e){w['SwiftypeObject']=n;w[n]=w[n]||function(){ +</pre></td></tr></tbody></table> </div> <p><img alt="Breakdown of Ratings" src="/images/datacollection/ipynb-04-797d73f1.png"/></p><p>Happy analyzing!</p></div></div></div></div><footer><div class="container"><div class="seperator"></div><div class="row"><div class="col-md-6 footer-link-column"><div class="footer-link-column-row"><h4>Community</h4><ul><li><a href="//predictionio.apache.org/install/" target="blank">Download</a></li><li><a href="//predictionio.apache.org/" target="blank">Docs</a></li><li><a href="//github.com/apache/incubator-predictionio" target="blank">GitHub</a></li><li><a href="mailto:[email protected]" target="blank">Subscribe to User Mailing List</a></li><li><a href="//stackoverflow.com/questions/tagged/predictionio" target="blank">Stackoverflow</a></li></ul></div></div><div class="col-md-6 footer-link-column"><div class="footer-link-column-row"><h4>Contribute</h4><ul><li><a href="//predictionio.apache.org/community/contribute-code/" target= "blank">Contribute</a></li><li><a href="//github.com/apache/incubator-predictionio" target="blank">Source Code</a></li><li><a href="//issues.apache.org/jira/browse/PIO" target="blank">Bug Tracker</a></li><li><a href="mailto:[email protected]" target="blank">Subscribe to Development Mailing List</a></li></ul></div></div></div><div class="row"><div class="col-md-12 footer-link-column"><p>Apache PredictionIO, PredictionIO, Apache, the Apache feather logo, and the Apache PredictionIO project logo are either registered trademarks or trademarks of The Apache Software Foundation in the United States and other countries.</p><p>All other marks mentioned may be trademarks or registered trademarks of their respective owners.</p></div></div></div><div id="footer-bottom"><div class="container"><div class="row"><div class="col-md-12"><div id="footer-logo-wrapper"><img alt="PredictionIO" src="/images/logos/logo-white-d1e9c6e6.png"/><span>â¢</span></div><div id="social-icons-wr apper"><a class="github-button" href="https://github.com/apache/incubator-predictionio" data-style="mega" data-count-href="/apache/incubator-predictionio/stargazers" data-count-api="/repos/apache/incubator-predictionio#stargazers_count" data-count-aria-label="# stargazers on GitHub" aria-label="Star apache/incubator-predictionio on GitHub">Star</a> <a class="github-button" href="https://github.com/apache/incubator-predictionio/fork" data-icon="octicon-git-branch" data-style="mega" data-count-href="/apache/incubator-predictionio/network" data-count-api="/repos/apache/incubator-predictionio#forks_count" data-count-aria-label="# forks on GitHub" aria-label="Fork apache/incubator-predictionio on GitHub">Fork</a> <script id="github-bjs" async="" defer="" src="https://buttons.github.io/buttons.js"></script><a href="https://twitter.com/predictionio" target="blank"><img alt="PredictionIO on Twitter" src="/images/icons/twitter-ea9dc152.png"/></a> <a href="https://www.facebook.com/predictioni o" target="blank"><img alt="PredictionIO on Facebook" src="/images/icons/facebook-5c57939c.png"/></a> </div></div></div></div></div></footer></div><script>(function(w,d,t,u,n,s,e){w['SwiftypeObject']=n;w[n]=w[n]||function(){ (w[n].q=w[n].q||[]).push(arguments);};s=d.createElement(t); e=d.getElementsByTagName(t)[0];s.async=1;s.src=u;e.parentNode.insertBefore(s,e); })(window,document,'script','//s.swiftypecdn.com/install/v1/st.js','_st');
http://git-wip-us.apache.org/repos/asf/incubator-predictionio-site/blob/452034c1/datacollection/analytics-ipynb/index.html.gz ---------------------------------------------------------------------- diff --git a/datacollection/analytics-ipynb/index.html.gz b/datacollection/analytics-ipynb/index.html.gz index 96e0a2d..c421a80 100644 Binary files a/datacollection/analytics-ipynb/index.html.gz and b/datacollection/analytics-ipynb/index.html.gz differ
