http://git-wip-us.apache.org/repos/asf/predictionio-site/blob/9d2bd407/demo/textclassification/index.html
----------------------------------------------------------------------
diff --git a/demo/textclassification/index.html 
b/demo/textclassification/index.html
index 0d1d516..58a604e 100644
--- a/demo/textclassification/index.html
+++ b/demo/textclassification/index.html
@@ -1,4 +1,4 @@
-<!DOCTYPE html><html><head><title>Text Classification Engine 
Tutorial</title><meta charset="utf-8"/><meta content="IE=edge,chrome=1" 
http-equiv="X-UA-Compatible"/><meta name="viewport" 
content="width=device-width, initial-scale=1.0"/><meta class="swiftype" 
name="title" data-type="string" content="Text Classification Engine 
Tutorial"/><link rel="canonical" 
href="https://predictionio.apache.org/demo/textclassification/"/><link 
href="/images/favicon/normal-b330020a.png" rel="shortcut icon"/><link 
href="/images/favicon/apple-c0febcf2.png" rel="apple-touch-icon"/><link 
href="//fonts.googleapis.com/css?family=Open+Sans:300italic,400italic,600italic,700italic,800italic,400,300,600,700,800"
 rel="stylesheet"/><link 
href="//maxcdn.bootstrapcdn.com/font-awesome/4.2.0/css/font-awesome.min.css" 
rel="stylesheet"/><link href="/stylesheets/application-eccfc6cb.css" 
rel="stylesheet" type="text/css"/><script 
src="//cdnjs.cloudflare.com/ajax/libs/html5shiv/3.7.2/html5shiv.min.js"></script><script
 src=
 
"//cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script><script
 src="//use.typekit.net/pqo0itb.js"></script><script>try{Typekit.load({ async: 
true });}catch(e){}</script></head><body><div id="global"><header><div 
class="container" id="header-wrapper"><div class="row"><div 
class="col-sm-12"><div id="logo-wrapper"><span id="drawer-toggle"></span><a 
href="#"></a><a href="http://predictionio.apache.org/";><img alt="Apache 
PredictionIO" id="logo" 
src="/images/logos/logo-ee2b9bb3.png"/></a><span>®</span></div><div 
id="menu-wrapper"><div id="pill-wrapper"><a class="pill left" 
href="/gallery/template-gallery">TEMPLATES</a> <a class="pill right" 
href="//github.com/apache/incubator-predictionio/">OPEN 
SOURCE</a></div></div><img class="mobile-search-bar-toggler hidden-md 
hidden-lg" 
src="/images/icons/search-glass-704bd4ff.png"/></div></div></div></header><div 
id="search-bar-row-wrapper"><div class="container-fluid" 
id="search-bar-row"><div class="row"><div class="co
 l-md-9 col-sm-11 col-xs-11"><div class="hidden-md hidden-lg" 
id="mobile-page-heading-wrapper"><p>PredictionIO Docs</p><h4>Text 
Classification Engine Tutorial</h4></div><h4 class="hidden-sm 
hidden-xs">PredictionIO Docs</h4></div><div class="col-md-3 col-sm-1 col-xs-1 
hidden-md hidden-lg"><img id="left-menu-indicator" 
src="/images/icons/down-arrow-dfe9f7fe.png"/></div><div class="col-md-3 
col-sm-12 col-xs-12 swiftype-wrapper"><div class="swiftype"><form 
class="search-form"><img class="search-box-toggler hidden-xs hidden-sm" 
src="/images/icons/search-glass-704bd4ff.png"/><div class="search-box"><img 
src="/images/icons/search-glass-704bd4ff.png"/><input type="text" 
id="st-search-input" class="st-search-input" placeholder="Search 
Doc..."/></div><img class="swiftype-row-hider hidden-md hidden-lg" 
src="/images/icons/drawer-toggle-active-fcbef12a.png"/></form></div></div><div 
class="mobile-left-menu-toggler hidden-md 
hidden-lg"></div></div></div></div><div id="page" class="container-fluid">
 <div class="row"><div id="left-menu-wrapper" class="col-md-3"><nav 
id="nav-main"><ul><li class="level-1"><a class="expandible" 
href="/"><span>Apache PredictionIO® Documentation</span></a><ul><li 
class="level-2"><a class="final" href="/"><span>Welcome to Apache 
PredictionIO®</span></a></li></ul></li><li class="level-1"><a 
class="expandible" href="#"><span>Getting Started</span></a><ul><li 
class="level-2"><a class="final" href="/start/"><span>A Quick 
Intro</span></a></li><li class="level-2"><a class="final" 
href="/install/"><span>Installing Apache PredictionIO</span></a></li><li 
class="level-2"><a class="final" href="/start/download/"><span>Downloading an 
Engine Template</span></a></li><li class="level-2"><a class="final" 
href="/start/deploy/"><span>Deploying Your First Engine</span></a></li><li 
class="level-2"><a class="final" href="/start/customize/"><span>Customizing the 
Engine</span></a></li></ul></li><li class="level-1"><a class="expandible" 
href="#"><span>Integrating with Your
  App</span></a><ul><li class="level-2"><a class="final" 
href="/appintegration/"><span>App Integration Overview</span></a></li><li 
class="level-2"><a class="expandible" href="/sdk/"><span>List of 
SDKs</span></a><ul><li class="level-3"><a class="final" 
href="/sdk/java/"><span>Java & Android SDK</span></a></li><li 
class="level-3"><a class="final" href="/sdk/php/"><span>PHP 
SDK</span></a></li><li class="level-3"><a class="final" 
href="/sdk/python/"><span>Python SDK</span></a></li><li class="level-3"><a 
class="final" href="/sdk/ruby/"><span>Ruby SDK</span></a></li><li 
class="level-3"><a class="final" href="/sdk/community/"><span>Community Powered 
SDKs</span></a></li></ul></li></ul></li><li class="level-1"><a 
class="expandible" href="#"><span>Deploying an Engine</span></a><ul><li 
class="level-2"><a class="final" href="/deploy/"><span>Deploying as a Web 
Service</span></a></li><li class="level-2"><a class="final" 
href="/batchpredict/"><span>Batch Predictions</span></a></li><li class="level-
 2"><a class="final" href="/deploy/monitoring/"><span>Monitoring 
Engine</span></a></li><li class="level-2"><a class="final" 
href="/deploy/engineparams/"><span>Setting Engine Parameters</span></a></li><li 
class="level-2"><a class="final" href="/deploy/enginevariants/"><span>Deploying 
Multiple Engine Variants</span></a></li><li class="level-2"><a class="final" 
href="/deploy/plugin/"><span>Engine Server Plugin</span></a></li></ul></li><li 
class="level-1"><a class="expandible" href="#"><span>Customizing an 
Engine</span></a><ul><li class="level-2"><a class="final" 
href="/customize/"><span>Learning DASE</span></a></li><li class="level-2"><a 
class="final" href="/customize/dase/"><span>Implement DASE</span></a></li><li 
class="level-2"><a class="final" 
href="/customize/troubleshooting/"><span>Troubleshooting Engine 
Development</span></a></li><li class="level-2"><a class="final" 
href="/api/current/#package"><span>Engine Scala 
APIs</span></a></li></ul></li><li class="level-1"><a class="expandib
 le" href="#"><span>Collecting and Analyzing Data</span></a><ul><li 
class="level-2"><a class="final" href="/datacollection/"><span>Event Server 
Overview</span></a></li><li class="level-2"><a class="final" 
href="/datacollection/eventapi/"><span>Collecting Data with 
REST/SDKs</span></a></li><li class="level-2"><a class="final" 
href="/datacollection/eventmodel/"><span>Events Modeling</span></a></li><li 
class="level-2"><a class="final" 
href="/datacollection/webhooks/"><span>Unifying Multichannel Data with 
Webhooks</span></a></li><li class="level-2"><a class="final" 
href="/datacollection/channel/"><span>Channel</span></a></li><li 
class="level-2"><a class="final" 
href="/datacollection/batchimport/"><span>Importing Data in 
Batch</span></a></li><li class="level-2"><a class="final" 
href="/datacollection/analytics/"><span>Using Analytics 
Tools</span></a></li><li class="level-2"><a class="final" 
href="/datacollection/plugin/"><span>Event Server 
Plugin</span></a></li></ul></li><li class="level-1
 "><a class="expandible" href="#"><span>Choosing an 
Algorithm(s)</span></a><ul><li class="level-2"><a class="final" 
href="/algorithm/"><span>Built-in Algorithm Libraries</span></a></li><li 
class="level-2"><a class="final" href="/algorithm/switch/"><span>Switching to 
Another Algorithm</span></a></li><li class="level-2"><a class="final" 
href="/algorithm/multiple/"><span>Combining Multiple 
Algorithms</span></a></li><li class="level-2"><a class="final" 
href="/algorithm/custom/"><span>Adding Your Own 
Algorithms</span></a></li></ul></li><li class="level-1"><a class="expandible" 
href="#"><span>ML Tuning and Evaluation</span></a><ul><li class="level-2"><a 
class="final" href="/evaluation/"><span>Overview</span></a></li><li 
class="level-2"><a class="final" 
href="/evaluation/paramtuning/"><span>Hyperparameter Tuning</span></a></li><li 
class="level-2"><a class="final" 
href="/evaluation/evaluationdashboard/"><span>Evaluation 
Dashboard</span></a></li><li class="level-2"><a class="final" href="/eva
 luation/metricchoose/"><span>Choosing Evaluation Metrics</span></a></li><li 
class="level-2"><a class="final" href="/evaluation/metricbuild/"><span>Building 
Evaluation Metrics</span></a></li></ul></li><li class="level-1"><a 
class="expandible" href="#"><span>System Architecture</span></a><ul><li 
class="level-2"><a class="final" href="/system/"><span>Architecture 
Overview</span></a></li><li class="level-2"><a class="final" 
href="/system/anotherdatastore/"><span>Using Another Data 
Store</span></a></li></ul></li><li class="level-1"><a class="expandible" 
href="#"><span>PredictionIO® Official Templates</span></a><ul><li 
class="level-2"><a class="final" 
href="/templates/"><span>Intro</span></a></li><li class="level-2"><a 
class="expandible" href="#"><span>Recommendation</span></a><ul><li 
class="level-3"><a class="final" 
href="/templates/recommendation/quickstart/"><span>Quick 
Start</span></a></li><li class="level-3"><a class="final" 
href="/templates/recommendation/dase/"><span>DASE</span></
 a></li><li class="level-3"><a class="final" 
href="/templates/recommendation/evaluation/"><span>Evaluation 
Explained</span></a></li><li class="level-3"><a class="final" 
href="/templates/recommendation/how-to/"><span>How-To</span></a></li><li 
class="level-3"><a class="final" 
href="/templates/recommendation/reading-custom-events/"><span>Read Custom 
Events</span></a></li><li class="level-3"><a class="final" 
href="/templates/recommendation/customize-data-prep/"><span>Customize Data 
Preparator</span></a></li><li class="level-3"><a class="final" 
href="/templates/recommendation/customize-serving/"><span>Customize 
Serving</span></a></li><li class="level-3"><a class="final" 
href="/templates/recommendation/training-with-implicit-preference/"><span>Train 
with Implicit Preference</span></a></li><li class="level-3"><a class="final" 
href="/templates/recommendation/blacklist-items/"><span>Filter Recommended 
Items by Blacklist in Query</span></a></li><li class="level-3"><a class="final" 
href="/templ
 ates/recommendation/batch-evaluator/"><span>Batch Persistable 
Evaluator</span></a></li></ul></li><li class="level-2"><a class="expandible" 
href="#"><span>E-Commerce Recommendation</span></a><ul><li class="level-3"><a 
class="final" href="/templates/ecommercerecommendation/quickstart/"><span>Quick 
Start</span></a></li><li class="level-3"><a class="final" 
href="/templates/ecommercerecommendation/dase/"><span>DASE</span></a></li><li 
class="level-3"><a class="final" 
href="/templates/ecommercerecommendation/how-to/"><span>How-To</span></a></li><li
 class="level-3"><a class="final" 
href="/templates/ecommercerecommendation/train-with-rate-event/"><span>Train 
with Rate Event</span></a></li><li class="level-3"><a class="final" 
href="/templates/ecommercerecommendation/adjust-score/"><span>Adjust 
Score</span></a></li></ul></li><li class="level-2"><a class="expandible" 
href="#"><span>Similar Product</span></a><ul><li class="level-3"><a 
class="final" href="/templates/similarproduct/quickstart/"><s
 pan>Quick Start</span></a></li><li class="level-3"><a class="final" 
href="/templates/similarproduct/dase/"><span>DASE</span></a></li><li 
class="level-3"><a class="final" 
href="/templates/similarproduct/how-to/"><span>How-To</span></a></li><li 
class="level-3"><a class="final" 
href="/templates/similarproduct/multi-events-multi-algos/"><span>Multiple 
Events and Multiple Algorithms</span></a></li><li class="level-3"><a 
class="final" 
href="/templates/similarproduct/return-item-properties/"><span>Returns Item 
Properties</span></a></li><li class="level-3"><a class="final" 
href="/templates/similarproduct/train-with-rate-event/"><span>Train with Rate 
Event</span></a></li><li class="level-3"><a class="final" 
href="/templates/similarproduct/rid-user-set-event/"><span>Get Rid of Events 
for Users</span></a></li><li class="level-3"><a class="final" 
href="/templates/similarproduct/recommended-user/"><span>Recommend 
Users</span></a></li></ul></li><li class="level-2"><a class="expandible" 
href="#"><
 span>Classification</span></a><ul><li class="level-3"><a class="final" 
href="/templates/classification/quickstart/"><span>Quick 
Start</span></a></li><li class="level-3"><a class="final" 
href="/templates/classification/dase/"><span>DASE</span></a></li><li 
class="level-3"><a class="final" 
href="/templates/classification/how-to/"><span>How-To</span></a></li><li 
class="level-3"><a class="final" 
href="/templates/classification/add-algorithm/"><span>Use Alternative 
Algorithm</span></a></li><li class="level-3"><a class="final" 
href="/templates/classification/reading-custom-properties/"><span>Read Custom 
Properties</span></a></li></ul></li></ul></li><li class="level-1"><a 
class="expandible" href="#"><span>Engine Template Gallery</span></a><ul><li 
class="level-2"><a class="final" 
href="/gallery/template-gallery/"><span>Browse</span></a></li><li 
class="level-2"><a class="final" 
href="/community/submit-template/"><span>Submit your Engine as a 
Template</span></a></li></ul></li><li class="level-
 1"><a class="expandible" href="#"><span>Demo Tutorials</span></a><ul><li 
class="level-2"><a class="final" href="/demo/tapster/"><span>Comics 
Recommendation Demo</span></a></li><li class="level-2"><a class="final" 
href="/demo/community/"><span>Community Contributed Demo</span></a></li><li 
class="level-2"><a class="final active" 
href="/demo/textclassification/"><span>Text Classification Engine 
Tutorial</span></a></li></ul></li><li class="level-1"><a class="expandible" 
href="/community/"><span>Getting Involved</span></a><ul><li class="level-2"><a 
class="final" href="/community/contribute-code/"><span>Contribute 
Code</span></a></li><li class="level-2"><a class="final" 
href="/community/contribute-documentation/"><span>Contribute 
Documentation</span></a></li><li class="level-2"><a class="final" 
href="/community/contribute-sdk/"><span>Contribute a SDK</span></a></li><li 
class="level-2"><a class="final" 
href="/community/contribute-webhook/"><span>Contribute a 
Webhook</span></a></li><li clas
 s="level-2"><a class="final" href="/community/projects/"><span>Community 
Projects</span></a></li></ul></li><li class="level-1"><a class="expandible" 
href="#"><span>Getting Help</span></a><ul><li class="level-2"><a class="final" 
href="/resources/faq/"><span>FAQs</span></a></li><li class="level-2"><a 
class="final" href="/support/"><span>Support</span></a></li></ul></li><li 
class="level-1"><a class="expandible" 
href="#"><span>Resources</span></a><ul><li class="level-2"><a class="final" 
href="/cli/"><span>Command-line Interface</span></a></li><li class="level-2"><a 
class="final" href="/resources/release/"><span>Release 
Cadence</span></a></li><li class="level-2"><a class="final" 
href="/resources/intellij/"><span>Developing Engines with IntelliJ 
IDEA</span></a></li><li class="level-2"><a class="final" 
href="/resources/upgrade/"><span>Upgrade Instructions</span></a></li><li 
class="level-2"><a class="final" 
href="/resources/glossary/"><span>Glossary</span></a></li></ul></li><li 
class="level
 -1"><a class="expandible" href="#"><span>Apache Software 
Foundation</span></a><ul><li class="level-2"><a class="final" 
href="https://www.apache.org/";><span>Apache Homepage</span></a></li><li 
class="level-2"><a class="final" 
href="https://www.apache.org/licenses/";><span>License</span></a></li><li 
class="level-2"><a class="final" 
href="https://www.apache.org/foundation/sponsorship.html";><span>Sponsorship</span></a></li><li
 class="level-2"><a class="final" 
href="https://www.apache.org/foundation/thanks.html";><span>Thanks</span></a></li><li
 class="level-2"><a class="final" 
href="https://www.apache.org/security/";><span>Security</span></a></li></ul></li></ul></nav></div><div
 class="col-md-9 col-sm-12"><div class="content-header hidden-md 
hidden-lg"><div id="breadcrumbs" class="hidden-sm hidden xs"><ul><li><a 
href="#">Demo Tutorials</a><span class="spacer">&gt;</span></li><li><span 
class="last">Text Classification Engine Tutorial</span></li></ul></div><div 
id="page-title"><h1>Text Classifi
 cation Engine Tutorial</h1></div></div><div 
id="table-of-content-wrapper"><h5>On this page</h5><aside 
id="table-of-contents"><ul> <li> <a href="#introduction">Introduction</a> </li> 
<li> <a href="#prerequisites">Prerequisites</a> </li> <li> <a 
href="#engine-overview">Engine Overview</a> </li> <li> <a 
href="#quick-start">Quick Start</a> </li> </ul> </li> <li> <a 
href="#detailed-explanation-of-dase">Detailed Explanation of DASE</a> <ul> <li> 
<a href="#importing-data">Importing Data</a> </li> <li> <a 
href="#data-source-reading-event-data">Data Source: Reading Event Data</a> 
</li> <li> <a href="#preparator-data-processing-with-dase">Preparator : Data 
Processing With DASE</a> </li> <li> <a href="#algorithm-component">Algorithm 
Component</a> </li> <li> <a 
href="#serving-delivering-the-final-prediction">Serving: Delivering the Final 
Prediction</a> </li> <li> <a 
href="#evaluation-model-assessment-and-selection">Evaluation: Model Assessment 
and Selection</a> </li> <li> <a href="#engine-deplo
 yment">Engine Deployment</a> </li> </ul> </aside><hr/><a id="edit-page-link" 
href="https://github.com/apache/incubator-predictionio/tree/livedoc/docs/manual/source/demo/textclassification.html.md.erb";><img
 src="/images/icons/edit-pencil-d6c1bb3d.png"/>Edit this page</a></div><div 
class="content-header hidden-sm hidden-xs"><div id="breadcrumbs" 
class="hidden-sm hidden xs"><ul><li><a href="#">Demo Tutorials</a><span 
class="spacer">&gt;</span></li><li><span class="last">Text Classification 
Engine Tutorial</span></li></ul></div><div id="page-title"><h1>Text 
Classification Engine Tutorial</h1></div></div><div class="content"> 
<p>(Updated for Text Classification Template version 3.1)</p><h2 
id='introduction' class='header-anchors'>Introduction</h2><p>In the real world, 
there are many applications that collect text as data. For example, spam 
detectors take email and header content to automatically determine what is or 
is not spam; applications can gague the general sentiment in a geographi
 cal area by analyzing Twitter data; and news articles can be automatically 
categorized based solely on the text content.There are a wide array of machine 
learning models you can use to create, or train, a predictive model to assign 
an incoming article, or query, to an existing category. Before you can use 
these techniques you must first transform the text data (in this case the set 
of news articles) into numeric vectors, or feature vectors, that can be used to 
train your model.</p><p>The purpose of this tutorial is to illustrate how you 
can go about doing this using PredictionIO&#39;s platform. The advantages of 
using this platform include: a dynamic engine that responds to queries in 
real-time; <a 
href="http://en.wikipedia.org/wiki/Separation_of_concerns";>separation of 
concerns</a>, which offers code re-use and maintainability, and distributed 
computing capabilities for scalability and efficiency. Moreover, it is easy to 
incorporate non-trivial data modeling tasks into the DASE arc
 hitecture allowing Data Scientists to focus on tasks related to modeling. This 
tutorial will exemplify some of these ideas by guiding you through 
PredictionIO&#39;s <a 
href="/gallery/template-gallery/#natural-language-processing">text 
classification template</a>.</p><h2 id='prerequisites' 
class='header-anchors'>Prerequisites</h2><p>Before getting started, please make 
sure that you have the latest version of Apache PredictionIO <a 
href="http://predictionio.apache.org/install/";>installed</a>. We emphasize here 
that this is an engine template written in <strong>Scala</strong> and can be 
more generally thought of as an SBT project containing all the necessary 
components.</p><p>You should also download the engine template named Text 
Classification Engine that accompanies this tutorial by cloning the template 
repository:</p><div class="highlight shell"><table style="border-spacing: 
0"><tbody><tr><td class="gutter gl" style="text-align: right"><pre 
class="lineno">1</pre></td><td class="cod
 e"><pre>git clone 
https://github.com/apache/incubator-predictionio-template-text-classifier.git 
&lt; Your new engine directory &gt;
+<!DOCTYPE html><html><head><title>Text Classification Engine 
Tutorial</title><meta charset="utf-8"/><meta content="IE=edge,chrome=1" 
http-equiv="X-UA-Compatible"/><meta name="viewport" 
content="width=device-width, initial-scale=1.0"/><meta class="swiftype" 
name="title" data-type="string" content="Text Classification Engine 
Tutorial"/><link rel="canonical" 
href="https://predictionio.apache.org/demo/textclassification/"/><link 
href="/images/favicon/normal-b330020a.png" rel="shortcut icon"/><link 
href="/images/favicon/apple-c0febcf2.png" rel="apple-touch-icon"/><link 
href="//fonts.googleapis.com/css?family=Open+Sans:300italic,400italic,600italic,700italic,800italic,400,300,600,700,800"
 rel="stylesheet"/><link 
href="//maxcdn.bootstrapcdn.com/font-awesome/4.2.0/css/font-awesome.min.css" 
rel="stylesheet"/><link href="/stylesheets/application-eccfc6cb.css" 
rel="stylesheet" type="text/css"/><script 
src="//cdnjs.cloudflare.com/ajax/libs/html5shiv/3.7.2/html5shiv.min.js"></script><script
 src=
 
"//cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script><script
 src="//use.typekit.net/pqo0itb.js"></script><script>try{Typekit.load({ async: 
true });}catch(e){}</script></head><body><div id="global"><header><div 
class="container" id="header-wrapper"><div class="row"><div 
class="col-sm-12"><div id="logo-wrapper"><span id="drawer-toggle"></span><a 
href="#"></a><a href="http://predictionio.apache.org/";><img alt="Apache 
PredictionIO" id="logo" 
src="/images/logos/logo-ee2b9bb3.png"/></a><span>®</span></div><div 
id="menu-wrapper"><div id="pill-wrapper"><a class="pill left" 
href="/gallery/template-gallery">TEMPLATES</a> <a class="pill right" 
href="//github.com/apache/predictionio/">OPEN SOURCE</a></div></div><img 
class="mobile-search-bar-toggler hidden-md hidden-lg" 
src="/images/icons/search-glass-704bd4ff.png"/></div></div></div></header><div 
id="search-bar-row-wrapper"><div class="container-fluid" 
id="search-bar-row"><div class="row"><div class="col-md-9 col
 -sm-11 col-xs-11"><div class="hidden-md hidden-lg" 
id="mobile-page-heading-wrapper"><p>PredictionIO Docs</p><h4>Text 
Classification Engine Tutorial</h4></div><h4 class="hidden-sm 
hidden-xs">PredictionIO Docs</h4></div><div class="col-md-3 col-sm-1 col-xs-1 
hidden-md hidden-lg"><img id="left-menu-indicator" 
src="/images/icons/down-arrow-dfe9f7fe.png"/></div><div class="col-md-3 
col-sm-12 col-xs-12 swiftype-wrapper"><div class="swiftype"><form 
class="search-form"><img class="search-box-toggler hidden-xs hidden-sm" 
src="/images/icons/search-glass-704bd4ff.png"/><div class="search-box"><img 
src="/images/icons/search-glass-704bd4ff.png"/><input type="text" 
id="st-search-input" class="st-search-input" placeholder="Search 
Doc..."/></div><img class="swiftype-row-hider hidden-md hidden-lg" 
src="/images/icons/drawer-toggle-active-fcbef12a.png"/></form></div></div><div 
class="mobile-left-menu-toggler hidden-md 
hidden-lg"></div></div></div></div><div id="page" class="container-fluid"><div 
class
 ="row"><div id="left-menu-wrapper" class="col-md-3"><nav id="nav-main"><ul><li 
class="level-1"><a class="expandible" href="/"><span>Apache PredictionIO® 
Documentation</span></a><ul><li class="level-2"><a class="final" 
href="/"><span>Welcome to Apache PredictionIO®</span></a></li></ul></li><li 
class="level-1"><a class="expandible" href="#"><span>Getting 
Started</span></a><ul><li class="level-2"><a class="final" 
href="/start/"><span>A Quick Intro</span></a></li><li class="level-2"><a 
class="final" href="/install/"><span>Installing Apache 
PredictionIO</span></a></li><li class="level-2"><a class="final" 
href="/start/download/"><span>Downloading an Engine Template</span></a></li><li 
class="level-2"><a class="final" href="/start/deploy/"><span>Deploying Your 
First Engine</span></a></li><li class="level-2"><a class="final" 
href="/start/customize/"><span>Customizing the 
Engine</span></a></li></ul></li><li class="level-1"><a class="expandible" 
href="#"><span>Integrating with Your App</span
 ></a><ul><li class="level-2"><a class="final" 
 >href="/appintegration/"><span>App Integration Overview</span></a></li><li 
 >class="level-2"><a class="expandible" href="/sdk/"><span>List of 
 >SDKs</span></a><ul><li class="level-3"><a class="final" 
 >href="/sdk/java/"><span>Java & Android SDK</span></a></li><li 
 >class="level-3"><a class="final" href="/sdk/php/"><span>PHP 
 >SDK</span></a></li><li class="level-3"><a class="final" 
 >href="/sdk/python/"><span>Python SDK</span></a></li><li class="level-3"><a 
 >class="final" href="/sdk/ruby/"><span>Ruby SDK</span></a></li><li 
 >class="level-3"><a class="final" href="/sdk/community/"><span>Community 
 >Powered SDKs</span></a></li></ul></li></ul></li><li class="level-1"><a 
 >class="expandible" href="#"><span>Deploying an Engine</span></a><ul><li 
 >class="level-2"><a class="final" href="/deploy/"><span>Deploying as a Web 
 >Service</span></a></li><li class="level-2"><a class="final" 
 >href="/batchpredict/"><span>Batch Predictions</span></a></li><li 
 >class="level-2"><a clas
 s="final" href="/deploy/monitoring/"><span>Monitoring 
Engine</span></a></li><li class="level-2"><a class="final" 
href="/deploy/engineparams/"><span>Setting Engine Parameters</span></a></li><li 
class="level-2"><a class="final" href="/deploy/enginevariants/"><span>Deploying 
Multiple Engine Variants</span></a></li><li class="level-2"><a class="final" 
href="/deploy/plugin/"><span>Engine Server Plugin</span></a></li></ul></li><li 
class="level-1"><a class="expandible" href="#"><span>Customizing an 
Engine</span></a><ul><li class="level-2"><a class="final" 
href="/customize/"><span>Learning DASE</span></a></li><li class="level-2"><a 
class="final" href="/customize/dase/"><span>Implement DASE</span></a></li><li 
class="level-2"><a class="final" 
href="/customize/troubleshooting/"><span>Troubleshooting Engine 
Development</span></a></li><li class="level-2"><a class="final" 
href="/api/current/#package"><span>Engine Scala 
APIs</span></a></li></ul></li><li class="level-1"><a class="expandible" href="
 #"><span>Collecting and Analyzing Data</span></a><ul><li class="level-2"><a 
class="final" href="/datacollection/"><span>Event Server 
Overview</span></a></li><li class="level-2"><a class="final" 
href="/datacollection/eventapi/"><span>Collecting Data with 
REST/SDKs</span></a></li><li class="level-2"><a class="final" 
href="/datacollection/eventmodel/"><span>Events Modeling</span></a></li><li 
class="level-2"><a class="final" 
href="/datacollection/webhooks/"><span>Unifying Multichannel Data with 
Webhooks</span></a></li><li class="level-2"><a class="final" 
href="/datacollection/channel/"><span>Channel</span></a></li><li 
class="level-2"><a class="final" 
href="/datacollection/batchimport/"><span>Importing Data in 
Batch</span></a></li><li class="level-2"><a class="final" 
href="/datacollection/analytics/"><span>Using Analytics 
Tools</span></a></li><li class="level-2"><a class="final" 
href="/datacollection/plugin/"><span>Event Server 
Plugin</span></a></li></ul></li><li class="level-1"><a class
 ="expandible" href="#"><span>Choosing an Algorithm(s)</span></a><ul><li 
class="level-2"><a class="final" href="/algorithm/"><span>Built-in Algorithm 
Libraries</span></a></li><li class="level-2"><a class="final" 
href="/algorithm/switch/"><span>Switching to Another 
Algorithm</span></a></li><li class="level-2"><a class="final" 
href="/algorithm/multiple/"><span>Combining Multiple 
Algorithms</span></a></li><li class="level-2"><a class="final" 
href="/algorithm/custom/"><span>Adding Your Own 
Algorithms</span></a></li></ul></li><li class="level-1"><a class="expandible" 
href="#"><span>ML Tuning and Evaluation</span></a><ul><li class="level-2"><a 
class="final" href="/evaluation/"><span>Overview</span></a></li><li 
class="level-2"><a class="final" 
href="/evaluation/paramtuning/"><span>Hyperparameter Tuning</span></a></li><li 
class="level-2"><a class="final" 
href="/evaluation/evaluationdashboard/"><span>Evaluation 
Dashboard</span></a></li><li class="level-2"><a class="final" 
href="/evaluation/me
 tricchoose/"><span>Choosing Evaluation Metrics</span></a></li><li 
class="level-2"><a class="final" href="/evaluation/metricbuild/"><span>Building 
Evaluation Metrics</span></a></li></ul></li><li class="level-1"><a 
class="expandible" href="#"><span>System Architecture</span></a><ul><li 
class="level-2"><a class="final" href="/system/"><span>Architecture 
Overview</span></a></li><li class="level-2"><a class="final" 
href="/system/anotherdatastore/"><span>Using Another Data 
Store</span></a></li></ul></li><li class="level-1"><a class="expandible" 
href="#"><span>PredictionIO® Official Templates</span></a><ul><li 
class="level-2"><a class="final" 
href="/templates/"><span>Intro</span></a></li><li class="level-2"><a 
class="expandible" href="#"><span>Recommendation</span></a><ul><li 
class="level-3"><a class="final" 
href="/templates/recommendation/quickstart/"><span>Quick 
Start</span></a></li><li class="level-3"><a class="final" 
href="/templates/recommendation/dase/"><span>DASE</span></a></li><li
  class="level-3"><a class="final" 
href="/templates/recommendation/evaluation/"><span>Evaluation 
Explained</span></a></li><li class="level-3"><a class="final" 
href="/templates/recommendation/how-to/"><span>How-To</span></a></li><li 
class="level-3"><a class="final" 
href="/templates/recommendation/reading-custom-events/"><span>Read Custom 
Events</span></a></li><li class="level-3"><a class="final" 
href="/templates/recommendation/customize-data-prep/"><span>Customize Data 
Preparator</span></a></li><li class="level-3"><a class="final" 
href="/templates/recommendation/customize-serving/"><span>Customize 
Serving</span></a></li><li class="level-3"><a class="final" 
href="/templates/recommendation/training-with-implicit-preference/"><span>Train 
with Implicit Preference</span></a></li><li class="level-3"><a class="final" 
href="/templates/recommendation/blacklist-items/"><span>Filter Recommended 
Items by Blacklist in Query</span></a></li><li class="level-3"><a class="final" 
href="/templates/recom
 mendation/batch-evaluator/"><span>Batch Persistable 
Evaluator</span></a></li></ul></li><li class="level-2"><a class="expandible" 
href="#"><span>E-Commerce Recommendation</span></a><ul><li class="level-3"><a 
class="final" href="/templates/ecommercerecommendation/quickstart/"><span>Quick 
Start</span></a></li><li class="level-3"><a class="final" 
href="/templates/ecommercerecommendation/dase/"><span>DASE</span></a></li><li 
class="level-3"><a class="final" 
href="/templates/ecommercerecommendation/how-to/"><span>How-To</span></a></li><li
 class="level-3"><a class="final" 
href="/templates/ecommercerecommendation/train-with-rate-event/"><span>Train 
with Rate Event</span></a></li><li class="level-3"><a class="final" 
href="/templates/ecommercerecommendation/adjust-score/"><span>Adjust 
Score</span></a></li></ul></li><li class="level-2"><a class="expandible" 
href="#"><span>Similar Product</span></a><ul><li class="level-3"><a 
class="final" href="/templates/similarproduct/quickstart/"><span>Quick 
 Start</span></a></li><li class="level-3"><a class="final" 
href="/templates/similarproduct/dase/"><span>DASE</span></a></li><li 
class="level-3"><a class="final" 
href="/templates/similarproduct/how-to/"><span>How-To</span></a></li><li 
class="level-3"><a class="final" 
href="/templates/similarproduct/multi-events-multi-algos/"><span>Multiple 
Events and Multiple Algorithms</span></a></li><li class="level-3"><a 
class="final" 
href="/templates/similarproduct/return-item-properties/"><span>Returns Item 
Properties</span></a></li><li class="level-3"><a class="final" 
href="/templates/similarproduct/train-with-rate-event/"><span>Train with Rate 
Event</span></a></li><li class="level-3"><a class="final" 
href="/templates/similarproduct/rid-user-set-event/"><span>Get Rid of Events 
for Users</span></a></li><li class="level-3"><a class="final" 
href="/templates/similarproduct/recommended-user/"><span>Recommend 
Users</span></a></li></ul></li><li class="level-2"><a class="expandible" 
href="#"><span>Class
 ification</span></a><ul><li class="level-3"><a class="final" 
href="/templates/classification/quickstart/"><span>Quick 
Start</span></a></li><li class="level-3"><a class="final" 
href="/templates/classification/dase/"><span>DASE</span></a></li><li 
class="level-3"><a class="final" 
href="/templates/classification/how-to/"><span>How-To</span></a></li><li 
class="level-3"><a class="final" 
href="/templates/classification/add-algorithm/"><span>Use Alternative 
Algorithm</span></a></li><li class="level-3"><a class="final" 
href="/templates/classification/reading-custom-properties/"><span>Read Custom 
Properties</span></a></li></ul></li></ul></li><li class="level-1"><a 
class="expandible" href="#"><span>Engine Template Gallery</span></a><ul><li 
class="level-2"><a class="final" 
href="/gallery/template-gallery/"><span>Browse</span></a></li><li 
class="level-2"><a class="final" 
href="/community/submit-template/"><span>Submit your Engine as a 
Template</span></a></li></ul></li><li class="level-1"><a clas
 s="expandible" href="#"><span>Demo Tutorials</span></a><ul><li 
class="level-2"><a class="final" href="/demo/tapster/"><span>Comics 
Recommendation Demo</span></a></li><li class="level-2"><a class="final" 
href="/demo/community/"><span>Community Contributed Demo</span></a></li><li 
class="level-2"><a class="final active" 
href="/demo/textclassification/"><span>Text Classification Engine 
Tutorial</span></a></li></ul></li><li class="level-1"><a class="expandible" 
href="/community/"><span>Getting Involved</span></a><ul><li class="level-2"><a 
class="final" href="/community/contribute-code/"><span>Contribute 
Code</span></a></li><li class="level-2"><a class="final" 
href="/community/contribute-documentation/"><span>Contribute 
Documentation</span></a></li><li class="level-2"><a class="final" 
href="/community/contribute-sdk/"><span>Contribute a SDK</span></a></li><li 
class="level-2"><a class="final" 
href="/community/contribute-webhook/"><span>Contribute a 
Webhook</span></a></li><li class="level-2
 "><a class="final" href="/community/projects/"><span>Community 
Projects</span></a></li></ul></li><li class="level-1"><a class="expandible" 
href="#"><span>Getting Help</span></a><ul><li class="level-2"><a class="final" 
href="/resources/faq/"><span>FAQs</span></a></li><li class="level-2"><a 
class="final" href="/support/"><span>Support</span></a></li></ul></li><li 
class="level-1"><a class="expandible" 
href="#"><span>Resources</span></a><ul><li class="level-2"><a class="final" 
href="/cli/"><span>Command-line Interface</span></a></li><li class="level-2"><a 
class="final" href="/resources/release/"><span>Release 
Cadence</span></a></li><li class="level-2"><a class="final" 
href="/resources/intellij/"><span>Developing Engines with IntelliJ 
IDEA</span></a></li><li class="level-2"><a class="final" 
href="/resources/upgrade/"><span>Upgrade Instructions</span></a></li><li 
class="level-2"><a class="final" 
href="/resources/glossary/"><span>Glossary</span></a></li></ul></li><li 
class="level-1"><a cla
 ss="expandible" href="#"><span>Apache Software Foundation</span></a><ul><li 
class="level-2"><a class="final" href="https://www.apache.org/";><span>Apache 
Homepage</span></a></li><li class="level-2"><a class="final" 
href="https://www.apache.org/licenses/";><span>License</span></a></li><li 
class="level-2"><a class="final" 
href="https://www.apache.org/foundation/sponsorship.html";><span>Sponsorship</span></a></li><li
 class="level-2"><a class="final" 
href="https://www.apache.org/foundation/thanks.html";><span>Thanks</span></a></li><li
 class="level-2"><a class="final" 
href="https://www.apache.org/security/";><span>Security</span></a></li></ul></li></ul></nav></div><div
 class="col-md-9 col-sm-12"><div class="content-header hidden-md 
hidden-lg"><div id="breadcrumbs" class="hidden-sm hidden xs"><ul><li><a 
href="#">Demo Tutorials</a><span class="spacer">&gt;</span></li><li><span 
class="last">Text Classification Engine Tutorial</span></li></ul></div><div 
id="page-title"><h1>Text Classification Eng
 ine Tutorial</h1></div></div><div id="table-of-content-wrapper"><h5>On this 
page</h5><aside id="table-of-contents"><ul> <li> <a 
href="#introduction">Introduction</a> </li> <li> <a 
href="#prerequisites">Prerequisites</a> </li> <li> <a 
href="#engine-overview">Engine Overview</a> </li> <li> <a 
href="#quick-start">Quick Start</a> </li> </ul> </li> <li> <a 
href="#detailed-explanation-of-dase">Detailed Explanation of DASE</a> <ul> <li> 
<a href="#importing-data">Importing Data</a> </li> <li> <a 
href="#data-source-reading-event-data">Data Source: Reading Event Data</a> 
</li> <li> <a href="#preparator-data-processing-with-dase">Preparator : Data 
Processing With DASE</a> </li> <li> <a href="#algorithm-component">Algorithm 
Component</a> </li> <li> <a 
href="#serving-delivering-the-final-prediction">Serving: Delivering the Final 
Prediction</a> </li> <li> <a 
href="#evaluation-model-assessment-and-selection">Evaluation: Model Assessment 
and Selection</a> </li> <li> <a href="#engine-deployment">Eng
 ine Deployment</a> </li> </ul> </aside><hr/><a id="edit-page-link" 
href="https://github.com/apache/predictionio/tree/livedoc/docs/manual/source/demo/textclassification.html.md.erb";><img
 src="/images/icons/edit-pencil-d6c1bb3d.png"/>Edit this page</a></div><div 
class="content-header hidden-sm hidden-xs"><div id="breadcrumbs" 
class="hidden-sm hidden xs"><ul><li><a href="#">Demo Tutorials</a><span 
class="spacer">&gt;</span></li><li><span class="last">Text Classification 
Engine Tutorial</span></li></ul></div><div id="page-title"><h1>Text 
Classification Engine Tutorial</h1></div></div><div class="content"> 
<p>(Updated for Text Classification Template version 3.1)</p><h2 
id='introduction' class='header-anchors'>Introduction</h2><p>In the real world, 
there are many applications that collect text as data. For example, spam 
detectors take email and header content to automatically determine what is or 
is not spam; applications can gague the general sentiment in a geographical 
area by analyzin
 g Twitter data; and news articles can be automatically categorized based 
solely on the text content.There are a wide array of machine learning models 
you can use to create, or train, a predictive model to assign an incoming 
article, or query, to an existing category. Before you can use these techniques 
you must first transform the text data (in this case the set of news articles) 
into numeric vectors, or feature vectors, that can be used to train your 
model.</p><p>The purpose of this tutorial is to illustrate how you can go about 
doing this using PredictionIO&#39;s platform. The advantages of using this 
platform include: a dynamic engine that responds to queries in real-time; <a 
href="http://en.wikipedia.org/wiki/Separation_of_concerns";>separation of 
concerns</a>, which offers code re-use and maintainability, and distributed 
computing capabilities for scalability and efficiency. Moreover, it is easy to 
incorporate non-trivial data modeling tasks into the DASE architecture allowing 
D
 ata Scientists to focus on tasks related to modeling. This tutorial will 
exemplify some of these ideas by guiding you through PredictionIO&#39;s <a 
href="/gallery/template-gallery/#natural-language-processing">text 
classification template</a>.</p><h2 id='prerequisites' 
class='header-anchors'>Prerequisites</h2><p>Before getting started, please make 
sure that you have the latest version of Apache PredictionIO <a 
href="http://predictionio.apache.org/install/";>installed</a>. We emphasize here 
that this is an engine template written in <strong>Scala</strong> and can be 
more generally thought of as an SBT project containing all the necessary 
components.</p><p>You should also download the engine template named Text 
Classification Engine that accompanies this tutorial by cloning the template 
repository:</p><div class="highlight shell"><table style="border-spacing: 
0"><tbody><tr><td class="gutter gl" style="text-align: right"><pre 
class="lineno">1</pre></td><td class="code"><pre>git clone ht
 tps://github.com/apache/predictionio-template-text-classifier.git &lt; Your 
new engine directory &gt;
 </pre></td></tr></tbody></table> </div> <h2 id='engine-overview' 
class='header-anchors'>Engine Overview</h2><p>The engine follows the DASE 
architecture which we briefly review here. As a user, you are tasked with 
collecting data for your web or application, and importing it into 
PredictionIO&#39;s Event Server. Once the data is in the server, it can be read 
and processed by the engine via the Data Source and Preparation components, 
respectively. The Algorithm component uses the processed, or prepared, data to 
train a set of predictive models. Once you have trained these models, you are 
ready to deploy your engine and respond to real-time queries via the Serving 
component which combines the results from different fitted models. The 
Evaluation component is used to compute an appropriate metric to test the 
performance of a fitted model, as well as aid in the tuning of model hyper 
parameters.</p><p>This engine template is meant to handle text classification 
which means you will be worki
 ng with text data. This means that a query, or newly observed documents, will 
be of the form:</p><p><code>{text : String}</code>.</p><p>In the running 
example, a query would be an incoming news article. Once the engine is deployed 
it can process the query, and then return a Predicted Result of the 
form</p><p><code>{category : String, confidence : Double}</code>.</p><p>Here 
category is the model&#39;s class assignment for this new text document (i.e. 
the best guess for this article&#39;s categorization), and confidence, a value 
between 0 and 1 representing your confidence in the category prediction (0 
meaning you have no confidence in the prediction). The Actual Result is of the 
form</p><p><code>{category : String}</code>.</p><p>This is used in the 
evaluation stage when estimating the performance of your predictive model (how 
well does the model predict categories). Please refer to the <a 
href="https://predictionio.apache.org/customize/";>following tutorial</a> for a 
more detailed exp
 lanation of how your engine will interact with your web application, as well 
as an in depth-overview of DASE.</p><h2 id='quick-start' 
class='header-anchors'>Quick Start</h2><p>This is a quick start guide in case 
you want to start using the engine right away. Sample email data for spam 
classification will be used. For more detailed information, read the subsequent 
sections.</p><h3 id='1.-create-a-new-application.' class='header-anchors'>1. 
Create a new application.</h3><p>After the application is created, you will be 
given an access key and application ID for the application.</p><div 
class="highlight shell"><table style="border-spacing: 0"><tbody><tr><td 
class="gutter gl" style="text-align: right"><pre class="lineno">1</pre></td><td 
class="code"><pre><span class="gp">$ </span>pio app new MyTextApp
 </pre></td></tr></tbody></table> </div> <h3 id='2.-import-the-tutorial-data.' 
class='header-anchors'>2. Import the tutorial data.</h3><p>There are three 
different data sets available, each giving a different use case for this 
engine. Please refer to the <strong>Data Source: Reading Event Data</strong> 
section to see how to appropriate modify the <code>DataSource</code> class for 
use with each respective data set. The default data set is an e-mail spam data 
set.</p><p>These data sets have already been processed and are ready for <a 
href="/datacollection/batchimport/">batch import</a>. Replace <code>***</code> 
with your actual application ID.</p><div class="highlight shell"><table 
style="border-spacing: 0"><tbody><tr><td class="gutter gl" style="text-align: 
right"><pre class="lineno">1
 2
@@ -33,7 +33,7 @@
 26</pre></td><td class="code"><pre><span class="o">{</span>
   <span class="s2">"id"</span>: <span class="s2">"default"</span>,
   <span class="s2">"description"</span>: <span class="s2">"Default 
settings"</span>,
-  <span class="s2">"engineFactory"</span>: <span 
class="s2">"org.template.textclassification.TextClassificationEngine"</span>,
+  <span class="s2">"engineFactory"</span>: <span 
class="s2">"org.example.textclassification.TextClassificationEngine"</span>,
   <span class="s2">"datasource"</span>: <span class="o">{</span>
     <span class="s2">"params"</span>: <span class="o">{</span>
       <span class="s2">"appName"</span>: <span class="s2">"MyTextApp"</span>
@@ -70,7 +70,7 @@
 </pre></td></tr></tbody></table> </div> <p>you should see following outputs 
returned by the engine:</p><div class="highlight shell"><table 
style="border-spacing: 0"><tbody><tr><td class="gutter gl" style="text-align: 
right"><pre class="lineno">1</pre></td><td class="code"><pre><span 
class="o">{</span><span class="s2">"category"</span>:<span class="s2">"not 
spam"</span>,<span class="s2">"confidence"</span>:0.852619510921587<span 
class="o">}</span>
 </pre></td></tr></tbody></table> </div> <p>Try another query:</p><div 
class="highlight shell"><table style="border-spacing: 0"><tbody><tr><td 
class="gutter gl" style="text-align: right"><pre class="lineno">1</pre></td><td 
class="code"><pre><span class="gp">$ </span>curl -H <span 
class="s2">"Content-Type: application/json"</span> -d <span class="s1">'{ 
"text":"Earn extra cash!" }'</span> http://localhost:8000/queries.json
 </pre></td></tr></tbody></table> </div> <p>you should see following outputs 
returned by the engine:</p><div class="highlight shell"><table 
style="border-spacing: 0"><tbody><tr><td class="gutter gl" style="text-align: 
right"><pre class="lineno">1</pre></td><td class="code"><pre><span 
class="o">{</span><span class="s2">"category"</span>:<span 
class="s2">"spam"</span>,<span 
class="s2">"confidence"</span>:0.5268770133242983<span class="o">}</span>
-</pre></td></tr></tbody></table> </div> <h3 
id='5.b.evaluate-your-training-model-and-tune-parameters.' 
class='header-anchors'>5.b.Evaluate your training model and tune 
parameters.</h3><div class="highlight shell"><table style="border-spacing: 
0"><tbody><tr><td class="gutter gl" style="text-align: right"><pre 
class="lineno">1</pre></td><td class="code"><pre><span class="gp">$ </span>pio 
<span class="nb">eval </span>org.template.textclassification.AccuracyEvaluation 
org.template.textclassification.EngineParamsList
+</pre></td></tr></tbody></table> </div> <h3 
id='5.b.evaluate-your-training-model-and-tune-parameters.' 
class='header-anchors'>5.b.Evaluate your training model and tune 
parameters.</h3><div class="highlight shell"><table style="border-spacing: 
0"><tbody><tr><td class="gutter gl" style="text-align: right"><pre 
class="lineno">1</pre></td><td class="code"><pre><span class="gp">$ </span>pio 
<span class="nb">eval </span>org.example.textclassification.AccuracyEvaluation 
org.example.textclassification.EngineParamsList
 </pre></td></tr></tbody></table> </div> <p><strong>Note:</strong> Training and 
evaluation stages are generally different stages of engine development. 
Evaluation is there to help you choose the best <a 
href="/evaluation/paramtuning/">algorithm parameters</a> to use for training an 
engine that is to be deployed as a web service.</p><p>Depending on your needs, 
in steps (5.x.) above, you can configure your Spark settings by typing a 
command of the form:</p><div class="highlight shell"><table 
style="border-spacing: 0"><tbody><tr><td class="gutter gl" style="text-align: 
right"><pre class="lineno">1</pre></td><td class="code"><pre><span class="gp">$ 
</span>pio <span class="nb">command </span>command_parameters -- --master url 
--driver-memory <span class="o">{</span>0<span class="o">}</span>G 
--executor-memory <span class="o">{</span>1<span class="o">}</span>G --conf 
spark.akka.framesize<span class="o">={</span>2<span class="o">}</span> 
--total_executor_cores <span class="o">{</span>3<span
  class="o">}</span>
 </pre></td></tr></tbody></table> </div> <p>Only the latter commands are listed 
as these are some of the more commonly modified values. See the <a 
href="https://spark.apache.org/docs/latest/spark-standalone.html";>Spark 
documentation</a> and the <a 
href="http://predictionio.apache.org/resources/faq/";>PredictionIO FAQ&#39;s</a> 
for more information.</p><p><strong>Note:</strong> We recommend you set your 
driver memory to <code>1G</code> or <code>2G</code> as the data size when 
dealing with text can be very large.</p><h1 id='detailed-explanation-of-dase' 
class='header-anchors'>Detailed Explanation of DASE</h1><h2 id='importing-data' 
class='header-anchors'>Importing Data</h2><p>In the quick start, email spam 
classification is used. This template can easily be modified for other types 
text classification.</p><p>If you want to import different sets of data, follow 
the Quick Start instructions to import data from different files. Make sure 
that the Data Source is modified accordingly to matc
 h the <code>event</code>, <code>entityType</code>, and <code>properties</code> 
fields set for the specific dataset. The following section explains this in 
more detail.</p><h2 id='data-source:-reading-event-data' 
class='header-anchors'>Data Source: Reading Event Data</h2><p>Now that the data 
has been imported into PredictionIO&#39;s Event Server, it needs to be read 
from storage to be used by the engine. This is precisely what the DataSource 
engine component is for, which is implemented in the template script 
<code>DataSource.scala</code>. The class <code>Observation</code> serves as a 
wrapper for storing the information about a news document needed to train a 
model. The attribute label refers to the label of the category a document 
belongs to, and text, stores the actual document content as a string. The class 
TrainingData is used to store an RDD of Observation objects along with the set 
of stop words.</p><p>The class <code>DataSourceParams</code> is used to specify 
the parameters n
 eeded to read and prepare the data for processing. This class is initialized 
with two parameters <code>appName</code> and <code>evalK</code>. The first 
parameter specifies your application name (i.e. MyTextApp), which is needed so 
that the DataSource component knows where to pull the event data from. The 
second parameter is used for model evaluation and specifies the number of folds 
to use in <a 
href="http://en.wikipedia.org/wiki/Cross-validation_%28statistics%29";>cross-validation</a>
 when estimating a model performance metric.</p><p>The final and most important 
ingredient is the DataSource class. This is initialized with its corresponding 
parameter class, and extends <code>PDataSource</code>. This 
<strong>must</strong> implement the method <code>readTraining</code> which 
returns an instance of type TrainingData. This method completely relies on the 
defined private methods readEventData and readStopWords. Both of these 
functions read data observations as Event instances, create an R
 DD containing these events and finally transforms the RDD of events into an 
object of the appropriate type as seen below:</p><div class="highlight 
scala"><table style="border-spacing: 0"><tbody><tr><td class="gutter gl" 
style="text-align: right"><pre class="lineno">1
 2
@@ -493,7 +493,7 @@
 23</pre></td><td class="code"><pre><span class="w">  </span><span 
class="p">{</span><span class="w">
   </span><span class="s2">"id"</span><span class="p">:</span><span class="w"> 
</span><span class="s2">"default"</span><span class="p">,</span><span class="w">
   </span><span class="s2">"description"</span><span class="p">:</span><span 
class="w"> </span><span class="s2">"Default settings"</span><span 
class="p">,</span><span class="w">
-  </span><span class="s2">"engineFactory"</span><span class="p">:</span><span 
class="w"> </span><span 
class="s2">"org.template.textclassification.TextClassificationEngine"</span><span
 class="p">,</span><span class="w">
+  </span><span class="s2">"engineFactory"</span><span class="p">:</span><span 
class="w"> </span><span 
class="s2">"org.example.textclassification.TextClassificationEngine"</span><span
 class="p">,</span><span class="w">
   </span><span class="s2">"datasource"</span><span class="p">:</span><span 
class="w"> </span><span class="p">{</span><span class="w">
     </span><span class="s2">"params"</span><span class="p">:</span><span 
class="w"> </span><span class="p">{</span><span class="w">
       </span><span class="s2">"appName"</span><span class="p">:</span><span 
class="w"> </span><span class="s2">"MyTextApp"</span><span class="w">
@@ -543,11 +543,11 @@
 </pre></td></tr></tbody></table> </div> <h2 id='engine-deployment' 
class='header-anchors'>Engine Deployment</h2><p>Once an engine is ready for 
deployment it can interact with your web application in real-time. This section 
will cover how to send and receive queries from your engine, gather more data, 
and re-training your model with the newly gathered data.</p><h3 
id='sending-queries' class='header-anchors'>Sending Queries</h3><p>Recall that 
one of the greatest advantages of using the PredictionIO platform is that once 
your engine is deployed, you can respond to queries in real-time. Recall that 
our queries are of the form</p><p><code>{&quot;text&quot; : 
&quot;...&quot;}</code>.</p><p>To actually send a query you can use our REST 
API by typing in the following shell command:</p><div class="highlight 
shell"><table style="border-spacing: 0"><tbody><tr><td class="gutter gl" 
style="text-align: right"><pre class="lineno">1</pre></td><td 
class="code"><pre>curl -H <span class="s2">"Content-
 Type: application/json"</span> -d <span class="s1">'{ "text":"I like speed and 
fast motorcycles." }'</span> http://localhost:8000/queries.json
 </pre></td></tr></tbody></table> </div> <p>There are a number of <a 
href="https://github.com/PredictionIO";>SDK&#39;s</a> you can use to send your 
queries and obtain a response. Recall that our predicted response is of the 
form</p><div class="highlight shell"><table style="border-spacing: 
0"><tbody><tr><td class="gutter gl" style="text-align: right"><pre 
class="lineno">1</pre></td><td class="code"><pre><span class="o">{</span><span 
class="s2">"category"</span> : <span class="s2">"class"</span>, <span 
class="s2">"confidence"</span> : 1.0<span class="o">}</span>
 </pre></td></tr></tbody></table> </div> <p>which is what you should see upon 
inputting the latter command for querying.</p><h3 
id='gathering-more-data-and-retraining-your-model' 
class='header-anchors'>Gathering More Data and Retraining Your Model</h3><p>The 
importing data section that is included in this tutorial uses a sample data set 
for illustration purposes, and uses the PredictionIO Python SDK to import the 
data. However, there are a variety of ways that you can <a 
href="//predictionio.apache.org/datacollection/eventapi/">import</a> your 
collected data (via REST or other SDKs).</p><p>As you continue to collect your 
data, it is quite easy to retrain your model once you actually import your data 
into the Event Server. You simply repeat the steps listed in the Quick Start 
guide. We re-list them here again:</p><p><strong>1.</strong> Build your 
engine.</p><div class="highlight shell"><table style="border-spacing: 
0"><tbody><tr><td class="gutter gl" style="text-align: right"><pre cla
 ss="lineno">1</pre></td><td class="code"><pre><span class="gp">$ </span>pio 
build
-</pre></td></tr></tbody></table> </div> <p><strong>2.a.</strong> Evaluate your 
training model and tune parameters.</p><div class="highlight shell"><table 
style="border-spacing: 0"><tbody><tr><td class="gutter gl" style="text-align: 
right"><pre class="lineno">1</pre></td><td class="code"><pre><span class="gp">$ 
</span>pio <span class="nb">eval 
</span>org.template.textclassification.AccuracyEvaluation 
org.template.textclassification.EngineParamsList
+</pre></td></tr></tbody></table> </div> <p><strong>2.a.</strong> Evaluate your 
training model and tune parameters.</p><div class="highlight shell"><table 
style="border-spacing: 0"><tbody><tr><td class="gutter gl" style="text-align: 
right"><pre class="lineno">1</pre></td><td class="code"><pre><span class="gp">$ 
</span>pio <span class="nb">eval 
</span>org.example.textclassification.AccuracyEvaluation 
org.example.textclassification.EngineParamsList
 </pre></td></tr></tbody></table> </div> <p><strong>2.b.</strong> Train your 
model and deploy.</p><div class="highlight shell"><table style="border-spacing: 
0"><tbody><tr><td class="gutter gl" style="text-align: right"><pre 
class="lineno">1
 2</pre></td><td class="code"><pre><span class="gp">$ </span>pio train
 <span class="gp">$ </span>pio deploy
-</pre></td></tr></tbody></table> </div> </div></div></div></div><footer><div 
class="container"><div class="seperator"></div><div class="row"><div 
class="col-md-6 footer-link-column"><div 
class="footer-link-column-row"><h4>Community</h4><ul><li><a 
href="//predictionio.apache.org/install/" 
target="blank">Download</a></li><li><a href="//predictionio.apache.org/" 
target="blank">Docs</a></li><li><a 
href="//github.com/apache/incubator-predictionio" 
target="blank">GitHub</a></li><li><a 
href="mailto:[email protected]"; target="blank">Subscribe 
to User Mailing List</a></li><li><a 
href="//stackoverflow.com/questions/tagged/predictionio" 
target="blank">Stackoverflow</a></li></ul></div></div><div class="col-md-6 
footer-link-column"><div 
class="footer-link-column-row"><h4>Contribute</h4><ul><li><a 
href="//predictionio.apache.org/community/contribute-code/" 
target="blank">Contribute</a></li><li><a 
href="//github.com/apache/incubator-predictionio" target="blank">Source Code</a>
 </li><li><a href="//issues.apache.org/jira/browse/PIO" target="blank">Bug 
Tracker</a></li><li><a href="mailto:[email protected]"; 
target="blank">Subscribe to Development Mailing 
List</a></li></ul></div></div></div><div class="row"><div class="col-md-12 
footer-link-column"><p>Apache PredictionIO, PredictionIO, Apache, the Apache 
feather logo, and the Apache PredictionIO project logo are either registered 
trademarks or trademarks of The Apache Software Foundation in the United States 
and other countries.</p><p>All other marks mentioned may be trademarks or 
registered trademarks of their respective owners.</p></div></div></div><div 
id="footer-bottom"><div class="container"><div class="row"><div 
class="col-md-12"><div id="footer-logo-wrapper"><img alt="PredictionIO" 
src="/images/logos/logo-white-d1e9c6e6.png"/><span>®</span></div><div 
id="social-icons-wrapper"><a class="github-button" 
href="https://github.com/apache/incubator-predictionio"; data-style="mega" 
data-coun
 t-href="/apache/incubator-predictionio/stargazers" 
data-count-api="/repos/apache/incubator-predictionio#stargazers_count" 
data-count-aria-label="# stargazers on GitHub" aria-label="Star 
apache/incubator-predictionio on GitHub">Star</a> <a class="github-button" 
href="https://github.com/apache/incubator-predictionio/fork"; 
data-icon="octicon-git-branch" data-style="mega" 
data-count-href="/apache/incubator-predictionio/network" 
data-count-api="/repos/apache/incubator-predictionio#forks_count" 
data-count-aria-label="# forks on GitHub" aria-label="Fork 
apache/incubator-predictionio on GitHub">Fork</a> <script id="github-bjs" 
async="" defer="" src="https://buttons.github.io/buttons.js";></script><a 
href="https://twitter.com/predictionio"; target="blank"><img alt="PredictionIO 
on Twitter" src="/images/icons/twitter-ea9dc152.png"/></a> <a 
href="https://www.facebook.com/predictionio"; target="blank"><img 
alt="PredictionIO on Facebook" src="/images/icons/facebook-5c57939c.png"/></a> 
</div></div><
 
/div></div></div></footer></div><script>(function(w,d,t,u,n,s,e){w['SwiftypeObject']=n;w[n]=w[n]||function(){
+</pre></td></tr></tbody></table> </div> </div></div></div></div><footer><div 
class="container"><div class="seperator"></div><div class="row"><div 
class="col-md-6 footer-link-column"><div 
class="footer-link-column-row"><h4>Community</h4><ul><li><a 
href="//predictionio.apache.org/install/" 
target="blank">Download</a></li><li><a href="//predictionio.apache.org/" 
target="blank">Docs</a></li><li><a href="//github.com/apache/predictionio" 
target="blank">GitHub</a></li><li><a 
href="mailto:[email protected]"; target="blank">Subscribe 
to User Mailing List</a></li><li><a 
href="//stackoverflow.com/questions/tagged/predictionio" 
target="blank">Stackoverflow</a></li></ul></div></div><div class="col-md-6 
footer-link-column"><div 
class="footer-link-column-row"><h4>Contribute</h4><ul><li><a 
href="//predictionio.apache.org/community/contribute-code/" 
target="blank">Contribute</a></li><li><a 
href="//github.com/apache/predictionio" target="blank">Source 
Code</a></li><li><a href="//
 issues.apache.org/jira/browse/PIO" target="blank">Bug Tracker</a></li><li><a 
href="mailto:[email protected]"; target="blank">Subscribe to 
Development Mailing List</a></li></ul></div></div></div><div class="row"><div 
class="col-md-12 footer-link-column"><p>Apache PredictionIO, PredictionIO, 
Apache, the Apache feather logo, and the Apache PredictionIO project logo are 
either registered trademarks or trademarks of The Apache Software Foundation in 
the United States and other countries.</p><p>All other marks mentioned may be 
trademarks or registered trademarks of their respective 
owners.</p></div></div></div><div id="footer-bottom"><div 
class="container"><div class="row"><div class="col-md-12"><div 
id="footer-logo-wrapper"><img alt="PredictionIO" 
src="/images/logos/logo-white-d1e9c6e6.png"/><span>®</span></div><div 
id="social-icons-wrapper"><a class="github-button" 
href="https://github.com/apache/predictionio"; data-style="mega" 
data-count-href="/apache/predictionio/s
 targazers" data-count-api="/repos/apache/predictionio#stargazers_count" 
data-count-aria-label="# stargazers on GitHub" aria-label="Star 
apache/predictionio on GitHub">Star</a> <a class="github-button" 
href="https://github.com/apache/predictionio/fork"; 
data-icon="octicon-git-branch" data-style="mega" 
data-count-href="/apache/predictionio/network" 
data-count-api="/repos/apache/predictionio#forks_count" 
data-count-aria-label="# forks on GitHub" aria-label="Fork apache/predictionio 
on GitHub">Fork</a> <script id="github-bjs" async="" defer="" 
src="https://buttons.github.io/buttons.js";></script><a 
href="https://twitter.com/predictionio"; target="blank"><img alt="PredictionIO 
on Twitter" src="/images/icons/twitter-ea9dc152.png"/></a> <a 
href="https://www.facebook.com/predictionio"; target="blank"><img 
alt="PredictionIO on Facebook" src="/images/icons/facebook-5c57939c.png"/></a> 
</div></div></div></div></div></footer></div><script>(function(w,d,t,u,n,s,e){w['SwiftypeObject']=n;w[n]=w[n]||fu
 nction(){
 (w[n].q=w[n].q||[]).push(arguments);};s=d.createElement(t);
 
e=d.getElementsByTagName(t)[0];s.async=1;s.src=u;e.parentNode.insertBefore(s,e);
 })(window,document,'script','//s.swiftypecdn.com/install/v1/st.js','_st');

Reply via email to