http://git-wip-us.apache.org/repos/asf/predictionio-site/blob/765e178c/machinelearning/dimensionalityreduction/index.html
----------------------------------------------------------------------
diff --git a/machinelearning/dimensionalityreduction/index.html 
b/machinelearning/dimensionalityreduction/index.html
index 7ada946..ae8925f 100644
--- a/machinelearning/dimensionalityreduction/index.html
+++ b/machinelearning/dimensionalityreduction/index.html
@@ -1,4 +1,4 @@
-<!DOCTYPE html><html><head><title>Dimensionality Reduction With 
PredictionIO</title><meta charset="utf-8"/><meta content="IE=edge,chrome=1" 
http-equiv="X-UA-Compatible"/><meta name="viewport" 
content="width=device-width, initial-scale=1.0"/><meta class="swiftype" 
name="title" data-type="string" content="Dimensionality Reduction With 
PredictionIO"/><link rel="canonical" 
href="https://predictionio.apache.org/machinelearning/dimensionalityreduction/"/><link
 href="/images/favicon/normal-b330020a.png" rel="shortcut icon"/><link 
href="/images/favicon/apple-c0febcf2.png" rel="apple-touch-icon"/><link 
href="//fonts.googleapis.com/css?family=Open+Sans:300italic,400italic,600italic,700italic,800italic,400,300,600,700,800"
 rel="stylesheet"/><link 
href="//maxcdn.bootstrapcdn.com/font-awesome/4.2.0/css/font-awesome.min.css" 
rel="stylesheet"/><link href="/stylesheets/application-eccfc6cb.css" 
rel="stylesheet" type="text/css"/><script 
src="//cdnjs.cloudflare.com/ajax/libs/html5shiv/3.7.2/html5shiv
 .min.js"></script><script 
src="//cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script><script
 src="//use.typekit.net/pqo0itb.js"></script><script>try{Typekit.load({ async: 
true });}catch(e){}</script></head><body><div id="global"><header><div 
class="container" id="header-wrapper"><div class="row"><div 
class="col-sm-12"><div id="logo-wrapper"><span id="drawer-toggle"></span><a 
href="#"></a><a href="http://predictionio.apache.org/";><img alt="Apache 
PredictionIO" id="logo" 
src="/images/logos/logo-ee2b9bb3.png"/></a><span>®</span></div><div 
id="menu-wrapper"><div id="pill-wrapper"><a class="pill left" 
href="/gallery/template-gallery">TEMPLATES</a> <a class="pill right" 
href="//github.com/apache/predictionio/">OPEN SOURCE</a></div></div><img 
class="mobile-search-bar-toggler hidden-md hidden-lg" 
src="/images/icons/search-glass-704bd4ff.png"/></div></div></div></header><div 
id="search-bar-row-wrapper"><div class="container-fluid" 
id="search-bar-row"><div class=
 "row"><div class="col-md-9 col-sm-11 col-xs-11"><div class="hidden-md 
hidden-lg" id="mobile-page-heading-wrapper"><p>PredictionIO 
Docs</p><h4>Dimensionality Reduction With PredictionIO</h4></div><h4 
class="hidden-sm hidden-xs">PredictionIO Docs</h4></div><div class="col-md-3 
col-sm-1 col-xs-1 hidden-md hidden-lg"><img id="left-menu-indicator" 
src="/images/icons/down-arrow-dfe9f7fe.png"/></div><div class="col-md-3 
col-sm-12 col-xs-12 swiftype-wrapper"><div class="swiftype"><form 
class="search-form"><img class="search-box-toggler hidden-xs hidden-sm" 
src="/images/icons/search-glass-704bd4ff.png"/><div class="search-box"><img 
src="/images/icons/search-glass-704bd4ff.png"/><input type="text" 
id="st-search-input" class="st-search-input" placeholder="Search 
Doc..."/></div><img class="swiftype-row-hider hidden-md hidden-lg" 
src="/images/icons/drawer-toggle-active-fcbef12a.png"/></form></div></div><div 
class="mobile-left-menu-toggler hidden-md 
hidden-lg"></div></div></div></div><div id="pag
 e" class="container-fluid"><div class="row"><div id="left-menu-wrapper" 
class="col-md-3"><nav id="nav-main"><ul><li class="level-1"><a 
class="expandible" href="/"><span>Apache PredictionIO® 
Documentation</span></a><ul><li class="level-2"><a class="final" 
href="/"><span>Welcome to Apache PredictionIO®</span></a></li></ul></li><li 
class="level-1"><a class="expandible" href="#"><span>Getting 
Started</span></a><ul><li class="level-2"><a class="final" 
href="/start/"><span>A Quick Intro</span></a></li><li class="level-2"><a 
class="final" href="/install/"><span>Installing Apache 
PredictionIO</span></a></li><li class="level-2"><a class="final" 
href="/start/download/"><span>Downloading an Engine Template</span></a></li><li 
class="level-2"><a class="final" href="/start/deploy/"><span>Deploying Your 
First Engine</span></a></li><li class="level-2"><a class="final" 
href="/start/customize/"><span>Customizing the 
Engine</span></a></li></ul></li><li class="level-1"><a class="expandible" 
href="#">
 <span>Integrating with Your App</span></a><ul><li class="level-2"><a 
class="final" href="/appintegration/"><span>App Integration 
Overview</span></a></li><li class="level-2"><a class="expandible" 
href="/sdk/"><span>List of SDKs</span></a><ul><li class="level-3"><a 
class="final" href="/sdk/java/"><span>Java & Android SDK</span></a></li><li 
class="level-3"><a class="final" href="/sdk/php/"><span>PHP 
SDK</span></a></li><li class="level-3"><a class="final" 
href="/sdk/python/"><span>Python SDK</span></a></li><li class="level-3"><a 
class="final" href="/sdk/ruby/"><span>Ruby SDK</span></a></li><li 
class="level-3"><a class="final" href="/sdk/community/"><span>Community Powered 
SDKs</span></a></li></ul></li></ul></li><li class="level-1"><a 
class="expandible" href="#"><span>Deploying an Engine</span></a><ul><li 
class="level-2"><a class="final" href="/deploy/"><span>Deploying as a Web 
Service</span></a></li><li class="level-2"><a class="final" 
href="/batchpredict/"><span>Batch Predictions</span
 ></a></li><li class="level-2"><a class="final" 
 >href="/deploy/monitoring/"><span>Monitoring Engine</span></a></li><li 
 >class="level-2"><a class="final" href="/deploy/engineparams/"><span>Setting 
 >Engine Parameters</span></a></li><li class="level-2"><a class="final" 
 >href="/deploy/enginevariants/"><span>Deploying Multiple Engine 
 >Variants</span></a></li><li class="level-2"><a class="final" 
 >href="/deploy/plugin/"><span>Engine Server 
 >Plugin</span></a></li></ul></li><li class="level-1"><a class="expandible" 
 >href="#"><span>Customizing an Engine</span></a><ul><li class="level-2"><a 
 >class="final" href="/customize/"><span>Learning DASE</span></a></li><li 
 >class="level-2"><a class="final" href="/customize/dase/"><span>Implement 
 >DASE</span></a></li><li class="level-2"><a class="final" 
 >href="/customize/troubleshooting/"><span>Troubleshooting Engine 
 >Development</span></a></li><li class="level-2"><a class="final" 
 >href="/api/current/#package"><span>Engine Scala 
 >APIs</span></a></li></ul></li><li class="
 level-1"><a class="expandible" href="#"><span>Collecting and Analyzing 
Data</span></a><ul><li class="level-2"><a class="final" 
href="/datacollection/"><span>Event Server Overview</span></a></li><li 
class="level-2"><a class="final" 
href="/datacollection/eventapi/"><span>Collecting Data with 
REST/SDKs</span></a></li><li class="level-2"><a class="final" 
href="/datacollection/eventmodel/"><span>Events Modeling</span></a></li><li 
class="level-2"><a class="final" 
href="/datacollection/webhooks/"><span>Unifying Multichannel Data with 
Webhooks</span></a></li><li class="level-2"><a class="final" 
href="/datacollection/channel/"><span>Channel</span></a></li><li 
class="level-2"><a class="final" 
href="/datacollection/batchimport/"><span>Importing Data in 
Batch</span></a></li><li class="level-2"><a class="final" 
href="/datacollection/analytics/"><span>Using Analytics 
Tools</span></a></li><li class="level-2"><a class="final" 
href="/datacollection/plugin/"><span>Event Server Plugin</span></a></li><
 /ul></li><li class="level-1"><a class="expandible" href="#"><span>Choosing an 
Algorithm</span></a><ul><li class="level-2"><a class="final" 
href="/algorithm/"><span>Built-in Algorithm Libraries</span></a></li><li 
class="level-2"><a class="final" href="/algorithm/switch/"><span>Switching to 
Another Algorithm</span></a></li><li class="level-2"><a class="final" 
href="/algorithm/multiple/"><span>Combining Multiple 
Algorithms</span></a></li><li class="level-2"><a class="final" 
href="/algorithm/custom/"><span>Adding Your Own 
Algorithms</span></a></li></ul></li><li class="level-1"><a class="expandible" 
href="#"><span>Tuning and Evaluation</span></a><ul><li class="level-2"><a 
class="final" href="/evaluation/"><span>Overview</span></a></li><li 
class="level-2"><a class="final" 
href="/evaluation/paramtuning/"><span>Hyperparameter Tuning</span></a></li><li 
class="level-2"><a class="final" 
href="/evaluation/evaluationdashboard/"><span>Evaluation 
Dashboard</span></a></li><li class="level-2"><a cla
 ss="final" href="/evaluation/metricchoose/"><span>Choosing Evaluation 
Metrics</span></a></li><li class="level-2"><a class="final" 
href="/evaluation/metricbuild/"><span>Building Evaluation 
Metrics</span></a></li></ul></li><li class="level-1"><a class="expandible" 
href="#"><span>System Architecture</span></a><ul><li class="level-2"><a 
class="final" href="/system/"><span>Architecture Overview</span></a></li><li 
class="level-2"><a class="final" href="/system/anotherdatastore/"><span>Using 
Another Data Store</span></a></li></ul></li><li class="level-1"><a 
class="expandible" href="#"><span>PredictionIO® Official 
Templates</span></a><ul><li class="level-2"><a class="final" 
href="/templates/"><span>Intro</span></a></li><li class="level-2"><a 
class="expandible" href="#"><span>Recommendation</span></a><ul><li 
class="level-3"><a class="final" 
href="/templates/recommendation/quickstart/"><span>Quick 
Start</span></a></li><li class="level-3"><a class="final" 
href="/templates/recommendation/dase/
 "><span>DASE</span></a></li><li class="level-3"><a class="final" 
href="/templates/recommendation/evaluation/"><span>Evaluation 
Explained</span></a></li><li class="level-3"><a class="final" 
href="/templates/recommendation/how-to/"><span>How-To</span></a></li><li 
class="level-3"><a class="final" 
href="/templates/recommendation/reading-custom-events/"><span>Read Custom 
Events</span></a></li><li class="level-3"><a class="final" 
href="/templates/recommendation/customize-data-prep/"><span>Customize Data 
Preparator</span></a></li><li class="level-3"><a class="final" 
href="/templates/recommendation/customize-serving/"><span>Customize 
Serving</span></a></li><li class="level-3"><a class="final" 
href="/templates/recommendation/training-with-implicit-preference/"><span>Train 
with Implicit Preference</span></a></li><li class="level-3"><a class="final" 
href="/templates/recommendation/blacklist-items/"><span>Filter Recommended 
Items by Blacklist in Query</span></a></li><li class="level-3"><a class
 ="final" href="/templates/recommendation/batch-evaluator/"><span>Batch 
Persistable Evaluator</span></a></li></ul></li><li class="level-2"><a 
class="expandible" href="#"><span>E-Commerce Recommendation</span></a><ul><li 
class="level-3"><a class="final" 
href="/templates/ecommercerecommendation/quickstart/"><span>Quick 
Start</span></a></li><li class="level-3"><a class="final" 
href="/templates/ecommercerecommendation/dase/"><span>DASE</span></a></li><li 
class="level-3"><a class="final" 
href="/templates/ecommercerecommendation/how-to/"><span>How-To</span></a></li><li
 class="level-3"><a class="final" 
href="/templates/ecommercerecommendation/train-with-rate-event/"><span>Train 
with Rate Event</span></a></li><li class="level-3"><a class="final" 
href="/templates/ecommercerecommendation/adjust-score/"><span>Adjust 
Score</span></a></li></ul></li><li class="level-2"><a class="expandible" 
href="#"><span>Similar Product</span></a><ul><li class="level-3"><a 
class="final" href="/templates/similarpr
 oduct/quickstart/"><span>Quick Start</span></a></li><li class="level-3"><a 
class="final" 
href="/templates/similarproduct/dase/"><span>DASE</span></a></li><li 
class="level-3"><a class="final" 
href="/templates/similarproduct/how-to/"><span>How-To</span></a></li><li 
class="level-3"><a class="final" 
href="/templates/similarproduct/multi-events-multi-algos/"><span>Multiple 
Events and Multiple Algorithms</span></a></li><li class="level-3"><a 
class="final" 
href="/templates/similarproduct/return-item-properties/"><span>Returns Item 
Properties</span></a></li><li class="level-3"><a class="final" 
href="/templates/similarproduct/train-with-rate-event/"><span>Train with Rate 
Event</span></a></li><li class="level-3"><a class="final" 
href="/templates/similarproduct/rid-user-set-event/"><span>Get Rid of Events 
for Users</span></a></li><li class="level-3"><a class="final" 
href="/templates/similarproduct/recommended-user/"><span>Recommend 
Users</span></a></li></ul></li><li class="level-2"><a class="e
 xpandible" href="#"><span>Classification</span></a><ul><li class="level-3"><a 
class="final" href="/templates/classification/quickstart/"><span>Quick 
Start</span></a></li><li class="level-3"><a class="final" 
href="/templates/classification/dase/"><span>DASE</span></a></li><li 
class="level-3"><a class="final" 
href="/templates/classification/how-to/"><span>How-To</span></a></li><li 
class="level-3"><a class="final" 
href="/templates/classification/add-algorithm/"><span>Use Alternative 
Algorithm</span></a></li><li class="level-3"><a class="final" 
href="/templates/classification/reading-custom-properties/"><span>Read Custom 
Properties</span></a></li></ul></li></ul></li><li class="level-1"><a 
class="expandible" href="#"><span>Engine Template Gallery</span></a><ul><li 
class="level-2"><a class="final" 
href="/gallery/template-gallery/"><span>Browse</span></a></li><li 
class="level-2"><a class="final" 
href="/community/submit-template/"><span>Submit your Engine as a 
Template</span></a></li></ul><
 /li><li class="level-1"><a class="expandible" href="#"><span>Demo 
Tutorials</span></a><ul><li class="level-2"><a class="final" 
href="/demo/tapster/"><span>Comics Recommendation Demo</span></a></li><li 
class="level-2"><a class="final" href="/demo/community/"><span>Community 
Contributed Demo</span></a></li><li class="level-2"><a class="final" 
href="/demo/textclassification/"><span>Text Classification Engine 
Tutorial</span></a></li></ul></li><li class="level-1"><a class="expandible" 
href="/community/"><span>Getting Involved</span></a><ul><li class="level-2"><a 
class="final" href="/community/contribute-code/"><span>Contribute 
Code</span></a></li><li class="level-2"><a class="final" 
href="/community/contribute-documentation/"><span>Contribute 
Documentation</span></a></li><li class="level-2"><a class="final" 
href="/community/contribute-sdk/"><span>Contribute a SDK</span></a></li><li 
class="level-2"><a class="final" 
href="/community/contribute-webhook/"><span>Contribute a Webhook</span></a
 ></li><li class="level-2"><a class="final" 
 >href="/community/projects/"><span>Community 
 >Projects</span></a></li></ul></li><li class="level-1"><a class="expandible" 
 >href="#"><span>Getting Help</span></a><ul><li class="level-2"><a 
 >class="final" href="/resources/faq/"><span>FAQs</span></a></li><li 
 >class="level-2"><a class="final" 
 >href="/support/"><span>Support</span></a></li></ul></li><li 
 >class="level-1"><a class="expandible" 
 >href="#"><span>Resources</span></a><ul><li class="level-2"><a class="final" 
 >href="/cli/"><span>Command-line Interface</span></a></li><li 
 >class="level-2"><a class="final" href="/resources/release/"><span>Release 
 >Cadence</span></a></li><li class="level-2"><a class="final" 
 >href="/resources/intellij/"><span>Developing Engines with IntelliJ 
 >IDEA</span></a></li><li class="level-2"><a class="final" 
 >href="/resources/upgrade/"><span>Upgrade Instructions</span></a></li><li 
 >class="level-2"><a class="final" 
 >href="/resources/glossary/"><span>Glossary</span></a></li></ul></li><l
 i class="level-1"><a class="expandible" href="#"><span>Apache Software 
Foundation</span></a><ul><li class="level-2"><a class="final" 
href="https://www.apache.org/";><span>Apache Homepage</span></a></li><li 
class="level-2"><a class="final" 
href="https://www.apache.org/licenses/";><span>License</span></a></li><li 
class="level-2"><a class="final" 
href="https://www.apache.org/foundation/sponsorship.html";><span>Sponsorship</span></a></li><li
 class="level-2"><a class="final" 
href="https://www.apache.org/foundation/thanks.html";><span>Thanks</span></a></li><li
 class="level-2"><a class="final" 
href="https://www.apache.org/security/";><span>Security</span></a></li></ul></li></ul></nav></div><div
 class="col-md-9 col-sm-12"><div class="content-header hidden-md 
hidden-lg"><div id="page-title"><h1>Dimensionality Reduction With 
PredictionIO</h1></div></div><div id="table-of-content-wrapper"><h5>On this 
page</h5><aside id="table-of-contents"><ul> <li> <a href="#data-example">Data 
Example</a> </li> <li
 > <a href="#principal-component-analysis">Principal Component Analysis</a> 
 > </li> <li> <a href="#modifying-the-engine-template">Modifying the Engine 
 > Template</a> </li> <li> <a href="#testing-the-engine">Testing the Engine</a> 
 > </li> </ul> </aside><hr/><a id="edit-page-link" 
 > href="https://github.com/apache/predictionio/tree/livedoc/docs/manual/source/machinelearning/dimensionalityreduction.html.md";><img
 >  src="/images/icons/edit-pencil-d6c1bb3d.png"/>Edit this page</a></div><div 
 > class="content-header hidden-sm hidden-xs"><div 
 > id="page-title"><h1>Dimensionality Reduction With 
 > PredictionIO</h1></div></div><div class="content"> <p>The purpose of this 
 > guide is to teach developers how to incorporate &quot;dimensionality 
 > reduction&quot; into a PredictionIO engine <a 
 > href="https://en.wikipedia.org/wiki/Principal_component_analysis";>Principal 
 > Component Analysis</a> (PCA) on the <a 
 > href="https://www.kaggle.com/c/digit-recognizer";>MNIST digit recognition 
 > dataset</a>. To do this, you will be modify
 ing the PredictionIO <a 
href="/gallery/template-gallery/#classification">classification engine 
template</a>. This guide will demonstrate how to import the specific data set 
in batch, and also how to change the engine components in order to incorporate 
the new sample data and implement PCA.</p><p>In machine learning, specifically 
in <a href="http://en.wikipedia.org/wiki/Supervised_learning";>supervised 
learning</a>, the general problem at hand is to predict a numeric outcome \(y\) 
from a numeric vector \(\bf{x}\). The different components of \(\bf{x}\) are 
called <strong>features</strong>, and usually represent observed values such as 
a hospital patient&#39;s age, weight, height, sex, etc. There are subtle issues 
that begin to arise as the number of features contained in each feature vector 
increases. We briefly list some of the issues that arise as the number of 
features grows in size:</p> <ul> <li><p><strong>Computation:</strong> The time 
complexity of machine learning algorithms of
 ten times depends on the number of features used. That is, the more features 
one uses for prediction, the more time it takes to train a model.</p></li> 
<li><p><strong>Prediction Performance:</strong> Often times there will be 
features that, when used in training, will actually decrease the predictive 
performance of a particular algorithm. </p></li> <li><p><strong>Curse of 
Dimensionality:</strong> It is harder to make inference and predictions in high 
dimensional spaces simply due to the fact that we need to sample a lot more 
observations. Think about it in this way, suppose that we sample 100 points 
lying on a flat solid square, and 100 points in a solid cube. The 100 points 
from the square will likely take up a larger proportion of its area, in 
comparison to the proportion of the cube&#39;s volume that the points sampled 
from it occupy. Hence we would need to sample more points from the cube in 
order to get better estimates of the different properties of the cube, such as 
height, l
 ength, and width. This is shown in the following figure:</p></li> </ul> 
<table><thead> <tr> <th>100 Points Sampled From Unit Square</th> <th>100 Points 
Sampled From Unit Cube</th> </tr> </thead><tbody> <tr> <td></td> <td></td> 
</tr> <tr> <td><img alt="Square Samples" 
src="/images/machinelearning/featureselection/square100-df83c1ae.png"/></td> 
<td><img alt="Cube Samples" 
src="/images/machinelearning/featureselection/cube100-a8fe5433.png"/></td> 
</tr> <tr> <td></td> <td></td> </tr> </tbody></table> <p>Dimensionality 
reduction is the process of applying a transformation to your feature vectors 
in order to produce a vector with the same or less number of features. 
Principal component Analysis (PCA) is a technique for dimensionality reduction. 
This can be treated as a data processing technique, and so with respect to the 
<a href="/customize/">DASE</a> framework, it will fall into the Data Preparator 
engine component.</p><p>This guide will also help to solidify the concept of 
taking an en
 gine template and customizing it for a particular use case: hand-written 
numeric digit recognition.</p><h2 id='data-example' class='header-anchors'>Data 
Example</h2><p>As a guiding example, a base data set, the <a 
href="https://www.kaggle.com/c/digit-recognizer/data";>MNIST digit recognition 
dataset</a>, is used. This is a perfect data set for dimensionality reduction, 
for, in this data set, the features that will be used for learning are pixel 
entries in a \(28 \times 28\) pixel image. There is really no direct 
interpretation of any one feature, so that you do not lose anything in applying 
a transformation that will treat the features as <a 
href="https://en.wikipedia.org/wiki/Linear_combination";>linear combinations</a> 
of some set &quot;convenient&quot; vectors.</p><p>Now, we first pull the <a 
href="/gallery/template-gallery/#classification">classification engine 
template</a> via the following bash line</p><div class="highlight shell"><table 
style="border-spacing: 0"><tbody><tr><td 
 class="gutter gl" style="text-align: right"><pre 
class="lineno">1</pre></td><td class="code"><pre>git clone 
https://github.com/apache/predictionio-template-attribute-based-classifier.git 
&lt;Your new engine directory&gt;
+<!DOCTYPE html><html><head><title>Dimensionality Reduction With 
PredictionIO</title><meta charset="utf-8"/><meta content="IE=edge,chrome=1" 
http-equiv="X-UA-Compatible"/><meta name="viewport" 
content="width=device-width, initial-scale=1.0"/><meta class="swiftype" 
name="title" data-type="string" content="Dimensionality Reduction With 
PredictionIO"/><link rel="canonical" 
href="https://predictionio.apache.org/machinelearning/dimensionalityreduction/"/><link
 href="/images/favicon/normal-b330020a.png" rel="shortcut icon"/><link 
href="/images/favicon/apple-c0febcf2.png" rel="apple-touch-icon"/><link 
href="//fonts.googleapis.com/css?family=Open+Sans:300italic,400italic,600italic,700italic,800italic,400,300,600,700,800"
 rel="stylesheet"/><link 
href="//maxcdn.bootstrapcdn.com/font-awesome/4.2.0/css/font-awesome.min.css" 
rel="stylesheet"/><link href="/stylesheets/application-eccfc6cb.css" 
rel="stylesheet" type="text/css"/><script 
src="//cdnjs.cloudflare.com/ajax/libs/html5shiv/3.7.2/html5shiv
 .min.js"></script><script 
src="//cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script><script
 src="//use.typekit.net/pqo0itb.js"></script><script>try{Typekit.load({ async: 
true });}catch(e){}</script></head><body><div id="global"><header><div 
class="container" id="header-wrapper"><div class="row"><div 
class="col-sm-12"><div id="logo-wrapper"><span id="drawer-toggle"></span><a 
href="#"></a><a href="http://predictionio.apache.org/";><img alt="Apache 
PredictionIO" id="logo" 
src="/images/logos/logo-ee2b9bb3.png"/></a><span>®</span></div><div 
id="menu-wrapper"><div id="pill-wrapper"><a class="pill left" 
href="/gallery/template-gallery">TEMPLATES</a> <a class="pill right" 
href="//github.com/apache/predictionio/">OPEN SOURCE</a></div></div><img 
class="mobile-search-bar-toggler hidden-md hidden-lg" 
src="/images/icons/search-glass-704bd4ff.png"/></div></div></div></header><div 
id="search-bar-row-wrapper"><div class="container-fluid" 
id="search-bar-row"><div class=
 "row"><div class="col-md-9 col-sm-11 col-xs-11"><div class="hidden-md 
hidden-lg" id="mobile-page-heading-wrapper"><p>PredictionIO 
Docs</p><h4>Dimensionality Reduction With PredictionIO</h4></div><h4 
class="hidden-sm hidden-xs">PredictionIO Docs</h4></div><div class="col-md-3 
col-sm-1 col-xs-1 hidden-md hidden-lg"><img id="left-menu-indicator" 
src="/images/icons/down-arrow-dfe9f7fe.png"/></div><div class="col-md-3 
col-sm-12 col-xs-12 swiftype-wrapper"><div class="swiftype"><form 
class="search-form"><img class="search-box-toggler hidden-xs hidden-sm" 
src="/images/icons/search-glass-704bd4ff.png"/><div class="search-box"><img 
src="/images/icons/search-glass-704bd4ff.png"/><input type="text" 
id="st-search-input" class="st-search-input" placeholder="Search 
Doc..."/></div><img class="swiftype-row-hider hidden-md hidden-lg" 
src="/images/icons/drawer-toggle-active-fcbef12a.png"/></form></div></div><div 
class="mobile-left-menu-toggler hidden-md 
hidden-lg"></div></div></div></div><div id="pag
 e" class="container-fluid"><div class="row"><div id="left-menu-wrapper" 
class="col-md-3"><nav id="nav-main"><ul><li class="level-1"><a 
class="expandible" href="/"><span>Apache PredictionIO® 
Documentation</span></a><ul><li class="level-2"><a class="final" 
href="/"><span>Welcome to Apache PredictionIO®</span></a></li></ul></li><li 
class="level-1"><a class="expandible" href="#"><span>Getting 
Started</span></a><ul><li class="level-2"><a class="final" 
href="/start/"><span>A Quick Intro</span></a></li><li class="level-2"><a 
class="final" href="/install/"><span>Installing Apache 
PredictionIO</span></a></li><li class="level-2"><a class="final" 
href="/start/download/"><span>Downloading an Engine Template</span></a></li><li 
class="level-2"><a class="final" href="/start/deploy/"><span>Deploying Your 
First Engine</span></a></li><li class="level-2"><a class="final" 
href="/start/customize/"><span>Customizing the 
Engine</span></a></li></ul></li><li class="level-1"><a class="expandible" 
href="#">
 <span>Integrating with Your App</span></a><ul><li class="level-2"><a 
class="final" href="/appintegration/"><span>App Integration 
Overview</span></a></li><li class="level-2"><a class="expandible" 
href="/sdk/"><span>List of SDKs</span></a><ul><li class="level-3"><a 
class="final" href="/sdk/java/"><span>Java & Android SDK</span></a></li><li 
class="level-3"><a class="final" href="/sdk/php/"><span>PHP 
SDK</span></a></li><li class="level-3"><a class="final" 
href="/sdk/python/"><span>Python SDK</span></a></li><li class="level-3"><a 
class="final" href="/sdk/ruby/"><span>Ruby SDK</span></a></li><li 
class="level-3"><a class="final" 
href="/community/projects/#sdks"><span>Community Powered 
SDKs</span></a></li></ul></li></ul></li><li class="level-1"><a 
class="expandible" href="#"><span>Deploying an Engine</span></a><ul><li 
class="level-2"><a class="final" href="/deploy/"><span>Deploying as a Web 
Service</span></a></li><li class="level-2"><a class="final" 
href="/batchpredict/"><span>Batch Predict
 ions</span></a></li><li class="level-2"><a class="final" 
href="/deploy/monitoring/"><span>Monitoring Engine</span></a></li><li 
class="level-2"><a class="final" href="/deploy/engineparams/"><span>Setting 
Engine Parameters</span></a></li><li class="level-2"><a class="final" 
href="/deploy/enginevariants/"><span>Deploying Multiple Engine 
Variants</span></a></li><li class="level-2"><a class="final" 
href="/deploy/plugin/"><span>Engine Server Plugin</span></a></li></ul></li><li 
class="level-1"><a class="expandible" href="#"><span>Customizing an 
Engine</span></a><ul><li class="level-2"><a class="final" 
href="/customize/"><span>Learning DASE</span></a></li><li class="level-2"><a 
class="final" href="/customize/dase/"><span>Implement DASE</span></a></li><li 
class="level-2"><a class="final" 
href="/customize/troubleshooting/"><span>Troubleshooting Engine 
Development</span></a></li><li class="level-2"><a class="final" 
href="/api/current/#package"><span>Engine Scala APIs</span></a></li></ul></li><
 li class="level-1"><a class="expandible" href="#"><span>Collecting and 
Analyzing Data</span></a><ul><li class="level-2"><a class="final" 
href="/datacollection/"><span>Event Server Overview</span></a></li><li 
class="level-2"><a class="final" 
href="/datacollection/eventapi/"><span>Collecting Data with 
REST/SDKs</span></a></li><li class="level-2"><a class="final" 
href="/datacollection/eventmodel/"><span>Events Modeling</span></a></li><li 
class="level-2"><a class="final" 
href="/datacollection/webhooks/"><span>Unifying Multichannel Data with 
Webhooks</span></a></li><li class="level-2"><a class="final" 
href="/datacollection/channel/"><span>Channel</span></a></li><li 
class="level-2"><a class="final" 
href="/datacollection/batchimport/"><span>Importing Data in 
Batch</span></a></li><li class="level-2"><a class="final" 
href="/datacollection/analytics/"><span>Using Analytics 
Tools</span></a></li><li class="level-2"><a class="final" 
href="/datacollection/plugin/"><span>Event Server Plugin</span>
 </a></li></ul></li><li class="level-1"><a class="expandible" 
href="#"><span>Choosing an Algorithm</span></a><ul><li class="level-2"><a 
class="final" href="/algorithm/"><span>Built-in Algorithm 
Libraries</span></a></li><li class="level-2"><a class="final" 
href="/algorithm/switch/"><span>Switching to Another 
Algorithm</span></a></li><li class="level-2"><a class="final" 
href="/algorithm/multiple/"><span>Combining Multiple 
Algorithms</span></a></li><li class="level-2"><a class="final" 
href="/algorithm/custom/"><span>Adding Your Own 
Algorithms</span></a></li></ul></li><li class="level-1"><a class="expandible" 
href="#"><span>Tuning and Evaluation</span></a><ul><li class="level-2"><a 
class="final" href="/evaluation/"><span>Overview</span></a></li><li 
class="level-2"><a class="final" 
href="/evaluation/paramtuning/"><span>Hyperparameter Tuning</span></a></li><li 
class="level-2"><a class="final" 
href="/evaluation/evaluationdashboard/"><span>Evaluation 
Dashboard</span></a></li><li class="level
 -2"><a class="final" href="/evaluation/metricchoose/"><span>Choosing 
Evaluation Metrics</span></a></li><li class="level-2"><a class="final" 
href="/evaluation/metricbuild/"><span>Building Evaluation 
Metrics</span></a></li></ul></li><li class="level-1"><a class="expandible" 
href="#"><span>System Architecture</span></a><ul><li class="level-2"><a 
class="final" href="/system/"><span>Architecture Overview</span></a></li><li 
class="level-2"><a class="final" href="/system/anotherdatastore/"><span>Using 
Another Data Store</span></a></li></ul></li><li class="level-1"><a 
class="expandible" href="#"><span>PredictionIO® Official 
Templates</span></a><ul><li class="level-2"><a class="final" 
href="/templates/"><span>Intro</span></a></li><li class="level-2"><a 
class="expandible" href="#"><span>Recommendation</span></a><ul><li 
class="level-3"><a class="final" 
href="/templates/recommendation/quickstart/"><span>Quick 
Start</span></a></li><li class="level-3"><a class="final" 
href="/templates/recommenda
 tion/dase/"><span>DASE</span></a></li><li class="level-3"><a class="final" 
href="/templates/recommendation/evaluation/"><span>Evaluation 
Explained</span></a></li><li class="level-3"><a class="final" 
href="/templates/recommendation/how-to/"><span>How-To</span></a></li><li 
class="level-3"><a class="final" 
href="/templates/recommendation/reading-custom-events/"><span>Read Custom 
Events</span></a></li><li class="level-3"><a class="final" 
href="/templates/recommendation/customize-data-prep/"><span>Customize Data 
Preparator</span></a></li><li class="level-3"><a class="final" 
href="/templates/recommendation/customize-serving/"><span>Customize 
Serving</span></a></li><li class="level-3"><a class="final" 
href="/templates/recommendation/training-with-implicit-preference/"><span>Train 
with Implicit Preference</span></a></li><li class="level-3"><a class="final" 
href="/templates/recommendation/blacklist-items/"><span>Filter Recommended 
Items by Blacklist in Query</span></a></li><li class="level-3
 "><a class="final" 
href="/templates/recommendation/batch-evaluator/"><span>Batch Persistable 
Evaluator</span></a></li></ul></li><li class="level-2"><a class="expandible" 
href="#"><span>E-Commerce Recommendation</span></a><ul><li class="level-3"><a 
class="final" href="/templates/ecommercerecommendation/quickstart/"><span>Quick 
Start</span></a></li><li class="level-3"><a class="final" 
href="/templates/ecommercerecommendation/dase/"><span>DASE</span></a></li><li 
class="level-3"><a class="final" 
href="/templates/ecommercerecommendation/how-to/"><span>How-To</span></a></li><li
 class="level-3"><a class="final" 
href="/templates/ecommercerecommendation/train-with-rate-event/"><span>Train 
with Rate Event</span></a></li><li class="level-3"><a class="final" 
href="/templates/ecommercerecommendation/adjust-score/"><span>Adjust 
Score</span></a></li></ul></li><li class="level-2"><a class="expandible" 
href="#"><span>Similar Product</span></a><ul><li class="level-3"><a 
class="final" href="/templates
 /similarproduct/quickstart/"><span>Quick Start</span></a></li><li 
class="level-3"><a class="final" 
href="/templates/similarproduct/dase/"><span>DASE</span></a></li><li 
class="level-3"><a class="final" 
href="/templates/similarproduct/how-to/"><span>How-To</span></a></li><li 
class="level-3"><a class="final" 
href="/templates/similarproduct/multi-events-multi-algos/"><span>Multiple 
Events and Multiple Algorithms</span></a></li><li class="level-3"><a 
class="final" 
href="/templates/similarproduct/return-item-properties/"><span>Returns Item 
Properties</span></a></li><li class="level-3"><a class="final" 
href="/templates/similarproduct/train-with-rate-event/"><span>Train with Rate 
Event</span></a></li><li class="level-3"><a class="final" 
href="/templates/similarproduct/rid-user-set-event/"><span>Get Rid of Events 
for Users</span></a></li><li class="level-3"><a class="final" 
href="/templates/similarproduct/recommended-user/"><span>Recommend 
Users</span></a></li></ul></li><li class="level-2"><
 a class="expandible" href="#"><span>Classification</span></a><ul><li 
class="level-3"><a class="final" 
href="/templates/classification/quickstart/"><span>Quick 
Start</span></a></li><li class="level-3"><a class="final" 
href="/templates/classification/dase/"><span>DASE</span></a></li><li 
class="level-3"><a class="final" 
href="/templates/classification/how-to/"><span>How-To</span></a></li><li 
class="level-3"><a class="final" 
href="/templates/classification/add-algorithm/"><span>Use Alternative 
Algorithm</span></a></li><li class="level-3"><a class="final" 
href="/templates/classification/reading-custom-properties/"><span>Read Custom 
Properties</span></a></li></ul></li></ul></li><li class="level-1"><a 
class="expandible" href="#"><span>Engine Template Gallery</span></a><ul><li 
class="level-2"><a class="final" 
href="/gallery/template-gallery/"><span>Browse</span></a></li><li 
class="level-2"><a class="final" 
href="/community/submit-template/"><span>Submit your Engine as a 
Template</span></a><
 /li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Demo 
Tutorials</span></a><ul><li class="level-2"><a class="final" 
href="/community/projects/#demos"><span>Community Contributed 
Demo</span></a></li><li class="level-2"><a class="final" 
href="/demo/textclassification/"><span>Text Classification Engine 
Tutorial</span></a></li></ul></li><li class="level-1"><a class="expandible" 
href="/community/"><span>Getting Involved</span></a><ul><li class="level-2"><a 
class="final" href="/community/contribute-code/"><span>Contribute 
Code</span></a></li><li class="level-2"><a class="final" 
href="/community/contribute-documentation/"><span>Contribute 
Documentation</span></a></li><li class="level-2"><a class="final" 
href="/community/contribute-sdk/"><span>Contribute a SDK</span></a></li><li 
class="level-2"><a class="final" 
href="/community/contribute-webhook/"><span>Contribute a 
Webhook</span></a></li><li class="level-2"><a class="final" 
href="/community/projects/"><span>Community 
 Projects</span></a></li></ul></li><li class="level-1"><a class="expandible" 
href="#"><span>Getting Help</span></a><ul><li class="level-2"><a class="final" 
href="/resources/faq/"><span>FAQs</span></a></li><li class="level-2"><a 
class="final" href="/support/"><span>Support</span></a></li></ul></li><li 
class="level-1"><a class="expandible" 
href="#"><span>Resources</span></a><ul><li class="level-2"><a class="final" 
href="/cli/"><span>Command-line Interface</span></a></li><li class="level-2"><a 
class="final" href="/resources/release/"><span>Release 
Cadence</span></a></li><li class="level-2"><a class="final" 
href="/resources/intellij/"><span>Developing Engines with IntelliJ 
IDEA</span></a></li><li class="level-2"><a class="final" 
href="/resources/upgrade/"><span>Upgrade Instructions</span></a></li><li 
class="level-2"><a class="final" 
href="/resources/glossary/"><span>Glossary</span></a></li></ul></li><li 
class="level-1"><a class="expandible" href="#"><span>Apache Software 
Foundation</span
 ></a><ul><li class="level-2"><a class="final" 
 >href="https://www.apache.org/";><span>Apache Homepage</span></a></li><li 
 >class="level-2"><a class="final" 
 >href="https://www.apache.org/licenses/";><span>License</span></a></li><li 
 >class="level-2"><a class="final" 
 >href="https://www.apache.org/foundation/sponsorship.html";><span>Sponsorship</span></a></li><li
 > class="level-2"><a class="final" 
 >href="https://www.apache.org/foundation/thanks.html";><span>Thanks</span></a></li><li
 > class="level-2"><a class="final" 
 >href="https://www.apache.org/security/";><span>Security</span></a></li></ul></li></ul></nav></div><div
 > class="col-md-9 col-sm-12"><div class="content-header hidden-md 
 >hidden-lg"><div id="page-title"><h1>Dimensionality Reduction With 
 >PredictionIO</h1></div></div><div id="table-of-content-wrapper"><h5>On this 
 >page</h5><aside id="table-of-contents"><ul> <li> <a href="#data-example">Data 
 >Example</a> </li> <li> <a href="#principal-component-analysis">Principal 
 >Component Analysis</a> </li> <li> <
 a href="#modifying-the-engine-template">Modifying the Engine Template</a> 
</li> <li> <a href="#testing-the-engine">Testing the Engine</a> </li> </ul> 
</aside><hr/><a id="edit-page-link" 
href="https://github.com/apache/predictionio/tree/livedoc/docs/manual/source/machinelearning/dimensionalityreduction.html.md";><img
 src="/images/icons/edit-pencil-d6c1bb3d.png"/>Edit this page</a></div><div 
class="content-header hidden-sm hidden-xs"><div 
id="page-title"><h1>Dimensionality Reduction With 
PredictionIO</h1></div></div><div class="content"> <p>The purpose of this guide 
is to teach developers how to incorporate &quot;dimensionality reduction&quot; 
into a PredictionIO engine <a 
href="https://en.wikipedia.org/wiki/Principal_component_analysis";>Principal 
Component Analysis</a> (PCA) on the <a 
href="https://www.kaggle.com/c/digit-recognizer";>MNIST digit recognition 
dataset</a>. To do this, you will be modifying the PredictionIO <a 
href="/gallery/template-gallery/#classification">classification
  engine template</a>. This guide will demonstrate how to import the specific 
data set in batch, and also how to change the engine components in order to 
incorporate the new sample data and implement PCA.</p><p>In machine learning, 
specifically in <a 
href="http://en.wikipedia.org/wiki/Supervised_learning";>supervised 
learning</a>, the general problem at hand is to predict a numeric outcome \(y\) 
from a numeric vector \(\bf{x}\). The different components of \(\bf{x}\) are 
called <strong>features</strong>, and usually represent observed values such as 
a hospital patient&#39;s age, weight, height, sex, etc. There are subtle issues 
that begin to arise as the number of features contained in each feature vector 
increases. We briefly list some of the issues that arise as the number of 
features grows in size:</p> <ul> <li><p><strong>Computation:</strong> The time 
complexity of machine learning algorithms often times depends on the number of 
features used. That is, the more features one uses f
 or prediction, the more time it takes to train a model.</p></li> 
<li><p><strong>Prediction Performance:</strong> Often times there will be 
features that, when used in training, will actually decrease the predictive 
performance of a particular algorithm. </p></li> <li><p><strong>Curse of 
Dimensionality:</strong> It is harder to make inference and predictions in high 
dimensional spaces simply due to the fact that we need to sample a lot more 
observations. Think about it in this way, suppose that we sample 100 points 
lying on a flat solid square, and 100 points in a solid cube. The 100 points 
from the square will likely take up a larger proportion of its area, in 
comparison to the proportion of the cube&#39;s volume that the points sampled 
from it occupy. Hence we would need to sample more points from the cube in 
order to get better estimates of the different properties of the cube, such as 
height, length, and width. This is shown in the following figure:</p></li> 
</ul> <table><thead> 
 <tr> <th>100 Points Sampled From Unit Square</th> <th>100 Points Sampled From 
Unit Cube</th> </tr> </thead><tbody> <tr> <td></td> <td></td> </tr> <tr> 
<td><img alt="Square Samples" 
src="/images/machinelearning/featureselection/square100-df83c1ae.png"/></td> 
<td><img alt="Cube Samples" 
src="/images/machinelearning/featureselection/cube100-a8fe5433.png"/></td> 
</tr> <tr> <td></td> <td></td> </tr> </tbody></table> <p>Dimensionality 
reduction is the process of applying a transformation to your feature vectors 
in order to produce a vector with the same or less number of features. 
Principal component Analysis (PCA) is a technique for dimensionality reduction. 
This can be treated as a data processing technique, and so with respect to the 
<a href="/customize/">DASE</a> framework, it will fall into the Data Preparator 
engine component.</p><p>This guide will also help to solidify the concept of 
taking an engine template and customizing it for a particular use case: 
hand-written numeric digit 
 recognition.</p><h2 id='data-example' class='header-anchors'>Data 
Example</h2><p>As a guiding example, a base data set, the <a 
href="https://www.kaggle.com/c/digit-recognizer/data";>MNIST digit recognition 
dataset</a>, is used. This is a perfect data set for dimensionality reduction, 
for, in this data set, the features that will be used for learning are pixel 
entries in a \(28 \times 28\) pixel image. There is really no direct 
interpretation of any one feature, so that you do not lose anything in applying 
a transformation that will treat the features as <a 
href="https://en.wikipedia.org/wiki/Linear_combination";>linear combinations</a> 
of some set &quot;convenient&quot; vectors.</p><p>Now, we first pull the <a 
href="/gallery/template-gallery/#classification">classification engine 
template</a> via the following bash line</p><div class="highlight shell"><table 
style="border-spacing: 0"><tbody><tr><td class="gutter gl" style="text-align: 
right"><pre class="lineno">1</pre></td><td class="
 code"><pre>git clone 
https://github.com/apache/predictionio-template-attribute-based-classifier.git 
&lt;Your new engine directory&gt;
 </pre></td></tr></tbody></table> </div> <p>You should immediately be prompted 
with the following message:</p><div class="highlight shell"><table 
style="border-spacing: 0"><tbody><tr><td class="gutter gl" style="text-align: 
right"><pre class="lineno">1</pre></td><td class="code"><pre>Please enter the 
template<span class="s1">'s Scala package name (e.g. com.mycompany):
 </span></pre></td></tr></tbody></table> </div> <p>Go ahead and input 
<code>FeatureReduction</code>, and feel free to just press enter for the 
remaining message prompts. For the remainder of this guide, you will be working 
in your new engine directory, so go ahead and <code>cd</code> into your new 
engine directory. At this point, go ahead and run the command</p><div 
class="highlight shell"><table style="border-spacing: 0"><tbody><tr><td 
class="gutter gl" style="text-align: right"><pre class="lineno">1</pre></td><td 
class="code"><pre>pio build
 </pre></td></tr></tbody></table> </div> <p>This will make sure that the 
PredictionIO dependency version for your project matches the version installed 
on your computer. Now, download the MNIST <code>train.csv</code> data set from 
the link above, and put this file in the <code>data</code> directory contained 
in the new engine directory.</p><h3 
id='<strong>optional</strong>:-visualizing-observations' class='header-anchors' 
><strong>Optional</strong>: Visualizing Observations</h3><p>If you want to 
actually convert the observation pixel data to an image go ahead and create a 
Python script called <code>picture_processing.py</code> into your data 
directory and copy and paste the following code into the script:</p><div 
class="highlight python"><table style="border-spacing: 0"><tbody><tr><td 
class="gutter gl" style="text-align: right"><pre class="lineno">1

http://git-wip-us.apache.org/repos/asf/predictionio-site/blob/765e178c/machinelearning/modelingworkflow/index.html
----------------------------------------------------------------------
diff --git a/machinelearning/modelingworkflow/index.html 
b/machinelearning/modelingworkflow/index.html
index 9716b45..66b67dc 100644
--- a/machinelearning/modelingworkflow/index.html
+++ b/machinelearning/modelingworkflow/index.html
@@ -1,4 +1,4 @@
-<!DOCTYPE html><html><head><title>Modeling Workflow and DASE</title><meta 
charset="utf-8"/><meta content="IE=edge,chrome=1" 
http-equiv="X-UA-Compatible"/><meta name="viewport" 
content="width=device-width, initial-scale=1.0"/><meta class="swiftype" 
name="title" data-type="string" content="Modeling Workflow and DASE"/><link 
rel="canonical" 
href="https://predictionio.apache.org/machinelearning/modelingworkflow/"/><link 
href="/images/favicon/normal-b330020a.png" rel="shortcut icon"/><link 
href="/images/favicon/apple-c0febcf2.png" rel="apple-touch-icon"/><link 
href="//fonts.googleapis.com/css?family=Open+Sans:300italic,400italic,600italic,700italic,800italic,400,300,600,700,800"
 rel="stylesheet"/><link 
href="//maxcdn.bootstrapcdn.com/font-awesome/4.2.0/css/font-awesome.min.css" 
rel="stylesheet"/><link href="/stylesheets/application-eccfc6cb.css" 
rel="stylesheet" type="text/css"/><script 
src="//cdnjs.cloudflare.com/ajax/libs/html5shiv/3.7.2/html5shiv.min.js"></script><script
 src="//cdn.ma
 
thjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script><script
 src="//use.typekit.net/pqo0itb.js"></script><script>try{Typekit.load({ async: 
true });}catch(e){}</script></head><body><div id="global"><header><div 
class="container" id="header-wrapper"><div class="row"><div 
class="col-sm-12"><div id="logo-wrapper"><span id="drawer-toggle"></span><a 
href="#"></a><a href="http://predictionio.apache.org/";><img alt="Apache 
PredictionIO" id="logo" 
src="/images/logos/logo-ee2b9bb3.png"/></a><span>®</span></div><div 
id="menu-wrapper"><div id="pill-wrapper"><a class="pill left" 
href="/gallery/template-gallery">TEMPLATES</a> <a class="pill right" 
href="//github.com/apache/predictionio/">OPEN SOURCE</a></div></div><img 
class="mobile-search-bar-toggler hidden-md hidden-lg" 
src="/images/icons/search-glass-704bd4ff.png"/></div></div></div></header><div 
id="search-bar-row-wrapper"><div class="container-fluid" 
id="search-bar-row"><div class="row"><div class="col-md-9 col-sm-11 co
 l-xs-11"><div class="hidden-md hidden-lg" 
id="mobile-page-heading-wrapper"><p>PredictionIO Docs</p><h4>Modeling Workflow 
and DASE</h4></div><h4 class="hidden-sm hidden-xs">PredictionIO 
Docs</h4></div><div class="col-md-3 col-sm-1 col-xs-1 hidden-md hidden-lg"><img 
id="left-menu-indicator" 
src="/images/icons/down-arrow-dfe9f7fe.png"/></div><div class="col-md-3 
col-sm-12 col-xs-12 swiftype-wrapper"><div class="swiftype"><form 
class="search-form"><img class="search-box-toggler hidden-xs hidden-sm" 
src="/images/icons/search-glass-704bd4ff.png"/><div class="search-box"><img 
src="/images/icons/search-glass-704bd4ff.png"/><input type="text" 
id="st-search-input" class="st-search-input" placeholder="Search 
Doc..."/></div><img class="swiftype-row-hider hidden-md hidden-lg" 
src="/images/icons/drawer-toggle-active-fcbef12a.png"/></form></div></div><div 
class="mobile-left-menu-toggler hidden-md 
hidden-lg"></div></div></div></div><div id="page" class="container-fluid"><div 
class="row"><div id="le
 ft-menu-wrapper" class="col-md-3"><nav id="nav-main"><ul><li 
class="level-1"><a class="expandible" href="/"><span>Apache PredictionIO® 
Documentation</span></a><ul><li class="level-2"><a class="final" 
href="/"><span>Welcome to Apache PredictionIO®</span></a></li></ul></li><li 
class="level-1"><a class="expandible" href="#"><span>Getting 
Started</span></a><ul><li class="level-2"><a class="final" 
href="/start/"><span>A Quick Intro</span></a></li><li class="level-2"><a 
class="final" href="/install/"><span>Installing Apache 
PredictionIO</span></a></li><li class="level-2"><a class="final" 
href="/start/download/"><span>Downloading an Engine Template</span></a></li><li 
class="level-2"><a class="final" href="/start/deploy/"><span>Deploying Your 
First Engine</span></a></li><li class="level-2"><a class="final" 
href="/start/customize/"><span>Customizing the 
Engine</span></a></li></ul></li><li class="level-1"><a class="expandible" 
href="#"><span>Integrating with Your App</span></a><ul><li class
 ="level-2"><a class="final" href="/appintegration/"><span>App Integration 
Overview</span></a></li><li class="level-2"><a class="expandible" 
href="/sdk/"><span>List of SDKs</span></a><ul><li class="level-3"><a 
class="final" href="/sdk/java/"><span>Java & Android SDK</span></a></li><li 
class="level-3"><a class="final" href="/sdk/php/"><span>PHP 
SDK</span></a></li><li class="level-3"><a class="final" 
href="/sdk/python/"><span>Python SDK</span></a></li><li class="level-3"><a 
class="final" href="/sdk/ruby/"><span>Ruby SDK</span></a></li><li 
class="level-3"><a class="final" href="/sdk/community/"><span>Community Powered 
SDKs</span></a></li></ul></li></ul></li><li class="level-1"><a 
class="expandible" href="#"><span>Deploying an Engine</span></a><ul><li 
class="level-2"><a class="final" href="/deploy/"><span>Deploying as a Web 
Service</span></a></li><li class="level-2"><a class="final" 
href="/batchpredict/"><span>Batch Predictions</span></a></li><li 
class="level-2"><a class="final" href="/d
 eploy/monitoring/"><span>Monitoring Engine</span></a></li><li 
class="level-2"><a class="final" href="/deploy/engineparams/"><span>Setting 
Engine Parameters</span></a></li><li class="level-2"><a class="final" 
href="/deploy/enginevariants/"><span>Deploying Multiple Engine 
Variants</span></a></li><li class="level-2"><a class="final" 
href="/deploy/plugin/"><span>Engine Server Plugin</span></a></li></ul></li><li 
class="level-1"><a class="expandible" href="#"><span>Customizing an 
Engine</span></a><ul><li class="level-2"><a class="final" 
href="/customize/"><span>Learning DASE</span></a></li><li class="level-2"><a 
class="final" href="/customize/dase/"><span>Implement DASE</span></a></li><li 
class="level-2"><a class="final" 
href="/customize/troubleshooting/"><span>Troubleshooting Engine 
Development</span></a></li><li class="level-2"><a class="final" 
href="/api/current/#package"><span>Engine Scala 
APIs</span></a></li></ul></li><li class="level-1"><a class="expandible" 
href="#"><span>Collectin
 g and Analyzing Data</span></a><ul><li class="level-2"><a class="final" 
href="/datacollection/"><span>Event Server Overview</span></a></li><li 
class="level-2"><a class="final" 
href="/datacollection/eventapi/"><span>Collecting Data with 
REST/SDKs</span></a></li><li class="level-2"><a class="final" 
href="/datacollection/eventmodel/"><span>Events Modeling</span></a></li><li 
class="level-2"><a class="final" 
href="/datacollection/webhooks/"><span>Unifying Multichannel Data with 
Webhooks</span></a></li><li class="level-2"><a class="final" 
href="/datacollection/channel/"><span>Channel</span></a></li><li 
class="level-2"><a class="final" 
href="/datacollection/batchimport/"><span>Importing Data in 
Batch</span></a></li><li class="level-2"><a class="final" 
href="/datacollection/analytics/"><span>Using Analytics 
Tools</span></a></li><li class="level-2"><a class="final" 
href="/datacollection/plugin/"><span>Event Server 
Plugin</span></a></li></ul></li><li class="level-1"><a class="expandible" href
 ="#"><span>Choosing an Algorithm</span></a><ul><li class="level-2"><a 
class="final" href="/algorithm/"><span>Built-in Algorithm 
Libraries</span></a></li><li class="level-2"><a class="final" 
href="/algorithm/switch/"><span>Switching to Another 
Algorithm</span></a></li><li class="level-2"><a class="final" 
href="/algorithm/multiple/"><span>Combining Multiple 
Algorithms</span></a></li><li class="level-2"><a class="final" 
href="/algorithm/custom/"><span>Adding Your Own 
Algorithms</span></a></li></ul></li><li class="level-1"><a class="expandible" 
href="#"><span>Tuning and Evaluation</span></a><ul><li class="level-2"><a 
class="final" href="/evaluation/"><span>Overview</span></a></li><li 
class="level-2"><a class="final" 
href="/evaluation/paramtuning/"><span>Hyperparameter Tuning</span></a></li><li 
class="level-2"><a class="final" 
href="/evaluation/evaluationdashboard/"><span>Evaluation 
Dashboard</span></a></li><li class="level-2"><a class="final" 
href="/evaluation/metricchoose/"><span>Choos
 ing Evaluation Metrics</span></a></li><li class="level-2"><a class="final" 
href="/evaluation/metricbuild/"><span>Building Evaluation 
Metrics</span></a></li></ul></li><li class="level-1"><a class="expandible" 
href="#"><span>System Architecture</span></a><ul><li class="level-2"><a 
class="final" href="/system/"><span>Architecture Overview</span></a></li><li 
class="level-2"><a class="final" href="/system/anotherdatastore/"><span>Using 
Another Data Store</span></a></li></ul></li><li class="level-1"><a 
class="expandible" href="#"><span>PredictionIO® Official 
Templates</span></a><ul><li class="level-2"><a class="final" 
href="/templates/"><span>Intro</span></a></li><li class="level-2"><a 
class="expandible" href="#"><span>Recommendation</span></a><ul><li 
class="level-3"><a class="final" 
href="/templates/recommendation/quickstart/"><span>Quick 
Start</span></a></li><li class="level-3"><a class="final" 
href="/templates/recommendation/dase/"><span>DASE</span></a></li><li 
class="level-3"><a clas
 s="final" href="/templates/recommendation/evaluation/"><span>Evaluation 
Explained</span></a></li><li class="level-3"><a class="final" 
href="/templates/recommendation/how-to/"><span>How-To</span></a></li><li 
class="level-3"><a class="final" 
href="/templates/recommendation/reading-custom-events/"><span>Read Custom 
Events</span></a></li><li class="level-3"><a class="final" 
href="/templates/recommendation/customize-data-prep/"><span>Customize Data 
Preparator</span></a></li><li class="level-3"><a class="final" 
href="/templates/recommendation/customize-serving/"><span>Customize 
Serving</span></a></li><li class="level-3"><a class="final" 
href="/templates/recommendation/training-with-implicit-preference/"><span>Train 
with Implicit Preference</span></a></li><li class="level-3"><a class="final" 
href="/templates/recommendation/blacklist-items/"><span>Filter Recommended 
Items by Blacklist in Query</span></a></li><li class="level-3"><a class="final" 
href="/templates/recommendation/batch-evaluato
 r/"><span>Batch Persistable Evaluator</span></a></li></ul></li><li 
class="level-2"><a class="expandible" href="#"><span>E-Commerce 
Recommendation</span></a><ul><li class="level-3"><a class="final" 
href="/templates/ecommercerecommendation/quickstart/"><span>Quick 
Start</span></a></li><li class="level-3"><a class="final" 
href="/templates/ecommercerecommendation/dase/"><span>DASE</span></a></li><li 
class="level-3"><a class="final" 
href="/templates/ecommercerecommendation/how-to/"><span>How-To</span></a></li><li
 class="level-3"><a class="final" 
href="/templates/ecommercerecommendation/train-with-rate-event/"><span>Train 
with Rate Event</span></a></li><li class="level-3"><a class="final" 
href="/templates/ecommercerecommendation/adjust-score/"><span>Adjust 
Score</span></a></li></ul></li><li class="level-2"><a class="expandible" 
href="#"><span>Similar Product</span></a><ul><li class="level-3"><a 
class="final" href="/templates/similarproduct/quickstart/"><span>Quick 
Start</span></a></li><li
  class="level-3"><a class="final" 
href="/templates/similarproduct/dase/"><span>DASE</span></a></li><li 
class="level-3"><a class="final" 
href="/templates/similarproduct/how-to/"><span>How-To</span></a></li><li 
class="level-3"><a class="final" 
href="/templates/similarproduct/multi-events-multi-algos/"><span>Multiple 
Events and Multiple Algorithms</span></a></li><li class="level-3"><a 
class="final" 
href="/templates/similarproduct/return-item-properties/"><span>Returns Item 
Properties</span></a></li><li class="level-3"><a class="final" 
href="/templates/similarproduct/train-with-rate-event/"><span>Train with Rate 
Event</span></a></li><li class="level-3"><a class="final" 
href="/templates/similarproduct/rid-user-set-event/"><span>Get Rid of Events 
for Users</span></a></li><li class="level-3"><a class="final" 
href="/templates/similarproduct/recommended-user/"><span>Recommend 
Users</span></a></li></ul></li><li class="level-2"><a class="expandible" 
href="#"><span>Classification</span></a><ul>
 <li class="level-3"><a class="final" 
href="/templates/classification/quickstart/"><span>Quick 
Start</span></a></li><li class="level-3"><a class="final" 
href="/templates/classification/dase/"><span>DASE</span></a></li><li 
class="level-3"><a class="final" 
href="/templates/classification/how-to/"><span>How-To</span></a></li><li 
class="level-3"><a class="final" 
href="/templates/classification/add-algorithm/"><span>Use Alternative 
Algorithm</span></a></li><li class="level-3"><a class="final" 
href="/templates/classification/reading-custom-properties/"><span>Read Custom 
Properties</span></a></li></ul></li></ul></li><li class="level-1"><a 
class="expandible" href="#"><span>Engine Template Gallery</span></a><ul><li 
class="level-2"><a class="final" 
href="/gallery/template-gallery/"><span>Browse</span></a></li><li 
class="level-2"><a class="final" 
href="/community/submit-template/"><span>Submit your Engine as a 
Template</span></a></li></ul></li><li class="level-1"><a class="expandible" 
href="#">
 <span>Demo Tutorials</span></a><ul><li class="level-2"><a class="final" 
href="/demo/tapster/"><span>Comics Recommendation Demo</span></a></li><li 
class="level-2"><a class="final" href="/demo/community/"><span>Community 
Contributed Demo</span></a></li><li class="level-2"><a class="final" 
href="/demo/textclassification/"><span>Text Classification Engine 
Tutorial</span></a></li></ul></li><li class="level-1"><a class="expandible" 
href="/community/"><span>Getting Involved</span></a><ul><li class="level-2"><a 
class="final" href="/community/contribute-code/"><span>Contribute 
Code</span></a></li><li class="level-2"><a class="final" 
href="/community/contribute-documentation/"><span>Contribute 
Documentation</span></a></li><li class="level-2"><a class="final" 
href="/community/contribute-sdk/"><span>Contribute a SDK</span></a></li><li 
class="level-2"><a class="final" 
href="/community/contribute-webhook/"><span>Contribute a 
Webhook</span></a></li><li class="level-2"><a class="final" href="/commu
 nity/projects/"><span>Community Projects</span></a></li></ul></li><li 
class="level-1"><a class="expandible" href="#"><span>Getting 
Help</span></a><ul><li class="level-2"><a class="final" 
href="/resources/faq/"><span>FAQs</span></a></li><li class="level-2"><a 
class="final" href="/support/"><span>Support</span></a></li></ul></li><li 
class="level-1"><a class="expandible" 
href="#"><span>Resources</span></a><ul><li class="level-2"><a class="final" 
href="/cli/"><span>Command-line Interface</span></a></li><li class="level-2"><a 
class="final" href="/resources/release/"><span>Release 
Cadence</span></a></li><li class="level-2"><a class="final" 
href="/resources/intellij/"><span>Developing Engines with IntelliJ 
IDEA</span></a></li><li class="level-2"><a class="final" 
href="/resources/upgrade/"><span>Upgrade Instructions</span></a></li><li 
class="level-2"><a class="final" 
href="/resources/glossary/"><span>Glossary</span></a></li></ul></li><li 
class="level-1"><a class="expandible" href="#"><span>
 Apache Software Foundation</span></a><ul><li class="level-2"><a class="final" 
href="https://www.apache.org/";><span>Apache Homepage</span></a></li><li 
class="level-2"><a class="final" 
href="https://www.apache.org/licenses/";><span>License</span></a></li><li 
class="level-2"><a class="final" 
href="https://www.apache.org/foundation/sponsorship.html";><span>Sponsorship</span></a></li><li
 class="level-2"><a class="final" 
href="https://www.apache.org/foundation/thanks.html";><span>Thanks</span></a></li><li
 class="level-2"><a class="final" 
href="https://www.apache.org/security/";><span>Security</span></a></li></ul></li></ul></nav></div><div
 class="col-md-9 col-sm-12"><div class="content-header hidden-md 
hidden-lg"><div id="page-title"><h1>Modeling Workflow and 
DASE</h1></div></div><div id="table-of-content-wrapper"><h5>On this 
page</h5><aside id="table-of-contents"><ul> <li> <a 
href="#training-the-model">Training The Model</a> </li> </ul> </aside><hr/><a 
id="edit-page-link" href="https://github
 
.com/apache/predictionio/tree/livedoc/docs/manual/source/machinelearning/modelingworkflow.html.md"><img
 src="/images/icons/edit-pencil-d6c1bb3d.png"/>Edit this page</a></div><div 
class="content-header hidden-sm hidden-xs"><div id="page-title"><h1>Modeling 
Workflow and DASE</h1></div></div><div class="content"> <p>In addition to the 
DASE components, we also introduce the Data Model and Training Model 
abstractions. The Data Model abstraction refers to the set of Scala classes 
dealing with the implementation of modeling choices relating to feature 
<strong>extraction</strong>, <strong>preparation</strong>, and/or 
<strong>selection</strong>. For this illustration, this only includes the 
vectorization of text and t.f.-i.d.f. processing which is entirely implemented 
in the PreparedData class. The Training Model abstraction refers to any set of 
classes that individually take in a set of feature observations and output a 
predictive model. This predictive model is leveraged by the Algorithm c
 omponent to produce prediction results to queries in real-time. In the engine 
template, this abstraction is implemented in the NBModel class. <strong>Please 
note that these are conceptual abstractions that are designed to make engine 
development easier by decoupling class functionality.</strong> Keeping these 
abstractions in mind will help you in the future with debugging your code, and 
also make it easier to incorporate different modeling ideas into your 
engine.</p><p>The figure below shows a graphical representation of the engine 
architecture just described, as well as its interactions with your web/app and 
a provided Event Server:</p><p><img alt="Engine Overview" 
src="/images/demo/text_classification_template/engine_overview-27e09a89.png"/></p><h2
 id='training-the-model' class='header-anchors'>Training The Model</h2><p>This 
section will guide you through the two Training Model implementations that come 
with this engine template. Recall that the Training Model abstraction refers t
 o an arbitrary set Scala Class that outputs a predictive model (i.e. 
implements some method that can be used for prediction). The general problem 
this engine template is tackling is text classification, so that our Training 
Model abstraction domain is restricted to implementations producing 
classifiers. In particular, the classification model that is implemented in 
this engine template is based on Multinomial Naive Bayes using t.f.-i.d.f. 
vectorized text.</p></div></div></div></div><footer><div class="container"><div 
class="seperator"></div><div class="row"><div class="col-md-6 
footer-link-column"><div 
class="footer-link-column-row"><h4>Community</h4><ul><li><a 
href="//predictionio.apache.org/install/" 
target="blank">Download</a></li><li><a href="//predictionio.apache.org/" 
target="blank">Docs</a></li><li><a href="//github.com/apache/predictionio" 
target="blank">GitHub</a></li><li><a 
href="mailto:[email protected]"; target="blank">Subscribe 
to User Mailing List</
 a></li><li><a href="//stackoverflow.com/questions/tagged/predictionio" 
target="blank">Stackoverflow</a></li></ul></div></div><div class="col-md-6 
footer-link-column"><div 
class="footer-link-column-row"><h4>Contribute</h4><ul><li><a 
href="//predictionio.apache.org/community/contribute-code/" 
target="blank">Contribute</a></li><li><a 
href="//github.com/apache/predictionio" target="blank">Source 
Code</a></li><li><a href="//issues.apache.org/jira/browse/PIO" 
target="blank">Bug Tracker</a></li><li><a 
href="mailto:[email protected]"; target="blank">Subscribe to 
Development Mailing List</a></li></ul></div></div></div><div class="row"><div 
class="col-md-12 footer-link-column"><p>Apache PredictionIO, PredictionIO, 
Apache, the Apache feather logo, and the Apache PredictionIO project logo are 
either registered trademarks or trademarks of The Apache Software Foundation in 
the United States and other countries.</p><p>All other marks mentioned may be 
trademarks or registered tra
 demarks of their respective owners.</p></div></div></div><div 
id="footer-bottom"><div class="container"><div class="row"><div 
class="col-md-12"><div id="footer-logo-wrapper"><img alt="PredictionIO" 
src="/images/logos/logo-white-d1e9c6e6.png"/><span>®</span></div><div 
id="social-icons-wrapper"><a class="github-button" 
href="https://github.com/apache/predictionio"; data-icon="octicon-star" 
data-show-count="true" aria-label="Star apache/predictionio on GitHub">Star</a> 
<a class="github-button" href="https://github.com/apache/predictionio/fork"; 
data-icon="octicon-repo-forked" data-show-count="true" aria-label="Fork 
apache/predictionio on GitHub">Fork</a> <script id="github-bjs" async="" 
defer="" src="https://buttons.github.io/buttons.js";></script><a 
href="https://twitter.com/predictionio"; target="blank"><img alt="PredictionIO 
on Twitter" src="/images/icons/twitter-ea9dc152.png"/></a> <a 
href="https://www.facebook.com/predictionio"; target="blank"><img 
alt="PredictionIO on Facebook" src="
 /images/icons/facebook-5c57939c.png"/></a> 
</div></div></div></div></div></footer></div><script>(function(w,d,t,u,n,s,e){w['SwiftypeObject']=n;w[n]=w[n]||function(){
+<!DOCTYPE html><html><head><title>Modeling Workflow and DASE</title><meta 
charset="utf-8"/><meta content="IE=edge,chrome=1" 
http-equiv="X-UA-Compatible"/><meta name="viewport" 
content="width=device-width, initial-scale=1.0"/><meta class="swiftype" 
name="title" data-type="string" content="Modeling Workflow and DASE"/><link 
rel="canonical" 
href="https://predictionio.apache.org/machinelearning/modelingworkflow/"/><link 
href="/images/favicon/normal-b330020a.png" rel="shortcut icon"/><link 
href="/images/favicon/apple-c0febcf2.png" rel="apple-touch-icon"/><link 
href="//fonts.googleapis.com/css?family=Open+Sans:300italic,400italic,600italic,700italic,800italic,400,300,600,700,800"
 rel="stylesheet"/><link 
href="//maxcdn.bootstrapcdn.com/font-awesome/4.2.0/css/font-awesome.min.css" 
rel="stylesheet"/><link href="/stylesheets/application-eccfc6cb.css" 
rel="stylesheet" type="text/css"/><script 
src="//cdnjs.cloudflare.com/ajax/libs/html5shiv/3.7.2/html5shiv.min.js"></script><script
 src="//cdn.ma
 
thjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script><script
 src="//use.typekit.net/pqo0itb.js"></script><script>try{Typekit.load({ async: 
true });}catch(e){}</script></head><body><div id="global"><header><div 
class="container" id="header-wrapper"><div class="row"><div 
class="col-sm-12"><div id="logo-wrapper"><span id="drawer-toggle"></span><a 
href="#"></a><a href="http://predictionio.apache.org/";><img alt="Apache 
PredictionIO" id="logo" 
src="/images/logos/logo-ee2b9bb3.png"/></a><span>®</span></div><div 
id="menu-wrapper"><div id="pill-wrapper"><a class="pill left" 
href="/gallery/template-gallery">TEMPLATES</a> <a class="pill right" 
href="//github.com/apache/predictionio/">OPEN SOURCE</a></div></div><img 
class="mobile-search-bar-toggler hidden-md hidden-lg" 
src="/images/icons/search-glass-704bd4ff.png"/></div></div></div></header><div 
id="search-bar-row-wrapper"><div class="container-fluid" 
id="search-bar-row"><div class="row"><div class="col-md-9 col-sm-11 co
 l-xs-11"><div class="hidden-md hidden-lg" 
id="mobile-page-heading-wrapper"><p>PredictionIO Docs</p><h4>Modeling Workflow 
and DASE</h4></div><h4 class="hidden-sm hidden-xs">PredictionIO 
Docs</h4></div><div class="col-md-3 col-sm-1 col-xs-1 hidden-md hidden-lg"><img 
id="left-menu-indicator" 
src="/images/icons/down-arrow-dfe9f7fe.png"/></div><div class="col-md-3 
col-sm-12 col-xs-12 swiftype-wrapper"><div class="swiftype"><form 
class="search-form"><img class="search-box-toggler hidden-xs hidden-sm" 
src="/images/icons/search-glass-704bd4ff.png"/><div class="search-box"><img 
src="/images/icons/search-glass-704bd4ff.png"/><input type="text" 
id="st-search-input" class="st-search-input" placeholder="Search 
Doc..."/></div><img class="swiftype-row-hider hidden-md hidden-lg" 
src="/images/icons/drawer-toggle-active-fcbef12a.png"/></form></div></div><div 
class="mobile-left-menu-toggler hidden-md 
hidden-lg"></div></div></div></div><div id="page" class="container-fluid"><div 
class="row"><div id="le
 ft-menu-wrapper" class="col-md-3"><nav id="nav-main"><ul><li 
class="level-1"><a class="expandible" href="/"><span>Apache PredictionIO® 
Documentation</span></a><ul><li class="level-2"><a class="final" 
href="/"><span>Welcome to Apache PredictionIO®</span></a></li></ul></li><li 
class="level-1"><a class="expandible" href="#"><span>Getting 
Started</span></a><ul><li class="level-2"><a class="final" 
href="/start/"><span>A Quick Intro</span></a></li><li class="level-2"><a 
class="final" href="/install/"><span>Installing Apache 
PredictionIO</span></a></li><li class="level-2"><a class="final" 
href="/start/download/"><span>Downloading an Engine Template</span></a></li><li 
class="level-2"><a class="final" href="/start/deploy/"><span>Deploying Your 
First Engine</span></a></li><li class="level-2"><a class="final" 
href="/start/customize/"><span>Customizing the 
Engine</span></a></li></ul></li><li class="level-1"><a class="expandible" 
href="#"><span>Integrating with Your App</span></a><ul><li class
 ="level-2"><a class="final" href="/appintegration/"><span>App Integration 
Overview</span></a></li><li class="level-2"><a class="expandible" 
href="/sdk/"><span>List of SDKs</span></a><ul><li class="level-3"><a 
class="final" href="/sdk/java/"><span>Java & Android SDK</span></a></li><li 
class="level-3"><a class="final" href="/sdk/php/"><span>PHP 
SDK</span></a></li><li class="level-3"><a class="final" 
href="/sdk/python/"><span>Python SDK</span></a></li><li class="level-3"><a 
class="final" href="/sdk/ruby/"><span>Ruby SDK</span></a></li><li 
class="level-3"><a class="final" 
href="/community/projects/#sdks"><span>Community Powered 
SDKs</span></a></li></ul></li></ul></li><li class="level-1"><a 
class="expandible" href="#"><span>Deploying an Engine</span></a><ul><li 
class="level-2"><a class="final" href="/deploy/"><span>Deploying as a Web 
Service</span></a></li><li class="level-2"><a class="final" 
href="/batchpredict/"><span>Batch Predictions</span></a></li><li 
class="level-2"><a class="final
 " href="/deploy/monitoring/"><span>Monitoring Engine</span></a></li><li 
class="level-2"><a class="final" href="/deploy/engineparams/"><span>Setting 
Engine Parameters</span></a></li><li class="level-2"><a class="final" 
href="/deploy/enginevariants/"><span>Deploying Multiple Engine 
Variants</span></a></li><li class="level-2"><a class="final" 
href="/deploy/plugin/"><span>Engine Server Plugin</span></a></li></ul></li><li 
class="level-1"><a class="expandible" href="#"><span>Customizing an 
Engine</span></a><ul><li class="level-2"><a class="final" 
href="/customize/"><span>Learning DASE</span></a></li><li class="level-2"><a 
class="final" href="/customize/dase/"><span>Implement DASE</span></a></li><li 
class="level-2"><a class="final" 
href="/customize/troubleshooting/"><span>Troubleshooting Engine 
Development</span></a></li><li class="level-2"><a class="final" 
href="/api/current/#package"><span>Engine Scala 
APIs</span></a></li></ul></li><li class="level-1"><a class="expandible" 
href="#"><span
 >Collecting and Analyzing Data</span></a><ul><li class="level-2"><a 
 >class="final" href="/datacollection/"><span>Event Server 
 >Overview</span></a></li><li class="level-2"><a class="final" 
 >href="/datacollection/eventapi/"><span>Collecting Data with 
 >REST/SDKs</span></a></li><li class="level-2"><a class="final" 
 >href="/datacollection/eventmodel/"><span>Events Modeling</span></a></li><li 
 >class="level-2"><a class="final" 
 >href="/datacollection/webhooks/"><span>Unifying Multichannel Data with 
 >Webhooks</span></a></li><li class="level-2"><a class="final" 
 >href="/datacollection/channel/"><span>Channel</span></a></li><li 
 >class="level-2"><a class="final" 
 >href="/datacollection/batchimport/"><span>Importing Data in 
 >Batch</span></a></li><li class="level-2"><a class="final" 
 >href="/datacollection/analytics/"><span>Using Analytics 
 >Tools</span></a></li><li class="level-2"><a class="final" 
 >href="/datacollection/plugin/"><span>Event Server 
 >Plugin</span></a></li></ul></li><li class="level-1"><a class="expand
 ible" href="#"><span>Choosing an Algorithm</span></a><ul><li 
class="level-2"><a class="final" href="/algorithm/"><span>Built-in Algorithm 
Libraries</span></a></li><li class="level-2"><a class="final" 
href="/algorithm/switch/"><span>Switching to Another 
Algorithm</span></a></li><li class="level-2"><a class="final" 
href="/algorithm/multiple/"><span>Combining Multiple 
Algorithms</span></a></li><li class="level-2"><a class="final" 
href="/algorithm/custom/"><span>Adding Your Own 
Algorithms</span></a></li></ul></li><li class="level-1"><a class="expandible" 
href="#"><span>Tuning and Evaluation</span></a><ul><li class="level-2"><a 
class="final" href="/evaluation/"><span>Overview</span></a></li><li 
class="level-2"><a class="final" 
href="/evaluation/paramtuning/"><span>Hyperparameter Tuning</span></a></li><li 
class="level-2"><a class="final" 
href="/evaluation/evaluationdashboard/"><span>Evaluation 
Dashboard</span></a></li><li class="level-2"><a class="final" 
href="/evaluation/metricchoose/"><
 span>Choosing Evaluation Metrics</span></a></li><li class="level-2"><a 
class="final" href="/evaluation/metricbuild/"><span>Building Evaluation 
Metrics</span></a></li></ul></li><li class="level-1"><a class="expandible" 
href="#"><span>System Architecture</span></a><ul><li class="level-2"><a 
class="final" href="/system/"><span>Architecture Overview</span></a></li><li 
class="level-2"><a class="final" href="/system/anotherdatastore/"><span>Using 
Another Data Store</span></a></li></ul></li><li class="level-1"><a 
class="expandible" href="#"><span>PredictionIO® Official 
Templates</span></a><ul><li class="level-2"><a class="final" 
href="/templates/"><span>Intro</span></a></li><li class="level-2"><a 
class="expandible" href="#"><span>Recommendation</span></a><ul><li 
class="level-3"><a class="final" 
href="/templates/recommendation/quickstart/"><span>Quick 
Start</span></a></li><li class="level-3"><a class="final" 
href="/templates/recommendation/dase/"><span>DASE</span></a></li><li 
class="level-
 3"><a class="final" 
href="/templates/recommendation/evaluation/"><span>Evaluation 
Explained</span></a></li><li class="level-3"><a class="final" 
href="/templates/recommendation/how-to/"><span>How-To</span></a></li><li 
class="level-3"><a class="final" 
href="/templates/recommendation/reading-custom-events/"><span>Read Custom 
Events</span></a></li><li class="level-3"><a class="final" 
href="/templates/recommendation/customize-data-prep/"><span>Customize Data 
Preparator</span></a></li><li class="level-3"><a class="final" 
href="/templates/recommendation/customize-serving/"><span>Customize 
Serving</span></a></li><li class="level-3"><a class="final" 
href="/templates/recommendation/training-with-implicit-preference/"><span>Train 
with Implicit Preference</span></a></li><li class="level-3"><a class="final" 
href="/templates/recommendation/blacklist-items/"><span>Filter Recommended 
Items by Blacklist in Query</span></a></li><li class="level-3"><a class="final" 
href="/templates/recommendation/batc
 h-evaluator/"><span>Batch Persistable Evaluator</span></a></li></ul></li><li 
class="level-2"><a class="expandible" href="#"><span>E-Commerce 
Recommendation</span></a><ul><li class="level-3"><a class="final" 
href="/templates/ecommercerecommendation/quickstart/"><span>Quick 
Start</span></a></li><li class="level-3"><a class="final" 
href="/templates/ecommercerecommendation/dase/"><span>DASE</span></a></li><li 
class="level-3"><a class="final" 
href="/templates/ecommercerecommendation/how-to/"><span>How-To</span></a></li><li
 class="level-3"><a class="final" 
href="/templates/ecommercerecommendation/train-with-rate-event/"><span>Train 
with Rate Event</span></a></li><li class="level-3"><a class="final" 
href="/templates/ecommercerecommendation/adjust-score/"><span>Adjust 
Score</span></a></li></ul></li><li class="level-2"><a class="expandible" 
href="#"><span>Similar Product</span></a><ul><li class="level-3"><a 
class="final" href="/templates/similarproduct/quickstart/"><span>Quick 
Start</span></
 a></li><li class="level-3"><a class="final" 
href="/templates/similarproduct/dase/"><span>DASE</span></a></li><li 
class="level-3"><a class="final" 
href="/templates/similarproduct/how-to/"><span>How-To</span></a></li><li 
class="level-3"><a class="final" 
href="/templates/similarproduct/multi-events-multi-algos/"><span>Multiple 
Events and Multiple Algorithms</span></a></li><li class="level-3"><a 
class="final" 
href="/templates/similarproduct/return-item-properties/"><span>Returns Item 
Properties</span></a></li><li class="level-3"><a class="final" 
href="/templates/similarproduct/train-with-rate-event/"><span>Train with Rate 
Event</span></a></li><li class="level-3"><a class="final" 
href="/templates/similarproduct/rid-user-set-event/"><span>Get Rid of Events 
for Users</span></a></li><li class="level-3"><a class="final" 
href="/templates/similarproduct/recommended-user/"><span>Recommend 
Users</span></a></li></ul></li><li class="level-2"><a class="expandible" 
href="#"><span>Classification</spa
 n></a><ul><li class="level-3"><a class="final" 
href="/templates/classification/quickstart/"><span>Quick 
Start</span></a></li><li class="level-3"><a class="final" 
href="/templates/classification/dase/"><span>DASE</span></a></li><li 
class="level-3"><a class="final" 
href="/templates/classification/how-to/"><span>How-To</span></a></li><li 
class="level-3"><a class="final" 
href="/templates/classification/add-algorithm/"><span>Use Alternative 
Algorithm</span></a></li><li class="level-3"><a class="final" 
href="/templates/classification/reading-custom-properties/"><span>Read Custom 
Properties</span></a></li></ul></li></ul></li><li class="level-1"><a 
class="expandible" href="#"><span>Engine Template Gallery</span></a><ul><li 
class="level-2"><a class="final" 
href="/gallery/template-gallery/"><span>Browse</span></a></li><li 
class="level-2"><a class="final" 
href="/community/submit-template/"><span>Submit your Engine as a 
Template</span></a></li></ul></li><li class="level-1"><a class="expandible"
  href="#"><span>Demo Tutorials</span></a><ul><li class="level-2"><a 
class="final" href="/community/projects/#demos"><span>Community Contributed 
Demo</span></a></li><li class="level-2"><a class="final" 
href="/demo/textclassification/"><span>Text Classification Engine 
Tutorial</span></a></li></ul></li><li class="level-1"><a class="expandible" 
href="/community/"><span>Getting Involved</span></a><ul><li class="level-2"><a 
class="final" href="/community/contribute-code/"><span>Contribute 
Code</span></a></li><li class="level-2"><a class="final" 
href="/community/contribute-documentation/"><span>Contribute 
Documentation</span></a></li><li class="level-2"><a class="final" 
href="/community/contribute-sdk/"><span>Contribute a SDK</span></a></li><li 
class="level-2"><a class="final" 
href="/community/contribute-webhook/"><span>Contribute a 
Webhook</span></a></li><li class="level-2"><a class="final" 
href="/community/projects/"><span>Community 
Projects</span></a></li></ul></li><li class="level-1"><
 a class="expandible" href="#"><span>Getting Help</span></a><ul><li 
class="level-2"><a class="final" 
href="/resources/faq/"><span>FAQs</span></a></li><li class="level-2"><a 
class="final" href="/support/"><span>Support</span></a></li></ul></li><li 
class="level-1"><a class="expandible" 
href="#"><span>Resources</span></a><ul><li class="level-2"><a class="final" 
href="/cli/"><span>Command-line Interface</span></a></li><li class="level-2"><a 
class="final" href="/resources/release/"><span>Release 
Cadence</span></a></li><li class="level-2"><a class="final" 
href="/resources/intellij/"><span>Developing Engines with IntelliJ 
IDEA</span></a></li><li class="level-2"><a class="final" 
href="/resources/upgrade/"><span>Upgrade Instructions</span></a></li><li 
class="level-2"><a class="final" 
href="/resources/glossary/"><span>Glossary</span></a></li></ul></li><li 
class="level-1"><a class="expandible" href="#"><span>Apache Software 
Foundation</span></a><ul><li class="level-2"><a class="final" href="htt
 ps://www.apache.org/"><span>Apache Homepage</span></a></li><li 
class="level-2"><a class="final" 
href="https://www.apache.org/licenses/";><span>License</span></a></li><li 
class="level-2"><a class="final" 
href="https://www.apache.org/foundation/sponsorship.html";><span>Sponsorship</span></a></li><li
 class="level-2"><a class="final" 
href="https://www.apache.org/foundation/thanks.html";><span>Thanks</span></a></li><li
 class="level-2"><a class="final" 
href="https://www.apache.org/security/";><span>Security</span></a></li></ul></li></ul></nav></div><div
 class="col-md-9 col-sm-12"><div class="content-header hidden-md 
hidden-lg"><div id="page-title"><h1>Modeling Workflow and 
DASE</h1></div></div><div id="table-of-content-wrapper"><h5>On this 
page</h5><aside id="table-of-contents"><ul> <li> <a 
href="#training-the-model">Training The Model</a> </li> </ul> </aside><hr/><a 
id="edit-page-link" 
href="https://github.com/apache/predictionio/tree/livedoc/docs/manual/source/machinelearning/modelingworkfl
 ow.html.md"><img src="/images/icons/edit-pencil-d6c1bb3d.png"/>Edit this 
page</a></div><div class="content-header hidden-sm hidden-xs"><div 
id="page-title"><h1>Modeling Workflow and DASE</h1></div></div><div 
class="content"> <p>In addition to the DASE components, we also introduce the 
Data Model and Training Model abstractions. The Data Model abstraction refers 
to the set of Scala classes dealing with the implementation of modeling choices 
relating to feature <strong>extraction</strong>, <strong>preparation</strong>, 
and/or <strong>selection</strong>. For this illustration, this only includes 
the vectorization of text and t.f.-i.d.f. processing which is entirely 
implemented in the PreparedData class. The Training Model abstraction refers to 
any set of classes that individually take in a set of feature observations and 
output a predictive model. This predictive model is leveraged by the Algorithm 
component to produce prediction results to queries in real-time. In the engine 
template,
  this abstraction is implemented in the NBModel class. <strong>Please note 
that these are conceptual abstractions that are designed to make engine 
development easier by decoupling class functionality.</strong> Keeping these 
abstractions in mind will help you in the future with debugging your code, and 
also make it easier to incorporate different modeling ideas into your 
engine.</p><p>The figure below shows a graphical representation of the engine 
architecture just described, as well as its interactions with your web/app and 
a provided Event Server:</p><p><img alt="Engine Overview" 
src="/images/demo/text_classification_template/engine_overview-27e09a89.png"/></p><h2
 id='training-the-model' class='header-anchors'>Training The Model</h2><p>This 
section will guide you through the two Training Model implementations that come 
with this engine template. Recall that the Training Model abstraction refers to 
an arbitrary set Scala Class that outputs a predictive model (i.e. implements 
some me
 thod that can be used for prediction). The general problem this engine 
template is tackling is text classification, so that our Training Model 
abstraction domain is restricted to implementations producing classifiers. In 
particular, the classification model that is implemented in this engine 
template is based on Multinomial Naive Bayes using t.f.-i.d.f. vectorized 
text.</p></div></div></div></div><footer><div class="container"><div 
class="seperator"></div><div class="row"><div class="col-md-6 
footer-link-column"><div 
class="footer-link-column-row"><h4>Community</h4><ul><li><a 
href="//predictionio.apache.org/install/" 
target="blank">Download</a></li><li><a href="//predictionio.apache.org/" 
target="blank">Docs</a></li><li><a href="//github.com/apache/predictionio" 
target="blank">GitHub</a></li><li><a 
href="mailto:[email protected]"; target="blank">Subscribe 
to User Mailing List</a></li><li><a 
href="//stackoverflow.com/questions/tagged/predictionio" target="blank">S
 tackoverflow</a></li></ul></div></div><div class="col-md-6 
footer-link-column"><div 
class="footer-link-column-row"><h4>Contribute</h4><ul><li><a 
href="//predictionio.apache.org/community/contribute-code/" 
target="blank">Contribute</a></li><li><a 
href="//github.com/apache/predictionio" target="blank">Source 
Code</a></li><li><a href="//issues.apache.org/jira/browse/PIO" 
target="blank">Bug Tracker</a></li><li><a 
href="mailto:[email protected]"; target="blank">Subscribe to 
Development Mailing List</a></li></ul></div></div></div><div class="row"><div 
class="col-md-12 footer-link-column"><p>Apache PredictionIO, PredictionIO, 
Apache, the Apache feather logo, and the Apache PredictionIO project logo are 
either registered trademarks or trademarks of The Apache Software Foundation in 
the United States and other countries.</p><p>All other marks mentioned may be 
trademarks or registered trademarks of their respective 
owners.</p></div></div></div><div id="footer-bottom"><div c
 lass="container"><div class="row"><div class="col-md-12"><div 
id="footer-logo-wrapper"><img alt="PredictionIO" 
src="/images/logos/logo-white-d1e9c6e6.png"/><span>®</span></div><div 
id="social-icons-wrapper"><a class="github-button" 
href="https://github.com/apache/predictionio"; data-icon="octicon-star" 
data-show-count="true" aria-label="Star apache/predictionio on GitHub">Star</a> 
<a class="github-button" href="https://github.com/apache/predictionio/fork"; 
data-icon="octicon-repo-forked" data-show-count="true" aria-label="Fork 
apache/predictionio on GitHub">Fork</a> <script id="github-bjs" async="" 
defer="" src="https://buttons.github.io/buttons.js";></script><a 
href="https://twitter.com/predictionio"; target="blank"><img alt="PredictionIO 
on Twitter" src="/images/icons/twitter-ea9dc152.png"/></a> <a 
href="https://www.facebook.com/predictionio"; target="blank"><img 
alt="PredictionIO on Facebook" src="/images/icons/facebook-5c57939c.png"/></a> 
</div></div></div></div></div></footer></div
 ><script>(function(w,d,t,u,n,s,e){w['SwiftypeObject']=n;w[n]=w[n]||function(){
 (w[n].q=w[n].q||[]).push(arguments);};s=d.createElement(t);
 
e=d.getElementsByTagName(t)[0];s.async=1;s.src=u;e.parentNode.insertBefore(s,e);
 })(window,document,'script','//s.swiftypecdn.com/install/v1/st.js','_st');

Reply via email to