http://git-wip-us.apache.org/repos/asf/predictionio-site/blob/9d2bd407/demo/supervisedlearning/index.html
----------------------------------------------------------------------
diff --git a/demo/supervisedlearning/index.html 
b/demo/supervisedlearning/index.html
index ea3d912..53fe6f4 100644
--- a/demo/supervisedlearning/index.html
+++ b/demo/supervisedlearning/index.html
@@ -1,4 +1,4 @@
-<!DOCTYPE html><html><head><title>Machine Learning With 
PredictionIO</title><meta charset="utf-8"/><meta content="IE=edge,chrome=1" 
http-equiv="X-UA-Compatible"/><meta name="viewport" 
content="width=device-width, initial-scale=1.0"/><meta class="swiftype" 
name="title" data-type="string" content="Machine Learning With 
PredictionIO"/><link rel="canonical" 
href="https://predictionio.apache.org/demo/supervisedlearning/"/><link 
href="/images/favicon/normal-b330020a.png" rel="shortcut icon"/><link 
href="/images/favicon/apple-c0febcf2.png" rel="apple-touch-icon"/><link 
href="//fonts.googleapis.com/css?family=Open+Sans:300italic,400italic,600italic,700italic,800italic,400,300,600,700,800"
 rel="stylesheet"/><link 
href="//maxcdn.bootstrapcdn.com/font-awesome/4.2.0/css/font-awesome.min.css" 
rel="stylesheet"/><link href="/stylesheets/application-eccfc6cb.css" 
rel="stylesheet" type="text/css"/><script 
src="//cdnjs.cloudflare.com/ajax/libs/html5shiv/3.7.2/html5shiv.min.js"></script><script
 src="/
 
/cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script><script
 src="//use.typekit.net/pqo0itb.js"></script><script>try{Typekit.load({ async: 
true });}catch(e){}</script></head><body><div id="global"><header><div 
class="container" id="header-wrapper"><div class="row"><div 
class="col-sm-12"><div id="logo-wrapper"><span id="drawer-toggle"></span><a 
href="#"></a><a href="http://predictionio.apache.org/"><img alt="Apache 
PredictionIO" id="logo" 
src="/images/logos/logo-ee2b9bb3.png"/></a><span>®</span></div><div 
id="menu-wrapper"><div id="pill-wrapper"><a class="pill left" 
href="/gallery/template-gallery">TEMPLATES</a> <a class="pill right" 
href="//github.com/apache/incubator-predictionio/">OPEN 
SOURCE</a></div></div><img class="mobile-search-bar-toggler hidden-md 
hidden-lg" 
src="/images/icons/search-glass-704bd4ff.png"/></div></div></div></header><div 
id="search-bar-row-wrapper"><div class="container-fluid" 
id="search-bar-row"><div class="row"><div class="col-
 md-9 col-sm-11 col-xs-11"><div class="hidden-md hidden-lg" 
id="mobile-page-heading-wrapper"><p>PredictionIO Docs</p><h4>Machine Learning 
With PredictionIO</h4></div><h4 class="hidden-sm hidden-xs">PredictionIO 
Docs</h4></div><div class="col-md-3 col-sm-1 col-xs-1 hidden-md hidden-lg"><img 
id="left-menu-indicator" 
src="/images/icons/down-arrow-dfe9f7fe.png"/></div><div class="col-md-3 
col-sm-12 col-xs-12 swiftype-wrapper"><div class="swiftype"><form 
class="search-form"><img class="search-box-toggler hidden-xs hidden-sm" 
src="/images/icons/search-glass-704bd4ff.png"/><div class="search-box"><img 
src="/images/icons/search-glass-704bd4ff.png"/><input type="text" 
id="st-search-input" class="st-search-input" placeholder="Search 
Doc..."/></div><img class="swiftype-row-hider hidden-md hidden-lg" 
src="/images/icons/drawer-toggle-active-fcbef12a.png"/></form></div></div><div 
class="mobile-left-menu-toggler hidden-md 
hidden-lg"></div></div></div></div><div id="page" class="container-fluid"><di
 v class="row"><div id="left-menu-wrapper" class="col-md-3"><nav 
id="nav-main"><ul><li class="level-1"><a class="expandible" 
href="/"><span>Apache PredictionIO® Documentation</span></a><ul><li 
class="level-2"><a class="final" href="/"><span>Welcome to Apache 
PredictionIO®</span></a></li></ul></li><li class="level-1"><a 
class="expandible" href="#"><span>Getting Started</span></a><ul><li 
class="level-2"><a class="final" href="/start/"><span>A Quick 
Intro</span></a></li><li class="level-2"><a class="final" 
href="/install/"><span>Installing Apache PredictionIO</span></a></li><li 
class="level-2"><a class="final" href="/start/download/"><span>Downloading an 
Engine Template</span></a></li><li class="level-2"><a class="final" 
href="/start/deploy/"><span>Deploying Your First Engine</span></a></li><li 
class="level-2"><a class="final" href="/start/customize/"><span>Customizing the 
Engine</span></a></li></ul></li><li class="level-1"><a class="expandible" 
href="#"><span>Integrating with Your Ap
 p</span></a><ul><li class="level-2"><a class="final" 
href="/appintegration/"><span>App Integration Overview</span></a></li><li 
class="level-2"><a class="expandible" href="/sdk/"><span>List of 
SDKs</span></a><ul><li class="level-3"><a class="final" 
href="/sdk/java/"><span>Java & Android SDK</span></a></li><li 
class="level-3"><a class="final" href="/sdk/php/"><span>PHP 
SDK</span></a></li><li class="level-3"><a class="final" 
href="/sdk/python/"><span>Python SDK</span></a></li><li class="level-3"><a 
class="final" href="/sdk/ruby/"><span>Ruby SDK</span></a></li><li 
class="level-3"><a class="final" href="/sdk/community/"><span>Community Powered 
SDKs</span></a></li></ul></li></ul></li><li class="level-1"><a 
class="expandible" href="#"><span>Deploying an Engine</span></a><ul><li 
class="level-2"><a class="final" href="/deploy/"><span>Deploying as a Web 
Service</span></a></li><li class="level-2"><a class="final" 
href="/batchpredict/"><span>Batch Predictions</span></a></li><li 
class="level-2">
 <a class="final" href="/deploy/monitoring/"><span>Monitoring 
Engine</span></a></li><li class="level-2"><a class="final" 
href="/deploy/engineparams/"><span>Setting Engine Parameters</span></a></li><li 
class="level-2"><a class="final" href="/deploy/enginevariants/"><span>Deploying 
Multiple Engine Variants</span></a></li><li class="level-2"><a class="final" 
href="/deploy/plugin/"><span>Engine Server Plugin</span></a></li></ul></li><li 
class="level-1"><a class="expandible" href="#"><span>Customizing an 
Engine</span></a><ul><li class="level-2"><a class="final" 
href="/customize/"><span>Learning DASE</span></a></li><li class="level-2"><a 
class="final" href="/customize/dase/"><span>Implement DASE</span></a></li><li 
class="level-2"><a class="final" 
href="/customize/troubleshooting/"><span>Troubleshooting Engine 
Development</span></a></li><li class="level-2"><a class="final" 
href="/api/current/#package"><span>Engine Scala 
APIs</span></a></li></ul></li><li class="level-1"><a class="expandible"
  href="#"><span>Collecting and Analyzing Data</span></a><ul><li 
class="level-2"><a class="final" href="/datacollection/"><span>Event Server 
Overview</span></a></li><li class="level-2"><a class="final" 
href="/datacollection/eventapi/"><span>Collecting Data with 
REST/SDKs</span></a></li><li class="level-2"><a class="final" 
href="/datacollection/eventmodel/"><span>Events Modeling</span></a></li><li 
class="level-2"><a class="final" 
href="/datacollection/webhooks/"><span>Unifying Multichannel Data with 
Webhooks</span></a></li><li class="level-2"><a class="final" 
href="/datacollection/channel/"><span>Channel</span></a></li><li 
class="level-2"><a class="final" 
href="/datacollection/batchimport/"><span>Importing Data in 
Batch</span></a></li><li class="level-2"><a class="final" 
href="/datacollection/analytics/"><span>Using Analytics 
Tools</span></a></li><li class="level-2"><a class="final" 
href="/datacollection/plugin/"><span>Event Server 
Plugin</span></a></li></ul></li><li class="level-1"><
 a class="expandible" href="#"><span>Choosing an Algorithm(s)</span></a><ul><li 
class="level-2"><a class="final" href="/algorithm/"><span>Built-in Algorithm 
Libraries</span></a></li><li class="level-2"><a class="final" 
href="/algorithm/switch/"><span>Switching to Another 
Algorithm</span></a></li><li class="level-2"><a class="final" 
href="/algorithm/multiple/"><span>Combining Multiple 
Algorithms</span></a></li><li class="level-2"><a class="final" 
href="/algorithm/custom/"><span>Adding Your Own 
Algorithms</span></a></li></ul></li><li class="level-1"><a class="expandible" 
href="#"><span>ML Tuning and Evaluation</span></a><ul><li class="level-2"><a 
class="final" href="/evaluation/"><span>Overview</span></a></li><li 
class="level-2"><a class="final" 
href="/evaluation/paramtuning/"><span>Hyperparameter Tuning</span></a></li><li 
class="level-2"><a class="final" 
href="/evaluation/evaluationdashboard/"><span>Evaluation 
Dashboard</span></a></li><li class="level-2"><a class="final" href="/evalua
 tion/metricchoose/"><span>Choosing Evaluation Metrics</span></a></li><li 
class="level-2"><a class="final" href="/evaluation/metricbuild/"><span>Building 
Evaluation Metrics</span></a></li></ul></li><li class="level-1"><a 
class="expandible" href="#"><span>System Architecture</span></a><ul><li 
class="level-2"><a class="final" href="/system/"><span>Architecture 
Overview</span></a></li><li class="level-2"><a class="final" 
href="/system/anotherdatastore/"><span>Using Another Data 
Store</span></a></li></ul></li><li class="level-1"><a class="expandible" 
href="#"><span>PredictionIO® Official Templates</span></a><ul><li 
class="level-2"><a class="final" 
href="/templates/"><span>Intro</span></a></li><li class="level-2"><a 
class="expandible" href="#"><span>Recommendation</span></a><ul><li 
class="level-3"><a class="final" 
href="/templates/recommendation/quickstart/"><span>Quick 
Start</span></a></li><li class="level-3"><a class="final" 
href="/templates/recommendation/dase/"><span>DASE</span></a><
 /li><li class="level-3"><a class="final" 
href="/templates/recommendation/evaluation/"><span>Evaluation 
Explained</span></a></li><li class="level-3"><a class="final" 
href="/templates/recommendation/how-to/"><span>How-To</span></a></li><li 
class="level-3"><a class="final" 
href="/templates/recommendation/reading-custom-events/"><span>Read Custom 
Events</span></a></li><li class="level-3"><a class="final" 
href="/templates/recommendation/customize-data-prep/"><span>Customize Data 
Preparator</span></a></li><li class="level-3"><a class="final" 
href="/templates/recommendation/customize-serving/"><span>Customize 
Serving</span></a></li><li class="level-3"><a class="final" 
href="/templates/recommendation/training-with-implicit-preference/"><span>Train 
with Implicit Preference</span></a></li><li class="level-3"><a class="final" 
href="/templates/recommendation/blacklist-items/"><span>Filter Recommended 
Items by Blacklist in Query</span></a></li><li class="level-3"><a class="final" 
href="/template
 s/recommendation/batch-evaluator/"><span>Batch Persistable 
Evaluator</span></a></li></ul></li><li class="level-2"><a class="expandible" 
href="#"><span>E-Commerce Recommendation</span></a><ul><li class="level-3"><a 
class="final" href="/templates/ecommercerecommendation/quickstart/"><span>Quick 
Start</span></a></li><li class="level-3"><a class="final" 
href="/templates/ecommercerecommendation/dase/"><span>DASE</span></a></li><li 
class="level-3"><a class="final" 
href="/templates/ecommercerecommendation/how-to/"><span>How-To</span></a></li><li
 class="level-3"><a class="final" 
href="/templates/ecommercerecommendation/train-with-rate-event/"><span>Train 
with Rate Event</span></a></li><li class="level-3"><a class="final" 
href="/templates/ecommercerecommendation/adjust-score/"><span>Adjust 
Score</span></a></li></ul></li><li class="level-2"><a class="expandible" 
href="#"><span>Similar Product</span></a><ul><li class="level-3"><a 
class="final" href="/templates/similarproduct/quickstart/"><span
 >Quick Start</span></a></li><li class="level-3"><a class="final" 
 >href="/templates/similarproduct/dase/"><span>DASE</span></a></li><li 
 >class="level-3"><a class="final" 
 >href="/templates/similarproduct/how-to/"><span>How-To</span></a></li><li 
 >class="level-3"><a class="final" 
 >href="/templates/similarproduct/multi-events-multi-algos/"><span>Multiple 
 >Events and Multiple Algorithms</span></a></li><li class="level-3"><a 
 >class="final" 
 >href="/templates/similarproduct/return-item-properties/"><span>Returns Item 
 >Properties</span></a></li><li class="level-3"><a class="final" 
 >href="/templates/similarproduct/train-with-rate-event/"><span>Train with Rate 
 >Event</span></a></li><li class="level-3"><a class="final" 
 >href="/templates/similarproduct/rid-user-set-event/"><span>Get Rid of Events 
 >for Users</span></a></li><li class="level-3"><a class="final" 
 >href="/templates/similarproduct/recommended-user/"><span>Recommend 
 >Users</span></a></li></ul></li><li class="level-2"><a class="expandible" 
 >href="#"><spa
 n>Classification</span></a><ul><li class="level-3"><a class="final" 
href="/templates/classification/quickstart/"><span>Quick 
Start</span></a></li><li class="level-3"><a class="final" 
href="/templates/classification/dase/"><span>DASE</span></a></li><li 
class="level-3"><a class="final" 
href="/templates/classification/how-to/"><span>How-To</span></a></li><li 
class="level-3"><a class="final" 
href="/templates/classification/add-algorithm/"><span>Use Alternative 
Algorithm</span></a></li><li class="level-3"><a class="final" 
href="/templates/classification/reading-custom-properties/"><span>Read Custom 
Properties</span></a></li></ul></li></ul></li><li class="level-1"><a 
class="expandible" href="#"><span>Engine Template Gallery</span></a><ul><li 
class="level-2"><a class="final" 
href="/gallery/template-gallery/"><span>Browse</span></a></li><li 
class="level-2"><a class="final" 
href="/community/submit-template/"><span>Submit your Engine as a 
Template</span></a></li></ul></li><li class="level-1">
 <a class="expandible" href="#"><span>Demo Tutorials</span></a><ul><li 
class="level-2"><a class="final" href="/demo/tapster/"><span>Comics 
Recommendation Demo</span></a></li><li class="level-2"><a class="final" 
href="/demo/community/"><span>Community Contributed Demo</span></a></li><li 
class="level-2"><a class="final" href="/demo/textclassification/"><span>Text 
Classification Engine Tutorial</span></a></li></ul></li><li class="level-1"><a 
class="expandible" href="/community/"><span>Getting Involved</span></a><ul><li 
class="level-2"><a class="final" 
href="/community/contribute-code/"><span>Contribute Code</span></a></li><li 
class="level-2"><a class="final" 
href="/community/contribute-documentation/"><span>Contribute 
Documentation</span></a></li><li class="level-2"><a class="final" 
href="/community/contribute-sdk/"><span>Contribute a SDK</span></a></li><li 
class="level-2"><a class="final" 
href="/community/contribute-webhook/"><span>Contribute a 
Webhook</span></a></li><li class="level-2
 "><a class="final" href="/community/projects/"><span>Community 
Projects</span></a></li></ul></li><li class="level-1"><a class="expandible" 
href="#"><span>Getting Help</span></a><ul><li class="level-2"><a class="final" 
href="/resources/faq/"><span>FAQs</span></a></li><li class="level-2"><a 
class="final" href="/support/"><span>Support</span></a></li></ul></li><li 
class="level-1"><a class="expandible" 
href="#"><span>Resources</span></a><ul><li class="level-2"><a class="final" 
href="/cli/"><span>Command-line Interface</span></a></li><li class="level-2"><a 
class="final" href="/resources/release/"><span>Release 
Cadence</span></a></li><li class="level-2"><a class="final" 
href="/resources/intellij/"><span>Developing Engines with IntelliJ 
IDEA</span></a></li><li class="level-2"><a class="final" 
href="/resources/upgrade/"><span>Upgrade Instructions</span></a></li><li 
class="level-2"><a class="final" 
href="/resources/glossary/"><span>Glossary</span></a></li></ul></li><li 
class="level-1"><a cla
 ss="expandible" href="#"><span>Apache Software Foundation</span></a><ul><li 
class="level-2"><a class="final" href="https://www.apache.org/"><span>Apache 
Homepage</span></a></li><li class="level-2"><a class="final" 
href="https://www.apache.org/licenses/"><span>License</span></a></li><li 
class="level-2"><a class="final" 
href="https://www.apache.org/foundation/sponsorship.html"><span>Sponsorship</span></a></li><li
 class="level-2"><a class="final" 
href="https://www.apache.org/foundation/thanks.html"><span>Thanks</span></a></li><li
 class="level-2"><a class="final" 
href="https://www.apache.org/security/"><span>Security</span></a></li></ul></li></ul></nav></div><div
 class="col-md-9 col-sm-12"><div class="content-header hidden-md 
hidden-lg"><div id="page-title"><h1>Machine Learning With 
PredictionIO</h1></div></div><div id="table-of-content-wrapper"><h5>On this 
page</h5><aside id="table-of-contents"><ul> <li> <a 
href="#introduction-to-supervised-learning">Introduction to Supervised Learning
 </a> </li> <li> <a href="#predictionio-and-supervised-learning">PredictionIO 
and Supervised Learning</a> </li> </ul> </aside><hr/><a id="edit-page-link" 
href="https://github.com/apache/incubator-predictionio/tree/livedoc/docs/manual/source/demo/supervisedlearning.html.md"><img
 src="/images/icons/edit-pencil-d6c1bb3d.png"/>Edit this page</a></div><div 
class="content-header hidden-sm hidden-xs"><div id="page-title"><h1>Machine 
Learning With PredictionIO</h1></div></div><div class="content"> <p>This guide 
is designed to give developers a brief introduction to fundamental concepts in 
machine learning, as well as an explanation of how these concepts tie into 
PredictionIO&#39;s engine development platform. This particular guide will 
largely deal with giving some</p><h2 id='introduction-to-supervised-learning' 
class='header-anchors'>Introduction to Supervised Learning</h2><p>The first 
question we must ask is: what is machine learning? <strong>Machine 
learning</strong> is the field of study 
 at the intersection of computer science, engineering, mathematics, and 
statistics which seeks to discover or infer patterns hidden within a set of 
observations, which we call our data. Some examples of problems that machine 
learning seeks to solve are:</p> <ul> <li>Predict whether a patient has breast 
cancer based on their mammogram results.</li> <li>Predict whether an e-mail is 
spam or not based on the e-mail&#39;s content.</li> <li>Predict today&#39;s 
temperature based on climate variables collected for the previous week.</li> 
</ul> <h3 id='thinking-about-data' class='header-anchors'>Thinking About 
Data</h3><p>In the examples above, we are trying to predict an outcome \(Y\), 
or <strong>response</strong>, based on some recorded or observed variables 
\(X\), or <strong>features</strong>. For example: in the first problem each 
observation is a patient, the response variable \(Y\) is equal to 1 if this 
patient has breast cancer and 0 otherwise, and \(X\) represents the mammogram 
resul
 ts.</p><p>When we say we want to predict \(Y\) using \(X\), we are trying to 
answer the question: how does a response \(Y\) depend on a set of features 
\(X\)? To do this we need a set of observations, 
which we call our <strong>training data</strong>, consisting of observations 
for which we have observed both \(Y\) and \(X\), in order to make inference 
about this relationship.</p><h3 
id='different-types-of-supervised-learning-problems' 
class='header-anchors'>Different Types of Supervised Learning 
Problems</h3><p>Note that in the first two examples, the outcome \(Y\) can only 
take on two values (1 : cancer/spam, 0: no cancer/ no spam). Whenever the 
outcome variable \(Y\) denotes a label associated to a particular group of 
observations (e.g. cancer group), the <strong>supervised learning</strong> 
problem is also called a <strong>classification</strong> problem. In the third 
example, however, \(Y\) can take on any numerical value since it denotes some 
temperatu
 re reading (e.g. 25.143, 25.14233, 32.0). These types of supervised learning 
problems are also called <strong>regression</strong> problems.</p><h3 
id='training-a-predictive-model' class='header-anchors'>Training a Predictive 
Model</h3><p>A predictive model should be thought of as a function \(f\) that 
takes as input a set of features, and outputs a predicted outcome (i.e. \(f(X) 
= Y\)). The phrase <strong>training a model</strong> simply refers to the 
process of using the training data to estimate such a function. </p><h2 
id='predictionio-and-supervised-learning' class='header-anchors'>PredictionIO 
and Supervised Learning</h2><p>Machine learning methods generally assume that 
our observation responses and features are numeric vectors. We will say that 
observations in this format are in <strong>standard form</strong>. However, 
when you are working with real-life data this will often not be the case. The 
data will often be formatted in a manner that is specific to the 
application&#39;s
  needs. As an example, let&#39;s suppose our application is <a 
href="http://stackoverflow.com">StackOverFlow</a>. The data we want to analyze 
are questions, and we want to predict based on a question&#39;s content whether 
or not it is related to Scala.</p><p><strong>Self-check:</strong> Is this a 
classification or regression problem?</p><h3 
id='thinking-about-data-with-predictionio' class='header-anchors'>Thinking 
About Data With PredictionIO</h3><p>PredictionIO&#39;s predictive engine 
development platform allows you to easily incorporate observations that are not 
in standard form. Continuing with our example, we can import the observations, 
or StackOverFlow questions, into <a href="/datacollection/">PredictionIO&#39;s 
Event Server</a> as events with the following 
properties:</p><p><code>properties = {question : String, topic : 
String}</code></p><p>The value <code>question</code> is the actual question 
stored as a <code>String</code>, and topic is also a string equal to either 
<code
 >&quot;Scala&quot;</code> or <code>&quot;Other&quot;</code>. Our outcome here 
 >is <code>topic</code>, and <code>question</code> will provide a source for 
 >extracting features. That is, we will be using <code>question</code> to 
 >predict the outcome <code>topic</code>.</p><p>Once the observations are 
 >loaded as events into the Event Server, the engine&#39;s <a 
 >href="/customize/">Data Source</a> component is able to read them, which 
 >allows you to treat them as objects in a Scala project. The engine&#39;s 
 >Preparator component is in charge of converting these observations into 
 >standard form. To do this, we can first map the topic values as 
 >follows:</p><p><code>Map(&quot;Other&quot; -&gt; 0, &quot;Scala&quot; -&gt; 
 >1)</code>.</p><p>We can then vectorize the observation&#39;s associated 
 >question text to obtain a numeric feature vector for each of our 
 >observations. This text vectorization procedure is an example of a general 
 >concept in machine learning called <strong>feature extraction</strong>
 . After performing these transformations of our observations, they are now in 
standard form and can be used for training a large quantity of machine learning 
models.</p><h3 id='training-the-model-with-predictionio' 
class='header-anchors'>Training the Model With PredictionIO</h3><p>The 
Algorithm engine component serves two purposes: outputting a predictive model 
\(f\) and using this to predict the outcome variable. Here \(f\) takes as input 
a vectorized question and outputs either 0 or 1. However, our 
<code>Query</code> input will be again a question, and our 
<code>PredictedResult</code> the topic associated to the predicted label (0 or 
1):</p><p><code>Query = {question : String}</code> <code>PredictedResult = 
{topic : String}</code></p><p>With PredictionIO&#39;s engine development 
platform, you can easily automate the vectorization of the Query question, as 
well as mapping the predicted label to the appropriate topic output 
format.</p></div></div></div></div><footer><div class="cont
 ainer"><div class="seperator"></div><div class="row"><div class="col-md-6 
footer-link-column"><div 
class="footer-link-column-row"><h4>Community</h4><ul><li><a 
href="//predictionio.apache.org/install/" 
target="blank">Download</a></li><li><a href="//predictionio.apache.org/" 
target="blank">Docs</a></li><li><a 
href="//github.com/apache/incubator-predictionio" 
target="blank">GitHub</a></li><li><a 
href="mailto:[email protected]"; target="blank">Subscribe 
to User Mailing List</a></li><li><a 
href="//stackoverflow.com/questions/tagged/predictionio" 
target="blank">Stackoverflow</a></li></ul></div></div><div class="col-md-6 
footer-link-column"><div 
class="footer-link-column-row"><h4>Contribute</h4><ul><li><a 
href="//predictionio.apache.org/community/contribute-code/" 
target="blank">Contribute</a></li><li><a 
href="//github.com/apache/incubator-predictionio" target="blank">Source 
Code</a></li><li><a href="//issues.apache.org/jira/browse/PIO" 
target="blank">Bug Tracker</a></l
 i><li><a href="mailto:[email protected]"; 
target="blank">Subscribe to Development Mailing 
List</a></li></ul></div></div></div><div class="row"><div class="col-md-12 
footer-link-column"><p>Apache PredictionIO, PredictionIO, Apache, the Apache 
feather logo, and the Apache PredictionIO project logo are either registered 
trademarks or trademarks of The Apache Software Foundation in the United States 
and other countries.</p><p>All other marks mentioned may be trademarks or 
registered trademarks of their respective owners.</p></div></div></div><div 
id="footer-bottom"><div class="container"><div class="row"><div 
class="col-md-12"><div id="footer-logo-wrapper"><img alt="PredictionIO" 
src="/images/logos/logo-white-d1e9c6e6.png"/><span>®</span></div><div 
id="social-icons-wrapper"><a class="github-button" 
href="https://github.com/apache/incubator-predictionio" data-style="mega" 
data-count-href="/apache/incubator-predictionio/stargazers" 
data-count-api="/repos/apache/incubat
 or-predictionio#stargazers_count" data-count-aria-label="# stargazers on 
GitHub" aria-label="Star apache/incubator-predictionio on GitHub">Star</a> <a 
class="github-button" 
href="https://github.com/apache/incubator-predictionio/fork" 
data-icon="octicon-git-branch" data-style="mega" 
data-count-href="/apache/incubator-predictionio/network" 
data-count-api="/repos/apache/incubator-predictionio#forks_count" 
data-count-aria-label="# forks on GitHub" aria-label="Fork 
apache/incubator-predictionio on GitHub">Fork</a> <script id="github-bjs" 
async="" defer="" src="https://buttons.github.io/buttons.js"></script><a 
href="https://twitter.com/predictionio" target="blank"><img alt="PredictionIO 
on Twitter" src="/images/icons/twitter-ea9dc152.png"/></a> <a 
href="https://www.facebook.com/predictionio" target="blank"><img 
alt="PredictionIO on Facebook" src="/images/icons/facebook-5c57939c.png"/></a> 
</div></div></div></div></div></footer></div><script>(function(w,d,t,u,n,s,e){w['SwiftypeObject']=n;w
 [n]=w[n]||function(){
+<!DOCTYPE html><html><head><title>Machine Learning With 
PredictionIO</title><meta charset="utf-8"/><meta content="IE=edge,chrome=1" 
http-equiv="X-UA-Compatible"/><meta name="viewport" 
content="width=device-width, initial-scale=1.0"/><meta class="swiftype" 
name="title" data-type="string" content="Machine Learning With 
PredictionIO"/><link rel="canonical" 
href="https://predictionio.apache.org/demo/supervisedlearning/"/><link 
href="/images/favicon/normal-b330020a.png" rel="shortcut icon"/><link 
href="/images/favicon/apple-c0febcf2.png" rel="apple-touch-icon"/><link 
href="//fonts.googleapis.com/css?family=Open+Sans:300italic,400italic,600italic,700italic,800italic,400,300,600,700,800"
 rel="stylesheet"/><link 
href="//maxcdn.bootstrapcdn.com/font-awesome/4.2.0/css/font-awesome.min.css" 
rel="stylesheet"/><link href="/stylesheets/application-eccfc6cb.css" 
rel="stylesheet" type="text/css"/><script 
src="//cdnjs.cloudflare.com/ajax/libs/html5shiv/3.7.2/html5shiv.min.js"></script><script
 src="/
 
/cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script><script
 src="//use.typekit.net/pqo0itb.js"></script><script>try{Typekit.load({ async: 
true });}catch(e){}</script></head><body><div id="global"><header><div 
class="container" id="header-wrapper"><div class="row"><div 
class="col-sm-12"><div id="logo-wrapper"><span id="drawer-toggle"></span><a 
href="#"></a><a href="http://predictionio.apache.org/"><img alt="Apache 
PredictionIO" id="logo" 
src="/images/logos/logo-ee2b9bb3.png"/></a><span>®</span></div><div 
id="menu-wrapper"><div id="pill-wrapper"><a class="pill left" 
href="/gallery/template-gallery">TEMPLATES</a> <a class="pill right" 
href="//github.com/apache/predictionio/">OPEN SOURCE</a></div></div><img 
class="mobile-search-bar-toggler hidden-md hidden-lg" 
src="/images/icons/search-glass-704bd4ff.png"/></div></div></div></header><div 
id="search-bar-row-wrapper"><div class="container-fluid" 
id="search-bar-row"><div class="row"><div class="col-md-9 col-s
 m-11 col-xs-11"><div class="hidden-md hidden-lg" 
id="mobile-page-heading-wrapper"><p>PredictionIO Docs</p><h4>Machine Learning 
With PredictionIO</h4></div><h4 class="hidden-sm hidden-xs">PredictionIO 
Docs</h4></div><div class="col-md-3 col-sm-1 col-xs-1 hidden-md hidden-lg"><img 
id="left-menu-indicator" 
src="/images/icons/down-arrow-dfe9f7fe.png"/></div><div class="col-md-3 
col-sm-12 col-xs-12 swiftype-wrapper"><div class="swiftype"><form 
class="search-form"><img class="search-box-toggler hidden-xs hidden-sm" 
src="/images/icons/search-glass-704bd4ff.png"/><div class="search-box"><img 
src="/images/icons/search-glass-704bd4ff.png"/><input type="text" 
id="st-search-input" class="st-search-input" placeholder="Search 
Doc..."/></div><img class="swiftype-row-hider hidden-md hidden-lg" 
src="/images/icons/drawer-toggle-active-fcbef12a.png"/></form></div></div><div 
class="mobile-left-menu-toggler hidden-md 
hidden-lg"></div></div></div></div><div id="page" class="container-fluid"><div 
class="r
 ow"><div id="left-menu-wrapper" class="col-md-3"><nav id="nav-main"><ul><li 
class="level-1"><a class="expandible" href="/"><span>Apache PredictionIO® 
Documentation</span></a><ul><li class="level-2"><a class="final" 
href="/"><span>Welcome to Apache PredictionIO®</span></a></li></ul></li><li 
class="level-1"><a class="expandible" href="#"><span>Getting 
Started</span></a><ul><li class="level-2"><a class="final" 
href="/start/"><span>A Quick Intro</span></a></li><li class="level-2"><a 
class="final" href="/install/"><span>Installing Apache 
PredictionIO</span></a></li><li class="level-2"><a class="final" 
href="/start/download/"><span>Downloading an Engine Template</span></a></li><li 
class="level-2"><a class="final" href="/start/deploy/"><span>Deploying Your 
First Engine</span></a></li><li class="level-2"><a class="final" 
href="/start/customize/"><span>Customizing the 
Engine</span></a></li></ul></li><li class="level-1"><a class="expandible" 
href="#"><span>Integrating with Your App</span></
 a><ul><li class="level-2"><a class="final" href="/appintegration/"><span>App 
Integration Overview</span></a></li><li class="level-2"><a class="expandible" 
href="/sdk/"><span>List of SDKs</span></a><ul><li class="level-3"><a 
class="final" href="/sdk/java/"><span>Java & Android SDK</span></a></li><li 
class="level-3"><a class="final" href="/sdk/php/"><span>PHP 
SDK</span></a></li><li class="level-3"><a class="final" 
href="/sdk/python/"><span>Python SDK</span></a></li><li class="level-3"><a 
class="final" href="/sdk/ruby/"><span>Ruby SDK</span></a></li><li 
class="level-3"><a class="final" href="/sdk/community/"><span>Community Powered 
SDKs</span></a></li></ul></li></ul></li><li class="level-1"><a 
class="expandible" href="#"><span>Deploying an Engine</span></a><ul><li 
class="level-2"><a class="final" href="/deploy/"><span>Deploying as a Web 
Service</span></a></li><li class="level-2"><a class="final" 
href="/batchpredict/"><span>Batch Predictions</span></a></li><li 
class="level-2"><a class="
 final" href="/deploy/monitoring/"><span>Monitoring Engine</span></a></li><li 
class="level-2"><a class="final" href="/deploy/engineparams/"><span>Setting 
Engine Parameters</span></a></li><li class="level-2"><a class="final" 
href="/deploy/enginevariants/"><span>Deploying Multiple Engine 
Variants</span></a></li><li class="level-2"><a class="final" 
href="/deploy/plugin/"><span>Engine Server Plugin</span></a></li></ul></li><li 
class="level-1"><a class="expandible" href="#"><span>Customizing an 
Engine</span></a><ul><li class="level-2"><a class="final" 
href="/customize/"><span>Learning DASE</span></a></li><li class="level-2"><a 
class="final" href="/customize/dase/"><span>Implement DASE</span></a></li><li 
class="level-2"><a class="final" 
href="/customize/troubleshooting/"><span>Troubleshooting Engine 
Development</span></a></li><li class="level-2"><a class="final" 
href="/api/current/#package"><span>Engine Scala 
APIs</span></a></li></ul></li><li class="level-1"><a class="expandible" 
href="#">
 <span>Collecting and Analyzing Data</span></a><ul><li class="level-2"><a 
class="final" href="/datacollection/"><span>Event Server 
Overview</span></a></li><li class="level-2"><a class="final" 
href="/datacollection/eventapi/"><span>Collecting Data with 
REST/SDKs</span></a></li><li class="level-2"><a class="final" 
href="/datacollection/eventmodel/"><span>Events Modeling</span></a></li><li 
class="level-2"><a class="final" 
href="/datacollection/webhooks/"><span>Unifying Multichannel Data with 
Webhooks</span></a></li><li class="level-2"><a class="final" 
href="/datacollection/channel/"><span>Channel</span></a></li><li 
class="level-2"><a class="final" 
href="/datacollection/batchimport/"><span>Importing Data in 
Batch</span></a></li><li class="level-2"><a class="final" 
href="/datacollection/analytics/"><span>Using Analytics 
Tools</span></a></li><li class="level-2"><a class="final" 
href="/datacollection/plugin/"><span>Event Server 
Plugin</span></a></li></ul></li><li class="level-1"><a 
class="expandible" href="#"><span>Choosing an Algorithm(s)</span></a><ul><li 
class="level-2"><a class="final" href="/algorithm/"><span>Built-in Algorithm 
Libraries</span></a></li><li class="level-2"><a class="final" 
href="/algorithm/switch/"><span>Switching to Another 
Algorithm</span></a></li><li class="level-2"><a class="final" 
href="/algorithm/multiple/"><span>Combining Multiple 
Algorithms</span></a></li><li class="level-2"><a class="final" 
href="/algorithm/custom/"><span>Adding Your Own 
Algorithms</span></a></li></ul></li><li class="level-1"><a class="expandible" 
href="#"><span>ML Tuning and Evaluation</span></a><ul><li class="level-2"><a 
class="final" href="/evaluation/"><span>Overview</span></a></li><li 
class="level-2"><a class="final" 
href="/evaluation/paramtuning/"><span>Hyperparameter Tuning</span></a></li><li 
class="level-2"><a class="final" 
href="/evaluation/evaluationdashboard/"><span>Evaluation 
Dashboard</span></a></li><li class="level-2"><a class="final" 
href="/evaluation/metricchoose/"><span>Choosing Evaluation Metrics</span></a></li><li 
class="level-2"><a class="final" href="/evaluation/metricbuild/"><span>Building 
Evaluation Metrics</span></a></li></ul></li><li class="level-1"><a 
class="expandible" href="#"><span>System Architecture</span></a><ul><li 
class="level-2"><a class="final" href="/system/"><span>Architecture 
Overview</span></a></li><li class="level-2"><a class="final" 
href="/system/anotherdatastore/"><span>Using Another Data 
Store</span></a></li></ul></li><li class="level-1"><a class="expandible" 
href="#"><span>PredictionIO® Official Templates</span></a><ul><li 
class="level-2"><a class="final" 
href="/templates/"><span>Intro</span></a></li><li class="level-2"><a 
class="expandible" href="#"><span>Recommendation</span></a><ul><li 
class="level-3"><a class="final" 
href="/templates/recommendation/quickstart/"><span>Quick 
Start</span></a></li><li class="level-3"><a class="final" 
href="/templates/recommendation/dase/"><span>DASE</span></a></li><li 
class="level-3"><a class="final" 
href="/templates/recommendation/evaluation/"><span>Evaluation 
Explained</span></a></li><li class="level-3"><a class="final" 
href="/templates/recommendation/how-to/"><span>How-To</span></a></li><li 
class="level-3"><a class="final" 
href="/templates/recommendation/reading-custom-events/"><span>Read Custom 
Events</span></a></li><li class="level-3"><a class="final" 
href="/templates/recommendation/customize-data-prep/"><span>Customize Data 
Preparator</span></a></li><li class="level-3"><a class="final" 
href="/templates/recommendation/customize-serving/"><span>Customize 
Serving</span></a></li><li class="level-3"><a class="final" 
href="/templates/recommendation/training-with-implicit-preference/"><span>Train 
with Implicit Preference</span></a></li><li class="level-3"><a class="final" 
href="/templates/recommendation/blacklist-items/"><span>Filter Recommended 
Items by Blacklist in Query</span></a></li><li class="level-3"><a class="final" 
href="/templates/recommendation/batch-evaluator/"><span>Batch Persistable 
Evaluator</span></a></li></ul></li><li class="level-2"><a class="expandible" 
href="#"><span>E-Commerce Recommendation</span></a><ul><li class="level-3"><a 
class="final" href="/templates/ecommercerecommendation/quickstart/"><span>Quick 
Start</span></a></li><li class="level-3"><a class="final" 
href="/templates/ecommercerecommendation/dase/"><span>DASE</span></a></li><li 
class="level-3"><a class="final" 
href="/templates/ecommercerecommendation/how-to/"><span>How-To</span></a></li><li
 class="level-3"><a class="final" 
href="/templates/ecommercerecommendation/train-with-rate-event/"><span>Train 
with Rate Event</span></a></li><li class="level-3"><a class="final" 
href="/templates/ecommercerecommendation/adjust-score/"><span>Adjust 
Score</span></a></li></ul></li><li class="level-2"><a class="expandible" 
href="#"><span>Similar Product</span></a><ul><li class="level-3"><a 
class="final" href="/templates/similarproduct/quickstart/"><span>Quick 
Start</span></a></li><li class="level-3"><a class="final" 
href="/templates/similarproduct/dase/"><span>DASE</span></a></li><li 
class="level-3"><a class="final" 
href="/templates/similarproduct/how-to/"><span>How-To</span></a></li><li 
class="level-3"><a class="final" 
href="/templates/similarproduct/multi-events-multi-algos/"><span>Multiple 
Events and Multiple Algorithms</span></a></li><li class="level-3"><a 
class="final" 
href="/templates/similarproduct/return-item-properties/"><span>Returns Item 
Properties</span></a></li><li class="level-3"><a class="final" 
href="/templates/similarproduct/train-with-rate-event/"><span>Train with Rate 
Event</span></a></li><li class="level-3"><a class="final" 
href="/templates/similarproduct/rid-user-set-event/"><span>Get Rid of Events 
for Users</span></a></li><li class="level-3"><a class="final" 
href="/templates/similarproduct/recommended-user/"><span>Recommend 
Users</span></a></li></ul></li><li class="level-2"><a class="expandible" 
href="#"><span>Classification</span></a><ul><li class="level-3"><a class="final" 
href="/templates/classification/quickstart/"><span>Quick 
Start</span></a></li><li class="level-3"><a class="final" 
href="/templates/classification/dase/"><span>DASE</span></a></li><li 
class="level-3"><a class="final" 
href="/templates/classification/how-to/"><span>How-To</span></a></li><li 
class="level-3"><a class="final" 
href="/templates/classification/add-algorithm/"><span>Use Alternative 
Algorithm</span></a></li><li class="level-3"><a class="final" 
href="/templates/classification/reading-custom-properties/"><span>Read Custom 
Properties</span></a></li></ul></li></ul></li><li class="level-1"><a 
class="expandible" href="#"><span>Engine Template Gallery</span></a><ul><li 
class="level-2"><a class="final" 
href="/gallery/template-gallery/"><span>Browse</span></a></li><li 
class="level-2"><a class="final" 
href="/community/submit-template/"><span>Submit your Engine as a 
Template</span></a></li></ul></li><li class="level-1"><a class="
 expandible" href="#"><span>Demo Tutorials</span></a><ul><li class="level-2"><a 
class="final" href="/demo/tapster/"><span>Comics Recommendation 
Demo</span></a></li><li class="level-2"><a class="final" 
href="/demo/community/"><span>Community Contributed Demo</span></a></li><li 
class="level-2"><a class="final" href="/demo/textclassification/"><span>Text 
Classification Engine Tutorial</span></a></li></ul></li><li class="level-1"><a 
class="expandible" href="/community/"><span>Getting Involved</span></a><ul><li 
class="level-2"><a class="final" 
href="/community/contribute-code/"><span>Contribute Code</span></a></li><li 
class="level-2"><a class="final" 
href="/community/contribute-documentation/"><span>Contribute 
Documentation</span></a></li><li class="level-2"><a class="final" 
href="/community/contribute-sdk/"><span>Contribute a SDK</span></a></li><li 
class="level-2"><a class="final" 
href="/community/contribute-webhook/"><span>Contribute a 
Webhook</span></a></li><li class="level-2"><a class
 ="final" href="/community/projects/"><span>Community 
Projects</span></a></li></ul></li><li class="level-1"><a class="expandible" 
href="#"><span>Getting Help</span></a><ul><li class="level-2"><a class="final" 
href="/resources/faq/"><span>FAQs</span></a></li><li class="level-2"><a 
class="final" href="/support/"><span>Support</span></a></li></ul></li><li 
class="level-1"><a class="expandible" 
href="#"><span>Resources</span></a><ul><li class="level-2"><a class="final" 
href="/cli/"><span>Command-line Interface</span></a></li><li class="level-2"><a 
class="final" href="/resources/release/"><span>Release 
Cadence</span></a></li><li class="level-2"><a class="final" 
href="/resources/intellij/"><span>Developing Engines with IntelliJ 
IDEA</span></a></li><li class="level-2"><a class="final" 
href="/resources/upgrade/"><span>Upgrade Instructions</span></a></li><li 
class="level-2"><a class="final" 
href="/resources/glossary/"><span>Glossary</span></a></li></ul></li><li 
class="level-1"><a class="expandible" href="#"><span>Apache Software Foundation</span></a><ul><li 
class="level-2"><a class="final" href="https://www.apache.org/"><span>Apache 
Homepage</span></a></li><li class="level-2"><a class="final" 
href="https://www.apache.org/licenses/"><span>License</span></a></li><li 
class="level-2"><a class="final" 
href="https://www.apache.org/foundation/sponsorship.html"><span>Sponsorship</span></a></li><li
 class="level-2"><a class="final" 
href="https://www.apache.org/foundation/thanks.html"><span>Thanks</span></a></li><li
 class="level-2"><a class="final" 
href="https://www.apache.org/security/"><span>Security</span></a></li></ul></li></ul></nav></div><div
 class="col-md-9 col-sm-12"><div class="content-header hidden-md 
hidden-lg"><div id="page-title"><h1>Machine Learning With 
PredictionIO</h1></div></div><div id="table-of-content-wrapper"><h5>On this 
page</h5><aside id="table-of-contents"><ul> <li> <a 
href="#introduction-to-supervised-learning">Introduction to Supervised 
Learning</a> </li>
  <li> <a href="#predictionio-and-supervised-learning">PredictionIO and 
Supervised Learning</a> </li> </ul> </aside><hr/><a id="edit-page-link" 
href="https://github.com/apache/predictionio/tree/livedoc/docs/manual/source/demo/supervisedlearning.html.md"><img
 src="/images/icons/edit-pencil-d6c1bb3d.png"/>Edit this page</a></div><div 
class="content-header hidden-sm hidden-xs"><div id="page-title"><h1>Machine 
Learning With PredictionIO</h1></div></div><div class="content"> <p>This guide 
is designed to give developers a brief introduction to fundamental concepts in 
machine learning, as well as an explanation of how these concepts tie into 
PredictionIO&#39;s engine development platform. This particular guide will 
largely deal with giving some background on supervised learning.</p><h2 id='introduction-to-supervised-learning' 
class='header-anchors'>Introduction to Supervised Learning</h2><p>The first 
question we must ask is: what is machine learning? <strong>Machine 
learning</strong> is the field of study at the intersection 
 of computer science, engineering, mathematics, and statistics which seeks to 
discover or infer patterns hidden within a set of observations, which we call 
our data. Some examples of problems that machine learning seeks to solve 
are:</p> <ul> <li>Predict whether a patient has breast cancer based on their 
mammogram results.</li> <li>Predict whether an e-mail is spam or not based on 
the e-mail&#39;s content.</li> <li>Predict today&#39;s temperature based on 
climate variables collected for the previous week.</li> </ul> <h3 
id='thinking-about-data' class='header-anchors'>Thinking About Data</h3><p>In 
the examples above, we are trying to predict an outcome \(Y\), or 
<strong>response</strong>, based on some recorded or observed variables \(X\), 
or <strong>features</strong>. For example: in the first problem each 
observation is a patient, the response variable \(Y\) is equal to 1 if this 
patient has breast cancer and 0 otherwise, and \(X\) represents the mammogram 
results.</p><p>When we say we want to predict \(Y\) using \(X\), we are trying to answer the question: 
how does the response \(Y\) depend on a set of features \(X\)? To do this we need a set of observations, which we call our 
<strong>training data</strong>, consisting of observations for which we have 
observed both \(Y\) and \(X\), in order to make inference about this 
relationship.</p><h3 id='different-types-of-supervised-learning-problems' 
class='header-anchors'>Different Types of Supervised Learning 
Problems</h3><p>Note that in the first two examples, the outcome \(Y\) can only 
take on two values (1: cancer/spam, 0: no cancer/no spam). Whenever the 
outcome variable \(Y\) denotes a label associated with a particular group of 
observations (e.g. the cancer group), the <strong>supervised learning</strong> 
problem is also called a <strong>classification</strong> problem. In the third 
example, however, \(Y\) can take on any numerical value since it denotes some 
temperature reading (e.g. 25.143, 25.14233, 32.0). These types of supervised learning problems are also 
called <strong>regression</strong> problems.</p><h3 
id='training-a-predictive-model' class='header-anchors'>Training a Predictive 
Model</h3><p>A predictive model should be thought of as a function \(f\) that 
takes as input a set of features, and outputs a predicted outcome (i.e. \(f(X) 
= Y\)). The phrase <strong>training a model</strong> simply refers to the 
process of using the training data to estimate such a function. </p><h2 
id='predictionio-and-supervised-learning' class='header-anchors'>PredictionIO 
and Supervised Learning</h2><p>Machine learning methods generally assume that 
our observation responses and features are numeric vectors. We will say that 
observations in this format are in <strong>standard form</strong>. However, 
when you are working with real-life data this will often not be the case. The 
data will often be formatted in a manner that is specific to the 
application&#39;s needs. As an example, let&#39;s suppose our application is <a 
href="http://stackoverflow.com">Stack Overflow</a>. The data we want to analyze 
are questions, and we want to predict based on a question&#39;s content whether 
or not it is related to Scala.</p><p><strong>Self-check:</strong> Is this a 
classification or regression problem?</p><h3 
id='thinking-about-data-with-predictionio' class='header-anchors'>Thinking 
About Data With PredictionIO</h3><p>PredictionIO&#39;s predictive engine 
development platform allows you to easily incorporate observations that are not 
in standard form. Continuing with our example, we can import the observations, 
or Stack Overflow questions, into <a href="/datacollection/">PredictionIO&#39;s 
Event Server</a> as events with the following 
properties:</p><p><code>properties = {question : String, topic : 
String}</code></p><p>The value <code>question</code> is the actual question 
stored as a <code>String</code>, and <code>topic</code> is also a string equal to either 
<code>&quot;Scala&quot;</code> or <code>&quot;Other&quot;</code>. Our outcome here is 
<code>topic</code>, and <code>question</code> will provide a source for 
extracting features. That is, we will be using <code>question</code> to predict 
the outcome <code>topic</code>.</p><p>Once the observations are loaded as 
events into the Event Server, the engine&#39;s <a href="/customize/">Data 
Source</a> component is able to read them, which allows you to treat them as 
objects in a Scala project. The engine&#39;s Preparator component is in charge 
of converting these observations into standard form. To do this, we can first 
map the topic values as follows:</p><p><code>Map(&quot;Other&quot; -&gt; 0, 
&quot;Scala&quot; -&gt; 1)</code>.</p><p>We can then vectorize the 
observation&#39;s associated question text to obtain a numeric feature vector 
for each of our observations. This text vectorization procedure is an example 
of a general concept in machine learning called <strong>feature 
extraction</strong>. After these transformations are performed, our observations are in standard form and 
can be used for training a wide variety of machine learning models.</p><h3 
id='training-the-model-with-predictionio' class='header-anchors'>Training the 
Model With PredictionIO</h3><p>The Algorithm engine component serves two 
purposes: outputting a predictive model \(f\) and using this to predict the 
outcome variable. Here \(f\) takes as input a vectorized question and outputs 
either 0 or 1. However, our <code>Query</code> input will again be a question, 
and our <code>PredictedResult</code> the topic associated with the predicted 
label (0 or 1):</p><p><code>Query = {question : String}</code> 
<code>PredictedResult = {topic : String}</code></p><p>With PredictionIO&#39;s 
engine development platform, you can easily automate the vectorization of the 
Query question, as well as mapping the predicted label to the appropriate topic 
output format.</p></div></div></div></div><footer><div class="container"><div 
class="seperator"></div><div class="row"><div class="col-md-6 footer-link-column"><div 
class="footer-link-column-row"><h4>Community</h4><ul><li><a 
href="//predictionio.apache.org/install/" 
target="blank">Download</a></li><li><a href="//predictionio.apache.org/" 
target="blank">Docs</a></li><li><a href="//github.com/apache/predictionio" 
target="blank">GitHub</a></li><li><a 
href="mailto:user-subscribe@predictionio.apache.org" target="blank">Subscribe 
to User Mailing List</a></li><li><a 
href="//stackoverflow.com/questions/tagged/predictionio" 
target="blank">Stackoverflow</a></li></ul></div></div><div class="col-md-6 
footer-link-column"><div 
class="footer-link-column-row"><h4>Contribute</h4><ul><li><a 
href="//predictionio.apache.org/community/contribute-code/" 
target="blank">Contribute</a></li><li><a 
href="//github.com/apache/predictionio" target="blank">Source 
Code</a></li><li><a href="//issues.apache.org/jira/browse/PIO" 
target="blank">Bug Tracker</a></li><li><a 
href="mailto:dev-subscribe@predictionio.apache.org" target="blank">Subscribe to Development Mailing 
List</a></li></ul></div></div></div><div class="row"><div class="col-md-12 
footer-link-column"><p>Apache PredictionIO, PredictionIO, Apache, the Apache 
feather logo, and the Apache PredictionIO project logo are either registered 
trademarks or trademarks of The Apache Software Foundation in the United States 
and other countries.</p><p>All other marks mentioned may be trademarks or 
registered trademarks of their respective owners.</p></div></div></div><div 
id="footer-bottom"><div class="container"><div class="row"><div 
class="col-md-12"><div id="footer-logo-wrapper"><img alt="PredictionIO" 
src="/images/logos/logo-white-d1e9c6e6.png"/><span>®</span></div><div 
id="social-icons-wrapper"><a class="github-button" 
href="https://github.com/apache/predictionio" data-style="mega" 
data-count-href="/apache/predictionio/stargazers" 
data-count-api="/repos/apache/predictionio#stargazers_count" 
data-count-aria-label="# stargazers on GitHub" 
aria-label="Star apache/predictionio on GitHub">Star</a> <a 
class="github-button" href="https://github.com/apache/predictionio/fork" 
data-icon="octicon-git-branch" data-style="mega" 
data-count-href="/apache/predictionio/network" 
data-count-api="/repos/apache/predictionio#forks_count" 
data-count-aria-label="# forks on GitHub" aria-label="Fork apache/predictionio 
on GitHub">Fork</a> <script id="github-bjs" async="" defer="" 
src="https://buttons.github.io/buttons.js"></script><a 
href="https://twitter.com/predictionio" target="blank"><img alt="PredictionIO 
on Twitter" src="/images/icons/twitter-ea9dc152.png"/></a> <a 
href="https://www.facebook.com/predictionio" target="blank"><img 
alt="PredictionIO on Facebook" src="/images/icons/facebook-5c57939c.png"/></a> 
</div></div></div></div></div></footer></div><script>(function(w,d,t,u,n,s,e){w['SwiftypeObject']=n;w[n]=w[n]||function(){
 (w[n].q=w[n].q||[]).push(arguments);};s=d.createElement(t);
 
e=d.getElementsByTagName(t)[0];s.async=1;s.src=u;e.parentNode.insertBefore(s,e);
 })(window,document,'script','//s.swiftypecdn.com/install/v1/st.js','_st');

Reply via email to