incubator-predictionio#0acaa004debb0835880304fe2f173132a74f1498

git-site-role Mon, 16 Oct 2017 14:08:49 -0700

http://git-wip-us.apache.org/repos/asf/incubator-predictionio-site/blob/1b72b303/evaluation/metricchoose/index.html
----------------------------------------------------------------------
diff --git a/evaluation/metricchoose/index.html 
b/evaluation/metricchoose/index.html
new file mode 100644
index 0000000..51a471b
--- /dev/null
+++ b/evaluation/metricchoose/index.html
@@ -0,0 +1,6 @@
+<!DOCTYPE html><html><head><title>Choosing Evaluation Metrics</title><meta 
charset="utf-8"/><meta content="IE=edge,chrome=1" 
http-equiv="X-UA-Compatible"/><meta name="viewport" 
content="width=device-width, initial-scale=1.0"/><meta class="swiftype" 
name="title" data-type="string" content="Choosing Evaluation Metrics"/><link 
rel="canonical" 
href="https://predictionio.incubator.apache.org/evaluation/metricchoose/"/><link
 href="/images/favicon/normal-b330020a.png" rel="shortcut icon"/><link 
href="/images/favicon/apple-c0febcf2.png" rel="apple-touch-icon"/><link 
href="//fonts.googleapis.com/css?family=Open+Sans:300italic,400italic,600italic,700italic,800italic,400,300,600,700,800"
 rel="stylesheet"/><link 
href="//maxcdn.bootstrapcdn.com/font-awesome/4.2.0/css/font-awesome.min.css" 
rel="stylesheet"/><link href="/stylesheets/application-eccfc6cb.css" 
rel="stylesheet" type="text/css"/><script 
src="//cdnjs.cloudflare.com/ajax/libs/html5shiv/3.7.2/html5shiv.min.js"></script><script
 src="//cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script><script
 src="//use.typekit.net/pqo0itb.js"></script><script>try{Typekit.load({ async: 
true });}catch(e){}</script></head><body><div id="global"><header><div 
class="container" id="header-wrapper"><div class="row"><div 
class="col-sm-12"><div id="logo-wrapper"><span id="drawer-toggle"></span><a 
href="#"></a><a href="http://predictionio.incubator.apache.org/"><img 
alt="PredictionIO" id="logo" 
src="/images/logos/logo-ee2b9bb3.png"/></a><span>™</span></div><div 
id="menu-wrapper"><div id="pill-wrapper"><a class="pill left" 
href="/gallery/template-gallery">TEMPLATES</a> <a class="pill right" 
href="//github.com/apache/incubator-predictionio/">OPEN 
SOURCE</a></div></div><img class="mobile-search-bar-toggler hidden-md 
hidden-lg" 
src="/images/icons/search-glass-704bd4ff.png"/></div></div></div></header><div 
id="search-bar-row-wrapper"><div class="container-fluid" 
id="search-bar-row"><div class="row"><div class="col-
 md-9 col-sm-11 col-xs-11"><div class="hidden-md hidden-lg" 
id="mobile-page-heading-wrapper"><p>PredictionIO Docs</p><h4>Choosing 
Evaluation Metrics</h4></div><h4 class="hidden-sm hidden-xs">PredictionIO 
Docs</h4></div><div class="col-md-3 col-sm-1 col-xs-1 hidden-md hidden-lg"><img 
id="left-menu-indicator" 
src="/images/icons/down-arrow-dfe9f7fe.png"/></div><div class="col-md-3 
col-sm-12 col-xs-12 swiftype-wrapper"><div class="swiftype"><form 
class="search-form"><img class="search-box-toggler hidden-xs hidden-sm" 
src="/images/icons/search-glass-704bd4ff.png"/><div class="search-box"><img 
src="/images/icons/search-glass-704bd4ff.png"/><input type="text" 
id="st-search-input" class="st-search-input" placeholder="Search 
Doc..."/></div><img class="swiftype-row-hider hidden-md hidden-lg" 
src="/images/icons/drawer-toggle-active-fcbef12a.png"/></form></div></div><div 
class="mobile-left-menu-toggler hidden-md 
hidden-lg"></div></div></div></div><div id="page" class="container-fluid"><div 
class
 ="row"><div id="left-menu-wrapper" class="col-md-3"><nav id="nav-main"><ul><li 
class="level-1"><a class="expandible" href="/"><span>Apache PredictionIO™ 
(incubating) Documentation</span></a><ul><li class="level-2"><a class="final" 
href="/"><span>Welcome to Apache PredictionIO 
(incubating)</span></a></li></ul></li><li class="level-1"><a class="expandible" 
href="#"><span>Getting Started</span></a><ul><li class="level-2"><a 
class="final" href="/start/"><span>A Quick Intro</span></a></li><li 
class="level-2"><a class="final" href="/install/"><span>Installing Apache 
PredictionIO (incubating)</span></a></li><li class="level-2"><a class="final" 
href="/start/download/"><span>Downloading an Engine Template</span></a></li><li 
class="level-2"><a class="final" href="/start/deploy/"><span>Deploying Your 
First Engine</span></a></li><li class="level-2"><a class="final" 
href="/start/customize/"><span>Customizing the 
Engine</span></a></li></ul></li><li class="level-1"><a class="expandible" 
href="#"
 ><span>Integrating with Your App</span></a><ul><li class="level-2"><a 
 >class="final" href="/appintegration/"><span>App Integration 
 >Overview</span></a></li><li class="level-2"><a class="expandible" 
 >href="/sdk/"><span>List of SDKs</span></a><ul><li class="level-3"><a 
 >class="final" href="/sdk/java/"><span>Java & Android SDK</span></a></li><li 
 >class="level-3"><a class="final" href="/sdk/php/"><span>PHP 
 >SDK</span></a></li><li class="level-3"><a class="final" 
 >href="/sdk/python/"><span>Python SDK</span></a></li><li class="level-3"><a 
 >class="final" href="/sdk/ruby/"><span>Ruby SDK</span></a></li><li 
 >class="level-3"><a class="final" href="/sdk/community/"><span>Community 
 >Powered SDKs</span></a></li></ul></li></ul></li><li class="level-1"><a 
 >class="expandible" href="#"><span>Deploying an Engine</span></a><ul><li 
 >class="level-2"><a class="final" href="/deploy/"><span>Deploying as a Web 
 >Service</span></a></li><li class="level-2"><a class="final" 
 >href="/batchpredict/"><span>Batch Predictions</spa
 n></a></li><li class="level-2"><a class="final" 
href="/deploy/monitoring/"><span>Monitoring Engine</span></a></li><li 
class="level-2"><a class="final" href="/deploy/engineparams/"><span>Setting 
Engine Parameters</span></a></li><li class="level-2"><a class="final" 
href="/deploy/enginevariants/"><span>Deploying Multiple Engine 
Variants</span></a></li><li class="level-2"><a class="final" 
href="/deploy/plugin/"><span>Engine Server Plugin</span></a></li></ul></li><li 
class="level-1"><a class="expandible" href="#"><span>Customizing an 
Engine</span></a><ul><li class="level-2"><a class="final" 
href="/customize/"><span>Learning DASE</span></a></li><li class="level-2"><a 
class="final" href="/customize/dase/"><span>Implement DASE</span></a></li><li 
class="level-2"><a class="final" 
href="/customize/troubleshooting/"><span>Troubleshooting Engine 
Development</span></a></li><li class="level-2"><a class="final" 
href="/api/current/#package"><span>Engine Scala 
APIs</span></a></li></ul></li><li class=
 "level-1"><a class="expandible" href="#"><span>Collecting and Analyzing 
Data</span></a><ul><li class="level-2"><a class="final" 
href="/datacollection/"><span>Event Server Overview</span></a></li><li 
class="level-2"><a class="final" 
href="/datacollection/eventapi/"><span>Collecting Data with 
REST/SDKs</span></a></li><li class="level-2"><a class="final" 
href="/datacollection/eventmodel/"><span>Events Modeling</span></a></li><li 
class="level-2"><a class="final" 
href="/datacollection/webhooks/"><span>Unifying Multichannel Data with 
Webhooks</span></a></li><li class="level-2"><a class="final" 
href="/datacollection/channel/"><span>Channel</span></a></li><li 
class="level-2"><a class="final" 
href="/datacollection/batchimport/"><span>Importing Data in 
Batch</span></a></li><li class="level-2"><a class="final" 
href="/datacollection/analytics/"><span>Using Analytics 
Tools</span></a></li><li class="level-2"><a class="final" 
href="/datacollection/plugin/"><span>Event Server Plugin</span></a></li>
 </ul></li><li class="level-1"><a class="expandible" href="#"><span>Choosing an 
Algorithm(s)</span></a><ul><li class="level-2"><a class="final" 
href="/algorithm/"><span>Built-in Algorithm Libraries</span></a></li><li 
class="level-2"><a class="final" href="/algorithm/switch/"><span>Switching to 
Another Algorithm</span></a></li><li class="level-2"><a class="final" 
href="/algorithm/multiple/"><span>Combining Multiple 
Algorithms</span></a></li><li class="level-2"><a class="final" 
href="/algorithm/custom/"><span>Adding Your Own 
Algorithms</span></a></li></ul></li><li class="level-1"><a class="expandible" 
href="#"><span>ML Tuning and Evaluation</span></a><ul><li class="level-2"><a 
class="final" href="/evaluation/"><span>Overview</span></a></li><li 
class="level-2"><a class="final" 
href="/evaluation/paramtuning/"><span>Hyperparameter Tuning</span></a></li><li 
class="level-2"><a class="final" 
href="/evaluation/evaluationdashboard/"><span>Evaluation 
Dashboard</span></a></li><li class="level-2"
 ><a class="final active" href="/evaluation/metricchoose/"><span>Choosing 
 >Evaluation Metrics</span></a></li><li class="level-2"><a class="final" 
 >href="/evaluation/metricbuild/"><span>Building Evaluation 
 >Metrics</span></a></li></ul></li><li class="level-1"><a class="expandible" 
 >href="#"><span>System Architecture</span></a><ul><li class="level-2"><a 
 >class="final" href="/system/"><span>Architecture Overview</span></a></li><li 
 >class="level-2"><a class="final" href="/system/anotherdatastore/"><span>Using 
 >Another Data Store</span></a></li></ul></li><li class="level-1"><a 
 >class="expandible" href="#"><span>PredictionIO Official 
 >Templates</span></a><ul><li class="level-2"><a class="final" 
 >href="/templates/"><span>Intro</span></a></li><li class="level-2"><a 
 >class="expandible" href="#"><span>Recommendation</span></a><ul><li 
 >class="level-3"><a class="final" 
 >href="/templates/recommendation/quickstart/"><span>Quick 
 >Start</span></a></li><li class="level-3"><a class="final" 
 >href="/templates/recommen
 dation/dase/"><span>DASE</span></a></li><li class="level-3"><a class="final" 
href="/templates/recommendation/evaluation/"><span>Evaluation 
Explained</span></a></li><li class="level-3"><a class="final" 
href="/templates/recommendation/how-to/"><span>How-To</span></a></li><li 
class="level-3"><a class="final" 
href="/templates/recommendation/reading-custom-events/"><span>Read Custom 
Events</span></a></li><li class="level-3"><a class="final" 
href="/templates/recommendation/customize-data-prep/"><span>Customize Data 
Preparator</span></a></li><li class="level-3"><a class="final" 
href="/templates/recommendation/customize-serving/"><span>Customize 
Serving</span></a></li><li class="level-3"><a class="final" 
href="/templates/recommendation/training-with-implicit-preference/"><span>Train 
with Implicit Preference</span></a></li><li class="level-3"><a class="final" 
href="/templates/recommendation/blacklist-items/"><span>Filter Recommended 
Items by Blacklist in Query</span></a></li><li class="level
 -3"><a class="final" 
href="/templates/recommendation/batch-evaluator/"><span>Batch Persistable 
Evaluator</span></a></li></ul></li><li class="level-2"><a class="expandible" 
href="#"><span>E-Commerce Recommendation</span></a><ul><li class="level-3"><a 
class="final" href="/templates/ecommercerecommendation/quickstart/"><span>Quick 
Start</span></a></li><li class="level-3"><a class="final" 
href="/templates/ecommercerecommendation/dase/"><span>DASE</span></a></li><li 
class="level-3"><a class="final" 
href="/templates/ecommercerecommendation/how-to/"><span>How-To</span></a></li><li
 class="level-3"><a class="final" 
href="/templates/ecommercerecommendation/train-with-rate-event/"><span>Train 
with Rate Event</span></a></li><li class="level-3"><a class="final" 
href="/templates/ecommercerecommendation/adjust-score/"><span>Adjust 
Score</span></a></li></ul></li><li class="level-2"><a class="expandible" 
href="#"><span>Similar Product</span></a><ul><li class="level-3"><a 
class="final" href="/templat
 es/similarproduct/quickstart/"><span>Quick Start</span></a></li><li 
class="level-3"><a class="final" 
href="/templates/similarproduct/dase/"><span>DASE</span></a></li><li 
class="level-3"><a class="final" 
href="/templates/similarproduct/how-to/"><span>How-To</span></a></li><li 
class="level-3"><a class="final" 
href="/templates/similarproduct/multi-events-multi-algos/"><span>Multiple 
Events and Multiple Algorithms</span></a></li><li class="level-3"><a 
class="final" 
href="/templates/similarproduct/return-item-properties/"><span>Returns Item 
Properties</span></a></li><li class="level-3"><a class="final" 
href="/templates/similarproduct/train-with-rate-event/"><span>Train with Rate 
Event</span></a></li><li class="level-3"><a class="final" 
href="/templates/similarproduct/rid-user-set-event/"><span>Get Rid of Events 
for Users</span></a></li><li class="level-3"><a class="final" 
href="/templates/similarproduct/recommended-user/"><span>Recommend 
Users</span></a></li></ul></li><li class="level-2"
 ><a class="expandible" href="#"><span>Classification</span></a><ul><li 
 >class="level-3"><a class="final" 
 >href="/templates/classification/quickstart/"><span>Quick 
 >Start</span></a></li><li class="level-3"><a class="final" 
 >href="/templates/classification/dase/"><span>DASE</span></a></li><li 
 >class="level-3"><a class="final" 
 >href="/templates/classification/how-to/"><span>How-To</span></a></li><li 
 >class="level-3"><a class="final" 
 >href="/templates/classification/add-algorithm/"><span>Use Alternative 
 >Algorithm</span></a></li><li class="level-3"><a class="final" 
 >href="/templates/classification/reading-custom-properties/"><span>Read Custom 
 >Properties</span></a></li></ul></li></ul></li><li class="level-1"><a 
 >class="expandible" href="#"><span>Engine Template Gallery</span></a><ul><li 
 >class="level-2"><a class="final" 
 >href="/gallery/template-gallery/"><span>Browse</span></a></li><li 
 >class="level-2"><a class="final" 
 >href="/community/submit-template/"><span>Submit your Engine as a 
 >Template</span></a
 ></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Demo 
 >Tutorials</span></a><ul><li class="level-2"><a class="final" 
 >href="/demo/tapster/"><span>Comics Recommendation Demo</span></a></li><li 
 >class="level-2"><a class="final" href="/demo/community/"><span>Community 
 >Contributed Demo</span></a></li><li class="level-2"><a class="final" 
 >href="/demo/textclassification/"><span>Text Classification Engine 
 >Tutorial</span></a></li></ul></li><li class="level-1"><a class="expandible" 
 >href="/community/"><span>Getting Involved</span></a><ul><li 
 >class="level-2"><a class="final" 
 >href="/community/contribute-code/"><span>Contribute Code</span></a></li><li 
 >class="level-2"><a class="final" 
 >href="/community/contribute-documentation/"><span>Contribute 
 >Documentation</span></a></li><li class="level-2"><a class="final" 
 >href="/community/contribute-sdk/"><span>Contribute a SDK</span></a></li><li 
 >class="level-2"><a class="final" 
 >href="/community/contribute-webhook/"><span>Contribute a Webho
 ok</span></a></li><li class="level-2"><a class="final" 
href="/community/projects/"><span>Community 
Projects</span></a></li></ul></li><li class="level-1"><a class="expandible" 
href="#"><span>Getting Help</span></a><ul><li class="level-2"><a class="final" 
href="/resources/faq/"><span>FAQs</span></a></li><li class="level-2"><a 
class="final" href="/support/"><span>Support</span></a></li></ul></li><li 
class="level-1"><a class="expandible" 
href="#"><span>Resources</span></a><ul><li class="level-2"><a class="final" 
href="/cli/"><span>Command-line Interface</span></a></li><li class="level-2"><a 
class="final" href="/resources/release/"><span>Release 
Cadence</span></a></li><li class="level-2"><a class="final" 
href="/resources/intellij/"><span>Developing Engines with IntelliJ 
IDEA</span></a></li><li class="level-2"><a class="final" 
href="/resources/upgrade/"><span>Upgrade Instructions</span></a></li><li 
class="level-2"><a class="final" 
href="/resources/glossary/"><span>Glossary</span></a></li>
 </ul></li><li class="level-1"><a class="expandible" href="#"><span>Apache 
Software Foundation</span></a><ul><li class="level-2"><a class="final" 
href="https://www.apache.org/"><span>Apache Homepage</span></a></li><li 
class="level-2"><a class="final" 
href="https://www.apache.org/licenses/"><span>License</span></a></li><li 
class="level-2"><a class="final" 
href="https://www.apache.org/foundation/sponsorship.html"><span>Sponsorship</span></a></li><li
 class="level-2"><a class="final" 
href="https://www.apache.org/foundation/thanks.html"><span>Thanks</span></a></li><li
 class="level-2"><a class="final" 
href="https://www.apache.org/security/"><span>Security</span></a></li></ul></li></ul></nav></div><div
 class="col-md-9 col-sm-12"><div class="content-header hidden-md 
hidden-lg"><div id="breadcrumbs" class="hidden-sm hidden xs"><ul><li><a 
href="#">ML Tuning and Evaluation</a><span 
class="spacer">&gt;</span></li><li><span class="last">Choosing Evaluation 
Metrics</span></li></ul></div><div id="p
 age-title"><h1>Choosing Evaluation Metrics</h1></div></div><div 
id="table-of-content-wrapper"><h5>On this page</h5><aside 
id="table-of-contents"><ul> <li> <a href="#defining-metric">Defining Metric</a> 
</li> <li> <a href="#common-metrics">Common Metrics</a> </li> </ul> 
</aside><hr/><a id="edit-page-link" 
href="https://github.com/apache/incubator-predictionio/tree/livedoc/docs/manual/source/evaluation/metricchoose.html.md"><img
 src="/images/icons/edit-pencil-d6c1bb3d.png"/>Edit this page</a></div><div 
class="content-header hidden-sm hidden-xs"><div id="breadcrumbs" 
class="hidden-sm hidden xs"><ul><li><a href="#">ML Tuning and 
Evaluation</a><span class="spacer">&gt;</span></li><li><span 
class="last">Choosing Evaluation Metrics</span></li></ul></div><div 
id="page-title"><h1>Choosing Evaluation Metrics</h1></div></div><div 
class="content"> <p>The <a href="/evaluation/paramtuning/">hyperparameter 
tuning module</a> allows us to select the optimal engine parameter defined by a 
<code>Metric
 </code>. <code>Metric</code> determines the quality of an engine variant. We 
have skimmed through the process of choosing the right <code>Metric</code> in 
previous sections.</p><p>This section discusses basic evaluation metrics 
commonly used for classification problems. If you are more interested in 
knowing how to <em>implement</em> a custom metric, please skip to <a 
href="/evaluation/metricbuild/">the next section</a>.</p><h2 
id='defining-metric' class='header-anchors'>Defining Metric</h2><p>Metric 
evaluates the quality of an engine by comparing engine&#39;s output (predicted 
result) with the original label (actual result). An engine serving better 
predictions should yield a higher metric score; the tuning module returns the 
engine parameter with the highest score. It is sometimes called <a 
href="http://en.wikipedia.org/wiki/Loss_function"><em>loss function</em></a> in 
literature, where the goal is to minimize the loss function.</p><p>During 
tuning, it is important for us to understa
 nd the definition of the metric, to make sure it is aligned with the 
prediction engine&#39;s goal.</p><p>In the classification template, we use 
<em>Accuracy</em> as our metric. <em>Accuracy</em> is defined as: the 
percentage of queries which the engine is able to predict the correct 
label.</p><h2 id='common-metrics' class='header-anchors'>Common 
Metrics</h2><p>We illustrate the choice of metric with the following confusion 
matrix. Rows represent the engine&#39;s predicted label; columns represent the actual 
label. The second row means that of the 200 testing data points, the engine 
predicted 60 (15 + 35 + 10) of them as label 2.0, among which 35 are correct 
predictions (i.e. the actual label is 2.0, matching the prediction), and 25 are 
wrong.</p> <table><thead> <tr> <th style="text-align: center"></th> <th 
style="text-align: center">Actual = 1.0</th> <th style="text-align: 
center">Actual = 2.0</th> <th style="text-align: center">Actual = 3.0</th> 
</tr> </thead><tbody> <tr> <td style="text-
 align: center"><strong>Predicted = 1.0</strong></td> <td style="text-align: 
center">30</td> <td style="text-align: center">0</td> <td style="text-align: 
center">60</td> </tr> <tr> <td style="text-align: center"><strong>Predicted = 
2.0</strong></td> <td style="text-align: center">15</td> <td style="text-align: 
center">35</td> <td style="text-align: center">10</td> </tr> <tr> <td 
style="text-align: center"><strong>Predicted = 3.0</strong></td> <td 
style="text-align: center">0</td> <td style="text-align: center">0</td> <td 
style="text-align: center">50</td> </tr> </tbody></table> <h3 id='accuracy' 
class='header-anchors'>Accuracy</h3><p>Accuracy measures how many data points 
are predicted correctly. It is one of the simplest forms of evaluation metrics. 
The accuracy score is # of correct points / # total = (30 + 35 + 50) / 200 = 
0.575.</p><h3 id='precision' class='header-anchors'>Precision</h3><p>Precision 
is a metric for binary classifier which measures the correctness among all posit
 ive labels. A binary classifier gives only two output values (i.e. positive 
and negative). For problems where there are multiple values (3 in our example), 
we first have to transform our problem into a binary classification problem. For 
example, we can ask whether label = 1.0. The confusion matrix now 
becomes:</p> <table><thead> <tr> <th style="text-align: center"></th> <th 
style="text-align: center">Actual = 1.0</th> <th style="text-align: 
center">Actual != 1.0</th> </tr> </thead><tbody> <tr> <td style="text-align: 
center"><strong>Predicted = 1.0</strong></td> <td style="text-align: 
center">30</td> <td style="text-align: center">60</td> </tr> <tr> <td 
style="text-align: center"><strong>Predicted != 1.0</strong></td> <td 
style="text-align: center">15</td> <td style="text-align: center">95</td> </tr> 
</tbody></table> <p>Precision is the ratio between the number of correct 
positive answer (true positive) and the sum of correct positive answer (true 
positive) and wrong but posit
 ively labeled answer (false positive). In this case, the precision is 30 / (30 
+ 60) = ~0.3333.</p><h3 id='recall' class='header-anchors'>Recall</h3><p>Recall 
is a metric for binary classifier which measures how many positive labels are 
successfully predicted amongst all positive labels. Formally, it is the ratio 
between the number of correct positive answer (true positive) and the sum of 
correct positive answer (true positive) and wrongly negatively labeled answer 
(false negative). In this case, the recall is 30 / (30 + 15) = 
~0.6667.</p><p>Now that we have discussed several common metrics for classification 
problems, we can implement them using the <code>Metric</code> class in <a 
href="/evaluation/metricbuild">the next 
section</a>.</p></div></div></div></div><footer><div class="container"><div 
class="seperator"></div><div class="row"><div class="col-md-6 
footer-link-column"><div 
class="footer-link-column-row"><h4>Community</h4><ul><li><a 
href="//predictionio.incubator.apache.org/install/
 " target="blank">Download</a></li><li><a 
href="//predictionio.incubator.apache.org/" target="blank">Docs</a></li><li><a 
href="//github.com/apache/incubator-predictionio" 
target="blank">GitHub</a></li><li><a 
href="mailto:[email protected]"; 
target="blank">Subscribe to User Mailing List</a></li><li><a 
href="//stackoverflow.com/questions/tagged/predictionio" 
target="blank">Stackoverflow</a></li></ul></div></div><div class="col-md-6 
footer-link-column"><div 
class="footer-link-column-row"><h4>Contribute</h4><ul><li><a 
href="//predictionio.incubator.apache.org/community/contribute-code/" 
target="blank">Contribute</a></li><li><a 
href="//github.com/apache/incubator-predictionio" target="blank">Source 
Code</a></li><li><a href="//issues.apache.org/jira/browse/PIO" 
target="blank">Bug Tracker</a></li><li><a 
href="mailto:[email protected]"; 
target="blank">Subscribe to Development Mailing 
List</a></li></ul></div></div></div><div class="ro
 w"><div class="col-md-12 footer-link-column"><p>Apache PredictionIO, 
PredictionIO, Apache, the Apache feather logo, and the Apache PredictionIO 
project logo are either registered trademarks or trademarks of The Apache 
Software Foundation in the United States and other countries.</p><p>All other 
marks mentioned may be trademarks or registered trademarks of their respective 
owners.</p></div></div><div class="row"><div class="col-md-12 
footer-link-column"><a class="pull-right" 
href="http://incubator.apache.org/projects/predictionio.html"><img alt="Apache 
Incubator" src="/images/logos/apache_incubator-6954bd16.png"/></a><span>Apache 
PredictionIO is an effort undergoing incubation at The Apache Software 
Foundation (ASF), sponsored by the Apache Incubator. Incubation is required of 
all newly accepted projects until a further review indicates that the 
infrastructure, communications, and decision making process have stabilized in 
a manner consistent with other successful ASF projects. While
  incubation status is not necessarily a reflection of the completeness or 
stability of the code, it does indicate that the project has yet to be fully 
endorsed by the ASF.</span></div></div></div><div id="footer-bottom"><div 
class="container"><div class="row"><div class="col-md-12"><div 
id="footer-logo-wrapper"><img alt="PredictionIO" 
src="/images/logos/logo-white-d1e9c6e6.png"/><span>™</span></div><div 
id="social-icons-wrapper"><a class="github-button" 
href="https://github.com/apache/incubator-predictionio" data-style="mega" 
data-count-href="/apache/incubator-predictionio/stargazers" 
data-count-api="/repos/apache/incubator-predictionio#stargazers_count" 
data-count-aria-label="# stargazers on GitHub" aria-label="Star 
apache/incubator-predictionio on GitHub">Star</a> <a class="github-button" 
href="https://github.com/apache/incubator-predictionio/fork" 
data-icon="octicon-git-branch" data-style="mega" 
data-count-href="/apache/incubator-predictionio/network" 
data-count-api="/repos/apa
 che/incubator-predictionio#forks_count" data-count-aria-label="# forks on 
GitHub" aria-label="Fork apache/incubator-predictionio on GitHub">Fork</a> 
<script id="github-bjs" async="" defer="" 
src="https://buttons.github.io/buttons.js"></script><a 
href="https://twitter.com/predictionio" target="blank"><img alt="PredictionIO 
on Twitter" src="/images/icons/twitter-ea9dc152.png"/></a> <a 
href="https://www.facebook.com/predictionio" target="blank"><img 
alt="PredictionIO on Facebook" src="/images/icons/facebook-5c57939c.png"/></a> 
</div></div></div></div></div></footer></div><script>(function(w,d,t,u,n,s,e){w['SwiftypeObject']=n;w[n]=w[n]||function(){
+(w[n].q=w[n].q||[]).push(arguments);};s=d.createElement(t);
+e=d.getElementsByTagName(t)[0];s.async=1;s.src=u;e.parentNode.insertBefore(s,e);
+})(window,document,'script','//s.swiftypecdn.com/install/v1/st.js','_st');
+
+_st('install','HaUfpXXV87xoB_zzCQ45');</script><script 
src="/javascripts/application-a6acb1f5.js"></script></body></html>
\ No newline at end of file


http://git-wip-us.apache.org/repos/asf/incubator-predictionio-site/blob/1b72b303/evaluation/metricchoose/index.html.gz
----------------------------------------------------------------------
diff --git a/evaluation/metricchoose/index.html.gz 
b/evaluation/metricchoose/index.html.gz
new file mode 100644
index 0000000..a5e6e12
Binary files /dev/null and b/evaluation/metricchoose/index.html.gz differ

http://git-wip-us.apache.org/repos/asf/incubator-predictionio-site/blob/1b72b303/evaluation/paramtuning/index.html
----------------------------------------------------------------------
diff --git a/evaluation/paramtuning/index.html 
b/evaluation/paramtuning/index.html
new file mode 100644
index 0000000..84982dd
--- /dev/null
+++ b/evaluation/paramtuning/index.html
@@ -0,0 +1,393 @@
+<!DOCTYPE html><html><head><title>Hyperparameter Tuning</title><meta 
charset="utf-8"/><meta content="IE=edge,chrome=1" 
http-equiv="X-UA-Compatible"/><meta name="viewport" 
content="width=device-width, initial-scale=1.0"/><meta class="swiftype" 
name="title" data-type="string" content="Hyperparameter Tuning"/><link 
rel="canonical" 
href="https://predictionio.incubator.apache.org/evaluation/paramtuning/"/><link 
href="/images/favicon/normal-b330020a.png" rel="shortcut icon"/><link 
href="/images/favicon/apple-c0febcf2.png" rel="apple-touch-icon"/><link 
href="//fonts.googleapis.com/css?family=Open+Sans:300italic,400italic,600italic,700italic,800italic,400,300,600,700,800"
 rel="stylesheet"/><link 
href="//maxcdn.bootstrapcdn.com/font-awesome/4.2.0/css/font-awesome.min.css" 
rel="stylesheet"/><link href="/stylesheets/application-eccfc6cb.css" 
rel="stylesheet" type="text/css"/><script 
src="//cdnjs.cloudflare.com/ajax/libs/html5shiv/3.7.2/html5shiv.min.js"></script><script
 src="//cdn.mathjax.org/
 mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script><script 
src="//use.typekit.net/pqo0itb.js"></script><script>try{Typekit.load({ async: 
true });}catch(e){}</script></head><body><div id="global"><header><div 
class="container" id="header-wrapper"><div class="row"><div 
class="col-sm-12"><div id="logo-wrapper"><span id="drawer-toggle"></span><a 
href="#"></a><a href="http://predictionio.incubator.apache.org/"><img 
alt="PredictionIO" id="logo" 
src="/images/logos/logo-ee2b9bb3.png"/></a><span>&trade;</span></div><div 
id="menu-wrapper"><div id="pill-wrapper"><a class="pill left" 
href="/gallery/template-gallery">TEMPLATES</a> <a class="pill right" 
href="//github.com/apache/incubator-predictionio/">OPEN 
SOURCE</a></div></div><img class="mobile-search-bar-toggler hidden-md 
hidden-lg" 
src="/images/icons/search-glass-704bd4ff.png"/></div></div></div></header><div 
id="search-bar-row-wrapper"><div class="container-fluid" 
id="search-bar-row"><div class="row"><div class="col-md-9 col-sm-1
 1 col-xs-11"><div class="hidden-md hidden-lg" 
id="mobile-page-heading-wrapper"><p>PredictionIO Docs</p><h4>Hyperparameter 
Tuning</h4></div><h4 class="hidden-sm hidden-xs">PredictionIO 
Docs</h4></div><div class="col-md-3 col-sm-1 col-xs-1 hidden-md hidden-lg"><img 
id="left-menu-indicator" 
src="/images/icons/down-arrow-dfe9f7fe.png"/></div><div class="col-md-3 
col-sm-12 col-xs-12 swiftype-wrapper"><div class="swiftype"><form 
class="search-form"><img class="search-box-toggler hidden-xs hidden-sm" 
src="/images/icons/search-glass-704bd4ff.png"/><div class="search-box"><img 
src="/images/icons/search-glass-704bd4ff.png"/><input type="text" 
id="st-search-input" class="st-search-input" placeholder="Search 
Doc..."/></div><img class="swiftype-row-hider hidden-md hidden-lg" 
src="/images/icons/drawer-toggle-active-fcbef12a.png"/></form></div></div><div 
class="mobile-left-menu-toggler hidden-md 
hidden-lg"></div></div></div></div><div id="page" class="container-fluid"><div 
class="row"><div id="lef
 t-menu-wrapper" class="col-md-3"><nav id="nav-main"><ul><li class="level-1"><a 
class="expandible" href="/"><span>Apache PredictionIO&trade; (incubating) 
Documentation</span></a><ul><li class="level-2"><a class="final" 
href="/"><span>Welcome to Apache PredictionIO 
(incubating)</span></a></li></ul></li><li class="level-1"><a class="expandible" 
href="#"><span>Getting Started</span></a><ul><li class="level-2"><a 
class="final" href="/start/"><span>A Quick Intro</span></a></li><li 
class="level-2"><a class="final" href="/install/"><span>Installing Apache 
PredictionIO (incubating)</span></a></li><li class="level-2"><a class="final" 
href="/start/download/"><span>Downloading an Engine Template</span></a></li><li 
class="level-2"><a class="final" href="/start/deploy/"><span>Deploying Your 
First Engine</span></a></li><li class="level-2"><a class="final" 
href="/start/customize/"><span>Customizing the 
Engine</span></a></li></ul></li><li class="level-1"><a class="expandible" 
href="#"><span>Integrating 
 with Your App</span></a><ul><li class="level-2"><a class="final" 
href="/appintegration/"><span>App Integration Overview</span></a></li><li 
class="level-2"><a class="expandible" href="/sdk/"><span>List of 
SDKs</span></a><ul><li class="level-3"><a class="final" 
href="/sdk/java/"><span>Java & Android SDK</span></a></li><li 
class="level-3"><a class="final" href="/sdk/php/"><span>PHP 
SDK</span></a></li><li class="level-3"><a class="final" 
href="/sdk/python/"><span>Python SDK</span></a></li><li class="level-3"><a 
class="final" href="/sdk/ruby/"><span>Ruby SDK</span></a></li><li 
class="level-3"><a class="final" href="/sdk/community/"><span>Community Powered 
SDKs</span></a></li></ul></li></ul></li><li class="level-1"><a 
class="expandible" href="#"><span>Deploying an Engine</span></a><ul><li 
class="level-2"><a class="final" href="/deploy/"><span>Deploying as a Web 
Service</span></a></li><li class="level-2"><a class="final" 
href="/batchpredict/"><span>Batch Predictions</span></a></li><li clas
 s="level-2"><a class="final" href="/deploy/monitoring/"><span>Monitoring 
Engine</span></a></li><li class="level-2"><a class="final" 
href="/deploy/engineparams/"><span>Setting Engine Parameters</span></a></li><li 
class="level-2"><a class="final" href="/deploy/enginevariants/"><span>Deploying 
Multiple Engine Variants</span></a></li><li class="level-2"><a class="final" 
href="/deploy/plugin/"><span>Engine Server Plugin</span></a></li></ul></li><li 
class="level-1"><a class="expandible" href="#"><span>Customizing an 
Engine</span></a><ul><li class="level-2"><a class="final" 
href="/customize/"><span>Learning DASE</span></a></li><li class="level-2"><a 
class="final" href="/customize/dase/"><span>Implement DASE</span></a></li><li 
class="level-2"><a class="final" 
href="/customize/troubleshooting/"><span>Troubleshooting Engine 
Development</span></a></li><li class="level-2"><a class="final" 
href="/api/current/#package"><span>Engine Scala 
APIs</span></a></li></ul></li><li class="level-1"><a class=
 "expandible" href="#"><span>Collecting and Analyzing Data</span></a><ul><li 
class="level-2"><a class="final" href="/datacollection/"><span>Event Server 
Overview</span></a></li><li class="level-2"><a class="final" 
href="/datacollection/eventapi/"><span>Collecting Data with 
REST/SDKs</span></a></li><li class="level-2"><a class="final" 
href="/datacollection/eventmodel/"><span>Events Modeling</span></a></li><li 
class="level-2"><a class="final" 
href="/datacollection/webhooks/"><span>Unifying Multichannel Data with 
Webhooks</span></a></li><li class="level-2"><a class="final" 
href="/datacollection/channel/"><span>Channel</span></a></li><li 
class="level-2"><a class="final" 
href="/datacollection/batchimport/"><span>Importing Data in 
Batch</span></a></li><li class="level-2"><a class="final" 
href="/datacollection/analytics/"><span>Using Analytics 
Tools</span></a></li><li class="level-2"><a class="final" 
href="/datacollection/plugin/"><span>Event Server 
Plugin</span></a></li></ul></li><li class
 ="level-1"><a class="expandible" href="#"><span>Choosing an 
Algorithm(s)</span></a><ul><li class="level-2"><a class="final" 
href="/algorithm/"><span>Built-in Algorithm Libraries</span></a></li><li 
class="level-2"><a class="final" href="/algorithm/switch/"><span>Switching to 
Another Algorithm</span></a></li><li class="level-2"><a class="final" 
href="/algorithm/multiple/"><span>Combining Multiple 
Algorithms</span></a></li><li class="level-2"><a class="final" 
href="/algorithm/custom/"><span>Adding Your Own 
Algorithms</span></a></li></ul></li><li class="level-1"><a class="expandible" 
href="#"><span>ML Tuning and Evaluation</span></a><ul><li class="level-2"><a 
class="final" href="/evaluation/"><span>Overview</span></a></li><li 
class="level-2"><a class="final active" 
href="/evaluation/paramtuning/"><span>Hyperparameter Tuning</span></a></li><li 
class="level-2"><a class="final" 
href="/evaluation/evaluationdashboard/"><span>Evaluation 
Dashboard</span></a></li><li class="level-2"><a class="f
 inal" href="/evaluation/metricchoose/"><span>Choosing Evaluation 
Metrics</span></a></li><li class="level-2"><a class="final" 
href="/evaluation/metricbuild/"><span>Building Evaluation 
Metrics</span></a></li></ul></li><li class="level-1"><a class="expandible" 
href="#"><span>System Architecture</span></a><ul><li class="level-2"><a 
class="final" href="/system/"><span>Architecture Overview</span></a></li><li 
class="level-2"><a class="final" href="/system/anotherdatastore/"><span>Using 
Another Data Store</span></a></li></ul></li><li class="level-1"><a 
class="expandible" href="#"><span>PredictionIO Official 
Templates</span></a><ul><li class="level-2"><a class="final" 
href="/templates/"><span>Intro</span></a></li><li class="level-2"><a 
class="expandible" href="#"><span>Recommendation</span></a><ul><li 
class="level-3"><a class="final" 
href="/templates/recommendation/quickstart/"><span>Quick 
Start</span></a></li><li class="level-3"><a class="final" 
href="/templates/recommendation/dase/"><span
 >DASE</span></a></li><li class="level-3"><a class="final" 
 >href="/templates/recommendation/evaluation/"><span>Evaluation 
 >Explained</span></a></li><li class="level-3"><a class="final" 
 >href="/templates/recommendation/how-to/"><span>How-To</span></a></li><li 
 >class="level-3"><a class="final" 
 >href="/templates/recommendation/reading-custom-events/"><span>Read Custom 
 >Events</span></a></li><li class="level-3"><a class="final" 
 >href="/templates/recommendation/customize-data-prep/"><span>Customize Data 
 >Preparator</span></a></li><li class="level-3"><a class="final" 
 >href="/templates/recommendation/customize-serving/"><span>Customize 
 >Serving</span></a></li><li class="level-3"><a class="final" 
 >href="/templates/recommendation/training-with-implicit-preference/"><span>Train
 > with Implicit Preference</span></a></li><li class="level-3"><a class="final" 
 >href="/templates/recommendation/blacklist-items/"><span>Filter Recommended 
 >Items by Blacklist in Query</span></a></li><li class="level-3"><a class="final
 " href="/templates/recommendation/batch-evaluator/"><span>Batch Persistable 
Evaluator</span></a></li></ul></li><li class="level-2"><a class="expandible" 
href="#"><span>E-Commerce Recommendation</span></a><ul><li class="level-3"><a 
class="final" href="/templates/ecommercerecommendation/quickstart/"><span>Quick 
Start</span></a></li><li class="level-3"><a class="final" 
href="/templates/ecommercerecommendation/dase/"><span>DASE</span></a></li><li 
class="level-3"><a class="final" 
href="/templates/ecommercerecommendation/how-to/"><span>How-To</span></a></li><li
 class="level-3"><a class="final" 
href="/templates/ecommercerecommendation/train-with-rate-event/"><span>Train 
with Rate Event</span></a></li><li class="level-3"><a class="final" 
href="/templates/ecommercerecommendation/adjust-score/"><span>Adjust 
Score</span></a></li></ul></li><li class="level-2"><a class="expandible" 
href="#"><span>Similar Product</span></a><ul><li class="level-3"><a 
class="final" href="/templates/similarproduct/q
 uickstart/"><span>Quick Start</span></a></li><li class="level-3"><a 
class="final" 
href="/templates/similarproduct/dase/"><span>DASE</span></a></li><li 
class="level-3"><a class="final" 
href="/templates/similarproduct/how-to/"><span>How-To</span></a></li><li 
class="level-3"><a class="final" 
href="/templates/similarproduct/multi-events-multi-algos/"><span>Multiple 
Events and Multiple Algorithms</span></a></li><li class="level-3"><a 
class="final" 
href="/templates/similarproduct/return-item-properties/"><span>Returns Item 
Properties</span></a></li><li class="level-3"><a class="final" 
href="/templates/similarproduct/train-with-rate-event/"><span>Train with Rate 
Event</span></a></li><li class="level-3"><a class="final" 
href="/templates/similarproduct/rid-user-set-event/"><span>Get Rid of Events 
for Users</span></a></li><li class="level-3"><a class="final" 
href="/templates/similarproduct/recommended-user/"><span>Recommend 
Users</span></a></li></ul></li><li class="level-2"><a class="expandib
 le" href="#"><span>Classification</span></a><ul><li class="level-3"><a 
class="final" href="/templates/classification/quickstart/"><span>Quick 
Start</span></a></li><li class="level-3"><a class="final" 
href="/templates/classification/dase/"><span>DASE</span></a></li><li 
class="level-3"><a class="final" 
href="/templates/classification/how-to/"><span>How-To</span></a></li><li 
class="level-3"><a class="final" 
href="/templates/classification/add-algorithm/"><span>Use Alternative 
Algorithm</span></a></li><li class="level-3"><a class="final" 
href="/templates/classification/reading-custom-properties/"><span>Read Custom 
Properties</span></a></li></ul></li></ul></li><li class="level-1"><a 
class="expandible" href="#"><span>Engine Template Gallery</span></a><ul><li 
class="level-2"><a class="final" 
href="/gallery/template-gallery/"><span>Browse</span></a></li><li 
class="level-2"><a class="final" 
href="/community/submit-template/"><span>Submit your Engine as a 
Template</span></a></li></ul></li><li
  class="level-1"><a class="expandible" href="#"><span>Demo 
Tutorials</span></a><ul><li class="level-2"><a class="final" 
href="/demo/tapster/"><span>Comics Recommendation Demo</span></a></li><li 
class="level-2"><a class="final" href="/demo/community/"><span>Community 
Contributed Demo</span></a></li><li class="level-2"><a class="final" 
href="/demo/textclassification/"><span>Text Classification Engine 
Tutorial</span></a></li></ul></li><li class="level-1"><a class="expandible" 
href="/community/"><span>Getting Involved</span></a><ul><li class="level-2"><a 
class="final" href="/community/contribute-code/"><span>Contribute 
Code</span></a></li><li class="level-2"><a class="final" 
href="/community/contribute-documentation/"><span>Contribute 
Documentation</span></a></li><li class="level-2"><a class="final" 
href="/community/contribute-sdk/"><span>Contribute a SDK</span></a></li><li 
class="level-2"><a class="final" 
href="/community/contribute-webhook/"><span>Contribute a 
Webhook</span></a></li><
 li class="level-2"><a class="final" 
href="/community/projects/"><span>Community 
Projects</span></a></li></ul></li><li class="level-1"><a class="expandible" 
href="#"><span>Getting Help</span></a><ul><li class="level-2"><a class="final" 
href="/resources/faq/"><span>FAQs</span></a></li><li class="level-2"><a 
class="final" href="/support/"><span>Support</span></a></li></ul></li><li 
class="level-1"><a class="expandible" 
href="#"><span>Resources</span></a><ul><li class="level-2"><a class="final" 
href="/cli/"><span>Command-line Interface</span></a></li><li class="level-2"><a 
class="final" href="/resources/release/"><span>Release 
Cadence</span></a></li><li class="level-2"><a class="final" 
href="/resources/intellij/"><span>Developing Engines with IntelliJ 
IDEA</span></a></li><li class="level-2"><a class="final" 
href="/resources/upgrade/"><span>Upgrade Instructions</span></a></li><li 
class="level-2"><a class="final" 
href="/resources/glossary/"><span>Glossary</span></a></li></ul></li><li class
 ="level-1"><a class="expandible" href="#"><span>Apache Software 
Foundation</span></a><ul><li class="level-2"><a class="final" 
href="https://www.apache.org/"><span>Apache Homepage</span></a></li><li 
class="level-2"><a class="final" 
href="https://www.apache.org/licenses/"><span>License</span></a></li><li 
class="level-2"><a class="final" 
href="https://www.apache.org/foundation/sponsorship.html"><span>Sponsorship</span></a></li><li
 class="level-2"><a class="final" 
href="https://www.apache.org/foundation/thanks.html"><span>Thanks</span></a></li><li
 class="level-2"><a class="final" 
href="https://www.apache.org/security/"><span>Security</span></a></li></ul></li></ul></nav></div><div
 class="col-md-9 col-sm-12"><div class="content-header hidden-md 
hidden-lg"><div id="breadcrumbs" class="hidden-sm hidden xs"><ul><li><a 
href="#">ML Tuning and Evaluation</a><span 
class="spacer">&gt;</span></li><li><span class="last">Hyperparameter 
Tuning</span></li></ul></div><div id="page-title"><h1>Hyperparam
 eter Tuning</h1></div></div><div id="table-of-content-wrapper"><h5>On this 
page</h5><aside id="table-of-contents"><ul> <li> <a href="#quick-start">Quick 
Start</a> </li> <li> <a href="#detailed-explanation">Detailed Explanation</a> 
</li> <li> <a href="#the-evaluation-design">The Evaluation Design</a> </li> 
<li> <a href="#evaluation-data-generation">Evaluation Data Generation</a> </li> 
<li> <a href="#evaluation-metrics">Evaluation Metrics</a> </li> <li> <a 
href="#parameters-generation">Parameters Generation</a> </li> <li> <a 
href="#running-the-evaluation">Running the Evaluation</a> </li> <li> <a 
href="#notes">Notes</a> </li> </ul> </aside><hr/><a id="edit-page-link" 
href="https://github.com/apache/incubator-predictionio/tree/livedoc/docs/manual/source/evaluation/paramtuning.html.md"><img
 src="/images/icons/edit-pencil-d6c1bb3d.png"/>Edit this page</a></div><div 
class="content-header hidden-sm hidden-xs"><div id="breadcrumbs" 
class="hidden-sm hidden xs"><ul><li><a href="#">ML Tuning an
 d Evaluation</a><span class="spacer">&gt;</span></li><li><span 
class="last">Hyperparameter Tuning</span></li></ul></div><div 
id="page-title"><h1>Hyperparameter Tuning</h1></div></div><div class="content"> 
<p>A PredictionIO engine is instantiated by a set of parameters. These 
parameters define which algorithm is to be used, as well as supply the parameters 
for the algorithm itself. This naturally raises the question of how to choose 
the best set of parameters. The evaluation module streamlines the process of 
<em>tuning</em> the engine to the best parameter set and deploys it.</p><h2 
id='quick-start' class='header-anchors'>Quick Start</h2><p>We demonstrate the 
evaluation with <a href="/templates/classification/quickstart/">the 
classification template</a>. The classification template uses a naive bayesian 
algorithm that has a smoothing parameter. We evaluate the prediction quality 
against different parameter values to find the best parameter values, and then 
deploy it.</p><h3 id='edit-the
 -appid' class='header-anchors'>Edit the AppId</h3><p>Edit 
MyClassification/src/main/scala/<strong><em>Evaluation.scala</em></strong> to 
specify the <em>appId</em> you used to import the data.</p><div 
class="highlight scala"><table style="border-spacing: 0"><tbody><tr><td 
class="gutter gl" style="text-align: right"><pre class="lineno">1
+2
+3
+4
+5
+6</pre></td><td class="code"><pre><span class="k">object</span> <span 
class="nc">EngineParamsList</span> <span class="k">extends</span> <span 
class="nc">EngineParamsGenerator</span> <span class="o">{</span>
+  <span class="o">...</span>
+  <span class="k">private</span><span class="o">[</span><span 
class="kt">this</span><span class="o">]</span> <span class="k">val</span> <span 
class="n">baseEP</span> <span class="k">=</span> <span 
class="nc">EngineParams</span><span class="o">(</span>
+    <span class="n">dataSourceParams</span> <span class="k">=</span> <span 
class="nc">DataSourceParams</span><span class="o">(</span><span 
class="n">appId</span> <span class="k">=</span> <span 
class="o">&lt;</span><span class="nc">YOUR_APP_ID</span><span 
class="o">&gt;,</span> <span class="n">evalK</span> <span class="k">=</span> 
<span class="nc">Some</span><span class="o">(</span><span 
class="mi">5</span><span class="o">)))</span>
+  <span class="o">...</span>
+<span class="o">}</span>
+</pre></td></tr></tbody></table> </div> <h3 id='build-and-run-the-evaluation' 
class='header-anchors'>Build and run the evaluation</h3><p>To run an 
evaluation, the command <code>pio eval</code> is used. It takes two mandatory 
parameters: 1. the <code>Evaluation</code> object, which tells PredictionIO the 
engine and metric we use for the evaluation; and 2. the 
<code>EngineParamsGenerator</code>, which contains a list of engine params to 
test against. The following command kickstarts the evaluation workflow for the 
classification template.</p><div class="highlight shell"><table 
style="border-spacing: 0"><tbody><tr><td class="gutter gl" style="text-align: 
right"><pre class="lineno">1
+2
+3
+4</pre></td><td class="code"><pre><span class="gp">$ </span>pio build
+...
+<span class="gp">$ </span>pio <span class="nb">eval 
</span>org.template.classification.AccuracyEvaluation <span class="se">\</span>
+    org.template.classification.EngineParamsList
+</pre></td></tr></tbody></table> </div> <p>You will see the following 
output:</p><div class="highlight shell"><table style="border-spacing: 
0"><tbody><tr><td class="gutter gl" style="text-align: right"><pre 
class="lineno">1
+2
+3
+4
+5
+6
+7
+8
+9
+10
+11
+12
+13
+14
+15
+16
+17
+18
+19
+20
+21
+22
+23
+24
+25
+26
+27
+28
+29
+30
+31
+32
+33
+34
+35
+36
+37
+38
+39
+40
+41
+42
+43
+44
+45</pre></td><td class="code"><pre>...
+<span class="o">[</span>INFO] <span class="o">[</span>CoreWorkflow<span 
class="nv">$]</span> runEvaluation started
+...
+<span class="o">[</span>INFO] <span class="o">[</span>MetricEvaluator] 
Iteration 0
+<span class="o">[</span>INFO] <span class="o">[</span>MetricEvaluator] 
EngineParams: <span class="o">{</span><span 
class="s2">"dataSourceParams"</span>:<span class="o">{</span><span 
class="s2">""</span>:<span class="o">{</span><span 
class="s2">"appId"</span>:19,<span class="s2">"evalK"</span>:5<span 
class="o">}}</span>,<span class="s2">"preparatorParams"</span>:<span 
class="o">{</span><span class="s2">""</span>:<span class="o">{}}</span>,<span 
class="s2">"algorithmParamsList"</span>:[<span class="o">{</span><span 
class="s2">"naive"</span>:<span class="o">{</span><span 
class="s2">"lambda"</span>:10.0<span class="o">}}]</span>,<span 
class="s2">"servingParams"</span>:<span class="o">{</span><span 
class="s2">""</span>:<span class="o">{}}}</span>
+<span class="o">[</span>INFO] <span class="o">[</span>MetricEvaluator] Result: 
MetricScores<span class="o">(</span>0.9281045751633987,List<span 
class="o">())</span>
+<span class="o">[</span>INFO] <span class="o">[</span>MetricEvaluator] 
Iteration 1
+<span class="o">[</span>INFO] <span class="o">[</span>MetricEvaluator] 
EngineParams: <span class="o">{</span><span 
class="s2">"dataSourceParams"</span>:<span class="o">{</span><span 
class="s2">""</span>:<span class="o">{</span><span 
class="s2">"appId"</span>:19,<span class="s2">"evalK"</span>:5<span 
class="o">}}</span>,<span class="s2">"preparatorParams"</span>:<span 
class="o">{</span><span class="s2">""</span>:<span class="o">{}}</span>,<span 
class="s2">"algorithmParamsList"</span>:[<span class="o">{</span><span 
class="s2">"naive"</span>:<span class="o">{</span><span 
class="s2">"lambda"</span>:100.0<span class="o">}}]</span>,<span 
class="s2">"servingParams"</span>:<span class="o">{</span><span 
class="s2">""</span>:<span class="o">{}}}</span>
+<span class="o">[</span>INFO] <span class="o">[</span>MetricEvaluator] Result: 
MetricScores<span class="o">(</span>0.9150326797385621,List<span 
class="o">())</span>
+<span class="o">[</span>INFO] <span class="o">[</span>MetricEvaluator] 
Iteration 2
+<span class="o">[</span>INFO] <span class="o">[</span>MetricEvaluator] 
EngineParams: <span class="o">{</span><span 
class="s2">"dataSourceParams"</span>:<span class="o">{</span><span 
class="s2">""</span>:<span class="o">{</span><span 
class="s2">"appId"</span>:19,<span class="s2">"evalK"</span>:5<span 
class="o">}}</span>,<span class="s2">"preparatorParams"</span>:<span 
class="o">{</span><span class="s2">""</span>:<span class="o">{}}</span>,<span 
class="s2">"algorithmParamsList"</span>:[<span class="o">{</span><span 
class="s2">"naive"</span>:<span class="o">{</span><span 
class="s2">"lambda"</span>:1000.0<span class="o">}}]</span>,<span 
class="s2">"servingParams"</span>:<span class="o">{</span><span 
class="s2">""</span>:<span class="o">{}}}</span>
+<span class="o">[</span>INFO] <span class="o">[</span>MetricEvaluator] Result: 
MetricScores<span class="o">(</span>0.4444444444444444,List<span 
class="o">())</span>
+<span class="o">[</span>INFO] <span class="o">[</span>MetricEvaluator] Writing 
best variant params to disk...
+<span class="o">[</span>INFO] <span class="o">[</span>CoreWorkflow<span 
class="nv">$]</span> Updating evaluation instance with result: 
MetricEvaluatorResult:
+  <span class="c"># engine params evaluated: 3</span>
+Optimal Engine Params:
+  <span class="o">{</span>
+  <span class="s2">"dataSourceParams"</span>:<span class="o">{</span>
+    <span class="s2">""</span>:<span class="o">{</span>
+      <span class="s2">"appId"</span>:19,
+      <span class="s2">"evalK"</span>:5
+    <span class="o">}</span>
+  <span class="o">}</span>,
+  <span class="s2">"preparatorParams"</span>:<span class="o">{</span>
+    <span class="s2">""</span>:<span class="o">{</span>
+
+    <span class="o">}</span>
+  <span class="o">}</span>,
+  <span class="s2">"algorithmParamsList"</span>:[
+    <span class="o">{</span>
+      <span class="s2">"naive"</span>:<span class="o">{</span>
+        <span class="s2">"lambda"</span>:10.0
+      <span class="o">}</span>
+    <span class="o">}</span>
+  <span class="o">]</span>,
+  <span class="s2">"servingParams"</span>:<span class="o">{</span>
+    <span class="s2">""</span>:<span class="o">{</span>
+
+    <span class="o">}</span>
+  <span class="o">}</span>
+<span class="o">}</span>
+Metrics:
+  org.template.classification.Accuracy: 0.9281045751633987
+The best variant params can be found <span class="k">in </span>best.json
+<span class="o">[</span>INFO] <span class="o">[</span>CoreWorkflow<span 
class="nv">$]</span> runEvaluation completed
+</pre></td></tr></tbody></table> </div> <p>The console prints out the 
evaluation metric score of each engine params, and finally pretty-prints the 
optimal engine params. Amongst the 3 engine params we evaluate, <em>lambda = 
10.0</em> yields the highest accuracy score of ~0.9281.</p><h3 
id='deploy-the-best-engine-parameter' class='header-anchors'>Deploy the best 
engine parameter</h3><p>The evaluation module also writes out the best engine 
parameter to disk at <code>best.json</code>. We can train and deploy this 
specific engine variant using the extra parameter <code>-v</code>. For 
example:</p><div class="highlight shell"><table style="border-spacing: 
0"><tbody><tr><td class="gutter gl" style="text-align: right"><pre 
class="lineno">1
+2
+3
+4
+5
+6
+7</pre></td><td class="code"><pre><span class="gp">$ </span>pio train -v 
best.json
+...
+<span class="o">[</span>INFO] <span class="o">[</span>CoreWorkflow<span 
class="nv">$]</span> Training completed successfully.
+<span class="gp">$ </span>pio deploy -v best.json
+...
+<span class="o">[</span>INFO] <span class="o">[</span>HttpListener] Bound to 
localhost/127.0.0.1:8000
+<span class="o">[</span>INFO] <span class="o">[</span>MasterActor] Bind 
successful. Ready to serve.
+</pre></td></tr></tbody></table> </div> <p>At this point, we have successfully 
deployed the best engine variant we found through the evaluation 
process.</p><h2 id='detailed-explanation' class='header-anchors'>Detailed 
Explanation</h2><p>An engine often depends on a number of parameters, for 
example, the naive bayesian classification algorithm has a smoothing parameter 
to make the model more adaptive to unseen data. Compared with parameters which 
are <em>learnt</em> by the machine learning algorithm, this smoothing parameter 
<em>teaches</em> the algorithm how to work. Therefore, such parameters are 
usually called <em>hyperparameters</em>.</p><p>In PredictionIO, we always take 
a holistic view of an engine. An engine is comprised of a set of 
<strong><em>DAS</em></strong> controllers, as well as the necessary parameters 
for the controllers themselves. In the evaluation, we attempt to find out the 
best hyperparameters for an <em>engine</em>, which we call <strong><em>engine 
params</em></
 strong>. Using engine params we can deploy a complete engine.</p><p>This 
section demonstrates how to select the optimal engine params whilst ensuring 
the model doesn&#39;t overfit using PredictionIO&#39;s evaluation 
module.</p><h2 id='the-evaluation-design' class='header-anchors'>The Evaluation 
Design</h2><p>The PredictionIO evaluation module tests for the best engine 
params for an engine.</p><p>Given a set of engine params, we instantiate an 
engine and evaluate it with existing data. The data is split into two sets, a 
training set and a validation set. The training set is used to train the 
engine, which is deployed using the same steps described in earlier sections. 
We query the engine with the validation set data, and compare the predicted values in 
the response with the actual data contained in the validation set. We define a 
<strong><em>metric</em></strong> to compare the <strong><em>predicted 
result</em></strong> returned from the engine with the <strong><em>actual 
result</em></strong> which we obtained from the validation data. The goal is to maximize the metric 
score.</p><p>This process is repeated many times with a series of engine 
params. At the end, PredictionIO returns the best engine params.</p><p>We 
demonstrate the evaluation with <a 
href="/templates/classification/quickstart/">the classification 
template</a>.</p><h2 id='evaluation-data-generation' 
class='header-anchors'>Evaluation Data Generation</h2><p>In evaluation data 
generation, the goal is to generate a sequence of (training, validation) data 
tuples. A common way is to use a <em>k-fold</em> generation process. The data 
set is split into <em>k folds</em>. We generate k tuples of training and 
validation sets; for each tuple, the training set takes <em>k - 1</em> of the 
folds and the validation set takes the remaining fold.</p><p>To enable 
evaluation data generation, we need to define the <strong><em>actual 
result</em></strong> and implement the method for generating the (training, 
validation) data tuples.</p><h3 id='actual-result' class='header-anchors'>Actual Result</h3><p>In 
MyClassification/src/main/scala/<strong><em>Engine.scala</em></strong>, the 
<code>ActualResult</code> class defines the <strong><em>actual 
result</em></strong>:</p><div class="highlight scala"><table 
style="border-spacing: 0"><tbody><tr><td class="gutter gl" style="text-align: 
right"><pre class="lineno">1
+2
+3</pre></td><td class="code"><pre><span class="k">class</span> <span 
class="nc">ActualResult</span><span class="o">(</span>
+  <span class="k">val</span> <span class="n">label</span><span 
class="k">:</span> <span class="kt">Double</span>
+<span class="o">)</span> <span class="k">extends</span> <span 
class="nc">Serializable</span>
+</pre></td></tr></tbody></table> </div> <p>This class is used to store the 
actual label of the data (in contrast to <code>PredictedResult</code>, which is 
the output of the engine).</p><h3 
id='implement-data-generation-method-in-datasource' 
class='header-anchors'>Implement Data Generation Method in DataSource</h3><p>In 
MyClassification/src/main/scala/<strong><em>DataSource.scala</em></strong>, the 
method <code>readEval</code> reads and selects data from datastore and returns 
a sequence of (training, validation) data.</p><div class="highlight 
scala"><table style="border-spacing: 0"><tbody><tr><td class="gutter gl" 
style="text-align: right"><pre class="lineno">1
+2
+3
+4
+5
+6
+7
+8
+9
+10
+11
+12
+13
+14
+15
+16
+17
+18
+19
+20
+21
+22
+23
+24
+25
+26
+27
+28
+29
+30
+31
+32
+33
+34
+35
+36
+37
+38
+39
+40
+41
+42
+43
+44
+45
+46
+47
+48
+49
+50
+51
+52
+53
+54
+55
+56
+57
+58
+59
+60</pre></td><td class="code"><pre><span class="k">class</span> <span 
class="nc">DataSource</span><span class="o">(</span><span class="k">val</span> 
<span class="n">dsp</span><span class="k">:</span> <span 
class="kt">DataSourceParams</span><span class="o">)</span>
+  <span class="k">extends</span> <span class="nc">PDataSource</span><span 
class="o">[</span><span class="kt">TrainingData</span>, <span 
class="kt">EmptyEvaluationInfo</span>, <span class="kt">Query</span>, <span 
class="kt">ActualResult</span><span class="o">]</span> <span class="o">{</span>
+
+  <span class="o">...</span>
+
+  <span class="k">override</span>
+  <span class="k">def</span> <span class="n">readEval</span><span 
class="o">(</span><span class="n">sc</span><span class="k">:</span> <span 
class="kt">SparkContext</span><span class="o">)</span>
+  <span class="k">:</span> <span class="kt">Seq</span><span 
class="o">[(</span><span class="kt">TrainingData</span>, <span 
class="kt">EmptyEvaluationInfo</span>, <span class="kt">RDD</span><span 
class="o">[(</span><span class="kt">Query</span>, <span 
class="kt">ActualResult</span><span class="o">)])]</span> <span 
class="k">=</span> <span class="o">{</span>
+    <span class="n">require</span><span class="o">(!</span><span 
class="n">dsp</span><span class="o">.</span><span class="n">evalK</span><span 
class="o">.</span><span class="n">isEmpty</span><span class="o">,</span> <span 
class="s">"DataSourceParams.evalK must not be None"</span><span 
class="o">)</span>
+
+    <span class="c1">// The following code reads the data from data store. It 
is equivalent to
+</span>    <span class="c1">// the readTraining method. We copy-and-paste the 
exact code here for
+</span>    <span class="c1">// illustration purpose, a recommended approach is 
to factor out this logic
+</span>    <span class="c1">// into a helper function and have both 
readTraining and readEval call the
+</span>    <span class="c1">// helper.
+</span>    <span class="k">val</span> <span class="n">eventsDb</span> <span 
class="k">=</span> <span class="nc">Storage</span><span class="o">.</span><span 
class="n">getPEvents</span><span class="o">()</span>
+    <span class="k">val</span> <span class="n">labeledPoints</span><span 
class="k">:</span> <span class="kt">RDD</span><span class="o">[</span><span 
class="kt">LabeledPoint</span><span class="o">]</span> <span class="k">=</span> 
<span class="n">eventsDb</span><span class="o">.</span><span 
class="n">aggregateProperties</span><span class="o">(</span>
+      <span class="n">appId</span> <span class="k">=</span> <span 
class="n">dsp</span><span class="o">.</span><span class="n">appId</span><span 
class="o">,</span>
+      <span class="n">entityType</span> <span class="k">=</span> <span 
class="s">"user"</span><span class="o">,</span>
+      <span class="c1">// only keep entities with these required properties 
defined
+</span>      <span class="n">required</span> <span class="k">=</span> <span 
class="nc">Some</span><span class="o">(</span><span class="nc">List</span><span 
class="o">(</span><span class="s">"plan"</span><span class="o">,</span> <span 
class="s">"attr0"</span><span class="o">,</span> <span 
class="s">"attr1"</span><span class="o">,</span> <span 
class="s">"attr2"</span><span class="o">)))(</span><span 
class="n">sc</span><span class="o">)</span>
+      <span class="c1">// aggregateProperties() returns RDD pair of
+</span>      <span class="c1">// entity ID and its aggregated properties
+</span>      <span class="o">.</span><span class="n">map</span> <span 
class="o">{</span> <span class="k">case</span> <span class="o">(</span><span 
class="n">entityId</span><span class="o">,</span> <span 
class="n">properties</span><span class="o">)</span> <span class="k">=&gt;</span>
+        <span class="k">try</span> <span class="o">{</span>
+          <span class="nc">LabeledPoint</span><span class="o">(</span><span 
class="n">properties</span><span class="o">.</span><span 
class="n">get</span><span class="o">[</span><span class="kt">Double</span><span 
class="o">](</span><span class="s">"plan"</span><span class="o">),</span>
+            <span class="nc">Vectors</span><span class="o">.</span><span 
class="n">dense</span><span class="o">(</span><span 
class="nc">Array</span><span class="o">(</span>
+              <span class="n">properties</span><span class="o">.</span><span 
class="n">get</span><span class="o">[</span><span class="kt">Double</span><span 
class="o">](</span><span class="s">"attr0"</span><span class="o">),</span>
+              <span class="n">properties</span><span class="o">.</span><span 
class="n">get</span><span class="o">[</span><span class="kt">Double</span><span 
class="o">](</span><span class="s">"attr1"</span><span class="o">),</span>
+              <span class="n">properties</span><span class="o">.</span><span 
class="n">get</span><span class="o">[</span><span class="kt">Double</span><span 
class="o">](</span><span class="s">"attr2"</span><span class="o">)</span>
+            <span class="o">))</span>
+          <span class="o">)</span>
+        <span class="o">}</span> <span class="k">catch</span> <span 
class="o">{</span>
+          <span class="k">case</span> <span class="n">e</span><span 
class="k">:</span> <span class="kt">Exception</span> <span 
class="o">=&gt;</span> <span class="o">{</span>
+            <span class="n">logger</span><span class="o">.</span><span 
class="n">error</span><span class="o">(</span><span class="n">s</span><span 
class="s">"Failed to get properties ${properties} of"</span> <span 
class="o">+</span>
+              <span class="n">s</span><span class="s">" ${entityId}. 
Exception: ${e}."</span><span class="o">)</span>
+            <span class="k">throw</span> <span class="n">e</span>
+          <span class="o">}</span>
+        <span class="o">}</span>
+      <span class="o">}.</span><span class="n">cache</span><span 
class="o">()</span>
+    <span class="c1">// End of reading from data store
+</span>
+    <span class="c1">// K-fold splitting
+</span>    <span class="k">val</span> <span class="n">evalK</span> <span 
class="k">=</span> <span class="n">dsp</span><span class="o">.</span><span 
class="n">evalK</span><span class="o">.</span><span class="n">get</span>
+    <span class="k">val</span> <span class="n">indexedPoints</span><span 
class="k">:</span> <span class="kt">RDD</span><span class="o">[(</span><span 
class="kt">LabeledPoint</span>, <span class="kt">Long</span><span 
class="o">)]</span> <span class="k">=</span> <span 
class="n">labeledPoints</span><span class="o">.</span><span 
class="n">zipWithIndex</span>
+
+    <span class="o">(</span><span class="mi">0</span> <span 
class="n">until</span> <span class="n">evalK</span><span 
class="o">).</span><span class="n">map</span> <span class="o">{</span> <span 
class="n">idx</span> <span class="k">=&gt;</span>
+      <span class="k">val</span> <span class="n">trainingPoints</span> <span 
class="k">=</span> <span class="n">indexedPoints</span><span 
class="o">.</span><span class="n">filter</span><span class="o">(</span><span 
class="k">_</span><span class="o">.</span><span class="n">_2</span> <span 
class="o">%</span> <span class="n">evalK</span> <span class="o">!=</span> <span 
class="n">idx</span><span class="o">).</span><span class="n">map</span><span 
class="o">(</span><span class="k">_</span><span class="o">.</span><span 
class="n">_1</span><span class="o">)</span>
+      <span class="k">val</span> <span class="n">testingPoints</span> <span 
class="k">=</span> <span class="n">indexedPoints</span><span 
class="o">.</span><span class="n">filter</span><span class="o">(</span><span 
class="k">_</span><span class="o">.</span><span class="n">_2</span> <span 
class="o">%</span> <span class="n">evalK</span> <span class="o">==</span> <span 
class="n">idx</span><span class="o">).</span><span class="n">map</span><span 
class="o">(</span><span class="k">_</span><span class="o">.</span><span 
class="n">_1</span><span class="o">)</span>
+
+      <span class="o">(</span>
+        <span class="k">new</span> <span class="nc">TrainingData</span><span 
class="o">(</span><span class="n">trainingPoints</span><span class="o">),</span>
+        <span class="k">new</span> <span 
class="nc">EmptyEvaluationInfo</span><span class="o">(),</span>
+        <span class="n">testingPoints</span><span class="o">.</span><span 
class="n">map</span> <span class="o">{</span>
+          <span class="n">p</span> <span class="k">=&gt;</span> <span 
class="o">(</span><span class="k">new</span> <span class="nc">Query</span><span 
class="o">(</span><span class="n">p</span><span class="o">.</span><span 
class="n">features</span><span class="o">.</span><span 
class="n">toArray</span><span class="o">),</span> <span class="k">new</span> 
<span class="nc">ActualResult</span><span class="o">(</span><span 
class="n">p</span><span class="o">.</span><span class="n">label</span><span 
class="o">))</span>
+        <span class="o">}</span>
+      <span class="o">)</span>
+    <span class="o">}</span>
+  <span class="o">}</span>
+<span class="o">}</span>
+</pre></td></tr></tbody></table> </div> <p>The <code>readEval</code> method 
returns a sequence of (<code>TrainingData</code>, <code>EvaluationInfo</code>, 
<code>RDD[(Query, ActualResult)]</code>) tuples. <code>TrainingData</code> is the same 
class we use for deploy, <code>RDD[(Query, ActualResult)]</code> is the 
validation set, and <code>EvaluationInfo</code> can be used to hold some global 
evaluation data; it is not used in the current example.</p><p>Lines 11 to 41 
are the logic of reading and transforming data from the datastore; it is 
equivalent to the existing <code>readTraining</code> method. After line 41, the 
variable <code>labeledPoints</code> contains the complete dataset, which we 
use to generate the (training, validation) sequence.</p><p>Lines 43 to 57 are 
the <em>k-fold</em> logic. Line 45 gives each data point a unique id, and we 
decide whether the point belongs to the training or validation set depending on 
the <em>mod</em> of the id (lines 48 to 49). For each point in the validation set, we construct the <code>Query</code> and <code>ActualResult</code> 
(line 55), which are used to validate the engine.</p><h2 id='evaluation-metrics' 
class='header-anchors'>Evaluation Metrics</h2><p>We define a 
<code>Metric</code> which gives a <em>score</em> to engine params. The higher 
the score, the better the engine params are. In this template, we use the accuracy 
score, which measures the proportion of correct predictions among all data 
points.</p><p>In 
MyClassification/src/main/scala/<strong>Evaluation.scala</strong>, the class 
<code>Accuracy</code> implements the <em>accuracy</em> score. It extends a base 
helper class <code>AverageMetric</code> which calculates the average score 
over all <em>(Query, PredictedResult, ActualResult)</em> tuples.</p><div 
class="highlight scala"><table style="border-spacing: 0"><tbody><tr><td 
class="gutter gl" style="text-align: right"><pre class="lineno">1
+2
+3
+4
+5</pre></td><td class="code"><pre><span class="k">case</span> <span 
class="k">class</span> <span class="nc">Accuracy</span>
+  <span class="k">extends</span> <span class="nc">AverageMetric</span><span 
class="o">[</span><span class="kt">EmptyEvaluationInfo</span>, <span 
class="kt">Query</span>, <span class="kt">PredictedResult</span>, <span 
class="kt">ActualResult</span><span class="o">]</span> <span class="o">{</span>
+  <span class="k">def</span> <span class="n">calculate</span><span 
class="o">(</span><span class="n">query</span><span class="k">:</span> <span 
class="kt">Query</span><span class="o">,</span> <span 
class="n">predicted</span><span class="k">:</span> <span 
class="kt">PredictedResult</span><span class="o">,</span> <span 
class="n">actual</span><span class="k">:</span> <span 
class="kt">ActualResult</span><span class="o">)</span>
+  <span class="k">:</span> <span class="kt">Double</span> <span 
class="o">=</span> <span class="o">(</span><span class="k">if</span> <span 
class="o">(</span><span class="n">predicted</span><span class="o">.</span><span 
class="n">label</span> <span class="o">==</span> <span 
class="n">actual</span><span class="o">.</span><span 
class="n">label</span><span class="o">)</span> <span class="mf">1.0</span> 
<span class="k">else</span> <span class="mf">0.0</span><span class="o">)</span>
+<span class="o">}</span>
+</pre></td></tr></tbody></table> </div> <p>Then, implement an 
<code>Evaluation</code> object to define the engine and metric used in this 
evaluation.</p><div class="highlight scala"><table style="border-spacing: 
0"><tbody><tr><td class="gutter gl" style="text-align: right"><pre 
class="lineno">1
+2
+3</pre></td><td class="code"><pre><span class="k">object</span> <span 
class="nc">AccuracyEvaluation</span> <span class="k">extends</span> <span 
class="nc">Evaluation</span> <span class="o">{</span>
+  <span class="n">engineMetric</span> <span class="k">=</span> <span 
class="o">(</span><span class="nc">ClassificationEngine</span><span 
class="o">(),</span> <span class="k">new</span> <span 
class="nc">Accuracy</span><span class="o">())</span>
+<span class="o">}</span>
+</pre></td></tr></tbody></table> </div> <h2 id='parameters-generation' 
class='header-anchors'>Parameters Generation</h2><p>The last component is to 
specify the list of engine params we want to evaluate. In this guide, we 
discuss the simplest method. We specify an explicit list of engine params to be 
evaluated.</p><p>In 
MyClassification/src/main/scala/<strong>Evaluation.scala</strong>, the object 
<code>EngineParamsList</code> specifies the engine params list to be 
used.</p><div class="highlight scala"><table style="border-spacing: 
0"><tbody><tr><td class="gutter gl" style="text-align: right"><pre 
class="lineno">1
+2
+3
+4
+5
+6
+7
+8
+9
+10
+11
+12
+13
+14
+15
+16
+17</pre></td><td class="code"><pre><span class="k">object</span> <span 
class="nc">EngineParamsList</span> <span class="k">extends</span> <span 
class="nc">EngineParamsGenerator</span> <span class="o">{</span>
+  <span class="c1">// Define list of EngineParams used in Evaluation
+</span>
+  <span class="c1">// First, we define the base engine params. It specifies 
the appId from which
+</span>  <span class="c1">// the data is read, and a evalK parameter is used 
to define the
+</span>  <span class="c1">// cross-validation.
+</span>  <span class="k">private</span><span class="o">[</span><span 
class="kt">this</span><span class="o">]</span> <span class="k">val</span> <span 
class="n">baseEP</span> <span class="k">=</span> <span 
class="nc">EngineParams</span><span class="o">(</span>
+    <span class="n">dataSourceParams</span> <span class="k">=</span> <span 
class="nc">DataSourceParams</span><span class="o">(</span><span 
class="n">appId</span> <span class="k">=</span> <span class="mi">18</span><span 
class="o">,</span> <span class="n">evalK</span> <span class="k">=</span> <span 
class="nc">Some</span><span class="o">(</span><span class="mi">5</span><span 
class="o">)))</span>
+
+  <span class="c1">// Second, we specify the engine params list by explicitly 
listing all
+</span>  <span class="c1">// algorithm parameters. In this case, we evaluate 3 
engine params, each with
+</span>  <span class="c1">// a different algorithm params value.
+</span>  <span class="n">engineParamsList</span> <span class="k">=</span> 
<span class="nc">Seq</span><span class="o">(</span>
+    <span class="n">baseEP</span><span class="o">.</span><span 
class="n">copy</span><span class="o">(</span><span 
class="n">algorithmParamsList</span> <span class="k">=</span> <span 
class="nc">Seq</span><span class="o">((</span><span 
class="s">"naive"</span><span class="o">,</span> <span 
class="nc">AlgorithmParams</span><span class="o">(</span><span 
class="mf">10.0</span><span class="o">)))),</span>
+    <span class="n">baseEP</span><span class="o">.</span><span 
class="n">copy</span><span class="o">(</span><span 
class="n">algorithmParamsList</span> <span class="k">=</span> <span 
class="nc">Seq</span><span class="o">((</span><span 
class="s">"naive"</span><span class="o">,</span> <span 
class="nc">AlgorithmParams</span><span class="o">(</span><span 
class="mf">100.0</span><span class="o">)))),</span>
+    <span class="n">baseEP</span><span class="o">.</span><span 
class="n">copy</span><span class="o">(</span><span 
class="n">algorithmParamsList</span> <span class="k">=</span> <span 
class="nc">Seq</span><span class="o">((</span><span 
class="s">"naive"</span><span class="o">,</span> <span 
class="nc">AlgorithmParams</span><span class="o">(</span><span 
class="mf">1000.0</span><span class="o">)))))</span>
+<span class="o">}</span>
+</pre></td></tr></tbody></table> </div> <p>A good practice is to first define 
a base engine params, which contains the common parameters used in all evaluations 
(lines 7 to 8). With the base params, we construct the list of engine params we 
want to evaluate by adding or replacing the controller parameters. Lines 13 to 
16 generate 3 engine params, each with a different smoothing 
parameter.</p><h2 id='running-the-evaluation' class='header-anchors'>Running 
the Evaluation</h2><p>It remains to run the evaluation. Let&#39;s recap the 
quick start section above. The <code>pio eval</code> command kick starts the 
evaluation, and the result can be seen from the console.</p><div 
class="highlight shell"><table style="border-spacing: 0"><tbody><tr><td 
class="gutter gl" style="text-align: right"><pre class="lineno">1
+2
+3
+4</pre></td><td class="code"><pre><span class="gp">$ </span>pio build
+...
+<span class="gp">$ </span>pio <span class="nb">eval 
</span>org.template.classification.AccuracyEvaluation <span class="se">\</span>
+    org.template.classification.EngineParamsList
+</pre></td></tr></tbody></table> </div> <p>You will see the following 
output:</p><div class="highlight shell"><table style="border-spacing: 
0"><tbody><tr><td class="gutter gl" style="text-align: right"><pre 
class="lineno">1
+2
+3
+4
+5
+6
+7
+8
+9
+10
+11
+12
+13
+14
+15
+16
+17
+18
+19
+20
+21
+22
+23
+24
+25
+26
+27
+28
+29
+30
+31
+32
+33
+34
+35
+36
+37
+38
+39
+40
+41
+42
+43
+44
+45</pre></td><td class="code"><pre>...
+<span class="o">[</span>INFO] <span class="o">[</span>CoreWorkflow<span 
class="nv">$]</span> runEvaluation started
+...
+<span class="o">[</span>INFO] <span class="o">[</span>MetricEvaluator] 
Iteration 0
+<span class="o">[</span>INFO] <span class="o">[</span>MetricEvaluator] 
EngineParams: <span class="o">{</span><span 
class="s2">"dataSourceParams"</span>:<span class="o">{</span><span 
class="s2">""</span>:<span class="o">{</span><span 
class="s2">"appId"</span>:19,<span class="s2">"evalK"</span>:5<span 
class="o">}}</span>,<span class="s2">"preparatorParams"</span>:<span 
class="o">{</span><span class="s2">""</span>:<span class="o">{}}</span>,<span 
class="s2">"algorithmParamsList"</span>:[<span class="o">{</span><span 
class="s2">"naive"</span>:<span class="o">{</span><span 
class="s2">"lambda"</span>:10.0<span class="o">}}]</span>,<span 
class="s2">"servingParams"</span>:<span class="o">{</span><span 
class="s2">""</span>:<span class="o">{}}}</span>
+<span class="o">[</span>INFO] <span class="o">[</span>MetricEvaluator] Result: 
MetricScores<span class="o">(</span>0.9281045751633987,List<span 
class="o">())</span>
+<span class="o">[</span>INFO] <span class="o">[</span>MetricEvaluator] 
Iteration 1
+<span class="o">[</span>INFO] <span class="o">[</span>MetricEvaluator] 
EngineParams: <span class="o">{</span><span 
class="s2">"dataSourceParams"</span>:<span class="o">{</span><span 
class="s2">""</span>:<span class="o">{</span><span 
class="s2">"appId"</span>:19,<span class="s2">"evalK"</span>:5<span 
class="o">}}</span>,<span class="s2">"preparatorParams"</span>:<span 
class="o">{</span><span class="s2">""</span>:<span class="o">{}}</span>,<span 
class="s2">"algorithmParamsList"</span>:[<span class="o">{</span><span 
class="s2">"naive"</span>:<span class="o">{</span><span 
class="s2">"lambda"</span>:100.0<span class="o">}}]</span>,<span 
class="s2">"servingParams"</span>:<span class="o">{</span><span 
class="s2">""</span>:<span class="o">{}}}</span>
+<span class="o">[</span>INFO] <span class="o">[</span>MetricEvaluator] Result: 
MetricScores<span class="o">(</span>0.9150326797385621,List<span 
class="o">())</span>
+<span class="o">[</span>INFO] <span class="o">[</span>MetricEvaluator] 
Iteration 2
+<span class="o">[</span>INFO] <span class="o">[</span>MetricEvaluator] 
EngineParams: <span class="o">{</span><span 
class="s2">"dataSourceParams"</span>:<span class="o">{</span><span 
class="s2">""</span>:<span class="o">{</span><span 
class="s2">"appId"</span>:19,<span class="s2">"evalK"</span>:5<span 
class="o">}}</span>,<span class="s2">"preparatorParams"</span>:<span 
class="o">{</span><span class="s2">""</span>:<span class="o">{}}</span>,<span 
class="s2">"algorithmParamsList"</span>:[<span class="o">{</span><span 
class="s2">"naive"</span>:<span class="o">{</span><span 
class="s2">"lambda"</span>:1000.0<span class="o">}}]</span>,<span 
class="s2">"servingParams"</span>:<span class="o">{</span><span 
class="s2">""</span>:<span class="o">{}}}</span>
+<span class="o">[</span>INFO] <span class="o">[</span>MetricEvaluator] Result: 
MetricScores<span class="o">(</span>0.4444444444444444,List<span 
class="o">())</span>
+<span class="o">[</span>INFO] <span class="o">[</span>MetricEvaluator] Writing 
best variant params to disk...
+<span class="o">[</span>INFO] <span class="o">[</span>CoreWorkflow<span 
class="nv">$]</span> Updating evaluation instance with result: 
MetricEvaluatorResult:
+  <span class="c"># engine params evaluated: 3</span>
+Optimal Engine Params:
+  <span class="o">{</span>
+  <span class="s2">"dataSourceParams"</span>:<span class="o">{</span>
+    <span class="s2">""</span>:<span class="o">{</span>
+      <span class="s2">"appId"</span>:19,
+      <span class="s2">"evalK"</span>:5
+    <span class="o">}</span>
+  <span class="o">}</span>,
+  <span class="s2">"preparatorParams"</span>:<span class="o">{</span>
+    <span class="s2">""</span>:<span class="o">{</span>
+
+    <span class="o">}</span>
+  <span class="o">}</span>,
+  <span class="s2">"algorithmParamsList"</span>:[
+    <span class="o">{</span>
+      <span class="s2">"naive"</span>:<span class="o">{</span>
+        <span class="s2">"lambda"</span>:10.0
+      <span class="o">}</span>
+    <span class="o">}</span>
+  <span class="o">]</span>,
+  <span class="s2">"servingParams"</span>:<span class="o">{</span>
+    <span class="s2">""</span>:<span class="o">{</span>
+
+    <span class="o">}</span>
+  <span class="o">}</span>
+<span class="o">}</span>
+Metrics:
+  org.template.classification.Accuracy: 0.9281045751633987
+The best variant params can be found <span class="k">in </span>best.json
+<span class="o">[</span>INFO] <span class="o">[</span>CoreWorkflow<span 
class="nv">$]</span> runEvaluation completed
+</pre></td></tr></tbody></table> </div> <h2 id='notes' 
class='header-anchors'>Notes</h2> <ul> <li>We deliberately do not mention the 
<strong><em>test set</em></strong> in this hyperparameter tuning guide. In 
machine learning literature, the <strong><em>test set</em></strong> is a 
separate piece of data which is used to evaluate the final engine params 
outputted by the evaluation process. This guarantees that no information in the 
training / validation set is <em>leaked</em> into the engine params, which would yield 
a biased outcome. With PredictionIO, there are multiple ways of conducting 
robust tuning; we will cover this topic in the coming sections.</li> </ul> 
</div></div></div></div><footer><div class="container"><div 
class="seperator"></div><div class="row"><div class="col-md-6 
footer-link-column"><div 
class="footer-link-column-row"><h4>Community</h4><ul><li><a 
href="//predictionio.incubator.apache.org/install/" 
target="blank">Download</a></li><li><a 
href="//predictionio.incubator.apache.org/" 
 target="blank">Docs</a></li><li><a 
href="//github.com/apache/incubator-predictionio" 
target="blank">GitHub</a></li><li><a 
href="mailto:user-subscribe@predictionio.incubator.apache.org" 
target="blank">Subscribe to User Mailing List</a></li><li><a 
href="//stackoverflow.com/questions/tagged/predictionio" 
target="blank">Stackoverflow</a></li></ul></div></div><div class="col-md-6 
footer-link-column"><div 
class="footer-link-column-row"><h4>Contribute</h4><ul><li><a 
href="//predictionio.incubator.apache.org/community/contribute-code/" 
target="blank">Contribute</a></li><li><a 
href="//github.com/apache/incubator-predictionio" target="blank">Source 
Code</a></li><li><a href="//issues.apache.org/jira/browse/PIO" 
target="blank">Bug Tracker</a></li><li><a 
href="mailto:dev-subscribe@predictionio.incubator.apache.org" 
target="blank">Subscribe to Development Mailing 
List</a></li></ul></div></div></div><div class="row"><div class="col-md-12 
footer-link-column"><p>Apache PredictionIO, PredictionIO, Apache, the Apache feather logo, and the Apache PredictionIO project logo are 
either registered trademarks or trademarks of The Apache Software Foundation in 
the United States and other countries.</p><p>All other marks mentioned may be 
trademarks or registered trademarks of their respective 
owners.</p></div></div><div class="row"><div class="col-md-12 
footer-link-column"><a class="pull-right" 
href="http://incubator.apache.org/projects/predictionio.html"><img alt="Apache 
Incubator" src="/images/logos/apache_incubator-6954bd16.png"/></a><span>Apache 
PredictionIO is an effort undergoing incubation at The Apache Software 
Foundation (ASF), sponsored by the Apache Incubator. Incubation is required of 
all newly accepted projects until a further review indicates that the 
infrastructure, communications, and decision making process have stabilized in 
a manner consistent with other successful ASF projects. While incubation status 
is not necessarily a reflection of the completeness or stability of the code, it does indicate that the project has yet to be fully endorsed by 
the ASF.</span></div></div></div><div id="footer-bottom"><div 
class="container"><div class="row"><div class="col-md-12"><div 
id="footer-logo-wrapper"><img alt="PredictionIO" 
src="/images/logos/logo-white-d1e9c6e6.png"/><span>&trade;</span></div><div 
id="social-icons-wrapper"><a class="github-button" 
href="https://github.com/apache/incubator-predictionio" data-style="mega" 
data-count-href="/apache/incubator-predictionio/stargazers" 
data-count-api="/repos/apache/incubator-predictionio#stargazers_count" 
data-count-aria-label="# stargazers on GitHub" aria-label="Star 
apache/incubator-predictionio on GitHub">Star</a> <a class="github-button" 
href="https://github.com/apache/incubator-predictionio/fork" 
data-icon="octicon-git-branch" data-style="mega" 
data-count-href="/apache/incubator-predictionio/network" 
data-count-api="/repos/apache/incubator-predictionio#forks_count" 
data-count-aria-label="# forks on GitHub" aria-label="Fork apache/incubator-predictionio on GitHub">Fork</a> <script 
id="github-bjs" async="" defer="" 
src="https://buttons.github.io/buttons.js"></script><a 
href="https://twitter.com/predictionio" target="blank"><img alt="PredictionIO 
on Twitter" src="/images/icons/twitter-ea9dc152.png"/></a> <a 
href="https://www.facebook.com/predictionio" target="blank"><img 
alt="PredictionIO on Facebook" src="/images/icons/facebook-5c57939c.png"/></a> 
</div></div></div></div></div></footer></div><script>(function(w,d,t,u,n,s,e){w['SwiftypeObject']=n;w[n]=w[n]||function(){
+(w[n].q=w[n].q||[]).push(arguments);};s=d.createElement(t);
+e=d.getElementsByTagName(t)[0];s.async=1;s.src=u;e.parentNode.insertBefore(s,e);
+})(window,document,'script','//s.swiftypecdn.com/install/v1/st.js','_st');
+
+_st('install','HaUfpXXV87xoB_zzCQ45');</script><script 
src="/javascripts/application-a6acb1f5.js"></script></body></html>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-predictionio-site/blob/1b72b303/evaluation/paramtuning/index.html.gz
----------------------------------------------------------------------
diff --git a/evaluation/paramtuning/index.html.gz 
b/evaluation/paramtuning/index.html.gz
new file mode 100644
index 0000000..572b28f
Binary files /dev/null and b/evaluation/paramtuning/index.html.gz differ

http://git-wip-us.apache.org/repos/asf/incubator-predictionio-site/blob/1b72b303/favicon.ico
----------------------------------------------------------------------
diff --git a/favicon.ico b/favicon.ico
new file mode 100644
index 0000000..92d6430
Binary files /dev/null and b/favicon.ico differ

http://git-wip-us.apache.org/repos/asf/incubator-predictionio-site/blob/1b72b303/fonts/bootstrap/glyphicons-halflings-regular-278e49a8.woff
----------------------------------------------------------------------
diff --git a/fonts/bootstrap/glyphicons-halflings-regular-278e49a8.woff 
b/fonts/bootstrap/glyphicons-halflings-regular-278e49a8.woff
new file mode 100644
index 0000000..9e61285
Binary files /dev/null and 
b/fonts/bootstrap/glyphicons-halflings-regular-278e49a8.woff differ

http://git-wip-us.apache.org/repos/asf/incubator-predictionio-site/blob/1b72b303/fonts/bootstrap/glyphicons-halflings-regular-44bc1850.ttf
----------------------------------------------------------------------
diff --git a/fonts/bootstrap/glyphicons-halflings-regular-44bc1850.ttf 
b/fonts/bootstrap/glyphicons-halflings-regular-44bc1850.ttf
new file mode 100644
index 0000000..1413fc6
Binary files /dev/null and 
b/fonts/bootstrap/glyphicons-halflings-regular-44bc1850.ttf differ

http://git-wip-us.apache.org/repos/asf/incubator-predictionio-site/blob/1b72b303/fonts/bootstrap/glyphicons-halflings-regular-86b6f62b.eot
----------------------------------------------------------------------
diff --git a/fonts/bootstrap/glyphicons-halflings-regular-86b6f62b.eot 
b/fonts/bootstrap/glyphicons-halflings-regular-86b6f62b.eot
new file mode 100644
index 0000000..b93a495
Binary files /dev/null and 
b/fonts/bootstrap/glyphicons-halflings-regular-86b6f62b.eot differ

[32/51] [abbrv] [partial] incubator-predictionio-site git commit: Documentation based on apache/incubator-predictionio#0acaa004debb0835880304fe2f173132a74f1498

Reply via email to