http://git-wip-us.apache.org/repos/asf/incubator-predictionio-site/blob/25938169/evaluation/metricchoose/index.html
----------------------------------------------------------------------
diff --git a/evaluation/metricchoose/index.html 
b/evaluation/metricchoose/index.html
new file mode 100644
index 0000000..8d52994
--- /dev/null
+++ b/evaluation/metricchoose/index.html
@@ -0,0 +1,6 @@
+<!DOCTYPE html><html><head><title>Choosing Evaluation Metrics</title><meta 
charset="utf-8"/><meta content="IE=edge,chrome=1" 
http-equiv="X-UA-Compatible"/><meta name="viewport" 
content="width=device-width, initial-scale=1.0"/><meta class="swiftype" 
name="title" data-type="string" content="Choosing Evaluation Metrics"/><link 
rel="canonical" 
href="https://docs.prediction.io/evaluation/metricchoose/"/><link 
href="/images/favicon/normal-b330020a.png" rel="shortcut icon"/><link 
href="/images/favicon/apple-c0febcf2.png" rel="apple-touch-icon"/><link 
href="//fonts.googleapis.com/css?family=Open+Sans:300italic,400italic,600italic,700italic,800italic,400,300,600,700,800"
 rel="stylesheet"/><link 
href="//maxcdn.bootstrapcdn.com/font-awesome/4.2.0/css/font-awesome.min.css" 
rel="stylesheet"/><link href="/stylesheets/application-a2a2f408.css" 
rel="stylesheet" type="text/css"/><script 
src="//cdnjs.cloudflare.com/ajax/libs/html5shiv/3.7.2/html5shiv.min.js"></script><script
 src="//cdn.mathjax.org/ma
 thjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script><script 
src="//use.typekit.net/pqo0itb.js"></script><script>try{Typekit.load({ async: 
true });}catch(e){}</script></head><body><div id="global"><header><div 
class="container" id="header-wrapper"><div class="row"><div 
class="col-sm-12"><div id="logo-wrapper"><span id="drawer-toggle"></span><a 
href="#"></a><a href="http://predictionio.incubator.apache.org/";><img 
alt="PredictionIO" id="logo" 
src="/images/logos/logo-ee2b9bb3.png"/></a></div><div id="menu-wrapper"><div 
id="pill-wrapper"><a class="pill left" 
href="/gallery/template-gallery">TEMPLATES</a> <a class="pill right" 
href="//github.com/apache/incubator-predictionio/">OPEN 
SOURCE</a></div></div><img class="mobile-search-bar-toggler hidden-md 
hidden-lg" 
src="/images/icons/search-glass-704bd4ff.png"/></div></div></div></header><div 
id="search-bar-row-wrapper"><div class="container-fluid" 
id="search-bar-row"><div class="row"><div class="col-md-9 col-sm-11 
col-xs-11"><div 
 class="hidden-md hidden-lg" id="mobile-page-heading-wrapper"><p>PredictionIO 
Docs</p><h4>Choosing Evaluation Metrics</h4></div><h4 class="hidden-sm 
hidden-xs">PredictionIO Docs</h4></div><div class="col-md-3 col-sm-1 col-xs-1 
hidden-md hidden-lg"><img id="left-menu-indicator" 
src="/images/icons/down-arrow-dfe9f7fe.png"/></div><div class="col-md-3 
col-sm-12 col-xs-12 swiftype-wrapper"><div class="swiftype"><form 
class="search-form"><img class="search-box-toggler hidden-xs hidden-sm" 
src="/images/icons/search-glass-704bd4ff.png"/><div class="search-box"><img 
src="/images/icons/search-glass-704bd4ff.png"/><input type="text" 
id="st-search-input" class="st-search-input" placeholder="Search 
Doc..."/></div><img class="swiftype-row-hider hidden-md hidden-lg" 
src="/images/icons/drawer-toggle-active-fcbef12a.png"/></form></div></div><div 
class="mobile-left-menu-toggler hidden-md 
hidden-lg"></div></div></div></div><div id="page" class="container-fluid"><div 
class="row"><div id="left-menu-wrapp
 er" class="col-md-3"><nav id="nav-main"><ul><li class="level-1"><a 
class="expandible" href="/"><span>Apache PredictionIO (incubating) 
Documentation</span></a><ul><li class="level-2"><a class="final" 
href="/"><span>Welcome to Apache PredictionIO 
(incubating)</span></a></li></ul></li><li class="level-1"><a class="expandible" 
href="#"><span>Getting Started</span></a><ul><li class="level-2"><a 
class="final" href="/start/"><span>A Quick Intro</span></a></li><li 
class="level-2"><a class="final" href="/install/"><span>Installing Apache 
PredictionIO (incubating)</span></a></li><li class="level-2"><a class="final" 
href="/start/download/"><span>Downloading an Engine Template</span></a></li><li 
class="level-2"><a class="final" href="/start/deploy/"><span>Deploying Your 
First Engine</span></a></li><li class="level-2"><a class="final" 
href="/start/customize/"><span>Customizing the 
Engine</span></a></li></ul></li><li class="level-1"><a class="expandible" 
href="#"><span>Integrating with Your App</
 span></a><ul><li class="level-2"><a class="final" 
href="/appintegration/"><span>App Integration Overview</span></a></li><li 
class="level-2"><a class="expandible" href="/sdk/"><span>List of 
SDKs</span></a><ul><li class="level-3"><a class="final" 
href="/sdk/java/"><span>Java & Android SDK</span></a></li><li 
class="level-3"><a class="final" href="/sdk/php/"><span>PHP 
SDK</span></a></li><li class="level-3"><a class="final" 
href="/sdk/python/"><span>Python SDK</span></a></li><li class="level-3"><a 
class="final" href="/sdk/ruby/"><span>Ruby SDK</span></a></li><li 
class="level-3"><a class="final" href="/sdk/community/"><span>Community Powered 
SDKs</span></a></li></ul></li></ul></li><li class="level-1"><a 
class="expandible" href="#"><span>Deploying an Engine</span></a><ul><li 
class="level-2"><a class="final" href="/deploy/"><span>Deploying as a Web 
Service</span></a></li><li class="level-2"><a class="final" 
href="/cli/#engine-commands"><span>Engine Command-line 
Interface</span></a></li><li 
 class="level-2"><a class="final" href="/deploy/monitoring/"><span>Monitoring 
Engine</span></a></li><li class="level-2"><a class="final" 
href="/deploy/engineparams/"><span>Setting Engine Parameters</span></a></li><li 
class="level-2"><a class="final" href="/deploy/enginevariants/"><span>Deploying 
Multiple Engine Variants</span></a></li></ul></li><li class="level-1"><a 
class="expandible" href="#"><span>Customizing an Engine</span></a><ul><li 
class="level-2"><a class="final" href="/customize/"><span>Learning 
DASE</span></a></li><li class="level-2"><a class="final" 
href="/customize/dase/"><span>Implement DASE</span></a></li><li 
class="level-2"><a class="final" 
href="/customize/troubleshooting/"><span>Troubleshooting Engine 
Development</span></a></li><li class="level-2"><a class="final" 
href="/api/current/#package"><span>Engine Scala 
APIs</span></a></li></ul></li><li class="level-1"><a class="expandible" 
href="#"><span>Collecting and Analyzing Data</span></a><ul><li 
class="level-2"><a cla
 ss="final" href="/datacollection/"><span>Event Server 
Overview</span></a></li><li class="level-2"><a class="final" 
href="/cli/#event-server-commands"><span>Event Server Command-line 
Interface</span></a></li><li class="level-2"><a class="final" 
href="/datacollection/eventapi/"><span>Collecting Data with 
REST/SDKs</span></a></li><li class="level-2"><a class="final" 
href="/datacollection/eventmodel/"><span>Events Modeling</span></a></li><li 
class="level-2"><a class="final" 
href="/datacollection/webhooks/"><span>Unifying Multichannel Data with 
Webhooks</span></a></li><li class="level-2"><a class="final" 
href="/datacollection/channel/"><span>Channel</span></a></li><li 
class="level-2"><a class="final" 
href="/datacollection/batchimport/"><span>Importing Data in 
Batch</span></a></li><li class="level-2"><a class="final" 
href="/datacollection/analytics/"><span>Using Analytics 
Tools</span></a></li></ul></li><li class="level-1"><a class="expandible" 
href="#"><span>Choosing an Algorithm(s)</span
 ></a><ul><li class="level-2"><a class="final" 
 >href="/algorithm/"><span>Built-in Algorithm Libraries</span></a></li><li 
 >class="level-2"><a class="final" href="/algorithm/switch/"><span>Switching to 
 >Another Algorithm</span></a></li><li class="level-2"><a class="final" 
 >href="/algorithm/multiple/"><span>Combining Multiple 
 >Algorithms</span></a></li><li class="level-2"><a class="final" 
 >href="/algorithm/custom/"><span>Adding Your Own 
 >Algorithms</span></a></li></ul></li><li class="level-1"><a class="expandible" 
 >href="#"><span>ML Tuning and Evaluation</span></a><ul><li class="level-2"><a 
 >class="final" href="/evaluation/"><span>Overview</span></a></li><li 
 >class="level-2"><a class="final" 
 >href="/evaluation/paramtuning/"><span>Hyperparameter 
 >Tuning</span></a></li><li class="level-2"><a class="final" 
 >href="/evaluation/evaluationdashboard/"><span>Evaluation 
 >Dashboard</span></a></li><li class="level-2"><a class="final active" 
 >href="/evaluation/metricchoose/"><span>Choosing Evaluation Metrics</span
 ></a></li><li class="level-2"><a class="final" 
 >href="/evaluation/metricbuild/"><span>Building Evaluation 
 >Metrics</span></a></li></ul></li><li class="level-1"><a class="expandible" 
 >href="#"><span>System Architecture</span></a><ul><li class="level-2"><a 
 >class="final" href="/system/"><span>Architecture Overview</span></a></li><li 
 >class="level-2"><a class="final" href="/system/anotherdatastore/"><span>Using 
 >Another Data Store</span></a></li></ul></li><li class="level-1"><a 
 >class="expandible" href="#"><span>Engine Template Gallery</span></a><ul><li 
 >class="level-2"><a class="final" 
 >href="/gallery/template-gallery/"><span>Browse</span></a></li><li 
 >class="level-2"><a class="final" 
 >href="/community/submit-template/"><span>Submit your Engine as a 
 >Template</span></a></li></ul></li><li class="level-1"><a class="expandible" 
 >href="#"><span>Demo Tutorials</span></a><ul><li class="level-2"><a 
 >class="final" href="/demo/tapster/"><span>Comics Recommendation 
 >Demo</span></a></li><li class="level-2"><a 
 class="final" href="/demo/community/"><span>Community Contributed 
Demo</span></a></li><li class="level-2"><a class="final" 
href="/demo/textclassification/"><span>Text Classification Engine 
Tutorial</span></a></li></ul></li><li class="level-1"><a class="expandible" 
href="/community/"><span>Getting Involved</span></a><ul><li class="level-2"><a 
class="final" href="/community/contribute-code/"><span>Contribute 
Code</span></a></li><li class="level-2"><a class="final" 
href="/community/contribute-documentation/"><span>Contribute 
Documentation</span></a></li><li class="level-2"><a class="final" 
href="/community/contribute-sdk/"><span>Contribute a SDK</span></a></li><li 
class="level-2"><a class="final" 
href="/community/contribute-webhook/"><span>Contribute a 
Webhook</span></a></li><li class="level-2"><a class="final" 
href="/community/projects/"><span>Community 
Projects</span></a></li></ul></li><li class="level-1"><a class="expandible" 
href="#"><span>Getting Help</span></a><ul><li class="leve
 l-2"><a class="final" href="/resources/faq/"><span>FAQs</span></a></li><li 
class="level-2"><a class="final" 
href="/support/"><span>Support</span></a></li></ul></li><li class="level-1"><a 
class="expandible" href="#"><span>Resources</span></a><ul><li 
class="level-2"><a class="final" href="/resources/intellij/"><span>Developing 
Engines with IntelliJ IDEA</span></a></li><li class="level-2"><a class="final" 
href="/resources/upgrade/"><span>Upgrade Instructions</span></a></li><li 
class="level-2"><a class="final" 
href="/resources/glossary/"><span>Glossary</span></a></li></ul></li></ul></nav></div><div
 class="col-md-9 col-sm-12"><div class="content-header hidden-md 
hidden-lg"><div id="breadcrumbs" class="hidden-sm hidden-xs"><ul><li><a 
href="#">ML Tuning and Evaluation</a><span 
class="spacer">&gt;</span></li><li><span class="last">Choosing Evaluation 
Metrics</span></li></ul></div><div id="page-title"><h1>Choosing Evaluation 
Metrics</h1></div></div><div id="table-of-content-wrapper"><h5>On t
 his page</h5><aside id="table-of-contents"><ul> <li> <a 
href="#defining-metric">Defining Metric</a> </li> <li> <a 
href="#common-metrics">Common Metrics</a> </li> </ul> </aside><hr/><a 
id="edit-page-link" 
href="https://github.com/apache/incubator-predictionio/tree/livedoc/docs/manual/source/evaluation/metricchoose.html.md";><img
 src="/images/icons/edit-pencil-d6c1bb3d.png"/>Edit this page</a></div><div 
class="content-header hidden-sm hidden-xs"><div id="breadcrumbs" 
class="hidden-sm hidden-xs"><ul><li><a href="#">ML Tuning and 
Evaluation</a><span class="spacer">&gt;</span></li><li><span 
class="last">Choosing Evaluation Metrics</span></li></ul></div><div 
id="page-title"><h1>Choosing Evaluation Metrics</h1></div></div><div 
class="content"><p>The <a href="/evaluation/paramtuning/">hyperparameter tuning 
module</a> allows us to select the optimal engine parameter defined by a 
<code>Metric</code>. <code>Metric</code> determines the quality of an engine 
variant. We have skimmed through the 
 process of choosing the right <code>Metric</code> in previous 
sections.</p><p>This section discusses basic evaluation metrics commonly used 
for classification problems. If you are more interested in knowing how to 
<em>implement</em> a custom metric, please skip to <a 
href="/evaluation/metricbuild/">the next section</a>.</p><h2 
id='defining-metric' class='header-anchors'>Defining Metric</h2><p>Metric 
evaluates the quality of an engine by comparing engine&#39;s output (predicted 
result) with the original label (actual result). An engine serving better 
prediction should yield a higher metric score, the tuning module returns the 
engine parameter with the highest score. It is sometimes called <a 
href="http://en.wikipedia.org/wiki/Loss_function";><em>loss function</em></a> in 
literature, where the goal is to minimize the loss function. </p><p>During 
tuning, it is important for us to understand the definition of the metric, to 
make sure it is aligned with the prediction engine&#39;s goal.</p>
 <p>In the classification template, we use <em>Accuracy</em> as our metric. 
<em>Accuracy</em> is defined as: the percentage of queries which the engine is 
able to predict the correct label. </p><h2 id='common-metrics' 
class='header-anchors'>Common Metrics</h2><p>We illustrate the choice of metric 
with the following confusion matrix. Row represents the engine predicted label, 
column represents the actual label. The second row means that of the 200 
testing data points, the engine predicted 60 (15 + 35 + 10) of them as label 
2.0, among which 35 are correct prediction (i.e. actual label is 2.0, matches 
with the prediction), and 25 are wrong.</p> <table><thead> <tr> <th 
style="text-align: center"></th> <th style="text-align: center">Actual = 
1.0</th> <th style="text-align: center">Actual = 2.0</th> <th 
style="text-align: center">Actual = 3.0</th> </tr> </thead><tbody> <tr> <td 
style="text-align: center"><strong>Predicted = 1.0</strong></td> <td 
style="text-align: center">30</td> <td style
 ="text-align: center">0</td> <td style="text-align: center">60</td> </tr> <tr> 
<td style="text-align: center"><strong>Predicted = 2.0</strong></td> <td 
style="text-align: center">15</td> <td style="text-align: center">35</td> <td 
style="text-align: center">10</td> </tr> <tr> <td style="text-align: 
center"><strong>Predicted = 3.0</strong></td> <td style="text-align: 
center">0</td> <td style="text-align: center">0</td> <td style="text-align: 
center">50</td> </tr> </tbody></table> <h3 id='accuracy' 
class='header-anchors'>Accuracy</h3><p>Accuracy measures how many data points 
are predicted correctly. It is one of the simplest forms of evaluation metrics. 
The accuracy score is # of correct points / # total = (30 + 35 + 50) / 200 = 
0.575.</p><h3 id='precision' class='header-anchors'>Precision</h3><p>Precision 
is a metric for binary classifier which measures the correctness among all 
positive labels. A binary classifier gives only two output values (i.e. 
positive and negative). For proble
 m where there are multiple values (3 in our example), we first have to 
transform our problem into a binary classification problem. For example, we can 
consider the problem of whether label = 1.0. The confusion matrix now becomes:</p> 
<table><thead> <tr> <th style="text-align: center"></th> <th style="text-align: 
center">Actual = 1.0</th> <th style="text-align: center">Actual != 1.0</th> 
</tr> </thead><tbody> <tr> <td style="text-align: center"><strong>Predicted = 
1.0</strong></td> <td style="text-align: center">30</td> <td style="text-align: 
center">60</td> </tr> <tr> <td style="text-align: center"><strong>Predicted != 
1.0</strong></td> <td style="text-align: center">15</td> <td style="text-align: 
center">95</td> </tr> </tbody></table> <p>Precision is the ratio between the 
number of correct positive answer (true positive) and the sum of correct 
positive answer (true positive) and wrong but positively labeled answer (false 
positive). In this case, the precision is 30 / (30 + 60) = ~0.3333.</p><h
 3 id='recall' class='header-anchors'>Recall</h3><p>Recall is a metric for 
binary classifier which measures how many positive labels are successfully 
predicted amongst all positive labels. Formally, it is the ratio between the 
number of correct positive answer (true positive) and the sum of correct 
positive answer (true positive) and wrongly negatively labeled answer (false 
negative). In this case, the recall is 30 / (30 + 15) = ~0.6667.</p><p>As we 
have discussed several common metrics for classification problem, we can 
implement them using the <code>Metric</code> class in <a 
href="/evaluation/metricbuild">the next 
section</a>.</p></div></div></div></div><footer><div class="container"><div 
class="seperator"></div><div class="row"><div class="col-md-6 col-xs-6 
footer-link-column"><div 
class="footer-link-column-row"><h4>Community</h4><ul><li><a 
href="//docs.prediction.io/install/" target="blank">Download</a></li><li><a 
href="//docs.prediction.io/" target="blank">Docs</a></li><li><a hr
 ef="//github.com/apache/incubator-predictionio" 
target="blank">GitHub</a></li><li><a 
href="mailto:user-subscr...@predictionio.incubator.apache.org"; 
target="blank">Subscribe to User Mailing List</a></li><li><a 
href="//stackoverflow.com/questions/tagged/predictionio" 
target="blank">Stackoverflow</a></li></ul></div></div><div class="col-md-6 
col-xs-6 footer-link-column"><div 
class="footer-link-column-row"><h4>Contribute</h4><ul><li><a 
href="//predictionio.incubator.apache.org/community/contribute-code/" 
target="blank">Contribute</a></li><li><a 
href="//github.com/apache/incubator-predictionio" target="blank">Source 
Code</a></li><li><a href="//issues.apache.org/jira/browse/PIO" 
target="blank">Bug Tracker</a></li><li><a 
href="mailto:dev-subscr...@predictionio.incubator.apache.org"; 
target="blank">Subscribe to Development Mailing 
List</a></li></ul></div></div></div></div><div id="footer-bottom"><div 
class="container"><div class="row"><div class="col-md-12"><div 
id="footer-logo-wrapper"><img
  alt="PredictionIO" src="/images/logos/logo-white-d1e9c6e6.png"/></div><div 
id="social-icons-wrapper"><a class="github-button" 
href="https://github.com/apache/incubator-predictionio"; data-style="mega" 
data-count-href="/apache/incubator-predictionio/stargazers" 
data-count-api="/repos/apache/incubator-predictionio#stargazers_count" 
data-count-aria-label="# stargazers on GitHub" aria-label="Star 
apache/incubator-predictionio on GitHub">Star</a> <a class="github-button" 
href="https://github.com/apache/incubator-predictionio/fork"; 
data-icon="octicon-git-branch" data-style="mega" 
data-count-href="/apache/incubator-predictionio/network" 
data-count-api="/repos/apache/incubator-predictionio#forks_count" 
data-count-aria-label="# forks on GitHub" aria-label="Fork 
apache/incubator-predictionio on GitHub">Fork</a> <script id="github-bjs" 
async="" defer="" src="https://buttons.github.io/buttons.js";></script><a 
href="//twitter.com/predictionio" target="blank"><img alt="PredictionIO on 
Twitter" src="/images/icons/twitter-ea9dc152.png"/></a> <a 
href="//www.facebook.com/predictionio" target="blank"><img alt="PredictionIO on 
Facebook" src="/images/icons/facebook-5c57939c.png"/></a> 
</div></div></div></div></div></footer></div><script>(function(w,d,t,u,n,s,e){w['SwiftypeObject']=n;w[n]=w[n]||function(){
+(w[n].q=w[n].q||[]).push(arguments);};s=d.createElement(t);
+e=d.getElementsByTagName(t)[0];s.async=1;s.src=u;e.parentNode.insertBefore(s,e);
+})(window,document,'script','//s.swiftypecdn.com/install/v1/st.js','_st');
+
+_st('install','HaUfpXXV87xoB_zzCQ45');</script><script 
src="/javascripts/application-280db181.js"></script></body></html>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-predictionio-site/blob/25938169/evaluation/metricchoose/index.html.gz
----------------------------------------------------------------------
diff --git a/evaluation/metricchoose/index.html.gz 
b/evaluation/metricchoose/index.html.gz
new file mode 100644
index 0000000..4bd0158
Binary files /dev/null and b/evaluation/metricchoose/index.html.gz differ

http://git-wip-us.apache.org/repos/asf/incubator-predictionio-site/blob/25938169/evaluation/paramtuning/index.html
----------------------------------------------------------------------
diff --git a/evaluation/paramtuning/index.html 
b/evaluation/paramtuning/index.html
new file mode 100644
index 0000000..88df894
--- /dev/null
+++ b/evaluation/paramtuning/index.html
@@ -0,0 +1,393 @@
+<!DOCTYPE html><html><head><title>Hyperparameter Tuning</title><meta 
charset="utf-8"/><meta content="IE=edge,chrome=1" 
http-equiv="X-UA-Compatible"/><meta name="viewport" 
content="width=device-width, initial-scale=1.0"/><meta class="swiftype" 
name="title" data-type="string" content="Hyperparameter Tuning"/><link 
rel="canonical" 
href="https://docs.prediction.io/evaluation/paramtuning/"/><link 
href="/images/favicon/normal-b330020a.png" rel="shortcut icon"/><link 
href="/images/favicon/apple-c0febcf2.png" rel="apple-touch-icon"/><link 
href="//fonts.googleapis.com/css?family=Open+Sans:300italic,400italic,600italic,700italic,800italic,400,300,600,700,800"
 rel="stylesheet"/><link 
href="//maxcdn.bootstrapcdn.com/font-awesome/4.2.0/css/font-awesome.min.css" 
rel="stylesheet"/><link href="/stylesheets/application-a2a2f408.css" 
rel="stylesheet" type="text/css"/><script 
src="//cdnjs.cloudflare.com/ajax/libs/html5shiv/3.7.2/html5shiv.min.js"></script><script
 src="//cdn.mathjax.org/mathjax/latest/
 MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script><script 
src="//use.typekit.net/pqo0itb.js"></script><script>try{Typekit.load({ async: 
true });}catch(e){}</script></head><body><div id="global"><header><div 
class="container" id="header-wrapper"><div class="row"><div 
class="col-sm-12"><div id="logo-wrapper"><span id="drawer-toggle"></span><a 
href="#"></a><a href="http://predictionio.incubator.apache.org/";><img 
alt="PredictionIO" id="logo" 
src="/images/logos/logo-ee2b9bb3.png"/></a></div><div id="menu-wrapper"><div 
id="pill-wrapper"><a class="pill left" 
href="/gallery/template-gallery">TEMPLATES</a> <a class="pill right" 
href="//github.com/apache/incubator-predictionio/">OPEN 
SOURCE</a></div></div><img class="mobile-search-bar-toggler hidden-md 
hidden-lg" 
src="/images/icons/search-glass-704bd4ff.png"/></div></div></div></header><div 
id="search-bar-row-wrapper"><div class="container-fluid" 
id="search-bar-row"><div class="row"><div class="col-md-9 col-sm-11 
col-xs-11"><div class="hidden
 -md hidden-lg" id="mobile-page-heading-wrapper"><p>PredictionIO 
Docs</p><h4>Hyperparameter Tuning</h4></div><h4 class="hidden-sm 
hidden-xs">PredictionIO Docs</h4></div><div class="col-md-3 col-sm-1 col-xs-1 
hidden-md hidden-lg"><img id="left-menu-indicator" 
src="/images/icons/down-arrow-dfe9f7fe.png"/></div><div class="col-md-3 
col-sm-12 col-xs-12 swiftype-wrapper"><div class="swiftype"><form 
class="search-form"><img class="search-box-toggler hidden-xs hidden-sm" 
src="/images/icons/search-glass-704bd4ff.png"/><div class="search-box"><img 
src="/images/icons/search-glass-704bd4ff.png"/><input type="text" 
id="st-search-input" class="st-search-input" placeholder="Search 
Doc..."/></div><img class="swiftype-row-hider hidden-md hidden-lg" 
src="/images/icons/drawer-toggle-active-fcbef12a.png"/></form></div></div><div 
class="mobile-left-menu-toggler hidden-md 
hidden-lg"></div></div></div></div><div id="page" class="container-fluid"><div 
class="row"><div id="left-menu-wrapper" class="col-md-3
 "><nav id="nav-main"><ul><li class="level-1"><a class="expandible" 
href="/"><span>Apache PredictionIO (incubating) Documentation</span></a><ul><li 
class="level-2"><a class="final" href="/"><span>Welcome to Apache PredictionIO 
(incubating)</span></a></li></ul></li><li class="level-1"><a class="expandible" 
href="#"><span>Getting Started</span></a><ul><li class="level-2"><a 
class="final" href="/start/"><span>A Quick Intro</span></a></li><li 
class="level-2"><a class="final" href="/install/"><span>Installing Apache 
PredictionIO (incubating)</span></a></li><li class="level-2"><a class="final" 
href="/start/download/"><span>Downloading an Engine Template</span></a></li><li 
class="level-2"><a class="final" href="/start/deploy/"><span>Deploying Your 
First Engine</span></a></li><li class="level-2"><a class="final" 
href="/start/customize/"><span>Customizing the 
Engine</span></a></li></ul></li><li class="level-1"><a class="expandible" 
href="#"><span>Integrating with Your App</span></a><ul><li cl
 ass="level-2"><a class="final" href="/appintegration/"><span>App Integration 
Overview</span></a></li><li class="level-2"><a class="expandible" 
href="/sdk/"><span>List of SDKs</span></a><ul><li class="level-3"><a 
class="final" href="/sdk/java/"><span>Java & Android SDK</span></a></li><li 
class="level-3"><a class="final" href="/sdk/php/"><span>PHP 
SDK</span></a></li><li class="level-3"><a class="final" 
href="/sdk/python/"><span>Python SDK</span></a></li><li class="level-3"><a 
class="final" href="/sdk/ruby/"><span>Ruby SDK</span></a></li><li 
class="level-3"><a class="final" href="/sdk/community/"><span>Community Powered 
SDKs</span></a></li></ul></li></ul></li><li class="level-1"><a 
class="expandible" href="#"><span>Deploying an Engine</span></a><ul><li 
class="level-2"><a class="final" href="/deploy/"><span>Deploying as a Web 
Service</span></a></li><li class="level-2"><a class="final" 
href="/cli/#engine-commands"><span>Engine Command-line 
Interface</span></a></li><li class="level-2"><a 
 class="final" href="/deploy/monitoring/"><span>Monitoring 
Engine</span></a></li><li class="level-2"><a class="final" 
href="/deploy/engineparams/"><span>Setting Engine Parameters</span></a></li><li 
class="level-2"><a class="final" href="/deploy/enginevariants/"><span>Deploying 
Multiple Engine Variants</span></a></li></ul></li><li class="level-1"><a 
class="expandible" href="#"><span>Customizing an Engine</span></a><ul><li 
class="level-2"><a class="final" href="/customize/"><span>Learning 
DASE</span></a></li><li class="level-2"><a class="final" 
href="/customize/dase/"><span>Implement DASE</span></a></li><li 
class="level-2"><a class="final" 
href="/customize/troubleshooting/"><span>Troubleshooting Engine 
Development</span></a></li><li class="level-2"><a class="final" 
href="/api/current/#package"><span>Engine Scala 
APIs</span></a></li></ul></li><li class="level-1"><a class="expandible" 
href="#"><span>Collecting and Analyzing Data</span></a><ul><li 
class="level-2"><a class="final" href="/d
 atacollection/"><span>Event Server Overview</span></a></li><li 
class="level-2"><a class="final" href="/cli/#event-server-commands"><span>Event 
Server Command-line Interface</span></a></li><li class="level-2"><a 
class="final" href="/datacollection/eventapi/"><span>Collecting Data with 
REST/SDKs</span></a></li><li class="level-2"><a class="final" 
href="/datacollection/eventmodel/"><span>Events Modeling</span></a></li><li 
class="level-2"><a class="final" 
href="/datacollection/webhooks/"><span>Unifying Multichannel Data with 
Webhooks</span></a></li><li class="level-2"><a class="final" 
href="/datacollection/channel/"><span>Channel</span></a></li><li 
class="level-2"><a class="final" 
href="/datacollection/batchimport/"><span>Importing Data in 
Batch</span></a></li><li class="level-2"><a class="final" 
href="/datacollection/analytics/"><span>Using Analytics 
Tools</span></a></li></ul></li><li class="level-1"><a class="expandible" 
href="#"><span>Choosing an Algorithm(s)</span></a><ul><li class=
 "level-2"><a class="final" href="/algorithm/"><span>Built-in Algorithm 
Libraries</span></a></li><li class="level-2"><a class="final" 
href="/algorithm/switch/"><span>Switching to Another 
Algorithm</span></a></li><li class="level-2"><a class="final" 
href="/algorithm/multiple/"><span>Combining Multiple 
Algorithms</span></a></li><li class="level-2"><a class="final" 
href="/algorithm/custom/"><span>Adding Your Own 
Algorithms</span></a></li></ul></li><li class="level-1"><a class="expandible" 
href="#"><span>ML Tuning and Evaluation</span></a><ul><li class="level-2"><a 
class="final" href="/evaluation/"><span>Overview</span></a></li><li 
class="level-2"><a class="final active" 
href="/evaluation/paramtuning/"><span>Hyperparameter Tuning</span></a></li><li 
class="level-2"><a class="final" 
href="/evaluation/evaluationdashboard/"><span>Evaluation 
Dashboard</span></a></li><li class="level-2"><a class="final" 
href="/evaluation/metricchoose/"><span>Choosing Evaluation 
Metrics</span></a></li><li class
 ="level-2"><a class="final" href="/evaluation/metricbuild/"><span>Building 
Evaluation Metrics</span></a></li></ul></li><li class="level-1"><a 
class="expandible" href="#"><span>System Architecture</span></a><ul><li 
class="level-2"><a class="final" href="/system/"><span>Architecture 
Overview</span></a></li><li class="level-2"><a class="final" 
href="/system/anotherdatastore/"><span>Using Another Data 
Store</span></a></li></ul></li><li class="level-1"><a class="expandible" 
href="#"><span>Engine Template Gallery</span></a><ul><li class="level-2"><a 
class="final" href="/gallery/template-gallery/"><span>Browse</span></a></li><li 
class="level-2"><a class="final" 
href="/community/submit-template/"><span>Submit your Engine as a 
Template</span></a></li></ul></li><li class="level-1"><a class="expandible" 
href="#"><span>Demo Tutorials</span></a><ul><li class="level-2"><a 
class="final" href="/demo/tapster/"><span>Comics Recommendation 
Demo</span></a></li><li class="level-2"><a class="final" href=
 "/demo/community/"><span>Community Contributed Demo</span></a></li><li 
class="level-2"><a class="final" href="/demo/textclassification/"><span>Text 
Classification Engine Tutorial</span></a></li></ul></li><li class="level-1"><a 
class="expandible" href="/community/"><span>Getting Involved</span></a><ul><li 
class="level-2"><a class="final" 
href="/community/contribute-code/"><span>Contribute Code</span></a></li><li 
class="level-2"><a class="final" 
href="/community/contribute-documentation/"><span>Contribute 
Documentation</span></a></li><li class="level-2"><a class="final" 
href="/community/contribute-sdk/"><span>Contribute a SDK</span></a></li><li 
class="level-2"><a class="final" 
href="/community/contribute-webhook/"><span>Contribute a 
Webhook</span></a></li><li class="level-2"><a class="final" 
href="/community/projects/"><span>Community 
Projects</span></a></li></ul></li><li class="level-1"><a class="expandible" 
href="#"><span>Getting Help</span></a><ul><li class="level-2"><a class="fina
 l" href="/resources/faq/"><span>FAQs</span></a></li><li class="level-2"><a 
class="final" href="/support/"><span>Support</span></a></li></ul></li><li 
class="level-1"><a class="expandible" 
href="#"><span>Resources</span></a><ul><li class="level-2"><a class="final" 
href="/resources/intellij/"><span>Developing Engines with IntelliJ 
IDEA</span></a></li><li class="level-2"><a class="final" 
href="/resources/upgrade/"><span>Upgrade Instructions</span></a></li><li 
class="level-2"><a class="final" 
href="/resources/glossary/"><span>Glossary</span></a></li></ul></li></ul></nav></div><div
 class="col-md-9 col-sm-12"><div class="content-header hidden-md 
hidden-lg"><div id="breadcrumbs" class="hidden-sm hidden-xs"><ul><li><a 
href="#">ML Tuning and Evaluation</a><span 
class="spacer">&gt;</span></li><li><span class="last">Hyperparameter 
Tuning</span></li></ul></div><div id="page-title"><h1>Hyperparameter 
Tuning</h1></div></div><div id="table-of-content-wrapper"><h5>On this 
page</h5><aside
id="table-of-contents"><ul> <li> <a href="#quick-start">Quick Start</a> </li> <li> <a
href="#detailed-explanation">Detailed Explanation</a> </li> <li> <a 
href="#the-evaluation-design">The Evaluation Design</a> </li> <li> <a 
href="#evaluation-data-generation">Evaluation Data Generation</a> </li> <li> <a 
href="#evaluation-metrics">Evaluation Metrics</a> </li> <li> <a 
href="#parameters-generation">Parameters Generation</a> </li> <li> <a 
href="#running-the-evaluation">Running the Evaluation</a> </li> <li> <a 
href="#notes">Notes</a> </li> </ul> </aside><hr/><a id="edit-page-link" 
href="https://github.com/apache/incubator-predictionio/tree/livedoc/docs/manual/source/evaluation/paramtuning.html.md"><img
 src="/images/icons/edit-pencil-d6c1bb3d.png"/>Edit this page</a></div><div 
class="content-header hidden-sm hidden-xs"><div id="breadcrumbs" 
class="hidden-sm hidden-xs"><ul><li><a href="#">ML Tuning and
Evaluation</a><span class="spacer">&gt;</span></li><li><span 
class="last">Hyperparameter
Tuning</span></li></ul></div><div id="page-title"><h1>Hyperparameter
Tuning</h1></div></div><div class="content"><p>A PredictionIO engine is 
instantiated by a set of parameters. These parameters define which algorithm is 
to be used, as well as supply the parameters for the algorithm itself. This
naturally raises the question of how to choose the best set of parameters. The 
evaluation module streamlines the process of <em>tuning</em> the engine to the 
best parameter set and deploys it.</p><h2 id='quick-start' 
class='header-anchors'>Quick Start</h2><p>We demonstrate the evaluation with <a 
href="/templates/classification/quickstart/">the classification template</a>. 
The classification template uses a naive bayesian algorithm that has a 
smoothing parameter. We evaluate the prediction quality against different 
parameter values to find the best parameter values, and then deploy it.</p><h3 
id='edit-the-appid' class='header-anchors'>Edit the AppId</h3><p>Edit 
MyClassification/src/main/scala/<strong><em>Evaluation.scala</em></strong>
to specify the <em>appId</em> you used to import
the data.</p><div class="highlight scala"><table style="border-spacing: 
0"><tbody><tr><td class="gutter gl" style="text-align: right"><pre 
class="lineno">1
+2
+3
+4
+5
+6</pre></td><td class="code"><pre><span class="k">object</span> <span 
class="nc">EngineParamsList</span> <span class="k">extends</span> <span 
class="nc">EngineParamsGenerator</span> <span class="o">{</span>
+  <span class="o">...</span>
+  <span class="k">private</span><span class="o">[</span><span 
class="kt">this</span><span class="o">]</span> <span class="k">val</span> <span 
class="n">baseEP</span> <span class="k">=</span> <span 
class="nc">EngineParams</span><span class="o">(</span>
+    <span class="n">dataSourceParams</span> <span class="k">=</span> <span 
class="nc">DataSourceParams</span><span class="o">(</span><span 
class="n">appId</span> <span class="k">=</span> <span 
class="o">&lt;</span><span class="nc">YOUR_APP_ID</span><span 
class="o">&gt;,</span> <span class="n">evalK</span> <span class="k">=</span> 
<span class="nc">Some</span><span class="o">(</span><span 
class="mi">5</span><span class="o">)))</span>
+  <span class="o">...</span>
+<span class="o">}</span>
+</pre></td></tr></tbody></table> </div> <h3 id='build-and-run-the-evaluation' 
class='header-anchors'>Build and run the evaluation</h3><p>To run an 
evaluation, the command <code>pio eval</code> is used. It takes two mandatory 
parameters: 1. the <code>Evaluation</code> object, which tells PredictionIO the
engine and metric we use for the evaluation; and 2. the 
<code>EngineParamsGenerator</code>, which contains a list of engine params to 
test against. The following command kickstarts the evaluation workflow for the 
classification template.</p><div class="highlight shell"><table 
style="border-spacing: 0"><tbody><tr><td class="gutter gl" style="text-align: 
right"><pre class="lineno">1
+2
+3
+4</pre></td><td class="code"><pre><span class="gp">$ </span>pio build
+...
+<span class="gp">$ </span>pio <span class="nb">eval 
</span>org.template.classification.AccuracyEvaluation <span class="se">\</span>
+    org.template.classification.EngineParamsList 
+</pre></td></tr></tbody></table> </div> <p>You will see the following 
output:</p><div class="highlight shell"><table style="border-spacing: 
0"><tbody><tr><td class="gutter gl" style="text-align: right"><pre 
class="lineno">1
+2
+3
+4
+5
+6
+7
+8
+9
+10
+11
+12
+13
+14
+15
+16
+17
+18
+19
+20
+21
+22
+23
+24
+25
+26
+27
+28
+29
+30
+31
+32
+33
+34
+35
+36
+37
+38
+39
+40
+41
+42
+43
+44
+45</pre></td><td class="code"><pre>...
+<span class="o">[</span>INFO] <span class="o">[</span>CoreWorkflow<span 
class="nv">$]</span> runEvaluation started
+...
+<span class="o">[</span>INFO] <span class="o">[</span>MetricEvaluator] 
Iteration 0
+<span class="o">[</span>INFO] <span class="o">[</span>MetricEvaluator] 
EngineParams: <span class="o">{</span><span 
class="s2">"dataSourceParams"</span>:<span class="o">{</span><span 
class="s2">""</span>:<span class="o">{</span><span 
class="s2">"appId"</span>:19,<span class="s2">"evalK"</span>:5<span 
class="o">}}</span>,<span class="s2">"preparatorParams"</span>:<span 
class="o">{</span><span class="s2">""</span>:<span class="o">{}}</span>,<span 
class="s2">"algorithmParamsList"</span>:[<span class="o">{</span><span 
class="s2">"naive"</span>:<span class="o">{</span><span 
class="s2">"lambda"</span>:10.0<span class="o">}}]</span>,<span 
class="s2">"servingParams"</span>:<span class="o">{</span><span 
class="s2">""</span>:<span class="o">{}}}</span>
+<span class="o">[</span>INFO] <span class="o">[</span>MetricEvaluator] Result: 
MetricScores<span class="o">(</span>0.9281045751633987,List<span 
class="o">())</span>
+<span class="o">[</span>INFO] <span class="o">[</span>MetricEvaluator] 
Iteration 1
+<span class="o">[</span>INFO] <span class="o">[</span>MetricEvaluator] 
EngineParams: <span class="o">{</span><span 
class="s2">"dataSourceParams"</span>:<span class="o">{</span><span 
class="s2">""</span>:<span class="o">{</span><span 
class="s2">"appId"</span>:19,<span class="s2">"evalK"</span>:5<span 
class="o">}}</span>,<span class="s2">"preparatorParams"</span>:<span 
class="o">{</span><span class="s2">""</span>:<span class="o">{}}</span>,<span 
class="s2">"algorithmParamsList"</span>:[<span class="o">{</span><span 
class="s2">"naive"</span>:<span class="o">{</span><span 
class="s2">"lambda"</span>:100.0<span class="o">}}]</span>,<span 
class="s2">"servingParams"</span>:<span class="o">{</span><span 
class="s2">""</span>:<span class="o">{}}}</span>
+<span class="o">[</span>INFO] <span class="o">[</span>MetricEvaluator] Result: 
MetricScores<span class="o">(</span>0.9150326797385621,List<span 
class="o">())</span>
+<span class="o">[</span>INFO] <span class="o">[</span>MetricEvaluator] 
Iteration 2
+<span class="o">[</span>INFO] <span class="o">[</span>MetricEvaluator] 
EngineParams: <span class="o">{</span><span 
class="s2">"dataSourceParams"</span>:<span class="o">{</span><span 
class="s2">""</span>:<span class="o">{</span><span 
class="s2">"appId"</span>:19,<span class="s2">"evalK"</span>:5<span 
class="o">}}</span>,<span class="s2">"preparatorParams"</span>:<span 
class="o">{</span><span class="s2">""</span>:<span class="o">{}}</span>,<span 
class="s2">"algorithmParamsList"</span>:[<span class="o">{</span><span 
class="s2">"naive"</span>:<span class="o">{</span><span 
class="s2">"lambda"</span>:1000.0<span class="o">}}]</span>,<span 
class="s2">"servingParams"</span>:<span class="o">{</span><span 
class="s2">""</span>:<span class="o">{}}}</span>
+<span class="o">[</span>INFO] <span class="o">[</span>MetricEvaluator] Result: 
MetricScores<span class="o">(</span>0.4444444444444444,List<span 
class="o">())</span>
+<span class="o">[</span>INFO] <span class="o">[</span>MetricEvaluator] Writing 
best variant params to disk...
+<span class="o">[</span>INFO] <span class="o">[</span>CoreWorkflow<span 
class="nv">$]</span> Updating evaluation instance with result: 
MetricEvaluatorResult:
+  <span class="c"># engine params evaluated: 3</span>
+Optimal Engine Params:
+  <span class="o">{</span>
+  <span class="s2">"dataSourceParams"</span>:<span class="o">{</span>
+    <span class="s2">""</span>:<span class="o">{</span>
+      <span class="s2">"appId"</span>:19,
+      <span class="s2">"evalK"</span>:5
+    <span class="o">}</span>
+  <span class="o">}</span>,
+  <span class="s2">"preparatorParams"</span>:<span class="o">{</span>
+    <span class="s2">""</span>:<span class="o">{</span>
+
+    <span class="o">}</span>
+  <span class="o">}</span>,
+  <span class="s2">"algorithmParamsList"</span>:[
+    <span class="o">{</span>
+      <span class="s2">"naive"</span>:<span class="o">{</span>
+        <span class="s2">"lambda"</span>:10.0
+      <span class="o">}</span>
+    <span class="o">}</span>
+  <span class="o">]</span>,
+  <span class="s2">"servingParams"</span>:<span class="o">{</span>
+    <span class="s2">""</span>:<span class="o">{</span>
+
+    <span class="o">}</span>
+  <span class="o">}</span>
+<span class="o">}</span>
+Metrics:
+  org.template.classification.Accuracy: 0.9281045751633987
+The best variant params can be found <span class="k">in </span>best.json
+<span class="o">[</span>INFO] <span class="o">[</span>CoreWorkflow<span 
class="nv">$]</span> runEvaluation completed
+</pre></td></tr></tbody></table> </div> <p>The console prints out the 
evaluation metric score of each engine params, and finally pretty-prints the
optimal engine params. Amongst the 3 engine params we evaluate, <em>lambda = 
10.0</em> yields the highest accuracy score of ~0.9281.</p><h3 
id='deploy-the-best-engine-parameter' class='header-anchors'>Deploy the best 
engine parameter</h3><p>The evaluation module also writes out the best engine 
parameter to disk at <code>best.json</code>. We can train and deploy this 
specific engine variant using the extra parameter <code>-v</code>. For
example:</p><div class="highlight shell"><table style="border-spacing: 
0"><tbody><tr><td class="gutter gl" style="text-align: right"><pre 
class="lineno">1
+2
+3
+4
+5
+6
+7</pre></td><td class="code"><pre><span class="gp">$ </span>pio train -v 
best.json
+...
+<span class="o">[</span>INFO] <span class="o">[</span>CoreWorkflow<span 
class="nv">$]</span> Training completed successfully.
+<span class="gp">$ </span>pio deploy -v best.json
+...
+<span class="o">[</span>INFO] <span class="o">[</span>HttpListener] Bound to 
localhost/127.0.0.1:8000
+<span class="o">[</span>INFO] <span class="o">[</span>MasterActor] Bind 
successful. Ready to serve.
+</pre></td></tr></tbody></table> </div> <p>At this point, we have successfully 
deployed the best engine variant we found through the evaluation 
process.</p><h2 id='detailed-explanation' class='header-anchors'>Detailed 
Explanation</h2><p>An engine often depends on a number of parameters, for 
example, the naive bayesian classification algorithm has a smoothing parameter 
to make the model more adaptive to unseen data. Compared with parameters which 
are <em>learnt</em> by the machine learning algorithm, this smoothing parameter 
<em>teaches</em> the algorithm how to work. Therefore, such parameters are 
usually called <em>hyperparameters</em>.</p><p>In PredictionIO, we always take 
a holistic view of an engine. An engine is comprised of a set of 
<strong><em>DAS</em></strong> controllers, as well as the necessary parameters 
for the controllers themselves. In the evaluation, we attempt to find out the 
best hyperparameters for an <em>engine</em>, which we call <strong><em>engine 
params</em></strong>.
Using engine params we can deploy a complete engine.</p><p>This
section demonstrates how to select the optimal engine params whilst ensuring 
the model doesn&#39;t overfit using PredictionIO&#39;s evaluation 
module.</p><h2 id='the-evaluation-design' class='header-anchors'>The Evaluation 
Design</h2><p>The PredictionIO evaluation module tests for the best engine 
params for an engine.</p><p>Given a set of engine params, we instantiate an 
engine and evaluate it with existing data. The data is split into two sets, a 
training set and a validation set. The training set is used to train the 
engine, which is deployed using the same steps described in earlier sections. 
We query the engine with the test set data, and compare the predicted values in 
the response with the actual data contained in the validation set. We define a 
<strong><em>metric</em></strong> to compare <strong><em>predicted 
result</em></strong> returned from the engine with the <strong><em>actual 
result</em></strong>
which we obtained from the test data. The goal is to maximize the metric
score.</p><p>This process is repeated many times with a series of engine 
params. At the end, PredictionIO returns the best engine params.</p><p>We 
demonstrate the evaluation with <a 
href="/templates/classification/quickstart/">the classification 
template</a>.</p><h2 id='evaluation-data-generation' 
class='header-anchors'>Evaluation Data Generation</h2><p>In evaluation data 
generation, the goal is to generate a sequence of (training, validation) data 
tuple. A common way is to use a <em>k-fold</em> generation process. The data 
set is split into <em>k folds</em>. We generate k tuples of training and 
validation sets, for each tuple, the training set takes <em>k - 1</em> of the 
folds and the validation set takes the remaining fold.</p><p>To enable 
evaluation data generation, we need to define the <strong><em>actual 
result</em></strong> and implement the method for generating the (training, 
validation) data tuple.</p>
<h3 id='actual-result' class='header-anchors'>Actual Result</h3><p>In
MyClassification/src/main/scala/<strong><em>Engine.scala</em></strong>, the 
<code>ActualResult</code> class defines the <strong><em>actual 
result</em></strong>:</p><div class="highlight scala"><table 
style="border-spacing: 0"><tbody><tr><td class="gutter gl" style="text-align: 
right"><pre class="lineno">1
+2
+3</pre></td><td class="code"><pre><span class="k">class</span> <span 
class="nc">ActualResult</span><span class="o">(</span>
+  <span class="k">val</span> <span class="n">label</span><span 
class="k">:</span> <span class="kt">Double</span>
+<span class="o">)</span> <span class="k">extends</span> <span 
class="nc">Serializable</span>
+</pre></td></tr></tbody></table> </div> <p>This class is used to store the 
actual label of the data (contrast to <code>PredictedResult</code> which is 
output of the engine).</p><h3 
id='implement-data-generation-method-in-datasource' 
class='header-anchors'>Implement Data Generation Method in DataSource</h3><p>In 
MyClassification/src/main/scala/<strong><em>DataSource.scala</em></strong>, the 
method <code>readEval</code> reads and selects data from datastore and returns 
a sequence of (training, validation) data.</p><div class="highlight 
scala"><table style="border-spacing: 0"><tbody><tr><td class="gutter gl" 
style="text-align: right"><pre class="lineno">1
+2
+3
+4
+5
+6
+7
+8
+9
+10
+11
+12
+13
+14
+15
+16
+17
+18
+19
+20
+21
+22
+23
+24
+25
+26
+27
+28
+29
+30
+31
+32
+33
+34
+35
+36
+37
+38
+39
+40
+41
+42
+43
+44
+45
+46
+47
+48
+49
+50
+51
+52
+53
+54
+55
+56
+57
+58
+59
+60</pre></td><td class="code"><pre><span class="k">class</span> <span 
class="nc">DataSource</span><span class="o">(</span><span class="k">val</span> 
<span class="n">dsp</span><span class="k">:</span> <span 
class="kt">DataSourceParams</span><span class="o">)</span>
+  <span class="k">extends</span> <span class="nc">PDataSource</span><span 
class="o">[</span><span class="kt">TrainingData</span>, <span 
class="kt">EmptyEvaluationInfo</span>, <span class="kt">Query</span>, <span 
class="kt">ActualResult</span><span class="o">]</span> <span class="o">{</span>
+
+  <span class="o">...</span>
+
+  <span class="k">override</span>
+  <span class="k">def</span> <span class="n">readEval</span><span 
class="o">(</span><span class="n">sc</span><span class="k">:</span> <span 
class="kt">SparkContext</span><span class="o">)</span>
+  <span class="k">:</span> <span class="kt">Seq</span><span 
class="o">[(</span><span class="kt">TrainingData</span>, <span 
class="kt">EmptyEvaluationInfo</span>, <span class="kt">RDD</span><span 
class="o">[(</span><span class="kt">Query</span>, <span 
class="kt">ActualResult</span><span class="o">)])]</span> <span 
class="k">=</span> <span class="o">{</span>
+    <span class="n">require</span><span class="o">(!</span><span 
class="n">dsp</span><span class="o">.</span><span class="n">evalK</span><span 
class="o">.</span><span class="n">isEmpty</span><span class="o">,</span> <span 
class="s">"DataSourceParams.evalK must not be None"</span><span 
class="o">)</span>
+
+    <span class="c1">// The following code reads the data from data store. It 
is equivalent to
+</span>    <span class="c1">// the readTraining method. We copy-and-paste the 
exact code here for
+</span>    <span class="c1">// illustration purpose, a recommended approach is 
to factor out this logic
+</span>    <span class="c1">// into a helper function and have both 
readTraining and readEval call the
+</span>    <span class="c1">// helper.
+</span>    <span class="k">val</span> <span class="n">eventsDb</span> <span 
class="k">=</span> <span class="nc">Storage</span><span class="o">.</span><span 
class="n">getPEvents</span><span class="o">()</span>
+    <span class="k">val</span> <span class="n">labeledPoints</span><span 
class="k">:</span> <span class="kt">RDD</span><span class="o">[</span><span 
class="kt">LabeledPoint</span><span class="o">]</span> <span class="k">=</span> 
<span class="n">eventsDb</span><span class="o">.</span><span 
class="n">aggregateProperties</span><span class="o">(</span>
+      <span class="n">appId</span> <span class="k">=</span> <span 
class="n">dsp</span><span class="o">.</span><span class="n">appId</span><span 
class="o">,</span>
+      <span class="n">entityType</span> <span class="k">=</span> <span 
class="s">"user"</span><span class="o">,</span>
+      <span class="c1">// only keep entities with these required properties 
defined
+</span>      <span class="n">required</span> <span class="k">=</span> <span 
class="nc">Some</span><span class="o">(</span><span class="nc">List</span><span 
class="o">(</span><span class="s">"plan"</span><span class="o">,</span> <span 
class="s">"attr0"</span><span class="o">,</span> <span 
class="s">"attr1"</span><span class="o">,</span> <span 
class="s">"attr2"</span><span class="o">)))(</span><span 
class="n">sc</span><span class="o">)</span>
+      <span class="c1">// aggregateProperties() returns RDD pair of
+</span>      <span class="c1">// entity ID and its aggregated properties
+</span>      <span class="o">.</span><span class="n">map</span> <span 
class="o">{</span> <span class="k">case</span> <span class="o">(</span><span 
class="n">entityId</span><span class="o">,</span> <span 
class="n">properties</span><span class="o">)</span> <span class="k">=&gt;</span>
+        <span class="k">try</span> <span class="o">{</span>
+          <span class="nc">LabeledPoint</span><span class="o">(</span><span 
class="n">properties</span><span class="o">.</span><span 
class="n">get</span><span class="o">[</span><span class="kt">Double</span><span 
class="o">](</span><span class="s">"plan"</span><span class="o">),</span>
+            <span class="nc">Vectors</span><span class="o">.</span><span 
class="n">dense</span><span class="o">(</span><span 
class="nc">Array</span><span class="o">(</span>
+              <span class="n">properties</span><span class="o">.</span><span 
class="n">get</span><span class="o">[</span><span class="kt">Double</span><span 
class="o">](</span><span class="s">"attr0"</span><span class="o">),</span>
+              <span class="n">properties</span><span class="o">.</span><span 
class="n">get</span><span class="o">[</span><span class="kt">Double</span><span 
class="o">](</span><span class="s">"attr1"</span><span class="o">),</span>
+              <span class="n">properties</span><span class="o">.</span><span 
class="n">get</span><span class="o">[</span><span class="kt">Double</span><span 
class="o">](</span><span class="s">"attr2"</span><span class="o">)</span>
+            <span class="o">))</span>
+          <span class="o">)</span>
+        <span class="o">}</span> <span class="k">catch</span> <span 
class="o">{</span>
+          <span class="k">case</span> <span class="n">e</span><span 
class="k">:</span> <span class="kt">Exception</span> <span 
class="o">=&gt;</span> <span class="o">{</span>
+            <span class="n">logger</span><span class="o">.</span><span 
class="n">error</span><span class="o">(</span><span class="n">s</span><span 
class="s">"Failed to get properties ${properties} of"</span> <span 
class="o">+</span>
+              <span class="n">s</span><span class="s">" ${entityId}. 
Exception: ${e}."</span><span class="o">)</span>
+            <span class="k">throw</span> <span class="n">e</span>
+          <span class="o">}</span>
+        <span class="o">}</span>
+      <span class="o">}.</span><span class="n">cache</span><span 
class="o">()</span>
+    <span class="c1">// End of reading from data store
+</span>
+    <span class="c1">// K-fold splitting
+</span>    <span class="k">val</span> <span class="n">evalK</span> <span 
class="k">=</span> <span class="n">dsp</span><span class="o">.</span><span 
class="n">evalK</span><span class="o">.</span><span class="n">get</span>
+    <span class="k">val</span> <span class="n">indexedPoints</span><span 
class="k">:</span> <span class="kt">RDD</span><span class="o">[(</span><span 
class="kt">LabeledPoint</span>, <span class="kt">Long</span><span 
class="o">)]</span> <span class="k">=</span> <span 
class="n">labeledPoints</span><span class="o">.</span><span 
class="n">zipWithIndex</span>
+
+    <span class="o">(</span><span class="mi">0</span> <span 
class="n">until</span> <span class="n">evalK</span><span 
class="o">).</span><span class="n">map</span> <span class="o">{</span> <span 
class="n">idx</span> <span class="k">=&gt;</span> 
+      <span class="k">val</span> <span class="n">trainingPoints</span> <span 
class="k">=</span> <span class="n">indexedPoints</span><span 
class="o">.</span><span class="n">filter</span><span class="o">(</span><span 
class="k">_</span><span class="o">.</span><span class="n">_2</span> <span 
class="o">%</span> <span class="n">evalK</span> <span class="o">!=</span> <span 
class="n">idx</span><span class="o">).</span><span class="n">map</span><span 
class="o">(</span><span class="k">_</span><span class="o">.</span><span 
class="n">_1</span><span class="o">)</span>
+      <span class="k">val</span> <span class="n">testingPoints</span> <span 
class="k">=</span> <span class="n">indexedPoints</span><span 
class="o">.</span><span class="n">filter</span><span class="o">(</span><span 
class="k">_</span><span class="o">.</span><span class="n">_2</span> <span 
class="o">%</span> <span class="n">evalK</span> <span class="o">==</span> <span 
class="n">idx</span><span class="o">).</span><span class="n">map</span><span 
class="o">(</span><span class="k">_</span><span class="o">.</span><span 
class="n">_1</span><span class="o">)</span>
+
+      <span class="o">(</span>
+        <span class="k">new</span> <span class="nc">TrainingData</span><span 
class="o">(</span><span class="n">trainingPoints</span><span class="o">),</span>
+        <span class="k">new</span> <span 
class="nc">EmptyEvaluationInfo</span><span class="o">(),</span>
+        <span class="n">testingPoints</span><span class="o">.</span><span 
class="n">map</span> <span class="o">{</span> 
+          <span class="n">p</span> <span class="k">=&gt;</span> <span 
class="o">(</span><span class="k">new</span> <span class="nc">Query</span><span 
class="o">(</span><span class="n">p</span><span class="o">.</span><span 
class="n">features</span><span class="o">.</span><span 
class="n">toArray</span><span class="o">),</span> <span class="k">new</span> 
<span class="nc">ActualResult</span><span class="o">(</span><span 
class="n">p</span><span class="o">.</span><span class="n">label</span><span 
class="o">))</span> 
+        <span class="o">}</span>
+      <span class="o">)</span>
+    <span class="o">}</span>
+  <span class="o">}</span>
+<span class="o">}</span>
+</pre></td></tr></tbody></table> </div> <p>The <code>readEval</code> method 
returns a sequence of (<code>TrainingData</code>, <code>EvaluationInfo</code>, 
<code>RDD[(Query, ActualResult)]</code>). <code>TrainingData</code> is the same
class we use for deploy, <code>RDD[(Query, ActualResult)]</code> is the 
validation set, <code>EvaluationInfo</code> can be used to hold some global 
evaluation data; it is not used in the current example.</p><p>Lines 11 to 41
are the logic of reading and transforming data from the datastore; it is
equivalent to the existing <code>readTraining</code> method. After line 41, the
variable <code>labeledPoints</code> contains the complete dataset with which we 
use to generate the (training, validation) sequence.</p><p>Lines 43 to 57 are
the <em>k-fold</em> logic. Line 45 gives each data point a unique id, and
whether the point belongs to the training or validation set depends on
the <em>mod</em> of the id (lines 48 to 49). For each point in the validation
set, we construct the <code>Query</code> and <code>ActualResult</code>
(line 55) which is used to validate the engine.</p><h2 id='evaluation-metrics'
class='header-anchors'>Evaluation Metrics</h2><p>We define a 
<code>Metric</code> which gives a <em>score</em> to engine params. The higher 
the score, the better the engine params are. In this template, we use accuracy
score which measures the portion of correct prediction among all data 
points.</p><p>In 
MyClassification/src/main/scala/<strong>Evaluation.scala</strong>, the class 
<code>Accuracy</code> implements the <em>accuracy</em> score. It extends a base 
helper class <code>AverageMetric</code> which calculates the average score 
over all <em>(Query, PredictedResult, ActualResult)</em> tuples.</p><div
class="highlight scala"><table style="border-spacing: 0"><tbody><tr><td 
class="gutter gl" style="text-align: right"><pre class="lineno">1
+2
+3
+4
+5</pre></td><td class="code"><pre><span class="k">case</span> <span 
class="k">class</span> <span class="nc">Accuracy</span>
+  <span class="k">extends</span> <span class="nc">AverageMetric</span><span 
class="o">[</span><span class="kt">EmptyEvaluationInfo</span>, <span 
class="kt">Query</span>, <span class="kt">PredictedResult</span>, <span 
class="kt">ActualResult</span><span class="o">]</span> <span class="o">{</span>
+  <span class="k">def</span> <span class="n">calculate</span><span 
class="o">(</span><span class="n">query</span><span class="k">:</span> <span 
class="kt">Query</span><span class="o">,</span> <span 
class="n">predicted</span><span class="k">:</span> <span 
class="kt">PredictedResult</span><span class="o">,</span> <span 
class="n">actual</span><span class="k">:</span> <span 
class="kt">ActualResult</span><span class="o">)</span>
+  <span class="k">:</span> <span class="kt">Double</span> <span 
class="o">=</span> <span class="o">(</span><span class="k">if</span> <span 
class="o">(</span><span class="n">predicted</span><span class="o">.</span><span 
class="n">label</span> <span class="o">==</span> <span 
class="n">actual</span><span class="o">.</span><span 
class="n">label</span><span class="o">)</span> <span class="mf">1.0</span> 
<span class="k">else</span> <span class="mf">0.0</span><span class="o">)</span>
+<span class="o">}</span>
+</pre></td></tr></tbody></table> </div> <p>Then, implement an
<code>Evaluation</code> object to define the engine and metric used in this 
evaluation.</p><div class="highlight scala"><table style="border-spacing: 
0"><tbody><tr><td class="gutter gl" style="text-align: right"><pre 
class="lineno">1
+2
+3</pre></td><td class="code"><pre><span class="k">object</span> <span 
class="nc">AccuracyEvaluation</span> <span class="k">extends</span> <span 
class="nc">Evaluation</span> <span class="o">{</span>
+  <span class="n">engineMetric</span> <span class="k">=</span> <span 
class="o">(</span><span class="nc">ClassificationEngine</span><span 
class="o">(),</span> <span class="k">new</span> <span 
class="nc">Accuracy</span><span class="o">())</span>
+<span class="o">}</span>
+</pre></td></tr></tbody></table> </div> <h2 id='parameters-generation' 
class='header-anchors'>Parameters Generation</h2><p>The last component is to 
specify the list of engine params we want to evaluate. In this guide, we 
discuss the simplest method. We specify an explicit list of engine params to be 
evaluated. </p><p>In 
MyClassification/src/main/scala/<strong>Evaluation.scala</strong>, the object 
<code>EngineParamsList</code> specifies the engine params list to be 
used.</p><div class="highlight scala"><table style="border-spacing: 
0"><tbody><tr><td class="gutter gl" style="text-align: right"><pre 
class="lineno">1
+2
+3
+4
+5
+6
+7
+8
+9
+10
+11
+12
+13
+14
+15
+16
+17</pre></td><td class="code"><pre><span class="k">object</span> <span 
class="nc">EngineParamsList</span> <span class="k">extends</span> <span 
class="nc">EngineParamsGenerator</span> <span class="o">{</span>
+  <span class="c1">// Define list of EngineParams used in Evaluation
+</span>
+  <span class="c1">// First, we define the base engine params. It specifies 
the appId from which
+</span>  <span class="c1">// the data is read, and a evalK parameter is used 
to define the
+</span>  <span class="c1">// cross-validation.
+</span>  <span class="k">private</span><span class="o">[</span><span 
class="kt">this</span><span class="o">]</span> <span class="k">val</span> <span 
class="n">baseEP</span> <span class="k">=</span> <span 
class="nc">EngineParams</span><span class="o">(</span>
+    <span class="n">dataSourceParams</span> <span class="k">=</span> <span 
class="nc">DataSourceParams</span><span class="o">(</span><span 
class="n">appId</span> <span class="k">=</span> <span class="mi">18</span><span 
class="o">,</span> <span class="n">evalK</span> <span class="k">=</span> <span 
class="nc">Some</span><span class="o">(</span><span class="mi">5</span><span 
class="o">)))</span>
+
+  <span class="c1">// Second, we specify the engine params list by explicitly 
listing all
+</span>  <span class="c1">// algorithm parameters. In this case, we evaluate 3 
engine params, each with
+</span>  <span class="c1">// a different algorithm params value.
+</span>  <span class="n">engineParamsList</span> <span class="k">=</span> 
<span class="nc">Seq</span><span class="o">(</span>
+    <span class="n">baseEP</span><span class="o">.</span><span 
class="n">copy</span><span class="o">(</span><span 
class="n">algorithmParamsList</span> <span class="k">=</span> <span 
class="nc">Seq</span><span class="o">((</span><span 
class="s">"naive"</span><span class="o">,</span> <span 
class="nc">AlgorithmParams</span><span class="o">(</span><span 
class="mf">10.0</span><span class="o">)))),</span>
+    <span class="n">baseEP</span><span class="o">.</span><span 
class="n">copy</span><span class="o">(</span><span 
class="n">algorithmParamsList</span> <span class="k">=</span> <span 
class="nc">Seq</span><span class="o">((</span><span 
class="s">"naive"</span><span class="o">,</span> <span 
class="nc">AlgorithmParams</span><span class="o">(</span><span 
class="mf">100.0</span><span class="o">)))),</span>
+    <span class="n">baseEP</span><span class="o">.</span><span 
class="n">copy</span><span class="o">(</span><span 
class="n">algorithmParamsList</span> <span class="k">=</span> <span 
class="nc">Seq</span><span class="o">((</span><span 
class="s">"naive"</span><span class="o">,</span> <span 
class="nc">AlgorithmParams</span><span class="o">(</span><span 
class="mf">1000.0</span><span class="o">)))))</span>
+<span class="o">}</span>
+</pre></td></tr></tbody></table> </div> <p>A good practice is to first define 
a base engine params that contains the common parameters used in all 
evaluations (lines 7 to 8). With the base params, we construct the list of 
engine params we want to evaluate by adding or replacing the controller 
parameter. Lines 13 to 16 generate 3 engine parameters, each with a different 
smoothing parameter.</p><h2 id='running-the-evaluation' class='header-anchors'>Running 
the Evaluation</h2><p>It remains to run the evaluation. Let&#39;s recap the 
quick start section above. The <code>pio eval</code> command kick starts the 
evaluation, and the result can be seen from the console.</p><div 
class="highlight shell"><table style="border-spacing: 0"><tbody><tr><td 
class="gutter gl" style="text-align: right"><pre class="lineno">1
+2
+3
+4</pre></td><td class="code"><pre><span class="gp">$ </span>pio build
+...
+<span class="gp">$ </span>pio <span class="nb">eval 
</span>org.template.classification.AccuracyEvaluation <span class="se">\</span>
+    org.template.classification.EngineParamsList 
+</pre></td></tr></tbody></table> </div> <p>You will see the following 
output:</p><div class="highlight shell"><table style="border-spacing: 
0"><tbody><tr><td class="gutter gl" style="text-align: right"><pre 
class="lineno">1
+2
+3
+4
+5
+6
+7
+8
+9
+10
+11
+12
+13
+14
+15
+16
+17
+18
+19
+20
+21
+22
+23
+24
+25
+26
+27
+28
+29
+30
+31
+32
+33
+34
+35
+36
+37
+38
+39
+40
+41
+42
+43
+44
+45</pre></td><td class="code"><pre>...
+<span class="o">[</span>INFO] <span class="o">[</span>CoreWorkflow<span 
class="nv">$]</span> runEvaluation started
+...
+<span class="o">[</span>INFO] <span class="o">[</span>MetricEvaluator] 
Iteration 0
+<span class="o">[</span>INFO] <span class="o">[</span>MetricEvaluator] 
EngineParams: <span class="o">{</span><span 
class="s2">"dataSourceParams"</span>:<span class="o">{</span><span 
class="s2">""</span>:<span class="o">{</span><span 
class="s2">"appId"</span>:19,<span class="s2">"evalK"</span>:5<span 
class="o">}}</span>,<span class="s2">"preparatorParams"</span>:<span 
class="o">{</span><span class="s2">""</span>:<span class="o">{}}</span>,<span 
class="s2">"algorithmParamsList"</span>:[<span class="o">{</span><span 
class="s2">"naive"</span>:<span class="o">{</span><span 
class="s2">"lambda"</span>:10.0<span class="o">}}]</span>,<span 
class="s2">"servingParams"</span>:<span class="o">{</span><span 
class="s2">""</span>:<span class="o">{}}}</span>
+<span class="o">[</span>INFO] <span class="o">[</span>MetricEvaluator] Result: 
MetricScores<span class="o">(</span>0.9281045751633987,List<span 
class="o">())</span>
+<span class="o">[</span>INFO] <span class="o">[</span>MetricEvaluator] 
Iteration 1
+<span class="o">[</span>INFO] <span class="o">[</span>MetricEvaluator] 
EngineParams: <span class="o">{</span><span 
class="s2">"dataSourceParams"</span>:<span class="o">{</span><span 
class="s2">""</span>:<span class="o">{</span><span 
class="s2">"appId"</span>:19,<span class="s2">"evalK"</span>:5<span 
class="o">}}</span>,<span class="s2">"preparatorParams"</span>:<span 
class="o">{</span><span class="s2">""</span>:<span class="o">{}}</span>,<span 
class="s2">"algorithmParamsList"</span>:[<span class="o">{</span><span 
class="s2">"naive"</span>:<span class="o">{</span><span 
class="s2">"lambda"</span>:100.0<span class="o">}}]</span>,<span 
class="s2">"servingParams"</span>:<span class="o">{</span><span 
class="s2">""</span>:<span class="o">{}}}</span>
+<span class="o">[</span>INFO] <span class="o">[</span>MetricEvaluator] Result: 
MetricScores<span class="o">(</span>0.9150326797385621,List<span 
class="o">())</span>
+<span class="o">[</span>INFO] <span class="o">[</span>MetricEvaluator] 
Iteration 2
+<span class="o">[</span>INFO] <span class="o">[</span>MetricEvaluator] 
EngineParams: <span class="o">{</span><span 
class="s2">"dataSourceParams"</span>:<span class="o">{</span><span 
class="s2">""</span>:<span class="o">{</span><span 
class="s2">"appId"</span>:19,<span class="s2">"evalK"</span>:5<span 
class="o">}}</span>,<span class="s2">"preparatorParams"</span>:<span 
class="o">{</span><span class="s2">""</span>:<span class="o">{}}</span>,<span 
class="s2">"algorithmParamsList"</span>:[<span class="o">{</span><span 
class="s2">"naive"</span>:<span class="o">{</span><span 
class="s2">"lambda"</span>:1000.0<span class="o">}}]</span>,<span 
class="s2">"servingParams"</span>:<span class="o">{</span><span 
class="s2">""</span>:<span class="o">{}}}</span>
+<span class="o">[</span>INFO] <span class="o">[</span>MetricEvaluator] Result: 
MetricScores<span class="o">(</span>0.4444444444444444,List<span 
class="o">())</span>
+<span class="o">[</span>INFO] <span class="o">[</span>MetricEvaluator] Writing 
best variant params to disk...
+<span class="o">[</span>INFO] <span class="o">[</span>CoreWorkflow<span 
class="nv">$]</span> Updating evaluation instance with result: 
MetricEvaluatorResult:
+  <span class="c"># engine params evaluated: 3</span>
+Optimal Engine Params:
+  <span class="o">{</span>
+  <span class="s2">"dataSourceParams"</span>:<span class="o">{</span>
+    <span class="s2">""</span>:<span class="o">{</span>
+      <span class="s2">"appId"</span>:19,
+      <span class="s2">"evalK"</span>:5
+    <span class="o">}</span>
+  <span class="o">}</span>,
+  <span class="s2">"preparatorParams"</span>:<span class="o">{</span>
+    <span class="s2">""</span>:<span class="o">{</span>
+
+    <span class="o">}</span>
+  <span class="o">}</span>,
+  <span class="s2">"algorithmParamsList"</span>:[
+    <span class="o">{</span>
+      <span class="s2">"naive"</span>:<span class="o">{</span>
+        <span class="s2">"lambda"</span>:10.0
+      <span class="o">}</span>
+    <span class="o">}</span>
+  <span class="o">]</span>,
+  <span class="s2">"servingParams"</span>:<span class="o">{</span>
+    <span class="s2">""</span>:<span class="o">{</span>
+
+    <span class="o">}</span>
+  <span class="o">}</span>
+<span class="o">}</span>
+Metrics:
+  org.template.classification.Accuracy: 0.9281045751633987
+The best variant params can be found <span class="k">in </span>best.json
+<span class="o">[</span>INFO] <span class="o">[</span>CoreWorkflow<span 
class="nv">$]</span> runEvaluation completed
+</pre></td></tr></tbody></table> </div> <h2 id='notes' 
class='header-anchors'>Notes</h2> <ul> <li>We deliberately do not mention the 
<strong><em>test set</em></strong> in this hyperparameter tuning guide. In 
machine learning literature, the <strong><em>test set</em></strong> is a 
separate piece of data which is used to evaluate the final engine params 
outputted by the evaluation process. This guarantees that no information in the 
training / validation set is <em>leaked</em> into the engine params and yields 
a biased outcome. With PredictionIO, there are multiple ways of conducting 
robust tuning; we will cover this topic in the coming sections.</li> </ul> 
</div></div></div></div><footer><div class="container"><div 
class="seperator"></div><div class="row"><div class="col-md-6 col-xs-6 
footer-link-column"><div 
class="footer-link-column-row"><h4>Community</h4><ul><li><a 
href="//docs.prediction.io/install/" target="blank">Download</a></li><li><a 
href="//docs.prediction.io/" target="blank">Docs</
 a></li><li><a href="//github.com/apache/incubator-predictionio" 
target="blank">GitHub</a></li><li><a 
href="mailto:user-subscr...@predictionio.incubator.apache.org"; 
target="blank">Subscribe to User Mailing List</a></li><li><a 
href="//stackoverflow.com/questions/tagged/predictionio" 
target="blank">Stackoverflow</a></li></ul></div></div><div class="col-md-6 
col-xs-6 footer-link-column"><div 
class="footer-link-column-row"><h4>Contribute</h4><ul><li><a 
href="//predictionio.incubator.apache.org/community/contribute-code/" 
target="blank">Contribute</a></li><li><a 
href="//github.com/apache/incubator-predictionio" target="blank">Source 
Code</a></li><li><a href="//issues.apache.org/jira/browse/PIO" 
target="blank">Bug Tracker</a></li><li><a 
href="mailto:dev-subscr...@predictionio.incubator.apache.org"; 
target="blank">Subscribe to Development Mailing 
List</a></li></ul></div></div></div></div><div id="footer-bottom"><div 
class="container"><div class="row"><div class="col-md-12"><div id="footer-lo
 go-wrapper"><img alt="PredictionIO" 
src="/images/logos/logo-white-d1e9c6e6.png"/></div><div 
id="social-icons-wrapper"><a class="github-button" 
href="https://github.com/apache/incubator-predictionio"; data-style="mega" 
data-count-href="/apache/incubator-predictionio/stargazers" 
data-count-api="/repos/apache/incubator-predictionio#stargazers_count" 
data-count-aria-label="# stargazers on GitHub" aria-label="Star 
apache/incubator-predictionio on GitHub">Star</a> <a class="github-button" 
href="https://github.com/apache/incubator-predictionio/fork"; 
data-icon="octicon-git-branch" data-style="mega" 
data-count-href="/apache/incubator-predictionio/network" 
data-count-api="/repos/apache/incubator-predictionio#forks_count" 
data-count-aria-label="# forks on GitHub" aria-label="Fork 
apache/incubator-predictionio on GitHub">Fork</a> <script id="github-bjs" 
async="" defer="" src="https://buttons.github.io/buttons.js";></script><a 
href="//twitter.com/predictionio" target="blank"><img alt="PredictionIO 
on Twitter" src="/images/icons/twitter-ea9dc152.png"/></a> <a 
href="//www.facebook.com/predictionio" target="blank"><img alt="PredictionIO on 
Facebook" src="/images/icons/facebook-5c57939c.png"/></a> 
</div></div></div></div></div></footer></div><script>(function(w,d,t,u,n,s,e){w['SwiftypeObject']=n;w[n]=w[n]||function(){
+(w[n].q=w[n].q||[]).push(arguments);};s=d.createElement(t);
+e=d.getElementsByTagName(t)[0];s.async=1;s.src=u;e.parentNode.insertBefore(s,e);
+})(window,document,'script','//s.swiftypecdn.com/install/v1/st.js','_st');
+
+_st('install','HaUfpXXV87xoB_zzCQ45');</script><script 
src="/javascripts/application-280db181.js"></script></body></html>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-predictionio-site/blob/25938169/evaluation/paramtuning/index.html.gz
----------------------------------------------------------------------
diff --git a/evaluation/paramtuning/index.html.gz 
b/evaluation/paramtuning/index.html.gz
new file mode 100644
index 0000000..3c1abf0
Binary files /dev/null and b/evaluation/paramtuning/index.html.gz differ

http://git-wip-us.apache.org/repos/asf/incubator-predictionio-site/blob/25938169/favicon.ico
----------------------------------------------------------------------
diff --git a/favicon.ico b/favicon.ico
new file mode 100644
index 0000000..92d6430
Binary files /dev/null and b/favicon.ico differ

http://git-wip-us.apache.org/repos/asf/incubator-predictionio-site/blob/25938169/fonts/bootstrap/glyphicons-halflings-regular-278e49a8.woff
----------------------------------------------------------------------
diff --git a/fonts/bootstrap/glyphicons-halflings-regular-278e49a8.woff 
b/fonts/bootstrap/glyphicons-halflings-regular-278e49a8.woff
new file mode 100644
index 0000000..9e61285
Binary files /dev/null and 
b/fonts/bootstrap/glyphicons-halflings-regular-278e49a8.woff differ

http://git-wip-us.apache.org/repos/asf/incubator-predictionio-site/blob/25938169/fonts/bootstrap/glyphicons-halflings-regular-44bc1850.ttf
----------------------------------------------------------------------
diff --git a/fonts/bootstrap/glyphicons-halflings-regular-44bc1850.ttf 
b/fonts/bootstrap/glyphicons-halflings-regular-44bc1850.ttf
new file mode 100644
index 0000000..1413fc6
Binary files /dev/null and 
b/fonts/bootstrap/glyphicons-halflings-regular-44bc1850.ttf differ

http://git-wip-us.apache.org/repos/asf/incubator-predictionio-site/blob/25938169/fonts/bootstrap/glyphicons-halflings-regular-86b6f62b.eot
----------------------------------------------------------------------
diff --git a/fonts/bootstrap/glyphicons-halflings-regular-86b6f62b.eot 
b/fonts/bootstrap/glyphicons-halflings-regular-86b6f62b.eot
new file mode 100644
index 0000000..b93a495
Binary files /dev/null and 
b/fonts/bootstrap/glyphicons-halflings-regular-86b6f62b.eot differ

Reply via email to