http://git-wip-us.apache.org/repos/asf/incubator-predictionio-site/blob/c2c7b62a/system/deploy-cloudformation/index.html ---------------------------------------------------------------------- diff --git a/system/deploy-cloudformation/index.html b/system/deploy-cloudformation/index.html index 5fc4457..c6f9248 100644 --- a/system/deploy-cloudformation/index.html +++ b/system/deploy-cloudformation/index.html @@ -1,4 +1,4 @@ -<!DOCTYPE html><html><head><title>Deploying with AWS CloudFormation</title><meta charset="utf-8"/><meta content="IE=edge,chrome=1" http-equiv="X-UA-Compatible"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><meta class="swiftype" name="title" data-type="string" content="Deploying with AWS CloudFormation"/><link rel="canonical" href="https://docs.prediction.io/system/deploy-cloudformation/"/><link href="/images/favicon/normal-b330020a.png" rel="shortcut icon"/><link href="/images/favicon/apple-c0febcf2.png" rel="apple-touch-icon"/><link href="//fonts.googleapis.com/css?family=Open+Sans:300italic,400italic,600italic,700italic,800italic,400,300,600,700,800" rel="stylesheet"/><link href="//maxcdn.bootstrapcdn.com/font-awesome/4.2.0/css/font-awesome.min.css" rel="stylesheet"/><link href="/stylesheets/application-a2a2f408.css" rel="stylesheet" type="text/css"/><script src="//cdnjs.cloudflare.com/ajax/libs/html5shiv/3.7.2/html5shiv.min.js"></script><script src="//c dn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script><script src="//use.typekit.net/pqo0itb.js"></script><script>try{Typekit.load({ async: true });}catch(e){}</script></head><body><div id="global"><header><div class="container" id="header-wrapper"><div class="row"><div class="col-sm-12"><div id="logo-wrapper"><span id="drawer-toggle"></span><a href="#"></a><a href="http://predictionio.incubator.apache.org/"><img alt="PredictionIO" id="logo" src="/images/logos/logo-ee2b9bb3.png"/></a></div><div id="menu-wrapper"><div id="pill-wrapper"><a 
class="pill left" href="//templates.prediction.io/">TEMPLATES</a> <a class="pill right" href="//github.com/apache/incubator-predictionio/">OPEN SOURCE</a></div></div><img class="mobile-search-bar-toggler hidden-md hidden-lg" src="/images/icons/search-glass-704bd4ff.png"/></div></div></div></header><div id="search-bar-row-wrapper"><div class="container-fluid" id="search-bar-row"><div class="row"><div class="col-md-9 col-sm-1 1 col-xs-11"><div class="hidden-md hidden-lg" id="mobile-page-heading-wrapper"><p>PredictionIO Docs</p><h4>Deploying with AWS CloudFormation</h4></div><h4 class="hidden-sm hidden-xs">PredictionIO Docs</h4></div><div class="col-md-3 col-sm-1 col-xs-1 hidden-md hidden-lg"><img id="left-menu-indicator" src="/images/icons/down-arrow-dfe9f7fe.png"/></div><div class="col-md-3 col-sm-12 col-xs-12 swiftype-wrapper"><div class="swiftype"><form class="search-form"><img class="search-box-toggler hidden-xs hidden-sm" src="/images/icons/search-glass-704bd4ff.png"/><div class="search-box"><img src="/images/icons/search-glass-704bd4ff.png"/><input type="text" id="st-search-input" class="st-search-input" placeholder="Search Doc..."/></div><img class="swiftype-row-hider hidden-md hidden-lg" src="/images/icons/drawer-toggle-active-fcbef12a.png"/></form></div></div><div class="mobile-left-menu-toggler hidden-md hidden-lg"></div></div></div></div><div id="page" class="container-fluid"><div class="row"> <div id="left-menu-wrapper" class="col-md-3"><nav id="nav-main"><ul><li class="level-1"><a class="expandible" href="/"><span>Apache PredictionIO (incubating) Documentation</span></a><ul><li class="level-2"><a class="final" href="/"><span>Welcome to Apache PredictionIO (incubating)</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Getting Started</span></a><ul><li class="level-2"><a class="final" href="/start/"><span>A Quick Intro</span></a></li><li class="level-2"><a class="final" href="/install/"><span>Installing Apache PredictionIO 
(incubating)</span></a></li><li class="level-2"><a class="final" href="/start/download/"><span>Downloading an Engine Template</span></a></li><li class="level-2"><a class="final" href="/start/deploy/"><span>Deploying Your First Engine</span></a></li><li class="level-2"><a class="final" href="/start/customize/"><span>Customizing the Engine</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Int egrating with Your App</span></a><ul><li class="level-2"><a class="final" href="/appintegration/"><span>App Integration Overview</span></a></li><li class="level-2"><a class="expandible" href="/sdk/"><span>List of SDKs</span></a><ul><li class="level-3"><a class="final" href="/sdk/java/"><span>Java & Android SDK</span></a></li><li class="level-3"><a class="final" href="/sdk/php/"><span>PHP SDK</span></a></li><li class="level-3"><a class="final" href="/sdk/python/"><span>Python SDK</span></a></li><li class="level-3"><a class="final" href="/sdk/ruby/"><span>Ruby SDK</span></a></li><li class="level-3"><a class="final" href="/sdk/community/"><span>Community Powered SDKs</span></a></li></ul></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Deploying an Engine</span></a><ul><li class="level-2"><a class="final" href="/deploy/"><span>Deploying as a Web Service</span></a></li><li class="level-2"><a class="final" href="/cli/#engine-commands"><span>Engine Command-line Inter face</span></a></li><li class="level-2"><a class="final" href="/deploy/monitoring/"><span>Monitoring Engine</span></a></li><li class="level-2"><a class="final" href="/deploy/engineparams/"><span>Setting Engine Parameters</span></a></li><li class="level-2"><a class="final" href="/deploy/enginevariants/"><span>Deploying Multiple Engine Variants</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Customizing an Engine</span></a><ul><li class="level-2"><a class="final" href="/customize/"><span>Learning DASE</span></a></li><li 
class="level-2"><a class="final" href="/customize/dase/"><span>Implement DASE</span></a></li><li class="level-2"><a class="final" href="/customize/troubleshooting/"><span>Troubleshooting Engine Development</span></a></li><li class="level-2"><a class="final" href="/api/current/#package"><span>Engine Scala APIs</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Collecting and Analyzing Data</span></a><ul><l i class="level-2"><a class="final" href="/datacollection/"><span>Event Server Overview</span></a></li><li class="level-2"><a class="final" href="/cli/#event-server-commands"><span>Event Server Command-line Interface</span></a></li><li class="level-2"><a class="final" href="/datacollection/eventapi/"><span>Collecting Data with REST/SDKs</span></a></li><li class="level-2"><a class="final" href="/datacollection/eventmodel/"><span>Events Modeling</span></a></li><li class="level-2"><a class="final" href="/datacollection/webhooks/"><span>Unifying Multichannel Data with Webhooks</span></a></li><li class="level-2"><a class="final" href="/datacollection/channel/"><span>Channel</span></a></li><li class="level-2"><a class="final" href="/datacollection/batchimport/"><span>Importing Data in Batch</span></a></li><li class="level-2"><a class="final" href="/datacollection/analytics/"><span>Using Analytics Tools</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Choosi ng an Algorithm(s)</span></a><ul><li class="level-2"><a class="final" href="/algorithm/"><span>Built-in Algorithm Libraries</span></a></li><li class="level-2"><a class="final" href="/algorithm/switch/"><span>Switching to Another Algorithm</span></a></li><li class="level-2"><a class="final" href="/algorithm/multiple/"><span>Combining Multiple Algorithms</span></a></li><li class="level-2"><a class="final" href="/algorithm/custom/"><span>Adding Your Own Algorithms</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>ML Tuning and 
Evaluation</span></a><ul><li class="level-2"><a class="final" href="/evaluation/"><span>Overview</span></a></li><li class="level-2"><a class="final" href="/evaluation/paramtuning/"><span>Hyperparameter Tuning</span></a></li><li class="level-2"><a class="final" href="/evaluation/evaluationdashboard/"><span>Evaluation Dashboard</span></a></li><li class="level-2"><a class="final" href="/evaluation/metricchoose/"><span>Choosing Evaluat ion Metrics</span></a></li><li class="level-2"><a class="final" href="/evaluation/metricbuild/"><span>Building Evaluation Metrics</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>System Architecture</span></a><ul><li class="level-2"><a class="final" href="/system/"><span>Architecture Overview</span></a></li><li class="level-2"><a class="final" href="/system/anotherdatastore/"><span>Using Another Data Store</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Engine Template Gallery</span></a><ul><li class="level-2"><a class="final" href="/gallery/template-gallery/"><span>Browse</span></a></li><li class="level-2"><a class="final" href="/community/submit-template/"><span>Submit your Engine as a Template</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Demo Tutorials</span></a><ul><li class="level-2"><a class="final" href="/demo/tapster/"><span>Comics Recommendation Demo</span></a></li><li cl ass="level-2"><a class="final" href="/demo/community/"><span>Community Contributed Demo</span></a></li><li class="level-2"><a class="final" href="/demo/textclassification/"><span>Text Classification Engine Tutorial</span></a></li></ul></li><li class="level-1"><a class="expandible" href="/community/"><span>Getting Involved</span></a><ul><li class="level-2"><a class="final" href="/community/contribute-code/"><span>Contribute Code</span></a></li><li class="level-2"><a class="final" href="/community/contribute-documentation/"><span>Contribute 
Documentation</span></a></li><li class="level-2"><a class="final" href="/community/contribute-sdk/"><span>Contribute a SDK</span></a></li><li class="level-2"><a class="final" href="/community/contribute-webhook/"><span>Contribute a Webhook</span></a></li><li class="level-2"><a class="final" href="/community/projects/"><span>Community Projects</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Getting Help</span></a><u l><li class="level-2"><a class="final" href="/resources/faq/"><span>FAQs</span></a></li><li class="level-2"><a class="final" href="/support/"><span>Support</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Resources</span></a><ul><li class="level-2"><a class="final" href="/resources/intellij/"><span>Developing Engines with IntelliJ IDEA</span></a></li><li class="level-2"><a class="final" href="/resources/upgrade/"><span>Upgrade Instructions</span></a></li><li class="level-2"><a class="final" href="/resources/glossary/"><span>Glossary</span></a></li></ul></li></ul></nav></div><div class="col-md-9 col-sm-12"><div class="content-header hidden-md hidden-lg"><div id="page-title"><h1>Deploying with AWS CloudFormation</h1></div></div><div id="table-of-content-wrapper"><h5>On this page</h5><aside id="table-of-contents"><ul> <li> <a href="#overview">Overview</a> </li> <li> <a href="#step-by-step">Step-by-Step</a> </li> <li> <a href="#using-the-cluster">Using t he Cluster</a> </li> <li> <a href="#scaling-the-cluster">Scaling the Cluster</a> </li> <li> <a href="#support-and-pricing">Support and Pricing</a> </li> </ul> </aside><hr/><a id="edit-page-link" href="https://github.com/apache/incubator-predictionio/tree/livedoc/docs/manual/source/system/deploy-cloudformation.html.md.erb"><img src="/images/icons/edit-pencil-d6c1bb3d.png"/>Edit this page</a></div><div class="content-header hidden-sm hidden-xs"><div id="page-title"><h1>Deploying with AWS CloudFormation</h1></div></div><div class="content"><h2 
id='overview' class='header-anchors'>Overview</h2><p>You can scale PredictionIO on AWS with <a href="http://aws.amazon.com/cloudformation/">CloudFormation</a>. Here we have defined a PredictionIO CloudFormation stack that you can deploy a functional, fully distributed PredictionIO cluster in minutes.</p><h3 id='instances' class='header-anchors'>Instances</h3><p>The PredictionIO CloudFormation stack creates two types of instance: <strong>compute and storage</strong>. By default, the stack will launch <strong>1 compute Instance and 3 Storage instances</strong>.</p><p>The compute instance <em>(ComputeInstance)</em> acts as Spark master. You can launch extra compute instances <em>(ComputeInstanceExtra)</em> by updating the stack. The storage instances <em>(StorageInstance)</em> form the core of the HDFS, ZooKeeper quorum, and HBase storage. Extra storage instances <em>(StorageInstanceExtra)</em> can be added to the cluster by updating the stack. They cannot be removed once they are spinned up.</p><p>PredictionIO Event Server will be launched on all storage instances.</p><h3 id='networking' class='header-anchors'>Networking</h3><p>The stack will automatically create a VPC and a subnet with an Internet gateway. All cluster instances will be launched inside this subnet using a single security group that enables all TCP communications among all instances within the same group. All compute instances (including those that are launched after stack creation) will receive public IPs. 
All core storage instances will receive public Elastic IPs.</p><h2 id='step-by-step' class='header-anchors'>Step-by-Step</h2><p>First, you need to have an active Amazon Web Services account with permissions to use the following services:</p> <ul> <li>Auto Scaling</li> <li>CloudFormation</li> <li>EC2</li> <li>VPC</li> </ul> <h3 id='subscribe-to-predictionio-cluster' class='header-anchors'>Subscribe to PredictionIO Cluster</h3><p>Before you can start using PredictionIO CloudFormation template, you will need to subscribe to PredictionIO Cluster AMI through AWS Marketplace. To subscribe, go to this <a href="https://aws.amazon.com/marketplace/pp/B00S74CY0A">link</a>. Click the yellow "Continue" button on the next screen.</p><p><img alt="AWS Marketplace Subscription Page" src="/images/cloudformation/awsmp-1-609eb919.png"/></p><p>In the next screen, click the "Manual Launch" tab, then select your preferred pricing model. Pick your desired version, then click "Accept Terms".</p><p><img alt="AWS Marketplace Term Page" src="/images/cloudformation/awsmp-2-7dd613ba.png"/></p><p>Once your subscription is in place, you may proceed to the next section.</p><h3 id='start-using-cloudformation' class='header-anchors'>Start Using CloudFormation</h3><p>From your main AWS console, locate CloudFormation and click on it.</p><p><img alt="CloudFormation on AWS Console" src="/images/cloudformation/cf-01-13c7f924.png"/></p><p>This will bring you to the CloudFormation console below.</p><p><img alt="CloudFormation Console" src="/images/cloudformation/cf-02-61cd8e67.png"/></p><h3 id='select-the-predictionio-cloudformation-stack-template' class='header-anchors'>Select the PredictionIO CloudFormation Stack Template</h3><p>From the CloudFormation console, click on the <strong>Create New Stack</strong> blue button as shown above. This will bring up the <strong>Select Template</strong> screen. Name your stack as you l ike. 
Within the <em>Template</em> section, choose <strong>Specify an Amazon S3 template URL</strong>, and put <a href="https://s3.amazonaws.com/cloudformation.prediction.io/0.9.6/pio.json">https://s3.amazonaws.com/cloudformation.prediction.io/0.9.6/pio.json</a> as the value.</p><p><img alt="CloudFormation Stack Template Selection" src="/images/cloudformation/cf-03-d017fa2d.png"/></p><p>Click <strong>Next</strong> when you are done.</p><h3 id='specify-stack-parameters' class='header-anchors'>Specify Stack Parameters</h3><p>The next screen shows the stack parameters. You must enter your AWS SSH key pair. For the other parameters, you can change them to meet your needs or simply use the default values.</p><p><img alt="Stack Parameters" src="/images/cloudformation/cf-04-6b545f50.png"/></p> <table><thead> <tr> <th>Parameter</th> <th>Description</th> </tr> </thead><tbody> <tr> <td>AWS-KeyPair</td> <td>The AWS SSH key pair name that can be used to access all instances in the cluster.</td> </tr> <tr> <td>AvailabilityZone</td> <td>Specify the availability zone that the PredictionIO cluster will be launched in. All instances of the cluster will be launched into the same zone for optimal network performance between one another.</td> </tr> <tr> <td>ComputeInstanceType</td> <td>The EC2 instance type of all compute instances. Memory-optimized EC2 instances are recommended.</td> </tr> <tr> <td>ComputeInstanceExtra</td> <td>Number of extra compute instances besides the core compute instance. This can be increased and decreased.</td> </tr> <tr> <td>StorageInstanceExtra</td> <td>Number of extra storage instances besides core storage instances. <strong>Never decrease this value or you will risk data corruption.</strong></td> </tr> <tr> <td>StorageInstanceExtraSize</td> <td>Size in GB of each extra storage instance. This can be changed when you add an extra storage instance.</td> </tr> <tr> <td>StorageInstanceExtraVolumeType</td> <td>The EBS volume type of each extra storage inst ance. 
Valid values are <em>standard</em> and <em>gp2</em>. This can be changed when you add an extra storage instance.</td> </tr> <tr> <td>StorageInstanceType</td> <td>The EC2 instance type of all storage instances. General purpose EC2 instances are recommended.</td> </tr> <tr> <td>StorageInstanceSize</td> <td>Size in GB of each core storage instance. This cannot be changed once the cluster is launched.</td> </tr> <tr> <td>StorageInstanceVolumeType</td> <td>The EBS volume type of each core storage instance. Valid values are <em>standard</em> and <em>gp2</em>. This cannot be changed once the cluster is launched.</td> </tr> </tbody></table> <p>Click <strong>Next</strong> when you are done. You will arrive at the <strong>Options</strong> screen. You can skip this step if you do not have other options to specify.</p><p>At the <strong>Review</strong> screen, click <strong>Create</strong> to finish.</p><h2 id='using-the-cluster' class='header-anchors'>Using the Cluster</h2><p>You should see the following when the cluster is being created after the previous step.</p><p><img alt="Stack Creation" src="/images/cloudformation/cf-05-92ee63c2.png"/></p><p>Once the stack creation has finished, you can click on <strong>Events</strong> and select <strong>Outputs</strong> to arrive to the following screen.</p><p><img alt="Completed Stack" src="/images/cloudformation/cf-06-b36528fa.png"/></p><p>Take note of <strong>PIOComputeMasterPublicIp</strong> and <strong>PIOStorageMasterPublicIp</strong>. We will now access the cluster and make sure everything is in place.</p><div class="alert-message warning"><p>Sometimes the stack is created successfully but not all cluster services would launch due to potential network glitches or system issues within a cluster instance. 
In this case, simply delete and create the stack again.</p></div><h3 id='verify-compute-instances' class='header-anchors'>Verify Compute Instances</h3><p>SSH to the master compute instance using the <strong>PIOComputeM asterPublicIp</strong>. In this example, let us assume the IP address be 54.175.145.84, and your private key file be <strong>yourkey.pem</strong>.</p><div class="highlight shell"><table style="border-spacing: 0"><tbody><tr><td class="gutter gl" style="text-align: right"><pre class="lineno">1</pre></td><td class="code"><pre><span class="gp">$ </span>ssh -i yourkey.pem -A -L 8080:localhost:8080 [email protected] +<!DOCTYPE html><html><head><title>Deploying with AWS CloudFormation</title><meta charset="utf-8"/><meta content="IE=edge,chrome=1" http-equiv="X-UA-Compatible"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><meta class="swiftype" name="title" data-type="string" content="Deploying with AWS CloudFormation"/><link rel="canonical" href="https://docs.prediction.io/system/deploy-cloudformation/"/><link href="/images/favicon/normal-b330020a.png" rel="shortcut icon"/><link href="/images/favicon/apple-c0febcf2.png" rel="apple-touch-icon"/><link href="//fonts.googleapis.com/css?family=Open+Sans:300italic,400italic,600italic,700italic,800italic,400,300,600,700,800" rel="stylesheet"/><link href="//maxcdn.bootstrapcdn.com/font-awesome/4.2.0/css/font-awesome.min.css" rel="stylesheet"/><link href="/stylesheets/application-a2a2f408.css" rel="stylesheet" type="text/css"/><script src="//cdnjs.cloudflare.com/ajax/libs/html5shiv/3.7.2/html5shiv.min.js"></script><script src="//c dn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script><script src="//use.typekit.net/pqo0itb.js"></script><script>try{Typekit.load({ async: true });}catch(e){}</script></head><body><div id="global"><header><div class="container" id="header-wrapper"><div class="row"><div class="col-sm-12"><div id="logo-wrapper"><span 
id="drawer-toggle"></span><a href="#"></a><a href="http://predictionio.incubator.apache.org/"><img alt="PredictionIO" id="logo" src="/images/logos/logo-ee2b9bb3.png"/></a></div><div id="menu-wrapper"><div id="pill-wrapper"><a class="pill left" href="/gallery/template-gallery">TEMPLATES</a> <a class="pill right" href="//github.com/apache/incubator-predictionio/">OPEN SOURCE</a></div></div><img class="mobile-search-bar-toggler hidden-md hidden-lg" src="/images/icons/search-glass-704bd4ff.png"/></div></div></div></header><div id="search-bar-row-wrapper"><div class="container-fluid" id="search-bar-row"><div class="row"><div class="col-md-9 col-sm-11 col-xs-11"><div class="hidden-md hidden-lg" id="mobile-page-heading-wrapper"><p>PredictionIO Docs</p><h4>Deploying with AWS CloudFormation</h4></div><h4 class="hidden-sm hidden-xs">PredictionIO Docs</h4></div><div class="col-md-3 col-sm-1 col-xs-1 hidden-md hidden-lg"><img id="left-menu-indicator" src="/images/icons/down-arrow-dfe9f7fe.png"/></div><div class="col-md-3 col-sm-12 col-xs-12 swiftype-wrapper"><div class="swiftype"><form class="search-form"><img class="search-box-toggler hidden-xs hidden-sm" src="/images/icons/search-glass-704bd4ff.png"/><div class="search-box"><img src="/images/icons/search-glass-704bd4ff.png"/><input type="text" id="st-search-input" class="st-search-input" placeholder="Search Doc..."/></div><img class="swiftype-row-hider hidden-md hidden-lg" src="/images/icons/drawer-toggle-active-fcbef12a.png"/></form></div></div><div class="mobile-left-menu-toggler hidden-md hidden-lg"></div></div></div></div><div id="page" class="container-fluid"><div class="row">< div id="left-menu-wrapper" class="col-md-3"><nav id="nav-main"><ul><li class="level-1"><a class="expandible" href="/"><span>Apache PredictionIO (incubating) Documentation</span></a><ul><li class="level-2"><a class="final" href="/"><span>Welcome to Apache PredictionIO (incubating)</span></a></li></ul></li><li class="level-1"><a class="expandible" 
href="#"><span>Getting Started</span></a><ul><li class="level-2"><a class="final" href="/start/"><span>A Quick Intro</span></a></li><li class="level-2"><a class="final" href="/install/"><span>Installing Apache PredictionIO (incubating)</span></a></li><li class="level-2"><a class="final" href="/start/download/"><span>Downloading an Engine Template</span></a></li><li class="level-2"><a class="final" href="/start/deploy/"><span>Deploying Your First Engine</span></a></li><li class="level-2"><a class="final" href="/start/customize/"><span>Customizing the Engine</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Inte grating with Your App</span></a><ul><li class="level-2"><a class="final" href="/appintegration/"><span>App Integration Overview</span></a></li><li class="level-2"><a class="expandible" href="/sdk/"><span>List of SDKs</span></a><ul><li class="level-3"><a class="final" href="/sdk/java/"><span>Java & Android SDK</span></a></li><li class="level-3"><a class="final" href="/sdk/php/"><span>PHP SDK</span></a></li><li class="level-3"><a class="final" href="/sdk/python/"><span>Python SDK</span></a></li><li class="level-3"><a class="final" href="/sdk/ruby/"><span>Ruby SDK</span></a></li><li class="level-3"><a class="final" href="/sdk/community/"><span>Community Powered SDKs</span></a></li></ul></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Deploying an Engine</span></a><ul><li class="level-2"><a class="final" href="/deploy/"><span>Deploying as a Web Service</span></a></li><li class="level-2"><a class="final" href="/cli/#engine-commands"><span>Engine Command-line Interf ace</span></a></li><li class="level-2"><a class="final" href="/deploy/monitoring/"><span>Monitoring Engine</span></a></li><li class="level-2"><a class="final" href="/deploy/engineparams/"><span>Setting Engine Parameters</span></a></li><li class="level-2"><a class="final" href="/deploy/enginevariants/"><span>Deploying Multiple Engine 
Variants</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Customizing an Engine</span></a><ul><li class="level-2"><a class="final" href="/customize/"><span>Learning DASE</span></a></li><li class="level-2"><a class="final" href="/customize/dase/"><span>Implement DASE</span></a></li><li class="level-2"><a class="final" href="/customize/troubleshooting/"><span>Troubleshooting Engine Development</span></a></li><li class="level-2"><a class="final" href="/api/current/#package"><span>Engine Scala APIs</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Collecting and Analyzing Data</span></a><ul><li class="level-2"><a class="final" href="/datacollection/"><span>Event Server Overview</span></a></li><li class="level-2"><a class="final" href="/cli/#event-server-commands"><span>Event Server Command-line Interface</span></a></li><li class="level-2"><a class="final" href="/datacollection/eventapi/"><span>Collecting Data with REST/SDKs</span></a></li><li class="level-2"><a class="final" href="/datacollection/eventmodel/"><span>Events Modeling</span></a></li><li class="level-2"><a class="final" href="/datacollection/webhooks/"><span>Unifying Multichannel Data with Webhooks</span></a></li><li class="level-2"><a class="final" href="/datacollection/channel/"><span>Channel</span></a></li><li class="level-2"><a class="final" href="/datacollection/batchimport/"><span>Importing Data in Batch</span></a></li><li class="level-2"><a class="final" href="/datacollection/analytics/"><span>Using Analytics Tools</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Choosin g an Algorithm(s)</span></a><ul><li class="level-2"><a class="final" href="/algorithm/"><span>Built-in Algorithm Libraries</span></a></li><li class="level-2"><a class="final" href="/algorithm/switch/"><span>Switching to Another Algorithm</span></a></li><li class="level-2"><a class="final" href="/algorithm/multiple/"><span>Combining 
Multiple Algorithms</span></a></li><li class="level-2"><a class="final" href="/algorithm/custom/"><span>Adding Your Own Algorithms</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>ML Tuning and Evaluation</span></a><ul><li class="level-2"><a class="final" href="/evaluation/"><span>Overview</span></a></li><li class="level-2"><a class="final" href="/evaluation/paramtuning/"><span>Hyperparameter Tuning</span></a></li><li class="level-2"><a class="final" href="/evaluation/evaluationdashboard/"><span>Evaluation Dashboard</span></a></li><li class="level-2"><a class="final" href="/evaluation/metricchoose/"><span>Choosing Evaluati on Metrics</span></a></li><li class="level-2"><a class="final" href="/evaluation/metricbuild/"><span>Building Evaluation Metrics</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>System Architecture</span></a><ul><li class="level-2"><a class="final" href="/system/"><span>Architecture Overview</span></a></li><li class="level-2"><a class="final" href="/system/anotherdatastore/"><span>Using Another Data Store</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Engine Template Gallery</span></a><ul><li class="level-2"><a class="final" href="/gallery/template-gallery/"><span>Browse</span></a></li><li class="level-2"><a class="final" href="/community/submit-template/"><span>Submit your Engine as a Template</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Demo Tutorials</span></a><ul><li class="level-2"><a class="final" href="/demo/tapster/"><span>Comics Recommendation Demo</span></a></li><li cla ss="level-2"><a class="final" href="/demo/community/"><span>Community Contributed Demo</span></a></li><li class="level-2"><a class="final" href="/demo/textclassification/"><span>Text Classification Engine Tutorial</span></a></li></ul></li><li class="level-1"><a class="expandible" href="/community/"><span>Getting 
Involved</span></a><ul><li class="level-2"><a class="final" href="/community/contribute-code/"><span>Contribute Code</span></a></li><li class="level-2"><a class="final" href="/community/contribute-documentation/"><span>Contribute Documentation</span></a></li><li class="level-2"><a class="final" href="/community/contribute-sdk/"><span>Contribute a SDK</span></a></li><li class="level-2"><a class="final" href="/community/contribute-webhook/"><span>Contribute a Webhook</span></a></li><li class="level-2"><a class="final" href="/community/projects/"><span>Community Projects</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Getting Help</span></a><ul><li class="level-2"><a class="final" href="/resources/faq/"><span>FAQs</span></a></li><li class="level-2"><a class="final" href="/support/"><span>Support</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Resources</span></a><ul><li class="level-2"><a class="final" href="/resources/intellij/"><span>Developing Engines with IntelliJ IDEA</span></a></li><li class="level-2"><a class="final" href="/resources/upgrade/"><span>Upgrade Instructions</span></a></li><li class="level-2"><a class="final" href="/resources/glossary/"><span>Glossary</span></a></li></ul></li></ul></nav></div><div class="col-md-9 col-sm-12"><div class="content-header hidden-md hidden-lg"><div id="page-title"><h1>Deploying with AWS CloudFormation</h1></div></div><div id="table-of-content-wrapper"><h5>On this page</h5><aside id="table-of-contents"><ul> <li> <a href="#overview">Overview</a> </li> <li> <a href="#step-by-step">Step-by-Step</a> </li> <li> <a href="#using-the-cluster">Using the Cluster</a> </li> <li> <a href="#scaling-the-cluster">Scaling the Cluster</a> </li> <li> <a href="#support-and-pricing">Support and Pricing</a> </li> </ul> </aside><hr/><a id="edit-page-link" 
href="https://github.com/apache/incubator-predictionio/tree/livedoc/docs/manual/source/system/deploy-cloudformation.html.md.erb"><img src="/images/icons/edit-pencil-d6c1bb3d.png"/>Edit this page</a></div><div class="content-header hidden-sm hidden-xs"><div id="page-title"><h1>Deploying with AWS CloudFormation</h1></div></div><div class="content"><h2 id='overview' class='header-anchors'>Overview</h2><p>You can scale PredictionIO on AWS with <a href="http://aws.amazon.com/cloudformation/">CloudFormation</a>. Here we have defined a PredictionIO CloudFormation stack that you can deploy a functional, fully distributed PredictionIO cluster in minutes.</p><h3 id='instances' class='header-anchors'>Instances</h3><p>The PredictionIO CloudFormation stack creates two types of instance: <strong>compute and storage</strong>. By default, the stack will launch <strong>1 compute Instance and 3 Storage instances</strong>.</p><p>The compute instance <em>(ComputeInstance)</em> acts as Spark master. You can launch extra compute instances <em>(ComputeInstanceExtra)</em> by updating the stack. The storage instances <em>(StorageInstance)</em> form the core of the HDFS, ZooKeeper quorum, and HBase storage. Extra storage instances <em>(StorageInstanceExtra)</em> can be added to the cluster by updating the stack. They cannot be removed once they are spinned up.</p><p>PredictionIO Event Server will be launched on all storage instances.</p><h3 id='networking' class='header-anchors'>Networking</h3><p>The stack will automatically create a VPC and a subnet with an Internet gateway. All cluster instances will be launched inside this subnet using a single security group that enables all TCP communications among all instances within the same group. All compute instances (including those that are launched after stack creation) will receive public IPs. 
All core storage instances will receive public Elastic IPs.</p><h2 id='step-by-step' class='header-anchors'>Step-by-Step</h2><p>First, you need to have an active Amazon Web Services account with permissions to use the following services:</p> <ul> <li>Auto Scaling</li> <li>CloudFormation</li> <li>EC2</li> <li>VPC</li> </ul> <h3 id='subscribe-to-predictionio-cluster' class='header-anchors'>Subscribe to PredictionIO Cluster</h3><p>Before you can start using the PredictionIO CloudFormation template, you will need to subscribe to the PredictionIO Cluster AMI through AWS Marketplace. To subscribe, go to this <a href="https://aws.amazon.com/marketplace/pp/B00S74CY0A">link</a>. Click the yellow "Continue" button on the next screen.</p><p><img alt="AWS Marketplace Subscription Page" src="/images/cloudformation/awsmp-1-609eb919.png"/></p><p>In the next screen, click the "Manual Launch" tab, then select your preferred pricing model. Pick your desired version, then click "Accept Terms".</p><p><img alt="AWS Marketplace Term Page" src="/images/cloudformation/awsmp-2-7dd613ba.png"/></p><p>Once your subscription is in place, you may proceed to the next section.</p><h3 id='start-using-cloudformation' class='header-anchors'>Start Using CloudFormation</h3><p>From your main AWS console, locate CloudFormation and click on it.</p><p><img alt="CloudFormation on AWS Console" src="/images/cloudformation/cf-01-13c7f924.png"/></p><p>This will bring you to the CloudFormation console below.</p><p><img alt="CloudFormation Console" src="/images/cloudformation/cf-02-61cd8e67.png"/></p><h3 id='select-the-predictionio-cloudformation-stack-template' class='header-anchors'>Select the PredictionIO CloudFormation Stack Template</h3><p>From the CloudFormation console, click on the <strong>Create New Stack</strong> blue button as shown above. This will bring up the <strong>Select Template</strong> screen. Name your stack as you like. 
Within the <em>Template</em> section, choose <strong>Specify an Amazon S3 template URL</strong>, and put <a href="https://s3.amazonaws.com/cloudformation.prediction.io/0.9.6/pio.json">https://s3.amazonaws.com/cloudformation.prediction.io/0.9.6/pio.json</a> as the value.</p><p><img alt="CloudFormation Stack Template Selection" src="/images/cloudformation/cf-03-d017fa2d.png"/></p><p>Click <strong>Next</strong> when you are done.</p><h3 id='specify-stack-parameters' class='header-anchors'>Specify Stack Parameters</h3><p>The next screen shows the stack parameters. You must enter your AWS SSH key pair. For the other parameters, you can change them to meet your needs or simply use the default values.</p><p><img alt="Stack Parameters" src="/images/cloudformation/cf-04-6b545f50.png"/></p> <table><thead> <tr> <th>Parameter</th> <th>Description</th> </tr> </thead><tbody> <tr> <td>AWS-KeyPair</td> <td>The AWS SSH key pair name that can be used to access all instances in the cluster.</td> </tr> <tr> <td>AvailabilityZone</td> <td>Specify the availability zone that the PredictionIO cluster will be launched in. All instances of the cluster will be launched into the same zone for optimal network performance between one another.</td> </tr> <tr> <td>ComputeInstanceType</td> <td>The EC2 instance type of all compute instances. Memory-optimized EC2 instances are recommended.</td> </tr> <tr> <td>ComputeInstanceExtra</td> <td>Number of extra compute instances besides the core compute instance. This can be increased and decreased.</td> </tr> <tr> <td>StorageInstanceExtra</td> <td>Number of extra storage instances besides core storage instances. <strong>Never decrease this value or you will risk data corruption.</strong></td> </tr> <tr> <td>StorageInstanceExtraSize</td> <td>Size in GB of each extra storage instance. This can be changed when you add an extra storage instance.</td> </tr> <tr> <td>StorageInstanceExtraVolumeType</td> <td>The EBS volume type of each extra storage instance. 
Valid values are <em>standard</em> and <em>gp2</em>. This can be changed when you add an extra storage instance.</td> </tr> <tr> <td>StorageInstanceType</td> <td>The EC2 instance type of all storage instances. General purpose EC2 instances are recommended.</td> </tr> <tr> <td>StorageInstanceSize</td> <td>Size in GB of each core storage instance. This cannot be changed once the cluster is launched.</td> </tr> <tr> <td>StorageInstanceVolumeType</td> <td>The EBS volume type of each core storage instance. Valid values are <em>standard</em> and <em>gp2</em>. This cannot be changed once the cluster is launched.</td> </tr> </tbody></table> <p>Click <strong>Next</strong> when you are done. You will arrive at the <strong>Options</strong> screen. You can skip this step if you do not have other options to specify.</p><p>At the <strong>Review</strong> screen, click <strong>Create</strong> to finish.</p><h2 id='using-the-cluster' class='header-anchors'>Using the Cluster</h2><p>You should see the following when the cluster is being created after the previous step.</p><p><img alt="Stack Creation" src="/images/cloudformation/cf-05-92ee63c2.png"/></p><p>Once the stack creation has finished, you can click on <strong>Events</strong> and select <strong>Outputs</strong> to arrive at the following screen.</p><p><img alt="Completed Stack" src="/images/cloudformation/cf-06-b36528fa.png"/></p><p>Take note of <strong>PIOComputeMasterPublicIp</strong> and <strong>PIOStorageMasterPublicIp</strong>. We will now access the cluster and make sure everything is in place.</p><div class="alert-message warning"><p>Sometimes the stack is created successfully but not all cluster services launch due to potential network glitches or system issues within a cluster instance. 
In this case, simply delete and create the stack again.</p></div><h3 id='verify-compute-instances' class='header-anchors'>Verify Compute Instances</h3><p>SSH to the master compute instance using the <strong>PIOComputeMasterPublicIp</strong>. In this example, let us assume the IP address is 54.175.145.84, and your private key file is <strong>yourkey.pem</strong>.</p><div class="highlight shell"><table style="border-spacing: 0"><tbody><tr><td class="gutter gl" style="text-align: right"><pre class="lineno">1</pre></td><td class="code"><pre><span class="gp">$ </span>ssh -i yourkey.pem -A -L 8080:localhost:8080 [email protected] </pre></td></tr></tbody></table> </div> <p>Once you are in, point your web browser to <a href="http://localhost:8080">http://localhost:8080</a>. You should see something similar to the following.</p><p><img alt="Example Spark UI" src="/images/cloudformation/spark-019c2cf1.png"/></p><div class="alert-message note"><p>In the example above <strong>NumberOfComputeWorkers</strong> is <strong>2</strong>. This is because the example has 1 compute instance and 1 extra compute instance. If you do not have any extra compute instances, you will see only 1 worker on the above page.</p></div><h3 id='verify-storage-instances' class='header-anchors'>Verify Storage Instances</h3><p>SSH to the storage instance using the <strong>PIOStorageMasterPublicIp</strong>. 
In this example, let us assume the IP address is 54.175.1.36, and your private key file is <strong>yourkey.pem</strong>.</p><div class="highlight shell"><table style="border-spacing: 0"><tbody><tr><td class="gutter gl" style="text-align: right"><pre class="lineno">1</pre></td><td class="code"><pre><span class="gp">$ </span>ssh -i yourkey.pem -A -L 50070:localhost:50070 -L 16010:localhost:16010 -L 16030:localhost:16030 [email protected] </pre></td></tr></tbody></table> </div> <p>Once you are in, point your web browser to <a href="http://localhost:50070">http://localhost:50070</a> and click on <strong>Datanodes</strong> at the top menu. You should see the following page.</p><p><img alt="Example HDFS UI" src="/images/cloudformation/hdfs-51f0231f.png"/></p><div class="alert-message warning"><p>All <strong>3 storage instances</strong> must be up for proper operation.</p></div><p>If all <strong>3 storage instances</strong> are working properly, you can then verify HBase by pointing your web browser to <a href="http://localhost:16010">http://localhost:16010</a>. You should see something similar to the following.</p><p><img alt="Example HBase UI" src="/images/cloudformation/hbase-32538c47.png"/></p><p>If you do not specify any extra storage instances, you should see 2 region servers. There should also be 1 backup master.</p><h3 id='running-quick-start' class='header-anchors'>Running Quick Start</h3><p>You can now start with the fully-distributed PredictionIO cluster. Let's start with the <a href="/templates/recommendation/quickstart/">recommendation quick start</a> with a few twists.</p> <ol> <li><p>Skip the installation steps and run <code>pio status</code>. You should see everything functional.</p></li> <li><p>Run through the section <strong>Create a Sample App</strong> as described. 
The installation directory of PredictionIO is <code>/opt/PredictionIO</code>.</p></li> <li><p>Run through the section <strong>Collecting Data</strong> as described, except that you will be connecting to the Event Server at the master core storage instance. Assuming the private IP of the master core storage instance is <code>10.0.0.123</code>, add <code>--url http://10.0.0.123:7070</code> to the <code>import_eventserver.py</code> command.</p></li> <li><p>Copy HBase configuration to the engine template directory. The full path of the configuration file is <code>/opt/hbase-0.98.9-hadoop2/conf/hbase-site.xml</code>. (This step will not be required in future releases.)</p></li> <li><p>Run through the section <strong>Deploy the Engine as a Service</strong> up to the subsection <strong>Training</strong>. Assuming the private DNS name of the master compute instance is <code>ip-10-0-0-234.ec2.internal</code>, add <code>-- --master spark://ip-10-0-0-234.ec2.internal:7077</code> after the <code>pio train</code> command. This will send the training to the compute cluster instead of the local machine. The Spark master URL must match exactly the one shown on its web UI. Repeat the same steps for subsection <strong>Deploying</strong>, which will create an Engine Server backed by the compute cluster.</p></li> </ol> <h2 id='scaling-the-cluster' class='header-anchors'>Scaling the Cluster</h2><p>As your data size and/or audience grow, you can scale your cluster to handle more workload or decrease turnaround time. In this section, we will provide some general guidelines about when and how to scale your cluster with CloudFormation.</p><h3 id='scaling-compute-instances' class='header-anchors'>Scaling Compute Instances</h3><p>You can increase compute instances to reduce training time <em>($pio train)</em> and the time to query an engine server. 
You can also check the [Spark Master Web UI] to see if you need additional compute power.</p><p>Notice that for compute instances, you can increase or decrease the number of extra compute instances <em>(ComputeInstanceExtra)</em> as much as you like. The extra compute instances will join the master and become slave compute instances as Spark workers.</p><p>Let us begin by adding 2 extra compute instances. At the CloudFormation console, right click on the cluster stack and click on <strong>Update Stack</strong>.</p><p><img alt="Updating Stack to Add Extra Compute Instances" src="/images/cloudformation/compute-1-c572ea84.png"/></p><p>At the <strong>Select Template</strong> screen, make sure <strong>Use existing template</strong> is picked, then click <strong>Next</strong>.</p><p>At the <strong>Specify Parameters</strong> screen, increase the value of <strong>ComputeInstanceExtra</strong> to <strong>2</strong>, then click <strong>Next</strong>.</p><p><img alt="Adding 2 Extra Compute Instances" src="/images/cloudformation/compute-2-f7da8e72.png"/></p><p>At the <strong>Options</strong> screen, leave everything unchanged, and click <strong>Next</strong>.</p><p>At the <strong>Review</strong> screen, make sure <strong>ComputeInstanceExtra</strong> is now updated to <strong>2</strong>. Finish by clicking <strong>Update</strong>.</p><p><img alt="Review Changes of Adding Extra Compute Instances" src="/images/cloudformation/compute-3-ee5fc6a8.png"/></p><p>You will be brought back to the CloudFormation console. You should see the stack status changed to <strong>UPDATE_IN_PROGRESS</strong>.</p><p><img alt="Adding Extra Compute Instances In Progress" src="/images/cloudformation/compute-4-67fe0caa.png"/></p><p>Once the status becomes <strong>UPDATE_COMPLETED</strong>, you will have 2 extra compute instances. Notice that during the update, your cluster is still functional and any existing work will not be affected. 
If you are downscaling, existing work might be affected during the update process.</p><h3 id='scaling-storage-instances' class='header-anchors'>Scaling Storage Instances</h3><p>You can scale your storage instances when you are about to run out of space. You can check your storage usage at the [Hadoop NameNode web UI].</p><div class="alert-message warning"><p>For storage instances, you can only increase the number of extra storage instances <em>(StorageInstanceExtra)</em> within the bounds of AWS EC2 limits. Decreasing the instances will risk data corruption.</p></div><p>Let us begin by adding 2 extra storage instances. At the CloudFormation console, right click on the cluster stack and click on <strong>Update Stack</strong>.</p><p><img alt="Updating Stack to Add Extra Storage Instances" src="/images/cloudformation/compute-1-c572ea84.png"/></p><p>At the <strong>Select Template</strong> screen, make sure <strong>Use existing template</strong> is picked, then click <strong>Next</strong>.</p><p>At the <strong>Specify Parameters</strong> screen, increase the value of <strong>StorageInstanceExtra</strong> to <strong>2</strong>, and set the value of <strong>StorageInstanceExtraSize</strong> to <strong>100</strong>, then click <strong>Next</strong>. Notice that whenever you add an extra storage instance, you can change its size to a new value. The new size will not affect existing storage instances and your data will be safe.</p><p><img alt="Adding 2 Extra Storage Instances" src="/images/cloudformation/storage-1-ca3ae3a6.png"/></p><p>At the <strong>Options</strong> screen, leave everything unchanged, and click <strong>Next</strong>.</p><p>At the <strong>Review</strong> screen, make sure <strong>StorageInstanceExtra</strong> is now updated to <strong>2</strong>, and <strong>StorageInstanceExtraSize</strong> is updated to <strong>100</strong>. 
Finish by clicking <strong>Update</strong>.</p><p><img alt="Review Changes of Adding Extra Storage Instances" src="/images/cloudformation/storage-2-9395c9f2.png"/></p><p>You will be brought back to the CloudFormation console. You should see the stack status changed to <strong>UPDATE_IN_PROGRESS</strong>.</p><p><img alt="Adding Extra Compute Instances In Progress" src="/images/cloudformation/compute-4-67fe0caa.png"/></p><p>Once the status becomes <strong>UPDATE_COMPLETED</strong>, you will have 2 extra storage instances. Notice that during the up-scaling update, your cluster is still functional and existing work will not be affected. It may be affected during a downscale.</p><h2 id='support-and-pricing' class='header-anchors'>Support and Pricing</h2><p>PredictionIO Cluster comes with Enterprise Support. For pricing and support details, please contact <a href="mailto:[email protected]">[email protected]</a>.</p></div></div></div> </div><footer><div class="container"><div class="seperator"></div><div class="row"><div class="col-md-6 col-xs-6 footer-link-column"><div class="footer-link-column-row"><h4>Community</h4><ul><li><a href="//docs.prediction.io/install/" target="blank">Download</a></li><li><a href="//docs.prediction.io/" target="blank">Docs</a></li><li><a href="//github.com/apache/incubator-predictionio" target="blank">GitHub</a></li><li><a href="mailto:[email protected]" target="blank">Subscribe to User Mailing List</a></li><li><a href="//stackoverflow.com/questions/tagged/predictionio" target="blank">Stackoverflow</a></li></ul></div></div><div class="col-md-6 col-xs-6 footer-link-column"><div class="footer-link-column-row"><h4>Contribute</h4><ul><li><a href="//predictionio.incubator.apache.org/community/contribute-code/" target="blank">Contribute</a></li><li><a href="//github.com/apache/incubator-predictionio" target="blank">Source Code</a></li><li><a href="//issues.apache.org/jira/browse/PIO" target="blank">Bug Tracker</a></li><li><a href="mailto:[email 
protected]" target="blank">Subscribe to Development Mailing List</a></li></ul></div></div></div></div><div id="footer-bottom"><div class="container"><div class="row"><div class="col-md-12"><div id="footer-logo-wrapper"><img alt="PredictionIO" src="/images/logos/logo-white-d1e9c6e6.png"/></div><div id="social-icons-wrapper"><a class="github-button" href="https://github.com/apache/incubator-predictionio" data-style="mega" data-count-href="/apache/incubator-predictionio/stargazers" data-count-api="/repos/apache/incubator-predictionio#stargazers_count" data-count-aria-label="# stargazers on GitHub" aria-label="Star apache/incubator-predictionio on GitHub">Star</a> <a class="github-button" href="https://github.com/apache/incubator-predictionio/fork" data-icon="octicon-git-branch" data-style="mega" data-count-href="/apache/incubator-predictionio/network" data-count-api="/repos/apache/incubator-predictionio#forks_count" data-count-aria-label="# forks on GitHub" aria-label="Fork apache/incubator-predictionio on GitHub">Fork</a> <script id="github-bjs" async="" defer="" src="https://buttons.github.io/buttons.js"></script><a href="//twitter.com/predictionio" target="blank"><img alt="PredictionIO on Twitter" src="/images/icons/twitter-ea9dc152.png"/></a> <a href="//www.facebook.com/predictionio" target="blank"><img alt="PredictionIO on Facebook" src="/images/icons/facebook-5c57939c.png"/></a> </div></div></div></div></div></footer></div><script>(function(w,d,t,u,n,s,e){w['SwiftypeObject']=n;w[n]=w[n]||function(){ (w[n].q=w[n].q||[]).push(arguments);};s=d.createElement(t);
http://git-wip-us.apache.org/repos/asf/incubator-predictionio-site/blob/c2c7b62a/system/deploy-cloudformation/index.html.gz ---------------------------------------------------------------------- diff --git a/system/deploy-cloudformation/index.html.gz b/system/deploy-cloudformation/index.html.gz index 5ba05ff..0d5bbab 100644 Binary files a/system/deploy-cloudformation/index.html.gz and b/system/deploy-cloudformation/index.html.gz differ http://git-wip-us.apache.org/repos/asf/incubator-predictionio-site/blob/c2c7b62a/system/index.html ---------------------------------------------------------------------- diff --git a/system/index.html b/system/index.html index a0fd216..e67b629 100644 --- a/system/index.html +++ b/system/index.html @@ -1,4 +1,4 @@ -<!DOCTYPE html><html><head><title>System Architecture and Dependencies</title><meta charset="utf-8"/><meta content="IE=edge,chrome=1" http-equiv="X-UA-Compatible"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><meta class="swiftype" name="title" data-type="string" content="System Architecture and Dependencies"/><link rel="canonical" href="https://docs.prediction.io/system/"/><link href="/images/favicon/normal-b330020a.png" rel="shortcut icon"/><link href="/images/favicon/apple-c0febcf2.png" rel="apple-touch-icon"/><link href="//fonts.googleapis.com/css?family=Open+Sans:300italic,400italic,600italic,700italic,800italic,400,300,600,700,800" rel="stylesheet"/><link href="//maxcdn.bootstrapcdn.com/font-awesome/4.2.0/css/font-awesome.min.css" rel="stylesheet"/><link href="/stylesheets/application-a2a2f408.css" rel="stylesheet" type="text/css"/><script src="//cdnjs.cloudflare.com/ajax/libs/html5shiv/3.7.2/html5shiv.min.js"></script><script src="//cdn.mathjax.org/m athjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script><script src="//use.typekit.net/pqo0itb.js"></script><script>try{Typekit.load({ async: true });}catch(e){}</script></head><body><div id="global"><header><div class="container" 
id="header-wrapper"><div class="row"><div class="col-sm-12"><div id="logo-wrapper"><span id="drawer-toggle"></span><a href="#"></a><a href="http://predictionio.incubator.apache.org/"><img alt="PredictionIO" id="logo" src="/images/logos/logo-ee2b9bb3.png"/></a></div><div id="menu-wrapper"><div id="pill-wrapper"><a class="pill left" href="//templates.prediction.io/">TEMPLATES</a> <a class="pill right" href="//github.com/apache/incubator-predictionio/">OPEN SOURCE</a></div></div><img class="mobile-search-bar-toggler hidden-md hidden-lg" src="/images/icons/search-glass-704bd4ff.png"/></div></div></div></header><div id="search-bar-row-wrapper"><div class="container-fluid" id="search-bar-row"><div class="row"><div class="col-md-9 col-sm-11 col-xs-11"><di v class="hidden-md hidden-lg" id="mobile-page-heading-wrapper"><p>PredictionIO Docs</p><h4>Architecture Overview</h4></div><h4 class="hidden-sm hidden-xs">PredictionIO Docs</h4></div><div class="col-md-3 col-sm-1 col-xs-1 hidden-md hidden-lg"><img id="left-menu-indicator" src="/images/icons/down-arrow-dfe9f7fe.png"/></div><div class="col-md-3 col-sm-12 col-xs-12 swiftype-wrapper"><div class="swiftype"><form class="search-form"><img class="search-box-toggler hidden-xs hidden-sm" src="/images/icons/search-glass-704bd4ff.png"/><div class="search-box"><img src="/images/icons/search-glass-704bd4ff.png"/><input type="text" id="st-search-input" class="st-search-input" placeholder="Search Doc..."/></div><img class="swiftype-row-hider hidden-md hidden-lg" src="/images/icons/drawer-toggle-active-fcbef12a.png"/></form></div></div><div class="mobile-left-menu-toggler hidden-md hidden-lg"></div></div></div></div><div id="page" class="container-fluid"><div class="row"><div id="left-menu-wrapper" class="col-md-3"><nav id="nav-main"><ul><li class="level-1"><a class="expandible" href="/"><span>Apache PredictionIO (incubating) Documentation</span></a><ul><li class="level-2"><a class="final" href="/"><span>Welcome to Apache PredictionIO 
(incubating)</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Getting Started</span></a><ul><li class="level-2"><a class="final" href="/start/"><span>A Quick Intro</span></a></li><li class="level-2"><a class="final" href="/install/"><span>Installing Apache PredictionIO (incubating)</span></a></li><li class="level-2"><a class="final" href="/start/download/"><span>Downloading an Engine Template</span></a></li><li class="level-2"><a class="final" href="/start/deploy/"><span>Deploying Your First Engine</span></a></li><li class="level-2"><a class="final" href="/start/customize/"><span>Customizing the Engine</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Integrating with Your App</span ></a><ul><li class="level-2"><a class="final" >href="/appintegration/"><span>App Integration Overview</span></a></li><li >class="level-2"><a class="expandible" href="/sdk/"><span>List of >SDKs</span></a><ul><li class="level-3"><a class="final" >href="/sdk/java/"><span>Java & Android SDK</span></a></li><li >class="level-3"><a class="final" href="/sdk/php/"><span>PHP >SDK</span></a></li><li class="level-3"><a class="final" >href="/sdk/python/"><span>Python SDK</span></a></li><li class="level-3"><a >class="final" href="/sdk/ruby/"><span>Ruby SDK</span></a></li><li >class="level-3"><a class="final" href="/sdk/community/"><span>Community >Powered SDKs</span></a></li></ul></li></ul></li><li class="level-1"><a >class="expandible" href="#"><span>Deploying an Engine</span></a><ul><li >class="level-2"><a class="final" href="/deploy/"><span>Deploying as a Web >Service</span></a></li><li class="level-2"><a class="final" >href="/cli/#engine-commands"><span>Engine Command-line >Interface</span></a></li><li clas s="level-2"><a class="final" href="/deploy/monitoring/"><span>Monitoring Engine</span></a></li><li class="level-2"><a class="final" href="/deploy/engineparams/"><span>Setting Engine Parameters</span></a></li><li 
class="level-2"><a class="final" href="/deploy/enginevariants/"><span>Deploying Multiple Engine Variants</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Customizing an Engine</span></a><ul><li class="level-2"><a class="final" href="/customize/"><span>Learning DASE</span></a></li><li class="level-2"><a class="final" href="/customize/dase/"><span>Implement DASE</span></a></li><li class="level-2"><a class="final" href="/customize/troubleshooting/"><span>Troubleshooting Engine Development</span></a></li><li class="level-2"><a class="final" href="/api/current/#package"><span>Engine Scala APIs</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Collecting and Analyzing Data</span></a><ul><li class="level-2"><a class=" final" href="/datacollection/"><span>Event Server Overview</span></a></li><li class="level-2"><a class="final" href="/cli/#event-server-commands"><span>Event Server Command-line Interface</span></a></li><li class="level-2"><a class="final" href="/datacollection/eventapi/"><span>Collecting Data with REST/SDKs</span></a></li><li class="level-2"><a class="final" href="/datacollection/eventmodel/"><span>Events Modeling</span></a></li><li class="level-2"><a class="final" href="/datacollection/webhooks/"><span>Unifying Multichannel Data with Webhooks</span></a></li><li class="level-2"><a class="final" href="/datacollection/channel/"><span>Channel</span></a></li><li class="level-2"><a class="final" href="/datacollection/batchimport/"><span>Importing Data in Batch</span></a></li><li class="level-2"><a class="final" href="/datacollection/analytics/"><span>Using Analytics Tools</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Choosing an Algorithm(s)</span></a ><ul><li class="level-2"><a class="final" href="/algorithm/"><span>Built-in >Algorithm Libraries</span></a></li><li class="level-2"><a class="final" >href="/algorithm/switch/"><span>Switching to Another 
>Algorithm</span></a></li><li class="level-2"><a class="final" >href="/algorithm/multiple/"><span>Combining Multiple >Algorithms</span></a></li><li class="level-2"><a class="final" >href="/algorithm/custom/"><span>Adding Your Own >Algorithms</span></a></li></ul></li><li class="level-1"><a class="expandible" >href="#"><span>ML Tuning and Evaluation</span></a><ul><li class="level-2"><a >class="final" href="/evaluation/"><span>Overview</span></a></li><li >class="level-2"><a class="final" >href="/evaluation/paramtuning/"><span>Hyperparameter >Tuning</span></a></li><li class="level-2"><a class="final" >href="/evaluation/evaluationdashboard/"><span>Evaluation >Dashboard</span></a></li><li class="level-2"><a class="final" >href="/evaluation/metricchoose/"><span>Choosing Evaluation >Metrics</span></a></li>< li class="level-2"><a class="final" href="/evaluation/metricbuild/"><span>Building Evaluation Metrics</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>System Architecture</span></a><ul><li class="level-2"><a class="final active" href="/system/"><span>Architecture Overview</span></a></li><li class="level-2"><a class="final" href="/system/anotherdatastore/"><span>Using Another Data Store</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Engine Template Gallery</span></a><ul><li class="level-2"><a class="final" href="/gallery/template-gallery/"><span>Browse</span></a></li><li class="level-2"><a class="final" href="/community/submit-template/"><span>Submit your Engine as a Template</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Demo Tutorials</span></a><ul><li class="level-2"><a class="final" href="/demo/tapster/"><span>Comics Recommendation Demo</span></a></li><li class="level-2"><a clas s="final" href="/demo/community/"><span>Community Contributed Demo</span></a></li><li class="level-2"><a class="final" href="/demo/textclassification/"><span>Text Classification Engine 
Tutorial</span></a></li></ul></li><li class="level-1"><a class="expandible" href="/community/"><span>Getting Involved</span></a><ul><li class="level-2"><a class="final" href="/community/contribute-code/"><span>Contribute Code</span></a></li><li class="level-2"><a class="final" href="/community/contribute-documentation/"><span>Contribute Documentation</span></a></li><li class="level-2"><a class="final" href="/community/contribute-sdk/"><span>Contribute a SDK</span></a></li><li class="level-2"><a class="final" href="/community/contribute-webhook/"><span>Contribute a Webhook</span></a></li><li class="level-2"><a class="final" href="/community/projects/"><span>Community Projects</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Getting Help</span></a><ul><li class="level-2" ><a class="final" href="/resources/faq/"><span>FAQs</span></a></li><li >class="level-2"><a class="final" >href="/support/"><span>Support</span></a></li></ul></li><li >class="level-1"><a class="expandible" >href="#"><span>Resources</span></a><ul><li class="level-2"><a class="final" >href="/resources/intellij/"><span>Developing Engines with IntelliJ >IDEA</span></a></li><li class="level-2"><a class="final" >href="/resources/upgrade/"><span>Upgrade Instructions</span></a></li><li >class="level-2"><a class="final" >href="/resources/glossary/"><span>Glossary</span></a></li></ul></li></ul></nav></div><div > class="col-md-9 col-sm-12"><div class="content-header hidden-md >hidden-lg"><div id="breadcrumbs" class="hidden-sm hidden xs"><ul><li><a >href="#">System Architecture</a><span >class="spacer">></span></li><li><span class="last">Architecture >Overview</span></li></ul></div><div id="page-title"><h1>System Architecture >and Dependencies</h1></div></div><div id="table-of-content-wrapper"><a >id="edit-pa ge-link" href="https://github.com/apache/incubator-predictionio/tree/livedoc/docs/manual/source/system/index.html.md"><img src="/images/icons/edit-pencil-d6c1bb3d.png"/>Edit 
this page</a></div><div class="content-header hidden-sm hidden-xs"><div id="breadcrumbs" class="hidden-sm hidden xs"><ul><li><a href="#">System Architecture</a><span class="spacer">></span></li><li><span class="last">Architecture Overview</span></li></ul></div><div id="page-title"><h1>System Architecture and Dependencies</h1></div></div><div class="content"><p>During the <a href="/install">installation</a>, you have installed the following software:</p> <ul> <li>Apache Hadoop 2.4.0 (required only if YARN and HDFS are needed)</li> <li>Apache HBase 0.98.6</li> <li>Apache Spark 1.2.0 for Hadoop 2.4</li> <li>Elasticsearch 1.4.0</li> </ul> <p>This section explains how they are used in PredictionIO.</p><p><img alt="PredictionIO Systems" src="/images/0.8-engine-data-pipeline-49b451b4.png"/></p><p><strong>HBase</strong>: Event Server uses Apache HBase as the data store. It stores imported events. If you are not using the PredictionIO Event Server, you do not need to install HBase.</p><p><strong>Apache Spark</strong>: Spark is a large-scale data processing engine that powers the algorithm, training, and serving processing.</p><p>A Spark algorithm differs from a conventional single-machine algorithm in that Spark algorithms use the <a href="http://spark.apache.org/docs/1.0.1/programming-guide.html#resilient-distributed-datasets-rdds">RDD</a> abstraction as their primary data type. The PredictionIO framework natively supports both RDD-based algorithms and traditional single-machine algorithms.</p><p><strong>HDFS</strong>: The output of training has two parts: a model and its meta-data. 
The model is then stored in HDFS or a local file system.</p><p><strong>Elasticsearch</strong>: It stores metadata such as model versions, engine versions, access key and app id mappings, evaluation results, etc.</p></div></div></div></div><footer><div class="container"><div class="seperator"></div><div class="row"><div class="col-md-6 col-xs-6 footer-link-column"><div class="footer-link-column-row"><h4>Community</h4><ul><li><a href="//docs.prediction.io/install/" target="blank">Download</a></li><li><a href="//docs.prediction.io/" target="blank">Docs</a></li><li><a href="//github.com/apache/incubator-predictionio" target="blank">GitHub</a></li><li><a href="mailto:[email protected]" target="blank">Subscribe to User Mailing List</a></li><li><a href="//stackoverflow.com/questions/tagged/predictionio" target="blank">Stackoverflow</a></li></ul></div></div><div class="col-md-6 col-xs-6 footer-link-column"><div class="footer-link-column-row"><h4>Contribute</h4><ul><li><a href="//predictionio.incubator.apache.org/community/contribute-code/" target="blank">Contribute</a></li><li><a href="//github.com/apache/incubator-predictionio" target="blank">Source Code</a></li><li><a href="//issues.apache.org/jira/browse/PIO" target="blank">Bug Tracker</a></li><li><a href="mailto:[email protected]" target="blank">Subscribe to Development Mailing List</a></li></ul></div></div></div></div><div id="footer-bottom"><div class="container"><div class="row"><div class="col-md-12"><div id="footer-logo-wrapper"><img alt="PredictionIO" src="/images/logos/logo-white-d1e9c6e6.png"/></div><div id="social-icons-wrapper"><a class="github-button" href="https://github.com/apache/incubator-predictionio" data-style="mega" data-count-href="/apache/incubator-predictionio/stargazers" data-count-api="/repos/apache/incubator-predictionio#stargazers_count" data-count-aria-label="# stargazers on GitHub" aria-label="Star apache/incubator-predictionio on GitHub">Star</a> <a class="github-button" 
href="https://github.com/apache/incubator-predictionio/fork" data-icon="octicon-git-branch" data-style="mega" data-count-href="/apache/incubator-predictionio/network" data-count-api="/repos/apache/incubator-predictionio#forks_count" data-count-aria-label="# forks on GitHub" aria-label="Fork apache/incubator-predictionio on GitHub">Fork</a> <script id="github-bjs" async="" defer="" src="https://buttons.github.io/buttons.js"></script><a href="//www.facebook.com/predictionio" target="blank"><img alt="PredictionIO on Twitter" src="/images/icons/twitter-ea9dc152.png"/></a> <a href="//twitter.com/predictionio" target="blank"><img alt="PredictionIO on Facebook" src="/images/icons/facebook-5c57939c.png"/></a> </div></div></div></div></div></footer></div><script>(function(w,d,t,u,n,s,e){w['SwiftypeObject']=n;w[n]=w[n]||function(){ +<!DOCTYPE html><html><head><title>System Architecture and Dependencies</title><meta charset="utf-8"/><meta content="IE=edge,chrome=1" http-equiv="X-UA-Compatible"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><meta class="swiftype" name="title" data-type="string" content="System Architecture and Dependencies"/><link rel="canonical" href="https://docs.prediction.io/system/"/><link href="/images/favicon/normal-b330020a.png" rel="shortcut icon"/><link href="/images/favicon/apple-c0febcf2.png" rel="apple-touch-icon"/><link href="//fonts.googleapis.com/css?family=Open+Sans:300italic,400italic,600italic,700italic,800italic,400,300,600,700,800" rel="stylesheet"/><link href="//maxcdn.bootstrapcdn.com/font-awesome/4.2.0/css/font-awesome.min.css" rel="stylesheet"/><link href="/stylesheets/application-a2a2f408.css" rel="stylesheet" type="text/css"/><script src="//cdnjs.cloudflare.com/ajax/libs/html5shiv/3.7.2/html5shiv.min.js"></script><script src="//cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script><script src="//use.typekit.net/pqo0itb.js"></script><script>try{Typekit.load({ async: true 
});}catch(e){}</script></head><body><div id="global"><header><div class="container" id="header-wrapper"><div class="row"><div class="col-sm-12"><div id="logo-wrapper"><span id="drawer-toggle"></span><a href="#"></a><a href="http://predictionio.incubator.apache.org/"><img alt="PredictionIO" id="logo" src="/images/logos/logo-ee2b9bb3.png"/></a></div><div id="menu-wrapper"><div id="pill-wrapper"><a class="pill left" href="/gallery/template-gallery">TEMPLATES</a> <a class="pill right" href="//github.com/apache/incubator-predictionio/">OPEN SOURCE</a></div></div><img class="mobile-search-bar-toggler hidden-md hidden-lg" src="/images/icons/search-glass-704bd4ff.png"/></div></div></div></header><div id="search-bar-row-wrapper"><div class="container-fluid" id="search-bar-row"><div class="row"><div class="col-md-9 col-sm-11 col-xs-11"><div class="hidden-md hidden-lg" id="mobile-page-heading-wrapper"><p>PredictionIO Docs</p><h4>Architecture Overview</h4></div><h4 class="hidden-sm hidden-xs">PredictionIO Docs</h4></div><div class="col-md-3 col-sm-1 col-xs-1 hidden-md hidden-lg"><img id="left-menu-indicator" src="/images/icons/down-arrow-dfe9f7fe.png"/></div><div class="col-md-3 col-sm-12 col-xs-12 swiftype-wrapper"><div class="swiftype"><form class="search-form"><img class="search-box-toggler hidden-xs hidden-sm" src="/images/icons/search-glass-704bd4ff.png"/><div class="search-box"><img src="/images/icons/search-glass-704bd4ff.png"/><input type="text" id="st-search-input" class="st-search-input" placeholder="Search Doc..."/></div><img class="swiftype-row-hider hidden-md hidden-lg" src="/images/icons/drawer-toggle-active-fcbef12a.png"/></form></div></div><div class="mobile-left-menu-toggler hidden-md hidden-lg"></div></div></div></div><div id="page" class="container-fluid"><div class="row"><div id="left-menu-wrapper" class="col-md-3"><nav id="nav-main"><ul><li class="level-1"><a class="expandible" href="/"><span>Apache PredictionIO (incubating) 
Documentation</span></a><ul><li class="level-2"><a class="final" href="/"><span>Welcome to Apache PredictionIO (incubating)</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Getting Started</span></a><ul><li class="level-2"><a class="final" href="/start/"><span>A Quick Intro</span></a></li><li class="level-2"><a class="final" href="/install/"><span>Installing Apache PredictionIO (incubating)</span></a></li><li class="level-2"><a class="final" href="/start/download/"><span>Downloading an Engine Template</span></a></li><li class="level-2"><a class="final" href="/start/deploy/"><span>Deploying Your First Engine</span></a></li><li class="level-2"><a class="final" href="/start/customize/"><span>Customizing the Engine</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Integrating with Your App</span> </a><ul><li class="level-2"><a class="final" href="/appintegration/"><span>App Integration Overview</span></a></li><li class="level-2"><a class="expandible" href="/sdk/"><span>List of SDKs</span></a><ul><li class="level-3"><a class="final" href="/sdk/java/"><span>Java & Android SDK</span></a></li><li class="level-3"><a class="final" href="/sdk/php/"><span>PHP SDK</span></a></li><li class="level-3"><a class="final" href="/sdk/python/"><span>Python SDK</span></a></li><li class="level-3"><a class="final" href="/sdk/ruby/"><span>Ruby SDK</span></a></li><li class="level-3"><a class="final" href="/sdk/community/"><span>Community Powered SDKs</span></a></li></ul></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Deploying an Engine</span></a><ul><li class="level-2"><a class="final" href="/deploy/"><span>Deploying as a Web Service</span></a></li><li class="level-2"><a class="final" href="/cli/#engine-commands"><span>Engine Command-line Interface</span></a></li><li class ="level-2"><a class="final" href="/deploy/monitoring/"><span>Monitoring Engine</span></a></li><li class="level-2"><a class="final" 
href="/deploy/engineparams/"><span>Setting Engine Parameters</span></a></li><li class="level-2"><a class="final" href="/deploy/enginevariants/"><span>Deploying Multiple Engine Variants</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Customizing an Engine</span></a><ul><li class="level-2"><a class="final" href="/customize/"><span>Learning DASE</span></a></li><li class="level-2"><a class="final" href="/customize/dase/"><span>Implement DASE</span></a></li><li class="level-2"><a class="final" href="/customize/troubleshooting/"><span>Troubleshooting Engine Development</span></a></li><li class="level-2"><a class="final" href="/api/current/#package"><span>Engine Scala APIs</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Collecting and Analyzing Data</span></a><ul><li class="level-2"><a class="final" href="/datacollection/"><span>Event Server Overview</span></a></li><li class="level-2"><a class="final" href="/cli/#event-server-commands"><span>Event Server Command-line Interface</span></a></li><li class="level-2"><a class="final" href="/datacollection/eventapi/"><span>Collecting Data with REST/SDKs</span></a></li><li class="level-2"><a class="final" href="/datacollection/eventmodel/"><span>Events Modeling</span></a></li><li class="level-2"><a class="final" href="/datacollection/webhooks/"><span>Unifying Multichannel Data with Webhooks</span></a></li><li class="level-2"><a class="final" href="/datacollection/channel/"><span>Channel</span></a></li><li class="level-2"><a class="final" href="/datacollection/batchimport/"><span>Importing Data in Batch</span></a></li><li class="level-2"><a class="final" href="/datacollection/analytics/"><span>Using Analytics Tools</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Choosing an Algorithm(s)</span></a> <ul><li class="level-2"><a class="final" href="/algorithm/"><span>Built-in Algorithm Libraries</span></a></li><li class="level-2"><a 
class="final" href="/algorithm/switch/"><span>Switching to Another Algorithm</span></a></li><li class="level-2"><a class="final" href="/algorithm/multiple/"><span>Combining Multiple Algorithms</span></a></li><li class="level-2"><a class="final" href="/algorithm/custom/"><span>Adding Your Own Algorithms</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>ML Tuning and Evaluation</span></a><ul><li class="level-2"><a class="final" href="/evaluation/"><span>Overview</span></a></li><li class="level-2"><a class="final" href="/evaluation/paramtuning/"><span>Hyperparameter Tuning</span></a></li><li class="level-2"><a class="final" href="/evaluation/evaluationdashboard/"><span>Evaluation Dashboard</span></a></li><li class="level-2"><a class="final" href="/evaluation/metricchoose/"><span>Choosing Evaluation Metrics</span></a></li><li class="level-2"><a class="final" href="/evaluation/metricbuild/"><span>Building Evaluation Metrics</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>System Architecture</span></a><ul><li class="level-2"><a class="final active" href="/system/"><span>Architecture Overview</span></a></li><li class="level-2"><a class="final" href="/system/anotherdatastore/"><span>Using Another Data Store</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Engine Template Gallery</span></a><ul><li class="level-2"><a class="final" href="/gallery/template-gallery/"><span>Browse</span></a></li><li class="level-2"><a class="final" href="/community/submit-template/"><span>Submit your Engine as a Template</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Demo Tutorials</span></a><ul><li class="level-2"><a class="final" href="/demo/tapster/"><span>Comics Recommendation Demo</span></a></li><li class="level-2"><a class="final" href="/demo/community/"><span>Community Contributed Demo</span></a></li><li class="level-2"><a class="final" 
href="/demo/textclassification/"><span>Text Classification Engine Tutorial</span></a></li></ul></li><li class="level-1"><a class="expandible" href="/community/"><span>Getting Involved</span></a><ul><li class="level-2"><a class="final" href="/community/contribute-code/"><span>Contribute Code</span></a></li><li class="level-2"><a class="final" href="/community/contribute-documentation/"><span>Contribute Documentation</span></a></li><li class="level-2"><a class="final" href="/community/contribute-sdk/"><span>Contribute a SDK</span></a></li><li class="level-2"><a class="final" href="/community/contribute-webhook/"><span>Contribute a Webhook</span></a></li><li class="level-2"><a class="final" href="/community/projects/"><span>Community Projects</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Getting Help</span></a><ul><li class="level-2"> <a class="final" href="/resources/faq/"><span>FAQs</span></a></li><li class="level-2"><a class="final" href="/support/"><span>Support</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Resources</span></a><ul><li class="level-2"><a class="final" href="/resources/intellij/"><span>Developing Engines with IntelliJ IDEA</span></a></li><li class="level-2"><a class="final" href="/resources/upgrade/"><span>Upgrade Instructions</span></a></li><li class="level-2"><a class="final" href="/resources/glossary/"><span>Glossary</span></a></li></ul></li></ul></nav></div><div class="col-md-9 col-sm-12"><div class="content-header hidden-md hidden-lg"><div id="breadcrumbs" class="hidden-sm hidden xs"><ul><li><a href="#">System Architecture</a><span class="spacer">></span></li><li><span class="last">Architecture Overview</span></li></ul></div><div id="page-title"><h1>System Architecture and Dependencies</h1></div></div><div id="table-of-content-wrapper"><a id="edit-page-link" href="https://github.com/apache/incubator-predictionio/tree/livedoc/docs/manual/source/system/index.html.md"><img 
src="/images/icons/edit-pencil-d6c1bb3d.png"/>Edit this page</a></div><div class="content-header hidden-sm hidden-xs"><div id="breadcrumbs" class="hidden-sm hidden xs"><ul><li><a href="#">System Architecture</a><span class="spacer">></span></li><li><span class="last">Architecture Overview</span></li></ul></div><div id="page-title"><h1>System Architecture and Dependencies</h1></div></div><div class="content"><p>During the <a href="/install">installation</a>, you installed the following software:</p> <ul> <li>Apache Hadoop 2.4.0 (required only if YARN and HDFS are needed)</li> <li>Apache HBase 0.98.6</li> <li>Apache Spark 1.2.0 for Hadoop 2.4</li> <li>Elasticsearch 1.4.0</li> </ul> <p>This section explains how they are used in PredictionIO.</p><p><img alt="PredictionIO Systems" src="/images/0.8-engine-data-pipeline-49b451b4.png"/></p><p><strong>HBase</strong>: Event Server uses Apache HBase as the data store. It stores imported events. If you are not using the PredictionIO Event Server, you do not need to install HBase.</p><p><strong>Apache Spark</strong>: Spark is a large-scale data processing engine that powers the algorithm, training, and serving processing.</p><p>A Spark algorithm differs from a conventional single-machine algorithm in that Spark algorithms use the <a href="http://spark.apache.org/docs/1.0.1/programming-guide.html#resilient-distributed-datasets-rdds">RDD</a> abstraction as their primary data type. The PredictionIO framework natively supports both RDD-based algorithms and traditional single-machine algorithms.</p><p><strong>HDFS</strong>: The output of training has two parts: a model and its metadata. 
The model is then stored in HDFS or a local file system.</p><p><strong>Elasticsearch</strong>: It stores metadata such as model versions, engine versions, access key and app id mappings, evaluation results, etc.</p></div></div></div></div><footer><div class="container"><div class="seperator"></div><div class="row"><div class="col-md-6 col-xs-6 footer-link-column"><div class="footer-link-column-row"><h4>Community</h4><ul><li><a href="//docs.prediction.io/install/" target="blank">Download</a></li><li><a href="//docs.prediction.io/" target="blank">Docs</a></li><li><a href="//github.com/apache/incubator-predictionio" target="blank">GitHub</a></li><li><a href="mailto:[email protected]" target="blank">Subscribe to User Mailing List</a></li><li><a href="//stackoverflow.com/questions/tagged/predictionio" target="blank">Stackoverflow</a></li></ul></div></div><div class="col-md-6 col-xs-6 footer-link-column"><div class="footer-link-column-row"><h4>Contribute</h4><ul><li><a href="//predictionio.incubator.apache.org/community/contribute-code/" target="blank">Contribute</a></li><li><a href="//github.com/apache/incubator-predictionio" target="blank">Source Code</a></li><li><a href="//issues.apache.org/jira/browse/PIO" target="blank">Bug Tracker</a></li><li><a href="mailto:[email protected]" target="blank">Subscribe to Development Mailing List</a></li></ul></div></div></div></div><div id="footer-bottom"><div class="container"><div class="row"><div class="col-md-12"><div id="footer-logo-wrapper"><img alt="PredictionIO" src="/images/logos/logo-white-d1e9c6e6.png"/></div><div id="social-icons-wrapper"><a class="github-button" href="https://github.com/apache/incubator-predictionio" data-style="mega" data-count-href="/apache/incubator-predictionio/stargazers" data-count-api="/repos/apache/incubator-predictionio#stargazers_count" data-count-aria-label="# stargazers on GitHub" aria-label="Star apache/incubator-predictionio on GitHub">Star</a> <a class="github-button" 
href="https://github.com/apache/incubator-predictionio/fork" data-icon="octicon-git-branch" data-style="mega" data-count-href="/apache/incubator-predictionio/network" data-count-api="/repos/apache/incubator-predictionio#forks_count" data-count-aria-label="# forks on GitHub" aria-label="Fork apache/incubator-predictionio on GitHub">Fork</a> <script id="github-bjs" async="" defer="" src="https://buttons.github.io/buttons.js"></script><a href="//www.facebook.com/predictionio" target="blank"><img alt="PredictionIO on Twitter" src="/images/icons/twitter-ea9dc152.png"/></a> <a href="//twitter.com/predictionio" target="blank"><img alt="PredictionIO on Facebook" src="/images/icons/facebook-5c57939c.png"/></a> </div></div></div></div></div></footer></div><script>(function(w,d,t,u,n,s,e){w['SwiftypeObject']=n;w[n]=w[n]||function(){ (w[n].q=w[n].q||[]).push(arguments);};s=d.createElement(t); e=d.getElementsByTagName(t)[0];s.async=1;s.src=u;e.parentNode.insertBefore(s,e); })(window,document,'script','//s.swiftypecdn.com/install/v1/st.js','_st'); http://git-wip-us.apache.org/repos/asf/incubator-predictionio-site/blob/c2c7b62a/system/index.html.gz ---------------------------------------------------------------------- diff --git a/system/index.html.gz b/system/index.html.gz index c2782c4..266b142 100644 Binary files a/system/index.html.gz and b/system/index.html.gz differ
