Repository: storm-site
Updated Branches:
  refs/heads/asf-site [created] d070189e0


http://git-wip-us.apache.org/repos/asf/storm-site/blob/d070189e/content/releases/0.10.0/Concepts.html
----------------------------------------------------------------------
diff --git a/content/releases/0.10.0/Concepts.html 
b/content/releases/0.10.0/Concepts.html
new file mode 100644
index 0000000..0b62cfc
--- /dev/null
+++ b/content/releases/0.10.0/Concepts.html
@@ -0,0 +1,366 @@
+<!DOCTYPE html>
+<html>
+    <head>
+    <meta charset="utf-8">
+    <meta http-equiv="X-UA-Compatible" content="IE=edge">
+    <meta name="viewport" content="width=device-width, initial-scale=1">
+
+    <link rel="shortcut icon" href="/favicon.ico" type="image/x-icon">
+    <link rel="icon" href="/favicon.ico" type="image/x-icon">
+
+    <title>Concepts</title>
+
+    <!-- Bootstrap core CSS -->
+    <link href="/assets/css/bootstrap.min.css" rel="stylesheet">
+    <!-- Bootstrap theme -->
+    <link href="/assets/css/bootstrap-theme.min.css" rel="stylesheet">
+
+    <!-- Custom styles for this template -->
+    <link rel="stylesheet" 
href="http://fortawesome.github.io/Font-Awesome/assets/font-awesome/css/font-awesome.css">
+    <link href="/css/style.css" rel="stylesheet">
+    <link href="/assets/css/owl.theme.css" rel="stylesheet">
+    <link href="/assets/css/owl.carousel.css" rel="stylesheet">
+    <script type="text/javascript" src="/assets/js/jquery.min.js"></script>
+    <script type="text/javascript" src="/assets/js/bootstrap.min.js"></script>
+    <script type="text/javascript" 
src="/assets/js/owl.carousel.min.js"></script>
+    <script type="text/javascript" src="/assets/js/storm.js"></script>
+    <!-- Just for debugging purposes. Don't actually copy these 2 lines! -->
+    <!--[if lt IE 9]><script 
src="../../assets/js/ie8-responsive-file-warning.js"></script><![endif]-->
+    
+    <!-- HTML5 shim and Respond.js for IE8 support of HTML5 elements and media 
queries -->
+    <!--[if lt IE 9]>
+      <script 
src="https://oss.maxcdn.com/html5shiv/3.7.2/html5shiv.min.js"></script>
+      <script 
src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
+    <![endif]-->
+  </head>
+
+
+  <body>
+    <header>
+  <div class="container-fluid">
+     <div class="row">
+          <div class="col-md-5">
+            <a href="/index.html"><img src="/images/logo.png" class="logo" 
/></a>
+          </div>
+          <div class="col-md-5">
+            
+              <h1>Version: 0.10.0</h1>
+            
+          </div>
+          <div class="col-md-2">
+            <a href="/downloads.html" class="btn-std btn-block 
btn-download">Download</a>
+          </div>
+        </div>
+    </div>
+</header>
+<!--Header End-->
+<!--Navigation Begin-->
+<div class="navbar" role="banner">
+  <div class="container-fluid">
+      <div class="navbar-header">
+          <button class="navbar-toggle" type="button" data-toggle="collapse" 
data-target=".bs-navbar-collapse">
+                <span class="icon-bar"></span>
+                <span class="icon-bar"></span>
+                <span class="icon-bar"></span>
+            </button>
+        </div>
+        <nav class="collapse navbar-collapse bs-navbar-collapse" 
role="navigation">
+          <ul class="nav navbar-nav">
+              <li><a href="/index.html" id="home">Home</a></li>
+                <li><a href="/getting-help.html" id="getting-help">Getting 
Help</a></li>
+                <li><a href="/about/integrates.html" id="project-info">Project 
Information</a></li>
+                <li class="dropdown">
+                    <a href="#" class="dropdown-toggle" data-toggle="dropdown" 
id="documentation">Documentation <b class="caret"></b></a>
+                    <ul class="dropdown-menu">
+                      
+                        
+                          <li><a 
href="/releases/2.0.0-SNAPSHOT/index.html">2.0.0-SNAPSHOT</a></li>
+                        
+                      
+                        
+                          <li><a 
href="/releases/1.1.0/index.html">1.1.0</a></li>
+                        
+                      
+                        
+                      
+                        
+                          <li><a 
href="/releases/1.0.4/index.html">1.0.4</a></li>
+                        
+                      
+                        
+                      
+                        
+                          <li><a 
href="/releases/1.0.3/index.html">1.0.3</a></li>
+                        
+                      
+                        
+                          <li><a 
href="/releases/1.0.2/index.html">1.0.2</a></li>
+                        
+                      
+                        
+                          <li><a 
href="/releases/1.0.1/index.html">1.0.1</a></li>
+                        
+                      
+                        
+                          <li><a 
href="/releases/1.0.0/index.html">1.0.0</a></li>
+                        
+                      
+                        
+                          <li><a 
href="/releases/0.10.2/index.html">0.10.2</a></li>
+                        
+                      
+                        
+                          <li><a 
href="/releases/0.10.1/index.html">0.10.1</a></li>
+                        
+                      
+                        
+                      
+                        
+                          <li><a 
href="/releases/0.10.0/index.html">0.10.0</a></li>
+                        
+                      
+                        
+                          <li><a 
href="/releases/0.9.7/index.html">0.9.7</a></li>
+                        
+                      
+                        
+                          <li><a 
href="/releases/0.9.6/index.html">0.9.6</a></li>
+                        
+                      
+                        
+                      
+                        
+                      
+                        
+                      
+                        
+                      
+                        
+                      
+                    </ul>
+                </li>
+                <li><a href="/talksAndVideos.html">Talks and 
Slideshows</a></li>
+                <li class="dropdown">
+                    <a href="#" class="dropdown-toggle" data-toggle="dropdown" 
id="contribute">Community <b class="caret"></b></a>
+                    <ul class="dropdown-menu">
+                        <li><a 
href="/contribute/Contributing-to-Storm.html">Contributing</a></li>
+                        <li><a href="/contribute/People.html">People</a></li>
+                        <li><a href="/contribute/BYLAWS.html">ByLaws</a></li>
+                    </ul>
+                </li>
+                <li><a href="/2017/07/28/storm104-released.html" 
id="news">News</a></li>
+            </ul>
+        </nav>
+    </div>
+</div>
+
+
+
+    <div class="container-fluid">
+    <h1 class="page-title">Concepts</h1>
+          <div class="row">
+               <div class="col-md-12">
+                    <!-- Documentation -->
+
+<p class="post-meta"></p>
+
+<p>This page lists the main concepts of Storm and links to resources where you 
can find more information. The concepts discussed are:</p>
+
+<ol>
+<li>Topologies</li>
+<li>Streams</li>
+<li>Spouts</li>
+<li>Bolts</li>
+<li>Stream groupings</li>
+<li>Reliability</li>
+<li>Tasks</li>
+<li>Workers</li>
+</ol>
+
+<h3 id="topologies">Topologies</h3>
+
+<p>The logic for a realtime application is packaged into a Storm topology. A 
Storm topology is analogous to a MapReduce job. One key difference is that a 
MapReduce job eventually finishes, whereas a topology runs forever (or until 
you kill it, of course). A topology is a graph of spouts and bolts that are 
connected with stream groupings. These concepts are described below.</p>
+
+<p><strong>Resources:</strong></p>
+
+<ul>
+<li><a 
href="javadocs/backtype/storm/topology/TopologyBuilder.html">TopologyBuilder</a>:
 use this class to construct topologies in Java</li>
+<li><a href="Running-topologies-on-a-production-cluster.html">Running 
topologies on a production cluster</a></li>
+<li><a href="Local-mode.html">Local mode</a>: Read this to learn how to 
develop and test topologies in local mode.</li>
+</ul>
+
+<h3 id="streams">Streams</h3>
+
+<p>The stream is the core abstraction in Storm. A stream is an unbounded 
sequence of tuples that is processed and created in parallel in a distributed 
fashion. Streams are defined with a schema that names the fields in the 
stream&#39;s tuples. By default, tuples can contain integers, longs, shorts, 
bytes, strings, doubles, floats, booleans, and byte arrays. You can also define 
your own serializers so that custom types can be used natively within 
tuples.</p>
+
+<p>Every stream is given an id when declared. Since single-stream spouts and 
bolts are so common, <a 
href="javadocs/backtype/storm/topology/OutputFieldsDeclarer.html">OutputFieldsDeclarer</a>
 has convenience methods for declaring a single stream without specifying an 
id. In this case, the stream is given the default id of &quot;default&quot;.</p>
+
+<p><strong>Resources:</strong></p>
+
+<ul>
+<li><a href="javadocs/backtype/storm/tuple/Tuple.html">Tuple</a>: streams are 
composed of tuples</li>
+<li><a 
href="javadocs/backtype/storm/topology/OutputFieldsDeclarer.html">OutputFieldsDeclarer</a>:
 used to declare streams and their schemas</li>
+<li><a href="Serialization.html">Serialization</a>: Information about 
Storm&#39;s dynamic typing of tuples and declaring custom serializations</li>
+</ul>
+
+<h3 id="spouts">Spouts</h3>
+
+<p>A spout is a source of streams in a topology. Generally spouts will read 
tuples from an external source and emit them into the topology (e.g. a Kestrel 
queue or the Twitter API). Spouts can either be <strong>reliable</strong> or 
<strong>unreliable</strong>. A reliable spout is capable of replaying a tuple 
if it failed to be processed by Storm, whereas an unreliable spout forgets 
about the tuple as soon as it is emitted.</p>
+
+<p>Spouts can emit more than one stream. To do so, declare multiple streams 
using the <code>declareStream</code> method of <a 
href="javadocs/backtype/storm/topology/OutputFieldsDeclarer.html">OutputFieldsDeclarer</a>
 and specify the stream to emit to when using the <code>emit</code> method on 
<a 
href="javadocs/backtype/storm/spout/SpoutOutputCollector.html">SpoutOutputCollector</a>.</p>
+
+<p>The main method on spouts is <code>nextTuple</code>. <code>nextTuple</code> 
either emits a new tuple into the topology or simply returns if there are no 
new tuples to emit. It is imperative that <code>nextTuple</code> does not block 
for any spout implementation, because Storm calls all the spout methods on the 
same thread.</p>
+
+<p>The other main methods on spouts are <code>ack</code> and 
<code>fail</code>. These are called when Storm detects that a tuple emitted 
from the spout either successfully completed through the topology or failed to 
be completed. <code>ack</code> and <code>fail</code> are only called for 
reliable spouts. See <a href="javadocs/backtype/storm/spout/ISpout.html">the 
Javadoc</a> for more information.</p>
+
+<p><strong>Resources:</strong></p>
+
+<ul>
+<li><a href="javadocs/backtype/storm/topology/IRichSpout.html">IRichSpout</a>: 
this is the interface that spouts must implement.</li>
+<li><a href="Guaranteeing-message-processing.html">Guaranteeing message 
processing</a></li>
+</ul>
+
+<h3 id="bolts">Bolts</h3>
+
+<p>All processing in topologies is done in bolts. Bolts can do anything from 
filtering, functions, aggregations, joins, talking to databases, and more. </p>
+
+<p>Bolts can do simple stream transformations. Doing complex stream 
transformations often requires multiple steps and thus multiple bolts. For 
example, transforming a stream of tweets into a stream of trending images 
requires at least two steps: a bolt to do a rolling count of retweets for each 
image, and one or more bolts to stream out the top X images (you can do this 
particular stream transformation in a more scalable way with three bolts than 
with two). </p>
+
+<p>Bolts can emit more than one stream. To do so, declare multiple streams 
using the <code>declareStream</code> method of <a 
href="javadocs/backtype/storm/topology/OutputFieldsDeclarer.html">OutputFieldsDeclarer</a>
 and specify the stream to emit to when using the <code>emit</code> method on 
<a 
href="javadocs/backtype/storm/task/OutputCollector.html">OutputCollector</a>.</p>
+
+<p>When you declare a bolt&#39;s input streams, you always subscribe to 
specific streams of another component. If you want to subscribe to all the 
streams of another component, you have to subscribe to each one individually. 
<a href="javadocs/backtype/storm/topology/InputDeclarer.html">InputDeclarer</a> 
has syntactic sugar for subscribing to streams declared on the default stream 
id. Saying <code>declarer.shuffleGrouping(&quot;1&quot;)</code> subscribes to 
the default stream on component &quot;1&quot; and is equivalent to 
<code>declarer.shuffleGrouping(&quot;1&quot;, DEFAULT_STREAM_ID)</code>.</p>
+
+<p>The main method in bolts is the <code>execute</code> method which takes in 
as input a new tuple. Bolts emit new tuples using the <a 
href="javadocs/backtype/storm/task/OutputCollector.html">OutputCollector</a> 
object. Bolts must call the <code>ack</code> method on the 
<code>OutputCollector</code> for every tuple they process so that Storm knows 
when tuples are completed (and can eventually determine that it&#39;s safe to ack 
the original spout tuples). For the common case of processing an input tuple, 
emitting 0 or more tuples based on that tuple, and then acking the input tuple, 
Storm provides an <a 
href="javadocs/backtype/storm/topology/IBasicBolt.html">IBasicBolt</a> 
interface which does the acking automatically.</p>
+
+<p>Please note that <a 
href="javadocs/backtype/storm/task/OutputCollector.html">OutputCollector</a> is 
not thread-safe, and all emits, acks, and fails must happen on the same thread. 
Please refer to <a href="troubleshooting.html">Troubleshooting</a> for more 
details.</p>
+
+<p><strong>Resources:</strong></p>
+
+<ul>
+<li><a href="javadocs/backtype/storm/topology/IRichBolt.html">IRichBolt</a>: 
this is the general interface for bolts.</li>
+<li><a href="javadocs/backtype/storm/topology/IBasicBolt.html">IBasicBolt</a>: 
this is a convenience interface for defining bolts that do filtering or simple 
functions.</li>
+<li><a 
href="javadocs/backtype/storm/task/OutputCollector.html">OutputCollector</a>: 
bolts emit tuples to their output streams using an instance of this class</li>
+<li><a href="Guaranteeing-message-processing.html">Guaranteeing message 
processing</a></li>
+</ul>
+
+<h3 id="stream-groupings">Stream groupings</h3>
+
+<p>Part of defining a topology is specifying for each bolt which streams it 
should receive as input. A stream grouping defines how that stream should be 
partitioned among the bolt&#39;s tasks.</p>
+
+<p>There are eight built-in stream groupings in Storm, and you can implement a 
custom stream grouping by implementing the <a 
href="javadocs/backtype/storm/grouping/CustomStreamGrouping.html">CustomStreamGrouping</a>
 interface:</p>
+
+<ol>
+<li><strong>Shuffle grouping</strong>: Tuples are randomly distributed across 
the bolt&#39;s tasks in a way such that each bolt is guaranteed to get an equal 
number of tuples.</li>
+<li><strong>Fields grouping</strong>: The stream is partitioned by the fields 
specified in the grouping. For example, if the stream is grouped by the 
&quot;user-id&quot; field, tuples with the same &quot;user-id&quot; will always 
go to the same task, but tuples with different &quot;user-id&quot;&#39;s may go 
to different tasks.</li>
+<li><strong>Partial Key grouping</strong>: The stream is partitioned by the 
fields specified in the grouping, like the Fields grouping, but is load 
balanced between two downstream bolts, which provides better utilization of 
resources when the incoming data is skewed. <a 
href="https://melmeric.files.wordpress.com/2014/11/the-power-of-both-choices-practical-load-balancing-for-distributed-stream-processing-engines.pdf">This
 paper</a> provides a good explanation of how it works and the advantages it 
provides.</li>
+<li><strong>All grouping</strong>: The stream is replicated across all the 
bolt&#39;s tasks. Use this grouping with care.</li>
+<li><strong>Global grouping</strong>: The entire stream goes to a single one 
of the bolt&#39;s tasks. Specifically, it goes to the task with the lowest 
id.</li>
+<li><strong>None grouping</strong>: This grouping specifies that you don&#39;t 
care how the stream is grouped. Currently, none groupings are equivalent to 
shuffle groupings. Eventually though, Storm will push down bolts with none 
groupings to execute in the same thread as the bolt or spout they subscribe 
from (when possible).</li>
+<li><strong>Direct grouping</strong>: This is a special kind of grouping. A 
stream grouped this way means that the <strong>producer</strong> of the tuple 
decides which task of the consumer will receive this tuple. Direct groupings 
can only be declared on streams that have been declared as direct streams. 
Tuples emitted to a direct stream must be emitted using one of the 
<a href="javadocs/backtype/storm/task/OutputCollector.html#emitDirect(int,%20int,%20java.util.List)">emitDirect</a> 
methods. A bolt can get the task ids of its consumers by 
either using the provided <a 
href="javadocs/backtype/storm/task/TopologyContext.html">TopologyContext</a> or 
by keeping track of the output of the <code>emit</code> method in <a 
href="javadocs/backtype/storm/task/OutputCollector.html">OutputCollector</a> 
(which returns the task ids that the tuple was sent to).</li>
+<li><strong>Local or shuffle grouping</strong>: If the target bolt has one or 
more tasks in the same worker process, tuples will be shuffled to just those 
in-process tasks. Otherwise, this acts like a normal shuffle grouping.</li>
+</ol>
+
+<p><strong>Resources:</strong></p>
+
+<ul>
+<li><a 
href="javadocs/backtype/storm/topology/TopologyBuilder.html">TopologyBuilder</a>:
 use this class to define topologies</li>
+<li><a 
href="javadocs/backtype/storm/topology/InputDeclarer.html">InputDeclarer</a>: 
this object is returned whenever <code>setBolt</code> is called on 
<code>TopologyBuilder</code> and is used for declaring a bolt&#39;s input 
streams and how those streams should be grouped</li>
+</ul>
+
+<h3 id="reliability">Reliability</h3>
+
+<p>Storm guarantees that every spout tuple will be fully processed by the 
topology. It does this by tracking the tree of tuples triggered by every spout 
tuple and determining when that tree of tuples has been successfully completed. 
Every topology has a &quot;message timeout&quot; associated with it. If Storm 
fails to detect that a spout tuple has been completed within that timeout, then 
it fails the tuple and replays it later. </p>
+
+<p>To take advantage of Storm&#39;s reliability capabilities, you must tell 
Storm when new edges in a tuple tree are being created and tell Storm whenever 
you&#39;ve finished processing an individual tuple. These are done using the <a 
href="javadocs/backtype/storm/task/OutputCollector.html">OutputCollector</a> 
object that bolts use to emit tuples. Anchoring is done in the 
<code>emit</code> method, and you declare that you&#39;re finished with a tuple 
using the <code>ack</code> method.</p>
+
+<p>This is all explained in much more detail in <a 
href="Guaranteeing-message-processing.html">Guaranteeing message 
processing</a>. </p>
+
+<h3 id="tasks">Tasks</h3>
+
+<p>Each spout or bolt executes as many tasks across the cluster. Each task 
corresponds to one thread of execution, and stream groupings define how to send 
tuples from one set of tasks to another set of tasks. You set the parallelism 
for each spout or bolt in the <code>setSpout</code> and <code>setBolt</code> 
methods of <a 
href="javadocs/backtype/storm/topology/TopologyBuilder.html">TopologyBuilder</a>.</p>
+
+<h3 id="workers">Workers</h3>
+
+<p>Topologies execute across one or more worker processes. Each worker process 
is a physical JVM and executes a subset of all the tasks for the topology. For 
example, if the combined parallelism of the topology is 300 and 50 workers are 
allocated, then each worker will execute 6 tasks (as threads within the 
worker). Storm tries to spread the tasks evenly across all the workers.</p>
+
+<p><strong>Resources:</strong></p>
+
+<ul>
+<li><a 
href="javadocs/backtype/storm/Config.html#TOPOLOGY_WORKERS">Config.TOPOLOGY_WORKERS</a>:
 this config sets the number of workers to allocate for executing the 
topology</li>
+</ul>
+
+
+
+                 </div>
+              </div>
+         </div>
+<footer>
+    <div class="container-fluid">
+        <div class="row">
+            <div class="col-md-3">
+                <div class="footer-widget">
+                    <h5>Meetups</h5>
+                    <ul class="latest-news">
+                        
+                        <li><a 
href="http://www.meetup.com/Apache-Storm-Apache-Kafka/">Apache Storm &amp; Apache 
Kafka</a> <span class="small">(Sunnyvale, CA)</span></li>
+                        
+                        <li><a 
href="http://www.meetup.com/Apache-Storm-Kafka-Users/">Apache Storm &amp; Kafka 
Users</a> <span class="small">(Seattle, WA)</span></li>
+                        
+                        <li><a 
href="http://www.meetup.com/New-York-City-Storm-User-Group/">NYC Storm User 
Group</a> <span class="small">(New York, NY)</span></li>
+                        
+                        <li><a 
href="http://www.meetup.com/Bay-Area-Stream-Processing">Bay Area Stream 
Processing</a> <span class="small">(Emeryville, CA)</span></li>
+                        
+                        <li><a 
href="http://www.meetup.com/Boston-Storm-Users/">Boston Realtime Data</a> <span 
class="small">(Boston, MA)</span></li>
+                        
+                        <li><a 
href="http://www.meetup.com/storm-london">London Storm User Group</a> <span 
class="small">(London, UK)</span></li>
+                        
+                        <!-- <li><a 
href="http://www.meetup.com/Apache-Storm-Kafka-Users/">Seattle, WA</a> <span 
class="small">(27 Jun 2015)</span></li> -->
+                    </ul>
+                </div>
+            </div>
+            <div class="col-md-3">
+                <div class="footer-widget">
+                    <h5>About Storm</h5>
+                    <p>Storm integrates with any queueing system and any 
database system. Storm's spout abstraction makes it easy to integrate a new 
queuing system. Likewise, integrating Storm with database systems is easy.</p>
+               </div>
+            </div>
+            <div class="col-md-3">
+                <div class="footer-widget">
+                    <h5>First Look</h5>
+                    <ul class="footer-list">
+                        <li><a 
href="/releases/current/Rationale.html">Rationale</a></li>
+                        <li><a 
href="/releases/current/Tutorial.html">Tutorial</a></li>
+                        <li><a 
href="/releases/current/Setting-up-development-environment.html">Setting up 
development environment</a></li>
+                        <li><a 
href="/releases/current/Creating-a-new-Storm-project.html">Creating a new Storm 
project</a></li>
+                    </ul>
+                </div>
+            </div>
+            <div class="col-md-3">
+                <div class="footer-widget">
+                    <h5>Documentation</h5>
+                    <ul class="footer-list">
+                        <li><a 
href="/releases/current/index.html">Index</a></li>
+                        <li><a 
href="/releases/current/javadocs/index.html">Javadoc</a></li>
+                        <li><a href="/releases/current/FAQ.html">FAQ</a></li>
+                    </ul>
+                </div>
+            </div>
+        </div>
+        <hr/>
+        <div class="row">   
+            <div class="col-md-12">
+                <p align="center">Copyright © 2015 <a 
href="http://www.apache.org">Apache Software Foundation</a>. All Rights 
Reserved. 
+                    <br>Apache Storm, Apache, the Apache feather logo, and the 
Apache Storm project logos are trademarks of The Apache Software Foundation. 
+                    <br>All other marks mentioned may be trademarks or 
registered trademarks of their respective owners.</p>
+            </div>
+        </div>
+    </div>
+</footer>
+<!--Footer End-->
+<!-- Scroll to top -->
+<span class="totop"><a href="#"><i class="fa fa-angle-up"></i></a></span> 
+
+</body>
+
+</html>
+

Reply via email to