http://git-wip-us.apache.org/repos/asf/storm-site/blob/ff14ea94/content/releases/0.10.1/Trident-tutorial.html
----------------------------------------------------------------------
diff --git a/content/releases/0.10.1/Trident-tutorial.html 
b/content/releases/0.10.1/Trident-tutorial.html
deleted file mode 100644
index b12b109..0000000
--- a/content/releases/0.10.1/Trident-tutorial.html
+++ /dev/null
@@ -1,462 +0,0 @@
-<!DOCTYPE html>
-<html>
-    <head>
-    <meta charset="utf-8">
-    <meta http-equiv="X-UA-Compatible" content="IE=edge">
-    <meta name="viewport" content="width=device-width, initial-scale=1">
-
-    <link rel="shortcut icon" href="/favicon.ico" type="image/x-icon">
-    <link rel="icon" href="/favicon.ico" type="image/x-icon">
-
-    <title>Trident Tutorial</title>
-
-    <!-- Bootstrap core CSS -->
-    <link href="/assets/css/bootstrap.min.css" rel="stylesheet">
-    <!-- Bootstrap theme -->
-    <link href="/assets/css/bootstrap-theme.min.css" rel="stylesheet">
-
-    <!-- Custom styles for this template -->
-    <link rel="stylesheet" 
href="http://fortawesome.github.io/Font-Awesome/assets/font-awesome/css/font-awesome.css";>
-    <link href="/css/style.css" rel="stylesheet">
-    <link href="/assets/css/owl.theme.css" rel="stylesheet">
-    <link href="/assets/css/owl.carousel.css" rel="stylesheet">
-    <script type="text/javascript" src="/assets/js/jquery.min.js"></script>
-    <script type="text/javascript" src="/assets/js/bootstrap.min.js"></script>
-    <script type="text/javascript" 
src="/assets/js/owl.carousel.min.js"></script>
-    <script type="text/javascript" src="/assets/js/storm.js"></script>
-    <!-- Just for debugging purposes. Don't actually copy these 2 lines! -->
-    <!--[if lt IE 9]><script 
src="../../assets/js/ie8-responsive-file-warning.js"></script><![endif]-->
-    
-    <!-- HTML5 shim and Respond.js for IE8 support of HTML5 elements and media 
queries -->
-    <!--[if lt IE 9]>
-      <script 
src="https://oss.maxcdn.com/html5shiv/3.7.2/html5shiv.min.js";></script>
-      <script 
src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js";></script>
-    <![endif]-->
-  </head>
-
-
-  <body>
-    <header>
-  <div class="container-fluid">
-     <div class="row">
-          <div class="col-md-5">
-            <a href="/index.html"><img src="/images/logo.png" class="logo" 
/></a>
-          </div>
-          <div class="col-md-5">
-            
-              <h1>Version: 0.10.1</h1>
-            
-          </div>
-          <div class="col-md-2">
-            <a href="/downloads.html" class="btn-std btn-block 
btn-download">Download</a>
-          </div>
-        </div>
-    </div>
-</header>
-<!--Header End-->
-<!--Navigation Begin-->
-<div class="navbar" role="banner">
-  <div class="container-fluid">
-      <div class="navbar-header">
-          <button class="navbar-toggle" type="button" data-toggle="collapse" 
data-target=".bs-navbar-collapse">
-                <span class="icon-bar"></span>
-                <span class="icon-bar"></span>
-                <span class="icon-bar"></span>
-            </button>
-        </div>
-        <nav class="collapse navbar-collapse bs-navbar-collapse" 
role="navigation">
-          <ul class="nav navbar-nav">
-              <li><a href="/index.html" id="home">Home</a></li>
-                <li><a href="/getting-help.html" id="getting-help">Getting 
Help</a></li>
-                <li><a href="/about/integrates.html" id="project-info">Project 
Information</a></li>
-                <li class="dropdown">
-                    <a href="#" class="dropdown-toggle" data-toggle="dropdown" 
id="documentation">Documentation <b class="caret"></b></a>
-                    <ul class="dropdown-menu">
-                      
-                        
-                          <li><a 
href="/releases/2.0.0-SNAPSHOT/index.html">2.0.0-SNAPSHOT</a></li>
-                        
-                      
-                        
-                          <li><a 
href="/releases/1.2.0/index.html">1.2.0</a></li>
-                        
-                      
-                        
-                          <li><a 
href="/releases/1.1.2/index.html">1.1.2</a></li>
-                        
-                      
-                        
-                          <li><a 
href="/releases/1.1.1/index.html">1.1.1</a></li>
-                        
-                      
-                        
-                          <li><a 
href="/releases/1.1.0/index.html">1.1.0</a></li>
-                        
-                      
-                        
-                      
-                        
-                          <li><a 
href="/releases/1.0.6/index.html">1.0.6</a></li>
-                        
-                      
-                        
-                          <li><a 
href="/releases/1.0.4/index.html">1.0.4</a></li>
-                        
-                      
-                        
-                      
-                        
-                          <li><a 
href="/releases/1.0.3/index.html">1.0.3</a></li>
-                        
-                      
-                        
-                      
-                        
-                      
-                        
-                      
-                        
-                          <li><a 
href="/releases/0.10.2/index.html">0.10.2</a></li>
-                        
-                      
-                        
-                          <li><a 
href="/releases/0.10.1/index.html">0.10.1</a></li>
-                        
-                      
-                        
-                      
-                        
-                      
-                        
-                          <li><a 
href="/releases/0.9.7/index.html">0.9.7</a></li>
-                        
-                      
-                        
-                          <li><a 
href="/releases/0.9.6/index.html">0.9.6</a></li>
-                        
-                      
-                        
-                      
-                        
-                      
-                        
-                      
-                        
-                      
-                        
-                      
-                    </ul>
-                </li>
-                <li><a href="/talksAndVideos.html">Talks and 
Slideshows</a></li>
-                <li class="dropdown">
-                    <a href="#" class="dropdown-toggle" data-toggle="dropdown" 
id="contribute">Community <b class="caret"></b></a>
-                    <ul class="dropdown-menu">
-                        <li><a 
href="/contribute/Contributing-to-Storm.html">Contributing</a></li>
-                        <li><a href="/contribute/People.html">People</a></li>
-                        <li><a href="/contribute/BYLAWS.html">ByLaws</a></li>
-                    </ul>
-                </li>
-                <li><a href="/2018/02/15/storm120-released.html" 
id="news">News</a></li>
-            </ul>
-        </nav>
-    </div>
-</div>
-
-
-
-    <div class="container-fluid">
-    <h1 class="page-title">Trident Tutorial</h1>
-          <div class="row">
-               <div class="col-md-12">
-                    <!-- Documentation -->
-
-<p class="post-meta"></p>
-
-<p>Trident is a high-level abstraction for doing realtime computing on top of 
Storm. It allows you to seamlessly intermix high throughput (millions of 
messages per second), stateful stream processing with low latency distributed 
querying. If you&#39;re familiar with high level batch processing tools like 
Pig or Cascading, the concepts of Trident will be very familiar – Trident has 
joins, aggregations, grouping, functions, and filters. In addition to these, 
Trident adds primitives for doing stateful, incremental processing on top of 
any database or persistence store. Trident has consistent, exactly-once 
semantics, so it is easy to reason about Trident topologies.</p>
-
-<h2 id="illustrative-example">Illustrative example</h2>
-
-<p>Let&#39;s look at an illustrative example of Trident. This example will do 
two things:</p>
-
-<ol>
-<li>Compute streaming word count from an input stream of sentences</li>
-<li>Implement queries to get the sum of the counts for a list of words</li>
-</ol>
-
-<p>For the purposes of illustration, this example will read an infinite stream 
of sentences from the following source:</p>
-<div class="highlight"><pre><code class="language-java" data-lang="java"><span 
class="n">FixedBatchSpout</span> <span class="n">spout</span> <span 
class="o">=</span> <span class="k">new</span> <span 
class="n">FixedBatchSpout</span><span class="o">(</span><span 
class="k">new</span> <span class="n">Fields</span><span class="o">(</span><span 
class="s">"sentence"</span><span class="o">),</span> <span 
class="mi">3</span><span class="o">,</span>
-               <span class="k">new</span> <span class="nf">Values</span><span 
class="o">(</span><span class="s">"the cow jumped over the moon"</span><span 
class="o">),</span>
-               <span class="k">new</span> <span class="nf">Values</span><span 
class="o">(</span><span class="s">"the man went to the store and bought some 
candy"</span><span class="o">),</span>
-               <span class="k">new</span> <span class="nf">Values</span><span 
class="o">(</span><span class="s">"four score and seven years ago"</span><span 
class="o">),</span>
-               <span class="k">new</span> <span class="nf">Values</span><span 
class="o">(</span><span class="s">"how many apples can you eat"</span><span 
class="o">));</span>
-<span class="n">spout</span><span class="o">.</span><span 
class="na">setCycle</span><span class="o">(</span><span 
class="kc">true</span><span class="o">);</span>
-</code></pre></div>
-<p>This spout cycles through that set of sentences over and over to produce 
the sentence stream. Here&#39;s the code to do the streaming word count part of 
the computation:</p>
-<div class="highlight"><pre><code class="language-java" data-lang="java"><span 
class="n">TridentTopology</span> <span class="n">topology</span> <span 
class="o">=</span> <span class="k">new</span> <span 
class="n">TridentTopology</span><span class="o">();</span>        
-<span class="n">TridentState</span> <span class="n">wordCounts</span> <span 
class="o">=</span>
-     <span class="n">topology</span><span class="o">.</span><span 
class="na">newStream</span><span class="o">(</span><span 
class="s">"spout1"</span><span class="o">,</span> <span 
class="n">spout</span><span class="o">)</span>
-       <span class="o">.</span><span class="na">each</span><span 
class="o">(</span><span class="k">new</span> <span class="n">Fields</span><span 
class="o">(</span><span class="s">"sentence"</span><span class="o">),</span> 
<span class="k">new</span> <span class="n">Split</span><span 
class="o">(),</span> <span class="k">new</span> <span 
class="n">Fields</span><span class="o">(</span><span 
class="s">"word"</span><span class="o">))</span>
-       <span class="o">.</span><span class="na">groupBy</span><span 
class="o">(</span><span class="k">new</span> <span class="n">Fields</span><span 
class="o">(</span><span class="s">"word"</span><span class="o">))</span>
-       <span class="o">.</span><span 
class="na">persistentAggregate</span><span class="o">(</span><span 
class="k">new</span> <span class="n">MemoryMapState</span><span 
class="o">.</span><span class="na">Factory</span><span class="o">(),</span> 
<span class="k">new</span> <span class="n">Count</span><span 
class="o">(),</span> <span class="k">new</span> <span 
class="n">Fields</span><span class="o">(</span><span 
class="s">"count"</span><span class="o">))</span>                
-       <span class="o">.</span><span class="na">parallelismHint</span><span 
class="o">(</span><span class="mi">6</span><span class="o">);</span>
-</code></pre></div>
-<p>Let&#39;s go through the code line by line. First a TridentTopology object 
is created, which exposes the interface for constructing Trident computations. 
TridentTopology has a method called newStream that creates a new stream of data 
in the topology reading from an input source. In this case, the input source is 
just the FixedBatchSpout defined from before. Input sources can also be queue 
brokers like Kestrel or Kafka. Trident keeps track of a small amount of state 
for each input source (metadata about what it has consumed) in Zookeeper, and 
the &quot;spout1&quot; string here specifies the node in Zookeeper where 
Trident should keep that metadata.</p>
-
-<p>Trident processes the stream as small batches of tuples. For example, the 
incoming stream of sentences might be divided into batches like so:</p>
-
-<p><img src="images/batched-stream.png" alt="Batched stream"></p>
-
-<p>Generally the size of those small batches will be on the order of thousands 
or millions of tuples, depending on your incoming throughput.</p>
-
-<p>Trident provides a fully fledged batch processing API to process those 
small batches. The API is very similar to what you see in high level 
abstractions for Hadoop like Pig or Cascading: you can do group by&#39;s, 
joins, aggregations, run functions, run filters, and so on. Of course, 
processing each small batch in isolation isn&#39;t that interesting, so Trident 
provides functions for doing aggregations across batches and persistently 
storing those aggregations – whether in memory, in Memcached, in Cassandra, 
or some other store. Finally, Trident has first-class functions for querying 
sources of realtime state. That state could be updated by Trident (like in this 
example), or it could be an independent source of state.</p>
-
-<p>Back to the example, the spout emits a stream containing one field called 
&quot;sentence&quot;. The next line of the topology definition applies the 
Split function to each tuple in the stream, taking the &quot;sentence&quot; 
field and splitting it into words. Each sentence tuple creates potentially many 
word tuples – for instance, the sentence &quot;the cow jumped over the 
moon&quot; creates six &quot;word&quot; tuples. Here&#39;s the definition of 
Split:</p>
-<div class="highlight"><pre><code class="language-java" data-lang="java"><span 
class="kd">public</span> <span class="kd">class</span> <span 
class="nc">Split</span> <span class="kd">extends</span> <span 
class="n">BaseFunction</span> <span class="o">{</span>
-   <span class="kd">public</span> <span class="kt">void</span> <span 
class="nf">execute</span><span class="o">(</span><span 
class="n">TridentTuple</span> <span class="n">tuple</span><span 
class="o">,</span> <span class="n">TridentCollector</span> <span 
class="n">collector</span><span class="o">)</span> <span class="o">{</span>
-       <span class="n">String</span> <span class="n">sentence</span> <span 
class="o">=</span> <span class="n">tuple</span><span class="o">.</span><span 
class="na">getString</span><span class="o">(</span><span 
class="mi">0</span><span class="o">);</span>
-       <span class="k">for</span><span class="o">(</span><span 
class="n">String</span> <span class="nl">word:</span> <span 
class="n">sentence</span><span class="o">.</span><span 
class="na">split</span><span class="o">(</span><span class="s">" "</span><span 
class="o">))</span> <span class="o">{</span>
-           <span class="n">collector</span><span class="o">.</span><span 
class="na">emit</span><span class="o">(</span><span class="k">new</span> <span 
class="n">Values</span><span class="o">(</span><span class="n">word</span><span 
class="o">));</span>                
-       <span class="o">}</span>
-   <span class="o">}</span>
-<span class="o">}</span>
-</code></pre></div>
-<p>As you can see, it&#39;s really simple: it grabs the sentence, 
splits it on whitespace, and emits a tuple for each word.</p>
-
-<p>The rest of the topology computes word count and keeps the results 
persistently stored. First the stream is grouped by the &quot;word&quot; field. 
Then, each group is persistently aggregated using the Count aggregator. The 
persistentAggregate function knows how to store and update the results of the 
aggregation in a source of state. In this example, the word counts are kept in 
memory, but this can be trivially swapped to use Memcached, Cassandra, or any 
other persistent store. Swapping this topology to store counts in Memcached is 
as simple as replacing the persistentAggregate line with this (using <a 
href="https://github.com/nathanmarz/trident-memcached";>trident-memcached</a>), 
where the &quot;serverLocations&quot; is a list of host/ports for the Memcached 
cluster:</p>
-<div class="highlight"><pre><code class="language-java" data-lang="java"><span 
class="o">.</span><span class="na">persistentAggregate</span><span 
class="o">(</span><span class="n">MemcachedState</span><span 
class="o">.</span><span class="na">transactional</span><span 
class="o">(</span><span class="n">serverLocations</span><span 
class="o">),</span> <span class="k">new</span> <span 
class="n">Count</span><span class="o">(),</span> <span class="k">new</span> 
<span class="n">Fields</span><span class="o">(</span><span 
class="s">"count"</span><span class="o">))</span>        
-<span class="n">MemcachedState</span><span class="o">.</span><span 
class="na">transactional</span><span class="o">()</span>
-</code></pre></div>
-<p>The values stored by persistentAggregate represent the aggregation of all 
batches ever emitted by the stream.</p>
-
-<p>One of the cool things about Trident is that it has fully fault-tolerant, 
exactly-once processing semantics. This makes it easy to reason about your 
realtime processing. Trident persists state in such a way that if failures occur 
and retries are necessary, it won&#39;t perform multiple updates to the 
database for the same source data.</p>
-
-<p>The persistentAggregate method transforms a Stream into a TridentState 
object. In this case the TridentState object represents all the word counts. We 
will use this TridentState object to implement the distributed query portion of 
the computation.</p>
-
-<p>The next part of the topology implements a low latency distributed query on 
the word counts. The query takes as input a whitespace separated list of words 
and returns the sum of the counts for those words. These queries are executed 
just like normal RPC calls, except they are parallelized in the background. 
Here&#39;s an example of how you might invoke one of these queries:</p>
-<div class="highlight"><pre><code class="language-java" data-lang="java"><span 
class="n">DRPCClient</span> <span class="n">client</span> <span 
class="o">=</span> <span class="k">new</span> <span 
class="n">DRPCClient</span><span class="o">(</span><span 
class="s">"drpc.server.location"</span><span class="o">,</span> <span 
class="mi">3772</span><span class="o">);</span>
-<span class="n">System</span><span class="o">.</span><span 
class="na">out</span><span class="o">.</span><span 
class="na">println</span><span class="o">(</span><span 
class="n">client</span><span class="o">.</span><span 
class="na">execute</span><span class="o">(</span><span 
class="s">"words"</span><span class="o">,</span> <span class="s">"cat dog the 
man"</span><span class="o">);</span>
-<span class="c1">// prints the JSON-encoded result, e.g.: "[[5078]]"</span>
-</code></pre></div>
-<p>As you can see, it looks just like a regular remote procedure call (RPC), 
except it&#39;s executing in parallel across a Storm cluster. The latency for 
small queries like this is typically around 10ms. More intense DRPC queries 
can take longer of course, although the latency largely depends on how many 
resources you have allocated for the computation.</p>
-
-<p>The implementation of the distributed query portion of the topology looks 
like this:</p>
-<div class="highlight"><pre><code class="language-java" data-lang="java"><span 
class="n">topology</span><span class="o">.</span><span 
class="na">newDRPCStream</span><span class="o">(</span><span 
class="s">"words"</span><span class="o">)</span>
-       <span class="o">.</span><span class="na">each</span><span 
class="o">(</span><span class="k">new</span> <span class="n">Fields</span><span 
class="o">(</span><span class="s">"args"</span><span class="o">),</span> <span 
class="k">new</span> <span class="n">Split</span><span class="o">(),</span> 
<span class="k">new</span> <span class="n">Fields</span><span 
class="o">(</span><span class="s">"word"</span><span class="o">))</span>
-       <span class="o">.</span><span class="na">groupBy</span><span 
class="o">(</span><span class="k">new</span> <span class="n">Fields</span><span 
class="o">(</span><span class="s">"word"</span><span class="o">))</span>
-       <span class="o">.</span><span class="na">stateQuery</span><span 
class="o">(</span><span class="n">wordCounts</span><span class="o">,</span> 
<span class="k">new</span> <span class="n">Fields</span><span 
class="o">(</span><span class="s">"word"</span><span class="o">),</span> <span 
class="k">new</span> <span class="n">MapGet</span><span class="o">(),</span> 
<span class="k">new</span> <span class="n">Fields</span><span 
class="o">(</span><span class="s">"count"</span><span class="o">))</span>
-       <span class="o">.</span><span class="na">each</span><span 
class="o">(</span><span class="k">new</span> <span class="n">Fields</span><span 
class="o">(</span><span class="s">"count"</span><span class="o">),</span> <span 
class="k">new</span> <span class="n">FilterNull</span><span class="o">())</span>
-       <span class="o">.</span><span class="na">aggregate</span><span 
class="o">(</span><span class="k">new</span> <span class="n">Fields</span><span 
class="o">(</span><span class="s">"count"</span><span class="o">),</span> <span 
class="k">new</span> <span class="n">Sum</span><span class="o">(),</span> <span 
class="k">new</span> <span class="n">Fields</span><span class="o">(</span><span 
class="s">"sum"</span><span class="o">));</span>
-</code></pre></div>
-<p>The same TridentTopology object is used to create the DRPC stream, and the 
function is named &quot;words&quot;. The function name corresponds to the 
function name given in the first argument of execute when using a 
DRPCClient.</p>
-
-<p>Each DRPC request is treated as its own little batch processing job that 
takes as input a single tuple representing the request. The tuple contains one 
field called &quot;args&quot; that contains the argument provided by the 
client. In this case, the argument is a whitespace separated list of words.</p>
-
-<p>First, the Split function is used to split the argument for the request 
into its constituent words. The stream is grouped by &quot;word&quot;, and the 
stateQuery operator is used to query the TridentState object that the first 
part of the topology generated. stateQuery takes in a source of state – in 
this case, the word counts computed by the other portion of the topology – 
and a function for querying that state. In this case, the MapGet function is 
invoked, which gets the count for each word. Since the DRPC stream is grouped 
the exact same way as the TridentState was (by the &quot;word&quot; field), 
each word query is routed to the exact partition of the TridentState object 
that manages updates for that word.</p>
-
-<p>Next, words that didn&#39;t have a count are filtered out via the 
FilterNull filter and the counts are summed using the Sum aggregator to get the 
result. Then, Trident automatically sends the result back to the waiting 
client.</p>
-
-<p>Trident is intelligent about how it executes a topology to maximize 
performance. There are two interesting things happening automatically in this 
topology:</p>
-
-<ol>
-<li>Operations that read from or write to state (like persistentAggregate and stateQuery) automatically batch operations to that state. So if there are 20 updates that need to be made to the database for the current batch of processing, rather than doing 20 read requests and 20 write requests to the database, Trident will automatically batch up the reads and writes, doing only 1 read request and 1 write request (and in many cases, you can use caching in your State implementation to eliminate the read request). So you get the best of both worlds: the convenience of expressing your computation in terms of what should be done with each tuple, and performance.</li>
-<li>Trident aggregators are heavily optimized. Rather than transfer all tuples for a group to the same machine and then run the aggregator, Trident will do partial aggregations when possible before sending tuples over the network. For example, the Count aggregator computes the count on each partition, sends the partial count over the network, and then sums together all the partial counts to get the total count. This technique is similar to the use of combiners in MapReduce (a sketch of such an aggregator follows this list).</li>
-</ol>
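-
-<p>For reference, here is a simplified sketch of such a combiner-style Count aggregator (close to, though not necessarily identical to, Trident&#39;s built-in Count):</p>
-<div class="highlight"><pre><code class="language-java" data-lang="java">public class Count implements CombinerAggregator&lt;Long&gt; {
-   // Each input tuple contributes 1 to the count.
-   public Long init(TridentTuple tuple) {
-       return 1L;
-   }
-
-   // Partial counts from different partitions are summed together.
-   public Long combine(Long val1, Long val2) {
-       return val1 + val2;
-   }
-
-   // The count for an empty group or partition.
-   public Long zero() {
-       return 0L;
-   }
-}
-</code></pre></div>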
-
-<p>Let&#39;s look at another example of Trident.</p>
-
-<h2 id="reach">Reach</h2>
-
-<p>The next example is a pure DRPC topology that computes the reach of a URL 
on demand. Reach is the number of unique people exposed to a URL on Twitter. To 
compute reach, you need to fetch all the people who ever tweeted a URL, fetch 
all the followers of all those people, unique that set of followers, and then 
count the uniqued set. Computing reach is too intense for a single machine – 
it can require thousands of database calls and tens of millions of tuples. With 
Storm and Trident, you can parallelize the computation of each step across a 
cluster.</p>
-
-<p>This topology will read from two sources of state. One database maps URLs 
to a list of people who tweeted that URL. The other database maps a person to a 
list of followers for that person. The topology definition looks like this:</p>
-<div class="highlight"><pre><code class="language-java" data-lang="java"><span 
class="n">TridentState</span> <span class="n">urlToTweeters</span> <span 
class="o">=</span>
-       <span class="n">topology</span><span class="o">.</span><span 
class="na">newStaticState</span><span class="o">(</span><span 
class="n">getUrlToTweetersState</span><span class="o">());</span>
-<span class="n">TridentState</span> <span class="n">tweetersToFollowers</span> 
<span class="o">=</span>
-       <span class="n">topology</span><span class="o">.</span><span 
class="na">newStaticState</span><span class="o">(</span><span 
class="n">getTweeterToFollowersState</span><span class="o">());</span>
-
-<span class="n">topology</span><span class="o">.</span><span 
class="na">newDRPCStream</span><span class="o">(</span><span 
class="s">"reach"</span><span class="o">)</span>
-       <span class="o">.</span><span class="na">stateQuery</span><span 
class="o">(</span><span class="n">urlToTweeters</span><span class="o">,</span> 
<span class="k">new</span> <span class="n">Fields</span><span 
class="o">(</span><span class="s">"args"</span><span class="o">),</span> <span 
class="k">new</span> <span class="n">MapGet</span><span class="o">(),</span> 
<span class="k">new</span> <span class="n">Fields</span><span 
class="o">(</span><span class="s">"tweeters"</span><span class="o">))</span>
-       <span class="o">.</span><span class="na">each</span><span 
class="o">(</span><span class="k">new</span> <span class="n">Fields</span><span 
class="o">(</span><span class="s">"tweeters"</span><span class="o">),</span> 
<span class="k">new</span> <span class="n">ExpandList</span><span 
class="o">(),</span> <span class="k">new</span> <span 
class="n">Fields</span><span class="o">(</span><span 
class="s">"tweeter"</span><span class="o">))</span>
-       <span class="o">.</span><span class="na">shuffle</span><span 
class="o">()</span>
-       <span class="o">.</span><span class="na">stateQuery</span><span 
class="o">(</span><span class="n">tweetersToFollowers</span><span 
class="o">,</span> <span class="k">new</span> <span 
class="n">Fields</span><span class="o">(</span><span 
class="s">"tweeter"</span><span class="o">),</span> <span class="k">new</span> 
<span class="n">MapGet</span><span class="o">(),</span> <span 
class="k">new</span> <span class="n">Fields</span><span class="o">(</span><span 
class="s">"followers"</span><span class="o">))</span>
-       <span class="o">.</span><span class="na">parallelismHint</span><span 
class="o">(</span><span class="mi">200</span><span class="o">)</span>
-       <span class="o">.</span><span class="na">each</span><span 
class="o">(</span><span class="k">new</span> <span class="n">Fields</span><span 
class="o">(</span><span class="s">"followers"</span><span class="o">),</span> 
<span class="k">new</span> <span class="n">ExpandList</span><span 
class="o">(),</span> <span class="k">new</span> <span 
class="n">Fields</span><span class="o">(</span><span 
class="s">"follower"</span><span class="o">))</span>
-       <span class="o">.</span><span class="na">groupBy</span><span 
class="o">(</span><span class="k">new</span> <span class="n">Fields</span><span 
class="o">(</span><span class="s">"follower"</span><span class="o">))</span>
-       <span class="o">.</span><span class="na">aggregate</span><span 
class="o">(</span><span class="k">new</span> <span class="n">One</span><span 
class="o">(),</span> <span class="k">new</span> <span 
class="n">Fields</span><span class="o">(</span><span 
class="s">"one"</span><span class="o">))</span>
-       <span class="o">.</span><span class="na">parallelismHint</span><span 
class="o">(</span><span class="mi">20</span><span class="o">)</span>
-       <span class="o">.</span><span class="na">aggregate</span><span 
class="o">(</span><span class="k">new</span> <span class="n">Count</span><span 
class="o">(),</span> <span class="k">new</span> <span 
class="n">Fields</span><span class="o">(</span><span 
class="s">"reach"</span><span class="o">));</span>
-</code></pre></div>
-<p>The topology creates TridentState objects representing each external 
database using the newStaticState method. These can then be queried in the 
topology. Like all sources of state, queries to these databases will be 
automatically batched for maximum efficiency.</p>
-
-<p>The topology definition is straightforward – it&#39;s just a simple batch 
processing job. First, the urlToTweeters database is queried to get the list of 
people who tweeted the URL for this request. That returns a list, so the 
ExpandList function is invoked to create a tuple for each tweeter.</p>
-
-<p>Next, the followers for each tweeter must be fetched. It&#39;s important 
that this step be parallelized, so shuffle is invoked to evenly distribute the 
tweeters among all workers for the topology. Then, the followers database is 
queried to get the list of followers for each tweeter. You can see that this 
portion of the topology is given a large parallelism since this is the most 
intense portion of the computation.</p>
-
-<p>Next, the set of followers is uniqued and counted. This is done in two 
steps. First a &quot;group by&quot; is done on the batch by 
&quot;follower&quot;, running the &quot;One&quot; aggregator on each group. The 
&quot;One&quot; aggregator simply emits a single tuple containing the number 
one for each group. Then, the ones are summed together to get the unique count 
of the followers set. Here&#39;s the definition of the &quot;One&quot; 
aggregator:</p>
-<div class="highlight"><pre><code class="language-java" data-lang="java"><span 
class="kd">public</span> <span class="kd">class</span> <span 
class="nc">One</span> <span class="kd">implements</span> <span 
class="n">CombinerAggregator</span><span class="o">&lt;</span><span 
class="n">Integer</span><span class="o">&gt;</span> <span class="o">{</span>
-   <span class="kd">public</span> <span class="n">Integer</span> <span 
class="nf">init</span><span class="o">(</span><span 
class="n">TridentTuple</span> <span class="n">tuple</span><span 
class="o">)</span> <span class="o">{</span>
-       <span class="k">return</span> <span class="mi">1</span><span 
class="o">;</span>
-   <span class="o">}</span>
-
-   <span class="kd">public</span> <span class="n">Integer</span> <span 
class="nf">combine</span><span class="o">(</span><span class="n">Integer</span> 
<span class="n">val1</span><span class="o">,</span> <span 
class="n">Integer</span> <span class="n">val2</span><span class="o">)</span> 
<span class="o">{</span>
-       <span class="k">return</span> <span class="mi">1</span><span 
class="o">;</span>
-   <span class="o">}</span>
-
-   <span class="kd">public</span> <span class="n">Integer</span> <span 
class="nf">zero</span><span class="o">()</span> <span class="o">{</span>
-       <span class="k">return</span> <span class="mi">1</span><span 
class="o">;</span>
-   <span class="o">}</span>        
-<span class="o">}</span>
-</code></pre></div>
-<p>This is a &quot;combiner aggregator&quot;, which knows how to do partial 
aggregations before transferring tuples over the network to maximize 
efficiency. Sum is also defined as a combiner aggregator, so the global sum 
done at the end of the topology will be very efficient.</p>
-
-<p>Let&#39;s now look at Trident in more detail.</p>
-
-<h2 id="fields-and-tuples">Fields and tuples</h2>
-
-<p>The Trident data model is the TridentTuple, which is a named list of values. 
During a topology, tuples are incrementally built up through a sequence of 
operations. Operations generally take in a set of input fields and emit a set 
of &quot;function fields&quot;. The input fields are used to select a subset of 
the tuple as input to the operation, while the &quot;function fields&quot; name 
the fields the operation emits.</p>
-
-<p>Consider this example. Suppose you have a stream called &quot;stream&quot; 
that contains the fields &quot;x&quot;, &quot;y&quot;, and &quot;z&quot;. To 
run a filter MyFilter that takes in &quot;y&quot; as input, you would say:</p>
-<div class="highlight"><pre><code class="language-java" data-lang="java"><span 
class="n">stream</span><span class="o">.</span><span 
class="na">each</span><span class="o">(</span><span class="k">new</span> <span 
class="n">Fields</span><span class="o">(</span><span class="s">"y"</span><span 
class="o">),</span> <span class="k">new</span> <span 
class="n">MyFilter</span><span class="o">())</span>
-</code></pre></div>
-<p>Suppose the implementation of MyFilter is this:</p>
-<div class="highlight"><pre><code class="language-java" data-lang="java"><span 
class="kd">public</span> <span class="kd">class</span> <span 
class="nc">MyFilter</span> <span class="kd">extends</span> <span 
class="n">BaseFilter</span> <span class="o">{</span>
-   <span class="kd">public</span> <span class="kt">boolean</span> <span 
class="nf">isKeep</span><span class="o">(</span><span 
class="n">TridentTuple</span> <span class="n">tuple</span><span 
class="o">)</span> <span class="o">{</span>
-       <span class="k">return</span> <span class="n">tuple</span><span 
class="o">.</span><span class="na">getInteger</span><span 
class="o">(</span><span class="mi">0</span><span class="o">)</span> <span 
class="o">&lt;</span> <span class="mi">10</span><span class="o">;</span>
-   <span class="o">}</span>
-<span class="o">}</span>
-</code></pre></div>
-<p>This will keep all tuples whose &quot;y&quot; field is less than 10. The 
TridentTuple given as input to MyFilter will only contain the &quot;y&quot; 
field. Note that Trident is able to project a subset of a tuple extremely 
efficiently when selecting the input fields: the projection is essentially 
free.</p>
-
-<p>Let&#39;s now look at how &quot;function fields&quot; work. Suppose you had 
this function:</p>
-<div class="highlight"><pre><code class="language-java" data-lang="java"><span 
class="kd">public</span> <span class="kd">class</span> <span 
class="nc">AddAndMultiply</span> <span class="kd">extends</span> <span 
class="n">BaseFunction</span> <span class="o">{</span>
-   <span class="kd">public</span> <span class="kt">void</span> <span 
class="nf">execute</span><span class="o">(</span><span 
class="n">TridentTuple</span> <span class="n">tuple</span><span 
class="o">,</span> <span class="n">TridentCollector</span> <span 
class="n">collector</span><span class="o">)</span> <span class="o">{</span>
-       <span class="kt">int</span> <span class="n">i1</span> <span 
class="o">=</span> <span class="n">tuple</span><span class="o">.</span><span 
class="na">getInteger</span><span class="o">(</span><span 
class="mi">0</span><span class="o">);</span>
-       <span class="kt">int</span> <span class="n">i2</span> <span 
class="o">=</span> <span class="n">tuple</span><span class="o">.</span><span 
class="na">getInteger</span><span class="o">(</span><span 
class="mi">1</span><span class="o">);</span>
-       <span class="n">collector</span><span class="o">.</span><span 
class="na">emit</span><span class="o">(</span><span class="k">new</span> <span 
class="n">Values</span><span class="o">(</span><span class="n">i1</span> <span 
class="o">+</span> <span class="n">i2</span><span class="o">,</span> <span 
class="n">i1</span> <span class="o">*</span> <span class="n">i2</span><span 
class="o">));</span>
-   <span class="o">}</span>
-<span class="o">}</span>
-</code></pre></div>
-<p>This function takes two numbers as input and emits two new values: the 
sum of the numbers and their product. Suppose you had 
a stream with the fields &quot;x&quot;, &quot;y&quot;, and &quot;z&quot;. You 
would use this function like this:</p>
-<div class="highlight"><pre><code class="language-java" data-lang="java"><span 
class="n">stream</span><span class="o">.</span><span 
class="na">each</span><span class="o">(</span><span class="k">new</span> <span 
class="n">Fields</span><span class="o">(</span><span class="s">"x"</span><span 
class="o">,</span> <span class="s">"y"</span><span class="o">),</span> <span 
class="k">new</span> <span class="n">AddAndMultiply</span><span 
class="o">(),</span> <span class="k">new</span> <span 
class="n">Fields</span><span class="o">(</span><span 
class="s">"added"</span><span class="o">,</span> <span 
class="s">"multiplied"</span><span class="o">));</span>
-</code></pre></div>
-<p>The output of functions is additive: the fields are added to the input 
tuple. So the output of this each call would contain tuples with the five 
fields &quot;x&quot;, &quot;y&quot;, &quot;z&quot;, &quot;added&quot;, and 
&quot;multiplied&quot;. &quot;added&quot; corresponds to the first value 
emitted by AddAndMultiply, while &quot;multiplied&quot; corresponds to the 
second value.</p>
-
-<p>With aggregators, on the other hand, the function fields replace the input 
tuples. So if you had a stream containing the fields &quot;val1&quot; and 
&quot;val2&quot;, and you did this:</p>
-<div class="highlight"><pre><code class="language-java" data-lang="java"><span 
class="n">stream</span><span class="o">.</span><span 
class="na">aggregate</span><span class="o">(</span><span class="k">new</span> 
<span class="n">Fields</span><span class="o">(</span><span 
class="s">"val2"</span><span class="o">),</span> <span class="k">new</span> 
<span class="n">Sum</span><span class="o">(),</span> <span class="k">new</span> 
<span class="n">Fields</span><span class="o">(</span><span 
class="s">"sum"</span><span class="o">))</span>
-</code></pre></div>
-<p>The output stream would only contain a single tuple with a single field 
called &quot;sum&quot;, representing the sum of all &quot;val2&quot; fields in 
that batch.</p>
-
-<p>With grouped streams, the output will contain the grouping fields followed 
by the fields emitted by the aggregator. For example:</p>
-<div class="highlight"><pre><code class="language-java" data-lang="java"><span 
class="n">stream</span><span class="o">.</span><span 
class="na">groupBy</span><span class="o">(</span><span class="k">new</span> 
<span class="n">Fields</span><span class="o">(</span><span 
class="s">"val1"</span><span class="o">))</span>
-     <span class="o">.</span><span class="na">aggregate</span><span 
class="o">(</span><span class="k">new</span> <span class="n">Fields</span><span 
class="o">(</span><span class="s">"val2"</span><span class="o">),</span> <span 
class="k">new</span> <span class="n">Sum</span><span class="o">(),</span> <span 
class="k">new</span> <span class="n">Fields</span><span class="o">(</span><span 
class="s">"sum"</span><span class="o">))</span>
-</code></pre></div>
-<p>In this example, the output will contain the fields &quot;val1&quot; and 
&quot;sum&quot;.</p>
-
-<h2 id="state">State</h2>
-
-<p>A key problem to solve with realtime computation is how to manage state so 
that updates are idempotent in the face of failures and retries. It&#39;s 
impossible to eliminate failures, so when a node dies or something else goes 
wrong, batches need to be retried. The question is – how do you do state 
updates (whether external databases or state internal to the topology) so that 
it&#39;s as if each message was processed only once?</p>
-
-<p>This is a tricky problem, and can be illustrated with the following 
example. Suppose that you&#39;re doing a count aggregation of your stream and 
want to store the running count in a database. If you store only the count in 
the database and it&#39;s time to apply a state update for a batch, there&#39;s 
no way to know if you applied that state update before. The batch could have 
been attempted before, succeeded in updating the database, and then failed at a 
later step. Or the batch could have been attempted before and failed to update 
the database. You just don&#39;t know.</p>
-
-<p>Trident solves this problem by doing two things:</p>
-
-<ol>
-<li>Each batch is given a unique id called the &quot;transaction id&quot;. If 
a batch is retried it will have the exact same transaction id.</li>
-<li>State updates are ordered among batches. That is, the state updates for 
batch 3 won&#39;t be applied until the state updates for batch 2 have 
succeeded.</li>
-</ol>
-
-<p>With these two primitives, you can achieve exactly-once semantics with your 
state updates. Rather than storing just the count in the database, you can 
store the transaction id together with the count in the database as an 
atomic value. Then, when updating the count, you can just compare the 
transaction id in the database with the transaction id for the current batch. 
If they&#39;re the same, you skip the update – because of the strong 
ordering, you know for sure that the value in the database incorporates the 
current batch. If they&#39;re different, you increment the count.</p>
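-
-<p>Here is a minimal, illustrative sketch of that comparison logic (this is not Trident&#39;s actual State implementation; the value class and method are hypothetical):</p>
-<div class="highlight"><pre><code class="language-java" data-lang="java">// Hypothetical value stored atomically in the database for a single key,
-// pairing the running count with the transaction id of the last applied batch.
-public class TxidCount {
-   public long txid;
-   public long count;
-
-   // Apply a partial count for a batch, skipping the update if that batch
-   // (identified by its transaction id) has already been applied.
-   public static TxidCount applyBatch(TxidCount stored, long batchTxid, long partialCount) {
-       if (stored != null) {
-           if (stored.txid == batchTxid) {
-               // Strong ordering guarantees the stored value already includes this batch.
-               return stored;
-           }
-       }
-       TxidCount updated = new TxidCount();
-       updated.txid = batchTxid;
-       updated.count = (stored == null ? 0 : stored.count) + partialCount;
-       return updated;
-   }
-}
-</code></pre></div>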
-
-<p>Of course, you don&#39;t have to do this logic manually in your topologies. 
This logic is wrapped by the State abstraction and done automatically. Nor is 
your State object required to implement the transaction id trick: if you 
don&#39;t want to pay the cost of storing the transaction id in the database, 
you don&#39;t have to. In that case the State will have 
at-least-once processing semantics in the case of failures (which may be fine 
for your application). You can read more about how to implement a State and the 
various fault-tolerance tradeoffs possible <a 
href="/documentation/Trident-state.html">in this doc</a>.</p>
-
-<p>A State is allowed to use whatever strategy it wants to store state. So it 
could store state in an external database or it could keep the state in-memory 
but backed by HDFS (like how HBase works). States are not required to hold 
onto state forever. For example, you could have an in-memory State 
implementation that only keeps the last X hours of data available and drops 
anything older. Take a look at the implementation of the <a 
href="https://github.com/nathanmarz/trident-memcached/blob/master/src/jvm/trident/memcached/MemcachedState.java";>Memcached
 integration</a> for an example State implementation.</p>
-
-<h2 id="execution-of-trident-topologies">Execution of Trident topologies</h2>
-
-<p>Trident topologies compile down into a Storm topology that is as efficient as 
possible. Tuples are only sent over the network when a repartitioning of the 
data is required, such as if you do a groupBy or a shuffle. So if you had this 
Trident topology:</p>
-
-<p><img src="images/trident-to-storm1.png" alt="Compiling Trident to Storm 
1"></p>
-
-<p>It would compile into Storm spouts/bolts like this:</p>
-
-<p><img src="images/trident-to-storm2.png" alt="Compiling Trident to Storm 
2"></p>
-
-<h2 id="conclusion">Conclusion</h2>
-
-<p>Trident makes realtime computation elegant. You&#39;ve seen how high 
throughput stream processing, state manipulation, and low-latency querying can 
be seamlessly intermixed via Trident&#39;s API. Trident lets you express your 
realtime computations in a natural way while still getting maximal 
performance.</p>
-
-
-
-                 </div>
-              </div>
-         </div>
-<footer>
-    <div class="container-fluid">
-        <div class="row">
-            <div class="col-md-3">
-                <div class="footer-widget">
-                    <h5>Meetups</h5>
-                    <ul class="latest-news">
-                        
-                        <li><a 
href="http://www.meetup.com/Apache-Storm-Apache-Kafka/";>Apache Storm & Apache 
Kafka</a> <span class="small">(Sunnyvale, CA)</span></li>
-                        
-                        <li><a 
href="http://www.meetup.com/Apache-Storm-Kafka-Users/";>Apache Storm & Kafka 
Users</a> <span class="small">(Seattle, WA)</span></li>
-                        
-                        <li><a 
href="http://www.meetup.com/New-York-City-Storm-User-Group/";>NYC Storm User 
Group</a> <span class="small">(New York, NY)</span></li>
-                        
-                        <li><a 
href="http://www.meetup.com/Bay-Area-Stream-Processing";>Bay Area Stream 
Processing</a> <span class="small">(Emeryville, CA)</span></li>
-                        
-                        <li><a 
href="http://www.meetup.com/Boston-Storm-Users/";>Boston Realtime Data</a> <span 
class="small">(Boston, MA)</span></li>
-                        
-                        <li><a 
href="http://www.meetup.com/storm-london";>London Storm User Group</a> <span 
class="small">(London, UK)</span></li>
-                        
-                        <!-- <li><a 
href="http://www.meetup.com/Apache-Storm-Kafka-Users/";>Seatle, WA</a> <span 
class="small">(27 Jun 2015)</span></li> -->
-                    </ul>
-                </div>
-            </div>
-            <div class="col-md-3">
-                <div class="footer-widget">
-                    <h5>About Storm</h5>
-                    <p>Storm integrates with any queueing system and any 
database system. Storm's spout abstraction makes it easy to integrate a new 
queuing system. Likewise, integrating Storm with database systems is easy.</p>
-               </div>
-            </div>
-            <div class="col-md-3">
-                <div class="footer-widget">
-                    <h5>First Look</h5>
-                    <ul class="footer-list">
-                        <li><a 
href="/releases/current/Rationale.html">Rationale</a></li>
-                        <li><a 
href="/releases/current/Tutorial.html">Tutorial</a></li>
-                        <li><a 
href="/releases/current/Setting-up-development-environment.html">Setting up 
development environment</a></li>
-                        <li><a 
href="/releases/current/Creating-a-new-Storm-project.html">Creating a new Storm 
project</a></li>
-                    </ul>
-                </div>
-            </div>
-            <div class="col-md-3">
-                <div class="footer-widget">
-                    <h5>Documentation</h5>
-                    <ul class="footer-list">
-                        <li><a 
href="/releases/current/index.html">Index</a></li>
-                        <li><a 
href="/releases/current/javadocs/index.html">Javadoc</a></li>
-                        <li><a href="/releases/current/FAQ.html">FAQ</a></li>
-                    </ul>
-                </div>
-            </div>
-        </div>
-        <hr/>
-        <div class="row">   
-            <div class="col-md-12">
-                <p align="center">Copyright © 2015 <a 
href="http://www.apache.org";>Apache Software Foundation</a>. All Rights 
Reserved. 
-                    <br>Apache Storm, Apache, the Apache feather logo, and the 
Apache Storm project logos are trademarks of The Apache Software Foundation. 
-                    <br>All other marks mentioned may be trademarks or 
registered trademarks of their respective owners.</p>
-            </div>
-        </div>
-    </div>
-</footer>
-<!--Footer End-->
-<!-- Scroll to top -->
-<span class="totop"><a href="#"><i class="fa fa-angle-up"></i></a></span> 
-
-</body>
-
-</html>
-

http://git-wip-us.apache.org/repos/asf/storm-site/blob/ff14ea94/content/releases/0.10.1/Troubleshooting.html
----------------------------------------------------------------------
diff --git a/content/releases/0.10.1/Troubleshooting.html 
b/content/releases/0.10.1/Troubleshooting.html
deleted file mode 100644
index ab6e03f..0000000
--- a/content/releases/0.10.1/Troubleshooting.html
+++ /dev/null
@@ -1,450 +0,0 @@
-<!DOCTYPE html>
-<html>
-    <head>
-    <meta charset="utf-8">
-    <meta http-equiv="X-UA-Compatible" content="IE=edge">
-    <meta name="viewport" content="width=device-width, initial-scale=1">
-
-    <link rel="shortcut icon" href="/favicon.ico" type="image/x-icon">
-    <link rel="icon" href="/favicon.ico" type="image/x-icon">
-
-    <title>Troubleshooting</title>
-
-    <!-- Bootstrap core CSS -->
-    <link href="/assets/css/bootstrap.min.css" rel="stylesheet">
-    <!-- Bootstrap theme -->
-    <link href="/assets/css/bootstrap-theme.min.css" rel="stylesheet">
-
-    <!-- Custom styles for this template -->
-    <link rel="stylesheet" 
href="http://fortawesome.github.io/Font-Awesome/assets/font-awesome/css/font-awesome.css";>
-    <link href="/css/style.css" rel="stylesheet">
-    <link href="/assets/css/owl.theme.css" rel="stylesheet">
-    <link href="/assets/css/owl.carousel.css" rel="stylesheet">
-    <script type="text/javascript" src="/assets/js/jquery.min.js"></script>
-    <script type="text/javascript" src="/assets/js/bootstrap.min.js"></script>
-    <script type="text/javascript" 
src="/assets/js/owl.carousel.min.js"></script>
-    <script type="text/javascript" src="/assets/js/storm.js"></script>
-    <!-- Just for debugging purposes. Don't actually copy these 2 lines! -->
-    <!--[if lt IE 9]><script 
src="../../assets/js/ie8-responsive-file-warning.js"></script><![endif]-->
-    
-    <!-- HTML5 shim and Respond.js for IE8 support of HTML5 elements and media 
queries -->
-    <!--[if lt IE 9]>
-      <script 
src="https://oss.maxcdn.com/html5shiv/3.7.2/html5shiv.min.js";></script>
-      <script 
src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js";></script>
-    <![endif]-->
-  </head>
-
-
-  <body>
-    <header>
-  <div class="container-fluid">
-     <div class="row">
-          <div class="col-md-5">
-            <a href="/index.html"><img src="/images/logo.png" class="logo" 
/></a>
-          </div>
-          <div class="col-md-5">
-            
-              <h1>Version: 0.10.1</h1>
-            
-          </div>
-          <div class="col-md-2">
-            <a href="/downloads.html" class="btn-std btn-block 
btn-download">Download</a>
-          </div>
-        </div>
-    </div>
-</header>
-<!--Header End-->
-<!--Navigation Begin-->
-<div class="navbar" role="banner">
-  <div class="container-fluid">
-      <div class="navbar-header">
-          <button class="navbar-toggle" type="button" data-toggle="collapse" 
data-target=".bs-navbar-collapse">
-                <span class="icon-bar"></span>
-                <span class="icon-bar"></span>
-                <span class="icon-bar"></span>
-            </button>
-        </div>
-        <nav class="collapse navbar-collapse bs-navbar-collapse" 
role="navigation">
-          <ul class="nav navbar-nav">
-              <li><a href="/index.html" id="home">Home</a></li>
-                <li><a href="/getting-help.html" id="getting-help">Getting 
Help</a></li>
-                <li><a href="/about/integrates.html" id="project-info">Project 
Information</a></li>
-                <li class="dropdown">
-                    <a href="#" class="dropdown-toggle" data-toggle="dropdown" 
id="documentation">Documentation <b class="caret"></b></a>
-                    <ul class="dropdown-menu">
-                      
-                        
-                          <li><a 
href="/releases/2.0.0-SNAPSHOT/index.html">2.0.0-SNAPSHOT</a></li>
-                        
-                      
-                        
-                          <li><a 
href="/releases/1.2.0/index.html">1.2.0</a></li>
-                        
-                      
-                        
-                          <li><a 
href="/releases/1.1.2/index.html">1.1.2</a></li>
-                        
-                      
-                        
-                          <li><a 
href="/releases/1.1.1/index.html">1.1.1</a></li>
-                        
-                      
-                        
-                          <li><a 
href="/releases/1.1.0/index.html">1.1.0</a></li>
-                        
-                      
-                        
-                      
-                        
-                          <li><a 
href="/releases/1.0.6/index.html">1.0.6</a></li>
-                        
-                      
-                        
-                          <li><a 
href="/releases/1.0.4/index.html">1.0.4</a></li>
-                        
-                      
-                        
-                      
-                        
-                          <li><a 
href="/releases/1.0.3/index.html">1.0.3</a></li>
-                        
-                      
-                        
-                      
-                        
-                      
-                        
-                      
-                        
-                          <li><a 
href="/releases/0.10.2/index.html">0.10.2</a></li>
-                        
-                      
-                        
-                          <li><a 
href="/releases/0.10.1/index.html">0.10.1</a></li>
-                        
-                      
-                        
-                      
-                        
-                      
-                        
-                          <li><a 
href="/releases/0.9.7/index.html">0.9.7</a></li>
-                        
-                      
-                        
-                          <li><a 
href="/releases/0.9.6/index.html">0.9.6</a></li>
-                        
-                      
-                        
-                      
-                        
-                      
-                        
-                      
-                        
-                      
-                        
-                      
-                    </ul>
-                </li>
-                <li><a href="/talksAndVideos.html">Talks and 
Slideshows</a></li>
-                <li class="dropdown">
-                    <a href="#" class="dropdown-toggle" data-toggle="dropdown" 
id="contribute">Community <b class="caret"></b></a>
-                    <ul class="dropdown-menu">
-                        <li><a 
href="/contribute/Contributing-to-Storm.html">Contributing</a></li>
-                        <li><a href="/contribute/People.html">People</a></li>
-                        <li><a href="/contribute/BYLAWS.html">ByLaws</a></li>
-                    </ul>
-                </li>
-                <li><a href="/2018/02/15/storm120-released.html" 
id="news">News</a></li>
-            </ul>
-        </nav>
-    </div>
-</div>
-
-
-
-    <div class="container-fluid">
-    <h1 class="page-title">Troubleshooting</h1>
-          <div class="row">
-               <div class="col-md-12">
-                    <!-- Documentation -->
-
-<p class="post-meta"></p>
-
-<p>This page lists issues people have run into when using Storm along with 
their solutions.</p>
-
-<h3 id="worker-processes-are-crashing-on-startup-with-no-stack-trace">Worker 
processes are crashing on startup with no stack trace</h3>
-
-<p>Possible symptoms:</p>
-
-<ul>
-<li>Topologies work with one node, but workers crash with multiple nodes</li>
-</ul>
-
-<p>Solutions:</p>
-
-<ul>
-<li>You may have a misconfigured subnet, where nodes can&#39;t locate other nodes based on their hostname. ZeroMQ sometimes crashes the process when it can&#39;t resolve a host. There are two solutions:
-
-<ul>
-<li>Make a mapping from hostname to IP address in /etc/hosts (see the example below)</li>
-<li>Set up an internal DNS so that nodes can locate each other based on hostname.</li>
-</ul></li>
-</ul>
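-
-<p>A minimal sketch of the /etc/hosts approach (the hostnames and addresses here are placeholders, not values from any real cluster; use your own nodes and keep the file consistent on every machine):</p>
-<div class="highlight"><pre><code class="language-" data-lang=""># /etc/hosts on every Storm node (example entries only)
-192.168.1.10   storm-nimbus
-192.168.1.11   storm-supervisor1
-192.168.1.12   storm-supervisor2
-</code></pre></div>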
-
-<h3 id="nodes-are-unable-to-communicate-with-each-other">Nodes are unable to communicate with each other</h3>
-
-<p>Possible symptoms:</p>
-
-<ul>
-<li>Every spout tuple is failing</li>
-<li>Processing is not working</li>
-</ul>
-
-<p>Solutions:</p>
-
-<ul>
-<li>Storm doesn&#39;t work with IPv6. You can force IPv4 by adding <code>-Djava.net.preferIPv4Stack=true</code> to the supervisor child options and restarting the supervisor (see the example below).</li>
-<li>You may have a misconfigured subnet. See the solutions for <code>Worker processes are crashing on startup with no stack trace</code>.</li>
-</ul>
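-
-<p>A minimal <code>storm.yaml</code> sketch of that setting; if <code>supervisor.childopts</code> already carries other JVM flags in your installation, append the flag rather than replacing them:</p>
-<div class="highlight"><pre><code class="language-" data-lang=""># storm.yaml on each supervisor node
-supervisor.childopts: "-Djava.net.preferIPv4Stack=true"
-</code></pre></div>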
-
-<h3 id="topology-stops-processing-tuples-after-awhile">Topology stops processing tuples after a while</h3>
-
-<p>Symptoms:</p>
-
-<ul>
-<li>Processing works fine for a while, then suddenly stops and spout tuples start failing en masse.</li>
-</ul>
-
-<p>Solutions:</p>
-
-<ul>
-<li>This is a known issue with ZeroMQ 2.1.10. Downgrade to ZeroMQ 2.1.7.</li>
-</ul>
-
-<h3 id="not-all-supervisors-appear-in-storm-ui">Not all supervisors appear in Storm UI</h3>
-
-<p>Symptoms:</p>
-
-<ul>
-<li>Some supervisor processes are missing from the Storm UI</li>
-<li>List of supervisors in Storm UI changes on refreshes</li>
-</ul>
-
-<p>Solutions:</p>
-
-<ul>
-<li>Make sure the supervisor local dirs are independent (e.g., not sharing a local dir over NFS; see the example below)</li>
-<li>Try deleting the local dirs for the supervisors and restarting the daemons. Supervisors create a unique id for themselves and store it locally. When that id is copied to other nodes, Storm gets confused.</li>
-</ul>
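-
-<p>A <code>storm.yaml</code> sketch of an independent local dir (the path is only an illustration; the point is that it must live on node-local storage, not on a mount shared between machines):</p>
-<div class="highlight"><pre><code class="language-" data-lang=""># storm.yaml on every node
-storm.local.dir: "/var/storm-local"
-</code></pre></div>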
-
-<h3 id="multiple-defaults-yaml-found-error">&quot;Multiple defaults.yaml found&quot; error</h3>
-
-<p>Symptoms:</p>
-
-<ul>
-<li>When deploying a topology with &quot;storm jar&quot;, you get this error</li>
-</ul>
-
-<p>Solution:</p>
-
-<ul>
-<li>You&#39;re most likely including the Storm jars inside your topology jar. When packaging your topology jar, don&#39;t include the Storm jars, as Storm will put them on the classpath for you (see the example below).</li>
-</ul>
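-
-<p>With a Maven build, a minimal sketch of this is to mark the Storm dependency as <code>provided</code>, so the topology compiles against Storm but the jars stay out of the packaged topology jar (adjust the version to the release you run):</p>
-<div class="highlight"><pre><code class="language-" data-lang="">&lt;!-- pom.xml of the topology project --&gt;
-&lt;dependency&gt;
-  &lt;groupId&gt;org.apache.storm&lt;/groupId&gt;
-  &lt;artifactId&gt;storm-core&lt;/artifactId&gt;
-  &lt;version&gt;0.10.1&lt;/version&gt;
-  &lt;scope&gt;provided&lt;/scope&gt;
-&lt;/dependency&gt;
-</code></pre></div>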
-
-<h3 id="nosuchmethoderror-when-running-storm-jar">&quot;NoSuchMethodError&quot; when running storm jar</h3>
-
-<p>Symptoms:</p>
-
-<ul>
-<li>When running storm jar, you get a cryptic &quot;NoSuchMethodError&quot;</li>
-</ul>
-
-<p>Solution:</p>
-
-<ul>
-<li>You&#39;re deploying your topology with a different version of Storm than you built it against. Make sure the storm client you deploy with comes from the same Storm version you compiled your topology against.</li>
-</ul>
-
-<h3 id="kryo-concurrentmodificationexception">Kryo ConcurrentModificationException</h3>
-
-<p>Symptoms:</p>
-
-<ul>
-<li>At runtime, you get a stack trace like the following:</li>
-</ul>
-<div class="highlight"><pre><code class="language-" data-lang="">java.lang.RuntimeException: java.util.ConcurrentModificationException
-    at backtype.storm.utils.DisruptorQueue.consumeBatchToCursor(DisruptorQueue.java:84)
-    at backtype.storm.utils.DisruptorQueue.consumeBatchWhenAvailable(DisruptorQueue.java:55)
-    at backtype.storm.disruptor$consume_batch_when_available.invoke(disruptor.clj:56)
-    at backtype.storm.disruptor$consume_loop_STAR_$fn__1597.invoke(disruptor.clj:67)
-    at backtype.storm.util$async_loop$fn__465.invoke(util.clj:377)
-    at clojure.lang.AFn.run(AFn.java:24)
-    at java.lang.Thread.run(Thread.java:679)
-Caused by: java.util.ConcurrentModificationException
-    at java.util.LinkedHashMap$LinkedHashIterator.nextEntry(LinkedHashMap.java:390)
-    at java.util.LinkedHashMap$EntryIterator.next(LinkedHashMap.java:409)
-    at java.util.LinkedHashMap$EntryIterator.next(LinkedHashMap.java:408)
-    at java.util.HashMap.writeObject(HashMap.java:1016)
-    at sun.reflect.GeneratedMethodAccessor17.invoke(Unknown Source)
-    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
-    at java.lang.reflect.Method.invoke(Method.java:616)
-    at java.io.ObjectStreamClass.invokeWriteObject(ObjectStreamClass.java:959)
-    at java.io.ObjectOutputStream.writeSerialData(ObjectOutputStream.java:1480)
-    at java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1416)
-    at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1174)
-    at java.io.ObjectOutputStream.writeObject(ObjectOutputStream.java:346)
-    at backtype.storm.serialization.SerializableSerializer.write(SerializableSerializer.java:21)
-    at com.esotericsoftware.kryo.Kryo.writeClassAndObject(Kryo.java:554)
-    at com.esotericsoftware.kryo.serializers.CollectionSerializer.write(CollectionSerializer.java:77)
-    at com.esotericsoftware.kryo.serializers.CollectionSerializer.write(CollectionSerializer.java:18)
-    at com.esotericsoftware.kryo.Kryo.writeObject(Kryo.java:472)
-    at backtype.storm.serialization.KryoValuesSerializer.serializeInto(KryoValuesSerializer.java:27)
-</code></pre></div>
-<p>Solution: </p>
-
-<ul>
-<li>This means that you&#39;re emitting a mutable object as an output tuple. Everything you emit into the output collector must be immutable. What&#39;s happening is that your bolt is modifying the object while it is being serialized to be sent over the network (see the sketch below).</li>
-</ul>
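-
-<p>A minimal sketch of the difference, using a hypothetical word-counting bolt written against the 0.10.x <code>backtype.storm</code> API; the fix is to emit a defensive, unmodifiable copy instead of the live map:</p>
-<div class="highlight"><pre><code class="language-" data-lang="">import java.util.Collections;
-import java.util.HashMap;
-import java.util.Map;
-
-import backtype.storm.topology.BasicOutputCollector;
-import backtype.storm.topology.OutputFieldsDeclarer;
-import backtype.storm.topology.base.BaseBasicBolt;
-import backtype.storm.tuple.Fields;
-import backtype.storm.tuple.Tuple;
-import backtype.storm.tuple.Values;
-
-public class WordCountBolt extends BaseBasicBolt {
-    // Live state that keeps changing as more tuples arrive.
-    private final Map&lt;String, Integer&gt; counts = new HashMap&lt;String, Integer&gt;();
-
-    @Override
-    public void execute(Tuple tuple, BasicOutputCollector collector) {
-        String word = tuple.getString(0);
-        Integer current = counts.get(word);
-        counts.put(word, current == null ? 1 : current + 1);
-
-        // Wrong: collector.emit(new Values(counts));
-        // The live map may be mutated by the next execute() while the
-        // transfer thread is still serializing the emitted tuple.
-
-        // Right: emit an immutable snapshot that will never change again.
-        Map&lt;String, Integer&gt; snapshot =
-                Collections.unmodifiableMap(new HashMap&lt;String, Integer&gt;(counts));
-        collector.emit(new Values(snapshot));
-    }
-
-    @Override
-    public void declareOutputFields(OutputFieldsDeclarer declarer) {
-        declarer.declare(new Fields("counts"));
-    }
-}
-</code></pre></div>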
-
-<h3 id="nullpointerexception-from-deep-inside-storm">NullPointerException from deep inside Storm</h3>
-
-<p>Symptoms:</p>
-
-<ul>
-<li>You get a NullPointerException that looks something like:</li>
-</ul>
-<div class="highlight"><pre><code class="language-" data-lang="">java.lang.RuntimeException: java.lang.NullPointerException
-    at backtype.storm.utils.DisruptorQueue.consumeBatchToCursor(DisruptorQueue.java:84)
-    at backtype.storm.utils.DisruptorQueue.consumeBatchWhenAvailable(DisruptorQueue.java:55)
-    at backtype.storm.disruptor$consume_batch_when_available.invoke(disruptor.clj:56)
-    at backtype.storm.disruptor$consume_loop_STAR_$fn__1596.invoke(disruptor.clj:67)
-    at backtype.storm.util$async_loop$fn__465.invoke(util.clj:377)
-    at clojure.lang.AFn.run(AFn.java:24)
-    at java.lang.Thread.run(Thread.java:662)
-Caused by: java.lang.NullPointerException
-    at backtype.storm.serialization.KryoTupleSerializer.serialize(KryoTupleSerializer.java:24)
-    at backtype.storm.daemon.worker$mk_transfer_fn$fn__4126$fn__4130.invoke(worker.clj:99)
-    at backtype.storm.util$fast_list_map.invoke(util.clj:771)
-    at backtype.storm.daemon.worker$mk_transfer_fn$fn__4126.invoke(worker.clj:99)
-    at backtype.storm.daemon.executor$start_batch_transfer__GT_worker_handler_BANG_$fn__3904.invoke(executor.clj:205)
-    at backtype.storm.disruptor$clojure_handler$reify__1584.onEvent(disruptor.clj:43)
-    at backtype.storm.utils.DisruptorQueue.consumeBatchToCursor(DisruptorQueue.java:81)
-    ... 6 more
-</code></pre></div>
-<p>or </p>
-<div class="highlight"><pre><code class="language-" data-lang="">java.lang.RuntimeException: java.lang.NullPointerException
-        at backtype.storm.utils.DisruptorQueue.consumeBatchToCursor(DisruptorQueue.java:128) ~[storm-core-0.9.3.jar:0.9.3]
-        at backtype.storm.utils.DisruptorQueue.consumeBatchWhenAvailable(DisruptorQueue.java:99) ~[storm-core-0.9.3.jar:0.9.3]
-        at backtype.storm.disruptor$consume_batch_when_available.invoke(disruptor.clj:80) ~[storm-core-0.9.3.jar:0.9.3]
-        at backtype.storm.disruptor$consume_loop_STAR_$fn__759.invoke(disruptor.clj:94) ~[storm-core-0.9.3.jar:0.9.3]
-        at backtype.storm.util$async_loop$fn__458.invoke(util.clj:463) ~[storm-core-0.9.3.jar:0.9.3]
-        at clojure.lang.AFn.run(AFn.java:24) [clojure-1.5.1.jar:na]
-        at java.lang.Thread.run(Thread.java:745) [na:1.7.0_65]
-Caused by: java.lang.NullPointerException: null
-        at clojure.lang.RT.intCast(RT.java:1087) ~[clojure-1.5.1.jar:na]
-        at backtype.storm.daemon.worker$mk_transfer_fn$fn__3548.invoke(worker.clj:129) ~[storm-core-0.9.3.jar:0.9.3]
-        at backtype.storm.daemon.executor$start_batch_transfer__GT_worker_handler_BANG_$fn__3282.invoke(executor.clj:258) ~[storm-core-0.9.3.jar:0.9.3]
-        at backtype.storm.disruptor$clojure_handler$reify__746.onEvent(disruptor.clj:58) ~[storm-core-0.9.3.jar:0.9.3]
-        at backtype.storm.utils.DisruptorQueue.consumeBatchToCursor(DisruptorQueue.java:125) ~[storm-core-0.9.3.jar:0.9.3]
-        ... 6 common frames omitted
-</code></pre></div>
-<p>Solution:</p>
-
-<ul>
-<li>This is caused by having multiple threads issue methods on the <code>OutputCollector</code>. All emits, acks, and fails must happen on the same thread. One subtle way this can happen is if you make an <code>IBasicBolt</code> that emits on a separate thread. <code>IBasicBolt</code>s automatically ack after <code>execute</code> is called, so this would cause multiple threads to use the <code>OutputCollector</code>, leading to this exception. When using a basic bolt, all emits must happen in the same thread that runs <code>execute</code> (see the sketch below).</li>
-</ul>
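-
-<p>A rough sketch of one way to respect that rule, assuming a hypothetical bolt (the class and <code>expensiveLookup</code> helper are placeholders) that does slow work on a background thread: the background task only pushes results onto a queue, and the thread running <code>execute</code> drains the queue and performs every emit, so results ride along with later <code>execute</code> calls:</p>
-<div class="highlight"><pre><code class="language-" data-lang="">import java.util.Map;
-import java.util.concurrent.ConcurrentLinkedQueue;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
-
-import backtype.storm.task.TopologyContext;
-import backtype.storm.topology.BasicOutputCollector;
-import backtype.storm.topology.OutputFieldsDeclarer;
-import backtype.storm.topology.base.BaseBasicBolt;
-import backtype.storm.tuple.Fields;
-import backtype.storm.tuple.Tuple;
-import backtype.storm.tuple.Values;
-
-public class AsyncLookupBolt extends BaseBasicBolt {
-    private transient ExecutorService background;
-    private transient ConcurrentLinkedQueue&lt;String&gt; results;
-
-    @Override
-    public void prepare(Map stormConf, TopologyContext context) {
-        background = Executors.newSingleThreadExecutor();
-        results = new ConcurrentLinkedQueue&lt;String&gt;();
-    }
-
-    @Override
-    public void execute(Tuple tuple, BasicOutputCollector collector) {
-        final String url = tuple.getString(0);
-
-        // The slow work runs on a background thread, but it only hands its
-        // result back through a queue; it never touches the collector.
-        background.submit(new Runnable() {
-            public void run() {
-                results.add(expensiveLookup(url));
-            }
-        });
-
-        // Only this thread (the one running execute) ever emits.
-        String finished;
-        while ((finished = results.poll()) != null) {
-            collector.emit(new Values(finished));
-        }
-    }
-
-    private String expensiveLookup(String url) {
-        return url; // placeholder for a slow external call
-    }
-
-    @Override
-    public void cleanup() {
-        background.shutdown();
-    }
-
-    @Override
-    public void declareOutputFields(OutputFieldsDeclarer declarer) {
-        declarer.declare(new Fields("result"));
-    }
-}
-</code></pre></div>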
-
-
-
-                 </div>
-              </div>
-         </div>
-<footer>
-    <div class="container-fluid">
-        <div class="row">
-            <div class="col-md-3">
-                <div class="footer-widget">
-                    <h5>Meetups</h5>
-                    <ul class="latest-news">
-                        <li><a href="http://www.meetup.com/Apache-Storm-Apache-Kafka/">Apache Storm & Apache Kafka</a> <span class="small">(Sunnyvale, CA)</span></li>
-                        <li><a href="http://www.meetup.com/Apache-Storm-Kafka-Users/">Apache Storm & Kafka Users</a> <span class="small">(Seattle, WA)</span></li>
-                        <li><a href="http://www.meetup.com/New-York-City-Storm-User-Group/">NYC Storm User Group</a> <span class="small">(New York, NY)</span></li>
-                        <li><a href="http://www.meetup.com/Bay-Area-Stream-Processing">Bay Area Stream Processing</a> <span class="small">(Emeryville, CA)</span></li>
-                        <li><a href="http://www.meetup.com/Boston-Storm-Users/">Boston Realtime Data</a> <span class="small">(Boston, MA)</span></li>
-                        <li><a href="http://www.meetup.com/storm-london">London Storm User Group</a> <span class="small">(London, UK)</span></li>
-                    </ul>
-                </div>
-            </div>
-            <div class="col-md-3">
-                <div class="footer-widget">
-                    <h5>About Storm</h5>
-                    <p>Storm integrates with any queueing system and any database system. Storm's spout abstraction makes it easy to integrate a new queueing system. Likewise, integrating Storm with database systems is easy.</p>
-               </div>
-            </div>
-            <div class="col-md-3">
-                <div class="footer-widget">
-                    <h5>First Look</h5>
-                    <ul class="footer-list">
-                        <li><a href="/releases/current/Rationale.html">Rationale</a></li>
-                        <li><a href="/releases/current/Tutorial.html">Tutorial</a></li>
-                        <li><a href="/releases/current/Setting-up-development-environment.html">Setting up development environment</a></li>
-                        <li><a href="/releases/current/Creating-a-new-Storm-project.html">Creating a new Storm project</a></li>
-                    </ul>
-                </div>
-            </div>
-            <div class="col-md-3">
-                <div class="footer-widget">
-                    <h5>Documentation</h5>
-                    <ul class="footer-list">
-                        <li><a href="/releases/current/index.html">Index</a></li>
-                        <li><a href="/releases/current/javadocs/index.html">Javadoc</a></li>
-                        <li><a href="/releases/current/FAQ.html">FAQ</a></li>
-                    </ul>
-                </div>
-            </div>
-        </div>
-        <hr/>
-        <div class="row">
-            <div class="col-md-12">
-                <p align="center">Copyright © 2015 <a href="http://www.apache.org">Apache Software Foundation</a>. All Rights Reserved.
-                    <br>Apache Storm, Apache, the Apache feather logo, and the Apache Storm project logos are trademarks of The Apache Software Foundation.
-                    <br>All other marks mentioned may be trademarks or registered trademarks of their respective owners.</p>
-            </div>
-        </div>
-    </div>
-</footer>
-<!--Footer End-->
-<!-- Scroll to top -->
-<span class="totop"><a href="#"><i class="fa fa-angle-up"></i></a></span>
-
-</body>
-
-</html>
-
