http://git-wip-us.apache.org/repos/asf/metron/blob/ae1d3eb9/site/current-book/metron-platform/metron-indexing/index.html
----------------------------------------------------------------------
diff --git a/site/current-book/metron-platform/metron-indexing/index.html
b/site/current-book/metron-platform/metron-indexing/index.html
index 3d79323..6d4b8d6 100644
--- a/site/current-book/metron-platform/metron-indexing/index.html
+++ b/site/current-book/metron-platform/metron-indexing/index.html
@@ -1,378 +1,200 @@
<!DOCTYPE html>
<!--
- | Generated by Apache Maven Doxia at 2018-01-03
- | Rendered using Apache Maven Fluido Skin 1.3.0
+ | Generated by Apache Maven Doxia Site Renderer 1.8 from
src/site/markdown/metron-platform/metron-indexing/index.md at 2018-06-07
+ | Rendered using Apache Maven Fluido Skin 1.7
-->
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
- <meta name="Date-Revision-yyyymmdd" content="20180103" />
+ <meta name="Date-Revision-yyyymmdd" content="20180607" />
<meta http-equiv="Content-Language" content="en" />
<title>Metron – Indexing</title>
- <link rel="stylesheet" href="../../css/apache-maven-fluido-1.3.0.min.css"
/>
+ <link rel="stylesheet" href="../../css/apache-maven-fluido-1.7.min.css" />
<link rel="stylesheet" href="../../css/site.css" />
<link rel="stylesheet" href="../../css/print.css" media="print" />
-
-
- <script type="text/javascript"
src="../../js/apache-maven-fluido-1.3.0.min.js"></script>
-
-
-
-<script type="text/javascript">$( document ).ready( function() { $(
'.carousel' ).carousel( { interval: 3500 } ) } );</script>
-
- </head>
- <body class="topBarDisabled">
-
-
-
-
- <div class="container-fluid">
- <div id="banner">
- <div class="pull-left">
- <a href="http://metron.apache.org/"
id="bannerLeft">
-
<img src="../../images/metron-logo.png" alt="Apache Metron"
width="148px" height="48px"/>
- </a>
- </div>
- <div class="pull-right"> </div>
+ <script type="text/javascript"
src="../../js/apache-maven-fluido-1.7.min.js"></script>
+<script type="text/javascript">
+ $( document ).ready( function() { $( '.carousel' ).carousel( {
interval: 3500 } ) } );
+ </script>
+ </head>
+ <body class="topBarDisabled">
+ <div class="container-fluid">
+ <div id="banner">
+ <div class="pull-left"><a href="http://metron.apache.org/"
id="bannerLeft"><img src="../../images/metron-logo.png" alt="Apache Metron"
width="148px" height="48px"/></a></div>
+ <div class="pull-right"></div>
<div class="clear"><hr/></div>
</div>
<div id="breadcrumbs">
<ul class="breadcrumb">
-
-
- <li class="">
- <a href="http://www.apache.org" class="externalLink"
title="Apache">
- Apache</a>
- </li>
- <li class="divider ">/</li>
- <li class="">
- <a href="http://metron.apache.org/" class="externalLink"
title="Metron">
- Metron</a>
- </li>
- <li class="divider ">/</li>
- <li class="">
- <a href="../../index.html" title="Documentation">
- Documentation</a>
- </li>
- <li class="divider ">/</li>
- <li class="">Indexing</li>
-
-
-
- <li id="publishDate" class="pull-right">Last Published:
2018-01-03</li> <li class="divider pull-right">|</li>
- <li id="projectVersion" class="pull-right">Version: 0.4.2</li>
-
- </ul>
+ <li class=""><a href="http://www.apache.org" class="externalLink"
title="Apache">Apache</a><span class="divider">/</span></li>
+ <li class=""><a href="http://metron.apache.org/" class="externalLink"
title="Metron">Metron</a><span class="divider">/</span></li>
+ <li class=""><a href="../../index.html"
title="Documentation">Documentation</a><span class="divider">/</span></li>
+ <li class="active ">Indexing</li>
+ <li id="publishDate" class="pull-right"><span class="divider">|</span>
Last Published: 2018-06-07</li>
+ <li id="projectVersion" class="pull-right">Version: 0.5.0</li>
+ </ul>
</div>
-
-
<div class="row-fluid">
- <div id="leftColumn" class="span3">
+ <div id="leftColumn" class="span2">
<div class="well sidebar-nav">
-
-
- <ul class="nav nav-list">
- <li class="nav-header">User Documentation</li>
-
- <li>
-
- <a href="../../index.html" title="Metron">
- <i class="icon-chevron-down"></i>
- Metron</a>
- <ul class="nav nav-list">
-
- <li>
-
- <a href="../../Upgrading.html" title="Upgrading">
- <i class="none"></i>
- Upgrading</a>
- </li>
-
- <li>
-
- <a href="../../metron-analytics/index.html"
title="Analytics">
- <i class="icon-chevron-right"></i>
- Analytics</a>
- </li>
-
- <li>
-
- <a
href="../../metron-contrib/metron-docker/index.html" title="Docker">
- <i class="none"></i>
- Docker</a>
- </li>
-
- <li>
-
- <a href="../../metron-deployment/index.html"
title="Deployment">
- <i class="icon-chevron-right"></i>
- Deployment</a>
- </li>
-
- <li>
-
- <a
href="../../metron-interface/metron-alerts/index.html" title="Alerts">
- <i class="none"></i>
- Alerts</a>
- </li>
-
- <li>
-
- <a
href="../../metron-interface/metron-config/index.html" title="Config">
- <i class="none"></i>
- Config</a>
- </li>
-
- <li>
-
- <a
href="../../metron-interface/metron-rest/index.html" title="Rest">
- <i class="none"></i>
- Rest</a>
- </li>
-
- <li>
-
- <a href="../../metron-platform/index.html"
title="Platform">
- <i class="icon-chevron-down"></i>
- Platform</a>
- <ul class="nav nav-list">
-
- <li>
-
- <a
href="../../metron-platform/Performance-tuning-guide.html"
title="Performance-tuning-guide">
- <i class="none"></i>
- Performance-tuning-guide</a>
- </li>
-
- <li>
-
- <a
href="../../metron-platform/metron-api/index.html" title="Api">
- <i class="none"></i>
- Api</a>
- </li>
-
- <li>
-
- <a
href="../../metron-platform/metron-common/index.html" title="Common">
- <i class="none"></i>
- Common</a>
- </li>
-
- <li>
-
- <a
href="../../metron-platform/metron-data-management/index.html"
title="Data-management">
- <i class="none"></i>
- Data-management</a>
- </li>
-
- <li>
-
- <a
href="../../metron-platform/metron-elasticsearch/index.html"
title="Elasticsearch">
- <i class="none"></i>
- Elasticsearch</a>
- </li>
-
- <li>
-
- <a
href="../../metron-platform/metron-enrichment/index.html" title="Enrichment">
- <i class="none"></i>
- Enrichment</a>
- </li>
-
- <li class="active">
-
- <a href="#"><i class="none"></i>Indexing</a>
- </li>
-
- <li>
-
- <a
href="../../metron-platform/metron-management/index.html" title="Management">
- <i class="none"></i>
- Management</a>
- </li>
-
- <li>
-
- <a
href="../../metron-platform/metron-parsers/index.html" title="Parsers">
- <i class="icon-chevron-right"></i>
- Parsers</a>
- </li>
-
- <li>
-
- <a
href="../../metron-platform/metron-pcap-backend/index.html"
title="Pcap-backend">
- <i class="none"></i>
- Pcap-backend</a>
- </li>
-
- <li>
-
- <a
href="../../metron-platform/metron-writer/index.html" title="Writer">
- <i class="none"></i>
- Writer</a>
- </li>
- </ul>
- </li>
-
- <li>
-
- <a href="../../metron-sensors/index.html"
title="Sensors">
- <i class="icon-chevron-right"></i>
- Sensors</a>
- </li>
-
- <li>
-
- <a
href="../../metron-stellar/stellar-3rd-party-example/index.html"
title="Stellar-3rd-party-example">
- <i class="none"></i>
- Stellar-3rd-party-example</a>
- </li>
-
- <li>
-
- <a
href="../../metron-stellar/stellar-common/index.html" title="Stellar-common">
- <i class="icon-chevron-right"></i>
- Stellar-common</a>
- </li>
-
- <li>
-
- <a href="../../use-cases/index.html"
title="Use-cases">
- <i class="icon-chevron-right"></i>
- Use-cases</a>
- </li>
- </ul>
- </li>
- </ul>
-
-
-
- <hr class="divider" />
-
- <div id="poweredBy">
- <div class="clear"></div>
- <div class="clear"></div>
- <div class="clear"></div>
- <a href="http://maven.apache.org/" title="Built
by Maven" class="poweredBy">
- <img class="builtBy" alt="Built by Maven"
src="../../images/logos/maven-feather.png" />
- </a>
- </div>
+ <ul class="nav nav-list">
+ <li class="nav-header">User Documentation</li>
+ <li><a href="../../index.html" title="Metron"><span
class="icon-chevron-down"></span>Metron</a>
+ <ul class="nav nav-list">
+ <li><a href="../../CONTRIBUTING.html" title="CONTRIBUTING"><span
class="none"></span>CONTRIBUTING</a></li>
+ <li><a href="../../Upgrading.html" title="Upgrading"><span
class="none"></span>Upgrading</a></li>
+ <li><a href="../../metron-analytics/index.html" title="Analytics"><span
class="icon-chevron-right"></span>Analytics</a></li>
+ <li><a href="../../metron-contrib/metron-docker/index.html"
title="Docker"><span class="none"></span>Docker</a></li>
+ <li><a href="../../metron-contrib/metron-performance/index.html"
title="Performance"><span class="none"></span>Performance</a></li>
+ <li><a href="../../metron-deployment/index.html" title="Deployment"><span
class="icon-chevron-right"></span>Deployment</a></li>
+ <li><a href="../../metron-interface/metron-alerts/index.html"
title="Alerts"><span class="none"></span>Alerts</a></li>
+ <li><a href="../../metron-interface/metron-config/index.html"
title="Config"><span class="none"></span>Config</a></li>
+ <li><a href="../../metron-interface/metron-rest/index.html"
title="Rest"><span class="none"></span>Rest</a></li>
+ <li><a href="../../metron-platform/index.html" title="Platform"><span
class="icon-chevron-down"></span>Platform</a>
+ <ul class="nav nav-list">
+ <li><a href="../../metron-platform/Performance-tuning-guide.html"
title="Performance-tuning-guide"><span
class="none"></span>Performance-tuning-guide</a></li>
+ <li><a href="../../metron-platform/metron-api/index.html"
title="Api"><span class="none"></span>Api</a></li>
+ <li><a href="../../metron-platform/metron-common/index.html"
title="Common"><span class="none"></span>Common</a></li>
+ <li><a href="../../metron-platform/metron-data-management/index.html"
title="Data-management"><span class="none"></span>Data-management</a></li>
+ <li><a href="../../metron-platform/metron-elasticsearch/index.html"
title="Elasticsearch"><span class="none"></span>Elasticsearch</a></li>
+ <li><a href="../../metron-platform/metron-enrichment/index.html"
title="Enrichment"><span class="icon-chevron-right"></span>Enrichment</a></li>
+ <li class="active"><a href="#"><span class="none"></span>Indexing</a></li>
+ <li><a href="../../metron-platform/metron-management/index.html"
title="Management"><span class="none"></span>Management</a></li>
+ <li><a href="../../metron-platform/metron-parsers/index.html"
title="Parsers"><span class="icon-chevron-right"></span>Parsers</a></li>
+ <li><a href="../../metron-platform/metron-pcap-backend/index.html"
title="Pcap-backend"><span class="none"></span>Pcap-backend</a></li>
+ <li><a href="../../metron-platform/metron-writer/index.html"
title="Writer"><span class="none"></span>Writer</a></li>
+ </ul>
+</li>
+ <li><a href="../../metron-sensors/index.html" title="Sensors"><span
class="icon-chevron-right"></span>Sensors</a></li>
+ <li><a href="../../metron-stellar/stellar-3rd-party-example/index.html"
title="Stellar-3rd-party-example"><span
class="none"></span>Stellar-3rd-party-example</a></li>
+ <li><a href="../../metron-stellar/stellar-common/index.html"
title="Stellar-common"><span
class="icon-chevron-right"></span>Stellar-common</a></li>
+ <li><a href="../../metron-stellar/stellar-zeppelin/index.html"
title="Stellar-zeppelin"><span class="none"></span>Stellar-zeppelin</a></li>
+ <li><a href="../../use-cases/index.html" title="Use-cases"><span
class="icon-chevron-right"></span>Use-cases</a></li>
+ </ul>
+</li>
+</ul>
+ <hr />
+ <div id="poweredBy">
+ <div class="clear"></div>
+ <div class="clear"></div>
+ <div class="clear"></div>
+ <div class="clear"></div>
+<a href="http://maven.apache.org/" title="Built by Maven"
class="poweredBy"><img class="builtBy" alt="Built by Maven"
src="../../images/logos/maven-feather.png" /></a>
+ </div>
</div>
</div>
-
-
- <div id="bodyColumn" class="span9" >
-
- <h1>Indexing</h1>
+ <div id="bodyColumn" class="span10" >
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+-->
+<h1>Indexing</h1>
<p><a name="Indexing"></a></p>
<div class="section">
<h2><a name="Introduction"></a>Introduction</h2>
<p>The <tt>indexing</tt> topology is a topology dedicated to taking the data
from the enrichment topology that have been enriched and storing the data in
one or more supported indices</p>
-
<ul>
-
+
<li>HDFS as rolled text files, one JSON blob per line</li>
-
<li>Elasticsearch</li>
-
<li>Solr</li>
</ul>
<p>By default, this topology writes out to both HDFS and one of Elasticsearch
and Solr.</p>
<p>Indices are written in batch and the batch size and batch timeout are
specified in the <a href="#Sensor_Indexing_Configuration">Sensor Indexing
Configuration</a> via the <tt>batchSize</tt> and <tt>batchTimeout</tt>
parameters. These configs are variable by sensor type.</p></div>
<div class="section">
+<h2><a name="Minimal_Assumptions_for_Message_Structure"></a>Minimal
Assumptions for Message Structure</h2>
+<p>At minimum, a message should have a <tt>source.type</tt> field. Without
this field, the message tuple will be failed and not written, with an
appropriate error indicated in the Storm UI and logs.</p></div>
+<div class="section">
<h2><a name="Indexing_Architecture"></a>Indexing Architecture</h2>
<p><img src="../../images/indexing_arch.png" alt="Architecture" /></p>
-<p>The indexing topology is extremely simple. Data is ingested into kafka and
sent to </p>
-
+<p>The indexing topology is extremely simple. Data is ingested into kafka and
sent to</p>
<ul>
-
+
<li>An indexing bolt configured to write to either elasticsearch or Solr</li>
-
<li>An indexing bolt configured to write to HDFS under
<tt>/apps/metron/enrichment/indexed</tt></li>
</ul>
<p>By default, errors during indexing are sent back into the <tt>indexing</tt>
kafka queue so that they can be indexed and archived.</p></div>
<div class="section">
<h2><a name="Sensor_Indexing_Configuration"></a>Sensor Indexing
Configuration</h2>
-<p>The sensor specific configuration is intended to configure the indexing
used for a given sensor type (e.g. <tt>snort</tt>). </p>
-<p>Just like the global config, the format is a JSON stored in zookeeper and
on disk at <tt>$METRON_HOME/config/zookeeper/indexing</tt>. Within the
sensor-specific configuration, you can configure the individual writers. The
writers currently supported are:</p>
-
+<p>The sensor specific configuration is intended to configure the indexing
used for a given sensor type (e.g. <tt>snort</tt>).</p>
+<p>Just like the global config, the format is a JSON stored in zookeeper and
on disk at <tt>$METRON_HOME/config/zookeeper/indexing</tt>. Within the
sensor-specific configuration, you can configure the individual writers. The
writers currently supported are:</p>
<ul>
-
+
<li><tt>elasticsearch</tt></li>
-
<li><tt>hdfs</tt></li>
-
<li><tt>solr</tt></li>
</ul>
<p>Depending on how you start the indexing topology, it will have either
elasticsearch or solr and hdfs writers running.</p>
<p>The configuration for an individual writer-specific configuration is a JSON
map with the following fields:</p>
-
<ul>
-
+
<li><tt>index</tt> : The name of the index to write to (defaulted to the name
of the sensor).</li>
-
<li><tt>batchSize</tt> : The size of the batch that is written to the indices
at once. Defaults to <tt>1</tt> (no batching).</li>
-
-<li><tt>batchTimeout</tt> : The timeout after which a batch will be flushed
even if batchSize has not been met. Optional. If unspecified, or set to
<tt>0</tt>, it defaults to a system-determined duration which is a fraction of
the Storm parameter <tt>topology.message.timeout.secs</tt>. Ignored if
batchSize is <tt>1</tt>, since this disables batching.</li>
-
+<li><tt>batchTimeout</tt> : The timeout after which a batch will be flushed
even if batchSize has not been met. Optional. If unspecified, or set to
<tt>0</tt>, it defaults to a system-determined duration which is a fraction of
the Storm parameter <tt>topology.message.timeout.secs</tt>. Ignored if
batchSize is <tt>1</tt>, since this disables batching.</li>
<li><tt>enabled</tt> : Whether the writer is enabled (default
<tt>true</tt>).</li>
</ul>
<div class="section">
<h3><a name="Meta_Alerts"></a>Meta Alerts</h3>
-<p>Alerts can be grouped, after appropriate searching, into a set of alerts
called a meta alert. A meta alert is useful for maintaining the context of
searching and grouping during further investigations. Standard searches can
return meta alerts, but grouping and other aggregation or sorting requests will
not, because there’s not a clear way to aggregate in many cases if there
are multiple alerts contained in the meta alert. All meta alerts will have the
source type of metaalert, regardless of the contained alert’s
origins.</p></div>
+<p>Alerts can be grouped, after appropriate searching, into a set of alerts
called a meta alert. A meta alert is useful for maintaining the context of
searching and grouping during further investigations. Standard searches can
return meta alerts, but grouping and other aggregation or sorting requests will
not, because there’s not a clear way to aggregate in many cases if there
are multiple alerts contained in the meta alert. All meta alerts will have the
source type of metaalert, regardless of the contained alert’s
origins.</p></div>
<div class="section">
<h3><a name="Elasticsearch"></a>Elasticsearch</h3>
-<p>Metron comes with built-in templates for the default sensors for
Elasticsearch. When adding a new sensor, it will be necessary to add a new
template defining the output fields appropriately. In addition, there is a
requirement for a field <tt>alert</tt> of type <tt>nested</tt> for
Elasticsearch 2.x installs. This is detailed at <a
href="../metron-elasticsearch/index.html#Using_Metron_with_Elasticsearch_2.x">Using
Metron with Elasticsearch 2.x</a></p></div>
+<p>Metron comes with built-in templates for the default sensors for
Elasticsearch. When adding a new sensor, it will be necessary to add a new
template defining the output fields appropriately. In addition, there is a
requirement for a field <tt>alert</tt> of type <tt>nested</tt> for
Elasticsearch 2.x installs. This is detailed at <a
href="../metron-elasticsearch/index.html#Using_Metron_with_Elasticsearch_2.x">Using
Metron with Elasticsearch 2.x</a></p></div>
<div class="section">
<h3><a name="Indexing_Configuration_Examples"></a>Indexing Configuration
Examples</h3>
-<p>For a given sensor, the following scenarios would be indicated by the
following cases:</p>
+<p>For a given sensor, the following scenarios would be indicated by the
following cases:</p>
<div class="section">
<h4><a name="Base_Case"></a>Base Case</h4>
-<div class="source">
-<div class="source">
-<pre>{
+<div>
+<div>
+<pre class="source">{
}
</pre></div></div>
-<p>or no file at all.</p>
+<p>or no file at all.</p>
<ul>
-
+
<li>elasticsearch writer
-
<ul>
-
+
<li>enabled</li>
-
<li>batch size of 1</li>
-
<li>batch timeout system default</li>
-
<li>index name the same as the sensor</li>
- </ul></li>
-
+</ul>
+</li>
<li>hdfs writer
-
<ul>
-
+
<li>enabled</li>
-
<li>batch size of 1</li>
-
<li>batch timeout system default</li>
-
<li>index name the same as the sensor</li>
- </ul></li>
</ul>
-<p>If a writer config is unspecified, then a warning is indicated in the Storm
console. e.g.: <tt>WARNING: Default and (likely) unoptimized writer config used
for hdfs writer and sensor squid</tt></p></div>
+</li>
+</ul>
+<p>If a writer config is unspecified, then a warning is indicated in the Storm
console. e.g.: <tt>WARNING: Default and (likely) unoptimized writer config
used for hdfs writer and sensor squid</tt></p></div>
<div class="section">
<h4><a name="Fully_specified"></a>Fully specified</h4>
-<div class="source">
-<div class="source">
-<pre>{
+<div>
+<div>
+<pre class="source">{
"elasticsearch": {
"index": "foo",
"batchSize" : 100,
@@ -389,39 +211,32 @@
</pre></div></div>
<ul>
-
+
<li>elasticsearch writer
-
<ul>
-
+
<li>enabled</li>
-
<li>batch size of 100</li>
-
<li>batch timeout system default</li>
-
<li>index name of “foo”</li>
- </ul></li>
-
+</ul>
+</li>
<li>hdfs writer
-
<ul>
-
+
<li>enabled</li>
-
<li>batch size of 1</li>
-
<li>batch timeout system default</li>
-
<li>index name of “foo”</li>
- </ul></li>
+</ul>
+</li>
</ul></div>
<div class="section">
<h4><a name="HDFS_Writer_turned_off"></a>HDFS Writer turned off</h4>
-<div class="source">
-<div class="source">
-<pre>{
+<div>
+<div>
+<pre class="source">{
"elasticsearch": {
"index": "foo",
"enabled" : true
@@ -436,43 +251,37 @@
</pre></div></div>
<ul>
-
+
<li>elasticsearch writer
-
<ul>
-
+
<li>enabled</li>
-
<li>batch size of 1</li>
-
<li>batch timeout system default</li>
-
<li>index name of “foo”</li>
- </ul></li>
-
+</ul>
+</li>
<li>hdfs writer
-
<ul>
-
+
<li>disabled</li>
- </ul></li>
+</ul>
+</li>
</ul>
<p><a name="Updates_to_Indexed_Data"></a></p>
<h1>Updates to Indexed Data</h1>
<p>There are clear usecases where we would want to incorporate the capability
to update indexed data. Thus far, we have limited capabilities provided to
support this use-case:</p>
-
<ul>
-
+
<li>Updates to the random access index (e.g. Elasticsearch and Solr) should be
supported</li>
-
<li>Updates to the cold storage index (e.g. HDFS) is not supported currently,
however to support the batch use-case updated documents will be provided in a
NoSQL write-ahead log (e.g. a HBase table) and an Java API will be provided to
retrieve those updates scalably (i.e. a scan-free architecture).</li>
</ul>
<p>Put simply, the random access index will be always up-to-date, but the HDFS
index will need to be joined to the NoSQL write-ahead log to get current
updates.</p></div></div></div>
<div class="section">
<h2><a name="The_IndexDao_Abstraction"></a>The <tt>IndexDao</tt>
Abstraction</h2>
<p>The indices mentioned above as part of Update should be pluggable by the
developer so that new write-ahead logs or real-time indices can be supported by
providing an implementation supporting the data access patterns.</p>
-<p>To support a new index, one would need to implement the
<tt>org.apache.metron.indexing.dao.IndexDao</tt> abstraction and provide update
and search capabilities. IndexDaos may be composed and updates will be
performed in parallel. This enables a flexible strategy for specifying your
backing store for updates at runtime. For instance, currently the REST API
supports the update functionality and may be configured with a list of IndexDao
implementations to use to support the updates.</p>
-<p>Updates with the IndexDao.update method replace the current object with the
new object. For partial updates, use IndexDao.patch instead.</p>
+<p>To support a new index, one would need to implement the
<tt>org.apache.metron.indexing.dao.IndexDao</tt> abstraction and provide update
and search capabilities. IndexDaos may be composed and updates will be
performed in parallel. This enables a flexible strategy for specifying your
backing store for updates at runtime. For instance, currently the REST API
supports the update functionality and may be configured with a list of IndexDao
implementations to use to support the updates.</p>
+<p>Updates with the IndexDao.update method replace the current object with the
new object. For partial updates, use IndexDao.patch instead.</p>
<div class="section">
<h3><a name="The_HBaseDao"></a>The <tt>HBaseDao</tt></h3>
<p>Updates will be written to HBase. The key structure includes the GUID and
sensor type and for each new version, a new column is created with value as the
message.</p>
@@ -485,71 +294,87 @@
<p>The HBase column family to use for message updates.</p></div></div>
<div class="section">
<h3><a name="The_MetaAlertDao"></a>The <tt>MetaAlertDao</tt></h3>
-<p>The goal of meta alerts is to be able to group together a set of alerts
while being able to transparently perform actions like searches, as if meta
alerts were normal alerts. <tt>org.apache.metron.indexing.dao.MetaAlertDao</tt>
extends <tt>IndexDao</tt> and enables several features: </p>
-
+<p>The goal of meta alerts is to be able to group together a set of alerts
while being able to transparently perform actions like searches, as if meta
alerts were normal alerts.
<tt>org.apache.metron.indexing.dao.MetaAlertDao</tt> extends <tt>IndexDao</tt>
and enables several features:</p>
<ul>
-
+
<li>the ability to get all meta alerts associated with an alert</li>
-
<li>creation of a meta alert</li>
-
<li>adding alerts to a meta alert</li>
-
<li>removing alerts from a meta alert</li>
-
<li>changing a meta alert’s status</li>
</ul>
-<p>The implementation of this is to denormalize the relationship between
alerts and meta alerts, and store alerts as a nested field within a meta alert.
The use of nested fields is to avoid the limitations of parent-child
relationships (one-to-many) and merely linking by IDs (which causes issues with
pagination as a result of being unable to join indices). A list of containing
meta alerts is stored on an alert for the purpose of keeping source alerts and
alerts contained in meta alerts in sync.</p>
+<p>The implementation of this is to denormalize the relationship between
alerts and meta alerts, and store alerts as a nested field within a meta alert.
The use of nested fields is to avoid the limitations of parent-child
relationships (one-to-many) and merely linking by IDs (which causes issues with
pagination as a result of being unable to join indices). A list of containing
meta alerts is stored on an alert for the purpose of keeping source alerts and
alerts contained in meta alerts in sync.</p>
<p>The search functionality of <tt>IndexDao</tt> is wrapped by the
<tt>MetaAlertDao</tt> in order to provide both regular and meta alerts
side-by-side with sorting. The updating capabilities are similarly wrapped, in
order to ensure updates are carried through both the alerts and associated meta
alerts. Both of these functions are handled under the hood.</p>
<p>In addition, API endpoints have been added to expose the features listed
above. The denormalization handles the case of going from meta alert to alert
automatically.</p>
<p><a name="Notes_on_Performance_Tuning"></a></p>
<h1>Notes on Performance Tuning</h1>
-<p>Default installed Metron is untuned for production deployment. By far and
wide, the most likely piece to require TLC from a performance perspective is
the indexing layer. An index that does not keep up will back up and you will
see errors in the kafka bolt. There are a few knobs to tune to get the most out
of your system.</p></div></div>
+<p>Default installed Metron is untuned for production deployment. By far,
the most likely piece to require TLC from a performance perspective is
the indexing layer. An index that does not keep up will back up and you will
see errors in the kafka bolt. There are a few knobs to tune to get the most
out of your system.</p></div></div>
<div class="section">
<h2><a name="Kafka_Queue"></a>Kafka Queue</h2>
-<p>The <tt>indexing</tt> kafka queue is a collection point from the enrichment
topology. As such, make sure that the number of partitions in the kafka topic
is sufficient to handle the throughput that you expect.</p></div>
+<p>The <tt>indexing</tt> kafka queue is a collection point from the enrichment
topology. As such, make sure that the number of partitions in the kafka topic
is sufficient to handle the throughput that you expect.</p></div>
<div class="section">
<h2><a name="Indexing_Topology"></a>Indexing Topology</h2>
-<p>The <tt>indexing</tt> topology as started by the
<tt>$METRON_HOME/bin/start_elasticsearch_topology.sh</tt> or
<tt>$METRON_HOME/bin/start_solr_topology.sh</tt> script uses a default of one
executor per bolt. In a real production system, this should be customized by
modifying the flux file in <tt>$METRON_HOME/flux/indexing/remote.yaml</tt>. </p>
-
+<p>The <tt>indexing</tt> topology as started by the
<tt>$METRON_HOME/bin/start_elasticsearch_topology.sh</tt> or
<tt>$METRON_HOME/bin/start_solr_topology.sh</tt> script uses a default of one
executor per bolt. In a real production system, this should be customized by
modifying the flux file in <tt>$METRON_HOME/flux/indexing/remote.yaml</tt>.</p>
<ul>
-
-<li>Add a <tt>parallelism</tt> field to the bolts to give Storm a parallelism
hint for the various components. Give bolts which appear to be bottlenecks
(e.g. the indexing bolt) a larger hint.</li>
-
+
+<li>Add a <tt>parallelism</tt> field to the bolts to give Storm a parallelism
hint for the various components. Give bolts which appear to be bottlenecks
(e.g. the indexing bolt) a larger hint.</li>
<li>Add a <tt>parallelism</tt> field to the kafka spout which matches the
number of partitions for the enrichment kafka queue.</li>
-
-<li>Adjust the number of workers for the topology by adjusting the
<tt>topology.workers</tt> field for the topology.</li>
+<li>Adjust the number of workers for the topology by adjusting the
<tt>topology.workers</tt> field for the topology.</li>
</ul>
<p>Finally, if workers and executors are new to you or you don’t know
where to modify the flux file, the following might be of use to you:</p>
-
<ul>
-
+
<li><a class="externalLink"
href="http://www.michael-noll.com/blog/2012/10/16/understanding-the-parallelism-of-a-storm-topology/">Understanding
the Parallelism of a Storm Topology</a></li>
-
<li><a class="externalLink"
href="http://storm.apache.org/releases/current/flux.html">Flux Docs</a></li>
-</ul></div>
+</ul>
+<div class="section">
+<h3><a name="Rest_endpoints"></a>Rest endpoints</h3>
+<p>There are REST endpoints available to perform operations such as start, stop,
activate, and deactivate on the <tt>indexing</tt> topologies.</p>
+<table border="0" class="table table-striped">
+<thead>
+
+<tr class="a">
+<th> </th></tr>
+</thead><tbody>
+
+<tr class="b">
+<td> <a
href="../../metron-interface/metron-rest/index.html#GET_apiv1stormindexingbatch">
<tt>GET /api/v1/storm/indexing/batch</tt></a></td></tr>
+<tr class="a">
+<td> <a
href="../../metron-interface/metron-rest/index.html#GET_apiv1stormindexingbatchactivate">
<tt>GET /api/v1/storm/indexing/batch/activate</tt></a></td></tr>
+<tr class="b">
+<td> <a
href="../../metron-interface/metron-rest/index.html#GET_apiv1stormindexingbatchdeactivate">
<tt>GET /api/v1/storm/indexing/batch/deactivate</tt></a></td></tr>
+<tr class="a">
+<td> <a
href="../../metron-interface/metron-rest/index.html#GET_apiv1stormindexingbatchstart">
<tt>GET /api/v1/storm/indexing/batch/start</tt></a></td></tr>
+<tr class="b">
+<td> <a
href="../../metron-interface/metron-rest/index.html#GET_apiv1stormindexingbatchstop">
<tt>GET /api/v1/storm/indexing/batch/stop</tt></a></td></tr>
+<tr class="a">
+<td> <a
href="../../metron-interface/metron-rest/index.html#GET_apiv1stormindexingrandomaccess">
<tt>GET /api/v1/storm/indexing/randomaccess</tt></a></td></tr>
+<tr class="b">
+<td> <a
href="../../metron-interface/metron-rest/index.html#GET_apiv1stormindexingrandomaccessactivate">
<tt>GET /api/v1/storm/indexing/randomaccess/activate</tt></a></td></tr>
+<tr class="a">
+<td> <a
href="../../metron-interface/metron-rest/index.html#GET_apiv1stormindexingrandomaccessdeactivate">
<tt>GET /api/v1/storm/indexing/randomaccess/deactivate</tt></a></td></tr>
+<tr class="b">
+<td> <a
href="../../metron-interface/metron-rest/index.html#GET_apiv1stormindexingrandomaccessstart">
<tt>GET /api/v1/storm/indexing/randomaccess/start</tt></a></td></tr>
+<tr class="a">
+<td> <a
href="../../metron-interface/metron-rest/index.html#GET_apiv1stormindexingrandomaccessstop">
<tt>GET /api/v1/storm/indexing/randomaccess/stop</tt></a></td></tr>
+</tbody>
+</table></div></div>
<div class="section">
<h2><a name="Zeppelin_Notebooks"></a>Zeppelin Notebooks</h2>
-<p>Zeppelin notebooks can be added to <tt>/src/main/config/zeppelin/</tt> (and
subdirectories can be created for organization). The placed files must be .json
files and be named appropriately. These files must be added to the metron.spec
file and the RPMs rebuilt to be available to be loaded into Ambari.</p>
+<p>Zeppelin notebooks can be added to <tt>/src/main/config/zeppelin/</tt> (and
subdirectories can be created for organization). The placed files must be
.json files and be named appropriately. These files must be added to the
metron.spec file and the RPMs rebuilt to be available to be loaded into
Ambari.</p>
<p>The notebook files will be found on the server in
<tt>$METRON_HOME/config/zeppelin</tt></p>
<p>The Ambari Management Pack has a custom action to load these templates,
ZEPPELIN_DASHBOARD_INSTALL, that will import them into Zeppelin.</p></div>
- </div>
- </div>
- </div>
-
+ </div>
+ </div>
+ </div>
<hr/>
-
<footer>
- <div class="container-fluid">
- <div class="row span12">Copyright © 2018
- <a href="https://www.apache.org">The Apache Software
Foundation</a>.
- All Rights Reserved.
-
+ <div class="container-fluid">
+ <div class="row-fluid">
+© 2015-2016 The Apache Software Foundation. Apache Metron, Metron, Apache,
the Apache feather logo,
+ and the Apache Metron project logo are trademarks of The Apache
Software Foundation.
+ </div>
</div>
-
-
-
- </div>
</footer>
</body>
</html>