Added: release/metron/0.4.2/site-book/metron-platform/metron-common/index.html
==============================================================================
--- release/metron/0.4.2/site-book/metron-platform/metron-common/index.html 
(added)
+++ release/metron/0.4.2/site-book/metron-platform/metron-common/index.html Wed 
Jan  3 18:25:57 2018
@@ -0,0 +1,877 @@
+<!DOCTYPE html>
+<!--
+ | Generated by Apache Maven Doxia at 2017-12-08
+ | Rendered using Apache Maven Fluido Skin 1.3.0
+-->
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+  <head>
+    <meta charset="UTF-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+    <meta name="Date-Revision-yyyymmdd" content="20171208" />
+    <meta http-equiv="Content-Language" content="en" />
+    <title>Metron &#x2013; Contents</title>
+    <link rel="stylesheet" href="../../css/apache-maven-fluido-1.3.0.min.css" 
/>
+    <link rel="stylesheet" href="../../css/site.css" />
+    <link rel="stylesheet" href="../../css/print.css" media="print" />
+
+      
+    <script type="text/javascript" 
src="../../js/apache-maven-fluido-1.3.0.min.js"></script>
+
+                          
+        
+<script type="text/javascript">$( document ).ready( function() { $( 
'.carousel' ).carousel( { interval: 3500 } ) } );</script>
+          
+            </head>
+        <body class="topBarDisabled">
+          
+                
+                    
+    
+        <div class="container-fluid">
+          <div id="banner">
+        <div class="pull-left">
+                                    <a href="http://metron.apache.org/" 
id="bannerLeft">
+                                                                               
                 <img src="../../images/metron-logo.png"  alt="Apache Metron" 
width="148px" height="48px"/>
+                </a>
+                      </div>
+        <div class="pull-right">  </div>
+        <div class="clear"><hr/></div>
+      </div>
+
+      <div id="breadcrumbs">
+        <ul class="breadcrumb">
+                
+                    
+                              <li class="">
+                    <a href="http://www.apache.org" class="externalLink" 
title="Apache">
+        Apache</a>
+        </li>
+      <li class="divider ">/</li>
+            <li class="">
+                    <a href="http://metron.apache.org/" class="externalLink" 
title="Metron">
+        Metron</a>
+        </li>
+      <li class="divider ">/</li>
+            <li class="">
+                    <a href="../../index.html" title="Documentation">
+        Documentation</a>
+        </li>
+      <li class="divider ">/</li>
+        <li class="">Contents</li>
+        
+                
+                    
+                  <li id="publishDate" class="pull-right">Last Published: 
2017-12-08</li> <li class="divider pull-right">|</li>
+              <li id="projectVersion" class="pull-right">Version: 0.4.2</li>
+            
+                            </ul>
+      </div>
+
+            
+      <div class="row-fluid">
+        <div id="leftColumn" class="span3">
+          <div class="well sidebar-nav">
+                
+                    
+                <ul class="nav nav-list">
+                    <li class="nav-header">User Documentation</li>
+                                                                               
                                                                                
                                                                                
                                                                                
                                                                                
                                                                                
                                                                                
                                                                                
                                                                                
                                                                                
                                                                                
                                                                                
                                      
                                                                          
+      <li>
+    
+                          <a href="../../index.html" title="Metron">
+          <i class="icon-chevron-down"></i>
+        Metron</a>
+                    <ul class="nav nav-list">
+                      
+      <li>
+    
+                          <a href="../../Upgrading.html" title="Upgrading">
+          <i class="none"></i>
+        Upgrading</a>
+            </li>
+                                                                               
                                                                       
+      <li>
+    
+                          <a href="../../metron-analytics/index.html" 
title="Analytics">
+          <i class="icon-chevron-right"></i>
+        Analytics</a>
+                  </li>
+                      
+      <li>
+    
+                          <a 
href="../../metron-contrib/metron-docker/index.html" title="Docker">
+          <i class="none"></i>
+        Docker</a>
+            </li>
+                                                                               
                                                                                
                                                                                
                                                                                
                                                                             
+      <li>
+    
+                          <a href="../../metron-deployment/index.html" 
title="Deployment">
+          <i class="icon-chevron-right"></i>
+        Deployment</a>
+                  </li>
+                      
+      <li>
+    
+                          <a 
href="../../metron-interface/metron-alerts/index.html" title="Alerts">
+          <i class="none"></i>
+        Alerts</a>
+            </li>
+                      
+      <li>
+    
+                          <a 
href="../../metron-interface/metron-config/index.html" title="Config">
+          <i class="none"></i>
+        Config</a>
+            </li>
+                      
+      <li>
+    
+                          <a 
href="../../metron-interface/metron-rest/index.html" title="Rest">
+          <i class="none"></i>
+        Rest</a>
+            </li>
+                                                                               
                                                                                
                                                                                
                                               
+      <li>
+    
+                          <a href="../../metron-platform/index.html" 
title="Platform">
+          <i class="icon-chevron-down"></i>
+        Platform</a>
+                    <ul class="nav nav-list">
+                      
+      <li>
+    
+                          <a 
href="../../metron-platform/Performance-tuning-guide.html" 
title="Performance-tuning-guide">
+          <i class="none"></i>
+        Performance-tuning-guide</a>
+            </li>
+                      
+      <li>
+    
+                          <a 
href="../../metron-platform/metron-api/index.html" title="Api">
+          <i class="none"></i>
+        Api</a>
+            </li>
+                      
+      <li class="active">
+    
+            <a href="#"><i class="none"></i>Common</a>
+          </li>
+                      
+      <li>
+    
+                          <a 
href="../../metron-platform/metron-data-management/index.html" 
title="Data-management">
+          <i class="none"></i>
+        Data-management</a>
+            </li>
+                      
+      <li>
+    
+                          <a 
href="../../metron-platform/metron-elasticsearch/index.html" 
title="Elasticsearch">
+          <i class="none"></i>
+        Elasticsearch</a>
+            </li>
+                      
+      <li>
+    
+                          <a 
href="../../metron-platform/metron-enrichment/index.html" title="Enrichment">
+          <i class="none"></i>
+        Enrichment</a>
+            </li>
+                      
+      <li>
+    
+                          <a 
href="../../metron-platform/metron-indexing/index.html" title="Indexing">
+          <i class="none"></i>
+        Indexing</a>
+            </li>
+                      
+      <li>
+    
+                          <a 
href="../../metron-platform/metron-management/index.html" title="Management">
+          <i class="none"></i>
+        Management</a>
+            </li>
+                                                                        
+      <li>
+    
+                          <a 
href="../../metron-platform/metron-parsers/index.html" title="Parsers">
+          <i class="icon-chevron-right"></i>
+        Parsers</a>
+                  </li>
+                      
+      <li>
+    
+                          <a 
href="../../metron-platform/metron-pcap-backend/index.html" 
title="Pcap-backend">
+          <i class="none"></i>
+        Pcap-backend</a>
+            </li>
+                      
+      <li>
+    
+                          <a 
href="../../metron-platform/metron-writer/index.html" title="Writer">
+          <i class="none"></i>
+        Writer</a>
+            </li>
+              </ul>
+        </li>
+                                                                               
           
+      <li>
+    
+                          <a href="../../metron-sensors/index.html" 
title="Sensors">
+          <i class="icon-chevron-right"></i>
+        Sensors</a>
+                  </li>
+                      
+      <li>
+    
+                          <a 
href="../../metron-stellar/stellar-3rd-party-example/index.html" 
title="Stellar-3rd-party-example">
+          <i class="none"></i>
+        Stellar-3rd-party-example</a>
+            </li>
+                                                                        
+      <li>
+    
+                          <a 
href="../../metron-stellar/stellar-common/index.html" title="Stellar-common">
+          <i class="icon-chevron-right"></i>
+        Stellar-common</a>
+                  </li>
+                                                                               
           
+      <li>
+    
+                          <a href="../../use-cases/index.html" 
title="Use-cases">
+          <i class="icon-chevron-right"></i>
+        Use-cases</a>
+                  </li>
+              </ul>
+        </li>
+            </ul>
+                
+                    
+                
+          <hr class="divider" />
+
+           <div id="poweredBy">
+                            <div class="clear"></div>
+                            <div class="clear"></div>
+                            <div class="clear"></div>
+                             <a href="http://maven.apache.org/" title="Built 
by Maven" class="poweredBy">
+        <img class="builtBy" alt="Built by Maven" 
src="../../images/logos/maven-feather.png" />
+      </a>
+                  </div>
+          </div>
+        </div>
+        
+                
+        <div id="bodyColumn"  class="span9" >
+                                  
+            <h1>Contents</h1>
+<p><a name="Contents"></a></p>
+
+<ul>
+  
+<li><a href="#Stellar_Language">Stellar Language</a></li>
+  
+<li><a href="#Global_Configuration">Global Configuration</a></li>
+  
+<li><a href="#Validation_Framework">Validation Framework</a></li>
+  
+<li><a href="#Management_Utility">Management Utility</a></li>
+  
+<li><a href="topology-errors/index.html">Topology Errors</a></li>
+  
+<li><a href="#Performance_Logging">Performance Logging</a></li>
+</ul>
+<p><a name="Stellar_Language"></a></p>
+<h1>Stellar Language</h1>
+<p>For a variety of components (threat intelligence triage and field 
transformations) we have the need to do simple computation and transformation 
using the data from messages as variables.<br />For those purposes, there 
exists a simple, scaled down DSL created to do simple computation and 
transformation.</p>
+<p>The query language supports the following:</p>
+
+<ul>
+  
+<li>Referencing fields in the enriched JSON</li>
+  
+<li>String literals are quoted with either <tt>'</tt> or <tt>&quot;</tt>, and 
support escaping for <tt>'</tt>, <tt>&quot;</tt>, <tt>\t</tt>, <tt>\r</tt>, 
<tt>\n</tt>, and backslash</li>
+  
+<li>Simple boolean operations: <tt>and</tt>, <tt>not</tt>, <tt>or</tt>
+  
+<ul>
+    
+<li>Boolean expressions are short-circuited (e.g. <tt>true or FUNC()</tt> 
would never execute <tt>FUNC</tt>)</li>
+  </ul></li>
+  
+<li>Simple arithmetic operations: <tt>*</tt>, <tt>/</tt>, <tt>+</tt>, 
<tt>-</tt> on real numbers or integers</li>
+  
+<li>Simple comparison operations <tt>&lt;</tt>, <tt>&gt;</tt>, <tt>&lt;=</tt>, 
<tt>&gt;=</tt></li>
+  
+<li>Simple equality comparison operations <tt>==</tt>, <tt>!=</tt></li>
+  
+<li>if/then/else comparisons (i.e. <tt>if var1 &lt; 10 then 'less than 10' 
else '10 or more'</tt>)</li>
+  
+<li>Determining whether a field exists (via <tt>exists</tt>)</li>
+  
+<li>An <tt>in</tt> operator that works like the <tt>in</tt> in Python</li>
+  
+<li>The ability to have parenthesis to make order of operations explicit</li>
+  
+<li>User defined functions, including Lambda expressions</li>
+</ul>
+<p>For documentation of Stellar, please see the <a 
href="../../metron-stellar/stellar-common/index.html">Stellar README</a>.</p>
+<p><a name="Global_Configuration"></a></p>
+<h1>Global Configuration</h1>
+<p>The format of the global enrichment is a JSON String to Object map. This is 
intended for configuration which is not sensor-specific.</p>
+<p>This configuration is stored in zookeeper, but looks something like</p>
+
+<div class="source">
+<div class="source">
+<pre>{
+  &quot;es.clustername&quot;: &quot;metron&quot;,
+  &quot;es.ip&quot;: &quot;node1&quot;,
+  &quot;es.port&quot;: &quot;9300&quot;,
+  &quot;es.date.format&quot;: &quot;yyyy.MM.dd.HH&quot;,
+  &quot;parser.error.topic&quot;: &quot;indexing&quot;,
+  &quot;fieldValidations&quot; : [
+              {
+                &quot;input&quot; : [ &quot;ip_src_addr&quot;, 
&quot;ip_dst_addr&quot; ],
+                &quot;validation&quot; : &quot;IP&quot;,
+                &quot;config&quot; : {
+                    &quot;type&quot; : &quot;IPV4&quot;
+                           }
+              } 
+                       ]
+}
+</pre></div></div>
+<p>Various parts of our stack use the global config. These uses are documented 
throughout the Metron documentation, but a convenient index is provided here:</p>
+
+<table border="0" class="table table-striped">
+  <thead>
+    
+<tr class="a">
+      
+<th>Property Name </th>
+      
+<th>Subsystem </th>
+      
+<th>Type </th>
+      
+<th>Ambari Property </th>
+    </tr>
+  </thead>
+  <tbody>
+    
+<tr class="b">
+      
+<td><a 
href="../metron-elasticsearch/index.html#es.clustername"><tt>es.clustername</tt></a>
 </td>
+      
+<td>Indexing </td>
+      
+<td>String </td>
+      
+<td><tt>es_cluster_name</tt> </td>
+    </tr>
+    
+<tr class="a">
+      
+<td><a href="../metron-elasticsearch/index.html#es.ip"><tt>es.ip</tt></a> </td>
+      
+<td>Indexing </td>
+      
+<td>String </td>
+      
+<td><tt>es_hosts</tt> </td>
+    </tr>
+    
+<tr class="b">
+      
+<td><a href="../metron-elasticsearch/index.html#es.port"><tt>es.port</tt></a> 
</td>
+      
+<td>Indexing </td>
+      
+<td>String </td>
+      
+<td><tt>es_port</tt> </td>
+    </tr>
+    
+<tr class="a">
+      
+<td><a 
href="../metron-elasticsearch/index.html#es.date.format"><tt>es.date.format</tt></a>
 </td>
+      
+<td>Indexing </td>
+      
+<td>String </td>
+      
+<td><tt>es_date_format</tt> </td>
+    </tr>
+    
+<tr class="b">
+      
+<td><a href="#Validation_Framework"><tt>fieldValidations</tt></a> </td>
+      
+<td>Parsing </td>
+      
+<td>Object </td>
+      
+<td>N/A </td>
+    </tr>
+    
+<tr class="a">
+      
+<td><a 
href="../metron-parsers/index.html#parser.error.topic"><tt>parser.error.topic</tt></a>
 </td>
+      
+<td>Parsing </td>
+      
+<td>String </td>
+      
+<td>N/A </td>
+    </tr>
+    
+<tr class="b">
+      
+<td><a 
href="../../metron-stellar/stellar-common/index.html#stellar.function.paths"><tt>stellar.function.paths</tt></a>
 </td>
+      
+<td>Stellar </td>
+      
+<td>CSV String </td>
+      
+<td>N/A </td>
+    </tr>
+    
+<tr class="a">
+      
+<td><a 
href="../../metron-stellar/stellar-common/index.html#stellarfunctionresolverincludesexcludes"><tt>stellar.function.resolver.includes</tt></a>
 </td>
+      
+<td>Stellar </td>
+      
+<td>CSV String </td>
+      
+<td>N/A </td>
+    </tr>
+    
+<tr class="b">
+      
+<td><a 
href="../../metron-stellar/stellar-common/index.html#stellarfunctionresolverincludesexcludes"><tt>stellar.function.resolver.excludes</tt></a>
 </td>
+      
+<td>Stellar </td>
+      
+<td>CSV String </td>
+      
+<td>N/A </td>
+    </tr>
+    
+<tr class="a">
+      
+<td><a 
href="../../metron-analytics/metron-profiler/index.html#profiler.period.duration"><tt>profiler.period.duration</tt></a>
 </td>
+      
+<td>Profiler </td>
+      
+<td>Integer </td>
+      
+<td><tt>profiler_period_duration</tt> </td>
+    </tr>
+    
+<tr class="b">
+      
+<td><a 
href="../../metron-analytics/metron-profiler/index.html#profiler.period.duration.units"><tt>profiler.period.duration.units</tt></a>
 </td>
+      
+<td>Profiler </td>
+      
+<td>String </td>
+      
+<td><tt>profiler_period_units</tt> </td>
+    </tr>
+    
+<tr class="a">
+      
+<td><a 
href="../metron-indexing/index.html#update.hbase.table"><tt>update.hbase.table</tt></a>
 </td>
+      
+<td>REST/Indexing </td>
+      
+<td>String </td>
+      
+<td><tt>update_hbase_table</tt> </td>
+    </tr>
+    
+<tr class="b">
+      
+<td><a 
href="../metron-indexing/index.html#update.hbase.cf"><tt>update.hbase.cf</tt></a>
 </td>
+      
+<td>REST/Indexing </td>
+      
+<td>String </td>
+      
+<td><tt>update_hbase_cf</tt> </td>
+    </tr>
+    
+<tr class="a">
+      
+<td><a 
href="../metron-enrichment/index.html#geo.hdfs.file"><tt>geo.hdfs.file</tt></a> 
</td>
+      
+<td>Enrichment </td>
+      
+<td>String </td>
+      
+<td><tt>geo_hdfs_file</tt> </td>
+    </tr>
+  </tbody>
+</table>
+<div class="section">
+<h2><a name="Note_Configs_in_Ambari"></a>Note Configs in Ambari</h2>
+<p>If a field is managed via ambari, you should change the field via ambari. 
Otherwise, upon service restarts, you may find your update overwritten.</p>
+<p><a name="Validation_Framework"></a></p>
+<h1>Validation Framework</h1>
+<p>Inside of the global configuration, there is a validation framework in 
place that enables the validation that messages coming from all parsers are 
valid. This is done in the form of validation plugins where assertions about 
fields or whole messages can be made. </p>
+<p>The format for this is a <tt>fieldValidations</tt> field inside of global 
config. This is associated with an array of field validation objects structured 
like so:</p>
+
+<ul>
+  
+<li><tt>input</tt> : An array of input fields or a single field. If this is 
omitted, then the whole message is passed to the validator.</li>
+  
+<li><tt>config</tt> : A String to Object map for validation configuration. 
This is optional if the validation function requires no configuration.</li>
+  
+<li><tt>validation</tt> : The validation function to be used. This is one of
+  
+<ul>
+    
+<li><tt>STELLAR</tt> : Execute a Stellar Language statement. Expects the query 
string in the <tt>condition</tt> field of the config.</li>
+    
+<li><tt>IP</tt> : Validates that the input fields are an IP address. By 
default, if no configuration is set, it assumes <tt>IPV4</tt>, but you can 
specify the type in the config by passing in <tt>type</tt> with 
either <tt>IPV6</tt> or <tt>IPV4</tt>, or by passing in a list 
[<tt>IPV4</tt>,<tt>IPV6</tt>], in which case the input(s) will be validated 
against both.</li>
+    
+<li><tt>DOMAIN</tt> : Validates that the fields are all domains.</li>
+    
+<li><tt>EMAIL</tt> : Validates that the fields are all email addresses</li>
+    
+<li><tt>URL</tt> : Validates that the fields are all URLs</li>
+    
+<li><tt>DATE</tt> : Validates that the fields are a date. Expects 
<tt>format</tt> in the config.</li>
+    
+<li><tt>INTEGER</tt> : Validates that the fields are an integer. String 
representation of an integer is allowed.</li>
+    
+<li><tt>REGEX_MATCH</tt> : Validates that the fields match a regex. Expects 
<tt>pattern</tt> in the config.</li>
+    
+<li><tt>NOT_EMPTY</tt> : Validates that the fields exist and are not empty 
(after trimming.)</li>
+  </ul></li>
+</ul>
+<p><a name="Management_Utility"></a></p>
+<h1>Management Utility</h1>
+<p>Configurations should be stored on disk in the following structure starting 
at <tt>$BASE_DIR</tt>:</p>
+
+<ul>
+  
+<li>global.json : The global config</li>
+  
+<li><tt>sensors</tt> : The subdirectory containing sensor enrichment 
configuration JSON (e.g. <tt>snort.json</tt>, <tt>bro.json</tt>)</li>
+</ul>
+<p>By default, this directory as deployed by the ansible infrastructure is at 
<tt>$METRON_HOME/config/zookeeper</tt></p>
+<p>While the configs are stored on disk, they must be loaded into Zookeeper to 
be used. To this end, there is a utility program to assist in this called 
<tt>$METRON_HOME/bin/zk_load_configs.sh</tt></p>
+<p>This has the following options:</p>
+
+<div class="source">
+<div class="source">
+<pre> -c,--config_type &lt;CONFIG_TYPE&gt;            The configuration type: 
GLOBAL,
+                                           PARSER, ENRICHMENT, INDEXING,
+                                           PROFILER
+ -f,--force                                Force operation
+ -h,--help                                 Generate Help screen
+ -i,--input_dir &lt;DIR&gt;                      The input directory containing
+                                           the configuration files named
+                                           like &quot;$source.json&quot;
+ -m,--mode &lt;MODE&gt;                          The mode of operation: DUMP,
+                                           PULL, PUSH, PATCH
+ -n,--config_name &lt;CONFIG_NAME&gt;            The configuration name: bro,
+                                           yaf, snort, squid, etc.
+ -o,--output_dir &lt;DIR&gt;                     The output directory which 
will
+                                           store the JSON configuration
+                                           from Zookeeper
+ -pk,--patch_key &lt;PATCH_KEY&gt;               The key to modify
+ -pm,--patch_mode &lt;PATCH_MODE&gt;             One of: ADD, REMOVE - relevant
+                                           only for key/value patches,
+                                           i.e. when a patch file is not
+                                           used.
+ -pf,--patch_file &lt;PATCH_FILE&gt;             Path to the patch file.
+ -pv,--patch_value &lt;PATCH_VALUE&gt;           Value to use in the patch.
+ -z,--zk_quorum &lt;host:port,[host:port]*&gt;   Zookeeper Quorum URL
+                                           (zk1:port,zk2:port,...)
+</pre></div></div>
+<p>Usage examples:</p>
+
+<ul>
+  
+<li>To dump the existing configs from zookeeper on the singlenode vagrant 
machine: <tt>$METRON_HOME/bin/zk_load_configs.sh -z node1:2181 -m DUMP</tt></li>
+  
+<li>To dump the existing GLOBAL configs from zookeeper on the singlenode 
vagrant machine: <tt>$METRON_HOME/bin/zk_load_configs.sh -z node1:2181 -m DUMP 
-c GLOBAL</tt></li>
+  
+<li>To push the configs into zookeeper on the singlenode vagrant machine: 
<tt>$METRON_HOME/bin/zk_load_configs.sh -z node1:2181 -m PUSH -i 
$METRON_HOME/config/zookeeper</tt></li>
+  
+<li>To push only the GLOBAL configs into zookeeper on the singlenode vagrant 
machine: <tt>$METRON_HOME/bin/zk_load_configs.sh -z node1:2181 -m PUSH -i 
$METRON_HOME/config/zookeeper -c GLOBAL</tt></li>
+  
+<li>To push only the PARSER configs into zookeeper on the singlenode vagrant 
machine: <tt>$METRON_HOME/bin/zk_load_configs.sh -z node1:2181 -m PUSH -i 
$METRON_HOME/config/zookeeper -c PARSER</tt></li>
+  
+<li>To push only the PARSER &#x2018;bro&#x2019; configs into zookeeper on the 
singlenode vagrant machine: <tt>$METRON_HOME/bin/zk_load_configs.sh -z 
node1:2181 -m PUSH -i $METRON_HOME/config/zookeeper -c PARSER -n bro</tt></li>
+  
+<li>To pull all configs from zookeeper to the singlenode vagrant machine disk: 
<tt>$METRON_HOME/bin/zk_load_configs.sh -z node1:2181 -m PULL -o 
$METRON_HOME/config/zookeeper -f</tt></li>
+</ul></div>
+<div class="section">
+<h2><a name="Patching_mechanism"></a>Patching mechanism</h2>
+<p>The configuration management utility leverages a JSON patching library that 
conforms to <a class="externalLink" 
href="https://tools.ietf.org/html/rfc6902">RFC-6902 spec</a>. We&#x2019;re 
using the zjsonpatch library implementation from here - <a class="externalLink" 
href="https://github.com/flipkart-incubator/zjsonpatch">https://github.com/flipkart-incubator/zjsonpatch</a>.
 There are a couple of options for leveraging patching. You can choose to patch 
the Zookeeper config via patch file:</p>
+<p><tt>$METRON_HOME/bin/zk_load_configs.sh -z $ZOOKEEPER -m PATCH -c GLOBAL 
-pf /tmp/mypatch.txt</tt></p>
+<p>or key/value pair:</p>
+<p><tt>$METRON_HOME/bin/zk_load_configs.sh -z $ZOOKEEPER -m PATCH -c GLOBAL 
-pm ADD -pk foo -pv \&quot;\&quot;bar\&quot;\&quot;</tt></p>
+<p>The options exposed via patch file are the full range of options from 
RFC-6902:</p>
+
+<ul>
+  
+<li>ADD</li>
+  
+<li>REMOVE</li>
+  
+<li>REPLACE</li>
+  
+<li>MOVE</li>
+  
+<li>COPY</li>
+  
+<li>TEST</li>
+</ul>
+<p>whereas with key/value patching, we currently only expose ADD and REMOVE. 
Note that ADD will function as a REPLACE when the key already exists.</p>
+<div class="section">
+<h3><a name="Patch_File"></a>Patch File</h3>
+<p>Let&#x2019;s say we want to add a complex JSON object to our configuration 
with a patch file. e.g.</p>
+
+<div class="source">
+<div class="source">
+<pre>&quot;foo&quot; : {
+    &quot;bar&quot; : {
+      &quot;baz&quot; : [ &quot;bazval1&quot;, &quot;bazval2&quot; ]
+    }
+  }
+</pre></div></div>
+<p>We would write a patch file &#x201c;/tmp/mypatch.txt&#x201d; with 
contents:</p>
+
+<div class="source">
+<div class="source">
+<pre>[
+    {
+        &quot;op&quot;: &quot;add&quot;,
+        &quot;path&quot;: &quot;/foo&quot;,
+        &quot;value&quot;: { &quot;bar&quot; : { &quot;baz&quot; : [ 
&quot;bazval1&quot;, &quot;bazval2&quot; ] } }
+    }
+]
+</pre></div></div>
+<p>And submit via zk_load_configs as follows:</p>
+
+<div class="source">
+<div class="source">
+<pre> $METRON_HOME/bin/zk_load_configs.sh -z $ZOOKEEPER -m PATCH -c GLOBAL -pf 
/tmp/mypatch.txt
+</pre></div></div></div>
+<div class="section">
+<h3><a name="Patch_KeyValue"></a>Patch Key/Value</h3>
+<p>Now let&#x2019;s try the same without using a patch file, instead using the 
patch_key and patch_value options right from the command line utility. This 
would look like the following.</p>
+
+<div class="source">
+<div class="source">
+<pre>$METRON_HOME/bin/zk_load_configs.sh -z $ZOOKEEPER -m PATCH -c GLOBAL -pm 
ADD -pk &quot;/foo&quot; -pv &quot;{ \&quot;bar\&quot; : { \&quot;baz\&quot; : 
[ \&quot;bazval1\&quot;, \&quot;bazval2\&quot; ] } }&quot;
+</pre></div></div></div>
+<div class="section">
+<h3><a name="Applying_Multiple_Patches"></a>Applying Multiple Patches</h3>
+<p>Applying multiple patches is also pretty straightforward. You can achieve 
this in a single command using patch files, or simply execute multiple commands 
in sequence using the patch_key/value approach.</p>
+<p>Let&#x2019;s say we wanted to add the following to our global config:</p>
+
+<div class="source">
+<div class="source">
+<pre>&quot;apache&quot; : &quot;metron&quot;,
+&quot;is&quot; : &quot;the best&quot;,
+&quot;streaming&quot; : &quot;analytics platform&quot;
+</pre></div></div>
+<p>and remove the /foo key from the previous example.</p>
+<p>Create a patch file /tmp/mypatch.txt with four separate patch 
operations.</p>
+
+<div class="source">
+<div class="source">
+<pre>[
+    {
+        &quot;op&quot;: &quot;remove&quot;,
+        &quot;path&quot;: &quot;/foo&quot;
+    },
+    {
+        &quot;op&quot;: &quot;add&quot;,
+        &quot;path&quot;: &quot;/apache&quot;,
+        &quot;value&quot;: &quot;metron&quot;
+    },
+    {
+        &quot;op&quot;: &quot;add&quot;,
+        &quot;path&quot;: &quot;/is&quot;,
+        &quot;value&quot;: &quot;the best&quot;
+    },
+    {
+        &quot;op&quot;: &quot;add&quot;,
+        &quot;path&quot;: &quot;/streaming&quot;,
+        &quot;value&quot;: &quot;analytics platform&quot;
+    }
+]
+</pre></div></div>
+<p>Now submit again and you should see a Global config with the 
&#x201c;foo&#x201d; key removed and three new keys added.</p>
+
+<div class="source">
+<div class="source">
+<pre> $METRON_HOME/bin/zk_load_configs.sh -z $ZOOKEEPER -m PATCH -c GLOBAL -pf 
/tmp/mypatch.txt
+</pre></div></div></div>
+<div class="section">
+<h3><a name="Notes_On_Patching"></a>Notes On Patching</h3>
+<p>For any given patch key, the last/leaf node in the key&#x2019;s parent 
<i>must</i> exist, otherwise an exception will be thrown. For example, if you 
want to add the following:</p>
+
+<div class="source">
+<div class="source">
+<pre>&quot;foo&quot;: {
+    &quot;bar&quot;: &quot;baz&quot;
+}
+</pre></div></div>
+<p>It is not sufficient to use /foo/bar as a key if foo does not already 
exist. You would either need to incrementally build the JSON and make this a 
two step process</p>
+
+<div class="source">
+<div class="source">
+<pre>[
+    {
+        &quot;op&quot;: &quot;add&quot;,
+        &quot;path&quot;: &quot;/foo&quot;,
+        &quot;value&quot;: { }
+    },
+    {
+        &quot;op&quot;: &quot;add&quot;,
+        &quot;path&quot;: &quot;/foo/bar&quot;,
+        &quot;value&quot;: &quot;baz&quot;
+    }
+]
+</pre></div></div>
+<p>Or provide the value as a complete JSON object.</p>
+
+<div class="source">
+<div class="source">
+<pre>[
+    {
+        &quot;op&quot;: &quot;add&quot;,
+        &quot;path&quot;: &quot;/foo&quot;,
+        &quot;value&quot;: { &quot;bar&quot; : &quot;baz&quot; }
+    }
+]
+</pre></div></div>
+<p>The REMOVE operation is idempotent. Running the remove command on the same 
key multiple times will not fail once the key has been removed.</p>
+<p><a name="Topology_Errors"></a></p>
+<h1>Topology Errors</h1>
+<p>Errors generated in Metron topologies are transformed into JSON format and 
follow this structure:</p>
+
+<div class="source">
+<div class="source">
+<pre>{
+  &quot;exception&quot;: &quot;java.lang.IllegalStateException: Unable to 
parse Message: ...&quot;,
+  &quot;failed_sensor_type&quot;: &quot;bro&quot;,
+  &quot;stack&quot;: &quot;java.lang.IllegalStateException: Unable to parse 
Message: ...&quot;,
+  &quot;hostname&quot;: &quot;node1&quot;,
+  &quot;source:type&quot;: &quot;error&quot;,
+  &quot;raw_message&quot;: &quot;{\&quot;http\&quot;: 
{\&quot;ts\&quot;:1488809627.000000.31915,\&quot;uid\&quot;:\&quot;C9JpSd2vFAWo3mXKz1\&quot;,
 ...&quot;,
+  &quot;error_hash&quot;: 
&quot;f7baf053f2d3c801a01d196f40f3468e87eea81788b2567423030100865c5061&quot;,
+  &quot;error_type&quot;: &quot;parser_error&quot;,
+  &quot;message&quot;: &quot;Unable to parse Message: {\&quot;http\&quot;: 
{\&quot;ts\&quot;:1488809627.000000.31915,\&quot;uid\&quot;:\&quot;C9JpSd2vFAWo3mXKz1\&quot;,
 ...&quot;,
+  &quot;timestamp&quot;: 1488809630698
+}
+</pre></div></div>
+<p>Each topology can be configured to send error messages to a specific Kafka 
topic. The parser topologies retrieve this setting from the 
<tt>parser.error.topic</tt> setting in the global config:</p>
+
+<div class="source">
+<div class="source">
+<pre>{
+  &quot;es.clustername&quot;: &quot;metron&quot;,
+  &quot;es.ip&quot;: &quot;node1&quot;,
+  &quot;es.port&quot;: &quot;9300&quot;,
+  &quot;es.date.format&quot;: &quot;yyyy.MM.dd.HH&quot;,
+  &quot;parser.error.topic&quot;: &quot;indexing&quot;
+}
+</pre></div></div>
+<p>Error topics for enrichment and threat intel errors are passed into the 
enrichment topology as flux properties named <tt>enrichment.error.topic</tt> 
and <tt>threat.intel.error.topic</tt>. These properties can be found in 
<tt>$METRON_HOME/config/enrichment.properties</tt>.</p>
+<p>The error topic for indexing errors is passed into the indexing topology as 
a flux property named <tt>index.error.topic</tt>. This property can be found in 
either <tt>$METRON_HOME/config/elasticsearch.properties</tt> or 
<tt>$METRON_HOME/config/solr.properties</tt> depending on the search engine 
selected.</p>
+<p>By default all error messages are sent to the <tt>indexing</tt> topic so 
that they are indexed and archived, just like other messages. The indexing 
config for error messages can be found at 
<tt>$METRON_HOME/config/zookeeper/indexing/error.json</tt>.</p>
+<p><a name="Performance_Logging"></a></p>
+<h1>Performance Logging</h1>
+<p>The PerformanceLogger class provides functionality that enables developers 
to debug performance issues. Basic usage looks like the following:</p>
+
+<div class="source">
+<div class="source">
+<pre>// create a simple inner performance class to use for logger instantiation
+public static class Perf {}
+// instantiation
+PerformanceLogger perfLog = new PerformanceLogger(() -&gt; 
getConfigurations().getGlobalConfig(), Perf.class.getName());
+// marking a start time
+perfLog.mark(&quot;mark1&quot;);
+// ...do some high performance stuff...
+// log the elapsed time
+perfLog.log(&quot;mark1&quot;, &quot;My high performance stuff is very 
performant&quot;);
+// log no additional message, just the basics
+perfLog.log(&quot;mark1&quot;);
+</pre></div></div>
+<p>The logger maintains a Map&lt;String, Long&gt; of named markers that 
correspond to start times. Calling mark() performs a put on the underlying 
timing store. Output includes the mark name, elapsed time in nanoseconds, as 
well as any custom messaging you provide. A sample log would look like the 
following:</p>
+
+<div class="source">
+<div class="source">
+<pre>[DEBUG] 
markName=execute,time(ns)=121411,message=key=7a8dbe44-4cb9-4db2-9d04-7632f543b56c,
 elapsed time to run execute
+</pre></div></div>
+<p><b>Configuration</b></p>
+<p>The first argument to the logger is a 
java.util.function.Supplier&lt;Map&lt;String, Object&gt;&gt;. This offers 
flexibility in being able to provide multiple configuration 
&#x201c;suppliers&#x201d; depending on your individual usage requirements. The 
example above, taken from 
org.apache.metron.enrichment.bolt.GenericEnrichmentBolt, leverages the global 
config to dynamically provide configuration from Zookeeper. Any updates to the 
global config via Zookeeper are reflected live at runtime. Currently, the 
PerformanceLogger supports the following options:</p>
+
+<table border="0" class="table table-striped">
+  <thead>
+    
+<tr class="a">
+      
+<th>Property Name </th>
+      
+<th>Type </th>
+      
+<th>Valid Values </th>
+    </tr>
+  </thead>
+  <tbody>
+    
+<tr class="b">
+      
+<td>performance.logging.percent.records </td>
+      
+<td>Integer </td>
+      
+<td>0-100 </td>
+    </tr>
+  </tbody>
+</table>
+<p><b>Other Usage Details</b></p>
+<p>You can also provide your own format String and provide arguments that will 
be used when formatting that String. This code avoids expensive String 
concatenation by only formatting when debugging is enabled. For more complex 
arguments, e.g. JSON serialization, we expose an isDebugEnabled() method.</p>
+
+<div class="source">
+<div class="source">
+<pre>// log with format String and single argument
+perfLog.log(&quot;join-message&quot;, &quot;key={}, elapsed time to join 
messages&quot;, key);
+
+// check if debugging is enabled for the performance logger to avoid more 
expensive operations
+if (perfLog.isDebugEnabled()) {
+    perfLog.log(&quot;join-message&quot;, &quot;key={}, elapsed time to join 
messages, message={}&quot;, key, rawMessage.toJSONString());
+}
+</pre></div></div>
+<p><b>Side Effects</b></p>
+<p>Calling the mark() method multiple times simply resets the start time to 
the current nano time. Calling log() with a non-existent mark name will log 0 
ns elapsed time with a warning indicating that log has been invoked for a mark 
name that does not exist. The class is not thread-safe and makes no attempt at 
keeping multiple threads from modifying the same markers.</p></div></div>
+                  </div>
+            </div>
+          </div>
+
+    <hr/>
+
+    <footer>
+            <div class="container-fluid">
+              <div class="row span12">Copyright &copy;                    2017
+                        <a href="https://www.apache.org">The Apache Software 
Foundation</a>.
+            All Rights Reserved.      
+                    
+      </div>
+
+                          
+        
+                </div>
+    </footer>
+  </body>
+</html>

Added: 
release/metron/0.4.2/site-book/metron-platform/metron-data-management/index.html
==============================================================================
--- 
release/metron/0.4.2/site-book/metron-platform/metron-data-management/index.html
 (added)
+++ 
release/metron/0.4.2/site-book/metron-platform/metron-data-management/index.html
 Wed Jan  3 18:25:57 2018
@@ -0,0 +1,1069 @@
+<!DOCTYPE html>
+<!--
+ | Generated by Apache Maven Doxia at 2017-12-08
+ | Rendered using Apache Maven Fluido Skin 1.3.0
+-->
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+  <head>
+    <meta charset="UTF-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+    <meta name="Date-Revision-yyyymmdd" content="20171208" />
+    <meta http-equiv="Content-Language" content="en" />
+    <title>Metron &#x2013; Resource Data Management</title>
+    <link rel="stylesheet" href="../../css/apache-maven-fluido-1.3.0.min.css" 
/>
+    <link rel="stylesheet" href="../../css/site.css" />
+    <link rel="stylesheet" href="../../css/print.css" media="print" />
+
+      
+    <script type="text/javascript" 
src="../../js/apache-maven-fluido-1.3.0.min.js"></script>
+
+                          
+        
+<script type="text/javascript">$( document ).ready( function() { $( 
'.carousel' ).carousel( { interval: 3500 } ) } );</script>
+          
+            </head>
+        <body class="topBarDisabled">
+          
+                
+                    
+    
+        <div class="container-fluid">
+          <div id="banner">
+        <div class="pull-left">
+                                    <a href="http://metron.apache.org/" 
id="bannerLeft">
+                                                                               
                 <img src="../../images/metron-logo.png"  alt="Apache Metron" 
width="148px" height="48px"/>
+                </a>
+                      </div>
+        <div class="pull-right">  </div>
+        <div class="clear"><hr/></div>
+      </div>
+
+      <div id="breadcrumbs">
+        <ul class="breadcrumb">
+                
+                    
+                              <li class="">
+                    <a href="http://www.apache.org" class="externalLink" 
title="Apache">
+        Apache</a>
+        </li>
+      <li class="divider ">/</li>
+            <li class="">
+                    <a href="http://metron.apache.org/" class="externalLink" 
title="Metron">
+        Metron</a>
+        </li>
+      <li class="divider ">/</li>
+            <li class="">
+                    <a href="../../index.html" title="Documentation">
+        Documentation</a>
+        </li>
+      <li class="divider ">/</li>
+        <li class="">Resource Data Management</li>
+        
+                
+                    
+                  <li id="publishDate" class="pull-right">Last Published: 
2017-12-08</li> <li class="divider pull-right">|</li>
+              <li id="projectVersion" class="pull-right">Version: 0.4.2</li>
+            
+                            </ul>
+      </div>
+
+            
+      <div class="row-fluid">
+        <div id="leftColumn" class="span3">
+          <div class="well sidebar-nav">
+                
+                    
+                <ul class="nav nav-list">
+                    <li class="nav-header">User Documentation</li>
+                                                                               
                                                                                
                                                                                
                                                                                
                                                                                
                                                                                
                                                                                
                                                                                
                                                                                
                                                                                
                                                                                
                                                                                
                                      
                                                                          
+      <li>
+    
+                          <a href="../../index.html" title="Metron">
+          <i class="icon-chevron-down"></i>
+        Metron</a>
+                    <ul class="nav nav-list">
+                      
+      <li>
+    
+                          <a href="../../Upgrading.html" title="Upgrading">
+          <i class="none"></i>
+        Upgrading</a>
+            </li>
+                                                                               
                                                                       
+      <li>
+    
+                          <a href="../../metron-analytics/index.html" 
title="Analytics">
+          <i class="icon-chevron-right"></i>
+        Analytics</a>
+                  </li>
+                      
+      <li>
+    
+                          <a 
href="../../metron-contrib/metron-docker/index.html" title="Docker">
+          <i class="none"></i>
+        Docker</a>
+            </li>
+                                                                               
                                                                                
                                                                                
                                                                                
                                                                             
+      <li>
+    
+                          <a href="../../metron-deployment/index.html" 
title="Deployment">
+          <i class="icon-chevron-right"></i>
+        Deployment</a>
+                  </li>
+                      
+      <li>
+    
+                          <a 
href="../../metron-interface/metron-alerts/index.html" title="Alerts">
+          <i class="none"></i>
+        Alerts</a>
+            </li>
+                      
+      <li>
+    
+                          <a 
href="../../metron-interface/metron-config/index.html" title="Config">
+          <i class="none"></i>
+        Config</a>
+            </li>
+                      
+      <li>
+    
+                          <a 
href="../../metron-interface/metron-rest/index.html" title="Rest">
+          <i class="none"></i>
+        Rest</a>
+            </li>
+                                                                               
                                                                                
                                                                                
                                               
+      <li>
+    
+                          <a href="../../metron-platform/index.html" 
title="Platform">
+          <i class="icon-chevron-down"></i>
+        Platform</a>
+                    <ul class="nav nav-list">
+                      
+      <li>
+    
+                          <a 
href="../../metron-platform/Performance-tuning-guide.html" 
title="Performance-tuning-guide">
+          <i class="none"></i>
+        Performance-tuning-guide</a>
+            </li>
+                      
+      <li>
+    
+                          <a 
href="../../metron-platform/metron-api/index.html" title="Api">
+          <i class="none"></i>
+        Api</a>
+            </li>
+                      
+      <li>
+    
+                          <a 
href="../../metron-platform/metron-common/index.html" title="Common">
+          <i class="none"></i>
+        Common</a>
+            </li>
+                      
+      <li class="active">
+    
+            <a href="#"><i class="none"></i>Data-management</a>
+          </li>
+                      
+      <li>
+    
+                          <a 
href="../../metron-platform/metron-elasticsearch/index.html" 
title="Elasticsearch">
+          <i class="none"></i>
+        Elasticsearch</a>
+            </li>
+                      
+      <li>
+    
+                          <a 
href="../../metron-platform/metron-enrichment/index.html" title="Enrichment">
+          <i class="none"></i>
+        Enrichment</a>
+            </li>
+                      
+      <li>
+    
+                          <a 
href="../../metron-platform/metron-indexing/index.html" title="Indexing">
+          <i class="none"></i>
+        Indexing</a>
+            </li>
+                      
+      <li>
+    
+                          <a 
href="../../metron-platform/metron-management/index.html" title="Management">
+          <i class="none"></i>
+        Management</a>
+            </li>
+                                                                        
+      <li>
+    
+                          <a 
href="../../metron-platform/metron-parsers/index.html" title="Parsers">
+          <i class="icon-chevron-right"></i>
+        Parsers</a>
+                  </li>
+                      
+      <li>
+    
+                          <a 
href="../../metron-platform/metron-pcap-backend/index.html" 
title="Pcap-backend">
+          <i class="none"></i>
+        Pcap-backend</a>
+            </li>
+                      
+      <li>
+    
+                          <a 
href="../../metron-platform/metron-writer/index.html" title="Writer">
+          <i class="none"></i>
+        Writer</a>
+            </li>
+              </ul>
+        </li>
+                                                                               
           
+      <li>
+    
+                          <a href="../../metron-sensors/index.html" 
title="Sensors">
+          <i class="icon-chevron-right"></i>
+        Sensors</a>
+                  </li>
+                      
+      <li>
+    
+                          <a 
href="../../metron-stellar/stellar-3rd-party-example/index.html" 
title="Stellar-3rd-party-example">
+          <i class="none"></i>
+        Stellar-3rd-party-example</a>
+            </li>
+                                                                        
+      <li>
+    
+                          <a 
href="../../metron-stellar/stellar-common/index.html" title="Stellar-common">
+          <i class="icon-chevron-right"></i>
+        Stellar-common</a>
+                  </li>
+                                                                               
           
+      <li>
+    
+                          <a href="../../use-cases/index.html" 
title="Use-cases">
+          <i class="icon-chevron-right"></i>
+        Use-cases</a>
+                  </li>
+              </ul>
+        </li>
+            </ul>
+                
+                    
+                
+          <hr class="divider" />
+
+           <div id="poweredBy">
+                            <div class="clear"></div>
+                            <div class="clear"></div>
+                            <div class="clear"></div>
+                             <a href="http://maven.apache.org/" title="Built 
by Maven" class="poweredBy">
+        <img class="builtBy" alt="Built by Maven" 
src="../../images/logos/maven-feather.png" />
+      </a>
+                  </div>
+          </div>
+        </div>
+        
+                
+        <div id="bodyColumn"  class="span9" >
+                                  
+            <h1>Resource Data Management</h1>
+<p><a name="Resource_Data_Management"></a></p>
+<p>This project is a collection of classes to assist with loading of various 
enrichment and threat intelligence sources into Metron.</p>
+<div class="section">
+<h2><a name="Simple_HBase_EnrichmentsThreat_Intelligence"></a>Simple HBase 
Enrichments/Threat Intelligence</h2>
+<p>The vast majority of enrichments and threat intelligence processing tend 
toward the following pattern:</p>
+
+<ul>
+  
+<li>Take a field</li>
+  
+<li>Look up the field in a key/value store</li>
+  
+<li>If the key exists, then either it&#x2019;s a threat to be alerted or it 
should be enriched with the value associated with the key.</li>
+</ul>
+<p>As such, we have created this capability as a default threat intel and 
enrichment adapter. The basic primitive for simple enrichments and threat 
intelligence sources is a complex key containing the following:</p>
+
+<ul>
+  
+<li>Type : The type of threat intel or enrichment (e.g. malicious_ip)</li>
+  
+<li>Indicator : The indicator in question</li>
+  
+<li>Value : The value to associate with the type, indicator pair. This is a 
JSON map.</li>
+</ul>
+<p>At present, all of the dataloads utilities function by converting raw data 
sources to this primitive key (type, indicator) and value to be placed in 
HBase.</p>
+<p>In the case of threat intel, a hit on the threat intel table will result 
in:</p>
+
+<ul>
+  
+<li>The <tt>is_alert</tt> field being set to <tt>true</tt> in the index</li>
+  
+<li>A field named 
<tt>threatintels.hbaseThreatIntel.$field.$threatintel_type</tt> is set to 
<tt>alert</tt>
+  
+<ul>
+    
+<li><tt>$field</tt> is the field in the original document that was a match 
(e.g. <tt>ip_src_addr</tt>)</li>
+    
+<li><tt>$threatintel_type</tt> is the type of threat intel imported (defined 
in the Extractor configuration below).</li>
+  </ul></li>
+</ul>
+<p>In the case of simple hbase enrichment, a hit on the enrichments table will 
result in the following new field for each key in the 
value: <tt>enrichments.hbaseEnrichment.$field.$enrichment_type.$key</tt> </p>
+
+<ul>
+  
+<li><tt>$field</tt> is the field in the original document that was a match 
(e.g. <tt>ip_src_addr</tt>)</li>
+  
+<li><tt>$enrichment_type</tt> is the type of enrichment imported (defined in 
the Extractor configuration below).</li>
+  
+<li><tt>$key</tt> is a key in the JSON map associated with the row in 
HBase.</li>
+</ul>
+<p>For instance, in the situation where we had the following very silly 
key/value in HBase in the enrichment table:</p>
+
+<ul>
+  
+<li>indicator: <tt>127.0.0.1</tt></li>
+  
+<li>type : <tt>important_addresses</tt></li>
+  
+<li>value: <tt>{ &quot;name&quot; : &quot;localhost&quot;, 
&quot;location&quot; : &quot;home&quot; }</tt></li>
+</ul>
+<p>If we had a document whose <tt>ip_src_addr</tt> came through with a value 
of <tt>127.0.0.1</tt>, we would have the following fields added to the indexed 
document:</p>
+
+<ul>
+  
+<li><tt>enrichments.hbaseEnrichment.ip_src_addr.important_addresses.name</tt> 
: <tt>localhost</tt></li>
+  
+<li><tt>enrichments.hbaseEnrichment.ip_src_addr.important_addresses.location</tt>
 : <tt>home</tt></li>
+</ul></div>
+<div class="section">
+<h2><a name="Extractor_Framework"></a>Extractor Framework</h2>
+<p>For the purpose of ingesting data of a variety of formats, we have created 
an Extractor framework which allows for common data formats to be interpreted 
as enrichment or threat intelligence sources. The formats supported at present 
are:</p>
+
+<ul>
+  
+<li>CSV (both threat intel and enrichment)</li>
+  
+<li>STIX (threat intel only)</li>
+  
+<li>Custom (pass your own class)</li>
+</ul>
+<p>All of the current utilities take a JSON file to configure how to interpret 
input data. This JSON describes the type of data and the schema if necessary 
for the data if it is not fixed (as in STIX, e.g.).</p>
+<div class="section">
+<h3><a name="CSV_Extractor"></a>CSV Extractor</h3>
+<p>Consider the following example configuration file which describes how to 
process a CSV file.</p>
+
+<div class="source">
+<div class="source">
+<pre>{
+  &quot;config&quot; : {
+    &quot;columns&quot; : {
+         &quot;ip&quot; : 0
+        ,&quot;source&quot; : 2
+    }
+    ,&quot;indicator_column&quot; : &quot;ip&quot;
+    ,&quot;type&quot; : &quot;malicious_ip&quot;
+    ,&quot;separator&quot; : &quot;,&quot;
+  }
+  ,&quot;extractor&quot; : &quot;CSV&quot;
+}
+</pre></div></div>
+<p>In this example, we have instructed the extractor of the schema (i.e. the 
columns field), two columns at the first and third position. We have indicated 
that the <tt>ip</tt> column is the indicator type and that the enrichment type 
is named <tt>malicious_ip</tt>. We have also indicated that the extractor to 
use is the CSV Extractor. The other option is the STIX extractor or a fully 
qualified classname for your own extractor.</p>
+<p>The source column values will show up in the value in HBase because it is 
called out as a non-indicator column. The key for the value will be 
&#x2018;source&#x2019;. For instance, given an input string of 
<tt>123.45.123.12,something,the grapevine</tt>, the following key, value would 
be extracted:</p>
+
+<ul>
+  
+<li>Indicator : <tt>123.45.123.12</tt></li>
+  
+<li>Type : <tt>malicious_ip</tt></li>
+  
+<li>Value : <tt>{ &quot;ip&quot; : &quot;123.45.123.12&quot;, 
&quot;source&quot; : &quot;the grapevine&quot; }</tt></li>
+</ul></div>
+<div class="section">
+<h3><a name="STIX_Extractor"></a>STIX Extractor</h3>
+<p>Consider the following config for importing STIX documents. This is a 
threat intelligence interchange format, so it is particularly relevant and 
attractive data to import for our purposes. Because STIX is a standard format, 
there is no need to specify the schema or how to interpret the documents.</p>
+<p>We support the versions of Stix and Cybox supported by <a 
class="externalLink" 
href="https://github.com/STIXProject/java-stix/tree/v1.2.0.2">java-stix</a>:</p>
+
+<ul>
+  
+<li>Stix - <a class="externalLink" 
href="https://github.com/STIXProject/schemas/blob/356cc4f6b06625465f0808388eb166807313b4e0/stix_core.xsd">1.2</a>
 and earlier</li>
+  
+<li>Cybox - <a class="externalLink" 
href="https://github.com/CybOXProject/schemas/blob/97beb32c376a9223e91b52cb3e4c8d2af6baf786/cybox_core.xsd">2.1</a>
 and earlier</li>
+</ul>
+<p>We support a subset of STIX messages for importation:</p>
+
+<table border="0" class="table table-striped">
+  <thead>
+    
+<tr class="a">
+      
+<th>STIX Type </th>
+      
+<th>Specific Type </th>
+      
+<th>Enrichment Type Name </th>
+    </tr>
+  </thead>
+  <tbody>
+    
+<tr class="b">
+      
+<td>Address </td>
+      
+<td>IPV_4_ADDR </td>
+      
+<td>address:IPV_4_ADDR </td>
+    </tr>
+    
+<tr class="a">
+      
+<td>Address </td>
+      
+<td>IPV_6_ADDR </td>
+      
+<td>address:IPV_6_ADDR </td>
+    </tr>
+    
+<tr class="b">
+      
+<td>Address </td>
+      
+<td>E_MAIL </td>
+      
+<td>address:E_MAIL </td>
+    </tr>
+    
+<tr class="a">
+      
+<td>Address </td>
+      
+<td>MAC </td>
+      
+<td>address:MAC </td>
+    </tr>
+    
+<tr class="b">
+      
+<td>Domain </td>
+      
+<td>FQDN </td>
+      
+<td>domain:FQDN </td>
+    </tr>
+    
+<tr class="a">
+      
+<td>Hostname </td>
+      
+<td> </td>
+      
+<td>hostname </td>
+    </tr>
+    
+<tr class="b">
+      
+<td>URI </td>
+      
+<td> </td>
+      
+<td>uriobjecttype </td>
+    </tr>
+  </tbody>
+</table>
+<p>NOTE: The enrichment type will be used as the type above.</p>
+<p>Consider the following configuration for an Extractor</p>
+
+<div class="source">
+<div class="source">
+<pre>{
+  &quot;config&quot; : {
+    &quot;stix_address_categories&quot; : &quot;IPV_4_ADDR&quot;
+  }
+  ,&quot;extractor&quot; : &quot;STIX&quot;
+}
+</pre></div></div>
+<p>In here, we&#x2019;re configuring the STIX extractor to load from a series 
of STIX files, however we only want to bring in IPv4 addresses from the set of 
all possible addresses. Note that if no categories are specified for import, 
all are assumed. Also, only address and domain types allow filtering via 
<tt>stix_address_categories</tt> and <tt>stix_domain_categories</tt> config 
parameters.</p></div>
+<div class="section">
+<h3><a name="Common_Extractor_Properties"></a>Common Extractor Properties</h3>
+<p>Users also have the ability to transform and filter enrichment and threat 
intel data using Stellar as it is loaded into HBase. This feature is available 
to all extractor types.</p>
+<p>As an example, we will be providing a CSV list of top domains as an 
enrichment and filtering the value metadata, as well as the indicator column, 
with Stellar expressions.</p>
+
+<div class="source">
+<div class="source">
+<pre>{
+  &quot;config&quot; : {
+    &quot;zk_quorum&quot; : &quot;node1:2181&quot;,
+    &quot;columns&quot; : {
+       &quot;rank&quot; : 0,
+       &quot;domain&quot; : 1
+    },
+    &quot;value_transform&quot; : {
+       &quot;domain&quot; : &quot;DOMAIN_REMOVE_TLD(domain)&quot;
+    },
+    &quot;value_filter&quot; : &quot;LENGTH(domain) &gt; 0&quot;,
+    &quot;indicator_column&quot; : &quot;domain&quot;,
+    &quot;indicator_transform&quot; : {
+       &quot;indicator&quot; : &quot;DOMAIN_REMOVE_TLD(indicator)&quot;
+    },
+    &quot;indicator_filter&quot; : &quot;LENGTH(indicator) &gt; 0&quot;,
+    &quot;type&quot; : &quot;top_domains&quot;,
+    &quot;separator&quot; : &quot;,&quot;
+  },
+  &quot;extractor&quot; : &quot;CSV&quot;
+}
+</pre></div></div>
+<p>There are 2 property maps that work with full Stellar expressions, and 2 
properties that will work with Stellar predicates.</p>
+
+<table border="0" class="table table-striped">
+  <thead>
+    
+<tr class="a">
+      
+<th>Property </th>
+      
+<th>Description</th>
+    </tr>
+  </thead>
+  <tbody>
+    
+<tr class="b">
+      
+<td>value_transform </td>
+      
+<td>Transform fields defined in the &#x201c;columns&#x201d; mapping with 
Stellar transformations. New keys introduced in the transform will be added to 
the key metadata.</td>
+    </tr>
+    
+<tr class="a">
+      
+<td>value_filter </td>
+      
+<td>Allows additional filtering with Stellar predicates based on results from 
the value transformations. In this example, records whose domain property is 
empty after removing the TLD will be omitted.</td>
+    </tr>
+    
+<tr class="b">
+      
+<td>indicator_transform </td>
+      
+<td>Transform the indicator column independent of the value transformations. 
You can refer to the original indicator value by using 
&#x201c;indicator&#x201d; as the variable name, as shown in the example above. 
In addition, if you prefer to piggyback your transformations, you can refer to 
the variable &#x201c;domain&#x201d;, which will allow your indicator transforms 
to inherit transformations done to this value during the value 
transformations.</td>
+    </tr>
+    
+<tr class="a">
+      
+<td>indicator_filter </td>
+      
+<td>Allows additional filtering with Stellar predicates based on results from 
the indicator transformations. In this example, records whose indicator value is 
empty after removing the TLD will be omitted.</td>
+    </tr>
+  </tbody>
+</table>
+<p>top-list.csv</p>
+
+<div class="source">
+<div class="source">
+<pre>1,google.com
+2,yahoo.com
+...
+</pre></div></div>
+<p>Running a file import with the above data and extractor configuration would 
result in the following 2 extracted data records:</p>
+
+<table border="0" class="table table-striped">
+  <thead>
+    
+<tr class="a">
+      
+<th>Indicator </th>
+      
+<th>Type </th>
+      
+<th>Value </th>
+    </tr>
+  </thead>
+  <tbody>
+    
+<tr class="b">
+      
+<td>google </td>
+      
+<td>top_domains </td>
+      
+<td>{ &#x201c;rank&#x201d; : &#x201c;1&#x201d;, &#x201c;domain&#x201d; : 
&#x201c;google&#x201d; } </td>
+    </tr>
+    
+<tr class="a">
+      
+<td>yahoo </td>
+      
+<td>top_domains </td>
+      
+<td>{ &#x201c;rank&#x201d; : &#x201c;2&#x201d;, &#x201c;domain&#x201d; : 
&#x201c;yahoo&#x201d; } </td>
+    </tr>
+  </tbody>
+</table>
+<p>Similar to the parser framework, providing a Zookeeper quorum via the 
zk_quorum property will enable Stellar to access properties that reside in the 
global config. Expanding on our example above, if the global config looks as 
follows:</p>
+
+<div class="source">
+<div class="source">
+<pre>{
+    &quot;global_property&quot; : &quot;metron-ftw&quot;
+}
+</pre></div></div>
+<p>And we expand our value_transform:</p>
+
+<div class="source">
+<div class="source">
+<pre>...
+    &quot;value_transform&quot; : {
+       &quot;domain&quot; : &quot;DOMAIN_REMOVE_TLD(domain)&quot;,
+       &quot;a-new-prop&quot; : &quot;global_property&quot;
+    },
+...
+</pre></div></div>
+<p>The resulting value data would look like the following:</p>
+
+<table border="0" class="table table-striped">
+  <thead>
+    
+<tr class="a">
+      
+<th>Indicator </th>
+      
+<th>Type </th>
+      
+<th>Value </th>
+    </tr>
+  </thead>
+  <tbody>
+    
+<tr class="b">
+      
+<td>google </td>
+      
+<td>top_domains </td>
+      
+<td>{ &#x201c;rank&#x201d; : &#x201c;1&#x201d;, &#x201c;domain&#x201d; : 
&#x201c;google&#x201d;, &#x201c;a-new-prop&#x201d; : &#x201c;metron-ftw&#x201d; 
} </td>
+    </tr>
+    
+<tr class="a">
+      
+<td>yahoo </td>
+      
+<td>top_domains </td>
+      
+<td>{ &#x201c;rank&#x201d; : &#x201c;2&#x201d;, &#x201c;domain&#x201d; : 
&#x201c;yahoo&#x201d;, &#x201c;a-new-prop&#x201d; : &#x201c;metron-ftw&#x201d; 
} </td>
+    </tr>
+  </tbody>
+</table></div></div>
+<div class="section">
+<h2><a name="Enrichment_Config"></a>Enrichment Config</h2>
+<p>In order to automatically add new enrichment and threat intel types to 
existing, running enrichment topologies, you will need to add new fields and 
new types to the zookeeper configuration. A convenience parameter has been made 
to assist in this when doing an import. Namely, you can specify the enrichment 
configs and how they associate with the fields of the documents flowing through 
the enrichment topology.</p>
+<p>Consider the following Enrichment Configuration JSON. This one is for a 
threat intelligence type:</p>
+
+<div class="source">
+<div class="source">
+<pre>{
+  &quot;zkQuorum&quot; : &quot;localhost:2181&quot;
+ ,&quot;sensorToFieldList&quot; : {
+    &quot;bro&quot; : {
+           &quot;type&quot; : &quot;THREAT_INTEL&quot;
+          ,&quot;fieldToEnrichmentTypes&quot; : {
+             &quot;ip_src_addr&quot; : [ &quot;malicious_ip&quot; ]
+            ,&quot;ip_dst_addr&quot; : [ &quot;malicious_ip&quot; ]
+                                      }
+           }
+                        }
+}
+</pre></div></div>
+<p>We have to specify the following:</p>
+
+<ul>
+  
+<li>The zookeeper quorum which holds the cluster configuration</li>
+  
+<li>The mapping between the fields in the enriched documents and the 
enrichment types.</li>
+</ul>
+<p>This configuration allows the ingestion tools to update zookeeper 
post-ingestion so that the enrichment topology can take advantage immediately 
of the new type.</p></div>
+<div class="section">
+<h2><a name="Loading_Utilities"></a>Loading Utilities</h2>
+<p>The two configurations above are used by three separate ingestion 
tools:</p>
+
+<ul>
+  
+<li>Taxii Loader</li>
+  
+<li>Bulk load from HDFS via MapReduce</li>
+  
+<li>Flat File ingestion</li>
+</ul>
+<div class="section">
+<h3><a name="Taxii_Loader"></a>Taxii Loader</h3>
+<p>The shell script <tt>$METRON_HOME/bin/threatintel_taxii_load.sh</tt> can be 
used to poll a Taxii server for STIX documents and ingest them into HBase.<br 
/>It is quite common for this Taxii server to be an aggregation server such as 
Soltra Edge.</p>
+<p>In addition to the Enrichment and Extractor configs described above, this 
loader requires a configuration file describing the connection information to 
the Taxii server. An illustrative example of such a configuration file is:</p>
+
+<div class="source">
+<div class="source">
+<pre>{
+   &quot;endpoint&quot; : 
&quot;http://localhost:8282/taxii-discovery-service&quot;
+  ,&quot;type&quot; : &quot;DISCOVER&quot;
+  ,&quot;collection&quot; : &quot;guest.Abuse_ch&quot;
+  ,&quot;table&quot; : &quot;threat_intel&quot;
+  ,&quot;columnFamily&quot; : &quot;cf&quot;
+  ,&quot;allowedIndicatorTypes&quot; : [ &quot;domainname:FQDN&quot;, 
&quot;address:IPV_4_ADDR&quot; ]
+}
+</pre></div></div>
+<p>As you can see, we are specifying the following information:</p>
+
+<ul>
+  
+<li>endpoint : The URL of the endpoint</li>
+  
+<li>type : <tt>POLL</tt> or <tt>DISCOVER</tt> depending on the endpoint.</li>
+  
+<li>collection : The Taxii collection to ingest</li>
+  
+<li>table : The HBase table to import into</li>
+  
+<li>columnFamily : The column family to import into</li>
+  
+<li>allowedIndicatorTypes : an array of acceptable threat intel types (see the 
&#x201c;Enrichment Type Name&#x201d; column of the Stix table above for the 
possibilities).</li>
+</ul>
+<p>The parameters for the utility are as follows:</p>
+
+<table border="0" class="table table-striped">
+  <thead>
+    
+<tr class="a">
+      
+<th>Short Code </th>
+      
+<th>Long Code </th>
+      
+<th>Is Required? </th>
+      
+<th>Description </th>
+    </tr>
+  </thead>
+  <tbody>
+    
+<tr class="b">
+      
+<td>-h </td>
+      
+<td> </td>
+      
+<td>No </td>
+      
+<td>Generate the help screen/set of options </td>
+    </tr>
+    
+<tr class="a">
+      
+<td>-e </td>
+      
+<td>--extractor_config </td>
+      
+<td>Yes </td>
+      
+<td>JSON Document describing the extractor for this input data source </td>
+    </tr>
+    
+<tr class="b">
+      
+<td>-c </td>
+      
+<td>--taxii_connection_config </td>
+      
+<td>Yes </td>
+      
+<td>The JSON config file to configure the connection </td>
+    </tr>
+    
+<tr class="a">
+      
+<td>-p </td>
+      
+<td>--time_between_polls </td>
+      
+<td>No </td>
+      
+<td>The time between polling the Taxii server in milliseconds. (default: 1 
hour) </td>
+    </tr>
+    
+<tr class="b">
+      
+<td>-b </td>
+      
+<td>--begin_time </td>
+      
+<td>No </td>
+      
+<td>Start time to poll the Taxii server (all data from that point will be 
gathered in the first pull). The format for the date is yyyy-MM-dd HH:mm:ss 
</td>
+    </tr>
+    
+<tr class="a">
+      
+<td>-l </td>
+      
+<td>--log4j </td>
+      
+<td>No </td>
+      
+<td>The Log4j Properties to load </td>
+    </tr>
+    
+<tr class="b">
+      
+<td>-n </td>
+      
+<td>--enrichment_config </td>
+      
+<td>No </td>
+      
+<td>The JSON document describing the enrichments to configure. Unlike other 
loaders, this is run first if specified. </td>
+    </tr>
+  </tbody>
+</table></div>
+<div class="section">
+<h3><a name="Flatfile_Loader"></a>Flatfile Loader</h3>
+<p>The shell script <tt>$METRON_HOME/bin/flatfile_loader.sh</tt> will read 
data from local disk, HDFS or URLs and load the enrichment or threat intel data 
into an HBase table.<br />Note: This utility works for enrichment as well as 
threat intel due to the underlying infrastructure being the same.</p>
+<p>Note that the Extractor config accepts one additional configuration 
parameter that is only considered by this loader:</p>
+
+<ul>
+  
+<li>inputFormat : This specifies how to consider the data. The two 
implementations are <tt>BY_LINE</tt> and <tt>WHOLE_FILE</tt>.</li>
+</ul>
+<p>The default is <tt>BY_LINE</tt>, which makes sense for a list of CSVs where 
each line indicates a unit of information which can be imported. However, if 
you are importing a set of STIX documents, then you want each whole document to 
be considered as a single input to the Extractor, so use 
<tt>WHOLE_FILE</tt>.</p>
+<p>The parameters for the utility are as follows:</p>
+
+<table border="0" class="table table-striped">
+  <thead>
+    
+<tr class="a">
+      
+<th>Short Code </th>
+      
+<th>Long Code </th>
+      
+<th>Is Required? </th>
+      
+<th>Description </th>
+    </tr>
+  </thead>
+  <tbody>
+    
+<tr class="b">
+      
+<td>-h </td>
+      
+<td> </td>
+      
+<td>No </td>
+      
+<td>Generate the help screen/set of options </td>
+    </tr>
+    
+<tr class="a">
+      
+<td>-q </td>
+      
+<td>--quiet </td>
+      
+<td>No </td>
+      
+<td>Do not update progress </td>
+    </tr>
+    
+<tr class="b">
+      
+<td>-e </td>
+      
+<td>--extractor_config </td>
+      
+<td>Yes </td>
+      
+<td>JSON Document describing the extractor for this input data source </td>
+    </tr>
+    
+<tr class="a">
+      
+<td>-m </td>
+      
+<td>--import_mode </td>
+      
+<td>No </td>
+      
+<td>The Import mode to use: LOCAL, MR. Default: LOCAL </td>
+    </tr>
+    
+<tr class="b">
+      
+<td>-t </td>
+      
+<td>--hbase_table </td>
+      
+<td>Yes </td>
+      
+<td>The HBase table to import into </td>
+    </tr>
+    
+<tr class="a">
+      
+<td>-c </td>
+      
+<td>--hbase_cf </td>
+      
+<td>Yes </td>
+      
+<td>The HBase table column family to import into </td>
+    </tr>
+    
+<tr class="b">
+      
+<td>-i </td>
+      
+<td>--input </td>
+      
+<td>Yes </td>
+      
+<td>The input data location on local disk. If this is a file, then that file 
will be loaded. If this is a directory, then the files will be loaded 
recursively under that directory. </td>
+    </tr>
+    
+<tr class="a">
+      
+<td>-l </td>
+      
+<td>--log4j </td>
+      
+<td>No </td>
+      
+<td>The log4j properties file to load </td>
+    </tr>
+    
+<tr class="b">
+      
+<td>-n </td>
+      
+<td>--enrichment_config </td>
+      
+<td>No </td>
+      
+<td>The JSON document describing the enrichments to configure. Unlike other 
loaders, this is run first if specified. </td>
+    </tr>
+    
+<tr class="a">
+      
+<td>-p </td>
+      
+<td>--threads </td>
+      
+<td>No </td>
+      
+<td>The number of threads to use when extracting data. The default is the 
number of cores. </td>
+    </tr>
+    
+<tr class="b">
+      
+<td>-b </td>
+      
+<td>--batchSize </td>
+      
+<td>No </td>
+      
+<td>The batch size to use for HBase puts </td>
+    </tr>
+  </tbody>
+</table></div>
+<div class="section">
+<h3><a name="GeoLite2_Loader"></a>GeoLite2 Loader</h3>
+<p>The shell script <tt>$METRON_HOME/bin/geo_enrichment_load.sh</tt> will 
retrieve MaxMind GeoLite2 data, load it into HDFS, and update the 
configuration.</p>
+<p>THIS SCRIPT WILL NOT UPDATE AMBARI&#x2019;S GLOBAL.JSON, JUST THE ZK 
CONFIGS. CHANGES WILL GO INTO EFFECT, BUT WILL NOT PERSIST PAST AN AMBARI 
RESTART UNTIL UPDATED THERE.</p>
+<p>The parameters for the utility are as follows:</p>
+
+<table border="0" class="table table-striped">
+  <thead>
+    
+<tr class="a">
+      
+<th>Short Code </th>
+      
+<th>Long Code </th>
+      
+<th>Is Required? </th>
+      
+<th>Description </th>
+    </tr>
+  </thead>
+  <tbody>
+    
+<tr class="b">
+      
+<td>-h </td>
+      
+<td> </td>
+      
+<td>No </td>
+      
+<td>Generate the help screen/set of options </td>
+    </tr>
+    
+<tr class="a">
+      
+<td>-g </td>
+      
+<td>--geo_url </td>
+      
+<td>No </td>
+      
+<td>GeoIP URL - defaults to <a class="externalLink" 
href="http://geolite.maxmind.com/download/geoip/database/GeoLite2-City.mmdb.gz">http://geolite.maxmind.com/download/geoip/database/GeoLite2-City.mmdb.gz</a>
 </td>
+    </tr>
+    
+<tr class="b">
+      
+<td>-r </td>
+      
+<td>--remote_dir </td>
+      
+<td>No </td>
+      
+<td>HDFS directory to land formatted GeoIP file - defaults to 
/apps/metron/geo/&lt;epoch millis&gt;/ </td>
+    </tr>
+    
+<tr class="a">
+      
+<td>-t </td>
+      
+<td>--tmp_dir </td>
+      
+<td>No </td>
+      
+<td>Directory for landing the temporary GeoIP data - defaults to /tmp </td>
+    </tr>
+    
+<tr class="b">
+      
+<td>-z </td>
+      
+<td>--zk_quorum </td>
+      
+<td>Yes </td>
+      
+<td>Zookeeper Quorum URL (zk1:port,zk2:port,&#x2026;) </td>
+    </tr>
+  </tbody>
+</table></div></div>
+                  </div>
+            </div>
+          </div>
+
+    <hr/>
+
+    <footer>
+            <div class="container-fluid">
+              <div class="row span12">Copyright &copy;                    2017
+                        <a href="https://www.apache.org">The Apache Software 
Foundation</a>.
+            All Rights Reserved.      
+                    
+      </div>
+
+                          
+        
+                </div>
+    </footer>
+  </body>
+</html>


Reply via email to