Added: release/metron/0.4.2/site-book/metron-platform/metron-common/index.html ============================================================================== --- release/metron/0.4.2/site-book/metron-platform/metron-common/index.html (added) +++ release/metron/0.4.2/site-book/metron-platform/metron-common/index.html Wed Jan 3 18:25:57 2018 @@ -0,0 +1,877 @@ +<!DOCTYPE html> +<!-- + | Generated by Apache Maven Doxia at 2017-12-08 + | Rendered using Apache Maven Fluido Skin 1.3.0 +--> +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> + <head> + <meta charset="UTF-8" /> + <meta name="viewport" content="width=device-width, initial-scale=1.0" /> + <meta name="Date-Revision-yyyymmdd" content="20171208" /> + <meta http-equiv="Content-Language" content="en" /> + <title>Metron – Contents</title> + <link rel="stylesheet" href="../../css/apache-maven-fluido-1.3.0.min.css" /> + <link rel="stylesheet" href="../../css/site.css" /> + <link rel="stylesheet" href="../../css/print.css" media="print" /> + + + <script type="text/javascript" src="../../js/apache-maven-fluido-1.3.0.min.js"></script> + + + +<script type="text/javascript">$( document ).ready( function() { $( '.carousel' ).carousel( { interval: 3500 } ) } );</script> + + </head> + <body class="topBarDisabled"> + + + + + <div class="container-fluid"> + <div id="banner"> + <div class="pull-left"> + <a href="http://metron.apache.org/" id="bannerLeft"> + <img src="../../images/metron-logo.png" alt="Apache Metron" width="148px" height="48px"/> + </a> + </div> + <div class="pull-right"> </div> + <div class="clear"><hr/></div> + </div> + + <div id="breadcrumbs"> + <ul class="breadcrumb"> + + + <li class=""> + <a href="http://www.apache.org" class="externalLink" title="Apache"> + Apache</a> + </li> + <li class="divider ">/</li> + <li class=""> + <a href="http://metron.apache.org/" class="externalLink" title="Metron"> + Metron</a> + </li> + <li class="divider ">/</li> + <li class=""> + <a href="../../index.html" 
title="Documentation"> + Documentation</a> + </li> + <li class="divider ">/</li> + <li class="">Contents</li> + + + + <li id="publishDate" class="pull-right">Last Published: 2017-12-08</li> <li class="divider pull-right">|</li> + <li id="projectVersion" class="pull-right">Version: 0.4.2</li> + + </ul> + </div> + + + <div class="row-fluid"> + <div id="leftColumn" class="span3"> + <div class="well sidebar-nav"> + + + <ul class="nav nav-list"> + <li class="nav-header">User Documentation</li> + + <li> + + <a href="../../index.html" title="Metron"> + <i class="icon-chevron-down"></i> + Metron</a> + <ul class="nav nav-list"> + + <li> + + <a href="../../Upgrading.html" title="Upgrading"> + <i class="none"></i> + Upgrading</a> + </li> + + <li> + + <a href="../../metron-analytics/index.html" title="Analytics"> + <i class="icon-chevron-right"></i> + Analytics</a> + </li> + + <li> + + <a href="../../metron-contrib/metron-docker/index.html" title="Docker"> + <i class="none"></i> + Docker</a> + </li> + + <li> + + <a href="../../metron-deployment/index.html" title="Deployment"> + <i class="icon-chevron-right"></i> + Deployment</a> + </li> + + <li> + + <a href="../../metron-interface/metron-alerts/index.html" title="Alerts"> + <i class="none"></i> + Alerts</a> + </li> + + <li> + + <a href="../../metron-interface/metron-config/index.html" title="Config"> + <i class="none"></i> + Config</a> + </li> + + <li> + + <a href="../../metron-interface/metron-rest/index.html" title="Rest"> + <i class="none"></i> + Rest</a> + </li> + + <li> + + <a href="../../metron-platform/index.html" title="Platform"> + <i class="icon-chevron-down"></i> + Platform</a> + <ul class="nav nav-list"> + + <li> + + <a href="../../metron-platform/Performance-tuning-guide.html" title="Performance-tuning-guide"> + <i class="none"></i> + Performance-tuning-guide</a> + </li> + + <li> + + <a href="../../metron-platform/metron-api/index.html" title="Api"> + <i class="none"></i> + Api</a> + </li> + + <li class="active"> 
+ + <a href="#"><i class="none"></i>Common</a> + </li> + + <li> + + <a href="../../metron-platform/metron-data-management/index.html" title="Data-management"> + <i class="none"></i> + Data-management</a> + </li> + + <li> + + <a href="../../metron-platform/metron-elasticsearch/index.html" title="Elasticsearch"> + <i class="none"></i> + Elasticsearch</a> + </li> + + <li> + + <a href="../../metron-platform/metron-enrichment/index.html" title="Enrichment"> + <i class="none"></i> + Enrichment</a> + </li> + + <li> + + <a href="../../metron-platform/metron-indexing/index.html" title="Indexing"> + <i class="none"></i> + Indexing</a> + </li> + + <li> + + <a href="../../metron-platform/metron-management/index.html" title="Management"> + <i class="none"></i> + Management</a> + </li> + + <li> + + <a href="../../metron-platform/metron-parsers/index.html" title="Parsers"> + <i class="icon-chevron-right"></i> + Parsers</a> + </li> + + <li> + + <a href="../../metron-platform/metron-pcap-backend/index.html" title="Pcap-backend"> + <i class="none"></i> + Pcap-backend</a> + </li> + + <li> + + <a href="../../metron-platform/metron-writer/index.html" title="Writer"> + <i class="none"></i> + Writer</a> + </li> + </ul> + </li> + + <li> + + <a href="../../metron-sensors/index.html" title="Sensors"> + <i class="icon-chevron-right"></i> + Sensors</a> + </li> + + <li> + + <a href="../../metron-stellar/stellar-3rd-party-example/index.html" title="Stellar-3rd-party-example"> + <i class="none"></i> + Stellar-3rd-party-example</a> + </li> + + <li> + + <a href="../../metron-stellar/stellar-common/index.html" title="Stellar-common"> + <i class="icon-chevron-right"></i> + Stellar-common</a> + </li> + + <li> + + <a href="../../use-cases/index.html" title="Use-cases"> + <i class="icon-chevron-right"></i> + Use-cases</a> + </li> + </ul> + </li> + </ul> + + + + <hr class="divider" /> + + <div id="poweredBy"> + <div class="clear"></div> + <div class="clear"></div> + <div class="clear"></div> + <a 
href="http://maven.apache.org/" title="Built by Maven" class="poweredBy"> + <img class="builtBy" alt="Built by Maven" src="../../images/logos/maven-feather.png" /> + </a> + </div> + </div> + </div> + + + <div id="bodyColumn" class="span9" > + + <h1>Contents</h1> +<p><a name="Contents"></a></p> + +<ul> + +<li><a href="#Stellar_Language">Stellar Language</a></li> + +<li><a href="#Global_Configuration">Global Configuration</a></li> + +<li><a href="#Validation_Framework">Validation Framework</a></li> + +<li><a href="#Management_Utility">Management Utility</a></li> + +<li><a href="topology-errors/index.html">Topology Errors</a></li> + +<li><a href="#Performance_Logging">Performance Logging</a></li> +</ul> +<p><a name="Stellar_Language"></a></p> +<h1>Stellar Language</h1> +<p>For a variety of components (threat intelligence triage and field transformations) we have the need to do simple computation and transformation using the data from messages as variables.<br />For those purposes, there exists a simple, scaled down DSL created to do simple computation and transformation.</p> +<p>The query language supports the following:</p> + +<ul> + +<li>Referencing fields in the enriched JSON</li> + +<li>String literals are quoted with either <tt>'</tt> or <tt>"</tt>, and support escaping for <tt>'</tt>, <tt>"</tt>, <tt>\t</tt>, <tt>\r</tt>, <tt>\n</tt>, and backslash</li> + +<li>Simple boolean operations: <tt>and</tt>, <tt>not</tt>, <tt>or</tt> + +<ul> + +<li>Boolean expressions are short-circuited (e.g. <tt>true or FUNC()</tt> would never execute <tt>FUNC</tt>)</li> + </ul></li> + +<li>Simple arithmetic operations: <tt>*</tt>, <tt>/</tt>, <tt>+</tt>, <tt>-</tt> on real numbers or integers</li> + +<li>Simple comparison operations <tt><</tt>, <tt>></tt>, <tt><=</tt>, <tt>>=</tt></li> + +<li>Simple equality comparison operations <tt>==</tt>, <tt>!=</tt></li> + +<li>if/then/else comparisons (i.e. 
<tt>if var1 < 10 then 'less than 10' else '10 or more'</tt>)</li> + +<li>Determining whether a field exists (via <tt>exists</tt>)</li> + +<li>An <tt>in</tt> operator that works like the <tt>in</tt> in Python</li> + +<li>The ability to have parentheses to make order of operations explicit</li> + +<li>User defined functions, including Lambda expressions</li> +</ul> +<p>For documentation of Stellar, please see the <a href="../../metron-stellar/stellar-common/index.html">Stellar README</a>.</p> +<p><a name="Global_Configuration"></a></p> +<h1>Global Configuration</h1> +<p>The format of the global enrichment is a JSON String to Object map. This is intended for configuration that is not sensor-specific.</p> +<p>This configuration is stored in zookeeper, but looks something like</p> + +<div class="source"> +<div class="source"> +<pre>{ + "es.clustername": "metron", + "es.ip": "node1", + "es.port": "9300", + "es.date.format": "yyyy.MM.dd.HH", + "parser.error.topic": "indexing", + "fieldValidations" : [ + { + "input" : [ "ip_src_addr", "ip_dst_addr" ], + "validation" : "IP", + "config" : { + "type" : "IPV4" + } + } + ] +} +</pre></div></div> +<p>The various parts of our stack that use the global config are documented throughout the Metron documentation, but a convenient index is provided here:</p> + +<table border="0" class="table table-striped"> + <thead> + +<tr class="a"> + +<th>Property Name </th> + +<th>Subsystem </th> + +<th>Type </th> + +<th>Ambari Property </th> + </tr> + </thead> + <tbody> + +<tr class="b"> + +<td><a href="../metron-elasticsearch/index.html#es.clustername"><tt>es.clustername</tt></a> </td> + +<td>Indexing </td> + +<td>String </td> + +<td><tt>es_cluster_name</tt> </td> + </tr> + +<tr class="a"> + +<td><a href="../metron-elasticsearch/index.html#es.ip"><tt>es.ip</tt></a> </td> + +<td>Indexing </td> + +<td>String </td> + +<td><tt>es_hosts</tt> </td> + </tr> + +<tr class="b"> + +<td><a 
href="../metron-elasticsearch/index.html#es.port"><tt>es.port</tt></a> </td> + +<td>Indexing </td> + +<td>String </td> + +<td><tt>es_port</tt> </td> + </tr> + +<tr class="a"> + +<td><a href="../metron-elasticsearch/index.html#es.date.format"><tt>es.date.format</tt></a> </td> + +<td>Indexing </td> + +<td>String </td> + +<td><tt>es_date_format</tt> </td> + </tr> + +<tr class="b"> + +<td><a href="#Validation_Framework"><tt>fieldValidations</tt></a> </td> + +<td>Parsing </td> + +<td>Object </td> + +<td>N/A </td> + </tr> + +<tr class="a"> + +<td><a href="../metron-parsers/index.html#parser.error.topic"><tt>parser.error.topic</tt></a> </td> + +<td>Parsing </td> + +<td>String </td> + +<td>N/A </td> + </tr> + +<tr class="b"> + +<td><a href="../../metron-stellar/stellar-common/index.html#stellar.function.paths"><tt>stellar.function.paths</tt></a> </td> + +<td>Stellar </td> + +<td>CSV String </td> + +<td>N/A </td> + </tr> + +<tr class="a"> + +<td><a href="../../metron-stellar/stellar-common/index.html#stellarfunctionresolverincludesexcludes"><tt>stellar.function.resolver.includes</tt></a> </td> + +<td>Stellar </td> + +<td>CSV String </td> + +<td>N/A </td> + </tr> + +<tr class="b"> + +<td><a href="../../metron-stellar/stellar-common/index.html#stellarfunctionresolverincludesexcludes"><tt>stellar.function.resolver.excludes</tt></a> </td> + +<td>Stellar </td> + +<td>CSV String </td> + +<td>N/A </td> + </tr> + +<tr class="a"> + +<td><a href="../../metron-analytics/metron-profiler/index.html#profiler.period.duration"><tt>profiler.period.duration</tt></a> </td> + +<td>Profiler </td> + +<td>Integer </td> + +<td><tt>profiler_period_duration</tt> </td> + </tr> + +<tr class="b"> + +<td><a href="../../metron-analytics/metron-profiler/index.html#profiler.period.duration.units"><tt>profiler.period.duration.units</tt></a> </td> + +<td>Profiler </td> + +<td>String </td> + +<td><tt>profiler_period_units</tt> </td> + </tr> + +<tr class="a"> + +<td><a 
href="../metron-indexing/index.html#update.hbase.table"><tt>update.hbase.table</tt></a> </td> + +<td>REST/Indexing </td> + +<td>String </td> + +<td><tt>update_hbase_table</tt> </td> + </tr> + +<tr class="b"> + +<td><a href="../metron-indexing/index.html#update.hbase.cf"><tt>update.hbase.cf</tt></a> </td> + +<td>REST/Indexing </td> + +<td>String </td> + +<td><tt>update_hbase_cf</tt> </td> + </tr> + +<tr class="a"> + +<td><a href="../metron-enrichment/index.html#geo.hdfs.file"><tt>geo.hdfs.file</tt></a> </td> + +<td>Enrichment </td> + +<td>String </td> + +<td><tt>geo_hdfs_file</tt> </td> + </tr> + </tbody> +</table> +<div class="section"> +<h2><a name="Note_Configs_in_Ambari"></a>Note Configs in Ambari</h2> +<p>If a field is managed via Ambari, you should change the field via Ambari. Otherwise, upon service restarts, you may find your update overwritten.</p> +<p><a name="Validation_Framework"></a></p> +<h1>Validation Framework</h1> +<p>Inside of the global configuration, there is a validation framework in place that verifies that messages coming from all parsers are valid. This is done in the form of validation plugins where assertions about fields or whole messages can be made. </p> +<p>The format for this is a <tt>fieldValidations</tt> field inside of global config. This is associated with an array of field validation objects structured like so:</p> + +<ul> + +<li><tt>input</tt> : An array of input fields or a single field. If this is omitted, then the whole message is passed to the validator.</li> + +<li><tt>config</tt> : A String to Object map for validation configuration. This is optional if the validation function requires no configuration.</li> + +<li><tt>validation</tt> : The validation function to be used. This is one of + +<ul> + +<li><tt>STELLAR</tt> : Execute a Stellar Language statement. Expects the query string in the <tt>condition</tt> field of the config.</li> + +<li><tt>IP</tt> : Validates that the input fields are an IP address. 
By default, if no configuration is set, it assumes <tt>IPV4</tt>, but you can specify the type by setting <tt>type</tt> in the config to either <tt>IPV6</tt> or <tt>IPV4</tt>, or to a list [<tt>IPV4</tt>,<tt>IPV6</tt>], in which case the input(s) will be validated against both.</li> + +<li><tt>DOMAIN</tt> : Validates that the fields are all domains.</li> + +<li><tt>EMAIL</tt> : Validates that the fields are all email addresses</li> + +<li><tt>URL</tt> : Validates that the fields are all URLs</li> + +<li><tt>DATE</tt> : Validates that the fields are a date. Expects <tt>format</tt> in the config.</li> + +<li><tt>INTEGER</tt> : Validates that the fields are an integer. String representation of an integer is allowed.</li> + +<li><tt>REGEX_MATCH</tt> : Validates that the fields match a regex. Expects <tt>pattern</tt> in the config.</li> + +<li><tt>NOT_EMPTY</tt> : Validates that the fields exist and are not empty (after trimming).</li> + </ul></li> +</ul> +<p><a name="Management_Utility"></a></p> +<h1>Management Utility</h1> +<p>Configurations should be stored on disk in the following structure starting at <tt>$BASE_DIR</tt>:</p> + +<ul> + +<li>global.json : The global config</li> + +<li><tt>sensors</tt> : The subdirectory containing sensor enrichment configuration JSON (e.g. <tt>snort.json</tt>, <tt>bro.json</tt>)</li> +</ul> +<p>By default, this directory as deployed by the ansible infrastructure is at <tt>$METRON_HOME/config/zookeeper</tt></p> +<p>While the configs are stored on disk, they must be loaded into Zookeeper to be used. 
To this end, there is a utility program to assist in this called <tt>$METRON_HOME/bin/zk_load_configs.sh</tt></p> +<p>This has the following options:</p> + +<div class="source"> +<div class="source"> +<pre> -c,--config_type <CONFIG_TYPE> The configuration type: GLOBAL, + PARSER, ENRICHMENT, INDEXING, + PROFILER + -f,--force Force operation + -h,--help Generate Help screen + -i,--input_dir <DIR> The input directory containing + the configuration files named + like "$source.json" + -m,--mode <MODE> The mode of operation: DUMP, + PULL, PUSH, PATCH + -n,--config_name <CONFIG_NAME> The configuration name: bro, + yaf, snort, squid, etc. + -o,--output_dir <DIR> The output directory which will + store the JSON configuration + from Zookeeper + -pk,--patch_key <PATCH_KEY> The key to modify + -pm,--patch_mode <PATCH_MODE> One of: ADD, REMOVE - relevant + only for key/value patches, + i.e. when a patch file is not + used. + -pf,--patch_file <PATCH_FILE> Path to the patch file. + -pv,--patch_value <PATCH_VALUE> Value to use in the patch. + -z,--zk_quorum <host:port,[host:port]*> Zookeeper Quorum URL + (zk1:port,zk2:port,...) 
+</pre></div></div> +<p>Usage examples:</p> + +<ul> + +<li>To dump the existing configs from zookeeper on the singlenode vagrant machine: <tt>$METRON_HOME/bin/zk_load_configs.sh -z node1:2181 -m DUMP</tt></li> + +<li>To dump the existing GLOBAL configs from zookeeper on the singlenode vagrant machine: <tt>$METRON_HOME/bin/zk_load_configs.sh -z node1:2181 -m DUMP -c GLOBAL</tt></li> + +<li>To push the configs into zookeeper on the singlenode vagrant machine: <tt>$METRON_HOME/bin/zk_load_configs.sh -z node1:2181 -m PUSH -i $METRON_HOME/config/zookeeper</tt></li> + +<li>To push only the GLOBAL configs into zookeeper on the singlenode vagrant machine: <tt>$METRON_HOME/bin/zk_load_configs.sh -z node1:2181 -m PUSH -i $METRON_HOME/config/zookeeper -c GLOBAL</tt></li> + +<li>To push only the PARSER configs into zookeeper on the singlenode vagrant machine: <tt>$METRON_HOME/bin/zk_load_configs.sh -z node1:2181 -m PUSH -i $METRON_HOME/config/zookeeper -c PARSER</tt></li> + +<li>To push only the PARSER ‘bro’ configs into zookeeper on the singlenode vagrant machine: <tt>$METRON_HOME/bin/zk_load_configs.sh -z node1:2181 -m PUSH -i $METRON_HOME/config/zookeeper -c PARSER -n bro</tt></li> + +<li>To pull all configs from zookeeper to the singlenode vagrant machine disk: <tt>$METRON_HOME/bin/zk_load_configs.sh -z node1:2181 -m PULL -o $METRON_HOME/config/zookeeper -f</tt></li> +</ul></div> +<div class="section"> +<h2><a name="Patching_mechanism"></a>Patching mechanism</h2> +<p>The configuration management utility leverages a JSON patching library that conforms to <a class="externalLink" href="https://tools.ietf.org/html/rfc6902">RFC-6902 spec</a>. We’re using the zjsonpatch library implementation from here - <a class="externalLink" href="https://github.com/flipkart-incubator/zjsonpatch">https://github.com/flipkart-incubator/zjsonpatch</a>. There are a couple options for leveraging patching. 
You can choose to patch the Zookeeper config via patch file:</p> +<p><tt>$METRON_HOME/bin/zk_load_configs.sh -z $ZOOKEEPER -m PATCH -c GLOBAL -pf /tmp/mypatch.txt</tt></p> +<p>or key/value pair:</p> +<p><tt>$METRON_HOME/bin/zk_load_configs.sh -z $ZOOKEEPER -m PATCH -c GLOBAL -pm ADD -pk foo -pv \"\"bar\"\"</tt></p> +<p>The options exposed via patch file are the full range of options from RFC-6902:</p> + +<ul> + +<li>ADD</li> + +<li>REMOVE</li> + +<li>REPLACE</li> + +<li>MOVE</li> + +<li>COPY</li> + +<li>TEST</li> +</ul> +<p>whereas with key/value patching, we currently expose only ADD and REMOVE. Note that ADD will function as a REPLACE when the key already exists.</p> +<div class="section"> +<h3><a name="Patch_File"></a>Patch File</h3> +<p>Let’s say we want to add a complex JSON object to our configuration with a patch file, e.g.</p> + +<div class="source"> +<div class="source"> +<pre>"foo" : { + "bar" : { + "baz" : [ "bazval1", "bazval2" ] + } + } +</pre></div></div> +<p>We would write a patch file “/tmp/mypatch.txt” with contents:</p> + +<div class="source"> +<div class="source"> +<pre>[ + { + "op": "add", + "path": "/foo", + "value": { "bar" : { "baz" : [ "bazval1", "bazval2" ] } } + } +] +</pre></div></div> +<p>And submit via zk_load_configs as follows:</p> + +<div class="source"> +<div class="source"> +<pre> $METRON_HOME/bin/zk_load_configs.sh -z $ZOOKEEPER -m PATCH -c GLOBAL -pf /tmp/mypatch.txt +</pre></div></div></div> +<div class="section"> +<h3><a name="Patch_KeyValue"></a>Patch Key/Value</h3> +<p>Now let’s try the same without using a patch file, instead using the patch_key and patch_value options right from the command line utility. 
This would look like the following.</p> + +<div class="source"> +<div class="source"> +<pre>$METRON_HOME/bin/zk_load_configs.sh -z $ZOOKEEPER -m PATCH -c GLOBAL -pm ADD -pk "/foo" -pv "{ \"bar\" : { \"baz\" : [ \"bazval1\", \"bazval2\" ] } }" +</pre></div></div></div> +<div class="section"> +<h3><a name="Applying_Multiple_Patches"></a>Applying Multiple Patches</h3> +<p>Applying multiple patches is also pretty straightforward. You can achieve this in a single command using patch files, or simply execute multiple commands in sequence using the patch_key/value approach.</p> +<p>Let’s say we wanted to add the following to our global config:</p> + +<div class="source"> +<div class="source"> +<pre>"apache" : "metron", +"is" : "the best", +"streaming" : "analytics platform" +</pre></div></div> +<p>and remove the /foo key from the previous example.</p> +<p>Create a patch file /tmp/mypatch.txt with four separate patch operations.</p> + +<div class="source"> +<div class="source"> +<pre>[ + { + "op": "remove", + "path": "/foo" + }, + { + "op": "add", + "path": "/apache", + "value": "metron" + }, + { + "op": "add", + "path": "/is", + "value": "the best" + }, + { + "op": "add", + "path": "/streaming", + "value": "analytics platform" + } +] +</pre></div></div> +<p>Now submit again and you should see a Global config with the “foo” key removed and three new keys added.</p> + +<div class="source"> +<div class="source"> +<pre> $METRON_HOME/bin/zk_load_configs.sh -z $ZOOKEEPER -m PATCH -c GLOBAL -pf /tmp/mypatch.txt +</pre></div></div></div> +<div class="section"> +<h3><a name="Notes_On_Patching"></a>Notes On Patching</h3> +<p>For any given patch key, the parent of the key’s last/leaf node <i>must</i> exist, otherwise an exception will be thrown. 
For example, if you want to add the following:</p> + +<div class="source"> +<div class="source"> +<pre>"foo": { + "bar": "baz" +} +</pre></div></div> +<p>It is not sufficient to use /foo/bar as a key if foo does not already exist. 
You would either need to incrementally build the JSON and make this a two step process</p> + +<div class="source"> +<div class="source"> +<pre>[ + { + "op": "add", + "path": "/foo", + "value": { } + }, + { + "op": "add", + "path": "/foo/bar", + "value": "baz" + } +] +</pre></div></div> +<p>Or provide the value as a complete JSON object.</p> + +<div class="source"> +<div class="source"> +<pre>[ + { + "op": "add", + "path": "/foo", + "value": { "bar" : "baz" } + } +] +</pre></div></div> +<p>The REMOVE operation is idempotent. Running the remove command on the same key multiple times will not fail once the key has been removed.</p> +<p><a name="Topology_Errors"></a></p> +<h1>Topology Errors</h1> +<p>Errors generated in Metron topologies are transformed into JSON format and follow this structure:</p> + +<div class="source"> +<div class="source"> +<pre>{ + "exception": "java.lang.IllegalStateException: Unable to parse Message: ...", + "failed_sensor_type": "bro", + "stack": "java.lang.IllegalStateException: Unable to parse Message: ...", + "hostname": "node1", + "source:type": "error", + "raw_message": "{\"http\": {\"ts\":1488809627.000000.31915,\"uid\":\"C9JpSd2vFAWo3mXKz1\", ...", + "error_hash": "f7baf053f2d3c801a01d196f40f3468e87eea81788b2567423030100865c5061", + "error_type": "parser_error", + "message": "Unable to parse Message: {\"http\": {\"ts\":1488809627.000000.31915,\"uid\":\"C9JpSd2vFAWo3mXKz1\", ...", + "timestamp": 1488809630698 +} +</pre></div></div> +<p>Each topology can be configured to send error messages to a specific Kafka topic. 
The parser topologies retrieve this value from the <tt>parser.error.topic</tt> setting in the global config:</p> + +<div class="source"> +<div class="source"> +<pre>{ + "es.clustername": "metron", + "es.ip": "node1", + "es.port": "9300", + "es.date.format": "yyyy.MM.dd.HH", + "parser.error.topic": "indexing" +} +</pre></div></div> +<p>Error topics for enrichment and threat intel errors are passed into the enrichment topology as flux properties named <tt>enrichment.error.topic</tt> and <tt>threat.intel.error.topic</tt>. These properties can be found in <tt>$METRON_HOME/config/enrichment.properties</tt>.</p> +<p>The error topic for indexing errors is passed into the indexing topology as a flux property named <tt>index.error.topic</tt>. This property can be found in either <tt>$METRON_HOME/config/elasticsearch.properties</tt> or <tt>$METRON_HOME/config/solr.properties</tt> depending on the search engine selected.</p> +<p>By default all error messages are sent to the <tt>indexing</tt> topic so that they are indexed and archived, just like other messages. The indexing config for error messages can be found at <tt>$METRON_HOME/config/zookeeper/indexing/error.json</tt>.</p> +<p><a name="Performance_Logging"></a></p> +<h1>Performance Logging</h1> +<p>The PerformanceLogger class provides functionality that enables developers to debug performance issues. Basic usage looks like the following:</p> + +<div class="source"> +<div class="source"> +<pre>// create a simple inner performance class to use for logger instantiation +public static class Perf {} +// instantiation +PerformanceLogger perfLog = new PerformanceLogger(() -> getConfigurations().getGlobalConfig(), Perf.class.getName()); +// marking a start time +perfLog.mark("mark1"); +// ...do some high performance stuff... 
+// log the elapsed time +perfLog.log("mark1", "My high performance stuff is very performant"); +// log no additional message, just the basics +perfLog.log("mark1"); +</pre></div></div> +<p>The logger maintains a Map<String, Long> of named markers that correspond to start times. Calling mark() performs a put on the underlying timing store. Output includes the mark name, elapsed time in nanoseconds, as well as any custom messaging you provide. A sample log would look like the following:</p> + +<div class="source"> +<div class="source"> +<pre>[DEBUG] markName=execute,time(ns)=121411,message=key=7a8dbe44-4cb9-4db2-9d04-7632f543b56c, elapsed time to run execute +</pre></div></div> +<p><b>Configuration</b></p> +<p>The first argument to the logger is a java.util.function.Supplier<Map<String, Object>>. This offers flexibility in being able to provide multiple configuration “suppliers” depending on your individual usage requirements. The example above, taken from org.apache.metron.enrichment.bolt.GenericEnrichmentBolt, leverages the global config to dynamically provide configuration from Zookeeper. Any updates to the global config via Zookeeper are reflected live at runtime. Currently, the PerformanceLogger supports the following options:</p> + +<table border="0" class="table table-striped"> + <thead> + +<tr class="a"> + +<th>Property Name </th> + +<th>Type </th> + +<th>Valid Values </th> + </tr> + </thead> + <tbody> + +<tr class="b"> + +<td>performance.logging.percent.records </td> + +<td>Integer </td> + +<td>0-100 </td> + </tr> + </tbody> +</table> +<p><b>Other Usage Details</b></p> +<p>You can also provide your own format String and provide arguments that will be used when formatting that String. This code avoids expensive String concatenation by only formatting when debugging is enabled. For more complex arguments, e.g. 
JSON serialization, we expose an isDebugEnabled() method.</p> + +<div class="source"> +<div class="source"> +<pre>// log with format String and single argument +perfLog.log("join-message", "key={}, elapsed time to join messages", key); + +// check if debugging is enabled for the performance logger to avoid more expensive operations +if (perfLog.isDebugEnabled()) { + perfLog.log("join-message", "key={}, elapsed time to join messages, message={}", key, rawMessage.toJSONString()); +} +</pre></div></div> +<p><b>Side Effects</b></p> +<p>Calling the mark() method multiple times simply resets the start time to the current nano time. Calling log() with a non-existent mark name will log 0 ns elapsed time with a warning indicating that log has been invoked for a mark name that does not exist. The class is not thread-safe and makes no attempt at keeping multiple threads from modifying the same markers.</p></div></div> + </div> + </div> + </div> + + <hr/> + + <footer> + <div class="container-fluid"> + <div class="row span12">Copyright © 2017 + <a href="https://www.apache.org">The Apache Software Foundation</a>. + All Rights Reserved. + + </div> + + + + </div> + </footer> + </body> +</html>
Added: release/metron/0.4.2/site-book/metron-platform/metron-data-management/index.html ============================================================================== --- release/metron/0.4.2/site-book/metron-platform/metron-data-management/index.html (added) +++ release/metron/0.4.2/site-book/metron-platform/metron-data-management/index.html Wed Jan 3 18:25:57 2018 @@ -0,0 +1,1069 @@ +<!DOCTYPE html> +<!-- + | Generated by Apache Maven Doxia at 2017-12-08 + | Rendered using Apache Maven Fluido Skin 1.3.0 +--> +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> + <head> + <meta charset="UTF-8" /> + <meta name="viewport" content="width=device-width, initial-scale=1.0" /> + <meta name="Date-Revision-yyyymmdd" content="20171208" /> + <meta http-equiv="Content-Language" content="en" /> + <title>Metron – Resource Data Management</title> + <link rel="stylesheet" href="../../css/apache-maven-fluido-1.3.0.min.css" /> + <link rel="stylesheet" href="../../css/site.css" /> + <link rel="stylesheet" href="../../css/print.css" media="print" /> + + + <script type="text/javascript" src="../../js/apache-maven-fluido-1.3.0.min.js"></script> + + + +<script type="text/javascript">$( document ).ready( function() { $( '.carousel' ).carousel( { interval: 3500 } ) } );</script> + + </head> + <body class="topBarDisabled"> + + + + + <div class="container-fluid"> + <div id="banner"> + <div class="pull-left"> + <a href="http://metron.apache.org/" id="bannerLeft"> + <img src="../../images/metron-logo.png" alt="Apache Metron" width="148px" height="48px"/> + </a> + </div> + <div class="pull-right"> </div> + <div class="clear"><hr/></div> + </div> + + <div id="breadcrumbs"> + <ul class="breadcrumb"> + + + <li class=""> + <a href="http://www.apache.org" class="externalLink" title="Apache"> + Apache</a> + </li> + <li class="divider ">/</li> + <li class=""> + <a href="http://metron.apache.org/" class="externalLink" title="Metron"> + Metron</a> + </li> + <li class="divider ">/</li> + 
<li class=""> + <a href="../../index.html" title="Documentation"> + Documentation</a> + </li> + <li class="divider ">/</li> + <li class="">Resource Data Management</li> + + + + <li id="publishDate" class="pull-right">Last Published: 2017-12-08</li> <li class="divider pull-right">|</li> + <li id="projectVersion" class="pull-right">Version: 0.4.2</li> + + </ul> + </div> + + + <div class="row-fluid"> + <div id="leftColumn" class="span3"> + <div class="well sidebar-nav"> + + + <ul class="nav nav-list"> + <li class="nav-header">User Documentation</li> + + <li> + + <a href="../../index.html" title="Metron"> + <i class="icon-chevron-down"></i> + Metron</a> + <ul class="nav nav-list"> + + <li> + + <a href="../../Upgrading.html" title="Upgrading"> + <i class="none"></i> + Upgrading</a> + </li> + + <li> + + <a href="../../metron-analytics/index.html" title="Analytics"> + <i class="icon-chevron-right"></i> + Analytics</a> + </li> + + <li> + + <a href="../../metron-contrib/metron-docker/index.html" title="Docker"> + <i class="none"></i> + Docker</a> + </li> + + <li> + + <a href="../../metron-deployment/index.html" title="Deployment"> + <i class="icon-chevron-right"></i> + Deployment</a> + </li> + + <li> + + <a href="../../metron-interface/metron-alerts/index.html" title="Alerts"> + <i class="none"></i> + Alerts</a> + </li> + + <li> + + <a href="../../metron-interface/metron-config/index.html" title="Config"> + <i class="none"></i> + Config</a> + </li> + + <li> + + <a href="../../metron-interface/metron-rest/index.html" title="Rest"> + <i class="none"></i> + Rest</a> + </li> + + <li> + + <a href="../../metron-platform/index.html" title="Platform"> + <i class="icon-chevron-down"></i> + Platform</a> + <ul class="nav nav-list"> + + <li> + + <a href="../../metron-platform/Performance-tuning-guide.html" title="Performance-tuning-guide"> + <i class="none"></i> + Performance-tuning-guide</a> + </li> + + <li> + + <a href="../../metron-platform/metron-api/index.html" title="Api"> + <i 
class="none"></i> + Api</a> + </li> + + <li> + + <a href="../../metron-platform/metron-common/index.html" title="Common"> + <i class="none"></i> + Common</a> + </li> + + <li class="active"> + + <a href="#"><i class="none"></i>Data-management</a> + </li> + + <li> + + <a href="../../metron-platform/metron-elasticsearch/index.html" title="Elasticsearch"> + <i class="none"></i> + Elasticsearch</a> + </li> + + <li> + + <a href="../../metron-platform/metron-enrichment/index.html" title="Enrichment"> + <i class="none"></i> + Enrichment</a> + </li> + + <li> + + <a href="../../metron-platform/metron-indexing/index.html" title="Indexing"> + <i class="none"></i> + Indexing</a> + </li> + + <li> + + <a href="../../metron-platform/metron-management/index.html" title="Management"> + <i class="none"></i> + Management</a> + </li> + + <li> + + <a href="../../metron-platform/metron-parsers/index.html" title="Parsers"> + <i class="icon-chevron-right"></i> + Parsers</a> + </li> + + <li> + + <a href="../../metron-platform/metron-pcap-backend/index.html" title="Pcap-backend"> + <i class="none"></i> + Pcap-backend</a> + </li> + + <li> + + <a href="../../metron-platform/metron-writer/index.html" title="Writer"> + <i class="none"></i> + Writer</a> + </li> + </ul> + </li> + + <li> + + <a href="../../metron-sensors/index.html" title="Sensors"> + <i class="icon-chevron-right"></i> + Sensors</a> + </li> + + <li> + + <a href="../../metron-stellar/stellar-3rd-party-example/index.html" title="Stellar-3rd-party-example"> + <i class="none"></i> + Stellar-3rd-party-example</a> + </li> + + <li> + + <a href="../../metron-stellar/stellar-common/index.html" title="Stellar-common"> + <i class="icon-chevron-right"></i> + Stellar-common</a> + </li> + + <li> + + <a href="../../use-cases/index.html" title="Use-cases"> + <i class="icon-chevron-right"></i> + Use-cases</a> + </li> + </ul> + </li> + </ul> + + + + <hr class="divider" /> + + <div id="poweredBy"> + <div class="clear"></div> + <div 
class="clear"></div> + <div class="clear"></div> + <a href="http://maven.apache.org/" title="Built by Maven" class="poweredBy"> + <img class="builtBy" alt="Built by Maven" src="../../images/logos/maven-feather.png" /> + </a> + </div> + </div> + </div> + + + <div id="bodyColumn" class="span9" > + + <h1>Resource Data Management</h1> +<p><a name="Resource_Data_Management"></a></p> +<p>This project is a collection of classes to assist with loading of various enrichment and threat intelligence sources into Metron.</p> +<div class="section"> +<h2><a name="Simple_HBase_EnrichmentsThreat_Intelligence"></a>Simple HBase Enrichments/Threat Intelligence</h2> +<p>The vast majority of enrichments and threat intelligence processing tend toward the following pattern:</p> + +<ul> + +<li>Take a field</li> + +<li>Look up the field in a key/value store</li> + +<li>If the key exists, then either it’s a threat to be alerted or it should be enriched with the value associated with the key.</li> +</ul> +<p>As such, we have created this capability as a default threat intel and enrichment adapter. The basic primitive for simple enrichments and threat intelligence sources is a complex key containing the following:</p> + +<ul> + +<li>Type : The type of threat intel or enrichment (e.g. malicious_ip)</li> + +<li>Indicator : The indicator in question</li> + +<li>Value : The value to associate with the type, indicator pair. This is a JSON map.</li> +</ul> +<p>At present, all of the dataloads utilities function by converting raw data sources to this primitive key (type, indicator) and value to be placed in HBase.</p> +<p>In the case of threat intel, a hit on the threat intel table will result in:</p> + +<ul> + +<li>The <tt>is_alert</tt> field being set to <tt>true</tt> in the index</li> + +<li>A field named <tt>threatintels.hbaseThreatIntel.$field.$threatintel_type</tt> is set to <tt>alert</tt> + +<ul> + +<li><tt>$field</tt> is the field in the original document that was a match (e.g. 
<tt>src_ip_addr</tt>)</li> + +<li><tt>$threatintel_type</tt> is the type of threat intel imported (defined in the Extractor configuration below).</li> + </ul></li> +</ul> +<p>In the case of simple hbase enrichment, a hit on the enrichments table will result in the following new field for each key in the value:<tt>enrichments.hbaseEnrichment.$field.$enrichment_type.$key</tt> </p> + +<ul> + +<li><tt>$field</tt> is the field in the original document that was a match (e.g. <tt>src_ip_addr</tt>)</li> + +<li><tt>$enrichment_type</tt> is the type of enrichment imported (defined in the Extractor configuration below).</li> + +<li><tt>$key</tt> is a key in the JSON map associated with the row in HBase.</li> +</ul> +<p>For instance, in the situation where we had the following very silly key/value in HBase in the enrichment table:</p> + +<ul> + +<li>indicator: <tt>127.0.0.1</tt></li> + +<li>type : <tt>important_addresses</tt></li> + +<li>value: <tt>{ "name" : "localhost", "location" : "home" }</tt></li> +</ul> +<p>If we had a document whose <tt>ip_src_addr</tt> came through with a value of <tt>127.0.0.1</tt>, we would have the following fields added to the indexed document:</p> + +<ul> + +<li><tt>enrichments.hbaseEnrichment.ip_src_addr.important_addresses.name</tt> : <tt>localhost</tt></li> + +<li><tt>enrichments.hbaseEnrichment.ip_src_addr.important_addresses.location</tt> : <tt>home</tt></li> +</ul></div> +<div class="section"> +<h2><a name="Extractor_Framework"></a>Extractor Framework</h2> +<p>For the purpose of ingesting data of a variety of formats, we have created an Extractor framework which allows for common data formats to be interpreted as enrichment or threat intelligence sources. The formats supported at present are:</p> + +<ul> + +<li>CSV (both threat intel and enrichment)</li> + +<li>STIX (threat intel only)</li> + +<li>Custom (pass your own class)</li> +</ul> +<p>All of the current utilities take a JSON file to configure how to interpret input data. 
This JSON describes the type of data and the schema if necessary for the data if it is not fixed (as in STIX, e.g.).</p> +<div class="section"> +<h3><a name="CSV_Extractor"></a>CSV Extractor</h3> +<p>Consider the following example configuration file which describes how to process a CSV file.</p> + +<div class="source"> +<div class="source"> +<pre>{ + "config" : { + "columns" : { + "ip" : 0 + ,"source" : 2 + } + ,"indicator_column" : "ip" + ,"type" : "malicious_ip" + ,"separator" : "," + } + ,"extractor" : "CSV" +} +</pre></div></div> +<p>In this example, we have described the schema to the extractor (i.e. the columns field): two columns, at the first and third positions. We have indicated that the <tt>ip</tt> column is the indicator and that the enrichment type is named <tt>malicious_ip</tt>. We have also indicated that the extractor to use is the CSV Extractor. The other option is the STIX extractor or a fully qualified classname for your own extractor.</p> +<p>The <tt>source</tt> column values will show up in the value in HBase because it is called out as a non-indicator column. The key for the value will be ‘source’. For instance, given an input string of <tt>123.45.123.12,something,the grapevine</tt>, the following key, value would be extracted:</p> + +<ul> + +<li>Indicator : <tt>123.45.123.12</tt></li> + +<li>Type : <tt>malicious_ip</tt></li> + +<li>Value : <tt>{ "ip" : "123.45.123.12", "source" : "the grapevine" }</tt></li> +</ul></div> +<div class="section"> +<h3><a name="STIX_Extractor"></a>STIX Extractor</h3> +<p>Consider the following config for importing STIX documents. This is a threat intelligence interchange format, so it is particularly relevant and attractive data to import for our purposes. 
Because STIX is a standard format, there is no need to specify the schema or how to interpret the documents.</p> +<p>We support the versions of Stix and Cybox supported by <a class="externalLink" href="https://github.com/STIXProject/java-stix/tree/v1.2.0.2">java-stix</a>:</p> + +<ul> + +<li>Stix - <a class="externalLink" href="https://github.com/STIXProject/schemas/blob/356cc4f6b06625465f0808388eb166807313b4e0/stix_core.xsd">1.2</a> and earlier</li> + +<li>Cybox - <a class="externalLink" href="https://github.com/CybOXProject/schemas/blob/97beb32c376a9223e91b52cb3e4c8d2af6baf786/cybox_core.xsd">2.1</a> and earlier</li> +</ul> +<p>We support a subset of STIX messages for importation:</p> + +<table border="0" class="table table-striped"> + <thead> + +<tr class="a"> + +<th>STIX Type </th> + +<th>Specific Type </th> + +<th>Enrichment Type Name </th> + </tr> + </thead> + <tbody> + +<tr class="b"> + +<td>Address </td> + +<td>IPV_4_ADDR </td> + +<td>address:IPV_4_ADDR </td> + </tr> + +<tr class="a"> + +<td>Address </td> + +<td>IPV_6_ADDR </td> + +<td>address:IPV_6_ADDR </td> + </tr> + +<tr class="b"> + +<td>Address </td> + +<td>E_MAIL </td> + +<td>address:E_MAIL </td> + </tr> + +<tr class="a"> + +<td>Address </td> + +<td>MAC </td> + +<td>address:MAC </td> + </tr> + +<tr class="b"> + +<td>Domain </td> + +<td>FQDN </td> + +<td>domain:FQDN </td> + </tr> + +<tr class="a"> + +<td>Hostname </td> + +<td> </td> + +<td>hostname </td> + </tr> + +<tr class="b"> + +<td>URI </td> + +<td> </td> + +<td>uriobjecttype </td> + </tr> + </tbody> +</table> +<p>NOTE: The enrichment type will be used as the type above.</p> +<p>Consider the following configuration for an Extractor</p> + +<div class="source"> +<div class="source"> +<pre>{ + "config" : { + "stix_address_categories" : "IPV_4_ADDR" + } + ,"extractor" : "STIX" +} +</pre></div></div> +<p>In here, we’re configuring the STIX extractor to load from a series of STIX files, however we only want to bring in IPv4 addresses from the set of all 
possible addresses. Note that if no categories are specified for import, all are assumed. Also, only address and domain types allow filtering via <tt>stix_address_categories</tt> and <tt>stix_domain_categories</tt> config parameters.</p></div> +<div class="section"> +<h3><a name="Common_Extractor_Properties"></a>Common Extractor Properties</h3> +<p>Users also have the ability to transform and filter enrichment and threat intel data using Stellar as it is loaded into HBase. This feature is available to all extractor types.</p> +<p>As an example, we will be providing a CSV list of top domains as an enrichment and filtering the value metadata, as well as the indicator column, with Stellar expressions.</p> + +<div class="source"> +<div class="source"> +<pre>{ + "config" : { + "zk_quorum" : "node1:2181", + "columns" : { + "rank" : 0, + "domain" : 1 + }, + "value_transform" : { + "domain" : "DOMAIN_REMOVE_TLD(domain)" + }, + "value_filter" : "LENGTH(domain) > 0", + "indicator_column" : "domain", + "indicator_transform" : { + "indicator" : "DOMAIN_REMOVE_TLD(indicator)" + }, + "indicator_filter" : "LENGTH(indicator) > 0", + "type" : "top_domains", + "separator" : "," + }, + "extractor" : "CSV" +} +</pre></div></div> +<p>There are 2 property maps that work with full Stellar expressions, and 2 properties that will work with Stellar predicates.</p> + +<table border="0" class="table table-striped"> + <thead> + +<tr class="a"> + +<th>Property </th> + +<th>Description</th> + </tr> + </thead> + <tbody> + +<tr class="b"> + +<td>value_transform </td> + +<td>Transform fields defined in the “columns” mapping with Stellar transformations. New keys introduced in the transform will be added to the key metadata.</td> + </tr> + +<tr class="a"> + +<td>value_filter </td> + +<td>Allows additional filtering with Stellar predicates based on results from the value transformations. 
In this example, records whose domain property is empty after removing the TLD will be omitted.</td> + </tr> + +<tr class="b"> + +<td>indicator_transform </td> + +<td>Transform the indicator column independent of the value transformations. You can refer to the original indicator value by using “indicator” as the variable name, as shown in the example above. In addition, if you prefer to piggyback your transformations, you can refer to the variable “domain”, which will allow your indicator transforms to inherit transformations done to this value during the value transformations.</td> + </tr> + +<tr class="a"> + +<td>indicator_filter </td> + +<td>Allows additional filtering with Stellar predicates based on results from the indicator transformations. In this example, records whose indicator value is empty after removing the TLD will be omitted.</td> + </tr> + </tbody> +</table> +<p>top-list.csv</p> + +<div class="source"> +<div class="source"> +<pre>1,google.com +2,youtube.com +... +</pre></div></div> +<p>Running a file import with the above data and extractor configuration would result in the following 2 extracted data records:</p> + +<table border="0" class="table table-striped"> + <thead> + +<tr class="a"> + +<th>Indicator </th> + +<th>Type </th> + +<th>Value </th> + </tr> + </thead> + <tbody> + +<tr class="b"> + +<td>google </td> + +<td>top_domains </td> + +<td>{ "rank" : "1", "domain" : "google" } </td> + </tr> + +<tr class="a"> + +<td>youtube </td> + +<td>top_domains </td> + +<td>{ "rank" : "2", "domain" : "youtube" } </td> + </tr> + </tbody> +</table> +<p>Similar to the parser framework, providing a Zookeeper quorum via the zk_quorum property will enable Stellar to access properties that reside in the global config. 
Expanding on our example above, if the global config looks as follows:</p> + +<div class="source"> +<div class="source"> +<pre>{ + "global_property" : "metron-ftw" +} +</pre></div></div> +<p>And we expand our value_transform:</p> + +<div class="source"> +<div class="source"> +<pre>... + "value_transform" : { + "domain" : "DOMAIN_REMOVE_TLD(domain)", + "a-new-prop" : "global_property" + }, +... +</pre></div></div> +<p>The resulting value data would look like the following:</p> + +<table border="0" class="table table-striped"> + <thead> + +<tr class="a"> + +<th>Indicator </th> + +<th>Type </th> + +<th>Value </th> + </tr> + </thead> + <tbody> + +<tr class="b"> + +<td>google </td> + +<td>top_domains </td> + +<td>{ "rank" : "1", "domain" : "google", "a-new-prop" : "metron-ftw" } </td> + </tr> + +<tr class="a"> + +<td>youtube </td> + +<td>top_domains </td> + +<td>{ "rank" : "2", "domain" : "youtube", "a-new-prop" : "metron-ftw" } </td> + </tr> + </tbody> +</table></div></div> +<div class="section"> +<h2><a name="Enrichment_Config"></a>Enrichment Config</h2> +<p>In order to automatically add new enrichment and threat intel types to existing, running enrichment topologies, you will need to add new fields and new types to the zookeeper configuration. A convenience parameter has been made to assist in this when doing an import. Namely, you can specify the enrichment configs and how they associate with the fields of the documents flowing through the enrichment topology.</p> +<p>Consider the following Enrichment Configuration JSON. 
This one is for a threat intelligence type:</p> + +<div class="source"> +<div class="source"> +<pre>{ + "zkQuorum" : "localhost:2181" + ,"sensorToFieldList" : { + "bro" : { + "type" : "THREAT_INTEL" + ,"fieldToEnrichmentTypes" : { + "ip_src_addr" : [ "malicious_ip" ] + ,"ip_dst_addr" : [ "malicious_ip" ] + } + } + } +} +</pre></div></div> +<p>We have to specify the following:</p> + +<ul> + +<li>The zookeeper quorum which holds the cluster configuration</li> + +<li>The mapping between the fields in the enriched documents and the enrichment types.</li> +</ul> +<p>This configuration allows the ingestion tools to update zookeeper post-ingestion so that the enrichment topology can take advantage immediately of the new type.</p></div> +<div class="section"> +<h2><a name="Loading_Utilities"></a>Loading Utilities</h2> +<p>The two configurations above are used in the three separate ingestion tools:</p> + +<ul> + +<li>Taxii Loader</li> + +<li>Bulk load from HDFS via MapReduce</li> + +<li>Flat File ingestion</li> +</ul> +<div class="section"> +<h3><a name="Taxii_Loader"></a>Taxii Loader</h3> +<p>The shell script <tt>$METRON_HOME/bin/threatintel_taxii_load.sh</tt> can be used to poll a Taxii server for STIX documents and ingest them into HBase.<br />It is quite common for this Taxii server to be an aggregation server such as Soltra Edge.</p> +<p>In addition to the Enrichment and Extractor configs described above, this loader requires a configuration file describing the connection information to the Taxii server. 
An illustrative example of such a configuration file is:</p> + +<div class="source"> +<div class="source"> +<pre>{ + "endpoint" : "http://localhost:8282/taxii-discovery-service" + ,"type" : "DISCOVER" + ,"collection" : "guest.Abuse_ch" + ,"table" : "threat_intel" + ,"columnFamily" : "cf" + ,"allowedIndicatorTypes" : [ "domainname:FQDN", "address:IPV_4_ADDR" ] +} +</pre></div></div> +<p>As you can see, we are specifying the following information:</p> + +<ul> + +<li>endpoint : The URL of the endpoint</li> + +<li>type : <tt>POLL</tt> or <tt>DISCOVER</tt> depending on the endpoint.</li> + +<li>collection : The Taxii collection to ingest</li> + +<li>table : The HBase table to import into</li> + +<li>columnFamily : The column family to import into</li> + +<li>allowedIndicatorTypes : an array of acceptable threat intel types (see the “Enrichment Type Name” column of the Stix table above for the possibilities).</li> +</ul> +<p>The parameters for the utility are as follows:</p> + +<table border="0" class="table table-striped"> + <thead> + +<tr class="a"> + +<th>Short Code </th> + +<th>Long Code </th> + +<th>Is Required? </th> + +<th>Description </th> + </tr> + </thead> + <tbody> + +<tr class="b"> + +<td>-h </td> + +<td> </td> + +<td>No </td> + +<td>Generate the help screen/set of options </td> + </tr> + +<tr class="a"> + +<td>-e </td> + +<td>–extractor_config </td> + +<td>Yes </td> + +<td>JSON Document describing the extractor for this input data source </td> + </tr> + +<tr class="b"> + +<td>-c </td> + +<td>–taxii_connection_config </td> + +<td>Yes </td> + +<td>The JSON config file to configure the connection </td> + </tr> + +<tr class="a"> + +<td>-p </td> + +<td>–time_between_polls </td> + +<td>No </td> + +<td>The time between polling the Taxii server in milliseconds. 
(default: 1 hour) </td> + </tr> + +<tr class="b"> + +<td>-b </td> + +<td>–begin_time </td> + +<td>No </td> + +<td>Start time to poll the Taxii server (all data from that point will be gathered in the first pull). The format for the date is yyyy-MM-dd HH:mm:ss </td> + </tr> + +<tr class="a"> + +<td>-l </td> + +<td>–log4j </td> + +<td>No </td> + +<td>The Log4j Properties to load </td> + </tr> + +<tr class="b"> + +<td>-n </td> + +<td>–enrichment_config </td> + +<td>No </td> + +<td>The JSON document describing the enrichments to configure. Unlike other loaders, this is run first if specified. </td> + </tr> + </tbody> +</table></div> +<div class="section"> +<h3><a name="Flatfile_Loader"></a>Flatfile Loader</h3> +<p>The shell script <tt>$METRON_HOME/bin/flatfile_loader.sh</tt> will read data from local disk, HDFS or URLs and load the enrichment or threat intel data into an HBase table.<br />Note: This utility works for enrichment as well as threat intel due to the underlying infrastructure being the same.</p> +<p>One special thing to note here is that there is a special configuration parameter to the Extractor config that is only considered during this loader:</p> + +<ul> + +<li>inputFormat : This specifies how to consider the data. The two implementations are <tt>BY_LINE</tt> and <tt>WHOLE_FILE</tt>.</li> +</ul> +<p>The default is <tt>BY_LINE</tt>, which makes sense for a list of CSVs where each line indicates a unit of information which can be imported. However, if you are importing a set of STIX documents, then you want each document to be considered as input to the Extractor.</p> +<p>The parameters for the utility are as follows:</p> + +<table border="0" class="table table-striped"> + <thead> + +<tr class="a"> + +<th>Short Code </th> + +<th>Long Code </th> + +<th>Is Required? 
</th> + +<th>Description </th> + </tr> + </thead> + <tbody> + +<tr class="b"> + +<td>-h </td> + +<td> </td> + +<td>No </td> + +<td>Generate the help screen/set of options </td> + </tr> + +<tr class="a"> + +<td>-q </td> + +<td>–quiet </td> + +<td>No </td> + +<td>Do not update progress </td> + </tr> + +<tr class="b"> + +<td>-e </td> + +<td>–extractor_config </td> + +<td>Yes </td> + +<td>JSON Document describing the extractor for this input data source </td> + </tr> + +<tr class="a"> + +<td>-m </td> + +<td>–import_mode </td> + +<td>No </td> + +<td>The Import mode to use: LOCAL, MR. Default: LOCAL </td> + </tr> + +<tr class="b"> + +<td>-t </td> + +<td>–hbase_table </td> + +<td>Yes </td> + +<td>The HBase table to import into </td> + </tr> + +<tr class="a"> + +<td>-c </td> + +<td>–hbase_cf </td> + +<td>Yes </td> + +<td>The HBase table column family to import into </td> + </tr> + +<tr class="b"> + +<td>-i </td> + +<td>–input </td> + +<td>Yes </td> + +<td>The input data location on local disk. If this is a file, then that file will be loaded. If this is a directory, then the files will be loaded recursively under that directory. </td> + </tr> + +<tr class="a"> + +<td>-l </td> + +<td>–log4j </td> + +<td>No </td> + +<td>The log4j properties file to load </td> + </tr> + +<tr class="b"> + +<td>-n </td> + +<td>–enrichment_config </td> + +<td>No </td> + +<td>The JSON document describing the enrichments to configure. Unlike other loaders, this is run first if specified. </td> + </tr> + +<tr class="a"> + +<td>-p </td> + +<td>–threads </td> + +<td>No </td> + +<td>The number of threads to use when extracting data. The default is the number of cores. 
</td> + </tr> + +<tr class="b"> + +<td>-b </td> + +<td>–batchSize </td> + +<td>No </td> + +<td>The batch size to use for HBase puts </td> + </tr> + </tbody> +</table></div> +<div class="section"> +<h3><a name="GeoLite2_Loader"></a>GeoLite2 Loader</h3> +<p>The shell script <tt>$METRON_HOME/bin/geo_enrichment_load.sh</tt> will retrieve MaxMind GeoLite2 data, load it into HDFS, and update the configuration.</p> +<p>THIS SCRIPT WILL NOT UPDATE AMBARI’S GLOBAL.JSON, JUST THE ZK CONFIGS. CHANGES WILL GO INTO EFFECT, BUT WILL NOT PERSIST PAST AN AMBARI RESTART UNTIL UPDATED THERE.</p> +<p>The parameters for the utility are as follows:</p> + +<table border="0" class="table table-striped"> + <thead> + +<tr class="a"> + +<th>Short Code </th> + +<th>Long Code </th> + +<th>Is Required? </th> + +<th>Description </th> + </tr> + </thead> + <tbody> + +<tr class="b"> + +<td>-h </td> + +<td> </td> + +<td>No </td> + +<td>Generate the help screen/set of options </td> + </tr> + +<tr class="a"> + +<td>-g </td> + +<td>–geo_url </td> + +<td>No </td> + +<td>GeoIP URL - defaults to <a class="externalLink" href="http://geolite.maxmind.com/download/geoip/database/GeoLite2-City.mmdb.gz">http://geolite.maxmind.com/download/geoip/database/GeoLite2-City.mmdb.gz</a> </td> + </tr> + +<tr class="b"> + +<td>-r </td> + +<td>–remote_dir </td> + +<td>No </td> + +<td>HDFS directory to land formatted GeoIP file - defaults to /apps/metron/geo/&lt;epoch millis&gt;/ </td> + </tr> + +<tr class="a"> + +<td>-t </td> + +<td>–tmp_dir </td> + +<td>No </td> + +<td>Directory for landing the temporary GeoIP data - defaults to /tmp </td> + </tr> + +<tr class="b"> + +<td>-z </td> + +<td>–zk_quorum </td> + +<td>Yes </td> + +<td>Zookeeper Quorum URL (zk1:port,zk2:port,…) </td> + </tr> + </tbody> +</table></div></div> + </div> + </div> + </div> + + <hr/> + + <footer> + <div class="container-fluid"> + <div class="row span12">Copyright © 2017 + <a href="https://www.apache.org">The Apache Software Foundation</a>. 
+ All Rights Reserved. + + </div> + + + + </div> + </footer> + </body> +</html>
