http://git-wip-us.apache.org/repos/asf/apex-site/blob/1a9dad19/content/docs/malhar-3.5/operators/images/filesplitter/baseexample.png ---------------------------------------------------------------------- diff --git a/content/docs/malhar-3.5/operators/images/filesplitter/baseexample.png b/content/docs/malhar-3.5/operators/images/filesplitter/baseexample.png new file mode 100644 index 0000000..6af2b44 Binary files /dev/null and b/content/docs/malhar-3.5/operators/images/filesplitter/baseexample.png differ
http://git-wip-us.apache.org/repos/asf/apex-site/blob/1a9dad19/content/docs/malhar-3.5/operators/images/filesplitter/classdiagram.png ---------------------------------------------------------------------- diff --git a/content/docs/malhar-3.5/operators/images/filesplitter/classdiagram.png b/content/docs/malhar-3.5/operators/images/filesplitter/classdiagram.png new file mode 100644 index 0000000..6490368 Binary files /dev/null and b/content/docs/malhar-3.5/operators/images/filesplitter/classdiagram.png differ http://git-wip-us.apache.org/repos/asf/apex-site/blob/1a9dad19/content/docs/malhar-3.5/operators/images/filesplitter/inputexample.png ---------------------------------------------------------------------- diff --git a/content/docs/malhar-3.5/operators/images/filesplitter/inputexample.png b/content/docs/malhar-3.5/operators/images/filesplitter/inputexample.png new file mode 100644 index 0000000..65e199f Binary files /dev/null and b/content/docs/malhar-3.5/operators/images/filesplitter/inputexample.png differ http://git-wip-us.apache.org/repos/asf/apex-site/blob/1a9dad19/content/docs/malhar-3.5/operators/images/filesplitter/sequence.png ---------------------------------------------------------------------- diff --git a/content/docs/malhar-3.5/operators/images/filesplitter/sequence.png b/content/docs/malhar-3.5/operators/images/filesplitter/sequence.png new file mode 100644 index 0000000..85cf702 Binary files /dev/null and b/content/docs/malhar-3.5/operators/images/filesplitter/sequence.png differ http://git-wip-us.apache.org/repos/asf/apex-site/blob/1a9dad19/content/docs/malhar-3.5/operators/images/kafkainput/image00.png ---------------------------------------------------------------------- diff --git a/content/docs/malhar-3.5/operators/images/kafkainput/image00.png b/content/docs/malhar-3.5/operators/images/kafkainput/image00.png new file mode 100644 index 0000000..0fa00e8 Binary files /dev/null and 
b/content/docs/malhar-3.5/operators/images/kafkainput/image00.png differ http://git-wip-us.apache.org/repos/asf/apex-site/blob/1a9dad19/content/docs/malhar-3.5/operators/kafkaInputOperator/index.html ---------------------------------------------------------------------- diff --git a/content/docs/malhar-3.5/operators/kafkaInputOperator/index.html b/content/docs/malhar-3.5/operators/kafkaInputOperator/index.html new file mode 100644 index 0000000..d35d4c8 --- /dev/null +++ b/content/docs/malhar-3.5/operators/kafkaInputOperator/index.html @@ -0,0 +1,462 @@ +<!DOCTYPE html> +<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]--> +<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]--> +<head> + <meta charset="utf-8"> + <meta http-equiv="X-UA-Compatible" content="IE=edge"> + <meta name="viewport" content="width=device-width, initial-scale=1.0"> + + + + <title>Kafka Input - Apache Apex Malhar Documentation</title> + + + <link rel="shortcut icon" href="../../favicon.ico"> + + + + <link href='https://fonts.googleapis.com/css?family=Lato:400,700|Roboto+Slab:400,700|Inconsolata:400,700' rel='stylesheet' type='text/css'> + + <link rel="stylesheet" href="../../css/theme.css" type="text/css" /> + <link rel="stylesheet" href="../../css/theme_extra.css" type="text/css" /> + <link rel="stylesheet" href="../../css/highlight.css"> + + + <script> + // Current page data + var mkdocs_page_name = "Kafka Input"; + var mkdocs_page_input_path = "operators/kafkaInputOperator.md"; + var mkdocs_page_url = "/operators/kafkaInputOperator/"; + </script> + + <script src="../../js/jquery-2.1.1.min.js"></script> + <script src="../../js/modernizr-2.8.3.min.js"></script> + <script type="text/javascript" src="../../js/highlight.pack.js"></script> + <script src="../../js/theme.js"></script> + + +</head> + +<body class="wy-body-for-nav" role="document"> + + <div class="wy-grid-for-nav"> + + + <nav data-toggle="wy-nav-shift" class="wy-nav-side stickynav"> + <div 
class="wy-side-nav-search"> + <a href="../.." class="icon icon-home"> Apache Apex Malhar Documentation</a> + <div role="search"> + <form id ="rtd-search-form" class="wy-form" action="../../search.html" method="get"> + <input type="text" name="q" placeholder="Search docs" /> + </form> +</div> + </div> + + <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation"> + <ul class="current"> + + <li> + <li class="toctree-l1 "> + <a class="" href="../..">Apache Apex Malhar</a> + + </li> +<li> + + <li> + <ul class="subnav"> + <li><span>Operators</span></li> + + + + <li class="toctree-l1 current"> + <a class="current" href="./">Kafka Input</a> + + <ul> + + <li class="toctree-l3"><a href="#kafka-input-operator">KAFKA INPUT OPERATOR</a></li> + + <li><a class="toctree-l4" href="#introduction-about-kafka-input-operator">Introduction: About Kafka Input Operator</a></li> + + <li><a class="toctree-l4" href="#why-is-it-needed">Why is it needed ?</a></li> + + <li><a class="toctree-l4" href="#abstractkafkainputoperator">AbstractKafkaInputOperator</a></li> + + <li><a class="toctree-l4" href="#kafkaconsumer">KafkaConsumer</a></li> + + <li><a class="toctree-l4" href="#pre-requisites">Pre-requisites</a></li> + + <li><a class="toctree-l4" href="#offsetmanager">OffsetManager</a></li> + + <li><a class="toctree-l4" href="#partitioning">Partitioning</a></li> + + <li><a class="toctree-l4" href="#abstractsingleportkafkainputoperator">AbstractSinglePortKafkaInputOperator</a></li> + + <li><a class="toctree-l4" href="#concrete-classes">Concrete Classes</a></li> + + <li><a class="toctree-l4" href="#application-example">Application Example</a></li> + + + </ul> + + </li> + + + + <li class="toctree-l1 "> + <a class="" href="../file_splitter/">File Splitter</a> + + </li> + + + + <li class="toctree-l1 "> + <a class="" href="../block_reader/">Block Reader</a> + + </li> + + + + <li class="toctree-l1 "> + <a class="" href="../file_output/">File Output</a> + + 
</li> + + + + <li class="toctree-l1 "> + <a class="" href="../enricher/">Enricher</a> + + </li> + + + </ul> +<li> + + </ul> + </div> + + </nav> + + <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"> + + + <nav class="wy-nav-top" role="navigation" aria-label="top navigation"> + <i data-toggle="wy-nav-top" class="fa fa-bars"></i> + <a href="../..">Apache Apex Malhar Documentation</a> + </nav> + + + <div class="wy-nav-content"> + <div class="rst-content"> + <div role="navigation" aria-label="breadcrumbs navigation"> + <ul class="wy-breadcrumbs"> + <li><a href="../..">Docs</a> »</li> + + + + <li>Operators »</li> + + + + <li>Kafka Input</li> + <li class="wy-breadcrumbs-aside"> + + </li> + </ul> + <hr/> +</div> + <div role="main"> + <div class="section"> + + <h1 id="kafka-input-operator">KAFKA INPUT OPERATOR</h1> +<h3 id="introduction-about-kafka-input-operator">Introduction: About Kafka Input Operator</h3> +<p>This is an input operator that consumes data from Kafka messaging system for further processing in Apex. Kafka Input Operator is a fault-tolerant and scalable Malhar Operator.</p> +<h3 id="why-is-it-needed">Why is it needed ?</h3> +<p>Kafka is a pull-based, distributed publish-subscribe messaging system in which topics are partitioned and replicated across +nodes. Kafka input operator is needed when you want to read data from multiple +partitions of a Kafka topic in parallel in an Apex application.</p> +<h3 id="abstractkafkainputoperator">AbstractKafkaInputOperator</h3> +<p>This is the abstract implementation that serves as base class for consuming messages from Kafka messaging system.
This class doesn't have any ports.</p> +<p><img alt="AbstractKafkaInput.png" src="../images/kafkainput/image00.png" /></p> +<h4 id="configuration-parameters">Configuration Parameters</h4> +<p><table> +<col width="25%" /> +<col width="75%" /> +<tbody> +<tr class="odd"> +<td align="left"><p>Parameter</p></td> +<td align="left"><p>Description</p></td> +</tr> +<tr class="even"> +<td align="left"><p>maxTuplesPerWindow</p></td> +<td align="left"><p>Controls the maximum number of messages emitted in each streaming window from this operator. Minimum value is 1. Default value = MAX_VALUE </p></td> +</tr> +<tr class="odd"> +<td align="left"><p>idempotentStorageManager</p></td> +<td align="left"><p>This is an instance of IdempotentStorageManager. Idempotency ensures that the operator will process the same set of messages in a window before and after a failure. For example, let's say the operator completed window 10 and failed somewhere during window 11. If the operator gets restored at window 10 then it will process the same messages again in window 10 which it did in the previous run before the failure. Idempotency is important but comes with higher cost because at the end of each window the operator needs to persist some state with respect to that window. Default Value = com.datatorrent.lib.io.IdempotentStorageManager.<br>NoopIdempotentStorageManager</p></td> +</tr> +<tr class="even"> +<td align="left"><p>strategy</p></td> +<td align="left"><p>Operator supports two types of partitioning strategies, ONE_TO_ONE and ONE_TO_MANY.</p> +<p>ONE_TO_ONE: If this is enabled, the AppMaster creates one input operator instance per Kafka topic partition. So the number of Kafka topic partitions equals the number of operator instances.</p> +<p>ONE_TO_MANY: The AppMaster creates K = min(initialPartitionCount, N) Kafka input operator instances where N is the number of Kafka topic partitions.
If K is less than N, the remaining topic partitions are assigned to the K operator instances in round-robin fashion. If K is less than initialPartitionCount, the AppMaster creates one input operator instance per Kafka topic partition. For example, if initialPartitionCount = 5 and number of Kafka partitions(N) = 2 then AppMaster creates 2 Kafka input operator instances. +Default Value = ONE_TO_ONE</p></td> +</tr> +<tr class="odd"> +<td align="left"><p>msgRateUpperBound</p></td> +<td align="left"><p>Maximum messages upper bound. Operator repartitions when the <em>msgProcessedPS</em> exceeds this bound. <em>msgProcessedPS</em> is the average number of messages processed per second by this operator.</p></td> +</tr> +<tr class="even"> +<td align="left"><p>byteRateUpperBound</p></td> +<td align="left"><p>Maximum bytes upper bound. Operator repartitions when the <em>bytesPS</em> exceeds this bound. <em>bytesPS</em> is the average number of bytes processed per second by this operator.</p> +<p></p></td> +</tr> +<tr class="odd"> +<td align="left"><p>offsetManager</p></td> +<td align="left"><p>This is an optional parameter that is useful when the application restarts or start at specific offsets (offsets are explained below)</p></td> +</tr> +<tr class="even"> +<td align="left"><p>repartitionInterval</p></td> +<td align="left"><p>Interval specified in milliseconds. This value specifies the minimum time required between two repartition actions. Default Value = 30 Seconds</p></td> +</tr> +<tr class="odd"> +<td align="left"><p>repartitionCheckInterval</p></td> +<td align="left"><p>Interval specified in milliseconds. This value specifies the minimum interval between two offset updates. Default Value = 5 Seconds</p></td> +</tr> +<tr class="even"> +<td align="left"><p>initialPartitionCount</p></td> +<td align="left"><p>When the ONE_TO_MANY partition strategy is enabled, this value indicates the number of Kafka input operator instances. 
Default Value = 1</p></td> +</tr> +<tr class="odd"> +<td align="left"><p>consumer</p></td> +<td align="left"><p>This is an instance of com.datatorrent.contrib.kafka.KafkaConsumer. Default Value = Instance of SimpleKafkaConsumer.</p></td> +</tr> +</tbody> +</table></p> +<h4 id="abstract-methods">Abstract Methods</h4> +<p>void emitTuple(Message message): Abstract method that emits tuples +extracted from Kafka message.</p> +<h3 id="kafkaconsumer">KafkaConsumer</h3> +<p>This is an abstract implementation of Kafka consumer. It sends the fetch +requests to the leading brokers of Kafka partitions. For each request, +it receives the set of messages and stores them into the buffer which is +ArrayBlockingQueue. There are two implementations: SimpleKafkaConsumer, which extends +KafkaConsumer and provides the functionality of the Simple Consumer API, and +HighLevelKafkaConsumer, which extends KafkaConsumer and provides the +functionality of the High Level Consumer API.</p> +<h3 id="pre-requisites">Pre-requisites</h3> +<p>This operator uses the Kafka Consumer API of version +0.8.1.1, so it works with any 0.8.x and 0.7.x version of Apache Kafka.</p> +<h4 id="configuration-parameters_1">Configuration Parameters</h4> +<table> +<col width="15%" /> +<col width="15%" /> +<col width="15%" /> +<col width="55%" /> +<tbody> +<tr class="odd"> +<td align="left"><p>Parameter</p></td> +<td align="left"><p>Type</p></td> +<td align="left"><p>Default</p></td> +<td align="left"><p>Description</p></td> +</tr> +<tr class="even"> +<td align="left"><p>zookeeper</p></td> +<td align="left"><p>String</p></td> +<td align="left"><p></p></td> +<td align="left"><p>Specifies the zookeeper quorum of Kafka clusters that you want to consume messages from. zookeeper  is a string in the form of hostname1:port1,hostname2:port2,hostname3:port3  where hostname1,hostname2,hostname3 are hosts and port1,port2,port3 are ports of zookeeper server.
If the topic name is the same across the Kafka clusters and you want to consume data from these clusters, then configure the zookeeper as follows: c1::hs1:p1,hs2:p2,hs3:p3;c2::hs4:p4,hs5:p5;c3::hs6:p6</p> +<p>where</p> +<p>c1,c2,c3 indicate the cluster names, hs1,hs2,hs3,hs4,hs5,hs6 are zookeeper hosts and p1,p2,p3,p4,p5,p6 are corresponding ports. Here, cluster name is optional in case of single cluster</p></td> +</tr> +<tr class="odd"> +<td align="left"><p>cacheSize</p></td> +<td align="left"><p>int</p></td> +<td align="left"><p>1024</p></td> +<td align="left"><p>Maximum number of buffered messages held in memory.</p></td> +</tr> +<tr class="even"> +<td align="left"><p>topic</p></td> +<td align="left"><p>String</p></td> +<td align="left"><p>default_topic</p></td> +<td align="left"><p>Indicates the name of the topic.</p></td> +</tr> +<tr class="odd"> +<td align="left"><p>initialOffset</p></td> +<td align="left"><p>String</p></td> +<td align="left"><p>latest</p></td> +<td align="left"><p>Indicates the type of offset, i.e., "earliest" or "latest". If initialOffset is "latest", then the operator consumes messages from the latest point of the Kafka queue. If initialOffset is "earliest", then the operator consumes messages starting from the beginning of the message queue.
This can be overridden by OffsetManager.</p></td> +</tr> +</tbody> +</table> + +<h4 id="abstract-methods_1">Abstract Methods</h4> +<ol> +<li>void commitOffset(): Commit the offsets at checkpoint.</li> +<li>Map <KafkaPartition, Long> getCurrentOffsets(): Return the current + offset status.</li> +<li>resetPartitionsAndOffset(Set <KafkaPartition> partitionIds, + Map <KafkaPartition, Long> startOffset): Reset the partitions with + partitionIds and offsets with startOffset.</li> +</ol> +<h4 id="configuration-parameters-for-simplekafkaconsumer">Configuration Parameters for SimpleKafkaConsumer</h4> +<table> +<col width="25%" /> +<col width="15%" /> +<col width="15%" /> +<col width="45%" /> +<tbody> +<tr class="odd"> +<td align="left"><p>Parameter</p></td> +<td align="left"><p>Type</p></td> +<td align="left"><p>Default</p></td> +<td align="left"><p>Description</p></td> +</tr> +<tr class="even"> +<td align="left"><p>bufferSize</p></td> +<td align="left"><p>int</p></td> +<td align="left"><p>1 MB</p></td> +<td align="left"><p>Specifies the maximum total size of messages for each fetch request.</p></td> +</tr> +<tr class="odd"> +<td align="left"><p>metadataRefreshInterval</p></td> +<td align="left"><p>int</p></td> +<td align="left"><p>30 Seconds</p></td> +<td align="left"><p>Interval, in milliseconds, between refreshes of the metadata (broker changes). Enabling metadata refresh guarantees an automatic reconnect when a new broker is elected as the host. A value of -1 disables this feature.</p></td> +</tr> +<tr class="even"> +<td align="left"><p>metadataRefreshRetryLimit</p></td> +<td align="left"><p>int</p></td> +<td align="left"><p>-1</p></td> +<td align="left"><p>Specifies the maximum brokers' metadata refresh retry limit. -1 means unlimited retry.</p></td> +</tr> +</tbody> +</table> + +<h3 id="offsetmanager">OffsetManager</h3> +<p>This is an interface for offset management and is useful when consuming data +from specified offsets.
Updates the offsets for all the Kafka partitions +periodically. Below is the code snippet:        </p> +<pre><code class="java">public interface OffsetManager +{ + public Map<KafkaPartition, Long> loadInitialOffsets(); + public void updateOffsets(Map<KafkaPartition, Long> offsetsOfPartitions); +} +</code></pre> + +<h4 id="abstract-methods_2">Abstract Methods</h4> +<p>Map <KafkaPartition, Long> loadInitialOffsets(): Specifies the initial offset for consuming messages; called at the activation stage.</p> +<p>updateOffsets(Map <KafkaPartition, Long> offsetsOfPartitions):  This +method is called at every repartitionCheckInterval to update offsets.</p> +<h3 id="partitioning">Partitioning</h3> +<p>The logical instance of the KafkaInputOperator acts as the Partitioner +as well as a StatsListener. This is because the +AbstractKafkaInputOperator implements both the +com.datatorrent.api.Partitioner and com.datatorrent.api.StatsListener +interfaces and provides an implementation of definePartitions(...) and +processStats(...) which makes it auto-scalable.</p> +<h4 id="response-processstatsbatchedoperatorstats-stats">Response processStats(BatchedOperatorStats stats)</h4> +<p>The application master invokes this method on the logical instance with +the stats (tuplesProcessedPS, bytesPS, etc.) of each partition. +Re-partitioning happens based on whether any new Kafka partitions added for +the topic or bytesPS and msgPS cross their respective upper bounds.</p> +<h4 id="definepartitions">DefinePartitions</h4> +<p>Based on the repartitionRequired field of the Response object which is +returned by processStats(...) method, the application master invokes +definePartitions(...) on the logical instance which is also the +partitioner instance. 
Dynamic partitioning can be disabled by setting the +parameter repartitionInterval to a negative value.</p> +<h3 id="abstractsingleportkafkainputoperator">AbstractSinglePortKafkaInputOperator</h3> +<p>This class extends AbstractKafkaInputOperator and has a single output +port through which it emits the messages.</p> +<h4 id="ports">Ports</h4> +<p>outputPort <T>: Tuples extracted from Kafka messages are emitted through +this port.</p> +<h4 id="abstract-methods_3">Abstract Methods</h4> +<p>T getTuple(Message msg) : Converts the Kafka message to tuple.</p> +<h3 id="concrete-classes">Concrete Classes</h3> +<ol> +<li> +<p>KafkaSinglePortStringInputOperator : +This class extends AbstractSinglePortKafkaInputOperator and getTuple() method extracts string from Kafka message.</p> +</li> +<li> +<p>KafkaSinglePortByteArrayInputOperator: +This class extends AbstractSinglePortKafkaInputOperator and getTuple() method extracts byte array from Kafka message.</p> +</li> +</ol> +<h3 id="application-example">Application Example</h3> +<p>This section builds an Apex application using Kafka input operator.
+Below is the code snippet:</p> +<pre><code class="java">@ApplicationAnnotation(name = "KafkaApp") +public class ExampleKafkaApplication implements StreamingApplication +{ +@Override +public void populateDAG(DAG dag, Configuration entries) +{ + KafkaSinglePortByteArrayInputOperator input = dag.addOperator("MessageReader", new KafkaSinglePortByteArrayInputOperator()); + + ConsoleOutputOperator output = dag.addOperator("Output", new ConsoleOutputOperator()); + + dag.addStream("MessageData", input.outputPort, output.input); +} +} +</code></pre> + +<p>Below is the configuration that sets "test" as the Kafka topic name and +"localhost:2181" as the zookeeper quorum:</p> +<pre><code class="xml"><property> +<name>dt.operator.MessageReader.prop.topic</name> +<value>test</value> +</property> + +<property> +<name>dt.operator.MessageReader.prop.zookeeper</name> +<value>localhost:2181</value> +</property> +</code></pre> + + </div> + </div> + <footer> + + <div class="rst-footer-buttons" role="navigation" aria-label="footer navigation"> + + <a href="../file_splitter/" class="btn btn-neutral float-right" title="File Splitter">Next <span class="icon icon-circle-arrow-right"></span></a> + + + <a href="../.." class="btn btn-neutral" title="Apache Apex Malhar"><span class="icon icon-circle-arrow-left"></span> Previous</a> + + </div> + + + <hr/> + + <div role="contentinfo"> + <!-- Copyright etc --> + + </div> + + Built with <a href="http://www.mkdocs.org">MkDocs</a> using a <a href="https://github.com/snide/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>. +</footer> + + </div> + </div> + + </section> + + </div> + +<div class="rst-versions" role="note" style="cursor: pointer"> + <span class="rst-current-version" data-toggle="rst-current-version"> + + + <span><a href="../.."
style="color: #fcfcfc;">« Previous</a></span> + + + <span style="margin-left: 15px"><a href="../file_splitter/" style="color: #fcfcfc">Next »</a></span> + + </span> +</div> + +</body> +</html> http://git-wip-us.apache.org/repos/asf/apex-site/blob/1a9dad19/content/docs/malhar-3.5/search.html ---------------------------------------------------------------------- diff --git a/content/docs/malhar-3.5/search.html b/content/docs/malhar-3.5/search.html new file mode 100644 index 0000000..07c2683 --- /dev/null +++ b/content/docs/malhar-3.5/search.html @@ -0,0 +1,178 @@ +<!DOCTYPE html> +<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]--> +<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]--> +<head> + <meta charset="utf-8"> + <meta http-equiv="X-UA-Compatible" content="IE=edge"> + <meta name="viewport" content="width=device-width, initial-scale=1.0"> + + + + <title>Apache Apex Malhar Documentation</title> + + + <link rel="shortcut icon" href="favicon.ico"> + + + + <link href='https://fonts.googleapis.com/css?family=Lato:400,700|Roboto+Slab:400,700|Inconsolata:400,700' rel='stylesheet' type='text/css'> + + <link rel="stylesheet" href="./css/theme.css" type="text/css" /> + <link rel="stylesheet" href="./css/theme_extra.css" type="text/css" /> + <link rel="stylesheet" href="./css/highlight.css"> + + + <script src="./js/jquery-2.1.1.min.js"></script> + <script src="./js/modernizr-2.8.3.min.js"></script> + <script type="text/javascript" src="./js/highlight.pack.js"></script> + <script src="./js/theme.js"></script> + <script>var base_url = '.';</script> + <script data-main="./mkdocs/js/search.js" src="./mkdocs/js/require.js"></script> + + + +</head> + +<body class="wy-body-for-nav" role="document"> + + <div class="wy-grid-for-nav"> + + + <nav data-toggle="wy-nav-shift" class="wy-nav-side stickynav"> + <div class="wy-side-nav-search"> + <a href="." 
class="icon icon-home"> Apache Apex Malhar Documentation</a> + <div role="search"> + <form id ="rtd-search-form" class="wy-form" action="./search.html" method="get"> + <input type="text" name="q" placeholder="Search docs" /> + </form> +</div> + </div> + + <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation"> + <ul class="current"> + + <li> + <li class="toctree-l1 "> + <a class="" href=".">Apache Apex Malhar</a> + + </li> +<li> + + <li> + <ul class="subnav"> + <li><span>Operators</span></li> + + + + <li class="toctree-l1 "> + <a class="" href="operators/kafkaInputOperator/">Kafka Input</a> + + </li> + + + + <li class="toctree-l1 "> + <a class="" href="operators/file_splitter/">File Splitter</a> + + </li> + + + + <li class="toctree-l1 "> + <a class="" href="operators/block_reader/">Block Reader</a> + + </li> + + + + <li class="toctree-l1 "> + <a class="" href="operators/file_output/">File Output</a> + + </li> + + + + <li class="toctree-l1 "> + <a class="" href="operators/enricher/">Enricher</a> + + </li> + + + </ul> +<li> + + </ul> + </div> + + </nav> + + <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"> + + + <nav class="wy-nav-top" role="navigation" aria-label="top navigation"> + <i data-toggle="wy-nav-top" class="fa fa-bars"></i> + <a href=".">Apache Apex Malhar Documentation</a> + </nav> + + + <div class="wy-nav-content"> + <div class="rst-content"> + <div role="navigation" aria-label="breadcrumbs navigation"> + <ul class="wy-breadcrumbs"> + <li><a href=".">Docs</a> »</li> + + + <li class="wy-breadcrumbs-aside"> + + </li> + </ul> + <hr/> +</div> + <div role="main"> + <div class="section"> + + + <h1 id="search">Search Results</h1> + + <form id="content_search" action="search.html"> + <span role="status" aria-live="polite" class="ui-helper-hidden-accessible"></span> + <input name="q" id="mkdocs-search-query" type="text" class="search_input search-query ui-autocomplete-input" placeholder="Search the 
Docs" autocomplete="off" autofocus> + </form> + + <div id="mkdocs-search-results"> + Searching... + </div> + + + </div> + </div> + <footer> + + + <hr/> + + <div role="contentinfo"> + <!-- Copyright etc --> + + </div> + + Built with <a href="http://www.mkdocs.org">MkDocs</a> using a <a href="https://github.com/snide/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>. +</footer> + + </div> + </div> + + </section> + + </div> + +<div class="rst-versions" role="note" style="cursor: pointer"> + <span class="rst-current-version" data-toggle="rst-current-version"> + + + + </span> +</div> + +</body> +</html> http://git-wip-us.apache.org/repos/asf/apex-site/blob/1a9dad19/content/docs/malhar-3.5/searchbox.html ---------------------------------------------------------------------- diff --git a/content/docs/malhar-3.5/searchbox.html b/content/docs/malhar-3.5/searchbox.html new file mode 100644 index 0000000..177fcb3 --- /dev/null +++ b/content/docs/malhar-3.5/searchbox.html @@ -0,0 +1,5 @@ +<div role="search"> + <form id ="rtd-search-form" class="wy-form" action="{{ base_url }}/search.html" method="get"> + <input type="text" name="q" placeholder="Search docs" /> + </form> +</div> http://git-wip-us.apache.org/repos/asf/apex-site/blob/1a9dad19/content/docs/malhar-3.5/sitemap.xml ---------------------------------------------------------------------- diff --git a/content/docs/malhar-3.5/sitemap.xml b/content/docs/malhar-3.5/sitemap.xml new file mode 100644 index 0000000..ea3ee75 --- /dev/null +++ b/content/docs/malhar-3.5/sitemap.xml @@ -0,0 +1,46 @@ +<?xml version="1.0" encoding="UTF-8"?> +<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"> + + + <url> + <loc>/</loc> + <lastmod>2016-08-29</lastmod> + <changefreq>daily</changefreq> + </url> + + + + + <url> + <loc>/operators/kafkaInputOperator/</loc> + <lastmod>2016-08-29</lastmod> + <changefreq>daily</changefreq> + </url> + + <url> + <loc>/operators/file_splitter/</loc> + 
<lastmod>2016-08-29</lastmod> + <changefreq>daily</changefreq> + </url> + + <url> + <loc>/operators/block_reader/</loc> + <lastmod>2016-08-29</lastmod> + <changefreq>daily</changefreq> + </url> + + <url> + <loc>/operators/file_output/</loc> + <lastmod>2016-08-29</lastmod> + <changefreq>daily</changefreq> + </url> + + <url> + <loc>/operators/enricher/</loc> + <lastmod>2016-08-29</lastmod> + <changefreq>daily</changefreq> + </url> + + + +</urlset> \ No newline at end of file http://git-wip-us.apache.org/repos/asf/apex-site/blob/1a9dad19/content/docs/malhar-3.5/toc.html ---------------------------------------------------------------------- diff --git a/content/docs/malhar-3.5/toc.html b/content/docs/malhar-3.5/toc.html new file mode 100644 index 0000000..6cd2fc9 --- /dev/null +++ b/content/docs/malhar-3.5/toc.html @@ -0,0 +1,23 @@ +{% if nav_item.children %} + <ul class="subnav"> + <li><span>{{ nav_item.title }}</span></li> + + {% for nav_item in nav_item.children %} + {% include 'toc.html' %} + {% endfor %} + </ul> +{% else %} + <li class="toctree-l1 {% if nav_item.active%}current{%endif%}"> + <a class="{% if nav_item.active%}current{%endif%}" href="{{ nav_item.url }}">{{ nav_item.title }}</a> + {% if nav_item == current_page %} + <ul> + {% for toc_item in toc %} + <li class="toctree-l3"><a href="{{ toc_item.url }}">{{ toc_item.title }}</a></li> + {% for toc_item in toc_item.children %} + <li><a class="toctree-l4" href="{{ toc_item.url }}">{{ toc_item.title }}</a></li> + {% endfor %} + {% endfor %} + </ul> + {% endif %} + </li> +{% endif %} http://git-wip-us.apache.org/repos/asf/apex-site/blob/1a9dad19/content/docs/malhar-3.5/versions.html ---------------------------------------------------------------------- diff --git a/content/docs/malhar-3.5/versions.html b/content/docs/malhar-3.5/versions.html new file mode 100644 index 0000000..d12d197 --- /dev/null +++ b/content/docs/malhar-3.5/versions.html @@ -0,0 +1,15 @@ +<div class="rst-versions" role="note" 
style="cursor: pointer"> + <span class="rst-current-version" data-toggle="rst-current-version"> + {% if repo_name == 'GitHub' %} + <a href="{{ repo_url }}" class="icon icon-github" style="float: left; color: #fcfcfc"> GitHub</a> + {% elif repo_name == 'Bitbucket' %} + <a href="{{ repo_url }}" class="icon icon-bitbucket" style="float: left; color: #fcfcfc"> BitBucket</a> + {% endif %} + {% if previous_page %} + <span><a href="{{ previous_page.url }}" style="color: #fcfcfc;">« Previous</a></span> + {% endif %} + {% if next_page %} + <span style="margin-left: 15px"><a href="{{ next_page.url }}" style="color: #fcfcfc">Next »</a></span> + {% endif %} + </span> +</div> http://git-wip-us.apache.org/repos/asf/apex-site/blob/1a9dad19/content/malhar-contributing.html ---------------------------------------------------------------------- diff --git a/content/malhar-contributing.html b/content/malhar-contributing.html new file mode 100644 index 0000000..5813f8c --- /dev/null +++ b/content/malhar-contributing.html @@ -0,0 +1,138 @@ +<html lang="en"><head> + + <meta charset="utf-8"> + <meta http-equiv="X-UA-Compatible" content="IE=edge"> + <meta name="viewport" content="width=device-width, initial-scale=1"> + <!-- The above 3 meta tags *must* come first in the head; any other head content must come *after* these tags --> + <meta name="description" content="Apex is an enterprise grade native YARN big data-in-motion platform that unifies stream processing as well as batch processing."> + <meta name="author" content="Apache Software Foundation"> + <link rel="icon" href="favicon.ico"> + + <title>Apache Apex</title> + + <!-- Main Stylesheet --> + <link href="css/main.css" rel="stylesheet"> + + </head> + + <body> + <nav class="navbar navbar-default navbar-static-top" id="main-nav"> + <div class="container"> + + <div class="navbar-header"> + <button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#bs-example-navbar-collapse-1" aria-expanded="false"> + 
<span class="sr-only">Toggle navigation</span> + <span class="icon-bar"></span> + <span class="icon-bar"></span> + <span class="icon-bar"></span> + </button> + <a class="navbar-brand" href="/"> + <img src="images/apex-logo.svg" class="logo" alt="Apache Apex Logo"> + Apache Apex<span class="trademark">™</span> + </a> + </div> + + <div class="collapse navbar-collapse" id="bs-example-navbar-collapse-1"> + <ul class="nav navbar-right navbar-nav"> + <li class="nav-item"> + <a class="nav-link " href="/">Home</a> + </li> + <li class="nav-item"> + <a class="nav-link " href="/announcements.html">Announcements</a> + </li> + <li class="nav-item"> + <a class="nav-link " href="/community.html">Community</a> + </li> + <li class="nav-item"> + <a class="nav-link " href="/docs.html">Docs</a> + </li> + <li class="nav-item"> + <a href="#" data-toggle="dropdown" class="dropdown-toggle nav-link">Source<b class="caret"></b></a> + <ul class="dropdown-menu"> + <li><a href="https://git-wip-us.apache.org/repos/asf?p=apex-core.git">Apex Core (ASF)</a></li> + <li><a href="https://github.com/apache/apex-core">Apex Core (Github Mirror)</a></li> + <li><a href="https://git-wip-us.apache.org/repos/asf?p=apex-malhar.git">Apex Malhar (ASF)</a></li> + <li><a href="https://github.com/apache/apex-malhar">Apex Malhar (Github Mirror)</a></li> + </ul> + </li> + <li class="nav-item"> + <a href="#" data-toggle="dropdown" class="dropdown-toggle nav-link">Apache<b class="caret"></b></a> + <ul class="dropdown-menu"> + <li><a href="http://www.apache.org/foundation/how-it-works.html">Apache Foundation</a></li> + <li><a href="http://www.apache.org/licenses/">Apache License</a></li> + <li><a href="http://www.apache.org/foundation/sponsorship.html">Sponsorship</a></li> + <li><a href="http://www.apache.org/foundation/thanks.html">Thanks</a></li> + </ul> + </li> + <li class="nav-item"> + <a class="nav-link btn btn-success" href="/downloads.html">Download</a> + </li> + </ul> + + </div> + </nav> + +<div 
class="container"> + + <h1 id="malhar-contribution-guidelines">Malhar Contribution Guidelines</h1> +<p>Malhar library predominantly contains different kinds of operators like connectors to messaging systems, databases, key-value and document stores, block i/o operators like various file system operators, analytic and algorithmic operators and other miscellaneous operators. It also provides other components to build applications such as partitioners, stats listeners, stream codecs and state management. This document outlines the general steps for making contributions to Malhar. Even though the processes described in the rest of the document refer to Operators in particular, they also generally apply to other application components mentioned above.</p> +<h2 id="operators">Operators</h2> +<ul> +<li><p>Follow the unix philosophy, design your operator to do one thing and do it well. If the operator is doing multiple things, you may not be taking advantage of platform parallelism aspects like pipelining to the fullest extent (akin to unix pipes). </p> +</li> +<li><p>Search Malhar project to see if there is an operator with similar functionality before embarking on writing a new one</p> +<ul> +<li>If an operator that is supposed to solve the same problem is already present but isn't complete or does not have the required functionality, consider making improvements to it. There should be a strong reason to not do this and create a new one. </li> +<li>If there is an overlap in functionality with an existing operator or an operator that is designed to be extended, do not rewrite that functionality. Create the new operator in such a way that it reuses code already present and implements the new functionality on top of it. This might require refactoring of existing operator(s).</li> +</ul> +</li> +<li>If the functionality requires connecting two or more operators together, do that in the application. 
If this pattern would be useful to others and can be reused in other applications consider making it a module.</li> +</ul> +<h2 id="preparation-for-making-changes">Preparation for making changes</h2> +<p>If after performing the above analysis, there is a need to write a new operator or update an existing one, follow these steps</p> +<ul> +<li>Make the case on <a href="/community.html#mailing-lists">dev mailing list</a> as to why the changes are needed and propose any design that you are thinking of.</li> +<li>If writing a new operator, mention any existing operators you considered, that have related or partially matching functionality to the desired functionality and why they cannot be improved to meet the requirements.</li> +<li>Also mention which folder under Malhar the operator would go into. If scalability and recovery features such as partitioning, idempotency etc., are not going to be implemented then the operator should go into <strong>contrib</strong>. For more explanation, see implementing an operator section below.</li> +<li>Respond to comments and suggestions and make appropriate modifications to design that are consistent with the evolving consensus.</li> +<li>Summarize the final list of changes in an email.</li> +<li>Create a <a href="https://issues.apache.org/jira/browse/APEXMALHAR">JIRA</a> to track the work.</li> +<li>Work on the changes.</li> +</ul> +<h2 id="implementing-an-operator">Implementing an operator</h2> +<ul> +<li>Look at the <a href="/docs/apex/operator_development">Operator Development Guide</a> and the <a href="/docs/malhar/development_best_practices">Best Practices Guide</a> on how to implement an operator and what the dos and don'ts are.</li> +<li>Refer to existing operator implementations when in doubt or unsure about how to implement some functionality. 
You can also email the <a href="/community.html#mailing-lists">dev mailing list</a> with any questions.</li> +<li>Write unit tests for operators<ul> +<li>Refer to unit tests for existing operators.</li> +<li>If possible write a sample application testing the operator.</li> +<li>Try to keep the tests self-contained i.e., avoid the user having to perform additional setup or set up other services before running the tests. This can be done by starting mock versions of the required services from within the test and shutting them down when the test finishes.</li> +</ul> +</li> +<li>Follow the code style guidelines of the Apache Apex project described <a href="http://apex.apache.org/contributing.html#code-style">here</a>.</li> +<li>If only the core operator business logic for the operator is implemented and advanced platform functionality for scalability and recovery, such as partitioning, idempotence etc., described in the above reference guides are not implemented, make the code submission pull request to the <strong>contrib</strong> folder in Malhar, otherwise make it to an appropriate top-level module or a new one if one is not present.</li> +</ul> + +</div> + + <hr> + <div class="container"> + <footer id="main-footer"> + <p> + Copyright © <span id="copyright-year">2015</span> <a href="http://apache.org">The Apache Software Foundation</a>, + Licensed under the Apache License, Version 2.0<br> + Apache and the Apache feather logo are trademarks of The Apache Software Foundation.<br> + <a class="footer-link-img" href="http://apache.org"><img src="/images/asf_logo.svg" alt="The Apache Software Foundation"></a> + </p> + </footer> + </div> <!-- /container --> + + <!-- Placed at the end of the document so the pages load faster --> + <script src="https://ajax.googleapis.com/ajax/libs/jquery/1.11.3/jquery.min.js"></script> + <script src="/js/bootstrap.min.js"></script> + <script> + $('#copyright-year').text((new Date()).getFullYear()); + </script> + +</body> +</html> 
http://git-wip-us.apache.org/repos/asf/apex-site/blob/1a9dad19/content/release.html ---------------------------------------------------------------------- diff --git a/content/release.html b/content/release.html index 8dffac8..5c89657 100644 --- a/content/release.html +++ b/content/release.html @@ -159,8 +159,10 @@ gpg --yes --armor --output ${RNAME}-source-release.zip.asc --detach-sig ${RNAME} svn import svn-dist https://dist.apache.org/repos/dist/dev/apex/${RNAME}-RC1 -m "Apache Apex v${rv}-RC1" </code></pre> <h2 id="build-and-deploy-documentation">Build and Deploy Documentation</h2> -<p>The documentation will be generated as static HTML files and copied into the <code>apex-site</code> repository. It will be available at an alternate URL path until the release is promoted, at which time it will also be promoted to the main website location.</p> -<p>Do the following setup steps before building and deploying the documentation. </p> +<p>The documentation will be generated as static HTML files into the <code>apex-site</code> repository, <a href="https://github.com/apache/apex-site/tree/asf-site/docs">separated by version (X.Y)</a>.</p> +<p><strong>Note</strong> You need Python 2.7+ and <a href="http://www.mkdocs.org/">mkdocs</a> with patch for issue <a href="https://github.com/mkdocs/mkdocs/issues/859">mkdocs #859</a> on top of the currently available version 0.15.3. After installing mkdocs with pip, run the following to obtain this build:</p> +<pre><code>sudo pip install --upgrade git+https://github.com/sashadt/mkdocs.git +</code></pre><p>Do the following setup steps before building and deploying the documentation. </p> <ol> <li><p>Clone the apex-site repository into a folder called <code>apex-site</code> at the same level as the current repository. 
</p> </li> @@ -174,8 +176,8 @@ svn import svn-dist https://dist.apache.org/repos/dist/dev/apex/${RNAME}-RC1 -m </code></pre><p> The <code>REPO_NAME</code> variable above should match the folder name of the cloned apex module being built.</p> </li> </ol> -<p>To build and deploy the documentation, execute the following commands in the apex module folder specified in <code>REPO_NAME</code> above. <strong>Note</strong>: Until <a href="https://github.com/mkdocs/mkdocs/issues/859">mkdocs #859</a> is resolved and available for download, use mkdocs built against <a href="https://github.com/mkdocs/mkdocs">master</a>.</p> -<pre><code class="lang-bash"># build docs, they would be generated in a site sub-folder +<p>Build and deploy the documentation in the release directory:</p> +<pre><code class="lang-bash"># build docs in ${REPO_NAME}, they will be generated in a site sub-folder mkdocs build --clean # Calculate the major.minor version @@ -190,9 +192,8 @@ git add -A git commit -m "Adding ${DOC_NAME}-${rv} documentation" git push </code></pre> -<p>The documentation is now accessible at the URL location below. 
For patch releases, however, the documentation changes will already reflect on the website.</p> -<pre><code>https://apex.apache.org/docs/${DOC_NAME}-${docv}/ -</code></pre><h2 id="voting">Voting</h2> +<p>After <a href="https://github.com/apache/apex-site/tree/master#contributing">publishing the site</a> the new documentation will be available at <code>https://apex.apache.org/docs/${DOC_NAME}-${docv}/</code></p> +<h2 id="voting">Voting</h2> <p>Vote call sample: <a href="http://mail-archives.apache.org/mod_mbox/apex-dev/201605.mbox/%3CCAKJfLDPr3CBCfstQJWjchG-ZEYw5P%2Bwv5jN0tfy3EL%2BU%3DBUQgQ%40mail.gmail.com%3E">http://mail-archives.apache.org/mod_mbox/apex-dev/201605.mbox/%3CCAKJfLDPr3CBCfstQJWjchG-ZEYw5P%2Bwv5jN0tfy3EL%2BU%3DBUQgQ%40mail.gmail.com%3E</a></p> <p>Vote result: http://git-wip-us.apache.org/repos/asf/apex-site/blob/1a9dad19/content/roadmap.html ---------------------------------------------------------------------- diff --git a/content/roadmap.html b/content/roadmap.html index 1ee8ecd..8371f65 100644 --- a/content/roadmap.html +++ b/content/roadmap.html @@ -343,50 +343,6 @@ So keep things sane - the proposed compromise is that we start creating smaller </tr> <tr> <td> - <a target="_blank" href="https://issues.apache.org/jira/browse/APEXMALHAR-1897">APEXMALHAR-1897</a> - </td> - <td title="ManagedState is described in the document below: - -https://docs.google.com/document/d/1gRWN9ufKSZSZD0N-pthlhpC9TZ8KwJ6hJlAX6nxl5f8/edit#heading=h.z87ti1fwyt0t"> - Large operator state management - </td> - <td> - - - <a target="_blank" href="https://issues.apache.org/jira/browse/APEXMALHAR/fixforversion/12334637">3.4.0</a> - - - </td> - </tr> - <tr> - <td> - <a target="_blank" href="https://issues.apache.org/jira/browse/APEXMALHAR-1938">APEXMALHAR-1938</a> - </td> - <td title="Currently Apex engine provides operator checkpointing in Hdfs ( with Hdfs backed StorageAgents i.e. 
FSStorageAgent & AsyncFSStorageAgent ) -As operator check-pointing is critical functionality of Apex streaming platform to ensure fault tolerant behavior, platform should also provide alternate StorageAgents which will work seamlessly with large applications that requires Exactly once semantics. -HDFS read/write latency is limited and doesn't improve beyond certain point because of disk io & staging writes. Having alternate strategy to this check-pointing in fault tolerant distributed in-memory grid would ensure application stability and performance is not impacted by checkpointing - -*This feature will add below functionalities* -* A KeyValue store interface which is used by In-memory checkpointing storage agent. -* Abstract implementation of KeyValue storage agent which can be configured with concrete implementation of KeyValue store for checkpointing. -* Concrete implementation of In memory storage agent for Apache Geode - -*This feature depends on below APEX core feature* -https://issues.apache.org/jira/browse/APEXCORE-283 -* Interface for storage agent to provide application id -* Stram client changes to pass applicationId"> - Operator checkpointing in distributed in-memory store - </td> - <td> - - - <a target="_blank" href="https://issues.apache.org/jira/browse/APEXMALHAR/fixforversion/12334637">3.4.0</a> - - - </td> - </tr> - <tr> - <td> <a target="_blank" href="https://issues.apache.org/jira/browse/APEXMALHAR-1939">APEXMALHAR-1939</a> </td> <td title=""> @@ -399,23 +355,6 @@ https://issues.apache.org/jira/browse/APEXCORE-283 </tr> <tr> <td> - <a target="_blank" href="https://issues.apache.org/jira/browse/APEXMALHAR-1942">APEXMALHAR-1942</a> - </td> - <td title="We would like to contribute the Apache Geode(http://geode.incubator.apache.org/) Operator support for Apex. -It will basically be implementation for writing to geode region. 
-This is in continuation with the Operator checkpointing alternative under review (MLHR-1938)"> - Apex Operator for Apache Geode. - </td> - <td> - - - <a target="_blank" href="https://issues.apache.org/jira/browse/APEXMALHAR/fixforversion/12334637">3.4.0</a> - - - </td> - </tr> - <tr> - <td> <a target="_blank" href="https://issues.apache.org/jira/browse/APEXMALHAR-1999">APEXMALHAR-1999</a> </td> <td title="Flink streaming is compatible with Apache Storm interfaces and therefore allows reusing code that was implemented for Storm. @@ -453,6 +392,9 @@ This jira item can contain tasks for providing similar support in Apex"> <td> + <a target="_blank" href="https://issues.apache.org/jira/browse/APEXMALHAR/fixforversion/12335815">3.5.0</a> + + </td> </tr> <tr> @@ -468,6 +410,21 @@ This jira item can contain tasks for providing similar support in Apex"> </td> </tr> + <tr> + <td> + <a target="_blank" href="https://issues.apache.org/jira/browse/APEXMALHAR-2142">APEXMALHAR-2142</a> + </td> + <td title=""> + High-level API window support + </td> + <td> + + + <a target="_blank" href="https://issues.apache.org/jira/browse/APEXMALHAR/fixforversion/12335815">3.5.0</a> + + + </td> + </tr> </tbody> </table>
