http://git-wip-us.apache.org/repos/asf/apex-site/blob/afbb4705/content/docs/malhar-3.7/operators/AbstractJdbcTransactionableOutputOperator/index.html
----------------------------------------------------------------------
diff --git 
a/content/docs/malhar-3.7/operators/AbstractJdbcTransactionableOutputOperator/index.html
 
b/content/docs/malhar-3.7/operators/AbstractJdbcTransactionableOutputOperator/index.html
new file mode 100644
index 0000000..d81fb21
--- /dev/null
+++ 
b/content/docs/malhar-3.7/operators/AbstractJdbcTransactionableOutputOperator/index.html
@@ -0,0 +1,601 @@
+<!DOCTYPE html>
+<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
+<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
+<head>
+  <meta charset="utf-8">
+  <meta http-equiv="X-UA-Compatible" content="IE=edge">
+  <meta name="viewport" content="width=device-width, initial-scale=1.0">
+  
+  
+  
+  <title>Jdbc Output Operator - Apache Apex Malhar Documentation</title>
+  
+
+  <link rel="shortcut icon" href="../../favicon.ico">
+  
+
+  
+  <link 
href='https://fonts.googleapis.com/css?family=Lato:400,700|Roboto+Slab:400,700|Inconsolata:400,700'
 rel='stylesheet' type='text/css'>
+
+  <link rel="stylesheet" href="../../css/theme.css" type="text/css" />
+  <link rel="stylesheet" href="../../css/theme_extra.css" type="text/css" />
+  <link rel="stylesheet" href="../../css/highlight.css">
+
+  
+  <script>
+    // Current page data
+    var mkdocs_page_name = "Jdbc Output Operator";
+    var mkdocs_page_input_path = 
"operators/AbstractJdbcTransactionableOutputOperator.md";
+    var mkdocs_page_url = 
"/operators/AbstractJdbcTransactionableOutputOperator/";
+  </script>
+  
+  <script src="../../js/jquery-2.1.1.min.js"></script>
+  <script src="../../js/modernizr-2.8.3.min.js"></script>
+  <script type="text/javascript" src="../../js/highlight.pack.js"></script>
+  <script src="../../js/theme.js"></script> 
+
+  
+</head>
+
+<body class="wy-body-for-nav" role="document">
+
+  <div class="wy-grid-for-nav">
+
+    
+    <nav data-toggle="wy-nav-shift" class="wy-nav-side stickynav">
+      <div class="wy-side-nav-search">
+        <a href="../.." class="icon icon-home"> Apache Apex Malhar 
Documentation</a>
+        <div role="search">
+  <form id ="rtd-search-form" class="wy-form" action="../../search.html" 
method="get">
+    <input type="text" name="q" placeholder="Search docs" />
+  </form>
+</div>
+      </div>
+
+      <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" 
aria-label="main navigation">
+        <ul class="current">
+          
+            <li>
+    <li class="toctree-l1 ">
+        <a class="" href="../..">Apache Apex Malhar</a>
+        
+    </li>
+<li>
+          
+            <li>
+    <ul class="subnav">
+    <li><span>APIs</span></li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../../apis/calcite/">SQL</a>
+        
+    </li>
+
+        
+    </ul>
+<li>
+          
+            <li>
+    <ul class="subnav">
+    <li><span>Operators</span></li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../block_reader/">Block Reader</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../csvformatter/">CSV Formatter</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../csvParserOperator/">CSV Parser</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../deduper/">Deduper</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../enricher/">Enricher</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../fsInputOperator/">File Input</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../file_output/">File Output</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../file_splitter/">File Splitter</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../filter/">Filter</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 current">
+        <a class="current" href="./">Jdbc Output Operator</a>
+        
+            <ul>
+            
+                <li class="toctree-l3"><a 
href="#jdbc-transactional-pojo-output-operator">JDBC Transactional POJO Output 
Operator</a></li>
+                
+                    <li><a class="toctree-l4" 
href="#operator-objective">Operator Objective</a></li>
+                
+                    <li><a class="toctree-l4" 
href="#overview">Overview</a></li>
+                
+                    <li><a class="toctree-l4" 
href="#operator-information">Operator Information</a></li>
+                
+                    <li><a class="toctree-l4" href="#how-to-use">How to 
Use?</a></li>
+                
+                    <li><a class="toctree-l4" 
href="#abstract-methods">Abstract Methods</a></li>
+                
+            
+                <li class="toctree-l3"><a 
href="#abstractjdbcpojooutputoperator">AbstractJdbcPOJOOutputOperator</a></li>
+                
+            
+                <li class="toctree-l3"><a 
href="#platform-attributes-that-influence-operator-behavior">Platform 
Attributes that influence operator behavior</a></li>
+                
+            
+                <li class="toctree-l3"><a href="#features">Features</a></li>
+                
+            
+                <li class="toctree-l3"><a 
href="#partitioning-of-jdbc-output-operator">Partitioning of JDBC Output 
Operator</a></li>
+                
+                    <li><a class="toctree-l4" 
href="#static-partitioning">Static Partitioning</a></li>
+                
+                    <li><a class="toctree-l4" 
href="#dynamic-partitioning">Dynamic Partitioning</a></li>
+                
+            
+                <li class="toctree-l3"><a href="#example">Example</a></li>
+                
+            
+            </ul>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../jdbcPollInputOperator/">JDBC Poller Input</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../jmsInputOperator/">JMS Input</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../jsonFormatter/">JSON Formatter</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../jsonParser/">JSON Parser</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../kafkaInputOperator/">Kafka Input</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../transform/">Transformer</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../windowedOperator/">Windowed Operator</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../xmlParserOperator/">XML Parser</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../jsonParser/">Json Parser</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../jsonFormatter/">Json Formatter</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../transform/">Transform Operator</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../csvformatter/">Csv Formatter</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../s3outputmodule/">S3 Output Module</a>
+        
+    </li>
+
+        
+    </ul>
+<li>
+          
+        </ul>
+      </div>
+      &nbsp;
+    </nav>
+
+    <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
+
+      
+      <nav class="wy-nav-top" role="navigation" aria-label="top navigation">
+        <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
+        <a href="../..">Apache Apex Malhar Documentation</a>
+      </nav>
+
+      
+      <div class="wy-nav-content">
+        <div class="rst-content">
+          <div role="navigation" aria-label="breadcrumbs navigation">
+  <ul class="wy-breadcrumbs">
+    <li><a href="../..">Docs</a> &raquo;</li>
+    
+      
+        
+          <li>Operators &raquo;</li>
+        
+      
+    
+    <li>Jdbc Output Operator</li>
+    <li class="wy-breadcrumbs-aside">
+      
+    </li>
+  </ul>
+  <hr/>
+</div>
+          <div role="main">
+            <div class="section">
+              
+                <h1 id="jdbc-transactional-pojo-output-operator">JDBC 
Transactional POJO Output Operator</h1>
+<h2 id="operator-objective">Operator Objective</h2>
+<p>This operator receives an input stream of POJOs and inserts them as rows in 
a database table in a fault-tolerant way.</p>
+<h2 id="overview">Overview</h2>
+<p>The main features of this operator 
(<code>AbstractJdbcTransactionableOutputOperator</code>) are persisting data to 
the database table and fault tolerance. This operator creates a transaction at 
the start of each window, executes batches of SQL updates, and closes the 
transaction at the end of the window. Each tuple corresponds to an SQL update 
statement. The operator groups the updates in a batch and submits them with one 
call to the database. Batch processing improves performance considerably. The 
size of a batch is configured by the <code>batchSize</code> property. The tuples in 
a window are stored in a check-pointed collection which is cleared in each 
<code>endWindow()</code> call. The operator writes a tuple exactly once in the 
database.</p>
+<p>An (indirect) base class for this operator is 
<code>AbstractPassThruTransactionableStoreOutputOperator</code> which 
implements a pass-through output adapter for a transactional store; it 
guarantees exactly-once semantics. "Pass-through" means it does not wait for 
end window to write to the store. It will begin a transaction at 
<code>beginWindow</code> and write to the store as the tuples come and commit 
the transaction at <code>endWindow</code>.</p>
+<p>The overall hierarchy is described in the following diagram:</p>
+<p><img alt="Class hierarchy diagram of the JDBC output operators" 
src="../images/jdbcoutput/operatorsClassDiagrams.png" /></p>
+<p><code>AbstractTransactionableStoreOutputOperator</code>: A skeleton 
implementation of an output operator that writes to a transactional store; the 
tuple type and store type are generic parameters. Defines an input port whose 
process method invokes the processTuple() abstract method. Exactly-once 
semantics are not guaranteed and must be provided by subclasses if needed.</p>
+<p><code>AbstractPassThruTransactionableStoreOutputOperator</code>: Simple 
extension of the above base class which adds exactly-once semantics by starting 
a transaction in <code>beginWindow()</code> and committing it in 
<code>endWindow()</code>.</p>
+<p><code>AbstractJdbcTransactionableOutputOperator</code>: (focus of this 
document) Adds support for JDBC by using an instance of JdbcTransactionalStore 
as the store. Also adds support for processing tuples in batches and provides 
an implementation of the <code>processTuple()</code> abstract method mentioned 
above.</p>
+<p><code>AbstractJdbcPOJOOutputOperator</code>: Serves as base class for 
inserting rows in a table using a JDBC store.</p>
+<p><strong>Note</strong>: For enforcing exactly once semantics a table named 
<code>dt_meta</code> must exist in the database. The sample SQL to create the 
same is as follows</p>
+<pre><code>&quot;CREATE TABLE IF NOT EXISTS dt_meta (dt_app_id VARCHAR(100) 
NOT NULL, dt_operator_id INT NOT NULL, dt_window BIGINT NOT NULL, 
UNIQUE(dt_app_id,dt_operator_id,dt_window))&quot;.
+</code></pre>
+
+<p><strong>Note</strong>: Additionally this operator assumes that the 
underlying database/table in which records are to be added supports 
transactions. If the database/table does not support transactions then a tuple 
may be inserted in a table more than once in case of auto recovery from a 
failure (violation of exactly once semantics).</p>
+<h2 id="operator-information">Operator Information</h2>
+<ol>
+<li>Operator location: <strong><em>malhar-library</em></strong></li>
+<li>Available since: <strong><em>0.9.4</em></strong></li>
+<li>Java Packages:<ul>
+<li>Operator: <strong><em><a 
href="https://www.datatorrent.com/docs/apidocs/com/datatorrent/lib/db/jdbc/AbstractJdbcTransactionableOutputOperator.html">com.datatorrent.lib.db.jdbc.AbstractJdbcTransactionableOutputOperator</a></em></strong></li>
+</ul>
+</li>
+</ol>
+<h2 id="how-to-use">How to Use?</h2>
+<p>Concrete subclasses need to implement a couple of abstract methods (if not 
using AbstractJdbcPOJOOutputOperator): 
<code>setStatementParameters(PreparedStatement statement, T tuple)</code> to 
set the parameter of the insert/update statement (which is a PreparedStatement) 
with values from the tuple and <code>getUpdateCommand()</code> to return the 
SQL statement to update a tuple in the database.  Note that subclasses of 
AbstractJdbcPOJOOutputOperator need not define these methods since they are 
already defined in that class.</p>
+<p>Several properties are available to configure the behavior of this operator 
and they are summarized in the table below.</p>
+<h3 id="properties-of-abstractjdbctransactionableoutputoperator"><a 
name="AbstractJdbcTransactionableOutputOperatorProps"></a>Properties of 
AbstractJdbcTransactionableOutputOperator</h3>
+<table>
+<thead>
+<tr>
+<th><strong>Property</strong></th>
+<th><strong>Description</strong></th>
+<th><strong>Type</strong></th>
+<th><strong>Mandatory</strong></th>
+<th><strong>Default Value</strong></th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td><em>batchSize</em></td>
+<td>Maximum number of tuples to insert in a single call (see explanation 
above).</td>
+<td>int</td>
+<td>No</td>
+<td>1000</td>
+</tr>
+</tbody>
+</table>
+<h4 id="properties-of-jdbc-store"><a 
name="JdbcTransactionalStore"></a>Properties of JDBC Store</h4>
+<table>
+<thead>
+<tr>
+<th><strong>Property</strong></th>
+<th><strong>Description</strong></th>
+<th><strong>Type</strong></th>
+<th><strong>Mandatory</strong></th>
+<th><strong>Default Value</strong></th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td><em>databaseDriver</em></td>
+<td>JDBC Driver class for connection to JDBC Store. This driver should be 
present in the class path</td>
+<td>String</td>
+<td>Yes</td>
+<td>N/A</td>
+</tr>
+<tr>
+<td><em>databaseUrl</em></td>
+<td><a 
href="http://www.roseindia.net/tutorial/java/jdbc/databaseurl.html">"Database 
URL"</a> of the form jdbc:subprotocol:subname</td>
+<td>String</td>
+<td>Yes</td>
+<td>N/A</td>
+</tr>
+<tr>
+<td><em>userName</em></td>
+<td>Name of the user configured in the database</td>
+<td>String</td>
+<td>Yes</td>
+<td>N/A</td>
+</tr>
+<tr>
+<td><em>password</em></td>
+<td>Password of the user configured in the database</td>
+<td>String</td>
+<td>Yes</td>
+<td>N/A</td>
+</tr>
+</tbody>
+</table>
+<p>Those attributes can be set like this:</p>
+<pre><code class="xml">&lt;property&gt;
+  &lt;name&gt;dt.operator.{OperatorName}.prop.batchSize&lt;/name&gt;
+  &lt;value&gt;500&lt;/value&gt;
+&lt;/property&gt;
+
+&lt;property&gt;
+  &lt;name&gt;dt.operator.{OperatorName}.prop.store.databaseDriver&lt;/name&gt;
+  &lt;value&gt;com.mysql.jdbc.Driver&lt;/value&gt;
+&lt;/property&gt;
+
+&lt;property&gt;
+  &lt;name&gt;dt.operator.{OperatorName}.prop.store.databaseUrl&lt;/name&gt;
+  &lt;value&gt;jdbc:mysql://localhost:3306/mydb&lt;/value&gt;
+&lt;/property&gt;
+
+&lt;property&gt;
+  &lt;name&gt;dt.operator.{OperatorName}.prop.store.userName&lt;/name&gt;
+  &lt;value&gt;myuser&lt;/value&gt;
+&lt;/property&gt;
+
+&lt;property&gt;
+  &lt;name&gt;dt.operator.{OperatorName}.prop.store.password&lt;/name&gt;
+  &lt;value&gt;mypassword&lt;/value&gt;
+&lt;/property&gt;
+</code></pre>
+
+<h3 id="abstract-methods">Abstract Methods</h3>
+<p>These methods are defined as abstract in 
AbstractJdbcTransactionableOutputOperator. <code>void 
setStatementParameters(PreparedStatement statement, T tuple)</code>: sets the 
parameters of the insert/update statement with values from the tuple.
+<code>String getUpdateCommand()</code>: returns the SQL statement which inserts into or updates 
the table in the database.</p>
+<h2 id="abstractjdbcpojooutputoperator">AbstractJdbcPOJOOutputOperator</h2>
+<p>This is the abstract implementation extending the functionality of 
AbstractJdbcTransactionableOutputOperator that serves as base class for 
inserting rows in a table using a JDBC store. It has the definition for the 
abstract methods in AbstractJdbcTransactionableOutputOperator. It can be 
further extended to modify functionality or add new capabilities. This class 
has an input port to receive the records in the form of tuples, so concrete 
subclasses won't need to provide the same, and processes/inserts each input 
tuple as a database table record. You need to set the input port attribute 
TUPLE_CLASS to the name of your <a 
href="https://en.wikipedia.org/wiki/Plain_Old_Java_Object">POJO</a> class, 
which defines the object type of the incoming tuples.</p>
+<h3 id="properties-of-abstractjdbcpojooutputoperator"><a 
name="AbstractJdbcPOJOOutputOperatorProps"></a>Properties of 
AbstractJdbcPOJOOutputOperator</h3>
+<p>Several properties are available to configure the behavior of this operator 
and they are summarized in the table below.</p>
+<table>
+<thead>
+<tr>
+<th><strong>Property</strong></th>
+<th><strong>Description</strong></th>
+<th><strong>Type</strong></th>
+<th><strong>Mandatory</strong></th>
+<th><strong>Default Value</strong></th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td><em>tablename</em></td>
+<td>Name of the table where data is to be inserted</td>
+<td>String</td>
+<td>Yes</td>
+<td>N/A</td>
+</tr>
+<tr>
+<td><em>fieldInfos</em></td>
+<td>JdbcFieldInfo maps a store column to a POJO field name</td>
+<td>List</td>
+<td>Yes</td>
+<td>N/A</td>
+</tr>
+</tbody>
+</table>
+<p>Those attributes can be set like this:</p>
+<pre><code class="xml">&lt;property&gt;
+  &lt;name&gt;dt.operator.{OperatorName}.prop.tablename&lt;/name&gt;
+  &lt;value&gt;ResultTable&lt;/value&gt;
+&lt;/property&gt;
+
+&lt;property&gt;
+  &lt;name&gt;dt.operator.{OperatorName}.fieldInfosItem[0]&lt;/name&gt;
+  &lt;value&gt;
+  {
+    &quot;sqlType&quot;: 0,
+    &quot;columnName&quot;:&quot;ID&quot;,
+    &quot;pojoFieldExpression&quot;: &quot;id&quot;,
+    &quot;type&quot;:&quot;INTEGER&quot;
+  }
+  &lt;/value&gt;
+&lt;/property&gt;
+
+&lt;property&gt;
+  &lt;name&gt;dt.operator.{OperatorName}.fieldInfosItem[1]&lt;/name&gt;
+  &lt;value&gt;
+  {
+    &quot;sqlType&quot;: 4,
+    &quot;columnName&quot;:&quot;NAME&quot;,
+    &quot;pojoFieldExpression&quot;: &quot;name&quot;,
+    &quot;type&quot;:&quot;STRING&quot;
+  }
+  &lt;/value&gt;
+&lt;/property&gt;
+</code></pre>
+
+<h2 id="platform-attributes-that-influence-operator-behavior">Platform 
Attributes that influence operator behavior</h2>
+<table>
+<thead>
+<tr>
+<th><strong>Attribute</strong></th>
+<th><strong>Description</strong></th>
+<th><strong>Type</strong></th>
+<th><strong>Mandatory</strong></th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td><em>TUPLE_CLASS</em></td>
+<td>TUPLE_CLASS attribute on input port which tells operator the class of POJO 
which is being received</td>
+<td>Class</td>
+<td>Yes</td>
+</tr>
+</tbody>
+</table>
+<p>Those attributes can be set like this:</p>
+<pre><code class="xml">&lt;property&gt;
+  
&lt;name&gt;dt.operator.{OperatorName}.port.input.attr.TUPLE_CLASS&lt;/name&gt; 
   
+  &lt;value&gt;com.example.mydtapp.PojoEvent&lt;/value&gt;
+&lt;/property&gt;
+</code></pre>
+
+<p>A concrete implementation is provided in Malhar as <a 
href="https://github.com/apache/apex-malhar/blob/master/library/src/main/java/com/datatorrent/lib/db/jdbc/JdbcPOJOInsertOutputOperator.java">JdbcPOJOInsertOutputOperator</a>.
  The incoming tuples will be inserted in the table using PreparedStatement of 
the base class,  which is formed in <code>activate()</code> method of this 
operator.</p>
+<h2 id="features">Features</h2>
+<p>The operator is <strong>idempotent</strong>, 
<strong>fault-tolerant</strong> and <strong>statically 
partitionable</strong>.</p>
+<h2 id="partitioning-of-jdbc-output-operator">Partitioning of JDBC Output 
Operator</h2>
+<h4 id="static-partitioning">Static Partitioning</h4>
+<p>Only static partitioning is supported for this operator.</p>
+<p>Static partitioning can be achieved by specifying the partitioner and 
number of partitions in the populateDAG() method</p>
+<pre><code class="java">  JdbcPOJOInsertOutputOperator 
jdbcPOJOInsertOutputOperator = 
dag.addOperator(&quot;jdbcPOJOInsertOutputOperator&quot;, 
JdbcPOJOInsertOutputOperator.class);
+  StatelessPartitioner&lt;JdbcPOJOInsertOutputOperator&gt; partitioner1 = new 
StatelessPartitioner&lt;JdbcPOJOInsertOutputOperator&gt;(2);
+  dag.setAttribute(jdbcPOJOInsertOutputOperator, 
Context.OperatorContext.PARTITIONER, partitioner1);
+</code></pre>
+
+<p>Static partitioning can also be achieved by specifying the partitioner in 
properties file.</p>
+<pre><code class="xml">  &lt;property&gt;
+    &lt;name&gt;dt.operator.{OperatorName}.attr.PARTITIONER&lt;/name&gt;
+    
&lt;value&gt;com.datatorrent.common.partitioner.StatelessPartitioner:2&lt;/value&gt;
+  &lt;/property&gt;
+</code></pre>
+
+<p>where {OperatorName} is the name of the JdbcPOJOInsertOutputOperator 
operator.
+The lines above statically partition JdbcPOJOInsertOutputOperator into 2 partitions. 
Change the number after the colon in the value to adjust the number of static 
partitions.</p>
+<h4 id="dynamic-partitioning">Dynamic Partitioning</h4>
+<p>Not supported.</p>
+<h2 id="example">Example</h2>
+<p>An example application using this operator can be found <a 
href="https://github.com/DataTorrent/examples/tree/master/tutorials/fileToJdbc">here</a>.
 This example shows how to read files from HDFS, parse into POJOs and then 
insert into a table in MySQL.</p>
+              
+            </div>
+          </div>
+          <footer>
+  
+    <div class="rst-footer-buttons" role="navigation" aria-label="footer 
navigation">
+      
+        <a href="../jdbcPollInputOperator/" class="btn btn-neutral 
float-right" title="JDBC Poller Input">Next <span class="icon 
icon-circle-arrow-right"></span></a>
+      
+      
+        <a href="../filter/" class="btn btn-neutral" title="Filter"><span 
class="icon icon-circle-arrow-left"></span> Previous</a>
+      
+    </div>
+  
+
+  <hr/>
+
+  <div role="contentinfo">
+    <!-- Copyright etc -->
+    
+  </div>
+
+  Built with <a href="http://www.mkdocs.org">MkDocs</a> using a <a 
href="https://github.com/snide/sphinx_rtd_theme">theme</a> provided by <a 
href="https://readthedocs.org">Read the Docs</a>.
+</footer>
+         
+        </div>
+      </div>
+
+    </section>
+
+  </div>
+
+<div class="rst-versions" role="note" style="cursor: pointer">
+    <span class="rst-current-version" data-toggle="rst-current-version">
+      
+      
+        <span><a href="../filter/" style="color: #fcfcfc;">&laquo; 
Previous</a></span>
+      
+      
+        <span style="margin-left: 15px"><a href="../jdbcPollInputOperator/" 
style="color: #fcfcfc">Next &raquo;</a></span>
+      
+    </span>
+</div>
+
+</body>
+</html>

http://git-wip-us.apache.org/repos/asf/apex-site/blob/afbb4705/content/docs/malhar-3.7/operators/block_reader/index.html
----------------------------------------------------------------------
diff --git a/content/docs/malhar-3.7/operators/block_reader/index.html 
b/content/docs/malhar-3.7/operators/block_reader/index.html
new file mode 100644
index 0000000..cbb9e87
--- /dev/null
+++ b/content/docs/malhar-3.7/operators/block_reader/index.html
@@ -0,0 +1,548 @@
+<!DOCTYPE html>
+<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
+<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
+<head>
+  <meta charset="utf-8">
+  <meta http-equiv="X-UA-Compatible" content="IE=edge">
+  <meta name="viewport" content="width=device-width, initial-scale=1.0">
+  
+  
+  
+  <title>Block Reader - Apache Apex Malhar Documentation</title>
+  
+
+  <link rel="shortcut icon" href="../../favicon.ico">
+  
+
+  
+  <link 
href='https://fonts.googleapis.com/css?family=Lato:400,700|Roboto+Slab:400,700|Inconsolata:400,700'
 rel='stylesheet' type='text/css'>
+
+  <link rel="stylesheet" href="../../css/theme.css" type="text/css" />
+  <link rel="stylesheet" href="../../css/theme_extra.css" type="text/css" />
+  <link rel="stylesheet" href="../../css/highlight.css">
+
+  
+  <script>
+    // Current page data
+    var mkdocs_page_name = "Block Reader";
+    var mkdocs_page_input_path = "operators/block_reader.md";
+    var mkdocs_page_url = "/operators/block_reader/";
+  </script>
+  
+  <script src="../../js/jquery-2.1.1.min.js"></script>
+  <script src="../../js/modernizr-2.8.3.min.js"></script>
+  <script type="text/javascript" src="../../js/highlight.pack.js"></script>
+  <script src="../../js/theme.js"></script> 
+
+  
+</head>
+
+<body class="wy-body-for-nav" role="document">
+
+  <div class="wy-grid-for-nav">
+
+    
+    <nav data-toggle="wy-nav-shift" class="wy-nav-side stickynav">
+      <div class="wy-side-nav-search">
+        <a href="../.." class="icon icon-home"> Apache Apex Malhar 
Documentation</a>
+        <div role="search">
+  <form id ="rtd-search-form" class="wy-form" action="../../search.html" 
method="get">
+    <input type="text" name="q" placeholder="Search docs" />
+  </form>
+</div>
+      </div>
+
+      <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" 
aria-label="main navigation">
+        <ul class="current">
+          
+            <li>
+    <li class="toctree-l1 ">
+        <a class="" href="../..">Apache Apex Malhar</a>
+        
+    </li>
+<li>
+          
+            <li>
+    <ul class="subnav">
+    <li><span>APIs</span></li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../../apis/calcite/">SQL</a>
+        
+    </li>
+
+        
+    </ul>
+<li>
+          
+            <li>
+    <ul class="subnav">
+    <li><span>Operators</span></li>
+
+        
+            
+    <li class="toctree-l1 current">
+        <a class="current" href="./">Block Reader</a>
+        
+            <ul>
+            
+                <li class="toctree-l3"><a href="#block-reader">Block 
Reader</a></li>
+                
+                    <li><a class="toctree-l4" href="#why-is-it-needed">Why is 
it needed?</a></li>
+                
+                    <li><a class="toctree-l4" href="#class-diagram">Class 
Diagram</a></li>
+                
+                    <li><a class="toctree-l4" 
href="#abstractblockreader">AbstractBlockReader</a></li>
+                
+                    <li><a class="toctree-l4" 
href="#example-application">Example Application</a></li>
+                
+            
+                <li class="toctree-l3"><a 
href="#abstractfsreadaheadlinereader">AbstractFSReadAheadLineReader</a></li>
+                
+                    <li><a class="toctree-l4" 
href="#readaheadlinereadercontext">ReadAheadLineReaderContext</a></li>
+                
+            
+                <li class="toctree-l3"><a 
href="#abstractfslinereader">AbstractFSLineReader</a></li>
+                
+                    <li><a class="toctree-l4" 
href="#linereadercontext">LineReaderContext</a></li>
+                
+            
+                <li class="toctree-l3"><a 
href="#fsslicereader">FSSliceReader</a></li>
+                
+                    <li><a class="toctree-l4" 
href="#fixedbytesreadercontext">FixedBytesReaderContext</a></li>
+                
+                    <li><a class="toctree-l4" 
href="#configuration_1">Configuration</a></li>
+                
+            
+                <li class="toctree-l3"><a 
href="#partitioner-and-statslistener">Partitioner and StatsListener</a></li>
+                
+                    <li><a class="toctree-l4" 
href="#processstats">processStats </a></li>
+                
+                    <li><a class="toctree-l4" 
href="#definepartitions">definePartitions</a></li>
+                
+            
+            </ul>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../csvformatter/">CSV Formatter</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../csvParserOperator/">CSV Parser</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../deduper/">Deduper</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../enricher/">Enricher</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../fsInputOperator/">File Input</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../file_output/">File Output</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../file_splitter/">File Splitter</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../filter/">Filter</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../AbstractJdbcTransactionableOutputOperator/">Jdbc 
Output Operator</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../jdbcPollInputOperator/">JDBC Poller Input</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../jmsInputOperator/">JMS Input</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../jsonFormatter/">JSON Formatter</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../jsonParser/">JSON Parser</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../kafkaInputOperator/">Kafka Input</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../transform/">Transformer</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../windowedOperator/">Windowed Operator</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../xmlParserOperator/">XML Parser</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../jsonParser/">Json Parser</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../jsonFormatter/">Json Formatter</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../transform/">Transform Operator</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../csvformatter/">Csv Formatter</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../s3outputmodule/">S3 Output Module</a>
+        
+    </li>
+
+        
+    </ul>
+<li>
+          
+        </ul>
+      </div>
+      &nbsp;
+    </nav>
+
+    <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
+
+      
+      <nav class="wy-nav-top" role="navigation" aria-label="top navigation">
+        <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
+        <a href="../..">Apache Apex Malhar Documentation</a>
+      </nav>
+
+      
+      <div class="wy-nav-content">
+        <div class="rst-content">
+          <div role="navigation" aria-label="breadcrumbs navigation">
+  <ul class="wy-breadcrumbs">
+    <li><a href="../..">Docs</a> &raquo;</li>
+    
+      
+        
+          <li>Operators &raquo;</li>
+        
+      
+    
+    <li>Block Reader</li>
+    <li class="wy-breadcrumbs-aside">
+      
+    </li>
+  </ul>
+  <hr/>
+</div>
+          <div role="main">
+            <div class="section">
+              
+                <h1 id="block-reader">Block Reader</h1>
+<p>This is a scalable operator that reads and parses blocks of data sources 
into records. A data source can be a file or a message bus that contains 
records and a block defines a chunk of data in the source by specifying the 
block offset and the length of the source belonging to the block. </p>
+<h2 id="why-is-it-needed">Why is it needed?</h2>
+<p>A Block Reader is needed to parallelize reading and parsing of a single 
data source, for example a file. Simple parallelism of reading data sources can 
be achieved by multiple partitions reading different sources of the same type (for 
files see <a 
href="https://github.com/apache/incubator-apex-malhar/blob/master/library/src/main/java/com/datatorrent/lib/io/fs/AbstractFileInputOperator.java">AbstractFileInputOperator</a>),
 but Block Reader partitions can read blocks of the same source in parallel and 
parse them for records, ensuring that no record is duplicated or missed.</p>
+<h2 id="class-diagram">Class Diagram</h2>
+<p><img alt="BlockReader class diagram" 
src="../images/blockreader/classdiagram.png" /></p>
+<h2 id="abstractblockreader">AbstractBlockReader</h2>
+<p>This is the abstract implementation that serves as the base for different 
types of data sources. It defines how a block metadata is processed. The flow 
diagram below describes the processing of a block metadata.</p>
+<p><img alt="BlockReader flow diagram" 
src="../images/blockreader/flowdiagram.png" /></p>
+<h3 id="ports">Ports</h3>
+<ul>
+<li>
+<p>blocksMetadataInput: input port on which block metadata are received.</p>
+</li>
+<li>
+<p>blocksMetadataOutput: output port on which block metadata are emitted if 
the port is connected. This port is useful when a downstream operator that 
receives records from block reader may also be interested to know the details 
of the corresponding blocks.</p>
+</li>
+<li>
+<p>messages: output port on which tuples of type 
<code>com.datatorrent.lib.io.block.AbstractBlockReader.ReaderRecord</code> are 
emitted. This class encapsulates a <code>record</code> and the 
<code>blockId</code> of the corresponding block.</p>
+</li>
+</ul>
+<h3 id="readercontext">readerContext</h3>
+<p>This is one of the most important fields in the block reader. It is of type 
<code>com.datatorrent.lib.io.block.ReaderContext</code> and is responsible for 
fetching bytes that make a record. It also lets the reader know how many total 
bytes were consumed which may not be equal to the total bytes in a record 
because consumed bytes also include bytes for the record delimiter which may 
not be a part of the actual record.</p>
+<p>Once the reader creates an input stream for the block (or uses the previously 
opened stream if the current block is the successor of the previous block), it 
initializes the reader context by invoking 
<code>readerContext.initialize(stream, blockMetadata, 
consecutiveBlock);</code>. Initialize method is where any implementation of 
<code>ReaderContext</code> can perform all the operations which have to be 
executed just before reading the block or create states which are used during 
the lifetime of reading the block.</p>
+<p>Once the initialization is done, <code>readerContext.next()</code> is 
called repeatedly until it returns <code>null</code>. It is left to the 
<code>ReaderContext</code> implementations to decide when a block is completely 
processed. In cases when a record is split across adjacent blocks, reader 
context may decide to read ahead of the current block boundary to completely 
fetch the split record (examples- <code>LineReaderContext</code> and 
<code>ReadAheadLineReaderContext</code>). In other cases when there isn't a 
possibility of split record (example- <code>FixedBytesReaderContext</code>), it 
returns <code>null</code> immediately when the block boundary is reached. The 
return type of <code>readerContext.next()</code> is of type 
<code>com.datatorrent.lib.io.block.ReaderContext.Entity</code> which is just a 
wrapper for a <code>byte[]</code> that represents the record and total bytes 
used in fetching the record.</p>
+<h3 id="abstract-methods">Abstract methods</h3>
+<ul>
+<li>
+<p><code>STREAM setupStream(B block)</code>: creating a stream for a block is 
dependent on the type of source which is not known to AbstractBlockReader. 
Sub-classes which deal with a specific data source provide this 
implementation.</p>
+</li>
+<li>
+<p><code>R convertToRecord(byte[] bytes)</code><a name="convertToRecord"></a>: 
this converts the array of bytes into the actual instance of record type.</p>
+</li>
+</ul>
+<h3 id="auto-scalability">Auto-scalability</h3>
+<p>Block reader can auto-scale, that is, depending on the backlog (total 
number of all the blocks which are waiting in the 
<code>blocksMetadataInput</code> port queue of all partitions) it can create 
more partitions or reduce them. Details are discussed in the last section which 
covers the <a href="#partitioner-and-statslistener">partitioner and stats-listener</a>.</p>
+<h3 id="configuration">Configuration</h3>
+<ol>
+<li><a name="maxReaders"></a><strong>maxReaders</strong>: when auto-scaling is 
enabled, this controls the maximum number of block reader partitions that can 
be created.</li>
+<li><a name="minReaders"></a><strong>minReaders</strong>: when auto-scaling is 
enabled, this controls the minimum number of block reader partitions that 
should always exist.</li>
+<li><a name="collectStats"></a><strong>collectStats</strong>: this enables or 
disables auto-scaling. When it is set to <code>true</code> the stats (number of 
blocks in the queue) are collected and this triggers partitioning; otherwise 
auto-scaling is disabled.</li>
+<li><strong>intervalMillis</strong>: when auto-scaling is enabled, this 
specifies the interval at which the reader will trigger the logic of computing 
the backlog and auto-scale.</li>
+</ol>
+<h2 id="abstractfsblockreader"><a name="AbstractFSBlockReader"></a> 
AbstractFSBlockReader</h2>
+<p>This abstract implementation deals with files. Different types of file 
systems that are implementations of 
<code>org.apache.hadoop.fs.FileSystem</code> are supported. The user can 
override <code>getFSInstance()</code> method to create an instance of a 
specific <code>FileSystem</code>. By default, filesystem instance is created 
from the filesystem URI that comes from the default Hadoop configuration.</p>
+<pre><code class="java">protected FileSystem getFSInstance() throws IOException
+{
+  return FileSystem.newInstance(configuration);
+}
+</code></pre>
+
+<p>It uses this filesystem instance to setup a stream of type 
<code>org.apache.hadoop.fs.FSDataInputStream</code> to read the block.</p>
+<pre><code class="java">@Override
+protected FSDataInputStream setupStream(BlockMetadata.FileBlockMetadata block) 
throws IOException
+{
+  return fs.open(new Path(block.getFilePath()));
+}
+</code></pre>
+
+<p>All the ports and configurations are derived from the super class. It 
doesn't provide an implementation of <a 
href="#convertToRecord"><code>convertToRecord(byte[] bytes)</code></a> method 
which is delegated to concrete sub-classes.</p>
+<h3 id="example-application">Example Application</h3>
+<p>This simple dag demonstrates how any concrete implementation of 
<code>AbstractFSBlockReader</code> can be plugged into an application. </p>
+<p><img alt="Application with FSBlockReader" 
src="../images/blockreader/fsreaderexample.png" /></p>
+<p>In the above application, file splitter creates block metadata for files 
which are sent to the block reader. Partitions of the block reader parse the file 
blocks for records which are filtered, transformed and then persisted to a file 
(created per block). Therefore block reader is parallel partitioned with the 2 
downstream operators - filter/converter and record output operator. The code 
which implements this dag is below.</p>
+<pre><code class="java">public class ExampleApplication implements 
StreamingApplication
+{
+  @Override
+  public void populateDAG(DAG dag, Configuration configuration)
+  {
+    FileSplitterInput input = dag.addOperator(&quot;File-splitter&quot;, new 
FileSplitterInput());
+    //any concrete implementation of AbstractFSBlockReader based on the 
use-case can be added here.
+    LineReader blockReader = dag.addOperator(&quot;Block-reader&quot;, new 
LineReader());
+    Filter filter = dag.addOperator(&quot;Filter&quot;, new Filter());
+    RecordOutputOperator recordOutputOperator = 
dag.addOperator(&quot;Record-writer&quot;, new RecordOutputOperator());
+
+    dag.addStream(&quot;file-block metadata&quot;, input.blocksMetadataOutput, 
blockReader.blocksMetadataInput);
+    dag.addStream(&quot;records&quot;, blockReader.messages, filter.input);
+    dag.addStream(&quot;filtered-records&quot;, filter.output, 
recordOutputOperator.input);
+  }
+
+  /**
+   * Concrete implementation of {@link AbstractFSBlockReader} for which a 
record is a line in the file.
+   */
+  public static class LineReader extends 
AbstractFSBlockReader.AbstractFSReadAheadLineReader&lt;String&gt;
+  {
+
+    @Override
+    protected String convertToRecord(byte[] bytes)
+    {
+      return new String(bytes);
+    }
+  }
+
+  /**
+   * Considers any line starting with a '.' as invalid. Emits the valid 
records.
+   */
+  public static class Filter extends BaseOperator
+  {
+    public final transient 
DefaultOutputPort&lt;AbstractBlockReader.ReaderRecord&lt;String&gt;&gt; output 
= new DefaultOutputPort&lt;&gt;();
+    public final transient 
DefaultInputPort&lt;AbstractBlockReader.ReaderRecord&lt;String&gt;&gt; input = 
new DefaultInputPort&lt;AbstractBlockReader.ReaderRecord&lt;String&gt;&gt;()
+    {
+      @Override
+      public void process(AbstractBlockReader.ReaderRecord&lt;String&gt; 
stringRecord)
+      {
+        //filter records and transform
+        //if the string starts with a '.' ignore the string.
+        if (!StringUtils.startsWith(stringRecord.getRecord(), &quot;.&quot;)) {
+          output.emit(stringRecord);
+        }
+      }
+    };
+  }
+
+  /**
+   * Persists the valid records to corresponding block files.
+   */
+  public static class RecordOutputOperator extends 
AbstractFileOutputOperator&lt;AbstractBlockReader.ReaderRecord&lt;String&gt;&gt;
+  {
+    @Override
+    protected String 
getFileName(AbstractBlockReader.ReaderRecord&lt;String&gt; tuple)
+    {
+      return Long.toHexString(tuple.getBlockId());
+    }
+
+    @Override
+    protected byte[] 
getBytesForTuple(AbstractBlockReader.ReaderRecord&lt;String&gt; tuple)
+    {
+      return tuple.getRecord().getBytes();
+    }
+  }
+}
+</code></pre>
+
+<p>Configuration to parallel partition block reader with its downstream 
operators.</p>
+<pre><code class="xml">  &lt;property&gt;
+    
&lt;name&gt;dt.operator.Filter.port.input.attr.PARTITION_PARALLEL&lt;/name&gt;
+    &lt;value&gt;true&lt;/value&gt;
+  &lt;/property&gt;
+  &lt;property&gt;
+    
&lt;name&gt;dt.operator.Record-writer.port.input.attr.PARTITION_PARALLEL&lt;/name&gt;
+    &lt;value&gt;true&lt;/value&gt;
+  &lt;/property&gt;
+</code></pre>
+
+<h2 id="abstractfsreadaheadlinereader">AbstractFSReadAheadLineReader</h2>
+<p>This extension of <a 
href="#AbstractFSBlockReader"><code>AbstractFSBlockReader</code></a> parses 
lines from a block and binds the <code>readerContext</code> field to an 
instance of <code>ReaderContext.ReadAheadLineReaderContext</code>.</p>
+<p>It is abstract because it doesn't provide an implementation of <a 
href="#convertToRecord"><code>convertToRecord(byte[] bytes)</code></a> since 
the user may want to convert the bytes that make a line into some other type. 
</p>
+<h3 id="readaheadlinereadercontext">ReadAheadLineReaderContext</h3>
+<p>In order to handle a line split across adjacent blocks, 
ReadAheadLineReaderContext always reads beyond the block boundary and ignores 
the bytes till the first end-of-line character of all the blocks except the 
first block of the file. This ensures that no line is missed or incomplete.</p>
+<p>This is one of the most common ways of handling a split record. It doesn't 
require any further information to decide if a line is complete. However, the 
cost of this consistent way to handle a line split is that it always reads from 
the next block.</p>
+<h2 id="abstractfslinereader">AbstractFSLineReader</h2>
+<p>Similar to <code>AbstractFSReadAheadLineReader</code>, this also parses 
lines from a block. However, it binds the <code>readerContext</code> field to 
an instance of <code>ReaderContext.LineReaderContext</code>.</p>
+<h3 id="linereadercontext">LineReaderContext</h3>
+<p>This handles the line split differently from 
<code>ReadAheadLineReaderContext</code>. It doesn't always read from the next 
block. If the end of the last line is aligned with the block boundary then it 
stops processing the block. It does read from the next block when the 
boundaries are not aligned, that is, last line extends beyond the block 
boundary. The result of this is an inconsistency in reading the next block.</p>
+<p>When the boundary of the last line of the previous block was aligned with 
its block, then the first line of the current block is a valid line. However, 
in the other case the bytes from the block start offset to the first 
end-of-line character should be ignored. Therefore, this means that any record 
formed by this reader context has to be validated. For example, if the lines 
are of fixed size then size of each record can be validated or if each line 
begins with a special field then that knowledge can be used to check if a 
record is complete.</p>
+<p>If the validation of completeness fails for a line, then <a 
href="#convertToRecord"><code>convertToRecord(byte[] bytes)</code></a> should 
return null.</p>
+<h2 id="fsslicereader">FSSliceReader</h2>
+<p>A concrete extension of <a 
href="#AbstractFSBlockReader"><code>AbstractFSBlockReader</code></a> that reads 
fixed-size <code>byte[]</code> from a block and emits the byte array wrapped in 
<code>com.datatorrent.netlet.util.Slice</code>.</p>
+<p>This operator binds the <code>readerContext</code> to an instance of 
<code>ReaderContext.FixedBytesReaderContext</code>.</p>
+<h3 id="fixedbytesreadercontext">FixedBytesReaderContext</h3>
+<p>This implementation of <code>ReaderContext</code> never reads beyond a 
block boundary, which can result in the last <code>byte[]</code> of a block 
being shorter than the rest of the records.</p>
+<h3 id="configuration_1">Configuration</h3>
+<p><strong>readerContext.length</strong>: length of each record. By default, 
this is initialized to the default hdfs block size.</p>
+<h2 id="partitioner-and-statslistener">Partitioner and StatsListener</h2>
+<p>The logical instance of the block reader acts as the Partitioner (unless a 
custom partitioner is set using the operator attribute - 
<code>PARTITIONER</code>) as well as a StatsListener. This is because the 
+<code>AbstractBlockReader</code> implements both the 
<code>com.datatorrent.api.Partitioner</code> and 
<code>com.datatorrent.api.StatsListener</code> interfaces and provides an 
implementation of <code>definePartitions(...)</code> and 
<code>processStats(...)</code> which make it auto-scalable.</p>
+<h3 id="processstats">processStats <a name="processStats"></a></h3>
+<p>The application master invokes <code>Response 
processStats(BatchedOperatorStats stats)</code> method on the logical instance 
with the stats (<code>tuplesProcessedPSMA</code>, 
<code>tuplesEmittedPSMA</code>, <code>latencyMA</code>, etc.) of each 
partition. The data which this operator is interested in is the 
<code>queueSize</code> of the input port <code>blocksMetadataInput</code>.</p>
+<p>Usually the <code>queueSize</code> of an input port gives the count of 
waiting control tuples plus data tuples. However, if a stats listener is 
interested only in the count of data tuples then that can be expressed by 
annotating the class with <code>@DataQueueSize</code>. In this case 
<code>AbstractBlockReader</code> itself is the <code>StatsListener</code> which 
is why it is annotated with <code>@DataQueueSize</code>.</p>
+<p>The logical instance caches the queue size per partition and at regular 
intervals (configured by <code>intervalMillis</code>) sums these values to find 
the total backlog which is then used to decide whether re-partitioning is 
needed. The flow-diagram below describes this logic.</p>
+<p><img alt="Processing of total-backlog" 
src="../images/blockreader/totalBacklogProcessing.png" /></p>
+<p>The goal of this logic is to create as many partitions as possible within bounds (see 
<a href="#maxReaders"><code>maxReaders</code></a> and <a 
href="#minReaders"><code>minReaders</code></a> above) to quickly reduce this 
backlog or, if the backlog is small, to remove any idle partitions.</p>
+<h3 id="definepartitions">definePartitions</h3>
+<p>Based on the <code>repartitionRequired</code> field of the 
<code>Response</code> object which is returned by <em><a 
href="#processStats">processStats</a></em> method, the application master 
invokes </p>
+<pre><code 
class="java">Collection&lt;Partition&lt;AbstractBlockReader&lt;...&gt;&gt;&gt; 
definePartitions(Collection&lt;Partition&lt;AbstractBlockReader&lt;...&gt;&gt;&gt;
 partitions, PartitioningContext context)
+</code></pre>
+
+<p>on the logical instance which is also the partitioner instance. The 
implementation calculates the difference between required partitions and the 
existing count of partitions. If this difference is negative, then an equivalent 
number of partitions is removed; otherwise new partitions are created. </p>
+<p>Please note auto-scaling can be disabled by setting <a 
href="#collectStats"><code>collectStats</code></a> to <code>false</code>. If 
the use-case requires only static partitioning, then that can be achieved by 
setting <a 
href="https://github.com/chandnisingh/incubator-apex-core/blob/master/common/src/main/java/com/datatorrent/common/partitioner/StatelessPartitioner.java"><code>StatelessPartitioner</code></a>
 as the operator attribute- <code>PARTITIONER</code> on the block reader.</p>
+              
+            </div>
+          </div>
+          <footer>
+  
+    <div class="rst-footer-buttons" role="navigation" aria-label="footer 
navigation">
+      
+        <a href="../csvformatter/" class="btn btn-neutral float-right" 
title="CSV Formatter">Next <span class="icon 
icon-circle-arrow-right"></span></a>
+      
+      
+        <a href="../../apis/calcite/" class="btn btn-neutral" 
title="SQL"><span class="icon icon-circle-arrow-left"></span> Previous</a>
+      
+    </div>
+  
+
+  <hr/>
+
+  <div role="contentinfo">
+    <!-- Copyright etc -->
+    
+  </div>
+
+  Built with <a href="http://www.mkdocs.org">MkDocs</a> using a <a 
href="https://github.com/snide/sphinx_rtd_theme">theme</a> provided by <a 
href="https://readthedocs.org">Read the Docs</a>.
+</footer>
+         
+        </div>
+      </div>
+
+    </section>
+
+  </div>
+
+<div class="rst-versions" role="note" style="cursor: pointer">
+    <span class="rst-current-version" data-toggle="rst-current-version">
+      
+      
+        <span><a href="../../apis/calcite/" style="color: #fcfcfc;">&laquo; 
Previous</a></span>
+      
+      
+        <span style="margin-left: 15px"><a href="../csvformatter/" 
style="color: #fcfcfc">Next &raquo;</a></span>
+      
+    </span>
+</div>
+
+</body>
+</html>

http://git-wip-us.apache.org/repos/asf/apex-site/blob/afbb4705/content/docs/malhar-3.7/operators/csvParserOperator/index.html
----------------------------------------------------------------------
diff --git a/content/docs/malhar-3.7/operators/csvParserOperator/index.html 
b/content/docs/malhar-3.7/operators/csvParserOperator/index.html
new file mode 100644
index 0000000..7910acc
--- /dev/null
+++ b/content/docs/malhar-3.7/operators/csvParserOperator/index.html
@@ -0,0 +1,633 @@
+<!DOCTYPE html>
+<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
+<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
+<head>
+  <meta charset="utf-8">
+  <meta http-equiv="X-UA-Compatible" content="IE=edge">
+  <meta name="viewport" content="width=device-width, initial-scale=1.0">
+  
+  
+  
+  <title>CSV Parser - Apache Apex Malhar Documentation</title>
+  
+
+  <link rel="shortcut icon" href="../../favicon.ico">
+  
+
+  
+  <link 
href='https://fonts.googleapis.com/css?family=Lato:400,700|Roboto+Slab:400,700|Inconsolata:400,700'
 rel='stylesheet' type='text/css'>
+
+  <link rel="stylesheet" href="../../css/theme.css" type="text/css" />
+  <link rel="stylesheet" href="../../css/theme_extra.css" type="text/css" />
+  <link rel="stylesheet" href="../../css/highlight.css">
+
+  
+  <script>
+    // Current page data
+    var mkdocs_page_name = "CSV Parser";
+    var mkdocs_page_input_path = "operators/csvParserOperator.md";
+    var mkdocs_page_url = "/operators/csvParserOperator/";
+  </script>
+  
+  <script src="../../js/jquery-2.1.1.min.js"></script>
+  <script src="../../js/modernizr-2.8.3.min.js"></script>
+  <script type="text/javascript" src="../../js/highlight.pack.js"></script>
+  <script src="../../js/theme.js"></script> 
+
+  
+</head>
+
+<body class="wy-body-for-nav" role="document">
+
+  <div class="wy-grid-for-nav">
+
+    
+    <nav data-toggle="wy-nav-shift" class="wy-nav-side stickynav">
+      <div class="wy-side-nav-search">
+        <a href="../.." class="icon icon-home"> Apache Apex Malhar 
Documentation</a>
+        <div role="search">
+  <form id ="rtd-search-form" class="wy-form" action="../../search.html" 
method="get">
+    <input type="text" name="q" placeholder="Search docs" />
+  </form>
+</div>
+      </div>
+
+      <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" 
aria-label="main navigation">
+        <ul class="current">
+          
+            <li>
+    <li class="toctree-l1 ">
+        <a class="" href="../..">Apache Apex Malhar</a>
+        
+    </li>
+</li>
+          
+            <li>
+    <ul class="subnav">
+    <li><span>APIs</span></li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../../apis/calcite/">SQL</a>
+        
+    </li>
+
+        
+    </ul>
+</li>
+          
+            <li>
+    <ul class="subnav">
+    <li><span>Operators</span></li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../block_reader/">Block Reader</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../csvformatter/">CSV Formatter</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 current">
+        <a class="current" href="./">CSV Parser</a>
+        
+            <ul>
+            
+                <li class="toctree-l3"><a href="#csv-parser-operator">Csv 
Parser Operator</a></li>
+                
+                    <li><a class="toctree-l4" 
href="#operator-objective">Operator Objective</a></li>
+                
+                    <li><a class="toctree-l4" 
href="#overview">Overview</a></li>
+                
+                    <li><a class="toctree-l4" href="#class-diagram">Class 
Diagram</a></li>
+                
+                    <li><a class="toctree-l4" 
href="#operator-information">Operator Information</a></li>
+                
+                    <li><a class="toctree-l4" 
href="#platform-attributes-that-influences-operator-behavior">Platform 
Attributes that influence operator behavior</a></li>
+                
+                    <li><a class="toctree-l4" href="#ports">Ports</a></li>
+                
+                    <li><a class="toctree-l4" 
href="#partitioning">Partitioning</a></li>
+                
+                    <li><a class="toctree-l4" href="#example">Example</a></li>
+                
+            
+            </ul>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../deduper/">Deduper</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../enricher/">Enricher</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../fsInputOperator/">File Input</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../file_output/">File Output</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../file_splitter/">File Splitter</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../filter/">Filter</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../AbstractJdbcTransactionableOutputOperator/">Jdbc 
Output Operator</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../jdbcPollInputOperator/">JDBC Poller Input</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../jmsInputOperator/">JMS Input</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../jsonFormatter/">JSON Formatter</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../jsonParser/">JSON Parser</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../kafkaInputOperator/">Kafka Input</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../transform/">Transformer</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../windowedOperator/">Windowed Operator</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../xmlParserOperator/">XML Parser</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../jsonParser/">Json Parser</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../jsonFormatter/">Json Formatter</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../transform/">Transform Operator</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../csvformatter/">Csv Formatter</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../s3outputmodule/">S3 Output Module</a>
+        
+    </li>
+
+        
+    </ul>
+</li>
+          
+        </ul>
+      </div>
+      &nbsp;
+    </nav>
+
+    <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
+
+      
+      <nav class="wy-nav-top" role="navigation" aria-label="top navigation">
+        <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
+        <a href="../..">Apache Apex Malhar Documentation</a>
+      </nav>
+
+      
+      <div class="wy-nav-content">
+        <div class="rst-content">
+          <div role="navigation" aria-label="breadcrumbs navigation">
+  <ul class="wy-breadcrumbs">
+    <li><a href="../..">Docs</a> &raquo;</li>
+    
+      
+        
+          <li>Operators &raquo;</li>
+        
+      
+    
+    <li>CSV Parser</li>
+    <li class="wy-breadcrumbs-aside">
+      
+    </li>
+  </ul>
+  <hr/>
+</div>
+          <div role="main">
+            <div class="section">
+              
+                <h1 id="csv-parser-operator">Csv Parser Operator</h1>
+<h2 id="operator-objective">Operator Objective</h2>
+<p>This operator is designed to parse delimited records and construct a map or 
a concrete Java class, also known as a <a 
href="https://en.wikipedia.org/wiki/Plain_Old_Java_Object">"POJO"</a>, out of 
them. The user needs to provide the schema that describes the delimited data. Based on 
the schema definition, the operator will parse each incoming record into an object map and 
a POJO. The user can also provide constraints, if any, in the schema. The supported 
constraints are listed in the <a href="#constraints">constraints table</a>. Each 
incoming record will be validated against those constraints. Valid records will 
be emitted as a POJO / map, while invalid ones are emitted on the error port with 
an error message.</p>
+<p><strong>Note</strong>: field names of the POJO must match the field names in the schema 
and be in the same order as they appear in the incoming data.</p>
+<h2 id="overview">Overview</h2>
+<p>The operator is <strong>idempotent</strong>, 
<strong>fault-tolerant</strong> and <strong>partitionable</strong>.</p>
+<h2 id="class-diagram">Class Diagram</h2>
+<p><img alt="" src="../images/csvParser/CSVParser.png" /></p>
+<h2 id="operator-information">Operator Information</h2>
+<ol>
+<li>Operator location: <strong><em>malhar-contrib</em></strong></li>
+<li>Available since: <strong><em>3.2.0</em></strong></li>
+<li>Operator state: <strong><em>Evolving</em></strong></li>
+<li>Java Package: <a 
href="https://github.com/apache/apex-malhar/blob/master/contrib/src/main/java/com/datatorrent/contrib/parser/CsvParser.java">com.datatorrent.contrib.parser.CsvParser</a></li>
+</ol>
+<h2 id="properties-of-csv-parser"><a name="props"></a>Properties of Csv 
Parser</h2>
+<p>The user needs to set the schema, which describes the delimited data and 
specifies constraints on values, if any.
+For example:</p>
+<pre><code class="json">{
+  &quot;separator&quot;:&quot;,&quot;,
+  &quot;quoteChar&quot;:&quot;\&quot;&quot;,
+  &quot;fields&quot;:[
+    {
+      &quot;name&quot;:&quot;adId&quot;,
+      &quot;type&quot;:&quot;Integer&quot;,
+      &quot;constraints&quot;:{
+         &quot;required&quot;:&quot;true&quot;
+      }
+    },
+    {
+      &quot;name&quot;:&quot;adName&quot;,
+      &quot;type&quot;:&quot;String&quot;,
+      &quot;constraints&quot;:{
+         &quot;required&quot;:&quot;true&quot;,
+         &quot;pattern&quot;:&quot;[a-z].*[a-z]$&quot;,
+         &quot;maxLength&quot;:&quot;10&quot;
+      }
+    },
+    {
+      &quot;name&quot;:&quot;bidPrice&quot;,
+      &quot;type&quot;:&quot;Double&quot;,
+      &quot;constraints&quot;:{
+         &quot;required&quot;:&quot;true&quot;,
+         &quot;minValue&quot;:&quot;0.1&quot;,
+         &quot;maxValue&quot;:&quot;3.2&quot;
+      }
+    },
+    {
+      &quot;name&quot;:&quot;startDate&quot;,
+      &quot;type&quot;:&quot;Date&quot;,
+      &quot;constraints&quot;:{
+         &quot;format&quot;:&quot;yyyy-MM-dd HH:mm:ss&quot;
+      }
+    }
+  ]
+}
+</code></pre>
+
+<table>
+<thead>
+<tr>
+<th><strong>Property</strong></th>
+<th><strong>Description</strong></th>
+<th><strong>Type</strong></th>
+<th><strong>Mandatory</strong></th>
+<th><strong>Default Value</strong></th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td><em>schema</em></td>
+<td><a 
href="https://github.com/apache/apex-malhar/blob/master/contrib/src/main/java/com/datatorrent/contrib/parser/DelimitedSchema.java">Schema</a>
  describing delimited data</td>
+<td>String</td>
+<td>YES</td>
+<td>N/A</td>
+</tr>
+</tbody>
+</table>
+<h2 id="platform-attributes-that-influences-operator-behavior">Platform 
Attributes that influence operator behavior</h2>
+<table>
+<thead>
+<tr>
+<th><strong>Attribute</strong></th>
+<th><strong>Description</strong></th>
+<th><strong>Type</strong></th>
+<th><strong>Mandatory</strong></th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td><em>out.TUPLE_CLASS</em></td>
+<td>TUPLE_CLASS attribute on the output port, which tells the operator the class of 
the POJO that needs to be emitted</td>
+<td>Class</td>
+<td>Yes</td>
+</tr>
+</tbody>
+</table>
+<h2 id="supported-datatypes-in-schema"><a name="dataTypes"></a>Supported 
DataTypes in Schema</h2>
+<ul>
+<li>Integer</li>
+<li>Long</li>
+<li>Double</li>
+<li>Character</li>
+<li>String</li>
+<li>Boolean</li>
+<li>Date</li>
+<li>Float</li>
+</ul>
+<h2 id="schema-constraints"><a name="constraints"></a>Schema Constraints</h2>
+<table>
+<thead>
+<tr>
+<th><strong>DataType</strong></th>
+<th><strong>Constraints</strong></th>
+<th><strong>Description</strong></th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td><em>All data Types</em></td>
+<td>required</td>
+<td>If mentioned, indicates that the data type constraints that follow are 
required. It cannot be blank/null. It may or may not satisfy other constraints 
( like equals/minVal/maxVal etc )</td>
+</tr>
+<tr>
+<td><em>All data Types</em></td>
+<td>equals</td>
+<td>If mentioned, indicates that the data string or value declared in the data 
type constraints must be an exact match with the specified value. <code>Note: 
This constraint is not applicable for data types boolean and date</code></td>
+</tr>
+<tr>
+<td><em>String</em></td>
+<td>Length</td>
+<td>The string must be of the length that is specified.</td>
+</tr>
+<tr>
+<td><em>String</em></td>
+<td>minLength</td>
+<td>The string is at least the length specified as minLength value.</td>
+</tr>
+<tr>
+<td><em>String</em></td>
+<td>maxLength</td>
+<td>The string can be at the most the length specified as maxLength value.</td>
+</tr>
+<tr>
+<td><em>String</em></td>
+<td>pattern</td>
+<td>The string must match the specified regular expression.</td>
+</tr>
+<tr>
+<td><em>Long</em></td>
+<td>maxValue</td>
+<td>The numeric can be at the most the value specified as maxValue.</td>
+</tr>
+<tr>
+<td><em>Long</em></td>
+<td>minValue</td>
+<td>The numeric is at least the value specified as minValue.</td>
+</tr>
+<tr>
+<td><em>Double</em></td>
+<td>maxValue</td>
+<td>The numeric can be at the most the value specified as maxValue.</td>
+</tr>
+<tr>
+<td><em>Double</em></td>
+<td>minValue</td>
+<td>The numeric is at least the value specified as minValue.</td>
+</tr>
+<tr>
+<td><em>Float</em></td>
+<td>maxValue</td>
+<td>The numeric can be at the most the value specified as maxValue.</td>
+</tr>
+<tr>
+<td><em>Float</em></td>
+<td>minValue</td>
+<td>The numeric is at least the value specified as minValue.</td>
+</tr>
+<tr>
+<td><em>Integer</em></td>
+<td>maxValue</td>
+<td>The numeric can be at the most the value specified as maxValue.</td>
+</tr>
+<tr>
+<td><em>Integer</em></td>
+<td>minValue</td>
+<td>The numeric is at least the value specified as minValue.</td>
+</tr>
+<tr>
+<td><em>Date</em></td>
+<td>format</td>
+<td>A simple date format as specified in the SimpleDateFormat class: 
http://docs.oracle.com/javase/8/docs/api/java/text/SimpleDateFormat.html?is-external=true</td>
+</tr>
+<tr>
+<td><em>Boolean</em></td>
+<td>trueValue</td>
+<td>String for which boolean value is true. The default values are: true, 1, 
y, and t. <code>Note: If you specify trueValue, you must also specify 
falseValue.</code></td>
+</tr>
+<tr>
+<td><em>Boolean</em></td>
+<td>falseValue</td>
+<td>String for which boolean value is false. The default values are: false, 0, 
n, and f. <code>Note: If you specify falseValue, you must also specify 
trueValue.</code></td>
+</tr>
+</tbody>
+</table>
+<h2 id="ports">Ports</h2>
+<table>
+<thead>
+<tr>
+<th><strong>Port</strong></th>
+<th><strong>Description</strong></th>
+<th><strong>Type</strong></th>
+<th><strong>Mandatory</strong></th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td><em>in</em></td>
+<td>Tuples that need to be parsed are received on this port</td>
+<td>byte[]</td>
+<td>Yes</td>
+</tr>
+<tr>
+<td><em>out</em></td>
+<td>Valid Tuples that are emitted as pojo</td>
+<td>Object (POJO)</td>
+<td>No</td>
+</tr>
+<tr>
+<td><em>parsedOutput</em></td>
+<td>Valid Tuples that are emitted as map</td>
+<td>Map</td>
+<td>No</td>
+</tr>
+<tr>
+<td><em>err</em></td>
+<td>Invalid Tuples are emitted with error message</td>
+<td>KeyValPair &lt;String, String&gt;</td>
+<td>No</td>
+</tr>
+</tbody>
+</table>
+<h2 id="partitioning">Partitioning</h2>
+<p>CSV Parser is both statically and dynamically partitionable.</p>
+<h3 id="static-partitioning">Static Partitioning</h3>
+<p>This can be achieved in 2 ways as shown below.</p>
+<p>Specifying the partitioner and number of partitions in the populateDAG() 
method</p>
+<pre><code class="java">    CsvParser csvParser = 
dag.addOperator(&quot;csvParser&quot;, CsvParser.class);
+    StatelessPartitioner&lt;CsvParser&gt; partitioner1 = new 
StatelessPartitioner&lt;CsvParser&gt;(2);
+    dag.setAttribute(csvParser, Context.OperatorContext.PARTITIONER, 
partitioner1);
+</code></pre>
+
+<p>Specifying the partitioner in properties file.</p>
+<pre><code class="xml">   &lt;property&gt;
+     &lt;name&gt;dt.operator.{OperatorName}.attr.PARTITIONER&lt;/name&gt;
+     
&lt;value&gt;com.datatorrent.common.partitioner.StatelessPartitioner:2&lt;/value&gt;
+   &lt;/property&gt;
+</code></pre>
+
+<p>where {OperatorName} is the name of the CsvParser operator.
+ Above lines will partition CsvParser statically 2 times. Above value can be 
changed accordingly to change the number of static partitions.</p>
+<h3 id="dynamic-paritioning">Dynamic Partitioning</h3>
+<p>CsvParser can be dynamically partitioned using out-of-the-box 
partitioner:</p>
+<h4 id="throughput-based">Throughput based</h4>
+<p>Following code can be added to populateDAG method of application to 
dynamically partition CsvParser:</p>
+<pre><code class="java">CsvParser csvParser = 
dag.addOperator(&quot;csvParser&quot;, CsvParser.class);
+StatelessThroughputBasedPartitioner&lt;CsvParser&gt; partitioner = new 
StatelessThroughputBasedPartitioner&lt;&gt;();
+partitioner.setCooldownMillis(conf.getLong(COOL_DOWN_MILLIS, 10000));
+partitioner.setMaximumEvents(conf.getLong(MAX_THROUGHPUT, 30000));
+partitioner.setMinimumEvents(conf.getLong(MIN_THROUGHPUT, 10000));
+dag.setAttribute(csvParser, OperatorContext.STATS_LISTENERS, Arrays.asList(new 
StatsListener[]{partitioner}));
+dag.setAttribute(csvParser, OperatorContext.PARTITIONER, partitioner);
+</code></pre>
+
+<p>Above code will dynamically partition csvParser when the throughput changes.
+If the overall throughput of csvParser goes beyond 30000 or less than 10000, 
the platform will repartition CsvParser
+to balance throughput of a single partition to be between 10000 and 30000.
+CooldownMillis of 10000 will be used as the threshold time for which the 
throughput change is observed.</p>
+<h2 id="example">Example</h2>
+<p>Example for Csv Parser can be found at: <a 
href="https://github.com/DataTorrent/examples/tree/master/tutorials/parser">https://github.com/DataTorrent/examples/tree/master/tutorials/parser</a></p>
+              
+            </div>
+          </div>
+          <footer>
+  
+    <div class="rst-footer-buttons" role="navigation" aria-label="footer 
navigation">
+      
+        <a href="../deduper/" class="btn btn-neutral float-right" 
title="Deduper">Next <span class="icon icon-circle-arrow-right"></span></a>
+      
+      
+        <a href="../csvformatter/" class="btn btn-neutral" title="CSV 
Formatter"><span class="icon icon-circle-arrow-left"></span> Previous</a>
+      
+    </div>
+  
+
+  <hr/>
+
+  <div role="contentinfo">
+    <!-- Copyright etc -->
+    
+  </div>
+
+  Built with <a href="http://www.mkdocs.org">MkDocs</a> using a <a 
href="https://github.com/snide/sphinx_rtd_theme">theme</a> provided by <a 
href="https://readthedocs.org">Read the Docs</a>.
+</footer>
+         
+        </div>
+      </div>
+
+    </section>
+
+  </div>
+
+<div class="rst-versions" role="note" style="cursor: pointer">
+    <span class="rst-current-version" data-toggle="rst-current-version">
+      
+      
+        <span><a href="../csvformatter/" style="color: #fcfcfc;">&laquo; 
Previous</a></span>
+      
+      
+        <span style="margin-left: 15px"><a href="../deduper/" style="color: 
#fcfcfc">Next &raquo;</a></span>
+      
+    </span>
+</div>
+
+</body>
+</html>

http://git-wip-us.apache.org/repos/asf/apex-site/blob/afbb4705/content/docs/malhar-3.7/operators/csvformatter/index.html
----------------------------------------------------------------------
diff --git a/content/docs/malhar-3.7/operators/csvformatter/index.html 
b/content/docs/malhar-3.7/operators/csvformatter/index.html
new file mode 100644
index 0000000..fdc5567
--- /dev/null
+++ b/content/docs/malhar-3.7/operators/csvformatter/index.html
@@ -0,0 +1,500 @@
+<!DOCTYPE html>
+<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
+<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
+<head>
+  <meta charset="utf-8">
+  <meta http-equiv="X-UA-Compatible" content="IE=edge">
+  <meta name="viewport" content="width=device-width, initial-scale=1.0">
+  
+  
+  
+  <title>Csv Formatter - Apache Apex Malhar Documentation</title>
+  
+
+  <link rel="shortcut icon" href="../../favicon.ico">
+  
+
+  
+  <link 
href='https://fonts.googleapis.com/css?family=Lato:400,700|Roboto+Slab:400,700|Inconsolata:400,700'
 rel='stylesheet' type='text/css'>
+
+  <link rel="stylesheet" href="../../css/theme.css" type="text/css" />
+  <link rel="stylesheet" href="../../css/theme_extra.css" type="text/css" />
+  <link rel="stylesheet" href="../../css/highlight.css">
+
+  
+  <script>
+    // Current page data
+    var mkdocs_page_name = "Csv Formatter";
+    var mkdocs_page_input_path = "operators/csvformatter.md";
+    var mkdocs_page_url = "/operators/csvformatter/";
+  </script>
+  
+  <script src="../../js/jquery-2.1.1.min.js"></script>
+  <script src="../../js/modernizr-2.8.3.min.js"></script>
+  <script type="text/javascript" src="../../js/highlight.pack.js"></script>
+  <script src="../../js/theme.js"></script> 
+
+  
+</head>
+
+<body class="wy-body-for-nav" role="document">
+
+  <div class="wy-grid-for-nav">
+
+    
+    <nav data-toggle="wy-nav-shift" class="wy-nav-side stickynav">
+      <div class="wy-side-nav-search">
+        <a href="../.." class="icon icon-home"> Apache Apex Malhar 
Documentation</a>
+        <div role="search">
+  <form id ="rtd-search-form" class="wy-form" action="../../search.html" 
method="get">
+    <input type="text" name="q" placeholder="Search docs" />
+  </form>
+</div>
+      </div>
+
+      <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" 
aria-label="main navigation">
+        <ul class="current">
+          
+            <li>
+    <li class="toctree-l1 ">
+        <a class="" href="../..">Apache Apex Malhar</a>
+        
+    </li>
+<li>
+          
+            <li>
+    <ul class="subnav">
+    <li><span>APIs</span></li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../../apis/calcite/">SQL</a>
+        
+    </li>
+
+        
+    </ul>
+<li>
+          
+            <li>
+    <ul class="subnav">
+    <li><span>Operators</span></li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../block_reader/">Block Reader</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="./">CSV Formatter</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../csvParserOperator/">CSV Parser</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../deduper/">Deduper</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../enricher/">Enricher</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../fsInputOperator/">File Input</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../file_output/">File Output</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../file_splitter/">File Splitter</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../filter/">Filter</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../AbstractJdbcTransactionableOutputOperator/">Jdbc 
Output Operator</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../jdbcPollInputOperator/">JDBC Poller Input</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../jmsInputOperator/">JMS Input</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../jsonFormatter/">JSON Formatter</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../jsonParser/">JSON Parser</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../kafkaInputOperator/">Kafka Input</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../transform/">Transformer</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../windowedOperator/">Windowed Operator</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../xmlParserOperator/">XML Parser</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../jsonParser/">Json Parser</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../jsonFormatter/">Json Formatter</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../transform/">Transform Operator</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 current">
+        <a class="current" href="./">Csv Formatter</a>
+        
+            <ul>
+            
+                <li class="toctree-l3"><a 
href="#csvformatter">CsvFormatter</a></li>
+                
+                    <li><a class="toctree-l4" 
href="#operator-objective">Operator Objective</a></li>
+                
+                    <li><a class="toctree-l4" 
href="#operator-information">Operator Information</a></li>
+                
+                    <li><a class="toctree-l4" 
href="#properties-attributes-and-ports">Properties, Attributes and 
Ports</a></li>
+                
+                    <li><a class="toctree-l4" 
href="#limitations">Limitations</a></li>
+                
+                    <li><a class="toctree-l4" href="#example">Example</a></li>
+                
+                    <li><a class="toctree-l4" 
href="#advanced">Advanced</a></li>
+                
+            
+            </ul>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../s3outputmodule/">S3 Output Module</a>
+        
+    </li>
+
+        
+    </ul>
+<li>
+          
+        </ul>
+      </div>
+      &nbsp;
+    </nav>
+
+    <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
+
+      
+      <nav class="wy-nav-top" role="navigation" aria-label="top navigation">
+        <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
+        <a href="../..">Apache Apex Malhar Documentation</a>
+      </nav>
+
+      
+      <div class="wy-nav-content">
+        <div class="rst-content">
+          <div role="navigation" aria-label="breadcrumbs navigation">
+  <ul class="wy-breadcrumbs">
+    <li><a href="../..">Docs</a> &raquo;</li>
+    
+      
+        
+          <li>Operators &raquo;</li>
+        
+      
+    
+    <li>Csv Formatter</li>
+    <li class="wy-breadcrumbs-aside">
+      
+    </li>
+  </ul>
+  <hr/>
+</div>
+          <div role="main">
+            <div class="section">
+              
+                <h1 id="csvformatter">CsvFormatter</h1>
+<h2 id="operator-objective">Operator Objective</h2>
+<p>This operator receives a POJO (<a 
href="https://en.wikipedia.org/wiki/Plain_Old_Java_Object">Plain Old Java 
Object</a>) as an incoming tuple, converts the data in 
+the incoming POJO to a custom delimited string and emits the delimited 
string.</p>
+<p>CsvFormatter supports schema definition as a JSON string. </p>
+<p>CsvFormatter does not hold any state and is <strong>idempotent</strong>, 
<strong>fault-tolerant</strong> and <strong>statically/dynamically 
partitionable</strong>.</p>
+<h2 id="operator-information">Operator Information</h2>
+<ol>
+<li>Operator location: <strong><em>malhar-contrib</em></strong></li>
+<li>Available since: <strong><em>3.2.0</em></strong></li>
+<li>Operator state: <strong><em>Evolving</em></strong></li>
+<li>Java Packages:<ul>
+<li>Operator: <strong><em><a 
href="https://www.datatorrent.com/docs/apidocs/com/datatorrent/contrib/formatter/CsvFormatter.html">com.datatorrent.contrib.formatter.CsvFormatter</a></em></strong></li>
+</ul>
+</li>
+</ol>
+<h2 id="properties-attributes-and-ports">Properties, Attributes and Ports</h2>
+<h3 id="properties-of-pojoenricher"><a name="props"></a>Properties of 
CsvFormatter</h3>
+<table>
+<thead>
+<tr>
+<th><strong>Property</strong></th>
+<th><strong>Description</strong></th>
+<th><strong>Type</strong></th>
+<th><strong>Mandatory</strong></th>
+<th><strong>Default Value</strong></th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td><em>schema</em></td>
+<td>Contents of the schema. The schema is specified in JSON format.</td>
+<td>String</td>
+<td>Yes</td>
+<td>N/A</td>
+</tr>
+</tbody>
+</table>
+<h3 id="platform-attributes-that-influences-operator-behavior">Platform 
Attributes that influence operator behavior</h3>
+<table>
+<thead>
+<tr>
+<th><strong>Attribute</strong></th>
+<th><strong>Description</strong></th>
+<th><strong>Type</strong></th>
+<th><strong>Mandatory</strong></th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td><em>in.TUPLE_CLASS</em></td>
+<td>TUPLE_CLASS attribute on input port which tells operator the class of POJO 
which will be incoming</td>
+<td>Class or FQCN</td>
+<td>Yes</td>
+</tr>
+</tbody>
+</table>
+<h3 id="ports">Ports</h3>
+<table>
+<thead>
+<tr>
+<th><strong>Port</strong></th>
+<th><strong>Description</strong></th>
+<th><strong>Type</strong></th>
+<th><strong>Mandatory</strong></th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td><em>in</em></td>
+<td>Tuples which need to be formatted are received on this port</td>
+<td>Object (POJO)</td>
+<td>Yes</td>
+</tr>
+<tr>
+<td><em>out</em></td>
+<td>Tuples that are formatted are emitted from this port</td>
+<td>String</td>
+<td>No</td>
+</tr>
+<tr>
+<td><em>err</em></td>
+<td>Tuples that could not be converted are emitted on this port</td>
+<td>Object</td>
+<td>No</td>
+</tr>
+</tbody>
+</table>
+<h2 id="limitations">Limitations</h2>
+<p>The current CsvFormatter has the following limitations:</p>
+<ol>
+<li>The field names in the schema and the POJO field names should match. For example, if 
the name of the schema field is "customerName", then the POJO should contain a field 
with the same name. </li>
+<li>Field wise validation/formatting is not yet supported.</li>
+<li>The fields will be written to the file in the same order as specified in 
schema.json</li>
+</ol>
+<h2 id="example">Example</h2>
+<p>Example for CsvFormatter can be found at: <a 
href="https://github.com/DataTorrent/examples/tree/master/tutorials/csvformatter">https://github.com/DataTorrent/examples/tree/master/tutorials/csvformatter</a></p>
+<h2 id="advanced">Advanced</h2>
+<h3 id="schema-format-for-csvformatter"><a name="JSONFileFormat"></a> Schema 
format for CsvFormatter</h3>
+<p>CsvFormatter expects schema to be a String in JSON format:</p>
+<p>Example for format of schema:</p>
+<pre><code class="json">{
+  &quot;separator&quot;: &quot;,&quot;,
+  &quot;quoteChar&quot;: &quot;\&quot;&quot;,
+  &quot;lineDelimiter&quot;: &quot;\n&quot;,
+  &quot;fields&quot;: [
+    {
+      &quot;name&quot;: &quot;campaignId&quot;,
+      &quot;type&quot;: &quot;Integer&quot;
+    },
+    {
+      &quot;name&quot;: &quot;startDate&quot;,
+      &quot;type&quot;: &quot;Date&quot;,
+      &quot;constraints&quot;: {
+        &quot;format&quot;: &quot;yyyy-MM-dd&quot;
+      }
+    }
+    ]
+}
+</code></pre>
+
+<h3 id="partitioning-of-csvformatter">Partitioning of CsvFormatter</h3>
+<p>Because CsvFormatter is a stateless operator, the built-in partitioners 
present in the Malhar library can be used directly by setting properties as 
follows:</p>
+<h4 id="stateless-partioning-of-csvformatter">Stateless partitioning of 
CsvFormatter</h4>
+<p>Stateless partitioning will ensure that CsvFormatter will be partitioned 
right at the start of the application and will remain partitioned throughout 
the lifetime of the DAG.
+CsvFormatter can be stateless partitioned by adding following lines to 
properties.xml:</p>
+<pre><code class="xml">  &lt;property&gt;
+    &lt;name&gt;dt.operator.{OperatorName}.attr.PARTITIONER&lt;/name&gt;
+    
&lt;value&gt;com.datatorrent.common.partitioner.StatelessPartitioner:2&lt;/value&gt;
+  &lt;/property&gt;
+</code></pre>
+
+<p>where {OperatorName} is the name of the CsvFormatter operator.
+Above lines will partition CsvFormatter statically 2 times. Above value can be 
changed accordingly to change the number of static partitions.</p>
+<h4 id="dynamic-partitioning-of-csvformatter">Dynamic Partitioning of 
CsvFormatter</h4>
+<p>Dynamic partitioning is a feature of Apex platform which changes the 
partition of the operator based on certain conditions.
+CsvFormatter can be dynamically partitioned using below out-of-the-box 
partitioner:</p>
+<h5 id="throughput-based">Throughput based</h5>
+<p>Following code can be added to populateDAG method of application to 
dynamically partition CsvFormatter:</p>
+<pre><code class="java">    
StatelessThroughputBasedPartitioner&lt;CsvFormatter&gt; partitioner = new 
StatelessThroughputBasedPartitioner&lt;&gt;();
+    partitioner.setCooldownMillis(conf.getLong(COOL_DOWN_MILLIS, 10000));
+    partitioner.setMaximumEvents(conf.getLong(MAX_THROUGHPUT, 30000));
+    partitioner.setMinimumEvents(conf.getLong(MIN_THROUGHPUT, 10000));
+    dag.setAttribute(csvFormatter, OperatorContext.STATS_LISTENERS, 
Arrays.asList(new StatsListener[]{partitioner}));
+    dag.setAttribute(csvFormatter, OperatorContext.PARTITIONER, partitioner);
+</code></pre>
+
+<p>Above code will dynamically partition CsvFormatter when throughput changes.
+If overall throughput of CsvFormatter goes beyond 30000 or less than 10000, 
the platform will repartition CsvFormatter 
+to balance throughput of a single partition to be between 10000 and 30000.
+CooldownMillis of 10000 will be used as the threshold time for which the throughput 
change is observed.</p>
+              
+            </div>
+          </div>
+          <footer>
+  
+    <div class="rst-footer-buttons" role="navigation" aria-label="footer 
navigation">
+      
+        <a href="../s3outputmodule/" class="btn btn-neutral float-right" 
title="S3 Output Module">Next <span class="icon 
icon-circle-arrow-right"></span></a>
+      
+      
+        <a href="../transform/" class="btn btn-neutral" title="Transform 
Operator"><span class="icon icon-circle-arrow-left"></span> Previous</a>
+      
+    </div>
+  
+
+  <hr/>
+
+  <div role="contentinfo">
+    <!-- Copyright etc -->
+    
+  </div>
+
+  Built with <a href="http://www.mkdocs.org">MkDocs</a> using a <a 
href="https://github.com/snide/sphinx_rtd_theme">theme</a> provided by <a 
href="https://readthedocs.org">Read the Docs</a>.
+</footer>
+         
+        </div>
+      </div>
+
+    </section>
+
+  </div>
+
+<div class="rst-versions" role="note" style="cursor: pointer">
+    <span class="rst-current-version" data-toggle="rst-current-version">
+      
+      
+        <span><a href="../transform/" style="color: #fcfcfc;">&laquo; 
Previous</a></span>
+      
+      
+        <span style="margin-left: 15px"><a href="../s3outputmodule/" 
style="color: #fcfcfc">Next &raquo;</a></span>
+      
+    </span>
+</div>
+
+</body>
+</html>

Reply via email to