[2/5] bahir-website git commit: Publishing from 6d16a5c37065c70c06953838f0d76469926967a0

lresende Mon, 17 Jul 2017 10:26:50 -0700

http://git-wip-us.apache.org/repos/asf/bahir-website/blob/7ad4d5a8/content/docs/spark/current/spark-sql-cloudant/index.html
----------------------------------------------------------------------
diff --git a/content/docs/spark/current/spark-sql-cloudant/index.html 
b/content/docs/spark/current/spark-sql-cloudant/index.html
new file mode 100644
index 0000000..1ed6ac4
--- /dev/null
+++ b/content/docs/spark/current/spark-sql-cloudant/index.html
@@ -0,0 +1,677 @@
+
+
+<!DOCTYPE html>
+<html lang="en">
+  <head>
+    <meta charset="utf-8">
+    <title>Spark Data Source for Apache CouchDB/Cloudant</title>
+    <meta name="description" content="Spark Data Source for Apache 
CouchDB/Cloudant">
+    <meta name="author" content="">
+
+    <!-- Enable responsive viewport -->
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+
+    <!-- Le HTML5 shim, for IE6-8 support of HTML elements -->
+    <!--[if lt IE 9]>
+      <script 
src="http://html5shim.googlecode.com/svn/trunk/html5.js"></script>
+    <![endif]-->
+
+    <!-- Le styles -->
+    <link href="/assets/themes/apache-clean/bootstrap/css/bootstrap.css" 
rel="stylesheet">
+    <link href="/assets/themes/apache-clean/css/style.css?body=1" 
rel="stylesheet" type="text/css">
+    <link href="/assets/themes/apache-clean/css/syntax.css" rel="stylesheet"  
type="text/css" media="screen" />
+    <!-- Le fav and touch icons -->
+    <!-- Update these with your own images
+    <link rel="shortcut icon" href="images/favicon.ico">
+    <link rel="apple-touch-icon" href="images/apple-touch-icon.png">
+    <link rel="apple-touch-icon" sizes="72x72" 
href="images/apple-touch-icon-72x72.png">
+    <link rel="apple-touch-icon" sizes="114x114" 
href="images/apple-touch-icon-114x114.png">
+  -->
+
+    <!-- make tables sortable by adding class tag "sortable" to table elements 
-->
+    <script 
src="http://www.kryogenix.org/code/browser/sorttable/sorttable.js"></script>
+
+
+  </head>
+
+  <body>
+
+    
+
+<!-- Navigation -->
+<div id="nav-bar">
+  <nav id="nav-container" class="navbar navbar-inverse " role="navigation">
+    <div class="container">
+      <!-- Brand and toggle get grouped for better mobile display -->
+
+      <div class="navbar-header page-scroll">
+        <button type="button" class="navbar-toggle" data-toggle="collapse" 
data-target=".navbar-collapse">
+          <span class="sr-only">Toggle navigation</span>
+          <span class="icon-bar"></span>
+          <span class="icon-bar"></span>
+          <span class="icon-bar"></span>
+        </button>
+        <a class="navbar-brand page-scroll" href="/#home">Home</a>
+      </div>
+      <!-- Collect the nav links, forms, and other content for toggling -->
+      <nav class="navbar-collapse collapse" role="navigation">
+        <ul class="nav navbar-nav">
+          
+          
+          
+          <li id="download">
+            
+            <a href="#" data-toggle="dropdown" 
class="dropdown-toggle">Download<b class="caret"></b></a>
+            <ul class="dropdown-menu dropdown-left">
+              
+              
+              <li><a href="/downloads/spark" target="_self">Bahir Spark 
Extensions</a></li>
+              
+              
+              <li><a href="/downloads/flink" target="_self">Bahir Flink 
Extensions</a></li>
+              
+            </ul>
+            
+          </li>
+          
+          
+          
+          
+          <li id="community">
+            
+            <a href="#" data-toggle="dropdown" 
class="dropdown-toggle">Community<b class="caret"></b></a>
+            <ul class="dropdown-menu dropdown-left">
+              
+              
+              <li><a href="/community" target="_self">Get Involved</a></li>
+              
+              
+              <li><a href="/contributing" target="_self">Contributing</a></li>
+              
+              
+              <li><a href="/contributing-extensions" 
target="_self">Contributing Extensions</a></li>
+              
+              
+              <li><a href="https://issues.apache.org/jira/browse/BAHIR" 
target="_blank">Issue Tracker</a></li>
+              
+              
+              <li><a href="https://github.com/apache/bahir" 
target="_blank">Source Code</a></li>
+              
+              
+              <li><a href="/community-members" target="_self">Project 
Committers</a></li>
+              
+            </ul>
+            
+          </li>
+          
+          
+          
+          
+          <li id="documentation">
+            
+            <a href="#" data-toggle="dropdown" 
class="dropdown-toggle">Documentation<b class="caret"></b></a>
+            <ul class="dropdown-menu dropdown-left">
+              
+              
+              <li><a href="/docs/spark/overview" target="_self">Bahir Spark 
Extensions</a></li>
+              
+              
+              <li><a href="/docs/flink/overview" target="_self">Bahir Flink 
Extensions</a></li>
+              
+            </ul>
+            
+          </li>
+          
+          
+          
+          
+          <li id="github">
+            
+            <a href="#" data-toggle="dropdown" 
class="dropdown-toggle">GitHub<b class="caret"></b></a>
+            <ul class="dropdown-menu dropdown-left">
+              
+              
+              <li><a href="https://github.com/apache/bahir" 
target="_blank">Bahir Spark Extensions</a></li>
+              
+              
+              <li><a href="https://github.com/apache/bahir-flink" 
target="_blank">Bahir Flink Extensions</a></li>
+              
+              
+              <li><a href="https://github.com/apache/bahir-website" 
target="_blank">Bahir Website</a></li>
+              
+            </ul>
+            
+          </li>
+          
+          
+          
+          
+          <li id="apache">
+            
+            <a href="#" data-toggle="dropdown" 
class="dropdown-toggle">Apache<b class="caret"></b></a>
+            <ul class="dropdown-menu dropdown-left">
+              
+              
+              <li><a href="http://www.apache.org/foundation/how-it-works.html" 
target="_blank">Apache Software Foundation</a></li>
+              
+              
+              <li><a href="http://www.apache.org/licenses/" 
target="_blank">Apache License</a></li>
+              
+              
+              <li><a href="http://www.apache.org/foundation/sponsorship" 
target="_blank">Sponsorship</a></li>
+              
+              
+              <li><a href="http://www.apache.org/foundation/thanks.html" 
target="_blank">Thanks</a></li>
+              
+              
+              <li><a href="/privacy-policy" target="_self">Privacy 
Policy</a></li>
+              
+            </ul>
+            
+          </li>
+          
+          
+        </ul>
+      </nav><!--/.navbar-collapse -->
+      <!-- /.navbar-collapse -->
+    </div>
+    <!-- /.container -->
+  </nav>
+</div>
+
+
+    <div class="container">
+
+      
+
+<!--<div class="hero-unit Spark Data Source for Apache CouchDB/Cloudant">
+  <h1></h1>
+</div>
+-->
+
+<div class="row">
+  <div class="col-md-12">
+    <!--
+
+-->
+
+<p>A library for reading data from Cloudant or CouchDB databases using Spark 
SQL and Spark Streaming.</p>
+
+<p><a href="https://cloudant.com">IBM® Cloudant®</a> is a document-oriented 
DataBase as a Service (DBaaS). It stores data as documents 
+in JSON format. It’s built with scalability, high availability, and 
durability in mind. It comes with a 
+wide variety of indexing options including map-reduce, Cloudant Query, 
full-text indexing, and 
+geospatial indexing. The replication capabilities make it easy to keep data in 
sync between database 
+clusters, desktop PCs, and mobile devices.</p>
+
+<p><a href="http://couchdb.apache.org">Apache CouchDB™</a> is open source 
database software that focuses on ease of use and having an architecture that 
“completely embraces the Web”. It has a document-oriented NoSQL database 
architecture and is implemented in the concurrency-oriented language Erlang; it 
uses JSON to store data, JavaScript as its query language using MapReduce, and 
HTTP for an API.</p>
+
+<h2 id="linking">Linking</h2>
+
+<p>Using SBT:</p>
+
+<div class="highlighter-rouge"><pre 
class="highlight"><code>libraryDependencies += "org.apache.bahir" %% 
"spark-sql-cloudant" % "2.2.0-SNAPSHOT"
+</code></pre>
+</div>
+
+<p>Using Maven:</p>
+
+<div class="highlighter-rouge"><pre class="highlight"><code>&lt;dependency&gt;
+    &lt;groupId&gt;org.apache.bahir&lt;/groupId&gt;
+    &lt;artifactId&gt;spark-sql-cloudant_2.11&lt;/artifactId&gt;
+    &lt;version&gt;2.2.0-SNAPSHOT&lt;/version&gt;
+&lt;/dependency&gt;
+</code></pre>
+</div>
+
+<p>This library can also be added to Spark jobs launched through <code 
class="highlighter-rouge">spark-shell</code> or <code 
class="highlighter-rouge">spark-submit</code> by using the <code 
class="highlighter-rouge">--packages</code> command line option.</p>
+
+<div class="highlighter-rouge"><pre class="highlight"><code>$ bin/spark-shell 
--packages org.apache.bahir:spark-sql-cloudant_2.11:2.2.0-SNAPSHOT
+</code></pre>
+</div>
+
+<p>Unlike using <code class="highlighter-rouge">--jars</code>, using <code 
class="highlighter-rouge">--packages</code> ensures that this library and its 
dependencies will be added to the classpath.
+The <code class="highlighter-rouge">--packages</code> argument can also be 
used with <code class="highlighter-rouge">bin/spark-submit</code>.</p>
+
+<p>Submit a job in Python:</p>
+
+<div class="highlighter-rouge"><pre class="highlight"><code>spark-submit  
--master local[4] --jars &lt;path to cloudant-spark.jar&gt;  &lt;path to python 
script&gt; 
+</code></pre>
+</div>
+
+<p>Submit a job in Scala:</p>
+
+<div class="highlighter-rouge"><pre class="highlight"><code>spark-submit 
--class "&lt;your class&gt;" --master local[4] --jars &lt;path to 
cloudant-spark.jar&gt; &lt;path to your app jar&gt;
+</code></pre>
+</div>
+
+<p>This library is compiled for Scala 2.11 only, and intends to support Spark 
2.0 onwards.</p>
+
+<h2 id="configuration-options">Configuration options</h2>
+<p>The configuration is obtained in the following sequence:</p>
+
+<ol>
+  <li>default in the Config, which is set in the application.conf</li>
+  <li>key in the SparkConf, which is set in SparkConf</li>
+  <li>key in the parameters, which is set in a dataframe or temporary table 
options</li>
+  <li>“spark.”+key in the SparkConf (as they are treated as the one passed 
in through spark-submit using --conf option)</li>
+</ol>
+
+<p>Here each subsequent configuration overrides the previous one. Thus, 
configuration set using DataFrame option overrides what has been set in 
SparkConf. And configuration passed in spark-submit using --conf takes 
precedence over any setting in the code.</p>
+
+<h3 id="configuration-in-applicationconf">Configuration in 
application.conf</h3>
+<p>Default values are defined in <a 
href="cloudant-spark-sql/src/main/resources/application.conf">here</a>.</p>
+
+<h3 id="configuration-on-sparkconf">Configuration on SparkConf</h3>
+
+<table>
+  <thead>
+    <tr>
+      <th>Name</th>
+      <th style="text-align: center">Default</th>
+      <th>Meaning</th>
+    </tr>
+  </thead>
+  <tbody>
+    <tr>
+      <td>cloudant.protocol</td>
+      <td style="text-align: center">https</td>
+      <td>protocol to use to transfer data: http or https</td>
+    </tr>
+    <tr>
+      <td>cloudant.host</td>
+      <td style="text-align: center">&nbsp;</td>
+      <td>cloudant host url</td>
+    </tr>
+    <tr>
+      <td>cloudant.username</td>
+      <td style="text-align: center">&nbsp;</td>
+      <td>cloudant userid</td>
+    </tr>
+    <tr>
+      <td>cloudant.password</td>
+      <td style="text-align: center">&nbsp;</td>
+      <td>cloudant password</td>
+    </tr>
+    <tr>
+      <td>cloudant.useQuery</td>
+      <td style="text-align: center">false</td>
+      <td>By default, _all_docs endpoint is used if configuration “view” 
and “index” (see below) are not set. When useQuery is enabled, _find 
endpoint will be used in place of _all_docs when query condition is not on 
primary key field (_id), so that query predicates may be driven into 
datastore.</td>
+    </tr>
+    <tr>
+      <td>cloudant.queryLimit</td>
+      <td style="text-align: center">25</td>
+      <td>The maximum number of results returned when querying the _find 
endpoint.</td>
+    </tr>
+    <tr>
+      <td>jsonstore.rdd.partitions</td>
+      <td style="text-align: center">10</td>
+      <td>the intended number of partitions used to drive JsonStoreRDD loading 
query result in parallel. The actual number is calculated based on total rows 
returned and satisfying maxInPartition and minInPartition</td>
+    </tr>
+    <tr>
+      <td>jsonstore.rdd.maxInPartition</td>
+      <td style="text-align: center">-1</td>
+      <td>the max rows in a partition. -1 means unlimited</td>
+    </tr>
+    <tr>
+      <td>jsonstore.rdd.minInPartition</td>
+      <td style="text-align: center">10</td>
+      <td>the min rows in a partition.</td>
+    </tr>
+    <tr>
+      <td>jsonstore.rdd.requestTimeout</td>
+      <td style="text-align: center">900000</td>
+      <td>the request timeout in milliseconds</td>
+    </tr>
+    <tr>
+      <td>bulkSize</td>
+      <td style="text-align: center">200</td>
+      <td>the bulk save size</td>
+    </tr>
+    <tr>
+      <td>schemaSampleSize</td>
+      <td style="text-align: center">“-1”</td>
+      <td>the sample size for RDD schema discovery. 1 means we are using only 
first document for schema discovery; -1 means all documents; 0 will be treated 
as 1; any number N means min(N, total) docs</td>
+    </tr>
+    <tr>
+      <td>createDBOnSave</td>
+      <td style="text-align: center">“false”</td>
+      <td>whether to create a new database during save operation. If false, a 
database should already exist. If true, a new database will be created. If 
true, and a database with a provided name already exists, an error will be 
raised.</td>
+    </tr>
+  </tbody>
+</table>
+
+<h3 id="configuration-on-spark-sql-temporary-table-or-dataframe">Configuration 
on Spark SQL Temporary Table or DataFrame</h3>
+
+<p>Besides all the configurations passed to a temporary table or dataframe 
through SparkConf, it is also possible to set the following configurations in 
temporary table or dataframe using OPTIONS:</p>
+
+<table>
+  <thead>
+    <tr>
+      <th>Name</th>
+      <th style="text-align: center">Default</th>
+      <th>Meaning</th>
+    </tr>
+  </thead>
+  <tbody>
+    <tr>
+      <td>database</td>
+      <td style="text-align: center">&nbsp;</td>
+      <td>cloudant database name</td>
+    </tr>
+    <tr>
+      <td>view</td>
+      <td style="text-align: center">&nbsp;</td>
+      <td>cloudant view w/o the database name. only used for load.</td>
+    </tr>
+    <tr>
+      <td>index</td>
+      <td style="text-align: center">&nbsp;</td>
+      <td>cloudant search index w/o the database name. only used for load data 
with less than or equal to 200 results.</td>
+    </tr>
+    <tr>
+      <td>path</td>
+      <td style="text-align: center">&nbsp;</td>
+      <td>cloudant: as database name if database is not present</td>
+    </tr>
+    <tr>
+      <td>schemaSampleSize</td>
+      <td style="text-align: center">“-1”</td>
+      <td>the sample size used to discover the schema for this temp table. -1 
scans all documents</td>
+    </tr>
+    <tr>
+      <td>bulkSize</td>
+      <td style="text-align: center">200</td>
+      <td>the bulk save size</td>
+    </tr>
+    <tr>
+      <td>createDBOnSave</td>
+      <td style="text-align: center">“false”</td>
+      <td>whether to create a new database during save operation. If false, a 
database should already exist. If true, a new database will be created. If 
true, and a database with a provided name already exists, an error will be 
raised.</td>
+    </tr>
+  </tbody>
+</table>
+
+<p>For fast loading, views are loaded without include_docs. Thus, a derived 
schema will always be: <code class="highlighter-rouge"><span 
class="p">{</span><span class="err">id,</span><span class="w"> </span><span 
class="err">key,</span><span class="w"> </span><span 
class="err">value</span><span class="p">}</span></code>, where <code 
class="highlighter-rouge">value </code>can be a compound field. An example of 
loading data from a view:</p>
+
+<div class="language-python highlighter-rouge"><pre 
class="highlight"><code><span class="n">spark</span><span 
class="o">.</span><span class="n">sql</span><span class="p">(</span><span 
class="s">" CREATE TEMPORARY TABLE flightTable1 USING org.apache.bahir.cloudant 
OPTIONS ( database 'n_flight', view '_design/view/_view/AA0')"</span><span 
class="p">)</span>
+
+</code></pre>
+</div>
+
+<h3 id="configuration-on-cloudant-receiver-for-spark-streaming">Configuration 
on Cloudant Receiver for Spark Streaming</h3>
+
+<table>
+  <thead>
+    <tr>
+      <th>Name</th>
+      <th style="text-align: center">Default</th>
+      <th>Meaning</th>
+    </tr>
+  </thead>
+  <tbody>
+    <tr>
+      <td>cloudant.host</td>
+      <td style="text-align: center">&nbsp;</td>
+      <td>cloudant host url</td>
+    </tr>
+    <tr>
+      <td>cloudant.username</td>
+      <td style="text-align: center">&nbsp;</td>
+      <td>cloudant userid</td>
+    </tr>
+    <tr>
+      <td>cloudant.password</td>
+      <td style="text-align: center">&nbsp;</td>
+      <td>cloudant password</td>
+    </tr>
+    <tr>
+      <td>database</td>
+      <td style="text-align: center">&nbsp;</td>
+      <td>cloudant database name</td>
+    </tr>
+    <tr>
+      <td>selector</td>
+      <td style="text-align: center">all documents</td>
+      <td>a selector written in Cloudant Query syntax, specifying conditions 
for selecting documents. Only documents satisfying the selector’s conditions 
will be retrieved from Cloudant and loaded into Spark.</td>
+    </tr>
+  </tbody>
+</table>
+
+<h3 id="configuration-in-spark-submit-using---conf-option">Configuration in 
spark-submit using --conf option</h3>
+
+<p>The above stated configuration keys can also be set using <code 
class="highlighter-rouge">spark-submit --conf</code> option. When passing 
configuration in spark-submit, make sure to add “spark.” as a prefix to the 
keys.</p>
+
+<h2 id="examples">Examples</h2>
+
+<h3 id="python-api">Python API</h3>
+
+<h4 id="using-sql-in-python">Using SQL In Python</h4>
+
+<div class="language-python highlighter-rouge"><pre 
class="highlight"><code><span class="n">spark</span> <span class="o">=</span> 
<span class="n">SparkSession</span>\
+    <span class="o">.</span><span class="n">builder</span>\
+    <span class="o">.</span><span class="n">appName</span><span 
class="p">(</span><span class="s">"Cloudant Spark SQL Example in Python using 
temp tables"</span><span class="p">)</span>\
+    <span class="o">.</span><span class="n">config</span><span 
class="p">(</span><span class="s">"cloudant.host"</span><span 
class="p">,</span><span class="s">"ACCOUNT.cloudant.com"</span><span 
class="p">)</span>\
+    <span class="o">.</span><span class="n">config</span><span 
class="p">(</span><span class="s">"cloudant.username"</span><span 
class="p">,</span> <span class="s">"USERNAME"</span><span class="p">)</span>\
+    <span class="o">.</span><span class="n">config</span><span 
class="p">(</span><span class="s">"cloudant.password"</span><span 
class="p">,</span><span class="s">"PASSWORD"</span><span class="p">)</span>\
+    <span class="o">.</span><span class="n">getOrCreate</span><span 
class="p">()</span>
+
+
+<span class="c"># Loading temp table from Cloudant db</span>
+<span class="n">spark</span><span class="o">.</span><span 
class="n">sql</span><span class="p">(</span><span class="s">" CREATE TEMPORARY 
TABLE airportTable USING org.apache.bahir.cloudant OPTIONS ( database 
'n_airportcodemapping')"</span><span class="p">)</span>
+<span class="n">airportData</span> <span class="o">=</span> <span 
class="n">spark</span><span class="o">.</span><span class="n">sql</span><span 
class="p">(</span><span class="s">"SELECT _id, airportName FROM airportTable 
WHERE _id &gt;= 'CAA' AND _id &lt;= 'GAA' ORDER BY _id"</span><span 
class="p">)</span>
+<span class="n">airportData</span><span class="o">.</span><span 
class="n">printSchema</span><span class="p">()</span>
+<span class="k">print</span> <span class="s">'Total # of rows in airportData: 
'</span> <span class="o">+</span> <span class="nb">str</span><span 
class="p">(</span><span class="n">airportData</span><span 
class="o">.</span><span class="n">count</span><span class="p">())</span>
+<span class="k">for</span> <span class="n">code</span> <span 
class="ow">in</span> <span class="n">airportData</span><span 
class="o">.</span><span class="n">collect</span><span class="p">():</span>
+    <span class="k">print</span> <span class="n">code</span><span 
class="o">.</span><span class="n">_id</span>
+</code></pre>
+</div>
+
+<p>See <a href="examples/python/CloudantApp.py">CloudantApp.py</a> for 
examples.</p>
+
+<p>Submit job example:
+<code class="highlighter-rouge">
+spark-submit  --packages 
org.apache.bahir:spark-sql-cloudant_2.11:2.2.0-SNAPSHOT --conf 
spark.cloudant.host=ACCOUNT.cloudant.com --conf 
spark.cloudant.username=USERNAME --conf spark.cloudant.password=PASSWORD 
sql-cloudant/examples/python/CloudantApp.py
+</code></p>
+
+<h4 id="using-dataframe-in-python">Using DataFrame In Python</h4>
+
+<div class="language-python highlighter-rouge"><pre 
class="highlight"><code><span class="n">spark</span> <span class="o">=</span> 
<span class="n">SparkSession</span>\
+    <span class="o">.</span><span class="n">builder</span>\
+    <span class="o">.</span><span class="n">appName</span><span 
class="p">(</span><span class="s">"Cloudant Spark SQL Example in Python using 
dataframes"</span><span class="p">)</span>\
+    <span class="o">.</span><span class="n">config</span><span 
class="p">(</span><span class="s">"cloudant.host"</span><span 
class="p">,</span><span class="s">"ACCOUNT.cloudant.com"</span><span 
class="p">)</span>\
+    <span class="o">.</span><span class="n">config</span><span 
class="p">(</span><span class="s">"cloudant.username"</span><span 
class="p">,</span> <span class="s">"USERNAME"</span><span class="p">)</span>\
+    <span class="o">.</span><span class="n">config</span><span 
class="p">(</span><span class="s">"cloudant.password"</span><span 
class="p">,</span><span class="s">"PASSWORD"</span><span class="p">)</span>\
+    <span class="o">.</span><span class="n">config</span><span 
class="p">(</span><span class="s">"jsonstore.rdd.partitions"</span><span 
class="p">,</span> <span class="mi">8</span><span class="p">)</span>\
+    <span class="o">.</span><span class="n">getOrCreate</span><span 
class="p">()</span>
+
+<span class="c"># ***1. Loading dataframe from Cloudant db</span>
+<span class="n">df</span> <span class="o">=</span> <span 
class="n">spark</span><span class="o">.</span><span class="n">read</span><span 
class="o">.</span><span class="n">load</span><span class="p">(</span><span 
class="s">"n_airportcodemapping"</span><span class="p">,</span> <span 
class="s">"org.apache.bahir.cloudant"</span><span class="p">)</span>
+<span class="n">df</span><span class="o">.</span><span 
class="n">cache</span><span class="p">()</span> 
+<span class="n">df</span><span class="o">.</span><span 
class="n">printSchema</span><span class="p">()</span>
+<span class="n">df</span><span class="o">.</span><span 
class="nb">filter</span><span class="p">(</span><span class="n">df</span><span 
class="o">.</span><span class="n">airportName</span> <span 
class="o">&gt;=</span> <span class="s">'Moscow'</span><span 
class="p">)</span><span class="o">.</span><span class="n">select</span><span 
class="p">(</span><span class="s">"_id"</span><span class="p">,</span><span 
class="s">'airportName'</span><span class="p">)</span><span 
class="o">.</span><span class="n">show</span><span class="p">()</span>
+<span class="n">df</span><span class="o">.</span><span 
class="nb">filter</span><span class="p">(</span><span class="n">df</span><span 
class="o">.</span><span class="n">_id</span> <span class="o">&gt;=</span> <span 
class="s">'CAA'</span><span class="p">)</span><span class="o">.</span><span 
class="n">select</span><span class="p">(</span><span 
class="s">"_id"</span><span class="p">,</span><span 
class="s">'airportName'</span><span class="p">)</span><span 
class="o">.</span><span class="n">show</span><span class="p">()</span>      
+</code></pre>
+</div>
+
+<p>See <a href="examples/python/CloudantDF.py">CloudantDF.py</a> for 
examples.</p>
+
+<p>In case of doing multiple operations on a dataframe (select, filter etc.),
+you should persist a dataframe. Otherwise, every operation on a dataframe will 
load the same data from Cloudant again.
+Persisting will also speed up computation. This statement will persist an RDD 
in memory: <code class="highlighter-rouge">df.cache()</code>.  Alternatively 
for large dbs to persist in memory &amp; disk, use:</p>
+
+<div class="language-python highlighter-rouge"><pre 
class="highlight"><code><span class="kn">from</span> <span 
class="nn">pyspark</span> <span class="kn">import</span> <span 
class="n">StorageLevel</span>
+<span class="n">df</span><span class="o">.</span><span 
class="n">persist</span><span class="p">(</span><span 
class="n">storageLevel</span> <span class="o">=</span> <span 
class="n">StorageLevel</span><span class="p">(</span><span 
class="bp">True</span><span class="p">,</span> <span 
class="bp">True</span><span class="p">,</span> <span 
class="bp">False</span><span class="p">,</span> <span 
class="bp">True</span><span class="p">,</span> <span class="mi">1</span><span 
class="p">))</span>
+</code></pre>
+</div>
+
+<p><a href="examples/python/CloudantDFOption.py">Sample code</a> on using 
DataFrame option to define cloudant configuration</p>
+
+<h3 id="scala-api">Scala API</h3>
+
+<h4 id="using-sql-in-scala">Using SQL In Scala</h4>
+
+<div class="language-scala highlighter-rouge"><pre 
class="highlight"><code><span class="k">val</span> <span class="n">spark</span> 
<span class="k">=</span> <span class="nc">SparkSession</span>
+      <span class="o">.</span><span class="n">builder</span><span 
class="o">()</span>
+      <span class="o">.</span><span class="n">appName</span><span 
class="o">(</span><span class="s">"Cloudant Spark SQL Example"</span><span 
class="o">)</span>
+      <span class="o">.</span><span class="n">config</span><span 
class="o">(</span><span class="s">"cloudant.host"</span><span 
class="o">,</span><span class="s">"ACCOUNT.cloudant.com"</span><span 
class="o">)</span>
+      <span class="o">.</span><span class="n">config</span><span 
class="o">(</span><span class="s">"cloudant.username"</span><span 
class="o">,</span> <span class="s">"USERNAME"</span><span class="o">)</span>
+      <span class="o">.</span><span class="n">config</span><span 
class="o">(</span><span class="s">"cloudant.password"</span><span 
class="o">,</span><span class="s">"PASSWORD"</span><span class="o">)</span>
+      <span class="o">.</span><span class="n">getOrCreate</span><span 
class="o">()</span>
+
+<span class="c1">// For implicit conversions of Dataframe to RDDs
+</span><span class="k">import</span> <span class="nn">spark.implicits._</span>
+    
+<span class="c1">// create a temp table from Cloudant db and query it using 
sql syntax
+</span><span class="n">spark</span><span class="o">.</span><span 
class="n">sql</span><span class="o">(</span>
+    <span class="n">s</span><span class="s">"""
+    |CREATE TEMPORARY TABLE airportTable
+    |USING org.apache.bahir.cloudant
+    |OPTIONS ( database 'n_airportcodemapping')
+    """</span><span class="o">.</span><span class="n">stripMargin</span><span 
class="o">)</span>
+<span class="c1">// create a dataframe
+</span><span class="k">val</span> <span class="n">airportData</span> <span 
class="k">=</span> <span class="n">spark</span><span class="o">.</span><span 
class="n">sql</span><span class="o">(</span><span class="s">"SELECT _id, 
airportName FROM airportTable WHERE _id &gt;= 'CAA' AND _id &lt;= 'GAA' ORDER 
BY _id"</span><span class="o">)</span>
+<span class="n">airportData</span><span class="o">.</span><span 
class="n">printSchema</span><span class="o">()</span>
+<span class="n">println</span><span class="o">(</span><span 
class="n">s</span><span class="s">"Total # of rows in airportData: "</span> 
<span class="o">+</span> <span class="n">airportData</span><span 
class="o">.</span><span class="n">count</span><span class="o">())</span>
+<span class="c1">// convert dataframe to array of Rows, and process each row
+</span><span class="n">airportData</span><span class="o">.</span><span 
class="n">map</span><span class="o">(</span><span class="n">t</span> <span 
class="k">=&gt;</span> <span class="s">"code: "</span> <span class="o">+</span> 
<span class="n">t</span><span class="o">(</span><span class="mi">0</span><span 
class="o">)</span> <span class="o">+</span> <span class="s">",name:"</span> 
<span class="o">+</span> <span class="n">t</span><span class="o">(</span><span 
class="mi">1</span><span class="o">)).</span><span 
class="n">collect</span><span class="o">().</span><span 
class="n">foreach</span><span class="o">(</span><span 
class="n">println</span><span class="o">)</span>
+</code></pre>
+</div>
+<p>See <a 
href="examples/scala/src/main/scala/mytest/spark/CloudantApp.scala">CloudantApp.scala</a>
 for examples.</p>
+
+<p>Submit job example:
+<code class="highlighter-rouge">
+spark-submit --class org.apache.spark.examples.sql.cloudant.CloudantApp 
--packages org.apache.bahir:spark-sql-cloudant_2.11:2.2.0-SNAPSHOT --conf 
spark.cloudant.host=ACCOUNT.cloudant.com --conf 
spark.cloudant.username=USERNAME --conf spark.cloudant.password=PASSWORD  
/path/to/spark-sql-cloudant_2.11-2.2.0-SNAPSHOT-tests.jar
+</code></p>
+
+<h4 id="using-dataframe-in-scala">Using DataFrame In Scala</h4>
+
+<div class="language-scala highlighter-rouge"><pre 
class="highlight"><code><span class="k">val</span> <span class="n">spark</span> 
<span class="k">=</span> <span class="nc">SparkSession</span>
+      <span class="o">.</span><span class="n">builder</span><span 
class="o">()</span>
+      <span class="o">.</span><span class="n">appName</span><span 
class="o">(</span><span class="s">"Cloudant Spark SQL Example with 
Dataframe"</span><span class="o">)</span>
+      <span class="o">.</span><span class="n">config</span><span 
class="o">(</span><span class="s">"cloudant.host"</span><span 
class="o">,</span><span class="s">"ACCOUNT.cloudant.com"</span><span 
class="o">)</span>
+      <span class="o">.</span><span class="n">config</span><span 
class="o">(</span><span class="s">"cloudant.username"</span><span 
class="o">,</span> <span class="s">"USERNAME"</span><span class="o">)</span>
+      <span class="o">.</span><span class="n">config</span><span 
class="o">(</span><span class="s">"cloudant.password"</span><span 
class="o">,</span><span class="s">"PASSWORD"</span><span class="o">)</span>
+      <span class="o">.</span><span class="n">config</span><span 
class="o">(</span><span class="s">"createDBOnSave"</span><span 
class="o">,</span><span class="s">"true"</span><span class="o">)</span> <span 
class="c1">// to create a db on save
+</span>      <span class="o">.</span><span class="n">config</span><span 
class="o">(</span><span class="s">"jsonstore.rdd.partitions"</span><span 
class="o">,</span> <span class="s">"20"</span><span class="o">)</span> <span 
class="c1">// using 20 partitions
+</span>      <span class="o">.</span><span class="n">getOrCreate</span><span 
class="o">()</span>
+          
+<span class="c1">// 1. Loading data from Cloudant db
+</span><span class="k">val</span> <span class="n">df</span> <span 
class="k">=</span> <span class="n">spark</span><span class="o">.</span><span 
class="n">read</span><span class="o">.</span><span class="n">format</span><span 
class="o">(</span><span class="s">"org.apache.bahir.cloudant"</span><span 
class="o">).</span><span class="n">load</span><span class="o">(</span><span 
class="s">"n_flight"</span><span class="o">)</span>
+<span class="c1">// Caching df in memory to speed computations
+// and not to retrieve data from cloudant again
+</span><span class="n">df</span><span class="o">.</span><span 
class="n">cache</span><span class="o">()</span> 
+<span class="n">df</span><span class="o">.</span><span 
class="n">printSchema</span><span class="o">()</span>
+
+<span class="c1">// 2. Saving dataframe to Cloudant db
+</span><span class="k">val</span> <span class="n">df2</span> <span 
class="k">=</span> <span class="n">df</span><span class="o">.</span><span 
class="n">filter</span><span class="o">(</span><span class="n">df</span><span 
class="o">(</span><span class="s">"flightSegmentId"</span><span 
class="o">)</span> <span class="o">===</span> <span 
class="s">"AA106"</span><span class="o">)</span>
+    <span class="o">.</span><span class="n">select</span><span 
class="o">(</span><span class="s">"flightSegmentId"</span><span 
class="o">,</span><span class="s">"economyClassBaseCost"</span><span 
class="o">)</span>
+<span class="n">df2</span><span class="o">.</span><span 
class="n">show</span><span class="o">()</span>
+<span class="n">df2</span><span class="o">.</span><span 
class="n">write</span><span class="o">.</span><span 
class="n">format</span><span class="o">(</span><span 
class="s">"org.apache.bahir.cloudant"</span><span class="o">).</span><span 
class="n">save</span><span class="o">(</span><span 
class="s">"n_flight2"</span><span class="o">)</span>
+</code></pre>
+</div>
+
+<p>See <a 
href="examples/scala/src/main/scala/mytest/spark/CloudantDF.scala">CloudantDF.scala</a>
 for examples.</p>
+
+<p><a 
href="examples/scala/src/main/scala/mytest/spark/CloudantDFOption.scala">Sample 
code</a> on using DataFrame option to define Cloudant configuration.</p>
+
+<h3 id="using-streams-in-scala">Using Streams In Scala</h3>
+
+<div class="language-scala highlighter-rouge"><pre 
class="highlight"><code><span class="k">val</span> <span class="n">ssc</span> 
<span class="k">=</span> <span class="k">new</span> <span 
class="nc">StreamingContext</span><span class="o">(</span><span 
class="n">sparkConf</span><span class="o">,</span> <span 
class="nc">Seconds</span><span class="o">(</span><span 
class="mi">10</span><span class="o">))</span>
+<span class="k">val</span> <span class="n">changes</span> <span 
class="k">=</span> <span class="n">ssc</span><span class="o">.</span><span 
class="n">receiverStream</span><span class="o">(</span><span 
class="k">new</span> <span class="nc">CloudantReceiver</span><span 
class="o">(</span><span class="nc">Map</span><span class="o">(</span>
+  <span class="s">"cloudant.host"</span> <span class="o">-&gt;</span> <span 
class="s">"ACCOUNT.cloudant.com"</span><span class="o">,</span>
+  <span class="s">"cloudant.username"</span> <span class="o">-&gt;</span> 
<span class="s">"USERNAME"</span><span class="o">,</span>
+  <span class="s">"cloudant.password"</span> <span class="o">-&gt;</span> 
<span class="s">"PASSWORD"</span><span class="o">,</span>
+  <span class="s">"database"</span> <span class="o">-&gt;</span> <span 
class="s">"n_airportcodemapping"</span><span class="o">)))</span>
+
+<span class="n">changes</span><span class="o">.</span><span 
class="n">foreachRDD</span><span class="o">((</span><span 
class="n">rdd</span><span class="k">:</span> <span class="kt">RDD</span><span 
class="o">[</span><span class="kt">String</span><span class="o">],</span> <span 
class="n">time</span><span class="k">:</span> <span class="kt">Time</span><span 
class="o">)</span> <span class="k">=&gt;</span> <span class="o">{</span>
+  <span class="c1">// Get the singleton instance of SparkSession
+</span>  <span class="k">val</span> <span class="n">spark</span> <span 
class="k">=</span> <span class="nc">SparkSessionSingleton</span><span 
class="o">.</span><span class="n">getInstance</span><span 
class="o">(</span><span class="n">rdd</span><span class="o">.</span><span 
class="n">sparkContext</span><span class="o">.</span><span 
class="n">getConf</span><span class="o">)</span>
+
+  <span class="n">println</span><span class="o">(</span><span 
class="n">s</span><span class="s">"========= $time ========="</span><span 
class="o">)</span>
+  <span class="c1">// Convert RDD[String] to DataFrame
+</span>  <span class="k">val</span> <span class="n">changesDataFrame</span> 
<span class="k">=</span> <span class="n">spark</span><span 
class="o">.</span><span class="n">read</span><span class="o">.</span><span 
class="n">json</span><span class="o">(</span><span class="n">rdd</span><span 
class="o">)</span>
+  <span class="k">if</span> <span class="o">(!</span><span 
class="n">changesDataFrame</span><span class="o">.</span><span 
class="n">schema</span><span class="o">.</span><span 
class="n">isEmpty</span><span class="o">)</span> <span class="o">{</span>
+    <span class="n">changesDataFrame</span><span class="o">.</span><span 
class="n">printSchema</span><span class="o">()</span>
+    <span class="n">changesDataFrame</span><span class="o">.</span><span 
class="n">select</span><span class="o">(</span><span class="s">"*"</span><span 
class="o">).</span><span class="n">show</span><span class="o">()</span>
+    <span class="o">....</span>
+  <span class="o">}</span>
+<span class="o">})</span>
+<span class="n">ssc</span><span class="o">.</span><span 
class="n">start</span><span class="o">()</span>
+<span class="c1">// run streaming for 120 secs
+</span><span class="nc">Thread</span><span class="o">.</span><span 
class="n">sleep</span><span class="o">(</span><span 
class="mi">120000L</span><span class="o">)</span>
+<span class="n">ssc</span><span class="o">.</span><span 
class="n">stop</span><span class="o">(</span><span class="kc">true</span><span 
class="o">)</span>
+       
+</code></pre>
+</div>
+
+<p>See <a 
href="examples/scala/src/main/scala/mytest/spark/CloudantStreaming.scala">CloudantStreaming.scala</a>
 for examples.</p>
+
+<p>By default, Spark Streaming will load all documents from a database. If you 
want to limit the loading to 
+specific documents, use the <code class="highlighter-rouge">selector</code> option 
of <code class="highlighter-rouge">CloudantReceiver</code> and specify your 
conditions 
+(see the <a 
href="examples/scala/src/main/scala/mytest/spark/CloudantStreamingSelector.scala">CloudantStreamingSelector.scala</a>
+example for more details):</p>
+
+<div class="language-scala highlighter-rouge"><pre 
class="highlight"><code><span class="k">val</span> <span 
class="n">changes</span> <span class="k">=</span> <span 
class="n">ssc</span><span class="o">.</span><span 
class="n">receiverStream</span><span class="o">(</span><span 
class="k">new</span> <span class="nc">CloudantReceiver</span><span 
class="o">(</span><span class="nc">Map</span><span class="o">(</span>
+  <span class="s">"cloudant.host"</span> <span class="o">-&gt;</span> <span 
class="s">"ACCOUNT.cloudant.com"</span><span class="o">,</span>
+  <span class="s">"cloudant.username"</span> <span class="o">-&gt;</span> 
<span class="s">"USERNAME"</span><span class="o">,</span>
+  <span class="s">"cloudant.password"</span> <span class="o">-&gt;</span> 
<span class="s">"PASSWORD"</span><span class="o">,</span>
+  <span class="s">"database"</span> <span class="o">-&gt;</span> <span 
class="s">"sales"</span><span class="o">,</span>
+  <span class="s">"selector"</span> <span class="o">-&gt;</span> <span 
class="s">"{\"month\":\"May\", \"rep\":\"John\"}"</span><span 
class="o">)))</span>
+</code></pre>
+</div>
+
+  </div>
+</div>
+
+
+
+      <hr>
+
+      <!-- <p>&copy; 2017 </p>-->
+      <footer class="site-footer">
+    <div class="wrapper">
+        <div class="footer-col-wrapper">
+            
+            <div style="text-align:center;">
+                
+                <div>
+                    Copyright &copy; 2016-2017 <a 
href="http://www.apache.org">The Apache Software Foundation</a>.
+                    Licensed under the <a 
href="http://www.apache.org/licenses/LICENSE-2.0">Apache License, Version
+                    2.0</a>.
+                    <br>
+                    
+                    Apache and the Apache Feather logo are trademarks of The 
Apache Software Foundation.
+                    
+                </div>
+            </div>
+        </div>
+    </div>
+</footer>
+
+    </div>
+
+    
+
+
+  <script type="text/javascript">
+  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
+  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new 
Date();a=s.createElement(o),
+  
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
+  })(window,document,'script','//www.google-analytics.com/analytics.js','ga');
+
+  ga('create', 'UA-79140859-1', 'bahir.apache.org');
+  ga('require', 'linkid', 'linkid.js');
+  ga('send', 'pageview');
+
+</script>
+
+
+
+    <script 
src="/assets/themes/apache-clean/jquery/jquery-2.1.1.min.js"></script>
+
+    <script 
src="/assets/themes/apache-clean/bootstrap/js/bootstrap.min.js"></script>
+
+
+  </body>
+</html>
+


http://git-wip-us.apache.org/repos/asf/bahir-website/blob/7ad4d5a8/content/docs/spark/current/spark-sql-streaming-akka/index.html
----------------------------------------------------------------------
diff --git a/content/docs/spark/current/spark-sql-streaming-akka/index.html 
b/content/docs/spark/current/spark-sql-streaming-akka/index.html
new file mode 100644
index 0000000..6fb3cce
--- /dev/null
+++ b/content/docs/spark/current/spark-sql-streaming-akka/index.html
@@ -0,0 +1,380 @@
+
+
+<!DOCTYPE html>
+<html lang="en">
+  <head>
+    <meta charset="utf-8">
+    <title>Spark Structured Streaming Akka</title>
+    <meta name="description" content="Spark Structured Streaming Akka">
+    <meta name="author" content="">
+
+    <!-- Enable responsive viewport -->
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+
+    <!-- Le HTML5 shim, for IE6-8 support of HTML elements -->
+    <!--[if lt IE 9]>
+      <script 
src="http://html5shim.googlecode.com/svn/trunk/html5.js"></script>
+    <![endif]-->
+
+    <!-- Le styles -->
+    <link href="/assets/themes/apache-clean/bootstrap/css/bootstrap.css" 
rel="stylesheet">
+    <link href="/assets/themes/apache-clean/css/style.css?body=1" 
rel="stylesheet" type="text/css">
+    <link href="/assets/themes/apache-clean/css/syntax.css" rel="stylesheet"  
type="text/css" media="screen" />
+    <!-- Le fav and touch icons -->
+    <!-- Update these with your own images
+    <link rel="shortcut icon" href="images/favicon.ico">
+    <link rel="apple-touch-icon" href="images/apple-touch-icon.png">
+    <link rel="apple-touch-icon" sizes="72x72" 
href="images/apple-touch-icon-72x72.png">
+    <link rel="apple-touch-icon" sizes="114x114" 
href="images/apple-touch-icon-114x114.png">
+  -->
+
+    <!-- make tables sortable by adding class tag "sortable" to table elements 
-->
+    <script 
src="http://www.kryogenix.org/code/browser/sorttable/sorttable.js"></script>
+
+
+  </head>
+
+  <body>
+
+    
+
+<!-- Navigation -->
+<div id="nav-bar">
+  <nav id="nav-container" class="navbar navbar-inverse " role="navigation">
+    <div class="container">
+      <!-- Brand and toggle get grouped for better mobile display -->
+
+      <div class="navbar-header page-scroll">
+        <button type="button" class="navbar-toggle" data-toggle="collapse" 
data-target=".navbar-collapse">
+          <span class="sr-only">Toggle navigation</span>
+          <span class="icon-bar"></span>
+          <span class="icon-bar"></span>
+          <span class="icon-bar"></span>
+        </button>
+        <a class="navbar-brand page-scroll" href="/#home">Home</a>
+      </div>
+      <!-- Collect the nav links, forms, and other content for toggling -->
+      <nav class="navbar-collapse collapse" role="navigation">
+        <ul class="nav navbar-nav">
+          
+          
+          
+          <li id="download">
+            
+            <a href="#" data-toggle="dropdown" 
class="dropdown-toggle">Download<b class="caret"></b></a>
+            <ul class="dropdown-menu dropdown-left">
+              
+              
+              <li><a href="/downloads/spark" target="_self">Bahir Spark 
Extensions</a></li>
+              
+              
+              <li><a href="/downloads/flink" target="_self">Bahir Flink 
Extensions</a></li>
+              
+            </ul>
+            
+          </li>
+          
+          
+          
+          
+          <li id="community">
+            
+            <a href="#" data-toggle="dropdown" 
class="dropdown-toggle">Community<b class="caret"></b></a>
+            <ul class="dropdown-menu dropdown-left">
+              
+              
+              <li><a href="/community" target="_self">Get Involved</a></li>
+              
+              
+              <li><a href="/contributing" target="_self">Contributing</a></li>
+              
+              
+              <li><a href="/contributing-extensions" 
target="_self">Contributing Extensions</a></li>
+              
+              
+              <li><a href="https://issues.apache.org/jira/browse/BAHIR"; 
target="_blank">Issue Tracker</a></li>
+              
+              
+              <li><a href="https://github.com/apache/bahir"; 
target="_blank">Source Code</a></li>
+              
+              
+              <li><a href="/community-members" target="_self">Project 
Committers</a></li>
+              
+            </ul>
+            
+          </li>
+          
+          
+          
+          
+          <li id="documentation">
+            
+            <a href="#" data-toggle="dropdown" 
class="dropdown-toggle">Documentation<b class="caret"></b></a>
+            <ul class="dropdown-menu dropdown-left">
+              
+              
+              <li><a href="/docs/spark/overview" target="_self">Bahir Spark 
Extensions</a></li>
+              
+              
+              <li><a href="/docs/flink/overview" target="_self">Bahir Flink 
Extensions</a></li>
+              
+            </ul>
+            
+          </li>
+          
+          
+          
+          
+          <li id="github">
+            
+            <a href="#" data-toggle="dropdown" 
class="dropdown-toggle">GitHub<b class="caret"></b></a>
+            <ul class="dropdown-menu dropdown-left">
+              
+              
+              <li><a href="https://github.com/apache/bahir"; 
target="_blank">Bahir Spark Extensions</a></li>
+              
+              
+              <li><a href="https://github.com/apache/bahir-flink"; 
target="_blank">Bahir Flink Extensions</a></li>
+              
+              
+              <li><a href="https://github.com/apache/bahir-website"; 
target="_blank">Bahir Website</a></li>
+              
+            </ul>
+            
+          </li>
+          
+          
+          
+          
+          <li id="apache">
+            
+            <a href="#" data-toggle="dropdown" 
class="dropdown-toggle">Apache<b class="caret"></b></a>
+            <ul class="dropdown-menu dropdown-left">
+              
+              
+              <li><a href="http://www.apache.org/foundation/how-it-works.html"; 
target="_blank">Apache Software Foundation</a></li>
+              
+              
+              <li><a href="http://www.apache.org/licenses/"; 
target="_blank">Apache License</a></li>
+              
+              
+              <li><a href="http://www.apache.org/foundation/sponsorship"; 
target="_blank">Sponsorship</a></li>
+              
+              
+              <li><a href="http://www.apache.org/foundation/thanks.html"; 
target="_blank">Thanks</a></li>
+              
+              
+              <li><a href="/privacy-policy" target="_self">Privacy 
Policy</a></li>
+              
+            </ul>
+            
+          </li>
+          
+          
+        </ul>
+      </nav><!--/.navbar-collapse -->
+      <!-- /.navbar-collapse -->
+    </div>
+    <!-- /.container -->
+  </nav>
+</div>
+
+
+    <div class="container">
+
+      
+
+<!--<div class="hero-unit Spark Structured Streaming Akka">
+  <h1></h1>
+</div>
+-->
+
+<div class="row">
+  <div class="col-md-12">
+    <!--
+
+-->
+
+<p>A library for reading data from Akka Actors using Spark SQL Streaming (or 
Structured Streaming).</p>
+
+<h2 id="linking">Linking</h2>
+
+<p>Using SBT:</p>
+
+<div class="highlighter-rouge"><pre 
class="highlight"><code>libraryDependencies += "org.apache.bahir" %% 
"spark-sql-streaming-akka" % "2.2.0-SNAPSHOT"
+</code></pre>
+</div>
+
+<p>Using Maven:</p>
+
+<div class="highlighter-rouge"><pre class="highlight"><code>&lt;dependency&gt;
+    &lt;groupId&gt;org.apache.bahir&lt;/groupId&gt;
+    &lt;artifactId&gt;spark-sql-streaming-akka_2.11&lt;/artifactId&gt;
+    &lt;version&gt;2.2.0-SNAPSHOT&lt;/version&gt;
+&lt;/dependency&gt;
+</code></pre>
+</div>
+
+<p>This library can also be added to Spark jobs launched through <code 
class="highlighter-rouge">spark-shell</code> or <code 
class="highlighter-rouge">spark-submit</code> by using the <code 
class="highlighter-rouge">--packages</code> command line option.
+For example, to include it when starting the spark shell:</p>
+
+<div class="highlighter-rouge"><pre class="highlight"><code>$ bin/spark-shell 
--packages org.apache.bahir:spark-sql-streaming-akka_2.11:2.2.0-SNAPSHOT
+</code></pre>
+</div>
+
+<p>Unlike using <code class="highlighter-rouge">--jars</code>, using <code 
class="highlighter-rouge">--packages</code> ensures that this library and its 
dependencies will be added to the classpath.
+The <code class="highlighter-rouge">--packages</code> argument can also be 
used with <code class="highlighter-rouge">bin/spark-submit</code>.</p>
+
+<p>This library is compiled for Scala 2.11 only, and intends to support Spark 
2.0 onwards.</p>
+
+<h2 id="examples">Examples</h2>
+
+<p>A SQL Stream can be created with data streams received from an Akka Feeder 
actor using:</p>
+
+<div class="highlighter-rouge"><pre class="highlight"><code>    
sqlContext.readStream
+            
.format("org.apache.bahir.sql.streaming.akka.AkkaStreamSourceProvider")
+            .option("urlOfPublisher", "feederActorUri")
+            .load()
+</code></pre>
+</div>
+
+<h2 id="enable-recovering-from-failures">Enable recovering from failures.</h2>
+
+<p>Setting values for option <code 
class="highlighter-rouge">persistenceDirPath</code> helps in recovering in case 
of a restart, by restoring the state where it left off before the shutdown.</p>
+
+<div class="highlighter-rouge"><pre class="highlight"><code>    
sqlContext.readStream
+            
.format("org.apache.bahir.sql.streaming.akka.AkkaStreamSourceProvider")
+            .option("urlOfPublisher", "feederActorUri")
+            .option("persistenceDirPath", "/path/to/localdir")
+            .load() 
+</code></pre>
+</div>
+
+<h2 id="configuration-options">Configuration options.</h2>
+
+<p>This source uses <a 
href="http://doc.akka.io/api/akka/2.4/akka/actor/Actor.html">Akka Actor 
API</a>.</p>
+
+<ul>
+  <li><code class="highlighter-rouge">urlOfPublisher</code> The url of 
Publisher or Feeder actor that the Receiver actor connects to. Set this as the 
tcp url of the Publisher or Feeder actor.</li>
+  <li><code class="highlighter-rouge">persistenceDirPath</code> By default it 
is used for storing incoming messages on disk.</li>
+</ul>
+
+<h3 id="scala-api">Scala API</h3>
+
+<p>An example using the Scala API to count words from an incoming message stream.</p>
+
+<div class="highlighter-rouge"><pre class="highlight"><code>    // Create 
DataFrame representing the stream of input lines from connection
+    // to publisher or feeder actor
+    val lines = spark.readStream
+                
.format("org.apache.bahir.sql.streaming.akka.AkkaStreamSourceProvider")
+                .option("urlOfPublisher", urlOfPublisher)
+                .load().as[(String, Timestamp)]
+
+    // Split the lines into words
+    val words = lines.map(_._1).flatMap(_.split(" "))
+
+    // Generate running word count
+    val wordCounts = words.groupBy("value").count()
+
+    // Start running the query that prints the running counts to the console
+    val query = wordCounts.writeStream
+                .outputMode("complete")
+                .format("console")
+                .start()
+
+    query.awaitTermination()
+</code></pre>
+</div>
+
+<p>Please see <code class="highlighter-rouge">AkkaStreamWordCount.scala</code> 
for full example.</p>
+
+<h3 id="java-api">Java API</h3>
+
+<p>An example using the Java API to count words from an incoming message stream.</p>
+
+<div class="highlighter-rouge"><pre class="highlight"><code>    // Create 
DataFrame representing the stream of input lines from connection
+    // to publisher or feeder actor
+    Dataset&lt;String&gt; lines = spark
+                            .readStream()
+                            
.format("org.apache.bahir.sql.streaming.akka.AkkaStreamSourceProvider")
+                            .option("urlOfPublisher", urlOfPublisher)
+                            .load().select("value").as(Encoders.STRING());
+
+    // Split the lines into words
+    Dataset&lt;String&gt; words = lines.flatMap(new FlatMapFunction&lt;String, 
String&gt;() {
+      @Override
+      public Iterator&lt;String&gt; call(String s) throws Exception {
+        return Arrays.asList(s.split(" ")).iterator();
+      }
+    }, Encoders.STRING());
+
+    // Generate running word count
+    Dataset&lt;Row&gt; wordCounts = words.groupBy("value").count();
+
+    // Start running the query that prints the running counts to the console
+    StreamingQuery query = wordCounts.writeStream()
+                            .outputMode("complete")
+                            .format("console")
+                            .start();
+
+    query.awaitTermination();   
+</code></pre>
+</div>
+
+<p>Please see <code 
class="highlighter-rouge">JavaAkkaStreamWordCount.java</code> for full 
example.</p>
+
+  </div>
+</div>
+
+
+
+      <hr>
+
+      <!-- <p>&copy; 2017 </p>-->
+      <footer class="site-footer">
+    <div class="wrapper">
+        <div class="footer-col-wrapper">
+            
+            <div style="text-align:center;">
+                
+                <div>
+                    Copyright &copy; 2016-2017 <a 
href="http://www.apache.org">The Apache Software Foundation</a>.
+                    Licensed under the <a 
href="http://www.apache.org/licenses/LICENSE-2.0">Apache License, Version
+                    2.0</a>.
+                    <br>
+                    
+                    Apache and the Apache Feather logo are trademarks of The 
Apache Software Foundation.
+                    
+                </div>
+            </div>
+        </div>
+    </div>
+</footer>
+
+    </div>
+
+    
+
+
+  <script type="text/javascript">
+  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
+  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new 
Date();a=s.createElement(o),
+  
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
+  })(window,document,'script','//www.google-analytics.com/analytics.js','ga');
+
+  ga('create', 'UA-79140859-1', 'bahir.apache.org');
+  ga('require', 'linkid', 'linkid.js');
+  ga('send', 'pageview');
+
+</script>
+
+
+
+    <script 
src="/assets/themes/apache-clean/jquery/jquery-2.1.1.min.js"></script>
+
+    <script 
src="/assets/themes/apache-clean/bootstrap/js/bootstrap.min.js"></script>
+
+
+  </body>
+</html>
+

http://git-wip-us.apache.org/repos/asf/bahir-website/blob/7ad4d5a8/content/docs/spark/current/spark-sql-streaming-mqtt/index.html
----------------------------------------------------------------------
diff --git a/content/docs/spark/current/spark-sql-streaming-mqtt/index.html 
b/content/docs/spark/current/spark-sql-streaming-mqtt/index.html
index 4f8fc76..e9bf272 100644
--- a/content/docs/spark/current/spark-sql-streaming-mqtt/index.html
+++ b/content/docs/spark/current/spark-sql-streaming-mqtt/index.html
@@ -201,7 +201,7 @@
 
 <p>Using SBT:</p>
 
-<div class="highlighter-rouge"><pre 
class="highlight"><code>libraryDependencies += "org.apache.bahir" %% 
"spark-sql-streaming-mqtt" % "2.1.0"
+<div class="highlighter-rouge"><pre 
class="highlight"><code>libraryDependencies += "org.apache.bahir" %% 
"spark-sql-streaming-mqtt" % "2.1.0-SNAPSHOT"
 </code></pre>
 </div>
 
@@ -210,7 +210,7 @@
 <div class="highlighter-rouge"><pre class="highlight"><code>&lt;dependency&gt;
     &lt;groupId&gt;org.apache.bahir&lt;/groupId&gt;
     &lt;artifactId&gt;spark-sql-streaming-mqtt_2.11&lt;/artifactId&gt;
-    &lt;version&gt;2.1.0&lt;/version&gt;
+    &lt;version&gt;2.2.0-SNAPSHOT&lt;/version&gt;
 &lt;/dependency&gt;
 </code></pre>
 </div>
@@ -218,7 +218,7 @@
 <p>This library can also be added to Spark jobs launched through <code 
class="highlighter-rouge">spark-shell</code> or <code 
class="highlighter-rouge">spark-submit</code> by using the <code 
class="highlighter-rouge">--packages</code> command line option.
 For example, to include it when starting the spark shell:</p>
 
-<div class="highlighter-rouge"><pre class="highlight"><code>$ bin/spark-shell 
--packages org.apache.bahir:spark-sql-streaming-mqtt_2.11:2.1.0
+<div class="highlighter-rouge"><pre class="highlight"><code>$ bin/spark-shell 
--packages org.apache.bahir:spark-sql-streaming-mqtt_2.11:2.1.0-SNAPSHOT
 </code></pre>
 </div>
 
@@ -331,7 +331,6 @@ query.awaitTermination();
 
 <p>Please see <code 
class="highlighter-rouge">JavaMQTTStreamWordCount.java</code> for full 
example.</p>
 
-
   </div>
 </div>
 

http://git-wip-us.apache.org/repos/asf/bahir-website/blob/7ad4d5a8/content/docs/spark/current/spark-streaming-akka/index.html
----------------------------------------------------------------------
diff --git a/content/docs/spark/current/spark-streaming-akka/index.html 
b/content/docs/spark/current/spark-streaming-akka/index.html
index 78e2342..c32f7c5 100644
--- a/content/docs/spark/current/spark-streaming-akka/index.html
+++ b/content/docs/spark/current/spark-streaming-akka/index.html
@@ -201,7 +201,7 @@
 
 <p>Using SBT:</p>
 
-<div class="highlighter-rouge"><pre 
class="highlight"><code>libraryDependencies += "org.apache.bahir" %% 
"spark-streaming-akka" % "2.1.0"
+<div class="highlighter-rouge"><pre 
class="highlight"><code>libraryDependencies += "org.apache.bahir" %% 
"spark-streaming-akka" % "2.1.0-SNAPSHOT"
 </code></pre>
 </div>
 
@@ -210,7 +210,7 @@
 <div class="highlighter-rouge"><pre class="highlight"><code>&lt;dependency&gt;
     &lt;groupId&gt;org.apache.bahir&lt;/groupId&gt;
     &lt;artifactId&gt;spark-streaming-akka_2.11&lt;/artifactId&gt;
-    &lt;version&gt;2.1.0&lt;/version&gt;
+    &lt;version&gt;2.2.0-SNAPSHOT&lt;/version&gt;
 &lt;/dependency&gt;
 </code></pre>
 </div>
@@ -218,7 +218,7 @@
 <p>This library can also be added to Spark jobs launched through <code 
class="highlighter-rouge">spark-shell</code> or <code 
class="highlighter-rouge">spark-submit</code> by using the <code 
class="highlighter-rouge">--packages</code> command line option.
 For example, to include it when starting the spark shell:</p>
 
-<div class="highlighter-rouge"><pre class="highlight"><code>$ bin/spark-shell 
--packages org.apache.bahir:spark-streaming-akka_2.11:2.1.0
+<div class="highlighter-rouge"><pre class="highlight"><code>$ bin/spark-shell 
--packages org.apache.bahir:spark-streaming-akka_2.11:2.1.0-SNAPSHOT
 </code></pre>
 </div>
 

http://git-wip-us.apache.org/repos/asf/bahir-website/blob/7ad4d5a8/content/docs/spark/current/spark-streaming-mqtt/index.html
----------------------------------------------------------------------
diff --git a/content/docs/spark/current/spark-streaming-mqtt/index.html 
b/content/docs/spark/current/spark-streaming-mqtt/index.html
index 0f512f7..c1b3f61 100644
--- a/content/docs/spark/current/spark-streaming-mqtt/index.html
+++ b/content/docs/spark/current/spark-streaming-mqtt/index.html
@@ -201,7 +201,7 @@
 
 <p>Using SBT:</p>
 
-<div class="highlighter-rouge"><pre 
class="highlight"><code>libraryDependencies += "org.apache.bahir" %% 
"spark-streaming-mqtt" % "2.1.0"
+<div class="highlighter-rouge"><pre 
class="highlight"><code>libraryDependencies += "org.apache.bahir" %% 
"spark-streaming-mqtt" % "2.1.0-SNAPSHOT"
 </code></pre>
 </div>
 
@@ -210,7 +210,7 @@
 <div class="highlighter-rouge"><pre class="highlight"><code>&lt;dependency&gt;
     &lt;groupId&gt;org.apache.bahir&lt;/groupId&gt;
     &lt;artifactId&gt;spark-streaming-mqtt_2.11&lt;/artifactId&gt;
-    &lt;version&gt;2.1.0&lt;/version&gt;
+    &lt;version&gt;2.2.0-SNAPSHOT&lt;/version&gt;
 &lt;/dependency&gt;
 </code></pre>
 </div>
@@ -218,7 +218,7 @@
 <p>This library can also be added to Spark jobs launched through <code 
class="highlighter-rouge">spark-shell</code> or <code 
class="highlighter-rouge">spark-submit</code> by using the <code 
class="highlighter-rouge">--packages</code> command line option.
 For example, to include it when starting the spark shell:</p>
 
-<div class="highlighter-rouge"><pre class="highlight"><code>$ bin/spark-shell 
--packages org.apache.bahir:spark-streaming-mqtt_2.11:2.1.0
+<div class="highlighter-rouge"><pre class="highlight"><code>$ bin/spark-shell 
--packages org.apache.bahir:spark-streaming-mqtt_2.11:2.1.0-SNAPSHOT
 </code></pre>
 </div>
 
@@ -235,6 +235,7 @@ The <code class="highlighter-rouge">--packages</code> 
argument can also be used
   <li><code class="highlighter-rouge">brokerUrl</code> A url MqttClient 
connects to. Set this as the url of the Mqtt Server. e.g. 
tcp://localhost:1883.</li>
   <li><code class="highlighter-rouge">storageLevel</code> By default it is 
used for storing incoming messages on disk.</li>
   <li><code class="highlighter-rouge">topic</code> Topic MqttClient subscribes 
to.</li>
+  <li><code class="highlighter-rouge">topics</code> List of topics MqttClient 
subscribes to.</li>
   <li><code class="highlighter-rouge">clientId</code> clientId, this client is 
assoicated with. Provide the same value to recover a stopped client.</li>
   <li><code class="highlighter-rouge">QoS</code> The maximum quality of 
service to subscribe each topic at. Messages published at a lower quality of 
service will be received at the published QoS. Messages published at a higher 
quality of service will be received using the QoS specified on the 
subscribe.</li>
   <li><code class="highlighter-rouge">username</code> Sets the user name to 
use for the connection to Mqtt Server. Do not set it, if server does not need 
this. Setting it empty will lead to errors.</li>
@@ -253,12 +254,14 @@ The <code class="highlighter-rouge">--packages</code> 
argument can also be used
 this actor can be configured to handle failures, etc.</p>
 
 <div class="highlighter-rouge"><pre class="highlight"><code>val lines = 
MQTTUtils.createStream(ssc, brokerUrl, topic)
+val lines = MQTTUtils.createPairedStream(ssc, brokerUrl, topic)
 </code></pre>
 </div>
 
 <p>Additional mqtt connection options can be provided:</p>
 
 <pre><code class="language-Scala">val lines = MQTTUtils.createStream(ssc, 
brokerUrl, topic, storageLevel, clientId, username, password, cleanSession, 
qos, connectionTimeout, keepAliveInterval, mqttVersion)
+val lines = MQTTUtils.createPairedStream(ssc, brokerUrl, topics, storageLevel, 
clientId, username, password, cleanSession, qos, connectionTimeout, 
keepAliveInterval, mqttVersion)
 </code></pre>
 
 <h3 id="java-api">Java API</h3>
@@ -267,6 +270,7 @@ this actor can be configured to handle failures, etc.</p>
 this actor can be configured to handle failures, etc.</p>
 
 <div class="highlighter-rouge"><pre 
class="highlight"><code>JavaDStream&lt;String&gt; lines = 
MQTTUtils.createStream(jssc, brokerUrl, topic);
+JavaReceiverInputDStream&lt;Tuple2&lt;String, String&gt;&gt; lines = 
MQTTUtils.createPairedStream(jssc, brokerUrl, topics);
 </code></pre>
 </div>
 

http://git-wip-us.apache.org/repos/asf/bahir-website/blob/7ad4d5a8/content/docs/spark/current/spark-streaming-pubsub/index.html
----------------------------------------------------------------------
diff --git a/content/docs/spark/current/spark-streaming-pubsub/index.html 
b/content/docs/spark/current/spark-streaming-pubsub/index.html
new file mode 100644
index 0000000..d3786b4
--- /dev/null
+++ b/content/docs/spark/current/spark-streaming-pubsub/index.html
@@ -0,0 +1,308 @@
+
+
+<!DOCTYPE html>
+<html lang="en">
+  <head>
+    <meta charset="utf-8">
+    <title>Spark Streaming Google Pub-Sub</title>
+    <meta name="description" content="Spark Streaming Google Pub-Sub">
+    <meta name="author" content="">
+
+    <!-- Enable responsive viewport -->
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+
+    <!-- Le HTML5 shim, for IE6-8 support of HTML elements -->
+    <!--[if lt IE 9]>
+      <script 
src="http://html5shim.googlecode.com/svn/trunk/html5.js"></script>
+    <![endif]-->
+
+    <!-- Le styles -->
+    <link href="/assets/themes/apache-clean/bootstrap/css/bootstrap.css" 
rel="stylesheet">
+    <link href="/assets/themes/apache-clean/css/style.css?body=1" 
rel="stylesheet" type="text/css">
+    <link href="/assets/themes/apache-clean/css/syntax.css" rel="stylesheet"  
type="text/css" media="screen" />
+    <!-- Le fav and touch icons -->
+    <!-- Update these with your own images
+    <link rel="shortcut icon" href="images/favicon.ico">
+    <link rel="apple-touch-icon" href="images/apple-touch-icon.png">
+    <link rel="apple-touch-icon" sizes="72x72" 
href="images/apple-touch-icon-72x72.png">
+    <link rel="apple-touch-icon" sizes="114x114" 
href="images/apple-touch-icon-114x114.png">
+  -->
+
+    <!-- make tables sortable by adding class tag "sortable" to table elements 
-->
+    <script 
src="http://www.kryogenix.org/code/browser/sorttable/sorttable.js"></script>
+
+
+  </head>
+
+  <body>
+
+    
+
+<!-- Navigation -->
+<div id="nav-bar">
+  <nav id="nav-container" class="navbar navbar-inverse " role="navigation">
+    <div class="container">
+      <!-- Brand and toggle get grouped for better mobile display -->
+
+      <div class="navbar-header page-scroll">
+        <button type="button" class="navbar-toggle" data-toggle="collapse" 
data-target=".navbar-collapse">
+          <span class="sr-only">Toggle navigation</span>
+          <span class="icon-bar"></span>
+          <span class="icon-bar"></span>
+          <span class="icon-bar"></span>
+        </button>
+        <a class="navbar-brand page-scroll" href="/#home">Home</a>
+      </div>
+      <!-- Collect the nav links, forms, and other content for toggling -->
+      <nav class="navbar-collapse collapse" role="navigation">
+        <ul class="nav navbar-nav">
+          
+          
+          
+          <li id="download">
+            
+            <a href="#" data-toggle="dropdown" 
class="dropdown-toggle">Download<b class="caret"></b></a>
+            <ul class="dropdown-menu dropdown-left">
+              
+              
+              <li><a href="/downloads/spark" target="_self">Bahir Spark 
Extensions</a></li>
+              
+              
+              <li><a href="/downloads/flink" target="_self">Bahir Flink 
Extensions</a></li>
+              
+            </ul>
+            
+          </li>
+          
+          
+          
+          
+          <li id="community">
+            
+            <a href="#" data-toggle="dropdown" 
class="dropdown-toggle">Community<b class="caret"></b></a>
+            <ul class="dropdown-menu dropdown-left">
+              
+              
+              <li><a href="/community" target="_self">Get Involved</a></li>
+              
+              
+              <li><a href="/contributing" target="_self">Contributing</a></li>
+              
+              
+              <li><a href="/contributing-extensions" 
target="_self">Contributing Extensions</a></li>
+              
+              
+              <li><a href="https://issues.apache.org/jira/browse/BAHIR"; 
target="_blank">Issue Tracker</a></li>
+              
+              
+              <li><a href="https://github.com/apache/bahir"; 
target="_blank">Source Code</a></li>
+              
+              
+              <li><a href="/community-members" target="_self">Project 
Committers</a></li>
+              
+            </ul>
+            
+          </li>
+          
+          
+          
+          
+          <li id="documentation">
+            
+            <a href="#" data-toggle="dropdown" 
class="dropdown-toggle">Documentation<b class="caret"></b></a>
+            <ul class="dropdown-menu dropdown-left">
+              
+              
+              <li><a href="/docs/spark/overview" target="_self">Bahir Spark 
Extensions</a></li>
+              
+              
+              <li><a href="/docs/flink/overview" target="_self">Bahir Flink 
Extensions</a></li>
+              
+            </ul>
+            
+          </li>
+          
+          
+          
+          
+          <li id="github">
+            
+            <a href="#" data-toggle="dropdown" 
class="dropdown-toggle">GitHub<b class="caret"></b></a>
+            <ul class="dropdown-menu dropdown-left">
+              
+              
+              <li><a href="https://github.com/apache/bahir"; 
target="_blank">Bahir Spark Extensions</a></li>
+              
+              
+              <li><a href="https://github.com/apache/bahir-flink"; 
target="_blank">Bahir Flink Extensions</a></li>
+              
+              
+              <li><a href="https://github.com/apache/bahir-website"; 
target="_blank">Bahir Website</a></li>
+              
+            </ul>
+            
+          </li>
+          
+          
+          
+          
+          <li id="apache">
+            
+            <a href="#" data-toggle="dropdown" 
class="dropdown-toggle">Apache<b class="caret"></b></a>
+            <ul class="dropdown-menu dropdown-left">
+              
+              
+              <li><a href="http://www.apache.org/foundation/how-it-works.html"; 
target="_blank">Apache Software Foundation</a></li>
+              
+              
+              <li><a href="http://www.apache.org/licenses/"; 
target="_blank">Apache License</a></li>
+              
+              
+              <li><a href="http://www.apache.org/foundation/sponsorship"; 
target="_blank">Sponsorship</a></li>
+              
+              
+              <li><a href="http://www.apache.org/foundation/thanks.html"; 
target="_blank">Thanks</a></li>
+              
+              
+              <li><a href="/privacy-policy" target="_self">Privacy 
Policy</a></li>
+              
+            </ul>
+            
+          </li>
+          
+          
+        </ul>
+      </nav><!--/.navbar-collapse -->
+      <!-- /.navbar-collapse -->
+    </div>
+    <!-- /.container -->
+  </nav>
+</div>
+
+
+    <div class="container">
+
+      
+
+<!--<div class="hero-unit Spark Streaming Google Pub-Sub">
+  <h1></h1>
+</div>
+-->
+
+<div class="row">
+  <div class="col-md-12">
+    <!--
+
+-->
+
+<p>A library for reading data from <a 
href="https://cloud.google.com/pubsub/">Google Cloud Pub/Sub</a> using Spark 
Streaming.</p>
+
+<h2 id="linking">Linking</h2>
+
+<p>Using SBT:</p>
+
+<div class="highlighter-rouge"><pre 
class="highlight"><code>libraryDependencies += "org.apache.bahir" %% 
"spark-streaming-pubsub" % "2.2.0-SNAPSHOT"
+</code></pre>
+</div>
+
+<p>Using Maven:</p>
+
+<div class="highlighter-rouge"><pre class="highlight"><code>&lt;dependency&gt;
+    &lt;groupId&gt;org.apache.bahir&lt;/groupId&gt;
+    &lt;artifactId&gt;spark-streaming-pubsub_2.11&lt;/artifactId&gt;
+    &lt;version&gt;2.2.0-SNAPSHOT&lt;/version&gt;
+&lt;/dependency&gt;
+</code></pre>
+</div>
+
+<p>This library can also be added to Spark jobs launched through <code 
class="highlighter-rouge">spark-shell</code> or <code 
class="highlighter-rouge">spark-submit</code> by using the <code 
class="highlighter-rouge">--packages</code> command line option.
+For example, to include it when starting the spark shell:</p>
+
+<div class="highlighter-rouge"><pre class="highlight"><code>$ bin/spark-shell 
--packages org.apache.bahir:spark-streaming-pubsub_2.11:2.2.0-SNAPSHOT
+</code></pre>
+</div>
+
+<p>Unlike using <code class="highlighter-rouge">--jars</code>, using <code 
class="highlighter-rouge">--packages</code> ensures that this library and its 
dependencies will be added to the classpath.
+The <code class="highlighter-rouge">--packages</code> argument can also be 
used with <code class="highlighter-rouge">bin/spark-submit</code>.</p>
+
+<h2 id="examples">Examples</h2>
+
+<p>First you need to create credentials using SparkGCPCredentials; it supports four 
types of credentials:
+* application default
+    <code class="highlighter-rouge">SparkGCPCredentials.builder.build()</code>
+* json type service account
+    <code 
class="highlighter-rouge">SparkGCPCredentials.builder.jsonServiceAccount(PATH_TO_JSON_KEY).build()</code>
+* p12 type service account
+    <code 
class="highlighter-rouge">SparkGCPCredentials.builder.p12ServiceAccount(PATH_TO_P12_KEY,
 EMAIL_ACCOUNT).build()</code>
+* metadata service account(running on dataproc)
+    <code 
class="highlighter-rouge">SparkGCPCredentials.builder.metadataServiceAccount().build()</code></p>
+
+<h3 id="scala-api">Scala API</h3>
+
+<div class="highlighter-rouge"><pre class="highlight"><code>val lines = 
PubsubUtils.createStream(ssc, projectId, subscriptionName, credential, ..)
+</code></pre>
+</div>
+
+<h3 id="java-api">Java API</h3>
+
+<div class="highlighter-rouge"><pre 
class="highlight"><code>JavaDStream&lt;SparkPubsubMessage&gt; lines = 
PubsubUtils.createStream(jssc, projectId, subscriptionName, credential...) 
+</code></pre>
+</div>
+
+<p>See end-to-end examples at <a href="https://github.com/apache/bahir/tree/master/streaming-pubsub/examples">Google Cloud 
Pubsub Examples</a></p>
+
+  </div>
+</div>
+
+
+
+      <hr>
+
+      <!-- <p>&copy; 2017 </p>-->
+      <footer class="site-footer">
+    <div class="wrapper">
+        <div class="footer-col-wrapper">
+            
+            <div style="text-align:center;">
+                
+                <div>
+                    Copyright &copy; 2016-2017 <a 
href="http://www.apache.org";>The Apache Software Foundation</a>.
+                    Licensed under the <a 
href="http://www.apache.org/licenses/LICENSE-2.0";>Apache License, Version
+                    2.0</a>.
+                    <br>
+                    
+                    Apache and the Apache Feather logo are trademarks of The 
Apache Software Foundation.
+                    
+                </div>
+            </div>
+        </div>
+    </div>
+</footer>
+
+    </div>
+
+    
+
+
+  <script type="text/javascript">
+  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
+  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new 
Date();a=s.createElement(o),
+  
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
+  })(window,document,'script','//www.google-analytics.com/analytics.js','ga');
+
+  ga('create', 'UA-79140859-1', 'bahir.apache.org');
+  ga('require', 'linkid', 'linkid.js');
+  ga('send', 'pageview');
+
+</script>
+
+
+
+    <script 
src="/assets/themes/apache-clean/jquery/jquery-2.1.1.min.js"></script>
+
+    <script 
src="/assets/themes/apache-clean/bootstrap/js/bootstrap.min.js"></script>
+
+
+  </body>
+</html>
+

http://git-wip-us.apache.org/repos/asf/bahir-website/blob/7ad4d5a8/content/docs/spark/current/spark-streaming-twitter/index.html
----------------------------------------------------------------------
diff --git a/content/docs/spark/current/spark-streaming-twitter/index.html 
b/content/docs/spark/current/spark-streaming-twitter/index.html
index ccbde83..dc35cd3 100644
--- a/content/docs/spark/current/spark-streaming-twitter/index.html
+++ b/content/docs/spark/current/spark-streaming-twitter/index.html
@@ -201,7 +201,7 @@
 
 <p>Using SBT:</p>
 
-<div class="highlighter-rouge"><pre 
class="highlight"><code>libraryDependencies += "org.apache.bahir" %% 
"spark-streaming-twitter" % "2.1.0"
+<div class="highlighter-rouge"><pre 
class="highlight"><code>libraryDependencies += "org.apache.bahir" %% 
"spark-streaming-twitter" % "2.2.0-SNAPSHOT"
 </code></pre>
 </div>
 
@@ -210,7 +210,7 @@
 <div class="highlighter-rouge"><pre class="highlight"><code>&lt;dependency&gt;
     &lt;groupId&gt;org.apache.bahir&lt;/groupId&gt;
     &lt;artifactId&gt;spark-streaming-twitter_2.11&lt;/artifactId&gt;
-    &lt;version&gt;2.1.0&lt;/version&gt;
+    &lt;version&gt;2.2.0-SNAPSHOT&lt;/version&gt;
 &lt;/dependency&gt;
 </code></pre>
 </div>
@@ -218,7 +218,7 @@
 <p>This library can also be added to Spark jobs launched through <code 
class="highlighter-rouge">spark-shell</code> or <code 
class="highlighter-rouge">spark-submit</code> by using the <code 
class="highlighter-rouge">--packages</code> command line option.
 For example, to include it when starting the spark shell:</p>
 
-<div class="highlighter-rouge"><pre class="highlight"><code>$ bin/spark-shell 
--packages org.apache.bahir:spark-streaming-twitter_2.11:2.1.0
+<div class="highlighter-rouge"><pre class="highlight"><code>$ bin/spark-shell 
--packages org.apache.bahir:spark-streaming-twitter_2.11:2.2.0-SNAPSHOT
 </code></pre>
 </div>
 
@@ -248,7 +248,7 @@ TwitterUtils.createStream(jssc);
 </code></pre>
 </div>
 
-<p>You can also either get the public stream, or get the filtered stream based 
on keywords. 
+<p>You can also either get the public stream, or get the filtered stream based 
on keywords.
 See end-to-end examples at <a 
href="https://github.com/apache/bahir/tree/master/streaming-twitter/examples";>Twitter
 Examples</a></p>
 
   </div>

http://git-wip-us.apache.org/repos/asf/bahir-website/blob/7ad4d5a8/content/docs/spark/current/spark-streaming-zeromq/index.html
----------------------------------------------------------------------
diff --git a/content/docs/spark/current/spark-streaming-zeromq/index.html 
b/content/docs/spark/current/spark-streaming-zeromq/index.html
index 1e2157d..c8d1e59 100644
--- a/content/docs/spark/current/spark-streaming-zeromq/index.html
+++ b/content/docs/spark/current/spark-streaming-zeromq/index.html
@@ -201,7 +201,7 @@
 
 <p>Using SBT:</p>
 
-<div class="highlighter-rouge"><pre 
class="highlight"><code>libraryDependencies += "org.apache.bahir" %% 
"spark-streaming-zeromq" % "2.1.0"
+<div class="highlighter-rouge"><pre 
class="highlight"><code>libraryDependencies += "org.apache.bahir" %% 
"spark-streaming-zeromq" % "2.2.0-SNAPSHOT"
 </code></pre>
 </div>
 
@@ -210,7 +210,7 @@
 <div class="highlighter-rouge"><pre class="highlight"><code>&lt;dependency&gt;
     &lt;groupId&gt;org.apache.bahir&lt;/groupId&gt;
     &lt;artifactId&gt;spark-streaming-zeromq_2.11&lt;/artifactId&gt;
-    &lt;version&gt;2.1.0&lt;/version&gt;
+    &lt;version&gt;2.2.0-SNAPSHOT&lt;/version&gt;
 &lt;/dependency&gt;
 </code></pre>
 </div>
@@ -218,7 +218,7 @@
 <p>This library can also be added to Spark jobs launched through <code 
class="highlighter-rouge">spark-shell</code> or <code 
class="highlighter-rouge">spark-submit</code> by using the <code 
class="highlighter-rouge">--packages</code> command line option.
 For example, to include it when starting the spark shell:</p>
 
-<div class="highlighter-rouge"><pre class="highlight"><code>$ bin/spark-shell 
--packages org.apache.bahir:spark-streaming-zeromq_2.11:2.1.0
+<div class="highlighter-rouge"><pre class="highlight"><code>$ bin/spark-shell 
--packages org.apache.bahir:spark-streaming-zeromq_2.11:2.2.0-SNAPSHOT
 </code></pre>
 </div>
 

http://git-wip-us.apache.org/repos/asf/bahir-website/blob/7ad4d5a8/content/docs/spark/overview/index.html
----------------------------------------------------------------------
diff --git a/content/docs/spark/overview/index.html 
b/content/docs/spark/overview/index.html
index f102da9..31fa392 100644
--- a/content/docs/spark/overview/index.html
+++ b/content/docs/spark/overview/index.html
@@ -199,6 +199,7 @@
 
 <ul>
   <li><a href="/docs/spark/current/documentation">Current - 
2.2.0-SNAPSHOT</a></li>
+  <li><a href="/docs/spark/2.1.1/documentation">2.1.1</a></li>
   <li><a href="/docs/spark/2.1.0/documentation">2.1.0</a></li>
   <li><a href="/docs/spark/2.0.2/documentation">2.0.2</a></li>
   <li><a href="/docs/spark/2.0.1/documentation">2.0.1</a></li>

http://git-wip-us.apache.org/repos/asf/bahir-website/blob/7ad4d5a8/content/downloads/flink/index.html
----------------------------------------------------------------------
diff --git a/content/downloads/flink/index.html 
b/content/downloads/flink/index.html
index c96b31d..52744e3 100644
--- a/content/downloads/flink/index.html
+++ b/content/downloads/flink/index.html
@@ -201,15 +201,75 @@
 
 <h2 id="bahir-extensions-for-apache-flink-latest-release">Bahir Extensions for 
Apache Flink Latest Release</h2>
 
-<p>Currently, there isn’t a release available for Bahir Flink Extensions 
yet.</p>
-
-<p>You can still retrieve the source files from our git repository by 
typing:</p>
+<p>Our latest Apache Bahir release for Apache Flink extensions is 1.0, 
released on 05/24/2017.</p>
+
+<table class="table table-hover sortable">
+    <thead>
+        <tr>
+            <th><b>Name</b></th>
+            <th><b>Archive</b></th>
+            <th><b>MD5</b></th>
+            <!--th><b>SHA-1</b></th-->
+            <th><b>signature</b></th>
+        </tr>
+    </thead>
+    <tbody>
+        <tr>
+            <td>Apache Bahir Flink Extensions 1.0 (tar.gz)</td>
+            <td><a 
href="http://www.apache.org/dyn/closer.lua/bahir/bahir-flink/1.0/apache-bahir-flink-1.0-src.tgz";>tar.gz</a></td>
+            <td><a 
href="http://www.apache.org/dist/bahir/bahir-flink/1.0/apache-bahir-flink-1.0-src.tgz.md5";>MD5</a></td>
+            <!--td><a 
href="http://www.apache.org/dist/bahir/bahir-flink/1.0/apache-bahir-flink-1.0-src.tgz.sha1";>SHA-1</a></td-->
+            <td><a 
href="http://www.apache.org/dist/bahir/bahir-flink/1.0/apache-bahir-flink-1.0-src.tgz.asc";>ASC</a></td>
+        </tr>
+        <tr>
+            <td>Apache Bahir Flink Extensions 1.0 (zip)</td>
+            <td><a 
href="http://www.apache.org/dyn/closer.lua/bahir/bahir-flink/1.0/apache-bahir-flink-1.0-src.zip";>zip</a></td>
+            <td><a 
href="http://www.apache.org/dist/bahir/bahir-flink/1.0/apache-bahir-flink-1.0-src.zip.md5";>MD5</a></td>
+            <!--td><a 
href="http://www.apache.org/dist/bahir/bahir-flink/1.0/apache-bahir-flink-1.0-src.zip.sha1";>SHA-1</a></td-->
+            <td><a 
href="http://www.apache.org/dist/bahir/bahir-flink/1.0/apache-bahir-flink-1.0-src.zip.asc";>ASC</a></td>
+        </tr>
+        <tr>
+            <td>Release Notes</td>
+            <td><a href="/releases/flink/1.0/release-notes">1.0</a></td>
+            <td></td>
+            <!--td></td-->
+            <td></td>
+        </tr>
+    </tbody>
+</table>
+
+<p>You can also retrieve the source files from our git repository by 
typing:</p>
 
 <pre>
 git clone https://github.com/apache/bahir-flink
 cd bahir-flink
+git checkout -b tags/v1.0 v1.0
 </pre>
 
+<h3 id="previous-releases">Previous Releases</h3>
+
+<p>All previous releases of Apache Bahir Flink Extensions can be found in the 
<a href="http://archive.apache.org/dist/bahir/bahir-flink";>archives</a>.</p>
+
+<h2 id="verifying-a-release">Verifying a Release</h2>
+
+<p>Instructions for checking hashes and signatures are provided on the <a 
href="http://www.apache.org/info/verification.html";>Verifying Apache Software 
Foundation Releases</a> page.</p>
+
+<p>Choose a source distribution in either <em>tar</em> or <em>zip</em> format,
+and <a href="http://www.apache.org/dyn/closer.cgi#verify";>verify</a>
+using the corresponding <em>pgp</em> signature (using the committer file in
+<a href="http://www.apache.org/dist/bahir-flink/KEYS";>KEYS</a>).
+If you cannot do that, the <em>md5</em> hash file may be used to check that the
+download has completed OK.</p>
+
+<p>For fast downloads, current source distributions are hosted on mirror 
servers;
+older source distributions are in the
+<a href="http://archive.apache.org/dist/bahir/bahir-flink/">archive</a>.
+If a download from a mirror fails, retry, and the second download will likely
+succeed.</p>
+
+<p>For security, hash and signature files are always hosted at
+<a href="https://www.apache.org/dist";>Apache</a>.</p>
+
 
   </div>
 </div>

http://git-wip-us.apache.org/repos/asf/bahir-website/blob/7ad4d5a8/content/downloads/spark/index.html
----------------------------------------------------------------------
diff --git a/content/downloads/spark/index.html 
b/content/downloads/spark/index.html
index 7aa4e56..7c59b8f 100644
--- a/content/downloads/spark/index.html
+++ b/content/downloads/spark/index.html
@@ -199,9 +199,9 @@
 
 <p>Please find below the latest releases of Apache Bahir for Apache Spark 
Extensions. Note that the binary artifacts for each platform are also published 
independently through Maven and each <a href="/docs/spark/overview">extension 
documentation page</a> describes how to add these artifacts to your 
application.</p>
 
-<h2 id="bahir-extensions-for-apache-spark-210-release">Bahir Extensions for 
Apache Spark 2.1.0 Release</h2>
+<h2 id="bahir-extensions-for-apache-spark-211-release">Bahir Extensions for 
Apache Spark 2.1.1 Release</h2>
 
-<p>Our latest release supports Apache Spark 2.1.0, and was released on 
02/22/2017.</p>
+<p>Our latest release supports Apache Spark 2.1.1, and was released on 
07/11/2017.</p>
 
 <table class="table table-hover sortable">
     <thead>
@@ -215,22 +215,22 @@
     </thead>
     <tbody>
         <tr>
-            <td>Apache Bahir Extensions for Apache Spark 2.1.0 (tar.gz)</td>
-            <td><a 
href="https://www.apache.org/dyn/closer.lua/bahir/2.1.0/apache-bahir-2.1.0-src.tar.gz";>tar.gz</a></td>
-            <td><a 
href="https://www.apache.org/dist/bahir/2.1.0/apache-bahir-2.1.0-src.tar.gz.md5";>MD5</a></td>
-            <!--td><a 
href="https://www.apache.org/dist/bahir/2.1.0/apache-bahir-2.1.0-src.tar.gz.sha1";>SHA-1</a></td-->
-            <td><a 
href="https://www.apache.org/dist/bahir/2.1.0/apache-bahir-2.1.0-src.tar.gz.asc";>ASC</a></td>
+            <td>Apache Bahir Extensions for Apache Spark 2.1.1 (tar.gz)</td>
+            <td><a 
href="http://www.apache.org/dyn/closer.lua/bahir/bahir-spark/2.1.1/apache-bahir-2.1.1-src.tar.gz";>tar.gz</a></td>
+            <td><a 
href="http://www.apache.org/dist/bahir/bahir-spark/2.1.1/apache-bahir-2.1.1-src.tar.gz.md5";>MD5</a></td>
+            <!--td><a 
href="http://www.apache.org/dist/bahir/bahir-spark/2.1.1/apache-bahir-2.1.1-src.tar.gz.sha1";>SHA-1</a></td-->
+            <td><a 
href="http://www.apache.org/dist/bahir/bahir-spark/2.1.1/apache-bahir-2.1.1-src.tar.gz.asc";>ASC</a></td>
         </tr>
         <tr>
-            <td>Apache Bahir Extensions for Apache Spark 2.1.0 (zip)</td>
-            <td><a 
href="https://www.apache.org/dyn/closer.lua/bahir/2.1.0/apache-bahir-2.1.0-src.zip";>zip</a></td>
-            <td><a 
href="https://www.apache.org/dist/bahir/2.1.0/apache-bahir-2.1.0-src.zip.md5";>MD5</a></td>
-            <!--td><a 
href="https://www.apache.org/dist/bahir/2.1.0/apache-bahir-2.1.0-src.zip.sha1";>SHA-1</a></td-->
-            <td><a 
href="https://www.apache.org/dist/bahir/2.1.0/apache-bahir-2.1.0-src.zip.asc";>ASC</a></td>
+            <td>Apache Bahir Extensions for Apache Spark 2.1.1 (zip)</td>
+            <td><a 
href="http://www.apache.org/dyn/closer.lua/bahir/bahir-spark/2.1.1/apache-bahir-2.1.1-src.zip";>zip</a></td>
+            <td><a 
href="http://www.apache.org/dist/bahir/bahir-spark/2.1.1/apache-bahir-2.1.1-src.zip.md5";>MD5</a></td>
+            <!--td><a 
href="http://www.apache.org/dist/bahir/bahir-spark/2.1.1/apache-bahir-2.1.1-src.zip.sha1";>SHA-1</a></td-->
+            <td><a 
href="http://www.apache.org/dist/bahir/bahir-spark/2.1.1/apache-bahir-2.1.1-src.zip.asc";>ASC</a></td>
         </tr>
         <tr>
             <td>Release Notes</td>
-            <td><a href="/releases/spark/2.1.0/release-notes">2.1.0</a></td>
+            <td><a href="/releases/spark/2.1.1/release-notes">2.1.1</a></td>
             <td></td>
             <!--td></td-->
             <td></td>
@@ -243,7 +243,7 @@
 <pre>
 git clone https://github.com/apache/bahir
 cd bahir
-git checkout -b tags/v2.1.0 v2.1.0
+git checkout -b tags/v2.1.1 v2.1.1
 </pre>
 
 <h3 id="previous-releases">Previous Releases</h3>

http://git-wip-us.apache.org/repos/asf/bahir-website/blob/7ad4d5a8/content/feed.xml
----------------------------------------------------------------------
diff --git a/content/feed.xml b/content/feed.xml
index 624a87b..afe7441 100644
--- a/content/feed.xml
+++ b/content/feed.xml
@@ -6,8 +6,8 @@
 </description>
     <link>http://bahir.apache.org/</link>
     <atom:link href="http://bahir.apache.org/feed.xml" rel="self" 
type="application/rss+xml"/>
-    <pubDate>Sat, 24 Jun 2017 05:25:24 -0700</pubDate>
-    <lastBuildDate>Sat, 24 Jun 2017 05:25:24 -0700</lastBuildDate>
+    <pubDate>Mon, 17 Jul 2017 10:26:27 -0700</pubDate>
+    <lastBuildDate>Mon, 17 Jul 2017 10:26:27 -0700</lastBuildDate>
     <generator>Jekyll v3.2.1</generator>
     
       <item>

[2/5] bahir-website git commit: Publishing from 6d16a5c37065c70c06953838f0d76469926967a0

Reply via email to