Publishing from feca558749d7db51c640adc75321a39a1dd745c7
Project: http://git-wip-us.apache.org/repos/asf/bahir-website/repo Commit: http://git-wip-us.apache.org/repos/asf/bahir-website/commit/cc64cde3 Tree: http://git-wip-us.apache.org/repos/asf/bahir-website/tree/cc64cde3 Diff: http://git-wip-us.apache.org/repos/asf/bahir-website/diff/cc64cde3 Branch: refs/heads/asf-site Commit: cc64cde3d5118a301b4031f7d7b932c41d6e6501 Parents: cde7fd3 Author: Luciano Resende <lrese...@apache.org> Authored: Wed Sep 6 16:55:46 2017 -0700 Committer: Luciano Resende <lrese...@apache.org> Committed: Wed Sep 6 16:55:46 2017 -0700 ---------------------------------------------------------------------- .../docs/spark/2.0.0/documentation/index.html | 2 +- .../docs/spark/2.0.1/documentation/index.html | 2 +- .../docs/spark/2.0.2/documentation/index.html | 2 +- .../docs/spark/2.1.0/documentation/index.html | 2 +- .../docs/spark/2.1.1/documentation/index.html | 2 +- .../docs/spark/2.2.0/documentation/index.html | 284 ++++++++ .../spark/2.2.0/spark-sql-cloudant/index.html | 677 +++++++++++++++++++ .../2.2.0/spark-sql-streaming-akka/index.html | 380 +++++++++++ .../2.2.0/spark-sql-streaming-mqtt/index.html | 388 +++++++++++ .../spark/2.2.0/spark-streaming-akka/index.html | 325 +++++++++ .../spark/2.2.0/spark-streaming-mqtt/index.html | 333 +++++++++ .../2.2.0/spark-streaming-pubsub/index.html | 308 +++++++++ .../2.2.0/spark-streaming-twitter/index.html | 308 +++++++++ .../2.2.0/spark-streaming-zeromq/index.html | 300 ++++++++ .../spark/current/spark-sql-cloudant/index.html | 10 +- .../current/spark-sql-streaming-akka/index.html | 6 +- .../current/spark-sql-streaming-mqtt/index.html | 6 +- .../current/spark-streaming-akka/index.html | 6 +- .../current/spark-streaming-mqtt/index.html | 6 +- .../current/spark-streaming-pubsub/index.html | 6 +- .../current/spark-streaming-twitter/index.html | 6 +- .../current/spark-streaming-zeromq/index.html | 6 +- content/docs/spark/overview/index.html | 3 +- content/downloads/spark/index.html | 28 +- content/feed.xml 
| 4 +- content/index.html | 2 +- 26 files changed, 3353 insertions(+), 49 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/bahir-website/blob/cc64cde3/content/docs/spark/2.0.0/documentation/index.html ---------------------------------------------------------------------- diff --git a/content/docs/spark/2.0.0/documentation/index.html b/content/docs/spark/2.0.0/documentation/index.html index f50d7dc..297b663 100644 --- a/content/docs/spark/2.0.0/documentation/index.html +++ b/content/docs/spark/2.0.0/documentation/index.html @@ -195,7 +195,7 @@ --> -<h3 id="apache-bahir-extensions-for-apache-spark">Apache Bahir Extensions for Apache Spark</h3> +<h3 id="apache-bahir-extensions-for-apache-spark-200">Apache Bahir Extensions for Apache Spark 2.0.0</h3> <p><br /></p> http://git-wip-us.apache.org/repos/asf/bahir-website/blob/cc64cde3/content/docs/spark/2.0.1/documentation/index.html ---------------------------------------------------------------------- diff --git a/content/docs/spark/2.0.1/documentation/index.html b/content/docs/spark/2.0.1/documentation/index.html index f50d7dc..c95598f 100644 --- a/content/docs/spark/2.0.1/documentation/index.html +++ b/content/docs/spark/2.0.1/documentation/index.html @@ -195,7 +195,7 @@ --> -<h3 id="apache-bahir-extensions-for-apache-spark">Apache Bahir Extensions for Apache Spark</h3> +<h3 id="apache-bahir-extensions-for-apache-spark-201">Apache Bahir Extensions for Apache Spark 2.0.1</h3> <p><br /></p> http://git-wip-us.apache.org/repos/asf/bahir-website/blob/cc64cde3/content/docs/spark/2.0.2/documentation/index.html ---------------------------------------------------------------------- diff --git a/content/docs/spark/2.0.2/documentation/index.html b/content/docs/spark/2.0.2/documentation/index.html index f50d7dc..75d2c17 100644 --- a/content/docs/spark/2.0.2/documentation/index.html +++ b/content/docs/spark/2.0.2/documentation/index.html @@ -195,7 +195,7 @@ 
--> -<h3 id="apache-bahir-extensions-for-apache-spark">Apache Bahir Extensions for Apache Spark</h3> +<h3 id="apache-bahir-extensions-for-apache-spark-202">Apache Bahir Extensions for Apache Spark 2.0.2</h3> <p><br /></p> http://git-wip-us.apache.org/repos/asf/bahir-website/blob/cc64cde3/content/docs/spark/2.1.0/documentation/index.html ---------------------------------------------------------------------- diff --git a/content/docs/spark/2.1.0/documentation/index.html b/content/docs/spark/2.1.0/documentation/index.html index f50d7dc..68bbaed 100644 --- a/content/docs/spark/2.1.0/documentation/index.html +++ b/content/docs/spark/2.1.0/documentation/index.html @@ -195,7 +195,7 @@ --> -<h3 id="apache-bahir-extensions-for-apache-spark">Apache Bahir Extensions for Apache Spark</h3> +<h3 id="apache-bahir-extensions-for-apache-spark-210">Apache Bahir Extensions for Apache Spark 2.1.0</h3> <p><br /></p> http://git-wip-us.apache.org/repos/asf/bahir-website/blob/cc64cde3/content/docs/spark/2.1.1/documentation/index.html ---------------------------------------------------------------------- diff --git a/content/docs/spark/2.1.1/documentation/index.html b/content/docs/spark/2.1.1/documentation/index.html index dcf0a52..49203b5 100644 --- a/content/docs/spark/2.1.1/documentation/index.html +++ b/content/docs/spark/2.1.1/documentation/index.html @@ -195,7 +195,7 @@ --> -<h3 id="apache-bahir-extensions-for-apache-spark">Apache Bahir Extensions for Apache Spark</h3> +<h3 id="apache-bahir-extensions-for-apache-spark-211">Apache Bahir Extensions for Apache Spark 2.1.1</h3> <p><br /></p> http://git-wip-us.apache.org/repos/asf/bahir-website/blob/cc64cde3/content/docs/spark/2.2.0/documentation/index.html ---------------------------------------------------------------------- diff --git a/content/docs/spark/2.2.0/documentation/index.html b/content/docs/spark/2.2.0/documentation/index.html new file mode 100644 index 0000000..e8dace0 --- /dev/null +++ 
b/content/docs/spark/2.2.0/documentation/index.html @@ -0,0 +1,284 @@ + + +<!DOCTYPE html> +<html lang="en"> + <head> + <meta charset="utf-8"> + <title>Extensions for Apache Spark</title> + <meta name="description" content="Extensions for Apache Spark"> + <meta name="author" content=""> + + <!-- Enable responsive viewport --> + <meta name="viewport" content="width=device-width, initial-scale=1.0"> + + <!-- Le HTML5 shim, for IE6-8 support of HTML elements --> + <!--[if lt IE 9]> + <script src="http://html5shim.googlecode.com/svn/trunk/html5.js"></script> + <![endif]--> + + <!-- Le styles --> + <link href="/assets/themes/apache-clean/bootstrap/css/bootstrap.css" rel="stylesheet"> + <link href="/assets/themes/apache-clean/css/style.css?body=1" rel="stylesheet" type="text/css"> + <link href="/assets/themes/apache-clean/css/syntax.css" rel="stylesheet" type="text/css" media="screen" /> + <!-- Le fav and touch icons --> + <!-- Update these with your own images + <link rel="shortcut icon" href="images/favicon.ico"> + <link rel="apple-touch-icon" href="images/apple-touch-icon.png"> + <link rel="apple-touch-icon" sizes="72x72" href="images/apple-touch-icon-72x72.png"> + <link rel="apple-touch-icon" sizes="114x114" href="images/apple-touch-icon-114x114.png"> + --> + + <!-- make tables sortable by adding class tag "sortable" to table elements --> + <script src="http://www.kryogenix.org/code/browser/sorttable/sorttable.js"></script> + + + </head> + + <body> + + + +<!-- Navigation --> +<div id="nav-bar"> + <nav id="nav-container" class="navbar navbar-inverse " role="navigation"> + <div class="container"> + <!-- Brand and toggle get grouped for better mobile display --> + + <div class="navbar-header page-scroll"> + <button type="button" class="navbar-toggle" data-toggle="collapse" data-target=".navbar-collapse"> + <span class="sr-only">Toggle navigation</span> + <span class="icon-bar"></span> + <span class="icon-bar"></span> + <span class="icon-bar"></span> + </button> + <a 
class="navbar-brand page-scroll" href="/#home">Home</a> + </div> + <!-- Collect the nav links, forms, and other content for toggling --> + <nav class="navbar-collapse collapse" role="navigation"> + <ul class="nav navbar-nav"> + + + + <li id="download"> + + <a href="#" data-toggle="dropdown" class="dropdown-toggle">Download<b class="caret"></b></a> + <ul class="dropdown-menu dropdown-left"> + + + <li><a href="/downloads/spark" target="_self">Bahir Spark Extensions</a></li> + + + <li><a href="/downloads/flink" target="_self">Bahir Flink Extensions</a></li> + + </ul> + + </li> + + + + + <li id="community"> + + <a href="#" data-toggle="dropdown" class="dropdown-toggle">Community<b class="caret"></b></a> + <ul class="dropdown-menu dropdown-left"> + + + <li><a href="/community" target="_self">Get Involved</a></li> + + + <li><a href="/contributing" target="_self">Contributing</a></li> + + + <li><a href="/contributing-extensions" target="_self">Contributing Extensions</a></li> + + + <li><a href="https://issues.apache.org/jira/browse/BAHIR" target="_blank">Issue Tracker</a></li> + + + <li><a href="https://github.com/apache/bahir" target="_blank">Source Code</a></li> + + + <li><a href="/community-members" target="_self">Project Committers</a></li> + + </ul> + + </li> + + + + + <li id="documentation"> + + <a href="#" data-toggle="dropdown" class="dropdown-toggle">Documentation<b class="caret"></b></a> + <ul class="dropdown-menu dropdown-left"> + + + <li><a href="/docs/spark/overview" target="_self">Bahir Spark Extensions</a></li> + + + <li><a href="/docs/flink/overview" target="_self">Bahir Flink Extensions</a></li> + + </ul> + + </li> + + + + + <li id="github"> + + <a href="#" data-toggle="dropdown" class="dropdown-toggle">GitHub<b class="caret"></b></a> + <ul class="dropdown-menu dropdown-left"> + + + <li><a href="https://github.com/apache/bahir" target="_blank">Bahir Spark Extensions</a></li> + + + <li><a href="https://github.com/apache/bahir-flink" target="_blank">Bahir 
Flink Extensions</a></li> + + + <li><a href="https://github.com/apache/bahir-website" target="_blank">Bahir Website</a></li> + + </ul> + + </li> + + + + + <li id="apache"> + + <a href="#" data-toggle="dropdown" class="dropdown-toggle">Apache<b class="caret"></b></a> + <ul class="dropdown-menu dropdown-left"> + + + <li><a href="http://www.apache.org/foundation/how-it-works.html" target="_blank">Apache Software Foundation</a></li> + + + <li><a href="http://www.apache.org/licenses/" target="_blank">Apache License</a></li> + + + <li><a href="http://www.apache.org/foundation/sponsorship" target="_blank">Sponsorship</a></li> + + + <li><a href="http://www.apache.org/foundation/thanks.html" target="_blank">Thanks</a></li> + + + <li><a href="/privacy-policy" target="_self">Privacy Policy</a></li> + + </ul> + + </li> + + + </ul> + </nav><!--/.navbar-collapse --> + <!-- /.navbar-collapse --> + </div> + <!-- /.container --> + </nav> +</div> + + + <div class="container"> + + + +<!--<div class="hero-unit Extensions for Apache Spark"> + <h1></h1> +</div> +--> + +<div class="row"> + <div class="col-md-12"> + <!-- + +--> + +<h3 id="apache-bahir-extensions-for-apache-spark-220">Apache Bahir Extensions for Apache Spark 2.2.0</h3> + +<p><br /></p> + +<h4 id="sql--data-sources">SQL Data Sources</h4> + +<p><a href="../spark-sql-cloudant">Apache CouchDB/Cloudant data source</a> <img src="/assets/themes/apache-clean/img/new-black.png" alt="" height="36px" width="36px" /></p> + +<p><br /></p> + +<h4 id="structured-streaming-data-sources">Structured Streaming Data Sources</h4> + +<p><a href="../spark-sql-streaming-akka">Akka data source</a> <img src="/assets/themes/apache-clean/img/new-black.png" alt="" height="36px" width="36px" /></p> + +<p><a href="../spark-sql-streaming-mqtt">MQTT data source</a></p> + +<p><br /></p> + +<h4 id="discretized-streams-dstreams-connectors">Discretized Streams (DStreams) Connectors</h4> + +<p><a href="../spark-sql-cloudant">Apache CouchDB/Cloudant 
connector</a> <img src="/assets/themes/apache-clean/img/new-black.png" alt="" height="36px" width="36px" /></p> + +<p><a href="../spark-streaming-akka">Akka connector</a></p> + +<p><a href="../spark-streaming-pubsub">Google Cloud Pub/Sub connector</a> <img src="/assets/themes/apache-clean/img/new-black.png" alt="" height="36px" width="36px" /></p> + +<p><a href="../spark-streaming-mqtt">MQTT connector</a></p> + +<p><a href="../spark-streaming-twitter">Twitter connector</a></p> + +<p><a href="../spark-streaming-zeromq">ZeroMQ connector</a></p> + + </div> +</div> + + + + <hr> + + <!-- <p>© 2017 </p>--> + <footer class="site-footer"> + <div class="wrapper"> + <div class="footer-col-wrapper"> + + <div style="text-align:center;"> + + <div> + Copyright © 2016-2017 <a href="http://www.apache.org">The Apache Software Foundation</a>. + Licensed under the <a href="http://www.apache.org/licenses/LICENSE-2.0">Apache License, Version + 2.0</a>. + <br> + + Apache and the Apache Feather logo are trademarks of The Apache Software Foundation. 
+ + </div> + </div> + </div> + </div> +</footer> + + </div> + + + + + <script type="text/javascript"> + (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){ + (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o), + m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m) + })(window,document,'script','//www.google-analytics.com/analytics.js','ga'); + + ga('create', 'UA-79140859-1', 'bahir.apache.org'); + ga('require', 'linkid', 'linkid.js'); + ga('send', 'pageview'); + +</script> + + + + <script src="/assets/themes/apache-clean/jquery/jquery-2.1.1.min.js"></script> + + <script src="/assets/themes/apache-clean/bootstrap/js/bootstrap.min.js"></script> + + + </body> +</html> + http://git-wip-us.apache.org/repos/asf/bahir-website/blob/cc64cde3/content/docs/spark/2.2.0/spark-sql-cloudant/index.html ---------------------------------------------------------------------- diff --git a/content/docs/spark/2.2.0/spark-sql-cloudant/index.html b/content/docs/spark/2.2.0/spark-sql-cloudant/index.html new file mode 100644 index 0000000..a7051cc --- /dev/null +++ b/content/docs/spark/2.2.0/spark-sql-cloudant/index.html @@ -0,0 +1,677 @@ + + +<!DOCTYPE html> +<html lang="en"> + <head> + <meta charset="utf-8"> + <title>Spark Data Source for Apache CouchDB/Cloudant</title> + <meta name="description" content="Spark Data Source for Apache CouchDB/Cloudant"> + <meta name="author" content=""> + + <!-- Enable responsive viewport --> + <meta name="viewport" content="width=device-width, initial-scale=1.0"> + + <!-- Le HTML5 shim, for IE6-8 support of HTML elements --> + <!--[if lt IE 9]> + <script src="http://html5shim.googlecode.com/svn/trunk/html5.js"></script> + <![endif]--> + + <!-- Le styles --> + <link href="/assets/themes/apache-clean/bootstrap/css/bootstrap.css" rel="stylesheet"> + <link href="/assets/themes/apache-clean/css/style.css?body=1" rel="stylesheet" type="text/css"> + <link 
href="/assets/themes/apache-clean/css/syntax.css" rel="stylesheet" type="text/css" media="screen" /> + <!-- Le fav and touch icons --> + <!-- Update these with your own images + <link rel="shortcut icon" href="images/favicon.ico"> + <link rel="apple-touch-icon" href="images/apple-touch-icon.png"> + <link rel="apple-touch-icon" sizes="72x72" href="images/apple-touch-icon-72x72.png"> + <link rel="apple-touch-icon" sizes="114x114" href="images/apple-touch-icon-114x114.png"> + --> + + <!-- make tables sortable by adding class tag "sortable" to table elements --> + <script src="http://www.kryogenix.org/code/browser/sorttable/sorttable.js"></script> + + + </head> + + <body> + + + +<!-- Navigation --> +<div id="nav-bar"> + <nav id="nav-container" class="navbar navbar-inverse " role="navigation"> + <div class="container"> + <!-- Brand and toggle get grouped for better mobile display --> + + <div class="navbar-header page-scroll"> + <button type="button" class="navbar-toggle" data-toggle="collapse" data-target=".navbar-collapse"> + <span class="sr-only">Toggle navigation</span> + <span class="icon-bar"></span> + <span class="icon-bar"></span> + <span class="icon-bar"></span> + </button> + <a class="navbar-brand page-scroll" href="/#home">Home</a> + </div> + <!-- Collect the nav links, forms, and other content for toggling --> + <nav class="navbar-collapse collapse" role="navigation"> + <ul class="nav navbar-nav"> + + + + <li id="download"> + + <a href="#" data-toggle="dropdown" class="dropdown-toggle">Download<b class="caret"></b></a> + <ul class="dropdown-menu dropdown-left"> + + + <li><a href="/downloads/spark" target="_self">Bahir Spark Extensions</a></li> + + + <li><a href="/downloads/flink" target="_self">Bahir Flink Extensions</a></li> + + </ul> + + </li> + + + + + <li id="community"> + + <a href="#" data-toggle="dropdown" class="dropdown-toggle">Community<b class="caret"></b></a> + <ul class="dropdown-menu dropdown-left"> + + + <li><a href="/community" 
target="_self">Get Involved</a></li> + + + <li><a href="/contributing" target="_self">Contributing</a></li> + + + <li><a href="/contributing-extensions" target="_self">Contributing Extensions</a></li> + + + <li><a href="https://issues.apache.org/jira/browse/BAHIR" target="_blank">Issue Tracker</a></li> + + + <li><a href="https://github.com/apache/bahir" target="_blank">Source Code</a></li> + + + <li><a href="/community-members" target="_self">Project Committers</a></li> + + </ul> + + </li> + + + + + <li id="documentation"> + + <a href="#" data-toggle="dropdown" class="dropdown-toggle">Documentation<b class="caret"></b></a> + <ul class="dropdown-menu dropdown-left"> + + + <li><a href="/docs/spark/overview" target="_self">Bahir Spark Extensions</a></li> + + + <li><a href="/docs/flink/overview" target="_self">Bahir Flink Extensions</a></li> + + </ul> + + </li> + + + + + <li id="github"> + + <a href="#" data-toggle="dropdown" class="dropdown-toggle">GitHub<b class="caret"></b></a> + <ul class="dropdown-menu dropdown-left"> + + + <li><a href="https://github.com/apache/bahir" target="_blank">Bahir Spark Extensions</a></li> + + + <li><a href="https://github.com/apache/bahir-flink" target="_blank">Bahir Flink Extensions</a></li> + + + <li><a href="https://github.com/apache/bahir-website" target="_blank">Bahir Website</a></li> + + </ul> + + </li> + + + + + <li id="apache"> + + <a href="#" data-toggle="dropdown" class="dropdown-toggle">Apache<b class="caret"></b></a> + <ul class="dropdown-menu dropdown-left"> + + + <li><a href="http://www.apache.org/foundation/how-it-works.html" target="_blank">Apache Software Foundation</a></li> + + + <li><a href="http://www.apache.org/licenses/" target="_blank">Apache License</a></li> + + + <li><a href="http://www.apache.org/foundation/sponsorship" target="_blank">Sponsorship</a></li> + + + <li><a href="http://www.apache.org/foundation/thanks.html" target="_blank">Thanks</a></li> + + + <li><a href="/privacy-policy" target="_self">Privacy 
Policy</a></li> + + </ul> + + </li> + + + </ul> + </nav><!--/.navbar-collapse --> + <!-- /.navbar-collapse --> + </div> + <!-- /.container --> + </nav> +</div> + + + <div class="container"> + + + +<!--<div class="hero-unit Spark Data Source for Apache CouchDB/Cloudant"> + <h1></h1> +</div> +--> + +<div class="row"> + <div class="col-md-12"> + <!-- + +--> + +<p>A library for reading data from Cloudant or CouchDB databases using Spark SQL and Spark Streaming.</p> + +<p><a href="https://cloudant.com">IBM® Cloudant®</a> is a document-oriented DataBase as a Service (DBaaS). It stores data as documents +in JSON format. Itâs built with scalability, high availability, and durability in mind. It comes with a +wide variety of indexing options including map-reduce, Cloudant Query, full-text indexing, and +geospatial indexing. The replication capabilities make it easy to keep data in sync between database +clusters, desktop PCs, and mobile devices.</p> + +<p><a href="http://couchdb.apache.org">Apache CouchDBâ¢</a> is open source database software that focuses on ease of use and having an architecture that âcompletely embraces the Webâ. 
It has a document-oriented NoSQL database architecture and is implemented in the concurrency-oriented language Erlang; it uses JSON to store data, JavaScript as its query language using MapReduce, and HTTP for an API.</p> + +<h2 id="linking">Linking</h2> + +<p>Using SBT:</p> + +<div class="highlighter-rouge"><pre class="highlight"><code>libraryDependencies += "org.apache.bahir" %% "spark-sql-cloudant" % "2.2.0" +</code></pre> +</div> + +<p>Using Maven:</p> + +<div class="highlighter-rouge"><pre class="highlight"><code><dependency> + <groupId>org.apache.bahir</groupId> + <artifactId>spark-sql-cloudant_2.11</artifactId> + <version>2.2.0</version> +</dependency> +</code></pre> +</div> + +<p>This library can also be added to Spark jobs launched through <code class="highlighter-rouge">spark-shell</code> or <code class="highlighter-rouge">spark-submit</code> by using the <code class="highlighter-rouge">--packages</code> command line option.</p> + +<div class="highlighter-rouge"><pre class="highlight"><code>$ bin/spark-shell --packages org.apache.bahir:spark-sql-cloudant_2.11:2.2.0 +</code></pre> +</div> + +<p>Unlike using <code class="highlighter-rouge">--jars</code>, using <code class="highlighter-rouge">--packages</code> ensures that this library and its dependencies will be added to the classpath. 
+The <code class="highlighter-rouge">--packages</code> argument can also be used with <code class="highlighter-rouge">bin/spark-submit</code>.</p> + +<p>Submit a job in Python:</p> + +<div class="highlighter-rouge"><pre class="highlight"><code>spark-submit --master local[4] --jars <path to cloudant-spark.jar> <path to python script> +</code></pre> +</div> + +<p>Submit a job in Scala:</p> + +<div class="highlighter-rouge"><pre class="highlight"><code>spark-submit --class "<your class>" --master local[4] --jars <path to cloudant-spark.jar> <path to your app jar> +</code></pre> +</div> + +<p>This library is compiled for Scala 2.11 only, and intends to support Spark 2.0 onwards.</p> + +<h2 id="configuration-options">Configuration options</h2> +<p>The configuration is obtained in the following sequence:</p> + +<ol> + <li>default in the Config, which is set in the application.conf</li> + <li>key in the SparkConf, which is set in SparkConf</li> + <li>key in the parameters, which is set in a dataframe or temporaty table options</li> + <li>âspark.â+key in the SparkConf (as they are treated as the one passed in through spark-submit using âconf option)</li> +</ol> + +<p>Here each subsequent configuration overrides the previous one. Thus, configuration set using DataFrame option overrides what has beens set in SparkConf. 
And configuration passed in spark-submit using âconf takes precedence over any setting in the code.</p> + +<h3 id="configuration-in-applicationconf">Configuration in application.conf</h3> +<p>Default values are defined in <a href="cloudant-spark-sql/src/main/resources/application.conf">here</a>.</p> + +<h3 id="configuration-on-sparkconf">Configuration on SparkConf</h3> + +<table> + <thead> + <tr> + <th>Name</th> + <th style="text-align: center">Default</th> + <th>Meaning</th> + </tr> + </thead> + <tbody> + <tr> + <td>cloudant.protocol</td> + <td style="text-align: center">https</td> + <td>protocol to use to transfer data: http or https</td> + </tr> + <tr> + <td>cloudant.host</td> + <td style="text-align: center"> </td> + <td>cloudant host url</td> + </tr> + <tr> + <td>cloudant.username</td> + <td style="text-align: center"> </td> + <td>cloudant userid</td> + </tr> + <tr> + <td>cloudant.password</td> + <td style="text-align: center"> </td> + <td>cloudant password</td> + </tr> + <tr> + <td>cloudant.useQuery</td> + <td style="text-align: center">false</td> + <td>By default, _all_docs endpoint is used if configuration âviewâ and âindexâ (see below) are not set. When useQuery is enabled, _find endpoint will be used in place of _all_docs when query condition is not on primary key field (_id), so that query predicates may be driven into datastore.</td> + </tr> + <tr> + <td>cloudant.queryLimit</td> + <td style="text-align: center">25</td> + <td>The maximum number of results returned when querying the _find endpoint.</td> + </tr> + <tr> + <td>jsonstore.rdd.partitions</td> + <td style="text-align: center">10</td> + <td>the number of partitions intent used to drive JsonStoreRDD loading query result in parallel. The actual number is calculated based on total rows returned and satisfying maxInPartition and minInPartition</td> + </tr> + <tr> + <td>jsonstore.rdd.maxInPartition</td> + <td style="text-align: center">-1</td> + <td>the max rows in a partition. 
-1 means unlimited</td> + </tr> + <tr> + <td>jsonstore.rdd.minInPartition</td> + <td style="text-align: center">10</td> + <td>the min rows in a partition.</td> + </tr> + <tr> + <td>jsonstore.rdd.requestTimeout</td> + <td style="text-align: center">900000</td> + <td>the request timeout in milliseconds</td> + </tr> + <tr> + <td>bulkSize</td> + <td style="text-align: center">200</td> + <td>the bulk save size</td> + </tr> + <tr> + <td>schemaSampleSize</td> + <td style="text-align: center">â-1â</td> + <td>the sample size for RDD schema discovery. 1 means we are using only first document for schema discovery; -1 means all documents; 0 will be treated as 1; any number N means min(N, total) docs</td> + </tr> + <tr> + <td>createDBOnSave</td> + <td style="text-align: center">âfalseâ</td> + <td>whether to create a new database during save operation. If false, a database should already exist. If true, a new database will be created. If true, and a database with a provided name already exists, an error will be raised.</td> + </tr> + </tbody> +</table> + +<h3 id="configuration-on-spark-sql-temporary-table-or-dataframe">Configuration on Spark SQL Temporary Table or DataFrame</h3> + +<p>Besides all the configurations passed to a temporary table or dataframe through SparkConf, it is also possible to set the following configurations in temporary table or dataframe using OPTIONS:</p> + +<table> + <thead> + <tr> + <th>Name</th> + <th style="text-align: center">Default</th> + <th>Meaning</th> + </tr> + </thead> + <tbody> + <tr> + <td>database</td> + <td style="text-align: center"> </td> + <td>cloudant database name</td> + </tr> + <tr> + <td>view</td> + <td style="text-align: center"> </td> + <td>cloudant view w/o the database name. only used for load.</td> + </tr> + <tr> + <td>index</td> + <td style="text-align: center"> </td> + <td>cloudant search index w/o the database name. 
only used for load data with less than or equal to 200 results.</td> + </tr> + <tr> + <td>path</td> + <td style="text-align: center"> </td> + <td>cloudant: as database name if database is not present</td> + </tr> + <tr> + <td>schemaSampleSize</td> + <td style="text-align: center">â-1â</td> + <td>the sample size used to discover the schema for this temp table. -1 scans all documents</td> + </tr> + <tr> + <td>bulkSize</td> + <td style="text-align: center">200</td> + <td>the bulk save size</td> + </tr> + <tr> + <td>createDBOnSave</td> + <td style="text-align: center">âfalseâ</td> + <td>whether to create a new database during save operation. If false, a database should already exist. If true, a new database will be created. If true, and a database with a provided name already exists, an error will be raised.</td> + </tr> + </tbody> +</table> + +<p>For fast loading, views are loaded without include_docs. Thus, a derived schema will always be: <code class="highlighter-rouge"><span class="p">{</span><span class="err">id,</span><span class="w"> </span><span class="err">key,</span><span class="w"> </span><span class="err">value</span><span class="p">}</span></code>, where <code class="highlighter-rouge">value </code>can be a compount field. 
An example of loading data from a view:</p> + +<div class="language-python highlighter-rouge"><pre class="highlight"><code><span class="n">spark</span><span class="o">.</span><span class="n">sql</span><span class="p">(</span><span class="s">" CREATE TEMPORARY TABLE flightTable1 USING org.apache.bahir.cloudant OPTIONS ( database 'n_flight', view '_design/view/_view/AA0')"</span><span class="p">)</span> + +</code></pre> +</div> + +<h3 id="configuration-on-cloudant-receiver-for-spark-streaming">Configuration on Cloudant Receiver for Spark Streaming</h3> + +<table> + <thead> + <tr> + <th>Name</th> + <th style="text-align: center">Default</th> + <th>Meaning</th> + </tr> + </thead> + <tbody> + <tr> + <td>cloudant.host</td> + <td style="text-align: center"> </td> + <td>cloudant host url</td> + </tr> + <tr> + <td>cloudant.username</td> + <td style="text-align: center"> </td> + <td>cloudant userid</td> + </tr> + <tr> + <td>cloudant.password</td> + <td style="text-align: center"> </td> + <td>cloudant password</td> + </tr> + <tr> + <td>database</td> + <td style="text-align: center"> </td> + <td>cloudant database name</td> + </tr> + <tr> + <td>selector</td> + <td style="text-align: center">all documents</td> + <td>a selector written in Cloudant Query syntax, specifying conditions for selecting documents. Only documents satisfying the selectorâs conditions will be retrieved from Cloudant and loaded into Spark.</td> + </tr> + </tbody> +</table> + +<h3 id="configuration-in-spark-submit-using---conf-option">Configuration in spark-submit using âconf option</h3> + +<p>The above stated configuration keys can also be set using <code class="highlighter-rouge">spark-submit --conf</code> option. 
When passing configuration in spark-submit, make sure adding âspark.â as prefix to the keys.</p> + +<h2 id="examples">Examples</h2> + +<h3 id="python-api">Python API</h3> + +<h4 id="using-sql-in-python">Using SQL In Python</h4> + +<div class="language-python highlighter-rouge"><pre class="highlight"><code><span class="n">spark</span> <span class="o">=</span> <span class="n">SparkSession</span>\ + <span class="o">.</span><span class="n">builder</span>\ + <span class="o">.</span><span class="n">appName</span><span class="p">(</span><span class="s">"Cloudant Spark SQL Example in Python using temp tables"</span><span class="p">)</span>\ + <span class="o">.</span><span class="n">config</span><span class="p">(</span><span class="s">"cloudant.host"</span><span class="p">,</span><span class="s">"ACCOUNT.cloudant.com"</span><span class="p">)</span>\ + <span class="o">.</span><span class="n">config</span><span class="p">(</span><span class="s">"cloudant.username"</span><span class="p">,</span> <span class="s">"USERNAME"</span><span class="p">)</span>\ + <span class="o">.</span><span class="n">config</span><span class="p">(</span><span class="s">"cloudant.password"</span><span class="p">,</span><span class="s">"PASSWORD"</span><span class="p">)</span>\ + <span class="o">.</span><span class="n">getOrCreate</span><span class="p">()</span> + + +<span class="c"># Loading temp table from Cloudant db</span> +<span class="n">spark</span><span class="o">.</span><span class="n">sql</span><span class="p">(</span><span class="s">" CREATE TEMPORARY TABLE airportTable USING org.apache.bahir.cloudant OPTIONS ( database 'n_airportcodemapping')"</span><span class="p">)</span> +<span class="n">airportData</span> <span class="o">=</span> <span class="n">spark</span><span class="o">.</span><span class="n">sql</span><span class="p">(</span><span class="s">"SELECT _id, airportName FROM airportTable WHERE _id >= 'CAA' AND _id <= 'GAA' ORDER BY _id"</span><span class="p">)</span> +<span 
class="n">airportData</span><span class="o">.</span><span class="n">printSchema</span><span class="p">()</span> +<span class="k">print</span> <span class="s">'Total # of rows in airportData: '</span> <span class="o">+</span> <span class="nb">str</span><span class="p">(</span><span class="n">airportData</span><span class="o">.</span><span class="n">count</span><span class="p">())</span> +<span class="k">for</span> <span class="n">code</span> <span class="ow">in</span> <span class="n">airportData</span><span class="o">.</span><span class="n">collect</span><span class="p">():</span> + <span class="k">print</span> <span class="n">code</span><span class="o">.</span><span class="n">_id</span> +</code></pre> +</div> + +<p>See <a href="examples/python/CloudantApp.py">CloudantApp.py</a> for examples.</p> + +<p>Submit job example: +<code class="highlighter-rouge"> +spark-submit --packages org.apache.bahir:spark-sql-cloudant_2.11:2.2.0 --conf spark.cloudant.host=ACCOUNT.cloudant.com --conf spark.cloudant.username=USERNAME --conf spark.cloudant.password=PASSWORD sql-cloudant/examples/python/CloudantApp.py +</code></p> + +<h4 id="using-dataframe-in-python">Using DataFrame In Python</h4> + +<div class="language-python highlighter-rouge"><pre class="highlight"><code><span class="n">spark</span> <span class="o">=</span> <span class="n">SparkSession</span>\ + <span class="o">.</span><span class="n">builder</span>\ + <span class="o">.</span><span class="n">appName</span><span class="p">(</span><span class="s">"Cloudant Spark SQL Example in Python using dataframes"</span><span class="p">)</span>\ + <span class="o">.</span><span class="n">config</span><span class="p">(</span><span class="s">"cloudant.host"</span><span class="p">,</span><span class="s">"ACCOUNT.cloudant.com"</span><span class="p">)</span>\ + <span class="o">.</span><span class="n">config</span><span class="p">(</span><span class="s">"cloudant.username"</span><span class="p">,</span> <span 
class="s">"USERNAME"</span><span class="p">)</span>\ + <span class="o">.</span><span class="n">config</span><span class="p">(</span><span class="s">"cloudant.password"</span><span class="p">,</span><span class="s">"PASSWORD"</span><span class="p">)</span>\ + <span class="o">.</span><span class="n">config</span><span class="p">(</span><span class="s">"jsonstore.rdd.partitions"</span><span class="p">,</span> <span class="mi">8</span><span class="p">)</span>\ + <span class="o">.</span><span class="n">getOrCreate</span><span class="p">()</span> + +<span class="c"># ***1. Loading dataframe from Cloudant db</span> +<span class="n">df</span> <span class="o">=</span> <span class="n">spark</span><span class="o">.</span><span class="n">read</span><span class="o">.</span><span class="n">load</span><span class="p">(</span><span class="s">"n_airportcodemapping"</span><span class="p">,</span> <span class="s">"org.apache.bahir.cloudant"</span><span class="p">)</span> +<span class="n">df</span><span class="o">.</span><span class="n">cache</span><span class="p">()</span> +<span class="n">df</span><span class="o">.</span><span class="n">printSchema</span><span class="p">()</span> +<span class="n">df</span><span class="o">.</span><span class="nb">filter</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">airportName</span> <span class="o">>=</span> <span class="s">'Moscow'</span><span class="p">)</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="s">"_id"</span><span class="p">,</span><span class="s">'airportName'</span><span class="p">)</span><span class="o">.</span><span class="n">show</span><span class="p">()</span> +<span class="n">df</span><span class="o">.</span><span class="nb">filter</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">_id</span> <span class="o">>=</span> <span class="s">'CAA'</span><span class="p">)</span><span 
class="o">.</span><span class="n">select</span><span class="p">(</span><span class="s">"_id"</span><span class="p">,</span><span class="s">'airportName'</span><span class="p">)</span><span class="o">.</span><span class="n">show</span><span class="p">()</span> +</code></pre> +</div> + +<p>See <a href="examples/python/CloudantDF.py">CloudantDF.py</a> for examples.</p> + +<p>In case of doing multiple operations on a dataframe (select, filter etc.), +you should persist a dataframe. Otherwise, every operation on a dataframe will load the same data from Cloudant again. +Persisting will also speed up computation. This statement will persist an RDD in memory: <code class="highlighter-rouge">df.cache()</code>. Alternatively for large dbs to persist in memory & disk, use:</p> + +<div class="language-python highlighter-rouge"><pre class="highlight"><code><span class="kn">from</span> <span class="nn">pyspark</span> <span class="kn">import</span> <span class="n">StorageLevel</span> +<span class="n">df</span><span class="o">.</span><span class="n">persist</span><span class="p">(</span><span class="n">storageLevel</span> <span class="o">=</span> <span class="n">StorageLevel</span><span class="p">(</span><span class="bp">True</span><span class="p">,</span> <span class="bp">True</span><span class="p">,</span> <span class="bp">False</span><span class="p">,</span> <span class="bp">True</span><span class="p">,</span> <span class="mi">1</span><span class="p">))</span> +</code></pre> +</div> + +<p><a href="examples/python/CloudantDFOption.py">Sample code</a> on using DataFrame option to define cloudant configuration</p> + +<h3 id="scala-api">Scala API</h3> + +<h4 id="using-sql-in-scala">Using SQL In Scala</h4> + +<div class="language-scala highlighter-rouge"><pre class="highlight"><code><span class="k">val</span> <span class="n">spark</span> <span class="k">=</span> <span class="nc">SparkSession</span> + <span class="o">.</span><span class="n">builder</span><span class="o">()</span> + 
<span class="o">.</span><span class="n">appName</span><span class="o">(</span><span class="s">"Cloudant Spark SQL Example"</span><span class="o">)</span> + <span class="o">.</span><span class="n">config</span><span class="o">(</span><span class="s">"cloudant.host"</span><span class="o">,</span><span class="s">"ACCOUNT.cloudant.com"</span><span class="o">)</span> + <span class="o">.</span><span class="n">config</span><span class="o">(</span><span class="s">"cloudant.username"</span><span class="o">,</span> <span class="s">"USERNAME"</span><span class="o">)</span> + <span class="o">.</span><span class="n">config</span><span class="o">(</span><span class="s">"cloudant.password"</span><span class="o">,</span><span class="s">"PASSWORD"</span><span class="o">)</span> + <span class="o">.</span><span class="n">getOrCreate</span><span class="o">()</span> + +<span class="c1">// For implicit conversions of Dataframe to RDDs +</span><span class="k">import</span> <span class="nn">spark.implicits._</span> + +<span class="c1">// create a temp table from Cloudant db and query it using sql syntax +</span><span class="n">spark</span><span class="o">.</span><span class="n">sql</span><span class="o">(</span> + <span class="n">s</span><span class="s">""" + |CREATE TEMPORARY TABLE airportTable + |USING org.apache.bahir.cloudant + |OPTIONS ( database 'n_airportcodemapping') + """</span><span class="o">.</span><span class="n">stripMargin</span><span class="o">)</span> +<span class="c1">// create a dataframe +</span><span class="k">val</span> <span class="n">airportData</span> <span class="k">=</span> <span class="n">spark</span><span class="o">.</span><span class="n">sql</span><span class="o">(</span><span class="s">"SELECT _id, airportName FROM airportTable WHERE _id >= 'CAA' AND _id <= 'GAA' ORDER BY _id"</span><span class="o">)</span> +<span class="n">airportData</span><span class="o">.</span><span class="n">printSchema</span><span class="o">()</span> +<span 
class="n">println</span><span class="o">(</span><span class="n">s</span><span class="s">"Total # of rows in airportData: "</span> <span class="o">+</span> <span class="n">airportData</span><span class="o">.</span><span class="n">count</span><span class="o">())</span> +<span class="c1">// convert dataframe to array of Rows, and process each row +</span><span class="n">airportData</span><span class="o">.</span><span class="n">map</span><span class="o">(</span><span class="n">t</span> <span class="k">=></span> <span class="s">"code: "</span> <span class="o">+</span> <span class="n">t</span><span class="o">(</span><span class="mi">0</span><span class="o">)</span> <span class="o">+</span> <span class="s">",name:"</span> <span class="o">+</span> <span class="n">t</span><span class="o">(</span><span class="mi">1</span><span class="o">)).</span><span class="n">collect</span><span class="o">().</span><span class="n">foreach</span><span class="o">(</span><span class="n">println</span><span class="o">)</span> +</code></pre> +</div> +<p>See <a href="examples/scala/src/main/scala/mytest/spark/CloudantApp.scala">CloudantApp.scala</a> for examples.</p> + +<p>Submit job example: +<code class="highlighter-rouge"> +spark-submit --class org.apache.spark.examples.sql.cloudant.CloudantApp --packages org.apache.bahir:spark-sql-cloudant_2.11:2.2.0 --conf spark.cloudant.host=ACCOUNT.cloudant.com --conf spark.cloudant.username=USERNAME --conf spark.cloudant.password=PASSWORD /path/to/spark-sql-cloudant_2.11-2.2.0-tests.jar +</code></p> + +<h3 id="using-dataframe-in-scala">Using DataFrame In Scala</h3> + +<div class="language-scala highlighter-rouge"><pre class="highlight"><code><span class="k">val</span> <span class="n">spark</span> <span class="k">=</span> <span class="nc">SparkSession</span> + <span class="o">.</span><span class="n">builder</span><span class="o">()</span> + <span class="o">.</span><span class="n">appName</span><span class="o">(</span><span class="s">"Cloudant Spark SQL 
Example with Dataframe"</span><span class="o">)</span> + <span class="o">.</span><span class="n">config</span><span class="o">(</span><span class="s">"cloudant.host"</span><span class="o">,</span><span class="s">"ACCOUNT.cloudant.com"</span><span class="o">)</span> + <span class="o">.</span><span class="n">config</span><span class="o">(</span><span class="s">"cloudant.username"</span><span class="o">,</span> <span class="s">"USERNAME"</span><span class="o">)</span> + <span class="o">.</span><span class="n">config</span><span class="o">(</span><span class="s">"cloudant.password"</span><span class="o">,</span><span class="s">"PASSWORD"</span><span class="o">)</span> + <span class="o">.</span><span class="n">config</span><span class="o">(</span><span class="s">"createDBOnSave"</span><span class="o">,</span><span class="s">"true"</span><span class="o">)</span> <span class="c1">// to create a db on save +</span> <span class="o">.</span><span class="n">config</span><span class="o">(</span><span class="s">"jsonstore.rdd.partitions"</span><span class="o">,</span> <span class="s">"20"</span><span class="o">)</span> <span class="c1">// using 20 partitions +</span> <span class="o">.</span><span class="n">getOrCreate</span><span class="o">()</span> + +<span class="c1">// 1. 
Loading data from Cloudant db +</span><span class="k">val</span> <span class="n">df</span> <span class="k">=</span> <span class="n">spark</span><span class="o">.</span><span class="n">read</span><span class="o">.</span><span class="n">format</span><span class="o">(</span><span class="s">"org.apache.bahir.cloudant"</span><span class="o">).</span><span class="n">load</span><span class="o">(</span><span class="s">"n_flight"</span><span class="o">)</span> +<span class="c1">// Caching df in memory to speed computations +// and not to retrieve data from cloudant again +</span><span class="n">df</span><span class="o">.</span><span class="n">cache</span><span class="o">()</span> +<span class="n">df</span><span class="o">.</span><span class="n">printSchema</span><span class="o">()</span> + +<span class="c1">// 2. Saving dataframe to Cloudant db +</span><span class="k">val</span> <span class="n">df2</span> <span class="k">=</span> <span class="n">df</span><span class="o">.</span><span class="n">filter</span><span class="o">(</span><span class="n">df</span><span class="o">(</span><span class="s">"flightSegmentId"</span><span class="o">)</span> <span class="o">===</span> <span class="s">"AA106"</span><span class="o">)</span> + <span class="o">.</span><span class="n">select</span><span class="o">(</span><span class="s">"flightSegmentId"</span><span class="o">,</span><span class="s">"economyClassBaseCost"</span><span class="o">)</span> +<span class="n">df2</span><span class="o">.</span><span class="n">show</span><span class="o">()</span> +<span class="n">df2</span><span class="o">.</span><span class="n">write</span><span class="o">.</span><span class="n">format</span><span class="o">(</span><span class="s">"org.apache.bahir.cloudant"</span><span class="o">).</span><span class="n">save</span><span class="o">(</span><span class="s">"n_flight2"</span><span class="o">)</span> +</code></pre> +</div> + +<p>See <a 
href="examples/scala/src/main/scala/mytest/spark/CloudantDF.scala">CloudantDF.scala</a> for examples.</p> + +<p><a href="examples/scala/src/main/scala/mytest/spark/CloudantDFOption.scala">Sample code</a> on using DataFrame option to define Cloudant configuration.</p> + +<h3 id="using-streams-in-scala">Using Streams In Scala</h3> + +<div class="language-scala highlighter-rouge"><pre class="highlight"><code><span class="k">val</span> <span class="n">ssc</span> <span class="k">=</span> <span class="k">new</span> <span class="nc">StreamingContext</span><span class="o">(</span><span class="n">sparkConf</span><span class="o">,</span> <span class="nc">Seconds</span><span class="o">(</span><span class="mi">10</span><span class="o">))</span> +<span class="k">val</span> <span class="n">changes</span> <span class="k">=</span> <span class="n">ssc</span><span class="o">.</span><span class="n">receiverStream</span><span class="o">(</span><span class="k">new</span> <span class="nc">CloudantReceiver</span><span class="o">(</span><span class="nc">Map</span><span class="o">(</span> + <span class="s">"cloudant.host"</span> <span class="o">-></span> <span class="s">"ACCOUNT.cloudant.com"</span><span class="o">,</span> + <span class="s">"cloudant.username"</span> <span class="o">-></span> <span class="s">"USERNAME"</span><span class="o">,</span> + <span class="s">"cloudant.password"</span> <span class="o">-></span> <span class="s">"PASSWORD"</span><span class="o">,</span> + <span class="s">"database"</span> <span class="o">-></span> <span class="s">"n_airportcodemapping"</span><span class="o">)))</span> + +<span class="n">changes</span><span class="o">.</span><span class="n">foreachRDD</span><span class="o">((</span><span class="n">rdd</span><span class="k">:</span> <span class="kt">RDD</span><span class="o">[</span><span class="kt">String</span><span class="o">],</span> <span class="n">time</span><span class="k">:</span> <span class="kt">Time</span><span class="o">)</span> <span 
class="k">=></span> <span class="o">{</span> + <span class="c1">// Get the singleton instance of SparkSession +</span> <span class="k">val</span> <span class="n">spark</span> <span class="k">=</span> <span class="nc">SparkSessionSingleton</span><span class="o">.</span><span class="n">getInstance</span><span class="o">(</span><span class="n">rdd</span><span class="o">.</span><span class="n">sparkContext</span><span class="o">.</span><span class="n">getConf</span><span class="o">)</span> + + <span class="n">println</span><span class="o">(</span><span class="n">s</span><span class="s">"========= $time ========="</span><span class="o">)</span> + <span class="c1">// Convert RDD[String] to DataFrame +</span> <span class="k">val</span> <span class="n">changesDataFrame</span> <span class="k">=</span> <span class="n">spark</span><span class="o">.</span><span class="n">read</span><span class="o">.</span><span class="n">json</span><span class="o">(</span><span class="n">rdd</span><span class="o">)</span> + <span class="k">if</span> <span class="o">(!</span><span class="n">changesDataFrame</span><span class="o">.</span><span class="n">schema</span><span class="o">.</span><span class="n">isEmpty</span><span class="o">)</span> <span class="o">{</span> + <span class="n">changesDataFrame</span><span class="o">.</span><span class="n">printSchema</span><span class="o">()</span> + <span class="n">changesDataFrame</span><span class="o">.</span><span class="n">select</span><span class="o">(</span><span class="s">"*"</span><span class="o">).</span><span class="n">show</span><span class="o">()</span> + <span class="o">....</span> + <span class="o">}</span> +<span class="o">})</span> +<span class="n">ssc</span><span class="o">.</span><span class="n">start</span><span class="o">()</span> +<span class="c1">// run streaming for 120 secs +</span><span class="nc">Thread</span><span class="o">.</span><span class="n">sleep</span><span class="o">(</span><span class="mi">120000L</span><span 
class="o">)</span> +<span class="n">ssc</span><span class="o">.</span><span class="n">stop</span><span class="o">(</span><span class="kc">true</span><span class="o">)</span> + +</code></pre> +</div> + +<p>See <a href="examples/scala/src/main/scala/mytest/spark/CloudantStreaming.scala">CloudantStreaming.scala</a> for examples.</p> + +<p>By default, Spark Streaming will load all documents from a database. If you want to limit the loading to +specific documents, use <code class="highlighter-rouge">selector</code> option of <code class="highlighter-rouge">CloudantReceiver</code> and specify your conditions +(See <a href="examples/scala/src/main/scala/mytest/spark/CloudantStreamingSelector.scala">CloudantStreamingSelector.scala</a> +example for more details):</p> + +<div class="language-scala highlighter-rouge"><pre class="highlight"><code><span class="k">val</span> <span class="n">changes</span> <span class="k">=</span> <span class="n">ssc</span><span class="o">.</span><span class="n">receiverStream</span><span class="o">(</span><span class="k">new</span> <span class="nc">CloudantReceiver</span><span class="o">(</span><span class="nc">Map</span><span class="o">(</span> + <span class="s">"cloudant.host"</span> <span class="o">-></span> <span class="s">"ACCOUNT.cloudant.com"</span><span class="o">,</span> + <span class="s">"cloudant.username"</span> <span class="o">-></span> <span class="s">"USERNAME"</span><span class="o">,</span> + <span class="s">"cloudant.password"</span> <span class="o">-></span> <span class="s">"PASSWORD"</span><span class="o">,</span> + <span class="s">"database"</span> <span class="o">-></span> <span class="s">"sales"</span><span class="o">,</span> + <span class="s">"selector"</span> <span class="o">-></span> <span class="s">"{\"month\":\"May\", \"rep\":\"John\"}"</span><span class="o">)))</span> +</code></pre> +</div> + + </div> +</div> + + + + <hr> + + <!-- <p>© 2017 </p>--> + <footer class="site-footer"> + <div class="wrapper"> + <div 
class="footer-col-wrapper"> + + <div style="text-align:center;"> + + <div> + Copyright © 2016-2017 <a href="http://www.apache.org">The Apache Software Foundation</a>. + Licensed under the <a href="http://www.apache.org/licenses/LICENSE-2.0">Apache License, Version + 2.0</a>. + <br> + + Apache and the Apache Feather logo are trademarks of The Apache Software Foundation. + + </div> + </div> + </div> + </div> +</footer> + + </div> + + + + + <script type="text/javascript"> + (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){ + (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o), + m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m) + })(window,document,'script','//www.google-analytics.com/analytics.js','ga'); + + ga('create', 'UA-79140859-1', 'bahir.apache.org'); + ga('require', 'linkid', 'linkid.js'); + ga('send', 'pageview'); + +</script> + + + + <script src="/assets/themes/apache-clean/jquery/jquery-2.1.1.min.js"></script> + + <script src="/assets/themes/apache-clean/bootstrap/js/bootstrap.min.js"></script> + + + </body> +</html> + http://git-wip-us.apache.org/repos/asf/bahir-website/blob/cc64cde3/content/docs/spark/2.2.0/spark-sql-streaming-akka/index.html ---------------------------------------------------------------------- diff --git a/content/docs/spark/2.2.0/spark-sql-streaming-akka/index.html b/content/docs/spark/2.2.0/spark-sql-streaming-akka/index.html new file mode 100644 index 0000000..8f07640 --- /dev/null +++ b/content/docs/spark/2.2.0/spark-sql-streaming-akka/index.html @@ -0,0 +1,380 @@ + + +<!DOCTYPE html> +<html lang="en"> + <head> + <meta charset="utf-8"> + <title>Spark Structured Streaming Akka</title> + <meta name="description" content="Spark Structured Streaming Akka"> + <meta name="author" content=""> + + <!-- Enable responsive viewport --> + <meta name="viewport" content="width=device-width, initial-scale=1.0"> + + <!-- Le HTML5 shim, for IE6-8 support of HTML 
elements --> + <!--[if lt IE 9]> + <script src="http://html5shim.googlecode.com/svn/trunk/html5.js"></script> + <![endif]--> + + <!-- Le styles --> + <link href="/assets/themes/apache-clean/bootstrap/css/bootstrap.css" rel="stylesheet"> + <link href="/assets/themes/apache-clean/css/style.css?body=1" rel="stylesheet" type="text/css"> + <link href="/assets/themes/apache-clean/css/syntax.css" rel="stylesheet" type="text/css" media="screen" /> + <!-- Le fav and touch icons --> + <!-- Update these with your own images + <link rel="shortcut icon" href="images/favicon.ico"> + <link rel="apple-touch-icon" href="images/apple-touch-icon.png"> + <link rel="apple-touch-icon" sizes="72x72" href="images/apple-touch-icon-72x72.png"> + <link rel="apple-touch-icon" sizes="114x114" href="images/apple-touch-icon-114x114.png"> + --> + + <!-- make tables sortable by adding class tag "sortable" to table elements --> + <script src="http://www.kryogenix.org/code/browser/sorttable/sorttable.js"></script> + + + </head> + + <body> + + + +<!-- Navigation --> +<div id="nav-bar"> + <nav id="nav-container" class="navbar navbar-inverse " role="navigation"> + <div class="container"> + <!-- Brand and toggle get grouped for better mobile display --> + + <div class="navbar-header page-scroll"> + <button type="button" class="navbar-toggle" data-toggle="collapse" data-target=".navbar-collapse"> + <span class="sr-only">Toggle navigation</span> + <span class="icon-bar"></span> + <span class="icon-bar"></span> + <span class="icon-bar"></span> + </button> + <a class="navbar-brand page-scroll" href="/#home">Home</a> + </div> + <!-- Collect the nav links, forms, and other content for toggling --> + <nav class="navbar-collapse collapse" role="navigation"> + <ul class="nav navbar-nav"> + + + + <li id="download"> + + <a href="#" data-toggle="dropdown" class="dropdown-toggle">Download<b class="caret"></b></a> + <ul class="dropdown-menu dropdown-left"> + + + <li><a href="/downloads/spark" target="_self">Bahir 
Spark Extensions</a></li> + + + <li><a href="/downloads/flink" target="_self">Bahir Flink Extensions</a></li> + + </ul> + + </li> + + + + + <li id="community"> + + <a href="#" data-toggle="dropdown" class="dropdown-toggle">Community<b class="caret"></b></a> + <ul class="dropdown-menu dropdown-left"> + + + <li><a href="/community" target="_self">Get Involved</a></li> + + + <li><a href="/contributing" target="_self">Contributing</a></li> + + + <li><a href="/contributing-extensions" target="_self">Contributing Extensions</a></li> + + + <li><a href="https://issues.apache.org/jira/browse/BAHIR" target="_blank">Issue Tracker</a></li> + + + <li><a href="https://github.com/apache/bahir" target="_blank">Source Code</a></li> + + + <li><a href="/community-members" target="_self">Project Committers</a></li> + + </ul> + + </li> + + + + + <li id="documentation"> + + <a href="#" data-toggle="dropdown" class="dropdown-toggle">Documentation<b class="caret"></b></a> + <ul class="dropdown-menu dropdown-left"> + + + <li><a href="/docs/spark/overview" target="_self">Bahir Spark Extensions</a></li> + + + <li><a href="/docs/flink/overview" target="_self">Bahir Flink Extensions</a></li> + + </ul> + + </li> + + + + + <li id="github"> + + <a href="#" data-toggle="dropdown" class="dropdown-toggle">GitHub<b class="caret"></b></a> + <ul class="dropdown-menu dropdown-left"> + + + <li><a href="https://github.com/apache/bahir" target="_blank">Bahir Spark Extensions</a></li> + + + <li><a href="https://github.com/apache/bahir-flink" target="_blank">Bahir Flink Extensions</a></li> + + + <li><a href="https://github.com/apache/bahir-website" target="_blank">Bahir Website</a></li> + + </ul> + + </li> + + + + + <li id="apache"> + + <a href="#" data-toggle="dropdown" class="dropdown-toggle">Apache<b class="caret"></b></a> + <ul class="dropdown-menu dropdown-left"> + + + <li><a href="http://www.apache.org/foundation/how-it-works.html" target="_blank">Apache Software Foundation</a></li> + + + <li><a 
href="http://www.apache.org/licenses/" target="_blank">Apache License</a></li> + + + <li><a href="http://www.apache.org/foundation/sponsorship" target="_blank">Sponsorship</a></li> + + + <li><a href="http://www.apache.org/foundation/thanks.html" target="_blank">Thanks</a></li> + + + <li><a href="/privacy-policy" target="_self">Privacy Policy</a></li> + + </ul> + + </li> + + + </ul> + </nav><!--/.navbar-collapse --> + <!-- /.navbar-collapse --> + </div> + <!-- /.container --> + </nav> +</div> + + + <div class="container"> + + + +<!--<div class="hero-unit Spark Structured Streaming Akka"> + <h1></h1> +</div> +--> + +<div class="row"> + <div class="col-md-12"> + <!-- + +--> + +<p>A library for reading data from Akka Actors using Spark SQL Streaming ( or Structured streaming.).</p> + +<h2 id="linking">Linking</h2> + +<p>Using SBT:</p> + +<div class="highlighter-rouge"><pre class="highlight"><code>libraryDependencies += "org.apache.bahir" %% "spark-sql-streaming-akka" % "2.2.0" +</code></pre> +</div> + +<p>Using Maven:</p> + +<div class="highlighter-rouge"><pre class="highlight"><code><dependency> + <groupId>org.apache.bahir</groupId> + <artifactId>spark-sql-streaming-akka_2.11</artifactId> + <version>2.2.0</version> +</dependency> +</code></pre> +</div> + +<p>This library can also be added to Spark jobs launched through <code class="highlighter-rouge">spark-shell</code> or <code class="highlighter-rouge">spark-submit</code> by using the <code class="highlighter-rouge">--packages</code> command line option. +For example, to include it when starting the spark shell:</p> + +<div class="highlighter-rouge"><pre class="highlight"><code>$ bin/spark-shell --packages org.apache.bahir:spark-sql-streaming-akka_2.11:2.2.0 +</code></pre> +</div> + +<p>Unlike using <code class="highlighter-rouge">--jars</code>, using <code class="highlighter-rouge">--packages</code> ensures that this library and its dependencies will be added to the classpath. 
+The <code class="highlighter-rouge">--packages</code> argument can also be used with <code class="highlighter-rouge">bin/spark-submit</code>.</p> + +<p>This library is compiled for Scala 2.11 only, and intends to support Spark 2.0 onwards.</p> + +<h2 id="examples">Examples</h2> + +<p>A SQL Stream can be created with data streams received from Akka Feeder actor using,</p> + +<div class="highlighter-rouge"><pre class="highlight"><code> sqlContext.readStream + .format("org.apache.bahir.sql.streaming.akka.AkkaStreamSourceProvider") + .option("urlOfPublisher", "feederActorUri") + .load() +</code></pre> +</div> + +<h2 id="enable-recovering-from-failures">Enable recovering from failures.</h2> + +<p>Setting values for option <code class="highlighter-rouge">persistenceDirPath</code> helps in recovering in case of a restart, by restoring the state where it left off before the shutdown.</p> + +<div class="highlighter-rouge"><pre class="highlight"><code> sqlContext.readStream + .format("org.apache.bahir.sql.streaming.akka.AkkaStreamSourceProvider") + .option("urlOfPublisher", "feederActorUri") + .option("persistenceDirPath", "/path/to/localdir") + .load() +</code></pre> +</div> + +<h2 id="configuration-options">Configuration options.</h2> + +<p>This source uses <a href="http://doc.akka.io/api/akka/2.4/akka/actor/Actor.html">Akka Actor api</a>.</p> + +<ul> + <li><code class="highlighter-rouge">urlOfPublisher</code> The url of Publisher or Feeder actor that the Receiver actor connects to. 
Set this as the tcp url of the Publisher or Feeder actor.</li> + <li><code class="highlighter-rouge">persistenceDirPath</code> By default it is used for storing incoming messages on disk.</li> +</ul> + +<h3 id="scala-api">Scala API</h3> + +<p>An example, for scala API to count words from incoming message stream.</p> + +<div class="highlighter-rouge"><pre class="highlight"><code> // Create DataFrame representing the stream of input lines from connection + // to publisher or feeder actor + val lines = spark.readStream + .format("org.apache.bahir.sql.streaming.akka.AkkaStreamSourceProvider") + .option("urlOfPublisher", urlOfPublisher) + .load().as[(String, Timestamp)] + + // Split the lines into words + val words = lines.map(_._1).flatMap(_.split(" ")) + + // Generate running word count + val wordCounts = words.groupBy("value").count() + + // Start running the query that prints the running counts to the console + val query = wordCounts.writeStream + .outputMode("complete") + .format("console") + .start() + + query.awaitTermination() +</code></pre> +</div> + +<p>Please see <code class="highlighter-rouge">AkkaStreamWordCount.scala</code> for full example.</p> + +<h3 id="java-api">Java API</h3> + +<p>An example, for Java API to count words from incoming message stream.</p> + +<div class="highlighter-rouge"><pre class="highlight"><code> // Create DataFrame representing the stream of input lines from connection + // to publisher or feeder actor + Dataset<String> lines = spark + .readStream() + .format("org.apache.bahir.sql.streaming.akka.AkkaStreamSourceProvider") + .option("urlOfPublisher", urlOfPublisher) + .load().select("value").as(Encoders.STRING()); + + // Split the lines into words + Dataset<String> words = lines.flatMap(new FlatMapFunction<String, String>() { + @Override + public Iterator<String> call(String s) throws Exception { + return Arrays.asList(s.split(" ")).iterator(); + } + }, Encoders.STRING()); + + // Generate running word count + Dataset<Row> 
wordCounts = words.groupBy("value").count(); + + // Start running the query that prints the running counts to the console + StreamingQuery query = wordCounts.writeStream() + .outputMode("complete") + .format("console") + .start(); + + query.awaitTermination(); +</code></pre> +</div> + +<p>Please see <code class="highlighter-rouge">JavaAkkaStreamWordCount.java</code> for full example.</p> + + </div> +</div> + + + + <hr> + + <!-- <p>© 2017 </p>--> + <footer class="site-footer"> + <div class="wrapper"> + <div class="footer-col-wrapper"> + + <div style="text-align:center;"> + + <div> + Copyright © 2016-2017 <a href="http://www.apache.org">The Apache Software Foundation</a>. + Licensed under the <a href="http://www.apache.org/licenses/LICENSE-2.0">Apache License, Version + 2.0</a>. + <br> + + Apache and the Apache Feather logo are trademarks of The Apache Software Foundation. + + </div> + </div> + </div> + </div> +</footer> + + </div> + + + + + <script type="text/javascript"> + (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){ + (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o), + m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m) + })(window,document,'script','//www.google-analytics.com/analytics.js','ga'); + + ga('create', 'UA-79140859-1', 'bahir.apache.org'); + ga('require', 'linkid', 'linkid.js'); + ga('send', 'pageview'); + +</script> + + + + <script src="/assets/themes/apache-clean/jquery/jquery-2.1.1.min.js"></script> + + <script src="/assets/themes/apache-clean/bootstrap/js/bootstrap.min.js"></script> + + + </body> +</html> + http://git-wip-us.apache.org/repos/asf/bahir-website/blob/cc64cde3/content/docs/spark/2.2.0/spark-sql-streaming-mqtt/index.html ---------------------------------------------------------------------- diff --git a/content/docs/spark/2.2.0/spark-sql-streaming-mqtt/index.html b/content/docs/spark/2.2.0/spark-sql-streaming-mqtt/index.html new file mode 100644 
index 0000000..b0223ce --- /dev/null +++ b/content/docs/spark/2.2.0/spark-sql-streaming-mqtt/index.html @@ -0,0 +1,388 @@ + + +<!DOCTYPE html> +<html lang="en"> + <head> + <meta charset="utf-8"> + <title>Spark Structured Streaming MQTT</title> + <meta name="description" content="Spark Structured Streaming MQTT"> + <meta name="author" content=""> + + <!-- Enable responsive viewport --> + <meta name="viewport" content="width=device-width, initial-scale=1.0"> + + <!-- Le HTML5 shim, for IE6-8 support of HTML elements --> + <!--[if lt IE 9]> + <script src="http://html5shim.googlecode.com/svn/trunk/html5.js"></script> + <![endif]--> + + <!-- Le styles --> + <link href="/assets/themes/apache-clean/bootstrap/css/bootstrap.css" rel="stylesheet"> + <link href="/assets/themes/apache-clean/css/style.css?body=1" rel="stylesheet" type="text/css"> + <link href="/assets/themes/apache-clean/css/syntax.css" rel="stylesheet" type="text/css" media="screen" /> + <!-- Le fav and touch icons --> + <!-- Update these with your own images + <link rel="shortcut icon" href="images/favicon.ico"> + <link rel="apple-touch-icon" href="images/apple-touch-icon.png"> + <link rel="apple-touch-icon" sizes="72x72" href="images/apple-touch-icon-72x72.png"> + <link rel="apple-touch-icon" sizes="114x114" href="images/apple-touch-icon-114x114.png"> + --> + + <!-- make tables sortable by adding class tag "sortable" to table elements --> + <script src="http://www.kryogenix.org/code/browser/sorttable/sorttable.js"></script> + + + </head> + + <body> + + + +<!-- Navigation --> +<div id="nav-bar"> + <nav id="nav-container" class="navbar navbar-inverse " role="navigation"> + <div class="container"> + <!-- Brand and toggle get grouped for better mobile display --> + + <div class="navbar-header page-scroll"> + <button type="button" class="navbar-toggle" data-toggle="collapse" data-target=".navbar-collapse"> + <span class="sr-only">Toggle navigation</span> + <span class="icon-bar"></span> + <span 
class="icon-bar"></span> + <span class="icon-bar"></span> + </button> + <a class="navbar-brand page-scroll" href="/#home">Home</a> + </div> + <!-- Collect the nav links, forms, and other content for toggling --> + <nav class="navbar-collapse collapse" role="navigation"> + <ul class="nav navbar-nav"> + + + + <li id="download"> + + <a href="#" data-toggle="dropdown" class="dropdown-toggle">Download<b class="caret"></b></a> + <ul class="dropdown-menu dropdown-left"> + + + <li><a href="/downloads/spark" target="_self">Bahir Spark Extensions</a></li> + + + <li><a href="/downloads/flink" target="_self">Bahir Flink Extensions</a></li> + + </ul> + + </li> + + + + + <li id="community"> + + <a href="#" data-toggle="dropdown" class="dropdown-toggle">Community<b class="caret"></b></a> + <ul class="dropdown-menu dropdown-left"> + + + <li><a href="/community" target="_self">Get Involved</a></li> + + + <li><a href="/contributing" target="_self">Contributing</a></li> + + + <li><a href="/contributing-extensions" target="_self">Contributing Extensions</a></li> + + + <li><a href="https://issues.apache.org/jira/browse/BAHIR" target="_blank">Issue Tracker</a></li> + + + <li><a href="https://github.com/apache/bahir" target="_blank">Source Code</a></li> + + + <li><a href="/community-members" target="_self">Project Committers</a></li> + + </ul> + + </li> + + + + + <li id="documentation"> + + <a href="#" data-toggle="dropdown" class="dropdown-toggle">Documentation<b class="caret"></b></a> + <ul class="dropdown-menu dropdown-left"> + + + <li><a href="/docs/spark/overview" target="_self">Bahir Spark Extensions</a></li> + + + <li><a href="/docs/flink/overview" target="_self">Bahir Flink Extensions</a></li> + + </ul> + + </li> + + + + + <li id="github"> + + <a href="#" data-toggle="dropdown" class="dropdown-toggle">GitHub<b class="caret"></b></a> + <ul class="dropdown-menu dropdown-left"> + + + <li><a href="https://github.com/apache/bahir" target="_blank">Bahir Spark Extensions</a></li> + + + 
<li><a href="https://github.com/apache/bahir-flink" target="_blank">Bahir Flink Extensions</a></li> + + + <li><a href="https://github.com/apache/bahir-website" target="_blank">Bahir Website</a></li> + + </ul> + + </li> + + + + + <li id="apache"> + + <a href="#" data-toggle="dropdown" class="dropdown-toggle">Apache<b class="caret"></b></a> + <ul class="dropdown-menu dropdown-left"> + + + <li><a href="http://www.apache.org/foundation/how-it-works.html" target="_blank">Apache Software Foundation</a></li> + + + <li><a href="http://www.apache.org/licenses/" target="_blank">Apache License</a></li> + + + <li><a href="http://www.apache.org/foundation/sponsorship" target="_blank">Sponsorship</a></li> + + + <li><a href="http://www.apache.org/foundation/thanks.html" target="_blank">Thanks</a></li> + + + <li><a href="/privacy-policy" target="_self">Privacy Policy</a></li> + + </ul> + + </li> + + + </ul> + </nav><!--/.navbar-collapse --> + <!-- /.navbar-collapse --> + </div> + <!-- /.container --> + </nav> +</div> + + + <div class="container"> + + + +<!--<div class="hero-unit Spark Structured Streaming MQTT"> + <h1></h1> +</div> +--> + +<div class="row"> + <div class="col-md-12"> + <!-- + +--> + +<p>A library for reading data from MQTT Servers using Spark SQL Streaming ( or Structured streaming.).</p> + +<h2 id="linking">Linking</h2> + +<p>Using SBT:</p> + +<div class="highlighter-rouge"><pre class="highlight"><code>libraryDependencies += "org.apache.bahir" %% "spark-sql-streaming-mqtt" % "2.2.0" +</code></pre> +</div> + +<p>Using Maven:</p> + +<div class="highlighter-rouge"><pre class="highlight"><code><dependency> + <groupId>org.apache.bahir</groupId> + <artifactId>spark-sql-streaming-mqtt_2.11</artifactId> + <version>2.2.0</version> +</dependency> +</code></pre> +</div> + +<p>This library can also be added to Spark jobs launched through <code class="highlighter-rouge">spark-shell</code> or <code class="highlighter-rouge">spark-submit</code> by using the <code 
class="highlighter-rouge">--packages</code> command line option. +For example, to include it when starting the spark shell:</p> + +<div class="highlighter-rouge"><pre class="highlight"><code>$ bin/spark-shell --packages org.apache.bahir:spark-sql-streaming-mqtt_2.11:2.2.0 +</code></pre> +</div> + +<p>Unlike using <code class="highlighter-rouge">--jars</code>, using <code class="highlighter-rouge">--packages</code> ensures that this library and its dependencies will be added to the classpath. +The <code class="highlighter-rouge">--packages</code> argument can also be used with <code class="highlighter-rouge">bin/spark-submit</code>.</p> + +<p>This library is compiled for Scala 2.11 only, and intends to support Spark 2.0 onwards.</p> + +<h2 id="examples">Examples</h2> + +<p>A SQL Stream can be created with data streams received through MQTT Server using,</p> + +<div class="highlighter-rouge"><pre class="highlight"><code>sqlContext.readStream + .format("org.apache.bahir.sql.streaming.mqtt.MQTTStreamSourceProvider") + .option("topic", "mytopic") + .load("tcp://localhost:1883") +</code></pre> +</div> + +<h2 id="enable-recovering-from-failures">Enable recovering from failures.</h2> + +<p>Setting values for option <code class="highlighter-rouge">localStorage</code> and <code class="highlighter-rouge">clientId</code> helps in recovering in case of a restart, by restoring the state where it left off before the shutdown.</p> + +<div class="highlighter-rouge"><pre class="highlight"><code>sqlContext.readStream + .format("org.apache.bahir.sql.streaming.mqtt.MQTTStreamSourceProvider") + .option("topic", "mytopic") + .option("localStorage", "/path/to/localdir") + .option("clientId", "some-client-id") + .load("tcp://localhost:1883") +</code></pre> +</div> + +<h2 id="configuration-options">Configuration options.</h2> + +<p>This source uses <a href="https://eclipse.org/paho/clients/java/">Eclipse Paho Java Client</a>. 
Client API documentation is located <a href="http://www.eclipse.org/paho/files/javadoc/index.html">here</a>.</p> + +<ul> + <li><code class="highlighter-rouge">brokerUrl</code> A url MqttClient connects to. Set this or <code class="highlighter-rouge">path</code> as the url of the Mqtt Server. e.g. tcp://localhost:1883.</li> + <li><code class="highlighter-rouge">persistence</code> By default it is used for storing incoming messages on disk. If <code class="highlighter-rouge">memory</code> is provided as value for this option, then recovery on restart is not supported.</li> + <li><code class="highlighter-rouge">topic</code> Topic MqttClient subscribes to.</li> + <li><code class="highlighter-rouge">clientId</code> clientId, this client is associated with. Provide the same value to recover a stopped client.</li> + <li><code class="highlighter-rouge">QoS</code> The maximum quality of service to subscribe each topic at. Messages published at a lower quality of service will be received at the published QoS. Messages published at a higher quality of service will be received using the QoS specified on the subscribe.</li> + <li><code class="highlighter-rouge">username</code> Sets the user name to use for the connection to Mqtt Server. Do not set it, if server does not need this. Setting it empty will lead to errors.</li> + <li><code class="highlighter-rouge">password</code> Sets the password to use for the connection.</li> + <li><code class="highlighter-rouge">cleanSession</code> Setting it true starts a clean session, removes all checkpointed messages by a previous run of this source. This is set to false by default.</li> + <li><code class="highlighter-rouge">connectionTimeout</code> Sets the connection timeout, a value of 0 is interpreted as wait until client connects. 
See <code class="highlighter-rouge">MqttConnectOptions.setConnectionTimeout</code> for more information.</li> + <li><code class="highlighter-rouge">keepAlive</code> Same as <code class="highlighter-rouge">MqttConnectOptions.setKeepAliveInterval</code>.</li> + <li><code class="highlighter-rouge">mqttVersion</code> Same as <code class="highlighter-rouge">MqttConnectOptions.setMqttVersion</code>.</li> +</ul> + +<h3 id="scala-api">Scala API</h3> + +<p>An example, for scala API to count words from incoming message stream.</p> + +<div class="highlighter-rouge"><pre class="highlight"><code>// Create DataFrame representing the stream of input lines from connection to mqtt server +val lines = spark.readStream + .format("org.apache.bahir.sql.streaming.mqtt.MQTTStreamSourceProvider") + .option("topic", topic) + .load(brokerUrl).as[(String, Timestamp)] + +// Split the lines into words +val words = lines.map(_._1).flatMap(_.split(" ")) + +// Generate running word count +val wordCounts = words.groupBy("value").count() + +// Start running the query that prints the running counts to the console +val query = wordCounts.writeStream + .outputMode("complete") + .format("console") + .start() + +query.awaitTermination() +</code></pre> +</div> + +<p>Please see <code class="highlighter-rouge">MQTTStreamWordCount.scala</code> for full example.</p> + +<h3 id="java-api">Java API</h3> + +<p>An example, for Java API to count words from incoming message stream.</p> + +<div class="highlighter-rouge"><pre class="highlight"><code>// Create DataFrame representing the stream of input lines from connection to mqtt server. 
+Dataset<String> lines = spark + .readStream() + .format("org.apache.bahir.sql.streaming.mqtt.MQTTStreamSourceProvider") + .option("topic", topic) + .load(brokerUrl).select("value").as(Encoders.STRING()); + +// Split the lines into words +Dataset<String> words = lines.flatMap(new FlatMapFunction<String, String>() { + @Override + public Iterator<String> call(String x) { + return Arrays.asList(x.split(" ")).iterator(); + } +}, Encoders.STRING()); + +// Generate running word count +Dataset<Row> wordCounts = words.groupBy("value").count(); + +// Start running the query that prints the running counts to the console +StreamingQuery query = wordCounts.writeStream() + .outputMode("complete") + .format("console") + .start(); + +query.awaitTermination(); +</code></pre> +</div> + +<p>Please see <code class="highlighter-rouge">JavaMQTTStreamWordCount.java</code> for full example.</p> + + </div> +</div> + + + + <hr> + + <!-- <p>© 2017 </p>--> + <footer class="site-footer"> + <div class="wrapper"> + <div class="footer-col-wrapper"> + + <div style="text-align:center;"> + + <div> + Copyright © 2016-2017 <a href="http://www.apache.org">The Apache Software Foundation</a>. + Licensed under the <a href="http://www.apache.org/licenses/LICENSE-2.0">Apache License, Version + 2.0</a>. + <br> + + Apache and the Apache Feather logo are trademarks of The Apache Software Foundation. 
+ + </div> + </div> + </div> + </div> +</footer> + + </div> + + + + + <script type="text/javascript"> + (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){ + (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o), + m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m) + })(window,document,'script','//www.google-analytics.com/analytics.js','ga'); + + ga('create', 'UA-79140859-1', 'bahir.apache.org'); + ga('require', 'linkid', 'linkid.js'); + ga('send', 'pageview'); + +</script> + + + + <script src="/assets/themes/apache-clean/jquery/jquery-2.1.1.min.js"></script> + + <script src="/assets/themes/apache-clean/bootstrap/js/bootstrap.min.js"></script> + + + </body> +</html> +