http://git-wip-us.apache.org/repos/asf/bahir-website/blob/7ad4d5a8/content/docs/spark/current/spark-sql-cloudant/index.html ---------------------------------------------------------------------- diff --git a/content/docs/spark/current/spark-sql-cloudant/index.html b/content/docs/spark/current/spark-sql-cloudant/index.html new file mode 100644 index 0000000..1ed6ac4 --- /dev/null +++ b/content/docs/spark/current/spark-sql-cloudant/index.html @@ -0,0 +1,677 @@ + + +<!DOCTYPE html> +<html lang="en"> + <head> + <meta charset="utf-8"> + <title>Spark Data Source for Apache CouchDB/Cloudant</title> + <meta name="description" content="Spark Data Source for Apache CouchDB/Cloudant"> + <meta name="author" content=""> + + <!-- Enable responsive viewport --> + <meta name="viewport" content="width=device-width, initial-scale=1.0"> + + <!-- Le HTML5 shim, for IE6-8 support of HTML elements --> + <!--[if lt IE 9]> + <script src="http://html5shim.googlecode.com/svn/trunk/html5.js"></script> + <![endif]--> + + <!-- Le styles --> + <link href="/assets/themes/apache-clean/bootstrap/css/bootstrap.css" rel="stylesheet"> + <link href="/assets/themes/apache-clean/css/style.css?body=1" rel="stylesheet" type="text/css"> + <link href="/assets/themes/apache-clean/css/syntax.css" rel="stylesheet" type="text/css" media="screen" /> + <!-- Le fav and touch icons --> + <!-- Update these with your own images + <link rel="shortcut icon" href="images/favicon.ico"> + <link rel="apple-touch-icon" href="images/apple-touch-icon.png"> + <link rel="apple-touch-icon" sizes="72x72" href="images/apple-touch-icon-72x72.png"> + <link rel="apple-touch-icon" sizes="114x114" href="images/apple-touch-icon-114x114.png"> + --> + + <!-- make tables sortable by adding class tag "sortable" to table elements --> + <script src="http://www.kryogenix.org/code/browser/sorttable/sorttable.js"></script> + + + </head> + + <body> + + + +<!-- Navigation --> +<div id="nav-bar"> + <nav id="nav-container" class="navbar 
navbar-inverse " role="navigation"> + <div class="container"> + <!-- Brand and toggle get grouped for better mobile display --> + + <div class="navbar-header page-scroll"> + <button type="button" class="navbar-toggle" data-toggle="collapse" data-target=".navbar-collapse"> + <span class="sr-only">Toggle navigation</span> + <span class="icon-bar"></span> + <span class="icon-bar"></span> + <span class="icon-bar"></span> + </button> + <a class="navbar-brand page-scroll" href="/#home">Home</a> + </div> + <!-- Collect the nav links, forms, and other content for toggling --> + <nav class="navbar-collapse collapse" role="navigation"> + <ul class="nav navbar-nav"> + + + + <li id="download"> + + <a href="#" data-toggle="dropdown" class="dropdown-toggle">Download<b class="caret"></b></a> + <ul class="dropdown-menu dropdown-left"> + + + <li><a href="/downloads/spark" target="_self">Bahir Spark Extensions</a></li> + + + <li><a href="/downloads/flink" target="_self">Bahir Flink Extensions</a></li> + + </ul> + + </li> + + + + + <li id="community"> + + <a href="#" data-toggle="dropdown" class="dropdown-toggle">Community<b class="caret"></b></a> + <ul class="dropdown-menu dropdown-left"> + + + <li><a href="/community" target="_self">Get Involved</a></li> + + + <li><a href="/contributing" target="_self">Contributing</a></li> + + + <li><a href="/contributing-extensions" target="_self">Contributing Extensions</a></li> + + + <li><a href="https://issues.apache.org/jira/browse/BAHIR" target="_blank">Issue Tracker</a></li> + + + <li><a href="https://github.com/apache/bahir" target="_blank">Source Code</a></li> + + + <li><a href="/community-members" target="_self">Project Committers</a></li> + + </ul> + + </li> + + + + + <li id="documentation"> + + <a href="#" data-toggle="dropdown" class="dropdown-toggle">Documentation<b class="caret"></b></a> + <ul class="dropdown-menu dropdown-left"> + + + <li><a href="/docs/spark/overview" target="_self">Bahir Spark Extensions</a></li> + + + <li><a 
href="/docs/flink/overview" target="_self">Bahir Flink Extensions</a></li> + + </ul> + + </li> + + + + + <li id="github"> + + <a href="#" data-toggle="dropdown" class="dropdown-toggle">GitHub<b class="caret"></b></a> + <ul class="dropdown-menu dropdown-left"> + + + <li><a href="https://github.com/apache/bahir" target="_blank">Bahir Spark Extensions</a></li> + + + <li><a href="https://github.com/apache/bahir-flink" target="_blank">Bahir Flink Extensions</a></li> + + + <li><a href="https://github.com/apache/bahir-website" target="_blank">Bahir Website</a></li> + + </ul> + + </li> + + + + + <li id="apache"> + + <a href="#" data-toggle="dropdown" class="dropdown-toggle">Apache<b class="caret"></b></a> + <ul class="dropdown-menu dropdown-left"> + + + <li><a href="http://www.apache.org/foundation/how-it-works.html" target="_blank">Apache Software Foundation</a></li> + + + <li><a href="http://www.apache.org/licenses/" target="_blank">Apache License</a></li> + + + <li><a href="http://www.apache.org/foundation/sponsorship" target="_blank">Sponsorship</a></li> + + + <li><a href="http://www.apache.org/foundation/thanks.html" target="_blank">Thanks</a></li> + + + <li><a href="/privacy-policy" target="_self">Privacy Policy</a></li> + + </ul> + + </li> + + + </ul> + </nav><!--/.navbar-collapse --> + <!-- /.navbar-collapse --> + </div> + <!-- /.container --> + </nav> +</div> + + + <div class="container"> + + + +<!--<div class="hero-unit Spark Data Source for Apache CouchDB/Cloudant"> + <h1></h1> +</div> +--> + +<div class="row"> + <div class="col-md-12"> + <!-- + +--> + +<p>A library for reading data from Cloudant or CouchDB databases using Spark SQL and Spark Streaming.</p> + +<p><a href="https://cloudant.com">IBM® Cloudant®</a> is a document-oriented DataBase as a Service (DBaaS). It stores data as documents +in JSON format. It's built with scalability, high availability, and durability in mind. 
It comes with a +wide variety of indexing options including map-reduce, Cloudant Query, full-text indexing, and +geospatial indexing. The replication capabilities make it easy to keep data in sync between database +clusters, desktop PCs, and mobile devices.</p> + +<p><a href="http://couchdb.apache.org">Apache CouchDB™</a> is open source database software that focuses on ease of use and having an architecture that "completely embraces the Web". It has a document-oriented NoSQL database architecture and is implemented in the concurrency-oriented language Erlang; it uses JSON to store data, JavaScript as its query language using MapReduce, and HTTP for an API.</p> + +<h2 id="linking">Linking</h2> + +<p>Using SBT:</p> + +<div class="highlighter-rouge"><pre class="highlight"><code>libraryDependencies += "org.apache.bahir" %% "spark-sql-cloudant" % "2.2.0-SNAPSHOT" +</code></pre> +</div> + +<p>Using Maven:</p> + +<div class="highlighter-rouge"><pre class="highlight"><code>&lt;dependency&gt; + &lt;groupId&gt;org.apache.bahir&lt;/groupId&gt; + &lt;artifactId&gt;spark-sql-cloudant_2.11&lt;/artifactId&gt; + &lt;version&gt;2.2.0-SNAPSHOT&lt;/version&gt; +&lt;/dependency&gt; +</code></pre> +</div> + +<p>This library can also be added to Spark jobs launched through <code class="highlighter-rouge">spark-shell</code> or <code class="highlighter-rouge">spark-submit</code> by using the <code class="highlighter-rouge">--packages</code> command line option.</p> + +<div class="highlighter-rouge"><pre class="highlight"><code>$ bin/spark-shell --packages org.apache.bahir:spark-sql-cloudant_2.11:2.2.0-SNAPSHOT +</code></pre> +</div> + +<p>Unlike using <code class="highlighter-rouge">--jars</code>, using <code class="highlighter-rouge">--packages</code> ensures that this library and its dependencies will be added to the classpath. 
+The <code class="highlighter-rouge">--packages</code> argument can also be used with <code class="highlighter-rouge">bin/spark-submit</code>.</p> + +<p>Submit a job in Python:</p> + +<div class="highlighter-rouge"><pre class="highlight"><code>spark-submit --master local[4] --jars &lt;path to cloudant-spark.jar&gt; &lt;path to python script&gt; +</code></pre> +</div> + +<p>Submit a job in Scala:</p> + +<div class="highlighter-rouge"><pre class="highlight"><code>spark-submit --class "&lt;your class&gt;" --master local[4] --jars &lt;path to cloudant-spark.jar&gt; &lt;path to your app jar&gt; +</code></pre> +</div> + +<p>This library is compiled for Scala 2.11 only, and intends to support Spark 2.0 onwards.</p> + +<h2 id="configuration-options">Configuration options</h2> +<p>The configuration is obtained in the following sequence:</p> + +<ol> + <li>default in the Config, which is set in the application.conf</li> + <li>key in the SparkConf, which is set in SparkConf</li> + <li>key in the parameters, which is set in a dataframe or temporary table options</li> + <li>"spark."+key in the SparkConf (as they are treated as the one passed in through spark-submit using --conf option)</li> +</ol> + +<p>Here each subsequent configuration overrides the previous one. Thus, configuration set using DataFrame option overrides what has been set in SparkConf. 
And configuration passed in spark-submit using --conf takes precedence over any setting in the code.</p> + +<h3 id="configuration-in-applicationconf">Configuration in application.conf</h3> +<p>Default values are defined in <a href="cloudant-spark-sql/src/main/resources/application.conf">application.conf</a>.</p> + +<h3 id="configuration-on-sparkconf">Configuration on SparkConf</h3> + +<table> + <thead> + <tr> + <th>Name</th> + <th style="text-align: center">Default</th> + <th>Meaning</th> + </tr> + </thead> + <tbody> + <tr> + <td>cloudant.protocol</td> + <td style="text-align: center">https</td> + <td>protocol to use to transfer data: http or https</td> + </tr> + <tr> + <td>cloudant.host</td> + <td style="text-align: center"> </td> + <td>cloudant host url</td> + </tr> + <tr> + <td>cloudant.username</td> + <td style="text-align: center"> </td> + <td>cloudant userid</td> + </tr> + <tr> + <td>cloudant.password</td> + <td style="text-align: center"> </td> + <td>cloudant password</td> + </tr> + <tr> + <td>cloudant.useQuery</td> + <td style="text-align: center">false</td> + <td>By default, _all_docs endpoint is used if configuration "view" and "index" (see below) are not set. When useQuery is enabled, _find endpoint will be used in place of _all_docs when query condition is not on primary key field (_id), so that query predicates may be driven into datastore.</td> + </tr> + <tr> + <td>cloudant.queryLimit</td> + <td style="text-align: center">25</td> + <td>The maximum number of results returned when querying the _find endpoint.</td> + </tr> + <tr> + <td>jsonstore.rdd.partitions</td> + <td style="text-align: center">10</td> + <td>the intended number of partitions used by JsonStoreRDD to load query results in parallel. The actual number is calculated based on total rows returned and satisfying maxInPartition and minInPartition</td> + </tr> + <tr> + <td>jsonstore.rdd.maxInPartition</td> + <td style="text-align: center">-1</td> + <td>the max rows in a partition. 
-1 means unlimited</td> + </tr> + <tr> + <td>jsonstore.rdd.minInPartition</td> + <td style="text-align: center">10</td> + <td>the min rows in a partition.</td> + </tr> + <tr> + <td>jsonstore.rdd.requestTimeout</td> + <td style="text-align: center">900000</td> + <td>the request timeout in milliseconds</td> + </tr> + <tr> + <td>bulkSize</td> + <td style="text-align: center">200</td> + <td>the bulk save size</td> + </tr> + <tr> + <td>schemaSampleSize</td> + <td style="text-align: center">"-1"</td> + <td>the sample size for RDD schema discovery. 1 means we are using only the first document for schema discovery; -1 means all documents; 0 will be treated as 1; any number N means min(N, total) docs</td> + </tr> + <tr> + <td>createDBOnSave</td> + <td style="text-align: center">"false"</td> + <td>whether to create a new database during save operation. If false, a database should already exist. If true, a new database will be created. If true, and a database with a provided name already exists, an error will be raised.</td> + </tr> + </tbody> +</table> + +<h3 id="configuration-on-spark-sql-temporary-table-or-dataframe">Configuration on Spark SQL Temporary Table or DataFrame</h3> + +<p>Besides all the configurations passed to a temporary table or dataframe through SparkConf, it is also possible to set the following configurations in temporary table or dataframe using OPTIONS:</p> + +<table> + <thead> + <tr> + <th>Name</th> + <th style="text-align: center">Default</th> + <th>Meaning</th> + </tr> + </thead> + <tbody> + <tr> + <td>database</td> + <td style="text-align: center"> </td> + <td>cloudant database name</td> + </tr> + <tr> + <td>view</td> + <td style="text-align: center"> </td> + <td>cloudant view w/o the database name. only used for load.</td> + </tr> + <tr> + <td>index</td> + <td style="text-align: center"> </td> + <td>cloudant search index w/o the database name. 
only used for loading data with less than or equal to 200 results.</td> + </tr> + <tr> + <td>path</td> + <td style="text-align: center"> </td> + <td>cloudant: as database name if database is not present</td> + </tr> + <tr> + <td>schemaSampleSize</td> + <td style="text-align: center">"-1"</td> + <td>the sample size used to discover the schema for this temp table. -1 scans all documents</td> + </tr> + <tr> + <td>bulkSize</td> + <td style="text-align: center">200</td> + <td>the bulk save size</td> + </tr> + <tr> + <td>createDBOnSave</td> + <td style="text-align: center">"false"</td> + <td>whether to create a new database during save operation. If false, a database should already exist. If true, a new database will be created. If true, and a database with a provided name already exists, an error will be raised.</td> + </tr> + </tbody> +</table> + +<p>For fast loading, views are loaded without include_docs. Thus, a derived schema will always be: <code class="highlighter-rouge"><span class="p">{</span><span class="err">id,</span><span class="w"> </span><span class="err">key,</span><span class="w"> </span><span class="err">value</span><span class="p">}</span></code>, where <code class="highlighter-rouge">value</code> can be a compound field. 
An example of loading data from a view:</p> + +<div class="language-python highlighter-rouge"><pre class="highlight"><code><span class="n">spark</span><span class="o">.</span><span class="n">sql</span><span class="p">(</span><span class="s">" CREATE TEMPORARY TABLE flightTable1 USING org.apache.bahir.cloudant OPTIONS ( database 'n_flight', view '_design/view/_view/AA0')"</span><span class="p">)</span> + +</code></pre> +</div> + +<h3 id="configuration-on-cloudant-receiver-for-spark-streaming">Configuration on Cloudant Receiver for Spark Streaming</h3> + +<table> + <thead> + <tr> + <th>Name</th> + <th style="text-align: center">Default</th> + <th>Meaning</th> + </tr> + </thead> + <tbody> + <tr> + <td>cloudant.host</td> + <td style="text-align: center"> </td> + <td>cloudant host url</td> + </tr> + <tr> + <td>cloudant.username</td> + <td style="text-align: center"> </td> + <td>cloudant userid</td> + </tr> + <tr> + <td>cloudant.password</td> + <td style="text-align: center"> </td> + <td>cloudant password</td> + </tr> + <tr> + <td>database</td> + <td style="text-align: center"> </td> + <td>cloudant database name</td> + </tr> + <tr> + <td>selector</td> + <td style="text-align: center">all documents</td> + <td>a selector written in Cloudant Query syntax, specifying conditions for selecting documents. Only documents satisfying the selector's conditions will be retrieved from Cloudant and loaded into Spark.</td> + </tr> + </tbody> +</table> + +<h3 id="configuration-in-spark-submit-using---conf-option">Configuration in spark-submit using --conf option</h3> + +<p>The above stated configuration keys can also be set using the <code class="highlighter-rouge">spark-submit --conf</code> option. 
When passing configuration in spark-submit, make sure to add "spark." as a prefix to the keys.</p> + +<h2 id="examples">Examples</h2> + +<h3 id="python-api">Python API</h3> + +<h4 id="using-sql-in-python">Using SQL In Python</h4> + +<div class="language-python highlighter-rouge"><pre class="highlight"><code><span class="n">spark</span> <span class="o">=</span> <span class="n">SparkSession</span>\ + <span class="o">.</span><span class="n">builder</span>\ + <span class="o">.</span><span class="n">appName</span><span class="p">(</span><span class="s">"Cloudant Spark SQL Example in Python using temp tables"</span><span class="p">)</span>\ + <span class="o">.</span><span class="n">config</span><span class="p">(</span><span class="s">"cloudant.host"</span><span class="p">,</span><span class="s">"ACCOUNT.cloudant.com"</span><span class="p">)</span>\ + <span class="o">.</span><span class="n">config</span><span class="p">(</span><span class="s">"cloudant.username"</span><span class="p">,</span> <span class="s">"USERNAME"</span><span class="p">)</span>\ + <span class="o">.</span><span class="n">config</span><span class="p">(</span><span class="s">"cloudant.password"</span><span class="p">,</span><span class="s">"PASSWORD"</span><span class="p">)</span>\ + <span class="o">.</span><span class="n">getOrCreate</span><span class="p">()</span> + + +<span class="c"># Loading temp table from Cloudant db</span> +<span class="n">spark</span><span class="o">.</span><span class="n">sql</span><span class="p">(</span><span class="s">" CREATE TEMPORARY TABLE airportTable USING org.apache.bahir.cloudant OPTIONS ( database 'n_airportcodemapping')"</span><span class="p">)</span> +<span class="n">airportData</span> <span class="o">=</span> <span class="n">spark</span><span class="o">.</span><span class="n">sql</span><span class="p">(</span><span class="s">"SELECT _id, airportName FROM airportTable WHERE _id &gt;= 'CAA' AND _id &lt;= 'GAA' ORDER BY _id"</span><span class="p">)</span> +<span 
class="n">airportData</span><span class="o">.</span><span class="n">printSchema</span><span class="p">()</span> +<span class="k">print</span> <span class="s">'Total # of rows in airportData: '</span> <span class="o">+</span> <span class="nb">str</span><span class="p">(</span><span class="n">airportData</span><span class="o">.</span><span class="n">count</span><span class="p">())</span> +<span class="k">for</span> <span class="n">code</span> <span class="ow">in</span> <span class="n">airportData</span><span class="o">.</span><span class="n">collect</span><span class="p">():</span> + <span class="k">print</span> <span class="n">code</span><span class="o">.</span><span class="n">_id</span> +</code></pre> +</div> + +<p>See <a href="examples/python/CloudantApp.py">CloudantApp.py</a> for examples.</p> + +<p>Submit job example: +<code class="highlighter-rouge"> +spark-submit --packages org.apache.bahir:spark-sql-cloudant_2.11:2.2.0-SNAPSHOT --conf spark.cloudant.host=ACCOUNT.cloudant.com --conf spark.cloudant.username=USERNAME --conf spark.cloudant.password=PASSWORD sql-cloudant/examples/python/CloudantApp.py +</code></p> + +<h4 id="using-dataframe-in-python">Using DataFrame In Python</h4> + +<div class="language-python highlighter-rouge"><pre class="highlight"><code><span class="n">spark</span> <span class="o">=</span> <span class="n">SparkSession</span>\ + <span class="o">.</span><span class="n">builder</span>\ + <span class="o">.</span><span class="n">appName</span><span class="p">(</span><span class="s">"Cloudant Spark SQL Example in Python using dataframes"</span><span class="p">)</span>\ + <span class="o">.</span><span class="n">config</span><span class="p">(</span><span class="s">"cloudant.host"</span><span class="p">,</span><span class="s">"ACCOUNT.cloudant.com"</span><span class="p">)</span>\ + <span class="o">.</span><span class="n">config</span><span class="p">(</span><span class="s">"cloudant.username"</span><span class="p">,</span> <span 
class="s">"USERNAME"</span><span class="p">)</span>\ + <span class="o">.</span><span class="n">config</span><span class="p">(</span><span class="s">"cloudant.password"</span><span class="p">,</span><span class="s">"PASSWORD"</span><span class="p">)</span>\ + <span class="o">.</span><span class="n">config</span><span class="p">(</span><span class="s">"jsonstore.rdd.partitions"</span><span class="p">,</span> <span class="mi">8</span><span class="p">)</span>\ + <span class="o">.</span><span class="n">getOrCreate</span><span class="p">()</span> + +<span class="c"># ***1. Loading dataframe from Cloudant db</span> +<span class="n">df</span> <span class="o">=</span> <span class="n">spark</span><span class="o">.</span><span class="n">read</span><span class="o">.</span><span class="n">load</span><span class="p">(</span><span class="s">"n_airportcodemapping"</span><span class="p">,</span> <span class="s">"org.apache.bahir.cloudant"</span><span class="p">)</span> +<span class="n">df</span><span class="o">.</span><span class="n">cache</span><span class="p">()</span> +<span class="n">df</span><span class="o">.</span><span class="n">printSchema</span><span class="p">()</span> +<span class="n">df</span><span class="o">.</span><span class="nb">filter</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">airportName</span> <span class="o">>=</span> <span class="s">'Moscow'</span><span class="p">)</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="s">"_id"</span><span class="p">,</span><span class="s">'airportName'</span><span class="p">)</span><span class="o">.</span><span class="n">show</span><span class="p">()</span> +<span class="n">df</span><span class="o">.</span><span class="nb">filter</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">_id</span> <span class="o">>=</span> <span class="s">'CAA'</span><span class="p">)</span><span 
class="o">.</span><span class="n">select</span><span class="p">(</span><span class="s">"_id"</span><span class="p">,</span><span class="s">'airportName'</span><span class="p">)</span><span class="o">.</span><span class="n">show</span><span class="p">()</span> +</code></pre> +</div> + +<p>See <a href="examples/python/CloudantDF.py">CloudantDF.py</a> for examples.</p> + +<p>In case of doing multiple operations on a dataframe (select, filter etc.), +you should persist a dataframe. Otherwise, every operation on a dataframe will load the same data from Cloudant again. +Persisting will also speed up computation. This statement will persist an RDD in memory: <code class="highlighter-rouge">df.cache()</code>. Alternatively for large dbs to persist in memory & disk, use:</p> + +<div class="language-python highlighter-rouge"><pre class="highlight"><code><span class="kn">from</span> <span class="nn">pyspark</span> <span class="kn">import</span> <span class="n">StorageLevel</span> +<span class="n">df</span><span class="o">.</span><span class="n">persist</span><span class="p">(</span><span class="n">storageLevel</span> <span class="o">=</span> <span class="n">StorageLevel</span><span class="p">(</span><span class="bp">True</span><span class="p">,</span> <span class="bp">True</span><span class="p">,</span> <span class="bp">False</span><span class="p">,</span> <span class="bp">True</span><span class="p">,</span> <span class="mi">1</span><span class="p">))</span> +</code></pre> +</div> + +<p><a href="examples/python/CloudantDFOption.py">Sample code</a> on using DataFrame option to define cloudant configuration</p> + +<h3 id="scala-api">Scala API</h3> + +<h4 id="using-sql-in-scala">Using SQL In Scala</h4> + +<div class="language-scala highlighter-rouge"><pre class="highlight"><code><span class="k">val</span> <span class="n">spark</span> <span class="k">=</span> <span class="nc">SparkSession</span> + <span class="o">.</span><span class="n">builder</span><span class="o">()</span> + 
<span class="o">.</span><span class="n">appName</span><span class="o">(</span><span class="s">"Cloudant Spark SQL Example"</span><span class="o">)</span> + <span class="o">.</span><span class="n">config</span><span class="o">(</span><span class="s">"cloudant.host"</span><span class="o">,</span><span class="s">"ACCOUNT.cloudant.com"</span><span class="o">)</span> + <span class="o">.</span><span class="n">config</span><span class="o">(</span><span class="s">"cloudant.username"</span><span class="o">,</span> <span class="s">"USERNAME"</span><span class="o">)</span> + <span class="o">.</span><span class="n">config</span><span class="o">(</span><span class="s">"cloudant.password"</span><span class="o">,</span><span class="s">"PASSWORD"</span><span class="o">)</span> + <span class="o">.</span><span class="n">getOrCreate</span><span class="o">()</span> + +<span class="c1">// For implicit conversions of Dataframe to RDDs +</span><span class="k">import</span> <span class="nn">spark.implicits._</span> + +<span class="c1">// create a temp table from Cloudant db and query it using sql syntax +</span><span class="n">spark</span><span class="o">.</span><span class="n">sql</span><span class="o">(</span> + <span class="n">s</span><span class="s">""" + |CREATE TEMPORARY TABLE airportTable + |USING org.apache.bahir.cloudant + |OPTIONS ( database 'n_airportcodemapping') + """</span><span class="o">.</span><span class="n">stripMargin</span><span class="o">)</span> +<span class="c1">// create a dataframe +</span><span class="k">val</span> <span class="n">airportData</span> <span class="k">=</span> <span class="n">spark</span><span class="o">.</span><span class="n">sql</span><span class="o">(</span><span class="s">"SELECT _id, airportName FROM airportTable WHERE _id >= 'CAA' AND _id <= 'GAA' ORDER BY _id"</span><span class="o">)</span> +<span class="n">airportData</span><span class="o">.</span><span class="n">printSchema</span><span class="o">()</span> +<span 
class="n">println</span><span class="o">(</span><span class="n">s</span><span class="s">"Total # of rows in airportData: "</span> <span class="o">+</span> <span class="n">airportData</span><span class="o">.</span><span class="n">count</span><span class="o">())</span> +<span class="c1">// convert dataframe to array of Rows, and process each row +</span><span class="n">airportData</span><span class="o">.</span><span class="n">map</span><span class="o">(</span><span class="n">t</span> <span class="k">=></span> <span class="s">"code: "</span> <span class="o">+</span> <span class="n">t</span><span class="o">(</span><span class="mi">0</span><span class="o">)</span> <span class="o">+</span> <span class="s">",name:"</span> <span class="o">+</span> <span class="n">t</span><span class="o">(</span><span class="mi">1</span><span class="o">)).</span><span class="n">collect</span><span class="o">().</span><span class="n">foreach</span><span class="o">(</span><span class="n">println</span><span class="o">)</span> +</code></pre> +</div> +<p>See <a href="examples/scala/src/main/scala/mytest/spark/CloudantApp.scala">CloudantApp.scala</a> for examples.</p> + +<p>Submit job example: +<code class="highlighter-rouge"> +spark-submit --class org.apache.spark.examples.sql.cloudant.CloudantApp --packages org.apache.bahir:spark-sql-cloudant_2.11:2.2.0-SNAPSHOT --conf spark.cloudant.host=ACCOUNT.cloudant.com --conf spark.cloudant.username=USERNAME --conf spark.cloudant.password=PASSWORD /path/to/spark-sql-cloudant_2.11-2.2.0-SNAPSHOT-tests.jar +</code></p> + +<h3 id="using-dataframe-in-scala">Using DataFrame In Scala</h3> + +<div class="language-scala highlighter-rouge"><pre class="highlight"><code><span class="k">val</span> <span class="n">spark</span> <span class="k">=</span> <span class="nc">SparkSession</span> + <span class="o">.</span><span class="n">builder</span><span class="o">()</span> + <span class="o">.</span><span class="n">appName</span><span class="o">(</span><span 
class="s">"Cloudant Spark SQL Example with Dataframe"</span><span class="o">)</span> + <span class="o">.</span><span class="n">config</span><span class="o">(</span><span class="s">"cloudant.host"</span><span class="o">,</span><span class="s">"ACCOUNT.cloudant.com"</span><span class="o">)</span> + <span class="o">.</span><span class="n">config</span><span class="o">(</span><span class="s">"cloudant.username"</span><span class="o">,</span> <span class="s">"USERNAME"</span><span class="o">)</span> + <span class="o">.</span><span class="n">config</span><span class="o">(</span><span class="s">"cloudant.password"</span><span class="o">,</span><span class="s">"PASSWORD"</span><span class="o">)</span> + <span class="o">.</span><span class="n">config</span><span class="o">(</span><span class="s">"createDBOnSave"</span><span class="o">,</span><span class="s">"true"</span><span class="o">)</span> <span class="c1">// to create a db on save +</span> <span class="o">.</span><span class="n">config</span><span class="o">(</span><span class="s">"jsonstore.rdd.partitions"</span><span class="o">,</span> <span class="s">"20"</span><span class="o">)</span> <span class="c1">// using 20 partitions +</span> <span class="o">.</span><span class="n">getOrCreate</span><span class="o">()</span> + +<span class="c1">// 1. 
Loading data from Cloudant db +</span><span class="k">val</span> <span class="n">df</span> <span class="k">=</span> <span class="n">spark</span><span class="o">.</span><span class="n">read</span><span class="o">.</span><span class="n">format</span><span class="o">(</span><span class="s">"org.apache.bahir.cloudant"</span><span class="o">).</span><span class="n">load</span><span class="o">(</span><span class="s">"n_flight"</span><span class="o">)</span> +<span class="c1">// Caching df in memory to speed computations +// and not to retrieve data from cloudant again +</span><span class="n">df</span><span class="o">.</span><span class="n">cache</span><span class="o">()</span> +<span class="n">df</span><span class="o">.</span><span class="n">printSchema</span><span class="o">()</span> + +<span class="c1">// 2. Saving dataframe to Cloudant db +</span><span class="k">val</span> <span class="n">df2</span> <span class="k">=</span> <span class="n">df</span><span class="o">.</span><span class="n">filter</span><span class="o">(</span><span class="n">df</span><span class="o">(</span><span class="s">"flightSegmentId"</span><span class="o">)</span> <span class="o">===</span> <span class="s">"AA106"</span><span class="o">)</span> + <span class="o">.</span><span class="n">select</span><span class="o">(</span><span class="s">"flightSegmentId"</span><span class="o">,</span><span class="s">"economyClassBaseCost"</span><span class="o">)</span> +<span class="n">df2</span><span class="o">.</span><span class="n">show</span><span class="o">()</span> +<span class="n">df2</span><span class="o">.</span><span class="n">write</span><span class="o">.</span><span class="n">format</span><span class="o">(</span><span class="s">"org.apache.bahir.cloudant"</span><span class="o">).</span><span class="n">save</span><span class="o">(</span><span class="s">"n_flight2"</span><span class="o">)</span> +</code></pre> +</div> + +<p>See <a 
href="examples/scala/src/main/scala/mytest/spark/CloudantDF.scala">CloudantDF.scala</a> for examples.</p> + +<p><a href="examples/scala/src/main/scala/mytest/spark/CloudantDFOption.scala">Sample code</a> on using DataFrame option to define Cloudant configuration.</p> + +<h3 id="using-streams-in-scala">Using Streams In Scala</h3> + +<div class="language-scala highlighter-rouge"><pre class="highlight"><code><span class="k">val</span> <span class="n">ssc</span> <span class="k">=</span> <span class="k">new</span> <span class="nc">StreamingContext</span><span class="o">(</span><span class="n">sparkConf</span><span class="o">,</span> <span class="nc">Seconds</span><span class="o">(</span><span class="mi">10</span><span class="o">))</span> +<span class="k">val</span> <span class="n">changes</span> <span class="k">=</span> <span class="n">ssc</span><span class="o">.</span><span class="n">receiverStream</span><span class="o">(</span><span class="k">new</span> <span class="nc">CloudantReceiver</span><span class="o">(</span><span class="nc">Map</span><span class="o">(</span> + <span class="s">"cloudant.host"</span> <span class="o">-></span> <span class="s">"ACCOUNT.cloudant.com"</span><span class="o">,</span> + <span class="s">"cloudant.username"</span> <span class="o">-></span> <span class="s">"USERNAME"</span><span class="o">,</span> + <span class="s">"cloudant.password"</span> <span class="o">-></span> <span class="s">"PASSWORD"</span><span class="o">,</span> + <span class="s">"database"</span> <span class="o">-></span> <span class="s">"n_airportcodemapping"</span><span class="o">)))</span> + +<span class="n">changes</span><span class="o">.</span><span class="n">foreachRDD</span><span class="o">((</span><span class="n">rdd</span><span class="k">:</span> <span class="kt">RDD</span><span class="o">[</span><span class="kt">String</span><span class="o">],</span> <span class="n">time</span><span class="k">:</span> <span class="kt">Time</span><span class="o">)</span> <span 
class="k">=></span> <span class="o">{</span> + <span class="c1">// Get the singleton instance of SparkSession +</span> <span class="k">val</span> <span class="n">spark</span> <span class="k">=</span> <span class="nc">SparkSessionSingleton</span><span class="o">.</span><span class="n">getInstance</span><span class="o">(</span><span class="n">rdd</span><span class="o">.</span><span class="n">sparkContext</span><span class="o">.</span><span class="n">getConf</span><span class="o">)</span> + + <span class="n">println</span><span class="o">(</span><span class="n">s</span><span class="s">"========= $time ========="</span><span class="o">)</span> + <span class="c1">// Convert RDD[String] to DataFrame +</span> <span class="k">val</span> <span class="n">changesDataFrame</span> <span class="k">=</span> <span class="n">spark</span><span class="o">.</span><span class="n">read</span><span class="o">.</span><span class="n">json</span><span class="o">(</span><span class="n">rdd</span><span class="o">)</span> + <span class="k">if</span> <span class="o">(!</span><span class="n">changesDataFrame</span><span class="o">.</span><span class="n">schema</span><span class="o">.</span><span class="n">isEmpty</span><span class="o">)</span> <span class="o">{</span> + <span class="n">changesDataFrame</span><span class="o">.</span><span class="n">printSchema</span><span class="o">()</span> + <span class="n">changesDataFrame</span><span class="o">.</span><span class="n">select</span><span class="o">(</span><span class="s">"*"</span><span class="o">).</span><span class="n">show</span><span class="o">()</span> + <span class="o">....</span> + <span class="o">}</span> +<span class="o">})</span> +<span class="n">ssc</span><span class="o">.</span><span class="n">start</span><span class="o">()</span> +<span class="c1">// run streaming for 120 secs +</span><span class="nc">Thread</span><span class="o">.</span><span class="n">sleep</span><span class="o">(</span><span class="mi">120000L</span><span 
class="o">)</span> +<span class="n">ssc</span><span class="o">.</span><span class="n">stop</span><span class="o">(</span><span class="kc">true</span><span class="o">)</span> + +</code></pre> +</div> + +<p>See <a href="examples/scala/src/main/scala/mytest/spark/CloudantStreaming.scala">CloudantStreaming.scala</a> for examples.</p> + +<p>By default, Spark Streaming will load all documents from a database. If you want to limit the loading to +specific documents, use <code class="highlighter-rouge">selector</code> option of <code class="highlighter-rouge">CloudantReceiver</code> and specify your conditions +(See <a href="examples/scala/src/main/scala/mytest/spark/CloudantStreamingSelector.scala">CloudantStreamingSelector.scala</a> +example for more details):</p> + +<div class="language-scala highlighter-rouge"><pre class="highlight"><code><span class="k">val</span> <span class="n">changes</span> <span class="k">=</span> <span class="n">ssc</span><span class="o">.</span><span class="n">receiverStream</span><span class="o">(</span><span class="k">new</span> <span class="nc">CloudantReceiver</span><span class="o">(</span><span class="nc">Map</span><span class="o">(</span> + <span class="s">"cloudant.host"</span> <span class="o">-></span> <span class="s">"ACCOUNT.cloudant.com"</span><span class="o">,</span> + <span class="s">"cloudant.username"</span> <span class="o">-></span> <span class="s">"USERNAME"</span><span class="o">,</span> + <span class="s">"cloudant.password"</span> <span class="o">-></span> <span class="s">"PASSWORD"</span><span class="o">,</span> + <span class="s">"database"</span> <span class="o">-></span> <span class="s">"sales"</span><span class="o">,</span> + <span class="s">"selector"</span> <span class="o">-></span> <span class="s">"{\"month\":\"May\", \"rep\":\"John\"}"</span><span class="o">)))</span> +</code></pre> +</div> + + </div> +</div> + + + + <hr> + + <!-- <p>© 2017 </p>--> + <footer class="site-footer"> + <div class="wrapper"> + <div 
class="footer-col-wrapper"> + + <div style="text-align:center;"> + + <div> + Copyright © 2016-2017 <a href="http://www.apache.org">The Apache Software Foundation</a>. + Licensed under the <a href="http://www.apache.org/licenses/LICENSE-2.0">Apache License, Version + 2.0</a>. + <br> + + Apache and the Apache Feather logo are trademarks of The Apache Software Foundation. + + </div> + </div> + </div> + </div> +</footer> + + </div> + + + + + <script type="text/javascript"> + (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){ + (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o), + m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m) + })(window,document,'script','//www.google-analytics.com/analytics.js','ga'); + + ga('create', 'UA-79140859-1', 'bahir.apache.org'); + ga('require', 'linkid', 'linkid.js'); + ga('send', 'pageview'); + +</script> + + + + <script src="/assets/themes/apache-clean/jquery/jquery-2.1.1.min.js"></script> + + <script src="/assets/themes/apache-clean/bootstrap/js/bootstrap.min.js"></script> + + + </body> +</html> +
http://git-wip-us.apache.org/repos/asf/bahir-website/blob/7ad4d5a8/content/docs/spark/current/spark-sql-streaming-akka/index.html ---------------------------------------------------------------------- diff --git a/content/docs/spark/current/spark-sql-streaming-akka/index.html b/content/docs/spark/current/spark-sql-streaming-akka/index.html new file mode 100644 index 0000000..6fb3cce --- /dev/null +++ b/content/docs/spark/current/spark-sql-streaming-akka/index.html @@ -0,0 +1,380 @@ + + +<!DOCTYPE html> +<html lang="en"> + <head> + <meta charset="utf-8"> + <title>Spark Structured Streaming Akka</title> + <meta name="description" content="Spark Structured Streaming Akka"> + <meta name="author" content=""> + + <!-- Enable responsive viewport --> + <meta name="viewport" content="width=device-width, initial-scale=1.0"> + + <!-- Le HTML5 shim, for IE6-8 support of HTML elements --> + <!--[if lt IE 9]> + <script src="http://html5shim.googlecode.com/svn/trunk/html5.js"></script> + <![endif]--> + + <!-- Le styles --> + <link href="/assets/themes/apache-clean/bootstrap/css/bootstrap.css" rel="stylesheet"> + <link href="/assets/themes/apache-clean/css/style.css?body=1" rel="stylesheet" type="text/css"> + <link href="/assets/themes/apache-clean/css/syntax.css" rel="stylesheet" type="text/css" media="screen" /> + <!-- Le fav and touch icons --> + <!-- Update these with your own images + <link rel="shortcut icon" href="images/favicon.ico"> + <link rel="apple-touch-icon" href="images/apple-touch-icon.png"> + <link rel="apple-touch-icon" sizes="72x72" href="images/apple-touch-icon-72x72.png"> + <link rel="apple-touch-icon" sizes="114x114" href="images/apple-touch-icon-114x114.png"> + --> + + <!-- make tables sortable by adding class tag "sortable" to table elements --> + <script src="http://www.kryogenix.org/code/browser/sorttable/sorttable.js"></script> + + + </head> + + <body> + + + +<!-- Navigation --> +<div id="nav-bar"> + <nav id="nav-container" class="navbar navbar-inverse " 
role="navigation"> + <div class="container"> + <!-- Brand and toggle get grouped for better mobile display --> + + <div class="navbar-header page-scroll"> + <button type="button" class="navbar-toggle" data-toggle="collapse" data-target=".navbar-collapse"> + <span class="sr-only">Toggle navigation</span> + <span class="icon-bar"></span> + <span class="icon-bar"></span> + <span class="icon-bar"></span> + </button> + <a class="navbar-brand page-scroll" href="/#home">Home</a> + </div> + <!-- Collect the nav links, forms, and other content for toggling --> + <nav class="navbar-collapse collapse" role="navigation"> + <ul class="nav navbar-nav"> + + + + <li id="download"> + + <a href="#" data-toggle="dropdown" class="dropdown-toggle">Download<b class="caret"></b></a> + <ul class="dropdown-menu dropdown-left"> + + + <li><a href="/downloads/spark" target="_self">Bahir Spark Extensions</a></li> + + + <li><a href="/downloads/flink" target="_self">Bahir Flink Extensions</a></li> + + </ul> + + </li> + + + + + <li id="community"> + + <a href="#" data-toggle="dropdown" class="dropdown-toggle">Community<b class="caret"></b></a> + <ul class="dropdown-menu dropdown-left"> + + + <li><a href="/community" target="_self">Get Involved</a></li> + + + <li><a href="/contributing" target="_self">Contributing</a></li> + + + <li><a href="/contributing-extensions" target="_self">Contributing Extensions</a></li> + + + <li><a href="https://issues.apache.org/jira/browse/BAHIR" target="_blank">Issue Tracker</a></li> + + + <li><a href="https://github.com/apache/bahir" target="_blank">Source Code</a></li> + + + <li><a href="/community-members" target="_self">Project Committers</a></li> + + </ul> + + </li> + + + + + <li id="documentation"> + + <a href="#" data-toggle="dropdown" class="dropdown-toggle">Documentation<b class="caret"></b></a> + <ul class="dropdown-menu dropdown-left"> + + + <li><a href="/docs/spark/overview" target="_self">Bahir Spark Extensions</a></li> + + + <li><a 
href="/docs/flink/overview" target="_self">Bahir Flink Extensions</a></li> + + </ul> + + </li> + + + + + <li id="github"> + + <a href="#" data-toggle="dropdown" class="dropdown-toggle">GitHub<b class="caret"></b></a> + <ul class="dropdown-menu dropdown-left"> + + + <li><a href="https://github.com/apache/bahir" target="_blank">Bahir Spark Extensions</a></li> + + + <li><a href="https://github.com/apache/bahir-flink" target="_blank">Bahir Flink Extensions</a></li> + + + <li><a href="https://github.com/apache/bahir-website" target="_blank">Bahir Website</a></li> + + </ul> + + </li> + + + + + <li id="apache"> + + <a href="#" data-toggle="dropdown" class="dropdown-toggle">Apache<b class="caret"></b></a> + <ul class="dropdown-menu dropdown-left"> + + + <li><a href="http://www.apache.org/foundation/how-it-works.html" target="_blank">Apache Software Foundation</a></li> + + + <li><a href="http://www.apache.org/licenses/" target="_blank">Apache License</a></li> + + + <li><a href="http://www.apache.org/foundation/sponsorship" target="_blank">Sponsorship</a></li> + + + <li><a href="http://www.apache.org/foundation/thanks.html" target="_blank">Thanks</a></li> + + + <li><a href="/privacy-policy" target="_self">Privacy Policy</a></li> + + </ul> + + </li> + + + </ul> + </nav><!--/.navbar-collapse --> + <!-- /.navbar-collapse --> + </div> + <!-- /.container --> + </nav> +</div> + + + <div class="container"> + + + +<!--<div class="hero-unit Spark Structured Streaming Akka"> + <h1></h1> +</div> +--> + +<div class="row"> + <div class="col-md-12"> + <!-- + +--> + +<p>A library for reading data from Akka Actors using Spark SQL Streaming ( or Structured streaming.).</p> + +<h2 id="linking">Linking</h2> + +<p>Using SBT:</p> + +<div class="highlighter-rouge"><pre class="highlight"><code>libraryDependencies += "org.apache.bahir" %% "spark-sql-streaming-akka" % "2.2.0-SNAPSHOT" +</code></pre> +</div> + +<p>Using Maven:</p> + +<div class="highlighter-rouge"><pre 
class="highlight"><code><dependency> + <groupId>org.apache.bahir</groupId> + <artifactId>spark-sql-streaming-akka_2.11</artifactId> + <version>2.2.0-SNAPSHOT</version> +</dependency> +</code></pre> +</div> + +<p>This library can also be added to Spark jobs launched through <code class="highlighter-rouge">spark-shell</code> or <code class="highlighter-rouge">spark-submit</code> by using the <code class="highlighter-rouge">--packages</code> command line option. +For example, to include it when starting the spark shell:</p> + +<div class="highlighter-rouge"><pre class="highlight"><code>$ bin/spark-shell --packages org.apache.bahir:spark-sql-streaming-akka_2.11:2.2.0-SNAPSHOT +</code></pre> +</div> + +<p>Unlike using <code class="highlighter-rouge">--jars</code>, using <code class="highlighter-rouge">--packages</code> ensures that this library and its dependencies will be added to the classpath. +The <code class="highlighter-rouge">--packages</code> argument can also be used with <code class="highlighter-rouge">bin/spark-submit</code>.</p> + +<p>This library is compiled for Scala 2.11 only, and intends to support Spark 2.0 onwards.</p> + +<h2 id="examples">Examples</h2> + +<p>A SQL Stream can be created with data streams received from Akka Feeder actor using,</p> + +<div class="highlighter-rouge"><pre class="highlight"><code> sqlContext.readStream + .format("org.apache.bahir.sql.streaming.akka.AkkaStreamSourceProvider") + .option("urlOfPublisher", "feederActorUri") + .load() +</code></pre> +</div> + +<h2 id="enable-recovering-from-failures">Enable recovering from failures.</h2> + +<p>Setting values for option <code class="highlighter-rouge">persistenceDirPath</code> helps in recovering in case of a restart, by restoring the state where it left off before the shutdown.</p> + +<div class="highlighter-rouge"><pre class="highlight"><code> sqlContext.readStream + .format("org.apache.bahir.sql.streaming.akka.AkkaStreamSourceProvider") + .option("urlOfPublisher", 
"feederActorUri") + .option("persistenceDirPath", "/path/to/localdir") + .load() +</code></pre> +</div> + +<h2 id="configuration-options">Configuration options.</h2> + +<p>This source uses <a href="http://doc.akka.io/api/akka/2.4/akka/actor/Actor.html">Akka Actor api</a>.</p> + +<ul> + <li><code class="highlighter-rouge">urlOfPublisher</code> The url of Publisher or Feeder actor that the Receiver actor connects to. Set this as the tcp url of the Publisher or Feeder actor.</li> + <li><code class="highlighter-rouge">persistenceDirPath</code> By default it is used for storing incoming messages on disk.</li> +</ul> + +<h3 id="scala-api">Scala API</h3> + +<p>An example, for scala API to count words from incoming message stream.</p> + +<div class="highlighter-rouge"><pre class="highlight"><code> // Create DataFrame representing the stream of input lines from connection + // to publisher or feeder actor + val lines = spark.readStream + .format("org.apache.bahir.sql.streaming.akka.AkkaStreamSourceProvider") + .option("urlOfPublisher", urlOfPublisher) + .load().as[(String, Timestamp)] + + // Split the lines into words + val words = lines.map(_._1).flatMap(_.split(" ")) + + // Generate running word count + val wordCounts = words.groupBy("value").count() + + // Start running the query that prints the running counts to the console + val query = wordCounts.writeStream + .outputMode("complete") + .format("console") + .start() + + query.awaitTermination() +</code></pre> +</div> + +<p>Please see <code class="highlighter-rouge">AkkaStreamWordCount.scala</code> for full example.</p> + +<h3 id="java-api">Java API</h3> + +<p>An example, for Java API to count words from incoming message stream.</p> + +<div class="highlighter-rouge"><pre class="highlight"><code> // Create DataFrame representing the stream of input lines from connection + // to publisher or feeder actor + Dataset<String> lines = spark + .readStream() + 
.format("org.apache.bahir.sql.streaming.akka.AkkaStreamSourceProvider") + .option("urlOfPublisher", urlOfPublisher) + .load().select("value").as(Encoders.STRING()); + + // Split the lines into words + Dataset<String> words = lines.flatMap(new FlatMapFunction<String, String>() { + @Override + public Iterator<String> call(String s) throws Exception { + return Arrays.asList(s.split(" ")).iterator(); + } + }, Encoders.STRING()); + + // Generate running word count + Dataset<Row> wordCounts = words.groupBy("value").count(); + + // Start running the query that prints the running counts to the console + StreamingQuery query = wordCounts.writeStream() + .outputMode("complete") + .format("console") + .start(); + + query.awaitTermination(); +</code></pre> +</div> + +<p>Please see <code class="highlighter-rouge">JavaAkkaStreamWordCount.java</code> for full example.</p> + + </div> +</div> + + + + <hr> + + <!-- <p>© 2017 </p>--> + <footer class="site-footer"> + <div class="wrapper"> + <div class="footer-col-wrapper"> + + <div style="text-align:center;"> + + <div> + Copyright © 2016-2017 <a href="http://www.apache.org">The Apache Software Foundation</a>. + Licensed under the <a href="http://www.apache.org/licenses/LICENSE-2.0">Apache License, Version + 2.0</a>. + <br> + + Apache and the Apache Feather logo are trademarks of The Apache Software Foundation. 
+ + </div> + </div> + </div> + </div> +</footer> + + </div> + + + + + <script type="text/javascript"> + (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){ + (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o), + m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m) + })(window,document,'script','//www.google-analytics.com/analytics.js','ga'); + + ga('create', 'UA-79140859-1', 'bahir.apache.org'); + ga('require', 'linkid', 'linkid.js'); + ga('send', 'pageview'); + +</script> + + + + <script src="/assets/themes/apache-clean/jquery/jquery-2.1.1.min.js"></script> + + <script src="/assets/themes/apache-clean/bootstrap/js/bootstrap.min.js"></script> + + + </body> +</html> + http://git-wip-us.apache.org/repos/asf/bahir-website/blob/7ad4d5a8/content/docs/spark/current/spark-sql-streaming-mqtt/index.html ---------------------------------------------------------------------- diff --git a/content/docs/spark/current/spark-sql-streaming-mqtt/index.html b/content/docs/spark/current/spark-sql-streaming-mqtt/index.html index 4f8fc76..e9bf272 100644 --- a/content/docs/spark/current/spark-sql-streaming-mqtt/index.html +++ b/content/docs/spark/current/spark-sql-streaming-mqtt/index.html @@ -201,7 +201,7 @@ <p>Using SBT:</p> -<div class="highlighter-rouge"><pre class="highlight"><code>libraryDependencies += "org.apache.bahir" %% "spark-sql-streaming-mqtt" % "2.1.0" +<div class="highlighter-rouge"><pre class="highlight"><code>libraryDependencies += "org.apache.bahir" %% "spark-sql-streaming-mqtt" % "2.1.0-SNAPSHOT" </code></pre> </div> @@ -210,7 +210,7 @@ <div class="highlighter-rouge"><pre class="highlight"><code><dependency> <groupId>org.apache.bahir</groupId> <artifactId>spark-sql-streaming-mqtt_2.11</artifactId> - <version>2.1.0</version> + <version>2.2.0-SNAPSHOT</version> </dependency> </code></pre> </div> @@ -218,7 +218,7 @@ <p>This library can also be added to Spark jobs launched through <code 
class="highlighter-rouge">spark-shell</code> or <code class="highlighter-rouge">spark-submit</code> by using the <code class="highlighter-rouge">--packages</code> command line option. For example, to include it when starting the spark shell:</p> -<div class="highlighter-rouge"><pre class="highlight"><code>$ bin/spark-shell --packages org.apache.bahir:spark-sql-streaming-mqtt_2.11:2.1.0 +<div class="highlighter-rouge"><pre class="highlight"><code>$ bin/spark-shell --packages org.apache.bahir:spark-sql-streaming-mqtt_2.11:2.1.0-SNAPSHOT </code></pre> </div> @@ -331,7 +331,6 @@ query.awaitTermination(); <p>Please see <code class="highlighter-rouge">JavaMQTTStreamWordCount.java</code> for full example.</p> - </div> </div> http://git-wip-us.apache.org/repos/asf/bahir-website/blob/7ad4d5a8/content/docs/spark/current/spark-streaming-akka/index.html ---------------------------------------------------------------------- diff --git a/content/docs/spark/current/spark-streaming-akka/index.html b/content/docs/spark/current/spark-streaming-akka/index.html index 78e2342..c32f7c5 100644 --- a/content/docs/spark/current/spark-streaming-akka/index.html +++ b/content/docs/spark/current/spark-streaming-akka/index.html @@ -201,7 +201,7 @@ <p>Using SBT:</p> -<div class="highlighter-rouge"><pre class="highlight"><code>libraryDependencies += "org.apache.bahir" %% "spark-streaming-akka" % "2.1.0" +<div class="highlighter-rouge"><pre class="highlight"><code>libraryDependencies += "org.apache.bahir" %% "spark-streaming-akka" % "2.1.0-SNAPSHOT" </code></pre> </div> @@ -210,7 +210,7 @@ <div class="highlighter-rouge"><pre class="highlight"><code><dependency> <groupId>org.apache.bahir</groupId> <artifactId>spark-streaming-akka_2.11</artifactId> - <version>2.1.0</version> + <version>2.2.0-SNAPSHOT</version> </dependency> </code></pre> </div> @@ -218,7 +218,7 @@ <p>This library can also be added to Spark jobs launched through <code class="highlighter-rouge">spark-shell</code> or <code 
class="highlighter-rouge">spark-submit</code> by using the <code class="highlighter-rouge">--packages</code> command line option. For example, to include it when starting the spark shell:</p> -<div class="highlighter-rouge"><pre class="highlight"><code>$ bin/spark-shell --packages org.apache.bahir:spark-streaming-akka_2.11:2.1.0 +<div class="highlighter-rouge"><pre class="highlight"><code>$ bin/spark-shell --packages org.apache.bahir:spark-streaming-akka_2.11:2.1.0-SNAPSHOT </code></pre> </div> http://git-wip-us.apache.org/repos/asf/bahir-website/blob/7ad4d5a8/content/docs/spark/current/spark-streaming-mqtt/index.html ---------------------------------------------------------------------- diff --git a/content/docs/spark/current/spark-streaming-mqtt/index.html b/content/docs/spark/current/spark-streaming-mqtt/index.html index 0f512f7..c1b3f61 100644 --- a/content/docs/spark/current/spark-streaming-mqtt/index.html +++ b/content/docs/spark/current/spark-streaming-mqtt/index.html @@ -201,7 +201,7 @@ <p>Using SBT:</p> -<div class="highlighter-rouge"><pre class="highlight"><code>libraryDependencies += "org.apache.bahir" %% "spark-streaming-mqtt" % "2.1.0" +<div class="highlighter-rouge"><pre class="highlight"><code>libraryDependencies += "org.apache.bahir" %% "spark-streaming-mqtt" % "2.1.0-SNAPSHOT" </code></pre> </div> @@ -210,7 +210,7 @@ <div class="highlighter-rouge"><pre class="highlight"><code><dependency> <groupId>org.apache.bahir</groupId> <artifactId>spark-streaming-mqtt_2.11</artifactId> - <version>2.1.0</version> + <version>2.2.0-SNAPSHOT</version> </dependency> </code></pre> </div> @@ -218,7 +218,7 @@ <p>This library can also be added to Spark jobs launched through <code class="highlighter-rouge">spark-shell</code> or <code class="highlighter-rouge">spark-submit</code> by using the <code class="highlighter-rouge">--packages</code> command line option. 
For example, to include it when starting the spark shell:</p> -<div class="highlighter-rouge"><pre class="highlight"><code>$ bin/spark-shell --packages org.apache.bahir:spark-streaming-mqtt_2.11:2.1.0 +<div class="highlighter-rouge"><pre class="highlight"><code>$ bin/spark-shell --packages org.apache.bahir:spark-streaming-mqtt_2.11:2.1.0-SNAPSHOT </code></pre> </div> @@ -235,6 +235,7 @@ The <code class="highlighter-rouge">--packages</code> argument can also be used <li><code class="highlighter-rouge">brokerUrl</code> A url MqttClient connects to. Set this as the url of the Mqtt Server. e.g. tcp://localhost:1883.</li> <li><code class="highlighter-rouge">storageLevel</code> By default it is used for storing incoming messages on disk.</li> <li><code class="highlighter-rouge">topic</code> Topic MqttClient subscribes to.</li> + <li><code class="highlighter-rouge">topics</code> List of topics MqttClient subscribes to.</li> <li><code class="highlighter-rouge">clientId</code> clientId, this client is associated with. Provide the same value to recover a stopped client.</li> <li><code class="highlighter-rouge">QoS</code> The maximum quality of service to subscribe each topic at. Messages published at a lower quality of service will be received at the published QoS. Messages published at a higher quality of service will be received using the QoS specified on the subscribe.</li> <li><code class="highlighter-rouge">username</code> Sets the user name to use for the connection to Mqtt Server. Do not set it, if server does not need this. 
Setting it empty will lead to errors.</li> @@ -253,12 +254,14 @@ The <code class="highlighter-rouge">--packages</code> argument can also be used this actor can be configured to handle failures, etc.</p> <div class="highlighter-rouge"><pre class="highlight"><code>val lines = MQTTUtils.createStream(ssc, brokerUrl, topic) +val lines = MQTTUtils.createPairedStream(ssc, brokerUrl, topic) </code></pre> </div> <p>Additional mqtt connection options can be provided:</p> <pre><code class="language-Scala">val lines = MQTTUtils.createStream(ssc, brokerUrl, topic, storageLevel, clientId, username, password, cleanSession, qos, connectionTimeout, keepAliveInterval, mqttVersion) +val lines = MQTTUtils.createPairedStream(ssc, brokerUrl, topics, storageLevel, clientId, username, password, cleanSession, qos, connectionTimeout, keepAliveInterval, mqttVersion) </code></pre> <h3 id="java-api">Java API</h3> @@ -267,6 +270,7 @@ this actor can be configured to handle failures, etc.</p> this actor can be configured to handle failures, etc.</p> <div class="highlighter-rouge"><pre class="highlight"><code>JavaDStream<String> lines = MQTTUtils.createStream(jssc, brokerUrl, topic); +JavaReceiverInputDStream<Tuple2<String, String>> lines = MQTTUtils.createPairedStream(jssc, brokerUrl, topics); </code></pre> </div> http://git-wip-us.apache.org/repos/asf/bahir-website/blob/7ad4d5a8/content/docs/spark/current/spark-streaming-pubsub/index.html ---------------------------------------------------------------------- diff --git a/content/docs/spark/current/spark-streaming-pubsub/index.html b/content/docs/spark/current/spark-streaming-pubsub/index.html new file mode 100644 index 0000000..d3786b4 --- /dev/null +++ b/content/docs/spark/current/spark-streaming-pubsub/index.html @@ -0,0 +1,308 @@ + + +<!DOCTYPE html> +<html lang="en"> + <head> + <meta charset="utf-8"> + <title>Spark Streaming Google Pub-Sub</title> + <meta name="description" content="Spark Streaming Google Pub-Sub"> + <meta name="author" 
content=""> + + <!-- Enable responsive viewport --> + <meta name="viewport" content="width=device-width, initial-scale=1.0"> + + <!-- Le HTML5 shim, for IE6-8 support of HTML elements --> + <!--[if lt IE 9]> + <script src="http://html5shim.googlecode.com/svn/trunk/html5.js"></script> + <![endif]--> + + <!-- Le styles --> + <link href="/assets/themes/apache-clean/bootstrap/css/bootstrap.css" rel="stylesheet"> + <link href="/assets/themes/apache-clean/css/style.css?body=1" rel="stylesheet" type="text/css"> + <link href="/assets/themes/apache-clean/css/syntax.css" rel="stylesheet" type="text/css" media="screen" /> + <!-- Le fav and touch icons --> + <!-- Update these with your own images + <link rel="shortcut icon" href="images/favicon.ico"> + <link rel="apple-touch-icon" href="images/apple-touch-icon.png"> + <link rel="apple-touch-icon" sizes="72x72" href="images/apple-touch-icon-72x72.png"> + <link rel="apple-touch-icon" sizes="114x114" href="images/apple-touch-icon-114x114.png"> + --> + + <!-- make tables sortable by adding class tag "sortable" to table elements --> + <script src="http://www.kryogenix.org/code/browser/sorttable/sorttable.js"></script> + + + </head> + + <body> + + + +<!-- Navigation --> +<div id="nav-bar"> + <nav id="nav-container" class="navbar navbar-inverse " role="navigation"> + <div class="container"> + <!-- Brand and toggle get grouped for better mobile display --> + + <div class="navbar-header page-scroll"> + <button type="button" class="navbar-toggle" data-toggle="collapse" data-target=".navbar-collapse"> + <span class="sr-only">Toggle navigation</span> + <span class="icon-bar"></span> + <span class="icon-bar"></span> + <span class="icon-bar"></span> + </button> + <a class="navbar-brand page-scroll" href="/#home">Home</a> + </div> + <!-- Collect the nav links, forms, and other content for toggling --> + <nav class="navbar-collapse collapse" role="navigation"> + <ul class="nav navbar-nav"> + + + + <li id="download"> + + <a href="#" 
data-toggle="dropdown" class="dropdown-toggle">Download<b class="caret"></b></a> + <ul class="dropdown-menu dropdown-left"> + + + <li><a href="/downloads/spark" target="_self">Bahir Spark Extensions</a></li> + + + <li><a href="/downloads/flink" target="_self">Bahir Flink Extensions</a></li> + + </ul> + + </li> + + + + + <li id="community"> + + <a href="#" data-toggle="dropdown" class="dropdown-toggle">Community<b class="caret"></b></a> + <ul class="dropdown-menu dropdown-left"> + + + <li><a href="/community" target="_self">Get Involved</a></li> + + + <li><a href="/contributing" target="_self">Contributing</a></li> + + + <li><a href="/contributing-extensions" target="_self">Contributing Extensions</a></li> + + + <li><a href="https://issues.apache.org/jira/browse/BAHIR" target="_blank">Issue Tracker</a></li> + + + <li><a href="https://github.com/apache/bahir" target="_blank">Source Code</a></li> + + + <li><a href="/community-members" target="_self">Project Committers</a></li> + + </ul> + + </li> + + + + + <li id="documentation"> + + <a href="#" data-toggle="dropdown" class="dropdown-toggle">Documentation<b class="caret"></b></a> + <ul class="dropdown-menu dropdown-left"> + + + <li><a href="/docs/spark/overview" target="_self">Bahir Spark Extensions</a></li> + + + <li><a href="/docs/flink/overview" target="_self">Bahir Flink Extensions</a></li> + + </ul> + + </li> + + + + + <li id="github"> + + <a href="#" data-toggle="dropdown" class="dropdown-toggle">GitHub<b class="caret"></b></a> + <ul class="dropdown-menu dropdown-left"> + + + <li><a href="https://github.com/apache/bahir" target="_blank">Bahir Spark Extensions</a></li> + + + <li><a href="https://github.com/apache/bahir-flink" target="_blank">Bahir Flink Extensions</a></li> + + + <li><a href="https://github.com/apache/bahir-website" target="_blank">Bahir Website</a></li> + + </ul> + + </li> + + + + + <li id="apache"> + + <a href="#" data-toggle="dropdown" class="dropdown-toggle">Apache<b class="caret"></b></a> + 
<ul class="dropdown-menu dropdown-left"> + + + <li><a href="http://www.apache.org/foundation/how-it-works.html" target="_blank">Apache Software Foundation</a></li> + + + <li><a href="http://www.apache.org/licenses/" target="_blank">Apache License</a></li> + + + <li><a href="http://www.apache.org/foundation/sponsorship" target="_blank">Sponsorship</a></li> + + + <li><a href="http://www.apache.org/foundation/thanks.html" target="_blank">Thanks</a></li> + + + <li><a href="/privacy-policy" target="_self">Privacy Policy</a></li> + + </ul> + + </li> + + + </ul> + </nav><!--/.navbar-collapse --> + <!-- /.navbar-collapse --> + </div> + <!-- /.container --> + </nav> +</div> + + + <div class="container"> + + + +<!--<div class="hero-unit Spark Streaming Google Pub-Sub"> + <h1></h1> +</div> +--> + +<div class="row"> + <div class="col-md-12"> + <!-- + +--> + +<p>A library for reading data from <a href="https://cloud.google.com/pubsub/">Google Cloud Pub/Sub</a> using Spark Streaming.</p> + +<h2 id="linking">Linking</h2> + +<p>Using SBT:</p> + +<div class="highlighter-rouge"><pre class="highlight"><code>libraryDependencies += "org.apache.bahir" %% "spark-streaming-pubsub" % "2.2.0-SNAPSHOT" +</code></pre> +</div> + +<p>Using Maven:</p> + +<div class="highlighter-rouge"><pre class="highlight"><code><dependency> + <groupId>org.apache.bahir</groupId> + <artifactId>spark-streaming-pubsub_2.11</artifactId> + <version>2.2.0-SNAPSHOT</version> +</dependency> +</code></pre> +</div> + +<p>This library can also be added to Spark jobs launched through <code class="highlighter-rouge">spark-shell</code> or <code class="highlighter-rouge">spark-submit</code> by using the <code class="highlighter-rouge">--packages</code> command line option. 
+For example, to include it when starting the spark shell:</p> + +<div class="highlighter-rouge"><pre class="highlight"><code>$ bin/spark-shell --packages org.apache.bahir:spark-streaming-pubsub_2.11:2.2.0-SNAPSHOT +</code></pre> +</div> + +<p>Unlike using <code class="highlighter-rouge">--jars</code>, using <code class="highlighter-rouge">--packages</code> ensures that this library and its dependencies will be added to the classpath. +The <code class="highlighter-rouge">--packages</code> argument can also be used with <code class="highlighter-rouge">bin/spark-submit</code>.</p> + +<h2 id="examples">Examples</h2> + +<p>First you need to create a credential with SparkGCPCredentials, which supports four types of credentials: +* application default + <code class="highlighter-rouge">SparkGCPCredentials.builder.build()</code> +* json type service account + <code class="highlighter-rouge">SparkGCPCredentials.builder.jsonServiceAccount(PATH_TO_JSON_KEY).build()</code> +* p12 type service account + <code class="highlighter-rouge">SparkGCPCredentials.builder.p12ServiceAccount(PATH_TO_P12_KEY, EMAIL_ACCOUNT).build()</code> +* metadata service account (running on dataproc) + <code class="highlighter-rouge">SparkGCPCredentials.builder.metadataServiceAccount().build()</code></p> + +<h3 id="scala-api">Scala API</h3> + +<div class="highlighter-rouge"><pre class="highlight"><code>val lines = PubsubUtils.createStream(ssc, projectId, subscriptionName, credential, ..) +</code></pre> +</div> + +<h3 id="java-api">Java API</h3> + +<div class="highlighter-rouge"><pre class="highlight"><code>JavaDStream<SparkPubsubMessage> lines = PubsubUtils.createStream(jssc, projectId, subscriptionName, credential...) 
+</code></pre> +</div> + +<p>See end-to-end examples at <a href="https://github.com/apache/bahir/tree/master/streaming-pubsub/examples">Google Cloud Pubsub Examples</a></p> + + </div> +</div> + + + + <hr> + + <!-- <p>© 2017 </p>--> + <footer class="site-footer"> + <div class="wrapper"> + <div class="footer-col-wrapper"> + + <div style="text-align:center;"> + + <div> + Copyright © 2016-2017 <a href="http://www.apache.org">The Apache Software Foundation</a>. + Licensed under the <a href="http://www.apache.org/licenses/LICENSE-2.0">Apache License, Version + 2.0</a>. + <br> + + Apache and the Apache Feather logo are trademarks of The Apache Software Foundation. + + </div> + </div> + </div> + </div> +</footer> + + </div> + + + + + <script type="text/javascript"> + (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){ + (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o), + m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m) + })(window,document,'script','//www.google-analytics.com/analytics.js','ga'); + + ga('create', 'UA-79140859-1', 'bahir.apache.org'); + ga('require', 'linkid', 'linkid.js'); + ga('send', 'pageview'); + +</script> + + + + <script src="/assets/themes/apache-clean/jquery/jquery-2.1.1.min.js"></script> + + <script src="/assets/themes/apache-clean/bootstrap/js/bootstrap.min.js"></script> + + + </body> +</html> + http://git-wip-us.apache.org/repos/asf/bahir-website/blob/7ad4d5a8/content/docs/spark/current/spark-streaming-twitter/index.html ---------------------------------------------------------------------- diff --git a/content/docs/spark/current/spark-streaming-twitter/index.html b/content/docs/spark/current/spark-streaming-twitter/index.html index ccbde83..dc35cd3 100644 --- a/content/docs/spark/current/spark-streaming-twitter/index.html +++ b/content/docs/spark/current/spark-streaming-twitter/index.html @@ -201,7 +201,7 @@ <p>Using SBT:</p> -<div class="highlighter-rouge"><pre 
class="highlight"><code>libraryDependencies += "org.apache.bahir" %% "spark-streaming-twitter" % "2.1.0" +<div class="highlighter-rouge"><pre class="highlight"><code>libraryDependencies += "org.apache.bahir" %% "spark-streaming-twitter" % "2.2.0-SNAPSHOT" </code></pre> </div> @@ -210,7 +210,7 @@ <div class="highlighter-rouge"><pre class="highlight"><code>&lt;dependency&gt; &lt;groupId&gt;org.apache.bahir&lt;/groupId&gt; &lt;artifactId&gt;spark-streaming-twitter_2.11&lt;/artifactId&gt; - &lt;version&gt;2.1.0&lt;/version&gt; + &lt;version&gt;2.2.0-SNAPSHOT&lt;/version&gt; &lt;/dependency&gt; </code></pre> </div> @@ -218,7 +218,7 @@ <p>This library can also be added to Spark jobs launched through <code class="highlighter-rouge">spark-shell</code> or <code class="highlighter-rouge">spark-submit</code> by using the <code class="highlighter-rouge">--packages</code> command line option. For example, to include it when starting the spark shell:</p> -<div class="highlighter-rouge"><pre class="highlight"><code>$ bin/spark-shell --packages org.apache.bahir:spark-streaming-twitter_2.11:2.1.0 +<div class="highlighter-rouge"><pre class="highlight"><code>$ bin/spark-shell --packages org.apache.bahir:spark-streaming-twitter_2.11:2.2.0-SNAPSHOT </code></pre> </div> @@ -248,7 +248,7 @@ TwitterUtils.createStream(jssc); </code></pre> </div> -<p>You can also either get the public stream, or get the filtered stream based on keywords. +<p>You can also either get the public stream, or get the filtered stream based on keywords. 
See end-to-end examples at <a href="https://github.com/apache/bahir/tree/master/streaming-twitter/examples">Twitter Examples</a></p> </div> http://git-wip-us.apache.org/repos/asf/bahir-website/blob/7ad4d5a8/content/docs/spark/current/spark-streaming-zeromq/index.html ---------------------------------------------------------------------- diff --git a/content/docs/spark/current/spark-streaming-zeromq/index.html b/content/docs/spark/current/spark-streaming-zeromq/index.html index 1e2157d..c8d1e59 100644 --- a/content/docs/spark/current/spark-streaming-zeromq/index.html +++ b/content/docs/spark/current/spark-streaming-zeromq/index.html @@ -201,7 +201,7 @@ <p>Using SBT:</p> -<div class="highlighter-rouge"><pre class="highlight"><code>libraryDependencies += "org.apache.bahir" %% "spark-streaming-zeromq" % "2.1.0" +<div class="highlighter-rouge"><pre class="highlight"><code>libraryDependencies += "org.apache.bahir" %% "spark-streaming-zeromq" % "2.2.0-SNAPSHOT" </code></pre> </div> @@ -210,7 +210,7 @@ <div class="highlighter-rouge"><pre class="highlight"><code>&lt;dependency&gt; &lt;groupId&gt;org.apache.bahir&lt;/groupId&gt; &lt;artifactId&gt;spark-streaming-zeromq_2.11&lt;/artifactId&gt; - &lt;version&gt;2.1.0&lt;/version&gt; + &lt;version&gt;2.2.0-SNAPSHOT&lt;/version&gt; &lt;/dependency&gt; </code></pre> </div> @@ -218,7 +218,7 @@ <p>This library can also be added to Spark jobs launched through <code class="highlighter-rouge">spark-shell</code> or <code class="highlighter-rouge">spark-submit</code> by using the <code class="highlighter-rouge">--packages</code> command line option. 
For example, to include it when starting the spark shell:</p> -<div class="highlighter-rouge"><pre class="highlight"><code>$ bin/spark-shell --packages org.apache.bahir:spark-streaming-zeromq_2.11:2.1.0 +<div class="highlighter-rouge"><pre class="highlight"><code>$ bin/spark-shell --packages org.apache.bahir:spark-streaming-zeromq_2.11:2.2.0-SNAPSHOT </code></pre> </div> http://git-wip-us.apache.org/repos/asf/bahir-website/blob/7ad4d5a8/content/docs/spark/overview/index.html ---------------------------------------------------------------------- diff --git a/content/docs/spark/overview/index.html b/content/docs/spark/overview/index.html index f102da9..31fa392 100644 --- a/content/docs/spark/overview/index.html +++ b/content/docs/spark/overview/index.html @@ -199,6 +199,7 @@ <ul> <li><a href="/docs/spark/current/documentation">Current - 2.2.0-SNAPSHOT</a></li> + <li><a href="/docs/spark/2.1.1/documentation">2.1.1</a></li> <li><a href="/docs/spark/2.1.0/documentation">2.1.0</a></li> <li><a href="/docs/spark/2.0.2/documentation">2.0.2</a></li> <li><a href="/docs/spark/2.0.1/documentation">2.0.1</a></li> http://git-wip-us.apache.org/repos/asf/bahir-website/blob/7ad4d5a8/content/downloads/flink/index.html ---------------------------------------------------------------------- diff --git a/content/downloads/flink/index.html b/content/downloads/flink/index.html index c96b31d..52744e3 100644 --- a/content/downloads/flink/index.html +++ b/content/downloads/flink/index.html @@ -201,15 +201,75 @@ <h2 id="bahir-extensions-for-apache-flink-latest-release">Bahir Extensions for Apache Flink Latest Release</h2> -<p>Currently, there isn’t a release available for Bahir Flink Extensions yet.</p> - -<p>You can still retrieve the source files from our git repository by typing:</p> +<p>Our latest Apache Bahir release for Apache Flink extensions is 1.0, released on 05/24/2017.</p> + +<table class="table table-hover sortable"> + <thead> + <tr> + <th><b>Name</b></th> + 
<th><b>Archive</b></th> + <th><b>MD5</b></th> + <!--th><b>SHA-1</b></th--> + <th><b>signature</b></th> + </tr> + </thead> + <tbody> + <tr> + <td>Apache Bahir Flink Extensions 1.0 (tar.gz)</td> + <td><a href="http://www.apache.org/dyn/closer.lua/bahir/bahir-flink/1.0/apache-bahir-flink-1.0-src.tgz">tar.gz</a></td> + <td><a href="http://www.apache.org/dist/bahir/bahir-flink/1.0/apache-bahir-flink-1.0-src.tgz.md5">MD5</a></td> + <!--td><a href="http://www.apache.org/dist/bahir/bahir-flink/1.0/apache-bahir-flink-1.0-src.tgz.sha1">SHA-1</a></td--> + <td><a href="http://www.apache.org/dist/bahir/bahir-flink/1.0/apache-bahir-flink-1.0-src.tgz.asc">ASC</a></td> + </tr> + <tr> + <td>Apache Bahir Flink Extensions 1.0 (zip)</td> + <td><a href="http://www.apache.org/dyn/closer.lua/bahir/bahir-flink/1.0/apache-bahir-flink-1.0-src.zip">zip</a></td> + <td><a href="http://www.apache.org/dist/bahir/bahir-flink/1.0/apache-bahir-flink-1.0-src.zip.md5">MD5</a></td> + <!--td><a href="http://www.apache.org/dist/bahir/bahir-flink/1.0/apache-bahir-flink-1.0-src.zip.sha1">SHA-1</a></td--> + <td><a href="http://www.apache.org/dist/bahir/bahir-flink/1.0/apache-bahir-flink-1.0-src.zip.asc">ASC</a></td> + </tr> + <tr> + <td>Release Notes</td> + <td><a href="/releases/flink/1.0/release-notes">1.0</a></td> + <td></td> + <!--td></td--> + <td></td> + </tr> + </tbody> +</table> + +<p>You can also retrieve the source files from our git repository by typing:</p> <pre> git clone https://github.com/apache/bahir-flink cd bahir-flink +git checkout -b tags/v1.0 v1.0 </pre> +<h3 id="previous-releases">Previous Releases</h3> + +<p>All previous releases of Apache Bahir Flink Extensions can be found in the <a href="http://archive.apache.org/dist/bahir/bahir-flink">archives</a>.</p> + +<h2 id="verifying-a-release">Verifying a Release</h2> + +<p>Instructions for checking hashes and signatures are given on the <a href="http://www.apache.org/info/verification.html">Verifying Apache Software Foundation 
Releases</a> page.</p> + +<p>Choose a source distribution in either <em>tar</em> or <em>zip</em> format, +and <a href="http://www.apache.org/dyn/closer.cgi#verify">verify</a> +using the corresponding <em>pgp</em> signature (using the committer file in +<a href="http://www.apache.org/dist/bahir/KEYS">KEYS</a>). +If you cannot do that, the <em>md5</em> hash file may be used to check that the +download has completed OK.</p> + +<p>For fast downloads, current source distributions are hosted on mirror servers; +older source distributions are in the +<a href="http://archive.apache.org/dist/bahir/bahir-flink/">archive</a>. +If a download from a mirror fails, retry, and the second download will likely +succeed.</p> + +<p>For security, hash and signature files are always hosted at +<a href="https://www.apache.org/dist">Apache</a>.</p> + </div> </div> http://git-wip-us.apache.org/repos/asf/bahir-website/blob/7ad4d5a8/content/downloads/spark/index.html ---------------------------------------------------------------------- diff --git a/content/downloads/spark/index.html b/content/downloads/spark/index.html index 7aa4e56..7c59b8f 100644 --- a/content/downloads/spark/index.html +++ b/content/downloads/spark/index.html @@ -199,9 +199,9 @@ <p>Please find below the latest releases of Apache Bahir for Apache Spark Extensions. 
Note that the binary artifacts for each platform are also published independently through Maven and each <a href="/docs/spark/overview">extension documentation page</a> describes how to add these artifacts to your application.</p> -<h2 id="bahir-extensions-for-apache-spark-210-release">Bahir Extensions for Apache Spark 2.1.0 Release</h2> +<h2 id="bahir-extensions-for-apache-spark-211-release">Bahir Extensions for Apache Spark 2.1.1 Release</h2> -<p>Our latest release supports Apache Spark 2.1.0, and was released on 02/22/2017.</p> +<p>Our latest release supports Apache Spark 2.1.1, and was released on 07/11/2017.</p> <table class="table table-hover sortable"> <thead> @@ -215,22 +215,22 @@ </thead> <tbody> <tr> - <td>Apache Bahir Extensions for Apache Spark 2.1.0 (tar.gz)</td> - <td><a href="https://www.apache.org/dyn/closer.lua/bahir/2.1.0/apache-bahir-2.1.0-src.tar.gz">tar.gz</a></td> - <td><a href="https://www.apache.org/dist/bahir/2.1.0/apache-bahir-2.1.0-src.tar.gz.md5">MD5</a></td> - <!--td><a href="https://www.apache.org/dist/bahir/2.1.0/apache-bahir-2.1.0-src.tar.gz.sha1">SHA-1</a></td--> - <td><a href="https://www.apache.org/dist/bahir/2.1.0/apache-bahir-2.1.0-src.tar.gz.asc">ASC</a></td> + <td>Apache Bahir Extensions for Apache Spark 2.1.1 (tar.gz)</td> + <td><a href="http://www.apache.org/dyn/closer.lua/bahir/bahir-spark/2.1.1/apache-bahir-2.1.1-src.tar.gz">tar.gz</a></td> + <td><a href="http://www.apache.org/dist/bahir/bahir-spark/2.1.1/apache-bahir-2.1.1-src.tar.gz.md5">MD5</a></td> + <!--td><a href="http://www.apache.org/dist/bahir/bahir-spark/2.1.1/apache-bahir-2.1.1-src.tar.gz.sha1">SHA-1</a></td--> + <td><a href="http://www.apache.org/dist/bahir/bahir-spark/2.1.1/apache-bahir-2.1.1-src.tar.gz.asc">ASC</a></td> </tr> <tr> - <td>Apache Bahir Extensions for Apache Spark 2.1.0 (zip)</td> - <td><a href="https://www.apache.org/dyn/closer.lua/bahir/2.1.0/apache-bahir-2.1.0-src.zip">zip</a></td> - <td><a 
href="https://www.apache.org/dist/bahir/2.1.0/apache-bahir-2.1.0-src.zip.md5">MD5</a></td> - <!--td><a href="https://www.apache.org/dist/bahir/2.1.0/apache-bahir-2.1.0-src.zip.sha1">SHA-1</a></td--> - <td><a href="https://www.apache.org/dist/bahir/2.1.0/apache-bahir-2.1.0-src.zip.asc">ASC</a></td> + <td>Apache Bahir Extensions for Apache Spark 2.1.1 (zip)</td> + <td><a href="http://www.apache.org/dyn/closer.lua/bahir/bahir-spark/2.1.1/apache-bahir-2.1.1-src.zip">zip</a></td> + <td><a href="http://www.apache.org/dist/bahir/bahir-spark/2.1.1/apache-bahir-2.1.1-src.zip.md5">MD5</a></td> + <!--td><a href="http://www.apache.org/dist/bahir/bahir-spark/2.1.1/apache-bahir-2.1.1-src.zip.sha1">SHA-1</a></td--> + <td><a href="http://www.apache.org/dist/bahir/bahir-spark/2.1.1/apache-bahir-2.1.1-src.zip.asc">ASC</a></td> </tr> <tr> <td>Release Notes</td> - <td><a href="/releases/spark/2.1.0/release-notes">2.1.0</a></td> + <td><a href="/releases/spark/2.1.1/release-notes">2.1.1</a></td> <td></td> <!--td></td--> <td></td> @@ -243,7 +243,7 @@ <pre> git clone https://github.com/apache/bahir cd bahir -git checkout -b tags/v2.1.0 v2.1.0 +git checkout -b tags/v2.1.1 v2.1.1 </pre> <h3 id="previous-releases">Previous Releases</h3> http://git-wip-us.apache.org/repos/asf/bahir-website/blob/7ad4d5a8/content/feed.xml ---------------------------------------------------------------------- diff --git a/content/feed.xml b/content/feed.xml index 624a87b..afe7441 100644 --- a/content/feed.xml +++ b/content/feed.xml @@ -6,8 +6,8 @@ </description> <link>http://bahir.apache.org/</link> <atom:link href="http://bahir.apache.org/feed.xml" rel="self" type="application/rss+xml"/> - <pubDate>Sat, 24 Jun 2017 05:25:24 -0700</pubDate> - <lastBuildDate>Sat, 24 Jun 2017 05:25:24 -0700</lastBuildDate> + <pubDate>Mon, 17 Jul 2017 10:26:27 -0700</pubDate> + <lastBuildDate>Mon, 17 Jul 2017 10:26:27 -0700</lastBuildDate> <generator>Jekyll v3.2.1</generator> <item>
