Repository: bahir-website Updated Branches: refs/heads/master 272652c5d -> bb4cd4b2a
Update website for Bahir release 2.0.2 - add release notes for version 2.0.2 - add Download link to navigation (exclude it from Home page) - add doc for 2.0.2 (generated by update-doc.sh), add link in navbar - move doc template files under 'content/docs/spark/templates' - add generated doc files under 'content/docs/spark/current' - modify update-doc.sh script to use new 'templates' folder - exclude 'templates' folder contents from being generated by Jekyll - unify heading sizes across pages - add generated _site folder to .gitignore file - update Copyright notices for 2017 Project: http://git-wip-us.apache.org/repos/asf/bahir-website/repo Commit: http://git-wip-us.apache.org/repos/asf/bahir-website/commit/bb4cd4b2 Tree: http://git-wip-us.apache.org/repos/asf/bahir-website/tree/bb4cd4b2 Diff: http://git-wip-us.apache.org/repos/asf/bahir-website/diff/bb4cd4b2 Branch: refs/heads/master Commit: bb4cd4b2a21d267ce599f2a53a5a21ad5fb5f881 Parents: 272652c Author: Christian Kadner <[email protected]> Authored: Sat Jan 28 01:28:35 2017 -0800 Committer: Christian Kadner <[email protected]> Committed: Sat Jan 28 02:58:09 2017 -0800 ---------------------------------------------------------------------- .gitignore | 1 + NOTICE | 2 +- site/_config.yml | 2 +- site/_data/navigation.yml | 5 + site/_data/project.yml | 4 +- site/_data/releases.yml | 3 + .../themes/apache-clean/_navigation.html | 3 + site/_includes/themes/apache-clean/footer.html | 2 +- site/_includes/themes/apache/footer.html | 2 +- site/community-members.md | 4 +- site/community.md | 12 +- site/docs/spark/2.0.2/documentation.md | 46 ++++++ .../spark/2.0.2/spark-sql-streaming-mqtt.md | 147 +++++++++++++++++++ site/docs/spark/2.0.2/spark-streaming-akka.md | 89 +++++++++++ site/docs/spark/2.0.2/spark-streaming-mqtt.md | 94 ++++++++++++ .../docs/spark/2.0.2/spark-streaming-twitter.md | 74 ++++++++++ site/docs/spark/2.0.2/spark-streaming-zeromq.md | 65 ++++++++ site/docs/spark/current/.gitignore | 1 - 
.../spark/current/spark-sql-streaming-mqtt.md | 147 +++++++++++++++++++ .../current/spark-sql-streaming-mqtt.template | 27 ---- site/docs/spark/current/spark-streaming-akka.md | 89 +++++++++++ .../spark/current/spark-streaming-akka.template | 26 ---- site/docs/spark/current/spark-streaming-mqtt.md | 94 ++++++++++++ .../spark/current/spark-streaming-mqtt.template | 27 ---- .../spark/current/spark-streaming-twitter.md | 74 ++++++++++ .../current/spark-streaming-twitter.template | 26 ---- .../spark/current/spark-streaming-zeromq.md | 65 ++++++++ .../current/spark-streaming-zeromq.template | 26 ---- .../templates/spark-sql-streaming-mqtt.template | 27 ++++ .../templates/spark-streaming-akka.template | 26 ++++ .../templates/spark-streaming-mqtt.template | 27 ++++ .../templates/spark-streaming-twitter.template | 26 ++++ .../templates/spark-streaming-zeromq.template | 26 ++++ site/download.md | 14 +- site/releases/2.0.0/release-notes.md | 69 +++++++++ site/releases/2.0.2/release-notes.md | 43 ++++++ update-doc.sh | 66 +++++++-- 37 files changed, 1315 insertions(+), 166 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/bahir-website/blob/bb4cd4b2/.gitignore ---------------------------------------------------------------------- diff --git a/.gitignore b/.gitignore index 31468f4..548691c 100644 --- a/.gitignore +++ b/.gitignore @@ -32,6 +32,7 @@ target/ *.log # ignore published content (after running ./publish.sh) +site/_site/ content/ # Others http://git-wip-us.apache.org/repos/asf/bahir-website/blob/bb4cd4b2/NOTICE ---------------------------------------------------------------------- diff --git a/NOTICE b/NOTICE index a09223d..4127e40 100644 --- a/NOTICE +++ b/NOTICE @@ -1,5 +1,5 @@ Apache Website Template -Copyright [2016] The Apache Software Foundation +Copyright [2016-2017] The Apache Software Foundation This product includes software developed at The Apache Software Foundation 
(http://www.apache.org/). http://git-wip-us.apache.org/repos/asf/bahir-website/blob/bb4cd4b2/site/_config.yml ---------------------------------------------------------------------- diff --git a/site/_config.yml b/site/_config.yml index f5c0e13..cdd65f0 100644 --- a/site/_config.yml +++ b/site/_config.yml @@ -29,7 +29,7 @@ excerpt_separator: "" repository: https://github.com/apache/bahir-website destination: _site -exclude: [README.md,Gemfile*] +exclude: [README.md,Gemfile*,"docs/spark/templates"] keep_files: [".git", ".svn", "apidocs"] # if 'analytics_on' is true, analytics section will be rendered on the HTML pages http://git-wip-us.apache.org/repos/asf/bahir-website/blob/bb4cd4b2/site/_data/navigation.yml ---------------------------------------------------------------------- diff --git a/site/_data/navigation.yml b/site/_data/navigation.yml index c72a2cb..98376ce 100644 --- a/site/_data/navigation.yml +++ b/site/_data/navigation.yml @@ -30,6 +30,9 @@ topnav: +- title: Download + url: /download + - title: Community subcategories: - title: Get Involved @@ -49,6 +52,8 @@ topnav: subcategories: - title: Bahir Spark Extensions - Current url: /docs/spark/current/documentation + - title: Bahir Spark Extensions - 2.0.2 + url: /docs/spark/2.0.2/documentation - title: Bahir Spark Extensions - 2.0.1 url: /docs/spark/2.0.1/documentation - title: Bahir Spark Extensions - 2.0.0 http://git-wip-us.apache.org/repos/asf/bahir-website/blob/bb4cd4b2/site/_data/project.yml ---------------------------------------------------------------------- diff --git a/site/_data/project.yml b/site/_data/project.yml index 4bde46a..9618694 100644 --- a/site/_data/project.yml +++ b/site/_data/project.yml @@ -22,8 +22,8 @@ github_project_name: bahir description: Apache Bahir provides extensions to distributed analytic platforms such as Apache Spark. 
download: /download -latest_release: 2.0.1 -latest_release_date: 11/27/2016 +latest_release: 2.0.2 +latest_release_date: 01/27/2017 dev_list: [email protected] dev_list_subscribe: [email protected] http://git-wip-us.apache.org/repos/asf/bahir-website/blob/bb4cd4b2/site/_data/releases.yml ---------------------------------------------------------------------- diff --git a/site/_data/releases.yml b/site/_data/releases.yml index 2fcd101..d539950 100644 --- a/site/_data/releases.yml +++ b/site/_data/releases.yml @@ -14,6 +14,9 @@ # limitations under the License. # +- version: 2.0.2 + date: 01/27/2017 + - version: 2.0.1 date: 11/27/2016 http://git-wip-us.apache.org/repos/asf/bahir-website/blob/bb4cd4b2/site/_includes/themes/apache-clean/_navigation.html ---------------------------------------------------------------------- diff --git a/site/_includes/themes/apache-clean/_navigation.html b/site/_includes/themes/apache-clean/_navigation.html index c1b917a..5fc7aa8 100644 --- a/site/_includes/themes/apache-clean/_navigation.html +++ b/site/_includes/themes/apache-clean/_navigation.html @@ -31,6 +31,8 @@ <nav class="navbar-collapse collapse" role="navigation"> <ul class="nav navbar-nav"> {% for entry in site.data.navigation.topnav %} + {% comment %}<!-- exclude Download from navbar on the Home page -->{% endcomment %} + {% if page.title != "Home" or entry.title != "Download" %} <li id="{{ entry.title | slugify }}"> {% if entry.subcategories %} <a href="#" data-toggle="dropdown" class="dropdown-toggle">{{ entry.title }}<b class="caret"></b></a> @@ -49,6 +51,7 @@ <a href="{{ entry.url }}" target="{{ target }}">{{ entry.title }}</a> {% endif %} </li> + {% endif %} {% endfor %} </ul> </nav><!--/.navbar-collapse --> http://git-wip-us.apache.org/repos/asf/bahir-website/blob/bb4cd4b2/site/_includes/themes/apache-clean/footer.html ---------------------------------------------------------------------- diff --git a/site/_includes/themes/apache-clean/footer.html 
b/site/_includes/themes/apache-clean/footer.html index 61de3b1..6070a43 100644 --- a/site/_includes/themes/apache-clean/footer.html +++ b/site/_includes/themes/apache-clean/footer.html @@ -18,7 +18,7 @@ </div> {% endif %} <div> - Copyright © 2016 <a href="http://www.apache.org">The Apache Software Foundation</a>. + Copyright © 2016-2017 <a href="http://www.apache.org">The Apache Software Foundation</a>. Licensed under the <a href="http://www.apache.org/licenses/LICENSE-2.0">Apache License, Version 2.0</a>. <br> http://git-wip-us.apache.org/repos/asf/bahir-website/blob/bb4cd4b2/site/_includes/themes/apache/footer.html ---------------------------------------------------------------------- diff --git a/site/_includes/themes/apache/footer.html b/site/_includes/themes/apache/footer.html index 2f6835c..5134ebc 100644 --- a/site/_includes/themes/apache/footer.html +++ b/site/_includes/themes/apache/footer.html @@ -18,7 +18,7 @@ </div> {% endif %} <div> - Copyright © 2016 <a href="http://www.apache.org">The Apache Software Foundation</a>. + Copyright © 2016-2017 <a href="http://www.apache.org">The Apache Software Foundation</a>. Licensed under the <a href="http://www.apache.org/licenses/LICENSE-2.0">Apache License, Version 2.0</a>. <br> http://git-wip-us.apache.org/repos/asf/bahir-website/blob/bb4cd4b2/site/community-members.md ---------------------------------------------------------------------- diff --git a/site/community-members.md b/site/community-members.md index 5f09c9c..2d8fcf6 100644 --- a/site/community-members.md +++ b/site/community-members.md @@ -25,9 +25,7 @@ limitations under the License. 
{% include JB/setup %} -<br/><br/><br/> - -### {{ site.data.project.short_name }} Team Members +# {{ site.data.project.short_name }} Team Members {% if site.data.contributors %} <table class="table table-hover sortable"> http://git-wip-us.apache.org/repos/asf/bahir-website/blob/bb4cd4b2/site/community.md ---------------------------------------------------------------------- diff --git a/site/community.md b/site/community.md index e60b256..44a73f0 100644 --- a/site/community.md +++ b/site/community.md @@ -38,7 +38,7 @@ You can: Visit the [Contributing](/community) page for more information. -### Mailing list +## Mailing list Get help using {{ site.data.project.short_name }} or contribute to the project on our mailing lists: @@ -47,11 +47,11 @@ Get help using {{ site.data.project.short_name }} or contribute to the project o * [{{ site.data.project.commits_list }}](mailto:{{ site.data.project.commits_list }}) is for commit messages and patches to {{ site.data.project.short_name }}. [subscribe](mailto:{{ site.data.project.commits_list_subscribe }}?subject=send this email to subscribe), [unsubscribe](mailto:{{ site.data.project.commits_list_unsubscribe }}?subject=send this email to unsubscribe), [archives]({{ site.data.project.commits_list_archive_mailarchive }}) -### Issue tracker +## Issue tracker -#### Bug Reports +### Bug Reports Found bug? Enter an issue in the [Issue Tracker](https://issues.apache.org/jira/browse/{{ site.data.project.jira }}). @@ -63,7 +63,7 @@ Before submitting an issue, please: -#### Feature Requests +### Feature Requests Enhancement requests for new features are also welcome. The more concrete and rationale the request is, the greater the chance it will incorporated into future releases. @@ -71,11 +71,11 @@ Enhancement requests for new features are also welcome. 
The more concrete and ra [https://issues.apache.org/jira/browse/{{ site.data.project.jira }}](https://issues.apache.org/jira/browse/{{ site.data.project.jira }}) -### Source Code +## Source Code The project sources are accessible via the [source code repository]({{ site.data.project.source_repository }}) which is also mirrored in [GitHub]({{ site.data.project.source_repository_mirror }}) -### Website Source Code +## Website Source Code The project website sources are accessible via the [website source code repository]({{ site.data.project.website_repository }}) which is also mirrored in [GitHub]({{ site.data.project.website_repository_mirror }}) http://git-wip-us.apache.org/repos/asf/bahir-website/blob/bb4cd4b2/site/docs/spark/2.0.2/documentation.md ---------------------------------------------------------------------- diff --git a/site/docs/spark/2.0.2/documentation.md b/site/docs/spark/2.0.2/documentation.md new file mode 100644 index 0000000..38148e9 --- /dev/null +++ b/site/docs/spark/2.0.2/documentation.md @@ -0,0 +1,46 @@ +--- +layout: page +title: Extensions for Apache Spark +description: Extensions for Apache Spark +group: nav-right +--- +<!-- +{% comment %} +Licensed to the Apache Software Foundation (ASF) under one or more +contributor license agreements. See the NOTICE file distributed with +this work for additional information regarding copyright ownership. +The ASF licenses this file to you under the Apache License, Version 2.0 +(the "License"); you may not use this file except in compliance with +the License. You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+{% endcomment %} +--> + +{% include JB/setup %} + +### Apache Bahir Extensions for Apache Spark + +<br/> + +#### Structured Streaming Data Sources + +[MQTT data source](../spark-sql-streaming-mqtt) + +<br/> + +#### Discretized Streams (DStreams) Connectors + +[Akka connector](../spark-streaming-akka) + +[MQTT connector](../spark-streaming-mqtt) + +[Twitter connector](../spark-streaming-twitter) + +[ZeroMQ connector](../spark-streaming-zeromq) \ No newline at end of file http://git-wip-us.apache.org/repos/asf/bahir-website/blob/bb4cd4b2/site/docs/spark/2.0.2/spark-sql-streaming-mqtt.md ---------------------------------------------------------------------- diff --git a/site/docs/spark/2.0.2/spark-sql-streaming-mqtt.md b/site/docs/spark/2.0.2/spark-sql-streaming-mqtt.md new file mode 100644 index 0000000..a595565 --- /dev/null +++ b/site/docs/spark/2.0.2/spark-sql-streaming-mqtt.md @@ -0,0 +1,147 @@ +--- +layout: page +title: Spark Structured Streaming MQTT +description: Spark Structured Streaming MQTT +group: nav-right +--- +<!-- +{% comment %} +Licensed to the Apache Software Foundation (ASF) under one or more +contributor license agreements. See the NOTICE file distributed with +this work for additional information regarding copyright ownership. +The ASF licenses this file to you under the Apache License, Version 2.0 +(the "License"); you may not use this file except in compliance with +the License. You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +{% endcomment %} +--> + +{% include JB/setup %} + +A library for reading data from MQTT Servers using Spark SQL Streaming ( or Structured streaming.). 
+ +## Linking + +Using SBT: + + libraryDependencies += "org.apache.bahir" %% "spark-sql-streaming-mqtt" % "2.0.2" + +Using Maven: + + <dependency> + <groupId>org.apache.bahir</groupId> + <artifactId>spark-sql-streaming-mqtt_2.11</artifactId> + <version>2.0.2</version> + </dependency> + +This library can also be added to Spark jobs launched through `spark-shell` or `spark-submit` by using the `--packages` command line option. +For example, to include it when starting the spark shell: + + $ bin/spark-shell --packages org.apache.bahir:spark-sql-streaming-mqtt_2.11:2.0.2 + +Unlike using `--jars`, using `--packages` ensures that this library and its dependencies will be added to the classpath. +The `--packages` argument can also be used with `bin/spark-submit`. + +This library is compiled for Scala 2.11 only, and intends to support Spark 2.0 onwards. + +## Examples + +A SQL Stream can be created with data streams received through MQTT Server using, + + sqlContext.readStream + .format("org.apache.bahir.sql.streaming.mqtt.MQTTStreamSourceProvider") + .option("topic", "mytopic") + .load("tcp://localhost:1883") + +## Enable recovering from failures. + +Setting values for option `localStorage` and `clientId` helps in recovering in case of a restart, by restoring the state where it left off before the shutdown. + + sqlContext.readStream + .format("org.apache.bahir.sql.streaming.mqtt.MQTTStreamSourceProvider") + .option("topic", "mytopic") + .option("localStorage", "/path/to/localdir") + .option("clientId", "some-client-id") + .load("tcp://localhost:1883") + +## Configuration options. + +This source uses [Eclipse Paho Java Client](https://eclipse.org/paho/clients/java/). Client API documentation is located [here](http://www.eclipse.org/paho/files/javadoc/index.html). + + * `brokerUrl` A url MqttClient connects to. Set this or `path` as the url of the Mqtt Server. e.g. tcp://localhost:1883. + * `persistence` By default it is used for storing incoming messages on disk. 
If `memory` is provided as value for this option, then recovery on restart is not supported. + * `topic` Topic MqttClient subscribes to. + * `clientId` clientId, this client is assoicated with. Provide the same value to recover a stopped client. + * `QoS` The maximum quality of service to subscribe each topic at. Messages published at a lower quality of service will be received at the published QoS. Messages published at a higher quality of service will be received using the QoS specified on the subscribe. + * `username` Sets the user name to use for the connection to Mqtt Server. Do not set it, if server does not need this. Setting it empty will lead to errors. + * `password` Sets the password to use for the connection. + * `cleanSession` Setting it true starts a clean session, removes all checkpointed messages by a previous run of this source. This is set to false by default. + * `connectionTimeout` Sets the connection timeout, a value of 0 is interpretted as wait until client connects. See `MqttConnectOptions.setConnectionTimeout` for more information. + * `keepAlive` Same as `MqttConnectOptions.setKeepAliveInterval`. + * `mqttVersion` Same as `MqttConnectOptions.setMqttVersion`. + +### Scala API + +An example, for scala API to count words from incoming message stream. + + // Create DataFrame representing the stream of input lines from connection to mqtt server + val lines = spark.readStream + .format("org.apache.bahir.sql.streaming.mqtt.MQTTStreamSourceProvider") + .option("topic", topic) + .load(brokerUrl).as[(String, Timestamp)] + + // Split the lines into words + val words = lines.map(_._1).flatMap(_.split(" ")) + + // Generate running word count + val wordCounts = words.groupBy("value").count() + + // Start running the query that prints the running counts to the console + val query = wordCounts.writeStream + .outputMode("complete") + .format("console") + .start() + + query.awaitTermination() + +Please see `MQTTStreamWordCount.scala` for full example. 
+ +### Java API + +An example, for Java API to count words from incoming message stream. + + // Create DataFrame representing the stream of input lines from connection to mqtt server. + Dataset<String> lines = spark + .readStream() + .format("org.apache.bahir.sql.streaming.mqtt.MQTTStreamSourceProvider") + .option("topic", topic) + .load(brokerUrl).select("value").as(Encoders.STRING()); + + // Split the lines into words + Dataset<String> words = lines.flatMap(new FlatMapFunction<String, String>() { + @Override + public Iterator<String> call(String x) { + return Arrays.asList(x.split(" ")).iterator(); + } + }, Encoders.STRING()); + + // Generate running word count + Dataset<Row> wordCounts = words.groupBy("value").count(); + + // Start running the query that prints the running counts to the console + StreamingQuery query = wordCounts.writeStream() + .outputMode("complete") + .format("console") + .start(); + + query.awaitTermination(); + +Please see `JavaMQTTStreamWordCount.java` for full example. + http://git-wip-us.apache.org/repos/asf/bahir-website/blob/bb4cd4b2/site/docs/spark/2.0.2/spark-streaming-akka.md ---------------------------------------------------------------------- diff --git a/site/docs/spark/2.0.2/spark-streaming-akka.md b/site/docs/spark/2.0.2/spark-streaming-akka.md new file mode 100644 index 0000000..ff8b7f9 --- /dev/null +++ b/site/docs/spark/2.0.2/spark-streaming-akka.md @@ -0,0 +1,89 @@ +--- +layout: page +title: Spark Streaming Akka +description: Spark Streaming Akka +group: nav-right +--- +<!-- +{% comment %} +Licensed to the Apache Software Foundation (ASF) under one or more +contributor license agreements. See the NOTICE file distributed with +this work for additional information regarding copyright ownership. +The ASF licenses this file to you under the Apache License, Version 2.0 +(the "License"); you may not use this file except in compliance with +the License. 
You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +{% endcomment %} +--> + +{% include JB/setup %} + +A library for reading data from Akka Actors using Spark Streaming. + +## Linking + +Using SBT: + + libraryDependencies += "org.apache.bahir" %% "spark-streaming-akka" % "2.0.2" + +Using Maven: + + <dependency> + <groupId>org.apache.bahir</groupId> + <artifactId>spark-streaming-akka_2.11</artifactId> + <version>2.0.2</version> + </dependency> + +This library can also be added to Spark jobs launched through `spark-shell` or `spark-submit` by using the `--packages` command line option. +For example, to include it when starting the spark shell: + + $ bin/spark-shell --packages org.apache.bahir:spark-streaming_akka_2.11:2.0.2 + +Unlike using `--jars`, using `--packages` ensures that this library and its dependencies will be added to the classpath. +The `--packages` argument can also be used with `bin/spark-submit`. + +This library is cross-published for Scala 2.10 and Scala 2.11, so users should replace the proper Scala version (2.10 or 2.11) in the commands listed above. + +## Examples + +DStreams can be created with data streams received through Akka actors by using `AkkaUtils.createStream(ssc, actorProps, actor-name)`. + +### Scala API + +You need to extend `ActorReceiver` so as to store received data into Spark using `store(...)` methods. The supervisor strategy of +this actor can be configured to handle failures, etc. + + class CustomActor extends ActorReceiver { + def receive = { + case data: String => store(data) + } + } + + // A new input stream can be created with this custom actor as + val ssc: StreamingContext = ... 
+ val lines = AkkaUtils.createStream[String](ssc, Props[CustomActor](), "CustomReceiver") + +### Java API + +You need to extend `JavaActorReceiver` so as to store received data into Spark using `store(...)` methods. The supervisor strategy of +this actor can be configured to handle failures, etc. + + class CustomActor extends JavaActorReceiver { + @Override + public void onReceive(Object msg) throws Exception { + store((String) msg); + } + } + + // A new input stream can be created with this custom actor as + JavaStreamingContext jssc = ...; + JavaDStream<String> lines = AkkaUtils.<String>createStream(jssc, Props.create(CustomActor.class), "CustomReceiver"); + +See end-to-end examples at [Akka Examples](https://github.com/apache/bahir/tree/master/streaming-akka/examples) http://git-wip-us.apache.org/repos/asf/bahir-website/blob/bb4cd4b2/site/docs/spark/2.0.2/spark-streaming-mqtt.md ---------------------------------------------------------------------- diff --git a/site/docs/spark/2.0.2/spark-streaming-mqtt.md b/site/docs/spark/2.0.2/spark-streaming-mqtt.md new file mode 100644 index 0000000..50c866d --- /dev/null +++ b/site/docs/spark/2.0.2/spark-streaming-mqtt.md @@ -0,0 +1,94 @@ +--- +layout: page +title: Spark Structured Streaming MQTT +description: Spark Structured Streaming MQTT +group: nav-right +--- +<!-- +{% comment %} +Licensed to the Apache Software Foundation (ASF) under one or more +contributor license agreements. See the NOTICE file distributed with +this work for additional information regarding copyright ownership. +The ASF licenses this file to you under the Apache License, Version 2.0 +(the "License"); you may not use this file except in compliance with +the License. 
You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +{% endcomment %} +--> + +{% include JB/setup %} + + +[MQTT](http://mqtt.org/) is MQTT is a machine-to-machine (M2M)/"Internet of Things" connectivity protocol. It was designed as an extremely lightweight publish/subscribe messaging transport. It is useful for connections with remote locations where a small code footprint is required and/or network bandwidth is at a premium. + +## Linking + +Using SBT: + + libraryDependencies += "org.apache.bahir" %% "spark-streaming-mqtt" % "2.0.2" + +Using Maven: + + <dependency> + <groupId>org.apache.bahir</groupId> + <artifactId>spark-streaming-mqtt_2.11</artifactId> + <version>2.0.2</version> + </dependency> + +This library can also be added to Spark jobs launched through `spark-shell` or `spark-submit` by using the `--packages` command line option. +For example, to include it when starting the spark shell: + + $ bin/spark-shell --packages org.apache.bahir:spark-streaming_mqtt_2.11:2.0.2 + +Unlike using `--jars`, using `--packages` ensures that this library and its dependencies will be added to the classpath. +The `--packages` argument can also be used with `bin/spark-submit`. + +This library is cross-published for Scala 2.10 and Scala 2.11, so users should replace the proper Scala version (2.10 or 2.11) in the commands listed above. + +## Configuration options. + +This source uses the [Eclipse Paho Java Client](https://eclipse.org/paho/clients/java/). Client API documentation is located [here](http://www.eclipse.org/paho/files/javadoc/index.html). + + * `brokerUrl` A url MqttClient connects to. Set this as the url of the Mqtt Server. 
e.g. tcp://localhost:1883. + * `storageLevel` By default it is used for storing incoming messages on disk. + * `topic` Topic MqttClient subscribes to. + * `clientId` clientId, this client is assoicated with. Provide the same value to recover a stopped client. + * `QoS` The maximum quality of service to subscribe each topic at. Messages published at a lower quality of service will be received at the published QoS. Messages published at a higher quality of service will be received using the QoS specified on the subscribe. + * `username` Sets the user name to use for the connection to Mqtt Server. Do not set it, if server does not need this. Setting it empty will lead to errors. + * `password` Sets the password to use for the connection. + * `cleanSession` Setting it true starts a clean session, removes all checkpointed messages by a previous run of this source. This is set to false by default. + * `connectionTimeout` Sets the connection timeout, a value of 0 is interpreted as wait until client connects. See `MqttConnectOptions.setConnectionTimeout` for more information. + * `keepAlive` Same as `MqttConnectOptions.setKeepAliveInterval`. + * `mqttVersion` Same as `MqttConnectOptions.setMqttVersion`. + + +## Examples + +### Scala API + +You need to extend `ActorReceiver` so as to store received data into Spark using `store(...)` methods. The supervisor strategy of +this actor can be configured to handle failures, etc. + + val lines = MQTTUtils.createStream(ssc, brokerUrl, topic) + +Additional mqtt connection options can be provided: + +```Scala +val lines = MQTTUtils.createStream(ssc, brokerUrl, topic, storageLevel, clientId, username, password, cleanSession, qos, connectionTimeout, keepAliveInterval, mqttVersion) +``` + +### Java API + +You need to extend `JavaActorReceiver` so as to store received data into Spark using `store(...)` methods. The supervisor strategy of +this actor can be configured to handle failures, etc. 
+ + JavaDStream<String> lines = MQTTUtils.createStream(jssc, brokerUrl, topic); + +See end-to-end examples at [MQTT Examples](https://github.com/apache/bahir/tree/master/streaming-mqtt/examples) \ No newline at end of file http://git-wip-us.apache.org/repos/asf/bahir-website/blob/bb4cd4b2/site/docs/spark/2.0.2/spark-streaming-twitter.md ---------------------------------------------------------------------- diff --git a/site/docs/spark/2.0.2/spark-streaming-twitter.md b/site/docs/spark/2.0.2/spark-streaming-twitter.md new file mode 100644 index 0000000..190d25c --- /dev/null +++ b/site/docs/spark/2.0.2/spark-streaming-twitter.md @@ -0,0 +1,74 @@ +--- +layout: page +title: Spark Streaming Twitter +description: Spark Streaming Twitter +group: nav-right +--- +<!-- +{% comment %} +Licensed to the Apache Software Foundation (ASF) under one or more +contributor license agreements. See the NOTICE file distributed with +this work for additional information regarding copyright ownership. +The ASF licenses this file to you under the Apache License, Version 2.0 +(the "License"); you may not use this file except in compliance with +the License. You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +{% endcomment %} +--> + +{% include JB/setup %} + +A library for reading social data from [twitter](http://twitter.com/) using Spark Streaming. 
+ +## Linking + +Using SBT: + + libraryDependencies += "org.apache.bahir" %% "spark-streaming-twitter" % "2.0.2" + +Using Maven: + + <dependency> + <groupId>org.apache.bahir</groupId> + <artifactId>spark-streaming-twitter_2.11</artifactId> + <version>2.0.2</version> + </dependency> + +This library can also be added to Spark jobs launched through `spark-shell` or `spark-submit` by using the `--packages` command line option. +For example, to include it when starting the spark shell: + + $ bin/spark-shell --packages org.apache.bahir:spark-streaming_twitter_2.11:2.0.2 + +Unlike using `--jars`, using `--packages` ensures that this library and its dependencies will be added to the classpath. +The `--packages` argument can also be used with `bin/spark-submit`. + +This library is cross-published for Scala 2.10 and Scala 2.11, so users should replace the proper Scala version (2.10 or 2.11) in the commands listed above. + + +## Examples + +`TwitterUtils` uses Twitter4j to get the public stream of tweets using [Twitter's Streaming API](https://dev.twitter.com/docs/streaming-apis). Authentication information +can be provided by any of the [methods](http://twitter4j.org/en/configuration.html) supported by Twitter4J library. You can import the `TwitterUtils` class and create a DStream with `TwitterUtils.createStream` as shown below. + +### Scala API + + import org.apache.spark.streaming.twitter._ + + TwitterUtils.createStream(ssc, None) + +### Java API + + import org.apache.spark.streaming.twitter.*; + + TwitterUtils.createStream(jssc); + + +You can also either get the public stream, or get the filtered stream based on keywords. 
+See end-to-end examples at [Twitter Examples](https://github.com/apache/bahir/tree/master/streaming-twitter/examples) \ No newline at end of file http://git-wip-us.apache.org/repos/asf/bahir-website/blob/bb4cd4b2/site/docs/spark/2.0.2/spark-streaming-zeromq.md ---------------------------------------------------------------------- diff --git a/site/docs/spark/2.0.2/spark-streaming-zeromq.md b/site/docs/spark/2.0.2/spark-streaming-zeromq.md new file mode 100644 index 0000000..718725d --- /dev/null +++ b/site/docs/spark/2.0.2/spark-streaming-zeromq.md @@ -0,0 +1,65 @@ +--- +layout: page +title: Spark Streaming ZeroMQ +description: Spark Streaming ZeroMQ +group: nav-right +--- +<!-- +{% comment %} +Licensed to the Apache Software Foundation (ASF) under one or more +contributor license agreements. See the NOTICE file distributed with +this work for additional information regarding copyright ownership. +The ASF licenses this file to you under the Apache License, Version 2.0 +(the "License"); you may not use this file except in compliance with +the License. You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +{% endcomment %} +--> + +{% include JB/setup %} + +A library for reading data from [ZeroMQ](http://zeromq.org/) using Spark Streaming. 
+ +## Linking + +Using SBT: + + libraryDependencies += "org.apache.bahir" %% "spark-streaming-zeromq" % "2.0.2" + +Using Maven: + + <dependency> + <groupId>org.apache.bahir</groupId> + <artifactId>spark-streaming-zeromq_2.11</artifactId> + <version>2.0.2</version> + </dependency> + +This library can also be added to Spark jobs launched through `spark-shell` or `spark-submit` by using the `--packages` command line option. +For example, to include it when starting the spark shell: + + $ bin/spark-shell --packages org.apache.bahir:spark-streaming-zeromq_2.11:2.0.2 + +Unlike using `--jars`, using `--packages` ensures that this library and its dependencies will be added to the classpath. +The `--packages` argument can also be used with `bin/spark-submit`. + +This library is cross-published for Scala 2.10 and Scala 2.11, so users should replace the proper Scala version (2.10 or 2.11) in the commands listed above. + +## Examples + + +### Scala API + + val lines = ZeroMQUtils.createStream(ssc, ...)
+ +### Java API + + JavaDStream<String> lines = ZeroMQUtils.createStream(jssc, ...); + +See end-to-end examples at [ZeroMQ Examples](https://github.com/apache/bahir/tree/master/streaming-zeromq/examples) \ No newline at end of file http://git-wip-us.apache.org/repos/asf/bahir-website/blob/bb4cd4b2/site/docs/spark/current/.gitignore ---------------------------------------------------------------------- diff --git a/site/docs/spark/current/.gitignore b/site/docs/spark/current/.gitignore deleted file mode 100644 index 6b7415a..0000000 --- a/site/docs/spark/current/.gitignore +++ /dev/null @@ -1 +0,0 @@ -spark*.md http://git-wip-us.apache.org/repos/asf/bahir-website/blob/bb4cd4b2/site/docs/spark/current/spark-sql-streaming-mqtt.md ---------------------------------------------------------------------- diff --git a/site/docs/spark/current/spark-sql-streaming-mqtt.md b/site/docs/spark/current/spark-sql-streaming-mqtt.md new file mode 100644 index 0000000..a595565 --- /dev/null +++ b/site/docs/spark/current/spark-sql-streaming-mqtt.md @@ -0,0 +1,147 @@ +--- +layout: page +title: Spark Structured Streaming MQTT +description: Spark Structured Streaming MQTT +group: nav-right +--- +<!-- +{% comment %} +Licensed to the Apache Software Foundation (ASF) under one or more +contributor license agreements. See the NOTICE file distributed with +this work for additional information regarding copyright ownership. +The ASF licenses this file to you under the Apache License, Version 2.0 +(the "License"); you may not use this file except in compliance with +the License. You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+{% endcomment %} +--> + +{% include JB/setup %} + +A library for reading data from MQTT Servers using Spark SQL Streaming (or Structured Streaming). + +## Linking + +Using SBT: + + libraryDependencies += "org.apache.bahir" %% "spark-sql-streaming-mqtt" % "2.0.2" + +Using Maven: + + <dependency> + <groupId>org.apache.bahir</groupId> + <artifactId>spark-sql-streaming-mqtt_2.11</artifactId> + <version>2.0.2</version> + </dependency> + +This library can also be added to Spark jobs launched through `spark-shell` or `spark-submit` by using the `--packages` command line option. +For example, to include it when starting the spark shell: + + $ bin/spark-shell --packages org.apache.bahir:spark-sql-streaming-mqtt_2.11:2.0.2 + +Unlike using `--jars`, using `--packages` ensures that this library and its dependencies will be added to the classpath. +The `--packages` argument can also be used with `bin/spark-submit`. + +This library is compiled for Scala 2.11 only, and intends to support Spark 2.0 onwards. + +## Examples + +A SQL Stream can be created with data streams received through MQTT Server using, + + sqlContext.readStream + .format("org.apache.bahir.sql.streaming.mqtt.MQTTStreamSourceProvider") + .option("topic", "mytopic") + .load("tcp://localhost:1883") + +## Enable recovering from failures. + +Setting values for option `localStorage` and `clientId` helps in recovering in case of a restart, by restoring the state where it left off before the shutdown. + + sqlContext.readStream + .format("org.apache.bahir.sql.streaming.mqtt.MQTTStreamSourceProvider") + .option("topic", "mytopic") + .option("localStorage", "/path/to/localdir") + .option("clientId", "some-client-id") + .load("tcp://localhost:1883") + +## Configuration options. + +This source uses [Eclipse Paho Java Client](https://eclipse.org/paho/clients/java/). Client API documentation is located [here](http://www.eclipse.org/paho/files/javadoc/index.html). + + * `brokerUrl` A url MqttClient connects to.
Set this or `path` as the url of the Mqtt Server. e.g. tcp://localhost:1883. + * `persistence` By default it is used for storing incoming messages on disk. If `memory` is provided as value for this option, then recovery on restart is not supported. + * `topic` Topic MqttClient subscribes to. + * `clientId` clientId, this client is associated with. Provide the same value to recover a stopped client. + * `QoS` The maximum quality of service to subscribe each topic at. Messages published at a lower quality of service will be received at the published QoS. Messages published at a higher quality of service will be received using the QoS specified on the subscribe. + * `username` Sets the user name to use for the connection to Mqtt Server. Do not set it, if server does not need this. Setting it empty will lead to errors. + * `password` Sets the password to use for the connection. + * `cleanSession` Setting it true starts a clean session, removes all checkpointed messages by a previous run of this source. This is set to false by default. + * `connectionTimeout` Sets the connection timeout, a value of 0 is interpreted as wait until client connects. See `MqttConnectOptions.setConnectionTimeout` for more information. + * `keepAlive` Same as `MqttConnectOptions.setKeepAliveInterval`. + * `mqttVersion` Same as `MqttConnectOptions.setMqttVersion`. + +### Scala API + +An example, for Scala API to count words from incoming message stream.
+ + // Create DataFrame representing the stream of input lines from connection to mqtt server + val lines = spark.readStream + .format("org.apache.bahir.sql.streaming.mqtt.MQTTStreamSourceProvider") + .option("topic", topic) + .load(brokerUrl).as[(String, Timestamp)] + + // Split the lines into words + val words = lines.map(_._1).flatMap(_.split(" ")) + + // Generate running word count + val wordCounts = words.groupBy("value").count() + + // Start running the query that prints the running counts to the console + val query = wordCounts.writeStream + .outputMode("complete") + .format("console") + .start() + + query.awaitTermination() + +Please see `MQTTStreamWordCount.scala` for full example. + +### Java API + +An example, for Java API to count words from incoming message stream. + + // Create DataFrame representing the stream of input lines from connection to mqtt server. + Dataset<String> lines = spark + .readStream() + .format("org.apache.bahir.sql.streaming.mqtt.MQTTStreamSourceProvider") + .option("topic", topic) + .load(brokerUrl).select("value").as(Encoders.STRING()); + + // Split the lines into words + Dataset<String> words = lines.flatMap(new FlatMapFunction<String, String>() { + @Override + public Iterator<String> call(String x) { + return Arrays.asList(x.split(" ")).iterator(); + } + }, Encoders.STRING()); + + // Generate running word count + Dataset<Row> wordCounts = words.groupBy("value").count(); + + // Start running the query that prints the running counts to the console + StreamingQuery query = wordCounts.writeStream() + .outputMode("complete") + .format("console") + .start(); + + query.awaitTermination(); + +Please see `JavaMQTTStreamWordCount.java` for full example. 
+ http://git-wip-us.apache.org/repos/asf/bahir-website/blob/bb4cd4b2/site/docs/spark/current/spark-sql-streaming-mqtt.template ---------------------------------------------------------------------- diff --git a/site/docs/spark/current/spark-sql-streaming-mqtt.template b/site/docs/spark/current/spark-sql-streaming-mqtt.template deleted file mode 100644 index fd08baa..0000000 --- a/site/docs/spark/current/spark-sql-streaming-mqtt.template +++ /dev/null @@ -1,27 +0,0 @@ ---- -layout: page -title: Spark Structured Streaming MQTT -description: Spark Structured Streaming MQTT -group: nav-right ---- -<!-- -{% comment %} -Licensed to the Apache Software Foundation (ASF) under one or more -contributor license agreements. See the NOTICE file distributed with -this work for additional information regarding copyright ownership. -The ASF licenses this file to you under the Apache License, Version 2.0 -(the "License"); you may not use this file except in compliance with -the License. You may obtain a copy of the License at - -http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-{% endcomment %} ---> - -{% include JB/setup %} - http://git-wip-us.apache.org/repos/asf/bahir-website/blob/bb4cd4b2/site/docs/spark/current/spark-streaming-akka.md ---------------------------------------------------------------------- diff --git a/site/docs/spark/current/spark-streaming-akka.md b/site/docs/spark/current/spark-streaming-akka.md new file mode 100644 index 0000000..ff8b7f9 --- /dev/null +++ b/site/docs/spark/current/spark-streaming-akka.md @@ -0,0 +1,89 @@ +--- +layout: page +title: Spark Streaming Akka +description: Spark Streaming Akka +group: nav-right +--- +<!-- +{% comment %} +Licensed to the Apache Software Foundation (ASF) under one or more +contributor license agreements. See the NOTICE file distributed with +this work for additional information regarding copyright ownership. +The ASF licenses this file to you under the Apache License, Version 2.0 +(the "License"); you may not use this file except in compliance with +the License. You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +{% endcomment %} +--> + +{% include JB/setup %} + +A library for reading data from Akka Actors using Spark Streaming. + +## Linking + +Using SBT: + + libraryDependencies += "org.apache.bahir" %% "spark-streaming-akka" % "2.0.2" + +Using Maven: + + <dependency> + <groupId>org.apache.bahir</groupId> + <artifactId>spark-streaming-akka_2.11</artifactId> + <version>2.0.2</version> + </dependency> + +This library can also be added to Spark jobs launched through `spark-shell` or `spark-submit` by using the `--packages` command line option. 
+For example, to include it when starting the spark shell: + + $ bin/spark-shell --packages org.apache.bahir:spark-streaming-akka_2.11:2.0.2 + +Unlike using `--jars`, using `--packages` ensures that this library and its dependencies will be added to the classpath. +The `--packages` argument can also be used with `bin/spark-submit`. + +This library is cross-published for Scala 2.10 and Scala 2.11, so users should replace the proper Scala version (2.10 or 2.11) in the commands listed above. + +## Examples + +DStreams can be created with data streams received through Akka actors by using `AkkaUtils.createStream(ssc, actorProps, actor-name)`. + +### Scala API + +You need to extend `ActorReceiver` so as to store received data into Spark using `store(...)` methods. The supervisor strategy of +this actor can be configured to handle failures, etc. + + class CustomActor extends ActorReceiver { + def receive = { + case data: String => store(data) + } + } + + // A new input stream can be created with this custom actor as + val ssc: StreamingContext = ... + val lines = AkkaUtils.createStream[String](ssc, Props[CustomActor](), "CustomReceiver") + +### Java API + +You need to extend `JavaActorReceiver` so as to store received data into Spark using `store(...)` methods. The supervisor strategy of +this actor can be configured to handle failures, etc.
+ + class CustomActor extends JavaActorReceiver { + @Override + public void onReceive(Object msg) throws Exception { + store((String) msg); + } + } + + // A new input stream can be created with this custom actor as + JavaStreamingContext jssc = ...; + JavaDStream<String> lines = AkkaUtils.<String>createStream(jssc, Props.create(CustomActor.class), "CustomReceiver"); + +See end-to-end examples at [Akka Examples](https://github.com/apache/bahir/tree/master/streaming-akka/examples) http://git-wip-us.apache.org/repos/asf/bahir-website/blob/bb4cd4b2/site/docs/spark/current/spark-streaming-akka.template ---------------------------------------------------------------------- diff --git a/site/docs/spark/current/spark-streaming-akka.template b/site/docs/spark/current/spark-streaming-akka.template deleted file mode 100644 index 78b7d14..0000000 --- a/site/docs/spark/current/spark-streaming-akka.template +++ /dev/null @@ -1,26 +0,0 @@ ---- -layout: page -title: Spark Streaming Akka -description: Spark Streaming Akka -group: nav-right ---- -<!-- -{% comment %} -Licensed to the Apache Software Foundation (ASF) under one or more -contributor license agreements. See the NOTICE file distributed with -this work for additional information regarding copyright ownership. -The ASF licenses this file to you under the Apache License, Version 2.0 -(the "License"); you may not use this file except in compliance with -the License. You may obtain a copy of the License at - -http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-{% endcomment %} ---> - -{% include JB/setup %} http://git-wip-us.apache.org/repos/asf/bahir-website/blob/bb4cd4b2/site/docs/spark/current/spark-streaming-mqtt.md ---------------------------------------------------------------------- diff --git a/site/docs/spark/current/spark-streaming-mqtt.md b/site/docs/spark/current/spark-streaming-mqtt.md new file mode 100644 index 0000000..50c866d --- /dev/null +++ b/site/docs/spark/current/spark-streaming-mqtt.md @@ -0,0 +1,94 @@ +--- +layout: page +title: Spark Structured Streaming MQTT +description: Spark Structured Streaming MQTT +group: nav-right +--- +<!-- +{% comment %} +Licensed to the Apache Software Foundation (ASF) under one or more +contributor license agreements. See the NOTICE file distributed with +this work for additional information regarding copyright ownership. +The ASF licenses this file to you under the Apache License, Version 2.0 +(the "License"); you may not use this file except in compliance with +the License. You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +{% endcomment %} +--> + +{% include JB/setup %} + + +[MQTT](http://mqtt.org/) is a machine-to-machine (M2M)/"Internet of Things" connectivity protocol. It was designed as an extremely lightweight publish/subscribe messaging transport. It is useful for connections with remote locations where a small code footprint is required and/or network bandwidth is at a premium.
+ +## Linking + +Using SBT: + + libraryDependencies += "org.apache.bahir" %% "spark-streaming-mqtt" % "2.0.2" + +Using Maven: + + <dependency> + <groupId>org.apache.bahir</groupId> + <artifactId>spark-streaming-mqtt_2.11</artifactId> + <version>2.0.2</version> + </dependency> + +This library can also be added to Spark jobs launched through `spark-shell` or `spark-submit` by using the `--packages` command line option. +For example, to include it when starting the spark shell: + + $ bin/spark-shell --packages org.apache.bahir:spark-streaming-mqtt_2.11:2.0.2 + +Unlike using `--jars`, using `--packages` ensures that this library and its dependencies will be added to the classpath. +The `--packages` argument can also be used with `bin/spark-submit`. + +This library is cross-published for Scala 2.10 and Scala 2.11, so users should replace the proper Scala version (2.10 or 2.11) in the commands listed above. + +## Configuration options. + +This source uses the [Eclipse Paho Java Client](https://eclipse.org/paho/clients/java/). Client API documentation is located [here](http://www.eclipse.org/paho/files/javadoc/index.html). + + * `brokerUrl` A url MqttClient connects to. Set this as the url of the Mqtt Server. e.g. tcp://localhost:1883. + * `storageLevel` By default it is used for storing incoming messages on disk. + * `topic` Topic MqttClient subscribes to. + * `clientId` clientId, this client is associated with. Provide the same value to recover a stopped client. + * `QoS` The maximum quality of service to subscribe each topic at. Messages published at a lower quality of service will be received at the published QoS. Messages published at a higher quality of service will be received using the QoS specified on the subscribe. + * `username` Sets the user name to use for the connection to Mqtt Server. Do not set it, if server does not need this. Setting it empty will lead to errors. + * `password` Sets the password to use for the connection.
+ * `cleanSession` Setting it true starts a clean session, removes all checkpointed messages by a previous run of this source. This is set to false by default. + * `connectionTimeout` Sets the connection timeout, a value of 0 is interpreted as wait until client connects. See `MqttConnectOptions.setConnectionTimeout` for more information. + * `keepAlive` Same as `MqttConnectOptions.setKeepAliveInterval`. + * `mqttVersion` Same as `MqttConnectOptions.setMqttVersion`. + + +## Examples + +### Scala API + +You need to extend `ActorReceiver` so as to store received data into Spark using `store(...)` methods. The supervisor strategy of +this actor can be configured to handle failures, etc. + + val lines = MQTTUtils.createStream(ssc, brokerUrl, topic) + +Additional mqtt connection options can be provided: + +```Scala +val lines = MQTTUtils.createStream(ssc, brokerUrl, topic, storageLevel, clientId, username, password, cleanSession, qos, connectionTimeout, keepAliveInterval, mqttVersion) +``` + +### Java API + +You need to extend `JavaActorReceiver` so as to store received data into Spark using `store(...)` methods. The supervisor strategy of +this actor can be configured to handle failures, etc. 
+ + JavaDStream<String> lines = MQTTUtils.createStream(jssc, brokerUrl, topic); + +See end-to-end examples at [MQTT Examples](https://github.com/apache/bahir/tree/master/streaming-mqtt/examples) \ No newline at end of file http://git-wip-us.apache.org/repos/asf/bahir-website/blob/bb4cd4b2/site/docs/spark/current/spark-streaming-mqtt.template ---------------------------------------------------------------------- diff --git a/site/docs/spark/current/spark-streaming-mqtt.template b/site/docs/spark/current/spark-streaming-mqtt.template deleted file mode 100644 index fd08baa..0000000 --- a/site/docs/spark/current/spark-streaming-mqtt.template +++ /dev/null @@ -1,27 +0,0 @@ ---- -layout: page -title: Spark Structured Streaming MQTT -description: Spark Structured Streaming MQTT -group: nav-right ---- -<!-- -{% comment %} -Licensed to the Apache Software Foundation (ASF) under one or more -contributor license agreements. See the NOTICE file distributed with -this work for additional information regarding copyright ownership. -The ASF licenses this file to you under the Apache License, Version 2.0 -(the "License"); you may not use this file except in compliance with -the License. You may obtain a copy of the License at - -http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-{% endcomment %} ---> - -{% include JB/setup %} - http://git-wip-us.apache.org/repos/asf/bahir-website/blob/bb4cd4b2/site/docs/spark/current/spark-streaming-twitter.md ---------------------------------------------------------------------- diff --git a/site/docs/spark/current/spark-streaming-twitter.md b/site/docs/spark/current/spark-streaming-twitter.md new file mode 100644 index 0000000..190d25c --- /dev/null +++ b/site/docs/spark/current/spark-streaming-twitter.md @@ -0,0 +1,74 @@ +--- +layout: page +title: Spark Streaming Twitter +description: Spark Streaming Twitter +group: nav-right +--- +<!-- +{% comment %} +Licensed to the Apache Software Foundation (ASF) under one or more +contributor license agreements. See the NOTICE file distributed with +this work for additional information regarding copyright ownership. +The ASF licenses this file to you under the Apache License, Version 2.0 +(the "License"); you may not use this file except in compliance with +the License. You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +{% endcomment %} +--> + +{% include JB/setup %} + +A library for reading social data from [twitter](http://twitter.com/) using Spark Streaming. + +## Linking + +Using SBT: + + libraryDependencies += "org.apache.bahir" %% "spark-streaming-twitter" % "2.0.2" + +Using Maven: + + <dependency> + <groupId>org.apache.bahir</groupId> + <artifactId>spark-streaming-twitter_2.11</artifactId> + <version>2.0.2</version> + </dependency> + +This library can also be added to Spark jobs launched through `spark-shell` or `spark-submit` by using the `--packages` command line option. 
+For example, to include it when starting the spark shell: + + $ bin/spark-shell --packages org.apache.bahir:spark-streaming-twitter_2.11:2.0.2 + +Unlike using `--jars`, using `--packages` ensures that this library and its dependencies will be added to the classpath. +The `--packages` argument can also be used with `bin/spark-submit`. + +This library is cross-published for Scala 2.10 and Scala 2.11, so users should replace the proper Scala version (2.10 or 2.11) in the commands listed above. + + +## Examples + +`TwitterUtils` uses Twitter4j to get the public stream of tweets using [Twitter's Streaming API](https://dev.twitter.com/docs/streaming-apis). Authentication information +can be provided by any of the [methods](http://twitter4j.org/en/configuration.html) supported by Twitter4J library. You can import the `TwitterUtils` class and create a DStream with `TwitterUtils.createStream` as shown below. + +### Scala API + + import org.apache.spark.streaming.twitter._ + + TwitterUtils.createStream(ssc, None) + +### Java API + + import org.apache.spark.streaming.twitter.*; + + TwitterUtils.createStream(jssc); + + +You can also either get the public stream, or get the filtered stream based on keywords.
+See end-to-end examples at [Twitter Examples](https://github.com/apache/bahir/tree/master/streaming-twitter/examples) \ No newline at end of file http://git-wip-us.apache.org/repos/asf/bahir-website/blob/bb4cd4b2/site/docs/spark/current/spark-streaming-twitter.template ---------------------------------------------------------------------- diff --git a/site/docs/spark/current/spark-streaming-twitter.template b/site/docs/spark/current/spark-streaming-twitter.template deleted file mode 100644 index 9a97ced..0000000 --- a/site/docs/spark/current/spark-streaming-twitter.template +++ /dev/null @@ -1,26 +0,0 @@ ---- -layout: page -title: Spark Streaming Twitter -description: Spark Streaming Twitter -group: nav-right ---- -<!-- -{% comment %} -Licensed to the Apache Software Foundation (ASF) under one or more -contributor license agreements. See the NOTICE file distributed with -this work for additional information regarding copyright ownership. -The ASF licenses this file to you under the Apache License, Version 2.0 -(the "License"); you may not use this file except in compliance with -the License. You may obtain a copy of the License at - -http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-{% endcomment %} ---> - -{% include JB/setup %} http://git-wip-us.apache.org/repos/asf/bahir-website/blob/bb4cd4b2/site/docs/spark/current/spark-streaming-zeromq.md ---------------------------------------------------------------------- diff --git a/site/docs/spark/current/spark-streaming-zeromq.md b/site/docs/spark/current/spark-streaming-zeromq.md new file mode 100644 index 0000000..718725d --- /dev/null +++ b/site/docs/spark/current/spark-streaming-zeromq.md @@ -0,0 +1,65 @@ +--- +layout: page +title: Spark Streaming ZeroMQ +description: Spark Streaming ZeroMQ +group: nav-right +--- +<!-- +{% comment %} +Licensed to the Apache Software Foundation (ASF) under one or more +contributor license agreements. See the NOTICE file distributed with +this work for additional information regarding copyright ownership. +The ASF licenses this file to you under the Apache License, Version 2.0 +(the "License"); you may not use this file except in compliance with +the License. You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +{% endcomment %} +--> + +{% include JB/setup %} + +A library for reading data from [ZeroMQ](http://zeromq.org/) using Spark Streaming. + +## Linking + +Using SBT: + + libraryDependencies += "org.apache.bahir" %% "spark-streaming-zeromq" % "2.0.2" + +Using Maven: + + <dependency> + <groupId>org.apache.bahir</groupId> + <artifactId>spark-streaming-zeromq_2.11</artifactId> + <version>2.0.2</version> + </dependency> + +This library can also be added to Spark jobs launched through `spark-shell` or `spark-submit` by using the `--packages` command line option. 
+For example, to include it when starting the spark shell: + + $ bin/spark-shell --packages org.apache.bahir:spark-streaming-zeromq_2.11:2.0.2 + +Unlike using `--jars`, using `--packages` ensures that this library and its dependencies will be added to the classpath. +The `--packages` argument can also be used with `bin/spark-submit`. + +This library is cross-published for Scala 2.10 and Scala 2.11, so users should replace the proper Scala version (2.10 or 2.11) in the commands listed above. + +## Examples + + +### Scala API + + val lines = ZeroMQUtils.createStream(ssc, ...) + +### Java API + + JavaDStream<String> lines = ZeroMQUtils.createStream(jssc, ...); + +See end-to-end examples at [ZeroMQ Examples](https://github.com/apache/bahir/tree/master/streaming-zeromq/examples) \ No newline at end of file http://git-wip-us.apache.org/repos/asf/bahir-website/blob/bb4cd4b2/site/docs/spark/current/spark-streaming-zeromq.template ---------------------------------------------------------------------- diff --git a/site/docs/spark/current/spark-streaming-zeromq.template b/site/docs/spark/current/spark-streaming-zeromq.template deleted file mode 100644 index 6a6a02a..0000000 --- a/site/docs/spark/current/spark-streaming-zeromq.template +++ /dev/null @@ -1,26 +0,0 @@ ---- -layout: page -title: Spark Streaming ZeroMQ -description: Spark Streaming ZeroMQ -group: nav-right ---- -<!-- -{% comment %} -Licensed to the Apache Software Foundation (ASF) under one or more -contributor license agreements. See the NOTICE file distributed with -this work for additional information regarding copyright ownership. -The ASF licenses this file to you under the Apache License, Version 2.0 -(the "License"); you may not use this file except in compliance with -the License.
You may obtain a copy of the License at - -http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -{% endcomment %} ---> - -{% include JB/setup %} http://git-wip-us.apache.org/repos/asf/bahir-website/blob/bb4cd4b2/site/docs/spark/templates/spark-sql-streaming-mqtt.template ---------------------------------------------------------------------- diff --git a/site/docs/spark/templates/spark-sql-streaming-mqtt.template b/site/docs/spark/templates/spark-sql-streaming-mqtt.template new file mode 100644 index 0000000..fd08baa --- /dev/null +++ b/site/docs/spark/templates/spark-sql-streaming-mqtt.template @@ -0,0 +1,27 @@ +--- +layout: page +title: Spark Structured Streaming MQTT +description: Spark Structured Streaming MQTT +group: nav-right +--- +<!-- +{% comment %} +Licensed to the Apache Software Foundation (ASF) under one or more +contributor license agreements. See the NOTICE file distributed with +this work for additional information regarding copyright ownership. +The ASF licenses this file to you under the Apache License, Version 2.0 +(the "License"); you may not use this file except in compliance with +the License. You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+{% endcomment %} +--> + +{% include JB/setup %} + http://git-wip-us.apache.org/repos/asf/bahir-website/blob/bb4cd4b2/site/docs/spark/templates/spark-streaming-akka.template ---------------------------------------------------------------------- diff --git a/site/docs/spark/templates/spark-streaming-akka.template b/site/docs/spark/templates/spark-streaming-akka.template new file mode 100644 index 0000000..78b7d14 --- /dev/null +++ b/site/docs/spark/templates/spark-streaming-akka.template @@ -0,0 +1,26 @@ +--- +layout: page +title: Spark Streaming Akka +description: Spark Streaming Akka +group: nav-right +--- +<!-- +{% comment %} +Licensed to the Apache Software Foundation (ASF) under one or more +contributor license agreements. See the NOTICE file distributed with +this work for additional information regarding copyright ownership. +The ASF licenses this file to you under the Apache License, Version 2.0 +(the "License"); you may not use this file except in compliance with +the License. You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+{% endcomment %} +--> + +{% include JB/setup %} http://git-wip-us.apache.org/repos/asf/bahir-website/blob/bb4cd4b2/site/docs/spark/templates/spark-streaming-mqtt.template ---------------------------------------------------------------------- diff --git a/site/docs/spark/templates/spark-streaming-mqtt.template b/site/docs/spark/templates/spark-streaming-mqtt.template new file mode 100644 index 0000000..fd08baa --- /dev/null +++ b/site/docs/spark/templates/spark-streaming-mqtt.template @@ -0,0 +1,27 @@ +--- +layout: page +title: Spark Structured Streaming MQTT +description: Spark Structured Streaming MQTT +group: nav-right +--- +<!-- +{% comment %} +Licensed to the Apache Software Foundation (ASF) under one or more +contributor license agreements. See the NOTICE file distributed with +this work for additional information regarding copyright ownership. +The ASF licenses this file to you under the Apache License, Version 2.0 +(the "License"); you may not use this file except in compliance with +the License. You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+{% endcomment %} +--> + +{% include JB/setup %} + http://git-wip-us.apache.org/repos/asf/bahir-website/blob/bb4cd4b2/site/docs/spark/templates/spark-streaming-twitter.template ---------------------------------------------------------------------- diff --git a/site/docs/spark/templates/spark-streaming-twitter.template b/site/docs/spark/templates/spark-streaming-twitter.template new file mode 100644 index 0000000..9a97ced --- /dev/null +++ b/site/docs/spark/templates/spark-streaming-twitter.template @@ -0,0 +1,26 @@ +--- +layout: page +title: Spark Streaming Twitter +description: Spark Streaming Twitter +group: nav-right +--- +<!-- +{% comment %} +Licensed to the Apache Software Foundation (ASF) under one or more +contributor license agreements. See the NOTICE file distributed with +this work for additional information regarding copyright ownership. +The ASF licenses this file to you under the Apache License, Version 2.0 +(the "License"); you may not use this file except in compliance with +the License. You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+{% endcomment %} +--> + +{% include JB/setup %} http://git-wip-us.apache.org/repos/asf/bahir-website/blob/bb4cd4b2/site/docs/spark/templates/spark-streaming-zeromq.template ---------------------------------------------------------------------- diff --git a/site/docs/spark/templates/spark-streaming-zeromq.template b/site/docs/spark/templates/spark-streaming-zeromq.template new file mode 100644 index 0000000..6a6a02a --- /dev/null +++ b/site/docs/spark/templates/spark-streaming-zeromq.template @@ -0,0 +1,26 @@ +--- +layout: page +title: Spark Streaming ZeroMQ +description: Spark Streaming ZeroMQ +group: nav-right +--- +<!-- +{% comment %} +Licensed to the Apache Software Foundation (ASF) under one or more +contributor license agreements. See the NOTICE file distributed with +this work for additional information regarding copyright ownership. +The ASF licenses this file to you under the Apache License, Version 2.0 +(the "License"); you may not use this file except in compliance with +the License. You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +{% endcomment %} +--> + +{% include JB/setup %} http://git-wip-us.apache.org/repos/asf/bahir-website/blob/bb4cd4b2/site/download.md ---------------------------------------------------------------------- diff --git a/site/download.md b/site/download.md index 88d6814..5558109 100644 --- a/site/download.md +++ b/site/download.md @@ -24,13 +24,13 @@ limitations under the License. --> {% include JB/setup %} -## {{ site.data.project.name }} Downloads +# {{ site.data.project.name }} Downloads Please find below the latest releases of {{ site.data.project.name }}. 
Note that the binary artifacts are also published through Maven. -### Latest Release +## Latest Release -Our latest release is {{ site.data.project.name }} {{site.data.project.latest_release}}, released on ({{site.data.project.latest_release_date}}). +Our latest release is {{ site.data.project.name }} {{site.data.project.latest_release}}, released on {{site.data.project.latest_release_date}}. {% if site.data.project.latest_release %} <table class="table table-hover sortable"> @@ -72,17 +72,17 @@ Our latest release is {{ site.data.project.name }} {{site.data.project.latest_re Instructions for checking hashes and signatures is indicated on the [Verifying Apache Software Foundation Releases](http://www.apache.org/info/verification.html) page. -You can also retrieve the source files from our svn repository by typing : +You can also retrieve the source files from our git repository by typing: <pre> -git clone https://github.com/apache/bahir +git clone https://github.com/apache/bahir.git cd bahir git checkout -b tags/v{{site.data.project.latest_release}} v{{site.data.project.latest_release}} </pre> -### Verifying a Release +## Verifying a Release Choose a source distribution in either *tar* or *zip* format, and [verify](http://www.apache.org/dyn/closer.cgi#verify) @@ -100,6 +100,6 @@ succeed. For security, hash and signature files are always hosted at [Apache](https://www.apache.org/dist). -### Previous Releases +## Previous Releases All previous releases of Apache Bahir can be found in the [archives](http://archive.apache.org/dist/bahir/). 
http://git-wip-us.apache.org/repos/asf/bahir-website/blob/bb4cd4b2/site/releases/2.0.0/release-notes.md ---------------------------------------------------------------------- diff --git a/site/releases/2.0.0/release-notes.md b/site/releases/2.0.0/release-notes.md new file mode 100644 index 0000000..f21b520 --- /dev/null +++ b/site/releases/2.0.0/release-notes.md @@ -0,0 +1,69 @@ +--- +layout: page +title: Community +description: Project Community Page +group: nav-right +--- +<!-- +{% comment %} +Licensed to the Apache Software Foundation (ASF) under one or more +contributor license agreements. See the NOTICE file distributed with +this work for additional information regarding copyright ownership. +The ASF licenses this file to you under the Apache License, Version 2.0 +(the "License"); you may not use this file except in compliance with +the License. You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+{% endcomment %} +--> + +{% include JB/setup %} + + +# Apache Bahir 2.0.0 for Apache Spark + +## Bug + + [BAHIR-13] - Update spark tags dependency + [BAHIR-14] - Cleanup maven pom from Spark dependencies + [BAHIR-15] - Enable RAT on Bahir builds + [BAHIR-16] - Build issues due to log4j properties not found + [BAHIR-18] - Include examples in Maven test build + [BAHIR-23] - Build should fail on Checkstyle violation + [BAHIR-24] - Fix MQTT Python code + [BAHIR-38] - Spark-submit does not use latest locally installed Bahir packages + [BAHIR-42] - Refactor sql-streaming-mqtt examples to follow other projects pattern + [BAHIR-43] - Add missing apache license header to sql-mqtt file + [BAHIR-44] - Add new sql-streaming-mqtt to distribution + +## Improvement + + [BAHIR-36] - Update readme.md with build instructions + +## New Feature + + [BAHIR-2] - Create initial build for Bahir components + [BAHIR-39] - MQTT as a streaming source for SQL Streaming + +## Task + + [BAHIR-17] - Prepare release based on Apache Spark 2.0.0-preview + [BAHIR-22] - Add script to run examples + [BAHIR-35] - Include Python code in the binary jars for use with "--packages ..." 
+ [BAHIR-37] - Prepare release based on Apache Spark 2.0.0 + +## Sub-task + + [BAHIR-19] - Create Bahir source distribution + [BAHIR-20] - Create release script + [BAHIR-21] - Create script to change build between scala 2.10 and 2.11 + [BAHIR-28] - Add documentation for streaming-akka connector + [BAHIR-29] - Add documentation for streaming-mqtt connector + [BAHIR-30] - Add documentation for streaming-twitter connector + [BAHIR-31] - Add documentation for streaming-zeromq connector http://git-wip-us.apache.org/repos/asf/bahir-website/blob/bb4cd4b2/site/releases/2.0.2/release-notes.md ---------------------------------------------------------------------- diff --git a/site/releases/2.0.2/release-notes.md b/site/releases/2.0.2/release-notes.md new file mode 100644 index 0000000..fc3a4bc --- /dev/null +++ b/site/releases/2.0.2/release-notes.md @@ -0,0 +1,43 @@ +--- +layout: page +title: Community +description: Project Community Page +group: nav-right +--- +<!-- +{% comment %} +Licensed to the Apache Software Foundation (ASF) under one or more +contributor license agreements. See the NOTICE file distributed with +this work for additional information regarding copyright ownership. +The ASF licenses this file to you under the Apache License, Version 2.0 +(the "License"); you may not use this file except in compliance with +the License. You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+{% endcomment %} +--> + +{% include JB/setup %} + + +# Apache Bahir 2.0.2 for Apache Spark + +## Task + + [BAHIR-82] - Prepare release based on Apache Spark 2.0.2 + +## Sub-Task + + [BAHIR-64] - Add test that Akka streaming connector can receive data + [BAHIR-69] - Clean build between different scala versions + +## Test + + [BAHIR-83] - Flaky test in BasicMQTTSourceSuite + [BAHIR-84] - Build log flooded with test log messages http://git-wip-us.apache.org/repos/asf/bahir-website/blob/bb4cd4b2/update-doc.sh ---------------------------------------------------------------------- diff --git a/update-doc.sh b/update-doc.sh index 5232ea0..81e4a84 100755 --- a/update-doc.sh +++ b/update-doc.sh @@ -16,9 +16,34 @@ # limitations under the License. # +############################################################################## +# This script generates the documentation for the individual Bahir modules # +# from the README.md files found in the modules in the Bahir source repo # +# # +# bahir (source repo) bahir-website # +# . ââsite # +# . ââdocs # +# . 
└──spark # +# ├──sql-streaming-mqtt └──current # +# │ └──README.md ─────> ├──spark-sql-streaming-mqtt.md # +# ├──streaming-akka │ # +# │ └──README.md ─────> ├──spark-streaming-akka.md # +# ├──streaming-mqtt │ # +# │ └──README.md ─────> ├──spark-streaming-mqtt.md # +# ├──streaming-twitter │ # +# │ └──README.md ─────> ├──spark-streaming-twitter.md # +# └──streaming-zeromq │ # +# └──README.md ─────> └──spark-streaming-zeromq.md # +# # +# Page header with license text comes from the respective template files # +# under site/docs/spark/templates # +############################################################################## + + set -e BASE_DIR=$(pwd) +WEBSITE_TEMPLATES_DIR=$BASE_DIR/site/docs/spark/templates WEBSITE_DOC_DIR=$BASE_DIR/site/docs/spark/current BAHIR_SOURCE_DIR=$BASE_DIR/target/bahir @@ -27,7 +52,7 @@ function checkout_code { rm -rf target mkdir target cd target - git clone https://git-wip-us.apache.org/repos/asf/bahir.git + git clone https://git-wip-us.apache.org/repos/asf/bahir.git --quiet cd bahir git checkout $GIT_REF git_hash=`git rev-parse --short HEAD` @@ -42,19 +67,38 @@ checkout_code rm -rf $WEBSITE_DOC_DIR/spark*.md -cp $WEBSITE_DOC_DIR/spark-sql-streaming-mqtt.template $WEBSITE_DOC_DIR/spark-sql-streaming-mqtt.md -cat $BAHIR_SOURCE_DIR/sql-streaming-mqtt/README.md >> $WEBSITE_DOC_DIR/spark-sql-streaming-mqtt.md +cp $WEBSITE_TEMPLATES_DIR/spark-sql-streaming-mqtt.template $WEBSITE_DOC_DIR/spark-sql-streaming-mqtt.md +cat $BAHIR_SOURCE_DIR/sql-streaming-mqtt/README.md >> $WEBSITE_DOC_DIR/spark-sql-streaming-mqtt.md -cp $WEBSITE_DOC_DIR/spark-streaming-akka.template $WEBSITE_DOC_DIR/spark-streaming-akka.md -cat $BAHIR_SOURCE_DIR/streaming-akka/README.md >> $WEBSITE_DOC_DIR/spark-streaming-akka.md +cp $WEBSITE_TEMPLATES_DIR/spark-streaming-akka.template $WEBSITE_DOC_DIR/spark-streaming-akka.md +cat $BAHIR_SOURCE_DIR/streaming-akka/README.md >> $WEBSITE_DOC_DIR/spark-streaming-akka.md -cp $WEBSITE_DOC_DIR/spark-streaming-mqtt.template 
$WEBSITE_DOC_DIR/spark-streaming-mqtt.md -cat $BAHIR_SOURCE_DIR/streaming-mqtt/README.md >> $WEBSITE_DOC_DIR/spark-streaming-mqtt.md +cp $WEBSITE_TEMPLATES_DIR/spark-streaming-mqtt.template $WEBSITE_DOC_DIR/spark-streaming-mqtt.md +cat $BAHIR_SOURCE_DIR/streaming-mqtt/README.md >> $WEBSITE_DOC_DIR/spark-streaming-mqtt.md -cp $WEBSITE_DOC_DIR/spark-streaming-twitter.template $WEBSITE_DOC_DIR/spark-streaming-twitter.md -cat $BAHIR_SOURCE_DIR/streaming-twitter/README.md >> $WEBSITE_DOC_DIR/spark-streaming-twitter.md +cp $WEBSITE_TEMPLATES_DIR/spark-streaming-twitter.template $WEBSITE_DOC_DIR/spark-streaming-twitter.md +cat $BAHIR_SOURCE_DIR/streaming-twitter/README.md >> $WEBSITE_DOC_DIR/spark-streaming-twitter.md -cp $WEBSITE_DOC_DIR/spark-streaming-zeromq.template $WEBSITE_DOC_DIR/spark-streaming-zeromq.md -cat $BAHIR_SOURCE_DIR/streaming-zeromq/README.md >> $WEBSITE_DOC_DIR/spark-streaming-zeromq.md +cp $WEBSITE_TEMPLATES_DIR/spark-streaming-zeromq.template $WEBSITE_DOC_DIR/spark-streaming-zeromq.md +cat $BAHIR_SOURCE_DIR/streaming-zeromq/README.md >> $WEBSITE_DOC_DIR/spark-streaming-zeromq.md set +e + +echo "Done." + +if grep -q -r "[0-9]-SNAPSHOT" $WEBSITE_DOC_DIR/spark*.md ; then + echo + echo "TODO: Replace '...-SNAPSHOT' version strings:" + echo + grep -r -n "[0-9]-SNAPSHOT" $WEBSITE_DOC_DIR/spark*.md | sed -e 's|'$(pwd)/'||g' | grep --color "[0-9.]*-SNAPSHOT" + echo + echo "i.e. to replace '2.1.0-SNAPSHOT' with '2.0.2' run the following command:" + echo + echo " perl -i -pe 's/2.1.0-SNAPSHOT/2.0.2/g' $WEBSITE_DOC_DIR/spark*.md" + echo +else + echo + echo "Generated files:" + echo + ls $WEBSITE_DOC_DIR/spark*.md | xargs -n1 | sed -e 's|'$(pwd -P)/'||g' +fi
