update examples and docs for HDFS example
Project: http://git-wip-us.apache.org/repos/asf/storm/repo Commit: http://git-wip-us.apache.org/repos/asf/storm/commit/0c1e0aa8 Tree: http://git-wip-us.apache.org/repos/asf/storm/tree/0c1e0aa8 Diff: http://git-wip-us.apache.org/repos/asf/storm/diff/0c1e0aa8 Branch: refs/heads/master Commit: 0c1e0aa81f39e473bcf3482448813d78065e1212 Parents: 3411bc7 Author: P. Taylor Goetz <[email protected]> Authored: Mon Apr 6 23:48:38 2015 -0400 Committer: P. Taylor Goetz <[email protected]> Committed: Mon Apr 6 23:48:38 2015 -0400 ---------------------------------------------------------------------- .../src/test/resources/configs/hdfs_test.yaml | 97 ----------------- flux-examples/README.md | 30 +++++- flux-examples/pom.xml | 6 ++ .../src/main/resources/hdfs_bolt.properties | 9 ++ flux-examples/src/main/resources/multilang.yaml | 89 ++++++++++++++++ flux-examples/src/main/resources/shell.yaml | 89 ---------------- .../src/main/resources/simple_hdfs.yaml | 105 +++++++++++++++++++ 7 files changed, 238 insertions(+), 187 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/storm/blob/0c1e0aa8/flux-core/src/test/resources/configs/hdfs_test.yaml ---------------------------------------------------------------------- diff --git a/flux-core/src/test/resources/configs/hdfs_test.yaml b/flux-core/src/test/resources/configs/hdfs_test.yaml deleted file mode 100644 index c1d28d2..0000000 --- a/flux-core/src/test/resources/configs/hdfs_test.yaml +++ /dev/null @@ -1,97 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Test ability to wire together shell spouts/bolts ---- - -# topology definition -# name to be used when submitting -name: "hdfs-topology" - -# Components -# Components are analagous to Spring beans. They are meant to be used as constructor, -# property(setter), and builder arguments. -# -# for the time being, components must be declared in the order they are referenced -components: - - id: "syncPolicy" - className: "org.apache.storm.hdfs.bolt.sync.CountSyncPolicy" - constructorArgs: - - 1000 - - id: "rotationPolicy" - className: "org.apache.storm.hdfs.bolt.rotation.FileSizeRotationPolicy" - constructorArgs: - - 5.0 - - MB - - - id: "fileNameFormat" - className: "org.apache.storm.hdfs.bolt.format.DefaultFileNameFormat" - configMethods: - - name: "withPath" - args: ["/tmp/foo/"] - - name: "withExtension" - args: [".txt"] - - - id: "recordFormat" - className: "org.apache.storm.hdfs.bolt.format.DelimitedRecordFormat" - configMethods: - - name: "withFieldDelimiter" - args: ["|"] - - - id: "rotationAction" - className: "org.apache.storm.hdfs.common.rotation.MoveFileAction" - configMethods: - - name: "toDestination" - args: ["/tmp/dest2"] - -# spout definitions -spouts: - - id: "spout-1" - className: "backtype.storm.testing.TestWordSpout" - parallelism: 1 - # ... 
- -# bolt definitions - -# HdfsBolt bolt = new HdfsBolt() -# .withConfigKey("hdfs.config") -# .withFsUrl(args[0]) -# .withFileNameFormat(fileNameFormat) -# .withRecordFormat(format) -# .withRotationPolicy(rotationPolicy) -# .withSyncPolicy(syncPolicy) -# .addRotationAction(new MoveFileAction().toDestination("/tmp/dest2/")); -bolts: - - id: "bolt-1" - className: "org.apache.storm.hdfs.bolt.HdfsBolt" - configMethods: - - name: "withConfigKey" - args: ["hdfs.config"] - - name: "withFsUrl" - args: ["hdfs://localhost:1234"] - - name: "withFileNameFormat" - args: [ref: "fileNameFormat"] - - name: "withRecordFormat" - args: [ref: "recordFormat"] - - name: "withRotationPolicy" - args: [ref: "rotationPolicy"] - - name: "withSyncPolicy" - args: [ref: "syncPolicy"] - - name: "addRotationAction" - args: [ref: "rotationAction"] - parallelism: 1 - # ... - http://git-wip-us.apache.org/repos/asf/storm/blob/0c1e0aa8/flux-examples/README.md ---------------------------------------------------------------------- diff --git a/flux-examples/README.md b/flux-examples/README.md index 2f107e7..9f5682e 100644 --- a/flux-examples/README.md +++ b/flux-examples/README.md @@ -23,6 +23,34 @@ The example YAML files are also packaged in the examples jar, so they can also b command line switch: ```bash -storm jar ./target/flux-examples-0.2.3-SNAPSHOT.jar org.apache.storm.flux.Flux --local --resource /sime_wordcount.yaml +storm jar ./target/flux-examples-0.2.3-SNAPSHOT.jar org.apache.storm.flux.Flux --local --resource /simple_wordcount.yaml +``` + +## Available Examples + +### simple_wordcount.yaml + +This is a very basic wordcount example using Java spouts and bolts. It simply logs the running count of each word +received. + +### multilang.yaml + +Another wordcount example that uses a spout written in JavaScript (node.js), a bolt written in Python, and two bolts +written in java. 
+ +### kafka_spout.yaml +This example illustrates how to configure Storm's `storm-kafka` spout using Flux YAML DSL `components`, `references`, +and `constructor arguments` constructs. + +### simple_hdfs.yaml + +This example demonstrates using Flux to set up a storm-hdfs bolt to write to an HDFS cluster. It also demonstrates Flux's +variable substitution/filtering feature. + +To run the `simple_hdfs.yaml` example, copy the `hdfs_bolt.properties` file to a convenient location and change, at +least, the property `hdfs.url` to point to an HDFS cluster. Then you can run the example with a command like: + +```bash +storm jar ./target/flux-examples-0.2.3-SNAPSHOT.jar org.apache.storm.flux.Flux --local ./src/main/resources/simple_hdfs.yaml --filter my_hdfs_bolt.properties ``` http://git-wip-us.apache.org/repos/asf/storm/blob/0c1e0aa8/flux-examples/pom.xml ---------------------------------------------------------------------- diff --git a/flux-examples/pom.xml b/flux-examples/pom.xml index 63bc312..09db717 100644 --- a/flux-examples/pom.xml +++ b/flux-examples/pom.xml @@ -44,6 +44,12 @@ <artifactId>flux-wrappers</artifactId> <version>${project.version}</version> </dependency> + + <dependency> + <groupId>org.apache.storm</groupId> + <artifactId>storm-hdfs</artifactId> + <version>${storm.version}</version> + </dependency> </dependencies> <build> http://git-wip-us.apache.org/repos/asf/storm/blob/0c1e0aa8/flux-examples/src/main/resources/hdfs_bolt.properties ---------------------------------------------------------------------- diff --git a/flux-examples/src/main/resources/hdfs_bolt.properties b/flux-examples/src/main/resources/hdfs_bolt.properties new file mode 100644 index 0000000..34a7a23 --- /dev/null +++ b/flux-examples/src/main/resources/hdfs_bolt.properties @@ -0,0 +1,9 @@ +# The HDFS url +hdfs.url="hdfs://hadoop:54310" + +# The HDFS directory where the bolt will write incoming data +hdfs.write.dir="/incoming" + +# The HDFS directory where files will be moved once the bolt has 
+# finished writing to it. +hdfs.dest.dir="/complete" \ No newline at end of file http://git-wip-us.apache.org/repos/asf/storm/blob/0c1e0aa8/flux-examples/src/main/resources/multilang.yaml ---------------------------------------------------------------------- diff --git a/flux-examples/src/main/resources/multilang.yaml b/flux-examples/src/main/resources/multilang.yaml new file mode 100644 index 0000000..4f80667 --- /dev/null +++ b/flux-examples/src/main/resources/multilang.yaml @@ -0,0 +1,89 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Test ability to wire together shell spouts/bolts +--- + +# topology definition +# name to be used when submitting +name: "shell-topology" + +# topology configuration +# this will be passed to the submitter as a map of config options +# +config: + topology.workers: 1 + # ... + +# spout definitions +spouts: + - id: "sentence-spout" + className: "org.apache.storm.flux.wrappers.spouts.FluxShellSpout" + # shell spout constructor takes 2 arguments: String[], String[] + constructorArgs: + # command line + - ["node", "randomsentence.js"] + # output fields + - ["word"] + parallelism: 1 + # ... 
+ +# bolt definitions +bolts: + - id: "splitsentence" + className: "org.apache.storm.flux.wrappers.bolts.FluxShellBolt" + constructorArgs: + # command line + - ["python", "splitsentence.py"] + # output fields + - ["word"] + parallelism: 1 + # ... + + - id: "log" + className: "org.apache.storm.flux.wrappers.bolts.LogInfoBolt" + parallelism: 1 + # ... + + - id: "count" + className: "backtype.storm.testing.TestWordCounter" + parallelism: 1 + # ... + +#stream definitions +# stream definitions define connections between spouts and bolts. +# note that such connections can be cyclical +# custom stream groupings are also supported + +streams: + - name: "spout --> split" # name isn't used (placeholder for logging, UI, etc.) + from: "sentence-spout" + to: "splitsentence" + grouping: + type: SHUFFLE + + - name: "split --> count" + from: "splitsentence" + to: "count" + grouping: + type: FIELDS + args: ["word"] + + - name: "count --> log" + from: "count" + to: "log" + grouping: + type: SHUFFLE http://git-wip-us.apache.org/repos/asf/storm/blob/0c1e0aa8/flux-examples/src/main/resources/shell.yaml ---------------------------------------------------------------------- diff --git a/flux-examples/src/main/resources/shell.yaml b/flux-examples/src/main/resources/shell.yaml deleted file mode 100644 index 4f80667..0000000 --- a/flux-examples/src/main/resources/shell.yaml +++ /dev/null @@ -1,89 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Test ability to wire together shell spouts/bolts ---- - -# topology definition -# name to be used when submitting -name: "shell-topology" - -# topology configuration -# this will be passed to the submitter as a map of config options -# -config: - topology.workers: 1 - # ... - -# spout definitions -spouts: - - id: "sentence-spout" - className: "org.apache.storm.flux.wrappers.spouts.FluxShellSpout" - # shell spout constructor takes 2 arguments: String[], String[] - constructorArgs: - # command line - - ["node", "randomsentence.js"] - # output fields - - ["word"] - parallelism: 1 - # ... - -# bolt definitions -bolts: - - id: "splitsentence" - className: "org.apache.storm.flux.wrappers.bolts.FluxShellBolt" - constructorArgs: - # command line - - ["python", "splitsentence.py"] - # output fields - - ["word"] - parallelism: 1 - # ... - - - id: "log" - className: "org.apache.storm.flux.wrappers.bolts.LogInfoBolt" - parallelism: 1 - # ... - - - id: "count" - className: "backtype.storm.testing.TestWordCounter" - parallelism: 1 - # ... - -#stream definitions -# stream definitions define connections between spouts and bolts. -# note that such connections can be cyclical -# custom stream groupings are also supported - -streams: - - name: "spout --> split" # name isn't used (placeholder for logging, UI, etc.) 
- from: "sentence-spout" - to: "splitsentence" - grouping: - type: SHUFFLE - - - name: "split --> count" - from: "splitsentence" - to: "count" - grouping: - type: FIELDS - args: ["word"] - - - name: "count --> log" - from: "count" - to: "log" - grouping: - type: SHUFFLE http://git-wip-us.apache.org/repos/asf/storm/blob/0c1e0aa8/flux-examples/src/main/resources/simple_hdfs.yaml ---------------------------------------------------------------------- diff --git a/flux-examples/src/main/resources/simple_hdfs.yaml b/flux-examples/src/main/resources/simple_hdfs.yaml new file mode 100644 index 0000000..ea7721d --- /dev/null +++ b/flux-examples/src/main/resources/simple_hdfs.yaml @@ -0,0 +1,105 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Test ability to wire together shell spouts/bolts +--- + +# topology definition +# name to be used when submitting +name: "hdfs-topology" + +# Components +# Components are analagous to Spring beans. They are meant to be used as constructor, +# property(setter), and builder arguments. 
+# +# for the time being, components must be declared in the order they are referenced +components: + - id: "syncPolicy" + className: "org.apache.storm.hdfs.bolt.sync.CountSyncPolicy" + constructorArgs: + - 1000 + - id: "rotationPolicy" + className: "org.apache.storm.hdfs.bolt.rotation.TimedRotationPolicy" + constructorArgs: + - 30 + - SECONDS + + - id: "fileNameFormat" + className: "org.apache.storm.hdfs.bolt.format.DefaultFileNameFormat" + configMethods: + - name: "withPath" + args: [${hdfs.write.dir}] + - name: "withExtension" + args: [".txt"] + + - id: "recordFormat" + className: "org.apache.storm.hdfs.bolt.format.DelimitedRecordFormat" + configMethods: + - name: "withFieldDelimiter" + args: ["|"] + + - id: "rotationAction" + className: "org.apache.storm.hdfs.common.rotation.MoveFileAction" + configMethods: + - name: "toDestination" + args: [${hdfs.dest.dir}] + +# spout definitions +spouts: + - id: "spout-1" + className: "backtype.storm.testing.TestWordSpout" + parallelism: 1 + # ... + +# bolt definitions + +bolts: + - id: "bolt-1" + className: "org.apache.storm.hdfs.bolt.HdfsBolt" + configMethods: + - name: "withConfigKey" + args: ["hdfs.config"] + - name: "withFsUrl" + args: [${hdfs.url}] + - name: "withFileNameFormat" + args: [ref: "fileNameFormat"] + - name: "withRecordFormat" + args: [ref: "recordFormat"] + - name: "withRotationPolicy" + args: [ref: "rotationPolicy"] + - name: "withSyncPolicy" + args: [ref: "syncPolicy"] + - name: "addRotationAction" + args: [ref: "rotationAction"] + parallelism: 1 + # ... + + - id: "bolt-2" + className: "org.apache.storm.flux.wrappers.bolts.LogInfoBolt" + parallelism: 1 + +streams: + - name: "" # name isn't used (placeholder for logging, UI, etc.) + from: "spout-1" + to: "bolt-1" + grouping: + type: SHUFFLE + + - name: "" # name isn't used (placeholder for logging, UI, etc.) + from: "spout-1" + to: "bolt-2" + grouping: + type: SHUFFLE \ No newline at end of file
