SAMZA-235; add a script for users that don't have outside internet access in 
their YARN grid. The script produces a synthetic wikipedia-raw feed.


Project: http://git-wip-us.apache.org/repos/asf/incubator-samza-hello-samza/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-samza-hello-samza/commit/b6b8bc87
Tree: 
http://git-wip-us.apache.org/repos/asf/incubator-samza-hello-samza/tree/b6b8bc87
Diff: 
http://git-wip-us.apache.org/repos/asf/incubator-samza-hello-samza/diff/b6b8bc87

Branch: refs/heads/master
Commit: b6b8bc87304e66caccb5f8e3183106c618848c4f
Parents: d45f3ee
Author: Chris Riccomini <[email protected]>
Authored: Mon May 5 09:06:48 2014 -0700
Committer: Chris Riccomini <[email protected]>
Committed: Mon May 5 09:06:48 2014 -0700

----------------------------------------------------------------------
 bin/produce-wikipedia-raw-data.sh |   49 ++
 wikipedia-raw.json                | 1000 ++++++++++++++++++++++++++++++++
 2 files changed, 1049 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-samza-hello-samza/blob/b6b8bc87/bin/produce-wikipedia-raw-data.sh
----------------------------------------------------------------------
diff --git a/bin/produce-wikipedia-raw-data.sh b/bin/produce-wikipedia-raw-data.sh
new file mode 100755
index 0000000..826bf4e
--- /dev/null
+++ b/bin/produce-wikipedia-raw-data.sh
@@ -0,0 +1,49 @@
+#!/bin/bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Feeds the bundled wikipedia-raw.json file into the Kafka topic
+# "wikipedia-raw", replaying the file in an endless loop.
+#
+# Usage: produce-wikipedia-raw-data.sh [-z zookeeper] [-b kafka_broker]
+#   -z  ZooKeeper address   (default: localhost:2181)
+#   -b  Kafka broker address (default: localhost:9092)
+
+# Enabled here rather than in the shebang so that it still applies when the
+# script is started as "bash produce-wikipedia-raw-data.sh".
+set -e
+
+DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+BASE_DIR="$(dirname "$DIR")"
+KAFKA_BIN="$BASE_DIR/deploy/kafka/bin"
+ZOOKEEPER=localhost:2181
+KAFKA_BROKER=localhost:9092
+
+# overwritten options
+while getopts "z:b:" option; do
+  case "${option}" in
+    z) ZOOKEEPER="${OPTARG}" ;;
+    b) KAFKA_BROKER="${OPTARG}" ;;
+    # getopts has already printed the reason; stop instead of silently
+    # continuing with the defaults.
+    *)
+      echo "Usage: $0 [-z zookeeper] [-b kafka_broker]" >&2
+      exit 2
+      ;;
+  esac
+done
+echo "Using ${ZOOKEEPER} as the zookeeper." \
+  "You can overwrite it with '-z yourlocation'"
+echo "Using ${KAFKA_BROKER} as the kafka broker." \
+  "You can overwrite it with '-b yourlocation'"
+
+# check if the topic exists. if not, create the topic
+# An empty --describe output is treated as "topic missing". The "|| true"
+# keeps "set -e" from aborting before the create step if the tool exits
+# non-zero for a missing topic; a genuine failure (e.g. ZooKeeper being
+# unreachable) still aborts the script at the --create call below.
+EXIST=$("$KAFKA_BIN/kafka-topics.sh" --describe --topic wikipedia-raw \
+  --zookeeper "$ZOOKEEPER") || true
+if [ -z "$EXIST" ]; then
+  "$KAFKA_BIN/kafka-topics.sh" --create --zookeeper "$ZOOKEEPER" \
+    --topic wikipedia-raw --partitions 1 --replication-factor 1
+fi
+
+# produce raw data
+# Each pass pipes the whole file through the console producer; the loop
+# pauses one second before every pass and runs until interrupted or until
+# the producer fails (set -e).
+while sleep 1; do
+  "$KAFKA_BIN/kafka-console-producer.sh" --topic wikipedia-raw \
+    --broker-list "$KAFKA_BROKER" < "$BASE_DIR/wikipedia-raw.json"
+done
+

Reply via email to