SAMZA-235; add script for users that don't have outside internet access in their YARN grid. produce a synthetic wikipedia-raw feed.
Project: http://git-wip-us.apache.org/repos/asf/incubator-samza-hello-samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-samza-hello-samza/commit/b6b8bc87
Tree: http://git-wip-us.apache.org/repos/asf/incubator-samza-hello-samza/tree/b6b8bc87
Diff: http://git-wip-us.apache.org/repos/asf/incubator-samza-hello-samza/diff/b6b8bc87
Branch: refs/heads/master
Commit: b6b8bc87304e66caccb5f8e3183106c618848c4f
Parents: d45f3ee
Author: Chris Riccomini <[email protected]>
Authored: Mon May 5 09:06:48 2014 -0700
Committer: Chris Riccomini <[email protected]>
Committed: Mon May 5 09:06:48 2014 -0700
----------------------------------------------------------------------
 bin/produce-wikipedia-raw-data.sh |   49 ++
 wikipedia-raw.json                | 1000 ++++++++++++++++++++++++++++++++
 2 files changed, 1049 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-samza-hello-samza/blob/b6b8bc87/bin/produce-wikipedia-raw-data.sh
----------------------------------------------------------------------
diff --git a/bin/produce-wikipedia-raw-data.sh b/bin/produce-wikipedia-raw-data.sh
new file mode 100755
index 0000000..826bf4e
--- /dev/null
+++ b/bin/produce-wikipedia-raw-data.sh
@@ -0,0 +1,49 @@
+#!/bin/bash -e
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# This script will generate wikipedia-raw data to Kafka
+
+DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+BASE_DIR=$(dirname $DIR)
+ZOOKEEPER=localhost:2181
+KAFKA_BROKER=localhost:9092
+
+# overwritten options
+while getopts "z:b:" option
+do
+  case ${option} in
+    z) ZOOKEEPER="${OPTARG}";;
+    b) KAFKA_BROKER="${OPTARG}";;
+  esac
+done
+echo "Using ${ZOOKEEPER} as the zookeeper. You can overwrite it with '-z yourlocation'"
+echo "Using ${KAFKA_BROKER} as the kafka broker. You can overwrite it with '-b yourlocation'"
+
+# check if the topic exists. if not, create the topic
+EXIST=$($BASE_DIR/deploy/kafka/bin/kafka-topics.sh --describe --topic wikipedia-raw --zookeeper $ZOOKEEPER)
+if [ -z "$EXIST" ]
+  then
+  $BASE_DIR/deploy/kafka/bin/kafka-topics.sh --create --zookeeper $ZOOKEEPER --topic wikipedia-raw --partition 1 --replication-factor 1
+fi
+
+# produce raw data
+while sleep 1
+do
+  $BASE_DIR/deploy/kafka/bin/kafka-console-producer.sh < $BASE_DIR/wikipedia-raw.json --topic wikipedia-raw --broker $KAFKA_BROKER
+done
+
