BIGTOP-1624: Add puppet recipes for deploying kafka (closes #126)
Project: http://git-wip-us.apache.org/repos/asf/bigtop/repo Commit: http://git-wip-us.apache.org/repos/asf/bigtop/commit/36bf0a70 Tree: http://git-wip-us.apache.org/repos/asf/bigtop/tree/36bf0a70 Diff: http://git-wip-us.apache.org/repos/asf/bigtop/diff/36bf0a70 Branch: refs/heads/BIGTOP-2253 Commit: 36bf0a70fd03c38c5df49aba11231d7f1baead44 Parents: 1859c90 Author: Konstantinos Tsakalozos <[email protected]> Authored: Thu Jun 2 13:36:23 2016 +0300 Committer: Roman Shaposhnik <[email protected]> Committed: Mon Jul 18 15:53:00 2016 -0700 ---------------------------------------------------------------------- .../puppet/hieradata/bigtop/cluster.yaml | 4 + bigtop-deploy/puppet/manifests/cluster.pp | 6 +- .../puppet/modules/kafka/manifests/init.pp | 56 +++++++++ .../modules/kafka/templates/server.properties | 117 +++++++++++++++++++ 4 files changed, 182 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/bigtop/blob/36bf0a70/bigtop-deploy/puppet/hieradata/bigtop/cluster.yaml ---------------------------------------------------------------------- diff --git a/bigtop-deploy/puppet/hieradata/bigtop/cluster.yaml b/bigtop-deploy/puppet/hieradata/bigtop/cluster.yaml index 5c2c5f1..62256d5 100644 --- a/bigtop-deploy/puppet/hieradata/bigtop/cluster.yaml +++ b/bigtop-deploy/puppet/hieradata/bigtop/cluster.yaml @@ -187,6 +187,10 @@ hadoop::common::tez_jars: "/usr/lib/tez" # to enable tez in hive, uncomment the lines below # hadoop_hive::client::hive_execution_engine: "tez" +#kafka +kafka::server::port: "9092" +kafka::server::zookeeper_connection_string: "%{hiera('bigtop::hadoop_head_node')}:2181" + zeppelin::server::spark_master_url: "yarn-client" zeppelin::server::hiveserver2_url: "jdbc:hive2://%{hiera('hadoop-hive::common::hiveserver2_host')}:%{hiera('hadoop-hive::common::hiveserver2_port')}" zeppelin::server::hiveserver2_user: "%{hiera('bigtop::hiveserver2_user')}" 
http://git-wip-us.apache.org/repos/asf/bigtop/blob/36bf0a70/bigtop-deploy/puppet/manifests/cluster.pp ---------------------------------------------------------------------- diff --git a/bigtop-deploy/puppet/manifests/cluster.pp b/bigtop-deploy/puppet/manifests/cluster.pp index 9ff424c..12e0424 100644 --- a/bigtop-deploy/puppet/manifests/cluster.pp +++ b/bigtop-deploy/puppet/manifests/cluster.pp @@ -117,6 +117,9 @@ $roles_map = { worker => ["qfs-chunkserver"], client => ["qfs-client"], }, + kafka => { + master => ["kafka-server"], + } } class hadoop_cluster_node ( @@ -185,7 +188,8 @@ class node_with_roles ($roles = hiera("bigtop::roles")) inherits hadoop_cluster_ "tez", "ycsb", "kerberos", - "zeppelin" + "zeppelin", + "kafka" ] deploy_module { $modules: http://git-wip-us.apache.org/repos/asf/bigtop/blob/36bf0a70/bigtop-deploy/puppet/modules/kafka/manifests/init.pp ---------------------------------------------------------------------- diff --git a/bigtop-deploy/puppet/modules/kafka/manifests/init.pp b/bigtop-deploy/puppet/modules/kafka/manifests/init.pp new file mode 100644 index 0000000..f44b737 --- /dev/null +++ b/bigtop-deploy/puppet/modules/kafka/manifests/init.pp @@ -0,0 +1,56 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +class kafka { + + class deploy ($roles) { + if ('kafka-server' in $roles) { + include server + } + } + + class server( + $broker_id = "0", + $port = "9092", + $zookeeper_connection_string = "localhost:2181" + ) { + + package { 'kafka': + ensure => latest, + } + + package { 'kafka-server': + ensure => latest, + } + + file { '/etc/kafka/conf/server.properties': + content => template('kafka/server.properties'), + require => [ Package['kafka'], Package['kafka-server'] ], + owner => 'kafka', + group => 'kafka', + } + + service { 'kafka-server': + ensure => running, + subscribe => [ + Package['kafka'], + Package['kafka-server'], + File['/etc/kafka/conf/server.properties'], + ], + hasrestart => true, + hasstatus => true, + } + } +} http://git-wip-us.apache.org/repos/asf/bigtop/blob/36bf0a70/bigtop-deploy/puppet/modules/kafka/templates/server.properties ---------------------------------------------------------------------- diff --git a/bigtop-deploy/puppet/modules/kafka/templates/server.properties b/bigtop-deploy/puppet/modules/kafka/templates/server.properties new file mode 100644 index 0000000..a30e970 --- /dev/null +++ b/bigtop-deploy/puppet/modules/kafka/templates/server.properties @@ -0,0 +1,117 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# see kafka.server.KafkaConfig for additional details and defaults + +############################# Server Basics ############################# + +# The id of the broker. This must be set to a unique integer for each broker. +broker.id=<%= @broker_id %> + +############################# Socket Server Settings ############################# + +# The port the socket server listens on +port=<%= @port %> + +# Hostname the broker will bind to. If not set, the server will bind to all interfaces +#host.name=localhost + +# Hostname the broker will advertise to producers and consumers. If not set, it uses the +# value for "host.name" if configured. Otherwise, it will use the value returned from +# java.net.InetAddress.getCanonicalHostName(). +#advertised.host.name=<hostname routable by clients> + +# The port to publish to ZooKeeper for clients to use. If this is not set, +# it will publish the same port that the broker binds to. +#advertised.port=<port accessible by clients> + +# The number of threads handling network requests +num.network.threads=2 + +# The number of threads doing disk I/O +num.io.threads=8 + +# The send buffer (SO_SNDBUF) used by the socket server +socket.send.buffer.bytes=1048576 + +# The receive buffer (SO_RCVBUF) used by the socket server +socket.receive.buffer.bytes=1048576 + +# The maximum size of a request that the socket server will accept (protection against OOM) +socket.request.max.bytes=104857600 + + +############################# Log Basics ############################# + +# A comma separated list of directories under which to store log files +log.dirs=/tmp/kafka-logs + +# The default number of log partitions per topic. More partitions allow greater +# parallelism for consumption, but this will also result in more files across +# the brokers. 
+num.partitions=2 + +############################# Log Flush Policy ############################# + +# Messages are immediately written to the filesystem but by default we only fsync() to sync +# the OS cache lazily. The following configurations control the flush of data to disk. +# There are a few important trade-offs here: +# 1. Durability: Unflushed data may be lost if you are not using replication. +# 2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush. +# 3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to excessive seeks. +# The settings below allow one to configure the flush policy to flush data after a period of time or +# every N messages (or both). This can be done globally and overridden on a per-topic basis. + +# The number of messages to accept before forcing a flush of data to disk +#log.flush.interval.messages=10000 + +# The maximum amount of time a message can sit in a log before we force a flush +#log.flush.interval.ms=1000 + +############################# Log Retention Policy ############################# + +# The following configurations control the disposal of log segments. The policy can +# be set to delete segments after a period of time, or after a given size has accumulated. +# A segment will be deleted whenever *either* of these criteria are met. Deletion always happens +# from the end of the log. + +# The minimum age of a log file to be eligible for deletion +log.retention.hours=168 + +# A size-based retention policy for logs. Segments are pruned from the log as long as the remaining +# segments don't drop below log.retention.bytes. +#log.retention.bytes=1073741824 + +# The maximum size of a log segment file. When this size is reached a new log segment will be created. 
+log.segment.bytes=536870912 + +# The interval at which log segments are checked to see if they can be deleted according +# to the retention policies +log.retention.check.interval.ms=60000 + +# By default the log cleaner is disabled and the log retention policy will default to just delete segments after their retention expires. +# If log.cleaner.enable=true is set the cleaner will be enabled and individual logs can then be marked for log compaction. +log.cleaner.enable=false + +############################# Zookeeper ############################# + +# Zookeeper connection string (see zookeeper docs for details). +# This is a comma-separated list of host:port pairs, each corresponding to a zk +# server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". +# You can also append an optional chroot string to the urls to specify the +# root directory for all kafka znodes. +zookeeper.connect=<%= @zookeeper_connection_string %> + +# Timeout in ms for connecting to zookeeper +zookeeper.connection.timeout.ms=1000000
