Repository: bigtop Updated Branches: refs/heads/master bf67f6e5b -> ed6377f69
BIGTOP-2105. Puppet recipes improvements after Spark is bumped to 1.5.1 Project: http://git-wip-us.apache.org/repos/asf/bigtop/repo Commit: http://git-wip-us.apache.org/repos/asf/bigtop/commit/ed6377f6 Tree: http://git-wip-us.apache.org/repos/asf/bigtop/tree/ed6377f6 Diff: http://git-wip-us.apache.org/repos/asf/bigtop/diff/ed6377f6 Branch: refs/heads/master Commit: ed6377f693e7d2655172fd49e6e68ddc9d5709fd Parents: 05d9fad Author: Jonathan Kelly <[email protected]> Authored: Fri Oct 2 15:20:33 2015 -0700 Committer: Konstantin Boudnik <[email protected]> Committed: Mon Nov 2 13:02:28 2015 -0800 ---------------------------------------------------------------------- .../puppet/modules/spark/manifests/init.pp | 140 +++++++++++++++---- .../modules/spark/templates/spark-defaults.conf | 22 +++ .../puppet/modules/spark/templates/spark-env.sh | 65 +-------- 3 files changed, 145 insertions(+), 82 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/bigtop/blob/ed6377f6/bigtop-deploy/puppet/modules/spark/manifests/init.pp ---------------------------------------------------------------------- diff --git a/bigtop-deploy/puppet/modules/spark/manifests/init.pp b/bigtop-deploy/puppet/modules/spark/manifests/init.pp index 9b33bb9..b93be1d 100644 --- a/bigtop-deploy/puppet/modules/spark/manifests/init.pp +++ b/bigtop-deploy/puppet/modules/spark/manifests/init.pp @@ -16,23 +16,42 @@ class spark { class deploy ($roles) { - if ("spark-master" in $roles) { - include spark::master + if ('spark-client' in $roles) { + include client } - if ("spark-worker" in $roles) { - include spark::worker + if ('spark-on-yarn' in $roles) { + include yarn + } + + if ('spark-yarn-slave' in $roles) { + include yarn_slave + } + + if ('spark-master' in $roles) { + include master + } + + if ('spark-worker' in $roles) { + include worker + } + + if ('spark-history-server' in $roles) { + include history_server } } - class common ($master_host = 
$fqdn, $master_port = "7077", $master_ui_port = "18080") { - package { "spark-core": - ensure => latest, + class client { + include common + + package { 'spark-python': + ensure => latest, + require => Package['spark-core'], } - file { "/etc/spark/conf/spark-env.sh": - content => template("spark/spark-env.sh"), - require => [Package["spark-core"]], + package { 'spark-extras': + ensure => latest, + require => Package['spark-core'], } } @@ -43,14 +62,15 @@ class spark { ensure => latest, } - if ( $fqdn == $common::master_host ) { - service { "spark-master": - ensure => running, - require => [ Package["spark-master"], File["/etc/spark/conf/spark-env.sh"], ], - subscribe => [Package["spark-master"], File["/etc/spark/conf/spark-env.sh"] ], - hasrestart => true, - hasstatus => true, - } + service { 'spark-master': + ensure => running, + subscribe => [ + Package['spark-master'], + File['/etc/spark/conf/spark-env.sh'], + File['/etc/spark/conf/spark-defaults.conf'], + ], + hasrestart => true, + hasstatus => true, } } @@ -61,15 +81,87 @@ class spark { ensure => latest, } - if ( $fqdn == $common::master_host ) { - Service["spark-master"] ~> Service["spark-worker"] + service { 'spark-worker': + ensure => running, + subscribe => [ + Package['spark-worker'], + File['/etc/spark/conf/spark-env.sh'], + File['/etc/spark/conf/spark-defaults.conf'], + ], + hasrestart => true, + hasstatus => true, } - service { "spark-worker": - ensure => running, - require => [ Package["spark-worker"], File["/etc/spark/conf/spark-env.sh"], ], - subscribe => [Package["spark-worker"], File["/etc/spark/conf/spark-env.sh"] ], + } + + class history_server { + include common + + package { 'spark-history-server': + ensure => latest, + } + + service { 'spark-history-server': + ensure => running, + subscribe => [ + Package['spark-history-server'], + File['/etc/spark/conf/spark-env.sh'], + File['/etc/spark/conf/spark-defaults.conf'], + ], hasrestart => true, hasstatus => true, } } + + class yarn { + include 
common + include datanucleus + } + + class yarn_slave { + include yarn_shuffle + include datanucleus + } + + class yarn_shuffle { + package { 'spark-yarn-shuffle': + ensure => latest, + } + } + + class datanucleus { + package { 'spark-datanucleus': + ensure => latest, + } + } + + class common( + $master_url = 'yarn', + $master_host = $fqdn, + $master_port = 7077, + $worker_port = 7078, + $master_ui_port = 8080, + $worker_ui_port = 8081, + $history_ui_port = 18080, + $use_yarn_shuffle_service = false, + ) { + + package { 'spark-core': + ensure => latest, + } + + file { '/etc/spark/conf/spark-env.sh': + content => template('spark/spark-env.sh'), + require => Package['spark-core'], + } + + file { '/etc/spark/conf/spark-defaults.conf': + content => template('spark/spark-defaults.conf'), + require => Package['spark-core'], + } + + file { '/etc/spark/conf/log4j.properties': + source => '/etc/spark/conf/log4j.properties.template', + require => Package['spark-core'], + } + } } http://git-wip-us.apache.org/repos/asf/bigtop/blob/ed6377f6/bigtop-deploy/puppet/modules/spark/templates/spark-defaults.conf ---------------------------------------------------------------------- diff --git a/bigtop-deploy/puppet/modules/spark/templates/spark-defaults.conf b/bigtop-deploy/puppet/modules/spark/templates/spark-defaults.conf new file mode 100644 index 0000000..bdb9e56 --- /dev/null +++ b/bigtop-deploy/puppet/modules/spark/templates/spark-defaults.conf @@ -0,0 +1,22 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +spark.master <%= @master_url %> +spark.eventLog.enabled true +spark.eventLog.dir hdfs:///var/log/spark/apps +spark.history.fs.logDirectory hdfs:///var/log/spark/apps +spark.yarn.historyServer.address <%= @master_host %>:<%= @history_ui_port %> +spark.history.ui.port <%= @history_ui_port %> +spark.shuffle.service.enabled <%= @use_yarn_shuffle_service %> http://git-wip-us.apache.org/repos/asf/bigtop/blob/ed6377f6/bigtop-deploy/puppet/modules/spark/templates/spark-env.sh ---------------------------------------------------------------------- diff --git a/bigtop-deploy/puppet/modules/spark/templates/spark-env.sh b/bigtop-deploy/puppet/modules/spark/templates/spark-env.sh old mode 100644 new mode 100755 index f7845a1..eb351c7 --- a/bigtop-deploy/puppet/modules/spark/templates/spark-env.sh +++ b/bigtop-deploy/puppet/modules/spark/templates/spark-env.sh @@ -14,68 +14,17 @@ # See the License for the specific language governing permissions and # limitations under the License. -# This file is sourced when running various Spark programs. -# Copy it as spark-env.sh and edit that to configure Spark for your site. 
- -# Options read when launching programs locally with -# ./bin/run-example or ./bin/spark-submit -# - HADOOP_CONF_DIR, to point Spark towards Hadoop configuration files -# - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node -# - SPARK_PUBLIC_DNS, to set the public dns name of the driver program -# - SPARK_CLASSPATH, default classpath entries to append - -# Options read by executors and drivers running inside the cluster -# - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node -# - SPARK_PUBLIC_DNS, to set the public DNS name of the driver program -# - SPARK_CLASSPATH, default classpath entries to append -# - SPARK_LOCAL_DIRS, storage directories to use on this node for shuffle and RDD data -# - MESOS_NATIVE_JAVA_LIBRARY, to point to your libmesos.so if you use Mesos - -# Options read in YARN client mode -# - HADOOP_CONF_DIR, to point Spark towards Hadoop configuration files -# - SPARK_EXECUTOR_INSTANCES, Number of workers to start (Default: 2) -# - SPARK_EXECUTOR_CORES, Number of cores for the workers (Default: 1). -# - SPARK_EXECUTOR_MEMORY, Memory per Worker (e.g. 1000M, 2G) (Default: 1G) -# - SPARK_DRIVER_MEMORY, Memory for Master (e.g. 1000M, 2G) (Default: 512 Mb) -# - SPARK_YARN_APP_NAME, The name of your application (Default: Spark) -# - SPARK_YARN_QUEUE, The hadoop queue to use for allocation requests (Default: 'default') -# - SPARK_YARN_DIST_FILES, Comma separated list of files to be distributed with the job. -# - SPARK_YARN_DIST_ARCHIVES, Comma separated list of archives to be distributed with the job. - -# Options for the daemons used in the standalone deploy mode -# - SPARK_MASTER_IP, to bind the master to a different IP address or hostname -# - SPARK_MASTER_PORT / SPARK_MASTER_WEBUI_PORT, to use non-default ports for the master -# - SPARK_MASTER_OPTS, to set config properties only for the master (e.g. 
"-Dx=y") -# - SPARK_WORKER_CORES, to set the number of cores to use on this machine -# - SPARK_WORKER_MEMORY, to set how much total memory workers have to give executors (e.g. 1000m, 2g) -# - SPARK_WORKER_PORT / SPARK_WORKER_WEBUI_PORT, to use non-default ports for the worker -# - SPARK_WORKER_INSTANCES, to set the number of worker processes per node -# - SPARK_WORKER_DIR, to set the working directory of worker processes -# - SPARK_WORKER_OPTS, to set config properties only for the worker (e.g. "-Dx=y") -# - SPARK_HISTORY_OPTS, to set config properties only for the history server (e.g. "-Dx=y") -# - SPARK_DAEMON_JAVA_OPTS, to set config properties for all daemons (e.g. "-Dx=y") -# - SPARK_PUBLIC_DNS, to set the public dns name of the master or workers - -# Generic options for the daemons used in the standalone deploy mode -# - SPARK_CONF_DIR Alternate conf dir. (Default: ${SPARK_HOME}/conf) -# - SPARK_LOG_DIR Where log files are stored. (Default: ${SPARK_HOME}/logs) -# - SPARK_PID_DIR Where the pid file is stored. (Default: /tmp) -# - SPARK_IDENT_STRING A string representing this instance of spark. (Default: $USER) -# - SPARK_NICENESS The scheduling priority for daemons. 
(Default: 0) - +export SPARK_HOME=${SPARK_HOME:-/usr/lib/spark} +export SPARK_LOG_DIR=${SPARK_LOG_DIR:-/var/log/spark} export HADOOP_HOME=${HADOOP_HOME:-/usr/lib/hadoop} export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-/etc/hadoop/conf} +export HIVE_CONF_DIR=${HIVE_CONF_DIR:-/etc/hive/conf} export STANDALONE_SPARK_MASTER_HOST=<%= @master_host %> +export SPARK_MASTER_PORT=<%= @master_port %> export SPARK_MASTER_IP=$STANDALONE_SPARK_MASTER_HOST export SPARK_MASTER_WEBUI_PORT=<%= @master_ui_port %> -export SPARK_MASTER_PORT=7077 -export SPARK_WORKER_PORT=7078 -export SPARK_WORKER_WEBUI_PORT=18081 -export SPARK_WORKER_DIR=/var/run/spark/work -export SPARK_HISTORY_OPTS="$SPARK_HISTORY_OPTS -Dspark.history.fs.logDirectory=hdfs:///var/log/spark/apps -Dspark.history.ui.port=18082" - -export SPARK_LOG_DIR=/var/log/spark - -export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${HADOOP_HOME}/lib/native +export SPARK_WORKER_DIR=${SPARK_WORKER_DIR:-/var/run/spark/work} +export SPARK_WORKER_PORT=<%= @worker_port %> +export SPARK_WORKER_WEBUI_PORT=<%= @worker_ui_port %>
