Repository: ambari Updated Branches: refs/heads/branch-2.4 60e6e44cd -> a54c63c71
AMBARI-17814 Spark Livy should wait for ATS start for BP deployments (dsen) Project: http://git-wip-us.apache.org/repos/asf/ambari/repo Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/a54c63c7 Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/a54c63c7 Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/a54c63c7 Branch: refs/heads/branch-2.4 Commit: a54c63c71a218100e34f94ef542435e7ac3d4af6 Parents: 60e6e44 Author: Dmytro Sen <[email protected]> Authored: Fri Jul 22 18:03:40 2016 +0300 Committer: Dmytro Sen <[email protected]> Committed: Fri Jul 22 18:04:55 2016 +0300 ---------------------------------------------------------------------- .../SPARK/1.2.1/package/scripts/livy_server.py | 61 ++++++++++ .../SPARK/1.2.1/package/scripts/params.py | 15 ++- .../python/stacks/2.5/SPARK/test_spark_livy.py | 120 +++++++++++++++++++ .../test/python/stacks/2.5/configs/default.json | 57 +++++++++ 4 files changed, 252 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/ambari/blob/a54c63c7/ambari-server/src/main/resources/common-services/SPARK/1.2.1/package/scripts/livy_server.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/common-services/SPARK/1.2.1/package/scripts/livy_server.py b/ambari-server/src/main/resources/common-services/SPARK/1.2.1/package/scripts/livy_server.py index 1e859a8..462836a 100644 --- a/ambari-server/src/main/resources/common-services/SPARK/1.2.1/package/scripts/livy_server.py +++ b/ambari-server/src/main/resources/common-services/SPARK/1.2.1/package/scripts/livy_server.py @@ -22,6 +22,15 @@ from resource_management.libraries.script.script import Script from resource_management.libraries.functions.check_process_status import check_process_status from resource_management.libraries.functions.stack_features import check_stack_feature from 
resource_management.libraries.functions.constants import StackFeature +from resource_management.core.exceptions import Fail +from resource_management.core.resources.system import Execute +from resource_management.libraries.providers.hdfs_resource import WebHDFSUtil +from resource_management.libraries.providers.hdfs_resource import HdfsResourceProvider +from resource_management import is_empty +from resource_management import shell +from resource_management.libraries.functions.decorator import retry +from resource_management.core.logger import Logger +from resource_management.libraries.functions.format import format from livy_service import livy_service from setup_livy import setup_livy @@ -44,6 +53,10 @@ class LivyServer(Script): import params env.set_params(params) + if params.has_ats and params.has_livyserver: + Logger.info("Verifying DFS directories where ATS stores time line data for active and completed applications.") + self.wait_for_dfs_directories_created([params.entity_groupfs_store_dir, params.entity_groupfs_active_dir]) + self.configure(env) livy_service('server', upgrade_type=upgrade_type, action='start') @@ -59,6 +72,54 @@ class LivyServer(Script): check_process_status(status_params.livy_server_pid_file) + # TODO move out and compose with similar method in resourcemanager.py + def wait_for_dfs_directories_created(self, dirs): + import params + + ignored_dfs_dirs = HdfsResourceProvider.get_ignored_resources_list(params.hdfs_resource_ignore_file) + + if params.security_enabled: + Execute(format("{kinit_path_local} -kt {livy_kerberos_keytab} {livy_principal}"), + user=params.livy_user + ) + Execute(format("{kinit_path_local} -kt {hdfs_user_keytab} {hdfs_principal_name}"), + user=params.hdfs_user + ) + + for dir_path in dirs: + self.wait_for_dfs_directory_created(dir_path, ignored_dfs_dirs) + + + @retry(times=8, sleep_time=20, backoff_factor=1, err_class=Fail) + def wait_for_dfs_directory_created(self, dir_path, ignored_dfs_dirs): + import params + + + if 
not is_empty(dir_path): + dir_path = HdfsResourceProvider.parse_path(dir_path) + + if dir_path in ignored_dfs_dirs: + Logger.info("Skipping DFS directory '" + dir_path + "' as it's marked to be ignored.") + return + + Logger.info("Verifying if DFS directory '" + dir_path + "' exists.") + + dir_exists = None + + if WebHDFSUtil.is_webhdfs_available(params.is_webhdfs_enabled, params.default_fs): + # check with webhdfs is much faster than executing hdfs dfs -test + util = WebHDFSUtil(params.hdfs_site, params.hdfs_user, params.security_enabled) + list_status = util.run_command(dir_path, 'GETFILESTATUS', method='GET', ignore_status_codes=['404'], assertable_result=False) + dir_exists = ('FileStatus' in list_status) + else: + # have to do time expensive hdfs dfs -d check. + dfs_ret_code = shell.call(format("hdfs --config {hadoop_conf_dir} dfs -test -d " + dir_path), user=params.livy_user)[0] + dir_exists = not dfs_ret_code #dfs -test -d returns 0 in case the dir exists + + if not dir_exists: + raise Fail("DFS directory '" + dir_path + "' does not exist !") + else: + Logger.info("DFS directory '" + dir_path + "' exists.") def get_component_name(self): return "livy-server" http://git-wip-us.apache.org/repos/asf/ambari/blob/a54c63c7/ambari-server/src/main/resources/common-services/SPARK/1.2.1/package/scripts/params.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/common-services/SPARK/1.2.1/package/scripts/params.py b/ambari-server/src/main/resources/common-services/SPARK/1.2.1/package/scripts/params.py index 8d4331b..a86f1d3 100644 --- a/ambari-server/src/main/resources/common-services/SPARK/1.2.1/package/scripts/params.py +++ b/ambari-server/src/main/resources/common-services/SPARK/1.2.1/package/scripts/params.py @@ -185,6 +185,10 @@ if has_spark_thriftserver and 'spark-thrift-sparkconf' in config['configurations default_fs = config['configurations']['core-site']['fs.defaultFS'] hdfs_site = 
config['configurations']['hdfs-site'] +hdfs_resource_ignore_file = "/var/lib/ambari-agent/data/.hdfs_resource_ignore" + +ats_host = set(default("/clusterHostInfo/app_timeline_server_hosts", [])) +has_ats = len(ats_host) > 0 dfs_type = default("/commandParams/dfs_type", "") @@ -218,8 +222,17 @@ if stack_version_formatted and check_stack_feature(StackFeature.SPARK_LIVY, stac livy_livyserver_hosts = default("/clusterHostInfo/livy_server_hosts", []) + # ats 1.5 properties + entity_groupfs_active_dir = config['configurations']['yarn-site']['yarn.timeline-service.entity-group-fs-store.active-dir'] + entity_groupfs_active_dir_mode = 01777 + entity_groupfs_store_dir = config['configurations']['yarn-site']['yarn.timeline-service.entity-group-fs-store.done-dir'] + entity_groupfs_store_dir_mode = 0700 + is_webhdfs_enabled = hdfs_site['dfs.webhdfs.enabled'] + if len(livy_livyserver_hosts) > 0: has_livyserver = True + if security_enabled: + livy_principal = livy_kerberos_principal.replace('_HOST', config['hostname'].lower()) livy_livyserver_port = default('configurations/livy-conf/livy.server.port',8998) @@ -231,7 +244,7 @@ import functools HdfsResource = functools.partial( HdfsResource, user=hdfs_user, - hdfs_resource_ignore_file = "/var/lib/ambari-agent/data/.hdfs_resource_ignore", + hdfs_resource_ignore_file = hdfs_resource_ignore_file, security_enabled = security_enabled, keytab = hdfs_user_keytab, kinit_path_local = kinit_path_local, http://git-wip-us.apache.org/repos/asf/ambari/blob/a54c63c7/ambari-server/src/test/python/stacks/2.5/SPARK/test_spark_livy.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/test/python/stacks/2.5/SPARK/test_spark_livy.py b/ambari-server/src/test/python/stacks/2.5/SPARK/test_spark_livy.py new file mode 100644 index 0000000..b56474a --- /dev/null +++ b/ambari-server/src/test/python/stacks/2.5/SPARK/test_spark_livy.py @@ -0,0 +1,120 @@ +#!/usr/bin/env python + +''' +Licensed to the Apache 
Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +''' +import json +from mock.mock import MagicMock, patch +from stacks.utils.RMFTestCase import * + +from only_for_platform import not_for_platform, PLATFORM_WINDOWS + +@not_for_platform(PLATFORM_WINDOWS) +@patch("resource_management.libraries.functions.get_stack_version", new=MagicMock(return_value="2.5.0.0-1597")) +class TestSparkClient(RMFTestCase): + COMMON_SERVICES_PACKAGE_DIR = "SPARK/1.2.1/package" + STACK_VERSION = "2.5" + DEFAULT_IMMUTABLE_PATHS = ['/apps/hive/warehouse', '/apps/falcon', '/mr-history/done', '/app-logs', '/tmp'] + def test_configure_default(self): + self.executeScript(self.COMMON_SERVICES_PACKAGE_DIR + "/scripts/livy_server.py", + classname = "LivyServer", + command = "start", + config_file="default.json", + stack_version = self.STACK_VERSION, + target = RMFTestCase.TARGET_COMMON_SERVICES + ) + self.assert_start_default() + self.assertNoMoreResources() + + def assert_start_default(self): + self.assertResourceCalled('Directory', '/var/run/livy', + owner = 'livy', + group = 'hadoop', + create_parents = True, + mode = 0775 + ) + self.assertResourceCalled('Directory', '/var/log/livy', + owner = 'livy', + group = 'hadoop', + create_parents = True, + mode = 0775 + ) + self.assertResourceCalled('HdfsResource', 
'/user/livy', + immutable_paths = self.DEFAULT_IMMUTABLE_PATHS, + security_enabled = False, + hadoop_bin_dir = '/usr/hdp/current/hadoop-client/bin', + keytab = UnknownConfigurationMock(), + default_fs = 'hdfs://c6401.ambari.apache.org:8020', + hdfs_site = {u'a': u'b'}, + kinit_path_local = '/usr/bin/kinit', + principal_name = UnknownConfigurationMock(), + user = 'hdfs', + owner = 'livy', + hadoop_conf_dir = '/usr/hdp/current/hadoop-client/conf', + type = 'directory', + action = ['create_on_execute'], hdfs_resource_ignore_file='/var/lib/ambari-agent/data/.hdfs_resource_ignore', + dfs_type = '', + mode = 0775, + ) + self.assertResourceCalled('HdfsResource', None, + immutable_paths = self.DEFAULT_IMMUTABLE_PATHS, + security_enabled = False, + hadoop_bin_dir = '/usr/hdp/current/hadoop-client/bin', + keytab = UnknownConfigurationMock(), + default_fs = 'hdfs://c6401.ambari.apache.org:8020', + hdfs_site = {u'a': u'b'}, + kinit_path_local = '/usr/bin/kinit', + principal_name = UnknownConfigurationMock(), + user = 'hdfs', + action = ['execute'], hdfs_resource_ignore_file='/var/lib/ambari-agent/data/.hdfs_resource_ignore', + dfs_type = '', + hadoop_conf_dir = '/usr/hdp/current/hadoop-client/conf', + ) + self.assertResourceCalled('File', '/usr/hdp/current/livy-server/conf/livy-env.sh', + content = InlineTemplate(self.getConfig()['configurations']['livy-env']['content']), + owner = 'livy', + group = 'livy', + mode = 0644, + ) + self.assertResourceCalled('PropertiesFile', '/usr/hdp/current/livy-server/conf/livy.conf', + owner = 'livy', + key_value_delimiter = ' ', + group = 'livy', + properties = self.getConfig()['configurations']['livy-conf'], + ) + self.assertResourceCalled('File', '/usr/hdp/current/livy-server/conf/log4j.properties', + content = '\n # Set everything to be logged to the console\n log4j.rootCategory=INFO, console\n log4j.appender.console=org.apache.log4j.ConsoleAppender\n log4j.appender.console.target=System.err\n 
log4j.appender.console.layout=org.apache.log4j.PatternLayout\n log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n\n\n log4j.logger.org.eclipse.jetty=WARN', + owner = 'livy', + group = 'livy', + mode = 0644, + ) + self.assertResourceCalled('File', '/usr/hdp/current/livy-server/conf/spark-blacklist.conf', + content = self.getConfig()['configurations']['livy-spark-blacklist']['content'], + owner = 'livy', + group = 'livy', + mode = 0644, + ) + self.assertResourceCalled('Directory', '/usr/hdp/current/livy-server/logs', + owner = 'livy', + group = 'livy', + mode = 0755, + ) + self.assertResourceCalled('Execute', '/usr/hdp/current/livy-server/bin/livy-server start', + environment = {'JAVA_HOME': '/usr/jdk64/jdk1.7.0_45'}, + not_if = 'ls /var/run/livy/livy-livy-server.pid >/dev/null 2>&1 && ps -p `cat /var/run/livy/livy-livy-server.pid` >/dev/null 2>&1', + user = 'livy' + ) http://git-wip-us.apache.org/repos/asf/ambari/blob/a54c63c7/ambari-server/src/test/python/stacks/2.5/configs/default.json ---------------------------------------------------------------------- diff --git a/ambari-server/src/test/python/stacks/2.5/configs/default.json b/ambari-server/src/test/python/stacks/2.5/configs/default.json index 148616b..a7e380a 100644 --- a/ambari-server/src/test/python/stacks/2.5/configs/default.json +++ b/ambari-server/src/test/python/stacks/2.5/configs/default.json @@ -171,6 +171,63 @@ "log.index.interval.bytes": "4096", "log.retention.hours": "168" }, + "spark-defaults": { + "spark.yarn.applicationMaster.waitTries": "10", + "spark.history.kerberos.keytab": "none", + "spark.yarn.preserve.staging.files": "false", + "spark.yarn.submit.file.replication": "3", + "spark.history.kerberos.principal": "none", + "spark.yarn.driver.memoryOverhead": "384", + "spark.yarn.queue": "default", + "spark.yarn.containerLauncherMaxThreads": "25", + "spark.yarn.scheduler.heartbeat.interval-ms": "5000", + "spark.history.ui.port": "18080", + 
"spark.yarn.max.executor.failures": "3", + "spark.driver.extraJavaOptions": "", + "spark.history.provider": "org.apache.spark.deploy.yarn.history.YarnHistoryProvider", + "spark.yarn.am.extraJavaOptions": "", + "spark.yarn.executor.memoryOverhead": "384" + }, + "spark-javaopts-properties": { + "content": " " + }, + "spark-log4j-properties": { + "content": "\n# Set everything to be logged to the console\nlog4j.rootCategory=INFO, console\nlog4j.appender.console=org.apache.log4j.ConsoleAppender\nlog4j.appender.console.target=System.err\nlog4j.appender.console.layout=org.apache.log4j.PatternLayout\nlog4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n\n\n# Settings to quiet third party logs that are too verbose\nlog4j.logger.org.eclipse.jetty=WARN\nlog4j.logger.org.eclipse.jetty.util.component.AbstractLifeCycle=ERROR\nlog4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO\nlog4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO" + }, + "spark-env": { + "content": "\n#!/usr/bin/env bash\n\n# This file is sourced when running various Spark programs.\n# Copy it as spark-env.sh and edit that to configure Spark for your site.\n\n# Options read in YARN client mode\n#SPARK_EXECUTOR_INSTANCES=\"2\" #Number of workers to start (Default: 2)\n#SPARK_EXECUTOR_CORES=\"1\" #Number of cores for the workers (Default: 1).\n#SPARK_EXECUTOR_MEMORY=\"1G\" #Memory per Worker (e.g. 1000M, 2G) (Default: 1G)\n#SPARK_DRIVER_MEMORY=\"512 Mb\" #Memory for Master (e.g. 
1000M, 2G) (Default: 512 Mb)\n#SPARK_YARN_APP_NAME=\"spark\" #The name of your application (Default: Spark)\n#SPARK_YARN_QUEUE=\"~@~Xdefault~@~Y\" #The hadoop queue to use for allocation requests (Default: @~Xdefault~@~Y)\n#SPARK_YARN_DIST_FILES=\"\" #Comma separated list of files to be distributed with the job.\n#SPARK_YARN_DIST_ARCHIVES=\"\" #Comma separated list of archives to be distributed with the job.\n\n# Generic options for the daemons used in the standalone deploy mode\n\n# Alternate conf dir. (Default: ${SPARK_HOME}/conf)\nexport SPARK_CONF_DIR=${SPARK_HOME:-{{spark_home}}}/conf\n\n# Where log files are stored.(Default:${SPARK_HOME}/logs)\n#export SPARK_LOG_DIR=${SPARK_HOME:-{{spark_home}}}/logs\nexport SPARK_LOG_DIR={{spark_log_dir}}\n\n# Where the pid file is stored. (Default: /tmp)\nexport SPARK_PID_DIR={{spark_pid_dir}}\n\n# A string representing this instance of spark.(Default: $USER)\nSPARK_IDENT_STRING=$USER\n\n# The scheduling priority for daemons. (Default: 0)\nSPARK_NICENESS=0\n\nexport HADOOP_HOME=${HADOOP_HOME:-{{hadoop_home}}}\nexport HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-{{hadoop_conf_dir}}}\n\n# The java implementation to use.\nexport JAVA_HOME={{java_home}}\n\nif [ -d \"/etc/tez/conf/\" ]; then\n export TEZ_CONF_DIR=/etc/tez/conf\nelse\n export TEZ_CONF_DIR=\nfi", + "spark_pid_dir": "/var/run/spark", + "spark_log_dir": "/var/log/spark", + "spark_group": "spark", + "spark_user": "spark" + }, + "spark-metrics-properties": { + "content": "\n# syntax: [instance].sink|source.[name].[options]=[value]\n\n# This file configures Spark's internal metrics system. The metrics system is\n# divided into instances which correspond to internal components.\n# Each instance can be configured to report its metrics to one or more sinks.\n# Accepted values for [instance] are \"master\", \"worker\", \"executor\", \"driver\",\n# and \"applications\".
A wild card \"*\" can be used as an instance name, in\n# which case all instances will inherit the supplied property.\n#\n# Within an instance, a \"source\" specifies a particular set of grouped metrics.\n# there are two kinds of sources:\n# 1. Spark internal sources, like MasterSource, WorkerSource, etc, which will\n# collect a Spark component's internal state. Each instance is paired with a\n# Spark source that is added automatically.\n# 2. Common sources, like JvmSource, which will collect low level state.\n# These can be added through configuration options and are then loaded\n# using reflection.\n#\n# A \"sink\" specifies where metrics are delivered to. Each instance can be\n# assigned one or more sinks.\n#\n# The sink|source field specifies whether the property relates to a sink or\n# source.\n#\n# The [name] field specifies the name of source or sink.\n#\n# The [options] field is the specific property of this source or sink. The\n# source or sink is responsible for parsing this property.\n#\n# Notes:\n# 1. To add a new sink, set the \"class\" option to a fully qualified class\n# name (see examples below).\n# 2. Some sinks involve a polling period. The minimum allowed polling period\n# is 1 second.\n# 3. Wild card properties can be overridden by more specific properties.\n# For example, master.sink.console.period takes precedence over\n# *.sink.console.period.\n# 4. A metrics specific configuration\n# \"spark.metrics.conf=${SPARK_HOME}/conf/metrics.properties\" should be\n# added to Java properties using -Dspark.metrics.conf=xxx if you want to\n# customize metrics system. You can also put the file in ${SPARK_HOME}/conf\n# and it will be loaded automatically.\n# 5. MetricsServlet is added by default as a sink in master, worker and client\n# driver, you can send http request \"/metrics/json\" to get a snapshot of all the\n# registered metrics in json format.
For master, requests \"/metrics/master/json\" and\n# \"/metrics/applications/json\" can be sent seperately to get metrics snapshot of\n# instance master and applications. MetricsServlet may not be configured by self.\n#\n\n## List of available sinks and their properties.\n\n# org.apache.spark.metrics.sink.ConsoleSink\n# Name: Default: Description:\n# period 10 Poll period\n# unit seconds Units of poll period\n\n# org.apache.spark.metrics.sink.CSVSink\n# Name: Default: Description:\n# period 10 Poll period\n# unit seconds Units of poll period\n# directory /tmp Where to store CSV files\n\n# org.apache.spark.metrics.sink.GangliaSink\n# Name: Default: Description:\n# host NONE Hostname or multicast group of Ganglia server\n# port NONE Port of Ganglia server(s)\n# period 10 Poll period\n# unit seconds Units of poll period\n# ttl 1 TTL of messages sent by Ganglia\n# mode multicast Ganglia network mode ('unicast' or 'multicast')\n\n# org.apache.spark.metrics.sink.JmxSink\n\n# org.apache.spark.metrics.sink.MetricsServlet\n# Name: Default: Description:\n# path VARIES* Path prefix from the web server root\n# sample false Whether to show entire set of samples for histograms ('false' or 'true')\n#\n# * Default path is /metrics/json for all instances except the master.
The master has two paths:\n# /metrics/aplications/json # App information\n# /metrics/master/json # Master information\n\n# org.apache.spark.metrics.sink.GraphiteSink\n# Name: Default: Description:\n# host NONE Hostname of Graphite server\n# port NONE Port of Graphite server\n# period 10 Poll period\n# unit seconds Units of poll period\n# prefix EMPTY STRING Prefix to prepend to metric name\n\n## Examples\n# Enable JmxSink for all instances by class name\n#*.sink.jmx.class=org.apache.spark.metrics.sink.JmxSink\n\n# Enable ConsoleSink for all instances by class name\n#*.sink.console.class=org.apache.spark.metrics.sink.ConsoleSink\n\n# Polling period for ConsoleSink\n#*.sink.console.period=10\n\n#*.sink.console.unit=seconds\n\n# Master instance overlap polling period\n#master.sink.console.period=15\n\n#master.sink.console.unit=seconds\n\n# Enable CsvSink for all instances\n#*.sink.csv.class=org.apache.spark.metrics.sink.CsvSink\n\n# Polling period for CsvSink\n#*.sink.csv.period=1\n\n#*.sink.csv.unit=minutes\n\n# Polling directory for CsvSink\n#*.sink.csv.directory=/tmp/\n\n# Worker instance overlap polling period\n#worker.sink.csv.period=10\n\n#worker.sink.csv.unit=minutes\n\n# Enable jvm source for instance master, worker, driver and executor\n#master.source.jvm.class=org.apache.spark.metrics.source.JvmSource\n\n#worker.source.jvm.class=org.apache.spark.metrics.source.JvmSource\n\n#driver.source.jvm.class=org.apache.spark.metrics.source.JvmSource\n\n#executor.source.jvm.class=org.apache.spark.metrics.source.JvmSource" + }, + "spark-metrics-properties": { + "content": "\n# syntax: [instance].sink|source.[name].[options]=[value]\n\n# This file configures Spark's internal metrics system. The metrics system is\n# divided into instances which correspond to internal components.\n# Each instance can be configured to report its metrics to one or more sinks.\n# Accepted values for [instance] are \"master\", \"worker\", \"executor\", \"driver\",\n# and \"applications\".
A wild card \"*\" can be used as an instance name, in\n# which case all instances will inherit the supplied property.\n#\n# Within an instance, a \"source\" specifies a particular set of grouped metrics.\n# there are two kinds of sources:\n# 1. Spark internal sources, like MasterSource, WorkerSource, etc, which will\n# collect a Spark component's internal state. Each instance is paired with a\n# Spark source that is added automatically.\n# 2. Common sources, like JvmSource, which will collect low level state.\n# These can be added through configuration options and are then loaded\n# using reflection.\n#\n# A \"sink\" specifies where metrics are delivered to. Each instance can be\n# assigned one or more sinks.\n#\n# The sink|source field specifies whether the property relates to a sink or\n# source.\n#\n# The [name] field specifies the name of source or sink.\n#\n# The [options] field is the specific property of this source or sink. The\n# source or sink is responsible for parsing this property.\n#\n# Notes:\n# 1. To add a new sink, set the \"class\" option to a fully qualified class\n# name (see examples below).\n# 2. Some sinks involve a polling period. The minimum allowed polling period\n# is 1 second.\n# 3. Wild card properties can be overridden by more specific properties.\n# For example, master.sink.console.period takes precedence over\n# *.sink.console.period.\n# 4. A metrics specific configuration\n# \"spark.metrics.conf=${SPARK_HOME}/conf/metrics.properties\" should be\n# added to Java properties using -Dspark.metrics.conf=xxx if you want to\n# customize metrics system. You can also put the file in ${SPARK_HOME}/conf\n# and it will be loaded automatically.\n# 5. MetricsServlet is added by default as a sink in master, worker and client\n# driver, you can send http request \"/metrics/json\" to get a snapshot of all the\n# registered metrics in json format.
For master, requests \"/metrics/master/json\" and\n# \"/metrics/applications/json\" can be sent seperately to get metrics snapshot of\n# instance master and applications. MetricsServlet may not be configured by self.\n#\n\n## List of available sinks and their properties.\n\n# org.apache.spark.metrics.sink.ConsoleSink\n# Name: Default: Description:\n# period 10 Poll period\n# unit seconds Units of poll period\n\n# org.apache.spark.metrics.sink.CSVSink\n# Name: Default: Description:\n# period 10 Poll period\n# unit seconds Units of poll period\n# directory /tmp Where to store CSV files\n\n# org.apache.spark.metrics.sink.GangliaSink\n# Name: Default: Description:\n# host NONE Hostname or multicast group of Ganglia server\n# port NONE Port of Ganglia server(s)\n# period 10 Poll period\n# unit seconds Units of poll period\n# ttl 1 TTL of messages sent by Ganglia\n# mode multicast Ganglia network mode ('unicast' or 'multicast')\n\n# org.apache.spark.metrics.sink.JmxSink\n\n# org.apache.spark.metrics.sink.MetricsServlet\n# Name: Default: Description:\n# path VARIES* Path prefix from the web server root\n# sample false Whether to show entire set of samples for histograms ('false' or 'true')\n#\n# * Default path is /metrics/json for all instances except the master.
The master has two paths:\n# /metrics/aplications/json # App information\n# /metrics/master/json # Master information\n\n# org.apache.spark.metrics.sink.GraphiteSink\n# Name: Default: Description:\n# host NONE Hostname of Graphite server\n# port NONE Port of Graphite server\n# period 10 Poll period\n# unit seconds Units of poll period\n# prefix EMPTY STRING Prefix to prepend to metric name\n\n## Examples\n# Enable JmxSink for all instances by class name\n#*.sink.jmx.class=org.apache.spark.metrics.sink.JmxSink\n\n# Enable ConsoleSink for all instances by class name\n#*.sink.console.class=org.apache.spark.metrics.sink.ConsoleSink\n\n# Polling period for ConsoleSink\n#*.sink.console.period=10\n\n#*.sink.console.unit=seconds\n\n# Master instance overlap polling period\n#master.sink.console.period=15\n\n#master.sink.console.unit=seconds\n\n# Enable CsvSink for all instances\n#*.sink.csv.class=org.apache.spark.metrics.sink.CsvSink\n\n# Polling period for CsvSink\n#*.sink.csv.period=1\n\n#*.sink.csv.unit=minutes\n\n# Polling directory for CsvSink\n#*.sink.csv.directory=/tmp/\n\n# Worker instance overlap polling period\n#worker.sink.csv.period=10\n\n#worker.sink.csv.unit=minutes\n\n# Enable jvm source for instance master, worker, driver and executor\n#master.source.jvm.class=org.apache.spark.metrics.source.JvmSource\n\n#worker.source.jvm.class=org.apache.spark.metrics.source.JvmSource\n\n#driver.source.jvm.class=org.apache.spark.metrics.source.JvmSource\n\n#executor.source.jvm.class=org.apache.spark.metrics.source.JvmSource" + }, + "livy-log4j-properties": { + "content": "\n # Set everything to be logged to the console\n log4j.rootCategory=INFO, console\n log4j.appender.console=org.apache.log4j.ConsoleAppender\n log4j.appender.console.target=System.err\n log4j.appender.console.layout=org.apache.log4j.PatternLayout\n log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n\n\n log4j.logger.org.eclipse.jetty=WARN" + }, + "livy-conf": { +
"livy.server.port": "8998", + "livy.server.csrf_protection.enabled": "true", + "livy.environment": "production", + "livy.impersonation.enabled": "true", + "livy.server.session.timeout": "3600000" + }, + "livy-spark-blacklist": { + "content": "\n #\n # Configuration override / blacklist. Defines a list of properties that users are not allowed\n # to override when starting Spark sessions.\n #\n # This file takes a list of property names (one per line). Empty lines and lines starting with \"#\"\n # are ignored.\n #" + }, + "livy-env": { + "livy_group": "livy", + "spark_home": "/usr/hdp/current/spark-client", + "content": "\n #!/usr/bin/env bash\n\n # - SPARK_HOME Spark which you would like to use in livy\n # - HADOOP_CONF_DIR Directory containing the Hadoop / YARN configuration to use.\n # - LIVY_LOG_DIR Where log files are stored. (Default: ${LIVY_HOME}/logs)\n # - LIVY_PID_DIR Where the pid file is stored. (Default: /tmp)\n # - LIVY_SERVER_JAVA_OPTS Java Opts for running livy server (You can set jvm related setting here, like jvm memory/gc algorithm and etc.)\n export SPARK_HOME=/usr/hdp/current/spark-client\n export HADOOP_CONF_DIR=/etc/hadoop/conf\n export LIVY_LOG_DIR={{livy_log_dir}}\n export LIVY_PID_DIR={{livy_pid_dir}}\n export LIVY_SERVER_JAVA_OPTS=\"-Xmx2g\"", + "livy_pid_dir": "/var/run/livy", + "livy_log_dir": "/var/log/livy", + "livy_user": "livy" + }, "logsearch-solr-env": { "logsearch_solr_znode": "/logsearch", "logsearch_solr_user": "solr",
