Repository: ambari
Updated Branches:
  refs/heads/branch-2.4 60e6e44cd -> a54c63c71


AMBARI-17814 Spark Livy should wait for ATS start for BP deployments (dsen)


Project: http://git-wip-us.apache.org/repos/asf/ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/a54c63c7
Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/a54c63c7
Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/a54c63c7

Branch: refs/heads/branch-2.4
Commit: a54c63c71a218100e34f94ef542435e7ac3d4af6
Parents: 60e6e44
Author: Dmytro Sen <[email protected]>
Authored: Fri Jul 22 18:03:40 2016 +0300
Committer: Dmytro Sen <[email protected]>
Committed: Fri Jul 22 18:04:55 2016 +0300

----------------------------------------------------------------------
 .../SPARK/1.2.1/package/scripts/livy_server.py  |  61 ++++++++++
 .../SPARK/1.2.1/package/scripts/params.py       |  15 ++-
 .../python/stacks/2.5/SPARK/test_spark_livy.py  | 120 +++++++++++++++++++
 .../test/python/stacks/2.5/configs/default.json |  57 +++++++++
 4 files changed, 252 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/ambari/blob/a54c63c7/ambari-server/src/main/resources/common-services/SPARK/1.2.1/package/scripts/livy_server.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/SPARK/1.2.1/package/scripts/livy_server.py b/ambari-server/src/main/resources/common-services/SPARK/1.2.1/package/scripts/livy_server.py
index 1e859a8..462836a 100644
--- a/ambari-server/src/main/resources/common-services/SPARK/1.2.1/package/scripts/livy_server.py
+++ b/ambari-server/src/main/resources/common-services/SPARK/1.2.1/package/scripts/livy_server.py
@@ -22,6 +22,15 @@ from resource_management.libraries.script.script import Script
 from resource_management.libraries.functions.check_process_status import check_process_status
 from resource_management.libraries.functions.stack_features import check_stack_feature
 from resource_management.libraries.functions.constants import StackFeature
+from resource_management.core.exceptions import Fail
+from resource_management.core.resources.system import Execute
+from resource_management.libraries.providers.hdfs_resource import WebHDFSUtil
+from resource_management.libraries.providers.hdfs_resource import HdfsResourceProvider
+from resource_management import is_empty
+from resource_management import shell
+from resource_management.libraries.functions.decorator import retry
+from resource_management.core.logger import Logger
+from resource_management.libraries.functions.format import format
 
 from livy_service import livy_service
 from setup_livy import setup_livy
@@ -44,6 +53,10 @@ class LivyServer(Script):
     import params
     env.set_params(params)
 
+    if params.has_ats and params.has_livyserver:
+      Logger.info("Verifying DFS directories where ATS stores time line data for active and completed applications.")
+      self.wait_for_dfs_directories_created([params.entity_groupfs_store_dir, params.entity_groupfs_active_dir])
+
     self.configure(env)
     livy_service('server', upgrade_type=upgrade_type, action='start')
 
@@ -59,6 +72,54 @@ class LivyServer(Script):
 
     check_process_status(status_params.livy_server_pid_file)
 
+  #  TODO move out and compose with similar method in resourcemanager.py
+  def wait_for_dfs_directories_created(self, dirs):
+    import params
+
+    ignored_dfs_dirs = HdfsResourceProvider.get_ignored_resources_list(params.hdfs_resource_ignore_file)
+
+    if params.security_enabled:
+      Execute(format("{kinit_path_local} -kt {livy_kerberos_keytab} {livy_principal}"),
+              user=params.livy_user
+              )
+      Execute(format("{kinit_path_local} -kt {hdfs_user_keytab} {hdfs_principal_name}"),
+              user=params.hdfs_user
+              )
+
+    for dir_path in dirs:
+      self.wait_for_dfs_directory_created(dir_path, ignored_dfs_dirs)
+
+
+  @retry(times=8, sleep_time=20, backoff_factor=1, err_class=Fail)
+  def wait_for_dfs_directory_created(self, dir_path, ignored_dfs_dirs):
+    import params
+
+
+    if not is_empty(dir_path):
+      dir_path = HdfsResourceProvider.parse_path(dir_path)
+
+      if dir_path in ignored_dfs_dirs:
+        Logger.info("Skipping DFS directory '" + dir_path + "' as it's marked to be ignored.")
+        return
+
+      Logger.info("Verifying if DFS directory '" + dir_path + "' exists.")
+
+      dir_exists = None
+
+      if WebHDFSUtil.is_webhdfs_available(params.is_webhdfs_enabled, params.default_fs):
+        # check with webhdfs is much faster than executing hdfs dfs -test
+        util = WebHDFSUtil(params.hdfs_site, params.hdfs_user, params.security_enabled)
+        list_status = util.run_command(dir_path, 'GETFILESTATUS', method='GET', ignore_status_codes=['404'], assertable_result=False)
+        dir_exists = ('FileStatus' in list_status)
+      else:
+        # have to do time expensive hdfs dfs -d check.
+        dfs_ret_code = shell.call(format("hdfs --config {hadoop_conf_dir} dfs -test -d " + dir_path), user=params.livy_user)[0]
+        dir_exists = not dfs_ret_code #dfs -test -d returns 0 in case the dir exists
+
+      if not dir_exists:
+        raise Fail("DFS directory '" + dir_path + "' does not exist !")
+      else:
+        Logger.info("DFS directory '" + dir_path + "' exists.")
 
   def get_component_name(self):
     return "livy-server"

http://git-wip-us.apache.org/repos/asf/ambari/blob/a54c63c7/ambari-server/src/main/resources/common-services/SPARK/1.2.1/package/scripts/params.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/SPARK/1.2.1/package/scripts/params.py b/ambari-server/src/main/resources/common-services/SPARK/1.2.1/package/scripts/params.py
index 8d4331b..a86f1d3 100644
--- a/ambari-server/src/main/resources/common-services/SPARK/1.2.1/package/scripts/params.py
+++ b/ambari-server/src/main/resources/common-services/SPARK/1.2.1/package/scripts/params.py
@@ -185,6 +185,10 @@ if has_spark_thriftserver and 'spark-thrift-sparkconf' in config['configurations
 
 default_fs = config['configurations']['core-site']['fs.defaultFS']
 hdfs_site = config['configurations']['hdfs-site']
+hdfs_resource_ignore_file = "/var/lib/ambari-agent/data/.hdfs_resource_ignore"
+
+ats_host = set(default("/clusterHostInfo/app_timeline_server_hosts", []))
+has_ats = len(ats_host) > 0
 
 dfs_type = default("/commandParams/dfs_type", "")
 
@@ -218,8 +222,17 @@ if stack_version_formatted and check_stack_feature(StackFeature.SPARK_LIVY, stac
 
   livy_livyserver_hosts = default("/clusterHostInfo/livy_server_hosts", [])
 
+  # ats 1.5 properties
+  entity_groupfs_active_dir = config['configurations']['yarn-site']['yarn.timeline-service.entity-group-fs-store.active-dir']
+  entity_groupfs_active_dir_mode = 01777
+  entity_groupfs_store_dir = config['configurations']['yarn-site']['yarn.timeline-service.entity-group-fs-store.done-dir']
+  entity_groupfs_store_dir_mode = 0700
+  is_webhdfs_enabled = hdfs_site['dfs.webhdfs.enabled']
+
   if len(livy_livyserver_hosts) > 0:
     has_livyserver = True
+    if security_enabled:
+      livy_principal = livy_kerberos_principal.replace('_HOST', config['hostname'].lower())
 
  livy_livyserver_port = default('configurations/livy-conf/livy.server.port',8998)
 
@@ -231,7 +244,7 @@ import functools
 HdfsResource = functools.partial(
   HdfsResource,
   user=hdfs_user,
-  hdfs_resource_ignore_file = "/var/lib/ambari-agent/data/.hdfs_resource_ignore",
+  hdfs_resource_ignore_file = hdfs_resource_ignore_file,
   security_enabled = security_enabled,
   keytab = hdfs_user_keytab,
   kinit_path_local = kinit_path_local,

http://git-wip-us.apache.org/repos/asf/ambari/blob/a54c63c7/ambari-server/src/test/python/stacks/2.5/SPARK/test_spark_livy.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/test/python/stacks/2.5/SPARK/test_spark_livy.py b/ambari-server/src/test/python/stacks/2.5/SPARK/test_spark_livy.py
new file mode 100644
index 0000000..b56474a
--- /dev/null
+++ b/ambari-server/src/test/python/stacks/2.5/SPARK/test_spark_livy.py
@@ -0,0 +1,120 @@
+#!/usr/bin/env python
+
+'''
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+'''
+import json
+from mock.mock import MagicMock, patch
+from stacks.utils.RMFTestCase import *
+
+from only_for_platform import not_for_platform, PLATFORM_WINDOWS
+
+@not_for_platform(PLATFORM_WINDOWS)
+@patch("resource_management.libraries.functions.get_stack_version", new=MagicMock(return_value="2.5.0.0-1597"))
+class TestSparkClient(RMFTestCase):
+    COMMON_SERVICES_PACKAGE_DIR = "SPARK/1.2.1/package"
+    STACK_VERSION = "2.5"
+    DEFAULT_IMMUTABLE_PATHS = ['/apps/hive/warehouse', '/apps/falcon', '/mr-history/done', '/app-logs', '/tmp']
+    def test_configure_default(self):
+        self.executeScript(self.COMMON_SERVICES_PACKAGE_DIR + "/scripts/livy_server.py",
+                           classname = "LivyServer",
+                           command = "start",
+                           config_file="default.json",
+                           stack_version = self.STACK_VERSION,
+                           target = RMFTestCase.TARGET_COMMON_SERVICES
+                           )
+        self.assert_start_default()
+        self.assertNoMoreResources()
+
+    def assert_start_default(self):
+        self.assertResourceCalled('Directory', '/var/run/livy',
+                                  owner = 'livy',
+                                  group = 'hadoop',
+                                  create_parents = True,
+                                  mode = 0775
+                                  )
+        self.assertResourceCalled('Directory', '/var/log/livy',
+                                  owner = 'livy',
+                                  group = 'hadoop',
+                                  create_parents = True,
+                                  mode = 0775
+                                  )
+        self.assertResourceCalled('HdfsResource', '/user/livy',
+                                  immutable_paths = self.DEFAULT_IMMUTABLE_PATHS,
+                                  security_enabled = False,
+                                  hadoop_bin_dir = '/usr/hdp/current/hadoop-client/bin',
+                                  keytab = UnknownConfigurationMock(),
+                                  default_fs = 'hdfs://c6401.ambari.apache.org:8020',
+                                  hdfs_site = {u'a': u'b'},
+                                  kinit_path_local = '/usr/bin/kinit',
+                                  principal_name = UnknownConfigurationMock(),
+                                  user = 'hdfs',
+                                  owner = 'livy',
+                                  hadoop_conf_dir = '/usr/hdp/current/hadoop-client/conf',
+                                  type = 'directory',
+                                  action = ['create_on_execute'], hdfs_resource_ignore_file='/var/lib/ambari-agent/data/.hdfs_resource_ignore',
+                                  dfs_type = '',
+                                  mode = 0775,
+                                  )
+        self.assertResourceCalled('HdfsResource', None,
+                                  immutable_paths = self.DEFAULT_IMMUTABLE_PATHS,
+                                  security_enabled = False,
+                                  hadoop_bin_dir = '/usr/hdp/current/hadoop-client/bin',
+                                  keytab = UnknownConfigurationMock(),
+                                  default_fs = 'hdfs://c6401.ambari.apache.org:8020',
+                                  hdfs_site = {u'a': u'b'},
+                                  kinit_path_local = '/usr/bin/kinit',
+                                  principal_name = UnknownConfigurationMock(),
+                                  user = 'hdfs',
+                                  action = ['execute'], hdfs_resource_ignore_file='/var/lib/ambari-agent/data/.hdfs_resource_ignore',
+                                  dfs_type = '',
+                                  hadoop_conf_dir = '/usr/hdp/current/hadoop-client/conf',
+                                  )
+        self.assertResourceCalled('File', '/usr/hdp/current/livy-server/conf/livy-env.sh',
+                                  content = InlineTemplate(self.getConfig()['configurations']['livy-env']['content']),
+                                  owner = 'livy',
+                                  group = 'livy',
+                                  mode = 0644,
+                                  )
+        self.assertResourceCalled('PropertiesFile', '/usr/hdp/current/livy-server/conf/livy.conf',
+                                  owner = 'livy',
+                                  key_value_delimiter = ' ',
+                                  group = 'livy',
+                                  properties = self.getConfig()['configurations']['livy-conf'],
+                                  )
+        self.assertResourceCalled('File', '/usr/hdp/current/livy-server/conf/log4j.properties',
+                                  content = '\n            # Set everything to be logged to the console\n            log4j.rootCategory=INFO, console\n            log4j.appender.console=org.apache.log4j.ConsoleAppender\n            log4j.appender.console.target=System.err\n            log4j.appender.console.layout=org.apache.log4j.PatternLayout\n            log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n\n\n            log4j.logger.org.eclipse.jetty=WARN',
+                                  owner = 'livy',
+                                  group = 'livy',
+                                  mode = 0644,
+                                  )
+        self.assertResourceCalled('File', '/usr/hdp/current/livy-server/conf/spark-blacklist.conf',
+                                  content = self.getConfig()['configurations']['livy-spark-blacklist']['content'],
+                                  owner = 'livy',
+                                  group = 'livy',
+                                  mode = 0644,
+                                  )
+        self.assertResourceCalled('Directory', '/usr/hdp/current/livy-server/logs',
+                                  owner = 'livy',
+                                  group = 'livy',
+                                  mode = 0755,
+                                  )
+        self.assertResourceCalled('Execute', '/usr/hdp/current/livy-server/bin/livy-server start',
+                                  environment = {'JAVA_HOME': '/usr/jdk64/jdk1.7.0_45'},
+                                  not_if = 'ls /var/run/livy/livy-livy-server.pid >/dev/null 2>&1 && ps -p `cat /var/run/livy/livy-livy-server.pid` >/dev/null 2>&1',
+                                  user = 'livy'
+                                  )

http://git-wip-us.apache.org/repos/asf/ambari/blob/a54c63c7/ambari-server/src/test/python/stacks/2.5/configs/default.json
----------------------------------------------------------------------
diff --git a/ambari-server/src/test/python/stacks/2.5/configs/default.json b/ambari-server/src/test/python/stacks/2.5/configs/default.json
index 148616b..a7e380a 100644
--- a/ambari-server/src/test/python/stacks/2.5/configs/default.json
+++ b/ambari-server/src/test/python/stacks/2.5/configs/default.json
@@ -171,6 +171,63 @@
       "log.index.interval.bytes": "4096",
       "log.retention.hours": "168"
     },
+    "spark-defaults": {
+      "spark.yarn.applicationMaster.waitTries": "10",
+      "spark.history.kerberos.keytab": "none",
+      "spark.yarn.preserve.staging.files": "false",
+      "spark.yarn.submit.file.replication": "3",
+      "spark.history.kerberos.principal": "none",
+      "spark.yarn.driver.memoryOverhead": "384",
+      "spark.yarn.queue": "default",
+      "spark.yarn.containerLauncherMaxThreads": "25",
+      "spark.yarn.scheduler.heartbeat.interval-ms": "5000",
+      "spark.history.ui.port": "18080",
+      "spark.yarn.max.executor.failures": "3",
+      "spark.driver.extraJavaOptions": "",
+      "spark.history.provider": "org.apache.spark.deploy.yarn.history.YarnHistoryProvider",
+      "spark.yarn.am.extraJavaOptions": "",
+      "spark.yarn.executor.memoryOverhead": "384"
+    },
+    "spark-javaopts-properties": {
+      "content": " "
+    },
+    "spark-log4j-properties": {
+      "content": "\n# Set everything to be logged to the 
console\nlog4j.rootCategory=INFO, 
console\nlog4j.appender.console=org.apache.log4j.ConsoleAppender\nlog4j.appender.console.target=System.err\nlog4j.appender.console.layout=org.apache.log4j.PatternLayout\nlog4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd
 HH:mm:ss} %p %c{1}: %m%n\n\n# Settings to quiet third party logs that are too 
verbose\nlog4j.logger.org.eclipse.jetty=WARN\nlog4j.logger.org.eclipse.jetty.util.component.AbstractLifeCycle=ERROR\nlog4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO\nlog4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO"
+    },
+    "spark-env": {
+      "content": "\n#!/usr/bin/env bash\n\n# This file is sourced when running 
various Spark programs.\n# Copy it as spark-env.sh and edit that to configure 
Spark for your site.\n\n# Options read in YARN client 
mode\n#SPARK_EXECUTOR_INSTANCES=\"2\" #Number of workers to start (Default: 
2)\n#SPARK_EXECUTOR_CORES=\"1\" #Number of cores for the workers (Default: 
1).\n#SPARK_EXECUTOR_MEMORY=\"1G\" #Memory per Worker (e.g. 1000M, 2G) 
(Default: 1G)\n#SPARK_DRIVER_MEMORY=\"512 Mb\" #Memory for Master (e.g. 1000M, 
2G) (Default: 512 Mb)\n#SPARK_YARN_APP_NAME=\"spark\" #The name of your 
application (Default: Spark)\n#SPARK_YARN_QUEUE=\"~@~Xdefault~@~Y\" #The hadoop 
queue to use for allocation requests (Default: 
@~Xdefault~@~Y)\n#SPARK_YARN_DIST_FILES=\"\" #Comma separated list of files to 
be distributed with the job.\n#SPARK_YARN_DIST_ARCHIVES=\"\" #Comma separated 
list of archives to be distributed with the job.\n\n# Generic options for the 
daemons used in the standalone deploy mode\n\n# Alt
 ernate conf dir. (Default: ${SPARK_HOME}/conf)\nexport 
SPARK_CONF_DIR=${SPARK_HOME:-{{spark_home}}}/conf\n\n# Where log files are 
stored.(Default:${SPARK_HOME}/logs)\n#export 
SPARK_LOG_DIR=${SPARK_HOME:-{{spark_home}}}/logs\nexport 
SPARK_LOG_DIR={{spark_log_dir}}\n\n# Where the pid file is stored. (Default: 
/tmp)\nexport SPARK_PID_DIR={{spark_pid_dir}}\n\n# A string representing this 
instance of spark.(Default: $USER)\nSPARK_IDENT_STRING=$USER\n\n# The 
scheduling priority for daemons. (Default: 0)\nSPARK_NICENESS=0\n\nexport 
HADOOP_HOME=${HADOOP_HOME:-{{hadoop_home}}}\nexport 
HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-{{hadoop_conf_dir}}}\n\n# The java 
implementation to use.\nexport JAVA_HOME={{java_home}}\n\nif [ -d 
\"/etc/tez/conf/\" ]; then\n  export TEZ_CONF_DIR=/etc/tez/conf\nelse\n  export 
TEZ_CONF_DIR=\nfi",
+      "spark_pid_dir": "/var/run/spark",
+      "spark_log_dir": "/var/log/spark",
+      "spark_group": "spark",
+      "spark_user": "spark"
+    },
+    "spark-metrics-properties": {
+      "content": "\n# syntax: 
[instance].sink|source.[name].[options]=[value]\n\n# This file configures 
Spark's internal metrics system. The metrics system is\n# divided into 
instances which correspond to internal components.\n# Each instance can be 
configured to report its metrics to one or more sinks.\n# Accepted values for 
[instance] are \"master\", \"worker\", \"executor\", \"driver\",\n# and 
\"applications\". A wild card \"*\" can be used as an instance name, in\n# 
which case all instances will inherit the supplied property.\n#\n# Within an 
instance, a \"source\" specifies a particular set of grouped metrics.\n# there 
are two kinds of sources:\n# 1. Spark internal sources, like MasterSource, 
WorkerSource, etc, which will\n# collect a Spark component's internal state. 
Each instance is paired with a\n# Spark source that is added automatically.\n# 
2. Common sources, like JvmSource, which will collect low level state.\n# These 
can be added through configuration options and are then
  loaded\n# using reflection.\n#\n# A \"sink\" specifies where metrics are 
delivered to. Each instance can be\n# assigned one or more sinks.\n#\n# The 
sink|source field specifies whether the property relates to a sink or\n# 
source.\n#\n# The [name] field specifies the name of source or sink.\n#\n# The 
[options] field is the specific property of this source or sink. The\n# source 
or sink is responsible for parsing this property.\n#\n# Notes:\n# 1. To add a 
new sink, set the \"class\" option to a fully qualified class\n# name (see 
examples below).\n# 2. Some sinks involve a polling period. The minimum allowed 
polling period\n# is 1 second.\n# 3. Wild card properties can be overridden by 
more specific properties.\n# For example, master.sink.console.period takes 
precedence over\n# *.sink.console.period.\n# 4. A metrics specific 
configuration\n# \"spark.metrics.conf=${SPARK_HOME}/conf/metrics.properties\" 
should be\n# added to Java properties using -Dspark.metrics.conf=xxx if you 
want to\
 n# customize metrics system. You can also put the file in 
${SPARK_HOME}/conf\n# and it will be loaded automatically.\n# 5. MetricsServlet 
is added by default as a sink in master, worker and client\n# driver, you can 
send http request \"/metrics/json\" to get a snapshot of all the\n# registered 
metrics in json format. For master, requests \"/metrics/master/json\" and\n# 
\"/metrics/applications/json\" can be sent seperately to get metrics snapshot 
of\n# instance master and applications. MetricsServlet may not be configured by 
self.\n#\n\n## List of available sinks and their properties.\n\n# 
org.apache.spark.metrics.sink.ConsoleSink\n# Name: Default: Description:\n# 
period 10 Poll period\n# unit seconds Units of poll period\n\n# 
org.apache.spark.metrics.sink.CSVSink\n# Name: Default: Description:\n# period 
10 Poll period\n# unit seconds Units of poll period\n# directory /tmp Where to 
store CSV files\n\n# org.apache.spark.metrics.sink.GangliaSink\n# Name: 
Default: Description:\n# host N
 ONE Hostname or multicast group of Ganglia server\n# port NONE Port of Ganglia 
server(s)\n# period 10 Poll period\n# unit seconds Units of poll period\n# ttl 
1 TTL of messages sent by Ganglia\n# mode multicast Ganglia network mode 
('unicast' or 'multicast')\n\n# org.apache.spark.metrics.sink.JmxSink\n\n# 
org.apache.spark.metrics.sink.MetricsServlet\n# Name: Default: Description:\n# 
path VARIES* Path prefix from the web server root\n# sample false Whether to 
show entire set of samples for histograms ('false' or 'true')\n#\n# * Default 
path is /metrics/json for all instances except the master. The master has two 
paths:\n# /metrics/aplications/json # App information\n# /metrics/master/json # 
Master information\n\n# org.apache.spark.metrics.sink.GraphiteSink\n# Name: 
Default: Description:\n# host NONE Hostname of Graphite server\n# port NONE 
Port of Graphite server\n# period 10 Poll period\n# unit seconds Units of poll 
period\n# prefix EMPTY STRING Prefix to prepend to metric name\n\n##
  Examples\n# Enable JmxSink for all instances by class 
name\n#*.sink.jmx.class=org.apache.spark.metrics.sink.JmxSink\n\n# Enable 
ConsoleSink for all instances by class 
name\n#*.sink.console.class=org.apache.spark.metrics.sink.ConsoleSink\n\n# 
Polling period for 
ConsoleSink\n#*.sink.console.period=10\n\n#*.sink.console.unit=seconds\n\n# 
Master instance overlap polling 
period\n#master.sink.console.period=15\n\n#master.sink.console.unit=seconds\n\n#
 Enable CsvSink for all 
instances\n#*.sink.csv.class=org.apache.spark.metrics.sink.CsvSink\n\n# Polling 
period for CsvSink\n#*.sink.csv.period=1\n\n#*.sink.csv.unit=minutes\n\n# 
Polling directory for CsvSink\n#*.sink.csv.directory=/tmp/\n\n# Worker instance 
overlap polling 
period\n#worker.sink.csv.period=10\n\n#worker.sink.csv.unit=minutes\n\n# Enable 
jvm source for instance master, worker, driver and 
executor\n#master.source.jvm.class=org.apache.spark.metrics.source.JvmSource\n\n#worker.source.jvm.class=org.apache.spark.metrics.source.JvmSo
 
urce\n\n#driver.source.jvm.class=org.apache.spark.metrics.source.JvmSource\n\n#executor.source.jvm.class=org.apache.spark.metrics.source.JvmSource"
+    },
+    "spark-metrics-properties": {
+      "content": "\n# syntax: 
[instance].sink|source.[name].[options]=[value]\n\n# This file configures 
Spark's internal metrics system. The metrics system is\n# divided into 
instances which correspond to internal components.\n# Each instance can be 
configured to report its metrics to one or more sinks.\n# Accepted values for 
[instance] are \"master\", \"worker\", \"executor\", \"driver\",\n# and 
\"applications\". A wild card \"*\" can be used as an instance name, in\n# 
which case all instances will inherit the supplied property.\n#\n# Within an 
instance, a \"source\" specifies a particular set of grouped metrics.\n# there 
are two kinds of sources:\n# 1. Spark internal sources, like MasterSource, 
WorkerSource, etc, which will\n# collect a Spark component's internal state. 
Each instance is paired with a\n# Spark source that is added automatically.\n# 
2. Common sources, like JvmSource, which will collect low level state.\n# These 
can be added through configuration options and are then
  loaded\n# using reflection.\n#\n# A \"sink\" specifies where metrics are 
delivered to. Each instance can be\n# assigned one or more sinks.\n#\n# The 
sink|source field specifies whether the property relates to a sink or\n# 
source.\n#\n# The [name] field specifies the name of source or sink.\n#\n# The 
[options] field is the specific property of this source or sink. The\n# source 
or sink is responsible for parsing this property.\n#\n# Notes:\n# 1. To add a 
new sink, set the \"class\" option to a fully qualified class\n# name (see 
examples below).\n# 2. Some sinks involve a polling period. The minimum allowed 
polling period\n# is 1 second.\n# 3. Wild card properties can be overridden by 
more specific properties.\n# For example, master.sink.console.period takes 
precedence over\n# *.sink.console.period.\n# 4. A metrics specific 
configuration\n# \"spark.metrics.conf=${SPARK_HOME}/conf/metrics.properties\" 
should be\n# added to Java properties using -Dspark.metrics.conf=xxx if you 
want to\
 n# customize metrics system. You can also put the file in 
${SPARK_HOME}/conf\n# and it will be loaded automatically.\n# 5. MetricsServlet 
is added by default as a sink in master, worker and client\n# driver, you can 
send http request \"/metrics/json\" to get a snapshot of all the\n# registered 
metrics in json format. For master, requests \"/metrics/master/json\" and\n# 
\"/metrics/applications/json\" can be sent seperately to get metrics snapshot 
of\n# instance master and applications. MetricsServlet may not be configured by 
self.\n#\n\n## List of available sinks and their properties.\n\n# 
org.apache.spark.metrics.sink.ConsoleSink\n# Name: Default: Description:\n# 
period 10 Poll period\n# unit seconds Units of poll period\n\n# 
org.apache.spark.metrics.sink.CSVSink\n# Name: Default: Description:\n# period 
10 Poll period\n# unit seconds Units of poll period\n# directory /tmp Where to 
store CSV files\n\n# org.apache.spark.metrics.sink.GangliaSink\n# Name: 
Default: Description:\n# host N
 ONE Hostname or multicast group of Ganglia server\n# port NONE Port of Ganglia 
server(s)\n# period 10 Poll period\n# unit seconds Units of poll period\n# ttl 
1 TTL of messages sent by Ganglia\n# mode multicast Ganglia network mode 
('unicast' or 'multicast')\n\n# org.apache.spark.metrics.sink.JmxSink\n\n# 
org.apache.spark.metrics.sink.MetricsServlet\n# Name: Default: Description:\n# 
path VARIES* Path prefix from the web server root\n# sample false Whether to 
show entire set of samples for histograms ('false' or 'true')\n#\n# * Default 
path is /metrics/json for all instances except the master. The master has two 
paths:\n# /metrics/aplications/json # App information\n# /metrics/master/json # 
Master information\n\n# org.apache.spark.metrics.sink.GraphiteSink\n# Name: 
Default: Description:\n# host NONE Hostname of Graphite server\n# port NONE 
Port of Graphite server\n# period 10 Poll period\n# unit seconds Units of poll 
period\n# prefix EMPTY STRING Prefix to prepend to metric name\n\n##
  Examples\n# Enable JmxSink for all instances by class 
name\n#*.sink.jmx.class=org.apache.spark.metrics.sink.JmxSink\n\n# Enable 
ConsoleSink for all instances by class 
name\n#*.sink.console.class=org.apache.spark.metrics.sink.ConsoleSink\n\n# 
Polling period for 
ConsoleSink\n#*.sink.console.period=10\n\n#*.sink.console.unit=seconds\n\n# 
Master instance overlap polling 
period\n#master.sink.console.period=15\n\n#master.sink.console.unit=seconds\n\n#
 Enable CsvSink for all 
instances\n#*.sink.csv.class=org.apache.spark.metrics.sink.CsvSink\n\n# Polling 
period for CsvSink\n#*.sink.csv.period=1\n\n#*.sink.csv.unit=minutes\n\n# 
Polling directory for CsvSink\n#*.sink.csv.directory=/tmp/\n\n# Worker instance 
overlap polling 
period\n#worker.sink.csv.period=10\n\n#worker.sink.csv.unit=minutes\n\n# Enable 
jvm source for instance master, worker, driver and 
executor\n#master.source.jvm.class=org.apache.spark.metrics.source.JvmSource\n\n#worker.source.jvm.class=org.apache.spark.metrics.source.JvmSo
 
urce\n\n#driver.source.jvm.class=org.apache.spark.metrics.source.JvmSource\n\n#executor.source.jvm.class=org.apache.spark.metrics.source.JvmSource"
+    },
+    "livy-log4j-properties": {
+      "content": "\n            # Set everything to be logged to the console\n            log4j.rootCategory=INFO, console\n            log4j.appender.console=org.apache.log4j.ConsoleAppender\n            log4j.appender.console.target=System.err\n            log4j.appender.console.layout=org.apache.log4j.PatternLayout\n            log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n\n\n            log4j.logger.org.eclipse.jetty=WARN"
+    },
+    "livy-conf": {
+      "livy.server.port": "8998",
+      "livy.server.csrf_protection.enabled": "true",
+      "livy.environment": "production",
+      "livy.impersonation.enabled": "true",
+      "livy.server.session.timeout": "3600000"
+    },
+    "livy-spark-blacklist": {
+      "content": "\n            #\n            # Configuration override / blacklist. Defines a list of properties that users are not allowed\n            # to override when starting Spark sessions.\n            #\n            # This file takes a list of property names (one per line). Empty lines and lines starting with \"#\"\n            # are ignored.\n            #"
+    },
+    "livy-env": {
+      "livy_group": "livy",
+      "spark_home": "/usr/hdp/current/spark-client",
+      "content": "\n            #!/usr/bin/env bash\n\n            # - SPARK_HOME      Spark which you would like to use in livy\n            # - HADOOP_CONF_DIR Directory containing the Hadoop / YARN configuration to use.\n            # - LIVY_LOG_DIR    Where log files are stored.  (Default: ${LIVY_HOME}/logs)\n            # - LIVY_PID_DIR    Where the pid file is stored. (Default: /tmp)\n            # - LIVY_SERVER_JAVA_OPTS  Java Opts for running livy server (You can set jvm related setting here, like jvm memory/gc algorithm and etc.)\n        export SPARK_HOME=/usr/hdp/current/spark-client\n        export HADOOP_CONF_DIR=/etc/hadoop/conf\n        export LIVY_LOG_DIR={{livy_log_dir}}\n        export LIVY_PID_DIR={{livy_pid_dir}}\n        export LIVY_SERVER_JAVA_OPTS=\"-Xmx2g\"",
+      "livy_pid_dir": "/var/run/livy",
+      "livy_log_dir": "/var/log/livy",
+      "livy_user": "livy"
+    },
     "logsearch-solr-env": {
       "logsearch_solr_znode": "/logsearch",
       "logsearch_solr_user": "solr",

Reply via email to