http://git-wip-us.apache.org/repos/asf/ambari/blob/1863c3b9/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/SOLR/package/scripts/solr_server.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/SOLR/package/scripts/solr_server.py b/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/SOLR/package/scripts/solr_server.py
new file mode 100755
index 0000000..5cefc73
--- /dev/null
+++ b/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/SOLR/package/scripts/solr_server.py
@@ -0,0 +1,118 @@
+#!/usr/bin/env python
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+"""
+
+import sys
+from resource_management import *
+from resource_management.libraries.functions import conf_select
+from resource_management.libraries.functions import Direction
+from resource_management.libraries.functions import stack_select
+from solr_service import solr_service
+from solr import solr
+import os
+
+class SolrServer(Script):
+  def install(self, env):
+    self.install_packages(env)
+
+  def configure(self, env, upgrade_type=None):
+    import params
+    env.set_params(params)
+    if not os.path.isfile("/usr/iop/4.1.0.0/solr/conf/solr.in.sh"):
+      solr(type='4103', upgrade_type=upgrade_type)
+    solr(type='server', upgrade_type=upgrade_type)
+
+  def pre_upgrade_restart(self, env, upgrade_type=None):
+    Logger.info("Executing Stack Upgrade pre-restart")
+    import params
+    env.set_params(params)
+    if params.version and compare_versions(format_stack_version(params.version), '4.1.0.0') >= 0:
+      stack_select.select("solr-server", params.version)
+
+      call_conf_select = True
+      conf_dir = '/usr/iop/4.1.0.0/solr/conf'
+      if params.upgrade_direction is not None and params.upgrade_direction == Direction.DOWNGRADE and not os.path.islink(conf_dir):
+        call_conf_select = False
+
+      if call_conf_select:
+        conf_select.select(params.stack_name, "solr", params.version)
+
+  def start(self, env, upgrade_type=None):
+    import params
+    env.set_params(params)
+    self.configure(env)
+    solr_service(action = 'start')
+
+  def stop(self, env, upgrade_type=None):
+    import params
+    env.set_params(params)
+    solr_service(action = 'stop')
+
+  def status(self, env):
+    import status_params
+    env.set_params(status_params)
+    check_process_status(status_params.solr_pid_file)
+
+  def security_status(self, env):
+    import status_params
+    env.set_params(status_params)
+    if status_params.security_enabled:
+      props_value_check = {"solr.hdfs.security.kerberos.enabled":"true"}
+      props_empty_check = ["solr.hdfs.security.kerberos.keytabfile",
+                          "solr.hdfs.security.kerberos.principal"]
+      props_read_check = ["solr.hdfs.security.kerberos.keytabfile"]
+      solr_site_props = build_expectations('solr-site', props_value_check, props_empty_check, props_read_check)
+
+      solr_expectations = {}
+      solr_expectations.update(solr_site_props)
+
+      security_params = get_params_from_filesystem(status_params.solr_conf_dir,
+                                                   {'solr-site.xml': FILE_TYPE_XML})
+      result_issues = validate_security_config_properties(security_params, solr_expectations)
+
+      if not result_issues: # If all validations passed successfully
+        try:
+          if 'solr-site' not in security_params \
+            or 'solr.hdfs.security.kerberos.keytabfile' not in security_params['solr-site'] \
+            or 'solr.hdfs.security.kerberos.principal' not in security_params['solr-site']:
+            self.put_structured_out({"securityState": "UNSECURED"})
+            self.put_structured_out({"securityIssuesFound": "Keytab file or principal are not set properly."})
+            return
+          cached_kinit_executor(status_params.kinit_path_local,
+                                status_params.solr_user,
+                                security_params['solr-site']['solr.hdfs.security.kerberos.keytabfile'],
+                                security_params['solr-site']['solr.hdfs.security.kerberos.principal'],
+                                status_params.hostname,
+                                status_params.tmp_dir)
+          self.put_structured_out({"securityState": "SECURED_KERBEROS"})
+        except Exception as e:
+          self.put_structured_out({"securityState": "ERROR"})
+          self.put_structured_out({"securityStateErrorInfo": str(e)})
+      else:
+        issues = []
+        for cf in result_issues:
+          issues.append("Configuration file %s did not pass the validation. Reason: %s" % (cf, result_issues[cf]))
+        self.put_structured_out({"securityIssuesFound": ". ".join(issues)})
+        self.put_structured_out({"securityState": "UNSECURED"})
+    else:
+      self.put_structured_out({"securityState": "UNSECURED"})
+
+
+if __name__ == "__main__":
+  SolrServer().execute()
http://git-wip-us.apache.org/repos/asf/ambari/blob/1863c3b9/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/SOLR/package/scripts/solr_service.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/SOLR/package/scripts/solr_service.py b/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/SOLR/package/scripts/solr_service.py
new file mode 100755
index 0000000..105aac6
--- /dev/null
+++ b/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/SOLR/package/scripts/solr_service.py
@@ -0,0 +1,59 @@
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+Ambari Agent
+
+"""
+
+from resource_management import *
+
+def solr_service(action='start'):
+  import params
+  cmd = format("{solr_home}/bin/solr")
+
+  if action == 'start':
+
+    if params.security_enabled:
+      if params.solr_principal is None:
+        solr_principal_with_host = 'missing_principal'
+      else:
+        solr_principal_with_host = params.solr_principal.replace("_HOST", params.hostname)
+      kinit_cmd = format("{kinit_path_local} -kt {solr_keytab} {solr_principal_with_host};")
+      Execute(kinit_cmd, user=params.solr_user)
+
+    Execute(params.solr_home + '/server/scripts/cloud-scripts/zkcli.sh -zkhost ' + params.zookeeper_hosts_list + ' -cmd makepath ' + params.zookeeper_chroot,
+            user=params.solr_user,
+            ignore_failures=True)
+
+    if (params.upgrade_direction is not None and params.upgrade_direction == Direction.UPGRADE) or (compare_versions(format_stack_version(params.current_version), '4.2.0.0') >= 0):
+      solr_home_dir = params.solr_data_dir
+    else:
+      solr_home_dir = params.lib_dir + "/data"
+
+    daemon_cmd = format("SOLR_INCLUDE={solr_conf_dir}/solr.in.sh {cmd} start -c -s {solr_home_dir} -V")
+    no_op_test = format("ls {solr_pid_file} >/dev/null 2>&1 && ps `cat {solr_pid_file}` >/dev/null 2>&1")
+    Execute(daemon_cmd,
+            not_if=no_op_test,
+            user=params.solr_user
+    )
+  elif action == 'stop':
+    daemon_cmd = format("SOLR_INCLUDE={solr_conf_dir}/solr.in.sh {cmd} stop")
+    no_op_test = format("! ((`SOLR_INCLUDE={solr_conf_dir}/solr.in.sh {cmd} status |grep process |wc -l`))")
+    rm_pid = format("rm -f {solr_pid_file}")
+    Execute(daemon_cmd,
+            not_if=no_op_test,
+            user=params.solr_user
+    )
+    Execute(rm_pid)
http://git-wip-us.apache.org/repos/asf/ambari/blob/1863c3b9/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/SOLR/package/scripts/solr_upgrade.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/SOLR/package/scripts/solr_upgrade.py b/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/SOLR/package/scripts/solr_upgrade.py
new file mode 100755
index 0000000..d72c5a3
--- /dev/null
+++ b/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/SOLR/package/scripts/solr_upgrade.py
@@ -0,0 +1,135 @@
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+"""
+import os
+from resource_management import *
+from resource_management.core.logger import Logger
+from resource_management.core.resources.system import Execute
+from resource_management.libraries.functions import conf_select
+from resource_management.libraries.functions import get_unique_id_and_date
+
+class SolrServerUpgrade(Script):
+  def pre_upgrade_conf41(self, env):
+    """
+    Create /etc/solr/4.1.0.0/0 directory and copies Solr config files here.
+    Create symlinks accordingly.
+
+    conf-select create-conf-dir --package solr --stack-version 4.1.0.0 --conf-version 0
+    cp -r /usr/iop/4.1.0.0/solr/conf/* /etc/solr/4.1.0.0/0/.
+    unlink or rm -r /usr/iop/4.1.0.0/solr/conf
+    ln -s /etc/solr/4.1.0.0/0 /usr/iop/4.1.0.0/solr/conf
+    conf-select set-conf-dir --package solr --stack-version 4.1.0.0 --conf-version 0
+    """
+    import params
+    env.set_params(params)
+
+    solr41_conf_dir="/usr/iop/4.1.0.0/solr/conf"
+    solr41_etc_dir="/etc/solr/4.1.0.0/0"
+    if not os.path.exists(solr41_etc_dir):
+      conf_select.create(params.stack_name, "solr", "4.1.0.0")
+
+    content_path=solr41_conf_dir
+    if not os.path.isfile("/usr/iop/4.1.0.0/solr/conf/solr.in.sh"):
+      content_path = "/etc/solr/conf.backup"
+
+    for each in os.listdir(content_path):
+      File(os.path.join(solr41_etc_dir, each),
+           owner=params.solr_user,
+           content = StaticFile(os.path.join(content_path, each)))
+
+    if not os.path.islink(solr41_conf_dir):
+      Directory(solr41_conf_dir,
+                action="delete",
+                create_parents=True)
+
+    if os.path.islink(solr41_conf_dir):
+      os.unlink(solr41_conf_dir)
+
+    if not os.path.islink(solr41_conf_dir):
+      Link(solr41_conf_dir,
+           to=solr41_etc_dir
+      )
+
+    conf_select.select(params.stack_name, "solr", "4.1.0.0")
+
+  def pre_stop_backup_cores(self, env):
+    """
+    Backs up the Solr cores under Solr's home directory.
+    cp -r /var/lib/solr/data/* /tmp/solr/cores
+    """
+    import params
+    env.set_params(params)
+
+    if compare_versions(format_stack_version(params.current_version), '4.2.0.0') >= 0:
+      solr_home_dir=params.solr_data_dir
+    else: #4.1.0.0
+      solr_home_dir=params.old_lib_dir + "/data"
+
+    unique = get_unique_id_and_date()
+    backup_solr_dir="/tmp/upgrades/{0}/solr_{1}".format(params.current_version, unique)
+    backup_solr_cores="/tmp/solr/cores"
+
+    if os.path.isdir(solr_home_dir) and not os.path.isdir(backup_solr_dir):
+      os.makedirs(backup_solr_dir)
+      Execute(('cp', '-r', solr_home_dir+"/.", backup_solr_dir),
+              sudo=True
+      )
+
+    if params.upgrade_direction is not None and params.upgrade_direction == Direction.UPGRADE:
+      Directory(backup_solr_cores,
+                action="delete",
+                create_parents=True)
+
+      Directory(backup_solr_cores,
+                mode=0755,
+                cd_access='a',
+                owner=params.solr_user,
+                create_parents=True,
+                group=params.user_group
+      )
+
+      Execute(('cp', '-r', solr_home_dir+"/.", backup_solr_cores),
+              user=params.solr_user
+      )
+
+  def pre_start_migrate_cores(self, env):
+    """
+    Copy the Solr cores from previous version to the new Solr home directory if solr_home is a different directory.
+    cp -r /tmp/solr/cores/* /opt/solr/data/.
+    """
+    import params
+    env.set_params(params)
+
+    if params.upgrade_direction is not None and params.upgrade_direction == Direction.UPGRADE:
+      backup_solr_cores="/tmp/solr/cores"
+      solr_home_dir=params.solr_data_dir
+
+      Directory(format(solr_home_dir),
+                owner=params.solr_user,
+                create_parents=True,
+                group=params.user_group
+      )
+
+      if os.path.isdir(solr_home_dir) and os.path.isdir(backup_solr_cores):
+        Execute(('cp', '-rn', backup_solr_cores+"/.", solr_home_dir),
+                user=params.solr_user,
+                logoutput=True
+        )
+
+if __name__ == "__main__":
+  SolrServerUpgrade().execute()
http://git-wip-us.apache.org/repos/asf/ambari/blob/1863c3b9/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/SOLR/package/scripts/status_params.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/SOLR/package/scripts/status_params.py b/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/SOLR/package/scripts/status_params.py
new file mode 100755
index 0000000..73c3b16
--- /dev/null
+++ b/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/SOLR/package/scripts/status_params.py
@@ -0,0 +1,34 @@
+#!/usr/bin/env python
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+"""
+
+from resource_management import *
+import status_params
+
+config = Script.get_config()
+
+security_enabled = config['configurations']['cluster-env']['security_enabled']
+solr_conf_dir='/usr/iop/current/solr-server/conf'
+solr_user = config['configurations']['solr-env']['solr_user']
+hostname = config['hostname']
+kinit_path_local = functions.get_kinit_path()
+tmp_dir = Script.get_tmp_dir()
+solr_pid_dir = config['configurations']['solr-env']['solr_pid_dir']
+solr_port = config['configurations']['solr-env']['solr_port']
+solr_pid_file = format("{solr_pid_dir}/solr-{solr_port}.pid")
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/ambari/blob/1863c3b9/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/SOLR/package/templates/solr.xml.j2
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/SOLR/package/templates/solr.xml.j2 b/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/SOLR/package/templates/solr.xml.j2
new file mode 100755
index 0000000..71d821e
--- /dev/null
+++ b/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/SOLR/package/templates/solr.xml.j2
@@ -0,0 +1,51 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.
You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> + +<!-- + This is an example of a simple "solr.xml" file for configuring one or + more Solr Cores, as well as allowing Cores to be added, removed, and + reloaded via HTTP requests. + + More information about options available in this configuration file, + and Solr Core administration can be found online: + http://wiki.apache.org/solr/CoreAdmin +--> + +<solr> + + <solrcloud> + + <str name="host">${host:}</str> + <int name="hostPort">${jetty.port:8983}</int> + <str name="hostContext">${hostContext:solr}</str> + + <bool name="genericCoreNodeNames">${genericCoreNodeNames:true}</bool> + + <int name="zkClientTimeout">${zkClientTimeout:30000}</int> + <int name="distribUpdateSoTimeout">${distribUpdateSoTimeout:600000}</int> + <int name="distribUpdateConnTimeout">${distribUpdateConnTimeout:60000}</int> + + </solrcloud> + + <shardHandlerFactory name="shardHandlerFactory" + class="HttpShardHandlerFactory"> + <int name="socketTimeout">${socketTimeout:600000}</int> + <int name="connTimeout">${connTimeout:60000}</int> + </shardHandlerFactory> + +</solr> http://git-wip-us.apache.org/repos/asf/ambari/blob/1863c3b9/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/SOLR/package/templates/solr_jaas.conf.j2 ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/SOLR/package/templates/solr_jaas.conf.j2 b/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/SOLR/package/templates/solr_jaas.conf.j2 new file mode 100755 index 0000000..91794d6 --- /dev/null +++ 
b/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/SOLR/package/templates/solr_jaas.conf.j2 @@ -0,0 +1,26 @@ +{# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +#} + +Client { + com.sun.security.auth.module.Krb5LoginModule required + useKeyTab=true + storeKey=true + useTicketCache=false + keyTab="{{solr_web_kerberos_keytab}}" + principal="{{solr_web_kerberos_principal}}"; +}; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/ambari/blob/1863c3b9/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/SPARK/alerts.json ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/SPARK/alerts.json b/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/SPARK/alerts.json new file mode 100755 index 0000000..0e38f16 --- /dev/null +++ b/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/SPARK/alerts.json @@ -0,0 +1,32 @@ +{ + "SPARK": { + "service": [], + "SPARK_JOBHISTORYSERVER": [ + { + "name": "SPARK_JOBHISTORYSERVER_PROCESS", + "label": "Spark History Server", + "description": "This host-level alert is triggered if the Spark History Server cannot be determined to be up.", + 
"interval": 1, + "scope": "HOST", + "source": { + "type": "PORT", + "uri": "{{spark-defaults/spark.history.ui.port}}", + "default_port": 18080, + "reporting": { + "ok": { + "text": "TCP OK - {0:.3f}s response on port {1}" + }, + "warning": { + "text": "TCP OK - {0:.3f}s response on port {1}", + "value": 1.5 + }, + "critical": { + "text": "Connection failed: {0} to {1}:{2}", + "value": 5 + } + } + } + } + ] + } +} http://git-wip-us.apache.org/repos/asf/ambari/blob/1863c3b9/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/SPARK/configuration/spark-defaults.xml ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/SPARK/configuration/spark-defaults.xml b/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/SPARK/configuration/spark-defaults.xml new file mode 100755 index 0000000..30826f1 --- /dev/null +++ b/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/SPARK/configuration/spark-defaults.xml @@ -0,0 +1,159 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +--> + +<configuration supports_final="true"> + + <property> + <name>spark.eventLog.enabled</name> + <value>true</value> + <description>Whether to log Spark events, useful for reconstructing the Web UI after the application has finished.</description> + </property> + + <property> + <name>spark.eventLog.dir</name> + <value>/iop/apps/4.1.0.0/spark/logs/history-server</value> + <description>Base directory in which Spark events are logged, if spark.eventLog.enabled is true. Within this base directory, Spark creates a sub-directory for each application, and logs the events specific to the application in this directory. Users may want to set this to a unified location like an HDFS directory so history files can be read by the history server.</description> + </property> + + <property> + <name>spark.yarn.executor.memoryOverhead</name> + <value>384</value> + <description> + The amount of off heap memory (in megabytes) to be allocated per executor. + This is memory that accounts for things like VM overheads, interned strings, + other native overheads, etc. + </description> + </property> + + <property> + <name>spark.yarn.driver.memoryOverhead</name> + <value>384</value> + <description> + The amount of off heap memory (in megabytes) to be allocated per driver. + This is memory that accounts for things like VM overheads, interned strings, + other native overheads, etc. + </description> + </property> + + <property> + <name>spark.yarn.applicationMaster.waitTries</name> + <value>10</value> + <description> + Set the number of times the ApplicationMaster waits for the Spark master and then + also the number of tries it waits for the SparkContext to be initialized. + </description> + </property> + + <property> + <name>spark.yarn.scheduler.heartbeat.interval-ms</name> + <value>5000</value> + <description> + The interval in ms in which the Spark application master heartbeats into the YARN ResourceManager. 
+ </description> + </property> + + <property> + <name>spark.yarn.max.executor.failures</name> + <value>3</value> + <description> + The maximum number of executor failures before failing the application. + </description> + </property> + + <property> + <name>spark.yarn.queue</name> + <value>default</value> + <description> + The name of the YARN queue to which the application is submitted. + </description> + </property> + + <property> + <name>spark.yarn.containerLauncherMaxThreads</name> + <value>25</value> + <description> + The maximum number of threads to use in the application master for launching executor containers. + </description> + </property> + + <property> + <name>spark.yarn.submit.file.replication</name> + <value>3</value> + <description> + HDFS replication level for the files uploaded into HDFS for the application. + These include things like the Spark jar, the app jar, and any distributed cache files/archives. + </description> + </property> + + <property> + <name>spark.yarn.preserve.staging.files</name> + <value>false</value> + <description> + Set to true to preserve the staged files (Spark jar, app jar, distributed cache files) at the + end of the job rather than delete them. + </description> + </property> + + <property> + <name>spark.history.ui.port</name> + <value>18080</value> + <description> + The port to which the web interface of the History Server binds. + </description> + </property> + <property> + <name>spark.driver.extraJavaOptions</name> + <value>-Diop.version={{iop_full_version}}</value> + <description> + Specifies parameters that are passed to the JVM of the Spark driver. + </description> + <value-attributes> + <empty-value-valid>true</empty-value-valid> + </value-attributes> + </property> + + <property> + <name>spark.yarn.am.extraJavaOptions</name> + <value>-Diop.version={{iop_full_version}}</value> + <description> + Specifies the parameters that are passed to the JVM of the Spark Application Master. 
+ </description> + <value-attributes> + <empty-value-valid>true</empty-value-valid> + </value-attributes> + </property> + + <property> + <name>spark.history.kerberos.principal</name> + <value>none</value> + <description> + Kerberos principal name for the Spark History Server. + </description> + </property> + + <property> + <name>spark.history.kerberos.keytab</name> + <value>none</value> + <description> + Location of the kerberos keytab file for the Spark History Server. + </description> + </property> +</configuration> http://git-wip-us.apache.org/repos/asf/ambari/blob/1863c3b9/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/SPARK/configuration/spark-env.xml ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/SPARK/configuration/spark-env.xml b/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/SPARK/configuration/spark-env.xml new file mode 100755 index 0000000..e0c8a01 --- /dev/null +++ b/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/SPARK/configuration/spark-env.xml @@ -0,0 +1,110 @@ +<?xml version="1.0"?> +<?xml-stylesheet type="text/xsl" href="configuration.xsl"?> +<!-- +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +--> + +<configuration> + + <property> + <name>spark_thriftserver_port</name> + <value>10002</value> + <description> + TCP port number to listen on, default 10002. + </description> + </property> + + <property> + <name>spark_user</name> + <value>spark</value> + <property-type>USER</property-type> + <description>Spark User.</description> + </property> + + <property> + <name>spark_group</name> + <value>spark</value> + <property-type>GROUP</property-type> + <description>spark group</description> + </property> + + <property> + <name>spark_log_dir</name> + <value>/var/log/spark</value> + <description>Spark Log Dir</description> + </property> + + <property> + <name>spark_pid_dir</name> + <value>/var/run/spark</value> + </property> + + <!-- spark-env.sh --> + <property> + <name>content</name> + <description>This is the jinja template for spark-env.sh file</description> + <value> +#!/usr/bin/env bash + +# This file is sourced when running various Spark programs. +# Copy it as spark-env.sh and edit that to configure Spark for your site. + +# Options read in YARN client mode +#SPARK_EXECUTOR_INSTANCES="2" #Number of workers to start (Default: 2) +#SPARK_EXECUTOR_CORES="1" #Number of cores for the workers (Default: 1). +#SPARK_EXECUTOR_MEMORY="1G" #Memory per Worker (e.g. 1000M, 2G) (Default: 1G) +#SPARK_DRIVER_MEMORY="512 Mb" #Memory for Master (e.g. 1000M, 2G) (Default: 512 Mb) +#SPARK_YARN_APP_NAME="spark" #The name of your application (Default: Spark) +#SPARK_YARN_QUEUE="~@~Xdefault~@~Y" #The hadoop queue to use for allocation requests (Default: @~Xdefault~@~Y) +#SPARK_YARN_DIST_FILES="" #Comma separated list of files to be distributed with the job. +#SPARK_YARN_DIST_ARCHIVES="" #Comma separated list of archives to be distributed with the job. + +# Generic options for the daemons used in the standalone deploy mode + +# Alternate conf dir. 
(Default: ${SPARK_HOME}/conf) +export SPARK_CONF_DIR=${SPARK_HOME:-{{spark_home}}}/conf + +# Where log files are stored.(Default:${SPARK_HOME}/logs) +#export SPARK_LOG_DIR=${SPARK_HOME:-{{spark_home}}}/logs +export SPARK_LOG_DIR={{spark_log_dir}} + +# Where the pid file is stored. (Default: /tmp) +export SPARK_PID_DIR={{spark_pid_dir}} + +# A string representing this instance of spark.(Default: $USER) +SPARK_IDENT_STRING=$USER + +# The scheduling priority for daemons. (Default: 0) +SPARK_NICENESS=0 + +export SPARK_PUBLIC_DNS={{spark_history_server_host}} +export SPARK_HISTORY_OPTS="-Dspark.history.ui.port={{spark_history_ui_port}} -Dspark.history.fs.logDirectory={{spark_eventlog_dir_default}}" +export HIVE_SERVER2_THRIFT_BIND_HOST={{spark_thrift_server_host}} +export HIVE_SERVER2_THRIFT_PORT={{spark_thriftserver_port}} + +export HADOOP_HOME=${HADOOP_HOME:-{{hadoop_home}}} +export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-{{hadoop_conf_dir}}} + +# The java implementation to use. +export JAVA_HOME={{java_home}} + +</value> + </property> + +</configuration> http://git-wip-us.apache.org/repos/asf/ambari/blob/1863c3b9/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/SPARK/configuration/spark-javaopts-properties.xml ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/SPARK/configuration/spark-javaopts-properties.xml b/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/SPARK/configuration/spark-javaopts-properties.xml new file mode 100755 index 0000000..c2db325 --- /dev/null +++ b/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/SPARK/configuration/spark-javaopts-properties.xml @@ -0,0 +1,27 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +--> +<configuration supports_final="true"> + <property> + <name>content</name> + <description>Spark-javaopts-properties</description> + <value> </value> + </property> +</configuration> http://git-wip-us.apache.org/repos/asf/ambari/blob/1863c3b9/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/SPARK/configuration/spark-log4j.xml ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/SPARK/configuration/spark-log4j.xml b/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/SPARK/configuration/spark-log4j.xml new file mode 100755 index 0000000..2ba64fb --- /dev/null +++ b/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/SPARK/configuration/spark-log4j.xml @@ -0,0 +1,42 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +--> + +<configuration supports_final="false"> + <property> + <name>content</name> + <description>Spark-log4j-Properties</description> + <value> +# Set everything to be logged to the console +log4j.rootCategory=INFO, console +log4j.appender.console=org.apache.log4j.ConsoleAppender +log4j.appender.console.target=System.err +log4j.appender.console.layout=org.apache.log4j.PatternLayout +log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n + +# Settings to quiet third party logs that are too verbose +log4j.logger.org.eclipse.jetty=WARN +log4j.logger.org.eclipse.jetty.util.component.AbstractLifeCycle=ERROR +log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO +log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO + + </value> + </property> +</configuration> http://git-wip-us.apache.org/repos/asf/ambari/blob/1863c3b9/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/SPARK/configuration/spark-metrics-properties.xml ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/SPARK/configuration/spark-metrics-properties.xml b/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/SPARK/configuration/spark-metrics-properties.xml new file mode 100755 index 0000000..9af5f2e --- /dev/null +++ b/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/SPARK/configuration/spark-metrics-properties.xml @@ -0,0 +1,160 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- +/** + * Licensed to 
the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +--> +<configuration supports_final="true"> + <property> + <name>content</name> + <description>Spark-metrics-properties</description> + <value> +# syntax: [instance].sink|source.[name].[options]=[value] + +# This file configures Spark's internal metrics system. The metrics system is +# divided into instances which correspond to internal components. +# Each instance can be configured to report its metrics to one or more sinks. +# Accepted values for [instance] are "master", "worker", "executor", "driver", +# and "applications". A wild card "*" can be used as an instance name, in +# which case all instances will inherit the supplied property. +# +# Within an instance, a "source" specifies a particular set of grouped metrics. +# there are two kinds of sources: +# 1. Spark internal sources, like MasterSource, WorkerSource, etc, which will +# collect a Spark component's internal state. Each instance is paired with a +# Spark source that is added automatically. +# 2. Common sources, like JvmSource, which will collect low level state. +# These can be added through configuration options and are then loaded +# using reflection. +# +# A "sink" specifies where metrics are delivered to. 
Each instance can be +# assigned one or more sinks. +# +# The sink|source field specifies whether the property relates to a sink or +# source. +# +# The [name] field specifies the name of source or sink. +# +# The [options] field is the specific property of this source or sink. The +# source or sink is responsible for parsing this property. +# +# Notes: +# 1. To add a new sink, set the "class" option to a fully qualified class +# name (see examples below). +# 2. Some sinks involve a polling period. The minimum allowed polling period +# is 1 second. +# 3. Wild card properties can be overridden by more specific properties. +# For example, master.sink.console.period takes precedence over +# *.sink.console.period. +# 4. A metrics specific configuration +# "spark.metrics.conf=${SPARK_HOME}/conf/metrics.properties" should be +# added to Java properties using -Dspark.metrics.conf=xxx if you want to +# customize metrics system. You can also put the file in ${SPARK_HOME}/conf +# and it will be loaded automatically. +# 5. MetricsServlet is added by default as a sink in master, worker and client +# driver, you can send http request "/metrics/json" to get a snapshot of all the +# registered metrics in json format. For master, requests "/metrics/master/json" and +# "/metrics/applications/json" can be sent seperately to get metrics snapshot of +# instance master and applications. MetricsServlet may not be configured by self. +# + +## List of available sinks and their properties. 
+ +# org.apache.spark.metrics.sink.ConsoleSink +# Name: Default: Description: +# period 10 Poll period +# unit seconds Units of poll period + +# org.apache.spark.metrics.sink.CSVSink +# Name: Default: Description: +# period 10 Poll period +# unit seconds Units of poll period +# directory /tmp Where to store CSV files + +# org.apache.spark.metrics.sink.GangliaSink +# Name: Default: Description: +# host NONE Hostname or multicast group of Ganglia server +# port NONE Port of Ganglia server(s) +# period 10 Poll period +# unit seconds Units of poll period +# ttl 1 TTL of messages sent by Ganglia +# mode multicast Ganglia network mode ('unicast' or 'multicast') + +# org.apache.spark.metrics.sink.JmxSink + +# org.apache.spark.metrics.sink.MetricsServlet +# Name: Default: Description: +# path VARIES* Path prefix from the web server root +# sample false Whether to show entire set of samples for histograms ('false' or 'true') +# +# * Default path is /metrics/json for all instances except the master. 
The master has two paths: +# /metrics/aplications/json # App information +# /metrics/master/json # Master information + +# org.apache.spark.metrics.sink.GraphiteSink +# Name: Default: Description: +# host NONE Hostname of Graphite server +# port NONE Port of Graphite server +# period 10 Poll period +# unit seconds Units of poll period +# prefix EMPTY STRING Prefix to prepend to metric name + +## Examples +# Enable JmxSink for all instances by class name +#*.sink.jmx.class=org.apache.spark.metrics.sink.JmxSink + +# Enable ConsoleSink for all instances by class name +#*.sink.console.class=org.apache.spark.metrics.sink.ConsoleSink + +# Polling period for ConsoleSink +#*.sink.console.period=10 + +#*.sink.console.unit=seconds + +# Master instance overlap polling period +#master.sink.console.period=15 + +#master.sink.console.unit=seconds + +# Enable CsvSink for all instances +#*.sink.csv.class=org.apache.spark.metrics.sink.CsvSink + +# Polling period for CsvSink +#*.sink.csv.period=1 + +#*.sink.csv.unit=minutes + +# Polling directory for CsvSink +#*.sink.csv.directory=/tmp/ + +# Worker instance overlap polling period +#worker.sink.csv.period=10 + +#worker.sink.csv.unit=minutes + +# Enable jvm source for instance master, worker, driver and executor +#master.source.jvm.class=org.apache.spark.metrics.source.JvmSource + +#worker.source.jvm.class=org.apache.spark.metrics.source.JvmSource + +#driver.source.jvm.class=org.apache.spark.metrics.source.JvmSource + +#executor.source.jvm.class=org.apache.spark.metrics.source.JvmSource + + </value> + </property> +</configuration> http://git-wip-us.apache.org/repos/asf/ambari/blob/1863c3b9/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/SPARK/kerberos.json ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/SPARK/kerberos.json b/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/SPARK/kerberos.json new file 
mode 100755 index 0000000..6189324 --- /dev/null +++ b/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/SPARK/kerberos.json @@ -0,0 +1,55 @@ +{ + "services": [ + { + "name": "SPARK", + "identities": [ + { + "name": "/smokeuser" + }, + { + "name": "/hdfs" + }, + { + "name": "sparkuser", + "principal": { + "value": "${spark-env/spark_user}-${cluster_name}@${realm}", + "type" : "user", + "configuration": "spark-defaults/spark.history.kerberos.principal", + "local_username" : "${spark-env/spark_user}" + }, + "keytab": { + "file": "${keytab_dir}/spark.headless.keytab", + "owner": { + "name": "${spark-env/spark_user}", + "access": "r" + }, + "group": { + "name": "${cluster-env/user_group}", + "access": "" + }, + "configuration": "spark-defaults/spark.history.kerberos.keytab" + } + } + ], + "components": [ + { + "name": "SPARK_JOBHISTORYSERVER" + }, + { + "name": "SPARK_CLIENT" + }, + { + "name": "SPARK_THRIFTSERVER", + "identities": [ + { + "name": "/HDFS/NAMENODE/hdfs" + }, + { + "name": "/HIVE/HIVE_SERVER/hive_server_hive" + } + ] + } + ] + } + ] +} http://git-wip-us.apache.org/repos/asf/ambari/blob/1863c3b9/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/SPARK/metainfo.xml ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/SPARK/metainfo.xml b/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/SPARK/metainfo.xml new file mode 100755 index 0000000..02abc62 --- /dev/null +++ b/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/SPARK/metainfo.xml @@ -0,0 +1,187 @@ +<?xml version="1.0"?> +<!--Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. 
The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ +--> +<metainfo> + <schemaVersion>2.0</schemaVersion> + <services> + <service> + <name>SPARK</name> + <displayName>Spark</displayName> + <comment>Apache Spark is a fast and general engine for large-scale data processing</comment> + <version>1.4.1</version> + <components> + <component> + <name>SPARK_JOBHISTORYSERVER</name> + <displayName>Spark History Server</displayName> + <category>MASTER</category> + <cardinality>1</cardinality> + <versionAdvertised>true</versionAdvertised> + <dependencies> + <dependency> + <name>HDFS/HDFS_CLIENT</name> + <scope>host</scope> + <auto-deploy> + <enabled>true</enabled> + </auto-deploy> + </dependency> + <dependency> + <name>MAPREDUCE2/MAPREDUCE2_CLIENT</name> + <scope>host</scope> + <auto-deploy> + <enabled>true</enabled> + </auto-deploy> + </dependency> + <dependency> + <name>YARN/YARN_CLIENT</name> + <scope>host</scope> + <auto-deploy> + <enabled>true</enabled> + </auto-deploy> + </dependency> + </dependencies> + <commandScript> + <script>scripts/job_history_server.py</script> + <scriptType>PYTHON</scriptType> + <timeout>600</timeout> + </commandScript> + </component> + <component> + <name>SPARK_THRIFTSERVER</name> + <displayName>Spark Thrift Server</displayName> + <category>MASTER</category> + <cardinality>1</cardinality> + <versionAdvertised>true</versionAdvertised> + <dependencies> + <dependency> + <name>HIVE/HIVE_METASTORE</name> + <scope>cluster</scope> + 
<auto-deploy> + <enabled>true</enabled> + </auto-deploy> + </dependency> + <dependency> + <name>HIVE/HIVE_CLIENT</name> + <scope>host</scope> + <auto-deploy> + <enabled>true</enabled> + </auto-deploy> + </dependency> + </dependencies> + <commandScript> + <script>scripts/thrift_server.py</script> + <scriptType>PYTHON</scriptType> + <timeout>600</timeout> + </commandScript> + </component> + <component> + <name>SPARK_CLIENT</name> + <displayName>Spark Client</displayName> + <category>CLIENT</category> + <cardinality>1+</cardinality> + <versionAdvertised>true</versionAdvertised> + <dependencies> + <dependency> + <name>HDFS/HDFS_CLIENT</name> + <scope>host</scope> + <auto-deploy> + <enabled>true</enabled> + </auto-deploy> + </dependency> + <dependency> + <name>MAPREDUCE2/MAPREDUCE2_CLIENT</name> + <scope>host</scope> + <auto-deploy> + <enabled>true</enabled> + </auto-deploy> + </dependency> + <dependency> + <name>YARN/YARN_CLIENT</name> + <scope>host</scope> + <auto-deploy> + <enabled>true</enabled> + </auto-deploy> + </dependency> + </dependencies> + <commandScript> + <script>scripts/spark_client.py</script> + <scriptType>PYTHON</scriptType> + <timeout>600</timeout> + </commandScript> + <configFiles> + <configFile> + <type>env</type> + <fileName>spark-env.sh</fileName> + <dictionaryName>spark-env</dictionaryName> + </configFile> + <configFile> + <type>properties</type> + <fileName>spark-defaults.conf</fileName> + <dictionaryName>spark-defaults</dictionaryName> + </configFile> + <configFile> + <type>env</type> + <fileName>spark-log4j.properties</fileName> + <dictionaryName>spark-log4j</dictionaryName> + </configFile> + </configFiles> + </component> + </components> + + <osSpecifics> + <osSpecific> + <osFamily>redhat6,suse11</osFamily> + <packages> + <package> + <name>spark-*_IBM*</name> + </package> + <package> + <name>spark-python*_IBM*</name> + </package> + </packages> + </osSpecific> + <osSpecific> + <osFamily>ubuntu12</osFamily> + <packages> + <package> + 
<name>spark-*</name> + </package> + </packages> + </osSpecific> + </osSpecifics> + + <configuration-dependencies> + <config-type>spark-env</config-type> + <config-type>spark-defaults</config-type> + <config-type>spark-log4j</config-type> + </configuration-dependencies> + + <commandScript> + <script>scripts/service_check.py</script> + <scriptType>PYTHON</scriptType> + <timeout>300</timeout> + </commandScript> + + <requiredServices> + <service>HDFS</service> + <service>YARN</service> + <service>HIVE</service> + </requiredServices> + + </service> + </services> +</metainfo> http://git-wip-us.apache.org/repos/asf/ambari/blob/1863c3b9/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/SPARK/package/scripts/job_history_server.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/SPARK/package/scripts/job_history_server.py b/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/SPARK/package/scripts/job_history_server.py new file mode 100755 index 0000000..439defe --- /dev/null +++ b/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/SPARK/package/scripts/job_history_server.py @@ -0,0 +1,167 @@ +#!/usr/bin/python +""" +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. + +""" + +import sys +import os +from resource_management import * +from resource_management.libraries.functions import conf_select +from resource_management.libraries.functions import stack_select +#from resource_management.libraries.functions.version import compare_versions, format_stack_version +from resource_management.core.exceptions import ComponentIsNotRunning +from resource_management.core.logger import Logger +from resource_management.core import shell +from spark import * + + +class JobHistoryServer(Script): + + def get_component_name(self): + return "spark-historyserver" + + def pre_upgrade_restart(self, env, upgrade_type=None): + import params + + env.set_params(params) + if params.version and compare_versions(format_stack_version(params.version), '4.0.0.0') >= 0: + conf_select.select(params.stack_name, "spark", params.version) + stack_select.select("spark-historyserver", params.version) + #Execute(format("stack-select set spark-historyserver {version}")) + + def install(self, env): + self.install_packages(env) + import params + + env.set_params(params) + self.configure(env) + + def stop(self, env, upgrade_type=None): + import params + + env.set_params(params) + self.configure(env) + daemon_cmd = format('{spark_history_server_stop}') + Execute(daemon_cmd, + user=params.spark_user, + environment={'JAVA_HOME': params.java_home} + ) + if os.path.isfile(params.spark_history_server_pid_file): + os.remove(params.spark_history_server_pid_file) + + + def start(self, env, upgrade_type=None): + import params + + env.set_params(params) + self.configure(env) + self.create_historyServer_directory() + self.copy_spark_yarn_jar() + + if params.security_enabled: + spark_kinit_cmd = format("{kinit_path_local} -kt {spark_kerberos_keytab} {spark_principal}; ") + Execute(spark_kinit_cmd, user=params.spark_user) + + # FIXME! TODO! 
remove this after soft link bug is fixed: + #if not os.path.islink('/usr/iop/current/spark'): + # iop_version = get_iop_version() + # cmd = 'ln -s /usr/iop/' + iop_version + '/spark /usr/iop/current/spark' + # Execute(cmd) + + daemon_cmd = format('{spark_history_server_start}') + no_op_test = format( + 'ls {spark_history_server_pid_file} >/dev/null 2>&1 && ps -p `cat {spark_history_server_pid_file}` >/dev/null 2>&1') + Execute(daemon_cmd, + user=params.spark_user, + environment={'JAVA_HOME': params.java_home}, + not_if=no_op_test + ) + + def status(self, env): + import status_params + + env.set_params(status_params) + pid_file = format("{spark_history_server_pid_file}") + # Recursively check all existing pid files + check_process_status(pid_file) + + def create_historyServer_directory(self): + import params + + params.HdfsResource(params.spark_hdfs_user_dir, + type="directory", + action="create_on_execute", + owner=params.spark_user, + group=params.user_group, + mode=params.spark_hdfs_user_mode) + + params.HdfsResource(params.spark_eventlog_dir_default, + type="directory", + action="create_on_execute", + owner=params.spark_user, + group=params.user_group, + mode=params.spark_eventlog_dir_mode) + + params.HdfsResource(None, action="execute") + + def copy_spark_yarn_jar(self): + import params + + jar_src_file = params.spark_jar_src_dir + "/" + params.spark_jar_src_file + jar_dst_file = params.spark_jar_hdfs_dir + "/" + params.spark_jar_src_file + jar_dst_path = params.spark_jar_hdfs_dir + + # Remove to enable refreshing jars during restart + hdfs_remove_cmd = "dfs -rm -R -skipTrash %s" % jar_dst_path + + try: + ExecuteHadoop(hdfs_remove_cmd, + user=params.hdfs_user, + logoutput=True, + conf_dir=params.hadoop_conf_dir, + bin_dir=params.hadoop_bin_dir) + except Fail: + pass + + params.HdfsResource(jar_dst_path, + type="directory", + action="create_on_execute", + owner=params.spark_user, + group=params.user_group, + mode=params.spark_jar_hdfs_dir_mode) + + 
params.HdfsResource(None, action="execute") + + params.HdfsResource(InlineTemplate(jar_dst_file).get_content(), + type="file", + action="create_on_execute", + source=jar_src_file, + owner=params.spark_user, + group=params.user_group, + mode=params.spark_jar_file_mode) + + params.HdfsResource(None, action="execute") + + def configure(self, env): + import params + + env.set_params(params) + spark(env) + +if __name__ == "__main__": + JobHistoryServer().execute() http://git-wip-us.apache.org/repos/asf/ambari/blob/1863c3b9/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/SPARK/package/scripts/params.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/SPARK/package/scripts/params.py b/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/SPARK/package/scripts/params.py new file mode 100755 index 0000000..53efae0 --- /dev/null +++ b/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/SPARK/package/scripts/params.py @@ -0,0 +1,199 @@ +#!/usr/bin/python +""" +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+ +""" + +from resource_management.libraries.functions.default import default +from resource_management import * +from resource_management.libraries.functions import conf_select, stack_select +from spark import * +import status_params + + +# a map of the Ambari role to the component name +# for use with /usr/iop/current/<component> +SERVER_ROLE_DIRECTORY_MAP = { + 'SPARK_JOBHISTORYSERVER' : 'spark-historyserver', + 'SPARK_CLIENT' : 'spark-client', + 'SPARK_THRIFTSERVER' : 'spark-thriftserver' +} +upgrade_direction = default("/commandParams/upgrade_direction", None) + +component_directory = Script.get_component_from_role(SERVER_ROLE_DIRECTORY_MAP, "SPARK_CLIENT") + +config = Script.get_config() +tmp_dir = Script.get_tmp_dir() + +# New Cluster Stack Version that is defined during the RESTART of a Rolling Upgrade +version = default("/commandParams/version", None) +stack_name = default("/hostLevelParams/stack_name", None) + +hadoop_home = "/usr/iop/current/hadoop-client" +spark_conf = format("/usr/iop/current/{component_directory}/conf") +spark_log_dir = config['configurations']['spark-env']['spark_log_dir'] +spark_pid_dir = status_params.spark_pid_dir +spark_role_root = "spark-client" + +command_role = default("/role", "") + +if command_role == "SPARK_CLIENT": + spark_role_root = "spark-client" +elif command_role == "SPARK_JOBHISTORYSERVER": + spark_role_root = "spark-historyserver" +elif command_role == "SPARK_THRIFTSERVER": + spark_role_root = "spark-thriftserver" + +spark_home = format("/usr/iop/current/{spark_role_root}") +if not os.path.exists(spark_home): + os.symlink('/usr/iop/current/spark', spark_home) + +java_home = config['hostLevelParams']['java_home'] + +spark_user = status_params.spark_user +hive_user = status_params.hive_user +spark_group = status_params.spark_group +user_group = status_params.user_group + +spark_hdfs_user_dir = format("/user/{spark_user}") +spark_hdfs_user_mode = 0755 +spark_eventlog_dir_mode = 01777 +spark_jar_hdfs_dir = 
"/iop/apps/4.1.0.0/spark/jars" +spark_jar_hdfs_dir_mode = 0755 +spark_jar_file_mode = 0444 +spark_jar_src_dir = "/usr/iop/current/spark-historyserver/lib" +spark_jar_src_file = "spark-assembly.jar" + +spark_history_server_pid_file = status_params.spark_history_server_pid_file +spark_thrift_server_pid_file = status_params.spark_thrift_server_pid_file + +spark_history_server_start = format("{spark_home}/sbin/start-history-server.sh") +spark_history_server_stop = format("{spark_home}/sbin/stop-history-server.sh") + +spark_thrift_server_start = format("{spark_home}/sbin/start-thriftserver.sh") +spark_thrift_server_stop = format("{spark_home}/sbin/stop-thriftserver.sh") + +spark_submit_cmd = format("{spark_home}/bin/spark-submit") +spark_smoke_example = "org.apache.spark.examples.SparkPi" +spark_service_check_cmd = format( + "{spark_submit_cmd} --class {spark_smoke_example} --master yarn-cluster --num-executors 1 --driver-memory 256m --executor-memory 256m --executor-cores 1 {spark_home}/lib/spark-examples*.jar 1") + +spark_jobhistoryserver_hosts = default("/clusterHostInfo/spark_jobhistoryserver_hosts", []) +spark_thriftserver_hosts = default("/clusterHostInfo/spark_thriftserver_hosts", []) +namenode_hosts = default("/clusterHostInfo/namenode_host", []) +has_namenode = not len(namenode_hosts) == 0 + +if len(spark_jobhistoryserver_hosts) > 0: + spark_history_server_host = spark_jobhistoryserver_hosts[0] +else: + spark_history_server_host = "localhost" + +if len(spark_thriftserver_hosts) > 0: + spark_thrift_server_host = spark_thriftserver_hosts[0] +else: + spark_thrift_server_host = "localhost" +# spark-defaults params +if has_namenode: + namenode_host = str(namenode_hosts[0]) +else: + namenode_host = "localhost" + +hadoop_fs_defaultfs = config['configurations']['core-site']['fs.defaultFS'] +spark_eventlog_dir_default=hadoop_fs_defaultfs + config['configurations']['spark-defaults']['spark.eventLog.dir'] +spark_yarn_jar_default=hadoop_fs_defaultfs + 
'/iop/apps/4.1.0.0/spark/jars/spark-assembly.jar' + +spark_yarn_applicationMaster_waitTries = default( + "/configurations/spark-defaults/spark.yarn.applicationMaster.waitTries", '10') +spark_yarn_submit_file_replication = default("/configurations/spark-defaults/spark.yarn.submit.file.replication", '3') +spark_yarn_preserve_staging_files = default("/configurations/spark-defaults/spark.yarn.preserve.staging.files", "false") +spark_yarn_scheduler_heartbeat_interval = default( + "/configurations/spark-defaults/spark.yarn.scheduler.heartbeat.interval-ms", "5000") +spark_yarn_queue = default("/configurations/spark-defaults/spark.yarn.queue", "default") +spark_yarn_containerLauncherMaxThreads = default( + "/configurations/spark-defaults/spark.yarn.containerLauncherMaxThreads", "25") +spark_yarn_max_executor_failures = default("/configurations/spark-defaults/spark.yarn.max.executor.failures", "3") +spark_yarn_executor_memoryOverhead = default("/configurations/spark-defaults/spark.yarn.executor.memoryOverhead", "384") +spark_yarn_driver_memoryOverhead = default("/configurations/spark-defaults/spark.yarn.driver.memoryOverhead", "384") +spark_history_ui_port = default("/configurations/spark-defaults/spark.history.ui.port", "18080") +spark_thriftserver_port = default("/configurations/spark-env/spark_thriftserver_port", "10002") +spark_eventlog_enabled = default("/configurations/spark-defaults/spark.eventLog.enabled", "true") +spark_eventlog_dir = default("/configurations/spark-defaults/spark.eventLog.dir", spark_eventlog_dir_default) +spark_yarn_jar = default("/configurations/spark-defaults/spark.yarn.jar", spark_yarn_jar_default) + +# add the properties that cannot be configured thru UI +spark_conf_properties_map = dict(config['configurations']['spark-defaults']) +spark_conf_properties_map["spark.yarn.historyServer.address"] = spark_history_server_host + ":" + str(spark_history_ui_port) +spark_conf_properties_map["spark.yarn.jar"] = spark_yarn_jar 
+spark_conf_properties_map["spark.eventLog.dir"] = spark_eventlog_dir_default + +spark_env_sh = config['configurations']['spark-env']['content'] +spark_log4j = config['configurations']['spark-log4j']['content'] +#spark_metrics_properties = config['configurations']['spark-metrics-properties']['content'] +spark_javaopts_properties = config['configurations']['spark-javaopts-properties']['content'] +hive_server_host = default("/clusterHostInfo/hive_server_host", []) +is_hive_installed = not len(hive_server_host) == 0 + +iop_full_version = get_iop_version() + +spark_driver_extraJavaOptions = str(config['configurations']['spark-defaults']['spark.driver.extraJavaOptions']) +if spark_driver_extraJavaOptions.find('-Diop.version') == -1: + spark_driver_extraJavaOptions = spark_driver_extraJavaOptions + ' -Diop.version=' + str(iop_full_version) + +spark_yarn_am_extraJavaOptions = str(config['configurations']['spark-defaults']['spark.yarn.am.extraJavaOptions']) +if spark_yarn_am_extraJavaOptions.find('-Diop.version') == -1: + spark_yarn_am_extraJavaOptions = spark_yarn_am_extraJavaOptions + ' -Diop.version=' + str(iop_full_version) + +spark_javaopts_properties = str(spark_javaopts_properties) +if spark_javaopts_properties.find('-Diop.version') == -1: + spark_javaopts_properties = spark_javaopts_properties+ ' -Diop.version=' + str(iop_full_version) + +security_enabled = status_params.security_enabled +kinit_path_local = functions.get_kinit_path() +spark_kerberos_keytab = config['configurations']['spark-defaults']['spark.history.kerberos.keytab'] +spark_kerberos_principal = config['configurations']['spark-defaults']['spark.history.kerberos.principal'] +if security_enabled: + spark_principal = spark_kerberos_principal.replace('_HOST',spark_history_server_host.lower()) +# for create_hdfs_directory + +# To create hdfs directory +hadoop_conf_dir = conf_select.get_hadoop_conf_dir() +hadoop_bin_dir = stack_select.get_hadoop_dir("bin") + +hostname = config["hostname"] +hdfs_user_keytab 
= config['configurations']['hadoop-env']['hdfs_user_keytab'] +hdfs_user = config['configurations']['hadoop-env']['hdfs_user'] +hdfs_principal_name = config['configurations']['hadoop-env']['hdfs_principal_name'] +kinit_path_local = functions.get_kinit_path() + +hdfs_site = config['configurations']['hdfs-site'] +default_fs = config['configurations']['core-site']['fs.defaultFS'] + +import functools +#create partial functions with common arguments for every HdfsResource call +#to create/delete hdfs directory/file/copyfromlocal we need to call params.HdfsResource in code +HdfsResource = functools.partial( + HdfsResource, + user=hdfs_user, + security_enabled = security_enabled, + keytab = hdfs_user_keytab, + kinit_path_local = kinit_path_local, + hadoop_bin_dir = hadoop_bin_dir, + hadoop_conf_dir = hadoop_conf_dir, + principal_name = hdfs_principal_name, + hdfs_site = hdfs_site, + default_fs = default_fs +) http://git-wip-us.apache.org/repos/asf/ambari/blob/1863c3b9/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/SPARK/package/scripts/service_check.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/SPARK/package/scripts/service_check.py b/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/SPARK/package/scripts/service_check.py new file mode 100755 index 0000000..4c9ea4a --- /dev/null +++ b/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/SPARK/package/scripts/service_check.py @@ -0,0 +1,78 @@ +""" +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. 
You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" + +from resource_management import * +import subprocess +import time + +class SparkServiceCheck(Script): + def service_check(self, env): + import params + + env.set_params(params) + + # smoke_cmd = params.spark_service_check_cmd + # code, output = shell.call(smoke_cmd, timeout=100) + # if code == 0: + # Logger.info('Spark-on-Yarn Job submitted successfully') + # else: + # Logger.info('Spark-on-Yarn Job cannot be submitted') + # raise ComponentIsNotRunning() + + command = "curl" + httpGssnegotiate = "--negotiate" + userpswd = "-u:" + insecure = "-k" + silent = "-s" + out = "-o /dev/null" + head = "-w'%{http_code}'" + url = 'http://' + params.spark_history_server_host + ':' + str(params.spark_history_ui_port) + + command_with_flags = [command, silent, out, head, httpGssnegotiate, userpswd, insecure, url] + + is_running = False + for i in range(1,11): + proc = subprocess.Popen(command_with_flags, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + Logger.info("Try %d, command: %s" % (i, " ".join(command_with_flags))) + (stdout, stderr) = proc.communicate() + response = stdout + if '200' in response: + is_running = True + Logger.info('Spark Job History Server up and running') + break + Logger.info("Response: %s" % str(response)) + time.sleep(5) + + if is_running == False : + Logger.info('Spark Job History Server not running.') + raise ComponentIsNotRunning() + + + + #command_with_flags = [command, silent, out, head, httpGssnegotiate, userpswd, insecure, url] + # proc = subprocess.Popen(command_with_flags, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + # (stdout, stderr) 
= proc.communicate() + # response = stdout + # if '200' in response: + # Logger.info('Spark Job History Server up and running') + # else: + # Logger.info('Spark Job History Server not running.') + # raise ComponentIsNotRunning() + +if __name__ == "__main__": + SparkServiceCheck().execute()
