AMBARI-19126 Spark 2.x interpreter configurations in Zeppelin (r-kamath)
Project: http://git-wip-us.apache.org/repos/asf/ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/b77b2fac
Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/b77b2fac
Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/b77b2fac

Branch: refs/heads/trunk
Commit: b77b2fac39bc06df1d5b6a59d1be4d6bdd8b0386
Parents: f936fd0
Author: Renjith Kamath <[email protected]>
Authored: Thu Dec 15 11:57:34 2016 +0530
Committer: Renjith Kamath <[email protected]>
Committed: Thu Dec 15 11:59:13 2016 +0530

----------------------------------------------------------------------
 .../0.6.0.2.5/configuration/zeppelin-env.xml    |   2 +-
 .../0.6.0.2.5/package/scripts/master.py         | 108 +++++++++++--------
 .../0.6.0.2.5/package/scripts/params.py         |   4 +-
 .../package/scripts/spark2_config_template.py   |  64 +++++++++++
 4 files changed, 133 insertions(+), 45 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/ambari/blob/b77b2fac/ambari-server/src/main/resources/common-services/ZEPPELIN/0.6.0.2.5/configuration/zeppelin-env.xml
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/ZEPPELIN/0.6.0.2.5/configuration/zeppelin-env.xml b/ambari-server/src/main/resources/common-services/ZEPPELIN/0.6.0.2.5/configuration/zeppelin-env.xml
index c03d2dc..317ad73 100644
--- a/ambari-server/src/main/resources/common-services/ZEPPELIN/0.6.0.2.5/configuration/zeppelin-env.xml
+++ b/ambari-server/src/main/resources/common-services/ZEPPELIN/0.6.0.2.5/configuration/zeppelin-env.xml
@@ -115,7 +115,7 @@ export ZEPPELIN_JAVA_OPTS="-Dspark.executor.memory={{executor_mem}} -Dspark.exec
 ## defining SPARK_HOME makes Zeppelin run spark interpreter process using spark-submit
 ##
 # (required) When it is defined, load it instead of Zeppelin embedded Spark libraries
-export SPARK_HOME={{spark_home}}
+#export SPARK_HOME={{spark_home}}
 
 # (optional) extra options to pass to spark submit. eg) "--driver-memory 512M --executor-memory 1G".
 # export SPARK_SUBMIT_OPTIONS
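With Spark 1.x and 2.x potentially installed side by side, a single global
SPARK_HOME export no longer makes sense, so the env template comments it out
and master.py (below) sets SPARK_HOME per interpreter instead. A minimal
sketch, not part of this commit, of checking that a rendered zeppelin-env.sh
really leaves the global export commented out:

    # Hypothetical helper; the rendered text mirrors the new template output.
    def exports_spark_home(env_text):
        return any(line.strip().startswith("export SPARK_HOME=")
                   for line in env_text.splitlines())

    rendered = "#export SPARK_HOME=/usr/hdp/current/spark-client/\n"
    assert not exports_spark_home(rendered)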

http://git-wip-us.apache.org/repos/asf/ambari/blob/b77b2fac/ambari-server/src/main/resources/common-services/ZEPPELIN/0.6.0.2.5/package/scripts/master.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/ZEPPELIN/0.6.0.2.5/package/scripts/master.py b/ambari-server/src/main/resources/common-services/ZEPPELIN/0.6.0.2.5/package/scripts/master.py
index e82b53c..9cc808b 100644
--- a/ambari-server/src/main/resources/common-services/ZEPPELIN/0.6.0.2.5/package/scripts/master.py
+++ b/ambari-server/src/main/resources/common-services/ZEPPELIN/0.6.0.2.5/package/scripts/master.py
@@ -237,40 +237,40 @@ class Master(Script):
     import params
     config_data = self.get_interpreter_settings()
     interpreter_settings = config_data['interpreterSettings']
-    for notebooks in interpreter_settings:
-      notebook = interpreter_settings[notebooks]
-      if notebook['group'] == 'livy' and params.livy_livyserver_host:
+    for interpreter_setting in interpreter_settings:
+      interpreter = interpreter_settings[interpreter_setting]
+      if interpreter['group'] == 'livy' and params.livy_livyserver_host:
         if params.zeppelin_kerberos_principal and params.zeppelin_kerberos_keytab and params.security_enabled:
-          notebook['properties']['zeppelin.livy.principal'] = params.zeppelin_kerberos_principal
-          notebook['properties']['zeppelin.livy.keytab'] = params.zeppelin_kerberos_keytab
+          interpreter['properties']['zeppelin.livy.principal'] = params.zeppelin_kerberos_principal
+          interpreter['properties']['zeppelin.livy.keytab'] = params.zeppelin_kerberos_keytab
         else:
-          notebook['properties']['zeppelin.livy.principal'] = ""
-          notebook['properties']['zeppelin.livy.keytab'] = ""
-      elif notebook['group'] == 'spark':
+          interpreter['properties']['zeppelin.livy.principal'] = ""
+          interpreter['properties']['zeppelin.livy.keytab'] = ""
+      elif interpreter['group'] == 'spark':
         if params.zeppelin_kerberos_principal and params.zeppelin_kerberos_keytab and params.security_enabled:
-          notebook['properties']['spark.yarn.principal'] = params.zeppelin_kerberos_principal
-          notebook['properties']['spark.yarn.keytab'] = params.zeppelin_kerberos_keytab
+          interpreter['properties']['spark.yarn.principal'] = params.zeppelin_kerberos_principal
+          interpreter['properties']['spark.yarn.keytab'] = params.zeppelin_kerberos_keytab
         else:
-          notebook['properties']['spark.yarn.principal'] = ""
-          notebook['properties']['spark.yarn.keytab'] = ""
-      elif notebook['group'] == 'jdbc':
+          interpreter['properties']['spark.yarn.principal'] = ""
+          interpreter['properties']['spark.yarn.keytab'] = ""
+      elif interpreter['group'] == 'jdbc':
         if params.zeppelin_kerberos_principal and params.zeppelin_kerberos_keytab and params.security_enabled:
-          notebook['properties']['zeppelin.jdbc.auth.type'] = "KERBEROS"
-          notebook['properties']['zeppelin.jdbc.principal'] = params.zeppelin_kerberos_principal
-          notebook['properties']['zeppelin.jdbc.keytab.location'] = params.zeppelin_kerberos_keytab
+          interpreter['properties']['zeppelin.jdbc.auth.type'] = "KERBEROS"
+          interpreter['properties']['zeppelin.jdbc.principal'] = params.zeppelin_kerberos_principal
+          interpreter['properties']['zeppelin.jdbc.keytab.location'] = params.zeppelin_kerberos_keytab
         else:
-          notebook['properties']['zeppelin.jdbc.auth.type'] = ""
-          notebook['properties']['zeppelin.jdbc.principal'] = ""
-          notebook['properties']['zeppelin.jdbc.keytab.location'] = ""
-      elif notebook['group'] == 'sh':
+          interpreter['properties']['zeppelin.jdbc.auth.type'] = ""
+          interpreter['properties']['zeppelin.jdbc.principal'] = ""
+          interpreter['properties']['zeppelin.jdbc.keytab.location'] = ""
+      elif interpreter['group'] == 'sh':
         if params.zeppelin_kerberos_principal and params.zeppelin_kerberos_keytab and params.security_enabled:
-          notebook['properties']['zeppelin.shell.auth.type'] = "KERBEROS"
-          notebook['properties']['zeppelin.shell.principal'] = params.zeppelin_kerberos_principal
-          notebook['properties']['zeppelin.shell.keytab.location'] = params.zeppelin_kerberos_keytab
+          interpreter['properties']['zeppelin.shell.auth.type'] = "KERBEROS"
+          interpreter['properties']['zeppelin.shell.principal'] = params.zeppelin_kerberos_principal
+          interpreter['properties']['zeppelin.shell.keytab.location'] = params.zeppelin_kerberos_keytab
         else:
-          notebook['properties']['zeppelin.shell.auth.type'] = ""
-          notebook['properties']['zeppelin.shell.principal'] = ""
-          notebook['properties']['zeppelin.shell.keytab.location'] = ""
+          interpreter['properties']['zeppelin.shell.auth.type'] = ""
+          interpreter['properties']['zeppelin.shell.principal'] = ""
+          interpreter['properties']['zeppelin.shell.keytab.location'] = ""
     self.set_interpreter_settings(config_data)
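This first hunk is a behavior-preserving rename: the dict being iterated
holds interpreter settings, not notebooks, so notebooks/notebook become
interpreter_setting/interpreter. A toy sketch of the Kerberos toggling
pattern it implements, with sample data rather than Zeppelin's real
interpreter.json:

    # Sample data; Ambari reads the real settings via get_interpreter_settings().
    interpreter_settings = {
        "2C4U48MY3_livy": {"group": "livy", "properties": {}},
    }
    security_enabled = True
    principal = "zeppelin@EXAMPLE.COM"
    keytab = "/etc/security/keytabs/zeppelin.keytab"

    for interpreter_setting in interpreter_settings:
        interpreter = interpreter_settings[interpreter_setting]
        if interpreter["group"] == "livy":
            if principal and keytab and security_enabled:
                interpreter["properties"]["zeppelin.livy.principal"] = principal
                interpreter["properties"]["zeppelin.livy.keytab"] = keytab
            else:
                # Blank the values so stale credentials do not survive
                # turning Kerberos off.
                interpreter["properties"]["zeppelin.livy.principal"] = ""
                interpreter["properties"]["zeppelin.livy.keytab"] = ""

    assert interpreter_settings["2C4U48MY3_livy"]["properties"][
        "zeppelin.livy.principal"] == "zeppelin@EXAMPLE.COM"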
@@ -279,39 +279,57 @@ class Master(Script):
     config_data = self.get_interpreter_settings()
     interpreter_settings = config_data['interpreterSettings']
-    for notebooks in interpreter_settings:
-      notebook = interpreter_settings[notebooks]
-      if notebook['group'] == 'jdbc':
-        notebook['dependencies'] = []
+    if params.spark2_home:
+      spark2_config = self.get_spark2_interpreter_config()
+      config_id = spark2_config["id"]
+      interpreter_settings[config_id] = spark2_config
+
+    for interpreter_setting in interpreter_settings:
+      interpreter = interpreter_settings[interpreter_setting]
+      if interpreter['group'] == 'jdbc':
+        interpreter['dependencies'] = []
         if params.hive_server_host:
           if params.hive_server2_support_dynamic_service_discovery:
-            notebook['properties']['hive.url'] = 'jdbc:hive2://' + \
+            interpreter['properties']['hive.url'] = 'jdbc:hive2://' + \
                                                  params.hive_zookeeper_quorum + \
                                                  '/;' + 'serviceDiscoveryMode=zooKeeper;zooKeeperNamespace=hiveserver2'
           else:
-            notebook['properties']['hive.url'] = 'jdbc:hive2://' + \
+            interpreter['properties']['hive.url'] = 'jdbc:hive2://' + \
                                                  params.hive_server_host + \
                                                  ':' + params.hive_server_port
-          notebook['dependencies'].append(
+          interpreter['dependencies'].append(
             {"groupArtifactVersion": "org.apache.hive:hive-jdbc:2.0.1", "local": "false"})
-          notebook['dependencies'].append(
+          interpreter['dependencies'].append(
            {"groupArtifactVersion": "org.apache.hadoop:hadoop-common:2.7.2", "local": "false"})
-          notebook['dependencies'].append(
+          interpreter['dependencies'].append(
            {"groupArtifactVersion": "org.apache.hive.shims:hive-shims-0.23:2.1.0", "local": "false"})
         if params.zookeeper_znode_parent \
             and params.hbase_zookeeper_quorum:
-          notebook['properties']['phoenix.url'] = "jdbc:phoenix:" + \
+          interpreter['properties']['phoenix.url'] = "jdbc:phoenix:" + \
                                                   params.hbase_zookeeper_quorum + ':' + \
                                                   params.zookeeper_znode_parent
-          notebook['dependencies'].append(
+          interpreter['dependencies'].append(
             {"groupArtifactVersion": "org.apache.phoenix:phoenix-core:4.7.0-HBase-1.1", "local": "false"})
-      elif notebook['group'] == 'livy' and params.livy_livyserver_host:
-        notebook['properties']['livy.spark.master'] = "yarn-cluster"
-        notebook['properties']['zeppelin.livy.url'] = "http://" + params.livy_livyserver_host +\
+      elif interpreter['group'] == 'livy' and params.livy_livyserver_host:
+        interpreter['properties']['livy.spark.master'] = "yarn-cluster"
+        interpreter['properties']['zeppelin.livy.url'] = "http://" + params.livy_livyserver_host +\
                                                       ":" + params.livy_livyserver_port
-      elif notebook['group'] == 'spark':
-        notebook['properties']['master'] = "yarn-client"
+
+      elif interpreter['group'] == 'spark' and interpreter['name'] == 'spark':
+        if params.spark_home:
+          interpreter['properties']['master'] = "yarn-client"
+          interpreter['properties']['SPARK_HOME'] = "/usr/hdp/current/spark-client/"
+        else:
+          interpreter['properties']['master'] = "local[*]"
+
+      elif interpreter['group'] == 'spark' and interpreter['name'] == 'spark2':
+        if params.spark2_home:
+          interpreter['properties']['master'] = "yarn-client"
+          interpreter['properties']['SPARK_HOME'] = "/usr/hdp/current/spark2-client/"
+        else:
+          interpreter['properties']['master'] = "local[*]"
+
     self.set_interpreter_settings(config_data)
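The second hunk does two things: when spark2_home is set it injects the new
spark2 interpreter template into interpreterSettings, keyed by the template's
own "id", and it splits the old blanket group == 'spark' branch by interpreter
name so spark and spark2 each get their own SPARK_HOME, falling back to
local[*] when the matching Spark client is absent. A condensed sketch of that
selection logic (spark_master_props is a made-up helper; the values and paths
come from the diff):

    def spark_master_props(name, spark_home="", spark2_home=""):
        # Pick the home dir that matches this interpreter's name.
        home = spark_home if name == "spark" else spark2_home
        if home:
            return {"master": "yarn-client", "SPARK_HOME": home}
        return {"master": "local[*]"}

    assert spark_master_props("spark2", spark2_home="/usr/hdp/current/spark2-client/") == {
        "master": "yarn-client", "SPARK_HOME": "/usr/hdp/current/spark2-client/"}
    assert spark_master_props("spark2") == {"master": "local[*]"}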
@@ -334,5 +352,11 @@ class Master(Script):
     import params
     return glob.glob(params.zeppelin_dir + '/interpreter/spark/dep/zeppelin-spark-dependencies*.jar')
 
+  def get_spark2_interpreter_config(self):
+    import spark2_config_template
+    import json
+
+    return json.loads(spark2_config_template.template)
+
 if __name__ == "__main__":
   Master().execute()


http://git-wip-us.apache.org/repos/asf/ambari/blob/b77b2fac/ambari-server/src/main/resources/common-services/ZEPPELIN/0.6.0.2.5/package/scripts/params.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/ZEPPELIN/0.6.0.2.5/package/scripts/params.py b/ambari-server/src/main/resources/common-services/ZEPPELIN/0.6.0.2.5/package/scripts/params.py
index 5a3174f..5ee6147 100644
--- a/ambari-server/src/main/resources/common-services/ZEPPELIN/0.6.0.2.5/package/scripts/params.py
+++ b/ambari-server/src/main/resources/common-services/ZEPPELIN/0.6.0.2.5/package/scripts/params.py
@@ -67,9 +67,9 @@ spark_jar = format("{spark_jar_dir}/zeppelin-spark-0.5.5-SNAPSHOT.jar")
 setup_view = True
 temp_file = config['configurations']['zeppelin-env']['zeppelin.temp.file']
 
-spark_home = None
+spark_home = ""
 spark_version = None
-spark2_home = None
+spark2_home = ""
 spark2_version = None
 if 'spark-defaults' in config['configurations']:
   spark_home = os.path.join(stack_root, "current", 'spark-client')
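Changing the params.py defaults from None to "" keeps the truthiness checks in
master.py intact (both values are falsy) while making the unset case safe to
concatenate or serialize. A quick illustration of the difference:

    spark2_home = ""
    assert not spark2_home                  # truthiness behaves as before
    print("SPARK_HOME=" + spark2_home)      # fine: prints "SPARK_HOME="

    spark2_home = None
    assert not spark2_home
    try:
        print("SPARK_HOME=" + spark2_home)  # the old default
    except TypeError:
        print("None breaks string concatenation")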
"zeppelin.spark.useHiveContext": "true", + "zeppelin.pyspark.python": "python", + "zeppelin.dep.localrepo": "local-repo", + "zeppelin.R.knitr": "true", + "zeppelin.spark.maxResult": "1000", + "master": "local[*]", + "spark.app.name": "Zeppelin", + "zeppelin.R.image.width": "100%", + "zeppelin.R.render.options": "out.format \u003d \u0027html\u0027, comment \u003d NA, echo \u003d FALSE, results \u003d \u0027asis\u0027, message \u003d F, warning \u003d F", + "zeppelin.R.cmd": "R" + }, + "status": "READY", + "interpreterGroup": [ + { + "name": "spark", + "class": "org.apache.zeppelin.spark.SparkInterpreter", + "defaultInterpreter": true + }, + { + "name": "sql", + "class": "org.apache.zeppelin.spark.SparkSqlInterpreter", + "defaultInterpreter": false + }, + { + "name": "dep", + "class": "org.apache.zeppelin.spark.DepInterpreter", + "defaultInterpreter": false + }, + { + "name": "pyspark", + "class": "org.apache.zeppelin.spark.PySparkInterpreter", + "defaultInterpreter": false + }, + { + "name": "r", + "class": "org.apache.zeppelin.spark.SparkRInterpreter", + "defaultInterpreter": false + } + ], + "dependencies": [], + "option": { + "remote": true, + "port": -1, + "perNoteSession": false, + "perNoteProcess": false, + "isExistingProcess": false, + "setPermission": false + } +} +''' \ No newline at end of file
