Paul Brenner created ZEPPELIN-5310:
--------------------------------------
Summary: Cluster mode is broken on latest build from source
Key: ZEPPELIN-5310
URL: https://issues.apache.org/jira/browse/ZEPPELIN-5310
Project: Zeppelin
Issue Type: Bug
Affects Versions: 0.10.0
Environment: Interpreter settings are as follows:
"spark_paul": {
"id": "spark_paul",
"name": "spark_paul",
"group": "spark",
"properties": {
"SPARK_HOME": {
"name": "SPARK_HOME",
"value": "",
"type": "string",
"description": "Location of spark distribution"
},
"spark.master": {
"name": "spark.master",
"value": "yarn",
"type": "string",
"description": "Spark master uri. local | yarn-client | yarn-cluster | spark
master address of standalone mode, ex) spark://master_host:7077"
},
"spark.submit.deployMode": {
"name": "spark.submit.deployMode",
"value": "client",
"type": "string",
"description": "The deploy mode of Spark driver program, either \"client\" or
\"cluster\", Which means to launch driver program locally (\"client\") or
remotely (\"cluster\") on one of the nodes inside the cluster."
},
"spark.app.name": {
"name": "spark.app.name",
"value": "zeppelin_dev_paul",
"type": "string",
"description": "The name of spark application."
},
"spark.driver.cores": {
"name": "spark.driver.cores",
"value": "1",
"type": "number",
"description": "Number of cores to use for the driver process, only in cluster
mode."
},
"spark.driver.memory": {
"name": "spark.driver.memory",
"value": "5g",
"type": "string",
"description": "Amount of memory to use for the driver process, i.e. where
SparkContext is initialized, in the same format as JVM memory strings with a
size unit suffix (\"k\", \"m\", \"g\" or \"t\") (e.g. 512m, 2g)."
},
"spark.executor.cores": {
"name": "spark.executor.cores",
"value": "1",
"type": "number",
"description": "The number of cores to use on each executor"
},
"spark.executor.memory": {
"name": "spark.executor.memory",
"value": "3g",
"type": "string",
"description": "Executor memory per worker instance. ex) 512m, 32g"
},
"spark.executor.instances": {
"name": "spark.executor.instances",
"value": "2",
"type": "number",
"description": "The number of executors for static allocation."
},
"spark.files": {
"name": "spark.files",
"value": "",
"type": "string",
"description": "Comma-separated list of files to be placed in the working
directory of each executor. Globs are allowed."
},
"spark.jars": {
"name": "spark.jars",
"value":
"http://nexus.placeiq.net:8081/nexus/content/repositories/releases/com/placeiq/lap/4.1.25/lap-4.1.25.jar,hdfs://gandalf-nn.placeiq.net/lib/dap/0.1.0/dap-jar-assembled.jar";,
"type": "string",
"description": "Comma-separated list of jars to include on the driver and
executor classpaths. Globs are allowed."
},
"spark.jars.packages": {
"name": "spark.jars.packages",
"value": "ds-commons:ds-commons_2.11:0.1-SNAPSHOT",
"type": "string",
"description": "Comma-separated list of Maven coordinates of jars to include
on the driver and executor classpaths. The coordinates should be
groupId:artifactId:version. If spark.jars.ivySettings is given artifacts will
be resolved according to the configuration in the file, otherwise artifacts
will be searched for in the local maven repo, then maven central and finally
any additional remote repositories given by the command-line option
--repositories."
},
"zeppelin.spark.useHiveContext": {
"name": "zeppelin.spark.useHiveContext",
"value": true,
"type": "checkbox",
"description": "Use HiveContext instead of SQLContext if it is true. Enable
hive for SparkSession."
},
"zeppelin.spark.printREPLOutput": {
"name": "zeppelin.spark.printREPLOutput",
"value": true,
"type": "checkbox",
"description": "Print REPL output"
},
"zeppelin.spark.maxResult": {
"name": "zeppelin.spark.maxResult",
"value": "1000",
"type": "number",
"description": "Max number of result to display."
},
"zeppelin.spark.enableSupportedVersionCheck": {
"name": "zeppelin.spark.enableSupportedVersionCheck",
"value": true,
"type": "checkbox",
"description": "Whether checking supported spark version. Developer only
setting, not for production use"
},
"zeppelin.spark.uiWebUrl": {
"name": "zeppelin.spark.uiWebUrl",
"value": "",
"type": "string",
"description": "Override Spark UI default URL. In Kubernetes mode, value can
be Jinja template string with 3 template variables \u0027PORT\u0027,
\u0027SERVICE_NAME\u0027 and \u0027SERVICE_DOMAIN\u0027. (ex:
http://\{{PORT}}-\{{SERVICE_NAME}}.\{{SERVICE_DOMAIN}})"
},
"zeppelin.spark.ui.hidden": {
"name": "zeppelin.spark.ui.hidden",
"value": false,
"type": "checkbox",
"description": "Whether hide spark ui in zeppelin ui"
},
"spark.webui.yarn.useProxy": {
"name": "spark.webui.yarn.useProxy",
"value": false,
"type": "checkbox",
"description": "whether use yarn proxy url as spark weburl, e.g.
http://localhost:8088/proxy/application_1583396598068_0004";
},
"zeppelin.spark.scala.color": {
"name": "zeppelin.spark.scala.color",
"value": true,
"type": "checkbox",
"description": "Whether enable color output of spark scala interpreter"
},
"zeppelin.spark.deprecatedMsg.show": {
"name": "zeppelin.spark.deprecatedMsg.show",
"value": true,
"type": "checkbox",
"description": "Whether show the spark deprecated message, spark 2.2 and
before are deprecated. Zeppelin will display warning message by default"
},
"zeppelin.spark.concurrentSQL": {
"name": "zeppelin.spark.concurrentSQL",
"value": true,
"type": "checkbox",
"description": "Execute multiple SQL concurrently if set true."
},
"zeppelin.spark.concurrentSQL.max": {
"name": "zeppelin.spark.concurrentSQL.max",
"value": "10",
"type": "number",
"description": "Max number of SQL concurrently executed"
},
"zeppelin.spark.sql.stacktrace": {
"name": "zeppelin.spark.sql.stacktrace",
"value": true,
"type": "checkbox",
"description": "Show full exception stacktrace for SQL queries if set to true."
},
"zeppelin.spark.sql.interpolation": {
"name": "zeppelin.spark.sql.interpolation",
"value": false,
"type": "checkbox",
"description": "Enable ZeppelinContext variable interpolation into spark sql"
},
"PYSPARK_PYTHON": {
"name": "PYSPARK_PYTHON",
"value": "python",
"type": "string",
"description": "Python binary executable to use for PySpark in both driver and
workers (default is python2.7 if available, otherwise python). Property
`spark.pyspark.python` take precedence if it is set"
},
"PYSPARK_DRIVER_PYTHON": {
"name": "PYSPARK_DRIVER_PYTHON",
"value": "python",
"type": "string",
"description": "Python binary executable to use for PySpark in driver only
(default is `PYSPARK_PYTHON`). Property `spark.pyspark.driver.python` take
precedence if it is set"
},
"zeppelin.pyspark.useIPython": {
"name": "zeppelin.pyspark.useIPython",
"value": true,
"type": "checkbox",
"description": "Whether use IPython when it is available"
},
"zeppelin.R.knitr": {
"name": "zeppelin.R.knitr",
"value": true,
"type": "checkbox",
"description": "Whether use knitr or not"
},
"zeppelin.R.cmd": {
"name": "zeppelin.R.cmd",
"value": "R",
"type": "string",
"description": "R binary executable path"
},
"zeppelin.R.image.width": {
"name": "zeppelin.R.image.width",
"value": "100%",
"type": "number",
"description": "Image width of R plotting"
},
"zeppelin.R.render.options": {
"name": "zeppelin.R.render.options",
"value": "out.format \u003d \u0027html\u0027, comment \u003d NA, echo \u003d
FALSE, results \u003d \u0027asis\u0027, message \u003d F, warning \u003d F,
fig.retina \u003d 2",
"type": "textarea",
"description": ""
},
"zeppelin.kotlin.shortenTypes": {
"name": "zeppelin.kotlin.shortenTypes",
"value": true,
"type": "checkbox",
"description": "Show short types instead of full, e.g. List\u003cString\u003e
or kotlin.collections.List\u003ckotlin.String\u003e"
},
"spark.dynamicAllocation.executorIdleTimeout": {
"name": "spark.dynamicAllocation.executorIdleTimeout",
"value": "2m",
"type": "textarea"
},
"spark.dynamicAllocation.enabled": {
"name": "spark.dynamicAllocation.enabled",
"value": "true",
"type": "textarea"
},
"spark.dynamicAllocation.minExecutors": {
"name": "spark.dynamicAllocation.minExecutors",
"value": "4",
"type": "textarea"
},
"spark.shuffle.service.enabled": {
"name": "spark.shuffle.service.enabled",
"value": "true",
"type": "textarea"
},
"spark.yarn.queue": {
"name": "spark.yarn.queue",
"value": "pbrenner",
"type": "textarea"
},
"spark.dynamicAllocation.cachedExecutorIdleTimeout": {
"name": "spark.dynamicAllocation.cachedExecutorIdleTimeout",
"value": "2m",
"type": "textarea"
},
"spark.jars.repositories": {
"name": "spark.jars.repositories",
"value": "http://nexus.placeiq.net:8081/nexus/content/repositories/snapshots";,
"type": "textarea"
},
"spark.executor.memoryOverhead": {
"name": "spark.executor.memoryOverhead",
"value": "4g",
"type": "textarea"
},
"zeppelin.interpreter.connect.timeout": {
"name": "zeppelin.interpreter.connect.timeout",
"value": "300000",
"type": "textarea"
}
},
"status": "READY",
"interpreterGroup": [
{
"name": "spark",
"class": "org.apache.zeppelin.spark.SparkInterpreter",
"defaultInterpreter": true,
"editor": {
"language": "scala",
"editOnDblClick": false,
"completionKey": "TAB",
"completionSupport": true
}
},
{
"name": "sql",
"class": "org.apache.zeppelin.spark.SparkSqlInterpreter",
"defaultInterpreter": false,
"editor": {
"language": "sql",
"editOnDblClick": false,
"completionKey": "TAB",
"completionSupport": true
}
},
{
"name": "pyspark",
"class": "org.apache.zeppelin.spark.PySparkInterpreter",
"defaultInterpreter": false,
"editor": {
"language": "python",
"editOnDblClick": false,
"completionKey": "TAB",
"completionSupport": true
}
},
{
"name": "ipyspark",
"class": "org.apache.zeppelin.spark.IPySparkInterpreter",
"defaultInterpreter": false,
"editor": {
"language": "python",
"editOnDblClick": false,
"completionSupport": true,
"completionKey": "TAB"
}
},
{
"name": "r",
"class": "org.apache.zeppelin.spark.SparkRInterpreter",
"defaultInterpreter": false,
"editor": {
"language": "r",
"editOnDblClick": false,
"completionSupport": false,
"completionKey": "TAB"
}
},
{
"name": "ir",
"class": "org.apache.zeppelin.spark.SparkIRInterpreter",
"defaultInterpreter": false,
"editor": {
"language": "r",
"editOnDblClick": false,
"completionSupport": true,
"completionKey": "TAB"
}
},
{
"name": "shiny",
"class": "org.apache.zeppelin.spark.SparkShinyInterpreter",
"defaultInterpreter": false,
"editor": {
"language": "r",
"editOnDblClick": false,
"completionSupport": true,
"completionKey": "TAB"
}
},
{
"name": "kotlin",
"class": "org.apache.zeppelin.spark.KotlinSparkInterpreter",
"defaultInterpreter": false,
"editor": {
"language": "kotlin",
"editOnDblClick": false,
"completionKey": "TAB",
"completionSupport": false
}
}
],
"dependencies": [],
"option": {
"remote": true,
"port": -1,
"perNote": "isolated",
"perUser": "isolated",
"isExistingProcess": false,
"setPermission": false,
"owners": [],
"isUserImpersonate": true
}
},
Reporter: Paul Brenner
We built Zeppelin from source on March 26th at commit
85ed8e2e51e1ea10df38d4710216343efe218d60. Cluster deploy mode
(spark.submit.deployMode = cluster) is broken in this build. When we try to use
cluster mode, we see the following error:
{code:java}
org.apache.zeppelin.interpreter.InterpreterException: java.io.IOException: Fail to set additional jars for spark interpreter
    at org.apache.zeppelin.interpreter.remote.RemoteInterpreter.open(RemoteInterpreter.java:129)
    at org.apache.zeppelin.interpreter.remote.RemoteInterpreter.getFormType(RemoteInterpreter.java:271)
    at org.apache.zeppelin.notebook.Paragraph.jobRun(Paragraph.java:442)
    at org.apache.zeppelin.notebook.Paragraph.jobRun(Paragraph.java:71)
    at org.apache.zeppelin.scheduler.Job.run(Job.java:172)
    at org.apache.zeppelin.scheduler.AbstractScheduler.runJob(AbstractScheduler.java:132)
    at org.apache.zeppelin.scheduler.RemoteScheduler$JobRunner.run(RemoteScheduler.java:182)
    at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
    at java.util.concurrent.FutureTask.run(FutureTask.java:266)
    at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$201(ScheduledThreadPoolExecutor.java:180)
    at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:293)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
    at java.lang.Thread.run(Thread.java:748)
Caused by: java.io.IOException: Fail to set additional jars for spark interpreter
    at org.apache.zeppelin.interpreter.launcher.SparkInterpreterLauncher.buildEnvFromProperties(SparkInterpreterLauncher.java:163)
    at org.apache.zeppelin.interpreter.launcher.StandardInterpreterLauncher.launchDirectly(StandardInterpreterLauncher.java:77)
    at org.apache.zeppelin.interpreter.launcher.InterpreterLauncher.launch(InterpreterLauncher.java:110)
    at org.apache.zeppelin.interpreter.InterpreterSetting.createInterpreterProcess(InterpreterSetting.java:847)
    at org.apache.zeppelin.interpreter.ManagedInterpreterGroup.getOrCreateInterpreterProcess(ManagedInterpreterGroup.java:66)
    at org.apache.zeppelin.interpreter.remote.RemoteInterpreter.getOrCreateInterpreterProcess(RemoteInterpreter.java:104)
    at org.apache.zeppelin.interpreter.remote.RemoteInterpreter.internal_create(RemoteInterpreter.java:154)
    at org.apache.zeppelin.interpreter.remote.RemoteInterpreter.open(RemoteInterpreter.java:126)
    ... 13 more
Caused by: java.io.IOException: Cannot run program "null/bin/spark-submit": error=2, No such file or directory
    at java.lang.ProcessBuilder.start(ProcessBuilder.java:1048)
    at org.apache.zeppelin.interpreter.launcher.SparkInterpreterLauncher.detectSparkScalaVersion(SparkInterpreterLauncher.java:233)
    at org.apache.zeppelin.interpreter.launcher.SparkInterpreterLauncher.buildEnvFromProperties(SparkInterpreterLauncher.java:127)
    ... 20 more
Caused by: java.io.IOException: error=2, No such file or directory
    at java.lang.UNIXProcess.forkAndExec(Native Method)
    at java.lang.UNIXProcess.<init>(UNIXProcess.java:247)
    at java.lang.ProcessImpl.start(ProcessImpl.java:134)
    at java.lang.ProcessBuilder.start(ProcessBuilder.java:1029)
    ... 22 more
{code}
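The innermost cause ("Cannot run program \"null/bin/spark-submit\"") suggests that SPARK_HOME resolves to null at the point where the launcher builds the spark-submit path for detectSparkScalaVersion; note that SPARK_HOME is left empty in the interpreter settings above. The sketch below is only an illustration of that failure mode, not Zeppelin's actual SparkInterpreterLauncher code (the class name and flow are ours): it shows how an unset SPARK_HOME turns into the literal path "null/bin/spark-submit" and the error=2 at the bottom of the trace.
{code:java}
import java.io.IOException;
import java.util.Arrays;

// Hypothetical demo class; not part of Zeppelin. Illustrates how an unset
// SPARK_HOME becomes the literal path "null/bin/spark-submit".
public class SparkSubmitPathDemo {
    public static void main(String[] args) {
        String sparkHome = System.getenv("SPARK_HOME"); // null when the variable is not set
        String sparkSubmit = sparkHome + "/bin/spark-submit"; // concatenation yields "null/bin/spark-submit"
        System.out.println("Resolved spark-submit path: " + sparkSubmit);
        try {
            // Fails with: java.io.IOException: Cannot run program "null/bin/spark-submit":
            // error=2, No such file or directory
            new ProcessBuilder(Arrays.asList(sparkSubmit, "--version")).start();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
{code}
Compiling and running this with SPARK_HOME unset reproduces a java.io.IOException with the same error=2 message as the bottom of the stack trace.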