Paul Brenner created ZEPPELIN-5310:
--------------------------------------
Summary: Cluster mode is broken on latest build from source
Key: ZEPPELIN-5310
URL: https://issues.apache.org/jira/browse/ZEPPELIN-5310
Project: Zeppelin
Issue Type: Bug
Affects Versions: 0.10.0
Environment: Interpreter settings are as follows:
"spark_paul": {
"id": "spark_paul",
"name": "spark_paul",
"group": "spark",
"properties": {
"SPARK_HOME": {
"name": "SPARK_HOME",
"value": "",
"type": "string",
"description": "Location of spark distribution"
},
"spark.master": {
"name": "spark.master",
"value": "yarn",
"type": "string",
"description": "Spark master uri. local | yarn-client | yarn-cluster | spark
master address of standalone mode, ex) spark://master_host:7077"
},
"spark.submit.deployMode": {
"name": "spark.submit.deployMode",
"value": "client",
"type": "string",
"description": "The deploy mode of Spark driver program, either \"client\" or
\"cluster\", Which means to launch driver program locally (\"client\") or
remotely (\"cluster\") on one of the nodes inside the cluster."
},
"spark.app.name": {
"name": "spark.app.name",
"value": "zeppelin_dev_paul",
"type": "string",
"description": "The name of spark application."
},
"spark.driver.cores": {
"name": "spark.driver.cores",
"value": "1",
"type": "number",
"description": "Number of cores to use for the driver process, only in cluster
mode."
},
"spark.driver.memory": {
"name": "spark.driver.memory",
"value": "5g",
"type": "string",
"description": "Amount of memory to use for the driver process, i.e. where
SparkContext is initialized, in the same format as JVM memory strings with a
size unit suffix (\"k\", \"m\", \"g\" or \"t\") (e.g. 512m, 2g)."
},
"spark.executor.cores": {
"name": "spark.executor.cores",
"value": "1",
"type": "number",
"description": "The number of cores to use on each executor"
},
"spark.executor.memory": {
"name": "spark.executor.memory",
"value": "3g",
"type": "string",
"description": "Executor memory per worker instance. ex) 512m, 32g"
},
"spark.executor.instances": {
"name": "spark.executor.instances",
"value": "2",
"type": "number",
"description": "The number of executors for static allocation."
},
"spark.files": {
"name": "spark.files",
"value": "",
"type": "string",
"description": "Comma-separated list of files to be placed in the working
directory of each executor. Globs are allowed."
},
"spark.jars": {
"name": "spark.jars",
"value":
"http://nexus.placeiq.net:8081/nexus/content/repositories/releases/com/placeiq/lap/4.1.25/lap-4.1.25.jar,hdfs://gandalf-nn.placeiq.net/lib/dap/0.1.0/dap-jar-assembled.jar";,
"type": "string",
"description": "Comma-separated list of jars to include on the driver and
executor classpaths. Globs are allowed."
},
"spark.jars.packages": {
"name": "spark.jars.packages",
"value": "ds-commons:ds-commons_2.11:0.1-SNAPSHOT",
"type": "string",
"description": "Comma-separated list of Maven coordinates of jars to include
on the driver and executor classpaths. The coordinates should be
groupId:artifactId:version. If spark.jars.ivySettings is given artifacts will
be resolved according to the configuration in the file, otherwise artifacts
will be searched for in the local maven repo, then maven central and finally
any additional remote repositories given by the command-line option
--repositories."
},
"zeppelin.spark.useHiveContext": {
"name": "zeppelin.spark.useHiveContext",
"value": true,
"type": "checkbox",
"description": "Use HiveContext instead of SQLContext if it is true. Enable
hive for SparkSession."
},
"zeppelin.spark.printREPLOutput": {
"name": "zeppelin.spark.printREPLOutput",
"value": true,
"type": "checkbox",
"description": "Print REPL output"
},
"zeppelin.spark.maxResult": {
"name": "zeppelin.spark.maxResult",
"value": "1000",
"type": "number",
"description": "Max number of result to display."
},
"zeppelin.spark.enableSupportedVersionCheck": {
"name": "zeppelin.spark.enableSupportedVersionCheck",
"value": true,
"type": "checkbox",
"description": "Whether checking supported spark version. Developer only
setting, not for production use"
},
"zeppelin.spark.uiWebUrl": {
"name": "zeppelin.spark.uiWebUrl",
"value": "",
"type": "string",
"description": "Override Spark UI default URL. In Kubernetes mode, value can
be Jinja template string with 3 template variables \u0027PORT\u0027,
\u0027SERVICE_NAME\u0027 and \u0027SERVICE_DOMAIN\u0027. (ex:
http://\{{PORT}}-\{{SERVICE_NAME}}.\{{SERVICE_DOMAIN}})"
},
"zeppelin.spark.ui.hidden": {
"name": "zeppelin.spark.ui.hidden",
"value": false,
"type": "checkbox",
"description": "Whether hide spark ui in zeppelin ui"
},
"spark.webui.yarn.useProxy": {
"name": "spark.webui.yarn.useProxy",
"value": false,
"type": "checkbox",
"description": "whether use yarn proxy url as spark weburl, e.g.
http://localhost:8088/proxy/application_1583396598068_0004";
},
"zeppelin.spark.scala.color": {
"name": "zeppelin.spark.scala.color",
"value": true,
"type": "checkbox",
"description": "Whether enable color output of spark scala interpreter"
},
"zeppelin.spark.deprecatedMsg.show": {
"name": "zeppelin.spark.deprecatedMsg.show",
"value": true,
"type": "checkbox",
"description": "Whether show the spark deprecated message, spark 2.2 and
before are deprecated. Zeppelin will display warning message by default"
},
"zeppelin.spark.concurrentSQL": {
"name": "zeppelin.spark.concurrentSQL",
"value": true,
"type": "checkbox",
"description": "Execute multiple SQL concurrently if set true."
},
"zeppelin.spark.concurrentSQL.max": {
"name": "zeppelin.spark.concurrentSQL.max",
"value": "10",
"type": "number",
"description": "Max number of SQL concurrently executed"
},
"zeppelin.spark.sql.stacktrace": {
"name": "zeppelin.spark.sql.stacktrace",
"value": true,
"type": "checkbox",
"description": "Show full exception stacktrace for SQL queries if set to true."
},
"zeppelin.spark.sql.interpolation": {
"name": "zeppelin.spark.sql.interpolation",
"value": false,
"type": "checkbox",
"description": "Enable ZeppelinContext variable interpolation into spark sql"
},
"PYSPARK_PYTHON": {
"name": "PYSPARK_PYTHON",
"value": "python",
"type": "string",
"description": "Python binary executable to use for PySpark in both driver and
workers (default is python2.7 if available, otherwise python). Property
`spark.pyspark.python` take precedence if it is set"
},
"PYSPARK_DRIVER_PYTHON": {
"name": "PYSPARK_DRIVER_PYTHON",
"value": "python",
"type": "string",
"description": "Python binary executable to use for PySpark in driver only
(default is `PYSPARK_PYTHON`). Property `spark.pyspark.driver.python` take
precedence if it is set"
},
"zeppelin.pyspark.useIPython": {
"name": "zeppelin.pyspark.useIPython",
"value": true,
"type": "checkbox",
"description": "Whether use IPython when it is available"
},
"zeppelin.R.knitr": {
"name": "zeppelin.R.knitr",
"value": true,
"type": "checkbox",
"description": "Whether use knitr or not"
},
"zeppelin.R.cmd": {
"name": "zeppelin.R.cmd",
"value": "R",
"type": "string",
"description": "R binary executable path"
},
"zeppelin.R.image.width": {
"name": "zeppelin.R.image.width",
"value": "100%",
"type": "number",
"description": "Image width of R plotting"
},
"zeppelin.R.render.options": {
"name": "zeppelin.R.render.options",
"value": "out.format \u003d \u0027html\u0027, comment \u003d NA, echo \u003d
FALSE, results \u003d \u0027asis\u0027, message \u003d F, warning \u003d F,
fig.retina \u003d 2",
"type": "textarea",
"description": ""
},
"zeppelin.kotlin.shortenTypes": {
"name": "zeppelin.kotlin.shortenTypes",
"value": true,
"type": "checkbox",
"description": "Show short types instead of full, e.g. List\u003cString\u003e
or kotlin.collections.List\u003ckotlin.String\u003e"
},
"spark.dynamicAllocation.executorIdleTimeout": {
"name": "spark.dynamicAllocation.executorIdleTimeout",
"value": "2m",
"type": "textarea"
},
"spark.dynamicAllocation.enabled": {
"name": "spark.dynamicAllocation.enabled",
"value": "true",
"type": "textarea"
},
"spark.dynamicAllocation.minExecutors": {
"name": "spark.dynamicAllocation.minExecutors",
"value": "4",
"type": "textarea"
},
"spark.shuffle.service.enabled": {
"name": "spark.shuffle.service.enabled",
"value": "true",
"type": "textarea"
},
"spark.yarn.queue": {
"name": "spark.yarn.queue",
"value": "pbrenner",
"type": "textarea"
},
"spark.dynamicAllocation.cachedExecutorIdleTimeout": {
"name": "spark.dynamicAllocation.cachedExecutorIdleTimeout",
"value": "2m",
"type": "textarea"
},
"spark.jars.repositories": {
"name": "spark.jars.repositories",
"value": "http://nexus.placeiq.net:8081/nexus/content/repositories/snapshots";,
"type": "textarea"
},
"spark.executor.memoryOverhead": {
"name": "spark.executor.memoryOverhead",
"value": "4g",
"type": "textarea"
},
"zeppelin.interpreter.connect.timeout": {
"name": "zeppelin.interpreter.connect.timeout",
"value": "300000",
"type": "textarea"
}
},
"status": "READY",
"interpreterGroup": [
{
"name": "spark",
"class": "org.apache.zeppelin.spark.SparkInterpreter",
"defaultInterpreter": true,
"editor": {
"language": "scala",
"editOnDblClick": false,
"completionKey": "TAB",
"completionSupport": true
}
},
{
"name": "sql",
"class": "org.apache.zeppelin.spark.SparkSqlInterpreter",
"defaultInterpreter": false,
"editor": {
"language": "sql",
"editOnDblClick": false,
"completionKey": "TAB",
"completionSupport": true
}
},
{
"name": "pyspark",
"class": "org.apache.zeppelin.spark.PySparkInterpreter",
"defaultInterpreter": false,
"editor": {
"language": "python",
"editOnDblClick": false,
"completionKey": "TAB",
"completionSupport": true
}
},
{
"name": "ipyspark",
"class": "org.apache.zeppelin.spark.IPySparkInterpreter",
"defaultInterpreter": false,
"editor": {
"language": "python",
"editOnDblClick": false,
"completionSupport": true,
"completionKey": "TAB"
}
},
{
"name": "r",
"class": "org.apache.zeppelin.spark.SparkRInterpreter",
"defaultInterpreter": false,
"editor": {
"language": "r",
"editOnDblClick": false,
"completionSupport": false,
"completionKey": "TAB"
}
},
{
"name": "ir",
"class": "org.apache.zeppelin.spark.SparkIRInterpreter",
"defaultInterpreter": false,
"editor": {
"language": "r",
"editOnDblClick": false,
"completionSupport": true,
"completionKey": "TAB"
}
},
{
"name": "shiny",
"class": "org.apache.zeppelin.spark.SparkShinyInterpreter",
"defaultInterpreter": false,
"editor": {
"language": "r",
"editOnDblClick": false,
"completionSupport": true,
"completionKey": "TAB"
}
},
{
"name": "kotlin",
"class": "org.apache.zeppelin.spark.KotlinSparkInterpreter",
"defaultInterpreter": false,
"editor": {
"language": "kotlin",
"editOnDblClick": false,
"completionKey": "TAB",
"completionSupport": false
}
}
],
"dependencies": [],
"option": {
"remote": true,
"port": -1,
"perNote": "isolated",
"perUser": "isolated",
"isExistingProcess": false,
"setPermission": false,
"owners": [],
"isUserImpersonate": true
}
},
Reporter: Paul Brenner
We built Zeppelin from source on March 26th at commit
85ed8e2e51e1ea10df38d4710216343efe218d60. Cluster deploy mode
(spark.submit.deployMode = cluster) is broken in this build. When we try to use
cluster mode, we see the following error:
{code:java}
org.apache.zeppelin.interpreter.InterpreterException: java.io.IOException: Fail to set additional jars for spark interpreter
    at org.apache.zeppelin.interpreter.remote.RemoteInterpreter.open(RemoteInterpreter.java:129)
    at org.apache.zeppelin.interpreter.remote.RemoteInterpreter.getFormType(RemoteInterpreter.java:271)
    at org.apache.zeppelin.notebook.Paragraph.jobRun(Paragraph.java:442)
    at org.apache.zeppelin.notebook.Paragraph.jobRun(Paragraph.java:71)
    at org.apache.zeppelin.scheduler.Job.run(Job.java:172)
    at org.apache.zeppelin.scheduler.AbstractScheduler.runJob(AbstractScheduler.java:132)
    at org.apache.zeppelin.scheduler.RemoteScheduler$JobRunner.run(RemoteScheduler.java:182)
    at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
    at java.util.concurrent.FutureTask.run(FutureTask.java:266)
    at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$201(ScheduledThreadPoolExecutor.java:180)
    at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:293)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
    at java.lang.Thread.run(Thread.java:748)
Caused by: java.io.IOException: Fail to set additional jars for spark interpreter
    at org.apache.zeppelin.interpreter.launcher.SparkInterpreterLauncher.buildEnvFromProperties(SparkInterpreterLauncher.java:163)
    at org.apache.zeppelin.interpreter.launcher.StandardInterpreterLauncher.launchDirectly(StandardInterpreterLauncher.java:77)
    at org.apache.zeppelin.interpreter.launcher.InterpreterLauncher.launch(InterpreterLauncher.java:110)
    at org.apache.zeppelin.interpreter.InterpreterSetting.createInterpreterProcess(InterpreterSetting.java:847)
    at org.apache.zeppelin.interpreter.ManagedInterpreterGroup.getOrCreateInterpreterProcess(ManagedInterpreterGroup.java:66)
    at org.apache.zeppelin.interpreter.remote.RemoteInterpreter.getOrCreateInterpreterProcess(RemoteInterpreter.java:104)
    at org.apache.zeppelin.interpreter.remote.RemoteInterpreter.internal_create(RemoteInterpreter.java:154)
    at org.apache.zeppelin.interpreter.remote.RemoteInterpreter.open(RemoteInterpreter.java:126)
    ... 13 more
Caused by: java.io.IOException: Cannot run program "null/bin/spark-submit": error=2, No such file or directory
    at java.lang.ProcessBuilder.start(ProcessBuilder.java:1048)
    at org.apache.zeppelin.interpreter.launcher.SparkInterpreterLauncher.detectSparkScalaVersion(SparkInterpreterLauncher.java:233)
    at org.apache.zeppelin.interpreter.launcher.SparkInterpreterLauncher.buildEnvFromProperties(SparkInterpreterLauncher.java:127)
    ... 20 more
Caused by: java.io.IOException: error=2, No such file or directory
    at java.lang.UNIXProcess.forkAndExec(Native Method)
    at java.lang.UNIXProcess.<init>(UNIXProcess.java:247)
    at java.lang.ProcessImpl.start(ProcessImpl.java:134)
    at java.lang.ProcessBuilder.start(ProcessBuilder.java:1029)
    ... 22 more
{code}
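The innermost cause ("Cannot run program \"null/bin/spark-submit\"") suggests that SPARK_HOME resolves to null at the point where the launcher builds the spark-submit path for detectSparkScalaVersion; note that SPARK_HOME is left empty in the interpreter settings above. The sketch below is only an illustration of that failure mode, not Zeppelin's actual SparkInterpreterLauncher code (the class name and flow are ours): it shows how an unset SPARK_HOME turns into the literal path "null/bin/spark-submit" and the error=2 at the bottom of the trace.
{code:java}
import java.io.IOException;
import java.util.Arrays;

// Hypothetical demo class; not part of Zeppelin. Illustrates how an unset
// SPARK_HOME becomes the literal path "null/bin/spark-submit".
public class SparkSubmitPathDemo {
    public static void main(String[] args) {
        String sparkHome = System.getenv("SPARK_HOME"); // null when the variable is not set
        String sparkSubmit = sparkHome + "/bin/spark-submit"; // concatenation yields "null/bin/spark-submit"
        System.out.println("Resolved spark-submit path: " + sparkSubmit);
        try {
            // Fails with: java.io.IOException: Cannot run program "null/bin/spark-submit":
            // error=2, No such file or directory
            new ProcessBuilder(Arrays.asList(sparkSubmit, "--version")).start();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
{code}
Compiling and running this with SPARK_HOME unset reproduces a java.io.IOException with the same error=2 message as the bottom of the stack trace.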