vaibhavsw opened a new issue, #16695:
URL: https://github.com/apache/dolphinscheduler/issues/16695

   ### Search before asking
   
   - [X] I had searched in the 
[issues](https://github.com/apache/dolphinscheduler/issues?q=is%3Aissue) and 
found no similar issues.
   
   
   ### What happened
   
   I tried setting up a Spark job using the Spark task; however, I keep 
getting the error below even though the SPARK_HOME env variable is set up.
   
   ```
   [LOG-PATH]: 
/Users/vaibhav.swarnakar/Workspace/rnd/dolphin-scheduler/apache-dolphinscheduler-3.2.2-bin/standalone-server/logs/20241014/122351408400800/2/3/3.log,
 [HOST]:  192.168.64.1:1234
   [INFO] 2024-10-14 19:32:19.755 +0530 - 
   
***********************************************************************************************
   [INFO] 2024-10-14 19:32:19.758 +0530 - *********************************  
Initialize task context  ***********************************
   [INFO] 2024-10-14 19:32:19.758 +0530 - 
***********************************************************************************************
   [INFO] 2024-10-14 19:32:19.759 +0530 - Begin to initialize task
   [INFO] 2024-10-14 19:32:19.759 +0530 - Set task startTime: 1728914539759
   [INFO] 2024-10-14 19:32:19.759 +0530 - Set task appId: 3_3
   [INFO] 2024-10-14 19:32:19.759 +0530 - End initialize task {
     "taskInstanceId" : 3,
     "taskName" : "SparkPi",
     "firstSubmitTime" : 1728914539749,
     "startTime" : 1728914539759,
     "taskType" : "SPARK",
     "workflowInstanceHost" : "192.168.64.1:5678",
     "host" : "192.168.64.1:1234",
     "logPath" : 
"/Users/vaibhav.swarnakar/Workspace/rnd/dolphin-scheduler/apache-dolphinscheduler-3.2.2-bin/standalone-server/logs/20241014/122351408400800/2/3/3.log",
     "processId" : 0,
     "processDefineCode" : 122351408400800,
     "processDefineVersion" : 2,
     "processInstanceId" : 3,
     "scheduleTime" : 0,
     "executorId" : 1,
     "cmdTypeIfComplement" : 0,
     "tenantCode" : "default",
     "processDefineId" : 0,
     "projectId" : 0,
     "projectCode" : 122351346826656,
     "taskParams" : 
"{\"localParams\":[],\"rawScript\":\"\",\"resourceList\":[{\"id\":null,\"resourceName\":\"file:/Users/vaibhav.swarnakar/Workspace/rnd/ds-resources-folder/default/resources/spark-examples_2.13-3.5.3.jar\",\"res\":null}],\"programType\":\"SCALA\",\"mainClass\":\"org.apache.spark.examples.SparkPi\",\"mainJar\":{\"id\":null,\"resourceName\":\"file:/Users/vaibhav.swarnakar/Workspace/rnd/ds-resources-folder/default/resources/spark-examples_2.13-3.5.3.jar\",\"res\":null},\"deployMode\":\"cluster\",\"appName\":\"SparkPi\",\"others\":\"--conf
 spark.kubernetes.executor.podNamePrefix=ss-example --conf 
spark.kubernetes.container.image=bitnami/spark:3.5.3-debian-12-r0 --conf 
spark.kubernetes.authenticate.driver.serviceAccountName=spark-submit-sa --conf 
spark.kubernetes.authenticate.submission.oauthToken=$K8S_TOKEN --conf 
spark.executor.instances=2 --conf 
spark.kubernetes.driver.pod.name=spark-pi-driver --conf 
spark.executorEnv.LD_PRELOAD=/opt/bitnami/common/lib/libnss_wrapper.
 so --conf 
spark.kubernetes.driverEnv.SPARK_MASTER_URL=spark://spark-helm-master-0.spark-helm-headless.default.svc.cluster.local:7077
 --conf spark.kubernetes.driver.label.dolphinscheduler-label=2_2 --conf 
spark.kubernetes.namespace=default --conf 
spark.kubernetes.file.upload.path=/tmp\",\"namespace\":\"{\\\"name\\\":\\\"default\\\",\\\"cluster\\\":\\\"minikube\\\"}\",\"yarnQueue\":\"\",\"master\":\"k8s://http://127.0.0.1:9292\",\"driverCores\":1,\"driverMemory\":\"512M\",\"numExecutors\":2,\"executorMemory\":\"2G\",\"executorCores\":2,\"sqlExecutionType\":\"SCRIPT\"}",
     "prepareParamsMap" : {
       "system.task.definition.name" : {
         "prop" : "system.task.definition.name",
         "direct" : "IN",
         "type" : "VARCHAR",
         "value" : "SparkPi"
       },
       "system.project.name" : {
         "prop" : "system.project.name",
         "direct" : "IN",
         "type" : "VARCHAR",
         "value" : null
       },
       "system.project.code" : {
         "prop" : "system.project.code",
         "direct" : "IN",
         "type" : "VARCHAR",
         "value" : "122351346826656"
       },
       "system.workflow.instance.id" : {
         "prop" : "system.workflow.instance.id",
         "direct" : "IN",
         "type" : "VARCHAR",
         "value" : "3"
       },
       "system.biz.curdate" : {
         "prop" : "system.biz.curdate",
         "direct" : "IN",
         "type" : "VARCHAR",
         "value" : "20241014"
       },
       "system.biz.date" : {
         "prop" : "system.biz.date",
         "direct" : "IN",
         "type" : "VARCHAR",
         "value" : "20241013"
       },
       "system.task.instance.id" : {
         "prop" : "system.task.instance.id",
         "direct" : "IN",
         "type" : "VARCHAR",
         "value" : "3"
       },
       "system.workflow.definition.name" : {
         "prop" : "system.workflow.definition.name",
         "direct" : "IN",
         "type" : "VARCHAR",
         "value" : "SparkPi"
       },
       "system.task.definition.code" : {
         "prop" : "system.task.definition.code",
         "direct" : "IN",
         "type" : "VARCHAR",
         "value" : "122351352694176"
       },
       "system.workflow.definition.code" : {
         "prop" : "system.workflow.definition.code",
         "direct" : "IN",
         "type" : "VARCHAR",
         "value" : "122351408400800"
       },
       "system.datetime" : {
         "prop" : "system.datetime",
         "direct" : "IN",
         "type" : "VARCHAR",
         "value" : "20241014193219"
       }
     },
     "taskAppId" : "3_3",
     "taskTimeout" : 2147483647,
     "workerGroup" : "default",
     "delayTime" : 0,
     "currentExecutionStatus" : "SUBMITTED_SUCCESS",
     "endTime" : 0,
     "k8sTaskExecutionContext" : {
       "configYaml" : 
"***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
 
*************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
 
*************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
 
*************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
 
*************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
 
*************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
 
************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************",
       "namespace" : "default"
     },
     "dryRun" : 0,
     "paramsMap" : { },
     "cpuQuota" : -1,
     "memoryMax" : -1,
     "testFlag" : 0,
     "logBufferEnable" : false,
     "dispatchFailTimes" : 0
   }
   [INFO] 2024-10-14 19:32:19.760 +0530 - 
   
***********************************************************************************************
   [INFO] 2024-10-14 19:32:19.760 +0530 - *********************************  
Load task instance plugin  *********************************
   [INFO] 2024-10-14 19:32:19.760 +0530 - 
***********************************************************************************************
   [INFO] 2024-10-14 19:32:19.762 +0530 - Send task status RUNNING_EXECUTION 
master: 192.168.64.1:1234
   [INFO] 2024-10-14 19:32:19.762 +0530 - Current tenant is default tenant, 
will use bootstrap user: vaibhav.swarnkar to execute the task
   [INFO] 2024-10-14 19:32:19.762 +0530 - TenantCode: vaibhav.swarnkar check 
successfully
   [INFO] 2024-10-14 19:32:19.764 +0530 - WorkflowInstanceExecDir: 
/Users/vaibhav.swarnakar/Workspace/rnd/ds-resources-folder/exec/process/vaibhav.swarnkar/122351346826656/122351408400800_2/3/3
 check successfully
   [INFO] 2024-10-14 19:32:19.764 +0530 - Create TaskChannel: 
org.apache.dolphinscheduler.plugin.task.spark.SparkTaskChannel successfully
   [INFO] 2024-10-14 19:32:19.773 +0530 - Download resources successfully: 
   
ResourceContext(resourceItemMap={file:/Users/vaibhav.swarnakar/Workspace/rnd/ds-resources-folder/default/resources/spark-examples_2.13-3.5.3.jar=ResourceContext.ResourceItem(resourceAbsolutePathInStorage=file:/Users/vaibhav.swarnakar/Workspace/rnd/ds-resources-folder/default/resources/spark-examples_2.13-3.5.3.jar,
 
resourceAbsolutePathInLocal=/Users/vaibhav.swarnakar/Workspace/rnd/ds-resources-folder/exec/process/vaibhav.swarnkar/122351346826656/122351408400800_2/3/3/spark-examples_2.13-3.5.3.jar)})
   [INFO] 2024-10-14 19:32:19.773 +0530 - Download upstream files: [] 
successfully
   [INFO] 2024-10-14 19:32:19.773 +0530 - Task plugin instance: SPARK create 
successfully
   [INFO] 2024-10-14 19:32:19.774 +0530 - Initialize spark task params {
     "localParams" : [ ],
     "varPool" : [ ],
     "mainJar" : {
       "id" : null,
       "resourceName" : 
"file:/Users/vaibhav.swarnakar/Workspace/rnd/ds-resources-folder/default/resources/spark-examples_2.13-3.5.3.jar",
       "res" : null
     },
     "mainClass" : "org.apache.spark.examples.SparkPi",
      "master" : "k8s://http://127.0.0.1:9292",
     "deployMode" : "cluster",
     "mainArgs" : null,
     "driverCores" : 1,
     "driverMemory" : "512M",
     "numExecutors" : 2,
     "executorCores" : 2,
     "executorMemory" : "2G",
     "appName" : "SparkPi",
     "yarnQueue" : "",
     "others" : "--conf spark.kubernetes.executor.podNamePrefix=ss-example 
--conf spark.kubernetes.container.image=bitnami/spark:3.5.3-debian-12-r0 --conf 
spark.kubernetes.authenticate.driver.serviceAccountName=spark-submit-sa --conf 
spark.kubernetes.authenticate.submission.oauthToken=$K8S_TOKEN --conf 
spark.executor.instances=2 --conf 
spark.kubernetes.driver.pod.name=spark-pi-driver --conf 
spark.executorEnv.LD_PRELOAD=/opt/bitnami/common/lib/libnss_wrapper.so --conf 
spark.kubernetes.driverEnv.SPARK_MASTER_URL=spark://spark-helm-master-0.spark-helm-headless.default.svc.cluster.local:7077
 --conf spark.kubernetes.driver.label.dolphinscheduler-label=2_2 --conf 
spark.kubernetes.namespace=default --conf 
spark.kubernetes.file.upload.path=/tmp",
     "programType" : "SCALA",
     "rawScript" : "",
     "namespace" : "{\"name\":\"default\",\"cluster\":\"minikube\"}",
     "resourceList" : [ {
       "id" : null,
       "resourceName" : 
"file:/Users/vaibhav.swarnakar/Workspace/rnd/ds-resources-folder/default/resources/spark-examples_2.13-3.5.3.jar",
       "res" : null
     } ],
     "sqlExecutionType" : "SCRIPT"
   }
   [INFO] 2024-10-14 19:32:19.774 +0530 - Success initialized task plugin 
instance successfully
   [INFO] 2024-10-14 19:32:19.774 +0530 - Set taskVarPool: null successfully
   [INFO] 2024-10-14 19:32:19.774 +0530 - 
   
***********************************************************************************************
   [INFO] 2024-10-14 19:32:19.774 +0530 - *********************************  
Execute task instance  *************************************
   [INFO] 2024-10-14 19:32:19.774 +0530 - 
***********************************************************************************************
   [INFO] 2024-10-14 19:32:19.775 +0530 - Created kubernetes configuration 
file: 
/Users/vaibhav.swarnakar/Workspace/rnd/ds-resources-folder/exec/process/vaibhav.swarnkar/122351346826656/122351408400800_2/3/3/config.
   [INFO] 2024-10-14 19:32:19.777 +0530 - Final Shell file is: 
   [INFO] 2024-10-14 19:32:19.777 +0530 - ****************************** Script 
Content *****************************************************************
   [INFO] 2024-10-14 19:32:19.777 +0530 - #!/bin/bash
   BASEDIR=$(cd `dirname $0`; pwd)
   cd $BASEDIR
   export 
KUBECONFIG=/Users/vaibhav.swarnakar/Workspace/rnd/ds-resources-folder/exec/process/vaibhav.swarnkar/122351346826656/122351408400800_2/3/3/config
   ${SPARK_HOME}/bin/spark-submit --master k8s://http://127.0.0.1:9292 
--deploy-mode cluster --class org.apache.spark.examples.SparkPi --conf 
spark.driver.cores=1 --conf spark.driver.memory=512M --conf 
spark.executor.instances=2 --conf spark.executor.cores=2 --conf 
spark.executor.memory=2G --name SparkPi --conf 
spark.kubernetes.executor.podNamePrefix=ss-example --conf 
spark.kubernetes.container.image=bitnami/spark:3.5.3-debian-12-r0 --conf 
spark.kubernetes.authenticate.driver.serviceAccountName=spark-submit-sa --conf 
spark.kubernetes.authenticate.submission.oauthToken=$K8S_TOKEN --conf 
spark.executor.instances=2 --conf 
spark.kubernetes.driver.pod.name=spark-pi-driver --conf 
spark.executorEnv.LD_PRELOAD=/opt/bitnami/common/lib/libnss_wrapper.so --conf 
spark.kubernetes.driverEnv.SPARK_MASTER_URL=spark://spark-helm-master-0.spark-helm-headless.default.svc.cluster.local:7077
 --conf spark.kubernetes.driver.label.dolphinscheduler-label=2_2 --conf 
spark.kubernetes.namespace=default --conf s
 park.kubernetes.file.upload.path=/tmp --conf 
spark.kubernetes.driver.label.dolphinscheduler-label=3_3 --conf 
spark.kubernetes.namespace=default 
/Users/vaibhav.swarnakar/Workspace/rnd/ds-resources-folder/exec/process/vaibhav.swarnkar/122351346826656/122351408400800_2/3/3/spark-examples_2.13-3.5.3.jar
   [INFO] 2024-10-14 19:32:19.777 +0530 - ****************************** Script 
Content *****************************************************************
   [INFO] 2024-10-14 19:32:19.777 +0530 - Executing shell command : sudo -u 
vaibhav.swarnkar -i 
/Users/vaibhav.swarnakar/Workspace/rnd/ds-resources-folder/exec/process/vaibhav.swarnkar/122351346826656/122351408400800_2/3/3/3_3.sh
   [ERROR] 2024-10-14 19:32:19.788 +0530 - Get task pid failed
   java.lang.reflect.InaccessibleObjectException: Unable to make field private 
final int java.lang.ProcessImpl.pid accessible: module java.base does not 
"opens java.lang" to unnamed module @3359c0cf
        at 
java.base/java.lang.reflect.AccessibleObject.throwInaccessibleObjectException(AccessibleObject.java:391)
        at 
java.base/java.lang.reflect.AccessibleObject.checkCanSetAccessible(AccessibleObject.java:367)
        at 
java.base/java.lang.reflect.AccessibleObject.checkCanSetAccessible(AccessibleObject.java:315)
        at 
java.base/java.lang.reflect.Field.checkCanSetAccessible(Field.java:183)
        at java.base/java.lang.reflect.Field.setAccessible(Field.java:177)
        at 
org.apache.dolphinscheduler.plugin.task.api.AbstractCommandExecutor.getProcessId(AbstractCommandExecutor.java:339)
        at 
org.apache.dolphinscheduler.plugin.task.api.AbstractCommandExecutor.run(AbstractCommandExecutor.java:146)
        at 
org.apache.dolphinscheduler.plugin.task.api.AbstractYarnTask.handle(AbstractYarnTask.java:53)
        at 
org.apache.dolphinscheduler.server.worker.runner.DefaultWorkerTaskExecutor.executeTask(DefaultWorkerTaskExecutor.java:51)
        at 
org.apache.dolphinscheduler.server.worker.runner.WorkerTaskExecutor.run(WorkerTaskExecutor.java:172)
        at 
java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1144)
        at 
java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:642)
        at java.base/java.lang.Thread.run(Thread.java:1583)
   [INFO] 2024-10-14 19:32:19.788 +0530 - process start, process id is: 0
   [INFO] 2024-10-14 19:32:20.791 +0530 -  -> 
        
/Users/vaibhav.swarnakar/Workspace/rnd/ds-resources-folder/exec/process/vaibhav.swarnkar/122351346826656/122351408400800_2/3/3/3_3.sh:
 line 5: /bin/spark-submit: No such file or directory
   [ERROR] 2024-10-14 19:32:35.853 +0530 - Handle pod log error
   java.util.concurrent.ExecutionException: java.lang.RuntimeException: 
java.lang.RuntimeException: The driver pod does not exist.
        at java.base/java.util.concurrent.FutureTask.report(FutureTask.java:122)
        at java.base/java.util.concurrent.FutureTask.get(FutureTask.java:191)
        at 
org.apache.dolphinscheduler.plugin.task.api.AbstractCommandExecutor.run(AbstractCommandExecutor.java:182)
        at 
org.apache.dolphinscheduler.plugin.task.api.AbstractYarnTask.handle(AbstractYarnTask.java:53)
        at 
org.apache.dolphinscheduler.server.worker.runner.DefaultWorkerTaskExecutor.executeTask(DefaultWorkerTaskExecutor.java:51)
        at 
org.apache.dolphinscheduler.server.worker.runner.WorkerTaskExecutor.run(WorkerTaskExecutor.java:172)
        at 
java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1144)
        at 
java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:642)
        at java.base/java.lang.Thread.run(Thread.java:1583)
   Caused by: java.lang.RuntimeException: java.lang.RuntimeException: The 
driver pod does not exist.
        at 
org.apache.dolphinscheduler.plugin.task.api.AbstractCommandExecutor.lambda$collectPodLogIfNeeded$0(AbstractCommandExecutor.java:254)
        at 
java.base/java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:572)
        at java.base/java.util.concurrent.FutureTask.run(FutureTask.java:317)
        at 
java.base/java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:304)
        ... 3 common frames omitted
   Caused by: java.lang.RuntimeException: The driver pod does not exist.
        at 
org.apache.dolphinscheduler.plugin.task.api.AbstractCommandExecutor.lambda$collectPodLogIfNeeded$0(AbstractCommandExecutor.java:244)
        ... 6 common frames omitted
   [INFO] 2024-10-14 19:32:35.854 +0530 - process has exited. execute 
path:/Users/vaibhav.swarnakar/Workspace/rnd/ds-resources-folder/exec/process/vaibhav.swarnkar/122351346826656/122351408400800_2/3/3,
 processId:0 ,exitStatusCode:127 ,processWaitForStatus:true 
,processExitValue:127
   [INFO] 2024-10-14 19:32:35.855 +0530 - Start finding appId in 
/Users/vaibhav.swarnakar/Workspace/rnd/dolphin-scheduler/apache-dolphinscheduler-3.2.2-bin/standalone-server/logs/20241014/122351408400800/2/3/3.log,
 fetch way: log 
   [INFO] 2024-10-14 19:32:35.856 +0530 - 
   
***********************************************************************************************
   [INFO] 2024-10-14 19:32:35.856 +0530 - *********************************  
Finalize task instance  ************************************
   [INFO] 2024-10-14 19:32:35.856 +0530 - 
***********************************************************************************************
   [INFO] 2024-10-14 19:32:35.857 +0530 - Upload output files: [] successfully
   [INFO] 2024-10-14 19:32:35.861 +0530 - Send task execute status: FAILURE to 
master : 192.168.64.1:1234
   [INFO] 2024-10-14 19:32:35.861 +0530 - Remove the current task execute 
context from worker cache
   [INFO] 2024-10-14 19:32:35.861 +0530 - The current execute mode isn't 
develop mode, will clear the task execute file: 
/Users/vaibhav.swarnakar/Workspace/rnd/ds-resources-folder/exec/process/vaibhav.swarnkar/122351346826656/122351408400800_2/3/3
   [INFO] 2024-10-14 19:32:35.864 +0530 - Success clear the task execute file: 
/Users/vaibhav.swarnakar/Workspace/rnd/ds-resources-folder/exec/process/vaibhav.swarnkar/122351346826656/122351408400800_2/3/3
   [INFO] 2024-10-14 19:32:35.864 +0530 - FINALIZE_SESSION
   ```
   
   ### What you expected to happen
   
   The setup follows the documentation, yet the job is not being scheduled 
and fails with the error above. Expected behavior: once a fix is available, 
the job should be scheduled in the K8s cluster.
   
   ### How to reproduce
   
   Set up the environment according to the documentation (which, by the way, is very poor).
   
   ### Anything else
   
   _No response_
   
   ### Version
   
   3.2.x
   
   ### Are you willing to submit PR?
   
   - [ ] Yes I am willing to submit a PR!
   
   ### Code of Conduct
   
   - [X] I agree to follow this project's [Code of 
Conduct](https://www.apache.org/foundation/policies/conduct)
   


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: 
[email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to