Hi all, I am trying to get the Thrift server (Thrift2) on Spark 3.1.2 running on K8s, with a single executor for the moment. This works so far, but the executor fails during initialization. The issue seems to be related to access restrictions on certain directories, but I am not sure. Please see the errors in the logs below.
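If it really is a permissions problem on the working directory, I could probably work around it in a derived image. A rough sketch of what I would try (untested; "my-spark-3.1.2" is just a placeholder for our base image, and I am assuming the stock layout where the executor runs as uid 185 with gid 0):

FROM my-spark-3.1.2
USER root
# Make the executor working dir group-writable for the non-root runtime
# user (the stock entrypoint runs as uid 185 with gid 0):
RUN chmod g+w /opt/spark/work-dir
USER 185

But I would prefer to understand why the stock setup fails in the first place.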
The AWS SDK dependency was provided via the "--jars" option, because the "--packages" option failed: I was unable to get Ivy to use either "/opt/spark/work-dir" or "/tmp" as its cache dir:

# - --packages
# - com.amazonaws:aws-java-sdk-s3:1.11.375,org.apache.hadoop:hadoop-aws:3.2.0
- --jars
- local:///opt/spark/jars/aws-java-sdk-1.11.375.jar,local:///opt/spark/jars/hadoop-aws-3.2.0.jar
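For reference, this is roughly how I tried to redirect the Ivy cache before giving up on "--packages" (assuming spark.jars.ivy is the right knob; /tmp/.ivy2 is just one of the locations I tested):

- --conf
- spark.jars.ivy=/tmp/.ivy2

Neither that nor pointing it at /opt/spark/work-dir worked for me, hence the fallback to jars baked into the image.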
Executor Log:

++ id -u
+ myuid=185
++ id -g
+ mygid=0
+ set +e
++ getent passwd 185
+ uidentry=
+ set -e
+ '[' -z '' ']'
+ '[' -w /etc/passwd ']'
+ echo '185:x:185:0:anonymous uid:/opt/spark:/bin/false'
+ SPARK_CLASSPATH=':/opt/spark/jars/*'
+ env
+ grep SPARK_JAVA_OPT_
+ sed 's/[^=]*=\(.*\)/\1/g'
+ sort -t_ -k4 -n
+ readarray -t SPARK_EXECUTOR_JAVA_OPTS
+ '[' -n '' ']'
+ '[' -z ']'
+ '[' -z ']'
+ '[' -n /opt/hadoop ']'
+ '[' -z '' ']'
++ /opt/hadoop/bin/hadoop classpath
+ export 'SPARK_DIST_CLASSPATH=/opt/hadoop/etc/hadoop:/opt/hadoop/share/hadoop/common/lib/*:/opt/hadoop/share/hadoop/common/*:/opt/hadoop/share/hadoop/hdfs:/opt/hadoop/share/hadoop/hdfs/lib/*:/opt/hadoop/share/hadoop/hdfs/*:/opt/hadoop/share/hadoop/mapreduce/lib/*:/opt/hadoop/share/hadoop/mapreduce/*:/opt/hadoop/share/hadoop/yarn:/opt/hadoop/share/hadoop/yarn/lib/*:/opt/hadoop/share/hadoop/yarn/*'
+ SPARK_DIST_CLASSPATH='/opt/hadoop/etc/hadoop:/opt/hadoop/share/hadoop/common/lib/*:/opt/hadoop/share/hadoop/common/*:/opt/hadoop/share/hadoop/hdfs:/opt/hadoop/share/hadoop/hdfs/lib/*:/opt/hadoop/share/hadoop/hdfs/*:/opt/hadoop/share/hadoop/mapreduce/lib/*:/opt/hadoop/share/hadoop/mapreduce/*:/opt/hadoop/share/hadoop/yarn:/opt/hadoop/share/hadoop/yarn/lib/*:/opt/hadoop/share/hadoop/yarn/*'
+ '[' -z ']'
+ '[' -z x ']'
+ SPARK_CLASSPATH='/opt/spark/conf::/opt/spark/jars/*'
+ case "$1" in
+ shift 1
+ CMD=(${JAVA_HOME}/bin/java "${SPARK_EXECUTOR_JAVA_OPTS[@]}" -Xms$SPARK_EXECUTOR_MEMORY -Xmx$SPARK_EXECUTOR_MEMORY -cp "$SPARK_CLASSPATH:$SPARK_DIST_CLASSPATH" org.apache.spark.executor.CoarseGrainedExecutorBackend --driver-url $SPARK_DRIVER_URL --executor-id $SPARK_EXECUTOR_ID --cores $SPARK_EXECUTOR_CORES --app-id $SPARK_APPLICATION_ID --hostname $SPARK_EXECUTOR_POD_IP --resourceProfileId $SPARK_RESOURCE_PROFILE_ID)
+ exec /usr/bin/tini -s -- /usr/local/openjdk-8/bin/java -Dspark.hadoop.hive.server2.thrift.port=10000 -Dspark.driver.port=2222 -Dspark.driver.blockManager.port=7777 -Xms1024m -Xmx1024m -cp '/opt/spark/conf::/opt/spark/jars/*:/opt/hadoop/etc/hadoop:/opt/hadoop/share/hadoop/common/lib/*:/opt/hadoop/share/hadoop/common/*:/opt/hadoop/share/hadoop/hdfs:/opt/hadoop/share/hadoop/hdfs/lib/*:/opt/hadoop/share/hadoop/hdfs/*:/opt/hadoop/share/hadoop/mapreduce/lib/*:/opt/hadoop/share/hadoop/mapreduce/*:/opt/hadoop/share/hadoop/yarn:/opt/hadoop/share/hadoop/yarn/lib/*:/opt/hadoop/share/hadoop/yarn/*' org.apache.spark.executor.CoarseGrainedExecutorBackend --driver-url spark://CoarseGrainedScheduler@spark-thrift-server-internal:2222 --executor-id 24 --cores 1 --app-id spark-application-1636134793639 --hostname 10.1.16.66 --resourceProfileId 0

log4j:ERROR setFile(null,true) call failed.
java.io.FileNotFoundException: ./fairscheduler-statedump.log (Permission denied)
    at java.io.FileOutputStream.open0(Native Method)
    at java.io.FileOutputStream.open(FileOutputStream.java:270)
    at java.io.FileOutputStream.<init>(FileOutputStream.java:213)
    at java.io.FileOutputStream.<init>(FileOutputStream.java:133)
    at org.apache.log4j.FileAppender.setFile(FileAppender.java:294)
    at org.apache.log4j.RollingFileAppender.setFile(RollingFileAppender.java:207)
    at org.apache.log4j.FileAppender.activateOptions(FileAppender.java:165)
    at org.apache.log4j.config.PropertySetter.activate(PropertySetter.java:307)
    at org.apache.log4j.config.PropertySetter.setProperties(PropertySetter.java:172)
    at org.apache.log4j.config.PropertySetter.setProperties(PropertySetter.java:104)
    at org.apache.log4j.PropertyConfigurator.parseAppender(PropertyConfigurator.java:842)
    at org.apache.log4j.PropertyConfigurator.parseCategory(PropertyConfigurator.java:768)
    at org.apache.log4j.PropertyConfigurator.parseCatsAndRenderers(PropertyConfigurator.java:672)
    at org.apache.log4j.PropertyConfigurator.doConfigure(PropertyConfigurator.java:516)
    at org.apache.log4j.PropertyConfigurator.doConfigure(PropertyConfigurator.java:580)
    at org.apache.log4j.helpers.OptionConverter.selectAndConfigure(OptionConverter.java:526)
    at org.apache.log4j.LogManager.<clinit>(LogManager.java:127)
    at org.slf4j.impl.Log4jLoggerFactory.<init>(Log4jLoggerFactory.java:66)
    at org.slf4j.impl.StaticLoggerBinder.<init>(StaticLoggerBinder.java:72)
    at org.slf4j.impl.StaticLoggerBinder.<clinit>(StaticLoggerBinder.java:45)
    at org.apache.spark.internal.Logging$.org$apache$spark$internal$Logging$$isLog4j12(Logging.scala:222)
    at org.apache.spark.internal.Logging.initializeLogging(Logging.scala:127)
    at org.apache.spark.internal.Logging.initializeLogIfNecessary(Logging.scala:111)
    at org.apache.spark.internal.Logging.initializeLogIfNecessary$(Logging.scala:105)
    at org.apache.spark.executor.CoarseGrainedExecutorBackend$.initializeLogIfNecessary(CoarseGrainedExecutorBackend.scala:357)
    at org.apache.spark.internal.Logging.initializeLogIfNecessary(Logging.scala:102)
    at org.apache.spark.internal.Logging.initializeLogIfNecessary$(Logging.scala:101)
    at org.apache.spark.executor.CoarseGrainedExecutorBackend$.initializeLogIfNecessary(CoarseGrainedExecutorBackend.scala:357)
    at org.apache.spark.internal.Logging.log(Logging.scala:49)
    at org.apache.spark.internal.Logging.log$(Logging.scala:47)
    at org.apache.spark.executor.CoarseGrainedExecutorBackend$.log(CoarseGrainedExecutorBackend.scala:357)
    at org.apache.spark.executor.CoarseGrainedExecutorBackend$.run(CoarseGrainedExecutorBackend.scala:391)
    at org.apache.spark.executor.CoarseGrainedExecutorBackend$.main(CoarseGrainedExecutorBackend.scala:382)
    at org.apache.spark.executor.CoarseGrainedExecutorBackend.main(CoarseGrainedExecutorBackend.scala)
SLF4J: Class path contains multiple SLF4J bindings.
SLF4J: Found binding in [jar:file:/opt/spark/jars/slf4j-log4j12-1.7.30.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/opt/hadoop-3.2.0/share/hadoop/common/lib/slf4j-log4j12-1.7.25.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.
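(Side note: I believe the FileNotFoundException above is only noise from a file appender in our log4j.properties that writes ./fairscheduler-statedump.log relative to the same non-writable working dir. Pointing it at a writable location should silence it; the appender name below is hypothetical, ours may differ:

# log4j.properties: send the fair-scheduler state dump somewhere writable
log4j.appender.scheduler=org.apache.log4j.RollingFileAppender
log4j.appender.scheduler.File=/tmp/fairscheduler-statedump.log

The actual failure seems to come later, see below.)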
SLF4J: Actual binding is of type [org.slf4j.impl.Log4jLoggerFactory]
2021-11-05 17:56:22,269 INFO executor.CoarseGrainedExecutorBackend: Started daemon with process name: 34@spark-sql-f130977cf13baafe-exec-24
2021-11-05 17:56:22,280 INFO util.SignalUtils: Registering signal handler for TERM
2021-11-05 17:56:22,281 INFO util.SignalUtils: Registering signal handler for HUP
2021-11-05 17:56:22,282 INFO util.SignalUtils: Registering signal handler for INT
2021-11-05 17:56:22,885 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
2021-11-05 17:56:23,030 INFO spark.SecurityManager: Changing view acls to: 185
2021-11-05 17:56:23,031 INFO spark.SecurityManager: Changing modify acls to: 185
2021-11-05 17:56:23,032 INFO spark.SecurityManager: Changing view acls groups to:
2021-11-05 17:56:23,033 INFO spark.SecurityManager: Changing modify acls groups to:
2021-11-05 17:56:23,034 INFO spark.SecurityManager: SecurityManager: authentication disabled; ui acls disabled; users with view permissions: Set(185); groups with view permissions: Set(); users with modify permissions: Set(185); groups with modify permissions: Set()
2021-11-05 17:56:23,520 INFO client.TransportClientFactory: Successfully created connection to spark-thrift-server-internal/10.1.16.42:2222 after 80 ms (0 ms spent in bootstraps)
2021-11-05 17:56:23,636 INFO spark.SecurityManager: Changing view acls to: 185
2021-11-05 17:56:23,636 INFO spark.SecurityManager: Changing modify acls to: 185
2021-11-05 17:56:23,636 INFO spark.SecurityManager: Changing view acls groups to:
2021-11-05 17:56:23,637 INFO spark.SecurityManager: Changing modify acls groups to:
2021-11-05 17:56:23,637 INFO spark.SecurityManager: SecurityManager: authentication disabled; ui acls disabled; users with view permissions: Set(185); groups with view permissions: Set(); users with modify permissions: Set(185); groups with modify permissions: Set()
2021-11-05 17:56:23,711 INFO client.TransportClientFactory: Successfully created connection to spark-thrift-server-internal/10.1.16.42:2222 after 2 ms (0 ms spent in bootstraps)
2021-11-05 17:56:23,781 INFO storage.DiskBlockManager: Created local directory at /var/data/spark-6a927bb0-ffc4-4982-907d-d3b4014f2ad8/blockmgr-b9789f7d-0a70-4b0e-a96d-a27dccd86f79
2021-11-05 17:56:23,845 INFO memory.MemoryStore: MemoryStore started with capacity 413.9 MiB
2021-11-05 17:56:24,127 INFO executor.CoarseGrainedExecutorBackend: Connecting to driver: spark://CoarseGrainedScheduler@spark-thrift-server-internal:2222
2021-11-05 17:56:24,145 INFO resource.ResourceUtils: ==============================================================
2021-11-05 17:56:24,146 INFO resource.ResourceUtils: No custom resources configured for spark.executor.
2021-11-05 17:56:24,146 INFO resource.ResourceUtils: ==============================================================
2021-11-05 17:56:24,178 INFO executor.CoarseGrainedExecutorBackend: Successfully registered with driver
2021-11-05 17:56:24,183 INFO executor.Executor: Starting executor ID 24 on host 10.1.16.66
2021-11-05 17:56:24,302 INFO util.Utils: Successfully started service 'org.apache.spark.network.netty.NettyBlockTransferService' on port 45265.
2021-11-05 17:56:24,302 INFO netty.NettyBlockTransferService: Server created on 10.1.16.66:45265
2021-11-05 17:56:24,304 INFO storage.BlockManager: Using org.apache.spark.storage.RandomBlockReplicationPolicy for block replication policy
2021-11-05 17:56:24,314 INFO storage.BlockManagerMaster: Registering BlockManager BlockManagerId(24, 10.1.16.66, 45265, None)
2021-11-05 17:56:24,322 INFO storage.BlockManagerMaster: Registered BlockManager BlockManagerId(24, 10.1.16.66, 45265, None)
2021-11-05 17:56:24,323 INFO storage.BlockManager: Initialized BlockManager: BlockManagerId(24, 10.1.16.66, 45265, None)
2021-11-05 17:56:24,350 INFO executor.Executor: Fetching file:/opt/spark/jars/aws-java-sdk-1.11.375.jar with timestamp 1636134790723
2021-11-05 17:56:24,365 INFO util.Utils: Copying /opt/spark/jars/aws-java-sdk-1.11.375.jar to /var/data/spark-6a927bb0-ffc4-4982-907d-d3b4014f2ad8/spark-fecb1369-3808-4bed-949f-bce25e5dfd96/-12573051811636134790723_cache
2021-11-05 17:56:24,366 INFO util.Utils: Copying /var/data/spark-6a927bb0-ffc4-4982-907d-d3b4014f2ad8/spark-fecb1369-3808-4bed-949f-bce25e5dfd96/-12573051811636134790723_cache to /opt/spark/work-dir/./aws-java-sdk-1.11.375.jar
2021-11-05 17:56:24,368 ERROR executor.CoarseGrainedExecutorBackend: Executor self-exiting due to : Unable to create executor due to ./aws-java-sdk-1.11.375.jar
java.nio.file.AccessDeniedException: ./aws-java-sdk-1.11.375.jar
    at sun.nio.fs.UnixException.translateToIOException(UnixException.java:84)
    at sun.nio.fs.UnixException.rethrowAsIOException(UnixException.java:102)
    at sun.nio.fs.UnixException.rethrowAsIOException(UnixException.java:107)
    at sun.nio.fs.UnixCopyFile.copyFile(UnixCopyFile.java:243)
    at sun.nio.fs.UnixCopyFile.copy(UnixCopyFile.java:581)
    at sun.nio.fs.UnixFileSystemProvider.copy(UnixFileSystemProvider.java:253)
    at java.nio.file.Files.copy(Files.java:1274)
    at org.apache.spark.util.Utils$.copyRecursive(Utils.scala:726)
    at org.apache.spark.util.Utils$.copyFile(Utils.scala:697)
    at org.apache.spark.util.Utils$.fetchFile(Utils.scala:534)
    at org.apache.spark.executor.Executor.$anonfun$updateDependencies$13(Executor.scala:953)
    at org.apache.spark.executor.Executor.$anonfun$updateDependencies$13$adapted(Executor.scala:945)
    at scala.collection.TraversableLike$WithFilter.$anonfun$foreach$1(TraversableLike.scala:877)
    at scala.collection.mutable.HashMap.$anonfun$foreach$1(HashMap.scala:149)
    at scala.collection.mutable.HashTable.foreachEntry(HashTable.scala:237)
    at scala.collection.mutable.HashTable.foreachEntry$(HashTable.scala:230)
    at scala.collection.mutable.HashMap.foreachEntry(HashMap.scala:44)
    at scala.collection.mutable.HashMap.foreach(HashMap.scala:149)
    at scala.collection.TraversableLike$WithFilter.foreach(TraversableLike.scala:876)
    at org.apache.spark.executor.Executor.org$apache$spark$executor$Executor$$updateDependencies(Executor.scala:945)
    at org.apache.spark.executor.Executor.<init>(Executor.scala:247)
    at org.apache.spark.executor.CoarseGrainedExecutorBackend$$anonfun$receive$1.applyOrElse(CoarseGrainedExecutorBackend.scala:159)
    at org.apache.spark.rpc.netty.Inbox.$anonfun$process$1(Inbox.scala:115)
    at org.apache.spark.rpc.netty.Inbox.safelyCall(Inbox.scala:213)
    at org.apache.spark.rpc.netty.Inbox.process(Inbox.scala:100)
    at org.apache.spark.rpc.netty.MessageLoop.org$apache$spark$rpc$netty$MessageLoop$$receiveLoop(MessageLoop.scala:75)
    at org.apache.spark.rpc.netty.MessageLoop$$anon$1.run(MessageLoop.scala:41)
    at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
    at java.util.concurrent.FutureTask.run(FutureTask.java:266)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
    at java.lang.Thread.run(Thread.java:748)
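(This is the copy I do not understand: the jar is already baked into the image under /opt/spark/jars, which is on the executor classpath anyway via SPARK_CLASSPATH='/opt/spark/conf::/opt/spark/jars/*' (see the entrypoint trace above). So perhaps I can drop "--jars" entirely and rely on the image, i.e.:

# jars under /opt/spark/jars are on the classpath already,
# so in theory no --jars argument is needed at all:
# - --jars
# - local:///opt/spark/jars/aws-java-sdk-1.11.375.jar,local:///opt/spark/jars/hadoop-aws-3.2.0.jar

I have not verified that yet, though.)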
2021-11-05 17:56:24,382 ERROR util.Utils: Uncaught exception in thread shutdown-hook-0
java.lang.NullPointerException
    at org.apache.spark.executor.Executor.$anonfun$stop$3(Executor.scala:332)
    at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
    at org.apache.spark.util.Utils$.withContextClassLoader(Utils.scala:222)
    at org.apache.spark.executor.Executor.stop(Executor.scala:332)
    at org.apache.spark.executor.Executor.$anonfun$new$2(Executor.scala:76)
    at org.apache.spark.util.SparkShutdownHook.run(ShutdownHookManager.scala:214)
    at org.apache.spark.util.SparkShutdownHookManager.$anonfun$runAll$2(ShutdownHookManager.scala:188)
    at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
    at org.apache.spark.util.Utils$.logUncaughtExceptions(Utils.scala:1996)
    at org.apache.spark.util.SparkShutdownHookManager.$anonfun$runAll$1(ShutdownHookManager.scala:188)
    at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
    at scala.util.Try$.apply(Try.scala:213)
    at org.apache.spark.util.SparkShutdownHookManager.runAll(ShutdownHookManager.scala:188)
    at org.apache.spark.util.SparkShutdownHookManager$$anon$2.run(ShutdownHookManager.scala:178)
    at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
    at java.util.concurrent.FutureTask.run(FutureTask.java:266)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
    at java.lang.Thread.run(Thread.java:748)
2021-11-05 17:56:24,383 INFO storage.DiskBlockManager: Shutdown hook called
2021-11-05 17:56:24,388 INFO util.ShutdownHookManager: Shutdown hook called
2021-11-05 17:56:24,389 INFO util.ShutdownHookManager: Deleting directory /var/data/spark-6a927bb0-ffc4-4982-907d-d3b4014f2ad8/spark-fecb1369-3808-4bed-949f-bce25e5dfd96

Thanks for your help,
Meikel