#------------------------------------------------------------------------------------------------------------
*BATCH PIPELINE :*

python3 batch.py \
  --input beam-userbase.csv \
  --output output/batch \
  --runner=SparkRunner \
  --spark_submit_uber_jar \
  --job_endpoint=localhost:8099 \
  --spark_master_url=spark://ip-10-51-3-xxx:7077 \
  --spark_rest_url=http://iip-10-51-3-xxx:6066 \
  --environment_cache_millis=300000 \
  --environment_type=DOCKER \
  --environment_config="apache/beam_python3.8_sdk:2.29.0"

*OR*

/home/ec2-user/spark-3.1.1-bin-hadoop2.7/bin/spark-submit \
  --master spark://ip-10-51-3-xxx:7077 \
  /home/ec2-user/apache-beam-py/batch.py \
  --runner=PortableRunner \
  --spark_submit_uber_jar \
  --job_endpoint=localhost:8099 \
  --spark_job_server_jar=beam-runners-spark-job-server-2.29.0.jar \
  --input beam-userbase.csv \
  --output output/batch

#------------------------------------------------------------------------------------------------------------

*EXCEPTIONS : in both of the above cases, I am getting the errors below:*

2021/06/03 13:00:12 Initializing python harness: /opt/apache/beam/boot --id=1-1 --provision_endpoint=localhost:37997
2021/06/03 13:00:20 Failed to retrieve staged files: failed to retrieve /tmp/staged in 3 attempts: failed to retrieve chunk for /tmp/staged/pickled_main_session caused by: rpc error: code = Unknown desc = ; failed to retrieve chunk for /tmp/staged/pickled_main_session caused by: rpc error: code = Unknown desc = ; failed to retrieve chunk for /tmp/staged/pickled_main_session caused by: rpc error: code = Unknown desc = ; failed to retrieve chunk for /tmp/staged/pickled_main_session caused by: rpc error: code = Unknown desc =
21/06/03 18:30:22 WARN BlockManager: Putting block rdd_13_1 failed due to exception org.apache.beam.vendor.guava.v26_0_jre.com.google.common.util.concurrent.UncheckedExecutionException: java.lang.IllegalStateException: No container running for id 1e052a63b1a31f08c319c39fcea39f227d6a3b768a90d36ffa90f284180e5e73.
21/06/03 18:30:22 WARN BlockManager: Block rdd_13_1 could not be removed as it was not found on disk or in memory
21/06/03 18:30:22 WARN BlockManager: Putting block rdd_17_1 failed due to exception org.apache.beam.vendor.guava.v26_0_jre.com.google.common.util.concurrent.UncheckedExecutionException: java.lang.IllegalStateException: No container running for id 1e052a63b1a31f08c319c39fcea39f227d6a3b768a90d36ffa90f284180e5e73.
21/06/03 18:30:22 WARN BlockManager: Block rdd_17_1 could not be removed as it was not found on disk or in memory
21/06/03 18:30:22 ERROR Executor: Exception in task 1.0 in stage 0.0 (TID 1)
org.apache.beam.vendor.guava.v26_0_jre.com.google.common.util.concurrent.UncheckedExecutionException: java.lang.IllegalStateException: No container running for id 1e052a63b1a31f08c319c39fcea39f227d6a3b768a90d36ffa90f284180e5e73
    at org.apache.beam.vendor.guava.v26_0_jre.com.google.common.cache.LocalCache$Segment.get(LocalCache.java:2050)
    at org.apache.beam.vendor.guava.v26_0_jre.com.google.common.cache.LocalCache.get(LocalCache.java:3952)
    at org.apache.beam.vendor.guava.v26_0_jre.com.google.common.cache.LocalCache.getOrLoad(LocalCache.java:3974)
    at org.apache.beam.vendor.guava.v26_0_jre.com.google.common.cache.LocalCache$LocalLoadingCache.get(LocalCache.java:4958)
    at org.apache.beam.vendor.guava.v26_0_jre.com.google.common.cache.LocalCache$LocalLoadingCache.getUnchecked(LocalCache.java:4964)
    at org.apache.beam.runners.fnexecution.control.DefaultJobBundleFactory$SimpleStageBundleFactory.<init>(DefaultJobBundleFactory.java:451)
    at org.apache.beam.runners.fnexecution.control.DefaultJobBundleFactory$SimpleStageBundleFactory.<init>(DefaultJobBundleFactory.java:436)
    at org.apache.beam.runners.fnexecution.control.DefaultJobBundleFactory.forStage(DefaultJobBundleFactory.java:303)
    at org.apache.beam.runners.fnexecution.control.DefaultExecutableStageContext.getStageBundleFactory(DefaultExecutableStageContext.java:38)
    at org.apache.beam.runners.fnexecution.control.ReferenceCountingExecutableStageContextFactory$WrappedContext.getStageBundleFactory(ReferenceCountingExecutableStageContextFactory.java:202)
    at org.apache.beam.runners.spark.translation.SparkExecutableStageFunction.call(SparkExecutableStageFunction.java:135)
    at org.apache.beam.runners.spark.translation.SparkExecutableStageFunction.call(SparkExecutableStageFunction.java:80)
    at org.apache.spark.api.java.JavaRDDLike$$anonfun$fn$4$1.apply(JavaRDDLike.scala:153)
    at org.apache.spark.api.java.JavaRDDLike$$anonfun$fn$4$1.apply(JavaRDDLike.scala:153)
    at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$23.apply(RDD.scala:823)
    at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$23.apply(RDD.scala:823)
    at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
    at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
    at org.apache.spark.rdd.RDD$$anonfun$7.apply(RDD.scala:359)
    at org.apache.spark.rdd.RDD$$anonfun$7.apply(RDD.scala:357)
    at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1165)
    at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1156)
    at org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:1091)
    at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1156)
    at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:882)
    at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:357)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:308)
    at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
    at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
    at org.apache.spark.rdd.RDD$$anonfun$7.apply(RDD.scala:359)
    at org.apache.spark.rdd.RDD$$anonfun$7.apply(RDD.scala:357)
    at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1165)
    at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1156)
    at org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:1091)
    at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1156)
    at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:882)
    at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:357)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:308)
    at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
    at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
    at org.apache.spark.scheduler.Task.run(Task.scala:123)
    at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:411)
    at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:417)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
    at java.lang.Thread.run(Thread.java:748)
Caused by: java.lang.IllegalStateException: No container running for id 1e052a63b1a31f08c319c39fcea39f227d6a3b768a90d36ffa90f284180e5e73
    at org.apache.beam.runners.fnexecution.environment.DockerEnvironmentFactory.createEnvironment(DockerEnvironmentFactory.java:140)
    at org.apache.beam.runners.fnexecution.control.DefaultJobBundleFactory$1.load(DefaultJobBundleFactory.java:252)
    at org.apache.beam.runners.fnexecution.control.DefaultJobBundleFactory$1.load(DefaultJobBundleFactory.java:231)
    at org.apache.beam.vendor.guava.v26_0_jre.com.google.common.cache.LocalCache$LoadingValueReference.loadFuture(LocalCache.java:3528)
    at org.apache.beam.vendor.guava.v26_0_jre.com.google.common.cache.LocalCache$Segment.loadSync(LocalCache.java:2277)
    at org.apache.beam.vendor.guava.v26_0_jre.com.google.common.cache.LocalCache$Segment.lockedGetOrLoad(LocalCache.java:2154)
    at org.apache.beam.vendor.guava.v26_0_jre.com.google.common.cache.LocalCache$Segment.get(LocalCache.java:2044)
    ... 54 more
    Suppressed: java.io.IOException: Received exit code 1 for command 'docker kill 1e052a63b1a31f08c319c39fcea39f227d6a3b768a90d36ffa90f284180e5e73'. stderr: Error response from daemon: Cannot kill container: 1e052a63b1a31f08c319c39fcea39f227d6a3b768a90d36ffa90f284180e5e73: Container 1e052a63b1a31f08c319c39fcea39f227d6a3b768a90d36ffa90f284180e5e73 is not running
        at org.apache.beam.runners.fnexecution.environment.DockerCommand.runShortCommand(DockerCommand.java:237)
        at org.apache.beam.runners.fnexecution.environment.DockerCommand.runShortCommand(DockerCommand.java:171)
        at org.apache.beam.runners.fnexecution.environment.DockerCommand.killContainer(DockerCommand.java:151)
        at org.apache.beam.runners.fnexecution.environment.DockerEnvironmentFactory.createEnvironment(DockerEnvironmentFactory.java:164)
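#------------------------------------------------------------------------------------------------------------
*CONTEXT (sketch only) :* batch.py itself is not shown in this post, so the pipeline below is only a minimal sketch of what the --input/--output flags in the submit commands suggest; the transform names and CSV handling are assumptions. One detail worth noting in relation to the first error: the pickled_main_session file that the staging step fails to retrieve is the artifact produced when the pipeline is run with save_main_session=True.

# batch.py -- minimal sketch (assumed), matching the flags used in the submit commands above
import argparse

import apache_beam as beam
from apache_beam.options.pipeline_options import PipelineOptions


def run(argv=None):
    parser = argparse.ArgumentParser()
    parser.add_argument('--input', required=True,
                        help='Input CSV, e.g. beam-userbase.csv')
    parser.add_argument('--output', required=True,
                        help='Output path prefix, e.g. output/batch')
    known_args, pipeline_args = parser.parse_known_args(argv)

    # Everything else (--runner, --job_endpoint, --environment_type,
    # --environment_config, ...) is handed to Beam as pipeline options.
    # save_main_session=True is what stages the pickled_main_session file
    # referenced in the "Failed to retrieve staged files" error.
    options = PipelineOptions(pipeline_args, save_main_session=True)

    with beam.Pipeline(options=options) as p:
        (p
         | 'ReadCSV' >> beam.io.ReadFromText(known_args.input, skip_header_lines=1)
         | 'ParseRows' >> beam.Map(lambda line: line.split(','))
         | 'FormatRows' >> beam.Map(lambda fields: ','.join(fields))
         | 'WriteOutput' >> beam.io.WriteToText(known_args.output))


if __name__ == '__main__':
    run()
#------------------------------------------------------------------------------------------------------------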
