#------------------------------------------------------------------------------------------------------------

*BATCH PIPELINE : *

python3 batch.py \
--input beam-userbase.csv \
--output output/batch \
--runner=SparkRunner \
--spark_submit_uber_jar \
--job_endpoint=localhost:8099 \
--spark_master_url=spark://ip-10-51-3-xxx:7077 \
--spark_rest_url=http://iip-10-51-3-xxx:6066 \
--environment_cache_millis=300000 \
--environment_type=DOCKER \
--environment_config="apache/beam_python3.8_sdk:2.29.0"


*OR*

/home/ec2-user/spark-3.1.1-bin-hadoop2.7/bin/spark-submit \
--master spark://ip-10-51-3-xxx:7077 \
/home/ec2-user/apache-beam-py/batch.py \
--runner=PortableRunner \
--spark_submit_uber_jar \
--job_endpoint=localhost:8099 \
--spark_job_server_jar=beam-runners-spark-job-server-2.29.0.jar \
--input beam-userbase.csv \
--output output/batch


#------------------------------------------------------------------------------------------------------------


*EXCEPTIONS :  in both the above cases, i am getting below errors: *
2021/06/03 13:00:12 Initializing python harness: /opt/apache/beam/boot
--id=1-1 --provision_endpoint=localhost:37997
2021/06/03 13:00:20 Failed to retrieve staged files: failed to retrieve
/tmp/staged in 3 attempts: failed to retrieve chunk for
/tmp/staged/pickled_main_session
caused by:
rpc error: code = Unknown desc = ; failed to retrieve chunk for
/tmp/staged/pickled_main_session
caused by:
rpc error: code = Unknown desc = ; failed to retrieve chunk for
/tmp/staged/pickled_main_session
caused by:
rpc error: code = Unknown desc = ; failed to retrieve chunk for
/tmp/staged/pickled_main_session
caused by:
rpc error: code = Unknown desc =
21/06/03 18:30:22 WARN BlockManager: Putting block rdd_13_1 failed due to
exception
org.apache.beam.vendor.guava.v26_0_jre.com.google.common.util.concurrent.UncheckedExecutionException:
java.lang.IllegalStateException: No container running for id
1e052a63b1a31f08c319c39fcea39f227d6a3b768a90d36ffa90f284180e5e73.
21/06/03 18:30:22 WARN BlockManager: Block rdd_13_1 could not be removed as
it was not found on disk or in memory
21/06/03 18:30:22 WARN BlockManager: Putting block rdd_17_1 failed due to
exception
org.apache.beam.vendor.guava.v26_0_jre.com.google.common.util.concurrent.UncheckedExecutionException:
java.lang.IllegalStateException: No container running for id
1e052a63b1a31f08c319c39fcea39f227d6a3b768a90d36ffa90f284180e5e73.
21/06/03 18:30:22 WARN BlockManager: Block rdd_17_1 could not be removed as
it was not found on disk or in memory
21/06/03 18:30:22 ERROR Executor: Exception in task 1.0 in stage 0.0 (TID 1)
org.apache.beam.vendor.guava.v26_0_jre.com.google.common.util.concurrent.UncheckedExecutionException:
java.lang.IllegalStateException: No container running for id
1e052a63b1a31f08c319c39fcea39f227d6a3b768a90d36ffa90f284180e5e73
at
org.apache.beam.vendor.guava.v26_0_jre.com.google.common.cache.LocalCache$Segment.get(LocalCache.java:2050)
at
org.apache.beam.vendor.guava.v26_0_jre.com.google.common.cache.LocalCache.get(LocalCache.java:3952)
at
org.apache.beam.vendor.guava.v26_0_jre.com.google.common.cache.LocalCache.getOrLoad(LocalCache.java:3974)
at
org.apache.beam.vendor.guava.v26_0_jre.com.google.common.cache.LocalCache$LocalLoadingCache.get(LocalCache.java:4958)
at
org.apache.beam.vendor.guava.v26_0_jre.com.google.common.cache.LocalCache$LocalLoadingCache.getUnchecked(LocalCache.java:4964)
at
org.apache.beam.runners.fnexecution.control.DefaultJobBundleFactory$SimpleStageBundleFactory.(DefaultJobBundleFactory.java:451)
at
org.apache.beam.runners.fnexecution.control.DefaultJobBundleFactory$SimpleStageBundleFactory.(DefaultJobBundleFactory.java:436)
at
org.apache.beam.runners.fnexecution.control.DefaultJobBundleFactory.forStage(DefaultJobBundleFactory.java:303)
at
org.apache.beam.runners.fnexecution.control.DefaultExecutableStageContext.getStageBundleFactory(DefaultExecutableStageContext.java:38)
at
org.apache.beam.runners.fnexecution.control.ReferenceCountingExecutableStageContextFactory$WrappedContext.getStageBundleFactory(ReferenceCountingExecutableStageContextFactory.java:202)
at
org.apache.beam.runners.spark.translation.SparkExecutableStageFunction.call(SparkExecutableStageFunction.java:135)
at
org.apache.beam.runners.spark.translation.SparkExecutableStageFunction.call(SparkExecutableStageFunction.java:80)
at
org.apache.spark.api.java.JavaRDDLike$$anonfun$fn$4$1.apply(JavaRDDLike.scala:153)
at
org.apache.spark.api.java.JavaRDDLike$$anonfun$fn$4$1.apply(JavaRDDLike.scala:153)
at
org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$23.apply(RDD.scala:823)
at
org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$23.apply(RDD.scala:823)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
at org.apache.spark.rdd.RDD$$anonfun$7.apply(RDD.scala:359)
at org.apache.spark.rdd.RDD$$anonfun$7.apply(RDD.scala:357)
at
org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1165)
at
org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1156)
at org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:1091)
at
org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1156)
at
org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:882)
at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:357)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:308)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
at org.apache.spark.rdd.RDD$$anonfun$7.apply(RDD.scala:359)
at org.apache.spark.rdd.RDD$$anonfun$7.apply(RDD.scala:357)
at
org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1165)
at
org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1156)
at org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:1091)
at
org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1156)
at
org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:882)
at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:357)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:308)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:123)
at
org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:411)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:417)
at
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Caused by: java.lang.IllegalStateException: No container running for id
1e052a63b1a31f08c319c39fcea39f227d6a3b768a90d36ffa90f284180e5e73
at
org.apache.beam.runners.fnexecution.environment.DockerEnvironmentFactory.createEnvironment(DockerEnvironmentFactory.java:140)
at
org.apache.beam.runners.fnexecution.control.DefaultJobBundleFactory$1.load(DefaultJobBundleFactory.java:252)
at
org.apache.beam.runners.fnexecution.control.DefaultJobBundleFactory$1.load(DefaultJobBundleFactory.java:231)
at
org.apache.beam.vendor.guava.v26_0_jre.com.google.common.cache.LocalCache$LoadingValueReference.loadFuture(LocalCache.java:3528)
at
org.apache.beam.vendor.guava.v26_0_jre.com.google.common.cache.LocalCache$Segment.loadSync(LocalCache.java:2277)
at
org.apache.beam.vendor.guava.v26_0_jre.com.google.common.cache.LocalCache$Segment.lockedGetOrLoad(LocalCache.java:2154)
at
org.apache.beam.vendor.guava.v26_0_jre.com.google.common.cache.LocalCache$Segment.get(LocalCache.java:2044)
... 54 more
Suppressed: java.io.IOException: Received exit code 1 for command 'docker
kill 1e052a63b1a31f08c319c39fcea39f227d6a3b768a90d36ffa90f284180e5e73'.
stderr: Error response from daemon: Cannot kill container:
1e052a63b1a31f08c319c39fcea39f227d6a3b768a90d36ffa90f284180e5e73: Container
1e052a63b1a31f08c319c39fcea39f227d6a3b768a90d36ffa90f284180e5e73 is not
running
at
org.apache.beam.runners.fnexecution.environment.DockerCommand.runShortCommand(DockerCommand.java:237)
at
org.apache.beam.runners.fnexecution.environment.DockerCommand.runShortCommand(DockerCommand.java:171)
at
org.apache.beam.runners.fnexecution.environment.DockerCommand.killContainer(DockerCommand.java:151)
at
org.apache.beam.runners.fnexecution.environment.DockerEnvironmentFactory.createEnvironment(DockerEnvironmentFactory.java:164)

Reply via email to