Fabian Rosenthal created BEAM-9386:
--------------------------------------
Summary: _ReadRange in filebasedsource.py fails with IndexError
Key: BEAM-9386
URL: https://issues.apache.org/jira/browse/BEAM-9386
Project: Beam
Issue Type: Bug
Components: io-py-files, runner-dataflow, sdk-py-core
Affects Versions: 2.17.0
Reporter: Fabian Rosenthal
We are using Apache Beam 2.17.0 (Python SDK on Python 3.7) with
the Dataflow runner on Google Cloud Platform.
We frequently get an "index out of range" exception in
filebasedsource.py, specifically at this line:
[https://github.com/apache/beam/blob/release-2.17.0/sdks/python/apache_beam/io/filebasedsource.py#L370]
The whole stack trace:
{code:java}
Traceback (most recent call last):
File "/usr/local/lib/python3.7/site-packages/dataflow_worker/batchworker.py",
line 650, in do_work
work_executor.execute()
File "/usr/local/lib/python3.7/site-packages/dataflow_worker/executor.py",
line 176, in execute
op.start()
File "dataflow_worker/shuffle_operations.py", line 50, in
dataflow_worker.shuffle_operations.GroupedShuffleReadOperation.start
File "dataflow_worker/shuffle_operations.py", line 51, in
dataflow_worker.shuffle_operations.GroupedShuffleReadOperation.start
File "dataflow_worker/shuffle_operations.py", line 66, in
dataflow_worker.shuffle_operations.GroupedShuffleReadOperation.start
File "dataflow_worker/shuffle_operations.py", line 67, in
dataflow_worker.shuffle_operations.GroupedShuffleReadOperation.start
File "dataflow_worker/shuffle_operations.py", line 71, in
dataflow_worker.shuffle_operations.GroupedShuffleReadOperation.start
File "apache_beam/runners/worker/operations.py", line 256, in
apache_beam.runners.worker.operations.Operation.output
File "apache_beam/runners/worker/operations.py", line 143, in
apache_beam.runners.worker.operations.SingletonConsumerSet.receive
File "dataflow_worker/shuffle_operations.py", line 234, in
dataflow_worker.shuffle_operations.BatchGroupAlsoByWindowsOperation.process
File "dataflow_worker/shuffle_operations.py", line 241, in
dataflow_worker.shuffle_operations.BatchGroupAlsoByWindowsOperation.process
File "apache_beam/runners/worker/operations.py", line 256, in
apache_beam.runners.worker.operations.Operation.output
File "apache_beam/runners/worker/operations.py", line 143, in
apache_beam.runners.worker.operations.SingletonConsumerSet.receive
File "apache_beam/runners/worker/operations.py", line 593, in
apache_beam.runners.worker.operations.DoOperation.process
File "apache_beam/runners/worker/operations.py", line 594, in
apache_beam.runners.worker.operations.DoOperation.process
File "apache_beam/runners/common.py", line 776, in
apache_beam.runners.common.DoFnRunner.receive
File "apache_beam/runners/common.py", line 782, in
apache_beam.runners.common.DoFnRunner.process
File "apache_beam/runners/common.py", line 834, in
apache_beam.runners.common.DoFnRunner._reraise_augmented
File "apache_beam/runners/common.py", line 780, in
apache_beam.runners.common.DoFnRunner.process
File "apache_beam/runners/common.py", line 440, in
apache_beam.runners.common.SimpleInvoker.invoke_process
File "apache_beam/runners/common.py", line 919, in
apache_beam.runners.common._OutputProcessor.process_outputs
File "apache_beam/runners/worker/operations.py", line 143, in
apache_beam.runners.worker.operations.SingletonConsumerSet.receive
File "apache_beam/runners/worker/operations.py", line 593, in
apache_beam.runners.worker.operations.DoOperation.process
File "apache_beam/runners/worker/operations.py", line 594, in
apache_beam.runners.worker.operations.DoOperation.process
File "apache_beam/runners/common.py", line 776, in
apache_beam.runners.common.DoFnRunner.receive
File "apache_beam/runners/common.py", line 782, in
apache_beam.runners.common.DoFnRunner.process
File "apache_beam/runners/common.py", line 834, in
apache_beam.runners.common.DoFnRunner._reraise_augmented
File "apache_beam/runners/common.py", line 780, in
apache_beam.runners.common.DoFnRunner.process
File "apache_beam/runners/common.py", line 440, in
apache_beam.runners.common.SimpleInvoker.invoke_process
File "apache_beam/runners/common.py", line 919, in
apache_beam.runners.common._OutputProcessor.process_outputs
File "apache_beam/runners/worker/operations.py", line 143, in
apache_beam.runners.worker.operations.SingletonConsumerSet.receive
File "apache_beam/runners/worker/operations.py", line 593, in
apache_beam.runners.worker.operations.DoOperation.process
File "apache_beam/runners/worker/operations.py", line 594, in
apache_beam.runners.worker.operations.DoOperation.process
File "apache_beam/runners/common.py", line 776, in
apache_beam.runners.common.DoFnRunner.receive
File "apache_beam/runners/common.py", line 782, in
apache_beam.runners.common.DoFnRunner.process
File "apache_beam/runners/common.py", line 849, in
apache_beam.runners.common.DoFnRunner._reraise_augmented
File "/usr/local/lib/python3.7/site-packages/future/utils/__init__.py", line
421, in raise_with_traceback
raise exc.with_traceback(traceback)
File "apache_beam/runners/common.py", line 780, in
apache_beam.runners.common.DoFnRunner.process
File "apache_beam/runners/common.py", line 440, in
apache_beam.runners.common.SimpleInvoker.invoke_process
File "apache_beam/runners/common.py", line 895, in
apache_beam.runners.common._OutputProcessor.process_outputs
File
"/usr/local/lib/python3.7/site-packages/apache_beam/io/filebasedsource.py",
line 370, in process
source = list(source.split(float('inf')))[0].source
IndexError: list index out of range [while running
'example/new_data/read/ReadAllFiles/ReadRange']
{code}
--
This message was sent by Atlassian Jira
(v8.3.4#803005)