[ 
https://issues.apache.org/jira/browse/BEAM-3956?focusedWorklogId=87400&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-87400
 ]

ASF GitHub Bot logged work on BEAM-3956:
----------------------------------------

                Author: ASF GitHub Bot
            Created on: 04/Apr/18 01:49
            Start Date: 04/Apr/18 01:49
    Worklog Time Spent: 10m 
      Work Description: shoyer commented on issue #4959: [BEAM-3956] Preserve stacktraces for Python exceptions
URL: https://github.com/apache/beam/pull/4959#issuecomment-378453995
 
 
   @robertwb Indeed, `six.reraise()` is a definite improvement! It results in 
the original stacktrace being directly included as part of the result 
stacktrace:
   
   <details>
   ```python-stacktrace
   ---------------------------------------------------------------------------
   ValueError                                Traceback (most recent call last)
   <ipython-input-1-2377312c7b45> in <module>()
         8   raise ValueError('internal failure!')
         9 
   ---> 10 [1, 2, 3] | beam.Map(f)
   
   
/usr/local/google/home/shoyer/open-source/beam/sdks/python/apache_beam/transforms/ptransform.pyc
 in __ror__(self, left, label)
       491     _allocate_materialized_pipeline(p)
       492     materialized_result = _AddMaterializationTransforms().visit(result)
   --> 493     p.run().wait_until_finish()
       494     _release_materialized_pipeline(p)
       495     return _FinalizeMaterialization().visit(materialized_result)
   
   
/usr/local/google/home/shoyer/open-source/beam/sdks/python/apache_beam/pipeline.pyc
 in run(self, test_runner_api)
       387     if test_runner_api and self._verify_runner_api_compatible():
       388       return Pipeline.from_runner_api(
   --> 389           self.to_runner_api(), self.runner, self._options).run(False)
       390 
       391     if self._options.view_as(TypeOptions).runtime_type_check:
   
   
/usr/local/google/home/shoyer/open-source/beam/sdks/python/apache_beam/pipeline.pyc
 in run(self, test_runner_api)
       400       finally:
       401         shutil.rmtree(tmpdir)
   --> 402     return self.runner.run_pipeline(self)
       403 
       404   def __enter__(self):
   
   
/usr/local/google/home/shoyer/open-source/beam/sdks/python/apache_beam/runners/direct/direct_runner.pyc
 in run_pipeline(self, pipeline)
       133       runner = BundleBasedDirectRunner()
       134 
   --> 135     return runner.run_pipeline(pipeline)
       136 
       137 
   
   
/usr/local/google/home/shoyer/open-source/beam/sdks/python/apache_beam/runners/portability/fn_api_runner.pyc
 in run_pipeline(self, pipeline)
       213     from apache_beam.runners.dataflow.dataflow_runner import DataflowRunner
       214     pipeline.visit(DataflowRunner.group_by_key_input_visitor())
   --> 215     return self.run_via_runner_api(pipeline.to_runner_api())
       216 
       217   def run_via_runner_api(self, pipeline_proto):
   
   
/usr/local/google/home/shoyer/open-source/beam/sdks/python/apache_beam/runners/portability/fn_api_runner.pyc
 in run_via_runner_api(self, pipeline_proto)
       216 
       217   def run_via_runner_api(self, pipeline_proto):
   --> 218     return self.run_stages(*self.create_stages(pipeline_proto))
       219 
       220   def create_stages(self, pipeline_proto):
   
   
/usr/local/google/home/shoyer/open-source/beam/sdks/python/apache_beam/runners/portability/fn_api_runner.pyc
 in run_stages(self, pipeline_components, stages, safe_coders)
       835         metrics_by_stage[stage.name] = self.run_stage(
       836             controller, pipeline_components, stage,
   --> 837             pcoll_buffers, safe_coders).process_bundle.metrics
       838     finally:
       839       controller.close()
   
   
/usr/local/google/home/shoyer/open-source/beam/sdks/python/apache_beam/runners/portability/fn_api_runner.pyc
 in run_stage(self, controller, pipeline_components, stage, pcoll_buffers, 
safe_coders)
       936     return BundleManager(
       937         controller, get_buffer, process_bundle_descriptor,
   --> 938         self._progress_frequency).process_bundle(data_input, data_output)
       939 
       940   # These classes are used to interact with the worker.
   
   
/usr/local/google/home/shoyer/open-source/beam/sdks/python/apache_beam/runners/portability/fn_api_runner.pyc
 in process_bundle(self, inputs, expected_outputs)
      1108         process_bundle=beam_fn_api_pb2.ProcessBundleRequest(
      1109             process_bundle_descriptor_reference=self._bundle_descriptor.id))
   -> 1110     result_future = self._controller.control_handler.push(process_bundle)
      1111 
      1112     with ProgressRequester(
   
   
/usr/local/google/home/shoyer/open-source/beam/sdks/python/apache_beam/runners/portability/fn_api_runner.pyc
 in push(self, request)
      1001         request.instruction_id = 'control_%s' % self._uid_counter
      1002       logging.debug('CONTROL REQUEST %s', request)
   -> 1003       response = self.worker.do_instruction(request)
      1004       logging.debug('CONTROL RESPONSE %s', response)
      1005       return ControlFuture(request.instruction_id, response)
   
   
/usr/local/google/home/shoyer/open-source/beam/sdks/python/apache_beam/runners/worker/sdk_worker.pyc
 in do_instruction(self, request)
       199       # E.g. if register is set, this will call self.register(request.register))
       200       return getattr(self, request_type)(getattr(request, request_type),
   --> 201                                          request.instruction_id)
       202     else:
       203       raise NotImplementedError
   
   
/usr/local/google/home/shoyer/open-source/beam/sdks/python/apache_beam/runners/worker/sdk_worker.pyc
 in process_bundle(self, request, instruction_id)
       216             self.state_handler, self.data_channel_factory)
       217     try:
   --> 218       processor.process_bundle(instruction_id)
       219     finally:
       220       del self.bundle_processors[instruction_id]
   
   
/usr/local/google/home/shoyer/open-source/beam/sdks/python/apache_beam/runners/worker/bundle_processor.pyc
 in process_bundle(self, instruction_id)
       284       for op in reversed(self.ops.values()):
       285         logging.info('start %s', op)
   --> 286         op.start()
       287 
       288       # Inject inputs from data plane.
   
   
/usr/local/google/home/shoyer/open-source/beam/sdks/python/apache_beam/runners/worker/operations.pyc
 in start(self)
       236           else:
       237             windowed_value = _globally_windowed_value.with_value(value)
   --> 238           self.output(windowed_value)
       239 
       240 
   
   
/usr/local/google/home/shoyer/open-source/beam/sdks/python/apache_beam/runners/worker/operations.pyc
 in output(self, windowed_value, output_index)
       157 
       158   def output(self, windowed_value, output_index=0):
   --> 159     cython.cast(Receiver, self.receivers[output_index]).receive(windowed_value)
       160 
       161   def add_receiver(self, operation, output_index=0):
   
   
/usr/local/google/home/shoyer/open-source/beam/sdks/python/apache_beam/runners/worker/operations.pyc
 in receive(self, windowed_value)
        83     self.update_counters_start(windowed_value)
        84     for consumer in self.consumers:
   ---> 85       cython.cast(Operation, consumer).process(windowed_value)
        86     self.update_counters_finish()
        87 
   
   
/usr/local/google/home/shoyer/open-source/beam/sdks/python/apache_beam/runners/worker/operations.pyc
 in process(self, o)
       390   def process(self, o):
       391     with self.scoped_process_state:
   --> 392       self.dofn_receiver.receive(o)
       393 
       394   def progress_metrics(self):
   
   
/usr/local/google/home/shoyer/open-source/beam/sdks/python/apache_beam/runners/common.py
 in receive(self, windowed_value)
       486 
       487   def receive(self, windowed_value):
   --> 488     self.process(windowed_value)
       489 
       490   def process(self, windowed_value):
   
   
/usr/local/google/home/shoyer/open-source/beam/sdks/python/apache_beam/runners/common.py
 in process(self, windowed_value)
       494       self.do_fn_invoker.invoke_process(windowed_value)
       495     except BaseException as exn:
   --> 496       self._reraise_augmented(exn)
       497     finally:
       498       self.scoped_metrics_container.exit()
   
   
/usr/local/google/home/shoyer/open-source/beam/sdks/python/apache_beam/runners/common.py
 in _reraise_augmented(self, exn)
       537           + step_annotation)
       538       new_exn._tagged_with_step = True
   --> 539     six.reraise(type(new_exn), new_exn, original_traceback)
       540 
       541 
   
   
/usr/local/google/home/shoyer/open-source/beam/sdks/python/apache_beam/runners/common.py
 in process(self, windowed_value)
       492       self.logging_context.enter()
       493       self.scoped_metrics_container.enter()
   --> 494       self.do_fn_invoker.invoke_process(windowed_value)
       495     except BaseException as exn:
       496       self._reraise_augmented(exn)
   
   
/usr/local/google/home/shoyer/open-source/beam/sdks/python/apache_beam/runners/common.py
 in invoke_process(self, windowed_value, restriction_tracker, output_processor, 
additional_args, additional_kwargs)
       282       output_processor = self.output_processor
       283     output_processor.process_outputs(
   --> 284         windowed_value, self.process_method(windowed_value.value))
       285 
       286 
   
   
/usr/local/google/home/shoyer/open-source/beam/sdks/python/apache_beam/transforms/core.pyc
 in <lambda>(x)
       970     wrapper = lambda x, *args, **kwargs: [fn(x, *args, **kwargs)]
       971   else:
   --> 972     wrapper = lambda x: [fn(x)]
       973 
       974   label = 'Map(%s)' % ptransform.label_from_callable(fn)
   
   <ipython-input-1-2377312c7b45> in f(x)
         2 
         3 def f(x):
   ----> 4   return g(x)
         5 def g(x):
         6   return h(x)
   
   <ipython-input-1-2377312c7b45> in g(x)
         4   return g(x)
         5 def g(x):
   ----> 6   return h(x)
         7 def h(x):
         8   raise ValueError('internal failure!')
   
   <ipython-input-1-2377312c7b45> in h(x)
         6   return h(x)
         7 def h(x):
   ----> 8   raise ValueError('internal failure!')
         9 
        10 [1, 2, 3] | beam.Map(f)
   
   ValueError: internal failure! [while running 'Map(f)']
   ```
   </details>

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Issue Time Tracking
-------------------

    Worklog Id:     (was: 87400)
    Time Spent: 2.5h  (was: 2h 20m)

> Stacktraces from exceptions in user code should be preserved in the Python SDK
> ------------------------------------------------------------------------------
>
>                 Key: BEAM-3956
>                 URL: https://issues.apache.org/jira/browse/BEAM-3956
>             Project: Beam
>          Issue Type: Bug
>          Components: sdk-py-core
>            Reporter: Stephan Hoyer
>            Priority: Major
>          Time Spent: 2.5h
>  Remaining Estimate: 0h
>
> Currently, Beam's Python SDK loses stacktraces for exceptions. It does 
> helpfully add a tag like "[while running StageA]" to exception error 
> messages, but that doesn't include the stacktrace of Python functions being 
> called.
> Including the full stacktraces would make a big difference for the ease of 
> debugging Beam pipelines when things go wrong.



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

Reply via email to