Hi Beam,

I'm trying to prebuild a container image for my Beam job (thanks for the
tip, Cham!), but my job running on Dataflow eventually fails while
unpickling a DoFn on the worker, with a ModuleNotFoundError:
Traceback (most recent call last):
  File
"/usr/local/lib/python3.9/site-packages/apache_beam/runners/worker/sdk_worker.py",
line 267, in _execute
    response = task()
  File
"/usr/local/lib/python3.9/site-packages/apache_beam/runners/worker/sdk_worker.py",
line 340, in <lambda>
    lambda: self.create_worker().do_instruction(request), request)
  File
"/usr/local/lib/python3.9/site-packages/apache_beam/runners/worker/sdk_worker.py",
line 580, in do_instruction
    return getattr(self, request_type)(
  File
"/usr/local/lib/python3.9/site-packages/apache_beam/runners/worker/sdk_worker.py",
line 611, in process_bundle
    bundle_processor = self.bundle_processor_cache.get(
  File
"/usr/local/lib/python3.9/site-packages/apache_beam/runners/worker/sdk_worker.py",
line 441, in get
    processor = bundle_processor.BundleProcessor(
  File
"/usr/local/lib/python3.9/site-packages/apache_beam/runners/worker/bundle_processor.py",
line 863, in __init__
    self.ops = self.create_execution_tree(self.process_bundle_descriptor)
  File
"/usr/local/lib/python3.9/site-packages/apache_beam/runners/worker/bundle_processor.py",
line 916, in create_execution_tree
    return collections.OrderedDict([(
  File
"/usr/local/lib/python3.9/site-packages/apache_beam/runners/worker/bundle_processor.py",
line 919, in <listcomp>
    get_operation(transform_id))) for transform_id in sorted(
  File
"/usr/local/lib/python3.9/site-packages/apache_beam/runners/worker/bundle_processor.py",
line 806, in wrapper
    result = cache[args] = func(*args)
  File
"/usr/local/lib/python3.9/site-packages/apache_beam/runners/worker/bundle_processor.py",
line 898, in get_operation
    transform_consumers = {
  File
"/usr/local/lib/python3.9/site-packages/apache_beam/runners/worker/bundle_processor.py",
line 899, in <dictcomp>
    tag: [get_operation(op) for op in pcoll_consumers[pcoll_id]]
  File
"/usr/local/lib/python3.9/site-packages/apache_beam/runners/worker/bundle_processor.py",
line 899, in <listcomp>
    tag: [get_operation(op) for op in pcoll_consumers[pcoll_id]]
  File
"/usr/local/lib/python3.9/site-packages/apache_beam/runners/worker/bundle_processor.py",
line 806, in wrapper
    result = cache[args] = func(*args)
  File
"/usr/local/lib/python3.9/site-packages/apache_beam/runners/worker/bundle_processor.py",
line 898, in get_operation
    transform_consumers = {
  File
"/usr/local/lib/python3.9/site-packages/apache_beam/runners/worker/bundle_processor.py",
line 899, in <dictcomp>
    tag: [get_operation(op) for op in pcoll_consumers[pcoll_id]]
  File
"/usr/local/lib/python3.9/site-packages/apache_beam/runners/worker/bundle_processor.py",
line 899, in <listcomp>
    tag: [get_operation(op) for op in pcoll_consumers[pcoll_id]]
  File
"/usr/local/lib/python3.9/site-packages/apache_beam/runners/worker/bundle_processor.py",
line 806, in wrapper
    result = cache[args] = func(*args)
  File
"/usr/local/lib/python3.9/site-packages/apache_beam/runners/worker/bundle_processor.py",
line 898, in get_operation
    transform_consumers = {
  File
"/usr/local/lib/python3.9/site-packages/apache_beam/runners/worker/bundle_processor.py",
line 899, in <dictcomp>
    tag: [get_operation(op) for op in pcoll_consumers[pcoll_id]]
  File
"/usr/local/lib/python3.9/site-packages/apache_beam/runners/worker/bundle_processor.py",
line 899, in <listcomp>
    tag: [get_operation(op) for op in pcoll_consumers[pcoll_id]]
  File
"/usr/local/lib/python3.9/site-packages/apache_beam/runners/worker/bundle_processor.py",
line 806, in wrapper
    result = cache[args] = func(*args)
  File
"/usr/local/lib/python3.9/site-packages/apache_beam/runners/worker/bundle_processor.py",
line 898, in get_operation
    transform_consumers = {
  File
"/usr/local/lib/python3.9/site-packages/apache_beam/runners/worker/bundle_processor.py",
line 899, in <dictcomp>
    tag: [get_operation(op) for op in pcoll_consumers[pcoll_id]]
  File
"/usr/local/lib/python3.9/site-packages/apache_beam/runners/worker/bundle_processor.py",
line 899, in <listcomp>
    tag: [get_operation(op) for op in pcoll_consumers[pcoll_id]]
  File
"/usr/local/lib/python3.9/site-packages/apache_beam/runners/worker/bundle_processor.py",
line 806, in wrapper
    result = cache[args] = func(*args)
  File
"/usr/local/lib/python3.9/site-packages/apache_beam/runners/worker/bundle_processor.py",
line 903, in get_operation
    return transform_factory.create_operation(
  File
"/usr/local/lib/python3.9/site-packages/apache_beam/runners/worker/bundle_processor.py",
line 1198, in create_operation
    return creator(self, transform_id, transform_proto, payload, consumers)
  File
"/usr/local/lib/python3.9/site-packages/apache_beam/runners/worker/bundle_processor.py",
line 1545, in create_par_do
    return _create_pardo_operation(
  File
"/usr/local/lib/python3.9/site-packages/apache_beam/runners/worker/bundle_processor.py",
line 1588, in _create_pardo_operation
    dofn_data = pickler.loads(serialized_fn)
  File
"/usr/local/lib/python3.9/site-packages/apache_beam/internal/pickler.py",
line 51, in loads
    return desired_pickle_lib.loads(
  File
"/usr/local/lib/python3.9/site-packages/apache_beam/internal/dill_pickler.py",
line 289, in loads
    return dill.loads(s)
  File "/usr/local/lib/python3.9/site-packages/dill/_dill.py", line 275, in
loads
    return load(file, ignore, **kwds)
  File "/usr/local/lib/python3.9/site-packages/dill/_dill.py", line 270, in
load
    return Unpickler(file, ignore=ignore, **kwds).load()
  File "/usr/local/lib/python3.9/site-packages/dill/_dill.py", line 472, in
load
    obj = StockUnpickler.load(self)
  File "/usr/local/lib/python3.9/site-packages/dill/_dill.py", line 462, in
find_class
    return StockUnpickler.find_class(self, module, name)
ModuleNotFoundError: No module named 'canonicalization'

So, my local dependencies are not found. They worked fine before I tried
prebuilding.

We're using Bazel, so I could see that being part of the problem. My build
rule is as follows:

py_binary(


    name = "beam_prebuild",

    main = "beam_ami_parser.py",

    srcs = ["beam_ami_parser.py"],

    deps = [


        requirement("apache-beam"),


        "//canonicalization:bigtable",

        "//canonicalization:constants",


    ],

    target_compatible_with = ["//build:python3_9"],

)


I'd appreciate any help on figuring out why my modules aren't found.

Thanks!
-Lina

Reply via email to