This is an automated email from the ASF dual-hosted git repository. damccorm pushed a commit to branch users/damccorm/split-requirements in repository https://gitbox.apache.org/repos/asf/beam.git
commit bf4937c3c4b09acb4cefe8856d4302a7a5ba218a Author: Danny Mccormick <[email protected]> AuthorDate: Thu Nov 6 15:52:31 2025 -0500 Split some requirements into extras --- sdks/python/apache_beam/io/tfrecordio.py | 17 +++++++++++++++-- sdks/python/container/common.gradle | 6 +++--- sdks/python/setup.py | 2 +- 3 files changed, 19 insertions(+), 6 deletions(-) diff --git a/sdks/python/apache_beam/io/tfrecordio.py b/sdks/python/apache_beam/io/tfrecordio.py index c6c59b2c2be..073cbc1d211 100644 --- a/sdks/python/apache_beam/io/tfrecordio.py +++ b/sdks/python/apache_beam/io/tfrecordio.py @@ -24,8 +24,6 @@ import logging import struct from functools import partial -import crcmod - from apache_beam import coders from apache_beam.io import filebasedsink from apache_beam.io.filebasedsource import FileBasedSource @@ -35,6 +33,16 @@ from apache_beam.io.iobase import Read from apache_beam.io.iobase import Write from apache_beam.transforms import PTransform +try: + import crcmod +except ImportError: + logging.warning( + 'crcmod package not found. This package is required if ' + 'python-snappy or google-crc32c are not installed. To ensure crcmod is ' + 'installed, install the tfrecord extra: pip install ' + 'apache-beam[tfrecord]') + crcmod = None + __all__ = ['ReadFromTFRecord', 'ReadAllFromTFRecord', 'WriteToTFRecord'] _LOGGER = logging.getLogger(__name__) @@ -67,6 +75,11 @@ def _default_crc32c_fn(value): pass if not _default_crc32c_fn.fn: + if crcmod is None: + raise RuntimeError( + 'Could not find python-snappy, google-crc32c, or crcmod. 
To allow ' + 'execution to succeed, make sure that one of these packages is ' + 'installed or pip install apache-beam[tfrecord]') _LOGGER.warning( 'Couldn\'t find python-snappy or google-crc32c so the ' 'implementation of _TFRecordUtil._masked_crc32c is not as fast ' diff --git a/sdks/python/container/common.gradle b/sdks/python/container/common.gradle index 8ee31cf4e50..ad64dbbb660 100644 --- a/sdks/python/container/common.gradle +++ b/sdks/python/container/common.gradle @@ -42,7 +42,7 @@ def generatePythonRequirements = tasks.register("generatePythonRequirements") { "${files(configurations.sdkSourceTarball.files).singleFile} " + "base_image_requirements.txt " + "container " + - "[gcp,dataframe,test] " + + "[gcp,dataframe,test,tfrecord] " + "${pipExtraOptions}" } // Generate versions for ML dependencies @@ -53,7 +53,7 @@ def generatePythonRequirements = tasks.register("generatePythonRequirements") { "${files(configurations.sdkSourceTarball.files).singleFile} " + "base_image_requirements.txt " + "container/ml " + - "[gcp,dataframe,test,ml_cpu] " + + "[gcp,dataframe,test,ml_cpu,tfrecord] " + "${pipExtraOptions}" } // TODO(https://github.com/apache/beam/issues/36637) @@ -73,7 +73,7 @@ def generatePythonRequirements = tasks.register("generatePythonRequirements") { "${files(configurations.sdkSourceTarball.files).singleFile} " + "gpu_image_requirements.txt " + "container/ml " + - "[gcp,dataframe,test,tensorflow,torch,transformers,vllm] " + + "[gcp,dataframe,test,tensorflow,tfrecord,torch,transformers,vllm] " + "${pipExtraOptions}" } } diff --git a/sdks/python/setup.py b/sdks/python/setup.py index c50050d9241..3c486eebbba 100644 --- a/sdks/python/setup.py +++ b/sdks/python/setup.py @@ -373,7 +373,6 @@ if __name__ == '__main__': }, ext_modules=extensions, install_requires=[ - 'crcmod>=1.7,<2.0', 'cryptography>=39.0.0,<48.0.0', 'fastavro>=0.23.6,<2', 'fasteners>=0.3,<1.0', @@ -596,6 +595,7 @@ if __name__ == '__main__': , 'dill' ], + 'tfrecord': ['crcmod>=1.7,<2.0'], 'onnx': 
[ 'onnxruntime==1.13.1', 'torch==1.13.1',
