This is an automated email from the ASF dual-hosted git repository.

damccorm pushed a commit to branch users/damccorm/split-requirements
in repository https://gitbox.apache.org/repos/asf/beam.git

commit bf4937c3c4b09acb4cefe8856d4302a7a5ba218a
Author: Danny Mccormick <[email protected]>
AuthorDate: Thu Nov 6 15:52:31 2025 -0500

    Split some requirements into extras
---
 sdks/python/apache_beam/io/tfrecordio.py | 17 +++++++++++++++--
 sdks/python/container/common.gradle      |  6 +++---
 sdks/python/setup.py                     |  2 +-
 3 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/sdks/python/apache_beam/io/tfrecordio.py b/sdks/python/apache_beam/io/tfrecordio.py
index c6c59b2c2be..073cbc1d211 100644
--- a/sdks/python/apache_beam/io/tfrecordio.py
+++ b/sdks/python/apache_beam/io/tfrecordio.py
@@ -24,8 +24,6 @@ import logging
 import struct
 from functools import partial
 
-import crcmod
-
 from apache_beam import coders
 from apache_beam.io import filebasedsink
 from apache_beam.io.filebasedsource import FileBasedSource
@@ -35,6 +33,16 @@ from apache_beam.io.iobase import Read
 from apache_beam.io.iobase import Write
 from apache_beam.transforms import PTransform
 
+try:
+  import crcmod
+except ImportError:
+  logging.warning(
+      'crcmod package not found. This package is required if '
+      'python-snappy or google-crc32c are not installed. To ensure crcmod is '
+      'installed, install the tfrecord extra: pip install '
+      'apache-beam[tfrecord]')
+  crcmod = None
+
 __all__ = ['ReadFromTFRecord', 'ReadAllFromTFRecord', 'WriteToTFRecord']
 
 _LOGGER = logging.getLogger(__name__)
@@ -67,6 +75,11 @@ def _default_crc32c_fn(value):
       pass
 
     if not _default_crc32c_fn.fn:
+      if crcmod is None:
+        raise RuntimeError(
+            'Could not find python-snappy, google-crc32c, or crcmod. To allow '
+            'execution to succeed, make sure that one of these packages is '
+            'installed or pip install apache-beam[tfrecord]')
       _LOGGER.warning(
           'Couldn\'t find python-snappy or google-crc32c so the '
           'implementation of _TFRecordUtil._masked_crc32c is not as fast '
diff --git a/sdks/python/container/common.gradle b/sdks/python/container/common.gradle
index 8ee31cf4e50..ad64dbbb660 100644
--- a/sdks/python/container/common.gradle
+++ b/sdks/python/container/common.gradle
@@ -42,7 +42,7 @@ def generatePythonRequirements = tasks.register("generatePythonRequirements") {
               "${files(configurations.sdkSourceTarball.files).singleFile} " +
               "base_image_requirements.txt " +
               "container " +
-              "[gcp,dataframe,test] " +
+              "[gcp,dataframe,test,tfrecord] " +
               "${pipExtraOptions}"
     }
     // Generate versions for ML dependencies
@@ -53,7 +53,7 @@ def generatePythonRequirements = tasks.register("generatePythonRequirements") {
               "${files(configurations.sdkSourceTarball.files).singleFile} " +
               "base_image_requirements.txt " +
               "container/ml " +
-              "[gcp,dataframe,test,ml_cpu] " +
+              "[gcp,dataframe,test,ml_cpu,tfrecord] " +
               "${pipExtraOptions}"
     }
     // TODO(https://github.com/apache/beam/issues/36637)
@@ -73,7 +73,7 @@ def generatePythonRequirements = tasks.register("generatePythonRequirements") {
                 "${files(configurations.sdkSourceTarball.files).singleFile} " +
                 "gpu_image_requirements.txt " +
                 "container/ml " +
-                "[gcp,dataframe,test,tensorflow,torch,transformers,vllm] " +
+                "[gcp,dataframe,test,tensorflow,tfrecord,torch,transformers,vllm] " +
                 "${pipExtraOptions}"
       }
     }
diff --git a/sdks/python/setup.py b/sdks/python/setup.py
index c50050d9241..3c486eebbba 100644
--- a/sdks/python/setup.py
+++ b/sdks/python/setup.py
@@ -373,7 +373,6 @@ if __name__ == '__main__':
       },
       ext_modules=extensions,
       install_requires=[
-          'crcmod>=1.7,<2.0',
           'cryptography>=39.0.0,<48.0.0',
           'fastavro>=0.23.6,<2',
           'fasteners>=0.3,<1.0',
@@ -596,6 +595,7 @@ if __name__ == '__main__':
               ,
               'dill'
           ],
+          'tfrecord': ['crcmod>=1.7,<2.0'],
           'onnx': [
               'onnxruntime==1.13.1',
               'torch==1.13.1',

Reply via email to