This is an automated email from the ASF dual-hosted git repository.

damccorm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
     new 1b89fcff0ec Split some requirements into extras (#36749)
1b89fcff0ec is described below

commit 1b89fcff0ec4d59268b404479c5adb7efcbb8c31
Author: Danny McCormick <[email protected]>
AuthorDate: Fri Nov 7 12:57:28 2025 -0500

    Split some requirements into extras (#36749)
    
    * Split some requirements into extras
    
    * comma
    
    * test fixes
---
 sdks/python/apache_beam/io/tfrecordio.py      | 17 +++++++++++++++--
 sdks/python/apache_beam/io/tfrecordio_test.py |  7 ++++++-
 sdks/python/container/common.gradle           |  6 +++---
 sdks/python/setup.py                          |  2 +-
 sdks/python/tox.ini                           |  2 +-
 5 files changed, 26 insertions(+), 8 deletions(-)

diff --git a/sdks/python/apache_beam/io/tfrecordio.py b/sdks/python/apache_beam/io/tfrecordio.py
index c6c59b2c2be..073cbc1d211 100644
--- a/sdks/python/apache_beam/io/tfrecordio.py
+++ b/sdks/python/apache_beam/io/tfrecordio.py
@@ -24,8 +24,6 @@ import logging
 import struct
 from functools import partial
 
-import crcmod
-
 from apache_beam import coders
 from apache_beam.io import filebasedsink
 from apache_beam.io.filebasedsource import FileBasedSource
@@ -35,6 +33,16 @@ from apache_beam.io.iobase import Read
 from apache_beam.io.iobase import Write
 from apache_beam.transforms import PTransform
 
+try:
+  import crcmod
+except ImportError:
+  logging.warning(
+      'crcmod package not found. This package is required if '
+      'python-snappy or google-crc32c are not installed. To ensure crcmod is '
+      'installed, install the tfrecord extra: pip install '
+      'apache-beam[tfrecord]')
+  crcmod = None
+
 __all__ = ['ReadFromTFRecord', 'ReadAllFromTFRecord', 'WriteToTFRecord']
 
 _LOGGER = logging.getLogger(__name__)
@@ -67,6 +75,11 @@ def _default_crc32c_fn(value):
       pass
 
     if not _default_crc32c_fn.fn:
+      if crcmod is None:
+        raise RuntimeError(
+            'Could not find python-snappy, google-crc32c, or crcmod. To allow '
+            'execution to succeed, make sure that one of these packages is '
+            'installed or pip install apache-beam[tfrecord]')
       _LOGGER.warning(
           'Couldn\'t find python-snappy or google-crc32c so the '
           'implementation of _TFRecordUtil._masked_crc32c is not as fast '
diff --git a/sdks/python/apache_beam/io/tfrecordio_test.py b/sdks/python/apache_beam/io/tfrecordio_test.py
index 6522ade36d8..e88ed177863 100644
--- a/sdks/python/apache_beam/io/tfrecordio_test.py
+++ b/sdks/python/apache_beam/io/tfrecordio_test.py
@@ -33,7 +33,6 @@ import unittest
 import zlib
 from datetime import datetime
 
-import crcmod
 import pytz
 
 import apache_beam as beam
@@ -61,6 +60,11 @@ except ImportError:
     tf = None  # pylint: disable=invalid-name
     logging.warning('Tensorflow is not installed, so skipping some tests.')
 
+try:
+  import crcmod
+except ImportError:
+  crcmod = None
+
 # Created by running following code in python:
 # >>> import tensorflow as tf
 # >>> import base64
@@ -121,6 +125,7 @@ class TestTFRecordUtil(unittest.TestCase):
         0xe4999b0,
         _TFRecordUtil._masked_crc32c(b'\x03\x00\x00\x00\x00\x00\x00\x00'))
 
+  @unittest.skipIf(crcmod is None, 'crcmod not installed.')
   def test_masked_crc32c_crcmod(self):
     crc32c_fn = crcmod.predefined.mkPredefinedCrcFun('crc-32c')
     self.assertEqual(
diff --git a/sdks/python/container/common.gradle b/sdks/python/container/common.gradle
index 8ee31cf4e50..ad64dbbb660 100644
--- a/sdks/python/container/common.gradle
+++ b/sdks/python/container/common.gradle
@@ -42,7 +42,7 @@ def generatePythonRequirements = tasks.register("generatePythonRequirements") {
               "${files(configurations.sdkSourceTarball.files).singleFile} " +
               "base_image_requirements.txt " +
               "container " +
-              "[gcp,dataframe,test] " +
+              "[gcp,dataframe,test,tfrecord] " +
               "${pipExtraOptions}"
     }
     // Generate versions for ML dependencies
@@ -53,7 +53,7 @@ def generatePythonRequirements = tasks.register("generatePythonRequirements") {
               "${files(configurations.sdkSourceTarball.files).singleFile} " +
               "base_image_requirements.txt " +
               "container/ml " +
-              "[gcp,dataframe,test,ml_cpu] " +
+              "[gcp,dataframe,test,ml_cpu,tfrecord] " +
               "${pipExtraOptions}"
     }
     // TODO(https://github.com/apache/beam/issues/36637)
@@ -73,7 +73,7 @@ def generatePythonRequirements = tasks.register("generatePythonRequirements") {
                 "${files(configurations.sdkSourceTarball.files).singleFile} " +
                 "gpu_image_requirements.txt " +
                 "container/ml " +
-                "[gcp,dataframe,test,tensorflow,torch,transformers,vllm] " +
+                "[gcp,dataframe,test,tensorflow,tfrecord,torch,transformers,vllm] " +
                 "${pipExtraOptions}"
       }
     }
diff --git a/sdks/python/setup.py b/sdks/python/setup.py
index c50050d9241..6c9a0d41f18 100644
--- a/sdks/python/setup.py
+++ b/sdks/python/setup.py
@@ -373,7 +373,6 @@ if __name__ == '__main__':
       },
       ext_modules=extensions,
       install_requires=[
-          'crcmod>=1.7,<2.0',
           'cryptography>=39.0.0,<48.0.0',
           'fastavro>=0.23.6,<2',
           'fasteners>=0.3,<1.0',
@@ -596,6 +595,7 @@ if __name__ == '__main__':
               ,
               'dill'
           ],
+          'tfrecord': ['crcmod>=1.7,<2.0'],
           'onnx': [
               'onnxruntime==1.13.1',
               'torch==1.13.1',
diff --git a/sdks/python/tox.ini b/sdks/python/tox.ini
index d47de67df5d..7d84ca7a2c6 100644
--- a/sdks/python/tox.ini
+++ b/sdks/python/tox.ini
@@ -33,7 +33,7 @@ pip_pre = True
 # allow apps that support color to use it.
 passenv=TERM,CLOUDSDK_CONFIG,DOCKER_*,TESTCONTAINERS_*,TC_*,ALLOYDB_PASSWORD
 # Set [] options for pip installation of apache-beam tarball.
-extras = test,dataframe,yaml
+extras = test,dataframe,tfrecord,yaml
 # Don't warn that these commands aren't installed.
 allowlist_externals =
   false

Reply via email to