This is an automated email from the ASF dual-hosted git repository.
damccorm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git
The following commit(s) were added to refs/heads/master by this push:
new 1b89fcff0ec Split some requirements into extras (#36749)
1b89fcff0ec is described below
commit 1b89fcff0ec4d59268b404479c5adb7efcbb8c31
Author: Danny McCormick <[email protected]>
AuthorDate: Fri Nov 7 12:57:28 2025 -0500
Split some requirements into extras (#36749)
* Split some requirements into extras
* comma
* test fixes
---
sdks/python/apache_beam/io/tfrecordio.py | 17 +++++++++++++++--
sdks/python/apache_beam/io/tfrecordio_test.py | 7 ++++++-
sdks/python/container/common.gradle | 6 +++---
sdks/python/setup.py | 2 +-
sdks/python/tox.ini | 2 +-
5 files changed, 26 insertions(+), 8 deletions(-)
diff --git a/sdks/python/apache_beam/io/tfrecordio.py b/sdks/python/apache_beam/io/tfrecordio.py
index c6c59b2c2be..073cbc1d211 100644
--- a/sdks/python/apache_beam/io/tfrecordio.py
+++ b/sdks/python/apache_beam/io/tfrecordio.py
@@ -24,8 +24,6 @@ import logging
import struct
from functools import partial
-import crcmod
-
from apache_beam import coders
from apache_beam.io import filebasedsink
from apache_beam.io.filebasedsource import FileBasedSource
@@ -35,6 +33,16 @@ from apache_beam.io.iobase import Read
from apache_beam.io.iobase import Write
from apache_beam.transforms import PTransform
+try:
+ import crcmod
+except ImportError:
+ logging.warning(
+ 'crcmod package not found. This package is required if '
+ 'python-snappy or google-crc32c are not installed. To ensure crcmod is '
+ 'installed, install the tfrecord extra: pip install '
+ 'apache-beam[tfrecord]')
+ crcmod = None
+
__all__ = ['ReadFromTFRecord', 'ReadAllFromTFRecord', 'WriteToTFRecord']
_LOGGER = logging.getLogger(__name__)
@@ -67,6 +75,11 @@ def _default_crc32c_fn(value):
pass
if not _default_crc32c_fn.fn:
+ if crcmod is None:
+ raise RuntimeError(
+ 'Could not find python-snappy, google-crc32c, or crcmod. To allow '
+ 'execution to succeed, make sure that one of these packages is '
+ 'installed or pip install apache-beam[tfrecord]')
_LOGGER.warning(
'Couldn\'t find python-snappy or google-crc32c so the '
'implementation of _TFRecordUtil._masked_crc32c is not as fast '
diff --git a/sdks/python/apache_beam/io/tfrecordio_test.py b/sdks/python/apache_beam/io/tfrecordio_test.py
index 6522ade36d8..e88ed177863 100644
--- a/sdks/python/apache_beam/io/tfrecordio_test.py
+++ b/sdks/python/apache_beam/io/tfrecordio_test.py
@@ -33,7 +33,6 @@ import unittest
import zlib
from datetime import datetime
-import crcmod
import pytz
import apache_beam as beam
@@ -61,6 +60,11 @@ except ImportError:
tf = None # pylint: disable=invalid-name
logging.warning('Tensorflow is not installed, so skipping some tests.')
+try:
+ import crcmod
+except ImportError:
+ crcmod = None
+
# Created by running following code in python:
# >>> import tensorflow as tf
# >>> import base64
@@ -121,6 +125,7 @@ class TestTFRecordUtil(unittest.TestCase):
0xe4999b0,
_TFRecordUtil._masked_crc32c(b'\x03\x00\x00\x00\x00\x00\x00\x00'))
+ @unittest.skipIf(crcmod is None, 'crcmod not installed.')
def test_masked_crc32c_crcmod(self):
crc32c_fn = crcmod.predefined.mkPredefinedCrcFun('crc-32c')
self.assertEqual(
diff --git a/sdks/python/container/common.gradle b/sdks/python/container/common.gradle
index 8ee31cf4e50..ad64dbbb660 100644
--- a/sdks/python/container/common.gradle
+++ b/sdks/python/container/common.gradle
@@ -42,7 +42,7 @@ def generatePythonRequirements = tasks.register("generatePythonRequirements") {
"${files(configurations.sdkSourceTarball.files).singleFile} " +
"base_image_requirements.txt " +
"container " +
- "[gcp,dataframe,test] " +
+ "[gcp,dataframe,test,tfrecord] " +
"${pipExtraOptions}"
}
// Generate versions for ML dependencies
@@ -53,7 +53,7 @@ def generatePythonRequirements = tasks.register("generatePythonRequirements") {
"${files(configurations.sdkSourceTarball.files).singleFile} " +
"base_image_requirements.txt " +
"container/ml " +
- "[gcp,dataframe,test,ml_cpu] " +
+ "[gcp,dataframe,test,ml_cpu,tfrecord] " +
"${pipExtraOptions}"
}
// TODO(https://github.com/apache/beam/issues/36637)
@@ -73,7 +73,7 @@ def generatePythonRequirements = tasks.register("generatePythonRequirements") {
"${files(configurations.sdkSourceTarball.files).singleFile} " +
"gpu_image_requirements.txt " +
"container/ml " +
- "[gcp,dataframe,test,tensorflow,torch,transformers,vllm] " +
+ "[gcp,dataframe,test,tensorflow,tfrecord,torch,transformers,vllm] " +
"${pipExtraOptions}"
}
}
diff --git a/sdks/python/setup.py b/sdks/python/setup.py
index c50050d9241..6c9a0d41f18 100644
--- a/sdks/python/setup.py
+++ b/sdks/python/setup.py
@@ -373,7 +373,6 @@ if __name__ == '__main__':
},
ext_modules=extensions,
install_requires=[
- 'crcmod>=1.7,<2.0',
'cryptography>=39.0.0,<48.0.0',
'fastavro>=0.23.6,<2',
'fasteners>=0.3,<1.0',
@@ -596,6 +595,7 @@ if __name__ == '__main__':
,
'dill'
],
+ 'tfrecord': ['crcmod>=1.7,<2.0'],
'onnx': [
'onnxruntime==1.13.1',
'torch==1.13.1',
diff --git a/sdks/python/tox.ini b/sdks/python/tox.ini
index d47de67df5d..7d84ca7a2c6 100644
--- a/sdks/python/tox.ini
+++ b/sdks/python/tox.ini
@@ -33,7 +33,7 @@ pip_pre = True
# allow apps that support color to use it.
passenv=TERM,CLOUDSDK_CONFIG,DOCKER_*,TESTCONTAINERS_*,TC_*,ALLOYDB_PASSWORD
# Set [] options for pip installation of apache-beam tarball.
-extras = test,dataframe,yaml
+extras = test,dataframe,tfrecord,yaml
# Don't warn that these commands aren't installed.
allowlist_externals =
false