This is an automated email from the ASF dual-hosted git repository.
potiuk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/main by this push:
new 394820b201d Validate Dataproc batch labels max 63 characters (#55196)
394820b201d is described below
commit 394820b201de080d03d3e2a11fa8d77a10fc82d4
Author: Chris Nauroth <[email protected]>
AuthorDate: Wed Sep 3 18:07:02 2025 -0700
Validate Dataproc batch labels max 63 characters (#55196)
As per the
[Dataproc documentation](https://cloud.google.com/dataproc/docs/guides/creating-managing-labels),
the maximum length for a label value is 63 characters.
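`DataprocCreateBatchOperator` validates the DAG ID and task ID before using
them as batch labels, to prevent setting longer labels. However, the regex
`^[a-z][\w-]{0,63}$` currently allows 64 characters (one leading letter plus
up to 63 further characters), which causes a failure when trying to create
the batch. Update the regex to `^[a-z][\w-]{0,62}$` and the unit tests to
enforce a max of 63.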
---
.../providers/google/cloud/operators/dataproc.py | 2 +-
.../unit/google/cloud/operators/test_dataproc.py | 23 +++++++++++++++++++++-
2 files changed, 23 insertions(+), 2 deletions(-)
diff --git
a/providers/google/src/airflow/providers/google/cloud/operators/dataproc.py
b/providers/google/src/airflow/providers/google/cloud/operators/dataproc.py
index 72a5fda7ed8..3dcf64dbeee 100644
--- a/providers/google/src/airflow/providers/google/cloud/operators/dataproc.py
+++ b/providers/google/src/airflow/providers/google/cloud/operators/dataproc.py
@@ -2573,7 +2573,7 @@ class
DataprocCreateBatchOperator(GoogleCloudBaseOperator):
dag_id = re.sub(r"[.\s]", "_", self.dag_id.lower())
task_id = re.sub(r"[.\s]", "_", self.task_id.lower())
- labels_regex = re.compile(r"^[a-z][\w-]{0,63}$")
+ labels_regex = re.compile(r"^[a-z][\w-]{0,62}$")
if not labels_regex.match(dag_id) or not labels_regex.match(task_id):
return
diff --git
a/providers/google/tests/unit/google/cloud/operators/test_dataproc.py
b/providers/google/tests/unit/google/cloud/operators/test_dataproc.py
index b631716e6fa..d71be1318c4 100644
--- a/providers/google/tests/unit/google/cloud/operators/test_dataproc.py
+++ b/providers/google/tests/unit/google/cloud/operators/test_dataproc.py
@@ -3874,6 +3874,27 @@ class TestDataprocCreateBatchOperator:
TestDataprocCreateBatchOperator.__assert_batch_create(mock_hook,
expected_batch)
+ @mock.patch(DATAPROC_PATH.format("Batch.to_dict"))
+ @mock.patch(DATAPROC_PATH.format("DataprocHook"))
+ def test_create_batch_asdict_taskid_max_length_labels_updated(self,
mock_hook, to_dict_mock):
+ long_task_id = "a" * 63
+ expected_batch = {
+ **BATCH,
+ "labels": {
+ "airflow-dag-id": TEST_DAG_ID,
+ "airflow-dag-display-name": TEST_DAG_ID,
+ "airflow-task-id": long_task_id,
+ },
+ }
+ DataprocCreateBatchOperator(
+ task_id=long_task_id,
+ dag=DAG(dag_id=TEST_DAG_ID),
+ batch=BATCH,
+ region=GCP_REGION,
+ ).execute(context=EXAMPLE_CONTEXT)
+
+ TestDataprocCreateBatchOperator.__assert_batch_create(mock_hook,
expected_batch)
+
@mock.patch(DATAPROC_PATH.format("Batch.to_dict"))
@mock.patch(DATAPROC_PATH.format("DataprocHook"))
def test_create_batch_invalid_taskid_labels_ignored(self, mock_hook,
to_dict_mock):
@@ -3890,7 +3911,7 @@ class TestDataprocCreateBatchOperator:
@mock.patch(DATAPROC_PATH.format("DataprocHook"))
def test_create_batch_long_taskid_labels_ignored(self, mock_hook,
to_dict_mock):
DataprocCreateBatchOperator(
- task_id="a" * 65,
+ task_id="a" * 64,
dag=DAG(dag_id=TEST_DAG_ID),
batch=BATCH,
region=GCP_REGION,