This is an automated email from the ASF dual-hosted git repository.
shahar pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/main by this push:
new e68c525ea7f Add ClusterType field for Zero-Scale cluster support (#62207)
e68c525ea7f is described below
commit e68c525ea7f4b4ffa09529d6620753b7cf1c77cf
Author: Anmol Garg <[email protected]>
AuthorDate: Mon Mar 2 12:21:29 2026 +0530
Add ClusterType field for Zero-Scale cluster support (#62207)
---
providers/google/README.rst | 2 +-
providers/google/docs/index.rst | 2 +-
providers/google/pyproject.toml | 2 +-
.../src/airflow/providers/google/cloud/operators/dataproc.py | 6 ++++++
.../google/cloud/dataproc/example_dataproc_cluster_generator.py | 1 +
.../tests/system/google/cloud/dataproc/example_dataproc_hive.py | 1 +
.../tests/system/google/cloud/dataproc/example_dataproc_pig.py | 1 +
.../tests/system/google/cloud/dataproc/example_dataproc_pyspark.py | 1 +
.../google/tests/unit/google/cloud/operators/test_dataproc.py | 7 +++++++
9 files changed, 20 insertions(+), 3 deletions(-)
diff --git a/providers/google/README.rst b/providers/google/README.rst
index cab9338982c..826714d44cc 100644
--- a/providers/google/README.rst
+++ b/providers/google/README.rst
@@ -92,7 +92,7 @@ PIP package Version required
``google-cloud-dataflow-client`` ``>=0.8.6``
``google-cloud-dataform`` ``>=0.5.0``
``google-cloud-dataplex`` ``>=2.6.0``
-``google-cloud-dataproc`` ``>=5.21.0``
+``google-cloud-dataproc`` ``>=5.25.0``
``google-cloud-dataproc-metastore`` ``>=1.12.0``
``google-cloud-dlp`` ``>=3.12.0``
``google-cloud-kms`` ``>=2.15.0``
diff --git a/providers/google/docs/index.rst b/providers/google/docs/index.rst
index 51ddd5cd02e..e7da96e24af 100644
--- a/providers/google/docs/index.rst
+++ b/providers/google/docs/index.rst
@@ -144,7 +144,7 @@ PIP package Version required
``google-cloud-dataflow-client`` ``>=0.8.6``
``google-cloud-dataform`` ``>=0.5.0``
``google-cloud-dataplex`` ``>=2.6.0``
-``google-cloud-dataproc`` ``>=5.21.0``
+``google-cloud-dataproc`` ``>=5.25.0``
``google-cloud-dataproc-metastore`` ``>=1.12.0``
``google-cloud-dlp`` ``>=3.12.0``
``google-cloud-kms`` ``>=2.15.0``
diff --git a/providers/google/pyproject.toml b/providers/google/pyproject.toml
index 1175400a23e..933cbdb8e86 100644
--- a/providers/google/pyproject.toml
+++ b/providers/google/pyproject.toml
@@ -96,7 +96,7 @@ dependencies = [
"google-cloud-dataflow-client>=0.8.6",
"google-cloud-dataform>=0.5.0",
"google-cloud-dataplex>=2.6.0",
- "google-cloud-dataproc>=5.21.0",
+ "google-cloud-dataproc>=5.25.0",
"google-cloud-dataproc-metastore>=1.12.0",
"google-cloud-dlp>=3.12.0",
"google-cloud-kms>=2.15.0",
diff --git a/providers/google/src/airflow/providers/google/cloud/operators/dataproc.py b/providers/google/src/airflow/providers/google/cloud/operators/dataproc.py
index c7c23c3c21d..7b08af16a98 100644
--- a/providers/google/src/airflow/providers/google/cloud/operators/dataproc.py
+++ b/providers/google/src/airflow/providers/google/cloud/operators/dataproc.py
@@ -213,6 +213,7 @@ class ClusterGenerator:
see
https://cloud.google.com/dataproc/docs/reference/rest/v1/InstanceGroupConfig#acceleratorconfig
:param secondary_worker_accelerator_count: Number of accelerator cards
(GPUs) to attach to the secondary workers
:param cluster_tier: The tier of the cluster (e.g. "CLUSTER_TIER_STANDARD"
/ "CLUSTER_TIER_PREMIUM").
+ :param cluster_type: The type of the cluster (e.g. "STANDARD" / "SINGLE_NODE" / "ZERO_SCALE")
"""
def __init__(
@@ -263,6 +264,7 @@ class ClusterGenerator:
secondary_worker_accelerator_count: int | None = None,
*,
cluster_tier: str | None = None,
+ cluster_type: str | None = None,
**kwargs,
) -> None:
self.project_id = project_id
@@ -311,6 +313,7 @@ class ClusterGenerator:
self.secondary_worker_accelerator_type = secondary_worker_accelerator_type
self.secondary_worker_accelerator_count = secondary_worker_accelerator_count
self.cluster_tier = cluster_tier
+ self.cluster_type = cluster_type
if self.custom_image and self.image_version:
raise ValueError("The custom_image and image_version can't be both set")
@@ -519,6 +522,9 @@ class ClusterGenerator:
if self.cluster_tier:
cluster_data["cluster_tier"] = self.cluster_tier
+ if self.cluster_type:
+ cluster_data["cluster_type"] = self.cluster_type
+
cluster_data = self._build_gce_cluster_config(cluster_data)
if self.single_node:
diff --git a/providers/google/tests/system/google/cloud/dataproc/example_dataproc_cluster_generator.py b/providers/google/tests/system/google/cloud/dataproc/example_dataproc_cluster_generator.py
index 48fd9e560ab..344869188ac 100644
--- a/providers/google/tests/system/google/cloud/dataproc/example_dataproc_cluster_generator.py
+++ b/providers/google/tests/system/google/cloud/dataproc/example_dataproc_cluster_generator.py
@@ -80,6 +80,7 @@ CLUSTER_GENERATOR_CONFIG = ClusterGenerator(
preemptibility="PREEMPTIBLE",
internal_ip_only=False,
cluster_tier="CLUSTER_TIER_STANDARD",
+ cluster_type="STANDARD",
).make()
# [END how_to_cloud_dataproc_create_cluster_generate_cluster_config]
diff --git a/providers/google/tests/system/google/cloud/dataproc/example_dataproc_hive.py b/providers/google/tests/system/google/cloud/dataproc/example_dataproc_hive.py
index 39a4c8e49d3..3edff84de17 100644
--- a/providers/google/tests/system/google/cloud/dataproc/example_dataproc_hive.py
+++ b/providers/google/tests/system/google/cloud/dataproc/example_dataproc_hive.py
@@ -54,6 +54,7 @@ REGION = "europe-west1"
# [START how_to_cloud_dataproc_create_cluster]
CLUSTER_CONFIG = {
+ "cluster_type": "STANDARD",
"cluster_tier": "CLUSTER_TIER_STANDARD",
"master_config": {
"num_instances": 1,
diff --git a/providers/google/tests/system/google/cloud/dataproc/example_dataproc_pig.py b/providers/google/tests/system/google/cloud/dataproc/example_dataproc_pig.py
index 1167025598a..47d0a605d7a 100644
--- a/providers/google/tests/system/google/cloud/dataproc/example_dataproc_pig.py
+++ b/providers/google/tests/system/google/cloud/dataproc/example_dataproc_pig.py
@@ -53,6 +53,7 @@ REGION = "europe-west1"
# Cluster definition
CLUSTER_CONFIG = {
+ "cluster_type": "STANDARD",
"cluster_tier": "CLUSTER_TIER_STANDARD",
"master_config": {
"num_instances": 1,
diff --git a/providers/google/tests/system/google/cloud/dataproc/example_dataproc_pyspark.py b/providers/google/tests/system/google/cloud/dataproc/example_dataproc_pyspark.py
index 111775f802e..953e61393f6 100644
--- a/providers/google/tests/system/google/cloud/dataproc/example_dataproc_pyspark.py
+++ b/providers/google/tests/system/google/cloud/dataproc/example_dataproc_pyspark.py
@@ -63,6 +63,7 @@ REGION = "europe-west1"
# Cluster definition
CLUSTER_CONFIG = {
+ "cluster_type": "STANDARD",
"cluster_tier": "CLUSTER_TIER_STANDARD",
"master_config": {
"num_instances": 1,
diff --git a/providers/google/tests/unit/google/cloud/operators/test_dataproc.py b/providers/google/tests/unit/google/cloud/operators/test_dataproc.py
index bbfc702914b..3db5f497e4d 100644
--- a/providers/google/tests/unit/google/cloud/operators/test_dataproc.py
+++ b/providers/google/tests/unit/google/cloud/operators/test_dataproc.py
@@ -134,6 +134,7 @@ CONFIG = {
"autoscaling_config": {"policy_uri": "autoscaling_policy"},
"config_bucket": "storage_bucket",
"cluster_tier": "CLUSTER_TIER_STANDARD",
+ "cluster_type": "STANDARD",
"initialization_actions": [
{"executable_file": "init_actions_uris", "execution_timeout": {"seconds": 600}}
],
@@ -601,6 +602,7 @@ class TestsClusterGenerator:
driver_pool_id="cluster_driver_pool",
driver_pool_size=2,
cluster_tier="CLUSTER_TIER_STANDARD",
+ cluster_type="STANDARD",
)
cluster = generator.make()
assert cluster == CONFIG
@@ -757,6 +759,11 @@ class TestsClusterGenerator:
cluster = generator.make()
assert cluster["cluster_tier"] == "CLUSTER_TIER_STANDARD"
+ def test_build_with_cluster_type(self):
+ generator = ClusterGenerator(project_id="project_id", cluster_type="STANDARD")
+ cluster = generator.make()
+ assert cluster["cluster_type"] == "STANDARD"
+
class TestDataprocCreateClusterOperator(DataprocClusterTestBase):
def test_deprecation_warning(self):