This is an automated email from the ASF dual-hosted git repository.
potiuk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/main by this push:
new b7fe72676fa Add validation for pool names to prevent
InvalidStatsNameException (#59938)
b7fe72676fa is described below
commit b7fe72676fadbe3e494d1c368a938aa4a92f0415
Author: Pradeep Kalluri <[email protected]>
AuthorDate: Sat Jan 3 17:35:42 2026 +0000
Add validation for pool names to prevent InvalidStatsNameException (#59938)
* Add validation for pool names to prevent InvalidStatsNameException. Pool
names can now only contain ASCII letters, numbers, underscores, dots, and
dashes to ensure compatibility with stats naming requirements. Fixes #59935
* Add unit tests and news fragment for pool name validation
* Fix test file with proper database session handling
* Fix test assertions to use pytest.raises match parameter
* Add newline at the end of pool.py file
* Fix trailing whitespace and quote style in pool.py
* Fix syntax error: use consistent double quotes in regex
* Add missing newline and __init__.py file
* Add missing newline and __init__.py with license
* Fix __init__.py with proper license and single newline
* Implement pool name normalization for stats reporting
Following @potiuk's feedback, changed approach from preventing invalid
pool names to normalizing them for stats reporting.
Changes:
- Replaced validate_pool_name() with normalize_pool_name_for_stats()
- Pool names with invalid characters are normalized (replaced with _)
when emitting metrics
- Logs warning when normalization occurs, suggesting pool rename
- Removed validation from create_or_update_pool()
- Updated scheduler_job_runner.py to use normalized names for stats
- Removed validation tests
- Updated news fragment
This prevents InvalidStatsNameException without breaking existing pools
with invalid names.
Fixes #59935
* Implement pool name normalization for stats reporting
Following @potiuk's feedback, changed approach from preventing invalid
pool names to normalizing them for stats reporting.
Changes:
- Added normalize_pool_name_for_stats() function in pool.py
- Pool names with invalid characters are normalized (replaced with _)
when emitting metrics in scheduler_job_runner.py
- Logs warning when normalization occurs
- Removed validation tests
- Removed accidental fix_quote.py file
- Updated news fragment
This prevents InvalidStatsNameException without breaking existing pools.
Fixes #59935
* Add missing 're' module import
* Remove duplicate import
* Apply CI formatting fixes - import order and blank lines
* Fix formatting - add proper blank lines per ruff format requirements
---
.../src/airflow/jobs/scheduler_job_runner.py | 22 ++++++++-------
airflow-core/src/airflow/models/pool.py | 33 ++++++++++++++++++++++
newsfragments/59938.bugfix.rst | 1 +
tests/models/__init__.py | 16 +++++++++++
tests/models/test_pool.py | 18 ++++++++++++
5 files changed, 80 insertions(+), 10 deletions(-)
diff --git a/airflow-core/src/airflow/jobs/scheduler_job_runner.py
b/airflow-core/src/airflow/jobs/scheduler_job_runner.py
index bada07e48bc..98e743ae423 100644
--- a/airflow-core/src/airflow/jobs/scheduler_job_runner.py
+++ b/airflow-core/src/airflow/jobs/scheduler_job_runner.py
@@ -74,6 +74,7 @@ from airflow.models.dagbag import DBDagBag
from airflow.models.dagbundle import DagBundleModel
from airflow.models.dagrun import DagRun
from airflow.models.dagwarning import DagWarning, DagWarningType
+from airflow.models.pool import normalize_pool_name_for_stats
from airflow.models.serialized_dag import SerializedDagModel
from airflow.models.taskinstance import TaskInstance
from airflow.models.team import Team
@@ -2534,11 +2535,12 @@ class SchedulerJobRunner(BaseJobRunner, LoggingMixin):
with DebugTrace.start_span(span_name="emit_pool_metrics",
component="SchedulerJobRunner") as span:
pools = Pool.slots_stats(session=session)
for pool_name, slot_stats in pools.items():
- Stats.gauge(f"pool.open_slots.{pool_name}", slot_stats["open"])
- Stats.gauge(f"pool.queued_slots.{pool_name}",
slot_stats["queued"])
- Stats.gauge(f"pool.running_slots.{pool_name}",
slot_stats["running"])
- Stats.gauge(f"pool.deferred_slots.{pool_name}",
slot_stats["deferred"])
- Stats.gauge(f"pool.scheduled_slots.{pool_name}",
slot_stats["scheduled"])
+ normalized_pool_name = normalize_pool_name_for_stats(pool_name)
+ Stats.gauge(f"pool.open_slots.{normalized_pool_name}",
slot_stats["open"])
+ Stats.gauge(f"pool.queued_slots.{normalized_pool_name}",
slot_stats["queued"])
+ Stats.gauge(f"pool.running_slots.{normalized_pool_name}",
slot_stats["running"])
+ Stats.gauge(f"pool.deferred_slots.{normalized_pool_name}",
slot_stats["deferred"])
+ Stats.gauge(f"pool.scheduled_slots.{normalized_pool_name}",
slot_stats["scheduled"])
# Same metrics with tagging
Stats.gauge("pool.open_slots", slot_stats["open"],
tags={"pool_name": pool_name})
@@ -2550,11 +2552,11 @@ class SchedulerJobRunner(BaseJobRunner, LoggingMixin):
span.set_attributes(
{
"category": "scheduler",
- f"pool.open_slots.{pool_name}": slot_stats["open"],
- f"pool.queued_slots.{pool_name}": slot_stats["queued"],
- f"pool.running_slots.{pool_name}":
slot_stats["running"],
- f"pool.deferred_slots.{pool_name}":
slot_stats["deferred"],
- f"pool.scheduled_slots.{pool_name}":
slot_stats["scheduled"],
+ f"pool.open_slots.{normalized_pool_name}":
slot_stats["open"],
+ f"pool.queued_slots.{normalized_pool_name}":
slot_stats["queued"],
+ f"pool.running_slots.{normalized_pool_name}":
slot_stats["running"],
+ f"pool.deferred_slots.{normalized_pool_name}":
slot_stats["deferred"],
+ f"pool.scheduled_slots.{normalized_pool_name}":
slot_stats["scheduled"],
}
)
diff --git a/airflow-core/src/airflow/models/pool.py
b/airflow-core/src/airflow/models/pool.py
index 13ec3e9dccf..341a9232283 100644
--- a/airflow-core/src/airflow/models/pool.py
+++ b/airflow-core/src/airflow/models/pool.py
@@ -17,6 +17,8 @@
# under the License.
from __future__ import annotations
+import logging
+import re
from collections.abc import Sequence
from typing import TYPE_CHECKING, Any, TypedDict
@@ -35,6 +37,37 @@ if TYPE_CHECKING:
from sqlalchemy.orm.session import Session
from sqlalchemy.sql import Select
+logger = logging.getLogger(__name__)
+
+
+def normalize_pool_name_for_stats(name: str) -> str:
+ """
+ Normalize pool name for stats reporting by replacing invalid characters.
+
+ Stats names must only contain ASCII alphabets, numbers, underscores, dots,
and dashes.
+ Invalid characters are replaced with underscores.
+
+ :param name: The pool name to normalize
+ :return: Normalized pool name safe for stats reporting
+ """
+ # Check if normalization is needed
+ if re.match(r"^[a-zA-Z0-9_.-]+$", name):
+ return name
+
+ # Replace invalid characters with underscores
+ normalized = re.sub(r"[^a-zA-Z0-9_.-]", "_", name)
+
+ # Log warning
+ logger.warning(
+ "Pool name '%s' contains invalid characters for stats reporting. "
+ "Reporting stats with normalized name '%s'. "
+ "Consider renaming the pool to avoid this warning.",
+ name,
+ normalized,
+ )
+
+ return normalized
+
class PoolStats(TypedDict):
"""Dictionary containing Pool Stats."""
diff --git a/newsfragments/59938.bugfix.rst b/newsfragments/59938.bugfix.rst
new file mode 100644
index 00000000000..db9ecabfc0c
--- /dev/null
+++ b/newsfragments/59938.bugfix.rst
@@ -0,0 +1 @@
+Pool names with invalid characters for stats reporting are now automatically
normalized (invalid characters replaced with underscores) when emitting
metrics, preventing ``InvalidStatsNameException``. A warning is logged when
normalization occurs, suggesting the pool be renamed.
diff --git a/tests/models/__init__.py b/tests/models/__init__.py
new file mode 100644
index 00000000000..13a83393a91
--- /dev/null
+++ b/tests/models/__init__.py
@@ -0,0 +1,16 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
diff --git a/tests/models/test_pool.py b/tests/models/test_pool.py
new file mode 100644
index 00000000000..2df32291cca
--- /dev/null
+++ b/tests/models/test_pool.py
@@ -0,0 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from __future__ import annotations