This is an automated email from the ASF dual-hosted git repository.

potiuk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git


The following commit(s) were added to refs/heads/main by this push:
     new b7fe72676fa Add validation for pool names to prevent 
InvalidStatsNameException (#59938)
b7fe72676fa is described below

commit b7fe72676fadbe3e494d1c368a938aa4a92f0415
Author: Pradeep Kalluri <[email protected]>
AuthorDate: Sat Jan 3 17:35:42 2026 +0000

    Add validation for pool names to prevent InvalidStatsNameException (#59938)
    
    * Add validation for pool names to prevent InvalidStatsNameException. Pool 
names can now only contain ASCII letters, numbers, underscores, dots, and 
dashes to ensure compatibility with stats naming requirements. Fixes #59935
    
    * Add unit Tests and news fragment for pool name validation
    
    * Fix test file with proper database session handling
    
    * Fix test assertions to use pytest.raises match parameter
    
    * Add newline at the end of pool.py file
    
    * Fix trailing whitespace and quote style in pool.py
    
    * Fix syntax error: use consistent double quotes in regex
    
    * Add missing newline and __init__.py file
    
    * Add missing newline and __init__.py with license
    
    * Fix __init__.py with proper license and single newline
    
    * Implement pool name normalization for stats reporting
    
    Following @potiuk's feedback, changed approach from preventing invalid
    pool names to normalizing them for stats reporting.
    
    Changes:
    - Replaced validate_pool_name() with normalize_pool_name_for_stats()
    - Pool names with invalid characters are normalized (replaced with _)
      when emitting metrics
    - Logs warning when normalization occurs, suggesting pool rename
    - Removed validation from create_or_update_pool()
    - Updated scheduler_job_runner.py to use normalized names for stats
    - Removed validation tests
    - Updated news fragment
    
    This prevents InvalidStatsNameException without breaking existing pools
    with invalid names.
    
    Fixes #59935
    
    * Implement pool name normalization for stats reporting
    
    Following @potiuk's feedback, changed approach from preventing invalid
    pool names to normalizing them for stats reporting.
    
    Changes:
    - Added normalize_pool_name_for_stats() function in pool.py
    - Pool names with invalid characters are normalized (replaced with _)
      when emitting metrics in scheduler_job_runner.py
    - Logs warning when normalization occurs
    - Removed validation tests
    - Removed accidental fix_quote.py file
    - Updated news fragment
    
    This prevents InvalidStatsNameException without breaking existing pools.
    
    Fixes #59935
    
    * Add missing 're' module import
    
    * Remove duplicate import
    
    * Apply CI formatting fixes - import order and blank lines
    
    * Fix formatting - add proper blank lines per ruff format requirements
---
 .../src/airflow/jobs/scheduler_job_runner.py       | 22 ++++++++-------
 airflow-core/src/airflow/models/pool.py            | 33 ++++++++++++++++++++++
 newsfragments/59938.bugfix.rst                     |  1 +
 tests/models/__init__.py                           | 16 +++++++++++
 tests/models/test_pool.py                          | 18 ++++++++++++
 5 files changed, 80 insertions(+), 10 deletions(-)

diff --git a/airflow-core/src/airflow/jobs/scheduler_job_runner.py 
b/airflow-core/src/airflow/jobs/scheduler_job_runner.py
index bada07e48bc..98e743ae423 100644
--- a/airflow-core/src/airflow/jobs/scheduler_job_runner.py
+++ b/airflow-core/src/airflow/jobs/scheduler_job_runner.py
@@ -74,6 +74,7 @@ from airflow.models.dagbag import DBDagBag
 from airflow.models.dagbundle import DagBundleModel
 from airflow.models.dagrun import DagRun
 from airflow.models.dagwarning import DagWarning, DagWarningType
+from airflow.models.pool import normalize_pool_name_for_stats
 from airflow.models.serialized_dag import SerializedDagModel
 from airflow.models.taskinstance import TaskInstance
 from airflow.models.team import Team
@@ -2534,11 +2535,12 @@ class SchedulerJobRunner(BaseJobRunner, LoggingMixin):
         with DebugTrace.start_span(span_name="emit_pool_metrics", 
component="SchedulerJobRunner") as span:
             pools = Pool.slots_stats(session=session)
             for pool_name, slot_stats in pools.items():
-                Stats.gauge(f"pool.open_slots.{pool_name}", slot_stats["open"])
-                Stats.gauge(f"pool.queued_slots.{pool_name}", 
slot_stats["queued"])
-                Stats.gauge(f"pool.running_slots.{pool_name}", 
slot_stats["running"])
-                Stats.gauge(f"pool.deferred_slots.{pool_name}", 
slot_stats["deferred"])
-                Stats.gauge(f"pool.scheduled_slots.{pool_name}", 
slot_stats["scheduled"])
+                normalized_pool_name = normalize_pool_name_for_stats(pool_name)
+                Stats.gauge(f"pool.open_slots.{normalized_pool_name}", 
slot_stats["open"])
+                Stats.gauge(f"pool.queued_slots.{normalized_pool_name}", 
slot_stats["queued"])
+                Stats.gauge(f"pool.running_slots.{normalized_pool_name}", 
slot_stats["running"])
+                Stats.gauge(f"pool.deferred_slots.{normalized_pool_name}", 
slot_stats["deferred"])
+                Stats.gauge(f"pool.scheduled_slots.{normalized_pool_name}", 
slot_stats["scheduled"])
 
                 # Same metrics with tagging
                 Stats.gauge("pool.open_slots", slot_stats["open"], 
tags={"pool_name": pool_name})
@@ -2550,11 +2552,11 @@ class SchedulerJobRunner(BaseJobRunner, LoggingMixin):
                 span.set_attributes(
                     {
                         "category": "scheduler",
-                        f"pool.open_slots.{pool_name}": slot_stats["open"],
-                        f"pool.queued_slots.{pool_name}": slot_stats["queued"],
-                        f"pool.running_slots.{pool_name}": 
slot_stats["running"],
-                        f"pool.deferred_slots.{pool_name}": 
slot_stats["deferred"],
-                        f"pool.scheduled_slots.{pool_name}": 
slot_stats["scheduled"],
+                        f"pool.open_slots.{normalized_pool_name}": 
slot_stats["open"],
+                        f"pool.queued_slots.{normalized_pool_name}": 
slot_stats["queued"],
+                        f"pool.running_slots.{normalized_pool_name}": 
slot_stats["running"],
+                        f"pool.deferred_slots.{normalized_pool_name}": 
slot_stats["deferred"],
+                        f"pool.scheduled_slots.{normalized_pool_name}": 
slot_stats["scheduled"],
                     }
                 )
 
diff --git a/airflow-core/src/airflow/models/pool.py 
b/airflow-core/src/airflow/models/pool.py
index 13ec3e9dccf..341a9232283 100644
--- a/airflow-core/src/airflow/models/pool.py
+++ b/airflow-core/src/airflow/models/pool.py
@@ -17,6 +17,8 @@
 # under the License.
 from __future__ import annotations
 
+import logging
+import re
 from collections.abc import Sequence
 from typing import TYPE_CHECKING, Any, TypedDict
 
@@ -35,6 +37,37 @@ if TYPE_CHECKING:
     from sqlalchemy.orm.session import Session
     from sqlalchemy.sql import Select
 
+logger = logging.getLogger(__name__)
+
+
+def normalize_pool_name_for_stats(name: str) -> str:
+    """
+    Normalize pool name for stats reporting by replacing invalid characters.
+
+    Stats names must only contain ASCII alphabets, numbers, underscores, dots, 
and dashes.
+    Invalid characters are replaced with underscores.
+
+    :param name: The pool name to normalize
+    :return: Normalized pool name safe for stats reporting
+    """
+    # Check if normalization is needed
+    if re.match(r"^[a-zA-Z0-9_.-]+$", name):
+        return name
+
+    # Replace invalid characters with underscores
+    normalized = re.sub(r"[^a-zA-Z0-9_.-]", "_", name)
+
+    # Log warning
+    logger.warning(
+        "Pool name '%s' contains invalid characters for stats reporting. "
+        "Reporting stats with normalized name '%s'. "
+        "Consider renaming the pool to avoid this warning.",
+        name,
+        normalized,
+    )
+
+    return normalized
+
 
 class PoolStats(TypedDict):
     """Dictionary containing Pool Stats."""
diff --git a/newsfragments/59938.bugfix.rst b/newsfragments/59938.bugfix.rst
new file mode 100644
index 00000000000..db9ecabfc0c
--- /dev/null
+++ b/newsfragments/59938.bugfix.rst
@@ -0,0 +1 @@
+Pool names with invalid characters for stats reporting are now automatically 
normalized (invalid characters replaced with underscores) when emitting 
metrics, preventing ``InvalidStatsNameException``. A warning is logged when 
normalization occurs, suggesting the pool be renamed.
diff --git a/tests/models/__init__.py b/tests/models/__init__.py
new file mode 100644
index 00000000000..13a83393a91
--- /dev/null
+++ b/tests/models/__init__.py
@@ -0,0 +1,16 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
diff --git a/tests/models/test_pool.py b/tests/models/test_pool.py
new file mode 100644
index 00000000000..2df32291cca
--- /dev/null
+++ b/tests/models/test_pool.py
@@ -0,0 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from __future__ import annotations

Reply via email to