This is an automated email from the ASF dual-hosted git repository.

uranusjr pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git


The following commit(s) were added to refs/heads/main by this push:
     new 339fd06026 Better table name validation for db clean (#28246)
339fd06026 is described below

commit 339fd060266d87c7dd650f7ec2e1256fffa6bb67
Author: Jed Cunningham <66968678+jedcunning...@users.noreply.github.com>
AuthorDate: Wed Dec 21 23:49:08 2022 -0600

    Better table name validation for db clean (#28246)
    
    Co-authored-by: Tzu-ping Chung <uranu...@gmail.com>
    Co-authored-by: Kaxil Naik <kaxiln...@gmail.com>
---
 airflow/utils/db_cleanup.py    | 14 +++++++++++---
 tests/utils/test_db_cleanup.py | 14 ++++++++++++++
 2 files changed, 25 insertions(+), 3 deletions(-)

diff --git a/airflow/utils/db_cleanup.py b/airflow/utils/db_cleanup.py
index 51bfbb7963..ac96a6abdb 100644
--- a/airflow/utils/db_cleanup.py
+++ b/airflow/utils/db_cleanup.py
@@ -336,8 +336,16 @@ def run_cleanup(
     :param session: Session representing connection to the metadata database.
     """
     clean_before_timestamp = timezone.coerce_datetime(clean_before_timestamp)
-    effective_table_names = table_names if table_names else 
list(config_dict.keys())
-    effective_config_dict = {k: v for k, v in config_dict.items() if k in 
effective_table_names}
+    desired_table_names = set(table_names or config_dict)
+    effective_config_dict = {k: v for k, v in config_dict.items() if k in 
desired_table_names}
+    effective_table_names = set(effective_config_dict)
+    if desired_table_names != effective_table_names:
+        outliers = desired_table_names - effective_table_names
+        logger.warning(
+            "The following table(s) are not valid choices and will be skipped: 
%s", sorted(outliers)
+        )
+    if not effective_table_names:
+        raise SystemExit("No tables selected for db cleanup. Please choose 
valid table names.")
     if dry_run:
         print("Performing dry run for db cleanup.")
         print(
@@ -346,7 +354,7 @@ def run_cleanup(
         )
         _print_config(configs=effective_config_dict)
     if not dry_run and confirm:
-        _confirm_delete(date=clean_before_timestamp, 
tables=list(effective_config_dict.keys()))
+        _confirm_delete(date=clean_before_timestamp, 
tables=sorted(effective_table_names))
     existing_tables = reflect_tables(tables=None, session=session).tables
     for table_name, table_config in effective_config_dict.items():
         if table_name not in existing_tables:
diff --git a/tests/utils/test_db_cleanup.py b/tests/utils/test_db_cleanup.py
index b00fde82a8..9775d9867c 100644
--- a/tests/utils/test_db_cleanup.py
+++ b/tests/utils/test_db_cleanup.py
@@ -121,6 +121,20 @@ class TestDBCleanup:
         run_cleanup(**base_kwargs, table_names=table_names)
         assert clean_table_mock.call_count == len(table_names) if table_names 
else len(config_dict)
 
+    @patch("airflow.utils.db_cleanup._cleanup_table")
+    @patch("airflow.utils.db_cleanup._confirm_delete")
+    def test_validate_tables_all_invalid(self, confirm_delete_mock, 
clean_table_mock):
+        """If only invalid tables are provided, don't try cleaning anything"""
+        base_kwargs = dict(
+            clean_before_timestamp=None,
+            dry_run=None,
+            verbose=None,
+        )
+        with pytest.raises(SystemExit) as execinfo:
+            run_cleanup(**base_kwargs, table_names=["all", "fake"])
+        assert "No tables selected for db cleanup" in str(execinfo.value)
+        confirm_delete_mock.assert_not_called()
+
     @pytest.mark.parametrize(
         "dry_run",
         [None, True, False],

Reply via email to