This is an automated email from the ASF dual-hosted git repository. uranusjr pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/main by this push: new 339fd06026 Better table name validation for db clean (#28246) 339fd06026 is described below commit 339fd060266d87c7dd650f7ec2e1256fffa6bb67 Author: Jed Cunningham <66968678+jedcunning...@users.noreply.github.com> AuthorDate: Wed Dec 21 23:49:08 2022 -0600 Better table name validation for db clean (#28246) Co-authored-by: Tzu-ping Chung <uranu...@gmail.com> Co-authored-by: Kaxil Naik <kaxiln...@gmail.com> --- airflow/utils/db_cleanup.py | 14 +++++++++++--- tests/utils/test_db_cleanup.py | 14 ++++++++++++++ 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/airflow/utils/db_cleanup.py b/airflow/utils/db_cleanup.py index 51bfbb7963..ac96a6abdb 100644 --- a/airflow/utils/db_cleanup.py +++ b/airflow/utils/db_cleanup.py @@ -336,8 +336,16 @@ def run_cleanup( :param session: Session representing connection to the metadata database. """ clean_before_timestamp = timezone.coerce_datetime(clean_before_timestamp) - effective_table_names = table_names if table_names else list(config_dict.keys()) - effective_config_dict = {k: v for k, v in config_dict.items() if k in effective_table_names} + desired_table_names = set(table_names or config_dict) + effective_config_dict = {k: v for k, v in config_dict.items() if k in desired_table_names} + effective_table_names = set(effective_config_dict) + if desired_table_names != effective_table_names: + outliers = desired_table_names - effective_table_names + logger.warning( + "The following table(s) are not valid choices and will be skipped: %s", sorted(outliers) + ) + if not effective_table_names: + raise SystemExit("No tables selected for db cleanup. Please choose valid table names.") if dry_run: print("Performing dry run for db cleanup.") print( @@ -346,7 +354,7 @@ def run_cleanup( ) _print_config(configs=effective_config_dict) if not dry_run and confirm: - _confirm_delete(date=clean_before_timestamp, tables=list(effective_config_dict.keys())) + _confirm_delete(date=clean_before_timestamp, tables=sorted(effective_table_names)) existing_tables = reflect_tables(tables=None, session=session).tables for table_name, table_config in effective_config_dict.items(): if table_name not in existing_tables: diff --git a/tests/utils/test_db_cleanup.py b/tests/utils/test_db_cleanup.py index b00fde82a8..9775d9867c 100644 --- a/tests/utils/test_db_cleanup.py +++ b/tests/utils/test_db_cleanup.py @@ -121,6 +121,20 @@ class TestDBCleanup: run_cleanup(**base_kwargs, table_names=table_names) assert clean_table_mock.call_count == len(table_names) if table_names else len(config_dict) + @patch("airflow.utils.db_cleanup._cleanup_table") + @patch("airflow.utils.db_cleanup._confirm_delete") + def test_validate_tables_all_invalid(self, confirm_delete_mock, clean_table_mock): + """If only invalid tables are provided, don't try cleaning anything""" + base_kwargs = dict( + clean_before_timestamp=None, + dry_run=None, + verbose=None, + ) + with pytest.raises(SystemExit) as execinfo: + run_cleanup(**base_kwargs, table_names=["all", "fake"]) + assert "No tables selected for db cleanup" in str(execinfo.value) + confirm_delete_mock.assert_not_called() + @pytest.mark.parametrize( "dry_run", [None, True, False],