potiuk commented on code in PR #28631:
URL: https://github.com/apache/airflow/pull/28631#discussion_r1058823025


##########
tests/providers/conftest.py:
##########
@@ -0,0 +1,89 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from __future__ import annotations
+
+from functools import lru_cache
+from pathlib import Path
+
+import pytest
+
+from tests.test_utils import db
+
+_CLEAR_DB_PROVIDERS = set()
+
+
+@lru_cache(maxsize=None)
+def providers_packages():
+    """Get providers packages full qualname."""
+
+    current_dir = Path(__file__).absolute().parent
+    providers = set()
+    for root in current_dir.iterdir():
+        if not root.is_dir():
+            continue
+
+        providers_dirs = set()
+        for sentinel in {"hooks", "operators", "sensors"}:
+            providers_dirs = providers_dirs.union({p.parent for p in root.rglob(sentinel) if p.is_dir()})
+
+        if providers_dirs:
+            for d in providers_dirs:
+                providers.add(".".join(d.relative_to(current_dir).parts))
+        else:
+            providers.add(root.name)
+
+    return providers
+
+
+def get_test_provider_name(m):
+    """Extract provider name from module full qualname."""
+    _, _, name = m.__name__.partition("providers.")
+    for provider in providers_packages():
+        if name.startswith(provider):
+            return provider
+    return None
+
+
+@pytest.fixture(scope="module", autouse=True)
+def _clear_db_between_providers_tests(request):
+    """Clear DB between each separate provider package test runs."""
+    provider_name = get_test_provider_name(request.module)
+    if provider_name and provider_name not in _CLEAR_DB_PROVIDERS:
+        _CLEAR_DB_PROVIDERS.add(provider_name)
+        db.clear_db_runs()
+        db.clear_db_datasets()
+        db.clear_db_dags()
+        db.clear_db_serialized_dags()
+        db.clear_db_sla_miss()
+        db.clear_db_pools()
+        db.clear_db_connections()
+        db.clear_db_variables()
+        db.clear_db_dag_code()
+        db.clear_db_callbacks()
+        db.clear_rendered_ti_fields()
+        db.clear_db_import_errors()
+        db.clear_db_dag_warnings()
+        db.clear_db_xcom()
+        db.clear_db_logs()
+        db.clear_db_jobs()
+        db.clear_db_task_fail()
+        db.clear_db_task_reschedule()
+        db.clear_dag_specific_permissions()
+        db.create_default_connections()
+        db.set_default_pool_slots(128)
+    yield

Review Comment:
   We will still need to see how much it impacts the overall test time, but 
I am cautiously optimistic it won't be a lot. And we can move it up to package 
scope as well if we find that module scope is too much.
   
   And this approach has some really interesting properties if we do such a 
cleanup unconditionally before every module:
   
   1) We can stop worrying about any DB side-effects outside of the modules. We 
all but guarantee that no side-effects will cross the module 
boundaries.
   
   2) Running all tests in a module is usually what people do - because it is 
easy. And whenever we see a suspicious side-effect, it will be super-easy to 
reproduce - just run the whole module - it will run identically in CI and 
locally because each module is isolated.
   
   3) When a new user checks out the code and runs tests, they will not have to 
think about initializing the database - similarly when the database has changed. 
So far we had the `--with-db-init` switch to let airflow reset the unit test DB 
but it was pretty brittle and not many people even knew about its existence. We 
will be able to remove that now - because every time you run a test you are 
guaranteed to get a fresh DB with the current structure.
   
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to