pierrejeambrun commented on code in PR #62294:
URL: https://github.com/apache/airflow/pull/62294#discussion_r2933158937
##########
airflow-core/src/airflow/utils/db.py:
##########
@@ -1682,11 +1688,47 @@ def get_query_count(query_stmt: Select, *, session:
Session) -> int:
:meta private:
"""
+ dialect_name = get_dialect_name(session)
+
+ if allow_estimation and dialect_name in ("postgresql", "mysql") and
_is_simple_select_from(query_stmt):
+ table = getattr(query_stmt.get_final_froms()[0], "name", None)
+ if dialect_name == "postgresql":
+ estimate_stmt = text("SELECT reltuples FROM pg_class WHERE relname
= :table")
+ elif dialect_name == "mysql":
+ estimate_stmt = text(
+ "SELECT NUM_ROWS FROM INFORMATION_SCHEMA.INNODB_TABLESTATS
where NAME = 'airflow/:table'"
+ )
+ result = session.scalar(estimate_stmt, {"table": table})
+
+ # If the row count estimate is small, query count(*) to get the exact
number
+ if result is not None and result > threshold:
+ return result
+
count_stmt =
select(func.count()).select_from(query_stmt.order_by(None).subquery())
result = session.scalar(count_stmt)
return result or 0
+def _is_simple_select_from(query_stmt: Select) -> bool:
+ """
+ Check whether a query has a simple `SELECT ... FROM ... ORDER BY ...`
pattern.
+
+ Return false if there is any where / join / groupby / limit / offset clause
+ or subquery in the query statement.
+ """
Review Comment:
This will never trigger. There is always at least the permission check
filter (unless an admin user is used and no filters are provided, which is a
super minor case). I don't think we should rely on that.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]