This is an automated email from the ASF dual-hosted git repository.
dstandish pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/main by this push:
new e20b400317 Remove unused index on task instance (#36737)
e20b400317 is described below
commit e20b400317ae4eb41181c5b0cee466eff768b521
Author: Daniel Standish <[email protected]>
AuthorDate: Fri Jan 12 20:32:14 2024 -0800
Remove unused index on task instance (#36737)
Index is only helpful for a user's custom query -- not for airflow in
general (see comment
https://github.com/apache/airflow/pull/30762#issuecomment-1886658295). Noticed
that this index had zero scans over a period of months. I also observed that
it takes up as much space as the table itself. Since it's not generally
useful, it doesn't belong in airflow OSS.
Reverts #30762
---
.../0126_2_7_0_add_index_to_task_instance_table.py | 16 +++++-----
...e.py => 0133_2_8_1_refactor_dag_run_indexes.py} | 34 +++++++++++-----------
airflow/models/taskinstance.py | 1 -
airflow/utils/db.py | 1 +
docs/apache-airflow/img/airflow_erd.sha256 | 2 +-
docs/apache-airflow/img/airflow_erd.svg | 4 +--
docs/apache-airflow/migrations-ref.rst | 4 ++-
7 files changed, 33 insertions(+), 29 deletions(-)
diff --git
a/airflow/migrations/versions/0126_2_7_0_add_index_to_task_instance_table.py
b/airflow/migrations/versions/0126_2_7_0_add_index_to_task_instance_table.py
index 225776119e..6730611a8d 100644
--- a/airflow/migrations/versions/0126_2_7_0_add_index_to_task_instance_table.py
+++ b/airflow/migrations/versions/0126_2_7_0_add_index_to_task_instance_table.py
@@ -37,14 +37,16 @@ airflow_version = "2.7.0"
def upgrade():
"""Apply Add index to task_instance table"""
- op.create_index(
- "ti_state_incl_start_date",
- "task_instance",
- ["dag_id", "task_id", "state"],
- postgresql_include=["start_date"],
- )
+ # We don't add this index anymore because it's not useful.
+ pass
def downgrade():
"""Unapply Add index to task_instance table"""
- op.drop_index("ti_state_incl_start_date", table_name="task_instance")
+ # At 2.8.1 we removed this index as it is not used, and changed this
migration not to add it
+ # So we use drop if exists (cus it might not be there)
+ import sqlalchemy
+ from contextlib import suppress
+
+ with suppress(sqlalchemy.exc.DatabaseError): # mysql does not support
drop if exists index
+ op.drop_index("ti_state_incl_start_date", table_name="task_instance",
if_exists=True)
diff --git
a/airflow/migrations/versions/0126_2_7_0_add_index_to_task_instance_table.py
b/airflow/migrations/versions/0133_2_8_1_refactor_dag_run_indexes.py
similarity index 59%
copy from
airflow/migrations/versions/0126_2_7_0_add_index_to_task_instance_table.py
copy to airflow/migrations/versions/0133_2_8_1_refactor_dag_run_indexes.py
index 225776119e..43a24141ee 100644
--- a/airflow/migrations/versions/0126_2_7_0_add_index_to_task_instance_table.py
+++ b/airflow/migrations/versions/0133_2_8_1_refactor_dag_run_indexes.py
@@ -16,35 +16,35 @@
# specific language governing permissions and limitations
# under the License.
-"""Add index to task_instance table
+"""Drop unused TI index
-Revision ID: 937cbd173ca1
-Revises: c804e5c76e3e
-Create Date: 2023-05-03 11:31:32.527362
+Revision ID: 88344c1d9134
+Revises: 10b52ebd31f7
+Create Date: 2024-01-11 11:54:48.232030
"""
-from __future__ import annotations
+import sqlalchemy as sa
from alembic import op
+
# revision identifiers, used by Alembic.
-revision = "937cbd173ca1"
-down_revision = "c804e5c76e3e"
+revision = "88344c1d9134"
+down_revision = "10b52ebd31f7"
branch_labels = None
depends_on = None
-airflow_version = "2.7.0"
+airflow_version = "2.8.1"
def upgrade():
- """Apply Add index to task_instance table"""
- op.create_index(
- "ti_state_incl_start_date",
- "task_instance",
- ["dag_id", "task_id", "state"],
- postgresql_include=["start_date"],
- )
+ """Apply refactor dag run indexes"""
+ # This index may have been created in 2.7 but we've since removed it from
migrations
+ import sqlalchemy
+ from contextlib import suppress
+
+ with suppress(sqlalchemy.exc.DatabaseError): # mysql does not support
drop if exists index
+ op.drop_index("ti_state_incl_start_date", table_name="task_instance",
if_exists=True)
def downgrade():
- """Unapply Add index to task_instance table"""
- op.drop_index("ti_state_incl_start_date", table_name="task_instance")
+ """Unapply refactor dag run indexes"""
diff --git a/airflow/models/taskinstance.py b/airflow/models/taskinstance.py
index 942250eabf..c0427715a2 100644
--- a/airflow/models/taskinstance.py
+++ b/airflow/models/taskinstance.py
@@ -1259,7 +1259,6 @@ class TaskInstance(Base, LoggingMixin):
# Existing "ti_state_lkp" is not enough for such query when this table
has millions of rows, since
# rows have to be fetched in order to retrieve the start_date column.
With this index, INDEX ONLY SCAN
# is performed and that query runs within milliseconds.
- Index("ti_state_incl_start_date", dag_id, task_id, state,
postgresql_include=["start_date"]),
Index("ti_pool", pool, state, priority_weight),
Index("ti_job_id", job_id),
Index("ti_trigger_id", trigger_id),
diff --git a/airflow/utils/db.py b/airflow/utils/db.py
index 3d7d681094..03bb33cfac 100644
--- a/airflow/utils/db.py
+++ b/airflow/utils/db.py
@@ -89,6 +89,7 @@ _REVISION_HEADS_MAP = {
"2.6.2": "c804e5c76e3e",
"2.7.0": "405de8318b3a",
"2.8.0": "10b52ebd31f7",
+ "2.8.1": "88344c1d9134",
}
diff --git a/docs/apache-airflow/img/airflow_erd.sha256
b/docs/apache-airflow/img/airflow_erd.sha256
index eb2a21ae34..9a2d1e898d 100644
--- a/docs/apache-airflow/img/airflow_erd.sha256
+++ b/docs/apache-airflow/img/airflow_erd.sha256
@@ -1 +1 @@
-194706fc390025f473f73ce934bfe4b394b50ce76748e5df33ae643e38538357
\ No newline at end of file
+243075d59223245db8034a97c7d6f53bd8a39ee0dc02831229a6ec743c2c920a
\ No newline at end of file
diff --git a/docs/apache-airflow/img/airflow_erd.svg
b/docs/apache-airflow/img/airflow_erd.svg
index 8e85b5fa0c..497ef76975 100644
--- a/docs/apache-airflow/img/airflow_erd.svg
+++ b/docs/apache-airflow/img/airflow_erd.svg
@@ -1342,14 +1342,14 @@
<g id="edge41" class="edge">
<title>task_instance--xcom</title>
<path fill="none" stroke="#7f7f7f" stroke-dasharray="5,2"
d="M1166.1,-776.37C1196.72,-770.7 1228.55,-765.52 1258.36,-761.38"/>
-<text text-anchor="start" x="1248.36" y="-750.18" font-family="Times,serif"
font-size="14.00">1</text>
+<text text-anchor="start" x="1227.36" y="-750.18" font-family="Times,serif"
font-size="14.00">0..N</text>
<text text-anchor="start" x="1166.1" y="-765.17" font-family="Times,serif"
font-size="14.00">1</text>
</g>
<!-- task_instance--xcom -->
<g id="edge42" class="edge">
<title>task_instance--xcom</title>
<path fill="none" stroke="#7f7f7f" stroke-dasharray="5,2"
d="M1166.1,-789.67C1196.72,-784.35 1228.55,-779.06 1258.36,-774.33"/>
-<text text-anchor="start" x="1227.36" y="-778.13" font-family="Times,serif"
font-size="14.00">0..N</text>
+<text text-anchor="start" x="1248.36" y="-778.13" font-family="Times,serif"
font-size="14.00">1</text>
<text text-anchor="start" x="1166.1" y="-793.47" font-family="Times,serif"
font-size="14.00">1</text>
</g>
<!-- task_instance--xcom -->
diff --git a/docs/apache-airflow/migrations-ref.rst
b/docs/apache-airflow/migrations-ref.rst
index 2d54b5d996..0b068e5e53 100644
--- a/docs/apache-airflow/migrations-ref.rst
+++ b/docs/apache-airflow/migrations-ref.rst
@@ -39,7 +39,9 @@ Here's the list of all the Database Migrations that are
executed via when you ru
+---------------------------------+-------------------+-------------------+--------------------------------------------------------------+
| Revision ID | Revises ID | Airflow Version |
Description |
+=================================+===================+===================+==============================================================+
-| ``10b52ebd31f7`` (head) | ``bd5dfbe21f88`` | ``2.8.0`` |
Add processor_subdir to ImportError. |
+| ``88344c1d9134`` (head) | ``10b52ebd31f7`` | ``2.8.1`` |
Drop unused TI index |
++---------------------------------+-------------------+-------------------+--------------------------------------------------------------+
+| ``10b52ebd31f7`` | ``bd5dfbe21f88`` | ``2.8.0`` |
Add processor_subdir to ImportError. |
+---------------------------------+-------------------+-------------------+--------------------------------------------------------------+
| ``bd5dfbe21f88`` | ``f7bf2a57d0a6`` | ``2.8.0`` |
Make connection login/password TEXT |
+---------------------------------+-------------------+-------------------+--------------------------------------------------------------+