This is an automated email from the ASF dual-hosted git repository.

dstandish pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git


The following commit(s) were added to refs/heads/main by this push:
     new e20b400317 Remove unused index on task instance (#36737)
e20b400317 is described below

commit e20b400317ae4eb41181c5b0cee466eff768b521
Author: Daniel Standish <[email protected]>
AuthorDate: Fri Jan 12 20:32:14 2024 -0800

    Remove unused index on task instance (#36737)
    
    The index is only helpful for a user's custom query -- not for Airflow in
    general (see comment
    https://github.com/apache/airflow/pull/30762#issuecomment-1886658295).
    I noticed that this index had zero scans over a period of months, and I
    also observed that it takes up as much space as the table itself.  Since
    it's not generally useful, it doesn't belong in Airflow OSS.
    
    Reverts #30762
---
 .../0126_2_7_0_add_index_to_task_instance_table.py | 16 +++++-----
 ...e.py => 0133_2_8_1_refactor_dag_run_indexes.py} | 34 +++++++++++-----------
 airflow/models/taskinstance.py                     |  1 -
 airflow/utils/db.py                                |  1 +
 docs/apache-airflow/img/airflow_erd.sha256         |  2 +-
 docs/apache-airflow/img/airflow_erd.svg            |  4 +--
 docs/apache-airflow/migrations-ref.rst             |  4 ++-
 7 files changed, 33 insertions(+), 29 deletions(-)

diff --git 
a/airflow/migrations/versions/0126_2_7_0_add_index_to_task_instance_table.py 
b/airflow/migrations/versions/0126_2_7_0_add_index_to_task_instance_table.py
index 225776119e..6730611a8d 100644
--- a/airflow/migrations/versions/0126_2_7_0_add_index_to_task_instance_table.py
+++ b/airflow/migrations/versions/0126_2_7_0_add_index_to_task_instance_table.py
@@ -37,14 +37,16 @@ airflow_version = "2.7.0"
 
 def upgrade():
     """Apply Add index to task_instance table"""
-    op.create_index(
-        "ti_state_incl_start_date",
-        "task_instance",
-        ["dag_id", "task_id", "state"],
-        postgresql_include=["start_date"],
-    )
+    # We don't add this index anymore because it's not useful.
+    pass
 
 
 def downgrade():
     """Unapply Add index to task_instance table"""
-    op.drop_index("ti_state_incl_start_date", table_name="task_instance")
+    # At 2.8.1 we removed this index as it is not used, and changed this 
migration not to add it
+    # So we use drop if exists (cus it might not be there)
+    import sqlalchemy
+    from contextlib import suppress
+
+    with suppress(sqlalchemy.exc.DatabaseError):  # mysql does not support 
drop if exists index
+        op.drop_index("ti_state_incl_start_date", table_name="task_instance", 
if_exists=True)
diff --git 
a/airflow/migrations/versions/0126_2_7_0_add_index_to_task_instance_table.py 
b/airflow/migrations/versions/0133_2_8_1_refactor_dag_run_indexes.py
similarity index 59%
copy from 
airflow/migrations/versions/0126_2_7_0_add_index_to_task_instance_table.py
copy to airflow/migrations/versions/0133_2_8_1_refactor_dag_run_indexes.py
index 225776119e..43a24141ee 100644
--- a/airflow/migrations/versions/0126_2_7_0_add_index_to_task_instance_table.py
+++ b/airflow/migrations/versions/0133_2_8_1_refactor_dag_run_indexes.py
@@ -16,35 +16,35 @@
 # specific language governing permissions and limitations
 # under the License.
 
-"""Add index to task_instance table
+"""Drop unused TI index
 
-Revision ID: 937cbd173ca1
-Revises: c804e5c76e3e
-Create Date: 2023-05-03 11:31:32.527362
+Revision ID: 88344c1d9134
+Revises: 10b52ebd31f7
+Create Date: 2024-01-11 11:54:48.232030
 
 """
-from __future__ import annotations
 
+import sqlalchemy as sa
 from alembic import op
 
+
 # revision identifiers, used by Alembic.
-revision = "937cbd173ca1"
-down_revision = "c804e5c76e3e"
+revision = "88344c1d9134"
+down_revision = "10b52ebd31f7"
 branch_labels = None
 depends_on = None
-airflow_version = "2.7.0"
+airflow_version = "2.8.1"
 
 
 def upgrade():
-    """Apply Add index to task_instance table"""
-    op.create_index(
-        "ti_state_incl_start_date",
-        "task_instance",
-        ["dag_id", "task_id", "state"],
-        postgresql_include=["start_date"],
-    )
+    """Apply refactor dag run indexes"""
+    # This index may have been created in 2.7 but we've since removed it from 
migrations
+    import sqlalchemy
+    from contextlib import suppress
+
+    with suppress(sqlalchemy.exc.DatabaseError):  # mysql does not support 
drop if exists index
+        op.drop_index("ti_state_incl_start_date", table_name="task_instance", 
if_exists=True)
 
 
 def downgrade():
-    """Unapply Add index to task_instance table"""
-    op.drop_index("ti_state_incl_start_date", table_name="task_instance")
+    """Unapply refactor dag run indexes"""
diff --git a/airflow/models/taskinstance.py b/airflow/models/taskinstance.py
index 942250eabf..c0427715a2 100644
--- a/airflow/models/taskinstance.py
+++ b/airflow/models/taskinstance.py
@@ -1259,7 +1259,6 @@ class TaskInstance(Base, LoggingMixin):
         # Existing "ti_state_lkp" is not enough for such query when this table 
has millions of rows, since
         # rows have to be fetched in order to retrieve the start_date column. 
With this index, INDEX ONLY SCAN
         # is performed and that query runs within milliseconds.
-        Index("ti_state_incl_start_date", dag_id, task_id, state, 
postgresql_include=["start_date"]),
         Index("ti_pool", pool, state, priority_weight),
         Index("ti_job_id", job_id),
         Index("ti_trigger_id", trigger_id),
diff --git a/airflow/utils/db.py b/airflow/utils/db.py
index 3d7d681094..03bb33cfac 100644
--- a/airflow/utils/db.py
+++ b/airflow/utils/db.py
@@ -89,6 +89,7 @@ _REVISION_HEADS_MAP = {
     "2.6.2": "c804e5c76e3e",
     "2.7.0": "405de8318b3a",
     "2.8.0": "10b52ebd31f7",
+    "2.8.1": "88344c1d9134",
 }
 
 
diff --git a/docs/apache-airflow/img/airflow_erd.sha256 
b/docs/apache-airflow/img/airflow_erd.sha256
index eb2a21ae34..9a2d1e898d 100644
--- a/docs/apache-airflow/img/airflow_erd.sha256
+++ b/docs/apache-airflow/img/airflow_erd.sha256
@@ -1 +1 @@
-194706fc390025f473f73ce934bfe4b394b50ce76748e5df33ae643e38538357
\ No newline at end of file
+243075d59223245db8034a97c7d6f53bd8a39ee0dc02831229a6ec743c2c920a
\ No newline at end of file
diff --git a/docs/apache-airflow/img/airflow_erd.svg 
b/docs/apache-airflow/img/airflow_erd.svg
index 8e85b5fa0c..497ef76975 100644
--- a/docs/apache-airflow/img/airflow_erd.svg
+++ b/docs/apache-airflow/img/airflow_erd.svg
@@ -1342,14 +1342,14 @@
 <g id="edge41" class="edge">
 <title>task_instance&#45;&#45;xcom</title>
 <path fill="none" stroke="#7f7f7f" stroke-dasharray="5,2" 
d="M1166.1,-776.37C1196.72,-770.7 1228.55,-765.52 1258.36,-761.38"/>
-<text text-anchor="start" x="1248.36" y="-750.18" font-family="Times,serif" 
font-size="14.00">1</text>
+<text text-anchor="start" x="1227.36" y="-750.18" font-family="Times,serif" 
font-size="14.00">0..N</text>
 <text text-anchor="start" x="1166.1" y="-765.17" font-family="Times,serif" 
font-size="14.00">1</text>
 </g>
 <!-- task_instance&#45;&#45;xcom -->
 <g id="edge42" class="edge">
 <title>task_instance&#45;&#45;xcom</title>
 <path fill="none" stroke="#7f7f7f" stroke-dasharray="5,2" 
d="M1166.1,-789.67C1196.72,-784.35 1228.55,-779.06 1258.36,-774.33"/>
-<text text-anchor="start" x="1227.36" y="-778.13" font-family="Times,serif" 
font-size="14.00">0..N</text>
+<text text-anchor="start" x="1248.36" y="-778.13" font-family="Times,serif" 
font-size="14.00">1</text>
 <text text-anchor="start" x="1166.1" y="-793.47" font-family="Times,serif" 
font-size="14.00">1</text>
 </g>
 <!-- task_instance&#45;&#45;xcom -->
diff --git a/docs/apache-airflow/migrations-ref.rst 
b/docs/apache-airflow/migrations-ref.rst
index 2d54b5d996..0b068e5e53 100644
--- a/docs/apache-airflow/migrations-ref.rst
+++ b/docs/apache-airflow/migrations-ref.rst
@@ -39,7 +39,9 @@ Here's the list of all the Database Migrations that are 
executed via when you ru
 
+---------------------------------+-------------------+-------------------+--------------------------------------------------------------+
 | Revision ID                     | Revises ID        | Airflow Version   | 
Description                                                  |
 
+=================================+===================+===================+==============================================================+
-| ``10b52ebd31f7`` (head)         | ``bd5dfbe21f88``  | ``2.8.0``         | 
Add processor_subdir to ImportError.                         |
+| ``88344c1d9134`` (head)         | ``10b52ebd31f7``  | ``2.8.1``         | 
Drop unused TI index                                         |
++---------------------------------+-------------------+-------------------+--------------------------------------------------------------+
+| ``10b52ebd31f7``                | ``bd5dfbe21f88``  | ``2.8.0``         | 
Add processor_subdir to ImportError.                         |
 
+---------------------------------+-------------------+-------------------+--------------------------------------------------------------+
 | ``bd5dfbe21f88``                | ``f7bf2a57d0a6``  | ``2.8.0``         | 
Make connection login/password TEXT                          |
 
+---------------------------------+-------------------+-------------------+--------------------------------------------------------------+

Reply via email to