ashb commented on code in PR #24044:
URL: https://github.com/apache/airflow/pull/24044#discussion_r895513165


##########
airflow/utils/db.py:
##########
@@ -1638,6 +1638,47 @@ def create_global_lock(
             pass
 
 
+def compare_type(context, inspected_column, metadata_column, inspected_type, 
metadata_type):
+    """
+    Compare types between ORM and DB .
+
+    return False if the metadata_type is the same as the inspected_type
+    or None to allow the default implementation to compare these
+    types. a return value of True means the two types do not
+    match and should result in a type change operation.
+    """
+    if context.dialect.name == 'mysql':
+        from sqlalchemy import String
+        from sqlalchemy.dialects import mysql
+
+        if isinstance(inspected_type, mysql.VARCHAR) and 
isinstance(metadata_type, String):
+            # This is a hack to get around MySQL VARCHAR collation
+            # not being possible to change from utf8_bin to utf8mb3_bin
+            return False
+    return None
+
+
+def compare_server_default(
+    context, inspected_column, metadata_column, inspected_default, 
metadata_default, rendered_metadata_default
+):
+    """
+    Compare server defaults between ORM and DB .
+
+    return True if the defaults are different, False if not, or None to allow 
the default implementation
+    to compare these defaults
+    """
+    if context.connection.dialect.name in ['mssql', 'sqlite']:
+        # autogenerate doesn't work when comparing server_default in MSSQL
+        # e.g inspected_default != metadata_default
+        # TODO: Make this work
+        # SQLite: task_instance.map_index & task_reschedule.map_index
+        # are not comparing well(flaky).
+        # Note that this feature have varied accuracy
+        # depending on backends(check doc).
+        return False

Review Comment:
   Can you give more details? How is this not working for sqlite and mssql?



##########
airflow/migrations/versions/0111_2_4_0_compare_types_between_orm_and_db.py:
##########
@@ -0,0 +1,259 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""compare types between ORM and DB
+
+Revision ID: 44b7034f6bdc
+Revises: 3c94c427fdf6
+Create Date: 2022-05-31 09:16:44.558754
+
+"""
+
+import sqlalchemy as sa
+from alembic import op
+
+from airflow.migrations.db_types import TIMESTAMP
+
+# revision identifiers, used by Alembic.
+revision = '44b7034f6bdc'
+down_revision = '3c94c427fdf6'
+branch_labels = None
+depends_on = None
+airflow_version = '2.4.0'
+
+
+def upgrade():
+    """Apply compare types between ORM and DB"""
+    conn = op.get_bind()
+    with op.batch_alter_table('connection', schema=None) as batch_op:
+        batch_op.alter_column(
+            'extra',
+            existing_type=sa.TEXT(),
+            type_=sa.Text(),
+            existing_nullable=True,
+        )
+    with op.batch_alter_table('log_template', schema=None) as batch_op:
+        batch_op.alter_column(
+            'created_at', existing_type=sa.DateTime(), type_=TIMESTAMP(), 
existing_nullable=False
+        )
+    with op.batch_alter_table('task_instance', schema=None) as batch_op:
+        batch_op.alter_column(
+            'trigger_timeout', existing_type=sa.DateTime(), type_=TIMESTAMP(), 
existing_nullable=True
+        )
+    with op.batch_alter_table('serialized_dag', schema=None) as batch_op:
+        # drop server_default by not providing existing_server_default
+        batch_op.alter_column(
+            'dag_hash',
+            existing_type=sa.String(32),
+            server_default=None,
+            type_=sa.String(32),
+            existing_nullable=False,
+        )
+    # pool_slots server_default mistakenly dropped in 7b2661a43ba3 for 
postgresql.
+    # existing_server_default not used in alter
+    if conn.dialect.name == 'postgresql':
+        with op.batch_alter_table('task_instance', schema=None) as batch_op:
+            batch_op.alter_column(
+                'pool_slots', existing_type=sa.Integer(), 
server_default=sa.text('1'), existing_nullable=False
+            )
+
+    if conn.dialect.name != 'sqlite':
+        return
+    with op.batch_alter_table('serialized_dag', schema=None) as batch_op:
+        batch_op.alter_column('fileloc_hash', existing_type=sa.Integer, 
type_=sa.BigInteger())
+    # Some sqlite date are not in db_types.TIMESTAMP. Convert these to 
TIMESTAMP.
+    with op.batch_alter_table('dag', schema=None) as batch_op:
+        batch_op.alter_column(
+            'last_pickled', existing_type=sa.DATETIME(), type_=TIMESTAMP(), 
existing_nullable=True

Review Comment:
   How come this is only an issue for SQLite? Just a bug in a previous 
migration?



##########
airflow/utils/sqlalchemy.py:
##########
@@ -42,21 +43,21 @@
 
 class UtcDateTime(TypeDecorator):
     """
-    Almost equivalent to :class:`~sqlalchemy.types.DateTime` with
+    Almost equivalent to :class:`~sqlalchemy.types.TIMESTAMP` with

Review Comment:
   See previous comment -- this is not the right change, not for PostgreSQL 
anyway.



##########
airflow/utils/sqlalchemy.py:
##########
@@ -92,6 +93,13 @@ def process_result_value(self, value, dialect):
 
         return value
 
+    def load_dialect_impl(self, dialect):
+        if dialect.name == 'mssql':
+            return mssql.DATETIME2(precision=6)
+        elif dialect.name == 'mysql':
+            return mysql.TIMESTAMP(fsp=6)

Review Comment:
   I don't think this is the right type for MySQL because of its limited date 
range.
   
   > The TIMESTAMP data type is used for values that contain both date and 
time parts. TIMESTAMP has a range of '1970-01-01 00:00:01' UTC to '2038-01-19 
03:14:07' UTC.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to