[ 
https://issues.apache.org/jira/browse/AIRFLOW-1930?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16302091#comment-16302091
 ] 

George Leslie-Waksman commented on AIRFLOW-1930:
------------------------------------------------

I owe you an apology. I've been bitten enough times by using client time 
instead of server time that I forgot databases other than PostgreSQL handle 
things VERY differently. Sorry.

I still think that we should handle times on the server side so 
workers/scheduler/client clock skew doesn't negatively impact things but it is 
a much trickier problem than my initial thinking of "just use func.now()". I 
would consider this change a regression on PostgreSQL, even if it is an 
improvement on MySQL and sqlite.

Digging a bit more, I did a comparison of various time options and wrote up a 
test script to show various results. For reference, I found myself looking at:
https://stackoverflow.com/questions/13370317/sqlalchemy-default-datetime/33532154#33532154
http://docs.sqlalchemy.org/en/latest/core/compiler.html#utc-timestamp-function
and a bunch of dialect specific Google search results

Here's what I ended up using to test various options:

{code:python}
"""Testing datetime timezones with various DBs"""

import datetime as dt
import time

import sqlalchemy as sa  # pip install sqlalchemy
from sqlalchemy.sql import expression
from sqlalchemy.ext.compiler import compiles
import sqlalchemy_utc as sa_utc  # pip install sqlalchemy-utc
import testing.postgresql  # pip install testing-postgresl
import testing.mysqld  # pip install testing-mysqld

UTC = dt.timezone.utc


def af_utcnow():
    """Standin for airflow.utils.timezone.utcnow"""
    d = dt.datetime.utcnow()
    d = d.replace(tzinfo=UTC)
    return d


class sql_utcnow(expression.FunctionElement):
    """UTCNOW() expression for multiple dialects."""
    type = sa_utc.UtcDateTime()


@compiles(sql_utcnow)
def default_sql_utcnow(element, compiler, **kw):
    """Assume, by default, time zones work correctly.

    Note:
        This is a valid assumption for PostgreSQL and Oracle.
    """
    return 'CURRENT_TIMESTAMP'


@compiles(sql_utcnow, 'mysql')
def mysql_sql_utcnow(element, compiler, **kw):
    """MySQL returns now as localtime, so we convert to UTC.

    Warning:
        This must be executed and cannot be used as a
        column server_default value.
    """
    return "CONVERT_TZ(CURRENT_TIMESTAMP,@@session.time_zone,'+00:00')"


@compiles(sql_utcnow, 'sqlite')
def sqlite_sql_utcnow(element, compiler, **kw):
    return "DATETIME('NOW')"


@compiles(sql_utcnow, 'mssql')
def mssql_sql_utcnow(element, compiler, **kw):
    """MS SQL provides a function for the UTC datetime."""
    return 'GETUTCDATE()'


def get_test_table(engine):
    table = sa.Table(
        'test_table', sa.MetaData(engine),
        sa.Column('id', sa.Integer, primary_key=True),
        sa.Column('func_now', sa_utc.UtcDateTime,
                  default=sa.func.now()),
        sa.Column('server_func_now', sa_utc.UtcDateTime,
                  server_default=sa.func.now()),
        sa.Column('af_utcnow', sa_utc.UtcDateTime,
                  default=af_utcnow),
        # mysql version of sql_utcnow not compatible with server_default
        sa.Column('sql_utcnow', sa_utc.UtcDateTime,
                  default=sql_utcnow()),
    )
    table.create(engine)
    return table


def test_suite(engine):
    """Run a handful of checks on the given engine."""
    table = get_test_table(engine)
    now = dt.datetime.now(UTC)
    with engine.begin() as conn:
        print('Insert row 1')
        conn.execute(table.insert(), [{'id': 1}])
        print('Sleep')
        time.sleep(1)
        print('Insert row 2')
        conn.execute(table.insert(), [{'id': 2}])
    with engine.begin() as conn2:
        print('Results')
        id_to_row = {r['id']: r for r in conn2.execute(sa.select([table]))}

    print('n: ' + str(now))
    print('default=func.now()')
    print('1: ' + str(id_to_row[1]['func_now'].astimezone(UTC)))
    print('2: ' + str(id_to_row[2]['func_now'].astimezone(UTC)))
    print('server_default=func.now()')
    print('1: ' + str(id_to_row[1]['server_func_now'].astimezone(UTC)))
    print('2: ' + str(id_to_row[2]['server_func_now'].astimezone(UTC)))
    print('default=timezone.utcnow')
    print('1: ' + str(id_to_row[1]['af_utcnow'].astimezone(UTC)))
    print('2: ' + str(id_to_row[2]['af_utcnow'].astimezone(UTC)))
    print('default=sql_utcnow()')
    print('1: ' + str(id_to_row[1]['sql_utcnow'].astimezone(UTC)))
    print('2: ' + str(id_to_row[2]['sql_utcnow'].astimezone(UTC)))

    print()


print('Testing against PostgreSQL')
with testing.postgresql.Postgresql() as pg:
    pg_eng = sa.create_engine(pg.url())
    test_suite(pg_eng)

print('Testing against MySQL')
with testing.mysqld.Mysqld() as my:
    my_eng = sa.create_engine(my.url())
    test_suite(my_eng)

print('Testing against SQLite')
lite_eng = sa.create_engine('sqlite://')
test_suite(lite_eng)

{code}

Running this on my system, I get:


{noformat}
Testing against PostgreSQL
Insert row 1
Sleep
Insert row 2
Results
n: 2017-12-22 23:06:46.802493+00:00
default=func.now()
1: 2017-12-22 23:06:46.803103+00:00
2: 2017-12-22 23:06:46.803103+00:00
server_default=func.now()
1: 2017-12-22 23:06:46.803103+00:00
2: 2017-12-22 23:06:46.803103+00:00
default=timezone.utcnow
1: 2017-12-22 23:06:46.802980+00:00
2: 2017-12-22 23:06:47.806383+00:00
default=sql_utcnow()
1: 2017-12-22 23:06:46.803103+00:00
2: 2017-12-22 23:06:46.803103+00:00

Testing against MySQL
Insert row 1
Sleep
Insert row 2
Results
n: 2017-12-22 23:06:52.300311+00:00
default=func.now()
1: 2017-12-22 15:06:52+00:00
2: 2017-12-22 15:06:53+00:00
server_default=func.now()
1: 2017-12-22 15:06:52+00:00
2: 2017-12-22 15:06:53+00:00
default=timezone.utcnow
1: 2017-12-22 23:06:52+00:00
2: 2017-12-22 23:06:53+00:00
default=sql_utcnow()
1: 2017-12-22 23:06:52+00:00
2: 2017-12-22 23:06:53+00:00

Testing against SQLite
Insert row 1
Sleep
Insert row 2
Results
n: 2017-12-22 23:06:56.692794+00:00
default=func.now()
1: 2017-12-22 23:06:56+00:00
2: 2017-12-22 23:06:57+00:00
server_default=func.now()
1: 2017-12-22 23:06:56+00:00
2: 2017-12-22 23:06:57+00:00
default=timezone.utcnow
1: 2017-12-22 23:06:56.693171+00:00
2: 2017-12-22 23:06:57.694473+00:00
default=sql_utcnow()
1: 2017-12-22 23:06:56+00:00
2: 2017-12-22 23:06:57+00:00
{noformat}

Personally, I'm inclined toward the compile our own utcnow expression approach 
because it gives us cross-dialect UTC timestamps and calculates timestamps on 
the server.

> start_date and execution_date should default to timezone.utcnow() not to 
> func.now()
> -----------------------------------------------------------------------------------
>
>                 Key: AIRFLOW-1930
>                 URL: https://issues.apache.org/jira/browse/AIRFLOW-1930
>             Project: Apache Airflow
>          Issue Type: Bug
>    Affects Versions: 1.9.0, 1.8.2
>            Reporter: Bolke de Bruin
>            Assignee: Bolke de Bruin
>             Fix For: 1.9.1
>
>
> func.now() defaults to the time zone of the database, while we assume every 
> date in the db is UTC. 



--
This message was sent by Atlassian JIRA
(v6.4.14#64029)

Reply via email to