[
https://issues.apache.org/jira/browse/AIRFLOW-1930?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16302091#comment-16302091
]
George Leslie-Waksman commented on AIRFLOW-1930:
------------------------------------------------
I owe you an apology. I've been bitten enough times by using client time
instead of server time that I forgot databases other than PostgreSQL handle
things VERY differently. Sorry.
I still think that we should handle times on the server side so
workers/scheduler/client clock skew doesn't negatively impact things but it is
a much trickier problem than my initial thinking of "just use func.now()". I
would consider this change a regression on PostgreSQL, even if it is an
improvement on MySQL and sqlite.
Digging a bit more, I did a comparison of various time options and wrote up a
test script to show various results. For reference, I found myself looking at:
https://stackoverflow.com/questions/13370317/sqlalchemy-default-datetime/33532154#33532154
http://docs.sqlalchemy.org/en/latest/core/compiler.html#utc-timestamp-function
and a bunch of dialect specific Google search results
Here's what I ended up using to test various options:
{code:python}
"""Testing datetime timezones with various DBs"""
import datetime as dt
import time
import sqlalchemy as sa # pip install sqlalchemy
from sqlalchemy.sql import expression
from sqlalchemy.ext.compiler import compiles
import sqlalchemy_utc as sa_utc # pip install sqlalchemy-utc
import testing.postgresql # pip install testing-postgresl
import testing.mysqld # pip install testing-mysqld
UTC = dt.timezone.utc
def af_utcnow():
"""Standin for airflow.utils.timezone.utcnow"""
d = dt.datetime.utcnow()
d = d.replace(tzinfo=UTC)
return d
class sql_utcnow(expression.FunctionElement):
"""UTCNOW() expression for multiple dialects."""
type = sa_utc.UtcDateTime()
@compiles(sql_utcnow)
def default_sql_utcnow(element, compiler, **kw):
"""Assume, by default, time zones work correctly.
Note:
This is a valid assumption for PostgreSQL and Oracle.
"""
return 'CURRENT_TIMESTAMP'
@compiles(sql_utcnow, 'mysql')
def mysql_sql_utcnow(element, compiler, **kw):
"""MySQL returns now as localtime, so we convert to UTC.
Warning:
This must be executed and cannot be used as a
column server_default value.
"""
return "CONVERT_TZ(CURRENT_TIMESTAMP,@@session.time_zone,'+00:00')"
@compiles(sql_utcnow, 'sqlite')
def sqlite_sql_utcnow(element, compiler, **kw):
return "DATETIME('NOW')"
@compiles(sql_utcnow, 'mssql')
def mssql_sql_utcnow(element, compiler, **kw):
"""MS SQL provides a function for the UTC datetime."""
return 'GETUTCDATE()'
def get_test_table(engine):
table = sa.Table(
'test_table', sa.MetaData(engine),
sa.Column('id', sa.Integer, primary_key=True),
sa.Column('func_now', sa_utc.UtcDateTime,
default=sa.func.now()),
sa.Column('server_func_now', sa_utc.UtcDateTime,
server_default=sa.func.now()),
sa.Column('af_utcnow', sa_utc.UtcDateTime,
default=af_utcnow),
# mysql version of sql_utcnow not compatible with server_default
sa.Column('sql_utcnow', sa_utc.UtcDateTime,
default=sql_utcnow()),
)
table.create(engine)
return table
def test_suite(engine):
"""Run a handful of checks on the given engine."""
table = get_test_table(engine)
now = dt.datetime.now(UTC)
with engine.begin() as conn:
print('Insert row 1')
conn.execute(table.insert(), [{'id': 1}])
print('Sleep')
time.sleep(1)
print('Insert row 2')
conn.execute(table.insert(), [{'id': 2}])
with engine.begin() as conn2:
print('Results')
id_to_row = {r['id']: r for r in conn2.execute(sa.select([table]))}
print('n: ' + str(now))
print('default=func.now()')
print('1: ' + str(id_to_row[1]['func_now'].astimezone(UTC)))
print('2: ' + str(id_to_row[2]['func_now'].astimezone(UTC)))
print('server_default=func.now()')
print('1: ' + str(id_to_row[1]['server_func_now'].astimezone(UTC)))
print('2: ' + str(id_to_row[2]['server_func_now'].astimezone(UTC)))
print('default=timezone.utcnow')
print('1: ' + str(id_to_row[1]['af_utcnow'].astimezone(UTC)))
print('2: ' + str(id_to_row[2]['af_utcnow'].astimezone(UTC)))
print('default=sql_utcnow()')
print('1: ' + str(id_to_row[1]['sql_utcnow'].astimezone(UTC)))
print('2: ' + str(id_to_row[2]['sql_utcnow'].astimezone(UTC)))
print()
print('Testing against PostgreSQL')
with testing.postgresql.Postgresql() as pg:
pg_eng = sa.create_engine(pg.url())
test_suite(pg_eng)
print('Testing against MySQL')
with testing.mysqld.Mysqld() as my:
my_eng = sa.create_engine(my.url())
test_suite(my_eng)
print('Testing against SQLite')
lite_eng = sa.create_engine('sqlite://')
test_suite(lite_eng)
{code}
Running this on my system, I get:
{noformat}
Testing against PostgreSQL
Insert row 1
Sleep
Insert row 2
Results
n: 2017-12-22 23:06:46.802493+00:00
default=func.now()
1: 2017-12-22 23:06:46.803103+00:00
2: 2017-12-22 23:06:46.803103+00:00
server_default=func.now()
1: 2017-12-22 23:06:46.803103+00:00
2: 2017-12-22 23:06:46.803103+00:00
default=timezone.utcnow
1: 2017-12-22 23:06:46.802980+00:00
2: 2017-12-22 23:06:47.806383+00:00
default=sql_utcnow()
1: 2017-12-22 23:06:46.803103+00:00
2: 2017-12-22 23:06:46.803103+00:00
Testing against MySQL
Insert row 1
Sleep
Insert row 2
Results
n: 2017-12-22 23:06:52.300311+00:00
default=func.now()
1: 2017-12-22 15:06:52+00:00
2: 2017-12-22 15:06:53+00:00
server_default=func.now()
1: 2017-12-22 15:06:52+00:00
2: 2017-12-22 15:06:53+00:00
default=timezone.utcnow
1: 2017-12-22 23:06:52+00:00
2: 2017-12-22 23:06:53+00:00
default=sql_utcnow()
1: 2017-12-22 23:06:52+00:00
2: 2017-12-22 23:06:53+00:00
Testing against SQLite
Insert row 1
Sleep
Insert row 2
Results
n: 2017-12-22 23:06:56.692794+00:00
default=func.now()
1: 2017-12-22 23:06:56+00:00
2: 2017-12-22 23:06:57+00:00
server_default=func.now()
1: 2017-12-22 23:06:56+00:00
2: 2017-12-22 23:06:57+00:00
default=timezone.utcnow
1: 2017-12-22 23:06:56.693171+00:00
2: 2017-12-22 23:06:57.694473+00:00
default=sql_utcnow()
1: 2017-12-22 23:06:56+00:00
2: 2017-12-22 23:06:57+00:00
{noformat}
Personally, I'm inclined toward the compile our own utcnow expression approach
because it gives us cross-dialect UTC timestamps and calculates timestamps on
the server.
> start_date and execution_date should default to timezone.utcnow() not to
> func.now()
> -----------------------------------------------------------------------------------
>
> Key: AIRFLOW-1930
> URL: https://issues.apache.org/jira/browse/AIRFLOW-1930
> Project: Apache Airflow
> Issue Type: Bug
> Affects Versions: 1.9.0, 1.8.2
> Reporter: Bolke de Bruin
> Assignee: Bolke de Bruin
> Fix For: 1.9.1
>
>
> func.now() defaults to the time zone of the database, while we assume every
> date in the db is UTC.
--
This message was sent by Atlassian JIRA
(v6.4.14#64029)