[
https://issues.apache.org/jira/browse/AIRFLOW-4134?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Daniel Standish updated AIRFLOW-4134:
-------------------------------------
Description:
I am finding with 1.10.2 that I seem to get a warning {{DB connection
invalidated. Reconnecting...}} very frequently.
I to try to diagnose I added logging of the triggering error on line 79 in
airflow/utils/sqlalchemy.py, from which this warning is generated.
Call stack:
{code}
webserver_1 | Call stack:
webserver_1 | File "/usr/local/bin/airflow", line 32, in <module>
webserver_1 | args.func(args)
webserver_1 | File
"/usr/local/lib/python3.6/site-packages/airflow/utils/cli.py", line 74, in
wrapper
webserver_1 | return f(*args, **kwargs)
webserver_1 | File
"/usr/local/lib/python3.6/site-packages/airflow/bin/cli.py", line 992, in
scheduler
webserver_1 | job.run()
webserver_1 | File "/usr/local/lib/python3.6/site-packages/airflow/jobs.py",
line 205, in run
webserver_1 | self._execute()
webserver_1 | File "/usr/local/lib/python3.6/site-packages/airflow/jobs.py",
line 1532, in _execute
webserver_1 | self._execute_helper()
webserver_1 | File "/usr/local/lib/python3.6/site-packages/airflow/jobs.py",
line 1562, in _execute_helper
webserver_1 | self.processor_agent.start()
webserver_1 | File
"/usr/local/lib/python3.6/site-packages/airflow/utils/dag_processing.py", line
511, in start
webserver_1 | self._async_mode)
webserver_1 | File
"/usr/local/lib/python3.6/site-packages/airflow/utils/dag_processing.py", line
565, in _launch_process
webserver_1 | p.start()
webserver_1 | File "/usr/local/lib/python3.6/multiprocessing/process.py",
line 105, in start
webserver_1 | self._popen = self._Popen(self)
webserver_1 | File "/usr/local/lib/python3.6/multiprocessing/context.py",
line 223, in _Popen
webserver_1 | return
_default_context.get_context().Process._Popen(process_obj)
webserver_1 | File "/usr/local/lib/python3.6/multiprocessing/context.py",
line 277, in _Popen
webserver_1 | return Popen(process_obj)
webserver_1 | File "/usr/local/lib/python3.6/multiprocessing/popen_fork.py",
line 19, in __init__
webserver_1 | self._launch(process_obj)
webserver_1 | File "/usr/local/lib/python3.6/multiprocessing/popen_fork.py",
line 73, in _launch
webserver_1 | code = process_obj._bootstrap()
webserver_1 | File "/usr/local/lib/python3.6/multiprocessing/process.py",
line 258, in _bootstrap
webserver_1 | self.run()
webserver_1 | File "/usr/local/lib/python3.6/multiprocessing/process.py",
line 93, in run
webserver_1 | self._target(*self._args, **self._kwargs)
webserver_1 | File
"/usr/local/lib/python3.6/site-packages/airflow/utils/dag_processing.py", line
560, in helper
webserver_1 | processor_manager.start()
webserver_1 | File
"/usr/local/lib/python3.6/site-packages/airflow/utils/dag_processing.py", line
797, in start
webserver_1 | self.start_in_async()
webserver_1 | File
"/usr/local/lib/python3.6/site-packages/airflow/utils/dag_processing.py", line
820, in start_in_async
webserver_1 | simple_dags = self.heartbeat()
webserver_1 | File
"/usr/local/lib/python3.6/site-packages/airflow/utils/dag_processing.py", line
1190, in heartbeat
webserver_1 | zombies = self._find_zombies()
webserver_1 | File
"/usr/local/lib/python3.6/site-packages/airflow/utils/db.py", line 73, in
wrapper
webserver_1 | return func(*args, **kwargs)
webserver_1 | File
"/usr/local/lib/python3.6/site-packages/airflow/utils/dag_processing.py", line
1236, in _find_zombies
webserver_1 | LJ.latest_heartbeat < limit_dttm,
webserver_1 | File
"/usr/local/lib/python3.6/site-packages/sqlalchemy/orm/query.py", line 2925, in
all
webserver_1 | return list(self)
webserver_1 | File
"/usr/local/lib/python3.6/site-packages/sqlalchemy/orm/query.py", line 3081, in
__iter__
webserver_1 | return self._execute_and_instances(context)
webserver_1 | File
"/usr/local/lib/python3.6/site-packages/sqlalchemy/orm/query.py", line 3103, in
_execute_and_instances
webserver_1 | querycontext, self._connection_from_session,
close_with_result=True
webserver_1 | File
"/usr/local/lib/python3.6/site-packages/sqlalchemy/orm/query.py", line 3111, in
_get_bind_args
webserver_1 | mapper=self._bind_mapper(), clause=querycontext.statement,
**kw
webserver_1 | File
"/usr/local/lib/python3.6/site-packages/sqlalchemy/orm/query.py", line 3096, in
_connection_from_session
webserver_1 | conn = self.session.connection(**kw)
webserver_1 | File
"/usr/local/lib/python3.6/site-packages/sqlalchemy/orm/session.py", line 1120,
in connection
webserver_1 | execution_options=execution_options,
webserver_1 | File
"/usr/local/lib/python3.6/site-packages/sqlalchemy/orm/session.py", line 1126,
in _connection_for_bind
webserver_1 | engine, execution_options
webserver_1 | File
"/usr/local/lib/python3.6/site-packages/sqlalchemy/orm/session.py", line 424,
in _connection_for_bind
webserver_1 | conn = bind.contextual_connect()
webserver_1 | File
"/usr/local/lib/python3.6/site-packages/sqlalchemy/engine/base.py", line 2194,
in contextual_connect
webserver_1 | **kwargs
webserver_1 | File
"/usr/local/lib/python3.6/site-packages/sqlalchemy/engine/base.py", line 125,
in __init__
webserver_1 | self.dispatch.engine_connect(self, self.__branch)
webserver_1 | File
"/usr/local/lib/python3.6/site-packages/sqlalchemy/event/attr.py", line 297, in
__call__
webserver_1 | fn(*args, **kw)
webserver_1 | File
"/usr/local/lib/python3.6/site-packages/airflow/utils/sqlalchemy.py", line 79,
in ping_connection
webserver_1 | log.warning("DB connection invalidated. Reconnecting...",
err)
webserver_1 | Message: 'DB connection invalidated. Reconnecting...'
webserver_1 | Arguments: (OperationalError('(psycopg2.OperationalError) server
closed the connection unexpectedly\n\tThis probably means the server terminated
abnormally\n\tbefore or while processing the request.\n',),)
{code}
Traceback:
{code}
webserver_1 | Traceback (most recent call last):
webserver_1 | File
"/usr/local/lib/python3.6/site-packages/airflow/utils/sqlalchemy.py", line 68,
in ping_connection
webserver_1 | connection.scalar(select([1]))
webserver_1 | File
"/usr/local/lib/python3.6/site-packages/sqlalchemy/engine/base.py", line 912,
in scalar
webserver_1 | return self.execute(object_, *multiparams, **params).scalar()
webserver_1 | File
"/usr/local/lib/python3.6/site-packages/sqlalchemy/engine/base.py", line 980,
in execute
webserver_1 | return meth(self, multiparams, params)
webserver_1 | File
"/usr/local/lib/python3.6/site-packages/sqlalchemy/sql/elements.py", line 273,
in _execute_on_connection
webserver_1 | return connection._execute_clauseelement(self, multiparams,
params)
webserver_1 | File
"/usr/local/lib/python3.6/site-packages/sqlalchemy/engine/base.py", line 1099,
in _execute_clauseelement
webserver_1 | distilled_params,
webserver_1 | File
"/usr/local/lib/python3.6/site-packages/sqlalchemy/engine/base.py", line 1240,
in _execute_context
webserver_1 | e, statement, parameters, cursor, context
webserver_1 | File
"/usr/local/lib/python3.6/site-packages/sqlalchemy/engine/base.py", line 1458,
in _handle_dbapi_exception
webserver_1 | util.raise_from_cause(sqlalchemy_exception, exc_info)
webserver_1 | File
"/usr/local/lib/python3.6/site-packages/sqlalchemy/util/compat.py", line 296,
in raise_from_cause
webserver_1 | reraise(type(exception), exception, tb=exc_tb, cause=cause)
webserver_1 | File
"/usr/local/lib/python3.6/site-packages/sqlalchemy/util/compat.py", line 276,
in reraise
webserver_1 | raise value.with_traceback(tb)
webserver_1 | File
"/usr/local/lib/python3.6/site-packages/sqlalchemy/engine/base.py", line 1236,
in _execute_context
webserver_1 | cursor, statement, parameters, context
webserver_1 | File
"/usr/local/lib/python3.6/site-packages/sqlalchemy/engine/default.py", line
536, in do_execute
webserver_1 | cursor.execute(statement, parameters)
webserver_1 | sqlalchemy.exc.OperationalError: (psycopg2.OperationalError)
server closed the connection unexpectedly
webserver_1 | This probably means the server terminated abnormally
webserver_1 | before or while processing the request.
webserver_1 | [SQL: 'SELECT 1'] (Background on this error at:
http://sqlalche.me/e/e3q8)
{code}
It has something to do with the configure_orm function in airflow/settings.py,
because that is the only usage of setup_event_handlers (from
airflow/utils/sqlalchemy.py).
And if I disable connection pooling, then the warning seems to go away.
Beyond that, I am not sure where to go from here. But something must be wrong.
was:
I am finding with 1.10.2 that I seem to get a warning {{DB connection
invalidated. Reconnecting...}} very frequently.
I to try to diagnose I added logging of the triggering error on line 79 in
airflow/utils/sqlalchemy.py, from which this warning is generated.
Here's the traceback:
{code}
webserver_1 | Traceback (most recent call last):
webserver_1 | File
"/usr/local/lib/python3.6/site-packages/airflow/utils/sqlalchemy.py", line 68,
in ping_connection
webserver_1 | connection.scalar(select([1]))
webserver_1 | File
"/usr/local/lib/python3.6/site-packages/sqlalchemy/engine/base.py", line 912,
in scalar
webserver_1 | return self.execute(object_, *multiparams, **params).scalar()
webserver_1 | File
"/usr/local/lib/python3.6/site-packages/sqlalchemy/engine/base.py", line 980,
in execute
webserver_1 | return meth(self, multiparams, params)
webserver_1 | File
"/usr/local/lib/python3.6/site-packages/sqlalchemy/sql/elements.py", line 273,
in _execute_on_connection
webserver_1 | return connection._execute_clauseelement(self, multiparams,
params)
webserver_1 | File
"/usr/local/lib/python3.6/site-packages/sqlalchemy/engine/base.py", line 1099,
in _execute_clauseelement
webserver_1 | distilled_params,
webserver_1 | File
"/usr/local/lib/python3.6/site-packages/sqlalchemy/engine/base.py", line 1240,
in _execute_context
webserver_1 | e, statement, parameters, cursor, context
webserver_1 | File
"/usr/local/lib/python3.6/site-packages/sqlalchemy/engine/base.py", line 1458,
in _handle_dbapi_exception
webserver_1 | util.raise_from_cause(sqlalchemy_exception, exc_info)
webserver_1 | File
"/usr/local/lib/python3.6/site-packages/sqlalchemy/util/compat.py", line 296,
in raise_from_cause
webserver_1 | reraise(type(exception), exception, tb=exc_tb, cause=cause)
webserver_1 | File
"/usr/local/lib/python3.6/site-packages/sqlalchemy/util/compat.py", line 276,
in reraise
webserver_1 | raise value.with_traceback(tb)
webserver_1 | File
"/usr/local/lib/python3.6/site-packages/sqlalchemy/engine/base.py", line 1236,
in _execute_context
webserver_1 | cursor, statement, parameters, context
webserver_1 | File
"/usr/local/lib/python3.6/site-packages/sqlalchemy/engine/default.py", line
536, in do_execute
webserver_1 | cursor.execute(statement, parameters)
webserver_1 | sqlalchemy.exc.OperationalError: (psycopg2.OperationalError)
server closed the connection unexpectedly
webserver_1 | This probably means the server terminated abnormally
webserver_1 | before or while processing the request.
webserver_1 | [SQL: 'SELECT 1'] (Background on this error at:
http://sqlalche.me/e/e3q8)
{code}
It has something to do with the configure_orm function in airflow/settings.py,
because that is the only usage of setup_event_handlers (from
airflow/utils/sqlalchemy.py).
And if I disable connection pooling, then the warning seems to go away.
Beyond that, I am not sure where to go from here. But something must be wrong.
> "DB connection invalidated" warning at every zombie check
> ---------------------------------------------------------
>
> Key: AIRFLOW-4134
> URL: https://issues.apache.org/jira/browse/AIRFLOW-4134
> Project: Apache Airflow
> Issue Type: Bug
> Affects Versions: 1.10.2
> Reporter: Daniel Standish
> Priority: Major
>
> I am finding with 1.10.2 that I seem to get a warning {{DB connection
> invalidated. Reconnecting...}} very frequently.
> I to try to diagnose I added logging of the triggering error on line 79 in
> airflow/utils/sqlalchemy.py, from which this warning is generated.
> Call stack:
> {code}
> webserver_1 | Call stack:
> webserver_1 | File "/usr/local/bin/airflow", line 32, in <module>
> webserver_1 | args.func(args)
> webserver_1 | File
> "/usr/local/lib/python3.6/site-packages/airflow/utils/cli.py", line 74, in
> wrapper
> webserver_1 | return f(*args, **kwargs)
> webserver_1 | File
> "/usr/local/lib/python3.6/site-packages/airflow/bin/cli.py", line 992, in
> scheduler
> webserver_1 | job.run()
> webserver_1 | File
> "/usr/local/lib/python3.6/site-packages/airflow/jobs.py", line 205, in run
> webserver_1 | self._execute()
> webserver_1 | File
> "/usr/local/lib/python3.6/site-packages/airflow/jobs.py", line 1532, in
> _execute
> webserver_1 | self._execute_helper()
> webserver_1 | File
> "/usr/local/lib/python3.6/site-packages/airflow/jobs.py", line 1562, in
> _execute_helper
> webserver_1 | self.processor_agent.start()
> webserver_1 | File
> "/usr/local/lib/python3.6/site-packages/airflow/utils/dag_processing.py",
> line 511, in start
> webserver_1 | self._async_mode)
> webserver_1 | File
> "/usr/local/lib/python3.6/site-packages/airflow/utils/dag_processing.py",
> line 565, in _launch_process
> webserver_1 | p.start()
> webserver_1 | File "/usr/local/lib/python3.6/multiprocessing/process.py",
> line 105, in start
> webserver_1 | self._popen = self._Popen(self)
> webserver_1 | File "/usr/local/lib/python3.6/multiprocessing/context.py",
> line 223, in _Popen
> webserver_1 | return
> _default_context.get_context().Process._Popen(process_obj)
> webserver_1 | File "/usr/local/lib/python3.6/multiprocessing/context.py",
> line 277, in _Popen
> webserver_1 | return Popen(process_obj)
> webserver_1 | File
> "/usr/local/lib/python3.6/multiprocessing/popen_fork.py", line 19, in __init__
> webserver_1 | self._launch(process_obj)
> webserver_1 | File
> "/usr/local/lib/python3.6/multiprocessing/popen_fork.py", line 73, in _launch
> webserver_1 | code = process_obj._bootstrap()
> webserver_1 | File "/usr/local/lib/python3.6/multiprocessing/process.py",
> line 258, in _bootstrap
> webserver_1 | self.run()
> webserver_1 | File "/usr/local/lib/python3.6/multiprocessing/process.py",
> line 93, in run
> webserver_1 | self._target(*self._args, **self._kwargs)
> webserver_1 | File
> "/usr/local/lib/python3.6/site-packages/airflow/utils/dag_processing.py",
> line 560, in helper
> webserver_1 | processor_manager.start()
> webserver_1 | File
> "/usr/local/lib/python3.6/site-packages/airflow/utils/dag_processing.py",
> line 797, in start
> webserver_1 | self.start_in_async()
> webserver_1 | File
> "/usr/local/lib/python3.6/site-packages/airflow/utils/dag_processing.py",
> line 820, in start_in_async
> webserver_1 | simple_dags = self.heartbeat()
> webserver_1 | File
> "/usr/local/lib/python3.6/site-packages/airflow/utils/dag_processing.py",
> line 1190, in heartbeat
> webserver_1 | zombies = self._find_zombies()
> webserver_1 | File
> "/usr/local/lib/python3.6/site-packages/airflow/utils/db.py", line 73, in
> wrapper
> webserver_1 | return func(*args, **kwargs)
> webserver_1 | File
> "/usr/local/lib/python3.6/site-packages/airflow/utils/dag_processing.py",
> line 1236, in _find_zombies
> webserver_1 | LJ.latest_heartbeat < limit_dttm,
> webserver_1 | File
> "/usr/local/lib/python3.6/site-packages/sqlalchemy/orm/query.py", line 2925,
> in all
> webserver_1 | return list(self)
> webserver_1 | File
> "/usr/local/lib/python3.6/site-packages/sqlalchemy/orm/query.py", line 3081,
> in __iter__
> webserver_1 | return self._execute_and_instances(context)
> webserver_1 | File
> "/usr/local/lib/python3.6/site-packages/sqlalchemy/orm/query.py", line 3103,
> in _execute_and_instances
> webserver_1 | querycontext, self._connection_from_session,
> close_with_result=True
> webserver_1 | File
> "/usr/local/lib/python3.6/site-packages/sqlalchemy/orm/query.py", line 3111,
> in _get_bind_args
> webserver_1 | mapper=self._bind_mapper(), clause=querycontext.statement,
> **kw
> webserver_1 | File
> "/usr/local/lib/python3.6/site-packages/sqlalchemy/orm/query.py", line 3096,
> in _connection_from_session
> webserver_1 | conn = self.session.connection(**kw)
> webserver_1 | File
> "/usr/local/lib/python3.6/site-packages/sqlalchemy/orm/session.py", line
> 1120, in connection
> webserver_1 | execution_options=execution_options,
> webserver_1 | File
> "/usr/local/lib/python3.6/site-packages/sqlalchemy/orm/session.py", line
> 1126, in _connection_for_bind
> webserver_1 | engine, execution_options
> webserver_1 | File
> "/usr/local/lib/python3.6/site-packages/sqlalchemy/orm/session.py", line 424,
> in _connection_for_bind
> webserver_1 | conn = bind.contextual_connect()
> webserver_1 | File
> "/usr/local/lib/python3.6/site-packages/sqlalchemy/engine/base.py", line
> 2194, in contextual_connect
> webserver_1 | **kwargs
> webserver_1 | File
> "/usr/local/lib/python3.6/site-packages/sqlalchemy/engine/base.py", line 125,
> in __init__
> webserver_1 | self.dispatch.engine_connect(self, self.__branch)
> webserver_1 | File
> "/usr/local/lib/python3.6/site-packages/sqlalchemy/event/attr.py", line 297,
> in __call__
> webserver_1 | fn(*args, **kw)
> webserver_1 | File
> "/usr/local/lib/python3.6/site-packages/airflow/utils/sqlalchemy.py", line
> 79, in ping_connection
> webserver_1 | log.warning("DB connection invalidated. Reconnecting...",
> err)
> webserver_1 | Message: 'DB connection invalidated. Reconnecting...'
> webserver_1 | Arguments: (OperationalError('(psycopg2.OperationalError)
> server closed the connection unexpectedly\n\tThis probably means the server
> terminated abnormally\n\tbefore or while processing the request.\n',),)
> {code}
> Traceback:
> {code}
> webserver_1 | Traceback (most recent call last):
> webserver_1 | File
> "/usr/local/lib/python3.6/site-packages/airflow/utils/sqlalchemy.py", line
> 68, in ping_connection
> webserver_1 | connection.scalar(select([1]))
> webserver_1 | File
> "/usr/local/lib/python3.6/site-packages/sqlalchemy/engine/base.py", line 912,
> in scalar
> webserver_1 | return self.execute(object_, *multiparams,
> **params).scalar()
> webserver_1 | File
> "/usr/local/lib/python3.6/site-packages/sqlalchemy/engine/base.py", line 980,
> in execute
> webserver_1 | return meth(self, multiparams, params)
> webserver_1 | File
> "/usr/local/lib/python3.6/site-packages/sqlalchemy/sql/elements.py", line
> 273, in _execute_on_connection
> webserver_1 | return connection._execute_clauseelement(self,
> multiparams, params)
> webserver_1 | File
> "/usr/local/lib/python3.6/site-packages/sqlalchemy/engine/base.py", line
> 1099, in _execute_clauseelement
> webserver_1 | distilled_params,
> webserver_1 | File
> "/usr/local/lib/python3.6/site-packages/sqlalchemy/engine/base.py", line
> 1240, in _execute_context
> webserver_1 | e, statement, parameters, cursor, context
> webserver_1 | File
> "/usr/local/lib/python3.6/site-packages/sqlalchemy/engine/base.py", line
> 1458, in _handle_dbapi_exception
> webserver_1 | util.raise_from_cause(sqlalchemy_exception, exc_info)
> webserver_1 | File
> "/usr/local/lib/python3.6/site-packages/sqlalchemy/util/compat.py", line 296,
> in raise_from_cause
> webserver_1 | reraise(type(exception), exception, tb=exc_tb, cause=cause)
> webserver_1 | File
> "/usr/local/lib/python3.6/site-packages/sqlalchemy/util/compat.py", line 276,
> in reraise
> webserver_1 | raise value.with_traceback(tb)
> webserver_1 | File
> "/usr/local/lib/python3.6/site-packages/sqlalchemy/engine/base.py", line
> 1236, in _execute_context
> webserver_1 | cursor, statement, parameters, context
> webserver_1 | File
> "/usr/local/lib/python3.6/site-packages/sqlalchemy/engine/default.py", line
> 536, in do_execute
> webserver_1 | cursor.execute(statement, parameters)
> webserver_1 | sqlalchemy.exc.OperationalError: (psycopg2.OperationalError)
> server closed the connection unexpectedly
> webserver_1 | This probably means the server terminated abnormally
> webserver_1 | before or while processing the request.
> webserver_1 | [SQL: 'SELECT 1'] (Background on this error at:
> http://sqlalche.me/e/e3q8)
> {code}
> It has something to do with the configure_orm function in
> airflow/settings.py, because that is the only usage of setup_event_handlers
> (from airflow/utils/sqlalchemy.py).
> And if I disable connection pooling, then the warning seems to go away.
> Beyond that, I am not sure where to go from here. But something must be
> wrong.
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)