yiqijiu opened a new pull request, #31998:
URL: https://github.com/apache/airflow/pull/31998

   In my production environment, the Airflow application and the database are 
located on different networks. Because Celery's DatabaseBackend uses the 
NullPool connection pool from SQLAlchemy, network fluctuations can cause 
connection failures that terminate the scheduler process. I have added retry 
logic based on the retry code provided in Celery's DatabaseBackend.
   Although I am running version 2.2.3, I have analyzed the code and found that 
this change is also applicable to the current version.
   Below is the error message:
   
-------------------------------------------------------------------------------------------------------------------
   The above exception was the direct cause of the following exception: 
Traceback (most recent call last):
   File "/app/airflow2.2.3/airflow/airflow/jobs/scheduler_job.py", line 672, in 
_execute self._run_scheduler_loop()
   File "/app/airflow2.2.3/airflow/airflow/jobs/scheduler_job.py", line 754, in 
_run_scheduler_loop self.executor.heartbeat()
   File "/app/airflow2.2.3/airflow/airflow/executors/base_executor.py", line 
168, in heartbeat self.sync()
   File "/app/airflow2.2.3/airflow/airflow/executors/celery_executor.py", line 
330, in sync self.update_all_task_states()
   File "/app/airflow2.2.3/airflow/airflow/executors/celery_executor.py", line 
442, in update_all_task_states 
state_and_info_by_celery_task_id = self.bulk_state_fetcher.get_many(self.tasks.values())
   File "/app/airflow2.2.3/airflow/airflow/executors/celery_executor.py", line 598, 
in get_many result = self._get_many_from_db_backend(async_results)
   File "/app/airflow2.2.3/airflow/airflow/executors/celery_executor.py", line 
618, in _get_many_from_db_backend 
tasks = session.query(task_cls).filter(task_cls.task_id.in_(task_ids)).all()
   
File "/app/airflow2.2.3/airflow2_env/lib/python3.8/site-packages/sqlalchemy/orm/query.py",
 line 3373, in all return list(self)
   
File "/app/airflow2.2.3/airflow2_env/lib/python3.8/site-packages/sqlalchemy/orm/query.py",
 line 3535, in __iter__ return self._execute_and_instances(context)
   
File "/app/airflow2.2.3/airflow2_env/lib/python3.8/site-packages/sqlalchemy/orm/query.py", line
 3556, in _execute_and_instances conn = self._get_bind_args(
   
File "/app/airflow2.2.3/airflow2_env/lib/python3.8/site-packages/sqlalchemy/orm/query.py",
 line 3571, in _get_bind_args return fn(
   
File "/app/airflow2.2.3/airflow2_env/lib/python3.8/site-packages/sqlalchemy/orm/query.py", line
 3550, in _connection_from_session conn = self.session.connection(**kw)
   
File "/app/airflow2.2.3/airflow2_env/lib/python3.8/site-packages/sqlalchemy/orm/session.py",
 line 1142, in connection return self._connection_for_bind(
   
File "/app/airflow2.2.3/airflow2_env/lib/python3.8/site-packages/sqlalchemy/orm/session.py", line
 1150, in _connection_for_bind return self.transaction._connection_for_bind(
   
File "/app/airflow2.2.3/airflow2_env/lib/python3.8/site-packages/sqlalchemy/orm/session.py",
 line 433, in _connection_for_bind conn = bind._contextual_connect()
   
File "/app/airflow2.2.3/airflow2_env/lib/python3.8/site-packages/sqlalchemy/engine/base.py", line
 2302, in _contextual_connect self._wrap_pool_connect(self.pool.connect, None),
   
File "/app/airflow2.2.3/airflow2_env/lib/python3.8/site-packages/sqlalchemy/engine/base.py",
 line 2339, in _wrap_pool_connect
   Connection._handle_dbapi_exception_noconnection(
   File 
"/app/airflow2.2.3/airflow2_env/lib/python3.8/site-packages/sqlalchemy/engine/base.py",
 line 1583, in _handle_dbapi_exception_noconnection util.raise_(
   
File "/app/airflow2.2.3/airflow2_env/lib/python3.8/site-packages/sqlalchemy/util/compat.py",
 line 182, in raise_
   raise exception
File "/app/airflow2.2.3/airflow2_env/lib/python3.8/site-packages/sqlalchemy/engine/base.py",
 line 2336, in _wrap_pool_connect
   return fn()
   
---------------------------------------------------------------------------------------------------
   2023-06-05 16:39:05.069 ERROR - Exception when executing 
SchedulerJob._run_scheduler_loop
Traceback (most recent call last):
   
File "/app/airflow2.2.3/airflow2_env/lib/python3.8/site-packages/sqlalchemy/engine/base.py",
 line 2336, in _wrap_pool_connect return fn()
   
File "/app/airflow2.2.3/airflow2_env/lib/python3.8/site-packages/sqlalchemy/pool/base.py",
 line 364, in connect return _ConnectionFairy._checkout(self)
   
File "/app/airflow2.2.3/airflow2_env/lib/python3.8/site-packages/sqlalchemy/pool/base.py",
 line 778, in _checkout fairy = _ConnectionRecord.checkout(pool)
   
File "/app/airflow2.2.3/airflow2_env/lib/python3.8/site-packages/sqlalchemy/pool/base.py",
 line 495, in checkout rec = pool._do_get()
   
File "/app/airflow2.2.3/airflow2_env/lib/python3.8/site-packages/sqlalchemy/pool/impl.py",
 line 241, in _do_get return self._create_connection()
   
File "/app/airflow2.2.3/airflow2_env/lib/python3.8/site-packages/sqlalchemy/pool/base.py",
 line 309, in _create_connection return _ConnectionRecord(self)
   
File "/app/airflow2.2.3/airflow2_env/lib/python3.8/site-packages/sqlalchemy/pool/base.py",
 line 440, in __init__ self.__connect(first_connect_check=True)
   
File "/app/airflow2.2.3/airflow2_env/lib/python3.8/site-packages/sqlalchemy/pool/base.py",
 line 661, in __connect pool.logger.debug("Error on connect(): %s", e)
   
File "/app/airflow2.2.3/airflow2_env/lib/python3.8/site-packages/sqlalchemy/util/langhelpers.py",
 line 68, in __exit__ compat.raise_(
   
File "/app/airflow2.2.3/airflow2_env/lib/python3.8/site-packages/sqlalchemy/util/compat.py",
 line 182, in raise_ raise exception
   
File "/app/airflow2.2.3/airflow2_env/lib/python3.8/site-packages/sqlalchemy/pool/base.py",
 line 656, in __connect connection = pool._invoke_creator(self)
   
File "/app/airflow2.2.3/airflow2_env/lib/python3.8/site-packages/sqlalchemy/engine/strategies.py",
 line 114, in connect return dialect.connect(*cargs, **cparams)
   
File "/app/airflow2.2.3/airflow2_env/lib/python3.8/site-packages/sqlalchemy/engine/default.py", line
 508, in connect return self.dbapi.connect(*cargs, **cparams)
   
File "/app/airflow2.2.3/airflow2_env/lib/python3.8/site-packages/psycopg2/__init__.py",
 line 126, in connect conn = _connect(dsn, connection_factory=connection_factory, 
**kwasync)
psycopg2.OperationalError: could not connect to server: Connection timed out
   Is the server running on host "xxxxxxxxxx" and accepting TCP/IP connections on 
port 5432?
   


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to