aki263 commented on issue #36998:
URL: https://github.com/apache/airflow/issues/36998#issuecomment-1925613030
I am experiencing a similar problem on 2.7.3:
```
[2024-02-04T07:19:20.201+0000] {scheduler_job_runner.py:1081} DEBUG - Executor full, skipping critical section
[2024-02-04T07:19:20.203+0000] {base_executor.py:217} DEBUG - 32 running task instances
[2024-02-04T07:19:20.203+0000] {base_executor.py:218} DEBUG - 0 in queue
[2024-02-04T07:19:20.203+0000] {base_executor.py:219} DEBUG - 0 open slots
```
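For what it's worth, the numbers add up: with `[core] parallelism = 32` (see the config below) and 32 running task instances, the executor reports zero open slots, so the scheduler skips the critical section. A minimal sketch of that slot arithmetic, assuming it mirrors what `BaseExecutor` logs (simplified, not the actual Airflow source):
```python
# Simplified slot math behind the DEBUG lines above; values taken from the
# log output and from [core] parallelism in the config further down.
parallelism = 32   # [core] parallelism
running = 32       # "32 running task instances"
queued = 0         # "0 in queue"

# assumed: open slots = parallelism minus currently running task instances
open_slots = parallelism - running
print(f"{running} running, {queued} queued, {open_slots} open slots")
# -> 32 running, 0 queued, 0 open slots, hence "Executor full"
```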
I am running two scheduler replicas and getting a lot of messages like `scheduling was skipped, probably because the DAG record was locked`.
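If I understand the multi-scheduler design correctly, those messages come from `use_row_level_locking = True`: each scheduler takes `SELECT ... FOR UPDATE SKIP LOCKED` locks on the DAG rows it is examining, and rows already held by the other replica are skipped and logged. A rough sketch of that pattern (the model and query here are illustrative stand-ins, not Airflow's actual code):
```python
# Illustrative SKIP LOCKED query, roughly the pattern behind
# "scheduling was skipped ... the DAG record was locked".
from sqlalchemy import Column, Integer, String, create_engine, select
from sqlalchemy.orm import Session, declarative_base

Base = declarative_base()

class DagModel(Base):  # stand-in for Airflow's real DagModel
    __tablename__ = "dag"
    id = Column(Integer, primary_key=True)
    dag_id = Column(String)

engine = create_engine("postgresql+psycopg2://...")  # redacted, as in the config

with Session(engine) as session:
    # FOR UPDATE SKIP LOCKED: rows locked by the other scheduler replica
    # are skipped rather than waited on, so each replica works on a
    # disjoint set of DAG records.
    dags = session.scalars(
        select(DagModel).with_for_update(skip_locked=True).limit(10)
    ).all()
```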
Manually killing one of the scheduler pods alleviated the issue. My schedulers had not been restarted for the last 5 days.
```
airflow-scheduler-686459bbff-g68xj   2/2   Running   0   19m
airflow-scheduler-686459bbff-gv9vc   2/2   Running   0   5d11h
```
Airflow config
```
[core]
dags_folder = /opt/airflow/dags
hostname_callable = airflow.utils.net.getfqdn
might_contain_dag_callable = airflow.utils.file.might_contain_dag_via_default_heuristic
default_timezone = utc
executor = KubernetesExecutor
auth_manager = airflow.auth.managers.fab.fab_auth_manager.FabAuthManager
parallelism = 32
max_active_tasks_per_dag = 16
dags_are_paused_at_creation = True
max_active_runs_per_dag = 16
# mp_start_method =
load_examples = false
plugins_folder = /opt/airflow/plugins
execute_tasks_new_python_interpreter = False
fernet_key = xxxxxxx=
donot_pickle = True
dagbag_import_timeout = 30.0
dagbag_import_error_tracebacks = True
dagbag_import_error_traceback_depth = 2
dag_file_processor_timeout = 50
task_runner = StandardTaskRunner
default_impersonation =
security =
unit_test_mode = False
enable_xcom_pickling = False
allowed_deserialization_classes = airflow\..*
killed_task_cleanup_time = 60
dag_run_conf_overrides_params = True
dag_discovery_safe_mode = True
dag_ignore_file_syntax = regexp
default_task_retries = 0
default_task_retry_delay = 300
max_task_retry_delay = 86400
default_task_weight_rule = downstream
default_task_execution_timeout =
min_serialized_dag_update_interval = 30
compress_serialized_dags = False
min_serialized_dag_fetch_interval = 10
max_num_rendered_ti_fields_per_task = 30
check_slas = True
xcom_backend = airflow.models.xcom.BaseXCom
lazy_load_plugins = True
lazy_discover_providers = True
hide_sensitive_var_conn_fields = True
sensitive_var_conn_names =
default_pool_task_slot_count = 512
max_map_length = 1024
daemon_umask = 0o077
# dataset_manager_class =
# dataset_manager_kwargs =
database_access_isolation = False
# internal_api_url =
test_connection = Disabled
colored_console_log = True
remote_logging = True
[database]
alembic_ini_file_path = alembic.ini
sql_alchemy_conn = postgresql+psycopg2://.....
# sql_alchemy_engine_args =
sql_engine_encoding = utf-8
# sql_engine_collation_for_ids =
sql_alchemy_pool_enabled = True
sql_alchemy_pool_size = 5
sql_alchemy_max_overflow = 10
sql_alchemy_pool_recycle = 1800
sql_alchemy_pool_pre_ping = True
sql_alchemy_schema =
# sql_alchemy_connect_args =
load_default_connections = True
max_db_retries = 3
check_migrations = True
[logging]
base_log_folder = /opt/airflow/logs
remote_logging = True
remote_log_conn_id = aws_conn_s3
delete_local_logs = False
google_key_path =
remote_base_log_folder = s3://s3-paas/airflow-prod-logs/
remote_task_handler_kwargs =
encrypt_s3_logs = False
logging_level = DEBUG
celery_logging_level =
fab_logging_level = WARNING
logging_config_class =
colored_console_log = True
colored_log_format = [%%(blue)s%%(asctime)s%%(reset)s] {%%(blue)s%%(filename)s:%%(reset)s%%(lineno)d} %%(log_color)s%%(levelname)s%%(reset)s - %%(log_color)s%%(message)s%%(reset)s
colored_formatter_class = airflow.utils.log.colored_log.CustomTTYColoredFormatter
log_format = [%%(asctime)s] {%%(filename)s:%%(lineno)d} %%(levelname)s - %%(message)s
simple_log_format = %%(asctime)s %%(levelname)s - %%(message)s
dag_processor_log_target = file
dag_processor_log_format = [%%(asctime)s] [SOURCE:DAG_PROCESSOR] {%%(filename)s:%%(lineno)d} %%(levelname)s - %%(message)s
log_formatter_class = airflow.utils.log.timezone_aware.TimezoneAware
secret_mask_adapter =
task_log_prefix_template =
log_filename_template = dag_id={{ ti.dag_id }}/run_id={{ ti.run_id }}/task_id={{ ti.task_id }}/{%% if ti.map_index >= 0 %%}map_index={{ ti.map_index }}/{%% endif %%}attempt={{ try_number }}.log
log_processor_filename_template = {{ filename }}.log
dag_processor_manager_log_location = /opt/airflow/logs/dag_processor_manager/dag_processor_manager.log
task_log_reader = task
extra_logger_names =
worker_log_server_port = 8793
trigger_log_server_port = 8794
# interleave_timestamp_parser =
file_task_handler_new_folder_permissions = 0o775
file_task_handler_new_file_permissions = 0o664
celery_stdout_stderr_separation = False
delete_worker_pods = False
[metrics]
metrics_allow_list =
metrics_block_list =
statsd_on = True
statsd_host = airflow-statsd
statsd_port = 9125
statsd_prefix = airflow
stat_name_handler =
statsd_datadog_enabled = False
statsd_datadog_tags =
statsd_datadog_metrics_tags = True
# statsd_custom_client_path =
statsd_disabled_tags = job_id,run_id
statsd_influxdb_enabled = False
otel_on = False
otel_host = localhost
otel_port = 8889
otel_prefix = airflow
otel_interval_milliseconds = 60000
otel_debugging_on = False
otel_ssl_active = False
[secrets]
backend =
backend_kwargs =
use_cache = False
cache_ttl_seconds = 900
[cli]
api_client = airflow.api.client.local_client
endpoint_url = http://localhost:8080
[debug]
fail_fast = False
[api]
enable_experimental_api = False
auth_backends = airflow.api.auth.backend.basic_auth,airflow.api.auth.backend.session
maximum_page_limit = 100
fallback_page_limit = 100
google_oauth2_audience =
google_key_path =
access_control_allow_headers =
access_control_allow_methods =
access_control_allow_origins =
enable_xcom_deserialize_support = False
[lineage]
backend =
[operators]
default_owner = airflow
default_deferrable = false
default_cpus = 1
default_ram = 512
default_disk = 512
default_gpus = 0
default_queue = default
allow_illegal_arguments = False
[webserver]
access_denied_message = Access is Denied
config_file = /opt/airflow/webserver_config.py
base_url = http://localhost:8080
default_ui_timezone = UTC
web_server_host = 0.0.0.0
web_server_port = 8080
web_server_ssl_cert =
web_server_ssl_key =
session_backend = database
web_server_master_timeout = 120
web_server_worker_timeout = 120
worker_refresh_batch_size = 1
worker_refresh_interval = 6000
reload_on_plugin_change = False
secret_key = xxxxxxxx
workers = 4
worker_class = sync
access_logfile = -
error_logfile = -
access_logformat =
expose_config = False
expose_hostname = False
expose_stacktrace = False
dag_default_view = grid
dag_orientation = LR
grid_view_sorting_order = topological
log_fetch_timeout_sec = 5
log_fetch_delay_sec = 2
log_auto_tailing_offset = 30
log_animation_speed = 1000
hide_paused_dags_by_default = False
page_size = 100
navbar_color = #fff
default_dag_run_display_number = 25
enable_proxy_fix = True
proxy_fix_x_for = 1
proxy_fix_x_proto = 1
proxy_fix_x_host = 1
proxy_fix_x_port = 1
proxy_fix_x_prefix = 1
cookie_secure = False
cookie_samesite = Lax
default_wrap = False
x_frame_enabled = True
# analytics_tool =
# analytics_id =
show_recent_stats_for_completed_runs = True
update_fab_perms = True
session_lifetime_minutes = 43200
# instance_name =
instance_name_has_markup = False
auto_refresh_interval = 3
warn_deployment_exposure = True
audit_view_excluded_events = gantt,landing_times,tries,duration,calendar,graph,grid,tree,tree_data
# audit_view_included_events =
enable_swagger_ui = True
run_internal_api = False
auth_rate_limited = True
auth_rate_limit = 5 per 40 second
caching_hash_method = md5
show_trigger_form_if_no_params = False
rbac = True
[email]
email_backend = airflow.utils.email.send_email_smtp
email_conn_id = smtp_default
default_email_on_retry = True
default_email_on_failure = True
# subject_template =
# html_content_template =
# from_email =
ssl_context = default
[smtp]
smtp_host = smtp.gmail.com
smtp_starttls = true
smtp_ssl = true
smtp_user = [email protected]
smtp_password = [email protected]
smtp_port = 587
smtp_mail_from = [email protected]
smtp_timeout = 30
smtp_retry_limit = 5
[sentry]
sentry_on = false
sentry_dsn =
# before_send =
[scheduler]
job_heartbeat_sec = 5
scheduler_heartbeat_sec = 5
local_task_job_heartbeat_sec = 0
num_runs = -1
scheduler_idle_sleep_time = 1
min_file_process_interval = 30
parsing_cleanup_interval = 60
stale_dag_threshold = 50
dag_dir_list_interval = 30
print_stats_interval = 30
pool_metrics_interval = 5.0
scheduler_health_check_threshold = 30
enable_health_check = False
scheduler_health_check_server_port = 8974
orphaned_tasks_check_interval = 300.0
child_process_log_directory = /opt/airflow/logs/scheduler
scheduler_zombie_task_threshold = 300
zombie_detection_interval = 10.0
catchup_by_default = True
ignore_first_depends_on_past_by_default = True
max_tis_per_query = 16
use_row_level_locking = True
max_dagruns_to_create_per_loop = 10
max_dagruns_per_loop_to_schedule = 20
schedule_after_task_execution = True
parsing_pre_import_modules = True
parsing_processes = 2
file_parsing_sort_mode = modified_time
standalone_dag_processor = False
max_callbacks_per_loop = 20
dag_stale_not_seen_duration = 600
use_job_schedule = True
allow_trigger_in_future = False
trigger_timeout_check_interval = 15
task_queued_timeout = 600.0
task_queued_timeout_check_interval = 120.0
allowed_run_id_pattern = ^[A-Za-z0-9_.~:+-]+$
logging_level = DEBUG
run_duration = 41460
statsd_host = airflow-statsd
statsd_on = True
statsd_port = 9125
statsd_prefix = airflow
[triggerer]
default_capacity = 1000
job_heartbeat_sec = 5
triggerer_health_check_threshold = 30
[sensors]
default_timeout = 604800
[aws]
# session_factory =
cloudwatch_task_handler_json_serializer = airflow.providers.amazon.aws.log.cloudwatch_task_handler.json_serialize_legacy
[aws_ecs_executor]
conn_id = aws_default
# region_name =
assign_public_ip = False
# cluster =
# container_name =
launch_type = FARGATE
platform_version = LATEST
# security_groups =
# subnets =
# task_definition =
max_run_task_attempts = 3
# run_task_kwargs =
[celery_kubernetes_executor]
kubernetes_queue = kubernetes
[celery]
celery_app_name = airflow.providers.celery.executors.celery_executor
worker_concurrency = 16
# worker_autoscale =
worker_prefetch_multiplier = 1
worker_enable_remote_control = true
broker_url = redis://redis:6379/0
# result_backend =
result_backend_sqlalchemy_engine_options =
flower_host = 0.0.0.0
flower_url_prefix = /
flower_port = 5555
flower_basic_auth =
sync_parallelism = 0
celery_config_options = airflow.providers.celery.executors.default_celery.DEFAULT_CELERY_CONFIG
ssl_active = False
ssl_key =
ssl_cert =
ssl_cacert =
pool = prefork
operation_timeout = 1.0
task_track_started = True
task_publish_max_retries = 3
worker_precheck = False
[celery_broker_transport_options]
# visibility_timeout =
# sentinel_kwargs =
[local_kubernetes_executor]
kubernetes_queue = kubernetes
[kubernetes_executor]
api_client_retry_configuration =
logs_task_metadata = False
pod_template_file = /opt/airflow/pod_templates/pod_template_file.yaml
worker_container_repository = xyz.com/airflow-prod
worker_container_tag = a6a136ee
namespace = airflow
delete_worker_pods = True
delete_worker_pods_on_failure = False
worker_pods_creation_batch_size = 1
multi_namespace_mode = False
multi_namespace_mode_namespace_list =
in_cluster = True
# cluster_context =
# config_file =
kube_client_request_args =
delete_option_kwargs =
enable_tcp_keepalive = True
tcp_keep_idle = 120
tcp_keep_intvl = 30
tcp_keep_cnt = 6
verify_ssl = True
worker_pods_queued_check_interval = 60
ssl_ca_cert =
[dask]
cluster_address = 127.0.0.1:8786
tls_ca =
tls_cert =
tls_key =
[azure_remote_logging]
remote_wasb_log_container = airflow-logs
[kubernetes]
airflow_configmap = airflow-airflow-config
airflow_local_settings_configmap = airflow-airflow-config
multi_namespace_mode = False
namespace = airflow
pod_template_file = /opt/airflow/pod_templates/pod_template_file.yaml
worker_container_repository = xyz.com/airflow-prod
worker_container_tag = a6a136ee
```
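To rule out a mismatch between what is in `airflow.cfg` and what each scheduler actually loads, this is how I would sanity-check the effective values (uses Airflow's configuration API; just a check, not a fix):
```python
# Print the effective values the scheduler sees for the settings most
# relevant here; run inside the scheduler container/pod.
from airflow.configuration import conf

print(conf.getint("core", "parallelism"))                      # expect 32
print(conf.getboolean("scheduler", "use_row_level_locking"))   # expect True
print(conf.getint("scheduler", "max_tis_per_query"))           # expect 16
```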