bparhy opened a new issue, #30915:
URL: https://github.com/apache/airflow/issues/30915

   ### Official Helm Chart version
   
   1.6.0
   
   ### Apache Airflow version
   
   2.2.5
   
   ### Kubernetes Version
   
   1.20
   
   ### Helm Chart configuration
   
   ```
   $ airflow config list
   [api]
   enable_experimental_api = True
   auth_backend = airflow.api.auth.backend.default
   maximum_page_limit = 100
   fallback_page_limit = 100
   google_oauth2_audience =
   google_key_path =
   access_control_allow_headers =
   access_control_allow_methods =
   access_control_allow_origins =
   
   [cli]
   api_client = airflow.api.client.json_client
   endpoint_url = https://airflow.crunchanalytics.cloud
   
   [core]
   dags_folder = /usr/local/airflow/dags
   hostname_callable = socket.getfqdn
   default_timezone = utc
   executor = KubernetesExecutor
   sql_alchemy_conn = 
postgresql://postgres:postgres@abcd:5432/postgres?sslmode=disable
   sql_engine_encoding = utf-8
   sql_alchemy_pool_enabled = True
   sql_alchemy_pool_size = 5
   sql_alchemy_max_overflow = 10
   sql_alchemy_pool_recycle = 1800
   sql_alchemy_pool_pre_ping = True
   sql_alchemy_schema =
   parallelism = 32
   max_active_tasks_per_dag = 16
   dags_are_paused_at_creation = True
   max_active_runs_per_dag = 16
   load_examples = False
   load_default_connections = True
   plugins_folder = /usr/local/airflow/dags/plugins
   execute_tasks_new_python_interpreter = False
   fernet_key = abcdddddd
   donot_pickle = True
   dagbag_import_timeout = 30
   dagbag_import_error_tracebacks = True
   dagbag_import_error_traceback_depth = 2
   dag_file_processor_timeout = 50
   task_runner = StandardTaskRunner
   default_impersonation =
   security =
   unit_test_mode = False
   enable_xcom_pickling = True
   killed_task_cleanup_time = 60
   dag_run_conf_overrides_params = True
   dag_discovery_safe_mode = True
   default_task_retries = 0
   default_task_weight_rule = downstream
   min_serialized_dag_update_interval = 600
   min_serialized_dag_fetch_interval = 300
   max_num_rendered_ti_fields_per_task = 30
   check_slas = True
   xcom_backend = airflow.models.xcom.BaseXCom
   lazy_load_plugins = True
   lazy_discover_providers = True
   max_db_retries = 3
   hide_sensitive_var_conn_fields = True
   sensitive_var_conn_names =
   default_pool_task_slot_count = 128
   colored_console_log = False
   remote_logging = False
   
   [email]
   email_backend = airflow.utils.email.send_email_smtp
   email_conn_id = smtp_default
   default_email_on_retry = True
   default_email_on_failure = True
   
   [kubernetes]
   pod_template_file = /usr/local/airflow/pod_templates/pod_template_file.yaml
   worker_container_repository = abcd
   worker_container_tag = 2.2.5
   namespace = abc
   delete_worker_pods = True
   delete_worker_pods_on_failure = False
   worker_pods_creation_batch_size = 1
   multi_namespace_mode = False
   in_cluster = True
   kube_client_request_args =
   delete_option_kwargs =
   enable_tcp_keepalive = True
   tcp_keep_idle = 120
   tcp_keep_intvl = 30
   tcp_keep_cnt = 6
   verify_ssl = True
   worker_pods_pending_timeout = 300
   worker_pods_pending_timeout_check_interval = 120
   worker_pods_queued_check_interval = 60
   worker_pods_pending_timeout_batch_size = 100
   airflow_configmap = xyz-airflow-config
   airflow_local_settings_configmap = xyz-airflow-config
   
   
   [logging]
   base_log_folder = /usr/local/airflow/logs
   remote_logging = False
   remote_log_conn_id =
   google_key_path =
   remote_base_log_folder =
   encrypt_s3_logs = False
   logging_level = INFO
   fab_logging_level = WARNING
   logging_config_class =
   colored_console_log = False
   colored_log_format = [%(blue)s%(asctime)s%(reset)s] 
{%(blue)s%(filename)s:%(reset)s%(lineno)d} %(log_color)s%(levelname)s%(reset)s 
- %(log_color)s%(message)s%(reset)s
   colored_formatter_class = 
airflow.utils.log.colored_log.CustomTTYColoredFormatter
   log_format = [%(asctime)s] {%(filename)s:%(lineno)d} %(levelname)s - 
%(message)s
   simple_log_format = %(asctime)s %(levelname)s - %(message)s
   task_log_prefix_template =
   log_filename_template = {{ ti.dag_id }}/{{ ti.task_id }}/{{ ts }}/{{ 
try_number }}.log
   log_processor_filename_template = {{ filename }}.log
   dag_processor_manager_log_location = 
/usr/local/airflow/logs/dag_processor_manager/dag_processor_manager.log
   task_log_reader = task
   extra_logger_names =
   worker_log_server_port = 8793
   
   [scheduler]
   job_heartbeat_sec = 5
   scheduler_heartbeat_sec = 5
   num_runs = -1
   scheduler_idle_sleep_time = 1
   min_file_process_interval = 600
   deactivate_stale_dags_interval = 60
   dag_dir_list_interval = 20
   print_stats_interval = 30
   pool_metrics_interval = 5.0
   scheduler_health_check_threshold = 30
   orphaned_tasks_check_interval = 300.0
   child_process_log_directory = /usr/local/airflow/logs/scheduler
   scheduler_zombie_task_threshold = 300
   catchup_by_default = True
   max_tis_per_query = 512
   use_row_level_locking = True
   max_dagruns_to_create_per_loop = 10
   max_dagruns_per_loop_to_schedule = 20
   schedule_after_task_execution = False
   parsing_processes = 4
   file_parsing_sort_mode = modified_time
   use_job_schedule = True
   allow_trigger_in_future = False
   dependency_detector = 
airflow.serialization.serialized_objects.DependencyDetector
   trigger_timeout_check_interval = 15
   
   [smtp]
   smtp_host = xyz.net
   smtp_starttls = False
   smtp_ssl = False
   smtp_port = 25
   smtp_mail_from = [email protected]
   smtp_timeout = 30
   smtp_retry_limit = 5
   
   [webserver]
   base_url = http://localhost:8080
   default_ui_timezone = UTC
   web_server_host = 0.0.0.0
   web_server_port = 8080
   web_server_ssl_cert =
   web_server_ssl_key =
   session_backend = database
   web_server_master_timeout = 120
   web_server_worker_timeout = 120
   worker_refresh_batch_size = 1
   worker_refresh_interval = 6000
   reload_on_plugin_change = False
   secret_key = abcd
   workers = 4
   worker_class = sync
   access_logfile = -
   error_logfile = -
   access_logformat =
   expose_config = False
   expose_hostname = True
   expose_stacktrace = True
   dag_default_view = tree
   dag_orientation = LR
   log_fetch_timeout_sec = 5
   log_fetch_delay_sec = 2
   log_auto_tailing_offset = 30
   log_animation_speed = 1000
   hide_paused_dags_by_default = False
   page_size = 100
   navbar_color = #fff
   default_dag_run_display_number = 25
   enable_proxy_fix = False
   proxy_fix_x_for = 1
   proxy_fix_x_proto = 1
   proxy_fix_x_host = 1
   proxy_fix_x_port = 1
   proxy_fix_x_prefix = 1
   cookie_secure = False
   cookie_samesite = Lax
   default_wrap = False
   x_frame_enabled = True
   show_recent_stats_for_completed_runs = True
   update_fab_perms = True
   session_lifetime_minutes = 43200
   auto_refresh_interval = 3
   auth_backend = airflow.contrib.auth.backends.ldap_auth
   rbac = True
   ```
   
   ### Docker Image customizations
   
   _No response_
   
   ### What happened
   
   We have migrated to the official helm chart version 1.6 and we are noticing 
that once the webserver comes up, the scheduler takes significant time (3 to 4 
minutes) to parse all the dags and throws a mysterious ModuleNotFoundError. 
   
   Broken DAG: 
[/usr/local/airflow/dags/business/source/abcd/abcd_xyz_forcast_dly_dags.py] 
Traceback (most recent call last):
     File "<frozen importlib._bootstrap>", line 219, in 
_call_with_frames_removed
     File 
"/usr/local/airflow/dags/business/source/abcd/abcd_xyz_forcast_dly_dags.py", 
line 5, in <module>
       from xy_module import (
   ModuleNotFoundError: No module named 'xy_module'
   
   We have a local module, which is a single large file; we see the above error 
initially, and the error goes away once the scheduler has run multiple 
iterations over the dags.
   
   We have around 1500 dags and we have another instance with 300 dags and we 
are seeing the same issue there.
   
   We have added the module to common_package and added it to .airflowignore, 
but that is not helping either.
   
   ### What you think should happen instead
   
   The current unofficial helm chart version of the deployment does not have 
the issue. As soon as the scheduler and webserver come up, we see that the 
scheduler has already parsed all the dags and they are ready to be used.
   
   We want the same exact behavior using official helm chart. 
   
   We are using the same exact version for both the official and unofficial 
helm charts, so the behavior should be the same.
   
   One other thing that is different is the postgres version: 
   
    Postgres for the unofficial version : 9.6.2
   Postgres for official helm chart: 11.12
   
   ### How to reproduce
   
   Using the official helm chart with k8s.
   
   ### Anything else
   
   This occurs every time we create a brand new instance. When the postgres 
instance gets created, it looks like the scheduler starts parsing the dags we 
put into the /usr/local/airflow/dags volume and starts giving the import error; 
after 2 to 3 minutes, when it's done parsing those dags, the error goes away.
   
   Then all the dags show up.
   
   Next deployment of new dags happen without error.
   
   ### Are you willing to submit PR?
   
   - [X] Yes I am willing to submit a PR!
   
   ### Code of Conduct
   
   - [X] I agree to follow this project's [Code of 
Conduct](https://github.com/apache/airflow/blob/main/CODE_OF_CONDUCT.md)
   


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to