This is an automated email from the ASF dual-hosted git repository.

ephraimanierobi pushed a commit to branch v2-4-test
in repository https://gitbox.apache.org/repos/asf/airflow.git

commit 4706f09699653383da68e58bbd666e9beda79aa7
Author: Jarek Potiuk <[email protected]>
AuthorDate: Tue Oct 25 10:38:40 2022 +0200

    Make RotatingFilehandler used in DagProcessor non-caching (#27223)
    
    The RotatingFileHandler is used when you enable it via
    `CONFIG_PROCESSOR_MANAGER_LOGGER=True` and it exhibits similar
    behaviour as the FileHandler had when it comes to caching
    the file on the Kernel level. While it is harmless (the cache
    will be freed when needed), it is also misleading for those who
    are trying to understand memory usage by Airlfow.
    
    The fix is to add a custom non-caching RotatingFileHandler similarly
    as in #18054.
    
    Note that it will require to manually modify local settings if
    the settings were created before this change.
    
    Fixes: #27065
    (cherry picked from commit 126b7b8a073f75096d24378ffd749ce166267826)
---
 airflow/config_templates/airflow_local_settings.py |  2 +-
 airflow/utils/log/non_caching_file_handler.py      | 47 ++++++++++++++++------
 newsfragments/27065.misc.rst                       |  1 +
 3 files changed, 37 insertions(+), 13 deletions(-)

diff --git a/airflow/config_templates/airflow_local_settings.py 
b/airflow/config_templates/airflow_local_settings.py
index 317544ca7e..aa5791a324 100644
--- a/airflow/config_templates/airflow_local_settings.py
+++ b/airflow/config_templates/airflow_local_settings.py
@@ -158,7 +158,7 @@ if EXTRA_LOGGER_NAMES:
 DEFAULT_DAG_PARSING_LOGGING_CONFIG: dict[str, dict[str, dict[str, Any]]] = {
     'handlers': {
         'processor_manager': {
-            'class': 'logging.handlers.RotatingFileHandler',
+            'class': 
'airflow.utils.log.non_caching_file_handler.NonCachingRotatingFileHandler',
             'formatter': 'airflow',
             'filename': DAG_PROCESSOR_MANAGER_LOG_LOCATION,
             'mode': 'a',
diff --git a/airflow/utils/log/non_caching_file_handler.py 
b/airflow/utils/log/non_caching_file_handler.py
index 5fbed3a1e1..46fe69a99b 100644
--- a/airflow/utils/log/non_caching_file_handler.py
+++ b/airflow/utils/log/non_caching_file_handler.py
@@ -16,11 +16,25 @@
 # under the License.
 from __future__ import annotations
 
-import logging
 import os
+from logging import FileHandler
+from logging.handlers import RotatingFileHandler
+from typing import IO
 
 
-class NonCachingFileHandler(logging.FileHandler):
+def make_file_io_non_caching(io: IO[str]) -> IO[str]:
+    try:
+        fd = io.fileno()
+        os.posix_fadvise(fd, 0, 0, os.POSIX_FADV_DONTNEED)
+    except Exception:
+        # in case either file descriptor cannot be retrieved or fadvise is not 
available
+        # we should simply return the wrapper retrieved by FileHandler's open 
method
+        # the advice to the kernel is just an advice and if we cannot give it, 
we won't
+        pass
+    return io
+
+
+class NonCachingFileHandler(FileHandler):
     """
     This is an extension of the python FileHandler that advises the Kernel to 
not cache the file
     in PageCache when it is written. While there is nothing wrong with such 
cache (it will be cleaned
@@ -35,13 +49,22 @@ class NonCachingFileHandler(logging.FileHandler):
     """
 
     def _open(self):
-        wrapper = super()._open()
-        try:
-            fd = wrapper.fileno()
-            os.posix_fadvise(fd, 0, 0, os.POSIX_FADV_DONTNEED)
-        except Exception:
-            # in case either file descriptor cannot be retrieved or fadvise is 
not available
-            # we should simply return the wrapper retrieved by FileHandler's 
open method
-            # the advise to the kernel is just an advise and if we cannot give 
it, we won't
-            pass
-        return wrapper
+        return make_file_io_non_caching(super()._open())
+
+
+class NonCachingRotatingFileHandler(RotatingFileHandler):
+    """
+    This is an extension of the python RotatingFileHandler that advises the 
Kernel to not cache the file
+    in PageCache when it is written. While there is nothing wrong with such 
cache (it will be cleaned
+    when memory is needed), it causes ever-growing memory usage when scheduler 
is running as it keeps
+    on writing new log files and the files are not rotated later on. This 
might lead to confusion
+    for our users, who are monitoring memory usage of Scheduler - without 
realising that it is
+    harmless and expected in this case.
+
+    See https://github.com/apache/airflow/issues/27065
+
+    Adding the advice to Kernel might help with not generating the cache 
memory growth in the first place.
+    """
+
+    def _open(self):
+        return make_file_io_non_caching(super()._open())
diff --git a/newsfragments/27065.misc.rst b/newsfragments/27065.misc.rst
new file mode 100644
index 0000000000..215bc8321d
--- /dev/null
+++ b/newsfragments/27065.misc.rst
@@ -0,0 +1 @@
+In case you want to decrease cache memory when 
``CONFIG_PROCESSOR_MANAGER_LOGGER=True``, and you have your local settings 
created before, you can update ``processor_manager_handler`` to use 
``airflow.utils.log.non_caching_file_handler.NonCachingRotatingFileHandler`` 
handler instead of ``logging.RotatingFileHandler``.

Reply via email to