uranusjr commented on code in PR #27223:
URL: https://github.com/apache/airflow/pull/27223#discussion_r1002915227


##########
airflow/utils/log/non_caching_file_handler.py:
##########
@@ -16,11 +16,39 @@
 # under the License.
 from __future__ import annotations
 
-import logging
 import os
+from logging import FileHandler
+from logging.handlers import RotatingFileHandler
 
 
-class NonCachingFileHandler(logging.FileHandler):
+class NonCachingFileHandler(FileHandler):
+    """
+    This is an extension of the python FileHandler that advises the Kernel to 
not cache the file
+    in PageCache when it is written. While there is nothing wrong with such 
cache (it will be cleaned
+    when memory is needed), it causes ever-growing memory usage when scheduler 
is running as it keeps
+    on writing new log files and the files are not rotated later on. This 
might lead to confusion
+    for our users, who are monitoring memory usage of Scheduler - without 
realising that it is
+    harmless and expected in this case.
+
+    See https://github.com/apache/airflow/issues/14924
+
+    Adding the advice to Kernel might help with not generating the cache 
memory growth in the first place.
+    """
+
+    def _open(self):
+        wrapper = super()._open()
+        try:
+            fd = wrapper.fileno()
+            os.posix_fadvise(fd, 0, 0, os.POSIX_FADV_DONTNEED)
+        except Exception:
+            # in case either file descriptor cannot be retrieved or fadvise is 
not available
+            # we should simply return the wrapper retrieved by FileHandler's 
open method
+            # the advise to the kernel is just an advise and if we cannot give 
it, we won't
+            pass
+        return wrapper
+
+
+class NonCachingRotatingFileHandler(RotatingFileHandler):

Review Comment:
   Docstring of this does not match



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to