https://github.com/python/cpython/commit/3f830adf58745723dba5b7fcf7cb39e4c0d0224b commit: 3f830adf58745723dba5b7fcf7cb39e4c0d0224b branch: 3.12 author: Miss Islington (bot) <[email protected]> committer: serhiy-storchaka <[email protected]> date: 2024-03-03T07:59:10Z summary:
[3.12] gh-115809: Improve TimedRotatingFileHandler.getFilesToDelete() (GH-115812) (GH-116261) Improve algorithm for computing which rolled-over log files to delete in logging.TimedRotatingFileHandler. It is now reliable for handlers without namer and with arbitrary deterministic namer that leaves the datetime part in the file name unmodified. (cherry picked from commit 87faec28c78f6fa8eaaebbd1ababf687c7508e71) Co-authored-by: Serhiy Storchaka <[email protected]> files: A Misc/NEWS.d/next/Library/2024-02-22-11-29-27.gh-issue-115809.9H1DhB.rst M Lib/logging/handlers.py M Lib/test/test_logging.py diff --git a/Lib/logging/handlers.py b/Lib/logging/handlers.py index 6dbee7bec721d2..af9b3459f26169 100644 --- a/Lib/logging/handlers.py +++ b/Lib/logging/handlers.py @@ -232,19 +232,19 @@ def __init__(self, filename, when='h', interval=1, backupCount=0, if self.when == 'S': self.interval = 1 # one second self.suffix = "%Y-%m-%d_%H-%M-%S" - self.extMatch = r"^\d{4}-\d{2}-\d{2}_\d{2}-\d{2}-\d{2}(\.\w+)?$" + extMatch = r"(?<!\d)\d{4}-\d{2}-\d{2}_\d{2}-\d{2}-\d{2}(?!\d)" elif self.when == 'M': self.interval = 60 # one minute self.suffix = "%Y-%m-%d_%H-%M" - self.extMatch = r"^\d{4}-\d{2}-\d{2}_\d{2}-\d{2}(\.\w+)?$" + extMatch = r"(?<!\d)\d{4}-\d{2}-\d{2}_\d{2}-\d{2}(?!\d)" elif self.when == 'H': self.interval = 60 * 60 # one hour self.suffix = "%Y-%m-%d_%H" - self.extMatch = r"^\d{4}-\d{2}-\d{2}_\d{2}(\.\w+)?$" + extMatch = r"(?<!\d)\d{4}-\d{2}-\d{2}_\d{2}(?!\d)" elif self.when == 'D' or self.when == 'MIDNIGHT': self.interval = 60 * 60 * 24 # one day self.suffix = "%Y-%m-%d" - self.extMatch = r"^\d{4}-\d{2}-\d{2}(\.\w+)?$" + extMatch = r"(?<!\d)\d{4}-\d{2}-\d{2}(?!\d)" elif self.when.startswith('W'): self.interval = 60 * 60 * 24 * 7 # one week if len(self.when) != 2: @@ -253,11 +253,17 @@ def __init__(self, filename, when='h', interval=1, backupCount=0, raise ValueError("Invalid day specified for weekly rollover: %s" % self.when) self.dayOfWeek = int(self.when[1]) self.suffix = "%Y-%m-%d" - self.extMatch = r"^\d{4}-\d{2}-\d{2}(\.\w+)?$" + extMatch = r"(?<!\d)\d{4}-\d{2}-\d{2}(?!\d)" else: raise ValueError("Invalid rollover interval specified: %s" % self.when) - self.extMatch = re.compile(self.extMatch, re.ASCII) + # extMatch is a pattern for matching a datetime suffix in a file name. + # After custom naming, it is no longer guaranteed to be separated by + # periods from other parts of the filename. The lookup statements + # (?<!\d) and (?!\d) ensure that the datetime suffix (which itself + # starts and ends with digits) is not preceded or followed by digits. + # This reduces the number of false matches and improves performance. + self.extMatch = re.compile(extMatch, re.ASCII) self.interval = self.interval * interval # multiply by units requested # The following line added because the filename passed in could be a # path object (see Issue #27493), but self.baseFilename will be a string @@ -376,31 +382,22 @@ def getFilesToDelete(self): for fileName in fileNames: if fileName[:plen] == prefix: suffix = fileName[plen:] - if self.extMatch.match(suffix): + if self.extMatch.fullmatch(suffix): result.append(os.path.join(dirName, fileName)) else: - # See bpo-44753: Don't use the extension when computing the prefix. - n, e = os.path.splitext(baseName) - prefix = n + '.' - plen = len(prefix) for fileName in fileNames: - # Our files could be just about anything after custom naming, but - # likely candidates are of the form - # foo.log.DATETIME_SUFFIX or foo.DATETIME_SUFFIX.log - if (not fileName.startswith(baseName) and fileName.endswith(e) and - len(fileName) > (plen + 1) and not fileName[plen+1].isdigit()): - continue + # Our files could be just about anything after custom naming, + # but they should contain the datetime suffix. + # Try to find the datetime suffix in the file name and verify + # that the file name can be generated by this handler. + m = self.extMatch.search(fileName) + while m: + dfn = self.namer(self.baseFilename + "." + m[0]) + if os.path.basename(dfn) == fileName: + result.append(os.path.join(dirName, fileName)) + break + m = self.extMatch.search(fileName, m.start() + 1) - if fileName[:plen] == prefix: - suffix = fileName[plen:] - # See bpo-45628: The date/time suffix could be anywhere in the - # filename - - parts = suffix.split('.') - for part in parts: - if self.extMatch.match(part): - result.append(os.path.join(dirName, fileName)) - break if len(result) < self.backupCount: result = [] else: diff --git a/Lib/test/test_logging.py b/Lib/test/test_logging.py index 857f656498dff2..224447df8901ae 100644 --- a/Lib/test/test_logging.py +++ b/Lib/test/test_logging.py @@ -6205,7 +6205,7 @@ def test_compute_files_to_delete(self): for i in range(10): times.append(dt.strftime('%Y-%m-%d_%H-%M-%S')) dt += datetime.timedelta(seconds=5) - prefixes = ('a.b', 'a.b.c', 'd.e', 'd.e.f') + prefixes = ('a.b', 'a.b.c', 'd.e', 'd.e.f', 'g') files = [] rotators = [] for prefix in prefixes: @@ -6218,10 +6218,22 @@ def test_compute_files_to_delete(self): if prefix.startswith('a.b'): for t in times: files.append('%s.log.%s' % (prefix, t)) - else: - rotator.namer = lambda name: name.replace('.log', '') + '.log' + elif prefix.startswith('d.e'): + def namer(filename): + dirname, basename = os.path.split(filename) + basename = basename.replace('.log', '') + '.log' + return os.path.join(dirname, basename) + rotator.namer = namer for t in times: files.append('%s.%s.log' % (prefix, t)) + elif prefix == 'g': + def namer(filename): + dirname, basename = os.path.split(filename) + basename = 'g' + basename[6:] + '.oldlog' + return os.path.join(dirname, basename) + rotator.namer = namer + for t in times: + files.append('g%s.oldlog' % t) # Create empty files for fn in files: p = os.path.join(wd, fn) @@ -6231,18 +6243,23 @@ def test_compute_files_to_delete(self): for i, prefix in enumerate(prefixes): rotator = rotators[i] candidates = rotator.getFilesToDelete() - self.assertEqual(len(candidates), 3) + self.assertEqual(len(candidates), 3, candidates) if prefix.startswith('a.b'): p = '%s.log.' % prefix for c in candidates: d, fn = os.path.split(c) self.assertTrue(fn.startswith(p)) - else: + elif prefix.startswith('d.e'): for c in candidates: d, fn = os.path.split(c) - self.assertTrue(fn.endswith('.log')) + self.assertTrue(fn.endswith('.log'), fn) self.assertTrue(fn.startswith(prefix + '.') and fn[len(prefix) + 2].isdigit()) + elif prefix == 'g': + for c in candidates: + d, fn = os.path.split(c) + self.assertTrue(fn.endswith('.oldlog')) + self.assertTrue(fn.startswith('g') and fn[1].isdigit()) def test_compute_files_to_delete_same_filename_different_extensions(self): # See GH-93205 for background @@ -6266,6 +6283,8 @@ def test_compute_files_to_delete_same_filename_different_extensions(self): rotators.append(rotator) for t in times: files.append('%s.%s' % (prefix, t)) + for t in times: + files.append('a.log.%s.c' % t) # Create empty files for f in files: (wd / f).touch() @@ -6274,11 +6293,11 @@ def test_compute_files_to_delete_same_filename_different_extensions(self): backupCount = i+1 rotator = rotators[i] candidates = rotator.getFilesToDelete() - self.assertEqual(len(candidates), n_files - backupCount) - matcher = re.compile(r"^\d{4}-\d{2}-\d{2}_\d{2}-\d{2}-\d{2}(\.\w+)?$") + self.assertEqual(len(candidates), n_files - backupCount, candidates) + matcher = re.compile(r"^\d{4}-\d{2}-\d{2}_\d{2}-\d{2}-\d{2}\Z") for c in candidates: d, fn = os.path.split(c) - self.assertTrue(fn.startswith(prefix)) + self.assertTrue(fn.startswith(prefix+'.')) suffix = fn[(len(prefix)+1):] self.assertRegex(suffix, matcher) diff --git a/Misc/NEWS.d/next/Library/2024-02-22-11-29-27.gh-issue-115809.9H1DhB.rst b/Misc/NEWS.d/next/Library/2024-02-22-11-29-27.gh-issue-115809.9H1DhB.rst new file mode 100644 index 00000000000000..2a47efbae5c84d --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-02-22-11-29-27.gh-issue-115809.9H1DhB.rst @@ -0,0 +1,4 @@ +Improve algorithm for computing which rolled-over log files to delete in +:class:`logging.TimedRotatingFileHandler`. It is now reliable for handlers +without ``namer`` and with arbitrary deterministic ``namer`` that leaves the +datetime part in the file name unmodified. _______________________________________________ Python-checkins mailing list -- [email protected] To unsubscribe send an email to [email protected] https://mail.python.org/mailman3/lists/python-checkins.python.org/ Member address: [email protected]
