[MediaWiki-commits] [Gerrit] Don't ignore DoNotArchiveUntil timestamps - change (pywikibot/core)
Merlijn van Deen has uploaded a new change for review. https://gerrit.wikimedia.org/r/223869 Change subject: Don't ignore DoNotArchiveUntil timestamps .. Don't ignore DoNotArchiveUntil timestamps Don't ignore timestamps written in HTML comments, e.g. as used by DoNotArchiveUntil. See: - https://commons.wikimedia.org/wiki/Template:DNAU - https://en.wikipedia.org/wiki/Template:Do_not_archive_until Analyze comments separately from rest of each line to avoid to skip dates in comments, as the date matched by timestripper is the rightmost one. Bug: T102423 Change-Id: I079d9f6b636ac0a145dd04a3190a65c61b9d1b31 --- M pywikibot/textlib.py M tests/timestripper_tests.py 2 files changed, 72 insertions(+), 3 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/pywikibot/core refs/changes/69/223869/1 diff --git a/pywikibot/textlib.py b/pywikibot/textlib.py index 26b291f..a1d2c55 100644 --- a/pywikibot/textlib.py +++ b/pywikibot/textlib.py @@ -1394,6 +1394,10 @@ ] self.linkP = compileLinkR() +self.comment_pattern = re.compile(r'<!--(.*?)-->') + +self.tzinfo = tzoneFixedOffset(self.site.siteinfo['timeoffset'], + self.site.siteinfo['timezone']) def findmarker(self, text, base=u'@@', delta='@'): Find a string which is not part of text. @@ -1446,6 +1450,17 @@ # match date fields dateDict = dict() +# Analyze comments separately from rest of each line to avoid to skip +# dates in comments, as the date matched by timestripper is the +# rightmost one. +most_recent = [] +for comment in self.comment_pattern.finditer(line): +# Recursion levels can be maximum two. If a comment is found, it will +# not for sure be found in the next level. +# Nested cmments are excluded by design. +timestamp = self.timestripper(comment.group(1)) +most_recent.append(timestamp) + # Remove parts that are not supposed to contain the timestamp, in order # to reduce false positives. 
line = removeDisabledParts(line) @@ -1481,12 +1496,17 @@ % (v, k)) # find timezone -dateDict['tzinfo'] = tzoneFixedOffset(self.site.siteinfo['timeoffset'], - self.site.siteinfo['timezone']) +dateDict['tzinfo'] = self.tzinfo timestamp = datetime.datetime(**dateDict) - else: timestamp = None +most_recent.append(timestamp) + +try: +timestamp = max(ts for ts in most_recent if ts is not None) +except ValueError: +timestamp = None + return timestamp diff --git a/tests/timestripper_tests.py b/tests/timestripper_tests.py index 80d70e0..a3f7e77 100644 --- a/tests/timestripper_tests.py +++ b/tests/timestripper_tests.py @@ -231,6 +231,55 @@ self.assertEqual(self.ts.timestripper(txtNoMatch), None) +class TestTimeStripperDoNotArchiveUntil(TestCase): + +Test cases for Do Not Archive Until templates. + +See https://commons.wikimedia.org/wiki/Template:DNAU and +https://en.wikipedia.org/wiki/Template:Do_not_archive_until. + + +family = 'wikisource' +code = 'en' + +cached = True + +username = '[[User:DoNotArchiveUntil]]' +date = '06:57 06 June 2015 (UTC)' +user_and_date = username + ' ' + date +tzone = tzoneFixedOffset(0, 'UTC') + +def test_timestripper_match(self): +Test that dates in comments are correctly recognised. +ts = TimeStripper(self.get_site()) + +txt_match = '<!-- [[User:Do___ArchiveUntil]] ' + self.date + ' -->' +res = datetime.datetime(2015, 6, 6, 6, 57, tzinfo=self.tzone) +self.assertEqual(ts.timestripper(txt_match), res) + +txt_match = '<!-- --> <!-- ' + self.user_and_date + ' <!-- -->' +res = datetime.datetime(2015, 6, 6, 6, 57, tzinfo=self.tzone) +self.assertEqual(ts.timestripper(txt_match), res) + +txt_match = '<!-- ' + self.user_and_date + ' -->' +res = datetime.datetime(2015, 6, 6, 6, 57, tzinfo=self.tzone) +self.assertEqual(ts.timestripper(txt_match), res) + +def test_timestripper_match_only(self): +Test that latest date is used instead of other dates. 
+ts = TimeStripper(self.get_site()) + +later_date = '10:57 06 June 2015 (UTC)' +txt_match = '<!-- --> ' + self.user_and_date + ' <!-- -->' + later_date +res = datetime.datetime(2015, 6, 6, 10, 57, tzinfo=self.tzone) +self.assertEqual(ts.timestripper(txt_match), res) + +earlier_date = '02:57 06 June 2015 (UTC)' +txt_match = '<!-- ' + self.user_and_date + ' --> ' + earlier_date +res = datetime.datetime(2015, 6, 6, 6, 57, tzinfo=self.tzone) +
[MediaWiki-commits] [Gerrit] Don't ignore DoNotArchiveUntil timestamps - change (pywikibot/core)
jenkins-bot has submitted this change and it was merged. Change subject: Don't ignore DoNotArchiveUntil timestamps .. Don't ignore DoNotArchiveUntil timestamps Don't ignore timestamps written in HTML comments, e.g. as used by DoNotArchiveUntil. See: - https://commons.wikimedia.org/wiki/Template:DNAU - https://en.wikipedia.org/wiki/Template:Do_not_archive_until Analyze comments separately from rest of each line to avoid to skip dates in comments, as the date matched by timestripper is the rightmost one. Bug: T102423 Change-Id: I079d9f6b636ac0a145dd04a3190a65c61b9d1b31 --- M pywikibot/textlib.py M tests/timestripper_tests.py 2 files changed, 72 insertions(+), 3 deletions(-) Approvals: Merlijn van Deen: Looks good to me, approved jenkins-bot: Verified diff --git a/pywikibot/textlib.py b/pywikibot/textlib.py index 26b291f..a1d2c55 100644 --- a/pywikibot/textlib.py +++ b/pywikibot/textlib.py @@ -1394,6 +1394,10 @@ ] self.linkP = compileLinkR() +self.comment_pattern = re.compile(r'<!--(.*?)-->') + +self.tzinfo = tzoneFixedOffset(self.site.siteinfo['timeoffset'], + self.site.siteinfo['timezone']) def findmarker(self, text, base=u'@@', delta='@'): Find a string which is not part of text. @@ -1446,6 +1450,17 @@ # match date fields dateDict = dict() +# Analyze comments separately from rest of each line to avoid to skip +# dates in comments, as the date matched by timestripper is the +# rightmost one. +most_recent = [] +for comment in self.comment_pattern.finditer(line): +# Recursion levels can be maximum two. If a comment is found, it will +# not for sure be found in the next level. +# Nested cmments are excluded by design. +timestamp = self.timestripper(comment.group(1)) +most_recent.append(timestamp) + # Remove parts that are not supposed to contain the timestamp, in order # to reduce false positives. 
line = removeDisabledParts(line) @@ -1481,12 +1496,17 @@ % (v, k)) # find timezone -dateDict['tzinfo'] = tzoneFixedOffset(self.site.siteinfo['timeoffset'], - self.site.siteinfo['timezone']) +dateDict['tzinfo'] = self.tzinfo timestamp = datetime.datetime(**dateDict) - else: timestamp = None +most_recent.append(timestamp) + +try: +timestamp = max(ts for ts in most_recent if ts is not None) +except ValueError: +timestamp = None + return timestamp diff --git a/tests/timestripper_tests.py b/tests/timestripper_tests.py index 80d70e0..a3f7e77 100644 --- a/tests/timestripper_tests.py +++ b/tests/timestripper_tests.py @@ -231,6 +231,55 @@ self.assertEqual(self.ts.timestripper(txtNoMatch), None) +class TestTimeStripperDoNotArchiveUntil(TestCase): + +Test cases for Do Not Archive Until templates. + +See https://commons.wikimedia.org/wiki/Template:DNAU and +https://en.wikipedia.org/wiki/Template:Do_not_archive_until. + + +family = 'wikisource' +code = 'en' + +cached = True + +username = '[[User:DoNotArchiveUntil]]' +date = '06:57 06 June 2015 (UTC)' +user_and_date = username + ' ' + date +tzone = tzoneFixedOffset(0, 'UTC') + +def test_timestripper_match(self): +Test that dates in comments are correctly recognised. +ts = TimeStripper(self.get_site()) + +txt_match = '<!-- [[User:Do___ArchiveUntil]] ' + self.date + ' -->' +res = datetime.datetime(2015, 6, 6, 6, 57, tzinfo=self.tzone) +self.assertEqual(ts.timestripper(txt_match), res) + +txt_match = '<!-- --> <!-- ' + self.user_and_date + ' <!-- -->' +res = datetime.datetime(2015, 6, 6, 6, 57, tzinfo=self.tzone) +self.assertEqual(ts.timestripper(txt_match), res) + +txt_match = '<!-- ' + self.user_and_date + ' -->' +res = datetime.datetime(2015, 6, 6, 6, 57, tzinfo=self.tzone) +self.assertEqual(ts.timestripper(txt_match), res) + +def test_timestripper_match_only(self): +Test that latest date is used instead of other dates. 
+ts = TimeStripper(self.get_site()) + +later_date = '10:57 06 June 2015 (UTC)' +txt_match = '<!-- --> ' + self.user_and_date + ' <!-- -->' + later_date +res = datetime.datetime(2015, 6, 6, 10, 57, tzinfo=self.tzone) +self.assertEqual(ts.timestripper(txt_match), res) + +earlier_date = '02:57 06 June 2015 (UTC)' +txt_match = '<!-- ' + self.user_and_date + ' --> ' + earlier_date +res = datetime.datetime(2015, 6, 6, 6, 57, tzinfo=self.tzone) +self.assertEqual(ts.timestripper(txt_match), res) + + if __name__ ==
[MediaWiki-commits] [Gerrit] Don't ignore DoNotArchiveUntil timestamps - change (pywikibot/core)
jenkins-bot has submitted this change and it was merged. Change subject: Don't ignore DoNotArchiveUntil timestamps .. Don't ignore DoNotArchiveUntil timestamps Don't ignore timestamps written in HTML comments, e.g. as used by DoNotArchiveUntil. See: - https://commons.wikimedia.org/wiki/Template:DNAU - https://en.wikipedia.org/wiki/Template:Do_not_archive_until Analyze comments separately from rest of each line to avoid to skip dates in comments, as the date matched by timestripper is the rightmost one. Bug: T102423 Change-Id: I079d9f6b636ac0a145dd04a3190a65c61b9d1b31 --- M pywikibot/textlib.py M tests/timestripper_tests.py 2 files changed, 72 insertions(+), 3 deletions(-) Approvals: John Vandenberg: Looks good to me, approved jenkins-bot: Verified diff --git a/pywikibot/textlib.py b/pywikibot/textlib.py index 2fae512..de627c2 100644 --- a/pywikibot/textlib.py +++ b/pywikibot/textlib.py @@ -1609,6 +1609,10 @@ ] self.linkP = compileLinkR() +self.comment_pattern = re.compile(r'<!--(.*?)-->') + +self.tzinfo = tzoneFixedOffset(self.site.siteinfo['timeoffset'], + self.site.siteinfo['timezone']) def findmarker(self, text, base=u'@@', delta='@'): Find a string which is not part of text. @@ -1661,6 +1665,17 @@ # match date fields dateDict = dict() +# Analyze comments separately from rest of each line to avoid to skip +# dates in comments, as the date matched by timestripper is the +# rightmost one. +most_recent = [] +for comment in self.comment_pattern.finditer(line): +# Recursion levels can be maximum two. If a comment is found, it will +# not for sure be found in the next level. +# Nested cmments are excluded by design. +timestamp = self.timestripper(comment.group(1)) +most_recent.append(timestamp) + # Remove parts that are not supposed to contain the timestamp, in order # to reduce false positives. 
line = removeDisabledParts(line) @@ -1696,12 +1711,17 @@ % (v, k)) # find timezone -dateDict['tzinfo'] = tzoneFixedOffset(self.site.siteinfo['timeoffset'], - self.site.siteinfo['timezone']) +dateDict['tzinfo'] = self.tzinfo timestamp = datetime.datetime(**dateDict) - else: timestamp = None +most_recent.append(timestamp) + +try: +timestamp = max(ts for ts in most_recent if ts is not None) +except ValueError: +timestamp = None + return timestamp diff --git a/tests/timestripper_tests.py b/tests/timestripper_tests.py index 80d70e0..a3f7e77 100644 --- a/tests/timestripper_tests.py +++ b/tests/timestripper_tests.py @@ -231,6 +231,55 @@ self.assertEqual(self.ts.timestripper(txtNoMatch), None) +class TestTimeStripperDoNotArchiveUntil(TestCase): + +Test cases for Do Not Archive Until templates. + +See https://commons.wikimedia.org/wiki/Template:DNAU and +https://en.wikipedia.org/wiki/Template:Do_not_archive_until. + + +family = 'wikisource' +code = 'en' + +cached = True + +username = '[[User:DoNotArchiveUntil]]' +date = '06:57 06 June 2015 (UTC)' +user_and_date = username + ' ' + date +tzone = tzoneFixedOffset(0, 'UTC') + +def test_timestripper_match(self): +Test that dates in comments are correctly recognised. +ts = TimeStripper(self.get_site()) + +txt_match = '<!-- [[User:Do___ArchiveUntil]] ' + self.date + ' -->' +res = datetime.datetime(2015, 6, 6, 6, 57, tzinfo=self.tzone) +self.assertEqual(ts.timestripper(txt_match), res) + +txt_match = '<!-- --> <!-- ' + self.user_and_date + ' <!-- -->' +res = datetime.datetime(2015, 6, 6, 6, 57, tzinfo=self.tzone) +self.assertEqual(ts.timestripper(txt_match), res) + +txt_match = '<!-- ' + self.user_and_date + ' -->' +res = datetime.datetime(2015, 6, 6, 6, 57, tzinfo=self.tzone) +self.assertEqual(ts.timestripper(txt_match), res) + +def test_timestripper_match_only(self): +Test that latest date is used instead of other dates. 
+ts = TimeStripper(self.get_site()) + +later_date = '10:57 06 June 2015 (UTC)' +txt_match = '<!-- --> ' + self.user_and_date + ' <!-- -->' + later_date +res = datetime.datetime(2015, 6, 6, 10, 57, tzinfo=self.tzone) +self.assertEqual(ts.timestripper(txt_match), res) + +earlier_date = '02:57 06 June 2015 (UTC)' +txt_match = '<!-- ' + self.user_and_date + ' --> ' + earlier_date +res = datetime.datetime(2015, 6, 6, 6, 57, tzinfo=self.tzone) +self.assertEqual(ts.timestripper(txt_match), res) + + if __name__ ==
[MediaWiki-commits] [Gerrit] Don't ignore DoNotArchiveUntil timestamps - change (pywikibot/core)
Mpaa has uploaded a new change for review. https://gerrit.wikimedia.org/r/218436 Change subject: Don't ignore DoNotArchiveUntil timestamps .. Don't ignore DoNotArchiveUntil timestamps Don't ignore DoNotArchiveUntil timestamps written in HTML comments. See: - https://commons.wikimedia.org/wiki/Template:DNAU - https://en.wikipedia.org/wiki/Template:Do_not_archive_until Bug: T102423 Change-Id: I079d9f6b636ac0a145dd04a3190a65c61b9d1b31 --- M pywikibot/textlib.py M tests/timestripper_tests.py 2 files changed, 33 insertions(+), 1 deletion(-) git pull ssh://gerrit.wikimedia.org:29418/pywikibot/core refs/changes/36/218436/1 diff --git a/pywikibot/textlib.py b/pywikibot/textlib.py index 2fae512..0572538 100644 --- a/pywikibot/textlib.py +++ b/pywikibot/textlib.py @@ -1609,6 +1609,7 @@ ] self.linkP = compileLinkR() +self.commentP = re.compile(r'<!--.*?\[\[User:DoNotArchiveUntil\]\].*-->') def findmarker(self, text, base=u'@@', delta='@'): Find a string which is not part of text. @@ -1663,7 +1664,11 @@ dateDict = dict() # Remove parts that are not supposed to contain the timestamp, in order # to reduce false positives. -line = removeDisabledParts(line) +do_not_archive_until = self.commentP.search(line) +if do_not_archive_until: +line = removeDisabledParts(line, include=['comments']) +else: +line = removeDisabledParts(line) line = self.linkP.sub('', line) # remove external links line = self.fix_digits(line) diff --git a/tests/timestripper_tests.py b/tests/timestripper_tests.py index 80d70e0..0df5c4f 100644 --- a/tests/timestripper_tests.py +++ b/tests/timestripper_tests.py @@ -231,6 +231,33 @@ self.assertEqual(self.ts.timestripper(txtNoMatch), None) +class TestTimeStripperDoNotArchiveUntil(TestCase): + +Test cases for Do Not Archive Until templates. + +See https://commons.wikimedia.org/wiki/Template:DNAU and +https://en.wikipedia.org/wiki/Template:Do_not_archive_until. 
+ + +family = 'wikisource' +code = 'en' + +cached = True + +def test_timestripper_match(self): +Test that DNAU convention is correctly recognised. +self.ts = TimeStripper(self.get_site()) + +tzone = tzoneFixedOffset(0, 'UTC') + +txtNoMatch = u'<!-- [[User:Do___ArchiveUntil]] 06:57 06 June 2015 (UTC) -->' +self.assertEqual(self.ts.timestripper(txtNoMatch), None) + +txtMatch = u'<!-- [[User:DoNotArchiveUntil]] 06:57 06 June 2015 (UTC) -->' +res = datetime.datetime(2015, 6, 6, 06, 57, tzinfo=tzone) +self.assertEqual(self.ts.timestripper(txtMatch), res) + + if __name__ == '__main__': try: unittest.main() -- To view, visit https://gerrit.wikimedia.org/r/218436 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I079d9f6b636ac0a145dd04a3190a65c61b9d1b31 Gerrit-PatchSet: 1 Gerrit-Project: pywikibot/core Gerrit-Branch: master Gerrit-Owner: Mpaa <mpaa.w...@gmail.com> ___ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits