jenkins-bot has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/409321 )
Change subject: archivebot: count removed characters when excluding comments etc ...................................................................... archivebot: count removed characters when excluding comments etc Follows up I2e7183f431ce3bb4cdd1729e592b7509cbd37b94. Bug: T182496 Change-Id: If45dfbb807f413e3e459e682ceeef337755dce1d --- M scripts/archivebot.py M tests/archivebot_tests.py 2 files changed, 40 insertions(+), 2 deletions(-) Approvals: Dalba: Looks good to me, approved jenkins-bot: Verified diff --git a/scripts/archivebot.py b/scripts/archivebot.py index 56ff18b..33000c9 100755 --- a/scripts/archivebot.py +++ b/scripts/archivebot.py @@ -454,8 +454,9 @@ self.archives = {} self.archived_threads = 0 text = self.get() + # Replace text in following exceptions by spaces, but don't change line - # numbers + # numbers and character positions exceptions = ['comment', 'code', 'pre', 'source', 'nowiki'] exc_regexes = _get_regexes(exceptions, self.site) stripped_text = text @@ -464,8 +465,9 @@ before = stripped_text[:match.start()] restricted = stripped_text[match.start():match.end()] after = stripped_text[match.end():] - restricted = re.sub(r'[^\n]', r'', restricted) + restricted = re.sub(r'[^\n]', ' ', restricted) stripped_text = before + restricted + after + # Find thread headers in stripped text and return their line numbers stripped_lines = stripped_text.split('\n') thread_headers = [] diff --git a/tests/archivebot_tests.py b/tests/archivebot_tests.py index 3340b89..7270059 100644 --- a/tests/archivebot_tests.py +++ b/tests/archivebot_tests.py @@ -19,6 +19,7 @@ from tests.aspects import unittest, TestCase + THREADS = { 'als': 4, 'ar': 1, 'bar': 0, 'bg': 0, 'bjn': 1, 'bs': 0, 'ca': 5, 'ckb': 2, 'cs': 0, 'de': 1, 'en': 25, 'eo': 2, 'es': 13, 'fa': 2, 'fr': 25, 'frr': 2, @@ -229,6 +230,41 @@ expected_failures = [] +class TestDiscussionPageObject(TestCase): + + """Test DiscussionPage object.""" + + cached = True + family = 'test' + code = 'test' + + def 
testTwoThreadsWithCommentedOutThread(self): + """Test recognizing two threads and ignoring a commented out thread. + + Talk:For-pywikibot-archivebot must have: + + {{User:MiszaBot/config + |archive = Talk:Main_Page/archive + |algo = old(30d) + }} + <!-- normal comments --> + == A == + foo bar + <!-- + == Z == + foo bar bar + --> + == B == + foo bar bar bar + """ + page = pywikibot.Page(self.get_site(), 'Talk:For-pywikibot-archivebot') + archiver = archivebot.PageArchiver( + page=page, tpl='User:MiszaBot/config', salt='', force=False) + page = archivebot.DiscussionPage(page, archiver) + page.load_page() + self.assertEqual([x.title for x in page.threads], ['A', 'B']) + + if __name__ == '__main__': # pragma: no cover try: unittest.main() -- To view, visit https://gerrit.wikimedia.org/r/409321 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-Project: pywikibot/core Gerrit-Branch: master Gerrit-MessageType: merged Gerrit-Change-Id: If45dfbb807f413e3e459e682ceeef337755dce1d Gerrit-Change-Number: 409321 Gerrit-PatchSet: 4 Gerrit-Owner: Whym <w...@whym.org> Gerrit-Reviewer: Dalba <dalba.w...@gmail.com> Gerrit-Reviewer: Dvorapa <dvor...@seznam.cz> Gerrit-Reviewer: John Vandenberg <jay...@gmail.com> Gerrit-Reviewer: Whym <w...@whym.org> Gerrit-Reviewer: Zoranzoki21 <zorandori4...@gmail.com> Gerrit-Reviewer: jenkins-bot <>
_______________________________________________
Pywikibot-commits mailing list
Pywikibot-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/pywikibot-commits