jenkins-bot has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/409321 )

Change subject: archivebot: count removed characters when excluding comments etc
......................................................................

archivebot: count removed characters when excluding comments etc

Follows up I2e7183f431ce3bb4cdd1729e592b7509cbd37b94.

Bug: T182496
Change-Id: If45dfbb807f413e3e459e682ceeef337755dce1d
---
M scripts/archivebot.py
M tests/archivebot_tests.py
2 files changed, 40 insertions(+), 2 deletions(-)

Approvals:
  Dalba: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/scripts/archivebot.py b/scripts/archivebot.py
index 56ff18b..33000c9 100755
--- a/scripts/archivebot.py
+++ b/scripts/archivebot.py
@@ -454,8 +454,9 @@
         self.archives = {}
         self.archived_threads = 0
         text = self.get()
+
         # Replace text in following exceptions by spaces, but don't change line
-        # numbers
+        # numbers and character positions
         exceptions = ['comment', 'code', 'pre', 'source', 'nowiki']
         exc_regexes = _get_regexes(exceptions, self.site)
         stripped_text = text
@@ -464,8 +465,9 @@
                 before = stripped_text[:match.start()]
                 restricted = stripped_text[match.start():match.end()]
                 after = stripped_text[match.end():]
-                restricted = re.sub(r'[^\n]', r'', restricted)
+                restricted = re.sub(r'[^\n]', ' ', restricted)
                 stripped_text = before + restricted + after
+
         # Find thread headers in stripped text and return their line numbers
         stripped_lines = stripped_text.split('\n')
         thread_headers = []
diff --git a/tests/archivebot_tests.py b/tests/archivebot_tests.py
index 3340b89..7270059 100644
--- a/tests/archivebot_tests.py
+++ b/tests/archivebot_tests.py
@@ -19,6 +19,7 @@

 from tests.aspects import unittest, TestCase

+
 THREADS = {
     'als': 4, 'ar': 1, 'bar': 0, 'bg': 0, 'bjn': 1, 'bs': 0, 'ca': 5, 'ckb': 2,
     'cs': 0, 'de': 1, 'en': 25, 'eo': 2, 'es': 13, 'fa': 2, 'fr': 25, 'frr': 2,
@@ -229,6 +230,41 @@
     expected_failures = []


+class TestDiscussionPageObject(TestCase):
+
+    """Test DiscussionPage object."""
+
+    cached = True
+    family = 'test'
+    code = 'test'
+
+    def testTwoThreadsWithCommentedOutThread(self):
+        """Test recognizing two threads and ignoring a commented out thread.
+
+        Talk:For-pywikibot-archivebot must have:
+
+        {{User:MiszaBot/config
+        |archive = Talk:Main_Page/archive
+        |algo = old(30d)
+        }}
+        <!-- normal comments -->
+        == A ==
+        foo bar
+        <!--
+        == Z ==
+        foo bar bar
+        -->
+        == B ==
+        foo bar bar bar
+        """
+        page = pywikibot.Page(self.get_site(), 'Talk:For-pywikibot-archivebot')
+        archiver = archivebot.PageArchiver(
+            page=page, tpl='User:MiszaBot/config', salt='', force=False)
+        page = archivebot.DiscussionPage(page, archiver)
+        page.load_page()
+        self.assertEqual([x.title for x in page.threads], ['A', 'B'])
+
+
 if __name__ == '__main__':  # pragma: no cover
     try:
         unittest.main()

--
To view, visit https://gerrit.wikimedia.org/r/409321
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-MessageType: merged
Gerrit-Change-Id: If45dfbb807f413e3e459e682ceeef337755dce1d
Gerrit-Change-Number: 409321
Gerrit-PatchSet: 4
Gerrit-Owner: Whym <w...@whym.org>
Gerrit-Reviewer: Dalba <dalba.w...@gmail.com>
Gerrit-Reviewer: Dvorapa <dvor...@seznam.cz>
Gerrit-Reviewer: John Vandenberg <jay...@gmail.com>
Gerrit-Reviewer: Whym <w...@whym.org>
Gerrit-Reviewer: Zoranzoki21 <zorandori4...@gmail.com>
Gerrit-Reviewer: jenkins-bot <>
_______________________________________________
Pywikibot-commits mailing list
Pywikibot-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/pywikibot-commits

Reply via email to