jenkins-bot has submitted this change and it was merged. (https://gerrit.wikimedia.org/r/529616)

Change subject: proofreadpage.py: fix footer detection
......................................................................

proofreadpage.py: fix footer detection

Fix footer detection when footer contains a </div> tag at the end.

Bug: T230301
Change-Id: Ia2be695b5e8ea06f9760c86eebd3e8e836305a77
---
M pywikibot/proofreadpage.py
M tests/proofreadpage_tests.py
2 files changed, 24 insertions(+), 6 deletions(-)

Approvals:
  Xqt: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/pywikibot/proofreadpage.py b/pywikibot/proofreadpage.py
index e323d76..5ca3dfd 100644
--- a/pywikibot/proofreadpage.py
+++ b/pywikibot/proofreadpage.py
@@ -126,6 +126,7 @@
     close_tag = '</noinclude>'
     p_open = re.compile(r'<noinclude>')
     p_close = re.compile(r'(</div>|\n\n\n)?</noinclude>')
+    p_close_no_div = re.compile('</noinclude>')  # V2 page format.

     # phetools ocr utility
     _HOCR_CMD = ('https://tools.wmflabs.org/phetools/hocr_cgi.py?'
@@ -461,24 +462,32 @@
         @raise Error: the page is not formatted according to ProofreadPage
             extension.
         """
+        def _assert_len(len_oq, len_cq, title):
+            if (len_oq != len_cq) or (len_oq < 2 or len_cq < 2):
+                raise pywikibot.Error('ProofreadPage %s: invalid format'
+                                      % title)
+
         # Property force page text loading.
         if not (hasattr(self, '_text') or self.text):
             self._create_empty_page()
             return

+        _title = self.title(as_link=True)
+
         open_queue = list(self.p_open.finditer(self._text))
         close_queue = list(self.p_close.finditer(self._text))
-
-        len_oq = len(open_queue)
-        len_cq = len(close_queue)
-        if (len_oq != len_cq) or (len_oq < 2 or len_cq < 2):
-            raise pywikibot.Error('ProofreadPage %s: invalid format'
-                                  % self.title(as_link=True))
+        _assert_len(len(open_queue), len(close_queue), _title)

         f_open, f_close = open_queue[0], close_queue[0]
         self._full_header = FullHeader(
             self._text[f_open.end():f_close.start()])

+        # check version of page format and in case recompute last match,
+        # in order not to include </div>.
+        if not self._full_header._has_div:
+            close_queue = list(self.p_close_no_div.finditer(self._text))
+            _assert_len(len(open_queue), len(close_queue), _title)
+
         l_open, l_close = open_queue[-1], close_queue[-1]
         self._footer = self._text[l_open.end():l_close.start()]

diff --git a/tests/proofreadpage_tests.py b/tests/proofreadpage_tests.py
index 8a55cce..99722c4 100644
--- a/tests/proofreadpage_tests.py
+++ b/tests/proofreadpage_tests.py
@@ -172,6 +172,10 @@
         'title1': 'User:Popular Science Monthly Volume 1.djvu/12'
     }

+    div_in_footer = {
+        'title': 'Page:Pywikibot unlinked test page',
+    }
+
     class_pagetext_fmt = {
         True: ('<div class="pagetext">\n\n\n', '</div>'),
         False: ('', ''),
@@ -232,6 +236,11 @@
         self.assertEqual(page.header, self.valid['header'])
         self.assertEqual(page.footer, self.valid['footer'])

+    def test_div_in_footer(self):
+        """Test ProofreadPage page parsing functions."""
+        page = ProofreadPage(self.site, self.div_in_footer['title'])
+        self.assertTrue(page.footer.endswith('</div>'))
+
     def test_decompose_recompose_text(self):
         """Test ProofreadPage page decomposing/composing text."""
         page = ProofreadPage(self.site, self.valid['title'])

--
To view, visit https://gerrit.wikimedia.org/r/529616
To unsubscribe, or for help writing mail filters, visit 
https://gerrit.wikimedia.org/r/settings

Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-MessageType: merged
Gerrit-Change-Id: Ia2be695b5e8ea06f9760c86eebd3e8e836305a77
Gerrit-Change-Number: 529616
Gerrit-PatchSet: 1
Gerrit-Owner: Mpaa <[email protected]>
Gerrit-Reviewer: John Vandenberg <[email protected]>
Gerrit-Reviewer: Mpaa <[email protected]>
Gerrit-Reviewer: Xqt <[email protected]>
Gerrit-Reviewer: jenkins-bot (75)
_______________________________________________
Pywikibot-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/pywikibot-commits

Reply via email to