Mpaa has uploaded a new change for review.
https://gerrit.wikimedia.org/r/243032
Change subject: proofreadpage.py: purge IndexPage when Index has no required
class set
......................................................................
proofreadpage.py: purge IndexPage when Index has no required class set
Purge the index if class="qualityN prp-pagequality-N" is not found in the
parsed HTML of the IndexPage.
Raise ValueError if the class is still missing after the page has been purged.
Bug: T114318
Change-Id: I342df4e5b5e2743dbb5bdb8f55296e07242b6b58
---
M pywikibot/proofreadpage.py
1 file changed, 17 insertions(+), 4 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/pywikibot/core
refs/changes/32/243032/1
diff --git a/pywikibot/proofreadpage.py b/pywikibot/proofreadpage.py
index a14e2f2..deb0a65 100644
--- a/pywikibot/proofreadpage.py
+++ b/pywikibot/proofreadpage.py
@@ -376,15 +376,28 @@
def _get_page_labels(self):
"""Associate label and number for each page linked to the index."""
- self._parsed_text = self._get_parsed_page()
- self._soup = BeautifulSoup(self._parsed_text, 'html.parser')
- attrs = {'class': re.compile('prp-pagequality')}
-
# Search for attribute "prp-pagequality" in tags like:
# <a class="quality1 prp-pagequality-1"
# href="/wiki/Page:xxx.djvu/n"
# title="Page:xxx.djvu/n">m
# </a>
+ # Try to purge or raise ValueError
+ attrs = {'class': re.compile('prp-pagequality')}
+ purged = False
+ while True:
+ self._parsed_text = self._get_parsed_page()
+ self._soup = BeautifulSoup(self._parsed_text, 'html.parser')
+ if not self._soup.find_all('a', attrs=attrs):
+ if not purged:
+ self.purge()
+ purged = True
+ del self._parsed_text
+ else:
+ raise ValueError(
+ 'Missing class="qualityN prp-pagequality-N" in %s: '
+ % self)
+ else:
+ break
page_cnt = 0
for a_tag in self._soup.find_all('a', attrs=attrs):
--
To view, visit https://gerrit.wikimedia.org/r/243032
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I342df4e5b5e2743dbb5bdb8f55296e07242b6b58
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Mpaa <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits