jenkins-bot has submitted this change. ( 
https://gerrit.wikimedia.org/r/c/pywikibot/core/+/699536 )

Change subject: [IMPR] Add WikiBlame support to Pywikibot
......................................................................

[IMPR] Add WikiBlame support to Pywikibot

- add main_authors() method to BasePage class which gives the
  5 topmost editors based on the current blamed text
- provide some tests

Change-Id: I25f1b23cfaa88f02c1721a7032da4938da1777b7
---
M docs/api_ref/pywikibot.page.rst
M pywikibot/CONTENT.rst
M pywikibot/page/_pages.py
A pywikibot/page/_toolforge.py
M tests/__init__.py
A tests/wikiblame_tests.py
6 files changed, 157 insertions(+), 1 deletion(-)

Approvals:
  Xqt: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/docs/api_ref/pywikibot.page.rst b/docs/api_ref/pywikibot.page.rst
index 18e7eb7..f145976 100644
--- a/docs/api_ref/pywikibot.page.rst
+++ b/docs/api_ref/pywikibot.page.rst
@@ -24,3 +24,9 @@

 .. automodule:: page._revision
    :synopsis: Object representing page revision
+
+:mod:`page.\_toolforge` module
+---------------------------------
+
+.. automodule:: page._toolforge
+   :synopsis: Object representing interface to toolforge tools
\ No newline at end of file
diff --git a/pywikibot/CONTENT.rst b/pywikibot/CONTENT.rst
index 88fb524..096ce26 100644
--- a/pywikibot/CONTENT.rst
+++ b/pywikibot/CONTENT.rst
@@ -130,6 +130,8 @@
     +----------------------------+------------------------------------------------------+
     | _revision.py               | Object representing page revision                    |
     +----------------------------+------------------------------------------------------+
+    | _toolforge.py              | BasePage interface to toolforge tools                |
+    +----------------------------+------------------------------------------------------+
     | _user.py                   | Object representing a wiki user                      |
     +----------------------------+------------------------------------------------------+
     | _wikibase.py               | Objects representing wikibase structures             |
diff --git a/pywikibot/page/_pages.py b/pywikibot/page/_pages.py
index 1ead289..cc11304 100644
--- a/pywikibot/page/_pages.py
+++ b/pywikibot/page/_pages.py
@@ -49,6 +49,7 @@
 )
 from pywikibot.page._decorators import allow_asynchronous
 from pywikibot.page._links import BaseLink, Link
+from pywikibot.page._toolforge import WikiBlameMixin
 from pywikibot.site import Namespace, NamespaceArgType
 from pywikibot.tools import (
     ComparableMixin,
@@ -2149,7 +2150,7 @@
         return link


-class Page(BasePage):
+class Page(BasePage, WikiBlameMixin):

     """Page: A MediaWiki page."""

diff --git a/pywikibot/page/_toolforge.py b/pywikibot/page/_toolforge.py
new file mode 100644
index 0000000..4dccfdb
--- /dev/null
+++ b/pywikibot/page/_toolforge.py
@@ -0,0 +1,110 @@
+"""Object representing interface to toolforge tools.
+
+.. versionadded:: 7.7
+"""
+#
+# (C) Pywikibot team, 2022
+#
+# Distributed under the terms of the MIT license.
+#
+import collections
+import re
+
+from typing import Optional
+
+import pywikibot
+
+from pywikibot import config
+
+
+class WikiBlameMixin:
+
+    """Page mixin for main authorship.
+
+    .. versionadded:: 7.7
+    """
+
+    #: Supported wikipedia site codes
+    WIKIBLAME_CODES = 'als', 'bar', 'de', 'en', 'it', 'nds', 'sco'
+
+    def _check_wh_supported(self):
+        """Raise an exception if WikiHistory cannot handle this page."""
+        if self.site.family.name != 'wikipedia':
+            raise NotImplementedError(
+                'main_authors method is implemented for wikipedia family only')
+
+        if self.site.code not in self.WIKIBLAME_CODES:
+            raise NotImplementedError(
+                'main_authors method is not implemented for wikipedia:{}'
+                .format(self.site.code))
+
+        if self.namespace() != pywikibot.site.Namespace.MAIN:
+            raise NotImplementedError(
+                'main_authors method is implemented for main namespace only')
+
+        if not self.exists():
+            raise pywikibot.exceptions.NoPageError(self)
+
+    def main_authors(self, *,
+                     onlynew: Optional[bool] = None) -> collections.Counter:
+        """Retrieve the 5 topmost main authors of an article.
+
+        This method uses WikiHistory to retrieve the text based main
+        authorship.
+
+        Sample:
+
+        >>> import pywikibot
+        >>> site = pywikibot.Site('wikipedia:nds')
+        >>> page = pywikibot.Page(site, 'Python (Programmeerspraak)')
+        >>> auth = page.main_authors(onlynew=False)
+        >>> auth
+        Counter({'RebeccaBreu': 99, 'Slomox': 1})
+
+        .. note:: Only implemented for main namespace pages.
+        .. note:: Only wikipedias of :attr:`WIKIBLAME_CODES` are supported.
+        .. seealso::
+           - https://wikihistory.toolforge.org
+           - https://de.wikipedia.org/wiki/Wikipedia:Technik/Cloud/wikihistory
+
+        :param onlynew: If False, use the cached values. If True,
+            calculate the Counter data which can take some time; it may
+            fail with TimeoutError after ``config.max_retries``. If None
+            it calculates new data like for True but uses data from
+            cache if new data cannot be calculated in meantime.
+        :return: Authorship percentage for each username
+        :raise NotImplementedError: unsupported site or unsupported namespace
+        :raise pywikibot.exceptions.NoPageError: The page does not exist
+        :raise pywikibot.exceptions.TimeoutError: Maximum retries exceeded
+        """
+        baseurl = 'https://wikihistory.toolforge.org'
+        pattern = (r'><bdi>(?P<author>.+?)</bdi></a>\s'
+                   r'\((?P<percent>\d{1,3})&')
+
+        self._check_wh_supported()
+
+        url = baseurl + '/wiki/getauthors.php?wiki={}wiki&page_id={}'.format(
+            self.site.code, self.pageid)
+        if onlynew:
+            url += '&onlynew=1'
+
+        for current_retries in range(config.max_retries):
+            r = pywikibot.comms.http.fetch(url)
+            if r.status_code != 200:
+                r.raise_for_status()
+
+            if 'Timeout' not in r.text:  # window.setTimeout in result
+                return collections.Counter(
+                    {user: int(cnt)
+                     for user, cnt in re.findall(pattern, r.text)})
+
+            delay = pywikibot.config.retry_wait * 2 ** current_retries
+            pywikibot.warning('WikiHistory timeout.\n'
+                              'Waiting {:.1f} seconds before retrying.'
+                              .format(delay))
+            pywikibot.sleep(delay)  # delay doubles with every retry
+            if onlynew is None and current_retries >= config.max_retries - 2:
+                url += '&onlynew=1'  # only the final attempt sees this
+
+        raise pywikibot.exceptions.TimeoutError(
+            'Maximum retries attempted without success.')
diff --git a/tests/__init__.py b/tests/__init__.py
index 82303c3..ccac997 100644
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -133,6 +133,7 @@
     'user',
     'wikibase',
     'wikibase_edit',
+    'wikiblame',
     'wikistats',
     'xmlreader'
 }
diff --git a/tests/wikiblame_tests.py b/tests/wikiblame_tests.py
new file mode 100644
index 0000000..06f59b2
--- /dev/null
+++ b/tests/wikiblame_tests.py
@@ -0,0 +1,36 @@
+"""Tests for the WikiBlameMixin."""
+#
+# (C) Pywikibot team, 2022
+#
+# Distributed under the terms of the MIT license.
+#
+import unittest
+
+from contextlib import suppress
+
+import pywikibot
+
+from tests.aspects import TestCase
+
+
+class TestWikiBlameMixin(TestCase):
+
+    """Test WikiBlameMixin using nds wiki."""
+
+    family = 'wikipedia'
+    code = 'nds'
+
+    def test_main_authors(self):
+        """Test main_authors() on a known nds article (onlynew=False)."""
+        page = pywikibot.Page(self.site, 'Python (Programmeerspraak)')
+        auth = page.main_authors(onlynew=False)
+        self.assertLessEqual(len(auth), 5)  # at most 5 authors
+        self.assertLessEqual(sum(auth.values()), 100)  # percentages
+        user, value = auth.most_common(1)[0]
+        self.assertEqual(user, 'RebeccaBreu')
+        self.assertGreater(value, 0)
+
+
+if __name__ == '__main__':  # pragma: no cover
+    with suppress(SystemExit):
+        unittest.main()

--
To view, visit https://gerrit.wikimedia.org/r/c/pywikibot/core/+/699536
To unsubscribe, or for help writing mail filters, visit 
https://gerrit.wikimedia.org/r/settings

Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: I25f1b23cfaa88f02c1721a7032da4938da1777b7
Gerrit-Change-Number: 699536
Gerrit-PatchSet: 21
Gerrit-Owner: Xqt <[email protected]>
Gerrit-Reviewer: JJMC89 <[email protected]>
Gerrit-Reviewer: Wurgl <[email protected]>
Gerrit-Reviewer: Xqt <[email protected]>
Gerrit-Reviewer: jenkins-bot
Gerrit-CC: DannyS712 <[email protected]>
Gerrit-CC: Meno25 <[email protected]>
Gerrit-MessageType: merged
_______________________________________________
Pywikibot-commits mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to