Xqt has submitted this change. ( 
https://gerrit.wikimedia.org/r/c/pywikibot/core/+/1225151?usp=email )

Change subject: Add WikiWho API support to pywikibot
......................................................................

Add WikiWho API support to pywikibot

This adds support for accessing the WikiWho API to get token-level
provenance annotations for Wikipedia articles. The implementation
includes:

- New get_annotations() method to retrieve WikiWho data
- Support for 15 Wikipedia language editions (ar, de, en, es, eu,
  fr, hu, id, it, ja, nl, pl, pt, tr, zh)
- Helper methods for WikiWho API URL construction and validation
- Comprehensive test coverage for the new functionality

The WikiWho API provides token-level authorship information showing
who added each token in an article and when, which is useful for
article provenance analysis.

Bug: T414071
Change-Id: Id5024134d98ead21b9d34ce705aeaeb7669ccf85
---
M pywikibot/page/_toolforge.py
M tests/wikiblame_tests.py
2 files changed, 143 insertions(+), 0 deletions(-)

Approvals:
  Xqt: Verified; Looks good to me, approved




diff --git a/pywikibot/page/_toolforge.py b/pywikibot/page/_toolforge.py
index e6c5820..d00d983 100644
--- a/pywikibot/page/_toolforge.py
+++ b/pywikibot/page/_toolforge.py
@@ -11,7 +11,9 @@

 import collections
 import re
+import urllib.parse
 from http import HTTPStatus
+from typing import Any
 from warnings import warn

 import pywikibot
@@ -28,6 +30,12 @@
     #: Supported wikipedia site codes
     WIKIBLAME_CODES = 'als', 'bar', 'de', 'en', 'it', 'nds', 'sco'

+    #: Supported WikiWho API language codes
+    WIKIWHO_CODES = (
+        'ar', 'de', 'en', 'es', 'eu', 'fr', 'hu', 'id', 'it', 'ja', 'nl', 'pl',
+        'pt', 'tr', 'zh'
+    )
+
     def _check_wh_supported(self) -> None:
         """Check if WikiHistory is supported."""
         if self.site.family.name != 'wikipedia':
@@ -45,6 +53,45 @@
         if not self.exists():
             raise pywikibot.exceptions.NoPageError(self)

+    def _check_wikiwho_supported(self) -> None:
+        """Check if WikiWho API is supported.
+
+        .. versionadded:: 11.0
+
+        :raise NotImplementedError: unsupported site, language, or namespace
+        :raise NoPageError: page does not exist
+        """
+        if self.site.family.name != 'wikipedia':
+            raise NotImplementedError(
+                'WikiWho API is implemented for wikipedia family only')
+
+        if (code := self.site.code) not in self.WIKIWHO_CODES:
+            raise NotImplementedError(
+                f'WikiWho API is not implemented for wikipedia:{code}')
+
+        if (ns := self.namespace()) != 0:
+            raise NotImplementedError(
+                f'WikiWho API is not implemented for {ns} namespace')
+
+        if not self.exists():
+            raise pywikibot.exceptions.NoPageError(self)
+
+    def _build_wikiwho_url(self, endpoint: str) -> str:
+        """Build WikiWho API URL for the given endpoint.
+
+        .. versionadded:: 11.0
+
+        :param endpoint: API endpoint (all_content, rev_content,
+            edit_persistence)
+        :return: Complete API URL
+        """
+        article_title = self.title(with_ns=False, with_section=False)
+        encoded_title = urllib.parse.quote(article_title, safe='')
+        base_url = 'https://wikiwho-api.wmcloud.org'
+        url = (f'{base_url}/{self.site.code}/api/v1.0.0-beta/{endpoint}/'
+               f'{encoded_title}/')
+        return url
+
     @deprecated('authorsship', since='9.3.0')
     @deprecated_args(onlynew=None)  # since 9.2.0
     def main_authors(self) -> collections.Counter[str, int]:
@@ -207,3 +254,56 @@
                 break

         return {user: (chars, percent) for user, chars, percent in result}
+
+    def get_annotations(self) -> dict[str, Any]:
+        """Get WikiWho annotations for article revisions.
+
+        This method uses the public WikiWho API to get token-level
+        provenance annotations showing who added each token in the article.
+
+        Sample:
+
+        >>> import pywikibot
+        >>> site = pywikibot.Site('wikipedia:en')
+        >>> page = pywikibot.Page(site, 'Python (programming language)')
+        >>> data = page.get_annotations()  # doctest: +SKIP
+        >>> data['article_title']  # doctest: +SKIP
+        'Python (programming language)'
+
+        .. important:: Only implemented for main namespace pages and only
+           Wikipedias of :attr:`WIKIWHO_CODES` are supported.
+        .. versionadded:: 11.0
+        .. seealso::
+           - https://wikiwho-api.wmcloud.org
+           - https://www.mediawiki.org/wiki/WikiWho
+
+        :return: Dictionary containing article_title, page_id, and revisions
+            with token-level annotations
+
+        :raise NotImplementedError: unsupported site, language, or namespace
+        :raise NoPageError: page does not exist
+        :raise pywikibot.exceptions.ServerError: WikiWho API error
+        :raise requests.exceptions.HTTPError: HTTP error from WikiWho API
+        """
+        self._check_wikiwho_supported()
+
+        url = self._build_wikiwho_url('all_content')
+        url = f'{url}?editor=true&o_rev_id=true'
+
+        r = pywikibot.comms.http.fetch(url)
+
+        if r.status_code != HTTPStatus.OK:
+            r.raise_for_status()
+
+        try:
+            data = r.json()
+        except Exception as e:
+            raise pywikibot.exceptions.ServerError(
+                f'Failed to parse WikiWho API response: {e}')
+
+        if 'Error' in data or 'error' in data:
+            error_msg = data.get('Error') or data.get('error', 'Unknown error')
+            raise pywikibot.exceptions.ServerError(
+                f'WikiWho API error: {error_msg}')
+
+        return data
diff --git a/tests/wikiblame_tests.py b/tests/wikiblame_tests.py
index ced4a57..b4d3e30 100644
--- a/tests/wikiblame_tests.py
+++ b/tests/wikiblame_tests.py
@@ -63,6 +63,49 @@
                 self.assertGreaterEqual(chars, 100)
                 self.assertGreaterEqual(pct, 5.0)

+    def test_wikiwho_exceptions(self) -> None:
+        """Test that get_annotations fails for unsupported configurations."""
+        en_site = pywikibot.Site('wikipedia:en')
+        page = pywikibot.Page(en_site, 'NonExistentPageXYZ123')
+        with self.assertRaisesRegex(pywikibot.exceptions.NoPageError,
+                                    "doesn't exist"):
+            page.get_annotations()
+
+        page = pywikibot.Page(en_site, 'Talk:Wikipedia')
+        with self.assertRaisesRegex(
+            NotImplementedError,
+                'WikiWho API is not implemented for Talk: namespace'):
+            page.get_annotations()
+
+        page = pywikibot.Page(pywikibot.Site('wikipedia:ru'),
+                              'Python')
+        with self.assertRaisesRegex(
+            NotImplementedError,
+                'WikiWho API is not implemented for wikipedia:ru'):
+            page.get_annotations()
+
+    def test_wikiwho_url_construction(self) -> None:
+        """Test WikiWho URL construction."""
+        page = pywikibot.Page(pywikibot.Site('wikipedia:en'), 'Test')
+        url = page._build_wikiwho_url('all_content')
+        expected = ('https://wikiwho-api.wmcloud.org/en/api/v1.0.0-beta/'
+                    'all_content/Test/')
+        self.assertEqual(url, expected)
+
+        page = pywikibot.Page(pywikibot.Site('wikipedia:en'),
+                              'Python (programming language)')
+        url = page._build_wikiwho_url('all_content')
+        self.assertIn('Python%20%28programming%20language%29', url)
+
+    def test_wikiwho_supported_languages(self) -> None:
+        """Test that WIKIWHO_CODES contains expected languages."""
+        from pywikibot.page._toolforge import WikiBlameMixin
+        codes = WikiBlameMixin.WIKIWHO_CODES
+        expected_langs = ['ar', 'de', 'en', 'es', 'eu', 'fr', 'hu', 'id',
+                          'it', 'ja', 'nl', 'pl', 'pt', 'tr', 'zh']
+        for lang in expected_langs:
+            self.assertIn(lang, codes)
+

 if __name__ == '__main__':
     with suppress(SystemExit):

--
To view, visit 
https://gerrit.wikimedia.org/r/c/pywikibot/core/+/1225151?usp=email
To unsubscribe, or for help writing mail filters, visit 
https://gerrit.wikimedia.org/r/settings?usp=email

Gerrit-MessageType: merged
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: Id5024134d98ead21b9d34ce705aeaeb7669ccf85
Gerrit-Change-Number: 1225151
Gerrit-PatchSet: 4
Gerrit-Owner: Harshita2208 <[email protected]>
Gerrit-Reviewer: Xqt <[email protected]>
Gerrit-Reviewer: jenkins-bot
_______________________________________________
Pywikibot-commits mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to