Xqt has submitted this change. (
https://gerrit.wikimedia.org/r/c/pywikibot/core/+/1225151?usp=email )
Change subject: Add WikiWho API support to pywikibot
......................................................................
Add WikiWho API support to pywikibot
This adds support for accessing the WikiWho API to get token-level
provenance annotations for Wikipedia articles. The implementation
includes:
- New get_annotations() method to retrieve WikiWho data
- Support for 15 Wikipedia language editions (ar, de, en, es, eu,
fr, hu, id, it, ja, nl, pl, pt, tr, zh)
- Helper methods for WikiWho API URL construction and validation
- Comprehensive test coverage for the new functionality
The WikiWho API provides token-level authorship information showing
who added each token in an article and when, which is useful for
article provenance analysis.
Bug: T414071
Change-Id: Id5024134d98ead21b9d34ce705aeaeb7669ccf85
---
M pywikibot/page/_toolforge.py
M tests/wikiblame_tests.py
2 files changed, 143 insertions(+), 0 deletions(-)
Approvals:
Xqt: Verified; Looks good to me, approved
diff --git a/pywikibot/page/_toolforge.py b/pywikibot/page/_toolforge.py
index e6c5820..d00d983 100644
--- a/pywikibot/page/_toolforge.py
+++ b/pywikibot/page/_toolforge.py
@@ -11,7 +11,9 @@
import collections
import re
+import urllib.parse
from http import HTTPStatus
+from typing import Any
from warnings import warn
import pywikibot
@@ -28,6 +30,12 @@
#: Supported wikipedia site codes
WIKIBLAME_CODES = 'als', 'bar', 'de', 'en', 'it', 'nds', 'sco'
+ #: Wikipedia site codes supported by the WikiWho API
+ WIKIWHO_CODES = (
+ 'ar', 'de', 'en', 'es', 'eu', 'fr', 'hu', 'id', 'it', 'ja', 'nl', 'pl',
+ 'pt', 'tr', 'zh'
+ )
+
def _check_wh_supported(self) -> None:
"""Check if WikiHistory is supported."""
if self.site.family.name != 'wikipedia':
@@ -45,6 +53,45 @@
if not self.exists():
raise pywikibot.exceptions.NoPageError(self)
+    def _check_wikiwho_supported(self) -> None:
+        """Verify that this page can be queried through the WikiWho API.
+
+        .. versionadded:: 11.0
+
+        :raise NotImplementedError: unsupported family, site code or namespace
+        :raise NoPageError: the page does not exist
+        """
+        site = self.site
+        if site.family.name != 'wikipedia':
+            raise NotImplementedError(
+                'WikiWho API is implemented for wikipedia family only')
+        lang = site.code
+        if lang not in self.WIKIWHO_CODES:
+            raise NotImplementedError(
+                f'WikiWho API is not implemented for wikipedia:{lang}')
+        namespace = self.namespace()
+        if namespace != 0:
+            raise NotImplementedError(
+                f'WikiWho API is not implemented for {namespace} namespace')
+        if not self.exists():
+            raise pywikibot.exceptions.NoPageError(self)
+
+    def _build_wikiwho_url(self, endpoint: str) -> str:
+        """Return the WikiWho API URL of *endpoint* for this page.
+
+        .. versionadded:: 11.0
+
+        :param endpoint: name of the API endpoint, e.g. ``all_content``,
+            ``rev_content`` or ``edit_persistence``
+        :return: URL ending in the percent-encoded page title and a slash
+        """
+        # The title is a single path segment: safe='' makes quote() encode
+        # '/' as well, which it would otherwise leave untouched.
+        title = self.title(with_ns=False, with_section=False)
+        quoted = urllib.parse.quote(title, safe='')
+        return (f'https://wikiwho-api.wmcloud.org/{self.site.code}'
+                f'/api/v1.0.0-beta/{endpoint}/{quoted}/')
+
@deprecated('authorsship', since='9.3.0')
@deprecated_args(onlynew=None) # since 9.2.0
def main_authors(self) -> collections.Counter[str, int]:
@@ -207,3 +254,56 @@
break
return {user: (chars, percent) for user, chars, percent in result}
+
+    def get_annotations(self) -> dict[str, Any]:
+        """Get WikiWho annotations for article revisions.
+
+        This method uses the public WikiWho API to get token-level
+        provenance annotations showing who added each token in the article.
+
+        Sample:
+
+        >>> import pywikibot
+        >>> site = pywikibot.Site('wikipedia:en')
+        >>> page = pywikibot.Page(site, 'Python (programming language)')
+        >>> data = page.get_annotations()  # doctest: +SKIP
+        >>> data['article_title']  # doctest: +SKIP
+        'Python (programming language)'
+
+        .. important:: Only implemented for main namespace pages and only
+           Wikipedias of :attr:`WIKIWHO_CODES` are supported.
+        .. versionadded:: 11.0
+        .. seealso::
+           - https://wikiwho-api.wmcloud.org
+           - https://www.mediawiki.org/wiki/WikiWho
+
+        :return: Dictionary containing article_title, page_id, and revisions
+            with token-level annotations
+
+        :raise NotImplementedError: unsupported site, language, or namespace
+        :raise NoPageError: page does not exist
+        :raise pywikibot.exceptions.ServerError: malformed or error response
+        :raise requests.exceptions.HTTPError: HTTP error from WikiWho API
+        """
+        self._check_wikiwho_supported()
+        url = self._build_wikiwho_url('all_content')
+        r = pywikibot.comms.http.fetch(f'{url}?editor=true&o_rev_id=true')
+        # No-op unless the status is 4xx/5xx, so no status check is needed
+        r.raise_for_status()
+
+        try:
+            data = r.json()
+        except ValueError as e:  # the body is not valid JSON
+            raise pywikibot.exceptions.ServerError(
+                f'Failed to parse WikiWho API response: {e}') from e
+
+        # A JSON array or scalar would break the key lookups below
+        if not isinstance(data, dict):
+            raise pywikibot.exceptions.ServerError(
+                f'WikiWho API error: unexpected {type(data).__name__} payload')
+        if 'Error' in data or 'error' in data:
+            error_msg = (data.get('Error') or data.get('error')
+                         or 'Unknown error')
+            raise pywikibot.exceptions.ServerError(
+                f'WikiWho API error: {error_msg}')
+        return data
diff --git a/tests/wikiblame_tests.py b/tests/wikiblame_tests.py
index ced4a57..b4d3e30 100644
--- a/tests/wikiblame_tests.py
+++ b/tests/wikiblame_tests.py
@@ -63,6 +63,49 @@
self.assertGreaterEqual(chars, 100)
self.assertGreaterEqual(pct, 5.0)
+    def test_wikiwho_exceptions(self) -> None:
+        """Check the errors get_annotations raises for unusable pages.
+
+        Cases: site, title, expected exception, message pattern.
+        """
+        cases = (
+            # a page that does not exist
+            ('wikipedia:en', 'NonExistentPageXYZ123',
+             pywikibot.exceptions.NoPageError, "doesn't exist"),
+            # a page outside the main namespace
+            ('wikipedia:en', 'Talk:Wikipedia', NotImplementedError,
+             'WikiWho API is not implemented for Talk: namespace'),
+            # a Wikipedia whose code is not in WIKIWHO_CODES
+            ('wikipedia:ru', 'Python', NotImplementedError,
+             'WikiWho API is not implemented for wikipedia:ru'),
+        )
+        for sitename, title, error, pattern in cases:
+            page = pywikibot.Page(pywikibot.Site(sitename), title)
+            with self.assertRaisesRegex(error, pattern):
+                page.get_annotations()
+
+    def test_wikiwho_url_construction(self) -> None:
+        """Check URLs built for a plain title and one needing quoting."""
+        site = pywikibot.Site('wikipedia:en')
+        plain = pywikibot.Page(site, 'Test')
+        self.assertEqual(
+            plain._build_wikiwho_url('all_content'),
+            'https://wikiwho-api.wmcloud.org/en/api/v1.0.0-beta/'
+            'all_content/Test/')
+        # spaces and parentheses have to be percent-encoded
+        quoted = pywikibot.Page(site, 'Python (programming language)')
+        self.assertIn('Python%20%28programming%20language%29',
+                      quoted._build_wikiwho_url('all_content'))
+
+    def test_wikiwho_supported_languages(self) -> None:
+        """Check that every expected language code is in WIKIWHO_CODES."""
+        from pywikibot.page._toolforge import WikiBlameMixin
+        supported = WikiBlameMixin.WIKIWHO_CODES
+        expected = ('ar', 'de', 'en', 'es', 'eu', 'fr', 'hu', 'id', 'it',
+                    'ja', 'nl', 'pl', 'pt', 'tr', 'zh')
+        for code in expected:
+            self.assertIn(code, supported)
+
if __name__ == '__main__':
with suppress(SystemExit):
--
To view, visit
https://gerrit.wikimedia.org/r/c/pywikibot/core/+/1225151?usp=email
To unsubscribe, or for help writing mail filters, visit
https://gerrit.wikimedia.org/r/settings?usp=email
Gerrit-MessageType: merged
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: Id5024134d98ead21b9d34ce705aeaeb7669ccf85
Gerrit-Change-Number: 1225151
Gerrit-PatchSet: 4
Gerrit-Owner: Harshita2208 <[email protected]>
Gerrit-Reviewer: Xqt <[email protected]>
Gerrit-Reviewer: jenkins-bot
_______________________________________________
Pywikibot-commits mailing list -- [email protected]
To unsubscribe send an email to [email protected]