jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/1197011?usp=email )
Change subject: [Bugfix] Replace timetravel.mementoweb.org with web.archive.org ...................................................................... [Bugfix] Replace timetravel.mementoweb.org with web.archive.org mementoweb.org is not reachable. Therefore: - replace http://timetravel.mementoweb.org/timegate/ with https://web.archive.org/web/ and set it to new default time gate - update tests - update weblinkchecker.py - update documentation Bug: T400570 Bug: T407694 Change-Id: Iead2f5c5b81faa56d81986ddc6593ad2e5793344 --- M docs/api_ref/pywikibot.data.rst M pyproject.toml M pywikibot/data/memento.py M scripts/weblinkchecker.py M tests/memento_tests.py 5 files changed, 39 insertions(+), 32 deletions(-) Approvals: jenkins-bot: Verified Xqt: Looks good to me, approved diff --git a/docs/api_ref/pywikibot.data.rst b/docs/api_ref/pywikibot.data.rst index e6612b6..4a5693d 100644 --- a/docs/api_ref/pywikibot.data.rst +++ b/docs/api_ref/pywikibot.data.rst @@ -23,6 +23,8 @@ .. automodule:: data.memento :synopsis: Fix ups for memento-client package version 0.6.1 +.. autodata:: data.memento.DEFAULT_TIMEGATE_BASE_URI + :mod:`data.mysql` --- Mysql Requests ==================================== diff --git a/pyproject.toml b/pyproject.toml index d37f51c..9c305cd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -195,7 +195,7 @@ [tool.rstcheck] -ignore_directives = ["automodule", "autoclass", "autofunction", "tabs"] +ignore_directives = ["automodule", "autoclass", "autodata", "autofunction", "tabs"] ignore_messages = '(Undefined substitution referenced: "(release|today|version)")' ignore_roles = ["api", "phab", "pylib", "source", "wiki"] diff --git a/pywikibot/data/memento.py b/pywikibot/data/memento.py index 4cb6cb0..97ef58b 100644 --- a/pywikibot/data/memento.py +++ b/pywikibot/data/memento.py @@ -1,6 +1,8 @@ """Fix ups for memento-client package version 0.6.1. .. versionadded:: 7.4 +.. versionchanged:: 10.7 + Set default timegate to :attr:`DEFAULT_TIMEGATE_BASE_URI` .. 
seealso:: https://github.com/mementoweb/py-memento-client#readme """ # @@ -32,6 +34,10 @@ ) +#: Default timegate; overrides the origin library setting. +DEFAULT_TIMEGATE_BASE_URI: str = 'https://web.archive.org/web/' + + class MementoClient(OldMementoClient): """A Memento Client. @@ -41,6 +47,8 @@ .. versionchanged:: 7.4 `timeout` is used in several methods. + .. versionchanged:: 10.7 + Set default timegate to :attr:`DEFAULT_TIMEGATE_BASE_URI` Basic usage: @@ -50,7 +58,7 @@ >>> mi['original_uri'] 'http://www.bbc.com/' >>> mi['timegate_uri'] - 'http://timetravel.mementoweb.org/timegate/http://www.bbc.com/' + 'https://web.archive.org/web/http://www.bbc.com/' >>> sorted(mi['mementos']) ['closest', 'first', 'last', 'next', 'prev'] >>> from pprint import pprint @@ -67,32 +75,38 @@ 'prev': {'datetime': datetime.datetime(2009, 10, 15, 19, 7, 5), 'uri': ['http://wayback.nli.org.il:8080/20091015190705/http://www.bbc.com/']}} - The output conforms to the Memento API format explained here: - http://timetravel.mementoweb.org/guide/api/#memento-json + The output conforms to the Memento API format but its description at + http://timetravel.mementoweb.org/guide/api/#memento-json is no + longer available .. note:: The mementos result is not deterministic. It may be different for the same parameters. - By default, MementoClient uses the Memento Aggregator: - http://mementoweb.org/depot/ - It is also possible to use different TimeGate, simply initialize - with a preferred timegate base uri. Toggle check_native_timegate to - see if the original uri has its own timegate. The native timegate, - if found will be used instead of the timegate_uri preferred. If no - native timegate is found, the preferred timegate_uri will be used. + with a preferred timegate base uri. Toggle *check_native_timegate* + to see if the original uri has its own timegate. The native + timegate, if found will be used instead of the *timegate_uri* + preferred. 
If no native timegate is found, the preferred + *timegate_uri* will be used. :param str timegate_uri: A valid HTTP base uri for a timegate. - Must start with http(s):// and end with a /. + Must start with http(s):// and end with a /. Default is + :attr:`DEFAULT_TIMEGATE_BASE_URI` + :param bool check_native_timegate: If True, the client will first + check whether the original URI has a native TimeGate. If found, + the native TimeGate is used instead of the preferred + *timegate_uri*. If False, the preferred *timegate_uri* is always + used. Default is True. :param int max_redirects: the maximum number of redirects allowed - for all HTTP requests to be made. + for all HTTP requests to be made. Default is 30. + :param requests.Session|None session: a Session object :return: A :class:`MementoClient` obj. """ # noqa: E501, W505 def __init__(self, *args, **kwargs) -> None: """Initializer.""" - # To prevent documentation inclusion from inherited class - # because it is malformed. + if 'timegate_uri' not in kwargs and not args: + kwargs['timegate_uri'] = DEFAULT_TIMEGATE_BASE_URI super().__init__(*args, **kwargs) def get_memento_info(self, request_uri: str, @@ -326,7 +340,7 @@ datetime is used if none is provided. :param timegate_uri: A valid HTTP base uri for a timegate. Must start with http(s):// and end with a /. Default value is - http://timetravel.mementoweb.org/timegate/. + :attr:`DEFAULT_TIMEGATE_BASE_URI`. :param timeout: The timeout value for the HTTP connection. If None, a default value is used in :meth:`MementoClient.request_head`. 
""" diff --git a/scripts/weblinkchecker.py b/scripts/weblinkchecker.py index 122e02c..4da1a67 100755 --- a/scripts/weblinkchecker.py +++ b/scripts/weblinkchecker.py @@ -175,17 +175,6 @@ ] -def get_archive_url(url): - """Get archive URL.""" - try: - return get_closest_memento_url( - url, timegate_uri='http://web.archive.org/web/') - except Exception: - return get_closest_memento_url( - url, - timegate_uri='http://timetravel.mementoweb.org/webcite/timegate/') - - def weblinks_from_text( text, without_bracketed: bool = False, @@ -410,7 +399,7 @@ if time_since_first_found > 60 * 60 * 24 * weblink_dead_days: # search for archived page try: - archive_url = get_archive_url(url) + archive_url = get_closest_memento_url(url) except Exception as e: pywikibot.warning( f'get_closest_memento_url({url}) failed: {e}') diff --git a/tests/memento_tests.py b/tests/memento_tests.py index 8647906..f5d5269 100755 --- a/tests/memento_tests.py +++ b/tests/memento_tests.py @@ -39,10 +39,10 @@ class TestMementoArchive(MementoTestCase): - """New WebCite Memento tests.""" + """Web Archive Memento tests.""" - timegate_uri = 'http://timetravel.mementoweb.org/timegate/' - hostname = timegate_uri.replace('gate/', 'map/json/http://google.com') + timegate_uri = 'https://web.archive.org/web/' + hostname = timegate_uri def test_newest(self) -> None: """Test Archive for an old https://google.com.""" @@ -55,7 +55,7 @@ class TestMementoDefault(MementoTestCase): - """Test InternetArchive is default Memento timegate.""" + """Test Web Archive is default Memento timegate.""" timegate_uri = None net = True @@ -64,6 +64,8 @@ """Test getting memento for newest https://google.com.""" archivedversion = self._get_archive_url('https://google.com') self.assertIsNotNone(archivedversion) + from pywikibot.data.memento import DEFAULT_TIMEGATE_BASE_URI + self.assertStartsWith(archivedversion, DEFAULT_TIMEGATE_BASE_URI) def test_invalid(self) -> None: """Test getting memento for invalid URL.""" -- To view, visit 
https://gerrit.wikimedia.org/r/c/pywikibot/core/+/1197011?usp=email To unsubscribe, or for help writing mail filters, visit https://gerrit.wikimedia.org/r/settings?usp=email Gerrit-MessageType: merged Gerrit-Project: pywikibot/core Gerrit-Branch: master Gerrit-Change-Id: Iead2f5c5b81faa56d81986ddc6593ad2e5793344 Gerrit-Change-Number: 1197011 Gerrit-PatchSet: 8 Gerrit-Owner: Xqt <[email protected]> Gerrit-Reviewer: JJMC89 <[email protected]> Gerrit-Reviewer: Xqt <[email protected]> Gerrit-Reviewer: jenkins-bot
_______________________________________________ Pywikibot-commits mailing list -- [email protected] To unsubscribe send an email to [email protected]
