jenkins-bot has submitted this change. ( 
https://gerrit.wikimedia.org/r/c/pywikibot/core/+/1197011?usp=email )

Change subject: [Bugfix] Replace timetravel.mementoweb.org with web.archive.org
......................................................................

[Bugfix] Replace timetravel.mementoweb.org with web.archive.org

mementoweb.org is not reachable. Therefore:
- replace http://timetravel.mementoweb.org/timegate/ with
  https://web.archive.org/web/ and set it to new default time gate
- update tests
- update weblinkchecker.py
- update documentation

Bug: T400570
Bug: T407694
Change-Id: Iead2f5c5b81faa56d81986ddc6593ad2e5793344
---
M docs/api_ref/pywikibot.data.rst
M pyproject.toml
M pywikibot/data/memento.py
M scripts/weblinkchecker.py
M tests/memento_tests.py
5 files changed, 39 insertions(+), 32 deletions(-)

Approvals:
  jenkins-bot: Verified
  Xqt: Looks good to me, approved




diff --git a/docs/api_ref/pywikibot.data.rst b/docs/api_ref/pywikibot.data.rst
index e6612b6..4a5693d 100644
--- a/docs/api_ref/pywikibot.data.rst
+++ b/docs/api_ref/pywikibot.data.rst
@@ -23,6 +23,8 @@
 .. automodule:: data.memento
    :synopsis: Fix ups for memento-client package version 0.6.1
 
+.. autodata:: data.memento.DEFAULT_TIMEGATE_BASE_URI
+
 :mod:`data.mysql` --- Mysql Requests
 ====================================

diff --git a/pyproject.toml b/pyproject.toml
index d37f51c..9c305cd 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -195,7 +195,7 @@


 [tool.rstcheck]
-ignore_directives = ["automodule", "autoclass", "autofunction", "tabs"]
+ignore_directives = ["automodule", "autoclass", "autodata", "autofunction", "tabs"]
 ignore_messages = '(Undefined substitution referenced: "(release|today|version)")'
 ignore_roles = ["api", "phab", "pylib", "source", "wiki"]

diff --git a/pywikibot/data/memento.py b/pywikibot/data/memento.py
index 4cb6cb0..97ef58b 100644
--- a/pywikibot/data/memento.py
+++ b/pywikibot/data/memento.py
@@ -1,6 +1,8 @@
 """Fix ups for memento-client package version 0.6.1.

 .. versionadded:: 7.4
+.. versionchanged:: 10.7
+   Set default timegate to :attr:`DEFAULT_TIMEGATE_BASE_URI`
 .. seealso:: https://github.com/mementoweb/py-memento-client#readme
 """
 #
@@ -32,6 +34,10 @@
 )


+#: Default timegate; overrides the origin library setting.
+DEFAULT_TIMEGATE_BASE_URI: str = 'https://web.archive.org/web/'
+
+
 class MementoClient(OldMementoClient):

     """A Memento Client.
@@ -41,6 +47,8 @@

     .. versionchanged:: 7.4
        `timeout` is used in several methods.
+    .. versionchanged:: 10.7
+       Set default timegate to :attr:`DEFAULT_TIMEGATE_BASE_URI`

     Basic usage:

@@ -50,7 +58,7 @@
     >>> mi['original_uri']
     'http://www.bbc.com/'
     >>> mi['timegate_uri']
-    'http://timetravel.mementoweb.org/timegate/http://www.bbc.com/'
+    'https://web.archive.org/web/http://www.bbc.com/'
     >>> sorted(mi['mementos'])
     ['closest', 'first', 'last', 'next', 'prev']
     >>> from pprint import pprint
@@ -67,32 +75,38 @@
      'prev': {'datetime': datetime.datetime(2009, 10, 15, 19, 7, 5),
               'uri': ['http://wayback.nli.org.il:8080/20091015190705/http://www.bbc.com/']}}

-    The output conforms to the Memento API format explained here:
-    http://timetravel.mementoweb.org/guide/api/#memento-json
+    The output conforms to the Memento API format but its description at
+    http://timetravel.mementoweb.org/guide/api/#memento-json is no
+    longer available.

     .. note:: The mementos result is not deterministic. It may be
        different for the same parameters.

-    By default, MementoClient uses the Memento Aggregator:
-    http://mementoweb.org/depot/
-
     It is also possible to use different TimeGate, simply initialize
-    with a preferred timegate base uri. Toggle check_native_timegate to
-    see if the original uri has its own timegate. The native timegate,
-    if found will be used instead of the timegate_uri preferred. If no
-    native timegate is found, the preferred timegate_uri will be used.
+    with a preferred timegate base uri. Toggle *check_native_timegate*
+    to see if the original uri has its own timegate. The native
+    timegate, if found, will be used instead of the *timegate_uri*
+    preferred. If no native timegate is found, the preferred
+    *timegate_uri* will be used.
 
     :param str timegate_uri: A valid HTTP base uri for a timegate.
-        Must start with http(s):// and end with a /.
+        Must start with http(s):// and end with a /. Default is
+        :attr:`DEFAULT_TIMEGATE_BASE_URI`
+    :param bool check_native_timegate: If True, the client will first
+        check whether the original URI has a native TimeGate. If found,
+        the native TimeGate is used instead of the preferred
+        *timegate_uri*. If False, the preferred *timegate_uri* is always
+        used. Default is True.
     :param int max_redirects: the maximum number of redirects allowed
-        for all HTTP requests to be made.
+        for all HTTP requests to be made. Default is 30.
+    :param requests.Session|None session: a Session object
     :return: A :class:`MementoClient` obj.
     """  # noqa: E501, W505

     def __init__(self, *args, **kwargs) -> None:
         """Initializer."""
-        # To prevent documentation inclusion from inherited class
-        # because it is malformed.
+        if 'timegate_uri' not in kwargs and not args:
+            kwargs['timegate_uri'] = DEFAULT_TIMEGATE_BASE_URI
         super().__init__(*args, **kwargs)

     def get_memento_info(self, request_uri: str,
@@ -326,7 +340,7 @@
         datetime is used if none is provided.
     :param timegate_uri: A valid HTTP base uri for a timegate. Must
         start with http(s):// and end with a /. Default value is
-        http://timetravel.mementoweb.org/timegate/.
+        :attr:`DEFAULT_TIMEGATE_BASE_URI`.
     :param timeout: The timeout value for the HTTP connection. If None,
         a default value is used in :meth:`MementoClient.request_head`.
     """
diff --git a/scripts/weblinkchecker.py b/scripts/weblinkchecker.py
index 122e02c..4da1a67 100755
--- a/scripts/weblinkchecker.py
+++ b/scripts/weblinkchecker.py
@@ -175,17 +175,6 @@
 ]

 
-def get_archive_url(url):
-    """Get archive URL."""
-    try:
-        return get_closest_memento_url(
-            url, timegate_uri='http://web.archive.org/web/')
-    except Exception:
-        return get_closest_memento_url(
-            url,
-            timegate_uri='http://timetravel.mementoweb.org/webcite/timegate/')
-
-
 def weblinks_from_text(
     text,
     without_bracketed: bool = False,
@@ -410,7 +399,7 @@
                 if time_since_first_found > 60 * 60 * 24 * weblink_dead_days:
                     # search for archived page
                     try:
-                        archive_url = get_archive_url(url)
+                        archive_url = get_closest_memento_url(url)
                     except Exception as e:
                         pywikibot.warning(
                             f'get_closest_memento_url({url}) failed: {e}')
diff --git a/tests/memento_tests.py b/tests/memento_tests.py
index 8647906..f5d5269 100755
--- a/tests/memento_tests.py
+++ b/tests/memento_tests.py
@@ -39,10 +39,10 @@

 class TestMementoArchive(MementoTestCase):
 
-    """New WebCite Memento tests."""
+    """Web Archive Memento tests."""

-    timegate_uri = 'http://timetravel.mementoweb.org/timegate/'
-    hostname = timegate_uri.replace('gate/', 'map/json/http://google.com')
+    timegate_uri = 'https://web.archive.org/web/'
+    hostname = timegate_uri

     def test_newest(self) -> None:
         """Test Archive for an old https://google.com."""
@@ -55,7 +55,7 @@

 class TestMementoDefault(MementoTestCase):

-    """Test InternetArchive is default Memento timegate."""
+    """Test Web Archive is default Memento timegate."""

     timegate_uri = None
     net = True
@@ -64,6 +64,8 @@
         """Test getting memento for newest https://google.com."""
         archivedversion = self._get_archive_url('https://google.com')
         self.assertIsNotNone(archivedversion)
+        from pywikibot.data.memento import DEFAULT_TIMEGATE_BASE_URI
+        self.assertStartsWith(archivedversion, DEFAULT_TIMEGATE_BASE_URI)

     def test_invalid(self) -> None:
         """Test getting memento for invalid URL."""

--
To view, visit 
https://gerrit.wikimedia.org/r/c/pywikibot/core/+/1197011?usp=email
To unsubscribe, or for help writing mail filters, visit 
https://gerrit.wikimedia.org/r/settings?usp=email

Gerrit-MessageType: merged
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: Iead2f5c5b81faa56d81986ddc6593ad2e5793344
Gerrit-Change-Number: 1197011
Gerrit-PatchSet: 8
Gerrit-Owner: Xqt <[email protected]>
Gerrit-Reviewer: JJMC89 <[email protected]>
Gerrit-Reviewer: Xqt <[email protected]>
Gerrit-Reviewer: jenkins-bot
_______________________________________________
Pywikibot-commits mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to