jenkins-bot has submitted this change and it was merged.

Change subject: Convert weblinkchecker to requests
......................................................................


Convert weblinkchecker to requests

Bug: T113140
Change-Id: Ib9ec7c6294c73f88f562003bdcb30402e0d1fd55
---
M scripts/weblinkchecker.py
1 file changed, 26 insertions(+), 6 deletions(-)

Approvals:
  John Vandenberg: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/scripts/weblinkchecker.py b/scripts/weblinkchecker.py
index d1f0834..51bce05 100755
--- a/scripts/weblinkchecker.py
+++ b/scripts/weblinkchecker.py
@@ -128,7 +128,10 @@
 from pywikibot.pagegenerators import (
     XMLDumpPageGenerator as _XMLDumpPageGenerator,
 )
+from pywikibot.tools import deprecated
 from pywikibot.tools.formatter import color_format
+
+import requests
 
 # TODO: Convert to httlib2
 if sys.version_info[0] > 2:
@@ -268,6 +271,7 @@
     """The link is not an URL."""
 
 
+@deprecated('requests')
 class LinkChecker(object):
 
     """
@@ -537,7 +541,17 @@
         threading.Thread.__init__(self)
         self.page = page
         self.url = url
+        self._user_agent = comms.http.get_fake_user_agent()
         self.history = history
+        self.header = {
+            'User-agent': self._user_agent,
+            'Accept': 'text/xml,application/xml,application/xhtml+xml,'
+                      'text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5',
+            'Accept-Language': 'de-de,de;q=0.8,en-us;q=0.5,en;q=0.3',
+            'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
+            'Keep-Alive': '30',
+            'Connection': 'keep-alive',
+        }
         # identification for debugging purposes
         self.setName((u'%s - %s' % (page.title(), url)).encode('utf-8',
                                                                'replace'))
@@ -545,19 +559,24 @@
         self.day = day
 
     def run(self):
-        linkChecker = LinkChecker(self.url, HTTPignore=self.HTTPignore)
+        ok = False
         try:
-            ok, message = linkChecker.check()
-        except NotAnURLError:
-            ok = False
+            header = self.header
+            timeout = pywikibot.config.socket_timeout
+            r = requests.get(self.url, headers=header, timeout=timeout)
+        except requests.exceptions.InvalidURL:
             message = i18n.twtranslate(self.page.site,
                                        'weblinkchecker-badurl_msg',
                                        {'URL': self.url})
-
         except:
             pywikibot.output('Exception while processing URL %s in page %s'
                              % (self.url, self.page.title()))
             raise
+        if (r.status_code == requests.codes.ok and
+                str(r.status_code) not in self.HTTPignore):
+            ok = True
+        else:
+            message = '{0} {1}'.format(r.status_code, r.reason)
         if ok:
             if self.history.setLinkAlive(self.url):
                 pywikibot.output('*Link to %s in [[%s]] is back alive.'
@@ -871,8 +890,9 @@
     return i
 
 
+@deprecated('requests')
 def check(url):
-    """Peform a check on URL."""
+    """DEPRECATED: Use requests instead. Perform a check on URL."""
     c = LinkChecker(url)
     return c.check()
 

-- 
To view, visit https://gerrit.wikimedia.org/r/265398
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: Ib9ec7c6294c73f88f562003bdcb30402e0d1fd55
Gerrit-PatchSet: 17
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: MtDu <[email protected]>
Gerrit-Reviewer: John Vandenberg <[email protected]>
Gerrit-Reviewer: MtDu <[email protected]>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to