jenkins-bot has submitted this change and it was merged.
Change subject: Convert weblinkchecker to requests
......................................................................
Convert weblinkchecker to requests
Bug: T113140
Change-Id: Ib9ec7c6294c73f88f562003bdcb30402e0d1fd55
---
M scripts/weblinkchecker.py
1 file changed, 26 insertions(+), 6 deletions(-)
Approvals:
John Vandenberg: Looks good to me, approved
jenkins-bot: Verified
diff --git a/scripts/weblinkchecker.py b/scripts/weblinkchecker.py
index d1f0834..51bce05 100755
--- a/scripts/weblinkchecker.py
+++ b/scripts/weblinkchecker.py
@@ -128,7 +128,10 @@
from pywikibot.pagegenerators import (
XMLDumpPageGenerator as _XMLDumpPageGenerator,
)
+from pywikibot.tools import deprecated
from pywikibot.tools.formatter import color_format
+
+import requests
# TODO: Convert to httlib2
if sys.version_info[0] > 2:
@@ -268,6 +271,7 @@
"""The link is not an URL."""
+@deprecated('requests')
class LinkChecker(object):
"""
@@ -537,7 +541,17 @@
threading.Thread.__init__(self)
self.page = page
self.url = url
+ self._user_agent = comms.http.get_fake_user_agent()
self.history = history
+ self.header = {
+ 'User-agent': self._user_agent,
+ 'Accept': 'text/xml,application/xml,application/xhtml+xml,'
+ 'text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5',
+ 'Accept-Language': 'de-de,de;q=0.8,en-us;q=0.5,en;q=0.3',
+ 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
+ 'Keep-Alive': '30',
+ 'Connection': 'keep-alive',
+ }
# identification for debugging purposes
self.setName((u'%s - %s' % (page.title(), url)).encode('utf-8',
'replace'))
@@ -545,19 +559,24 @@
self.day = day
def run(self):
- linkChecker = LinkChecker(self.url, HTTPignore=self.HTTPignore)
+ ok = False
try:
- ok, message = linkChecker.check()
- except NotAnURLError:
- ok = False
+ header = self.header
+ timeout = pywikibot.config.socket_timeout
+ r = requests.get(self.url, headers=header, timeout=timeout)
+ except requests.exceptions.InvalidURL:
message = i18n.twtranslate(self.page.site,
'weblinkchecker-badurl_msg',
{'URL': self.url})
-
except:
pywikibot.output('Exception while processing URL %s in page %s'
% (self.url, self.page.title()))
raise
+ if (r.status_code == requests.codes.ok and
+ str(r.status_code) not in self.HTTPignore):
+ ok = True
+ else:
+ message = '{0} {1}'.format(r.status_code, r.reason)
if ok:
if self.history.setLinkAlive(self.url):
pywikibot.output('*Link to %s in [[%s]] is back alive.'
@@ -871,8 +890,9 @@
return i
+@deprecated('requests')
def check(url):
- """Peform a check on URL."""
+ """DEPRECATED: Use requests instead. Perform a check on URL."""
c = LinkChecker(url)
return c.check()
--
To view, visit https://gerrit.wikimedia.org/r/265398
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: Ib9ec7c6294c73f88f562003bdcb30402e0d1fd55
Gerrit-PatchSet: 17
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: MtDu <[email protected]>
Gerrit-Reviewer: John Vandenberg <[email protected]>
Gerrit-Reviewer: MtDu <[email protected]>
Gerrit-Reviewer: jenkins-bot <>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits