Matěj Suchánek has uploaded a new change for review. (
https://gerrit.wikimedia.org/r/365281 )
Change subject: Support url datatype in harvest_template.py
......................................................................
Support url datatype in harvest_template.py
Use Pywikibot's native regex for matching external links.
It is a shame this is only being implemented in 2017.
Change-Id: I1645857a5eb8765d9eff1909f51b51035fb2396d
---
M scripts/harvest_template.py
1 file changed, 7 insertions(+), 1 deletion(-)
git pull ssh://gerrit.wikimedia.org:29418/pywikibot/core
refs/changes/81/365281/1
diff --git a/scripts/harvest_template.py b/scripts/harvest_template.py
index b9954af..4957e39 100755
--- a/scripts/harvest_template.py
+++ b/scripts/harvest_template.py
@@ -52,7 +52,7 @@
signal.signal(signal.SIGINT, _signal_handler)
import pywikibot
-from pywikibot import pagegenerators as pg, WikidataBot
+from pywikibot import pagegenerators as pg, WikidataBot, textlib
docuReplacements = {'&params;': pywikibot.pagegenerators.parameterHelp}
@@ -80,6 +80,7 @@
self.fields = fields
self.cacheSources()
self.templateTitles = self.getTemplateSynonyms(self.templateTitle)
+ self.linkR = textlib.compileLinkR()
def getTemplateSynonyms(self, title):
"""Fetch redirects of the title, so we can check against them."""
@@ -185,6 +186,11 @@
claim.setTarget(linked_item)
elif claim.type in ('string', 'external-id'):
claim.setTarget(value.strip())
+ elif claim.type == 'url':
+ match = self.linkR.search(value)
+ if not match:
+ continue
+ claim.setTarget(match.group('url'))
elif claim.type == 'commonsMedia':
commonssite = pywikibot.Site('commons',
'commons')
--
To view, visit https://gerrit.wikimedia.org/r/365281
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I1645857a5eb8765d9eff1909f51b51035fb2396d
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Matěj Suchánek <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits