Xqt has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/810536 )
Change subject: [IMPR] Detach field handling from treat_page_and_item method
......................................................................
[IMPR] Detach field handling from treat_page_and_item method
Change-Id: If767efaa2ccc0cd379137a2e7ce1441df7cc3e9b
---
M scripts/harvest_template.py
1 file changed, 91 insertions(+), 80 deletions(-)
Approvals:
jenkins-bot: Verified
Xqt: Looks good to me, approved
diff --git a/scripts/harvest_template.py b/scripts/harvest_template.py
index a225b8d..137cfd7 100755
--- a/scripts/harvest_template.py
+++ b/scripts/harvest_template.py
@@ -270,103 +270,114 @@
raise KeyboardInterrupt
templates = page.raw_extracted_templates
- for (template, fielddict) in templates:
+ for template, fielddict in templates:
# Clean up template
try:
- template = pywikibot.Page(page.site, template,
- ns=10).title(with_ns=False)
+ template = pywikibot.Page(page.site, template, ns=10)
except InvalidTitleError:
pywikibot.error(
- "Failed parsing template; '{}' should be "
+ 'Failed parsing template; {!r} should be '
'the template name.'.format(template))
continue
- if template not in self.templateTitles:
+ if template.title(with_ns=False) not in self.templateTitles:
continue
+
# We found the template we were looking for
- for field, value in fielddict.items():
- field = field.strip()
- # todo: extend the list of tags to ignore
- value = textlib.removeDisabledParts(
- # todo: eventually we may want to import the references
- value, tags=['ref'], site=page.site).strip()
- if not field or not value:
- continue
+ for field_item in fielddict.items():
+ self.treat_field(page, item, field_item)
- if field not in self.fields:
- continue
+ def treat_field(self, page, item, field_item):
+        """Process a single field of the template fielddict."""
+ field, value = field_item
+ field = field.strip()
+ # todo: extend the list of tags to ignore
+ value = textlib.removeDisabledParts(
+ # todo: eventually we may want to import the references
+ value, tags=['ref'], site=page.site).strip()
- # This field contains something useful for us
- prop, options = self.fields[field]
- claim = pywikibot.Claim(self.repo, prop)
- exists_arg = self._get_option_with_fallback(options, 'exists')
- if claim.type == 'wikibase-item':
- do_multi = self._get_option_with_fallback(
- options, 'multi')
- matched = False
- # Try to extract a valid page
- for match in pywikibot.link_regex.finditer(value):
- matched = True
- link_text = match.group(1)
- linked_item = self.template_link_target(
- item, link_text)
- added = False
- if linked_item:
- claim.setTarget(linked_item)
- added = self.user_add_claim_unless_exists(
- item, claim, exists_arg, page.site,
- pywikibot.output)
- claim = pywikibot.Claim(self.repo, prop)
- # stop after the first match if not supposed to add
- # multiple values
- if not do_multi:
- break
- # update exists_arg, so we can add more values
- if 'p' not in exists_arg and added:
- exists_arg += 'p'
+ if not field or not value or field not in self.fields:
+ return
- if matched:
- continue
+ # This field contains something useful for us
+ prop, options = self.fields[field]
+ claim = pywikibot.Claim(self.repo, prop)
+ exists_arg = self._get_option_with_fallback(options, 'exists')
- if not self._get_option_with_fallback(options, 'islink'):
- pywikibot.output(
- '{} field {} value {} is not a wikilink. Skipping.'
- .format(claim.getID(), field, value))
- continue
+ if claim.type == 'wikibase-item':
+ do_multi = self._get_option_with_fallback(options, 'multi')
+ matched = False
- linked_item = self.template_link_target(item, value)
- if not linked_item:
- continue
+ # Try to extract a valid page
+ for match in pywikibot.link_regex.finditer(value):
+ matched = True
+ link_text = match.group(1)
+ linked_item = self.template_link_target(item, link_text)
+ added = False
+ if linked_item:
claim.setTarget(linked_item)
- elif claim.type in ('string', 'external-id'):
- claim.setTarget(value.strip())
- elif claim.type == 'url':
- match = self.linkR.search(value)
- if not match:
- continue
- claim.setTarget(match.group('url'))
- elif claim.type == 'commonsMedia':
- commonssite = pywikibot.Site('commons')
- imagelink = pywikibot.Link(
- value, source=commonssite, default_namespace=6)
- image = pywikibot.FilePage(imagelink)
- if image.isRedirectPage():
- image = pywikibot.FilePage(image.getRedirectTarget())
- if not image.exists():
- pywikibot.output(
- "{} doesn't exist. I can't link to it"
- .format(image.title(as_link=True)))
- continue
- claim.setTarget(image)
- else:
- pywikibot.output('{} is not a supported datatype.'
- .format(claim.type))
- continue
+ added = self.user_add_claim_unless_exists(
+ item, claim, exists_arg, page.site, pywikibot.info)
+ claim = pywikibot.Claim(self.repo, prop)
- # A generator might yield pages from multiple sites
- self.user_add_claim_unless_exists(
- item, claim, exists_arg, page.site, pywikibot.output)
+ # stop after the first match if not supposed to add
+ # multiple values
+ if not do_multi:
+ break
+
+ # update exists_arg, so we can add more values
+ if 'p' not in exists_arg and added:
+ exists_arg += 'p'
+
+ if matched:
+ return
+
+ if not self._get_option_with_fallback(options, 'islink'):
+ pywikibot.info(
+ '{} field {} value {} is not a wikilink. Skipping.'
+ .format(claim.getID(), field, value))
+ return
+
+ linked_item = self.template_link_target(item, value)
+ if not linked_item:
+ return
+
+ claim.setTarget(linked_item)
+
+ elif claim.type in ('string', 'external-id'):
+ claim.setTarget(value.strip())
+
+ elif claim.type == 'url':
+ match = self.linkR.search(value)
+ if not match:
+ return
+
+ claim.setTarget(match.group('url'))
+
+ elif claim.type == 'commonsMedia':
+ commonssite = pywikibot.Site('commons')
+ imagelink = pywikibot.Link(
+ value, source=commonssite, default_namespace=6)
+ image = pywikibot.FilePage(imagelink)
+ if image.isRedirectPage():
+ image = pywikibot.FilePage(image.getRedirectTarget())
+
+ if not image.exists():
+ pywikibot.info("{} doesn't exist. I can't link to it"
+ .format(image.title(as_link=True)))
+ return
+
+ claim.setTarget(image)
+
+ else:
+ pywikibot.info('{} is not a supported datatype.'
+ .format(claim.type))
+ return
+
+ # A generator might yield pages from multiple sites
+ self.user_add_claim_unless_exists(
+ item, claim, exists_arg, page.site, pywikibot.info)
def main(*args: str) -> None:
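The extraction gives the per-field logic its own override point: treat_page_and_item now only loops over fielddict.items() and hands each (field, value) pair to treat_field, which returns early instead of using continue inside a nested loop. A minimal sketch of how that seam could be used from a subclass follows; the HarvestRobot class name and the import path are assumptions based on scripts/harvest_template.py and are not part of the diff above.

# Illustrative sketch only (not part of this change): a subclass that
# skips placeholder values before delegating to the extracted method.
# 'HarvestRobot' and the import path are assumed from scripts/harvest_template.py.
from scripts.harvest_template import HarvestRobot


class FilteringHarvestRobot(HarvestRobot):

    """Harvest bot that ignores fields holding only a placeholder value."""

    def treat_field(self, page, item, field_item):
        """Delegate to the base implementation unless the value is a placeholder."""
        field, value = field_item
        if value.strip() in ('', '-', '?'):
            return  # nothing useful to harvest from this field
        super().treat_field(page, item, field_item)

Because treat_field handles exactly one field and returns early on its skip conditions, a hook like the one above does not need to re-implement the claim-type dispatch that follows in the base method.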
--
To view, visit https://gerrit.wikimedia.org/r/c/pywikibot/core/+/810536
To unsubscribe, or for help writing mail filters, visit
https://gerrit.wikimedia.org/r/settings
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: If767efaa2ccc0cd379137a2e7ce1441df7cc3e9b
Gerrit-Change-Number: 810536
Gerrit-PatchSet: 1
Gerrit-Owner: Xqt <[email protected]>
Gerrit-Reviewer: D3r1ck01 <[email protected]>
Gerrit-Reviewer: Xqt <[email protected]>
Gerrit-Reviewer: jenkins-bot
Gerrit-MessageType: merged