Xqt has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/810536 )
Change subject: [IMPR] Detach field handling from treat_page_and_item method
......................................................................
[IMPR] Detach field handling from treat_page_and_item method
Change-Id: If767efaa2ccc0cd379137a2e7ce1441df7cc3e9b
---
M scripts/harvest_template.py
1 file changed, 91 insertions(+), 80 deletions(-)
Approvals:
jenkins-bot: Verified
Xqt: Looks good to me, approved
diff --git a/scripts/harvest_template.py b/scripts/harvest_template.py
index a225b8d..137cfd7 100755
--- a/scripts/harvest_template.py
+++ b/scripts/harvest_template.py
@@ -270,103 +270,114 @@
raise KeyboardInterrupt
templates = page.raw_extracted_templates
- for (template, fielddict) in templates:
+ for template, fielddict in templates:
# Clean up template
try:
- template = pywikibot.Page(page.site, template,
- ns=10).title(with_ns=False)
+ template = pywikibot.Page(page.site, template, ns=10)
except InvalidTitleError:
pywikibot.error(
- "Failed parsing template; '{}' should be "
+ 'Failed parsing template; {!r} should be '
'the template name.'.format(template))
continue
- if template not in self.templateTitles:
+ if template.title(with_ns=False) not in self.templateTitles:
continue
+
# We found the template we were looking for
- for field, value in fielddict.items():
- field = field.strip()
- # todo: extend the list of tags to ignore
- value = textlib.removeDisabledParts(
- # todo: eventually we may want to import the references
- value, tags=['ref'], site=page.site).strip()
- if not field or not value:
- continue
+ for field_item in fielddict.items():
+ self.treat_field(page, item, field_item)
- if field not in self.fields:
- continue
+ def treat_field(self, page, item, field_item):
+        """Process a single field of the template fielddict."""
+ field, value = field_item
+ field = field.strip()
+ # todo: extend the list of tags to ignore
+ value = textlib.removeDisabledParts(
+ # todo: eventually we may want to import the references
+ value, tags=['ref'], site=page.site).strip()
- # This field contains something useful for us
- prop, options = self.fields[field]
- claim = pywikibot.Claim(self.repo, prop)
- exists_arg = self._get_option_with_fallback(options, 'exists')
- if claim.type == 'wikibase-item':
- do_multi = self._get_option_with_fallback(
- options, 'multi')
- matched = False
- # Try to extract a valid page
- for match in pywikibot.link_regex.finditer(value):
- matched = True
- link_text = match.group(1)
- linked_item = self.template_link_target(
- item, link_text)
- added = False
- if linked_item:
- claim.setTarget(linked_item)
- added = self.user_add_claim_unless_exists(
- item, claim, exists_arg, page.site,
- pywikibot.output)
- claim = pywikibot.Claim(self.repo, prop)
- # stop after the first match if not supposed to add
- # multiple values
- if not do_multi:
- break
- # update exists_arg, so we can add more values
- if 'p' not in exists_arg and added:
- exists_arg += 'p'
+ if not field or not value or field not in self.fields:
+ return
- if matched:
- continue
+ # This field contains something useful for us
+ prop, options = self.fields[field]
+ claim = pywikibot.Claim(self.repo, prop)
+ exists_arg = self._get_option_with_fallback(options, 'exists')
- if not self._get_option_with_fallback(options, 'islink'):
- pywikibot.output(
- '{} field {} value {} is not a wikilink. Skipping.'
- .format(claim.getID(), field, value))
- continue
+ if claim.type == 'wikibase-item':
+ do_multi = self._get_option_with_fallback(options, 'multi')
+ matched = False
- linked_item = self.template_link_target(item, value)
- if not linked_item:
- continue
+ # Try to extract a valid page
+ for match in pywikibot.link_regex.finditer(value):
+ matched = True
+ link_text = match.group(1)
+ linked_item = self.template_link_target(item, link_text)
+ added = False
+ if linked_item:
claim.setTarget(linked_item)
- elif claim.type in ('string', 'external-id'):
- claim.setTarget(value.strip())
- elif claim.type == 'url':
- match = self.linkR.search(value)
- if not match:
- continue
- claim.setTarget(match.group('url'))
- elif claim.type == 'commonsMedia':
- commonssite = pywikibot.Site('commons')
- imagelink = pywikibot.Link(
- value, source=commonssite, default_namespace=6)
- image = pywikibot.FilePage(imagelink)
- if image.isRedirectPage():
- image = pywikibot.FilePage(image.getRedirectTarget())
- if not image.exists():
- pywikibot.output(
- "{} doesn't exist. I can't link to it"
- .format(image.title(as_link=True)))
- continue
- claim.setTarget(image)
- else:
- pywikibot.output('{} is not a supported datatype.'
- .format(claim.type))
- continue
+ added = self.user_add_claim_unless_exists(
+ item, claim, exists_arg, page.site, pywikibot.info)
+ claim = pywikibot.Claim(self.repo, prop)
- # A generator might yield pages from multiple sites
- self.user_add_claim_unless_exists(
- item, claim, exists_arg, page.site, pywikibot.output)
+ # stop after the first match if not supposed to add
+ # multiple values
+ if not do_multi:
+ break
+
+ # update exists_arg, so we can add more values
+ if 'p' not in exists_arg and added:
+ exists_arg += 'p'
+
+ if matched:
+ return
+
+ if not self._get_option_with_fallback(options, 'islink'):
+ pywikibot.info(
+ '{} field {} value {} is not a wikilink. Skipping.'
+ .format(claim.getID(), field, value))
+ return
+
+ linked_item = self.template_link_target(item, value)
+ if not linked_item:
+ return
+
+ claim.setTarget(linked_item)
+
+ elif claim.type in ('string', 'external-id'):
+ claim.setTarget(value.strip())
+
+ elif claim.type == 'url':
+ match = self.linkR.search(value)
+ if not match:
+ return
+
+ claim.setTarget(match.group('url'))
+
+ elif claim.type == 'commonsMedia':
+ commonssite = pywikibot.Site('commons')
+ imagelink = pywikibot.Link(
+ value, source=commonssite, default_namespace=6)
+ image = pywikibot.FilePage(imagelink)
+ if image.isRedirectPage():
+ image = pywikibot.FilePage(image.getRedirectTarget())
+
+ if not image.exists():
+ pywikibot.info("{} doesn't exist. I can't link to it"
+ .format(image.title(as_link=True)))
+ return
+
+ claim.setTarget(image)
+
+ else:
+ pywikibot.info('{} is not a supported datatype.'
+ .format(claim.type))
+ return
+
+ # A generator might yield pages from multiple sites
+ self.user_add_claim_unless_exists(
+ item, claim, exists_arg, page.site, pywikibot.info)
def main(*args: str) -> None:
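The extraction gives the per-field logic its own override point: treat_page_and_item now only loops over fielddict.items() and hands each (field, value) pair to treat_field, which returns early instead of using continue inside a nested loop. A minimal sketch of how that seam could be used from a subclass follows; the HarvestRobot class name and the import path are assumptions based on scripts/harvest_template.py and are not part of the diff above.

# Illustrative sketch only (not part of this change): a subclass that
# skips placeholder values before delegating to the extracted method.
# 'HarvestRobot' and the import path are assumed from scripts/harvest_template.py.
from scripts.harvest_template import HarvestRobot


class FilteringHarvestRobot(HarvestRobot):

    """Harvest bot that ignores fields holding only a placeholder value."""

    def treat_field(self, page, item, field_item):
        """Delegate to the base implementation unless the value is a placeholder."""
        field, value = field_item
        if value.strip() in ('', '-', '?'):
            return  # nothing useful to harvest from this field
        super().treat_field(page, item, field_item)

Because treat_field handles exactly one field and returns early on its skip conditions, a hook like the one above does not need to re-implement the claim-type dispatch that follows in the base method.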
--
To view, visit https://gerrit.wikimedia.org/r/c/pywikibot/core/+/810536
To unsubscribe, or for help writing mail filters, visit
https://gerrit.wikimedia.org/r/settings
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: If767efaa2ccc0cd379137a2e7ce1441df7cc3e9b
Gerrit-Change-Number: 810536
Gerrit-PatchSet: 1
Gerrit-Owner: Xqt <[email protected]>
Gerrit-Reviewer: D3r1ck01 <[email protected]>
Gerrit-Reviewer: Xqt <[email protected]>
Gerrit-Reviewer: jenkins-bot
Gerrit-MessageType: merged