jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/561697 )

Change subject: [FEAT] Add ability to preload categories
......................................................................

[FEAT] Add ability to preload categories

Bug: T241689
Change-Id: If855e98008e5f13e0a087af66de5d4d09511198e
---
M pywikibot/data/api/_generators.py
M pywikibot/page/_pages.py
M pywikibot/site/_generators.py
M tests/site_tests.py
4 files changed, 55 insertions(+), 14 deletions(-)

Approvals:
  Xqt: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/pywikibot/data/api/_generators.py b/pywikibot/data/api/_generators.py
index f4802f0..21b39c4 100644
--- a/pywikibot/data/api/_generators.py
+++ b/pywikibot/data/api/_generators.py
@@ -914,21 +914,30 @@

 def _update_templates(page, templates) -> None:
     """Update page templates."""
-    templ_pages = [pywikibot.Page(page.site, tl['title']) for tl in templates]
+    templ_pages = {pywikibot.Page(page.site, tl['title']) for tl in templates}
     if hasattr(page, '_templates'):
-        page._templates.extend(templ_pages)
+        page._templates |= templ_pages
     else:
         page._templates = templ_pages


+def _update_categories(page, categories):
+    """Update page categories."""
+    cat_pages = {pywikibot.Page(page.site, ct['title']) for ct in categories}
+    if hasattr(page, '_categories'):
+        page._categories |= cat_pages
+    else:
+        page._categories = cat_pages
+
+
 def _update_langlinks(page, langlinks) -> None:
     """Update page langlinks."""
-    links = [pywikibot.Link.langlinkUnsafe(link['lang'], link['*'],
+    links = {pywikibot.Link.langlinkUnsafe(link['lang'], link['*'],
                                            source=page.site)
-             for link in langlinks]
+             for link in langlinks}

     if hasattr(page, '_langlinks'):
-        page._langlinks.extend(links)
+        page._langlinks |= links
     else:
         page._langlinks = links

@@ -996,12 +1005,17 @@
     if 'templates' in pagedict:
         _update_templates(page, pagedict['templates'])
     elif 'templates' in props:
-        page._templates = []
+        page._templates = set()
+
+    if 'categories' in pagedict:
+        _update_categories(page, pagedict['categories'])
+    elif 'categories' in props:
+        page._categories = set()

     if 'langlinks' in pagedict:
         _update_langlinks(page, pagedict['langlinks'])
     elif 'langlinks' in props:
-        page._langlinks = []
+        page._langlinks = set()

     if 'coordinates' in pagedict:
         _update_coordinates(page, pagedict['coordinates'])
diff --git a/pywikibot/page/_pages.py b/pywikibot/page/_pages.py
index 1ead289..1df86a1 100644
--- a/pywikibot/page/_pages.py
+++ b/pywikibot/page/_pages.py
@@ -31,7 +31,7 @@

 import pywikibot
 from pywikibot import Timestamp, config, date, i18n, textlib
-from pywikibot.backports import Generator, Iterable, List
+from pywikibot.backports import Generator, Iterable, Iterator, List
 from pywikibot.cosmetic_changes import CANCEL, CosmeticChangesToolkit
 from pywikibot.exceptions import (
     Error,
@@ -1492,10 +1492,10 @@
         # this list if the method was called with include_obsolete=False
         # (which is the default)
         if not hasattr(self, '_langlinks'):
-            self._langlinks = list(self.iterlanglinks(include_obsolete=True))
+            self._langlinks = set(self.iterlanglinks(include_obsolete=True))

         if include_obsolete:
-            return self._langlinks
+            return list(self._langlinks)
         return [i for i in self._langlinks if not i.site.obsolete]

     def iterlanglinks(self,
@@ -1526,7 +1526,7 @@
         """
         return pywikibot.ItemPage.fromPage(self)

-    def templates(self, content: bool = False):
+    def templates(self, content: bool = False) -> List['pywikibot.Page']:
         """
         Return a list of Page objects for templates used on this Page.

@@ -1546,9 +1546,9 @@
             del self._templates

         if not hasattr(self, '_templates'):
-            self._templates = list(self.itertemplates(content=content))
+            self._templates = set(self.itertemplates(content=content))

-        return self._templates
+        return list(self._templates)

     def itertemplates(self,
                       total: Optional[int] = None,
@@ -1584,7 +1584,7 @@
     def categories(self,
                    with_sort_key: bool = False,
                    total: Optional[int] = None,
-                   content: bool = False):
+                   content: bool = False) -> Iterator['pywikibot.Page']:
         """
         Iterate categories that the article is in.

@@ -1599,6 +1599,15 @@
         if with_sort_key:
             raise NotImplementedError('with_sort_key is not implemented')

+        # Data might have been preloaded
+        # Delete cache if content is needed and elements have no content
+        if hasattr(self, '_categories'):
+            if (content
+                    and not all(c.has_content() for c in self._categories)):
+                del self._categories
+            else:
+                return itertools.islice(self._categories, total)
+
         return self.site.pagecategories(self, total=total, content=content)

     def extlinks(self, total: Optional[int] = None):
diff --git a/pywikibot/site/_generators.py b/pywikibot/site/_generators.py
index aad1b86..41e9f2c 100644
--- a/pywikibot/site/_generators.py
+++ b/pywikibot/site/_generators.py
@@ -94,6 +94,7 @@
         templates: bool = False,
         langlinks: bool = False,
         pageprops: bool = False,
+        categories: bool = False,
         content: bool = True
     ):
         """Return a generator to a list of preloaded pages.
@@ -108,6 +109,7 @@
         :param langlinks: preload all language links from the provided pages
             to other languages
         :param pageprops: preload various properties defined in page content
+        @param categories: preload page categories
         :param content: preload page content
         """
         props = 'revisions|info|categoryinfo'
@@ -117,6 +119,8 @@
             props += '|langlinks'
         if pageprops:
             props += '|pageprops'
+        if categories:
+            props += '|categories'

         for sublist in itergroup(pagelist, min(groupsize, self.maxlimit)):
             # Do not use p.pageid property as it will force page loading.
diff --git a/tests/site_tests.py b/tests/site_tests.py
index 32ae187..b22c0df 100755
--- a/tests/site_tests.py
+++ b/tests/site_tests.py
@@ -3183,6 +3183,20 @@
             if count >= 5:
                 break

+    def test_preload_categories(self):
+        """Test preloading categories works."""
+        mysite = self.get_site()
+        cats = mysite.randompages(total=10, namespaces=14)
+        gen = mysite.preloadpages(cats, categories=True)
+        for count, page in enumerate(gen):
+            with self.subTest(page=page.title()):
+                self.assertTrue(hasattr(page, '_categories'))
+                # content=True will bypass cache
+                self.assertEqual(page._categories,
+                                 set(page.categories(content=True)))
+            if count >= 5:
+                break
+
     def test_preload_content(self):
         """Test preloading templates and langlinks works."""
         mysite = self.get_site()

--
To view, visit https://gerrit.wikimedia.org/r/c/pywikibot/core/+/561697
To unsubscribe, or for help writing mail filters, visit https://gerrit.wikimedia.org/r/settings

Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: If855e98008e5f13e0a087af66de5d4d09511198e
Gerrit-Change-Number: 561697
Gerrit-PatchSet: 8
Gerrit-Owner: Xqt <[email protected]>
Gerrit-Reviewer: Dvorapa <[email protected]>
Gerrit-Reviewer: Erutuon <[email protected]>
Gerrit-Reviewer: Merlijn van Deen <[email protected]>
Gerrit-Reviewer: Xqt <[email protected]>
Gerrit-Reviewer: jenkins-bot
Gerrit-CC: Mpaa <[email protected]>
Gerrit-MessageType: merged
_______________________________________________
Pywikibot-commits mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to