jenkins-bot has submitted this change. ( 
https://gerrit.wikimedia.org/r/c/pywikibot/core/+/818135 )

Change subject: [bugfix]: fix partial caching in Category.subcategories()
......................................................................

[bugfix]: fix partial caching in Category.subcategories()

Partial caching in category.subcategories() leads to incomplete data
in subsequent calls.

Changes:
- save api calls for categories with no subcategories, based on
  cat.categoryinfo['subcats'] info.

  At the expense of an extra api call for the base category, for a
  cleaner design.
  Subcategories have categoryinfo already cached (no extra api calls
  needed).

- consider cache valid only if all subcategories have been fetched.
  this is guaranteed only if total=none.

- also invalidate cache if called with content=true, after chache is
  created with content=false in previous calls.

Bug: T88217
Change-Id: Ifdd4ae5eb1e78764b8d1aaaa286004bcb7705760
---
M pywikibot/page/_pages.py
M tests/category_tests.py
2 files changed, 70 insertions(+), 6 deletions(-)

Approvals:
  Xqt: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/pywikibot/page/_pages.py b/pywikibot/page/_pages.py
index 9537cd8..b5ebb4a 100644
--- a/pywikibot/page/_pages.py
+++ b/pywikibot/page/_pages.py
@@ -2390,15 +2390,23 @@
         :param content: if True, retrieve the content of the current version
             of each category description page (default False)
         """
+
+        def is_cache_valid(cache: dict, content: bool) -> bool:
+            return cache['content'] or not content
+
+        if not self.categoryinfo['subcats']:
+            return
+
         if not isinstance(recurse, bool) and recurse:
             recurse = recurse - 1

-        if not hasattr(self, '_subcats'):
-            self._subcats = []
-            for member in self.site.categorymembers(
+        if (not hasattr(self, '_subcats')
+                or not is_cache_valid(self._subcats, content)):
+            cache = {'data': [], 'content': content}
+
+            for subcat in self.site.categorymembers(
                     self, member_type='subcat', total=total, content=content):
-                subcat = Category(member)
-                self._subcats.append(subcat)
+                cache['data'].append(subcat)
                 yield subcat
                 if total is not None:
                     total -= 1
@@ -2415,8 +2423,11 @@
                         total -= 1
                         if total == 0:
                             return
+            else:
+                # cache is valid only if all subcategories are fetched (T88217)
+                self._subcats = cache
         else:
-            for subcat in self._subcats:
+            for subcat in self._subcats['data']:
                 yield subcat
                 if total is not None:
                     total -= 1
diff --git a/tests/category_tests.py b/tests/category_tests.py
index 2e947c2..3bd8f1f 100755
--- a/tests/category_tests.py
+++ b/tests/category_tests.py
@@ -103,9 +103,62 @@
         self.assertIn(c1, subcategories)
         self.assertNotIn(c2, subcategories)

+        cat = pywikibot.Category(site, 'Category:Wikipedians by gender')
         subcategories_total = list(cat.subcategories(total=2))
         self.assertLength(subcategories_total, 2)

+    def test_subcategories_cache_length(self):
+        """Test the subcategories cache length."""
+        site = self.get_site()
+
+        # test cache is valid only if all members of cat are iterated.
+        cat = pywikibot.Category(site, 'Category:Wikipedians by gender')
+        subcategories = list(cat.subcategories(total=2))
+        self.assertLength(subcategories, 2)
+        self.assertFalse(hasattr(cat, '_subcats'))
+
+        subcategories = list(cat.subcategories())
+        self.assertGreater(len(subcategories), 2)
+        self.assertTrue(hasattr(cat, '_subcats'))
+
+        # new cat, no cached data.
+        cat = pywikibot.Category(site, 'Category:Wikipedians by gender')
+
+        # cache not available yet due to partial iteration.
+        gen = cat.subcategories()
+        _ = next(gen)
+        self.assertFalse(hasattr(cat, '_subcats'))
+
+        # cache available.
+        _ = list(gen)
+        self.assertTrue(hasattr(cat, '_subcats'))
+
+    def test_subcategories_cache_content(self):
+        """Test the subcategories cache content."""
+        site = self.get_site()
+        cat = pywikibot.Category(site, 'Category:Wikipedians by gender')
+
+        subcategories = list(cat.subcategories(content=False))
+        cache_id_1 = id(cat._subcats)
+        cache_len_1 = len(subcategories)
+        subcat = subcategories[0]
+        self.assertFalse(subcat.has_content())
+
+        # toggle content.
+        subcategories = list(cat.subcategories(content=True))
+        cache_len_2 = len(subcategories)
+        cache_id_2 = id(cat._subcats)
+        subcat = subcategories[0]
+        self.assertTrue(subcat.has_content())
+        # cache reloaded.
+        self.assertNotEqual(cache_id_1, cache_id_2)
+        self.assertTrue(cache_len_1, cache_len_2)
+
+        # cache not reloaded.
+        _ = list(cat.subcategories(content=True))
+        cache_id_3 = id(cat._subcats)
+        self.assertEqual(cache_id_2, cache_id_3)
+
     def test_subcategories_recurse(self):
         """Test the subcategories method with recurse=True."""
         site = self.get_site()

--
To view, visit https://gerrit.wikimedia.org/r/c/pywikibot/core/+/818135
To unsubscribe, or for help writing mail filters, visit 
https://gerrit.wikimedia.org/r/settings

Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: Ifdd4ae5eb1e78764b8d1aaaa286004bcb7705760
Gerrit-Change-Number: 818135
Gerrit-PatchSet: 6
Gerrit-Owner: Mpaa <[email protected]>
Gerrit-Reviewer: Xqt <[email protected]>
Gerrit-Reviewer: jenkins-bot
Gerrit-MessageType: merged
_______________________________________________
Pywikibot-commits mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to