jenkins-bot has submitted this change and it was merged.

Change subject: Preloading tests
......................................................................


Preloading tests

Adds tests for
- PropertyGenerator,
- APISite.preloadpages
- PreloadingGenerator

With several expected failures due to bugs with FIXME notes.

Bug: 73461
Change-Id: I11df5361bd2b67cce132faba252552bfa5053827
---
M pywikibot/site.py
M tests/api_tests.py
M tests/pagegenerators_tests.py
M tests/site_tests.py
4 files changed, 467 insertions(+), 20 deletions(-)

Approvals:
  John Vandenberg: Looks good to me, but someone else must approve
  Mpaa: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/pywikibot/site.py b/pywikibot/site.py
index 799d99c..1242c70 100644
--- a/pywikibot/site.py
+++ b/pywikibot/site.py
@@ -2457,8 +2457,8 @@
                                 break
                         else:
                             pywikibot.warning(
-                                u"preloadpages: Query returned unexpected title"
-                                u"'%s'" % pagedata['title'])
+                                u"preloadpages: Query returned unexpected "
+                                u"title '%s'" % pagedata['title'])
                             continue
                 except KeyError:
                     pywikibot.debug(u"No 'title' in %s" % pagedata, _logger)
diff --git a/tests/api_tests.py b/tests/api_tests.py
index 5d6a07b..7c6c273 100644
--- a/tests/api_tests.py
+++ b/tests/api_tests.py
@@ -239,9 +239,9 @@
         self.assertIn('revisions', pi.prefixes)
 
 
-class TestPageGenerator(TestCase):
+class TestDryPageGenerator(TestCase):
 
-    """API PageGenerator object test class."""
+    """Dry API PageGenerator object test class."""
 
     family = 'wikipedia'
     code = 'en'
@@ -249,7 +249,7 @@
     dry = True
 
     def setUp(self):
-        super(TestPageGenerator, self).setUp()
+        super(TestDryPageGenerator, self).setUp()
         mysite = self.get_site()
         self.gen = api.PageGenerator(site=mysite,
                                      generator="links",
@@ -328,6 +328,134 @@
         self.assertEqual(len(results), 4)  # total=-1 but 4 expected
 
 
+class TestPropertyGenerator(TestCase):
+
+    """API PropertyGenerator object test class."""
+
+    family = 'wikipedia'
+    code = 'en'
+
+    def test_info(self):
+        mainpage = self.get_mainpage()
+        links = list(self.site.pagelinks(mainpage, total=10))
+        titles = [l.title(withSection=False)
+                  for l in links]
+        gen = api.PropertyGenerator(site=self.site,
+                                    prop="info",
+                                    titles='|'.join(titles))
+
+        count = 0
+        for pagedata in gen:
+            self.assertIsInstance(pagedata, dict)
+            self.assertIn('pageid', pagedata)
+            self.assertIn('lastrevid', pagedata)
+            count += 1
+        self.assertEqual(len(links), count)
+
+    def test_one_continuation(self):
+        mainpage = self.get_mainpage()
+        links = list(self.site.pagelinks(mainpage, total=10))
+        titles = [l.title(withSection=False)
+                  for l in links]
+        gen = api.PropertyGenerator(site=self.site,
+                                    prop="revisions",
+                                    titles='|'.join(titles))
+        gen.set_maximum_items(-1)  # suppress use of "rvlimit" parameter
+
+        count = 0
+        for pagedata in gen:
+            self.assertIsInstance(pagedata, dict)
+            self.assertIn('pageid', pagedata)
+            self.assertIn('revisions', pagedata)
+            self.assertIn('revid', pagedata['revisions'][0])
+            count += 1
+        self.assertEqual(len(links), count)
+
+    def test_two_continuations(self):
+        mainpage = self.get_mainpage()
+        links = list(self.site.pagelinks(mainpage, total=10))
+        titles = [l.title(withSection=False)
+                  for l in links]
+        gen = api.PropertyGenerator(site=self.site,
+                                    prop="revisions|coordinates",
+                                    titles='|'.join(titles))
+        gen.set_maximum_items(-1)  # suppress use of "rvlimit" parameter
+
+        count = 0
+        for pagedata in gen:
+            self.assertIsInstance(pagedata, dict)
+            self.assertIn('pageid', pagedata)
+            self.assertIn('revisions', pagedata)
+            self.assertIn('revid', pagedata['revisions'][0])
+            count += 1
+        self.assertEqual(len(links), count)
+
+    @unittest.expectedFailure
+    def test_many_continuations_limited(self):
+        mainpage = self.get_mainpage()
+        links = list(self.site.pagelinks(mainpage, total=30))
+        titles = [l.title(withSection=False)
+                  for l in links]
+        gen = api.PropertyGenerator(site=self.site,
+                                    prop="revisions|info|categoryinfo|langlinks|templates",
+                                    rvprop="ids|flags|timestamp|user|comment|content",
+                                    titles='|'.join(titles))
+
+        # An APIError is raised if set_maximum_items is not called.
+        gen.set_maximum_items(-1)  # suppress use of "rvlimit" parameter
+        # Force the generator into continuation mode
+        gen.set_query_increment(5)
+
+        count = 0
+        for pagedata in gen:
+            self.assertIsInstance(pagedata, dict)
+            self.assertIn('pageid', pagedata)
+            count += 1
+        self.assertEqual(len(links), count)
+        # FIXME: AssertionError: 30 != 6150
+
+    @unittest.expectedFailure
+    def test_two_continuations_limited(self):
+        # FIXME: test fails
+        mainpage = self.get_mainpage()
+        links = list(self.site.pagelinks(mainpage, total=30))
+        titles = [l.title(withSection=False)
+                  for l in links]
+        gen = api.PropertyGenerator(site=self.site,
+                                    prop="info|categoryinfo|langlinks|templates",
+                                    titles='|'.join(titles))
+        # Force the generator into continuation mode
+        gen.set_query_increment(5)
+
+        count = 0
+        for pagedata in gen:
+            self.assertIsInstance(pagedata, dict)
+            self.assertIn('pageid', pagedata)
+            count += 1
+        self.assertEqual(len(links), count)
+        # FIXME: AssertionError: 30 != 11550
+
+    # FIXME: test disabled as it takes longer than 10 minutes
+    def _test_two_continuations_limited_long_test(self):
+        """Long duration test, with total & step that are a real scenario."""
+        mainpage = self.get_mainpage()
+        links = list(mainpage.backlinks(total=300))
+        titles = [l.title(withSection=False)
+                  for l in links]
+        gen = api.PropertyGenerator(site=self.site,
+                                    prop="info|categoryinfo|langlinks|templates",
+                                    titles='|'.join(titles))
+        # Force the generator into continuation mode
+        gen.set_query_increment(50)
+
+        count = 0
+        for pagedata in gen:
+            self.assertIsInstance(pagedata, dict)
+            self.assertIn('pageid', pagedata)
+            count += 1
+        self.assertEqual(len(links), count)
+
+
 class TestCachedRequest(DefaultSiteTestCase):
 
     """Test API Request caching.
diff --git a/tests/pagegenerators_tests.py b/tests/pagegenerators_tests.py
index 81720d1..9e19bde 100755
--- a/tests/pagegenerators_tests.py
+++ b/tests/pagegenerators_tests.py
@@ -14,7 +14,10 @@
 import pywikibot
 from pywikibot import pagegenerators
 
-from pywikibot.pagegenerators import PagesFromTitlesGenerator
+from pywikibot.pagegenerators import (
+    PagesFromTitlesGenerator,
+    PreloadingGenerator,
+)
 
 from tests import _data_dir
 from tests.aspects import (
@@ -270,6 +273,41 @@
         self.assertPagelistTitles(titles, self.expected_titles[site.case()])
 
 
+class TestPreloadingGenerator(DefaultSiteTestCase):
+
+    """Test preloading generator on lists."""
+
+    def test_basic(self):
+        """Test PreloadingGenerator with a list of pages."""
+        mainpage = self.get_mainpage()
+        links = list(self.site.pagelinks(mainpage, total=10))
+        count = 0
+        for page in PreloadingGenerator(links, step=20):
+            self.assertIsInstance(page, pywikibot.Page)
+            self.assertIsInstance(page.exists(), bool)
+            if page.exists():
+                self.assertTrue(hasattr(page, "_text"))
+                self.assertEqual(len(page._revisions), 1)
+                self.assertFalse(hasattr(page, '_pageprops'))
+            count += 1
+        self.assertEqual(len(links), count)
+
+    def test_low_step(self):
+        """Test PreloadingGenerator with a low step value."""
+        mainpage = self.get_mainpage()
+        links = list(self.site.pagelinks(mainpage, total=20))
+        count = 0
+        for page in PreloadingGenerator(links, step=10):
+            self.assertIsInstance(page, pywikibot.Page)
+            self.assertIsInstance(page.exists(), bool)
+            if page.exists():
+                self.assertTrue(hasattr(page, "_text"))
+                self.assertEqual(len(page._revisions), 1)
+                self.assertFalse(hasattr(page, '_pageprops'))
+            count += 1
+        self.assertEqual(len(links), count)
+
+
 class TestDequePreloadingGenerator(DefaultSiteTestCase):
 
     """Test preloading generator on lists."""
diff --git a/tests/site_tests.py b/tests/site_tests.py
index 408c60b..262bfd7 100644
--- a/tests/site_tests.py
+++ b/tests/site_tests.py
@@ -280,20 +280,6 @@
         if a:
             self.assertEqual(a[0], mainpage)
 
-    def testPreload(self):
-        """Test that preloading works."""
-        mysite = self.get_site()
-        mainpage = self.get_mainpage()
-        count = 0
-        for page in mysite.preloadpages(mysite.pagelinks(mainpage, total=10)):
-            self.assertIsInstance(page, pywikibot.Page)
-            self.assertIsInstance(page.exists(), bool)
-            if page.exists():
-                self.assertTrue(hasattr(page, "_text"))
-            count += 1
-            if count >= 5:
-                break
-
     def testLinkMethods(self):
         """Test site methods for getting links to and from a page."""
         mysite = self.get_site()
@@ -1562,6 +1548,301 @@
             self.assertTrue(site.is_uploaddisabled())
 
 
+class TestPagePreloading(DefaultSiteTestCase):
+
+    """Test site.preloadpages()."""
+
+    def test_pageids(self):
+        """Test basic preloading with pageids."""
+        mysite = self.get_site()
+        mainpage = self.get_mainpage()
+        count = 0
+        links = mysite.pagelinks(mainpage, total=10)
+        # preloadpages will send the page ids,
+        # as they have already been loaded by pagelinks
+        for page in mysite.preloadpages(links):
+            self.assertIsInstance(page, pywikibot.Page)
+            self.assertIsInstance(page.exists(), bool)
+            if page.exists():
+                self.assertTrue(hasattr(page, "_text"))
+                self.assertEqual(len(page._revisions), 1)
+                self.assertFalse(hasattr(page, '_pageprops'))
+            count += 1
+            if count >= 5:
+                break
+
+    def test_titles(self):
+        """Test basic preloading with titles."""
+        mysite = self.get_site()
+        mainpage = self.get_mainpage()
+        count = 0
+        links = mysite.pagelinks(mainpage, total=10)
+
+        # remove the pageids that have already been loaded above by pagelinks
+        # so that preloadpages will use the titles instead
+        for page in links:
+            del page._pageid
+
+        for page in mysite.preloadpages(links):
+            self.assertIsInstance(page, pywikibot.Page)
+            self.assertIsInstance(page.exists(), bool)
+            if page.exists():
+                self.assertTrue(hasattr(page, "_text"))
+                self.assertEqual(len(page._revisions), 1)
+                self.assertFalse(hasattr(page, '_pageprops'))
+            count += 1
+            if count >= 5:
+                break
+
+    def test_preload_continuation(self):
+        """Test preloading continuation works."""
+        mysite = self.get_site()
+        mainpage = self.get_mainpage()
+        count = 0
+        links = mysite.pagelinks(mainpage, total=10)
+        for page in mysite.preloadpages(links, groupsize=5):
+            self.assertIsInstance(page, pywikibot.Page)
+            self.assertIsInstance(page.exists(), bool)
+            if page.exists():
+                self.assertTrue(hasattr(page, "_text"))
+                self.assertEqual(len(page._revisions), 1)
+                self.assertFalse(hasattr(page, '_pageprops'))
+            count += 1
+            if count >= 6:
+                break
+
+    def test_preload_high_groupsize(self):
+        """Test preloading continuation with groupsize greater than total."""
+        mysite = self.get_site()
+        mainpage = self.get_mainpage()
+        count = 0
+
+        # Determine if there are enough links on the main page,
+        # for the test to be useful.
+        link_count = len(list(mysite.pagelinks(mainpage, total=10)))
+        if link_count < 2:
+            raise unittest.SkipTest('insufficient links on main page')
+
+        # get a fresh generator; we now know how many results it will have,
+        # if it is less than 10.
+        links = mysite.pagelinks(mainpage, total=10)
+        for page in mysite.preloadpages(links, groupsize=50):
+            self.assertIsInstance(page, pywikibot.Page)
+            self.assertIsInstance(page.exists(), bool)
+            if page.exists():
+                self.assertTrue(hasattr(page, "_text"))
+                self.assertEqual(len(page._revisions), 1)
+                self.assertFalse(hasattr(page, '_pageprops'))
+            count += 1
+        self.assertEqual(count, link_count)
+
+    def test_preload_low_groupsize(self):
+        """Test preloading continuation with groupsize less than total."""
+        mysite = self.get_site()
+        mainpage = self.get_mainpage()
+        count = 0
+
+        # Determine if there are enough links on the main page,
+        # for the test to be useful.
+        link_count = len(list(mysite.pagelinks(mainpage, total=10)))
+        if link_count < 2:
+            raise unittest.SkipTest('insufficient links on main page')
+
+        # get a fresh generator; we now know how many results it will have,
+        # if it is less than 10.
+        links = mysite.pagelinks(mainpage, total=10)
+        for page in mysite.preloadpages(links, groupsize=5):
+            self.assertIsInstance(page, pywikibot.Page)
+            self.assertIsInstance(page.exists(), bool)
+            if page.exists():
+                self.assertTrue(hasattr(page, "_text"))
+                self.assertEqual(len(page._revisions), 1)
+                self.assertFalse(hasattr(page, '_pageprops'))
+            count += 1
+        self.assertEqual(count, link_count)
+
+    def test_preload_unexpected_titles_using_pageids(self):
+        """Test sending pageids with unnormalized titles, causing warnings."""
+        mysite = self.get_site()
+        mainpage = self.get_mainpage()
+        count = 0
+        links = list(mysite.pagelinks(mainpage, total=10))
+        if len(links) < 2:
+            raise unittest.SkipTest('insufficient links on main page')
+
+        # change the title of the page, to test sametitle().
+        # preloadpages will send the page ids, as they have already been loaded
+        # by pagelinks, and preloadpages should complain the returned titles
+        # do not match any title in the pagelist.
+        for page in links:
+            page._link._text += ' '
+
+        gen = mysite.preloadpages(links, groupsize=5)
+        for page in gen:
+            self.assertIsInstance(page, pywikibot.Page)
+            self.assertIsInstance(page.exists(), bool)
+            if page.exists():
+                self.assertFalse(hasattr(page, "_text"))
+                self.assertEqual(len(page._revisions), 0)
+                self.assertFalse(hasattr(page, '_pageprops'))
+            count += 1
+            if count > 5:
+                break
+
+    def test_preload_unexpected_titles_using_titles(self):
+        """Test sending unnormalized titles, causing warnings."""
+        mysite = self.get_site()
+        mainpage = self.get_mainpage()
+        count = 0
+        links = list(mysite.pagelinks(mainpage, total=10))
+        if len(links) < 2:
+            raise unittest.SkipTest('insufficient links on main page')
+
+        # change the title of the page _and_ delete the pageids.
+        # preloadpages can only send the titles, and preloadpages should
+        # complain the returned titles do not match any title in the pagelist.
+        for page in links:
+            page._link._text += ' '
+            del page._pageid
+
+        gen = mysite.preloadpages(links, groupsize=5)
+        for page in gen:
+            self.assertIsInstance(page, pywikibot.Page)
+            self.assertIsInstance(page.exists(), bool)
+            if page.exists():
+                self.assertFalse(hasattr(page, "_text"))
+                self.assertEqual(len(page._revisions), 0)
+                self.assertFalse(hasattr(page, '_pageprops'))
+            count += 1
+            if count > 5:
+                break
+
+    def test_preload_invalid_titles_without_pageids(self):
+        """Test sending invalid titles. No warnings issued, but it should."""
+        mysite = self.get_site()
+        mainpage = self.get_mainpage()
+        count = 0
+        links = list(mysite.pagelinks(mainpage, total=10))
+        if len(links) < 2:
+            raise unittest.SkipTest('insufficient links on main page')
+
+        for page in links:
+            page._link._text += ' foobar'
+            del page._pageid
+
+        gen = mysite.preloadpages(links, groupsize=5)
+        for page in gen:
+            self.assertIsInstance(page, pywikibot.Page)
+            self.assertIsInstance(page.exists(), bool)
+            self.assertFalse(page.exists())
+            count += 1
+            if count > 5:
+                break
+
+    @unittest.expectedFailure
+    def test_preload_langlinks_normal(self):
+        """Test preloading continuation works."""
+        # FIXME: test fails
+        mysite = self.get_site()
+        mainpage = self.get_mainpage()
+        count = 0
+        links = mysite.pagelinks(mainpage, total=10)
+        for page in mysite.preloadpages(links, groupsize=5, langlinks=True):
+            self.assertIsInstance(page, pywikibot.Page)
+            self.assertIsInstance(page.exists(), bool)
+            if page.exists():
+                self.assertTrue(hasattr(page, "_text"))
+                self.assertEqual(len(page._revisions), 1)
+                self.assertFalse(hasattr(page, '_pageprops'))
+                self.assertTrue(hasattr(page, '_langlinks'))
+            count += 1
+            if count >= 6:
+                break
+
+    @unittest.expectedFailure
+    def test_preload_langlinks_count(self):
+        """Test preloading continuation works."""
+        # FIXME: test fails
+        mysite = self.get_site()
+        mainpage = self.get_mainpage()
+        count = 0
+        links = mysite.pagelinks(mainpage, total=20)
+        pages = list(mysite.preloadpages(links, groupsize=5,
+                                         langlinks=True))
+        for page in pages:
+            self.assertIsInstance(page, pywikibot.Page)
+            self.assertIsInstance(page.exists(), bool)
+            if page.exists():
+                self.assertTrue(hasattr(page, "_text"))
+                self.assertEqual(len(page._revisions), 1)
+                self.assertFalse(hasattr(page, '_pageprops'))
+            count += 1
+
+        self.assertEqual(len(links), count)
+
+    def _test_preload_langlinks_long(self):
+        """Test preloading continuation works."""
+        # FIXME: test fails.  It is disabled as it takes more
+        # than 10 minutes on travis for English Wikipedia
+        mysite = self.get_site()
+        mainpage = self.get_mainpage()
+        count = 0
+        links = mainpage.backlinks(total=100)
+        for page in mysite.preloadpages(links, groupsize=50,
+                                        langlinks=True):
+            self.assertIsInstance(page, pywikibot.Page)
+            self.assertIsInstance(page.exists(), bool)
+            if page.exists():
+                self.assertTrue(hasattr(page, "_text"))
+                self.assertEqual(len(page._revisions), 1)
+                self.assertFalse(hasattr(page, '_pageprops'))
+                self.assertTrue(hasattr(page, '_langlinks'))
+            count += 1
+
+        self.assertEqual(len(links), count)
+
+    @unittest.expectedFailure
+    def test_preload_templates(self):
+        """Test preloading templates works."""
+        mysite = self.get_site()
+        mainpage = self.get_mainpage()
+        count = 0
+        # Use backlinks, as any backlink has at least one link
+        links = mysite.pagelinks(mainpage, total=10)
+        for page in mysite.preloadpages(links, templates=True):
+            self.assertIsInstance(page, pywikibot.Page)
+            self.assertIsInstance(page.exists(), bool)
+            if page.exists():
+                self.assertTrue(hasattr(page, "_text"))
+                self.assertEqual(len(page._revisions), 1)
+                self.assertFalse(hasattr(page, '_pageprops'))
+                self.assertTrue(hasattr(page, '_templates'))
+            count += 1
+            if count >= 6:
+                break
+
+    @unittest.expectedFailure
+    def test_preload_templates_and_langlinks(self):
+        """Test preloading templates and langlinks works."""
+        mysite = self.get_site()
+        mainpage = self.get_mainpage()
+        count = 0
+        # Use backlinks, as any backlink has at least one link
+        links = mysite.pagebacklinks(mainpage, total=10)
+        for page in mysite.preloadpages(links, langlinks=True, templates=True):
+            self.assertIsInstance(page, pywikibot.Page)
+            self.assertIsInstance(page.exists(), bool)
+            if page.exists():
+                self.assertTrue(hasattr(page, "_text"))
+                self.assertEqual(len(page._revisions), 1)
+                self.assertFalse(hasattr(page, '_pageprops'))
+                self.assertTrue(hasattr(page, '_templates'))
+                self.assertTrue(hasattr(page, '_langlinks'))
+            count += 1
+            if count >= 6:
+                break
+
+
 class TestDataSitePreloading(WikidataTestCase):
 
     """Test DataSite.preloaditempages for repo pages."""

-- 
To view, visit https://gerrit.wikimedia.org/r/173499
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I11df5361bd2b67cce132faba252552bfa5053827
Gerrit-PatchSet: 5
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: John Vandenberg <[email protected]>
Gerrit-Reviewer: John Vandenberg <[email protected]>
Gerrit-Reviewer: Ladsgroup <[email protected]>
Gerrit-Reviewer: Merlijn van Deen <[email protected]>
Gerrit-Reviewer: Mpaa <[email protected]>
Gerrit-Reviewer: XZise <[email protected]>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
Pywikibot-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/pywikibot-commits

Reply via email to