XZise has uploaded a new change for review.
https://gerrit.wikimedia.org/r/218313
Change subject: [FEAT] SingleSiteBot: A bot class for one site
......................................................................
[FEAT] SingleSiteBot: A bot class for one site
Several bots can only work with pages on one site as they initialize stuff
which is site specific. In theory page generators can return pages for
different sites which can cause errors.
Change-Id: I5e8759ccaef0063d6d6f9842bf3d3af3baa0f567
---
M pywikibot/bot.py
A tests/bot_tests.py
2 files changed, 182 insertions(+), 20 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/pywikibot/core refs/changes/13/218313/1
diff --git a/pywikibot/bot.py b/pywikibot/bot.py
index 6cca348..791db45 100644
--- a/pywikibot/bot.py
+++ b/pywikibot/bot.py
@@ -1124,7 +1124,7 @@
"""The user has cancelled processing at a prompt."""
-class Bot(object):
+class BaseBot(object):
"""
Generic Bot to be subclassed.
@@ -1411,26 +1411,21 @@
% (self.__class__.__name__, self._site, site))
self._site = site
+ def pre_run(self):
+ """Initialize the bot before calling run."""
+ pass
+
+ def pre_treat(self, page):
+ """Return whether treat should be executed for the page."""
+ return True
+
def run(self):
"""Process all pages in generator."""
if not hasattr(self, 'generator'):
raise NotImplementedError('Variable %s.generator not set.'
% self.__class__.__name__)
- # This check is to remove the possibility that the superclass changing
- # self.site causes bugs in subclasses.
- # If the subclass has set self.site before run(), it may be that the
- # bot processes pages on sites other than self.site, and therefore
- # this method cant alter self.site. To use this functionality, don't
- # set self.site in __init__, and use page.site in treat().
- auto_update_site = not self._site
- if not auto_update_site:
- warning(
- '%s.__init__ set the Bot.site property; this is only needed '
- 'when the Bot accesses many sites.' % self.__class__.__name__)
- else:
- log('Bot is managing the %s.site property in run()'
- % self.__class__.__name__)
+ self.pre_run()
maxint = 0
if sys.version_info[0] == 2:
@@ -1438,11 +1433,10 @@
try:
for page in self.generator:
- # When in auto update mode, set the site when it changes,
- # so subclasses can hook onto changes to site.
- if (auto_update_site and
- (not self._site or page.site != self.site)):
- self.site = page.site
+ if not self.pre_treat(page):
+ pywikibot.log('Skipped page "{0}" due to '
+ 'pre_treat.'.format(page))
+ continue
# Process the page
self.treat(page)
@@ -1467,6 +1461,74 @@
self.exit()
+class Bot(BaseBot):
+
+ """Generic bot subclass for multiple sites."""
+
+ def pre_run(self):
+ """Check if it automatically updates the site before run."""
+ # This check is to remove the possibility that the superclass changing
+ # self.site causes bugs in subclasses.
+ # If the subclass has set self.site before run(), it may be that the
+ # bot processes pages on sites other than self.site, and therefore
+ # this method cant alter self.site. To use this functionality, don't
+ # set self.site in __init__, and use page.site in treat().
+ self._auto_update_site = not self._site
+ if not self._auto_update_site:
+ warning(
+ '%s.__init__ set the Bot.site property; this is only needed '
+ 'when the Bot accesses many sites.' % self.__class__.__name__)
+ else:
+ log('Bot is managing the %s.site property in run()'
+ % self.__class__.__name__)
+
+ def pre_treat(self, page):
+ """Update site before calling treat."""
+ # When in auto update mode, set the site when it changes,
+ # so subclasses can hook onto changes to site.
+ if (self._auto_update_site and
+ (not self._site or page.site != self.site)):
+ self.site = page.site
+ return True
+
+
+class SingleSiteBot(BaseBot):
+
+ """
+ A bot only working on one site and ignoring the others.
+
+ If no site is given from the start it'll use the first page's site. Any page
+ after the site has been defined and is not on the defined site will be
+ ignored.
+ """
+
+ @property
+ def site(self):
+ """Get the current site."""
+ return super(SingleSiteBot, self).site
+
+ @site.setter
+ def site(self, value):
+ """Set the current site but warns if different."""
+ if self._site:
+ pywikibot.warning('Changed the site from "{0}" to '
+ '"{1}"'.format(self._site, value))
+ self._site = value
+
+ def pre_treat(self, page):
+ """Set site if not defined and return if it's on the defined site."""
+ if not self._site:
+ self.site = page.site
+ return True
+ elif page.site == self.site:
+ return True
+ else:
+ pywikibot.warning('Retrieved page "{0}" on site "{1}" but this bot '
+ 'works only on site "{2}"'.format(
+ page.title(), page.site, self.site))
+ return False
+
+
class CurrentPageBot(Bot):
"""A bot which automatically sets 'current_page' on each treat()."""
diff --git a/tests/bot_tests.py b/tests/bot_tests.py
new file mode 100644
index 0000000..a1489f4
--- /dev/null
+++ b/tests/bot_tests.py
@@ -0,0 +1,100 @@
+# -*- coding: utf-8 -*-
+"""Bot tests."""
+#
+# (C) Pywikibot team, 2015
+#
+# Distributed under the terms of the MIT license.
+#
+from __future__ import unicode_literals
+
+__version__ = '$Id$'
+#
+import sys
+
+import pywikibot
+import pywikibot.bot
+
+from tests.aspects import unittest, SiteAttributeTestCase
+
+
+class TestDrySiteBot(SiteAttributeTestCase):
+
+ """Tests for the BaseBot subclasses."""
+
+ dry = True
+
+ sites = {
+ 'de': {
+ 'family': 'wikipedia',
+ 'code': 'de'
+ },
+ 'en': {
+ 'family': 'wikipedia',
+ 'code': 'en'
+ }
+ }
+
+ def _generator(self):
+ """Generic generator."""
+ yield pywikibot.Page(self.de, 'Page 1')
+ yield pywikibot.Page(self.en, 'Page 2')
+
+ def _treat(self, pages):
+ """Get tests which are executed on each treat."""
+ def treat(page):
+ self.assertEqual(page, next(self._page_iter))
+ self.assertIsNotNone(self.bot._site)
+ self.assertEqual(self.bot.site, self.bot._site)
+ if self._treat_site:
+ self.assertEqual(self.bot._site, self._treat_site)
+ elif self._treat_site is None:
+ self._treat_site = self.bot.site
+ self.assertEqual(page.site, self.bot.site)
+ self._page_iter = iter(pages)
+ return treat
+
+ def _exit(self, treated, written, exception):
+ """Get tests which are executed on exit."""
+ def exit():
+ self.assertEqual(self.bot._treat_counter, treated)
+ self.assertEqual(self.bot._save_counter, written)
+ exc = sys.exc_info()[0]
+ if exception:
+ self.assertIsInstance(exc, exception.__class__)
+ else:
+ self.assertIsNone(exc)
+ self.assertRaises(StopIteration, next, self._page_iter)
+ return exit
+
+ def test_SingleSiteBot_automatic(self):
+ """Test SingleSiteBot class with no predefined site."""
+ self._treat_site = None
+ self.bot = pywikibot.bot.SingleSiteBot(generator=self._generator())
+ self.bot.treat = self._treat([pywikibot.Page(self.de, 'Page 1')])
+ self.bot.exit = self._exit(1, 0, None)
+ self.bot.run()
+
+ def test_SingleSiteBot_specific(self):
+ """Test SingleSiteBot class with predefined site."""
+ self._treat_site = self.en
+ self.bot = pywikibot.bot.SingleSiteBot(generator=self._generator())
+ self.bot.site = self.en
+ self.bot.treat = self._treat([pywikibot.Page(self.en, 'Page 2')])
+ self.bot.exit = self._exit(1, 0, None)
+ self.bot.run()
+
+ def test_Bot(self):
+ """Test normal Bot class."""
+ # Assert no specific site
+ self._treat_site = False
+ self.bot = pywikibot.bot.Bot(generator=self._generator())
+ self.bot.treat = self._treat(self._generator())
+ self.bot.exit = self._exit(2, 0, None)
+ self.bot.run()
+
+
+if __name__ == '__main__':
+ try:
+ unittest.main()
+ except SystemExit:
+ pass
--
To view, visit https://gerrit.wikimedia.org/r/218313
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I5e8759ccaef0063d6d6f9842bf3d3af3baa0f567
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: XZise <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits