jenkins-bot has submitted this change and it was merged.

Change subject: Filters needed for newitem.py
......................................................................


Filters needed for newitem.py

EdittimeFilterPageGenerator, functionally equivalent to the compat
version, but better.

Also added filter WikibaseItemFilterPageGenerator, and
Page.oldest_revision.

Renamed WikidataItemGenerator to WikibaseItemGenerator.

Bug: 55007
Change-Id: I71b051818773e8b2f78eab534c4dfb23072d0ee9
---
M pywikibot/page.py
M pywikibot/pagegenerators.py
M tests/pagegenerators_tests.py
3 files changed, 221 insertions(+), 31 deletions(-)

Approvals:
  John Vandenberg: Looks good to me, but someone else must approve
  Ladsgroup: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/pywikibot/page.py b/pywikibot/page.py
index 961fe6f..cc094bf 100644
--- a/pywikibot/page.py
+++ b/pywikibot/page.py
@@ -563,6 +563,9 @@
         """
         return self.site.page_exists(self)
 
+    def oldest_revision(self):
+        return self.getVersionHistory(reverseOrder=True, total=1)[0]
+
     def isRedirectPage(self):
         """Return True if this is a redirect, False if not or not existing."""
         return self.site.page_isredirect(self)
diff --git a/pywikibot/pagegenerators.py b/pywikibot/pagegenerators.py
index f946bfb..922c0a9 100644
--- a/pywikibot/pagegenerators.py
+++ b/pywikibot/pagegenerators.py
@@ -21,10 +21,11 @@
 __version__ = '$Id$'
 #
 
-import sys
 import codecs
+import datetime
 import itertools
 import re
+import sys
 import time
 
 import pywikibot
@@ -977,13 +978,30 @@
             yield page
 
 
-def RedirectFilterPageGenerator(generator, no_redirects=True):
-    """Yield pages from another generator that are redirects or not."""
-    for page in generator:
-        if not page.isRedirectPage() and no_redirects:
-            yield page
-        elif page.isRedirectPage() and not no_redirects:
-            yield page
+def RedirectFilterPageGenerator(generator, no_redirects=True,
+                                show_filtered=False):
+    """
+    Yield pages from another generator that are redirects or not.
+
+    @param no_redirects: Exclude redirects if True, else only include
+        redirects.
+    @type no_redirects: bool
+    @param show_filtered: Output a message for each page not yielded
+    @type show_filtered: bool
+    """
+    for page in generator or []:
+        if no_redirects:
+            if not page.isRedirectPage():
+                yield page
+            elif show_filtered:
+                pywikibot.output(u'%s is a redirect page. Skipping.' % page)
+
+        else:
+            if page.isRedirectPage():
+                yield page
+            elif show_filtered:
+                pywikibot.output(u'%s is not a redirect page. Skipping.'
+                                 % page)
 
 
 def DuplicateFilterPageGenerator(generator):
@@ -1079,6 +1097,74 @@
 # name the generator methods
 RegexFilterPageGenerator = RegexFilter.titlefilter
 RegexBodyFilterPageGenerator = RegexFilter.contentfilter
+
+
+@deprecated_args(begintime='last_edit_start', endtime='last_edit_end')
+def EdittimeFilterPageGenerator(generator,
+                                last_edit_start=None,
+                                last_edit_end=None,
+                                first_edit_start=None,
+                                first_edit_end=None,
+                                show_filtered=False):
+    """
+    Wrap a generator to filter pages outside last or first edit range.
+
+    @param generator: A generator object
+    @param last_edit_start: Only yield pages last edited after this time
+    @type last_edit_start: datetime
+    @param last_edit_end: Only yield pages last edited before this time
+    @type last_edit_end: datetime
+    @param first_edit_start: Only yield pages first edited after this time
+    @type first_edit_start: datetime
+    @param first_edit_end: Only yield pages first edited before this time
+    @type first_edit_end: datetime
+    @param show_filtered: Output a message for each page not yielded
+    @type show_filtered: bool
+
+    """
+    do_last_edit = last_edit_start or last_edit_end
+    do_first_edit = first_edit_start or first_edit_end
+
+    last_edit_start = last_edit_start or datetime.datetime.min
+    last_edit_end = last_edit_end or datetime.datetime.max
+    first_edit_start = first_edit_start or datetime.datetime.min
+    first_edit_end = first_edit_end or datetime.datetime.max
+
+    for page in generator or []:
+        if do_last_edit:
+            last_edit = page.editTime()
+
+            if last_edit < last_edit_start:
+                if show_filtered:
+                    pywikibot.output(
+                        u'Last edit on %s was on %s.\nToo old. Skipping.'
+                        % (page, last_edit.isoformat()))
+                continue
+
+            if last_edit > last_edit_end:
+                if show_filtered:
+                    pywikibot.output(
+                        u'Last edit on %s was on %s.\nToo recent. Skipping.'
+                        % (page, last_edit.isoformat()))
+                continue
+
+        if do_first_edit:
+            first_edit = page.oldest_revision().timestamp
+
+            if first_edit < first_edit_start:
+                if show_filtered:
+                    pywikibot.output(
+                        u'First edit on %s was on %s.\nToo old. Skipping.'
+                        % (page, first_edit.isoformat()))
+
+            if first_edit > first_edit_end:
+                if show_filtered:
+                    pywikibot.output(
+                        u'First edit on %s was on %s.\nToo recent. Skipping.'
+                        % (page, first_edit.isoformat()))
+                continue
+
+        yield page
 
 
 def CombinedPageGenerator(generators):
@@ -1273,9 +1359,9 @@
         yield entry.title()
 
 
-def WikidataItemGenerator(gen):
+def WikibaseItemGenerator(gen):
     """
-    A wrapper generator used to yield Wikidata items of another generator.
+    A wrapper generator used to yield Wikibase items of another generator.
 
     @param gen: Generator to wrap.
     @type gen: generator
@@ -1286,13 +1372,52 @@
         if isinstance(page, pywikibot.ItemPage):
             yield page
         elif page.site.data_repository() == page.site:
-            # These are already items, just not item pages
+            # These are already items, as they have a DataSite in page.site.
+            # However generator is yielding Page, so convert to ItemPage.
             # FIXME: If we've already fetched content, we should retain it
             yield pywikibot.ItemPage(page.site, page.title())
         else:
             yield pywikibot.ItemPage.fromPage(page)
 
 
+WikidataItemGenerator = WikibaseItemGenerator
+
+
+def WikibaseItemFilterPageGenerator(generator, has_item=True,
+                                    show_filtered=False):
+    """
+    A wrapper generator used to exclude if page has a wikibase item or not.
+
+    @param generator: Generator to wrap.
+    @type generator: generator
+    @param has_item: Exclude pages without an item if True, or only
+        include pages without an item if False
+    @type has_item: bool
+    @param show_filtered: Output a message for each page not yielded
+    @type show_filtered: bool
+    @return: Wrapped generator
+    @rtype: generator
+    """
+    for page in generator or []:
+        try:
+            page_item = pywikibot.ItemPage.fromPage(page, lazy_load=False)
+        except pywikibot.NoPage:
+            page_item = None
+
+        if page_item:
+            if not has_item:
+                if show_filtered:
+                    pywikibot.output(
+                        '%s has a wikidata item.  Skipping.' % page)
+                continue
+        else:
+            if has_item:
+                if show_filtered:
+                    pywikibot.output(
+                        '%s doesn\'t have a wikidata item.  Skipping.' % page)
+                continue
+
+
 # TODO below
 @deprecated_args(extension=None, number="total", repeat=None)
 def UnusedFilesGenerator(total=100, site=None, extension=None):
diff --git a/tests/pagegenerators_tests.py b/tests/pagegenerators_tests.py
index 9492579..0ce3bba 100755
--- a/tests/pagegenerators_tests.py
+++ b/tests/pagegenerators_tests.py
@@ -7,11 +7,14 @@
 # Distributed under the terms of the MIT license.
 __version__ = '$Id$'
 
+import datetime
 import os
 import sys
 
 import pywikibot
 from pywikibot import pagegenerators
+
+from pywikibot.pagegenerators import PagesFromTitlesGenerator
 
 from tests import _data_dir
 from tests.aspects import (
@@ -23,7 +26,28 @@
 from tests.thread_tests import GeneratorIntersectTestCase
 
 
-class TestPageGenerators(TestCase):
+en_wp_page_titles = (
+    # just a bunch of randomly selected titles for English Wikipedia tests
+    u"Eastern Sayan",
+    u"The Addams Family (pinball)",
+    u"Talk:Nowy Sącz",
+    u"Talk:Battle of Węgierska Górka",
+    u"Template:!",
+    u"Template:Template",
+)
+
+en_wp_nopage_titles = (
+    u"Cities in Burkina Faso",
+    u"Talk:Hispanic (U.S. Census)",
+    u"Talk:Stołpce",
+    u"Template:!/Doc",
+    u"Template:!/Meta",
+    u"Template:Template/Doc",
+    u"Template:Template/Meta",
+)
+
+
+class TestDryPageGenerators(TestCase):
 
     """Test pagegenerators methods."""
 
@@ -32,25 +56,10 @@
 
     dry = True
 
-    titles = (
-        # just a bunch of randomly selected titles
-        u"Cities in Burkina Faso",
-        u"Eastern Sayan",
-        u"The Addams Family (pinball)",
-        u"Talk:Hispanic (U.S. Census)",
-        u"Talk:Stołpce",
-        u"Talk:Nowy Sącz",
-        u"Talk:Battle of Węgierska Górka",
-        u"Template:!",
-        u"Template:!/Doc",
-        u"Template:!/Meta",
-        u"Template:Template",
-        u"Template:Template/Doc",
-        u"Template:Template/Meta",
-    )
+    titles = en_wp_page_titles + en_wp_nopage_titles
 
     def setUp(self):
-        super(TestPageGenerators, self).setUp()
+        super(TestDryPageGenerators, self).setUp()
         self.site = self.get_site()
 
     def assertFunction(self, obj):
@@ -113,8 +122,8 @@
         gen = pagegenerators.RegexFilterPageGenerator(gen, ['template', '/meta'],
                                                       quantifier='any')
         self.assertPagelistTitles(gen,
-                                  ('Template:!/Meta',
-                                   'Template:Template',
+                                  ('Template:Template',
+                                   'Template:!/Meta',
                                    'Template:Template/Doc',
                                    'Template:Template/Meta'))
         gen = pagegenerators.PagesFromTitlesGenerator(self.titles,
@@ -156,6 +165,59 @@
         self.assertEqual(len(tuple(gen)), 9)
 
 
+class EdittimeFilterPageGeneratorTestCase(TestCase):
+
+    """Test EdittimeFilterPageGenerator."""
+
+    family = 'wikipedia'
+    code = 'en'
+
+    titles = en_wp_page_titles
+
+    def test_first_edit(self):
+        expect = (
+            u'The Addams Family (pinball)',
+            u'Talk:Nowy Sącz',
+            u'Template:Template',
+        )
+        gen = PagesFromTitlesGenerator(self.titles, self.site)
+        gen = pagegenerators.EdittimeFilterPageGenerator(
+            gen, first_edit_end=datetime.datetime(2006, 1, 1))
+        self.assertPagelistTitles(gen, titles=expect, site=self.site)
+
+        gen = PagesFromTitlesGenerator(self.titles, self.site)
+        gen = pagegenerators.EdittimeFilterPageGenerator(
+            gen, first_edit_start=datetime.datetime(2006, 1, 1))
+        opposite_pages = list(gen)
+        self.assertTrue(all(isinstance(p, pywikibot.Page)
+                            for p in opposite_pages))
+        self.assertTrue(all(p.title not in expect for p in opposite_pages))
+
+    def test_last_edit(self):
+        two_days_ago = datetime.datetime.now() - datetime.timedelta(days=2)
+        nine_days_ago = datetime.datetime.now() - datetime.timedelta(days=9)
+
+        gen = PagesFromTitlesGenerator(['Wikipedia:Sandbox'], self.site)
+        gen = pagegenerators.EdittimeFilterPageGenerator(
+            gen, last_edit_start=two_days_ago)
+        self.assertEqual(len(list(gen)), 1)
+
+        gen = PagesFromTitlesGenerator(['Wikipedia:Sandbox'], self.site)
+        gen = pagegenerators.EdittimeFilterPageGenerator(
+            gen, last_edit_end=two_days_ago)
+        self.assertEqual(len(list(gen)), 0)
+
+        gen = PagesFromTitlesGenerator(['Template:Sidebox'], self.site)
+        gen = pagegenerators.EdittimeFilterPageGenerator(
+            gen, last_edit_end=nine_days_ago)
+        self.assertEqual(len(list(gen)), 1)
+
+        gen = PagesFromTitlesGenerator(['Template:Sidebox'], self.site)
+        gen = pagegenerators.EdittimeFilterPageGenerator(
+            gen, last_edit_start=nine_days_ago)
+        self.assertEqual(len(list(gen)), 0)
+
+
 class TestRepeatingGenerator(TestCase):
 
     """Test RepeatingGenerator."""

-- 
To view, visit https://gerrit.wikimedia.org/r/172493
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I71b051818773e8b2f78eab534c4dfb23072d0ee9
Gerrit-PatchSet: 4
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: John Vandenberg <[email protected]>
Gerrit-Reviewer: John Vandenberg <[email protected]>
Gerrit-Reviewer: Ladsgroup <[email protected]>
Gerrit-Reviewer: Merlijn van Deen <[email protected]>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
Pywikibot-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/pywikibot-commits

Reply via email to