John Vandenberg has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/172493

Change subject: Filters needed for newitem.py
......................................................................

Filters needed for newitem.py

EdittimeFilterPageGenerator, functionally equivalent to the compat
version, but better.

Also added filter WikibaseItemFilterPageGenerator, and
Page.oldest_revision.

Renamed WikidataItemGenerator to WikibaseItemGenerator.

Bug: 55007
Change-Id: I71b051818773e8b2f78eab534c4dfb23072d0ee9
---
M pywikibot/page.py
M pywikibot/pagegenerators.py
2 files changed, 139 insertions(+), 11 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/pywikibot/core 
refs/changes/93/172493/1

diff --git a/pywikibot/page.py b/pywikibot/page.py
index 961fe6f..e7bcf97 100644
--- a/pywikibot/page.py
+++ b/pywikibot/page.py
@@ -563,6 +563,9 @@
         """
         return self.site.page_exists(self)
 
+     def oldest_revision(self):
+         return self.getVersionHistory(reverseOrder=True, total=1)[0]
+
     def isRedirectPage(self):
         """Return True if this is a redirect, False if not or not existing."""
         return self.site.page_isredirect(self)
diff --git a/pywikibot/pagegenerators.py b/pywikibot/pagegenerators.py
index f946bfb..5b009ce 100644
--- a/pywikibot/pagegenerators.py
+++ b/pywikibot/pagegenerators.py
@@ -21,10 +21,11 @@
 __version__ = '$Id$'
 #
 
-import sys
 import codecs
+import datetime
 import itertools
 import re
+import sys
 import time
 
 import pywikibot
@@ -977,13 +978,30 @@
             yield page
 
 
-def RedirectFilterPageGenerator(generator, no_redirects=True):
-    """Yield pages from another generator that are redirects or not."""
-    for page in generator:
-        if not page.isRedirectPage() and no_redirects:
-            yield page
-        elif page.isRedirectPage() and not no_redirects:
-            yield page
+def RedirectFilterPageGenerator(generator, no_redirects=True,
+                                show_filtered=False):
+    """
+    Yield pages from another generator that are redirects or not.
+
+    @param no_redirects: Exclude redirects if True, else only include
+        redirects.
+    @param no_redirects: bool
+    @param show_filtered: Output a message for each page not yielded
+    @type show_filtered: bool
+    """
+    for page in generator or []:
+        if no_redirects:
+            if not page.isRedirectPage():
+                yield page
+            elif show_filtered:
+                pywikibot.output(u'%s is a redirect page. Skipping.' % page)
+
+        elif not no_redirects:
+            if page.isRedirectPage():
+                yield page
+            elif show_filtered:
+                pywikibot.output(u'%s is not a redirect page. Skipping.'
+                                 % page)
 
 
 def DuplicateFilterPageGenerator(generator):
@@ -1079,6 +1097,74 @@
 # name the generator methods
 RegexFilterPageGenerator = RegexFilter.titlefilter
 RegexBodyFilterPageGenerator = RegexFilter.contentfilter
+
+
+@deprecated_args(begintime='last_edit_start', endtime='last_edit_end')
+def EdittimeFilterPageGenerator(generator,
+                                last_edit_start=None,
+                                last_edit_end=None,
+                                first_edit_start=None,
+                                first_edit_end=None,
+                                show_filtered=False):
+    """
+    Wrap a generator to filter pages outside last or first edit range.
+
+    @param generator: A generator object
+    @param last_edit_start: Only yield pages last edited after this time
+    @type last_edit_start: datetime
+    @param last_edit_end: Only yield pages last edited before this time
+    @type last_edit_end: datetime
+    @param first_edit_start: Only yield pages first edited after this time
+    @type first_edit_start: datetime
+    @param first_edit_end: Only yield pages first edited before this time
+    @type first_edit_end: datetime
+    @param show_filtered: Output a message for each page not yielded
+    @type show_filtered: bool
+
+    """
+    do_last_edit = last_edit_start or last_edit_end
+    do_first_edit = first_edit_start or first_edit_end
+
+    last_edit_start = last_edit_start or datetime.datetime.min
+    last_edit_end = last_edit_end or datetime.datetime.max
+    first_edit_start = first_edit_start or datetime.datetime.min
+    first_edit_end = first_edit_end or datetime.datetime.max
+
+    for page in generator or []:
+        if do_last_edit:
+            last_edit = page.editTime()
+
+            if last_edit < last_edit_start:
+                if show_filtered:
+                    pywikibot.output(
+                        u'Last edit on %s was on %s.\nToo old. Skipping.'
+                        % (page, last_edit.isoformat()))
+                continue
+
+            if last_edit > last_edit_end:
+                if show_filtered:
+                    pywikibot.output(
+                        u'Last edit on %s was on %s.\nToo recent. Skipping.'
+                        % (page, last_edit.isoformat()))
+                continue
+
+        if do_first_edit:
+            first_edit = page.oldest_revision().timestamp
+
+            if first_edit < last_edit_start:
+                if show_filtered:
+                    pywikibot.output(
+                        u'First edit on %s was on %s.\nToo old. Skipping.'
+                        % (page, first_edit.isoformat()))
+
+            if first_edit > last_edit_end:
+                if show_filtered:
+                    pywikibot.output(
+                        u'First edit on %s was on %s.\nToo recent. Skipping.'
+                        % (page, first_edit.isoformat()))
+                continue
+
+        yield page
 
 
 def CombinedPageGenerator(generators):
@@ -1273,9 +1359,9 @@
         yield entry.title()
 
 
-def WikidataItemGenerator(gen):
+def WikibaseItemGenerator(gen):
     """
-    A wrapper generator used to yield Wikidata items of another generator.
+    A wrapper generator used to yield Wikibase items of another generator.
 
     @param gen: Generator to wrap.
     @type gen: generator
@@ -1286,13 +1372,52 @@
         if isinstance(page, pywikibot.ItemPage):
             yield page
         elif page.site.data_repository() == page.site:
-            # These are already items, just not item pages
+            # These are already items, as they have a DataSite in page.site.
+            # However generator is yielding Page, so convert to ItemPage.
             # FIXME: If we've already fetched content, we should retain it
             yield pywikibot.ItemPage(page.site, page.title())
         else:
             yield pywikibot.ItemPage.fromPage(page)
 
 
+WikidataItemGenerator = WikibaseItemGenerator
+
+
+def WikibaseItemFilterPageGenerator(generator, has_item=True,
+                                    show_filtered=False):
+    """
+    A wrapper generator used to exclude if page has a wikibase item or not.
+
+    @param gen: Generator to wrap.
+    @type gen: generator
+    @param has_item: Exclude pages without an item if True, or only
+        include pages without an item if False
+    @type has_item: bool
+    @param show_filtered: Output a message for each page not yielded
+    @type show_filtered: bool
+    @return: Wrapped generator
+    @rtype: generator
+    """
+    for page in generator or []:
+        try:
+            page_item = pywikibot.ItemPage.fromPage(page, lazy_load=False)
+        except pywikibot.NoPage:
+            page_item = None
+
+        if page_item:
+            if not has_item:
+                if show_filtered:
+                    pywikibot.output(
+                        '%s has a wikidata item.  Skipping.' % page)
+                continue
+        else:
+            if has_item:
+                if show_filtered:
+                    pywikibot.output(
+                        '%s doesn\'t have a wikidata item.  Skipping.' % page)
+                continue
+
+
 # TODO below
 @deprecated_args(extension=None, number="total", repeat=None)
 def UnusedFilesGenerator(total=100, site=None, extension=None):

-- 
To view, visit https://gerrit.wikimedia.org/r/172493
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I71b051818773e8b2f78eab534c4dfb23072d0ee9
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: John Vandenberg <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to