Revision: 7576
Author:   wikipedian
Date:     2009-10-31 00:35:16 +0000 (Sat, 31 Oct 2009)

Log Message:
-----------
used page generators to make the ignore list feature available to other 
scripts

Modified Paths:
--------------
    trunk/pywikipedia/pagegenerators.py
    trunk/pywikipedia/solve_disambiguation.py

Modified: trunk/pywikipedia/pagegenerators.py
===================================================================
--- trunk/pywikipedia/pagegenerators.py 2009-10-30 23:57:42 UTC (rev 7575)
+++ trunk/pywikipedia/pagegenerators.py 2009-10-31 00:35:16 UTC (rev 7576)
@@ -734,6 +734,32 @@
         if page.namespace() in namespaces:
             yield page
 
+def PageTitleFilterPageGenerator(generator, ignoreList):
+    """
+    Wraps around another generator. Yields only those pages that are not
+    listed in the ignore list.
+
+    The ignoreList is a dictionary. Family names are mapped to
+    dictionaries in which language codes are mapped to lists of
+    page titles.
+    """
+
+    def isIgnored(page):
+        if not (page.site().family.name in ignoreList and page.site().lang in ignoreList[page.site().family.name]):
+            return False
+
+        for ig in ignoreList[page.site().family.name][page.site().lang]:
+            if re.match(ig, page.title()):
+                return True
+        return False
+
+    for page in generator:
+        if isIgnored(page):
+            if wikipedia.verbose:
+                wikipedia.output('Ignoring page %s' % page.title())
+        else:
+            yield page
+
 def RedirectFilterPageGenerator(generator):
     """
     Wraps around another generator. Yields only those pages that are not 
redirects.

Modified: trunk/pywikipedia/solve_disambiguation.py
===================================================================
--- trunk/pywikipedia/solve_disambiguation.py   2009-10-30 23:57:42 UTC (rev 
7575)
+++ trunk/pywikipedia/solve_disambiguation.py   2009-10-31 00:35:16 UTC (rev 
7576)
@@ -412,28 +412,14 @@
 class ReferringPageGeneratorWithIgnore:
     def __init__(self, disambPage, primary=False, minimum = 0):
         self.disambPage = disambPage
-        # if run with the -primary argument, enable the ignore manager
-        self.primaryIgnoreManager = PrimaryIgnoreManager(disambPage,
-                                                         enabled=primary)
         self.minimum = minimum
-        
+
     def __iter__(self):
-        # TODO: start yielding before all referring pages have been found
-        refs = [page for page in 
self.disambPage.getReferences(follow_redirects = False, withTemplateInclusion = 
False)]
-        pywikibot.output(u"Found %d references." % len(refs))
-        # Remove ignorables
-        if self.disambPage.site().family.name in ignore_title and 
self.disambPage.site().lang in ignore_title[self.disambPage.site().family.name]:
-            for ig in 
ignore_title[self.disambPage.site().family.name][self.disambPage.site().lang]:
-                for i in range(len(refs)-1, -1, -1):
-                    if re.match(ig, refs[i].title()):
-                        if pywikibot.verbose:
-                            pywikibot.output('Ignoring page %s'
-                                             % refs[i].title())
-                        del refs[i]
-        for i in range(len(refs)-1, -1, -1):
-            if self.primaryIgnoreManager.isIgnored(refs[i]):
-                #pywikibot.output('Ignoring page %s because it was skipped 
before' % refs[i].title())
-                del refs[i]
+        generator = pagegenerators.ReferringPageGenerator(self.disambPage, followRedirects  = False, withTemplateInclusion = False)
+        generator = pagegenerators.PageTitleFilterPageGenerator(generator, ignore_title)
+
+        refs = [page for page in generator]
+
         if len(refs) < self.minimum:
             pywikibot.output(u"Found only %d pages to work on; skipping." % 
len(refs))
             return



_______________________________________________
Pywikipedia-svn mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/pywikipedia-svn

Reply via email to