pagegenerators.py

cosoleto Wed, 06 May 2009 04:30:30 -0700

Revision: 6839
Author:   cosoleto
Date:     2009-05-06 11:30:21 +0000 (Wed, 06 May 2009)


Log Message:
-----------
With -file, accept page titles separated by newlines when no title enclosed
in [[brackets]] is found in the file (#2783431).

Modified Paths:
--------------
    trunk/pywikipedia/pagegenerators.py

Modified: trunk/pywikipedia/pagegenerators.py
===================================================================
--- trunk/pywikipedia/pagegenerators.py 2009-05-06 06:51:08 UTC (rev 6838)
+++ trunk/pywikipedia/pagegenerators.py 2009-05-06 11:30:21 UTC (rev 6839)
@@ -42,8 +42,9 @@
 -uncatfiles       Work on all files which are not categorised.
 
 -file             Read a list of pages to treat from the named text file.
-                  Page titles in the file must be enclosed with [[brackets]].
-                  Argument can also be given as "-file:filename".
+                  Page titles in the file must be enclosed with [[brackets]]
+                  or separated by newlines. Argument can also be given as
+                  "-file:filename".
 
 -filelinks        Work on all pages that use a certain image/media file.
                   Argument can also be given as "-filelinks:filename".
@@ -405,9 +406,10 @@
 
 def TextfilePageGenerator(filename=None, site=None):
     '''
-    Read a file of page links between double-square-brackets, and return
-    them as a list of Page objects. filename is the name of the file that
-    should be read. If no name is given, the generator prompts the user.
+    Read a file of page links between double-square-brackets or, in
+    alternative, separated by newlines, and return them as a list of Page
+    objects. filename is the name of the file that should be read. If no
+    name is given, the generator prompts the user.
     '''
     if filename is None:
         filename = wikipedia.input(u'Please enter the filename:')
@@ -415,6 +417,7 @@
         site = wikipedia.getSite()
     f = codecs.open(filename, 'r', config.textfile_encoding)
     R = re.compile(ur'\[\[(.+?)(?:\]\]|\|)') # title ends either before | or before ]]
+    pageTitle = None
     for pageTitle in R.findall(f.read()):
         # If the link doesn't refer to this site, the Page constructor
         # will automatically choose the correct site.
@@ -422,6 +425,12 @@
         # text file, but also could be dangerous because you might
         # inadvertently change pages on another wiki!
         yield wikipedia.Page(site, pageTitle)
+    if pageTitle is None:
+        f.seek(0)
+        for title in f:
+            title = title.strip()
+            if title:
+                yield wikipedia.Page(site, title)
     f.close()
 
 def PagesFromTitlesGenerator(iterable, site = None):



_______________________________________________
Pywikipedia-svn mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/pywikipedia-svn

[Pywikipedia-svn] SVN: [6839] trunk/pywikipedia/pagegenerators.py

Reply via email to