Revision: 7638
Author:   xqt
Date:     2009-11-12 15:01:35 +0000 (Thu, 12 Nov 2009)

Log Message:
-----------
* Don't append in restore/continue mode
* remove duplicate pages from generator

Modified Paths:
--------------
    trunk/pywikipedia/interwiki.py

Modified: trunk/pywikipedia/interwiki.py
===================================================================
--- trunk/pywikipedia/interwiki.py      2009-11-12 14:29:17 UTC (rev 7637)
+++ trunk/pywikipedia/interwiki.py      2009-11-12 15:01:35 UTC (rev 7638)
@@ -1738,12 +1738,14 @@
         self.generateNumber = number
         self.generateUntil = until
 
-    def dump(self):
+    def dump(self, append = True):
         site = pywikibot.getSite()
         dumpfn = pywikibot.config.datafilepath(
                      'interwiki-dumps',
                      'interwikidump-%s-%s.txt' % (site.family.name, site.lang))
-        f = codecs.open(dumpfn, 'a', 'utf-8')
+        if append: mode = 'a'
+        else: mode = 'w'
+        f = codecs.open(dumpfn, mode, 'utf-8')
         for subj in self.subjects:
             f.write(subj.originPage.aslink(None)+'\n')
         f.close()
@@ -2187,18 +2189,21 @@
                                u'interwikidump-%s-%s.txt'
                                  % (site.family.name, site.lang))
             hintlessPageGen = pagegenerators.TextfilePageGenerator(dumpFileName)
+            hintlessPageGen = pagegenerators.DuplicateFilterPageGenerator(hintlessPageGen)
             if optContinue:
                 # We waste this generator to find out the last page's title
                 # This is an ugly workaround.
+                nextPage = "!"
+                namespace = 0
                 for page in hintlessPageGen:
-                    pass
-                try:
+                    lastPage = page.titleWithoutNamespace()
+                    if lastPage > nextPage:
+                        nextPage = lastPage
+                        namespace = page.namespace()
+                if nextPage == "!":
+                    pywikibot.output(u"Dump file is empty?! Starting at the beginning.")
+                else:
                     nextPage = page.titleWithoutNamespace() + '!'
-                    namespace = page.namespace()
-                except NameError:
-                    pywikibot.output(u"Dump file is empty?! Starting at the beginning.")
-                    nextPage = "!"
-                    namespace = 0
                 # old generator is used up, create a new one
                 hintlessPageGen = pagegenerators.CombinedPageGenerator([pagegenerators.TextfilePageGenerator(dumpFileName), pagegenerators.AllpagesPageGenerator(nextPage, namespace, includeredirects = False)])
 
@@ -2225,10 +2230,10 @@
             try:
                 bot.run()
             except KeyboardInterrupt:
-                bot.dump()
+                bot.dump(not (optRestore or optContinue))
                 dumped = True
             except:
-                bot.dump()
+                bot.dump(not (optRestore or optContinue))
                 dumped = True
                 raise
         finally:



_______________________________________________
Pywikipedia-svn mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/pywikipedia-svn

Reply via email to