Revision: 8765
Author:   purodha
Date:     2010-12-07 15:18:55 +0000 (Tue, 07 Dec 2010)
Log Message:
-----------
Add -hintsonly option to interwiki.py making the 1st existing hinted page the 
start page if none supplied.

Modified Paths:
--------------
    trunk/pywikipedia/interwiki.py

Modified: trunk/pywikipedia/interwiki.py
===================================================================
--- trunk/pywikipedia/interwiki.py      2010-12-07 12:50:59 UTC (rev 8764)
+++ trunk/pywikipedia/interwiki.py      2010-12-07 15:18:55 UTC (rev 8765)
@@ -110,6 +110,18 @@
                    could be used for further explainings of the bot action.
                    This will only be used in non-autonomous mode.
 
+    -hintsonly     The bot does not ask for a page to work on, even if none of
+                   the above page sources was specified.  This will make the
+                   first existing page of -hint or -hintfile slip in as the start
+                   page, determining properties like namespace, disambiguation
+                   state, and so on.  When no existing page is found in the
+                   hints, the bot does nothing.
+                   Hitting return without input on the "Which page to check:"
+                   prompt has the same effect as using -hintsonly.
+                   Options like -back, -same or -wiktionary are in effect only
+                   after a page has been found to work on.
+                   (note: without ending colon)
+
 These arguments are useful to provide hints to the bot:
 
     -hint:         used as  -hint:de:Anweisung  to give the robot a hint
@@ -952,19 +964,21 @@
         this Object.
     """
 
-    def __init__(self, originPage, hints = None):
+    def __init__(self, originPage = None, hints = None):
         """Constructor. Takes as arguments the Page on the home wiki
            plus optionally a list of hints for translation"""
 
         if globalvar.contentsondisk:
-            originPage = StoredPage(originPage)
+            if originPage:
+                originPage = StoredPage(originPage)
 
         # Remember the "origin page"
         self.originPage = originPage
         # todo is a list of all pages that still need to be analyzed.
         # Mark the origin page as todo.
         self.todo = PageTree()
-        self.todo.add(originPage)
+        if originPage:
+            self.todo.add(originPage)
 
         # done is a list of all pages that have been analyzed and that
         # are known to belong to this subject.
@@ -973,7 +987,10 @@
         # pages are values. It stores where we found each page.
         # As we haven't yet found a page that links to the origin page, we
         # start with an empty list for it.
-        self.foundIn = {self.originPage:[]}
+        if originPage:
+            self.foundIn = {self.originPage:[]}
+        else:
+            self.foundIn = {}
         # This is a list of all pages that are currently scheduled for
         # download.
         self.pending = PageTree()
@@ -1024,23 +1041,25 @@
         """
         for tree in [self.done, self.pending, self.todo]:
             for page in tree.filter(site):
-                if page.namespace() == self.originPage.namespace():
+                # -hintsonly: before we have an origin page, any namespace 
will do.
+                if self.originPage and page.namespace() == 
self.originPage.namespace():
                     if page.exists() and not page.isRedirectPage() and not 
page.isCategoryRedirect():
                         return page
         return None
 
     def translate(self, hints = None, keephintedsites = False):
         """Add the given translation hints to the todo list"""
-        if globalvar.same:
+        if globalvar.same and self.originPage:
             if hints:
-                pages = titletranslate.translate(self.originPage, hints = 
hints + ['all:'], auto = globalvar.auto, removebrackets
-= globalvar.hintnobracket)
+                pages = titletranslate.translate(self.originPage, hints = 
hints + ['all:'],
+                           auto = globalvar.auto, removebrackets = 
globalvar.hintnobracket)
             else:
-                pages = titletranslate.translate(self.originPage, hints = 
['all:'], auto = globalvar.auto, removebrackets
-= globalvar.hintnobracket)
+                pages = titletranslate.translate(self.originPage, hints = 
['all:'],
+                           auto = globalvar.auto, removebrackets = 
globalvar.hintnobracket)
         else:
-            pages = titletranslate.translate(self.originPage, hints = hints, 
auto = globalvar.auto, removebrackets
-= globalvar.hintnobracket)
+            pages = titletranslate.translate(self.originPage, hints = hints,
+                           auto = globalvar.auto, removebrackets = 
globalvar.hintnobracket,
+                           site = pywikibot.getSite() )
         for page in pages:
             if globalvar.contentsondisk:
                 page = StoredPage(page)
@@ -1100,7 +1119,7 @@
         """
         if self.forcedStop:
             return False
-        if globalvar.nobackonly:
+        if globalvar.nobackonly and originPage: # cannot check backlink before 
we have an origin page
             if page == self.originPage:
                 try:
                     pywikibot.output(u"%s has a backlink from %s."
@@ -1138,7 +1157,7 @@
         if linkedPage in self.foundIn:
             # We have seen this page before, don't ask again.
             return False
-        elif self.originPage.namespace() != linkedPage.namespace():
+        elif self.originPage and self.originPage.namespace() != 
linkedPage.namespace():
             # Allow for a mapping between different namespaces
             crossFrom = 
self.originPage.site().family.crossnamespace.get(self.originPage.namespace(), 
{})
             crossTo = crossFrom.get(self.originPage.site().language(), 
crossFrom.get('_default', {}))
@@ -1181,10 +1200,11 @@
                         return True
         else:
             # same namespaces, no problem
+            # or no origin page yet, also no problem
             return False
 
     def wiktionaryMismatch(self, page):
-        if globalvar.same=='wiktionary':
+        if self.originPage and globalvar.same=='wiktionary':
             if page.title().lower() != self.originPage.title().lower():
                 pywikibot.output(u"NOTE: Ignoring %s for %s in wiktionary 
mode" % (page.title(asLink=True), self.originPage.title(asLink=True)))
                 return True
@@ -1207,6 +1227,8 @@
         alternativePage is either None, or a page that the user has
         chosen to use instead of the given page.
         """
+        if not self.originPage:
+            return (False, None) # any page matches until we have an origin 
page
         if globalvar.autonomous:
             if self.originPage.isDisambig() and not page.isDisambig():
                 pywikibot.output(u"NOTE: Ignoring link from disambiguation 
page %s to non-disambiguation %s"
@@ -1296,8 +1318,8 @@
                     elif not newhint:
                         break
                     else:
-                        pages = titletranslate.translate(self.originPage, 
hints = [newhint], auto = globalvar.auto, removebrackets
-= globalvar.hintnobracket)
+                        pages = titletranslate.translate(self.originPage, 
hints = [newhint],
+                                   auto = globalvar.auto, removebrackets = 
globalvar.hintnobracket)
                         for page in pages:
                             self.addIfNew(page, counter, None)
                             if globalvar.hintsareright:
@@ -1323,9 +1345,10 @@
             if globalvar.skipauto:
                 dictName, year = page.autoFormat()
                 if dictName is not None:
-                    pywikibot.output(u'WARNING: %s:%s relates to %s:%s, which 
is an auto entry %s(%s)'
-                                     % (self.originPage.site().language(), 
self.originPage.title(),
-                                        
page.site().language(),page.title(),dictName,year))
+                    if self.originPage:
+                        pywikibot.output(u'WARNING: %s:%s relates to %s:%s, 
which is an auto entry %s(%s)'
+                                         % (self.originPage.site().language(), 
self.originPage.title(),
+                                            
page.site().language(),page.title(),dictName,year))
 
                     # Abort processing if the bot is running in autonomous 
mode.
                     if globalvar.autonomous:
@@ -1371,7 +1394,8 @@
                 if not globalvar.quiet or pywikibot.verbose:
                     pywikibot.output(u"NOTE: %s is %sredirect to %s"
                                      % (page.aslink(True), redir, 
redirectTargetPage.aslink(True)))
-                if page == self.originPage:
+                if self.originPage is None or page == self.originPage:
+                    # the 1st existing page becomes the origin page, if none was supplied
                     if globalvar.initialredirect:
                         if globalvar.contentsondisk:
                             redirectTargetPage = StoredPage(redirectTargetPage)
@@ -1396,8 +1420,8 @@
                     if self.addIfNew(redirectTargetPage, counter, page):
                         if config.interwiki_shownew or pywikibot.verbose:
                             pywikibot.output(u"%s: %s gives new %sredirect %s"
-                                             %  
(self.originPage.title(asLink=True), page.aslink(True),
-                                                 redir, 
redirectTargetPage.aslink(True)))
+                                             % 
(self.originPage.title(asLink=True), page.aslink(True),
+                                                redir, 
redirectTargetPage.aslink(True)))
                 continue
 
             # must be behind the page.isRedirectPage() part
@@ -1410,7 +1434,8 @@
                     for site, count in self.todo.siteCounts():
                         counter.minus(site, count)
                     self.todo = PageTree()
-                    self.done = PageTree() 
+                    self.done = PageTree()
+                    self.originPage = None
                 continue
 
             elif page.section():
@@ -1419,6 +1444,9 @@
                 continue
 
             # Page exists, isnt a redirect, and is a plain link (no section)
+            if self.originPage is None:
+                # the 1st existing page becomes the origin page, if none was supplied
+                self.originPage = page
             try:
                 iw = page.interwiki()
             except pywikibot.NoSuchSite:
@@ -1665,10 +1693,11 @@
         if self.forcedStop: # autonomous with problem
             pywikibot.output(u"======Aborted processing %s======" % 
self.originPage.aslink(True))
             return
-        if self.originPage.isRedirectPage():
-            return
-        if self.originPage.isCategoryRedirect():
-            return
+        if self.originPage:
+            if self.originPage.isRedirectPage():
+                return
+            if self.originPage.isCategoryRedirect():
+                return
         if not self.untranslated and globalvar.untranslatedonly:
             return
         # The following check is not always correct and thus disabled.
@@ -1677,14 +1706,18 @@
 #         if len(self.done) == 1:
 #             # No interwiki at all
 #             return
-        pywikibot.output(u"======Post-processing %s======" % 
self.originPage.aslink(True))
+        if self.originPage:
+            pywikibot.output(u"======Post-processing %s======" % 
self.originPage.aslink(True))
         # Assemble list of accepted interwiki links
         new = self.assemble()
         if new is None: # User said give up
             pywikibot.output(u"======Aborted processing %s======" % 
self.originPage.aslink(True))
             return
+        if not len(new): # nothing else to do
+            return
 
         # Make sure new contains every page link, including the page we are 
processing
+        # TODO: should be moved to assemble()
         # replaceLinks will skip the site it's working on.
         if self.originPage.site() not in new:
           if not self.originPage.site().family.interwiki_forward: #TODO: make 
this possible as well.
@@ -2155,7 +2188,8 @@
         else: mode = 'written'
         f = codecs.open(dumpfn, mode[0], 'utf-8')
         for subj in self.subjects:
-            f.write(subj.originPage.aslink(None)+'\n')
+            if subj.originPage:
+                f.write(subj.originPage.aslink(None)+'\n')
         f.close()
         pywikibot.output(u'Dump %s (%s) %s.' % (site.lang, site.family.name, 
mode))
         return dumpfn
@@ -2430,6 +2464,7 @@
 
 def main():
     singlePageTitle = []
+    opthintsonly = False
     start = None
     # Which namespaces should be processed?
     # default to [] which means all namespaces will be processed
@@ -2485,6 +2520,8 @@
             optRestore = not globalvar.restoreAll
         elif arg == '-continue':
             optContinue = True
+        elif arg == '-hintsonly':
+            opthintsonly = True
         elif arg.startswith('-namespace:'):
             try:
                 namespaces.append(int(arg[11:]))
@@ -2593,9 +2630,12 @@
         readWarnfile(warnfile, bot)
     else:
         singlePageTitle = ' '.join(singlePageTitle)
-        if not singlePageTitle:
-            singlePageTitle = pywikibot.input(u'Which page to check:')
-        singlePage = pywikibot.Page(pywikibot.getSite(), singlePageTitle)
+        if not singlePageTitle and not opthintsonly:
+             singlePageTitle = pywikibot.input(u'Which page to check:')
+        if singlePageTitle:
+            singlePage = pywikibot.Page(pywikibot.getSite(), singlePageTitle)
+        else:
+            singlePage = None
         bot.add(singlePage, hints = globalvar.hints)
 
     try:


_______________________________________________
Pywikipedia-svn mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/pywikipedia-svn

Reply via email to