Revision: 8760
Author:   purodha
Date:     2010-12-05 22:07:02 +0000 (Sun, 05 Dec 2010)
Log Message:
-----------
titletranslate() better documented, saner and slightly quicker code, made more 
flexible.

Modified Paths:
--------------
    trunk/pywikipedia/interwiki.py
    trunk/pywikipedia/titletranslate.py

Modified: trunk/pywikipedia/interwiki.py
===================================================================
--- trunk/pywikipedia/interwiki.py      2010-12-05 21:09:39 UTC (rev 8759)
+++ trunk/pywikipedia/interwiki.py      2010-12-05 22:07:02 UTC (rev 8760)
@@ -108,10 +108,9 @@
                        
 These arguments are useful to provide hints to the bot:
 
-    -hint:         used as -hint:de:Anweisung to give the robot a hint
-                   where to start looking for translations. This is only
-                   useful if you specify a single page to work on. If no
-                   text is given after the second ':', the name of the page
+    -hint:         used as  -hint:de:Anweisung  to give the robot a hint
+                   where to start looking for translations. If no text
+                   is given after the second ':', the name of the page
                    itself is used as the title for the hint, unless the
                    -hintnobracket command line option (see there) is also
                    selected.
@@ -122,12 +121,15 @@
                        * 10:      The 10 largest languages (sites with most
                                   articles). Analogous for any other natural
                                   number.
-                       * arab:    All languages useing the Arabic alphabet.
+                       * arab:    All languages using the Arabic alphabet.
                        * cyril:   All languages that use the Cyrillic alphabet.
                        * chinese: All Chinese dialects.
                        * latin:   All languages using the Latin script.
                        * scand:   All Scandinavian languages.
 
+                   Languages and groups having the same page title can be
+                   combined, as in  -hint:5,scand,sr,pt:New_York 
+
     -hintfile:     similar to -hint, except that hints are taken from the given
                    file, enclosed in [[]] each, instead of the command line.
 

Modified: trunk/pywikipedia/titletranslate.py
===================================================================
--- trunk/pywikipedia/titletranslate.py 2010-12-05 21:09:39 UTC (rev 8759)
+++ trunk/pywikipedia/titletranslate.py 2010-12-05 22:07:02 UTC (rev 8760)
@@ -13,14 +13,34 @@
 import wikipedia as pywikibot
 import date
 
-def translate(page, hints = None, auto = True, removebrackets = False):
+def _join_to_(result, join):
+    for x in join:
+        if x not in result:
+            result.append(x)
+
+def translate(page, hints = None, auto = True, removebrackets = False, site = None, family = None):
     """
     Please comment your source code! --Daniel
 
     Does some magic stuff. Returns a list of pages.
+
+    Goes through all entries in 'hints'. Returns a list of pages.
+
+    Entries for single page titles list those pages. Page titles for entries
+    such as "all:" or "xyz:" or "20:" are first built from the page title of
+    'page' and then listed. When 'removebrackets' is True, a trailing pair of
+    brackets and the text between them is removed from the page title.
+    If 'auto' is true, known year and date page titles are autotranslated
+    to all known target languages and inserted into the list.
+    
     """
     result = []
-    site = page.site()
+    if site is None and page:
+       site = page.site()
+    if family is None and site:
+       family = site.family
+    if site:
+       sitelang = site.language()
     if hints:
         for h in hints:
             if ':' not in h:
@@ -33,9 +53,11 @@
                 # if given as -hint:xy or -hint:xy:, assume that there should
                 # be a page in language xy with the same title as the page
                 # we're currently working on ...
+                if page is None:
+                   continue
                 ns = page.namespace()
                 if ns:
-                    newname = u'%s:%s' % (site.family.namespace('_default', ns),
+                    newname = u'%s:%s' % (family.namespace('_default', ns),
                                           page.titleWithoutNamespace())
                 else:
                     # article in the main namespace
@@ -43,45 +65,49 @@
                 # ... unless we do want brackets
                 if removebrackets:
                     newname = re.sub(re.compile(ur"\W*?\(.*?\)\W*?", re.UNICODE), u" ", newname)
-            try:
-                number = int(codes)
-                codes = site.family.languages_by_size[:number]
-            except ValueError:
-                if codes == 'all':
-                    codes = site.family.languages_by_size
-                elif codes in site.family.language_groups:
-                    codes = site.family.language_groups[codes]
-                else:
-                    codes = codes.split(',')
+            codesplit = codes.split(',')
+            codes = []
+            for code in codesplit:
+                try:
+                    number = int(code)
+                    _join_to_(codes, family.languages_by_size[:number] )
+                except ValueError:
+                    if code == 'all':
+                        _join_to_(codes, family.languages_by_size )
+                    elif code in family.language_groups:
+                        _join_to_(codes, family.language_groups[code] )
+                    elif code:
+                        _join_to_(codes, [ code ] )
             for newcode in codes:
-                if newcode in site.languages():
-                    if newcode != site.language():
-                        x = pywikibot.Page(site.getSite(code=newcode), newname)
-                        if x not in result:
-                            result.append(x)
+                x = None
+                if newcode in family.langs.keys():
+                    if ( page is None ) or ( newcode != sitelang ):
+                        x = pywikibot.Page(pywikibot.getSite(fam=family, code=newcode), newname)
+#                elif newcode in family.interwiki_forwarded_from:
+#                    x = pywikibot.Page(pywikibot.getSite(fam=newcode, code=newcode), newname)
                 else:
                     if pywikibot.verbose:
-                        pywikibot.output(u"Ignoring unknown language code %s"
-                                         % newcode)
+                        pywikibot.output(u"Ignoring the unknown language code %s" % newcode)
+                if x:
+                    _join_to_(result, [ x ] )
 
     # Autotranslate dates into all other languages, the rest will come from
     # existing interwiki links.
-    if auto:
+    if auto and page:
         # search inside all dictionaries for this link
-        dictName, value = date.getAutoFormat(page.site().language(),
-                                             page.title())
+        dictName, value = date.getAutoFormat(sitelang, page.title())
         if dictName:
             if not (dictName == 'yearsBC' and
-                    page.site().language() in date.maxyearBC and
-                    value > date.maxyearBC[page.site().language()]) or \
+                    sitelang in date.maxyearBC and
+                    value > date.maxyearBC[sitelang]) or \
                     (dictName == 'yearsAD' and
-                     page.site().language() in date.maxyearAD and
-                     value > date.maxyearAD[page.site().language()]):
+                     sitelang in date.maxyearAD and
+                     value > date.maxyearAD[sitelang]):
                 pywikibot.output(
                     u'TitleTranslate: %s was recognized as %s with value %d'
                     % (page.title(), dictName, value))
                 for entryLang, entry in date.formats[dictName].iteritems():
-                    if entryLang != page.site().language():
+                    if entryLang != sitelang:
                         if dictName == 'yearsBC' and \
                            entryLang in date.maxyearBC and \
                            value > date.maxyearBC[entryLang]:
@@ -94,9 +120,8 @@
                             newname = entry(value)
                             x = pywikibot.Page(
                                 pywikibot.getSite(code=entryLang,
-                                                  fam=site.family), newname)
-                            if x not in result:
-                                result.append(x) # add new page
+                                                  fam=family), newname)
+                            _join_to_(result, [ x ] )
     return result
 
 bcDateErrors = [u'[[ko:%d년]]']


_______________________________________________
Pywikipedia-svn mailing list
Pywikipedia-svn@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/pywikipedia-svn

Reply via email to