Hey guys! I wrote a little program in Python which might help translate the contents of the continents-cities file. Hope it does its job for you!
Here's the code (works as of 27th of May 2009, might require adjustments later if Wikipedia's way of displaying stuff changes): __________________________________________________________________________ # coding: utf-8 # Wacław Jacek <[email protected]> # Licence: GPLv3 (it's available here: http://www.gnu.org/licenses/gpl.txt) ### CHANGE THE TWO BELOW!!! ### locale_code = 'pl' # Wikipedia URL prefix, eg. 'pl' for pl.wikipedia.org translation_file_name = 'continents-cities-pl_PL.po' # your .po file headers = {'User-Agent' : 'Super Groovy Geo Name Translator/1.0'} import urllib import urllib2 # get the list of words from the file words = [] translation_file = open( translation_file_name, 'r' ) for line in translation_file: if line[ : line.find( ' ' ) ] == 'msgid': begin_pos = line.find( '"' ) + 1 # "+ 1" to skip the quotation mark itself end_pos = line.find( '"', begin_pos ) # closing quotation mark position word = line[ begin_pos : end_pos ] if word != '': # if the string isn't blank words.append( word ) print '"' + word + '"' # debug translation_file.close() print '\nDone getting words from .po file. Will now attempt to translate.\n' # and off we go! 
(getting the translations from English Wikipedia) translations = [] for word in words: # construct the URL url = 'http://en.wikipedia.org/w/index.php?title=' + urllib.pathname2url( word ) + '&printable=yes' # connect and get request = urllib2.Request( url, None, headers ) try: # if could reach the site connection = urllib2.urlopen( request ) except urllib2.HTTPError: translations.append( '' ) # no translation found continue site = connection.read() connection.close() # find it query = '<li class="interwiki-' + locale_code + '"><a href="http://' + locale_code + '.wikipedia.org/wiki/' # what to find in the source begin_pos = site.find( query ) if begin_pos != -1: # if found begin_pos = begin_pos + len( query ) end_pos = site.find( '"', begin_pos ) # where to trim the output # format it translation = site[ begin_pos : end_pos ] translation = urllib.url2pathname( translation ) # decode the URL translation = translation.replace( '_', ' ' ) # replace underscores with spaces # print it (just for debug purposes) print word + ' -- "' + translation + '" (' + str( begin_pos ) + ' : ' + str( end_pos ) + ')' translations.append( translation ) else: translations.append( '' ) # no translation available in Wikipedia's interwiki (links to local sites) print '\nDone fetching translations from English Wikipedia. Will now put them in a file.\n' # put the translations into the translation file output_file = open( translation_file_name + '.autotranslated', 'w' ) for id in range( len( words ) ): output_file.write( 'msgid "' + words[ id ] + '"\nmsgstr "' + translations[ id ] + '"\n\n' ) output_file.close() print 'Done!' __________________________________________________________________________ Cheers, W. J.
smime.p7s
Description: S/MIME Cryptographic Signature
_______________________________________________ wp-polyglots mailing list [email protected] http://lists.automattic.com/mailman/listinfo/wp-polyglots
