I quickly whipped up something that seems to work for me. I haven't really had
time to test it properly. It is attached below.
Ilja
--
You received this message because you are subscribed to the Google Groups
"DSpace Technical Support" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
To post to this group, send email to [email protected].
Visit this group at https://groups.google.com/group/dspace-tech.
For more options, visit https://groups.google.com/d/optout.
#!/usr/bin/env python
import sys
import codecs
class MessagesXmlParser(object):
    """Read a messages.xml file into parallel key/value lists and a dict.

    Every direct child of the document's root element is treated as one
    message: its ``key`` attribute is the message id and its text content is
    the message text.

    Attributes:
        keys: message ids in document order, duplicates included.
        values: message texts, index-aligned with ``keys``. An entry is
            ``None`` when ElementTree reports no text for the element.
        dict_by_key: mapping of message id to text; when an id occurs more
            than once the last occurrence wins.

    Raises:
        KeyError: if a child of the root element has no ``key`` attribute.
    """

    def __init__(self, filename):
        """Parse *filename*, warning on stdout about ids that occur twice."""
        # Kept as a function-level import, as in the original script.
        import xml.etree.ElementTree as etree

        self.keys = []
        self.values = []
        self.dict_by_key = {}

        root = etree.parse(filename).getroot()
        for message in root:
            key = message.attrib['key']
            self.keys.append(key)
            self.values.append(message.text)
            # Test membership on the dict itself: `.keys()` builds a list on
            # Python 2, which made this check linear per message.
            if key in self.dict_by_key:
                print("WARNING: key %s used in %s at least twice" % (key, filename))
            self.dict_by_key[key] = message.text
class MessagesPropertiesParser(object):
    """Read a Java ``.properties`` file into key/value lists and a dict.

    Parsing is delegated to the third-party ``jprops`` module. If that module
    cannot be imported, an error is printed and the process exits with
    status 2.

    Attributes:
        keys: property names in the order ``jprops`` yields them.
        values: property values, index-aligned with ``keys``.
        dict_by_key: mapping of property name to value; when a name occurs
            more than once the last occurrence wins.
    """

    def __init__(self, filename):
        """Parse *filename* with ``jprops.iter_properties``."""
        try:
            import jprops
        except ImportError:
            # Only a missing module is reported as "missing"; other
            # exceptions (e.g. KeyboardInterrupt) are no longer swallowed.
            print('Error: jprops module for parsing .properties files is missing. Download and follow installation instructions from http://mgood.github.com/jprops/')
            sys.exit(2)

        self.keys = []
        self.values = []
        # Offers the same lookup attribute as the XML parser, so code that
        # reads `dict_by_key` also works for .properties input.
        self.dict_by_key = {}

        with open(filename) as fp:
            for key, value in jprops.iter_properties(fp):
                self.keys.append(key)
                self.values.append(value)
                self.dict_by_key[key] = value
def _print_usage(program):
    """Print the command-line synopsis of every supported mode."""
    print("Usage:")
    print(" %s --compare messages.xml messages_XX.xml" % (program))
    print("or")
    print(" %s --compare Messages.properties Messages_XX.properties" % (program))
    print("or")
    print(" %s --dump-messages messages.xml" % (program))
    print("or")
    print(" %s --dump-tsv-for-translation messages.xml messages_XX.xml ..." % (program))
    print("or")
    print(" %s --convert-tsv-to-xml messages.tsv messages_XX.xml ..." % (program))


def _is_xml_file(filename):
    """Return True when the first line of *filename* contains ``<?xml``."""
    # The context manager closes the probe handle; bytes are compared with
    # bytes so the check works on Python 2 and Python 3.
    with open(filename, 'rb') as handle:
        return handle.readline().find(b'<?xml') != -1


def _parser_for(filename):
    """Return the parser class selected by sniffing *filename*."""
    if _is_xml_file(filename):
        # xml file detected, assume messages.xml
        return MessagesXmlParser
    # assume Messages.properties
    return MessagesPropertiesParser


def _emit(text):
    """Write *text* plus a newline to stdout, encoding unicode as UTF-8."""
    if not isinstance(text, bytes):
        text = text.encode('utf-8')
    # Python 3 exposes the byte stream as sys.stdout.buffer; on Python 2
    # sys.stdout accepts byte strings directly.
    getattr(sys.stdout, 'buffer', sys.stdout).write(text + b'\n')


def _compare(template_file, other_file):
    """Print key counts and the keys missing from either file."""
    # As before, the format is sniffed from the first file only and the same
    # parser is used for both files.
    parser = _parser_for(template_file)
    messages_tmpl = parser(template_file)
    messages_in = parser(other_file)
    print("number of keys in [%s]: [%d]" % (template_file, len(messages_tmpl.keys)))
    print("number of keys in [%s]: [%d]" % (other_file, len(messages_in.keys)))

    tmpl_keys = set(messages_tmpl.keys)
    in_keys = set(messages_in.keys)
    print("Present in %s but missing in %s:" % (template_file, other_file))
    # Sorted so that two runs over the same files give identical output.
    for key in sorted(tmpl_keys - in_keys):
        print(key)
    print("\nPresent in %s but missing in %s:" % (other_file, template_file))
    for key in sorted(in_keys - tmpl_keys):
        print(key)


def _dump_messages(filename):
    """Print every distinct message text of *filename* as UTF-8."""
    messages = _parser_for(filename)(filename)
    for value in set(messages.values):
        _emit(u"%s" % (value,))


def _dump_tsv(filenames):
    """Print one TSV row per key of the first (reference) file.

    Each row is the key followed by one column per file, in command-line
    order, every cell terminated by a tab. Keys absent from a file, and
    messages without text, produce an empty cell.
    """
    reference_file = filenames[0]
    # Diagnostics go to stderr so stdout carries nothing but TSV rows.
    sys.stderr.write("chose %s as reference file\n" % reference_file)

    tables = []
    for name in filenames:
        sys.stderr.write("%s\n" % name)
        parsed = _parser_for(name)(name)
        # Built from keys/values, which both parser classes provide.
        tables.append((parsed.keys, dict(zip(parsed.keys, parsed.values))))

    seen = set()
    for msg_id in tables[0][0]:
        # Rows follow the reference file's order; a repeated key is emitted
        # only once.
        if msg_id in seen:
            continue
        seen.add(msg_id)
        cells = [msg_id]
        for _, table in tables:
            text = table.get(msg_id)
            cells.append(u"" if text is None else text)
        _emit(u"".join(u"%s\t" % (cell,) for cell in cells))


def _convert_tsv_to_xml(tsv_filename, outputs):
    """Write one catalogue XML file per TSV value column.

    Column 0 of *tsv_filename* is the message key; column ``i + 1`` holds the
    text written to ``outputs[i]``. Surrounding double quotes are stripped
    from every cell. Messages are written in TSV order.
    """
    import collections
    import io
    from xml.dom import minidom
    from xml.etree import ElementTree

    values = dict((name, collections.OrderedDict()) for name in outputs)
    with io.open(tsv_filename, encoding='utf-8') as tsv:
        for line in tsv:
            parts = [p.strip('"') for p in line.strip().split('\t')]
            key = parts[0]
            # zip() stops at the shorter sequence, so a row with fewer value
            # columns than outputs adds nothing to the trailing outputs.
            for name, text in zip(outputs, parts[1:]):
                values[name][key] = text

    for name in outputs:
        # NOTE(review): xml:lang is 'en' for every output file, whatever its
        # target language - confirm this is intended.
        top = ElementTree.Element('catalogue', attrib={'xml:lang':'en', 'xmlns:i18n':'http://apache.org/cocoon/i18n/2.1'})
        for key, text in values[name].items():
            child = ElementTree.SubElement(top, 'message', attrib={'key': key})
            child.text = text
        # Round-trip through minidom purely to get indented output.
        unpretty = ElementTree.tostring(top, 'utf-8')
        pretty = minidom.parseString(unpretty).toprettyxml(indent=" ")
        with io.open(name, 'w', encoding='utf-8') as out:
            out.write(pretty)


def main(argv):
    """Dispatch on the mode in ``argv[1]``.

    Prints the usage text and exits with status 1 when no mode is given, the
    mode is unknown, or a mode lacks its required file arguments.
    """
    if len(argv) < 2:
        _print_usage(argv[0])
        sys.exit(1)

    mode, args = argv[1], argv[2:]
    if mode == "--compare" and len(args) >= 2:
        _compare(args[0], args[1])
    elif mode == "--dump-messages" and args:
        _dump_messages(args[0])
    elif mode == "--dump-tsv-for-translation" and args:
        _dump_tsv(args)
    elif mode == "--convert-tsv-to-xml":
        if len(args) < 2:
            print("Input file(s) missing. TSV-filename and at least one target filename are required.")
            sys.exit(1)
        _convert_tsv_to_xml(args[0], args[1:])
    else:
        _print_usage(argv[0])
        sys.exit(1)


if __name__ == "__main__":
    main(sys.argv)
> On 04 Jan 2017, at 14:19, helix84 <[email protected]> wrote:
>
> I wrote dspace-l10n-check.py, consider it CC0 (do whatever you want with it).
> Petya Kohts wrote dspace-i10n-check2.py. If you write a new one, I can put it
> to the same wiki page, too.
>
>
> Regards,
> ~~helix84
>
> Compulsory reading: DSpace Mailing List Etiquette
> https://wiki.duraspace.org/display/DSPACE/Mailing+List+Etiquette
>
>
> On Wed, Jan 4, 2017 at 1:11 PM, Sidoroff, Ilja <[email protected]>
> wrote:
> I don't know if I have time to do that, but I might extend the script to do
> the opposite transformation (tsv -> xml). In the script there's no licence or
> author (helix84?). Does anyone know the author and if I have the time to do
> the modifications, is there a preference, where the modified version should
> be put out?
>
>
> Ilja Sidoroff
> Information Systems Specialist
>
>
> > On 23 Dec 2016, at 02:05, helix84 <[email protected]> wrote:
> >
> > Here's a specialized one:
> >
> > https://wiki.duraspace.org/pages/viewpage.action?pageId=19006307#InternationalizationSupport(I18nSupport)-Pythonscripttocheckformissing/extrastrings
> >
> >
> > Regards,
> > ~~helix84
> >
> > Compulsory reading: DSpace Mailing List Etiquette
> > https://wiki.duraspace.org/display/DSPACE/Mailing+List+Etiquette
> >
> >
> > On Thu, Dec 22, 2016 at 6:16 PM, Sidoroff, Ilja <[email protected]>
> > wrote:
> > Hello all,
> >
> > Related to an upgrade, I'm trying to update the messages.xml files. Is there
> > any tool that can be used to compare two versions of the message files? For
> > instance, I could use a tool that lists missing keys between two files and
> > merge translation files. Quick googling didn't turn up anything, but I
> > would be surprised, if there isn't anything readymade for such a common
> > task.
> >
> > br,
> >
> > Ilja Sidoroff
> > Information Systems Specialist
> > Helsinki University Library
> >
> > --
> > You received this message because you are subscribed to the Google Groups
> > "DSpace Technical Support" group.
> > To unsubscribe from this group and stop receiving emails from it, send an
> > email to [email protected].
> > To post to this group, send email to [email protected].
> > Visit this group at https://groups.google.com/group/dspace-tech.
> > For more options, visit https://groups.google.com/d/optout.
> >
>
--
You received this message because you are subscribed to the Google Groups
"DSpace Technical Support" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
To post to this group, send email to [email protected].
Visit this group at https://groups.google.com/group/dspace-tech.
For more options, visit https://groups.google.com/d/optout.