> [: Marw :] > Makao sam zareze iz praznih redova i redove i skripta je odradila posao > (00-000_SR-RS.csv).
Мхмх… није ми баш јасно откуд ти редови само са зарезом, али не делује проблематично. Ево малчице допуњена скрипта да то аутоматски игнорише.
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
try:
import fallback_import_paths
except:
pass
import csv
import hashlib
import locale
import optparse
import os
import sys
from pology.catalog import Catalog
from pology.fsops import str_to_unicode
from pology.msgreport import warning_on_msg
from pology.report import warning, error
from pology.split import proper_words
def main ():
    """
    Command-line entry point: parse the options and run the requested mode.

    The first free argument selects the mode:

      - ``extract POTFILE TABFILE``: write the messages of POTFILE into
        a table file, optionally split into several files by word count
        (option -w).
      - ``inject TABFILE... POFILE``: copy translations from one or more
        table files into POFILE.

    Problems are reported through error() from pology.report, which is
    expected to abort the program (its implementation is not part of
    this file).
    """

    locale.setlocale(locale.LC_ALL, "")

    usage = (
        "\n"
        " %prog extract POTFILE TABFILE [OPTIONS]\n"
        " %prog inject TABFILE... POFILE [OPTIONS]")
    desc = (
        "Simplistically extract a POT file into a monolanguage table file. "
        "Inject translated monolanguage table file into a PO file. "
        "Encoding of the table file is expected to be UTF-8 at all times, "
        "regardless of the PO file encoding.")
    ver = (
        u"%prog x1\n"
        u"Copyright © 2011, "
        u"Chusslove Illich (Часлав Илић) <[email protected]>")

    opars = optparse.OptionParser(usage=usage, description=desc, version=ver)
    _all_formats = ["csv", "html", "xls"]
    _def_format = "csv"
    _def_dialect = "excel"
    opars.add_option(
        "-d", "--dialect", metavar="DIALECT",
        action="store", dest="dialect", default=_def_dialect,
        help="The dialect for the CSV format "
             "(known: %s; default: %s)."
             % (", ".join(sorted(csv.list_dialects())), _def_dialect))
    opars.add_option(
        "-f", "--format", metavar="FORMAT",
        action="store", dest="format", default=_def_format,
        help="Format of the table file "
             "(known: %s; default: %s)."
             % (", ".join(_all_formats), _def_format))
    opars.add_option(
        "-t", "--translator", metavar="'NAME <EMAIL>'",
        action="store", dest="translator", default=None,
        help="Name and email address of the translator, "
             "to update the PO header on injection.")
    _min_words_per_chunk = 500
    opars.add_option(
        "-w", "--words-per-chunk", metavar="NUM",
        action="store", dest="words_per_chunk", default=None,
        help="Chunk output into several table files, "
             "such that each contains at most this many words (minimum: %d). "
             "Resulting files will have suffixes -000, -001, etc. "
             "inserted before the extension."
             % _min_words_per_chunk)

    options, free_args = opars.parse_args(str_to_unicode(sys.argv[1:]))

    # Chunking is enabled only when -w was given; validate its value here.
    maxwordcnt = None
    if options.words_per_chunk:
        try:
            maxwordcnt = int(options.words_per_chunk)
        except ValueError:
            error("Words per chunk argument must be an integer.")
        if maxwordcnt < _min_words_per_chunk:
            error("Words per chunk argument must be at least %d."
                  % _min_words_per_chunk)

    # Without any free argument there is no mode to pop.
    if not free_args:
        opars.print_usage()
        sys.exit(1)
    mode = free_args.pop(0)
    paths = free_args

    if mode == "extract":
        if len(paths) != 2:
            opars.print_usage()
            sys.exit(1)
        popath, tabpath = paths
        # Word counts are collected only when they are needed for chunking;
        # chunk() strips them again from the per-chunk data.
        extdata = extract(popath, bool(maxwordcnt))
        if maxwordcnt:
            extchunks = chunk(extdata, maxwordcnt, tabpath)
        else:
            extchunks = [(extdata, tabpath)]
        for cextdata, ctabpath in extchunks:
            if options.format == "csv":
                write_csv(cextdata, ctabpath, dialect=options.dialect)
            elif options.format == "html":
                # Write this chunk only, not the complete extracted data.
                write_html(cextdata, ctabpath)
            elif options.format == "xls":
                write_xls(cextdata, ctabpath)
            else:
                error("Unknown format '%s' for the table file."
                      % options.format)

    elif mode == "inject":
        if len(paths) < 2:
            opars.print_usage()
            sys.exit(1)
        tabpaths = paths[:-1]
        popath = paths[-1]
        if options.format == "csv":
            injdata = read_csv(tabpaths, dialect=options.dialect)
        elif options.format == "html":
            # No HTML reader exists in this file; the return only guards
            # against falling through should error() ever not abort.
            error("Injection from HTML not implemented yet.")
            return
        elif options.format == "xls":
            injdata = read_xls(tabpaths)
        else:
            error("Unknown format '%s' for the table file." % options.format)
            return
        inject(injdata, popath, translator=options.translator)

    else:
        error("Unknown operation mode '%s'." % mode)
def extract (filepath, wordcnt=False):
    """
    Collect the messages of a PO/POT file for writing into a table file.

    Obsolete messages are left out silently, plural messages are left out
    with a warning.

    Returns a list of tuples, one per remaining message:
    (message, hex key), or (message, hex key, number of words) when
    wordcnt is true. The number of words is the length of the list
    returned by proper_words() for the message's msgid.
    """

    catalog = Catalog(filepath, monitored=False)
    rows = []
    for message in catalog:
        if message.obsolete:
            continue
        if message.msgid_plural is not None:
            warning_on_msg(
                "Extraction of plural messages not implemented yet, "
                "skipping.", message, catalog)
            continue
        row = (message, get_msg_hex(message))
        if wordcnt:
            words = proper_words(message.msgid, True,
                                 catalog.accelerator(), message.format)
            row = row + (len(words),)
        rows.append(row)
    return rows
def write_csv (extdata, filepath, dialect="excel"):
    """
    Write extracted messages into a UTF-8 encoded CSV file.

    extdata is an iterable of (message, hex key) pairs; each pair becomes
    one row holding the hex key and the message's msgid.
    dialect is the name of the csv dialect to use.
    """

    # The csv module wants a binary file and byte strings on Python 2,
    # but a text file opened with newline="" and text strings on Python 3.
    py3 = sys.version_info[0] >= 3
    if py3:
        ofl = open(filepath, "w", encoding="utf-8", newline="")
    else:
        ofl = open(filepath, "wb")
    # The with-statement closes the file also when writing fails.
    with ofl:
        data = csv.writer(ofl, dialect=dialect)
        for msg, msghex in extdata:
            msgid = msg.msgid if py3 else msg.msgid.encode("utf8")
            data.writerow([msghex, msgid])
def write_xls (extdata, filepath):
    """
    Write extracted messages into a spreadsheet file using the xlwt module.

    extdata is an iterable of (message, hex key) pairs; each pair becomes
    one row of the sheet "PO-extract", with the hex key in the first
    column and the message's msgid in the second.
    Reports through error() if xlwt cannot be imported.
    """

    # Imported here so that xlwt is needed only when this format is used.
    try:
        import xlwt
    except ImportError:
        error("Cannot import Python module '%s'." % "xlwt")

    book = xlwt.Workbook(encoding="UTF-8")
    sheet = book.add_sheet("PO-extract")
    for rowx, (msg, msghex) in enumerate(extdata):
        sheet.write(rowx, 0, msghex)
        sheet.write(rowx, 1, msg.msgid)
    book.save(filepath)
# NOTE: Does not really work properly, added just for a quick test.
def write_html (extdata, filepath):
    """
    Write extracted messages into a UTF-8 encoded XHTML file.

    extdata is an iterable of (message, hex key) pairs; each pair becomes
    one <p> element with the hex key as its id attribute and the message's
    msgid as its text. The msgid is XML-escaped, so that messages
    containing '&', '<' or '>' do not break the document.
    """

    # Local imports: only this function needs these modules.
    import io
    from xml.sax.saxutils import escape

    lines = [
        u"<?xml version='1.0' encoding='UTF-8'?>",
        u"<!DOCTYPE html PUBLIC '-//W3C//DTD XHTML 1.0 Strict//EN' 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd'>",
        u"<html>",
        u"<body>",
    ]
    for msg, msghex in extdata:
        lines.append(u"<p id=\"%s\">%s</p>" % (msghex, escape(msg.msgid)))
    # The final empty element makes the file end with a newline.
    lines.extend([u"</body>", u"</html>", u""])

    # Text is encoded once, on writing; the file is closed also on failure.
    with io.open(filepath, "w", encoding="utf-8") as ofl:
        ofl.write(u"\n".join(lines))
def inject (injdata, filepath, translator=None):
    """
    Inject translations read from table files into a PO file.

    injdata maps hex keys (see get_msg_hex) to tuples of
    (translation, table file path, row number), as produced by the
    read_* functions. It is not modified.

    For each message of the PO file whose hex key is in injdata and whose
    translation there is not empty, the translation is stored as
    msgstr[0] and unfuzzy() is called on the message. If translator is
    given, the PO header is updated with it. The catalog is then synced.

    Entries of injdata that match no message of the PO file are reported
    with one warning per table file, listing the row numbers.
    """

    cat = Catalog(filepath)

    matched = set()
    for msg in cat:
        msghex = get_msg_hex(msg)
        entry = injdata.get(msghex)
        if entry is None or entry[0] is None:
            # Not an error: the input may be just one chunk (-w option)
            # and therefore not cover every message of the PO file.
            continue
        matched.add(msghex)
        msgstr = entry[0]
        if msgstr:
            msg.msgstr[0] = msgstr
            msg.unfuzzy()

    if translator is not None:
        cat.update_header(name=translator)
    cat.sync()

    # Whatever was not matched above does not exist in the PO file.
    extras_by_file = {}
    for msghex, (msgstr, fname, rno) in injdata.items():
        if msghex not in matched:
            extras_by_file.setdefault(fname, []).append(rno)
    for fname, rnos in sorted(extras_by_file.items()):
        warning("Some messages from input file '%s' do not exist "
                "in the PO file, at rows: %s."
                % (fname, ", ".join(map(str, sorted(rnos)))))
def read_csv (filepaths, dialect="excel"):
    """
    Read translations from UTF-8 encoded CSV table files.

    Each usable row has the hex key in the first column and the
    translation in the second; further columns are ignored. Rows which
    are blank, have fewer than two columns, or have an empty key
    (e.g. rows consisting of just a comma) are skipped.

    Returns a dictionary mapping the hex key to a tuple of
    (translation, file path, 1-based row number). When a key occurs
    more than once, the last occurrence wins.
    """

    # The csv module wants a binary file on Python 2, but a text file
    # opened with newline="" on Python 3.
    if sys.version_info[0] >= 3:
        open_table = lambda path: open(path, "r", encoding="utf-8", newline="")
    else:
        open_table = lambda path: open(path, "rb")

    injdata = {}
    for filepath in filepaths:
        # The with-statement closes the file also when reading fails.
        with open_table(filepath) as ifl:
            data = csv.reader(ifl, dialect=dialect)
            for rno, row in enumerate(data, 1):
                # A blank line is returned as an empty list by the reader.
                if len(row) < 2:
                    continue
                msghex = row[0].strip()
                if not msghex:
                    continue
                msgstr = row[1]
                if isinstance(msgstr, bytes):
                    # Python 2 csv returns undecoded byte strings.
                    msgstr = msgstr.decode("utf8")
                injdata[msghex] = (msgstr, filepath, rno)
    return injdata
def read_xls (filepaths):
    """
    Read translations from spreadsheet files using the xlrd module.

    Only the first sheet of each file is read; the first column holds
    the hex key and the second column the translation. Rows with an
    empty key cell are skipped, like in read_csv.

    Returns a dictionary mapping the hex key to a tuple of
    (translation, file path, 1-based row number).
    Reports through error() if xlrd cannot be imported.
    """

    # Imported here so that xlrd is needed only when this format is used.
    try:
        import xlrd
    except ImportError:
        error("Cannot import Python module '%s'." % "xlrd")

    injdata = {}
    for filepath in filepaths:
        book = xlrd.open_workbook(filepath)
        sheet = book.sheet_by_index(0)
        for rowx in range(sheet.nrows):
            msghex = sheet.cell(rowx, 0).value
            # NOTE(review): xlrd is expected to give an empty string
            # for an empty cell -- confirm against xlrd documentation.
            if not msghex:
                continue
            msgstr = sheet.cell(rowx, 1).value
            injdata[msghex] = (msgstr, filepath, rowx + 1)
    return injdata
def get_msg_hex (msg):
    """
    Return the hex key of a message: the hexadecimal MD5 digest
    of the message's key attribute, encoded as UTF-8.
    """

    keybytes = msg.key.encode("utf8")
    return hashlib.md5(keybytes).hexdigest()
def chunk (extdata, maxwordcnt, basepath):
    """
    Split extracted messages into consecutive chunks by word count.

    extdata is a sequence of (message, hex key, word count) tuples.
    Messages are added to the current chunk until adding the next one
    would push the sum of word counts over maxwordcnt; a single message
    with more words than that gets a chunk of its own.

    Returns a list of (chunk data, chunk path) pairs, where chunk data is
    a list of (message, hex key) pairs and chunk path is basepath with
    -000, -001, etc. inserted before the extension (as determined by
    os.path.splitext; appended if there is no extension).
    The list is empty if extdata is empty.
    """

    # splitext() looks for the extension in the last path component only,
    # and copes with paths that have no extension at all.
    root, ext = os.path.splitext(basepath)

    extchunks = []
    cextdata = []
    csumwordcnt = 0
    for msg, msghex, wordcnt in extdata:
        # Close the current chunk if this message would overflow it;
        # an empty chunk is never closed, so oversized messages still fit.
        if cextdata and csumwordcnt + wordcnt > maxwordcnt:
            ctabpath = "%s-%03d%s" % (root, len(extchunks), ext)
            extchunks.append((cextdata, ctabpath))
            cextdata = []
            csumwordcnt = 0
        csumwordcnt += wordcnt
        cextdata.append((msg, msghex))
    # Whatever remains forms the last chunk.
    if cextdata:
        ctabpath = "%s-%03d%s" % (root, len(extchunks), ext)
        extchunks.append((cextdata, ctabpath))

    return extchunks
# Run the command-line tool only when executed as a script,
# not when imported as a module.
if __name__ == "__main__":
    main()
signature.asc
Description: This is a digitally signed message part.

