Op 25-01-2026 om 17:19 schreef Egmont Koblinger:
Another random find:
In the 9.9 tarball's hu.po, see line 3252: the translation is outdated -- it
misses the "+" flag. Accordingly, it's marked as fuzzy.
This made me think: when a msgid-msgstr pair is marked as fuzzy,
then the split.py script should _not_ split any options that are
in the msgid and msgstr, because whatever is in the msgstr does
not correspond to what is in the msgid -- or at least not fully.
One can observe this problem when running ./split.py on sk.po
and then searching for "--no-dereference". Oops.
(In hu.po at the TP there are no fuzzies, so I didn't notice the
problem there.)
So split.py has to check for the "#, fuzzy" marker, and skip the
splitting of the subsequent msgid-msgstr pair. I've implemented
that in the attached updated script.
(But maybe it is better to split them anyway and mark every
resulting pair as fuzzy?)
--
Regards,
Benno
#!/usr/bin/env python3
import sys
import re
# NOTE(review): the string literals and patterns below are reproduced exactly
# as found.  If this script passed through a channel that collapses runs of
# spaces, the intended prefixes may have been wider -- confirm against real
# PO help-text lines before relying on them.
_TOKEN_THEN_DASH_RE = re.compile(r'^" \S+ -\S\S \S+ ')
_KEY_VALUE_RE = re.compile(r'^" [a-z]+=\S+ ')
_INDENTED_LONG_OPTION_RE = re.compile(r'^" {1,6}--')

# Quoted PO string lines that carry no text of their own.
_BLANK_STRING_LINES = ('""', '"\\n"')


def _has_fuzzy_flag(line):
    """Return True if *line* is a ``#,`` flags comment listing ``fuzzy``."""
    if not line.startswith('#,'):
        return False
    return 'fuzzy' in (flag.strip() for flag in line[2:].split(','))


def _is_short_or_operand_line(line):
    """Return True for the option shapes shared by the strict and relaxed tests."""
    if line.startswith('" -'):
        text = line[4:]
        if text.startswith('M '):
            # Explicitly excluded shape; do not fall through to the regexes.
            return False
        if text and text[0] != ' ':
            return True
    return bool(_TOKEN_THEN_DASH_RE.match(line) or _KEY_VALUE_RE.match(line))


def _is_option(line):
    """Return True if a msgid string line starts the description of an option."""
    return line.startswith('" --') or _is_short_or_operand_line(line)


def _is_option_relaxed(line):
    """Like ``_is_option`` but accept one to six spaces before ``--`` (msgstr side)."""
    if _INDENTED_LONG_OPTION_RE.match(line):
        return True
    return _is_short_or_operand_line(line)


def _read_string_lines(lines, pos):
    """Collect consecutive lines starting with ``"`` from *pos*; return (lines, next_pos)."""
    start = pos
    while pos < len(lines) and lines[pos].startswith('"'):
        pos += 1
    return lines[start:pos], pos


def _group_by_option(string_lines, starts_option):
    """Cut *string_lines* into runs, starting a new run at each option line after the first."""
    starts = [0] + [j for j in range(1, len(string_lines))
                    if starts_option(string_lines[j])]
    ends = starts[1:] + [len(string_lines)]
    return [string_lines[a:b] for a, b in zip(starts, ends)]


def _pair_option_groups(msgid_lines, msgstr_lines):
    """Return a list of (msgid_group, msgstr_group) pairs, or None if unsafe to split."""
    if not any(_is_option(text) for text in msgid_lines):
        return None

    first_non_empty = next(
        (j for j, text in enumerate(msgid_lines)
         if text.strip() not in _BLANK_STRING_LINES),
        None,
    )
    if first_non_empty is not None and _is_option(msgid_lines[first_non_empty]):
        # Drop the blank lead-in so the first group begins at the first option;
        # the same number of leading lines is dropped from the translation.
        msgid_lines = msgid_lines[first_non_empty:]
        if first_non_empty < len(msgstr_lines):
            msgstr_lines = msgstr_lines[first_non_empty:]

    msgid_groups = _group_by_option(msgid_lines, _is_option)
    msgstr_groups = _group_by_option(msgstr_lines, _is_option_relaxed)
    if len(msgid_groups) != len(msgstr_groups):
        # Pairing uneven group lists would silently discard the surplus text
        # (for instance every option of an untranslated entry), so refuse.
        return None
    return list(zip(msgid_groups, msgstr_groups))


def split_po_entries(lines):
    """Print a PO file, splitting multi-option entries into one entry per option.

    *lines* is the file content as a list of lines that still carry their
    line terminators (for example from ``readlines()``).  Every line is
    echoed to standard output unchanged, except for entries of the form
    ``msgid ""`` + string lines + ``msgstr ""`` + string lines whose msgid
    contains option lines: those are re-emitted as one ``msgid``/``msgstr``
    pair per option, each followed by a blank line.

    An entry is left untouched when it is flagged fuzzy, or when the msgid
    and msgstr do not break into the same number of option groups, because
    in both cases the translation cannot be matched up with the original.
    """
    pos = 0
    total = len(lines)
    fuzzy = False
    while pos < total:
        line = lines[pos]
        stripped = line.strip()
        if _has_fuzzy_flag(line):
            fuzzy = True
        elif not stripped:
            # A blank line separates entries; a pending flag does not carry over.
            fuzzy = False

        if stripped != 'msgid ""':
            print(line, end='')
            if line.startswith('msgid '):
                # A single-line msgid consumes the flag, so it cannot leak
                # onto a later multi-line entry.
                fuzzy = False
            pos += 1
            continue

        entry_start = pos
        msgid_lines, pos = _read_string_lines(lines, pos + 1)
        pairs = None
        if pos < total and lines[pos].strip() == 'msgstr ""':
            msgstr_lines, pos = _read_string_lines(lines, pos + 1)
            if not fuzzy:
                pairs = _pair_option_groups(msgid_lines, msgstr_lines)
        fuzzy = False

        if pairs is None:
            # Not splittable: reproduce the entry exactly as it was read.
            for original in lines[entry_start:pos]:
                print(original, end='')
            continue

        for msgid_group, msgstr_group in pairs:
            print('msgid ""')
            for text in msgid_group:
                print(text, end='')
            print('msgstr ""')
            for text in msgstr_group:
                print(text, end='')
            print()
if __name__ == '__main__':
    # Script entry point: process the file named by the first command-line
    # argument, or standard input when no argument is given.
    if len(sys.argv) <= 1:
        split_po_entries(sys.stdin.readlines())
    else:
        with open(sys.argv[1]) as po_file:
            split_po_entries(po_file.readlines())