On 26/01/2026 15:47, [email protected] wrote:
Op 25-01-2026 om 19:30 schreef Pádraig Brady:
Oh... Can you give two examples of commands for which option
descriptions aren't on the next line?
cat, ptx, truncate at least
as the descriptions on those are succinct enough.
For `truncate` it would be just two options that would get wrapped
when they shouldn't be -- that would still be acceptable. But for
`cat` it would be ten and for `ptx` sixteen. :/
Well, the script could check for "src/cat.c" and "src/ptx.c" in
the preceding line and skip the wrapping when the relevant bools
are set. So... please implement the wrapping and I'll implement
the exceptions.
(That is: the wrapping should only happen when options are split,
not for any options that are already single. This will not prevent
all valid translations from becoming fuzzy when msgmerged, but a
good amount.)
I've attached an updated split.py that wraps iff splitting, and also
automatically excludes the commands that don't wrap.
It does result in far fewer fuzzy entries:
$ diff pl-new-orig.po pl-new.po | grep -- '-#, fuzzy' | wc -l
233
Note sk.po has an invalid UTF-8 character which stops processing,
so I manually edited sk.po so that the non-UTF-8 ç
in Fran.*Pinard was replaced, _before_ I ran the script.
Note also af.po and gl.po should be run with
LC_ALL=en_US.iso-8859-1 or equivalent.
thanks,
Padraig
#!/usr/bin/env python3
import sys
import re
# Files to exclude from msgid wrapping.
# The pattern is searched for in the "#:" location comment lines that precede
# an entry; when any of them names one of these src/<command>.c files, the
# entry is still split per option but its msgid lines are left unwrapped.
EXCLUDED_FILES_PATTERN = re.compile(r'src/(cat|nl|ptx|realpath|runcon|shuf|stdbuf|stty|sync|tac|truncate|uname|who)\.c')
def wrap_msgid_line(line):
    """Break one quoted PO string line into an option line and a description line.

    The line is expected to be a double-quoted PO string, optionally ending
    with a literal backslash-n escape, whose text is an indented option
    followed by at least two whitespace characters and a description.

    Returns a list of output lines: two lines (option, then description) when
    the pattern matches, otherwise a one-element list holding the input line
    unchanged.
    """
    quoted = line.rstrip('\n')

    # Anything that is not a complete "..." string is passed through as-is.
    is_quoted = quoted.startswith('"') and quoted.endswith('"')
    if not is_quoted:
        return [line]

    body = quoted[1:-1]

    # Set the trailing \n escape aside so it can be re-attached to the
    # description line afterwards.
    ends_with_escape = body.endswith('\\n')
    text = body[:-2] if ends_with_escape else body

    # Group 1: leading whitespace, optional short option ("-X, "), then the
    # option word itself.  Group 2: the description after 2+ whitespace chars.
    found = re.match(r'^(\s+(?:-\S,\s+)?--?[^\s]+)\s{2,}(.+)$', text)
    if found is None:
        return [line]

    option, description = found.groups()

    # The option line always ends with a \n escape; the description line only
    # carries one if the input line did.
    # NOTE(review): the description is indented by the literal '" ' prefix
    # below -- confirm that width matches the msgids this output is merged with.
    tail = '\\n"\n' if ends_with_escape else '"\n'
    return ['"' + option + '\\n"\n', '" ' + description + tail]
def split_po_entries(lines):
    """Split multi-option PO entries into one entry per option and print them.

    `lines` is a list of PO file lines, each normally ending in a newline.
    The result is written to standard output; nothing is returned.

    Every entry of the form ``msgid ""`` + string lines + ``msgstr ""`` +
    string lines whose msgid contains at least one option line, and which is
    not flagged fuzzy, is rewritten as a sequence of smaller entries: the
    msgid and msgstr lines are each cut at the lines that start an option and
    the resulting groups are paired up in order.  Pairing stops at the
    shorter of the two group lists.  Unless one of the entry's "#:" location
    comments matches EXCLUDED_FILES_PATTERN, each emitted msgid line is also
    passed through wrap_msgid_line().  All other lines are printed unchanged.
    """

    def is_option(text):
        """Return True if a quoted PO string line starts an option description."""
        if text.startswith('" --'):
            return True
        if text.startswith('" -'):
            # NOTE(review): the prefix tested above is 3 characters long but
            # the slice below skips 4 -- confirm the intended indent width.
            rest = text[4:]
            if rest.startswith('M '):
                return False
            if len(rest) > 0 and rest[0] != ' ':
                return True
        if re.match(r'^" \S+ -\S\S \S+ ', text):
            return True
        if re.match(r'^" [a-z]+=\S+ ', text):
            return True
        return False

    def is_option_relaxed(text):
        """Like is_option(), but also accept 1-6 spaces before a long option."""
        if re.match(r'^" {1,6}--', text):
            return True
        return is_option(text)

    def split_into_groups(entry_lines, starts_group):
        """Cut entry_lines into consecutive runs.

        A new run begins at index 0 and at every later line for which
        starts_group(line) is true.  An empty input yields one empty run.
        """
        bounds = [0]
        bounds.extend(j for j in range(1, len(entry_lines))
                      if starts_group(entry_lines[j]))
        bounds.append(len(entry_lines))
        return [entry_lines[lo:hi] for lo, hi in zip(bounds, bounds[1:])]

    i = 0
    fuzzy = False
    current_files = []
    prev_was_location = False

    while i < len(lines):
        line = lines[i]

        # Track current files from location comments (these can span several
        # consecutive "#:" lines); a new run of them replaces the old list.
        if line.startswith('#:'):
            if not prev_was_location:
                current_files = []
            current_files.append(line)
            prev_was_location = True
        else:
            prev_was_location = False

        if "#, fuzzy" in line:
            fuzzy = True

        if line.strip() == 'msgid ""':
            start_i = i
            i += 1
            msgid_lines = []
            while i < len(lines) and lines[i].startswith('"'):
                msgid_lines.append(lines[i])
                i += 1

            if i < len(lines) and lines[i].strip() == 'msgstr ""':
                i += 1
                msgstr_lines = []
                while i < len(lines) and lines[i].startswith('"'):
                    msgstr_lines.append(lines[i])
                    i += 1

                has_options = any(is_option(entry) for entry in msgid_lines)

                # Wrapping is skipped when any tagged source file is excluded.
                should_wrap = not any(EXCLUDED_FILES_PATTERN.search(location)
                                      for location in current_files)

                if has_options and not fuzzy:
                    # Find the first msgid line that is neither "" nor "\n".
                    first_non_empty = None
                    for j, candidate in enumerate(msgid_lines):
                        if candidate.strip() not in ('""', '"\\n"'):
                            first_non_empty = j
                            break

                    # When the first real line is itself an option, drop the
                    # empty lines before it, and the same number of leading
                    # msgstr lines if the msgstr is long enough.
                    if (first_non_empty is not None
                            and is_option(msgid_lines[first_non_empty])):
                        msgid_lines = msgid_lines[first_non_empty:]
                        if first_non_empty < len(msgstr_lines):
                            msgstr_lines = msgstr_lines[first_non_empty:]

                    msgid_groups = split_into_groups(msgid_lines, is_option)
                    msgstr_groups = split_into_groups(msgstr_lines,
                                                      is_option_relaxed)

                    for msgid_group, msgstr_group in zip(msgid_groups,
                                                         msgstr_groups):
                        # Wrap msgid lines if appropriate
                        if should_wrap:
                            wrapped_msgid = []
                            for mline in msgid_group:
                                wrapped_msgid.extend(wrap_msgid_line(mline))
                        else:
                            wrapped_msgid = msgid_group

                        # Output msgid: the first string shares the keyword's line.
                        print('msgid ' + wrapped_msgid[0], end='')
                        for wline in wrapped_msgid[1:]:
                            print(wline, end='')

                        # Output msgstr: a single string goes on the keyword's
                        # line, otherwise an empty first string is emitted.
                        if len(msgstr_group) == 1:
                            print('msgstr ' + msgstr_group[0], end='')
                        else:
                            print('msgstr ""')
                            for mline in msgstr_group:
                                print(mline, end='')
                        # Blank separator after each emitted entry.
                        print()
                    continue

            # Not split: echo everything consumed for this entry unchanged.
            for j in range(start_i, i):
                print(lines[j], end='')
            fuzzy = False
            continue

        # A fuzzy flag belongs to a single entry.  Clear it once that entry's
        # single-line msgid has been reached, or at a blank separator line
        # (which also covers obsolete "#~" entries), so that it cannot leak
        # into a later multi-line entry and suppress its splitting.
        if not line.strip() or line.startswith('msgid '):
            fuzzy = False

        print(line, end='')
        i += 1
# Script entry point: process the PO file named by the first command-line
# argument, or standard input when no argument is given.
if __name__ == '__main__':
    if len(sys.argv) <= 1:
        split_po_entries(sys.stdin.readlines())
    else:
        # The file is decoded with the default (locale-dependent) encoding.
        with open(sys.argv[1]) as po_file:
            split_po_entries(po_file.readlines())