William Grant has proposed merging lp:~wgrant/launchpad/bug-1707890 into lp:launchpad.
Commit message: Fix Translations' text_to_html to not parse HTML as a C format string. Requested reviews: Launchpad code reviewers (launchpad-reviewers) Related bugs: Bug #1707890 in Launchpad itself: "Broken HTML in translations" https://bugs.launchpad.net/launchpad/+bug/1707890 For more details, see: https://code.launchpad.net/~wgrant/launchpad/bug-1707890/+merge/328652 Fix Translations' text_to_html to not parse HTML as a C format string. The c-format flag was previously applied after the input had been converted to HTML, which was pretty crazy. -- Your team Launchpad code reviewers is requested to review the proposed merge of lp:~wgrant/launchpad/bug-1707890 into lp:launchpad.
=== modified file 'lib/lp/translations/browser/browser_helpers.py' --- lib/lp/translations/browser/browser_helpers.py 2012-11-29 06:35:35 +0000 +++ lib/lp/translations/browser/browser_helpers.py 2017-08-07 09:09:17 +0000 @@ -54,7 +54,7 @@ if text is None: return None - lines = [] + markup_lines = [] # Replace leading and trailing spaces on each line with special markup. if u'\r\n' in text: newline_chars = u'\r\n' @@ -62,7 +62,7 @@ newline_chars = u'\r' else: newline_chars = u'\n' - for line in html_escape(text).split(newline_chars): + for line in text.split(newline_chars): # Pattern: # - group 1: zero or more spaces: leading whitespace # - group 2: zero or more groups of (zero or @@ -72,37 +72,32 @@ match = re.match(u'^( *)((?: *[^ ]+)*)( *)$', line) if match: - lines.append( - space * len(match.group(1)) + - match.group(2) + - space * len(match.group(3))) + format_segments = None + if 'c-format' in flags: + try: + format_segments = parse_cformat_string(match.group(2)) + except UnrecognisedCFormatString: + pass + if format_segments is not None: + markup = '' + for segment in format_segments: + type, content = segment + + if type == 'interpolation': + markup += (u'<code>%s</code>' % html_escape(content)) + elif type == 'string': + markup += html_escape(content) + else: + markup = html_escape(match.group(2)) + markup_lines.append( + space * len(match.group(1)) + + markup + + space * len(match.group(3))) else: raise AssertionError( "A regular expression that should always match didn't.") - if 'c-format' in flags: - # Replace c-format sequences with marked-up versions. If there is a - # problem parsing the c-format sequences on a particular line, that - # line is left unformatted. - for i in range(len(lines)): - formatted_line = '' - - try: - segments = parse_cformat_string(lines[i]) - except UnrecognisedCFormatString: - continue - - for segment in segments: - type, content = segment - - if type == 'interpolation': - formatted_line += (u'<code>%s</code>' % content) - elif type == 'string': - formatted_line += content - - lines[i] = formatted_line - - return expand_rosetta_escapes(newline.join(lines)) + return expand_rosetta_escapes(newline.join(markup_lines)) def convert_newlines_to_web_form(unicode_text): === modified file 'lib/lp/translations/doc/browser-helpers.txt' --- lib/lp/translations/doc/browser-helpers.txt 2010-10-08 16:58:10 +0000 +++ lib/lp/translations/doc/browser-helpers.txt 2017-08-07 09:09:17 +0000 @@ -206,6 +206,19 @@ >>> text_to_html(u'foo\rbar', []) u'foo<img alt="" src="/@@/translation-newline" /><br/>\nbar' +HTML in the input string is escaped. + + >>> text_to_html(u'<b>Test %d</b>', []) + u'<b>Test %d</b>' + >>> text_to_html(u'<b>Test %d</b>', ['c-format']) + u'<b>Test <code>%d</code></b>' + +Format strings are parsed before markup is generated (the %q is invalid +as it has no conversion specifier until the <samp> is injected): + + >>> text_to_html(u'Test %q: ', ['c-format']) + u'Test %q:<samp> </samp>' + convert_newlines_to_web_form ----------------------------
_______________________________________________ Mailing list: https://launchpad.net/~launchpad-reviewers Post to : launchpad-reviewers@lists.launchpad.net Unsubscribe : https://launchpad.net/~launchpad-reviewers More help : https://help.launchpad.net/ListHelp