jenkins-bot has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/390348 )
Change subject: T178253: Handle pipe ending table attributes in figure captions ...................................................................... T178253: Handle pipe ending table attributes in figure captions * Fixes http://localhost:8000/nl.wikipedia.org/v3/page/html/Klimaatclassificatie_van_K%C3%B6ppen/50339930 * We do this by providing a nested parsing context in which to parse the table. In the php parser, `doTableStuff` happens before `replaceInternalLinks` so none of the table syntax pipes should ever be breaking for a "linkdesc". The only places where this is currently an issue are the two "table_attributes" instances in "table_row_tag" and "table_start_tag" where we don't normally break on pipes, since they're optional because the php parser considers the rest of the line as attributes. That permits nonsense like, {| testing | class="four" | ha |} In the common case, it would be sufficient to just optionally capture pipes at those places with `(spaces* pipe)?` and be done with it. However, this more permissive solution seems slightly more robust and is likelier to match the user's intent. For example, [[File:Foobar.jpg|thumb| {| | hi ho | jo |} ]] Here, the php parser would break on the pipe in " ho | jo" since that isn't valid table syntax, but we're tokenizing it as raw text in the table cell. Unfortunately, the support for templates returning options as pipe-separated strings in `renderFile` breaks that for us, matching the php parser rendering. 
Change-Id: I88f54399094d21a1a9db769cd46a1258691459a9 --- M lib/wt2html/pegTokenizer.pegjs M lib/wt2html/tt/LinkHandler.js M tests/parserTests.txt 3 files changed, 34 insertions(+), 2 deletions(-) Approvals: Subramanya Sastry: Looks good to me, approved jenkins-bot: Verified diff --git a/lib/wt2html/pegTokenizer.pegjs b/lib/wt2html/pegTokenizer.pegjs index c8fc9d7..8a91a14 100644 --- a/lib/wt2html/pegTokenizer.pegjs +++ b/lib/wt2html/pegTokenizer.pegjs @@ -1725,7 +1725,12 @@ full_table_in_link_caption = (! inline_breaks / & '{{!}}' ) r:( - & { return stops.push('table', true); } + // Note that "linkdesc" is suppressed here to provide a nested parsing + // context in which to parse the table. Otherwise, we may break on + // on pipes in the `table_start_tag` and `table_row_tag` attributes. + // However, as a result, this can be more permissive than the current + // php implementation, but likelier to match the users intent. + & { stops.push('linkdesc', false); return stops.push('table', true); } tbl:( table_start_tag optionalNewlines // Accept multiple end tags since a nested table may have been @@ -1733,10 +1738,11 @@ ((sol table_content_line optionalNewlines)* sol table_end_tag)+ ){ + stops.pop('linkdesc'); stops.pop('table'); return tbl; } - / & { return stops.pop('table'); } + / & { stops.pop('linkdesc'); return stops.pop('table'); } ) { return r; } table_lines diff --git a/lib/wt2html/tt/LinkHandler.js b/lib/wt2html/tt/LinkHandler.js index b6d7aa8..77c4858 100644 --- a/lib/wt2html/tt/LinkHandler.js +++ b/lib/wt2html/tt/LinkHandler.js @@ -1673,6 +1673,12 @@ // image options as a pipe-separated string. We aren't // really providing editing support for this yet, or // ever, maybe. + // + // TODO(arlolra): Tables in captions suppress breaking on + // "linkdesc" pipes so `stringifyOptionTokens` should account + // for pipes in table cell content. For the moment, breaking + // here is acceptable since it matches the php implementation + // bug for bug. 
var pieces = oText.split("|").map(function(s) { return new KV("mw:maybeContent", s); }); diff --git a/tests/parserTests.txt b/tests/parserTests.txt index ff574d1..4b5f7ab 100644 --- a/tests/parserTests.txt +++ b/tests/parserTests.txt @@ -14835,6 +14835,26 @@ !! end !! test +Image with table with attributes in caption +!! options +parsoid=wt2html,html2html +!! wikitext +[[File:Foobar.jpg|thumb| +{| class="123" | +|- class="456" | +| ha +|} +]] +!! html/parsoid +<figure class="mw-default-size" typeof="mw:Image/Thumb" data-parsoid='{"optList":[{"ck":"thumbnail","ak":"thumb"},{"ck":"caption","ak":"\n{| class=\"123\" |\n|- class=\"456\" |\n| ha\n|}\n"}]}'><a href="./File:Foobar.jpg" data-parsoid='{"a":{"href":"./File:Foobar.jpg"},"sa":{"href":"File:Foobar.jpg"}}'><img resource="./File:Foobar.jpg" src="//example.com/images/thumb/3/3a/Foobar.jpg/220px-Foobar.jpg" data-file-width="1941" data-file-height="220" data-file-type="bitmap" height="25" width="220" data-parsoid='{"a":{"resource":"./File:Foobar.jpg","height":"25","width":"220"},"sa":{"resource":"File:Foobar.jpg"}}'/></a><figcaption> +<table class="123"> +<tbody><tr class="456" data-parsoid='{"startTagSrc":"|-"}'> +<td> ha</td></tr> +</tbody></table> +</figcaption></figure> +!! end + +!! test Image with nested tables in caption !! wikitext [[File:Foobar.jpg|thumb|Foo<br /> -- To view, visit https://gerrit.wikimedia.org/r/390348 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: I88f54399094d21a1a9db769cd46a1258691459a9 Gerrit-PatchSet: 3 Gerrit-Project: mediawiki/services/parsoid Gerrit-Branch: master Gerrit-Owner: Arlolra <abrea...@wikimedia.org> Gerrit-Reviewer: Arlolra <abrea...@wikimedia.org> Gerrit-Reviewer: C. 
Scott Ananian <canan...@wikimedia.org> Gerrit-Reviewer: Sbailey <sbai...@wikimedia.org> Gerrit-Reviewer: Subramanya Sastry <ssas...@wikimedia.org> Gerrit-Reviewer: jenkins-bot <> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits