jenkins-bot has submitted this change and it was merged.

Change subject: Bug 54946: Alternative solution for <pre> tokenization
......................................................................
Bug 54946: Alternative solution for <pre> tokenization This patch attempts to fix the same issue as https://gerrit.wikimedia.org/r/#/c/87632/ It adds a nested_block_line production and uses that in a few places where inlineline was used before. Change-Id: Ie508dedaa8d23974689537ef656ee0c0837e382c --- M js/lib/pegTokenizer.pegjs.txt M js/tests/parserTests-blacklist.js M js/tests/parserTests.txt 3 files changed, 37 insertions(+), 10 deletions(-) Approvals: Subramanya Sastry: Looks good to me, approved jenkins-bot: Verified diff --git a/js/lib/pegTokenizer.pegjs.txt b/js/lib/pegTokenizer.pegjs.txt index ab5dabc..d250d4b 100644 --- a/js/lib/pegTokenizer.pegjs.txt +++ b/js/lib/pegTokenizer.pegjs.txt @@ -573,6 +573,10 @@ */ nested_block = !inline_breaks b:block { return b; } +nested_block_line = bs:(!sol !inline_breaks b:block { return b; })* { + return flattenIfArray(bs); +} + /* * The same, but suitable for use inside a table construct. * Doesn't match table_heading_tag, table_row_tag, table_data_tag, @@ -679,15 +683,17 @@ return null !== pegArgs.pegTokenizer.inline_breaks( input, pos, stops ); } +pre_start = "<" pre_tag_name (' '* [^>]*)? ">" + inline - = c:(urltext / (! inline_breaks (inline_element / . )))+ { + = c:(urltext / (! inline_breaks !pre_start (inline_element / . )))+ { //console.warn('inline out:' + pp(c)); return flatten_stringlist( c ); } inlineline - = c:(urltext / ! inline_breaks (inline_element / [^\r\n]))+ { + = c:(urltext / ! 
inline_breaks !pre_start (inline_element / [^\r\n]))+ { //console.warn('inlineline out:' + pp(c) + input.substr(pos0, pos)); return flatten_stringlist( c ); } @@ -713,7 +719,7 @@ r:( s:'='+ // moved in here to make s accessible to inner action & { return stops.inc('h'); } - c:inlineline + c:nested_block_line e:'='+ endTPos:({ return pos; }) spc:(spaces / comment)* @@ -1349,7 +1355,7 @@ "<" pre_tag_name attribs:generic_attribute* ">" - l:inlineline + l:nested_block_line ls:(sol pre_indent_line)* "</" pre_tag_name ">" { @@ -1361,7 +1367,7 @@ / & { return stops.dec('pre'); } // Don't recognize tabs -pre_indent_line = " " l:inlineline { +pre_indent_line = " " l:nested_block_line { //console.warn( JSON.stringify( [s, l] ) ); return [' '].concat(l); } @@ -1384,7 +1390,7 @@ ts:( newlineToken / (htmlentity / [^&<]+)+ / nowiki - / !("</" pre_tag_name ">") t2:(htmlentity / .) { return t2; })+ + / !("</" pre_tag_name ">") t2:(htmlentity / .) { return t2; })* ("</" pre_tag_name ">" / eof) { stops.dec('pre'); // return nowiki tags as well? @@ -1877,7 +1883,7 @@ lists = (dtdd / hacky_dl_uses / li) (sol (dtdd / hacky_dl_uses / li))* li = bullets:list_char+ - c:inlineline? + c:nested_block_line &eolf { if ( c === '' ) { @@ -1912,12 +1918,12 @@ = bullets:(!(";" !list_char) lc:list_char { return lc; })* ";" & {return stops.inc('colon');} - c:inlineline + c:nested_block_line cpos:(":" { return pos; }) // Fortunately dtdds cannot be nested, so we can simply set the flag // back to 0 to disable it. & { stops.counters.colon = 0; return true;} - d:inlineline? + d:nested_block_line? &eolf { // Leave bullets as an array -- list handler expects this var li1 = new TagTk( 'listItem', [], { tsr: [pos0, pos0 + bullets.length] } ); @@ -2578,7 +2584,7 @@ directive // Eat insane tags-inside-attributes. Example: // <hiddentext>generated with.. </hiddentext> - / &generic_tag inlineline + / &generic_tag nested_block_line / !(space_or_newline / [\[>]) c:. 
{ //console.warn( 'aptl: ' + pp(c) ); return c; diff --git a/js/tests/parserTests-blacklist.js b/js/tests/parserTests-blacklist.js index 2895db1..0cbe18b 100644 --- a/js/tests/parserTests-blacklist.js +++ b/js/tests/parserTests-blacklist.js @@ -712,6 +712,7 @@ add("html2html", "<pre> with forbidden attribute (bug 3202)"); add("html2html", "<pre> with forbidden attribute values (bug 3202)"); add("html2html", "<nowiki> inside <pre> (bug 13238)"); +add("html2html", "Empty pre; pre inside other HTML tags (bug 54946)"); add("html2html", "3a. Indent-Pre and block tags (single-line html)"); add("html2html", "3b. Indent-Pre and block tags (pre-content on separate line)"); add("html2html", "4. Multiple spaces at start-of-line"); @@ -1262,6 +1263,7 @@ add("html2wt", "<nowiki> inside <pre> (bug 13238)"); add("html2wt", "<nowiki> and <pre> preference (first one wins)"); add("html2wt", "</pre> inside nowiki"); +add("html2wt", "Empty pre; pre inside other HTML tags (bug 54946)"); add("html2wt", "Templates: Indent-Pre: 1a. Templates that break a line should suppress <pre>"); add("html2wt", "Templates: Indent-Pre: 1b. Templates that break a line should suppress <pre>"); add("html2wt", "Templates: Indent-Pre: 1c: Wrapping should be based on expanded content"); diff --git a/js/tests/parserTests.txt b/js/tests/parserTests.txt index 5073eab..c718512 100644 --- a/js/tests/parserTests.txt +++ b/js/tests/parserTests.txt @@ -1560,6 +1560,25 @@ </p> !! end +!! test +Empty pre; pre inside other HTML tags (bug 54946) +!! input +a + +<div><pre> +foo +</pre></div> +<pre></pre> +!! result +<p>a +</p> +<div><pre> +foo +</pre></div> +<pre></pre> + +!! end + !!test Templates: Indent-Pre: 1a. 
Templates that break a line should suppress <pre> !!input

--
To view, visit https://gerrit.wikimedia.org/r/92469
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: Ie508dedaa8d23974689537ef656ee0c0837e382c
Gerrit-PatchSet: 5
Gerrit-Project: mediawiki/extensions/Parsoid
Gerrit-Branch: master
Gerrit-Owner: GWicke <[email protected]>
Gerrit-Reviewer: Subramanya Sastry <[email protected]>
Gerrit-Reviewer: jenkins-bot

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits
