jenkins-bot has submitted this change and it was merged.

Change subject: Bug 54946: Alternative solution for <pre> tokenization
......................................................................


Bug 54946: Alternative solution for <pre> tokenization

This patch attempts to fix the same issue as
https://gerrit.wikimedia.org/r/#/c/87632/

It adds a nested_block_line production and uses that in a few places where
inlineline was used before.

Change-Id: Ie508dedaa8d23974689537ef656ee0c0837e382c
---
M js/lib/pegTokenizer.pegjs.txt
M js/tests/parserTests-blacklist.js
M js/tests/parserTests.txt
3 files changed, 37 insertions(+), 10 deletions(-)

Approvals:
  Subramanya Sastry: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/js/lib/pegTokenizer.pegjs.txt b/js/lib/pegTokenizer.pegjs.txt
index ab5dabc..d250d4b 100644
--- a/js/lib/pegTokenizer.pegjs.txt
+++ b/js/lib/pegTokenizer.pegjs.txt
@@ -573,6 +573,10 @@
  */
 nested_block = !inline_breaks b:block { return b; }
 
+nested_block_line = bs:(!sol !inline_breaks b:block { return b; })* { 
+    return flattenIfArray(bs); 
+}
+
 /*
  * The same, but suitable for use inside a table construct.
  * Doesn't match table_heading_tag, table_row_tag, table_data_tag,
@@ -679,15 +683,17 @@
         return null !== pegArgs.pegTokenizer.inline_breaks( input, pos, stops );
       }
 
+pre_start = "<" pre_tag_name (' '* [^>]*)? ">"
+
 inline
-  = c:(urltext / (! inline_breaks (inline_element / . )))+ {
+  = c:(urltext / (! inline_breaks !pre_start (inline_element / . )))+ {
       //console.warn('inline out:' + pp(c));
       return flatten_stringlist( c );
 }
 
 
 inlineline
-  = c:(urltext / ! inline_breaks (inline_element / [^\r\n]))+ {
+  = c:(urltext / ! inline_breaks !pre_start (inline_element / [^\r\n]))+ {
       //console.warn('inlineline out:' + pp(c) + input.substr(pos0, pos));
       return flatten_stringlist( c );
 }
@@ -713,7 +719,7 @@
     r:(
      s:'='+ // moved in here to make s accessible to inner action
      & { return stops.inc('h'); }
-     c:inlineline
+     c:nested_block_line
      e:'='+
      endTPos:({ return pos; })
      spc:(spaces / comment)*
@@ -1349,7 +1355,7 @@
     "<" pre_tag_name
     attribs:generic_attribute*
     ">"
-    l:inlineline
+    l:nested_block_line
     ls:(sol pre_indent_line)*
     "</" pre_tag_name ">"
   {
@@ -1361,7 +1367,7 @@
   / & { return stops.dec('pre'); }
 
 // Don't recognize tabs
-pre_indent_line = " " l:inlineline {
+pre_indent_line = " " l:nested_block_line {
     //console.warn( JSON.stringify( [s, l] ) );
     return [' '].concat(l);
 }
@@ -1384,7 +1390,7 @@
     ts:(    newlineToken
                 / (htmlentity / [^&<]+)+
                 / nowiki
-                / !("</" pre_tag_name ">") t2:(htmlentity / .) { return t2; })+
+                / !("</" pre_tag_name ">") t2:(htmlentity / .) { return t2; })*
     ("</" pre_tag_name ">" / eof) {
         stops.dec('pre');
         // return nowiki tags as well?
@@ -1877,7 +1883,7 @@
 lists = (dtdd / hacky_dl_uses / li) (sol (dtdd / hacky_dl_uses / li))*
 
 li = bullets:list_char+
-     c:inlineline?
+     c:nested_block_line
      &eolf
 {
     if ( c === '' ) {
@@ -1912,12 +1918,12 @@
   = bullets:(!(";" !list_char) lc:list_char { return lc; })*
     ";"
     & {return stops.inc('colon');}
-    c:inlineline
+    c:nested_block_line
     cpos:(":" { return pos; })
     // Fortunately dtdds cannot be nested, so we can simply set the flag
     // back to 0 to disable it.
     & { stops.counters.colon = 0; return true;}
-    d:inlineline?
+    d:nested_block_line?
     &eolf {
         // Leave bullets as an array -- list handler expects this
         var li1 = new TagTk( 'listItem', [], { tsr: [pos0, pos0 + bullets.length] } );
@@ -2578,7 +2584,7 @@
                 directive
               // Eat insane tags-inside-attributes. Example:
               // <hiddentext>generated with.. </hiddentext>
-              / &generic_tag inlineline
+              / &generic_tag nested_block_line
               / !(space_or_newline / [\[>]) c:. {
                     //console.warn( 'aptl: ' + pp(c) );
                     return c;
diff --git a/js/tests/parserTests-blacklist.js b/js/tests/parserTests-blacklist.js
index 2895db1..0cbe18b 100644
--- a/js/tests/parserTests-blacklist.js
+++ b/js/tests/parserTests-blacklist.js
@@ -712,6 +712,7 @@
 add("html2html", "<pre> with forbidden attribute (bug 3202)");
 add("html2html", "<pre> with forbidden attribute values (bug 3202)");
 add("html2html", "<nowiki> inside <pre> (bug 13238)");
+add("html2html", "Empty pre; pre inside other HTML tags (bug 54946)");
 add("html2html", "3a. Indent-Pre and block tags (single-line html)");
 add("html2html", "3b. Indent-Pre and block tags (pre-content on separate line)");
 add("html2html", "4. Multiple spaces at start-of-line");
@@ -1262,6 +1263,7 @@
 add("html2wt", "<nowiki> inside <pre> (bug 13238)");
 add("html2wt", "<nowiki> and <pre> preference (first one wins)");
 add("html2wt", "</pre> inside nowiki");
+add("html2wt", "Empty pre; pre inside other HTML tags (bug 54946)");
 add("html2wt", "Templates: Indent-Pre: 1a. Templates that break a line should suppress <pre>");
 add("html2wt", "Templates: Indent-Pre: 1b. Templates that break a line should suppress <pre>");
 add("html2wt", "Templates: Indent-Pre: 1c: Wrapping should be based on expanded content");
diff --git a/js/tests/parserTests.txt b/js/tests/parserTests.txt
index 5073eab..c718512 100644
--- a/js/tests/parserTests.txt
+++ b/js/tests/parserTests.txt
@@ -1560,6 +1560,25 @@
 </p>
 !! end
 
+!! test
+Empty pre; pre inside other HTML tags (bug 54946)
+!! input
+a
+
+<div><pre>
+foo
+</pre></div>
+<pre></pre>
+!! result
+<p>a
+</p>
+<div><pre>
+foo
+</pre></div>
+<pre></pre>
+
+!! end
+
 !!test
 Templates: Indent-Pre: 1a. Templates that break a line should suppress <pre>
 !!input

-- 
To view, visit https://gerrit.wikimedia.org/r/92469
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: Ie508dedaa8d23974689537ef656ee0c0837e382c
Gerrit-PatchSet: 5
Gerrit-Project: mediawiki/extensions/Parsoid
Gerrit-Branch: master
Gerrit-Owner: GWicke <[email protected]>
Gerrit-Reviewer: Subramanya Sastry <[email protected]>
Gerrit-Reviewer: jenkins-bot

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to