GWicke has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/92469


Change subject: WIP Bug 54946: Alternative solution for <pre> tokenization
......................................................................

WIP Bug 54946: Alternative solution for <pre> tokenization

This patch attempts to fix the same issue as
https://gerrit.wikimedia.org/r/#/c/87632/

It adds a nested_block_line production and uses that in a few places where
inlineline was used before.

Change-Id: Ie508dedaa8d23974689537ef656ee0c0837e382c
---
M js/lib/pegTokenizer.pegjs.txt
1 file changed, 16 insertions(+), 10 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/Parsoid refs/changes/69/92469/1

diff --git a/js/lib/pegTokenizer.pegjs.txt b/js/lib/pegTokenizer.pegjs.txt
index d7b79dc..b659846 100644
--- a/js/lib/pegTokenizer.pegjs.txt
+++ b/js/lib/pegTokenizer.pegjs.txt
@@ -572,6 +572,10 @@
  */
 nested_block = !inline_breaks b:block { return b; }
 
+nested_block_line = bs:(!sol !inline_breaks b:block { return b; })* { 
+    return flattenIfArray(bs); 
+}
+
 /*
  * The same, but suitable for use inside a table construct.
  * Doesn't match table_heading_tag, table_row_tag, table_data_tag,
@@ -678,15 +682,17 @@
        return null !== pegArgs.pegTokenizer.inline_breaks( input, pos, stops );
       }
 
+pre_start = "<" pre_tag_name (' '* [^>]*)? ">"
+
 inline
-  = c:(urltext / (! inline_breaks (inline_element / . )))+ {
+  = c:(urltext / (! inline_breaks !pre_start (inline_element / . )))+ {
       //console.warn('inline out:' + pp(c));
       return flatten_stringlist( c );
 }
 
 
 inlineline
-  = c:(urltext / ! inline_breaks (inline_element / [^\r\n]))+ {
+  = c:(urltext / ! inline_breaks !pre_start (inline_element / [^\r\n]))+ {
       //console.warn('inlineline out:' + pp(c) + input.substr(pos0, pos));
       return flatten_stringlist( c );
 }
@@ -712,7 +718,7 @@
     r:(
      s:'='+ // moved in here to make s accessible to inner action
      & { return stops.inc('h'); }
-     c:inlineline
+     c:nested_block_line
      e:'='+
      endTPos:({ return pos; })
      spc:(spaces / comment)*
@@ -1348,7 +1354,7 @@
     "<" pre_tag_name
     attribs:generic_attribute*
     ">"
-    l:inlineline
+    l:nested_block_line
     ls:(sol pre_indent_line)*
     "</" pre_tag_name ">"
   {
@@ -1360,7 +1366,7 @@
   / & { return stops.dec('pre'); }
 
 // Don't recognize tabs
-pre_indent_line = " " l:inlineline {
+pre_indent_line = " " l:nested_block_line {
     //console.warn( JSON.stringify( [s, l] ) );
     return [' '].concat(l);
 }
@@ -1383,7 +1389,7 @@
     ts:(    newlineToken
                 / (htmlentity / [^&<]+)+
                 / nowiki
-                / !("</" pre_tag_name ">") t2:(htmlentity / .) { return t2; })+
+                / !("</" pre_tag_name ">") t2:(htmlentity / .) { return t2; })*
     ("</" pre_tag_name ">" / eof) {
         stops.dec('pre');
         // return nowiki tags as well?
@@ -1876,7 +1882,7 @@
 lists = (dtdd / hacky_dl_uses / li) (sol (dtdd / hacky_dl_uses / li))*
 
 li = bullets:list_char+
-     c:inlineline?
+     c:nested_block_line
      &eolf
 {
     if ( c === '' ) {
@@ -1911,12 +1917,12 @@
   = bullets:(!(";" !list_char) lc:list_char { return lc; })*
     ";"
     & {return stops.inc('colon');}
-    c:inlineline
+    c:nested_block_line
     cpos:(":" { return pos; })
     // Fortunately dtdds cannot be nested, so we can simply set the flag
     // back to 0 to disable it.
     & { stops.counters.colon = 0; return true;}
-    d:inlineline?
+    d:nested_block_line?
     &eolf {
         // Leave bullets as an array -- list handler expects this
         var li1 = new TagTk( 'listItem', [], { tsr: [pos0, pos0 + bullets.length] } );
@@ -2577,7 +2583,7 @@
                 directive
               // Eat insane tags-inside-attributes. Example:
               // <hiddentext>generated with.. </hiddentext>
-              / &generic_tag inlineline
+              / &generic_tag nested_block_line
               / !(space_or_newline / [\[>]) c:. {
                     //console.warn( 'aptl: ' + pp(c) );
                     return c;

-- 
To view, visit https://gerrit.wikimedia.org/r/92469
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Ie508dedaa8d23974689537ef656ee0c0837e382c
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/Parsoid
Gerrit-Branch: master
Gerrit-Owner: GWicke <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to