[MediaWiki-commits] [Gerrit] Some followup cleanups to the tokenizer - change (mediawiki...parsoid)

Arlolra (Code Review) Thu, 04 Jun 2015 17:58:45 -0700

Arlolra has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/216016


Change subject: Some followup cleanups to the tokenizer
......................................................................

Some followup cleanups to the tokenizer

 * After reviewing I3b6b1a4214c143d84a856195fa7038b890f815f7

Change-Id: Ib1af09b8af36775831d9ce7a0db33e52a893ff09
---
M lib/pegTokenizer.pegjs.txt
1 file changed, 24 insertions(+), 24 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/parsoid refs/changes/16/216016/1

diff --git a/lib/pegTokenizer.pegjs.txt b/lib/pegTokenizer.pegjs.txt
index 790cd00..463e0c8 100644
--- a/lib/pegTokenizer.pegjs.txt
+++ b/lib/pegTokenizer.pegjs.txt
@@ -69,8 +69,9 @@
   = rw:redirect_word
     sp:$space_or_newline*
     c:$( ":" space_or_newline* )?
-    link:(wl:wikilink { return wl[0]; })
+    wl:wikilink
 {
+    var link = wl[0];
     if (!link || link.constructor === String) {
         peg$currPos = peg$reportedPos;
         return peg$FAILED;
@@ -385,8 +386,7 @@
 // Behavior switches. See:
 // https://www.mediawiki.org/wiki/Help:Magic_words#Behavior_switches
 behavior_switch
-  = '__' behavior_text '__' {
-    var bs = text();
+  = bs:$('__' behavior_text '__') {
     return [ new SelfclosingTagTk( 'behavior-switch', [ new KV('word', bs) ], {
       tsr: [ peg$reportedPos, peg$currPos ],
       src: bs
@@ -1989,7 +1989,7 @@
  */
 
 include_limits =
-  "<" c:"/"? name:$[0-9a-zA-Z]+ space_or_newline* ">" {
+  "<" c:"/"? name:$[oyinclude]i+ space_or_newline* ">" {
     var incl = name.toLowerCase();
 
     if ( incl !== "noinclude" && incl !== "onlyinclude" && incl !== "includeonly" ) {
@@ -2072,7 +2072,7 @@
 wikilink_preprocessor_text
   = r:( t:$[^<[{\n\r\t|!\]}{ &\-]+
         // XXX gwicke: any more chars we need to allow here?
-        / !inline_breaks wr:( directive / !"]]" c:( text_char / [!<\-] ) { return c; } )
+        / !inline_breaks wr:( directive / $( !"]]" ( text_char / [!<\-] ) ) )
         { return wr; }
     )+ {
       return tu.flatten_stringlist( r );
@@ -2082,7 +2082,7 @@
   // added special separator character class inline: separates url from
   // description / text
   = r:( $[^'<~[{\n\r|!\]}\t&="' \u00A0\u1680\u180E\u2000-\u200A\u202F\u205F\u3000]+
-  / !inline_breaks r:( directive / no_punctuation_char ) { return r; }
+  / !inline_breaks s:( directive / no_punctuation_char ) { return s; }
   /// urlencoded_char
   // !inline_breaks no_punctuation_char
   / $([.:,] !(space / eolf))
@@ -2096,10 +2096,10 @@
   = r:( $(!inline_breaks [^=<>{}\n\r&'"\t/ \-])+
   / !inline_breaks
     ! '/>'
-    r:(
+    s:(
           directive
         / [&%/{}\-]
-    ) { return r; }
+    ) { return s; }
   )+
   {
       return tu.flatten_string( r );
@@ -2107,9 +2107,9 @@
 
 attribute_preprocessor_text_single
   = r:( $[^{}&'<\-]+
-  / !inline_breaks r:(
+  / !inline_breaks s:(
       directive
-    / [{}&<\-] ) { return r; }
+    / [{}&<\-] ) { return s; }
   )*
   {
       return tu.flatten_string( r );
@@ -2117,9 +2117,9 @@
 
 attribute_preprocessor_text_single_broken
   = r:( $[^{}&'<>|\-]+
-  / !inline_breaks r:(
+  / !inline_breaks s:(
       directive
-    / [{}&<\-] ) { return r; }
+    / [{}&<\-] ) { return s; }
   )*
   {
       return tu.flatten_string( r );
@@ -2127,9 +2127,9 @@
 
 attribute_preprocessor_text_double
   = r:( $[^{}&"<\-]+
-  / !inline_breaks r:(
+  / !inline_breaks s:(
       directive
-    / [{}&<\-] ) { return r; }
+    / [{}&<\-] ) { return s; }
   )*
   {
       return tu.flatten_string( r );
@@ -2137,9 +2137,9 @@
 
 attribute_preprocessor_text_double_broken
   = r:( $[^{}&"<>|\-]+
-  / !inline_breaks r:(
+  / !inline_breaks s:(
       directive
-    / [{}&<\-] ) { return r; }
+    / [{}&<\-] ) { return s; }
   )*
   {
       return tu.flatten_string( r );
@@ -2166,36 +2166,36 @@
 
 attribute_preprocessor_text_single_line
   = r:( $[^{}&'<\n\-]+
-  / !inline_breaks r:(
+  / !inline_breaks s:(
       directive
-    / $( ![\r\n] [{}&<\-] ) ) { return r; }
+    / $( ![\r\n] [{}&<\-] ) ) { return s; }
   )* {
       return tu.flatten_string( r );
   }
 
 attribute_preprocessor_text_single_line_broken
   = r:( $[^{}&'<>|!\n\-]+
-  / !inline_breaks r:(
+  / !inline_breaks s:(
       directive
-    / $( ![\r\n] q:[{}&<\-] ) ) { return r; }
+    / $( ![\r\n] q:[{}&<\-] ) ) { return s; }
   )* {
       return tu.flatten_string( r );
   }
 
 attribute_preprocessor_text_double_line
   = r:( $[^{}&"<\n\-]+
-  / !inline_breaks r:(
+  / !inline_breaks s:(
       directive
-    / $( ![\r\n] [{}&<\-] ) ) { return r; }
+    / $( ![\r\n] [{}&<\-] ) ) { return s; }
   )* {
       return tu.flatten_string( r );
   }
 
 attribute_preprocessor_text_double_line_broken
   = r:( $[^{}&"<>|!\n\-]+
-  / !inline_breaks r:(
+  / !inline_breaks s:(
       directive
-    / $( ![\r\n] q:[{}&<\-] ) ) { return r; }
+    / $( ![\r\n] q:[{}&<\-] ) ) { return s; }
   )* {
       return tu.flatten_string( r );
   }

-- 
To view, visit https://gerrit.wikimedia.org/r/216016
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Ib1af09b8af36775831d9ce7a0db33e52a893ff09
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/services/parsoid
Gerrit-Branch: master
Gerrit-Owner: Arlolra <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

[MediaWiki-commits] [Gerrit] Some followup cleanups to the tokenizer - change (mediawiki...parsoid)

Reply via email to