Arlolra has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/155868

Change subject: b/i context aware parsing of definition list colon
......................................................................

b/i context aware parsing of definition list colon

Bug: 69219
Change-Id: I2d8403eb732fdb022e8c87a990fd392a62e7f477
---
M lib/mediawiki.tokenizer.utils.js
M lib/pegTokenizer.pegjs.txt
M tests/parserTests.txt
3 files changed, 40 insertions(+), 15 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/parsoid refs/changes/68/155868/1

diff --git a/lib/mediawiki.tokenizer.utils.js b/lib/mediawiki.tokenizer.utils.js
index 370c852..89b755c 100644
--- a/lib/mediawiki.tokenizer.utils.js
+++ b/lib/mediawiki.tokenizer.utils.js
@@ -210,6 +210,7 @@
                        case ":":
                                return counters.colon &&
                                        ! stops.onStack( 'extlink' ) &&
+                                       ! stops.onStack( 'quote' ) &&
                                        ! counters.linkdesc;
                        case "\r":
                                return stops.onStack( 'table' ) &&
@@ -325,6 +326,15 @@
        return false;
 };
 
+SyntaxStops.prototype.popAll = function ( name ) {
+       var stack = this.stacks[name];
+       if ( Array.isArray( stack ) ) {
+               while ( stack.length > 0 ) {
+                       stack.pop();
+               }
+       }
+};
+
 SyntaxStops.prototype.onStack = function ( name ) {
        var stack = this.stacks[name];
        if ( stack === undefined || stack.length === 0 ) {
diff --git a/lib/pegTokenizer.pegjs.txt b/lib/pegTokenizer.pegjs.txt
index 1118bd3..181506d 100644
--- a/lib/pegTokenizer.pegjs.txt
+++ b/lib/pegTokenizer.pegjs.txt
@@ -99,17 +99,15 @@
 /* The 'redirect' magic word.
  * The leading whitespace allowed is due to the PHP trim() function.
  */
-redirect_word = sp:[ \t\n\r\0\x0b]* rw:((!space_or_newline ![:\[] c:.{return c;})+)
+redirect_word = sp:[ \t\n\r\0\x0b]* rw:(!space_or_newline ![:\[] c:.{return c;})+
 {
-    if ( !rw ) {
-        rw = "";
-    }
     rw = rw.join('');
     if ( options.env.conf.wiki.getMagicWordMatcher( 'redirect' ).test( rw ) ) {
         return sp.join('') + rw;
+    } else {
+        peg$currPos = peg$reportedPos;
+        return peg$FAILED;
     }
-    peg$currPos = peg$reportedPos;
-    return peg$FAILED;
 }
 
 /*
@@ -535,7 +533,6 @@
 // javascript does not support unicode features..
 unicode_separator_space = [ \u00A0\u1680\u180E\u2000-\u200A\u202F\u205F\u3000]
 
-
 urlencoded_char = "%" c0:[0-9a-fA-F] c1:[0-9a-fA-F] {
     try {
         return decodeURI("%" + c0 + c1);
@@ -893,7 +890,6 @@
     }
   / & { stops.pop('pipe'); return stops.dec('linkdesc'); }
 
-link_end = "]]"
 
 /* Generic quote production for italic and bold, further processed in a token
  * stream transformation in doQuotes. Relies on NlTk tokens being emitted
@@ -919,6 +915,15 @@
         { tsr: [peg$reportedPos + plainticks, peg$currPos] } );
     mwq.value = quotes.substring(plainticks);
     result.push(mwq);
+
+    // Keep a stack of reasonably well-behaved open/close quotes in an
+    // attempt to handle context-sensitive colons in dtdd.
+    if ( stops.counters.colon ) {
+        var len = quotes.length - plainticks;
+        var q = stops.onStack("quote");
+        stops[ q === len ? "pop" : "push" ]("quote", len);
+    }
+
     return result;
 }
 
@@ -1487,6 +1492,8 @@
  *********************************************************/
 lists = (dtdd / hacky_dl_uses / li) (sol (dtdd / hacky_dl_uses / li))*
 
+list_char = [*#:;]
+
 li = bullets:list_char+
      c:nested_block_line
      &eolf
@@ -1522,7 +1529,7 @@
 dtdd
   = bullets:(!(";" !list_char) lc:list_char { return lc; })*
     ";"
-    & {return stops.inc('colon');}
+    & { stops.popAll('quote'); return stops.inc('colon');}
     c:nested_block_line
     cpos:(":" { return peg$currPos; })
     // Fortunately dtdds cannot be nested, so we can simply set the flag
@@ -1540,15 +1547,11 @@
        var li2 = new TagTk( 'listItem', [], { tsr: [cpos-1, cpos], stx: 'row' } );
         li2.bullets = bullets.slice();
         li2.bullets.push(":");
-
+        stops.popAll('quote');
         return [ li1 ].concat( c, [ li2 ], d || [] );
     }
   // Fall-back case to clear the colon flag
-  / & { stops.counters.colon = 0; return false; }
-
-
-list_char = [*#:;]
-
+  / & { stops.popAll('quote'); stops.counters.colon = 0; return false; }
 
 
 /*********************************************************************
diff --git a/tests/parserTests.txt b/tests/parserTests.txt
index cda40b6..40d4540 100644
--- a/tests/parserTests.txt
+++ b/tests/parserTests.txt
@@ -3157,6 +3157,18 @@
 !! end
 
 !! test
+Definition lists: quotes with colon
+!! wikitext
+; '''Sunday August 10, 12:30, [[Frobisher 123]]'''
+!! html/php
+<dl><dt><b>Sunday August 10, 12:30, <a href="/wiki/Frobisher_123" title="Frobisher 123">Frobisher 123</a></b></dt></dl>
+
+!! html/parsoid
+<dl><dt> <b>Sunday August 10, 12:30, <a rel="mw:WikiLink" href="./Frobisher_123" title="Frobisher 123" data-parsoid='{"stx":"simple","a":{"href":"./Frobisher_123"},"sa":{"href":"Frobisher 123"}}'>Frobisher 123</a></b></dt></dl>
+
+!! end
+
+!! test
 Bug 11748: Literal closing tags
 !! wikitext
 <dl>

-- 
To view, visit https://gerrit.wikimedia.org/r/155868
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I2d8403eb732fdb022e8c87a990fd392a62e7f477
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/services/parsoid
Gerrit-Branch: master
Gerrit-Owner: Arlolra <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to