Arlolra has uploaded a new change for review.
https://gerrit.wikimedia.org/r/155868
Change subject: b/i context aware parsing of definition list colon
......................................................................
b/i context aware parsing of definition list colon
Bug: 69219
Change-Id: I2d8403eb732fdb022e8c87a990fd392a62e7f477
---
M lib/mediawiki.tokenizer.utils.js
M lib/pegTokenizer.pegjs.txt
M tests/parserTests.txt
3 files changed, 40 insertions(+), 15 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/parsoid refs/changes/68/155868/1
diff --git a/lib/mediawiki.tokenizer.utils.js b/lib/mediawiki.tokenizer.utils.js
index 370c852..89b755c 100644
--- a/lib/mediawiki.tokenizer.utils.js
+++ b/lib/mediawiki.tokenizer.utils.js
@@ -210,6 +210,7 @@
case ":":
return counters.colon &&
! stops.onStack( 'extlink' ) &&
+ ! stops.onStack( 'quote' ) &&
! counters.linkdesc;
case "\r":
return stops.onStack( 'table' ) &&
@@ -325,6 +326,15 @@
return false;
};
+SyntaxStops.prototype.popAll = function ( name ) {
+ var stack = this.stacks[name];
+ if ( Array.isArray( stack ) ) {
+ while ( stack.length > 0 ) {
+ stack.pop();
+ }
+ }
+};
+
SyntaxStops.prototype.onStack = function ( name ) {
var stack = this.stacks[name];
if ( stack === undefined || stack.length === 0 ) {
diff --git a/lib/pegTokenizer.pegjs.txt b/lib/pegTokenizer.pegjs.txt
index 1118bd3..181506d 100644
--- a/lib/pegTokenizer.pegjs.txt
+++ b/lib/pegTokenizer.pegjs.txt
@@ -99,17 +99,15 @@
/* The 'redirect' magic word.
* The leading whitespace allowed is due to the PHP trim() function.
*/
-redirect_word = sp:[ \t\n\r\0\x0b]* rw:((!space_or_newline ![:\[] c:.{return c;})+)
+redirect_word = sp:[ \t\n\r\0\x0b]* rw:(!space_or_newline ![:\[] c:.{return c;})+
{
- if ( !rw ) {
- rw = "";
- }
rw = rw.join('');
if ( options.env.conf.wiki.getMagicWordMatcher( 'redirect' ).test( rw ) ) {
return sp.join('') + rw;
+ } else {
+ peg$currPos = peg$reportedPos;
+ return peg$FAILED;
}
- peg$currPos = peg$reportedPos;
- return peg$FAILED;
}
/*
@@ -535,7 +533,6 @@
// javascript does not support unicode features..
unicode_separator_space = [ \u00A0\u1680\u180E\u2000-\u200A\u202F\u205F\u3000]
-
urlencoded_char = "%" c0:[0-9a-fA-F] c1:[0-9a-fA-F] {
try {
return decodeURI("%" + c0 + c1);
@@ -893,7 +890,6 @@
}
/ & { stops.pop('pipe'); return stops.dec('linkdesc'); }
-link_end = "]]"
/* Generic quote production for italic and bold, further processed in a token
* stream transformation in doQuotes. Relies on NlTk tokens being emitted
@@ -919,6 +915,15 @@
{ tsr: [peg$reportedPos + plainticks, peg$currPos] } );
mwq.value = quotes.substring(plainticks);
result.push(mwq);
+
+ // Keep a stack of reasonably well-behaved open/close quotes in an
+ // attempt to handle context-sensitive colons in dtdd.
+ if ( stops.counters.colon ) {
+ var len = quotes.length - plainticks;
+ var q = stops.onStack("quote");
+ stops[ q === len ? "pop" : "push" ]("quote", len);
+ }
+
return result;
}
@@ -1487,6 +1492,8 @@
*********************************************************/
lists = (dtdd / hacky_dl_uses / li) (sol (dtdd / hacky_dl_uses / li))*
+list_char = [*#:;]
+
li = bullets:list_char+
c:nested_block_line
&eolf
@@ -1522,7 +1529,7 @@
dtdd
= bullets:(!(";" !list_char) lc:list_char { return lc; })*
";"
- & {return stops.inc('colon');}
+ & { stops.popAll('quote'); return stops.inc('colon');}
c:nested_block_line
cpos:(":" { return peg$currPos; })
// Fortunately dtdds cannot be nested, so we can simply set the flag
@@ -1540,15 +1547,11 @@
var li2 = new TagTk( 'listItem', [], { tsr: [cpos-1, cpos], stx: 'row' } );
li2.bullets = bullets.slice();
li2.bullets.push(":");
-
+ stops.popAll('quote');
return [ li1 ].concat( c, [ li2 ], d || [] );
}
// Fall-back case to clear the colon flag
- / & { stops.counters.colon = 0; return false; }
-
-
-list_char = [*#:;]
-
+ / & { stops.popAll('quote'); stops.counters.colon = 0; return false; }
/*********************************************************************
diff --git a/tests/parserTests.txt b/tests/parserTests.txt
index cda40b6..40d4540 100644
--- a/tests/parserTests.txt
+++ b/tests/parserTests.txt
@@ -3157,6 +3157,18 @@
!! end
!! test
+Definition lists: quotes with colon
+!! wikitext
+; '''Sunday August 10, 12:30, [[Frobisher 123]]'''
+!! html/php
+<dl><dt><b>Sunday August 10, 12:30, <a href="/wiki/Frobisher_123" title="Frobisher 123">Frobisher 123</a></b></dt></dl>
+
+!! html/parsoid
+<dl><dt> <b>Sunday August 10, 12:30, <a rel="mw:WikiLink" href="./Frobisher_123" title="Frobisher 123" data-parsoid='{"stx":"simple","a":{"href":"./Frobisher_123"},"sa":{"href":"Frobisher 123"}}'>Frobisher 123</a></b></dt></dl>
+
+!! end
+
+!! test
Bug 11748: Literal closing tags
!! wikitext
<dl>
--
To view, visit https://gerrit.wikimedia.org/r/155868
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I2d8403eb732fdb022e8c87a990fd392a62e7f477
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/services/parsoid
Gerrit-Branch: master
Gerrit-Owner: Arlolra <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits