Subramanya Sastry has uploaded a new change for review.
https://gerrit.wikimedia.org/r/49848
Change subject: Tokenizer fixes for pre and nowiki parsing.
......................................................................
Tokenizer fixes for pre and nowiki parsing.
* pre_indent_in_tags production was incrementing the 'pre' stop
late in the production but decrementing it if the production
failed to match. This led to the stops counter going pretty
negative (-5 in some cases) even when the stops weren't
unbalanced. Fixed it by making the increment the first
thing the production does.
* Split the nowiki production from 2 rules to 3 rules:
- <nowiki> .. </nowiki>
-- treated as nowiki
- <nowiki />
-- treated as nowiki with mw:Placeholder meta
- unbalanced <nowiki> or </nowiki> tags in pre-context.
-- treated as plain text with mw:Placeholder span tags.
-- Fixed rule to explicitly require stops.counters.pre to
be > 0 (in case it is negative -- although not sure
if this is going to be the case anymore with the fix
above)
* Minor syntactic fixes (added missing ; in a few places and
replaced foo['bar'] with foo.bar accessor).
* No change in parser test results, but there are diffs
in parser test output. The diffs are all for the better and
could result in more tests passing with other unrelated fixes.
* These fixes also bring the test results in the chunky tokenizer
patch to parity with the full-tokenizing-in-one-pass runs.
Change-Id: If3d461195431883c718d0becb3264fc270fdaf82
---
M js/lib/pegTokenizer.pegjs.txt
1 file changed, 29 insertions(+), 27 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/Parsoid
refs/changes/48/49848/1
diff --git a/js/lib/pegTokenizer.pegjs.txt b/js/lib/pegTokenizer.pegjs.txt
index c3b4346..ff0b54e 100644
--- a/js/lib/pegTokenizer.pegjs.txt
+++ b/js/lib/pegTokenizer.pegjs.txt
@@ -488,12 +488,12 @@
/ st:optionalSpaceToken
r:( & [{}|!] tl:table_lines { return tl; }
// tag-only lines should not trigger pre either
- / bts:(bt:block_tag stl:optionalSpaceToken { return bt.concat(stl) })+
- &eolf { return bts }
+ / bts:(bt:block_tag stl:optionalSpaceToken { return bt.concat(stl); })+
+ &eolf { return bts; }
) {
return st.concat(r);
}
- / ! { return stops.counters.nopre } pre_indent
+ / ! { return stops.counters.nopre; } pre_indent
/ pre
/ // Horizontal rules
"----" d:"-"*
@@ -1186,11 +1186,11 @@
// possibly merge with the regular 'pre' production.
// FIXME: fix tag end position
pre_indent_in_tags
- = space+ // XXX: capture space for round-tripping
+ = & { return stops.inc('pre'); }
+ space+ // XXX: capture space for round-tripping
"<pre"
attribs:generic_attribute*
">"
- & { return stops.inc('pre'); }
l:inlineline
ls:(sol pre_indent_line)*
"</pre>"
@@ -1228,21 +1228,6 @@
/ !"</pre>" t2:(htmlentity / .) { return t2 })+
("</pre>" / eof) {
stops.dec('pre');
- /*
- * The VE will be fixed up to preserve this newline real soon, so
disable it for now.
- var firstTok = ts[0];
- if (firstTok.constructor === String && firstTok.match(/^\n/)) {
- // Strip leading newline from the token
- ts.shift();
- if (firstTok.length > 1) {
- ts.unshift(firstTok.substr(1));
- }
- // Re-add it with protection
- ts.unshift(new EndTagTk( 'span' ));
- ts.unshift("\n");
- ts.unshift(new TagTk( 'span', [ new KV( 'typeof', 'mw:Placeholder'
) ], { src: '\n' } ));
- }
- */
// return nowiki tags as well?
return [ new TagTk( 'pre', attribs, { stx: 'html', tsr: [pos0, endpos]
} ) ]
.concat(flatten_stringlist(ts), [ new EndTagTk( 'pre', [],
{ tsr: [pos - 6, pos] } ) ]);
@@ -1299,11 +1284,10 @@
{ tsr: [pos - 9, pos] })
] );
}
- // nowiki fallback: source-based round-tripping.
- / ! { return stops.counters.pre }
- nw0:({return pos})
- "<" "/"? "nowiki" [ ]* "/"? ">" {
- //console.warn('nowiki fallback');
+ // nowiki fallback: source-based round-tripping of <nowiki />.
+ / nw0:({return pos})
+ "<nowiki" [ ]* "/>" {
+ //console.warn('<nowiki/>');
return [
new SelfclosingTagTk('meta',
[new KV('typeof', 'mw:Placeholder')],
@@ -1312,6 +1296,24 @@
tsr: [nw0, pos]
})
];
+ }
+ // nowiki fallback: source-based round-tripping
+ // of unbalanced nowiki tags that are treated as text.
+ / ! { return stops.counters.pre > 0; }
+ nw0:({return pos})
+ "<" "/"? "nowiki" [ ]* "/"? ">" {
+ //console.warn('nowiki text');
+ var nowiki = input.substring(nw0, pos);
+ return [
+ new TagTk( 'span', [
+ new KV( 'typeof', 'mw:Placeholder' )
+ ], {
+ src: nowiki,
+ tsr: [nw0, pos]
+ } ),
+ nowiki,
+ new EndTagTk( 'span' )
+ ];
}
// Should abort the nowiki match:
@@ -1535,7 +1537,7 @@
cpos:(":" { return pos })
// Fortunately dtdds cannot be nested, so we can simply set the flag
// back to 0 to disable it.
- & { stops.counters['colon'] = 0; return true;}
+ & { stops.counters.colon = 0; return true;}
d:inlineline?
&eolf {
// Leave bullets as an array -- list handler expects this
@@ -1549,7 +1551,7 @@
return [ li1 ].concat( c, [ li2 ], d || '' );
}
// Fall-back case to clear the colon flag
- / & { return true; } { stops.counters['colon'] = 0; return null; }
+ / & { return true; } { stops.counters.colon = 0; return null; }
list_char = [*#:;]
--
To view, visit https://gerrit.wikimedia.org/r/49848
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: If3d461195431883c718d0becb3264fc270fdaf82
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/Parsoid
Gerrit-Branch: master
Gerrit-Owner: Subramanya Sastry <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits