GWicke has submitted this change and it was merged.
Change subject: Improved p-wrapping in the presence of comments-and-ws-only
lines.
......................................................................
Improved p-wrapping in the presence of comments-and-ws-only lines.
* Lines that contained comments and white-text only used to trigger
p-wrapping based on newlines seen till that point. However, the
PHP parser effectively discards such lines (including the newline)
which leads to newlines before/afther such a line to be considered
as sequential.
* This patch mimics the PHP behavior by effectively ignoring the
comments-and-ws-only lines (but not discarding them since they
still need to be roundtripped back).
* In addition, changed newline output in p-br-p case to emit the
3rd newline after the <br/>. To account for this, modified:
- DSR computation to allocate zero-dsr width to the br tag
since the newline after it will account for it.
- serializer br handler to set mix/max constraints to 2 newlines
instead of 3 before since the emitted newline will ensure that
the 3rd newline is output. To be verified if this is the
right fix.
* This now fixes parsing (and thus roundtripping of the following
test snippets):
1. "a\n\n <!--foo-->\n\nb"
2. "a\n\n \n\nb"
3. "a\n\n\n=b="
4. "a\n<!--foo-->\n\n\nb"
5. "a\n\n<!--foo-->\n\nb"
6. "a\n\n\n<!--foo-->\nb"
Additional tests:
1. "a\n<!--foo-->\nb" vs "a\n<!--foo--><!--bar-->\nb"
The latter doesn't rt correctly, but the parse output now
mimics the php parser idiosynrasy of being dependent on
# of comments on a line.
* Eliminates all semantic RT errors on:
- en:Ayat al-Kursi (also zero rt errors)
- en:West Yorkshire abbeys and priories (also zero rt errors)
- en:St._Louis_County_Road_91
* No change in parser tests results, but some change in HTML output
seen in failed tests (and possibly passed tests).
* TODO: Add the above and other parser tests to parserTests.txt
Change-Id: I51f0ee39dcf7e7a7e2800117a0711f4a4659d4ec
---
M js/lib/ext.core.ParagraphWrapper.js
M js/lib/mediawiki.DOMPostProcessor.js
M js/lib/mediawiki.WikitextSerializer.js
3 files changed, 66 insertions(+), 32 deletions(-)
Approvals:
GWicke: Verified; Looks good to me, approved
jenkins-bot: Verified
diff --git a/js/lib/ext.core.ParagraphWrapper.js
b/js/lib/ext.core.ParagraphWrapper.js
index 58d9e38..0ae4268 100644
--- a/js/lib/ext.core.ParagraphWrapper.js
+++ b/js/lib/ext.core.ParagraphWrapper.js
@@ -26,16 +26,13 @@
this.tableTags = [];
this.nlWsTokens = [];
this.nonNlTokens = [];
- this.currLine = {
- tokens: [],
- hasBlockToken: this.options.inBlockToken === undefined ? false
: this.options.inBlockToken,
- hasWrappableTokens: false
- };
this.newLineCount = 0;
this.hasOpenPTag = false;
this.hasOpenHTMLPTag = false;
this.inPre = false;
-}
+ this.resetCurrLine(true);
+ this.currLine.hasBlockToken = this.options.inBlockToken === undefined ?
false : this.options.inBlockToken;
+};
// constants
ParagraphWrapper.prototype.newlineRank = 2.95;
@@ -64,12 +61,14 @@
return resToks;
};
-ParagraphWrapper.prototype.discardOneNlTk = function() {
- for (var i = 0, n = this.nlWsTokens.length; i < n; i++) {
- var t = this.nlWsTokens[i];
+ParagraphWrapper.prototype.discardOneNlTk = function(out) {
+ var i = 0, n = this.nlWsTokens.length;
+ while (i < n) {
+ var t = this.nlWsTokens.shift();
if (t.constructor === NlTk) {
- this.nlWsTokens = this.nlWsTokens.splice(i+1);
return t;
+ } else {
+ out.push(t);
}
}
@@ -84,9 +83,11 @@
}
};
-ParagraphWrapper.prototype.resetCurrLine = function () {
+ParagraphWrapper.prototype.resetCurrLine = function(atEOL) {
this.currLine = {
tokens: [],
+ isNewline: atEOL,
+ commentCount: 0,
hasBlockToken: false,
hasWrappableTokens: false
};
@@ -106,12 +107,21 @@
this.hasOpenPTag = true;
}
+ // PHP parser ignores (= strips out during preprocessing) lines with
+ // a single comment and other white-space. This flag checks if we are
+ // on such a line.
+ var emptyLineWithSingleComment =
+ l.isNewline &&
+ !l.hasWrappableTokens &&
+ l.commentCount === 1;
+
// this.nonNlTokens += this.currLine.tokens
this.nonNlTokens = this.nonNlTokens.concat(l.tokens);
- this.resetCurrLine();
- this.nlWsTokens.push(token);
+ this.resetCurrLine(true);
+
if (token.constructor === EOFTk) {
+ this.nlWsTokens.push(token);
this.closeOpenPTag(this.nonNlTokens);
var res = this.processPendingNLs(false);
this.inPre = false;
@@ -119,8 +129,19 @@
this.hasOpenHTMLPTag = false;
this.reset();
return { tokens: res };
+ } else if (emptyLineWithSingleComment) {
+ // 1. Dont increment newline count on "empty" lines with
+ // a single comment -- see comment above
+ //
+ // 2. Convert the NlTk to a String-representation so that
+ // it doesn't get processed by discardOneNlTk -- this
+ // newline needs to be emitted (so it gets RTed) without
+ // being processed for p-wrapping.
+ this.nlWsTokens.push("\n");
+ return {};
} else {
this.newLineCount++;
+ this.nlWsTokens.push(token);
return {};
}
};
@@ -144,45 +165,43 @@
if (!topTag || topTag === 'td' || topTag === 'th') {
// 2. Discard 3 newlines (the p-br-p section
// serializes back to 3 newlines)
- nlTk = this.discardOneNlTk();
- nlTk2 = this.discardOneNlTk();
- this.discardOneNlTk();
+ resToks.push(this.discardOneNlTk(resToks));
+ resToks.push(this.discardOneNlTk(resToks));
+ nlTk = this.discardOneNlTk(resToks);
// Preserve nls for pretty-printing and dsr
reliability
- resToks.push(nlTk);
- resToks.push(nlTk2);
// 3. Insert <p><br></p> sections
// FIXME: Mark this as a placeholder for now
until the
// editor handles this properly
resToks.push(new TagTk( 'p', [new KV('typeof',
'mw:Placeholder')] ));
resToks.push(new SelfclosingTagTk('br'));
+ resToks.push(nlTk);
if (newLineCount > 3) {
resToks.push(new EndTagTk('p'));
} else {
this.hasOpenPTag = true;
}
} else {
- resToks.push(this.discardOneNlTk());
- resToks.push(this.discardOneNlTk());
- resToks.push(this.discardOneNlTk());
+ resToks.push(this.discardOneNlTk(resToks));
+ resToks.push(this.discardOneNlTk(resToks));
+ resToks.push(this.discardOneNlTk(resToks));
}
newLineCount -= 3;
}
if (newLineCount === 2) {
- nlTk = this.discardOneNlTk();
- nlTk2 = this.discardOneNlTk();
+ resToks.push(this.discardOneNlTk(resToks));
+ nlTk = this.discardOneNlTk(resToks);
this.closeOpenPTag(resToks);
resToks.push(nlTk);
- resToks.push(nlTk2);
}
}
if (isBlockToken) {
if (newLineCount === 1){
- nlTk = this.discardOneNlTk();
+ nlTk = this.discardOneNlTk(resToks);
this.closeOpenPTag(resToks);
resToks.push(nlTk);
} else {
@@ -292,11 +311,25 @@
}
} else if (tc === EOFTk || this.inPre) {
return { tokens: [token] };
- } else if ((tc === String && token.match( /^[\t ]*$/)) ||
- (tc === CommentTk) ||
+ } else if ((tc === String && token.match( /^[\t ]*$/)) || tc ===
CommentTk) {
+ if (tc === CommentTk) {
+ this.currLine.commentCount++;
+ }
+
+ if (this.newLineCount === 0) {
+ this.currLine.tokens.push(token);
+ // Since we have no pending newlines to trip us up,
+ // no need to buffer -- just emit everything
+ return { tokens: this._getTokensAndReset() };
+ } else {
+ // We are in buffering mode waiting till we are ready to
+ // process pending newlines.
+ this.nlWsTokens.push(token);
+ return {};
+ }
+ } else if (isNamedTagToken(token, {'link':1}) ||
// TODO: narrow this down a bit more to take typeof
into account
- (tc === SelfclosingTagTk && token.name === 'meta') ||
- isNamedTagToken(token, {'link':1}) )
+ (tc === SelfclosingTagTk && token.name === 'meta' &&
token.dataAttribs.stx !== 'html'))
{
if (this.newLineCount === 0) {
this.currLine.tokens.push(token);
diff --git a/js/lib/mediawiki.DOMPostProcessor.js
b/js/lib/mediawiki.DOMPostProcessor.js
index 0296ddb..af5464a 100644
--- a/js/lib/mediawiki.DOMPostProcessor.js
+++ b/js/lib/mediawiki.DOMPostProcessor.js
@@ -45,7 +45,7 @@
"i" : [2,2],
"td" : [null,0],
"th" : [null,0],
- "br" : [1,0]
+ "br" : [0,0]
// span, figure, caption, figcaption, br, a, i, b
};
diff --git a/js/lib/mediawiki.WikitextSerializer.js
b/js/lib/mediawiki.WikitextSerializer.js
index 12303fb..167a660 100644
--- a/js/lib/mediawiki.WikitextSerializer.js
+++ b/js/lib/mediawiki.WikitextSerializer.js
@@ -1867,8 +1867,9 @@
node.parentNode.childNodes.length === 1) {
// p/br pair
// Hackhack ;)
- state.sep.constraints.min = 3;
- state.sep.constraints.max = 3;
+ // SSS FIXME: With the change I made,
this check can be simplified?
+ state.sep.constraints.min = 2;
+ state.sep.constraints.max = 2;
cb('');
} else {
cb('');
--
To view, visit https://gerrit.wikimedia.org/r/55240
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I51f0ee39dcf7e7a7e2800117a0711f4a4659d4ec
Gerrit-PatchSet: 4
Gerrit-Project: mediawiki/extensions/Parsoid
Gerrit-Branch: master
Gerrit-Owner: Subramanya Sastry <[email protected]>
Gerrit-Reviewer: GWicke <[email protected]>
Gerrit-Reviewer: jenkins-bot
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits