Subramanya Sastry has uploaded a new change for review.
https://gerrit.wikimedia.org/r/55240
Change subject: Improved p-wrapping in the presence of comments-and-ws-only
lines.
......................................................................
Improved p-wrapping in the presence of comments-and-ws-only lines.
* Lines that contained comments and white-text only used to trigger
p-wrapping based on newlines seen till that point. However, the
PHP parser effectively discards such lines (including the newline)
which leads to newlines before/afther such a line to be considered
as sequential.
* This patch mimics the PHP behavior by effectively ignoring the
comments-and-ws-only lines (but not discarding them since they
still need to be roundtripped back).
* This now fixes parsing (and thus roundtripping of the following
test snippets, among others):
----------
a
<!--boo-->
b
-------------
a
b
-------------
* Eliminates all RT errors on en:Ayat al-Kursi
* No change in parser tests results, but some change in HTML output
seen in failed tests (and possibly passed tests).
* TODO: Add the above and other parser tests to parserTests.txt
Change-Id: I51f0ee39dcf7e7a7e2800117a0711f4a4659d4ec
---
M js/lib/ext.core.ParagraphWrapper.js
1 file changed, 51 insertions(+), 23 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/Parsoid
refs/changes/40/55240/1
diff --git a/js/lib/ext.core.ParagraphWrapper.js
b/js/lib/ext.core.ParagraphWrapper.js
index 58d9e38..ff8aa17 100644
--- a/js/lib/ext.core.ParagraphWrapper.js
+++ b/js/lib/ext.core.ParagraphWrapper.js
@@ -35,7 +35,7 @@
this.hasOpenPTag = false;
this.hasOpenHTMLPTag = false;
this.inPre = false;
-}
+};
// constants
ParagraphWrapper.prototype.newlineRank = 2.95;
@@ -64,12 +64,14 @@
return resToks;
};
-ParagraphWrapper.prototype.discardOneNlTk = function() {
- for (var i = 0, n = this.nlWsTokens.length; i < n; i++) {
- var t = this.nlWsTokens[i];
+ParagraphWrapper.prototype.discardOneNlTk = function(out) {
+ var i = 0, n = this.nlWsTokens.length;
+ while (i < n) {
+ var t = this.nlWsTokens.shift();
if (t.constructor === NlTk) {
- this.nlWsTokens = this.nlWsTokens.splice(i+1);
return t;
+ } else {
+ out.push(t);
}
}
@@ -87,6 +89,7 @@
ParagraphWrapper.prototype.resetCurrLine = function () {
this.currLine = {
tokens: [],
+ commentCount: 0,
hasBlockToken: false,
hasWrappableTokens: false
};
@@ -106,12 +109,15 @@
this.hasOpenPTag = true;
}
+ var emptyLineWithSingleComment = l.commentCount === 1 &&
!l.hasWrappableTokens;
+
// this.nonNlTokens += this.currLine.tokens
this.nonNlTokens = this.nonNlTokens.concat(l.tokens);
+
this.resetCurrLine();
- this.nlWsTokens.push(token);
if (token.constructor === EOFTk) {
+ this.nlWsTokens.push(token);
this.closeOpenPTag(this.nonNlTokens);
var res = this.processPendingNLs(false);
this.inPre = false;
@@ -120,7 +126,18 @@
this.reset();
return { tokens: res };
} else {
- this.newLineCount++;
+ // Dont increment newline count on lines with a single comment
+ // and other non-wrappable content.
+ if (!emptyLineWithSingleComment) {
+ this.newLineCount++;
+ this.nlWsTokens.push(token);
+ } else {
+ // Convert the NlTk to a String-representation so that
+ // it doesn't get processed by discardOneNlTk -- this
+ // newline needs to be emitted without being processed
+ // for p-wrapping.
+ this.nlWsTokens.push("\n");
+ }
return {};
}
};
@@ -144,13 +161,11 @@
if (!topTag || topTag === 'td' || topTag === 'th') {
// 2. Discard 3 newlines (the p-br-p section
// serializes back to 3 newlines)
- nlTk = this.discardOneNlTk();
- nlTk2 = this.discardOneNlTk();
- this.discardOneNlTk();
+ resToks.push(this.discardOneNlTk(resToks));
+ resToks.push(this.discardOneNlTk(resToks));
+ this.discardOneNlTk(resToks);
// Preserve nls for pretty-printing and dsr
reliability
- resToks.push(nlTk);
- resToks.push(nlTk2);
// 3. Insert <p><br></p> sections
// FIXME: Mark this as a placeholder for now
until the
@@ -163,26 +178,25 @@
this.hasOpenPTag = true;
}
} else {
- resToks.push(this.discardOneNlTk());
- resToks.push(this.discardOneNlTk());
- resToks.push(this.discardOneNlTk());
+ resToks.push(this.discardOneNlTk(resToks));
+ resToks.push(this.discardOneNlTk(resToks));
+ resToks.push(this.discardOneNlTk(resToks));
}
newLineCount -= 3;
}
if (newLineCount === 2) {
- nlTk = this.discardOneNlTk();
- nlTk2 = this.discardOneNlTk();
+ resToks.push(this.discardOneNlTk(resToks));
+ nlTk = this.discardOneNlTk(resToks);
this.closeOpenPTag(resToks);
resToks.push(nlTk);
- resToks.push(nlTk2);
}
}
if (isBlockToken) {
if (newLineCount === 1){
- nlTk = this.discardOneNlTk();
+ nlTk = this.discardOneNlTk(resToks);
this.closeOpenPTag(resToks);
resToks.push(nlTk);
} else {
@@ -292,11 +306,25 @@
}
} else if (tc === EOFTk || this.inPre) {
return { tokens: [token] };
- } else if ((tc === String && token.match( /^[\t ]*$/)) ||
- (tc === CommentTk) ||
+ } else if ((tc === String && token.match( /^[\t ]*$/)) || tc ===
CommentTk) {
+ if (tc === CommentTk) {
+ this.currLine.commentCount++;
+ }
+
+ if (this.newLineCount === 0) {
+ this.currLine.tokens.push(token);
+ // Since we have no pending newlines to trip us up,
+ // no need to buffer -- just emit everything
+ return { tokens: this._getTokensAndReset() };
+ } else {
+ // We are in buffering mode waiting till we are ready to
+ // process pending newlines.
+ this.nlWsTokens.push(token);
+ return {};
+ }
+ } else if (isNamedTagToken(token, {'link':1}) ||
// TODO: narrow this down a bit more to take typeof
into account
- (tc === SelfclosingTagTk && token.name === 'meta') ||
- isNamedTagToken(token, {'link':1}) )
+ (tc === SelfclosingTagTk && token.name === 'meta' &&
token.dataAttribs.stx !== 'html'))
{
if (this.newLineCount === 0) {
this.currLine.tokens.push(token);
--
To view, visit https://gerrit.wikimedia.org/r/55240
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I51f0ee39dcf7e7a7e2800117a0711f4a4659d4ec
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/Parsoid
Gerrit-Branch: master
Gerrit-Owner: Subramanya Sastry <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits