Subramanya Sastry has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/55240


Change subject: Improved p-wrapping in the presence of comments-and-ws-only 
lines.
......................................................................

Improved p-wrapping in the presence of comments-and-ws-only lines.

* Lines that contained comments and white-text only used to trigger
  p-wrapping based on newlines seen till that point.  However, the
  PHP parser effectively discards such lines (including the newline)
  which leads to newlines before/afther such a line to be considered
  as sequential.

* This patch mimics the PHP behavior by effectively ignoring the
  comments-and-ws-only lines (but not discarding them since they
  still need to be roundtripped back).

* This now fixes parsing (and thus roundtripping of the following
  test snippets, among others):
----------
a

   <!--boo-->

b
-------------
a

b
-------------

* Eliminates all RT errors on en:Ayat al-Kursi

* No change in parser tests results, but some change in HTML output
  seen in failed tests (and possibly passed tests).

* TODO: Add the above and other parser tests to parserTests.txt

Change-Id: I51f0ee39dcf7e7a7e2800117a0711f4a4659d4ec
---
M js/lib/ext.core.ParagraphWrapper.js
1 file changed, 51 insertions(+), 23 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/Parsoid 
refs/changes/40/55240/1

diff --git a/js/lib/ext.core.ParagraphWrapper.js 
b/js/lib/ext.core.ParagraphWrapper.js
index 58d9e38..ff8aa17 100644
--- a/js/lib/ext.core.ParagraphWrapper.js
+++ b/js/lib/ext.core.ParagraphWrapper.js
@@ -35,7 +35,7 @@
        this.hasOpenPTag = false;
        this.hasOpenHTMLPTag = false;
        this.inPre = false;
-}
+};
 
 // constants
 ParagraphWrapper.prototype.newlineRank = 2.95;
@@ -64,12 +64,14 @@
        return resToks;
 };
 
-ParagraphWrapper.prototype.discardOneNlTk = function() {
-       for (var i = 0, n = this.nlWsTokens.length; i < n; i++) {
-               var t = this.nlWsTokens[i];
+ParagraphWrapper.prototype.discardOneNlTk = function(out) {
+       var i = 0, n = this.nlWsTokens.length;
+       while (i < n) {
+               var t = this.nlWsTokens.shift();
                if (t.constructor === NlTk) {
-                       this.nlWsTokens = this.nlWsTokens.splice(i+1);
                        return t;
+               } else {
+                       out.push(t);
                }
        }
 
@@ -87,6 +89,7 @@
 ParagraphWrapper.prototype.resetCurrLine = function () {
        this.currLine = {
                tokens: [],
+               commentCount: 0,
                hasBlockToken: false,
                hasWrappableTokens: false
        };
@@ -106,12 +109,15 @@
                this.hasOpenPTag = true;
        }
 
+       var emptyLineWithSingleComment = l.commentCount === 1 && 
!l.hasWrappableTokens;
+
        // this.nonNlTokens += this.currLine.tokens
        this.nonNlTokens = this.nonNlTokens.concat(l.tokens);
+
        this.resetCurrLine();
 
-       this.nlWsTokens.push(token);
        if (token.constructor === EOFTk) {
+               this.nlWsTokens.push(token);
                this.closeOpenPTag(this.nonNlTokens);
                var res = this.processPendingNLs(false);
                this.inPre = false;
@@ -120,7 +126,18 @@
                this.reset();
                return { tokens: res };
        } else {
-               this.newLineCount++;
+               // Dont increment newline count on lines with a single comment
+               // and other non-wrappable content.
+               if (!emptyLineWithSingleComment) {
+                       this.newLineCount++;
+                       this.nlWsTokens.push(token);
+               } else {
+                       // Convert the NlTk to a String-representation so that
+                       // it doesn't get processed by discardOneNlTk -- this
+                       // newline needs to be emitted without being processed
+                       // for p-wrapping.
+                       this.nlWsTokens.push("\n");
+               }
                return {};
        }
 };
@@ -144,13 +161,11 @@
                        if (!topTag || topTag === 'td' || topTag === 'th') {
                                // 2. Discard 3 newlines (the p-br-p section
                                // serializes back to 3 newlines)
-                               nlTk = this.discardOneNlTk();
-                               nlTk2 = this.discardOneNlTk();
-                               this.discardOneNlTk();
+                               resToks.push(this.discardOneNlTk(resToks));
+                               resToks.push(this.discardOneNlTk(resToks));
+                               this.discardOneNlTk(resToks);
 
                                // Preserve nls for pretty-printing and dsr 
reliability
-                               resToks.push(nlTk);
-                               resToks.push(nlTk2);
 
                                // 3. Insert <p><br></p> sections
                                // FIXME: Mark this as a placeholder for now 
until the
@@ -163,26 +178,25 @@
                                        this.hasOpenPTag = true;
                                }
                        } else {
-                               resToks.push(this.discardOneNlTk());
-                               resToks.push(this.discardOneNlTk());
-                               resToks.push(this.discardOneNlTk());
+                               resToks.push(this.discardOneNlTk(resToks));
+                               resToks.push(this.discardOneNlTk(resToks));
+                               resToks.push(this.discardOneNlTk(resToks));
                        }
 
                        newLineCount -= 3;
                }
 
                if (newLineCount === 2) {
-                       nlTk = this.discardOneNlTk();
-                       nlTk2 = this.discardOneNlTk();
+                       resToks.push(this.discardOneNlTk(resToks));
+                       nlTk = this.discardOneNlTk(resToks);
                        this.closeOpenPTag(resToks);
                        resToks.push(nlTk);
-                       resToks.push(nlTk2);
                }
        }
 
        if (isBlockToken) {
                if (newLineCount === 1){
-                       nlTk = this.discardOneNlTk();
+                       nlTk = this.discardOneNlTk(resToks);
                        this.closeOpenPTag(resToks);
                        resToks.push(nlTk);
                } else {
@@ -292,11 +306,25 @@
                }
        } else if (tc === EOFTk || this.inPre) {
                return { tokens: [token] };
-       } else if ((tc === String && token.match( /^[\t ]*$/)) ||
-                       (tc === CommentTk) ||
+       } else if ((tc === String && token.match( /^[\t ]*$/)) || tc === 
CommentTk) {
+               if (tc === CommentTk) {
+                       this.currLine.commentCount++;
+               }
+
+               if (this.newLineCount === 0) {
+                       this.currLine.tokens.push(token);
+                       // Since we have no pending newlines to trip us up,
+                       // no need to buffer -- just emit everything
+                       return { tokens: this._getTokensAndReset() };
+               } else {
+                       // We are in buffering mode waiting till we are ready to
+                       // process pending newlines.
+                       this.nlWsTokens.push(token);
+                       return {};
+               }
+       } else if (isNamedTagToken(token, {'link':1}) ||
                        // TODO: narrow this down a bit more to take typeof 
into account
-                       (tc === SelfclosingTagTk && token.name === 'meta') ||
-                       isNamedTagToken(token, {'link':1}) )
+                       (tc === SelfclosingTagTk && token.name === 'meta' && 
token.dataAttribs.stx !== 'html'))
        {
                if (this.newLineCount === 0) {
                        this.currLine.tokens.push(token);

-- 
To view, visit https://gerrit.wikimedia.org/r/55240
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I51f0ee39dcf7e7a7e2800117a0711f4a4659d4ec
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/Parsoid
Gerrit-Branch: master
Gerrit-Owner: Subramanya Sastry <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to