GWicke has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/80681


Change subject: Bug 53287: More robust in-table state tracking in the paragraph 
wrapper
......................................................................

Bug 53287: More robust in-table state tracking in the paragraph wrapper

From the bug report:

Unbalanced table tokens in pages like http://fi.wikipedia.org/wiki/Kypros throw
off the in-table tracking in the paragraph wrapper. This causes newlines in
fosterable positions to be paragraph-wrapped, and the resulting paragraphs are
then fostered out of the table.

Broken behavior:

echo -ne '{|\n<td><td>fo</td>\n\n\n\n|}' | node parse
WARNING: DSR inconsistency: cs/s mismatch for node: BODY s: 0; cs: -1
<body><p data-parsoid='{"autoInsertedEnd":true,"dsr":[-1,-1,0,0]}'><br
data-parsoid='{"dsr":[-1,-1,0,0]}'></p>
<table data-parsoid='{"dsr":[0,24,2,2]}'>
<tbody data-parsoid='{"dsr":[3,22,0,0]}'><tr
data-parsoid='{"dsr":[3,19,0,0]}'><td
data-parsoid='{"stx":"html","autoInsertedEnd":true,"dsr":[3,7,4,0]}'></td><td
data-parsoid='{"stx":"html","dsr":[7,18,4,5]}'>fo</td></tr>

</tbody></table></body>

Correct behavior:

echo -ne '{|\n<td><td>fo</td>\n\n\n\n|}' | node parse
<body data-parsoid='{"dsr":[0,24,0,0]}'><table
data-parsoid='{"dsr":[0,24,2,2]}'>
<tbody data-parsoid='{"dsr":[3,22,0,0]}'><tr
data-parsoid='{"autoInsertedEnd":true,"dsr":[3,18,0,0]}'><td
data-parsoid='{"stx":"html","autoInsertedEnd":true,"dsr":[3,7,4,0]}'></td><td
data-parsoid='{"stx":"html","dsr":[7,18,4,5]}'>fo</td></tr>

</tbody></table></body>

Change-Id: I4c607885ad52be3f39a1f72eb5ead9239e014820
---
M js/lib/ext.core.ParagraphWrapper.js
M js/lib/mediawiki.WikitextSerializer.js
M js/tests/parserTests.txt
3 files changed, 60 insertions(+), 20 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/Parsoid 
refs/changes/81/80681/1

diff --git a/js/lib/ext.core.ParagraphWrapper.js 
b/js/lib/ext.core.ParagraphWrapper.js
index 2bd9440..67452fb 100644
--- a/js/lib/ext.core.ParagraphWrapper.js
+++ b/js/lib/ext.core.ParagraphWrapper.js
@@ -227,12 +227,17 @@
 
 ParagraphWrapper.prototype.onAny = function ( token, frame ) {
        function updateTableContext(tblTags, token) {
-               function popTags(tblTags, tokenName, altTag1, altTag2) {
-                       while (tblTags.length > 0) {
-                               var topTag = tblTags.pop();
-                               if (topTag === tokenName || topTag === altTag1 
|| topTag === altTag2) {
-                                       break;
-                               }
+               // popUntil: pop anything until one of the tag in this array is 
found.
+               //           Pass null to disable.
+               // popThen: after a stop is reached (or popUntil was null), 
continue
+               //                      popping as long as the elements in this 
array match. Pass
+               //                      null to disable.
+               function popTags(tblTags, popUntil, popThen) {
+                       while (popUntil && tblTags.length > 0 && 
popUntil.indexOf(tblTags.last()) === -1) {
+                               tblTags.pop();
+                       }
+                       while (popThen && tblTags.length > 0 && 
popThen.indexOf(tblTags.last()) !== -1) {
+                               tblTags.pop();
                        }
                }
 
@@ -243,23 +248,24 @@
                        } else {
                                switch (tokenName) {
                                case "table":
-                                       // Pop till we match
-                                       popTags(tblTags, tokenName);
+                                       // Pop a table scope
+                                       popTags(tblTags, ["table"], ["table"]);
                                        break;
                                case "tbody":
-                                       // Pop till we match
-                                       popTags(tblTags, tokenName, "table");
+                                       // Pop to the nearest table
+                                       popTags(tblTags, ["table"], null);
                                        break;
                                case "tr":
-                                       // Pop till we match
-                                       popTags(tblTags, tokenName, "table", 
"tbody");
+                               case "thead":
+                               case "tfoot":
+                               case "caption":
+                                       // Pop to tbody or table, whichever is 
nearer
+                                       popTags(tblTags, ["tbody", "table"], 
null);
                                        break;
                                case "td":
                                case "th":
-                                       // Pop just the topmost tag if it 
matches the token
-                                       if (tblTags.last() === token.name) {
-                                               tblTags.pop();
-                                       }
+                                       // Pop to tr or (if that fails) to 
tbody or table.
+                                       popTags(tblTags, ["tr", "tbody", 
"table"], null);
                                        break;
                                }
                        }
diff --git a/js/lib/mediawiki.WikitextSerializer.js 
b/js/lib/mediawiki.WikitextSerializer.js
index a5c9187..69a8cd8 100644
--- a/js/lib/mediawiki.WikitextSerializer.js
+++ b/js/lib/mediawiki.WikitextSerializer.js
@@ -556,6 +556,9 @@
 
        // console.warn("SOL: " + sol + "; text: " + text);
 
+       // escape nowiki tags
+       text = text.replace(/<(\/?nowiki\s*\/?\s*)>/gi, '&lt;$1&gt;');
+
        if (fullWrap) {
                return ["<nowiki>", text, "</nowiki>", nls].join('');
        } else {
@@ -588,7 +591,8 @@
                                }
                        }
 
-                       buf.push(str);
+                       // escape nowiki tags
+                       buf.push(str.replace(/<(\/?nowiki\s*\/?\s*)>/gi, 
'&lt;$1&gt;'));
 
                        if (close) {
                                if ((i < numToks-1 && tokens[i+1].constructor 
=== String && tokens[i+1].length >= maxExcessWrapLength) ||
@@ -612,7 +616,7 @@
                                        if (sol && t.match(/(^|\n)[ \t]/)) {
                                                smartNowikier(true, true, t, i, 
n);
                                        } else {
-                                               buf.push(t);
+                                               
buf.push(t.replace(/<(\/?nowiki\s*\/?\s*)>/gi, '&lt;$1&gt;'));
                                        }
                                        sol = false;
                                }
@@ -706,6 +710,9 @@
                return text;
        }
 
+       // escape nowiki tags
+       text = text.replace(/<(\/?nowiki\s*\/?\s*)>/gi, '&lt;$1&gt;');
+
        // Context-specific escape handler
        var wteHandler = state.wteHandlerStack.last();
        if (wteHandler && wteHandler(state, text, opts)) {
@@ -756,8 +763,6 @@
                return this.escapedText(state, sol, text, fullCheckNeeded);
        }
 
-       // escape nowiki tags
-       text = text.replace(/<(\/?nowiki\s*\/?\s*)>/gi, '&lt;$1&gt;');
 
        // Use the tokenizer to see if we have any wikitext tokens
        //
diff --git a/js/tests/parserTests.txt b/js/tests/parserTests.txt
index 77523bf..f43f9be 100644
--- a/js/tests/parserTests.txt
+++ b/js/tests/parserTests.txt
@@ -1600,6 +1600,35 @@
 </pre>
 !!end
 
+!! test
+Parsoid: Don't paragraph-wrap fosterable content even if table syntax is 
unbalanced
+!! options
+parsoid=wt2html
+!! input
+{|
+<td>
+<td>
+</td>
+
+
+
+|}
+!! result
+<table>
+
+<tbody>
+<tr>
+<td></td>
+
+<td>
+</td></tr>
+
+
+
+</tbody></table>
+!! end
+
+
 #--------------------------------------------------------------------
 # Transclusion parameter whitespace stripping tests
 # Behavior is different for positional and named parameters

-- 
To view, visit https://gerrit.wikimedia.org/r/80681
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I4c607885ad52be3f39a1f72eb5ead9239e014820
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/Parsoid
Gerrit-Branch: master
Gerrit-Owner: GWicke <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to