Arlolra has uploaded a new change for review.
https://gerrit.wikimedia.org/r/199301
Change subject: Pop comments from the end of table tag attributes
......................................................................
Pop comments from the end of table tag attributes
* The tokenizer is currently hiding them from editors.
* Doing this in the production, (comment / space)* !eol, would be
preferable, but it was a lot less hairy to do in the action.
* Introduces a bit of syntactic diff in whitespace.
Bug: T87069
Change-Id: I1039a253e1f52f8de2d3f652c207309467363d12
---
M lib/mediawiki.tokenizer.utils.js
M lib/pegTokenizer.pegjs.txt
M tests/parserTests.txt
3 files changed, 86 insertions(+), 19 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/parsoid refs/changes/01/199301/1
diff --git a/lib/mediawiki.tokenizer.utils.js b/lib/mediawiki.tokenizer.utils.js
index cca7466..bf20536 100644
--- a/lib/mediawiki.tokenizer.utils.js
+++ b/lib/mediawiki.tokenizer.utils.js
@@ -9,7 +9,8 @@
var KV = defines.KV,
TagTk = defines.TagTk,
SelfclosingTagTk = defines.SelfclosingTagTk,
- EndTagTk = defines.EndTagTk;
+ EndTagTk = defines.EndTagTk,
+ CommentTk = defines.CommentTk;
var tu = {
@@ -256,6 +257,37 @@
return false;
}
},
+
+ // Pop off the end comments, if any.
+ popComments: function( attrs ) {
+ var i, kv, buf = [];
+ for ( i = attrs.length - 1; i > -1; i-- ) {
+ kv = attrs[i];
+ if ( typeof kv.k === "string" && !kv.v && /^\s*$/.test( kv.k ) ) {
+ // permit whitespace
+ buf.unshift( kv.k );
+ } else if ( Array.isArray( kv.k ) && !kv.v ) {
+ // all should be comments
+ if ( kv.k.some(function( k ) {
+ return !(k instanceof CommentTk);
+ }) ) { break; }
+ buf.unshift.apply( buf, kv.k );
+ } else {
+ break;
+ }
+ }
+ // ensure we found a comment
+ while ( buf.length && !(buf[0] instanceof CommentTk) ) {
+ buf.shift();
+ }
+ if ( buf.length ) {
+ attrs.splice( -buf.length, buf.length );
+ return { buf: buf, endPos: buf[0].dataAttribs.tsr[0] };
+ } else {
+ return null;
+ }
+ }
+
};
diff --git a/lib/pegTokenizer.pegjs.txt b/lib/pegTokenizer.pegjs.txt
index bd5cd8f..34be959 100644
--- a/lib/pegTokenizer.pegjs.txt
+++ b/lib/pegTokenizer.pegjs.txt
@@ -1668,18 +1668,22 @@
ta:(generic_attribute / broken_table_attribute_name_char)*
tsEndPos:({stops.pop('table'); return peg$currPos;})
{
- var tblStart = new TagTk( 'table', [], { tsr: [startPos, tsEndPos] } );
- if (p !== "|") {
- // Variation form default
- // "<brace-char>"+p is triggering some bug in pegJS
- // I cannot even use that expression in the comment!
- tblStart.dataAttribs.startTagSrc = b+p;
- }
- if ( ta ) {
- tblStart.attribs = ta;
+ var coms = tu.popComments( ta );
+ if ( coms ) {
+ tsEndPos = coms.endPos;
}
- return sc.concat([tblStart]);
+ var da = { tsr: [startPos, tsEndPos] };
+ if ( p !== "|" ) {
+ // Variation from default
+ da.startTagSrc = b + p;
+ }
+
+ sc.push( new TagTk( 'table', ta, da ) );
+ if ( coms ) {
+ sc = sc.concat( coms.buf );
+ }
+ return sc;
}
table_caption_tag
@@ -1704,16 +1708,27 @@
// handle tables with missing table cells after a row
td:implicit_table_data_tag?
{
+ var coms = tu.popComments( a );
+ if ( coms ) {
+ tagEndPos = coms.endPos;
+ }
+
+ var da = {
+ tsr: [ peg$reportedPos, tagEndPos ],
+ startTagSrc: p + dashes.join('')
+ };
+
// We rely on our tree builder to close the row as needed. This is
// needed to support building tables from fragment templates with
// individual cells or rows.
- var trToken = new TagTk( 'tr', a, { tsr: [peg$reportedPos, tagEndPos], startTagSrc: p + dashes.join('') } );
- var res;
- if ( !td ) {
- res = [trToken];
- } else {
- //console.warn( 'tr result: ' + pp(trToken.concat(td)) + ' stops: ' + pp(stops));
- res = [trToken].concat(td);
+ var trToken = new TagTk( 'tr', a, da );
+
+ var res = [ trToken ];
+ if ( coms ) {
+ res = res.concat( coms.buf );
+ }
+ if ( td ) {
+ res = res.concat( td );
}
return res;
}
diff --git a/tests/parserTests.txt b/tests/parserTests.txt
index 8841c13..ebbb174 100644
--- a/tests/parserTests.txt
+++ b/tests/parserTests.txt
@@ -5940,7 +5940,7 @@
<!-- c0 -->
| foo
<!-- c1 -->
-|- <!-- c2 -->
+|-<!-- c2 -->
<!-- c3 -->
|<!-- c4 -->
<!-- c5 -->
@@ -5957,6 +5957,26 @@
!! end
!! test
+Wikitext table comments represented in parsoid dom
+!! wikitext
+{|<!--c1--><!--c2-->
+|-<!--c3-->
+| x
+|}
+!! html/php+tidy
+<table>
+<tr>
+<td>x</td>
+</tr>
+</table>
+!! html/parsoid
+<table><!--c1--><!--c2-->
+<tbody><tr data-parsoid='{"startTagSrc":"|-","autoInsertedEnd":true}'><!--c3-->
+<td data-parsoid='{"autoInsertedEnd":true}'> x</td></tr>
+</tbody></table>
+!! end
+
+!! test
Wikitext table with double-line table cell
!! wikitext
{|
--
To view, visit https://gerrit.wikimedia.org/r/199301
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I1039a253e1f52f8de2d3f652c207309467363d12
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/services/parsoid
Gerrit-Branch: master
Gerrit-Owner: Arlolra <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits