Arlolra has uploaded a new change for review.
https://gerrit.wikimedia.org/r/80675
Change subject: WIP: Generalize foster parented content detection
......................................................................
WIP: Generalize foster parented content detection
Fostered content is boxed with an inserted meta tag.
Bug: 53110
Change-Id: I04f3751660caf37964fb4a1d79e8d16fc9464055
---
M js/lib/dom.markFosteredContent.js
A js/lib/ext.core.FosterMetaInserter.js
M js/lib/mediawiki.DOMPostProcessor.js
M js/lib/mediawiki.parser.js
4 files changed, 92 insertions(+), 74 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/Parsoid refs/changes/75/80675/1
diff --git a/js/lib/dom.markFosteredContent.js b/js/lib/dom.markFosteredContent.js
index 0fcacd3..aa3af55 100644
--- a/js/lib/dom.markFosteredContent.js
+++ b/js/lib/dom.markFosteredContent.js
@@ -2,85 +2,54 @@
var DU = require('./mediawiki.DOMUtils.js').DOMUtils;
-/*
------------------------------------------------------------------------------------
- * Non-IEW (inter-element-whitespace) can only be found in <td> <th> and
<caption> tags
- * in a table. If found elsewhere within a table, such content will be moved
out of
- * the table and be "adopted" by the table's sibling ("foster parent"). The
content
- * that gets adopted is "fostered content".
+/* ------------------------------------------------------------------------
+ * Non-IEW (inter-element-whitespace) can only be found in <td> <th> and
+ * <caption> tags in a table. If found elsewhere within a table, such
+ * content will be moved out of the table and be "adopted" by the table's
+ * sibling ("foster parent"). The content that gets adopted is "fostered
+ * content".
*
- * See http://dev.w3.org/html5/spec-LC/tree-construction.html#foster-parenting
- *
------------------------------------------------------------------------------------
*/
-function markFosteredContent(node, env) {
- function findFosteredContent(table) {
- var tableTagId = table.data.parsoid.tagId,
- n = table.previousSibling,
- initPos = table.data.parsoid.tsr ?
table.data.parsoid.tsr[0] : null,
- fosteredText = "",
- nodeBuf = [],
- tsrGap = 0;
+ * http://dev.w3.org/html5/spec-LC/tree-construction.html#foster-parenting
+ * ------------------------------------------------------------------------ */
- while (n) {
- if (DU.isElt(n)) {
- if (typeof(n.data.parsoid.tagId) !== 'number'
|| n.data.parsoid.tagId < tableTagId) {
- if (initPos && n.data.parsoid.tsr &&
DU.tsrSpansTagDOM(n, n.data.parsoid)) {
- var expectedGap = initPos -
n.data.parsoid.tsr[1];
- if (tsrGap !== expectedGap) {
- /*
- console.log("Fostered
text/comments: " +
-
JSON.stringify(fosteredText.substring(expectedGap)));
- */
- while (nodeBuf.length >
0) {
- // Wrap each
node in a span wrapper
- var x =
nodeBuf.pop();
- var span =
table.ownerDocument.createElement('span');
- span.data = {
parsoid: { fostered: true } };
-
x.parentNode.insertBefore(span, x);
-
span.appendChild(x);
- }
- }
- } else {
- /* jshint noempty: false */
+function markFosteredContent( node, env ) {
- // No clue if the text in
fosteredText is really fostered content.
- // If we ran this pass
post-dsr-computation, we might be able to
- // detect this in more
scenarios. Something to consider.
+ (function findFosteredContent( node, foster ) {
+ var str, span;
+ var sibling = node.nextSibling;
- /*
- console.warn("initPos: " +
initPos);
- console.warn("have tsr: " +
n.data.parsoid.tsr);
- console.warn("spans tsr: " +
(n.data.parsoid.tsr && DU.tsrSpansTagDOM(n, n.data.parsoid)));
- */
- }
- // All good at this point
- break;
- } else {
- n.data.parsoid.fostered = true;
- }
+ if ( foster ) {
+ if ( DU.isElt( node ) && node.nodeName === "TABLE" ) {
+ foster = false;
} else {
- var str = DU.isText(n) ? n.nodeValue : "<!--" +
n.nodeValue + "-->";
- tsrGap += str.length;
- fosteredText = str + fosteredText;
- nodeBuf.push(n);
+
+ if ( DU.isElt( node ) ) {
+ node.data.parsoid.fostered = true;
+ } else {
+ span =
node.ownerDocument.createElement( "span" );
+ span.data = { parsoid: { fostered: true
} };
+ node.parentNode.insertBefore( span,
node );
+ span.appendChild( node );
+ }
+
}
- n = n.previousSibling;
- }
- }
-
- var c = node.firstChild;
- while (c) {
- var sibling = c.nextSibling;
-
- if (DU.isElt(c) && c.nodeName === 'TABLE') {
- findFosteredContent(c);
+ } else if ( DU.isMarkerMeta( node, "mw:FosterBox" ) ) {
+ foster = true;
+ node.parentNode.removeChild( node )
}
- if (c.childNodes.length > 0) {
- markFosteredContent(c, env);
+ if ( node.childNodes.length > 0 ) {
+ markFosteredContent( node, env );
}
- c = sibling;
- }
+
+ if ( sibling ) {
+ findFosteredContent( sibling , foster );
+ }
+
+ }( node.firstChild, false ));
+
}
-if (typeof module === "object") {
+if ( typeof module === "object" ) {
module.exports.markFosteredContent = markFosteredContent;
-}
+}
\ No newline at end of file
diff --git a/js/lib/ext.core.FosterMetaInserter.js b/js/lib/ext.core.FosterMetaInserter.js
new file mode 100644
index 0000000..e0b2e33
--- /dev/null
+++ b/js/lib/ext.core.FosterMetaInserter.js
@@ -0,0 +1,47 @@
+/*
+ * Insert meta tags to ease fostered content detection
+ */
+
+"use strict";
+
+var defines = require( "./mediawiki.parser.defines.js" ),
+ SelfclosingTagTk = defines.SelfclosingTagTk,
+ TagTk = defines.TagTk,
+ KV = defines.KV;
+
+function FosterMetaInserter( manager ) {
+ this.manager = manager;
+ this.env = manager.env;
+
+ this.trace = this.env.conf.parsoid.debug || (
+ this.env.conf.parsoid.traceFlags &&
+ this.env.conf.parsoid.traceFlags.indexOf( "list" ) !== -1
+ );
+
+ this.manager.addTransform( this.onTable.bind( this ),
+ "FosterMetaInserter:onTable", this.rank, 'tag', 'table' );
+}
+
+FosterMetaInserter.prototype.rank = 2.98;
+
+FosterMetaInserter.prototype.onTable = function ( token, frame, prevToken ) {
+ var kv, meta, tokens;
+
+ if ( this.trace ) {
+ console.warn( "T:fosterInsertMeta:table " + JSON.stringify(
token ) );
+ }
+
+ tokens = [ token ];
+
+ if ( token.constructor === TagTk ) {
+ kv = new KV( "typeof", "mw:FosterBox" );
+ meta = new SelfclosingTagTk( "meta", [ kv ] );
+ tokens.unshift( meta ) ;
+ }
+
+ return { tokens: tokens }
+};
+
+if ( typeof module === "object" ) {
+ module.exports.FosterMetaInserter = FosterMetaInserter;
+}
\ No newline at end of file
diff --git a/js/lib/mediawiki.DOMPostProcessor.js b/js/lib/mediawiki.DOMPostProcessor.js
index da2fb3e..b190c8b 100644
--- a/js/lib/mediawiki.DOMPostProcessor.js
+++ b/js/lib/mediawiki.DOMPostProcessor.js
@@ -110,8 +110,8 @@
// Common post processing
this.processors = [
dataParsoidLoader.traverse.bind( dataParsoidLoader ),
- handleUnbalancedTables,
markFosteredContent,
+ handleUnbalancedTables,
migrateStartMetas,
markTreeBuilderFixups,
handlePres,
diff --git a/js/lib/mediawiki.parser.js b/js/lib/mediawiki.parser.js
index 403ca15..8d9d5c9 100644
--- a/js/lib/mediawiki.parser.js
+++ b/js/lib/mediawiki.parser.js
@@ -37,7 +37,8 @@
BehaviorSwitchHandler = BehaviorSwitch.BehaviorSwitchHandler,
BehaviorSwitchPreprocessor = BehaviorSwitch.BehaviorSwitchPreprocessor,
TreeBuilder =
require('./mediawiki.HTML5TreeBuilder.node.js').FauxHTML5.TreeBuilder,
- DOMPostProcessor =
require('./mediawiki.DOMPostProcessor.js').DOMPostProcessor;
+ DOMPostProcessor =
require('./mediawiki.DOMPostProcessor.js').DOMPostProcessor,
+ FosterMetaInserter =
require('./ext.core.FosterMetaInserter.js').FosterMetaInserter;
var ParserPipeline; // forward declaration
@@ -144,14 +145,15 @@
// add before transforms that depend on
behavior switches
// examples: toc generation, edit
sections
BehaviorSwitchHandler, // 2.14
-
ListHandler, // 2.49
Sanitizer, // 2.90, 2.91
// Wrap tokens into paragraphs
post-sanitization so that
// tags that converted to text by the
sanitizer have a chance
// of getting wrapped into paragraphs.
The sanitizer does not
// require the existence of p-tags for
its functioning.
- ParagraphWrapper // 2.95 -- 2.97
+ ParagraphWrapper, // 2.95 -- 2.97
+ // Insert meta tags for foster detection
+ FosterMetaInserter // 2.98
]
],
--
To view, visit https://gerrit.wikimedia.org/r/80675
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I04f3751660caf37964fb4a1d79e8d16fc9464055
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/Parsoid
Gerrit-Branch: master
Gerrit-Owner: Arlolra <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits