jenkins-bot has submitted this change and it was merged.
Change subject: Move data-parsoid into a JSON structure outside the DOM
......................................................................
Move data-parsoid into a JSON structure outside the DOM
* IDs are generated with the following pattern,
mw<base64-encoded counter>
* Adds two flags to parse.js,
--dp, Outputs the JSON structure
--dpin <JSON>, Accepts the output from the above
* Adds a configuration env.conf.parsoid.storeDataParsoid
which is off by default to keep parserTests passing,
but turned on with the above flags.
Bug: 52936
Change-Id: Ie58620d0909be3fcd3e9dcf84efaaa16c96d556e
---
M js/lib/dom.cleanup.js
M js/lib/domTraverser.js
M js/lib/jsutils.js
M js/lib/mediawiki.DOMPostProcessor.js
M js/lib/mediawiki.DOMUtils.js
M js/lib/mediawiki.ParsoidConfig.js
M js/tests/parse.js
7 files changed, 107 insertions(+), 13 deletions(-)
Approvals:
GWicke: Looks good to me, approved
jenkins-bot: Verified
diff --git a/js/lib/dom.cleanup.js b/js/lib/dom.cleanup.js
index e023b31..c192a36 100644
--- a/js/lib/dom.cleanup.js
+++ b/js/lib/dom.cleanup.js
@@ -29,7 +29,7 @@
/**
* Perform some final cleaup and save data-parsoid attributes on each node.
*/
-function cleanupAndSaveDataParsoid( node ) {
+function cleanupAndSaveDataParsoid( env, node ) {
if ( DU.isElt(node) && node.data ) {
var dp = node.data.parsoid;
if (dp) {
@@ -77,6 +77,10 @@
if (dp.fostered && dp.dsr &&
!DU.isFirstEncapsulationWrapperNode(node)) {
dp.dsr[0] = dp.dsr[1];
}
+
+ if ( env.conf.parsoid.storeDataParsoid ) {
+ DU.storeDataParsoid( node, dp );
+ }
}
DU.saveDataAttribs( node );
}
diff --git a/js/lib/domTraverser.js b/js/lib/domTraverser.js
index 815e1d5..be3adae 100644
--- a/js/lib/domTraverser.js
+++ b/js/lib/domTraverser.js
@@ -70,13 +70,13 @@
* - continue regular processing on current node
*/
DOMTraverser.prototype.traverse = function ( node ) {
+ var result, workNode;
if (node.nodeType === node.DOCUMENT_NODE) {
// skip to body
- node = node.body;
+ workNode = node.body;
+ } else {
+ workNode = node.firstChild;
}
-
- var workNode = node.firstChild;
- var result;
while ( workNode !== null ) {
// Call the handlers on this workNode
diff --git a/js/lib/jsutils.js b/js/lib/jsutils.js
index 51459df..0a6334f 100644
--- a/js/lib/jsutils.js
+++ b/js/lib/jsutils.js
@@ -47,7 +47,25 @@
this.deepFreeze(prop); // Recursively call deepFreeze.
}
+ },
+
+ // Convert a counter to a Base64 encoded string.
+ // Padding is stripped. /,+ are replaced with _,- respectively.
+ // Warning: Max integer is 2^31 - 1 for bitwise operations.
+ counterToBase64: function ( n ) {
+ /* jshint bitwise: false */
+ var arr = [];
+ do {
+ arr.unshift( n & 0xff );
+ n >>= 8;
+ } while ( n > 0 );
+ return ( new Buffer( arr ) )
+ .toString( "base64" )
+ .replace( /=/g, "" )
+ .replace( /\//g, "_" )
+ .replace( /\+/g, "-" );
}
+
};
if (typeof module === "object") {
diff --git a/js/lib/mediawiki.DOMPostProcessor.js
b/js/lib/mediawiki.DOMPostProcessor.js
index 4cd14de..ca3293b 100644
--- a/js/lib/mediawiki.DOMPostProcessor.js
+++ b/js/lib/mediawiki.DOMPostProcessor.js
@@ -82,8 +82,8 @@
}
/**
- * Migrate data-parsoid attributes into a property on each DOM node. We'll
- * migrate them back in the final DOM traversal.
+ * Migrate data-parsoid attributes into a property on each DOM node.
+ * We may migrate them back in the final DOM traversal.
*
* Various mw metas are converted to comments before the tree build to
* avoid fostering. Piggy-backing the reconversion here to avoid excess
@@ -91,6 +91,10 @@
*/
function prepareDOM( node ) {
DU.loadDataParsoid( node );
+
+ if ( DU.isElt( node ) ) {
+ node.removeAttribute( "data-parsoid" );
+ }
if ( DU.isComment( node ) && /^\{[^]+\}$/.test( node.data ) ) {
@@ -181,7 +185,7 @@
domVisitor2.addHandler( 'td',
tableFixer.reparseTemplatedAttributes.bind( tableFixer, env ) );
domVisitor2.addHandler( 'th',
tableFixer.reparseTemplatedAttributes.bind( tableFixer, env ) );
// 4. Save data.parsoid into data-parsoid html attribute.
- domVisitor2.addHandler( null, cleanupAndSaveDataParsoid );
+ domVisitor2.addHandler( null, cleanupAndSaveDataParsoid.bind( null, env
) );
this.processors.push(domVisitor2.traverse.bind(domVisitor2));
}
@@ -202,6 +206,16 @@
console.warn("--------------------------------");
}
+ // holder for data-parsoid
+ if ( psd.storeDataParsoid ) {
+ document.data = {
+ parsoid: {
+ counter: -1,
+ ids: {}
+ }
+ };
+ }
+
for (var i = 0; i < this.processors.length; i++) {
try {
this.processors[i](document, this.env, this.options);
@@ -209,10 +223,6 @@
env.errCB(e);
}
}
-
- // DOMTraverser only processes document.body.childNodes
- document.body.data.parsoid.tmp = undefined;
- DU.saveDataAttribs(document.body);
// add <head> element if it was missing
if (!document.head) {
diff --git a/js/lib/mediawiki.DOMUtils.js b/js/lib/mediawiki.DOMUtils.js
index 0b5a9d3..af47ecb 100644
--- a/js/lib/mediawiki.DOMUtils.js
+++ b/js/lib/mediawiki.DOMUtils.js
@@ -8,6 +8,7 @@
var domino = require( './domino' ),
entities = require( 'entities' ),
Util = require('./mediawiki.Util.js').Util,
+ JSUtils = require('./jsutils').JSUtils,
Consts = require('./mediawiki.wikitext.constants.js').WikitextConstants,
pd = require('./mediawiki.parser.defines.js'),
XMLSerializer = require('./XMLSerializer');
@@ -1489,6 +1490,40 @@
isComment: function( node ) {
return this.hasNodeName( node, "#comment" );
+ },
+
+ // Applies a data-parsoid JSON structure to the document.
+ // Removes the generated ids from each element,
+ // and adds back the data-parsoid attributes.
+ applyDataParsoid: function ( document, dp ) {
+ Object.keys( dp.ids ).forEach(function ( key ) {
+ var el = document.getElementById( key );
+ if ( el ) {
+ this.setJSONAttribute( el, 'data-parsoid',
dp.ids[key] );
+ if ( /^mw[\w-]{2,}$/.test( key ) ) {
+ el.removeAttribute( 'id' );
+ }
+ }
+ }.bind( this ));
+ },
+
+ // Removes the data-parsoid attribute from a node,
+ // and migrates the data to the document's JSON store.
+ // Generates a unique id with the following format:
+ // mw<base64-encoded counter>
+ // but attempts to keep user defined ids.
+ storeDataParsoid: function ( node, dp ) {
+ var uid = node.id;
+ var document = node.ownerDocument;
+ if ( !uid ) {
+ do {
+ document.data.parsoid.counter += 1;
+ uid = "mw" + JSUtils.counterToBase64(
document.data.parsoid.counter );
+ } while ( document.getElementById( uid ) );
+ node.setAttribute( "id", uid );
+ }
+ document.data.parsoid.ids[uid] = dp;
+ delete node.data.parsoid;
}
};
diff --git a/js/lib/mediawiki.ParsoidConfig.js
b/js/lib/mediawiki.ParsoidConfig.js
index 8606f01..59cce1d 100644
--- a/js/lib/mediawiki.ParsoidConfig.js
+++ b/js/lib/mediawiki.ParsoidConfig.js
@@ -149,6 +149,11 @@
*/
ParsoidConfig.prototype.version = 0;
+/**
+ * @property {boolean} storeDataParsoid
+ */
+ParsoidConfig.prototype.storeDataParsoid = false;
+
if (typeof module === "object") {
module.exports.ParsoidConfig = ParsoidConfig;
}
diff --git a/js/tests/parse.js b/js/tests/parse.js
index c430b43..b5405c7 100755
--- a/js/tests/parse.js
+++ b/js/tests/parse.js
@@ -184,6 +184,16 @@
description: 'List of valid extensions - of form
foo,bar,baz',
'boolean': false,
'default': ''
+ },
+ 'dp': {
+ description: 'Output data-parsoid JSON',
+ 'boolean': true,
+ 'default': false
+ },
+ 'dpin': {
+ description: 'Input data-parsoid JSON',
+ 'boolean': false,
+ 'default': ''
}
});
@@ -233,6 +243,10 @@
Util.setDebuggingFlags( env.conf.parsoid, argv );
+ if ( argv.dp ) {
+ env.conf.parsoid.storeDataParsoid = true;
+ }
+
var i, validExtensions;
if ( validExtensions !== '' ) {
@@ -277,7 +291,9 @@
if (argv.html2wt || argv.html2html) {
var doc = DU.parseHTML(input.replace(/\r/g, '')),
wt = '';
-
+ if ( argv.dpin.length > 0 ) {
+ DU.applyDataParsoid( doc, JSON.parse(
argv.dpin ) );
+ }
serializer.serializeDOM( doc.body, function ( chunk ) {
wt += chunk;
}, function () {
@@ -310,6 +326,9 @@
}
};
if (argv.wt2html) {
+ if ( argv.dp ) {
+ console.log(
JSON.stringify( document.data.parsoid ) );
+ }
if ( argv.normalize ) {
res = Util.normalizeOut
(DU.serializeNode(document.body),
@@ -320,6 +339,9 @@
finishCb(true);
} else {
res = '';
+ if ( argv.dp ) {
+ DU.applyDataParsoid(
document, document.data.parsoid );
+ }
serializer.serializeDOM(
DU.parseHTML(DU.serializeNode(document, true)).body,
function ( chunk ) {
--
To view, visit https://gerrit.wikimedia.org/r/88395
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: Ie58620d0909be3fcd3e9dcf84efaaa16c96d556e
Gerrit-PatchSet: 11
Gerrit-Project: mediawiki/extensions/Parsoid
Gerrit-Branch: master
Gerrit-Owner: Arlolra <[email protected]>
Gerrit-Reviewer: GWicke <[email protected]>
Gerrit-Reviewer: Subramanya Sastry <[email protected]>
Gerrit-Reviewer: jenkins-bot
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits