jenkins-bot has submitted this change and it was merged.

Change subject: Move data-parsoid into a JSON structure outside the DOM
......................................................................


Move data-parsoid into a JSON structure outside the DOM

 * IDs are generated with the following pattern,

     mw<base64-encoded counter>

 * Adds two flags to parse.js,

     --dp, Outputs the JSON structure

     --dpin <JSON>, Accepts the output from the above

 * Adds a configuration option, env.conf.parsoid.storeDataParsoid,
   which is off by default to keep parserTests passing,
   but is turned on by the --dp flag above.

Bug: 52936
Change-Id: Ie58620d0909be3fcd3e9dcf84efaaa16c96d556e
---
M js/lib/dom.cleanup.js
M js/lib/domTraverser.js
M js/lib/jsutils.js
M js/lib/mediawiki.DOMPostProcessor.js
M js/lib/mediawiki.DOMUtils.js
M js/lib/mediawiki.ParsoidConfig.js
M js/tests/parse.js
7 files changed, 107 insertions(+), 13 deletions(-)

Approvals:
  GWicke: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/js/lib/dom.cleanup.js b/js/lib/dom.cleanup.js
index e023b31..c192a36 100644
--- a/js/lib/dom.cleanup.js
+++ b/js/lib/dom.cleanup.js
@@ -29,7 +29,7 @@
 /**
  * Perform some final cleaup and save data-parsoid attributes on each node.
  */
-function cleanupAndSaveDataParsoid( node ) {
+function cleanupAndSaveDataParsoid( env, node ) {
        if ( DU.isElt(node) && node.data ) {
                var dp = node.data.parsoid;
                if (dp) {
@@ -77,6 +77,10 @@
                        if (dp.fostered && dp.dsr && 
!DU.isFirstEncapsulationWrapperNode(node)) {
                                dp.dsr[0] = dp.dsr[1];
                        }
+
+                       if ( env.conf.parsoid.storeDataParsoid ) {
+                               DU.storeDataParsoid( node, dp );
+                       }
                }
                DU.saveDataAttribs( node );
        }
diff --git a/js/lib/domTraverser.js b/js/lib/domTraverser.js
index 815e1d5..be3adae 100644
--- a/js/lib/domTraverser.js
+++ b/js/lib/domTraverser.js
@@ -70,13 +70,13 @@
  *   - continue regular processing on current node
  */
 DOMTraverser.prototype.traverse = function ( node ) {
+       var result, workNode;
        if (node.nodeType === node.DOCUMENT_NODE) {
                // skip to body
-               node = node.body;
+               workNode = node.body;
+       } else {
+               workNode = node.firstChild;
        }
-
-       var workNode = node.firstChild;
-       var result;
 
        while ( workNode !== null ) {
                // Call the handlers on this workNode
diff --git a/js/lib/jsutils.js b/js/lib/jsutils.js
index 51459df..0a6334f 100644
--- a/js/lib/jsutils.js
+++ b/js/lib/jsutils.js
@@ -47,7 +47,25 @@
 
                        this.deepFreeze(prop); // Recursively call deepFreeze.
                }
+       },
+
+       // Convert a counter to a Base64 encoded string.
+       // Padding is stripped. \,+ are replaced with _,- respectively.
+       // Warning: Max integer is 2^31 - 1 for bitwise operations.
+       counterToBase64: function ( n ) {
+               /* jshint bitwise: false */
+               var arr = [];
+               do {
+                       arr.unshift( n & 0xff );
+                       n >>= 8;
+               } while ( n > 0 );
+               return ( new Buffer( arr ) )
+                       .toString( "base64" )
+                       .replace( /=/g, "" )
+                       .replace( /\//g, "_" )
+                       .replace( /\+/g, "-" );
        }
+
 };
 
 if (typeof module === "object") {
diff --git a/js/lib/mediawiki.DOMPostProcessor.js 
b/js/lib/mediawiki.DOMPostProcessor.js
index 4cd14de..ca3293b 100644
--- a/js/lib/mediawiki.DOMPostProcessor.js
+++ b/js/lib/mediawiki.DOMPostProcessor.js
@@ -82,8 +82,8 @@
 }
 
 /**
- * Migrate data-parsoid attributes into a property on each DOM node. We'll
- * migrate them back in the final DOM traversal.
+ * Migrate data-parsoid attributes into a property on each DOM node.
+ * We may migrate them back in the final DOM traversal.
  *
  * Various mw metas are converted to comments before the tree build to
  * avoid fostering. Piggy-backing the reconversion here to avoid excess
@@ -91,6 +91,10 @@
  */
 function prepareDOM( node ) {
        DU.loadDataParsoid( node );
+
+       if ( DU.isElt( node ) ) {
+               node.removeAttribute( "data-parsoid" );
+       }
 
        if ( DU.isComment( node ) && /^\{[^]+\}$/.test( node.data ) ) {
 
@@ -181,7 +185,7 @@
        domVisitor2.addHandler( 'td', 
tableFixer.reparseTemplatedAttributes.bind( tableFixer, env ) );
        domVisitor2.addHandler( 'th', 
tableFixer.reparseTemplatedAttributes.bind( tableFixer, env ) );
        // 4. Save data.parsoid into data-parsoid html attribute.
-       domVisitor2.addHandler( null, cleanupAndSaveDataParsoid );
+       domVisitor2.addHandler( null, cleanupAndSaveDataParsoid.bind( null, env 
) );
        this.processors.push(domVisitor2.traverse.bind(domVisitor2));
 }
 
@@ -202,6 +206,16 @@
                console.warn("--------------------------------");
        }
 
+       // holder for data-parsoid
+       if ( psd.storeDataParsoid ) {
+               document.data = {
+                       parsoid: {
+                               counter: -1,
+                               ids: {}
+                       }
+               };
+       }
+
        for (var i = 0; i < this.processors.length; i++) {
                try {
                        this.processors[i](document, this.env, this.options);
@@ -209,10 +223,6 @@
                        env.errCB(e);
                }
        }
-
-       // DOMTraverser only processes document.body.childNodes
-       document.body.data.parsoid.tmp = undefined;
-       DU.saveDataAttribs(document.body);
 
        // add <head> element if it was missing
        if (!document.head) {
diff --git a/js/lib/mediawiki.DOMUtils.js b/js/lib/mediawiki.DOMUtils.js
index 0b5a9d3..af47ecb 100644
--- a/js/lib/mediawiki.DOMUtils.js
+++ b/js/lib/mediawiki.DOMUtils.js
@@ -8,6 +8,7 @@
 var domino = require( './domino' ),
        entities = require( 'entities' ),
        Util = require('./mediawiki.Util.js').Util,
+       JSUtils = require('./jsutils').JSUtils,
        Consts = require('./mediawiki.wikitext.constants.js').WikitextConstants,
        pd = require('./mediawiki.parser.defines.js'),
        XMLSerializer = require('./XMLSerializer');
@@ -1489,6 +1490,40 @@
 
        isComment: function( node ) {
                return this.hasNodeName( node, "#comment" );
+       },
+
+       // Applies a data-parsoid JSON structure to the document.
+       // Removes the generated ids from each elements,
+       // and adds back the data-parsoid attributes.
+       applyDataParsoid: function ( document, dp ) {
+               Object.keys( dp.ids ).forEach(function ( key ) {
+                       var el = document.getElementById( key );
+                       if ( el ) {
+                               this.setJSONAttribute( el, 'data-parsoid', 
dp.ids[key] );
+                               if ( /^mw[\w-]{2,}$/.test( key ) ) {
+                                       el.removeAttribute( 'id' );
+                               }
+                       }
+               }.bind( this ));
+       },
+
+       // Removes the data-parsoid attribute from a node,
+       // and migrates the data to the document's JSON store.
+       // Generates a unique id with the following format:
+       //   mw<base64-encoded counter>
+       // but attempts to keep user defined ids.
+       storeDataParsoid: function ( node, dp ) {
+               var uid = node.id;
+               var document = node.ownerDocument;
+               if ( !uid ) {
+                       do {
+                               document.data.parsoid.counter += 1;
+                               uid = "mw" + JSUtils.counterToBase64( 
document.data.parsoid.counter );
+                       } while ( document.getElementById( uid ) );
+                       node.setAttribute( "id", uid );
+               }
+               document.data.parsoid.ids[uid] = dp;
+               delete node.data.parsoid;
        }
 
 };
diff --git a/js/lib/mediawiki.ParsoidConfig.js 
b/js/lib/mediawiki.ParsoidConfig.js
index 8606f01..59cce1d 100644
--- a/js/lib/mediawiki.ParsoidConfig.js
+++ b/js/lib/mediawiki.ParsoidConfig.js
@@ -149,6 +149,11 @@
  */
 ParsoidConfig.prototype.version = 0;
 
+/**
+ * @property {boolean} storeDataParsoid
+ */
+ParsoidConfig.prototype.storeDataParsoid = false;
+
 if (typeof module === "object") {
        module.exports.ParsoidConfig = ParsoidConfig;
 }
diff --git a/js/tests/parse.js b/js/tests/parse.js
index c430b43..b5405c7 100755
--- a/js/tests/parse.js
+++ b/js/tests/parse.js
@@ -184,6 +184,16 @@
                        description: 'List of valid extensions - of form 
foo,bar,baz',
                        'boolean': false,
                        'default': ''
+               },
+               'dp': {
+                       description: 'Output data-parsoid JSON',
+                       'boolean': true,
+                       'default': false
+               },
+               'dpin': {
+                       description: 'Input data-parsoid JSON',
+                       'boolean': false,
+                       'default': ''
                }
        });
 
@@ -233,6 +243,10 @@
 
                Util.setDebuggingFlags( env.conf.parsoid, argv );
 
+               if ( argv.dp ) {
+                       env.conf.parsoid.storeDataParsoid = true;
+               }
+
                var i, validExtensions;
 
                if ( validExtensions !== '' ) {
@@ -277,7 +291,9 @@
             if (argv.html2wt || argv.html2html) {
                 var doc = DU.parseHTML(input.replace(/\r/g, '')),
                     wt = '';
-
+                               if ( argv.dpin.length > 0 ) {
+                                       DU.applyDataParsoid( doc, JSON.parse( 
argv.dpin ) );
+                               }
                 serializer.serializeDOM( doc.body, function ( chunk ) {
                     wt += chunk;
                 }, function () {
@@ -310,6 +326,9 @@
                         }
                     };
                     if (argv.wt2html) {
+                                               if ( argv.dp ) {
+                                                       console.log( 
JSON.stringify( document.data.parsoid ) );
+                                               }
                                                if ( argv.normalize ) {
                                                        res = Util.normalizeOut
                                                                
(DU.serializeNode(document.body),
@@ -320,6 +339,9 @@
                         finishCb(true);
                     } else {
                         res = '';
+                                               if ( argv.dp ) {
+                                                       DU.applyDataParsoid( 
document, document.data.parsoid );
+                                               }
                         serializer.serializeDOM(
                                                        
DU.parseHTML(DU.serializeNode(document, true)).body,
                                                        function ( chunk ) {

-- 
To view, visit https://gerrit.wikimedia.org/r/88395
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: Ie58620d0909be3fcd3e9dcf84efaaa16c96d556e
Gerrit-PatchSet: 11
Gerrit-Project: mediawiki/extensions/Parsoid
Gerrit-Branch: master
Gerrit-Owner: Arlolra <[email protected]>
Gerrit-Reviewer: GWicke <[email protected]>
Gerrit-Reviewer: Subramanya Sastry <[email protected]>
Gerrit-Reviewer: jenkins-bot

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to