Esanders has uploaded a new change for review. https://gerrit.wikimedia.org/r/92885
Change subject: Store inner whitespace of the body and compare it on conversion ...................................................................... Store inner whitespace of the body and compare it on conversion Calculate and store the two inner whitespace values of the body in the dm.Document. When converting back, make sure the first/last nodes pre/post outer whitespace matches the inner left/right whitespace of the body. Bug: 54964 Change-Id: I45f1ffd63669f25a6cae878400bfe21719ed58ee --- M modules/ve-mw/init/targets/ve.init.mw.ViewPageTarget.js M modules/ve/dm/ve.dm.Converter.js M modules/ve/dm/ve.dm.Document.js M modules/ve/test/dm/ve.dm.example.js M modules/ve/test/ve.test.utils.js 5 files changed, 86 insertions(+), 31 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/VisualEditor refs/changes/85/92885/1 diff --git a/modules/ve-mw/init/targets/ve.init.mw.ViewPageTarget.js b/modules/ve-mw/init/targets/ve.init.mw.ViewPageTarget.js index 0a59927..b775daa 100644 --- a/modules/ve-mw/init/targets/ve.init.mw.ViewPageTarget.js +++ b/modules/ve-mw/init/targets/ve.init.mw.ViewPageTarget.js @@ -736,11 +736,11 @@ if ( this.pageExists ) { // Has no callback, handled via target.onShowChanges this.showChanges( - ve.dm.converter.getDomFromData( doc.getFullData(), doc.getStore(), doc.getInternalList() ) + ve.dm.converter.getDomFromData( doc.getFullData(), doc.getStore(), doc.getInternalList(), doc.getInnerWhitespace() ) ); } else { this.serialize( - ve.dm.converter.getDomFromData( doc.getFullData(), doc.getStore(), doc.getInternalList() ), + ve.dm.converter.getDomFromData( doc.getFullData(), doc.getStore(), doc.getInternalList(), doc.getInnerWhitespace() ), ve.bind( this.onSerialize, this ) ); } @@ -786,7 +786,7 @@ this.saveDialog.saveButton.setDisabled( true ); this.saveDialog.$loadingIcon.show(); this.save( - ve.dm.converter.getDomFromData( doc.getFullData(), doc.getStore(), doc.getInternalList() ), + ve.dm.converter.getDomFromData( doc.getFullData(), doc.getStore(), doc.getInternalList(), doc.getInnerWhitespace() ), saveOptions ); } @@ -856,10 +856,11 @@ // Build linmod var store = new ve.dm.IndexValueStore(), internalList = new ve.dm.InternalList(), - data = ve.dm.converter.getDataFromDom( doc, store, internalList ); + innerWhitespace = new Array( 2 ), + data = ve.dm.converter.getDataFromDom( doc, store, internalList, innerWhitespace ); setTimeout( function () { // Build DM tree - var dmDoc = new ve.dm.Document( data, doc, undefined, internalList ); + var dmDoc = new ve.dm.Document( data, doc, undefined, internalList, innerWhitespace ); setTimeout( function () { // Create ui.Surface (also creates ce.Surface and dm.Surface and builds CE tree) target.surface = new ve.ui.Surface( dmDoc, target.surfaceOptions ); @@ -929,8 +930,8 @@ // <body> were ignored in the conversion. So compare each child separately. var i, len = oldDom.body.childNodes.length, - newDoc = new ve.dm.Document( data, oldDom, undefined, doc.getInternalList() ), - newDom = ve.dm.converter.getDomFromData( newDoc.getFullData(), newDoc.getStore(), newDoc.getInternalList() ); + newDoc = new ve.dm.Document( data, oldDom, undefined, doc.getInternalList(), doc.getInnerWhitespace() ), + newDom = ve.dm.converter.getDomFromData( newDoc.getFullData(), newDoc.getStore(), newDoc.getInternalList(), newDoc.getInnerWhitespace() ); // Explicitly unlink our full copy of the original version of the document data data = undefined; diff --git a/modules/ve/dm/ve.dm.Converter.js b/modules/ve/dm/ve.dm.Converter.js index c031de6..de792b9 100644 --- a/modules/ve/dm/ve.dm.Converter.js +++ b/modules/ve/dm/ve.dm.Converter.js @@ -385,9 +385,10 @@ * @param {HTMLDocument} doc HTML document to convert * @param {ve.dm.IndexValueStore} store Index-value store * @param {ve.dm.InternalList} internalList Internal list + * @param {Array} innerWhitespace Inner whitespace * @returns {ve.dm.FlatLinearData} Linear model data */ -ve.dm.Converter.prototype.getDataFromDom = function ( doc, store, internalList ) { +ve.dm.Converter.prototype.getDataFromDom = function ( doc, store, internalList, innerWhitespace ) { var linearData, refData; // Set up the converter state @@ -403,6 +404,8 @@ ); refData = this.internalList.convertToData( this, doc ); linearData.batchSplice( linearData.getLength(), 0, refData ); + + this.setInnerWhitespace( innerWhitespace, linearData ); // Clear the state this.doc = null; @@ -977,6 +980,39 @@ }; /** + * Set inner whitespace from linear data + * + * @param {Array} innerWhitespace Inner whitespace + * @param {ve.dm.FlatLinearData} data Linear model data + */ +ve.dm.Converter.prototype.setInnerWhitespace = function ( innerWhitespace, data ) { + var whitespace, + stack = 0, + last = data.getLength() - 1; + + if ( data.isOpenElementData( 0 ) ) { + whitespace = ve.getProp( data.getData( 0 ), 'internal', 'whitespace' ); + innerWhitespace[0] = whitespace ? whitespace[0] : undefined; + } + if ( data.isCloseElementData( last ) ) { + // Find matching opening tag of the last close tag + stack++; + while ( --last ) { + if ( data.isCloseElementData( last ) ) { + stack++; + } else if ( data.isOpenElementData( last ) ) { + stack--; + if ( stack === 0 && data.getType( last ) !== 'internalList' ) { + break; + } + } + } + whitespace = ve.getProp( data.getData( last ), 'internal', 'whitespace' ); + innerWhitespace[1] = whitespace ? whitespace[3] : undefined; + } +}; + +/** * Check if all the domElements provided are metadata or whitespace. * * A list of model names to exclude when matching can optionally be passed. @@ -1031,9 +1067,10 @@ * @param {Array} documentData Linear model data * @param {ve.dm.IndexValueStore} store Index-value store * @param {ve.dm.InternalList} internalList Internal list + * @param {Array} innerWhitespace Inner whitespace * @returns {HTMLDocument} Document containing the resulting HTML */ -ve.dm.Converter.prototype.getDomFromData = function ( documentData, store, internalList ) { +ve.dm.Converter.prototype.getDomFromData = function ( documentData, store, internalList, innerWhitespace ) { var doc = ve.createDocumentFromHtml( '' ); // Set up the converter state @@ -1041,7 +1078,7 @@ this.store = store; this.internalList = internalList; - this.getDomSubtreeFromData( documentData, doc.body ); + this.getDomSubtreeFromData( documentData, doc.body, innerWhitespace ); // Clear the state this.documentData = null; @@ -1056,9 +1093,10 @@ * * @param {Array} data Linear model data * @param {HTMLElement} container DOM element to add the generated elements to. Should be empty. + * @param {Array} [innerWhitespace] Inner whitespace if the container is the body * @throws Unbalanced data: looking for closing /type */ -ve.dm.Converter.prototype.getDomSubtreeFromData = function ( data, container ) { +ve.dm.Converter.prototype.getDomSubtreeFromData = function ( data, container, innerWhitespace ) { var text, i, j, isStart, annotations, dataElement, dataElementOrSlice, childDomElements, pre, ours, theirs, parentDomElement, lastChild, isContentNode, sibling, previousSiblings, doUnwrap, textNode, type, annotatedDomElementStack, annotatedDomElements, @@ -1440,9 +1478,8 @@ // Get previous sibling's outerPost theirs = parentDomElement.lastOuterPost; } else if ( parentDomElement === container ) { - // outerPre of the very first node in the document, this one - // has no duplicate - theirs = ours; + // outerPre of the very first node in the document, check against body innerWhitespace + theirs = innerWhitespace[0]; } else { // First child, get parent's innerPre if ( @@ -1473,8 +1510,8 @@ } } } - // Process the outerPost whitespace of the very last node - if ( container.lastOuterPost !== undefined ) { + // Check outerPost whitespace of the very last node against body innerWhitespace + if ( container.lastOuterPost !== undefined && container.lastOuterPost === innerWhitespace[1] ) { if ( container.lastChild && container.lastChild.nodeType === Node.TEXT_NODE ) { // Last child is a TextNode, append to it container.lastChild.appendData( container.lastOuterPost ); diff --git a/modules/ve/dm/ve.dm.Document.js b/modules/ve/dm/ve.dm.Document.js index 4b4554c..de4c636 100644 --- a/modules/ve/dm/ve.dm.Document.js +++ b/modules/ve/dm/ve.dm.Document.js @@ -23,8 +23,9 @@ * ignored. * @param {ve.dm.Document} [parentDocument] Document to use as root for created nodes * @param {ve.dm.InternalList} [internalList] Internal list to clone; passed when creating a document slice + * @param {Array} [innerWhitespace] Inner whitespace to clone; passed when creating a document slice */ -ve.dm.Document = function VeDmDocument( data, htmlDocument, parentDocument, internalList ) { +ve.dm.Document = function VeDmDocument( data, htmlDocument, parentDocument, internalList, innerWhitespace ) { // Parent constructor ve.Document.call( this, new ve.dm.DocumentNode() ); @@ -37,6 +38,7 @@ this.documentNode.setRoot( root ); this.documentNode.setDocument( doc ); this.internalList = internalList ? internalList.clone( this ) : new ve.dm.InternalList( this ); + this.innerWhitespace = innerWhitespace ? ve.copy( innerWhitespace ) : new Array( 2 ); // Properties this.parentDocument = parentDocument; @@ -51,7 +53,7 @@ fullData = data; } else if ( !ve.isArray( data ) && typeof data === 'object' ) { // HTMLDocument - fullData = ve.dm.converter.getDataFromDom( data, new ve.dm.IndexValueStore(), this.getInternalList() ); + fullData = ve.dm.converter.getDataFromDom( data, new ve.dm.IndexValueStore(), this.getInternalList(), this.getInnerWhitespace() ); htmlDocument = data; } else { // Raw linear model data @@ -353,6 +355,14 @@ }; /** + * Get the document's inner whitespace + * @returns {Array} The document's inner whitespace + */ +ve.dm.Document.prototype.getInnerWhitespace = function () { + return this.innerWhitespace; +}; + +/** * Clone a sub-document from a range in this document. The new document's store and internal list will be * clones of the ones in this document. * diff --git a/modules/ve/test/dm/ve.dm.example.js b/modules/ve/test/dm/ve.dm.example.js index c2831af..1f25506 100644 --- a/modules/ve/test/dm/ve.dm.example.js +++ b/modules/ve/test/dm/ve.dm.example.js @@ -1787,7 +1787,8 @@ { 'type': '/list' }, { 'type': 'internalList' }, { 'type': '/internalList' } - ] + ], + 'innerWhitespace': [ '\n', '\t\n\t\n' ] }, 'outer whitespace preservation in a list with bare text and a sublist': { 'body': '<ul>\n<li>\n\nBa re\n\n\n<ul>\n\n\n\n<li> <p> P </p> </li>\t</ul>\t\t</li>\t\t\t</ul>', @@ -1855,7 +1856,8 @@ { 'type': '/paragraph' }, { 'type': 'internalList' }, { 'type': '/internalList' } - ] + ], + 'innerWhitespace': [ undefined, ' ' ] }, 'whitespace preservation with non-edge content whitespace with nested annotations': { 'body': '<p> A B <b> C\t<i>\t\tD\t\t\t</i>\t\t\t\tE\n</b>\n\nF\n\n\n</p>', @@ -2026,7 +2028,8 @@ { 'type': '/alienBlock' }, { 'type': 'internalList' }, { 'type': '/internalList' } - ] + ], + 'innerWhitespace': [ ' ', ' \n ' ] }, 'whitespace preservation not triggered inside <pre>': { 'body': '\n<pre>\n\n\nFoo\n\n\nBar\n\n\n\n</pre>\n\n\n\n\n', @@ -2050,7 +2053,8 @@ { 'type': '/preformatted' }, { 'type': 'internalList' }, { 'type': '/internalList' } - ] + ], + 'innerWhitespace': [ '\n', '\n\n\n\n\n' ] }, 'whitespace preservation in table cell starting with text and ending with annotation': { 'body': '<table><tbody><tr><td>Foo <b>Bar</b></td></tr></tbody></table>', @@ -2343,7 +2347,8 @@ { 'type': 'internalList' }, { 'type': '/internalList' } ], - 'normalizedBody': ' <ul><li><p>\tA\n</p> <p>B</p></li></ul> ' + 'innerWhitespace': [ '\t', '\n' ], + 'normalizedBody': '<ul><li><p>\tA\n</p> <p>B</p></li></ul>' }, 'order of nested annotations is preserved': { 'body': '<p><b><u><i>Foo</i></u></b></p>', @@ -2556,7 +2561,8 @@ { 'type': '/alienBlock' }, { 'type': 'internalList' }, { 'type': '/internalList' } - ] + ], + 'innerWhitespace': [ ' ', ' ' ] }, 'block node inside annotation node is alienated': { 'body': '<span>\n<p>Bar</p></span>', diff --git a/modules/ve/test/ve.test.utils.js b/modules/ve/test/ve.test.utils.js index 6f94ea2..f29de89 100644 --- a/modules/ve/test/ve.test.utils.js +++ b/modules/ve/test/ve.test.utils.js @@ -48,7 +48,7 @@ }; ve.test.utils.runGetDataFromDomTests = function( assert, cases ) { - var msg, doc, store, internalList, i, length, hash, data, html, n = 0; + var msg, doc, store, i, length, hash, data, html, n = 0; // TODO: this is a hack to make normal heading/preformatted // nodes the most recently registered, instead of the MW versions @@ -57,7 +57,7 @@ for ( msg in cases ) { if ( cases[msg].head !== undefined || cases[msg].body !== undefined ) { - n++; + n += 2; if ( cases[msg].storeItems ) { n += cases[msg].storeItems.length; } @@ -69,14 +69,14 @@ if ( cases[msg].head !== undefined || cases[msg].body !== undefined ) { doc = new ve.dm.Document( [] ); store = doc.getStore(); - internalList = doc.getInternalList(); html = '<head>' + ( cases[msg].head || '' ) + '</head><body>' + cases[msg].body + '</body>'; data = ve.dm.converter.getDataFromDom( - ve.createDocumentFromHtml( html ), store, internalList - ).getData(); + ve.createDocumentFromHtml( html ), store, doc.getInternalList(), doc.getInnerWhitespace() + ); ve.dm.example.preprocessAnnotations( cases[msg].data, store ); - assert.deepEqualWithDomElements( data, cases[msg].data, msg ); + assert.deepEqualWithDomElements( data.getData(), cases[msg].data, msg + ': data' ); + assert.deepEqual( doc.getInnerWhitespace(), cases[msg].innerWhitespace || new Array( 2 ), msg + ': inner whitespace' ); // check storeItems have been added to store if ( cases[msg].storeItems ) { for ( i = 0, length = cases[msg].storeItems.length; i < length; i++ ) { @@ -112,10 +112,11 @@ cases[msg].modify( cases[msg].data ); } doc = new ve.dm.Document( ve.dm.example.preprocessAnnotations( cases[msg].data, store ) ); + doc.innerWhitespace = cases[msg].innerWhitespace ? ve.copy( cases[msg].innerWhitespace ) : new Array( 2 ); originalData = ve.copy( doc.getFullData() ); html = '<body>' + ( cases[msg].normalizedBody || cases[msg].body ) + '</body>'; assert.equalDomElement( - ve.dm.converter.getDomFromData( doc.getFullData(), doc.getStore(), doc.getInternalList() ), + ve.dm.converter.getDomFromData( doc.getFullData(), doc.getStore(), doc.getInternalList(), doc.getInnerWhitespace() ), ve.createDocumentFromHtml( html ), msg ); -- To view, visit https://gerrit.wikimedia.org/r/92885 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I45f1ffd63669f25a6cae878400bfe21719ed58ee Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/extensions/VisualEditor Gerrit-Branch: master Gerrit-Owner: Esanders <esand...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits