[MediaWiki-commits] [Gerrit] mediawiki...cxserver[master]: Remove parsoid generated section tags if any from the parsed...
jenkins-bot has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/383329 ) Change subject: Remove parsoid generated section tags if any from the parsed page content .. Remove parsoid generated section tags if any from the parsed page content Makes cxserver content parser ready to support T114072 - Section wrapping for MediaWiki sections. CX will do its own section wrapping based on translatable section concept. Added tests and did some code cleanup for LinearDoc.test.js Bug: T177752 Change-Id: I743ca44b1f431e9a42a7f907922b8fe6048b6081 --- M lib/lineardoc/Builder.js M test/lineardoc/LinearDoc.test.js A test/lineardoc/data/test6-result.xhtml A test/lineardoc/data/test6-result.xml A test/lineardoc/data/test6.xhtml 5 files changed, 130 insertions(+), 27 deletions(-) Approvals: jenkins-bot: Verified Nikerabbit: Looks good to me, approved diff --git a/lib/lineardoc/Builder.js b/lib/lineardoc/Builder.js index b8fc92d..33cacf0 100644 --- a/lib/lineardoc/Builder.js +++ b/lib/lineardoc/Builder.js @@ -32,7 +32,14 @@ Builder.prototype.pushBlockTag = function ( tag ) { this.finishTextBlock(); this.blockTags.push( tag ); + if ( this.isIgnoredTag( tag ) ) { + return; + } this.doc.addItem( 'open', tag ); +}; + +Builder.prototype.isIgnoredTag = function ( tag ) { + return tag.name === 'section' && tag.attributes[ 'data-section-number' ]; }; Builder.prototype.popBlockTag = function ( tagName ) { @@ -43,7 +50,11 @@ ); } this.finishTextBlock(); - this.doc.addItem( 'close', tag ); + + if ( !this.isIgnoredTag( tag ) ) { + this.doc.addItem( 'close', tag ); + } + return tag; }; diff --git a/test/lineardoc/LinearDoc.test.js b/test/lineardoc/LinearDoc.test.js index 3907fc1..f93d5d0 100644 --- a/test/lineardoc/LinearDoc.test.js +++ b/test/lineardoc/LinearDoc.test.js @@ -1,54 +1,57 @@ 'use strict'; -var assert = require( '../utils/assert.js' ), +const assert = require( '../utils/assert' ), LinearDoc = require( '../../lib/lineardoc' ), fs = require( 'fs' ), transTests = 
require( __dirname + '/translate.test.json' ); -describe( 'LinearDoc', function () { - it( 'should be possible to linearise all kind of HTML inputs', function () { - var parser, testXhtmlFile, resultXmlFile, resultXhtmlFile, testXhtml, resultXml, - resultXhtml, i, - numTests = 5; - for ( i = 1; i <= numTests; i++ ) { - testXhtmlFile = __dirname + '/data/test' + i + '.xhtml'; - resultXmlFile = __dirname + '/data/test' + i + '-result.xml'; - resultXhtmlFile = __dirname + '/data/test' + i + '-result.xhtml'; +function normalize( html ) { + const normalizer = new LinearDoc.Normalizer(); + normalizer.init(); + normalizer.write( html.replace( /(\r\n|\n|\t|\r)/gm, '' ) ); + return normalizer.getHtml(); +} - testXhtml = fs.readFileSync( testXhtmlFile, 'utf8' ).replace( /^\s+|\s+$/, '' ); - resultXml = fs.readFileSync( resultXmlFile, 'utf8' ).replace( /^\s+|\s+$/, '' ); - resultXhtml = fs.readFileSync( resultXhtmlFile, 'utf8' ).replace( /^\s+|\s+$/, '' ); - parser = new LinearDoc.Parser( new LinearDoc.MwContextualizer() ); +describe( 'LinearDoc', () => { + it( 'should be possible to linearise all kind of HTML inputs', () => { + const numTests = 6; + for ( let i = 1; i <= numTests; i++ ) { + const testXhtmlFile = __dirname + '/data/test' + i + '.xhtml'; + const resultXmlFile = __dirname + '/data/test' + i + '-result.xml'; + const resultXhtmlFile = __dirname + '/data/test' + i + '-result.xhtml'; + + const testXhtml = fs.readFileSync( testXhtmlFile, 'utf8' ).replace( /^\s+|\s+$/, '' ); + const expectedXml = fs.readFileSync( resultXmlFile, 'utf8' ).replace( /^\s+|\s+$/, '' ); + const expectedXhtml = fs.readFileSync( resultXhtmlFile, 'utf8' ).replace( /^\s+|\s+$/, '' ); + const parser = new LinearDoc.Parser( new LinearDoc.MwContextualizer() ); parser.init(); parser.write( testXhtml ); assert.deepEqual( - parser.builder.doc.dumpXml(), - resultXml, + normalize( parser.builder.doc.dumpXml() ), + normalize( expectedXml ), 'Linearised structure' ); assert.deepEqual( -
[MediaWiki-commits] [Gerrit] mediawiki...cxserver[master]: Remove parsoid generated section tags if any from the parsed...
Santhosh has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/383329 ) Change subject: Remove parsoid generated section tags if any from the parsed page content .. Remove parsoid generated section tags if any from the parsed page content Makes cxserver content parser ready to support T114072 - Section wrapping for MediaWiki sections. CX will do its own section wrapping based on translatable section concept. Added tests and did some code cleanup for LinearDoc.test.js Bug: T177752 Change-Id: I743ca44b1f431e9a42a7f907922b8fe6048b6081 --- M lib/lineardoc/Builder.js M test/lineardoc/LinearDoc.test.js A test/lineardoc/data/test6-result.xhtml A test/lineardoc/data/test6-result.xml A test/lineardoc/data/test6.xhtml 5 files changed, 101 insertions(+), 27 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/cxserver refs/changes/29/383329/1 diff --git a/lib/lineardoc/Builder.js b/lib/lineardoc/Builder.js index b8fc92d..36e9a67 100644 --- a/lib/lineardoc/Builder.js +++ b/lib/lineardoc/Builder.js @@ -32,7 +32,14 @@ Builder.prototype.pushBlockTag = function ( tag ) { this.finishTextBlock(); this.blockTags.push( tag ); + if ( this.isParsoidSection( tag ) ) { + return; + } this.doc.addItem( 'open', tag ); +}; + +Builder.prototype.isParsoidSection = function ( tag ) { + return tag.name === 'section' && tag.attributes[ 'data-section-number' ]; }; Builder.prototype.popBlockTag = function ( tagName ) { @@ -43,7 +50,11 @@ ); } this.finishTextBlock(); - this.doc.addItem( 'close', tag ); + + if ( !this.isParsoidSection( tag ) ) { + this.doc.addItem( 'close', tag ); + } + return tag; }; diff --git a/test/lineardoc/LinearDoc.test.js b/test/lineardoc/LinearDoc.test.js index 3907fc1..e2bc9ce 100644 --- a/test/lineardoc/LinearDoc.test.js +++ b/test/lineardoc/LinearDoc.test.js @@ -1,54 +1,57 @@ 'use strict'; -var assert = require( '../utils/assert.js' ), +const assert = require( '../utils/assert' ), LinearDoc = require( '../../lib/lineardoc' ), fs = 
require( 'fs' ), transTests = require( __dirname + '/translate.test.json' ); -describe( 'LinearDoc', function () { - it( 'should be possible to linearise all kind of HTML inputs', function () { - var parser, testXhtmlFile, resultXmlFile, resultXhtmlFile, testXhtml, resultXml, - resultXhtml, i, - numTests = 5; - for ( i = 1; i <= numTests; i++ ) { - testXhtmlFile = __dirname + '/data/test' + i + '.xhtml'; - resultXmlFile = __dirname + '/data/test' + i + '-result.xml'; - resultXhtmlFile = __dirname + '/data/test' + i + '-result.xhtml'; +function normalize( html ) { + const normalizer = new LinearDoc.Normalizer(); + normalizer.init(); + normalizer.write( html.replace( /(\r\n|\n|\t|\r)/gm, '' ) ); + return normalizer.getHtml(); +} - testXhtml = fs.readFileSync( testXhtmlFile, 'utf8' ).replace( /^\s+|\s+$/, '' ); - resultXml = fs.readFileSync( resultXmlFile, 'utf8' ).replace( /^\s+|\s+$/, '' ); - resultXhtml = fs.readFileSync( resultXhtmlFile, 'utf8' ).replace( /^\s+|\s+$/, '' ); - parser = new LinearDoc.Parser( new LinearDoc.MwContextualizer() ); +describe( 'LinearDoc', () => { + it( 'should be possible to linearise all kind of HTML inputs', () => { + const numTests = 6; + for ( let i = 1; i <= numTests; i++ ) { + const testXhtmlFile = __dirname + '/data/test' + i + '.xhtml'; + const resultXmlFile = __dirname + '/data/test' + i + '-result.xml'; + const resultXhtmlFile = __dirname + '/data/test' + i + '-result.xhtml'; + + const testXhtml = fs.readFileSync( testXhtmlFile, 'utf8' ).replace( /^\s+|\s+$/, '' ); + const resultXml = fs.readFileSync( resultXmlFile, 'utf8' ).replace( /^\s+|\s+$/, '' ); + const resultXhtml = fs.readFileSync( resultXhtmlFile, 'utf8' ).replace( /^\s+|\s+$/, '' ); + const parser = new LinearDoc.Parser( new LinearDoc.MwContextualizer() ); parser.init(); parser.write( testXhtml ); assert.deepEqual( - parser.builder.doc.dumpXml(), - resultXml, + normalize( parser.builder.doc.dumpXml() ), + normalize( resultXml ), 'Linearised structure' ); 
assert.deepEqual( -