[MediaWiki-commits] [Gerrit] mediawiki...cxserver[master]: Remove parsoid generated section tags if any from the parsed...

2017-10-16 Thread jenkins-bot (Code Review)
jenkins-bot has submitted this change and it was merged. ( 
https://gerrit.wikimedia.org/r/383329 )

Change subject: Remove parsoid generated section tags if any from the parsed 
page content
..


Remove parsoid generated section tags if any from the parsed page content

Makes the cxserver content parser ready to support T114072 - Section
wrapping for MediaWiki sections. CX will do its own section wrapping
based on the translatable section concept.

Added tests and did some code cleanup for LinearDoc.test.js

Bug: T177752
Change-Id: I743ca44b1f431e9a42a7f907922b8fe6048b6081
---
M lib/lineardoc/Builder.js
M test/lineardoc/LinearDoc.test.js
A test/lineardoc/data/test6-result.xhtml
A test/lineardoc/data/test6-result.xml
A test/lineardoc/data/test6.xhtml
5 files changed, 130 insertions(+), 27 deletions(-)

Approvals:
  jenkins-bot: Verified
  Nikerabbit: Looks good to me, approved



diff --git a/lib/lineardoc/Builder.js b/lib/lineardoc/Builder.js
index b8fc92d..33cacf0 100644
--- a/lib/lineardoc/Builder.js
+++ b/lib/lineardoc/Builder.js
@@ -32,7 +32,14 @@
 Builder.prototype.pushBlockTag = function ( tag ) {
this.finishTextBlock();
this.blockTags.push( tag );
+   if ( this.isIgnoredTag( tag ) ) {
+   return;
+   }
this.doc.addItem( 'open', tag );
+};
+
+Builder.prototype.isIgnoredTag = function ( tag ) {
+   return tag.name === 'section' && tag.attributes[ 'data-section-number' 
];
 };
 
 Builder.prototype.popBlockTag = function ( tagName ) {
@@ -43,7 +50,11 @@
);
}
this.finishTextBlock();
-   this.doc.addItem( 'close', tag );
+
+   if ( !this.isIgnoredTag( tag ) ) {
+   this.doc.addItem( 'close', tag );
+   }
+
return tag;
 };
 
diff --git a/test/lineardoc/LinearDoc.test.js b/test/lineardoc/LinearDoc.test.js
index 3907fc1..f93d5d0 100644
--- a/test/lineardoc/LinearDoc.test.js
+++ b/test/lineardoc/LinearDoc.test.js
@@ -1,54 +1,57 @@
 'use strict';
 
-var assert = require( '../utils/assert.js' ),
+const assert = require( '../utils/assert' ),
LinearDoc = require( '../../lib/lineardoc' ),
fs = require( 'fs' ),
transTests = require( __dirname + '/translate.test.json' );
 
-describe( 'LinearDoc', function () {
-   it( 'should be possible to linearise all kind of HTML inputs', function 
() {
-   var parser, testXhtmlFile, resultXmlFile, resultXhtmlFile, 
testXhtml, resultXml,
-   resultXhtml, i,
-   numTests = 5;
-   for ( i = 1; i <= numTests; i++ ) {
-   testXhtmlFile = __dirname + '/data/test' + i + '.xhtml';
-   resultXmlFile = __dirname + '/data/test' + i + 
'-result.xml';
-   resultXhtmlFile = __dirname + '/data/test' + i + 
'-result.xhtml';
+function normalize( html ) {
+   const normalizer = new LinearDoc.Normalizer();
+   normalizer.init();
+   normalizer.write( html.replace( /(\r\n|\n|\t|\r)/gm, '' ) );
+   return normalizer.getHtml();
+}
 
-   testXhtml = fs.readFileSync( testXhtmlFile, 'utf8' 
).replace( /^\s+|\s+$/, '' );
-   resultXml = fs.readFileSync( resultXmlFile, 'utf8' 
).replace( /^\s+|\s+$/, '' );
-   resultXhtml = fs.readFileSync( resultXhtmlFile, 'utf8' 
).replace( /^\s+|\s+$/, '' );
-   parser = new LinearDoc.Parser( new 
LinearDoc.MwContextualizer() );
+describe( 'LinearDoc', () => {
+   it( 'should be possible to linearise all kind of HTML inputs', () => {
+   const numTests = 6;
+   for ( let i = 1; i <= numTests; i++ ) {
+   const testXhtmlFile = __dirname + '/data/test' + i + 
'.xhtml';
+   const resultXmlFile = __dirname + '/data/test' + i + 
'-result.xml';
+   const resultXhtmlFile = __dirname + '/data/test' + i + 
'-result.xhtml';
+
+   const testXhtml = fs.readFileSync( testXhtmlFile, 
'utf8' ).replace( /^\s+|\s+$/, '' );
+   const expectedXml = fs.readFileSync( resultXmlFile, 
'utf8' ).replace( /^\s+|\s+$/, '' );
+   const expectedXhtml = fs.readFileSync( resultXhtmlFile, 
'utf8' ).replace( /^\s+|\s+$/, '' );
+   const parser = new LinearDoc.Parser( new 
LinearDoc.MwContextualizer() );
parser.init();
parser.write( testXhtml );
assert.deepEqual(
-   parser.builder.doc.dumpXml(),
-   resultXml,
+   normalize( parser.builder.doc.dumpXml() ),
+   normalize( expectedXml ),
'Linearised structure'
);
assert.deepEqual(
-   

[MediaWiki-commits] [Gerrit] mediawiki...cxserver[master]: Remove parsoid generated section tags if any from the parsed...

2017-10-10 Thread Santhosh (Code Review)
Santhosh has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/383329 )

Change subject: Remove parsoid generated section tags if any from the parsed 
page content
..

Remove parsoid generated section tags if any from the parsed page content

Makes the cxserver content parser ready to support T114072 - Section
wrapping for MediaWiki sections. CX will do its own section wrapping
based on the translatable section concept.

Added tests and did some code cleanup for LinearDoc.test.js

Bug: T177752
Change-Id: I743ca44b1f431e9a42a7f907922b8fe6048b6081
---
M lib/lineardoc/Builder.js
M test/lineardoc/LinearDoc.test.js
A test/lineardoc/data/test6-result.xhtml
A test/lineardoc/data/test6-result.xml
A test/lineardoc/data/test6.xhtml
5 files changed, 101 insertions(+), 27 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/cxserver 
refs/changes/29/383329/1

diff --git a/lib/lineardoc/Builder.js b/lib/lineardoc/Builder.js
index b8fc92d..36e9a67 100644
--- a/lib/lineardoc/Builder.js
+++ b/lib/lineardoc/Builder.js
@@ -32,7 +32,14 @@
 Builder.prototype.pushBlockTag = function ( tag ) {
this.finishTextBlock();
this.blockTags.push( tag );
+   if ( this.isParsoidSection( tag ) ) {
+   return;
+   }
this.doc.addItem( 'open', tag );
+};
+
+Builder.prototype.isParsoidSection = function ( tag ) {
+   return tag.name === 'section' && tag.attributes[ 'data-section-number' 
];
 };
 
 Builder.prototype.popBlockTag = function ( tagName ) {
@@ -43,7 +50,11 @@
);
}
this.finishTextBlock();
-   this.doc.addItem( 'close', tag );
+
+   if ( !this.isParsoidSection( tag ) ) {
+   this.doc.addItem( 'close', tag );
+   }
+
return tag;
 };
 
diff --git a/test/lineardoc/LinearDoc.test.js b/test/lineardoc/LinearDoc.test.js
index 3907fc1..e2bc9ce 100644
--- a/test/lineardoc/LinearDoc.test.js
+++ b/test/lineardoc/LinearDoc.test.js
@@ -1,54 +1,57 @@
 'use strict';
 
-var assert = require( '../utils/assert.js' ),
+const assert = require( '../utils/assert' ),
LinearDoc = require( '../../lib/lineardoc' ),
fs = require( 'fs' ),
transTests = require( __dirname + '/translate.test.json' );
 
-describe( 'LinearDoc', function () {
-   it( 'should be possible to linearise all kind of HTML inputs', function 
() {
-   var parser, testXhtmlFile, resultXmlFile, resultXhtmlFile, 
testXhtml, resultXml,
-   resultXhtml, i,
-   numTests = 5;
-   for ( i = 1; i <= numTests; i++ ) {
-   testXhtmlFile = __dirname + '/data/test' + i + '.xhtml';
-   resultXmlFile = __dirname + '/data/test' + i + 
'-result.xml';
-   resultXhtmlFile = __dirname + '/data/test' + i + 
'-result.xhtml';
+function normalize( html ) {
+   const normalizer = new LinearDoc.Normalizer();
+   normalizer.init();
+   normalizer.write( html.replace( /(\r\n|\n|\t|\r)/gm, '' ) );
+   return normalizer.getHtml();
+}
 
-   testXhtml = fs.readFileSync( testXhtmlFile, 'utf8' 
).replace( /^\s+|\s+$/, '' );
-   resultXml = fs.readFileSync( resultXmlFile, 'utf8' 
).replace( /^\s+|\s+$/, '' );
-   resultXhtml = fs.readFileSync( resultXhtmlFile, 'utf8' 
).replace( /^\s+|\s+$/, '' );
-   parser = new LinearDoc.Parser( new 
LinearDoc.MwContextualizer() );
+describe( 'LinearDoc', () => {
+   it( 'should be possible to linearise all kind of HTML inputs', () => {
+   const numTests = 6;
+   for ( let i = 1; i <= numTests; i++ ) {
+   const testXhtmlFile = __dirname + '/data/test' + i + 
'.xhtml';
+   const resultXmlFile = __dirname + '/data/test' + i + 
'-result.xml';
+   const resultXhtmlFile = __dirname + '/data/test' + i + 
'-result.xhtml';
+
+   const testXhtml = fs.readFileSync( testXhtmlFile, 
'utf8' ).replace( /^\s+|\s+$/, '' );
+   const resultXml = fs.readFileSync( resultXmlFile, 
'utf8' ).replace( /^\s+|\s+$/, '' );
+   const resultXhtml = fs.readFileSync( resultXhtmlFile, 
'utf8' ).replace( /^\s+|\s+$/, '' );
+   const parser = new LinearDoc.Parser( new 
LinearDoc.MwContextualizer() );
parser.init();
parser.write( testXhtml );
assert.deepEqual(
-   parser.builder.doc.dumpXml(),
-   resultXml,
+   normalize( parser.builder.doc.dumpXml() ),
+   normalize( resultXml ),
'Linearised structure'
);
assert.deepEqual(
-