https://www.mediawiki.org/wiki/Special:Code/MediaWiki/106281
Revision: 106281
Author: gwicke
Date: 2011-12-14 23:38:46 +0000 (Wed, 14 Dec 2011)
Log Message:
-----------
A collection of small bug fixes to the grammar, Cite, the Token format
converter and the HTML DOM -> WikiDom converter. The tokenizer now digests all
parserTests.
Modified Paths:
--------------
trunk/extensions/VisualEditor/modules/parser/ext.Cite.js
trunk/extensions/VisualEditor/modules/parser/mediawiki.DOMConverter.js
trunk/extensions/VisualEditor/modules/parser/mediawiki.HTML5TreeBuilder.node.js
trunk/extensions/VisualEditor/modules/parser/pegTokenizer.pegjs.txt
trunk/extensions/VisualEditor/tests/parser/parserTests.js
Modified: trunk/extensions/VisualEditor/modules/parser/ext.Cite.js
===================================================================
--- trunk/extensions/VisualEditor/modules/parser/ext.Cite.js 2011-12-14
23:23:19 UTC (rev 106280)
+++ trunk/extensions/VisualEditor/modules/parser/ext.Cite.js 2011-12-14
23:38:46 UTC (rev 106281)
@@ -7,6 +7,8 @@
function Cite () {
this.refGroups = {};
this.refTokens = [];
+ // Within ref block
+ this.isActive = false;
}
/**
@@ -111,17 +113,27 @@
var token = tokenCTX.token;
// Collect all tokens between ref start and endtag
- if ( token.type === 'TAG' && token.name.toLowerCase() === 'ref' ) {
+ if ( ! this.isActive &&
+ token.type === 'TAG' &&
+ token.name.toLowerCase() === 'ref' ) {
this.curRef = tokenCTX.token;
// Prepend self for 'any' token type
tokenCTX.dispatcher.prependListener(this.onRefCB, 'any' );
tokenCTX.token = null;
+ this.isActive = true;
return tokenCTX;
- } else if ( token.type === 'ENDTAG' && token.name.toLowerCase() ===
'ref' ) {
+ } else if ( this.isActive &&
+ // Also accept really broken ref close tags..
+ ['TAG', 'ENDTAG', 'SELFCLOSINGTAG'].indexOf(token.type)
>= 0 &&
+ token.name.toLowerCase() === 'ref'
+ )
+ {
+ this.isActive = false;
tokenCTX.dispatcher.removeListener(this.onRefCB, 'any' );
// fall through for further processing!
} else {
// Inside ref block: Collect all other tokens in refTokens and
abort
+ console.log(JSON.stringify(tokenCTX.token, null, 2));
this.refTokens.push(tokenCTX.token);
tokenCTX.token = null;
return tokenCTX;
@@ -287,8 +299,9 @@
// Clean up
this.refGroups = {};
this.refTokens = [];
+ this.isActive = false;
return tokenCTX;
-}
+};
if (typeof module == "object") {
module.exports.Cite = Cite;
Modified: trunk/extensions/VisualEditor/modules/parser/mediawiki.DOMConverter.js
===================================================================
--- trunk/extensions/VisualEditor/modules/parser/mediawiki.DOMConverter.js
2011-12-14 23:23:19 UTC (rev 106280)
+++ trunk/extensions/VisualEditor/modules/parser/mediawiki.DOMConverter.js
2011-12-14 23:38:46 UTC (rev 106281)
@@ -178,7 +178,7 @@
case Node.ELEMENT_NODE:
// Call a handler for the particular node type
var hi = this.getHTMLHandlerInfo(
cnode.nodeName );
- var res = hi.handler.call(this, cnode, 0,
hi.type );
+ var res = hi.handler.call(this, cnode, hi.type
);
if ( hi.attribs ) {
$.extend( res.node.attributes,
hi.attribs );
}
@@ -208,15 +208,17 @@
* @param {Int} WikiDom offset within a block
* @returns {Object} WikiDom object
*/
-DOMConverter.prototype._convertHTMLBranch = function ( node, offset, type ) {
+DOMConverter.prototype._convertHTMLBranch = function ( node, type ) {
+
var children = node.childNodes,
wnode = {
type: type,
attributes: this._HTMLPropertiesToWikiAttributes( node
),
children: []
- };
-
- var parNode = null;
+ },
+ parNode = null,
+ offset = 0,
+ res;
function newPara () {
offset = 0;
@@ -238,29 +240,29 @@
var annotationtype =
this.getHTMLAnnotationType( cnode.nodeName );
if ( annotationtype ) {
if ( !parNode ) {
- newPara()
+ newPara();
}
- var res = this._convertHTMLAnnotation(
cnode, offset, annotationtype );
+ offset = 0;
+ res = this._convertHTMLAnnotation(
cnode, 0, annotationtype );
//console.log( 'res leaf: ' +
JSON.stringify(res, null, 2));
offset += res.text.length;
parNode.content.text += res.text;
//console.log( 'res annotations: ' +
JSON.stringify(res, null, 2));
parNode.content.annotations =
parNode.content.annotations
.concat( res.annotations );
- break;
} else {
// Close last paragraph, if still open.
parNode = null;
// Call a handler for the particular
node type
var hi = this.getHTMLHandlerInfo(
cnode.nodeName );
- var res = hi.handler.call(this, cnode,
0, hi.type );
+ res = hi.handler.call(this, cnode,
hi.type );
if ( hi.attribs ) {
$.extend( res.node.attributes,
hi.attribs );
}
wnode.children.push( res.node );
offset = res.offset;
- break;
}
+ break;
case Node.TEXT_NODE:
if ( !parNode ) {
newPara();
@@ -290,9 +292,8 @@
* @param {Int} WikiDom offset within a block
* @returns {Object} WikiDom object
*/
-DOMConverter.prototype._convertHTMLLeaf = function ( node, offset, type ) {
- // XXX Does the offset in every leaf start at zero?
- offset = 0;
+DOMConverter.prototype._convertHTMLLeaf = function ( node, type ) {
+ var offset = 0;
var children = node.childNodes,
wnode = {
@@ -394,7 +395,6 @@
for ( var i = 0, l = attribs.length; i < l; i++ ) {
var attrib = attribs.item(i),
key = attrib.name;
- console.log('key: ' + key);
if ( key.match( /^data-json-/ ) ) {
// strip data- prefix from data-*
out[key.replace( /^data-json-/, '' )] =
JSON.parse(attrib.value);
@@ -426,6 +426,9 @@
// XXX: This subsets html DOM
if ( ['title'].indexOf(key) != -1 ) {
out[key] = attrib.value;
+ } else {
+ // prefix key with 'html/'
+ out['html/' + key] = attrib.value;
}
}
}
Modified:
trunk/extensions/VisualEditor/modules/parser/mediawiki.HTML5TreeBuilder.node.js
===================================================================
---
trunk/extensions/VisualEditor/modules/parser/mediawiki.HTML5TreeBuilder.node.js
2011-12-14 23:23:19 UTC (rev 106280)
+++
trunk/extensions/VisualEditor/modules/parser/mediawiki.HTML5TreeBuilder.node.js
2011-12-14 23:38:46 UTC (rev 106281)
@@ -55,6 +55,9 @@
this.emit('token', {type: 'StartTag',
name: token.name,
data: att(token.attribs)});
+ this.emit('token', {type: 'EndTag',
+ name: token.name,
+ data: att(token.attribs)});
break;
case "COMMENT":
this.emit('token', {type: 'Comment',
@@ -62,7 +65,6 @@
break;
case "END":
this.emit('end');
- console.log("at end..");
this.document = this.parser.document;
if ( ! this.document.body ) {
// HACK: This should not be needed really.
Modified: trunk/extensions/VisualEditor/modules/parser/pegTokenizer.pegjs.txt
===================================================================
--- trunk/extensions/VisualEditor/modules/parser/pegTokenizer.pegjs.txt
2011-12-14 23:23:19 UTC (rev 106280)
+++ trunk/extensions/VisualEditor/modules/parser/pegTokenizer.pegjs.txt
2011-12-14 23:38:46 UTC (rev 106281)
@@ -643,9 +643,12 @@
= "{{" target:template_target
params:(newline? "|" newline? p:template_param { return p })*
"}}" {
- var obj = { type: 'TAG', name: 'template',
- attribs: [['data-target', target]],
- args: {}}
+ var obj = {
+ type: 'SELFCLOSINGTAG',
+ name: 'template',
+ attribs: [['data-target', target]],
+ args: {}
+ };
if (params && params.length) {
var position = 1;
for ( var i = 0, l = params.length; i < l; i++ ) {
@@ -706,18 +709,19 @@
wikilink
= "[["
! url
- target:link_target text:("|" lt:link_text { return lt })* "]]"
suffix:text? {
+ target:link_target ltext:("|" lt:link_text { return lt })* "]]"
+ suffix:(![ \]] text_char)* {
var obj = {
type: 'TAG',
name: 'a',
attribs: [['data-type', 'internal']]
};
obj.attribs.push(['href', target]);
- if (text && text.length) {
- var textTokens = text;
+ if (ltext && ltext.length) {
+ var textTokens = ltext;
} else {
- if (suffix !== '') {
- target += suffix;
+ if (suffix) {
+ target += suffix.join('');
}
var textTokens = [{type: 'TEXT', value: target}];
}
@@ -735,13 +739,13 @@
)* { return h.join(''); }
link_text
- = h:( & { return setFlag('linkdesc'); }
- x:inlineline { return x }
- )* {
- clearFlag('linkdesc')
- return h;
- }
- / & { clearFlag('linkdesc') } { return null; }
+ = & { return setFlag('linkdesc'); }
+ h:inlineline
+ {
+ clearFlag('linkdesc');
+ return h;
+ }
+ / & { clearFlag('linkdesc'); return false }
link_end = "]]"
@@ -1002,7 +1006,7 @@
}
table_firstrow
- = td:table_data+ {
+ = td:(table_data / table_header)+ {
//dp('firstrow: ' + pp(td));
return [{ type: 'TAG', name: 'tr' }]
.concat(td, [{type: 'ENDTAG', name: 'tr'}]);
Modified: trunk/extensions/VisualEditor/tests/parser/parserTests.js
===================================================================
--- trunk/extensions/VisualEditor/tests/parser/parserTests.js 2011-12-14
23:23:19 UTC (rev 106280)
+++ trunk/extensions/VisualEditor/tests/parser/parserTests.js 2011-12-14
23:38:46 UTC (rev 106281)
@@ -63,7 +63,7 @@
_import(pj('parser', 'mediawiki.HTML5TreeBuilder.node.js'), ['FauxHTML5']);
_import(pj('parser', 'mediawiki.DOMPostProcessor.js'), ['DOMPostProcessor']);
-_import(pj('parser', 'mediawiki.DOMConverter'), ['DOMConverter']);
+_import(pj('parser', 'mediawiki.DOMConverter.js'), ['DOMConverter']);
_import(pj('parser', 'ext.core.QuoteTransformer.js'), ['QuoteTransformer']);
_______________________________________________
MediaWiki-CVS mailing list
mediawiki-cvs@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs