Cscott has uploaded a new change for review.
https://gerrit.wikimedia.org/r/226734
Change subject: Build a mutation interface similar to mwparserfromhell.
......................................................................
Build a mutation interface similar to mwparserfromhell.
Change-Id: I7aa14c2ef697d360cb0fcae48eb7bc308744a081
---
A USAGE.md
M lib/index.js
M tests/mocha/jsapi.js
M tests/parse.js
4 files changed, 219 insertions(+), 3 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/parsoid
refs/changes/34/226734/1
diff --git a/USAGE.md b/USAGE.md
new file mode 100644
index 0000000..da71dd1
--- /dev/null
+++ b/USAGE.md
@@ -0,0 +1,71 @@
+Usage
+=====
+
+This file describes usage of Parsoid as a standalone wikitext parsing
+package, in the spirit of [`mwparserfromhell`][mwparserfromhell]. This is not the typical
+use case for Parsoid; it is more often used as a network service.
+See [Parsoid service] for more details.
+
+These examples will use the [`prfun`][prfun] library and [ES6 generators] in
+order to fluently express asynchronous operations. The library also
+exports vanilla [`Promise`][Promise]s if you wish to maintain compatibility
+with old versions of `node` at the cost of a little bit of readability.
+
+Use as a wikitext parser is straightforward (where `text` is
+wikitext input):
+```js
+#!/usr/bin/node --harmony-generators
+var Promise = require('prfun');
+var Parsoid = require('parsoid');
+
+var main = Promise.async(function*() {
+ var text = "I love wikitext!";
+ var prdom = yield Parsoid.parse(text, { prdom: true });
+ console.log(prdom.document.outerHTML);
+});
+
+// start me up!
+main().done();
+```
+
+As you can see, there is a little bit of boilerplate needed to get the
+asynchronous machinery started. Future code examples will be assumed
+to be the body of the `main()` method above.
+
+The `prdom` object above is a `Parsoid.PrDOM` object, which has
+helpful methods to filter and manipulate the document. If you want
+to access the raw [Parsoid DOM], however, it is easily accessible
+via the `document` property, as shown above, and all normal DOM
+manipulation functions can be used on it (Parsoid uses [`domino`][domino]
+to implement these methods). The extra methods provided by
+`PrDOM` can be quite useful, however. For example:
+
+```js
+> var text = "I has a template! {{foo|bar|baz|eggs=spam}} See it?\n";
+> var prdom = yield Parsoid.parse(text, { prdom: true });
+> console.log(String(prdom));
+I has a template! {{foo|bar|baz|eggs=spam}} See it?
+> var templates = prdom.filterTemplates();
+> console.log(templates.map(String));
+[ '{{foo|bar|baz|eggs=spam}}' ]
+> var template = templates[0];
+> console.log(template.name);
+foo
+> template.name = 'notfoo';
+> console.log(String(template));
+{{notfoo|bar|baz|eggs=spam}}
+> console.log(template.params);
+[ '1', '2', 'eggs' ]
+> console.log(template.get(1).value);
+bar
+> console.log(template.get("eggs").value);
+spam
+```
+
+
+[mwparserfromhell]: http://mwparserfromhell.readthedocs.org/en/latest/index.html
+[Parsoid service]: https://www.mediawiki.org/wiki/Parsoid
+[prfun]: https://github.com/cscott/prfun
+[Promise]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Promise
+[Parsoid DOM]: http://www.mediawiki.org/wiki/Parsoid/MediaWiki_DOM_spec
+[domino]: https://www.npmjs.com/package/domino
diff --git a/lib/index.js b/lib/index.js
index 516fc31..17d60de 100644
--- a/lib/index.js
+++ b/lib/index.js
@@ -15,10 +15,121 @@
var json = require('../package.json');
var parseJs = require('../tests/parse.js');
var ParsoidConfig = require('../lib/mediawiki.ParsoidConfig.js').ParsoidConfig;
+var WikitextSerializer =
require('../lib/mediawiki.WikitextSerializer.js').WikitextSerializer;
+var DU = require('../lib/mediawiki.DOMUtils.js').DOMUtils;
+var util = require('util');
+
+// WTS helper
+var wts = function(env, node) {
+ // XXX: Serializing to wikitext is very user-friendly, but it depends on
+ // WTS.serializeDOMSync which we might not want to keep around forever.
+ // An alternative would be:
+ // return DU.normalizeOut(node, 'parsoidOnly');
+ // which might be almost as friendly.
+ if (!DU.isBody(node)) {
+ var nbody = node.ownerDocument.createElement('body');
+ nbody.appendChild(node.cloneNode(true));
+ node = nbody;
+ }
+ return (new WikitextSerializer({ env: env })).serializeDOMSync(node);
+};
var Parsoid = module.exports = {
name: json.name, // package name
version: json.version, // npm version #
+};
+
+// Wrappers in the style of mwparserfromhell
+
+var PrNode = function PrNode(prdom, node) {
+ this.prdom = prdom;
+ this.node = node;
+};
+Object.defineProperties(PrNode.prototype, {
+ attr: {
+ value: function(a) { return this.node.getAttribute(a); }
+ },
+ dataMw: {
+ get: function() { return DU.getDataMw(this.node); },
+ set: function(v) { return DU.storeDataMw(this.node, v); }
+ },
+ toString: { value: function() { return wts(this.prdom.env, this.node);
} }
+});
+
+var PrTemplate = function PrTemplate(prdom, node, which) {
+ PrNode.call(this, prdom, node);
+ this.which = which;
+};
+util.inherits(PrTemplate, PrNode);
+Object.defineProperties(PrTemplate.prototype, {
+ toString: {
+ value: function() {
+ // Templates are actually a collection of nodes.
+ var nbody =
this.node.ownerDocument.createElement('body');
+ var nodes = this.node.ownerDocument.querySelectorAll
+ ('[about="' + this.attr('about') + '"]');
+ for (var i = 0; i < nodes.length; i++) {
+ nbody.appendChild(nodes[i].cloneNode(true));
+ }
+ return wts(this.prdom.env, nbody);
+ }
+ },
+ _template: {
+ get: function() {
+ return this.dataMw.parts[this.which];
+ },
+ set: function(v) {
+ var dmw = this.dataMw;
+ dmw.parts[this.which] = v;
+ this.dataMw = dmw;
+ }
+ },
+ name: {
+ get: function() {
+ return this._template.template.target.wt;
+ },
+ set: function(v) {
+ var t = this._template;
+ t.template.target.wt = v;
+ t.template.target.href = './Template:' + v;
+ this._template = t;
+ }
+ },
+ params: {
+ get: function() {
+ return
Object.keys(this._template.template.params).sort();
+ }
+ },
+ get: {
+ value: function(k) {
+ var d = this._template.template.params[k];
+ return { key: k, value: d.wt, orig: d.key && d.key.wt };
+ }
+ }
+});
+
+var PrDOM = function PrDOM(env, doc) {
+ this.env = env;
+ this.document = doc;
+};
+PrDOM.prototype.filterTemplates = function() {
+ var prdom = this;
+ var result = [];
+ Array.from(
+ prdom.document.querySelectorAll('[typeof~="mw:Transclusion"]')
+ ).forEach(function(node) {
+ // each Transclusion node could represent multiple templates.
+ var dataMw = DU.getDataMw(node);
+ dataMw.parts.forEach(function(part, i) {
+ if (part.template) {
+ result.push(new PrTemplate(prdom, node, i));
+ }
+ });
+ });
+ return result;
+};
+PrDOM.prototype.toString = function() {
+ return wts(this.env, this.document.body);
};
// Sample usage:
@@ -29,6 +140,10 @@
options = options || {};
var argv = Object.create(parseJs.defaultOptions);
Object.keys(options).forEach(function(k) { argv[k] = options[k]; });
+
+ if (argv.prdom) {
+ argv.document = true;
+ }
if (argv.selser) {
argv.html2wt = true;
@@ -47,5 +162,7 @@
var parsoidConfig = options.parsoidConfig ||
new ParsoidConfig(options.config || null, {
defaultWiki: prefix });
- return parseJs.parse(input || '', argv, parsoidConfig,
prefix).nodify(optCb);
+ return parseJs.parse(input || '', argv, parsoidConfig,
prefix).then(function(res) {
+ return argv.prdom ? new PrDOM(res.env, res.out) : res;
+ }).nodify(optCb);
};
diff --git a/tests/mocha/jsapi.js b/tests/mocha/jsapi.js
index 8836f5c..e5ce77a 100644
--- a/tests/mocha/jsapi.js
+++ b/tests/mocha/jsapi.js
@@ -13,3 +13,31 @@
});
});
});
+
+describe('Examples from USAGE.md', function() {
+ it('converts simple wikitext to HTML', function() {
+ return Parsoid.parse('I love wikitext!', { prdom:
true}).then(function(prdom) {
+ prdom.should.have.property('document');
+ prdom.document.should.have.property('outerHTML');
+ });
+ });
+ it('filters out templates', function() {
+ var text = "I has a template! {{foo|bar|baz|eggs=spam}} See
it?\n";
+ return Parsoid.parse(text, { prdom: true
}).then(function(prdom) {
+ String(prdom).should.equal(text);
+ var templates = prdom.filterTemplates();
+ templates.length.should.equal(1);
+
String(templates[0]).should.equal('{{foo|bar|baz|eggs=spam}}');
+ var template = templates[0];
+ template.name.should.equal('foo');
+ template.name = 'notfoo';
+
String(template).should.equal('{{notfoo|bar|baz|eggs=spam}}');
+ template.params.length.should.equal(3);
+ template.params[0].should.equal('1');
+ template.params[1].should.equal('2');
+ template.params[2].should.equal('eggs');
+ template.get(1).value.should.equal('bar');
+ template.get('eggs').value.should.equal('spam');
+ });
+ });
+});
diff --git a/tests/parse.js b/tests/parse.js
index 32f17b5..b57ca59 100755
--- a/tests/parse.js
+++ b/tests/parse.js
@@ -148,7 +148,7 @@
}
return DU.serializeDOM(env, doc.body, argv.selser).then(function(out) {
if (argv.html2wt || argv.wt2wt) {
- return { trailingNL: true, out: out };
+ return { trailingNL: true, out: out, env: env };
} else {
return startsAtWikitext(argv, env, out);
}
@@ -172,7 +172,7 @@
} else {
out = DU.serializeNode(doc).str;
}
- return { trailingNL: true, out: out };
+ return { trailingNL: true, out: out, env: env };
} else {
return startsAtHTML(argv, env,
DU.serializeNode(doc).str);
}
--
To view, visit https://gerrit.wikimedia.org/r/226734
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I7aa14c2ef697d360cb0fcae48eb7bc308744a081
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/services/parsoid
Gerrit-Branch: master
Gerrit-Owner: Cscott <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits