jenkins-bot has submitted this change and it was merged.
Change subject: Build a mutation interface similar to mwparserfromhell.
......................................................................
Build a mutation interface similar to mwparserfromhell.
Change-Id: I7aa14c2ef697d360cb0fcae48eb7bc308744a081
---
A guides/jsapi/README.md
M lib/index.js
A lib/jsapi.js
M tests/mocha/jsapi.js
M tests/parse.js
5 files changed, 1,090 insertions(+), 21 deletions(-)
Approvals:
Arlolra: Looks good to me, approved
jenkins-bot: Verified
diff --git a/guides/jsapi/README.md b/guides/jsapi/README.md
new file mode 100644
index 0000000..f3224ef
--- /dev/null
+++ b/guides/jsapi/README.md
@@ -0,0 +1,144 @@
+Usage of the JavaScript API
+===========================
+
+This file describes usage of Parsoid as a standalone wikitext parsing
+package, in the spirit of [`mwparserfromhell`]. This is not the typical
+use case for Parsoid; it is more often used as a network service.
+See [the HTTP API guide](#!/guide/apiuse) or [Parsoid service] on the wiki
+for more details.
+
+These examples will use the [`prfun`] library and [ES6 generators] in
+order to fluently express asynchronous operations. The library also
+exports vanilla [`Promise`]s if you wish to maintain compatibility
+with old versions of `node` at the cost of a little bit of readability.
+
+Use as a wikitext parser is straightforward (where `text` is
+wikitext input):
+
+```
+#!/usr/bin/node --harmony-generators
+var Promise = require('prfun');
+var Parsoid = require('parsoid');
+
+var main = Promise.async(function*() {
+ var text = "I love wikitext!";
+ var pdoc = yield Parsoid.parse(text, { pdoc: true });
+ console.log(pdoc.document.outerHTML);
+});
+
+// start me up!
+main().done();
+```
+
+As you can see, there is a little bit of boilerplate needed to get the
+asynchronous machinery started. Future code examples will be assumed
+to replace the body of the `main()` method above.
+
+The `pdoc` variable above holds a [`PDoc`] object, which has
+helpful methods to filter and manipulate the document. If you want
+to access the raw [Parsoid DOM], however, it is easily accessible
+via the [`document`](#!/api/PDoc-property-document) property, as shown above,
+and all normal DOM manipulation functions can be used on it (Parsoid uses
+[`domino`] to implement these methods). Be sure to call
+[`update()`](#!/api/PNode-method-update) after any direct DOM manipulation.
+[`PDoc`] is a subclass of [`PNodeList`], which provides a number of
+useful access and mutation methods -- and if you use these you won't need
+to manually call `update()`. These provided methods can be quite useful.
+For example:
+
+```
+> var text = "I has a template! {{foo|bar|baz|eggs=spam}} See it?\n";
+> var pdoc = yield Parsoid.parse(text, { pdoc: true });
+> console.log(String(pdoc));
+I has a template! {{foo|bar|baz|eggs=spam}} See it?
+> var templates = pdoc.filterTemplates();
+> console.log(templates.map(String));
+[ '{{foo|bar|baz|eggs=spam}}' ]
+> var template = templates[0];
+> console.log(template.name);
+foo
+> template.name = 'notfoo';
+> console.log(String(template));
+{{notfoo|bar|baz|eggs=spam}}
+> console.log(template.params);
+[ '1', '2', 'eggs' ]
+> console.log(template.get(1).value);
+bar
+> console.log(template.get("eggs").value);
+spam
+```
+
+Getting nested templates is trivial:
+
+```
+> var text = "{{foo|bar={{baz|{{spam}}}}}}";
+> var pdoc = yield Parsoid.parse(text, { pdoc: true });
+> console.log(pdoc.filterTemplates().map(String));
+[ '{{foo|bar={{baz|{{spam}}}}}}',
+ '{{baz|{{spam}}}}',
+ '{{spam}}' ]
+```
+
+You can also pass `{ recursive: false }` to
+[`filterTemplates()`](#!/api/PNodeList-method-filterTemplates) and explore
+templates manually. This is possible because the
+[`get`](#!/api/PTemplate-method-get) method on a
+[`PTemplate`] object returns an object containing further [`PNodeList`]s:
+
+```
+> var text = "{{foo|this {{includes a|template}}}}";
+> var pdoc = yield Parsoid.parse(text, { pdoc: true });
+> var templates = pdoc.filterTemplates({ recursive: false });
+> console.log(templates.map(String));
+[ '{{foo|this {{includes a|template}}}}' ]
+> var foo = templates[0];
+> console.log(String(foo.get(1).value));
+this {{includes a|template}}
+> var more = foo.get(1).value.filterTemplates();
+> console.log(more.map(String));
+[ '{{includes a|template}}' ]
+> console.log(String(more[0].get(1).value));
+template
+```
+
+Templates can be easily modified to add, remove, or alter params.
+Templates also have a [`matches()`](#!/api/PTemplate-method-matches) method
+for comparing template names, which takes care of capitalization and
+white space:
+
+```
+> var text = "{{cleanup}} '''Foo''' is a [[bar]]. {{uncategorized}}";
+> var pdoc = yield Parsoid.parse(text, { pdoc: true });
+> pdoc.filterTemplates().forEach(function(template) {
+... if (template.matches('Cleanup') && !template.has('date')) {
+... template.add('date', 'July 2012');
+... }
+... if (template.matches('uncategorized')) {
+... template.name = 'bar-stub';
+... }
+... });
+> console.log(String(pdoc));
+{{cleanup|date = July 2012}} '''Foo''' is a [[bar]]. {{bar-stub}}
+```
+
+At any time you can convert the `pdoc` into HTML conforming to the
+[MediaWiki DOM spec] (by referencing the
+[`document`](#!/api/PDoc-property-document) property) or into wikitext (by
+invoking [`toString()`](#!/api/PNodeList-method-toString)). This allows you
+to save the page using either standard API methods or the RESTBase API
+(once [T101501](https://phabricator.wikimedia.org/T101501) is resolved).
+
+For more tips, check out [PNodeList's full method list](#!/api/PNodeList)
+and the list of [PNode](#!/api/PNode) subclasses.
+
+[`mwparserfromhell`]: http://mwparserfromhell.readthedocs.org/en/latest/index.html
+[Parsoid service]: https://www.mediawiki.org/wiki/Parsoid
+[`prfun`]: https://github.com/cscott/prfun
+[ES6 generators]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Statements/function*
+[`Promise`]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Promise
+[Parsoid DOM]: http://www.mediawiki.org/wiki/Parsoid/MediaWiki_DOM_spec
+[MediaWiki DOM spec]: http://www.mediawiki.org/wiki/Parsoid/MediaWiki_DOM_spec
+[`domino`]: https://www.npmjs.com/package/domino
+[`PDoc`]: #!/api/PDoc
+[`PNodeList`]: #!/api/PNodeList
+[`PTemplate`]: #!/api/PTemplate
diff --git a/lib/index.js b/lib/index.js
index 516fc31..bc82d91 100644
--- a/lib/index.js
+++ b/lib/index.js
@@ -1,34 +1,58 @@
-/**
- * Main entry point for Parsoid's JavaScript API.
- *
- * Note that Parsoid's main interface is actually a web API, as
- * defined by the files in ../api.
- *
- * But some users would like to use Parsoid as a NPM package using
- * a native JavaScript API. This file provides that, more-or-less.
- * It should be considered unstable. Patches welcome.
- */
-
'use strict';
require('../lib/core-upgrade.js');
var json = require('../package.json');
var parseJs = require('../tests/parse.js');
var ParsoidConfig = require('../lib/mediawiki.ParsoidConfig.js').ParsoidConfig;
+var JsApi = require('./jsapi.js');
+/**
+ * Main entry point for Parsoid's JavaScript API.
+ *
+ * Note that Parsoid's main interface is actually a web API, as
+ * defined by {@link ParsoidService} (and the files in the `api` directory).
+ *
+ * But some users would like to use Parsoid as a NPM package using
+ * a native JavaScript API. This file provides that, more-or-less.
+ * It should be considered unstable. Patches welcome.
+ *
+ * See `USAGE.md` and `./jsapi.js` for a useful wrapper API which works
+ * well with this interface.
+ *
+ * @class
+ * @singleton
+ */
var Parsoid = module.exports = {
- name: json.name, // package name
- version: json.version, // npm version #
+ /** Name of the NPM package. */
+ name: json.name,
+ /** Version of the NPM package. */
+ version: json.version,
};
-// Sample usage:
-// Parsoid.parse('hi there', { document: true }).then(function(res) {
-// console.log(res.out.outerHTML);
-// }).done();
+/**
+ * Parse wikitext (or html) to html (or wikitext).
+ *
+ * Sample usage:
+ *
+ * Parsoid.parse('hi there', { document: true }).then(function(res) {
+ * console.log(res.out.outerHTML);
+ * }).done();
+ *
+ * Advanced usage using the {@link PDoc} API:
+ *
+ * Parsoid.parse('{{echo|hi}}', { pdoc: true }).then(function(pdoc) {
+ * var templates = pdoc.filterTemplates();
+ * console.log(templates[0].name);
+ * }).done();
+ */
Parsoid.parse = function(input, options, optCb) {
options = options || {};
var argv = Object.create(parseJs.defaultOptions);
Object.keys(options).forEach(function(k) { argv[k] = options[k]; });
+
+ if (argv.pdoc) {
+ argv.document = true;
+ }
if (argv.selser) {
argv.html2wt = true;
@@ -46,6 +70,16 @@
}
var parsoidConfig = options.parsoidConfig ||
- new ParsoidConfig(options.config || null, {
defaultWiki: prefix });
- return parseJs.parse(input || '', argv, parsoidConfig,
prefix).nodify(optCb);
+ new ParsoidConfig(options.config || null, { defaultWiki: prefix
});
+ if (argv.pdoc) {
+ parsoidConfig.addHTMLTemplateParameters = true;
+ }
+ return parseJs.parse(input || '', argv, parsoidConfig,
prefix).then(function(res) {
+ return argv.pdoc ? new JsApi.PDoc(res.env, res.out) : res;
+ }).nodify(optCb);
};
+
+// Expose other helpful objects.
+Object.keys(JsApi).forEach(function(k) {
+ Parsoid[k] = JsApi[k];
+});
diff --git a/lib/jsapi.js b/lib/jsapi.js
new file mode 100644
index 0000000..d1524e0
--- /dev/null
+++ b/lib/jsapi.js
@@ -0,0 +1,731 @@
+/**
+ * Handy JavaScript API for Parsoid DOM, inspired by the
+ * python `mwparserfromhell` package.
+ */
+'use strict';
+require('../lib/core-upgrade.js');
+
+// TO DO:
+// comment/tag/text/figure
+// PTemplate#get should return PParameter and support mutation.
+// PExtLink#url PWikiLink#title should handle mw:ExpandedAttrs
+// make separate package?
+
+var WikitextSerializer =
require('../lib/mediawiki.WikitextSerializer.js').WikitextSerializer;
+var DU = require('../lib/mediawiki.DOMUtils.js').DOMUtils;
+var util = require('util');
+
+// WTS helper
+var wts = function(env, nodes) {
+ // XXX: Serializing to wikitext is very user-friendly, but it depends on
+ // WTS.serializeDOMSync which we might not want to keep around forever.
+ // An alternative would be:
+ // return DU.normalizeOut(node, 'parsoidOnly');
+ // which might be almost as friendly.
+ var body;
+ if (nodes.length === 1 && DU.isBody(nodes[0])) {
+ body = nodes[0];
+ } else {
+ body = nodes[0].ownerDocument.createElement('body');
+ for (var i = 0; i < nodes.length; i++) {
+ body.appendChild(nodes[i].cloneNode(true));
+ }
+ }
+ return (new WikitextSerializer({ env: env })).serializeDOMSync(body);
+};
+
+// noop helper
+var noop = function() { };
+
+// Forward declarations of Wrapper classes.
+var PNode, PNodeList, PExtLink, PHeading, PHtmlEntity, PTemplate, PWikiLink;
+
+// HTML escape helper
+var toHtmlStr = function(node, v) {
+ if (typeof v === 'string') {
+ var div = node.ownerDocument.createElement('div');
+ div.textContent = v;
+ return div.innerHTML;
+ } else if (v instanceof PNodeList) {
+ return v.container.innerHTML;
+ } else {
+ return v.outerHTML;
+ }
+};
+
+
+/**
+ * The PNodeList class wraps a collection of DOM {@link Node}s.
+ * It provides methods that can be used to extract data from or
+ * modify the nodes. The `filter()` series of functions is very
+ * useful for extracting and iterating over, for example, all
+ * of the templates in the project (via {@link #filterTemplates}).
+ * @class PNodeList
+ * @alternateClassName Parsoid.PNodeList
+ */
+/**
+ * @method constructor
+ * @private
+ * @param {PDoc} pdoc The parent document for this {@link PNodeList}.
+ * @param {PNode|null} parent A {@link PNode} which will receive updates
+ * when this {@link PNodeList} is mutated.
+ * @param {Node} container A DOM {@link Node} which is the parent of all
+ * of the DOM {@link Node}s in this {@link PNodeList}. The container
+ * element itself is *not* considered part of the list.
+ * @param {Object} [opts]
+ * @param {Function} [opts.update]
+ * A function which will be invoked when {@link #update} is called.
+ */
+PNodeList = function PNodeList(pdoc, parent, container, opts) {
+ this.pdoc = pdoc;
+ this.parent = parent;
+ this.container = container;
+ this._update = (opts && opts.update);
+};
+Object.defineProperties(PNodeList.prototype, {
+ /**
+ * Returns an {@link Array} of the DOM {@link Node}s represented
+ * by this {@link PNodeList}.
+ * @property {Node[]}
+ */
+ nodes: {
+ get: function() { return Array.from(this.container.childNodes);
},
+ },
+ /**
+ * Call {@link #update} after manually mutating any of the DOM
+ * {@link Node}s represented by this {@link PNodeList} in order to
+ * ensure that any containing templates are refreshed with their
+ * updated contents.
+ *
+ * The mutation methods in the {@link PDoc}/{@link PNodeList} API
+ * automatically call {@link #update} for you when required.
+ * @method
+ */
+ update: { value: function() {
+ if (this._update) { this._update(); }
+ if (this.parent) { this.parent.update(); }
+ }, },
+ _querySelectorAll: { value: function(selector) {
+ return Array.from(this.container.querySelectorAll(selector));
+ }, },
+ _templatesForNode: { value: function(node) {
+ // each Transclusion node could represent multiple templates.
+ var parent = this;
+ var result = [];
+ DU.getDataMw(node).parts.forEach(function(part, i) {
+ if (part.template) {
+ result.push(new PTemplate(parent.pdoc, parent,
node, i));
+ }
+ });
+ return result;
+ }, },
+ /**
+ * @method
+ * @private
+ * @param {Object} [opts]
+ * @param {boolean} [opts.recursive]
+ * Set to `false` to avoid recursing into templates.
+ */
+ _filter: { value: function(result, selector, func, opts) {
+ var self = this;
+ var recursive = (opts && opts.recursive) !== false;
+ var tSelector = '[typeof~="mw:Transclusion"]';
+ if (selector) {
+ tSelector += ',' + selector;
+ }
+ this._querySelectorAll(tSelector).forEach(function(node) {
+ var ty = node.getAttribute('typeof') || '';
+ var isTemplate = /\bmw:Transclusion\b/.test(ty);
+ if (isTemplate) {
+
self._templatesForNode(node).forEach(function(t) {
+ if (!selector) {
+ result.push(t);
+ }
+ if (recursive) {
+ t.params.forEach(function(k) {
+ var td = t.get(k);
+ ['key',
'value'].forEach(function(prop) {
+ if (td[prop]) {
+
td[prop]._filter(result, selector, func, opts);
+ }
+ });
+ });
+ }
+ });
+ } else {
+ func(result, self, node, opts);
+ }
+ });
+ return result;
+ }, },
+
+ /**
+ * Return an array of {@link PExtLink} representing external links
+ * found in this {@link PNodeList}.
+ * @inheritdoc #_filter
+ * @return {PExtLink[]}
+ */
+ filterExtLinks: { value: function(opts) {
+ return this._filter([], 'a[rel="mw:ExtLink"]', function(r,
parent, node, opts) {
+ r.push(new PExtLink(parent.pdoc, parent, node));
+ }, opts);
+ }, },
+
+ /**
+ * Return an array of {@link PHeading} representing headings
+ * found in this {@link PNodeList}.
+ * @inheritdoc #_filter
+ * @return {PHeading[]}
+ */
+ filterHeadings: { value: function(opts) {
+ return this._filter([], 'h1,h2,h3,h4,h5,h6', function(r,
parent, node, opts) {
+ r.push(new PHeading(parent.pdoc, parent, node));
+ }, opts);
+ }, },
+
+ /**
+ * Return an array of {@link PHtmlEntity} representing HTML entities
+ * found in this {@link PNodeList}.
+ * @inheritdoc #_filter
+ * @return {PHtmlEntity[]}
+ */
+ filterHtmlEntities: { value: function(opts) {
+ return this._filter([], '[typeof="mw:Entity"]', function(r,
parent, node, opts) {
+ r.push(new PHtmlEntity(parent.pdoc, parent, node));
+ }, opts);
+ }, },
+
+ /**
+ * Return an array of {@link PTemplate} representing templates
+ * found in this {@link PNodeList}.
+ * @inheritdoc #_filter
+ * @return {PTemplate[]}
+ */
+ filterTemplates: { value: function(opts) {
+ return this._filter([], null, null, opts);
+ }, },
+
+
+ /**
+ * Return an array of {@link PWikiLink} representing wiki links
+ * found in this {@link PNodeList}.
+ * @inheritdoc #_filter
+ * @return {PWikiLink[]}
+ */
+ filterWikiLinks: { value: function(opts) {
+ return this._filter([], 'a[rel="mw:WikiLink"]', function(r,
parent, node, opts) {
+ r.push(new PWikiLink(parent.pdoc, parent, node));
+ }, opts);
+ }, },
+
+ /**
+ * Return a string representing the contents of this object
+ * as HTML conforming to the
+ * [MediaWiki DOM
specification](https://www.mediawiki.org/wiki/Parsoid/MediaWiki_DOM_spec).
+ * @return {String}
+ */
+ toHtml: { value: function() {
+ return this.container.innerHTML;
+ }, },
+
+ /**
+ * Return a string representing the contents of this object as wikitext.
+ * @return {String}
+ */
+ toString: { value: function() {
+ return wts(this.pdoc.env, this.nodes);
+ }, },
+});
+/**
+ * Create a {@link PNodeList} from a string containing HTML.
+ * @return {PNodeList}
+ * @static
+ */
+PNodeList.fromHTML = function(pdoc, html) {
+ var div = pdoc.document.createElement('div');
+ div.innerHTML = html;
+ return new PNodeList(pdoc, null, div);
+};
+
+/**
+ * @class PNode
+ * A PNode represents a specific DOM {@link Node}. Its subclasses provide
+ * specific accessors and mutators for associated semantic information.
+ *
+ * Useful subclasses of {@link PNode} include:
+ *
+ * - {@link PExtLink}: external links, like `[http://example.com Example]`
+ * - {@link PHeading}: headings, like `== Section 1 ==`
+ * - {@link PHtmlEntity}: html entities, like `&nbsp;`
+ * - {@link PTemplate}: templates, like `{{foo|bar}}`
+ * - {@link PWikiLink}: wiki links, like `[[Foo|bar]]`
+ */
+/**
+ * @method constructor
+ * @private
+ * @param {PDoc} pdoc The parent document for this PNode.
+ * @param {PNodeList|null} parent A containing node list which will receive
+ * updates when this {@link PNode} is mutated.
+ * @param {Node} node The DOM node.
+ * @param {Object} [opts]
+ * @param {Function} [opts.update]
+ * A function which will be invoked when {@link #update} is called.
+ * @param {Function} [opts.wtsNodes]
+ * A function returning an array of {@link Node}s which can tweak the
+ * portion of the document serialized by {@link #toString}.
+ */
+PNode = function PNode(pdoc, parent, node, opts) {
+ /** @property {PDoc} pdoc The parent document for this {@link PNode}. */
+ this.pdoc = pdoc;
+ this.parent = parent;
+ /** @property {Node} node The underlying DOM {@link Node}. */
+ this.node = node;
+ this._update = (opts && opts.update);
+ this._wtsNodes = (opts && opts.wtsNodes);
+};
+Object.defineProperties(PNode.prototype, {
+ ownerDocument: {
+ get: function() { return this.node.ownerDocument; },
+ },
+ dataMw: {
+ get: function() { return DU.getDataMw(this.node); },
+ set: function(v) { DU.storeDataMw(this.node, v); this.update();
},
+ },
+ /**
+ * Call {@link #update} after manually mutating the DOM {@link Node}
+ * associated with this {@link PNode} in order to ensure that any
+ * containing templates are refreshed with their updated contents.
+ *
+ * The mutation methods in the API automatically call {@link #update}
+ * for you when required.
+ * @method
+ */
+ update: { value: function() {
+ if (this._update) { this._update(); }
+ if (this.parent) { this.parent.update(); }
+ }, },
+ /**
+ * @inheritdoc PNodeList#toHtml
+ * @method
+ */
+ toHtml: { value: function() {
+ var nodes = this._wtsNodes ? this._wtsNodes() : [ this.node ];
+ return nodes.map(function(n) { return n.outerHTML; }).join('');
+ }, },
+ /**
+ * @inheritdoc PNodeList#toString
+ * @method
+ */
+ toString: { value: function() {
+ var nodes = this._wtsNodes ? this._wtsNodes() : [ this.node ];
+ return wts(this.pdoc.env, nodes);
+ }, },
+});
+
+/**
+ * PTemplate represents a wikitext template, like `{{foo}}`.
+ * @class PTemplate
+ * @extends PNode
+ */
+/**
+ * @method constructor
+ * @private
+ * @inheritdoc PNode#constructor
+ * @param {PDoc} pdoc The parent document for this PNode.
+ * @param {PNodeList|null} parent A containing node list which will receive
+ * updates when this {@link PNode} is mutated.
+ * @param {Node} node The DOM node.
+ * @param {Number} which A single {@link Node} can represent multiple
+ * templates; this parameter serves to distinguish them.
+ */
+PTemplate = function PTemplate(pdoc, parent, node, which) {
+ PNode.call(this, pdoc, parent, node, {
+ wtsNodes: function() {
+ // Templates are actually a collection of nodes.
+ return this.parent._querySelectorAll
+ ('[about="' + this.node.getAttribute('about') +
'"]');
+ },
+ });
+ this.which = which;
+ this._cachedHtml = Object.create(null);
+};
+util.inherits(PTemplate, PNode);
+Object.defineProperties(PTemplate.prototype, {
+ _template: {
+ get: function() {
+ return this.dataMw.parts[this.which];
+ },
+ set: function(v) {
+ var dmw = this.dataMw;
+ dmw.parts[this.which] = v;
+ this.dataMw = dmw;
+ },
+ },
+ /**
+ * The name of the template, as a String.
+ *
+ * See: [T107194](https://phabricator.wikimedia.org/T107194)
+ * @property {String}
+ */
+ name: {
+ get: function() {
+ // This should really be a PNodeList; see T107194
+ return this._template.template.target.wt;
+ },
+ set: function(v) {
+ var t = this._template;
+ t.template.target.wt = v;
+ t.template.target.href = './' +
+ this.pdoc.env.normalizeTitle('Template:' + v);
+ this._template = t;
+ },
+ },
+ /**
+ * Test whether the name of this template matches a given string, after
+ * normalizing titles.
+ * @param {String} name The template name to test against.
+ * @return {Boolean}
+ */
+ matches: {
+ value: function(name) {
+ var href = './' +
this.pdoc.env.normalizeTitle('Template:' + name);
+ return this._template.template.target.href === href;
+ },
+ },
+ /**
+ * The names of the parameters supplied to this template.
+ * Unnamed parameters are given numeric indexes.
+ * @property {String[]}
+ */
+ params: {
+ get: function() {
+ return
Object.keys(this._template.template.params).sort();
+ },
+ },
+ /**
+ * Return `true` if any parameter in the template is named `name`.
+ * With `ignoreEmpty`, `false` will be returned even if the template
+ * contains a parameter named `name`, if the parameter's value is empty
+ * (ie, only contains whitespace). Note that a template may have
+ * multiple parameters with the same name, but only the last one is
+ * read by Parsoid (and the MediaWiki parser).
+ * @param {String} name
+ * @param {Object} [opts]
+ * @param {Boolean} [opts.ignoreEmpty=false]
+ */
+ has: {
+ value: function(name, opts) {
+ var t = this._template.template;
+ return Object.prototype.hasOwnProperty.call(t.params,
name) && (
+ (opts && opts.ignoreEmpty) ?
+ !/^\s*$/.test(t.params[name].html) :
true
+ );
+ },
+ },
+ /**
+ * Add a parameter to the template with a given `name` and `value`.
+ * If `name` is already a parameter in the template, we'll replace
+ * its value.
+ * @param {String} name
+ * @param {String|Node|PNodeList} value
+ */
+ add: {
+ value: function(k, v) {
+ var t = this._template;
+ var html = toHtmlStr(this.node, v);
+ t.template.params[k] = { html: html };
+ this._template = t;
+ },
+ },
+ /**
+ * Remove a parameter from the template with the given `name`.
+ * If `keepField` is `true`, we will keep the parameter's name but
+ * blank its value. Otherwise we will remove the parameter completely
+ * *unless* other parameters are dependent on it (e.g. removing
+ * `bar` from `{{foo|bar|baz}}` is unsafe because `{{foo|baz}}` is
+ * not what we expected, so `{{foo||baz}}` will be produced instead).
+ * @param {String} name
+ * @param {Object} [opts]
+ * @param {Boolean} [opts.keepField=false]
+ */
+ remove: {
+ value: function(k, opts) {
+ var t = this._template;
+ var keepField = opts && opts.keepField;
+ // if this is a numbered template, force keepField if
there
+ // are subsequent numbered templates.
+ var isNumeric = (String(+k) === String(k));
+ if (isNumeric && this.has(1 + (+k))) {
+ keepField = true;
+ }
+ if (keepField) {
+ t.template.params[k] = { html: '' };
+ } else {
+ delete t.template.params[k];
+ }
+ this._template = t;
+ },
+ },
+ // XXX we should return a PParameter instance, so we can make key/value
+ // into accessors and allow mutation.
+ /**
+ * Get the parameter whose name is `name`.
+ * @param {String} name
+ * @return {Object} The parameter record.
+ * @return {String} return.name The given parameter name.
+ * @return {PNodeList|undefined} return.key
+ * Source nodes corresponding to the parameter name.
+ * For example, in `{{echo|{{echo|1}}=hello}}` the parameter name
+ * is `"1"`, but the `key` field would contain the `{{echo|1}}`
+ * template invocation, as a {@link PNodeList}.
+ * @return {PNodeList} return.value
+ * The parameter value.
+ */
+ get: {
+ value: function(k) {
+ if (!this._cachedHtml[k]) {
+ var doc = this.ownerDocument;
+ var param = this._template.template.params[k];
+ var valDiv = doc.createElement('div');
+ valDiv.innerHTML = param.html;
+ this._cachedHtml[k] = {
+ name: k,
+ value: new PNodeList(this.pdoc, this,
valDiv, {
+ update: function() {
+ var t =
this.parent._template;
+ delete
t.template.params[k].wt;
+
t.template.params[k].html = this.container.innerHTML;
+ this.parent._template =
t;
+ },
+ }),
+ };
+ if (param.key && param.key.html) {
+ // T106852 means this doesn't always
work.
+ var keyDiv = doc.createElement('div');
+ keyDiv.innerHTML = param.key.html;
+ this._cachedHtml[k].key = new
PNodeList(this.pdoc, this, keyDiv, {
+ update: function() {
+ var t =
this.parent._template;
+ delete
t.template.params[k].key.wt;
+
t.template.params[k].key.html = this.container.innerHTML;
+ this.parent._template =
t;
+ },
+ });
+ }
+ }
+ return this._cachedHtml[k];
+ },
+ },
+});
+
+// Helper: getter and setter for the inner contents of a node.
+var innerAccessor = {
+ get: function() {
+ return new PNodeList(this.pdoc, this, this.node);
+ },
+ set: function(v) {
+ this.node.innerHTML = toHtmlStr(this.node, v);
+ this.update();
+ },
+};
+
+/**
+ * PExtLink represents an external link, like `[http://example.com Example]`.
+ * @class PExtLink
+ * @extends PNode
+ */
+/**
+ * @method constructor
+ * @private
+ * @inheritdoc PNode#constructor
+ */
+PExtLink = function PExtLink(pdoc, parent, node, opts) {
+ PNode.call(this, pdoc, parent, node, opts);
+};
+util.inherits(PExtLink, PNode);
+Object.defineProperties(PExtLink.prototype, {
+ /**
+ * The URL of the link target.
+ * @property {String}
+ */
+ url: {
+ // XXX url should be a PNodeList, but that requires handling
+ // typeof="mw:ExpandedAttrs"
+ get: function() {
+ return this.node.getAttribute('href');
+ },
+ set: function(v) {
+ this.node.setAttribute('href', v);
+ },
+ },
+ /**
+ * The link title, as a {@link PNodeList}.
+ * You can assign a String, Node, or PNodeList to mutate the title.
+ * @property {PNodeList}
+ */
+ title: innerAccessor,
+});
+
+/**
+ * PHeading represents a section heading in wikitext, like `== Foo ==`.
+ * @class PHeading
+ * @extends PNode
+ */
+/**
+ * @method constructor
+ * @private
+ * @inheritdoc PNode#constructor
+ */
+PHeading = function PHeading(pdoc, parent, node, opts) {
+ PNode.call(this, pdoc, parent, node, opts);
+};
+util.inherits(PHeading, PNode);
+Object.defineProperties(PHeading.prototype, {
+ /**
+ * The heading level, as an integer between 1 and 6 inclusive.
+ * @property {Number}
+ */
+ level: {
+ get: function() {
+ return +this.node.nodeName.slice(1);
+ },
+ set: function(v) {
+ v = +v;
+ if (v === this.level) {
+ return;
+ } else if (v >= 1 && v <= 6) {
+ var nh = this.ownerDocument.createElement('h' +
v);
+ while (this.node.firstChild !== null) {
+ nh.appendChild(this.node.firstChild);
+ }
+ this.node.parentNode.replaceChild(nh,
this.node);
+ this.node = nh;
+ this.update();
+ } else {
+ throw new Error("Level must be between 1 and 6,
inclusive.");
+ }
+ },
+ },
+ /**
+ * The title of the heading, as a {@link PNodeList}.
+ * You can assign a String, Node, or PNodeList to mutate the title.
+ * @property {PNodeList}
+ */
+ title: innerAccessor,
+});
+
+/**
+ * PHtmlEntity represents an HTML entity, like `&nbsp;`.
+ * @class PHtmlEntity
+ * @extends PNode
+ */
+/**
+ * @method constructor
+ * @private
+ * @inheritdoc PNode#constructor
+ */
+PHtmlEntity = function PHtmlEntity(pdoc, parent, node, opts) {
+ PNode.call(this, pdoc, parent, node, opts);
+};
+util.inherits(PHtmlEntity, PNode);
+Object.defineProperties(PHtmlEntity.prototype, {
+ /**
+ * The character represented by the HTML entity.
+ * @property {String}
+ */
+ normalized: {
+ get: function() { return this.node.textContent; },
+ set: function(v) {
+ this.node.textContent = v;
+ this.node.removeAttribute('data-parsoid');
+ this.update();
+ },
+ },
+});
+
+
+/**
+ * PWikiLink represents an internal wikilink, like `[[Foo|Bar]]`.
+ * @class PWikiLink
+ * @extends PNode
+ */
+/**
+ * @method constructor
+ * @private
+ * @inheritdoc PNode#constructor
+ */
+PWikiLink = function PWikiLink(pdoc, parent, node, opts) {
+ PNode.call(this, pdoc, parent, node, opts);
+};
+util.inherits(PWikiLink, PNode);
+Object.defineProperties(PWikiLink.prototype, {
+ /**
+ * The title of the linked page.
+ * @property {String}
+ */
+ title: {
+ // XXX url should be a PNodeList, but that requires handling
+ // typeof="mw:ExpandedAttrs"
+ get: function() {
+ return this.node.getAttribute('href').replace(/^.\//,
'');
+ },
+ set: function(v) {
+ var href = './' + this.pdoc.env.normalizeTitle(v);
+ this.node.setAttribute('href', href);
+ this.update();
+ },
+ },
+ /**
+ * The text to display, as a {@link PNodeList}.
+ * You can assign a String, Node, or PNodeList to mutate the text.
+ * @property {PNodeList}
+ */
+ text: innerAccessor,
+});
+
+/**
+ * A PDoc object wraps an entire Parsoid document. Since it is an
+ * instance of {@link PNodeList}, you can filter it, mutate it, etc.
+ * But it also provides means to serialize the document as either
+ * HTML (via {@link #document} or {@link #toHtml}) or wikitext
+ * (via {@link #toString}).
+ * @class
+ * @extends PNodeList
+ * @alternateClassName Parsoid.PDoc
+ */
+var PDoc = function PDoc(env, doc) {
+ PNodeList.call(this, this, null, doc.body);
+ this.env = env;
+};
+util.inherits(PDoc, PNodeList);
+Object.defineProperties(PDoc.prototype, {
+ /**
+ * An HTML {@link Document} representing article content conforming to
the
+ * [MediaWiki DOM
specification](https://www.mediawiki.org/wiki/Parsoid/MediaWiki_DOM_spec).
+ * @property {Document}
+ */
+ document: {
+ get: function() { return this.container.ownerDocument; },
+ set: function(v) { this.container = v.body; },
+ },
+ /**
+ * Return a string representing the entire document as
+ * HTML conforming to the
+ * [MediaWiki DOM
specification](https://www.mediawiki.org/wiki/Parsoid/MediaWiki_DOM_spec).
+ * @inheritdoc PNodeList#toHtml
+ * @method
+ */
+ toHtml: { value: function() {
+ // document.outerHTML is a Parsoid-ism; real browsers don't
define it.
+ var html = this.document.outerHTML;
+ if (!html) {
+ html = this.document.body.outerHTML;
+ }
+ return html;
+ }, },
+});
+
+module.exports = {
+ PDoc: PDoc,
+ PNodeList: PNodeList,
+};
diff --git a/tests/mocha/jsapi.js b/tests/mocha/jsapi.js
index 8836f5c..b0c5f53 100644
--- a/tests/mocha/jsapi.js
+++ b/tests/mocha/jsapi.js
@@ -13,3 +13,163 @@
});
});
});
+
+describe('Examples from guides/jsapi', function() {
+ it('converts simple wikitext to HTML', function() {
+ return Parsoid.parse('I love wikitext!', { pdoc: true}).then(function(pdoc) {
+ pdoc.should.have.property('document');
+ pdoc.document.should.have.property('outerHTML');
+ });
+ });
+ it('filters out templates', function() {
+ var text = "I has a template! {{foo|bar|baz|eggs=spam}} See it?\n";
+ return Parsoid.parse(text, { pdoc: true }).then(function(pdoc) {
+ String(pdoc).should.equal(text);
+ var templates = pdoc.filterTemplates();
+ templates.length.should.equal(1);
+ String(templates[0]).should.equal('{{foo|bar|baz|eggs=spam}}');
+ var template = templates[0];
+ template.name.should.equal('foo');
+ template.name = 'notfoo';
+ String(template).should.equal('{{notfoo|bar|baz|eggs=spam}}');
+ template.params.length.should.equal(3);
+ template.params[0].should.equal('1');
+ template.params[1].should.equal('2');
+ template.params[2].should.equal('eggs');
+ String(template.get(1).value).should.equal('bar');
+ String(template.get('eggs').value).should.equal('spam');
+ });
+ });
+ it('filters templates, recursively', function() {
+ var text = "{{foo|{{bar}}={{baz|{{spam}}}}}}";
+ return Parsoid.parse(text, { pdoc: true }).then(function(pdoc) {
+ var templates = pdoc.filterTemplates();
+ // XXX note that {{bar}} as template name doesn't get handled;
+ // that's bug T106852
+ templates.length.should.equal(3);
+ });
+ });
+ it('filters templates, non-recursively', function() {
+ var text = "{{foo|this {{includes a|template}}}}";
+ return Parsoid.parse(text, { pdoc: true }).then(function(pdoc) {
+ var templates = pdoc.filterTemplates({ recursive: false });
+ templates.length.should.equal(1);
+ var foo = templates[0];
+ String(foo.get(1).value).should.equal('this {{includes a|template}}');
+ var more = foo.get(1).value.filterTemplates();
+ more.length.should.equal(1);
+ String(more[0].get(1).value).should.equal('template');
+ });
+ });
+ it('is easy to mutate templates', function() {
+ var text = "{{cleanup}} '''Foo''' is a [[bar]]. {{uncategorized}}";
+ return Parsoid.parse(text, { pdoc: true }).then(function(pdoc) {
+ pdoc.filterTemplates().forEach(function(template) {
+ if (template.matches('Cleanup') && !template.has('date')) {
+ template.add('date', 'July 2012');
+ }
+ if (template.matches('uncategorized')) {
+ template.name = 'bar-stub';
+ }
+ });
+ String(pdoc).should.equal("{{cleanup|date = July 2012}} '''Foo''' is a [[bar]]. {{bar-stub}}");
+ });
+ });
+});
+
+describe('Further examples of PDoc API', function() {
+ it('is easy to mutate templates (2)', function() {
+ // Works even on nested templates!
+ var text = "{{echo|{{cleanup}} '''Foo''' is a [[bar]].}} {{uncategorized}}";
+ return Parsoid.parse(text, { pdoc: true }).then(function(pdoc) {
+ pdoc.filterTemplates().forEach(function(template) {
+ if (template.matches('Cleanup') && !template.has('date')) {
+ template.add('date', 'July 2012');
+ // Works even when there are special characters
+ template.add('test1', '{{foo}}&bar|bat<p>');
+ template.add('test2', Parsoid.PNodeList.fromHTML(pdoc, "I'm so <b>bold</b>!"));
+ }
+ });
+ String(pdoc).should.equal("{{echo|{{cleanup|date = July 2012|test1 = <nowiki>{{foo}}</nowiki>&bar{{!}}bat<nowiki><p></nowiki>|test2 = I'm so '''bold'''!}} '''Foo''' is a [[bar]].}} {{uncategorized}}");
+ });
+ });
+ it('is safe to mutate template arguments', function() {
+ var text = "{{echo|foo|bar}}";
+ return Parsoid.parse(text, { pdoc: true }).then(function(pdoc) {
+ var t = pdoc.filterTemplates()[0];
+ t.remove(1);
+ String(pdoc).should.equal('{{echo||bar}}');
+ });
+ });
+ it('filters and mutates headings', function() {
+ var text = "= one =\n== two ==\n=== three ===\n==== four ====\nbody";
+ return Parsoid.parse(text, { pdoc: true }).then(function(pdoc) {
+ var headings = pdoc.filterHeadings();
+ headings.length.should.equal(4);
+ headings[0].level.should.equal(1);
+ headings[1].level.should.equal(2);
+ headings[2].level.should.equal(3);
+ headings[3].level.should.equal(4);
+ headings[0].title.toHtml().should.equal(' one ');
+ headings[1].title.toHtml().should.equal(' two ');
+ headings[2].title.toHtml().should.equal(' three ');
+ headings[3].title.toHtml().should.equal(' four ');
+ headings[0].title = '=0=';
+ headings[1].title = headings[2].title;
+ headings[3].level = 3;
+ String(pdoc).should.equal('=<nowiki>=0=</nowiki>=\n== three ==\n=== three ===\n\n=== four ===\nbody\n');
+ });
+ });
+ it('filters and mutates headings inside templates', function() {
+ var text = "{{echo|1=\n= one =\n}}";
+ return Parsoid.parse(text, { pdoc: true }).then(function(pdoc) {
+ var headings = pdoc.filterHeadings();
+ headings.length.should.equal(1);
+ headings[0].level = 2;
+ String(headings[0]).should.equal('== one ==\n');
+ String(pdoc).should.equal('{{echo|1=\n== one ==\n}}');
+ headings[0].title = 'two';
+ String(headings[0]).should.equal('== two ==\n');
+ String(pdoc).should.equal('{{echo|1=\n== two ==\n}}');
+ });
+ });
+ it('filters and mutates external links', function() {
+ var text = "[http://example.com {{echo|link content}}]";
+ return Parsoid.parse(text, { pdoc: true }).then(function(pdoc) {
+ var extlinks = pdoc.filterExtLinks();
+ extlinks.length.should.equal(1);
+ String(extlinks[0].url).should.equal('http://example.com');
+ String(extlinks[0].title).should.equal('{{echo|link content}}');
+ extlinks[0].title = ']';
+ String(pdoc).should.equal('[http://example.com <nowiki>]</nowiki>]\n');
+ });
+ });
+ it('filters and mutates wiki links', function() {
+ var text = "[[foo|1]] {{echo|[[bar|2]]}} [[{{echo|bat}}|3]]";
+ return Parsoid.parse(text, { pdoc: true }).then(function(pdoc) {
+ var extlinks = pdoc.filterWikiLinks();
+ extlinks.length.should.equal(3);
+ String(extlinks[0].title).should.equal('Foo');
+ String(extlinks[0].text).should.equal('1');
+ String(extlinks[1].title).should.equal('Bar');
+ String(extlinks[1].text).should.equal('2');
+ String(extlinks[2].text).should.equal('3');
+ extlinks[0].title = extlinks[0].text = 'foobar';
+ extlinks[1].text = 'A';
+ extlinks[2].text = 'B';
+ String(pdoc).should.equal('[[foobar]] {{echo|[[bar|A]]}} [[{{echo|bat}}|B]]\n');
+ });
+ });
+ it('filters and mutates html entities', function() {
+ var text = '&amp;{{echo|&quot;}}';
+ return Parsoid.parse(text, { pdoc: true }).then(function(pdoc) {
+ var entities = pdoc.filterHtmlEntities();
+ entities.length.should.equal(2);
+ entities[0].normalized.should.equal('&');
+ entities[1].normalized.should.equal('"');
+ entities[0].normalized = '<';
+ entities[1].normalized = '>';
+ String(pdoc).should.equal('<{{echo|>}}\n');
+ });
+ });
+});
diff --git a/tests/parse.js b/tests/parse.js
index 23fb656..39ed839 100755
--- a/tests/parse.js
+++ b/tests/parse.js
@@ -148,7 +148,7 @@
}
return DU.serializeDOM(env, doc.body, argv.selser).then(function(out) {
if (argv.html2wt || argv.wt2wt) {
- return { trailingNL: true, out: out };
+ return { trailingNL: true, out: out, env: env };
} else {
return startsAtWikitext(argv, env, out);
}
@@ -172,7 +172,7 @@
} else {
out = DU.serializeNode(doc).str;
}
- return { trailingNL: true, out: out };
+ return { trailingNL: true, out: out, env: env };
} else {
return startsAtHTML(argv, env, DU.serializeNode(doc).str);
}
--
To view, visit https://gerrit.wikimedia.org/r/226734
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I7aa14c2ef697d360cb0fcae48eb7bc308744a081
Gerrit-PatchSet: 10
Gerrit-Project: mediawiki/services/parsoid
Gerrit-Branch: master
Gerrit-Owner: Cscott <[email protected]>
Gerrit-Reviewer: Arlolra <[email protected]>
Gerrit-Reviewer: Cscott <[email protected]>
Gerrit-Reviewer: Legoktm <[email protected]>
Gerrit-Reviewer: jenkins-bot <>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits