Cscott has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/226734

Change subject: Build a mutation interface similar to mwparserfromhell.
......................................................................

Build a mutation interface similar to mwparserfromhell.

Change-Id: I7aa14c2ef697d360cb0fcae48eb7bc308744a081
---
A USAGE.md
M lib/index.js
M tests/mocha/jsapi.js
M tests/parse.js
4 files changed, 219 insertions(+), 3 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/parsoid refs/changes/34/226734/1

diff --git a/USAGE.md b/USAGE.md
new file mode 100644
index 0000000..da71dd1
--- /dev/null
+++ b/USAGE.md
@@ -0,0 +1,71 @@
+Usage
+=====
+
+This file describes usage of Parsoid as a standalone wikitext parsing
+package, in the spirit of [`mwparserfromhell`][mwparserfromhell].  This is not the typical
+use case for Parsoid; it is more often used as a network service.
+See [Parsoid service] for more details.
+
+These examples will use the [`prfun`][prfun] library and [ES6 generators](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Statements/function*) in
+order to fluently express asynchronous operations.  The library also
+exports vanilla [`Promise`][Promise]s if you wish to maintain compatibility
+with old versions of `node` at the cost of a little bit of readability.
+
+Use as a wikitext parser is straightforward (where `text` is
+wikitext input):
+```js
+#!/usr/bin/node --harmony-generators
+var Promise = require('prfun');
+var Parsoid = require('parsoid');
+
+var main = Promise.async(function*() {
+    var text = "I love wikitext!";
+    var prdom = yield Parsoid.parse(text, { prdom: true });
+    console.log(prdom.document.outerHTML);
+});
+
+// start me up!
+main().done();
+```
+
+As you can see, there is a little bit of boilerplate needed to get the
+asynchronous machinery started.  Future code examples will be assumed
+to be the body of the `main()` method above.
+
+The `prdom` object above is a `Parsoid.PrDOM` object, which has
+helpful methods to filter and manipulate the document.  If you want
+to access the raw [Parsoid DOM], however, it is easily accessible
+via the `document` property, as shown above, and all normal DOM
+manipulation functions can be used on it (Parsoid uses [`domino`][domino]
+to implement these methods).  The extra methods provided by
+`PrDOM` can be quite useful, however.  For example:
+
+```js
+> var text = "I has a template! {{foo|bar|baz|eggs=spam}} See it?\n";
+> var prdom = yield Parsoid.parse(text, { prdom: true });
+> console.log(String(prdom));
+I has a template! {{foo|bar|baz|eggs=spam}} See it?
+> var templates = prdom.filterTemplates();
+> console.log(templates.map(String));
+[ '{{foo|bar|baz|eggs=spam}}' ]
+> var template = templates[0];
+> console.log(template.name);
+foo
+> template.name = 'notfoo';
+> console.log(String(template));
+{{notfoo|bar|baz|eggs=spam}}
+> console.log(template.params);
+[ '1', '2', 'eggs' ]
+> console.log(template.get(1).value);
+bar
+> console.log(template.get("eggs").value);
+spam
+```
+
+
+[mwparserfromhell]: http://mwparserfromhell.readthedocs.org/en/latest/index.html
+[Parsoid service]: https://www.mediawiki.org/wiki/Parsoid
+[prfun]: https://github.com/cscott/prfun
+[Promise]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Promise
+[Parsoid DOM]: http://www.mediawiki.org/wiki/Parsoid/MediaWiki_DOM_spec
+[domino]: https://www.npmjs.com/package/domino
diff --git a/lib/index.js b/lib/index.js
index 516fc31..17d60de 100644
--- a/lib/index.js
+++ b/lib/index.js
@@ -15,10 +15,121 @@
 var json = require('../package.json');
 var parseJs = require('../tests/parse.js');
 var ParsoidConfig = require('../lib/mediawiki.ParsoidConfig.js').ParsoidConfig;
+var WikitextSerializer = 
require('../lib/mediawiki.WikitextSerializer.js').WikitextSerializer;
+var DU = require('../lib/mediawiki.DOMUtils.js').DOMUtils;
+var util = require('util');
+
+// WTS helper
+var wts = function(env, node) {
+       // XXX: Serializing to wikitext is very user-friendly, but it depends on
+       // WTS.serializeDOMSync which we might not want to keep around forever.
+       // An alternative would be:
+       //    return DU.normalizeOut(node, 'parsoidOnly');
+       // which might be almost as friendly.
+       if (!DU.isBody(node)) {
+               var nbody = node.ownerDocument.createElement('body');
+               nbody.appendChild(node.cloneNode(true));
+               node = nbody;
+       }
+       return (new WikitextSerializer({ env: env })).serializeDOMSync(node);
+};
 
 var Parsoid = module.exports = {
        name: json.name, // package name
        version: json.version, // npm version #
+};
+
+// Wrappers in the style of mwparserfromhell
+
+var PrNode = function PrNode(prdom, node) {
+       this.prdom = prdom;
+       this.node = node;
+};
+Object.defineProperties(PrNode.prototype, {
+       attr: {
+               value: function(a) { return this.node.getAttribute(a); }
+       },
+       dataMw: {
+               get: function() { return DU.getDataMw(this.node); },
+               set: function(v) { return DU.storeDataMw(this.node, v); }
+       },
+       toString: { value: function() { return wts(this.prdom.env, this.node); 
} }
+});
+
+var PrTemplate = function PrTemplate(prdom, node, which) {
+       PrNode.call(this, prdom, node);
+       this.which = which;
+};
+util.inherits(PrTemplate, PrNode);
+Object.defineProperties(PrTemplate.prototype, {
+       toString: {
+               value: function() {
+                       // Templates are actually a collection of nodes.
+                       var nbody = 
this.node.ownerDocument.createElement('body');
+                       var nodes = this.node.ownerDocument.querySelectorAll
+                               ('[about="' + this.attr('about') + '"]');
+                       for (var i = 0; i < nodes.length; i++) {
+                               nbody.appendChild(nodes[i].cloneNode(true));
+                       }
+                       return wts(this.prdom.env, nbody);
+               }
+       },
+       _template: {
+               get: function() {
+                       return this.dataMw.parts[this.which];
+               },
+               set: function(v) {
+                       var dmw = this.dataMw;
+                       dmw.parts[this.which] = v;
+                       this.dataMw = dmw;
+               }
+       },
+       name: {
+               get: function() {
+                       return this._template.template.target.wt;
+               },
+               set: function(v) {
+                       var t = this._template;
+                       t.template.target.wt = v;
+                       t.template.target.href = './Template:' + v;
+                       this._template = t;
+               }
+       },
+       params: {
+               get: function() {
+                       return 
Object.keys(this._template.template.params).sort();
+               }
+       },
+       get: {
+               value: function(k) {
+                       var d = this._template.template.params[k];
+                       return { key: k, value: d.wt, orig: d.key && d.key.wt };
+               }
+       }
+});
+
+var PrDOM = function PrDOM(env, doc) {
+       this.env = env;
+       this.document = doc;
+};
+PrDOM.prototype.filterTemplates = function() {
+       var prdom = this;
+       var result = [];
+       Array.from(
+               prdom.document.querySelectorAll('[typeof~="mw:Transclusion"]')
+       ).forEach(function(node) {
+               // each Transclusion node could represent multiple templates.
+               var dataMw = DU.getDataMw(node);
+               dataMw.parts.forEach(function(part, i) {
+                       if (part.template) {
+                               result.push(new PrTemplate(prdom, node, i));
+                       }
+               });
+       });
+       return result;
+};
+PrDOM.prototype.toString = function() {
+       return wts(this.env, this.document.body);
 };
 
 // Sample usage:
@@ -29,6 +140,10 @@
        options = options || {};
        var argv = Object.create(parseJs.defaultOptions);
        Object.keys(options).forEach(function(k) { argv[k] = options[k]; });
+
+       if (argv.prdom) {
+               argv.document = true;
+       }
 
        if (argv.selser) {
                argv.html2wt = true;
@@ -47,5 +162,7 @@
 
        var parsoidConfig = options.parsoidConfig ||
                        new ParsoidConfig(options.config || null, { 
defaultWiki: prefix });
-       return parseJs.parse(input || '', argv, parsoidConfig, 
prefix).nodify(optCb);
+       return parseJs.parse(input || '', argv, parsoidConfig, 
prefix).then(function(res) {
+               return argv.prdom ? new PrDOM(res.env, res.out) : res;
+       }).nodify(optCb);
 };
diff --git a/tests/mocha/jsapi.js b/tests/mocha/jsapi.js
index 8836f5c..e5ce77a 100644
--- a/tests/mocha/jsapi.js
+++ b/tests/mocha/jsapi.js
@@ -13,3 +13,31 @@
                });
        });
 });
+
+describe('Examples from USAGE.md', function() {
+       it('converts simple wikitext to HTML', function() {
+               return Parsoid.parse('I love wikitext!', { prdom: 
true}).then(function(prdom) {
+                       prdom.should.have.property('document');
+                       prdom.document.should.have.property('outerHTML');
+               });
+       });
+       it('filters out templates', function() {
+               var text = "I has a template! {{foo|bar|baz|eggs=spam}} See 
it?\n";
+               return Parsoid.parse(text, { prdom: true 
}).then(function(prdom) {
+                       String(prdom).should.equal(text);
+                       var templates = prdom.filterTemplates();
+                       templates.length.should.equal(1);
+                       
String(templates[0]).should.equal('{{foo|bar|baz|eggs=spam}}');
+                       var template = templates[0];
+                       template.name.should.equal('foo');
+                       template.name = 'notfoo';
+                       
String(template).should.equal('{{notfoo|bar|baz|eggs=spam}}');
+                       template.params.length.should.equal(3);
+                       template.params[0].should.equal('1');
+                       template.params[1].should.equal('2');
+                       template.params[2].should.equal('eggs');
+                       template.get(1).value.should.equal('bar');
+                       template.get('eggs').value.should.equal('spam');
+               });
+       });
+});
diff --git a/tests/parse.js b/tests/parse.js
index 32f17b5..b57ca59 100755
--- a/tests/parse.js
+++ b/tests/parse.js
@@ -148,7 +148,7 @@
        }
        return DU.serializeDOM(env, doc.body, argv.selser).then(function(out) {
                if (argv.html2wt || argv.wt2wt) {
-                       return { trailingNL: true, out: out };
+                       return { trailingNL: true, out: out, env: env };
                } else {
                        return startsAtWikitext(argv, env, out);
                }
@@ -172,7 +172,7 @@
                        } else {
                                out = DU.serializeNode(doc).str;
                        }
-                       return { trailingNL: true, out: out };
+                       return { trailingNL: true, out: out, env: env };
                } else {
                        return startsAtHTML(argv, env, 
DU.serializeNode(doc).str);
                }

-- 
To view, visit https://gerrit.wikimedia.org/r/226734
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I7aa14c2ef697d360cb0fcae48eb7bc308744a081
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/services/parsoid
Gerrit-Branch: master
Gerrit-Owner: Cscott <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to