This particular `escapeHtml` implementation is limited to replacing single characters, but if you wanted to escape any characters that can be represented using a named character reference, you’re gonna need something more generic, as some named character references expand to multiple characters. That’s what I was referring to earlier.
On Sun, May 20, 2018 at 12:27 PM, kai zhu <kaizhu...@gmail.com> wrote: > sorry, there was a bug in the standalone-solution i last posted. here’s > corrected version ^^;;; > > also highlighted in blue, the escapeHTML part of code relevant to this > discussion. and honestly, replacing those 6 blue-lines-of-code in this > real-world example, with the proposed map-replace doesn’t make much of a > difference in terms of overall readability/maintainability. > > ```js > /* > * example.js > * > * this zero-dependency, standalone program will render mustache-based > html-templates, > * with the given dictionary, and print it to stdout > * code derived from https://github.com/kaizhu256/ > node-utility2/blob/2018.1.13/lib.utility2.js#L5922 > > > > * example usage: > $ node example.js <template> <json-dictionary> > $ node example.js '<pre> > JSON.stringify("<b>hello world!</b>".toUpperCase()))= > {{hello.world toUpperCase jsonStringify}} > </pre> > > <ul> > {{#each myList}} > {{#if href}} > <li id="{{href encodeURIComponent}}"> > <a href="{{href}}"> > {{#if description}} > {{description notHtmlSafe}} > {{#unless description}} > no description > {{/if description}} > </a> > </li> > {{/if href}} > {{/each myList}} > </ul>' '{ > "hello": { > "world": "<b>hello world!</b>" > }, > "myList": [ > null, > { > "href": "https://www.example.com/1", > "description": "<b>click here!</b>" > }, > { > "href": "https://www.example.com/2" > } > ] > }' > > > > * example output: > <pre> > JSON.stringify("<b>hello world!</b>".toUpperCase()))= > "<B>HELLO WORLD!</B>" > </pre> > > <ul> > > <li id="https%3A%2F%2Fwww.example.com%2F1"> > <a href="https://www.example.com/1"> > > <b>click here!</b> > > </a> > </li> > > <li id="https%3A%2F%2Fwww.example.com%2F2"> > <a href="https://www.example.com/2"> > > no description > > </a> > </li> > > </ul> > */ > > > > > > > > /*jslint > node: true, > regexp: true > */ > 'use strict'; > var templateRender; > templateRender = function (template, dict, options) { > 
/* > * this function will render the template with the given dict > */ > var argList, getValue, match, renderPartial, rgx, tryCatch, skip, > value; > dict = dict || {}; > options = options || {}; > getValue = function (key) { > argList = key.split(' '); > value = dict; > if (argList[0] === '#this/') { > return; > } > // iteratively lookup nested values in the dict > argList[0].split('.').forEach(function (key) { > value = value && value[key]; > }); > return value; > }; > renderPartial = function (match0, helper, key, partial) { > switch (helper) { > case 'each': > case 'eachTrimRightComma': > value = getValue(key); > value = Array.isArray(value) > ? value.map(function (dict) { > // recurse with partial > return templateRender(partial, dict, options); > }).join('') > : ''; > // remove trailing-comma from last element > if (helper === 'eachTrimRightComma') { > value = value.trimRight().replace((/,$/), ''); > } > return value; > case 'if': > partial = partial.split('{{#unless ' + key + '}}'); > partial = getValue(key) > ? partial[0] > // handle 'unless' case > : partial.slice(1).join('{{#unless ' + key + '}}'); > // recurse with partial > return templateRender(partial, dict, options); > case 'unless': > return getValue(key) > ? 
'' > // recurse with partial > : templateRender(partial, dict, options); > default: > // recurse with partial > return match0[0] + templateRender(match0.slice(1), dict, > options); > } > }; > tryCatch = function (fnc, message) { > /* > * this function will prepend the message to errorCaught > */ > try { > return fnc(); > } catch (errorCaught) { > errorCaught.message = message + errorCaught.message; > throw errorCaught; > } > }; > // render partials > rgx = (/\{\{#(\w+) ([^}]+?)\}\}/g); > template = template || ''; > for (match = rgx.exec(template); match; match = rgx.exec(template)) { > rgx.lastIndex += 1 - match[0].length; > template = template.replace( > new RegExp('\\{\\{#(' + match[1] + ') (' + match[2] + > ')\\}\\}([\\S\\s]*?)\\{\\{/' + match[1] + ' ' + match[2] + > '\\}\\}'), > renderPartial > ); > } > // search for keys in the template > return template.replace((/\{\{[^}]+?\}\}/g), function (match0) { > var notHtmlSafe; > notHtmlSafe = options.notHtmlSafe; > return tryCatch(function () { > getValue(match0.slice(2, -2)); > if (value === undefined) { > return match0; > } > argList.slice(1).forEach(function (arg0, ii, list) { > switch (arg0) { > case 'alphanumeric': > value = value.replace((/\W/g), '_'); > break; > case 'decodeURIComponent': > value = decodeURIComponent(value); > break; > case 'encodeURIComponent': > value = encodeURIComponent(value); > break; > case 'jsonStringify': > value = JSON.stringify(value); > break; > case 'jsonStringify4': > value = JSON.stringify(value, null, 4); > break; > case 'notHtmlSafe': > notHtmlSafe = true; > break; > case 'truncate': > skip = ii + 1; > if (value.length > list[skip]) { > value = value.slice(0, list[skip] - 3).trimRight() > + '...'; > } > break; > // default to String.prototype[arg0]() > default: > if (ii === skip) { > break; > } > value = value[arg0](); > break; > } > }); > value = String(value); > // default to htmlSafe > if (!notHtmlSafe) { > value = value > .replace((/"/g), '"') > .replace((/&/g), '&') > 
.replace((/'/g), ''') > .replace((/</g), '<') > .replace((/>/g), '>') > .replace((/&(amp;|apos;|gt;|lt;|quot;)/ig), > '&$1'); > } > return value; > }, 'templateRender could not render expression ' + > JSON.stringify(match0) + '\n'); > }); > }; > > console.log(templateRender(process.argv[2], JSON.parse(process.argv[3]))); > ``` > > kai zhu > kaizhu...@gmail.com > > > > On 20 May 2018, at 10:01 PM, kai zhu <kaizhu...@gmail.com> wrote: > > @Kai > Have you ever tried writing an HTML template system on the front end? This > *will* almost inevitably come up, and most of my use cases for this is on > the front end itself handling various scenarios. > > > i have. if we want to move from toy-cases to real-world frontend-examples > [1] [2] [3], here's a zero-dependency, mustache-based template-system in > under 110 sloc, which i've been using for the past 5 years. and the trick > to simplify rendering-of-partials, is to recurse them inside string.replace > (see the red-highlighted sections of code). > > [1] standalone, static-function templateRender > https://github.com/kaizhu256/node-utility2/blob/2018.1.13/ > lib.utility2.js#L5922 > [2] test-cases showing capabilities of templateRender > https://github.com/kaizhu256/node-utility2/blob/2018.1.13/test.js#L1411 > [3] live website rendered using templateRender > https://kaizhu256.github.io/node-swgg-wechat-pay/build.. 
> beta..travis-ci.org/app/ > <https://kaizhu256.github.io/node-swgg-wechat-pay/build..beta..travis-ci.org/app/#!swgg_id__2Fpay_2Fcloseorder_20POST_1> > > <Screen Shot 2018-05-20 at 9.13.17 PM copy.jpg> > > ```js > /* > * example.js > * > * this zero-dependency, standalone program will render mustache-based > html-templates, > * with the given dictionary, and print it to stdout > * code derived from https://github.com/kaizhu256/node-utility2/blob/ > 2018.1.13/lib.utility2.js#L5922 > > > > * example usage: > $ node example.js <template> <json-dictionary> > $ node example.js '<pre> > JSON.stringify("<b>hello world!</b>".toUpperCase()))= > {{hello.world toUpperCase jsonStringify}} > </pre> > > <ul> > {{#each myList}} > {{#if href}} > <li id="{{href encodeURIComponent}}"> > <a href="{{href}}"> > {{#if description}} > {{description notHtmlSafe}} > {{#unless description}} > no description > {{/if description}} > </a> > </li> > {{/if href}} > {{/each myList}} > </ul>' '{ > "hello": { > "world": "<b>hello world!</b>" > }, > "myList": [ > null, > { > "href": "https://www.example.com/1", > "description": "<b>click here!</b>" > }, > { > "href": "https://www.example.com/2" > } > ] > }' > > > > * example output: > <pre> > JSON.stringify("<b>hello world!</b>".toUpperCase()))= > "<B>HELLO WORLD!</B>" > </pre> > > <ul> > > <li id="https%3A%2F%2Fwww.example.com <http://2fwww.example.com/>%2F1"> > <a href="https://www.example.com/1"> > > <b>click here!</b> > > </a> > </li> > > <li id="https%3A%2F%2Fwww.example.com <http://2fwww.example.com/>%2F2"> > <a href="https://www.example.com/2"> > > no description > > </a> > </li> > > </ul> > */ > > > > > > > > /*jslint > node: true, > regexp: true > */ > 'use strict'; > var templateRender; > templateRender = function (template, dict, notHtmlSafe) { > /* > * this function will render the template with the given dict > */ > var argList, getValue, match, renderPartial, rgx, skip, value; > dict = dict || {}; > getValue = function (key) { > 
argList = key.split(' '); > value = dict; > if (argList[0] === '#this/') { > return; > } > // iteratively lookup nested values in the dict > argList[0].split('.').forEach(function (key) { > value = value && value[key]; > }); > return value; > }; > renderPartial = function (match0, helper, key, partial) { > switch (helper) { > case 'each': > case 'eachTrimRightComma': > value = getValue(key); > value = Array.isArray(value) > ? value.map(function (dict) { > // recurse with partial > return templateRender(partial, dict, notHtmlSafe); > }).join('') > : ''; > // remove trailing-comma from last element > if (helper === 'eachTrimRightComma') { > value = value.trimRight().replace((/,$/), ''); > } > return value; > case 'if': > partial = partial.split('{{#unless ' + key + '}}'); > partial = getValue(key) > ? partial[0] > // handle 'unless' case > : partial.slice(1).join('{{#unless ' + key + '}}'); > // recurse with partial > return templateRender(partial, dict, notHtmlSafe); > case 'unless': > return getValue(key) > ? 
'' > // recurse with partial > : templateRender(partial, dict, notHtmlSafe); > default: > // recurse with partial > return match0[0] + templateRender(match0.slice(1), dict, > notHtmlSafe); > } > }; > // render partials > rgx = (/\{\{#(\w+) ([^}]+?)\}\}/g); > template = template || ''; > for (match = rgx.exec(template); match; match = rgx.exec(template)) { > rgx.lastIndex += 1 - match[0].length; > template = template.replace( > new RegExp('\\{\\{#(' + match[1] + ') (' + match[2] + > ')\\}\\}([\\S\\s]*?)\\{\\{/' + match[1] + ' ' + match[2] + > '\\}\\}'), > renderPartial > ); > } > // search for keys in the template > return template.replace((/\{\{[^}]+?\}\}/g), function (match0) { > getValue(match0.slice(2, -2)); > if (value === undefined) { > return match0; > } > argList.slice(1).forEach(function (arg0, ii, list) { > switch (arg0) { > case 'alphanumeric': > value = value.replace((/\W/g), '_'); > break; > case 'decodeURIComponent': > value = decodeURIComponent(value); > break; > case 'encodeURIComponent': > value = encodeURIComponent(value); > break; > case 'jsonStringify': > value = JSON.stringify(value); > break; > case 'jsonStringify4': > value = JSON.stringify(value, null, 4); > break; > case 'notHtmlSafe': > notHtmlSafe = true; > break; > case 'truncate': > skip = ii + 1; > if (value.length > list[skip]) { > value = value.slice(0, list[skip] - 3).trimRight() + > '...'; > } > break; > // default to String.prototype[arg0]() > default: > if (ii === skip) { > break; > } > value = value[arg0](); > break; > } > }); > value = String(value); > // default to htmlSafe > if (!notHtmlSafe) { > value = value > .replace((/"/g), '"') > .replace((/&/g), '&') > .replace((/'/g), ''') > .replace((/</g), '<') > .replace((/>/g), '>') > .replace((/&(amp;|apos;|gt;|lt;|quot;)/ig), '&$1'); > } > return value; > }); > }; > > console.log(templateRender(process.argv[2], JSON.parse(process.argv[3]))); > ``` > > > > kai zhu > kaizhu...@gmail.com > > > > On 20 May 2018, at 6:32 PM, Isiah 
Meadows <isiahmead...@gmail.com> wrote: > > @Mathias > > My particular `escapeHTML` example *could* be written like that (and it > *is* somewhat in the prose). But you're right that in the prose, I did > bring up the potential for things like `str.replace({cheese: "cake", ham: > "eggs"})`. > > @Kai > > Have you ever tried writing an HTML template system on the front end? This > *will* almost inevitably come up, and most of my use cases for this are on > the front end itself handling various scenarios. > > @Cyril > > And every single one of those patterns is going to need compiled and > executed, and compiling and interpreting regular expressions is definitely > not quick, especially when you can nest Kleene stars. (See: > https://en.wikipedia.org/wiki/Regular_expression# > Implementations_and_running_times) That's why I'm against it - we don't > need to complicate this proposal with that mess. > > ----- > > Isiah Meadows > m...@isiahmeadows.com > www.isiahmeadows.com > > On Sat, May 19, 2018 at 7:04 PM, Mathias Bynens <math...@qiwi.be> wrote: > >> Hey Kai, you’re oversimplifying. Your solution works for a single Unicode >> symbol (corresponding to a single code point) but falls apart as soon as >> you need to match multiple symbols of possibly varying length, like in the >> `escapeHtml` example. >> >> On Sat, May 19, 2018 at 8:43 AM, kai zhu <kaizhu...@gmail.com> wrote: >> >>> again, you backend-engineers are making something more complicated than >>> needs be, when simple, throwaway glue-code will suffice. agree with >>> jordan, this feature is a needless cross-cut of String.prototype.replace. >>> >>> ``` >>> /*jslint >>> node: true >>> */ >>> 'use strict'; >>> var dict; >>> dict = { >>> '$': '^', >>> '1': '2', >>> '<': '<', >>> '🍌': '🍑', >>> '-': '_', >>> ']': '@' >>> }; >>> // output: "test🍐🍑_^^[22@ <foo>" >>> console.log('test🍐🍌-$$[11] <foo>'.replace((/[\S\s]/gu), function >>> (character) { >>> return dict.hasOwnProperty(character) >>> ? 
dict[character] >>> : character; >>> })); >>> ``` >>> >>> kai zhu >>> kaizhu...@gmail.com >>> >>> >>> >>> On 19 May 2018, at 4:08 PM, Cyril Auburtin <cyril.aubur...@gmail.com> >>> wrote: >>> >>> You can also have a >>> >>> ```js >>> var replacer = replacements => { >>> const re = new RegExp(replacements.map(([k,_,escaped=k]) => >>> escaped).join('|'), 'gu'); >>> const replaceMap = new Map(replacements); >>> return s => s.replace(re, w => replaceMap.get(w)); >>> } >>> var replace = replacer([['$', '^', String.raw`\$`], ['1', '2'], ['<', >>> '<'], ['🍌', '🍑'], ['-', '_'], [']', '@', String.raw`\]`]]); >>> replace('test🍐🍌-$$[11] <foo>') // "test🍐🍑_^^[22@ <foo>" >>> ``` >>> but it's quickly messy to work with escaping >>> >>> Le sam. 19 mai 2018 à 08:17, Isiah Meadows <isiahmead...@gmail.com> a >>> écrit : >>> >>>> Here's what I'd prefer instead: overload `String.prototype.replace` to >>>> take non-callable objects, as sugar for this: >>>> >>>> ```js >>>> const old = Function.call.bind(Function.ca <http://function.ca/>ll, >>>> String.prototype.replace) >>>> String.prototype.replace = function (regexp, object) { >>>> if (object == null && regexp != null && typeof regexp === "object") >>>> { >>>> const re = new RegExp( >>>> Object.keys(regexp) >>>> .map(key => `${old(key, /[\\^$*+?.()|[\]{}]/g, '\\$&')}`) >>>> .join("|") >>>> ) >>>> return old(this, re, m => object[m]) >>>> } else { >>>> return old(this, regexp, object) >>>> } >>>> } >>>> ``` >>>> >>>> This would cover about 99% of my use for something like this, with >>>> less runtime overhead (that of not needing to check for and >>>> potentially match multiple regular expressions at runtime) and better >>>> static analyzability (you only need to check it's an object literal or >>>> constant frozen object, not that it's argument is the result of the >>>> built-in `Map` call). 
It's exceptionally difficult to optimize for >>>> this unless you know everything's a string, but most cases where I had >>>> to pass a callback that wasn't super complex looked a lot like this: >>>> >>>> ```js >>>> // What I use: >>>> function escapeHTML(str) { >>>> return str.replace(/["'&<>]/g, m => { >>>> switch (m) { >>>> case '"': return """ >>>> case "'": return "'" >>>> case "&": return "&" >>>> case "<": return "<" >>>> case ">": return ">" >>>> default: throw new TypeError("unreachable") >>>> } >>>> }) >>>> } >>>> >>>> // What it could be >>>> function escapeHTML(str) { >>>> return str.replace({ >>>> '"': """, >>>> "'": "'", >>>> "&": "&", >>>> "<": "<", >>>> ">": ">", >>>> }) >>>> } >>>> ``` >>>> >>>> And yes, this enables optimizations engines couldn't easily produce >>>> otherwise. In this instance, an engine could find that the object is >>>> static with only single-character entries, and it could replace the >>>> call to a fast-path one that relies on a cheap lookup table instead >>>> (Unicode replacement would be similar, except you'd need an extra >>>> layer of indirection with astrals to avoid blowing up memory when >>>> generating these tables): >>>> >>>> ```js >>>> // Original >>>> function escapeHTML(str) { >>>> return str.replace({ >>>> '"': """, >>>> "'": "'", >>>> "&": "&", >>>> "<": "<", >>>> ">": ">", >>>> }) >>>> } >>>> >>>> // Not real JS, but think of it as how an engine might implement this. >>>> The >>>> // implementation of the runtime function `ReplaceWithLookupTable` is >>>> omitted >>>> // for brevity, but you could imagine how it could be implemented, >>>> given the >>>> // pseudo-TS signature: >>>> // >>>> // ```ts >>>> // declare function %ReplaceWithLookupTable( >>>> // str: string, >>>> // table: string[] >>>> // ): string >>>> // ``` >>>> function escapeHTML(str) { >>>> static { >>>> // A zero-initialized array with 2^16 entries (U+0000-U+FFFF), >>>> except >>>> // for the object's members. 
This takes up to about 70K per >>>> instance, >>>> // but these are *far* more often called than created. >>>> const _lookup_escapeHTML = %calloc(65536) >>>> >>>> _lookup_escapeHTML[34] = """ >>>> _lookup_escapeHTML[38] = "&" >>>> _lookup_escapeHTML[39] = "'" >>>> _lookup_escapeHTML[60] = ">" >>>> _lookup_escapeHTML[62] = "<" >>>> } >>>> >>>> return %ReplaceWithLookupTable(str, _lookup_escapeHTML) >>>> } >>>> ``` >>>> >>>> Likewise, similar, but more restrained, optimizations could be >>>> performed on objects with multibyte strings, since they can be reduced >>>> to a simple search trie. (These can be built in even the general case >>>> if the strings are large enough to merit it - small ropes are pretty >>>> cheap to create.) >>>> >>>> For what it's worth, there's precedent here in Ruby, which has support >>>> for `Hash`es as `String#gsub` parameters which work similarly. >>>> >>>> ----- >>>> >>>> Isiah Meadows >>>> m...@isiahmeadows.com >>>> www.isiahmeadows.com >>>> >>>> >>>> On Fri, May 18, 2018 at 1:01 PM, Logan Smyth <loganfsm...@gmail.com> >>>> wrote: >>>> >> It wouldn't necessarily break existing API, since >>>> String.prototype.replace >>>> >> currently accepts only RegExp or strings. >>>> > >>>> > Not quite accurate. It accepts anything with a `Symbol.replace` >>>> property, or >>>> > a string. >>>> > >>>> > Given that, what you're describing can be implemented as >>>> > ``` >>>> > Map.prototype[Symbol.replace] = function(str) { >>>> > for(const [key, value] of this) { >>>> > str = str.replace(key, value); >>>> > } >>>> > return str; >>>> > }; >>>> > ``` >>>> > >>>> >> I don't know if the ECMAScript spec mandates preserving a particular >>>> order >>>> >> to a Map's elements. >>>> > >>>> > It does, so you're good there. >>>> > >>>> >> Detecting collisions between matching regular expressions or strings. >>>> > >>>> > I think this would be my primary concern, but no so much ordering as >>>> > expectations. 
Like if you did >>>> > ``` >>>> > "1".replace(new Map([ >>>> > ['1', '2'], >>>> > ['2', '3], >>>> > ]); >>>> > ``` >>>> > is the result `2` or `3`? `3` seems surprising to me, at least in the >>>> > general sense, because there was no `2` in the original input, but >>>> it's also >>>> > hard to see how you'd spec the behavior to avoid that if general regex >>>> > replacement is supported. >>>> > >>>> > On Fri, May 18, 2018 at 9:47 AM, Alex Vincent <ajvinc...@gmail.com> >>>> wrote: >>>> >> >>>> >> Reading [1] in the digests, I think there might actually be an API >>>> >> improvement that is doable. >>>> >> >>>> >> Suppose the String.prototype.replace API allowed passing in a single >>>> >> argument, a Map instance where the keys were strings or regular >>>> expressions >>>> >> and the values were replacement strings or functions. >>>> >> >>>> >> Advantages: >>>> >> * Shorthand - instead of writing str.replace(a, b).replace(c, >>>> >> d).replace(e, f)... you get str.replace(regExpMap) >>>> >> * Reusable - the same regular expression/string map could be used for >>>> >> several strings (assuming of course the user didn't just abstract >>>> the call >>>> >> into a separate function) >>>> >> * Modifiable on demand - developers could easily add new regular >>>> >> expression matches to the map object, or remove them >>>> >> * It wouldn't necessarily break existing API, since >>>> >> String.prototype.replace currently accepts only RegExp or strings. >>>> >> >>>> >> Disadvantages / reasons not to do it: >>>> >> * Detecting collisions between matching regular expressions or >>>> strings. >>>> >> If two regular expressions match the same string, or a regular >>>> expression >>>> >> and a search string match, the expected results may vary because a >>>> Map's >>>> >> elements might not be consistently ordered. I don't know if the >>>> ECMAScript >>>> >> spec mandates preserving a particular order to a Map's elements. 
>>>> >> - if we preserve the same chaining capability >>>> >> (str.replace(map1).replace(map2)...), this might not be a big >>>> problem. >>>> >> >>>> >> The question is, how often do people chain replace calls together? >>>> >> >>>> >> * It's not particularly hard to chain several replace calls together. >>>> >> It's just verbose, which might not be a high enough burden to >>>> overcome for >>>> >> adding API. >>>> >> >>>> >> That's my two cents for the day. Thoughts? >>>> >> >>>> >> [1] https://esdiscuss.org/topic/adding-map-directly-to-string-pr >>>> ototype >>>> >> >>>> >> -- >>>> >> "The first step in confirming there is a bug in someone else's work >>>> is >>>> >> confirming there are no bugs in your own." >>>> >> -- Alexander J. Vincent, June 30, 2001 >>>> >> >>>> >> _______________________________________________ >>>> >> es-discuss mailing list >>>> >> es-discuss@mozilla.org >>>> >> https://mail.mozilla.org/listinfo/es-discuss >>>> >> >>>> > >>>> > >>>> > _______________________________________________ >>>> > es-discuss mailing list >>>> > es-discuss@mozilla.org >>>> > https://mail.mozilla.org/listinfo/es-discuss >>>> > >>>> _______________________________________________ >>>> es-discuss mailing list >>>> es-discuss@mozilla.org >>>> https://mail.mozilla.org/listinfo/es-discuss >>>> >>> _______________________________________________ >>> es-discuss mailing list >>> es-discuss@mozilla.org >>> https://mail.mozilla.org/listinfo/es-discuss >>> >>> >>> >>> _______________________________________________ >>> es-discuss mailing list >>> es-discuss@mozilla.org >>> https://mail.mozilla.org/listinfo/es-discuss >>> >>> >> >> _______________________________________________ >> es-discuss mailing list >> es-discuss@mozilla.org >> https://mail.mozilla.org/listinfo/es-discuss >> >> > > >
_______________________________________________ es-discuss mailing list es-discuss@mozilla.org https://mail.mozilla.org/listinfo/es-discuss