Re: Overload str.replace to take a Map?

Isiah Meadows Sun, 20 May 2018 20:50:55 -0700

Next challenge: how does it compare to these two?

```js
// Simplified version
/**
 * Rewrites markup-like tokens in `text` in a single regex pass.
 *
 * Tokens handled: `<`, `>`, their `&lt;` / `&gt;` entity forms, the tag
 * openers `<script` / `</script`, and the literal pair "🍐🍌".
 *
 * @param {string} text - Input string to rewrite.
 * @returns {string} `text` with every matched token replaced.
 */
function simpleEscape(text) {
  // `<(?:\/?script)?` is greedy, so `<script` and `</script` are matched as
  // whole tokens before the bare `<` alternative gets a chance.
  return text.replace(/<(?:\/?script)?|&lt;|>|&gt;|🍐🍌/gu, m => {
    switch (m) {
    case '<': return '[lt]';
    case '&lt;': return '[lt]';
    case '>': return '[gt]';
    case '&gt;': return '[gt]';
    case '<script': return '[lt]noscript';
    case '</script': return '[lt]/noscript';
    // The only alternative left in the pattern is the "🍐🍌" pair.
    default: return '🍐🍑';
    }
  });
}


// Direct proposal equivalent
// Lookup table: one entry per token that the pattern in `objectEscape`
// can match, mapped to the text that should take its place.
var replacements = {
  '<': '[lt]',
  '&lt;': '[lt]',
  '>': '[gt]',
  '&gt;': '[gt]',
  '<script': '[lt]noscript',
  '</script': '[lt]/noscript',
  '🍐🍌': '🍐🍑'
};

/**
 * Rewrites every token matched by the pattern using the `replacements` table.
 *
 * @param {string} text - Input string to rewrite.
 * @returns {string} `text` with each matched token swapped for its table entry.
 */
function objectEscape(text) {
  const lookUp = function (token) {
    return replacements[token];
  };
  return text.replace(/<(?:\/?script)?|&[lg]t;|>|🍐🍌/gu, lookUp);
}
```

Oh, and with my proposal, your glue code could be simplified to this:

```js
// Illustration of the PROPOSED overload: `String.prototype.replace` called
// with a plain object of literal search strings mapped to replacements,
// applied in one pass over the input (assuming the longest key wins at each
// position). Without that feature the object is coerced to a string and no
// replacement takes place.
var text = '<script>evilFunction("🍐🍌🍐🍌")&lt;/script>';
text = text.replace({
  '<': '[lt]',
  '&lt;': '[lt]',
  '>': '[gt]',
  '&gt;': '[gt]',
  '<script': '[lt]noscript',
  '</script': '[lt]/noscript',
  '🍐🍌': '🍐🍑'
});
// output: [lt]noscript[gt]evilFunction("🍐🍑🍐🍑")[lt]/script[gt]
// (single pass: in `&lt;/script` only the `&lt;` key matches, so `/script`
// is left untouched)
```

And BTW, my two main justifications are that 1. I don't want to have
to escape simple stuff like this, and 2. I'd like the engine to lower
this into a fast, simple replace loop without having to compile a
regular expression. (Also, my proposal here is the simplest among
them.)

-----

Isiah Meadows
[email protected]
www.isiahmeadows.com

On Sun, May 20, 2018 at 3:40 PM, kai zhu <[email protected]> wrote:
>
> hi mathias, i see...  here's some simple, throwaway glue-code that does what 
> i think you want.
>
> ```js
> /*jslint
> node: true
> */
> 'use strict';
> var text;
> text = '<script>evilFunction("")&lt;/script>';
> [
>     ['<', '[lt]'],
>     ['&lt;', '[lt]'],
>     ['>', '[gt]'],
>     ['&gt', '[gt]'],
>     ['[lt]script', '[lt]noscript'],
>     ['[lt]/script', '[lt]/noscript'],
>     ['', '']
> ].forEach(function (element) {
>     text = text.replace(
>         // 
> https://stackoverflow.com/questions/3561493/is-there-a-regexp-escape-function-in-javascript
>         new RegExp(element[0].replace(/[\-\/\\\^$*+?.()|\[\]{}]/g, '\\$&'), 
> 'gu'),
>         element[1]
>     );
> });
> // output: [lt]noscript[gt]evilFunction("")[lt]/noscript[gt]
> console.log(text);
> ```
>
> kai zhu
> [email protected]
>
>
>
> On 21 May 2018, at 1:38 AM, Mathias Bynens <[email protected]> wrote:
>
> This particular `escapeHtml` implementation is limited to replacing single 
> characters, but if you wanted to escape any characters that can be 
> represented using a named character reference, you’re gonna need something 
> more generic, as some named character references expand to multiple 
> characters. That's what I was referring to earlier.
>
> On Sun, May 20, 2018 at 12:27 PM, kai zhu <[email protected]> wrote:
>>
>> sorry, there was a bug in the standalone-solution i last posted. here’s 
>> corrected version ^^;;;
>>
>> also highlighted in blue, the escapeHTML part of code relevant to this 
>> discussion.  and honestly, replacing those 6 blue-lines-of-code in this 
>> real-world example, with the proposed map-replace doesn’t make much of a 
>> difference in terms of overall readability/maintainability.
>>
>> ```js
>> /*
>>  * example.js
>>  *
>>  * this zero-dependency, standalone program will render mustache-based 
>> html-templates,
>>  * with the given dictionary, and print it to stdout
>>  * code derived from 
>> https://github.com/kaizhu256/node-utility2/blob/2018.1.13/lib.utility2.js#L5922
>>
>>
>>
>>  * example usage:
>> $ node example.js <template> <json-dictionary>
>> $ node example.js '<pre>
>> JSON.stringify("<b>hello world!</b>".toUpperCase()))=
>> {{hello.world toUpperCase jsonStringify}}
>> </pre>
>>
>> <ul>
>> {{#each myList}}
>> {{#if href}}
>> <li id="{{href encodeURIComponent}}">
>>     <a href="{{href}}">
>>     {{#if description}}
>>     {{description notHtmlSafe}}
>>     {{#unless description}}
>>     no description
>>     {{/if description}}
>>     </a>
>> </li>
>> {{/if href}}
>> {{/each myList}}
>> </ul>' '{
>>     "hello": {
>>         "world": "<b>hello world!</b>"
>>     },
>>     "myList": [
>>         null,
>>         {
>>             "href": "https://www.example.com/1",
>>             "description": "<b>click here!</b>"
>>         },
>>         {
>>             "href": "https://www.example.com/2"
>>         }
>>     ]
>> }'
>>
>>
>>
>>  * example output:
>> <pre>
>> JSON.stringify("<b>hello world!</b>".toUpperCase()))=
>> &quot;&lt;B&gt;HELLO WORLD!&lt;/B&gt;&quot;
>> </pre>
>>
>> <ul>
>>
>> <li id="https%3A%2F%2Fwww.example.com%2F1">
>>     <a href="https://www.example.com/1">
>>
>>     <b>click here!</b>
>>
>>     </a>
>> </li>
>>
>> <li id="https%3A%2F%2Fwww.example.com%2F2">
>>     <a href="https://www.example.com/2">
>>
>>     no description
>>
>>     </a>
>> </li>
>>
>> </ul>
>>  */
>>
>>
>>
>>
>>
>>
>>
>> /*jslint
>>     node: true,
>>     regexp: true
>> */
>> 'use strict';
>> var templateRender;
>> templateRender = function (template, dict, options) {
>> /*
>>  * this function will render the template with the given dict
>>  */
>>     var argList, getValue, match, renderPartial, rgx, tryCatch, skip, value;
>>     dict = dict || {};
>>     options = options || {};
>>     getValue = function (key) {
>>         argList = key.split(' ');
>>         value = dict;
>>         if (argList[0] === '#this/') {
>>             return;
>>         }
>>         // iteratively lookup nested values in the dict
>>         argList[0].split('.').forEach(function (key) {
>>             value = value && value[key];
>>         });
>>         return value;
>>     };
>>     renderPartial = function (match0, helper, key, partial) {
>>         switch (helper) {
>>         case 'each':
>>         case 'eachTrimRightComma':
>>             value = getValue(key);
>>             value = Array.isArray(value)
>>                 ? value.map(function (dict) {
>>                     // recurse with partial
>>                     return templateRender(partial, dict, options);
>>                 }).join('')
>>                 : '';
>>             // remove trailing-comma from last element
>>             if (helper === 'eachTrimRightComma') {
>>                 value = value.trimRight().replace((/,$/), '');
>>             }
>>             return value;
>>         case 'if':
>>             partial = partial.split('{{#unless ' + key + '}}');
>>             partial = getValue(key)
>>                 ? partial[0]
>>                 // handle 'unless' case
>>                 : partial.slice(1).join('{{#unless ' + key + '}}');
>>             // recurse with partial
>>             return templateRender(partial, dict, options);
>>         case 'unless':
>>             return getValue(key)
>>                 ? ''
>>                 // recurse with partial
>>                 : templateRender(partial, dict, options);
>>         default:
>>             // recurse with partial
>>             return match0[0] + templateRender(match0.slice(1), dict, 
>> options);
>>         }
>>     };
>>     tryCatch = function (fnc, message) {
>>     /*
>>      * this function will prepend the message to errorCaught
>>      */
>>         try {
>>             return fnc();
>>         } catch (errorCaught) {
>>             errorCaught.message = message + errorCaught.message;
>>             throw errorCaught;
>>         }
>>     };
>>     // render partials
>>     rgx = (/\{\{#(\w+) ([^}]+?)\}\}/g);
>>     template = template || '';
>>     for (match = rgx.exec(template); match; match = rgx.exec(template)) {
>>         rgx.lastIndex += 1 - match[0].length;
>>         template = template.replace(
>>             new RegExp('\\{\\{#(' + match[1] + ') (' + match[2] +
>>                 ')\\}\\}([\\S\\s]*?)\\{\\{/' + match[1] + ' ' + match[2] +
>>                 '\\}\\}'),
>>             renderPartial
>>         );
>>     }
>>     // search for keys in the template
>>     return template.replace((/\{\{[^}]+?\}\}/g), function (match0) {
>>         var notHtmlSafe;
>>         notHtmlSafe = options.notHtmlSafe;
>>         return tryCatch(function () {
>>             getValue(match0.slice(2, -2));
>>             if (value === undefined) {
>>                 return match0;
>>             }
>>             argList.slice(1).forEach(function (arg0, ii, list) {
>>                 switch (arg0) {
>>                 case 'alphanumeric':
>>                     value = value.replace((/\W/g), '_');
>>                     break;
>>                 case 'decodeURIComponent':
>>                     value = decodeURIComponent(value);
>>                     break;
>>                 case 'encodeURIComponent':
>>                     value = encodeURIComponent(value);
>>                     break;
>>                 case 'jsonStringify':
>>                     value = JSON.stringify(value);
>>                     break;
>>                 case 'jsonStringify4':
>>                     value = JSON.stringify(value, null, 4);
>>                     break;
>>                 case 'notHtmlSafe':
>>                     notHtmlSafe = true;
>>                     break;
>>                 case 'truncate':
>>                     skip = ii + 1;
>>                     if (value.length > list[skip]) {
>>                         value = value.slice(0, list[skip] - 3).trimRight() + 
>> '...';
>>                     }
>>                     break;
>>                 // default to String.prototype[arg0]()
>>                 default:
>>                     if (ii === skip) {
>>                         break;
>>                     }
>>                     value = value[arg0]();
>>                     break;
>>                 }
>>             });
>>             value = String(value);
>>             // default to htmlSafe
>>             if (!notHtmlSafe) {
>>                 value = value
>>                     .replace((/"/g), '&quot;')
>>                     .replace((/&/g), '&amp;')
>>                     .replace((/'/g), '&apos;')
>>                     .replace((/</g), '&lt;')
>>                     .replace((/>/g), '&gt;')
>>                     .replace((/&amp;(amp;|apos;|gt;|lt;|quot;)/ig), '&$1');
>>             }
>>             return value;
>>         }, 'templateRender could not render expression ' + 
>> JSON.stringify(match0) + '\n');
>>     });
>> };
>>
>> console.log(templateRender(process.argv[2], JSON.parse(process.argv[3])));
>> ```
>>
>> kai zhu
>> [email protected]
>>
>>
>>
>> On 20 May 2018, at 10:01 PM, kai zhu <[email protected]> wrote:
>>
>> @Kai
>> Have you ever tried writing an HTML template system on the front end? This 
>> *will* almost inevitably come up, and most of my use cases for this is on 
>> the front end itself handling various scenarios.
>>
>>
>> i have.  if we want to move from toy-cases to real-world frontend-examples 
>> [1] [2] [3], here's a zero-dependency, mustache-based template-system in 
>> under 110 sloc, which i've been using for the past 5 years.  and the trick 
>> to simplify rendering-of-partials, is to recurse them inside string.replace 
>> (see the red-highlighted sections of code).
>>
>> [1] standalone, static-function templateRender
>> https://github.com/kaizhu256/node-utility2/blob/2018.1.13/lib.utility2.js#L5922
>> [2] test-cases showing capabilities of templateRender
>> https://github.com/kaizhu256/node-utility2/blob/2018.1.13/test.js#L1411
>> [3] live website rendered using templateRender
>> https://kaizhu256.github.io/node-swgg-wechat-pay/build..beta..travis-ci.org/app/
>>
>> <Screen Shot 2018-05-20 at 9.13.17 PM copy.jpg>
>>
>> ```js
>> /*
>>  * example.js
>>  *
>>  * this zero-dependency, standalone program will render mustache-based 
>> html-templates,
>>  * with the given dictionary, and print it to stdout
>>  * code derived from 
>> https://github.com/kaizhu256/node-utility2/blob/2018.1.13/lib.utility2.js#L5922
>>
>>
>>
>>  * example usage:
>> $ node example.js <template> <json-dictionary>
>> $ node example.js '<pre>
>> JSON.stringify("<b>hello world!</b>".toUpperCase()))=
>> {{hello.world toUpperCase jsonStringify}}
>> </pre>
>>
>> <ul>
>> {{#each myList}}
>> {{#if href}}
>> <li id="{{href encodeURIComponent}}">
>>     <a href="{{href}}">
>>     {{#if description}}
>>     {{description notHtmlSafe}}
>>     {{#unless description}}
>>     no description
>>     {{/if description}}
>>     </a>
>> </li>
>> {{/if href}}
>> {{/each myList}}
>> </ul>' '{
>>     "hello": {
>>         "world": "<b>hello world!</b>"
>>     },
>>     "myList": [
>>         null,
>>         {
>>             "href": "https://www.example.com/1",
>>             "description": "<b>click here!</b>"
>>         },
>>         {
>>             "href": "https://www.example.com/2"
>>         }
>>     ]
>> }'
>>
>>
>>
>>  * example output:
>> <pre>
>> JSON.stringify("<b>hello world!</b>".toUpperCase()))=
>> &quot;&lt;B&gt;HELLO WORLD!&lt;/B&gt;&quot;
>> </pre>
>>
>> <ul>
>>
>> <li id="https%3A%2F%2Fwww.example.com%2F1">
>>     <a href="https://www.example.com/1">
>>
>>     <b>click here!</b>
>>
>>     </a>
>> </li>
>>
>> <li id="https%3A%2F%2Fwww.example.com%2F2">
>>     <a href="https://www.example.com/2">
>>
>>     no description
>>
>>     </a>
>> </li>
>>
>> </ul>
>>  */
>>
>>
>>
>>
>>
>>
>>
>> /*jslint
>>     node: true,
>>     regexp: true
>> */
>> 'use strict';
>> var templateRender;
>> templateRender = function (template, dict, notHtmlSafe) {
>> /*
>>  * this function will render the template with the given dict
>>  */
>>     var argList, getValue, match, renderPartial, rgx, skip, value;
>>     dict = dict || {};
>>     getValue = function (key) {
>>         argList = key.split(' ');
>>         value = dict;
>>         if (argList[0] === '#this/') {
>>             return;
>>         }
>>         // iteratively lookup nested values in the dict
>>         argList[0].split('.').forEach(function (key) {
>>             value = value && value[key];
>>         });
>>         return value;
>>     };
>>     renderPartial = function (match0, helper, key, partial) {
>>         switch (helper) {
>>         case 'each':
>>         case 'eachTrimRightComma':
>>             value = getValue(key);
>>             value = Array.isArray(value)
>>                 ? value.map(function (dict) {
>>                     // recurse with partial
>>                     return templateRender(partial, dict, notHtmlSafe);
>>                 }).join('')
>>                 : '';
>>             // remove trailing-comma from last element
>>             if (helper === 'eachTrimRightComma') {
>>                 value = value.trimRight().replace((/,$/), '');
>>             }
>>             return value;
>>         case 'if':
>>             partial = partial.split('{{#unless ' + key + '}}');
>>             partial = getValue(key)
>>                 ? partial[0]
>>                 // handle 'unless' case
>>                 : partial.slice(1).join('{{#unless ' + key + '}}');
>>             // recurse with partial
>>             return templateRender(partial, dict, notHtmlSafe);
>>         case 'unless':
>>             return getValue(key)
>>                 ? ''
>>                 // recurse with partial
>>                 : templateRender(partial, dict, notHtmlSafe);
>>         default:
>>             // recurse with partial
>>             return match0[0] + templateRender(match0.slice(1), dict, 
>> notHtmlSafe);
>>         }
>>     };
>>     // render partials
>>     rgx = (/\{\{#(\w+) ([^}]+?)\}\}/g);
>>     template = template || '';
>>     for (match = rgx.exec(template); match; match = rgx.exec(template)) {
>>         rgx.lastIndex += 1 - match[0].length;
>>         template = template.replace(
>>             new RegExp('\\{\\{#(' + match[1] + ') (' + match[2] +
>>                 ')\\}\\}([\\S\\s]*?)\\{\\{/' + match[1] + ' ' + match[2] +
>>                 '\\}\\}'),
>>             renderPartial
>>         );
>>     }
>>     // search for keys in the template
>>     return template.replace((/\{\{[^}]+?\}\}/g), function (match0) {
>>         getValue(match0.slice(2, -2));
>>         if (value === undefined) {
>>             return match0;
>>         }
>>         argList.slice(1).forEach(function (arg0, ii, list) {
>>             switch (arg0) {
>>             case 'alphanumeric':
>>                 value = value.replace((/\W/g), '_');
>>                 break;
>>             case 'decodeURIComponent':
>>                 value = decodeURIComponent(value);
>>                 break;
>>             case 'encodeURIComponent':
>>                 value = encodeURIComponent(value);
>>                 break;
>>             case 'jsonStringify':
>>                 value = JSON.stringify(value);
>>                 break;
>>             case 'jsonStringify4':
>>                 value = JSON.stringify(value, null, 4);
>>                 break;
>>             case 'notHtmlSafe':
>>                 notHtmlSafe = true;
>>                 break;
>>             case 'truncate':
>>                 skip = ii + 1;
>>                 if (value.length > list[skip]) {
>>                     value = value.slice(0, list[skip] - 3).trimRight() + 
>> '...';
>>                 }
>>                 break;
>>             // default to String.prototype[arg0]()
>>             default:
>>                 if (ii === skip) {
>>                     break;
>>                 }
>>                 value = value[arg0]();
>>                 break;
>>             }
>>         });
>>         value = String(value);
>>         // default to htmlSafe
>>         if (!notHtmlSafe) {
>>             value = value
>>                 .replace((/"/g), '&quot;')
>>                 .replace((/&/g), '&amp;')
>>                 .replace((/'/g), '&apos;')
>>                 .replace((/</g), '&lt;')
>>                 .replace((/>/g), '&gt;')
>>                 .replace((/&amp;(amp;|apos;|gt;|lt;|quot;)/ig), '&$1');
>>         }
>>         return value;
>>     });
>> };
>>
>> console.log(templateRender(process.argv[2], JSON.parse(process.argv[3])));
>> ```
>>
>>
>>
>> kai zhu
>> [email protected]
>>
>>
>>
>> On 20 May 2018, at 6:32 PM, Isiah Meadows <[email protected]> wrote:
>>
>> @Mathias
>>
>> My particular `escapeHTML` example *could* be written like that (and it *is* 
>> somewhat in the prose). But you're right that in the prose, I did bring up 
>> the potential for things like `str.replace({cheese: "cake", ham: "eggs"})`.
>>
>> @Kai
>>
>> Have you ever tried writing an HTML template system on the front end? This 
>> *will* almost inevitably come up, and most of my use cases for this are on 
>> the front end itself handling various scenarios.
>>
>> @Cyril
>>
>> And every single one of those patterns is going to need to be compiled and 
>> executed, and compiling and interpreting regular expressions is definitely 
>> not quick, especially when you can nest Kleene stars. (See: 
>> https://en.wikipedia.org/wiki/Regular_expression#Implementations_and_running_times)
>>  That's why I'm against it - we don't need to complicate this proposal with 
>> that mess.
>>
>> -----
>>
>> Isiah Meadows
>> [email protected]
>> www.isiahmeadows.com
>>
>> On Sat, May 19, 2018 at 7:04 PM, Mathias Bynens <[email protected]> wrote:
>>>
>>> Hey Kai, you’re oversimplifying. Your solution works for a single Unicode 
>>> symbol (corresponding to a single code point) but falls apart as soon as 
>>> you need to match multiple symbols of possibly varying length, like in the 
>>> `escapeHtml` example.
>>>
>>> On Sat, May 19, 2018 at 8:43 AM, kai zhu <[email protected]> wrote:
>>>>
>>>> again, you backend-engineers are making something more complicated than 
>>>> needs be, when simple, throwaway glue-code will suffice.  agree with 
>>>> jordan, this feature is a needless cross-cut of String.prototype.replace.
>>>>
>>>> ```
>>>> /*jslint
>>>>     node: true
>>>> */
>>>> 'use strict';
>>>> var dict;
>>>> dict = {
>>>>     '$': '^',
>>>>     '1': '2',
>>>>     '<': '&lt;',
>>>>     '': '',
>>>>     '-': '_',
>>>>     ']': '@'
>>>> };
>>>> // output: "test_^^[22@ &lt;foo>"
>>>> console.log('test-$$[11] <foo>'.replace((/[\S\s]/gu), function (character) 
>>>> {
>>>>     return dict.hasOwnProperty(character)
>>>>         ? dict[character]
>>>>         : character;
>>>> }));
>>>> ```
>>>>
>>>> kai zhu
>>>> [email protected]
>>>>
>>>>
>>>>
>>>> On 19 May 2018, at 4:08 PM, Cyril Auburtin <[email protected]> 
>>>> wrote:
>>>>
>>>> You can also have a
>>>>
>>>> ```js
>>>> var replacer = replacements => {
>>>>   const re = new RegExp(replacements.map(([k,_,escaped=k]) => 
>>>> escaped).join('|'), 'gu');
>>>>   const replaceMap = new Map(replacements);
>>>>   return s => s.replace(re, w => replaceMap.get(w));
>>>> }
>>>> var replace = replacer([['$', '^', String.raw`\$`], ['1', '2'], ['<', 
>>>> '&lt;'], ['', ''], ['-', '_'], [']', '@', String.raw`\]`]]);
>>>> replace('test-$$[11] <foo>') // "test_^^[22@ &lt;foo>"
>>>> ```
>>>> but it's quickly messy to work with escaping
>>>>
>>>> Le sam. 19 mai 2018 à 08:17, Isiah Meadows <[email protected]> a 
>>>> écrit :
>>>>>
>>>>> Here's what I'd prefer instead: overload `String.prototype.replace` to
>>>>> take non-callable objects, as sugar for this:
>>>>>
>>>>> ```js
>>>>> const old = Function.call.bind(Function.call, String.prototype.replace)
>>>>> String.prototype.replace = function (regexp, object) {
>>>>>     if (object == null && regexp != null && typeof regexp === "object") {
>>>>>         const re = new RegExp(
>>>>>             Object.keys(regexp)
>>>>>             .map(key => `${old(key, /[\\^$*+?.()|[\]{}]/g, '\\$&')}`)
>>>>>             .join("|"),
>>>>>             "g"
>>>>>         )
>>>>>         return old(this, re, m => regexp[m])
>>>>>     } else {
>>>>>         return old(this, regexp, object)
>>>>>     }
>>>>> }
>>>>> ```
>>>>>
>>>>> This would cover about 99% of my use for something like this, with
>>>>> less runtime overhead (that of not needing to check for and
>>>>> potentially match multiple regular expressions at runtime) and better
>>>>> static analyzability (you only need to check it's an object literal or
>>>>> constant frozen object, not that its argument is the result of the
>>>>> built-in `Map` call). It's exceptionally difficult to optimize for
>>>>> this unless you know everything's a string, but most cases where I had
>>>>> to pass a callback that wasn't super complex looked a lot like this:
>>>>>
>>>>> ```js
>>>>> // What I use:
>>>>> function escapeHTML(str) {
>>>>>     return str.replace(/["'&<>]/g, m => {
>>>>>         switch (m) {
>>>>>         case '"': return "&#34;"
>>>>>         case "'": return "&#39;"
>>>>>         case "&": return "&amp;"
>>>>>         case "<": return "&lt;"
>>>>>         case ">": return "&gt;"
>>>>>         default: throw new TypeError("unreachable")
>>>>>         }
>>>>>     })
>>>>> }
>>>>>
>>>>> // What it could be
>>>>> function escapeHTML(str) {
>>>>>     return str.replace({
>>>>>         '"': "&#34;",
>>>>>         "'": "&#39;",
>>>>>         "&": "&amp;",
>>>>>         "<": "&lt;",
>>>>>         ">": "&gt;",
>>>>>     })
>>>>> }
>>>>> ```
>>>>>
>>>>> And yes, this enables optimizations engines couldn't easily produce
>>>>> otherwise. In this instance, an engine could find that the object is
>>>>> static with only single-character entries, and it could replace the
>>>>> call to a fast-path one that relies on a cheap lookup table instead
>>>>> (Unicode replacement would be similar, except you'd need an extra
>>>>> layer of indirection with astrals to avoid blowing up memory when
>>>>> generating these tables):
>>>>>
>>>>> ```js
>>>>> // Original
>>>>> function escapeHTML(str) {
>>>>>     return str.replace({
>>>>>         '"': "&#34;",
>>>>>         "'": "&#39;",
>>>>>         "&": "&amp;",
>>>>>         "<": "&lt;",
>>>>>         ">": "&gt;",
>>>>>     })
>>>>> }
>>>>>
>>>>> // Not real JS, but think of it as how an engine might implement this. The
>>>>> // implementation of the runtime function `ReplaceWithLookupTable` is 
>>>>> omitted
>>>>> // for brevity, but you could imagine how it could be implemented, given 
>>>>> the
>>>>> // pseudo-TS signature:
>>>>> //
>>>>> // ```ts
>>>>> // declare function %ReplaceWithLookupTable(
>>>>> //     str: string,
>>>>> //     table: string[]
>>>>> // ): string
>>>>> // ```
>>>>> function escapeHTML(str) {
>>>>>     static {
>>>>>         // A zero-initialized array with 2^16 entries (U+0000-U+FFFF), 
>>>>> except
>>>>>         // for the object's members. This takes up to about 70K per 
>>>>> instance,
>>>>>         // but these are *far* more often called than created.
>>>>>         const _lookup_escapeHTML = %calloc(65536)
>>>>>
>>>>>         _lookup_escapeHTML[34] = "&#34;"
>>>>>         _lookup_escapeHTML[38] = "&amp;"
>>>>>         _lookup_escapeHTML[39] = "&#39;"
>>>>>         _lookup_escapeHTML[60] = "&lt;"
>>>>>         _lookup_escapeHTML[62] = "&gt;"
>>>>>     }
>>>>>
>>>>>     return %ReplaceWithLookupTable(str, _lookup_escapeHTML)
>>>>> }
>>>>> ```
>>>>>
>>>>> Likewise, similar, but more restrained, optimizations could be
>>>>> performed on objects with multibyte strings, since they can be reduced
>>>>> to a simple search trie. (These can be built in even the general case
>>>>> if the strings are large enough to merit it - small ropes are pretty
>>>>> cheap to create.)
>>>>>
>>>>> For what it's worth, there's precedent here in Ruby, which has support
>>>>> for `Hash`es as `String#gsub` parameters which work similarly.
>>>>>
>>>>> -----
>>>>>
>>>>> Isiah Meadows
>>>>> [email protected]
>>>>> www.isiahmeadows.com
>>>>>
>>>>>
>>>>> On Fri, May 18, 2018 at 1:01 PM, Logan Smyth <[email protected]> 
>>>>> wrote:
>>>>> >> It wouldn't necessarily break existing API, since 
>>>>> >> String.prototype.replace
>>>>> >> currently accepts only RegExp or strings.
>>>>> >
>>>>> > Not quite accurate. It accepts anything with a `Symbol.replace` 
>>>>> > property, or
>>>>> > a string.
>>>>> >
>>>>> > Given that, what you're describing can be implemented as
>>>>> > ```
>>>>> > Map.prototype[Symbol.replace] = function(str) {
>>>>> >   for(const [key, value] of this) {
>>>>> >     str = str.replace(key, value);
>>>>> >   }
>>>>> >   return str;
>>>>> > };
>>>>> > ```
>>>>> >
>>>>> >> I don't know if the ECMAScript spec mandates preserving a particular 
>>>>> >> order
>>>>> >> to a Map's elements.
>>>>> >
>>>>> > It does, so you're good there.
>>>>> >
>>>>> >> Detecting collisions between matching regular expressions or strings.
>>>>> >
>>>>> > I think this would be my primary concern, but not so much ordering as
>>>>> > expectations. Like if you did
>>>>> > ```
>>>>> > "1".replace(new Map([
>>>>> >   ['1', '2'],
>>>>> >   ['2', '3'],
>>>>> > ]));
>>>>> > ```
>>>>> > is the result `2` or `3`? `3` seems surprising to me, at least in the
>>>>> > general sense, because there was no `2` in the original input, but it's 
>>>>> > also
>>>>> > hard to see how you'd spec the behavior to avoid that if general regex
>>>>> > replacement is supported.
>>>>> >
>>>>> > On Fri, May 18, 2018 at 9:47 AM, Alex Vincent <[email protected]> 
>>>>> > wrote:
>>>>> >>
>>>>> >> Reading [1] in the digests, I think there might actually be an API
>>>>> >> improvement that is doable.
>>>>> >>
>>>>> >> Suppose the String.prototype.replace API allowed passing in a single
>>>>> >> argument, a Map instance where the keys were strings or regular 
>>>>> >> expressions
>>>>> >> and the values were replacement strings or functions.
>>>>> >>
>>>>> >> Advantages:
>>>>> >> * Shorthand - instead of writing str.replace(a, b).replace(c,
>>>>> >> d).replace(e, f)... you get str.replace(regExpMap)
>>>>> >> * Reusable - the same regular expression/string map could be used for
>>>>> >> several strings (assuming of course the user didn't just abstract the 
>>>>> >> call
>>>>> >> into a separate function)
>>>>> >> * Modifiable on demand - developers could easily add new regular
>>>>> >> expression matches to the map object, or remove them
>>>>> >> * It wouldn't necessarily break existing API, since
>>>>> >> String.prototype.replace currently accepts only RegExp or strings.
>>>>> >>
>>>>> >> Disadvantages / reasons not to do it:
>>>>> >> * Detecting collisions between matching regular expressions or strings.
>>>>> >> If two regular expressions match the same string, or a regular 
>>>>> >> expression
>>>>> >> and a search string match, the expected results may vary because a 
>>>>> >> Map's
>>>>> >> elements might not be consistently ordered.  I don't know if the 
>>>>> >> ECMAScript
>>>>> >> spec mandates preserving a particular order to a Map's elements.
>>>>> >>   - if we preserve the same chaining capability
>>>>> >> (str.replace(map1).replace(map2)...), this might not be a big problem.
>>>>> >>
>>>>> >> The question is, how often do people chain replace calls together?
>>>>> >>
>>>>> >> * It's not particularly hard to chain several replace calls together.
>>>>> >> It's just verbose, which might not be a high enough burden to overcome 
>>>>> >> for
>>>>> >> adding API.
>>>>> >>
>>>>> >> That's my two cents for the day.  Thoughts?
>>>>> >>
>>>>> >> [1] https://esdiscuss.org/topic/adding-map-directly-to-string-prototype
>>>>> >>
>>>>> >> --
>>>>> >> "The first step in confirming there is a bug in someone else's work is
>>>>> >> confirming there are no bugs in your own."
>>>>> >> -- Alexander J. Vincent, June 30, 2001
>>>>> >>
>>>>> >> _______________________________________________
>>>>> >> es-discuss mailing list
>>>>> >> [email protected]
>>>>> >> https://mail.mozilla.org/listinfo/es-discuss
>>>>> >>
>>>>> >
>>>>> >
>>>>> > _______________________________________________
>>>>> > es-discuss mailing list
>>>>> > [email protected]
>>>>> > https://mail.mozilla.org/listinfo/es-discuss
>>>>> >
>>>>> _______________________________________________
>>>>> es-discuss mailing list
>>>>> [email protected]
>>>>> https://mail.mozilla.org/listinfo/es-discuss
>>>>
>>>> _______________________________________________
>>>> es-discuss mailing list
>>>> [email protected]
>>>> https://mail.mozilla.org/listinfo/es-discuss
>>>>
>>>>
>>>>
>>>> _______________________________________________
>>>> es-discuss mailing list
>>>> [email protected]
>>>> https://mail.mozilla.org/listinfo/es-discuss
>>>>
>>>
>>>
>>> _______________________________________________
>>> es-discuss mailing list
>>> [email protected]
>>> https://mail.mozilla.org/listinfo/es-discuss
>>>
>>
>>
>>
>
>
_______________________________________________
es-discuss mailing list
[email protected]
https://mail.mozilla.org/listinfo/es-discuss

Re: Overload str.replace to take a Map?

Reply via email to