With files that are frequently that size, it might be worth considering whether you should use a custom format+validator\* instead. It'd take a lot less memory, which could be helpful since the first row alone of [this file][1] takes about 4-5K in Firefox when deserialized — I verified this in the console (to be exact, 5032 the first time, 4128 the second, and 4416 the third). Also, a megabyte is a *lot* to send down the wire in Web terms.
\* In this case, you'd need a validator that uses minimal perfect hashes and a compact binary data representation that doesn't rely on a concrete start/end. That would avoid the mess of constantly having to look things up in memory, while leaving your IR much smaller. Another item of note: JS strings are 16-bit, which is wasteful in memory for your entire object. [1]: https://raw.githubusercontent.com/kaizhu256/node-swgg-github-all/2018.2.2/assets.swgg.swagger.json ----- Isiah Meadows [email protected] Looking for web consulting? Or a new website? Send me an email and we can get started. www.isiahmeadows.com On Fri, Mar 16, 2018 at 11:53 PM, kai zhu <[email protected]> wrote: > stepping aside from the security aspect, having your code-base’s json-files > normalized with sorted-keys is good-housekeeping, especially when you want > to sanely maintain ones >1mb in size (e.g. large swagger > json-documentations) [1]. > > and you can easily operationalize your build-process / pre-commit-checks to > auto-key-sort json-files with the following simple shell-function [2]. > > [1] > https://github.com/kaizhu256/node-swgg-github-all/blob/2018.2.2/assets.swgg.swagger.json > [2] > https://github.com/kaizhu256/node-utility2/blob/2018.1.13/lib.utility2.sh#L1513 > > > > ```shell > #!/bin/sh > # .bashrc > : ' > # to install, copy-paste the shell-function shFileJsonNormalize below > # into your shell startup script (.bashrc, .profile, etc...) > > > # example shell-usage: > > source ~/.bashrc > printf "{ > \"version\": \"0.0.1\", > \"name\": \"my-app\", > \"aa\": { > \"zz\": 1, > \"yy\": { > \"xx\": 2, > \"ww\": 3 > } > }, > \"bb\": [ > 3, > 2, > 1, > null > ] > }" > package.json > shFileJsonNormalize package.json > cat package.json > > > # key-sorted output: > { > "aa": { > "yy": { > "ww": 3, > "xx": 2 > }, > "zz": 1 > }, > "bb": [ > 3, > 2, > 1, > null > ], > "name": "my-app", > "version": "0.0.1" > } > ' > > > shFileJsonNormalize() {(set -e > # this shell-function will > # 1. 
read the json-data from $FILE > # 2. normalize the json-data > # 3. write the normalized json-data back to $FILE > FILE="$1" > node -e " > // <script> > /*jslint > bitwise: true, > browser: true, > maxerr: 8, > maxlen: 100, > node: true, > nomen: true, > regexp: true, > stupid: true > */ > 'use strict'; > var local; > local = {}; > local.fs = require('fs'); > local.jsonStringifyOrdered = function (jsonObj, replacer, space) { > /* > * this function will JSON.stringify the jsonObj, > * with object-keys sorted and circular-references removed > */ > var circularList, stringify, tmp; > stringify = function (jsonObj) { > /* > * this function will recursively JSON.stringify the jsonObj, > * with object-keys sorted and circular-references removed > */ > // if jsonObj is an object, then recurse its items with object-keys > sorted > if (jsonObj && > typeof jsonObj === 'object' && > typeof jsonObj.toJSON !== 'function') { > // ignore circular-reference > if (circularList.indexOf(jsonObj) >= 0) { > return; > } > circularList.push(jsonObj); > // if jsonObj is an array, then recurse its jsonObjs > if (Array.isArray(jsonObj)) { > return '[' + jsonObj.map(function (jsonObj) { > // recurse > tmp = stringify(jsonObj); > return typeof tmp === 'string' > ? tmp > : 'null'; > }).join(',') + ']'; > } > return '{' + Object.keys(jsonObj) > // sort object-keys > .sort() > .map(function (key) { > // recurse > tmp = stringify(jsonObj[key]); > if (typeof tmp === 'string') { > return JSON.stringify(key) + ':' + tmp; > } > }) > .filter(function (jsonObj) { > return typeof jsonObj === 'string'; > }) > .join(',') + '}'; > } > // else JSON.stringify as normal > return JSON.stringify(jsonObj); > }; > circularList = []; > return JSON.stringify(typeof jsonObj === 'object' && jsonObj > // recurse > ? 
JSON.parse(stringify(jsonObj)) > : jsonObj, replacer, space); > }; > local.fs.writeFileSync(process.argv[1], local.jsonStringifyOrdered( > JSON.parse(local.fs.readFileSync(process.argv[1], 'utf8')), > null, > 4 > ) + '\n'); > // </script> > " "$FILE" > )} > ``` > > On Mar 17, 2018, at 5:43 AM, Mike Samuel <[email protected]> wrote: > > > > On Fri, Mar 16, 2018, 4:58 PM Anders Rundgren > <[email protected]> wrote: >> >> On 2018-03-16 21:41, Mike Samuel wrote: >> > >> > >> > On Fri, Mar 16, 2018 at 4:34 PM, C. Scott Ananian <[email protected] >> > <mailto:[email protected]>> wrote: >> > >> > On Fri, Mar 16, 2018 at 4:07 PM, Anders Rundgren >> > <[email protected] <mailto:[email protected]>> >> > wrote: >> > >> >> > To restate my main objections: >> > >> > I think any proposal to offer an alternative stringify instead of a >> > string->string transform is not very good >> > and could be easily improved by rephrasing it as a string->string >> > transform. >> >> Could you give a concrete example on that? >> >> > > I've given three. As written, the proposal produces invalid or low quality > output given (undefined, objects with toJSON methods, and symbols as either > keys or values). These would not be problems for a real canonicalizer since > none are present in a string of JSON. > > In addition, two distant users of the canonicalizer who wish to check hashes > need to agree on the ancillary arguments like the replacer if canonicalize > takes the same arguments and actually uses them. They also need to agree on > implementation details of toJSON methods which is a backward compatibility > hazard. > > If you did solve the toJSON problem by incorporating calls to that method > you've now complicated cross-platform behavior. If you phrase in terms of > string->string it is much easier to disentangle the definition of > canonicalizers JSON from JS and make it language agnostic. > > Finally, your proposal is not the VHS of canonicalizers. 
That would be > x=>JSON.stringify(JSON.parse(x)) since it's deployed and used. > _______________________________________________ > es-discuss mailing list > [email protected] > https://mail.mozilla.org/listinfo/es-discuss > > > > _______________________________________________ > es-discuss mailing list > [email protected] > https://mail.mozilla.org/listinfo/es-discuss > _______________________________________________ es-discuss mailing list [email protected] https://mail.mozilla.org/listinfo/es-discuss

