This is an automated email from the ASF dual-hosted git repository.
wangweipeng pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/fury.git
The following commit(s) were added to refs/heads/main by this push:
new 235bd49c feat(javascript): Speed up string serializer (#2083)
235bd49c is described below
commit 235bd49c9aa7862b2b697760f3eddda241e45422
Author: weipeng <[email protected]>
AuthorDate: Wed Feb 26 11:42:11 2025 +0800
feat(javascript): Speed up string serializer (#2083)
## What does this PR do?
The PR
(https://github.com/nodejs/node/commit/6cb0690fccd6ed902f785edd1ae5b55855035998)
that adds string encoding detection to the Node.js repository has been
merged. We can now speed up string processing without the hps module.
There are now three string serialization paths: the first uses the hps
module, which has very high performance; the second is for newer
versions of Node.js, which can detect string encoding at zero cost; the
third is the compatible fallback, which is used for older versions of
Node.js and for browser environments.
---
javascript/packages/fury/lib/writer/index.ts | 49 +++++++++++++++++++++++++---
1 file changed, 44 insertions(+), 5 deletions(-)
diff --git a/javascript/packages/fury/lib/writer/index.ts
b/javascript/packages/fury/lib/writer/index.ts
index 9f005728..295f5767 100644
--- a/javascript/packages/fury/lib/writer/index.ts
+++ b/javascript/packages/fury/lib/writer/index.ts
@@ -17,13 +17,21 @@
* under the License.
*/
-import { HalfMaxInt32, HalfMinInt32, Hps, LATIN1, UTF8 } from "../type";
+import { HalfMaxInt32, HalfMinInt32, Hps, LATIN1, UTF16, UTF8 } from "../type";
import { PlatformBuffer, alloc, strByteLength } from "../platformBuffer";
import { OwnershipError } from "../error";
import { toFloat16 } from "./number";
const MAX_POOL_SIZE = 1024 * 1024 * 3; // 3MB
+function getInternalStringDetector() {
+ if (!globalThis || !globalThis.require) {
+ return null;
+ }
+ const { isStringOneByteRepresentation } = global.require("node:v8");
+ return isStringOneByteRepresentation;
+}
+
export class BinaryWriter {
private cursor = 0;
private byteLength = 0;
@@ -36,6 +44,7 @@ export class BinaryWriter {
};
private hpsEnable = false;
+ private internalStringDetector: (((content: string) => boolean) | null) =
null;
constructor(config: {
hps?: Hps;
@@ -43,6 +52,7 @@ export class BinaryWriter {
this.initPoll();
this.config = config;
this.hpsEnable = Boolean(config?.hps);
+ this.internalStringDetector = getInternalStringDetector();
}
private initPoll() {
@@ -197,7 +207,32 @@ export class BinaryWriter {
this.cursor = serializeString(v, this.platformBuffer, this.cursor);
}
- stringOfVarUInt32Slow(v: string) {
+ stringOfVarUInt32WithDetector(v: string) {
+ const isLatin1 = this.internalStringDetector!(v);
+ if (isLatin1) {
+ const len = v.length;
+ this.dataView.setUint8(this.cursor++, LATIN1);
+ this.varUInt32(len);
+ this.reserve(len);
+ if (len < 40) {
+ for (let index = 0; index < v.length; index++) {
+ this.platformBuffer[this.cursor + index] = v.charCodeAt(index);
+ }
+ } else {
+ this.platformBuffer.write(v, this.cursor, "latin1");
+ }
+ this.cursor += len;
+ } else {
+ const len = v.length * 2;
+ this.dataView.setUint8(this.cursor++, UTF16);
+ this.varUInt32(len);
+ this.reserve(len);
+ this.platformBuffer.write(v, this.cursor, "utf16le");
+ this.cursor += len;
+ }
+ }
+
+ stringOfVarUInt32Compatibly(v: string) {
const len = strByteLength(v);
const isLatin1 = len === v.length;
this.dataView.setUint8(this.cursor++, isLatin1 ? LATIN1 : UTF8);
@@ -326,8 +361,12 @@ export class BinaryWriter {
}
stringOfVarUInt32(v: string) {
- return this.hpsEnable
- ? this.stringOfVarUInt32Fast(v)
- : this.stringOfVarUInt32Slow(v);
+ if (this.hpsEnable) {
+ return this.stringOfVarUInt32Fast(v);
+ }
+ if (this.internalStringDetector !== null) {
+ return this.stringOfVarUInt32WithDetector(v);
+ }
+ return this.stringOfVarUInt32Compatibly(v);
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]