This is an automated email from the ASF dual-hosted git repository.
domoritz pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 9c097d504a GH-39248: [JS] Unify code paths for utf8 and largeUtf8
(#39249)
9c097d504a is described below
commit 9c097d504a6acc193a5ce0a4cbf3551c948dcf90
Author: Dominik Moritz <[email protected]>
AuthorDate: Sun Dec 17 22:19:54 2023 -0500
GH-39248: [JS] Unify code paths for utf8 and largeUtf8 (#39249)
Reduce the code size by sharing common code paths between utf8 and largeUtf8.
The only cost is a few extra `Number` calls on values that are already
numbers, which should be a no-op.
* Closes: #39248
---
js/.vscode/settings.json | 2 +-
js/src/visitor/get.ts | 20 +++-----------------
js/src/visitor/set.ts | 19 +++----------------
js/src/visitor/vectorassembler.ts | 24 +++---------------------
4 files changed, 10 insertions(+), 55 deletions(-)
diff --git a/js/.vscode/settings.json b/js/.vscode/settings.json
index 113a662180..e52da54e54 100644
--- a/js/.vscode/settings.json
+++ b/js/.vscode/settings.json
@@ -2,7 +2,7 @@
"typescript.tsdk": "node_modules/typescript/lib",
"editor.trimAutoWhitespace": true,
"editor.codeActionsOnSave": {
- "source.fixAll.eslint": false
+ "source.fixAll.eslint": "explicit"
},
"[javascript]": {
"editor.tabSize": 4,
diff --git a/js/src/visitor/get.ts b/js/src/visitor/get.ts
index a801c90047..112d2f2983 100644
--- a/js/src/visitor/get.ts
+++ b/js/src/visitor/get.ts
@@ -116,16 +116,7 @@ function wrapGet<T extends DataType>(fn: (data: Data<T>,
_1: any) => any) {
/** @ignore */
const getNull = <T extends Null>(_data: Data<T>, _index: number): T['TValue']
=> null;
/** @ignore */
-const getVariableWidthBytes = (values: Uint8Array, valueOffsets: Int32Array,
index: number) => {
- if (index + 1 >= valueOffsets.length) {
- return null as any;
- }
- const x = valueOffsets[index];
- const y = valueOffsets[index + 1];
- return values.subarray(x, y);
-};
-/** @ignore */
-const getLargeVariableWidthBytes = (values: Uint8Array, valueOffsets:
BigInt64Array, index: number) => {
+const getVariableWidthBytes = (values: Uint8Array, valueOffsets: Int32Array |
BigInt64Array, index: number) => {
if (index + 1 >= valueOffsets.length) {
return null as any;
}
@@ -162,15 +153,10 @@ const getFixedSizeBinary = <T extends FixedSizeBinary>({
stride, values }: Data<
/** @ignore */
const getBinary = <T extends Binary>({ values, valueOffsets }: Data<T>, index:
number): T['TValue'] => getVariableWidthBytes(values, valueOffsets, index);
/** @ignore */
-const getUtf8 = <T extends Utf8>({ values, valueOffsets }: Data<T>, index:
number): T['TValue'] => {
+const getUtf8 = <T extends Utf8 | LargeUtf8>({ values, valueOffsets }:
Data<T>, index: number): T['TValue'] => {
const bytes = getVariableWidthBytes(values, valueOffsets, index);
return bytes !== null ? decodeUtf8(bytes) : null as any;
};
-/** @ignore */
-const getLargeUtf8 = <T extends LargeUtf8>({ values, valueOffsets }: Data<T>,
index: number): T['TValue'] => {
- const bytes = getLargeVariableWidthBytes(values, valueOffsets, index);
- return bytes !== null ? decodeUtf8(bytes) : null as any;
-};
/* istanbul ignore next */
/** @ignore */
@@ -344,7 +330,7 @@ GetVisitor.prototype.visitFloat16 = wrapGet(getFloat16);
GetVisitor.prototype.visitFloat32 = wrapGet(getNumeric);
GetVisitor.prototype.visitFloat64 = wrapGet(getNumeric);
GetVisitor.prototype.visitUtf8 = wrapGet(getUtf8);
-GetVisitor.prototype.visitLargeUtf8 = wrapGet(getLargeUtf8);
+GetVisitor.prototype.visitLargeUtf8 = wrapGet(getUtf8);
GetVisitor.prototype.visitBinary = wrapGet(getBinary);
GetVisitor.prototype.visitFixedSizeBinary = wrapGet(getFixedSizeBinary);
GetVisitor.prototype.visitDate = wrapGet(getDate);
diff --git a/js/src/visitor/set.ts b/js/src/visitor/set.ts
index a439ec8311..15b0721660 100644
--- a/js/src/visitor/set.ts
+++ b/js/src/visitor/set.ts
@@ -125,16 +125,7 @@ export const setEpochMsToNanosecondsLong = (data:
Int32Array, index: number, epo
};
/** @ignore */
-export const setVariableWidthBytes = <T extends Int32Array>(values:
Uint8Array, valueOffsets: T, index: number, value: Uint8Array) => {
- if (index + 1 < valueOffsets.length) {
- const x = valueOffsets[index];
- const y = valueOffsets[index + 1];
- values.set(value.subarray(0, y - x), x);
- }
-};
-
-/** @ignore */
-export const setLargeVariableWidthBytes = <T extends BigInt64Array>(values:
Uint8Array, valueOffsets: T, index: number, value: Uint8Array) => {
+export const setVariableWidthBytes = <T extends Int32Array |
BigInt64Array>(values: Uint8Array, valueOffsets: T, index: number, value:
Uint8Array) => {
if (index + 1 < valueOffsets.length) {
const x = bigIntToNumber(valueOffsets[index]);
const y = bigIntToNumber(valueOffsets[index + 1]);
@@ -176,13 +167,9 @@ export const setFixedSizeBinary = <T extends
FixedSizeBinary>({ stride, values }
/** @ignore */
const setBinary = <T extends Binary>({ values, valueOffsets }: Data<T>, index:
number, value: T['TValue']) => setVariableWidthBytes(values, valueOffsets,
index, value);
/** @ignore */
-const setUtf8 = <T extends Utf8>({ values, valueOffsets }: Data<T>, index:
number, value: T['TValue']) => {
+const setUtf8 = <T extends Utf8 | LargeUtf8>({ values, valueOffsets }:
Data<T>, index: number, value: T['TValue']) => {
setVariableWidthBytes(values, valueOffsets, index, encodeUtf8(value));
};
-/** @ignore */
-const setLargeUtf8 = <T extends LargeUtf8>({ values, valueOffsets }: Data<T>,
index: number, value: T['TValue']) => {
- setLargeVariableWidthBytes(values, valueOffsets, index, encodeUtf8(value));
-};
/* istanbul ignore next */
export const setDate = <T extends Date_>(data: Data<T>, index: number, value:
T['TValue']): void => {
@@ -381,7 +368,7 @@ SetVisitor.prototype.visitFloat16 = wrapSet(setFloat16);
SetVisitor.prototype.visitFloat32 = wrapSet(setFloat);
SetVisitor.prototype.visitFloat64 = wrapSet(setFloat);
SetVisitor.prototype.visitUtf8 = wrapSet(setUtf8);
-SetVisitor.prototype.visitLargeUtf8 = wrapSet(setLargeUtf8);
+SetVisitor.prototype.visitLargeUtf8 = wrapSet(setUtf8);
SetVisitor.prototype.visitBinary = wrapSet(setBinary);
SetVisitor.prototype.visitFixedSizeBinary = wrapSet(setFixedSizeBinary);
SetVisitor.prototype.visitDate = wrapSet(setDate);
diff --git a/js/src/visitor/vectorassembler.ts
b/js/src/visitor/vectorassembler.ts
index 7a9d3bdd57..df820e6f5e 100644
--- a/js/src/visitor/vectorassembler.ts
+++ b/js/src/visitor/vectorassembler.ts
@@ -42,6 +42,7 @@ export interface VectorAssembler extends Visitor {
visitInt<T extends Int>(data: Data<T>): this;
visitFloat<T extends Float>(data: Data<T>): this;
visitUtf8<T extends Utf8>(data: Data<T>): this;
+ visitLargeUtf8<T extends LargeUtf8>(data: Data<T>): this;
visitBinary<T extends Binary>(data: Data<T>): this;
visitFixedSizeBinary<T extends FixedSizeBinary>(data: Data<T>): this;
visitDate<T extends Date_>(data: Data<T>): this;
@@ -202,29 +203,10 @@ function assembleFlatVector<T extends Int | Float |
FixedSizeBinary | Date_ | Ti
}
/** @ignore */
-function assembleFlatListVector<T extends Utf8 | Binary>(this:
VectorAssembler, data: Data<T>) {
- const { length, values, valueOffsets } = data;
- const { [0]: begin, [length]: end } = valueOffsets;
- return _assembleFlatListVector.call(this, length, begin, end, values,
valueOffsets);
-}
-
-/** @ignore */
-function assembleLargeFlatListVector<T extends Utf8 | Binary |
LargeUtf8>(this: VectorAssembler, data: Data<T>) {
+function assembleFlatListVector<T extends Utf8 | Binary | LargeUtf8>(this:
VectorAssembler, data: Data<T>) {
const { length, values, valueOffsets } = data;
const begin = bigIntToNumber(valueOffsets[0]);
const end = bigIntToNumber(valueOffsets[length]);
- return _assembleFlatListVector.call(this, length, begin, end, values,
valueOffsets);
-}
-
-/** @ignore */
-function _assembleFlatListVector<T extends Utf8 | Binary | LargeUtf8>(
- this: VectorAssembler,
- length: number,
- begin: number,
- end: number,
- values: T['TArray'],
- valueOffsets: T['TOffsetArray']
-) {
const byteLength = Math.min(end - begin, values.byteLength - begin);
// Push in the order FlatList types read their buffers
addBuffer.call(this, rebaseValueOffsets(-begin, length + 1, valueOffsets
as any)); // valueOffsets buffer first
@@ -255,7 +237,7 @@ VectorAssembler.prototype.visitBool = assembleBoolVector;
VectorAssembler.prototype.visitInt = assembleFlatVector;
VectorAssembler.prototype.visitFloat = assembleFlatVector;
VectorAssembler.prototype.visitUtf8 = assembleFlatListVector;
-VectorAssembler.prototype.visitLargeUtf8 = assembleLargeFlatListVector;
+VectorAssembler.prototype.visitLargeUtf8 = assembleFlatListVector;
VectorAssembler.prototype.visitBinary = assembleFlatListVector;
VectorAssembler.prototype.visitFixedSizeBinary = assembleFlatVector;
VectorAssembler.prototype.visitDate = assembleFlatVector;