This is an automated email from the ASF dual-hosted git repository.

domoritz pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 9c097d504a GH-39248: [JS] Unify code paths for utf8 and largeUtf8 (#39249)
9c097d504a is described below

commit 9c097d504a6acc193a5ce0a4cbf3551c948dcf90
Author: Dominik Moritz <[email protected]>
AuthorDate: Sun Dec 17 22:19:54 2023 -0500

    GH-39248: [JS] Unify code paths for utf8 and largeUtf8 (#39249)
    
    Reduce the code size by sharing common code paths between Utf8 and
    LargeUtf8. The shared paths call `Number` a few times on offsets that are
    already plain numbers (the Utf8 case), which should be a no-op.
    
    * Closes: #39248
---
 js/.vscode/settings.json          |  2 +-
 js/src/visitor/get.ts             | 20 +++-----------------
 js/src/visitor/set.ts             | 19 +++----------------
 js/src/visitor/vectorassembler.ts | 24 +++---------------------
 4 files changed, 10 insertions(+), 55 deletions(-)

diff --git a/js/.vscode/settings.json b/js/.vscode/settings.json
index 113a662180..e52da54e54 100644
--- a/js/.vscode/settings.json
+++ b/js/.vscode/settings.json
@@ -2,7 +2,7 @@
   "typescript.tsdk": "node_modules/typescript/lib",
   "editor.trimAutoWhitespace": true,
   "editor.codeActionsOnSave": {
-    "source.fixAll.eslint": false
+    "source.fixAll.eslint": "explicit"
   },
   "[javascript]": {
     "editor.tabSize": 4,
diff --git a/js/src/visitor/get.ts b/js/src/visitor/get.ts
index a801c90047..112d2f2983 100644
--- a/js/src/visitor/get.ts
+++ b/js/src/visitor/get.ts
@@ -116,16 +116,7 @@ function wrapGet<T extends DataType>(fn: (data: Data<T>, 
_1: any) => any) {
 /** @ignore */
 const getNull = <T extends Null>(_data: Data<T>, _index: number): T['TValue'] 
=> null;
 /** @ignore */
-const getVariableWidthBytes = (values: Uint8Array, valueOffsets: Int32Array, 
index: number) => {
-    if (index + 1 >= valueOffsets.length) {
-        return null as any;
-    }
-    const x = valueOffsets[index];
-    const y = valueOffsets[index + 1];
-    return values.subarray(x, y);
-};
-/** @ignore */
-const getLargeVariableWidthBytes = (values: Uint8Array, valueOffsets: 
BigInt64Array, index: number) => {
+const getVariableWidthBytes = (values: Uint8Array, valueOffsets: Int32Array | 
BigInt64Array, index: number) => {
     if (index + 1 >= valueOffsets.length) {
         return null as any;
     }
@@ -162,15 +153,10 @@ const getFixedSizeBinary = <T extends FixedSizeBinary>({ 
stride, values }: Data<
 /** @ignore */
 const getBinary = <T extends Binary>({ values, valueOffsets }: Data<T>, index: 
number): T['TValue'] => getVariableWidthBytes(values, valueOffsets, index);
 /** @ignore */
-const getUtf8 = <T extends Utf8>({ values, valueOffsets }: Data<T>, index: 
number): T['TValue'] => {
+const getUtf8 = <T extends Utf8 | LargeUtf8>({ values, valueOffsets }: 
Data<T>, index: number): T['TValue'] => {
     const bytes = getVariableWidthBytes(values, valueOffsets, index);
     return bytes !== null ? decodeUtf8(bytes) : null as any;
 };
-/** @ignore */
-const getLargeUtf8 = <T extends LargeUtf8>({ values, valueOffsets }: Data<T>, 
index: number): T['TValue'] => {
-    const bytes = getLargeVariableWidthBytes(values, valueOffsets, index);
-    return bytes !== null ? decodeUtf8(bytes) : null as any;
-};
 
 /* istanbul ignore next */
 /** @ignore */
@@ -344,7 +330,7 @@ GetVisitor.prototype.visitFloat16 = wrapGet(getFloat16);
 GetVisitor.prototype.visitFloat32 = wrapGet(getNumeric);
 GetVisitor.prototype.visitFloat64 = wrapGet(getNumeric);
 GetVisitor.prototype.visitUtf8 = wrapGet(getUtf8);
-GetVisitor.prototype.visitLargeUtf8 = wrapGet(getLargeUtf8);
+GetVisitor.prototype.visitLargeUtf8 = wrapGet(getUtf8);
 GetVisitor.prototype.visitBinary = wrapGet(getBinary);
 GetVisitor.prototype.visitFixedSizeBinary = wrapGet(getFixedSizeBinary);
 GetVisitor.prototype.visitDate = wrapGet(getDate);
diff --git a/js/src/visitor/set.ts b/js/src/visitor/set.ts
index a439ec8311..15b0721660 100644
--- a/js/src/visitor/set.ts
+++ b/js/src/visitor/set.ts
@@ -125,16 +125,7 @@ export const setEpochMsToNanosecondsLong = (data: 
Int32Array, index: number, epo
 };
 
 /** @ignore */
-export const setVariableWidthBytes = <T extends Int32Array>(values: 
Uint8Array, valueOffsets: T, index: number, value: Uint8Array) => {
-    if (index + 1 < valueOffsets.length) {
-        const x = valueOffsets[index];
-        const y = valueOffsets[index + 1];
-        values.set(value.subarray(0, y - x), x);
-    }
-};
-
-/** @ignore */
-export const setLargeVariableWidthBytes = <T extends BigInt64Array>(values: 
Uint8Array, valueOffsets: T, index: number, value: Uint8Array) => {
+export const setVariableWidthBytes = <T extends Int32Array | 
BigInt64Array>(values: Uint8Array, valueOffsets: T, index: number, value: 
Uint8Array) => {
     if (index + 1 < valueOffsets.length) {
         const x = bigIntToNumber(valueOffsets[index]);
         const y = bigIntToNumber(valueOffsets[index + 1]);
@@ -176,13 +167,9 @@ export const setFixedSizeBinary = <T extends 
FixedSizeBinary>({ stride, values }
 /** @ignore */
 const setBinary = <T extends Binary>({ values, valueOffsets }: Data<T>, index: 
number, value: T['TValue']) => setVariableWidthBytes(values, valueOffsets, 
index, value);
 /** @ignore */
-const setUtf8 = <T extends Utf8>({ values, valueOffsets }: Data<T>, index: 
number, value: T['TValue']) => {
+const setUtf8 = <T extends Utf8 | LargeUtf8>({ values, valueOffsets }: 
Data<T>, index: number, value: T['TValue']) => {
     setVariableWidthBytes(values, valueOffsets, index, encodeUtf8(value));
 };
-/** @ignore */
-const setLargeUtf8 = <T extends LargeUtf8>({ values, valueOffsets }: Data<T>, 
index: number, value: T['TValue']) => {
-    setLargeVariableWidthBytes(values, valueOffsets, index, encodeUtf8(value));
-};
 
 /* istanbul ignore next */
 export const setDate = <T extends Date_>(data: Data<T>, index: number, value: 
T['TValue']): void => {
@@ -381,7 +368,7 @@ SetVisitor.prototype.visitFloat16 = wrapSet(setFloat16);
 SetVisitor.prototype.visitFloat32 = wrapSet(setFloat);
 SetVisitor.prototype.visitFloat64 = wrapSet(setFloat);
 SetVisitor.prototype.visitUtf8 = wrapSet(setUtf8);
-SetVisitor.prototype.visitLargeUtf8 = wrapSet(setLargeUtf8);
+SetVisitor.prototype.visitLargeUtf8 = wrapSet(setUtf8);
 SetVisitor.prototype.visitBinary = wrapSet(setBinary);
 SetVisitor.prototype.visitFixedSizeBinary = wrapSet(setFixedSizeBinary);
 SetVisitor.prototype.visitDate = wrapSet(setDate);
diff --git a/js/src/visitor/vectorassembler.ts 
b/js/src/visitor/vectorassembler.ts
index 7a9d3bdd57..df820e6f5e 100644
--- a/js/src/visitor/vectorassembler.ts
+++ b/js/src/visitor/vectorassembler.ts
@@ -42,6 +42,7 @@ export interface VectorAssembler extends Visitor {
     visitInt<T extends Int>(data: Data<T>): this;
     visitFloat<T extends Float>(data: Data<T>): this;
     visitUtf8<T extends Utf8>(data: Data<T>): this;
+    visitLargeUtf8<T extends LargeUtf8>(data: Data<T>): this;
     visitBinary<T extends Binary>(data: Data<T>): this;
     visitFixedSizeBinary<T extends FixedSizeBinary>(data: Data<T>): this;
     visitDate<T extends Date_>(data: Data<T>): this;
@@ -202,29 +203,10 @@ function assembleFlatVector<T extends Int | Float | 
FixedSizeBinary | Date_ | Ti
 }
 
 /** @ignore */
-function assembleFlatListVector<T extends Utf8 | Binary>(this: 
VectorAssembler, data: Data<T>) {
-    const { length, values, valueOffsets } = data;
-    const { [0]: begin, [length]: end } = valueOffsets;
-    return _assembleFlatListVector.call(this, length, begin, end, values, 
valueOffsets);
-}
-
-/** @ignore */
-function assembleLargeFlatListVector<T extends Utf8 | Binary | 
LargeUtf8>(this: VectorAssembler, data: Data<T>) {
+function assembleFlatListVector<T extends Utf8 | Binary | LargeUtf8>(this: 
VectorAssembler, data: Data<T>) {
     const { length, values, valueOffsets } = data;
     const begin = bigIntToNumber(valueOffsets[0]);
     const end = bigIntToNumber(valueOffsets[length]);
-    return _assembleFlatListVector.call(this, length, begin, end, values, 
valueOffsets);
-}
-
-/** @ignore */
-function _assembleFlatListVector<T extends Utf8 | Binary | LargeUtf8>(
-    this: VectorAssembler,
-    length: number,
-    begin: number,
-    end: number,
-    values: T['TArray'],
-    valueOffsets: T['TOffsetArray']
-) {
     const byteLength = Math.min(end - begin, values.byteLength - begin);
     // Push in the order FlatList types read their buffers
     addBuffer.call(this, rebaseValueOffsets(-begin, length + 1, valueOffsets 
as any)); // valueOffsets buffer first
@@ -255,7 +237,7 @@ VectorAssembler.prototype.visitBool = assembleBoolVector;
 VectorAssembler.prototype.visitInt = assembleFlatVector;
 VectorAssembler.prototype.visitFloat = assembleFlatVector;
 VectorAssembler.prototype.visitUtf8 = assembleFlatListVector;
-VectorAssembler.prototype.visitLargeUtf8 = assembleLargeFlatListVector;
+VectorAssembler.prototype.visitLargeUtf8 = assembleFlatListVector;
 VectorAssembler.prototype.visitBinary = assembleFlatListVector;
 VectorAssembler.prototype.visitFixedSizeBinary = assembleFlatVector;
 VectorAssembler.prototype.visitDate = assembleFlatVector;

Reply via email to