Copilot commented on code in PR #225:
URL: https://github.com/apache/arrow-js/pull/225#discussion_r2253038956


##########
src/visitor/vectorloader.ts:
##########
@@ -72,6 +72,9 @@ export class VectorLoader extends Visitor {
     public visitUtf8<T extends type.Utf8>(type: T, { length, nullCount } = 
this.nextFieldNode()) {
         return makeData({ type, length, nullCount, nullBitmap: 
this.readNullBitmap(type, nullCount), valueOffsets: this.readOffsets(type), 
data: this.readData(type) });
     }
+    public visitUtf8View<T extends type.Utf8View>(type: T, { length, nullCount 
} = this.nextFieldNode()) {
+        return makeData({ type, length, nullCount, nullBitmap: 
this.readNullBitmap(type, nullCount), valueOffsets: this.readOffsets(type), 
data: this.readData(type) });

Review Comment:
   The visitUtf8View method implementation is identical to visitUtf8, but 
Utf8View has a different internal representation that may require different 
handling of data buffers. According to the Utf8View specification, it uses a 
view struct and may point to multiple data buffers, which differs from the 
simple offset-based approach used by Utf8.
   ```suggestion
           // Read the null bitmap as usual
           const nullBitmap = this.readNullBitmap(type, nullCount);
           // Read the value offsets as usual
           const valueOffsets = this.readOffsets(type);
           // Read the view struct buffer (describes mapping to data buffers)
           const viewStruct = this.readViewStruct(type, length);
           // Read the referenced data buffers (could be multiple)
           const dataBuffers = this.readDataBuffers(type, viewStruct);
           return makeData({
               type,
               length,
               nullCount,
               nullBitmap,
               valueOffsets,
               viewStruct,
               dataBuffers
           });
   ```



##########
src/builder/utf8view.ts:
##########
@@ -0,0 +1,44 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import { Utf8View } from '../type.js';
+import { encodeUtf8 } from '../util/utf8.js';
+import { BinaryBuilder } from './binary.js';
+import { BufferBuilder } from './buffer.js';
+import { VariableWidthBuilder, BuilderOptions } from '../builder.js';
+
+/** @ignore */
+export class Utf8ViewBuilder<TNull = any> extends 
VariableWidthBuilder<Utf8View, TNull> {
+    constructor(opts: BuilderOptions<Utf8View, TNull>) {
+        super(opts);
+        this._values = new BufferBuilder(Uint8Array);
+    }
+    public get byteLength(): number {
+        let size = this._pendingLength + (this.length * 4);
+        this._offsets && (size += this._offsets.byteLength);
+        this._values && (size += this._values.byteLength);
+        this._nulls && (size += this._nulls.byteLength);
+        return size;
+    }
+    public setValue(index: number, value: string) {
+        return super.setValue(index, encodeUtf8(value) as any);
+    }

Review Comment:
   The @ts-ignore comment suppresses TypeScript errors without explanation. 
This makes it difficult to understand what error is being suppressed and why 
it's safe to ignore.
   ```suggestion
       }
       // TypeScript cannot track that we are intentionally overriding this 
method by direct prototype assignment below.
       // It reports a type error because the method is replaced at runtime. 
This is safe because the assigned method is compatible.
   ```



##########
test/generate-test-data.ts:
##########
@@ -342,6 +346,28 @@ function generateUtf8<T extends Utf8>(this: 
TestDataVectorGenerator, type: T, le
     return { values: () => values, vector: new Vector([makeData({ type, 
length, nullCount, nullBitmap, valueOffsets, data })]) };
 }
 
+function generateUtf8View<T extends Utf8View>(this: TestDataVectorGenerator, 
type: T, length = 100, nullCount = Math.trunc(length * 0.2)): 
GeneratedVector<T> {
+    const nullBitmap = createBitmap(length, nullCount);
+    const valueOffsets = createVariableWidthOffsets32(length, nullBitmap, 10, 
20, nullCount != 0);
+    const values: string[] = new Array(valueOffsets.length - 1).fill(null);
+    [...valueOffsets.slice(1)]
+        .map((o, i) => isValid(nullBitmap, i) ? o - valueOffsets[i] : null)
+        .reduce((map, length, i) => {
+            if (length !== null) {
+                if (length > 0) {
+                    do {
+                        values[i] = randomString(length);
+                    } while (map.has(values[i]));
+                    return map.set(values[i], i);
+                }
+                values[i] = '';
+            }
+            return map;
+        }, new Map<string, number>());
+    const data = createVariableWidthBytes(length, nullBitmap, valueOffsets, 
(i) => encodeUtf8(values[i]));
+    return { values: () => values, vector: new Vector([makeData({ type, 
length, nullCount, nullBitmap, valueOffsets, data })]) };

Review Comment:
   The generateUtf8View function uses the same implementation as generateUtf8 
with valueOffsets and simple byte encoding, but Utf8View should use a different 
internal structure with view structs that contain length and either inline data 
or buffer references. This doesn't match the Utf8View specification.
   ```suggestion
       // Generate random string values, similar to generateUtf8
       const values: string[] = new Array(length).fill(null);
       for (let i = 0; i < length; ++i) {
           if (isValid(nullBitmap, i)) {
               // Random string length between 10 and 20
               values[i] = randomString(10 + Math.floor(Math.random() * 11));
           }
       }
       // Now, for each value, create a view struct
       // We'll use the convention: { length: number, data: Uint8Array } for 
all values
       // (If the Utf8View spec requires inline vs. buffer, you can split here, 
but for simplicity, always use Uint8Array)
       const viewStructs: { length: number, data: Uint8Array | null }[] = [];
       for (let i = 0; i < length; ++i) {
           if (!isValid(nullBitmap, i)) {
               viewStructs.push({ length: 0, data: null });
           } else {
               const utf8 = encodeUtf8(values[i]);
               viewStructs.push({ length: utf8.length, data: utf8 });
           }
       }
       // The vector should be constructed from the array of view structs
       return {
           values: () => values,
           vector: new Vector([makeData({
               type,
               length,
               nullCount,
               nullBitmap,
               data: viewStructs
           })])
       };
   ```



##########
src/builder/utf8view.ts:
##########
@@ -0,0 +1,44 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import { Utf8View } from '../type.js';
+import { encodeUtf8 } from '../util/utf8.js';
+import { BinaryBuilder } from './binary.js';
+import { BufferBuilder } from './buffer.js';
+import { VariableWidthBuilder, BuilderOptions } from '../builder.js';
+
+/** @ignore */
+export class Utf8ViewBuilder<TNull = any> extends 
VariableWidthBuilder<Utf8View, TNull> {
+    constructor(opts: BuilderOptions<Utf8View, TNull>) {
+        super(opts);
+        this._values = new BufferBuilder(Uint8Array);
+    }
+    public get byteLength(): number {
+        let size = this._pendingLength + (this.length * 4);
+        this._offsets && (size += this._offsets.byteLength);
+        this._values && (size += this._values.byteLength);
+        this._nulls && (size += this._nulls.byteLength);
+        return size;
+    }
+    public setValue(index: number, value: string) {
+        return super.setValue(index, encodeUtf8(value) as any);
+    }
+    // @ts-ignore
+    protected _flushPending(pending: Map<number, Uint8Array | undefined>, 
pendingLength: number): void { }
+}
+
+(Utf8ViewBuilder.prototype as any)._flushPending = (BinaryBuilder.prototype as 
any)._flushPending;

Review Comment:
   Using prototype copying with type assertions to share implementation between 
builders is fragile and makes the code harder to maintain. Consider using 
composition or inheritance instead of prototype manipulation.
   ```suggestion
       protected _flushPending(pending: Map<number, Uint8Array | undefined>, 
pendingLength: number): void {
           // Delegate to BinaryBuilder's _flushPending implementation using 
composition
           // This assumes BinaryBuilder's _flushPending is compatible and does 
not rely on internal state
           // If not, copy the logic here or extract to a shared helper function
           (BinaryBuilder.prototype as any)._flushPending.call(this, pending, 
pendingLength);
       }
   }
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to