trxcllnt commented on code in PR #320:
URL: https://github.com/apache/arrow-js/pull/320#discussion_r2525373382
##########
src/builder/utf8view.ts:
##########
@@ -15,143 +15,18 @@
// specific language governing permissions and limitations
// under the License.
-import { Utf8View, BinaryView } from '../type.js';
+import { Utf8View } from '../type.js';
+import { BuilderOptions } from '../builder.js';
+import { BinaryViewBuilder } from './binaryview.js';
import { encodeUtf8 } from '../util/utf8.js';
-import { BuilderOptions, Builder } from '../builder.js';
-import { BufferBuilder } from './buffer.js';
-import { makeData } from '../data.js';
/** @ignore */
-export class Utf8ViewBuilder<TNull = any> extends Builder<Utf8View, TNull> {
- protected _views: BufferBuilder<Uint8Array>;
- protected _variadicBuffers: Uint8Array[] = [];
- protected _currentBuffer: BufferBuilder<Uint8Array> | null = null;
- protected _currentBufferIndex = 0;
- protected _currentBufferOffset = 0;
- protected readonly _bufferSize = 32 * 1024 * 1024;
-
+export class Utf8ViewBuilder<TNull = any> extends BinaryViewBuilder<Utf8View,
string, TNull> {
constructor(opts: BuilderOptions<Utf8View, TNull>) {
super(opts);
- this._views = new BufferBuilder(Uint8Array);
- }
-
- public get byteLength(): number {
- let size = 0;
- this._views && (size += this._views.byteLength);
- this._nulls && (size += this._nulls.byteLength);
- size += this._variadicBuffers.reduce((acc, buffer) => acc +
buffer.byteLength, 0);
- this._currentBuffer && (size += this._currentBuffer.byteLength);
- return size;
- }
-
- public setValue(index: number, value: string) {
- const data = encodeUtf8(value);
- const length = data.length;
-
- // Ensure views buffer has space up to this index
- const bytesNeeded = (index + 1) * BinaryView.ELEMENT_WIDTH;
- const currentBytes = this._views.length;
- if (bytesNeeded > currentBytes) {
- this._views.reserve(bytesNeeded - currentBytes);
- }
-
- const viewBuffer = this._views.buffer;
- const viewOffset = index * BinaryView.ELEMENT_WIDTH;
- const view = new DataView(viewBuffer.buffer, viewBuffer.byteOffset +
viewOffset, BinaryView.ELEMENT_WIDTH);
-
- view.setInt32(BinaryView.LENGTH_OFFSET, length, true);
-
- if (length <= BinaryView.INLINE_CAPACITY) {
- viewBuffer.set(data, viewOffset + BinaryView.INLINE_OFFSET);
- for (let i = length; i < BinaryView.INLINE_CAPACITY; i++) {
- viewBuffer[viewOffset + BinaryView.INLINE_OFFSET + i] = 0;
- }
- } else {
- const prefix = new DataView(data.buffer, data.byteOffset,
Math.min(4, length));
- view.setUint32(BinaryView.INLINE_OFFSET, prefix.getUint32(0,
true), true);
-
- if (!this._currentBuffer || this._currentBufferOffset + length >
this._bufferSize) {
- if (this._currentBuffer) {
-
this._variadicBuffers.push(this._currentBuffer.buffer.slice(0,
this._currentBufferOffset));
- }
- this._currentBuffer = new BufferBuilder(Uint8Array);
- this._currentBufferIndex = this._variadicBuffers.length;
- this._currentBufferOffset = 0;
- }
-
- const bufferData = this._currentBuffer.reserve(length).buffer;
- bufferData.set(data, this._currentBufferOffset);
-
- view.setInt32(BinaryView.BUFFER_INDEX_OFFSET,
this._currentBufferIndex, true);
- view.setInt32(BinaryView.BUFFER_OFFSET_OFFSET,
this._currentBufferOffset, true);
-
- this._currentBufferOffset += length;
- }
-
- return this;
- }
-
- public setValid(index: number, isValid: boolean) {
- // Ensure space is allocated in the views buffer for this index
- const bytesNeeded = (index + 1) * BinaryView.ELEMENT_WIDTH;
- const currentBytes = this._views.length;
- if (bytesNeeded > currentBytes) {
- this._views.reserve(bytesNeeded - currentBytes);
- }
-
- const result = super.setValid(index, isValid);
-
- if (!result) {
- // For null values, zero out the view struct
- const viewBuffer = this._views.buffer;
- const viewOffset = index * BinaryView.ELEMENT_WIDTH;
- for (let i = 0; i < BinaryView.ELEMENT_WIDTH; i++) {
- viewBuffer[viewOffset + i] = 0;
- }
- }
-
- return result;
- }
-
- public clear() {
- this._variadicBuffers = [];
- this._currentBuffer = null;
- this._currentBufferIndex = 0;
- this._currentBufferOffset = 0;
- this._views.clear();
- return super.clear();
- }
-
- public flush() {
- const { type, length, nullCount, _views, _nulls } = this;
-
- if (this._currentBuffer && this._currentBufferOffset > 0) {
- this._variadicBuffers.push(this._currentBuffer.buffer.slice(0,
this._currentBufferOffset));
- this._currentBuffer = null;
- this._currentBufferOffset = 0;
- }
-
- const views = _views.flush(length * BinaryView.ELEMENT_WIDTH);
- const nullBitmap = nullCount > 0 ? _nulls.flush(length) : undefined;
- const variadicBuffers = this._variadicBuffers.slice();
-
- this._variadicBuffers = [];
- this._currentBufferIndex = 0;
-
- this.clear();
-
- return makeData({
- type,
- length,
- nullCount,
- nullBitmap,
- ['views']: views,
- ['variadicBuffers']: variadicBuffers
- });
}
- public finish() {
- this.finished = true;
- return this;
+ protected encodeValue(value: string): Uint8Array {
+ return encodeUtf8(value);
Review Comment:
This is similar to what we do in `Utf8Builder`
[here](https://github.com/apache/arrow-js/blob/b38b96715e9b52ea1d2c3211db0c446b91c6a501/src/builder/utf8.ts#L37-L39).
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]