This is an automated email from the ASF dual-hosted git repository.
domoritz pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 4ec654497b GH-39257: [JS] LargeBinary (#39258)
4ec654497b is described below
commit 4ec654497bb14e7ec0fbaead655c129ca61074ff
Author: Dominik Moritz <[email protected]>
AuthorDate: Sun Dec 17 22:27:05 2023 -0500
GH-39257: [JS] LargeBinary (#39258)
Merge after #39249
* Closes: #39257
---
docs/source/status.rst | 2 +-
js/src/Arrow.dom.ts | 4 ++--
js/src/Arrow.ts | 3 ++-
js/src/builder.ts | 6 +++---
js/src/builder/{largeutf8.ts => largebinary.ts} | 17 ++++++-----------
js/src/builder/largeutf8.ts | 22 ++++------------------
js/src/data.ts | 15 +++++++++++++--
js/src/enum.ts | 3 ++-
js/src/interfaces.ts | 4 ++++
js/src/ipc/metadata/json.ts | 3 ++-
js/src/ipc/metadata/message.ts | 3 ++-
js/src/ipc/writer.ts | 5 ++---
js/src/type.ts | 18 +++++++++++++++++-
js/src/visitor.ts | 6 +++++-
js/src/visitor/builderctor.ts | 2 ++
js/src/visitor/bytelength.ts | 21 +++++++++------------
js/src/visitor/get.ts | 8 +++++---
js/src/visitor/indexof.ts | 4 +++-
js/src/visitor/iterator.ts | 4 +++-
js/src/visitor/jsontypeassembler.ts | 3 +++
js/src/visitor/jsonvectorassembler.ts | 10 +++++++---
js/src/visitor/set.ts | 10 +++++-----
js/src/visitor/typeassembler.ts | 5 +++++
js/src/visitor/typecomparator.ts | 4 +++-
js/src/visitor/typector.ts | 1 +
js/src/visitor/vectorassembler.ts | 6 ++++--
js/src/visitor/vectorloader.ts | 5 ++++-
js/test/data/tables.ts | 2 +-
js/test/generate-test-data.ts | 18 ++++++++++++++++--
js/test/unit/builders/builder-tests.ts | 1 +
js/test/unit/generated-data-tests.ts | 1 +
js/test/unit/visitor-tests.ts | 6 +++++-
32 files changed, 143 insertions(+), 79 deletions(-)
diff --git a/docs/source/status.rst b/docs/source/status.rst
index e52e4e4cd4..e860aceb76 100644
--- a/docs/source/status.rst
+++ b/docs/source/status.rst
@@ -62,7 +62,7 @@ Data Types
+-------------------+-------+-------+-------+------------+-------+-------+-------+-------+
| Binary | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ |
+-------------------+-------+-------+-------+------------+-------+-------+-------+-------+
-| Large Binary | ✓ | ✓ | ✓ | | | ✓ | ✓ | |
+| Large Binary | ✓ | ✓ | ✓ | ✓ | | ✓ | ✓ | |
+-------------------+-------+-------+-------+------------+-------+-------+-------+-------+
| Utf8 | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ |
+-------------------+-------+-------+-------+------------+-------+-------+-------+-------+
diff --git a/js/src/Arrow.dom.ts b/js/src/Arrow.dom.ts
index 9ec76fdd00..cdb4171162 100644
--- a/js/src/Arrow.dom.ts
+++ b/js/src/Arrow.dom.ts
@@ -48,7 +48,7 @@ export {
Int, Int8, Int16, Int32, Int64, Uint8, Uint16, Uint32, Uint64,
Float, Float16, Float32, Float64,
Utf8, LargeUtf8,
- Binary,
+ Binary, LargeBinary,
FixedSizeBinary,
Date_, DateDay, DateMillisecond,
Timestamp, TimestampSecond, TimestampMillisecond, TimestampMicrosecond,
TimestampNanosecond,
@@ -78,7 +78,7 @@ export {
} from './Arrow.js';
export {
- BinaryBuilder,
+ BinaryBuilder, LargeBinaryBuilder,
BoolBuilder,
DateBuilder, DateDayBuilder, DateMillisecondBuilder,
DecimalBuilder,
diff --git a/js/src/Arrow.ts b/js/src/Arrow.ts
index b7e5f63a6a..6251a9e777 100644
--- a/js/src/Arrow.ts
+++ b/js/src/Arrow.ts
@@ -37,7 +37,7 @@ export {
Int, Int8, Int16, Int32, Int64, Uint8, Uint16, Uint32, Uint64,
Float, Float16, Float32, Float64,
Utf8, LargeUtf8,
- Binary,
+ Binary, LargeBinary,
FixedSizeBinary,
Date_, DateDay, DateMillisecond,
Timestamp, TimestampSecond, TimestampMillisecond, TimestampMicrosecond,
TimestampNanosecond,
@@ -80,6 +80,7 @@ export { DurationBuilder, DurationSecondBuilder,
DurationMillisecondBuilder, Dur
export { Utf8Builder } from './builder/utf8.js';
export { LargeUtf8Builder } from './builder/largeutf8.js';
export { BinaryBuilder } from './builder/binary.js';
+export { LargeBinaryBuilder } from './builder/largebinary.js';
export { ListBuilder } from './builder/list.js';
export { FixedSizeListBuilder } from './builder/fixedsizelist.js';
export { MapBuilder } from './builder/map.js';
diff --git a/js/src/builder.ts b/js/src/builder.ts
index 1a4c52f871..a4e2d4d893 100644
--- a/js/src/builder.ts
+++ b/js/src/builder.ts
@@ -22,7 +22,7 @@ import {
DataType, strideForType,
Float, Int, Decimal, FixedSizeBinary,
Date_, Time, Timestamp, Interval, Duration,
- Utf8, LargeUtf8, Binary, List, Map_,
+ Utf8, LargeUtf8, Binary, LargeBinary, List, Map_,
} from './type.js';
import { createIsValidFunction } from './builder/valid.js';
import { BufferBuilder, BitmapBufferBuilder, DataBufferBuilder,
OffsetsBufferBuilder } from './builder/buffer.js';
@@ -285,7 +285,7 @@ export abstract class Builder<T extends DataType = any,
TNull = any> {
if (typeIds = _typeIds?.flush(length)) { // Unions, DenseUnions
valueOffsets = _offsets?.flush(length);
- } else if (valueOffsets = _offsets?.flush(length)) { // Variable-width
primitives (Binary, Utf8, LargeUtf8), and Lists
+ } else if (valueOffsets = _offsets?.flush(length)) { // Variable-width
primitives (Binary, LargeBinary, Utf8, LargeUtf8), and Lists
data = _values?.flush(_offsets.last());
} else { // Fixed-width primitives (Int, Float, Decimal, Time,
Timestamp, Duration and Interval)
data = _values?.flush(length);
@@ -352,7 +352,7 @@ export abstract class FixedWidthBuilder<T extends Int |
Float | FixedSizeBinary
}
/** @ignore */
-export abstract class VariableWidthBuilder<T extends Binary | Utf8 | LargeUtf8
| List | Map_, TNull = any> extends Builder<T, TNull> {
+export abstract class VariableWidthBuilder<T extends Binary | LargeBinary |
Utf8 | LargeUtf8 | List | Map_, TNull = any> extends Builder<T, TNull> {
protected _pendingLength = 0;
protected _offsets: OffsetsBufferBuilder<T>;
protected _pending: Map<number, any> | undefined;
diff --git a/js/src/builder/largeutf8.ts b/js/src/builder/largebinary.ts
similarity index 74%
copy from js/src/builder/largeutf8.ts
copy to js/src/builder/largebinary.ts
index fddfeaf8e7..59aa7144d2 100644
--- a/js/src/builder/largeutf8.ts
+++ b/js/src/builder/largebinary.ts
@@ -15,14 +15,14 @@
// specific language governing permissions and limitations
// under the License.
-import { LargeUtf8 } from '../type.js';
-import { encodeUtf8 } from '../util/utf8.js';
+import { LargeBinary } from '../type.js';
+import { toUint8Array } from '../util/buffer.js';
import { BufferBuilder } from './buffer.js';
import { VariableWidthBuilder, BuilderOptions } from '../builder.js';
/** @ignore */
-export class LargeUtf8Builder<TNull = any> extends
VariableWidthBuilder<LargeUtf8, TNull> {
- constructor(opts: BuilderOptions<LargeUtf8, TNull>) {
+export class LargeBinaryBuilder<TNull = any> extends
VariableWidthBuilder<LargeBinary, TNull> {
+ constructor(opts: BuilderOptions<LargeBinary, TNull>) {
super(opts);
this._values = new BufferBuilder(new Uint8Array(0));
}
@@ -33,12 +33,9 @@ export class LargeUtf8Builder<TNull = any> extends
VariableWidthBuilder<LargeUtf
this._nulls && (size += this._nulls.byteLength);
return size;
}
- public setValue(index: number, value: string) {
- return super.setValue(index, encodeUtf8(value) as any);
+ public setValue(index: number, value: Uint8Array) {
+ return super.setValue(index, toUint8Array(value));
}
- // @ts-ignore
- // TODO: move to largeBinaryBuilder when implemented
- // protected _flushPending(pending: Map<number, Uint8Array | undefined>,
pendingLength: number): void { }
protected _flushPending(pending: Map<number, Uint8Array | undefined>,
pendingLength: number) {
const offsets = this._offsets;
const data = this._values.reserve(pendingLength).buffer;
@@ -55,5 +52,3 @@ export class LargeUtf8Builder<TNull = any> extends
VariableWidthBuilder<LargeUtf
}
}
}
-
-// (LargeUtf8Builder.prototype as any)._flushPending =
(LargeBinaryBuilder.prototype as any)._flushPending;
diff --git a/js/src/builder/largeutf8.ts b/js/src/builder/largeutf8.ts
index fddfeaf8e7..5189010009 100644
--- a/js/src/builder/largeutf8.ts
+++ b/js/src/builder/largeutf8.ts
@@ -19,6 +19,7 @@ import { LargeUtf8 } from '../type.js';
import { encodeUtf8 } from '../util/utf8.js';
import { BufferBuilder } from './buffer.js';
import { VariableWidthBuilder, BuilderOptions } from '../builder.js';
+import { LargeBinaryBuilder } from './largebinary.js';
/** @ignore */
export class LargeUtf8Builder<TNull = any> extends
VariableWidthBuilder<LargeUtf8, TNull> {
@@ -36,24 +37,9 @@ export class LargeUtf8Builder<TNull = any> extends
VariableWidthBuilder<LargeUtf
public setValue(index: number, value: string) {
return super.setValue(index, encodeUtf8(value) as any);
}
+
// @ts-ignore
- // TODO: move to largeBinaryBuilder when implemented
- // protected _flushPending(pending: Map<number, Uint8Array | undefined>,
pendingLength: number): void { }
- protected _flushPending(pending: Map<number, Uint8Array | undefined>,
pendingLength: number) {
- const offsets = this._offsets;
- const data = this._values.reserve(pendingLength).buffer;
- let offset = 0;
- for (const [index, value] of pending) {
- if (value === undefined) {
- offsets.set(index, BigInt(0));
- } else {
- const length = value.length;
- data.set(value, offset);
- offsets.set(index, BigInt(length));
- offset += length;
- }
- }
- }
+ protected _flushPending(pending: Map<number, Uint8Array | undefined>,
pendingLength: number): void { }
}
-// (LargeUtf8Builder.prototype as any)._flushPending =
(LargeBinaryBuilder.prototype as any)._flushPending;
+(LargeUtf8Builder.prototype as any)._flushPending =
(LargeBinaryBuilder.prototype as any)._flushPending;
diff --git a/js/src/data.ts b/js/src/data.ts
index 145ee9d049..6f87925088 100644
--- a/js/src/data.ts
+++ b/js/src/data.ts
@@ -17,7 +17,7 @@
import { Vector } from './vector.js';
import { BufferType, Type, UnionMode } from './enum.js';
-import { DataType, LargeUtf8, strideForType } from './type.js';
+import { DataType, strideForType } from './type.js';
import { popcnt_bit_range, truncateBitmap } from './util/bit.js';
// When slicing, we do not know the null count of the sliced range without
@@ -253,7 +253,7 @@ export class Data<T extends DataType = DataType> {
import {
Dictionary,
- Bool, Null, Utf8, Binary, Decimal, FixedSizeBinary, List, FixedSizeList,
Map_, Struct,
+ Bool, Null, Utf8, LargeUtf8, Binary, LargeBinary, Decimal,
FixedSizeBinary, List, FixedSizeList, Map_, Struct,
Float,
Int,
Date_,
@@ -324,6 +324,14 @@ class MakeDataVisitor extends Visitor {
const { ['length']: length = valueOffsets.length - 1, ['nullCount']:
nullCount = props['nullBitmap'] ? -1 : 0 } = props;
return new Data(type, offset, length, nullCount, [valueOffsets, data,
nullBitmap]);
}
+ public visitLargeBinary<T extends LargeBinary>(props:
LargeBinaryDataProps<T>) {
+ const { ['type']: type, ['offset']: offset = 0 } = props;
+ const data = toUint8Array(props['data']);
+ const nullBitmap = toUint8Array(props['nullBitmap']);
+ const valueOffsets = toBigInt64Array(props['valueOffsets']);
+ const { ['length']: length = valueOffsets.length - 1, ['nullCount']:
nullCount = props['nullBitmap'] ? -1 : 0 } = props;
+ return new Data(type, offset, length, nullCount, [valueOffsets, data,
nullBitmap]);
+ }
public visitFixedSizeBinary<T extends FixedSizeBinary>(props:
FixedSizeBinaryDataProps<T>) {
const { ['type']: type, ['offset']: offset = 0 } = props;
const nullBitmap = toUint8Array(props['nullBitmap']);
@@ -444,6 +452,7 @@ interface IntervalDataProps<T extends Interval> extends
DataProps_<T> { data?: D
interface DurationDataProps<T extends Duration> extends DataProps_<T> { data?:
DataBuffer<T> }
interface FixedSizeBinaryDataProps<T extends FixedSizeBinary> extends
DataProps_<T> { data?: DataBuffer<T> }
interface BinaryDataProps<T extends Binary> extends DataProps_<T> {
valueOffsets: ValueOffsetsBuffer; data?: DataBuffer<T> }
+interface LargeBinaryDataProps<T extends LargeBinary> extends DataProps_<T> {
valueOffsets: LargeValueOffsetsBuffer | ValueOffsetsBuffer; data?:
DataBuffer<T> }
interface Utf8DataProps<T extends Utf8> extends DataProps_<T> { valueOffsets:
ValueOffsetsBuffer; data?: DataBuffer<T> }
interface LargeUtf8DataProps<T extends LargeUtf8> extends DataProps_<T> {
valueOffsets: LargeValueOffsetsBuffer | ValueOffsetsBuffer; data?:
DataBuffer<T> }
interface ListDataProps<T extends List> extends DataProps_<T> { valueOffsets:
ValueOffsetsBuffer; child: Data<T['valueType']> }
@@ -468,6 +477,7 @@ export type DataProps<T extends DataType> = (
T extends Duration /* */ ? DurationDataProps<T> :
T extends FixedSizeBinary /* */ ? FixedSizeBinaryDataProps<T> :
T extends Binary /* */ ? BinaryDataProps<T> :
+ T extends LargeBinary /* */ ? LargeBinaryDataProps<T> :
T extends Utf8 /* */ ? Utf8DataProps<T> :
T extends LargeUtf8 /* */ ? LargeUtf8DataProps<T> :
T extends List /* */ ? ListDataProps<T> :
@@ -495,6 +505,7 @@ export function makeData<T extends Interval>(props:
IntervalDataProps<T>): Data<
export function makeData<T extends Duration>(props: DurationDataProps<T>):
Data<T>;
export function makeData<T extends FixedSizeBinary>(props:
FixedSizeBinaryDataProps<T>): Data<T>;
export function makeData<T extends Binary>(props: BinaryDataProps<T>): Data<T>;
+export function makeData<T extends LargeBinary>(props:
LargeBinaryDataProps<T>): Data<T>;
export function makeData<T extends Utf8>(props: Utf8DataProps<T>): Data<T>;
export function makeData<T extends LargeUtf8>(props: LargeUtf8DataProps<T>):
Data<T>;
export function makeData<T extends List>(props: ListDataProps<T>): Data<T>;
diff --git a/js/src/enum.ts b/js/src/enum.ts
index 764ea64e63..0eecc0c68b 100644
--- a/js/src/enum.ts
+++ b/js/src/enum.ts
@@ -173,7 +173,8 @@ export enum Type {
FixedSizeBinary = 15, /** Fixed-size binary. Each value occupies the same
number of bytes */
FixedSizeList = 16, /** Fixed-size list. Each value occupies the same
number of bytes */
Map = 17, /** Map of named logical types */
- Duration = 18, /** Measure of elapsed time in either seconds,
milliseconds, microseconds or nanoseconds. */
+ Duration = 18, /** Measure of elapsed time in either seconds,
milliseconds, microseconds or nanoseconds */
+ LargeBinary = 19, /** Large variable-length bytes (no guarantee of
UTF8-ness) */
LargeUtf8 = 20, /** Large variable-length string as List<Char> */
Dictionary = -1, /** Dictionary aka Category type */
diff --git a/js/src/interfaces.ts b/js/src/interfaces.ts
index 707d01bb14..c4119a8bd2 100644
--- a/js/src/interfaces.ts
+++ b/js/src/interfaces.ts
@@ -35,6 +35,7 @@ import type { DurationBuilder, DurationSecondBuilder,
DurationMillisecondBuilder
import type { Utf8Builder } from './builder/utf8.js';
import type { LargeUtf8Builder } from './builder/largeutf8.js';
import type { BinaryBuilder } from './builder/binary.js';
+import type { LargeBinaryBuilder } from './builder/largebinary.js';
import type { ListBuilder } from './builder/list.js';
import type { FixedSizeListBuilder } from './builder/fixedsizelist.js';
import type { MapBuilder } from './builder/map.js';
@@ -210,6 +211,7 @@ export type TypeToDataType<T extends Type> = {
[Type.Utf8]: type.Utf8;
[Type.LargeUtf8]: type.LargeUtf8;
[Type.Binary]: type.Binary;
+ [Type.LargeBinary]: type.LargeBinary;
[Type.FixedSizeBinary]: type.FixedSizeBinary;
[Type.Date]: type.Date_;
[Type.DateDay]: type.DateDay;
@@ -264,6 +266,7 @@ type TypeToBuilder<T extends Type = any, TNull = any> = {
[Type.Utf8]: Utf8Builder<TNull>;
[Type.LargeUtf8]: LargeUtf8Builder<TNull>;
[Type.Binary]: BinaryBuilder<TNull>;
+ [Type.LargeBinary]: LargeBinaryBuilder<TNull>;
[Type.FixedSizeBinary]: FixedSizeBinaryBuilder<TNull>;
[Type.Date]: DateBuilder<any, TNull>;
[Type.DateDay]: DateDayBuilder<TNull>;
@@ -318,6 +321,7 @@ type DataTypeToBuilder<T extends DataType = any, TNull =
any> = {
[Type.Utf8]: T extends type.Utf8 ? Utf8Builder<TNull> : never;
[Type.LargeUtf8]: T extends type.LargeUtf8 ? LargeUtf8Builder<TNull> :
never;
[Type.Binary]: T extends type.Binary ? BinaryBuilder<TNull> : never;
+ [Type.LargeBinary]: T extends type.LargeBinary ? LargeBinaryBuilder<TNull>
: never;
[Type.FixedSizeBinary]: T extends type.FixedSizeBinary ?
FixedSizeBinaryBuilder<TNull> : never;
[Type.Date]: T extends type.Date_ ? DateBuilder<T, TNull> : never;
[Type.DateDay]: T extends type.DateDay ? DateDayBuilder<TNull> : never;
diff --git a/js/src/ipc/metadata/json.ts b/js/src/ipc/metadata/json.ts
index b669c0c612..8dc81ced3f 100644
--- a/js/src/ipc/metadata/json.ts
+++ b/js/src/ipc/metadata/json.ts
@@ -20,7 +20,7 @@
import { Schema, Field } from '../../schema.js';
import {
DataType, Dictionary, TimeBitWidth,
- Utf8, LargeUtf8, Binary, Decimal, FixedSizeBinary,
+ Utf8, LargeUtf8, Binary, LargeBinary, Decimal, FixedSizeBinary,
List, FixedSizeList, Map_, Struct, Union,
Bool, Null, Int, Float, Date_, Time, Interval, Timestamp, IntBitWidth,
Int32, TKeys, Duration,
} from '../../type.js';
@@ -149,6 +149,7 @@ function typeFromJSON(f: any, children?: Field[]):
DataType<any> {
case 'NONE': return new Null();
case 'null': return new Null();
case 'binary': return new Binary();
+ case 'largebinary': return new LargeBinary();
case 'utf8': return new Utf8();
case 'largeutf8': return new LargeUtf8();
case 'bool': return new Bool();
diff --git a/js/src/ipc/metadata/message.ts b/js/src/ipc/metadata/message.ts
index cf05bff54c..552c4d846e 100644
--- a/js/src/ipc/metadata/message.ts
+++ b/js/src/ipc/metadata/message.ts
@@ -56,7 +56,7 @@ import ByteBuffer = flatbuffers.ByteBuffer;
import {
DataType, Dictionary, TimeBitWidth,
- Utf8, LargeUtf8, Binary, Decimal, FixedSizeBinary,
+ Utf8, LargeUtf8, Binary, LargeBinary, Decimal, FixedSizeBinary,
List, FixedSizeList, Map_, Struct, Union,
Bool, Null, Int, Float, Date_, Time, Interval, Timestamp, IntBitWidth,
Int32, TKeys, Duration,
} from '../../type.js';
@@ -432,6 +432,7 @@ function decodeFieldType(f: _Field, children?: Field[]):
DataType<any> {
case Type['NONE']: return new Null();
case Type['Null']: return new Null();
case Type['Binary']: return new Binary();
+ case Type['LargeBinary']: return new LargeBinary();
case Type['Utf8']: return new Utf8();
case Type['LargeUtf8']: return new LargeUtf8();
case Type['Bool']: return new Bool();
diff --git a/js/src/ipc/writer.ts b/js/src/ipc/writer.ts
index 54b4b0249e..565b0825bd 100644
--- a/js/src/ipc/writer.ts
+++ b/js/src/ipc/writer.ts
@@ -391,7 +391,7 @@ export class RecordBatchJSONWriter<T extends TypeMap = any>
extends RecordBatchW
protected _writeDictionaryBatch(dictionary: Data, id: number, isDelta =
false) {
this._dictionaryDeltaOffsets.set(id, dictionary.length +
(this._dictionaryDeltaOffsets.get(id) || 0));
this._write(this._dictionaryBlocks.length === 0 ? ` ` : `,\n `);
- this._write(`${dictionaryBatchToJSON(dictionary, id, isDelta)}`);
+ this._write(dictionaryBatchToJSON(dictionary, id, isDelta));
this._dictionaryBlocks.push(new FileBlock(0, 0, 0));
return this;
}
@@ -401,7 +401,6 @@ export class RecordBatchJSONWriter<T extends TypeMap = any>
extends RecordBatchW
return this;
}
public close() {
-
if (this._dictionaries.length > 0) {
this._write(`,\n "dictionaries": [\n`);
for (const batch of this._dictionaries) {
@@ -413,7 +412,7 @@ export class RecordBatchJSONWriter<T extends TypeMap = any>
extends RecordBatchW
if (this._recordBatches.length > 0) {
for (let i = -1, n = this._recordBatches.length; ++i < n;) {
this._write(i === 0 ? `,\n "batches": [\n ` : `,\n `);
- this._write(`${recordBatchToJSON(this._recordBatches[i])}`);
+ this._write(recordBatchToJSON(this._recordBatches[i]));
this._recordBatchBlocks.push(new FileBlock(0, 0, 0));
}
this._write(`\n ]`);
diff --git a/js/src/type.ts b/js/src/type.ts
index 6223d0316f..dea5301aed 100644
--- a/js/src/type.ts
+++ b/js/src/type.ts
@@ -58,6 +58,7 @@ export abstract class DataType<TType extends Type = Type,
TChildren extends Type
/** @nocollapse */ static isInt(x: any): x is Int_ { return x?.typeId ===
Type.Int; }
/** @nocollapse */ static isFloat(x: any): x is Float { return x?.typeId
=== Type.Float; }
/** @nocollapse */ static isBinary(x: any): x is Binary { return x?.typeId
=== Type.Binary; }
+ /** @nocollapse */ static isLargeBinary(x: any): x is LargeBinary { return
x?.typeId === Type.LargeBinary; }
/** @nocollapse */ static isUtf8(x: any): x is Utf8 { return x?.typeId ===
Type.Utf8; }
/** @nocollapse */ static isLargeUtf8(x: any): x is LargeUtf8 { return
x?.typeId === Type.LargeUtf8; }
/** @nocollapse */ static isBool(x: any): x is Bool { return x?.typeId ===
Type.Bool; }
@@ -250,6 +251,22 @@ export class Binary extends DataType<Type.Binary> {
})(Binary.prototype);
}
+/** @ignore */
+export interface LargeBinary extends DataType<Type.LargeBinary> { TArray:
Uint8Array; TOffsetArray: BigInt64Array; TValue: Uint8Array; ArrayType:
TypedArrayConstructor<Uint8Array>; OffsetArrayType:
BigIntArrayConstructor<BigInt64Array> }
+/** @ignore */
+export class LargeBinary extends DataType<Type.LargeBinary> {
+ constructor() {
+ super();
+ }
+ public get typeId() { return Type.LargeBinary as Type.LargeBinary; }
+ public toString() { return `LargeBinary`; }
+ protected static [Symbol.toStringTag] = ((proto: LargeBinary) => {
+ (<any>proto).ArrayType = Uint8Array;
+ (<any>proto).OffsetArrayType = BigInt64Array;
+ return proto[Symbol.toStringTag] = 'LargeBinary';
+ })(LargeBinary.prototype);
+}
+
/** @ignore */
export interface Utf8 extends DataType<Type.Utf8> { TArray: Uint8Array;
TOffsetArray: Int32Array; TValue: string; ArrayType:
TypedArrayConstructor<Uint8Array>; OffsetArrayType:
TypedArrayConstructor<Int32Array> }
/** @ignore */
@@ -601,7 +618,6 @@ export class FixedSizeBinary extends
DataType<Type.FixedSizeBinary> {
protected static [Symbol.toStringTag] = ((proto: FixedSizeBinary) => {
(<any>proto).byteWidth = null;
(<any>proto).ArrayType = Uint8Array;
- (<any>proto).OffsetArrayType = Int32Array;
return proto[Symbol.toStringTag] = 'FixedSizeBinary';
})(FixedSizeBinary.prototype);
}
diff --git a/js/src/visitor.ts b/js/src/visitor.ts
index 5b3cc4d3d0..2fb5e7e14b 100644
--- a/js/src/visitor.ts
+++ b/js/src/visitor.ts
@@ -38,6 +38,7 @@ export abstract class Visitor {
public visitUtf8(_node: any, ..._args: any[]): any { return null; }
public visitLargeUtf8(_node: any, ..._args: any[]): any { return null; }
public visitBinary(_node: any, ..._args: any[]): any { return null; }
+ public visitLargeBinary(_node: any, ..._args: any[]): any { return null; }
public visitFixedSizeBinary(_node: any, ..._args: any[]): any { return
null; }
public visitDate(_node: any, ..._args: any[]): any { return null; }
public visitTimestamp(_node: any, ..._args: any[]): any { return null; }
@@ -48,7 +49,7 @@ export abstract class Visitor {
public visitUnion(_node: any, ..._args: any[]): any { return null; }
public visitDictionary(_node: any, ..._args: any[]): any { return null; }
public visitInterval(_node: any, ..._args: any[]): any { return null; }
- public visitDuration(_node: any, ... _args: any[]): any { return null; }
+ public visitDuration(_node: any, ..._args: any[]): any { return null; }
public visitFixedSizeList(_node: any, ..._args: any[]): any { return null;
}
public visitMap(_node: any, ..._args: any[]): any { return null; }
}
@@ -92,6 +93,7 @@ function getVisitFnByTypeId(visitor: Visitor, dtype: Type,
throwIfNotFound = tru
case Type.Utf8: fn = visitor.visitUtf8; break;
case Type.LargeUtf8: fn = visitor.visitLargeUtf8; break;
case Type.Binary: fn = visitor.visitBinary; break;
+ case Type.LargeBinary: fn = visitor.visitLargeBinary; break;
case Type.FixedSizeBinary: fn = visitor.visitFixedSizeBinary; break;
case Type.Date: fn = visitor.visitDate; break;
case Type.DateDay: fn = visitor.visitDateDay || visitor.visitDate;
break;
@@ -153,6 +155,7 @@ function inferDType<T extends DataType>(type: T): Type {
// @ts-ignore
return Type.Float;
case Type.Binary: return Type.Binary;
+ case Type.LargeBinary: return Type.LargeBinary;
case Type.Utf8: return Type.Utf8;
case Type.LargeUtf8: return Type.LargeUtf8;
case Type.Bool: return Type.Bool;
@@ -234,6 +237,7 @@ export interface Visitor {
visitUtf8(node: any, ...args: any[]): any;
visitLargeUtf8(node: any, ...args: any[]): any;
visitBinary(node: any, ...args: any[]): any;
+ visitLargeBinary(node: any, ...args: any[]): any;
visitFixedSizeBinary(node: any, ...args: any[]): any;
visitDate(node: any, ...args: any[]): any;
visitDateDay?(node: any, ...args: any[]): any;
diff --git a/js/src/visitor/builderctor.ts b/js/src/visitor/builderctor.ts
index 83374712b2..5b3758c4e0 100644
--- a/js/src/visitor/builderctor.ts
+++ b/js/src/visitor/builderctor.ts
@@ -22,6 +22,7 @@ import { DataType } from '../type.js';
import { Visitor } from '../visitor.js';
import { BuilderCtor } from '../interfaces.js';
import { BinaryBuilder } from '../builder/binary.js';
+import { LargeBinaryBuilder } from '../builder/largebinary.js';
import { BoolBuilder } from '../builder/bool.js';
import { DateBuilder, DateDayBuilder, DateMillisecondBuilder } from
'../builder/date.js';
import { DecimalBuilder } from '../builder/decimal.js';
@@ -70,6 +71,7 @@ export class GetBuilderCtor extends Visitor {
public visitUtf8() { return Utf8Builder; }
public visitLargeUtf8() { return LargeUtf8Builder; }
public visitBinary() { return BinaryBuilder; }
+ public visitLargeBinary() { return LargeBinaryBuilder; }
public visitFixedSizeBinary() { return FixedSizeBinaryBuilder; }
public visitDate() { return DateBuilder; }
public visitDateDay() { return DateDayBuilder; }
diff --git a/js/src/visitor/bytelength.ts b/js/src/visitor/bytelength.ts
index c3bfadd50e..43399b2571 100644
--- a/js/src/visitor/bytelength.ts
+++ b/js/src/visitor/bytelength.ts
@@ -26,9 +26,10 @@ import { Type, TimeUnit, UnionMode } from '../enum.js';
import {
DataType, Dictionary,
Float, Int, Date_, Interval, Time, Timestamp, Duration,
- Bool, Null, Utf8, LargeUtf8, Binary, Decimal, FixedSizeBinary,
+ Bool, Null, Utf8, LargeUtf8, Binary, LargeBinary, Decimal, FixedSizeBinary,
List, FixedSizeList, Map_, Struct, Union, DenseUnion, SparseUnion,
} from '../type.js';
+import { bigIntToNumber } from '../util/bigint.js';
/** @ignore */ const sum = (x: number, y: number) => x + y;
@@ -39,6 +40,7 @@ export interface GetByteLengthVisitor extends Visitor {
getVisitFn<T extends DataType>(node: Data<T> | T): (data: Data<T>, index:
number) => number;
getVisitFn<T extends Type>(node: T): (data: Data<TypeToDataType<T>>,
index: number) => number;
visitBinary<T extends Binary>(data: Data<T>, index: number): number;
+ visitLargeBinary<T extends LargeBinary>(data: Data<T>, index: number):
number;
visitUtf8<T extends Utf8>(data: Data<T>, index: number): number;
visitLargeUtf8<T extends LargeUtf8>(data: Data<T>, index: number): number;
visitList<T extends List>(data: Data<T>, index: number): number;
@@ -95,22 +97,15 @@ export class GetByteLengthVisitor extends Visitor {
}
/** @ignore */
-const getUtf8ByteLength = <T extends Utf8>({ valueOffsets }: Data<T>, index:
number): number => {
+const getBinaryByteLength = <T extends Binary | LargeBinary | Utf8 |
LargeUtf8>({ valueOffsets }: Data<T>, index: number): number => {
// 4 + 4 for the indices, `end - start` for the data bytes
- return 8 + (valueOffsets[index + 1] - valueOffsets[index]);
-};
-
-/** @ignore */
-const getBinaryByteLength = <T extends Binary>({ valueOffsets }: Data<T>,
index: number): number => {
- // 4 + 4 for the indices, `end - start` for the data bytes
- return 8 + (valueOffsets[index + 1] - valueOffsets[index]);
+ return 8 + bigIntToNumber(valueOffsets[index + 1]) -
bigIntToNumber(valueOffsets[index]);
};
/** @ignore */
const getListByteLength = <T extends List>({ valueOffsets, stride, children }:
Data<T>, index: number): number => {
const child: Data<T['valueType']> = children[0];
- const { [index * stride]: start } = valueOffsets;
- const { [index * stride + 1]: end } = valueOffsets;
+ const { [index * stride]: start, [index * stride + 1]: end } =
valueOffsets;
const visit = instance.getVisitFn(child.type);
const slice = child.slice(start, end - start);
let size = 8; // 4 + 4 for the indices
@@ -155,8 +150,10 @@ const getSparseUnionByteLength = <T extends SparseUnion>({
children }: Data<T>,
return 4 + instance.visitMany(children, children.map(() =>
index)).reduce(sum, 0);
};
-GetByteLengthVisitor.prototype.visitUtf8 = getUtf8ByteLength;
+GetByteLengthVisitor.prototype.visitUtf8 = getBinaryByteLength;
+GetByteLengthVisitor.prototype.visitLargeUtf8 = getBinaryByteLength;
GetByteLengthVisitor.prototype.visitBinary = getBinaryByteLength;
+GetByteLengthVisitor.prototype.visitLargeBinary = getBinaryByteLength;
GetByteLengthVisitor.prototype.visitList = getListByteLength;
GetByteLengthVisitor.prototype.visitFixedSizeList = getFixedSizeListByteLength;
GetByteLengthVisitor.prototype.visitUnion = getUnionByteLength;
diff --git a/js/src/visitor/get.ts b/js/src/visitor/get.ts
index 112d2f2983..3ab3bcb68c 100644
--- a/js/src/visitor/get.ts
+++ b/js/src/visitor/get.ts
@@ -28,7 +28,7 @@ import { uint16ToFloat64 } from '../util/math.js';
import { Type, UnionMode, Precision, DateUnit, TimeUnit, IntervalUnit } from
'../enum.js';
import {
DataType, Dictionary,
- Bool, Null, Utf8, Binary, Decimal, FixedSizeBinary, List, FixedSizeList,
Map_, Struct,
+ Bool, Null, Utf8, LargeUtf8, Binary, LargeBinary, Decimal,
FixedSizeBinary, List, FixedSizeList, Map_, Struct,
Float, Float16, Float32, Float64,
Int, Uint8, Uint16, Uint32, Uint64, Int8, Int16, Int32, Int64,
Date_, DateDay, DateMillisecond,
@@ -36,7 +36,7 @@ import {
Time, TimeSecond, TimeMillisecond, TimeMicrosecond, TimeNanosecond,
Timestamp, TimestampSecond, TimestampMillisecond, TimestampMicrosecond,
TimestampNanosecond,
Duration, DurationSecond, DurationMillisecond, DurationMicrosecond,
DurationNanosecond,
- Union, DenseUnion, SparseUnion, LargeUtf8,
+ Union, DenseUnion, SparseUnion,
} from '../type.js';
/** @ignore */
@@ -63,6 +63,7 @@ export interface GetVisitor extends Visitor {
visitUtf8<T extends Utf8>(data: Data<T>, index: number): T['TValue'] |
null;
visitLargeUtf8<T extends LargeUtf8>(data: Data<T>, index: number):
T['TValue'] | null;
visitBinary<T extends Binary>(data: Data<T>, index: number): T['TValue'] |
null;
+ visitLargeBinary<T extends LargeBinary>(data: Data<T>, index: number):
T['TValue'] | null;
visitFixedSizeBinary<T extends FixedSizeBinary>(data: Data<T>, index:
number): T['TValue'] | null;
visitDate<T extends Date_>(data: Data<T>, index: number): T['TValue'] |
null;
visitDateDay<T extends DateDay>(data: Data<T>, index: number): T['TValue']
| null;
@@ -151,7 +152,7 @@ const getBigInts = <T extends Numeric2X>({ values }:
Data<T>, index: number): T[
const getFixedSizeBinary = <T extends FixedSizeBinary>({ stride, values }:
Data<T>, index: number): T['TValue'] => values.subarray(stride * index, stride
* (index + 1));
/** @ignore */
-const getBinary = <T extends Binary>({ values, valueOffsets }: Data<T>, index:
number): T['TValue'] => getVariableWidthBytes(values, valueOffsets, index);
+const getBinary = <T extends Binary | LargeBinary>({ values, valueOffsets }:
Data<T>, index: number): T['TValue'] => getVariableWidthBytes(values,
valueOffsets, index);
/** @ignore */
const getUtf8 = <T extends Utf8 | LargeUtf8>({ values, valueOffsets }:
Data<T>, index: number): T['TValue'] => {
const bytes = getVariableWidthBytes(values, valueOffsets, index);
@@ -332,6 +333,7 @@ GetVisitor.prototype.visitFloat64 = wrapGet(getNumeric);
GetVisitor.prototype.visitUtf8 = wrapGet(getUtf8);
GetVisitor.prototype.visitLargeUtf8 = wrapGet(getUtf8);
GetVisitor.prototype.visitBinary = wrapGet(getBinary);
+GetVisitor.prototype.visitLargeBinary = wrapGet(getBinary);
GetVisitor.prototype.visitFixedSizeBinary = wrapGet(getFixedSizeBinary);
GetVisitor.prototype.visitDate = wrapGet(getDate);
GetVisitor.prototype.visitDateDay = wrapGet(getDateDay);
diff --git a/js/src/visitor/indexof.ts b/js/src/visitor/indexof.ts
index 76f95788c7..1e1cb87a98 100644
--- a/js/src/visitor/indexof.ts
+++ b/js/src/visitor/indexof.ts
@@ -24,7 +24,7 @@ import { getBool, BitIterator } from '../util/bit.js';
import { createElementComparator } from '../util/vector.js';
import {
DataType, Dictionary,
- Bool, Null, Utf8, LargeUtf8, Binary, Decimal, FixedSizeBinary, List,
FixedSizeList, Map_, Struct,
+ Bool, Null, Utf8, LargeUtf8, Binary, LargeBinary, Decimal,
FixedSizeBinary, List, FixedSizeList, Map_, Struct,
Float, Float16, Float32, Float64,
Int, Uint8, Uint16, Uint32, Uint64, Int8, Int16, Int32, Int64,
Date_, DateDay, DateMillisecond,
@@ -59,6 +59,7 @@ export interface IndexOfVisitor extends Visitor {
visitUtf8<T extends Utf8>(data: Data<T>, value: T['TValue'] | null,
index?: number): number;
visitLargeUtf8<T extends LargeUtf8>(data: Data<T>, value: T['TValue'] |
null, index?: number): number;
visitBinary<T extends Binary>(data: Data<T>, value: T['TValue'] | null,
index?: number): number;
+ visitLargeBinary<T extends LargeBinary>(data: Data<T>, value: T['TValue']
| null, index?: number): number;
visitFixedSizeBinary<T extends FixedSizeBinary>(data: Data<T>, value:
T['TValue'] | null, index?: number): number;
visitDate<T extends Date_>(data: Data<T>, value: T['TValue'] | null,
index?: number): number;
visitDateDay<T extends DateDay>(data: Data<T>, value: T['TValue'] | null,
index?: number): number;
@@ -175,6 +176,7 @@ IndexOfVisitor.prototype.visitFloat64 = indexOfValue;
IndexOfVisitor.prototype.visitUtf8 = indexOfValue;
IndexOfVisitor.prototype.visitLargeUtf8 = indexOfValue;
IndexOfVisitor.prototype.visitBinary = indexOfValue;
+IndexOfVisitor.prototype.visitLargeBinary = indexOfValue;
IndexOfVisitor.prototype.visitFixedSizeBinary = indexOfValue;
IndexOfVisitor.prototype.visitDate = indexOfValue;
IndexOfVisitor.prototype.visitDateDay = indexOfValue;
diff --git a/js/src/visitor/iterator.ts b/js/src/visitor/iterator.ts
index 09dfcb0b56..bf7e9d1591 100644
--- a/js/src/visitor/iterator.ts
+++ b/js/src/visitor/iterator.ts
@@ -21,7 +21,7 @@ import { Type, Precision } from '../enum.js';
import { TypeToDataType } from '../interfaces.js';
import {
DataType, Dictionary,
- Bool, Null, Utf8, LargeUtf8, Binary, Decimal, FixedSizeBinary, List,
FixedSizeList, Map_, Struct,
+ Bool, Null, Utf8, LargeUtf8, Binary, LargeBinary, Decimal,
FixedSizeBinary, List, FixedSizeList, Map_, Struct,
Float, Float16, Float32, Float64,
Int, Uint8, Uint16, Uint32, Uint64, Int8, Int16, Int32, Int64,
Date_, DateDay, DateMillisecond,
@@ -57,6 +57,7 @@ export interface IteratorVisitor extends Visitor {
visitUtf8<T extends Utf8>(vector: Vector<T>): IterableIterator<T['TValue']
| null>;
visitLargeUtf8<T extends LargeUtf8>(vector: Vector<T>):
IterableIterator<T['TValue'] | null>;
visitBinary<T extends Binary>(vector: Vector<T>):
IterableIterator<T['TValue'] | null>;
+ visitLargeBinary<T extends LargeBinary>(vector: Vector<T>):
IterableIterator<T['TValue'] | null>;
visitFixedSizeBinary<T extends FixedSizeBinary>(vector: Vector<T>):
IterableIterator<T['TValue'] | null>;
visitDate<T extends Date_>(vector: Vector<T>):
IterableIterator<T['TValue'] | null>;
visitDateDay<T extends DateDay>(vector: Vector<T>):
IterableIterator<T['TValue'] | null>;
@@ -161,6 +162,7 @@ IteratorVisitor.prototype.visitFloat64 = vectorIterator;
IteratorVisitor.prototype.visitUtf8 = vectorIterator;
IteratorVisitor.prototype.visitLargeUtf8 = vectorIterator;
IteratorVisitor.prototype.visitBinary = vectorIterator;
+IteratorVisitor.prototype.visitLargeBinary = vectorIterator;
IteratorVisitor.prototype.visitFixedSizeBinary = vectorIterator;
IteratorVisitor.prototype.visitDate = vectorIterator;
IteratorVisitor.prototype.visitDateDay = vectorIterator;
diff --git a/js/src/visitor/jsontypeassembler.ts
b/js/src/visitor/jsontypeassembler.ts
index a6746a858e..823b1dea10 100644
--- a/js/src/visitor/jsontypeassembler.ts
+++ b/js/src/visitor/jsontypeassembler.ts
@@ -42,6 +42,9 @@ export class JSONTypeAssembler extends Visitor {
public visitBinary<T extends type.Binary>({ typeId }: T) {
return { 'name': ArrowType[typeId].toLowerCase() };
}
+ public visitLargeBinary<T extends type.LargeBinary>({ typeId }: T) {
+ return { 'name': ArrowType[typeId].toLowerCase() };
+ }
public visitBool<T extends type.Bool>({ typeId }: T) {
return { 'name': ArrowType[typeId].toLowerCase() };
}
diff --git a/js/src/visitor/jsonvectorassembler.ts
b/js/src/visitor/jsonvectorassembler.ts
index 9a3cb8601a..88699d8f16 100644
--- a/js/src/visitor/jsonvectorassembler.ts
+++ b/js/src/visitor/jsonvectorassembler.ts
@@ -27,7 +27,7 @@ import { BitIterator, getBit, getBool } from '../util/bit.js';
import {
DataType,
Float, Int, Date_, Interval, Time, Timestamp, Union, Duration,
- Bool, Null, Utf8, Binary, Decimal, FixedSizeBinary, List, FixedSizeList,
Map_, Struct, IntArray, LargeUtf8,
+ Bool, Null, Utf8, LargeUtf8, Binary, LargeBinary, Decimal,
FixedSizeBinary, List, FixedSizeList, Map_, Struct, IntArray,
} from '../type.js';
/** @ignore */
@@ -44,6 +44,7 @@ export interface JSONVectorAssembler extends Visitor {
visitUtf8<T extends Utf8>(data: Data<T>): { DATA: string[]; OFFSET:
number[] };
visitLargeUtf8<T extends LargeUtf8>(data: Data<T>): { DATA: string[];
OFFSET: string[] };
visitBinary<T extends Binary>(data: Data<T>): { DATA: string[]; OFFSET:
number[] };
+ visitLargeBinary<T extends LargeBinary>(data: Data<T>): { DATA: string[];
OFFSET: string[] };
visitFixedSizeBinary<T extends FixedSizeBinary>(data: Data<T>): { DATA:
string[] };
visitDate<T extends Date_>(data: Data<T>): { DATA: number[] };
visitTimestamp<T extends Timestamp>(data: Data<T>): { DATA: string[] };
@@ -105,7 +106,10 @@ export class JSONVectorAssembler extends Visitor {
return { 'DATA': [...new Vector([data])], 'OFFSET':
[...bigNumsToStrings(data.valueOffsets, 2)] };
}
public visitBinary<T extends Binary>(data: Data<T>) {
- return { 'DATA': [...binaryToString(new Vector([data]))], OFFSET:
[...data.valueOffsets] };
+ return { 'DATA': [...binaryToString(new Vector([data]))], 'OFFSET':
[...data.valueOffsets] };
+ }
+ public visitLargeBinary<T extends LargeBinary>(data: Data<T>) {
+ return { 'DATA': [...binaryToString(new Vector([data]))], 'OFFSET':
[...bigNumsToStrings(data.valueOffsets, 2)] };
}
public visitFixedSizeBinary<T extends FixedSizeBinary>(data: Data<T>) {
return { 'DATA': [...binaryToString(new Vector([data]))] };
@@ -168,7 +172,7 @@ export class JSONVectorAssembler extends Visitor {
}
/** @ignore */
-function* binaryToString(vector: Vector<Binary> | Vector<FixedSizeBinary>) {
+function* binaryToString(vector: Vector<Binary> | Vector<LargeBinary> |
Vector<FixedSizeBinary>) {
for (const octets of vector as Iterable<Uint8Array>) {
yield octets.reduce((str, byte) => {
return `${str}${('0' + (byte & 0xFF).toString(16)).slice(-2)}`;
diff --git a/js/src/visitor/set.ts b/js/src/visitor/set.ts
index 15b0721660..eb1f280964 100644
--- a/js/src/visitor/set.ts
+++ b/js/src/visitor/set.ts
@@ -26,7 +26,7 @@ import { float64ToUint16 } from '../util/math.js';
import { Type, UnionMode, Precision, DateUnit, TimeUnit, IntervalUnit } from
'../enum.js';
import {
DataType, Dictionary,
- Bool, Null, Utf8, LargeUtf8, Binary, Decimal, FixedSizeBinary, List,
FixedSizeList, Map_, Struct,
+ Bool, Null, Utf8, LargeUtf8, Binary, LargeBinary, Decimal,
FixedSizeBinary, List, FixedSizeList, Map_, Struct,
Float, Float16, Float32, Float64,
Int, Uint8, Uint16, Uint32, Uint64, Int8, Int16, Int32, Int64,
Date_, DateDay, DateMillisecond,
@@ -61,6 +61,7 @@ export interface SetVisitor extends Visitor {
visitUtf8<T extends Utf8>(data: Data<T>, index: number, value:
T['TValue']): void;
visitLargeUtf8<T extends LargeUtf8>(data: Data<T>, index: number, value:
T['TValue']): void;
visitBinary<T extends Binary>(data: Data<T>, index: number, value:
T['TValue']): void;
+ visitLargeBinary<T extends LargeBinary>(data: Data<T>, index: number,
value: T['TValue']): void;
visitFixedSizeBinary<T extends FixedSizeBinary>(data: Data<T>, index:
number, value: T['TValue']): void;
visitDate<T extends Date_>(data: Data<T>, index: number, value:
T['TValue']): void;
visitDateDay<T extends DateDay>(data: Data<T>, index: number, value:
T['TValue']): void;
@@ -165,11 +166,9 @@ export const setDateMillisecond = <T extends
DateMillisecond>({ values }: Data<T
export const setFixedSizeBinary = <T extends FixedSizeBinary>({ stride, values
}: Data<T>, index: number, value: T['TValue']): void => {
values.set(value.subarray(0, stride), stride * index); };
/** @ignore */
-const setBinary = <T extends Binary>({ values, valueOffsets }: Data<T>, index:
number, value: T['TValue']) => setVariableWidthBytes(values, valueOffsets,
index, value);
+const setBinary = <T extends Binary | LargeBinary>({ values, valueOffsets }:
Data<T>, index: number, value: T['TValue']) => setVariableWidthBytes(values,
valueOffsets, index, value);
/** @ignore */
-const setUtf8 = <T extends Utf8 | LargeUtf8>({ values, valueOffsets }:
Data<T>, index: number, value: T['TValue']) => {
- setVariableWidthBytes(values, valueOffsets, index, encodeUtf8(value));
-};
+const setUtf8 = <T extends Utf8 | LargeUtf8>({ values, valueOffsets }:
Data<T>, index: number, value: T['TValue']) => setVariableWidthBytes(values,
valueOffsets, index, encodeUtf8(value));
/* istanbul ignore next */
export const setDate = <T extends Date_>(data: Data<T>, index: number, value:
T['TValue']): void => {
@@ -370,6 +369,7 @@ SetVisitor.prototype.visitFloat64 = wrapSet(setFloat);
SetVisitor.prototype.visitUtf8 = wrapSet(setUtf8);
SetVisitor.prototype.visitLargeUtf8 = wrapSet(setUtf8);
SetVisitor.prototype.visitBinary = wrapSet(setBinary);
+SetVisitor.prototype.visitLargeBinary = wrapSet(setBinary);
SetVisitor.prototype.visitFixedSizeBinary = wrapSet(setFixedSizeBinary);
SetVisitor.prototype.visitDate = wrapSet(setDate);
SetVisitor.prototype.visitDateDay = wrapSet(setDateDay);
diff --git a/js/src/visitor/typeassembler.ts b/js/src/visitor/typeassembler.ts
index f072714222..169f3627a4 100644
--- a/js/src/visitor/typeassembler.ts
+++ b/js/src/visitor/typeassembler.ts
@@ -25,6 +25,7 @@ import { Null } from '../fb/null.js';
import { Int } from '../fb/int.js';
import { FloatingPoint } from '../fb/floating-point.js';
import { Binary } from '../fb/binary.js';
+import { LargeBinary } from '../fb/large-binary.js';
import { Bool } from '../fb/bool.js';
import { Utf8 } from '../fb/utf8.js';
import { LargeUtf8 } from '../fb/large-utf8.js';
@@ -71,6 +72,10 @@ export class TypeAssembler extends Visitor {
Binary.startBinary(b);
return Binary.endBinary(b);
}
+ public visitLargeBinary<T extends type.LargeBinary>(_node: T, b: Builder) {
+ LargeBinary.startLargeBinary(b);
+ return LargeBinary.endLargeBinary(b);
+ }
public visitBool<T extends type.Bool>(_node: T, b: Builder) {
Bool.startBool(b);
return Bool.endBool(b);
diff --git a/js/src/visitor/typecomparator.ts b/js/src/visitor/typecomparator.ts
index 2417dec09c..a113f2ea31 100644
--- a/js/src/visitor/typecomparator.ts
+++ b/js/src/visitor/typecomparator.ts
@@ -21,7 +21,7 @@ import { Visitor } from '../visitor.js';
import { Schema, Field } from '../schema.js';
import {
DataType, TypeMap, Dictionary,
- Bool, Null, Utf8, LargeUtf8, Binary, Decimal, FixedSizeBinary, List,
FixedSizeList, Map_, Struct,
+ Bool, Null, Utf8, LargeUtf8, Binary, LargeBinary, Decimal,
FixedSizeBinary, List, FixedSizeList, Map_, Struct,
Float, Float16, Float32, Float64,
Int, Uint8, Uint16, Uint32, Uint64, Int8, Int16, Int32, Int64,
Date_, DateDay, DateMillisecond,
@@ -55,6 +55,7 @@ export interface TypeComparator extends Visitor {
visitUtf8<T extends Utf8>(type: T, other?: DataType | null): other is T;
visitLargeUtf8<T extends LargeUtf8>(type: T, other?: DataType | null):
other is T;
visitBinary<T extends Binary>(type: T, other?: DataType | null): other is
T;
+ visitLargeBinary<T extends LargeBinary>(type: T, other?: DataType | null):
other is T;
visitFixedSizeBinary<T extends FixedSizeBinary>(type: T, other?: DataType
| null): other is T;
visitDate<T extends Date_>(type: T, other?: DataType | null): other is T;
visitDateDay<T extends DateDay>(type: T, other?: DataType | null): other
is T;
@@ -252,6 +253,7 @@ TypeComparator.prototype.visitFloat64 = compareFloat;
TypeComparator.prototype.visitUtf8 = compareAny;
TypeComparator.prototype.visitLargeUtf8 = compareAny;
TypeComparator.prototype.visitBinary = compareAny;
+TypeComparator.prototype.visitLargeBinary = compareAny;
TypeComparator.prototype.visitFixedSizeBinary = compareFixedSizeBinary;
TypeComparator.prototype.visitDate = compareDate;
TypeComparator.prototype.visitDateDay = compareDate;
diff --git a/js/src/visitor/typector.ts b/js/src/visitor/typector.ts
index 2e0bbc4147..a781b5fb14 100644
--- a/js/src/visitor/typector.ts
+++ b/js/src/visitor/typector.ts
@@ -51,6 +51,7 @@ export class GetDataTypeConstructor extends Visitor {
public visitUtf8() { return type.Utf8; }
public visitLargeUtf8() { return type.LargeUtf8; }
public visitBinary() { return type.Binary; }
+ public visitLargeBinary() { return type.LargeBinary; }
public visitFixedSizeBinary() { return type.FixedSizeBinary; }
public visitDate() { return type.Date_; }
public visitDateDay() { return type.DateDay; }
diff --git a/js/src/visitor/vectorassembler.ts
b/js/src/visitor/vectorassembler.ts
index df820e6f5e..7dc3695582 100644
--- a/js/src/visitor/vectorassembler.ts
+++ b/js/src/visitor/vectorassembler.ts
@@ -27,7 +27,7 @@ import { BufferRegion, FieldNode } from
'../ipc/metadata/message.js';
import {
DataType, Dictionary,
Float, Int, Date_, Interval, Time, Timestamp, Union, Duration,
- Bool, Null, Utf8, Binary, Decimal, FixedSizeBinary, List, FixedSizeList,
Map_, Struct, LargeUtf8,
+ Bool, Null, Utf8, LargeUtf8, Binary, LargeBinary, Decimal,
FixedSizeBinary, List, FixedSizeList, Map_, Struct,
} from '../type.js';
import { bigIntToNumber } from '../util/bigint.js';
@@ -44,6 +44,7 @@ export interface VectorAssembler extends Visitor {
visitUtf8<T extends Utf8>(data: Data<T>): this;
visitLargeUtf8<T extends LargeUtf8>(data: Data<T>): this;
visitBinary<T extends Binary>(data: Data<T>): this;
+ visitLargeBinary<T extends LargeBinary>(data: Data<T>): this;
visitFixedSizeBinary<T extends FixedSizeBinary>(data: Data<T>): this;
visitDate<T extends Date_>(data: Data<T>): this;
visitTimestamp<T extends Timestamp>(data: Data<T>): this;
@@ -203,7 +204,7 @@ function assembleFlatVector<T extends Int | Float |
FixedSizeBinary | Date_ | Ti
}
/** @ignore */
-function assembleFlatListVector<T extends Utf8 | Binary | LargeUtf8>(this:
VectorAssembler, data: Data<T>) {
+function assembleFlatListVector<T extends Utf8 | LargeUtf8 | Binary |
LargeBinary>(this: VectorAssembler, data: Data<T>) {
const { length, values, valueOffsets } = data;
const begin = bigIntToNumber(valueOffsets[0]);
const end = bigIntToNumber(valueOffsets[length]);
@@ -239,6 +240,7 @@ VectorAssembler.prototype.visitFloat = assembleFlatVector;
VectorAssembler.prototype.visitUtf8 = assembleFlatListVector;
VectorAssembler.prototype.visitLargeUtf8 = assembleFlatListVector;
VectorAssembler.prototype.visitBinary = assembleFlatListVector;
+VectorAssembler.prototype.visitLargeBinary = assembleFlatListVector;
VectorAssembler.prototype.visitFixedSizeBinary = assembleFlatVector;
VectorAssembler.prototype.visitDate = assembleFlatVector;
VectorAssembler.prototype.visitTimestamp = assembleFlatVector;
diff --git a/js/src/visitor/vectorloader.ts b/js/src/visitor/vectorloader.ts
index 35f28f49ba..c9c016d6b4 100644
--- a/js/src/visitor/vectorloader.ts
+++ b/js/src/visitor/vectorloader.ts
@@ -77,6 +77,9 @@ export class VectorLoader extends Visitor {
public visitBinary<T extends type.Binary>(type: T, { length, nullCount } =
this.nextFieldNode()) {
return makeData({ type, length, nullCount, nullBitmap:
this.readNullBitmap(type, nullCount), valueOffsets: this.readOffsets(type),
data: this.readData(type) });
}
+ public visitLargeBinary<T extends type.LargeBinary>(type: T, { length,
nullCount } = this.nextFieldNode()) {
+ return makeData({ type, length, nullCount, nullBitmap:
this.readNullBitmap(type, nullCount), valueOffsets: this.readOffsets(type),
data: this.readData(type) });
+ }
public visitFixedSizeBinary<T extends type.FixedSizeBinary>(type: T, {
length, nullCount } = this.nextFieldNode()) {
return makeData({ type, length, nullCount, nullBitmap:
this.readNullBitmap(type, nullCount), data: this.readData(type) });
}
@@ -169,7 +172,7 @@ export class JSONVectorLoader extends VectorLoader {
return toArrayBufferView(Uint8Array,
Int64.convertArray(sources[offset] as string[]));
} else if (DataType.isDecimal(type)) {
return toArrayBufferView(Uint8Array,
Int128.convertArray(sources[offset] as string[]));
- } else if (DataType.isBinary(type) ||
DataType.isFixedSizeBinary(type)) {
+ } else if (DataType.isBinary(type) || DataType.isLargeBinary(type) ||
DataType.isFixedSizeBinary(type)) {
return binaryDataFromJSON(sources[offset] as string[]);
} else if (DataType.isBool(type)) {
return packBools(sources[offset] as number[]);
diff --git a/js/test/data/tables.ts b/js/test/data/tables.ts
index 449cfe1fb8..89cf93eab5 100644
--- a/js/test/data/tables.ts
+++ b/js/test/data/tables.ts
@@ -27,7 +27,7 @@ const nestedVectorGeneratorNames = ['struct', 'denseUnion',
'sparseUnion', 'map'
const dictionaryKeyGeneratorNames = ['int8', 'int16', 'int32', 'uint8',
'uint16', 'uint32'];
const valueVectorGeneratorNames = [
'null_', 'bool', 'int8', 'int16', 'int32', 'int64', 'uint8', 'uint16',
'uint32', 'uint64',
- 'float16', 'float32', 'float64', 'utf8', 'largeUtf8', 'binary',
'fixedSizeBinary', 'dateDay', 'dateMillisecond',
+ 'float16', 'float32', 'float64', 'utf8', 'largeUtf8', 'binary',
'largeBinary', 'fixedSizeBinary', 'dateDay', 'dateMillisecond',
'timestampSecond', 'timestampMillisecond', 'timestampMicrosecond',
'timestampNanosecond',
'timeSecond', 'timeMillisecond', 'timeMicrosecond', 'timeNanosecond',
'decimal',
'dictionary', 'intervalDayTime', 'intervalYearMonth',
diff --git a/js/test/generate-test-data.ts b/js/test/generate-test-data.ts
index 9d7b038331..be248ad2c6 100644
--- a/js/test/generate-test-data.ts
+++ b/js/test/generate-test-data.ts
@@ -25,7 +25,7 @@ import {
Int, Int8, Int16, Int32, Int64, Uint8, Uint16, Uint32, Uint64,
Float, Float16, Float32, Float64,
Utf8, LargeUtf8,
- Binary,
+ Binary, LargeBinary,
FixedSizeBinary,
Date_, DateDay, DateMillisecond,
Timestamp, TimestampSecond, TimestampMillisecond, TimestampMicrosecond,
TimestampNanosecond,
@@ -54,6 +54,7 @@ interface TestDataVectorGenerator extends Visitor {
visit<T extends Utf8>(type: T, length?: number, nullCount?: number):
GeneratedVector<T>;
visit<T extends LargeUtf8>(type: T, length?: number, nullCount?: number):
GeneratedVector<T>;
visit<T extends Binary>(type: T, length?: number, nullCount?: number):
GeneratedVector<T>;
+ visit<T extends LargeBinary>(type: T, length?: number, nullCount?:
number): GeneratedVector<T>;
visit<T extends FixedSizeBinary>(type: T, length?: number, nullCount?:
number): GeneratedVector<T>;
visit<T extends Date_>(type: T, length?: number, nullCount?: number):
GeneratedVector<T>;
visit<T extends Timestamp>(type: T, length?: number, nullCount?: number):
GeneratedVector<T>;
@@ -78,6 +79,7 @@ interface TestDataVectorGenerator extends Visitor {
visitUtf8: typeof generateUtf8;
visitLargeUtf8: typeof generateLargeUtf8;
visitBinary: typeof generateBinary;
+ visitLargeBinary: typeof generateLargeBinary;
visitFixedSizeBinary: typeof generateFixedSizeBinary;
visitDate: typeof generateDate;
visitTimestamp: typeof generateTimestamp;
@@ -104,6 +106,7 @@ TestDataVectorGenerator.prototype.visitFloat =
generateFloat;
TestDataVectorGenerator.prototype.visitUtf8 = generateUtf8;
TestDataVectorGenerator.prototype.visitLargeUtf8 = generateLargeUtf8;
TestDataVectorGenerator.prototype.visitBinary = generateBinary;
+TestDataVectorGenerator.prototype.visitLargeBinary = generateLargeBinary;
TestDataVectorGenerator.prototype.visitFixedSizeBinary =
generateFixedSizeBinary;
TestDataVectorGenerator.prototype.visitDate = generateDate;
TestDataVectorGenerator.prototype.visitTimestamp = generateTimestamp;
@@ -219,6 +222,7 @@ export const float64 = (length = 100, nullCount =
Math.trunc(length * 0.2)) => v
export const utf8 = (length = 100, nullCount = Math.trunc(length * 0.2)) =>
vectorGenerator.visit(new Utf8(), length, nullCount);
export const largeUtf8 = (length = 100, nullCount = Math.trunc(length * 0.2))
=> vectorGenerator.visit(new LargeUtf8(), length, nullCount);
export const binary = (length = 100, nullCount = Math.trunc(length * 0.2)) =>
vectorGenerator.visit(new Binary(), length, nullCount);
+export const largeBinary = (length = 100, nullCount = Math.trunc(length *
0.2)) => vectorGenerator.visit(new LargeBinary(), length, nullCount);
export const fixedSizeBinary = (length = 100, nullCount = Math.trunc(length *
0.2), byteWidth = 8) => vectorGenerator.visit(new FixedSizeBinary(byteWidth),
length, nullCount);
export const dateDay = (length = 100, nullCount = Math.trunc(length * 0.2)) =>
vectorGenerator.visit(new DateDay(), length, nullCount);
export const dateMillisecond = (length = 100, nullCount = Math.trunc(length *
0.2)) => vectorGenerator.visit(new DateMillisecond(), length, nullCount);
@@ -246,7 +250,7 @@ export const fixedSizeList = (length = 100, nullCount =
Math.trunc(length * 0.2)
export const map = <TKey extends DataType = any, TValue extends DataType =
any>(length = 100, nullCount = Math.trunc(length * 0.2), child: Field<Struct<{
key: TKey; value: TValue }>> = <any>defaultMapChild()) =>
vectorGenerator.visit(new Map_<TKey, TValue>(child), length, nullCount);
export const vecs = {
- null_, bool, int8, int16, int32, int64, uint8, uint16, uint32, uint64,
float16, float32, float64, utf8, largeUtf8, binary, fixedSizeBinary, dateDay,
dateMillisecond, timestampSecond, timestampMillisecond, timestampMicrosecond,
timestampNanosecond, timeSecond, timeMillisecond, timeMicrosecond,
timeNanosecond, decimal, list, struct, denseUnion, sparseUnion, dictionary,
intervalDayTime, intervalYearMonth, fixedSizeList, map, durationSecond,
durationMillisecond, durationMicrosecond, dura [...]
+ null_, bool, int8, int16, int32, int64, uint8, uint16, uint32, uint64,
float16, float32, float64, utf8, largeUtf8, binary, largeBinary,
fixedSizeBinary, dateDay, dateMillisecond, timestampSecond,
timestampMillisecond, timestampMicrosecond, timestampNanosecond, timeSecond,
timeMillisecond, timeMicrosecond, timeNanosecond, decimal, list, struct,
denseUnion, sparseUnion, dictionary, intervalDayTime, intervalYearMonth,
fixedSizeList, map, durationSecond, durationMillisecond, durationMicr [...]
} as { [k: string]: (...args: any[]) => any };
function generateNull<T extends Null>(this: TestDataVectorGenerator, type: T,
length = 100): GeneratedVector<T> {
@@ -368,6 +372,16 @@ function generateBinary<T extends Binary>(this:
TestDataVectorGenerator, type: T
return { values: () => values, vector: new Vector([makeData({ type,
length, nullCount, nullBitmap, valueOffsets, data })]) };
}
+function generateLargeBinary<T extends LargeBinary>(this:
TestDataVectorGenerator, type: T, length = 100, nullCount = Math.trunc(length *
0.2)): GeneratedVector<T> {
+ const nullBitmap = createBitmap(length, nullCount);
+ const valueOffsets = createVariableWidthOffsets64(length, nullBitmap, 10,
20, nullCount != 0);
+ const values = [...valueOffsets.slice(1)]
+ .map((o, i) => isValid(nullBitmap, i) ? o - valueOffsets[i] : null)
+ .map((length) => length == null ? null : randomBytes(Number(length)));
+ const data = createVariableWidthBytes(length, nullBitmap, valueOffsets,
(i) => values[i]!);
+ return { values: () => values, vector: new Vector([makeData({ type,
length, nullCount, nullBitmap, valueOffsets, data })]) };
+}
+
function generateFixedSizeBinary<T extends FixedSizeBinary>(this:
TestDataVectorGenerator, type: T, length = 100, nullCount = Math.trunc(length *
0.2)): GeneratedVector<T> {
const nullBitmap = createBitmap(length, nullCount);
const data = fillRandom(Uint8Array, length * type.byteWidth);
diff --git a/js/test/unit/builders/builder-tests.ts
b/js/test/unit/builders/builder-tests.ts
index 0137c7aa66..4d1be9b225 100644
--- a/js/test/unit/builders/builder-tests.ts
+++ b/js/test/unit/builders/builder-tests.ts
@@ -46,6 +46,7 @@ describe('Generated Test Data', () => {
describe('Utf8Builder', () => { validateBuilder(generate.utf8); });
describe('LargeUtf8Builder', () => { validateBuilder(generate.largeUtf8);
});
describe('BinaryBuilder', () => { validateBuilder(generate.binary); });
+ describe('LargeBinaryBuilder', () => {
validateBuilder(generate.largeBinary); });
describe('FixedSizeBinaryBuilder', () => {
validateBuilder(generate.fixedSizeBinary); });
describe('DateDayBuilder', () => { validateBuilder(generate.dateDay); });
describe('DateMillisecondBuilder', () => {
validateBuilder(generate.dateMillisecond); });
diff --git a/js/test/unit/generated-data-tests.ts
b/js/test/unit/generated-data-tests.ts
index 0a06bcbab8..1e26e74730 100644
--- a/js/test/unit/generated-data-tests.ts
+++ b/js/test/unit/generated-data-tests.ts
@@ -40,6 +40,7 @@ describe('Generated Test Data', () => {
describe('Utf8', () => { validateVector(generate.utf8()); });
describe('LargeUtf8', () => { validateVector(generate.largeUtf8()); });
describe('Binary', () => { validateVector(generate.binary()); });
+ describe('LargeBinary', () => { validateVector(generate.largeBinary()); });
describe('FixedSizeBinary', () => {
validateVector(generate.fixedSizeBinary()); });
describe('DateDay', () => { validateVector(generate.dateDay()); });
describe('DateMillisecond', () => {
validateVector(generate.dateMillisecond()); });
diff --git a/js/test/unit/visitor-tests.ts b/js/test/unit/visitor-tests.ts
index f78adc59f8..6ecb6cca33 100644
--- a/js/test/unit/visitor-tests.ts
+++ b/js/test/unit/visitor-tests.ts
@@ -18,7 +18,7 @@
import {
Field, Visitor,
DataType, Dictionary,
- Bool, Null, Utf8, LargeUtf8, Binary, Decimal, FixedSizeBinary, List,
FixedSizeList, Map_, Struct,
+ Bool, Null, Utf8, LargeUtf8, Binary, LargeBinary, Decimal,
FixedSizeBinary, List, FixedSizeList, Map_, Struct,
Float, Float16, Float32, Float64,
Int, Uint8, Uint16, Uint32, Uint64, Int8, Int16, Int32, Int64,
Date_, DateDay, DateMillisecond,
@@ -38,6 +38,7 @@ class BasicVisitor extends Visitor {
public visitUtf8<T extends Utf8>(type: T) { return (this.type = type); }
public visitLargeUtf8<T extends LargeUtf8>(type: T) { return (this.type =
type); }
public visitBinary<T extends Binary>(type: T) { return (this.type = type);
}
+ public visitLargeBinary<T extends LargeBinary>(type: T) { return
(this.type = type); }
public visitFixedSizeBinary<T extends FixedSizeBinary>(type: T) { return
(this.type = type); }
public visitDate<T extends Date_>(type: T) { return (this.type = type); }
public visitTimestamp<T extends Timestamp>(type: T) { return (this.type =
type); }
@@ -71,6 +72,7 @@ class FeatureVisitor extends Visitor {
public visitUtf8<T extends Utf8>(type: T) { return (this.type = type); }
public visitLargeUtf8<T extends LargeUtf8>(type: T) { return (this.type =
type); }
public visitBinary<T extends Binary>(type: T) { return (this.type = type);
}
+ public visitLargeBinary<T extends LargeBinary>(type: T) { return
(this.type = type); }
public visitFixedSizeBinary<T extends FixedSizeBinary>(type: T) { return
(this.type = type); }
public visitDateDay<T extends DateDay>(type: T) { return (this.type =
type); }
public visitDateMillisecond<T extends DateMillisecond>(type: T) { return
(this.type = type); }
@@ -108,6 +110,7 @@ describe('Visitor', () => {
test(`visits Utf8 types`, () => validateBasicVisitor(new Utf8()));
test(`visits LargeUtf8 types`, () => validateBasicVisitor(new
LargeUtf8()));
test(`visits Binary types`, () => validateBasicVisitor(new Binary()));
+ test(`visits LargeBinary types`, () => validateBasicVisitor(new
LargeBinary()));
test(`visits FixedSizeBinary types`, () => validateBasicVisitor(new
FixedSizeBinary(128)));
test(`visits Date types`, () => validateBasicVisitor(new Date_(0)));
test(`visits Timestamp types`, () => validateBasicVisitor(new
Timestamp(0, 'UTC')));
@@ -149,6 +152,7 @@ describe('Visitor', () => {
test(`visits Utf8 types`, () => validateFeatureVisitor(new Utf8()));
test(`visits LargeUtf8 types`, () => validateFeatureVisitor(new
LargeUtf8()));
test(`visits Binary types`, () => validateFeatureVisitor(new
Binary()));
+ test(`visits LargeBinary types`, () => validateFeatureVisitor(new
LargeBinary()));
test(`visits FixedSizeBinary types`, () => validateFeatureVisitor(new
FixedSizeBinary(128)));
test(`visits DateDay types`, () => validateFeatureVisitor(new
DateDay()));
test(`visits DateMillisecond types`, () => validateFeatureVisitor(new
DateMillisecond()));