This is an automated email from the ASF dual-hosted git repository.
curth pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 9e33d12f1b GH-39341: [C#] Support Utf8View, BinaryView and ListView
(#39342)
9e33d12f1b is described below
commit 9e33d12f1b022c902cc831026ceb3e0016ca4b3c
Author: Curt Hagenlocher <[email protected]>
AuthorDate: Wed Dec 27 10:10:46 2023 -0800
GH-39341: [C#] Support Utf8View, BinaryView and ListView (#39342)
### What changes are included in this PR?
Support for reading, writing and representing Utf8View, BinaryView and
ListView.
### Are these changes tested?
Yes
### Are there any user-facing changes?
New classes and APIs for Utf8View, BinaryView and ListView.
* Closes: #39341
Authored-by: Curt Hagenlocher <[email protected]>
Signed-off-by: Curt Hagenlocher <[email protected]>
---
.../Apache.Arrow/Arrays/ArrayDataConcatenator.cs | 102 +++++-
.../Arrays/ArrowArrayBuilderFactory.cs | 6 +
.../src/Apache.Arrow/Arrays/ArrowArrayFactory.cs | 6 +
csharp/src/Apache.Arrow/Arrays/BinaryViewArray.cs | 344 +++++++++++++++++++++
csharp/src/Apache.Arrow/Arrays/ListViewArray.cs | 217 +++++++++++++
csharp/src/Apache.Arrow/Arrays/StringViewArray.cs | 110 +++++++
csharp/src/Apache.Arrow/C/CArrowArrayExporter.cs | 22 +-
csharp/src/Apache.Arrow/C/CArrowArrayImporter.cs | 48 +++
csharp/src/Apache.Arrow/C/CArrowSchemaExporter.cs | 3 +
csharp/src/Apache.Arrow/C/CArrowSchemaImporter.cs | 6 +-
.../Apache.Arrow/Extensions/ArrayDataExtensions.cs | 11 +
.../Apache.Arrow/Extensions/FlatbufExtensions.cs | 19 --
csharp/src/Apache.Arrow/Flatbuf/BinaryView.cs | 47 +++
.../Apache.Arrow/Flatbuf/Enums/MetadataVersion.cs | 12 +-
csharp/src/Apache.Arrow/Flatbuf/Enums/Type.cs | 16 +
csharp/src/Apache.Arrow/Flatbuf/Field.cs | 4 +
csharp/src/Apache.Arrow/Flatbuf/LargeListView.cs | 42 +++
csharp/src/Apache.Arrow/Flatbuf/ListView.cs | 43 +++
csharp/src/Apache.Arrow/Flatbuf/RecordBatch.cs | 37 ++-
csharp/src/Apache.Arrow/Flatbuf/SparseTensor.cs | 4 +
csharp/src/Apache.Arrow/Flatbuf/Tensor.cs | 4 +
csharp/src/Apache.Arrow/Flatbuf/Utf8View.cs | 47 +++
.../Apache.Arrow/Ipc/ArrowReaderImplementation.cs | 79 ++---
csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs | 57 +++-
.../Apache.Arrow/Ipc/ArrowTypeFlatbufferBuilder.cs | 39 +++
csharp/src/Apache.Arrow/Ipc/MessageSerializer.cs | 10 +
csharp/src/Apache.Arrow/Scalars/BinaryView.cs | 111 +++++++
.../Types/{IArrowType.cs => BinaryViewType.cs} | 48 +--
csharp/src/Apache.Arrow/Types/IArrowType.cs | 3 +
.../Types/{IArrowType.cs => ListViewType.cs} | 55 +---
.../Types/{IArrowType.cs => StringViewType.cs} | 48 +--
.../ArrowWriterBenchmark.cs | 2 +-
.../test/Apache.Arrow.IntegrationTest/JsonFile.cs | 156 +++++++++-
.../Properties/launchSettings.json | 8 +
.../ArrowArrayConcatenatorTests.cs | 89 ++++++
.../test/Apache.Arrow.Tests/ArrowReaderVerifier.cs | 61 ++++
csharp/test/Apache.Arrow.Tests/BinaryViewTests.cs | 89 ++++++
.../CDataInterfacePythonTests.cs | 4 +-
csharp/test/Apache.Arrow.Tests/TableTests.cs | 6 +-
csharp/test/Apache.Arrow.Tests/TestData.cs | 198 +++++++++---
dev/archery/archery/integration/datagen.py | 3 +-
docs/source/status.rst | 10 +-
42 files changed, 1948 insertions(+), 278 deletions(-)
diff --git a/csharp/src/Apache.Arrow/Arrays/ArrayDataConcatenator.cs
b/csharp/src/Apache.Arrow/Arrays/ArrayDataConcatenator.cs
index 463ca49e29..698d74e4ba 100644
--- a/csharp/src/Apache.Arrow/Arrays/ArrayDataConcatenator.cs
+++ b/csharp/src/Apache.Arrow/Arrays/ArrayDataConcatenator.cs
@@ -14,6 +14,7 @@
// limitations under the License.
using Apache.Arrow.Memory;
+using Apache.Arrow.Scalars;
using Apache.Arrow.Types;
using System;
using System.Collections.Generic;
@@ -46,8 +47,11 @@ namespace Apache.Arrow
IArrowTypeVisitor<BooleanType>,
IArrowTypeVisitor<FixedWidthType>,
IArrowTypeVisitor<BinaryType>,
+ IArrowTypeVisitor<BinaryViewType>,
IArrowTypeVisitor<StringType>,
+ IArrowTypeVisitor<StringViewType>,
IArrowTypeVisitor<ListType>,
+ IArrowTypeVisitor<ListViewType>,
IArrowTypeVisitor<FixedSizeListType>,
IArrowTypeVisitor<StructType>,
IArrowTypeVisitor<UnionType>,
@@ -84,17 +88,50 @@ namespace Apache.Arrow
{
CheckData(type, 2);
ArrowBuffer validityBuffer = ConcatenateValidityBuffer();
- ArrowBuffer valueBuffer =
ConcatenateFixedWidthTypeValueBuffer(type);
+ ArrowBuffer valueBuffer =
ConcatenateFixedWidthTypeValueBuffer(1, type);
Result = new ArrayData(type, _totalLength, _totalNullCount, 0,
new ArrowBuffer[] { validityBuffer, valueBuffer });
}
public void Visit(BinaryType type) =>
ConcatenateVariableBinaryArrayData(type);
+ public void Visit(BinaryViewType type) =>
ConcatenateBinaryViewArrayData(type);
+
public void Visit(StringType type) =>
ConcatenateVariableBinaryArrayData(type);
+ public void Visit(StringViewType type) =>
ConcatenateBinaryViewArrayData(type);
+
public void Visit(ListType type) => ConcatenateLists(type);
+ public void Visit(ListViewType type) // Concatenates list-view inputs: validity, rebased offsets, sizes, and the concatenated child.
+ {
+ CheckData(type, 3); // presumably validates type and buffer count (validity, offsets, sizes) — confirm against CheckData
+ ArrowBuffer validityBuffer = ConcatenateValidityBuffer();
+
+ var offsetsBuilder = new
ArrowBuffer.Builder<int>(_totalLength);
+ int baseOffset = 0; // total child length of the inputs processed so far
+
+ foreach (ArrayData arrayData in _arrayDataList)
+ {
+ if (arrayData.Length > 0)
+ {
+ ReadOnlySpan<int> span =
arrayData.Buffers[1].Span.CastTo<int>().Slice(0, arrayData.Length); // NOTE(review): reads from element 0; confirm inputs with a non-zero ArrayData.Offset are handled elsewhere
+ foreach (int offset in span)
+ {
+ offsetsBuilder.Append(baseOffset + offset); // rebase the offset onto the concatenated child
+ }
+ }
+
+ baseOffset += arrayData.Children[0].Length; // advances even when this input has no rows, since its child is still concatenated below
+ }
+
+ ArrowBuffer offsetBuffer = offsetsBuilder.Build(_allocator);
+ ArrowBuffer sizesBuffer =
ConcatenateFixedWidthTypeValueBuffer(2, Int32Type.Default); // sizes come from buffer 2 of each input and are copied unchanged
+ ArrayData child = Concatenate(SelectChildren(0), _allocator);
+
+ Result = new ArrayData(type, _totalLength, _totalNullCount, 0,
new ArrowBuffer[] { validityBuffer, offsetBuffer, sizesBuffer }, new[] { child
});
+ }
+
public void Visit(FixedSizeListType type)
{
CheckData(type, 1);
@@ -161,6 +198,15 @@ namespace Apache.Arrow
}
}
+ private void CheckDataVariadicCount(IArrowType type, int
expectedBufferCount)
+ {
+ foreach (ArrayData arrayData in _arrayDataList)
+ {
+ arrayData.EnsureDataType(type.TypeId);
+ arrayData.EnsureVariadicBufferCount(expectedBufferCount);
+ }
+ }
+
private void ConcatenateVariableBinaryArrayData(IArrowType type)
{
CheckData(type, 3);
@@ -171,6 +217,26 @@ namespace Apache.Arrow
Result = new ArrayData(type, _totalLength, _totalNullCount, 0,
new ArrowBuffer[] { validityBuffer, offsetBuffer, valueBuffer });
}
+ private void ConcatenateBinaryViewArrayData(IArrowType type) // Concatenates view-typed inputs: validity, combined views, then every input's data buffers in input order.
+ {
+ CheckDataVariadicCount(type, 2);
+ ArrowBuffer validityBuffer = ConcatenateValidityBuffer();
+ ArrowBuffer viewBuffer = ConcatenateViewBuffer(out int
variadicBufferCount);
+ ArrowBuffer[] buffers = new ArrowBuffer[2 +
variadicBufferCount];
+ buffers[0] = validityBuffer;
+ buffers[1] = viewBuffer;
+ int index = 2; // data buffers follow the validity and views buffers
+ foreach (ArrayData arrayData in _arrayDataList)
+ {
+ for (int i = 2; i < arrayData.Buffers.Length; i++)
+ {
+ buffers[index++] = arrayData.Buffers[i]; // the input's buffer is reused as-is; this order must match the buffer indices produced by ConcatenateViewBuffer
+ }
+ }
+
+ Result = new ArrayData(type, _totalLength, _totalNullCount, 0,
buffers);
+ }
+
private void ConcatenateLists(NestedType type)
{
CheckData(type, 2);
@@ -206,7 +272,7 @@ namespace Apache.Arrow
return builder.Build(_allocator);
}
- private ArrowBuffer
ConcatenateFixedWidthTypeValueBuffer(FixedWidthType type)
+ private ArrowBuffer ConcatenateFixedWidthTypeValueBuffer(int
bufferIndex, FixedWidthType type)
{
int typeByteWidth = type.BitWidth / 8;
var builder = new ArrowBuffer.Builder<byte>(_totalLength *
typeByteWidth);
@@ -216,7 +282,7 @@ namespace Apache.Arrow
int length = arrayData.Length;
int byteLength = length * typeByteWidth;
- builder.Append(arrayData.Buffers[1].Span.Slice(0,
byteLength));
+
builder.Append(arrayData.Buffers[bufferIndex].Span.Slice(0, byteLength));
}
return builder.Build(_allocator);
@@ -265,6 +331,36 @@ namespace Apache.Arrow
return builder.Build(_allocator);
}
+        /// <summary>
+        /// Builds the views buffer of the concatenated array from the views buffers of all inputs.
+        /// </summary>
+        /// <remarks>
+        /// Views longer than <c>BinaryView.MaxInlineLength</c> have their buffer index shifted by the
+        /// number of data buffers contributed by the preceding inputs; shorter views are copied as-is.
+        /// </remarks>
+        /// <param name="variadicBufferCount">
+        /// Receives the total number of data buffers (buffers beyond the first two) across all inputs.
+        /// </param>
+        /// <returns>The combined views buffer.</returns>
+        private ArrowBuffer ConcatenateViewBuffer(out int variadicBufferCount)
+        {
+            var builder = new ArrowBuffer.Builder<BinaryView>(_totalLength);
+            variadicBufferCount = 0;
+            foreach (ArrayData arrayData in _arrayDataList)
+            {
+                if (arrayData.Length > 0)
+                {
+                    ReadOnlySpan<BinaryView> span = arrayData.Buffers[1].Span.CastTo<BinaryView>().Slice(0, arrayData.Length);
+                    foreach (BinaryView view in span)
+                    {
+                        if (view.Length > BinaryView.MaxInlineLength)
+                        {
+                            builder.Append(view.AdjustBufferIndex(variadicBufferCount));
+                        }
+                        else
+                        {
+                            builder.Append(view);
+                        }
+                    }
+                }
+
+                // Count the data buffers of every input, including zero-length ones: the caller copies
+                // the data buffers of all inputs, so skipping an empty input here would undersize the
+                // result array and misalign the buffer indices of the inputs that follow it.
+                variadicBufferCount += (arrayData.Buffers.Length - 2);
+            }
+
+            return builder.Build(_allocator);
+        }
+
private ArrowBuffer ConcatenateUnionTypeBuffer()
{
var builder = new ArrowBuffer.Builder<byte>(_totalLength);
diff --git a/csharp/src/Apache.Arrow/Arrays/ArrowArrayBuilderFactory.cs
b/csharp/src/Apache.Arrow/Arrays/ArrowArrayBuilderFactory.cs
index af5a524798..f836710208 100644
--- a/csharp/src/Apache.Arrow/Arrays/ArrowArrayBuilderFactory.cs
+++ b/csharp/src/Apache.Arrow/Arrays/ArrowArrayBuilderFactory.cs
@@ -54,8 +54,12 @@ namespace Apache.Arrow
return new DoubleArray.Builder();
case ArrowTypeId.String:
return new StringArray.Builder();
+ case ArrowTypeId.StringView:
+ return new StringViewArray.Builder();
case ArrowTypeId.Binary:
return new BinaryArray.Builder();
+ case ArrowTypeId.BinaryView:
+ return new BinaryViewArray.Builder();
case ArrowTypeId.Timestamp:
return new TimestampArray.Builder();
case ArrowTypeId.Date64:
@@ -70,6 +74,8 @@ namespace Apache.Arrow
return new DurationArray.Builder(dataType as DurationType);
case ArrowTypeId.List:
return new ListArray.Builder(dataType as ListType);
+ case ArrowTypeId.ListView:
+ return new ListViewArray.Builder(dataType as ListViewType);
case ArrowTypeId.FixedSizeList:
return new FixedSizeListArray.Builder(dataType as
FixedSizeListType);
case ArrowTypeId.Decimal128:
diff --git a/csharp/src/Apache.Arrow/Arrays/ArrowArrayFactory.cs
b/csharp/src/Apache.Arrow/Arrays/ArrowArrayFactory.cs
index d6577260bb..3d2ab1d212 100644
--- a/csharp/src/Apache.Arrow/Arrays/ArrowArrayFactory.cs
+++ b/csharp/src/Apache.Arrow/Arrays/ArrowArrayFactory.cs
@@ -51,14 +51,20 @@ namespace Apache.Arrow
return new DoubleArray(data);
case ArrowTypeId.String:
return new StringArray(data);
+ case ArrowTypeId.StringView:
+ return new StringViewArray(data);
case ArrowTypeId.FixedSizedBinary:
return new FixedSizeBinaryArray(data);
case ArrowTypeId.Binary:
return new BinaryArray(data);
+ case ArrowTypeId.BinaryView:
+ return new BinaryViewArray(data);
case ArrowTypeId.Timestamp:
return new TimestampArray(data);
case ArrowTypeId.List:
return new ListArray(data);
+ case ArrowTypeId.ListView:
+ return new ListViewArray(data);
case ArrowTypeId.Map:
return new MapArray(data);
case ArrowTypeId.Struct:
diff --git a/csharp/src/Apache.Arrow/Arrays/BinaryViewArray.cs
b/csharp/src/Apache.Arrow/Arrays/BinaryViewArray.cs
new file mode 100644
index 0000000000..4f62dffd1d
--- /dev/null
+++ b/csharp/src/Apache.Arrow/Arrays/BinaryViewArray.cs
@@ -0,0 +1,344 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+using Apache.Arrow.Memory;
+using Apache.Arrow.Scalars;
+using Apache.Arrow.Types;
+using System;
+using System.Collections.Generic;
+using System.Runtime.CompilerServices;
+using System.Collections;
+
+namespace Apache.Arrow
+{
+ public class BinaryViewArray : Array, IReadOnlyList<byte[]>
+ {
+ public class Builder : BuilderBase<BinaryViewArray, Builder>
+ {
+ public Builder() : base(BinaryViewType.Default) { }
+ public Builder(IArrowType dataType) : base(dataType) { }
+
+ protected override BinaryViewArray Build(ArrayData data)
+ {
+ return new BinaryViewArray(data);
+ }
+ }
+
+ public BinaryViewArray(ArrayData data)
+ : base(data)
+ {
+ data.EnsureDataType(ArrowTypeId.BinaryView);
+ data.EnsureVariadicBufferCount(2);
+ }
+
+ public BinaryViewArray(ArrowTypeId typeId, ArrayData data)
+ : base(data)
+ {
+ data.EnsureDataType(typeId);
+ data.EnsureVariadicBufferCount(2);
+ }
+
+ public abstract class BuilderBase<TArray, TBuilder> :
IArrowArrayBuilder<byte, TArray, TBuilder>
+ where TArray : IArrowArray
+ where TBuilder : class, IArrowArrayBuilder<byte, TArray, TBuilder>
+ {
+ protected IArrowType DataType { get; }
+ protected TBuilder Instance => this as TBuilder;
+ protected ArrowBuffer.Builder<BinaryView> BinaryViews { get; }
+ protected ArrowBuffer.Builder<byte> ValueBuffer { get; }
+ protected ArrowBuffer.BitmapBuilder ValidityBuffer { get; }
+ protected int NullCount => this.ValidityBuffer.UnsetBitCount;
+
+ protected BuilderBase(IArrowType dataType)
+ {
+ DataType = dataType;
+ BinaryViews = new ArrowBuffer.Builder<BinaryView>();
+ ValueBuffer = new ArrowBuffer.Builder<byte>();
+ ValidityBuffer = new ArrowBuffer.BitmapBuilder();
+ }
+
+ protected abstract TArray Build(ArrayData data);
+
+ /// <summary>
+ /// Gets the length of the array built so far.
+ /// </summary>
+ public int Length => BinaryViews.Length;
+
+ /// <summary>
+ /// Build an Arrow array from the appended contents so far.
+ /// </summary>
+ /// <param name="allocator">Optional memory allocator.</param>
+ /// <returns>Returns an array of type <typeparamref
name="TArray"/>.</returns>
+ public TArray Build(MemoryAllocator allocator = default)
+ {
+ bool hasValues = ValueBuffer.Length > 0;
+ var bufs = new ArrowBuffer[hasValues ? 3 : 2];
+ bufs[0] = NullCount > 0 ? ValidityBuffer.Build(allocator) :
ArrowBuffer.Empty;
+ bufs[1] = BinaryViews.Build(allocator);
+ if (hasValues) { bufs[2] = ValueBuffer.Build(allocator); }
+
+ var data = new ArrayData(
+ DataType,
+ length: Length,
+ NullCount,
+ offset: 0,
+ bufs);
+
+ return Build(data);
+ }
+
+ /// <summary>
+ /// Append a single null value to the array.
+ /// </summary>
+ /// <returns>Returns the builder (for fluent-style
composition).</returns>
+ public TBuilder AppendNull()
+ {
+ // Do not add to the value buffer in the case of a null.
+ // The slot still gets a default view so the views and validity buffers stay aligned; this builder has no offsets buffer to update.
+ ValidityBuffer.Append(false);
+ BinaryViews.Append(default(BinaryView));
+ return Instance;
+ }
+
+ /// <summary>
+ /// Appends a value, consisting of a single byte, to the array.
+ /// </summary>
+ /// <param name="value">Byte value to append.</param>
+ /// <returns>Returns the builder (for fluent-style
composition).</returns>
+ public TBuilder Append(byte value)
+ {
+ ValidityBuffer.Append(true);
+ Span<byte> buf = stackalloc[] { value };
+ BinaryViews.Append(new BinaryView(buf));
+ return Instance;
+ }
+
+ /// <summary>
+ /// Append a value, consisting of a span of bytes, to the array.
+ /// </summary>
+ /// <remarks>
+ /// Note that a single value is added, which consists of
arbitrarily many bytes. If multiple values are
+ /// to be added, use the <see cref="AppendRange"/> method.
+ /// </remarks>
+ /// <param name="span">Span of bytes to add.</param>
+ /// <returns>Returns the builder (for fluent-style
composition).</returns>
+ public TBuilder Append(ReadOnlySpan<byte> span)
+ {
+ if (span.Length > BinaryView.MaxInlineLength)
+ {
+ int offset = ValueBuffer.Length; // position at which this value starts in the builder's single data buffer
+ ValueBuffer.Append(span);
+ BinaryViews.Append(new BinaryView(span.Length,
span.Slice(0, 4), 0, offset)); // presumably (length, 4-byte prefix, buffer index, buffer offset) — confirm against BinaryView
+ }
+ else
+ {
+ BinaryViews.Append(new BinaryView(span)); // short value: nothing is written to the data buffer
+ }
+ ValidityBuffer.Append(true);
+ return Instance;
+ }
+
+ /// <summary>
+ /// Append an enumerable collection of single-byte values to the
array.
+ /// </summary>
+ /// <remarks>
+ /// Note that this method appends multiple values, each of which
is a single byte
+ /// </remarks>
+ /// <param name="values">Single-byte values to add.</param>
+ /// <returns>Returns the builder (for fluent-style
composition).</returns>
+ public TBuilder AppendRange(IEnumerable<byte> values)
+ {
+ if (values == null)
+ {
+ throw new ArgumentNullException(nameof(values));
+ }
+
+ foreach (byte b in values)
+ {
+ Append(b);
+ }
+
+ return Instance;
+ }
+
+ /// <summary>
+ /// Append an enumerable collection of values to the array.
+ /// </summary>
+ /// <param name="values">Values to add.</param>
+ /// <returns>Returns the builder (for fluent-style
composition).</returns>
+ public TBuilder AppendRange(IEnumerable<byte[]> values)
+ {
+ if (values == null)
+ {
+ throw new ArgumentNullException(nameof(values));
+ }
+
+ foreach (byte[] arr in values)
+ {
+ if (arr == null)
+ {
+ AppendNull();
+ }
+ else
+ {
+ Append((ReadOnlySpan<byte>)arr);
+ }
+ }
+
+ return Instance;
+ }
+
+ public TBuilder Reserve(int capacity)
+ {
+ // TODO: [ARROW-9366] Reserve capacity in the value buffer in
a more sensible way.
+ BinaryViews.Reserve(capacity);
+ ValueBuffer.Reserve(capacity);
+ ValidityBuffer.Reserve(capacity);
+ return Instance;
+ }
+
+            /// <summary>
+            /// Resize the array being built to the given number of elements.
+            /// </summary>
+            /// <param name="length">New number of elements.</param>
+            /// <returns>Returns the builder (for fluent-style composition).</returns>
+            public TBuilder Resize(int length)
+            {
+                // The value buffer is deliberately left untouched. `length` counts elements, not bytes,
+                // and the views that survive the resize still reference their original offsets in the
+                // value buffer, so resizing it to `length` could cut off bytes they point at.
+                // TODO: [ARROW-9366] Trim value bytes that are no longer referenced after shrinking.
+                BinaryViews.Resize(length);
+                ValidityBuffer.Resize(length);
+                return Instance;
+            }
+
+ public TBuilder Swap(int i, int j)
+ {
+ ValidityBuffer.Swap(i, j);
+ BinaryView view = BinaryViews.Span[i];
+ BinaryViews.Span[i] = BinaryViews.Span[j];
+ BinaryViews.Span[j] = view;
+ return Instance;
+ }
+
+ public TBuilder Set(int index, byte value)
+ {
+ // TODO: Implement
+ throw new NotImplementedException();
+ }
+
+ /// <summary>
+ /// Clear all contents appended so far.
+ /// </summary>
+ /// <returns>Returns the builder (for fluent-style
composition).</returns>
+ public TBuilder Clear()
+ {
+ BinaryViews.Clear();
+ ValueBuffer.Clear();
+ ValidityBuffer.Clear();
+ return Instance;
+ }
+ }
+
+ public BinaryViewArray(IArrowType dataType, int length,
+ ArrowBuffer binaryViewsBuffer,
+ ArrowBuffer dataBuffer,
+ ArrowBuffer nullBitmapBuffer,
+ int nullCount = 0, int offset = 0)
+ : this(new ArrayData(dataType, length, nullCount, offset,
+ new[] { nullBitmapBuffer, binaryViewsBuffer, dataBuffer }))
+ { }
+
+ public override void Accept(IArrowArrayVisitor visitor) =>
Accept(this, visitor);
+
+ public ArrowBuffer ViewsBuffer => Data.Buffers[1];
+
+ public int DataBufferCount => Data.Buffers.Length - 2;
+
+ public ArrowBuffer DataBuffer(int index) => Data.Buffers[index + 2];
+
+ public ReadOnlySpan<BinaryView> Views =>
ViewsBuffer.Span.CastTo<BinaryView>().Slice(Offset, Length);
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public int GetValueLength(int index)
+ {
+ if (index < 0 || index >= Length)
+ {
+ throw new ArgumentOutOfRangeException(nameof(index));
+ }
+ if (!IsValid(index))
+ {
+ return 0;
+ }
+
+ return Views[index].Length;
+ }
+
+ /// <summary>
+ /// Get the collection of bytes, as a read-only span, at a given index
in the array.
+ /// </summary>
+ /// <remarks>
+ /// Note that this method cannot reliably identify null values, which
are indistinguishable from empty byte
+ /// collection values when seen in the context of this method's return
type of <see cref="ReadOnlySpan{Byte}"/>.
+ /// Use the <see cref="Array.IsNull"/> method or the <see
cref="GetBytes(int, out bool)"/> overload instead
+ /// to reliably determine null values.
+ /// </remarks>
+ /// <param name="index">Index at which to get bytes.</param>
+ /// <returns>Returns a <see cref="ReadOnlySpan{Byte}"/>
object.</returns>
+ /// <exception cref="ArgumentOutOfRangeException">If the index is
negative or beyond the length of the array.
+ /// </exception>
+ public ReadOnlySpan<byte> GetBytes(int index) => GetBytes(index, out
_);
+
+        /// <summary>
+        /// Get the collection of bytes, as a read-only span, at a given index in the array.
+        /// </summary>
+        /// <remarks>
+        /// <paramref name="index"/> is relative to the start of this array, so the array's offset into
+        /// the underlying views buffer is applied when locating the value.
+        /// </remarks>
+        /// <param name="index">Index at which to get bytes.</param>
+        /// <param name="isNull">Set to <see langword="true"/> if the value at the given index is null.</param>
+        /// <returns>Returns a <see cref="ReadOnlySpan{Byte}"/> object.</returns>
+        /// <exception cref="ArgumentOutOfRangeException">If the index is negative or beyond the length of the array.
+        /// </exception>
+        public ReadOnlySpan<byte> GetBytes(int index, out bool isNull)
+        {
+            if (index < 0 || index >= Length)
+            {
+                throw new ArgumentOutOfRangeException(nameof(index));
+            }
+
+            isNull = IsNull(index);
+
+            if (isNull)
+            {
+                // Note that `return null;` is valid syntax, but would be misleading as `null` in the context of a span
+                // is actually returned as an empty span.
+                return ReadOnlySpan<byte>.Empty;
+            }
+
+            BinaryView binaryView = Views[index];
+            if (binaryView.IsInline)
+            {
+                // Views is already sliced by Offset, but ViewsBuffer is the raw buffer, so the array
+                // offset has to be added back here. Each view occupies 16 bytes and its inline bytes
+                // start 4 bytes into the view.
+                return ViewsBuffer.Span.Slice(16 * (Offset + index) + 4, binaryView.Length);
+            }
+
+            return DataBuffer(binaryView._bufferIndex).Span.Slice(binaryView._bufferOffset, binaryView.Length);
+        }
+
+ int IReadOnlyCollection<byte[]>.Count => Length;
+ byte[] IReadOnlyList<byte[]>.this[int index] =>
GetBytes(index).ToArray();
+
+ IEnumerator<byte[]> IEnumerable<byte[]>.GetEnumerator()
+ {
+ for (int index = 0; index < Length; index++)
+ {
+ yield return GetBytes(index).ToArray();
+ }
+ }
+
+ IEnumerator IEnumerable.GetEnumerator() =>
((IEnumerable<byte[]>)this).GetEnumerator();
+ }
+}
diff --git a/csharp/src/Apache.Arrow/Arrays/ListViewArray.cs
b/csharp/src/Apache.Arrow/Arrays/ListViewArray.cs
new file mode 100644
index 0000000000..081385d921
--- /dev/null
+++ b/csharp/src/Apache.Arrow/Arrays/ListViewArray.cs
@@ -0,0 +1,217 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+using System;
+using Apache.Arrow.Memory;
+using Apache.Arrow.Types;
+
+namespace Apache.Arrow
+{
+ public class ListViewArray : Array
+ {
+ public class Builder : IArrowArrayBuilder<ListViewArray, Builder>
+ {
+ public IArrowArrayBuilder<IArrowArray,
IArrowArrayBuilder<IArrowArray>> ValueBuilder { get; }
+
+ public int Length => ValueOffsetsBufferBuilder.Length;
+
+ private ArrowBuffer.Builder<int> ValueOffsetsBufferBuilder { get; }
+
+ private ArrowBuffer.Builder<int> SizesBufferBuilder { get; }
+
+ private ArrowBuffer.BitmapBuilder ValidityBufferBuilder { get; }
+
+ public int NullCount { get; protected set; }
+
+ private IArrowType DataType { get; }
+
+ private int Start { get; set; }
+
+ public Builder(IArrowType valueDataType) : this(new
ListViewType(valueDataType))
+ {
+ }
+
+ public Builder(Field valueField) : this(new
ListViewType(valueField))
+ {
+ }
+
+ internal Builder(ListViewType dataType)
+ {
+ ValueBuilder =
ArrowArrayBuilderFactory.Build(dataType.ValueDataType);
+ ValueOffsetsBufferBuilder = new ArrowBuffer.Builder<int>();
+ SizesBufferBuilder = new ArrowBuffer.Builder<int>();
+ ValidityBufferBuilder = new ArrowBuffer.BitmapBuilder();
+ DataType = dataType;
+ Start = -1;
+ }
+
+ /// <summary>
+ /// Start a new variable-length list slot
+ ///
+ /// This function should be called before beginning to append
elements to the
+ /// value builder. TODO: Consider adding builder APIs to support
construction
+ /// of overlapping lists.
+ /// </summary>
+ public Builder Append()
+ {
+ AppendPrevious();
+
+ ValidityBufferBuilder.Append(true);
+
+ return this;
+ }
+
+ public Builder AppendNull()
+ {
+ AppendPrevious();
+
+ ValidityBufferBuilder.Append(false);
+ ValueOffsetsBufferBuilder.Append(Start);
+ SizesBufferBuilder.Append(0);
+ NullCount++;
+ Start = -1;
+
+ return this;
+ }
+
+ private void AppendPrevious() // Records the offset and size of the slot opened earlier, if any, then marks the current child length as the next start.
+ {
+ if (Start >= 0) // a slot is pending; Start is -1 after construction and after AppendNull()
+ {
+ ValueOffsetsBufferBuilder.Append(Start);
+ SizesBufferBuilder.Append(ValueBuilder.Length - Start); // size = child values appended since Start was recorded
+ }
+ Start = ValueBuilder.Length;
+ }
+
+            /// <summary>
+            /// Build a <see cref="ListViewArray"/> from the slots and child values appended so far.
+            /// </summary>
+            /// <param name="allocator">Optional memory allocator.</param>
+            /// <returns>Returns the built array.</returns>
+            public ListViewArray Build(MemoryAllocator allocator = default)
+            {
+                // Record the offset and size of the slot that is still open, if any.
+                AppendPrevious();
+
+                // AppendPrevious leaves Start at the end of the child values, which reads as "a slot is
+                // open". Reset it so that a later Append() or a second Build() does not record an extra
+                // offset/size entry that has no matching validity bit.
+                Start = -1;
+
+                ArrowBuffer validityBuffer = NullCount > 0
+                    ? ValidityBufferBuilder.Build(allocator)
+                    : ArrowBuffer.Empty;
+
+                return new ListViewArray(DataType, Length,
+                    ValueOffsetsBufferBuilder.Build(allocator), SizesBufferBuilder.Build(allocator),
+                    ValueBuilder.Build(allocator),
+                    validityBuffer, NullCount, 0);
+            }
+
+ public Builder Reserve(int capacity)
+ {
+ ValueOffsetsBufferBuilder.Reserve(capacity);
+ SizesBufferBuilder.Reserve(capacity);
+ ValidityBufferBuilder.Reserve(capacity);
+ return this;
+ }
+
+ public Builder Resize(int length)
+ {
+ ValueOffsetsBufferBuilder.Resize(length);
+ SizesBufferBuilder.Resize(length);
+ ValidityBufferBuilder.Resize(length);
+ return this;
+ }
+
+            /// <summary>
+            /// Clear all contents appended so far and return the builder to its initial state.
+            /// </summary>
+            /// <returns>Returns the builder (for fluent-style composition).</returns>
+            public Builder Clear()
+            {
+                ValueOffsetsBufferBuilder.Clear();
+                SizesBufferBuilder.Clear();
+                ValueBuilder.Clear();
+                ValidityBufferBuilder.Clear();
+
+                // Reset the bookkeeping too. A stale Start would make the next Append() record an offset
+                // into the cleared child together with a negative size, and a stale NullCount would be
+                // reported for an array that no longer contains those nulls.
+                NullCount = 0;
+                Start = -1;
+                return this;
+            }
+
+ }
+
+ public IArrowArray Values { get; }
+
+ public ArrowBuffer ValueOffsetsBuffer => Data.Buffers[1];
+
+ public ReadOnlySpan<int> ValueOffsets =>
ValueOffsetsBuffer.Span.CastTo<int>().Slice(Offset, Length);
+
+ public ArrowBuffer SizesBuffer => Data.Buffers[2];
+
+ public ReadOnlySpan<int> Sizes =>
SizesBuffer.Span.CastTo<int>().Slice(Offset, Length);
+
+ public ListViewArray(IArrowType dataType, int length,
+ ArrowBuffer valueOffsetsBuffer, ArrowBuffer sizesBuffer,
IArrowArray values,
+ ArrowBuffer nullBitmapBuffer, int nullCount = 0, int offset = 0)
+ : this(new ArrayData(dataType, length, nullCount, offset,
+ new[] { nullBitmapBuffer, valueOffsetsBuffer, sizesBuffer },
new[] { values.Data }),
+ values)
+ {
+ }
+
+ public ListViewArray(ArrayData data)
+ : this(data, ArrowArrayFactory.BuildArray(data.Children[0]))
+ {
+ }
+
+ private ListViewArray(ArrayData data, IArrowArray values) : base(data)
+ {
+ data.EnsureBufferCount(3);
+ data.EnsureDataType(ArrowTypeId.ListView);
+ Values = values;
+ }
+
+ public override void Accept(IArrowArrayVisitor visitor) =>
Accept(this, visitor);
+
+ public int GetValueLength(int index)
+ {
+ if (index < 0 || index >= Length)
+ {
+ throw new ArgumentOutOfRangeException(nameof(index));
+ }
+
+ if (IsNull(index))
+ {
+ return 0;
+ }
+
+ return Sizes[index];
+ }
+
+ public IArrowArray GetSlicedValues(int index)
+ {
+ if (index < 0 || index >= Length)
+ {
+ throw new ArgumentOutOfRangeException(nameof(index));
+ }
+
+ if (IsNull(index))
+ {
+ return null;
+ }
+
+ if (!(Values is Array array))
+ {
+ return default;
+ }
+
+ return array.Slice(ValueOffsets[index], GetValueLength(index));
+ }
+
+ protected override void Dispose(bool disposing)
+ {
+ if (disposing)
+ {
+ Values?.Dispose();
+ }
+ base.Dispose(disposing);
+ }
+ }
+}
diff --git a/csharp/src/Apache.Arrow/Arrays/StringViewArray.cs
b/csharp/src/Apache.Arrow/Arrays/StringViewArray.cs
new file mode 100644
index 0000000000..8864476153
--- /dev/null
+++ b/csharp/src/Apache.Arrow/Arrays/StringViewArray.cs
@@ -0,0 +1,110 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+using Apache.Arrow.Types;
+using System;
+using System.Collections;
+using System.Collections.Generic;
+using System.Runtime.InteropServices;
+using System.Text;
+
+namespace Apache.Arrow
+{
+ public class StringViewArray: BinaryViewArray, IReadOnlyList<string>
+ {
+ public static readonly Encoding DefaultEncoding = Encoding.UTF8;
+
+ public new class Builder : BuilderBase<StringViewArray, Builder>
+ {
+ public Builder() : base(StringViewType.Default) { }
+
+ protected override StringViewArray Build(ArrayData data)
+ {
+ return new StringViewArray(data);
+ }
+
+ public Builder Append(string value, Encoding encoding = null)
+ {
+ if (value == null)
+ {
+ return AppendNull();
+ }
+ encoding = encoding ?? DefaultEncoding;
+ byte[] span = encoding.GetBytes(value);
+ return Append(span.AsSpan());
+ }
+
+            /// <summary>
+            /// Append an enumerable collection of strings to the array; null strings are appended as nulls.
+            /// </summary>
+            /// <param name="values">Strings to add.</param>
+            /// <param name="encoding">Encoding used to convert each string to bytes; when null,
+            /// <see cref="DefaultEncoding"/> is used.</param>
+            /// <returns>Returns the builder (for fluent-style composition).</returns>
+            /// <exception cref="ArgumentNullException">If <paramref name="values"/> is null.</exception>
+            public Builder AppendRange(IEnumerable<string> values, Encoding encoding = null)
+            {
+                // Same guard as the byte-based AppendRange overloads on the base builder.
+                if (values == null)
+                {
+                    throw new ArgumentNullException(nameof(values));
+                }
+
+                foreach (string value in values)
+                {
+                    Append(value, encoding);
+                }
+
+                return this;
+            }
+ }
+
+ public StringViewArray(ArrayData data)
+ : base(ArrowTypeId.StringView, data) { }
+
+ public StringViewArray(int length,
+ ArrowBuffer valueOffsetsBuffer,
+ ArrowBuffer dataBuffer,
+ ArrowBuffer nullBitmapBuffer,
+ int nullCount = 0, int offset = 0)
+ : this(new ArrayData(StringViewType.Default, length, nullCount,
offset,
+ new[] { nullBitmapBuffer, valueOffsetsBuffer, dataBuffer }))
+ { }
+
+ public override void Accept(IArrowArrayVisitor visitor) =>
Accept(this, visitor);
+
+ public string GetString(int index, Encoding encoding = default)
+ {
+ encoding ??= DefaultEncoding;
+
+ ReadOnlySpan<byte> bytes = GetBytes(index, out bool isNull);
+
+ if (isNull)
+ {
+ return null;
+ }
+ if (bytes.Length == 0)
+ {
+ return string.Empty;
+ }
+
+ unsafe
+ {
+ fixed (byte* data = &MemoryMarshal.GetReference(bytes))
+ return encoding.GetString(data, bytes.Length);
+ }
+ }
+
+ int IReadOnlyCollection<string>.Count => Length;
+
+ string IReadOnlyList<string>.this[int index] => GetString(index);
+
+ IEnumerator<string> IEnumerable<string>.GetEnumerator()
+ {
+ for (int index = 0; index < Length; index++)
+ {
+ yield return GetString(index);
+ };
+ }
+
+ IEnumerator IEnumerable.GetEnumerator() =>
((IEnumerable<string>)this).GetEnumerator();
+ }
+}
diff --git a/csharp/src/Apache.Arrow/C/CArrowArrayExporter.cs
b/csharp/src/Apache.Arrow/C/CArrowArrayExporter.cs
index 2d9febea33..03059eaf5d 100644
--- a/csharp/src/Apache.Arrow/C/CArrowArrayExporter.cs
+++ b/csharp/src/Apache.Arrow/C/CArrowArrayExporter.cs
@@ -15,10 +15,12 @@
using System;
+using System.Buffers;
using System.Diagnostics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using Apache.Arrow.Memory;
+using Apache.Arrow.Types;
namespace Apache.Arrow.C
{
@@ -121,7 +123,16 @@ namespace Apache.Arrow.C
cArray->buffers = null;
if (cArray->n_buffers > 0)
{
- cArray->buffers =
(byte**)sharedOwner.Allocate(array.Buffers.Length * IntPtr.Size);
+ long* lengths = null;
+ int bufferCount = array.Buffers.Length;
+ if (array.DataType.TypeId == ArrowTypeId.BinaryView ||
array.DataType.TypeId == ArrowTypeId.StringView)
+ {
+ lengths = (long*)sharedOwner.Allocate(8 * bufferCount); //
overallocation to avoid edge case
+ bufferCount++;
+ cArray->n_buffers++;
+ }
+
+ cArray->buffers = (byte**)sharedOwner.Allocate(bufferCount *
IntPtr.Size);
for (int i = 0; i < array.Buffers.Length; i++)
{
ArrowBuffer buffer = array.Buffers[i];
@@ -131,6 +142,15 @@ namespace Apache.Arrow.C
throw new NotSupportedException($"An ArrowArray of
type {array.DataType.TypeId} could not be exported: failed on buffer #{i}");
}
cArray->buffers[i] = (byte*)ptr;
+ if (lengths != null && i >= 2)
+ {
+ lengths[i - 2] = array.Buffers[i].Length;
+ }
+ }
+
+ if (lengths != null)
+ {
+ cArray->buffers[array.Buffers.Length] = (byte*)lengths;
}
}
diff --git a/csharp/src/Apache.Arrow/C/CArrowArrayImporter.cs
b/csharp/src/Apache.Arrow/C/CArrowArrayImporter.cs
index 1b40ec4965..fbb2be661f 100644
--- a/csharp/src/Apache.Arrow/C/CArrowArrayImporter.cs
+++ b/csharp/src/Apache.Arrow/C/CArrowArrayImporter.cs
@@ -157,10 +157,18 @@ namespace Apache.Arrow.C
case ArrowTypeId.Binary:
buffers = ImportByteArrayBuffers(cArray);
break;
+ case ArrowTypeId.StringView:
+ case ArrowTypeId.BinaryView:
+ buffers = ImportByteArrayViewBuffers(cArray);
+ break;
case ArrowTypeId.List:
children = ProcessListChildren(cArray,
((ListType)type).ValueDataType);
buffers = ImportListBuffers(cArray);
break;
+ case ArrowTypeId.ListView:
+ children = ProcessListChildren(cArray,
((ListViewType)type).ValueDataType);
+ buffers = ImportListViewBuffers(cArray);
+ break;
case ArrowTypeId.FixedSizeList:
children = ProcessListChildren(cArray,
((FixedSizeListType)type).ValueDataType);
buffers = ImportFixedSizeListBuffers(cArray);
@@ -268,6 +276,28 @@ namespace Apache.Arrow.C
return buffers;
}
+ private ArrowBuffer[] ImportByteArrayViewBuffers(CArrowArray* cArray)
+ {
+     // Buffers as read below: [0] validity, [1] views (16 bytes per element),
+     // [2 .. n_buffers-2] data buffers, [n_buffers-1] an array of 64-bit lengths,
+     // one entry per data buffer.
+     if (cArray->n_buffers < 3)
+     {
+         throw new InvalidOperationException("Byte array views are expected to have at least three buffers");
+     }
+
+     int length = checked((int)cArray->length);
+     // checked: length * 16 would otherwise wrap silently once length reaches 2^27
+     // and register the views buffer with a bogus size.
+     int viewsLength = checked(length * 16);
+
+     long* bufferLengths = (long*)cArray->buffers[cArray->n_buffers - 1];
+     // The trailing lengths buffer is consumed here and not surfaced as an ArrowBuffer.
+     ArrowBuffer[] buffers = new ArrowBuffer[cArray->n_buffers - 1];
+     buffers[0] = ImportValidityBuffer(cArray);
+     buffers[1] = new ArrowBuffer(AddMemory((IntPtr)cArray->buffers[1], 0, viewsLength));
+     for (int i = 2; i < buffers.Length; i++)
+     {
+         // Data buffer i takes its size from entry i - 2 of the lengths array.
+         buffers[i] = new ArrowBuffer(AddMemory((IntPtr)cArray->buffers[i], 0, checked((int)bufferLengths[i - 2])));
+     }
+
+     return buffers;
+ }
+
+
private ArrowBuffer[] ImportListBuffers(CArrowArray* cArray)
{
if (cArray->n_buffers != 2)
@@ -285,6 +315,24 @@ namespace Apache.Arrow.C
return buffers;
}
+ private ArrowBuffer[] ImportListViewBuffers(CArrowArray* cArray)
+ {
+     // Buffers as read below: [0] validity, [1] and [2] two 4-byte-per-element
+     // buffers of identical size.
+     if (cArray->n_buffers != 3)
+     {
+         throw new InvalidOperationException("List view arrays are expected to have exactly three buffers");
+     }
+
+     int length = checked((int)cArray->length);
+     // checked: length * 4 would otherwise wrap silently once length reaches 2^29.
+     int offsetsLength = checked(length * 4);
+
+     ArrowBuffer[] buffers = new ArrowBuffer[3];
+     buffers[0] = ImportValidityBuffer(cArray);
+     buffers[1] = new ArrowBuffer(AddMemory((IntPtr)cArray->buffers[1], 0, offsetsLength));
+     buffers[2] = new ArrowBuffer(AddMemory((IntPtr)cArray->buffers[2], 0, offsetsLength));
+
+     return buffers;
+ }
+
+
private ArrowBuffer[] ImportFixedSizeListBuffers(CArrowArray*
cArray)
{
if (cArray->n_buffers != 1)
diff --git a/csharp/src/Apache.Arrow/C/CArrowSchemaExporter.cs
b/csharp/src/Apache.Arrow/C/CArrowSchemaExporter.cs
index c9b45a8eb2..3bb7134af3 100644
--- a/csharp/src/Apache.Arrow/C/CArrowSchemaExporter.cs
+++ b/csharp/src/Apache.Arrow/C/CArrowSchemaExporter.cs
@@ -167,7 +167,9 @@ namespace Apache.Arrow.C
return
$"d:{decimalType.Precision},{decimalType.Scale},256";
// Binary
case BinaryType _: return "z";
+ case BinaryViewType _: return "vz";
case StringType _: return "u";
+ case StringViewType _: return "vu";
case FixedSizeBinaryType binaryType:
return $"w:{binaryType.ByteWidth}";
// Date
@@ -196,6 +198,7 @@ namespace Apache.Arrow.C
};
// Nested
case ListType _: return "+l";
+ case ListViewType _: return "+vl";
case FixedSizeListType fixedListType:
return $"+w:{fixedListType.ListSize}";
case StructType _: return "+s";
diff --git a/csharp/src/Apache.Arrow/C/CArrowSchemaImporter.cs
b/csharp/src/Apache.Arrow/C/CArrowSchemaImporter.cs
index 9c81195771..f1acc007bc 100644
--- a/csharp/src/Apache.Arrow/C/CArrowSchemaImporter.cs
+++ b/csharp/src/Apache.Arrow/C/CArrowSchemaImporter.cs
@@ -165,7 +165,7 @@ namespace Apache.Arrow.C
}
// Special handling for nested types
- if (format == "+l")
+ if (format == "+l" || format == "+vl")
{
if (_cSchema->n_children != 1)
{
@@ -180,7 +180,7 @@ namespace Apache.Arrow.C
Field childField = childSchema.GetAsField();
- return new ListType(childField);
+ return format[1] == 'v' ? new ListViewType(childField) :
new ListType(childField);
}
else if (format == "+s")
{
@@ -303,8 +303,10 @@ namespace Apache.Arrow.C
"g" => DoubleType.Default,
// Binary data
"z" => BinaryType.Default,
+ "vz" => BinaryViewType.Default,
//"Z" => new LargeBinaryType() // Not yet implemented
"u" => StringType.Default,
+ "vu" => StringViewType.Default,
//"U" => new LargeStringType(), // Not yet implemented
// Date and time
"tdD" => Date32Type.Default,
diff --git a/csharp/src/Apache.Arrow/Extensions/ArrayDataExtensions.cs
b/csharp/src/Apache.Arrow/Extensions/ArrayDataExtensions.cs
index 399d9bf5e6..2b6742a3d0 100644
--- a/csharp/src/Apache.Arrow/Extensions/ArrayDataExtensions.cs
+++ b/csharp/src/Apache.Arrow/Extensions/ArrayDataExtensions.cs
@@ -23,6 +23,17 @@ namespace Apache.Arrow
public static void EnsureBufferCount(this ArrayData data, int count)
{
if (data.Buffers.Length != count)
+ {
+ // TODO: Use localizable string resource
+ throw new ArgumentException(
+ $"Buffer count <{data.Buffers.Length}> must be at exactly
<{count}>",
+ nameof(data.Buffers.Length));
+ }
+ }
+
+ public static void EnsureVariadicBufferCount(this ArrayData data, int
count)
+ {
+ if (data.Buffers.Length < count)
{
// TODO: Use localizable string resource
throw new ArgumentException(
diff --git a/csharp/src/Apache.Arrow/Extensions/FlatbufExtensions.cs
b/csharp/src/Apache.Arrow/Extensions/FlatbufExtensions.cs
index 5f39680b90..b44c02d854 100644
--- a/csharp/src/Apache.Arrow/Extensions/FlatbufExtensions.cs
+++ b/csharp/src/Apache.Arrow/Extensions/FlatbufExtensions.cs
@@ -19,25 +19,6 @@ namespace Apache.Arrow
{
internal static class FlatbufExtensions
{
- public static bool IsFixedPrimitive(this Flatbuf.Type t)
- {
- if (t == Flatbuf.Type.Utf8 || t == Flatbuf.Type.Binary)
- return false;
- return true;
- }
-
- public static bool IsFixedPrimitive(this Types.IArrowType t)
- {
- return t.TypeId.IsFixedPrimitive();
- }
-
- public static bool IsFixedPrimitive(this Types.ArrowTypeId t)
- {
- if (t == Types.ArrowTypeId.String || t == Types.ArrowTypeId.Binary)
- return false;
- return true;
- }
-
public static Types.IntervalUnit ToArrow(this Flatbuf.IntervalUnit
unit)
{
switch (unit)
diff --git a/csharp/src/Apache.Arrow/Flatbuf/BinaryView.cs
b/csharp/src/Apache.Arrow/Flatbuf/BinaryView.cs
new file mode 100644
index 0000000000..2f9cca5173
--- /dev/null
+++ b/csharp/src/Apache.Arrow/Flatbuf/BinaryView.cs
@@ -0,0 +1,47 @@
+// <auto-generated>
+// automatically generated by the FlatBuffers compiler, do not modify
+// </auto-generated>
+
+namespace Apache.Arrow.Flatbuf
+{
+
+using global::System;
+using global::System.Collections.Generic;
+using global::Google.FlatBuffers;
+
+/// Logically the same as Binary, but the internal representation uses a view
+/// struct that contains the string length and either the string's entire data
+/// inline (for small strings) or an inlined prefix, an index of another
buffer,
+/// and an offset pointing to a slice in that buffer (for non-small strings).
+///
+/// Since it uses a variable number of data buffers, each Field with this type
+/// must have a corresponding entry in `variadicBufferCounts`.
+internal struct BinaryView : IFlatbufferObject
+{
+ private Table __p;
+ public ByteBuffer ByteBuffer { get { return __p.bb; } }
+ public static void ValidateVersion() {
FlatBufferConstants.FLATBUFFERS_23_5_9(); }
+ public static BinaryView GetRootAsBinaryView(ByteBuffer _bb) { return
GetRootAsBinaryView(_bb, new BinaryView()); }
+ public static BinaryView GetRootAsBinaryView(ByteBuffer _bb, BinaryView obj)
{ return (obj.__assign(_bb.GetInt(_bb.Position) + _bb.Position, _bb)); }
+ public void __init(int _i, ByteBuffer _bb) { __p = new Table(_i, _bb); }
+ public BinaryView __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return
this; }
+
+
+ public static void StartBinaryView(FlatBufferBuilder builder) {
builder.StartTable(0); }
+ public static Offset<BinaryView> EndBinaryView(FlatBufferBuilder builder) {
+ int o = builder.EndTable();
+ return new Offset<BinaryView>(o);
+ }
+}
+
+
+static internal class BinaryViewVerify
+{
+ static public bool Verify(Google.FlatBuffers.Verifier verifier, uint
tablePos)
+ {
+ return verifier.VerifyTableStart(tablePos)
+ && verifier.VerifyTableEnd(tablePos);
+ }
+}
+
+}
diff --git a/csharp/src/Apache.Arrow/Flatbuf/Enums/MetadataVersion.cs
b/csharp/src/Apache.Arrow/Flatbuf/Enums/MetadataVersion.cs
index 1e893e8cb6..13b5315805 100644
--- a/csharp/src/Apache.Arrow/Flatbuf/Enums/MetadataVersion.cs
+++ b/csharp/src/Apache.Arrow/Flatbuf/Enums/MetadataVersion.cs
@@ -8,21 +8,21 @@ namespace Apache.Arrow.Flatbuf
internal enum MetadataVersion : short
{
/// 0.1.0 (October 2016).
- V1 = 0,
+ V1 = 0,
/// 0.2.0 (February 2017). Non-backwards compatible with V1.
- V2 = 1,
+ V2 = 1,
/// 0.3.0 -> 0.7.1 (May - December 2017). Non-backwards compatible with V2.
- V3 = 2,
+ V3 = 2,
/// >= 0.8.0 (December 2017). Non-backwards compatible with V3.
- V4 = 3,
- /// >= 1.0.0 (July 2020. Backwards compatible with V4 (V5 readers can read V4
+ V4 = 3,
+ /// >= 1.0.0 (July 2020). Backwards compatible with V4 (V5 readers can read
V4
/// metadata and IPC messages). Implementations are recommended to provide a
/// V4 compatibility mode with V5 format changes disabled.
///
/// Incompatible changes between V4 and V5:
/// - Union buffer layout has changed. In V5, Unions don't have a validity
/// bitmap buffer.
- V5 = 4,
+ V5 = 4,
};
diff --git a/csharp/src/Apache.Arrow/Flatbuf/Enums/Type.cs
b/csharp/src/Apache.Arrow/Flatbuf/Enums/Type.cs
index 10f852efb9..9c04288648 100644
--- a/csharp/src/Apache.Arrow/Flatbuf/Enums/Type.cs
+++ b/csharp/src/Apache.Arrow/Flatbuf/Enums/Type.cs
@@ -33,6 +33,10 @@ internal enum Type : byte
LargeUtf8 = 20,
LargeList = 21,
RunEndEncoded = 22,
+ BinaryView = 23,
+ Utf8View = 24,
+ ListView = 25,
+ LargeListView = 26,
};
@@ -110,6 +114,18 @@ static internal class TypeVerify
case Type.RunEndEncoded:
result = RunEndEncodedVerify.Verify(verifier, tablePos);
break;
+ case Type.BinaryView:
+ result = BinaryViewVerify.Verify(verifier, tablePos);
+ break;
+ case Type.Utf8View:
+ result = Utf8ViewVerify.Verify(verifier, tablePos);
+ break;
+ case Type.ListView:
+ result = ListViewVerify.Verify(verifier, tablePos);
+ break;
+ case Type.LargeListView:
+ result = LargeListViewVerify.Verify(verifier, tablePos);
+ break;
default: result = true;
break;
}
diff --git a/csharp/src/Apache.Arrow/Flatbuf/Field.cs
b/csharp/src/Apache.Arrow/Flatbuf/Field.cs
index c5c6c0a165..efbc6afb06 100644
--- a/csharp/src/Apache.Arrow/Flatbuf/Field.cs
+++ b/csharp/src/Apache.Arrow/Flatbuf/Field.cs
@@ -57,6 +57,10 @@ internal struct Field : IFlatbufferObject
public LargeUtf8 TypeAsLargeUtf8() { return Type<LargeUtf8>().Value; }
public LargeList TypeAsLargeList() { return Type<LargeList>().Value; }
public RunEndEncoded TypeAsRunEndEncoded() { return
Type<RunEndEncoded>().Value; }
+ public BinaryView TypeAsBinaryView() { return Type<BinaryView>().Value; }
+ public Utf8View TypeAsUtf8View() { return Type<Utf8View>().Value; }
+ public ListView TypeAsListView() { return Type<ListView>().Value; }
+ public LargeListView TypeAsLargeListView() { return
Type<LargeListView>().Value; }
/// Present only if the field is dictionary encoded.
public DictionaryEncoding? Dictionary { get { int o = __p.__offset(12);
return o != 0 ? (DictionaryEncoding?)(new
DictionaryEncoding()).__assign(__p.__indirect(o + __p.bb_pos), __p.bb) : null;
} }
/// children apply only to nested data types like Struct, List and Union. For
diff --git a/csharp/src/Apache.Arrow/Flatbuf/LargeListView.cs
b/csharp/src/Apache.Arrow/Flatbuf/LargeListView.cs
new file mode 100644
index 0000000000..685e91333c
--- /dev/null
+++ b/csharp/src/Apache.Arrow/Flatbuf/LargeListView.cs
@@ -0,0 +1,42 @@
+// <auto-generated>
+// automatically generated by the FlatBuffers compiler, do not modify
+// </auto-generated>
+
+namespace Apache.Arrow.Flatbuf
+{
+
+using global::System;
+using global::System.Collections.Generic;
+using global::Google.FlatBuffers;
+
+/// Same as ListView, but with 64-bit offsets and sizes, allowing to represent
+/// extremely large data values.
+internal struct LargeListView : IFlatbufferObject
+{
+ private Table __p;
+ public ByteBuffer ByteBuffer { get { return __p.bb; } }
+ public static void ValidateVersion() {
FlatBufferConstants.FLATBUFFERS_23_5_9(); }
+ public static LargeListView GetRootAsLargeListView(ByteBuffer _bb) { return
GetRootAsLargeListView(_bb, new LargeListView()); }
+ public static LargeListView GetRootAsLargeListView(ByteBuffer _bb,
LargeListView obj) { return (obj.__assign(_bb.GetInt(_bb.Position) +
_bb.Position, _bb)); }
+ public void __init(int _i, ByteBuffer _bb) { __p = new Table(_i, _bb); }
+ public LargeListView __assign(int _i, ByteBuffer _bb) { __init(_i, _bb);
return this; }
+
+
+ public static void StartLargeListView(FlatBufferBuilder builder) {
builder.StartTable(0); }
+ public static Offset<LargeListView> EndLargeListView(FlatBufferBuilder
builder) {
+ int o = builder.EndTable();
+ return new Offset<LargeListView>(o);
+ }
+}
+
+
+static internal class LargeListViewVerify
+{
+ static public bool Verify(Google.FlatBuffers.Verifier verifier, uint
tablePos)
+ {
+ return verifier.VerifyTableStart(tablePos)
+ && verifier.VerifyTableEnd(tablePos);
+ }
+}
+
+}
diff --git a/csharp/src/Apache.Arrow/Flatbuf/ListView.cs
b/csharp/src/Apache.Arrow/Flatbuf/ListView.cs
new file mode 100644
index 0000000000..d2e54e4285
--- /dev/null
+++ b/csharp/src/Apache.Arrow/Flatbuf/ListView.cs
@@ -0,0 +1,43 @@
+// <auto-generated>
+// automatically generated by the FlatBuffers compiler, do not modify
+// </auto-generated>
+
+namespace Apache.Arrow.Flatbuf
+{
+
+using global::System;
+using global::System.Collections.Generic;
+using global::Google.FlatBuffers;
+
+/// Represents the same logical types that List can, but contains offsets and
+/// sizes allowing for writes in any order and sharing of child values among
+/// list values.
+internal struct ListView : IFlatbufferObject
+{
+ private Table __p;
+ public ByteBuffer ByteBuffer { get { return __p.bb; } }
+ public static void ValidateVersion() {
FlatBufferConstants.FLATBUFFERS_23_5_9(); }
+ public static ListView GetRootAsListView(ByteBuffer _bb) { return
GetRootAsListView(_bb, new ListView()); }
+ public static ListView GetRootAsListView(ByteBuffer _bb, ListView obj) {
return (obj.__assign(_bb.GetInt(_bb.Position) + _bb.Position, _bb)); }
+ public void __init(int _i, ByteBuffer _bb) { __p = new Table(_i, _bb); }
+ public ListView __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return
this; }
+
+
+ public static void StartListView(FlatBufferBuilder builder) {
builder.StartTable(0); }
+ public static Offset<ListView> EndListView(FlatBufferBuilder builder) {
+ int o = builder.EndTable();
+ return new Offset<ListView>(o);
+ }
+}
+
+
+static internal class ListViewVerify
+{
+ static public bool Verify(Google.FlatBuffers.Verifier verifier, uint
tablePos)
+ {
+ return verifier.VerifyTableStart(tablePos)
+ && verifier.VerifyTableEnd(tablePos);
+ }
+}
+
+}
diff --git a/csharp/src/Apache.Arrow/Flatbuf/RecordBatch.cs
b/csharp/src/Apache.Arrow/Flatbuf/RecordBatch.cs
index 9ab9715165..2df8716bc1 100644
--- a/csharp/src/Apache.Arrow/Flatbuf/RecordBatch.cs
+++ b/csharp/src/Apache.Arrow/Flatbuf/RecordBatch.cs
@@ -38,27 +38,57 @@ internal struct RecordBatch : IFlatbufferObject
public int BuffersLength { get { int o = __p.__offset(8); return o != 0 ?
__p.__vector_len(o) : 0; } }
/// Optional compression of the message body
public BodyCompression? Compression { get { int o = __p.__offset(10); return
o != 0 ? (BodyCompression?)(new BodyCompression()).__assign(__p.__indirect(o +
__p.bb_pos), __p.bb) : null; } }
+ /// Some types such as Utf8View are represented using a variable number of
buffers.
+ /// For each such Field in the pre-ordered flattened logical schema, there
will be
+ /// an entry in variadicBufferCounts to indicate the number of variadic
+ /// buffers which belong to that Field in the current RecordBatch.
+ ///
+ /// For example, the schema
+ /// col1: Struct<alpha: Int32, beta: BinaryView, gamma: Float64>
+ /// col2: Utf8View
+ /// contains two Fields with variadic buffers so variadicBufferCounts will
have
+ /// two entries, the first counting the variadic buffers of `col1.beta` and
the
+ /// second counting `col2`'s.
+ ///
+ /// This field may be omitted if and only if the schema contains no Fields
with
+ /// a variable number of buffers, such as BinaryView and Utf8View.
+ public long VariadicBufferCounts(int j) { int o = __p.__offset(12); return o
!= 0 ? __p.bb.GetLong(__p.__vector(o) + j * 8) : (long)0; }
+ public int VariadicBufferCountsLength { get { int o = __p.__offset(12);
return o != 0 ? __p.__vector_len(o) : 0; } }
+#if ENABLE_SPAN_T
+ public Span<long> GetVariadicCountsBytes() { return
__p.__vector_as_span<long>(12, 8); }
+#else
+ public ArraySegment<byte>? GetVariadicCountsBytes() { return
__p.__vector_as_arraysegment(12); }
+#endif
+ public long[] GetVariadicCountsArray() { return
__p.__vector_as_array<long>(12); }
public static Offset<RecordBatch> CreateRecordBatch(FlatBufferBuilder
builder,
long length = 0,
VectorOffset nodesOffset = default(VectorOffset),
VectorOffset buffersOffset = default(VectorOffset),
- Offset<BodyCompression> compressionOffset =
default(Offset<BodyCompression>)) {
- builder.StartTable(4);
+ Offset<BodyCompression> compressionOffset =
default(Offset<BodyCompression>),
+ VectorOffset variadicCountsOffset = default(VectorOffset)) {
+ builder.StartTable(5);
RecordBatch.AddLength(builder, length);
+ RecordBatch.AddVariadicCounts(builder, variadicCountsOffset);
RecordBatch.AddCompression(builder, compressionOffset);
RecordBatch.AddBuffers(builder, buffersOffset);
RecordBatch.AddNodes(builder, nodesOffset);
return RecordBatch.EndRecordBatch(builder);
}
- public static void StartRecordBatch(FlatBufferBuilder builder) {
builder.StartTable(4); }
+ public static void StartRecordBatch(FlatBufferBuilder builder) {
builder.StartTable(5); }
public static void AddLength(FlatBufferBuilder builder, long length) {
builder.AddLong(0, length, 0); }
public static void AddNodes(FlatBufferBuilder builder, VectorOffset
nodesOffset) { builder.AddOffset(1, nodesOffset.Value, 0); }
public static void StartNodesVector(FlatBufferBuilder builder, int numElems)
{ builder.StartVector(16, numElems, 8); }
public static void AddBuffers(FlatBufferBuilder builder, VectorOffset
buffersOffset) { builder.AddOffset(2, buffersOffset.Value, 0); }
public static void StartBuffersVector(FlatBufferBuilder builder, int
numElems) { builder.StartVector(16, numElems, 8); }
public static void AddCompression(FlatBufferBuilder builder,
Offset<BodyCompression> compressionOffset) { builder.AddOffset(3,
compressionOffset.Value, 0); }
+ public static void AddVariadicCounts(FlatBufferBuilder builder, VectorOffset
variadicCountsOffset) { builder.AddOffset(4, variadicCountsOffset.Value, 0); }
+ public static VectorOffset CreateVariadicCountsVector(FlatBufferBuilder
builder, long[] data) { builder.StartVector(8, data.Length, 8); for (int i =
data.Length - 1; i >= 0; i--) builder.AddLong(data[i]); return
builder.EndVector(); }
+ public static VectorOffset CreateVariadicCountsVectorBlock(FlatBufferBuilder
builder, long[] data) { builder.StartVector(8, data.Length, 8);
builder.Add(data); return builder.EndVector(); }
+ public static VectorOffset CreateVariadicCountsVectorBlock(FlatBufferBuilder
builder, ArraySegment<long> data) { builder.StartVector(8, data.Count, 8);
builder.Add(data); return builder.EndVector(); }
+ public static VectorOffset CreateVariadicCountsVectorBlock(FlatBufferBuilder
builder, IntPtr dataPtr, int sizeInBytes) { builder.StartVector(1, sizeInBytes,
1); builder.Add<long>(dataPtr, sizeInBytes); return builder.EndVector(); }
+ public static void StartVariadicCountsVector(FlatBufferBuilder builder, int
numElems) { builder.StartVector(8, numElems, 8); }
public static Offset<RecordBatch> EndRecordBatch(FlatBufferBuilder builder) {
int o = builder.EndTable();
return new Offset<RecordBatch>(o);
@@ -75,6 +105,7 @@ static internal class RecordBatchVerify
&& verifier.VerifyVectorOfData(tablePos, 6 /*Nodes*/, 16 /*FieldNode*/,
false)
&& verifier.VerifyVectorOfData(tablePos, 8 /*Buffers*/, 16 /*Buffer*/,
false)
&& verifier.VerifyTable(tablePos, 10 /*Compression*/,
BodyCompressionVerify.Verify, false)
+ && verifier.VerifyVectorOfData(tablePos, 12 /*VariadicCounts*/, 8
/*long*/, false)
&& verifier.VerifyTableEnd(tablePos);
}
}
diff --git a/csharp/src/Apache.Arrow/Flatbuf/SparseTensor.cs
b/csharp/src/Apache.Arrow/Flatbuf/SparseTensor.cs
index 3f9e1de7c0..099950fafe 100644
--- a/csharp/src/Apache.Arrow/Flatbuf/SparseTensor.cs
+++ b/csharp/src/Apache.Arrow/Flatbuf/SparseTensor.cs
@@ -47,6 +47,10 @@ internal struct SparseTensor : IFlatbufferObject
public LargeUtf8 TypeAsLargeUtf8() { return Type<LargeUtf8>().Value; }
public LargeList TypeAsLargeList() { return Type<LargeList>().Value; }
public RunEndEncoded TypeAsRunEndEncoded() { return
Type<RunEndEncoded>().Value; }
+ public BinaryView TypeAsBinaryView() { return Type<BinaryView>().Value; }
+ public Utf8View TypeAsUtf8View() { return Type<Utf8View>().Value; }
+ public ListView TypeAsListView() { return Type<ListView>().Value; }
+ public LargeListView TypeAsLargeListView() { return
Type<LargeListView>().Value; }
/// The dimensions of the tensor, optionally named.
public TensorDim? Shape(int j) { int o = __p.__offset(8); return o != 0 ?
(TensorDim?)(new TensorDim()).__assign(__p.__indirect(__p.__vector(o) + j * 4),
__p.bb) : null; }
public int ShapeLength { get { int o = __p.__offset(8); return o != 0 ?
__p.__vector_len(o) : 0; } }
diff --git a/csharp/src/Apache.Arrow/Flatbuf/Tensor.cs
b/csharp/src/Apache.Arrow/Flatbuf/Tensor.cs
index f8c213768a..eb39257d86 100644
--- a/csharp/src/Apache.Arrow/Flatbuf/Tensor.cs
+++ b/csharp/src/Apache.Arrow/Flatbuf/Tensor.cs
@@ -46,6 +46,10 @@ internal struct Tensor : IFlatbufferObject
public LargeUtf8 TypeAsLargeUtf8() { return Type<LargeUtf8>().Value; }
public LargeList TypeAsLargeList() { return Type<LargeList>().Value; }
public RunEndEncoded TypeAsRunEndEncoded() { return
Type<RunEndEncoded>().Value; }
+ public BinaryView TypeAsBinaryView() { return Type<BinaryView>().Value; }
+ public Utf8View TypeAsUtf8View() { return Type<Utf8View>().Value; }
+ public ListView TypeAsListView() { return Type<ListView>().Value; }
+ public LargeListView TypeAsLargeListView() { return
Type<LargeListView>().Value; }
/// The dimensions of the tensor, optionally named
public TensorDim? Shape(int j) { int o = __p.__offset(8); return o != 0 ?
(TensorDim?)(new TensorDim()).__assign(__p.__indirect(__p.__vector(o) + j * 4),
__p.bb) : null; }
public int ShapeLength { get { int o = __p.__offset(8); return o != 0 ?
__p.__vector_len(o) : 0; } }
diff --git a/csharp/src/Apache.Arrow/Flatbuf/Utf8View.cs
b/csharp/src/Apache.Arrow/Flatbuf/Utf8View.cs
new file mode 100644
index 0000000000..e85c5374a9
--- /dev/null
+++ b/csharp/src/Apache.Arrow/Flatbuf/Utf8View.cs
@@ -0,0 +1,47 @@
+// <auto-generated>
+// automatically generated by the FlatBuffers compiler, do not modify
+// </auto-generated>
+
+namespace Apache.Arrow.Flatbuf
+{
+
+using global::System;
+using global::System.Collections.Generic;
+using global::Google.FlatBuffers;
+
+/// Logically the same as Utf8, but the internal representation uses a view
+/// struct that contains the string length and either the string's entire data
+/// inline (for small strings) or an inlined prefix, an index of another
buffer,
+/// and an offset pointing to a slice in that buffer (for non-small strings).
+///
+/// Since it uses a variable number of data buffers, each Field with this type
+/// must have a corresponding entry in `variadicBufferCounts`.
+internal struct Utf8View : IFlatbufferObject
+{
+ private Table __p;
+ public ByteBuffer ByteBuffer { get { return __p.bb; } }
+ public static void ValidateVersion() {
FlatBufferConstants.FLATBUFFERS_23_5_9(); }
+ public static Utf8View GetRootAsUtf8View(ByteBuffer _bb) { return
GetRootAsUtf8View(_bb, new Utf8View()); }
+ public static Utf8View GetRootAsUtf8View(ByteBuffer _bb, Utf8View obj) {
return (obj.__assign(_bb.GetInt(_bb.Position) + _bb.Position, _bb)); }
+ public void __init(int _i, ByteBuffer _bb) { __p = new Table(_i, _bb); }
+ public Utf8View __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return
this; }
+
+
+ public static void StartUtf8View(FlatBufferBuilder builder) {
builder.StartTable(0); }
+ public static Offset<Utf8View> EndUtf8View(FlatBufferBuilder builder) {
+ int o = builder.EndTable();
+ return new Offset<Utf8View>(o);
+ }
+}
+
+
+static internal class Utf8ViewVerify
+{
+ static public bool Verify(Google.FlatBuffers.Verifier verifier, uint
tablePos)
+ {
+ return verifier.VerifyTableStart(tablePos)
+ && verifier.VerifyTableEnd(tablePos);
+ }
+}
+
+}
diff --git a/csharp/src/Apache.Arrow/Ipc/ArrowReaderImplementation.cs
b/csharp/src/Apache.Arrow/Ipc/ArrowReaderImplementation.cs
index d3115da52c..eb7349a570 100644
--- a/csharp/src/Apache.Arrow/Ipc/ArrowReaderImplementation.cs
+++ b/csharp/src/Apache.Arrow/Ipc/ArrowReaderImplementation.cs
@@ -191,9 +191,7 @@ namespace Apache.Arrow.Ipc
Field field = schema.GetFieldByIndex(schemaFieldIndex++);
Flatbuf.FieldNode fieldNode =
recordBatchEnumerator.CurrentNode;
- ArrayData arrayData = field.DataType.IsFixedPrimitive()
- ? LoadPrimitiveField(version, ref recordBatchEnumerator,
field, in fieldNode, messageBuffer, bufferCreator)
- : LoadVariableField(version, ref recordBatchEnumerator,
field, in fieldNode, messageBuffer, bufferCreator);
+ ArrayData arrayData = LoadField(version, ref
recordBatchEnumerator, field, in fieldNode, messageBuffer, bufferCreator);
arrays.Add(ArrowArrayFactory.BuildArray(arrayData));
} while (recordBatchEnumerator.MoveNextNode());
@@ -229,7 +227,7 @@ namespace Apache.Arrow.Ipc
return new DecompressingBufferCreator(decompressor, _allocator);
}
- private ArrayData LoadPrimitiveField(
+ private ArrayData LoadField(
MetadataVersion version,
ref RecordBatchEnumerator recordBatchEnumerator,
Field field,
@@ -276,6 +274,16 @@ namespace Apache.Arrow.Ipc
case ArrowTypeId.FixedSizeList:
buffers = 1;
break;
+ case ArrowTypeId.String:
+ case ArrowTypeId.Binary:
+ case ArrowTypeId.ListView:
+ buffers = 3;
+ break;
+ case ArrowTypeId.StringView:
+ case ArrowTypeId.BinaryView:
+ buffers = checked((int)(2 +
recordBatchEnumerator.CurrentVariadicCount));
+ recordBatchEnumerator.MoveNextVariadicCount();
+ break;
default:
buffers = 2;
break;
@@ -300,54 +308,6 @@ namespace Apache.Arrow.Ipc
return new ArrayData(field.DataType, fieldLength, fieldNullCount,
0, arrowBuff, children, dictionary?.Data);
}
- private ArrayData LoadVariableField(
- MetadataVersion version,
- ref RecordBatchEnumerator recordBatchEnumerator,
- Field field,
- in Flatbuf.FieldNode fieldNode,
- ByteBuffer bodyData,
- IBufferCreator bufferCreator)
- {
-
- ArrowBuffer nullArrowBuffer = BuildArrowBuffer(bodyData,
recordBatchEnumerator.CurrentBuffer, bufferCreator);
- if (!recordBatchEnumerator.MoveNextBuffer())
- {
- throw new Exception("Unable to move to the next buffer.");
- }
- ArrowBuffer offsetArrowBuffer = BuildArrowBuffer(bodyData,
recordBatchEnumerator.CurrentBuffer, bufferCreator);
- if (!recordBatchEnumerator.MoveNextBuffer())
- {
- throw new Exception("Unable to move to the next buffer.");
- }
- ArrowBuffer valueArrowBuffer = BuildArrowBuffer(bodyData,
recordBatchEnumerator.CurrentBuffer, bufferCreator);
- recordBatchEnumerator.MoveNextBuffer();
-
- int fieldLength = (int)fieldNode.Length;
- int fieldNullCount = (int)fieldNode.NullCount;
-
- if (fieldLength < 0)
- {
- throw new InvalidDataException("Field length must be >= 0");
// TODO: Localize exception message
- }
-
- if (fieldNullCount < 0)
- {
- throw new InvalidDataException("Null count length must be >=
0"); //TODO: Localize exception message
- }
-
- ArrowBuffer[] arrowBuff = new[] { nullArrowBuffer,
offsetArrowBuffer, valueArrowBuffer };
- ArrayData[] children = GetChildren(version, ref
recordBatchEnumerator, field, bodyData, bufferCreator);
-
- IArrowArray dictionary = null;
- if (field.DataType.TypeId == ArrowTypeId.Dictionary)
- {
- long id = DictionaryMemo.GetId(field);
- dictionary = DictionaryMemo.GetDictionary(id);
- }
-
- return new ArrayData(field.DataType, fieldLength, fieldNullCount,
0, arrowBuff, children, dictionary?.Data);
- }
-
private ArrayData[] GetChildren(
MetadataVersion version,
ref RecordBatchEnumerator recordBatchEnumerator,
@@ -365,11 +325,7 @@ namespace Apache.Arrow.Ipc
Flatbuf.FieldNode childFieldNode =
recordBatchEnumerator.CurrentNode;
Field childField = type.Fields[index];
- ArrayData child = childField.DataType.IsFixedPrimitive()
- ? LoadPrimitiveField(version, ref recordBatchEnumerator,
childField, in childFieldNode, bodyData, bufferCreator)
- : LoadVariableField(version, ref recordBatchEnumerator,
childField, in childFieldNode, bodyData, bufferCreator);
-
- children[index] = child;
+ children[index] = LoadField(version, ref
recordBatchEnumerator, childField, in childFieldNode, bodyData, bufferCreator);
}
return children;
}
@@ -394,11 +350,14 @@ namespace Apache.Arrow.Ipc
private Flatbuf.RecordBatch RecordBatch { get; }
internal int CurrentBufferIndex { get; private set; }
internal int CurrentNodeIndex { get; private set; }
+ internal int CurrentVariadicCountIndex { get; private set; }
internal Flatbuf.Buffer CurrentBuffer =>
RecordBatch.Buffers(CurrentBufferIndex).GetValueOrDefault();
internal Flatbuf.FieldNode CurrentNode =>
RecordBatch.Nodes(CurrentNodeIndex).GetValueOrDefault();
+ internal long CurrentVariadicCount =>
RecordBatch.VariadicBufferCounts(CurrentVariadicCountIndex);
+
internal bool MoveNextBuffer()
{
return ++CurrentBufferIndex < RecordBatch.BuffersLength;
@@ -409,11 +368,17 @@ namespace Apache.Arrow.Ipc
return ++CurrentNodeIndex < RecordBatch.NodesLength;
}
+ internal bool MoveNextVariadicCount()
+ {
+     // Advance first, then report whether the new index still addresses an entry.
+     CurrentVariadicCountIndex += 1;
+     return CurrentVariadicCountIndex < RecordBatch.VariadicBufferCountsLength;
+ }
+
+
internal RecordBatchEnumerator(in Flatbuf.RecordBatch recordBatch)
{
RecordBatch = recordBatch;
CurrentBufferIndex = 0;
CurrentNodeIndex = 0;
+ CurrentVariadicCountIndex = 0;
}
}
}
diff --git a/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs
b/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs
index 5f490019b2..07d1dcfdb1 100644
--- a/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs
+++ b/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs
@@ -54,9 +54,12 @@ namespace Apache.Arrow.Ipc
IArrowArrayVisitor<DayTimeIntervalArray>,
IArrowArrayVisitor<MonthDayNanosecondIntervalArray>,
IArrowArrayVisitor<ListArray>,
+ IArrowArrayVisitor<ListViewArray>,
IArrowArrayVisitor<FixedSizeListArray>,
IArrowArrayVisitor<StringArray>,
+ IArrowArrayVisitor<StringViewArray>,
IArrowArrayVisitor<BinaryArray>,
+ IArrowArrayVisitor<BinaryViewArray>,
IArrowArrayVisitor<FixedSizeBinaryArray>,
IArrowArrayVisitor<StructArray>,
IArrowArrayVisitor<UnionArray>,
@@ -81,6 +84,7 @@ namespace Apache.Arrow.Ipc
public IReadOnlyList<Buffer> Buffers => _buffers;
+ public List<long> VariadicCounts { get; private set; }
public int TotalLength { get; private set; }
public ArrowRecordBatchFlatBufferBuilder()
@@ -121,6 +125,15 @@ namespace Apache.Arrow.Ipc
array.Values.Accept(this);
}
+ // Records the three buffers of a list-view array (null bitmap, value offsets,
+ // sizes) in that order, then visits the child values array.
+ public void Visit(ListViewArray array)
+ {
+ _buffers.Add(CreateBuffer(array.NullBitmapBuffer));
+ _buffers.Add(CreateBuffer(array.ValueOffsetsBuffer));
+ _buffers.Add(CreateBuffer(array.SizesBuffer));
+
+ // The child array appends its own buffers after this array's.
+ array.Values.Accept(this);
+ }
+
public void Visit(FixedSizeListArray array)
{
_buffers.Add(CreateBuffer(array.NullBitmapBuffer));
@@ -130,6 +143,8 @@ namespace Apache.Arrow.Ipc
public void Visit(StringArray array) => Visit(array as
BinaryArray);
+ // Delegates to the BinaryViewArray overload.
+ public void Visit(StringViewArray array)
+ {
+     Visit((BinaryViewArray)array);
+ }
+
public void Visit(BinaryArray array)
{
_buffers.Add(CreateBuffer(array.NullBitmapBuffer));
@@ -137,6 +152,18 @@ namespace Apache.Arrow.Ipc
_buffers.Add(CreateBuffer(array.ValueBuffer));
}
+ /// <summary>
+ /// Records the validity bitmap, the views buffer and every data buffer of a binary-view array,
+ /// then appends the number of data buffers to <c>VariadicCounts</c>.
+ /// </summary>
+ public void Visit(BinaryViewArray array)
+ {
+     _buffers.Add(CreateBuffer(array.NullBitmapBuffer));
+     _buffers.Add(CreateBuffer(array.ViewsBuffer));
+
+     int index = 0;
+     while (index < array.DataBufferCount)
+     {
+         _buffers.Add(CreateBuffer(array.DataBuffer(index)));
+         index++;
+     }
+
+     // The list is only created once a view array is actually visited.
+     if (VariadicCounts == null)
+     {
+         VariadicCounts = new List<long>();
+     }
+     VariadicCounts.Add(array.DataBufferCount);
+ }
+
public void Visit(FixedSizeBinaryArray array)
{
_buffers.Add(CreateBuffer(array.NullBitmapBuffer));
@@ -328,7 +355,7 @@ namespace Apache.Arrow.Ipc
HasWrittenDictionaryBatch = true;
}
- (ArrowRecordBatchFlatBufferBuilder recordBatchBuilder,
VectorOffset fieldNodesVectorOffset) =
+ (ArrowRecordBatchFlatBufferBuilder recordBatchBuilder,
VectorOffset fieldNodesVectorOffset, VectorOffset variadicCountsOffset) =
PreparingWritingRecordBatch(recordBatch);
VectorOffset buffersVectorOffset = Builder.EndVector();
@@ -339,7 +366,9 @@ namespace Apache.Arrow.Ipc
Offset<Flatbuf.RecordBatch> recordBatchOffset =
Flatbuf.RecordBatch.CreateRecordBatch(Builder, recordBatch.Length,
fieldNodesVectorOffset,
- buffersVectorOffset);
+ buffersVectorOffset,
+ default,
+ variadicCountsOffset);
long metadataLength =
WriteMessage(Flatbuf.MessageHeader.RecordBatch,
recordBatchOffset, recordBatchBuilder.TotalLength);
@@ -367,7 +396,7 @@ namespace Apache.Arrow.Ipc
HasWrittenDictionaryBatch = true;
}
- (ArrowRecordBatchFlatBufferBuilder recordBatchBuilder,
VectorOffset fieldNodesVectorOffset) =
+ (ArrowRecordBatchFlatBufferBuilder recordBatchBuilder,
VectorOffset fieldNodesVectorOffset, VectorOffset variadicCountsOffset) =
PreparingWritingRecordBatch(recordBatch);
VectorOffset buffersVectorOffset = Builder.EndVector();
@@ -378,7 +407,9 @@ namespace Apache.Arrow.Ipc
Offset<Flatbuf.RecordBatch> recordBatchOffset =
Flatbuf.RecordBatch.CreateRecordBatch(Builder, recordBatch.Length,
fieldNodesVectorOffset,
- buffersVectorOffset);
+ buffersVectorOffset,
+ default,
+ variadicCountsOffset);
long metadataLength = await
WriteMessageAsync(Flatbuf.MessageHeader.RecordBatch,
recordBatchOffset, recordBatchBuilder.TotalLength,
@@ -451,12 +482,12 @@ namespace Apache.Arrow.Ipc
return bodyLength + bodyPaddingLength;
}
- private Tuple<ArrowRecordBatchFlatBufferBuilder, VectorOffset>
PreparingWritingRecordBatch(RecordBatch recordBatch)
+ // Convenience overload: forwards the batch's schema field list and array list
+ // to the (fields, arrays) overload and returns its result unchanged.
+ private Tuple<ArrowRecordBatchFlatBufferBuilder, VectorOffset,
VectorOffset> PreparingWritingRecordBatch(RecordBatch recordBatch)
{
return PreparingWritingRecordBatch(recordBatch.Schema.FieldsList,
recordBatch.ArrayList);
}
- private Tuple<ArrowRecordBatchFlatBufferBuilder, VectorOffset>
PreparingWritingRecordBatch(IReadOnlyList<Field> fields,
IReadOnlyList<IArrowArray> arrays)
+ private Tuple<ArrowRecordBatchFlatBufferBuilder, VectorOffset,
VectorOffset> PreparingWritingRecordBatch(IReadOnlyList<Field> fields,
IReadOnlyList<IArrowArray> arrays)
{
Builder.Clear();
@@ -483,6 +514,12 @@ namespace Apache.Arrow.Ipc
fieldArray.Accept(recordBatchBuilder);
}
+ VectorOffset variadicCountOffset = default;
+ if (recordBatchBuilder.VariadicCounts != null)
+ {
+ variadicCountOffset =
Flatbuf.RecordBatch.CreateVariadicCountsVectorBlock(Builder,
recordBatchBuilder.VariadicCounts.ToArray());
+ }
+
IReadOnlyList<ArrowRecordBatchFlatBufferBuilder.Buffer> buffers =
recordBatchBuilder.Buffers;
Flatbuf.RecordBatch.StartBuffersVector(Builder, buffers.Count);
@@ -494,7 +531,7 @@ namespace Apache.Arrow.Ipc
buffers[i].Offset, buffers[i].DataBuffer.Length);
}
- return Tuple.Create(recordBatchBuilder, fieldNodesVectorOffset);
+ return Tuple.Create(recordBatchBuilder, fieldNodesVectorOffset,
variadicCountOffset);
}
private protected virtual void StartingWritingDictionary()
@@ -561,7 +598,7 @@ namespace Apache.Arrow.Ipc
var arrays = new List<IArrowArray> { dictionary };
- (ArrowRecordBatchFlatBufferBuilder recordBatchBuilder,
VectorOffset fieldNodesVectorOffset) =
+ (ArrowRecordBatchFlatBufferBuilder recordBatchBuilder,
VectorOffset fieldNodesVectorOffset, VectorOffset variadicCountsOffset) =
PreparingWritingRecordBatch(fields, arrays);
VectorOffset buffersVectorOffset = Builder.EndVector();
@@ -569,7 +606,9 @@ namespace Apache.Arrow.Ipc
// Serialize record batch
Offset<Flatbuf.RecordBatch> recordBatchOffset =
Flatbuf.RecordBatch.CreateRecordBatch(Builder, dictionary.Length,
fieldNodesVectorOffset,
- buffersVectorOffset);
+ buffersVectorOffset,
+ default,
+ variadicCountsOffset);
// TODO: Support delta.
Offset<Flatbuf.DictionaryBatch> dictionaryBatchOffset =
Flatbuf.DictionaryBatch.CreateDictionaryBatch(Builder, id, recordBatchOffset,
false);
diff --git a/csharp/src/Apache.Arrow/Ipc/ArrowTypeFlatbufferBuilder.cs
b/csharp/src/Apache.Arrow/Ipc/ArrowTypeFlatbufferBuilder.cs
index 84ff4f9cc7..473e18968f 100644
--- a/csharp/src/Apache.Arrow/Ipc/ArrowTypeFlatbufferBuilder.cs
+++ b/csharp/src/Apache.Arrow/Ipc/ArrowTypeFlatbufferBuilder.cs
@@ -50,9 +50,13 @@ namespace Apache.Arrow.Ipc
IArrowTypeVisitor<UInt16Type>,
IArrowTypeVisitor<UInt32Type>,
IArrowTypeVisitor<UInt64Type>,
+#if NET5_0_OR_GREATER
+ IArrowTypeVisitor<HalfFloatType>,
+#endif
IArrowTypeVisitor<FloatType>,
IArrowTypeVisitor<DoubleType>,
IArrowTypeVisitor<StringType>,
+ IArrowTypeVisitor<StringViewType>,
IArrowTypeVisitor<Date32Type>,
IArrowTypeVisitor<Date64Type>,
IArrowTypeVisitor<Time32Type>,
@@ -60,8 +64,10 @@ namespace Apache.Arrow.Ipc
IArrowTypeVisitor<DurationType>,
IArrowTypeVisitor<IntervalType>,
IArrowTypeVisitor<BinaryType>,
+ IArrowTypeVisitor<BinaryViewType>,
IArrowTypeVisitor<TimestampType>,
IArrowTypeVisitor<ListType>,
+ IArrowTypeVisitor<ListViewType>,
IArrowTypeVisitor<FixedSizeListType>,
IArrowTypeVisitor<UnionType>,
IArrowTypeVisitor<StructType>,
@@ -106,6 +112,14 @@ namespace Apache.Arrow.Ipc
Flatbuf.Binary.EndBinary(Builder));
}
+ /// <summary>
+ /// Writes a BinaryView table with no fields set and stores it as the field type result.
+ /// </summary>
+ public void Visit(BinaryViewType type)
+ {
+     Flatbuf.BinaryView.StartBinaryView(Builder);
+     Result = FieldType.Build(
+         Flatbuf.Type.BinaryView,
+         Flatbuf.BinaryView.EndBinaryView(Builder));
+ }
+
public void Visit(ListType type)
{
Flatbuf.List.StartList(Builder);
@@ -114,6 +128,14 @@ namespace Apache.Arrow.Ipc
Flatbuf.List.EndList(Builder));
}
+ /// <summary>
+ /// Writes a ListView table with no fields set and stores it as the field type result.
+ /// </summary>
+ public void Visit(ListViewType type)
+ {
+     Flatbuf.ListView.StartListView(Builder);
+     var listViewOffset = Flatbuf.ListView.EndListView(Builder);
+     Result = FieldType.Build(Flatbuf.Type.ListView, listViewOffset);
+ }
+
public void Visit(FixedSizeListType type)
{
Result = FieldType.Build(
@@ -136,6 +158,14 @@ namespace Apache.Arrow.Ipc
Flatbuf.Type.Utf8, offset);
}
+ /// <summary>
+ /// Writes a Utf8View table with no fields set and stores it as the field type result.
+ /// </summary>
+ public void Visit(StringViewType type)
+ {
+     Flatbuf.Utf8View.StartUtf8View(Builder);
+     Result = FieldType.Build(
+         Flatbuf.Type.Utf8View,
+         Flatbuf.Utf8View.EndUtf8View(Builder));
+ }
+
public void Visit(TimestampType type)
{
StringOffset timezoneStringOffset = default;
@@ -169,6 +199,15 @@ namespace Apache.Arrow.Ipc
Flatbuf.Time.CreateTime(Builder, ToFlatBuffer(type.Unit)));
}
+#if NET5_0_OR_GREATER
+ /// <summary>
+ /// Stores a FloatingPoint field type created with <c>Precision.HALF</c>.
+ /// </summary>
+ public void Visit(HalfFloatType type)
+ {
+     var halfPrecision = Flatbuf.FloatingPoint.CreateFloatingPoint(Builder, Precision.HALF);
+     Result = FieldType.Build(Flatbuf.Type.FloatingPoint, halfPrecision);
+ }
+#endif
+
public void Visit(FloatType type)
{
Result = FieldType.Build(
diff --git a/csharp/src/Apache.Arrow/Ipc/MessageSerializer.cs
b/csharp/src/Apache.Arrow/Ipc/MessageSerializer.cs
index 633554fc53..0e6f330aef 100644
--- a/csharp/src/Apache.Arrow/Ipc/MessageSerializer.cs
+++ b/csharp/src/Apache.Arrow/Ipc/MessageSerializer.cs
@@ -184,17 +184,27 @@ namespace Apache.Arrow.Ipc
return
Types.IntervalType.FromIntervalUnit(intervalMetadata.Unit.ToArrow());
case Flatbuf.Type.Utf8:
return Types.StringType.Default;
+ case Flatbuf.Type.Utf8View:
+ return Types.StringViewType.Default;
case Flatbuf.Type.FixedSizeBinary:
Flatbuf.FixedSizeBinary fixedSizeBinaryMetadata =
field.Type<Flatbuf.FixedSizeBinary>().Value;
return new
Types.FixedSizeBinaryType(fixedSizeBinaryMetadata.ByteWidth);
case Flatbuf.Type.Binary:
return Types.BinaryType.Default;
+ case Flatbuf.Type.BinaryView:
+ return Types.BinaryViewType.Default;
case Flatbuf.Type.List:
if (childFields == null || childFields.Length != 1)
{
throw new InvalidDataException($"List type must have
exactly one child.");
}
return new Types.ListType(childFields[0]);
+ case Flatbuf.Type.ListView:
+ if (childFields == null || childFields.Length != 1)
+ {
+ throw new InvalidDataException($"List view type must
have exactly one child.");
+ }
+ return new Types.ListViewType(childFields[0]);
case Flatbuf.Type.FixedSizeList:
if (childFields == null || childFields.Length != 1)
{
diff --git a/csharp/src/Apache.Arrow/Scalars/BinaryView.cs
b/csharp/src/Apache.Arrow/Scalars/BinaryView.cs
new file mode 100644
index 0000000000..eaba89c7a3
--- /dev/null
+++ b/csharp/src/Apache.Arrow/Scalars/BinaryView.cs
@@ -0,0 +1,111 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+namespace Apache.Arrow.Scalars
+{
+    /// <summary>
+    /// One 16-byte view entry: a 4-byte length at offset 0 followed by 12 bytes that hold either
+    /// the value itself (inline) or a 4-byte prefix, a buffer index and a buffer offset.
+    /// </summary>
+    /// <remarks>
+    /// <c>_inline</c> starts at the same offset as <c>_prefix</c> and overlays <c>_prefix</c>,
+    /// <c>_bufferIndex</c> and <c>_bufferOffset</c>. <see cref="IsInline"/> tells which
+    /// interpretation applies to a given instance.
+    /// </remarks>
+    [StructLayout(LayoutKind.Explicit)]
+    public unsafe struct BinaryView : IEquatable<BinaryView>
+    {
+        /// <summary>Number of prefix bytes stored in a view that is not inline.</summary>
+        public const int PrefixLength = 4;
+
+        /// <summary>Largest value length, in bytes, that is stored inside the view itself.</summary>
+        public const int MaxInlineLength = 12;
+
+        /// <summary>Length of the value in bytes.</summary>
+        [FieldOffset(0)]
+        public readonly int Length;
+
+        [FieldOffset(4)]
+        internal readonly int _prefix;
+
+        [FieldOffset(8)]
+        internal readonly int _bufferIndex;
+
+        [FieldOffset(12)]
+        internal readonly int _bufferOffset;
+
+        // Shares its storage with _prefix, _bufferIndex and _bufferOffset.
+        [FieldOffset(4)]
+        internal fixed byte _inline[MaxInlineLength];
+
+        /// <summary>
+        /// Creates an inline view holding a copy of <paramref name="inline"/>.
+        /// </summary>
+        /// <param name="inline">The value bytes; at most <see cref="MaxInlineLength"/> bytes.</param>
+        /// <exception cref="ArgumentException">
+        /// <paramref name="inline"/> is longer than <see cref="MaxInlineLength"/>.
+        /// </exception>
+        public unsafe BinaryView(ReadOnlySpan<byte> inline) : this()
+        {
+            if (inline.Length > MaxInlineLength)
+            {
+                throw new ArgumentException("invalid inline data length", nameof(inline));
+            }
+
+            // this() has already zeroed the struct, so bytes past the value stay zero.
+            Length = inline.Length;
+            fixed (byte* dest = _inline)
+            fixed (byte* src = inline)
+            {
+                Buffer.MemoryCopy(src, dest, MaxInlineLength, inline.Length);
+            }
+        }
+
+        /// <summary>
+        /// Creates a view that refers to a value stored in a separate data buffer.
+        /// </summary>
+        /// <param name="length">Value length in bytes; must exceed <see cref="MaxInlineLength"/>.</param>
+        /// <param name="prefix">Exactly <see cref="PrefixLength"/> bytes.</param>
+        /// <param name="bufferIndex">Index of the data buffer holding the value.</param>
+        /// <param name="bufferOffset">Offset of the value inside that data buffer.</param>
+        /// <exception cref="ArgumentException">
+        /// <paramref name="length"/> does not exceed <see cref="MaxInlineLength"/>, or
+        /// <paramref name="prefix"/> is not exactly <see cref="PrefixLength"/> bytes long.
+        /// </exception>
+        public BinaryView(int length, ReadOnlySpan<byte> prefix, int bufferIndex, int bufferOffset)
+        {
+            // IsInline treats every length up to and including MaxInlineLength as inline, so a
+            // buffer-backed view must be strictly longer; otherwise BufferIndex/BufferOffset
+            // would report -1 for a view that was built with real buffer coordinates.
+            if (length <= MaxInlineLength)
+            {
+                throw new ArgumentException("invalid length", nameof(length));
+            }
+            if (prefix.Length != PrefixLength)
+            {
+                throw new ArgumentException("invalid prefix length", nameof(prefix));
+            }
+
+            Length = length;
+            _bufferIndex = bufferIndex;
+            _bufferOffset = bufferOffset;
+            // NOTE(review): CastTo is a project helper; presumably it reinterprets the four
+            // prefix bytes as a single int - confirm against its definition.
+            _prefix = prefix.CastTo<int>()[0];
+        }
+
+        // Field-by-field constructor used by AdjustBufferIndex; performs no validation.
+        private BinaryView(int length, int prefix, int bufferIndex, int offset)
+        {
+            Length = length;
+            _prefix = prefix;
+            _bufferIndex = bufferIndex;
+            _bufferOffset = offset;
+        }
+
+        /// <summary>True when <see cref="Length"/> is at most <see cref="MaxInlineLength"/>.</summary>
+        public bool IsInline => Length <= MaxInlineLength;
+
+        /// <summary>
+        /// The bytes held in the view itself: <see cref="Length"/> bytes when inline,
+        /// otherwise the first <see cref="PrefixLength"/> bytes.
+        /// </summary>
+#if NET5_0_OR_GREATER
+        public ReadOnlySpan<byte> Bytes => MemoryMarshal.CreateReadOnlySpan<byte>(ref Unsafe.AsRef(_inline[0]), IsInline ? Length : PrefixLength);
+#else
+        public unsafe ReadOnlySpan<byte> Bytes => new ReadOnlySpan<byte>(Unsafe.AsPointer(ref _inline[0]), IsInline ? Length : PrefixLength);
+#endif
+
+        /// <summary>Data buffer index, or -1 for an inline view.</summary>
+        public int BufferIndex => IsInline ? -1 : _bufferIndex;
+
+        /// <summary>Offset inside the data buffer, or -1 for an inline view.</summary>
+        public int BufferOffset => IsInline ? -1 : _bufferOffset;
+
+        /// <summary>XOR of the four 32-bit fields.</summary>
+        public override int GetHashCode() => Length ^ _prefix ^ _bufferIndex ^ _bufferOffset;
+
+        /// <summary>True when <paramref name="obj"/> is a <see cref="BinaryView"/> equal to this one.</summary>
+        public override bool Equals(object obj) => obj is BinaryView other && Equals(other);
+
+        /// <summary>
+        /// Compares the four 32-bit fields. Because the inline bytes overlay three of them,
+        /// this compares all 16 bytes of both views.
+        /// </summary>
+        public bool Equals(BinaryView other) =>
+            Length == other.Length && _prefix == other._prefix && _bufferIndex == other._bufferIndex && _bufferOffset == other._bufferOffset;
+
+        /// <summary>
+        /// Returns a copy whose buffer index is shifted by <paramref name="bufferOffset"/>.
+        /// </summary>
+        /// <param name="bufferOffset">Amount added to the buffer index.</param>
+        internal BinaryView AdjustBufferIndex(int bufferOffset)
+        {
+            // For an inline view the bytes at the buffer-index position are value data,
+            // so shifting them would corrupt the value; there is no index to adjust.
+            if (IsInline)
+            {
+                return this;
+            }
+
+            return new BinaryView(Length, _prefix, _bufferIndex + bufferOffset, _bufferOffset);
+        }
+    }
+}
diff --git a/csharp/src/Apache.Arrow/Types/IArrowType.cs
b/csharp/src/Apache.Arrow/Types/BinaryViewType.cs
similarity index 52%
copy from csharp/src/Apache.Arrow/Types/IArrowType.cs
copy to csharp/src/Apache.Arrow/Types/BinaryViewType.cs
index 5e107813be..f5cfc034dc 100644
--- a/csharp/src/Apache.Arrow/Types/IArrowType.cs
+++ b/csharp/src/Apache.Arrow/Types/BinaryViewType.cs
@@ -16,51 +16,13 @@
namespace Apache.Arrow.Types
{
- public enum ArrowTypeId
+ /// <summary>
+ /// Arrow type whose <c>TypeId</c> is <c>ArrowTypeId.BinaryView</c> and whose name is "binaryview".
+ /// </summary>
+ public class BinaryViewType: ArrowType
{
- Null,
- Boolean,
- UInt8,
- Int8,
- UInt16,
- Int16,
- UInt32,
- Int32,
- UInt64,
- Int64,
- HalfFloat,
- Float,
- Double,
- String,
- Binary,
- FixedSizedBinary,
- Date32,
- Date64,
- Timestamp,
- Time32,
- Time64,
- Interval,
- Decimal128,
- Decimal256,
- List,
- Struct,
- Union,
- Dictionary,
- Map,
- FixedSizeList,
- Duration,
- RecordBatch,
- }
-
- public interface IArrowType
- {
- ArrowTypeId TypeId { get; }
+ /// <summary>Shared instance of this type.</summary>
+ public static readonly BinaryViewType Default = new BinaryViewType();
- string Name { get; }
-
- void Accept(IArrowTypeVisitor visitor);
+ public override ArrowTypeId TypeId => ArrowTypeId.BinaryView;
+ public override string Name => "binaryview";
- bool IsFixedWidth { get; }
-
+ /// <summary>Forwards to the inherited <c>Accept(this, visitor)</c> helper.</summary>
+ public override void Accept(IArrowTypeVisitor visitor) => Accept(this,
visitor);
}
}
diff --git a/csharp/src/Apache.Arrow/Types/IArrowType.cs
b/csharp/src/Apache.Arrow/Types/IArrowType.cs
index 5e107813be..cf520391fe 100644
--- a/csharp/src/Apache.Arrow/Types/IArrowType.cs
+++ b/csharp/src/Apache.Arrow/Types/IArrowType.cs
@@ -50,6 +50,9 @@ namespace Apache.Arrow.Types
FixedSizeList,
Duration,
RecordBatch,
+ BinaryView,
+ StringView,
+ ListView,
}
public interface IArrowType
diff --git a/csharp/src/Apache.Arrow/Types/IArrowType.cs
b/csharp/src/Apache.Arrow/Types/ListViewType.cs
similarity index 52%
copy from csharp/src/Apache.Arrow/Types/IArrowType.cs
copy to csharp/src/Apache.Arrow/Types/ListViewType.cs
index 5e107813be..ecf745723c 100644
--- a/csharp/src/Apache.Arrow/Types/IArrowType.cs
+++ b/csharp/src/Apache.Arrow/Types/ListViewType.cs
@@ -13,54 +13,23 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-
namespace Apache.Arrow.Types
{
- public enum ArrowTypeId
+ /// <summary>
+ /// Nested Arrow type whose <c>TypeId</c> is <c>ArrowTypeId.ListView</c> and whose name is
+ /// "listview"; its single child field describes the list elements.
+ /// </summary>
+ public sealed class ListViewType : NestedType
{
- Null,
- Boolean,
- UInt8,
- Int8,
- UInt16,
- Int16,
- UInt32,
- Int32,
- UInt64,
- Int64,
- HalfFloat,
- Float,
- Double,
- String,
- Binary,
- FixedSizedBinary,
- Date32,
- Date64,
- Timestamp,
- Time32,
- Time64,
- Interval,
- Decimal128,
- Decimal256,
- List,
- Struct,
- Union,
- Dictionary,
- Map,
- FixedSizeList,
- Duration,
- RecordBatch,
- }
+ public override ArrowTypeId TypeId => ArrowTypeId.ListView;
+ public override string Name => "listview";
- public interface IArrowType
- {
- ArrowTypeId TypeId { get; }
+ /// <summary>The first (and only expected) child field.</summary>
+ public Field ValueField => Fields[0];
+
+ /// <summary>Data type of the first child field.</summary>
+ public IArrowType ValueDataType => Fields[0].DataType;
+
+ /// <summary>Creates the type from an explicit element field.</summary>
+ public ListViewType(Field valueField)
+ : base(valueField) { }
- string Name { get; }
-
- void Accept(IArrowTypeVisitor visitor);
+ /// <summary>Creates the type from an element data type, wrapped in a field built as <c>new Field("item", valueDataType, true)</c>.</summary>
+ public ListViewType(IArrowType valueDataType)
+ : this(new Field("item", valueDataType, true)) { }
- bool IsFixedWidth { get; }
-
+ /// <summary>Forwards to the inherited <c>Accept(this, visitor)</c> helper.</summary>
+ public override void Accept(IArrowTypeVisitor visitor) => Accept(this,
visitor);
}
}
diff --git a/csharp/src/Apache.Arrow/Types/IArrowType.cs
b/csharp/src/Apache.Arrow/Types/StringViewType.cs
similarity index 52%
copy from csharp/src/Apache.Arrow/Types/IArrowType.cs
copy to csharp/src/Apache.Arrow/Types/StringViewType.cs
index 5e107813be..0c539a56b0 100644
--- a/csharp/src/Apache.Arrow/Types/IArrowType.cs
+++ b/csharp/src/Apache.Arrow/Types/StringViewType.cs
@@ -16,51 +16,13 @@
namespace Apache.Arrow.Types
{
- public enum ArrowTypeId
+ /// <summary>
+ /// Arrow type whose <c>TypeId</c> is <c>ArrowTypeId.StringView</c> and whose name is "utf8view".
+ /// </summary>
+ public sealed class StringViewType : ArrowType
{
- Null,
- Boolean,
- UInt8,
- Int8,
- UInt16,
- Int16,
- UInt32,
- Int32,
- UInt64,
- Int64,
- HalfFloat,
- Float,
- Double,
- String,
- Binary,
- FixedSizedBinary,
- Date32,
- Date64,
- Timestamp,
- Time32,
- Time64,
- Interval,
- Decimal128,
- Decimal256,
- List,
- Struct,
- Union,
- Dictionary,
- Map,
- FixedSizeList,
- Duration,
- RecordBatch,
- }
-
- public interface IArrowType
- {
- ArrowTypeId TypeId { get; }
+ /// <summary>
+ /// Shared instance of this type. Declared readonly so the singleton cannot be
+ /// reassigned by callers.
+ /// </summary>
+ public static readonly StringViewType Default = new StringViewType();
- string Name { get; }
-
- void Accept(IArrowTypeVisitor visitor);
+ public override ArrowTypeId TypeId => ArrowTypeId.StringView;
+ public override string Name => "utf8view";
- bool IsFixedWidth { get; }
-
+ /// <summary>Forwards to the inherited <c>Accept(this, visitor)</c> helper.</summary>
+ public override void Accept(IArrowTypeVisitor visitor) => Accept(this, visitor);
}
}
diff --git a/csharp/test/Apache.Arrow.Benchmarks/ArrowWriterBenchmark.cs
b/csharp/test/Apache.Arrow.Benchmarks/ArrowWriterBenchmark.cs
index c791c99693..f35c2a5d78 100644
--- a/csharp/test/Apache.Arrow.Benchmarks/ArrowWriterBenchmark.cs
+++ b/csharp/test/Apache.Arrow.Benchmarks/ArrowWriterBenchmark.cs
@@ -38,7 +38,7 @@ namespace Apache.Arrow.Benchmarks
[GlobalSetup]
public void GlobalSetup()
{
- _batch = TestData.CreateSampleRecordBatch(BatchLength,
ColumnSetCount, false);
+ _batch = TestData.CreateSampleRecordBatch(BatchLength,
ColumnSetCount);
_memoryStream = new MemoryStream();
}
diff --git a/csharp/test/Apache.Arrow.IntegrationTest/JsonFile.cs
b/csharp/test/Apache.Arrow.IntegrationTest/JsonFile.cs
index f3fe73588a..31a5676f01 100644
--- a/csharp/test/Apache.Arrow.IntegrationTest/JsonFile.cs
+++ b/csharp/test/Apache.Arrow.IntegrationTest/JsonFile.cs
@@ -21,6 +21,7 @@ using System.Linq;
using System.Numerics;
using System.Text;
using System.Text.Json;
+using System.Text.Json.Nodes;
using System.Text.Json.Serialization;
using System.Threading.Tasks;
using Apache.Arrow.Arrays;
@@ -175,7 +176,9 @@ namespace Apache.Arrow.IntegrationTest
"floatingpoint" => ToFloatingPointArrowType(type),
"decimal" => ToDecimalArrowType(type),
"binary" => BinaryType.Default,
+ "binaryview" => BinaryViewType.Default,
"utf8" => StringType.Default,
+ "utf8view" => StringViewType.Default,
"fixedsizebinary" => new FixedSizeBinaryType(type.ByteWidth),
"date" => ToDateArrowType(type),
"time" => ToTimeArrowType(type),
@@ -184,6 +187,7 @@ namespace Apache.Arrow.IntegrationTest
"interval_mdn" => ToIntervalArrowType(type),
"timestamp" => ToTimestampArrowType(type),
"list" => ToListArrowType(type, children),
+ "listview" => ToListViewArrowType(type, children),
"fixedsizelist" => ToFixedSizeListArrowType(type, children),
"struct" => ToStructArrowType(type, children),
"union" => ToUnionArrowType(type, children),
@@ -294,6 +298,11 @@ namespace Apache.Arrow.IntegrationTest
return new ListType(children[0]);
}
+ // Builds a list-view type whose element field is the first child field.
+ private static IArrowType ToListViewArrowType(JsonArrowType type, Field[] children)
+ {
+     Field elementField = children[0];
+     return new ListViewType(elementField);
+ }
+
private static IArrowType ToFixedSizeListArrowType(JsonArrowType type,
Field[] children)
{
return new FixedSizeListType(children[0], type.ListSize);
@@ -451,9 +460,12 @@ namespace Apache.Arrow.IntegrationTest
IArrowTypeVisitor<IntervalType>,
IArrowTypeVisitor<TimestampType>,
IArrowTypeVisitor<StringType>,
+ IArrowTypeVisitor<StringViewType>,
IArrowTypeVisitor<BinaryType>,
+ IArrowTypeVisitor<BinaryViewType>,
IArrowTypeVisitor<FixedSizeBinaryType>,
IArrowTypeVisitor<ListType>,
+ IArrowTypeVisitor<ListViewType>,
IArrowTypeVisitor<FixedSizeListType>,
IArrowTypeVisitor<StructType>,
IArrowTypeVisitor<UnionType>,
@@ -652,6 +664,38 @@ namespace Apache.Arrow.IntegrationTest
Array = new StringArray(JsonFieldData.Count, offsetBuffer,
valueBuffer, validityBuffer, nullCount);
}
+ /// <summary>
+ /// Builds a <c>StringViewArray</c> from the current JSON field data: validity bitmap,
+ /// views (inline text encoded as UTF-8, or prefix/buffer index/offset), then the
+ /// hex-encoded variadic data buffers.
+ /// </summary>
+ public void Visit(StringViewType type)
+ {
+     ArrowBuffer validityBuffer = GetValidityBuffer(out int nullCount);
+
+     ArrowBuffer viewsBuffer = ArrowBuffer.Empty;
+     if (JsonFieldData.Views != null)
+     {
+         var viewBuilder = new ArrowBuffer.Builder<BinaryView>(JsonFieldData.Views.Count);
+         foreach (JsonView jsonView in JsonFieldData.Views)
+         {
+             BinaryView view;
+             if (jsonView.BufferIndex == null)
+             {
+                 // No buffer index: the value is carried inline as text.
+                 view = new BinaryView(Encoding.UTF8.GetBytes(jsonView.Inlined));
+             }
+             else
+             {
+                 view = new BinaryView(jsonView.Size, Convert.FromHexString(jsonView.PrefixHex), jsonView.BufferIndex.Value, jsonView.Offset.Value);
+             }
+             viewBuilder.Append(view);
+         }
+         viewsBuffer = viewBuilder.Build();
+     }
+
+     int variadicCount = JsonFieldData.VariadicDataBuffers?.Count ?? 0;
+     ArrowBuffer[] buffers = new ArrowBuffer[2 + variadicCount];
+     buffers[0] = validityBuffer;
+     buffers[1] = viewsBuffer;
+     for (int slot = 2; slot < buffers.Length; slot++)
+     {
+         byte[] raw = Convert.FromHexString(JsonFieldData.VariadicDataBuffers[slot - 2]);
+         buffers[slot] = new ArrowBuffer(raw).Clone();
+     }
+
+     Array = new StringViewArray(new ArrayData(type, JsonFieldData.Count, nullCount, 0, buffers));
+ }
+
public void Visit(BinaryType type)
{
ArrowBuffer validityBuffer = GetValidityBuffer(out int
nullCount);
@@ -671,6 +715,38 @@ namespace Apache.Arrow.IntegrationTest
Array = new BinaryArray(arrayData);
}
+ /// <summary>
+ /// Builds a <c>BinaryViewArray</c> from the current JSON field data: validity bitmap,
+ /// views (inline bytes decoded from hex, or prefix/buffer index/offset), then the
+ /// hex-encoded variadic data buffers.
+ /// </summary>
+ public void Visit(BinaryViewType type)
+ {
+     ArrowBuffer validityBuffer = GetValidityBuffer(out int nullCount);
+
+     ArrowBuffer viewsBuffer = ArrowBuffer.Empty;
+     if (JsonFieldData.Views != null)
+     {
+         var viewBuilder = new ArrowBuffer.Builder<BinaryView>(JsonFieldData.Views.Count);
+         foreach (JsonView jsonView in JsonFieldData.Views)
+         {
+             BinaryView view;
+             if (jsonView.BufferIndex == null)
+             {
+                 // No buffer index: the value is carried inline as a hex string.
+                 view = new BinaryView(Convert.FromHexString(jsonView.Inlined));
+             }
+             else
+             {
+                 view = new BinaryView(jsonView.Size, Convert.FromHexString(jsonView.PrefixHex), jsonView.BufferIndex.Value, jsonView.Offset.Value);
+             }
+             viewBuilder.Append(view);
+         }
+         viewsBuffer = viewBuilder.Build();
+     }
+
+     int variadicCount = JsonFieldData.VariadicDataBuffers?.Count ?? 0;
+     ArrowBuffer[] buffers = new ArrowBuffer[2 + variadicCount];
+     buffers[0] = validityBuffer;
+     buffers[1] = viewsBuffer;
+     for (int slot = 2; slot < buffers.Length; slot++)
+     {
+         byte[] raw = Convert.FromHexString(JsonFieldData.VariadicDataBuffers[slot - 2]);
+         buffers[slot] = new ArrowBuffer(raw).Clone();
+     }
+
+     Array = new BinaryViewArray(new ArrayData(type, JsonFieldData.Count, nullCount, 0, buffers));
+ }
+
public void Visit(FixedSizeBinaryType type)
{
ArrowBuffer validityBuffer = GetValidityBuffer(out int
nullCount);
@@ -704,6 +780,22 @@ namespace Apache.Arrow.IntegrationTest
Array = new ListArray(arrayData);
}
+ // Builds a ListViewArray from the current JSON field data: validity, offset and
+ // size buffers for this field plus one child array built from Children[0].
+ public void Visit(ListViewType type)
+ {
+ ArrowBuffer validityBuffer = GetValidityBuffer(out int
nullCount);
+ ArrowBuffer offsetBuffer = GetOffsetBuffer();
+ ArrowBuffer sizeBuffer = GetSizeBuffer();
+
+ // Temporarily point JsonFieldData at the child so the nested visit reads the
+ // child's data, then restore it. The buffers above must be read before the swap.
+ var data = JsonFieldData;
+ JsonFieldData = data.Children[0];
+ type.ValueDataType.Accept(this);
+ JsonFieldData = data;
+
+ // Array.Data is read here as the child data, before Array is overwritten below.
+ // NOTE(review): this relies on the nested visit having assigned Array - confirm.
+ ArrayData arrayData = new ArrayData(type, JsonFieldData.Count,
nullCount, 0,
+ new[] { validityBuffer, offsetBuffer, sizeBuffer }, new[]
{ Array.Data });
+ Array = new ListViewArray(arrayData);
+ }
+
public void Visit(FixedSizeListType type)
{
ArrowBuffer validityBuffer = GetValidityBuffer(out int
nullCount);
@@ -878,11 +970,18 @@ namespace Apache.Arrow.IntegrationTest
+ // Packs the JSON offset entries (read through IntOffset) into a buffer of ints.
private ArrowBuffer GetOffsetBuffer()
{
- ArrowBuffer.Builder<int> valueOffsets = new
ArrowBuffer.Builder<int>(JsonFieldData.Offset.Length);
- valueOffsets.AppendRange(JsonFieldData.Offset);
+ ArrowBuffer.Builder<int> valueOffsets = new
ArrowBuffer.Builder<int>(JsonFieldData.Offset.Count);
+ valueOffsets.AppendRange(JsonFieldData.IntOffset);
return valueOffsets.Build(default);
}
+ // Packs the JSON SIZE entries (read through IntSize) into a buffer of ints.
+ private ArrowBuffer GetSizeBuffer()
+ {
+     int sizeCount = JsonFieldData.Size.Count;
+     ArrowBuffer.Builder<int> sizeBuilder = new ArrowBuffer.Builder<int>(sizeCount);
+     sizeBuilder.AppendRange(JsonFieldData.IntSize);
+     return sizeBuilder.Build(default);
+ }
+
private ArrowBuffer GetTypeIdBuffer()
{
ArrowBuffer.Builder<byte> typeIds = new
ArrowBuffer.Builder<byte>(JsonFieldData.TypeId.Length);
@@ -920,10 +1019,61 @@ namespace Apache.Arrow.IntegrationTest
public string Name { get; set; }
public int Count { get; set; }
public bool[] Validity { get; set; }
- public int[] Offset { get; set; }
+ public JsonArray Offset { get; set; }
+
+ [JsonPropertyName("SIZE")]
+ public JsonArray Size { get; set; }
public int[] TypeId { get; set; }
public JsonElement Data { get; set; }
public List<JsonFieldData> Children { get; set; }
+
+ [JsonPropertyName("VIEWS")]
+ public List<JsonView> Views { get; set; }
+
+ [JsonPropertyName("VARIADIC_DATA_BUFFERS")]
+ public List<string> VariadicDataBuffers { get; set; }
+
+ // Lazily converts each Offset entry to an int via GetInt; not serialized.
+ [JsonIgnore]
+ public IEnumerable<int> IntOffset => Offset.Select(node => GetInt(node));
+
+ // Lazily converts each Size entry to an int via GetInt; not serialized.
+ [JsonIgnore]
+ public IEnumerable<int> IntSize => Size.Select(node => GetInt(node));
+
+ /// <summary>
+ /// Reads an int from a JSON node that holds either a number or a string of digits.
+ /// </summary>
+ /// <param name="node">The JSON node to read.</param>
+ /// <returns>The integer value.</returns>
+ static int GetInt(JsonNode node)
+ {
+     // Probe for a numeric value without using exceptions as control flow, so that
+     // unrelated failures are no longer silently swallowed by a bare catch.
+     if (node is JsonValue value && value.TryGetValue(out int number))
+     {
+         return number;
+     }
+
+     // Otherwise the value is expected to be a string; the text is machine-generated,
+     // so parse it with the invariant culture.
+     return int.Parse(node.GetValue<string>(), System.Globalization.CultureInfo.InvariantCulture);
+ }
+ }
+
+ // One entry of a JSON "VIEWS" list. The visitors in this file treat an entry with
+ // no BUFFER_INDEX as inline (using INLINED) and otherwise read PREFIX_HEX,
+ // BUFFER_INDEX and OFFSET.
+ public class JsonView
+ {
+ // Value length passed to the buffer-backed BinaryView constructor.
+ [JsonPropertyName("SIZE")]
+ public int Size { get; set; }
+
+ // Inline value; the string-view visitor encodes it as UTF-8 and the
+ // binary-view visitor decodes it from hex.
+ [JsonPropertyName("INLINED")]
+ public string Inlined { get; set; }
+
+ // Hex-encoded prefix bytes for a buffer-backed view.
+ [JsonPropertyName("PREFIX_HEX")]
+ public string PrefixHex { get; set; }
+
+ // Null when the view is inline.
+ [JsonPropertyName("BUFFER_INDEX")]
+ public int? BufferIndex { get; set; }
+
+ [JsonPropertyName("OFFSET")]
+ public int? Offset { get; set; }
}
internal sealed class ValidityConverter : JsonConverter<bool>
diff --git
a/csharp/test/Apache.Arrow.IntegrationTest/Properties/launchSettings.json
b/csharp/test/Apache.Arrow.IntegrationTest/Properties/launchSettings.json
new file mode 100644
index 0000000000..46bdeff290
--- /dev/null
+++ b/csharp/test/Apache.Arrow.IntegrationTest/Properties/launchSettings.json
@@ -0,0 +1,8 @@
+{
+ "profiles": {
+ "Apache.Arrow.IntegrationTest": {
+ "commandName": "Project",
+ "commandLineArgs": "--mode validate -j
C:\\Users\\curt\\AppData\\Local\\Temp\\arrow-integration-9_cov7dz\\generated_binary_view.json
-a
C:\\Users\\curt\\AppData\\Local\\Temp\\tmpxicbzqpn\\460a151e_generated_binary_view.json_as_file"
+ }
+ }
+}
\ No newline at end of file
diff --git a/csharp/test/Apache.Arrow.Tests/ArrowArrayConcatenatorTests.cs
b/csharp/test/Apache.Arrow.Tests/ArrowArrayConcatenatorTests.cs
index 137dc16d47..25ef289f0d 100644
--- a/csharp/test/Apache.Arrow.Tests/ArrowArrayConcatenatorTests.cs
+++ b/csharp/test/Apache.Arrow.Tests/ArrowArrayConcatenatorTests.cs
@@ -64,13 +64,16 @@ namespace Apache.Arrow.Tests
FloatType.Default,
DoubleType.Default,
BinaryType.Default,
+ BinaryViewType.Default,
StringType.Default,
+ StringViewType.Default,
Date32Type.Default,
Date64Type.Default,
TimestampType.Default,
new Decimal128Type(14, 10),
new Decimal256Type(14,10),
new ListType(Int64Type.Default),
+ new ListViewType(Int64Type.Default),
new StructType(new List<Field>{
new
Field.Builder().Name("Strings").DataType(StringType.Default).Nullable(true).Build(),
new
Field.Builder().Name("Ints").DataType(Int32Type.Default).Nullable(true).Build()
@@ -122,7 +125,9 @@ namespace Apache.Arrow.Tests
IArrowTypeVisitor<FloatType>,
IArrowTypeVisitor<DoubleType>,
IArrowTypeVisitor<BinaryType>,
+ IArrowTypeVisitor<BinaryViewType>,
IArrowTypeVisitor<StringType>,
+ IArrowTypeVisitor<StringViewType>,
IArrowTypeVisitor<Decimal128Type>,
IArrowTypeVisitor<Decimal256Type>,
IArrowTypeVisitor<Date32Type>,
@@ -131,6 +136,7 @@ namespace Apache.Arrow.Tests
IArrowTypeVisitor<IntervalType>,
IArrowTypeVisitor<TimestampType>,
IArrowTypeVisitor<ListType>,
+ IArrowTypeVisitor<ListViewType>,
IArrowTypeVisitor<FixedSizeListType>,
IArrowTypeVisitor<StructType>,
IArrowTypeVisitor<UnionType>,
@@ -368,6 +374,34 @@ namespace Apache.Arrow.Tests
ExpectedArray = resultBuilder.Build();
}
+ /// <summary>
+ /// Builds one BinaryViewArray per base data list (added to <c>TestTargetArrayList</c>) and a
+ /// single array holding all lists back to back (<c>ExpectedArray</c>). Each non-null entry is
+ /// appended as a single byte.
+ /// </summary>
+ public void Visit(BinaryViewType type)
+ {
+     BinaryViewArray.Builder expected = new BinaryViewArray.Builder().Reserve(_baseDataTotalElementCount);
+
+     for (int listIndex = 0; listIndex < _baseDataListCount; listIndex++)
+     {
+         List<int?> source = _baseData[listIndex];
+         BinaryViewArray.Builder part = new BinaryViewArray.Builder().Reserve(source.Count);
+
+         foreach (int? item in source)
+         {
+             if (!item.HasValue)
+             {
+                 part.AppendNull();
+                 expected.AppendNull();
+                 continue;
+             }
+
+             byte single = (byte)item.Value;
+             part.Append(single);
+             expected.Append(single);
+         }
+
+         TestTargetArrayList.Add(part.Build());
+     }
+
+     ExpectedArray = expected.Build();
+ }
+
public void Visit(StringType type)
{
StringArray.Builder resultBuilder = new
StringArray.Builder().Reserve(_baseDataTotalElementCount);
@@ -388,6 +422,26 @@ namespace Apache.Arrow.Tests
ExpectedArray = resultBuilder.Build();
}
+ /// <summary>
+ /// Builds one StringViewArray per base data list (added to <c>TestTargetArrayList</c>) and a
+ /// single array holding all lists back to back (<c>ExpectedArray</c>).
+ /// </summary>
+ public void Visit(StringViewType type)
+ {
+     StringViewArray.Builder expected = new StringViewArray.Builder().Reserve(_baseDataTotalElementCount);
+
+     for (int listIndex = 0; listIndex < _baseDataListCount; listIndex++)
+     {
+         List<int?> source = _baseData[listIndex];
+         StringViewArray.Builder part = new StringViewArray.Builder().Reserve(source.Count);
+
+         foreach (int? item in source)
+         {
+             // Nullable<int>.ToString() yields an empty string when there is no value,
+             // so missing entries are appended as empty strings rather than nulls.
+             string text = item.ToString();
+             part.Append(text);
+             expected.Append(text);
+         }
+
+         TestTargetArrayList.Add(part.Build());
+     }
+
+     ExpectedArray = expected.Build();
+ }
+
public void Visit(ListType type)
{
ListArray.Builder resultBuilder = new
ListArray.Builder(type.ValueDataType).Reserve(_baseDataTotalElementCount);
@@ -423,6 +477,41 @@ namespace Apache.Arrow.Tests
ExpectedArray = resultBuilder.Build();
}
+ /// <summary>
+ /// Builds one ListViewArray per base data list (added to <c>TestTargetArrayList</c>) and a
+ /// single array holding all lists back to back (<c>ExpectedArray</c>). Every non-null entry
+ /// becomes a list with one Int64 element; every null entry becomes a null list.
+ /// </summary>
+ public void Visit(ListViewType type)
+ {
+     ListViewArray.Builder expected = new ListViewArray.Builder(type.ValueDataType).Reserve(_baseDataTotalElementCount);
+     Int64Array.Builder expectedValues = (Int64Array.Builder)expected.ValueBuilder.Reserve(_baseDataTotalElementCount);
+
+     for (int listIndex = 0; listIndex < _baseDataListCount; listIndex++)
+     {
+         List<int?> source = _baseData[listIndex];
+
+         ListViewArray.Builder part = new ListViewArray.Builder(type.ValueField).Reserve(source.Count);
+         Int64Array.Builder partValues = (Int64Array.Builder)part.ValueBuilder.Reserve(source.Count);
+
+         foreach (int? item in source)
+         {
+             if (!item.HasValue)
+             {
+                 part.AppendNull();
+                 expected.AppendNull();
+                 continue;
+             }
+
+             long element = item.Value;
+
+             part.Append();
+             partValues.Append(element);
+
+             expected.Append();
+             expectedValues.Append(element);
+         }
+
+         TestTargetArrayList.Add(part.Build());
+     }
+
+     ExpectedArray = expected.Build();
+ }
+
public void Visit(FixedSizeListType type)
{
FixedSizeListArray.Builder resultBuilder = new
FixedSizeListArray.Builder(type.ValueDataType,
type.ListSize).Reserve(_baseDataTotalElementCount);
diff --git a/csharp/test/Apache.Arrow.Tests/ArrowReaderVerifier.cs
b/csharp/test/Apache.Arrow.Tests/ArrowReaderVerifier.cs
index 2aaffe7835..10315ff287 100644
--- a/csharp/test/Apache.Arrow.Tests/ArrowReaderVerifier.cs
+++ b/csharp/test/Apache.Arrow.Tests/ArrowReaderVerifier.cs
@@ -20,6 +20,7 @@ using System.Linq;
using System.Threading.Tasks;
using Apache.Arrow.Arrays;
using Xunit;
+using System.Diagnostics;
namespace Apache.Arrow.Tests
{
@@ -90,10 +91,13 @@ namespace Apache.Arrow.Tests
IArrowArrayVisitor<DayTimeIntervalArray>,
IArrowArrayVisitor<MonthDayNanosecondIntervalArray>,
IArrowArrayVisitor<ListArray>,
+ IArrowArrayVisitor<ListViewArray>,
IArrowArrayVisitor<FixedSizeListArray>,
IArrowArrayVisitor<StringArray>,
+ IArrowArrayVisitor<StringViewArray>,
IArrowArrayVisitor<FixedSizeBinaryArray>,
IArrowArrayVisitor<BinaryArray>,
+ IArrowArrayVisitor<BinaryViewArray>,
IArrowArrayVisitor<StructArray>,
IArrowArrayVisitor<UnionArray>,
IArrowArrayVisitor<Decimal128Array>,
@@ -136,12 +140,15 @@ namespace Apache.Arrow.Tests
public void Visit(DayTimeIntervalArray array) =>
CompareArrays(array);
public void Visit(MonthDayNanosecondIntervalArray array) =>
CompareArrays(array);
public void Visit(ListArray array) => CompareArrays(array);
+ public void Visit(ListViewArray array) => CompareArrays(array);
public void Visit(FixedSizeListArray array) =>
CompareArrays(array);
public void Visit(FixedSizeBinaryArray array) =>
CompareArrays(array);
public void Visit(Decimal128Array array) => CompareArrays(array);
public void Visit(Decimal256Array array) => CompareArrays(array);
public void Visit(StringArray array) =>
CompareBinaryArrays<StringArray>(array);
+ public void Visit(StringViewArray array) =>
CompareVariadicArrays<StringViewArray>(array);
public void Visit(BinaryArray array) =>
CompareBinaryArrays<BinaryArray>(array);
+ public void Visit(BinaryViewArray array) =>
CompareVariadicArrays<BinaryViewArray>(array);
public void Visit(StructArray array)
{
@@ -230,6 +237,32 @@ namespace Apache.Arrow.Tests
}
}
+ /// <summary>
+ /// Asserts that a view-encoded binary array (BinaryViewArray or StringViewArray)
+ /// matches the expected array in type, length, null count, offset, validity
+ /// bitmap, raw view entries and per-element bytes.
+ /// </summary>
+ /// <typeparam name="T">Concrete array type that both the expected and the actual array must be assignable to.</typeparam>
+ /// <param name="actualArray">The array under test.</param>
+ private void CompareVariadicArrays<T>(BinaryViewArray actualArray)
+     where T : IArrowArray
+ {
+     Assert.IsAssignableFrom<T>(_expectedArray);
+     Assert.IsAssignableFrom<T>(actualArray);
+
+     var expectedArray = (BinaryViewArray)_expectedArray;
+
+     actualArray.Data.DataType.Accept(_arrayTypeComparer);
+
+     Assert.Equal(expectedArray.Length, actualArray.Length);
+     Assert.Equal(expectedArray.NullCount, actualArray.NullCount);
+     Assert.Equal(expectedArray.Offset, actualArray.Offset);
+
+     CompareValidityBuffer(expectedArray.NullCount, _expectedArray.Length, expectedArray.NullBitmapBuffer, actualArray.NullBitmapBuffer);
+
+     // The view entries themselves must match, not only the bytes they resolve to.
+     Assert.True(expectedArray.Views.SequenceEqual(actualArray.Views));
+
+     for (int i = 0; i < expectedArray.Length; i++)
+     {
+         // Name the array kind actually being compared so a failure is not
+         // mistaken for one raised by the non-view binary comparison.
+         Assert.True(
+             expectedArray.GetBytes(i).SequenceEqual(actualArray.GetBytes(i)),
+             $"BinaryViewArray values do not match at index {i}.");
+     }
+ }
+
private void CompareArrays(FixedSizeBinaryArray actualArray)
{
Assert.IsAssignableFrom<FixedSizeBinaryArray>(_expectedArray);
@@ -346,6 +379,34 @@ namespace Apache.Arrow.Tests
actualArray.Values.Accept(new
ArrayComparer(expectedArray.Values, _strictCompare));
}
+ /// <summary>
+ /// Asserts that a list-view array matches the expected array: metadata,
+ /// validity bitmap, offsets buffer, sizes buffer and, recursively, the values.
+ /// </summary>
+ /// <param name="actualArray">The list-view array under test.</param>
+ private void CompareArrays(ListViewArray actualArray)
+ {
+     Assert.IsAssignableFrom<ListViewArray>(_expectedArray);
+     var expected = (ListViewArray)_expectedArray;
+
+     actualArray.Data.DataType.Accept(_arrayTypeComparer);
+
+     Assert.Equal(expected.Length, actualArray.Length);
+     Assert.Equal(expected.NullCount, actualArray.NullCount);
+     Assert.Equal(expected.Offset, actualArray.Offset);
+
+     CompareValidityBuffer(expected.NullCount, _expectedArray.Length, expected.NullBitmapBuffer, actualArray.NullBitmapBuffer);
+
+     // Outside strict mode only the leading Length * sizeof(int) bytes of each
+     // buffer are compared; anything beyond that prefix is ignored.
+     int comparedBytes = expected.Length * sizeof(int);
+
+     var expectedOffsets = expected.ValueOffsetsBuffer.Span;
+     var actualOffsets = actualArray.ValueOffsetsBuffer.Span;
+     if (!_strictCompare)
+     {
+         expectedOffsets = expectedOffsets.Slice(0, comparedBytes);
+         actualOffsets = actualOffsets.Slice(0, comparedBytes);
+     }
+     Assert.True(expectedOffsets.SequenceEqual(actualOffsets));
+
+     var expectedSizes = expected.SizesBuffer.Span;
+     var actualSizes = actualArray.SizesBuffer.Span;
+     if (!_strictCompare)
+     {
+         expectedSizes = expectedSizes.Slice(0, comparedBytes);
+         actualSizes = actualSizes.Slice(0, comparedBytes);
+     }
+     Assert.True(expectedSizes.SequenceEqual(actualSizes));
+
+     // Child values are verified with a fresh comparer using the same strictness.
+     actualArray.Values.Accept(new ArrayComparer(expected.Values, _strictCompare));
+ }
+
private void CompareArrays(FixedSizeListArray actualArray)
{
Assert.IsAssignableFrom<FixedSizeListArray>(_expectedArray);
diff --git a/csharp/test/Apache.Arrow.Tests/BinaryViewTests.cs
b/csharp/test/Apache.Arrow.Tests/BinaryViewTests.cs
new file mode 100644
index 0000000000..eb617b4ded
--- /dev/null
+++ b/csharp/test/Apache.Arrow.Tests/BinaryViewTests.cs
@@ -0,0 +1,89 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+using System;
+using Apache.Arrow.Scalars;
+using Xunit;
+
+namespace Apache.Arrow.Tests
+{
+ /// <summary>
+ /// Unit tests for the <see cref="BinaryView"/> scalar: equality, constructor
+ /// argument validation, and the values reported by inline and prefix-style views.
+ /// </summary>
+ public class BinaryViewTests
+ {
+     // Shared payloads; the names state the length of each array.
+     private static readonly byte[] empty = new byte[0];
+     private static readonly byte[] oneByte = new byte[1];
+     private static readonly byte[] fourBytes = new byte[] { 1, 2, 3, 4 };
+     private static readonly byte[] fiveBytes = new byte[] { 5, 4, 3, 2, 1 };
+     private static readonly byte[] twelveBytes = new byte[] { 1, 2, 3, 4, 8, 7, 6, 5, 9, 10, 11, 12 };
+     private static readonly byte[] thirteenBytes = new byte[13];
+
+     /// <summary>A view equals itself and differs from views of other lengths or kinds.</summary>
+     [Fact]
+     public void Equality()
+     {
+         BinaryView one = new BinaryView(oneByte);
+         BinaryView four = new BinaryView(fourBytes);
+         BinaryView twelve = new BinaryView(twelveBytes);
+         BinaryView twelvePlus = new BinaryView(13, fourBytes, 0, 0);
+         Assert.Equal(one, one);
+         Assert.NotEqual(one, four);
+         Assert.NotEqual(four, twelve);
+         Assert.NotEqual(four, twelvePlus);
+     }
+
+     /// <summary>Each of these argument combinations is rejected with an ArgumentException.</summary>
+     [Fact]
+     public void ConstructorThrows()
+     {
+         // NOTE(review): presumably the single-argument constructor only accepts
+         // payloads of at most 12 bytes, and the four-argument constructor needs a
+         // 4-byte prefix together with a length above 12 - confirm against BinaryView.
+         Assert.Throws<ArgumentException>(() => new BinaryView(thirteenBytes));
+         Assert.Throws<ArgumentException>(() => new BinaryView(20, empty, 0, 0));
+         Assert.Throws<ArgumentException>(() => new BinaryView(20, fiveBytes, 0, 0));
+         Assert.Throws<ArgumentException>(() => new BinaryView(13, thirteenBytes, 0, 0));
+         Assert.Throws<ArgumentException>(() => new BinaryView(4, fourBytes, 0, 0));
+     }
+
+     /// <summary>
+     /// Views built from a byte payload alone report -1 for both buffer index and
+     /// buffer offset and expose the full payload through Bytes.
+     /// </summary>
+     [Fact]
+     public void ConstructInline()
+     {
+         BinaryView zero = new BinaryView(empty);
+         Assert.Equal(-1, zero.BufferIndex);
+         Assert.Equal(-1, zero.BufferOffset);
+         Assert.Equal(0, zero.Length);
+         Assert.Equal(0, zero.Bytes.Length);
+
+         BinaryView one = new BinaryView(oneByte);
+         Assert.Equal(-1, one.BufferIndex);
+         Assert.Equal(-1, one.BufferOffset);
+         Assert.Equal(1, one.Length);
+         Assert.Equal(1, one.Bytes.Length);
+         Assert.Equal((byte)0, one.Bytes[0]);
+
+         // Check the 12-byte view itself; asserting on `one` again here would
+         // leave the buffer markers of `twelve` unverified.
+         BinaryView twelve = new BinaryView(twelveBytes);
+         Assert.Equal(-1, twelve.BufferIndex);
+         Assert.Equal(-1, twelve.BufferOffset);
+         Assert.Equal(12, twelve.Length);
+         Assert.Equal(12, twelve.Bytes.Length);
+         Assert.Equal((byte)8, twelve.Bytes[4]);
+     }
+
+     /// <summary>
+     /// A view built from a length, a 4-byte prefix, a buffer index and a buffer
+     /// offset reports those values back and exposes only the prefix through Bytes.
+     /// </summary>
+     [Fact]
+     public void ConstructPrefix()
+     {
+         BinaryView four = new BinaryView(14, fourBytes, 2, 3);
+         Assert.Equal(2, four.BufferIndex);
+         Assert.Equal(3, four.BufferOffset);
+         Assert.Equal(14, four.Length);
+         Assert.Equal(4, four.Bytes.Length);
+         Assert.Equal((byte)2, four.Bytes[1]);
+     }
+ }
+}
diff --git a/csharp/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs
b/csharp/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs
index 83902d8d93..274434e4ba 100644
--- a/csharp/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs
+++ b/csharp/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs
@@ -741,7 +741,9 @@ namespace Apache.Arrow.Tests
[SkippableFact]
public unsafe void RoundTripTestBatch()
{
- RecordBatch batch1 = TestData.CreateSampleRecordBatch(4,
createDictionaryArray: true);
+ // TODO: Enable these once the version of pyarrow referenced
during testing supports them
+ HashSet<ArrowTypeId> unsupported = new HashSet<ArrowTypeId> {
ArrowTypeId.ListView, ArrowTypeId.BinaryView, ArrowTypeId.StringView };
+ RecordBatch batch1 = TestData.CreateSampleRecordBatch(4,
excludedTypes: unsupported);
RecordBatch batch2 = batch1.Clone();
CArrowArray* cExportArray = CArrowArray.Create();
diff --git a/csharp/test/Apache.Arrow.Tests/TableTests.cs
b/csharp/test/Apache.Arrow.Tests/TableTests.cs
index d52b514e09..83c88265d1 100644
--- a/csharp/test/Apache.Arrow.Tests/TableTests.cs
+++ b/csharp/test/Apache.Arrow.Tests/TableTests.cs
@@ -62,7 +62,11 @@ namespace Apache.Arrow.Tests
Table table1 = Table.TableFromRecordBatches(recordBatch1.Schema,
recordBatches);
Assert.Equal(20, table1.RowCount);
- Assert.Equal(30, table1.ColumnCount);
+#if NET5_0_OR_GREATER
+ Assert.Equal(35, table1.ColumnCount);
+#else
+ Assert.Equal(34, table1.ColumnCount);
+#endif
Assert.Equal("ChunkedArray: Length=20, DataType=list",
table1.Column(0).Data.ToString());
FixedSizeBinaryType type = new FixedSizeBinaryType(17);
diff --git a/csharp/test/Apache.Arrow.Tests/TestData.cs
b/csharp/test/Apache.Arrow.Tests/TestData.cs
index b43321abd7..29ddef2864 100644
--- a/csharp/test/Apache.Arrow.Tests/TestData.cs
+++ b/csharp/test/Apache.Arrow.Tests/TestData.cs
@@ -24,53 +24,66 @@ namespace Apache.Arrow.Tests
{
public static class TestData
{
- public static RecordBatch CreateSampleRecordBatch(int length, bool
createDictionaryArray = true)
+ public static RecordBatch CreateSampleRecordBatch(int length, bool
createDictionaryArray)
{
- return CreateSampleRecordBatch(length, columnSetCount: 1,
createDictionaryArray);
+ HashSet<ArrowTypeId> excluded = createDictionaryArray ? null : new
HashSet<ArrowTypeId> { ArrowTypeId.Dictionary };
+ return CreateSampleRecordBatch(length, columnSetCount: 1,
excluded);
}
- public static RecordBatch CreateSampleRecordBatch(int length, int
columnSetCount, bool createAdvancedTypeArrays)
+ public static RecordBatch CreateSampleRecordBatch(
+ int length,
+ int columnSetCount = 1,
+ HashSet<ArrowTypeId> excludedTypes = null)
{
Schema.Builder builder = new Schema.Builder();
- for (int i = 0; i < columnSetCount; i++)
+
+ void AddField(Field field)
{
- builder.Field(CreateField(new ListType(Int64Type.Default), i));
- builder.Field(CreateField(BooleanType.Default, i));
- builder.Field(CreateField(UInt8Type.Default, i));
- builder.Field(CreateField(Int8Type.Default, i));
- builder.Field(CreateField(UInt16Type.Default, i));
- builder.Field(CreateField(Int16Type.Default, i));
- builder.Field(CreateField(UInt32Type.Default, i));
- builder.Field(CreateField(Int32Type.Default, i));
- builder.Field(CreateField(UInt64Type.Default, i));
- builder.Field(CreateField(Int64Type.Default, i));
- builder.Field(CreateField(FloatType.Default, i));
- builder.Field(CreateField(DoubleType.Default, i));
- builder.Field(CreateField(Date32Type.Default, i));
- builder.Field(CreateField(Date64Type.Default, i));
- builder.Field(CreateField(Time32Type.Default, i));
- builder.Field(CreateField(Time64Type.Default, i));
- builder.Field(CreateField(TimestampType.Default, i));
- builder.Field(CreateField(StringType.Default, i));
- builder.Field(CreateField(new StructType(new List<Field> {
CreateField(StringType.Default, i), CreateField(Int32Type.Default, i) }), i));
- builder.Field(CreateField(new Decimal128Type(10, 6), i));
- builder.Field(CreateField(new Decimal256Type(16, 8), i));
- builder.Field(CreateField(new MapType(StringType.Default,
Int32Type.Default), i));
- builder.Field(CreateField(IntervalType.YearMonth, i));
- builder.Field(CreateField(IntervalType.DayTime, i));
- builder.Field(CreateField(IntervalType.MonthDayNanosecond, i));
-
- if (createAdvancedTypeArrays)
+ if (excludedTypes == null ||
!excludedTypes.Contains(field.DataType.TypeId))
{
- builder.Field(CreateField(new
DictionaryType(Int32Type.Default, StringType.Default, false), i));
- builder.Field(CreateField(new FixedSizeBinaryType(16), i));
- builder.Field(CreateField(new
FixedSizeListType(Int32Type.Default, 3), i));
- builder.Field(CreateField(new UnionType(new[] {
CreateField(StringType.Default, i), CreateField(Int32Type.Default, i) }, new[]
{ 0, 1 }, UnionMode.Sparse), i));
- builder.Field(CreateField(new UnionType(new[] {
CreateField(StringType.Default, i), CreateField(Int32Type.Default, i) }, new[]
{ 0, 1 }, UnionMode.Dense), -i));
+ builder.Field(field);
}
+ }
- //builder.Field(CreateField(HalfFloatType.Default));
- //builder.Field(CreateField(StringType.Default));
+ for (int i = 0; i < columnSetCount; i++)
+ {
+ AddField(CreateField(new ListType(Int64Type.Default), i));
+ AddField(CreateField(new ListViewType(Int64Type.Default), i));
+ AddField(CreateField(BooleanType.Default, i));
+ AddField(CreateField(UInt8Type.Default, i));
+ AddField(CreateField(Int8Type.Default, i));
+ AddField(CreateField(UInt16Type.Default, i));
+ AddField(CreateField(Int16Type.Default, i));
+ AddField(CreateField(UInt32Type.Default, i));
+ AddField(CreateField(Int32Type.Default, i));
+ AddField(CreateField(UInt64Type.Default, i));
+ AddField(CreateField(Int64Type.Default, i));
+#if NET5_0_OR_GREATER
+ AddField(CreateField(HalfFloatType.Default, i));
+#endif
+ AddField(CreateField(FloatType.Default, i));
+ AddField(CreateField(DoubleType.Default, i));
+ AddField(CreateField(Date32Type.Default, i));
+ AddField(CreateField(Date64Type.Default, i));
+ AddField(CreateField(Time32Type.Default, i));
+ AddField(CreateField(Time64Type.Default, i));
+ AddField(CreateField(TimestampType.Default, i));
+ AddField(CreateField(StringType.Default, i));
+ AddField(CreateField(StringViewType.Default, i));
+ AddField(CreateField(new StructType(new List<Field> {
CreateField(StringType.Default, i), CreateField(Int32Type.Default, i) }), i));
+ AddField(CreateField(new Decimal128Type(10, 6), i));
+ AddField(CreateField(new Decimal256Type(16, 8), i));
+ AddField(CreateField(new MapType(StringType.Default,
Int32Type.Default), i));
+ AddField(CreateField(IntervalType.YearMonth, i));
+ AddField(CreateField(IntervalType.DayTime, i));
+ AddField(CreateField(IntervalType.MonthDayNanosecond, i));
+ AddField(CreateField(BinaryType.Default, i));
+ AddField(CreateField(BinaryViewType.Default, i));
+ AddField(CreateField(new FixedSizeBinaryType(16), i));
+ AddField(CreateField(new FixedSizeListType(Int32Type.Default,
3), i));
+ AddField(CreateField(new UnionType(new[] {
CreateField(StringType.Default, i), CreateField(Int32Type.Default, i) }, new[]
{ 0, 1 }, UnionMode.Sparse), i));
+ AddField(CreateField(new UnionType(new[] {
CreateField(StringType.Default, i), CreateField(Int32Type.Default, i) }, new[]
{ 0, 1 }, UnionMode.Dense), -i));
+ AddField(CreateField(new DictionaryType(Int32Type.Default,
StringType.Default, false), i));
}
Schema schema = builder.Build();
@@ -130,16 +143,23 @@ namespace Apache.Arrow.Tests
IArrowTypeVisitor<DoubleType>,
IArrowTypeVisitor<TimestampType>,
IArrowTypeVisitor<StringType>,
+ IArrowTypeVisitor<StringViewType>,
IArrowTypeVisitor<ListType>,
+ IArrowTypeVisitor<ListViewType>,
IArrowTypeVisitor<FixedSizeListType>,
IArrowTypeVisitor<StructType>,
IArrowTypeVisitor<UnionType>,
IArrowTypeVisitor<Decimal128Type>,
IArrowTypeVisitor<Decimal256Type>,
IArrowTypeVisitor<DictionaryType>,
+ IArrowTypeVisitor<BinaryType>,
+ IArrowTypeVisitor<BinaryViewType>,
IArrowTypeVisitor<FixedSizeBinaryType>,
IArrowTypeVisitor<MapType>,
IArrowTypeVisitor<IntervalType>,
+#if NET5_0_OR_GREATER
+ IArrowTypeVisitor<HalfFloatType>,
+#endif
IArrowTypeVisitor<NullType>
{
private int Length { get; }
@@ -160,6 +180,9 @@ namespace Apache.Arrow.Tests
public void Visit(UInt32Type type) => GenerateArray(new
UInt32Array.Builder(), x => (uint)x);
public void Visit(UInt64Type type) => GenerateArray(new
UInt64Array.Builder(), x => (ulong)x);
public void Visit(FloatType type) => GenerateArray(new
FloatArray.Builder(), x => ((float)x / Length));
+#if NET5_0_OR_GREATER
+ public void Visit(HalfFloatType type) => GenerateArray(new
HalfFloatArray.Builder(), x => ((Half)x / (Half)Length));
+#endif
public void Visit(DoubleType type) => GenerateArray(new
DoubleArray.Builder(), x => ((double)x / Length));
public void Visit(Decimal128Type type)
{
@@ -277,6 +300,30 @@ namespace Apache.Arrow.Tests
Array = builder.Build();
}
+ /// <summary>
+ /// Fills <c>Array</c> with a string-view array of <c>Length</c> entries that
+ /// cycles through null, a 10-character string and a 20-character string.
+ /// </summary>
+ public void Visit(StringViewType type)
+ {
+     const string shortText = "length=ten";
+     string longText = shortText + shortText;
+     var viewBuilder = new StringViewArray.Builder();
+
+     for (var index = 0; index < Length; index++)
+     {
+         int slot = index % 3;
+         if (slot == 0)
+         {
+             viewBuilder.AppendNull();
+         }
+         else if (slot == 1)
+         {
+             viewBuilder.Append(shortText);
+         }
+         else
+         {
+             // slot == 2: the doubled string.
+             viewBuilder.Append(longText);
+         }
+     }
+
+     Array = viewBuilder.Build();
+ }
+
public void Visit(ListType type)
{
var builder = new
ListArray.Builder(type.ValueField).Reserve(Length);
@@ -294,6 +341,23 @@ namespace Apache.Arrow.Tests
Array = builder.Build();
}
+ /// <summary>
+ /// Fills <c>Array</c> with a list-view array of <c>Length</c> entries, each
+ /// started just before one Int64 value is appended to the child builder.
+ /// </summary>
+ public void Visit(ListViewType type)
+ {
+     ListViewArray.Builder listBuilder = new ListViewArray.Builder(type.ValueField).Reserve(Length);
+
+     // One slot more than the list count, for the trailing value appended below.
+     Int64Array.Builder itemBuilder = (Int64Array.Builder)listBuilder.ValueBuilder.Reserve(Length + 1);
+
+     for (int index = 0; index < Length; index++)
+     {
+         // Start the list entry first, then append its single value.
+         listBuilder.Append();
+         itemBuilder.Append(index);
+     }
+
+     // Append one extra value so that Values.Length exceeds ListViewArray.Length.
+     itemBuilder.Append(0);
+
+     Array = listBuilder.Build();
+ }
+
public void Visit(FixedSizeListType type)
{
var builder = new FixedSizeListArray.Builder(type.ValueField,
type.ListSize).Reserve(Length);
@@ -411,6 +475,64 @@ namespace Apache.Arrow.Tests
Array = new DictionaryArray(type, indicesBuilder.Build(),
valueBuilder.Build());
}
+ /// <summary>
+ /// Fills <c>Array</c> with a binary array of <c>Length</c> entries that cycles
+ /// through null, a 10-byte value and a 20-byte value.
+ /// </summary>
+ public void Visit(BinaryType type)
+ {
+     // Bytes 0..9.
+     ReadOnlySpan<byte> tenBytes = new byte[] { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 };
+     // Bytes 0..19.
+     ReadOnlySpan<byte> twentyBytes = new byte[]
+     {
+         0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
+         10, 11, 12, 13, 14, 15, 16, 17, 18, 19
+     };
+     var binaryBuilder = new BinaryArray.Builder();
+
+     for (var index = 0; index < Length; index++)
+     {
+         int slot = index % 3;
+         if (slot == 0)
+         {
+             binaryBuilder.AppendNull();
+         }
+         else if (slot == 1)
+         {
+             binaryBuilder.Append(tenBytes);
+         }
+         else
+         {
+             // slot == 2: the longer payload.
+             binaryBuilder.Append(twentyBytes);
+         }
+     }
+
+     Array = binaryBuilder.Build();
+ }
+
+ /// <summary>
+ /// Fills <c>Array</c> with a binary-view array of <c>Length</c> entries that
+ /// cycles through null, a 10-byte value and a 20-byte value.
+ /// </summary>
+ public void Visit(BinaryViewType type)
+ {
+     // Bytes 0..9.
+     ReadOnlySpan<byte> tenBytes = new byte[] { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 };
+     // Bytes 0..19.
+     ReadOnlySpan<byte> twentyBytes = new byte[]
+     {
+         0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
+         10, 11, 12, 13, 14, 15, 16, 17, 18, 19
+     };
+     var viewBuilder = new BinaryViewArray.Builder();
+
+     for (var index = 0; index < Length; index++)
+     {
+         int slot = index % 3;
+         if (slot == 0)
+         {
+             viewBuilder.AppendNull();
+         }
+         else if (slot == 1)
+         {
+             viewBuilder.Append(tenBytes);
+         }
+         else
+         {
+             // slot == 2: the longer payload.
+             viewBuilder.Append(twentyBytes);
+         }
+     }
+
+     Array = viewBuilder.Build();
+ }
+
public void Visit(FixedSizeBinaryType type)
{
ArrowBuffer.Builder<byte> valueBuilder = new
ArrowBuffer.Builder<byte>();
diff --git a/dev/archery/archery/integration/datagen.py
b/dev/archery/archery/integration/datagen.py
index 2bbc843836..230ec5b3ef 100644
--- a/dev/archery/archery/integration/datagen.py
+++ b/dev/archery/archery/integration/datagen.py
@@ -1932,13 +1932,12 @@ def get_generated_json_files(tempdir=None):
.skip_tester('Rust'),
generate_binary_view_case()
- .skip_tester('C#')
.skip_tester('Java')
.skip_tester('JS')
.skip_tester('Rust'),
generate_list_view_case()
- .skip_tester('C#')
+ .skip_tester('C#') # Doesn't support large list views
.skip_tester('Java')
.skip_tester('JS')
.skip_tester('Rust'),
diff --git a/docs/source/status.rst b/docs/source/status.rst
index e860aceb76..03a8701234 100644
--- a/docs/source/status.rst
+++ b/docs/source/status.rst
@@ -68,9 +68,13 @@ Data Types
+-------------------+-------+-------+-------+------------+-------+-------+-------+-------+
| Large Utf8 | ✓ | ✓ | ✓ | ✓ | | ✓ | ✓
| |
+-------------------+-------+-------+-------+------------+-------+-------+-------+-------+
-| Binary View | ✓ | | ✓ | | | |
| |
+| Binary View | ✓ | | ✓ | | ✓ | |
| |
+-------------------+-------+-------+-------+------------+-------+-------+-------+-------+
-| String View | ✓ | | ✓ | | | |
| |
+| Large Binary View | ✓ | | ✓ | | | |
| |
++-------------------+-------+-------+-------+------------+-------+-------+-------+-------+
+| Utf8 View | ✓ | | ✓ | | ✓ | |
| |
++-------------------+-------+-------+-------+------------+-------+-------+-------+-------+
+| Large Utf8 View | ✓ | | ✓ | | | |
| |
+-------------------+-------+-------+-------+------------+-------+-------+-------+-------+
+-------------------+-------+-------+-------+------------+-------+-------+-------+-------+
@@ -83,7 +87,7 @@ Data Types
+-------------------+-------+-------+-------+------------+-------+-------+-------+-------+
| Large List | ✓ | ✓ | ✓ | | | ✓ | ✓
| |
+-------------------+-------+-------+-------+------------+-------+-------+-------+-------+
-| List View | ✓ | | ✓ | | | |
| |
+| List View | ✓ | | ✓ | | ✓ | |
| |
+-------------------+-------+-------+-------+------------+-------+-------+-------+-------+
| Large List View | ✓ | | ✓ | | | |
| |
+-------------------+-------+-------+-------+------------+-------+-------+-------+-------+