This is an automated email from the ASF dual-hosted git repository.
curth pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-dotnet.git
The following commit(s) were added to refs/heads/main by this push:
new 59cc1af Add extension type support and implement GuidArray (#268)
59cc1af is described below
commit 59cc1af4fcb5e34981ce847c327b3b74e1fd69a9
Author: Curt Hagenlocher <[email protected]>
AuthorDate: Sun Feb 22 16:41:40 2026 -0800
Add extension type support and implement GuidArray (#268)
## What's Changed
Adds generic support for extension types, including an extension type
registry. This has been done in a way that should be largely backward
compatible. No extension types are enabled by default, and different
consumers of Arrow inside the same process can explicitly designate
which extension types they want to make use of.
Packages several extensibility points into a single `ArrowContext`: the
compression factory, the allocator and the extension registry.
Implements the `GuidArray` extension type.
Adds tests, including Python interop tests, for `GuidArray`.
Partially implements #182.
---
.../Arrays/ArrowArrayBuilderFactory.cs | 1 +
src/Apache.Arrow/Arrays/ArrowArrayFactory.cs | 5 +
src/Apache.Arrow/Arrays/FixedSizeBinaryArray.cs | 9 +-
src/Apache.Arrow/Arrays/GuidArray.cs | 209 ++++++++++++
src/Apache.Arrow/ArrowContext.cs | 41 +++
src/Apache.Arrow/C/CArrowArrayExporter.cs | 3 +-
src/Apache.Arrow/C/CArrowArrayImporter.cs | 21 +-
src/Apache.Arrow/C/CArrowSchemaExporter.cs | 35 +-
src/Apache.Arrow/C/CArrowSchemaImporter.cs | 63 +++-
src/Apache.Arrow/ExtensionType.cs | 153 +++++++++
src/Apache.Arrow/ExtensionTypeRegistry.cs | 144 +++++++++
src/Apache.Arrow/Ipc/ArrowFileReader.cs | 5 +
.../Ipc/ArrowFileReaderImplementation.cs | 9 +-
src/Apache.Arrow/Ipc/ArrowFooter.cs | 6 +-
.../Ipc/ArrowMemoryReaderImplementation.cs | 10 +-
src/Apache.Arrow/Ipc/ArrowReaderImplementation.cs | 16 +-
src/Apache.Arrow/Ipc/ArrowStreamReader.cs | 18 ++
.../Ipc/ArrowStreamReaderImplementation.cs | 12 +-
src/Apache.Arrow/Ipc/ArrowStreamWriter.cs | 43 ++-
src/Apache.Arrow/Ipc/MessageSerializer.cs | 40 ++-
src/Apache.Arrow/Types/ArrowTypeExtensions.cs | 25 ++
src/Apache.Arrow/Types/IArrowType.cs | 1 +
.../CDataInterfacePythonTests.cs | 110 +++++++
test/Apache.Arrow.Tests/ExtensionTypeTests.cs | 351 +++++++++++++++++++++
24 files changed, 1280 insertions(+), 50 deletions(-)
diff --git a/src/Apache.Arrow/Arrays/ArrowArrayBuilderFactory.cs
b/src/Apache.Arrow/Arrays/ArrowArrayBuilderFactory.cs
index 4c4586a..f0afb93 100644
--- a/src/Apache.Arrow/Arrays/ArrowArrayBuilderFactory.cs
+++ b/src/Apache.Arrow/Arrays/ArrowArrayBuilderFactory.cs
@@ -101,6 +101,7 @@ namespace Apache.Arrow
case ArrowTypeId.Union:
case ArrowTypeId.Dictionary:
case ArrowTypeId.FixedSizedBinary:
+ case ArrowTypeId.Extension:
default:
throw new NotSupportedException($"An ArrowArrayBuilder
cannot be built for type {dataType.TypeId}.");
}
diff --git a/src/Apache.Arrow/Arrays/ArrowArrayFactory.cs
b/src/Apache.Arrow/Arrays/ArrowArrayFactory.cs
index 8aaba2d..2518a32 100644
--- a/src/Apache.Arrow/Arrays/ArrowArrayFactory.cs
+++ b/src/Apache.Arrow/Arrays/ArrowArrayFactory.cs
@@ -107,6 +107,11 @@ namespace Apache.Arrow
return new FixedSizeListArray(data);
case ArrowTypeId.Interval:
return IntervalArray.Create(data);
+ case ArrowTypeId.Extension:
+ var extType = (ExtensionType)data.DataType;
+ var storageData = new ArrayData(extType.StorageType,
data.Length, data.NullCount, data.Offset, data.Buffers, data.Children,
data.Dictionary);
+ IArrowArray storageArray = BuildArray(storageData);
+ return extType.CreateArray(storageArray);
default:
throw new NotSupportedException($"An ArrowArray cannot be
built for type {data.DataType.TypeId}.");
}
diff --git a/src/Apache.Arrow/Arrays/FixedSizeBinaryArray.cs
b/src/Apache.Arrow/Arrays/FixedSizeBinaryArray.cs
index 3bb0652..2bc1965 100644
--- a/src/Apache.Arrow/Arrays/FixedSizeBinaryArray.cs
+++ b/src/Apache.Arrow/Arrays/FixedSizeBinaryArray.cs
@@ -185,7 +185,14 @@ namespace Apache.Arrow.Arrays
public TBuilder AppendNull()
{
- ValueBuffer.Append(new byte[ByteWidth]);
+ if (ByteWidth <= 128)
+ {
+ ValueBuffer.Append(stackalloc byte[ByteWidth]);
+ }
+ else
+ {
+ ValueBuffer.Append(new byte[ByteWidth]);
+ }
ValidityBuffer.Append(false);
return Instance;
}
diff --git a/src/Apache.Arrow/Arrays/GuidArray.cs
b/src/Apache.Arrow/Arrays/GuidArray.cs
new file mode 100644
index 0000000..0f4c3e1
--- /dev/null
+++ b/src/Apache.Arrow/Arrays/GuidArray.cs
@@ -0,0 +1,209 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+using System;
+using System.Collections;
+using System.Collections.Generic;
+using Apache.Arrow.Arrays;
+using Apache.Arrow.Types;
+
+namespace Apache.Arrow
+{
+ /// <summary>
+ /// Extension definition for the "arrow.uuid" extension type,
+ /// backed by FixedSizeBinary(16).
+ /// </summary>
+ public class GuidExtensionDefinition : ExtensionDefinition
+ {
+ public static GuidExtensionDefinition Instance = new
GuidExtensionDefinition();
+
+ public override string ExtensionName => "arrow.uuid";
+
+ private GuidExtensionDefinition() { }
+
+ public override bool TryCreateType(IArrowType storageType, string
metadata, out ExtensionType type)
+ {
+ if (storageType is FixedSizeBinaryType fsbType &&
fsbType.ByteWidth == GuidType.ByteWidth)
+ {
+ type = new GuidType(fsbType);
+ return true;
+ }
+ type = null;
+ return false;
+ }
+ }
+
+ /// <summary>
+ /// Extension type representing UUIDs/GUIDs, stored as FixedSizeBinary(16).
+ /// </summary>
+ public class GuidType : ExtensionType
+ {
+ public static GuidType Default = new GuidType();
+
+ internal const int ByteWidth = 16;
+
+ public override string Name => "arrow.uuid";
+ public override string ExtensionMetadata => "";
+
+ public GuidType() : base(new FixedSizeBinaryType(ByteWidth)) { }
+
+ internal GuidType(FixedSizeBinaryType storageType) : base(storageType)
{ }
+
+ public override ExtensionArray CreateArray(IArrowArray storageArray)
+ {
+ return new GuidArray(this, storageArray);
+ }
+ }
+
+ /// <summary>
+ /// Extension array for UUID/GUID values, backed by a FixedSizeBinaryArray.
+ /// </summary>
+ public class GuidArray : ExtensionArray, IReadOnlyList<Guid?>
+ {
+ public FixedSizeBinaryArray StorageArray =>
(FixedSizeBinaryArray)Storage;
+
+ public GuidArray(GuidType guidType, IArrowArray storage) :
base(guidType, storage) { }
+
+ public GuidArray(IArrowArray storage) : base(GuidType.Default,
storage) { }
+
+ public class Builder : FixedSizeBinaryArray.BuilderBase<GuidArray,
Builder>
+ {
+ public Builder() : base(GuidType.Default.StorageType,
GuidType.ByteWidth)
+ {
+ }
+
+ protected override GuidArray Build(ArrayData data)
+ {
+ return new GuidArray(GuidType.Default, new
FixedSizeBinaryArray(data));
+ }
+
+ public Builder Append(Guid value)
+ {
+ Span<byte> bytes = stackalloc byte[GuidType.ByteWidth];
+ GuidToRFC4122(value, bytes);
+ return Append(bytes);
+ }
+
+ public Builder AppendRange(IEnumerable<Guid> values)
+ {
+ if (values == null)
+ {
+ throw new ArgumentNullException(nameof(values));
+ }
+
+ foreach (Guid guid in values)
+ {
+ Append(guid);
+ }
+
+ return Instance;
+ }
+
+ public Builder Set(int index, Guid value)
+ {
+ Span<byte> bytes = stackalloc byte[GuidType.ByteWidth];
+ GuidToRFC4122(value, bytes);
+
+ return Set(index, bytes);
+ }
+ }
+
+ /// <summary>
+ /// Converts a <see cref="Guid"/> to a newly allocated 16-byte array in the RFC 4122 big-endian byte layout
+ /// required by the arrow.uuid canonical extension type specification.
+ /// </summary>
+ public static byte[] GuidToBytes(Guid guid)
+ {
+ byte[] bytes = new byte[GuidType.ByteWidth];
+ GuidToRFC4122(guid, bytes);
+ return bytes;
+ }
+
+ /// <summary>
+ /// Writes a <see cref="Guid"/> into <paramref name="bytes"/> (which must be exactly 16 bytes long) using the RFC 4122 big-endian byte layout
+ /// required by the arrow.uuid canonical extension type specification.
+ /// </summary>
+ public static unsafe void GuidToRFC4122(Guid guid, Span<byte> bytes)
+ {
+ if (bytes.Length != GuidType.ByteWidth)
+ throw new ArgumentException("Byte span must be exactly 16
bytes long.", nameof(bytes));
+
+ byte* guidPtr = (byte*)&guid;
+ fixed (byte* bytePtr = bytes)
+ {
+ bytePtr[0] = guidPtr[3];
+ bytePtr[1] = guidPtr[2];
+ bytePtr[2] = guidPtr[1];
+ bytePtr[3] = guidPtr[0];
+ bytePtr[4] = guidPtr[5];
+ bytePtr[5] = guidPtr[4];
+ bytePtr[6] = guidPtr[7];
+ bytePtr[7] = guidPtr[6];
+ ((long*)bytePtr)[1] = ((long*)guidPtr)[1];
+ }
+ }
+
+ /// <summary>
+ /// Reads a <see cref="Guid"/> from a span of exactly 16 bytes holding the RFC 4122 big-endian byte layout
+ /// required by the arrow.uuid canonical extension type specification.
+ /// </summary>
+ public static unsafe Guid RFC4122ToGuid(ReadOnlySpan<byte> bytes)
+ {
+ if (bytes.Length != GuidType.ByteWidth)
+ throw new ArgumentException("Byte span must be exactly 16
bytes long.", nameof(bytes));
+
+ Guid result = new Guid();
+ byte* guidPtr = (byte*)&result;
+ fixed (byte* bytePtr = bytes)
+ {
+ guidPtr[0] = bytePtr[3];
+ guidPtr[1] = bytePtr[2];
+ guidPtr[2] = bytePtr[1];
+ guidPtr[3] = bytePtr[0];
+ guidPtr[4] = bytePtr[5];
+ guidPtr[5] = bytePtr[4];
+ guidPtr[6] = bytePtr[7];
+ guidPtr[7] = bytePtr[6];
+ ((long*)guidPtr)[1] = ((long*)bytePtr)[1];
+ return result;
+ }
+ }
+
+ public Guid? GetGuid(int index)
+ {
+ if (index < 0 || index >= Length)
+ throw new ArgumentOutOfRangeException(nameof(index));
+
+ if (IsNull(index))
+ return null;
+
+ ReadOnlySpan<byte> bytes = StorageArray.GetBytes(index);
+ return RFC4122ToGuid(bytes);
+ }
+
+ public int Count => Length;
+ public Guid? this[int index] => GetGuid(index);
+
+ public IEnumerator<Guid?> GetEnumerator()
+ {
+ for (int i = 0; i < Length; i++)
+ {
+ yield return GetGuid(i);
+ }
+ }
+
+ IEnumerator IEnumerable.GetEnumerator() => GetEnumerator();
+ }
+}
diff --git a/src/Apache.Arrow/ArrowContext.cs b/src/Apache.Arrow/ArrowContext.cs
new file mode 100644
index 0000000..8835895
--- /dev/null
+++ b/src/Apache.Arrow/ArrowContext.cs
@@ -0,0 +1,41 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+using Apache.Arrow.Ipc;
+using Apache.Arrow.Memory;
+
+namespace Apache.Arrow
+{
+ /// <summary>
+ /// Carries configuration through Arrow operations: memory allocator,
+ /// compression codec factory, and extension type registry.
+ /// </summary>
+ public class ArrowContext
+ {
+ public MemoryAllocator Allocator { get; }
+ public ICompressionCodecFactory CompressionCodecFactory { get; }
+ public ExtensionTypeRegistry ExtensionRegistry { get; }
+
+ public ArrowContext(
+ MemoryAllocator allocator = null,
+ ICompressionCodecFactory compressionCodecFactory = null,
+ ExtensionTypeRegistry extensionRegistry = null)
+ {
+ Allocator = allocator;
+ CompressionCodecFactory = compressionCodecFactory;
+ ExtensionRegistry = extensionRegistry ??
ExtensionTypeRegistry.Default.Clone();
+ }
+ }
+}
diff --git a/src/Apache.Arrow/C/CArrowArrayExporter.cs
b/src/Apache.Arrow/C/CArrowArrayExporter.cs
index 9e9baf3..0e140b5 100644
--- a/src/Apache.Arrow/C/CArrowArrayExporter.cs
+++ b/src/Apache.Arrow/C/CArrowArrayExporter.cs
@@ -130,7 +130,8 @@ namespace Apache.Arrow.C
{
long* lengths = null;
int bufferCount = array.Buffers.Length;
- if (array.DataType.TypeId == ArrowTypeId.BinaryView ||
array.DataType.TypeId == ArrowTypeId.StringView)
+ var storageTypeId =
Types.ArrowTypeExtensions.GetStorageType(array.DataType).TypeId;
+ if (storageTypeId == ArrowTypeId.BinaryView || storageTypeId
== ArrowTypeId.StringView)
{
lengths = (long*)sharedOwner.Allocate(8 * bufferCount); //
overallocation to avoid edge case
bufferCount++;
diff --git a/src/Apache.Arrow/C/CArrowArrayImporter.cs
b/src/Apache.Arrow/C/CArrowArrayImporter.cs
index 2dc4adb..ed2462b 100644
--- a/src/Apache.Arrow/C/CArrowArrayImporter.cs
+++ b/src/Apache.Arrow/C/CArrowArrayImporter.cs
@@ -152,7 +152,8 @@ namespace Apache.Arrow.C
ArrayData[] children = null;
ArrowBuffer[] buffers = null;
ArrayData dictionary = null;
- switch (type.TypeId)
+ IArrowType storageType =
Types.ArrowTypeExtensions.GetStorageType(type);
+ switch (storageType.TypeId)
{
case ArrowTypeId.String:
case ArrowTypeId.Binary:
@@ -167,27 +168,27 @@ namespace Apache.Arrow.C
buffers = ImportLargeByteArrayBuffers(cArray);
break;
case ArrowTypeId.List:
- children = ProcessListChildren(cArray,
((ListType)type).ValueDataType);
+ children = ProcessListChildren(cArray,
((ListType)storageType).ValueDataType);
buffers = ImportListBuffers(cArray);
break;
case ArrowTypeId.ListView:
- children = ProcessListChildren(cArray,
((ListViewType)type).ValueDataType);
+ children = ProcessListChildren(cArray,
((ListViewType)storageType).ValueDataType);
buffers = ImportListViewBuffers(cArray);
break;
case ArrowTypeId.LargeList:
- children = ProcessListChildren(cArray,
((LargeListType)type).ValueDataType);
+ children = ProcessListChildren(cArray,
((LargeListType)storageType).ValueDataType);
buffers = ImportLargeListBuffers(cArray);
break;
case ArrowTypeId.FixedSizeList:
- children = ProcessListChildren(cArray,
((FixedSizeListType)type).ValueDataType);
+ children = ProcessListChildren(cArray,
((FixedSizeListType)storageType).ValueDataType);
buffers = ImportFixedSizeListBuffers(cArray);
break;
case ArrowTypeId.Struct:
- children = ProcessStructChildren(cArray,
((StructType)type).Fields);
+ children = ProcessStructChildren(cArray,
((StructType)storageType).Fields);
buffers = new ArrowBuffer[] {
ImportValidityBuffer(cArray) };
break;
case ArrowTypeId.Union:
- UnionType unionType = (UnionType)type;
+ UnionType unionType = (UnionType)storageType;
children = ProcessStructChildren(cArray,
unionType.Fields);
buffers = unionType.Mode switch
{
@@ -197,7 +198,7 @@ namespace Apache.Arrow.C
}; ;
break;
case ArrowTypeId.Map:
- MapType mapType = (MapType)type;
+ MapType mapType = (MapType)storageType;
children = ProcessListChildren(cArray,
mapType.Fields[0].DataType);
buffers = ImportListBuffers(cArray);
break;
@@ -205,11 +206,11 @@ namespace Apache.Arrow.C
buffers = System.Array.Empty<ArrowBuffer>();
break;
case ArrowTypeId.Dictionary:
- DictionaryType dictionaryType = (DictionaryType)type;
+ DictionaryType dictionaryType =
(DictionaryType)storageType;
dictionary = GetAsArrayData(cArray->dictionary,
dictionaryType.ValueType);
goto default; // Fall through to get the validity and
index data
default:
- if (type is FixedWidthType fixedWidthType)
+ if (storageType is FixedWidthType fixedWidthType)
{
buffers = ImportFixedWidthBuffers(cArray,
fixedWidthType.BitWidth);
}
diff --git a/src/Apache.Arrow/C/CArrowSchemaExporter.cs
b/src/Apache.Arrow/C/CArrowSchemaExporter.cs
index 4258174..8865c18 100644
--- a/src/Apache.Arrow/C/CArrowSchemaExporter.cs
+++ b/src/Apache.Arrow/C/CArrowSchemaExporter.cs
@@ -62,12 +62,14 @@ namespace Apache.Arrow.C
schema->format = StringUtil.ToCStringUtf8(GetFormat(datatype));
schema->name = null;
schema->metadata = null;
- schema->flags = GetFlags(datatype);
- schema->children = ConstructChildren(datatype, out var
numChildren);
+ IArrowType physicalType = datatype is ExtensionType ext ?
ext.StorageType : datatype;
+ schema->flags = GetFlags(physicalType);
+
+ schema->children = ConstructChildren(physicalType, out var
numChildren);
schema->n_children = numChildren;
- schema->dictionary = ConstructDictionary(datatype);
+ schema->dictionary = ConstructDictionary(physicalType);
schema->release = ReleaseSchemaPtr;
@@ -91,8 +93,29 @@ namespace Apache.Arrow.C
{
ExportType(field.DataType, schema);
schema->name = StringUtil.ToCStringUtf8(field.Name);
- schema->metadata = ConstructMetadata(field.Metadata);
- schema->flags = GetFlags(field.DataType, field.IsNullable);
+
+ // Inject extension metadata if the field type is an ExtensionType, or a dictionary whose value type is one
+ IReadOnlyDictionary<string, string> metadata = field.Metadata;
+ ExtensionType extType = field.DataType as ExtensionType;
+ if (extType == null && field.DataType is DictionaryType dt &&
dt.ValueType is ExtensionType dext)
+ extType = dext;
+
+ if (extType != null)
+ {
+ var merged = metadata != null
+ ? new Dictionary<string, string>((IDictionary<string,
string>)metadata)
+ : new Dictionary<string, string>();
+ if (!merged.ContainsKey("ARROW:extension:name"))
+ merged["ARROW:extension:name"] = extType.Name;
+ if (!merged.ContainsKey("ARROW:extension:metadata"))
+ merged["ARROW:extension:metadata"] =
extType.ExtensionMetadata ?? "";
+ metadata = merged;
+ }
+
+ schema->metadata = ConstructMetadata(metadata);
+
+ IArrowType physicalType = field.DataType is ExtensionType ext ?
ext.StorageType : field.DataType;
+ schema->flags = GetFlags(physicalType, field.IsNullable);
}
/// <summary>
@@ -145,6 +168,8 @@ namespace Apache.Arrow.C
{
switch (datatype)
{
+ case ExtensionType extensionType:
+ return GetFormat(extensionType.StorageType);
case NullType _: return "n";
case BooleanType _: return "b";
// Integers
diff --git a/src/Apache.Arrow/C/CArrowSchemaImporter.cs
b/src/Apache.Arrow/C/CArrowSchemaImporter.cs
index a772b8f..fd06793 100644
--- a/src/Apache.Arrow/C/CArrowSchemaImporter.cs
+++ b/src/Apache.Arrow/C/CArrowSchemaImporter.cs
@@ -76,6 +76,16 @@ namespace Apache.Arrow.C
return importedField.GetAsField();
}
+ /// <summary>
+ /// Import C pointer as a <see cref="Field"/>, resolving extension
types
+ /// from the given registry.
+ /// </summary>
+ public static unsafe Field ImportField(CArrowSchema* ptr,
ExtensionTypeRegistry extensionRegistry)
+ {
+ using var importedField = new ImportedArrowSchema(ptr);
+ return importedField.GetAsField(extensionRegistry);
+ }
+
/// <summary>
/// Import C pointer as a <see cref="Schema"/>.
/// </summary>
@@ -101,6 +111,16 @@ namespace Apache.Arrow.C
return importedSchema.GetAsSchema();
}
+ /// <summary>
+ /// Import C pointer as a <see cref="Schema"/>, resolving extension
types
+ /// from the given registry.
+ /// </summary>
+ public static unsafe Schema ImportSchema(CArrowSchema* ptr,
ExtensionTypeRegistry extensionRegistry)
+ {
+ using var importedSchema = new ImportedArrowSchema(ptr);
+ return importedSchema.GetAsSchema(extensionRegistry);
+ }
+
private sealed unsafe class ImportedArrowSchema : IDisposable
{
private readonly CArrowSchema* _cSchema;
@@ -331,20 +351,61 @@ namespace Apache.Arrow.C
}
public Field GetAsField()
+ {
+ return GetAsField(null);
+ }
+
+ public Field GetAsField(ExtensionTypeRegistry extensionRegistry)
{
string name = StringUtil.PtrToStringUtf8(_cSchema->name);
string fieldName = string.IsNullOrEmpty(name) ? "" : name;
bool nullable =
_cSchema->GetFlag(CArrowSchema.ArrowFlagNullable);
- return new Field(fieldName, GetAsType(), nullable,
GetMetadata(_cSchema->metadata));
+ IArrowType type = GetAsType();
+ IReadOnlyDictionary<string, string> metadata =
GetMetadata(_cSchema->metadata);
+
+ // Resolve extension type from metadata
+ if (metadata != null &&
+ extensionRegistry != null &&
+ metadata.TryGetValue("ARROW:extension:name", out string
extName) &&
+ extensionRegistry.TryGetDefinition(extName, out
ExtensionDefinition extDef))
+ {
+ metadata.TryGetValue("ARROW:extension:metadata", out
string extMeta);
+ if (extDef.TryCreateType(type, extMeta, out ExtensionType
extType))
+ {
+ type = extType;
+ }
+ }
+
+ return new Field(fieldName, type, nullable, metadata);
}
public Schema GetAsSchema()
+ {
+ return GetAsSchema(null);
+ }
+
+ public Schema GetAsSchema(ExtensionTypeRegistry extensionRegistry)
{
ArrowType fullType = GetAsType();
if (fullType is StructType structType)
{
+ // Re-resolve fields with extension registry
+ if (extensionRegistry != null)
+ {
+ var fields = new List<Field>();
+ for (int i = 0; i < _cSchema->n_children; i++)
+ {
+ if (_cSchema->GetChild(i) == null)
+ {
+ throw new InvalidDataException("Expected
struct type child to be non-null.");
+ }
+ var childSchema = new
ImportedArrowSchema(_cSchema->GetChild(i), isRoot: false);
+
fields.Add(childSchema.GetAsField(extensionRegistry));
+ }
+ return new Schema(fields,
GetMetadata(_cSchema->metadata));
+ }
return new Schema(structType.Fields,
GetMetadata(_cSchema->metadata));
}
else
diff --git a/src/Apache.Arrow/ExtensionType.cs
b/src/Apache.Arrow/ExtensionType.cs
new file mode 100644
index 0000000..3b0f6a5
--- /dev/null
+++ b/src/Apache.Arrow/ExtensionType.cs
@@ -0,0 +1,153 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+using System;
+using Apache.Arrow.Types;
+
+namespace Apache.Arrow
+{
+ /// <summary>
+ /// Defines a factory for a particular extension type. Register instances
with
+ /// <see cref="ExtensionTypeRegistry"/> to enable automatic resolution
during deserialization.
+ /// </summary>
+ public abstract class ExtensionDefinition
+ {
+ /// <summary>
+ /// The canonical extension type name (e.g. "arrow.uuid").
+ /// </summary>
+ public abstract string ExtensionName { get; }
+
+ /// <summary>
+ /// Attempt to create an <see cref="ExtensionType"/> from a storage
type and serialized metadata.
+ /// </summary>
+ /// <param name="storageType">The underlying Arrow storage
type.</param>
+ /// <param name="metadata">The serialized extension metadata string,
or null.</param>
+ /// <param name="type">The created extension type, if
successful.</param>
+ /// <returns>True if the type was created successfully; false
otherwise.</returns>
+ public abstract bool TryCreateType(IArrowType storageType, string
metadata, out ExtensionType type);
+
+ /// <summary>
+ /// Registers this extension definition with <see cref="ExtensionTypeRegistry.Default"/>.
+ /// </summary>
+ public void AddToDefaultRegistry()
+ {
+ ExtensionTypeRegistry.Default.Register(this);
+ }
+ }
+
+ /// <summary>
+ /// Base class for user-defined extension types. Extension types are
logical types
+ /// layered on top of a built-in Arrow storage type, identified by a name
string.
+ /// </summary>
+ public abstract class ExtensionType : IArrowType
+ {
+ public ArrowTypeId TypeId => ArrowTypeId.Extension;
+
+ /// <summary>
+ /// The canonical extension type name (e.g. "arrow.uuid").
+ /// </summary>
+ public abstract string Name { get; }
+
+ /// <summary>
+ /// Serialized extension metadata. May be null or empty.
+ /// </summary>
+ public abstract string ExtensionMetadata { get; }
+
+ /// <summary>
+ /// The underlying Arrow storage type.
+ /// </summary>
+ public IArrowType StorageType { get; }
+
+ public bool IsFixedWidth => StorageType.IsFixedWidth;
+
+ protected ExtensionType(IArrowType storageType)
+ {
+ StorageType = storageType ?? throw new
ArgumentNullException(nameof(storageType));
+ }
+
+ public void Accept(IArrowTypeVisitor visitor)
+ {
+ StorageType.Accept(visitor);
+ }
+
+ /// <summary>
+ /// Create the appropriate <see cref="ExtensionArray"/> wrapper for a
storage array.
+ /// </summary>
+ public abstract ExtensionArray CreateArray(IArrowArray storageArray);
+ }
+
+ /// <summary>
+ /// Base class for extension array wrappers. Delegates physical array
operations
+ /// to the underlying storage array.
+ /// </summary>
+ public class ExtensionArray : IArrowArray
+ {
+ public IArrowArray Storage { get; }
+ public ExtensionType ExtensionType { get; }
+
+ private ArrayData _data;
+
+ public ExtensionArray(ExtensionType extensionType, IArrowArray storage)
+ {
+ ExtensionType = extensionType ?? throw new
ArgumentNullException(nameof(extensionType));
+ Storage = storage ?? throw new
ArgumentNullException(nameof(storage));
+ }
+
+ public ExtensionArray(IArrowArray storage)
+ {
+ Storage = storage ?? throw new
ArgumentNullException(nameof(storage));
+ }
+
+ public int Length => Storage.Length;
+ public int Offset => Storage.Offset;
+ public int NullCount => Storage.NullCount;
+
+ public ArrayData Data
+ {
+ get
+ {
+ if (ExtensionType == null) return Storage.Data;
+ if (_data == null)
+ {
+ var sd = Storage.Data;
+ _data = new ArrayData(ExtensionType, sd.Length,
sd.NullCount, sd.Offset, sd.Buffers, sd.Children, sd.Dictionary);
+ }
+ return _data;
+ }
+ }
+
+ public bool IsNull(int index) => Storage.IsNull(index);
+ public bool IsValid(int index) => Storage.IsValid(index);
+
+ public void Accept(IArrowArrayVisitor visitor)
+ {
+ Storage.Accept(visitor);
+ }
+
+ public void Dispose()
+ {
+ Dispose(true);
+ GC.SuppressFinalize(this);
+ }
+
+ protected virtual void Dispose(bool disposing)
+ {
+ if (disposing)
+ {
+ Storage.Dispose();
+ }
+ }
+ }
+}
diff --git a/src/Apache.Arrow/ExtensionTypeRegistry.cs
b/src/Apache.Arrow/ExtensionTypeRegistry.cs
new file mode 100644
index 0000000..b9737e3
--- /dev/null
+++ b/src/Apache.Arrow/ExtensionTypeRegistry.cs
@@ -0,0 +1,144 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+using System;
+using System.Collections.Generic;
+
+namespace Apache.Arrow
+{
+ /// <summary>
+ /// A registry mapping extension type names to their <see
cref="ExtensionDefinition"/> factories.
+ /// The <see cref="Default"/> registry starts empty; users must register
extension definitions
+ /// to enable automatic resolution during deserialization.
+ /// </summary>
+ public class ExtensionTypeRegistry
+ {
+ private static readonly ExtensionTypeRegistry s_default = new
ExtensionTypeRegistry();
+
+ /// <summary>
+ /// The process-wide default registry. Starts empty.
+ /// </summary>
+ public static ExtensionTypeRegistry Default => s_default;
+
+ private readonly Dictionary<string, ExtensionDefinition> _definitions;
+
+ public ExtensionTypeRegistry()
+ {
+ _definitions = new Dictionary<string, ExtensionDefinition>();
+ }
+
+ private ExtensionTypeRegistry(Dictionary<string, ExtensionDefinition>
definitions)
+ {
+ _definitions = new Dictionary<string,
ExtensionDefinition>(definitions);
+ }
+
+ /// <summary>
+ /// Register an extension definition. Overwrites any existing
definition with the same name.
+ /// </summary>
+ public void Register(ExtensionDefinition definition)
+ {
+ if (definition == null) throw new
ArgumentNullException(nameof(definition));
+ lock (_definitions)
+ {
+ _definitions[definition.ExtensionName] = definition;
+ }
+ }
+
+ /// <summary>
+ /// Temporarily register an extension definition. Overwrites any existing definition with the same name.
+ /// When the returned <see cref="IDisposable"/> is disposed, the previous definition is restored, or the name is removed if there was none.
+ /// </summary>
+ public IDisposable RegisterTemporary(ExtensionDefinition definition)
+ {
+ if (definition == null) throw new
ArgumentNullException(nameof(definition));
+ lock (_definitions)
+ {
+ if (!_definitions.TryGetValue(definition.ExtensionName, out
ExtensionDefinition previousDefinition))
+ {
+ previousDefinition = null;
+ }
+ IDisposable scope = new Registration(this,
definition.ExtensionName, previousDefinition);
+ _definitions[definition.ExtensionName] = definition;
+ return scope;
+ }
+ }
+
+ /// <summary>
+ /// Unregisters whichever definition is currently registered under the given definition's extension name.
+ /// </summary>
+ public void Unregister(ExtensionDefinition definition)
+ {
+ if (definition == null) throw new
ArgumentNullException(nameof(definition));
+ lock (_definitions)
+ {
+ _definitions.Remove(definition.ExtensionName);
+ }
+ }
+
+ /// <summary>
+ /// Try to get a registered extension definition by name.
+ /// </summary>
+ public bool TryGetDefinition(string extensionName, out
ExtensionDefinition definition)
+ {
+ lock (_definitions)
+ {
+ return _definitions.TryGetValue(extensionName, out definition);
+ }
+ }
+
+ /// <summary>
+ /// Create a snapshot copy of this registry.
+ /// </summary>
+ public ExtensionTypeRegistry Clone()
+ {
+ lock (_definitions)
+ {
+ return new ExtensionTypeRegistry(_definitions);
+ }
+ }
+
+ sealed class Registration : IDisposable
+ {
+ private readonly ExtensionTypeRegistry _registry;
+ private readonly string _extensionName;
+ private readonly ExtensionDefinition _previousDefinition;
+
+ public Registration(
+ ExtensionTypeRegistry registry,
+ string extensionName,
+ ExtensionDefinition previousDefinition)
+ {
+ _previousDefinition = previousDefinition;
+ _extensionName = extensionName;
+ _registry = registry;
+ }
+
+ public void Dispose()
+ {
+ lock (_registry._definitions)
+ {
+ if (_previousDefinition == null)
+ {
+ _registry._definitions.Remove(_extensionName);
+ }
+ else
+ {
+
_registry._definitions[_previousDefinition.ExtensionName] = _previousDefinition;
+ }
+ }
+ }
+ }
+ }
+}
diff --git a/src/Apache.Arrow/Ipc/ArrowFileReader.cs
b/src/Apache.Arrow/Ipc/ArrowFileReader.cs
index 29677ed..f5414f1 100644
--- a/src/Apache.Arrow/Ipc/ArrowFileReader.cs
+++ b/src/Apache.Arrow/Ipc/ArrowFileReader.cs
@@ -65,6 +65,11 @@ namespace Apache.Arrow.Ipc
{
}
+ public ArrowFileReader(ArrowContext context, Stream stream, bool
leaveOpen = false)
+ : base(new ArrowFileReaderImplementation(stream,
context?.Allocator, context?.CompressionCodecFactory, leaveOpen,
context?.ExtensionRegistry))
+ {
+ }
+
public static ArrowFileReader FromFile(string filename)
{
var stream = new FileStream(filename, FileMode.Open,
FileAccess.Read);
diff --git a/src/Apache.Arrow/Ipc/ArrowFileReaderImplementation.cs
b/src/Apache.Arrow/Ipc/ArrowFileReaderImplementation.cs
index 7486a51..1498c85 100644
--- a/src/Apache.Arrow/Ipc/ArrowFileReaderImplementation.cs
+++ b/src/Apache.Arrow/Ipc/ArrowFileReaderImplementation.cs
@@ -38,7 +38,12 @@ namespace Apache.Arrow.Ipc
private bool HasReadDictionaries => HasReadSchema &&
DictionaryMemo.LoadedDictionaryCount >= _footer.DictionaryCount;
public ArrowFileReaderImplementation(Stream stream, MemoryAllocator
allocator, ICompressionCodecFactory compressionCodecFactory, bool leaveOpen)
- : base(stream, allocator, compressionCodecFactory, leaveOpen)
+ : this(stream, allocator, compressionCodecFactory, leaveOpen, null)
+ {
+ }
+
+ public ArrowFileReaderImplementation(Stream stream, MemoryAllocator
allocator, ICompressionCodecFactory compressionCodecFactory, bool leaveOpen,
ExtensionTypeRegistry extensionRegistry)
+ : base(stream, allocator, compressionCodecFactory, leaveOpen,
extensionRegistry)
{
}
@@ -139,7 +144,7 @@ namespace Apache.Arrow.Ipc
private void ReadSchema(Memory<byte> buffer)
{
// Deserialize the footer from the footer flatbuffer
- _footer = new
ArrowFooter(Flatbuf.Footer.GetRootAsFooter(CreateByteBuffer(buffer)), ref
_dictionaryMemo);
+ _footer = new
ArrowFooter(Flatbuf.Footer.GetRootAsFooter(CreateByteBuffer(buffer)), ref
_dictionaryMemo, _extensionRegistry);
_schema = _footer.Schema;
}
diff --git a/src/Apache.Arrow/Ipc/ArrowFooter.cs
b/src/Apache.Arrow/Ipc/ArrowFooter.cs
index 600624e..f879f6b 100644
--- a/src/Apache.Arrow/Ipc/ArrowFooter.cs
+++ b/src/Apache.Arrow/Ipc/ArrowFooter.cs
@@ -62,7 +62,11 @@ namespace Apache.Arrow.Ipc
}
public ArrowFooter(Flatbuf.Footer footer, ref DictionaryMemo
dictionaryMemo)
- :
this(Ipc.MessageSerializer.GetSchema(footer.Schema.GetValueOrDefault(), ref
dictionaryMemo), GetDictionaries(footer),
+ : this(footer, ref dictionaryMemo, null)
+ { }
+
+ public ArrowFooter(Flatbuf.Footer footer, ref DictionaryMemo
dictionaryMemo, ExtensionTypeRegistry extensionRegistry)
+ :
this(Ipc.MessageSerializer.GetSchema(footer.Schema.GetValueOrDefault(), ref
dictionaryMemo, extensionRegistry ?? ExtensionTypeRegistry.Default),
GetDictionaries(footer),
GetRecordBatches(footer))
{ }
diff --git a/src/Apache.Arrow/Ipc/ArrowMemoryReaderImplementation.cs
b/src/Apache.Arrow/Ipc/ArrowMemoryReaderImplementation.cs
index 5d619f3..e43d1d5 100644
--- a/src/Apache.Arrow/Ipc/ArrowMemoryReaderImplementation.cs
+++ b/src/Apache.Arrow/Ipc/ArrowMemoryReaderImplementation.cs
@@ -28,7 +28,13 @@ namespace Apache.Arrow.Ipc
private readonly ReadOnlyMemory<byte> _buffer;
private int _bufferPosition;
- public ArrowMemoryReaderImplementation(ReadOnlyMemory<byte> buffer,
ICompressionCodecFactory compressionCodecFactory) : base(null,
compressionCodecFactory)
+ public ArrowMemoryReaderImplementation(ReadOnlyMemory<byte> buffer,
ICompressionCodecFactory compressionCodecFactory)
+ : this(buffer, compressionCodecFactory, null)
+ {
+ }
+
+ public ArrowMemoryReaderImplementation(ReadOnlyMemory<byte> buffer,
ICompressionCodecFactory compressionCodecFactory, ExtensionTypeRegistry
extensionRegistry)
+ : base(null, compressionCodecFactory, extensionRegistry)
{
_buffer = buffer;
}
@@ -134,7 +140,7 @@ namespace Apache.Arrow.Ipc
}
ByteBuffer schemaBuffer =
CreateByteBuffer(_buffer.Slice(_bufferPosition));
- _schema =
MessageSerializer.GetSchema(ReadMessage<Flatbuf.Schema>(schemaBuffer), ref
_dictionaryMemo);
+ _schema =
MessageSerializer.GetSchema(ReadMessage<Flatbuf.Schema>(schemaBuffer), ref
_dictionaryMemo, _extensionRegistry);
_bufferPosition += schemaMessageLength;
}
}
diff --git a/src/Apache.Arrow/Ipc/ArrowReaderImplementation.cs
b/src/Apache.Arrow/Ipc/ArrowReaderImplementation.cs
index d5d3758..1ed8d8a 100644
--- a/src/Apache.Arrow/Ipc/ArrowReaderImplementation.cs
+++ b/src/Apache.Arrow/Ipc/ArrowReaderImplementation.cs
@@ -48,15 +48,21 @@ namespace Apache.Arrow.Ipc
private protected DictionaryMemo DictionaryMemo => _dictionaryMemo ??=
new DictionaryMemo();
private protected readonly MemoryAllocator _allocator;
private readonly ICompressionCodecFactory _compressionCodecFactory;
+ private protected readonly ExtensionTypeRegistry _extensionRegistry;
private protected Schema _schema;
private protected ArrowReaderImplementation() : this(null, null)
{ }
private protected ArrowReaderImplementation(MemoryAllocator allocator,
ICompressionCodecFactory compressionCodecFactory)
+ : this(allocator, compressionCodecFactory, null)
+ { }
+
+ private protected ArrowReaderImplementation(MemoryAllocator allocator,
ICompressionCodecFactory compressionCodecFactory, ExtensionTypeRegistry
extensionRegistry)
{
_allocator = allocator ?? MemoryAllocator.Default.Value;
_compressionCodecFactory = compressionCodecFactory;
+ _extensionRegistry = extensionRegistry ??
ExtensionTypeRegistry.Default;
}
public void Dispose()
@@ -265,7 +271,8 @@ namespace Apache.Arrow.Ipc
}
int buffers;
- switch (field.DataType.TypeId)
+ IArrowType storageType =
Types.ArrowTypeExtensions.GetStorageType(field.DataType);
+ switch (storageType.TypeId)
{
case ArrowTypeId.Null:
return new ArrayData(field.DataType, fieldLength,
fieldNullCount, 0, System.Array.Empty<ArrowBuffer>());
@@ -283,7 +290,7 @@ namespace Apache.Arrow.Ipc
}
recordBatchEnumerator.MoveNextBuffer();
}
- buffers = ((UnionType)field.DataType).Mode ==
Types.UnionMode.Dense ? 2 : 1;
+ buffers = ((UnionType)storageType).Mode ==
Types.UnionMode.Dense ? 2 : 1;
break;
case ArrowTypeId.Struct:
case ArrowTypeId.FixedSizeList:
@@ -316,7 +323,7 @@ namespace Apache.Arrow.Ipc
ArrayData[] children = GetChildren(version, ref
recordBatchEnumerator, field, bodyData, bufferCreator);
IArrowArray dictionary = null;
- if (field.DataType.TypeId == ArrowTypeId.Dictionary)
+ if (storageType.TypeId == ArrowTypeId.Dictionary)
{
long id = DictionaryMemo.GetId(field);
dictionary = DictionaryMemo.GetDictionary(id);
@@ -332,7 +339,8 @@ namespace Apache.Arrow.Ipc
ByteBuffer bodyData,
IBufferCreator bufferCreator)
{
- if (!(field.DataType is NestedType type)) return null;
+ IArrowType dataType =
Types.ArrowTypeExtensions.GetStorageType(field.DataType);
+ if (!(dataType is NestedType type)) return null;
int childrenCount = type.Fields.Count;
var children = new ArrayData[childrenCount];
diff --git a/src/Apache.Arrow/Ipc/ArrowStreamReader.cs
b/src/Apache.Arrow/Ipc/ArrowStreamReader.cs
index 540fb19..e5dade2 100644
--- a/src/Apache.Arrow/Ipc/ArrowStreamReader.cs
+++ b/src/Apache.Arrow/Ipc/ArrowStreamReader.cs
@@ -71,6 +71,16 @@ namespace Apache.Arrow.Ipc
_implementation = new ArrowStreamReaderImplementation(stream,
allocator, compressionCodecFactory, leaveOpen);
}
+        /// <summary>
+        /// Creates a stream reader over <paramref name="stream"/>, taking the allocator,
+        /// compression codec factory and extension registry from <paramref name="context"/>.
+        /// </summary>
+        /// <exception cref="ArgumentNullException">
+        /// <paramref name="stream"/> or <paramref name="context"/> is null (the stream is checked first).
+        /// </exception>
+        public ArrowStreamReader(ArrowContext context, Stream stream, bool leaveOpen = false)
+        {
+            if (stream == null)
+            {
+                throw new ArgumentNullException(nameof(stream));
+            }
+
+            if (context == null)
+            {
+                throw new ArgumentNullException(nameof(context));
+            }
+
+            _implementation = new ArrowStreamReaderImplementation(
+                stream: stream,
+                allocator: context.Allocator,
+                compressionCodecFactory: context.CompressionCodecFactory,
+                leaveOpen: leaveOpen,
+                extensionRegistry: context.ExtensionRegistry);
+        }
+
public ArrowStreamReader(ReadOnlyMemory<byte> buffer)
{
_implementation = new ArrowMemoryReaderImplementation(buffer,
compressionCodecFactory: null);
@@ -81,6 +91,14 @@ namespace Apache.Arrow.Ipc
_implementation = new ArrowMemoryReaderImplementation(buffer,
compressionCodecFactory);
}
+        /// <summary>
+        /// Creates a reader over an in-memory <paramref name="buffer"/>, taking the
+        /// compression codec factory and extension registry from <paramref name="context"/>.
+        /// The context's allocator is not passed to the memory reader.
+        /// </summary>
+        /// <exception cref="ArgumentNullException"><paramref name="context"/> is null.</exception>
+        public ArrowStreamReader(ArrowContext context, ReadOnlyMemory<byte> buffer)
+        {
+            if (context == null)
+            {
+                throw new ArgumentNullException(nameof(context));
+            }
+
+            _implementation = new ArrowMemoryReaderImplementation(
+                buffer: buffer,
+                compressionCodecFactory: context.CompressionCodecFactory,
+                extensionRegistry: context.ExtensionRegistry);
+        }
+
private protected ArrowStreamReader(ArrowReaderImplementation
implementation)
{
_implementation = implementation;
diff --git a/src/Apache.Arrow/Ipc/ArrowStreamReaderImplementation.cs
b/src/Apache.Arrow/Ipc/ArrowStreamReaderImplementation.cs
index 0b9de1d..9d0cbe3 100644
--- a/src/Apache.Arrow/Ipc/ArrowStreamReaderImplementation.cs
+++ b/src/Apache.Arrow/Ipc/ArrowStreamReaderImplementation.cs
@@ -27,7 +27,13 @@ namespace Apache.Arrow.Ipc
public Stream BaseStream { get; }
private readonly bool _leaveOpen;
- public ArrowStreamReaderImplementation(Stream stream, MemoryAllocator
allocator, ICompressionCodecFactory compressionCodecFactory, bool leaveOpen) :
base(allocator, compressionCodecFactory)
+ public ArrowStreamReaderImplementation(Stream stream, MemoryAllocator
allocator, ICompressionCodecFactory compressionCodecFactory, bool leaveOpen)
+ : this(stream, allocator, compressionCodecFactory, leaveOpen, null)
+ {
+ }
+
+ public ArrowStreamReaderImplementation(Stream stream, MemoryAllocator
allocator, ICompressionCodecFactory compressionCodecFactory, bool leaveOpen,
ExtensionTypeRegistry extensionRegistry)
+ : base(allocator, compressionCodecFactory, extensionRegistry)
{
BaseStream = stream;
_leaveOpen = leaveOpen;
@@ -173,7 +179,7 @@ namespace Apache.Arrow.Ipc
EnsureFullRead(buff, bytesRead);
Google.FlatBuffers.ByteBuffer schemabb =
CreateByteBuffer(buff);
- _schema =
MessageSerializer.GetSchema(ReadMessage<Flatbuf.Schema>(schemabb), ref
_dictionaryMemo);
+ _schema =
MessageSerializer.GetSchema(ReadMessage<Flatbuf.Schema>(schemabb), ref
_dictionaryMemo, _extensionRegistry);
return _schema;
}
}
@@ -198,7 +204,7 @@ namespace Apache.Arrow.Ipc
EnsureFullRead(buff, bytesRead);
Google.FlatBuffers.ByteBuffer schemabb =
CreateByteBuffer(buff);
- _schema =
MessageSerializer.GetSchema(ReadMessage<Flatbuf.Schema>(schemabb), ref
_dictionaryMemo);
+ _schema =
MessageSerializer.GetSchema(ReadMessage<Flatbuf.Schema>(schemabb), ref
_dictionaryMemo, _extensionRegistry);
}
}
diff --git a/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs
b/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs
index 6c58c15..c5eed3b 100644
--- a/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs
+++ b/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs
@@ -1091,9 +1091,11 @@ namespace Apache.Arrow.Ipc
private VectorOffset GetChildrenFieldOffset(Field field)
{
- IArrowType targetDataType = field.DataType is DictionaryType
dictionaryType ?
- dictionaryType.ValueType :
- field.DataType;
+ IArrowType targetDataType = field.DataType;
+ if (targetDataType is ExtensionType extType)
+ targetDataType = extType.StorageType;
+ if (targetDataType is DictionaryType dictionaryType)
+ targetDataType = dictionaryType.ValueType;
if (!(targetDataType is NestedType type))
{
@@ -1123,24 +1125,45 @@ namespace Apache.Arrow.Ipc
private VectorOffset GetFieldMetadataOffset(Field field)
{
- if (!field.HasMetadata)
+ IReadOnlyDictionary<string, string> metadata = field.Metadata;
+
+ // Inject extension type metadata if the field type is/wraps an
ExtensionType
+ ExtensionType extType = field.DataType as ExtensionType;
+ if (extType == null && field.DataType is DictionaryType dt &&
dt.ValueType is ExtensionType dext)
+ extType = dext;
+
+ if (extType != null)
+ {
+ // Merge extension metadata with existing field metadata
+ var merged = metadata != null
+ ? new Dictionary<string, string>((IDictionary<string,
string>)metadata)
+ : new Dictionary<string, string>();
+ if (!merged.ContainsKey("ARROW:extension:name"))
+ merged["ARROW:extension:name"] = extType.Name;
+ if (!merged.ContainsKey("ARROW:extension:metadata"))
+ merged["ARROW:extension:metadata"] =
extType.ExtensionMetadata ?? "";
+ metadata = merged;
+ }
+
+ if (metadata == null || metadata.Count == 0)
{
return default;
}
- Offset<Flatbuf.KeyValue>[] metadataOffsets =
GetMetadataOffsets(field.Metadata);
+ Offset<Flatbuf.KeyValue>[] metadataOffsets =
GetMetadataOffsets(metadata);
return Flatbuf.Field.CreateCustomMetadataVector(Builder,
metadataOffsets);
}
private Offset<Flatbuf.DictionaryEncoding> GetDictionaryOffset(Field
field)
{
- if (field.DataType.TypeId != ArrowTypeId.Dictionary)
+ IArrowType dataType = field.DataType is ExtensionType ext ?
ext.StorageType : field.DataType;
+ if (dataType.TypeId != ArrowTypeId.Dictionary)
{
return default;
}
long id = DictionaryMemo.GetOrAssignId(field);
- var dicType = field.DataType as DictionaryType;
+ var dicType = dataType as DictionaryType;
var indexType = dicType.IndexType as NumberType;
Offset<Flatbuf.Int> indexOffset = Flatbuf.Int.CreateInt(Builder,
indexType.BitWidth, indexType.IsSigned);
@@ -1368,7 +1391,8 @@ namespace Apache.Arrow.Ipc
private static void CollectDictionary(Field field, ArrayData
arrayData, ref DictionaryMemo dictionaryMemo)
{
- if (field.DataType is DictionaryType dictionaryType)
+ IArrowType fieldType = field.DataType is ExtensionType ext ?
ext.StorageType : field.DataType;
+ if (fieldType is DictionaryType dictionaryType)
{
if (arrayData.Dictionary == null)
{
@@ -1399,7 +1423,8 @@ namespace Apache.Arrow.Ipc
return;
}
- if (arrayData.DataType is NestedType nestedType)
+ IArrowType walkType = arrayData.DataType is ExtensionType ext ?
ext.StorageType : arrayData.DataType;
+ if (walkType is NestedType nestedType)
{
for (int i = 0; i < nestedType.Fields.Count; i++)
{
diff --git a/src/Apache.Arrow/Ipc/MessageSerializer.cs
b/src/Apache.Arrow/Ipc/MessageSerializer.cs
index 7c7f7a3..0984faf 100644
--- a/src/Apache.Arrow/Ipc/MessageSerializer.cs
+++ b/src/Apache.Arrow/Ipc/MessageSerializer.cs
@@ -54,12 +54,17 @@ namespace Apache.Arrow.Ipc
}
internal static Schema GetSchema(Flatbuf.Schema schema, ref
DictionaryMemo dictionaryMemo)
+ {
+ return GetSchema(schema, ref dictionaryMemo,
ExtensionTypeRegistry.Default);
+ }
+
+ internal static Schema GetSchema(Flatbuf.Schema schema, ref
DictionaryMemo dictionaryMemo, ExtensionTypeRegistry extensionRegistry)
{
List<Field> fields = new List<Field>();
for (int i = 0; i < schema.FieldsLength; i++)
{
Flatbuf.Field field = schema.Fields(i).GetValueOrDefault();
- fields.Add(FieldFromFlatbuffer(field, ref dictionaryMemo));
+ fields.Add(FieldFromFlatbuffer(field, ref dictionaryMemo,
extensionRegistry));
}
Dictionary<string, string> metadata = schema.CustomMetadataLength
> 0 ? new Dictionary<string, string>() : null;
@@ -73,18 +78,39 @@ namespace Apache.Arrow.Ipc
return new Schema(fields, metadata, copyCollections: false);
}
- private static Field FieldFromFlatbuffer(Flatbuf.Field flatbufField,
ref DictionaryMemo dictionaryMemo)
+ private static Field FieldFromFlatbuffer(Flatbuf.Field flatbufField,
ref DictionaryMemo dictionaryMemo, ExtensionTypeRegistry extensionRegistry)
{
Field[] childFields = flatbufField.ChildrenLength > 0 ? new
Field[flatbufField.ChildrenLength] : null;
for (int i = 0; i < flatbufField.ChildrenLength; i++)
{
Flatbuf.Field? childFlatbufField = flatbufField.Children(i);
- childFields[i] = FieldFromFlatbuffer(childFlatbufField.Value,
ref dictionaryMemo);
+ childFields[i] = FieldFromFlatbuffer(childFlatbufField.Value,
ref dictionaryMemo, extensionRegistry);
}
Flatbuf.DictionaryEncoding? dictionaryEncoding =
flatbufField.Dictionary;
IArrowType type = GetFieldArrowType(flatbufField, childFields);
+ // Build metadata first (needed for extension type resolution)
+ Dictionary<string, string> metadata =
flatbufField.CustomMetadataLength > 0 ? new Dictionary<string, string>() : null;
+ for (int i = 0; i < flatbufField.CustomMetadataLength; i++)
+ {
+ Flatbuf.KeyValue keyValue =
flatbufField.CustomMetadata(i).GetValueOrDefault();
+ metadata[keyValue.Key] = keyValue.Value;
+ }
+
+ // Resolve extension type from metadata (before dictionary
wrapping)
+ if (metadata != null &&
+ metadata.TryGetValue("ARROW:extension:name", out string
extName) &&
+ extensionRegistry != null &&
+ extensionRegistry.TryGetDefinition(extName, out
ExtensionDefinition extDef))
+ {
+ metadata.TryGetValue("ARROW:extension:metadata", out string
extMeta);
+ if (extDef.TryCreateType(type, extMeta, out ExtensionType
extType))
+ {
+ type = extType;
+ }
+ }
+
if (dictionaryEncoding.HasValue)
{
Flatbuf.Int? indexTypeAsInt =
dictionaryEncoding.Value.IndexType;
@@ -95,14 +121,6 @@ namespace Apache.Arrow.Ipc
type = new DictionaryType(indexType, type,
dictionaryEncoding.Value.IsOrdered);
}
- Dictionary<string, string> metadata =
flatbufField.CustomMetadataLength > 0 ? new Dictionary<string, string>() : null;
- for (int i = 0; i < flatbufField.CustomMetadataLength; i++)
- {
- Flatbuf.KeyValue keyValue =
flatbufField.CustomMetadata(i).GetValueOrDefault();
-
- metadata[keyValue.Key] = keyValue.Value;
- }
-
var arrowField = new Field(flatbufField.Name, type,
flatbufField.Nullable, metadata, copyCollections: false);
if (dictionaryEncoding.HasValue)
diff --git a/src/Apache.Arrow/Types/ArrowTypeExtensions.cs
b/src/Apache.Arrow/Types/ArrowTypeExtensions.cs
new file mode 100644
index 0000000..3bd3700
--- /dev/null
+++ b/src/Apache.Arrow/Types/ArrowTypeExtensions.cs
@@ -0,0 +1,25 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+namespace Apache.Arrow.Types
+{
+    /// <summary>
+    /// Internal helpers for inspecting <see cref="IArrowType"/> values.
+    /// </summary>
+    internal static class ArrowTypeExtensions
+    {
+        /// <summary>
+        /// Returns the storage type when <paramref name="type"/> is an
+        /// <see cref="ExtensionType"/>; any other value is returned unchanged.
+        /// Only one level of extension wrapping is removed.
+        /// </summary>
+        public static IArrowType GetStorageType(IArrowType type)
+        {
+            if (type is ExtensionType extension)
+            {
+                return extension.StorageType;
+            }
+
+            return type;
+        }
+    }
+}
diff --git a/src/Apache.Arrow/Types/IArrowType.cs
b/src/Apache.Arrow/Types/IArrowType.cs
index 657b234..bf16368 100644
--- a/src/Apache.Arrow/Types/IArrowType.cs
+++ b/src/Apache.Arrow/Types/IArrowType.cs
@@ -58,6 +58,7 @@ namespace Apache.Arrow.Types
LargeString,
Decimal32,
Decimal64,
+ Extension,
}
public interface IArrowType
diff --git a/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs
b/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs
index e7cff35..ebd89cc 100644
--- a/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs
+++ b/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs
@@ -20,6 +20,7 @@ using System.Linq;
using System.Runtime.InteropServices;
using System.Threading;
using System.Threading.Tasks;
+using Apache.Arrow.Arrays;
using Apache.Arrow.C;
using Apache.Arrow.Ipc;
using Apache.Arrow.Scalars;
@@ -1091,6 +1092,115 @@ namespace Apache.Arrow.Tests
}
}
+        [SkippableFact]
+        public unsafe void ExportGuidArray()
+        {
+            // Sends a C# GuidArray out through the C Data Interface and checks that
+            // pyarrow imports it as an arrow.uuid extension array with the same values.
+
+            Guid first = Guid.Parse("01234567-89ab-cdef-0123-456789abcdef");
+            Guid second = Guid.Parse("fedcba98-7654-3210-fedc-ba9876543210");
+            Guid?[] values = { first, null, second };
+
+            var arrayBuilder = new GuidArray.Builder();
+            foreach (Guid? candidate in values)
+            {
+                if (!candidate.HasValue)
+                {
+                    arrayBuilder.AppendNull();
+                    continue;
+                }
+
+                arrayBuilder.Append(candidate.Value);
+            }
+            var exportedArray = arrayBuilder.Build();
+
+            var uuidField = new Field("uuids", GuidType.Default, true);
+            var batchSchema = new Schema(new[] { uuidField }, null);
+            var recordBatch = new RecordBatch(batchSchema, new[] { exportedArray }, values.Length);
+
+            CArrowArray* cArray = CArrowArray.Create();
+            CArrowArrayExporter.ExportRecordBatch(recordBatch, cArray);
+
+            CArrowSchema* cSchema = CArrowSchema.Create();
+            CArrowSchemaExporter.ExportSchema(recordBatch.Schema, cSchema);
+
+            // pyarrow takes the struct addresses as plain integers.
+            long arrayPtr = ((IntPtr)cArray).ToInt64();
+            long schemaPtr = ((IntPtr)cSchema).ToInt64();
+
+            using (Py.GIL())
+            {
+                dynamic pa = Py.Import("pyarrow");
+                dynamic uuid_mod = Py.Import("uuid");
+
+                dynamic pyBatch = pa.RecordBatch._import_from_c(arrayPtr, schemaPtr);
+                dynamic pyArray = pyBatch.column(0);
+
+                // Expected array, built on the Python side from the same UUIDs.
+                var expectedItems = new PyObject[]
+                {
+                    (PyObject)uuid_mod.UUID("01234567-89ab-cdef-0123-456789abcdef").bytes,
+                    PyObject.None,
+                    (PyObject)uuid_mod.UUID("fedcba98-7654-3210-fedc-ba9876543210").bytes,
+                };
+                dynamic expectedArray = pa.array(new PyList(expectedItems), type: pa.uuid());
+
+                Assert.True((bool)pyArray.equals(expectedArray));
+            }
+
+            CArrowArray.Free(cArray);
+            CArrowSchema.Free(cSchema);
+        }
+
+        [SkippableFact]
+        public unsafe void ImportGuidArray()
+        {
+            // Create a UUID array in Python, export it via the C Data Interface,
+            // and verify that C# resolves it as a GuidArray with correct values.
+
+            CArrowArray* cArray = CArrowArray.Create();
+            CArrowSchema* cSchema = CArrowSchema.Create();
+
+            using (Py.GIL())
+            {
+                dynamic pa = Py.Import("pyarrow");
+                dynamic uuid_mod = Py.Import("uuid");
+
+                dynamic pyArray = pa.array(
+                    new PyList(new PyObject[]
+                    {
+                        (PyObject)uuid_mod.UUID("01234567-89ab-cdef-0123-456789abcdef").bytes,
+                        PyObject.None,
+                        (PyObject)uuid_mod.UUID("fedcba98-7654-3210-fedc-ba9876543210").bytes,
+                    }),
+                    type: pa.uuid());
+
+                long arrayPtr = ((IntPtr)cArray).ToInt64();
+                long schemaPtr = ((IntPtr)cSchema).ToInt64();
+                pyArray._export_to_c(arrayPtr, schemaPtr);
+            }
+
+            // Only a registry that knows arrow.uuid should resolve the field to GuidType.
+            var registry = new ExtensionTypeRegistry();
+            registry.Register(GuidExtensionDefinition.Instance);
+
+            Field field = CArrowSchemaImporter.ImportField(cSchema, registry);
+            Assert.IsType<GuidType>(field.DataType);
+
+            IArrowArray importedArray = CArrowArrayImporter.ImportArray(cArray, field.DataType);
+            Assert.IsType<GuidArray>(importedArray);
+
+            var guidArray = (GuidArray)importedArray;
+            Assert.Equal(3, guidArray.Length);
+            Assert.Equal(Guid.Parse("01234567-89ab-cdef-0123-456789abcdef"), guidArray.GetGuid(0));
+            Assert.Null(guidArray.GetGuid(1));
+            Assert.Equal(Guid.Parse("fedcba98-7654-3210-fedc-ba9876543210"), guidArray.GetGuid(2));
+
+            // Both C structs were allocated by this test, so both are freed here,
+            // matching the cleanup done in ExportGuidArray.
+            CArrowArray.Free(cArray);
+            CArrowSchema.Free(cSchema);
+        }
+
private static PyObject List(params int?[] values)
{
return new PyList(values.Select(i => i == null ? PyObject.None :
new PyInt(i.Value)).ToArray());
diff --git a/test/Apache.Arrow.Tests/ExtensionTypeTests.cs
b/test/Apache.Arrow.Tests/ExtensionTypeTests.cs
new file mode 100644
index 0000000..8f8dbd0
--- /dev/null
+++ b/test/Apache.Arrow.Tests/ExtensionTypeTests.cs
@@ -0,0 +1,351 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+using System;
+using System.Collections.Generic;
+using System.IO;
+using Apache.Arrow.Arrays;
+using Apache.Arrow.Ipc;
+using Apache.Arrow.Types;
+using Xunit;
+
+namespace Apache.Arrow.Tests
+{
+ public class ExtensionTypeTests
+ {
+        /// <summary>
+        /// Builds a single-column record batch named "guids" of type GuidType from
+        /// <paramref name="values"/>; null entries become null slots.
+        /// </summary>
+        private static RecordBatch BuildGuidRecordBatch(Guid?[] values)
+        {
+            var arrayBuilder = new GuidArray.Builder();
+
+            for (int i = 0; i < values.Length; i++)
+            {
+                Guid? current = values[i];
+                if (!current.HasValue)
+                {
+                    arrayBuilder.AppendNull();
+                    continue;
+                }
+
+                arrayBuilder.Append(current.Value);
+            }
+            var column = arrayBuilder.Build();
+
+            var guidField = new Field("guids", GuidType.Default, true);
+            var batchSchema = new Schema(new[] { guidField }, null);
+
+            return new RecordBatch(batchSchema, new[] { column }, values.Length);
+        }
+
+        [Fact]
+        public void ExtensionTypeProperties()
+        {
+            // GuidType should report itself as the arrow.uuid extension over 16-byte fixed-size binary.
+            var uuidType = new GuidType();
+
+            Assert.Equal(ArrowTypeId.Extension, uuidType.TypeId);
+            Assert.Equal("arrow.uuid", uuidType.Name);
+            Assert.Equal("", uuidType.ExtensionMetadata);
+            Assert.True(uuidType.IsFixedWidth);
+
+            var storage = Assert.IsType<FixedSizeBinaryType>(uuidType.StorageType);
+            Assert.Equal(16, storage.ByteWidth);
+        }
+
+        [Fact]
+        public void GuidArrayReadValues()
+        {
+            // Values and the null slot must read back exactly as they were appended.
+            Guid?[] expected =
+            {
+                Guid.Parse("01234567-89ab-cdef-0123-456789abcdef"),
+                null,
+                Guid.Parse("fedcba98-7654-3210-fedc-ba9876543210"),
+            };
+
+            var column = (GuidArray)BuildGuidRecordBatch(expected).Column(0);
+
+            Assert.Equal(3, column.Length);
+            Assert.Equal(expected[0], column.GetGuid(0));
+            Assert.Null(column.GetGuid(1));
+            Assert.True(column.IsNull(1));
+            Assert.Equal(expected[2], column.GetGuid(2));
+        }
+
+        [Fact]
+        public void GuidArraySlice()
+        {
+            // Slicing must keep the GuidArray type and expose the two middle elements.
+            var source = new Guid?[4];
+            for (int i = 0; i < source.Length; i++)
+            {
+                source[i] = Guid.NewGuid();
+            }
+
+            var column = (GuidArray)BuildGuidRecordBatch(source).Column(0);
+
+            var slice = ArrowArrayFactory.Slice(column, 1, 2);
+            var guidSlice = Assert.IsType<GuidArray>(slice);
+
+            Assert.Equal(2, guidSlice.Length);
+            Assert.Equal(source[1], guidSlice.GetGuid(0));
+            Assert.Equal(source[2], guidSlice.GetGuid(1));
+        }
+
+        [Fact]
+        public void IpcStreamRoundTrip()
+        {
+            // With the Guid definition registered, an IPC stream round trip must
+            // come back as a GuidArray holding the original values.
+            Guid?[] expected =
+            {
+                Guid.Parse("01234567-89ab-cdef-0123-456789abcdef"),
+                null,
+                Guid.Parse("fedcba98-7654-3210-fedc-ba9876543210"),
+            };
+            var sourceBatch = BuildGuidRecordBatch(expected);
+
+            var guidRegistry = new ExtensionTypeRegistry();
+            guidRegistry.Register(GuidExtensionDefinition.Instance);
+            var readContext = new ArrowContext(extensionRegistry: guidRegistry);
+
+            // Serialize the batch, then rewind so the reader starts at the beginning.
+            var buffer = new MemoryStream();
+            var streamWriter = new ArrowStreamWriter(buffer, sourceBatch.Schema);
+            streamWriter.WriteRecordBatch(sourceBatch);
+            streamWriter.WriteEnd();
+            buffer.Position = 0;
+
+            // Deserialize using the context that carries the registry.
+            var streamReader = new ArrowStreamReader(readContext, buffer);
+            var roundTripped = streamReader.ReadNextRecordBatch();
+
+            Assert.NotNull(roundTripped);
+            Assert.Equal(3, roundTripped.Length);
+
+            var guidColumn = Assert.IsType<GuidArray>(roundTripped.Column(0));
+            Assert.Equal(expected[0], guidColumn.GetGuid(0));
+            Assert.Null(guidColumn.GetGuid(1));
+            Assert.Equal(expected[2], guidColumn.GetGuid(2));
+        }
+
+        [Fact]
+        public void IpcStreamRoundTripWithoutRegistration()
+        {
+            // When the extension type is not registered, the column must be read
+            // back as a plain FixedSizeBinaryArray (backwards compat).
+            Guid?[] source =
+            {
+                Guid.Parse("01234567-89ab-cdef-0123-456789abcdef"),
+                null,
+                Guid.Parse("fedcba98-7654-3210-fedc-ba9876543210"),
+            };
+            var sourceBatch = BuildGuidRecordBatch(source);
+
+            // Serialize the batch, then rewind for reading.
+            var buffer = new MemoryStream();
+            var streamWriter = new ArrowStreamWriter(buffer, sourceBatch.Schema);
+            streamWriter.WriteRecordBatch(sourceBatch);
+            streamWriter.WriteEnd();
+            buffer.Position = 0;
+
+            // Deserialize with an empty registry.
+            var emptyRegistry = new ExtensionTypeRegistry();
+            var readContext = new ArrowContext(extensionRegistry: emptyRegistry);
+            var streamReader = new ArrowStreamReader(readContext, buffer);
+            var roundTripped = streamReader.ReadNextRecordBatch();
+
+            Assert.NotNull(roundTripped);
+            var column = roundTripped.Column(0);
+            Assert.IsType<FixedSizeBinaryArray>(column);
+            Assert.IsNotType<GuidArray>(column);
+        }
+
+        [Fact]
+        public void ExtensionMetadataPreservedInRoundTrip()
+        {
+            var sourceBatch = BuildGuidRecordBatch(new Guid?[] { Guid.NewGuid() });
+
+            // Serialize the batch, then rewind for reading.
+            var buffer = new MemoryStream();
+            var streamWriter = new ArrowStreamWriter(buffer, sourceBatch.Schema);
+            streamWriter.WriteRecordBatch(sourceBatch);
+            streamWriter.WriteEnd();
+            buffer.Position = 0;
+
+            // Read with an empty registry: the extension name must still be
+            // present in the field metadata even though the type is not resolved.
+            var emptyRegistry = new ExtensionTypeRegistry();
+            var readContext = new ArrowContext(extensionRegistry: emptyRegistry);
+            var streamReader = new ArrowStreamReader(readContext, buffer);
+            var roundTripped = streamReader.ReadNextRecordBatch();
+
+            Assert.NotNull(roundTripped);
+            var readField = roundTripped.Schema.GetFieldByIndex(0);
+            Assert.NotNull(readField.Metadata);
+            Assert.True(readField.Metadata.ContainsKey("ARROW:extension:name"));
+            Assert.Equal("arrow.uuid", readField.Metadata["ARROW:extension:name"]);
+        }
+
+        [Fact]
+        public void ContextIsolation()
+        {
+            // The same bytes read through two contexts must resolve according to
+            // each context's own registry.
+            var sourceBatch = BuildGuidRecordBatch(new Guid?[] { Guid.NewGuid() });
+
+            var registryWithGuid = new ExtensionTypeRegistry();
+            registryWithGuid.Register(GuidExtensionDefinition.Instance);
+            var guidContext = new ArrowContext(extensionRegistry: registryWithGuid);
+
+            // This registry deliberately has no Guid definition.
+            var emptyRegistry = new ExtensionTypeRegistry();
+            var plainContext = new ArrowContext(extensionRegistry: emptyRegistry);
+
+            var buffer = new MemoryStream();
+            var streamWriter = new ArrowStreamWriter(buffer, sourceBatch.Schema);
+            streamWriter.WriteRecordBatch(sourceBatch);
+            streamWriter.WriteEnd();
+
+            // First pass: the registry knows arrow.uuid, so a GuidArray is expected.
+            buffer.Position = 0;
+            var guidReader = new ArrowStreamReader(guidContext, buffer);
+            var guidBatch = guidReader.ReadNextRecordBatch();
+            Assert.IsType<GuidArray>(guidBatch.Column(0));
+
+            // Second pass over the same stream: a FixedSizeBinaryArray is expected.
+            buffer.Position = 0;
+            var plainReader = new ArrowStreamReader(plainContext, buffer);
+            var plainBatch = plainReader.ReadNextRecordBatch();
+            Assert.IsType<FixedSizeBinaryArray>(plainBatch.Column(0));
+        }
+
+        [Fact]
+        public void ExtensionDefinitionRejectsWrongStorageType()
+        {
+            var definition = GuidExtensionDefinition.Instance;
+
+            // A fixed-size binary of the wrong width is rejected.
+            bool acceptedNarrowBinary = definition.TryCreateType(new FixedSizeBinaryType(8), "", out _);
+            Assert.False(acceptedNarrowBinary);
+
+            // A storage type that is not fixed-size binary is rejected.
+            bool acceptedInt32 = definition.TryCreateType(Int32Type.Default, "", out _);
+            Assert.False(acceptedInt32);
+
+            // 16-byte fixed-size binary is accepted and yields a GuidType.
+            bool acceptedUuidStorage = definition.TryCreateType(new FixedSizeBinaryType(16), "", out var created);
+            Assert.True(acceptedUuidStorage);
+            Assert.IsType<GuidType>(created);
+        }
+
+        // Verifies that a registry obtained from ExtensionTypeRegistry.Default.Clone()
+        // is independent of Default: registering the Guid definition on the clone
+        // makes "arrow.uuid" resolvable on the clone but not on Default.
+        // NOTE(review): the final assertion presumes nothing else has left
+        // "arrow.uuid" registered on Default when this test runs — confirm.
+        [Fact]
+        public void ExtensionTypeRegistryClone()
+        {
+            var clone = ExtensionTypeRegistry.Default.Clone();
+            clone.Register(GuidExtensionDefinition.Instance);
+
+            Assert.True(clone.TryGetDefinition("arrow.uuid", out _));
+
+            // Mutating the clone should not have affected the default
+            Assert.False(ExtensionTypeRegistry.Default.TryGetDefinition("arrow.uuid", out _));
+        }
+
+        // Verifies that RegisterTemporary registers a definition only for the
+        // lifetime of the object it returns: "arrow.uuid" is resolvable on Default
+        // inside the using block and no longer resolvable once the block has exited.
+        // NOTE(review): this temporarily mutates ExtensionTypeRegistry.Default, so it
+        // presumably must not overlap with tests that assert on Default — confirm.
+        [Fact]
+        public void ExtensionTypeRegistryScoped()
+        {
+            using (ExtensionTypeRegistry.Default.RegisterTemporary(GuidExtensionDefinition.Instance))
+            {
+                Assert.True(ExtensionTypeRegistry.Default.TryGetDefinition("arrow.uuid", out _));
+            }
+
+            // Leaving the using block disposes the registration, which is expected to remove it.
+            Assert.False(ExtensionTypeRegistry.Default.TryGetDefinition("arrow.uuid", out _));
+        }
+
+        // Verifies that ArrowArrayFactory.BuildArray produces a GuidArray when the
+        // ArrayData carries a GuidType, and that the single stored value reads back
+        // as the Guid whose bytes were placed in the value buffer.
+        [Fact]
+        public void ArrowArrayFactoryBuildExtension()
+        {
+            var guidType = new GuidType();
+            var guid = Guid.NewGuid();
+
+            // First buffer is left empty (null count below is 0); second holds the value bytes.
+            var buffers = new[]
+            {
+                ArrowBuffer.Empty,
+                new ArrowBuffer.Builder<byte>().Append(GuidArray.GuidToBytes(guid)).Build()
+            };
+            var data = new ArrayData(guidType, 1, 0, 0, buffers);
+
+            var array = ArrowArrayFactory.BuildArray(data);
+
+            // Assert.IsType<T> returns the instance already typed as T, so no separate cast is needed.
+            var guidArray = Assert.IsType<GuidArray>(array);
+            Assert.Equal(guid, guidArray.GetGuid(0));
+        }
+
+        // Verifies that a schema containing a GuidType field survives an export
+        // through CArrowSchemaExporter and a re-import through CArrowSchemaImporter
+        // when the importer is given a registry that has the Guid definition: the
+        // imported field keeps its name and its data type is GuidType.
+        [Fact]
+        public unsafe void CDataSchemaRoundTrip()
+        {
+            var registry = new ExtensionTypeRegistry();
+            registry.Register(GuidExtensionDefinition.Instance);
+
+            var guidType = new GuidType();
+            var field = new Field("uuid_field", guidType, true);
+            var schema = new Schema(new[] { field }, null);
+
+            // Export
+            var cSchema = Apache.Arrow.C.CArrowSchema.Create();
+            try
+            {
+                Apache.Arrow.C.CArrowSchemaExporter.ExportSchema(schema, cSchema);
+
+                // Import with registry
+                var importedSchema = Apache.Arrow.C.CArrowSchemaImporter.ImportSchema(cSchema, registry);
+
+                Assert.Single(importedSchema.FieldsList);
+                Assert.Equal("uuid_field", importedSchema.FieldsList[0].Name);
+                Assert.IsType<GuidType>(importedSchema.FieldsList[0].DataType);
+            }
+            finally
+            {
+                // Free the C schema even when an assertion above throws.
+                Apache.Arrow.C.CArrowSchema.Free(cSchema);
+            }
+        }
+
+        // Verifies the fallback path of the C Data schema import: a GuidType field
+        // that also carries explicit "ARROW:extension:*" metadata is exported, then
+        // imported through the ImportSchema overload that takes no registry. The
+        // imported field is expected to have a FixedSizeBinaryType of byte width 16.
+        [Fact]
+        public unsafe void CDataSchemaRoundTripWithoutRegistry()
+        {
+            var guidType = new GuidType();
+            var field = new Field("uuid_field", guidType, true,
+                new Dictionary<string, string>
+                {
+                    ["ARROW:extension:name"] = "arrow.uuid",
+                    ["ARROW:extension:metadata"] = ""
+                });
+            var schema = new Schema(new[] { field }, null);
+
+            // Export
+            var cSchema = Apache.Arrow.C.CArrowSchema.Create();
+            try
+            {
+                Apache.Arrow.C.CArrowSchemaExporter.ExportSchema(schema, cSchema);
+
+                // Import without registry — should fall back to storage type
+                var importedSchema = Apache.Arrow.C.CArrowSchemaImporter.ImportSchema(cSchema);
+
+                Assert.Single(importedSchema.FieldsList);
+
+                // Assert.IsType<T> returns the instance already typed as T, so no separate cast is needed.
+                var storageType = Assert.IsType<FixedSizeBinaryType>(importedSchema.FieldsList[0].DataType);
+                Assert.Equal(16, storageType.ByteWidth);
+            }
+            finally
+            {
+                // Free the C schema even when an assertion above throws.
+                Apache.Arrow.C.CArrowSchema.Free(cSchema);
+            }
+        }
+ }
+}