This is an automated email from the ASF dual-hosted git repository.
curth pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 20e120b2a7 GH-38061: [C#] Implement Duration support (#38062)
20e120b2a7 is described below
commit 20e120b2a71e7da7ecc5ed9b32c7b5ea9558fa00
Author: Curt Hagenlocher <[email protected]>
AuthorDate: Mon Oct 16 09:24:23 2023 -0700
GH-38061: [C#] Implement Duration support (#38062)
### What changes are included in this PR?
Complete support for the Duration array type in the C# implementation.
### Are these changes tested?
Yes.
### Are there any user-facing changes?
The Duration array type is now supported in the C# library. This change also
refactors the time-related types (TimeType and TimestampType now derive from a
new TimeBasedType base class), which could affect edge cases in user code.
* Closes: #38061
---
.../Apache.Arrow/Arrays/ArrayDataTypeComparer.cs | 18 +--
.../Arrays/ArrowArrayBuilderFactory.cs | 2 +
.../src/Apache.Arrow/Arrays/ArrowArrayFactory.cs | 2 +
csharp/src/Apache.Arrow/Arrays/DurationArray.cs | 84 +++++++++++++
csharp/src/Apache.Arrow/Arrays/Time64Array.cs | 19 +--
csharp/src/Apache.Arrow/C/CArrowSchemaExporter.cs | 3 +
csharp/src/Apache.Arrow/C/CArrowSchemaImporter.cs | 5 +-
csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs | 2 +
.../Apache.Arrow/Ipc/ArrowTypeFlatbufferBuilder.cs | 8 ++
csharp/src/Apache.Arrow/Ipc/MessageSerializer.cs | 3 +
csharp/src/Apache.Arrow/RecordBatch.Builder.cs | 2 +
.../Types/{TimestampType.cs => DurationType.cs} | 40 +++---
csharp/src/Apache.Arrow/Types/IArrowType.cs | 1 +
.../Types/{IArrowType.cs => TimeBasedType.cs} | 49 +-------
csharp/src/Apache.Arrow/Types/TimeType.cs | 15 +--
csharp/src/Apache.Arrow/Types/TimeUnit.cs | 58 +++++++++
csharp/src/Apache.Arrow/Types/TimestampType.cs | 7 +-
.../test/Apache.Arrow.IntegrationTest/JsonFile.cs | 15 +++
.../test/Apache.Arrow.Tests/ArrayTypeComparer.cs | 18 +--
.../ArrowArrayConcatenatorTests.cs | 28 +++++
csharp/test/Apache.Arrow.Tests/ArrowArrayTests.cs | 2 +
.../test/Apache.Arrow.Tests/ArrowReaderVerifier.cs | 2 +
.../CDataInterfacePythonTests.cs | 16 ++-
.../test/Apache.Arrow.Tests/DurationArrayTests.cs | 134 +++++++++++++++++++++
csharp/test/Apache.Arrow.Tests/TestData.cs | 13 ++
dev/archery/archery/integration/datagen.py | 3 +-
docs/source/status.rst | 2 +-
27 files changed, 417 insertions(+), 134 deletions(-)
diff --git a/csharp/src/Apache.Arrow/Arrays/ArrayDataTypeComparer.cs
b/csharp/src/Apache.Arrow/Arrays/ArrayDataTypeComparer.cs
index 1698e0672f..1abaa7f043 100644
--- a/csharp/src/Apache.Arrow/Arrays/ArrayDataTypeComparer.cs
+++ b/csharp/src/Apache.Arrow/Arrays/ArrayDataTypeComparer.cs
@@ -13,7 +13,6 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-using System;
using Apache.Arrow.Types;
namespace Apache.Arrow
@@ -22,8 +21,7 @@ namespace Apache.Arrow
IArrowTypeVisitor<TimestampType>,
IArrowTypeVisitor<Date32Type>,
IArrowTypeVisitor<Date64Type>,
- IArrowTypeVisitor<Time32Type>,
- IArrowTypeVisitor<Time64Type>,
+ IArrowTypeVisitor<TimeBasedType>,
IArrowTypeVisitor<FixedSizeBinaryType>,
IArrowTypeVisitor<ListType>,
IArrowTypeVisitor<FixedSizeListType>,
@@ -69,18 +67,10 @@ namespace Apache.Arrow
}
}
- public void Visit(Time32Type actualType)
+ public void Visit(TimeBasedType actualType)
{
- if (_expectedType is Time32Type expectedType
- && expectedType.Unit == actualType.Unit)
- {
- _dataTypeMatch = true;
- }
- }
-
- public void Visit(Time64Type actualType)
- {
- if (_expectedType is Time64Type expectedType
+ if (_expectedType.TypeId == actualType.TypeId
+ && _expectedType is TimeBasedType expectedType
&& expectedType.Unit == actualType.Unit)
{
_dataTypeMatch = true;
diff --git a/csharp/src/Apache.Arrow/Arrays/ArrowArrayBuilderFactory.cs
b/csharp/src/Apache.Arrow/Arrays/ArrowArrayBuilderFactory.cs
index 1b972d0187..af5a524798 100644
--- a/csharp/src/Apache.Arrow/Arrays/ArrowArrayBuilderFactory.cs
+++ b/csharp/src/Apache.Arrow/Arrays/ArrowArrayBuilderFactory.cs
@@ -66,6 +66,8 @@ namespace Apache.Arrow
return new Time32Array.Builder(dataType as Time32Type);
case ArrowTypeId.Time64:
return new Time64Array.Builder(dataType as Time64Type);
+ case ArrowTypeId.Duration:
+ return new DurationArray.Builder(dataType as DurationType);
case ArrowTypeId.List:
return new ListArray.Builder(dataType as ListType);
case ArrowTypeId.FixedSizeList:
diff --git a/csharp/src/Apache.Arrow/Arrays/ArrowArrayFactory.cs
b/csharp/src/Apache.Arrow/Arrays/ArrowArrayFactory.cs
index d3b7d65185..0520513334 100644
--- a/csharp/src/Apache.Arrow/Arrays/ArrowArrayFactory.cs
+++ b/csharp/src/Apache.Arrow/Arrays/ArrowArrayFactory.cs
@@ -73,6 +73,8 @@ namespace Apache.Arrow
return new Time32Array(data);
case ArrowTypeId.Time64:
return new Time64Array(data);
+ case ArrowTypeId.Duration:
+ return new DurationArray(data);
case ArrowTypeId.Decimal128:
return new Decimal128Array(data);
case ArrowTypeId.Decimal256:
diff --git a/csharp/src/Apache.Arrow/Arrays/DurationArray.cs
b/csharp/src/Apache.Arrow/Arrays/DurationArray.cs
new file mode 100644
index 0000000000..3649dda50c
--- /dev/null
+++ b/csharp/src/Apache.Arrow/Arrays/DurationArray.cs
@@ -0,0 +1,84 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+using System;
+using Apache.Arrow.Types;
+
+namespace Apache.Arrow
+{
+    /// <summary>
+    /// An Arrow array of durations. Each slot is a 64-bit integer count of the
+    /// <see cref="TimeUnit"/> carried by the array's <see cref="DurationType"/>.
+    /// </summary>
+    public class DurationArray : PrimitiveArray<long>
+    {
+        /// <summary>
+        /// The <see cref="Builder"/> class can be used to fluently build <see cref="DurationArray"/> objects.
+        /// </summary>
+        public class Builder : PrimitiveArrayBuilder<long, DurationArray, Builder>
+        {
+            /// <summary>
+            /// The duration type (and therefore the time unit) of the arrays this builder produces.
+            /// </summary>
+            public DurationType DataType { get; }
+
+            /// <summary>
+            /// Create a builder for arrays of the given duration type.
+            /// </summary>
+            /// <param name="dataType">Duration type whose unit is used to convert appended <see cref="TimeSpan"/> values.</param>
+            /// <exception cref="ArgumentNullException"><paramref name="dataType"/> is null.</exception>
+            public Builder(DurationType dataType)
+            {
+                // Fail fast: a null type would otherwise only surface later as a
+                // NullReferenceException when Append(TimeSpan) reads DataType.Unit.
+                DataType = dataType ?? throw new ArgumentNullException(nameof(dataType));
+            }
+
+            protected override DurationArray Build(
+                ArrowBuffer valueBuffer, ArrowBuffer nullBitmapBuffer,
+                int length, int nullCount, int offset) =>
+                new DurationArray(DataType, valueBuffer, nullBitmapBuffer, length, nullCount, offset);
+
+            /// <summary>
+            /// Append a duration in the form of a <see cref="TimeSpan"/> object to the array.
+            /// The value is converted from ticks to the builder's time unit; for units
+            /// coarser than a tick the conversion is an integer division, so any
+            /// remainder is discarded.
+            /// </summary>
+            /// <param name="value">TimeSpan to add.</param>
+            /// <returns>Returns the builder (for fluent-style composition).</returns>
+            public Builder Append(TimeSpan value)
+            {
+                Append(DataType.Unit.ConvertFromTicks(value.Ticks));
+                return this;
+            }
+
+            /// <summary>
+            /// Append a nullable duration to the array: a null slot when
+            /// <paramref name="value"/> is null, otherwise the converted <see cref="TimeSpan"/>.
+            /// </summary>
+            /// <param name="value">TimeSpan to add, or null to append a null slot.</param>
+            /// <returns>Returns the builder (for fluent-style composition).</returns>
+            public Builder Append(TimeSpan? value) =>
+                (value == null) ? AppendNull() : Append(value.Value);
+        }
+
+        /// <summary>
+        /// Create a duration array from its raw buffers.
+        /// </summary>
+        /// <param name="type">Duration type of the array.</param>
+        /// <param name="valueBuffer">Buffer holding the 64-bit duration values.</param>
+        /// <param name="nullBitmapBuffer">Validity bitmap buffer.</param>
+        /// <param name="length">Number of slots in the array.</param>
+        /// <param name="nullCount">Number of null slots.</param>
+        /// <param name="offset">Offset of the first slot within the buffers.</param>
+        public DurationArray(
+            DurationType type,
+            ArrowBuffer valueBuffer, ArrowBuffer nullBitmapBuffer,
+            int length, int nullCount, int offset)
+            : this(new ArrayData(type, length, nullCount, offset,
+                new[] { nullBitmapBuffer, valueBuffer }))
+        { }
+
+        /// <summary>
+        /// Wrap existing array data, verifying that its type id is <see cref="ArrowTypeId.Duration"/>.
+        /// </summary>
+        /// <param name="data">Array data to wrap.</param>
+        public DurationArray(ArrayData data)
+            : base(data)
+        {
+            data.EnsureDataType(ArrowTypeId.Duration);
+        }
+
+        /// <summary>
+        /// The duration type of this array, including its time unit.
+        /// </summary>
+        public DurationType DataType => (DurationType)this.Data.DataType;
+
+        /// <summary>
+        /// Get the duration at the given index as a <see cref="TimeSpan"/>.
+        /// </summary>
+        /// <param name="index">Zero-based index of the slot to read.</param>
+        /// <returns>The duration converted to ticks, or null when the slot is null.</returns>
+        /// <exception cref="ArgumentOutOfRangeException">
+        /// <paramref name="index"/> is negative or not less than the array length.
+        /// </exception>
+        public TimeSpan? GetTimeSpan(int index)
+        {
+            if (index < 0 || index >= Length)
+            {
+                throw new ArgumentOutOfRangeException(nameof(index));
+            }
+            return IsValid(index) ? new TimeSpan(DataType.Unit.ConvertToTicks(Values[index])) : null;
+        }
+
+        public override void Accept(IArrowArrayVisitor visitor) => Accept(this, visitor);
+    }
+}
diff --git a/csharp/src/Apache.Arrow/Arrays/Time64Array.cs
b/csharp/src/Apache.Arrow/Arrays/Time64Array.cs
index 9fc2ae4be1..3369893304 100644
--- a/csharp/src/Apache.Arrow/Arrays/Time64Array.cs
+++ b/csharp/src/Apache.Arrow/Arrays/Time64Array.cs
@@ -25,9 +25,6 @@ namespace Apache.Arrow
/// </summary>
public class Time64Array : PrimitiveArray<long>
{
- private const long TicksPerMicrosecond = 10;
- private const long NanosecondsPerTick = 100;
-
/// <summary>
/// The <see cref="Builder"/> class can be used to fluently build <see
cref="Time64Array"/> objects.
/// </summary>
@@ -62,13 +59,7 @@ namespace Apache.Arrow
#if NET6_0_OR_GREATER
protected override long Convert(TimeOnly time)
{
- var unit = ((TimeBuilder)InnerBuilder).DataType.Unit;
- return unit switch
- {
- TimeUnit.Microsecond => (long)(time.Ticks /
TicksPerMicrosecond),
- TimeUnit.Nanosecond => (long)(time.Ticks *
NanosecondsPerTick),
- _ => throw new InvalidDataException($"Unsupported time
unit for Time32Type: {unit}")
- };
+ return
((TimeBuilder)InnerBuilder).DataType.Unit.ConvertFromTicks(time.Ticks);
}
#endif
}
@@ -153,13 +144,7 @@ namespace Apache.Arrow
return null;
}
- var unit = ((Time64Type)Data.DataType).Unit;
- return unit switch
- {
- TimeUnit.Microsecond => new TimeOnly(value.Value *
TicksPerMicrosecond),
- TimeUnit.Nanosecond => new TimeOnly(value.Value /
NanosecondsPerTick),
- _ => throw new InvalidDataException($"Unsupported time unit
for Time64Type: {unit}")
- };
+ return new
TimeOnly(((Time64Type)Data.DataType).Unit.ConvertToTicks(value.Value));
}
#endif
}
diff --git a/csharp/src/Apache.Arrow/C/CArrowSchemaExporter.cs
b/csharp/src/Apache.Arrow/C/CArrowSchemaExporter.cs
index f347990388..d805e9afc4 100644
--- a/csharp/src/Apache.Arrow/C/CArrowSchemaExporter.cs
+++ b/csharp/src/Apache.Arrow/C/CArrowSchemaExporter.cs
@@ -179,6 +179,9 @@ namespace Apache.Arrow.C
case Time64Type timeType:
// Same prefix as Time32, but allowed time units are
different.
return String.Format("tt{0}",
FormatTimeUnit(timeType.Unit));
+ // Duration
+ case DurationType durationType:
+ return String.Format("tD{0}",
FormatTimeUnit(durationType.Unit));
// Timestamp
case TimestampType timestampType:
return String.Format("ts{0}:{1}",
FormatTimeUnit(timestampType.Unit), timestampType.Timezone);
diff --git a/csharp/src/Apache.Arrow/C/CArrowSchemaImporter.cs
b/csharp/src/Apache.Arrow/C/CArrowSchemaImporter.cs
index f2a08f5e4a..12545c9831 100644
--- a/csharp/src/Apache.Arrow/C/CArrowSchemaImporter.cs
+++ b/csharp/src/Apache.Arrow/C/CArrowSchemaImporter.cs
@@ -313,7 +313,10 @@ namespace Apache.Arrow.C
"ttm" => TimeType.Millisecond,
"ttu" => TimeType.Microsecond,
"ttn" => TimeType.Nanosecond,
- // TODO: duration not yet implemented
+ "tDs" => DurationType.Second,
+ "tDm" => DurationType.Millisecond,
+ "tDu" => DurationType.Microsecond,
+ "tDn" => DurationType.Nanosecond,
"tiM" => IntervalType.YearMonth,
"tiD" => IntervalType.DayTime,
//"tin" => IntervalType.MonthDayNanosecond, // Not yet
implemented
diff --git a/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs
b/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs
index 2b3815af71..dcb8852bc1 100644
--- a/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs
+++ b/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs
@@ -49,6 +49,7 @@ namespace Apache.Arrow.Ipc
IArrowArrayVisitor<Date64Array>,
IArrowArrayVisitor<Time32Array>,
IArrowArrayVisitor<Time64Array>,
+ IArrowArrayVisitor<DurationArray>,
IArrowArrayVisitor<ListArray>,
IArrowArrayVisitor<FixedSizeListArray>,
IArrowArrayVisitor<StringArray>,
@@ -104,6 +105,7 @@ namespace Apache.Arrow.Ipc
public void Visit(Date64Array array) => CreateBuffers(array);
public void Visit(Time32Array array) => CreateBuffers(array);
public void Visit(Time64Array array) => CreateBuffers(array);
+ public void Visit(DurationArray array) => CreateBuffers(array);
public void Visit(ListArray array)
{
diff --git a/csharp/src/Apache.Arrow/Ipc/ArrowTypeFlatbufferBuilder.cs
b/csharp/src/Apache.Arrow/Ipc/ArrowTypeFlatbufferBuilder.cs
index 425d9326ad..1397eb3e00 100644
--- a/csharp/src/Apache.Arrow/Ipc/ArrowTypeFlatbufferBuilder.cs
+++ b/csharp/src/Apache.Arrow/Ipc/ArrowTypeFlatbufferBuilder.cs
@@ -57,6 +57,7 @@ namespace Apache.Arrow.Ipc
IArrowTypeVisitor<Date64Type>,
IArrowTypeVisitor<Time32Type>,
IArrowTypeVisitor<Time64Type>,
+ IArrowTypeVisitor<DurationType>,
IArrowTypeVisitor<BinaryType>,
IArrowTypeVisitor<TimestampType>,
IArrowTypeVisitor<ListType>,
@@ -188,6 +189,13 @@ namespace Apache.Arrow.Ipc
Flatbuf.Time.CreateTime(Builder, ToFlatBuffer(type.Unit),
64));
}
+ public void Visit(DurationType type)
+ {
+ Result = FieldType.Build(
+ Flatbuf.Type.Duration,
+ Flatbuf.Duration.CreateDuration(Builder,
ToFlatBuffer(type.Unit)));
+ }
+
public void Visit(StructType type)
{
Flatbuf.Struct_.StartStruct_(Builder);
diff --git a/csharp/src/Apache.Arrow/Ipc/MessageSerializer.cs
b/csharp/src/Apache.Arrow/Ipc/MessageSerializer.cs
index 9847c376cf..3f504cf3b9 100644
--- a/csharp/src/Apache.Arrow/Ipc/MessageSerializer.cs
+++ b/csharp/src/Apache.Arrow/Ipc/MessageSerializer.cs
@@ -177,6 +177,9 @@ namespace Apache.Arrow.Ipc
Types.TimeUnit unit = timestampTypeMetadata.Unit.ToArrow();
string timezone = timestampTypeMetadata.Timezone;
return new Types.TimestampType(unit, timezone);
+ case Flatbuf.Type.Duration:
+ Flatbuf.Duration durationMeta =
field.Type<Flatbuf.Duration>().Value;
+ return
DurationType.FromTimeUnit(durationMeta.Unit.ToArrow());
case Flatbuf.Type.Interval:
Flatbuf.Interval intervalMetadata =
field.Type<Flatbuf.Interval>().Value;
return
Types.IntervalType.FromIntervalUnit(intervalMetadata.Unit.ToArrow());
diff --git a/csharp/src/Apache.Arrow/RecordBatch.Builder.cs
b/csharp/src/Apache.Arrow/RecordBatch.Builder.cs
index b5d5ec9ea0..8e0d17ae06 100644
--- a/csharp/src/Apache.Arrow/RecordBatch.Builder.cs
+++ b/csharp/src/Apache.Arrow/RecordBatch.Builder.cs
@@ -63,6 +63,8 @@ namespace Apache.Arrow
public Time64Array Time64(Time64Type type,
Action<Time64Array.Builder> action) =>
Build<Time64Array, Time64Array.Builder>(
new Time64Array.Builder(type), action);
+ public DurationArray Duration(DurationType type,
Action<DurationArray.Builder> action) =>
+ Build<DurationArray, DurationArray.Builder>(new
DurationArray.Builder(type), action);
public BinaryArray Binary(Action<BinaryArray.Builder> action) =>
Build<BinaryArray, BinaryArray.Builder>(new BinaryArray.Builder(), action);
public StringArray String(Action<StringArray.Builder> action) =>
Build<StringArray, StringArray.Builder>(new StringArray.Builder(), action);
public TimestampArray Timestamp(Action<TimestampArray.Builder>
action) => Build<TimestampArray, TimestampArray.Builder>(new
TimestampArray.Builder(), action);
diff --git a/csharp/src/Apache.Arrow/Types/TimestampType.cs
b/csharp/src/Apache.Arrow/Types/DurationType.cs
similarity index 54%
copy from csharp/src/Apache.Arrow/Types/TimestampType.cs
copy to csharp/src/Apache.Arrow/Types/DurationType.cs
index 66b6ca6097..7e937a6e72 100644
--- a/csharp/src/Apache.Arrow/Types/TimestampType.cs
+++ b/csharp/src/Apache.Arrow/Types/DurationType.cs
@@ -13,38 +13,28 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-
-using System;
-
namespace Apache.Arrow.Types
{
- public sealed class TimestampType : FixedWidthType
+ public sealed class DurationType : TimeBasedType
{
- public static readonly TimestampType Default = new
TimestampType(TimeUnit.Millisecond, "+00:00");
-
- public override ArrowTypeId TypeId => ArrowTypeId.Timestamp;
- public override string Name => "timestamp";
- public override int BitWidth => 64;
-
- public TimeUnit Unit { get; }
- public string Timezone { get; }
-
- public bool IsTimeZoneAware => !string.IsNullOrWhiteSpace(Timezone);
-
- public TimestampType(
- TimeUnit unit = TimeUnit.Millisecond,
- string timezone = default)
+ public static readonly DurationType Second = new
DurationType(TimeUnit.Second);
+ public static readonly DurationType Millisecond = new
DurationType(TimeUnit.Millisecond);
+ public static readonly DurationType Microsecond = new
DurationType(TimeUnit.Microsecond);
+ public static readonly DurationType Nanosecond = new
DurationType(TimeUnit.Nanosecond);
+ private static readonly DurationType[] _types = new DurationType[] {
Second, Millisecond, Microsecond, Nanosecond };
+
+ private DurationType(TimeUnit unit)
+ : base(unit)
{
- Unit = unit;
- Timezone = timezone;
}
- public TimestampType(
- TimeUnit unit = TimeUnit.Millisecond,
- TimeZoneInfo timezone = default)
+ public override ArrowTypeId TypeId => ArrowTypeId.Duration;
+ public override string Name => "duration";
+ public override int BitWidth => 64;
+
+ public static DurationType FromTimeUnit(TimeUnit unit)
{
- Unit = unit;
- Timezone = timezone?.BaseUtcOffset.ToTimeZoneOffsetString();
+ return _types[(int)unit];
}
public override void Accept(IArrowTypeVisitor visitor) => Accept(this,
visitor);
diff --git a/csharp/src/Apache.Arrow/Types/IArrowType.cs
b/csharp/src/Apache.Arrow/Types/IArrowType.cs
index 447db47329..cdf423e56f 100644
--- a/csharp/src/Apache.Arrow/Types/IArrowType.cs
+++ b/csharp/src/Apache.Arrow/Types/IArrowType.cs
@@ -48,6 +48,7 @@ namespace Apache.Arrow.Types
Dictionary,
Map,
FixedSizeList,
+ Duration,
}
public interface IArrowType
diff --git a/csharp/src/Apache.Arrow/Types/IArrowType.cs
b/csharp/src/Apache.Arrow/Types/TimeBasedType.cs
similarity index 53%
copy from csharp/src/Apache.Arrow/Types/IArrowType.cs
copy to csharp/src/Apache.Arrow/Types/TimeBasedType.cs
index 447db47329..ffcd3f5a7a 100644
--- a/csharp/src/Apache.Arrow/Types/IArrowType.cs
+++ b/csharp/src/Apache.Arrow/Types/TimeBasedType.cs
@@ -13,52 +13,15 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-
namespace Apache.Arrow.Types
{
- public enum ArrowTypeId
+ public abstract class TimeBasedType : FixedWidthType
{
- Null,
- Boolean,
- UInt8,
- Int8,
- UInt16,
- Int16,
- UInt32,
- Int32,
- UInt64,
- Int64,
- HalfFloat,
- Float,
- Double,
- String,
- Binary,
- FixedSizedBinary,
- Date32,
- Date64,
- Timestamp,
- Time32,
- Time64,
- Interval,
- Decimal128,
- Decimal256,
- List,
- Struct,
- Union,
- Dictionary,
- Map,
- FixedSizeList,
- }
-
- public interface IArrowType
- {
- ArrowTypeId TypeId { get; }
-
- string Name { get; }
-
- void Accept(IArrowTypeVisitor visitor);
+ public TimeUnit Unit { get; }
- bool IsFixedWidth { get; }
-
+ protected TimeBasedType(TimeUnit unit)
+ {
+ Unit = unit;
+ }
}
}
diff --git a/csharp/src/Apache.Arrow/Types/TimeType.cs
b/csharp/src/Apache.Arrow/Types/TimeType.cs
index 48c7fdb5f1..b317df265d 100644
--- a/csharp/src/Apache.Arrow/Types/TimeType.cs
+++ b/csharp/src/Apache.Arrow/Types/TimeType.cs
@@ -13,18 +13,9 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-
namespace Apache.Arrow.Types
{
- public enum TimeUnit
- {
- Second,
- Millisecond,
- Microsecond,
- Nanosecond
- }
-
- public abstract class TimeType: FixedWidthType
+ public abstract class TimeType : TimeBasedType
{
public static readonly Time32Type Second = new
Time32Type(TimeUnit.Second);
public static readonly Time32Type Millisecond = new
Time32Type(TimeUnit.Millisecond);
@@ -32,11 +23,9 @@ namespace Apache.Arrow.Types
public static readonly Time64Type Nanosecond = new
Time64Type(TimeUnit.Nanosecond);
private static readonly TimeType[] _types = new TimeType[] { Second,
Millisecond, Microsecond, Nanosecond };
- public TimeUnit Unit { get; }
-
protected TimeType(TimeUnit unit)
+ : base(unit)
{
- Unit = unit;
}
public static TimeType FromTimeUnit(TimeUnit unit)
diff --git a/csharp/src/Apache.Arrow/Types/TimeUnit.cs
b/csharp/src/Apache.Arrow/Types/TimeUnit.cs
new file mode 100644
index 0000000000..ba60fa9bb8
--- /dev/null
+++ b/csharp/src/Apache.Arrow/Types/TimeUnit.cs
@@ -0,0 +1,58 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+using System.IO;
+using System;
+
+namespace Apache.Arrow.Types
+{
+ public enum TimeUnit
+ {
+ Second,
+ Millisecond,
+ Microsecond,
+ Nanosecond
+ }
+
+ /// <summary>
+ /// Conversions between .NET tick counts and integer counts of a <see cref="TimeUnit"/>.
+ /// </summary>
+ internal static class TimeUnitExtensions
+ {
+ // Conversion factors between ticks and the two sub-millisecond units.
+ private const long TicksPerMicrosecond = 10;
+ private const long NanosecondsPerTick = 100;
+
+ /// <summary>
+ /// Convert a tick count to a count of <paramref name="unit"/>.
+ /// </summary>
+ /// <param name="unit">Target time unit.</param>
+ /// <param name="ticks">Number of ticks to convert.</param>
+ /// <returns>The equivalent number of whole units.</returns>
+ /// <exception cref="InvalidDataException">The unit is not one of the four defined values.</exception>
+ public static long ConvertFromTicks(this TimeUnit unit, long ticks)
+ {
+ // Second, Millisecond and Microsecond use integer division, so any remainder is discarded.
+ // NOTE(review): the Nanosecond multiplication is not in a checked context here;
+ // confirm whether very large tick counts can overflow silently.
+ return unit switch
+ {
+ TimeUnit.Second => ticks / TimeSpan.TicksPerSecond,
+ TimeUnit.Millisecond => ticks / TimeSpan.TicksPerMillisecond,
+ TimeUnit.Microsecond => ticks / TicksPerMicrosecond,
+ TimeUnit.Nanosecond => ticks * NanosecondsPerTick,
+ _ => throw new InvalidDataException($"Unsupported time unit:
{unit}")
+ };
+ }
+
+ /// <summary>
+ /// Convert a count of <paramref name="unit"/> to a tick count.
+ /// </summary>
+ /// <param name="unit">Time unit that <paramref name="units"/> is expressed in.</param>
+ /// <param name="units">Number of units to convert.</param>
+ /// <returns>The equivalent number of ticks.</returns>
+ /// <exception cref="InvalidDataException">The unit is not one of the four defined values.</exception>
+ public static long ConvertToTicks(this TimeUnit unit, long units)
+ {
+ // Nanosecond uses integer division, so values are truncated to whole ticks.
+ // NOTE(review): the multiplications for the coarser units are not in a checked
+ // context here; confirm whether large unit counts can overflow silently.
+ return unit switch
+ {
+ TimeUnit.Second => units * TimeSpan.TicksPerSecond,
+ TimeUnit.Millisecond => units * TimeSpan.TicksPerMillisecond,
+ TimeUnit.Microsecond => units * TicksPerMicrosecond,
+ TimeUnit.Nanosecond => units / NanosecondsPerTick,
+ _ => throw new InvalidDataException($"Unsupported time unit:
{unit}")
+ };
+ }
+ }
+}
diff --git a/csharp/src/Apache.Arrow/Types/TimestampType.cs
b/csharp/src/Apache.Arrow/Types/TimestampType.cs
index 66b6ca6097..565eb62bb7 100644
--- a/csharp/src/Apache.Arrow/Types/TimestampType.cs
+++ b/csharp/src/Apache.Arrow/Types/TimestampType.cs
@@ -18,7 +18,7 @@ using System;
namespace Apache.Arrow.Types
{
- public sealed class TimestampType : FixedWidthType
+ public sealed class TimestampType : TimeBasedType
{
public static readonly TimestampType Default = new
TimestampType(TimeUnit.Millisecond, "+00:00");
@@ -26,7 +26,6 @@ namespace Apache.Arrow.Types
public override string Name => "timestamp";
public override int BitWidth => 64;
- public TimeUnit Unit { get; }
public string Timezone { get; }
public bool IsTimeZoneAware => !string.IsNullOrWhiteSpace(Timezone);
@@ -34,16 +33,16 @@ namespace Apache.Arrow.Types
public TimestampType(
TimeUnit unit = TimeUnit.Millisecond,
string timezone = default)
+ : base(unit)
{
- Unit = unit;
Timezone = timezone;
}
public TimestampType(
TimeUnit unit = TimeUnit.Millisecond,
TimeZoneInfo timezone = default)
+ : base(unit)
{
- Unit = unit;
Timezone = timezone?.BaseUtcOffset.ToTimeZoneOffsetString();
}
diff --git a/csharp/test/Apache.Arrow.IntegrationTest/JsonFile.cs
b/csharp/test/Apache.Arrow.IntegrationTest/JsonFile.cs
index d06249bef2..987a236a10 100644
--- a/csharp/test/Apache.Arrow.IntegrationTest/JsonFile.cs
+++ b/csharp/test/Apache.Arrow.IntegrationTest/JsonFile.cs
@@ -119,6 +119,7 @@ namespace Apache.Arrow.IntegrationTest
"fixedsizebinary" => new FixedSizeBinaryType(type.ByteWidth),
"date" => ToDateArrowType(type),
"time" => ToTimeArrowType(type),
+ "duration" => ToDurationArrowType(type),
"timestamp" => ToTimestampArrowType(type),
"list" => ToListArrowType(type, children),
"fixedsizelist" => ToFixedSizeListArrowType(type, children),
@@ -191,6 +192,18 @@ namespace Apache.Arrow.IntegrationTest
};
}
+        /// <summary>
+        /// Map the unit named in a JSON duration type description to the matching <see cref="DurationType"/>.
+        /// </summary>
+        /// <param name="type">JSON type description whose <c>Unit</c> is read.</param>
+        /// <returns>The duration type for that unit.</returns>
+        /// <exception cref="NotSupportedException">
+        /// The unit is not SECOND, MILLISECOND, MICROSECOND or NANOSECOND.
+        /// </exception>
+        private static IArrowType ToDurationArrowType(JsonArrowType type)
+        {
+            return type.Unit switch
+            {
+                "SECOND" => DurationType.Second,
+                "MILLISECOND" => DurationType.Millisecond,
+                "MICROSECOND" => DurationType.Microsecond,
+                "NANOSECOND" => DurationType.Nanosecond,
+                // Only the unit is reported: it is the only field this mapping reads.
+                _ => throw new NotSupportedException($"Duration type not supported: {type.Unit}")
+            };
+        }
+
private static IArrowType ToTimestampArrowType(JsonArrowType type)
{
return type.Unit switch
@@ -346,6 +359,7 @@ namespace Apache.Arrow.IntegrationTest
IArrowTypeVisitor<Date64Type>,
IArrowTypeVisitor<Time32Type>,
IArrowTypeVisitor<Time64Type>,
+ IArrowTypeVisitor<DurationType>,
IArrowTypeVisitor<TimestampType>,
IArrowTypeVisitor<StringType>,
IArrowTypeVisitor<BinaryType>,
@@ -396,6 +410,7 @@ namespace Apache.Arrow.IntegrationTest
public void Visit(DoubleType type) => GenerateArray<double,
DoubleArray>((v, n, c, nc, o) => new DoubleArray(v, n, c, nc, o));
public void Visit(Time32Type type) => GenerateArray<int,
Time32Array>((v, n, c, nc, o) => new Time32Array(type, v, n, c, nc, o));
public void Visit(Time64Type type) => GenerateLongArray<long,
Time64Array>((v, n, c, nc, o) => new Time64Array(type, v, n, c, nc, o), s =>
long.Parse(s));
+ public void Visit(DurationType type) => GenerateLongArray<long,
DurationArray>((v, n, c, nc, o) => new DurationArray(type, v, n, c, nc, o), s
=> long.Parse(s));
public void Visit(Decimal128Type type)
{
diff --git a/csharp/test/Apache.Arrow.Tests/ArrayTypeComparer.cs
b/csharp/test/Apache.Arrow.Tests/ArrayTypeComparer.cs
index ad3527c478..ceeffe42c6 100644
--- a/csharp/test/Apache.Arrow.Tests/ArrayTypeComparer.cs
+++ b/csharp/test/Apache.Arrow.Tests/ArrayTypeComparer.cs
@@ -23,8 +23,7 @@ namespace Apache.Arrow.Tests
IArrowTypeVisitor<TimestampType>,
IArrowTypeVisitor<Date32Type>,
IArrowTypeVisitor<Date64Type>,
- IArrowTypeVisitor<Time32Type>,
- IArrowTypeVisitor<Time64Type>,
+ IArrowTypeVisitor<TimeBasedType>,
IArrowTypeVisitor<FixedSizeBinaryType>,
IArrowTypeVisitor<ListType>,
IArrowTypeVisitor<FixedSizeListType>,
@@ -66,18 +65,11 @@ namespace Apache.Arrow.Tests
Assert.Equal(expectedType.Unit, actualType.Unit);
}
- public void Visit(Time32Type actualType)
+ public void Visit(TimeBasedType actualType)
{
- Assert.IsAssignableFrom<Time32Type>(_expectedType);
- var expectedType = (Time32Type)_expectedType;
-
- Assert.Equal(expectedType.Unit, actualType.Unit);
- }
-
- public void Visit(Time64Type actualType)
- {
- Assert.IsAssignableFrom<Time64Type>(_expectedType);
- var expectedType = (Time64Type)_expectedType;
+ Assert.IsAssignableFrom<TimeBasedType>(_expectedType);
+ Assert.Equal(_expectedType.TypeId, actualType.TypeId);
+ var expectedType = (TimeBasedType)_expectedType;
Assert.Equal(expectedType.Unit, actualType.Unit);
}
diff --git a/csharp/test/Apache.Arrow.Tests/ArrowArrayConcatenatorTests.cs
b/csharp/test/Apache.Arrow.Tests/ArrowArrayConcatenatorTests.cs
index 6f4c17a959..1d108d2123 100644
--- a/csharp/test/Apache.Arrow.Tests/ArrowArrayConcatenatorTests.cs
+++ b/csharp/test/Apache.Arrow.Tests/ArrowArrayConcatenatorTests.cs
@@ -125,6 +125,7 @@ namespace Apache.Arrow.Tests
IArrowTypeVisitor<Decimal256Type>,
IArrowTypeVisitor<Date32Type>,
IArrowTypeVisitor<Date64Type>,
+ IArrowTypeVisitor<DurationType>,
IArrowTypeVisitor<TimestampType>,
IArrowTypeVisitor<ListType>,
IArrowTypeVisitor<FixedSizeListType>,
@@ -252,6 +253,33 @@ namespace Apache.Arrow.Tests
ExpectedArray = resultBuilder.Build();
}
+            /// <summary>
+            /// Build one <see cref="DurationArray"/> per base data list as concatenation
+            /// input, plus a single array holding all values in order as the expected result.
+            /// </summary>
+            /// <param name="type">Duration type used for every array built here.</param>
+            public void Visit(DurationType type)
+            {
+                DurationArray.Builder resultBuilder = new DurationArray.Builder(type).Reserve(_baseDataTotalElementCount);
+
+                for (int i = 0; i < _baseDataListCount; i++)
+                {
+                    List<int?> dataList = _baseData[i];
+                    DurationArray.Builder builder = new DurationArray.Builder(type).Reserve(dataList.Count);
+                    foreach (int? value in dataList)
+                    {
+                        // Each value goes to both the per-list input array and the expected array.
+                        if (value.HasValue)
+                        {
+                            builder.Append(value.Value);
+                            resultBuilder.Append(value.Value);
+                        }
+                        else
+                        {
+                            builder.AppendNull();
+                            resultBuilder.AppendNull();
+                        }
+                    }
+                    TestTargetArrayList.Add(builder.Build());
+                }
+
+                ExpectedArray = resultBuilder.Build();
+            }
public void Visit(BinaryType type)
{
diff --git a/csharp/test/Apache.Arrow.Tests/ArrowArrayTests.cs
b/csharp/test/Apache.Arrow.Tests/ArrowArrayTests.cs
index d4f0d8dfd0..96918ff091 100644
--- a/csharp/test/Apache.Arrow.Tests/ArrowArrayTests.cs
+++ b/csharp/test/Apache.Arrow.Tests/ArrowArrayTests.cs
@@ -200,6 +200,7 @@ namespace Apache.Arrow.Tests
IArrowArrayVisitor<Date64Array>,
IArrowArrayVisitor<Time32Array>,
IArrowArrayVisitor<Time64Array>,
+ IArrowArrayVisitor<DurationArray>,
#if NET5_0_OR_GREATER
IArrowArrayVisitor<HalfFloatArray>,
#endif
@@ -243,6 +244,7 @@ namespace Apache.Arrow.Tests
}
public void Visit(Time32Array array) => ValidateArrays(array);
public void Visit(Time64Array array) => ValidateArrays(array);
+ public void Visit(DurationArray array) => ValidateArrays(array);
#if NET5_0_OR_GREATER
public void Visit(HalfFloatArray array) => ValidateArrays(array);
diff --git a/csharp/test/Apache.Arrow.Tests/ArrowReaderVerifier.cs
b/csharp/test/Apache.Arrow.Tests/ArrowReaderVerifier.cs
index 8b41763a70..75d62b25d7 100644
--- a/csharp/test/Apache.Arrow.Tests/ArrowReaderVerifier.cs
+++ b/csharp/test/Apache.Arrow.Tests/ArrowReaderVerifier.cs
@@ -85,6 +85,7 @@ namespace Apache.Arrow.Tests
IArrowArrayVisitor<Date64Array>,
IArrowArrayVisitor<Time32Array>,
IArrowArrayVisitor<Time64Array>,
+ IArrowArrayVisitor<DurationArray>,
IArrowArrayVisitor<ListArray>,
IArrowArrayVisitor<FixedSizeListArray>,
IArrowArrayVisitor<StringArray>,
@@ -127,6 +128,7 @@ namespace Apache.Arrow.Tests
public void Visit(Date64Array array) => CompareArrays(array);
public void Visit(Time32Array array) => CompareArrays(array);
public void Visit(Time64Array array) => CompareArrays(array);
+ public void Visit(DurationArray array) => CompareArrays(array);
public void Visit(ListArray array) => CompareArrays(array);
public void Visit(FixedSizeListArray array) =>
CompareArrays(array);
public void Visit(FixedSizeBinaryArray array) =>
CompareArrays(array);
diff --git a/csharp/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs
b/csharp/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs
index 7aee37b821..4efa94e8c7 100644
--- a/csharp/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs
+++ b/csharp/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs
@@ -117,6 +117,11 @@ namespace Apache.Arrow.Tests
.Field(f => f.Name("map").DataType(new
MapType(StringType.Default, Int32Type.Default)).Nullable(false))
+ .Field(f =>
f.Name("duration_s").DataType(DurationType.Second).Nullable(false))
+ .Field(f =>
f.Name("duration_ms").DataType(DurationType.Millisecond).Nullable(true))
+ .Field(f =>
f.Name("duration_us").DataType(DurationType.Microsecond).Nullable(false))
+ .Field(f =>
f.Name("duration_ns").DataType(DurationType.Nanosecond).Nullable(true))
+
// Checking wider characters.
.Field(f => f.Name("hello 你好
😄").DataType(BooleanType.Default).Nullable(true))
@@ -182,6 +187,11 @@ namespace Apache.Arrow.Tests
yield return pa.field("map", pa.map_(pa.@string(),
pa.int32()), false);
+ yield return pa.field("duration_s", pa.duration("s"), false);
+ yield return pa.field("duration_ms", pa.duration("ms"), true);
+ yield return pa.field("duration_us", pa.duration("us"), false);
+ yield return pa.field("duration_ns", pa.duration("ns"), true);
+
yield return pa.field("hello 你好 😄", pa.bool_(), true);
}
}
@@ -520,8 +530,9 @@ namespace Apache.Arrow.Tests
List(0, 0, 1, 2, 4, 10),
pa.array(List("one", "two", "three", "four",
"five", "six", "seven", "eight", "nine", "ten")),
pa.array(List(1, 2, 3, 4, 5, 6, 7, 8, 9, 10))),
+ pa.array(List(1234, 2345, 3456, null, 6789),
pa.duration("ms")),
}),
- new[] { "col1", "col2", "col3", "col4", "col5", "col6",
"col7", "col8", "col9", "col10" });
+ new[] { "col1", "col2", "col3", "col4", "col5", "col6",
"col7", "col8", "col9", "col10", "col11" });
dynamic batch = table.to_batches()[0];
@@ -598,6 +609,9 @@ namespace Apache.Arrow.Tests
Assert.Equal(5, col10.Length);
Assert.Equal(new int[] { 0, 0, 1, 2, 4, 10},
col10.ValueOffsets.ToArray());
Assert.Equal(new long?[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 },
((Int64Array)col10.Values).ToList().ToArray());
+
+ DurationArray col11 = (DurationArray)recordBatch.Column("col11");
+ Assert.Equal(5, col11.Length);
}
[SkippableFact]
diff --git a/csharp/test/Apache.Arrow.Tests/DurationArrayTests.cs
b/csharp/test/Apache.Arrow.Tests/DurationArrayTests.cs
new file mode 100644
index 0000000000..0890d356b8
--- /dev/null
+++ b/csharp/test/Apache.Arrow.Tests/DurationArrayTests.cs
@@ -0,0 +1,134 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using Apache.Arrow.Types;
+using Xunit;
+
+namespace Apache.Arrow.Tests
+{
+    /// <summary>
+    /// Unit tests for <see cref="DurationArray"/> and <see cref="DurationArray.Builder"/>:
+    /// appending nulls, <see cref="TimeSpan"/> values and raw 64-bit durations, then
+    /// reading the single appended element back from the built array.
+    /// </summary>
+    public class DurationArrayTests
+    {
+        // A TimeSpan tick is 100 ns, so one microsecond is 10 ticks.
+        // (TimeSpan.TicksPerMicrosecond only exists on .NET 7+, hence the local constant.)
+        private const long TicksPerMicrosecond = 10;
+
+        // Sample TimeSpans that need progressively finer units to be represented exactly.
+        private static readonly TimeSpan?[] _exampleTimeSpans =
+        {
+            null,
+            TimeSpan.FromDays(10.5),
+            TimeSpan.FromHours(10.5),
+            TimeSpan.FromMinutes(10.5),
+            TimeSpan.FromSeconds(10.5),
+            TimeSpan.FromMilliseconds(10.5),
+            TimeSpan.FromTicks(11),
+        };
+
+        // Sample raw duration values; they are appended as-is for every unit.
+        private static readonly long?[] _exampleDurations =
+        {
+            null,
+            1,
+            1000,
+            1000000,
+            1000000000,
+            1000000000000,
+        };
+
+        // Every duration type exercised by the theories below.
+        private static readonly DurationType[] _durationTypes =
+        {
+            DurationType.Second,
+            DurationType.Millisecond,
+            DurationType.Microsecond,
+            DurationType.Nanosecond,
+        };
+
+        /// <summary>
+        /// Theory data: each example TimeSpan paired with every duration type whose unit
+        /// compares greater than or equal to the precision that TimeSpan requires.
+        /// </summary>
+        /// <remarks>
+        /// NOTE(review): the <c>&gt;=</c> filter assumes <see cref="TimeUnit"/> members are
+        /// declared from coarsest (Second) to finest (Nanosecond) - confirm against TimeUnit.cs.
+        /// </remarks>
+        public static IEnumerable<object[]> GetTimeSpansData() =>
+            from timeSpan in _exampleTimeSpans
+            from type in _durationTypes
+            where type.Unit >= RequiredPrecision(timeSpan)
+            select new object[] { timeSpan, type };
+
+        /// <summary>
+        /// Theory data: the cross product of the example raw durations and all duration types.
+        /// </summary>
+        public static IEnumerable<object[]> GetDurationsData() =>
+            from duration in _exampleDurations
+            from type in _durationTypes
+            select new object[] { duration, type };
+
+        /// <summary>
+        /// Returns the coarsest <see cref="TimeUnit"/> in which <paramref name="timeSpan"/>
+        /// is a whole number of units; <c>null</c> maps to <see cref="TimeUnit.Second"/>.
+        /// </summary>
+        /// <param name="timeSpan">The value to classify, or <c>null</c>.</param>
+        /// <returns>The unit required to hold the value without a fractional remainder.</returns>
+        private static TimeUnit RequiredPrecision(TimeSpan? timeSpan)
+        {
+            if (timeSpan == null) { return TimeUnit.Second; }
+
+            // Work on raw ticks and compare remainders with != 0: the C# remainder takes the
+            // sign of the dividend, so a "> 0" test would classify every negative TimeSpan
+            // as whole seconds. Using ticks also avoids the .NET 7-only Microseconds property.
+            long ticks = timeSpan.Value.Ticks;
+            if (ticks % TicksPerMicrosecond != 0) { return TimeUnit.Nanosecond; }
+            if (ticks % TimeSpan.TicksPerMillisecond != 0) { return TimeUnit.Microsecond; }
+            if (ticks % TimeSpan.TicksPerSecond != 0) { return TimeUnit.Millisecond; }
+            return TimeUnit.Second;
+        }
+
+        /// <summary>Tests for <c>Builder.AppendNull</c>.</summary>
+        public class AppendNull
+        {
+            [Fact]
+            public void AppendThenGetGivesNull()
+            {
+                // Arrange
+                var builder = new DurationArray.Builder(DurationType.Millisecond);
+
+                // Act
+                builder = builder.AppendNull();
+
+                // Assert
+                var array = builder.Build();
+                Assert.Equal(1, array.Length);
+                Assert.Null(array.GetValue(0));
+                Assert.Null(array.GetTimeSpan(0));
+            }
+        }
+
+        /// <summary>Tests for appending <see cref="TimeSpan"/> values.</summary>
+        public class AppendTimeSpan
+        {
+            [Theory]
+            [MemberData(nameof(GetTimeSpansData), MemberType = typeof(DurationArrayTests))]
+            public void AppendTimeSpanGivesSameTimeSpan(TimeSpan? timeSpan, DurationType type)
+            {
+                // Arrange
+                var builder = new DurationArray.Builder(type);
+
+                // Act
+                builder = builder.Append(timeSpan);
+
+                // Assert
+                var array = builder.Build();
+                Assert.Equal(1, array.Length);
+                Assert.Equal(timeSpan, array.GetTimeSpan(0));
+            }
+        }
+
+        /// <summary>Tests for appending raw 64-bit duration values.</summary>
+        public class AppendDuration
+        {
+            [Theory]
+            [MemberData(nameof(GetDurationsData), MemberType = typeof(DurationArrayTests))]
+            public void AppendDurationGivesSameDuration(long? duration, DurationType type)
+            {
+                // Arrange
+                var builder = new DurationArray.Builder(type);
+
+                // Act
+                builder = builder.Append(duration);
+
+                // Assert
+                var array = builder.Build();
+                Assert.Equal(1, array.Length);
+                Assert.Equal(duration, array.GetValue(0));
+            }
+        }
+    }
+}
diff --git a/csharp/test/Apache.Arrow.Tests/TestData.cs
b/csharp/test/Apache.Arrow.Tests/TestData.cs
index e3a40dbdaf..3af6efb97b 100644
--- a/csharp/test/Apache.Arrow.Tests/TestData.cs
+++ b/csharp/test/Apache.Arrow.Tests/TestData.cs
@@ -113,6 +113,7 @@ namespace Apache.Arrow.Tests
IArrowTypeVisitor<Date64Type>,
IArrowTypeVisitor<Time32Type>,
IArrowTypeVisitor<Time64Type>,
+ IArrowTypeVisitor<DurationType>,
IArrowTypeVisitor<Int8Type>,
IArrowTypeVisitor<Int16Type>,
IArrowTypeVisitor<Int32Type>,
@@ -233,6 +234,18 @@ namespace Apache.Arrow.Tests
Array = builder.Build();
}
+        public void Visit(DurationType type)
+        {
+            // Reserve room for Length entries, then append the values 0 .. Length - 1.
+            var durations = new DurationArray.Builder(type).Reserve(Length);
+            for (var value = 0; value < Length; value++)
+            {
+                durations.Append(value);
+            }
+            Array = durations.Build();
+        }
+ }
+
public void Visit(TimestampType type)
{
var builder = new TimestampArray.Builder().Reserve(Length);
diff --git a/dev/archery/archery/integration/datagen.py
b/dev/archery/archery/integration/datagen.py
index 01672fbe74..f229012366 100644
--- a/dev/archery/archery/integration/datagen.py
+++ b/dev/archery/archery/integration/datagen.py
@@ -1804,8 +1804,7 @@ def get_generated_json_files(tempdir=None):
generate_datetime_case(),
- generate_duration_case()
- .skip_tester('C#'),
+ generate_duration_case(),
generate_interval_case()
.skip_tester('C#')
diff --git a/docs/source/status.rst b/docs/source/status.rst
index 6024c1d317..c8c0e6dfc1 100644
--- a/docs/source/status.rst
+++ b/docs/source/status.rst
@@ -54,7 +54,7 @@ Data Types
+-------------------+-------+-------+-------+------------+-------+-------+-------+-------+
| Timestamp | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓
| |
+-------------------+-------+-------+-------+------------+-------+-------+-------+-------+
-| Duration | ✓ | ✓ | ✓ | ✓ | | ✓ | ✓
| |
+| Duration | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓
| |
+-------------------+-------+-------+-------+------------+-------+-------+-------+-------+
| Interval | ✓ | ✓ | ✓ | | | ✓ | ✓
| |
+-------------------+-------+-------+-------+------------+-------+-------+-------+-------+