This is an automated email from the ASF dual-hosted git repository.
curth pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-dotnet.git
The following commit(s) were added to refs/heads/main by this push:
new b14c4a3 fix: Validate additional type information when concatenating
(#309)
b14c4a3 is described below
commit b14c4a35662f87e69c83f9e89c5610b4a1a35a9d
Author: Curt Hagenlocher <[email protected]>
AuthorDate: Wed Apr 8 16:17:41 2026 -0700
fix: Validate additional type information when concatenating (#309)
## What's Changed
When concatenating arrays, ensure that incompatible types are not being
concatenated (which can result in corrupt data).
Closes #307.
---
src/Apache.Arrow/Arrays/ArrayDataConcatenator.cs | 142 ++++++++++++++-
.../ArrowArrayConcatenatorTests.cs | 191 +++++++++++++++++++++
2 files changed, 331 insertions(+), 2 deletions(-)
diff --git a/src/Apache.Arrow/Arrays/ArrayDataConcatenator.cs
b/src/Apache.Arrow/Arrays/ArrayDataConcatenator.cs
index 6cd1caa..764292a 100644
--- a/src/Apache.Arrow/Arrays/ArrayDataConcatenator.cs
+++ b/src/Apache.Arrow/Arrays/ArrayDataConcatenator.cs
@@ -92,10 +92,11 @@ namespace Apache.Arrow
public void Visit(FixedWidthType type)
{
CheckData(type, 2);
+ var resolvedType = CheckAndResolveFixedWidthType(type);
ArrowBuffer validityBuffer = ConcatenateValidityBuffer();
- ArrowBuffer valueBuffer =
ConcatenateFixedWidthTypeValueBuffer(1, type);
+ ArrowBuffer valueBuffer =
ConcatenateFixedWidthTypeValueBuffer(1, resolvedType);
- Result = new ArrayData(type, _totalLength, _totalNullCount, 0,
new ArrowBuffer[] { validityBuffer, valueBuffer });
+ Result = new ArrayData(resolvedType, _totalLength,
_totalNullCount, 0, new ArrowBuffer[] { validityBuffer, valueBuffer });
}
public void Visit(BinaryType type) =>
ConcatenateVariableBinaryArrayData(type);
@@ -217,6 +218,7 @@ namespace Apache.Arrow
public void Visit(FixedSizeListType type)
{
CheckData(type, 1);
+ CheckFixedSizeListCompatibility(type);
var listSize = type.ListSize;
ArrowBuffer validityBuffer = ConcatenateValidityBuffer();
@@ -513,6 +515,142 @@ namespace Apache.Arrow
}
}
+ private FixedWidthType
CheckAndResolveFixedWidthType(FixedWidthType type)
+ {
+ switch (type)
+ {
+ case Decimal32Type d:
+ return CheckDecimalCompatibility<Decimal32Type>(d,
d.Precision, d.Scale,
+ (other) => ((Decimal32Type)other).Precision,
+ (other) => ((Decimal32Type)other).Scale,
+ (p, s) => new Decimal32Type(p, s));
+ case Decimal64Type d:
+ return CheckDecimalCompatibility<Decimal64Type>(d,
d.Precision, d.Scale,
+ (other) => ((Decimal64Type)other).Precision,
+ (other) => ((Decimal64Type)other).Scale,
+ (p, s) => new Decimal64Type(p, s));
+ case Decimal128Type d:
+ return CheckDecimalCompatibility<Decimal128Type>(d,
d.Precision, d.Scale,
+ (other) => ((Decimal128Type)other).Precision,
+ (other) => ((Decimal128Type)other).Scale,
+ (p, s) => new Decimal128Type(p, s));
+ case Decimal256Type d:
+ return CheckDecimalCompatibility<Decimal256Type>(d,
d.Precision, d.Scale,
+ (other) => ((Decimal256Type)other).Precision,
+ (other) => ((Decimal256Type)other).Scale,
+ (p, s) => new Decimal256Type(p, s));
+ case TimestampType ts:
+ CheckTimestampCompatibility(ts);
+ return type;
+ case TimeBasedType tb:
+ CheckTimeUnitCompatibility(tb);
+ return type;
+ case IntervalType iv:
+ CheckIntervalCompatibility(iv);
+ return type;
+ case FixedSizeBinaryType fsb:
+ CheckFixedSizeBinaryCompatibility(fsb);
+ return type;
+ default:
+ return type;
+ }
+ }
+
+ private T CheckDecimalCompatibility<T>(T firstType, int
firstPrecision, int firstScale,
+ Func<IArrowType, int> getPrecision, Func<IArrowType, int>
getScale,
+ Func<int, int, T> factory) where T : FixedSizeBinaryType
+ {
+ int maxPrecision = firstPrecision;
+
+ for (int i = 1; i < _arrayDataList.Count; i++)
+ {
+ IArrowType otherType = _arrayDataList[i].DataType;
+ int otherScale = getScale(otherType);
+ if (otherScale != firstScale)
+ {
+ throw new ArgumentException(
+ $"Cannot concatenate {firstType.Name} arrays with
different scales: {firstScale} vs {otherScale}");
+ }
+ maxPrecision = Math.Max(maxPrecision,
getPrecision(otherType));
+ }
+
+ if (maxPrecision != firstPrecision)
+ {
+ return factory(maxPrecision, firstScale);
+ }
+ return firstType;
+ }
+
+ private void CheckTimestampCompatibility(TimestampType firstType)
+ {
+ for (int i = 1; i < _arrayDataList.Count; i++)
+ {
+ var otherType = (TimestampType)_arrayDataList[i].DataType;
+ if (otherType.Unit != firstType.Unit)
+ {
+ throw new ArgumentException(
+ $"Cannot concatenate Timestamp arrays with
different time units: {firstType.Unit} vs {otherType.Unit}");
+ }
+ if (otherType.Timezone != firstType.Timezone)
+ {
+ throw new ArgumentException(
+ $"Cannot concatenate Timestamp arrays with
different timezones: \"{firstType.Timezone}\" vs \"{otherType.Timezone}\"");
+ }
+ }
+ }
+
+ private void CheckTimeUnitCompatibility(TimeBasedType firstType)
+ {
+ for (int i = 1; i < _arrayDataList.Count; i++)
+ {
+ var otherType = (TimeBasedType)_arrayDataList[i].DataType;
+ if (otherType.Unit != firstType.Unit)
+ {
+ throw new ArgumentException(
+ $"Cannot concatenate {firstType.Name} arrays with
different time units: {firstType.Unit} vs {otherType.Unit}");
+ }
+ }
+ }
+
+ private void CheckIntervalCompatibility(IntervalType firstType)
+ {
+ for (int i = 1; i < _arrayDataList.Count; i++)
+ {
+ var otherType = (IntervalType)_arrayDataList[i].DataType;
+ if (otherType.Unit != firstType.Unit)
+ {
+ throw new ArgumentException(
+ $"Cannot concatenate Interval arrays with
different units: {firstType.Unit} vs {otherType.Unit}");
+ }
+ }
+ }
+
+ private void CheckFixedSizeBinaryCompatibility(FixedSizeBinaryType
firstType)
+ {
+ for (int i = 1; i < _arrayDataList.Count; i++)
+ {
+ var otherType =
(FixedSizeBinaryType)_arrayDataList[i].DataType;
+ if (otherType.ByteWidth != firstType.ByteWidth)
+ {
+ throw new ArgumentException(
+ $"Cannot concatenate FixedSizeBinary arrays with
different byte widths: {firstType.ByteWidth} vs {otherType.ByteWidth}");
+ }
+ }
+ }
+
+ private void CheckFixedSizeListCompatibility(FixedSizeListType
firstType)
+ {
+ for (int i = 1; i < _arrayDataList.Count; i++)
+ {
+ var otherType =
(FixedSizeListType)_arrayDataList[i].DataType;
+ if (otherType.ListSize != firstType.ListSize)
+ {
+ throw new ArgumentException(
+ $"Cannot concatenate FixedSizeList arrays with
different list sizes: {firstType.ListSize} vs {otherType.ListSize}");
+ }
+ }
+ }
+
private void ConcatenateVariableBinaryArrayData(IArrowType type)
{
CheckData(type, 3);
diff --git a/test/Apache.Arrow.Tests/ArrowArrayConcatenatorTests.cs
b/test/Apache.Arrow.Tests/ArrowArrayConcatenatorTests.cs
index 3067ac4..14ae6ec 100644
--- a/test/Apache.Arrow.Tests/ArrowArrayConcatenatorTests.cs
+++ b/test/Apache.Arrow.Tests/ArrowArrayConcatenatorTests.cs
@@ -16,6 +16,7 @@
using System;
using System.Collections.Generic;
using System.Linq;
+using Apache.Arrow.Arrays;
using Apache.Arrow.Scalars;
using Apache.Arrow.Types;
using Xunit;
@@ -824,5 +825,195 @@ namespace Apache.Arrow.Tests
array, _sliceParameters[targetIndex].Offset,
_sliceParameters[targetIndex].Length);
}
}
+
+ // --- Parameterized type mismatch / compatibility tests ---
+
+ [Fact]
+ public void TestDecimal32ScaleMismatchThrows()
+ {
+ var a = new Decimal32Array.Builder(new Decimal32Type(7,
3)).Append(1).Build();
+ var b = new Decimal32Array.Builder(new Decimal32Type(7,
2)).Append(2).Build();
+
+ var ex = Assert.Throws<ArgumentException>(() =>
+ ArrowArrayConcatenator.Concatenate(new IArrowArray[] { a, b
}));
+ Assert.Contains("scale", ex.Message,
StringComparison.OrdinalIgnoreCase);
+ }
+
+ [Fact]
+ public void TestDecimal64ScaleMismatchThrows()
+ {
+ var a = new Decimal64Array.Builder(new Decimal64Type(14,
4)).Append(1).Build();
+ var b = new Decimal64Array.Builder(new Decimal64Type(14,
2)).Append(2).Build();
+
+ var ex = Assert.Throws<ArgumentException>(() =>
+ ArrowArrayConcatenator.Concatenate(new IArrowArray[] { a, b
}));
+ Assert.Contains("scale", ex.Message,
StringComparison.OrdinalIgnoreCase);
+ }
+
+ [Fact]
+ public void TestDecimal128ScaleMismatchThrows()
+ {
+ var a = new Decimal128Array.Builder(new Decimal128Type(14,
10)).Append(1).Build();
+ var b = new Decimal128Array.Builder(new Decimal128Type(14,
5)).Append(2).Build();
+
+ var ex = Assert.Throws<ArgumentException>(() =>
+ ArrowArrayConcatenator.Concatenate(new IArrowArray[] { a, b
}));
+ Assert.Contains("scale", ex.Message,
StringComparison.OrdinalIgnoreCase);
+ }
+
+ [Fact]
+ public void TestDecimal256ScaleMismatchThrows()
+ {
+ var a = new Decimal256Array.Builder(new Decimal256Type(14,
10)).Append(1).Build();
+ var b = new Decimal256Array.Builder(new Decimal256Type(14,
3)).Append(2).Build();
+
+ var ex = Assert.Throws<ArgumentException>(() =>
+ ArrowArrayConcatenator.Concatenate(new IArrowArray[] { a, b
}));
+ Assert.Contains("scale", ex.Message,
StringComparison.OrdinalIgnoreCase);
+ }
+
+ [Fact]
+ public void TestDecimal64PrecisionPromoted()
+ {
+ var a = new Decimal64Array.Builder(new Decimal64Type(10,
4)).Append(1).Append(2).Build();
+ var b = new Decimal64Array.Builder(new Decimal64Type(14,
4)).Append(3).Build();
+ var c = new Decimal64Array.Builder(new Decimal64Type(12,
4)).Append(4).Build();
+
+ var result = ArrowArrayConcatenator.Concatenate(new IArrowArray[]
{ a, b, c });
+ var resultType = (Decimal64Type)result.Data.DataType;
+
+ Assert.Equal(14, resultType.Precision);
+ Assert.Equal(4, resultType.Scale);
+ Assert.Equal(4, result.Length);
+ }
+
+ [Fact]
+ public void TestDecimal128PrecisionPromoted()
+ {
+ var a = new Decimal128Array.Builder(new Decimal128Type(10,
5)).Append(1).Build();
+ var b = new Decimal128Array.Builder(new Decimal128Type(20,
5)).Append(2).Build();
+
+ var result = ArrowArrayConcatenator.Concatenate(new IArrowArray[]
{ a, b });
+ var resultType = (Decimal128Type)result.Data.DataType;
+
+ Assert.Equal(20, resultType.Precision);
+ Assert.Equal(5, resultType.Scale);
+ Assert.Equal(2, result.Length);
+ }
+
+ [Fact]
+ public void TestTimestampUnitMismatchThrows()
+ {
+ var a = new TimestampArray.Builder(new
TimestampType(TimeUnit.Millisecond,
"+00:00")).Append(DateTimeOffset.UtcNow).Build();
+ var b = new TimestampArray.Builder(new
TimestampType(TimeUnit.Microsecond,
"+00:00")).Append(DateTimeOffset.UtcNow).Build();
+
+ var ex = Assert.Throws<ArgumentException>(() =>
+ ArrowArrayConcatenator.Concatenate(new IArrowArray[] { a, b
}));
+ Assert.Contains("time unit", ex.Message,
StringComparison.OrdinalIgnoreCase);
+ }
+
+ [Fact]
+ public void TestTimestampTimezoneMismatchThrows()
+ {
+ var a = new TimestampArray.Builder(new
TimestampType(TimeUnit.Millisecond,
"+00:00")).Append(DateTimeOffset.UtcNow).Build();
+ var b = new TimestampArray.Builder(new
TimestampType(TimeUnit.Millisecond,
"+05:00")).Append(DateTimeOffset.UtcNow).Build();
+
+ var ex = Assert.Throws<ArgumentException>(() =>
+ ArrowArrayConcatenator.Concatenate(new IArrowArray[] { a, b
}));
+ Assert.Contains("timezone", ex.Message,
StringComparison.OrdinalIgnoreCase);
+ }
+
+ [Fact]
+ public void TestTime32UnitMismatchThrows()
+ {
+ var a = new Time32Array.Builder(new
Time32Type(TimeUnit.Second)).Append(1).Build();
+ var b = new Time32Array.Builder(new
Time32Type(TimeUnit.Millisecond)).Append(2).Build();
+
+ var ex = Assert.Throws<ArgumentException>(() =>
+ ArrowArrayConcatenator.Concatenate(new IArrowArray[] { a, b
}));
+ Assert.Contains("time unit", ex.Message,
StringComparison.OrdinalIgnoreCase);
+ }
+
+ [Fact]
+ public void TestTime64UnitMismatchThrows()
+ {
+ var a = new Time64Array.Builder(new
Time64Type(TimeUnit.Microsecond)).Append(1).Build();
+ var b = new Time64Array.Builder(new
Time64Type(TimeUnit.Nanosecond)).Append(2).Build();
+
+ var ex = Assert.Throws<ArgumentException>(() =>
+ ArrowArrayConcatenator.Concatenate(new IArrowArray[] { a, b
}));
+ Assert.Contains("time unit", ex.Message,
StringComparison.OrdinalIgnoreCase);
+ }
+
+ [Fact]
+ public void TestDurationUnitMismatchThrows()
+ {
+ var a = new
DurationArray.Builder(DurationType.Second).Append(1).Build();
+ var b = new
DurationArray.Builder(DurationType.Nanosecond).Append(2).Build();
+
+ var ex = Assert.Throws<ArgumentException>(() =>
+ ArrowArrayConcatenator.Concatenate(new IArrowArray[] { a, b
}));
+ Assert.Contains("time unit", ex.Message,
StringComparison.OrdinalIgnoreCase);
+ }
+
+ [Fact]
+ public void TestIntervalUnitMismatchThrows()
+ {
+ var a = new YearMonthIntervalArray.Builder().Append(new
YearMonthInterval(1)).Build();
+ var b = new DayTimeIntervalArray.Builder().Append(new
DayTimeInterval(1, 100)).Build();
+
+ var ex = Assert.Throws<ArgumentException>(() =>
+ ArrowArrayConcatenator.Concatenate(new IArrowArray[] { a, b
}));
+ Assert.Contains("unit", ex.Message,
StringComparison.OrdinalIgnoreCase);
+ }
+
+ [Fact]
+ public void TestFixedSizeBinaryWidthMismatchThrows()
+ {
+ var a = BuildFixedSizeBinaryArray(4, new byte[] { 1, 2, 3, 4 });
+ var b = BuildFixedSizeBinaryArray(8, new byte[] { 1, 2, 3, 4, 5,
6, 7, 8 });
+
+ var ex = Assert.Throws<ArgumentException>(() =>
+ ArrowArrayConcatenator.Concatenate(new IArrowArray[] { a, b
}));
+ Assert.Contains("byte width", ex.Message,
StringComparison.OrdinalIgnoreCase);
+ }
+
+ [Fact]
+ public void TestFixedSizeListSizeMismatchThrows()
+ {
+ var a = BuildFixedSizeListArray(2, new[] { 1, 2, 3, 4 });
+ var b = BuildFixedSizeListArray(3, new[] { 5, 6, 7, 8, 9, 10 });
+
+ var ex = Assert.Throws<ArgumentException>(() =>
+ ArrowArrayConcatenator.Concatenate(new IArrowArray[] { a, b
}));
+ Assert.Contains("list size", ex.Message,
StringComparison.OrdinalIgnoreCase);
+ }
+
+ private static FixedSizeListArray BuildFixedSizeListArray(int
listSize, int[] flatValues)
+ {
+ var type = new FixedSizeListType(Int32Type.Default, listSize);
+ var builder = new FixedSizeListArray.Builder(type);
+ int listCount = flatValues.Length / listSize;
+ var valueBuilder = (Int32Array.Builder)builder.ValueBuilder;
+ for (int i = 0; i < listCount; i++)
+ {
+ builder.Append();
+ for (int j = 0; j < listSize; j++)
+ {
+ valueBuilder.Append(flatValues[i * listSize + j]);
+ }
+ }
+ return builder.Build();
+ }
+
+ private static FixedSizeBinaryArray BuildFixedSizeBinaryArray(int
byteWidth, byte[] singleValue)
+ {
+ var validityBuffer = new
ArrowBuffer.BitmapBuilder().Append(true).Build();
+ var dataBuffer = new
ArrowBuffer.Builder<byte>().Append(singleValue).Build();
+ var arrayData = new ArrayData(
+ new FixedSizeBinaryType(byteWidth), 1, 0, 0,
+ new[] { validityBuffer, dataBuffer });
+ return new FixedSizeBinaryArray(arrayData);
+ }
}
}