This is an automated email from the ASF dual-hosted git repository.

curth pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-dotnet.git


The following commit(s) were added to refs/heads/main by this push:
     new b14c4a3  fix: Validate additional type information when concatenating 
(#309)
b14c4a3 is described below

commit b14c4a35662f87e69c83f9e89c5610b4a1a35a9d
Author: Curt Hagenlocher <[email protected]>
AuthorDate: Wed Apr 8 16:17:41 2026 -0700

    fix: Validate additional type information when concatenating (#309)
    
    ## What's Changed
    
    When concatenating arrays, ensure that incompatible types are not being
    concatenated (which can result in corrupt data).
    
    Closes #307.
---
 src/Apache.Arrow/Arrays/ArrayDataConcatenator.cs   | 142 ++++++++++++++-
 .../ArrowArrayConcatenatorTests.cs                 | 191 +++++++++++++++++++++
 2 files changed, 331 insertions(+), 2 deletions(-)

diff --git a/src/Apache.Arrow/Arrays/ArrayDataConcatenator.cs 
b/src/Apache.Arrow/Arrays/ArrayDataConcatenator.cs
index 6cd1caa..764292a 100644
--- a/src/Apache.Arrow/Arrays/ArrayDataConcatenator.cs
+++ b/src/Apache.Arrow/Arrays/ArrayDataConcatenator.cs
@@ -92,10 +92,11 @@ namespace Apache.Arrow
             public void Visit(FixedWidthType type)
             {
                 CheckData(type, 2);
+                var resolvedType = CheckAndResolveFixedWidthType(type);
                 ArrowBuffer validityBuffer = ConcatenateValidityBuffer();
-                ArrowBuffer valueBuffer = 
ConcatenateFixedWidthTypeValueBuffer(1, type);
+                ArrowBuffer valueBuffer = 
ConcatenateFixedWidthTypeValueBuffer(1, resolvedType);
 
-                Result = new ArrayData(type, _totalLength, _totalNullCount, 0, 
new ArrowBuffer[] { validityBuffer, valueBuffer });
+                Result = new ArrayData(resolvedType, _totalLength, 
_totalNullCount, 0, new ArrowBuffer[] { validityBuffer, valueBuffer });
             }
 
             public void Visit(BinaryType type) => 
ConcatenateVariableBinaryArrayData(type);
@@ -217,6 +218,7 @@ namespace Apache.Arrow
             public void Visit(FixedSizeListType type)
             {
                 CheckData(type, 1);
+                CheckFixedSizeListCompatibility(type);
                 var listSize = type.ListSize;
                 ArrowBuffer validityBuffer = ConcatenateValidityBuffer();
 
@@ -513,6 +515,142 @@ namespace Apache.Arrow
                 }
             }
 
+            private FixedWidthType 
CheckAndResolveFixedWidthType(FixedWidthType type)
+            {
+                switch (type)
+                {
+                    case Decimal32Type d:
+                        return CheckDecimalCompatibility<Decimal32Type>(d, 
d.Precision, d.Scale,
+                            (other) => ((Decimal32Type)other).Precision,
+                            (other) => ((Decimal32Type)other).Scale,
+                            (p, s) => new Decimal32Type(p, s));
+                    case Decimal64Type d:
+                        return CheckDecimalCompatibility<Decimal64Type>(d, 
d.Precision, d.Scale,
+                            (other) => ((Decimal64Type)other).Precision,
+                            (other) => ((Decimal64Type)other).Scale,
+                            (p, s) => new Decimal64Type(p, s));
+                    case Decimal128Type d:
+                        return CheckDecimalCompatibility<Decimal128Type>(d, 
d.Precision, d.Scale,
+                            (other) => ((Decimal128Type)other).Precision,
+                            (other) => ((Decimal128Type)other).Scale,
+                            (p, s) => new Decimal128Type(p, s));
+                    case Decimal256Type d:
+                        return CheckDecimalCompatibility<Decimal256Type>(d, 
d.Precision, d.Scale,
+                            (other) => ((Decimal256Type)other).Precision,
+                            (other) => ((Decimal256Type)other).Scale,
+                            (p, s) => new Decimal256Type(p, s));
+                    case TimestampType ts:
+                        CheckTimestampCompatibility(ts);
+                        return type;
+                    case TimeBasedType tb:
+                        CheckTimeUnitCompatibility(tb);
+                        return type;
+                    case IntervalType iv:
+                        CheckIntervalCompatibility(iv);
+                        return type;
+                    case FixedSizeBinaryType fsb:
+                        CheckFixedSizeBinaryCompatibility(fsb);
+                        return type;
+                    default:
+                        return type;
+                }
+            }
+
+            private T CheckDecimalCompatibility<T>(T firstType, int 
firstPrecision, int firstScale,
+                Func<IArrowType, int> getPrecision, Func<IArrowType, int> 
getScale,
+                Func<int, int, T> factory) where T : FixedSizeBinaryType
+            {
+                int maxPrecision = firstPrecision;
+
+                for (int i = 1; i < _arrayDataList.Count; i++)
+                {
+                    IArrowType otherType = _arrayDataList[i].DataType;
+                    int otherScale = getScale(otherType);
+                    if (otherScale != firstScale)
+                    {
+                        throw new ArgumentException(
+                            $"Cannot concatenate {firstType.Name} arrays with 
different scales: {firstScale} vs {otherScale}");
+                    }
+                    maxPrecision = Math.Max(maxPrecision, 
getPrecision(otherType));
+                }
+
+                if (maxPrecision != firstPrecision)
+                {
+                    return factory(maxPrecision, firstScale);
+                }
+                return firstType;
+            }
+
+            private void CheckTimestampCompatibility(TimestampType firstType)
+            {
+                for (int i = 1; i < _arrayDataList.Count; i++)
+                {
+                    var otherType = (TimestampType)_arrayDataList[i].DataType;
+                    if (otherType.Unit != firstType.Unit)
+                    {
+                        throw new ArgumentException(
+                            $"Cannot concatenate Timestamp arrays with 
different time units: {firstType.Unit} vs {otherType.Unit}");
+                    }
+                    if (otherType.Timezone != firstType.Timezone)
+                    {
+                        throw new ArgumentException(
+                            $"Cannot concatenate Timestamp arrays with 
different timezones: \"{firstType.Timezone}\" vs \"{otherType.Timezone}\"");
+                    }
+                }
+            }
+
+            private void CheckTimeUnitCompatibility(TimeBasedType firstType)
+            {
+                for (int i = 1; i < _arrayDataList.Count; i++)
+                {
+                    var otherType = (TimeBasedType)_arrayDataList[i].DataType;
+                    if (otherType.Unit != firstType.Unit)
+                    {
+                        throw new ArgumentException(
+                            $"Cannot concatenate {firstType.Name} arrays with 
different time units: {firstType.Unit} vs {otherType.Unit}");
+                    }
+                }
+            }
+
+            private void CheckIntervalCompatibility(IntervalType firstType)
+            {
+                for (int i = 1; i < _arrayDataList.Count; i++)
+                {
+                    var otherType = (IntervalType)_arrayDataList[i].DataType;
+                    if (otherType.Unit != firstType.Unit)
+                    {
+                        throw new ArgumentException(
+                            $"Cannot concatenate Interval arrays with 
different units: {firstType.Unit} vs {otherType.Unit}");
+                    }
+                }
+            }
+
+            private void CheckFixedSizeBinaryCompatibility(FixedSizeBinaryType 
firstType)
+            {
+                for (int i = 1; i < _arrayDataList.Count; i++)
+                {
+                    var otherType = 
(FixedSizeBinaryType)_arrayDataList[i].DataType;
+                    if (otherType.ByteWidth != firstType.ByteWidth)
+                    {
+                        throw new ArgumentException(
+                            $"Cannot concatenate FixedSizeBinary arrays with 
different byte widths: {firstType.ByteWidth} vs {otherType.ByteWidth}");
+                    }
+                }
+            }
+
+            private void CheckFixedSizeListCompatibility(FixedSizeListType 
firstType)
+            {
+                for (int i = 1; i < _arrayDataList.Count; i++)
+                {
+                    var otherType = 
(FixedSizeListType)_arrayDataList[i].DataType;
+                    if (otherType.ListSize != firstType.ListSize)
+                    {
+                        throw new ArgumentException(
+                            $"Cannot concatenate FixedSizeList arrays with 
different list sizes: {firstType.ListSize} vs {otherType.ListSize}");
+                    }
+                }
+            }
+
             private void ConcatenateVariableBinaryArrayData(IArrowType type)
             {
                 CheckData(type, 3);
diff --git a/test/Apache.Arrow.Tests/ArrowArrayConcatenatorTests.cs 
b/test/Apache.Arrow.Tests/ArrowArrayConcatenatorTests.cs
index 3067ac4..14ae6ec 100644
--- a/test/Apache.Arrow.Tests/ArrowArrayConcatenatorTests.cs
+++ b/test/Apache.Arrow.Tests/ArrowArrayConcatenatorTests.cs
@@ -16,6 +16,7 @@
 using System;
 using System.Collections.Generic;
 using System.Linq;
+using Apache.Arrow.Arrays;
 using Apache.Arrow.Scalars;
 using Apache.Arrow.Types;
 using Xunit;
@@ -824,5 +825,195 @@ namespace Apache.Arrow.Tests
                     array, _sliceParameters[targetIndex].Offset, 
_sliceParameters[targetIndex].Length);
             }
         }
+
+        // --- Parameterized type mismatch / compatibility tests ---
+
+        [Fact]
+        public void TestDecimal32ScaleMismatchThrows()
+        {
+            var a = new Decimal32Array.Builder(new Decimal32Type(7, 
3)).Append(1).Build();
+            var b = new Decimal32Array.Builder(new Decimal32Type(7, 
2)).Append(2).Build();
+
+            var ex = Assert.Throws<ArgumentException>(() =>
+                ArrowArrayConcatenator.Concatenate(new IArrowArray[] { a, b 
}));
+            Assert.Contains("scale", ex.Message, 
StringComparison.OrdinalIgnoreCase);
+        }
+
+        [Fact]
+        public void TestDecimal64ScaleMismatchThrows()
+        {
+            var a = new Decimal64Array.Builder(new Decimal64Type(14, 
4)).Append(1).Build();
+            var b = new Decimal64Array.Builder(new Decimal64Type(14, 
2)).Append(2).Build();
+
+            var ex = Assert.Throws<ArgumentException>(() =>
+                ArrowArrayConcatenator.Concatenate(new IArrowArray[] { a, b 
}));
+            Assert.Contains("scale", ex.Message, 
StringComparison.OrdinalIgnoreCase);
+        }
+
+        [Fact]
+        public void TestDecimal128ScaleMismatchThrows()
+        {
+            var a = new Decimal128Array.Builder(new Decimal128Type(14, 
10)).Append(1).Build();
+            var b = new Decimal128Array.Builder(new Decimal128Type(14, 
5)).Append(2).Build();
+
+            var ex = Assert.Throws<ArgumentException>(() =>
+                ArrowArrayConcatenator.Concatenate(new IArrowArray[] { a, b 
}));
+            Assert.Contains("scale", ex.Message, 
StringComparison.OrdinalIgnoreCase);
+        }
+
+        [Fact]
+        public void TestDecimal256ScaleMismatchThrows()
+        {
+            var a = new Decimal256Array.Builder(new Decimal256Type(14, 
10)).Append(1).Build();
+            var b = new Decimal256Array.Builder(new Decimal256Type(14, 
3)).Append(2).Build();
+
+            var ex = Assert.Throws<ArgumentException>(() =>
+                ArrowArrayConcatenator.Concatenate(new IArrowArray[] { a, b 
}));
+            Assert.Contains("scale", ex.Message, 
StringComparison.OrdinalIgnoreCase);
+        }
+
+        [Fact]
+        public void TestDecimal64PrecisionPromoted()
+        {
+            var a = new Decimal64Array.Builder(new Decimal64Type(10, 
4)).Append(1).Append(2).Build();
+            var b = new Decimal64Array.Builder(new Decimal64Type(14, 
4)).Append(3).Build();
+            var c = new Decimal64Array.Builder(new Decimal64Type(12, 
4)).Append(4).Build();
+
+            var result = ArrowArrayConcatenator.Concatenate(new IArrowArray[] 
{ a, b, c });
+            var resultType = (Decimal64Type)result.Data.DataType;
+
+            Assert.Equal(14, resultType.Precision);
+            Assert.Equal(4, resultType.Scale);
+            Assert.Equal(4, result.Length);
+        }
+
+        [Fact]
+        public void TestDecimal128PrecisionPromoted()
+        {
+            var a = new Decimal128Array.Builder(new Decimal128Type(10, 
5)).Append(1).Build();
+            var b = new Decimal128Array.Builder(new Decimal128Type(20, 
5)).Append(2).Build();
+
+            var result = ArrowArrayConcatenator.Concatenate(new IArrowArray[] 
{ a, b });
+            var resultType = (Decimal128Type)result.Data.DataType;
+
+            Assert.Equal(20, resultType.Precision);
+            Assert.Equal(5, resultType.Scale);
+            Assert.Equal(2, result.Length);
+        }
+
+        [Fact]
+        public void TestTimestampUnitMismatchThrows()
+        {
+            var a = new TimestampArray.Builder(new 
TimestampType(TimeUnit.Millisecond, 
"+00:00")).Append(DateTimeOffset.UtcNow).Build();
+            var b = new TimestampArray.Builder(new 
TimestampType(TimeUnit.Microsecond, 
"+00:00")).Append(DateTimeOffset.UtcNow).Build();
+
+            var ex = Assert.Throws<ArgumentException>(() =>
+                ArrowArrayConcatenator.Concatenate(new IArrowArray[] { a, b 
}));
+            Assert.Contains("time unit", ex.Message, 
StringComparison.OrdinalIgnoreCase);
+        }
+
+        [Fact]
+        public void TestTimestampTimezoneMismatchThrows()
+        {
+            var a = new TimestampArray.Builder(new 
TimestampType(TimeUnit.Millisecond, 
"+00:00")).Append(DateTimeOffset.UtcNow).Build();
+            var b = new TimestampArray.Builder(new 
TimestampType(TimeUnit.Millisecond, 
"+05:00")).Append(DateTimeOffset.UtcNow).Build();
+
+            var ex = Assert.Throws<ArgumentException>(() =>
+                ArrowArrayConcatenator.Concatenate(new IArrowArray[] { a, b 
}));
+            Assert.Contains("timezone", ex.Message, 
StringComparison.OrdinalIgnoreCase);
+        }
+
+        [Fact]
+        public void TestTime32UnitMismatchThrows()
+        {
+            var a = new Time32Array.Builder(new 
Time32Type(TimeUnit.Second)).Append(1).Build();
+            var b = new Time32Array.Builder(new 
Time32Type(TimeUnit.Millisecond)).Append(2).Build();
+
+            var ex = Assert.Throws<ArgumentException>(() =>
+                ArrowArrayConcatenator.Concatenate(new IArrowArray[] { a, b 
}));
+            Assert.Contains("time unit", ex.Message, 
StringComparison.OrdinalIgnoreCase);
+        }
+
+        [Fact]
+        public void TestTime64UnitMismatchThrows()
+        {
+            var a = new Time64Array.Builder(new 
Time64Type(TimeUnit.Microsecond)).Append(1).Build();
+            var b = new Time64Array.Builder(new 
Time64Type(TimeUnit.Nanosecond)).Append(2).Build();
+
+            var ex = Assert.Throws<ArgumentException>(() =>
+                ArrowArrayConcatenator.Concatenate(new IArrowArray[] { a, b 
}));
+            Assert.Contains("time unit", ex.Message, 
StringComparison.OrdinalIgnoreCase);
+        }
+
+        [Fact]
+        public void TestDurationUnitMismatchThrows()
+        {
+            var a = new 
DurationArray.Builder(DurationType.Second).Append(1).Build();
+            var b = new 
DurationArray.Builder(DurationType.Nanosecond).Append(2).Build();
+
+            var ex = Assert.Throws<ArgumentException>(() =>
+                ArrowArrayConcatenator.Concatenate(new IArrowArray[] { a, b 
}));
+            Assert.Contains("time unit", ex.Message, 
StringComparison.OrdinalIgnoreCase);
+        }
+
+        [Fact]
+        public void TestIntervalUnitMismatchThrows()
+        {
+            var a = new YearMonthIntervalArray.Builder().Append(new 
YearMonthInterval(1)).Build();
+            var b = new DayTimeIntervalArray.Builder().Append(new 
DayTimeInterval(1, 100)).Build();
+
+            var ex = Assert.Throws<ArgumentException>(() =>
+                ArrowArrayConcatenator.Concatenate(new IArrowArray[] { a, b 
}));
+            Assert.Contains("unit", ex.Message, 
StringComparison.OrdinalIgnoreCase);
+        }
+
+        [Fact]
+        public void TestFixedSizeBinaryWidthMismatchThrows()
+        {
+            var a = BuildFixedSizeBinaryArray(4, new byte[] { 1, 2, 3, 4 });
+            var b = BuildFixedSizeBinaryArray(8, new byte[] { 1, 2, 3, 4, 5, 
6, 7, 8 });
+
+            var ex = Assert.Throws<ArgumentException>(() =>
+                ArrowArrayConcatenator.Concatenate(new IArrowArray[] { a, b 
}));
+            Assert.Contains("byte width", ex.Message, 
StringComparison.OrdinalIgnoreCase);
+        }
+
+        [Fact]
+        public void TestFixedSizeListSizeMismatchThrows()
+        {
+            var a = BuildFixedSizeListArray(2, new[] { 1, 2, 3, 4 });
+            var b = BuildFixedSizeListArray(3, new[] { 5, 6, 7, 8, 9, 10 });
+
+            var ex = Assert.Throws<ArgumentException>(() =>
+                ArrowArrayConcatenator.Concatenate(new IArrowArray[] { a, b 
}));
+            Assert.Contains("list size", ex.Message, 
StringComparison.OrdinalIgnoreCase);
+        }
+
+        private static FixedSizeListArray BuildFixedSizeListArray(int 
listSize, int[] flatValues)
+        {
+            var type = new FixedSizeListType(Int32Type.Default, listSize);
+            var builder = new FixedSizeListArray.Builder(type);
+            int listCount = flatValues.Length / listSize;
+            var valueBuilder = (Int32Array.Builder)builder.ValueBuilder;
+            for (int i = 0; i < listCount; i++)
+            {
+                builder.Append();
+                for (int j = 0; j < listSize; j++)
+                {
+                    valueBuilder.Append(flatValues[i * listSize + j]);
+                }
+            }
+            return builder.Build();
+        }
+
+        private static FixedSizeBinaryArray BuildFixedSizeBinaryArray(int 
byteWidth, byte[] singleValue)
+        {
+            var validityBuffer = new 
ArrowBuffer.BitmapBuilder().Append(true).Build();
+            var dataBuffer = new 
ArrowBuffer.Builder<byte>().Append(singleValue).Build();
+            var arrayData = new ArrayData(
+                new FixedSizeBinaryType(byteWidth), 1, 0, 0,
+                new[] { validityBuffer, dataBuffer });
+            return new FixedSizeBinaryArray(arrayData);
+        }
     }
 }

Reply via email to