Copilot commented on code in PR #308:
URL: https://github.com/apache/arrow-dotnet/pull/308#discussion_r3045035711


##########
src/Apache.Arrow/Arrays/RunEndEncodedArray.cs:
##########
@@ -0,0 +1,222 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License.  You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+using System;
+using Apache.Arrow.Types;
+
+namespace Apache.Arrow;
+
+/// <summary>
+/// Represents a run-end encoded array.
+/// A run-end encoded array stores consecutive runs of the same value more 
efficiently.
+/// It contains two child arrays: run_ends (Int16/Int32/Int64) and values (any 
type).
+/// The run_ends array stores the cumulative end positions of each run.
+/// </summary>
+public class RunEndEncodedArray : Array
+{
+    /// <summary>
+    /// Gets the run ends array (Int16Array, Int32Array, or Int64Array).
+    /// This array contains the cumulative end indices for each run.
+    /// </summary>
+    public IArrowArray RunEnds { get; }
+
+    /// <summary>
+    /// Gets the values array.
+    /// This array contains the actual values that are run-length encoded.
+    /// </summary>
+    public IArrowArray Values { get; }
+
+    /// <summary>
+    /// Creates a new RunEndEncodedArray from ArrayData.
+    /// </summary>
+    /// <param name="data">The array data containing run ends and values as 
children.</param>
+    public RunEndEncodedArray(ArrayData data)
+        : this(data, ArrowArrayFactory.BuildArray(data.Children[0]), 
ArrowArrayFactory.BuildArray(data.Children[1]))
+    {
+    }
+
+    /// <summary>
+    /// Creates a new RunEndEncodedArray with specified run ends and values 
arrays.
+    /// </summary>
+    /// <param name="runEnds">The run ends array (must be Int16Array, 
Int32Array, or Int64Array).</param>
+    /// <param name="values">The values array (can be any type).</param>
+    public RunEndEncodedArray(IArrowArray runEnds, IArrowArray values)
+        : this(CreateArrayData(runEnds, values), runEnds, values)
+    {
+    }
+
+    private RunEndEncodedArray(ArrayData data, IArrowArray runEnds, 
IArrowArray values)
+        : base(data)
+    {
+        data.EnsureBufferCount(0); // REE arrays have no buffers, only children
+        data.EnsureDataType(ArrowTypeId.RunEndEncoded);
+
+        ValidateRunEndsType(runEnds);
+        RunEnds = runEnds;
+        Values = values;
+    }
+
+    private static ArrayData CreateArrayData(IArrowArray runEnds, IArrowArray 
values)
+    {
+        ValidateRunEndsType(runEnds);
+
+        // The logical length of a REE array is determined by the last value 
in run_ends
+        int logicalLength = GetLogicalLength(runEnds);
+
+        var dataType = new RunEndEncodedType(runEnds.Data.DataType, 
values.Data.DataType);
+
+        return new ArrayData(
+            dataType,
+            logicalLength,
+            nullCount: 0, // REE arrays don't have a validity bitmap
+            offset: 0,
+            buffers: [],
+            children: [runEnds.Data, values.Data]);
+    }
+
+    private static void ValidateRunEndsType(IArrowArray runEnds)
+    {
+        ArrowTypeId typeId = runEnds.Data.DataType.TypeId;
+        if (typeId != ArrowTypeId.Int16 &&
+            typeId != ArrowTypeId.Int32 &&
+            typeId != ArrowTypeId.Int64)
+        {
+            throw new ArgumentException(
+                $"Run ends array must be Int16, Int32, or Int64, but got 
{typeId}",
+                nameof(runEnds));
+        }
+    }
+
+    private static int GetLogicalLength(IArrowArray runEnds)
+    {
+        if (runEnds.Length == 0)
+        {
+            return 0;
+        }
+
+        // Get the last run end value which represents the logical length
+        switch (runEnds)
+        {
+            case Int16Array int16Array:
+                return int16Array.GetValue(int16Array.Length - 1) ?? throw new 
ArgumentException("invalid length");
+            case Int32Array int32Array:
+                return int32Array.GetValue(int32Array.Length - 1) ?? throw new 
ArgumentException("invalid length");
+            case Int64Array int64Array:
+                {
+                    long? lastValue = int64Array.GetValue(int64Array.Length - 
1);
+                    if (lastValue.HasValue && lastValue.Value > int.MaxValue)
+                    {
+                        throw new ArgumentException("Run ends value exceeds 
maximum supported length.");
+                    }
+                    return (int)(lastValue ?? throw new 
ArgumentException("invalid length"));
+                }
+            default:
+                throw new InvalidOperationException($"Unexpected run ends 
array type: {runEnds.Data.DataType.TypeId}");
+        }
+    }
+
+    /// <summary>
+    /// Finds the physical index in the run_ends array that contains the 
specified logical index.
+    /// </summary>
+    /// <param name="logicalIndex">The logical index in the decoded 
array.</param>
+    /// <returns>The physical index in the run_ends/values arrays.</returns>
+    public int FindPhysicalIndex(int logicalIndex)
+    {
+        if (logicalIndex < 0 || logicalIndex >= Length)
+        {
+            throw new ArgumentOutOfRangeException(nameof(logicalIndex));
+        }
+
+        // Binary search to find the run that contains this logical index
+        return RunEnds switch
+        {
+            Int16Array int16Array => BinarySearchRunEnds(int16Array, 
logicalIndex),
+            Int32Array int32Array => BinarySearchRunEnds(int32Array, 
logicalIndex),
+            Int64Array int64Array => BinarySearchRunEnds(int64Array, 
logicalIndex),

Review Comment:
   FindPhysicalIndex doesn't account for the REE array's Data.Offset (slices). 
For a sliced RunEndEncodedArray, logical indices are relative to the slice, but 
the binary search compares against run_ends values from the unsliced physical 
representation, producing incorrect physical indices. Adjust the searched 
logical index by Data.Offset (or otherwise make FindPhysicalIndex slice-aware) 
and add a test covering FindPhysicalIndex on a sliced REE array.
   ```suggestion
           int searchLogicalIndex = logicalIndex + Data.Offset;
   
           // Binary search to find the run that contains this logical index.
           // For sliced arrays, run_ends remain in the original logical 
coordinate
           // space, so we need to account for the array offset before 
searching.
           return RunEnds switch
           {
               Int16Array int16Array => BinarySearchRunEnds(int16Array, 
searchLogicalIndex),
               Int32Array int32Array => BinarySearchRunEnds(int32Array, 
searchLogicalIndex),
               Int64Array int64Array => BinarySearchRunEnds(int64Array, 
searchLogicalIndex),
   ```



##########
src/Apache.Arrow/Arrays/RunEndEncodedArray.cs:
##########
@@ -0,0 +1,222 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License.  You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+using System;
+using Apache.Arrow.Types;
+
+namespace Apache.Arrow;
+
+/// <summary>
+/// Represents a run-end encoded array.
+/// A run-end encoded array stores consecutive runs of the same value more 
efficiently.
+/// It contains two child arrays: run_ends (Int16/Int32/Int64) and values (any 
type).
+/// The run_ends array stores the cumulative end positions of each run.
+/// </summary>
+public class RunEndEncodedArray : Array
+{
+    /// <summary>
+    /// Gets the run ends array (Int16Array, Int32Array, or Int64Array).
+    /// This array contains the cumulative end indices for each run.
+    /// </summary>
+    public IArrowArray RunEnds { get; }
+
+    /// <summary>
+    /// Gets the values array.
+    /// This array contains the actual values that are run-length encoded.
+    /// </summary>
+    public IArrowArray Values { get; }
+
+    /// <summary>
+    /// Creates a new RunEndEncodedArray from ArrayData.
+    /// </summary>
+    /// <param name="data">The array data containing run ends and values as 
children.</param>
+    public RunEndEncodedArray(ArrayData data)
+        : this(data, ArrowArrayFactory.BuildArray(data.Children[0]), 
ArrowArrayFactory.BuildArray(data.Children[1]))
+    {
+    }
+
+    /// <summary>
+    /// Creates a new RunEndEncodedArray with specified run ends and values 
arrays.
+    /// </summary>
+    /// <param name="runEnds">The run ends array (must be Int16Array, 
Int32Array, or Int64Array).</param>
+    /// <param name="values">The values array (can be any type).</param>
+    public RunEndEncodedArray(IArrowArray runEnds, IArrowArray values)
+        : this(CreateArrayData(runEnds, values), runEnds, values)
+    {
+    }
+
+    private RunEndEncodedArray(ArrayData data, IArrowArray runEnds, 
IArrowArray values)
+        : base(data)
+    {
+        data.EnsureBufferCount(0); // REE arrays have no buffers, only children
+        data.EnsureDataType(ArrowTypeId.RunEndEncoded);
+
+        ValidateRunEndsType(runEnds);
+        RunEnds = runEnds;
+        Values = values;
+    }
+
+    private static ArrayData CreateArrayData(IArrowArray runEnds, IArrowArray 
values)
+    {
+        ValidateRunEndsType(runEnds);
+
+        // The logical length of a REE array is determined by the last value 
in run_ends
+        int logicalLength = GetLogicalLength(runEnds);
+
+        var dataType = new RunEndEncodedType(runEnds.Data.DataType, 
values.Data.DataType);
+
+        return new ArrayData(
+            dataType,
+            logicalLength,
+            nullCount: 0, // REE arrays don't have a validity bitmap
+            offset: 0,
+            buffers: [],
+            children: [runEnds.Data, values.Data]);
+    }
+
+    private static void ValidateRunEndsType(IArrowArray runEnds)
+    {
+        ArrowTypeId typeId = runEnds.Data.DataType.TypeId;
+        if (typeId != ArrowTypeId.Int16 &&
+            typeId != ArrowTypeId.Int32 &&
+            typeId != ArrowTypeId.Int64)
+        {
+            throw new ArgumentException(
+                $"Run ends array must be Int16, Int32, or Int64, but got 
{typeId}",
+                nameof(runEnds));
+        }
+    }
+
+    private static int GetLogicalLength(IArrowArray runEnds)
+    {
+        if (runEnds.Length == 0)
+        {
+            return 0;
+        }
+
+        // Get the last run end value which represents the logical length
+        switch (runEnds)
+        {
+            case Int16Array int16Array:
+                return int16Array.GetValue(int16Array.Length - 1) ?? throw new 
ArgumentException("invalid length");
+            case Int32Array int32Array:
+                return int32Array.GetValue(int32Array.Length - 1) ?? throw new 
ArgumentException("invalid length");
+            case Int64Array int64Array:
+                {
+                    long? lastValue = int64Array.GetValue(int64Array.Length - 
1);
+                    if (lastValue.HasValue && lastValue.Value > int.MaxValue)
+                    {
+                        throw new ArgumentException("Run ends value exceeds 
maximum supported length.");
+                    }
+                    return (int)(lastValue ?? throw new 
ArgumentException("invalid length"));
+                }
+            default:
+                throw new InvalidOperationException($"Unexpected run ends 
array type: {runEnds.Data.DataType.TypeId}");
+        }

Review Comment:
   GetLogicalLength can return a negative logical length if the last run_end 
value is negative (Int16/Int32/Int64). ArrayData doesn't validate length >= 0, 
so this can create invalid arrays and downstream failures. Validate that the 
final run_end is >= 0 (and ideally > 0 when runEnds.Length > 0) and throw a 
clear ArgumentException when invalid.



##########
src/Apache.Arrow/Arrays/ArrayDataConcatenator.cs:
##########
@@ -286,6 +287,201 @@ public void Visit(UnionType type)
 
             public void Visit(MapType type) => 
ConcatenateLists(type.UnsortedKey()); /* Can't tell if the output is still 
sorted */
 
+            public void Visit(RunEndEncodedType type)
+            {
+                ArrowTypeId runEndsTypeId = type.RunEndsDataType.TypeId;
+                if (runEndsTypeId != ArrowTypeId.Int16 &&
+                    runEndsTypeId != ArrowTypeId.Int32 &&
+                    runEndsTypeId != ArrowTypeId.Int64)
+                {
+                    throw new InvalidOperationException(
+                        $"Run-ends array must be Int16, Int32, or Int64, but 
got {runEndsTypeId}");
+                }
+
+                var slicedValues = new List<ArrayData>(_arrayDataList.Count);
+                ArrowBuffer.Builder<short> int16Builder = null;
+                ArrowBuffer.Builder<int> int32Builder = null;
+                ArrowBuffer.Builder<long> int64Builder = null;
+
+                switch (runEndsTypeId)
+                {
+                    case ArrowTypeId.Int16: int16Builder = new 
ArrowBuffer.Builder<short>(); break;
+                    case ArrowTypeId.Int32: int32Builder = new 
ArrowBuffer.Builder<int>(); break;
+                    case ArrowTypeId.Int64: int64Builder = new 
ArrowBuffer.Builder<long>(); break;
+                }
+
+                long baseOffset = 0;
+                int physicalRunCount = 0;
+
+                foreach (ArrayData arrayData in _arrayDataList)
+                {
+                    arrayData.EnsureDataType(type.TypeId);
+
+                    ArrayData runEndsData = arrayData.Children[0];
+                    ArrayData valuesData = arrayData.Children[1];
+
+                    if (runEndsData.DataType.TypeId != runEndsTypeId)
+                    {
+                        throw new ArgumentException(
+                            $"All run-end encoded arrays must have the same 
run-ends type. Expected <{runEndsTypeId}> but got 
<{runEndsData.DataType.TypeId}>.");
+                    }
+                    if (valuesData.DataType.TypeId != 
type.ValuesDataType.TypeId)
+                    {
+                        throw new ArgumentException(
+                            $"All run-end encoded arrays must have the same 
values type. Expected <{type.ValuesDataType.TypeId}> but got 
<{valuesData.DataType.TypeId}>.");
+                    }
+
+                    if (arrayData.Length == 0)
+                    {
+                        continue;
+                    }
+
+                    int logicalOffset = arrayData.Offset;
+                    int logicalLength = arrayData.Length;
+                    int logicalEnd = logicalOffset + logicalLength;
+
+                    int physicalStart;
+                    int physicalEndExclusive;
+
+                    switch (runEndsTypeId)
+                    {
+                        case ArrowTypeId.Int16:
+                            {
+                                ReadOnlySpan<short> re = 
runEndsData.Buffers[1].Span.CastTo<short>()
+                                    .Slice(runEndsData.Offset, 
runEndsData.Length);
+                                physicalStart = FindPhysicalStartInt16(re, 
logicalOffset);
+                                physicalEndExclusive = 
FindPhysicalEndExclusiveInt16(re, logicalEnd, physicalStart);
+                                for (int p = physicalStart; p < 
physicalEndExclusive; p++)
+                                {
+                                    int adjustedEnd = Math.Min((int)re[p], 
logicalEnd) - logicalOffset;
+                                    
int16Builder.Append(checked((short)(baseOffset + adjustedEnd)));
+                                }

Review Comment:
   For Int16 run_ends, concatenation appends checked((short)(baseOffset + 
adjustedEnd)). If the concatenated logical length exceeds Int16.MaxValue, this 
will throw OverflowException mid-build. Consider proactively validating that 
the final logical length fits in the chosen run_ends type and throw a targeted 
ArgumentException with a clearer message.



##########
src/Apache.Arrow/Arrays/RunEndEncodedArray.cs:
##########
@@ -0,0 +1,222 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License.  You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+using System;
+using Apache.Arrow.Types;
+
+namespace Apache.Arrow;
+
+/// <summary>
+/// Represents a run-end encoded array.
+/// A run-end encoded array stores consecutive runs of the same value more 
efficiently.
+/// It contains two child arrays: run_ends (Int16/Int32/Int64) and values (any 
type).
+/// The run_ends array stores the cumulative end positions of each run.
+/// </summary>
+public class RunEndEncodedArray : Array
+{
+    /// <summary>
+    /// Gets the run ends array (Int16Array, Int32Array, or Int64Array).
+    /// This array contains the cumulative end indices for each run.
+    /// </summary>
+    public IArrowArray RunEnds { get; }
+
+    /// <summary>
+    /// Gets the values array.
+    /// This array contains the actual values that are run-length encoded.
+    /// </summary>
+    public IArrowArray Values { get; }
+
+    /// <summary>
+    /// Creates a new RunEndEncodedArray from ArrayData.
+    /// </summary>
+    /// <param name="data">The array data containing run ends and values as 
children.</param>
+    public RunEndEncodedArray(ArrayData data)
+        : this(data, ArrowArrayFactory.BuildArray(data.Children[0]), 
ArrowArrayFactory.BuildArray(data.Children[1]))
+    {
+    }
+
+    /// <summary>
+    /// Creates a new RunEndEncodedArray with specified run ends and values 
arrays.
+    /// </summary>
+    /// <param name="runEnds">The run ends array (must be Int16Array, 
Int32Array, or Int64Array).</param>
+    /// <param name="values">The values array (can be any type).</param>
+    public RunEndEncodedArray(IArrowArray runEnds, IArrowArray values)
+        : this(CreateArrayData(runEnds, values), runEnds, values)
+    {
+    }
+
+    private RunEndEncodedArray(ArrayData data, IArrowArray runEnds, 
IArrowArray values)
+        : base(data)
+    {
+        data.EnsureBufferCount(0); // REE arrays have no buffers, only children
+        data.EnsureDataType(ArrowTypeId.RunEndEncoded);
+
+        ValidateRunEndsType(runEnds);
+        RunEnds = runEnds;
+        Values = values;
+    }
+
+    private static ArrayData CreateArrayData(IArrowArray runEnds, IArrowArray 
values)
+    {
+        ValidateRunEndsType(runEnds);

Review Comment:
   RunEndEncodedArray doesn't validate that runEnds.Length == values.Length 
(one value per run). Creating an instance with mismatched lengths yields an 
invalid REE array and can lead to out-of-range access in consumers. Add a check 
in the constructor(s) (and potentially also validate runEnds.NullCount == 0).
   ```suggestion
           ValidateRunEndsType(runEnds);
           ValidateRunEndEncodedChildren(runEnds, values);
           RunEnds = runEnds;
           Values = values;
       }
   
       private static void ValidateRunEndEncodedChildren(IArrowArray runEnds, 
IArrowArray values)
       {
           if (runEnds.Length != values.Length)
           {
               throw new ArgumentException("Run-end encoded arrays require 
runEnds.Length to equal values.Length.", nameof(values));
           }
   
           if (runEnds.NullCount != 0)
           {
               throw new ArgumentException("Run-end encoded arrays require 
runEnds to have no nulls.", nameof(runEnds));
           }
       }
   
       private static ArrayData CreateArrayData(IArrowArray runEnds, 
IArrowArray values)
       {
           ValidateRunEndsType(runEnds);
           ValidateRunEndEncodedChildren(runEnds, values);
   ```



##########
src/Apache.Arrow/Arrays/RunEndEncodedArray.cs:
##########
@@ -0,0 +1,222 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License.  You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+using System;
+using Apache.Arrow.Types;
+
+namespace Apache.Arrow;
+
+/// <summary>
+/// Represents a run-end encoded array.
+/// A run-end encoded array stores consecutive runs of the same value more 
efficiently.
+/// It contains two child arrays: run_ends (Int16/Int32/Int64) and values (any 
type).
+/// The run_ends array stores the cumulative end positions of each run.
+/// </summary>
+public class RunEndEncodedArray : Array
+{
+    /// <summary>
+    /// Gets the run ends array (Int16Array, Int32Array, or Int64Array).
+    /// This array contains the cumulative end indices for each run.
+    /// </summary>
+    public IArrowArray RunEnds { get; }
+
+    /// <summary>
+    /// Gets the values array.
+    /// This array contains the actual values that are run-length encoded.
+    /// </summary>
+    public IArrowArray Values { get; }
+
+    /// <summary>
+    /// Creates a new RunEndEncodedArray from ArrayData.
+    /// </summary>
+    /// <param name="data">The array data containing run ends and values as 
children.</param>
+    public RunEndEncodedArray(ArrayData data)
+        : this(data, ArrowArrayFactory.BuildArray(data.Children[0]), 
ArrowArrayFactory.BuildArray(data.Children[1]))
+    {
+    }
+
+    /// <summary>
+    /// Creates a new RunEndEncodedArray with specified run ends and values 
arrays.
+    /// </summary>
+    /// <param name="runEnds">The run ends array (must be Int16Array, 
Int32Array, or Int64Array).</param>
+    /// <param name="values">The values array (can be any type).</param>
+    public RunEndEncodedArray(IArrowArray runEnds, IArrowArray values)
+        : this(CreateArrayData(runEnds, values), runEnds, values)
+    {
+    }
+
+    private RunEndEncodedArray(ArrayData data, IArrowArray runEnds, 
IArrowArray values)
+        : base(data)
+    {
+        data.EnsureBufferCount(0); // REE arrays have no buffers, only children
+        data.EnsureDataType(ArrowTypeId.RunEndEncoded);
+
+        ValidateRunEndsType(runEnds);
+        RunEnds = runEnds;
+        Values = values;
+    }
+
+    private static ArrayData CreateArrayData(IArrowArray runEnds, IArrowArray 
values)
+    {
+        ValidateRunEndsType(runEnds);
+
+        // The logical length of a REE array is determined by the last value 
in run_ends
+        int logicalLength = GetLogicalLength(runEnds);
+
+        var dataType = new RunEndEncodedType(runEnds.Data.DataType, 
values.Data.DataType);
+
+        return new ArrayData(
+            dataType,
+            logicalLength,
+            nullCount: 0, // REE arrays don't have a validity bitmap
+            offset: 0,
+            buffers: [],
+            children: [runEnds.Data, values.Data]);
+    }
+
+    private static void ValidateRunEndsType(IArrowArray runEnds)
+    {
+        ArrowTypeId typeId = runEnds.Data.DataType.TypeId;
+        if (typeId != ArrowTypeId.Int16 &&
+            typeId != ArrowTypeId.Int32 &&
+            typeId != ArrowTypeId.Int64)
+        {
+            throw new ArgumentException(
+                $"Run ends array must be Int16, Int32, or Int64, but got 
{typeId}",
+                nameof(runEnds));
+        }
+    }
+
+    private static int GetLogicalLength(IArrowArray runEnds)
+    {
+        if (runEnds.Length == 0)
+        {
+            return 0;
+        }
+
+        // Get the last run end value which represents the logical length
+        switch (runEnds)
+        {
+            case Int16Array int16Array:
+                return int16Array.GetValue(int16Array.Length - 1) ?? throw new 
ArgumentException("invalid length");
+            case Int32Array int32Array:
+                return int32Array.GetValue(int32Array.Length - 1) ?? throw new 
ArgumentException("invalid length");
+            case Int64Array int64Array:
+                {
+                    long? lastValue = int64Array.GetValue(int64Array.Length - 
1);
+                    if (lastValue.HasValue && lastValue.Value > int.MaxValue)
+                    {
+                        throw new ArgumentException("Run ends value exceeds 
maximum supported length.");
+                    }
+                    return (int)(lastValue ?? throw new 
ArgumentException("invalid length"));
+                }
+            default:
+                throw new InvalidOperationException($"Unexpected run ends 
array type: {runEnds.Data.DataType.TypeId}");
+        }
+    }
+
+    /// <summary>
+    /// Finds the physical index in the run_ends array that contains the 
specified logical index.
+    /// </summary>
+    /// <param name="logicalIndex">The logical index in the decoded 
array.</param>
+    /// <returns>The physical index in the run_ends/values arrays.</returns>
+    public int FindPhysicalIndex(int logicalIndex)
+    {
+        if (logicalIndex < 0 || logicalIndex >= Length)
+        {
+            throw new ArgumentOutOfRangeException(nameof(logicalIndex));
+        }
+
+        // Binary search to find the run that contains this logical index
+        return RunEnds switch
+        {
+            Int16Array int16Array => BinarySearchRunEnds(int16Array, 
logicalIndex),
+            Int32Array int32Array => BinarySearchRunEnds(int32Array, 
logicalIndex),
+            Int64Array int64Array => BinarySearchRunEnds(int64Array, 
logicalIndex),
+            _ => throw new InvalidOperationException($"Unexpected run ends 
array type: {RunEnds.GetType()}"),
+        };
+    }
+
+    private static int BinarySearchRunEnds(Int16Array runEnds, int 
logicalIndex)
+    {
+        int left = 0;
+        int right = runEnds.Length - 1;
+
+        while (left < right)
+        {
+            int mid = left + (right - left) / 2;
+            int runEnd = runEnds.GetValue(mid) ?? throw new 
ArgumentException("invalid length"); ;
+
+            if (logicalIndex < runEnd)
+            {
+                right = mid;
+            }
+            else
+            {
+                left = mid + 1;
+            }
+        }
+
+        return left;
+    }
+
+    private static int BinarySearchRunEnds(Int32Array runEnds, int 
logicalIndex)
+    {
+        int left = 0;
+        int right = runEnds.Length - 1;
+
+        while (left < right)
+        {
+            int mid = left + (right - left) / 2;
+            int runEnd = runEnds.GetValue(mid) ?? 0;
+
+            if (logicalIndex < runEnd)
+            {
+                right = mid;
+            }
+            else
+            {
+                left = mid + 1;
+            }
+        }
+
+        return left;
+    }
+
+    private static int BinarySearchRunEnds(Int64Array runEnds, int 
logicalIndex)
+    {
+        int left = 0;
+        int right = runEnds.Length - 1;
+
+        while (left < right)
+        {
+            int mid = left + (right - left) / 2;
+            long runEnd = runEnds.GetValue(mid) ?? 0;
+

Review Comment:
   BinarySearchRunEnds for Int32/Int64 treats null run_end values as 0 ("?? 
0"), which can silently return incorrect physical indices and mask invalid 
data. This should be consistent with the Int16 path (throw if a run_end is 
null) since run_ends must be non-null.



##########
src/Apache.Arrow/Arrays/ArrayDataConcatenator.cs:
##########
@@ -286,6 +287,201 @@ public void Visit(UnionType type)
 
             public void Visit(MapType type) => 
ConcatenateLists(type.UnsortedKey()); /* Can't tell if the output is still 
sorted */
 
+            public void Visit(RunEndEncodedType type)
+            {
+                ArrowTypeId runEndsTypeId = type.RunEndsDataType.TypeId;
+                if (runEndsTypeId != ArrowTypeId.Int16 &&
+                    runEndsTypeId != ArrowTypeId.Int32 &&
+                    runEndsTypeId != ArrowTypeId.Int64)
+                {
+                    throw new InvalidOperationException(
+                        $"Run-ends array must be Int16, Int32, or Int64, but 
got {runEndsTypeId}");
+                }
+
+                var slicedValues = new List<ArrayData>(_arrayDataList.Count);
+                ArrowBuffer.Builder<short> int16Builder = null;
+                ArrowBuffer.Builder<int> int32Builder = null;
+                ArrowBuffer.Builder<long> int64Builder = null;
+
+                switch (runEndsTypeId)
+                {
+                    case ArrowTypeId.Int16: int16Builder = new 
ArrowBuffer.Builder<short>(); break;
+                    case ArrowTypeId.Int32: int32Builder = new 
ArrowBuffer.Builder<int>(); break;
+                    case ArrowTypeId.Int64: int64Builder = new 
ArrowBuffer.Builder<long>(); break;
+                }
+
+                long baseOffset = 0;
+                int physicalRunCount = 0;
+
+                foreach (ArrayData arrayData in _arrayDataList)
+                {
+                    arrayData.EnsureDataType(type.TypeId);
+
+                    ArrayData runEndsData = arrayData.Children[0];
+                    ArrayData valuesData = arrayData.Children[1];
+
+                    if (runEndsData.DataType.TypeId != runEndsTypeId)
+                    {
+                        throw new ArgumentException(
+                            $"All run-end encoded arrays must have the same 
run-ends type. Expected <{runEndsTypeId}> but got 
<{runEndsData.DataType.TypeId}>.");
+                    }
+                    if (valuesData.DataType.TypeId != 
type.ValuesDataType.TypeId)
+                    {
+                        throw new ArgumentException(
+                            $"All run-end encoded arrays must have the same 
values type. Expected <{type.ValuesDataType.TypeId}> but got 
<{valuesData.DataType.TypeId}>.");
+                    }
+
+                    if (arrayData.Length == 0)
+                    {
+                        continue;
+                    }
+
+                    int logicalOffset = arrayData.Offset;
+                    int logicalLength = arrayData.Length;
+                    int logicalEnd = logicalOffset + logicalLength;
+
+                    int physicalStart;
+                    int physicalEndExclusive;
+
+                    switch (runEndsTypeId)
+                    {
+                        case ArrowTypeId.Int16:
+                            {
+                                ReadOnlySpan<short> re = 
runEndsData.Buffers[1].Span.CastTo<short>()
+                                    .Slice(runEndsData.Offset, 
runEndsData.Length);
+                                physicalStart = FindPhysicalStartInt16(re, 
logicalOffset);
+                                physicalEndExclusive = 
FindPhysicalEndExclusiveInt16(re, logicalEnd, physicalStart);
+                                for (int p = physicalStart; p < 
physicalEndExclusive; p++)
+                                {
+                                    int adjustedEnd = Math.Min((int)re[p], 
logicalEnd) - logicalOffset;
+                                    
int16Builder.Append(checked((short)(baseOffset + adjustedEnd)));
+                                }
+                                break;
+                            }
+                        case ArrowTypeId.Int32:
+                            {
+                                ReadOnlySpan<int> re = 
runEndsData.Buffers[1].Span.CastTo<int>()
+                                    .Slice(runEndsData.Offset, 
runEndsData.Length);
+                                physicalStart = FindPhysicalStartInt32(re, 
logicalOffset);
+                                physicalEndExclusive = 
FindPhysicalEndExclusiveInt32(re, logicalEnd, physicalStart);
+                                for (int p = physicalStart; p < 
physicalEndExclusive; p++)
+                                {
+                                    int adjustedEnd = Math.Min(re[p], 
logicalEnd) - logicalOffset;
+                                    
int32Builder.Append(checked((int)(baseOffset + adjustedEnd)));
+                                }
+                                break;
+                            }
+                        default: // Int64
+                            {
+                                ReadOnlySpan<long> re = 
runEndsData.Buffers[1].Span.CastTo<long>()
+                                    .Slice(runEndsData.Offset, 
runEndsData.Length);
+                                physicalStart = FindPhysicalStartInt64(re, 
logicalOffset);
+                                physicalEndExclusive = 
FindPhysicalEndExclusiveInt64(re, logicalEnd, physicalStart);
+                                for (int p = physicalStart; p < 
physicalEndExclusive; p++)
+                                {
+                                    long adjustedEnd = Math.Min(re[p], 
(long)logicalEnd) - logicalOffset;
+                                    int64Builder.Append(baseOffset + 
adjustedEnd);
+                                }
+                                break;
+                            }
+                    }
+
+                    int physicalCount = physicalEndExclusive - physicalStart;
+                    physicalRunCount += physicalCount;
+                    slicedValues.Add(valuesData.Slice(physicalStart, 
physicalCount));
+                    baseOffset += logicalLength;
+                }
+
+                ArrowBuffer runEndsValueBuffer = runEndsTypeId switch
+                {
+                    ArrowTypeId.Int16 => int16Builder.Build(_allocator),
+                    ArrowTypeId.Int32 => int32Builder.Build(_allocator),
+                    _ => int64Builder.Build(_allocator),
+                };
+
+                ArrayData runEndsResult = new ArrayData(
+                    type.RunEndsDataType, physicalRunCount, 0, 0,
+                    new[] { ArrowBuffer.Empty, runEndsValueBuffer });
+
+                ArrayData valuesResult = slicedValues.Count == 0
+                    ? new ArrayData(type.ValuesDataType, 0, 0, 0,
+                        new[] { ArrowBuffer.Empty })
+                    : Concatenate(slicedValues, _allocator);
+

Review Comment:
   When concatenating REE arrays, valuesResult for the all-empty case is 
created with a single empty buffer. This is invalid for many values types 
(e.g., primitive arrays require 2 buffers, binary requires 3, etc.) and will 
fail when building the values child array. Construct an empty ArrayData for 
type.ValuesDataType with the correct buffer/child layout (or reuse existing 
concatenation logic to produce an empty of the right shape).
   ```suggestion
                   ArrayData valuesResult;
                   if (slicedValues.Count == 0)
                   {
                       if (_arrayDataList.Count == 0)
                       {
                           throw new InvalidOperationException(
                               "Cannot construct REE values array data without 
any input arrays.");
                       }
   
                       // Reuse an existing empty values child so the 
buffer/child layout
                       // matches type.ValuesDataType for all Arrow value types.
                       valuesResult = _arrayDataList[0].Children[1];
                   }
                   else
                   {
                       valuesResult = Concatenate(slicedValues, _allocator);
                   }
   ```



##########
src/Apache.Arrow/Types/RunEndEncodedType.cs:
##########
@@ -0,0 +1,88 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License.  You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+using System;
+
+namespace Apache.Arrow.Types;
+
+/// <summary>
+/// Represents a run-end encoded array type.
+/// Contains two child arrays: run_ends and values.
+/// The run_ends child array must be a 16/32/64-bit signed integer array
+/// which encodes the indices at which the run with the value in 
+/// each corresponding index in the values child array ends.
+/// </summary>
+public sealed class RunEndEncodedType : NestedType
+{
+    public override ArrowTypeId TypeId => ArrowTypeId.RunEndEncoded;
+    public override string Name => "run_end_encoded";
+
+    /// <summary>
+    /// Gets the run ends field (must be Int16, Int32, or Int64).
+    /// </summary>
+    public Field RunEndsField => Fields[0];
+
+    /// <summary>
+    /// Gets the values field (can be any type).
+    /// </summary>
+    public Field ValuesField => Fields[1];
+
+    /// <summary>
+    /// Gets the data type of the run ends array.
+    /// </summary>
+    public IArrowType RunEndsDataType => RunEndsField.DataType;
+
+    /// <summary>
+    /// Gets the data type of the values array.
+    /// </summary>
+    public IArrowType ValuesDataType => ValuesField.DataType;
+
+    /// <summary>
+    /// Creates a new RunEndEncodedType with the specified run ends and values 
fields.
+    /// </summary>
+    /// <param name="runEndsField">The run ends field (must be Int16, Int32, 
or Int64).</param>
+    /// <param name="valuesField">The values field (can be any type).</param>
+    public RunEndEncodedType(Field runEndsField, Field valuesField)
+        : base([runEndsField, valuesField])
+    {
+        ValidateRunEndsType(runEndsField.DataType);
+    }

Review Comment:
   ValidateRunEndsType throws an ArgumentException with paramName 
"runEndsDataType", but in the Field-based constructor the invalid input 
originates from runEndsField.DataType. Consider passing a paramName that 
matches the public constructor parameter (e.g., nameof(runEndsField)) to make 
exception reporting clearer for callers.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to