This is an automated email from the ASF dual-hosted git repository.

raulcd pushed a commit to branch maint-16.x.x
in repository https://gitbox.apache.org/repos/asf/arrow.git

commit ef4db17ae342fac3fe52400d8d8540c675f93a9c
Author: Adam Reeve <[email protected]>
AuthorDate: Fri Apr 19 14:41:56 2024 +1200

    GH-41302: [C#][Integration] Fix writing list and binary arrays with zero length offsets to IPC format (#41303)
    
    ### Rationale for this change
    
    Fixes the integration test failures caused by #41230
    
    ### What changes are included in this PR?
    
    Only try to access the offset values if the array length is non-zero when writing list and binary arrays to IPC format.
    
    ### Are these changes tested?
    
    Yes, I've manually run the integration tests with C# and Java to verify they pass (when also including the changes from #41264), and also added new unit tests for this.
    
    ### Are there any user-facing changes?
    
    This may also be a bug that affects users but it isn't in a released version.
    * GitHub Issue: #41302
    
    Authored-by: Adam Reeve <[email protected]>
    Signed-off-by: Curt Hagenlocher <[email protected]>
---
 csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs   | 18 ++++++--
 .../Apache.Arrow.Tests/ArrowFileWriterTests.cs     | 52 ++++++++++++++++++++++
 2 files changed, 66 insertions(+), 4 deletions(-)

diff --git a/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs 
b/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs
index 1b83735925..a7e4c13525 100644
--- a/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs
+++ b/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs
@@ -165,8 +165,13 @@ namespace Apache.Arrow.Ipc
                 _buffers.Add(CreateBitmapBuffer(array.NullBitmapBuffer, 
array.Offset, array.Length));
                 
_buffers.Add(CreateBuffer(GetZeroBasedValueOffsets(array.ValueOffsetsBuffer, 
array.Offset, array.Length)));
 
-                int valuesOffset = array.ValueOffsets[0];
-                int valuesLength = array.ValueOffsets[array.Length] - 
valuesOffset;
+                int valuesOffset = 0;
+                int valuesLength = 0;
+                if (array.Length > 0)
+                {
+                    valuesOffset = array.ValueOffsets[0];
+                    valuesLength = array.ValueOffsets[array.Length] - 
valuesOffset;
+                }
 
                 var values = array.Values;
                 if (valuesOffset > 0 || valuesLength < values.Length)
@@ -206,8 +211,13 @@ namespace Apache.Arrow.Ipc
                 _buffers.Add(CreateBitmapBuffer(array.NullBitmapBuffer, 
array.Offset, array.Length));
                 
_buffers.Add(CreateBuffer(GetZeroBasedValueOffsets(array.ValueOffsetsBuffer, 
array.Offset, array.Length)));
 
-                int valuesOffset = array.ValueOffsets[0];
-                int valuesLength = array.ValueOffsets[array.Length] - 
valuesOffset;
+                int valuesOffset = 0;
+                int valuesLength = 0;
+                if (array.Length > 0)
+                {
+                    valuesOffset = array.ValueOffsets[0];
+                    valuesLength = array.ValueOffsets[array.Length] - 
valuesOffset;
+                }
 
                 _buffers.Add(CreateSlicedBuffer<byte>(array.ValueBuffer, 
valuesOffset, valuesLength));
             }
diff --git a/csharp/test/Apache.Arrow.Tests/ArrowFileWriterTests.cs 
b/csharp/test/Apache.Arrow.Tests/ArrowFileWriterTests.cs
index baea4d61e5..297cb5e181 100644
--- a/csharp/test/Apache.Arrow.Tests/ArrowFileWriterTests.cs
+++ b/csharp/test/Apache.Arrow.Tests/ArrowFileWriterTests.cs
@@ -193,5 +193,57 @@ namespace Apache.Arrow.Tests
             Assert.Null(readBatch);
             SchemaComparer.Compare(originalBatch.Schema, reader.Schema);
         }
+
+        [Fact]
+        public async Task WriteBinaryArrayWithEmptyOffsets()
+        {
+            // Empty binary arrays generated by the C# builder have a single 
offset,
+            // but some implementations may produce an empty offsets buffer.
+
+            var array = new BinaryArray(
+                new BinaryType(),
+                length: 0,
+                valueOffsetsBuffer: ArrowBuffer.Empty,
+                dataBuffer: ArrowBuffer.Empty,
+                nullBitmapBuffer: ArrowBuffer.Empty,
+                nullCount: 0);
+
+            var recordBatch = new RecordBatch.Builder().Append("x", true, 
array).Build();
+
+            var stream = new MemoryStream();
+            var writer = new ArrowFileWriter(stream, recordBatch.Schema, 
leaveOpen: true);
+
+            await writer.WriteRecordBatchAsync(recordBatch);
+            await writer.WriteEndAsync();
+
+            stream.Position = 0;
+
+            await ValidateRecordBatchFile(stream, recordBatch, strictCompare: 
false);
+        }
+
+        [Fact]
+        public async Task WriteListArrayWithEmptyOffsets()
+        {
+            var values = new Int32Array.Builder().Build();
+            var array = new ListArray(
+                new ListType(new Int32Type()),
+                length: 0,
+                valueOffsetsBuffer: ArrowBuffer.Empty,
+                values: values,
+                nullBitmapBuffer: ArrowBuffer.Empty,
+                nullCount: 0);
+
+            var recordBatch = new RecordBatch.Builder().Append("x", true, 
array).Build();
+
+            var stream = new MemoryStream();
+            var writer = new ArrowFileWriter(stream, recordBatch.Schema, 
leaveOpen: true);
+
+            await writer.WriteRecordBatchAsync(recordBatch);
+            await writer.WriteEndAsync();
+
+            stream.Position = 0;
+
+            await ValidateRecordBatchFile(stream, recordBatch, strictCompare: 
false);
+        }
     }
 }

Reply via email to