This is an automated email from the ASF dual-hosted git repository.
curth pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new bfc0dcbe1b GH-41302: [C#][Integration] Fix writing list and binary
arrays with zero length offsets to IPC format (#41303)
bfc0dcbe1b is described below
commit bfc0dcbe1bafdd86bb0156c3ee4fda9ffab83d0c
Author: Adam Reeve <[email protected]>
AuthorDate: Fri Apr 19 14:41:56 2024 +1200
GH-41302: [C#][Integration] Fix writing list and binary arrays with zero
length offsets to IPC format (#41303)
### Rationale for this change
Fixes the integration test failures caused by #41230
### What changes are included in this PR?
Only try to access the offset values if the array length is non-zero when
writing list and binary arrays to IPC format.
### Are these changes tested?
Yes, I've manually run the integration tests with C# and Java to verify
they pass (when also including the changes from #41264), and also added new
unit tests for this.
### Are there any user-facing changes?
This may also be a bug that affects users but it isn't in a released
version.
* GitHub Issue: #41302
Authored-by: Adam Reeve <[email protected]>
Signed-off-by: Curt Hagenlocher <[email protected]>
---
csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs | 18 ++++++--
.../Apache.Arrow.Tests/ArrowFileWriterTests.cs | 52 ++++++++++++++++++++++
2 files changed, 66 insertions(+), 4 deletions(-)
diff --git a/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs b/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs
index 1b83735925..a7e4c13525 100644
--- a/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs
+++ b/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs
@@ -165,8 +165,13 @@ namespace Apache.Arrow.Ipc
_buffers.Add(CreateBitmapBuffer(array.NullBitmapBuffer, array.Offset, array.Length));
_buffers.Add(CreateBuffer(GetZeroBasedValueOffsets(array.ValueOffsetsBuffer, array.Offset, array.Length)));
- int valuesOffset = array.ValueOffsets[0];
- int valuesLength = array.ValueOffsets[array.Length] - valuesOffset;
+ int valuesOffset = 0;
+ int valuesLength = 0;
+ if (array.Length > 0)
+ {
+ valuesOffset = array.ValueOffsets[0];
+ valuesLength = array.ValueOffsets[array.Length] - valuesOffset;
+ }
var values = array.Values;
if (valuesOffset > 0 || valuesLength < values.Length)
@@ -206,8 +211,13 @@ namespace Apache.Arrow.Ipc
_buffers.Add(CreateBitmapBuffer(array.NullBitmapBuffer, array.Offset, array.Length));
_buffers.Add(CreateBuffer(GetZeroBasedValueOffsets(array.ValueOffsetsBuffer, array.Offset, array.Length)));
- int valuesOffset = array.ValueOffsets[0];
- int valuesLength = array.ValueOffsets[array.Length] - valuesOffset;
+ int valuesOffset = 0;
+ int valuesLength = 0;
+ if (array.Length > 0)
+ {
+ valuesOffset = array.ValueOffsets[0];
+ valuesLength = array.ValueOffsets[array.Length] - valuesOffset;
+ }
_buffers.Add(CreateSlicedBuffer<byte>(array.ValueBuffer, valuesOffset, valuesLength));
}
diff --git a/csharp/test/Apache.Arrow.Tests/ArrowFileWriterTests.cs b/csharp/test/Apache.Arrow.Tests/ArrowFileWriterTests.cs
index baea4d61e5..297cb5e181 100644
--- a/csharp/test/Apache.Arrow.Tests/ArrowFileWriterTests.cs
+++ b/csharp/test/Apache.Arrow.Tests/ArrowFileWriterTests.cs
@@ -193,5 +193,57 @@ namespace Apache.Arrow.Tests
Assert.Null(readBatch);
SchemaComparer.Compare(originalBatch.Schema, reader.Schema);
}
+
+ [Fact]
+ public async Task WriteBinaryArrayWithEmptyOffsets()
+ {
+ // Empty binary arrays generated by the C# builder have a single offset,
+ // but some implementations may produce an empty offsets buffer.
+
+ var array = new BinaryArray(
+ new BinaryType(),
+ length: 0,
+ valueOffsetsBuffer: ArrowBuffer.Empty,
+ dataBuffer: ArrowBuffer.Empty,
+ nullBitmapBuffer: ArrowBuffer.Empty,
+ nullCount: 0);
+
+ var recordBatch = new RecordBatch.Builder().Append("x", true, array).Build();
+
+ var stream = new MemoryStream();
+ var writer = new ArrowFileWriter(stream, recordBatch.Schema, leaveOpen: true);
+
+ await writer.WriteRecordBatchAsync(recordBatch);
+ await writer.WriteEndAsync();
+
+ stream.Position = 0;
+
+ await ValidateRecordBatchFile(stream, recordBatch, strictCompare: false);
+ }
+
+ [Fact]
+ public async Task WriteListArrayWithEmptyOffsets()
+ {
+ var values = new Int32Array.Builder().Build();
+ var array = new ListArray(
+ new ListType(new Int32Type()),
+ length: 0,
+ valueOffsetsBuffer: ArrowBuffer.Empty,
+ values: values,
+ nullBitmapBuffer: ArrowBuffer.Empty,
+ nullCount: 0);
+
+ var recordBatch = new RecordBatch.Builder().Append("x", true, array).Build();
+
+ var stream = new MemoryStream();
+ var writer = new ArrowFileWriter(stream, recordBatch.Schema, leaveOpen: true);
+
+ await writer.WriteRecordBatchAsync(recordBatch);
+ await writer.WriteEndAsync();
+
+ stream.Position = 0;
+
+ await ValidateRecordBatchFile(stream, recordBatch, strictCompare: false);
+ }
}
}