This is an automated email from the ASF dual-hosted git repository.
zeroshade pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-go.git
The following commit(s) were added to refs/heads/main by this push:
new 80700fd7 fix(arrow/array): handle empty binary values correctly in
BinaryBuilder (#634)
80700fd7 is described below
commit 80700fd71b80283b09710c568d188287b566da08
Author: Matt Topol <[email protected]>
AuthorDate: Wed Jan 14 19:16:56 2026 -0500
fix(arrow/array): handle empty binary values correctly in BinaryBuilder
(#634)
### Rationale for this change
Fixes #625
### What changes are included in this PR?
When all values appended to a BinaryBuilder are empty (zero-length byte
slices), they were incorrectly being treated as NULL values instead of
valid empty slices. This occurred because the underlying buffer builder
returned a buffer created from nil bytes, which caused slicing
operations to return nil.
The fix ensures that when no data buffer is allocated (length = 0), we
create a buffer with an empty slice []byte{} instead of nil, allowing
proper distinction between NULL and empty values.
### Are these changes tested?
Yes a test is added.
### Are there any user-facing changes?
Just fixing the behavior.
---
arrow/array/binarybuilder_test.go | 51 +++++++++++++++++++++++++++++++++++++
arrow/array/bufferbuilder.go | 4 ++-
arrow/util/protobuf_reflect_test.go | 2 +-
3 files changed, 55 insertions(+), 2 deletions(-)
diff --git a/arrow/array/binarybuilder_test.go
b/arrow/array/binarybuilder_test.go
index 3cc88324..4992d00d 100644
--- a/arrow/array/binarybuilder_test.go
+++ b/arrow/array/binarybuilder_test.go
@@ -149,3 +149,54 @@ func TestBinaryBuilderLarge_ReserveData(t *testing.T) {
assert.Zero(t, ab.Cap(), "unexpected ArrayBuilder.Cap(), NewBinaryArray
did not reset state")
assert.Zero(t, ab.NullN(), "unexpected ArrayBuilder.NullN(),
NewBinaryArray did not reset state")
}
+
+// TestBinaryBuilder_AllEmptyValues reproduces issue #625
+// When all values in a binary column are empty, they should be treated as
+// empty byte slices, not as NULL values
+func TestBinaryBuilder_AllEmptyValues(t *testing.T) {
+ mem := memory.NewCheckedAllocator(memory.NewGoAllocator())
+ defer mem.AssertSize(t, 0)
+
+ ab := array.NewBinaryBuilder(mem, arrow.BinaryTypes.Binary)
+ defer ab.Release()
+
+ // Case 1: Empty + Data - this works correctly
+ ab.Append([]byte(""))
+ ab.Append([]byte(""))
+ ab.Append([]byte("SGVsbG8sIFdvcmxkIQ=="))
+
+ ar := ab.NewBinaryArray()
+ defer ar.Release()
+
+ assert.Equal(t, 3, ar.Len())
+ assert.Equal(t, 0, ar.NullN())
+
+ // First two should be empty slices, not nil
+ for i := 0; i < 2; i++ {
+ assert.False(t, ar.IsNull(i), "value %d should not be null", i)
+ assert.NotNil(t, ar.Value(i), "value %d should not be nil", i)
+ assert.Equal(t, 0, len(ar.Value(i)), "value %d should be empty
slice", i)
+ }
+ assert.Equal(t, []byte("SGVsbG8sIFdvcmxkIQ=="), ar.Value(2))
+
+ // Case 2: All Empty - this is the failing case
+ ab2 := array.NewBinaryBuilder(mem, arrow.BinaryTypes.Binary)
+ defer ab2.Release()
+
+ ab2.Append([]byte(""))
+ ab2.Append([]byte(""))
+ ab2.Append([]byte(""))
+
+ ar2 := ab2.NewBinaryArray()
+ defer ar2.Release()
+
+ assert.Equal(t, 3, ar2.Len())
+ assert.Equal(t, 0, ar2.NullN(), "no values should be null")
+
+ // All three should be empty slices, not nil
+ for i := 0; i < 3; i++ {
+ assert.False(t, ar2.IsNull(i), "value %d should not be null", i)
+ assert.NotNil(t, ar2.Value(i), "value %d should not be nil", i)
+ assert.Equal(t, 0, len(ar2.Value(i)), "value %d should be empty
slice", i)
+ }
+}
diff --git a/arrow/array/bufferbuilder.go b/arrow/array/bufferbuilder.go
index bc784d6a..73183473 100644
--- a/arrow/array/bufferbuilder.go
+++ b/arrow/array/bufferbuilder.go
@@ -144,7 +144,9 @@ func (b *bufferBuilder) Finish() (buffer *memory.Buffer) {
b.buffer = nil
b.Reset()
if buffer == nil {
- buffer = memory.NewBufferBytes(nil)
+ // Use an empty slice instead of nil to ensure slicing returns
an empty slice
+ // This fixes issue #625 where all empty values were
incorrectly treated as NULL
+ buffer = memory.NewBufferBytes([]byte{})
}
return
}
diff --git a/arrow/util/protobuf_reflect_test.go
b/arrow/util/protobuf_reflect_test.go
index 34957b82..38297bb2 100644
--- a/arrow/util/protobuf_reflect_test.go
+++ b/arrow/util/protobuf_reflect_test.go
@@ -366,7 +366,7 @@ func TestNullRecordFromProtobuf(t *testing.T) {
"fixed64":0,
"sfixed32":0,
"bool":false,
- "bytes":null,
+ "bytes":"",
"double":0,
"enum":"OPTION_0",
"message":null,