This is an automated email from the ASF dual-hosted git repository.

zeroshade pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-go.git


The following commit(s) were added to refs/heads/main by this push:
     new 80700fd7 fix(arrow/array): handle empty binary values correctly in 
BinaryBuilder (#634)
80700fd7 is described below

commit 80700fd71b80283b09710c568d188287b566da08
Author: Matt Topol <[email protected]>
AuthorDate: Wed Jan 14 19:16:56 2026 -0500

    fix(arrow/array): handle empty binary values correctly in BinaryBuilder 
(#634)
    
    ### Rationale for this change
    Fixes #625
    
    ### What changes are included in this PR?
    When all values appended to a BinaryBuilder are empty (zero-length byte
    slices), they were incorrectly being treated as NULL values instead of
    valid empty slices. This occurred because the underlying buffer builder
    returned a buffer created from nil bytes, which caused slicing
    operations to return nil.
    
    The fix ensures that when no data buffer is allocated (length = 0), we
    create a buffer with an empty slice []byte{} instead of nil, allowing
    proper distinction between NULL and empty values.
    
    ### Are these changes tested?
    Yes a test is added.
    
    ### Are there any user-facing changes?
    Just fixing the behavior.
---
 arrow/array/binarybuilder_test.go   | 51 +++++++++++++++++++++++++++++++++++++
 arrow/array/bufferbuilder.go        |  4 ++-
 arrow/util/protobuf_reflect_test.go |  2 +-
 3 files changed, 55 insertions(+), 2 deletions(-)

diff --git a/arrow/array/binarybuilder_test.go 
b/arrow/array/binarybuilder_test.go
index 3cc88324..4992d00d 100644
--- a/arrow/array/binarybuilder_test.go
+++ b/arrow/array/binarybuilder_test.go
@@ -149,3 +149,54 @@ func TestBinaryBuilderLarge_ReserveData(t *testing.T) {
        assert.Zero(t, ab.Cap(), "unexpected ArrayBuilder.Cap(), NewBinaryArray 
did not reset state")
        assert.Zero(t, ab.NullN(), "unexpected ArrayBuilder.NullN(), 
NewBinaryArray did not reset state")
 }
+
+// TestBinaryBuilder_AllEmptyValues reproduces issue #625
+// When all values in a binary column are empty, they should be treated as
+// empty byte slices, not as NULL values
+func TestBinaryBuilder_AllEmptyValues(t *testing.T) {
+       mem := memory.NewCheckedAllocator(memory.NewGoAllocator())
+       defer mem.AssertSize(t, 0)
+
+       ab := array.NewBinaryBuilder(mem, arrow.BinaryTypes.Binary)
+       defer ab.Release()
+
+       // Case 1: Empty + Data - this works correctly
+       ab.Append([]byte(""))
+       ab.Append([]byte(""))
+       ab.Append([]byte("SGVsbG8sIFdvcmxkIQ=="))
+
+       ar := ab.NewBinaryArray()
+       defer ar.Release()
+
+       assert.Equal(t, 3, ar.Len())
+       assert.Equal(t, 0, ar.NullN())
+
+       // First two should be empty slices, not nil
+       for i := 0; i < 2; i++ {
+               assert.False(t, ar.IsNull(i), "value %d should not be null", i)
+               assert.NotNil(t, ar.Value(i), "value %d should not be nil", i)
+               assert.Equal(t, 0, len(ar.Value(i)), "value %d should be empty 
slice", i)
+       }
+       assert.Equal(t, []byte("SGVsbG8sIFdvcmxkIQ=="), ar.Value(2))
+
+       // Case 2: All Empty - this is the failing case
+       ab2 := array.NewBinaryBuilder(mem, arrow.BinaryTypes.Binary)
+       defer ab2.Release()
+
+       ab2.Append([]byte(""))
+       ab2.Append([]byte(""))
+       ab2.Append([]byte(""))
+
+       ar2 := ab2.NewBinaryArray()
+       defer ar2.Release()
+
+       assert.Equal(t, 3, ar2.Len())
+       assert.Equal(t, 0, ar2.NullN(), "no values should be null")
+
+       // All three should be empty slices, not nil
+       for i := 0; i < 3; i++ {
+               assert.False(t, ar2.IsNull(i), "value %d should not be null", i)
+               assert.NotNil(t, ar2.Value(i), "value %d should not be nil", i)
+               assert.Equal(t, 0, len(ar2.Value(i)), "value %d should be empty 
slice", i)
+       }
+}
diff --git a/arrow/array/bufferbuilder.go b/arrow/array/bufferbuilder.go
index bc784d6a..73183473 100644
--- a/arrow/array/bufferbuilder.go
+++ b/arrow/array/bufferbuilder.go
@@ -144,7 +144,9 @@ func (b *bufferBuilder) Finish() (buffer *memory.Buffer) {
        b.buffer = nil
        b.Reset()
        if buffer == nil {
-               buffer = memory.NewBufferBytes(nil)
+               // Use an empty slice instead of nil to ensure slicing returns 
an empty slice
+               // This fixes issue #625 where all empty values were 
incorrectly treated as NULL
+               buffer = memory.NewBufferBytes([]byte{})
        }
        return
 }
diff --git a/arrow/util/protobuf_reflect_test.go 
b/arrow/util/protobuf_reflect_test.go
index 34957b82..38297bb2 100644
--- a/arrow/util/protobuf_reflect_test.go
+++ b/arrow/util/protobuf_reflect_test.go
@@ -366,7 +366,7 @@ func TestNullRecordFromProtobuf(t *testing.T) {
                "fixed64":0,
                "sfixed32":0,
                "bool":false,
-               "bytes":null,
+               "bytes":"",
                "double":0,
                "enum":"OPTION_0",
                "message":null,

Reply via email to