This is an automated email from the ASF dual-hosted git repository.

zeroshade pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-go.git


The following commit(s) were added to refs/heads/main by this push:
     new 6843412  GH-41: [Array] Make String and Binary consistent (#165)
6843412 is described below

commit 6843412ef8e76cd51fd76a13d719b4426cac69e8
Author: Matt Topol <[email protected]>
AuthorDate: Tue Oct 22 10:31:44 2024 -0400

    GH-41: [Array] Make String and Binary consistent (#165)
    
    Fixes #41
    
    Makes the behavior of `ValueOffset` consistent between String and Binary
    arrays.
---
 arrow/array/string.go                    | 2 +-
 arrow/array/string_test.go               | 6 ------
 arrow/ipc/writer.go                      | 8 ++++++--
 parquet/metadata/statistics_types.gen.go | 4 ++--
 4 files changed, 9 insertions(+), 11 deletions(-)

diff --git a/arrow/array/string.go b/arrow/array/string.go
index d3de86b..5197e77 100644
--- a/arrow/array/string.go
+++ b/arrow/array/string.go
@@ -69,7 +69,7 @@ func (a *String) ValueStr(i int) string {
 
 // ValueOffset returns the offset of the value at index i.
 func (a *String) ValueOffset(i int) int {
-       if i < 0 || i > a.array.data.length {
+       if i < 0 || i >= a.array.data.length {
                panic("arrow/array: index out of range")
        }
        return int(a.offsets[i+a.array.data.offset])
diff --git a/arrow/array/string_test.go b/arrow/array/string_test.go
index fa8d6c2..80c51f1 100644
--- a/arrow/array/string_test.go
+++ b/arrow/array/string_test.go
@@ -90,9 +90,6 @@ func TestStringArray(t *testing.T) {
                if got, want := arr.ValueOffset(i), int(offsets[i]); got != 
want {
                        t.Fatalf("arr-offset-beg[%d]: got=%d, want=%d", i, got, 
want)
                }
-               if got, want := arr.ValueOffset(i+1), int(offsets[i+1]); got != 
want {
-                       t.Fatalf("arr-offset-end[%d]: got=%d, want=%d", i+1, 
got, want)
-               }
        }
 
        if !reflect.DeepEqual(offsets, arr.ValueOffsets()) {
@@ -372,9 +369,6 @@ func TestLargeStringArray(t *testing.T) {
                if got, want := arr.ValueOffset(i), offsets[i]; got != want {
                        t.Fatalf("arr-offset-beg[%d]: got=%d, want=%d", i, got, 
want)
                }
-               if got, want := arr.ValueOffset(i+1), offsets[i+1]; got != want 
{
-                       t.Fatalf("arr-offset-end[%d]: got=%d, want=%d", i+1, 
got, want)
-               }
        }
 
        if !reflect.DeepEqual(offsets, arr.ValueOffsets()) {
diff --git a/arrow/ipc/writer.go b/arrow/ipc/writer.go
index 0919494..7ff4267 100644
--- a/arrow/ipc/writer.go
+++ b/arrow/ipc/writer.go
@@ -627,9 +627,13 @@ func (w *recordEncoder) visit(p *Payload, arr arrow.Array) error {
                case needTruncate(int64(data.Offset()), values, totalDataBytes):
                        // slice data buffer to include the range we need now.
                        var (
-                               beg = arr.ValueOffset64(0)
-                               len = minI64(paddedLength(totalDataBytes, 
kArrowAlignment), int64(totalDataBytes))
+                               beg int64 = 0
+                               len       = minI64(paddedLength(totalDataBytes, 
kArrowAlignment), int64(totalDataBytes))
                        )
+                       if arr.Len() > 0 {
+                               beg = arr.ValueOffset64(0)
+                       }
+
                        values = 
memory.NewBufferBytes(data.Buffers()[2].Bytes()[beg : beg+len])
                default:
                        if values != nil {
diff --git a/parquet/metadata/statistics_types.gen.go b/parquet/metadata/statistics_types.gen.go
index 6e806cc..3c9aa36 100644
--- a/parquet/metadata/statistics_types.gen.go
+++ b/parquet/metadata/statistics_types.gen.go
@@ -1980,12 +1980,12 @@ func (s *ByteArrayStatistics) UpdateFromArrow(values arrow.Array, updateCounts b
                min       = s.defaultMin()
                max       = s.defaultMax()
                arr       = values.(array.BinaryLike)
-               data      = arr.ValueBytes()
+               data      = arr.ValueBytes()            
                curOffset = int64(0)
        )
 
        for i := 0; i < arr.Len(); i++ {
-               nextOffset := arr.ValueOffset64(i + 1)
+               nextOffset := curOffset + int64(arr.ValueLen(i))
                v := data[curOffset:nextOffset]
                curOffset = nextOffset
 

Reply via email to