This is an automated email from the ASF dual-hosted git repository.
zeroshade pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-go.git
The following commit(s) were added to refs/heads/main by this push:
new 6843412 GH-41: [Array] Make String and Binary consistent (#165)
6843412 is described below
commit 6843412ef8e76cd51fd76a13d719b4426cac69e8
Author: Matt Topol <[email protected]>
AuthorDate: Tue Oct 22 10:31:44 2024 -0400
GH-41: [Array] Make String and Binary consistent (#165)
Fixes #41
Makes the behavior of `ValueOffset` consistent between String and Binary
arrays.
---
arrow/array/string.go | 2 +-
arrow/array/string_test.go | 6 ------
arrow/ipc/writer.go | 8 ++++++--
parquet/metadata/statistics_types.gen.go | 4 ++--
4 files changed, 9 insertions(+), 11 deletions(-)
diff --git a/arrow/array/string.go b/arrow/array/string.go
index d3de86b..5197e77 100644
--- a/arrow/array/string.go
+++ b/arrow/array/string.go
@@ -69,7 +69,7 @@ func (a *String) ValueStr(i int) string {
// ValueOffset returns the offset of the value at index i.
func (a *String) ValueOffset(i int) int {
- if i < 0 || i > a.array.data.length {
+ if i < 0 || i >= a.array.data.length {
panic("arrow/array: index out of range")
}
return int(a.offsets[i+a.array.data.offset])
diff --git a/arrow/array/string_test.go b/arrow/array/string_test.go
index fa8d6c2..80c51f1 100644
--- a/arrow/array/string_test.go
+++ b/arrow/array/string_test.go
@@ -90,9 +90,6 @@ func TestStringArray(t *testing.T) {
if got, want := arr.ValueOffset(i), int(offsets[i]); got !=
want {
t.Fatalf("arr-offset-beg[%d]: got=%d, want=%d", i, got,
want)
}
- if got, want := arr.ValueOffset(i+1), int(offsets[i+1]); got !=
want {
- t.Fatalf("arr-offset-end[%d]: got=%d, want=%d", i+1,
got, want)
- }
}
if !reflect.DeepEqual(offsets, arr.ValueOffsets()) {
@@ -372,9 +369,6 @@ func TestLargeStringArray(t *testing.T) {
if got, want := arr.ValueOffset(i), offsets[i]; got != want {
t.Fatalf("arr-offset-beg[%d]: got=%d, want=%d", i, got,
want)
}
- if got, want := arr.ValueOffset(i+1), offsets[i+1]; got != want {
- t.Fatalf("arr-offset-end[%d]: got=%d, want=%d", i+1,
got, want)
- }
}
if !reflect.DeepEqual(offsets, arr.ValueOffsets()) {
diff --git a/arrow/ipc/writer.go b/arrow/ipc/writer.go
index 0919494..7ff4267 100644
--- a/arrow/ipc/writer.go
+++ b/arrow/ipc/writer.go
@@ -627,9 +627,13 @@ func (w *recordEncoder) visit(p *Payload, arr arrow.Array) error {
case needTruncate(int64(data.Offset()), values, totalDataBytes):
// slice data buffer to include the range we need now.
var (
- beg = arr.ValueOffset64(0)
- len = minI64(paddedLength(totalDataBytes,
kArrowAlignment), int64(totalDataBytes))
+ beg int64 = 0
+ len = minI64(paddedLength(totalDataBytes,
kArrowAlignment), int64(totalDataBytes))
)
+ if arr.Len() > 0 {
+ beg = arr.ValueOffset64(0)
+ }
+
values =
memory.NewBufferBytes(data.Buffers()[2].Bytes()[beg : beg+len])
default:
if values != nil {
diff --git a/parquet/metadata/statistics_types.gen.go b/parquet/metadata/statistics_types.gen.go
index 6e806cc..3c9aa36 100644
--- a/parquet/metadata/statistics_types.gen.go
+++ b/parquet/metadata/statistics_types.gen.go
@@ -1980,12 +1980,12 @@ func (s *ByteArrayStatistics) UpdateFromArrow(values arrow.Array, updateCounts b
min = s.defaultMin()
max = s.defaultMax()
arr = values.(array.BinaryLike)
- data = arr.ValueBytes()
+ data = arr.ValueBytes()
curOffset = int64(0)
)
for i := 0; i < arr.Len(); i++ {
- nextOffset := arr.ValueOffset64(i + 1)
+ nextOffset := curOffset + int64(arr.ValueLen(i))
v := data[curOffset:nextOffset]
curOffset = nextOffset