This is an automated email from the ASF dual-hosted git repository.
zeroshade pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 9841dc8 ARROW-14400: [Go] Equals and ApproxEquals for Tables and
Chunked Arrays
9841dc8 is described below
commit 9841dc864c62115d68706750b86ced5e142804f6
Author: Matthew Topol <[email protected]>
AuthorDate: Wed Oct 20 15:07:27 2021 -0400
ARROW-14400: [Go] Equals and ApproxEquals for Tables and Chunked Arrays
Closes #11488 from zeroshade/extra-comparisons
Authored-by: Matthew Topol <[email protected]>
Signed-off-by: Matthew Topol <[email protected]>
---
go/arrow/array/compare.go | 140 +++++++++++++++++++++++++++++++++++++++++
go/arrow/array/compare_test.go | 81 ++++++++++++++++++++++++
2 files changed, 221 insertions(+)
diff --git a/go/arrow/array/compare.go b/go/arrow/array/compare.go
index c4ee046..89c81ef 100644
--- a/go/arrow/array/compare.go
+++ b/go/arrow/array/compare.go
@@ -65,6 +65,136 @@ func RecordApproxEqual(left, right Record, opts
...EqualOption) bool {
return true
}
+// helper function to evaluate a function on two chunked objects having possibly different
+// chunk layouts. The function passed in will be called for each corresponding slice of the
+// two chunked arrays, and if the function returns false it will end the loop early.
+func chunkedBinaryApply(left, right *Chunked, fn func(left Interface, lbeg,
lend int64, right Interface, rbeg, rend int64) bool) {
+ var (
+ pos int64
+ length int64 = int64(left.length)
+ leftIdx, rightIdx int
+ leftPos, rightPos int64
+ )
+
+ for pos < length {
+ var cleft, cright Interface
+ for {
+ cleft, cright = left.Chunk(leftIdx),
right.Chunk(rightIdx)
+ if leftPos == int64(cleft.Len()) {
+ leftPos = 0
+ leftIdx++
+ continue
+ }
+ if rightPos == int64(cright.Len()) {
+ rightPos = 0
+ rightIdx++
+ continue
+ }
+ break
+ }
+
+ sz := int64(min(cleft.Len()-int(leftPos),
cright.Len()-int(rightPos)))
+ pos += sz
+ if !fn(cleft, leftPos, leftPos+sz, cright, rightPos,
rightPos+sz) {
+ return
+ }
+
+ leftPos += sz
+ rightPos += sz
+ }
+}
+
+// ChunkedEqual reports whether two chunked arrays are equal regardless of their chunkings.
+func ChunkedEqual(left, right *Chunked) bool {
+ switch {
+ case left == right:
+ return true
+ case left.length != right.length:
+ return false
+ case left.nulls != right.nulls:
+ return false
+ case !arrow.TypeEqual(left.dtype, right.dtype):
+ return false
+ }
+
+ var isequal bool
+ chunkedBinaryApply(left, right, func(left Interface, lbeg, lend int64,
right Interface, rbeg, rend int64) bool {
+ isequal = ArraySliceEqual(left, lbeg, lend, right, rbeg, rend)
+ return isequal
+ })
+
+ return isequal
+}
+
+// ChunkedApproxEqual reports whether two chunked arrays are approximately equal regardless of their chunkings.
+// For non-floating point arrays, this is equivalent to ChunkedEqual.
+func ChunkedApproxEqual(left, right *Chunked, opts ...EqualOption) bool {
+ switch {
+ case left == right:
+ return true
+ case left.length != right.length:
+ return false
+ case left.nulls != right.nulls:
+ return false
+ case !arrow.TypeEqual(left.dtype, right.dtype):
+ return false
+ }
+
+ var isequal bool
+ chunkedBinaryApply(left, right, func(left Interface, lbeg, lend int64,
right Interface, rbeg, rend int64) bool {
+ isequal = ArraySliceApproxEqual(left, lbeg, lend, right, rbeg,
rend, opts...)
+ return isequal
+ })
+
+ return isequal
+}
+
+// TableEqual reports whether the two tables have the same data in the same schema.
+func TableEqual(left, right Table) bool {
+ switch {
+ case left.NumCols() != right.NumCols():
+ return false
+ case left.NumRows() != right.NumRows():
+ return false
+ }
+
+ for i := 0; int64(i) < left.NumCols(); i++ {
+ lc := left.Column(i)
+ rc := right.Column(i)
+ if !lc.field.Equal(rc.field) {
+ return false
+ }
+
+ if !ChunkedEqual(lc.data, rc.data) {
+ return false
+ }
+ }
+ return true
+}
+
+// TableApproxEqual reports whether the two tables have approximately equal data in the same schema.
+func TableApproxEqual(left, right Table, opts ...EqualOption) bool {
+ switch {
+ case left.NumCols() != right.NumCols():
+ return false
+ case left.NumRows() != right.NumRows():
+ return false
+ }
+
+ for i := 0; int64(i) < left.NumCols(); i++ {
+ lc := left.Column(i)
+ rc := right.Column(i)
+ if !lc.field.Equal(rc.field) {
+ return false
+ }
+
+ if !ChunkedApproxEqual(lc.data, rc.data, opts...) {
+ return false
+ }
+ }
+ return true
+}
+
// ArrayEqual reports whether the two provided arrays are equal.
func ArrayEqual(left, right Interface) bool {
switch {
@@ -188,6 +318,16 @@ func ArraySliceEqual(left Interface, lbeg, lend int64,
right Interface, rbeg, re
return ArrayEqual(l, r)
}
+// ArraySliceApproxEqual reports whether slices left[lbeg:lend] and right[rbeg:rend] are approximately equal.
+func ArraySliceApproxEqual(left Interface, lbeg, lend int64, right Interface,
rbeg, rend int64, opts ...EqualOption) bool {
+ l := NewSlice(left, lbeg, lend)
+ defer l.Release()
+ r := NewSlice(right, rbeg, rend)
+ defer r.Release()
+
+ return ArrayApproxEqual(l, r, opts...)
+}
+
const defaultAbsoluteTolerance = 1e-5
type equalOption struct {
diff --git a/go/arrow/array/compare_test.go b/go/arrow/array/compare_test.go
index 3ed326b..4006087 100644
--- a/go/arrow/array/compare_test.go
+++ b/go/arrow/array/compare_test.go
@@ -21,10 +21,12 @@ import (
"math"
"testing"
+ "github.com/apache/arrow/go/arrow"
"github.com/apache/arrow/go/arrow/array"
"github.com/apache/arrow/go/arrow/float16"
"github.com/apache/arrow/go/arrow/internal/arrdata"
"github.com/apache/arrow/go/arrow/memory"
+ "github.com/stretchr/testify/assert"
)
func TestArrayEqual(t *testing.T) {
@@ -529,3 +531,82 @@ func TestRecordApproxEqual(t *testing.T) {
})
}
}
+
+func TestChunkedEqual(t *testing.T) {
+ for name, recs := range arrdata.Records {
+ t.Run(name, func(t *testing.T) {
+ tbl := array.NewTableFromRecords(recs[0].Schema(), recs)
+ defer tbl.Release()
+
+ for i := 0; i < int(tbl.NumCols()); i++ {
+ if !array.ChunkedEqual(tbl.Column(i).Data(),
tbl.Column(i).Data()) && name != "nulls" {
+ t.Fatalf("identical chunked arrays
should compare as equal:\narr:%v\n", tbl.Column(i).Data())
+ }
+ }
+ })
+ }
+}
+
+func TestChunkedApproxEqual(t *testing.T) {
+ fb := array.NewFloat64Builder(memory.DefaultAllocator)
+ defer fb.Release()
+
+ fb.AppendValues([]float64{1, 2, 3, 4, 5}, nil)
+ f1 := fb.NewFloat64Array()
+ defer f1.Release()
+
+ fb.AppendValues([]float64{6, 7}, nil)
+ f2 := fb.NewFloat64Array()
+ defer f2.Release()
+
+ fb.AppendValues([]float64{8, 9, 10}, nil)
+ f3 := fb.NewFloat64Array()
+ defer f3.Release()
+
+ c1 := array.NewChunked(
+ arrow.PrimitiveTypes.Float64,
+ []array.Interface{f1, f2, f3},
+ )
+ defer c1.Release()
+
+ fb.AppendValues([]float64{1, 2, 3}, nil)
+ f4 := fb.NewFloat64Array()
+ defer f4.Release()
+
+ fb.AppendValues([]float64{4, 5}, nil)
+ f5 := fb.NewFloat64Array()
+ defer f5.Release()
+
+ fb.AppendValues([]float64{6, 7, 8, 9}, nil)
+ f6 := fb.NewFloat64Array()
+ defer f6.Release()
+
+ fb.AppendValues([]float64{10}, nil)
+ f7 := fb.NewFloat64Array()
+ defer f7.Release()
+
+ c2 := array.NewChunked(
+ arrow.PrimitiveTypes.Float64,
+ []array.Interface{f4, f5, f6, f7},
+ )
+ defer c2.Release()
+
+ assert.True(t, array.ChunkedEqual(c1, c2))
+ assert.True(t, array.ChunkedApproxEqual(c1, c2))
+}
+
+func TestTableEqual(t *testing.T) {
+ for name, recs := range arrdata.Records {
+ t.Run(name, func(t *testing.T) {
+ tbl := array.NewTableFromRecords(recs[0].Schema(), recs)
+ defer tbl.Release()
+
+ if !array.TableEqual(tbl, tbl) {
+ t.Fatalf("identical tables should compare as
equal:\tbl:%v\n", tbl)
+ }
+ if !array.TableApproxEqual(tbl, tbl) {
+ t.Fatalf("identical tables should compare as
approx equal:\tbl:%v\n", tbl)
+ }
+ })
+ }
+}