This is an automated email from the ASF dual-hosted git repository.

zeroshade pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 9841dc8  ARROW-14400: [Go] Equals and ApproxEquals for Tables and 
Chunked Arrays
9841dc8 is described below

commit 9841dc864c62115d68706750b86ced5e142804f6
Author: Matthew Topol <[email protected]>
AuthorDate: Wed Oct 20 15:07:27 2021 -0400

    ARROW-14400: [Go] Equals and ApproxEquals for Tables and Chunked Arrays
    
    Closes #11488 from zeroshade/extra-comparisons
    
    Authored-by: Matthew Topol <[email protected]>
    Signed-off-by: Matthew Topol <[email protected]>
---
 go/arrow/array/compare.go      | 140 +++++++++++++++++++++++++++++++++++++++++
 go/arrow/array/compare_test.go |  81 ++++++++++++++++++++++++
 2 files changed, 221 insertions(+)

diff --git a/go/arrow/array/compare.go b/go/arrow/array/compare.go
index c4ee046..89c81ef 100644
--- a/go/arrow/array/compare.go
+++ b/go/arrow/array/compare.go
@@ -65,6 +65,136 @@ func RecordApproxEqual(left, right Record, opts 
...EqualOption) bool {
        return true
 }
 
+// chunkedBinaryApply is a helper that evaluates a function on two chunked
+// arrays having possibly different chunk layouts. The function passed in is
+// called for each corresponding slice of the two chunked arrays, and if the
+// function returns false the loop ends early.
+func chunkedBinaryApply(left, right *Chunked, fn func(left Interface, lbeg, 
lend int64, right Interface, rbeg, rend int64) bool) {
+       var (
+               pos               int64
+               length            int64 = int64(left.length)
+               leftIdx, rightIdx int
+               leftPos, rightPos int64
+       )
+
+       for pos < length {
+               var cleft, cright Interface
+               for {
+                       cleft, cright = left.Chunk(leftIdx), 
right.Chunk(rightIdx)
+                       if leftPos == int64(cleft.Len()) {
+                               leftPos = 0
+                               leftIdx++
+                               continue
+                       }
+                       if rightPos == int64(cright.Len()) {
+                               rightPos = 0
+                               rightIdx++
+                               continue
+                       }
+                       break
+               }
+
+               sz := int64(min(cleft.Len()-int(leftPos), 
cright.Len()-int(rightPos)))
+               pos += sz
+               if !fn(cleft, leftPos, leftPos+sz, cright, rightPos, 
rightPos+sz) {
+                       return
+               }
+
+               leftPos += sz
+               rightPos += sz
+       }
+}
+
+// ChunkedEqual reports whether two chunked arrays are equal regardless of
+// their chunkings.
+//
+// The arrays are unequal if their lengths, null counts or data types differ;
+// otherwise each pair of corresponding slices is compared with
+// ArraySliceEqual and the comparison stops at the first mismatch.
+func ChunkedEqual(left, right *Chunked) bool {
+       switch {
+       case left == right:
+               return true
+       case left.length != right.length:
+               return false
+       case left.nulls != right.nulls:
+               return false
+       case !arrow.TypeEqual(left.dtype, right.dtype):
+               return false
+       }
+
+       // Start from true: when both arrays are empty, chunkedBinaryApply never
+       // invokes the callback, and two empty arrays of the same type are equal.
+       isequal := true
+       chunkedBinaryApply(left, right, func(left Interface, lbeg, lend int64, right Interface, rbeg, rend int64) bool {
+               isequal = ArraySliceEqual(left, lbeg, lend, right, rbeg, rend)
+               return isequal
+       })
+
+       return isequal
+}
+
+// ChunkedApproxEqual reports whether two chunked arrays are approximately
+// equal regardless of their chunkings. For non-floating point arrays, this
+// is equivalent to ChunkedEqual.
+//
+// The arrays are unequal if their lengths, null counts or data types differ;
+// otherwise each pair of corresponding slices is compared with
+// ArraySliceApproxEqual, forwarding opts, and the comparison stops at the
+// first mismatch.
+func ChunkedApproxEqual(left, right *Chunked, opts ...EqualOption) bool {
+       switch {
+       case left == right:
+               return true
+       case left.length != right.length:
+               return false
+       case left.nulls != right.nulls:
+               return false
+       case !arrow.TypeEqual(left.dtype, right.dtype):
+               return false
+       }
+
+       // Start from true: when both arrays are empty, chunkedBinaryApply never
+       // invokes the callback, and two empty arrays of the same type are equal.
+       isequal := true
+       chunkedBinaryApply(left, right, func(left Interface, lbeg, lend int64, right Interface, rbeg, rend int64) bool {
+               isequal = ArraySliceApproxEqual(left, lbeg, lend, right, rbeg, rend, opts...)
+               return isequal
+       })
+
+       return isequal
+}
+
+// TableEqual reports whether the two tables have the same data in the same schema.
+func TableEqual(left, right Table) bool {
+       switch {
+       case left.NumCols() != right.NumCols():
+               return false
+       case left.NumRows() != right.NumRows():
+               return false
+       }
+
+       for i := 0; int64(i) < left.NumCols(); i++ {
+               lc := left.Column(i)
+               rc := right.Column(i)
+               if !lc.field.Equal(rc.field) {
+                       return false
+               }
+
+               if !ChunkedEqual(lc.data, rc.data) {
+                       return false
+               }
+       }
+       return true
+}
+
+// TableApproxEqual reports whether the two tables have approximately equal
+// data in the same schema.
+func TableApproxEqual(left, right Table, opts ...EqualOption) bool {
+       switch {
+       case left.NumCols() != right.NumCols():
+               return false
+       case left.NumRows() != right.NumRows():
+               return false
+       }
+
+       for i := 0; int64(i) < left.NumCols(); i++ {
+               lc := left.Column(i)
+               rc := right.Column(i)
+               if !lc.field.Equal(rc.field) {
+                       return false
+               }
+
+               if !ChunkedApproxEqual(lc.data, rc.data, opts...) {
+                       return false
+               }
+       }
+       return true
+}
+
 // ArrayEqual reports whether the two provided arrays are equal.
 func ArrayEqual(left, right Interface) bool {
        switch {
@@ -188,6 +318,16 @@ func ArraySliceEqual(left Interface, lbeg, lend int64, 
right Interface, rbeg, re
        return ArrayEqual(l, r)
 }
 
+// ArraySliceApproxEqual reports whether slices left[lbeg:lend] and
+// right[rbeg:rend] are approximately equal.
+func ArraySliceApproxEqual(left Interface, lbeg, lend int64, right Interface, 
rbeg, rend int64, opts ...EqualOption) bool {
+       l := NewSlice(left, lbeg, lend)
+       defer l.Release()
+       r := NewSlice(right, rbeg, rend)
+       defer r.Release()
+
+       return ArrayApproxEqual(l, r, opts...)
+}
+
 const defaultAbsoluteTolerance = 1e-5
 
 type equalOption struct {
diff --git a/go/arrow/array/compare_test.go b/go/arrow/array/compare_test.go
index 3ed326b..4006087 100644
--- a/go/arrow/array/compare_test.go
+++ b/go/arrow/array/compare_test.go
@@ -21,10 +21,12 @@ import (
        "math"
        "testing"
 
+       "github.com/apache/arrow/go/arrow"
        "github.com/apache/arrow/go/arrow/array"
        "github.com/apache/arrow/go/arrow/float16"
        "github.com/apache/arrow/go/arrow/internal/arrdata"
        "github.com/apache/arrow/go/arrow/memory"
+       "github.com/stretchr/testify/assert"
 )
 
 func TestArrayEqual(t *testing.T) {
@@ -529,3 +531,82 @@ func TestRecordApproxEqual(t *testing.T) {
                })
        }
 }
+
+func TestChunkedEqual(t *testing.T) {
+       for name, recs := range arrdata.Records {
+               t.Run(name, func(t *testing.T) {
+                       tbl := array.NewTableFromRecords(recs[0].Schema(), recs)
+                       defer tbl.Release()
+
+                       for i := 0; i < int(tbl.NumCols()); i++ {
+                               if !array.ChunkedEqual(tbl.Column(i).Data(), 
tbl.Column(i).Data()) && name != "nulls" {
+                                       t.Fatalf("identical chunked arrays 
should compare as equal:\narr:%v\n", tbl.Column(i).Data())
+                               }
+                       }
+               })
+       }
+}
+
+func TestChunkedApproxEqual(t *testing.T) {
+       fb := array.NewFloat64Builder(memory.DefaultAllocator)
+       defer fb.Release()
+
+       fb.AppendValues([]float64{1, 2, 3, 4, 5}, nil)
+       f1 := fb.NewFloat64Array()
+       defer f1.Release()
+
+       fb.AppendValues([]float64{6, 7}, nil)
+       f2 := fb.NewFloat64Array()
+       defer f2.Release()
+
+       fb.AppendValues([]float64{8, 9, 10}, nil)
+       f3 := fb.NewFloat64Array()
+       defer f3.Release()
+
+       c1 := array.NewChunked(
+               arrow.PrimitiveTypes.Float64,
+               []array.Interface{f1, f2, f3},
+       )
+       defer c1.Release()
+
+       fb.AppendValues([]float64{1, 2, 3}, nil)
+       f4 := fb.NewFloat64Array()
+       defer f4.Release()
+
+       fb.AppendValues([]float64{4, 5}, nil)
+       f5 := fb.NewFloat64Array()
+       defer f5.Release()
+
+       fb.AppendValues([]float64{6, 7, 8, 9}, nil)
+       f6 := fb.NewFloat64Array()
+       defer f6.Release()
+
+       fb.AppendValues([]float64{10}, nil)
+       f7 := fb.NewFloat64Array()
+       defer f7.Release()
+
+       c2 := array.NewChunked(
+               arrow.PrimitiveTypes.Float64,
+               []array.Interface{f4, f5, f6, f7},
+       )
+       defer c2.Release()
+
+       assert.True(t, array.ChunkedEqual(c1, c2))
+       assert.True(t, array.ChunkedApproxEqual(c1, c2))
+}
+
+// TestTableEqual builds a table from each arrdata record set and checks that
+// the table compares as equal, and as approximately equal, to itself.
+func TestTableEqual(t *testing.T) {
+       for name, recs := range arrdata.Records {
+               t.Run(name, func(t *testing.T) {
+                       tbl := array.NewTableFromRecords(recs[0].Schema(), recs)
+                       defer tbl.Release()
+
+                       // The messages use "\ntbl:" (newline, then label); the
+                       // original "\tbl:" was parsed as a tab followed by "bl:".
+                       if !array.TableEqual(tbl, tbl) {
+                               t.Fatalf("identical tables should compare as equal:\ntbl:%v\n", tbl)
+                       }
+                       if !array.TableApproxEqual(tbl, tbl) {
+                               t.Fatalf("identical tables should compare as approx equal:\ntbl:%v\n", tbl)
+                       }
+               })
+       }
+}

Reply via email to