This is an automated email from the ASF dual-hosted git repository.

zeroshade pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new d9c2d59617 GH-35828: [Go] Add `array.WithUnorderedMapKeys` option for 
`array.ApproxEqual` (#35823)
d9c2d59617 is described below

commit d9c2d5961766d16fa0cae3b0ba9b2335ffd6bdb6
Author: Alex Shcherbakov <[email protected]>
AuthorDate: Wed May 31 20:12:19 2023 +0300

    GH-35828: [Go] Add `array.WithUnorderedMapKeys` option for 
`array.ApproxEqual` (#35823)
    
    ### Rationale for this change
    
    If we store the value of the `*array.Map` into Go map we can't expect to 
put the values back in the same order, as the map traversal order in Go is 
undefined.
    This PR extends `array.ApproxEqual` by allowing map keys to be in different 
order.
    
    ### What changes are included in this PR?
    
    New helper functions:
    * `array.arrayApproxEqualMap` that is now used instead of 
`array.arrayApproxEqualList` for map comparison
    * `array.arrayApproxEqualSingleMapEntry` that checks if the single map 
entry we have matches to the other one without having keys sorted the same way
    
    ### Are these changes tested?
    
    1. Newly added test `array.TestArrayApproxEqualMaps`
    2. https://github.com/cloudquery/cloudquery/pull/11078
    
    ### Are there any user-facing changes?
    
    New `array.WithUnorderedMapKeys` option for `array.ApproxEqual` that allows 
users to control if the entries order is important or not.
    
    * Closes: #35828
    
    Authored-by: candiduslynx <[email protected]>
    Signed-off-by: Matt Topol <[email protected]>
---
 go/arrow/array/compare.go      |  86 +++++++++++++++++++++++++++++-
 go/arrow/array/compare_test.go | 116 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 200 insertions(+), 2 deletions(-)

diff --git a/go/arrow/array/compare.go b/go/arrow/array/compare.go
index a55a9d0c48..48b0df9c84 100644
--- a/go/arrow/array/compare.go
+++ b/go/arrow/array/compare.go
@@ -385,8 +385,9 @@ func sliceApproxEqual(left arrow.Array, lbeg, lend int64, 
right arrow.Array, rbe
 const defaultAbsoluteTolerance = 1e-5
 
 type equalOption struct {
-       atol   float64 // absolute tolerance
-       nansEq bool    // whether NaNs are considered equal.
+       atol             float64 // absolute tolerance
+       nansEq           bool    // whether NaNs are considered equal.
+       unorderedMapKeys bool    // whether maps are allowed to have different 
entries order
 }
 
 func (eq equalOption) f16(f1, f2 float16.Num) bool {
@@ -450,6 +451,13 @@ func WithAbsTolerance(atol float64) EqualOption {
        }
 }
 
+// WithUnorderedMapKeys configures the comparison functions so that Map with 
different entries order are considered equal.
+func WithUnorderedMapKeys(v bool) EqualOption {
+       return func(o *equalOption) {
+               o.unorderedMapKeys = v
+       }
+}
+
 // ArrayApproxEqual reports whether the two provided arrays are approximately 
equal.
 // For non-floating point arrays, it is equivalent to ArrayEqual.
 //
@@ -581,6 +589,9 @@ func arrayApproxEqual(left, right arrow.Array, opt 
equalOption) bool {
                return arrayEqualDuration(l, r)
        case *Map:
                r := right.(*Map)
+               if opt.unorderedMapKeys {
+                       return arrayApproxEqualMap(l, r, opt)
+               }
                return arrayApproxEqualList(l.List, r.List, opt)
        case *Dictionary:
                r := right.(*Dictionary)
@@ -732,3 +743,74 @@ func arrayApproxEqualStruct(left, right *Struct, opt 
equalOption) bool {
        }
        return true
 }
+
+// arrayApproxEqualMap doesn't care about the order of keys (in Go map 
traversal order is undefined)
+func arrayApproxEqualMap(left, right *Map, opt equalOption) bool {
+       for i := 0; i < left.Len(); i++ {
+               if left.IsNull(i) {
+                       continue
+               }
+               if 
!arrayApproxEqualSingleMapEntry(left.newListValue(i).(*Struct), 
right.newListValue(i).(*Struct), opt) {
+                       return false
+               }
+       }
+       return true
+}
+
+// arrayApproxEqualSingleMapEntry is a helper function that checks if a single 
entry pair is approx equal.
+// Basically, it doesn't care about key order.
+// structs passed will be released
+func arrayApproxEqualSingleMapEntry(left, right *Struct, opt equalOption) bool 
{
+       defer left.Release()
+       defer right.Release()
+
+       // we don't compare the validity bitmap, but we want other checks from 
baseArrayEqual
+       switch {
+       case left.Len() != right.Len():
+               return false
+       case left.NullN() != right.NullN():
+               return false
+       case !arrow.TypeEqual(left.DataType(), right.DataType()): // We do not 
check for metadata as in the C++ implementation.
+               return false
+       case left.NullN() == left.Len():
+               return true
+       }
+
+       used := make(map[int]bool, right.Len())
+       for i := 0; i < left.Len(); i++ {
+               if left.IsNull(i) {
+                       continue
+               }
+
+               found := false
+               lBeg, lEnd := int64(i), int64(i+1)
+               for j := 0; j < right.Len(); j++ {
+                       if used[j] {
+                               continue
+                       }
+                       if right.IsNull(j) {
+                               used[j] = true
+                               continue
+                       }
+
+                       rBeg, rEnd := int64(j), int64(j+1)
+
+                       // check keys (field 0)
+                       if !sliceApproxEqual(left.Field(0), lBeg, lEnd, 
right.Field(0), rBeg, rEnd, opt) {
+                               continue
+                       }
+
+                       // only now check the values
+                       if sliceApproxEqual(left.Field(1), lBeg, lEnd, 
right.Field(1), rBeg, rEnd, opt) {
+                               found = true
+                               used[j] = true
+                               break
+                       }
+               }
+               if !found {
+                       return false
+               }
+       }
+
+       return len(used) == right.Len()
+}
diff --git a/go/arrow/array/compare_test.go b/go/arrow/array/compare_test.go
index 71109d0ac0..8bee75ffcc 100644
--- a/go/arrow/array/compare_test.go
+++ b/go/arrow/array/compare_test.go
@@ -19,6 +19,7 @@ package array_test
 import (
        "fmt"
        "math"
+       "sort"
        "testing"
 
        "github.com/apache/arrow/go/v13/arrow"
@@ -302,6 +303,121 @@ func TestArrayApproxEqualFloats(t *testing.T) {
        }
 }
 
+func testStringMap(mem memory.Allocator, m map[string]string, keys []string) 
*array.Map {
+       dt := arrow.MapOf(arrow.BinaryTypes.String, arrow.BinaryTypes.String)
+       builder := array.NewMapBuilderWithType(mem, dt)
+       defer builder.Release()
+       key, item := builder.KeyBuilder().(*array.StringBuilder), 
builder.ItemBuilder().(*array.StringBuilder)
+
+       builder.AppendNull()
+       builder.Append(true)
+
+       for _, k := range keys {
+               key.Append(k)
+
+               v, ok := m[k]
+               if !ok {
+                       item.AppendNull()
+                       continue
+               }
+
+               item.Append(v)
+       }
+
+       return builder.NewMapArray()
+}
+
+func TestArrayApproxEqualMaps(t *testing.T) {
+       mem := memory.NewCheckedAllocator(memory.NewGoAllocator())
+       defer mem.AssertSize(t, 0)
+
+       t.Run("different order", func(t *testing.T) {
+               m := map[string]string{"x": "x", "y": "y", "z": "z"}
+
+               keys := []string{"z", "y", "x", "null"}
+               a := testStringMap(mem, m, keys)
+               defer a.Release()
+
+               asc := make([]string, len(keys))
+               copy(asc, keys)
+               sort.Strings(asc)
+               assert.NotEqual(t, keys, asc)
+
+               b := testStringMap(mem, m, asc)
+               defer b.Release()
+
+               assert.False(t, array.ApproxEqual(a, b))
+               assert.True(t, array.ApproxEqual(a, b, 
array.WithUnorderedMapKeys(true)))
+       })
+
+       t.Run("extra left value", func(t *testing.T) {
+               m := map[string]string{"x": "x", "y": "y", "z": "z", "extra": 
"extra"}
+
+               aKeys := []string{"z", "y", "x", "extra"}
+               a := testStringMap(mem, m, aKeys)
+               defer a.Release()
+
+               bKeys := []string{"z", "y", "x"}
+               b := testStringMap(mem, m, bKeys)
+               defer b.Release()
+
+               assert.NotEqual(t, aKeys, bKeys)
+               assert.Equal(t, a.NullN(), b.NullN())
+               assert.False(t, array.ApproxEqual(a, b))
+               assert.False(t, array.ApproxEqual(a, b, 
array.WithUnorderedMapKeys(true)))
+       })
+
+       t.Run("extra right value", func(t *testing.T) {
+               m := map[string]string{"x": "x", "y": "y", "z": "z", "extra": 
"extra"}
+
+               aKeys := []string{"z", "y", "x"}
+               a := testStringMap(mem, m, aKeys)
+               defer a.Release()
+
+               bKeys := []string{"z", "y", "x", "extra"}
+               b := testStringMap(mem, m, bKeys)
+               defer b.Release()
+
+               assert.NotEqual(t, aKeys, bKeys)
+               assert.Equal(t, a.NullN(), b.NullN())
+               assert.False(t, array.ApproxEqual(a, b))
+               assert.False(t, array.ApproxEqual(a, b, 
array.WithUnorderedMapKeys(true)))
+       })
+
+       t.Run("unmatched value", func(t *testing.T) {
+               m := map[string]string{"x": "x", "y": "y", "z": "z", "extra": 
"extra", "extra2": "extra"}
+
+               aKeys := []string{"z", "y", "x", "extra"}
+               a := testStringMap(mem, m, aKeys)
+               defer a.Release()
+
+               bKeys := []string{"z", "y", "x", "extra2"}
+               b := testStringMap(mem, m, bKeys)
+               defer b.Release()
+
+               assert.NotEqual(t, aKeys, bKeys)
+               assert.Equal(t, a.NullN(), b.NullN())
+               assert.False(t, array.ApproxEqual(a, b))
+               assert.False(t, array.ApproxEqual(a, b, 
array.WithUnorderedMapKeys(true)))
+       })
+
+       t.Run("different value", func(t *testing.T) {
+               m := map[string]string{"x": "x", "y": "y", "z": "z", "extra": 
"extra"}
+
+               keys := []string{"z", "y", "x", "extra"}
+               a := testStringMap(mem, m, keys)
+               defer a.Release()
+
+               m["extra"] = "different"
+               b := testStringMap(mem, m, keys)
+               defer b.Release()
+
+               assert.Equal(t, a.NullN(), b.NullN())
+               assert.False(t, array.ApproxEqual(a, b))
+               assert.False(t, array.ApproxEqual(a, b, 
array.WithUnorderedMapKeys(true)))
+       })
+}
+
 func arrayOf(mem memory.Allocator, a interface{}, valids []bool) arrow.Array {
        if mem == nil {
                mem = memory.NewGoAllocator()

Reply via email to