[GitHub] [arrow] pitrou commented on a change in pull request #8703: ARROW-10143: [C++] Rewrite Array(Range)Equals

GitBox Sat, 21 Nov 2020 07:57:35 -0800


pitrou commented on a change in pull request #8703:
URL: https://github.com/apache/arrow/pull/8703#discussion_r528210778




##########
File path: cpp/src/arrow/compare.cc
##########
@@ -49,700 +51,441 @@
 namespace arrow {
 
 using internal::BitmapEquals;
+using internal::BitmapReader;
+using internal::BitmapUInt64Reader;
 using internal::checked_cast;
+using internal::OptionalBitBlockCounter;
+using internal::OptionalBitmapEquals;
 
 // ----------------------------------------------------------------------
 // Public method implementations
 
 namespace {
 
-// These helper functions assume we already checked the arrays have equal
-// sizes and null bitmaps.
+bool CompareArrayRanges(const ArrayData& left, const ArrayData& right,
+                        int64_t left_start_idx, int64_t left_end_idx,
+                        int64_t right_start_idx, const EqualOptions& options,
+                        bool floating_approximate);
 
-template <typename ArrowType, typename EqualityFunc>
-inline bool BaseFloatingEquals(const NumericArray<ArrowType>& left,
-                               const NumericArray<ArrowType>& right,
-                               EqualityFunc&& equals) {
-  using T = typename ArrowType::c_type;
-
-  const T* left_data = left.raw_values();
-  const T* right_data = right.raw_values();
-
-  if (left.null_count() > 0) {
-    for (int64_t i = 0; i < left.length(); ++i) {
-      if (left.IsNull(i)) continue;
-      if (!equals(left_data[i], right_data[i])) {
-        return false;
-      }
-    }
-  } else {
-    for (int64_t i = 0; i < left.length(); ++i) {
-      if (!equals(left_data[i], right_data[i])) {
-        return false;
-      }
-    }
-  }
-  return true;
-}
-
-template <typename ArrowType>
-inline bool FloatingEquals(const NumericArray<ArrowType>& left,
-                           const NumericArray<ArrowType>& right,
-                           const EqualOptions& opts) {
-  using T = typename ArrowType::c_type;
-
-  if (opts.nans_equal()) {
-    return BaseFloatingEquals<ArrowType>(left, right, [](T x, T y) -> bool {
-      return (x == y) || (std::isnan(x) && std::isnan(y));
-    });
-  } else {
-    return BaseFloatingEquals<ArrowType>(left, right,
-                                         [](T x, T y) -> bool { return x == y; 
});
-  }
-}
-
-template <typename ArrowType>
-inline bool FloatingApproxEquals(const NumericArray<ArrowType>& left,
-                                 const NumericArray<ArrowType>& right,
-                                 const EqualOptions& opts) {
-  using T = typename ArrowType::c_type;
-  const T epsilon = static_cast<T>(opts.atol());
-
-  if (opts.nans_equal()) {
-    return BaseFloatingEquals<ArrowType>(left, right, [epsilon](T x, T y) -> 
bool {
-      return (fabs(x - y) <= epsilon) || (x == y) || (std::isnan(x) && 
std::isnan(y));
-    });
-  } else {
-    return BaseFloatingEquals<ArrowType>(left, right, [epsilon](T x, T y) -> 
bool {
-      return (fabs(x - y) <= epsilon) || (x == y);
-    });
-  }
-}
-
-// RangeEqualsVisitor assumes the range sizes are equal
-
-class RangeEqualsVisitor {
+class RangeDataEqualsImpl {
  public:
-  RangeEqualsVisitor(const Array& right, int64_t left_start_idx, int64_t 
left_end_idx,
-                     int64_t right_start_idx)
-      : right_(right),
+  // PRE-CONDITIONS:
+  // - the types are equal
+  // - the ranges are in bounds
+  RangeDataEqualsImpl(const EqualOptions& options, bool floating_approximate,
+                      const ArrayData& left, const ArrayData& right,
+                      int64_t left_start_idx, int64_t right_start_idx,
+                      int64_t range_length)
+      : options_(options),
+        floating_approximate_(floating_approximate),
+        left_(left),
+        right_(right),
         left_start_idx_(left_start_idx),
-        left_end_idx_(left_end_idx),
         right_start_idx_(right_start_idx),
+        range_length_(range_length),
         result_(false) {}
 
-  template <typename ArrayType>
-  inline Status CompareValues(const ArrayType& left) {
-    const auto& right = checked_cast<const ArrayType&>(right_);
-
-    for (int64_t i = left_start_idx_, o_i = right_start_idx_; i < 
left_end_idx_;
-         ++i, ++o_i) {
-      const bool is_null = left.IsNull(i);
-      if (is_null != right.IsNull(o_i) ||
-          (!is_null && left.Value(i) != right.Value(o_i))) {
-        result_ = false;
-        return Status::OK();
+  bool Compare() {
+    // Compare null bitmaps
+    if (left_start_idx_ == 0 && right_start_idx_ == 0 && range_length_ == 
left_.length &&
+        range_length_ == right_.length) {
+      // If we're comparing entire arrays, we can first compare the cached 
null counts
+      if (left_.GetNullCount() != right_.GetNullCount()) {
+        return false;
       }
     }

Review comment:
       It's OK to compute the null count here, IMHO: the computed value is often 
re-used for other tasks.
   The reason this heuristic only works when comparing entire arrays (rather 
than sub-ranges) is that two whole arrays can have different null counts while 
the compared ranges are still equal.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]

[GitHub] [arrow] pitrou commented on a change in pull request #8703: ARROW-10143: [C++] Rewrite Array(Range)Equals

Reply via email to