Copilot commented on code in PR #47204:
URL: https://github.com/apache/arrow/pull/47204#discussion_r2311340426


##########
cpp/src/arrow/compare.h:
##########
@@ -83,6 +83,37 @@ class EqualOptions {
     return res;
   }
 
+  /// Whether the \ref arrow::Schema property is used in the comparison.
+  ///
+  /// This option only affects the Equals methods
+  /// and has no effect on ApproxEquals methods.
+  bool use_schema() const { return use_schema_; }
+
+  /// Return a new EqualOptions object with the "use_schema_" property changed.
+  /// setting this option is false making the value of \ref 
EqualOptions::use_metadata_
+  /// is useless.

Review Comment:
   The documentation comment has grammatical errors. It should read: 'Return a 
new EqualOptions object with the \"use_schema_\" property changed. Setting this 
option to false makes the value of \\ref EqualOptions::use_metadata_ useless.'
   ```suggestion
     /// Setting this option to false makes the value of \ref 
EqualOptions::use_metadata_ useless.
   ```



##########
cpp/src/arrow/record_batch.cc:
##########
@@ -349,44 +349,27 @@ bool CanIgnoreNaNInEquality(const RecordBatch& batch, 
const EqualOptions& opts)
 
 bool RecordBatch::Equals(const RecordBatch& other, bool check_metadata,
                          const EqualOptions& opts) const {
-  if (this == &other) {
-    if (CanIgnoreNaNInEquality(*this, opts)) {
-      return true;
-    }
-  } else {
-    if (num_columns() != other.num_columns() || num_rows_ != other.num_rows()) 
{
-      return false;
-    } else if (!schema_->Equals(*other.schema(), check_metadata)) {
-      return false;
-    } else if (device_type() != other.device_type()) {
-      return false;
-    }
-  }
-
-  for (int i = 0; i < num_columns(); ++i) {
-    if (!column(i)->Equals(other.column(i), opts)) {
-      return false;
-    }
-  }
-
-  return true;
+  return Equals(other, opts.use_metadata(check_metadata));
 }
 
-bool RecordBatch::ApproxEquals(const RecordBatch& other, const EqualOptions& 
opts) const {
+bool RecordBatch::Equals(const RecordBatch& other, const EqualOptions& opts) 
const {
   if (this == &other) {
     if (CanIgnoreNaNInEquality(*this, opts)) {
       return true;
     }
   } else {
     if (num_columns() != other.num_columns() || num_rows_ != other.num_rows()) 
{
       return false;
+    } else if (opts.use_schema() &&
+               !schema_->Equals(*other.schema(), opts.use_metadata())) {
+      return false;
     } else if (device_type() != other.device_type()) {
       return false;
     }
   }
 
   for (int i = 0; i < num_columns(); ++i) {
-    if (!column(i)->ApproxEquals(other.column(i), opts)) {
+    if (!column(i)->Equals(other.column(i), opts)) {
       return false;

Review Comment:
   This line calls `Equals` on array columns but should call `ApproxEquals` 
when the `use_atol()` option is enabled for approximate comparisons. The 
current implementation breaks the distinction between exact and approximate 
equality.
   ```suggestion
       if (opts.use_atol()) {
         if (!column(i)->ApproxEquals(other.column(i), opts)) {
           return false;
         }
       } else {
         if (!column(i)->Equals(other.column(i), opts)) {
           return false;
         }
   ```



##########
cpp/src/arrow/record_batch.h:
##########
@@ -118,22 +118,32 @@ class ARROW_EXPORT RecordBatch {
   static Result<std::shared_ptr<RecordBatch>> FromStructArray(
       const std::shared_ptr<Array>& array, MemoryPool* pool = 
default_memory_pool());
 
-  /// \brief Determine if two record batches are exactly equal
+  /// \brief Determine if two record batches are equal.
   ///
   /// \param[in] other the RecordBatch to compare with
-  /// \param[in] check_metadata if true, check that Schema metadata is the same
+  /// \param[in] check_metadata If true, the schema metadata will be compared,
+  ///            regardless of the value set in \ref 
EqualOptions::use_metadata_.
   /// \param[in] opts the options for equality comparisons
   /// \return true if batches are equal
   bool Equals(const RecordBatch& other, bool check_metadata = false,
               const EqualOptions& opts = EqualOptions::Defaults()) const;
 
+  /// \brief Determine if two record batches are equal.
+  ///
+  /// \param[in] other the RecordBatch to compare with
+  /// \param[in] opts the options for equality comparisons
+  /// \return true if batches are equal
+  bool Equals(const RecordBatch& other, const EqualOptions& opts) const;
+
   /// \brief Determine if two record batches are approximately equal
   ///
   /// \param[in] other the RecordBatch to compare with
   /// \param[in] opts the options for equality comparisons
   /// \return true if batches are approximately equal
   bool ApproxEquals(const RecordBatch& other,
-                    const EqualOptions& opts = EqualOptions::Defaults()) const;
+                    const EqualOptions& opts = EqualOptions::Defaults()) const 
{
+    return Equals(other, opts.use_schema(false).use_atol(true));
+  }

Review Comment:
   Moving `ApproxEquals` to be inline changes the API structure and could 
affect binary compatibility. The inline implementation also hardcodes 
`use_schema(false)` which may not align with user expectations - users might 
want to compare schemas in approximate equality checks.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to