kou commented on code in PR #37579:
URL: https://github.com/apache/arrow/pull/37579#discussion_r1316525789


##########
cpp/src/arrow/chunked_array.cc:
##########
@@ -111,13 +112,33 @@ bool ChunkedArray::Equals(const ChunkedArray& other) 
const {
       .ok();
 }
 
-bool ChunkedArray::Equals(const std::shared_ptr<ChunkedArray>& other) const {
-  if (this == other.get()) {
-    return true;
+namespace {
+
+bool supportsNaN(const arrow::DataType& type) {
+  bool supports_nan = false;
+  if (type.num_fields() == 0) {
+    // Only floating types support NaN
+    supports_nan |= is_floating(type.id());

Review Comment:
   How about using early return instead of `|=`?
   
   ```suggestion
       if (is_floating(type.id())) {
         return true;
       }
   ```



##########
cpp/src/arrow/chunked_array.cc:
##########
@@ -111,13 +112,33 @@ bool ChunkedArray::Equals(const ChunkedArray& other) 
const {
       .ok();
 }
 
-bool ChunkedArray::Equals(const std::shared_ptr<ChunkedArray>& other) const {
-  if (this == other.get()) {
-    return true;
+namespace {
+
+bool supportsNaN(const arrow::DataType& type) {
+  bool supports_nan = false;
+  if (type.num_fields() == 0) {
+    // Only floating types support NaN
+    supports_nan |= is_floating(type.id());
+  } else {
+    for (const auto& field : type.fields()) {
+      supports_nan |= supportsNaN(*field->type());
+      if (supports_nan) {
+        break;
+      }

Review Comment:
   ```suggestion
         if (supportsNaN(*field->type())) {
           return true;
         }
   ```



##########
cpp/src/arrow/chunked_array.cc:
##########
@@ -111,13 +112,33 @@ bool ChunkedArray::Equals(const ChunkedArray& other) 
const {
       .ok();
 }
 
-bool ChunkedArray::Equals(const std::shared_ptr<ChunkedArray>& other) const {
-  if (this == other.get()) {
-    return true;
+namespace {
+
+bool supportsNaN(const arrow::DataType& type) {

Review Comment:
   How about renaming this to `mayHaveNaN()` because we want to know whether 
the target chunked array has NaN or not? 



##########
cpp/src/arrow/chunked_array.cc:
##########
@@ -111,13 +112,33 @@ bool ChunkedArray::Equals(const ChunkedArray& other) 
const {
       .ok();
 }
 
-bool ChunkedArray::Equals(const std::shared_ptr<ChunkedArray>& other) const {
-  if (this == other.get()) {
-    return true;
+namespace {
+
+bool supportsNaN(const arrow::DataType& type) {
+  bool supports_nan = false;
+  if (type.num_fields() == 0) {
+    // Only floating types support NaN
+    supports_nan |= is_floating(type.id());
+  } else {
+    for (const auto& field : type.fields()) {
+      supports_nan |= supportsNaN(*field->type());
+      if (supports_nan) {
+        break;
+      }
+    }
   }
+  return supports_nan;
+}
+
+} //  namespace
+
+bool ChunkedArray::Equals(const std::shared_ptr<ChunkedArray>& other) const {
   if (!other) {
     return false;
   }
+  if (this == other.get() && !supportsNaN(*other->type())) {

Review Comment:
   ```suggestion
     if (this == other.get() && !supportsNaN(*type_)) {
   ```



##########
cpp/src/arrow/chunked_array_test.cc:
##########
@@ -146,6 +146,33 @@ TEST_F(TestChunkedArray, EqualsDifferingMetadata) {
   ASSERT_TRUE(left.Equals(right));
 }
 
+TEST_F(TestChunkedArray, EqualsSameAddressWithNaNs) {
+  auto chunk0 = ArrayFromJSON(float64(), "[0, 1, 2, NaN]");
+  auto chunk1 = ArrayFromJSON(float64(), "[3, 4, 5]");
+  ASSERT_OK_AND_ASSIGN(auto result0, ChunkedArray::Make({chunk0, chunk1}, 
float64()));
+  // result0 has NaN values
+  ASSERT_FALSE(result0->Equals(result0));

Review Comment:
   Can we use more meaningful names? For example:
   
   ```suggestion
     auto chunk_have_nan = ArrayFromJSON(float64(), "[0, 1, 2, NaN]");
     auto chunk_no_nan = ArrayFromJSON(float64(), "[3, 4, 5]");
     ASSERT_OK_AND_ASSIGN(auto chunked_array_have_nan, 
ChunkedArray::Make({chunk_have_nan, chunk_no_nan}, float64()));
     ASSERT_FALSE(chunked_array_have_nan->Equals(chunked_array_have_nan));
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to