kou commented on code in PR #47204:
URL: https://github.com/apache/arrow/pull/47204#discussion_r2311347633
##########
cpp/src/arrow/record_batch.h:
##########
@@ -118,22 +118,32 @@ class ARROW_EXPORT RecordBatch {
static Result<std::shared_ptr<RecordBatch>> FromStructArray(
const std::shared_ptr<Array>& array, MemoryPool* pool =
default_memory_pool());
- /// \brief Determine if two record batches are exactly equal
+ /// \brief Determine if two record batches are equal.
Review Comment:
```suggestion
/// \brief Determine if two record batches are equal
```
##########
cpp/src/arrow/compare.h:
##########
@@ -83,6 +83,37 @@ class EqualOptions {
return res;
}
+ /// Whether the \ref arrow::Schema property is used in the comparison.
+ ///
+ /// This option only affects the Equals methods
+ /// and has no effect on ApproxEquals methods.
+ bool use_schema() const { return use_schema_; }
+
+ /// Return a new EqualOptions object with the "use_schema_" property changed.
+ /// setting this option is false making the value of \ref
EqualOptions::use_metadata_
Review Comment:
```suggestion
///
/// Setting this option is false making the value of \ref
EqualOptions::use_metadata_
```
##########
cpp/src/arrow/record_batch_test.cc:
##########
@@ -62,7 +63,49 @@ using util::Float16;
class TestRecordBatch : public ::testing::Test {};
TEST_F(TestRecordBatch, Equals) {
- const int length = 10;
+ int length = 10;
+
+ auto f0 = field("f0", int32());
+ auto f1 = field("f1", uint8());
+ auto f2 = field("f2", int16());
+
+ auto schema0 = schema({f0, f1, f2});
+ auto schema1 = schema({f0, f1, f2});
+ auto schema2 = schema({f0, f1});
+
+ random::RandomArrayGenerator gen(42);
+
+ auto a0 = gen.ArrayOf(int32(), length);
+ auto a1 = gen.ArrayOf(uint8(), length);
+ auto a2 = gen.ArrayOf(int16(), length);
+ auto a3 = a0->Slice(0, length / 2);
+ auto a4 = a1->Slice(0, length / 2);
+ auto a5 = gen.ArrayOf(int32(), length);
+ auto a6 = gen.ArrayOf(uint8(), length);
+
+ auto b0 = RecordBatch::Make(schema0, length, {a0, a1, a2});
+ auto b1 = RecordBatch::Make(schema1, length, {a0, a1, a2});
+ auto b2 = RecordBatch::Make(schema2, length, {a0, a1});
+ auto b3 = RecordBatch::Make(schema2, length, {a3, a4});
+ auto b4 = RecordBatch::Make(schema2, length, {a5, a6});
Review Comment:
Can we use more meaningful variable names?
The current naming is difficult to understand differences...
##########
cpp/src/arrow/record_batch.h:
##########
@@ -118,22 +118,32 @@ class ARROW_EXPORT RecordBatch {
static Result<std::shared_ptr<RecordBatch>> FromStructArray(
const std::shared_ptr<Array>& array, MemoryPool* pool =
default_memory_pool());
- /// \brief Determine if two record batches are exactly equal
+ /// \brief Determine if two record batches are equal.
///
/// \param[in] other the RecordBatch to compare with
- /// \param[in] check_metadata if true, check that Schema metadata is the same
+ /// \param[in] check_metadata If true, the schema metadata will be compared,
+ /// regardless of the value set in \ref
EqualOptions::use_metadata_.
/// \param[in] opts the options for equality comparisons
/// \return true if batches are equal
bool Equals(const RecordBatch& other, bool check_metadata = false,
const EqualOptions& opts = EqualOptions::Defaults()) const;
+ /// \brief Determine if two record batches are equal.
Review Comment:
```suggestion
/// \brief Determine if two record batches are equal
```
##########
cpp/src/arrow/record_batch_test.cc:
##########
@@ -62,7 +63,49 @@ using util::Float16;
class TestRecordBatch : public ::testing::Test {};
TEST_F(TestRecordBatch, Equals) {
- const int length = 10;
+ int length = 10;
Review Comment:
Why did you remove this `const`?
##########
cpp/src/arrow/record_batch.h:
##########
@@ -118,22 +118,32 @@ class ARROW_EXPORT RecordBatch {
static Result<std::shared_ptr<RecordBatch>> FromStructArray(
const std::shared_ptr<Array>& array, MemoryPool* pool =
default_memory_pool());
- /// \brief Determine if two record batches are exactly equal
+ /// \brief Determine if two record batches are equal.
///
/// \param[in] other the RecordBatch to compare with
- /// \param[in] check_metadata if true, check that Schema metadata is the same
+ /// \param[in] check_metadata If true, the schema metadata will be compared,
Review Comment:
```suggestion
/// \param[in] check_metadata if true, the schema metadata will be
compared,
```
##########
cpp/src/arrow/record_batch_test.cc:
##########
@@ -62,7 +63,49 @@ using util::Float16;
class TestRecordBatch : public ::testing::Test {};
TEST_F(TestRecordBatch, Equals) {
- const int length = 10;
+ int length = 10;
+
+ auto f0 = field("f0", int32());
+ auto f1 = field("f1", uint8());
+ auto f2 = field("f2", int16());
+
+ auto schema0 = schema({f0, f1, f2});
+ auto schema1 = schema({f0, f1, f2});
+ auto schema2 = schema({f0, f1});
+
+ random::RandomArrayGenerator gen(42);
+
+ auto a0 = gen.ArrayOf(int32(), length);
+ auto a1 = gen.ArrayOf(uint8(), length);
+ auto a2 = gen.ArrayOf(int16(), length);
+ auto a3 = a0->Slice(0, length / 2);
+ auto a4 = a1->Slice(0, length / 2);
+ auto a5 = gen.ArrayOf(int32(), length);
+ auto a6 = gen.ArrayOf(uint8(), length);
+
+ auto b0 = RecordBatch::Make(schema0, length, {a0, a1, a2});
+ auto b1 = RecordBatch::Make(schema1, length, {a0, a1, a2});
+ auto b2 = RecordBatch::Make(schema2, length, {a0, a1});
+ auto b3 = RecordBatch::Make(schema2, length, {a3, a4});
+ auto b4 = RecordBatch::Make(schema2, length, {a5, a6});
+
+ // Same Values
+ ASSERT_TRUE(b0->Equals(*b1));
+
+ // Different number of columns
+ ASSERT_FALSE(b0->Equals(*b2));
+
+ // Different number of rows
+ ASSERT_FALSE(b2->Equals(*b3));
+
+ // Different values
+ ASSERT_FALSE(b0->Equals(*b4));
Review Comment:
`b0` -> `b2`? (But what is the difference of `b2->Equals(*b3)` and
`b2->Equals(*b4)`?)
##########
cpp/src/arrow/record_batch_test.cc:
##########
@@ -62,7 +63,49 @@ using util::Float16;
class TestRecordBatch : public ::testing::Test {};
TEST_F(TestRecordBatch, Equals) {
- const int length = 10;
+ int length = 10;
+
+ auto f0 = field("f0", int32());
+ auto f1 = field("f1", uint8());
+ auto f2 = field("f2", int16());
+
+ auto schema0 = schema({f0, f1, f2});
+ auto schema1 = schema({f0, f1, f2});
+ auto schema2 = schema({f0, f1});
+
+ random::RandomArrayGenerator gen(42);
+
+ auto a0 = gen.ArrayOf(int32(), length);
+ auto a1 = gen.ArrayOf(uint8(), length);
+ auto a2 = gen.ArrayOf(int16(), length);
+ auto a3 = a0->Slice(0, length / 2);
+ auto a4 = a1->Slice(0, length / 2);
+ auto a5 = gen.ArrayOf(int32(), length);
+ auto a6 = gen.ArrayOf(uint8(), length);
+
+ auto b0 = RecordBatch::Make(schema0, length, {a0, a1, a2});
+ auto b1 = RecordBatch::Make(schema1, length, {a0, a1, a2});
+ auto b2 = RecordBatch::Make(schema2, length, {a0, a1});
+ auto b3 = RecordBatch::Make(schema2, length, {a3, a4});
+ auto b4 = RecordBatch::Make(schema2, length, {a5, a6});
+
+ // Same Values
+ ASSERT_TRUE(b0->Equals(*b1));
+
+ // Different number of columns
+ ASSERT_FALSE(b0->Equals(*b2));
+
+ // Different number of rows
+ ASSERT_FALSE(b2->Equals(*b3));
Review Comment:
It seems that `b2` and `b3` have the same number of rows. (It seems that
both of them use `length`.)
##########
cpp/src/arrow/compare.h:
##########
@@ -83,6 +83,37 @@ class EqualOptions {
return res;
}
+ /// Whether the \ref arrow::Schema property is used in the comparison.
+ ///
+ /// This option only affects the Equals methods
+ /// and has no effect on ApproxEquals methods.
+ bool use_schema() const { return use_schema_; }
+
+ /// Return a new EqualOptions object with the "use_schema_" property changed.
+ /// setting this option is false making the value of \ref
EqualOptions::use_metadata_
+ /// is useless.
Review Comment:
"ignored" may be better than "useless".
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]