pitrou commented on code in PR #41346:
URL: https://github.com/apache/arrow/pull/41346#discussion_r1603465248


##########
cpp/src/parquet/column_reader_test.cc:
##########
@@ -431,6 +431,66 @@ TEST_F(TestPrimitiveReader, TestReadValuesMissing) {
                ParquetException);
 }
 
+TEST_F(TestPrimitiveReader, DefLevelNotExpected) {
+  max_def_level_ = 1;
+  max_rep_level_ = 0;
+  std::vector<bool> values(1, false);

Review Comment:
   Why does `values` hold only one element? You're passing `num_values = 3` to `MakeDataPage` below.



##########
cpp/src/parquet/column_reader_test.cc:
##########
@@ -431,6 +431,66 @@ TEST_F(TestPrimitiveReader, TestReadValuesMissing) {
                ParquetException);
 }
 
+TEST_F(TestPrimitiveReader, DefLevelNotExpected) {
+  max_def_level_ = 1;
+  max_rep_level_ = 0;
+  std::vector<bool> values(1, false);
+  // Less than expected
+  {
+    std::vector<int16_t> input_def_levels(1, 1);
+    NodePtr type = schema::Boolean("a", Repetition::OPTIONAL);
+    const ColumnDescriptor descr(type, max_def_level_, max_rep_level_);
+
+    // The data page falls back to plain encoding
+    std::shared_ptr<ResizableBuffer> dummy = AllocateBuffer();
+    std::shared_ptr<DataPageV1> data_page = MakeDataPage<BooleanType>(
+        &descr, values, /*num_values=*/3, Encoding::PLAIN, /*indices=*/{},
+        /*indices_size=*/0, /*def_levels=*/input_def_levels, max_def_level_,
+        /*rep_levels=*/{},
+        /*max_rep_level=*/max_rep_level_);
+    pages_.push_back(data_page);
+    InitReader(&descr);
+    auto reader = static_cast<BoolReader*>(reader_.get());
+    ASSERT_TRUE(reader->HasNext());
+
+    constexpr int batch_size = 3;
+    std::vector<int16_t> def_levels(batch_size, 0);
+    std::vector<int16_t> rep_levels(batch_size, 0);
+    bool values_out[batch_size];
+    int64_t values_read;
+    ASSERT_THROW(reader->ReadBatch(batch_size, def_levels.data(), rep_levels.data(),
+                                   values_out, &values_read),
+                 ParquetException);

Review Comment:
   Can you assert the error message as well?



##########
cpp/src/parquet/column_reader_test.cc:
##########
@@ -431,6 +431,66 @@ TEST_F(TestPrimitiveReader, TestReadValuesMissing) {
                ParquetException);
 }
 
+TEST_F(TestPrimitiveReader, DefLevelNotExpected) {
+  max_def_level_ = 1;
+  max_rep_level_ = 0;
+  std::vector<bool> values(1, false);
+  // Less than expected
+  {
+    std::vector<int16_t> input_def_levels(1, 1);
+    NodePtr type = schema::Boolean("a", Repetition::OPTIONAL);
+    const ColumnDescriptor descr(type, max_def_level_, max_rep_level_);
+
+    // The data page falls back to plain encoding
+    std::shared_ptr<ResizableBuffer> dummy = AllocateBuffer();
+    std::shared_ptr<DataPageV1> data_page = MakeDataPage<BooleanType>(
+        &descr, values, /*num_values=*/3, Encoding::PLAIN, /*indices=*/{},
+        /*indices_size=*/0, /*def_levels=*/input_def_levels, max_def_level_,
+        /*rep_levels=*/{},
+        /*max_rep_level=*/max_rep_level_);
+    pages_.push_back(data_page);
+    InitReader(&descr);
+    auto reader = static_cast<BoolReader*>(reader_.get());
+    ASSERT_TRUE(reader->HasNext());
+
+    constexpr int batch_size = 3;
+    std::vector<int16_t> def_levels(batch_size, 0);
+    std::vector<int16_t> rep_levels(batch_size, 0);
+    bool values_out[batch_size];
+    int64_t values_read;
+    ASSERT_THROW(reader->ReadBatch(batch_size, def_levels.data(), rep_levels.data(),
+                                   values_out, &values_read),
+                 ParquetException);
+  }
+  // More than expected

Review Comment:
   I don't understand: what is "more than expected" here?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to