kou commented on code in PR #13386:
URL: https://github.com/apache/arrow/pull/13386#discussion_r906642739


##########
cpp/src/parquet/reader_test.cc:
##########
@@ -127,6 +127,90 @@ void CheckRowGroupMetadata(const RowGroupMetaData* 
rg_metadata,
   }
 }
 
+class TestTextDeltaLengthByteArray : public ::testing::Test {
+ public:
+  void SetUp() {
+    reader_ = 
ParquetFileReader::OpenFile(data_file("delta_length_byte_array.parquet"));
+  }
+
+  void TearDown() {}
+
+ protected:
+  std::unique_ptr<ParquetFileReader> reader_;
+};
+
+TEST_F(TestTextDeltaLengthByteArray, TestTextScanner) {
+  auto group = reader_->RowGroup(0);
+
+  // column 0, id
+  auto scanner = std::make_shared<ByteArrayScanner>(group->Column(0));
+  ByteArray val;
+  bool is_null;
+  std::string expected_prefix("apple_banana_mango");
+  for (int i = 0; i < 1000; ++i) {
+    ASSERT_TRUE(scanner->HasNext());
+    ASSERT_TRUE(scanner->NextValue(&val, &is_null));
+    ASSERT_FALSE(is_null);
+    std::string expected = expected_prefix + std::to_string(i * i);
+    ASSERT_TRUE(val.len == expected.length());
+    ASSERT_EQ(::arrow::util::string_view(reinterpret_cast<const 
char*>(val.ptr), val.len),
+              expected);
+  }
+  ASSERT_FALSE(scanner->HasNext());
+  ASSERT_FALSE(scanner->NextValue(&val, &is_null));
+}
+
+TEST_F(TestTextDeltaLengthByteArray, TestBatchRead) {
+  auto group = reader_->RowGroup(0);
+
+  // column 0, id
+  auto col = std::dynamic_pointer_cast<ByteArrayReader>(group->Column(0));
+
+  const int16_t batch_size = 25;
+  int16_t def_levels[batch_size];
+  int16_t rep_levels[batch_size];
+  ByteArray values[batch_size];
+
+  // This file only has 1000 rows
+  ASSERT_EQ(1000, reader_->metadata()->num_rows());
+  // This file only has 1 row group
+  ASSERT_EQ(1, reader_->metadata()->num_row_groups());
+  // Size of the metadata is 105 bytes
+  ASSERT_EQ(105, reader_->metadata()->size());
+  // This row group must have 1000 rows
+  ASSERT_EQ(1000, group->metadata()->num_rows());
+
+  // Check if the column is encoded with DELTA_LENGTH_BYTE_ARRAY
+  auto col_chunk = group->metadata()->ColumnChunk(0);
+
+  ASSERT_TRUE(std::find(col_chunk->encodings().begin(), 
col_chunk->encodings().end(),
+                        Encoding::DELTA_LENGTH_BYTE_ARRAY) !=
+              col_chunk->encodings().end());
+
+  ASSERT_TRUE(col->HasNext());
+  int64_t values_read = 0;
+  int64_t curr_batch_read;
+  std::string expected_prefix("apple_banana_mango");
+  while (values_read < 1000) {
+    auto levels_read =
+        col->ReadBatch(batch_size, def_levels, rep_levels, values, 
&curr_batch_read);
+    ASSERT_EQ(batch_size, levels_read);
+    ASSERT_EQ(batch_size, curr_batch_read);
+    for (int i = 0; i < batch_size; i++) {

Review Comment:
   Could you use the same type for `i` and `batch_size`?



##########
cpp/src/parquet/reader_test.cc:
##########
@@ -127,6 +127,90 @@ void CheckRowGroupMetadata(const RowGroupMetaData* 
rg_metadata,
   }
 }
 
+class TestTextDeltaLengthByteArray : public ::testing::Test {
+ public:
+  void SetUp() {
+    reader_ = 
ParquetFileReader::OpenFile(data_file("delta_length_byte_array.parquet"));
+  }
+
+  void TearDown() {}
+
+ protected:
+  std::unique_ptr<ParquetFileReader> reader_;
+};
+
+TEST_F(TestTextDeltaLengthByteArray, TestTextScanner) {
+  auto group = reader_->RowGroup(0);
+
+  // column 0, id
+  auto scanner = std::make_shared<ByteArrayScanner>(group->Column(0));
+  ByteArray val;
+  bool is_null;
+  std::string expected_prefix("apple_banana_mango");
+  for (int i = 0; i < 1000; ++i) {
+    ASSERT_TRUE(scanner->HasNext());
+    ASSERT_TRUE(scanner->NextValue(&val, &is_null));
+    ASSERT_FALSE(is_null);
+    std::string expected = expected_prefix + std::to_string(i * i);
+    ASSERT_TRUE(val.len == expected.length());
+    ASSERT_EQ(::arrow::util::string_view(reinterpret_cast<const 
char*>(val.ptr), val.len),
+              expected);
+  }
+  ASSERT_FALSE(scanner->HasNext());
+  ASSERT_FALSE(scanner->NextValue(&val, &is_null));
+}
+
+TEST_F(TestTextDeltaLengthByteArray, TestBatchRead) {
+  auto group = reader_->RowGroup(0);
+
+  // column 0, id
+  auto col = std::dynamic_pointer_cast<ByteArrayReader>(group->Column(0));
+
+  const int16_t batch_size = 25;
+  int16_t def_levels[batch_size];
+  int16_t rep_levels[batch_size];
+  ByteArray values[batch_size];

Review Comment:
   Could you move these declarations into the `while (values_read < 1000) {` block
   below, since they are used only inside that loop?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to