This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 0f0c0a266b65b7f27ef376e545ff0a5ea6e7d592
Author: 苏小刚 <[email protected]>
AuthorDate: Fri Apr 26 13:51:30 2024 +0800

    [opt](parquet)Skip page with offset index (#33082)
    
    Make skip_page() in ColumnChunkReader more efficient: page headers are no 
longer read when the column chunk has page locations (an offset index).
---
 .../parquet/vparquet_column_chunk_reader.cpp       |  82 +++---
 .../format/parquet/vparquet_column_chunk_reader.h  |  11 +-
 .../exec/format/parquet/vparquet_column_reader.cpp |   7 +-
 .../exec/format/parquet/vparquet_column_reader.h   |  22 +-
 .../exec/format/parquet/vparquet_group_reader.cpp  |  12 +-
 .../exec/format/parquet/vparquet_group_reader.h    |   1 -
 .../exec/format/parquet/vparquet_page_reader.cpp   |  15 +-
 .../vec/exec/format/parquet/vparquet_page_reader.h | 123 ++++++++-
 be/src/vec/exec/format/parquet/vparquet_reader.cpp |  11 +-
 be/src/vec/exec/format/parquet/vparquet_reader.h   |   3 +-
 be/test/vec/exec/parquet/parquet_thrift_test.cpp   |   3 +-
 .../hive/test_hive_parquet_skip_page.out           | 289 +++++++++++++++++++++
 .../hive/test_hive_parquet_skip_page.groovy        | 131 ++++++++++
 13 files changed, 641 insertions(+), 69 deletions(-)

diff --git a/be/src/vec/exec/format/parquet/vparquet_column_chunk_reader.cpp 
b/be/src/vec/exec/format/parquet/vparquet_column_chunk_reader.cpp
index 6feb9bc1025..af30e63d1e3 100644
--- a/be/src/vec/exec/format/parquet/vparquet_column_chunk_reader.cpp
+++ b/be/src/vec/exec/format/parquet/vparquet_column_chunk_reader.cpp
@@ -47,12 +47,14 @@ namespace doris::vectorized {
 
 ColumnChunkReader::ColumnChunkReader(io::BufferedStreamReader* reader,
                                      tparquet::ColumnChunk* column_chunk, 
FieldSchema* field_schema,
+                                     const tparquet::OffsetIndex* offset_index,
                                      cctz::time_zone* ctz, io::IOContext* 
io_ctx)
         : _field_schema(field_schema),
           _max_rep_level(field_schema->repetition_level),
           _max_def_level(field_schema->definition_level),
           _stream_reader(reader),
           _metadata(column_chunk->meta_data),
+          _offset_index(offset_index),
           //          _ctz(ctz),
           _io_ctx(io_ctx) {}
 
@@ -61,7 +63,9 @@ Status ColumnChunkReader::init() {
                                   ? _metadata.dictionary_page_offset
                                   : _metadata.data_page_offset;
     size_t chunk_size = _metadata.total_compressed_size;
-    _page_reader = std::make_unique<PageReader>(_stream_reader, _io_ctx, 
start_offset, chunk_size);
+    // create page reader
+    _page_reader = create_page_reader(_stream_reader, _io_ctx, start_offset, 
chunk_size,
+                                      _metadata.num_values, _offset_index);
     // get the block compression codec
     RETURN_IF_ERROR(get_block_compression_codec(_metadata.codec, 
&_block_compress_codec));
     if (_metadata.__isset.dictionary_page_offset) {
@@ -88,24 +92,27 @@ Status ColumnChunkReader::next_page() {
     if (UNLIKELY(_remaining_num_values != 0)) {
         return Status::Corruption("Should skip current page");
     }
+
     RETURN_IF_ERROR(_page_reader->next_page_header());
-    if (_page_reader->get_page_header()->type == 
tparquet::PageType::DICTIONARY_PAGE) {
-        // the first page maybe directory page even if 
_metadata.__isset.dictionary_page_offset == false,
-        // so we should parse the directory page in next_page()
-        RETURN_IF_ERROR(_decode_dict_page());
-        // parse the real first data page
-        return next_page();
-    } else if (_page_reader->get_page_header()->type == 
tparquet::PageType::DATA_PAGE_V2) {
-        _remaining_num_values = 
_page_reader->get_page_header()->data_page_header_v2.num_values;
-        _chunk_parsed_values += _remaining_num_values;
-        _state = HEADER_PARSED;
-        return Status::OK();
-    } else {
-        _remaining_num_values = 
_page_reader->get_page_header()->data_page_header.num_values;
-        _chunk_parsed_values += _remaining_num_values;
-        _state = HEADER_PARSED;
-        return Status::OK();
+
+    if (!_dict_checked) {
+        _dict_checked = true;
+        const tparquet::PageHeader* header;
+        RETURN_IF_ERROR(_page_reader->get_page_header(header));
+        if (header->type == tparquet::PageType::DICTIONARY_PAGE) {
+            // the first page may be a dictionary page even if 
_metadata.__isset.dictionary_page_offset == false,
+            // so we should parse the dictionary page in next_page()
+            RETURN_IF_ERROR(_decode_dict_page());
+            // parse the real first data page
+            return next_page();
+        }
     }
+
+    RETURN_IF_ERROR(_page_reader->get_num_values(_remaining_num_values));
+    _chunk_parsed_values += _remaining_num_values;
+    _state = HEADER_PARSED;
+
+    return Status::OK();
 }
 
 void ColumnChunkReader::_get_uncompressed_levels(const 
tparquet::DataPageHeaderV2& page_v2,
@@ -119,17 +126,19 @@ void ColumnChunkReader::_get_uncompressed_levels(const 
tparquet::DataPageHeaderV
 }
 
 Status ColumnChunkReader::load_page_data() {
+    // TODO: remove checking HEADER_PARSED or change name
     if (UNLIKELY(_state != HEADER_PARSED)) {
         return Status::Corruption("Should parse page header");
     }
-    const auto& header = *_page_reader->get_page_header();
-    int32_t uncompressed_size = header.uncompressed_page_size;
+    const tparquet::PageHeader* header;
+    RETURN_IF_ERROR(_page_reader->get_page_header(header));
+    int32_t uncompressed_size = header->uncompressed_page_size;
 
     if (_block_compress_codec != nullptr) {
         Slice compressed_data;
         RETURN_IF_ERROR(_page_reader->get_page_data(compressed_data));
-        if (header.__isset.data_page_header_v2) {
-            const tparquet::DataPageHeaderV2& header_v2 = 
header.data_page_header_v2;
+        if (header->__isset.data_page_header_v2) {
+            const tparquet::DataPageHeaderV2& header_v2 = 
header->data_page_header_v2;
             // uncompressed_size = rl + dl + uncompressed_data_size
             // compressed_size = rl + dl + compressed_data_size
             uncompressed_size -= header_v2.repetition_levels_byte_length +
@@ -137,8 +146,8 @@ Status ColumnChunkReader::load_page_data() {
             _get_uncompressed_levels(header_v2, compressed_data);
         }
         bool is_v2_compressed =
-                header.__isset.data_page_header_v2 && 
header.data_page_header_v2.is_compressed;
-        if (header.__isset.data_page_header || is_v2_compressed) {
+                header->__isset.data_page_header_v2 && 
header->data_page_header_v2.is_compressed;
+        if (header->__isset.data_page_header || is_v2_compressed) {
             // check decompressed buffer size
             _reserve_decompress_buf(uncompressed_size);
             _page_data = Slice(_decompress_buf.get(), uncompressed_size);
@@ -151,36 +160,36 @@ Status ColumnChunkReader::load_page_data() {
         }
     } else {
         RETURN_IF_ERROR(_page_reader->get_page_data(_page_data));
-        if (header.__isset.data_page_header_v2) {
-            _get_uncompressed_levels(header.data_page_header_v2, _page_data);
+        if (header->__isset.data_page_header_v2) {
+            _get_uncompressed_levels(header->data_page_header_v2, _page_data);
         }
     }
 
     // Initialize repetition level and definition level. Skip when level = 0, 
which means required field.
     if (_max_rep_level > 0) {
         SCOPED_RAW_TIMER(&_statistics.decode_level_time);
-        if (header.__isset.data_page_header_v2) {
+        if (header->__isset.data_page_header_v2) {
             RETURN_IF_ERROR(_rep_level_decoder.init_v2(_v2_rep_levels, 
_max_rep_level,
                                                        _remaining_num_values));
         } else {
             RETURN_IF_ERROR(_rep_level_decoder.init(
-                    &_page_data, 
header.data_page_header.repetition_level_encoding, _max_rep_level,
+                    &_page_data, 
header->data_page_header.repetition_level_encoding, _max_rep_level,
                     _remaining_num_values));
         }
     }
     if (_max_def_level > 0) {
         SCOPED_RAW_TIMER(&_statistics.decode_level_time);
-        if (header.__isset.data_page_header_v2) {
+        if (header->__isset.data_page_header_v2) {
             RETURN_IF_ERROR(_def_level_decoder.init_v2(_v2_def_levels, 
_max_def_level,
                                                        _remaining_num_values));
         } else {
             RETURN_IF_ERROR(_def_level_decoder.init(
-                    &_page_data, 
header.data_page_header.definition_level_encoding, _max_def_level,
+                    &_page_data, 
header->data_page_header.definition_level_encoding, _max_def_level,
                     _remaining_num_values));
         }
     }
-    auto encoding = header.__isset.data_page_header_v2 ? 
header.data_page_header_v2.encoding
-                                                       : 
header.data_page_header.encoding;
+    auto encoding = header->__isset.data_page_header_v2 ? 
header->data_page_header_v2.encoding
+                                                        : 
header->data_page_header.encoding;
     // change the deprecated encoding to RLE_DICTIONARY
     if (encoding == tparquet::Encoding::PLAIN_DICTIONARY) {
         encoding = tparquet::Encoding::RLE_DICTIONARY;
@@ -207,14 +216,15 @@ Status ColumnChunkReader::load_page_data() {
 }
 
 Status ColumnChunkReader::_decode_dict_page() {
-    const tparquet::PageHeader& header = *_page_reader->get_page_header();
-    DCHECK_EQ(tparquet::PageType::DICTIONARY_PAGE, header.type);
+    const tparquet::PageHeader* header;
+    RETURN_IF_ERROR(_page_reader->get_page_header(header));
+    DCHECK_EQ(tparquet::PageType::DICTIONARY_PAGE, header->type);
     SCOPED_RAW_TIMER(&_statistics.decode_dict_time);
 
     // Using the PLAIN_DICTIONARY enum value is deprecated in the Parquet 2.0 
specification.
     // Prefer using RLE_DICTIONARY in a data page and PLAIN in a dictionary 
page for Parquet 2.0+ files.
     // refer: https://github.com/apache/parquet-format/blob/master/Encodings.md
-    tparquet::Encoding::type dict_encoding = 
header.dictionary_page_header.encoding;
+    tparquet::Encoding::type dict_encoding = 
header->dictionary_page_header.encoding;
     if (dict_encoding != tparquet::Encoding::PLAIN_DICTIONARY &&
         dict_encoding != tparquet::Encoding::PLAIN) {
         return Status::InternalError("Unsupported dictionary encoding {}",
@@ -222,7 +232,7 @@ Status ColumnChunkReader::_decode_dict_page() {
     }
 
     // Prepare dictionary data
-    int32_t uncompressed_size = header.uncompressed_page_size;
+    int32_t uncompressed_size = header->uncompressed_page_size;
     std::unique_ptr<uint8_t[]> dict_data(new uint8_t[uncompressed_size]);
     if (_block_compress_codec != nullptr) {
         Slice compressed_data;
@@ -246,7 +256,7 @@ Status ColumnChunkReader::_decode_dict_page() {
     //    page_decoder->init(_field_schema, _ctz);
     // Set the dictionary data
     RETURN_IF_ERROR(page_decoder->set_dict(dict_data, uncompressed_size,
-                                           
header.dictionary_page_header.num_values));
+                                           
header->dictionary_page_header.num_values));
     _decoders[static_cast<int>(tparquet::Encoding::RLE_DICTIONARY)] = 
std::move(page_decoder);
 
     _has_dict = true;
diff --git a/be/src/vec/exec/format/parquet/vparquet_column_chunk_reader.h 
b/be/src/vec/exec/format/parquet/vparquet_column_chunk_reader.h
index 0ca6859ac83..79ee3cd6463 100644
--- a/be/src/vec/exec/format/parquet/vparquet_column_chunk_reader.h
+++ b/be/src/vec/exec/format/parquet/vparquet_column_chunk_reader.h
@@ -71,7 +71,7 @@ using ColumnString = ColumnStr<UInt32>;
  *   // Or, we can call the chunk_reader.skip_page() to skip current page.
  *   chunk_reader.load_page_data();
  *   // Decode values into column or slice.
- *   // Or, we can call chunk_reader.slip_values(num_values) to skip some 
values.
+ *   // Or, we can call chunk_reader.skip_values(num_values) to skip some 
values.
  *   chunk_reader.decode_values(slice, num_values);
  * }
  */
@@ -84,10 +84,13 @@ public:
         int64_t decode_value_time = 0;
         int64_t decode_dict_time = 0;
         int64_t decode_level_time = 0;
+        int64_t skip_page_header_num = 0;
+        int64_t parse_page_header_num = 0;
     };
 
     ColumnChunkReader(io::BufferedStreamReader* reader, tparquet::ColumnChunk* 
column_chunk,
-                      FieldSchema* field_schema, cctz::time_zone* ctz, 
io::IOContext* io_ctx);
+                      FieldSchema* field_schema, const tparquet::OffsetIndex* 
offset_index,
+                      cctz::time_zone* ctz, io::IOContext* io_ctx);
     ~ColumnChunkReader() = default;
 
     // Initialize chunk reader, will generate the decoder and codec.
@@ -170,6 +173,8 @@ public:
 
     Statistics& statistics() {
         _statistics.decode_header_time = 
_page_reader->statistics().decode_header_time;
+        _statistics.skip_page_header_num = 
_page_reader->statistics().skip_page_header_num;
+        _statistics.parse_page_header_num = 
_page_reader->statistics().parse_page_header_num;
         return _statistics;
     }
 
@@ -204,6 +209,7 @@ private:
 
     io::BufferedStreamReader* _stream_reader = nullptr;
     tparquet::ColumnMetaData _metadata;
+    const tparquet::OffsetIndex* _offset_index;
     //    cctz::time_zone* _ctz;
     io::IOContext* _io_ctx = nullptr;
 
@@ -219,6 +225,7 @@ private:
     size_t _decompress_buf_size = 0;
     Slice _v2_rep_levels;
     Slice _v2_def_levels;
+    bool _dict_checked = false;
     bool _has_dict = false;
     Decoder* _page_decoder = nullptr;
     // Map: encoding -> Decoder
diff --git a/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp 
b/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp
index 2a3782ab449..85d03daebc5 100644
--- a/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp
+++ b/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp
@@ -108,7 +108,7 @@ Status ParquetColumnReader::create(io::FileReaderSPtr file, 
FieldSchema* field,
                                    const std::vector<RowRange>& row_ranges, 
cctz::time_zone* ctz,
                                    io::IOContext* io_ctx,
                                    std::unique_ptr<ParquetColumnReader>& 
reader,
-                                   size_t max_buf_size) {
+                                   size_t max_buf_size, const 
tparquet::OffsetIndex* offset_index) {
     if (field->type.type == TYPE_ARRAY) {
         std::unique_ptr<ParquetColumnReader> element_reader;
         RETURN_IF_ERROR(create(file, &field->children[0], row_group, 
row_ranges, ctz, io_ctx,
@@ -144,7 +144,8 @@ Status ParquetColumnReader::create(io::FileReaderSPtr file, 
FieldSchema* field,
         reader.reset(struct_reader.release());
     } else {
         const tparquet::ColumnChunk& chunk = 
row_group.columns[field->physical_column_index];
-        auto scalar_reader = ScalarColumnReader::create_unique(row_ranges, 
chunk, ctz, io_ctx);
+        auto scalar_reader =
+                ScalarColumnReader::create_unique(row_ranges, chunk, 
offset_index, ctz, io_ctx);
         RETURN_IF_ERROR(scalar_reader->init(file, field, max_buf_size));
         reader.reset(scalar_reader.release());
     }
@@ -190,7 +191,7 @@ Status ScalarColumnReader::init(io::FileReaderSPtr file, 
FieldSchema* field, siz
     _stream_reader = std::make_unique<io::BufferedFileStreamReader>(file, 
chunk_start, chunk_len,
                                                                     
prefetch_buffer_size);
     _chunk_reader = std::make_unique<ColumnChunkReader>(_stream_reader.get(), 
&_chunk_meta, field,
-                                                        _ctz, _io_ctx);
+                                                        _offset_index, _ctz, 
_io_ctx);
     RETURN_IF_ERROR(_chunk_reader->init());
     return Status::OK();
 }
diff --git a/be/src/vec/exec/format/parquet/vparquet_column_reader.h 
b/be/src/vec/exec/format/parquet/vparquet_column_reader.h
index d12eac2f383..f0eadb8bcd6 100644
--- a/be/src/vec/exec/format/parquet/vparquet_column_reader.h
+++ b/be/src/vec/exec/format/parquet/vparquet_column_reader.h
@@ -65,7 +65,9 @@ public:
                   decode_value_time(0),
                   decode_dict_time(0),
                   decode_level_time(0),
-                  decode_null_map_time(0) {}
+                  decode_null_map_time(0),
+                  skip_page_header_num(0),
+                  parse_page_header_num(0) {}
 
         Statistics(io::BufferedStreamReader::Statistics& fs, 
ColumnChunkReader::Statistics& cs,
                    int64_t null_map_time)
@@ -79,7 +81,9 @@ public:
                   decode_value_time(cs.decode_value_time),
                   decode_dict_time(cs.decode_dict_time),
                   decode_level_time(cs.decode_level_time),
-                  decode_null_map_time(null_map_time) {}
+                  decode_null_map_time(null_map_time),
+                  skip_page_header_num(cs.skip_page_header_num),
+                  parse_page_header_num(cs.parse_page_header_num) {}
 
         int64_t read_time;
         int64_t read_calls;
@@ -92,6 +96,8 @@ public:
         int64_t decode_dict_time;
         int64_t decode_level_time;
         int64_t decode_null_map_time;
+        int64_t skip_page_header_num;
+        int64_t parse_page_header_num;
 
         void merge(Statistics& statistics) {
             read_time += statistics.read_time;
@@ -105,6 +111,8 @@ public:
             decode_dict_time += statistics.decode_dict_time;
             decode_level_time += statistics.decode_level_time;
             decode_null_map_time += statistics.decode_null_map_time;
+            skip_page_header_num += statistics.skip_page_header_num;
+            parse_page_header_num += statistics.parse_page_header_num;
         }
     };
 
@@ -134,7 +142,7 @@ public:
                          const tparquet::RowGroup& row_group,
                          const std::vector<RowRange>& row_ranges, 
cctz::time_zone* ctz,
                          io::IOContext* io_ctx, 
std::unique_ptr<ParquetColumnReader>& reader,
-                         size_t max_buf_size);
+                         size_t max_buf_size, const tparquet::OffsetIndex* 
offset_index = nullptr);
     void set_nested_column() { _nested_column = true; }
     virtual const std::vector<level_t>& get_rep_level() const = 0;
     virtual const std::vector<level_t>& get_def_level() const = 0;
@@ -160,9 +168,12 @@ class ScalarColumnReader : public ParquetColumnReader {
     ENABLE_FACTORY_CREATOR(ScalarColumnReader)
 public:
     ScalarColumnReader(const std::vector<RowRange>& row_ranges,
-                       const tparquet::ColumnChunk& chunk_meta, 
cctz::time_zone* ctz,
+                       const tparquet::ColumnChunk& chunk_meta,
+                       const tparquet::OffsetIndex* offset_index, 
cctz::time_zone* ctz,
                        io::IOContext* io_ctx)
-            : ParquetColumnReader(row_ranges, ctz, io_ctx), 
_chunk_meta(chunk_meta) {}
+            : ParquetColumnReader(row_ranges, ctz, io_ctx),
+              _chunk_meta(chunk_meta),
+              _offset_index(offset_index) {}
     ~ScalarColumnReader() override { close(); }
     Status init(io::FileReaderSPtr file, FieldSchema* field, size_t 
max_buf_size);
     Status read_column_data(ColumnPtr& doris_column, DataTypePtr& type,
@@ -182,6 +193,7 @@ public:
 
 private:
     tparquet::ColumnChunk _chunk_meta;
+    const tparquet::OffsetIndex* _offset_index;
     std::unique_ptr<io::BufferedFileStreamReader> _stream_reader;
     std::unique_ptr<ColumnChunkReader> _chunk_reader;
     std::vector<level_t> _rep_levels;
diff --git a/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp 
b/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp
index 3f8000c3173..335207070dd 100644
--- a/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp
+++ b/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp
@@ -39,7 +39,6 @@
 #include "runtime/thread_context.h"
 #include "runtime/types.h"
 #include "schema_desc.h"
-#include "util/simd/bits.h"
 #include "vec/columns/column_const.h"
 #include "vec/columns/column_nullable.h"
 #include "vec/columns/column_string.h"
@@ -124,12 +123,17 @@ Status RowGroupReader::init(
     const size_t MAX_GROUP_BUF_SIZE = config::parquet_rowgroup_max_buffer_mb 
<< 20;
     const size_t MAX_COLUMN_BUF_SIZE = config::parquet_column_max_buffer_mb << 
20;
     size_t max_buf_size = std::min(MAX_COLUMN_BUF_SIZE, MAX_GROUP_BUF_SIZE / 
_read_columns.size());
-    for (auto& read_col : _read_columns) {
-        auto field = const_cast<FieldSchema*>(schema.get_column(read_col));
+    for (const auto& read_col : _read_columns) {
+        auto* field = const_cast<FieldSchema*>(schema.get_column(read_col));
+        auto physical_index = field->physical_column_index;
         std::unique_ptr<ParquetColumnReader> reader;
+        // TODO: support nested column types
+        const tparquet::OffsetIndex* offset_index =
+                col_offsets.find(physical_index) != col_offsets.end() ? 
&col_offsets[physical_index]
+                                                                      : 
nullptr;
         RETURN_IF_ERROR(ParquetColumnReader::create(_file_reader, field, 
_row_group_meta,
                                                     _read_ranges, _ctz, 
_io_ctx, reader,
-                                                    max_buf_size));
+                                                    max_buf_size, 
offset_index));
         if (reader == nullptr) {
             VLOG_DEBUG << "Init row group(" << _row_group_id << ") reader 
failed";
             return Status::Corruption("Init row group reader failed");
diff --git a/be/src/vec/exec/format/parquet/vparquet_group_reader.h 
b/be/src/vec/exec/format/parquet/vparquet_group_reader.h
index 128a7450554..d38f5a74adf 100644
--- a/be/src/vec/exec/format/parquet/vparquet_group_reader.h
+++ b/be/src/vec/exec/format/parquet/vparquet_group_reader.h
@@ -29,7 +29,6 @@
 
 #include "io/fs/file_reader_writer_fwd.h"
 #include "vec/columns/column.h"
-#include "vec/common/allocator.h"
 #include "vec/exec/format/parquet/parquet_common.h"
 #include "vec/exprs/vexpr_fwd.h"
 #include "vparquet_column_reader.h"
diff --git a/be/src/vec/exec/format/parquet/vparquet_page_reader.cpp 
b/be/src/vec/exec/format/parquet/vparquet_page_reader.cpp
index 3b4e18c27da..a321e77c692 100644
--- a/be/src/vec/exec/format/parquet/vparquet_page_reader.cpp
+++ b/be/src/vec/exec/format/parquet/vparquet_page_reader.cpp
@@ -40,11 +40,23 @@ namespace doris::vectorized {
 
 static constexpr size_t INIT_PAGE_HEADER_SIZE = 128;
 
+std::unique_ptr<PageReader> create_page_reader(io::BufferedStreamReader* 
reader,
+                                               io::IOContext* io_ctx, uint64_t 
offset,
+                                               uint64_t length, int64_t 
num_values,
+                                               const tparquet::OffsetIndex* 
offset_index) {
+    if (offset_index) {
+        return std::make_unique<PageReaderWithOffsetIndex>(reader, io_ctx, 
offset, length,
+                                                           num_values, 
offset_index);
+    } else {
+        return std::make_unique<PageReader>(reader, io_ctx, offset, length);
+    }
+}
+
 PageReader::PageReader(io::BufferedStreamReader* reader, io::IOContext* 
io_ctx, uint64_t offset,
                        uint64_t length)
         : _reader(reader), _io_ctx(io_ctx), _start_offset(offset), 
_end_offset(offset + length) {}
 
-Status PageReader::next_page_header() {
+Status PageReader::_parse_page_header() {
     if (UNLIKELY(_offset < _start_offset || _offset >= _end_offset)) {
         return Status::IOError("Out-of-bounds Access");
     }
@@ -82,6 +94,7 @@ Status PageReader::next_page_header() {
         header_size <<= 2;
     }
 
+    _statistics.parse_page_header_num++;
     _offset += real_header_size;
     _next_header_offset = _offset + _cur_page_header.compressed_page_size;
     _state = HEADER_PARSED;
diff --git a/be/src/vec/exec/format/parquet/vparquet_page_reader.h 
b/be/src/vec/exec/format/parquet/vparquet_page_reader.h
index bdd0a8d0f5f..5765df4fc1f 100644
--- a/be/src/vec/exec/format/parquet/vparquet_page_reader.h
+++ b/be/src/vec/exec/format/parquet/vparquet_page_reader.h
@@ -20,6 +20,8 @@
 #include <gen_cpp/parquet_types.h>
 #include <stdint.h>
 
+#include <memory>
+
 #include "common/status.h"
 
 namespace doris {
@@ -39,11 +41,13 @@ class PageReader {
 public:
     struct Statistics {
         int64_t decode_header_time = 0;
+        int64_t skip_page_header_num = 0;
+        int64_t parse_page_header_num = 0;
     };
 
     PageReader(io::BufferedStreamReader* reader, io::IOContext* io_ctx, 
uint64_t offset,
                uint64_t length);
-    ~PageReader() = default;
+    virtual ~PageReader() = default;
 
     // Deprecated
     // Parquet file may not be standardized,
@@ -52,13 +56,31 @@ public:
     // [[deprecated]]
     bool has_next_page() const { return _offset < _end_offset; }
 
-    Status next_page_header();
+    virtual Status next_page_header() { return _parse_page_header(); }
 
-    Status skip_page();
+    virtual Status get_page_header(const tparquet::PageHeader*& page_header) {
+        if (UNLIKELY(_state != HEADER_PARSED)) {
+            return Status::InternalError("Page header not parsed");
+        }
+        page_header = &_cur_page_header;
+        return Status::OK();
+    }
 
-    const tparquet::PageHeader* get_page_header() const { return 
&_cur_page_header; }
+    virtual Status get_num_values(uint32_t& num_values) {
+        if (_state != HEADER_PARSED) {
+            return Status::InternalError("Page header not parsed");
+        }
+        if (_cur_page_header.type == tparquet::PageType::DATA_PAGE_V2) {
+            num_values = _cur_page_header.data_page_header_v2.num_values;
+        } else {
+            num_values = _cur_page_header.data_page_header.num_values;
+        }
+        return Status::OK();
+    }
 
-    Status get_page_data(Slice& slice);
+    virtual Status skip_page();
+
+    virtual Status get_page_data(Slice& slice);
 
     Statistics& statistics() { return _statistics; }
 
@@ -68,20 +90,99 @@ public:
         _state = INITIALIZED;
     }
 
-private:
+protected:
     enum PageReaderState { INITIALIZED, HEADER_PARSED };
-
-    io::BufferedStreamReader* _reader = nullptr;
-    io::IOContext* _io_ctx = nullptr;
+    PageReaderState _state = INITIALIZED;
     tparquet::PageHeader _cur_page_header;
     Statistics _statistics;
-    PageReaderState _state = INITIALIZED;
 
+    Status _parse_page_header();
+
+private:
+    io::BufferedStreamReader* _reader = nullptr;
+    io::IOContext* _io_ctx = nullptr;
     uint64_t _offset = 0;
     uint64_t _next_header_offset = 0;
-
     uint64_t _start_offset = 0;
     uint64_t _end_offset = 0;
 };
 
+class PageReaderWithOffsetIndex : public PageReader {
+public:
+    PageReaderWithOffsetIndex(io::BufferedStreamReader* reader, io::IOContext* 
io_ctx,
+                              uint64_t offset, uint64_t length, int64_t 
num_values,
+                              const tparquet::OffsetIndex* offset_index)
+            : PageReader(reader, io_ctx, offset, length),
+              _num_values(num_values),
+              _offset_index(offset_index) {}
+
+    Status next_page_header() override {
+        // the page header is parsed lazily, in get_page_header() or get_page_data()
+        return Status::OK();
+    }
+
+    Status get_page_header(const tparquet::PageHeader*& page_header) override {
+        if (_state != HEADER_PARSED) {
+            RETURN_IF_ERROR(_parse_page_header());
+        }
+        page_header = &_cur_page_header;
+        return Status::OK();
+    }
+
+    Status get_num_values(uint32_t& num_values) override {
+        if (UNLIKELY(_page_index >= _offset_index->page_locations.size())) {
+            return Status::IOError("End of page");
+        }
+
+        if (_page_index < _offset_index->page_locations.size() - 1) {
+            num_values = _offset_index->page_locations[_page_index + 
1].first_row_index -
+                         
_offset_index->page_locations[_page_index].first_row_index;
+        } else {
+            num_values = _num_values - 
_offset_index->page_locations[_page_index].first_row_index;
+        }
+        return Status::OK();
+    }
+
+    Status skip_page() override {
+        if (UNLIKELY(_page_index >= _offset_index->page_locations.size())) {
+            return Status::IOError("End of page");
+        }
+
+        if (_state != HEADER_PARSED) {
+            _statistics.skip_page_header_num++;
+        }
+
+        seek_to_page(_offset_index->page_locations[_page_index].offset +
+                     
_offset_index->page_locations[_page_index].compressed_page_size);
+        _page_index++;
+        return Status::OK();
+    }
+
+    Status get_page_data(Slice& slice) override {
+        if (_page_index >= _offset_index->page_locations.size()) {
+            return Status::IOError("End of page");
+        }
+        if (_state != HEADER_PARSED) {
+            RETURN_IF_ERROR(_parse_page_header());
+        }
+
+        // the dictionary page is not listed in the page locations
+        if (LIKELY(_cur_page_header.type != 
tparquet::PageType::DICTIONARY_PAGE)) {
+            _page_index++;
+        }
+
+        return PageReader::get_page_data(slice);
+    }
+
+private:
+    size_t _page_index = 0;
+    int64_t _num_values = 0;
+    const tparquet::OffsetIndex* _offset_index;
+};
+
+std::unique_ptr<PageReader> create_page_reader(io::BufferedStreamReader* 
reader,
+                                               io::IOContext* io_ctx, uint64_t 
offset,
+                                               uint64_t length, int64_t 
num_values = 0,
+                                               const tparquet::OffsetIndex* 
offset_index = nullptr);
+
 } // namespace doris::vectorized
diff --git a/be/src/vec/exec/format/parquet/vparquet_reader.cpp 
b/be/src/vec/exec/format/parquet/vparquet_reader.cpp
index 090c1bdf460..17e44e7b9a8 100644
--- a/be/src/vec/exec/format/parquet/vparquet_reader.cpp
+++ b/be/src/vec/exec/format/parquet/vparquet_reader.cpp
@@ -23,18 +23,14 @@
 #include <glog/logging.h>
 
 #include <functional>
-#include <ostream>
 #include <utility>
 
 #include "common/status.h"
 #include "exec/schema_scanner.h"
-#include "gen_cpp/descriptors.pb.h"
-#include "gtest/gtest_pred_impl.h"
 #include "io/file_factory.h"
 #include "io/fs/buffered_reader.h"
 #include "io/fs/file_reader.h"
 #include "io/fs/file_reader_writer_fwd.h"
-#include "olap/olap_common.h"
 #include "parquet_pred_cmp.h"
 #include "parquet_thrift_util.h"
 #include "runtime/define_primitive_type.h"
@@ -170,6 +166,10 @@ void ParquetReader::_init_profile() {
                 ADD_CHILD_TIMER_WITH_LEVEL(_profile, "DecodeLevelTime", 
parquet_profile, 1);
         _parquet_profile.decode_null_map_time =
                 ADD_CHILD_TIMER_WITH_LEVEL(_profile, "DecodeNullMapTime", 
parquet_profile, 1);
+        _parquet_profile.skip_page_header_num = ADD_CHILD_COUNTER_WITH_LEVEL(
+                _profile, "SkipPageHeaderNum", TUnit::UNIT, parquet_profile, 
1);
+        _parquet_profile.parse_page_header_num = ADD_CHILD_COUNTER_WITH_LEVEL(
+                _profile, "ParsePageHeaderNum", TUnit::UNIT, parquet_profile, 
1);
     }
 }
 
@@ -921,6 +921,9 @@ void ParquetReader::_collect_profile() {
     COUNTER_UPDATE(_parquet_profile.page_index_filter_time, 
_statistics.page_index_filter_time);
     COUNTER_UPDATE(_parquet_profile.row_group_filter_time, 
_statistics.row_group_filter_time);
 
+    COUNTER_UPDATE(_parquet_profile.skip_page_header_num, 
_column_statistics.skip_page_header_num);
+    COUNTER_UPDATE(_parquet_profile.parse_page_header_num,
+                   _column_statistics.parse_page_header_num);
     COUNTER_UPDATE(_parquet_profile.file_read_time, 
_column_statistics.read_time);
     COUNTER_UPDATE(_parquet_profile.file_read_calls, 
_column_statistics.read_calls);
     COUNTER_UPDATE(_parquet_profile.file_meta_read_calls, 
_column_statistics.meta_read_calls);
diff --git a/be/src/vec/exec/format/parquet/vparquet_reader.h 
b/be/src/vec/exec/format/parquet/vparquet_reader.h
index eba32abd225..0612951c67e 100644
--- a/be/src/vec/exec/format/parquet/vparquet_reader.h
+++ b/be/src/vec/exec/format/parquet/vparquet_reader.h
@@ -22,7 +22,6 @@
 #include <stdint.h>
 
 #include <list>
-#include <map>
 #include <memory>
 #include <string>
 #include <tuple>
@@ -183,6 +182,8 @@ private:
         RuntimeProfile::Counter* decode_dict_time = nullptr;
         RuntimeProfile::Counter* decode_level_time = nullptr;
         RuntimeProfile::Counter* decode_null_map_time = nullptr;
+        RuntimeProfile::Counter* skip_page_header_num = nullptr;
+        RuntimeProfile::Counter* parse_page_header_num = nullptr;
     };
 
     Status _open_file();
diff --git a/be/test/vec/exec/parquet/parquet_thrift_test.cpp 
b/be/test/vec/exec/parquet/parquet_thrift_test.cpp
index 4dfbd6a380f..1407edb08f6 100644
--- a/be/test/vec/exec/parquet/parquet_thrift_test.cpp
+++ b/be/test/vec/exec/parquet/parquet_thrift_test.cpp
@@ -205,7 +205,8 @@ static Status get_column_values(io::FileReaderSPtr 
file_reader, tparquet::Column
 
     io::BufferedFileStreamReader stream_reader(file_reader, start_offset, 
chunk_size, 1024);
 
-    ColumnChunkReader chunk_reader(&stream_reader, column_chunk, field_schema, 
&ctz, nullptr);
+    ColumnChunkReader chunk_reader(&stream_reader, column_chunk, field_schema, 
nullptr, &ctz,
+                                   nullptr);
     // initialize chunk reader
     static_cast<void>(chunk_reader.init());
     // seek to next page header
diff --git 
a/regression-test/data/external_table_p0/hive/test_hive_parquet_skip_page.out 
b/regression-test/data/external_table_p0/hive/test_hive_parquet_skip_page.out
new file mode 100644
index 00000000000..6c869dbc789
--- /dev/null
+++ 
b/regression-test/data/external_table_p0/hive/test_hive_parquet_skip_page.out
@@ -0,0 +1,289 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !q01 --
+1      2132    4633    4       28.00   28955.64        0.09    0.06    N       
O       1996-04-21      1996-03-30      1996-05-16      NONE    AIR     lites. 
fluffily even de
+1      15635   638     6       32.00   49620.16        0.07    0.02    N       
O       1996-01-30      1996-02-07      1996-02-03      DELIVER IN PERSON       
MAIL    arefully slyly ex
+1      24027   1534    5       24.00   22824.48        0.10    0.04    N       
O       1996-03-30      1996-03-14      1996-04-01      NONE    FOB      
pending foxes. slyly re
+1      63700   3701    3       8.00    13309.60        0.10    0.02    N       
O       1996-01-29      1996-03-05      1996-01-31      TAKE BACK RETURN        
REG AIR riously. regular, express dep
+1      67310   7311    2       36.00   45983.16        0.09    0.06    N       
O       1996-04-12      1996-02-28      1996-04-20      TAKE BACK RETURN        
MAIL    ly final dependencies: slyly bold 
+1      155190  7706    1       17.00   21168.23        0.04    0.02    N       
O       1996-03-13      1996-02-12      1996-03-22      DELIVER IN PERSON       
TRUCK   egular courts above the
+2      106170  1191    1       38.00   44694.46        0.00    0.05    N       
O       1997-01-28      1997-01-14      1997-02-02      TAKE BACK RETURN        
RAIL    ven requests. deposits breach a
+3      4297    1798    1       45.00   54058.05        0.06    0.00    R       
F       1994-02-02      1994-01-04      1994-02-23      NONE    AIR     ongside 
of the furiously brave acco
+3      19036   6540    2       49.00   46796.47        0.10    0.00    R       
F       1993-11-09      1993-12-20      1993-11-24      TAKE BACK RETURN        
RAIL     unusual accounts. eve
+3      29380   1883    4       2.00    2618.76 0.01    0.06    A       F       
1993-12-04      1994-01-07      1994-01-01      NONE    TRUCK   y. fluffily 
pending d
+
+-- !q02 --
+5999008        16312   6313    4       2.00    2456.62 0.08    0.05    R       
F       1994-04-28      1994-06-01      1994-05-12      COLLECT COD     FOB     
longside of the slo
+5999008        32738   2739    3       39.00   65158.47        0.09    0.08    
R       F       1994-07-11      1994-06-15      1994-08-04      TAKE BACK 
RETURN        TRUCK   equests nag along
+5999008        64711   2230    1       29.00   48595.59        0.00    0.02    
R       F       1994-05-16      1994-07-04      1994-05-18      NONE    FOB     
 final requests across 
+5999008        192755  5275    2       32.00   59128.00        0.07    0.08    
R       F       1994-05-15      1994-05-22      1994-06-07      COLLECT COD     
RAIL    ts sleep slyly about the slyly ironic acco
+5999009        12147   7150    1       21.00   22241.94        0.00    0.05    
N       O       1997-11-01      1997-12-11      1997-11-05      NONE    AIR     
 deposits after the blithely ex
+5999010        106595  6596    1       31.00   49649.29        0.02    0.05    
N       O       1997-11-29      1997-10-24      1997-12-11      DELIVER IN 
PERSON       MAIL    ilent instructions? slyly r
+5999010        141441  1442    2       42.00   62262.48        0.05    0.08    
N       O       1997-09-21      1997-10-13      1997-09-25      TAKE BACK 
RETURN        SHIP    c, even ideas. ruth
+5999010        193075  8114    3       40.00   46722.80        0.04    0.07    
N       O       1997-11-19      1997-09-25      1997-11-25      DELIVER IN 
PERSON       AIR      accounts sleep blithely even,
+5999010        198678  1198    4       12.00   21320.04        0.00    0.04    
N       O       1997-09-19      1997-10-15      1997-10-05      DELIVER IN 
PERSON       REG AIR ironic foxes. slyly special id
+5999011        98609   6137    1       44.00   70734.40        0.03    0.01    
N       O       1998-04-05      1998-05-16      1998-05-05      DELIVER IN 
PERSON       REG AIR ructions along the blit
+
+-- !q03 --
+2000001        16877   6878    1       36.00   64579.32        0.01    0.05    
A       F       1995-01-23      1995-01-31      1995-02-20      TAKE BACK 
RETURN        FOB      regular deposits. even 
+2000001        50928   3434    2       36.00   67641.12        0.02    0.03    
A       F       1995-02-03      1995-02-07      1995-02-13      NONE    AIR     
ickly slyl
+2000001        117877  7878    4       20.00   37897.40        0.02    0.07    
R       F       1995-03-29      1995-03-16      1995-04-01      COLLECT COD     
RAIL    . realms boost unusual theodoli
+2000001        135534  8048    3       38.00   59642.14        0.00    0.05    
R       F       1994-12-31      1995-03-06      1995-01-26      COLLECT COD     
MAIL    l theodolites affix quickly alongside of 
+2000001        149269  1784    5       15.00   19773.90        0.05    0.05    
R       F       1995-03-08      1995-02-10      1995-03-23      DELIVER IN 
PERSON       AIR     e bold, silent foxes solve dog
+2000002        41816   1817    6       8.00    14062.48        0.05    0.06    
N       O       1996-02-14      1995-12-25      1996-03-12      DELIVER IN 
PERSON       FOB     y quickly pending foxes. quickly ironic acc
+2000002        62662   2663    2       48.00   77983.68        0.01    0.05    
N       O       1995-11-20      1996-01-11      1995-12-05      TAKE BACK 
RETURN        SHIP     requests sleep blithely. slyly 
+2000002        77402   7403    4       47.00   64831.80        0.07    0.03    
N       O       1996-02-24      1996-02-04      1996-03-09      NONE    SHIP    
ong the carefully silent instructions. even
+2000002        80719   8244    5       42.00   71387.82        0.10    0.03    
N       O       1995-11-24      1996-01-01      1995-11-25      COLLECT COD     
SHIP    ing to the carefully final deposits. care
+2000002        156357  6358    3       28.00   39573.80        0.09    0.05    
N       O       1995-12-26      1996-01-15      1996-01-25      COLLECT COD     
MAIL    ely regular instr
+
+-- !q04 --
+1      Customer#000000001      IVhzIApeRb ot,c,E       15      25-989-741-2988 
711.56  BUILDING        to the even, regular platelets. regular, ironic 
epitaphs nag e
+2      Customer#000000002      XSTf4,NCwDVaWNe6tEgvwfmRchLXak  13      
23-768-687-3665 121.65  AUTOMOBILE      l accounts. blithely ironic theodolites 
integrate boldly: caref
+3      Customer#000000003      MG9kdTD2WBHm    1       11-719-748-3364 7498.12 
AUTOMOBILE       deposits eat slyly ironic, even instructions. express foxes 
detect slyly. blithely even accounts abov
+4      Customer#000000004      XxVSJsLAGtn     4       14-128-190-5944 2866.83 
MACHINERY        requests. final, regular ideas sleep final accou
+5      Customer#000000005      KvpyuHCplrB84WgAiGV6sYpZq7Tj    3       
13-750-942-6364 794.47  HOUSEHOLD       n accounts will have to unwind. foxes 
cajole accor
+6      Customer#000000006      sKZz0CsnMD7mp4Xd0YrBvx,LREYKUWAh yVn    20      
30-114-968-4951 7638.57 AUTOMOBILE      tions. even deposits boost according to 
the slyly bold packages. final accounts cajole requests. furious
+7      Customer#000000007      TcGe5gaZNgVePxU5kRrvXBfkasDTea  18      
28-190-982-9759 9561.95 AUTOMOBILE      ainst the ironic, express theodolites. 
express, even pinto beans among the exp
+8      Customer#000000008      I0B10bB0AymmC, 0PrRYBCP1yGJ8xcBPmWhl5   17      
27-147-574-9335 6819.74 BUILDING        among the slyly regular theodolites 
kindle blithely courts. carefully even theodolites haggle slyly along the ide
+9      Customer#000000009      xKiAFTjUsCuxfeleNqefumTrjS      8       
18-338-906-3675 8324.07 FURNITURE       r theodolites according to the requests 
wake thinly excuses: pending requests haggle furiousl
+10     Customer#000000010      6LrEaV6KR6PLVcgl2ArL Q3rqzLzcT1 v2      5       
15-741-346-9870 2753.54 HOUSEHOLD       es regular deposits haggle. fur
+
+-- !q05 --
+140001 Customer#000140001      CkN1egC06Sc51bbDyQ8VnFn Bz6N1p  15      
25-647-696-2830 2747.48 AUTOMOBILE      heodolites. slyly bold theodolites d
+140002 Customer#000140002      8UWLS,im0k94ivCVx       23      33-146-814-9234 
7914.10 HOUSEHOLD       eep behind the quickly bold foxes. furiously ironic 
ideas shall have to sleep. regular packages
+140003 Customer#000140003      2duVgk HhGGlOeP4S,brWKjKG62bGkupful     23      
33-326-909-7916 9389.77 FURNITURE       pending, even packages are. slyly 
regular accounts wake ironically final packages. bold 
+140004 Customer#000140004      S,V7RhLnmqPK0TDghbcdxotzTyKeUC  13      
23-594-312-2596 5931.66 HOUSEHOLD       t blithely blithely regular packages. 
never silent dependencies cajo
+140005 Customer#000140005      yQemRDs9i8MmTJla7xha xqeZjMznW  20      
30-169-231-7354 9489.56 BUILDING        accounts. even ideas sleep carefu
+140006 Customer#000140006      5,eRqyFjpobN2Wtvt2oXuLcJcNE8oTyRh       15      
25-681-278-7283 67.66   MACHINERY       at the accounts are bold escapades. 
furiously final foxes use carefully unusual orb
+140007 Customer#000140007      AX75sSePE5PlDjD5qS6W1dx08Levf09 24      
34-818-770-8059 2093.37 MACHINERY       ily according to the furiously final 
packages? quickly spe
+140008 Customer#000140008      2zpry AYh9otf4c5vESISPvKLWPKe9i 14      
24-552-949-6395 3264.69 AUTOMOBILE      nstructions are against the requests. 
fin
+140009 Customer#000140009      dNwNUcCv,,0YE6WFYfOgM,6A2       4       
14-940-856-8557 -359.36 HOUSEHOLD       beans. blithely silent dependencies 
haggle slyly. carefully quick accounts across the depos
+140010 Customer#000140010      vZxOW,NtvppKR9mpTl6RDl9sWJJbosYDoLineEm 7       
17-151-800-8260 8216.11 BUILDING        nding foxes across the quickly regular 
forges nod accounts. slyly express ex
+
+-- !q06 --
+100001 Customer#000100001      gQ1s5C45A3PxWmZ1oFFSxt8u EcZ,   24      
34-705-443-4055 1726.66 HOUSEHOLD       ts. ironic instructions sleep. final 
deposits 
+100002 Customer#000100002      qOmTcZ7kHzJLSoaLenr9,Gu 17      27-453-414-8560 
-39.14  BUILDING        wake carefully. blithely regular epitaphs are among the 
quickly regular deposits. 
+100003 Customer#000100003      5AYbJxvjo7ErQB,cGIpKZRAE9,w2l9  5       
15-783-309-8970 72.71   BUILDING        ckly blithely special accounts. 
theodolites are carefully. pending requests ha
+100004 Customer#000100004      cpIOYQpMlm      18      28-316-370-8752 9990.05 
BUILDING        y above the slyly regular pains. unusual requests against the 
always special packages bre
+100005 Customer#000100005      Wud8n74NcIpwiKSjPS zZ   16      26-935-603-9031 
7789.25 BUILDING        ing dugouts boost slyly above the pending, final 
accounts? regular deposits wake slyly alongside of the blithely i
+100006 Customer#000100006      AkjXh4y,QNaF7,0xzbP,sG  7       17-964-673-7626 
974.05  MACHINERY       grate across the slyly even packages; final, special 
idea
+100007 Customer#000100007      d94JW9Hc2ZtGriOBNKyIjOeP,VZZqIX7S       17      
27-244-129-5307 777.86  HOUSEHOLD       foxes are against the ironic 
theodolites. evenly pending ideas according to the qu
+100008 Customer#000100008      Hv2A,YqfNnGRIKaY        18      28-828-394-8424 
3374.90 BUILDING        ccounts. even deposits wake quickly pinto beans. bold 
instructions integrate? never bold theodolites are s
+100009 Customer#000100009      OioQ3EjJZRvxCNh6Q8E3QZH 6       16-928-807-2622 
3932.63 MACHINERY       aggle blithely quickly final accounts. carefully final 
deposits above the fluffily unus
+100010 Customer#000100010       Tbiz2WMJX      0       10-147-978-7806 5693.02 
BUILDING        y regular ideas. quickly unusual gifts n
+
+-- !q07 --
+1      36901   O       173665.47       1996-01-02      5-LOW   Clerk#000000951 
0       nstructions sleep furiously among 
+2      78002   O       46929.18        1996-12-01      1-URGENT        
Clerk#000000880 0        foxes. pending accounts at the pending, silent asymptot
+3      123314  F       193846.25       1993-10-14      5-LOW   Clerk#000000955 
0       sly final accounts boost. carefully regular ideas cajole carefully. 
depos
+4      136777  O       32151.78        1995-10-11      5-LOW   Clerk#000000124 
0       sits. slyly regular warthogs cajole. regular, regular theodolites acro
+5      44485   F       144659.20       1994-07-30      5-LOW   Clerk#000000925 
0       quickly. bold deposits sleep slyly. packages use slyly
+6      55624   F       58749.59        1992-02-21      4-NOT SPECIFIED 
Clerk#000000058 0       ggle. special, final requests are against the furiously 
specia
+7      39136   O       252004.18       1996-01-10      2-HIGH  Clerk#000000470 
0       ly special requests 
+32     130057  O       208660.75       1995-07-16      2-HIGH  Clerk#000000616 
0       ise blithely bold, regular requests. quickly unusual dep
+33     66958   F       163243.98       1993-10-27      3-MEDIUM        
Clerk#000000409 0       uriously. furiously final request
+34     61001   O       58949.67        1998-07-21      3-MEDIUM        
Clerk#000000223 0       ly final packages. fluffily final deposits wake 
blithely ideas. spe
+
+-- !q08 --
+5990016        100807  F       102428.29       1994-01-31      1-URGENT        
Clerk#000000554 0       . fluffily unusual requests cajole furiously. fluffily 
pending accounts ca
+5990017        12382   F       176602.99       1992-07-01      5-LOW   
Clerk#000000205 0       ual pinto beans. final instructions haggle quickly 
alongside of the furio
+5990018        51145   F       78440.49        1992-05-28      1-URGENT        
Clerk#000000996 0       quests play daringly. regula
+5990019        85478   O       250306.69       1998-06-29      5-LOW   
Clerk#000000900 0       ainst the sly pinto beans. unu
+5990020        62137   O       229287.04       1996-08-15      1-URGENT        
Clerk#000000801 0        fluffily special pinto beans. regular, regular pinto 
beans slee
+5990021        24235   O       265459.10       1996-12-16      3-MEDIUM        
Clerk#000000113 0       gside of the ironic, unusual escapades. evenly silent 
tithes are 
+5990022        35143   O       141070.92       1996-07-01      4-NOT SPECIFIED 
Clerk#000000546 0       ests haggle across the blithely bo
+5990023        65318   F       171515.91       1993-07-04      1-URGENT        
Clerk#000000178 0       r the express accounts haggle blithely ironic 
accounts-- regu
+5990048        88213   O       70608.62        1997-10-23      2-HIGH  
Clerk#000000303 0       slyly enticing foxes doze regularly even requests. 
+5990049        115694  F       183390.98       1992-05-21      1-URGENT        
Clerk#000000450 0       ckly final theodolites ca
+
+-- !q09 --
+2000001        44200   F       257495.03       1994-12-18      5-LOW   
Clerk#000000314 0       ometimes theodolites. quickly even accounts among the 
blithely bold 
+2000002        55241   O       263734.77       1995-11-13      1-URGENT        
Clerk#000000749 0       uses along the brave excuses sleep for the packages. 
packages affix? slyl
+2000003        84553   F       78066.42        1992-10-10      5-LOW   
Clerk#000000314 0       e slyly regular asymptotes. fluf
+2000004        125197  F       246917.53       1993-01-06      1-URGENT        
Clerk#000000675 0       ironic ideas. platelets are regularly after the
+2000005        117907  O       229611.23       1996-10-16      2-HIGH  
Clerk#000000458 0       he furiously regular excuses haggle slyly along the 
slyly pending a
+2000006        1538    O       32011.55        1995-12-09      1-URGENT        
Clerk#000000279 0       ual, regular deposits sleep carefully carefully final 
dependencies. dep
+2000007        42958   F       48446.75        1993-03-28      5-LOW   
Clerk#000000956 0       uickly final ideas. final, final requests are courts. 
slyly unu
+2000032        34156   F       56186.58        1994-09-05      4-NOT SPECIFIED 
Clerk#000000612 0       fully regular instructions doze
+2000033        141263  O       130829.92       1997-06-05      3-MEDIUM        
Clerk#000000118 0       inst the final dependencies. even, final pat
+2000034        149275  O       64568.70        1997-09-23      2-HIGH  
Clerk#000000335 0       regular asymptotes. carefu
+
+-- !q10 --
+1      goldenrod lavender spring chocolate lace        Manufacturer#1  
Brand#13        PROMO BURNISHED COPPER  7       JUMBO PKG       901.00  ly. 
slyly ironi
+2      blush thistle blue yellow saddle        Manufacturer#1  Brand#13        
LARGE BRUSHED BRASS     1       LG CASE 902.00  lar accounts amo
+3      spring green yellow purple cornsilk     Manufacturer#4  Brand#42        
STANDARD POLISHED BRASS 21      WRAP CASE       903.00  egular deposits hag
+4      cornflower chocolate smoke green pink   Manufacturer#3  Brand#34        
SMALL PLATED BRASS      14      MED DRUM        904.00  p furiously r
+5      forest brown coral puff cream   Manufacturer#3  Brand#32        
STANDARD POLISHED TIN   15      SM PKG  905.00   wake carefully 
+6      bisque cornflower lawn forest magenta   Manufacturer#2  Brand#24        
PROMO PLATED STEEL      4       MED BAG 906.00  sual a
+7      moccasin green thistle khaki floral     Manufacturer#1  Brand#11        
SMALL PLATED COPPER     45      SM BAG  907.00  lyly. ex
+8      misty lace thistle snow royal   Manufacturer#4  Brand#44        PROMO 
BURNISHED TIN     41      LG DRUM 908.00  eposi
+9      thistle dim navajo dark gainsboro       Manufacturer#4  Brand#43        
SMALL BURNISHED STEEL   12      WRAP CASE       909.00  ironic foxe
+10     linen pink saddle puff powder   Manufacturer#5  Brand#54        LARGE 
BURNISHED STEEL   44      LG CAN  910.01  ithely final deposit
+
+-- !q08 --
+190001 powder coral chiffon burnished bisque   Manufacturer#2  Brand#22        
MEDIUM ANODIZED NICKEL  26      WRAP BOX        1091.00 ly busy deposi
+190002 peru coral rosy azure green     Manufacturer#4  Brand#41        LARGE 
POLISHED TIN      21      SM PKG  1092.00  express, daring sh
+190003 white salmon lemon cornsilk ghost       Manufacturer#4  Brand#41        
PROMO ANODIZED TIN      41      LG BAG  1093.00 ckages according to th
+190004 ivory almond honeydew metallic dodger   Manufacturer#4  Brand#44        
PROMO PLATED NICKEL     23      MED DRUM        1094.00  blithely regular t
+190005 slate indian forest chartreuse rosy     Manufacturer#1  Brand#11        
SMALL BRUSHED BRASS     3       SM CASE 1095.00 ly blithe, regula
+190006 navajo lavender smoke puff olive        Manufacturer#5  Brand#55        
SMALL BRUSHED BRASS     35      LG CASE 1096.00 ilent ideas boo
+190007 khaki lime goldenrod pink grey  Manufacturer#1  Brand#11        
STANDARD PLATED BRASS   30      SM PKG  1097.00 fully final gift
+190008 cream dark peru thistle gainsboro       Manufacturer#3  Brand#31        
ECONOMY ANODIZED STEEL  46      WRAP CASE       1098.00  pinto beans. fur
+190009 orchid goldenrod metallic frosted powder        Manufacturer#3  
Brand#33        STANDARD ANODIZED COPPER        25      LG BAG  1099.00 es 
cajole f
+190010 misty mint white seashell papaya        Manufacturer#3  Brand#34        
STANDARD POLISHED STEEL 38      JUMBO BOX       1100.01 pecia
+
+-- !q12 --
+100001 seashell cyan plum purple honeydew      Manufacturer#3  Brand#35        
STANDARD BRUSHED TIN    37      JUMBO CASE      1001.00 ronic dependencies d
+100002 steel moccasin forest cornflower brown  Manufacturer#3  Brand#34        
STANDARD ANODIZED NICKEL        11      WRAP CAN        1002.00  quickly 
pending 
+100003 beige powder violet orchid yellow       Manufacturer#2  Brand#21        
MEDIUM PLATED BRASS     41      SM BOX  1003.00  carefully even pac
+100004 snow blanched khaki indian azure        Manufacturer#4  Brand#42        
SMALL POLISHED TIN      29      SM CASE 1004.00 sly. blithely
+100005 grey midnight orange peach pale Manufacturer#2  Brand#21        SMALL 
POLISHED STEEL    7       MED BAG 1005.00 ajole? blithe
+100006 violet sandy olive yellow orange        Manufacturer#4  Brand#45        
STANDARD BURNISHED COPPER       23      WRAP CASE       1006.00 he slyly 
regular pack
+100007 snow magenta pale lemon metallic        Manufacturer#1  Brand#12        
PROMO BURNISHED COPPER  4       MED PKG 1007.00 ronic accounts in
+100008 spring powder sienna purple lime        Manufacturer#4  Brand#45        
ECONOMY BRUSHED BRASS   19      SM PKG  1008.00 ts. furious
+100009 goldenrod sandy beige hot orange        Manufacturer#3  Brand#32        
SMALL BURNISHED STEEL   41      WRAP BOX        1009.00 dinos about the quick
+100010 lime lavender slate cream brown Manufacturer#4  Brand#43        PROMO 
ANODIZED COPPER   19      JUMBO PACK      1010.01 gle slyly above the b
+
+-- !q01 --
+1      2132    4633    4       28.00   28955.64        0.09    0.06    N       
O       1996-04-21      1996-03-30      1996-05-16      NONE    AIR     lites. 
fluffily even de
+1      15635   638     6       32.00   49620.16        0.07    0.02    N       
O       1996-01-30      1996-02-07      1996-02-03      DELIVER IN PERSON       
MAIL    arefully slyly ex
+1      24027   1534    5       24.00   22824.48        0.10    0.04    N       
O       1996-03-30      1996-03-14      1996-04-01      NONE    FOB      
pending foxes. slyly re
+1      63700   3701    3       8.00    13309.60        0.10    0.02    N       
O       1996-01-29      1996-03-05      1996-01-31      TAKE BACK RETURN        
REG AIR riously. regular, express dep
+1      67310   7311    2       36.00   45983.16        0.09    0.06    N       
O       1996-04-12      1996-02-28      1996-04-20      TAKE BACK RETURN        
MAIL    ly final dependencies: slyly bold 
+1      155190  7706    1       17.00   21168.23        0.04    0.02    N       
O       1996-03-13      1996-02-12      1996-03-22      DELIVER IN PERSON       
TRUCK   egular courts above the
+2      106170  1191    1       38.00   44694.46        0.00    0.05    N       
O       1997-01-28      1997-01-14      1997-02-02      TAKE BACK RETURN        
RAIL    ven requests. deposits breach a
+3      4297    1798    1       45.00   54058.05        0.06    0.00    R       
F       1994-02-02      1994-01-04      1994-02-23      NONE    AIR     ongside 
of the furiously brave acco
+3      19036   6540    2       49.00   46796.47        0.10    0.00    R       
F       1993-11-09      1993-12-20      1993-11-24      TAKE BACK RETURN        
RAIL     unusual accounts. eve
+3      29380   1883    4       2.00    2618.76 0.01    0.06    A       F       
1993-12-04      1994-01-07      1994-01-01      NONE    TRUCK   y. fluffily 
pending d
+
+-- !q02 --
+5999008        16312   6313    4       2.00    2456.62 0.08    0.05    R       
F       1994-04-28      1994-06-01      1994-05-12      COLLECT COD     FOB     
longside of the slo
+5999008        32738   2739    3       39.00   65158.47        0.09    0.08    
R       F       1994-07-11      1994-06-15      1994-08-04      TAKE BACK 
RETURN        TRUCK   equests nag along
+5999008        64711   2230    1       29.00   48595.59        0.00    0.02    
R       F       1994-05-16      1994-07-04      1994-05-18      NONE    FOB     
 final requests across 
+5999008        192755  5275    2       32.00   59128.00        0.07    0.08    
R       F       1994-05-15      1994-05-22      1994-06-07      COLLECT COD     
RAIL    ts sleep slyly about the slyly ironic acco
+5999009        12147   7150    1       21.00   22241.94        0.00    0.05    
N       O       1997-11-01      1997-12-11      1997-11-05      NONE    AIR     
 deposits after the blithely ex
+5999010        106595  6596    1       31.00   49649.29        0.02    0.05    
N       O       1997-11-29      1997-10-24      1997-12-11      DELIVER IN 
PERSON       MAIL    ilent instructions? slyly r
+5999010        141441  1442    2       42.00   62262.48        0.05    0.08    
N       O       1997-09-21      1997-10-13      1997-09-25      TAKE BACK 
RETURN        SHIP    c, even ideas. ruth
+5999010        193075  8114    3       40.00   46722.80        0.04    0.07    
N       O       1997-11-19      1997-09-25      1997-11-25      DELIVER IN 
PERSON       AIR      accounts sleep blithely even,
+5999010        198678  1198    4       12.00   21320.04        0.00    0.04    
N       O       1997-09-19      1997-10-15      1997-10-05      DELIVER IN 
PERSON       REG AIR ironic foxes. slyly special id
+5999011        98609   6137    1       44.00   70734.40        0.03    0.01    
N       O       1998-04-05      1998-05-16      1998-05-05      DELIVER IN 
PERSON       REG AIR ructions along the blit
+
+-- !q03 --
+2000001        16877   6878    1       36.00   64579.32        0.01    0.05    
A       F       1995-01-23      1995-01-31      1995-02-20      TAKE BACK 
RETURN        FOB      regular deposits. even 
+2000001        50928   3434    2       36.00   67641.12        0.02    0.03    
A       F       1995-02-03      1995-02-07      1995-02-13      NONE    AIR     
ickly slyl
+2000001        117877  7878    4       20.00   37897.40        0.02    0.07    
R       F       1995-03-29      1995-03-16      1995-04-01      COLLECT COD     
RAIL    . realms boost unusual theodoli
+2000001        135534  8048    3       38.00   59642.14        0.00    0.05    
R       F       1994-12-31      1995-03-06      1995-01-26      COLLECT COD     
MAIL    l theodolites affix quickly alongside of 
+2000001        149269  1784    5       15.00   19773.90        0.05    0.05    
R       F       1995-03-08      1995-02-10      1995-03-23      DELIVER IN 
PERSON       AIR     e bold, silent foxes solve dog
+2000002        41816   1817    6       8.00    14062.48        0.05    0.06    
N       O       1996-02-14      1995-12-25      1996-03-12      DELIVER IN 
PERSON       FOB     y quickly pending foxes. quickly ironic acc
+2000002        62662   2663    2       48.00   77983.68        0.01    0.05    
N       O       1995-11-20      1996-01-11      1995-12-05      TAKE BACK 
RETURN        SHIP     requests sleep blithely. slyly 
+2000002        77402   7403    4       47.00   64831.80        0.07    0.03    
N       O       1996-02-24      1996-02-04      1996-03-09      NONE    SHIP    
ong the carefully silent instructions. even
+2000002        80719   8244    5       42.00   71387.82        0.10    0.03    
N       O       1995-11-24      1996-01-01      1995-11-25      COLLECT COD     
SHIP    ing to the carefully final deposits. care
+2000002        156357  6358    3       28.00   39573.80        0.09    0.05    
N       O       1995-12-26      1996-01-15      1996-01-25      COLLECT COD     
MAIL    ely regular instr
+
+-- !q04 --
+1      Customer#000000001      IVhzIApeRb ot,c,E       15      25-989-741-2988 
711.56  BUILDING        to the even, regular platelets. regular, ironic 
epitaphs nag e
+2      Customer#000000002      XSTf4,NCwDVaWNe6tEgvwfmRchLXak  13      
23-768-687-3665 121.65  AUTOMOBILE      l accounts. blithely ironic theodolites 
integrate boldly: caref
+3      Customer#000000003      MG9kdTD2WBHm    1       11-719-748-3364 7498.12 
AUTOMOBILE       deposits eat slyly ironic, even instructions. express foxes 
detect slyly. blithely even accounts abov
+4      Customer#000000004      XxVSJsLAGtn     4       14-128-190-5944 2866.83 
MACHINERY        requests. final, regular ideas sleep final accou
+5      Customer#000000005      KvpyuHCplrB84WgAiGV6sYpZq7Tj    3       
13-750-942-6364 794.47  HOUSEHOLD       n accounts will have to unwind. foxes 
cajole accor
+6      Customer#000000006      sKZz0CsnMD7mp4Xd0YrBvx,LREYKUWAh yVn    20      
30-114-968-4951 7638.57 AUTOMOBILE      tions. even deposits boost according to 
the slyly bold packages. final accounts cajole requests. furious
+7      Customer#000000007      TcGe5gaZNgVePxU5kRrvXBfkasDTea  18      
28-190-982-9759 9561.95 AUTOMOBILE      ainst the ironic, express theodolites. 
express, even pinto beans among the exp
+8      Customer#000000008      I0B10bB0AymmC, 0PrRYBCP1yGJ8xcBPmWhl5   17      
27-147-574-9335 6819.74 BUILDING        among the slyly regular theodolites 
kindle blithely courts. carefully even theodolites haggle slyly along the ide
+9      Customer#000000009      xKiAFTjUsCuxfeleNqefumTrjS      8       
18-338-906-3675 8324.07 FURNITURE       r theodolites according to the requests 
wake thinly excuses: pending requests haggle furiousl
+10     Customer#000000010      6LrEaV6KR6PLVcgl2ArL Q3rqzLzcT1 v2      5       
15-741-346-9870 2753.54 HOUSEHOLD       es regular deposits haggle. fur
+
+-- !q05 --
+140001 Customer#000140001      CkN1egC06Sc51bbDyQ8VnFn Bz6N1p  15      
25-647-696-2830 2747.48 AUTOMOBILE      heodolites. slyly bold theodolites d
+140002 Customer#000140002      8UWLS,im0k94ivCVx       23      33-146-814-9234 
7914.10 HOUSEHOLD       eep behind the quickly bold foxes. furiously ironic 
ideas shall have to sleep. regular packages
+140003 Customer#000140003      2duVgk HhGGlOeP4S,brWKjKG62bGkupful     23      
33-326-909-7916 9389.77 FURNITURE       pending, even packages are. slyly 
regular accounts wake ironically final packages. bold 
+140004 Customer#000140004      S,V7RhLnmqPK0TDghbcdxotzTyKeUC  13      
23-594-312-2596 5931.66 HOUSEHOLD       t blithely blithely regular packages. 
never silent dependencies cajo
+140005 Customer#000140005      yQemRDs9i8MmTJla7xha xqeZjMznW  20      
30-169-231-7354 9489.56 BUILDING        accounts. even ideas sleep carefu
+140006 Customer#000140006      5,eRqyFjpobN2Wtvt2oXuLcJcNE8oTyRh       15      
25-681-278-7283 67.66   MACHINERY       at the accounts are bold escapades. 
furiously final foxes use carefully unusual orb
+140007 Customer#000140007      AX75sSePE5PlDjD5qS6W1dx08Levf09 24      
34-818-770-8059 2093.37 MACHINERY       ily according to the furiously final 
packages? quickly spe
+140008 Customer#000140008      2zpry AYh9otf4c5vESISPvKLWPKe9i 14      
24-552-949-6395 3264.69 AUTOMOBILE      nstructions are against the requests. 
fin
+140009 Customer#000140009      dNwNUcCv,,0YE6WFYfOgM,6A2       4       
14-940-856-8557 -359.36 HOUSEHOLD       beans. blithely silent dependencies 
haggle slyly. carefully quick accounts across the depos
+140010 Customer#000140010      vZxOW,NtvppKR9mpTl6RDl9sWJJbosYDoLineEm 7       
17-151-800-8260 8216.11 BUILDING        nding foxes across the quickly regular 
forges nod accounts. slyly express ex
+
+-- !q06 --
+100001 Customer#000100001      gQ1s5C45A3PxWmZ1oFFSxt8u EcZ,   24      
34-705-443-4055 1726.66 HOUSEHOLD       ts. ironic instructions sleep. final 
deposits 
+100002 Customer#000100002      qOmTcZ7kHzJLSoaLenr9,Gu 17      27-453-414-8560 
-39.14  BUILDING        wake carefully. blithely regular epitaphs are among the 
quickly regular deposits. 
+100003 Customer#000100003      5AYbJxvjo7ErQB,cGIpKZRAE9,w2l9  5       
15-783-309-8970 72.71   BUILDING        ckly blithely special accounts. 
theodolites are carefully. pending requests ha
+100004 Customer#000100004      cpIOYQpMlm      18      28-316-370-8752 9990.05 
BUILDING        y above the slyly regular pains. unusual requests against the 
always special packages bre
+100005 Customer#000100005      Wud8n74NcIpwiKSjPS zZ   16      26-935-603-9031 
7789.25 BUILDING        ing dugouts boost slyly above the pending, final 
accounts? regular deposits wake slyly alongside of the blithely i
+100006 Customer#000100006      AkjXh4y,QNaF7,0xzbP,sG  7       17-964-673-7626 
974.05  MACHINERY       grate across the slyly even packages; final, special 
idea
+100007 Customer#000100007      d94JW9Hc2ZtGriOBNKyIjOeP,VZZqIX7S       17      
27-244-129-5307 777.86  HOUSEHOLD       foxes are against the ironic 
theodolites. evenly pending ideas according to the qu
+100008 Customer#000100008      Hv2A,YqfNnGRIKaY        18      28-828-394-8424 
3374.90 BUILDING        ccounts. even deposits wake quickly pinto beans. bold 
instructions integrate? never bold theodolites are s
+100009 Customer#000100009      OioQ3EjJZRvxCNh6Q8E3QZH 6       16-928-807-2622 
3932.63 MACHINERY       aggle blithely quickly final accounts. carefully final 
deposits above the fluffily unus
+100010 Customer#000100010       Tbiz2WMJX      0       10-147-978-7806 5693.02 
BUILDING        y regular ideas. quickly unusual gifts n
+
+-- !q07 --
+1      36901   O       173665.47       1996-01-02      5-LOW   Clerk#000000951 
0       nstructions sleep furiously among 
+2      78002   O       46929.18        1996-12-01      1-URGENT        
Clerk#000000880 0        foxes. pending accounts at the pending, silent asymptot
+3      123314  F       193846.25       1993-10-14      5-LOW   Clerk#000000955 
0       sly final accounts boost. carefully regular ideas cajole carefully. 
depos
+4      136777  O       32151.78        1995-10-11      5-LOW   Clerk#000000124 
0       sits. slyly regular warthogs cajole. regular, regular theodolites acro
+5      44485   F       144659.20       1994-07-30      5-LOW   Clerk#000000925 
0       quickly. bold deposits sleep slyly. packages use slyly
+6      55624   F       58749.59        1992-02-21      4-NOT SPECIFIED 
Clerk#000000058 0       ggle. special, final requests are against the furiously 
specia
+7      39136   O       252004.18       1996-01-10      2-HIGH  Clerk#000000470 
0       ly special requests 
+32     130057  O       208660.75       1995-07-16      2-HIGH  Clerk#000000616 
0       ise blithely bold, regular requests. quickly unusual dep
+33     66958   F       163243.98       1993-10-27      3-MEDIUM        
Clerk#000000409 0       uriously. furiously final request
+34     61001   O       58949.67        1998-07-21      3-MEDIUM        
Clerk#000000223 0       ly final packages. fluffily final deposits wake 
blithely ideas. spe
+
+-- !q08 --
+5990016        100807  F       102428.29       1994-01-31      1-URGENT        
Clerk#000000554 0       . fluffily unusual requests cajole furiously. fluffily 
pending accounts ca
+5990017        12382   F       176602.99       1992-07-01      5-LOW   
Clerk#000000205 0       ual pinto beans. final instructions haggle quickly 
alongside of the furio
+5990018        51145   F       78440.49        1992-05-28      1-URGENT        
Clerk#000000996 0       quests play daringly. regula
+5990019        85478   O       250306.69       1998-06-29      5-LOW   
Clerk#000000900 0       ainst the sly pinto beans. unu
+5990020        62137   O       229287.04       1996-08-15      1-URGENT        
Clerk#000000801 0        fluffily special pinto beans. regular, regular pinto 
beans slee
+5990021        24235   O       265459.10       1996-12-16      3-MEDIUM        
Clerk#000000113 0       gside of the ironic, unusual escapades. evenly silent 
tithes are 
+5990022        35143   O       141070.92       1996-07-01      4-NOT SPECIFIED 
Clerk#000000546 0       ests haggle across the blithely bo
+5990023        65318   F       171515.91       1993-07-04      1-URGENT        
Clerk#000000178 0       r the express accounts haggle blithely ironic 
accounts-- regu
+5990048        88213   O       70608.62        1997-10-23      2-HIGH  
Clerk#000000303 0       slyly enticing foxes doze regularly even requests. 
+5990049        115694  F       183390.98       1992-05-21      1-URGENT        
Clerk#000000450 0       ckly final theodolites ca
+
+-- !q09 --
+2000001        44200   F       257495.03       1994-12-18      5-LOW   
Clerk#000000314 0       ometimes theodolites. quickly even accounts among the 
blithely bold 
+2000002        55241   O       263734.77       1995-11-13      1-URGENT        
Clerk#000000749 0       uses along the brave excuses sleep for the packages. 
packages affix? slyl
+2000003        84553   F       78066.42        1992-10-10      5-LOW   
Clerk#000000314 0       e slyly regular asymptotes. fluf
+2000004        125197  F       246917.53       1993-01-06      1-URGENT        
Clerk#000000675 0       ironic ideas. platelets are regularly after the
+2000005        117907  O       229611.23       1996-10-16      2-HIGH  
Clerk#000000458 0       he furiously regular excuses haggle slyly along the 
slyly pending a
+2000006        1538    O       32011.55        1995-12-09      1-URGENT        
Clerk#000000279 0       ual, regular deposits sleep carefully carefully final 
dependencies. dep
+2000007        42958   F       48446.75        1993-03-28      5-LOW   
Clerk#000000956 0       uickly final ideas. final, final requests are courts. 
slyly unu
+2000032        34156   F       56186.58        1994-09-05      4-NOT SPECIFIED 
Clerk#000000612 0       fully regular instructions doze
+2000033        141263  O       130829.92       1997-06-05      3-MEDIUM        
Clerk#000000118 0       inst the final dependencies. even, final pat
+2000034        149275  O       64568.70        1997-09-23      2-HIGH  
Clerk#000000335 0       regular asymptotes. carefu
+
+-- !q10 --
+1      goldenrod lavender spring chocolate lace        Manufacturer#1  
Brand#13        PROMO BURNISHED COPPER  7       JUMBO PKG       901.00  ly. 
slyly ironi
+2      blush thistle blue yellow saddle        Manufacturer#1  Brand#13        
LARGE BRUSHED BRASS     1       LG CASE 902.00  lar accounts amo
+3      spring green yellow purple cornsilk     Manufacturer#4  Brand#42        
STANDARD POLISHED BRASS 21      WRAP CASE       903.00  egular deposits hag
+4      cornflower chocolate smoke green pink   Manufacturer#3  Brand#34        
SMALL PLATED BRASS      14      MED DRUM        904.00  p furiously r
+5      forest brown coral puff cream   Manufacturer#3  Brand#32        
STANDARD POLISHED TIN   15      SM PKG  905.00   wake carefully 
+6      bisque cornflower lawn forest magenta   Manufacturer#2  Brand#24        
PROMO PLATED STEEL      4       MED BAG 906.00  sual a
+7      moccasin green thistle khaki floral     Manufacturer#1  Brand#11        
SMALL PLATED COPPER     45      SM BAG  907.00  lyly. ex
+8      misty lace thistle snow royal   Manufacturer#4  Brand#44        PROMO 
BURNISHED TIN     41      LG DRUM 908.00  eposi
+9      thistle dim navajo dark gainsboro       Manufacturer#4  Brand#43        
SMALL BURNISHED STEEL   12      WRAP CASE       909.00  ironic foxe
+10     linen pink saddle puff powder   Manufacturer#5  Brand#54        LARGE 
BURNISHED STEEL   44      LG CAN  910.01  ithely final deposit
+
+-- !q08 --
+190001 powder coral chiffon burnished bisque   Manufacturer#2  Brand#22        
MEDIUM ANODIZED NICKEL  26      WRAP BOX        1091.00 ly busy deposi
+190002 peru coral rosy azure green     Manufacturer#4  Brand#41        LARGE 
POLISHED TIN      21      SM PKG  1092.00  express, daring sh
+190003 white salmon lemon cornsilk ghost       Manufacturer#4  Brand#41        
PROMO ANODIZED TIN      41      LG BAG  1093.00 ckages according to th
+190004 ivory almond honeydew metallic dodger   Manufacturer#4  Brand#44        
PROMO PLATED NICKEL     23      MED DRUM        1094.00  blithely regular t
+190005 slate indian forest chartreuse rosy     Manufacturer#1  Brand#11        
SMALL BRUSHED BRASS     3       SM CASE 1095.00 ly blithe, regula
+190006 navajo lavender smoke puff olive        Manufacturer#5  Brand#55        
SMALL BRUSHED BRASS     35      LG CASE 1096.00 ilent ideas boo
+190007 khaki lime goldenrod pink grey  Manufacturer#1  Brand#11        
STANDARD PLATED BRASS   30      SM PKG  1097.00 fully final gift
+190008 cream dark peru thistle gainsboro       Manufacturer#3  Brand#31        
ECONOMY ANODIZED STEEL  46      WRAP CASE       1098.00  pinto beans. fur
+190009 orchid goldenrod metallic frosted powder        Manufacturer#3  
Brand#33        STANDARD ANODIZED COPPER        25      LG BAG  1099.00 es 
cajole f
+190010 misty mint white seashell papaya        Manufacturer#3  Brand#34        
STANDARD POLISHED STEEL 38      JUMBO BOX       1100.01 pecia
+
+-- !q12 --
+100001 seashell cyan plum purple honeydew      Manufacturer#3  Brand#35        
STANDARD BRUSHED TIN    37      JUMBO CASE      1001.00 ronic dependencies d
+100002 steel moccasin forest cornflower brown  Manufacturer#3  Brand#34        
STANDARD ANODIZED NICKEL        11      WRAP CAN        1002.00  quickly 
pending 
+100003 beige powder violet orchid yellow       Manufacturer#2  Brand#21        
MEDIUM PLATED BRASS     41      SM BOX  1003.00  carefully even pac
+100004 snow blanched khaki indian azure        Manufacturer#4  Brand#42        
SMALL POLISHED TIN      29      SM CASE 1004.00 sly. blithely
+100005 grey midnight orange peach pale Manufacturer#2  Brand#21        SMALL 
POLISHED STEEL    7       MED BAG 1005.00 ajole? blithe
+100006 violet sandy olive yellow orange        Manufacturer#4  Brand#45        
STANDARD BURNISHED COPPER       23      WRAP CASE       1006.00 he slyly 
regular pack
+100007 snow magenta pale lemon metallic        Manufacturer#1  Brand#12        
PROMO BURNISHED COPPER  4       MED PKG 1007.00 ronic accounts in
+100008 spring powder sienna purple lime        Manufacturer#4  Brand#45        
ECONOMY BRUSHED BRASS   19      SM PKG  1008.00 ts. furious
+100009 goldenrod sandy beige hot orange        Manufacturer#3  Brand#32        
SMALL BURNISHED STEEL   41      WRAP BOX        1009.00 dinos about the quick
+100010 lime lavender slate cream brown Manufacturer#4  Brand#43        PROMO 
ANODIZED COPPER   19      JUMBO PACK      1010.01 gle slyly above the b
+
diff --git 
a/regression-test/suites/external_table_p0/hive/test_hive_parquet_skip_page.groovy
 
b/regression-test/suites/external_table_p0/hive/test_hive_parquet_skip_page.groovy
new file mode 100644
index 00000000000..67e594b21f1
--- /dev/null
+++ 
b/regression-test/suites/external_table_p0/hive/test_hive_parquet_skip_page.groovy
@@ -0,0 +1,131 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_hive_parquet_skip_page", 
"p0,external,hive,external_docker,external_docker_hive") {
+    // q01: lineitem rows at the low end of the key range (l_orderkey < 1000),
+    // first 10 by (l_orderkey, l_partkey); checked under tag q01.
+    def q01 = {
+        qt_q01 """
+        select * from lineitem where l_orderkey  < 1000 order by 
l_orderkey,l_partkey limit 10;
+    """
+    }
+
+    // q02: lineitem rows at the high end of the key range (l_orderkey > 5999000),
+    // first 10 by (l_orderkey, l_partkey); checked under tag q02.
+    def q02 = {
+        qt_q02 """
+        select * from lineitem where l_orderkey > 5999000 order by 
l_orderkey,l_partkey limit 10;
+    """
+    }
+
+    // q03: lineitem rows from a narrow window in the middle of the key range
+    // (2000000 < l_orderkey < 2001000), first 10 by (l_orderkey, l_partkey).
+    def q03 = {
+        qt_q03 """
+        select * from lineitem where l_orderkey > 2000000 and l_orderkey < 
2001000  order by l_orderkey,l_partkey limit 10;
+    """
+    }
+
+    // q04: customer rows at the low end of the key range (c_custkey < 10000),
+    // first 10 by c_custkey; checked under tag q04.
+    def q04 = {
+        qt_q04 """
+        select * from customer where c_custkey < 10000 order by c_custkey 
limit 10;
+    """
+    }
+
+    // q05: customer rows at the high end of the key range (c_custkey > 140000),
+    // first 10 by c_custkey; checked under tag q05.
+    def q05 = {
+        qt_q05 """
+        select * from customer where c_custkey > 140000 order by c_custkey 
limit 10;
+    """
+    }
+
+    // q06: customer rows from a window in the middle of the key range
+    // (100000 < c_custkey < 110000), first 10 by c_custkey.
+    def q06 = {
+        qt_q06 """
+        select * from customer where c_custkey > 100000 and c_custkey < 110000 
 order by c_custkey limit 10;
+    """
+    }
+
+    // q07: orders rows at the low end of the key range (o_orderkey < 10000),
+    // first 10 by o_orderkey; checked under tag q07.
+    def q07 = {
+        qt_q07 """
+        select * from orders where o_orderkey < 10000 order by o_orderkey 
limit 10;
+    """
+    }
+
+    // q08: orders rows at the high end of the key range (o_orderkey > 5990000),
+    // first 10 by o_orderkey; checked under tag q08.
+    def q08 = {
+        qt_q08 """
+        select * from orders where o_orderkey > 5990000 order by o_orderkey 
limit 10;
+    """
+    }
+
+    // q09: orders rows from a window in the middle of the key range
+    // (2000000 < o_orderkey < 2010000), first 10 by o_orderkey.
+    def q09 = {
+        qt_q09 """
+        select * from orders where o_orderkey > 2000000 and o_orderkey < 
2010000 order by o_orderkey limit 10;
+    """
+    }
+
+    // q10: part rows at the low end of the key range (p_partkey < 10000),
+    // first 10 by p_partkey; checked under tag q10.
+    def q10 = {
+        qt_q10 """
+        select * from part where p_partkey < 10000 order by p_partkey limit 10;
+    """
+    }
+
+    // q11: part rows at the high end of the key range (p_partkey > 190000),
+    // first 10 by p_partkey.
+    // NOTE(review): the check below is tagged `qt_q08`, not `qt_q11`, so it
+    // reuses the tag of the orders query q08. The expected-output file added
+    // in this change has a second `!q08` block (part rows starting at 190001)
+    // that pairs with it. Renaming the tag to q11 requires renaming that
+    // block in the same change, otherwise the check will no longer line up.
+    def q11 = {
+        qt_q08 """
+        select * from part where p_partkey > 190000 order by p_partkey limit 
10;
+    """
+    }
+
+    // q12: part rows from a window in the middle of the key range
+    // (100000 < p_partkey < 110000), first 10 by p_partkey.
+    def q12 = {
+        qt_q12 """
+        select * from part where p_partkey > 100000 and p_partkey < 110000 
order by p_partkey limit 10;
+    """
+    }
+
+    // Skip the whole suite unless Hive tests are switched on in the
+    // regression configuration ("enableHiveTest" = "true", case-insensitive).
+    String enabled = context.config.otherConfigs.get("enableHiveTest")
+    if (enabled == null || !enabled.equalsIgnoreCase("true")) {
+        logger.info("disable Hive test.")
+        return;
+    }
+
+    // Run the same twelve checks against both Hive metastore deployments.
+    for (String hivePrefix : ["hive2", "hive3"]) {
+        // Declared outside the try so the finally block can still name the catalog.
+        String catalog_name = "${hivePrefix}_test_parquet"
+        try {
+            String hms_port = context.config.otherConfigs.get(hivePrefix + "HmsPort")
+            String externalEnvIp = context.config.otherConfigs.get("externalEnvIp")
+
+            // Start from a clean catalog in case a previous run left one behind.
+            sql """drop catalog if exists ${catalog_name}"""
+            sql """create catalog if not exists ${catalog_name} properties (
+                "type"="hms",
+                'hive.metastore.uris' = 'thrift://${externalEnvIp}:${hms_port}'
+            );"""
+
+            sql """switch ${catalog_name}"""
+            sql """use `tpch1_parquet`"""
+
+            sql """set enable_profile=true;"""
+
+            q01()
+            q02()
+            q03()
+            q04()
+            q05()
+            q06()
+            q07()
+            q08()
+            q09()
+            q10()
+            q11()
+            q12()
+        } finally {
+            // Drop the catalog even when one of the checks above fails, so a
+            // failed run does not leave it behind.
+            sql """drop catalog if exists ${catalog_name}"""
+        }
+    }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to