This is an automated email from the ASF dual-hosted git repository. yiguolei pushed a commit to branch branch-2.1 in repository https://gitbox.apache.org/repos/asf/doris.git
commit 0f0c0a266b65b7f27ef376e545ff0a5ea6e7d592 Author: 苏小刚 <[email protected]> AuthorDate: Fri Apr 26 13:51:30 2024 +0800 [opt](parquet)Skip page with offset index (#33082) Make skip_page() in ColumnChunkReader more efficient. No more reading page headers if there are pagelocations in chunk. --- .../parquet/vparquet_column_chunk_reader.cpp | 82 +++--- .../format/parquet/vparquet_column_chunk_reader.h | 11 +- .../exec/format/parquet/vparquet_column_reader.cpp | 7 +- .../exec/format/parquet/vparquet_column_reader.h | 22 +- .../exec/format/parquet/vparquet_group_reader.cpp | 12 +- .../exec/format/parquet/vparquet_group_reader.h | 1 - .../exec/format/parquet/vparquet_page_reader.cpp | 15 +- .../vec/exec/format/parquet/vparquet_page_reader.h | 123 ++++++++- be/src/vec/exec/format/parquet/vparquet_reader.cpp | 11 +- be/src/vec/exec/format/parquet/vparquet_reader.h | 3 +- be/test/vec/exec/parquet/parquet_thrift_test.cpp | 3 +- .../hive/test_hive_parquet_skip_page.out | 289 +++++++++++++++++++++ .../hive/test_hive_parquet_skip_page.groovy | 131 ++++++++++ 13 files changed, 641 insertions(+), 69 deletions(-) diff --git a/be/src/vec/exec/format/parquet/vparquet_column_chunk_reader.cpp b/be/src/vec/exec/format/parquet/vparquet_column_chunk_reader.cpp index 6feb9bc1025..af30e63d1e3 100644 --- a/be/src/vec/exec/format/parquet/vparquet_column_chunk_reader.cpp +++ b/be/src/vec/exec/format/parquet/vparquet_column_chunk_reader.cpp @@ -47,12 +47,14 @@ namespace doris::vectorized { ColumnChunkReader::ColumnChunkReader(io::BufferedStreamReader* reader, tparquet::ColumnChunk* column_chunk, FieldSchema* field_schema, + const tparquet::OffsetIndex* offset_index, cctz::time_zone* ctz, io::IOContext* io_ctx) : _field_schema(field_schema), _max_rep_level(field_schema->repetition_level), _max_def_level(field_schema->definition_level), _stream_reader(reader), _metadata(column_chunk->meta_data), + _offset_index(offset_index), // _ctz(ctz), _io_ctx(io_ctx) {} @@ -61,7 +63,9 @@ Status ColumnChunkReader::init() { ? _metadata.dictionary_page_offset : _metadata.data_page_offset; size_t chunk_size = _metadata.total_compressed_size; - _page_reader = std::make_unique<PageReader>(_stream_reader, _io_ctx, start_offset, chunk_size); + // create page reader + _page_reader = create_page_reader(_stream_reader, _io_ctx, start_offset, chunk_size, + _metadata.num_values, _offset_index); // get the block compression codec RETURN_IF_ERROR(get_block_compression_codec(_metadata.codec, &_block_compress_codec)); if (_metadata.__isset.dictionary_page_offset) { @@ -88,24 +92,27 @@ Status ColumnChunkReader::next_page() { if (UNLIKELY(_remaining_num_values != 0)) { return Status::Corruption("Should skip current page"); } + RETURN_IF_ERROR(_page_reader->next_page_header()); - if (_page_reader->get_page_header()->type == tparquet::PageType::DICTIONARY_PAGE) { - // the first page maybe directory page even if _metadata.__isset.dictionary_page_offset == false, - // so we should parse the directory page in next_page() - RETURN_IF_ERROR(_decode_dict_page()); - // parse the real first data page - return next_page(); - } else if (_page_reader->get_page_header()->type == tparquet::PageType::DATA_PAGE_V2) { - _remaining_num_values = _page_reader->get_page_header()->data_page_header_v2.num_values; - _chunk_parsed_values += _remaining_num_values; - _state = HEADER_PARSED; - return Status::OK(); - } else { - _remaining_num_values = _page_reader->get_page_header()->data_page_header.num_values; - _chunk_parsed_values += _remaining_num_values; - _state = HEADER_PARSED; - return Status::OK(); + + if (!_dict_checked) { + _dict_checked = true; + const tparquet::PageHeader* header; + RETURN_IF_ERROR(_page_reader->get_page_header(header)); + if (header->type == tparquet::PageType::DICTIONARY_PAGE) { + // the first page maybe directory page even if _metadata.__isset.dictionary_page_offset == false, + // so we should parse the directory page in next_page() + RETURN_IF_ERROR(_decode_dict_page()); + // parse the real first data page + return next_page(); + } } + + RETURN_IF_ERROR(_page_reader->get_num_values(_remaining_num_values)); + _chunk_parsed_values += _remaining_num_values; + _state = HEADER_PARSED; + + return Status::OK(); } void ColumnChunkReader::_get_uncompressed_levels(const tparquet::DataPageHeaderV2& page_v2, @@ -119,17 +126,19 @@ void ColumnChunkReader::_get_uncompressed_levels(const tparquet::DataPageHeaderV } Status ColumnChunkReader::load_page_data() { + // TODO: remove checking HEADER_PARSED or change name if (UNLIKELY(_state != HEADER_PARSED)) { return Status::Corruption("Should parse page header"); } - const auto& header = *_page_reader->get_page_header(); - int32_t uncompressed_size = header.uncompressed_page_size; + const tparquet::PageHeader* header; + RETURN_IF_ERROR(_page_reader->get_page_header(header)); + int32_t uncompressed_size = header->uncompressed_page_size; if (_block_compress_codec != nullptr) { Slice compressed_data; RETURN_IF_ERROR(_page_reader->get_page_data(compressed_data)); - if (header.__isset.data_page_header_v2) { - const tparquet::DataPageHeaderV2& header_v2 = header.data_page_header_v2; + if (header->__isset.data_page_header_v2) { + const tparquet::DataPageHeaderV2& header_v2 = header->data_page_header_v2; // uncompressed_size = rl + dl + uncompressed_data_size // compressed_size = rl + dl + compressed_data_size uncompressed_size -= header_v2.repetition_levels_byte_length + @@ -137,8 +146,8 @@ Status ColumnChunkReader::load_page_data() { _get_uncompressed_levels(header_v2, compressed_data); } bool is_v2_compressed = - header.__isset.data_page_header_v2 && header.data_page_header_v2.is_compressed; - if (header.__isset.data_page_header || is_v2_compressed) { + header->__isset.data_page_header_v2 && header->data_page_header_v2.is_compressed; + if (header->__isset.data_page_header || is_v2_compressed) { // check decompressed buffer size _reserve_decompress_buf(uncompressed_size); _page_data = Slice(_decompress_buf.get(), uncompressed_size); @@ -151,36 +160,36 @@ Status ColumnChunkReader::load_page_data() { } } else { RETURN_IF_ERROR(_page_reader->get_page_data(_page_data)); - if (header.__isset.data_page_header_v2) { - _get_uncompressed_levels(header.data_page_header_v2, _page_data); + if (header->__isset.data_page_header_v2) { + _get_uncompressed_levels(header->data_page_header_v2, _page_data); } } // Initialize repetition level and definition level. Skip when level = 0, which means required field. if (_max_rep_level > 0) { SCOPED_RAW_TIMER(&_statistics.decode_level_time); - if (header.__isset.data_page_header_v2) { + if (header->__isset.data_page_header_v2) { RETURN_IF_ERROR(_rep_level_decoder.init_v2(_v2_rep_levels, _max_rep_level, _remaining_num_values)); } else { RETURN_IF_ERROR(_rep_level_decoder.init( - &_page_data, header.data_page_header.repetition_level_encoding, _max_rep_level, + &_page_data, header->data_page_header.repetition_level_encoding, _max_rep_level, _remaining_num_values)); } } if (_max_def_level > 0) { SCOPED_RAW_TIMER(&_statistics.decode_level_time); - if (header.__isset.data_page_header_v2) { + if (header->__isset.data_page_header_v2) { RETURN_IF_ERROR(_def_level_decoder.init_v2(_v2_def_levels, _max_def_level, _remaining_num_values)); } else { RETURN_IF_ERROR(_def_level_decoder.init( - &_page_data, header.data_page_header.definition_level_encoding, _max_def_level, + &_page_data, header->data_page_header.definition_level_encoding, _max_def_level, _remaining_num_values)); } } - auto encoding = header.__isset.data_page_header_v2 ? header.data_page_header_v2.encoding - : header.data_page_header.encoding; + auto encoding = header->__isset.data_page_header_v2 ? header->data_page_header_v2.encoding + : header->data_page_header.encoding; // change the deprecated encoding to RLE_DICTIONARY if (encoding == tparquet::Encoding::PLAIN_DICTIONARY) { encoding = tparquet::Encoding::RLE_DICTIONARY; @@ -207,14 +216,15 @@ Status ColumnChunkReader::load_page_data() { } Status ColumnChunkReader::_decode_dict_page() { - const tparquet::PageHeader& header = *_page_reader->get_page_header(); - DCHECK_EQ(tparquet::PageType::DICTIONARY_PAGE, header.type); + const tparquet::PageHeader* header; + RETURN_IF_ERROR(_page_reader->get_page_header(header)); + DCHECK_EQ(tparquet::PageType::DICTIONARY_PAGE, header->type); SCOPED_RAW_TIMER(&_statistics.decode_dict_time); // Using the PLAIN_DICTIONARY enum value is deprecated in the Parquet 2.0 specification. // Prefer using RLE_DICTIONARY in a data page and PLAIN in a dictionary page for Parquet 2.0+ files. // refer: https://github.com/apache/parquet-format/blob/master/Encodings.md - tparquet::Encoding::type dict_encoding = header.dictionary_page_header.encoding; + tparquet::Encoding::type dict_encoding = header->dictionary_page_header.encoding; if (dict_encoding != tparquet::Encoding::PLAIN_DICTIONARY && dict_encoding != tparquet::Encoding::PLAIN) { return Status::InternalError("Unsupported dictionary encoding {}", @@ -222,7 +232,7 @@ Status ColumnChunkReader::_decode_dict_page() { } // Prepare dictionary data - int32_t uncompressed_size = header.uncompressed_page_size; + int32_t uncompressed_size = header->uncompressed_page_size; std::unique_ptr<uint8_t[]> dict_data(new uint8_t[uncompressed_size]); if (_block_compress_codec != nullptr) { Slice compressed_data; @@ -246,7 +256,7 @@ Status ColumnChunkReader::_decode_dict_page() { // page_decoder->init(_field_schema, _ctz); // Set the dictionary data RETURN_IF_ERROR(page_decoder->set_dict(dict_data, uncompressed_size, - header.dictionary_page_header.num_values)); + header->dictionary_page_header.num_values)); _decoders[static_cast<int>(tparquet::Encoding::RLE_DICTIONARY)] = std::move(page_decoder); _has_dict = true; diff --git a/be/src/vec/exec/format/parquet/vparquet_column_chunk_reader.h b/be/src/vec/exec/format/parquet/vparquet_column_chunk_reader.h index 0ca6859ac83..79ee3cd6463 100644 --- a/be/src/vec/exec/format/parquet/vparquet_column_chunk_reader.h +++ b/be/src/vec/exec/format/parquet/vparquet_column_chunk_reader.h @@ -71,7 +71,7 @@ using ColumnString = ColumnStr<UInt32>; * // Or, we can call the chunk_reader.skip_page() to skip current page. * chunk_reader.load_page_data(); * // Decode values into column or slice. - * // Or, we can call chunk_reader.slip_values(num_values) to skip some values. + * // Or, we can call chunk_reader.skip_values(num_values) to skip some values. * chunk_reader.decode_values(slice, num_values); * } */ @@ -84,10 +84,13 @@ public: int64_t decode_value_time = 0; int64_t decode_dict_time = 0; int64_t decode_level_time = 0; + int64_t skip_page_header_num = 0; + int64_t parse_page_header_num = 0; }; ColumnChunkReader(io::BufferedStreamReader* reader, tparquet::ColumnChunk* column_chunk, - FieldSchema* field_schema, cctz::time_zone* ctz, io::IOContext* io_ctx); + FieldSchema* field_schema, const tparquet::OffsetIndex* offset_index, + cctz::time_zone* ctz, io::IOContext* io_ctx); ~ColumnChunkReader() = default; // Initialize chunk reader, will generate the decoder and codec. @@ -170,6 +173,8 @@ public: Statistics& statistics() { _statistics.decode_header_time = _page_reader->statistics().decode_header_time; + _statistics.skip_page_header_num = _page_reader->statistics().skip_page_header_num; + _statistics.parse_page_header_num = _page_reader->statistics().parse_page_header_num; return _statistics; } @@ -204,6 +209,7 @@ private: io::BufferedStreamReader* _stream_reader = nullptr; tparquet::ColumnMetaData _metadata; + const tparquet::OffsetIndex* _offset_index; // cctz::time_zone* _ctz; io::IOContext* _io_ctx = nullptr; @@ -219,6 +225,7 @@ private: size_t _decompress_buf_size = 0; Slice _v2_rep_levels; Slice _v2_def_levels; + bool _dict_checked = false; bool _has_dict = false; Decoder* _page_decoder = nullptr; // Map: encoding -> Decoder diff --git a/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp b/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp index 2a3782ab449..85d03daebc5 100644 --- a/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp +++ b/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp @@ -108,7 +108,7 @@ Status ParquetColumnReader::create(io::FileReaderSPtr file, FieldSchema* field, const std::vector<RowRange>& row_ranges, cctz::time_zone* ctz, io::IOContext* io_ctx, std::unique_ptr<ParquetColumnReader>& reader, - size_t max_buf_size) { + size_t max_buf_size, const tparquet::OffsetIndex* offset_index) { if (field->type.type == TYPE_ARRAY) { std::unique_ptr<ParquetColumnReader> element_reader; RETURN_IF_ERROR(create(file, &field->children[0], row_group, row_ranges, ctz, io_ctx, @@ -144,7 +144,8 @@ Status ParquetColumnReader::create(io::FileReaderSPtr file, FieldSchema* field, reader.reset(struct_reader.release()); } else { const tparquet::ColumnChunk& chunk = row_group.columns[field->physical_column_index]; - auto scalar_reader = ScalarColumnReader::create_unique(row_ranges, chunk, ctz, io_ctx); + auto scalar_reader = + ScalarColumnReader::create_unique(row_ranges, chunk, offset_index, ctz, io_ctx); RETURN_IF_ERROR(scalar_reader->init(file, field, max_buf_size)); reader.reset(scalar_reader.release()); } @@ -190,7 +191,7 @@ Status ScalarColumnReader::init(io::FileReaderSPtr file, FieldSchema* field, siz _stream_reader = std::make_unique<io::BufferedFileStreamReader>(file, chunk_start, chunk_len, prefetch_buffer_size); _chunk_reader = std::make_unique<ColumnChunkReader>(_stream_reader.get(), &_chunk_meta, field, - _ctz, _io_ctx); + _offset_index, _ctz, _io_ctx); RETURN_IF_ERROR(_chunk_reader->init()); return Status::OK(); } diff --git a/be/src/vec/exec/format/parquet/vparquet_column_reader.h b/be/src/vec/exec/format/parquet/vparquet_column_reader.h index d12eac2f383..f0eadb8bcd6 100644 --- a/be/src/vec/exec/format/parquet/vparquet_column_reader.h +++ b/be/src/vec/exec/format/parquet/vparquet_column_reader.h @@ -65,7 +65,9 @@ public: decode_value_time(0), decode_dict_time(0), decode_level_time(0), - decode_null_map_time(0) {} + decode_null_map_time(0), + skip_page_header_num(0), + parse_page_header_num(0) {} Statistics(io::BufferedStreamReader::Statistics& fs, ColumnChunkReader::Statistics& cs, int64_t null_map_time) @@ -79,7 +81,9 @@ public: decode_value_time(cs.decode_value_time), decode_dict_time(cs.decode_dict_time), decode_level_time(cs.decode_level_time), - decode_null_map_time(null_map_time) {} + decode_null_map_time(null_map_time), + skip_page_header_num(cs.skip_page_header_num), + parse_page_header_num(cs.parse_page_header_num) {} int64_t read_time; int64_t read_calls; @@ -92,6 +96,8 @@ public: int64_t decode_dict_time; int64_t decode_level_time; int64_t decode_null_map_time; + int64_t skip_page_header_num; + int64_t parse_page_header_num; void merge(Statistics& statistics) { read_time += statistics.read_time; @@ -105,6 +111,8 @@ public: decode_dict_time += statistics.decode_dict_time; decode_level_time += statistics.decode_level_time; decode_null_map_time += statistics.decode_null_map_time; + skip_page_header_num += statistics.skip_page_header_num; + parse_page_header_num += statistics.parse_page_header_num; } }; @@ -134,7 +142,7 @@ public: const tparquet::RowGroup& row_group, const std::vector<RowRange>& row_ranges, cctz::time_zone* ctz, io::IOContext* io_ctx, std::unique_ptr<ParquetColumnReader>& reader, - size_t max_buf_size); + size_t max_buf_size, const tparquet::OffsetIndex* offset_index = nullptr); void set_nested_column() { _nested_column = true; } virtual const std::vector<level_t>& get_rep_level() const = 0; virtual const std::vector<level_t>& get_def_level() const = 0; @@ -160,9 +168,12 @@ class ScalarColumnReader : public ParquetColumnReader { ENABLE_FACTORY_CREATOR(ScalarColumnReader) public: ScalarColumnReader(const std::vector<RowRange>& row_ranges, - const tparquet::ColumnChunk& chunk_meta, cctz::time_zone* ctz, + const tparquet::ColumnChunk& chunk_meta, + const tparquet::OffsetIndex* offset_index, cctz::time_zone* ctz, io::IOContext* io_ctx) - : ParquetColumnReader(row_ranges, ctz, io_ctx), _chunk_meta(chunk_meta) {} + : ParquetColumnReader(row_ranges, ctz, io_ctx), + _chunk_meta(chunk_meta), + _offset_index(offset_index) {} ~ScalarColumnReader() override { close(); } Status init(io::FileReaderSPtr file, FieldSchema* field, size_t max_buf_size); Status read_column_data(ColumnPtr& doris_column, DataTypePtr& type, @@ -182,6 +193,7 @@ public: private: tparquet::ColumnChunk _chunk_meta; + const tparquet::OffsetIndex* _offset_index; std::unique_ptr<io::BufferedFileStreamReader> _stream_reader; std::unique_ptr<ColumnChunkReader> _chunk_reader; std::vector<level_t> _rep_levels; diff --git a/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp b/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp index 3f8000c3173..335207070dd 100644 --- a/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp +++ b/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp @@ -39,7 +39,6 @@ #include "runtime/thread_context.h" #include "runtime/types.h" #include "schema_desc.h" -#include "util/simd/bits.h" #include "vec/columns/column_const.h" #include "vec/columns/column_nullable.h" #include "vec/columns/column_string.h" @@ -124,12 +123,17 @@ Status RowGroupReader::init( const size_t MAX_GROUP_BUF_SIZE = config::parquet_rowgroup_max_buffer_mb << 20; const size_t MAX_COLUMN_BUF_SIZE = config::parquet_column_max_buffer_mb << 20; size_t max_buf_size = std::min(MAX_COLUMN_BUF_SIZE, MAX_GROUP_BUF_SIZE / _read_columns.size()); - for (auto& read_col : _read_columns) { - auto field = const_cast<FieldSchema*>(schema.get_column(read_col)); + for (const auto& read_col : _read_columns) { + auto* field = const_cast<FieldSchema*>(schema.get_column(read_col)); + auto physical_index = field->physical_column_index; std::unique_ptr<ParquetColumnReader> reader; + // TODO : support rested column types + const tparquet::OffsetIndex* offset_index = + col_offsets.find(physical_index) != col_offsets.end() ? &col_offsets[physical_index] + : nullptr; RETURN_IF_ERROR(ParquetColumnReader::create(_file_reader, field, _row_group_meta, _read_ranges, _ctz, _io_ctx, reader, - max_buf_size)); + max_buf_size, offset_index)); if (reader == nullptr) { VLOG_DEBUG << "Init row group(" << _row_group_id << ") reader failed"; return Status::Corruption("Init row group reader failed"); diff --git a/be/src/vec/exec/format/parquet/vparquet_group_reader.h b/be/src/vec/exec/format/parquet/vparquet_group_reader.h index 128a7450554..d38f5a74adf 100644 --- a/be/src/vec/exec/format/parquet/vparquet_group_reader.h +++ b/be/src/vec/exec/format/parquet/vparquet_group_reader.h @@ -29,7 +29,6 @@ #include "io/fs/file_reader_writer_fwd.h" #include "vec/columns/column.h" -#include "vec/common/allocator.h" #include "vec/exec/format/parquet/parquet_common.h" #include "vec/exprs/vexpr_fwd.h" #include "vparquet_column_reader.h" diff --git a/be/src/vec/exec/format/parquet/vparquet_page_reader.cpp b/be/src/vec/exec/format/parquet/vparquet_page_reader.cpp index 3b4e18c27da..a321e77c692 100644 --- a/be/src/vec/exec/format/parquet/vparquet_page_reader.cpp +++ b/be/src/vec/exec/format/parquet/vparquet_page_reader.cpp @@ -40,11 +40,23 @@ namespace doris::vectorized { static constexpr size_t INIT_PAGE_HEADER_SIZE = 128; +std::unique_ptr<PageReader> create_page_reader(io::BufferedStreamReader* reader, + io::IOContext* io_ctx, uint64_t offset, + uint64_t length, int64_t num_values, + const tparquet::OffsetIndex* offset_index) { + if (offset_index) { + return std::make_unique<PageReaderWithOffsetIndex>(reader, io_ctx, offset, length, + num_values, offset_index); + } else { + return std::make_unique<PageReader>(reader, io_ctx, offset, length); + } +} + PageReader::PageReader(io::BufferedStreamReader* reader, io::IOContext* io_ctx, uint64_t offset, uint64_t length) : _reader(reader), _io_ctx(io_ctx), _start_offset(offset), _end_offset(offset + length) {} -Status PageReader::next_page_header() { +Status PageReader::_parse_page_header() { if (UNLIKELY(_offset < _start_offset || _offset >= _end_offset)) { return Status::IOError("Out-of-bounds Access"); } @@ -82,6 +94,7 @@ Status PageReader::next_page_header() { header_size <<= 2; } + _statistics.parse_page_header_num++; _offset += real_header_size; _next_header_offset = _offset + _cur_page_header.compressed_page_size; _state = HEADER_PARSED; diff --git a/be/src/vec/exec/format/parquet/vparquet_page_reader.h b/be/src/vec/exec/format/parquet/vparquet_page_reader.h index bdd0a8d0f5f..5765df4fc1f 100644 --- a/be/src/vec/exec/format/parquet/vparquet_page_reader.h +++ b/be/src/vec/exec/format/parquet/vparquet_page_reader.h @@ -20,6 +20,8 @@ #include <gen_cpp/parquet_types.h> #include <stdint.h> +#include <memory> + #include "common/status.h" namespace doris { @@ -39,11 +41,13 @@ class PageReader { public: struct Statistics { int64_t decode_header_time = 0; + int64_t skip_page_header_num = 0; + int64_t parse_page_header_num = 0; }; PageReader(io::BufferedStreamReader* reader, io::IOContext* io_ctx, uint64_t offset, uint64_t length); - ~PageReader() = default; + virtual ~PageReader() = default; // Deprecated // Parquet file may not be standardized, @@ -52,13 +56,31 @@ public: // [[deprecated]] bool has_next_page() const { return _offset < _end_offset; } - Status next_page_header(); + virtual Status next_page_header() { return _parse_page_header(); } - Status skip_page(); + virtual Status get_page_header(const tparquet::PageHeader*& page_header) { + if (UNLIKELY(_state != HEADER_PARSED)) { + return Status::InternalError("Page header not parsed"); + } + page_header = &_cur_page_header; + return Status::OK(); + } - const tparquet::PageHeader* get_page_header() const { return &_cur_page_header; } + virtual Status get_num_values(uint32_t& num_values) { + if (_state != HEADER_PARSED) { + return Status::InternalError("Page header not parsed"); + } + if (_cur_page_header.type == tparquet::PageType::DATA_PAGE_V2) { + num_values = _cur_page_header.data_page_header_v2.num_values; + } else { + num_values = _cur_page_header.data_page_header.num_values; + } + return Status::OK(); + } - Status get_page_data(Slice& slice); + virtual Status skip_page(); + + virtual Status get_page_data(Slice& slice); Statistics& statistics() { return _statistics; } @@ -68,20 +90,99 @@ public: _state = INITIALIZED; } -private: +protected: enum PageReaderState { INITIALIZED, HEADER_PARSED }; - - io::BufferedStreamReader* _reader = nullptr; - io::IOContext* _io_ctx = nullptr; + PageReaderState _state = INITIALIZED; tparquet::PageHeader _cur_page_header; Statistics _statistics; - PageReaderState _state = INITIALIZED; + Status _parse_page_header(); + +private: + io::BufferedStreamReader* _reader = nullptr; + io::IOContext* _io_ctx = nullptr; uint64_t _offset = 0; uint64_t _next_header_offset = 0; - uint64_t _start_offset = 0; uint64_t _end_offset = 0; }; +class PageReaderWithOffsetIndex : public PageReader { +public: + PageReaderWithOffsetIndex(io::BufferedStreamReader* reader, io::IOContext* io_ctx, + uint64_t offset, uint64_t length, int64_t num_values, + const tparquet::OffsetIndex* offset_index) + : PageReader(reader, io_ctx, offset, length), + _num_values(num_values), + _offset_index(offset_index) {} + + Status next_page_header() override { + // lazy to parse page header in get_page_header + return Status::OK(); + } + + Status get_page_header(const tparquet::PageHeader*& page_header) override { + if (_state != HEADER_PARSED) { + RETURN_IF_ERROR(_parse_page_header()); + } + page_header = &_cur_page_header; + return Status::OK(); + } + + Status get_num_values(uint32_t& num_values) override { + if (UNLIKELY(_page_index >= _offset_index->page_locations.size())) { + return Status::IOError("End of page"); + } + + if (_page_index < _offset_index->page_locations.size() - 1) { + num_values = _offset_index->page_locations[_page_index + 1].first_row_index - + _offset_index->page_locations[_page_index].first_row_index; + } else { + num_values = _num_values - _offset_index->page_locations[_page_index].first_row_index; + } + return Status::OK(); + } + + Status skip_page() override { + if (UNLIKELY(_page_index >= _offset_index->page_locations.size())) { + return Status::IOError("End of page"); + } + + if (_state != HEADER_PARSED) { + _statistics.skip_page_header_num++; + } + + seek_to_page(_offset_index->page_locations[_page_index].offset + + _offset_index->page_locations[_page_index].compressed_page_size); + _page_index++; + return Status::OK(); + } + + Status get_page_data(Slice& slice) override { + if (_page_index >= _offset_index->page_locations.size()) { + return Status::IOError("End of page"); + } + if (_state != HEADER_PARSED) { + RETURN_IF_ERROR(_parse_page_header()); + } + + // dirctionary page is not in page location + if (LIKELY(_cur_page_header.type != tparquet::PageType::DICTIONARY_PAGE)) { + _page_index++; + } + + return PageReader::get_page_data(slice); + } + +private: + size_t _page_index = 0; + int64_t _num_values = 0; + const tparquet::OffsetIndex* _offset_index; +}; + +std::unique_ptr<PageReader> create_page_reader(io::BufferedStreamReader* reader, + io::IOContext* io_ctx, uint64_t offset, + uint64_t length, int64_t num_values = 0, + const tparquet::OffsetIndex* offset_index = nullptr); + } // namespace doris::vectorized diff --git a/be/src/vec/exec/format/parquet/vparquet_reader.cpp b/be/src/vec/exec/format/parquet/vparquet_reader.cpp index 090c1bdf460..17e44e7b9a8 100644 --- a/be/src/vec/exec/format/parquet/vparquet_reader.cpp +++ b/be/src/vec/exec/format/parquet/vparquet_reader.cpp @@ -23,18 +23,14 @@ #include <glog/logging.h> #include <functional> -#include <ostream> #include <utility> #include "common/status.h" #include "exec/schema_scanner.h" -#include "gen_cpp/descriptors.pb.h" -#include "gtest/gtest_pred_impl.h" #include "io/file_factory.h" #include "io/fs/buffered_reader.h" #include "io/fs/file_reader.h" #include "io/fs/file_reader_writer_fwd.h" -#include "olap/olap_common.h" #include "parquet_pred_cmp.h" #include "parquet_thrift_util.h" #include "runtime/define_primitive_type.h" @@ -170,6 +166,10 @@ void ParquetReader::_init_profile() { ADD_CHILD_TIMER_WITH_LEVEL(_profile, "DecodeLevelTime", parquet_profile, 1); _parquet_profile.decode_null_map_time = ADD_CHILD_TIMER_WITH_LEVEL(_profile, "DecodeNullMapTime", parquet_profile, 1); + _parquet_profile.skip_page_header_num = ADD_CHILD_COUNTER_WITH_LEVEL( + _profile, "SkipPageHeaderNum", TUnit::UNIT, parquet_profile, 1); + _parquet_profile.parse_page_header_num = ADD_CHILD_COUNTER_WITH_LEVEL( + _profile, "ParsePageHeaderNum", TUnit::UNIT, parquet_profile, 1); } } @@ -921,6 +921,9 @@ void ParquetReader::_collect_profile() { COUNTER_UPDATE(_parquet_profile.page_index_filter_time, _statistics.page_index_filter_time); COUNTER_UPDATE(_parquet_profile.row_group_filter_time, _statistics.row_group_filter_time); + COUNTER_UPDATE(_parquet_profile.skip_page_header_num, _column_statistics.skip_page_header_num); + COUNTER_UPDATE(_parquet_profile.parse_page_header_num, + _column_statistics.parse_page_header_num); COUNTER_UPDATE(_parquet_profile.file_read_time, _column_statistics.read_time); COUNTER_UPDATE(_parquet_profile.file_read_calls, _column_statistics.read_calls); COUNTER_UPDATE(_parquet_profile.file_meta_read_calls, _column_statistics.meta_read_calls); diff --git a/be/src/vec/exec/format/parquet/vparquet_reader.h b/be/src/vec/exec/format/parquet/vparquet_reader.h index eba32abd225..0612951c67e 100644 --- a/be/src/vec/exec/format/parquet/vparquet_reader.h +++ b/be/src/vec/exec/format/parquet/vparquet_reader.h @@ -22,7 +22,6 @@ #include <stdint.h> #include <list> -#include <map> #include <memory> #include <string> #include <tuple> @@ -183,6 +182,8 @@ private: RuntimeProfile::Counter* decode_dict_time = nullptr; RuntimeProfile::Counter* decode_level_time = nullptr; RuntimeProfile::Counter* decode_null_map_time = nullptr; + RuntimeProfile::Counter* skip_page_header_num = nullptr; + RuntimeProfile::Counter* parse_page_header_num = nullptr; }; Status _open_file(); diff --git a/be/test/vec/exec/parquet/parquet_thrift_test.cpp b/be/test/vec/exec/parquet/parquet_thrift_test.cpp index 4dfbd6a380f..1407edb08f6 100644 --- a/be/test/vec/exec/parquet/parquet_thrift_test.cpp +++ b/be/test/vec/exec/parquet/parquet_thrift_test.cpp @@ -205,7 +205,8 @@ static Status get_column_values(io::FileReaderSPtr file_reader, tparquet::Column io::BufferedFileStreamReader stream_reader(file_reader, start_offset, chunk_size, 1024); - ColumnChunkReader chunk_reader(&stream_reader, column_chunk, field_schema, &ctz, nullptr); + ColumnChunkReader chunk_reader(&stream_reader, column_chunk, field_schema, nullptr, &ctz, + nullptr); // initialize chunk reader static_cast<void>(chunk_reader.init()); // seek to next page header diff --git a/regression-test/data/external_table_p0/hive/test_hive_parquet_skip_page.out b/regression-test/data/external_table_p0/hive/test_hive_parquet_skip_page.out new file mode 100644 index 00000000000..6c869dbc789 --- /dev/null +++ b/regression-test/data/external_table_p0/hive/test_hive_parquet_skip_page.out @@ -0,0 +1,289 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !q01 -- +1 2132 4633 4 28.00 28955.64 0.09 0.06 N O 1996-04-21 1996-03-30 1996-05-16 NONE AIR lites. fluffily even de +1 15635 638 6 32.00 49620.16 0.07 0.02 N O 1996-01-30 1996-02-07 1996-02-03 DELIVER IN PERSON MAIL arefully slyly ex +1 24027 1534 5 24.00 22824.48 0.10 0.04 N O 1996-03-30 1996-03-14 1996-04-01 NONE FOB pending foxes. slyly re +1 63700 3701 3 8.00 13309.60 0.10 0.02 N O 1996-01-29 1996-03-05 1996-01-31 TAKE BACK RETURN REG AIR riously. regular, express dep +1 67310 7311 2 36.00 45983.16 0.09 0.06 N O 1996-04-12 1996-02-28 1996-04-20 TAKE BACK RETURN MAIL ly final dependencies: slyly bold +1 155190 7706 1 17.00 21168.23 0.04 0.02 N O 1996-03-13 1996-02-12 1996-03-22 DELIVER IN PERSON TRUCK egular courts above the +2 106170 1191 1 38.00 44694.46 0.00 0.05 N O 1997-01-28 1997-01-14 1997-02-02 TAKE BACK RETURN RAIL ven requests. deposits breach a +3 4297 1798 1 45.00 54058.05 0.06 0.00 R F 1994-02-02 1994-01-04 1994-02-23 NONE AIR ongside of the furiously brave acco +3 19036 6540 2 49.00 46796.47 0.10 0.00 R F 1993-11-09 1993-12-20 1993-11-24 TAKE BACK RETURN RAIL unusual accounts. eve +3 29380 1883 4 2.00 2618.76 0.01 0.06 A F 1993-12-04 1994-01-07 1994-01-01 NONE TRUCK y. fluffily pending d + +-- !q02 -- +5999008 16312 6313 4 2.00 2456.62 0.08 0.05 R F 1994-04-28 1994-06-01 1994-05-12 COLLECT COD FOB longside of the slo +5999008 32738 2739 3 39.00 65158.47 0.09 0.08 R F 1994-07-11 1994-06-15 1994-08-04 TAKE BACK RETURN TRUCK equests nag along +5999008 64711 2230 1 29.00 48595.59 0.00 0.02 R F 1994-05-16 1994-07-04 1994-05-18 NONE FOB final requests across +5999008 192755 5275 2 32.00 59128.00 0.07 0.08 R F 1994-05-15 1994-05-22 1994-06-07 COLLECT COD RAIL ts sleep slyly about the slyly ironic acco +5999009 12147 7150 1 21.00 22241.94 0.00 0.05 N O 1997-11-01 1997-12-11 1997-11-05 NONE AIR deposits after the blithely ex +5999010 106595 6596 1 31.00 49649.29 0.02 0.05 N O 1997-11-29 1997-10-24 1997-12-11 DELIVER IN PERSON MAIL ilent instructions? slyly r +5999010 141441 1442 2 42.00 62262.48 0.05 0.08 N O 1997-09-21 1997-10-13 1997-09-25 TAKE BACK RETURN SHIP c, even ideas. ruth +5999010 193075 8114 3 40.00 46722.80 0.04 0.07 N O 1997-11-19 1997-09-25 1997-11-25 DELIVER IN PERSON AIR accounts sleep blithely even, +5999010 198678 1198 4 12.00 21320.04 0.00 0.04 N O 1997-09-19 1997-10-15 1997-10-05 DELIVER IN PERSON REG AIR ironic foxes. slyly special id +5999011 98609 6137 1 44.00 70734.40 0.03 0.01 N O 1998-04-05 1998-05-16 1998-05-05 DELIVER IN PERSON REG AIR ructions along the blit + +-- !q03 -- +2000001 16877 6878 1 36.00 64579.32 0.01 0.05 A F 1995-01-23 1995-01-31 1995-02-20 TAKE BACK RETURN FOB regular deposits. even +2000001 50928 3434 2 36.00 67641.12 0.02 0.03 A F 1995-02-03 1995-02-07 1995-02-13 NONE AIR ickly slyl +2000001 117877 7878 4 20.00 37897.40 0.02 0.07 R F 1995-03-29 1995-03-16 1995-04-01 COLLECT COD RAIL . realms boost unusual theodoli +2000001 135534 8048 3 38.00 59642.14 0.00 0.05 R F 1994-12-31 1995-03-06 1995-01-26 COLLECT COD MAIL l theodolites affix quickly alongside of +2000001 149269 1784 5 15.00 19773.90 0.05 0.05 R F 1995-03-08 1995-02-10 1995-03-23 DELIVER IN PERSON AIR e bold, silent foxes solve dog +2000002 41816 1817 6 8.00 14062.48 0.05 0.06 N O 1996-02-14 1995-12-25 1996-03-12 DELIVER IN PERSON FOB y quickly pending foxes. quickly ironic acc +2000002 62662 2663 2 48.00 77983.68 0.01 0.05 N O 1995-11-20 1996-01-11 1995-12-05 TAKE BACK RETURN SHIP requests sleep blithely. slyly +2000002 77402 7403 4 47.00 64831.80 0.07 0.03 N O 1996-02-24 1996-02-04 1996-03-09 NONE SHIP ong the carefully silent instructions. even +2000002 80719 8244 5 42.00 71387.82 0.10 0.03 N O 1995-11-24 1996-01-01 1995-11-25 COLLECT COD SHIP ing to the carefully final deposits. care +2000002 156357 6358 3 28.00 39573.80 0.09 0.05 N O 1995-12-26 1996-01-15 1996-01-25 COLLECT COD MAIL ely regular instr + +-- !q04 -- +1 Customer#000000001 IVhzIApeRb ot,c,E 15 25-989-741-2988 711.56 BUILDING to the even, regular platelets. regular, ironic epitaphs nag e +2 Customer#000000002 XSTf4,NCwDVaWNe6tEgvwfmRchLXak 13 23-768-687-3665 121.65 AUTOMOBILE l accounts. blithely ironic theodolites integrate boldly: caref +3 Customer#000000003 MG9kdTD2WBHm 1 11-719-748-3364 7498.12 AUTOMOBILE deposits eat slyly ironic, even instructions. express foxes detect slyly. blithely even accounts abov +4 Customer#000000004 XxVSJsLAGtn 4 14-128-190-5944 2866.83 MACHINERY requests. final, regular ideas sleep final accou +5 Customer#000000005 KvpyuHCplrB84WgAiGV6sYpZq7Tj 3 13-750-942-6364 794.47 HOUSEHOLD n accounts will have to unwind. foxes cajole accor +6 Customer#000000006 sKZz0CsnMD7mp4Xd0YrBvx,LREYKUWAh yVn 20 30-114-968-4951 7638.57 AUTOMOBILE tions. even deposits boost according to the slyly bold packages. final accounts cajole requests. furious +7 Customer#000000007 TcGe5gaZNgVePxU5kRrvXBfkasDTea 18 28-190-982-9759 9561.95 AUTOMOBILE ainst the ironic, express theodolites. express, even pinto beans among the exp +8 Customer#000000008 I0B10bB0AymmC, 0PrRYBCP1yGJ8xcBPmWhl5 17 27-147-574-9335 6819.74 BUILDING among the slyly regular theodolites kindle blithely courts. carefully even theodolites haggle slyly along the ide +9 Customer#000000009 xKiAFTjUsCuxfeleNqefumTrjS 8 18-338-906-3675 8324.07 FURNITURE r theodolites according to the requests wake thinly excuses: pending requests haggle furiousl +10 Customer#000000010 6LrEaV6KR6PLVcgl2ArL Q3rqzLzcT1 v2 5 15-741-346-9870 2753.54 HOUSEHOLD es regular deposits haggle. fur + +-- !q05 -- +140001 Customer#000140001 CkN1egC06Sc51bbDyQ8VnFn Bz6N1p 15 25-647-696-2830 2747.48 AUTOMOBILE heodolites. slyly bold theodolites d +140002 Customer#000140002 8UWLS,im0k94ivCVx 23 33-146-814-9234 7914.10 HOUSEHOLD eep behind the quickly bold foxes. furiously ironic ideas shall have to sleep. regular packages +140003 Customer#000140003 2duVgk HhGGlOeP4S,brWKjKG62bGkupful 23 33-326-909-7916 9389.77 FURNITURE pending, even packages are. slyly regular accounts wake ironically final packages. bold +140004 Customer#000140004 S,V7RhLnmqPK0TDghbcdxotzTyKeUC 13 23-594-312-2596 5931.66 HOUSEHOLD t blithely blithely regular packages. never silent dependencies cajo +140005 Customer#000140005 yQemRDs9i8MmTJla7xha xqeZjMznW 20 30-169-231-7354 9489.56 BUILDING accounts. even ideas sleep carefu +140006 Customer#000140006 5,eRqyFjpobN2Wtvt2oXuLcJcNE8oTyRh 15 25-681-278-7283 67.66 MACHINERY at the accounts are bold escapades. furiously final foxes use carefully unusual orb +140007 Customer#000140007 AX75sSePE5PlDjD5qS6W1dx08Levf09 24 34-818-770-8059 2093.37 MACHINERY ily according to the furiously final packages? quickly spe +140008 Customer#000140008 2zpry AYh9otf4c5vESISPvKLWPKe9i 14 24-552-949-6395 3264.69 AUTOMOBILE nstructions are against the requests. fin +140009 Customer#000140009 dNwNUcCv,,0YE6WFYfOgM,6A2 4 14-940-856-8557 -359.36 HOUSEHOLD beans. blithely silent dependencies haggle slyly. carefully quick accounts across the depos +140010 Customer#000140010 vZxOW,NtvppKR9mpTl6RDl9sWJJbosYDoLineEm 7 17-151-800-8260 8216.11 BUILDING nding foxes across the quickly regular forges nod accounts. slyly express ex + +-- !q06 -- +100001 Customer#000100001 gQ1s5C45A3PxWmZ1oFFSxt8u EcZ, 24 34-705-443-4055 1726.66 HOUSEHOLD ts. ironic instructions sleep. final deposits +100002 Customer#000100002 qOmTcZ7kHzJLSoaLenr9,Gu 17 27-453-414-8560 -39.14 BUILDING wake carefully. blithely regular epitaphs are among the quickly regular deposits. +100003 Customer#000100003 5AYbJxvjo7ErQB,cGIpKZRAE9,w2l9 5 15-783-309-8970 72.71 BUILDING ckly blithely special accounts. theodolites are carefully. pending requests ha +100004 Customer#000100004 cpIOYQpMlm 18 28-316-370-8752 9990.05 BUILDING y above the slyly regular pains. unusual requests against the always special packages bre +100005 Customer#000100005 Wud8n74NcIpwiKSjPS zZ 16 26-935-603-9031 7789.25 BUILDING ing dugouts boost slyly above the pending, final accounts? regular deposits wake slyly alongside of the blithely i +100006 Customer#000100006 AkjXh4y,QNaF7,0xzbP,sG 7 17-964-673-7626 974.05 MACHINERY grate across the slyly even packages; final, special idea +100007 Customer#000100007 d94JW9Hc2ZtGriOBNKyIjOeP,VZZqIX7S 17 27-244-129-5307 777.86 HOUSEHOLD foxes are against the ironic theodolites. evenly pending ideas according to the qu +100008 Customer#000100008 Hv2A,YqfNnGRIKaY 18 28-828-394-8424 3374.90 BUILDING ccounts. even deposits wake quickly pinto beans. bold instructions integrate? never bold theodolites are s +100009 Customer#000100009 OioQ3EjJZRvxCNh6Q8E3QZH 6 16-928-807-2622 3932.63 MACHINERY aggle blithely quickly final accounts. carefully final deposits above the fluffily unus +100010 Customer#000100010 Tbiz2WMJX 0 10-147-978-7806 5693.02 BUILDING y regular ideas. quickly unusual gifts n + +-- !q07 -- +1 36901 O 173665.47 1996-01-02 5-LOW Clerk#000000951 0 nstructions sleep furiously among +2 78002 O 46929.18 1996-12-01 1-URGENT Clerk#000000880 0 foxes. pending accounts at the pending, silent asymptot +3 123314 F 193846.25 1993-10-14 5-LOW Clerk#000000955 0 sly final accounts boost. carefully regular ideas cajole carefully. depos +4 136777 O 32151.78 1995-10-11 5-LOW Clerk#000000124 0 sits. slyly regular warthogs cajole. regular, regular theodolites acro +5 44485 F 144659.20 1994-07-30 5-LOW Clerk#000000925 0 quickly. bold deposits sleep slyly. packages use slyly +6 55624 F 58749.59 1992-02-21 4-NOT SPECIFIED Clerk#000000058 0 ggle. special, final requests are against the furiously specia +7 39136 O 252004.18 1996-01-10 2-HIGH Clerk#000000470 0 ly special requests +32 130057 O 208660.75 1995-07-16 2-HIGH Clerk#000000616 0 ise blithely bold, regular requests. quickly unusual dep +33 66958 F 163243.98 1993-10-27 3-MEDIUM Clerk#000000409 0 uriously. furiously final request +34 61001 O 58949.67 1998-07-21 3-MEDIUM Clerk#000000223 0 ly final packages. fluffily final deposits wake blithely ideas. spe + +-- !q08 -- +5990016 100807 F 102428.29 1994-01-31 1-URGENT Clerk#000000554 0 . fluffily unusual requests cajole furiously. fluffily pending accounts ca +5990017 12382 F 176602.99 1992-07-01 5-LOW Clerk#000000205 0 ual pinto beans. final instructions haggle quickly alongside of the furio +5990018 51145 F 78440.49 1992-05-28 1-URGENT Clerk#000000996 0 quests play daringly. regula +5990019 85478 O 250306.69 1998-06-29 5-LOW Clerk#000000900 0 ainst the sly pinto beans. unu +5990020 62137 O 229287.04 1996-08-15 1-URGENT Clerk#000000801 0 fluffily special pinto beans. regular, regular pinto beans slee +5990021 24235 O 265459.10 1996-12-16 3-MEDIUM Clerk#000000113 0 gside of the ironic, unusual escapades. evenly silent tithes are +5990022 35143 O 141070.92 1996-07-01 4-NOT SPECIFIED Clerk#000000546 0 ests haggle across the blithely bo +5990023 65318 F 171515.91 1993-07-04 1-URGENT Clerk#000000178 0 r the express accounts haggle blithely ironic accounts-- regu +5990048 88213 O 70608.62 1997-10-23 2-HIGH Clerk#000000303 0 slyly enticing foxes doze regularly even requests. +5990049 115694 F 183390.98 1992-05-21 1-URGENT Clerk#000000450 0 ckly final theodolites ca + +-- !q09 -- +2000001 44200 F 257495.03 1994-12-18 5-LOW Clerk#000000314 0 ometimes theodolites. quickly even accounts among the blithely bold +2000002 55241 O 263734.77 1995-11-13 1-URGENT Clerk#000000749 0 uses along the brave excuses sleep for the packages. packages affix? slyl +2000003 84553 F 78066.42 1992-10-10 5-LOW Clerk#000000314 0 e slyly regular asymptotes. fluf +2000004 125197 F 246917.53 1993-01-06 1-URGENT Clerk#000000675 0 ironic ideas. platelets are regularly after the +2000005 117907 O 229611.23 1996-10-16 2-HIGH Clerk#000000458 0 he furiously regular excuses haggle slyly along the slyly pending a +2000006 1538 O 32011.55 1995-12-09 1-URGENT Clerk#000000279 0 ual, regular deposits sleep carefully carefully final dependencies. dep +2000007 42958 F 48446.75 1993-03-28 5-LOW Clerk#000000956 0 uickly final ideas. final, final requests are courts. slyly unu +2000032 34156 F 56186.58 1994-09-05 4-NOT SPECIFIED Clerk#000000612 0 fully regular instructions doze +2000033 141263 O 130829.92 1997-06-05 3-MEDIUM Clerk#000000118 0 inst the final dependencies. even, final pat +2000034 149275 O 64568.70 1997-09-23 2-HIGH Clerk#000000335 0 regular asymptotes. carefu + +-- !q10 -- +1 goldenrod lavender spring chocolate lace Manufacturer#1 Brand#13 PROMO BURNISHED COPPER 7 JUMBO PKG 901.00 ly. slyly ironi +2 blush thistle blue yellow saddle Manufacturer#1 Brand#13 LARGE BRUSHED BRASS 1 LG CASE 902.00 lar accounts amo +3 spring green yellow purple cornsilk Manufacturer#4 Brand#42 STANDARD POLISHED BRASS 21 WRAP CASE 903.00 egular deposits hag +4 cornflower chocolate smoke green pink Manufacturer#3 Brand#34 SMALL PLATED BRASS 14 MED DRUM 904.00 p furiously r +5 forest brown coral puff cream Manufacturer#3 Brand#32 STANDARD POLISHED TIN 15 SM PKG 905.00 wake carefully +6 bisque cornflower lawn forest magenta Manufacturer#2 Brand#24 PROMO PLATED STEEL 4 MED BAG 906.00 sual a +7 moccasin green thistle khaki floral Manufacturer#1 Brand#11 SMALL PLATED COPPER 45 SM BAG 907.00 lyly. ex +8 misty lace thistle snow royal Manufacturer#4 Brand#44 PROMO BURNISHED TIN 41 LG DRUM 908.00 eposi +9 thistle dim navajo dark gainsboro Manufacturer#4 Brand#43 SMALL BURNISHED STEEL 12 WRAP CASE 909.00 ironic foxe +10 linen pink saddle puff powder Manufacturer#5 Brand#54 LARGE BURNISHED STEEL 44 LG CAN 910.01 ithely final deposit + +-- !q08 -- +190001 powder coral chiffon burnished bisque Manufacturer#2 Brand#22 MEDIUM ANODIZED NICKEL 26 WRAP BOX 1091.00 ly busy deposi +190002 peru coral rosy azure green Manufacturer#4 Brand#41 LARGE POLISHED TIN 21 SM PKG 1092.00 express, daring sh +190003 white salmon lemon cornsilk ghost Manufacturer#4 Brand#41 PROMO ANODIZED TIN 41 LG BAG 1093.00 ckages according to th +190004 ivory almond honeydew metallic dodger Manufacturer#4 Brand#44 PROMO PLATED NICKEL 23 MED DRUM 1094.00 blithely regular t +190005 slate indian forest chartreuse rosy Manufacturer#1 Brand#11 SMALL BRUSHED BRASS 3 SM CASE 1095.00 ly blithe, regula +190006 navajo lavender smoke puff olive Manufacturer#5 Brand#55 SMALL BRUSHED BRASS 35 LG CASE 1096.00 ilent ideas boo +190007 khaki lime goldenrod pink grey Manufacturer#1 Brand#11 STANDARD PLATED BRASS 30 SM PKG 1097.00 fully final gift +190008 cream dark peru thistle gainsboro Manufacturer#3 Brand#31 ECONOMY ANODIZED STEEL 46 WRAP CASE 1098.00 pinto beans. fur +190009 orchid goldenrod metallic frosted powder Manufacturer#3 Brand#33 STANDARD ANODIZED COPPER 25 LG BAG 1099.00 es cajole f +190010 misty mint white seashell papaya Manufacturer#3 Brand#34 STANDARD POLISHED STEEL 38 JUMBO BOX 1100.01 pecia + +-- !q12 -- +100001 seashell cyan plum purple honeydew Manufacturer#3 Brand#35 STANDARD BRUSHED TIN 37 JUMBO CASE 1001.00 ronic dependencies d +100002 steel moccasin forest cornflower brown Manufacturer#3 Brand#34 STANDARD ANODIZED NICKEL 11 WRAP CAN 1002.00 quickly pending +100003 beige powder violet orchid yellow Manufacturer#2 Brand#21 MEDIUM PLATED BRASS 41 SM BOX 1003.00 carefully even pac +100004 snow blanched khaki indian azure Manufacturer#4 Brand#42 SMALL POLISHED TIN 29 SM CASE 1004.00 sly. blithely +100005 grey midnight orange peach pale Manufacturer#2 Brand#21 SMALL POLISHED STEEL 7 MED BAG 1005.00 ajole? blithe +100006 violet sandy olive yellow orange Manufacturer#4 Brand#45 STANDARD BURNISHED COPPER 23 WRAP CASE 1006.00 he slyly regular pack +100007 snow magenta pale lemon metallic Manufacturer#1 Brand#12 PROMO BURNISHED COPPER 4 MED PKG 1007.00 ronic accounts in +100008 spring powder sienna purple lime Manufacturer#4 Brand#45 ECONOMY BRUSHED BRASS 19 SM PKG 1008.00 ts. furious +100009 goldenrod sandy beige hot orange Manufacturer#3 Brand#32 SMALL BURNISHED STEEL 41 WRAP BOX 1009.00 dinos about the quick +100010 lime lavender slate cream brown Manufacturer#4 Brand#43 PROMO ANODIZED COPPER 19 JUMBO PACK 1010.01 gle slyly above the b + +-- !q01 -- +1 2132 4633 4 28.00 28955.64 0.09 0.06 N O 1996-04-21 1996-03-30 1996-05-16 NONE AIR lites. fluffily even de +1 15635 638 6 32.00 49620.16 0.07 0.02 N O 1996-01-30 1996-02-07 1996-02-03 DELIVER IN PERSON MAIL arefully slyly ex +1 24027 1534 5 24.00 22824.48 0.10 0.04 N O 1996-03-30 1996-03-14 1996-04-01 NONE FOB pending foxes. slyly re +1 63700 3701 3 8.00 13309.60 0.10 0.02 N O 1996-01-29 1996-03-05 1996-01-31 TAKE BACK RETURN REG AIR riously. regular, express dep +1 67310 7311 2 36.00 45983.16 0.09 0.06 N O 1996-04-12 1996-02-28 1996-04-20 TAKE BACK RETURN MAIL ly final dependencies: slyly bold +1 155190 7706 1 17.00 21168.23 0.04 0.02 N O 1996-03-13 1996-02-12 1996-03-22 DELIVER IN PERSON TRUCK egular courts above the +2 106170 1191 1 38.00 44694.46 0.00 0.05 N O 1997-01-28 1997-01-14 1997-02-02 TAKE BACK RETURN RAIL ven requests. deposits breach a +3 4297 1798 1 45.00 54058.05 0.06 0.00 R F 1994-02-02 1994-01-04 1994-02-23 NONE AIR ongside of the furiously brave acco +3 19036 6540 2 49.00 46796.47 0.10 0.00 R F 1993-11-09 1993-12-20 1993-11-24 TAKE BACK RETURN RAIL unusual accounts. eve +3 29380 1883 4 2.00 2618.76 0.01 0.06 A F 1993-12-04 1994-01-07 1994-01-01 NONE TRUCK y. fluffily pending d + +-- !q02 -- +5999008 16312 6313 4 2.00 2456.62 0.08 0.05 R F 1994-04-28 1994-06-01 1994-05-12 COLLECT COD FOB longside of the slo +5999008 32738 2739 3 39.00 65158.47 0.09 0.08 R F 1994-07-11 1994-06-15 1994-08-04 TAKE BACK RETURN TRUCK equests nag along +5999008 64711 2230 1 29.00 48595.59 0.00 0.02 R F 1994-05-16 1994-07-04 1994-05-18 NONE FOB final requests across +5999008 192755 5275 2 32.00 59128.00 0.07 0.08 R F 1994-05-15 1994-05-22 1994-06-07 COLLECT COD RAIL ts sleep slyly about the slyly ironic acco +5999009 12147 7150 1 21.00 22241.94 0.00 0.05 N O 1997-11-01 1997-12-11 1997-11-05 NONE AIR deposits after the blithely ex +5999010 106595 6596 1 31.00 49649.29 0.02 0.05 N O 1997-11-29 1997-10-24 1997-12-11 DELIVER IN PERSON MAIL ilent instructions? slyly r +5999010 141441 1442 2 42.00 62262.48 0.05 0.08 N O 1997-09-21 1997-10-13 1997-09-25 TAKE BACK RETURN SHIP c, even ideas. ruth +5999010 193075 8114 3 40.00 46722.80 0.04 0.07 N O 1997-11-19 1997-09-25 1997-11-25 DELIVER IN PERSON AIR accounts sleep blithely even, +5999010 198678 1198 4 12.00 21320.04 0.00 0.04 N O 1997-09-19 1997-10-15 1997-10-05 DELIVER IN PERSON REG AIR ironic foxes. slyly special id +5999011 98609 6137 1 44.00 70734.40 0.03 0.01 N O 1998-04-05 1998-05-16 1998-05-05 DELIVER IN PERSON REG AIR ructions along the blit + +-- !q03 -- +2000001 16877 6878 1 36.00 64579.32 0.01 0.05 A F 1995-01-23 1995-01-31 1995-02-20 TAKE BACK RETURN FOB regular deposits. even +2000001 50928 3434 2 36.00 67641.12 0.02 0.03 A F 1995-02-03 1995-02-07 1995-02-13 NONE AIR ickly slyl +2000001 117877 7878 4 20.00 37897.40 0.02 0.07 R F 1995-03-29 1995-03-16 1995-04-01 COLLECT COD RAIL . realms boost unusual theodoli +2000001 135534 8048 3 38.00 59642.14 0.00 0.05 R F 1994-12-31 1995-03-06 1995-01-26 COLLECT COD MAIL l theodolites affix quickly alongside of +2000001 149269 1784 5 15.00 19773.90 0.05 0.05 R F 1995-03-08 1995-02-10 1995-03-23 DELIVER IN PERSON AIR e bold, silent foxes solve dog +2000002 41816 1817 6 8.00 14062.48 0.05 0.06 N O 1996-02-14 1995-12-25 1996-03-12 DELIVER IN PERSON FOB y quickly pending foxes. quickly ironic acc +2000002 62662 2663 2 48.00 77983.68 0.01 0.05 N O 1995-11-20 1996-01-11 1995-12-05 TAKE BACK RETURN SHIP requests sleep blithely. slyly +2000002 77402 7403 4 47.00 64831.80 0.07 0.03 N O 1996-02-24 1996-02-04 1996-03-09 NONE SHIP ong the carefully silent instructions. even +2000002 80719 8244 5 42.00 71387.82 0.10 0.03 N O 1995-11-24 1996-01-01 1995-11-25 COLLECT COD SHIP ing to the carefully final deposits. care +2000002 156357 6358 3 28.00 39573.80 0.09 0.05 N O 1995-12-26 1996-01-15 1996-01-25 COLLECT COD MAIL ely regular instr + +-- !q04 -- +1 Customer#000000001 IVhzIApeRb ot,c,E 15 25-989-741-2988 711.56 BUILDING to the even, regular platelets. regular, ironic epitaphs nag e +2 Customer#000000002 XSTf4,NCwDVaWNe6tEgvwfmRchLXak 13 23-768-687-3665 121.65 AUTOMOBILE l accounts. blithely ironic theodolites integrate boldly: caref +3 Customer#000000003 MG9kdTD2WBHm 1 11-719-748-3364 7498.12 AUTOMOBILE deposits eat slyly ironic, even instructions. express foxes detect slyly. blithely even accounts abov +4 Customer#000000004 XxVSJsLAGtn 4 14-128-190-5944 2866.83 MACHINERY requests. final, regular ideas sleep final accou +5 Customer#000000005 KvpyuHCplrB84WgAiGV6sYpZq7Tj 3 13-750-942-6364 794.47 HOUSEHOLD n accounts will have to unwind. foxes cajole accor +6 Customer#000000006 sKZz0CsnMD7mp4Xd0YrBvx,LREYKUWAh yVn 20 30-114-968-4951 7638.57 AUTOMOBILE tions. even deposits boost according to the slyly bold packages. final accounts cajole requests. furious +7 Customer#000000007 TcGe5gaZNgVePxU5kRrvXBfkasDTea 18 28-190-982-9759 9561.95 AUTOMOBILE ainst the ironic, express theodolites. express, even pinto beans among the exp +8 Customer#000000008 I0B10bB0AymmC, 0PrRYBCP1yGJ8xcBPmWhl5 17 27-147-574-9335 6819.74 BUILDING among the slyly regular theodolites kindle blithely courts. carefully even theodolites haggle slyly along the ide +9 Customer#000000009 xKiAFTjUsCuxfeleNqefumTrjS 8 18-338-906-3675 8324.07 FURNITURE r theodolites according to the requests wake thinly excuses: pending requests haggle furiousl +10 Customer#000000010 6LrEaV6KR6PLVcgl2ArL Q3rqzLzcT1 v2 5 15-741-346-9870 2753.54 HOUSEHOLD es regular deposits haggle. fur + +-- !q05 -- +140001 Customer#000140001 CkN1egC06Sc51bbDyQ8VnFn Bz6N1p 15 25-647-696-2830 2747.48 AUTOMOBILE heodolites. slyly bold theodolites d +140002 Customer#000140002 8UWLS,im0k94ivCVx 23 33-146-814-9234 7914.10 HOUSEHOLD eep behind the quickly bold foxes. furiously ironic ideas shall have to sleep. regular packages +140003 Customer#000140003 2duVgk HhGGlOeP4S,brWKjKG62bGkupful 23 33-326-909-7916 9389.77 FURNITURE pending, even packages are. slyly regular accounts wake ironically final packages. bold +140004 Customer#000140004 S,V7RhLnmqPK0TDghbcdxotzTyKeUC 13 23-594-312-2596 5931.66 HOUSEHOLD t blithely blithely regular packages. never silent dependencies cajo +140005 Customer#000140005 yQemRDs9i8MmTJla7xha xqeZjMznW 20 30-169-231-7354 9489.56 BUILDING accounts. even ideas sleep carefu +140006 Customer#000140006 5,eRqyFjpobN2Wtvt2oXuLcJcNE8oTyRh 15 25-681-278-7283 67.66 MACHINERY at the accounts are bold escapades. furiously final foxes use carefully unusual orb +140007 Customer#000140007 AX75sSePE5PlDjD5qS6W1dx08Levf09 24 34-818-770-8059 2093.37 MACHINERY ily according to the furiously final packages? quickly spe +140008 Customer#000140008 2zpry AYh9otf4c5vESISPvKLWPKe9i 14 24-552-949-6395 3264.69 AUTOMOBILE nstructions are against the requests. fin +140009 Customer#000140009 dNwNUcCv,,0YE6WFYfOgM,6A2 4 14-940-856-8557 -359.36 HOUSEHOLD beans. blithely silent dependencies haggle slyly. carefully quick accounts across the depos +140010 Customer#000140010 vZxOW,NtvppKR9mpTl6RDl9sWJJbosYDoLineEm 7 17-151-800-8260 8216.11 BUILDING nding foxes across the quickly regular forges nod accounts. slyly express ex + +-- !q06 -- +100001 Customer#000100001 gQ1s5C45A3PxWmZ1oFFSxt8u EcZ, 24 34-705-443-4055 1726.66 HOUSEHOLD ts. ironic instructions sleep. final deposits +100002 Customer#000100002 qOmTcZ7kHzJLSoaLenr9,Gu 17 27-453-414-8560 -39.14 BUILDING wake carefully. blithely regular epitaphs are among the quickly regular deposits. +100003 Customer#000100003 5AYbJxvjo7ErQB,cGIpKZRAE9,w2l9 5 15-783-309-8970 72.71 BUILDING ckly blithely special accounts. theodolites are carefully. pending requests ha +100004 Customer#000100004 cpIOYQpMlm 18 28-316-370-8752 9990.05 BUILDING y above the slyly regular pains. unusual requests against the always special packages bre +100005 Customer#000100005 Wud8n74NcIpwiKSjPS zZ 16 26-935-603-9031 7789.25 BUILDING ing dugouts boost slyly above the pending, final accounts? regular deposits wake slyly alongside of the blithely i +100006 Customer#000100006 AkjXh4y,QNaF7,0xzbP,sG 7 17-964-673-7626 974.05 MACHINERY grate across the slyly even packages; final, special idea +100007 Customer#000100007 d94JW9Hc2ZtGriOBNKyIjOeP,VZZqIX7S 17 27-244-129-5307 777.86 HOUSEHOLD foxes are against the ironic theodolites. evenly pending ideas according to the qu +100008 Customer#000100008 Hv2A,YqfNnGRIKaY 18 28-828-394-8424 3374.90 BUILDING ccounts. even deposits wake quickly pinto beans. bold instructions integrate? never bold theodolites are s +100009 Customer#000100009 OioQ3EjJZRvxCNh6Q8E3QZH 6 16-928-807-2622 3932.63 MACHINERY aggle blithely quickly final accounts. carefully final deposits above the fluffily unus +100010 Customer#000100010 Tbiz2WMJX 0 10-147-978-7806 5693.02 BUILDING y regular ideas. quickly unusual gifts n + +-- !q07 -- +1 36901 O 173665.47 1996-01-02 5-LOW Clerk#000000951 0 nstructions sleep furiously among +2 78002 O 46929.18 1996-12-01 1-URGENT Clerk#000000880 0 foxes. pending accounts at the pending, silent asymptot +3 123314 F 193846.25 1993-10-14 5-LOW Clerk#000000955 0 sly final accounts boost. carefully regular ideas cajole carefully. depos +4 136777 O 32151.78 1995-10-11 5-LOW Clerk#000000124 0 sits. slyly regular warthogs cajole. regular, regular theodolites acro +5 44485 F 144659.20 1994-07-30 5-LOW Clerk#000000925 0 quickly. bold deposits sleep slyly. packages use slyly +6 55624 F 58749.59 1992-02-21 4-NOT SPECIFIED Clerk#000000058 0 ggle. special, final requests are against the furiously specia +7 39136 O 252004.18 1996-01-10 2-HIGH Clerk#000000470 0 ly special requests +32 130057 O 208660.75 1995-07-16 2-HIGH Clerk#000000616 0 ise blithely bold, regular requests. quickly unusual dep +33 66958 F 163243.98 1993-10-27 3-MEDIUM Clerk#000000409 0 uriously. furiously final request +34 61001 O 58949.67 1998-07-21 3-MEDIUM Clerk#000000223 0 ly final packages. fluffily final deposits wake blithely ideas. spe + +-- !q08 -- +5990016 100807 F 102428.29 1994-01-31 1-URGENT Clerk#000000554 0 . fluffily unusual requests cajole furiously. fluffily pending accounts ca +5990017 12382 F 176602.99 1992-07-01 5-LOW Clerk#000000205 0 ual pinto beans. final instructions haggle quickly alongside of the furio +5990018 51145 F 78440.49 1992-05-28 1-URGENT Clerk#000000996 0 quests play daringly. regula +5990019 85478 O 250306.69 1998-06-29 5-LOW Clerk#000000900 0 ainst the sly pinto beans. unu +5990020 62137 O 229287.04 1996-08-15 1-URGENT Clerk#000000801 0 fluffily special pinto beans. regular, regular pinto beans slee +5990021 24235 O 265459.10 1996-12-16 3-MEDIUM Clerk#000000113 0 gside of the ironic, unusual escapades. evenly silent tithes are +5990022 35143 O 141070.92 1996-07-01 4-NOT SPECIFIED Clerk#000000546 0 ests haggle across the blithely bo +5990023 65318 F 171515.91 1993-07-04 1-URGENT Clerk#000000178 0 r the express accounts haggle blithely ironic accounts-- regu +5990048 88213 O 70608.62 1997-10-23 2-HIGH Clerk#000000303 0 slyly enticing foxes doze regularly even requests. +5990049 115694 F 183390.98 1992-05-21 1-URGENT Clerk#000000450 0 ckly final theodolites ca + +-- !q09 -- +2000001 44200 F 257495.03 1994-12-18 5-LOW Clerk#000000314 0 ometimes theodolites. quickly even accounts among the blithely bold +2000002 55241 O 263734.77 1995-11-13 1-URGENT Clerk#000000749 0 uses along the brave excuses sleep for the packages. packages affix? slyl +2000003 84553 F 78066.42 1992-10-10 5-LOW Clerk#000000314 0 e slyly regular asymptotes. fluf +2000004 125197 F 246917.53 1993-01-06 1-URGENT Clerk#000000675 0 ironic ideas. platelets are regularly after the +2000005 117907 O 229611.23 1996-10-16 2-HIGH Clerk#000000458 0 he furiously regular excuses haggle slyly along the slyly pending a +2000006 1538 O 32011.55 1995-12-09 1-URGENT Clerk#000000279 0 ual, regular deposits sleep carefully carefully final dependencies. dep +2000007 42958 F 48446.75 1993-03-28 5-LOW Clerk#000000956 0 uickly final ideas. final, final requests are courts. slyly unu +2000032 34156 F 56186.58 1994-09-05 4-NOT SPECIFIED Clerk#000000612 0 fully regular instructions doze +2000033 141263 O 130829.92 1997-06-05 3-MEDIUM Clerk#000000118 0 inst the final dependencies. even, final pat +2000034 149275 O 64568.70 1997-09-23 2-HIGH Clerk#000000335 0 regular asymptotes. carefu + +-- !q10 -- +1 goldenrod lavender spring chocolate lace Manufacturer#1 Brand#13 PROMO BURNISHED COPPER 7 JUMBO PKG 901.00 ly. slyly ironi +2 blush thistle blue yellow saddle Manufacturer#1 Brand#13 LARGE BRUSHED BRASS 1 LG CASE 902.00 lar accounts amo +3 spring green yellow purple cornsilk Manufacturer#4 Brand#42 STANDARD POLISHED BRASS 21 WRAP CASE 903.00 egular deposits hag +4 cornflower chocolate smoke green pink Manufacturer#3 Brand#34 SMALL PLATED BRASS 14 MED DRUM 904.00 p furiously r +5 forest brown coral puff cream Manufacturer#3 Brand#32 STANDARD POLISHED TIN 15 SM PKG 905.00 wake carefully +6 bisque cornflower lawn forest magenta Manufacturer#2 Brand#24 PROMO PLATED STEEL 4 MED BAG 906.00 sual a +7 moccasin green thistle khaki floral Manufacturer#1 Brand#11 SMALL PLATED COPPER 45 SM BAG 907.00 lyly. ex +8 misty lace thistle snow royal Manufacturer#4 Brand#44 PROMO BURNISHED TIN 41 LG DRUM 908.00 eposi +9 thistle dim navajo dark gainsboro Manufacturer#4 Brand#43 SMALL BURNISHED STEEL 12 WRAP CASE 909.00 ironic foxe +10 linen pink saddle puff powder Manufacturer#5 Brand#54 LARGE BURNISHED STEEL 44 LG CAN 910.01 ithely final deposit + +-- !q08 -- +190001 powder coral chiffon burnished bisque Manufacturer#2 Brand#22 MEDIUM ANODIZED NICKEL 26 WRAP BOX 1091.00 ly busy deposi +190002 peru coral rosy azure green Manufacturer#4 Brand#41 LARGE POLISHED TIN 21 SM PKG 1092.00 express, daring sh +190003 white salmon lemon cornsilk ghost Manufacturer#4 Brand#41 PROMO ANODIZED TIN 41 LG BAG 1093.00 ckages according to th +190004 ivory almond honeydew metallic dodger Manufacturer#4 Brand#44 PROMO PLATED NICKEL 23 MED DRUM 1094.00 blithely regular t +190005 slate indian forest chartreuse rosy Manufacturer#1 Brand#11 SMALL BRUSHED BRASS 3 SM CASE 1095.00 ly blithe, regula +190006 navajo lavender smoke puff olive Manufacturer#5 Brand#55 SMALL BRUSHED BRASS 35 LG CASE 1096.00 ilent ideas boo +190007 khaki lime goldenrod pink grey Manufacturer#1 Brand#11 STANDARD PLATED BRASS 30 SM PKG 1097.00 fully final gift +190008 cream dark peru thistle gainsboro Manufacturer#3 Brand#31 ECONOMY ANODIZED STEEL 46 WRAP CASE 1098.00 pinto beans. fur +190009 orchid goldenrod metallic frosted powder Manufacturer#3 Brand#33 STANDARD ANODIZED COPPER 25 LG BAG 1099.00 es cajole f +190010 misty mint white seashell papaya Manufacturer#3 Brand#34 STANDARD POLISHED STEEL 38 JUMBO BOX 1100.01 pecia + +-- !q12 -- +100001 seashell cyan plum purple honeydew Manufacturer#3 Brand#35 STANDARD BRUSHED TIN 37 JUMBO CASE 1001.00 ronic dependencies d +100002 steel moccasin forest cornflower brown Manufacturer#3 Brand#34 STANDARD ANODIZED NICKEL 11 WRAP CAN 1002.00 quickly pending +100003 beige powder violet orchid yellow Manufacturer#2 Brand#21 MEDIUM PLATED BRASS 41 SM BOX 1003.00 carefully even pac +100004 snow blanched khaki indian azure Manufacturer#4 Brand#42 SMALL POLISHED TIN 29 SM CASE 1004.00 sly. blithely +100005 grey midnight orange peach pale Manufacturer#2 Brand#21 SMALL POLISHED STEEL 7 MED BAG 1005.00 ajole? blithe +100006 violet sandy olive yellow orange Manufacturer#4 Brand#45 STANDARD BURNISHED COPPER 23 WRAP CASE 1006.00 he slyly regular pack +100007 snow magenta pale lemon metallic Manufacturer#1 Brand#12 PROMO BURNISHED COPPER 4 MED PKG 1007.00 ronic accounts in +100008 spring powder sienna purple lime Manufacturer#4 Brand#45 ECONOMY BRUSHED BRASS 19 SM PKG 1008.00 ts. furious +100009 goldenrod sandy beige hot orange Manufacturer#3 Brand#32 SMALL BURNISHED STEEL 41 WRAP BOX 1009.00 dinos about the quick +100010 lime lavender slate cream brown Manufacturer#4 Brand#43 PROMO ANODIZED COPPER 19 JUMBO PACK 1010.01 gle slyly above the b + diff --git a/regression-test/suites/external_table_p0/hive/test_hive_parquet_skip_page.groovy b/regression-test/suites/external_table_p0/hive/test_hive_parquet_skip_page.groovy new file mode 100644 index 00000000000..67e594b21f1 --- /dev/null +++ b/regression-test/suites/external_table_p0/hive/test_hive_parquet_skip_page.groovy @@ -0,0 +1,131 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_hive_parquet_skip_page", "p0,external,hive,external_docker,external_docker_hive") { + def q01 = { + qt_q01 """ + select * from lineitem where l_orderkey < 1000 order by l_orderkey,l_partkey limit 10; + """ + } + + def q02 = { + qt_q02 """ + select * from lineitem where l_orderkey > 5999000 order by l_orderkey,l_partkey limit 10; + """ + } + + def q03 = { + qt_q03 """ + select * from lineitem where l_orderkey > 2000000 and l_orderkey < 2001000 order by l_orderkey,l_partkey limit 10; + """ + } + + def q04 = { + qt_q04 """ + select * from customer where c_custkey < 10000 order by c_custkey limit 10; + """ + } + + def q05 = { + qt_q05 """ + select * from customer where c_custkey > 140000 order by c_custkey limit 10; + """ + } + + def q06 = { + qt_q06 """ + select * from customer where c_custkey > 100000 and c_custkey < 110000 order by c_custkey limit 10; + """ + } + + def q07 = { + qt_q07 """ + select * from orders where o_orderkey < 10000 order by o_orderkey limit 10; + """ + } + + def q08 = { + qt_q08 """ + select * from orders where o_orderkey > 5990000 order by o_orderkey limit 10; + """ + } + + def q09 = { + qt_q09 """ + select * from orders where o_orderkey > 2000000 and o_orderkey < 2010000 order by o_orderkey limit 10; + """ + } + + def q10 = { + qt_q10 """ + select * from part where p_partkey < 10000 order by p_partkey limit 10; + """ + } + + def q11 = { + qt_q08 """ + select * from part where p_partkey > 190000 order by p_partkey limit 10; + """ + } + + def q12 = { + qt_q12 """ + select * from part where p_partkey > 100000 and p_partkey < 110000 order by p_partkey limit 10; + """ + } + + String enabled = context.config.otherConfigs.get("enableHiveTest") + if (enabled == null || !enabled.equalsIgnoreCase("true")) { + logger.info("diable Hive test.") + return; + } + + for (String hivePrefix : ["hive2", "hive3"]) { + try { + String hms_port = context.config.otherConfigs.get(hivePrefix + "HmsPort") + String catalog_name = "${hivePrefix}_test_parquet" + String externalEnvIp = context.config.otherConfigs.get("externalEnvIp") + + sql """drop catalog if exists ${catalog_name}""" + sql """create catalog if not exists ${catalog_name} properties ( + "type"="hms", + 'hive.metastore.uris' = 'thrift://${externalEnvIp}:${hms_port}' + );""" + + sql """switch ${catalog_name}""" + sql """use `tpch1_parquet`""" + + sql """set enable_profile=true;""" + + q01() + q02() + q03() + q04() + q05() + q06() + q07() + q08() + q09() + q10() + q11() + q12() + + sql """drop catalog if exists ${catalog_name}""" + } finally { + } + } +} --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
