AshinGau commented on code in PR #32785:
URL: https://github.com/apache/doris/pull/32785#discussion_r1538496642
##########
be/src/vec/exec/format/parquet/vparquet_page_reader.h:
##########
@@ -41,30 +42,38 @@ class PageReader {
int64_t decode_header_time = 0;
};
- PageReader(io::BufferedStreamReader* reader, io::IOContext* io_ctx,
uint64_t offset,
+ PageReader(io::BufferedStreamReader* reader, io::IOContext* io_ctx,
+ const tparquet::OffsetIndex* offset_index, int64_t num_values,
uint64_t offset,
uint64_t length);
+
~PageReader() = default;
- // Deprecated
- // Parquet file may not be standardized,
- // _end_offset may exceed the actual data area.
- // ColumnChunkReader::has_next_page() use the number of parsed values for
judgment
- // [[deprecated]]
- bool has_next_page() const { return _offset < _end_offset; }
+ bool has_next_page() const { return _page_index <
_offset_index->page_locations.size(); }
- Status next_page_header();
+ // Status next_page_header();
Status skip_page();
- const tparquet::PageHeader* get_page_header() const { return
&_cur_page_header; }
+ const tparquet::PageHeader* get_page_header();
+
+ int64_t get_page_num_values() const {
Review Comment:
This should also remain compatible with column chunks that do not have a PageLocation.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]