imay commented on a change in pull request #1646: Support page compression in BetaRowset URL: https://github.com/apache/incubator-doris/pull/1646#discussion_r314951133
########## File path: be/src/olap/rowset/segment_v2/column_reader.cpp ########## @@ -106,30 +111,47 @@ Status ColumnReader::read_page(const PagePointer& pp, PageHandle* handle) { return Status::OK(); } // Now we read this from file. we - size_t data_size = pp.size; - if (has_checksum() && data_size < sizeof(uint32_t)) { - return Status::Corruption("Bad page, page size is too small"); + size_t page_size = pp.size; + if (page_size < sizeof(uint32_t)) { + return Status::Corruption(Substitute("Bad page, page size is too small, size=$0", page_size)); } - if (has_checksum()) { - data_size -= sizeof(uint32_t); + + // Now we use this buffer to store page from storage, if this page is compressed + // this buffer will assigned uncompressed page, and origin content will be freed. + std::unique_ptr<uint8_t[]> page(new uint8_t[page_size]); + Slice page_slice(page.get(), page_size); + RETURN_IF_ERROR(_file->read_at(pp.offset, page_slice)); + + size_t data_size = page_size - 4; + if (_opts.verify_checksum) { + uint32_t expect = decode_fixed32_le((uint8_t*)page_slice.data + page_slice.size - 4); + uint32_t actual = HashUtil::crc_hash(page_slice.data, page_slice.size - 4, 0); + if (expect != actual) { + return Status::Corruption( + Substitute("Page checksum mismatch, actual=$0 vs expect=$1", actual, expect)); + } } - uint8_t* buf = new uint8_t[data_size]; - Slice data(buf, data_size); - uint8_t checksum_buf[sizeof(uint32_t)]; - Slice slices[2] = { data, Slice(checksum_buf, 4) }; + // remove page's suffix + page_slice.size = data_size; Review comment: verify_checksum only means to verify or not. Checksum has been written in page. so the size should be modified in both conditions. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services --------------------------------------------------------------------- To unsubscribe, e-mail: dev-unsubscr...@doris.apache.org For additional commands, e-mail: dev-h...@doris.apache.org