rok commented on code in PR #14293:
URL: https://github.com/apache/arrow/pull/14293#discussion_r1093535120
##########
cpp/src/parquet/encoding.cc:
##########
@@ -2291,6 +2292,38 @@ std::shared_ptr<Buffer>
DeltaBitPackEncoder<DType>::FlushValues() {
return SliceBuffer(buffer, offset_bytes);
}
+template <typename DType>
+std::shared_ptr<ResizableBuffer>
DeltaBitPackEncoder<DType>::FlushValuesInternal(
+ size_t* offset_bytes) {
+ if (values_current_block_ > 0) {
+ FlushBlock();
+ }
+ PARQUET_ASSIGN_OR_THROW(auto buffer, sink_.Finish(/*shrink_to_fit=*/true));
+
+ uint8_t header_buffer_[kMaxPageHeaderWriterSize] = {};
+ bit_util::BitWriter header_writer(header_buffer_, sizeof(header_buffer_));
+ if (!header_writer.PutVlqInt(values_per_block_) ||
+ !header_writer.PutVlqInt(mini_blocks_per_block_) ||
+ !header_writer.PutVlqInt(total_value_count_) ||
+ !header_writer.PutZigZagVlqInt(static_cast<T>(first_value_))) {
+ throw ParquetException("header writing error");
+ }
+ header_writer.Flush();
+
+ // We reserved enough space at the beginning of the buffer for largest
possible header
+ // and data was written immediately after. We now write the header data
immediately
+ // before the end of reserved space.
+ *offset_bytes = kMaxPageHeaderWriterSize - header_writer.bytes_written();
+ std::memcpy(buffer->mutable_data() + *offset_bytes, header_buffer_,
+ header_writer.bytes_written());
+
+ // Reset counter of cached values
+ total_value_count_ = 0;
+ // Reserve enough space at the beginning of the buffer for largest possible
header.
+ PARQUET_THROW_NOT_OK(sink_.Advance(kMaxPageHeaderWriterSize));
+ return reinterpret_cast<std::shared_ptr<ResizableBuffer>&>(buffer);
Review Comment:
Reverted the `FlushValuesInternal` change.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]