ffacs commented on code in PR #1932:
URL: https://github.com/apache/orc/pull/1932#discussion_r1599913278
##########
c++/src/Compression.cc:
##########
@@ -96,6 +97,9 @@ namespace orc {
// Compression block header pointer array
static const uint32_t HEADER_SIZE = 3;
std::array<char*, HEADER_SIZE> header;
+
+ // Compression block size
+ uint64_t compressBlockSize;
Review Comment:
compressionBlockSize
##########
c++/src/Compression.cc:
##########
@@ -232,6 +239,13 @@ namespace orc {
}
rawInputBuffer.resize(0);
}
+ }
+
+ bool CompressionStream::Next(void** data, int* size) {
+ // triggle compress when rawInputBuffer is reach the capacity
+ if (rawInputBuffer.size() == compressBlockSize) {
Review Comment:
We should make sure `compressionBlockSize` is a multiple of
`memoryBlockSize`.
##########
c++/src/io/OutputStream.cc:
##########
@@ -98,6 +98,12 @@ namespace orc {
dataBuffer_->resize(0);
}
+ uint64_t BufferedOutputStream::getRawInputBufferSize() const {
+ // we're unable to determine the size of the raw input buffer
+ // simply return 0
+ return 0;
Review Comment:
Throw a exception would be better.
##########
c++/src/Writer.cc:
##########
@@ -45,11 +45,11 @@ namespace orc {
std::string timezone;
WriterMetrics* metrics;
bool useTightNumericVector;
- uint64_t outputBufferCapacity;
+ uint64_t memoryBlockSize;
WriterOptionsPrivate() : fileVersion(FileVersion::v_0_12()) { // default
to Hive_0_12
stripeSize = 64 * 1024 * 1024; // 64M
- compressionBlockSize = 64 * 1024; // 64K
+ compressionBlockSize = 256 * 1024; // 256K
Review Comment:
Why do we make a change here?
##########
c++/src/Writer.cc:
##########
@@ -355,7 +355,7 @@ namespace orc {
// compression stream for stripe footer, file footer and metadata
compressionStream_ =
createCompressor(options_.getCompression(), outStream_,
options_.getCompressionStrategy(),
- options_.getOutputBufferCapacity(),
options_.getCompressionBlockSize(),
+ options_.getMemoryBlockSize(),
options_.getCompressionBlockSize(),
Review Comment:
Parameters do not match declaration.
##########
c++/src/Compression.cc:
##########
@@ -207,7 +214,7 @@ namespace orc {
// PASS
}
Review Comment:
```cpp
CompressionStream::CompressionStream(OutputStream* outStream, int
compressionLevel,
uint64_t compressionBlockSize,
uint64_t memoryBlockSize, MemoryPool& pool,
WriterMetrics* metrics)
```
capacity -> compressionBlockSize, blockSize->memoryBlockSize
Same for other compression streams.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]