GitHub user xndai commented on a diff in the pull request:
https://github.com/apache/orc/pull/301#discussion_r215486619
--- Diff: c++/src/Compression.cc ---
@@ -899,6 +907,166 @@ DIAGNOSTIC_POP
return static_cast<uint64_t>(result);
}
+ /**
+ * Block compression base class
+ */
+ class BlockCompressionStream: public CompressionStreamBase {
+ public:
+ BlockCompressionStream(OutputStream * outStream,
+ int compressionLevel,
+ uint64_t capacity,
+ uint64_t blockSize,
+ MemoryPool& pool)
+ : CompressionStreamBase(outStream,
+ compressionLevel,
+ capacity,
+ blockSize,
+ pool)
+ , compressorBuffer(pool) {
+ // PASS
+ }
+
+ virtual bool Next(void** data, int*size) override;
+ virtual std::string getName() const override = 0;
+
+ protected:
+ // compresses a block and returns the compressed size
+ virtual uint64_t doBlockCompression() = 0;
+
+ // return maximum possible compression size for allocating space for
+ // compressorBuffer below
+ virtual uint64_t estimateMaxCompressionSize() = 0;
+
+ // should allocate max possible compressed size
+ DataBuffer<unsigned char> compressorBuffer;
+ };
+
+ bool BlockCompressionStream::Next(void** data, int*size) {
+ if (bufferSize != 0) {
+ ensureHeader();
+
+ // perform compression
+ size_t totalCompressedSize = doBlockCompression();
+
+ const unsigned char * dataToWrite = nullptr;
+ int totalSizeToWrite = 0;
+ char * header = outputBuffer + outputPosition - 3;
+
+ if (totalCompressedSize >= static_cast<size_t>(bufferSize)) {
+ writeHeader(header, static_cast<size_t>(bufferSize), true);
+ dataToWrite = rawInputBuffer.data();
+ totalSizeToWrite = bufferSize;
+ } else {
+ writeHeader(header, totalCompressedSize, false);
+ dataToWrite = compressorBuffer.data();
+ totalSizeToWrite = static_cast<int>(totalCompressedSize);
+ }
+
+ char * dst = header + 3;
+ while (totalSizeToWrite > 0) {
+ if (outputPosition >= outputSize) {
--- End diff --
Please add an assert here that outputPosition is not larger than outputSize.
---