[GitHub] orc pull request #301: ORC-395: Support ZSTD in C++ writer/reader
Github user xndai commented on a diff in the pull request: https://github.com/apache/orc/pull/301#discussion_r215486726 --- Diff: c++/src/Compression.cc --- @@ -899,6 +907,166 @@ DIAGNOSTIC_POP return static_cast(result); } + /** + * Block compression base class + */ + class BlockCompressionStream: public CompressionStreamBase { --- End diff -- Stream compression is not in this change yet? ---
[GitHub] orc pull request #301: ORC-395: Support ZSTD in C++ writer/reader
Github user xndai commented on a diff in the pull request: https://github.com/apache/orc/pull/301#discussion_r215486619 --- Diff: c++/src/Compression.cc --- @@ -899,6 +907,166 @@ DIAGNOSTIC_POP return static_cast(result); } + /** + * Block compression base class + */ + class BlockCompressionStream: public CompressionStreamBase { + public: +BlockCompressionStream(OutputStream * outStream, + int compressionLevel, + uint64_t capacity, + uint64_t blockSize, + MemoryPool& pool) + : CompressionStreamBase(outStream, + compressionLevel, + capacity, + blockSize, + pool) + , compressorBuffer(pool) { + // PASS +} + +virtual bool Next(void** data, int*size) override; +virtual std::string getName() const override = 0; + + protected: +// compresses a block and returns the compressed size +virtual uint64_t doBlockCompression() = 0; + +// return maximum possible compression size for allocating space for +// compressorBuffer below +virtual uint64_t estimateMaxCompressionSize() = 0; + +// should allocate max possible compressed size +DataBuffer<unsigned char> compressorBuffer; + }; + + bool BlockCompressionStream::Next(void** data, int*size) { +if (bufferSize != 0) { + ensureHeader(); + + // perform compression + size_t totalCompressedSize = doBlockCompression(); + + const unsigned char * dataToWrite = nullptr; + int totalSizeToWrite = 0; + char * header = outputBuffer + outputPosition - 3; + + if (totalCompressedSize >= static_cast<size_t>(bufferSize)) { +writeHeader(header, static_cast<size_t>(bufferSize), true); +dataToWrite = rawInputBuffer.data(); +totalSizeToWrite = bufferSize; + } else { +writeHeader(header, totalCompressedSize, false); +dataToWrite = compressorBuffer.data(); +totalSizeToWrite = static_cast<int>(totalCompressedSize); + } + + char * dst = header + 3; + while (totalSizeToWrite > 0) { +if (outputPosition >= outputSize) { --- End diff -- Please assert that outputPosition is not larger than outputSize. ---
[GitHub] orc pull request #301: ORC-395: Support ZSTD in C++ writer/reader
Github user wgtmac commented on a diff in the pull request: https://github.com/apache/orc/pull/301#discussion_r211793792 --- Diff: c++/src/Compression.cc --- @@ -899,6 +900,177 @@ DIAGNOSTIC_POP return static_cast(result); } + /** + * Block compression base class + */ + class BlockCompressionStream: public CompressionStreamBase { + public: +BlockCompressionStream(OutputStream * outStream, + int compressionLevel, + uint64_t capacity, + uint64_t blockSize, + MemoryPool& pool) + : CompressionStreamBase(outStream, + compressionLevel, + capacity, + blockSize, + pool) + , compressorBuffer(pool) { + // PASS +} + +virtual bool Next(void** data, int*size) override; +virtual std::string getName() const override = 0; + + protected: +// compresses a block and returns the compressed size +virtual uint64_t doBlockCompression() = 0; + +// return maximum possible compression size for allocating space for +// compressorBuffer below +virtual uint64_t estimateMaxCompressionSize() = 0; + +// should allocate max possible compressed size +DataBuffer<unsigned char> compressorBuffer; + }; + + bool BlockCompressionStream::Next(void** data, int*size) { +if (bufferSize != 0) { --- End diff -- Done. Please check it again when you have time. Thanks! ---
[GitHub] orc pull request #301: ORC-395: Support ZSTD in C++ writer/reader
GitHub user wgtmac opened a pull request: https://github.com/apache/orc/pull/301 ORC-395: Support ZSTD in C++ writer/reader Support ZSTD block compression/decompression for C++ writer/reader. Test cases have been added to TestCompression.cc. You can merge this pull request into a Git repository by running: $ git pull https://github.com/wgtmac/orc ORC-395 Alternatively you can review and apply these changes as the patch at: https://github.com/apache/orc/pull/301.patch To close this pull request, make a commit to your master/trunk branch with (at least) the following in the commit message: This closes #301 ---