[
https://issues.apache.org/jira/browse/ORC-192?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16016528#comment-16016528
]
ASF GitHub Bot commented on ORC-192:
------------------------------------
GitHub user xndai commented on a diff in the pull request:
https://github.com/apache/orc/pull/122#discussion_r117363781
--- Diff: c++/src/Compression.cc ---
@@ -33,6 +33,254 @@
namespace orc {
+ class CompressionStreamBase: public BufferedOutputStream { // Abstract base for compressing output streams; Next() and getName() are pure virtual here.
+ public:
+ CompressionStreamBase(OutputStream * outStream,
+ int compressionLevel,
+ uint64_t capacity,
+ uint64_t blockSize,
+ MemoryPool& pool);
+
+ virtual bool Next(void** data, int*size) override = 0; // pure virtual: must be provided by a derived class
+ virtual void BackUp(int count) override;
+
+ virtual std::string getName() const override = 0; // pure virtual: must be provided by a derived class
+ virtual uint64_t flush() override;
+
+ virtual bool isCompressed() const override { return true; }
+ virtual uint64_t getSize() const override;
+
+ protected:
+ void writeHeader(char * buffer, size_t compressedSize, bool original) { // Writes a 3-byte header into buffer[0..2]: the value (compressedSize << 1) + (original ? 1 : 0), low byte first.
+ buffer[0] = static_cast<char>((compressedSize << 1) + (original ? 1
: 0));
+ buffer[1] = static_cast<char>(compressedSize >> 7);
+ buffer[2] = static_cast<char>(compressedSize >> 15);
+ }
+
+ // Buffer to hold uncompressed data until user calls Next()
+ DataBuffer<unsigned char> rawInputBuffer;
+
+ // Compression level, as passed to the constructor
+ int level;
+
+ // Compressed data output buffer (initialized to nullptr by the constructor)
+ char * outputBuffer;
+
+ // Size for compressionBuffer; BackUp(count) subtracts count from this value.
+ // NOTE(review): no member named compressionBuffer is visible here - presumably this refers to rawInputBuffer; confirm.
+ int bufferSize;
+
+ // Compression output position (initialized to 0 by the constructor)
+ int outputPosition;
+
+ // Compression output buffer size (initialized to 0 by the constructor)
+ int outputSize;
+ };
+
+ CompressionStreamBase::CompressionStreamBase(OutputStream * outStream, // Constructor: forwards its arguments to the base class and resets the output bookkeeping.
+ int compressionLevel,
+ uint64_t capacity,
+ uint64_t blockSize,
+ MemoryPool& pool) :
+ BufferedOutputStream(pool, // base class receives pool, outStream, capacity and blockSize
+
outStream,
+
capacity,
+
blockSize),
+ rawInputBuffer(pool, // constructed from the same pool, with blockSize as its second argument
blockSize),
+ level(compressionLevel),
+ outputBuffer(nullptr), // no output buffer is attached at construction time
+ bufferSize(0),
+ outputPosition(0),
+ outputSize(0) {
+ // Nothing to do: every member is set in the initializer list above.
+ }
+
+ void CompressionStreamBase::BackUp(int count) { // Reduces bufferSize by count; throws std::logic_error when count exceeds bufferSize.
+ if (count > bufferSize) {
+ throw std::logic_error("Can't backup that much!");
+ }
+ bufferSize -= count; // NOTE(review): a negative count is not rejected above and would increase bufferSize - confirm callers never pass one.
+ }
+
+ uint64_t CompressionStreamBase::flush() {
+ void * data;
+ int size;
+ if (!Next(&data, &size)) {
+ throw std::logic_error("Failed to flush compression buffer.");
--- End diff --
Throwing ParseError is understandable for the reader, but it would be weird for
the writer. If we really want to distinguish the exceptions that come from
compression, we could introduce a different exception type. But so far I don't
see a strong need for that here.
> Zlib compression stream
> -----------------------
>
> Key: ORC-192
> URL: https://issues.apache.org/jira/browse/ORC-192
> Project: ORC
> Issue Type: Sub-task
> Components: C++
> Reporter: Xiening Dai
> Assignee: Xiening Dai
>
--
This message was sent by Atlassian JIRA
(v6.3.15#6346)