Github user xndai commented on a diff in the pull request: https://github.com/apache/orc/pull/122#discussion_r117822601 --- Diff: c++/src/Compression.cc --- @@ -636,6 +884,33 @@ DIAGNOSTIC_POP return static_cast<uint64_t>(result); } + std::unique_ptr<BufferedOutputStream> + createCompressor( + CompressionKind kind, + OutputStream * outStream, + CompressionStrategy strategy, + uint64_t bufferCapacity, + uint64_t blockSize, + MemoryPool& pool) { + switch (static_cast<int64_t>(kind)) { + case CompressionKind_NONE: { + return std::unique_ptr<BufferedOutputStream> + (new BufferedOutputStream(pool, outStream, bufferCapacity, blockSize)); + } + case CompressionKind_ZLIB: { + int level = (strategy == CompressionStrategy_SPEED) ? -1 : 9; --- End diff -- According to https://orc.apache.org/docs/hive-config.html, only two compression strategies are defined: SPEED and COMPRESSION. I also checked the Java implementation: SPEED maps to zlib level Z_BEST_SPEED + 1, and COMPRESSION maps to Z_DEFAULT_COMPRESSION. I will do the same for C++. 
Java implementation for your reference - WriterImpl.java ` CompressionCodec result = physicalWriter.getCompressionCodec(); if (result != null) { switch (kind) { case BLOOM_FILTER: case DATA: case DICTIONARY_DATA: case BLOOM_FILTER_UTF8: if (compressionStrategy == OrcFile.CompressionStrategy.SPEED) { result = result.modify(EnumSet.of(CompressionCodec.Modifier.FAST, CompressionCodec.Modifier.TEXT)); } else { result = result.modify(EnumSet.of(CompressionCodec.Modifier.DEFAULT, CompressionCodec.Modifier.TEXT)); } break; case LENGTH: case DICTIONARY_COUNT: case PRESENT: case ROW_INDEX: case SECONDARY: // easily compressed using the fastest modes result = result.modify(EnumSet.of(CompressionCodec.Modifier.FASTEST, CompressionCodec.Modifier.BINARY)); break; default: LOG.info("Missing ORC compression modifiers for " + kind); break; } } ` ZlibCodec.java ` public CompressionCodec modify(/* @Nullable */ EnumSet<Modifier> modifiers) { if (modifiers == null) { return this; } int l = this.level; int s = this.strategy; for (Modifier m : modifiers) { switch (m) { case BINARY: /* filtered == less LZ77, more huffman */ s = Deflater.FILTERED; break; case TEXT: s = Deflater.DEFAULT_STRATEGY; break; case FASTEST: // deflate_fast looking for 8 byte patterns l = Deflater.BEST_SPEED; break; case FAST: // deflate_fast looking for 16 byte patterns l = Deflater.BEST_SPEED + 1; break; case DEFAULT: // deflate_slow looking for 128 byte patterns l = Deflater.DEFAULT_COMPRESSION; break; default: break; } } return new ZlibCodec(l, s); } `
--- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---