Repository: hive Updated Branches: refs/heads/branch-2.2 b828a1e83 -> 0013facb2
HIVE-17600: Make OrcFile's enforceBufferSize user-settable (Mithun Radhakrishnan, reviewed by Owen O'Malley) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/0013facb Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/0013facb Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/0013facb Branch: refs/heads/branch-2.2 Commit: 0013facb2e4811f8f646f172f41766201abdd823 Parents: b828a1e Author: Mithun RK <[email protected]> Authored: Mon Sep 25 15:21:25 2017 -0700 Committer: Mithun Radhakrishnan <[email protected]> Committed: Mon Dec 4 11:42:43 2017 -0800 ---------------------------------------------------------------------- orc/src/java/org/apache/hive/orc/OrcConf.java | 2 ++ orc/src/java/org/apache/hive/orc/OrcFile.java | 1 + orc/src/java/org/apache/hive/orc/impl/OutStream.java | 14 ++++++++++++++ .../org/apache/hive/orc/impl/PhysicalFsWriter.java | 11 ++++++----- .../test/org/apache/hive/orc/impl/TestOutStream.java | 14 ++++++++++++++ 5 files changed, 37 insertions(+), 5 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/0013facb/orc/src/java/org/apache/hive/orc/OrcConf.java ---------------------------------------------------------------------- diff --git a/orc/src/java/org/apache/hive/orc/OrcConf.java b/orc/src/java/org/apache/hive/orc/OrcConf.java index dc2f865..90df756 100644 --- a/orc/src/java/org/apache/hive/orc/OrcConf.java +++ b/orc/src/java/org/apache/hive/orc/OrcConf.java @@ -51,6 +51,8 @@ public enum OrcConf { "Define the version of the file to write. Possible values are 0.11 and\n"+ " 0.12. 
If this parameter is not defined, ORC will use the run\n" + " length encoding (RLE) introduced in Hive 0.12."), + ENFORCE_COMPRESSION_BUFFER_SIZE("orc.buffer.size.enforce", "hive.exec.orc.buffer.size.enforce", false, + "Defines whether to enforce ORC compression buffer size."), ENCODING_STRATEGY("orc.encoding.strategy", "hive.exec.orc.encoding.strategy", "SPEED", "Define the encoding strategy to use while writing data. Changing this\n"+ http://git-wip-us.apache.org/repos/asf/hive/blob/0013facb/orc/src/java/org/apache/hive/orc/OrcFile.java ---------------------------------------------------------------------- diff --git a/orc/src/java/org/apache/hive/orc/OrcFile.java b/orc/src/java/org/apache/hive/orc/OrcFile.java index 5670a61..5c7188f 100644 --- a/orc/src/java/org/apache/hive/orc/OrcFile.java +++ b/orc/src/java/org/apache/hive/orc/OrcFile.java @@ -276,6 +276,7 @@ public class OrcFile { compressValue = CompressionKind.valueOf(OrcConf.COMPRESS.getString(tableProperties, conf).toUpperCase()); + enforceBufferSize = OrcConf.ENFORCE_COMPRESSION_BUFFER_SIZE.getBoolean(tableProperties, conf); String versionName = OrcConf.WRITE_FORMAT.getString(tableProperties, conf); versionValue = Version.byName(versionName); http://git-wip-us.apache.org/repos/asf/hive/blob/0013facb/orc/src/java/org/apache/hive/orc/impl/OutStream.java ---------------------------------------------------------------------- diff --git a/orc/src/java/org/apache/hive/orc/impl/OutStream.java b/orc/src/java/org/apache/hive/orc/impl/OutStream.java index 7157ac5..4c46cb3 100644 --- a/orc/src/java/org/apache/hive/orc/impl/OutStream.java +++ b/orc/src/java/org/apache/hive/orc/impl/OutStream.java @@ -113,6 +113,20 @@ public class OutStream extends PositionedOutputStream { } /** + * Throws exception if the bufferSize argument equals or exceeds 2^(3*8 - 1). + * See {@link OutStream#writeHeader(ByteBuffer, int, int, boolean)}. 
+ * The bufferSize needs to be expressible in 3 bytes, and uses the least significant bit + * to indicate original/compressed bytes. + * @param bufferSize The ORC compression buffer size being checked. + * @throws IllegalArgumentException If bufferSize equals or exceeds 2^23. + */ + static void assertBufferSizeValid(int bufferSize) throws IllegalArgumentException { + if (bufferSize >= (1 << 23)) { + throw new IllegalArgumentException("Illegal value of ORC compression buffer size: " + bufferSize); + } + } + + /** * Allocate a new output buffer if we are compressing. */ private ByteBuffer getNewOutputBuffer() throws IOException { http://git-wip-us.apache.org/repos/asf/hive/blob/0013facb/orc/src/java/org/apache/hive/orc/impl/PhysicalFsWriter.java ---------------------------------------------------------------------- diff --git a/orc/src/java/org/apache/hive/orc/impl/PhysicalFsWriter.java b/orc/src/java/org/apache/hive/orc/impl/PhysicalFsWriter.java index 47c33bb..1207b2d 100644 --- a/orc/src/java/org/apache/hive/orc/impl/PhysicalFsWriter.java +++ b/orc/src/java/org/apache/hive/orc/impl/PhysicalFsWriter.java @@ -88,6 +88,7 @@ public class PhysicalFsWriter implements PhysicalWriter { this.defaultStripeSize = this.adjustedStripeSize = opts.getStripeSize(); this.addBlockPadding = opts.getBlockPadding(); if (opts.isEnforceBufferSize()) { + OutStream.assertBufferSizeValid(opts.getBufferSize()); this.bufferSize = opts.getBufferSize(); } else { this.bufferSize = getEstimatedBufferSize(defaultStripeSize, numColumns, opts.getBufferSize()); @@ -253,15 +254,15 @@ public class PhysicalFsWriter implements PhysicalWriter { final int kb256 = 256 * 1024; if (estBufferSize <= kb4) { return kb4; - } else if (estBufferSize > kb4 && estBufferSize <= kb8) { + } else if (estBufferSize <= kb8) { return kb8; - } else if (estBufferSize > kb8 && estBufferSize <= kb16) { + } else if (estBufferSize <= kb16) { return kb16; - } else if (estBufferSize > kb16 && estBufferSize <= kb32) { + } else if
(estBufferSize <= kb32) { return kb32; - } else if (estBufferSize > kb32 && estBufferSize <= kb64) { + } else if (estBufferSize <= kb64) { return kb64; - } else if (estBufferSize > kb64 && estBufferSize <= kb128) { + } else if (estBufferSize <= kb128) { return kb128; } else { return kb256; http://git-wip-us.apache.org/repos/asf/hive/blob/0013facb/orc/src/test/org/apache/hive/orc/impl/TestOutStream.java ---------------------------------------------------------------------- diff --git a/orc/src/test/org/apache/hive/orc/impl/TestOutStream.java b/orc/src/test/org/apache/hive/orc/impl/TestOutStream.java index 23c13f4..65948c5 100644 --- a/orc/src/test/org/apache/hive/orc/impl/TestOutStream.java +++ b/orc/src/test/org/apache/hive/orc/impl/TestOutStream.java @@ -25,6 +25,7 @@ import org.mockito.Mockito; import java.nio.ByteBuffer; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.fail; public class TestOutStream { @@ -40,4 +41,17 @@ public class TestOutStream { Mockito.verify(receiver).output(Mockito.any(ByteBuffer.class)); assertEquals(0L, stream.getBufferSize()); } + + @Test + public void testAssertBufferSizeValid() throws Exception { + try { + OutStream.assertBufferSizeValid(1 + (1<<23)); + fail("Invalid buffer-size " + (1 + (1<<23)) + " should have been blocked."); + } + catch (IllegalArgumentException expected) { + // Pass. + } + + OutStream.assertBufferSizeValid((1<<23) - 1); + } }
