Repository: hive
Updated Branches:
  refs/heads/branch-2.2 b828a1e83 -> 0013facb2


HIVE-17600: Make OrcFile's enforceBufferSize user-settable (Mithun 
Radhakrishnan, reviewed by Owen O'Malley)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/0013facb
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/0013facb
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/0013facb

Branch: refs/heads/branch-2.2
Commit: 0013facb2e4811f8f646f172f41766201abdd823
Parents: b828a1e
Author: Mithun RK <[email protected]>
Authored: Mon Sep 25 15:21:25 2017 -0700
Committer: Mithun Radhakrishnan <[email protected]>
Committed: Mon Dec 4 11:42:43 2017 -0800

----------------------------------------------------------------------
 orc/src/java/org/apache/hive/orc/OrcConf.java         |  2 ++
 orc/src/java/org/apache/hive/orc/OrcFile.java         |  1 +
 orc/src/java/org/apache/hive/orc/impl/OutStream.java  | 14 ++++++++++++++
 .../org/apache/hive/orc/impl/PhysicalFsWriter.java    | 11 ++++++-----
 .../test/org/apache/hive/orc/impl/TestOutStream.java  | 14 ++++++++++++++
 5 files changed, 37 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/0013facb/orc/src/java/org/apache/hive/orc/OrcConf.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/OrcConf.java 
b/orc/src/java/org/apache/hive/orc/OrcConf.java
index dc2f865..90df756 100644
--- a/orc/src/java/org/apache/hive/orc/OrcConf.java
+++ b/orc/src/java/org/apache/hive/orc/OrcConf.java
@@ -51,6 +51,8 @@ public enum OrcConf {
       "Define the version of the file to write. Possible values are 0.11 
and\n"+
           " 0.12. If this parameter is not defined, ORC will use the run\n" +
           " length encoding (RLE) introduced in Hive 0.12."),
+  ENFORCE_COMPRESSION_BUFFER_SIZE("orc.buffer.size.enforce", 
"hive.exec.orc.buffer.size.enforce", false,
+      "Defines whether to enforce ORC compression buffer size."),
   ENCODING_STRATEGY("orc.encoding.strategy", "hive.exec.orc.encoding.strategy",
       "SPEED",
       "Define the encoding strategy to use while writing data. Changing 
this\n"+

http://git-wip-us.apache.org/repos/asf/hive/blob/0013facb/orc/src/java/org/apache/hive/orc/OrcFile.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/OrcFile.java 
b/orc/src/java/org/apache/hive/orc/OrcFile.java
index 5670a61..5c7188f 100644
--- a/orc/src/java/org/apache/hive/orc/OrcFile.java
+++ b/orc/src/java/org/apache/hive/orc/OrcFile.java
@@ -276,6 +276,7 @@ public class OrcFile {
       compressValue =
           CompressionKind.valueOf(OrcConf.COMPRESS.getString(tableProperties,
               conf).toUpperCase());
+      enforceBufferSize = 
OrcConf.ENFORCE_COMPRESSION_BUFFER_SIZE.getBoolean(tableProperties, conf);
       String versionName = OrcConf.WRITE_FORMAT.getString(tableProperties,
           conf);
       versionValue = Version.byName(versionName);

http://git-wip-us.apache.org/repos/asf/hive/blob/0013facb/orc/src/java/org/apache/hive/orc/impl/OutStream.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/impl/OutStream.java 
b/orc/src/java/org/apache/hive/orc/impl/OutStream.java
index 7157ac5..4c46cb3 100644
--- a/orc/src/java/org/apache/hive/orc/impl/OutStream.java
+++ b/orc/src/java/org/apache/hive/orc/impl/OutStream.java
@@ -113,6 +113,20 @@ public class OutStream extends PositionedOutputStream {
   }
 
   /**
+   * Throws exception if the bufferSize argument equals or exceeds 2^(3*8 - 1).
+   * See {@link OutStream#writeHeader(ByteBuffer, int, int, boolean)}.
+   * The bufferSize needs to be expressible in 3 bytes, less the least significant bit,
+   * which is used to indicate original/compressed bytes.
+   * @param bufferSize The ORC compression buffer size being checked.
+   * @throws IllegalArgumentException If bufferSize equals or exceeds 2^23.
+   */
+  static void assertBufferSizeValid(int bufferSize) throws 
IllegalArgumentException {
+    if (bufferSize >= (1 << 23)) {
+      throw new IllegalArgumentException("Illegal value of ORC compression 
buffer size: " + bufferSize);
+    }
+  }
+
+  /**
    * Allocate a new output buffer if we are compressing.
    */
   private ByteBuffer getNewOutputBuffer() throws IOException {

http://git-wip-us.apache.org/repos/asf/hive/blob/0013facb/orc/src/java/org/apache/hive/orc/impl/PhysicalFsWriter.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/impl/PhysicalFsWriter.java 
b/orc/src/java/org/apache/hive/orc/impl/PhysicalFsWriter.java
index 47c33bb..1207b2d 100644
--- a/orc/src/java/org/apache/hive/orc/impl/PhysicalFsWriter.java
+++ b/orc/src/java/org/apache/hive/orc/impl/PhysicalFsWriter.java
@@ -88,6 +88,7 @@ public class PhysicalFsWriter implements PhysicalWriter {
     this.defaultStripeSize = this.adjustedStripeSize = opts.getStripeSize();
     this.addBlockPadding = opts.getBlockPadding();
     if (opts.isEnforceBufferSize()) {
+      OutStream.assertBufferSizeValid(opts.getBufferSize());
       this.bufferSize = opts.getBufferSize();
     } else {
       this.bufferSize = getEstimatedBufferSize(defaultStripeSize, numColumns, 
opts.getBufferSize());
@@ -253,15 +254,15 @@ public class PhysicalFsWriter implements PhysicalWriter {
     final int kb256 = 256 * 1024;
     if (estBufferSize <= kb4) {
       return kb4;
-    } else if (estBufferSize > kb4 && estBufferSize <= kb8) {
+    } else if (estBufferSize <= kb8) {
       return kb8;
-    } else if (estBufferSize > kb8 && estBufferSize <= kb16) {
+    } else if (estBufferSize <= kb16) {
       return kb16;
-    } else if (estBufferSize > kb16 && estBufferSize <= kb32) {
+    } else if (estBufferSize <= kb32) {
       return kb32;
-    } else if (estBufferSize > kb32 && estBufferSize <= kb64) {
+    } else if (estBufferSize <= kb64) {
       return kb64;
-    } else if (estBufferSize > kb64 && estBufferSize <= kb128) {
+    } else if (estBufferSize <= kb128) {
       return kb128;
     } else {
       return kb256;

http://git-wip-us.apache.org/repos/asf/hive/blob/0013facb/orc/src/test/org/apache/hive/orc/impl/TestOutStream.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/hive/orc/impl/TestOutStream.java 
b/orc/src/test/org/apache/hive/orc/impl/TestOutStream.java
index 23c13f4..65948c5 100644
--- a/orc/src/test/org/apache/hive/orc/impl/TestOutStream.java
+++ b/orc/src/test/org/apache/hive/orc/impl/TestOutStream.java
@@ -25,6 +25,7 @@ import org.mockito.Mockito;
 import java.nio.ByteBuffer;
 
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.fail;
 
 public class TestOutStream {
 
@@ -40,4 +41,17 @@ public class TestOutStream {
     Mockito.verify(receiver).output(Mockito.any(ByteBuffer.class));
     assertEquals(0L, stream.getBufferSize());
   }
+
+  @Test
+  public void testAssertBufferSizeValid() throws Exception {
+    try {
+      OutStream.assertBufferSizeValid(1 + (1<<23));
+      fail("Invalid buffer-size " + (1 + (1<<23)) + " should have been 
blocked.");
+    }
+    catch (IllegalArgumentException expected) {
+      // Pass.
+    }
+
+    OutStream.assertBufferSizeValid((1<<23) -  1);
+  }
 }

Reply via email to