This is an automated email from the ASF dual-hosted git repository.

gangwu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/orc.git


The following commit(s) were added to refs/heads/master by this push:
     new 89ad47f  ORC-444: Fix errors in RLE section in ORC spec and improve RLEV2 encoder code.
89ad47f is described below

commit 89ad47fbbee67a1f72557a170f15f1692d72921f
Author: Fang Zheng <[email protected]>
AuthorDate: Mon Dec 3 15:38:22 2018 -0800

    ORC-444: Fix errors in RLE section in ORC spec and improve RLEV2 encoder code.
    
    Fixes #345
    
    Signed-off-by: Gang Wu <[email protected]>
---
 c++/src/CMakeLists.txt      |  1 +
 c++/src/RLEV2Util.cc        | 29 +++++++++++++++++++++++++++++
 c++/src/RLEV2Util.hh        | 22 +++-------------------
 site/specification/ORCv0.md |  2 +-
 site/specification/ORCv1.md |  4 ++--
 site/specification/ORCv2.md |  4 ++--
 6 files changed, 38 insertions(+), 24 deletions(-)

diff --git a/c++/src/CMakeLists.txt b/c++/src/CMakeLists.txt
index 72d408d..235ced8 100644
--- a/c++/src/CMakeLists.txt
+++ b/c++/src/CMakeLists.txt
@@ -199,6 +199,7 @@ set(SOURCE_FILES
   OrcFile.cc
   Reader.cc
   RLEv1.cc
+  RLEV2Util.cc
   RleDecoderV2.cc
   RleEncoderV2.cc
   RLE.cc
diff --git a/c++/src/RLEV2Util.cc b/c++/src/RLEV2Util.cc
new file mode 100644
index 0000000..53d18a0
--- /dev/null
+++ b/c++/src/RLEV2Util.cc
@@ -0,0 +1,29 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "RLEV2Util.hh"
+
+namespace orc {
+
+  // Map FBS enum to bit width value.
+  const uint32_t FBSToBitWidthMap[FixedBitSizes::SIZE] = {
+    1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
+    26, 28, 30, 32, 40, 48, 56, 64
+  };
+
+}
diff --git a/c++/src/RLEV2Util.hh b/c++/src/RLEV2Util.hh
index a7bc553..794d5f6 100644
--- a/c++/src/RLEV2Util.hh
+++ b/c++/src/RLEV2Util.hh
@@ -22,26 +22,10 @@
 #include "RLEv2.hh"
 
 namespace orc {
+  extern const uint32_t FBSToBitWidthMap[FixedBitSizes::SIZE];
+
   inline uint32_t decodeBitWidth(uint32_t n) {
-    if (n <= FixedBitSizes::TWENTYFOUR) {
-      return n + 1;
-    } else if (n == FixedBitSizes::TWENTYSIX) {
-      return 26;
-    } else if (n == FixedBitSizes::TWENTYEIGHT) {
-      return 28;
-    } else if (n == FixedBitSizes::THIRTY) {
-      return 30;
-    } else if (n == FixedBitSizes::THIRTYTWO) {
-      return 32;
-    } else if (n == FixedBitSizes::FORTY) {
-      return 40;
-    } else if (n == FixedBitSizes::FORTYEIGHT) {
-      return 48;
-    } else if (n == FixedBitSizes::FIFTYSIX) {
-      return 56;
-    } else {
-      return 64;
-    }
+    return FBSToBitWidthMap[n];
   }
 
   inline uint32_t getClosestFixedBits(uint32_t n) {
diff --git a/site/specification/ORCv0.md b/site/specification/ORCv0.md
index 613298a..336896e 100644
--- a/site/specification/ORCv0.md
+++ b/site/specification/ORCv0.md
@@ -438,7 +438,7 @@ values.
 * Run - a sequence of at least 3 identical values
 * Literals - a sequence of non-identical values
 
-The first byte of each group of values is a header than determines
+The first byte of each group of values is a header that determines
 whether it is a run (value between 0 to 127) or literal list (value
 between -128 to -1). For runs, the control byte is the length of the
 run minus the length of the minimal run (3) and the control byte for
diff --git a/site/specification/ORCv1.md b/site/specification/ORCv1.md
index 7835080..b799adc 100644
--- a/site/specification/ORCv1.md
+++ b/site/specification/ORCv1.md
@@ -444,7 +444,7 @@ values.
 * Run - a sequence of at least 3 identical values
 * Literals - a sequence of non-identical values
 
-The first byte of each group of values is a header than determines
+The first byte of each group of values is a header that determines
 whether it is a run (value between 0 to 127) or literal list (value
 between -128 to -1). For runs, the control byte is the length of the
 run minus the length of the minimal run (3) and the control byte for
@@ -622,7 +622,7 @@ if the series is increasing or decreasing.
   * 9 bits for run length (L) (1 to 512 values)
 * Base value - encoded as (signed or unsigned) varint
 * Delta base - encoded as signed varint
-* Delta values $W * (L - 2)$ bytes - encode each delta after the first
+* Delta values (W * (L - 2)) bytes - encode each delta after the first
   one. If the delta base is positive, the sequence is increasing and if it is
   negative the sequence is decreasing.
 
diff --git a/site/specification/ORCv2.md b/site/specification/ORCv2.md
index 79f930e..eb8b106 100644
--- a/site/specification/ORCv2.md
+++ b/site/specification/ORCv2.md
@@ -463,7 +463,7 @@ values.
 * Run - a sequence of at least 3 identical values
 * Literals - a sequence of non-identical values
 
-The first byte of each group of values is a header than determines
+The first byte of each group of values is a header that determines
 whether it is a run (value between 0 to 127) or literal list (value
 between -128 to -1). For runs, the control byte is the length of the
 run minus the length of the minimal run (3) and the control byte for
@@ -641,7 +641,7 @@ if the series is increasing or decreasing.
   * 9 bits for run length (L) (1 to 512 values)
 * Base value - encoded as (signed or unsigned) varint
 * Delta base - encoded as signed varint
-* Delta values $W * (L - 2)$ bytes - encode each delta after the first
+* Delta values (W * (L - 2)) bytes - encode each delta after the first
   one. If the delta base is positive, the sequence is increasing and if it is
   negative the sequence is decreasing.
 

Reply via email to