This is an automated email from the ASF dual-hosted git repository.
gangwu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/orc.git
The following commit(s) were added to refs/heads/master by this push:
new 89ad47f ORC-444: Fix errors in RLE section in ORC spec and improve
RLEV2 encoder code.
89ad47f is described below
commit 89ad47fbbee67a1f72557a170f15f1692d72921f
Author: Fang Zheng <[email protected]>
AuthorDate: Mon Dec 3 15:38:22 2018 -0800
ORC-444: Fix errors in RLE section in ORC spec and improve RLEV2 encoder
code.
Fixes #345
Signed-off-by: Gang Wu <[email protected]>
---
c++/src/CMakeLists.txt | 1 +
c++/src/RLEV2Util.cc | 29 +++++++++++++++++++++++++++++
c++/src/RLEV2Util.hh | 22 +++-------------------
site/specification/ORCv0.md | 2 +-
site/specification/ORCv1.md | 4 ++--
site/specification/ORCv2.md | 4 ++--
6 files changed, 38 insertions(+), 24 deletions(-)
diff --git a/c++/src/CMakeLists.txt b/c++/src/CMakeLists.txt
index 72d408d..235ced8 100644
--- a/c++/src/CMakeLists.txt
+++ b/c++/src/CMakeLists.txt
@@ -199,6 +199,7 @@ set(SOURCE_FILES
OrcFile.cc
Reader.cc
RLEv1.cc
+ RLEV2Util.cc
RleDecoderV2.cc
RleEncoderV2.cc
RLE.cc
diff --git a/c++/src/RLEV2Util.cc b/c++/src/RLEV2Util.cc
new file mode 100644
index 0000000..53d18a0
--- /dev/null
+++ b/c++/src/RLEV2Util.cc
@@ -0,0 +1,29 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "RLEV2Util.hh"
+
+namespace orc {
+
+ // Map FBS enum to bit width value.
+ const uint32_t FBSToBitWidthMap[FixedBitSizes::SIZE] = {
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
+ 26, 28, 30, 32, 40, 48, 56, 64
+ };
+
+}
diff --git a/c++/src/RLEV2Util.hh b/c++/src/RLEV2Util.hh
index a7bc553..794d5f6 100644
--- a/c++/src/RLEV2Util.hh
+++ b/c++/src/RLEV2Util.hh
@@ -22,26 +22,10 @@
#include "RLEv2.hh"
namespace orc {
+ extern const uint32_t FBSToBitWidthMap[FixedBitSizes::SIZE];
+
inline uint32_t decodeBitWidth(uint32_t n) {
- if (n <= FixedBitSizes::TWENTYFOUR) {
- return n + 1;
- } else if (n == FixedBitSizes::TWENTYSIX) {
- return 26;
- } else if (n == FixedBitSizes::TWENTYEIGHT) {
- return 28;
- } else if (n == FixedBitSizes::THIRTY) {
- return 30;
- } else if (n == FixedBitSizes::THIRTYTWO) {
- return 32;
- } else if (n == FixedBitSizes::FORTY) {
- return 40;
- } else if (n == FixedBitSizes::FORTYEIGHT) {
- return 48;
- } else if (n == FixedBitSizes::FIFTYSIX) {
- return 56;
- } else {
- return 64;
- }
+ return FBSToBitWidthMap[n];
}
inline uint32_t getClosestFixedBits(uint32_t n) {
diff --git a/site/specification/ORCv0.md b/site/specification/ORCv0.md
index 613298a..336896e 100644
--- a/site/specification/ORCv0.md
+++ b/site/specification/ORCv0.md
@@ -438,7 +438,7 @@ values.
* Run - a sequence of at least 3 identical values
* Literals - a sequence of non-identical values
-The first byte of each group of values is a header than determines
+The first byte of each group of values is a header that determines
whether it is a run (value between 0 to 127) or literal list (value
between -128 to -1). For runs, the control byte is the length of the
run minus the length of the minimal run (3) and the control byte for
diff --git a/site/specification/ORCv1.md b/site/specification/ORCv1.md
index 7835080..b799adc 100644
--- a/site/specification/ORCv1.md
+++ b/site/specification/ORCv1.md
@@ -444,7 +444,7 @@ values.
* Run - a sequence of at least 3 identical values
* Literals - a sequence of non-identical values
-The first byte of each group of values is a header than determines
+The first byte of each group of values is a header that determines
whether it is a run (value between 0 to 127) or literal list (value
between -128 to -1). For runs, the control byte is the length of the
run minus the length of the minimal run (3) and the control byte for
@@ -622,7 +622,7 @@ if the series is increasing or decreasing.
* 9 bits for run length (L) (1 to 512 values)
* Base value - encoded as (signed or unsigned) varint
* Delta base - encoded as signed varint
-* Delta values $W * (L - 2)$ bytes - encode each delta after the first
+* Delta values (W * (L - 2)) bytes - encode each delta after the first
one. If the delta base is positive, the sequence is increasing and if it is
negative the sequence is decreasing.
diff --git a/site/specification/ORCv2.md b/site/specification/ORCv2.md
index 79f930e..eb8b106 100644
--- a/site/specification/ORCv2.md
+++ b/site/specification/ORCv2.md
@@ -463,7 +463,7 @@ values.
* Run - a sequence of at least 3 identical values
* Literals - a sequence of non-identical values
-The first byte of each group of values is a header than determines
+The first byte of each group of values is a header that determines
whether it is a run (value between 0 to 127) or literal list (value
between -128 to -1). For runs, the control byte is the length of the
run minus the length of the minimal run (3) and the control byte for
@@ -641,7 +641,7 @@ if the series is increasing or decreasing.
* 9 bits for run length (L) (1 to 512 values)
* Base value - encoded as (signed or unsigned) varint
* Delta base - encoded as signed varint
-* Delta values $W * (L - 2)$ bytes - encode each delta after the first
+* Delta values (W * (L - 2)) bytes - encode each delta after the first
one. If the delta base is positive, the sequence is increasing and if it is
negative the sequence is decreasing.