[ https://issues.apache.org/jira/browse/ORC-445?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16715651#comment-16715651 ]
ASF GitHub Bot commented on ORC-445:
------------------------------------
wgtmac closed pull request #346: ORC-445: [C++] Code improvements in RLEV2Util.
URL: https://github.com/apache/orc/pull/346
This is a PR merged from a forked repository.
Because GitHub hides the original diff of a foreign pull request (one
made from a fork) once it is merged, the diff is reproduced below for
the sake of provenance:
diff --git a/c++/src/RLEV2Util.cc b/c++/src/RLEV2Util.cc
index 53d18a0bd1..12e2d057cd 100644
--- a/c++/src/RLEV2Util.cc
+++ b/c++/src/RLEV2Util.cc
@@ -21,9 +21,50 @@
namespace orc {
// Map FBS enum to bit width value.
- const uint32_t FBSToBitWidthMap[FixedBitSizes::SIZE] = {
+ const uint8_t FBSToBitWidthMap[FixedBitSizes::SIZE] = {
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,
22, 23, 24,
26, 28, 30, 32, 40, 48, 56, 64
};
+ // Map bit length i to closest fixed bit width that can contain i bits.
+ const uint8_t ClosestFixedBitsMap[65] = {
+ 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
21, 22, 23, 24,
+ 26, 26, 28, 28, 30, 30, 32, 32,
+ 40, 40, 40, 40, 40, 40, 40, 40,
+ 48, 48, 48, 48, 48, 48, 48, 48,
+ 56, 56, 56, 56, 56, 56, 56, 56,
+ 64, 64, 64, 64, 64, 64, 64, 64
+ };
+
+ // Map bit length i to closest aligned fixed bit width that can contain i
bits.
+ const uint8_t ClosestAlignedFixedBitsMap[65] = {
+ 1, 1, 2, 4, 4, 8, 8, 8, 8, 16, 16, 16, 16, 16, 16, 16, 16, 24, 24, 24,
24, 24, 24, 24, 24,
+ 32, 32, 32, 32, 32, 32, 32, 32,
+ 40, 40, 40, 40, 40, 40, 40, 40,
+ 48, 48, 48, 48, 48, 48, 48, 48,
+ 56, 56, 56, 56, 56, 56, 56, 56,
+ 64, 64, 64, 64, 64, 64, 64, 64
+ };
+
+ // Map bit width to FBS enum.
+ const uint8_t BitWidthToFBSMap[65] = {
+ FixedBitSizes::ONE, FixedBitSizes::ONE, FixedBitSizes::TWO,
FixedBitSizes::THREE, FixedBitSizes::FOUR,
+ FixedBitSizes::FIVE, FixedBitSizes::SIX, FixedBitSizes::SEVEN,
FixedBitSizes::EIGHT,
+ FixedBitSizes::NINE, FixedBitSizes::TEN, FixedBitSizes::ELEVEN,
FixedBitSizes::TWELVE,
+ FixedBitSizes::THIRTEEN, FixedBitSizes::FOURTEEN,
FixedBitSizes::FIFTEEN, FixedBitSizes::SIXTEEN,
+ FixedBitSizes::SEVENTEEN, FixedBitSizes::EIGHTEEN,
FixedBitSizes::NINETEEN, FixedBitSizes::TWENTY,
+ FixedBitSizes::TWENTYONE, FixedBitSizes::TWENTYTWO,
FixedBitSizes::TWENTYTHREE, FixedBitSizes::TWENTYFOUR,
+ FixedBitSizes::TWENTYSIX, FixedBitSizes::TWENTYSIX,
+ FixedBitSizes::TWENTYEIGHT, FixedBitSizes::TWENTYEIGHT,
+ FixedBitSizes::THIRTY, FixedBitSizes::THIRTY,
+ FixedBitSizes::THIRTYTWO, FixedBitSizes::THIRTYTWO,
+ FixedBitSizes::FORTY, FixedBitSizes::FORTY, FixedBitSizes::FORTY,
FixedBitSizes::FORTY,
+ FixedBitSizes::FORTY, FixedBitSizes::FORTY, FixedBitSizes::FORTY,
FixedBitSizes::FORTY,
+ FixedBitSizes::FORTYEIGHT, FixedBitSizes::FORTYEIGHT,
FixedBitSizes::FORTYEIGHT, FixedBitSizes::FORTYEIGHT,
+ FixedBitSizes::FORTYEIGHT, FixedBitSizes::FORTYEIGHT,
FixedBitSizes::FORTYEIGHT, FixedBitSizes::FORTYEIGHT,
+ FixedBitSizes::FIFTYSIX, FixedBitSizes::FIFTYSIX,
FixedBitSizes::FIFTYSIX, FixedBitSizes::FIFTYSIX,
+ FixedBitSizes::FIFTYSIX, FixedBitSizes::FIFTYSIX,
FixedBitSizes::FIFTYSIX, FixedBitSizes::FIFTYSIX,
+ FixedBitSizes::SIXTYFOUR, FixedBitSizes::SIXTYFOUR,
FixedBitSizes::SIXTYFOUR, FixedBitSizes::SIXTYFOUR,
+ FixedBitSizes::SIXTYFOUR, FixedBitSizes::SIXTYFOUR,
FixedBitSizes::SIXTYFOUR, FixedBitSizes::SIXTYFOUR
+ };
}
diff --git a/c++/src/RLEV2Util.hh b/c++/src/RLEV2Util.hh
index 794d5f62ab..95a6826eaa 100644
--- a/c++/src/RLEV2Util.hh
+++ b/c++/src/RLEV2Util.hh
@@ -22,83 +22,35 @@
#include "RLEv2.hh"
namespace orc {
- extern const uint32_t FBSToBitWidthMap[FixedBitSizes::SIZE];
+ extern const uint8_t FBSToBitWidthMap[FixedBitSizes::SIZE];
+ extern const uint8_t ClosestFixedBitsMap[65];
+ extern const uint8_t ClosestAlignedFixedBitsMap[65];
+ extern const uint8_t BitWidthToFBSMap[65];
+ // The input n must be less than FixedBitSizes::SIZE.
inline uint32_t decodeBitWidth(uint32_t n) {
return FBSToBitWidthMap[n];
}
inline uint32_t getClosestFixedBits(uint32_t n) {
- if (n == 0) {
- return 1;
- }
-
- if (n >= 1 && n <= 24) {
- return n;
- } else if (n <= 26) {
- return 26;
- } else if (n <= 28) {
- return 28;
- } else if (n <= 30) {
- return 30;
- } else if (n <= 32) {
- return 32;
- } else if (n <= 40) {
- return 40;
- } else if (n <= 48) {
- return 48;
- } else if (n <= 56) {
- return 56;
+ if (n <= 64) {
+ return ClosestFixedBitsMap[n];
} else {
return 64;
}
}
inline uint32_t getClosestAlignedFixedBits(uint32_t n) {
- if (n == 0 || n == 1) {
- return 1;
- } else if (n <= 2) {
- return 2;
- } else if (n <= 4) {
- return 4;
- } else if (n <= 8) {
- return 8;
- } else if (n <= 16) {
- return 16;
- } else if (n <= 24) {
- return 24;
- } else if (n <= 32) {
- return 32;
- } else if (n <= 40) {
- return 40;
- } else if (n <= 48) {
- return 48;
- } else if (n <= 56) {
- return 56;
+ if (n <= 64) {
+ return ClosestAlignedFixedBitsMap[n];
} else {
return 64;
}
}
inline uint32_t encodeBitWidth(uint32_t n) {
- n = getClosestFixedBits(n);
-
- if (n >= 1 && n <= 24) {
- return n - 1;
- } else if (n <= 26) {
- return FixedBitSizes::TWENTYSIX;
- } else if (n <= 28) {
- return FixedBitSizes::TWENTYEIGHT;
- } else if (n <= 30) {
- return FixedBitSizes::THIRTY;
- } else if (n <= 32) {
- return FixedBitSizes::THIRTYTWO;
- } else if (n <= 40) {
- return FixedBitSizes::FORTY;
- } else if (n <= 48) {
- return FixedBitSizes::FORTYEIGHT;
- } else if (n <= 56) {
- return FixedBitSizes::FIFTYSIX;
+ if (n <= 64) {
+ return BitWidthToFBSMap[n];
} else {
return FixedBitSizes::SIXTYFOUR;
}
diff --git a/c++/test/CMakeLists.txt b/c++/test/CMakeLists.txt
index 6e3f3b3b13..db685782c7 100644
--- a/c++/test/CMakeLists.txt
+++ b/c++/test/CMakeLists.txt
@@ -40,6 +40,7 @@ add_executable (orc-test
TestReader.cc
TestRleDecoder.cc
TestRleEncoder.cc
+ TestRLEV2Util.cc
TestStripeIndexStatistics.cc
TestTimestampStatistics.cc
TestTimezone.cc
diff --git a/c++/test/TestRLEV2Util.cc b/c++/test/TestRLEV2Util.cc
new file mode 100644
index 0000000000..2ed179b89c
--- /dev/null
+++ b/c++/test/TestRLEV2Util.cc
@@ -0,0 +1,135 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cstdlib>
+
+#include "RLEV2Util.hh"
+
+#include "wrap/orc-proto-wrapper.hh"
+#include "wrap/gtest-wrapper.h"
+
+namespace orc {
+ TEST(RLEV2Util, decodeBitWidth) {
+ for (uint32_t n = 0; n < FixedBitSizes::SIZE; ++n) {
+ uint32_t result = orc::decodeBitWidth(n);
+ if (n <= FixedBitSizes::TWENTYFOUR) {
+ EXPECT_EQ(n + 1, result);
+ } else if (n == FixedBitSizes::TWENTYSIX) {
+ EXPECT_EQ(26, result);
+ } else if (n == FixedBitSizes::TWENTYEIGHT) {
+ EXPECT_EQ(28, result);
+ } else if (n == FixedBitSizes::THIRTY) {
+ EXPECT_EQ(30, result);
+ } else if (n == FixedBitSizes::THIRTYTWO) {
+ EXPECT_EQ(32, result);
+ } else if (n == FixedBitSizes::FORTY) {
+ EXPECT_EQ(40, result);
+ } else if (n == FixedBitSizes::FORTYEIGHT) {
+ EXPECT_EQ(48, result);
+ } else if (n == FixedBitSizes::FIFTYSIX) {
+ EXPECT_EQ(56, result);
+ } else if (n == FixedBitSizes::SIXTYFOUR) {
+ EXPECT_EQ(64, result);
+ }
+ }
+ }
+
+ TEST(RLEV2Util, getClosestFixedBits) {
+ for (uint32_t n = 0; n < 66; ++n) {
+ uint32_t result = orc::getClosestFixedBits(n);
+ if (n == 0) {
+ EXPECT_EQ(1, result);
+ } else if (n >= 1 && n <= 24) {
+ EXPECT_EQ(n, result);
+ } else if (n <= 26) {
+ EXPECT_EQ(26, result);
+ } else if (n <= 28) {
+ EXPECT_EQ(28, result);
+ } else if (n <= 30) {
+ EXPECT_EQ(30, result);
+ } else if (n <= 32) {
+ EXPECT_EQ(32, result);
+ } else if (n <= 40) {
+ EXPECT_EQ(40, result);
+ } else if (n <= 48) {
+ EXPECT_EQ(48, result);
+ } else if (n <= 56) {
+ EXPECT_EQ(56, result);
+ } else {
+ EXPECT_EQ(64, result);
+ }
+ }
+ }
+
+ TEST(RLEV2Util, getClosestAlignedFixedBits) {
+ for (uint32_t n = 0; n < 66; ++n) {
+ uint32_t result = orc::getClosestAlignedFixedBits(n);
+ if (n == 0 || n == 1) {
+ EXPECT_EQ(1, result);
+ } else if (n <= 2) {
+ EXPECT_EQ(2, result);
+ } else if (n <= 4) {
+ EXPECT_EQ(4, result);
+ } else if (n <= 8) {
+ EXPECT_EQ(8, result);
+ } else if (n <= 16) {
+ EXPECT_EQ(16, result);
+ } else if (n <= 24) {
+ EXPECT_EQ(24, result);
+ } else if (n <= 32) {
+ EXPECT_EQ(32, result);
+ } else if (n <= 40) {
+ EXPECT_EQ(40, result);
+ } else if (n <= 48) {
+ EXPECT_EQ(48, result);
+ } else if (n <= 56) {
+ EXPECT_EQ(56, result);
+ } else {
+ EXPECT_EQ(64, result);
+ }
+ }
+ }
+
+ TEST(RLEV2Util, encodeBitWidth) {
+ for (uint32_t i = 0; i < 65; ++i) {
+ uint32_t result = orc::encodeBitWidth(i);
+ uint32_t n = getClosestFixedBits(i);
+
+ if (n >= 1 && n <= 24) {
+ EXPECT_EQ(n - 1, result);
+ } else if (n <= 26) {
+ EXPECT_EQ(FixedBitSizes::TWENTYSIX, result);
+ } else if (n <= 28) {
+ EXPECT_EQ(FixedBitSizes::TWENTYEIGHT, result);
+ } else if (n <= 30) {
+ EXPECT_EQ(FixedBitSizes::THIRTY, result);
+ } else if (n <= 32) {
+ EXPECT_EQ(FixedBitSizes::THIRTYTWO, result);
+ } else if (n <= 40) {
+ EXPECT_EQ(FixedBitSizes::FORTY, result);
+ } else if (n <= 48) {
+ EXPECT_EQ(FixedBitSizes::FORTYEIGHT, result);
+ } else if (n <= 56) {
+ EXPECT_EQ(FixedBitSizes::FIFTYSIX, result);
+ } else {
+ EXPECT_EQ(FixedBitSizes::SIXTYFOUR, result);
+ }
+ }
+ }
+
+} // namespace orc
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
> [C++] Code improvements in RLEV2Util
> ------------------------------------
>
> Key: ORC-445
> URL: https://issues.apache.org/jira/browse/ORC-445
> Project: ORC
> Issue Type: Improvement
> Components: C++
> Reporter: Fang Zheng
> Priority: Minor
>
> This is a follow-up of ORC-444. The following functions in RLEV2Util.hh can
> be optimized by replacing the if-else statements with direct array lookup:
> inline uint32_t getClosestFixedBits(uint32_t n);
> inline uint32_t getClosestAlignedFixedBits(uint32_t n);
> inline uint32_t encodeBitWidth(uint32_t n);
> inline uint32_t findClosestNumBits(int64_t value);
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)