Github user majetideepak commented on a diff in the pull request:
https://github.com/apache/orc/pull/273#discussion_r191291625
--- Diff: c++/src/RleEncoderV2.cc ---
@@ -0,0 +1,768 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Adaptor.hh"
+#include "Compression.hh"
+#include "RLEv2.hh"
+#include "RLEV2Util.hh"
+
+#define MAX_LITERAL_SIZE 512
+#define MAX_SHORT_REPEAT_LENGTH 10
+
+namespace orc {
+
+/**
+ * Compute the bits required to represent pth percentile value
+ * @param data - array
+ * @param p - percentile value (>0.0 to <=1.0)
+ * @return pth percentile bits
+ */
+uint32_t RleEncoderV2::percentileBits(int64_t* data, size_t offset, size_t
length, double p, bool reuseHist) {
+ if ((p > 1.0) || (p <= 0.0)) {
+ throw InvalidArgument("Invalid p value: " + std::to_string(p));
+ }
+
+ if (!reuseHist) {
+ // histogram that store the encoded bit requirement for each
values.
+ // maximum number of bits that can encoded is 32 (refer
FixedBitSizes)
+ memset(histgram, 0, 32 * sizeof(int32_t));
+ // compute the histogram
+ for(size_t i = offset; i < (offset + length); i++) {
+ uint32_t idx = encodeBitWidth(findClosestNumBits(data[i]));
+ histgram[idx] += 1;
+ }
+ }
+
+ int32_t perLen = static_cast<int32_t>(static_cast<double>(length) *
(1.0 - p));
+
+ // return the bits required by pth percentile length
+ for(int32_t i = HIST_LEN - 1; i >= 0; i--) {
+ perLen -= histgram[i];
+ if (perLen < 0) {
+ return decodeBitWidth(static_cast<uint32_t>(i));
+ }
+ }
+
--- End diff --
extra line
---