felipecrv commented on code in PR #34526:
URL: https://github.com/apache/arrow/pull/34526#discussion_r1138798351


##########
cpp/src/parquet/encoding.cc:
##########
@@ -2838,6 +2839,113 @@ class DeltaLengthByteArrayDecoder : public DecoderImpl,
   std::shared_ptr<ResizableBuffer> buffered_data_;
 };
 
+// ----------------------------------------------------------------------
+// RLE_BOOLEAN_ENCODER
+
+class RleBooleanEncoder final : public EncoderImpl, virtual public BooleanEncoder {
+ public:
+  explicit RleBooleanEncoder(const ColumnDescriptor* descr, ::arrow::MemoryPool* pool)
+      : EncoderImpl(descr, Encoding::RLE, pool) {}
+
+  int64_t EstimatedDataEncodedSize() override {
+    // FIXME(mwish): should we just use buffered_append_values_.size() / 8
+    //  or just use ::arrow::util::RleEncoder::MaxBufferSize?
+    return kRleLengthInBytes + MaxRleBufferSize();
+  }

Review Comment:
   Do one pass over the data to count the number of runs.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to