mapleFU commented on code in PR #14293: URL: https://github.com/apache/arrow/pull/14293#discussion_r1093455297
########## cpp/src/parquet/encoding.cc: ########## @@ -2572,6 +2605,131 @@ class DeltaBitPackDecoder : public DecoderImpl, virtual public TypedDecoder<DTyp // ---------------------------------------------------------------------- // DELTA_LENGTH_BYTE_ARRAY +// ---------------------------------------------------------------------- +// DeltaLengthByteArrayEncoder + +template <typename DType> +class DeltaLengthByteArrayEncoder : public EncoderImpl, + virtual public TypedEncoder<ByteArrayType> { + public: + explicit DeltaLengthByteArrayEncoder(const ColumnDescriptor* descr, MemoryPool* pool) + : EncoderImpl(descr, Encoding::DELTA_LENGTH_BYTE_ARRAY, + pool = ::arrow::default_memory_pool()), + sink_(pool), + length_encoder_(nullptr, pool), + encoded_size_{0} {} + + std::shared_ptr<Buffer> FlushValues() override; + + int64_t EstimatedDataEncodedSize() override { + return encoded_size_ + length_encoder_.EstimatedDataEncodedSize(); + } + + using TypedEncoder<ByteArrayType>::Put; + + void Put(const ::arrow::Array& values) override; + + void Put(const T* buffer, int num_values) override; + + void PutSpaced(const T* src, int num_values, const uint8_t* valid_bits, + int64_t valid_bits_offset) override; + + protected: + template <typename ArrayType> + void PutBinaryArray(const ArrayType& array) { + PARQUET_THROW_NOT_OK(::arrow::VisitArraySpanInline<typename ArrayType::TypeClass>( + *array.data(), + [&](::std::string_view view) { + if (ARROW_PREDICT_FALSE(view.size() > kMaxByteArraySize)) { + return Status::Invalid("Parquet cannot store strings with size 2GB or more"); + } + length_encoder_.Put({static_cast<int32_t>(view.length())}, 1); + PARQUET_THROW_NOT_OK(sink_.Append(view.data(), view.length())); Review Comment: > I'll give it another try and report back. Let's wait for pitrou's idea... It seems that modifying the same line back and forth is really a torment -- This is an automated message from the Apache Git Service. 
To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org