emkornfield commented on code in PR #48345:
URL: https://github.com/apache/arrow/pull/48345#discussion_r2722306339


##########
cpp/src/parquet/encoder.cc:
##########
@@ -995,6 +999,90 @@ class ByteStreamSplitEncoder<FLBAType> : public ByteStreamSplitEncoderBase<FLBAT
   }
 };
 
+// ----------------------------------------------------------------------
+// ALP encoder (Adaptive Lossless floating-Point)
+
+template <typename DType>
+class AlpEncoder : public EncoderImpl, virtual public TypedEncoder<DType> {
+ public:
+  using T = typename DType::c_type;
+  using ArrowType = typename EncodingTraits<DType>::ArrowType;
+  using TypedEncoder<DType>::Put;
+
+  explicit AlpEncoder(const ColumnDescriptor* descr,
+                      ::arrow::MemoryPool* pool = ::arrow::default_memory_pool())
+      : EncoderImpl(descr, Encoding::ALP, pool),
+        sink_{pool} {
+    static_assert(std::is_same<T, float>::value || std::is_same<T, double>::value,
+                  "ALP only supports float and double types");
+  }
+
+  int64_t EstimatedDataEncodedSize() override { return sink_.length(); }
+
+  std::shared_ptr<Buffer> FlushValues() override {
+    if (sink_.length() == 0) {
+      // Empty buffer case
+      PARQUET_ASSIGN_OR_THROW(auto buf, sink_.Finish());
+      return buf;
+    }
+
+    // Call AlpWrapper::Encode() - it handles sampling, preset selection, and compression
+    const size_t decompSize = sink_.length();
+    size_t compSize = ::arrow::util::alp::AlpWrapper<T>::GetMaxCompressedSize(decompSize);
+
+    PARQUET_ASSIGN_OR_THROW(
+        auto compressed_buffer,
+        ::arrow::AllocateResizableBuffer(compSize, this->memory_pool()));
+
+    ::arrow::util::alp::AlpWrapper<T>::Encode(
+        reinterpret_cast<const T*>(sink_.data()),
+        decompSize,
+        reinterpret_cast<char*>(compressed_buffer->mutable_data()),
+        &compSize);
+
+    PARQUET_THROW_NOT_OK(compressed_buffer->Resize(compSize));
+    sink_.Reset();
+
+    return std::shared_ptr<Buffer>(std::move(compressed_buffer));
+  }
+
+  void Put(const T* buffer, int num_values) override {
+    if (num_values > 0) {
+      PARQUET_THROW_NOT_OK(

Review Comment:
   I think we want to do incremental encoding here?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to