rok commented on code in PR #14341:
URL: https://github.com/apache/arrow/pull/14341#discussion_r1158125518


##########
cpp/src/parquet/encoding.cc:
##########
@@ -3187,21 +3203,74 @@ void DeltaByteArrayEncoder<DType>::Put(const T* src, int num_values) {
 
     auto view = string_view{reinterpret_cast<const char*>(value->ptr), value->len};
     uint32_t j = 0;
-    while (j < std::min(value->len, static_cast<uint32_t>(last_value_view.length()))) {
+    const uint32_t common_length =
+        std::min(value->len, static_cast<uint32_t>(last_value_view.length()));
+    while (j < common_length) {
       if (last_value_view[j] != view[j]) {
         break;
       }
       j++;
     }
 
+    last_value_view = view;
     prefix_lengths[i] = j;
-    const uint8_t* suffix_ptr = value->ptr + j;
-    const uint32_t suffix_length = static_cast<uint32_t>(value->len - j);
+    const auto suffix_length = static_cast<uint32_t>(value->len - j);
+    const uint8_t* suffix_ptr;
+    if (suffix_length == 0) {
+      suffix_ptr = reinterpret_cast<const uint8_t*>("");
+    } else {
+      suffix_ptr = value->ptr + j;
+    }
+    // Convert suffix to ByteArray so it can be passed to the suffix_encoder_.
+    const ByteArray suffix(suffix_length, suffix_ptr);
+    suffix_encoder_.Put(&suffix, 1);
+  }
+  prefix_length_encoder_.Put(prefix_lengths.data(), num_values);
+  last_value_ = last_value_view;
+}
+
+template <>
+void DeltaByteArrayEncoder<FLBAType>::Put(const FLBA* src, int num_values) {
+  if (num_values == 0) {
+    return;
+  }
+  ArrowPoolVector<int32_t> prefix_lengths(num_values,
+                                          ::arrow::stl::allocator<int32_t>(pool_));
+  std::string_view last_value_view = last_value_;
+  const int32_t len = descr_->type_length();
+
+  if (ARROW_PREDICT_FALSE(len >= static_cast<int32_t>(kMaxByteArraySize))) {
+    throw Status::Invalid("Parquet cannot store strings with size 2GB or more");
+  }
+
+  for (int i = 0; i < num_values; i++) {
+    // Convert to ByteArray so we can pass to the suffix_encoder_.
+    const FLBA* value = reinterpret_cast<const FLBA*>(&src[i].ptr);
+
+    auto view = string_view{reinterpret_cast<const char*>(value->ptr),
+                            static_cast<uint32_t>(len)};
+    int32_t j = 0;
+    const int32_t common_length =
+        std::min(len, static_cast<int32_t>(last_value_view.length()));
+    while (j < common_length) {
+      if (last_value_view[j] != view[j]) {
+        break;
+      }
+      j++;
+    }
+
     last_value_view = view;
+    prefix_lengths[i] = j;
+    const auto suffix_length = static_cast<uint32_t>(len - j);
+    const uint8_t* suffix_ptr;
+    if (suffix_length == 0) {
+      suffix_ptr = reinterpret_cast<const uint8_t*>("");

Review Comment:
   Changed.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to