mapleFU commented on code in PR #49914:
URL: https://github.com/apache/arrow/pull/49914#discussion_r3213498079


##########
cpp/src/parquet/decoder.cc:
##########
@@ -1038,7 +1038,16 @@ void DictDecoderImpl<Type>::SetDict(TypedDecoder<Type>* 
dictionary) {
 
 template <>
 void DictDecoderImpl<BooleanType>::SetDict(TypedDecoder<BooleanType>* 
dictionary) {
-  ParquetException::NYI("Dictionary encoding is not implemented for boolean 
values");
+  dictionary_length_ = static_cast<int32_t>(dictionary->values_left());
+  PARQUET_THROW_NOT_OK(dictionary_->Resize(
+      static_cast<int64_t>(dictionary_length_) * sizeof(bool), false));
+  if (dictionary->Decode(dictionary_->mutable_data_as<bool>(), 
dictionary_length_) !=
+      dictionary_length_) {
+    throw ParquetException(
+        "Boolean dictionary decode produced fewer values than the dictionary 
header "
+        "declared (expected ",
+        dictionary_length_, ")");

Review Comment:
   Should we also include the number of values actually decoded (the "got" count) in this error message?



##########
cpp/src/parquet/decoder.cc:
##########
@@ -1257,6 +1266,31 @@ void 
DictDecoderImpl<ByteArrayType>::InsertDictionary(::arrow::ArrayBuilder* bui
   PARQUET_THROW_NOT_OK(binary_builder->InsertMemoValues(*arr));
 }
 
+// Dictionary decoder for boolean column data. Decodes PLAIN_DICTIONARY
+// and RLE_DICTIONARY pages into the callers byte buffer.
+class DictBooleanDecoderImpl : public DictDecoderImpl<BooleanType>,
+                               virtual public BooleanDecoder {
+ public:
+  using BASE = DictDecoderImpl<BooleanType>;
+  using BASE::BASE;
+  using BASE::Decode;
+
+  int Decode(uint8_t* buffer, int max_values) override {
+    max_values = std::min(max_values, this->num_values_);
+    const auto* dict = dictionary_->data_as<bool>();
+    for (int i = 0; i < max_values; ++i) {
+      int32_t index;
+      if (ARROW_PREDICT_FALSE(!idx_decoder_.Get(&index))) {

Review Comment:
   Why not call GetBatch and set the values in batches, instead of decoding one index at a time?



##########
cpp/src/parquet/decoder.cc:
##########
@@ -1257,6 +1266,31 @@ void 
DictDecoderImpl<ByteArrayType>::InsertDictionary(::arrow::ArrayBuilder* bui
   PARQUET_THROW_NOT_OK(binary_builder->InsertMemoValues(*arr));
 }
 
+// Dictionary decoder for boolean column data. Decodes PLAIN_DICTIONARY
+// and RLE_DICTIONARY pages into the callers byte buffer.
+class DictBooleanDecoderImpl : public DictDecoderImpl<BooleanType>,
+                               virtual public BooleanDecoder {
+ public:
+  using BASE = DictDecoderImpl<BooleanType>;
+  using BASE::BASE;
+  using BASE::Decode;
+
+  int Decode(uint8_t* buffer, int max_values) override {

Review Comment:
   What about DecodeArrow?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to