pitrou commented on code in PR #49914:
URL: https://github.com/apache/arrow/pull/49914#discussion_r3234997586


##########
cpp/src/parquet/decoder.cc:
##########
@@ -1038,7 +1038,16 @@ void DictDecoderImpl<Type>::SetDict(TypedDecoder<Type>* 
dictionary) {
 
 template <>
 void DictDecoderImpl<BooleanType>::SetDict(TypedDecoder<BooleanType>* 
dictionary) {
-  ParquetException::NYI("Dictionary encoding is not implemented for boolean 
values");
+  dictionary_length_ = static_cast<int32_t>(dictionary->values_left());

Review Comment:
   Why can't we simply reuse `DecodeDict` here?



##########
cpp/src/parquet/encoding_test.cc:
##########
@@ -462,8 +462,31 @@ TYPED_TEST(TestDictionaryEncoding, BasicRoundTrip) {
   ASSERT_NO_FATAL_FAILURE(this->Execute(2500, 2));
 }
 
-TEST(TestDictionaryEncoding, CannotDictDecodeBoolean) {
-  ASSERT_THROW(MakeDictDecoder<BooleanType>(nullptr), ParquetException);
+// Decode a dictionary encoded boolean page and ensure decoding works.
+TEST(TestDictionaryEncoding, DictDecodesBoolean) {
+  const uint8_t dict_bytes[] = {0x02};
+  auto dict_plain_decoder = MakeTypedDecoder<BooleanType>(Encoding::PLAIN);
+  dict_plain_decoder->SetData(2, dict_bytes, 1);
+
+  auto decoder = MakeDictDecoder<BooleanType>();
+  decoder->SetDict(dict_plain_decoder.get());
+
+  const uint8_t indices[] = {0x01, 0x03, 0x35};

Review Comment:
   Same here: what do those bytes mean?



##########
cpp/src/parquet/encoding_test.cc:
##########
@@ -462,8 +462,31 @@ TYPED_TEST(TestDictionaryEncoding, BasicRoundTrip) {
   ASSERT_NO_FATAL_FAILURE(this->Execute(2500, 2));
 }
 
-TEST(TestDictionaryEncoding, CannotDictDecodeBoolean) {
-  ASSERT_THROW(MakeDictDecoder<BooleanType>(nullptr), ParquetException);
+// Decode a dictionary encoded boolean page and ensure decoding works.
+TEST(TestDictionaryEncoding, DictDecodesBoolean) {
+  const uint8_t dict_bytes[] = {0x02};

Review Comment:
   Can you comment on what those bytes mean?



##########
cpp/src/parquet/encoding_test.cc:
##########
@@ -462,8 +462,31 @@ TYPED_TEST(TestDictionaryEncoding, BasicRoundTrip) {
   ASSERT_NO_FATAL_FAILURE(this->Execute(2500, 2));
 }
 
-TEST(TestDictionaryEncoding, CannotDictDecodeBoolean) {
-  ASSERT_THROW(MakeDictDecoder<BooleanType>(nullptr), ParquetException);
+// Decode a dictionary encoded boolean page and ensure decoding works.
+TEST(TestDictionaryEncoding, DictDecodesBoolean) {
+  const uint8_t dict_bytes[] = {0x02};
+  auto dict_plain_decoder = MakeTypedDecoder<BooleanType>(Encoding::PLAIN);
+  dict_plain_decoder->SetData(2, dict_bytes, 1);
+
+  auto decoder = MakeDictDecoder<BooleanType>();
+  decoder->SetDict(dict_plain_decoder.get());
+
+  const uint8_t indices[] = {0x01, 0x03, 0x35};
+  decoder->SetData(8, indices, sizeof(indices));
+
+  bool out_bool[8] = {};
+  ASSERT_EQ(8, decoder->Decode(out_bool, 8));
+  const bool expected[8] = {true, false, true, false, true, true, false, 
false};

Review Comment:
   Instead of hard-coding 8 everywhere, can we make it a constant? e.g. 
`constexpr int kNumValues = 8`



##########
cpp/src/parquet/decoder.cc:
##########
@@ -1038,7 +1038,16 @@ void DictDecoderImpl<Type>::SetDict(TypedDecoder<Type>* 
dictionary) {
 
 template <>
 void DictDecoderImpl<BooleanType>::SetDict(TypedDecoder<BooleanType>* 
dictionary) {
-  ParquetException::NYI("Dictionary encoding is not implemented for boolean 
values");
+  dictionary_length_ = static_cast<int32_t>(dictionary->values_left());
+  PARQUET_THROW_NOT_OK(dictionary_->Resize(
+      static_cast<int64_t>(dictionary_length_) * sizeof(bool), false));
+  if (dictionary->Decode(dictionary_->mutable_data_as<bool>(), 
dictionary_length_) !=
+      dictionary_length_) {

Review Comment:
   Why this test? I see no corresponding check in `DecodeDict`.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to