jp0317 commented on code in PR #39153:
URL: https://github.com/apache/arrow/pull/39153#discussion_r1421819799


##########
cpp/src/parquet/column_reader.h:
##########
@@ -368,6 +368,11 @@ class PARQUET_EXPORT RecordReader {
 
   virtual void DebugPrintState() = 0;
 
+  /// \brief Returns the dictionary owned by the current decoder. Throws an
+  /// exception if the current decoder is not for dictionary encoding.
+  /// \param[out] dictionary_length The number of dictionary entries.
+  virtual const uint8_t* ReadDictionary(int32_t* dictionary_length) = 0;

Review Comment:
   changed to void*, thanks!



##########
cpp/src/parquet/column_reader.cc:
##########
@@ -1369,6 +1369,24 @@ class TypedRecordReader : public TypedColumnReaderImpl<DType>,
     return bytes_for_values;
   }
 
+  const uint8_t* ReadDictionary(int32_t* dictionary_length) override {
+    if (this->current_decoder_ == nullptr && !this->HasNextInternal()) {
+      dictionary_length = 0;
+      return nullptr;
+    }
+    // Verify the current data page is dictionary encoded.
+    if (this->current_encoding_ != Encoding::RLE_DICTIONARY) {
+      std::stringstream ss;
+      ss << "Data page is not dictionary encoded. Encoding: "
+         << EncodingToString(this->current_encoding_);
+      throw ParquetException(ss.str());
+    }
+    auto decoder = dynamic_cast<DictDecoder<DType>*>(this->current_decoder_);

Review Comment:
   done, thanks!



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to