mapleFU commented on code in PR #39153:
URL: https://github.com/apache/arrow/pull/39153#discussion_r1428040789
##########
cpp/src/parquet/column_reader.cc:
##########
@@ -1369,6 +1369,26 @@ class TypedRecordReader : public
TypedColumnReaderImpl<DType>,
return bytes_for_values;
}
+ const void* ReadDictionary(int32_t* dictionary_length) override {
+ if (this->current_decoder_ == nullptr && !this->HasNextInternal()) {
+ dictionary_length = 0;
+ return nullptr;
+ }
+ // Verify the current data page is dictionary encoded. The current_encoding_ should
+ // have been set as RLE_DICTIONARY if the page encoding is RLE_DICTIONARY or
+ // PLAIN_DICTIONARY.
+ if (this->current_encoding_ != Encoding::RLE_DICTIONARY) {
+ std::stringstream ss;
+ ss << "Data page is not dictionary encoded. Encoding: "
Review Comment:
nit: should we also include `descr_->ToString()` in the error message to help with debugging?
##########
cpp/src/parquet/column_reader.h:
##########
@@ -368,6 +368,16 @@ class PARQUET_EXPORT RecordReader {
virtual void DebugPrintState() = 0;
+ /// \brief Returns the dictionary owned by the current decoder. Throws an
+ /// exception if the current decoder is not for dictionary encoding. The caller is
+ /// responsible for casting the returned pointer to proper type depending on the
+ /// column's physical type. An example:
+ /// ByteArray* dict = reinterpret_cast<const ByteArray*>(ReadDictionary(&len));
+ /// or:
+ /// float* dict = reinterpret_cast<const float*>(ReadDictionary(&len));
Review Comment:
```suggestion
/// const float* dict = reinterpret_cast<const float*>(ReadDictionary(&len));
```
##########
cpp/src/parquet/column_reader.h:
##########
@@ -368,6 +368,16 @@ class PARQUET_EXPORT RecordReader {
virtual void DebugPrintState() = 0;
+ /// \brief Returns the dictionary owned by the current decoder. Throws an
+ /// exception if the current decoder is not for dictionary encoding. The caller is
+ /// responsible for casting the returned pointer to proper type depending on the
+ /// column's physical type. An example:
+ /// ByteArray* dict = reinterpret_cast<const ByteArray*>(ReadDictionary(&len));
Review Comment:
```suggestion
/// const ByteArray* dict = reinterpret_cast<const ByteArray*>(ReadDictionary(&len));
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]