mapleFU commented on code in PR #39153:
URL: https://github.com/apache/arrow/pull/39153#discussion_r1428040789


##########
cpp/src/parquet/column_reader.cc:
##########
@@ -1369,6 +1369,26 @@ class TypedRecordReader : public TypedColumnReaderImpl<DType>,
     return bytes_for_values;
   }
 
+  const void* ReadDictionary(int32_t* dictionary_length) override {
+    if (this->current_decoder_ == nullptr && !this->HasNextInternal()) {
+      dictionary_length = 0;
+      return nullptr;
+    }
+    // Verify the current data page is dictionary encoded. The current_encoding_ should
+    // have been set as RLE_DICTIONARY if the page encoding is RLE_DICTIONARY or
+    // PLAIN_DICTIONARY.
+    if (this->current_encoding_ != Encoding::RLE_DICTIONARY) {
+      std::stringstream ss;
+      ss << "Data page is not dictionary encoded. Encoding: "

Review Comment:
   nit: should we also include `descr_->ToString()` in the error message to help debugging?



##########
cpp/src/parquet/column_reader.h:
##########
@@ -368,6 +368,16 @@ class PARQUET_EXPORT RecordReader {
 
   virtual void DebugPrintState() = 0;
 
+  /// \brief Returns the dictionary owned by the current decoder. Throws an
+  /// exception if the current decoder is not for dictionary encoding. The caller is
+  /// responsible for casting the returned pointer to proper type depending on the
+  /// column's physical type. An example:
+  ///   ByteArray* dict = reinterpret_cast<const ByteArray*>(ReadDictionary(&len));
+  /// or:
+  ///   float* dict = reinterpret_cast<const float*>(ReadDictionary(&len));

Review Comment:
   ```suggestion
     ///   const float* dict = reinterpret_cast<const float*>(ReadDictionary(&len));
   ```



##########
cpp/src/parquet/column_reader.h:
##########
@@ -368,6 +368,16 @@ class PARQUET_EXPORT RecordReader {
 
   virtual void DebugPrintState() = 0;
 
+  /// \brief Returns the dictionary owned by the current decoder. Throws an
+  /// exception if the current decoder is not for dictionary encoding. The caller is
+  /// responsible for casting the returned pointer to proper type depending on the
+  /// column's physical type. An example:
+  ///   ByteArray* dict = reinterpret_cast<const ByteArray*>(ReadDictionary(&len));

Review Comment:
   ```suggestion
     ///   const ByteArray* dict = reinterpret_cast<const ByteArray*>(ReadDictionary(&len));
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to