emkornfield commented on a change in pull request #10537:
URL: https://github.com/apache/arrow/pull/10537#discussion_r652314337



##########
File path: cpp/src/parquet/column_reader.cc
##########
@@ -861,8 +864,113 @@ class TypedColumnReaderImpl : public 
TypedColumnReader<DType>,
   Type::type type() const override { return this->descr_->physical_type(); }
 
   const ColumnDescriptor* descr() const override { return this->descr_; }
+
+  ExposedEncodingType GetExposedEncoding() override { return 
this->exposed_encoding_; };
+
+  int64_t ReadBatchWithDictionary(int64_t batch_size, int16_t* def_levels,
+                                  int16_t* rep_levels, int32_t* indices,
+                                  int64_t* indices_read, const T** dict,
+                                  int32_t* dict_len) override;
+
+ protected:
+  void SetExposedEncoding(ExposedEncodingType encoding) override {
+    this->exposed_encoding_ = encoding;
+  }
+
+ private:
+  /// \brief Read dictionary indices. Similar to ReadValues but decode data to
+  /// dictionary indices.
+  ///
+  /// Called only by ReadBatchWithDictionary().
+  ///
+  /// \note API EXPERIMENTAL
+  int64_t ReadDictionaryIndices(int64_t indices_to_read, int32_t* indices) {
+    auto decoder = dynamic_cast<DictDecoder<DType>*>(this->current_decoder_);
+    return decoder->DecodeIndices(indices_to_read, indices);
+  }
+
+  /// \brief Get dictionary. The dictionary should have been set by SetDict().
+  ///
+  /// Called only by ReadBatchWithDictionary() after dictionary is configured.
+  ///
+  /// @param[out] dictionary The pointer to dictionary values. Dictionary is owned by
+  /// the internal decoder and is destroyed when the reader is destroyed.
+  /// @param[out] dictionary_length The dictionary length.
+  ///
+  /// \note API EXPERIMENTAL
+  void GetDictionary(const T** dictionary, int32_t* dictionary_length) {
+    auto decoder = dynamic_cast<DictDecoder<DType>*>(this->current_decoder_);
+    decoder->GetDictionary(dictionary, dictionary_length);
+  }
 };
 
+template <typename DType>
+int64_t TypedColumnReaderImpl<DType>::ReadBatchWithDictionary(
+    int64_t batch_size, int16_t* def_levels, int16_t* rep_levels, int32_t* 
indices,
+    int64_t* indices_read, const T** dict, int32_t* dict_len) {
+  // Similar logic as ReadValues to get pages.
+  if (!HasNext()) {
+    *indices_read = 0;
+    if (dict && dict_len) {
+      *dict = nullptr;
+      *dict_len = 0;
+    }
+    return 0;
+  }
+
+  // Verify the current data page is dictionary encoded.
+  if (this->current_encoding_ != Encoding::RLE_DICTIONARY) {
+    throw ParquetException("data page is not dictionary encoded");

Review comment:
       It might pay to include the current encoding in the exception message.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to