This is an automated email from the ASF dual-hosted git repository.
gangwu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 873d774996 MINOR: [C++][Parquet] Fix incorrect comments about
dictionary encoding fallback behaviour (#46158)
873d774996 is described below
commit 873d7749968d0366a89858f29b203d5a8a65c63b
Author: Adam Reeve <[email protected]>
AuthorDate: Thu Apr 17 13:28:16 2025 +1200
MINOR: [C++][Parquet] Fix incorrect comments about dictionary encoding
fallback behaviour (#46158)
### Rationale for this change
Prevent confusion among developers reading the Parquet source code or the
generated API documentation.
### What changes are included in this PR?
Updates comments to make it clear that when the dictionary page gets too
large, we always fall back to the plain encoding, regardless of the Parquet
format version or specified encoding.
### Are these changes tested?
N/A
### Are there any user-facing changes?
No.
Authored-by: Adam Reeve <[email protected]>
Signed-off-by: Gang Wu <[email protected]>
---
cpp/src/parquet/column_writer.cc | 2 +-
cpp/src/parquet/properties.h | 12 ++++++------
2 files changed, 7 insertions(+), 7 deletions(-)
diff --git a/cpp/src/parquet/column_writer.cc b/cpp/src/parquet/column_writer.cc
index ce44aef323..050c7c9bf5 100644
--- a/cpp/src/parquet/column_writer.cc
+++ b/cpp/src/parquet/column_writer.cc
@@ -1686,7 +1686,7 @@ class TypedColumnWriterImpl : public ColumnWriterImpl,
// Serialize the buffered Dictionary Indices
FlushBufferedDataPages();
fallback_ = true;
- // Only PLAIN encoding is supported for fallback in V1
+ // Only PLAIN encoding is supported for fallback
current_encoder_ = MakeEncoder(ParquetType::type_num, Encoding::PLAIN, false,
descr_, properties_->memory_pool());
current_value_encoder_ = dynamic_cast<ValueEncoderType*>(current_encoder_.get());
diff --git a/cpp/src/parquet/properties.h b/cpp/src/parquet/properties.h
index 19436b84a3..a68307d37b 100644
--- a/cpp/src/parquet/properties.h
+++ b/cpp/src/parquet/properties.h
@@ -383,8 +383,8 @@ class PARQUET_EXPORT WriterProperties {
/// \brief Define the encoding that is used when we don't utilise dictionary encoding.
//
- /// This either apply if dictionary encoding is disabled or if we fallback
- /// as the dictionary grew too large.
+ /// This is only applied if dictionary encoding is disabled. If the dictionary grows
+ /// too large we always fall back to the PLAIN encoding.
Builder* encoding(Encoding::type encoding_type) {
if (encoding_type == Encoding::PLAIN_DICTIONARY ||
encoding_type == Encoding::RLE_DICTIONARY) {
@@ -397,8 +397,8 @@ class PARQUET_EXPORT WriterProperties {
/// \brief Define the encoding that is used when we don't utilise dictionary encoding.
//
- /// This either apply if dictionary encoding is disabled or if we fallback
- /// as the dictionary grew too large.
+ /// This is only applied if dictionary encoding is disabled. If the dictionary grows
+ /// too large we always fall back to the PLAIN encoding.
Builder* encoding(const std::string& path, Encoding::type encoding_type) {
if (encoding_type == Encoding::PLAIN_DICTIONARY ||
encoding_type == Encoding::RLE_DICTIONARY) {
@@ -411,8 +411,8 @@ class PARQUET_EXPORT WriterProperties {
/// \brief Define the encoding that is used when we don't utilise dictionary encoding.
//
- /// This either apply if dictionary encoding is disabled or if we fallback
- /// as the dictionary grew too large.
+ /// This is only applied if dictionary encoding is disabled. If the dictionary grows
+ /// too large we always fall back to the PLAIN encoding.
Builder* encoding(const std::shared_ptr<schema::ColumnPath>& path,
Encoding::type encoding_type) {
return this->encoding(path->ToDotString(), encoding_type);