This is an automated email from the ASF dual-hosted git repository.
apitrou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new c5f60a02fe GH-39864: [C++] DataType::ToString support optionally show
metadata (#39888)
c5f60a02fe is described below
commit c5f60a02fe5c7106960824cb500f553623b7d97e
Author: Xiansen Chen <[email protected]>
AuthorDate: Wed Feb 28 03:22:12 2024 +0800
GH-39864: [C++] DataType::ToString support optionally show metadata (#39888)
### Rationale for this change
Support showing the metadata of nested DataTypes that have child fields.
### What changes are included in this PR?
Add an optional argument "show_metadata" to the ToString() method of DataType
and its derived classes. It is also added to TypeHolder::ToString().
### Are these changes tested?
Yes, tests were added for these changes.
### Are there any user-facing changes?
No.
Closes: #39864
* Closes: #39864
Lead-authored-by: xiansen.chen <[email protected]>
Co-authored-by: Antoine Pitrou <[email protected]>
Signed-off-by: Antoine Pitrou <[email protected]>
---
cpp/gdb_arrow.py | 6 +-
.../arrow/engine/simple_extension_type_internal.h | 5 +-
cpp/src/arrow/extension/fixed_shape_tensor.cc | 4 +-
cpp/src/arrow/extension/fixed_shape_tensor.h | 2 +-
cpp/src/arrow/extension_type.cc | 2 +-
cpp/src/arrow/extension_type.h | 2 +-
cpp/src/arrow/testing/gtest_util.cc | 26 ++----
cpp/src/arrow/type.cc | 89 ++++++++++----------
cpp/src/arrow/type.h | 68 ++++++++--------
cpp/src/arrow/type_test.cc | 95 ++++++++--------------
python/pyarrow/src/arrow/python/extension_type.cc | 2 +-
python/pyarrow/src/arrow/python/extension_type.h | 2 +-
12 files changed, 140 insertions(+), 163 deletions(-)
diff --git a/cpp/gdb_arrow.py b/cpp/gdb_arrow.py
index 6c3af1680b..e6180f2ff0 100644
--- a/cpp/gdb_arrow.py
+++ b/cpp/gdb_arrow.py
@@ -956,10 +956,12 @@ class ExtensionType:
def to_string(self):
"""
- The result of calling ToString().
+ The result of calling ToString(show_metadata=True).
"""
+ # XXX `show_metadata` is an optional argument, but gdb doesn't allow
+ # omitting it.
return StdString(gdb.parse_and_eval(
- f"{for_evaluation(self.val)}.ToString()"))
+ f"{for_evaluation(self.val)}.ToString(true)"))
class Schema:
diff --git a/cpp/src/arrow/engine/simple_extension_type_internal.h
b/cpp/src/arrow/engine/simple_extension_type_internal.h
index c3f0226283..73dbb9f7cb 100644
--- a/cpp/src/arrow/engine/simple_extension_type_internal.h
+++ b/cpp/src/arrow/engine/simple_extension_type_internal.h
@@ -70,8 +70,9 @@ class SimpleExtensionType : public ExtensionType {
std::string extension_name() const override { return
std::string(kExtensionName); }
- std::string ToString() const override { return "extension<" +
this->Serialize() + ">"; }
-
+ std::string ToString(bool show_metadata = false) const override {
+ return "extension<" + this->Serialize() + ">";
+ }
/// \brief A comparator which returns true iff all parameter properties are
equal
struct ExtensionEqualsImpl {
ExtensionEqualsImpl(const Params& l, const Params& r) : left_(l),
right_(r) {
diff --git a/cpp/src/arrow/extension/fixed_shape_tensor.cc
b/cpp/src/arrow/extension/fixed_shape_tensor.cc
index 02e0a890e4..1101b08307 100644
--- a/cpp/src/arrow/extension/fixed_shape_tensor.cc
+++ b/cpp/src/arrow/extension/fixed_shape_tensor.cc
@@ -108,10 +108,10 @@ bool FixedShapeTensorType::ExtensionEquals(const
ExtensionType& other) const {
permutation_equivalent;
}
-std::string FixedShapeTensorType::ToString() const {
+std::string FixedShapeTensorType::ToString(bool show_metadata) const {
std::stringstream ss;
ss << "extension<" << this->extension_name()
- << "[value_type=" << value_type_->ToString()
+ << "[value_type=" << value_type_->ToString(show_metadata)
<< ", shape=" << ::arrow::internal::PrintVector{shape_, ","};
if (!permutation_.empty()) {
diff --git a/cpp/src/arrow/extension/fixed_shape_tensor.h
b/cpp/src/arrow/extension/fixed_shape_tensor.h
index 591a7cee32..3fec79b5c2 100644
--- a/cpp/src/arrow/extension/fixed_shape_tensor.h
+++ b/cpp/src/arrow/extension/fixed_shape_tensor.h
@@ -61,7 +61,7 @@ class ARROW_EXPORT FixedShapeTensorType : public
ExtensionType {
dim_names_(dim_names) {}
std::string extension_name() const override { return
"arrow.fixed_shape_tensor"; }
- std::string ToString() const override;
+ std::string ToString(bool show_metadata = false) const override;
/// Number of dimensions of tensor elements
size_t ndim() const { return shape_.size(); }
diff --git a/cpp/src/arrow/extension_type.cc b/cpp/src/arrow/extension_type.cc
index 1199336763..cf8dda7a85 100644
--- a/cpp/src/arrow/extension_type.cc
+++ b/cpp/src/arrow/extension_type.cc
@@ -41,7 +41,7 @@ using internal::checked_cast;
DataTypeLayout ExtensionType::layout() const { return storage_type_->layout();
}
-std::string ExtensionType::ToString() const {
+std::string ExtensionType::ToString(bool show_metadata) const {
std::stringstream ss;
ss << "extension<" << this->extension_name() << ">";
return ss.str();
diff --git a/cpp/src/arrow/extension_type.h b/cpp/src/arrow/extension_type.h
index dd004118e8..0fd7216f18 100644
--- a/cpp/src/arrow/extension_type.h
+++ b/cpp/src/arrow/extension_type.h
@@ -50,7 +50,7 @@ class ARROW_EXPORT ExtensionType : public DataType {
DataTypeLayout layout() const override;
- std::string ToString() const override;
+ std::string ToString(bool show_metadata = false) const override;
std::string name() const override { return "extension"; }
diff --git a/cpp/src/arrow/testing/gtest_util.cc
b/cpp/src/arrow/testing/gtest_util.cc
index 5ef1820d5b..3786594888 100644
--- a/cpp/src/arrow/testing/gtest_util.cc
+++ b/cpp/src/arrow/testing/gtest_util.cc
@@ -232,21 +232,12 @@ void AssertBufferEqual(const Buffer& buffer, const
Buffer& expected) {
ASSERT_TRUE(buffer.Equals(expected));
}
-template <typename T>
-std::string ToStringWithMetadata(const T& t, bool show_metadata) {
- return t.ToString(show_metadata);
-}
-
-std::string ToStringWithMetadata(const DataType& t, bool show_metadata) {
- return t.ToString();
-}
-
template <typename T>
void AssertFingerprintablesEqual(const T& left, const T& right, bool
check_metadata,
const char* types_plural) {
ASSERT_TRUE(left.Equals(right, check_metadata))
- << types_plural << " '" << ToStringWithMetadata(left, check_metadata) <<
"' and '"
- << ToStringWithMetadata(right, check_metadata) << "' should have
compared equal";
+ << types_plural << " '" << left.ToString(check_metadata) << "' and '"
+ << right.ToString(check_metadata) << "' should have compared equal";
auto lfp = left.fingerprint();
auto rfp = right.fingerprint();
// Note: all types tested in this file should implement fingerprinting,
@@ -256,9 +247,8 @@ void AssertFingerprintablesEqual(const T& left, const T&
right, bool check_metad
rfp += right.metadata_fingerprint();
}
ASSERT_EQ(lfp, rfp) << "Fingerprints for " << types_plural << " '"
- << ToStringWithMetadata(left, check_metadata) << "' and
'"
- << ToStringWithMetadata(right, check_metadata)
- << "' should have compared equal";
+ << left.ToString(check_metadata) << "' and '"
+ << right.ToString(check_metadata) << "' should have
compared equal";
}
template <typename T>
@@ -274,8 +264,8 @@ template <typename T>
void AssertFingerprintablesNotEqual(const T& left, const T& right, bool
check_metadata,
const char* types_plural) {
ASSERT_FALSE(left.Equals(right, check_metadata))
- << types_plural << " '" << ToStringWithMetadata(left, check_metadata) <<
"' and '"
- << ToStringWithMetadata(right, check_metadata) << "' should have
compared unequal";
+ << types_plural << " '" << left.ToString(check_metadata) << "' and '"
+ << right.ToString(check_metadata) << "' should have compared unequal";
auto lfp = left.fingerprint();
auto rfp = right.fingerprint();
// Note: all types tested in this file should implement fingerprinting,
@@ -286,8 +276,8 @@ void AssertFingerprintablesNotEqual(const T& left, const T&
right, bool check_me
rfp += right.metadata_fingerprint();
}
ASSERT_NE(lfp, rfp) << "Fingerprints for " << types_plural << " '"
- << ToStringWithMetadata(left, check_metadata) << "'
and '"
- << ToStringWithMetadata(right, check_metadata)
+ << left.ToString(check_metadata) << "' and '"
+ << right.ToString(check_metadata)
<< "' should have compared unequal";
}
}
diff --git a/cpp/src/arrow/type.cc b/cpp/src/arrow/type.cc
index 62d2d61598..edf8f04966 100644
--- a/cpp/src/arrow/type.cc
+++ b/cpp/src/arrow/type.cc
@@ -874,7 +874,7 @@ bool Field::IsCompatibleWith(const std::shared_ptr<Field>&
other) const {
std::string Field::ToString(bool show_metadata) const {
std::stringstream ss;
- ss << name_ << ": " << type_->ToString();
+ ss << name_ << ": " << type_->ToString(show_metadata);
if (!nullable_) {
ss << " not null";
}
@@ -919,14 +919,15 @@ std::ostream& operator<<(std::ostream& os, const
TypeHolder& type) {
// ----------------------------------------------------------------------
// TypeHolder
-std::string TypeHolder::ToString(const std::vector<TypeHolder>& types) {
+std::string TypeHolder::ToString(const std::vector<TypeHolder>& types,
+ bool show_metadata) {
std::stringstream ss;
ss << "(";
for (size_t i = 0; i < types.size(); ++i) {
if (i > 0) {
ss << ", ";
}
- ss << types[i].type->ToString();
+ ss << types[i].type->ToString(show_metadata);
}
ss << ")";
return ss.str();
@@ -984,27 +985,27 @@ BaseBinaryType::~BaseBinaryType() {}
BaseListType::~BaseListType() {}
-std::string ListType::ToString() const {
+std::string ListType::ToString(bool show_metadata) const {
std::stringstream s;
- s << "list<" << value_field()->ToString() << ">";
+ s << "list<" << value_field()->ToString(show_metadata) << ">";
return s.str();
}
-std::string LargeListType::ToString() const {
+std::string LargeListType::ToString(bool show_metadata) const {
std::stringstream s;
- s << "large_list<" << value_field()->ToString() << ">";
+ s << "large_list<" << value_field()->ToString(show_metadata) << ">";
return s.str();
}
-std::string ListViewType::ToString() const {
+std::string ListViewType::ToString(bool show_metadata) const {
std::stringstream s;
- s << "list_view<" << value_field()->ToString() << ">";
+ s << "list_view<" << value_field()->ToString(show_metadata) << ">";
return s.str();
}
-std::string LargeListViewType::ToString() const {
+std::string LargeListViewType::ToString(bool show_metadata) const {
std::stringstream s;
- s << "large_list_view<" << value_field()->ToString() << ">";
+ s << "large_list_view<" << value_field()->ToString(show_metadata) << ">";
return s.str();
}
@@ -1047,7 +1048,7 @@ Result<std::shared_ptr<DataType>>
MapType::Make(std::shared_ptr<Field> value_fie
return std::make_shared<MapType>(std::move(value_field), keys_sorted);
}
-std::string MapType::ToString() const {
+std::string MapType::ToString(bool show_metadata) const {
std::stringstream s;
const auto print_field_name = [](std::ostream& os, const Field& field,
@@ -1058,7 +1059,7 @@ std::string MapType::ToString() const {
};
const auto print_field = [&](std::ostream& os, const Field& field,
const char* std_name) {
- os << field.type()->ToString();
+ os << field.type()->ToString(show_metadata);
print_field_name(os, field, std_name);
};
@@ -1074,23 +1075,24 @@ std::string MapType::ToString() const {
return s.str();
}
-std::string FixedSizeListType::ToString() const {
+std::string FixedSizeListType::ToString(bool show_metadata) const {
std::stringstream s;
- s << "fixed_size_list<" << value_field()->ToString() << ">[" << list_size_
<< "]";
+ s << "fixed_size_list<" << value_field()->ToString(show_metadata) << ">[" <<
list_size_
+ << "]";
return s.str();
}
-std::string BinaryType::ToString() const { return "binary"; }
+std::string BinaryType::ToString(bool show_metadata) const { return "binary"; }
-std::string BinaryViewType::ToString() const { return "binary_view"; }
+std::string BinaryViewType::ToString(bool show_metadata) const { return
"binary_view"; }
-std::string LargeBinaryType::ToString() const { return "large_binary"; }
+std::string LargeBinaryType::ToString(bool show_metadata) const { return
"large_binary"; }
-std::string StringType::ToString() const { return "string"; }
+std::string StringType::ToString(bool show_metadata) const { return "string"; }
-std::string StringViewType::ToString() const { return "string_view"; }
+std::string StringViewType::ToString(bool show_metadata) const { return
"string_view"; }
-std::string LargeStringType::ToString() const { return "large_string"; }
+std::string LargeStringType::ToString(bool show_metadata) const { return
"large_string"; }
int FixedSizeBinaryType::bit_width() const { return CHAR_BIT * byte_width(); }
@@ -1105,7 +1107,7 @@ Result<std::shared_ptr<DataType>>
FixedSizeBinaryType::Make(int32_t byte_width)
return std::make_shared<FixedSizeBinaryType>(byte_width);
}
-std::string FixedSizeBinaryType::ToString() const {
+std::string FixedSizeBinaryType::ToString(bool show_metadata) const {
std::stringstream ss;
ss << "fixed_size_binary[" << byte_width_ << "]";
return ss.str();
@@ -1122,9 +1124,13 @@ Date32Type::Date32Type() : DateType(Type::DATE32) {}
Date64Type::Date64Type() : DateType(Type::DATE64) {}
-std::string Date64Type::ToString() const { return std::string("date64[ms]"); }
+std::string Date64Type::ToString(bool show_metadata) const {
+ return std::string("date64[ms]");
+}
-std::string Date32Type::ToString() const { return std::string("date32[day]"); }
+std::string Date32Type::ToString(bool show_metadata) const {
+ return std::string("date32[day]");
+}
// ----------------------------------------------------------------------
// Time types
@@ -1137,7 +1143,7 @@ Time32Type::Time32Type(TimeUnit::type unit) :
TimeType(Type::TIME32, unit) {
<< "Must be seconds or milliseconds";
}
-std::string Time32Type::ToString() const {
+std::string Time32Type::ToString(bool show_metadata) const {
std::stringstream ss;
ss << "time32[" << this->unit_ << "]";
return ss.str();
@@ -1148,7 +1154,7 @@ Time64Type::Time64Type(TimeUnit::type unit) :
TimeType(Type::TIME64, unit) {
<< "Must be microseconds or nanoseconds";
}
-std::string Time64Type::ToString() const {
+std::string Time64Type::ToString(bool show_metadata) const {
std::stringstream ss;
ss << "time64[" << this->unit_ << "]";
return ss.str();
@@ -1175,7 +1181,7 @@ std::ostream& operator<<(std::ostream& os, TimeUnit::type
unit) {
// ----------------------------------------------------------------------
// Timestamp types
-std::string TimestampType::ToString() const {
+std::string TimestampType::ToString(bool show_metadata) const {
std::stringstream ss;
ss << "timestamp[" << this->unit_;
if (this->timezone_.size() > 0) {
@@ -1186,7 +1192,7 @@ std::string TimestampType::ToString() const {
}
// Duration types
-std::string DurationType::ToString() const {
+std::string DurationType::ToString(bool show_metadata) const {
std::stringstream ss;
ss << "duration[" << this->unit_ << "]";
return ss.str();
@@ -1245,7 +1251,7 @@ uint8_t UnionType::max_type_code() const {
: *std::max_element(type_codes_.begin(), type_codes_.end());
}
-std::string UnionType::ToString() const {
+std::string UnionType::ToString(bool show_metadata) const {
std::stringstream s;
s << name() << "<";
@@ -1254,7 +1260,7 @@ std::string UnionType::ToString() const {
if (i) {
s << ", ";
}
- s << children_[i]->ToString() << "=" << static_cast<int>(type_codes_[i]);
+ s << children_[i]->ToString(show_metadata) << "=" <<
static_cast<int>(type_codes_[i]);
}
s << ">";
return s.str();
@@ -1291,10 +1297,10 @@
RunEndEncodedType::RunEndEncodedType(std::shared_ptr<DataType> run_end_type,
RunEndEncodedType::~RunEndEncodedType() = default;
-std::string RunEndEncodedType::ToString() const {
+std::string RunEndEncodedType::ToString(bool show_metadata) const {
std::stringstream s;
- s << name() << "<run_ends: " << run_end_type()->ToString()
- << ", values: " << value_type()->ToString() << ">";
+ s << name() << "<run_ends: " << run_end_type()->ToString(show_metadata)
+ << ", values: " << value_type()->ToString(show_metadata) << ">";
return s.str();
}
@@ -1350,7 +1356,7 @@ StructType::StructType(const FieldVector& fields)
StructType::~StructType() {}
-std::string StructType::ToString() const {
+std::string StructType::ToString(bool show_metadata) const {
std::stringstream s;
s << "struct<";
for (int i = 0; i < this->num_fields(); ++i) {
@@ -1358,7 +1364,7 @@ std::string StructType::ToString() const {
s << ", ";
}
std::shared_ptr<Field> field = this->field(i);
- s << field->ToString();
+ s << field->ToString(show_metadata);
}
s << ">";
return s.str();
@@ -1523,17 +1529,18 @@ DataTypeLayout DictionaryType::layout() const {
return layout;
}
-std::string DictionaryType::ToString() const {
+std::string DictionaryType::ToString(bool show_metadata) const {
std::stringstream ss;
- ss << this->name() << "<values=" << value_type_->ToString()
- << ", indices=" << index_type_->ToString() << ", ordered=" << ordered_ <<
">";
+ ss << this->name() << "<values=" << value_type_->ToString(show_metadata)
+ << ", indices=" << index_type_->ToString(show_metadata) << ", ordered="
<< ordered_
+ << ">";
return ss.str();
}
// ----------------------------------------------------------------------
// Null type
-std::string NullType::ToString() const { return name(); }
+std::string NullType::ToString(bool show_metadata) const { return name(); }
// ----------------------------------------------------------------------
// FieldPath
@@ -3304,13 +3311,13 @@ std::shared_ptr<DataType> decimal256(int32_t precision,
int32_t scale) {
return std::make_shared<Decimal256Type>(precision, scale);
}
-std::string Decimal128Type::ToString() const {
+std::string Decimal128Type::ToString(bool show_metadata) const {
std::stringstream s;
s << "decimal128(" << precision_ << ", " << scale_ << ")";
return s.str();
}
-std::string Decimal256Type::ToString() const {
+std::string Decimal256Type::ToString(bool show_metadata) const {
std::stringstream s;
s << "decimal256(" << precision_ << ", " << scale_ << ")";
return s.str();
diff --git a/cpp/src/arrow/type.h b/cpp/src/arrow/type.h
index 5b1331ab66..3f651741d3 100644
--- a/cpp/src/arrow/type.h
+++ b/cpp/src/arrow/type.h
@@ -162,7 +162,7 @@ class ARROW_EXPORT DataType : public
std::enable_shared_from_this<DataType>,
Status Accept(TypeVisitor* visitor) const;
/// \brief A string representation of the type, including any children
- virtual std::string ToString() const = 0;
+ virtual std::string ToString(bool show_metadata = false) const = 0;
/// \brief Return hash value (excluding metadata in child fields)
size_t Hash() const;
@@ -266,11 +266,11 @@ struct ARROW_EXPORT TypeHolder {
bool operator!=(const TypeHolder& other) const { return !(*this == other); }
- std::string ToString() const {
- return this->type ? this->type->ToString() : "<NULLPTR>";
+ std::string ToString(bool show_metadata = false) const {
+ return this->type ? this->type->ToString(show_metadata) : "<NULLPTR>";
}
- static std::string ToString(const std::vector<TypeHolder>&);
+ static std::string ToString(const std::vector<TypeHolder>&, bool
show_metadata = false);
static std::vector<TypeHolder> FromTypes(
const std::vector<std::shared_ptr<DataType>>& types);
@@ -565,7 +565,7 @@ class ARROW_EXPORT CTypeImpl : public BASE {
std::string name() const override { return DERIVED::type_name(); }
- std::string ToString() const override { return this->name(); }
+ std::string ToString(bool show_metadata = false) const override { return
this->name(); }
};
template <typename DERIVED, typename BASE, Type::type TYPE_ID, typename C_TYPE>
@@ -587,7 +587,7 @@ class ARROW_EXPORT NullType : public DataType {
NullType() : DataType(Type::NA) {}
- std::string ToString() const override;
+ std::string ToString(bool show_metadata = false) const override;
DataTypeLayout layout() const override {
return DataTypeLayout({DataTypeLayout::AlwaysNull()});
@@ -769,7 +769,7 @@ class ARROW_EXPORT BinaryType : public BaseBinaryType {
DataTypeLayout::VariableWidth()});
}
- std::string ToString() const override;
+ std::string ToString(bool show_metadata = false) const override;
std::string name() const override { return "binary"; }
protected:
@@ -866,7 +866,7 @@ class ARROW_EXPORT BinaryViewType : public DataType {
DataTypeLayout::VariableWidth());
}
- std::string ToString() const override;
+ std::string ToString(bool show_metadata = false) const override;
std::string name() const override { return "binary_view"; }
protected:
@@ -894,7 +894,7 @@ class ARROW_EXPORT LargeBinaryType : public BaseBinaryType {
DataTypeLayout::VariableWidth()});
}
- std::string ToString() const override;
+ std::string ToString(bool show_metadata = false) const override;
std::string name() const override { return "large_binary"; }
protected:
@@ -915,7 +915,7 @@ class ARROW_EXPORT StringType : public BinaryType {
StringType() : BinaryType(Type::STRING) {}
- std::string ToString() const override;
+ std::string ToString(bool show_metadata = false) const override;
std::string name() const override { return "utf8"; }
protected:
@@ -933,7 +933,7 @@ class ARROW_EXPORT StringViewType : public BinaryViewType {
StringViewType() : BinaryViewType(Type::STRING_VIEW) {}
- std::string ToString() const override;
+ std::string ToString(bool show_metadata = false) const override;
std::string name() const override { return "utf8_view"; }
protected:
@@ -951,7 +951,7 @@ class ARROW_EXPORT LargeStringType : public LargeBinaryType
{
LargeStringType() : LargeBinaryType(Type::LARGE_STRING) {}
- std::string ToString() const override;
+ std::string ToString(bool show_metadata = false) const override;
std::string name() const override { return "large_utf8"; }
protected:
@@ -971,7 +971,7 @@ class ARROW_EXPORT FixedSizeBinaryType : public
FixedWidthType, public Parametri
explicit FixedSizeBinaryType(int32_t byte_width, Type::type override_type_id)
: FixedWidthType(override_type_id), byte_width_(byte_width) {}
- std::string ToString() const override;
+ std::string ToString(bool show_metadata = false) const override;
std::string name() const override { return "fixed_size_binary"; }
DataTypeLayout layout() const override {
@@ -1050,7 +1050,7 @@ class ARROW_EXPORT Decimal128Type : public DecimalType {
/// Decimal128Type constructor that returns an error on invalid input.
static Result<std::shared_ptr<DataType>> Make(int32_t precision, int32_t
scale);
- std::string ToString() const override;
+ std::string ToString(bool show_metadata = false) const override;
std::string name() const override { return "decimal128"; }
static constexpr int32_t kMinPrecision = 1;
@@ -1083,7 +1083,7 @@ class ARROW_EXPORT Decimal256Type : public DecimalType {
/// Decimal256Type constructor that returns an error on invalid input.
static Result<std::shared_ptr<DataType>> Make(int32_t precision, int32_t
scale);
- std::string ToString() const override;
+ std::string ToString(bool show_metadata = false) const override;
std::string name() const override { return "decimal256"; }
static constexpr int32_t kMinPrecision = 1;
@@ -1134,7 +1134,7 @@ class ARROW_EXPORT ListType : public BaseListType {
{DataTypeLayout::Bitmap(),
DataTypeLayout::FixedWidth(sizeof(offset_type))});
}
- std::string ToString() const override;
+ std::string ToString(bool show_metadata = false) const override;
std::string name() const override { return "list"; }
@@ -1166,7 +1166,7 @@ class ARROW_EXPORT LargeListType : public BaseListType {
{DataTypeLayout::Bitmap(),
DataTypeLayout::FixedWidth(sizeof(offset_type))});
}
- std::string ToString() const override;
+ std::string ToString(bool show_metadata = false) const override;
std::string name() const override { return "large_list"; }
@@ -1197,7 +1197,7 @@ class ARROW_EXPORT ListViewType : public BaseListType {
DataTypeLayout::FixedWidth(sizeof(offset_type))});
}
- std::string ToString() const override;
+ std::string ToString(bool show_metadata = false) const override;
std::string name() const override { return "list_view"; }
@@ -1231,7 +1231,7 @@ class ARROW_EXPORT LargeListViewType : public
BaseListType {
DataTypeLayout::FixedWidth(sizeof(offset_type))});
}
- std::string ToString() const override;
+ std::string ToString(bool show_metadata = false) const override;
std::string name() const override { return "large_list_view"; }
@@ -1273,7 +1273,7 @@ class ARROW_EXPORT MapType : public ListType {
std::shared_ptr<Field> item_field() const { return value_type()->field(1); }
std::shared_ptr<DataType> item_type() const { return item_field()->type(); }
- std::string ToString() const override;
+ std::string ToString(bool show_metadata = false) const override;
std::string name() const override { return "map"; }
@@ -1308,7 +1308,7 @@ class ARROW_EXPORT FixedSizeListType : public
BaseListType {
return DataTypeLayout({DataTypeLayout::Bitmap()});
}
- std::string ToString() const override;
+ std::string ToString(bool show_metadata = false) const override;
std::string name() const override { return "fixed_size_list"; }
@@ -1335,7 +1335,7 @@ class ARROW_EXPORT StructType : public NestedType {
return DataTypeLayout({DataTypeLayout::Bitmap()});
}
- std::string ToString() const override;
+ std::string ToString(bool show_metadata = false) const override;
std::string name() const override { return "struct"; }
/// Returns null if name not found
@@ -1385,7 +1385,7 @@ class ARROW_EXPORT UnionType : public NestedType {
DataTypeLayout layout() const override;
- std::string ToString() const override;
+ std::string ToString(bool show_metadata = false) const override;
/// The array of logical type ids.
///
@@ -1488,7 +1488,7 @@ class ARROW_EXPORT RunEndEncodedType : public NestedType {
const std::shared_ptr<DataType>& run_end_type() const { return
fields()[0]->type(); }
const std::shared_ptr<DataType>& value_type() const { return
fields()[1]->type(); }
- std::string ToString() const override;
+ std::string ToString(bool show_metadata = false) const override;
std::string name() const override { return "run_end_encoded"; }
@@ -1544,7 +1544,7 @@ class ARROW_EXPORT Date32Type : public DateType {
int bit_width() const override { return static_cast<int>(sizeof(c_type) *
CHAR_BIT); }
- std::string ToString() const override;
+ std::string ToString(bool show_metadata = false) const override;
std::string name() const override { return "date32"; }
DateUnit unit() const override { return UNIT; }
@@ -1567,7 +1567,7 @@ class ARROW_EXPORT Date64Type : public DateType {
int bit_width() const override { return static_cast<int>(sizeof(c_type) *
CHAR_BIT); }
- std::string ToString() const override;
+ std::string ToString(bool show_metadata = false) const override;
std::string name() const override { return "date64"; }
DateUnit unit() const override { return UNIT; }
@@ -1605,7 +1605,7 @@ class ARROW_EXPORT Time32Type : public TimeType {
explicit Time32Type(TimeUnit::type unit = TimeUnit::MILLI);
- std::string ToString() const override;
+ std::string ToString(bool show_metadata = false) const override;
std::string name() const override { return "time32"; }
};
@@ -1624,7 +1624,7 @@ class ARROW_EXPORT Time64Type : public TimeType {
explicit Time64Type(TimeUnit::type unit = TimeUnit::NANO);
- std::string ToString() const override;
+ std::string ToString(bool show_metadata = false) const override;
std::string name() const override { return "time64"; }
};
@@ -1679,7 +1679,7 @@ class ARROW_EXPORT TimestampType : public TemporalType,
public ParametricType {
explicit TimestampType(TimeUnit::type unit, const std::string& timezone)
: TemporalType(Type::TIMESTAMP), unit_(unit), timezone_(timezone) {}
- std::string ToString() const override;
+ std::string ToString(bool show_metadata = false) const override;
std::string name() const override { return "timestamp"; }
TimeUnit::type unit() const { return unit_; }
@@ -1723,7 +1723,7 @@ class ARROW_EXPORT MonthIntervalType : public
IntervalType {
MonthIntervalType() : IntervalType(type_id) {}
- std::string ToString() const override { return name(); }
+ std::string ToString(bool show_metadata = false) const override { return
name(); }
std::string name() const override { return "month_interval"; }
};
@@ -1759,7 +1759,7 @@ class ARROW_EXPORT DayTimeIntervalType : public
IntervalType {
int bit_width() const override { return static_cast<int>(sizeof(c_type) *
CHAR_BIT); }
- std::string ToString() const override { return name(); }
+ std::string ToString(bool show_metadata = false) const override { return
name(); }
std::string name() const override { return "day_time_interval"; }
};
@@ -1799,7 +1799,7 @@ class ARROW_EXPORT MonthDayNanoIntervalType : public
IntervalType {
int bit_width() const override { return static_cast<int>(sizeof(c_type) *
CHAR_BIT); }
- std::string ToString() const override { return name(); }
+ std::string ToString(bool show_metadata = false) const override { return
name(); }
std::string name() const override { return "month_day_nano_interval"; }
};
@@ -1823,7 +1823,7 @@ class ARROW_EXPORT DurationType : public TemporalType,
public ParametricType {
explicit DurationType(TimeUnit::type unit = TimeUnit::MILLI)
: TemporalType(Type::DURATION), unit_(unit) {}
- std::string ToString() const override;
+ std::string ToString(bool show_metadata = false) const override;
std::string name() const override { return "duration"; }
TimeUnit::type unit() const { return unit_; }
@@ -1857,7 +1857,7 @@ class ARROW_EXPORT DictionaryType : public FixedWidthType
{
const std::shared_ptr<DataType>& index_type,
const std::shared_ptr<DataType>& value_type, bool ordered = false);
- std::string ToString() const override;
+ std::string ToString(bool show_metadata = false) const override;
std::string name() const override { return "dictionary"; }
int bit_width() const override;
diff --git a/cpp/src/arrow/type_test.cc b/cpp/src/arrow/type_test.cc
index 22913f77fb..df484a8fc2 100644
--- a/cpp/src/arrow/type_test.cc
+++ b/cpp/src/arrow/type_test.cc
@@ -20,6 +20,7 @@
#include <algorithm>
#include <cctype>
#include <cstdint>
+#include <functional>
#include <memory>
#include <string>
#include <unordered_set>
@@ -1893,9 +1894,13 @@ TEST(TestListViewType, Equals) {
AssertTypeEqual(list_view_type, list_view_type_named);
ASSERT_FALSE(list_view_type.Equals(list_view_type_named,
/*check_metadata=*/true));
+ ASSERT_NE(list_view_type.ToString(), list_view_type_named.ToString());
}
-TEST(TestListType, Metadata) {
+using ListListTypeFactory =
+ std::function<std::shared_ptr<DataType>(std::shared_ptr<Field>)>;
+
+void CheckListListTypeMetadata(ListListTypeFactory list_type_factory) {
auto md1 = key_value_metadata({"foo", "bar"}, {"foo value", "bar value"});
auto md2 = key_value_metadata({"foo", "bar"}, {"foo value", "bar value"});
auto md3 = key_value_metadata({"foo"}, {"foo value"});
@@ -1906,83 +1911,49 @@ TEST(TestListType, Metadata) {
auto f4 = field("item", utf8());
auto f5 = field("item", utf8(), /*nullable =*/false, md1);
- auto t1 = list(f1);
- auto t2 = list(f2);
- auto t3 = list(f3);
- auto t4 = list(f4);
- auto t5 = list(f5);
+ auto t1 = list_type_factory(f1);
+ auto t2 = list_type_factory(f2);
+ auto t3 = list_type_factory(f3);
+ auto t4 = list_type_factory(f4);
+ auto t5 = list_type_factory(f5);
AssertTypeEqual(*t1, *t2);
AssertTypeEqual(*t1, *t2, /*check_metadata =*/false);
+ ASSERT_EQ(t1->ToString(/*show_metadata=*/true), t2->ToString(/*show_metadata=*/true));
AssertTypeEqual(*t1, *t3);
AssertTypeNotEqual(*t1, *t3, /*check_metadata =*/true);
+ ASSERT_EQ(t1->ToString(/*show_metadata=*/false), t3->ToString(/*show_metadata=*/false));
+ ASSERT_NE(t1->ToString(/*show_metadata=*/true), t3->ToString(/*show_metadata=*/true));
AssertTypeEqual(*t1, *t4);
AssertTypeNotEqual(*t1, *t4, /*check_metadata =*/true);
+ ASSERT_EQ(t1->ToString(/*show_metadata=*/false), t4->ToString(/*show_metadata=*/false));
+ ASSERT_NE(t1->ToString(/*show_metadata=*/true), t4->ToString(/*show_metadata=*/true));
AssertTypeNotEqual(*t1, *t5);
AssertTypeNotEqual(*t1, *t5, /*check_metadata =*/true);
+ ASSERT_NE(t1->ToString(/*show_metadata=*/false), t5->ToString(/*show_metadata=*/false));
+ ASSERT_NE(t1->ToString(/*show_metadata=*/true), t5->ToString(/*show_metadata=*/true));
}
-TEST(TestListViewType, Metadata) {
- auto md1 = key_value_metadata({"foo", "bar"}, {"foo value", "bar value"});
- auto md2 = key_value_metadata({"foo", "bar"}, {"foo value", "bar value"});
- auto md3 = key_value_metadata({"foo"}, {"foo value"});
-
- auto f1 = field("item", utf8(), /*nullable =*/true, md1);
- auto f2 = field("item", utf8(), /*nullable =*/true, md2);
- auto f3 = field("item", utf8(), /*nullable =*/true, md3);
- auto f4 = field("item", utf8());
- auto f5 = field("item", utf8(), /*nullable =*/false, md1);
-
- auto t1 = list_view(f1);
- auto t2 = list_view(f2);
- auto t3 = list_view(f3);
- auto t4 = list_view(f4);
- auto t5 = list_view(f5);
-
- AssertTypeEqual(*t1, *t2);
- AssertTypeEqual(*t1, *t2, /*check_metadata =*/false);
-
- AssertTypeEqual(*t1, *t3);
- AssertTypeNotEqual(*t1, *t3, /*check_metadata =*/true);
+TEST(TestListType, Metadata) {
+ CheckListListTypeMetadata([](std::shared_ptr<Field> field) { return list(field); });
+}
- AssertTypeEqual(*t1, *t4);
- AssertTypeNotEqual(*t1, *t4, /*check_metadata =*/true);
+TEST(TestLargeListType, Metadata) {
+ CheckListListTypeMetadata(
+ [](std::shared_ptr<Field> field) { return large_list(field); });
+}
- AssertTypeNotEqual(*t1, *t5);
- AssertTypeNotEqual(*t1, *t5, /*check_metadata =*/true);
+TEST(TestListViewType, Metadata) {
+ CheckListListTypeMetadata(
+ [](std::shared_ptr<Field> field) { return list_view(field); });
}
TEST(TestLargeListViewType, Metadata) {
- auto md1 = key_value_metadata({"foo", "bar"}, {"foo value", "bar value"});
- auto md2 = key_value_metadata({"foo", "bar"}, {"foo value", "bar value"});
- auto md3 = key_value_metadata({"foo"}, {"foo value"});
-
- auto f1 = field("item", utf8(), /*nullable =*/true, md1);
- auto f2 = field("item", utf8(), /*nullable =*/true, md2);
- auto f3 = field("item", utf8(), /*nullable =*/true, md3);
- auto f4 = field("item", utf8());
- auto f5 = field("item", utf8(), /*nullable =*/false, md1);
-
- auto t1 = large_list_view(f1);
- auto t2 = large_list_view(f2);
- auto t3 = large_list_view(f3);
- auto t4 = large_list_view(f4);
- auto t5 = large_list_view(f5);
-
- AssertTypeEqual(*t1, *t2);
- AssertTypeEqual(*t1, *t2, /*check_metadata =*/false);
-
- AssertTypeEqual(*t1, *t3);
- AssertTypeNotEqual(*t1, *t3, /*check_metadata =*/true);
-
- AssertTypeEqual(*t1, *t4);
- AssertTypeNotEqual(*t1, *t4, /*check_metadata =*/true);
-
- AssertTypeNotEqual(*t1, *t5);
- AssertTypeNotEqual(*t1, *t5, /*check_metadata =*/true);
+ CheckListListTypeMetadata(
+ [](std::shared_ptr<Field> field) { return large_list_view(field); });
}
TEST(TestNestedType, Equals) {
@@ -2124,6 +2095,12 @@ TEST(TestStructType, TestFieldsDifferOnlyInMetadata) {
AssertTypeEqual(s0, s1);
AssertTypeNotEqual(s0, s1, /* check_metadata = */ true);
+ ASSERT_NE(s0.ToString(), s1.ToString(/*show_metadata=*/true));
+
+ std::string expected = R"(struct<f: string
+-- metadata --
+foo: baz, f: string>)";
+ ASSERT_EQ(s1.ToString(/*show_metadata=*/true), expected);
ASSERT_EQ(s0.fingerprint(), s1.fingerprint());
ASSERT_NE(s0.metadata_fingerprint(), s1.metadata_fingerprint());
diff --git a/python/pyarrow/src/arrow/python/extension_type.cc b/python/pyarrow/src/arrow/python/extension_type.cc
index 3ccc171c87..be66b4a1c6 100644
--- a/python/pyarrow/src/arrow/python/extension_type.cc
+++ b/python/pyarrow/src/arrow/python/extension_type.cc
@@ -72,7 +72,7 @@ PyObject* DeserializeExtInstance(PyObject* type_class,
static const char* kExtensionName = "arrow.py_extension_type";
-std::string PyExtensionType::ToString() const {
+std::string PyExtensionType::ToString(bool show_metadata) const {
PyAcquireGIL lock;
std::stringstream ss;
diff --git a/python/pyarrow/src/arrow/python/extension_type.h b/python/pyarrow/src/arrow/python/extension_type.h
index e433d9aca7..e6523824eb 100644
--- a/python/pyarrow/src/arrow/python/extension_type.h
+++ b/python/pyarrow/src/arrow/python/extension_type.h
@@ -33,7 +33,7 @@ class ARROW_PYTHON_EXPORT PyExtensionType : public ExtensionType {
// Implement extensionType API
std::string extension_name() const override { return extension_name_; }
- std::string ToString() const override;
+ std::string ToString(bool show_metadata = false) const override;
bool ExtensionEquals(const ExtensionType& other) const override;