mapleFU commented on code in PR #13901:
URL: https://github.com/apache/arrow/pull/13901#discussion_r1735954499
##########
cpp/src/parquet/properties.h:
##########
@@ -941,6 +942,15 @@ class PARQUET_EXPORT ArrowReaderProperties {
return coerce_int96_timestamp_unit_;
}
+ /// Enable Parquet supported Arrow Extension Types.
+ ///
+ /// When enabled, Parquet will use supported Arrow ExtensionTypes in mapping
to Arrow
+ /// schema. Currently only arrow::extension::json() extension type is
supported. This
+ /// will be used for utf8 columns whose LogicalType is JSON.
+ void set_arrow_extensions_enabled() { arrow_extensions_enabled_ = true; }
+ void set_arrow_extensions_disabled() { arrow_extensions_enabled_ = false; }
+ bool get_arrow_extensions_enabled() const { return
arrow_extensions_enabled_; }
Review Comment:
I'm not sure whether get/set is appropriate here...
##########
cpp/src/parquet/properties.h:
##########
@@ -941,6 +942,15 @@ class PARQUET_EXPORT ArrowReaderProperties {
return coerce_int96_timestamp_unit_;
}
+ /// Enable Parquet supported Arrow Extension Types.
+ ///
+ /// When enabled, Parquet will use supported Arrow ExtensionTypes in mapping
to Arrow
+ /// schema. Currently only arrow::extension::json() extension type is
supported. This
Review Comment:
Can we describe it more specifically? For example, the mapping between Arrow
extension types and Parquet types?
##########
cpp/src/parquet/arrow/schema.cc:
##########
@@ -427,6 +428,12 @@ Status FieldToNode(const std::string& name, const
std::shared_ptr<Field>& field,
}
case ArrowTypeId::EXTENSION: {
auto ext_type =
std::static_pointer_cast<::arrow::ExtensionType>(field->type());
+ // Built-in JSON extension is handled differently.
+ if (ext_type->extension_name() == std::string("arrow.json")) {
+ type = ParquetType::BYTE_ARRAY;
+ logical_type = LogicalType::JSON();
+ break;
Review Comment:
What does `break` mean here? Should we return directly?
##########
cpp/src/arrow/array/validate.cc:
##########
@@ -985,10 +985,23 @@ Status ValidateArrayFull(const Array& array) { return
ValidateArrayFull(*array.d
ARROW_EXPORT
Status ValidateUTF8(const ArrayData& data) {
- DCHECK(data.type->id() == Type::STRING || data.type->id() ==
Type::STRING_VIEW ||
- data.type->id() == Type::LARGE_STRING);
- UTF8DataValidator validator{data};
- return VisitTypeInline(*data.type, &validator);
+ if (data.type->id() == Type::EXTENSION) {
+ const auto& storage_type =
+ checked_pointer_cast<ExtensionType>(data.type)->storage_type();
Review Comment:
what about `checked_cast<ExtensionType*>(data.type.get())->storage_type();`?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]