rok commented on code in PR #13901:
URL: https://github.com/apache/arrow/pull/13901#discussion_r1737558825
##########
cpp/src/parquet/arrow/schema.cc:
##########
@@ -984,21 +990,51 @@ Result<bool> ApplyOriginalMetadata(const Field& origin_field, SchemaField* infer
bool modified = false;
auto& origin_type = origin_field.type();
+ const auto& inferred_type = inferred->field->type();
if (origin_type->id() == ::arrow::Type::EXTENSION) {
const auto& ex_type = checked_cast<const ::arrow::ExtensionType&>(*origin_type);
- auto origin_storage_field = origin_field.WithType(ex_type.storage_type());
+ if (inferred_type->id() != ::arrow::Type::EXTENSION &&
+ ex_type.extension_name() == std::string("arrow.json")) {
+ // Schema mismatch.
+ //
+ // Arrow extensions are DISABLED in Parquet.
+ // origin_type is ::arrow::extension::json()
+ // inferred_type is ::arrow::binary()
+ //
+ // Origin type is restored as Arrow should be considered the source of truth.
+ DCHECK_EQ(inferred_type->id(), ::arrow::Type::STRING);
Review Comment:
Moved.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]