paleolimbot commented on code in PR #12817: URL: https://github.com/apache/arrow/pull/12817#discussion_r853357707
##########
r/src/table.cpp:
##########

@@ -226,20 +222,41 @@ arrow::Status AddMetadataFromDots(SEXP lst, int num_fields,
   cpp11::writable::list metadata(2);
   metadata.names() = arrow::r::data::names_metadata;
 
-  bool has_metadata = false;
+  bool has_top_level_metadata = false;
 
   // "top level" attributes, only relevant if the first object is not named and a data
   // frame
   cpp11::strings names = Rf_getAttrib(lst, R_NamesSymbol);
   if (names[0] == "" && Rf_inherits(VECTOR_ELT(lst, 0), "data.frame")) {
     SEXP top_level = metadata[0] = arrow_attributes(VECTOR_ELT(lst, 0), true);
     if (!Rf_isNull(top_level) && XLENGTH(top_level) > 0) {
-      has_metadata = true;
+      has_top_level_metadata = true;
     }
   }
 
   // recurse to get all columns metadata
-  metadata[1] = CollectColumnMetadata(lst, num_fields, has_metadata);
+  cpp11::writable::list metadata_columns = CollectColumnMetadata(lst, num_fields);
+
+  // Remove metadata for ExtensionType columns, because these have their own mechanism for
+  // preserving R type information
+  for (R_xlen_t i = 0; i < schema->num_fields(); i++) {
+    if (schema->field(i)->type()->id() == Type::EXTENSION) {
+      metadata_columns[i] = R_NilValue;

Review Comment:

This was a really good catch! It led me down a bit of a rabbit hole, but it now works!

``` r
# remotes::install_github("apache/arrow/r#12817")
library(arrow, warn.conflicts = FALSE)

nested <- tibble::tibble(x = vctrs::new_vctr(1:5, class = "custom_vctr"))
infer_type(nested)
#> StructType
#> struct<x: <custom_vctr[0]>>
as_arrow_array(nested)
#> StructArray
#> <struct<x: <custom_vctr[0]>>>
#> -- is_valid: all not null
#> -- child 0 type: <custom_vctr[0]>
#>   [
#>     1,
#>     2,
#>     3,
#>     4,
#>     5
#>   ]
```

<sup>Created on 2022-04-19 by the [reprex package](https://reprex.tidyverse.org) (v2.0.1)</sup>

--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org