This is an automated email from the ASF dual-hosted git repository. kou pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/arrow.git
commit b8591e8598ea931e32a32b916eea690aef1f57c7 Author: Romain Francois <[email protected]> AuthorDate: Mon Oct 8 04:18:38 2018 -0400 ARROW-3355: [R] Support for factors With the slight discomfort that in arrow indices are 0-based and in R factor indices are 1-based, so we need to copy the data in both directions. Author: Romain Francois <[email protected]> Closes #2711 from romainfrancois/feature/3355-factors and squashes the following commits: a59b79514 <Romain Francois> using GetValuesSafely 9207ded6f <Romain Francois> support for dictionaries with indices of types int8, int16, int32. 04431a59e <Romain Francois> handle R <-> arrow time differences. 12013bb45 <Romain Francois> custom DictionaryArrayInt32Indices_to_Vector function b44a1a189 <Romain Francois> custom MakeFactorArray function 6a44a87a2 <Romain Francois> ARROW-3340: support for POSIXct vectors bdc8aa0be <Romain Francois> array supports Date (either from int or double as R is lazy about it). (ARROW-3340) 9c0aedcca <Romain Francois> - static_ptr related things. 
f6c955d24 <Romain Francois> Test empty arrays and arrays with only nulls 4f52335a7 <Romain Francois> using BitUtil::BytesForBits(n) :heavy_check_mark: 38a3832b9 <Romain Francois> test dictionary column in record batch 007a1cf06 <Romain Francois> Move dictionary to its own file f745a0d9e <Romain Francois> test DictionaryArray<string, int32> -> factor adf6edf83 <Romain Francois> test factor -> DictionaryArray 3f82558c6 <Romain Francois> Convert factor to DictionaryArray 34d8e1186 <Romain Francois> minimal support for arrow::DictionaryType --- r/DESCRIPTION | 13 +- r/NAMESPACE | 2 +- r/R/R6.R | 2 +- r/R/RcppExports.R | 180 ++++++----- r/R/array.R | 18 +- r/R/dictionary.R | 44 +++ r/R/enums.R | 37 ++- r/man/dictionary.Rd | 18 ++ r/src/ChunkedArray.cpp | 9 +- r/src/DataType.cpp | 29 ++ r/src/RcppExports.cpp | 592 +++++++++++++++++++++--------------- r/src/array.cpp | 388 ++++++++++++++++++++--- r/src/arrow_types.h | 32 +- r/tests/testthat/test-Array.R | 146 +++++++++ r/tests/testthat/test-DataType.R | 12 + r/tests/testthat/test-RecordBatch.R | 52 +++- 16 files changed, 1164 insertions(+), 410 deletions(-) diff --git a/r/DESCRIPTION b/r/DESCRIPTION index bf76b72..40253a8 100644 --- a/r/DESCRIPTION +++ b/r/DESCRIPTION @@ -12,24 +12,26 @@ Encoding: UTF-8 LazyData: true SystemRequirements: C++11 LinkingTo: - Rcpp (>= 0.12.18) + Rcpp (>= 0.12.18.2) Imports: - Rcpp (>= 0.12.18), + Rcpp (>= 0.12.18.2), rlang, purrr, assertthat, glue, R6, - vctrs, + vctrs (>= 0.0.0.9000), fs, tibble, crayon Remotes: - r-lib/vctrs + r-lib/vctrs, + RcppCore/Rcpp Roxygen: list(markdown = TRUE) RoxygenNote: 6.1.0.9000 Suggests: - testthat + testthat, + lubridate Collate: 'enums.R' 'R6.R' @@ -44,6 +46,7 @@ Collate: 'Struct.R' 'Table.R' 'array.R' + 'dictionary.R' 'memory_pool.R' 'reexports-tibble.R' 'zzz.R' diff --git a/r/NAMESPACE b/r/NAMESPACE index d8dfecd..cf5f226 100644 --- a/r/NAMESPACE +++ b/r/NAMESPACE @@ -1,7 +1,6 @@ # Generated by roxygen2: do not edit by hand S3method("!=","arrow::Object") 
-S3method("$","arrow-enum") S3method("==","arrow::Array") S3method("==","arrow::DataType") S3method("==","arrow::Field") @@ -22,6 +21,7 @@ export(chunked_array) export(date32) export(date64) export(decimal) +export(dictionary) export(float16) export(float32) export(float64) diff --git a/r/R/R6.R b/r/R/R6.R index 80bdf8e..734ddc0 100644 --- a/r/R/R6.R +++ b/r/R/R6.R @@ -96,7 +96,7 @@ LIST = `arrow::ListType`$new(self$pointer()), STRUCT = `arrow::StructType`$new(self$pointer()), UNION = stop("Type UNION not implemented yet"), - DICTIONARY = stop("Type DICTIONARY not implemented yet"), + DICTIONARY = `arrow::DictionaryType`$new(self$pointer()), MAP = stop("Type MAP not implemented yet") ) } diff --git a/r/R/RcppExports.R b/r/R/RcppExports.R index c70a515..0d0299f 100644 --- a/r/R/RcppExports.R +++ b/r/R/RcppExports.R @@ -1,74 +1,6 @@ # Generated by using Rcpp::compileAttributes() -> do not edit by hand # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 -Array__from_vector <- function(x) { - .Call(`_arrow_Array__from_vector`, x) -} - -Array__as_vector <- function(array) { - .Call(`_arrow_Array__as_vector`, array) -} - -Array__Slice1 <- function(array, offset) { - .Call(`_arrow_Array__Slice1`, array, offset) -} - -Array__Slice2 <- function(array, offset, length) { - .Call(`_arrow_Array__Slice2`, array, offset, length) -} - -Array__IsNull <- function(x, i) { - .Call(`_arrow_Array__IsNull`, x, i) -} - -Array__IsValid <- function(x, i) { - .Call(`_arrow_Array__IsValid`, x, i) -} - -Array__length <- function(x) { - .Call(`_arrow_Array__length`, x) -} - -Array__offset <- function(x) { - .Call(`_arrow_Array__offset`, x) -} - -Array__null_count <- function(x) { - .Call(`_arrow_Array__null_count`, x) -} - -Array__type <- function(x) { - .Call(`_arrow_Array__type`, x) -} - -Array__ToString <- function(x) { - .Call(`_arrow_Array__ToString`, x) -} - -Array__type_id <- function(x) { - .Call(`_arrow_Array__type_id`, x) -} - -Array__Equals <- function(lhs, rhs) { - 
.Call(`_arrow_Array__Equals`, lhs, rhs) -} - -Array__ApproxEquals <- function(lhs, rhs) { - .Call(`_arrow_Array__ApproxEquals`, lhs, rhs) -} - -Array__data <- function(array) { - .Call(`_arrow_Array__data`, array) -} - -Array__RangeEquals <- function(self, other, start_idx, end_idx, other_start_idx) { - .Call(`_arrow_Array__RangeEquals`, self, other, start_idx, end_idx, other_start_idx) -} - -Array__Mask <- function(array) { - .Call(`_arrow_Array__Mask`, array) -} - ArrayData__get_type <- function(x) { .Call(`_arrow_ArrayData__get_type`, x) } @@ -305,20 +237,24 @@ Object__pointer_address <- function(obj) { .Call(`_arrow_Object__pointer_address`, obj) } -Field__initialize <- function(name, type, nullable = TRUE) { - .Call(`_arrow_Field__initialize`, name, type, nullable) +DictionaryType__initialize <- function(type, array, ordered) { + .Call(`_arrow_DictionaryType__initialize`, type, array, ordered) } -Field__ToString <- function(type) { - .Call(`_arrow_Field__ToString`, type) +DictionaryType__index_type <- function(type) { + .Call(`_arrow_DictionaryType__index_type`, type) } -Field__name <- function(type) { - .Call(`_arrow_Field__name`, type) +DictionaryType__name <- function(type) { + .Call(`_arrow_DictionaryType__name`, type) } -Field__nullable <- function(type) { - .Call(`_arrow_Field__nullable`, type) +DictionaryType__dictionary <- function(type) { + .Call(`_arrow_DictionaryType__dictionary`, type) +} + +DictionaryType__ordered <- function(type) { + .Call(`_arrow_DictionaryType__ordered`, type) } MemoryPool__default <- function() { @@ -421,3 +357,95 @@ Table__column <- function(table, i) { .Call(`_arrow_Table__column`, table, i) } +Array__from_vector <- function(x) { + .Call(`_arrow_Array__from_vector`, x) +} + +Array__as_vector <- function(array) { + .Call(`_arrow_Array__as_vector`, array) +} + +Array__Slice1 <- function(array, offset) { + .Call(`_arrow_Array__Slice1`, array, offset) +} + +Array__Slice2 <- function(array, offset, length) { + 
.Call(`_arrow_Array__Slice2`, array, offset, length) +} + +Array__IsNull <- function(x, i) { + .Call(`_arrow_Array__IsNull`, x, i) +} + +Array__IsValid <- function(x, i) { + .Call(`_arrow_Array__IsValid`, x, i) +} + +Array__length <- function(x) { + .Call(`_arrow_Array__length`, x) +} + +Array__offset <- function(x) { + .Call(`_arrow_Array__offset`, x) +} + +Array__null_count <- function(x) { + .Call(`_arrow_Array__null_count`, x) +} + +Array__type <- function(x) { + .Call(`_arrow_Array__type`, x) +} + +Array__ToString <- function(x) { + .Call(`_arrow_Array__ToString`, x) +} + +Array__type_id <- function(x) { + .Call(`_arrow_Array__type_id`, x) +} + +Array__Equals <- function(lhs, rhs) { + .Call(`_arrow_Array__Equals`, lhs, rhs) +} + +Array__ApproxEquals <- function(lhs, rhs) { + .Call(`_arrow_Array__ApproxEquals`, lhs, rhs) +} + +Array__data <- function(array) { + .Call(`_arrow_Array__data`, array) +} + +Array__RangeEquals <- function(self, other, start_idx, end_idx, other_start_idx) { + .Call(`_arrow_Array__RangeEquals`, self, other, start_idx, end_idx, other_start_idx) +} + +Array__Mask <- function(array) { + .Call(`_arrow_Array__Mask`, array) +} + +DictionaryArray__indices <- function(array) { + .Call(`_arrow_DictionaryArray__indices`, array) +} + +DictionaryArray__dictionary <- function(array) { + .Call(`_arrow_DictionaryArray__dictionary`, array) +} + +Field__initialize <- function(name, type, nullable = TRUE) { + .Call(`_arrow_Field__initialize`, name, type, nullable) +} + +Field__ToString <- function(type) { + .Call(`_arrow_Field__ToString`, type) +} + +Field__name <- function(type) { + .Call(`_arrow_Field__name`, type) +} + +Field__nullable <- function(type) { + .Call(`_arrow_Field__nullable`, type) +} + diff --git a/r/R/array.R b/r/R/array.R index 6e90c7d..7e64daf 100644 --- a/r/R/array.R +++ b/r/R/array.R @@ -46,6 +46,14 @@ ) ) +`arrow::Array`$dispatch <- function(xp){ + a <- `arrow::Array`$new(xp) + if(a$type_id() == Type$DICTIONARY){ + a <- 
`arrow::DictionaryArray`$new(xp) + } + a +} + #' @export `length.arrow::Array` <- function(x) x$length() @@ -58,5 +66,13 @@ #' #' @export array <- function(...){ - `arrow::Array`$new(Array__from_vector(vctrs::vec_c(...))) + `arrow::Array`$dispatch(Array__from_vector(vctrs::vec_c(...))) } + +`arrow::DictionaryArray` <- R6Class("arrow::DictionaryArray", inherit = `arrow::Array`, + public = list( + indices = function() `arrow::Array`$dispatch(DictionaryArray__indices(self)), + dictionary = function() `arrow::Array`$dispatch(DictionaryArray__dictionary(self)) + ) +) + diff --git a/r/R/dictionary.R b/r/R/dictionary.R new file mode 100644 index 0000000..b70e70a --- /dev/null +++ b/r/R/dictionary.R @@ -0,0 +1,44 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +#' @include R6.R + +`arrow::DictionaryType` <- R6Class("arrow::DictionaryType", + inherit = `arrow::FixedWidthType`, + public = list( + index_type = function() `arrow::DataType`$dispatch(DictionaryType__index_type(self)), + name = function() DictionaryType__name(self), + dictionary = function() `arrow::Array`$new(DictionaryType__dictionary(self)), + ordered = function() DictionaryType__ordered(self) + ) + +) + +#' dictionary type factory +#' +#' @param type indices type, e.g. 
[int32()] +#' @param values values array, typically an arrow array of strings +#' @param ordered Is this an ordered dictionary +#' +#' @export +dictionary <- function(type, values, ordered = FALSE) { + assert_that( + inherits(type, "arrow::DataType"), + inherits(values, "arrow::Array") + ) + `arrow::DictionaryType`$new(DictionaryType__initialize(type, values, ordered)) +} diff --git a/r/R/enums.R b/r/R/enums.R index 3649d4e..a491d52 100644 --- a/r/R/enums.R +++ b/r/R/enums.R @@ -16,48 +16,47 @@ # under the License. #' @export -`$.arrow-enum` <- function(x, y){ - structure(unclass(x)[[y]], class = class(x)) -} - -#' @export `print.arrow-enum` <- function(x, ...){ NextMethod() } #' @importFrom rlang seq2 quo_name set_names #' @importFrom purrr map_chr -enum <- function(class, ...){ - names <- purrr::map_chr(rlang::quos(...), rlang::quo_name) - names[is.na(names)] <- "NA" - +enum <- function(class, ..., .list = list(...)){ structure( - rlang::set_names(rlang::seq2(0L, length(names)-1), names), + .list, class = c(class, "arrow-enum") ) } #' @rdname DataType #' @export -TimeUnit <- enum("arrow::TimeUnit::type", SECOND, MILLI, MICRO, NANO) +TimeUnit <- enum("arrow::TimeUnit::type", + SECOND = 0L, MILLI = 1L, MICRO = 2L, NANO = 3L +) #' @rdname DataType #' @export -DateUnit <- enum("arrow::DateUnit", DAY, MILLI) +DateUnit <- enum("arrow::DateUnit", DAY = 0L, MILLI = 1L) #' @rdname DataType #' @export Type <- enum("arrow::Type::type", - NA, BOOL, UINT8, INT8, UINT16, INT16, UINT32, INT32, UINT64, INT64, - HALF_FLOAT, FLOAT, DOUBLE, STRING, BINARY, DATE32, DATE64, TIMESTAMP, - INTERVAL, DECIMAL, LIST, STRUCT, UNION, DICTIONARY, MAP + "NA" = 0L, BOOL = 1L, UINT8 = 2L, INT8 = 3L, UINT16 = 4L, INT16 = 5L, + UINT32 = 6L, INT32 = 7L, UINT64 = 8L, INT64 = 9L, + HALF_FLOAT = 10L, FLOAT = 11L, DOUBLE = 12L, STRING = 13L, + BINARY = 14L, FIXED_SIZE_BINARY = 15L, DATE32 = 16L, DATE64 = 17L, TIMESTAMP = 18L, + TIME32 = 19L, TIME64 = 20L, INTERVAL = 21L, DECIMAL = 22L, LIST = 23L, 
STRUCT = 24L, + UNION = 25L, DICTIONARY = 26L, MAP = 27L ) #' @rdname DataType #' @export StatusCode <- enum("arrow::StatusCode", - OK, OutOfMemory, KeyError, TypeError, Invalid, IOError, - CapacityError, UnknownError, NotImplemented, SerializationError, - PythonError, PlasmaObjectExists, PlasmaObjectNonexistent, PlasmaStoreFull, - PlasmaObjectAlreadySealed + OK = 0L, OutOfMemory = 1L, KeyError = 2L, TypeError = 3L, + Invalid = 4L, IOError = 5L, CapacityError = 6L, + UnknownError = 9L, NotImplemented = 10L, SerializationError = 11L, + PythonError = 12L, RError = 13L, + PlasmaObjectExists = 20L, PlasmaObjectNonexistent = 21L, + PlasmaStoreFull = 22L, PlasmaObjectAlreadySealed = 23L ) diff --git a/r/man/dictionary.Rd b/r/man/dictionary.Rd new file mode 100644 index 0000000..22d35f6 --- /dev/null +++ b/r/man/dictionary.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dictionary.R +\name{dictionary} +\alias{dictionary} +\title{dictionary type factory} +\usage{ +dictionary(type, values, ordered = FALSE) +} +\arguments{ +\item{type}{indices type, e.g. 
\code{\link[=int32]{int32()}}} + +\item{values}{values array, typically an arrow array of strings} + +\item{ordered}{Is this an ordered dictionary} +} +\description{ +dictionary type factory +} diff --git a/r/src/ChunkedArray.cpp b/r/src/ChunkedArray.cpp index 59f21f5..aa348d9 100644 --- a/r/src/ChunkedArray.cpp +++ b/r/src/ChunkedArray.cpp @@ -23,7 +23,7 @@ using namespace arrow; template <int RTYPE> inline SEXP simple_ChunkedArray_to_Vector( const std::shared_ptr<arrow::ChunkedArray>& chunked_array) { - using stored_type = typename Rcpp::Vector<RTYPE>::stored_type; + using value_type = typename Rcpp::Vector<RTYPE>::stored_type; Rcpp::Vector<RTYPE> out = no_init(chunked_array->length()); auto p = out.begin(); @@ -34,10 +34,9 @@ inline SEXP simple_ChunkedArray_to_Vector( // copy the data auto q = p; - p = std::copy_n( - reinterpret_cast<const stored_type*>(chunk->data()->buffers[1]->data() + - chunk->offset() * sizeof(stored_type)), - n, p); + auto p_chunk = + arrow::r::GetValuesSafely<value_type>(chunk->data(), 1, chunk->offset()); + p = std::copy_n(p_chunk, n, p); // set NA using the bitmap auto bitmap_data = chunk->null_bitmap(); diff --git a/r/src/DataType.cpp b/r/src/DataType.cpp index 00e12eb..bd0b4b9 100644 --- a/r/src/DataType.cpp +++ b/r/src/DataType.cpp @@ -214,3 +214,32 @@ arrow::TimeUnit::type TimestampType__unit( std::string Object__pointer_address(SEXP obj) { return tfm::format("%p", EXTPTR_PTR(obj)); } + +// [[Rcpp::export]] +std::shared_ptr<arrow::DataType> DictionaryType__initialize( + const std::shared_ptr<arrow::DataType>& type, + const std::shared_ptr<arrow::Array>& array, bool ordered) { + return arrow::dictionary(type, array, ordered); +} + +// [[Rcpp::export]] +std::shared_ptr<arrow::DataType> DictionaryType__index_type( + const std::shared_ptr<arrow::DictionaryType>& type) { + return type->index_type(); +} + +// [[Rcpp::export]] +std::string DictionaryType__name(const std::shared_ptr<arrow::DictionaryType>& type) { + return type->name(); +} + 
+// [[Rcpp::export]] +std::shared_ptr<arrow::Array> DictionaryType__dictionary( + const std::shared_ptr<arrow::DictionaryType>& type) { + return type->dictionary(); +} + +// [[Rcpp::export]] +bool DictionaryType__ordered(const std::shared_ptr<arrow::DictionaryType>& type) { + return type->ordered(); +} diff --git a/r/src/RcppExports.cpp b/r/src/RcppExports.cpp index 200071b..dcf005a 100644 --- a/r/src/RcppExports.cpp +++ b/r/src/RcppExports.cpp @@ -6,204 +6,6 @@ using namespace Rcpp; -// Array__from_vector -std::shared_ptr<arrow::Array> Array__from_vector(SEXP x); -RcppExport SEXP _arrow_Array__from_vector(SEXP xSEXP) { -BEGIN_RCPP - Rcpp::RObject rcpp_result_gen; - Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter< SEXP >::type x(xSEXP); - rcpp_result_gen = Rcpp::wrap(Array__from_vector(x)); - return rcpp_result_gen; -END_RCPP -} -// Array__as_vector -SEXP Array__as_vector(const std::shared_ptr<arrow::Array>& array); -RcppExport SEXP _arrow_Array__as_vector(SEXP arraySEXP) { -BEGIN_RCPP - Rcpp::RObject rcpp_result_gen; - Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter< const std::shared_ptr<arrow::Array>& >::type array(arraySEXP); - rcpp_result_gen = Rcpp::wrap(Array__as_vector(array)); - return rcpp_result_gen; -END_RCPP -} -// Array__Slice1 -std::shared_ptr<arrow::Array> Array__Slice1(const std::shared_ptr<arrow::Array>& array, int offset); -RcppExport SEXP _arrow_Array__Slice1(SEXP arraySEXP, SEXP offsetSEXP) { -BEGIN_RCPP - Rcpp::RObject rcpp_result_gen; - Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter< const std::shared_ptr<arrow::Array>& >::type array(arraySEXP); - Rcpp::traits::input_parameter< int >::type offset(offsetSEXP); - rcpp_result_gen = Rcpp::wrap(Array__Slice1(array, offset)); - return rcpp_result_gen; -END_RCPP -} -// Array__Slice2 -std::shared_ptr<arrow::Array> Array__Slice2(const std::shared_ptr<arrow::Array>& array, int offset, int length); -RcppExport SEXP _arrow_Array__Slice2(SEXP 
arraySEXP, SEXP offsetSEXP, SEXP lengthSEXP) { -BEGIN_RCPP - Rcpp::RObject rcpp_result_gen; - Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter< const std::shared_ptr<arrow::Array>& >::type array(arraySEXP); - Rcpp::traits::input_parameter< int >::type offset(offsetSEXP); - Rcpp::traits::input_parameter< int >::type length(lengthSEXP); - rcpp_result_gen = Rcpp::wrap(Array__Slice2(array, offset, length)); - return rcpp_result_gen; -END_RCPP -} -// Array__IsNull -bool Array__IsNull(const std::shared_ptr<arrow::Array>& x, int i); -RcppExport SEXP _arrow_Array__IsNull(SEXP xSEXP, SEXP iSEXP) { -BEGIN_RCPP - Rcpp::RObject rcpp_result_gen; - Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter< const std::shared_ptr<arrow::Array>& >::type x(xSEXP); - Rcpp::traits::input_parameter< int >::type i(iSEXP); - rcpp_result_gen = Rcpp::wrap(Array__IsNull(x, i)); - return rcpp_result_gen; -END_RCPP -} -// Array__IsValid -bool Array__IsValid(const std::shared_ptr<arrow::Array>& x, int i); -RcppExport SEXP _arrow_Array__IsValid(SEXP xSEXP, SEXP iSEXP) { -BEGIN_RCPP - Rcpp::RObject rcpp_result_gen; - Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter< const std::shared_ptr<arrow::Array>& >::type x(xSEXP); - Rcpp::traits::input_parameter< int >::type i(iSEXP); - rcpp_result_gen = Rcpp::wrap(Array__IsValid(x, i)); - return rcpp_result_gen; -END_RCPP -} -// Array__length -int Array__length(const std::shared_ptr<arrow::Array>& x); -RcppExport SEXP _arrow_Array__length(SEXP xSEXP) { -BEGIN_RCPP - Rcpp::RObject rcpp_result_gen; - Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter< const std::shared_ptr<arrow::Array>& >::type x(xSEXP); - rcpp_result_gen = Rcpp::wrap(Array__length(x)); - return rcpp_result_gen; -END_RCPP -} -// Array__offset -int Array__offset(const std::shared_ptr<arrow::Array>& x); -RcppExport SEXP _arrow_Array__offset(SEXP xSEXP) { -BEGIN_RCPP - Rcpp::RObject rcpp_result_gen; - Rcpp::RNGScope rcpp_rngScope_gen; - 
Rcpp::traits::input_parameter< const std::shared_ptr<arrow::Array>& >::type x(xSEXP); - rcpp_result_gen = Rcpp::wrap(Array__offset(x)); - return rcpp_result_gen; -END_RCPP -} -// Array__null_count -int Array__null_count(const std::shared_ptr<arrow::Array>& x); -RcppExport SEXP _arrow_Array__null_count(SEXP xSEXP) { -BEGIN_RCPP - Rcpp::RObject rcpp_result_gen; - Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter< const std::shared_ptr<arrow::Array>& >::type x(xSEXP); - rcpp_result_gen = Rcpp::wrap(Array__null_count(x)); - return rcpp_result_gen; -END_RCPP -} -// Array__type -std::shared_ptr<arrow::DataType> Array__type(const std::shared_ptr<arrow::Array>& x); -RcppExport SEXP _arrow_Array__type(SEXP xSEXP) { -BEGIN_RCPP - Rcpp::RObject rcpp_result_gen; - Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter< const std::shared_ptr<arrow::Array>& >::type x(xSEXP); - rcpp_result_gen = Rcpp::wrap(Array__type(x)); - return rcpp_result_gen; -END_RCPP -} -// Array__ToString -std::string Array__ToString(const std::shared_ptr<arrow::Array>& x); -RcppExport SEXP _arrow_Array__ToString(SEXP xSEXP) { -BEGIN_RCPP - Rcpp::RObject rcpp_result_gen; - Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter< const std::shared_ptr<arrow::Array>& >::type x(xSEXP); - rcpp_result_gen = Rcpp::wrap(Array__ToString(x)); - return rcpp_result_gen; -END_RCPP -} -// Array__type_id -arrow::Type::type Array__type_id(const std::shared_ptr<arrow::Array>& x); -RcppExport SEXP _arrow_Array__type_id(SEXP xSEXP) { -BEGIN_RCPP - Rcpp::RObject rcpp_result_gen; - Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter< const std::shared_ptr<arrow::Array>& >::type x(xSEXP); - rcpp_result_gen = Rcpp::wrap(Array__type_id(x)); - return rcpp_result_gen; -END_RCPP -} -// Array__Equals -bool Array__Equals(const std::shared_ptr<arrow::Array>& lhs, const std::shared_ptr<arrow::Array>& rhs); -RcppExport SEXP _arrow_Array__Equals(SEXP lhsSEXP, SEXP rhsSEXP) { 
-BEGIN_RCPP - Rcpp::RObject rcpp_result_gen; - Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter< const std::shared_ptr<arrow::Array>& >::type lhs(lhsSEXP); - Rcpp::traits::input_parameter< const std::shared_ptr<arrow::Array>& >::type rhs(rhsSEXP); - rcpp_result_gen = Rcpp::wrap(Array__Equals(lhs, rhs)); - return rcpp_result_gen; -END_RCPP -} -// Array__ApproxEquals -bool Array__ApproxEquals(const std::shared_ptr<arrow::Array>& lhs, const std::shared_ptr<arrow::Array>& rhs); -RcppExport SEXP _arrow_Array__ApproxEquals(SEXP lhsSEXP, SEXP rhsSEXP) { -BEGIN_RCPP - Rcpp::RObject rcpp_result_gen; - Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter< const std::shared_ptr<arrow::Array>& >::type lhs(lhsSEXP); - Rcpp::traits::input_parameter< const std::shared_ptr<arrow::Array>& >::type rhs(rhsSEXP); - rcpp_result_gen = Rcpp::wrap(Array__ApproxEquals(lhs, rhs)); - return rcpp_result_gen; -END_RCPP -} -// Array__data -std::shared_ptr<arrow::ArrayData> Array__data(const std::shared_ptr<arrow::Array>& array); -RcppExport SEXP _arrow_Array__data(SEXP arraySEXP) { -BEGIN_RCPP - Rcpp::RObject rcpp_result_gen; - Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter< const std::shared_ptr<arrow::Array>& >::type array(arraySEXP); - rcpp_result_gen = Rcpp::wrap(Array__data(array)); - return rcpp_result_gen; -END_RCPP -} -// Array__RangeEquals -bool Array__RangeEquals(const std::shared_ptr<arrow::Array>& self, const std::shared_ptr<arrow::Array>& other, int start_idx, int end_idx, int other_start_idx); -RcppExport SEXP _arrow_Array__RangeEquals(SEXP selfSEXP, SEXP otherSEXP, SEXP start_idxSEXP, SEXP end_idxSEXP, SEXP other_start_idxSEXP) { -BEGIN_RCPP - Rcpp::RObject rcpp_result_gen; - Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter< const std::shared_ptr<arrow::Array>& >::type self(selfSEXP); - Rcpp::traits::input_parameter< const std::shared_ptr<arrow::Array>& >::type other(otherSEXP); - Rcpp::traits::input_parameter< int 
>::type start_idx(start_idxSEXP); - Rcpp::traits::input_parameter< int >::type end_idx(end_idxSEXP); - Rcpp::traits::input_parameter< int >::type other_start_idx(other_start_idxSEXP); - rcpp_result_gen = Rcpp::wrap(Array__RangeEquals(self, other, start_idx, end_idx, other_start_idx)); - return rcpp_result_gen; -END_RCPP -} -// Array__Mask -LogicalVector Array__Mask(const std::shared_ptr<arrow::Array>& array); -RcppExport SEXP _arrow_Array__Mask(SEXP arraySEXP) { -BEGIN_RCPP - Rcpp::RObject rcpp_result_gen; - Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter< const std::shared_ptr<arrow::Array>& >::type array(arraySEXP); - rcpp_result_gen = Rcpp::wrap(Array__Mask(array)); - return rcpp_result_gen; -END_RCPP -} // ArrayData__get_type std::shared_ptr<arrow::DataType> ArrayData__get_type(const std::shared_ptr<arrow::ArrayData>& x); RcppExport SEXP _arrow_ArrayData__get_type(SEXP xSEXP) { @@ -844,49 +646,60 @@ BEGIN_RCPP return rcpp_result_gen; END_RCPP } -// Field__initialize -std::shared_ptr<arrow::Field> Field__initialize(const std::string& name, const std::shared_ptr<arrow::DataType>& type, bool nullable); -RcppExport SEXP _arrow_Field__initialize(SEXP nameSEXP, SEXP typeSEXP, SEXP nullableSEXP) { +// DictionaryType__initialize +std::shared_ptr<arrow::DataType> DictionaryType__initialize(const std::shared_ptr<arrow::DataType>& type, const std::shared_ptr<arrow::Array>& array, bool ordered); +RcppExport SEXP _arrow_DictionaryType__initialize(SEXP typeSEXP, SEXP arraySEXP, SEXP orderedSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter< const std::string& >::type name(nameSEXP); Rcpp::traits::input_parameter< const std::shared_ptr<arrow::DataType>& >::type type(typeSEXP); - Rcpp::traits::input_parameter< bool >::type nullable(nullableSEXP); - rcpp_result_gen = Rcpp::wrap(Field__initialize(name, type, nullable)); + Rcpp::traits::input_parameter< const std::shared_ptr<arrow::Array>& >::type 
array(arraySEXP); + Rcpp::traits::input_parameter< bool >::type ordered(orderedSEXP); + rcpp_result_gen = Rcpp::wrap(DictionaryType__initialize(type, array, ordered)); return rcpp_result_gen; END_RCPP } -// Field__ToString -std::string Field__ToString(const std::shared_ptr<arrow::Field>& type); -RcppExport SEXP _arrow_Field__ToString(SEXP typeSEXP) { +// DictionaryType__index_type +std::shared_ptr<arrow::DataType> DictionaryType__index_type(const std::shared_ptr<arrow::DictionaryType>& type); +RcppExport SEXP _arrow_DictionaryType__index_type(SEXP typeSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter< const std::shared_ptr<arrow::Field>& >::type type(typeSEXP); - rcpp_result_gen = Rcpp::wrap(Field__ToString(type)); + Rcpp::traits::input_parameter< const std::shared_ptr<arrow::DictionaryType>& >::type type(typeSEXP); + rcpp_result_gen = Rcpp::wrap(DictionaryType__index_type(type)); return rcpp_result_gen; END_RCPP } -// Field__name -std::string Field__name(std::shared_ptr<arrow::Field> type); -RcppExport SEXP _arrow_Field__name(SEXP typeSEXP) { +// DictionaryType__name +std::string DictionaryType__name(const std::shared_ptr<arrow::DictionaryType>& type); +RcppExport SEXP _arrow_DictionaryType__name(SEXP typeSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter< std::shared_ptr<arrow::Field> >::type type(typeSEXP); - rcpp_result_gen = Rcpp::wrap(Field__name(type)); + Rcpp::traits::input_parameter< const std::shared_ptr<arrow::DictionaryType>& >::type type(typeSEXP); + rcpp_result_gen = Rcpp::wrap(DictionaryType__name(type)); return rcpp_result_gen; END_RCPP } -// Field__nullable -bool Field__nullable(std::shared_ptr<arrow::Field> type); -RcppExport SEXP _arrow_Field__nullable(SEXP typeSEXP) { +// DictionaryType__dictionary +std::shared_ptr<arrow::Array> DictionaryType__dictionary(const std::shared_ptr<arrow::DictionaryType>& type); 
+RcppExport SEXP _arrow_DictionaryType__dictionary(SEXP typeSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter< std::shared_ptr<arrow::Field> >::type type(typeSEXP); - rcpp_result_gen = Rcpp::wrap(Field__nullable(type)); + Rcpp::traits::input_parameter< const std::shared_ptr<arrow::DictionaryType>& >::type type(typeSEXP); + rcpp_result_gen = Rcpp::wrap(DictionaryType__dictionary(type)); + return rcpp_result_gen; +END_RCPP +} +// DictionaryType__ordered +bool DictionaryType__ordered(const std::shared_ptr<arrow::DictionaryType>& type); +RcppExport SEXP _arrow_DictionaryType__ordered(SEXP typeSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const std::shared_ptr<arrow::DictionaryType>& >::type type(typeSEXP); + rcpp_result_gen = Rcpp::wrap(DictionaryType__ordered(type)); return rcpp_result_gen; END_RCPP } @@ -1174,36 +987,285 @@ BEGIN_RCPP return rcpp_result_gen; END_RCPP } - -static const R_CallMethodDef CallEntries[] = { - {"_arrow_Array__from_vector", (DL_FUNC) &_arrow_Array__from_vector, 1}, - {"_arrow_Array__as_vector", (DL_FUNC) &_arrow_Array__as_vector, 1}, - {"_arrow_Array__Slice1", (DL_FUNC) &_arrow_Array__Slice1, 2}, - {"_arrow_Array__Slice2", (DL_FUNC) &_arrow_Array__Slice2, 3}, - {"_arrow_Array__IsNull", (DL_FUNC) &_arrow_Array__IsNull, 2}, - {"_arrow_Array__IsValid", (DL_FUNC) &_arrow_Array__IsValid, 2}, - {"_arrow_Array__length", (DL_FUNC) &_arrow_Array__length, 1}, - {"_arrow_Array__offset", (DL_FUNC) &_arrow_Array__offset, 1}, - {"_arrow_Array__null_count", (DL_FUNC) &_arrow_Array__null_count, 1}, - {"_arrow_Array__type", (DL_FUNC) &_arrow_Array__type, 1}, - {"_arrow_Array__ToString", (DL_FUNC) &_arrow_Array__ToString, 1}, - {"_arrow_Array__type_id", (DL_FUNC) &_arrow_Array__type_id, 1}, - {"_arrow_Array__Equals", (DL_FUNC) &_arrow_Array__Equals, 2}, - {"_arrow_Array__ApproxEquals", (DL_FUNC) 
&_arrow_Array__ApproxEquals, 2}, - {"_arrow_Array__data", (DL_FUNC) &_arrow_Array__data, 1}, - {"_arrow_Array__RangeEquals", (DL_FUNC) &_arrow_Array__RangeEquals, 5}, - {"_arrow_Array__Mask", (DL_FUNC) &_arrow_Array__Mask, 1}, - {"_arrow_ArrayData__get_type", (DL_FUNC) &_arrow_ArrayData__get_type, 1}, - {"_arrow_ArrayData__get_length", (DL_FUNC) &_arrow_ArrayData__get_length, 1}, - {"_arrow_ArrayData__get_null_count", (DL_FUNC) &_arrow_ArrayData__get_null_count, 1}, - {"_arrow_ArrayData__get_offset", (DL_FUNC) &_arrow_ArrayData__get_offset, 1}, - {"_arrow_ChunkedArray__length", (DL_FUNC) &_arrow_ChunkedArray__length, 1}, - {"_arrow_ChunkedArray__null_count", (DL_FUNC) &_arrow_ChunkedArray__null_count, 1}, - {"_arrow_ChunkedArray__num_chunks", (DL_FUNC) &_arrow_ChunkedArray__num_chunks, 1}, - {"_arrow_ChunkedArray__chunk", (DL_FUNC) &_arrow_ChunkedArray__chunk, 2}, - {"_arrow_ChunkedArray__chunks", (DL_FUNC) &_arrow_ChunkedArray__chunks, 1}, - {"_arrow_ChunkedArray__type", (DL_FUNC) &_arrow_ChunkedArray__type, 1}, - {"_arrow_ChunkedArray__as_vector", (DL_FUNC) &_arrow_ChunkedArray__as_vector, 1}, +// Array__from_vector +std::shared_ptr<arrow::Array> Array__from_vector(SEXP x); +RcppExport SEXP _arrow_Array__from_vector(SEXP xSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< SEXP >::type x(xSEXP); + rcpp_result_gen = Rcpp::wrap(Array__from_vector(x)); + return rcpp_result_gen; +END_RCPP +} +// Array__as_vector +SEXP Array__as_vector(const std::shared_ptr<arrow::Array>& array); +RcppExport SEXP _arrow_Array__as_vector(SEXP arraySEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const std::shared_ptr<arrow::Array>& >::type array(arraySEXP); + rcpp_result_gen = Rcpp::wrap(Array__as_vector(array)); + return rcpp_result_gen; +END_RCPP +} +// Array__Slice1 +std::shared_ptr<arrow::Array> Array__Slice1(const 
std::shared_ptr<arrow::Array>& array, int offset); +RcppExport SEXP _arrow_Array__Slice1(SEXP arraySEXP, SEXP offsetSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const std::shared_ptr<arrow::Array>& >::type array(arraySEXP); + Rcpp::traits::input_parameter< int >::type offset(offsetSEXP); + rcpp_result_gen = Rcpp::wrap(Array__Slice1(array, offset)); + return rcpp_result_gen; +END_RCPP +} +// Array__Slice2 +std::shared_ptr<arrow::Array> Array__Slice2(const std::shared_ptr<arrow::Array>& array, int offset, int length); +RcppExport SEXP _arrow_Array__Slice2(SEXP arraySEXP, SEXP offsetSEXP, SEXP lengthSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const std::shared_ptr<arrow::Array>& >::type array(arraySEXP); + Rcpp::traits::input_parameter< int >::type offset(offsetSEXP); + Rcpp::traits::input_parameter< int >::type length(lengthSEXP); + rcpp_result_gen = Rcpp::wrap(Array__Slice2(array, offset, length)); + return rcpp_result_gen; +END_RCPP +} +// Array__IsNull +bool Array__IsNull(const std::shared_ptr<arrow::Array>& x, int i); +RcppExport SEXP _arrow_Array__IsNull(SEXP xSEXP, SEXP iSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const std::shared_ptr<arrow::Array>& >::type x(xSEXP); + Rcpp::traits::input_parameter< int >::type i(iSEXP); + rcpp_result_gen = Rcpp::wrap(Array__IsNull(x, i)); + return rcpp_result_gen; +END_RCPP +} +// Array__IsValid +bool Array__IsValid(const std::shared_ptr<arrow::Array>& x, int i); +RcppExport SEXP _arrow_Array__IsValid(SEXP xSEXP, SEXP iSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const std::shared_ptr<arrow::Array>& >::type x(xSEXP); + Rcpp::traits::input_parameter< int >::type i(iSEXP); + rcpp_result_gen = Rcpp::wrap(Array__IsValid(x, 
i)); + return rcpp_result_gen; +END_RCPP +} +// Array__length +int Array__length(const std::shared_ptr<arrow::Array>& x); +RcppExport SEXP _arrow_Array__length(SEXP xSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const std::shared_ptr<arrow::Array>& >::type x(xSEXP); + rcpp_result_gen = Rcpp::wrap(Array__length(x)); + return rcpp_result_gen; +END_RCPP +} +// Array__offset +int Array__offset(const std::shared_ptr<arrow::Array>& x); +RcppExport SEXP _arrow_Array__offset(SEXP xSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const std::shared_ptr<arrow::Array>& >::type x(xSEXP); + rcpp_result_gen = Rcpp::wrap(Array__offset(x)); + return rcpp_result_gen; +END_RCPP +} +// Array__null_count +int Array__null_count(const std::shared_ptr<arrow::Array>& x); +RcppExport SEXP _arrow_Array__null_count(SEXP xSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const std::shared_ptr<arrow::Array>& >::type x(xSEXP); + rcpp_result_gen = Rcpp::wrap(Array__null_count(x)); + return rcpp_result_gen; +END_RCPP +} +// Array__type +std::shared_ptr<arrow::DataType> Array__type(const std::shared_ptr<arrow::Array>& x); +RcppExport SEXP _arrow_Array__type(SEXP xSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const std::shared_ptr<arrow::Array>& >::type x(xSEXP); + rcpp_result_gen = Rcpp::wrap(Array__type(x)); + return rcpp_result_gen; +END_RCPP +} +// Array__ToString +std::string Array__ToString(const std::shared_ptr<arrow::Array>& x); +RcppExport SEXP _arrow_Array__ToString(SEXP xSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const std::shared_ptr<arrow::Array>& >::type x(xSEXP); + rcpp_result_gen = Rcpp::wrap(Array__ToString(x)); + 
return rcpp_result_gen; +END_RCPP +} +// Array__type_id +arrow::Type::type Array__type_id(const std::shared_ptr<arrow::Array>& x); +RcppExport SEXP _arrow_Array__type_id(SEXP xSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const std::shared_ptr<arrow::Array>& >::type x(xSEXP); + rcpp_result_gen = Rcpp::wrap(Array__type_id(x)); + return rcpp_result_gen; +END_RCPP +} +// Array__Equals +bool Array__Equals(const std::shared_ptr<arrow::Array>& lhs, const std::shared_ptr<arrow::Array>& rhs); +RcppExport SEXP _arrow_Array__Equals(SEXP lhsSEXP, SEXP rhsSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const std::shared_ptr<arrow::Array>& >::type lhs(lhsSEXP); + Rcpp::traits::input_parameter< const std::shared_ptr<arrow::Array>& >::type rhs(rhsSEXP); + rcpp_result_gen = Rcpp::wrap(Array__Equals(lhs, rhs)); + return rcpp_result_gen; +END_RCPP +} +// Array__ApproxEquals +bool Array__ApproxEquals(const std::shared_ptr<arrow::Array>& lhs, const std::shared_ptr<arrow::Array>& rhs); +RcppExport SEXP _arrow_Array__ApproxEquals(SEXP lhsSEXP, SEXP rhsSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const std::shared_ptr<arrow::Array>& >::type lhs(lhsSEXP); + Rcpp::traits::input_parameter< const std::shared_ptr<arrow::Array>& >::type rhs(rhsSEXP); + rcpp_result_gen = Rcpp::wrap(Array__ApproxEquals(lhs, rhs)); + return rcpp_result_gen; +END_RCPP +} +// Array__data +std::shared_ptr<arrow::ArrayData> Array__data(const std::shared_ptr<arrow::Array>& array); +RcppExport SEXP _arrow_Array__data(SEXP arraySEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const std::shared_ptr<arrow::Array>& >::type array(arraySEXP); + rcpp_result_gen = Rcpp::wrap(Array__data(array)); + return rcpp_result_gen; +END_RCPP 
+} +// Array__RangeEquals +bool Array__RangeEquals(const std::shared_ptr<arrow::Array>& self, const std::shared_ptr<arrow::Array>& other, int start_idx, int end_idx, int other_start_idx); +RcppExport SEXP _arrow_Array__RangeEquals(SEXP selfSEXP, SEXP otherSEXP, SEXP start_idxSEXP, SEXP end_idxSEXP, SEXP other_start_idxSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const std::shared_ptr<arrow::Array>& >::type self(selfSEXP); + Rcpp::traits::input_parameter< const std::shared_ptr<arrow::Array>& >::type other(otherSEXP); + Rcpp::traits::input_parameter< int >::type start_idx(start_idxSEXP); + Rcpp::traits::input_parameter< int >::type end_idx(end_idxSEXP); + Rcpp::traits::input_parameter< int >::type other_start_idx(other_start_idxSEXP); + rcpp_result_gen = Rcpp::wrap(Array__RangeEquals(self, other, start_idx, end_idx, other_start_idx)); + return rcpp_result_gen; +END_RCPP +} +// Array__Mask +LogicalVector Array__Mask(const std::shared_ptr<arrow::Array>& array); +RcppExport SEXP _arrow_Array__Mask(SEXP arraySEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const std::shared_ptr<arrow::Array>& >::type array(arraySEXP); + rcpp_result_gen = Rcpp::wrap(Array__Mask(array)); + return rcpp_result_gen; +END_RCPP +} +// DictionaryArray__indices +std::shared_ptr<arrow::Array> DictionaryArray__indices(const std::shared_ptr<arrow::DictionaryArray>& array); +RcppExport SEXP _arrow_DictionaryArray__indices(SEXP arraySEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const std::shared_ptr<arrow::DictionaryArray>& >::type array(arraySEXP); + rcpp_result_gen = Rcpp::wrap(DictionaryArray__indices(array)); + return rcpp_result_gen; +END_RCPP +} +// DictionaryArray__dictionary +std::shared_ptr<arrow::Array> DictionaryArray__dictionary(const 
std::shared_ptr<arrow::DictionaryArray>& array); +RcppExport SEXP _arrow_DictionaryArray__dictionary(SEXP arraySEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const std::shared_ptr<arrow::DictionaryArray>& >::type array(arraySEXP); + rcpp_result_gen = Rcpp::wrap(DictionaryArray__dictionary(array)); + return rcpp_result_gen; +END_RCPP +} +// Field__initialize +std::shared_ptr<arrow::Field> Field__initialize(const std::string& name, const std::shared_ptr<arrow::DataType>& type, bool nullable); +RcppExport SEXP _arrow_Field__initialize(SEXP nameSEXP, SEXP typeSEXP, SEXP nullableSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const std::string& >::type name(nameSEXP); + Rcpp::traits::input_parameter< const std::shared_ptr<arrow::DataType>& >::type type(typeSEXP); + Rcpp::traits::input_parameter< bool >::type nullable(nullableSEXP); + rcpp_result_gen = Rcpp::wrap(Field__initialize(name, type, nullable)); + return rcpp_result_gen; +END_RCPP +} +// Field__ToString +std::string Field__ToString(const std::shared_ptr<arrow::Field>& type); +RcppExport SEXP _arrow_Field__ToString(SEXP typeSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const std::shared_ptr<arrow::Field>& >::type type(typeSEXP); + rcpp_result_gen = Rcpp::wrap(Field__ToString(type)); + return rcpp_result_gen; +END_RCPP +} +// Field__name +std::string Field__name(std::shared_ptr<arrow::Field> type); +RcppExport SEXP _arrow_Field__name(SEXP typeSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< std::shared_ptr<arrow::Field> >::type type(typeSEXP); + rcpp_result_gen = Rcpp::wrap(Field__name(type)); + return rcpp_result_gen; +END_RCPP +} +// Field__nullable +bool Field__nullable(std::shared_ptr<arrow::Field> type); +RcppExport 
SEXP _arrow_Field__nullable(SEXP typeSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< std::shared_ptr<arrow::Field> >::type type(typeSEXP); + rcpp_result_gen = Rcpp::wrap(Field__nullable(type)); + return rcpp_result_gen; +END_RCPP +} + +static const R_CallMethodDef CallEntries[] = { + {"_arrow_ArrayData__get_type", (DL_FUNC) &_arrow_ArrayData__get_type, 1}, + {"_arrow_ArrayData__get_length", (DL_FUNC) &_arrow_ArrayData__get_length, 1}, + {"_arrow_ArrayData__get_null_count", (DL_FUNC) &_arrow_ArrayData__get_null_count, 1}, + {"_arrow_ArrayData__get_offset", (DL_FUNC) &_arrow_ArrayData__get_offset, 1}, + {"_arrow_ChunkedArray__length", (DL_FUNC) &_arrow_ChunkedArray__length, 1}, + {"_arrow_ChunkedArray__null_count", (DL_FUNC) &_arrow_ChunkedArray__null_count, 1}, + {"_arrow_ChunkedArray__num_chunks", (DL_FUNC) &_arrow_ChunkedArray__num_chunks, 1}, + {"_arrow_ChunkedArray__chunk", (DL_FUNC) &_arrow_ChunkedArray__chunk, 2}, + {"_arrow_ChunkedArray__chunks", (DL_FUNC) &_arrow_ChunkedArray__chunks, 1}, + {"_arrow_ChunkedArray__type", (DL_FUNC) &_arrow_ChunkedArray__type, 1}, + {"_arrow_ChunkedArray__as_vector", (DL_FUNC) &_arrow_ChunkedArray__as_vector, 1}, {"_arrow_ChunkArray__Slice1", (DL_FUNC) &_arrow_ChunkArray__Slice1, 2}, {"_arrow_ChunkArray__Slice2", (DL_FUNC) &_arrow_ChunkArray__Slice2, 3}, {"_arrow_ChunkedArray__from_list", (DL_FUNC) &_arrow_ChunkedArray__from_list, 1}, @@ -1252,10 +1314,11 @@ static const R_CallMethodDef CallEntries[] = { {"_arrow_TimestampType__timezone", (DL_FUNC) &_arrow_TimestampType__timezone, 1}, {"_arrow_TimestampType__unit", (DL_FUNC) &_arrow_TimestampType__unit, 1}, {"_arrow_Object__pointer_address", (DL_FUNC) &_arrow_Object__pointer_address, 1}, - {"_arrow_Field__initialize", (DL_FUNC) &_arrow_Field__initialize, 3}, - {"_arrow_Field__ToString", (DL_FUNC) &_arrow_Field__ToString, 1}, - {"_arrow_Field__name", (DL_FUNC) &_arrow_Field__name, 1}, - 
{"_arrow_Field__nullable", (DL_FUNC) &_arrow_Field__nullable, 1}, + {"_arrow_DictionaryType__initialize", (DL_FUNC) &_arrow_DictionaryType__initialize, 3}, + {"_arrow_DictionaryType__index_type", (DL_FUNC) &_arrow_DictionaryType__index_type, 1}, + {"_arrow_DictionaryType__name", (DL_FUNC) &_arrow_DictionaryType__name, 1}, + {"_arrow_DictionaryType__dictionary", (DL_FUNC) &_arrow_DictionaryType__dictionary, 1}, + {"_arrow_DictionaryType__ordered", (DL_FUNC) &_arrow_DictionaryType__ordered, 1}, {"_arrow_MemoryPool__default", (DL_FUNC) &_arrow_MemoryPool__default, 0}, {"_arrow_MemoryPool__bytes_allocated", (DL_FUNC) &_arrow_MemoryPool__bytes_allocated, 1}, {"_arrow_MemoryPool__max_memory", (DL_FUNC) &_arrow_MemoryPool__max_memory, 1}, @@ -1281,6 +1344,29 @@ static const R_CallMethodDef CallEntries[] = { {"_arrow_read_table_", (DL_FUNC) &_arrow_read_table_, 1}, {"_arrow_Table__to_dataframe", (DL_FUNC) &_arrow_Table__to_dataframe, 1}, {"_arrow_Table__column", (DL_FUNC) &_arrow_Table__column, 2}, + {"_arrow_Array__from_vector", (DL_FUNC) &_arrow_Array__from_vector, 1}, + {"_arrow_Array__as_vector", (DL_FUNC) &_arrow_Array__as_vector, 1}, + {"_arrow_Array__Slice1", (DL_FUNC) &_arrow_Array__Slice1, 2}, + {"_arrow_Array__Slice2", (DL_FUNC) &_arrow_Array__Slice2, 3}, + {"_arrow_Array__IsNull", (DL_FUNC) &_arrow_Array__IsNull, 2}, + {"_arrow_Array__IsValid", (DL_FUNC) &_arrow_Array__IsValid, 2}, + {"_arrow_Array__length", (DL_FUNC) &_arrow_Array__length, 1}, + {"_arrow_Array__offset", (DL_FUNC) &_arrow_Array__offset, 1}, + {"_arrow_Array__null_count", (DL_FUNC) &_arrow_Array__null_count, 1}, + {"_arrow_Array__type", (DL_FUNC) &_arrow_Array__type, 1}, + {"_arrow_Array__ToString", (DL_FUNC) &_arrow_Array__ToString, 1}, + {"_arrow_Array__type_id", (DL_FUNC) &_arrow_Array__type_id, 1}, + {"_arrow_Array__Equals", (DL_FUNC) &_arrow_Array__Equals, 2}, + {"_arrow_Array__ApproxEquals", (DL_FUNC) &_arrow_Array__ApproxEquals, 2}, + {"_arrow_Array__data", (DL_FUNC) &_arrow_Array__data, 
1}, + {"_arrow_Array__RangeEquals", (DL_FUNC) &_arrow_Array__RangeEquals, 5}, + {"_arrow_Array__Mask", (DL_FUNC) &_arrow_Array__Mask, 1}, + {"_arrow_DictionaryArray__indices", (DL_FUNC) &_arrow_DictionaryArray__indices, 1}, + {"_arrow_DictionaryArray__dictionary", (DL_FUNC) &_arrow_DictionaryArray__dictionary, 1}, + {"_arrow_Field__initialize", (DL_FUNC) &_arrow_Field__initialize, 3}, + {"_arrow_Field__ToString", (DL_FUNC) &_arrow_Field__ToString, 1}, + {"_arrow_Field__name", (DL_FUNC) &_arrow_Field__name, 1}, + {"_arrow_Field__nullable", (DL_FUNC) &_arrow_Field__nullable, 1}, {NULL, NULL, 0} }; diff --git a/r/src/array.cpp b/r/src/array.cpp index e11e1f7..0f6c18a 100644 --- a/r/src/array.cpp +++ b/r/src/array.cpp @@ -36,6 +36,8 @@ class SimpleRBuffer : public Buffer { Vec vec_; }; +// ---------------------------- R vector -> Array + template <int RTYPE, typename Type> std::shared_ptr<Array> SimpleArray(SEXP x) { Rcpp::Vector<RTYPE> vec(x); @@ -49,7 +51,7 @@ std::shared_ptr<Array> SimpleArray(SEXP x) { auto first_na = std::find_if(vec.begin(), vec.end(), Rcpp::Vector<RTYPE>::is_na); if (first_na < vec.end()) { - R_ERROR_NOT_OK(AllocateBuffer(ceil((double)n / 8), &null_bitmap)); + R_ERROR_NOT_OK(AllocateBuffer(BitUtil::BytesForBits(n), &null_bitmap)); internal::FirstTimeBitmapWriter bitmap_writer(null_bitmap->mutable_data(), 0, n); // first loop to clear all the bits before the first NA @@ -87,7 +89,7 @@ std::shared_ptr<arrow::Array> MakeBooleanArray(LogicalVector_ vec) { // allocate a buffer for the data std::shared_ptr<Buffer> data_bitmap; - R_ERROR_NOT_OK(AllocateBuffer(ceil((double)n / 8), &data_bitmap)); + R_ERROR_NOT_OK(AllocateBuffer(BitUtil::BytesForBits(n), &data_bitmap)); auto data_bitmap_data = data_bitmap->mutable_data(); internal::FirstTimeBitmapWriter bitmap_writer(data_bitmap_data, 0, n); R_xlen_t null_count = 0; @@ -108,7 +110,7 @@ std::shared_ptr<arrow::Array> MakeBooleanArray(LogicalVector_ vec) { if (i < n) { // there has been a null before the 
end, so we need // to collect that information in a null bitmap - R_ERROR_NOT_OK(AllocateBuffer(ceil((double)n / 8), &null_bitmap)); + R_ERROR_NOT_OK(AllocateBuffer(BitUtil::BytesForBits(n), &null_bitmap)); auto null_bitmap_data = null_bitmap->mutable_data(); internal::FirstTimeBitmapWriter null_bitmap_writer(null_bitmap_data, 0, n); @@ -166,7 +168,7 @@ std::shared_ptr<Array> MakeStringArray(StringVector_ vec) { } if (i < n) { - R_ERROR_NOT_OK(AllocateBuffer(ceil((double)n / 8), &null_buffer)); + R_ERROR_NOT_OK(AllocateBuffer(BitUtil::BytesForBits(n), &null_buffer)); internal::FirstTimeBitmapWriter null_bitmap_writer(null_buffer->mutable_data(), 0, n); // catch up @@ -210,6 +212,139 @@ std::shared_ptr<Array> MakeStringArray(StringVector_ vec) { return MakeArray(data); } +template <typename Type> +std::shared_ptr<Array> MakeFactorArrayImpl(Rcpp::IntegerVector_ factor) { + using value_type = typename arrow::TypeTraits<Type>::ArrayType::value_type; + auto dict_values = MakeStringArray(Rf_getAttrib(factor, R_LevelsSymbol)); + auto dict_type = + dictionary(std::make_shared<Type>(), dict_values, Rf_inherits(factor, "ordered")); + + auto n = factor.size(); + + std::shared_ptr<Buffer> indices_buffer; + R_ERROR_NOT_OK(AllocateBuffer(n * sizeof(value_type), &indices_buffer)); + + std::vector<std::shared_ptr<Buffer>> buffers{nullptr, indices_buffer}; + + int64_t null_count = 0; + R_xlen_t i = 0; + auto p_factor = factor.begin(); + auto p_indices = reinterpret_cast<value_type*>(indices_buffer->mutable_data()); + for (; i < n; i++, ++p_indices, ++p_factor) { + if (*p_factor == NA_INTEGER) break; + *p_indices = *p_factor - 1; + } + + if (i < n) { + // there are NA's so we need a null buffer + std::shared_ptr<Buffer> null_buffer; + R_ERROR_NOT_OK(AllocateBuffer(BitUtil::BytesForBits(n), &null_buffer)); + internal::FirstTimeBitmapWriter null_bitmap_writer(null_buffer->mutable_data(), 0, n); + + // catch up + for (R_xlen_t j = 0; j < i; j++, null_bitmap_writer.Next()) { + 
null_bitmap_writer.Set(); + } + + // resume offset filling + for (; i < n; i++, ++p_indices, ++p_factor, null_bitmap_writer.Next()) { + if (*p_factor == NA_INTEGER) { + null_bitmap_writer.Clear(); + null_count++; + } else { + null_bitmap_writer.Set(); + *p_indices = *p_factor - 1; + } + } + + null_bitmap_writer.Finish(); + buffers[0] = std::move(null_buffer); + } + + auto array_indices_data = + ArrayData::Make(std::make_shared<Type>(), n, std::move(buffers), null_count, 0); + auto array_indices = MakeArray(array_indices_data); + + std::shared_ptr<Array> out; + R_ERROR_NOT_OK(DictionaryArray::FromArrays(dict_type, array_indices, &out)); + return out; +} + +std::shared_ptr<Array> MakeFactorArray(Rcpp::IntegerVector_ factor) { + SEXP levels = factor.attr("levels"); + int n = Rf_length(levels); + if (n < 128) { + return MakeFactorArrayImpl<arrow::Int8Type>(factor); + } else if (n < 32768) { + return MakeFactorArrayImpl<arrow::Int16Type>(factor); + } else { + return MakeFactorArrayImpl<arrow::Int32Type>(factor); + } +} + +template <typename T> +int64_t time_cast(T value); + +template <> +inline int64_t time_cast<int>(int value) { + return static_cast<int64_t>(value) * 1000; +} + +template <> +inline int64_t time_cast<double>(double value) { + return static_cast<int64_t>(value * 1000); +} + +template <int RTYPE> +std::shared_ptr<Array> Date64Array_From_POSIXct(SEXP x) { + using stored_type = typename Rcpp::Vector<RTYPE>::stored_type; + Rcpp::Vector<RTYPE> vec(x); + auto p_vec = vec.begin(); + auto n = vec.size(); + + std::shared_ptr<Buffer> values_buffer; + R_ERROR_NOT_OK(AllocateBuffer(n * sizeof(int64_t), &values_buffer)); + auto p_values = reinterpret_cast<int64_t*>(values_buffer->mutable_data()); + + std::vector<std::shared_ptr<Buffer>> buffers{nullptr, values_buffer}; + + int null_count = 0; + R_xlen_t i = 0; + for (; i < n; i++, ++p_vec, ++p_values) { + if (Rcpp::Vector<RTYPE>::is_na(*p_vec)) break; + *p_values = time_cast(*p_vec); + } + if (i < n) { + 
std::shared_ptr<Buffer> null_buffer; + R_ERROR_NOT_OK(AllocateBuffer(BitUtil::BytesForBits(n), &null_buffer)); + internal::FirstTimeBitmapWriter bitmap_writer(null_buffer->mutable_data(), 0, n); + + // catch up + for (R_xlen_t j = 0; j < i; j++, bitmap_writer.Next()) { + bitmap_writer.Set(); + } + + // finish + for (; i < n; i++, ++p_vec, ++p_values, bitmap_writer.Next()) { + if (Rcpp::Vector<RTYPE>::is_na(*p_vec)) { + bitmap_writer.Clear(); + null_count++; + } else { + bitmap_writer.Set(); + *p_values = time_cast(*p_vec); + } + } + + bitmap_writer.Finish(); + buffers[0] = std::move(null_buffer); + } + + auto data = ArrayData::Make(std::make_shared<Date64Type>(), n, std::move(buffers), + null_count, 0); + + return std::make_shared<Date64Array>(data); +} + } // namespace r } // namespace arrow @@ -220,11 +355,22 @@ std::shared_ptr<arrow::Array> Array__from_vector(SEXP x) { return arrow::r::MakeBooleanArray(x); case INTSXP: if (Rf_isFactor(x)) { - break; + return arrow::r::MakeFactorArray(x); + } + if (Rf_inherits(x, "Date")) { + return arrow::r::SimpleArray<INTSXP, arrow::Date32Type>(x); + } + if (Rf_inherits(x, "POSIXct")) { + return arrow::r::Date64Array_From_POSIXct<INTSXP>(x); } return arrow::r::SimpleArray<INTSXP, arrow::Int32Type>(x); case REALSXP: - // TODO: Dates, ... 
+ if (Rf_inherits(x, "Date")) { + return arrow::r::SimpleArray<INTSXP, arrow::Date32Type>(x); + } + if (Rf_inherits(x, "POSIXct")) { + return arrow::r::Date64Array_From_POSIXct<REALSXP>(x); + } return arrow::r::SimpleArray<REALSXP, arrow::DoubleType>(x); case RAWSXP: return arrow::r::SimpleArray<RAWSXP, arrow::Int8Type>(x); @@ -238,14 +384,28 @@ std::shared_ptr<arrow::Array> Array__from_vector(SEXP x) { return nullptr; } +// ---------------------------- Array -> R vector + +namespace arrow { +namespace r { + template <int RTYPE> inline SEXP simple_Array_to_Vector(const std::shared_ptr<arrow::Array>& array) { - using stored_type = typename Rcpp::Vector<RTYPE>::stored_type; - auto start = reinterpret_cast<const stored_type*>( - array->data()->buffers[1]->data() + array->offset() * sizeof(stored_type)); + using value_type = typename Rcpp::Vector<RTYPE>::stored_type; + auto n = array->length(); + auto null_count = array->null_count(); - size_t n = array->length(); - Rcpp::Vector<RTYPE> vec(start, start + n); + // special cases + if (n == 0) return Rcpp::Vector<RTYPE>(0); + if (n == null_count) { + return Rcpp::Vector<RTYPE>(n, default_value<RTYPE>()); + } + + // first copy all the data + auto p_values = GetValuesSafely<value_type>(array->data(), 1, array->offset()); + Rcpp::Vector<RTYPE> vec(p_values, p_values + n); + + // then set the sentinel NA if (array->null_count() && RTYPE != RAWSXP) { // TODO: not sure what to do with RAWSXP since // R raw vector do not have a concept of missing data @@ -262,41 +422,25 @@ inline SEXP simple_Array_to_Vector(const std::shared_ptr<arrow::Array>& array) { return vec; } -inline SEXP BooleanArray_to_Vector(const std::shared_ptr<arrow::Array>& array) { - size_t n = array->length(); - LogicalVector vec(n); +inline SEXP StringArray_to_Vector(const std::shared_ptr<arrow::Array>& array) { + auto n = array->length(); + auto null_count = array->null_count(); - // process the data - arrow::internal::BitmapReader 
data_reader(array->data()->buffers[1]->data(), - array->offset(), n); - for (size_t i = 0; i < n; i++, data_reader.Next()) { - vec[i] = data_reader.IsSet(); - } + // special cases + if (n == 0) return Rcpp::CharacterVector_(0); - // then the null bitmap if needed - if (array->null_count()) { - arrow::internal::BitmapReader null_reader(array->null_bitmap()->data(), - array->offset(), n); - for (size_t i = 0; i < n; i++, null_reader.Next()) { - if (null_reader.IsNotSet()) { - vec[i] = LogicalVector::get_na(); - } - } + // only NA + if (null_count == n) { + return StringVector_(n, NA_STRING); } - return vec; -} - -inline SEXP StringArray_to_Vector(const std::shared_ptr<arrow::Array>& array) { - auto n = array->length(); - Rcpp::CharacterVector res(n); - + Rcpp::CharacterVector res(no_init(n)); const auto& buffers = array->data()->buffers; - auto p_offset = reinterpret_cast<const int32_t*>(buffers[1]->data()) + array->offset(); - auto p_data = reinterpret_cast<const char*>(buffers[2]->data()) + *p_offset; + auto p_offset = GetValuesSafely<int32_t>(array->data(), 1, array->offset()); + auto p_data = GetValuesSafely<char>(array->data(), 2, *p_offset); - if (array->null_count()) { + if (null_count) { // need to watch for nulls arrow::internal::BitmapReader null_reader(array->null_bitmap_data(), array->offset(), n); @@ -323,8 +467,154 @@ inline SEXP StringArray_to_Vector(const std::shared_ptr<arrow::Array>& array) { return res; } +inline SEXP BooleanArray_to_Vector(const std::shared_ptr<arrow::Array>& array) { + auto n = array->length(); + auto null_count = array->null_count(); + + if (n == 0) { + return LogicalVector(0); + } + if (n == null_count) { + return LogicalVector(n, NA_LOGICAL); + } + + LogicalVector vec = no_init(n); + + // process the data + auto p_data = GetValuesSafely<uint8_t>(array->data(), 1, 0); + arrow::internal::BitmapReader data_reader(p_data, array->offset(), n); + for (size_t i = 0; i < n; i++, data_reader.Next()) { + vec[i] = data_reader.IsSet(); + 
} + + // then the null bitmap if needed + if (array->null_count()) { + arrow::internal::BitmapReader null_reader(array->null_bitmap()->data(), + array->offset(), n); + for (size_t i = 0; i < n; i++, null_reader.Next()) { + if (null_reader.IsNotSet()) { + vec[i] = LogicalVector::get_na(); + } + } + } + + return vec; +} + +template <typename Type> +inline SEXP DictionaryArrayInt32Indices_to_Vector( + const std::shared_ptr<arrow::Array>& array, const std::shared_ptr<arrow::Array>& dict, + bool ordered) { + using value_type = typename arrow::TypeTraits<Type>::ArrayType::value_type; + + size_t n = array->length(); + IntegerVector vec(no_init(n)); + vec.attr("levels") = StringArray_to_Vector(dict); + if (ordered) { + vec.attr("class") = CharacterVector::create("ordered", "factor"); + } else { + vec.attr("class") = "factor"; + } + + if (n == 0) { + return vec; + } + + auto null_count = array->null_count(); + if (n == null_count) { + std::fill(vec.begin(), vec.end(), NA_INTEGER); + return vec; + } + + auto p_array = GetValuesSafely<value_type>(array->data(), 1, array->offset()); + + if (array->null_count()) { + arrow::internal::BitmapReader bitmap_reader(array->null_bitmap()->data(), + array->offset(), n); + for (size_t i = 0; i < n; i++, bitmap_reader.Next(), ++p_array) { + vec[i] = bitmap_reader.IsNotSet() ? 
NA_INTEGER : (static_cast<int>(*p_array) + 1); + } + } else { + std::transform(p_array, p_array + n, vec.begin(), + [](const value_type value) { return static_cast<int>(value) + 1; }); + } + return vec; +} + +SEXP DictionaryArray_to_Vector(arrow::DictionaryArray* dict_array) { + auto dict = dict_array->dictionary(); + auto indices = dict_array->indices(); + + if (dict->type_id() != Type::STRING) { + stop("Cannot convert Dictionary Array of type `%s` to R", + dict_array->type()->ToString()); + } + bool ordered = dict_array->dict_type()->ordered(); + switch (indices->type_id()) { + case Type::UINT8: + return DictionaryArrayInt32Indices_to_Vector<arrow::UInt8Type>(indices, dict, + ordered); + case Type::INT8: + return DictionaryArrayInt32Indices_to_Vector<arrow::Int8Type>(indices, dict, + ordered); + case Type::UINT16: + return DictionaryArrayInt32Indices_to_Vector<arrow::UInt16Type>(indices, dict, + ordered); + case Type::INT16: + return DictionaryArrayInt32Indices_to_Vector<arrow::Int16Type>(indices, dict, + ordered); + case Type::INT32: + return DictionaryArrayInt32Indices_to_Vector<arrow::Int32Type>(indices, dict, + ordered); + default: + stop("Cannot convert Dictionary Array of type `%s` to R", + dict_array->type()->ToString()); + } + return R_NilValue; +} + +SEXP Date32Array_to_Vector(const std::shared_ptr<arrow::Array>& array) { + IntegerVector out(simple_Array_to_Vector<INTSXP>(array)); + out.attr("class") = "Date"; + return out; +} + +SEXP Date64Array_to_Vector(const std::shared_ptr<arrow::Array> array) { + auto n = array->length(); + NumericVector vec(n); + vec.attr("class") = CharacterVector::create("POSIXct", "POSIXt"); + if (n == 0) { + return vec; + } + auto null_count = array->null_count(); + if (null_count == n) { + std::fill(vec.begin(), vec.end(), NA_REAL); + return vec; + } + auto p_values = GetValuesSafely<int64_t>(array->data(), 1, array->offset()); + auto p_vec = vec.begin(); + + if (null_count) { + arrow::internal::BitmapReader 
bitmap_reader(array->null_bitmap()->data(), + array->offset(), n); + for (size_t i = 0; i < n; i++, bitmap_reader.Next(), ++p_vec, ++p_values) { + *p_vec = bitmap_reader.IsSet() ? static_cast<double>(*p_values / 1000) : NA_REAL; + } + } else { + std::transform(p_values, p_values + n, vec.begin(), + [](int64_t value) { return static_cast<double>(value / 1000); }); + } + + return vec; +} + +} // namespace r +} // namespace arrow + // [[Rcpp::export]] SEXP Array__as_vector(const std::shared_ptr<arrow::Array>& array) { + using namespace arrow::r; + switch (array->type_id()) { case Type::BOOL: return BooleanArray_to_Vector(array); @@ -336,11 +626,17 @@ SEXP Array__as_vector(const std::shared_ptr<arrow::Array>& array) { return simple_Array_to_Vector<REALSXP>(array); case Type::STRING: return StringArray_to_Vector(array); + case Type::DICTIONARY: + return DictionaryArray_to_Vector(static_cast<arrow::DictionaryArray*>(array.get())); + case Type::DATE32: + return Date32Array_to_Vector(array); + case Type::DATE64: + return Date64Array_to_Vector(array); default: break; } - stop(tfm::format("cannot handle Array of type %d", array->type_id())); + stop(tfm::format("cannot handle Array of type %s", array->type()->name())); return R_NilValue; } @@ -428,3 +724,15 @@ LogicalVector Array__Mask(const std::shared_ptr<arrow::Array>& array) { } return res; } + +// [[Rcpp::export]] +std::shared_ptr<arrow::Array> DictionaryArray__indices( + const std::shared_ptr<arrow::DictionaryArray>& array) { + return array->indices(); +} + +// [[Rcpp::export]] +std::shared_ptr<arrow::Array> DictionaryArray__dictionary( + const std::shared_ptr<arrow::DictionaryArray>& array) { + return array->dictionary(); +} diff --git a/r/src/arrow_types.h b/r/src/arrow_types.h index e208d0e..879f59a 100644 --- a/r/src/arrow_types.h +++ b/r/src/arrow_types.h @@ -37,7 +37,6 @@ namespace Rcpp { namespace traits { struct wrap_type_shared_ptr_tag {}; -struct wrap_type_static_ptr_tag {}; template <typename T> struct 
wrap_type_traits<std::shared_ptr<T>> { @@ -53,9 +52,6 @@ namespace internal { template <typename T> inline SEXP wrap_dispatch(const T& x, Rcpp::traits::wrap_type_shared_ptr_tag); -template <typename T> -inline SEXP wrap_dispatch(const T& x, Rcpp::traits::wrap_type_static_ptr_tag); - } // namespace internal } // namespace Rcpp @@ -101,11 +97,39 @@ inline SEXP wrap_dispatch(const T& x, Rcpp::traits::wrap_type_shared_ptr_tag) { } // namespace Rcpp namespace Rcpp { +using IntegerVector_ = Rcpp::Vector<INTSXP, Rcpp::NoProtectStorage>; using LogicalVector_ = Rcpp::Vector<LGLSXP, Rcpp::NoProtectStorage>; using StringVector_ = Rcpp::Vector<STRSXP, Rcpp::NoProtectStorage>; +using CharacterVector_ = StringVector_; + +template <int RTYPE> +inline typename Rcpp::Vector<RTYPE>::stored_type default_value() { + return Rcpp::Vector<RTYPE>::get_na(); +} +template <> +inline Rbyte default_value<RAWSXP>() { + return 0; +} + } // namespace Rcpp SEXP ChunkedArray__as_vector(const std::shared_ptr<arrow::ChunkedArray>& chunked_array); SEXP Array__as_vector(const std::shared_ptr<arrow::Array>& array); std::shared_ptr<arrow::Array> Array__from_vector(SEXP x); std::shared_ptr<arrow::RecordBatch> RecordBatch__from_dataframe(Rcpp::DataFrame tbl); + +namespace arrow { +namespace r { + +template <typename T> +inline const T* GetValuesSafely(const std::shared_ptr<ArrayData>& data, int i, + int64_t offset) { + auto buffer = data->buffers[i]; + if (!buffer) { + Rcpp::stop(tfm::format("invalid data in buffer %d", i)); + }; + return reinterpret_cast<const T*>(buffer->data()) + offset; +} + +} // namespace r +} // namespace arrow diff --git a/r/tests/testthat/test-Array.R b/r/tests/testthat/test-Array.R index c063a07..d562435 100644 --- a/r/tests/testthat/test-Array.R +++ b/r/tests/testthat/test-Array.R @@ -96,3 +96,149 @@ test_that("Array supports character vectors (ARROW-3339)", { expect_equal(arr_chr$length(), 3L) expect_identical(arr_chr$as_vector(), x) }) + +test_that("empty arrays are 
supported", { + x <- character() + expect_equal(array(x)$as_vector(), x) + + x <- integer() + expect_equal(array(x)$as_vector(), x) + + x <- numeric() + expect_equal(array(x)$as_vector(), x) + + x <- factor(character()) + expect_equal(array(x)$as_vector(), x) + + x <- logical() + expect_equal(array(x)$as_vector(), x) +}) + +test_that("array with all nulls are supported", { + nas <- c(NA, NA) + + x <- as.logical(nas) + expect_equal(array(x)$as_vector(), x) + + x <- as.integer(nas) + expect_equal(array(x)$as_vector(), x) + + x <- as.numeric(nas) + expect_equal(array(x)$as_vector(), x) + + x <- as.character(nas) + expect_equal(array(x)$as_vector(), x) + + x <- as.factor(nas) + expect_equal(array(x)$as_vector(), x) +}) + +test_that("Array supports unordered factors (ARROW-3355)", { + # without NA + f <- factor(c("itsy", "bitsy", "spider", "spider")) + arr_fac <- array(f) + expect_equal(arr_fac$length(), 4L) + expect_equal(arr_fac$type()$index_type(), int8()) + expect_identical(arr_fac$as_vector(), f) + expect_true(arr_fac$IsValid(0)) + expect_true(arr_fac$IsValid(1)) + expect_true(arr_fac$IsValid(2)) + expect_true(arr_fac$IsValid(3)) + + sl <- arr_fac$Slice(1) + expect_equal(sl$length(), 3L) + expect_equal(arr_fac$type()$index_type(), int8()) + expect_equal(sl$as_vector(), f[2:4]) + + # with NA + f <- factor(c("itsy", "bitsy", NA, "spider", "spider")) + # TODO: rm the suppressWarnings when https://github.com/r-lib/vctrs/issues/109 + arr_fac <- suppressWarnings(array(f)) + expect_equal(arr_fac$length(), 5L) + expect_equal(arr_fac$type()$index_type(), int8()) + expect_identical(arr_fac$as_vector(), f) + expect_true(arr_fac$IsValid(0)) + expect_true(arr_fac$IsValid(1)) + expect_true(arr_fac$IsNull(2)) + expect_true(arr_fac$IsValid(3)) + expect_true(arr_fac$IsValid(4)) + + sl <- arr_fac$Slice(1) + expect_equal(sl$length(), 4L) + expect_equal(arr_fac$type()$index_type(), int8()) + expect_equal(sl$as_vector(), f[2:5]) +}) + +test_that("Array supports ordered factors 
(ARROW-3355)", { + # without NA + f <- ordered(c("itsy", "bitsy", "spider", "spider")) + arr_fac <- array(f) + expect_equal(arr_fac$length(), 4L) + expect_equal(arr_fac$type()$index_type(), int8()) + expect_identical(arr_fac$as_vector(), f) + expect_true(arr_fac$IsValid(0)) + expect_true(arr_fac$IsValid(1)) + expect_true(arr_fac$IsValid(2)) + expect_true(arr_fac$IsValid(3)) + + sl <- arr_fac$Slice(1) + expect_equal(sl$length(), 3L) + expect_equal(arr_fac$type()$index_type(), int8()) + expect_equal(sl$as_vector(), f[2:4]) + + # with NA + f <- ordered(c("itsy", "bitsy", NA, "spider", "spider")) + # TODO: rm the suppressWarnings when https://github.com/r-lib/vctrs/issues/109 + arr_fac <- suppressWarnings(array(f)) + expect_equal(arr_fac$length(), 5L) + expect_equal(arr_fac$type()$index_type(), int8()) + expect_identical(arr_fac$as_vector(), f) + expect_true(arr_fac$IsValid(0)) + expect_true(arr_fac$IsValid(1)) + expect_true(arr_fac$IsNull(2)) + expect_true(arr_fac$IsValid(3)) + expect_true(arr_fac$IsValid(4)) + + sl <- arr_fac$Slice(1) + expect_equal(sl$length(), 4L) + expect_equal(arr_fac$type()$index_type(), int8()) + expect_equal(sl$as_vector(), f[2:5]) +}) + +test_that("array supports Date (ARROW-3340)", { + d <- Sys.Date() + 1:10 + a <- array(d) + expect_equal(a$type(), date32()) + expect_equal(a$length(), 10L) + expect_equal(a$as_vector(), d) + + d[5] <- NA + a <- array(d) + expect_equal(a$type(), date32()) + expect_equal(a$length(), 10L) + expect_equal(a$as_vector(), d) + expect_true(a$IsNull(4)) + + d2 <- d + .5 + a <- array(d2) + expect_equal(a$type(), date32()) + expect_equal(a$length(), 10L) + expect_equal(a$as_vector(), d) + expect_true(a$IsNull(4)) +}) + +test_that("array supports POSIXct (ARROW-3340)", { + times <- lubridate::ymd_hms("2018-10-07 19:04:05") + 1:10 + a <- array(times) + expect_equal(a$type(), date64()) + expect_equal(a$length(), 10L) + expect_equal(as.numeric(a$as_vector()), as.numeric(times)) + + times[5] <- NA + a <- array(times) + 
expect_equal(a$type(), date64()) + expect_equal(a$length(), 10L) + expect_equal(as.numeric(a$as_vector()), as.numeric(times)) + expect_true(a$IsNull(4)) +}) + diff --git a/r/tests/testthat/test-DataType.R b/r/tests/testthat/test-DataType.R index e87175c..b479e5a 100644 --- a/r/tests/testthat/test-DataType.R +++ b/r/tests/testthat/test-DataType.R @@ -312,3 +312,15 @@ test_that("struct type works as expected", { list(field("x", int32()), field("y", boolean())) ) }) + +test_that("DictionaryType works as expected (ARROW-3355)", { + d <- dictionary(int32(), array(c("foo", "bar", "baz"))) + expect_equal(d, d) + expect_true(d == d) + expect_false(d == int32()) + expect_equal(d$id(), Type$DICTIONARY) + expect_equal(d$bit_width(), 32L) + expect_equal(d$ToString(), "dictionary<values=string, indices=int32, ordered=0>") + expect_equal(d$index_type(), int32()) + expect_equal(d$dictionary(), array(c("foo", "bar", "baz"))) +}) diff --git a/r/tests/testthat/test-RecordBatch.R b/r/tests/testthat/test-RecordBatch.R index 75c59aa..e3557f8 100644 --- a/r/tests/testthat/test-RecordBatch.R +++ b/r/tests/testthat/test-RecordBatch.R @@ -21,22 +21,28 @@ test_that("RecordBatch", { tbl <- tibble::tibble( int = 1:10, dbl = as.numeric(1:10), lgl = sample(c(TRUE, FALSE, NA), 10, replace = TRUE), - chr = letters[1:10] + chr = letters[1:10], + fct = factor(letters[1:10]) ) batch <- record_batch(tbl) expect_true(batch == batch) expect_equal( batch$schema(), - schema(int = int32(), dbl = float64(), lgl = boolean(), chr = utf8()) + schema( + int = int32(), dbl = float64(), + lgl = boolean(), chr = utf8(), + fct = dictionary(int32(), array(letters[1:10])) + ) ) - expect_equal(batch$num_columns(), 4L) + expect_equal(batch$num_columns(), 5L) expect_equal(batch$num_rows(), 10L) expect_equal(batch$column_name(0), "int") expect_equal(batch$column_name(1), "dbl") expect_equal(batch$column_name(2), "lgl") expect_equal(batch$column_name(3), "chr") - expect_equal(names(batch), c("int", "dbl", "lgl", "chr"))
+ expect_equal(batch$column_name(4), "fct") + expect_equal(names(batch), c("int", "dbl", "lgl", "chr", "fct")) col_int <- batch$column(0) expect_true(inherits(col_int, 'arrow::Array')) @@ -58,10 +64,16 @@ test_that("RecordBatch", { expect_equal(col_chr$as_vector(), tbl$chr) expect_equal(col_chr$type(), utf8()) + col_fct <- batch$column(4) + expect_true(inherits(col_fct, 'arrow::Array')) + expect_equal(col_fct$as_vector(), tbl$fct) + expect_equal(col_fct$type(), dictionary(int32(), array(letters[1:10]))) + + batch2 <- batch$RemoveColumn(0) expect_equal( batch2$schema(), - schema(dbl = float64(), lgl = boolean(), chr = utf8()) + schema(dbl = float64(), lgl = boolean(), chr = utf8(), fct = dictionary(int32(), array(letters[1:10]))) ) expect_equal(batch2$column(0), batch$column(1)) expect_identical(as_tibble(batch2), tbl[,-1]) @@ -72,3 +84,33 @@ test_that("RecordBatch", { batch4 <- batch$Slice(5, 2) expect_identical(as_tibble(batch4), tbl[6:7,]) }) + +test_that("RecordBatch with 0 rows are supported", { + tbl <- tibble::tibble( + int = integer(), + dbl = numeric(), + lgl = logical(), + chr = character(), + fct = factor(character(), levels = c("a", "b")) + ) + + batch <- record_batch(tbl) + expect_equal(batch$num_columns(), 5L) + expect_equal(batch$num_rows(), 0L) + expect_equal( + batch$schema(), + schema( + int = int32(), + dbl = float64(), + lgl = boolean(), + chr = utf8(), + fct = dictionary(int32(), array(c("a", "b"))) + ) + ) + + tf <- tempfile(); on.exit(unlink(tf)) + batch$to_file(tf) + + res <- read_record_batch(tf) + expect_equal(res, batch) +})
