This is an automated email from the ASF dual-hosted git repository.
npr pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 760284f ARROW-10358: [R] Followups to 2.0.0 release
760284f is described below
commit 760284f397256caa2ea2df2e68f408d4bb7cdc99
Author: Neal Richardson <[email protected]>
AuthorDate: Tue Oct 20 14:27:30 2020 -0700
ARROW-10358: [R] Followups to 2.0.0 release
Closes #8495 from nealrichardson/r-post-2.0.0
Authored-by: Neal Richardson <[email protected]>
Signed-off-by: Neal Richardson <[email protected]>
---
r/.Rbuildignore | 1 +
r/.gitignore | 1 +
r/DESCRIPTION | 2 +-
r/NEWS.md | 1 +
r/R/parquet.R | 22 +++++++++++++++++++---
r/man/write_parquet.Rd | 12 +++++++++++-
r/tools/linuxlibs.R | 6 ++++++
7 files changed, 40 insertions(+), 5 deletions(-)
diff --git a/r/.Rbuildignore b/r/.Rbuildignore
index 6830c90..91a8d74 100644
--- a/r/.Rbuildignore
+++ b/r/.Rbuildignore
@@ -9,6 +9,7 @@ Dockerfile
.*\.tar\.gz
^windows
^libarrow
+^revdep
clang_format.sh
^cran-comments\.md$
^arrow_.*.tar.gz$
diff --git a/r/.gitignore b/r/.gitignore
index 5fda633..e5ab119 100644
--- a/r/.gitignore
+++ b/r/.gitignore
@@ -13,6 +13,7 @@ src/Makevars
src/Makevars.win
windows/
libarrow/
+revdep/
vignettes/nyc-taxi/
arrow_*.tar.gz
arrow_*.tgz
diff --git a/r/DESCRIPTION b/r/DESCRIPTION
index 833dc18..b54616e 100644
--- a/r/DESCRIPTION
+++ b/r/DESCRIPTION
@@ -21,7 +21,7 @@ BugReports:
https://issues.apache.org/jira/projects/ARROW/issues
Encoding: UTF-8
Language: en-US
LazyData: true
-SystemRequirements: C++11
+SystemRequirements: C++11; for AWS S3 support on Linux, libcurl and openssl (optional)
Biarch: true
Imports:
assertthat,
diff --git a/r/NEWS.md b/r/NEWS.md
index 86d1c7a..15c66ae 100644
--- a/r/NEWS.md
+++ b/r/NEWS.md
@@ -62,6 +62,7 @@ to send and receive data. See `vignette("flight", package =
"arrow")` for an ove
* File writers now respect the system umask setting
* `ParquetFileReader` has additional methods for accessing individual columns
or row groups from the file
* Various segfaults fixed: invalid input in `ParquetFileWriter`; invalid
`ArrowObject` pointer from a saved R object; converting deeply nested structs
from Arrow to R
+* The `properties` and `arrow_properties` arguments to `write_parquet()` are deprecated
# arrow 1.0.1
diff --git a/r/R/parquet.R b/r/R/parquet.R
index 1a805c8..1bc6742 100644
--- a/r/R/parquet.R
+++ b/r/R/parquet.R
@@ -93,6 +93,12 @@ read_parquet <- function(file,
#' @param allow_truncated_timestamps Allow loss of data when coercing
timestamps to a
#' particular resolution. E.g. if microsecond or nanosecond data is lost
when coercing
#' to "ms", do not raise an exception
+#' @param properties A `ParquetWriterProperties` object, used instead of the options
+#' enumerated in this function's signature. Providing `properties` as an argument
+#' is deprecated; if you need to assemble `ParquetWriterProperties` outside
+#' of `write_parquet()`, use `ParquetFileWriter` instead.
+#' @param arrow_properties A `ParquetArrowWriterProperties` object. Like
+#' `properties`, this argument is deprecated.
#'
#' @details The parameters `compression`, `compression_level`,
`use_dictionary` and
#' `write_statistics` support various patterns:
@@ -140,7 +146,9 @@ write_parquet <- function(x,
# arrow writer properties
use_deprecated_int96_timestamps = FALSE,
coerce_timestamps = NULL,
- allow_truncated_timestamps = FALSE) {
+ allow_truncated_timestamps = FALSE,
+ properties = NULL,
+ arrow_properties = NULL) {
x_out <- x
if (is.data.frame(x)) {
x <- Table$create(x)
@@ -151,10 +159,18 @@ write_parquet <- function(x,
on.exit(sink$close())
}
+ # Deprecation warnings
+ if (!is.null(properties)) {
+ warning("Providing 'properties' is deprecated. If you need to assemble properties outside this function, use ParquetFileWriter instead.")
+ }
+ if (!is.null(arrow_properties)) {
+ warning("Providing 'arrow_properties' is deprecated. If you need to assemble arrow_properties outside this function, use ParquetFileWriter instead.")
+ }
+
writer <- ParquetFileWriter$create(
x$schema,
sink,
- properties = ParquetWriterProperties$create(
+ properties = properties %||% ParquetWriterProperties$create(
x,
version = version,
compression = compression,
@@ -163,7 +179,7 @@ write_parquet <- function(x,
write_statistics = write_statistics,
data_page_size = data_page_size
),
- arrow_properties = ParquetArrowWriterProperties$create(
+ arrow_properties = arrow_properties %||% ParquetArrowWriterProperties$create(
use_deprecated_int96_timestamps = use_deprecated_int96_timestamps,
coerce_timestamps = coerce_timestamps,
allow_truncated_timestamps = allow_truncated_timestamps
diff --git a/r/man/write_parquet.Rd b/r/man/write_parquet.Rd
index f639db9..d0e4f24 100644
--- a/r/man/write_parquet.Rd
+++ b/r/man/write_parquet.Rd
@@ -16,7 +16,9 @@ write_parquet(
data_page_size = NULL,
use_deprecated_int96_timestamps = FALSE,
coerce_timestamps = NULL,
- allow_truncated_timestamps = FALSE
+ allow_truncated_timestamps = FALSE,
+ properties = NULL,
+ arrow_properties = NULL
)
}
\arguments{
@@ -49,6 +51,14 @@ size of data pages within a column chunk (in bytes). Default
1 MiB.}
\item{allow_truncated_timestamps}{Allow loss of data when coercing timestamps
to a
particular resolution. E.g. if microsecond or nanosecond data is lost when
coercing
to "ms", do not raise an exception}
+
+\item{properties}{A \code{ParquetWriterProperties} object, used instead of the options
+enumerated in this function's signature. Providing \code{properties} as an argument
+is deprecated; if you need to assemble \code{ParquetWriterProperties} outside
+of \code{write_parquet()}, use \code{ParquetFileWriter} instead.}
+
+\item{arrow_properties}{A \code{ParquetArrowWriterProperties} object. Like
+\code{properties}, this argument is deprecated.}
}
\value{
the input \code{x} invisibly.
diff --git a/r/tools/linuxlibs.R b/r/tools/linuxlibs.R
index e5b928f..d36192e 100644
--- a/r/tools/linuxlibs.R
+++ b/r/tools/linuxlibs.R
@@ -55,6 +55,12 @@ download_binary <- function(os = identify_os()) {
binary_url <- paste0(arrow_repo, "bin/", os, "/arrow-", VERSION, ".zip")
if (try_download(binary_url, libfile)) {
cat(sprintf("*** Successfully retrieved C++ binaries for %s\n", os))
+ if (!identical(os, "centos-7")) {
+ # centos-7 uses gcc 4.8 so the binary doesn't have ARROW_S3=ON but the others do
+ # TODO: actually check for system requirements?
+ cat("**** Binary package requires libcurl and openssl\n")
+ cat("**** If installation fails, retry after installing those system requirements\n")
+ }
} else {
cat(sprintf("*** No C++ binaries found for %s\n", os))
libfile <- NULL