This is an automated email from the ASF dual-hosted git repository.
thisisnic pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 3e6988aca7 GH-48998: [R] Add note to docs on validating IPC streams (#48999)
3e6988aca7 is described below
commit 3e6988aca71881e46ed8e5b858388d1dcfa235ad
Author: Nic Crane <[email protected]>
AuthorDate: Mon Feb 16 10:30:48 2026 +0000
GH-48998: [R] Add note to docs on validating IPC streams (#48999)
### Rationale for this change
We needed to document how to validate IPC streams read from untrusted sources.
### What changes are included in this PR?
Document it
### Are these changes tested?
Nope
### Are there any user-facing changes?
Nope
Note: additional docs changes in `.Rd` files here are as a result of
calling `make doc` on the main branch
* GitHub Issue: #48998
Authored-by: Nic Crane <[email protected]>
Signed-off-by: Nic Crane <[email protected]>
---
r/R/ipc-stream.R | 4 ++++
r/man/DictionaryType.Rd | 35 +++++++++++++++++++++++++++++++----
r/man/FixedWidthType.Rd | 17 ++++++++++++++---
r/man/Message.Rd | 19 ++++++++++++++++---
r/man/MessageReader.Rd | 17 ++++++++++++++---
r/man/read_ipc_stream.Rd | 7 +++++++
6 files changed, 86 insertions(+), 13 deletions(-)
diff --git a/r/R/ipc-stream.R b/r/R/ipc-stream.R
index 26a61a790f..8ebb5e3663 100644
--- a/r/R/ipc-stream.R
+++ b/r/R/ipc-stream.R
@@ -95,6 +95,10 @@ write_to_raw <- function(x, format = c("stream", "file")) {
#' Arrow [Table] otherwise
#' @seealso [write_feather()] for writing IPC files. [RecordBatchReader] for a
#' lower-level interface.
+#' @section Untrusted data:
+#' If reading from an untrusted source, you can validate the data by reading
+#' with `as_data_frame = FALSE` and calling `$ValidateFull()` on the Table
+#' before processing.
#' @export
read_ipc_stream <- function(file, as_data_frame = TRUE, ...) {
if (!inherits(file, "InputStream")) {
diff --git a/r/man/DictionaryType.Rd b/r/man/DictionaryType.Rd
index 8c9087f1ab..cda27978b1 100644
--- a/r/man/DictionaryType.Rd
+++ b/r/man/DictionaryType.Rd
@@ -3,13 +3,40 @@
\docType{class}
\name{DictionaryType}
\alias{DictionaryType}
-\title{class DictionaryType}
+\title{DictionaryType class}
\description{
-class DictionaryType
+\code{DictionaryType} is a \link{FixedWidthType} that represents dictionary-encoded data.
+Dictionary encoding stores unique values in a dictionary and uses integer-type
+indices to reference them, which can be more memory-efficient for data with many
+repeated values.
}
-\section{Methods}{
+\section{R6 Methods}{
+\itemize{
+\item \verb{$ToString()}: Return a string representation of the dictionary type
+\item \verb{$code(namespace = FALSE)}: Return R code to create this dictionary type
+}
+}
+
+\section{Active bindings}{
-TODO
+\itemize{
+\item \verb{$index_type}: The \link{DataType} for the dictionary indices (must be an integer type,
+signed or unsigned)
+\item \verb{$value_type}: The \link{DataType} for the dictionary values
+\item \verb{$name}: The name of the type.
+\item \verb{$ordered}: Whether the dictionary is ordered.
+}
+}
+
+\section{Factory}{
+
+
+\code{DictionaryType$create()} takes the following arguments:
+\itemize{
+\item \code{index_type}: A \link{DataType} for the indices (default \code{\link[=int32]{int32()}})
+\item \code{value_type}: A \link{DataType} for the values (default \code{\link[=utf8]{utf8()}})
+\item \code{ordered}: Is this an ordered dictionary (default \code{FALSE})?
+}
}
diff --git a/r/man/FixedWidthType.Rd b/r/man/FixedWidthType.Rd
index ac6723d79d..71d0ab2d27 100644
--- a/r/man/FixedWidthType.Rd
+++ b/r/man/FixedWidthType.Rd
@@ -5,11 +5,22 @@
\alias{FixedWidthType}
\title{FixedWidthType class}
\description{
-FixedWidthType class
+\code{FixedWidthType} is a base class for data types with a fixed width in bits.
+This includes all integer types, floating-point types, \code{Boolean},
+\code{FixedSizeBinary}, temporal types (dates, times, timestamps, durations),
+and decimal types.
}
-\section{Methods}{
+\section{R6 Methods}{
-TODO
+\code{FixedWidthType} inherits from \link{DataType}, so it has the same methods.
}
+\section{Active bindings}{
+
+\itemize{
+\item \verb{$bit_width}: The width of the type in bits
+}
+}
+
+\keyword{internal}
diff --git a/r/man/Message.Rd b/r/man/Message.Rd
index fbad235b64..b8be82bfa4 100644
--- a/r/man/Message.Rd
+++ b/r/man/Message.Rd
@@ -5,11 +5,24 @@
\alias{Message}
\title{Message class}
\description{
-Message class
+\code{Message} holds an Arrow IPC message, which includes metadata and
+an optional message body.
}
-\section{Methods}{
+\section{R6 Methods}{
+\itemize{
+\item \verb{$Equals(other)}: Check if this \code{Message} is equal to another \code{Message}
+\item \verb{$body_length()}: Return the length of the message body in bytes
+\item \verb{$Verify()}: Check if the \code{Message} metadata is valid Flatbuffer format
+}
+}
-TODO
+\section{Active bindings}{
+
+\itemize{
+\item \verb{$type}: The message type
+\item \verb{$metadata}: The message metadata
+\item \verb{$body}: The message body as a \link{Buffer}
+}
}
diff --git a/r/man/MessageReader.Rd b/r/man/MessageReader.Rd
index 32ca8900b3..4c3bef3fc9 100644
--- a/r/man/MessageReader.Rd
+++ b/r/man/MessageReader.Rd
@@ -5,11 +5,22 @@
\alias{MessageReader}
\title{MessageReader class}
\description{
-MessageReader class
+\code{MessageReader} reads \code{Message} objects from an input stream.
}
-\section{Methods}{
+\section{R6 Methods}{
+\itemize{
+\item \verb{$ReadNextMessage()}: Read the next \code{Message} from the stream. Returns \code{NULL} if
+there are no more messages.
+}
+}
+
+\section{Factory}{
-TODO
+
+\code{MessageReader$create()} takes the following argument:
+\itemize{
+\item \code{stream}: An \link{InputStream} or object coercible to one (e.g., a raw vector)
+}
}
diff --git a/r/man/read_ipc_stream.Rd b/r/man/read_ipc_stream.Rd
index 49d3949bfc..601edb2af0 100644
--- a/r/man/read_ipc_stream.Rd
+++ b/r/man/read_ipc_stream.Rd
@@ -27,6 +27,13 @@ Apache Arrow defines two formats for \href{https://arrow.apache.org/docs/format/
a "stream" format and a "file" format, known as Feather. \code{read_ipc_stream()}
and \code{\link[=read_feather]{read_feather()}} read those formats, respectively.
}
+\section{Untrusted data}{
+
+If reading from an untrusted source, you can validate the data by reading
+with \code{as_data_frame = FALSE} and calling \verb{$ValidateFull()} on the Table
+before processing.
+}
+
\seealso{
\code{\link[=write_feather]{write_feather()}} for writing IPC files. \link{RecordBatchReader} for a
lower-level interface.