This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 7781bc217 chore: add docs, part of #37 (#6453)
7781bc217 is described below
commit 7781bc2170c84ada387901e09b2cdfe4235c3570
Author: ByteBaker <[email protected]>
AuthorDate: Wed Oct 2 01:33:16 2024 +0530
chore: add docs, part of #37 (#6453)
* chore: add docs, part of #37
- add pragma `#![warn(missing_docs)]` to the following
- `arrow-flight`
- `arrow-ipc`
- `arrow-integration-test`
- `arrow-integration-testing`
- `object_store`
- also document the caveat with using level 10 GZIP compression in
parquet. See #6282.
* chore: resolve PR comments from #6453
---
arrow-flight/examples/flight_sql_server.rs | 3 +-
arrow-flight/src/bin/flight_sql_client.rs | 1 +
arrow-flight/src/decode.rs | 7 +++-
arrow-flight/src/encode.rs | 3 +-
arrow-flight/src/error.rs | 2 +
arrow-flight/src/lib.rs | 5 +++
arrow-flight/src/sql/client.rs | 4 +-
arrow-flight/src/sql/metadata/sql_info.rs | 13 ++----
arrow-flight/src/sql/metadata/xdbc_info.rs | 39 +++++++++++++-----
arrow-flight/src/sql/mod.rs | 11 +++++-
arrow-flight/src/utils.rs | 7 +++-
arrow-integration-test/src/lib.rs | 34 ++++++++++++++++
.../flight_client_scenarios/auth_basic_proto.rs | 3 ++
.../flight_client_scenarios/integration_test.rs | 3 ++
.../src/flight_client_scenarios/middleware.rs | 3 ++
.../mod.rs} | 2 +
.../flight_server_scenarios/auth_basic_proto.rs | 4 ++
.../flight_server_scenarios/integration_test.rs | 9 ++++-
.../src/flight_server_scenarios/middleware.rs | 4 ++
.../mod.rs} | 3 ++
arrow-integration-testing/src/lib.rs | 18 ++++++---
arrow-ipc/src/convert.rs | 3 +-
arrow-ipc/src/lib.rs | 2 +
arrow-ipc/src/writer.rs | 11 +++++-
arrow-json/src/writer.rs | 9 +++--
arrow-schema/src/field.rs | 4 +-
arrow/tests/array_cast.rs | 8 ++--
object_store/src/aws/builder.rs | 1 -
object_store/src/aws/client.rs | 1 -
object_store/src/aws/resolve.rs | 1 -
object_store/src/azure/builder.rs | 1 -
object_store/src/azure/client.rs | 1 -
object_store/src/client/get.rs | 1 -
object_store/src/lib.rs | 46 ++++++++++++++++++++--
object_store/src/local.rs | 1 -
object_store/src/memory.rs | 1 -
object_store/src/path/mod.rs | 35 +++++++++++++---
parquet/src/compression.rs | 29 ++++++++++++++
38 files changed, 269 insertions(+), 64 deletions(-)
diff --git a/arrow-flight/examples/flight_sql_server.rs
b/arrow-flight/examples/flight_sql_server.rs
index 81afecf85..dd3a3943d 100644
--- a/arrow-flight/examples/flight_sql_server.rs
+++ b/arrow-flight/examples/flight_sql_server.rs
@@ -19,6 +19,7 @@ use arrow_flight::sql::server::PeekableFlightDataStream;
use arrow_flight::sql::DoPutPreparedStatementResult;
use base64::prelude::BASE64_STANDARD;
use base64::Engine;
+use core::str;
use futures::{stream, Stream, TryStreamExt};
use once_cell::sync::Lazy;
use prost::Message;
@@ -168,7 +169,7 @@ impl FlightSqlService for FlightSqlServiceImpl {
let bytes = BASE64_STANDARD
.decode(base64)
.map_err(|e| status!("authorization not decodable", e))?;
- let str = String::from_utf8(bytes).map_err(|e| status!("authorization
not parsable", e))?;
+ let str = str::from_utf8(&bytes).map_err(|e| status!("authorization
not parsable", e))?;
let parts: Vec<_> = str.split(':').collect();
let (user, pass) = match parts.as_slice() {
[user, pass] => (user, pass),
diff --git a/arrow-flight/src/bin/flight_sql_client.rs
b/arrow-flight/src/bin/flight_sql_client.rs
index c334b95a9..8f0618f49 100644
--- a/arrow-flight/src/bin/flight_sql_client.rs
+++ b/arrow-flight/src/bin/flight_sql_client.rs
@@ -26,6 +26,7 @@ use arrow_flight::{
};
use arrow_schema::Schema;
use clap::{Parser, Subcommand};
+use core::str;
use futures::TryStreamExt;
use tonic::{
metadata::MetadataMap,
diff --git a/arrow-flight/src/decode.rs b/arrow-flight/src/decode.rs
index 5561f256c..7bafc3843 100644
--- a/arrow-flight/src/decode.rs
+++ b/arrow-flight/src/decode.rs
@@ -388,11 +388,14 @@ struct FlightStreamState {
/// FlightData and the decoded payload (Schema, RecordBatch), if any
#[derive(Debug)]
pub struct DecodedFlightData {
+ /// The original FlightData message
pub inner: FlightData,
+ /// The decoded payload
pub payload: DecodedPayload,
}
impl DecodedFlightData {
+ /// Create a new DecodedFlightData with no payload
pub fn new_none(inner: FlightData) -> Self {
Self {
inner,
@@ -400,6 +403,7 @@ impl DecodedFlightData {
}
}
+ /// Create a new DecodedFlightData with a [`Schema`] payload
pub fn new_schema(inner: FlightData, schema: SchemaRef) -> Self {
Self {
inner,
@@ -407,6 +411,7 @@ impl DecodedFlightData {
}
}
+ /// Create a new [`DecodedFlightData`] with a [`RecordBatch`] payload
pub fn new_record_batch(inner: FlightData, batch: RecordBatch) -> Self {
Self {
inner,
@@ -414,7 +419,7 @@ impl DecodedFlightData {
}
}
- /// return the metadata field of the inner flight data
+ /// Return the metadata field of the inner flight data
pub fn app_metadata(&self) -> Bytes {
self.inner.app_metadata.clone()
}
diff --git a/arrow-flight/src/encode.rs b/arrow-flight/src/encode.rs
index 59fa8afd5..55bc92403 100644
--- a/arrow-flight/src/encode.rs
+++ b/arrow-flight/src/encode.rs
@@ -144,6 +144,7 @@ impl Default for FlightDataEncoderBuilder {
}
impl FlightDataEncoderBuilder {
+ /// Create a new [`FlightDataEncoderBuilder`].
pub fn new() -> Self {
Self::default()
}
@@ -1403,7 +1404,7 @@ mod tests {
let input_rows = batch.num_rows();
let split = split_batch_for_grpc_response(batch.clone(),
max_flight_data_size_bytes);
- let sizes: Vec<_> = split.iter().map(|batch|
batch.num_rows()).collect();
+ let sizes: Vec<_> = split.iter().map(RecordBatch::num_rows).collect();
let output_rows: usize = sizes.iter().sum();
assert_eq!(sizes, expected_sizes, "mismatch for {batch:?}");
diff --git a/arrow-flight/src/error.rs b/arrow-flight/src/error.rs
index ba979ca9f..499706e1e 100644
--- a/arrow-flight/src/error.rs
+++ b/arrow-flight/src/error.rs
@@ -37,6 +37,7 @@ pub enum FlightError {
}
impl FlightError {
+ /// Generate a new `FlightError::ProtocolError` variant.
pub fn protocol(message: impl Into<String>) -> Self {
Self::ProtocolError(message.into())
}
@@ -98,6 +99,7 @@ impl From<FlightError> for tonic::Status {
}
}
+/// Result type for the Apache Arrow Flight crate
pub type Result<T> = std::result::Result<T, FlightError>;
#[cfg(test)]
diff --git a/arrow-flight/src/lib.rs b/arrow-flight/src/lib.rs
index 64e3ba01c..9f18416c0 100644
--- a/arrow-flight/src/lib.rs
+++ b/arrow-flight/src/lib.rs
@@ -37,6 +37,7 @@
//!
//! [Flight SQL]: https://arrow.apache.org/docs/format/FlightSql.html
#![allow(rustdoc::invalid_html_tags)]
+#![warn(missing_docs)]
use arrow_ipc::{convert, writer, writer::EncodedData, writer::IpcWriteOptions};
use arrow_schema::{ArrowError, Schema};
@@ -52,6 +53,8 @@ type ArrowResult<T> = std::result::Result<T, ArrowError>;
#[allow(clippy::all)]
mod gen {
+ // Since this file is auto-generated, we suppress all warnings
+ #![allow(missing_docs)]
include!("arrow.flight.protocol.rs");
}
@@ -125,6 +128,7 @@ use flight_descriptor::DescriptorType;
/// SchemaAsIpc represents a pairing of a `Schema` with IpcWriteOptions
pub struct SchemaAsIpc<'a> {
+ /// Data type representing a schema and its IPC write options
pub pair: (&'a Schema, &'a IpcWriteOptions),
}
@@ -684,6 +688,7 @@ impl PollInfo {
}
impl<'a> SchemaAsIpc<'a> {
+ /// Create a new `SchemaAsIpc` from a `Schema` and `IpcWriteOptions`
pub fn new(schema: &'a Schema, options: &'a IpcWriteOptions) -> Self {
SchemaAsIpc {
pair: (schema, options),
diff --git a/arrow-flight/src/sql/client.rs b/arrow-flight/src/sql/client.rs
index ef52aa27e..e45e505b2 100644
--- a/arrow-flight/src/sql/client.rs
+++ b/arrow-flight/src/sql/client.rs
@@ -695,9 +695,11 @@ fn flight_error_to_arrow_error(err: FlightError) ->
ArrowError {
}
}
-// A polymorphic structure to natively represent different types of data
contained in `FlightData`
+/// A polymorphic structure to natively represent different types of data
contained in `FlightData`
pub enum ArrowFlightData {
+ /// A record batch
RecordBatch(RecordBatch),
+ /// A schema
Schema(Schema),
}
diff --git a/arrow-flight/src/sql/metadata/sql_info.rs
b/arrow-flight/src/sql/metadata/sql_info.rs
index 97304d3c8..2ea30df7f 100644
--- a/arrow-flight/src/sql/metadata/sql_info.rs
+++ b/arrow-flight/src/sql/metadata/sql_info.rs
@@ -331,7 +331,7 @@ impl SqlInfoUnionBuilder {
///
/// Servers construct - usually static - [`SqlInfoData`] via the
[`SqlInfoDataBuilder`],
/// and build responses using [`CommandGetSqlInfo::into_builder`]
-#[derive(Debug, Clone, PartialEq)]
+#[derive(Debug, Clone, PartialEq, Default)]
pub struct SqlInfoDataBuilder {
/// Use BTreeMap to ensure the values are sorted by value as
/// to make output consistent
@@ -341,17 +341,10 @@ pub struct SqlInfoDataBuilder {
infos: BTreeMap<u32, SqlInfoValue>,
}
-impl Default for SqlInfoDataBuilder {
- fn default() -> Self {
- Self::new()
- }
-}
-
impl SqlInfoDataBuilder {
+ /// Create a new SQL info builder
pub fn new() -> Self {
- Self {
- infos: BTreeMap::new(),
- }
+ Self::default()
}
/// register the specific sql metadata item
diff --git a/arrow-flight/src/sql/metadata/xdbc_info.rs
b/arrow-flight/src/sql/metadata/xdbc_info.rs
index 2e635d303..485bedaeb 100644
--- a/arrow-flight/src/sql/metadata/xdbc_info.rs
+++ b/arrow-flight/src/sql/metadata/xdbc_info.rs
@@ -41,24 +41,43 @@ use crate::sql::{CommandGetXdbcTypeInfo, Nullable,
Searchable, XdbcDataType, Xdb
/// Data structure representing type information for xdbc types.
#[derive(Debug, Clone, Default)]
pub struct XdbcTypeInfo {
+ /// The name of the type
pub type_name: String,
+ /// The data type of the type
pub data_type: XdbcDataType,
+ /// The column size of the type
pub column_size: Option<i32>,
+ /// The prefix of the type
pub literal_prefix: Option<String>,
+ /// The suffix of the type
pub literal_suffix: Option<String>,
+ /// The create parameters of the type
pub create_params: Option<Vec<String>>,
+ /// The nullability of the type
pub nullable: Nullable,
+ /// Whether the type is case sensitive
pub case_sensitive: bool,
+ /// Whether the type is searchable
pub searchable: Searchable,
+ /// Whether the type is unsigned
pub unsigned_attribute: Option<bool>,
+ /// Whether the type has fixed precision and scale
pub fixed_prec_scale: bool,
+ /// Whether the type is auto-incrementing
pub auto_increment: Option<bool>,
+ /// The local type name of the type
pub local_type_name: Option<String>,
+ /// The minimum scale of the type
pub minimum_scale: Option<i32>,
+ /// The maximum scale of the type
pub maximum_scale: Option<i32>,
+ /// The SQL data type of the type
pub sql_data_type: XdbcDataType,
+ /// The optional datetime subcode of the type
pub datetime_subcode: Option<XdbcDatetimeSubcode>,
+ /// The number precision radix of the type
pub num_prec_radix: Option<i32>,
+ /// The interval precision of the type
pub interval_precision: Option<i32>,
}
@@ -93,16 +112,6 @@ impl XdbcTypeInfoData {
}
}
-pub struct XdbcTypeInfoDataBuilder {
- infos: Vec<XdbcTypeInfo>,
-}
-
-impl Default for XdbcTypeInfoDataBuilder {
- fn default() -> Self {
- Self::new()
- }
-}
-
/// A builder for [`XdbcTypeInfoData`] which is used to create
[`CommandGetXdbcTypeInfo`] responses.
///
/// # Example
@@ -138,6 +147,16 @@ impl Default for XdbcTypeInfoDataBuilder {
/// // to access the underlying record batch
/// let batch = info_list.record_batch(None);
/// ```
+pub struct XdbcTypeInfoDataBuilder {
+ infos: Vec<XdbcTypeInfo>,
+}
+
+impl Default for XdbcTypeInfoDataBuilder {
+ fn default() -> Self {
+ Self::new()
+ }
+}
+
impl XdbcTypeInfoDataBuilder {
/// Create a new instance of [`XdbcTypeInfoDataBuilder`].
pub fn new() -> Self {
diff --git a/arrow-flight/src/sql/mod.rs b/arrow-flight/src/sql/mod.rs
index 453f608d3..94bb96a4f 100644
--- a/arrow-flight/src/sql/mod.rs
+++ b/arrow-flight/src/sql/mod.rs
@@ -43,9 +43,11 @@ use bytes::Bytes;
use paste::paste;
use prost::Message;
+#[allow(clippy::all)]
mod gen {
- #![allow(clippy::all)]
#![allow(rustdoc::unportable_markdown)]
+ // Since this file is auto-generated, we suppress all warnings
+ #![allow(missing_docs)]
include!("arrow.flight.protocol.sql.rs");
}
@@ -163,7 +165,9 @@ macro_rules! prost_message_ext {
/// ```
#[derive(Clone, Debug, PartialEq)]
pub enum Command {
- $($name($name),)*
+ $(
+ #[doc = concat!(stringify!($name), "variant")]
+ $name($name),)*
/// Any message that is not any FlightSQL command.
Unknown(Any),
@@ -297,10 +301,12 @@ pub struct Any {
}
impl Any {
+ /// Checks whether the message is of type `M`
pub fn is<M: ProstMessageExt>(&self) -> bool {
M::type_url() == self.type_url
}
+ /// Unpacks the contents of the message if it is of type `M`
pub fn unpack<M: ProstMessageExt>(&self) -> Result<Option<M>, ArrowError> {
if !self.is::<M>() {
return Ok(None);
@@ -310,6 +316,7 @@ impl Any {
Ok(Some(m))
}
+ /// Packs a message into an [`Any`] message
pub fn pack<M: ProstMessageExt>(message: &M) -> Result<Any, ArrowError> {
Ok(message.as_any())
}
diff --git a/arrow-flight/src/utils.rs b/arrow-flight/src/utils.rs
index 37d7ff9e7..f6129ddfe 100644
--- a/arrow-flight/src/utils.rs
+++ b/arrow-flight/src/utils.rs
@@ -160,9 +160,12 @@ pub fn batches_to_flight_data(
dictionaries.extend(encoded_dictionaries.into_iter().map(Into::into));
flight_data.push(encoded_batch.into());
}
- let mut stream = vec![schema_flight_data];
+
+ let mut stream = Vec::with_capacity(1 + dictionaries.len() +
flight_data.len());
+
+ stream.push(schema_flight_data);
stream.extend(dictionaries);
stream.extend(flight_data);
- let flight_data: Vec<_> = stream.into_iter().collect();
+ let flight_data = stream;
Ok(flight_data)
}
diff --git a/arrow-integration-test/src/lib.rs
b/arrow-integration-test/src/lib.rs
index d1486fd5a..ea5b545f2 100644
--- a/arrow-integration-test/src/lib.rs
+++ b/arrow-integration-test/src/lib.rs
@@ -21,6 +21,7 @@
//!
//! This is not a canonical format, but provides a human-readable way of
verifying language implementations
+#![warn(missing_docs)]
use arrow_buffer::{IntervalDayTime, IntervalMonthDayNano, ScalarBuffer};
use hex::decode;
use num::BigInt;
@@ -49,8 +50,11 @@ pub use schema::*;
/// See
<https://github.com/apache/arrow/blob/master/docs/source/format/Integration.rst#json-test-data-format>
#[derive(Deserialize, Serialize, Debug)]
pub struct ArrowJson {
+ /// The Arrow schema for JSON file
pub schema: ArrowJsonSchema,
+ /// The `RecordBatch`es in the JSON file
pub batches: Vec<ArrowJsonBatch>,
+ /// The dictionaries in the JSON file
#[serde(skip_serializing_if = "Option::is_none")]
pub dictionaries: Option<Vec<ArrowJsonDictionaryBatch>>,
}
@@ -60,7 +64,9 @@ pub struct ArrowJson {
/// Fields are left as JSON `Value` as they vary by `DataType`
#[derive(Deserialize, Serialize, Debug)]
pub struct ArrowJsonSchema {
+ /// An array of JSON fields
pub fields: Vec<ArrowJsonField>,
+ /// An array of metadata key-value pairs
#[serde(skip_serializing_if = "Option::is_none")]
pub metadata: Option<Vec<HashMap<String, String>>>,
}
@@ -68,13 +74,20 @@ pub struct ArrowJsonSchema {
/// Fields are left as JSON `Value` as they vary by `DataType`
#[derive(Deserialize, Serialize, Debug)]
pub struct ArrowJsonField {
+ /// The name of the field
pub name: String,
+ /// The data type of the field,
+ /// can be any valid JSON value
#[serde(rename = "type")]
pub field_type: Value,
+ /// Whether the field is nullable
pub nullable: bool,
+ /// The children fields
pub children: Vec<ArrowJsonField>,
+ /// The dictionary for the field
#[serde(skip_serializing_if = "Option::is_none")]
pub dictionary: Option<ArrowJsonFieldDictionary>,
+ /// The metadata for the field, if any
#[serde(skip_serializing_if = "Option::is_none")]
pub metadata: Option<Value>,
}
@@ -115,20 +128,28 @@ impl From<&Field> for ArrowJsonField {
}
}
+/// Represents a dictionary-encoded field in the Arrow JSON format
#[derive(Deserialize, Serialize, Debug)]
pub struct ArrowJsonFieldDictionary {
+ /// A unique identifier for the dictionary
pub id: i64,
+ /// The type of the dictionary index
#[serde(rename = "indexType")]
pub index_type: DictionaryIndexType,
+ /// Whether the dictionary is ordered
#[serde(rename = "isOrdered")]
pub is_ordered: bool,
}
+/// Type of an index for a dictionary-encoded field in the Arrow JSON format
#[derive(Deserialize, Serialize, Debug)]
pub struct DictionaryIndexType {
+ /// The name of the dictionary index type
pub name: String,
+ /// Whether the dictionary index type is signed
#[serde(rename = "isSigned")]
pub is_signed: bool,
+ /// The bit width of the dictionary index type
#[serde(rename = "bitWidth")]
pub bit_width: i64,
}
@@ -137,6 +158,7 @@ pub struct DictionaryIndexType {
#[derive(Deserialize, Serialize, Debug, Clone)]
pub struct ArrowJsonBatch {
count: usize,
+ /// The columns in the record batch
pub columns: Vec<ArrowJsonColumn>,
}
@@ -144,7 +166,9 @@ pub struct ArrowJsonBatch {
#[derive(Deserialize, Serialize, Debug, Clone)]
#[allow(non_snake_case)]
pub struct ArrowJsonDictionaryBatch {
+ /// The unique identifier for the dictionary
pub id: i64,
+ /// The data for the dictionary
pub data: ArrowJsonBatch,
}
@@ -152,15 +176,21 @@ pub struct ArrowJsonDictionaryBatch {
#[derive(Deserialize, Serialize, Clone, Debug)]
pub struct ArrowJsonColumn {
name: String,
+ /// The number of elements in the column
pub count: usize,
+ /// The validity bitmap to determine null values
#[serde(rename = "VALIDITY")]
pub validity: Option<Vec<u8>>,
+ /// The data values in the column
#[serde(rename = "DATA")]
pub data: Option<Vec<Value>>,
+ /// The offsets for variable-sized data types
#[serde(rename = "OFFSET")]
pub offset: Option<Vec<Value>>, // leaving as Value as 64-bit offsets are
strings
+ /// The type id for union types
#[serde(rename = "TYPE_ID")]
pub type_id: Option<Vec<i8>>,
+ /// The children columns for nested types
pub children: Option<Vec<ArrowJsonColumn>>,
}
@@ -189,6 +219,7 @@ impl ArrowJson {
Ok(true)
}
+ /// Convert the stored dictionaries to `Vec<RecordBatch>`
pub fn get_record_batches(&self) -> Result<Vec<RecordBatch>> {
let schema = self.schema.to_arrow_schema()?;
@@ -275,6 +306,7 @@ impl ArrowJsonField {
}
}
+/// Generates a [`RecordBatch`] from an Arrow JSON batch, given a schema
pub fn record_batch_from_json(
schema: &Schema,
json_batch: ArrowJsonBatch,
@@ -877,6 +909,7 @@ pub fn array_from_json(
}
}
+/// Construct a [`DictionaryArray`] from a partially typed JSON column
pub fn dictionary_array_from_json(
field: &Field,
json_col: ArrowJsonColumn,
@@ -965,6 +998,7 @@ fn create_null_buf(json_col: &ArrowJsonColumn) -> Buffer {
}
impl ArrowJsonBatch {
+ /// Convert a [`RecordBatch`] to an [`ArrowJsonBatch`]
pub fn from_batch(batch: &RecordBatch) -> ArrowJsonBatch {
let mut json_batch = ArrowJsonBatch {
count: batch.num_rows(),
diff --git
a/arrow-integration-testing/src/flight_client_scenarios/auth_basic_proto.rs
b/arrow-integration-testing/src/flight_client_scenarios/auth_basic_proto.rs
index 376e31e15..34c3c7706 100644
--- a/arrow-integration-testing/src/flight_client_scenarios/auth_basic_proto.rs
+++ b/arrow-integration-testing/src/flight_client_scenarios/auth_basic_proto.rs
@@ -15,6 +15,8 @@
// specific language governing permissions and limitations
// under the License.
+//! Scenario for testing basic auth.
+
use crate::{AUTH_PASSWORD, AUTH_USERNAME};
use arrow_flight::{flight_service_client::FlightServiceClient, BasicAuth,
HandshakeRequest};
@@ -27,6 +29,7 @@ type Result<T = (), E = Error> = std::result::Result<T, E>;
type Client = FlightServiceClient<tonic::transport::Channel>;
+/// Run a scenario that tests basic auth.
pub async fn run_scenario(host: &str, port: u16) -> Result {
let url = format!("http://{host}:{port}");
let mut client = FlightServiceClient::connect(url).await?;
diff --git
a/arrow-integration-testing/src/flight_client_scenarios/integration_test.rs
b/arrow-integration-testing/src/flight_client_scenarios/integration_test.rs
index 1a6c4e28a..c8289ff44 100644
--- a/arrow-integration-testing/src/flight_client_scenarios/integration_test.rs
+++ b/arrow-integration-testing/src/flight_client_scenarios/integration_test.rs
@@ -15,6 +15,8 @@
// specific language governing permissions and limitations
// under the License.
+//! Integration tests for the Flight client.
+
use crate::open_json_file;
use std::collections::HashMap;
@@ -40,6 +42,7 @@ type Result<T = (), E = Error> = std::result::Result<T, E>;
type Client = FlightServiceClient<tonic::transport::Channel>;
+/// Run a scenario that uploads data to a Flight server and then downloads it
back
pub async fn run_scenario(host: &str, port: u16, path: &str) -> Result {
let url = format!("http://{host}:{port}");
diff --git
a/arrow-integration-testing/src/flight_client_scenarios/middleware.rs
b/arrow-integration-testing/src/flight_client_scenarios/middleware.rs
index 3b71edf44..b826ad456 100644
--- a/arrow-integration-testing/src/flight_client_scenarios/middleware.rs
+++ b/arrow-integration-testing/src/flight_client_scenarios/middleware.rs
@@ -15,6 +15,8 @@
// specific language governing permissions and limitations
// under the License.
+//! Scenario for testing middleware.
+
use arrow_flight::{
flight_descriptor::DescriptorType,
flight_service_client::FlightServiceClient, FlightDescriptor,
};
@@ -24,6 +26,7 @@ use tonic::{Request, Status};
type Error = Box<dyn std::error::Error + Send + Sync + 'static>;
type Result<T = (), E = Error> = std::result::Result<T, E>;
+/// Run a scenario that tests middleware.
pub async fn run_scenario(host: &str, port: u16) -> Result {
let url = format!("http://{host}:{port}");
let conn = tonic::transport::Endpoint::new(url)?.connect().await?;
diff --git a/arrow-integration-testing/src/flight_client_scenarios.rs
b/arrow-integration-testing/src/flight_client_scenarios/mod.rs
similarity index 93%
rename from arrow-integration-testing/src/flight_client_scenarios.rs
rename to arrow-integration-testing/src/flight_client_scenarios/mod.rs
index 66cced5f4..c57944337 100644
--- a/arrow-integration-testing/src/flight_client_scenarios.rs
+++ b/arrow-integration-testing/src/flight_client_scenarios/mod.rs
@@ -15,6 +15,8 @@
// specific language governing permissions and limitations
// under the License.
+//! Collection of utilities for testing the Flight client.
+
pub mod auth_basic_proto;
pub mod integration_test;
pub mod middleware;
diff --git
a/arrow-integration-testing/src/flight_server_scenarios/auth_basic_proto.rs
b/arrow-integration-testing/src/flight_server_scenarios/auth_basic_proto.rs
index 20d868953..5462e5bd6 100644
--- a/arrow-integration-testing/src/flight_server_scenarios/auth_basic_proto.rs
+++ b/arrow-integration-testing/src/flight_server_scenarios/auth_basic_proto.rs
@@ -15,6 +15,8 @@
// specific language governing permissions and limitations
// under the License.
+//! Basic auth test for the Flight server.
+
use std::pin::Pin;
use std::sync::Arc;
@@ -35,6 +37,7 @@ use prost::Message;
use crate::{AUTH_PASSWORD, AUTH_USERNAME};
+/// Run a scenario that tests basic auth.
pub async fn scenario_setup(port: u16) -> Result {
let service = AuthBasicProtoScenarioImpl {
username: AUTH_USERNAME.into(),
@@ -52,6 +55,7 @@ pub async fn scenario_setup(port: u16) -> Result {
Ok(())
}
+/// Scenario for testing basic auth.
#[derive(Clone)]
pub struct AuthBasicProtoScenarioImpl {
username: Arc<str>,
diff --git
a/arrow-integration-testing/src/flight_server_scenarios/integration_test.rs
b/arrow-integration-testing/src/flight_server_scenarios/integration_test.rs
index 76eb9d880..0c58fae93 100644
--- a/arrow-integration-testing/src/flight_server_scenarios/integration_test.rs
+++ b/arrow-integration-testing/src/flight_server_scenarios/integration_test.rs
@@ -15,6 +15,9 @@
// specific language governing permissions and limitations
// under the License.
+//! Integration tests for the Flight server.
+
+use core::str;
use std::collections::HashMap;
use std::pin::Pin;
use std::sync::Arc;
@@ -42,6 +45,7 @@ type TonicStream<T> = Pin<Box<dyn Stream<Item = T> + Send +
Sync + 'static>>;
type Error = Box<dyn std::error::Error + Send + Sync + 'static>;
type Result<T = (), E = Error> = std::result::Result<T, E>;
+/// Set up the server side of the integration test scenario.
pub async fn scenario_setup(port: u16) -> Result {
let addr = super::listen_on(port).await?;
@@ -65,6 +69,7 @@ struct IntegrationDataset {
chunks: Vec<RecordBatch>,
}
+/// Flight service implementation for integration testing
#[derive(Clone, Default)]
pub struct FlightServiceImpl {
server_location: String,
@@ -100,13 +105,13 @@ impl FlightService for FlightServiceImpl {
) -> Result<Response<Self::DoGetStream>, Status> {
let ticket = request.into_inner();
- let key = String::from_utf8(ticket.ticket.to_vec())
+ let key = str::from_utf8(&ticket.ticket)
.map_err(|e| Status::invalid_argument(format!("Invalid ticket:
{e:?}")))?;
let uploaded_chunks = self.uploaded_chunks.lock().await;
let flight = uploaded_chunks
- .get(&key)
+ .get(key)
.ok_or_else(|| Status::not_found(format!("Could not find flight.
{key}")))?;
let options = arrow::ipc::writer::IpcWriteOptions::default();
diff --git
a/arrow-integration-testing/src/flight_server_scenarios/middleware.rs
b/arrow-integration-testing/src/flight_server_scenarios/middleware.rs
index e8d9c521b..6685d45df 100644
--- a/arrow-integration-testing/src/flight_server_scenarios/middleware.rs
+++ b/arrow-integration-testing/src/flight_server_scenarios/middleware.rs
@@ -15,6 +15,8 @@
// specific language governing permissions and limitations
// under the License.
+//! Middleware test for the Flight server.
+
use std::pin::Pin;
use arrow_flight::{
@@ -31,6 +33,7 @@ type TonicStream<T> = Pin<Box<dyn Stream<Item = T> + Send +
Sync + 'static>>;
type Error = Box<dyn std::error::Error + Send + Sync + 'static>;
type Result<T = (), E = Error> = std::result::Result<T, E>;
+/// Run a scenario that tests middleware.
pub async fn scenario_setup(port: u16) -> Result {
let service = MiddlewareScenarioImpl {};
let svc = FlightServiceServer::new(service);
@@ -44,6 +47,7 @@ pub async fn scenario_setup(port: u16) -> Result {
Ok(())
}
+/// Middleware interceptor for testing
#[derive(Clone, Default)]
pub struct MiddlewareScenarioImpl {}
diff --git a/arrow-integration-testing/src/flight_server_scenarios.rs
b/arrow-integration-testing/src/flight_server_scenarios/mod.rs
similarity index 91%
rename from arrow-integration-testing/src/flight_server_scenarios.rs
rename to arrow-integration-testing/src/flight_server_scenarios/mod.rs
index 48d4e6045..3833e1c63 100644
--- a/arrow-integration-testing/src/flight_server_scenarios.rs
+++ b/arrow-integration-testing/src/flight_server_scenarios/mod.rs
@@ -15,6 +15,7 @@
// specific language governing permissions and limitations
// under the License.
+//! Collection of utilities for testing the Flight server.
use std::net::SocketAddr;
use arrow_flight::{FlightEndpoint, Location, Ticket};
@@ -27,6 +28,7 @@ pub mod middleware;
type Error = Box<dyn std::error::Error + Send + Sync + 'static>;
type Result<T = (), E = Error> = std::result::Result<T, E>;
+/// Listen on a port and return the address
pub async fn listen_on(port: u16) -> Result<SocketAddr> {
let addr: SocketAddr = format!("0.0.0.0:{port}").parse()?;
@@ -36,6 +38,7 @@ pub async fn listen_on(port: u16) -> Result<SocketAddr> {
Ok(addr)
}
+/// Create a FlightEndpoint with a ticket and location
pub fn endpoint(ticket: &str, location_uri: impl Into<String>) ->
FlightEndpoint {
FlightEndpoint {
ticket: Some(Ticket {
diff --git a/arrow-integration-testing/src/lib.rs
b/arrow-integration-testing/src/lib.rs
index 4ce7b06a1..ba8e3876c 100644
--- a/arrow-integration-testing/src/lib.rs
+++ b/arrow-integration-testing/src/lib.rs
@@ -17,6 +17,7 @@
//! Common code used in the integration test binaries
+#![warn(missing_docs)]
use serde_json::Value;
use arrow::array::{Array, StructArray};
@@ -42,7 +43,9 @@ pub const AUTH_PASSWORD: &str = "flight";
pub mod flight_client_scenarios;
pub mod flight_server_scenarios;
+/// An Arrow file in JSON format
pub struct ArrowFile {
+ /// The schema of the file
pub schema: Schema,
// we can evolve this into a concrete Arrow type
// this is temporarily not being read from
@@ -51,12 +54,14 @@ pub struct ArrowFile {
}
impl ArrowFile {
+ /// Read a single [RecordBatch] from the file
pub fn read_batch(&self, batch_num: usize) -> Result<RecordBatch> {
let b = self.arrow_json["batches"].get(batch_num).unwrap();
let json_batch: ArrowJsonBatch =
serde_json::from_value(b.clone()).unwrap();
record_batch_from_json(&self.schema, json_batch,
Some(&self.dictionaries))
}
+ /// Read all [RecordBatch]es from the file
pub fn read_batches(&self) -> Result<Vec<RecordBatch>> {
self.arrow_json["batches"]
.as_array()
@@ -70,7 +75,7 @@ impl ArrowFile {
}
}
-// Canonicalize the names of map fields in a schema
+/// Canonicalize the names of map fields in a schema
pub fn canonicalize_schema(schema: &Schema) -> Schema {
let fields = schema
.fields()
@@ -107,6 +112,7 @@ pub fn canonicalize_schema(schema: &Schema) -> Schema {
Schema::new(fields).with_metadata(schema.metadata().clone())
}
+/// Read an Arrow file in JSON format
pub fn open_json_file(json_name: &str) -> Result<ArrowFile> {
let json_file = File::open(json_name)?;
let reader = BufReader::new(json_file);
@@ -157,10 +163,7 @@ pub fn read_gzip_json(version: &str, path: &str) ->
ArrowJson {
arrow_json
}
-//
-// C Data Integration entrypoints
-//
-
+/// C Data Integration entrypoint to export the schema from a JSON file
fn cdata_integration_export_schema_from_json(
c_json_name: *const i8,
out: *mut FFI_ArrowSchema,
@@ -173,6 +176,7 @@ fn cdata_integration_export_schema_from_json(
Ok(())
}
+/// C Data Integration entrypoint to export a batch from a JSON file
fn cdata_integration_export_batch_from_json(
c_json_name: *const i8,
batch_num: c_int,
@@ -263,6 +267,7 @@ pub unsafe extern "C" fn arrow_rs_free_error(c_error: *mut
i8) {
}
}
+/// A C-ABI for exporting an Arrow schema from a JSON file
#[no_mangle]
pub extern "C" fn arrow_rs_cdata_integration_export_schema_from_json(
c_json_name: *const i8,
@@ -272,6 +277,7 @@ pub extern "C" fn
arrow_rs_cdata_integration_export_schema_from_json(
result_to_c_error(&r)
}
+/// A C-ABI to compare an Arrow schema against a JSON file
#[no_mangle]
pub extern "C" fn arrow_rs_cdata_integration_import_schema_and_compare_to_json(
c_json_name: *const i8,
@@ -281,6 +287,7 @@ pub extern "C" fn
arrow_rs_cdata_integration_import_schema_and_compare_to_json(
result_to_c_error(&r)
}
+/// A C-ABI for exporting a RecordBatch from a JSON file
#[no_mangle]
pub extern "C" fn arrow_rs_cdata_integration_export_batch_from_json(
c_json_name: *const i8,
@@ -291,6 +298,7 @@ pub extern "C" fn
arrow_rs_cdata_integration_export_batch_from_json(
result_to_c_error(&r)
}
+/// A C-ABI to compare a RecordBatch against a JSON file
#[no_mangle]
pub extern "C" fn arrow_rs_cdata_integration_import_batch_and_compare_to_json(
c_json_name: *const i8,
diff --git a/arrow-ipc/src/convert.rs b/arrow-ipc/src/convert.rs
index 52c6a0d61..eef236529 100644
--- a/arrow-ipc/src/convert.rs
+++ b/arrow-ipc/src/convert.rs
@@ -133,6 +133,7 @@ pub fn schema_to_fb(schema: &Schema) ->
FlatBufferBuilder<'_> {
IpcSchemaEncoder::new().schema_to_fb(schema)
}
+/// Push a key-value metadata into a FlatBufferBuilder and return [WIPOffset]
pub fn metadata_to_fb<'a>(
fbb: &mut FlatBufferBuilder<'a>,
metadata: &HashMap<String, String>,
@@ -152,7 +153,7 @@ pub fn metadata_to_fb<'a>(
fbb.create_vector(&custom_metadata)
}
-#[deprecated(since = "54.0.0", note = "Use `IpcSchemaConverter`.")]
+/// Adds a [Schema] to a flatbuffer and returns the offset
pub fn schema_to_fb_offset<'a>(
fbb: &mut FlatBufferBuilder<'a>,
schema: &Schema,
diff --git a/arrow-ipc/src/lib.rs b/arrow-ipc/src/lib.rs
index 4f35ffb60..dde137153 100644
--- a/arrow-ipc/src/lib.rs
+++ b/arrow-ipc/src/lib.rs
@@ -19,6 +19,7 @@
//!
//! [Arrow IPC Format]:
https://arrow.apache.org/docs/format/Columnar.html#serialization-and-interprocess-communication-ipc
+#![warn(missing_docs)]
pub mod convert;
pub mod reader;
pub mod writer;
@@ -31,6 +32,7 @@ mod compression;
#[allow(clippy::redundant_static_lifetimes)]
#[allow(clippy::redundant_field_names)]
#[allow(non_camel_case_types)]
+#[allow(missing_docs)] // Because this is autogenerated
pub mod gen;
pub use self::gen::File::*;
diff --git a/arrow-ipc/src/writer.rs b/arrow-ipc/src/writer.rs
index b5cf20ef3..f9256b4e8 100644
--- a/arrow-ipc/src/writer.rs
+++ b/arrow-ipc/src/writer.rs
@@ -60,7 +60,7 @@ pub struct IpcWriteOptions {
/// Compression, if desired. Will result in a runtime error
/// if the corresponding feature is not enabled
batch_compression_type: Option<crate::CompressionType>,
- /// Flag indicating whether the writer should preserver the dictionary IDs
defined in the
+ /// Flag indicating whether the writer should preserve the dictionary IDs
defined in the
/// schema or generate unique dictionary IDs internally during encoding.
///
/// Defaults to `true`
@@ -135,6 +135,8 @@ impl IpcWriteOptions {
}
}
+ /// Return whether the writer is configured to preserve the dictionary IDs
+ /// defined in the schema
pub fn preserve_dict_id(&self) -> bool {
self.preserve_dict_id
}
@@ -200,6 +202,11 @@ impl Default for IpcWriteOptions {
pub struct IpcDataGenerator {}
impl IpcDataGenerator {
+ /// Converts a schema to an IPC message along with `dictionary_tracker`
+ /// and returns it encoded inside [EncodedData] as a flatbuffer
+ ///
+ /// Preferred over [IpcDataGenerator::schema_to_bytes], which has been
+ /// deprecated since Arrow v54.0.0
pub fn schema_to_bytes_with_dictionary_tracker(
&self,
schema: &Schema,
@@ -234,6 +241,7 @@ impl IpcDataGenerator {
since = "54.0.0",
note = "Use `schema_to_bytes_with_dictionary_tracker` instead. This
function signature of `schema_to_bytes_with_dictionary_tracker` in the next
release."
)]
+ /// Converts a schema to an IPC message and returns it encoded inside
[EncodedData] as a flatbuffer
pub fn schema_to_bytes(&self, schema: &Schema, write_options:
&IpcWriteOptions) -> EncodedData {
let mut fbb = FlatBufferBuilder::new();
let schema = {
@@ -951,6 +959,7 @@ impl<W: Write> FileWriter<W> {
})
}
+ /// Adds a key-value pair to the [FileWriter]'s custom metadata
pub fn write_metadata(&mut self, key: impl Into<String>, value: impl
Into<String>) {
self.custom_metadata.insert(key.into(), value.into());
}
diff --git a/arrow-json/src/writer.rs b/arrow-json/src/writer.rs
index 86d2e88d9..d973206cc 100644
--- a/arrow-json/src/writer.rs
+++ b/arrow-json/src/writer.rs
@@ -397,6 +397,7 @@ where
#[cfg(test)]
mod tests {
+ use core::str;
use std::fs::{read_to_string, File};
use std::io::{BufReader, Seek};
use std::sync::Arc;
@@ -1111,7 +1112,7 @@ mod tests {
}
}
- let result = String::from_utf8(buf).unwrap();
+ let result = str::from_utf8(&buf).unwrap();
let expected = read_to_string(test_file).unwrap();
for (r, e) in result.lines().zip(expected.lines()) {
let mut expected_json = serde_json::from_str::<Value>(e).unwrap();
@@ -1150,7 +1151,7 @@ mod tests {
fn json_writer_empty() {
let mut writer = ArrayWriter::new(vec![] as Vec<u8>);
writer.finish().unwrap();
- assert_eq!(String::from_utf8(writer.into_inner()).unwrap(), "");
+ assert_eq!(str::from_utf8(&writer.into_inner()).unwrap(), "");
}
#[test]
@@ -1279,7 +1280,7 @@ mod tests {
writer.write(&batch).unwrap();
}
- let result = String::from_utf8(buf).unwrap();
+ let result = str::from_utf8(&buf).unwrap();
let expected = read_to_string(test_file).unwrap();
for (r, e) in result.lines().zip(expected.lines()) {
let mut expected_json = serde_json::from_str::<Value>(e).unwrap();
@@ -1321,7 +1322,7 @@ mod tests {
writer.write_batches(&batches).unwrap();
}
- let result = String::from_utf8(buf).unwrap();
+ let result = str::from_utf8(&buf).unwrap();
let expected = read_to_string(test_file).unwrap();
// result is eq to 2 same batches
let expected = format!("{expected}\n{expected}");
diff --git a/arrow-schema/src/field.rs b/arrow-schema/src/field.rs
index fc4852a3d..b532ea861 100644
--- a/arrow-schema/src/field.rs
+++ b/arrow-schema/src/field.rs
@@ -610,14 +610,14 @@ mod test {
#[test]
fn test_new_with_string() {
// Fields should allow owned Strings to support reuse
- let s = String::from("c1");
+ let s = "c1";
Field::new(s, DataType::Int64, false);
}
#[test]
fn test_new_dict_with_string() {
// Fields should allow owned Strings to support reuse
- let s = String::from("c1");
+ let s = "c1";
Field::new_dict(s, DataType::Int64, false, 4, false);
}
diff --git a/arrow/tests/array_cast.rs b/arrow/tests/array_cast.rs
index 0fd89cc2b..8f86cbeab 100644
--- a/arrow/tests/array_cast.rs
+++ b/arrow/tests/array_cast.rs
@@ -179,7 +179,7 @@ fn test_can_cast_types() {
/// Create instances of arrays with varying types for cast tests
fn get_arrays_of_all_types() -> Vec<ArrayRef> {
- let tz_name = String::from("+08:00");
+ let tz_name = "+08:00";
let binary_data: Vec<&[u8]> = vec![b"foo", b"bar"];
vec![
Arc::new(BinaryArray::from(binary_data.clone())),
@@ -238,9 +238,9 @@ fn get_arrays_of_all_types() -> Vec<ArrayRef> {
Arc::new(TimestampMillisecondArray::from(vec![1000, 2000])),
Arc::new(TimestampMicrosecondArray::from(vec![1000, 2000])),
Arc::new(TimestampNanosecondArray::from(vec![1000, 2000])),
- Arc::new(TimestampSecondArray::from(vec![1000,
2000]).with_timezone(tz_name.clone())),
- Arc::new(TimestampMillisecondArray::from(vec![1000,
2000]).with_timezone(tz_name.clone())),
- Arc::new(TimestampMicrosecondArray::from(vec![1000,
2000]).with_timezone(tz_name.clone())),
+ Arc::new(TimestampSecondArray::from(vec![1000,
2000]).with_timezone(tz_name)),
+ Arc::new(TimestampMillisecondArray::from(vec![1000,
2000]).with_timezone(tz_name)),
+ Arc::new(TimestampMicrosecondArray::from(vec![1000,
2000]).with_timezone(tz_name)),
Arc::new(TimestampNanosecondArray::from(vec![1000,
2000]).with_timezone(tz_name)),
Arc::new(Date32Array::from(vec![1000, 2000])),
Arc::new(Date64Array::from(vec![1000, 2000])),
diff --git a/object_store/src/aws/builder.rs b/object_store/src/aws/builder.rs
index 75acb73e5..c52c3f8df 100644
--- a/object_store/src/aws/builder.rs
+++ b/object_store/src/aws/builder.rs
@@ -44,7 +44,6 @@ static DEFAULT_METADATA_ENDPOINT: &str =
"http://169.254.169.254";
/// A specialized `Error` for object store-related errors
#[derive(Debug, Snafu)]
-#[allow(missing_docs)]
enum Error {
#[snafu(display("Missing bucket name"))]
MissingBucketName,
diff --git a/object_store/src/aws/client.rs b/object_store/src/aws/client.rs
index 6fe4889db..7034a372e 100644
--- a/object_store/src/aws/client.rs
+++ b/object_store/src/aws/client.rs
@@ -65,7 +65,6 @@ const USER_DEFINED_METADATA_HEADER_PREFIX: &str =
"x-amz-meta-";
/// A specialized `Error` for object store-related errors
#[derive(Debug, Snafu)]
-#[allow(missing_docs)]
pub(crate) enum Error {
#[snafu(display("Error performing DeleteObjects request: {}", source))]
DeleteObjectsRequest { source: crate::client::retry::Error },
diff --git a/object_store/src/aws/resolve.rs b/object_store/src/aws/resolve.rs
index 12c9f26d2..4c7489316 100644
--- a/object_store/src/aws/resolve.rs
+++ b/object_store/src/aws/resolve.rs
@@ -21,7 +21,6 @@ use snafu::{ensure, OptionExt, ResultExt, Snafu};
/// A specialized `Error` for object store-related errors
#[derive(Debug, Snafu)]
-#[allow(missing_docs)]
enum Error {
#[snafu(display("Bucket '{}' not found", bucket))]
BucketNotFound { bucket: String },
diff --git a/object_store/src/azure/builder.rs
b/object_store/src/azure/builder.rs
index 35cedeafc..1c4589ba1 100644
--- a/object_store/src/azure/builder.rs
+++ b/object_store/src/azure/builder.rs
@@ -46,7 +46,6 @@ const MSI_ENDPOINT_ENV_KEY: &str = "IDENTITY_ENDPOINT";
/// A specialized `Error` for Azure builder-related errors
#[derive(Debug, Snafu)]
-#[allow(missing_docs)]
enum Error {
#[snafu(display("Unable parse source url. Url: {}, Error: {}", url,
source))]
UnableToParseUrl {
diff --git a/object_store/src/azure/client.rs b/object_store/src/azure/client.rs
index 049905155..06d3fb5c8 100644
--- a/object_store/src/azure/client.rs
+++ b/object_store/src/azure/client.rs
@@ -60,7 +60,6 @@ static TAGS_HEADER: HeaderName =
HeaderName::from_static("x-ms-tags");
/// A specialized `Error` for object store-related errors
#[derive(Debug, Snafu)]
-#[allow(missing_docs)]
pub(crate) enum Error {
#[snafu(display("Error performing get request {}: {}", path, source))]
GetRequest {
diff --git a/object_store/src/client/get.rs b/object_store/src/client/get.rs
index 0fef5785c..ae6a8d9de 100644
--- a/object_store/src/client/get.rs
+++ b/object_store/src/client/get.rs
@@ -96,7 +96,6 @@ impl ContentRange {
/// A specialized `Error` for get-related errors
#[derive(Debug, Snafu)]
-#[allow(missing_docs)]
enum GetResultError {
#[snafu(context(false))]
Header {
diff --git a/object_store/src/lib.rs b/object_store/src/lib.rs
index 8820983b2..a0d83eb0b 100644
--- a/object_store/src/lib.rs
+++ b/object_store/src/lib.rs
@@ -1224,78 +1224,116 @@ pub type Result<T, E = Error> = std::result::Result<T,
E>;
/// A specialized `Error` for object store-related errors
#[derive(Debug, Snafu)]
-#[allow(missing_docs)]
#[non_exhaustive]
pub enum Error {
+ /// A fallback error type when no variant matches
#[snafu(display("Generic {} error: {}", store, source))]
Generic {
+ /// The store this error originated from
store: &'static str,
+ /// The wrapped error
source: Box<dyn std::error::Error + Send + Sync + 'static>,
},
+ /// Error when the object is not found at given location
#[snafu(display("Object at location {} not found: {}", path, source))]
NotFound {
+ /// The path to the file
path: String,
+ /// The wrapped error
source: Box<dyn std::error::Error + Send + Sync + 'static>,
},
+ /// Error for invalid path
#[snafu(
display("Encountered object with invalid path: {}", source),
context(false)
)]
- InvalidPath { source: path::Error },
+ InvalidPath {
+ /// The wrapped error
+ source: path::Error,
+ },
+ /// Error when `tokio::spawn` failed
#[snafu(display("Error joining spawned task: {}", source), context(false))]
- JoinError { source: tokio::task::JoinError },
+ JoinError {
+ /// The wrapped error
+ source: tokio::task::JoinError,
+ },
+ /// Error when the attempted operation is not supported
#[snafu(display("Operation not supported: {}", source))]
NotSupported {
+ /// The wrapped error
source: Box<dyn std::error::Error + Send + Sync + 'static>,
},
+ /// Error when the object already exists
#[snafu(display("Object at location {} already exists: {}", path, source))]
AlreadyExists {
+ /// The path to the file
path: String,
+ /// The wrapped error
source: Box<dyn std::error::Error + Send + Sync + 'static>,
},
+ /// Error when the required conditions failed for the operation
#[snafu(display("Request precondition failure for path {}: {}", path,
source))]
Precondition {
+ /// The path to the file
path: String,
+ /// The wrapped error
source: Box<dyn std::error::Error + Send + Sync + 'static>,
},
+ /// Error when the object at the location isn't modified
#[snafu(display("Object at location {} not modified: {}", path, source))]
NotModified {
+ /// The path to the file
path: String,
+ /// The wrapped error
source: Box<dyn std::error::Error + Send + Sync + 'static>,
},
+ /// Error when an operation is not implemented
#[snafu(display("Operation not yet implemented."))]
NotImplemented,
+ /// Error when the used credentials don't have enough permission
+ /// to perform the requested operation
#[snafu(display(
"The operation lacked the necessary privileges to complete for path
{}: {}",
path,
source
))]
PermissionDenied {
+ /// The path to the file
path: String,
+ /// The wrapped error
source: Box<dyn std::error::Error + Send + Sync + 'static>,
},
+ /// Error when the used credentials lack valid authentication
#[snafu(display(
"The operation lacked valid authentication credentials for path {}:
{}",
path,
source
))]
Unauthenticated {
+ /// The path to the file
path: String,
+ /// The wrapped error
source: Box<dyn std::error::Error + Send + Sync + 'static>,
},
+ /// Error when a configuration key is invalid for the store used
#[snafu(display("Configuration key: '{}' is not valid for store '{}'.",
key, store))]
- UnknownConfigurationKey { store: &'static str, key: String },
+ UnknownConfigurationKey {
+ /// The object store used
+ store: &'static str,
+ /// The configuration key used
+ key: String,
+ },
}
impl From<Error> for std::io::Error {
diff --git a/object_store/src/local.rs b/object_store/src/local.rs
index db4b4b050..ac10f332d 100644
--- a/object_store/src/local.rs
+++ b/object_store/src/local.rs
@@ -44,7 +44,6 @@ use crate::{
/// A specialized `Error` for filesystem object store-related errors
#[derive(Debug, Snafu)]
-#[allow(missing_docs)]
pub(crate) enum Error {
#[snafu(display("File size for {} did not fit in a usize: {}", path,
source))]
FileSizeOverflowedUsize {
diff --git a/object_store/src/memory.rs b/object_store/src/memory.rs
index 0d72983b0..b458bdddf 100644
--- a/object_store/src/memory.rs
+++ b/object_store/src/memory.rs
@@ -38,7 +38,6 @@ use crate::{GetOptions, PutPayload};
/// A specialized `Error` for in-memory object store-related errors
#[derive(Debug, Snafu)]
-#[allow(missing_docs)]
enum Error {
#[snafu(display("No data in memory found. Location: {path}"))]
NoDataInMemory { path: String },
diff --git a/object_store/src/path/mod.rs b/object_store/src/path/mod.rs
index 59e08e2ea..4c9bb5f05 100644
--- a/object_store/src/path/mod.rs
+++ b/object_store/src/path/mod.rs
@@ -36,32 +36,57 @@ pub use parts::{InvalidPart, PathPart};
/// Error returned by [`Path::parse`]
#[derive(Debug, Snafu)]
-#[allow(missing_docs)]
#[non_exhaustive]
pub enum Error {
+ /// Error when there's an empty segment between two slashes `/` in the path
#[snafu(display("Path \"{}\" contained empty path segment", path))]
- EmptySegment { path: String },
+ EmptySegment {
+ /// The source path
+ path: String,
+ },
+ /// Error when an invalid segment is encountered in the given path
#[snafu(display("Error parsing Path \"{}\": {}", path, source))]
- BadSegment { path: String, source: InvalidPart },
+ BadSegment {
+ /// The source path
+ path: String,
+ /// The part containing the error
+ source: InvalidPart,
+ },
+ /// Error when path cannot be canonicalized
#[snafu(display("Failed to canonicalize path \"{}\": {}", path.display(),
source))]
Canonicalize {
+ /// The source path
path: std::path::PathBuf,
+ /// The underlying error
source: std::io::Error,
},
+ /// Error when the path is not a valid URL
#[snafu(display("Unable to convert path \"{}\" to URL", path.display()))]
- InvalidPath { path: std::path::PathBuf },
+ InvalidPath {
+ /// The source path
+ path: std::path::PathBuf,
+ },
+ /// Error when a path contains non-unicode characters
#[snafu(display("Path \"{}\" contained non-unicode characters: {}", path,
source))]
NonUnicode {
+ /// The source path
path: String,
+ /// The underlying `Utf8Error`
source: std::str::Utf8Error,
},
+ /// Error when a path doesn't start with the given prefix
#[snafu(display("Path {} does not start with prefix {}", path, prefix))]
- PrefixMismatch { path: String, prefix: String },
+ PrefixMismatch {
+ /// The source path
+ path: String,
+ /// The mismatched prefix
+ prefix: String,
+ },
}
/// A parsed path representation that can be safely written to object storage
diff --git a/parquet/src/compression.rs b/parquet/src/compression.rs
index edf675f13..ccc060250 100644
--- a/parquet/src/compression.rs
+++ b/parquet/src/compression.rs
@@ -298,6 +298,35 @@ mod gzip_codec {
pub use gzip_codec::*;
/// Represents a valid gzip compression level.
+///
+/// Defaults to 6.
+///
+/// * 0: least compression
+/// * 9: most compression (that other software can read)
+/// * 10: most compression (incompatible with other software, see below)
+/// #### WARNING:
+/// Level 10 compression can offer the smallest file size,
+/// but Parquet files created with it will not be readable
+/// by other "standard" parquet readers.
+///
+/// Do **NOT** use level 10 if you need other software to
+/// be able to read the files. Read below for details.
+///
+/// ### IMPORTANT:
+/// There's often confusion about the compression levels in `flate2` vs `arrow`
+/// as highlighted in issue
[#6282](https://github.com/apache/arrow-rs/issues/6282).
+///
+/// `flate2` supports two compression backends: `miniz_oxide` and `zlib`.
+///
+/// - `zlib` supports levels from 0 to 9.
+/// - `miniz_oxide` supports levels from 0 to 10.
+///
+/// `arrow` uses `flate2` with `rust_backend` feature,
+/// which provides `miniz_oxide` as the backend.
+/// Therefore 0-10 levels are supported.
+///
+/// `flate2` documents this behavior properly with
+/// [this commit](https://github.com/rust-lang/flate2-rs/pull/430).
#[derive(Debug, Eq, PartialEq, Hash, Clone, Copy)]
pub struct GzipLevel(u32);