(arrow-rs) branch master updated: chore: add docs, part of #37 (#6453)

alamb Tue, 01 Oct 2024 13:03:30 -0700

This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git



The following commit(s) were added to refs/heads/master by this push:
     new 7781bc217 chore: add docs, part of #37 (#6453)
7781bc217 is described below

commit 7781bc2170c84ada387901e09b2cdfe4235c3570
Author: ByteBaker <[email protected]>
AuthorDate: Wed Oct 2 01:33:16 2024 +0530

    chore: add docs, part of #37 (#6453)
    
    * chore: add docs, part of #37
    - add pragma `#![warn(missing_docs)]` to the following
      - `arrow-flight`
      - `arrow-ipc`
      - `arrow-integration-test`
      - `arrow-integration-testing`
      - `object_store`
    
    - also document the caveat with using level 10 GZIP compression in
      parquet. See #6282.
    
    * chore: resolve PR comments from #6453
---
 arrow-flight/examples/flight_sql_server.rs         |  3 +-
 arrow-flight/src/bin/flight_sql_client.rs          |  1 +
 arrow-flight/src/decode.rs                         |  7 +++-
 arrow-flight/src/encode.rs                         |  3 +-
 arrow-flight/src/error.rs                          |  2 +
 arrow-flight/src/lib.rs                            |  5 +++
 arrow-flight/src/sql/client.rs                     |  4 +-
 arrow-flight/src/sql/metadata/sql_info.rs          | 13 ++----
 arrow-flight/src/sql/metadata/xdbc_info.rs         | 39 +++++++++++++-----
 arrow-flight/src/sql/mod.rs                        | 11 +++++-
 arrow-flight/src/utils.rs                          |  7 +++-
 arrow-integration-test/src/lib.rs                  | 34 ++++++++++++++++
 .../flight_client_scenarios/auth_basic_proto.rs    |  3 ++
 .../flight_client_scenarios/integration_test.rs    |  3 ++
 .../src/flight_client_scenarios/middleware.rs      |  3 ++
 .../mod.rs}                                        |  2 +
 .../flight_server_scenarios/auth_basic_proto.rs    |  4 ++
 .../flight_server_scenarios/integration_test.rs    |  9 ++++-
 .../src/flight_server_scenarios/middleware.rs      |  4 ++
 .../mod.rs}                                        |  3 ++
 arrow-integration-testing/src/lib.rs               | 18 ++++++---
 arrow-ipc/src/convert.rs                           |  3 +-
 arrow-ipc/src/lib.rs                               |  2 +
 arrow-ipc/src/writer.rs                            | 11 +++++-
 arrow-json/src/writer.rs                           |  9 +++--
 arrow-schema/src/field.rs                          |  4 +-
 arrow/tests/array_cast.rs                          |  8 ++--
 object_store/src/aws/builder.rs                    |  1 -
 object_store/src/aws/client.rs                     |  1 -
 object_store/src/aws/resolve.rs                    |  1 -
 object_store/src/azure/builder.rs                  |  1 -
 object_store/src/azure/client.rs                   |  1 -
 object_store/src/client/get.rs                     |  1 -
 object_store/src/lib.rs                            | 46 ++++++++++++++++++++--
 object_store/src/local.rs                          |  1 -
 object_store/src/memory.rs                         |  1 -
 object_store/src/path/mod.rs                       | 35 +++++++++++++---
 parquet/src/compression.rs                         | 29 ++++++++++++++
 38 files changed, 269 insertions(+), 64 deletions(-)

diff --git a/arrow-flight/examples/flight_sql_server.rs 
b/arrow-flight/examples/flight_sql_server.rs
index 81afecf85..dd3a3943d 100644
--- a/arrow-flight/examples/flight_sql_server.rs
+++ b/arrow-flight/examples/flight_sql_server.rs
@@ -19,6 +19,7 @@ use arrow_flight::sql::server::PeekableFlightDataStream;
 use arrow_flight::sql::DoPutPreparedStatementResult;
 use base64::prelude::BASE64_STANDARD;
 use base64::Engine;
+use core::str;
 use futures::{stream, Stream, TryStreamExt};
 use once_cell::sync::Lazy;
 use prost::Message;
@@ -168,7 +169,7 @@ impl FlightSqlService for FlightSqlServiceImpl {
         let bytes = BASE64_STANDARD
             .decode(base64)
             .map_err(|e| status!("authorization not decodable", e))?;
-        let str = String::from_utf8(bytes).map_err(|e| status!("authorization not parsable", e))?;
+        let str = str::from_utf8(&bytes).map_err(|e| status!("authorization not parsable", e))?;
         let parts: Vec<_> = str.split(':').collect();
         let (user, pass) = match parts.as_slice() {
             [user, pass] => (user, pass),
diff --git a/arrow-flight/src/bin/flight_sql_client.rs 
b/arrow-flight/src/bin/flight_sql_client.rs
index c334b95a9..8f0618f49 100644
--- a/arrow-flight/src/bin/flight_sql_client.rs
+++ b/arrow-flight/src/bin/flight_sql_client.rs
@@ -26,6 +26,7 @@ use arrow_flight::{
 };
 use arrow_schema::Schema;
 use clap::{Parser, Subcommand};
+use core::str;
 use futures::TryStreamExt;
 use tonic::{
     metadata::MetadataMap,
diff --git a/arrow-flight/src/decode.rs b/arrow-flight/src/decode.rs
index 5561f256c..7bafc3843 100644
--- a/arrow-flight/src/decode.rs
+++ b/arrow-flight/src/decode.rs
@@ -388,11 +388,14 @@ struct FlightStreamState {
 /// FlightData and the decoded payload (Schema, RecordBatch), if any
 #[derive(Debug)]
 pub struct DecodedFlightData {
+    /// The original FlightData message
     pub inner: FlightData,
+    /// The decoded payload
     pub payload: DecodedPayload,
 }
 
 impl DecodedFlightData {
+    /// Create a new DecodedFlightData with no payload
     pub fn new_none(inner: FlightData) -> Self {
         Self {
             inner,
@@ -400,6 +403,7 @@ impl DecodedFlightData {
         }
     }
 
+    /// Create a new DecodedFlightData with a [`Schema`] payload
     pub fn new_schema(inner: FlightData, schema: SchemaRef) -> Self {
         Self {
             inner,
@@ -407,6 +411,7 @@ impl DecodedFlightData {
         }
     }
 
+    /// Create a new [`DecodedFlightData`] with a [`RecordBatch`] payload
     pub fn new_record_batch(inner: FlightData, batch: RecordBatch) -> Self {
         Self {
             inner,
@@ -414,7 +419,7 @@ impl DecodedFlightData {
         }
     }
 
-    /// return the metadata field of the inner flight data
+    /// Return the metadata field of the inner flight data
     pub fn app_metadata(&self) -> Bytes {
         self.inner.app_metadata.clone()
     }
diff --git a/arrow-flight/src/encode.rs b/arrow-flight/src/encode.rs
index 59fa8afd5..55bc92403 100644
--- a/arrow-flight/src/encode.rs
+++ b/arrow-flight/src/encode.rs
@@ -144,6 +144,7 @@ impl Default for FlightDataEncoderBuilder {
 }
 
 impl FlightDataEncoderBuilder {
+    /// Create a new [`FlightDataEncoderBuilder`].
     pub fn new() -> Self {
         Self::default()
     }
@@ -1403,7 +1404,7 @@ mod tests {
         let input_rows = batch.num_rows();
 
         let split = split_batch_for_grpc_response(batch.clone(), 
max_flight_data_size_bytes);
-        let sizes: Vec<_> = split.iter().map(|batch| batch.num_rows()).collect();
+        let sizes: Vec<_> = split.iter().map(RecordBatch::num_rows).collect();
         let output_rows: usize = sizes.iter().sum();
 
         assert_eq!(sizes, expected_sizes, "mismatch for {batch:?}");
diff --git a/arrow-flight/src/error.rs b/arrow-flight/src/error.rs
index ba979ca9f..499706e1e 100644
--- a/arrow-flight/src/error.rs
+++ b/arrow-flight/src/error.rs
@@ -37,6 +37,7 @@ pub enum FlightError {
 }
 
 impl FlightError {
+    /// Generate a new `FlightError::ProtocolError` variant.
     pub fn protocol(message: impl Into<String>) -> Self {
         Self::ProtocolError(message.into())
     }
@@ -98,6 +99,7 @@ impl From<FlightError> for tonic::Status {
     }
 }
 
+/// Result type for the Apache Arrow Flight crate
 pub type Result<T> = std::result::Result<T, FlightError>;
 
 #[cfg(test)]
diff --git a/arrow-flight/src/lib.rs b/arrow-flight/src/lib.rs
index 64e3ba01c..9f18416c0 100644
--- a/arrow-flight/src/lib.rs
+++ b/arrow-flight/src/lib.rs
@@ -37,6 +37,7 @@
 //!
 //! [Flight SQL]: https://arrow.apache.org/docs/format/FlightSql.html
 #![allow(rustdoc::invalid_html_tags)]
+#![warn(missing_docs)]
 
 use arrow_ipc::{convert, writer, writer::EncodedData, writer::IpcWriteOptions};
 use arrow_schema::{ArrowError, Schema};
@@ -52,6 +53,8 @@ type ArrowResult<T> = std::result::Result<T, ArrowError>;
 
 #[allow(clippy::all)]
 mod gen {
+    // Since this file is auto-generated, we suppress all warnings
+    #![allow(missing_docs)]
     include!("arrow.flight.protocol.rs");
 }
 
@@ -125,6 +128,7 @@ use flight_descriptor::DescriptorType;
 
 /// SchemaAsIpc represents a pairing of a `Schema` with IpcWriteOptions
 pub struct SchemaAsIpc<'a> {
+    /// Data type representing a schema and its IPC write options
     pub pair: (&'a Schema, &'a IpcWriteOptions),
 }
 
@@ -684,6 +688,7 @@ impl PollInfo {
 }
 
 impl<'a> SchemaAsIpc<'a> {
+    /// Create a new `SchemaAsIpc` from a `Schema` and `IpcWriteOptions`
     pub fn new(schema: &'a Schema, options: &'a IpcWriteOptions) -> Self {
         SchemaAsIpc {
             pair: (schema, options),
diff --git a/arrow-flight/src/sql/client.rs b/arrow-flight/src/sql/client.rs
index ef52aa27e..e45e505b2 100644
--- a/arrow-flight/src/sql/client.rs
+++ b/arrow-flight/src/sql/client.rs
@@ -695,9 +695,11 @@ fn flight_error_to_arrow_error(err: FlightError) -> 
ArrowError {
     }
 }
 
-// A polymorphic structure to natively represent different types of data contained in `FlightData`
+/// A polymorphic structure to natively represent different types of data contained in `FlightData`
 pub enum ArrowFlightData {
+    /// A record batch
     RecordBatch(RecordBatch),
+    /// A schema
     Schema(Schema),
 }
 
diff --git a/arrow-flight/src/sql/metadata/sql_info.rs 
b/arrow-flight/src/sql/metadata/sql_info.rs
index 97304d3c8..2ea30df7f 100644
--- a/arrow-flight/src/sql/metadata/sql_info.rs
+++ b/arrow-flight/src/sql/metadata/sql_info.rs
@@ -331,7 +331,7 @@ impl SqlInfoUnionBuilder {
 ///
 /// Servers constuct - usually static - [`SqlInfoData`] via the 
[`SqlInfoDataBuilder`],
 /// and build responses using [`CommandGetSqlInfo::into_builder`]
-#[derive(Debug, Clone, PartialEq)]
+#[derive(Debug, Clone, PartialEq, Default)]
 pub struct SqlInfoDataBuilder {
     /// Use BTreeMap to ensure the values are sorted by value as
     /// to make output consistent
@@ -341,17 +341,10 @@ pub struct SqlInfoDataBuilder {
     infos: BTreeMap<u32, SqlInfoValue>,
 }
 
-impl Default for SqlInfoDataBuilder {
-    fn default() -> Self {
-        Self::new()
-    }
-}
-
 impl SqlInfoDataBuilder {
+    /// Create a new SQL info builder
     pub fn new() -> Self {
-        Self {
-            infos: BTreeMap::new(),
-        }
+        Self::default()
     }
 
     /// register the specific sql metadata item
diff --git a/arrow-flight/src/sql/metadata/xdbc_info.rs 
b/arrow-flight/src/sql/metadata/xdbc_info.rs
index 2e635d303..485bedaeb 100644
--- a/arrow-flight/src/sql/metadata/xdbc_info.rs
+++ b/arrow-flight/src/sql/metadata/xdbc_info.rs
@@ -41,24 +41,43 @@ use crate::sql::{CommandGetXdbcTypeInfo, Nullable, 
Searchable, XdbcDataType, Xdb
 /// Data structure representing type information for xdbc types.
 #[derive(Debug, Clone, Default)]
 pub struct XdbcTypeInfo {
+    /// The name of the type
     pub type_name: String,
+    /// The data type of the type
     pub data_type: XdbcDataType,
+    /// The column size of the type
     pub column_size: Option<i32>,
+    /// The prefix of the type
     pub literal_prefix: Option<String>,
+    /// The suffix of the type
     pub literal_suffix: Option<String>,
+    /// The create parameters of the type
     pub create_params: Option<Vec<String>>,
+    /// The nullability of the type
     pub nullable: Nullable,
+    /// Whether the type is case sensitive
     pub case_sensitive: bool,
+    /// Whether the type is searchable
     pub searchable: Searchable,
+    /// Whether the type is unsigned
     pub unsigned_attribute: Option<bool>,
+    /// Whether the type has fixed precision and scale
     pub fixed_prec_scale: bool,
+    /// Whether the type is auto-incrementing
     pub auto_increment: Option<bool>,
+    /// The local type name of the type
     pub local_type_name: Option<String>,
+    /// The minimum scale of the type
     pub minimum_scale: Option<i32>,
+    /// The maximum scale of the type
     pub maximum_scale: Option<i32>,
+    /// The SQL data type of the type
     pub sql_data_type: XdbcDataType,
+    /// The optional datetime subcode of the type
     pub datetime_subcode: Option<XdbcDatetimeSubcode>,
+    /// The number precision radix of the type
     pub num_prec_radix: Option<i32>,
+    /// The interval precision of the type
     pub interval_precision: Option<i32>,
 }
 
@@ -93,16 +112,6 @@ impl XdbcTypeInfoData {
     }
 }
 
-pub struct XdbcTypeInfoDataBuilder {
-    infos: Vec<XdbcTypeInfo>,
-}
-
-impl Default for XdbcTypeInfoDataBuilder {
-    fn default() -> Self {
-        Self::new()
-    }
-}
-
 /// A builder for [`XdbcTypeInfoData`] which is used to create 
[`CommandGetXdbcTypeInfo`] responses.
 ///
 /// # Example
@@ -138,6 +147,16 @@ impl Default for XdbcTypeInfoDataBuilder {
 /// // to access the underlying record batch
 /// let batch = info_list.record_batch(None);
 /// ```
+pub struct XdbcTypeInfoDataBuilder {
+    infos: Vec<XdbcTypeInfo>,
+}
+
+impl Default for XdbcTypeInfoDataBuilder {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
 impl XdbcTypeInfoDataBuilder {
     /// Create a new instance of [`XdbcTypeInfoDataBuilder`].
     pub fn new() -> Self {
diff --git a/arrow-flight/src/sql/mod.rs b/arrow-flight/src/sql/mod.rs
index 453f608d3..94bb96a4f 100644
--- a/arrow-flight/src/sql/mod.rs
+++ b/arrow-flight/src/sql/mod.rs
@@ -43,9 +43,11 @@ use bytes::Bytes;
 use paste::paste;
 use prost::Message;
 
+#[allow(clippy::all)]
 mod gen {
-    #![allow(clippy::all)]
     #![allow(rustdoc::unportable_markdown)]
+    // Since this file is auto-generated, we suppress all warnings
+    #![allow(missing_docs)]
     include!("arrow.flight.protocol.sql.rs");
 }
 
@@ -163,7 +165,9 @@ macro_rules! prost_message_ext {
                 /// ```
                 #[derive(Clone, Debug, PartialEq)]
                 pub enum Command {
-                    $($name($name),)*
+                    $(
+                        #[doc = concat!(stringify!($name), "variant")]
+                        $name($name),)*
 
                     /// Any message that is not any FlightSQL command.
                     Unknown(Any),
@@ -297,10 +301,12 @@ pub struct Any {
 }
 
 impl Any {
+    /// Checks whether the message is of type `M`
     pub fn is<M: ProstMessageExt>(&self) -> bool {
         M::type_url() == self.type_url
     }
 
+    /// Unpacks the contents of the message if it is of type `M`
     pub fn unpack<M: ProstMessageExt>(&self) -> Result<Option<M>, ArrowError> {
         if !self.is::<M>() {
             return Ok(None);
@@ -310,6 +316,7 @@ impl Any {
         Ok(Some(m))
     }
 
+    /// Packs a message into an [`Any`] message
     pub fn pack<M: ProstMessageExt>(message: &M) -> Result<Any, ArrowError> {
         Ok(message.as_any())
     }
diff --git a/arrow-flight/src/utils.rs b/arrow-flight/src/utils.rs
index 37d7ff9e7..f6129ddfe 100644
--- a/arrow-flight/src/utils.rs
+++ b/arrow-flight/src/utils.rs
@@ -160,9 +160,12 @@ pub fn batches_to_flight_data(
         dictionaries.extend(encoded_dictionaries.into_iter().map(Into::into));
         flight_data.push(encoded_batch.into());
     }
-    let mut stream = vec![schema_flight_data];
+
+    let mut stream = Vec::with_capacity(1 + dictionaries.len() + flight_data.len());
+
+    stream.push(schema_flight_data);
     stream.extend(dictionaries);
     stream.extend(flight_data);
-    let flight_data: Vec<_> = stream.into_iter().collect();
+    let flight_data = stream;
     Ok(flight_data)
 }
diff --git a/arrow-integration-test/src/lib.rs 
b/arrow-integration-test/src/lib.rs
index d1486fd5a..ea5b545f2 100644
--- a/arrow-integration-test/src/lib.rs
+++ b/arrow-integration-test/src/lib.rs
@@ -21,6 +21,7 @@
 //!
 //! This is not a canonical format, but provides a human-readable way of 
verifying language implementations
 
+#![warn(missing_docs)]
 use arrow_buffer::{IntervalDayTime, IntervalMonthDayNano, ScalarBuffer};
 use hex::decode;
 use num::BigInt;
@@ -49,8 +50,11 @@ pub use schema::*;
 /// See 
<https://github.com/apache/arrow/blob/master/docs/source/format/Integration.rst#json-test-data-format>
 #[derive(Deserialize, Serialize, Debug)]
 pub struct ArrowJson {
+    /// The Arrow schema for JSON file
     pub schema: ArrowJsonSchema,
+    /// The `RecordBatch`es in the JSON file
     pub batches: Vec<ArrowJsonBatch>,
+    /// The dictionaries in the JSON file
     #[serde(skip_serializing_if = "Option::is_none")]
     pub dictionaries: Option<Vec<ArrowJsonDictionaryBatch>>,
 }
@@ -60,7 +64,9 @@ pub struct ArrowJson {
 /// Fields are left as JSON `Value` as they vary by `DataType`
 #[derive(Deserialize, Serialize, Debug)]
 pub struct ArrowJsonSchema {
+    /// An array of JSON fields
     pub fields: Vec<ArrowJsonField>,
+    /// An array of metadata key-value pairs
     #[serde(skip_serializing_if = "Option::is_none")]
     pub metadata: Option<Vec<HashMap<String, String>>>,
 }
@@ -68,13 +74,20 @@ pub struct ArrowJsonSchema {
 /// Fields are left as JSON `Value` as they vary by `DataType`
 #[derive(Deserialize, Serialize, Debug)]
 pub struct ArrowJsonField {
+    /// The name of the field
     pub name: String,
+    /// The data type of the field,
+    /// can be any valid JSON value
     #[serde(rename = "type")]
     pub field_type: Value,
+    /// Whether the field is nullable
     pub nullable: bool,
+    /// The children fields
     pub children: Vec<ArrowJsonField>,
+    /// The dictionary for the field
     #[serde(skip_serializing_if = "Option::is_none")]
     pub dictionary: Option<ArrowJsonFieldDictionary>,
+    /// The metadata for the field, if any
     #[serde(skip_serializing_if = "Option::is_none")]
     pub metadata: Option<Value>,
 }
@@ -115,20 +128,28 @@ impl From<&Field> for ArrowJsonField {
     }
 }
 
+/// Represents a dictionary-encoded field in the Arrow JSON format
 #[derive(Deserialize, Serialize, Debug)]
 pub struct ArrowJsonFieldDictionary {
+    /// A unique identifier for the dictionary
     pub id: i64,
+    /// The type of the dictionary index
     #[serde(rename = "indexType")]
     pub index_type: DictionaryIndexType,
+    /// Whether the dictionary is ordered
     #[serde(rename = "isOrdered")]
     pub is_ordered: bool,
 }
 
+/// Type of an index for a dictionary-encoded field in the Arrow JSON format
 #[derive(Deserialize, Serialize, Debug)]
 pub struct DictionaryIndexType {
+    /// The name of the dictionary index type
     pub name: String,
+    /// Whether the dictionary index type is signed
     #[serde(rename = "isSigned")]
     pub is_signed: bool,
+    /// The bit width of the dictionary index type
     #[serde(rename = "bitWidth")]
     pub bit_width: i64,
 }
@@ -137,6 +158,7 @@ pub struct DictionaryIndexType {
 #[derive(Deserialize, Serialize, Debug, Clone)]
 pub struct ArrowJsonBatch {
     count: usize,
+    /// The columns in the record batch
     pub columns: Vec<ArrowJsonColumn>,
 }
 
@@ -144,7 +166,9 @@ pub struct ArrowJsonBatch {
 #[derive(Deserialize, Serialize, Debug, Clone)]
 #[allow(non_snake_case)]
 pub struct ArrowJsonDictionaryBatch {
+    /// The unique identifier for the dictionary
     pub id: i64,
+    /// The data for the dictionary
     pub data: ArrowJsonBatch,
 }
 
@@ -152,15 +176,21 @@ pub struct ArrowJsonDictionaryBatch {
 #[derive(Deserialize, Serialize, Clone, Debug)]
 pub struct ArrowJsonColumn {
     name: String,
+    /// The number of elements in the column
     pub count: usize,
+    /// The validity bitmap to determine null values
     #[serde(rename = "VALIDITY")]
     pub validity: Option<Vec<u8>>,
+    /// The data values in the column
     #[serde(rename = "DATA")]
     pub data: Option<Vec<Value>>,
+    /// The offsets for variable-sized data types
     #[serde(rename = "OFFSET")]
     pub offset: Option<Vec<Value>>, // leaving as Value as 64-bit offsets are 
strings
+    /// The type id for union types
     #[serde(rename = "TYPE_ID")]
     pub type_id: Option<Vec<i8>>,
+    /// The children columns for nested types
     pub children: Option<Vec<ArrowJsonColumn>>,
 }
 
@@ -189,6 +219,7 @@ impl ArrowJson {
         Ok(true)
     }
 
+    /// Convert the stored dictionaries to `Vec[RecordBatch]`
     pub fn get_record_batches(&self) -> Result<Vec<RecordBatch>> {
         let schema = self.schema.to_arrow_schema()?;
 
@@ -275,6 +306,7 @@ impl ArrowJsonField {
     }
 }
 
+/// Generates a [`RecordBatch`] from an Arrow JSON batch, given a schema
 pub fn record_batch_from_json(
     schema: &Schema,
     json_batch: ArrowJsonBatch,
@@ -877,6 +909,7 @@ pub fn array_from_json(
     }
 }
 
+/// Construct a [`DictionaryArray`] from a partially typed JSON column
 pub fn dictionary_array_from_json(
     field: &Field,
     json_col: ArrowJsonColumn,
@@ -965,6 +998,7 @@ fn create_null_buf(json_col: &ArrowJsonColumn) -> Buffer {
 }
 
 impl ArrowJsonBatch {
+    /// Convert a [`RecordBatch`] to an [`ArrowJsonBatch`]
     pub fn from_batch(batch: &RecordBatch) -> ArrowJsonBatch {
         let mut json_batch = ArrowJsonBatch {
             count: batch.num_rows(),
diff --git 
a/arrow-integration-testing/src/flight_client_scenarios/auth_basic_proto.rs 
b/arrow-integration-testing/src/flight_client_scenarios/auth_basic_proto.rs
index 376e31e15..34c3c7706 100644
--- a/arrow-integration-testing/src/flight_client_scenarios/auth_basic_proto.rs
+++ b/arrow-integration-testing/src/flight_client_scenarios/auth_basic_proto.rs
@@ -15,6 +15,8 @@
 // specific language governing permissions and limitations
 // under the License.
 
+//! Scenario for testing basic auth.
+
 use crate::{AUTH_PASSWORD, AUTH_USERNAME};
 
 use arrow_flight::{flight_service_client::FlightServiceClient, BasicAuth, 
HandshakeRequest};
@@ -27,6 +29,7 @@ type Result<T = (), E = Error> = std::result::Result<T, E>;
 
 type Client = FlightServiceClient<tonic::transport::Channel>;
 
+/// Run a scenario that tests basic auth.
 pub async fn run_scenario(host: &str, port: u16) -> Result {
     let url = format!("http://{host}:{port}");
     let mut client = FlightServiceClient::connect(url).await?;
diff --git 
a/arrow-integration-testing/src/flight_client_scenarios/integration_test.rs 
b/arrow-integration-testing/src/flight_client_scenarios/integration_test.rs
index 1a6c4e28a..c8289ff44 100644
--- a/arrow-integration-testing/src/flight_client_scenarios/integration_test.rs
+++ b/arrow-integration-testing/src/flight_client_scenarios/integration_test.rs
@@ -15,6 +15,8 @@
 // specific language governing permissions and limitations
 // under the License.
 
+//! Integration tests for the Flight client.
+
 use crate::open_json_file;
 use std::collections::HashMap;
 
@@ -40,6 +42,7 @@ type Result<T = (), E = Error> = std::result::Result<T, E>;
 
 type Client = FlightServiceClient<tonic::transport::Channel>;
 
+/// Run a scenario that uploads data to a Flight server and then downloads it 
back
 pub async fn run_scenario(host: &str, port: u16, path: &str) -> Result {
     let url = format!("http://{host}:{port}");
 
diff --git 
a/arrow-integration-testing/src/flight_client_scenarios/middleware.rs 
b/arrow-integration-testing/src/flight_client_scenarios/middleware.rs
index 3b71edf44..b826ad456 100644
--- a/arrow-integration-testing/src/flight_client_scenarios/middleware.rs
+++ b/arrow-integration-testing/src/flight_client_scenarios/middleware.rs
@@ -15,6 +15,8 @@
 // specific language governing permissions and limitations
 // under the License.
 
+//! Scenario for testing middleware.
+
 use arrow_flight::{
     flight_descriptor::DescriptorType, 
flight_service_client::FlightServiceClient, FlightDescriptor,
 };
@@ -24,6 +26,7 @@ use tonic::{Request, Status};
 type Error = Box<dyn std::error::Error + Send + Sync + 'static>;
 type Result<T = (), E = Error> = std::result::Result<T, E>;
 
+/// Run a scenario that tests middleware.
 pub async fn run_scenario(host: &str, port: u16) -> Result {
     let url = format!("http://{host}:{port}");
     let conn = tonic::transport::Endpoint::new(url)?.connect().await?;
diff --git a/arrow-integration-testing/src/flight_client_scenarios.rs 
b/arrow-integration-testing/src/flight_client_scenarios/mod.rs
similarity index 93%
rename from arrow-integration-testing/src/flight_client_scenarios.rs
rename to arrow-integration-testing/src/flight_client_scenarios/mod.rs
index 66cced5f4..c57944337 100644
--- a/arrow-integration-testing/src/flight_client_scenarios.rs
+++ b/arrow-integration-testing/src/flight_client_scenarios/mod.rs
@@ -15,6 +15,8 @@
 // specific language governing permissions and limitations
 // under the License.
 
+//! Collection of utilities for testing the Flight client.
+
 pub mod auth_basic_proto;
 pub mod integration_test;
 pub mod middleware;
diff --git 
a/arrow-integration-testing/src/flight_server_scenarios/auth_basic_proto.rs 
b/arrow-integration-testing/src/flight_server_scenarios/auth_basic_proto.rs
index 20d868953..5462e5bd6 100644
--- a/arrow-integration-testing/src/flight_server_scenarios/auth_basic_proto.rs
+++ b/arrow-integration-testing/src/flight_server_scenarios/auth_basic_proto.rs
@@ -15,6 +15,8 @@
 // specific language governing permissions and limitations
 // under the License.
 
+//! Basic auth test for the Flight server.
+
 use std::pin::Pin;
 use std::sync::Arc;
 
@@ -35,6 +37,7 @@ use prost::Message;
 
 use crate::{AUTH_PASSWORD, AUTH_USERNAME};
 
+/// Run a scenario that tests basic auth.
 pub async fn scenario_setup(port: u16) -> Result {
     let service = AuthBasicProtoScenarioImpl {
         username: AUTH_USERNAME.into(),
@@ -52,6 +55,7 @@ pub async fn scenario_setup(port: u16) -> Result {
     Ok(())
 }
 
+/// Scenario for testing basic auth.
 #[derive(Clone)]
 pub struct AuthBasicProtoScenarioImpl {
     username: Arc<str>,
diff --git 
a/arrow-integration-testing/src/flight_server_scenarios/integration_test.rs 
b/arrow-integration-testing/src/flight_server_scenarios/integration_test.rs
index 76eb9d880..0c58fae93 100644
--- a/arrow-integration-testing/src/flight_server_scenarios/integration_test.rs
+++ b/arrow-integration-testing/src/flight_server_scenarios/integration_test.rs
@@ -15,6 +15,9 @@
 // specific language governing permissions and limitations
 // under the License.
 
+//! Integration tests for the Flight server.
+
+use core::str;
 use std::collections::HashMap;
 use std::pin::Pin;
 use std::sync::Arc;
@@ -42,6 +45,7 @@ type TonicStream<T> = Pin<Box<dyn Stream<Item = T> + Send + 
Sync + 'static>>;
 type Error = Box<dyn std::error::Error + Send + Sync + 'static>;
 type Result<T = (), E = Error> = std::result::Result<T, E>;
 
+/// Run a scenario that tests integration testing.
 pub async fn scenario_setup(port: u16) -> Result {
     let addr = super::listen_on(port).await?;
 
@@ -65,6 +69,7 @@ struct IntegrationDataset {
     chunks: Vec<RecordBatch>,
 }
 
+/// Flight service implementation for integration testing
 #[derive(Clone, Default)]
 pub struct FlightServiceImpl {
     server_location: String,
@@ -100,13 +105,13 @@ impl FlightService for FlightServiceImpl {
     ) -> Result<Response<Self::DoGetStream>, Status> {
         let ticket = request.into_inner();
 
-        let key = String::from_utf8(ticket.ticket.to_vec())
+        let key = str::from_utf8(&ticket.ticket)
             .map_err(|e| Status::invalid_argument(format!("Invalid ticket: {e:?}")))?;
 
         let uploaded_chunks = self.uploaded_chunks.lock().await;
 
         let flight = uploaded_chunks
-            .get(&key)
+            .get(key)
             .ok_or_else(|| Status::not_found(format!("Could not find flight. {key}")))?;
 
         let options = arrow::ipc::writer::IpcWriteOptions::default();
diff --git 
a/arrow-integration-testing/src/flight_server_scenarios/middleware.rs 
b/arrow-integration-testing/src/flight_server_scenarios/middleware.rs
index e8d9c521b..6685d45df 100644
--- a/arrow-integration-testing/src/flight_server_scenarios/middleware.rs
+++ b/arrow-integration-testing/src/flight_server_scenarios/middleware.rs
@@ -15,6 +15,8 @@
 // specific language governing permissions and limitations
 // under the License.
 
+//! Middleware test for the Flight server.
+
 use std::pin::Pin;
 
 use arrow_flight::{
@@ -31,6 +33,7 @@ type TonicStream<T> = Pin<Box<dyn Stream<Item = T> + Send + 
Sync + 'static>>;
 type Error = Box<dyn std::error::Error + Send + Sync + 'static>;
 type Result<T = (), E = Error> = std::result::Result<T, E>;
 
+/// Run a scenario that tests middleware.
 pub async fn scenario_setup(port: u16) -> Result {
     let service = MiddlewareScenarioImpl {};
     let svc = FlightServiceServer::new(service);
@@ -44,6 +47,7 @@ pub async fn scenario_setup(port: u16) -> Result {
     Ok(())
 }
 
+/// Middleware interceptor for testing
 #[derive(Clone, Default)]
 pub struct MiddlewareScenarioImpl {}
 
diff --git a/arrow-integration-testing/src/flight_server_scenarios.rs 
b/arrow-integration-testing/src/flight_server_scenarios/mod.rs
similarity index 91%
rename from arrow-integration-testing/src/flight_server_scenarios.rs
rename to arrow-integration-testing/src/flight_server_scenarios/mod.rs
index 48d4e6045..3833e1c63 100644
--- a/arrow-integration-testing/src/flight_server_scenarios.rs
+++ b/arrow-integration-testing/src/flight_server_scenarios/mod.rs
@@ -15,6 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
+//! Collection of utilities for testing the Flight server.
 use std::net::SocketAddr;
 
 use arrow_flight::{FlightEndpoint, Location, Ticket};
@@ -27,6 +28,7 @@ pub mod middleware;
 type Error = Box<dyn std::error::Error + Send + Sync + 'static>;
 type Result<T = (), E = Error> = std::result::Result<T, E>;
 
+/// Listen on a port and return the address
 pub async fn listen_on(port: u16) -> Result<SocketAddr> {
     let addr: SocketAddr = format!("0.0.0.0:{port}").parse()?;
 
@@ -36,6 +38,7 @@ pub async fn listen_on(port: u16) -> Result<SocketAddr> {
     Ok(addr)
 }
 
+/// Create a FlightEndpoint with a ticket and location
 pub fn endpoint(ticket: &str, location_uri: impl Into<String>) -> 
FlightEndpoint {
     FlightEndpoint {
         ticket: Some(Ticket {
diff --git a/arrow-integration-testing/src/lib.rs 
b/arrow-integration-testing/src/lib.rs
index 4ce7b06a1..ba8e3876c 100644
--- a/arrow-integration-testing/src/lib.rs
+++ b/arrow-integration-testing/src/lib.rs
@@ -17,6 +17,7 @@
 
 //! Common code used in the integration test binaries
 
+#![warn(missing_docs)]
 use serde_json::Value;
 
 use arrow::array::{Array, StructArray};
@@ -42,7 +43,9 @@ pub const AUTH_PASSWORD: &str = "flight";
 pub mod flight_client_scenarios;
 pub mod flight_server_scenarios;
 
+/// An Arrow file in JSON format
 pub struct ArrowFile {
+    /// The schema of the file
     pub schema: Schema,
     // we can evolve this into a concrete Arrow type
     // this is temporarily not being read from
@@ -51,12 +54,14 @@ pub struct ArrowFile {
 }
 
 impl ArrowFile {
+    /// Read a single [RecordBatch] from the file
     pub fn read_batch(&self, batch_num: usize) -> Result<RecordBatch> {
         let b = self.arrow_json["batches"].get(batch_num).unwrap();
         let json_batch: ArrowJsonBatch = 
serde_json::from_value(b.clone()).unwrap();
         record_batch_from_json(&self.schema, json_batch, 
Some(&self.dictionaries))
     }
 
+    /// Read all [RecordBatch]es from the file
     pub fn read_batches(&self) -> Result<Vec<RecordBatch>> {
         self.arrow_json["batches"]
             .as_array()
@@ -70,7 +75,7 @@ impl ArrowFile {
     }
 }
 
-// Canonicalize the names of map fields in a schema
+/// Canonicalize the names of map fields in a schema
 pub fn canonicalize_schema(schema: &Schema) -> Schema {
     let fields = schema
         .fields()
@@ -107,6 +112,7 @@ pub fn canonicalize_schema(schema: &Schema) -> Schema {
     Schema::new(fields).with_metadata(schema.metadata().clone())
 }
 
+/// Read an Arrow file in JSON format
 pub fn open_json_file(json_name: &str) -> Result<ArrowFile> {
     let json_file = File::open(json_name)?;
     let reader = BufReader::new(json_file);
@@ -157,10 +163,7 @@ pub fn read_gzip_json(version: &str, path: &str) -> 
ArrowJson {
     arrow_json
 }
 
-//
-// C Data Integration entrypoints
-//
-
+/// C Data Integration entrypoint to export the schema from a JSON file
 fn cdata_integration_export_schema_from_json(
     c_json_name: *const i8,
     out: *mut FFI_ArrowSchema,
@@ -173,6 +176,7 @@ fn cdata_integration_export_schema_from_json(
     Ok(())
 }
 
+/// C Data Integration entrypoint to export a batch from a JSON file
 fn cdata_integration_export_batch_from_json(
     c_json_name: *const i8,
     batch_num: c_int,
@@ -263,6 +267,7 @@ pub unsafe extern "C" fn arrow_rs_free_error(c_error: *mut 
i8) {
     }
 }
 
+/// A C-ABI for exporting an Arrow schema from a JSON file
 #[no_mangle]
 pub extern "C" fn arrow_rs_cdata_integration_export_schema_from_json(
     c_json_name: *const i8,
@@ -272,6 +277,7 @@ pub extern "C" fn 
arrow_rs_cdata_integration_export_schema_from_json(
     result_to_c_error(&r)
 }
 
+/// A C-ABI to compare an Arrow schema against a JSON file
 #[no_mangle]
 pub extern "C" fn arrow_rs_cdata_integration_import_schema_and_compare_to_json(
     c_json_name: *const i8,
@@ -281,6 +287,7 @@ pub extern "C" fn 
arrow_rs_cdata_integration_import_schema_and_compare_to_json(
     result_to_c_error(&r)
 }
 
+/// A C-ABI for exporting a RecordBatch from a JSON file
 #[no_mangle]
 pub extern "C" fn arrow_rs_cdata_integration_export_batch_from_json(
     c_json_name: *const i8,
@@ -291,6 +298,7 @@ pub extern "C" fn 
arrow_rs_cdata_integration_export_batch_from_json(
     result_to_c_error(&r)
 }
 
+/// A C-ABI to compare a RecordBatch against a JSON file
 #[no_mangle]
 pub extern "C" fn arrow_rs_cdata_integration_import_batch_and_compare_to_json(
     c_json_name: *const i8,
diff --git a/arrow-ipc/src/convert.rs b/arrow-ipc/src/convert.rs
index 52c6a0d61..eef236529 100644
--- a/arrow-ipc/src/convert.rs
+++ b/arrow-ipc/src/convert.rs
@@ -133,6 +133,7 @@ pub fn schema_to_fb(schema: &Schema) -> 
FlatBufferBuilder<'_> {
     IpcSchemaEncoder::new().schema_to_fb(schema)
 }
 
+/// Push a key-value metadata into a FlatBufferBuilder and return [WIPOffset]
 pub fn metadata_to_fb<'a>(
     fbb: &mut FlatBufferBuilder<'a>,
     metadata: &HashMap<String, String>,
@@ -152,7 +153,7 @@ pub fn metadata_to_fb<'a>(
     fbb.create_vector(&custom_metadata)
 }
 
-#[deprecated(since = "54.0.0", note = "Use `IpcSchemaConverter`.")]
+/// Adds a [Schema] to a flatbuffer and returns the offset
 pub fn schema_to_fb_offset<'a>(
     fbb: &mut FlatBufferBuilder<'a>,
     schema: &Schema,
diff --git a/arrow-ipc/src/lib.rs b/arrow-ipc/src/lib.rs
index 4f35ffb60..dde137153 100644
--- a/arrow-ipc/src/lib.rs
+++ b/arrow-ipc/src/lib.rs
@@ -19,6 +19,7 @@
 //!
 //! [Arrow IPC Format]: 
https://arrow.apache.org/docs/format/Columnar.html#serialization-and-interprocess-communication-ipc
 
+#![warn(missing_docs)]
 pub mod convert;
 pub mod reader;
 pub mod writer;
@@ -31,6 +32,7 @@ mod compression;
 #[allow(clippy::redundant_static_lifetimes)]
 #[allow(clippy::redundant_field_names)]
 #[allow(non_camel_case_types)]
+#[allow(missing_docs)] // Because this is autogenerated
 pub mod gen;
 
 pub use self::gen::File::*;
diff --git a/arrow-ipc/src/writer.rs b/arrow-ipc/src/writer.rs
index b5cf20ef3..f9256b4e8 100644
--- a/arrow-ipc/src/writer.rs
+++ b/arrow-ipc/src/writer.rs
@@ -60,7 +60,7 @@ pub struct IpcWriteOptions {
     /// Compression, if desired. Will result in a runtime error
     /// if the corresponding feature is not enabled
     batch_compression_type: Option<crate::CompressionType>,
-    /// Flag indicating whether the writer should preserver the dictionary IDs 
defined in the
+    /// Flag indicating whether the writer should preserve the dictionary IDs 
defined in the
     /// schema or generate unique dictionary IDs internally during encoding.
     ///
     /// Defaults to `true`
@@ -135,6 +135,8 @@ impl IpcWriteOptions {
         }
     }
 
+    /// Return whether the writer is configured to preserve the dictionary IDs
+    /// defined in the schema
     pub fn preserve_dict_id(&self) -> bool {
         self.preserve_dict_id
     }
@@ -200,6 +202,11 @@ impl Default for IpcWriteOptions {
 pub struct IpcDataGenerator {}
 
 impl IpcDataGenerator {
+    /// Converts a schema to an IPC message along with `dictionary_tracker`
+    /// and returns it encoded inside [EncodedData] as a flatbuffer
+    ///
+    /// Prefer this method over [IpcDataGenerator::schema_to_bytes], which has
+    /// been deprecated since Arrow v54.0.0
     pub fn schema_to_bytes_with_dictionary_tracker(
         &self,
         schema: &Schema,
@@ -234,6 +241,7 @@ impl IpcDataGenerator {
         since = "54.0.0",
         note = "Use `schema_to_bytes_with_dictionary_tracker` instead. This 
function signature of `schema_to_bytes_with_dictionary_tracker` in the next 
release."
     )]
+    /// Converts a schema to an IPC message and returns it encoded inside 
[EncodedData] as a flatbuffer
     pub fn schema_to_bytes(&self, schema: &Schema, write_options: 
&IpcWriteOptions) -> EncodedData {
         let mut fbb = FlatBufferBuilder::new();
         let schema = {
@@ -951,6 +959,7 @@ impl<W: Write> FileWriter<W> {
         })
     }
 
+    /// Adds a key-value pair to the [FileWriter]'s custom metadata
     pub fn write_metadata(&mut self, key: impl Into<String>, value: impl 
Into<String>) {
         self.custom_metadata.insert(key.into(), value.into());
     }
diff --git a/arrow-json/src/writer.rs b/arrow-json/src/writer.rs
index 86d2e88d9..d973206cc 100644
--- a/arrow-json/src/writer.rs
+++ b/arrow-json/src/writer.rs
@@ -397,6 +397,7 @@ where
 
 #[cfg(test)]
 mod tests {
+    use core::str;
     use std::fs::{read_to_string, File};
     use std::io::{BufReader, Seek};
     use std::sync::Arc;
@@ -1111,7 +1112,7 @@ mod tests {
             }
         }
 
-        let result = String::from_utf8(buf).unwrap();
+        let result = str::from_utf8(&buf).unwrap();
         let expected = read_to_string(test_file).unwrap();
         for (r, e) in result.lines().zip(expected.lines()) {
             let mut expected_json = serde_json::from_str::<Value>(e).unwrap();
@@ -1150,7 +1151,7 @@ mod tests {
     fn json_writer_empty() {
         let mut writer = ArrayWriter::new(vec![] as Vec<u8>);
         writer.finish().unwrap();
-        assert_eq!(String::from_utf8(writer.into_inner()).unwrap(), "");
+        assert_eq!(str::from_utf8(&writer.into_inner()).unwrap(), "");
     }
 
     #[test]
@@ -1279,7 +1280,7 @@ mod tests {
             writer.write(&batch).unwrap();
         }
 
-        let result = String::from_utf8(buf).unwrap();
+        let result = str::from_utf8(&buf).unwrap();
         let expected = read_to_string(test_file).unwrap();
         for (r, e) in result.lines().zip(expected.lines()) {
             let mut expected_json = serde_json::from_str::<Value>(e).unwrap();
@@ -1321,7 +1322,7 @@ mod tests {
             writer.write_batches(&batches).unwrap();
         }
 
-        let result = String::from_utf8(buf).unwrap();
+        let result = str::from_utf8(&buf).unwrap();
         let expected = read_to_string(test_file).unwrap();
         // result is eq to 2 same batches
         let expected = format!("{expected}\n{expected}");
diff --git a/arrow-schema/src/field.rs b/arrow-schema/src/field.rs
index fc4852a3d..b532ea861 100644
--- a/arrow-schema/src/field.rs
+++ b/arrow-schema/src/field.rs
@@ -610,14 +610,14 @@ mod test {
     #[test]
     fn test_new_with_string() {
         // Fields should allow owned Strings to support reuse
-        let s = String::from("c1");
+        let s = "c1";
         Field::new(s, DataType::Int64, false);
     }
 
     #[test]
     fn test_new_dict_with_string() {
         // Fields should allow owned Strings to support reuse
-        let s = String::from("c1");
+        let s = "c1";
         Field::new_dict(s, DataType::Int64, false, 4, false);
     }
 
diff --git a/arrow/tests/array_cast.rs b/arrow/tests/array_cast.rs
index 0fd89cc2b..8f86cbeab 100644
--- a/arrow/tests/array_cast.rs
+++ b/arrow/tests/array_cast.rs
@@ -179,7 +179,7 @@ fn test_can_cast_types() {
 
 /// Create instances of arrays with varying types for cast tests
 fn get_arrays_of_all_types() -> Vec<ArrayRef> {
-    let tz_name = String::from("+08:00");
+    let tz_name = "+08:00";
     let binary_data: Vec<&[u8]> = vec![b"foo", b"bar"];
     vec![
         Arc::new(BinaryArray::from(binary_data.clone())),
@@ -238,9 +238,9 @@ fn get_arrays_of_all_types() -> Vec<ArrayRef> {
         Arc::new(TimestampMillisecondArray::from(vec![1000, 2000])),
         Arc::new(TimestampMicrosecondArray::from(vec![1000, 2000])),
         Arc::new(TimestampNanosecondArray::from(vec![1000, 2000])),
-        Arc::new(TimestampSecondArray::from(vec![1000, 
2000]).with_timezone(tz_name.clone())),
-        Arc::new(TimestampMillisecondArray::from(vec![1000, 
2000]).with_timezone(tz_name.clone())),
-        Arc::new(TimestampMicrosecondArray::from(vec![1000, 
2000]).with_timezone(tz_name.clone())),
+        Arc::new(TimestampSecondArray::from(vec![1000, 
2000]).with_timezone(tz_name)),
+        Arc::new(TimestampMillisecondArray::from(vec![1000, 
2000]).with_timezone(tz_name)),
+        Arc::new(TimestampMicrosecondArray::from(vec![1000, 
2000]).with_timezone(tz_name)),
         Arc::new(TimestampNanosecondArray::from(vec![1000, 
2000]).with_timezone(tz_name)),
         Arc::new(Date32Array::from(vec![1000, 2000])),
         Arc::new(Date64Array::from(vec![1000, 2000])),
diff --git a/object_store/src/aws/builder.rs b/object_store/src/aws/builder.rs
index 75acb73e5..c52c3f8df 100644
--- a/object_store/src/aws/builder.rs
+++ b/object_store/src/aws/builder.rs
@@ -44,7 +44,6 @@ static DEFAULT_METADATA_ENDPOINT: &str = 
"http://169.254.169.254";
 
 /// A specialized `Error` for object store-related errors
 #[derive(Debug, Snafu)]
-#[allow(missing_docs)]
 enum Error {
     #[snafu(display("Missing bucket name"))]
     MissingBucketName,
diff --git a/object_store/src/aws/client.rs b/object_store/src/aws/client.rs
index 6fe4889db..7034a372e 100644
--- a/object_store/src/aws/client.rs
+++ b/object_store/src/aws/client.rs
@@ -65,7 +65,6 @@ const USER_DEFINED_METADATA_HEADER_PREFIX: &str = 
"x-amz-meta-";
 
 /// A specialized `Error` for object store-related errors
 #[derive(Debug, Snafu)]
-#[allow(missing_docs)]
 pub(crate) enum Error {
     #[snafu(display("Error performing DeleteObjects request: {}", source))]
     DeleteObjectsRequest { source: crate::client::retry::Error },
diff --git a/object_store/src/aws/resolve.rs b/object_store/src/aws/resolve.rs
index 12c9f26d2..4c7489316 100644
--- a/object_store/src/aws/resolve.rs
+++ b/object_store/src/aws/resolve.rs
@@ -21,7 +21,6 @@ use snafu::{ensure, OptionExt, ResultExt, Snafu};
 
 /// A specialized `Error` for object store-related errors
 #[derive(Debug, Snafu)]
-#[allow(missing_docs)]
 enum Error {
     #[snafu(display("Bucket '{}' not found", bucket))]
     BucketNotFound { bucket: String },
diff --git a/object_store/src/azure/builder.rs 
b/object_store/src/azure/builder.rs
index 35cedeafc..1c4589ba1 100644
--- a/object_store/src/azure/builder.rs
+++ b/object_store/src/azure/builder.rs
@@ -46,7 +46,6 @@ const MSI_ENDPOINT_ENV_KEY: &str = "IDENTITY_ENDPOINT";
 
 /// A specialized `Error` for Azure builder-related errors
 #[derive(Debug, Snafu)]
-#[allow(missing_docs)]
 enum Error {
     #[snafu(display("Unable parse source url. Url: {}, Error: {}", url, 
source))]
     UnableToParseUrl {
diff --git a/object_store/src/azure/client.rs b/object_store/src/azure/client.rs
index 049905155..06d3fb5c8 100644
--- a/object_store/src/azure/client.rs
+++ b/object_store/src/azure/client.rs
@@ -60,7 +60,6 @@ static TAGS_HEADER: HeaderName = 
HeaderName::from_static("x-ms-tags");
 
 /// A specialized `Error` for object store-related errors
 #[derive(Debug, Snafu)]
-#[allow(missing_docs)]
 pub(crate) enum Error {
     #[snafu(display("Error performing get request {}: {}", path, source))]
     GetRequest {
diff --git a/object_store/src/client/get.rs b/object_store/src/client/get.rs
index 0fef5785c..ae6a8d9de 100644
--- a/object_store/src/client/get.rs
+++ b/object_store/src/client/get.rs
@@ -96,7 +96,6 @@ impl ContentRange {
 
 /// A specialized `Error` for get-related errors
 #[derive(Debug, Snafu)]
-#[allow(missing_docs)]
 enum GetResultError {
     #[snafu(context(false))]
     Header {
diff --git a/object_store/src/lib.rs b/object_store/src/lib.rs
index 8820983b2..a0d83eb0b 100644
--- a/object_store/src/lib.rs
+++ b/object_store/src/lib.rs
@@ -1224,78 +1224,116 @@ pub type Result<T, E = Error> = std::result::Result<T, 
E>;
 
 /// A specialized `Error` for object store-related errors
 #[derive(Debug, Snafu)]
-#[allow(missing_docs)]
 #[non_exhaustive]
 pub enum Error {
+    /// A fallback error type when no variant matches
     #[snafu(display("Generic {} error: {}", store, source))]
     Generic {
+        /// The store this error originated from
         store: &'static str,
+        /// The wrapped error
         source: Box<dyn std::error::Error + Send + Sync + 'static>,
     },
 
+    /// Error when the object is not found at given location
     #[snafu(display("Object at location {} not found: {}", path, source))]
     NotFound {
+        /// The path to the file
         path: String,
+        /// The wrapped error
         source: Box<dyn std::error::Error + Send + Sync + 'static>,
     },
 
+    /// Error for invalid path
     #[snafu(
         display("Encountered object with invalid path: {}", source),
         context(false)
     )]
-    InvalidPath { source: path::Error },
+    InvalidPath {
+        /// The wrapped error
+        source: path::Error,
+    },
 
+    /// Error when `tokio::spawn` failed
     #[snafu(display("Error joining spawned task: {}", source), context(false))]
-    JoinError { source: tokio::task::JoinError },
+    JoinError {
+        /// The wrapped error
+        source: tokio::task::JoinError,
+    },
 
+    /// Error when the attempted operation is not supported
     #[snafu(display("Operation not supported: {}", source))]
     NotSupported {
+        /// The wrapped error
         source: Box<dyn std::error::Error + Send + Sync + 'static>,
     },
 
+    /// Error when the object already exists
     #[snafu(display("Object at location {} already exists: {}", path, source))]
     AlreadyExists {
+        /// The path to the file
         path: String,
+        /// The wrapped error
         source: Box<dyn std::error::Error + Send + Sync + 'static>,
     },
 
+    /// Error when the required conditions failed for the operation
     #[snafu(display("Request precondition failure for path {}: {}", path, 
source))]
     Precondition {
+        /// The path to the file
         path: String,
+        /// The wrapped error
         source: Box<dyn std::error::Error + Send + Sync + 'static>,
     },
 
+    /// Error when the object at the location isn't modified
     #[snafu(display("Object at location {} not modified: {}", path, source))]
     NotModified {
+        /// The path to the file
         path: String,
+        /// The wrapped error
         source: Box<dyn std::error::Error + Send + Sync + 'static>,
     },
 
+    /// Error when an operation is not implemented
     #[snafu(display("Operation not yet implemented."))]
     NotImplemented,
 
+    /// Error when the used credentials don't have enough permission
+    /// to perform the requested operation
     #[snafu(display(
         "The operation lacked the necessary privileges to complete for path 
{}: {}",
         path,
         source
     ))]
     PermissionDenied {
+        /// The path to the file
         path: String,
+        /// The wrapped error
         source: Box<dyn std::error::Error + Send + Sync + 'static>,
     },
 
+    /// Error when the used credentials lack valid authentication
     #[snafu(display(
         "The operation lacked valid authentication credentials for path {}: 
{}",
         path,
         source
     ))]
     Unauthenticated {
+        /// The path to the file
         path: String,
+        /// The wrapped error
         source: Box<dyn std::error::Error + Send + Sync + 'static>,
     },
 
+    /// Error when a configuration key is invalid for the store used
     #[snafu(display("Configuration key: '{}' is not valid for store '{}'.", 
key, store))]
-    UnknownConfigurationKey { store: &'static str, key: String },
+    UnknownConfigurationKey {
+        /// The object store used
+        store: &'static str,
+        /// The configuration key used
+        key: String,
+    },
 }
 
 impl From<Error> for std::io::Error {
diff --git a/object_store/src/local.rs b/object_store/src/local.rs
index db4b4b050..ac10f332d 100644
--- a/object_store/src/local.rs
+++ b/object_store/src/local.rs
@@ -44,7 +44,6 @@ use crate::{
 
 /// A specialized `Error` for filesystem object store-related errors
 #[derive(Debug, Snafu)]
-#[allow(missing_docs)]
 pub(crate) enum Error {
     #[snafu(display("File size for {} did not fit in a usize: {}", path, 
source))]
     FileSizeOverflowedUsize {
diff --git a/object_store/src/memory.rs b/object_store/src/memory.rs
index 0d72983b0..b458bdddf 100644
--- a/object_store/src/memory.rs
+++ b/object_store/src/memory.rs
@@ -38,7 +38,6 @@ use crate::{GetOptions, PutPayload};
 
 /// A specialized `Error` for in-memory object store-related errors
 #[derive(Debug, Snafu)]
-#[allow(missing_docs)]
 enum Error {
     #[snafu(display("No data in memory found. Location: {path}"))]
     NoDataInMemory { path: String },
diff --git a/object_store/src/path/mod.rs b/object_store/src/path/mod.rs
index 59e08e2ea..4c9bb5f05 100644
--- a/object_store/src/path/mod.rs
+++ b/object_store/src/path/mod.rs
@@ -36,32 +36,57 @@ pub use parts::{InvalidPart, PathPart};
 
 /// Error returned by [`Path::parse`]
 #[derive(Debug, Snafu)]
-#[allow(missing_docs)]
 #[non_exhaustive]
 pub enum Error {
+    /// Error when there's an empty segment between two slashes `/` in the path
     #[snafu(display("Path \"{}\" contained empty path segment", path))]
-    EmptySegment { path: String },
+    EmptySegment {
+        /// The source path
+        path: String,
+    },
 
+    /// Error when an invalid segment is encountered in the given path
     #[snafu(display("Error parsing Path \"{}\": {}", path, source))]
-    BadSegment { path: String, source: InvalidPart },
+    BadSegment {
+        /// The source path
+        path: String,
+        /// The part containing the error
+        source: InvalidPart,
+    },
 
+    /// Error when path cannot be canonicalized
     #[snafu(display("Failed to canonicalize path \"{}\": {}", path.display(), 
source))]
     Canonicalize {
+        /// The source path
         path: std::path::PathBuf,
+        /// The underlying error
         source: std::io::Error,
     },
 
+    /// Error when the path is not a valid URL
     #[snafu(display("Unable to convert path \"{}\" to URL", path.display()))]
-    InvalidPath { path: std::path::PathBuf },
+    InvalidPath {
+        /// The source path
+        path: std::path::PathBuf,
+    },
 
+    /// Error when a path contains non-unicode characters
     #[snafu(display("Path \"{}\" contained non-unicode characters: {}", path, 
source))]
     NonUnicode {
+        /// The source path
         path: String,
+        /// The underlying `Utf8Error`
         source: std::str::Utf8Error,
     },
 
+    /// Error when a path doesn't start with the given prefix
     #[snafu(display("Path {} does not start with prefix {}", path, prefix))]
-    PrefixMismatch { path: String, prefix: String },
+    PrefixMismatch {
+        /// The source path
+        path: String,
+        /// The mismatched prefix
+        prefix: String,
+    },
 }
 
 /// A parsed path representation that can be safely written to object storage
diff --git a/parquet/src/compression.rs b/parquet/src/compression.rs
index edf675f13..ccc060250 100644
--- a/parquet/src/compression.rs
+++ b/parquet/src/compression.rs
@@ -298,6 +298,35 @@ mod gzip_codec {
 pub use gzip_codec::*;
 
 /// Represents a valid gzip compression level.
+///
+/// Defaults to 6.
+///
+/// * 0: least compression
+/// * 9: most compression (that other software can read)
+/// * 10: most compression (incompatible with other software, see below)
+/// #### WARNING:
+/// Level 10 compression can offer smallest file size,
+/// but Parquet files created with it will not be readable
+/// by other "standard" parquet readers.
+///
+/// Do **NOT** use level 10 if you need other software to
+/// be able to read the files. Read below for details.
+///
+/// ### IMPORTANT:
+/// There's often confusion about the compression levels in `flate2` vs `arrow`
+/// as highlighted in issue 
[#6282](https://github.com/apache/arrow-rs/issues/6282).
+///
+/// `flate2` supports two compression backends: `miniz_oxide` and `zlib`.
+///
+/// - `zlib` supports levels from 0 to 9.
+/// - `miniz_oxide` supports levels from 0 to 10.
+///
+/// `arrow` uses `flate2` with the `rust_backend` feature,
+/// which provides `miniz_oxide` as the backend.
+/// Therefore 0-10 levels are supported.
+///
+/// `flate2` documents this behavior properly with
+/// [this commit](https://github.com/rust-lang/flate2-rs/pull/430).
 #[derive(Debug, Eq, PartialEq, Hash, Clone, Copy)]
 pub struct GzipLevel(u32);

(arrow-rs) branch master updated: chore: add docs, part of #37 (#6453)

Reply via email to