This is an automated email from the ASF dual-hosted git repository.
mbrobbel pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push:
new 521f219e30 Migrate `arrow-avro` to Rust 2024 (#8545)
521f219e30 is described below
commit 521f219e308613811aeae11300bf7a7b0fb5ec29
Author: Matthijs Brobbel <[email protected]>
AuthorDate: Wed Oct 8 10:16:32 2025 +0200
Migrate `arrow-avro` to Rust 2024 (#8545)
# Which issue does this PR close?
- Contributes to #6827
# Rationale for this change
Splitting up #8227.
# What changes are included in this PR?
Migrate `arrow-avro` to Rust 2024
# Are these changes tested?
CI
# Are there any user-facing changes?
Yes
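For context, the bulk of this migration is mechanical: `Cargo.toml` moves from the workspace edition to `edition = "2024"`, and `cargo fmt` under the 2024 style edition rewrites the code accordingly. In this diff that shows up as three recurring patterns: `use` groups are re-sorted so names starting with an uppercase letter come before lowercase ones (`{i256, Buffer}` becomes `{Buffer, i256}`), `return` expressions that end a block gain a trailing semicolon, and short `if`/`else` expressions collapse onto a single line. A minimal sketch of these patterns follows; the functions are hypothetical illustrations, not code from this crate:

```rust
// Sketch of the 2024 style-edition patterns visible in this diff;
// these functions are illustrative only, not part of arrow-avro.

fn union_branch_byte(nulls_first: bool, is_null: bool) -> u8 {
    // Short if/else expressions are now formatted on a single line.
    if nulls_first == is_null { 0x00 } else { 0x02 }
}

fn require(value: Option<u8>) -> Result<u8, String> {
    match value {
        Some(v) => Ok(v),
        None => {
            // A `return` that ends a match-arm block now carries a
            // trailing semicolon, as seen throughout this patch.
            return Err("missing value".to_string());
        }
    }
}

fn main() {
    println!("{}", union_branch_byte(true, false));
    println!("{:?}", require(Some(1)));
}
```

No behavior change is intended; the hunks below appear to be formatter output plus the edition bump itself.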
---
arrow-avro/Cargo.toml | 4 +-
arrow-avro/benches/avro_writer.rs | 12 +--
arrow-avro/benches/decoder.rs | 6 +-
arrow-avro/examples/decode_kafka_stream.rs | 2 +-
arrow-avro/src/codec.rs | 26 +++---
arrow-avro/src/reader/header.rs | 4 +-
arrow-avro/src/reader/mod.rs | 22 ++---
arrow-avro/src/reader/record.rs | 10 +--
arrow-avro/src/schema.rs | 14 +--
arrow-avro/src/writer/encoder.rs | 133 +++++++++++++++--------------
arrow-avro/src/writer/format.rs | 2 +-
arrow-avro/src/writer/mod.rs | 2 +-
12 files changed, 120 insertions(+), 117 deletions(-)
diff --git a/arrow-avro/Cargo.toml b/arrow-avro/Cargo.toml
index 0264b74972..975bcbe5e8 100644
--- a/arrow-avro/Cargo.toml
+++ b/arrow-avro/Cargo.toml
@@ -25,7 +25,7 @@ authors = { workspace = true }
license = { workspace = true }
keywords = { workspace = true }
include = { workspace = true }
-edition = { workspace = true }
+edition = "2024"
rust-version = { workspace = true }
[lib]
@@ -93,4 +93,4 @@ harness = false
[[bench]]
name = "avro_writer"
-harness = false
\ No newline at end of file
+harness = false
diff --git a/arrow-avro/benches/avro_writer.rs b/arrow-avro/benches/avro_writer.rs
index 085ec66c1f..57ab486638 100644
--- a/arrow-avro/benches/avro_writer.rs
+++ b/arrow-avro/benches/avro_writer.rs
@@ -22,21 +22,21 @@ extern crate criterion;
extern crate once_cell;
use arrow_array::{
+ ArrayRef, BinaryArray, BooleanArray, Decimal32Array, Decimal64Array, Decimal128Array,
+ Decimal256Array, FixedSizeBinaryArray, Float32Array, Float64Array, ListArray, PrimitiveArray,
+ RecordBatch, StringArray, StructArray,
builder::{ListBuilder, StringBuilder},
types::{Int32Type, Int64Type, IntervalMonthDayNanoType, TimestampMicrosecondType},
- ArrayRef, BinaryArray, BooleanArray, Decimal128Array, Decimal256Array, Decimal32Array,
- Decimal64Array, FixedSizeBinaryArray, Float32Array, Float64Array, ListArray, PrimitiveArray,
- RecordBatch, StringArray, StructArray,
};
use arrow_avro::writer::AvroWriter;
-use arrow_buffer::{i256, Buffer};
+use arrow_buffer::{Buffer, i256};
use arrow_schema::{DataType, Field, IntervalUnit, Schema, TimeUnit, UnionFields, UnionMode};
-use criterion::{criterion_group, criterion_main, BatchSize, BenchmarkId, Criterion, Throughput};
+use criterion::{BatchSize, BenchmarkId, Criterion, Throughput, criterion_group, criterion_main};
use once_cell::sync::Lazy;
use rand::{
+ Rng, SeedableRng,
distr::uniform::{SampleRange, SampleUniform},
rngs::StdRng,
- Rng, SeedableRng,
};
use std::collections::HashMap;
use std::io::Cursor;
diff --git a/arrow-avro/benches/decoder.rs b/arrow-avro/benches/decoder.rs
index 5ab0f847ef..5db99b4db7 100644
--- a/arrow-avro/benches/decoder.rs
+++ b/arrow-avro/benches/decoder.rs
@@ -26,10 +26,10 @@ extern crate once_cell;
extern crate uuid;
use apache_avro::types::Value;
-use apache_avro::{to_avro_datum, Decimal, Schema as ApacheSchema};
-use arrow_avro::schema::{Fingerprint, FingerprintAlgorithm, CONFLUENT_MAGIC, SINGLE_OBJECT_MAGIC};
+use apache_avro::{Decimal, Schema as ApacheSchema, to_avro_datum};
+use arrow_avro::schema::{CONFLUENT_MAGIC, Fingerprint, FingerprintAlgorithm, SINGLE_OBJECT_MAGIC};
use arrow_avro::{reader::ReaderBuilder, schema::AvroSchema};
-use criterion::{criterion_group, criterion_main, BatchSize, BenchmarkId, Criterion, Throughput};
+use criterion::{BatchSize, BenchmarkId, Criterion, Throughput, criterion_group, criterion_main};
use once_cell::sync::Lazy;
use std::{hint::black_box, time::Duration};
use uuid::Uuid;
diff --git a/arrow-avro/examples/decode_kafka_stream.rs b/arrow-avro/examples/decode_kafka_stream.rs
index f5b0f2e657..84ea3d3931 100644
--- a/arrow-avro/examples/decode_kafka_stream.rs
+++ b/arrow-avro/examples/decode_kafka_stream.rs
@@ -35,7 +35,7 @@
use arrow_array::{Int64Array, RecordBatch, StringArray};
use arrow_avro::reader::ReaderBuilder;
use arrow_avro::schema::{
- AvroSchema, Fingerprint, FingerprintAlgorithm, SchemaStore, CONFLUENT_MAGIC,
+ AvroSchema, CONFLUENT_MAGIC, Fingerprint, FingerprintAlgorithm, SchemaStore,
};
use arrow_schema::ArrowError;
diff --git a/arrow-avro/src/codec.rs b/arrow-avro/src/codec.rs
index 0de010cd6b..f946a5a1b9 100644
--- a/arrow-avro/src/codec.rs
+++ b/arrow-avro/src/codec.rs
@@ -15,13 +15,13 @@
// specific language governing permissions and limitations
// under the License.
use crate::schema::{
- make_full_name, Array, Attributes, ComplexType, Enum, Fixed, Map, Nullability, PrimitiveType,
- Record, Schema, Type, TypeName, AVRO_ENUM_SYMBOLS_METADATA_KEY,
- AVRO_FIELD_DEFAULT_METADATA_KEY, AVRO_NAMESPACE_METADATA_KEY, AVRO_NAME_METADATA_KEY,
+ AVRO_ENUM_SYMBOLS_METADATA_KEY, AVRO_FIELD_DEFAULT_METADATA_KEY, AVRO_NAME_METADATA_KEY,
+ AVRO_NAMESPACE_METADATA_KEY, Array, Attributes, ComplexType, Enum, Fixed, Map, Nullability,
+ PrimitiveType, Record, Schema, Type, TypeName, make_full_name,
};
use arrow_schema::{
- ArrowError, DataType, Field, Fields, IntervalUnit, TimeUnit, UnionFields, UnionMode,
- DECIMAL128_MAX_PRECISION, DECIMAL256_MAX_PRECISION,
+ ArrowError, DECIMAL128_MAX_PRECISION, DECIMAL256_MAX_PRECISION, DataType, Field, Fields,
+ IntervalUnit, TimeUnit, UnionFields, UnionMode,
};
#[cfg(feature = "small_decimals")]
use arrow_schema::{DECIMAL32_MAX_PRECISION, DECIMAL64_MAX_PRECISION};
@@ -324,14 +324,14 @@ impl AvroDataType {
Codec::Null => {
return Err(ArrowError::SchemaError(
"Default for `null` type must be JSON null".to_string(),
- ))
+ ));
}
Codec::Boolean => match default_json {
Value::Bool(b) => AvroLiteral::Boolean(*b),
_ => {
return Err(ArrowError::SchemaError(
"Boolean default must be a JSON boolean".to_string(),
- ))
+ ));
}
},
Codec::Int32 | Codec::Date32 | Codec::TimeMillis => {
@@ -393,7 +393,7 @@ impl AvroDataType {
_ => {
return Err(ArrowError::SchemaError(
"Default value must be a JSON array for Avro array
type".to_string(),
- ))
+ ));
}
},
Codec::Map(val_dt) => match default_json {
@@ -407,7 +407,7 @@ impl AvroDataType {
_ => {
return Err(ArrowError::SchemaError(
"Default value must be a JSON object for Avro map
type".to_string(),
- ))
+ ));
}
},
Codec::Struct(fields) => match default_json {
@@ -449,7 +449,7 @@ impl AvroDataType {
_ => {
return Err(ArrowError::SchemaError(
"Default value for record/struct must be a JSON
object".to_string(),
- ))
+ ));
}
},
Codec::Union(encodings, _, _) => {
@@ -1622,7 +1622,7 @@ impl<'a> Maker<'a> {
_ => {
return Err(ArrowError::ParseError(format!(
"Illegal promotion {write_primitive:?} to
{read_primitive:?}"
- )))
+ )));
}
};
let mut datatype = self.parse_type(reader_schema, None)?;
@@ -1894,8 +1894,8 @@ impl<'a> Maker<'a> {
mod tests {
use super::*;
use crate::schema::{
- Array, Attributes, ComplexType, Field as AvroFieldSchema, Fixed, PrimitiveType, Record,
- Schema, Type, TypeName, AVRO_ROOT_RECORD_DEFAULT_NAME,
+ AVRO_ROOT_RECORD_DEFAULT_NAME, Array, Attributes, ComplexType, Field as AvroFieldSchema,
+ Fixed, PrimitiveType, Record, Schema, Type, TypeName,
};
use indexmap::IndexMap;
use serde_json::{self, Value};
diff --git a/arrow-avro/src/reader/header.rs b/arrow-avro/src/reader/header.rs
index 240536c311..aac267f50e 100644
--- a/arrow-avro/src/reader/header.rs
+++ b/arrow-avro/src/reader/header.rs
@@ -17,9 +17,9 @@
//! Decoder for [`Header`]
-use crate::compression::{CompressionCodec, CODEC_METADATA_KEY};
+use crate::compression::{CODEC_METADATA_KEY, CompressionCodec};
use crate::reader::vlq::VLQDecoder;
-use crate::schema::{Schema, SCHEMA_METADATA_KEY};
+use crate::schema::{SCHEMA_METADATA_KEY, Schema};
use arrow_schema::ArrowError;
use std::io::BufRead;
diff --git a/arrow-avro/src/reader/mod.rs b/arrow-avro/src/reader/mod.rs
index f14467e86e..def0fffe39 100644
--- a/arrow-avro/src/reader/mod.rs
+++ b/arrow-avro/src/reader/mod.rs
@@ -477,8 +477,8 @@
use crate::codec::AvroFieldBuilder;
use crate::reader::header::read_header;
use crate::schema::{
- AvroSchema, Fingerprint, FingerprintAlgorithm, Schema, SchemaStore, CONFLUENT_MAGIC,
- SINGLE_OBJECT_MAGIC,
+ AvroSchema, CONFLUENT_MAGIC, Fingerprint, FingerprintAlgorithm, SINGLE_OBJECT_MAGIC, Schema,
+ SchemaStore,
};
use arrow_array::{RecordBatch, RecordBatchReader};
use arrow_schema::{ArrowError, SchemaRef};
@@ -695,7 +695,7 @@ impl Decoder {
None => {
return Err(ArrowError::ParseError(
"Missing magic bytes and fingerprint".to_string(),
- ))
+ ));
}
}
}
@@ -1267,9 +1267,9 @@ mod test {
use crate::reader::record::RecordDecoder;
use crate::reader::{Decoder, Reader, ReaderBuilder};
use crate::schema::{
- AvroSchema, Fingerprint, FingerprintAlgorithm, PrimitiveType, SchemaStore,
- AVRO_ENUM_SYMBOLS_METADATA_KEY, AVRO_NAMESPACE_METADATA_KEY, AVRO_NAME_METADATA_KEY,
- CONFLUENT_MAGIC, SINGLE_OBJECT_MAGIC,
+ AVRO_ENUM_SYMBOLS_METADATA_KEY, AVRO_NAME_METADATA_KEY, AVRO_NAMESPACE_METADATA_KEY,
+ AvroSchema, CONFLUENT_MAGIC, Fingerprint, FingerprintAlgorithm, PrimitiveType,
+ SINGLE_OBJECT_MAGIC, SchemaStore,
};
use crate::test_util::arrow_test_data;
use crate::writer::AvroWriter;
@@ -1289,7 +1289,7 @@ mod test {
use arrow_array::types::{Int32Type, IntervalMonthDayNanoType};
use arrow_array::*;
use arrow_buffer::{
- i256, Buffer, IntervalMonthDayNano, NullBuffer, OffsetBuffer, ScalarBuffer,
+ Buffer, IntervalMonthDayNano, NullBuffer, OffsetBuffer, ScalarBuffer, i256,
};
#[cfg(feature = "avro_custom_types")]
use arrow_schema::{
@@ -1302,8 +1302,8 @@ mod test {
};
use bytes::Bytes;
use futures::executor::block_on;
- use futures::{stream, Stream, StreamExt, TryStreamExt};
- use serde_json::{json, Value};
+ use futures::{Stream, StreamExt, TryStreamExt, stream};
+ use serde_json::{Value, json};
use std::collections::HashMap;
use std::fs::File;
use std::io::{BufReader, Cursor};
@@ -2810,7 +2810,7 @@ mod test {
let top_meta = proj_field.metadata().clone();
let (expected_field_ref, expected_col): (Arc<Field>, ArrayRef) =
match (full_field.data_type(), proj_field.data_type()) {
- (&DataType::List(_), DataType::List(ref proj_elem)) => {
+ (&DataType::List(_), DataType::List(proj_elem)) => {
let new_col =
rebuild_list_array_with_element(&col_full, proj_elem.clone(), false);
let nf = Field::new(
@@ -2821,7 +2821,7 @@ mod test {
.with_metadata(top_meta);
(Arc::new(nf), new_col)
}
- (&DataType::LargeList(_), DataType::LargeList(ref proj_elem)) => {
+ (&DataType::LargeList(_), DataType::LargeList(proj_elem)) => {
let new_col =
rebuild_list_array_with_element(&col_full, proj_elem.clone(), true);
let nf = Field::new(
diff --git a/arrow-avro/src/reader/record.rs b/arrow-avro/src/reader/record.rs
index f2376e5ade..99e782b0fd 100644
--- a/arrow-avro/src/reader/record.rs
+++ b/arrow-avro/src/reader/record.rs
@@ -21,15 +21,15 @@ use crate::codec::{
};
use crate::reader::cursor::AvroCursor;
use crate::schema::Nullability;
-use arrow_array::builder::{Decimal128Builder, Decimal256Builder, IntervalMonthDayNanoBuilder};
#[cfg(feature = "small_decimals")]
use arrow_array::builder::{Decimal32Builder, Decimal64Builder};
+use arrow_array::builder::{Decimal128Builder, Decimal256Builder, IntervalMonthDayNanoBuilder};
use arrow_array::types::*;
use arrow_array::*;
use arrow_buffer::*;
use arrow_schema::{
- ArrowError, DataType, Field as ArrowField, FieldRef, Fields, Schema as ArrowSchema, SchemaRef,
- UnionFields, UnionMode, DECIMAL128_MAX_PRECISION, DECIMAL256_MAX_PRECISION,
+ ArrowError, DECIMAL128_MAX_PRECISION, DECIMAL256_MAX_PRECISION, DataType, Field as ArrowField,
+ FieldRef, Fields, Schema as ArrowSchema, SchemaRef, UnionFields, UnionMode,
};
#[cfg(feature = "small_decimals")]
use arrow_schema::{DECIMAL32_MAX_PRECISION, DECIMAL64_MAX_PRECISION};
@@ -4047,8 +4047,8 @@ mod tests {
}
#[test]
- fn test_record_append_default_missing_fields_without_projector_defaults_yields_type_nulls_or_empties(
- ) {
+ fn test_record_append_default_missing_fields_without_projector_defaults_yields_type_nulls_or_empties()
+ {
let fields = vec![("a", DataType::Int32, true), ("b", DataType::Utf8,
true)];
let mut field_refs: Vec<FieldRef> = Vec::new();
let mut encoders: Vec<Decoder> = Vec::new();
diff --git a/arrow-avro/src/schema.rs b/arrow-avro/src/schema.rs
index 6b985a2fc6..b3081bbd09 100644
--- a/arrow-avro/src/schema.rs
+++ b/arrow-avro/src/schema.rs
@@ -22,7 +22,7 @@ use arrow_schema::{
UnionMode,
};
use serde::{Deserialize, Serialize};
-use serde_json::{json, Map as JsonMap, Value};
+use serde_json::{Map as JsonMap, Value, json};
#[cfg(feature = "sha256")]
use sha2::{Digest, Sha256};
use std::borrow::Cow;
@@ -1432,7 +1432,7 @@ fn datatype_to_avro(
_ => {
return Err(ArrowError::SchemaError(
"Map 'entries' field must be Struct(key,value)".into(),
- ))
+ ));
}
};
let values_schema = process_datatype(
@@ -1556,7 +1556,7 @@ fn datatype_to_avro(
other => {
return Err(ArrowError::NotYetImplemented(format!(
"Arrow type {other:?} has no Avro representation"
- )))
+ )));
}
};
Ok((val, extras))
@@ -2148,9 +2148,11 @@ mod tests {
store.lookup(&Fingerprint::Rabin(fp_val)).cloned(),
Some(schema.clone())
);
- assert!(store
- .lookup(&Fingerprint::Rabin(fp_val.wrapping_add(1)))
- .is_none());
+ assert!(
+ store
+ .lookup(&Fingerprint::Rabin(fp_val.wrapping_add(1)))
+ .is_none()
+ );
}
Fingerprint::Id(_id) => {
unreachable!("This test should only generate Rabin
fingerprints")
diff --git a/arrow-avro/src/writer/encoder.rs b/arrow-avro/src/writer/encoder.rs
index 363fee6f22..0fa217e894 100644
--- a/arrow-avro/src/writer/encoder.rs
+++ b/arrow-avro/src/writer/encoder.rs
@@ -241,9 +241,9 @@ impl<'a> FieldEncoder<'a> {
DataType::Time32(TimeUnit::Millisecond) => {
Encoder::Time32Millis(IntEncoder(array.as_primitive::<Time32MillisecondType>()))
}
- DataType::Time64(TimeUnit::Microsecond) => {
- Encoder::Time64Micros(LongEncoder(array.as_primitive::<Time64MicrosecondType>()))
- }
+ DataType::Time64(TimeUnit::Microsecond) => Encoder::Time64Micros(LongEncoder(
+ array.as_primitive::<Time64MicrosecondType>(),
+ )),
DataType::Float32 => {
Encoder::Float32(F32Encoder(array.as_primitive::<Float32Type>()))
}
@@ -277,36 +277,30 @@ impl<'a> FieldEncoder<'a> {
}
},
DataType::Interval(unit) => match unit {
- IntervalUnit::MonthDayNano => {
- Encoder::IntervalMonthDayNano(DurationEncoder(
- array.as_primitive::<IntervalMonthDayNanoType>(),
- ))
- }
- IntervalUnit::YearMonth => {
- Encoder::IntervalYearMonth(DurationEncoder(
- array.as_primitive::<IntervalYearMonthType>(),
- ))
- }
+ IntervalUnit::MonthDayNano => Encoder::IntervalMonthDayNano(DurationEncoder(
+ array.as_primitive::<IntervalMonthDayNanoType>(),
+ )),
+ IntervalUnit::YearMonth => Encoder::IntervalYearMonth(DurationEncoder(
+ array.as_primitive::<IntervalYearMonthType>(),
+ )),
IntervalUnit::DayTime => Encoder::IntervalDayTime(DurationEncoder(
array.as_primitive::<IntervalDayTimeType>(),
)),
- }
- DataType::Duration(tu) => {
- match tu {
- TimeUnit::Second => Encoder::DurationSeconds(LongEncoder(
- array.as_primitive::<DurationSecondType>(),
- )),
- TimeUnit::Millisecond => Encoder::DurationMillis(LongEncoder(
- array.as_primitive::<DurationMillisecondType>(),
- )),
- TimeUnit::Microsecond => Encoder::DurationMicros(LongEncoder(
- array.as_primitive::<DurationMicrosecondType>(),
- )),
- TimeUnit::Nanosecond => Encoder::DurationNanos(LongEncoder(
- array.as_primitive::<DurationNanosecondType>(),
- )),
- }
- }
+ },
+ DataType::Duration(tu) => match tu {
+ TimeUnit::Second => Encoder::DurationSeconds(LongEncoder(
+ array.as_primitive::<DurationSecondType>(),
+ )),
+ TimeUnit::Millisecond => Encoder::DurationMillis(LongEncoder(
+ array.as_primitive::<DurationMillisecondType>(),
+ )),
+ TimeUnit::Microsecond => Encoder::DurationMicros(LongEncoder(
+ array.as_primitive::<DurationMicrosecondType>(),
+ )),
+ TimeUnit::Nanosecond => Encoder::DurationNanos(LongEncoder(
+ array.as_primitive::<DurationNanosecondType>(),
+ )),
+ },
other => {
return Err(ArrowError::NotYetImplemented(format!(
"Avro scalar type not yet supported: {other:?}"
@@ -349,12 +343,12 @@ impl<'a> FieldEncoder<'a> {
other => {
return Err(ArrowError::SchemaError(format!(
"Avro array site requires Arrow List/LargeList, found:
{other:?}"
- )))
+ )));
}
},
- FieldPlan::Decimal {size} => match array.data_type() {
+ FieldPlan::Decimal { size } => match array.data_type() {
#[cfg(feature = "small_decimals")]
- DataType::Decimal32(_,_) => {
+ DataType::Decimal32(_, _) => {
let arr = array
.as_any()
.downcast_ref::<Decimal32Array>()
@@ -362,49 +356,61 @@ impl<'a> FieldEncoder<'a> {
Encoder::Decimal32(DecimalEncoder::<4, Decimal32Array>::new(arr, *size))
}
#[cfg(feature = "small_decimals")]
- DataType::Decimal64(_,_) => {
+ DataType::Decimal64(_, _) => {
let arr = array
.as_any()
.downcast_ref::<Decimal64Array>()
.ok_or_else(|| ArrowError::SchemaError("Expected
Decimal64Array".into()))?;
Encoder::Decimal64(DecimalEncoder::<8,
Decimal64Array>::new(arr, *size))
}
- DataType::Decimal128(_,_) => {
+ DataType::Decimal128(_, _) => {
let arr = array
.as_any()
.downcast_ref::<Decimal128Array>()
- .ok_or_else(|| ArrowError::SchemaError("Expected Decimal128Array".into()))?;
+ .ok_or_else(|| {
+ ArrowError::SchemaError("Expected Decimal128Array".into())
+ })?;
Encoder::Decimal128(DecimalEncoder::<16, Decimal128Array>::new(arr, *size))
}
- DataType::Decimal256(_,_) => {
+ DataType::Decimal256(_, _) => {
let arr = array
.as_any()
.downcast_ref::<Decimal256Array>()
- .ok_or_else(|| ArrowError::SchemaError("Expected Decimal256Array".into()))?;
+ .ok_or_else(|| {
+ ArrowError::SchemaError("Expected Decimal256Array".into())
+ })?;
Encoder::Decimal256(DecimalEncoder::<32, Decimal256Array>::new(arr, *size))
}
other => {
return Err(ArrowError::SchemaError(format!(
"Avro decimal site requires Arrow Decimal 32, 64, 128,
or 256, found: {other:?}"
- )))
+ )));
}
},
FieldPlan::Uuid => {
let arr = array
.as_any()
.downcast_ref::<FixedSizeBinaryArray>()
- .ok_or_else(|| ArrowError::SchemaError("Expected FixedSizeBinaryArray".into()))?;
+ .ok_or_else(|| {
+ ArrowError::SchemaError("Expected FixedSizeBinaryArray".into())
+ })?;
Encoder::Uuid(UuidEncoder(arr))
}
- FieldPlan::Map { values_nullability,
- value_plan } => {
+ FieldPlan::Map {
+ values_nullability,
+ value_plan,
+ } => {
let arr = array
.as_any()
.downcast_ref::<MapArray>()
.ok_or_else(|| ArrowError::SchemaError("Expected
MapArray".into()))?;
- Encoder::Map(Box::new(MapEncoder::try_new(arr,
*values_nullability, value_plan.as_ref())?))
+ Encoder::Map(Box::new(MapEncoder::try_new(
+ arr,
+ *values_nullability,
+ value_plan.as_ref(),
+ )?))
}
- FieldPlan::Enum { symbols} => match array.data_type() {
+ FieldPlan::Enum { symbols } => match array.data_type() {
DataType::Dictionary(key_dt, value_dt) => {
if **key_dt != DataType::Int32 || **value_dt != DataType::Utf8 {
return Err(ArrowError::SchemaError(
@@ -447,9 +453,9 @@ impl<'a> FieldEncoder<'a> {
other => {
return Err(ArrowError::SchemaError(format!(
"Avro enum site requires DataType::Dictionary, found:
{other:?}"
- )))
+ )));
}
- }
+ },
FieldPlan::Union { bindings } => {
let arr = array
.as_any()
@@ -509,11 +515,7 @@ impl<'a> FieldEncoder<'a> {
fn union_value_branch_byte(null_order: Nullability, is_null: bool) -> u8 {
let nulls_first = null_order == Nullability::default();
- if nulls_first == is_null {
- 0x00
- } else {
- 0x02
- }
+ if nulls_first == is_null { 0x00 } else { 0x02 }
}
/// Per‑site encoder plan for a field. This mirrors the Avro structure, so nested
@@ -728,7 +730,7 @@ impl FieldPlan {
other => {
return Err(ArrowError::SchemaError(format!(
"Avro struct maps to Arrow Struct, found:
{other:?}"
- )))
+ )));
}
};
let mut bindings = Vec::with_capacity(avro_fields.len());
@@ -767,7 +769,7 @@ impl FieldPlan {
other => {
return Err(ArrowError::SchemaError(format!(
"Avro map maps to Arrow DataType::Map, found:
{other:?}"
- )))
+ )));
}
};
let entries_struct_fields = match entries_field.data_type() {
@@ -775,7 +777,7 @@ impl FieldPlan {
other => {
return Err(ArrowError::SchemaError(format!(
"Arrow Map entries must be Struct, found:
{other:?}"
- )))
+ )));
}
};
let value_idx =
@@ -822,7 +824,7 @@ impl FieldPlan {
return Err(ArrowError::SchemaError(format!(
"Avro decimal requires Arrow decimal, got
{other:?} for field '{}'",
arrow_field.name()
- )))
+ )));
}
};
let sc = scale_opt.unwrap_or(0) as i32; // Avro scale defaults to 0 if absent
@@ -837,12 +839,13 @@ impl FieldPlan {
})
}
Codec::Interval => match arrow_field.data_type() {
- DataType::Interval(IntervalUnit::MonthDayNano | IntervalUnit::YearMonth | IntervalUnit::DayTime
+ DataType::Interval(
+ IntervalUnit::MonthDayNano | IntervalUnit::YearMonth | IntervalUnit::DayTime,
) => Ok(FieldPlan::Scalar),
other => Err(ArrowError::SchemaError(format!(
"Avro duration logical type requires Arrow
Interval(MonthDayNano), found: {other:?}"
))),
- }
+ },
Codec::Union(avro_branches, _, UnionMode::Dense) => {
let arrow_union_fields = match arrow_field.data_type() {
DataType::Union(fields, UnionMode::Dense) => fields,
@@ -882,11 +885,9 @@ impl FieldPlan {
Ok(FieldPlan::Union { bindings })
}
- Codec::Union(_, _, UnionMode::Sparse) => {
- Err(ArrowError::NotYetImplemented(
- "Sparse Arrow unions are not yet supported".to_string(),
- ))
- }
+ Codec::Union(_, _, UnionMode::Sparse) => Err(ArrowError::NotYetImplemented(
+ "Sparse Arrow unions are not yet supported".to_string(),
+ )),
_ => Ok(FieldPlan::Scalar),
}
}
@@ -1069,7 +1070,7 @@ impl<'a> MapEncoder<'a> {
other => {
return Err(ArrowError::SchemaError(format!(
"Avro map requires string keys; Arrow key type must be
Utf8/LargeUtf8, found: {other:?}"
- )))
+ )));
}
};
@@ -1079,13 +1080,13 @@ impl<'a> MapEncoder<'a> {
other => {
return Err(ArrowError::SchemaError(format!(
"Arrow Map entries must be Struct, found: {other:?}"
- )))
+ )));
}
},
_ => {
return Err(ArrowError::SchemaError(
"Expected MapArray with DataType::Map".into(),
- ))
+ ));
}
};
@@ -1313,7 +1314,7 @@ impl<'a, O: OffsetSizeTrait> ListEncoder<'a, O> {
_ => {
return Err(ArrowError::SchemaError(
"Expected List or LargeList for ListEncoder".into(),
- ))
+ ));
}
};
let values_enc = prepare_value_site_encoder(
diff --git a/arrow-avro/src/writer/format.rs b/arrow-avro/src/writer/format.rs
index 90a2856330..f885fb86a6 100644
--- a/arrow-avro/src/writer/format.rs
+++ b/arrow-avro/src/writer/format.rs
@@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.
-use crate::compression::{CompressionCodec, CODEC_METADATA_KEY};
+use crate::compression::{CODEC_METADATA_KEY, CompressionCodec};
use crate::schema::{AvroSchema, SCHEMA_METADATA_KEY};
use crate::writer::encoder::write_long;
use arrow_schema::{ArrowError, Schema};
diff --git a/arrow-avro/src/writer/mod.rs b/arrow-avro/src/writer/mod.rs
index 8f7cb666db..0b87d6f13f 100644
--- a/arrow-avro/src/writer/mod.rs
+++ b/arrow-avro/src/writer/mod.rs
@@ -62,7 +62,7 @@ use crate::compression::CompressionCodec;
use crate::schema::{
AvroSchema, Fingerprint, FingerprintAlgorithm, FingerprintStrategy, SCHEMA_METADATA_KEY,
};
-use crate::writer::encoder::{write_long, RecordEncoder, RecordEncoderBuilder};
+use crate::writer::encoder::{RecordEncoder, RecordEncoderBuilder, write_long};
use crate::writer::format::{AvroBinaryFormat, AvroFormat, AvroOcfFormat};
use arrow_array::RecordBatch;
use arrow_schema::{ArrowError, Schema};